Skip to content

Commit 4e142d4

Browse files
authored
Clarify the Dataloading process
Clarify the comments, the train set handling, and the index position of the GT text in the lines.txt file
1 parent e03608e commit 4e142d4

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

Diff for: src/DataLoader.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ def __init__(self, filePath, batchSize, imgSize, maxTextLen, load_aug=True):
7575
lineSplit[0] + '.png'
7676

7777
# GT text are columns starting at 10
78+
# see lines.txt to check where the GT text starts; in this case it is the 10th column (index 9 after splitting)
7879
gtText_list = lineSplit[9].split('|')
7980
gtText = self.truncateLabel(' '.join(gtText_list), maxTextLen)
8081
chars = chars.union(set(list(gtText))) ## taking the unique characters present
@@ -93,7 +94,7 @@ def __init__(self, filePath, batchSize, imgSize, maxTextLen, load_aug=True):
9394
print("Warning, damaged images found:", bad_samples)
9495
print("Damaged images expected:", bad_samples_reference)
9596

96-
# split into training and validation set: 90% - 10%
97+
# split into training and validation set: 95% - 5%
9798
splitIdx = int(0.95 * len(self.samples))
9899
self.trainSamples = self.samples[:splitIdx]
99100
self.validationSamples = self.samples[splitIdx:]
@@ -130,7 +131,7 @@ def trainSet(self):
130131
"switch to randomly chosen subset of training set"
131132
self.dataAugmentation = True
132133
self.currIdx = 0
133-
#random.shuffle(self.trainSamples)
134+
random.shuffle(self.trainSamples) # shuffle the samples in each epoch
134135
self.samples = self.trainSamples #[:self.numTrainSamplesPerEpoch]
135136

136137
def validationSet(self):

0 commit comments

Comments
 (0)