def createDatasetSingleThread(self, opts):
    """Generate every batch of the dataset serially on the calling thread.

    Used for debugging: it is hard to debug a multi-threaded application,
    so this is the single-process twin of ``createDataset``.

    Args:
        opts: parsed options; reads ``opts.batchSize`` (lines per batch),
            ``opts.output`` (cache location) and ``opts.count`` (limit on
            the number of source lines).
    """
    dataset = TextDataset(self.WORD_LIST_FILE, batchSize=opts.batchSize,
                          cache=opts.output, limit=opts.count,
                          overwriteCache=True)
    # Touch every batch so it gets generated (and written to the cache).
    # Indexing is the idiomatic spelling of __getitem__, and we drop the
    # returned batches instead of hoarding them all in an unused list.
    for batchIndex in range(len(dataset)):
        dataset[batchIndex]
    print('Total lines count=%d' % (len(dataset) * opts.batchSize))
def threadInitializer(fname, batchSize, cache, limit):
    """Pool-worker initializer: build one ``TextDataset`` per process.

    The dataset is stashed in the module-level global
    ``datasetInOtherthread`` so the worker function invoked via
    ``apply_async`` can reach it without pickling the dataset itself.
    """
    global datasetInOtherthread
    datasetInOtherthread = TextDataset(
        fname,
        batchSize=batchSize,
        cache=cache,
        limit=limit,
        overwriteCache=True,
    )
class DataGenerator(Sequence):
    """Keras ``Sequence`` adapter that feeds batches from a ``TextDataset``.

    Each item is the ``(inputs, outputs)`` pair expected by a CTC-trained
    model: the real targets travel inside ``inputs`` and ``outputs`` only
    carries a dummy tensor for the dummy CTC loss.
    """

    def __init__(self, txtFile, **kwargs):
        """Wrap *txtFile* in a ``TextDataset``; extra kwargs pass through."""
        self.ds = TextDataset(txtFile, **kwargs)

    def __len__(self):
        # Number of batches per epoch (idiomatic len() instead of __len__()).
        return len(self.ds)

    def __getitem__(self, batchIndex):
        """Return one ``(inputs, outputs)`` batch for ``model.fit``."""
        unNormalized = self.ds.getUnNormalized(batchIndex)
        images, labels = normalizeBatch(unNormalized, channel_axis=2)
        # NOTE(review): `converter` and `labelWidth` look like module-level
        # globals — confirm they are defined where this class is used.
        labels, label_lengths = converter.encodeStrListRaw(labels, labelWidth)
        inputs = {
            'the_images': images,
            'the_labels': np.array(labels),
            'label_lengths': np.array(label_lengths),
        }
        # Dummy data for the dummy loss function.  Sized from the actual
        # batch instead of a `batchSize` name that was never defined in this
        # scope (NameError unless a global happened to exist) and would be
        # wrong for a final partial batch anyway.
        outputs = {'ctc': np.zeros([len(images)])}
        return (inputs, outputs)
def createDataset(self, opts):
    """Generate and cache the whole dataset using a process pool.

    A local ``TextDataset`` is built only to learn the batch count; each
    worker process builds its own copy via ``threadInitializer`` so that
    no unpicklable dataset state crosses the process boundary.

    Args:
        opts: parsed options; reads ``opts.batchSize``, ``opts.output``
            (cache location) and ``opts.count`` (line limit).
    """
    dataset = TextDataset(self.WORD_LIST_FILE, batchSize=opts.batchSize,
                          cache=opts.output, limit=opts.count,
                          overwriteCache=True)
    pool = multiprocessing.Pool(
        os.cpu_count(),
        initializer=threadInitializer,
        initargs=(self.WORD_LIST_FILE, opts.batchSize, opts.output,
                  opts.count))
    try:
        results = [pool.apply_async(processInThread, (i, ))
                   for i in range(len(dataset))]
        print('Total lines count=%d' % (len(dataset) * opts.batchSize))
        # Block until every batch is done; .get() re-raises any worker
        # exception here on the parent process.
        for result in results:
            result.get()
    finally:
        # The original leaked the pool — make sure workers are reaped.
        pool.close()
        pool.join()
# --- Training setup: output dir, seeding, data loaders, loss ------------
opt.outdir = 'expr'
# Create the experiment output directory (shell mkdir -p: no-op if present).
os.system('mkdir -p {0}'.format(opt.outdir))
opt.manualSeed = random.randint(1, 10000)  # fix seed
print("Random Seed: ", opt.manualSeed)
# Seed every RNG in play so runs are reproducible given the printed seed.
random.seed(opt.manualSeed)
np.random.seed(opt.manualSeed)
torch.manual_seed(opt.manualSeed)
if torch.cuda.is_available() and not opt.cuda:
    print(
        "WARNING: You have a CUDA device, so you should probably run with --cuda"
    )
# Train/validation loaders share the batch size but have their own
# source file, line limit and cache location.
train_loader = TextDataset(opt.traindata,
                           batchSize=opt.batchSize,
                           limit=opt.traindata_limit,
                           cache=opt.traindata_cache)
test_loader = TextDataset(opt.valdata,
                          batchSize=opt.batchSize,
                          limit=opt.valdata_limit,
                          cache=opt.valdata_cache)
nclass = converter.totalGlyphs
print('Number of char class = %d' % nclass)
# CTC loss with the last class index reserved as the blank symbol.
criterion = CTCLoss(blank=nclass - 1)


# custom weights initialization called on crnn
# NOTE(review): only the first statement of weights_init is visible in this
# chunk; the rest of its body continues beyond this excerpt.
def weights_init(m):
    classname = m.__class__.__name__
def __init__(self, txtFile, **kwargs):
    """Build the backing ``TextDataset`` from *txtFile*.

    All extra keyword arguments are forwarded to ``TextDataset``
    unchanged (batch size, cache location, limit, ...).
    """
    dataset = TextDataset(txtFile, **kwargs)
    self.ds = dataset