def gen_iter_test(path, text_field, label_field, args):
    """Build a TabularDataset from a TSV file and wrap it in an iterator.

    NOTE(review): another ``gen_iter_test`` is defined later in this file;
    if both sit at module level, the later definition replaces this one.

    Args:
        path: Location of the TSV file to load.
        text_field: Field applied to the ``question1`` and ``question2``
            columns.
        label_field: Field applied to the ``pid`` column.
        args: Object exposing a ``batch_size`` attribute.

    Returns:
        A ``(dataset, iterator)`` tuple.
    """
    # Column order: pid, question1, question2.
    columns = [
        ('pid', label_field),
        ('question1', text_field),
        ('question2', text_field),
    ]

    # skip_header=False: the first row is parsed as data, not as column names.
    dataset = data.TabularDataset(
        path=path,
        format='tsv',
        skip_header=False,
        fields=columns,
    )

    iterator_options = {
        'dataset': dataset,
        'batch_size': args.batch_size,
        'device': -1,  # 0 for GPU, -1 for CPU
        'shuffle': False,
        'repeat': False,
    }
    iterator = data.Iterator(**iterator_options)

    return dataset, iterator
def gen_iter_test(path, text_field, label_field, args):
    """Read a TSV file into a TabularDataset and create an iterator over it.

    NOTE(review): an earlier ``gen_iter_test`` exists in this file; if both
    are module-level, this later definition is the one callers get.

    Args:
        path: Location of the TSV file to load.
        text_field: Field used for both ``question1`` and ``question2``.
        label_field: Field used for the ``id`` column.
        args: Object exposing a ``batch_size`` attribute.

    Returns:
        A ``(dataset, iterator)`` tuple.
    """
    id_column = ('id', label_field)
    question_columns = [
        ('question1', text_field),
        ('question2', text_field),
    ]

    # skip_header=False: the first row is parsed as data, not as column names.
    loaded = data.TabularDataset(
        path=path,
        format='tsv',
        skip_header=False,
        fields=[id_column] + question_columns,
    )

    batches = data.Iterator(
        dataset=loaded,
        batch_size=args.batch_size,
        device=0,  # 0 for GPU, -1 for CPU
        shuffle=False,
        repeat=False,
    )
    return loaded, batches