Пример #1
0
def gen_iter_test(path, text_field, label_field, args):
    """Build a test dataset from a TSV file and an ordered iterator over it.

    The file at ``path`` is read as a ``data.TabularDataset`` without
    skipping a header row. Its three columns are mapped, in order, to
    ``pid`` (processed by ``label_field``) and ``question1`` /
    ``question2`` (both processed by ``text_field``).

    The dataset is then wrapped in a ``data.Iterator`` that batches by
    ``args.batch_size``, does not shuffle and does not repeat.

    Returns:
        A ``(dataset, iterator)`` tuple.
    """
    column_fields = [
        ('pid', label_field),
        ('question1', text_field),
        ('question2', text_field),
    ]
    dataset = data.TabularDataset(
        path=path,
        format='tsv',
        skip_header=False,
        fields=column_fields,
    )

    # A plain Iterator is used here; an earlier BucketIterator variant
    # (sorted by the combined length of both questions) was disabled.
    # NOTE(review): integer device ids (0 for GPU, -1 for CPU) are a
    # legacy convention -- confirm the installed torchtext still honours it.
    iterator_options = dict(
        dataset=dataset,
        batch_size=args.batch_size,
        device=-1,
        shuffle=False,
        repeat=False,
    )
    batches = data.Iterator(**iterator_options)
    return dataset, batches
Пример #2
0
def gen_iter_test(path, text_field, label_field, args):
    """Load a TSV test set and return it together with a batch iterator.

    ``path`` is parsed as a ``data.TabularDataset`` (no header row is
    skipped) whose columns are, in order: ``id`` handled by
    ``label_field``, then ``question1`` and ``question2`` handled by
    ``text_field``.

    The returned iterator is a ``data.Iterator`` over that dataset with
    batch size ``args.batch_size``, shuffling and repeating disabled.

    Returns:
        A ``(dataset, iterator)`` tuple.
    """
    schema = [('id', label_field)]
    schema.extend((column, text_field) for column in ('question1', 'question2'))

    test_set = data.TabularDataset(
        path=path, format='tsv', skip_header=False, fields=schema)

    # NOTE(review): device is hard-coded to 0 (0 for GPU, -1 for CPU per the
    # original comment) -- confirm the installed torchtext still accepts
    # integer device ids and that a GPU is expected to be present.
    test_batches = data.Iterator(dataset=test_set,
                                 batch_size=args.batch_size,
                                 device=0,
                                 shuffle=False,
                                 repeat=False)
    return test_set, test_batches