示例#1
0
def get_training_data_and_feature(opt, data_loader, preprocessor):
    """Prepare train/dev features, label dictionaries and transition matrices.

    When ``opt.load_feature`` is truthy, the features are read with
    ``load_feature`` from the paths obtained by replacing ``'.json'`` with
    ``'.saved.pk'`` in ``opt.train_path`` and ``opt.dev_path``. If that raises
    ``FileNotFoundError``, both splits are rebuilt from the raw data and saved,
    by re-entering this function with ``load_feature`` off and ``save_feature``
    on. Otherwise the features are built from the raw data and only saved when
    ``opt.save_feature`` is truthy.

    Args:
        opt: Options object. Reads ``train_path``, ``dev_path``,
            ``load_feature`` and ``save_feature``. The two flags are
            overridden only for the duration of a cache rebuild and are put
            back to the values the caller passed in, even if the rebuild fails.
        data_loader: Object whose ``load_data(path=...)`` returns a 4-tuple
            ``(examples, max_len, max_support_size, trans_mat)``.
        preprocessor: Object whose ``construct_feature(examples,
            max_support_size, label2id, id2label)`` returns the features.

    Returns:
        An 8-tuple ``(train_features, train_label2id, train_id2label,
        train_trans_mat, dev_features, dev_label2id, dev_id2label,
        dev_trans_mat)``.
    """
    if opt.load_feature:
        try:
            train_features, train_label2id, train_id2label, train_trans_mat = load_feature(
                opt.train_path.replace('.json', '.saved.pk'))
            dev_features, dev_label2id, dev_id2label, dev_trans_mat = load_feature(
                opt.dev_path.replace('.json', '.saved.pk'))
        except FileNotFoundError:
            # No saved feature file yet: rebuild from the raw data and save it.
            previous_flags = (opt.load_feature, opt.save_feature)
            opt.load_feature, opt.save_feature = False, True
            try:
                return get_training_data_and_feature(opt, data_loader, preprocessor)
            finally:
                # Restore the caller's own flag values (not hard-coded ones),
                # and do so even when the rebuild raises, so ``opt`` is never
                # left in the temporary "rebuild" state.
                opt.load_feature, opt.save_feature = previous_flags
    else:
        # The second element (max length) of load_data's result is not needed here.
        train_examples, _, train_max_support_size, train_trans_mat = data_loader.load_data(
            path=opt.train_path)
        dev_examples, _, dev_max_support_size, dev_trans_mat = data_loader.load_data(
            path=opt.dev_path)

        train_label2id, train_id2label = make_dict(train_examples)
        dev_label2id, dev_id2label = make_dict(dev_examples)
        logger.info(' Finish train dev prepare dict ')

        train_features = preprocessor.construct_feature(
            train_examples, train_max_support_size, train_label2id, train_id2label)
        dev_features = preprocessor.construct_feature(
            dev_examples, dev_max_support_size, dev_label2id, dev_id2label)
        logger.info(' Finish prepare train dev features ')
        if opt.save_feature:
            save_feature(opt.train_path.replace('.json', '.saved.pk'),
                         train_features, train_label2id, train_id2label, train_trans_mat)
            save_feature(opt.dev_path.replace('.json', '.saved.pk'),
                         dev_features, dev_label2id, dev_id2label, dev_trans_mat)
    return train_features, train_label2id, train_id2label, train_trans_mat, \
        dev_features, dev_label2id, dev_id2label, dev_trans_mat
示例#2
0
def get_training_data_and_feature(opt, data_loader, preprocessor):
    """Prepare train/dev features plus slot and intent label dictionaries.

    When ``opt.load_feature`` is truthy, the features are read with
    ``load_feature`` from the paths obtained by replacing ``'.json'`` with
    ``'.saved.pk'`` in ``opt.train_path`` and ``opt.dev_path``. If that raises
    ``FileNotFoundError``, both splits are rebuilt from the raw data and saved,
    by re-entering this function with ``load_feature`` off and ``save_feature``
    on. Otherwise the features are built from the raw data and only saved when
    ``opt.save_feature`` is truthy.

    Args:
        opt: Options object. Reads ``train_path``, ``dev_path``,
            ``load_feature``, ``save_feature`` and ``do_debug``; it is also
            forwarded to ``make_dict``. The two feature flags are overridden
            only for the duration of a cache rebuild and are put back to the
            values the caller passed in, even if the rebuild fails.
        data_loader: Object whose ``load_data(path=...)`` returns a 3-tuple
            whose first and third items are the examples and the maximum
            support size.
        preprocessor: Object whose ``construct_feature(examples,
            max_support_size, slot_label2id, slot_id2label, intent_label2id,
            intent_id2label)`` returns the features.

    Returns:
        A 6-tuple ``(train_features, (train_slot_label2id,
        train_slot_id2label), (train_intent_label2id, train_intent_id2label),
        dev_features, (dev_slot_label2id, dev_slot_id2label),
        (dev_intent_label2id, dev_intent_id2label))``.
    """
    if opt.load_feature:
        try:
            train_features, (train_slot_label2id, train_slot_id2label), \
                (train_intent_label2id, train_intent_id2label) = \
                load_feature(opt.train_path.replace('.json', '.saved.pk'))
            dev_features, (dev_slot_label2id, dev_slot_id2label), \
                (dev_intent_label2id, dev_intent_id2label) = \
                load_feature(opt.dev_path.replace('.json', '.saved.pk'))
        except FileNotFoundError:
            # No saved feature file yet: rebuild from the raw data and save it.
            previous_flags = (opt.load_feature, opt.save_feature)
            opt.load_feature, opt.save_feature = False, True
            try:
                return get_training_data_and_feature(opt, data_loader, preprocessor)
            finally:
                # Restore the caller's own flag values (not hard-coded ones),
                # and do so even when the rebuild raises, so ``opt`` is never
                # left in the temporary "rebuild" state.
                opt.load_feature, opt.save_feature = previous_flags
    else:
        train_examples, _, train_max_support_size = data_loader.load_data(
            path=opt.train_path)
        dev_examples, _, dev_max_support_size = data_loader.load_data(
            path=opt.dev_path)
        (train_slot_label2id, train_slot_id2label), \
            (train_intent_label2id, train_intent_id2label) = make_dict(opt, train_examples)
        (dev_slot_label2id, dev_slot_id2label), \
            (dev_intent_label2id, dev_intent_id2label) = make_dict(opt, dev_examples)
        logger.info(' Finish train dev prepare dict ')
        train_features = preprocessor.construct_feature(
            train_examples, train_max_support_size, train_slot_label2id,
            train_slot_id2label, train_intent_label2id, train_intent_id2label)
        dev_features = preprocessor.construct_feature(
            dev_examples, dev_max_support_size, dev_slot_label2id,
            dev_slot_id2label, dev_intent_label2id, dev_intent_id2label)
        logger.info(' Finish prepare train dev features ')
        if opt.do_debug:
            print('train_slot_label2id: {}'.format(train_slot_label2id))
            print('train_intent_label2id: {}'.format(train_intent_label2id))
            print('dev_slot_label2id: {}'.format(dev_slot_label2id))
            print('dev_intent_label2id: {}'.format(dev_intent_label2id))
            # Only the sizes are printed: the format string has a single
            # placeholder, so passing the collections as well had no effect.
            print('train_examples: {}'.format(len(train_examples)))
            print('train_features: {}'.format(len(train_features)))
        if opt.save_feature:
            save_feature(opt.train_path.replace('.json', '.saved.pk'),
                         train_features, train_slot_label2id, train_slot_id2label,
                         train_intent_label2id, train_intent_id2label)
            save_feature(opt.dev_path.replace('.json', '.saved.pk'),
                         dev_features, dev_slot_label2id, dev_slot_id2label,
                         dev_intent_label2id, dev_intent_id2label)
    return train_features, (train_slot_label2id, train_slot_id2label), \
        (train_intent_label2id, train_intent_id2label), \
        dev_features, (dev_slot_label2id, dev_slot_id2label), \
        (dev_intent_label2id, dev_intent_id2label)
示例#3
0
def get_testing_data_feature(opt, data_loader, preprocessor):
    """Prepare test features, label dictionaries and the transition matrix.

    When ``opt.load_feature`` is truthy, the features are read with
    ``load_feature`` from the path obtained by replacing ``'.json'`` with
    ``'.saved.pk'`` in ``opt.test_path``. If that raises
    ``FileNotFoundError``, the features are rebuilt from the raw data and
    saved, by re-entering this function with ``load_feature`` off and
    ``save_feature`` on. Otherwise the features are built from the raw data
    and only saved when ``opt.save_feature`` is truthy.

    Args:
        opt: Options object. Reads ``test_path``, ``load_feature`` and
            ``save_feature``. The two flags are overridden only for the
            duration of a cache rebuild and are put back to the values the
            caller passed in, even if the rebuild fails.
        data_loader: Object whose ``load_data(path=...)`` returns a 4-tuple
            ``(examples, max_len, max_support_size, trans_mat)``.
        preprocessor: Object whose ``construct_feature(examples,
            max_support_size, label2id, id2label)`` returns the features.

    Returns:
        A 4-tuple ``(test_features, test_label2id, test_id2label,
        test_trans_mat)``.
    """
    if opt.load_feature:
        try:
            test_features, test_label2id, test_id2label, test_trans_mat = \
                load_feature(opt.test_path.replace('.json', '.saved.pk'))
        except FileNotFoundError:
            # No saved feature file yet: rebuild from the raw data and save it.
            previous_flags = (opt.load_feature, opt.save_feature)
            opt.load_feature, opt.save_feature = False, True
            try:
                return get_testing_data_feature(opt, data_loader, preprocessor)
            finally:
                # Restore the caller's own flag values (not hard-coded ones),
                # and do so even when the rebuild raises, so ``opt`` is never
                # left in the temporary "rebuild" state.
                opt.load_feature, opt.save_feature = previous_flags
    else:
        # The second element (max length) of load_data's result is not needed here.
        test_examples, _, test_max_support_size, test_trans_mat = data_loader.load_data(
            path=opt.test_path)
        test_label2id, test_id2label = make_dict(test_examples)
        logger.info(' Finish prepare test dict')
        test_features = preprocessor.construct_feature(
            test_examples, test_max_support_size, test_label2id, test_id2label)
        logger.info(' Finish prepare test feature')
        if opt.save_feature:
            save_feature(opt.test_path.replace('.json', '.saved.pk'),
                         test_features, test_label2id, test_id2label, test_trans_mat)
    return test_features, test_label2id, test_id2label, test_trans_mat
示例#4
0
def get_testing_data_feature(opt, data_loader, preprocessor):
    """Prepare test features plus slot and intent label dictionaries.

    When ``opt.load_feature`` is truthy, the features are read with
    ``load_feature`` from the path obtained by replacing ``'.json'`` with
    ``'.saved.pk'`` in ``opt.test_path``. If that raises
    ``FileNotFoundError``, the features are rebuilt from the raw data and
    saved, by re-entering this function with ``load_feature`` off and
    ``save_feature`` on. Otherwise the features are built from the raw data
    and only saved when ``opt.save_feature`` is truthy.

    Args:
        opt: Options object. Reads ``test_path``, ``load_feature``,
            ``save_feature`` and ``do_debug``; it is also forwarded to
            ``make_dict``. The two feature flags are overridden only for the
            duration of a cache rebuild and are put back to the values the
            caller passed in, even if the rebuild fails.
        data_loader: Object whose ``load_data(path=...)`` returns a 3-tuple
            whose first and third items are the examples and the maximum
            support size.
        preprocessor: Object whose ``construct_feature(examples,
            max_support_size, slot_label2id, slot_id2label, intent_label2id,
            intent_id2label)`` returns the features.

    Returns:
        A 3-tuple ``(test_features, (test_slot_label2id, test_slot_id2label),
        (test_intent_label2id, test_intent_id2label))``.
    """
    if opt.load_feature:
        try:
            test_features, (test_slot_label2id, test_slot_id2label), \
                (test_intent_label2id, test_intent_id2label) = \
                load_feature(opt.test_path.replace('.json', '.saved.pk'))
        except FileNotFoundError:
            # No saved feature file yet: rebuild from the raw data and save it.
            previous_flags = (opt.load_feature, opt.save_feature)
            opt.load_feature, opt.save_feature = False, True
            try:
                return get_testing_data_feature(opt, data_loader, preprocessor)
            finally:
                # Restore the caller's own flag values (not hard-coded ones),
                # and do so even when the rebuild raises, so ``opt`` is never
                # left in the temporary "rebuild" state.
                opt.load_feature, opt.save_feature = previous_flags
    else:
        # The second element of load_data's result is not needed here.
        test_examples, _, test_max_support_size = data_loader.load_data(
            path=opt.test_path)
        (test_slot_label2id, test_slot_id2label), \
            (test_intent_label2id, test_intent_id2label) = make_dict(opt, test_examples)
        logger.info(' Finish prepare test dict')
        test_features = preprocessor.construct_feature(
            test_examples, test_max_support_size, test_slot_label2id,
            test_slot_id2label, test_intent_label2id, test_intent_id2label)
        logger.info(' Finish prepare test feature')
        if opt.do_debug:
            print('test_slot_label2id: {}'.format(test_slot_label2id))
            print('test_slot_id2label: {}'.format(test_slot_id2label))
        if opt.save_feature:
            save_feature(opt.test_path.replace('.json', '.saved.pk'),
                         test_features, test_slot_label2id, test_slot_id2label,
                         test_intent_label2id, test_intent_id2label)
    return test_features, (test_slot_label2id, test_slot_id2label), \
        (test_intent_label2id, test_intent_id2label)