Example #1
def main():
    args = parse_args()
    config = configparser.ConfigParser()
    """ARGS DETAIL"""
    config_file = args.config_file
    batch_size = args.batch
    n_epoch = args.epoch
    pretrain_epoch = args.pretrain_epoch
    gpu_id = args.gpu
    model_type = args.model
    pretrain_w2v = args.pretrain_w2v
    data_path = args.data_path
    load_model = args.load_model
    """DIR PREPARE"""
    config.read(config_file)
    vocab_size = int(config['Parameter']['vocab_size'])
    coefficient = float(config['Parameter']['coefficient'])
    shuffle_data = config['Parameter'].getboolean('shuffle')

    if pretrain_w2v:
        vocab_size = 'p' + str(vocab_size)

    if model_type == 'multi':
        if shuffle_data:
            base_dir = './pseudo_{}_{}_{}_c{}_shuffle/'.format(
                model_type, vocab_size, data_path[0], coefficient)
        else:
            base_dir = './pseudo_{}_{}_{}_c{}/'.format(model_type, vocab_size,
                                                       data_path[0],
                                                       coefficient)
    else:
        if shuffle_data:
            base_dir = './pseudo_{}_{}_{}_shuffle/'.format(
                model_type, vocab_size, data_path[0])
        else:
            base_dir = './pseudo_{}_{}_{}/'.format(model_type, vocab_size,
                                                   data_path[0])
    model_save_dir = base_dir

    if not os.path.exists(base_dir):
        os.mkdir(base_dir)
        shutil.copyfile(config_file, base_dir + config_file)
    config_file = base_dir + config_file
    config.read(config_file)
    """PARAMATER"""
    embed_size = int(config['Parameter']['embed_size'])
    hidden_size = int(config['Parameter']['hidden_size'])
    class_size = int(config['Parameter']['class_size'])
    dropout_ratio = float(config['Parameter']['dropout'])
    weight_decay = float(config['Parameter']['weight_decay'])
    gradclip = float(config['Parameter']['gradclip'])
    vocab_size = int(config['Parameter']['vocab_size'])
    valid_num = int(config['Parameter']['valid_num'])
    shuffle_data = config['Parameter'].getboolean('shuffle')
    """LOGGER"""
    log_file = model_save_dir + 'log.txt'
    logger = dataset.prepare_logger(log_file)
    logger.info(args)  # log the command-line arguments
    logger.info('[Training start] logging to {}'.format(log_file))
    """DATASET"""
    train_src_file = config[data_path]['train_src_file']
    train_trg_file = config[data_path]['train_trg_file']
    valid_src_file = config[data_path]['valid_src_file']
    valid_trg_file = config[data_path]['valid_trg_file']
    test_src_file = config[data_path]['single_src_file']
    test_trg_file = config[data_path]['single_trg_file']
    src_w2v_file = config[data_path]['src_w2v_file']
    trg_w2v_file = config[data_path]['trg_w2v_file']

    train_data = dataset.load_label_corpus_file(train_src_file, train_trg_file)
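    # split the corpora into valid_num folds (as the later
    # separate_train_dev_test calls assume) for cross-validation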
    qa_data_sub_lit = dataset.split_valid_data(train_data, valid_num)
    valid_data = dataset.load_label_corpus_file(valid_src_file, valid_trg_file)
    test_data = dataset.load_label_corpus_file(test_src_file, test_trg_file)
    test_data_sub_lit = dataset.split_valid_data(test_data, valid_num)
    """VOCABULARY"""
    src_vocab, trg_vocab, sos, eos = dataset.prepare_vocab(
        base_dir, train_data, vocab_size, gpu_id)
    src_vocab_size = len(src_vocab.vocab)
    trg_vocab_size = len(trg_vocab.vocab)

    src_initialW, trg_initialW = None, None
    if pretrain_w2v:
        w2v = word2vec.Word2Vec()
        src_initialW, vector_size, src_match_word_count = w2v.make_initialW(
            src_vocab.vocab, src_w2v_file)
        trg_initialW, vector_size, trg_match_word_count = w2v.make_initialW(
            trg_vocab.vocab, trg_w2v_file)
        logger.info(
            'Initialize w2v embedding. Match: src {}/{}, trg {}/{}'.format(
                src_match_word_count, src_vocab_size, trg_match_word_count,
                trg_vocab_size))

    logger.info('src_vocab size: {}, trg_vocab size: {}'.format(
        src_vocab_size, trg_vocab_size))

    evaluater = evaluate.Evaluate()
    """GPU"""
    if gpu_id >= 0:
        logger.info('Use GPU')
        chainer.cuda.get_device_from_id(gpu_id).use()

    cross_valid_result = []
    model_module = model  # keep a reference to the model module; `model` is rebound to the network instance inside the loop
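    # valid_num-fold cross-validation: each fold builds its own iterators and
    # model, and the results of its best epoch are kept in cross_valid_result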
    for ite in range(1, valid_num + 1):
        model_valid_dir = base_dir + 'valid{}/'.format(ite)
        if not os.path.exists(model_valid_dir):
            os.mkdir(model_valid_dir)

        qa_train_data, qa_dev_data, qa_test_data = dataset.separate_train_dev_test(
            qa_data_sub_lit, ite)
        train_data, dev_data, test_data = dataset.separate_train_dev_test(
            test_data_sub_lit, ite)
        test_data_id = [t['id'] for t in test_data]

        qa_iter = dataset.Iterator(qa_train_data,
                                   src_vocab,
                                   trg_vocab,
                                   batch_size,
                                   gpu_id,
                                   sort=True,
                                   shuffle=True)
        valid_iter = dataset.Iterator(valid_data,
                                      src_vocab,
                                      trg_vocab,
                                      batch_size,
                                      gpu_id,
                                      sort=False,
                                      shuffle=False)
        train_iter = dataset.Iterator(train_data,
                                      src_vocab,
                                      trg_vocab,
                                      batch_size,
                                      gpu_id,
                                      sort=True,
                                      shuffle=True)
        dev_iter = dataset.Iterator(dev_data,
                                    src_vocab,
                                    trg_vocab,
                                    batch_size,
                                    gpu_id,
                                    sort=False,
                                    shuffle=False)
        test_iter = dataset.Iterator(test_data,
                                     src_vocab,
                                     trg_vocab,
                                     batch_size,
                                     gpu_id,
                                     sort=False,
                                     shuffle=False)

        qa_size = len(qa_train_data)
        train_size = len(train_data)
        logger.info('V{} ## QA:{}, train:{}, dev:{} ,test:{}'.format(
            ite, qa_size, train_size, len(dev_data), len(test_data)))
        """MODEL"""
        if model_type == 'multi':
            model = model_module.Multi(src_vocab_size, trg_vocab_size,
                                       embed_size, hidden_size, class_size,
                                       dropout_ratio, coefficient,
                                       src_initialW, trg_initialW)
        elif model_type in ['label', 'pretrain']:
            model = model_module.Label(src_vocab_size, trg_vocab_size,
                                       embed_size, hidden_size, class_size,
                                       dropout_ratio, src_initialW,
                                       trg_initialW)
        else:
            model = model_module.EncoderDecoder(src_vocab_size,
                                                trg_vocab_size, embed_size,
                                                hidden_size, dropout_ratio,
                                                src_initialW, trg_initialW)

        if gpu_id >= 0:
            model.to_gpu()
        """OPTIMIZER"""
        optimizer = chainer.optimizers.Adam()
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.GradientClipping(gradclip))
        optimizer.add_hook(chainer.optimizer.WeightDecay(weight_decay))
        """PRETRAIN"""
        if model_type == 'pretrain' and load_model is None:
            logger.info('Pre-train start')
            pretrain_loss_dic = {}
            for epoch in range(1, pretrain_epoch + 1):
                train_loss = 0
                for i, batch in enumerate(train_iter.generate(), start=1):
                    try:
                        loss = model.pretrain(*batch)
                        train_loss += loss.data
                        optimizer.target.cleargrads()
                        loss.backward()
                        optimizer.update()

                    except Exception as e:
                        logger.info('P{} ## train iter: {}, {}'.format(
                            epoch, i, e))
                chainer.serializers.save_npz(
                    model_save_dir + 'p_model_epoch_{}.npz'.format(epoch),
                    model)
                """EVALUATE"""
                valid_loss = 0
                for batch in valid_iter.generate():
                    with chainer.no_backprop_mode(), chainer.using_config(
                            'train', False):
                        valid_loss += model.pretrain(*batch).data
                logger.info('P{} ## train loss: {}, val loss:{}'.format(
                    epoch, train_loss, valid_loss))
                pretrain_loss_dic[epoch] = valid_loss
            """MODEL SAVE & LOAD"""
            best_epoch = min(pretrain_loss_dic,
                             key=(lambda x: pretrain_loss_dic[x]))
            logger.info('best_epoch:{}, val loss: {}'.format(
                best_epoch, pretrain_loss_dic[best_epoch]))
            shutil.copyfile(
                model_save_dir + 'p_model_epoch_{}.npz'.format(best_epoch),
                model_save_dir + 'p_best_model.npz')
            logger.info('Pre-train finish')

        if load_model:
            logger.info('load model: {}'.format(load_model))
            chainer.serializers.load_npz(base_dir + load_model, model)
        """TRAIN"""
        epoch_info = {}
        for epoch in range(1, n_epoch + 1):
            train_loss = 0
            mix_train_iter = dataset.MixIterator(qa_iter,
                                                 train_iter,
                                                 seed=0,
                                                 shuffle=shuffle_data)
            for i, batch in enumerate(mix_train_iter.generate(), start=1):
                try:
                    loss = optimizer.target(*batch[0])
                    train_loss += loss.data
                    optimizer.target.cleargrads()
                    loss.backward()
                    optimizer.update()

                except Exception as e:
                    logger.info('V{} ## E{} ## train iter: {}, {}'.format(
                        ite, epoch, i, e))
            chainer.serializers.save_npz(
                model_valid_dir + 'model_epoch_{}.npz'.format(epoch), model)
            """DEV"""
            labels, alignments = [], []
            for i, batch in enumerate(dev_iter.generate(), start=1):
                try:
                    with chainer.no_backprop_mode(), chainer.using_config(
                            'train', False):
                        _, label, align = model.predict(batch[0], sos, eos)
                except Exception as e:
                    logger.info('V{} ## E{} ## dev iter: {}, {}'.format(
                        ite, epoch, i, e))
                    continue  # skip this batch; label/align are not defined

                if model_type == 'multi':
                    for l, a in zip(label, align):
                        labels.append(chainer.cuda.to_cpu(l))
                        alignments.append(chainer.cuda.to_cpu(a))
                elif model_type in ['label', 'pretrain']:
                    for l in label:
                        labels.append(chainer.cuda.to_cpu(l))
                else:
                    for a in align:
                        alignments.append(chainer.cuda.to_cpu(a))

            best_param_dic = evaluater.param_search(labels, alignments,
                                                    dev_data)
            param = max(best_param_dic,
                        key=lambda x: best_param_dic[x]['macro'])
            init, mix = evaluate.key_to_param(param)
            dev_score = round(best_param_dic[param]['macro'], 3)
            """TEST"""
            outputs, labels, alignments = [], [], []
            for i, batch in enumerate(test_iter.generate(), start=1):
                try:
                    with chainer.no_backprop_mode(), chainer.using_config(
                            'train', False):
                        output, label, align = model.predict(
                            batch[0], sos, eos)
                except Exception as e:
                    logger.info('V{} ## E{} ## test iter: {}, {}'.format(
                        ite, epoch, i, e))
                    continue  # skip this batch; label/align are not defined

                if model_type == 'multi':
                    for l, a in zip(label, align):
                        labels.append(chainer.cuda.to_cpu(l))
                        alignments.append(chainer.cuda.to_cpu(a))
                elif model_type in ['label', 'pretrain']:
                    for l in label:
                        labels.append(chainer.cuda.to_cpu(l))
                else:
                    for a in align:
                        alignments.append(chainer.cuda.to_cpu(a))

            rate, count, tf_lit, macro, micro = evaluater.eval_param(
                labels, alignments, test_data, init, mix)
            test_macro_score = round(macro, 3)
            test_micro_score = round(micro, 3)
            logger.info(
                'V{} ## E{} ## loss: {}, dev: {}, param: {}, micro: {}, macro: {}'
                .format(ite, epoch, train_loss, dev_score, param,
                        test_micro_score, test_macro_score))

            epoch_info[epoch] = {
                'id': test_data_id,
                'label': labels,
                'align': alignments,
                'hypo': outputs,
                'epoch': epoch,
                'dev_score': dev_score,
                'param': param,
                'rate': rate,
                'count': count,
                'tf': tf_lit,
                'macro': test_macro_score,
                'micro': test_micro_score
            }
            dataset.save_output(model_valid_dir, epoch_info[epoch])
        """MODEL SAVE"""
        best_epoch = max(epoch_info,
                         key=(lambda x: epoch_info[x]['dev_score']))
        cross_valid_result.append(epoch_info[best_epoch])
        logger.info(
            'V{} ## best_epoch: {}, dev: {}, micro: {}, macro: {}'.format(
                ite, best_epoch, epoch_info[best_epoch]['dev_score'],
                epoch_info[best_epoch]['micro'],
                epoch_info[best_epoch]['macro']))
        shutil.copyfile(
            model_valid_dir + 'model_epoch_{}.npz'.format(best_epoch),
            model_valid_dir + 'best_model.npz')

        logger.info('')

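    # aggregate across folds: average the dev/micro/macro scores and the rate
    # values, and concatenate the per-example outputs before sorting and saving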
    ave_dev_score, ave_macro_score, ave_micro_score = 0, 0, 0
    ave_test_score = [0 for _ in range(len(cross_valid_result[0]['rate']))]
    id_total, label_total, align_total, tf_total = [], [], [], []

    for v, r in enumerate(cross_valid_result, start=1):
        ave_dev_score += r['dev_score']
        ave_macro_score += r['macro']
        ave_micro_score += r['micro']
        for i, rate in enumerate(r['rate']):
            ave_test_score[i] += rate
        logger.info('   {}: e{}, {}\tdev: {}, micro: {}, macro: {} {}'.format(
            v, r['epoch'], r['param'], r['dev_score'], r['micro'],
            dataset.float_to_str(r['rate']), r['macro']))

        id_total.extend(r['id'])
        label_total.extend(r['label'])
        align_total.extend(r['align'])
        tf_total.extend(r['tf'])
    ave_dev_score = round(ave_dev_score / valid_num, 3)
    ave_macro_score = round(ave_macro_score / valid_num, 3)
    ave_micro_score = round(ave_micro_score / valid_num, 3)
    ave_test_score = [
        ave_test_score[i] / valid_num for i in range(len(ave_test_score))
    ]
    logger.info('dev: {}, micro: {}, macro: {} {}'.format(
        ave_dev_score, ave_micro_score, dataset.float_to_str(ave_test_score),
        ave_macro_score))

    label, align, tf = dataset.sort_multi_list(id_total, label_total,
                                               align_total, tf_total)
    dataset.save_list(base_dir + 'label.txt', label)
    dataset.save_list(base_dir + 'align.txt', align)
    dataset.save_list(base_dir + 'tf.txt', tf)
Example #2
def main():
    """
    model1: label
    model2: encdec を指定する
    """
    args = parse_args()
    model_name1 = args.label_model
    model_dir1 = re.search(r'^(.*/)', model_name1).group(1)

    model_name2 = args.encdec_model
    model_dir2 = re.search(r'^(.*/)', model_name2).group(1)

    valid_type = args.valid

    # create the output directory for the merged results
    output_dir = model_dir1 + model_dir2
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    # prepare the evaluation data
    config = configparser.ConfigParser()
    config_files = glob.glob(os.path.join(model_dir1, '*.ini'))
    config.read(config_files[0])
    valid_num = int(config['Parameter']['valid_num'])
    test_src_file = config['server']['single_src_file']
    test_trg_file = config['server']['single_trg_file']
    data = dataset.load_label_corpus_file(test_src_file, test_trg_file)
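    # split the evaluation corpus into valid_num folds; this is assumed to
    # match the fold layout used when the models were trained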
    data_sub_lit = dataset.split_valid_data(data, valid_num)

    evaluater = evaluate.Evaluate()

    result_dic = {}
    # case where the saved scores are split into per-fold (valid) files

    if valid_type == 'TT':
        """
        model1: validファイルあり
        model2: validファイルあり
        """
        model_file_num = len(
            glob.glob(os.path.join(model_dir1, 'valid1/model_epoch_*.npz')))

        label_dic = {}
        align_dic = {}
        for i in range(1, model_file_num + 1):
            label_dic[i] = []
            align_dic[i] = []
            for valid in [2, 3, 4, 5, 1]:
                label, _ = dataset.load_score_file(
                    model_dir1 + 'valid{}/model_epoch_{}'.format(valid, i))
                label_dic[i].append(label)
                _, align = dataset.load_score_file(
                    model_dir2 + 'valid{}/model_epoch_{}'.format(valid, i))
                align_dic[i].append(align)

        order = {1: [4, 5], 2: [5, 1], 3: [1, 2], 4: [2, 3], 5: [3, 4]}
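        # order maps each cross-validation run to its [dev, test] fold
        # indices; e.g. 1: [4, 5] uses fold 4 for dev and fold 5 for test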

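        # grid search over every pair of label-model epoch i and encdec-model
        # epoch j, scored later by the cross-validated dev macro score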
        for i in tqdm(range(1, model_file_num + 1)):
            for j in range(1, model_file_num + 1):
                info = []
                for ite, v in order.items():
                    _, dev_data, test_data = dataset.separate_train_dev_test(
                        data_sub_lit, ite)
                    dev_label = label_dic[i][v[0] - 1]
                    test_label = label_dic[i][v[1] - 1]

                    dev_align = align_dic[j][v[0] - 1]
                    test_align = align_dic[j][v[1] - 1]

                    best_param_dic = evaluater.param_search(
                        dev_label, dev_align, dev_data)
                    param = max(best_param_dic,
                                key=lambda x: best_param_dic[x]['macro'])
                    init, mix = evaluate.key_to_param(param)
                    dev_score = round(best_param_dic[param]['macro'], 3)

                    rate, count, tf_lit, macro, micro = evaluater.eval_param(
                        test_label, test_align, test_data, init, mix)
                    test_macro_score = round(macro, 3)
                    test_micro_score = round(micro, 3)
                    info.append({
                        'dev_score': dev_score,
                        'param': param,
                        'macro': test_macro_score,
                        'micro': test_micro_score,
                        'tf': tf_lit
                    })

                ave_dev_score, ave_macro_score, ave_micro_score = 0, 0, 0
                param = []
                tf_lit = []

                for v, r in enumerate(info, start=1):
                    ave_dev_score += r['dev_score']
                    ave_macro_score += r['macro']
                    ave_micro_score += r['micro']
                    param.append(r['param'])
                    tf_lit.extend(r['tf'])

                ave_dev_score = round(ave_dev_score / valid_num, 3)
                ave_macro_score = round(ave_macro_score / valid_num, 3)
                ave_micro_score = round(ave_micro_score / valid_num, 3)

                key = 'label{}_enc{}'.format(i, j)
                result_dic[key] = {
                    'dev': ave_dev_score,
                    'micro': ave_micro_score,
                    'macro': ave_macro_score,
                    'param': ' '.join(param),
                    'tf': tf_lit
                }

        best_score = max(result_dic, key=lambda x: result_dic[x]['dev'])
        with open(output_dir + 'merge.txt', 'w') as f:
            for k, v in sorted(result_dic.items()):
                f.write('{}: {}\n'.format(k, v))
            f.write('best score\n{}: {}\n'.format(best_score,
                                                  result_dic[best_score]))
        with open(output_dir + 'tf.txt', 'w') as f:
            for r in result_dic[best_score]['tf']:
                f.write(r + '\n')

    elif valid_type == 'FF':
        """
        model1: validファイルなし
        model2: validファイルなし
        """
        model_file_num = len(
            glob.glob(os.path.join(model_dir1, 'model_epoch_*.npz')))
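        # neither model has per-fold directories: load each epoch's score
        # file, split it into folds, and grid-search the epoch pairs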
        for i in tqdm(range(1, model_file_num + 1)):
            label, _ = dataset.load_score_file(model_dir1 +
                                               'model_epoch_{}'.format(i))
            label_sub_lit = dataset.split_valid_data(label, valid_num)
            for j in range(1, model_file_num + 1):
                _, align = dataset.load_score_file(model_dir2 +
                                                   'model_epoch_{}'.format(j))
                align_sub_lit = dataset.split_valid_data(align, valid_num)
                info = []
                for ite in range(1, valid_num + 1):
                    _, dev_data, test_data = dataset.separate_train_dev_test(
                        data_sub_lit, ite)
                    _, dev_label, test_label = dataset.separate_train_dev_test(
                        label_sub_lit, ite)
                    _, dev_align, test_align = dataset.separate_train_dev_test(
                        align_sub_lit, ite)

                    best_param_dic = evaluater.param_search(
                        dev_label, dev_align, dev_data)
                    param = max(best_param_dic,
                                key=lambda x: best_param_dic[x]['macro'])
                    init, mix = evaluate.key_to_param(param)
                    dev_score = round(best_param_dic[param]['macro'], 3)

                    rate, count, tf_lit, macro, micro = evaluater.eval_param(
                        test_label, test_align, test_data, init, mix)
                    test_macro_score = round(macro, 3)
                    test_micro_score = round(micro, 3)
                    info.append({
                        'dev_score': dev_score,
                        'param': param,
                        'macro': test_macro_score,
                        'micro': test_micro_score,
                        'tf': tf_lit
                    })

                ave_dev_score, ave_macro_score, ave_micro_score = 0, 0, 0
                param = []
                tf_lit = []

                for v, r in enumerate(info, start=1):
                    ave_dev_score += r['dev_score']
                    ave_macro_score += r['macro']
                    ave_micro_score += r['micro']
                    param.append(r['param'])
                    tf_lit.extend(r['tf'])

                ave_dev_score = round(ave_dev_score / valid_num, 3)
                ave_macro_score = round(ave_macro_score / valid_num, 3)
                ave_micro_score = round(ave_micro_score / valid_num, 3)

                key = 'label{}_enc{}'.format(i, j)
                result_dic[key] = {
                    'dev': ave_dev_score,
                    'micro': ave_micro_score,
                    'macro': ave_macro_score,
                    'param': ' '.join(param),
                    'tf': tf_lit
                }

        best_score = max(result_dic, key=lambda x: result_dic[x]['dev'])
        with open(output_dir + 'merge.txt', 'w') as f:
            for k, v in sorted(result_dic.items()):
                f.write('{}: {}\n'.format(k, v))
            f.write('best score\n{}: {}\n'.format(best_score,
                                                  result_dic[best_score]))
        with open(output_dir + 'tf.txt', 'w') as f:
            for r in result_dic[best_score]['tf']:
                f.write(r + '\n')

    elif valid_type == 'TF':
        """
        model1: validファイルなし
        model2: validファイルなし
        """
        model_file_num = len(
            glob.glob(os.path.join(model_dir1, 'valid1/model_epoch_*.npz')))

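        # model1 was saved per fold (valid*/ directories) while model2 was
        # saved in a single run, so only model2's scores are split into folds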
        label_dic = {}
        for i in range(1, model_file_num + 1):
            label_dic[i] = []
            for valid in [2, 3, 4, 5, 1]:
                label, _ = dataset.load_score_file(
                    model_dir1 + 'valid{}/model_epoch_{}'.format(valid, i))
                label_dic[i].append(label)

        align_dic = {}
        for j in range(1, model_file_num + 1):
            _, align = dataset.load_score_file(model_dir2 +
                                               'model_epoch_{}'.format(j))
            # keep each epoch's fold split so the grid search below can vary j
            align_dic[j] = dataset.split_valid_data(align, valid_num)

        # order specifies the valid and test fold indices for 5-fold cross-validation;
        # e.g. 1: [4, 5] means the first run uses fold 4 for validation and fold 5 for testing
        order = {1: [4, 5], 2: [5, 1], 3: [1, 2], 4: [2, 3], 5: [3, 4]}

        for i in tqdm(range(1, model_file_num + 1)):
            for j in range(1, model_file_num + 1):
                info = []
                for ite, v in order.items():
                    _, dev_data, test_data = dataset.separate_train_dev_test(
                        data_sub_lit, ite)
                    dev_label = label_dic[i][v[0] - 1]
                    test_label = label_dic[i][v[1] - 1]

                    _, dev_align, test_align = dataset.separate_train_dev_test(
                        align_dic[j], ite)

                    best_param_dic = evaluater.param_search(
                        dev_label, dev_align, dev_data)
                    param = max(best_param_dic,
                                key=lambda x: best_param_dic[x]['macro'])
                    init, mix = evaluate.key_to_param(param)
                    dev_score = round(best_param_dic[param]['macro'], 3)

                    rate, count, tf_lit, macro, micro = evaluater.eval_param(
                        test_label, test_align, test_data, init, mix)
                    test_macro_score = round(macro, 3)
                    test_micro_score = round(micro, 3)
                    info.append({
                        'dev_score': dev_score,
                        'param': param,
                        'macro': test_macro_score,
                        'micro': test_micro_score,
                        'tf': tf_lit
                    })

                ave_dev_score, ave_macro_score, ave_micro_score = 0, 0, 0
                param = []
                tf_lit = []

                for v, r in enumerate(info, start=1):
                    ave_dev_score += r['dev_score']
                    ave_macro_score += r['macro']
                    ave_micro_score += r['micro']
                    param.append(r['param'])
                    tf_lit.extend(r['tf'])

                ave_dev_score = round(ave_dev_score / valid_num, 3)
                ave_macro_score = round(ave_macro_score / valid_num, 3)
                ave_micro_score = round(ave_micro_score / valid_num, 3)

                key = 'label{}_enc{}'.format(i, j)
                result_dic[key] = {
                    'dev': ave_dev_score,
                    'micro': ave_micro_score,
                    'macro': ave_macro_score,
                    'param': ' '.join(param),
                    'tf': tf_lit
                }

        best_score = max(result_dic, key=lambda x: result_dic[x]['dev'])
        with open(output_dir + 'merge.txt', 'w') as f:
            for k, v in sorted(result_dic.items()):
                f.write('{}: {}\n'.format(k, v))
            f.write('best score\n{}: {}\n'.format(best_score,
                                                  result_dic[best_score]))
        with open(output_dir + 'tf.txt', 'w') as f:
            for r in result_dic[best_score]['tf']:
                f.write(r + '\n')