Exemplo n.º 1
0
def augment(features, source_path, input_nbest_path, output_nbest_path):
    ''' Augment every hypothesis of an n-best list with extra feature scores.

    The n-best groups are read in lockstep with the source sentences (one
    source line per group). For each hypothesis, every feature function is
    scored and appended to the item, and the item is then written to the
    output n-best file. Iteration stops at the shorter of the two inputs.

     :param features: Iterable of feature function objects. Each one is
         accessed via ``feature.name`` and
         ``feature.get_score(src_sent, hyp, (sent_index, candidate_index))``,
         where both indices are zero-based.
     :param source_path: Path to the original source sentences (maybe required for the feature function)
     :param input_nbest_path: Path to the n-best file
     :param output_nbest_path: Path to the output n-best file
    '''
    # Initialize NBestList objects
    logger.info('Initializing Nbest lists')
    input_nbest = NBestList(input_nbest_path, mode='r')
    output_nbest = NBestList(output_nbest_path, mode='w')

    try:
        # Load the source sentences; the context manager guarantees the
        # file handle is released even when a feature function raises.
        logger.info('Loading source sentences')
        with codecs.open(source_path, mode='r',
                         encoding='UTF-8') as src_sents:
            # For each item in the n-best list, append the features
            paired = zip(input_nbest, src_sents)
            for sent_index, (group, src_sent) in enumerate(paired):
                for candidate_index, item in enumerate(group):
                    for feature in features:
                        item.append_feature(
                            feature.name,
                            feature.get_score(src_sent, item.hyp,
                                              (sent_index, candidate_index)))
                    output_nbest.write(item)
                # Progress report every 100 completed sentences.
                processed = sent_index + 1
                if processed % 100 == 0:
                    logger.info('Augmented ' + L.b_yellow(str(processed)) +
                                ' sentences.')
    finally:
        # Always close the output list, even when augmentation fails
        # part-way through.
        output_nbest.close()
Exemplo n.º 2
0
# Rerank each n-best group by the dot product of its numeric feature values
# with the weight vector, then emit the reordered group and its top hypothesis.
counter = 0
for group in input_aug_nbest:
    # Maps a candidate's position inside the group to its model score.
    scores = {}
    for position, candidate in enumerate(group):
        numeric_tokens = [
            token for token in candidate.features.split() if is_number(token)
        ]
        features = np.asarray(numeric_tokens, dtype=float)
        try:
            scores[position] = np.dot(features, weights)
        except ValueError:
            # np.dot rejects mismatched lengths; the candidate is left
            # unscored and therefore absent from the ranking below.
            logger.error(
                'Number of features in the nbest and the weights file are not the same'
            )

    # Candidate positions ordered from highest to lowest score.
    ranking = list(scores)
    ranking.sort(key=scores.get, reverse=True)

    for position in ranking:
        output_nbest.write(group[position])
    best_candidate = group[ranking[0]]
    output_1best.write(best_candidate.hyp + "\n")

    counter += 1
    if not counter % 100:
        logger.info(L.b_yellow(str(counter)) + " groups processed")
        logger.info("%i groups processed" % counter)

logger.info("Finished processing %i groups" % counter)
logger.info(L.green('Reranking completed.'))
output_nbest.close()
output_1best.close()

# Optionally delete the intermediate n-best file.
if args.clean_up:
    os.remove(output_nbest_path)
Exemplo n.º 3
0
# Boolean flag: when present, args.pred_seed is True.
parser.add_argument("-s",
                    "--predictable-seed",
                    dest="pred_seed",
                    action='store_true',
                    help="Tune with predictable seed to avoid randomness")
# Mandatory location of the Moses installation.
parser.add_argument("--moses-dir",
                    dest="moses_dir",
                    required=True,
                    help="Path to Moses. Required for tuning scripts")
args = parser.parse_args()

# Extra scorer arguments; stays empty unless the M2 metric is selected.
fscore_arg = ""
if args.metric == 'm2':
    fscore_arg = " --sctype M2SCORER --scconfig ignore_whitespace_casing:true "
    # NOTE(review): these two messages are logged before L.set_logger() is
    # called below; if set_logger is what attaches the log file they may not
    # reach train_log.txt -- confirm.
    logger.info("Using M2 Tuning")
    logger.info(L.b_yellow('Arguments: ') + fscore_arg)

# Create the output directory on first use.
if not os.path.exists(args.out_dir):
    os.makedirs(args.out_dir)

L.set_logger(os.path.abspath(args.out_dir), 'train_log.txt')
L.print_args(args)

logger.info("Reading weights from config file")
features = configreader.parse_ini(args.input_config)
logger.info("Feature weights: " + str(features))

# Work on a copy of the input n-best list inside the output directory.
# os.path.join avoids a doubled separator when out_dir ends with a slash
# and uses the platform's separator.
output_nbest_path = os.path.join(args.out_dir, 'augmented.nbest')
shutil.copy(args.input_nbest, output_nbest_path)

logger.info('Extracting stats and features')