Пример #1
0
def evaluate(input_sample, gt_grams):
    """Print n-gram scores for a sample against ground-truth n-grams.

    The sample is loaded with ``load_sample(..., tokenize=False)`` (character
    level), its 1- to 4-grams are extracted with ``get_grams``, and each order
    is scored against the matching ground-truth collection with
    ``percentage_real``.

    Args:
        input_sample: Forwarded unchanged to ``load_sample``; presumably the
            location of the generated sample text -- confirm against
            ``load_sample``.
        gt_grams: Sequence of exactly four items -- the ground-truth
            unigrams, bigrams, trigrams and quad-grams, in that order.

    Returns:
        None. One line per n-gram order is written to stdout.

    Raises:
        ValueError: If ``get_grams`` or ``gt_grams`` does not unpack into
            exactly four items.
    """
    # char level evaluation
    sample_lines = load_sample(input_sample, tokenize=False)

    # Keep the explicit 4-way unpacking so a wrong-length input fails loudly
    # before anything is printed, instead of being silently truncated.
    u_samples, b_samples, t_samples, q_samples = get_grams(sample_lines)
    u_real, b_real, t_real, q_real = gt_grams

    report = (
        ("Unigrams", u_samples, u_real),
        ("Bigrams", b_samples, b_real),
        ("Trigrams", t_samples, t_real),
        ("Quad grams", q_samples, q_real),
    )
    for label, sampled, real in report:
        # Single-argument parenthesised print behaves identically under the
        # Python 2 print statement and the Python 3 print function.
        print("%s: %f" % (label, percentage_real(sampled, real)))
Пример #2
0
def get_gt_grams_cached(lines, dataset='training'):
    """Return the n-grams of ``lines``, using an on-disk cache when present.

    The cache file lives under ``FLAGS.PICKLE_PATH`` and is named
    ``true-char-ngrams.pkl``, prefixed with ``heldout_`` when ``dataset`` is
    ``'heldout'``. Any other ``dataset`` value uses the unprefixed name.

    Args:
        lines: Input forwarded to ``get_grams`` on a cache miss. It is not
            consulted at all when the cache file already exists.
        dataset: ``'heldout'`` selects the held-out cache file; everything
            else selects the default one.

    Returns:
        Whatever ``get_grams(lines)`` produced, either freshly computed or as
        previously stored in the cache file.
    """
    prefix = 'heldout_' if dataset == 'heldout' else ''
    cache_path = FLAGS.PICKLE_PATH + '/' + prefix + 'true-char-ngrams.pkl'

    # Cache miss: compute once, persist for later runs, and hand back the
    # in-memory result.
    if not os.path.exists(cache_path):
        computed = get_grams(lines)
        model_and_data_serialization.save_picklized(computed, cache_path)
        return computed

    # Cache hit. NOTE(review): the existing file is trusted as-is; it is not
    # checked against ``lines`` -- confirm stale caches are cleared manually.
    return model_and_data_serialization.load_picklized(cache_path)