Пример #1
0
def run_experiment(treebank_name, outdir=None, dep_style="ud", pos_style='ud',
                   metric='LAS'):
    """Run the transform/parse/detransform step and build one result row.

    Calls ``TreebankTransformer.transform_parse_detransform()`` and then
    scores two parses against the transformer's test file with
    ``malteval.accuracy``: the baseline parse read from
    ``outdir + 'dev_parsed_baseline.conll'`` and the transformer's
    ``parsed_ud`` output.

    Args:
        treebank_name: Treebank identifier; also the first field of the row.
        outdir: Path prefix of the experiment directory. When falsy it is
            set to ``config.exp + treebank_name + "/"``.
        dep_style: Forwarded to ``TreebankTransformer``.
        pos_style: Forwarded to ``TreebankTransformer``.
        metric: ``"LAS"`` selects the second value unpacked from
            ``malteval.accuracy`` and ``malteval.significance``; any other
            value selects the first one and ``malteval.significance_uas``.

    Returns:
        A ``;``-separated, newline-terminated row: treebank name, baseline
        score, and system score with the significance marker appended.
        Both scores are multiplied by 100.
    """
    # TODO: have options for what goes in table
    outdir = outdir or (config.exp + treebank_name + "/")
    transformer = TreebankTransformer(
        treebank_name=treebank_name, dep_style=dep_style, pos_style=pos_style
    )
    # Comment this call out to only redo the evaluation on existing output.
    transformer.transform_parse_detransform()

    # Files
    gold_path = transformer.testfile
    baseline_path = outdir + 'dev_parsed_baseline.conll'
    # For evaluation only, use outdir + 'dev_parsed.ud.conll' here instead.
    system_path = transformer.parsed_ud

    # Results
    baseline_uas, baseline_las = malteval.accuracy(gold_path, baseline_path)
    system_uas, system_las = malteval.accuracy(gold_path, system_path)

    if metric == "LAS":
        system_raw, baseline_raw = system_las, baseline_las
        significance_test = malteval.significance
    else:
        system_raw, baseline_raw = system_uas, baseline_uas
        significance_test = malteval.significance_uas

    system_score = str(float(system_raw) * 100)
    baseline_score = str(float(baseline_raw) * 100)
    # The significance marker is appended directly to the system score.
    system_score += significance_test(gold_path, baseline_path, system_path)
    return "%s;%s;%s\n" % (treebank_name, baseline_score, system_score)
Пример #2
0
def evaluate_back_transformation_accuracy(treebank_name, outdir=None):
    """Score the back-transformed training file against the original one.

    Calls ``TreebankTransformer.transform_detransform_trainfile()`` and
    compares the transformer's ``back_transf`` file with its ``trainfile``
    using ``malteval.accuracy``.

    Args:
        treebank_name: Treebank identifier; also the first field of the row.
        outdir: Path prefix of the experiment directory. When falsy it is
            set to ``config.exp + treebank_name + "/"``.

    Returns:
        A newline-terminated ``"<treebank_name>;<score>"`` row, where the
        score is the first value returned by ``malteval.accuracy``
        multiplied by 100.
    """
    # NOTE: the resolved outdir is not read anywhere below.
    outdir = outdir or (config.exp + treebank_name + "/")
    transformer = TreebankTransformer(treebank_name=treebank_name)
    transformer.transform_detransform_trainfile()
    scores = malteval.accuracy(transformer.trainfile, transformer.back_transf)
    percentage = str(float(scores[0]) * 100)
    return "%s;%s\n" % (treebank_name, percentage)
Пример #3
0
def run_stats_cop(treebank_name, outdir=None, trainfile=None, testfile=None,
                  dep_style="ud"):
    """Build a row with the relative frequency reported by ``count_cop``.

    ``TreebankTransformer.count_cop`` is run on the train and test files;
    each call is unpacked as ``(count, total)``. The counts and totals of
    both files are summed and the frequency is ``count / total * 100``.
    (Presumably these are copula counts over sentences; confirm against
    ``count_cop``.)

    Args:
        treebank_name: Treebank identifier; also the first field of the row.
        outdir: Not used; kept so existing callers keep working.
        trainfile: Optional replacement for the transformer's train file.
        testfile: Optional replacement for the transformer's test file.
        dep_style: Forwarded to ``TreebankTransformer``.

    Returns:
        A newline-terminated ``"<treebank_name>;<total>;<frequency>"`` row.
        The frequency is ``0.0`` when the summed total is zero.
    """
    transformer = TreebankTransformer(treebank_name=treebank_name,
                                      dep_style=dep_style)
    # Explicit file arguments take precedence over the transformer's own.
    if trainfile:
        transformer.trainfile = trainfile
    if testfile:
        transformer.testfile = testfile

    cop_train, total_train = transformer.count_cop(transformer.trainfile)
    cop_test, total_test = transformer.count_cop(transformer.testfile)
    cop_count = cop_train + cop_test
    total = total_train + total_test

    # Empty input files would otherwise raise ZeroDivisionError here.
    cop_freq = (cop_count / float(total)) * 100 if total else 0.0
    return "%s;%s;%s\n" % (treebank_name, total, cop_freq)
Пример #4
0
def run_stats(treebank_name, outdir=None, trainfile=None, testfile=None,
              dep_style="ud"):
    """Build a row with the relative frequency reported by ``count_aux``.

    ``TreebankTransformer.count_aux`` is run on the train and test files;
    each call is unpacked as three numbers, named here after the original
    locals (aux, tot, s). The per-file values are summed and the frequency
    is ``aux / tot * 100``. (Presumably auxiliaries, tokens and sentences;
    confirm against ``count_aux``.)

    Args:
        treebank_name: Treebank identifier; also the first field of the row.
        outdir: Not used; kept so existing callers keep working.
        trainfile: Optional replacement for the transformer's train file.
        testfile: Optional replacement for the transformer's test file.
        dep_style: Forwarded to ``TreebankTransformer``.

    Returns:
        A newline-terminated row
        ``"<treebank_name>;<summed s>;<summed tot>;<frequency>"``.
        The frequency is ``0.0`` when the summed ``tot`` is zero.
    """
    transformer = TreebankTransformer(treebank_name=treebank_name,
                                      dep_style=dep_style)
    # Explicit file arguments take precedence over the transformer's own.
    if trainfile:
        transformer.trainfile = trainfile
    if testfile:
        transformer.testfile = testfile

    aux_train, tot_train, s_train = transformer.count_aux(
        transformer.trainfile)
    aux_test, tot_test, s_test = transformer.count_aux(transformer.testfile)
    tot_s = s_train + s_test
    tot_aux = aux_train + aux_test
    tot_tokens = tot_train + tot_test

    # Empty input files would otherwise raise ZeroDivisionError here.
    aux_freq = (tot_aux / float(tot_tokens)) * 100 if tot_tokens else 0.0
    return "%s;%s;%s;%s\n" % (treebank_name, tot_s, tot_tokens, aux_freq)
Пример #5
0
def evaluate_on_transformed_gold(treebank_name, outdir=None):
    """Evaluate on the transformed representation as gold standard.

    The transformer's test file and the baseline parse
    (``dev_parsed_baseline.conll``) are both passed through
    ``TreebankTransformer.transform(..., "transform")``. The file
    ``dev_parsed.ms.conll`` is only read here, so it must already exist in
    ``outdir``.

    Args:
        treebank_name: Treebank identifier; also the first field of the row.
        outdir: Path prefix of the experiment directory. When falsy it is
            set to ``config.exp + treebank_name + "/"``.

    Returns:
        A newline-terminated ``"<treebank_name>;<system>;<baseline>"`` row.
        Each score is the second value returned by ``malteval.accuracy``
        multiplied by 100; note that the system score precedes the baseline.
    """
    outdir = outdir or (config.exp + treebank_name + "/")
    transformer = TreebankTransformer(treebank_name=treebank_name)

    gold_ms = "%sdev_gold.ms.conll" % outdir
    system_ms = "%sdev_parsed.ms.conll" % outdir
    baseline_parse = outdir + 'dev_parsed_baseline.conll'
    baseline_ms = outdir + 'dev_parsed_baseline.ms.conll'

    # Transform the gold test file first, then the baseline parse.
    conversions = (
        (transformer.testfile, gold_ms),
        (baseline_parse, baseline_ms),
    )
    for source, target in conversions:
        transformer.transform(source, target, "transform")

    # Only the second value of each accuracy pair is reported.
    _, system_las = malteval.accuracy(gold_ms, system_ms)
    _, baseline_las = malteval.accuracy(gold_ms, baseline_ms)
    return "%s;%s;%s\n" % (
        treebank_name,
        str(float(system_las) * 100),
        str(float(baseline_las) * 100),
    )
Пример #6
0
def prepare_files(treebank_name, outdir=None, trainfile=None, testfile=None,
                  ambig_type=None, dep_style='ud', pos_style='ud'):
    """Create the experiment directory and write the converted input files.

    The train and test inputs are passed through
    ``TreebankTransformer.transform(..., 'to_conllx')`` and written to the
    transformer's ``trainfile`` and ``testfile``. When ``ambig_type`` is
    given, both files are then transformed again in place with that
    transformation name.

    Args:
        treebank_name: Treebank identifier.
        outdir: Experiment directory. When falsy it is set to
            ``config.exp + treebank_name``. Created if it does not exist.
        trainfile: Input training file. When falsy, ``UDtreebank``'s
            ``trainfile`` is used.
        testfile: Input test file. When falsy, ``UDtreebank``'s ``devfile``
            is used.
        ambig_type: Optional name of an extra transformation applied in
            place to both converted files.
        dep_style: Forwarded to ``TreebankTransformer``.
        pos_style: Forwarded to ``TreebankTransformer``.

    Returns:
        None.
    """
    if not outdir:
        outdir = config.exp + treebank_name
    # makedirs also creates missing parent directories, unlike mkdir.
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # Each input falls back to the UD treebank on its own, so supplying only
    # one of the two no longer leaves the other as None.
    if not trainfile or not testfile:
        tb = UDtreebank(treebank_name)
        trainfile = trainfile or tb.trainfile
        testfile = testfile or tb.devfile

    TM = TreebankTransformer(treebank_name=treebank_name, dep_style=dep_style,
                             pos_style=pos_style)

    TM.transform(trainfile, TM.trainfile, 'to_conllx')
    TM.transform(testfile, TM.testfile, 'to_conllx')

    # Experiments about ambiguity: rewrite the converted files in place.
    if ambig_type:
        TM.transform(TM.trainfile, TM.trainfile, ambig_type)
        TM.transform(TM.testfile, TM.testfile, ambig_type)
Пример #7
0
#!/usr/bin/env python
#==============================================================================
#author         :Miryam de Lhoneux
#email          :[email protected]
#date           :2015/12/30
#version        :1.0
#description    :Perform a transformation on a file
#usage          :python transform_file.py infile outfile [transform|detransform|to_conllx]
#Python version :2.7.6
#==============================================================================


import sys
from src.treebank_transformer import TreebankTransformer

if __name__ == "__main__":
    # Three positional arguments are required: input file, output file and
    # the name of the transformation. Extra arguments are ignored.
    if len(sys.argv) < 4:
        sys.exit("usage: %s infile outfile [transform|detransform|to_conllx]"
                 % sys.argv[0])
    infile, out, change = sys.argv[1:4]
    # The dependency style is fixed here; 'pdt' is the other value that was
    # used with this script.
    dep_style = 'ud'
    TM = TreebankTransformer(dep_style=dep_style)
    TM.transform(infile, out, change)
Пример #8
0
from src.treebank_transformer import TreebankTransformer
from matplotlib import pyplot as plt
import sys

def plot_f_dict(d, filename):
    """Save a bar chart of the values in ``d`` to ``filename``.

    Args:
        d: Mapping from tick label to bar height. Bars follow the mapping's
            iteration order.
        filename: Output path handed to ``plt.savefig``.

    Returns:
        None.
    """
    # Materialise both views so labels and heights are plain lists.
    labels = list(d.keys())
    heights = list(d.values())
    positions = range(len(labels))

    fig = plt.figure()
    try:
        plt.bar(positions, heights, align="center")
        plt.xticks(positions, labels, rotation='vertical')
        # Leave room at the bottom for the vertical tick labels.
        plt.subplots_adjust(bottom=0.15)
        plt.savefig(filename)
    finally:
        # pyplot keeps a figure alive until it is closed explicitly.
        plt.close(fig)

if __name__ == "__main__":
    # Arguments: treebank name, then an optional dependency style.
    treebank_name = sys.argv[1]
    dep_style = sys.argv[2] if len(sys.argv) > 2 else "ud"
    TT = TreebankTransformer(treebank_name, dep_style=dep_style)
    # collect_vg_postags yields two mappings, plotted as one chart each.
    main_verb_pos, aux_pos = TT.collect_vg_postags(TT.trainfile)
    plot_f_dict(main_verb_pos, "main_verb_%s.png" % treebank_name)
    plot_f_dict(aux_pos, "aux_pos_%s.png" % treebank_name)