def evaluate_on_transformed_gold(treebank_name, outdir=None):
    """Evaluate on the transformed representation as gold standard.

    The gold dev file (``TM.testfile``) and the baseline parse are both run
    through the "transform" step. The transformed gold file is then used to
    score two parses: ``dev_parsed.ms.conll`` (this function does not create
    it, so it has to be present in ``outdir`` beforehand) and the freshly
    transformed baseline parse.

    Args:
        treebank_name: name of the treebank; used to build the default
            output directory and the TreebankTransformer.
        outdir: directory holding the experiment files. Defaults to
            ``config.exp + treebank_name + "/"``. A trailing separator is
            optional.

    Returns:
        A line of the form ``"<treebank_name>;<las>;<baseline las>\\n"``,
        where both scores are multiplied by 100.
    """
    # Local import keeps the block self-contained regardless of what the
    # enclosing module already imports.
    import os

    if not outdir:
        outdir = config.exp + treebank_name + "/"
    TM = TreebankTransformer(treebank_name=treebank_name)

    # os.path.join gives the same result as plain concatenation when outdir
    # ends with a separator, and still produces valid paths when it doesn't.
    dev_gold_ms = os.path.join(outdir, "dev_gold.ms.conll")
    parsed_ms = os.path.join(outdir, "dev_parsed.ms.conll")
    parsed_baseline = os.path.join(outdir, "dev_parsed_baseline.conll")
    baseline_ms = os.path.join(outdir, "dev_parsed_baseline.ms.conll")

    TM.transform(TM.testfile, dev_gold_ms, "transform")
    TM.transform(parsed_baseline, baseline_ms, "transform")

    # Only the labelled score is reported; the first value returned by
    # malteval.accuracy is discarded.
    _, las = malteval.accuracy(dev_gold_ms, parsed_ms)
    _, blas = malteval.accuracy(dev_gold_ms, baseline_ms)

    # NOTE(review): presumably malteval returns fractions in [0, 1], hence
    # the scaling by 100 -- confirm against malteval.accuracy.
    las = str(float(las) * 100)
    blas = str(float(blas) * 100)
    return "%s;%s;%s\n" % (treebank_name, las, blas)
def prepare_files(treebank_name, outdir=None, trainfile=None, testfile=None,
                  ambig_type=None, dep_style='ud', pos_style='ud'):
    """Convert the train and test files of a treebank to CoNLL-X.

    Args:
        treebank_name: name of the treebank; used for the default output
            directory, for UDtreebank and for TreebankTransformer.
        outdir: experiment directory, created if it does not exist.
            Defaults to ``config.exp + treebank_name``.
        trainfile: training file to convert. When omitted, the ``trainfile``
            of ``UDtreebank(treebank_name)`` is used.
        testfile: test file to convert. When omitted, the ``devfile`` of
            ``UDtreebank(treebank_name)`` is used.
        ambig_type: optional transformation name; when given, it is applied
            in place to the converted train and test files.
        dep_style: forwarded to TreebankTransformer.
        pos_style: forwarded to TreebankTransformer.
    """
    if not outdir:
        outdir = config.exp + treebank_name
    if not os.path.exists(outdir):
        # makedirs rather than mkdir so a missing parent (e.g. config.exp
        # itself) does not abort the preparation.
        os.makedirs(outdir)

    # Fill each missing file on its own: supplying only one of the two must
    # not leave the other as None when it reaches TM.transform.
    if not trainfile or not testfile:
        tb = UDtreebank(treebank_name)
        trainfile = trainfile or tb.trainfile
        testfile = testfile or tb.devfile

    TM = TreebankTransformer(treebank_name=treebank_name,
                             dep_style=dep_style, pos_style=pos_style)
    TM.transform(trainfile, TM.trainfile, 'to_conllx')
    TM.transform(testfile, TM.testfile, 'to_conllx')

    # experiments about ambiguity
    if ambig_type:
        TM.transform(TM.trainfile, TM.trainfile, ambig_type)
        TM.transform(TM.testfile, TM.testfile, ambig_type)
#!/usr/bin/env python
#==============================================================================
#author :Miryam de Lhoneux
#email :[email protected]
#date :2015/12/30
#version :1.0
#description :Perform a transformation on a file
#usage :python transform_file.py infile outfile [transform|detransform|to_conllx] [dep_style]
#Python version :2.7.6
#==============================================================================
import sys

from src.treebank_transformer import TreebankTransformer

_USAGE = ("usage: python transform_file.py infile outfile "
          "[transform|detransform|to_conllx] [dep_style]")


def main(argv):
    """Run one transformation on a file from command-line arguments.

    Args:
        argv: full argument vector (``sys.argv`` style): program name,
            input file, output file, transformation name and, optionally,
            the dependency style (defaults to 'ud'; 'pdt' is the alternative
            the script previously had commented out).

    Exits with the usage message when fewer than three arguments are given,
    instead of failing with an IndexError.
    """
    if len(argv) < 4:
        sys.exit(_USAGE)
    infile = argv[1]
    out = argv[2]
    # The transformation name is passed through unchecked: the transformer
    # is the authority on which names it supports.
    change = argv[3]
    dep_style = argv[4] if len(argv) > 4 else 'ud'
    TM = TreebankTransformer(dep_style=dep_style)
    TM.transform(infile, out, change)


if __name__ == "__main__":
    main(sys.argv)