def auto_align_list(filesSource, filesTarget, lfAlignerSh='../LF_aligner_3.11.sh', lang1='de', lang2='de'):
    if len(filesSource) != len(filesTarget):
        raise ValueError('Numbers of source and target files are not equal')
    for kk in range(len(filesSource)):
        Aligner.call_lf_aligner_auto(filesSource[kk], filesTarget[kk], lfAlignerSh, lang1, lang2)
def auto_align_list_webcgi(filesSource, filesTarget, lfAlignerSh='./LF_aligner_3.11.sh', lang1='de', lang2='de'):
    """Copy of auto_align_list for the web version running under cgi-bin.

    Note the changed relative path to LF_aligner.
    """
    if len(filesSource) != len(filesTarget):
        raise ValueError('Numbers of source and target files are not equal')
    for kk in range(len(filesSource)):
        Aligner.call_lf_aligner_auto(filesSource[kk], filesTarget[kk], lfAlignerSh, lang1, lang2)
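# Hedged usage sketch for the two helpers above; the file lists and the
# language pair are illustrative assumptions, not values from this project.
def _demo_auto_align():  # hypothetical driver, not part of the original module
    source_files = ['book1_de.txt', 'book2_de.txt']  # assumed input names
    target_files = ['book1_en.txt', 'book2_en.txt']  # assumed input names
    # A CGI deployment would call auto_align_list_webcgi instead, which only
    # changes the relative path to the LF_aligner shell script.
    auto_align_list(source_files, target_files, lang1='de', lang2='en')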
def alignerSpark(dict, genome, hashDF, sc, dict_map):
    # note: the 'dict' parameter shadows the builtin; kept to preserve the original signature
    k = 10
    for i in dict_map.keys():
        print("Aligning sequence no.", i)
        reDF = Seeds.Sparkseeds(dict, i, k, hashDF, sc)
        reDF = reDF.withColumn('ex', F.explode('POS_GEN'))
        reDF = reDF.withColumn('Flag', F.when((F.col('ex') < dict_map[i][1]) & (F.col('ex') > dict_map[i][0]), 1).otherwise(0))
        reDF = reDF.filter(reDF.Flag == 1).select(reDF.NUM_SEQ, reDF.ID_SEQ, reDF.POS_SEQ, reDF.POS_GEN, reDF.Flag)
        if reDF.count() >= 3:
            # print("0 for local alignment")
            # print("1 for global alignment")
            # scelta = int(input("Choose the alignment type: "))
            seedArray = [x["POS_SEQ"] for x in reDF.rdd.collect()]
            # print("Final seedArray:", seedArray)
            PG = [x["POS_GEN"] for x in reDF.rdd.collect()]
            optloc = None
            df, min_percentage = best_choice(dict, i, PG, seedArray, genome, sc)
            Gen = [x["GEN"] for x in df.rdd.collect()]
            for gen in Gen:
                D, B = Aligner.createB(dict[i], gen)
                if (100 - min_percentage[0]) < 60.0:
                    # if scelta == 0:
                    A, optloc = Aligner.local_align(dict[i], gen, Aligner.ScoreParam())  # Smith-Waterman
                    bt = Aligner.backtrack(B, optloc, A)
                else:
                    M = Aligner.affine_align(dict[i], gen, Aligner.ScoreParam())  # Needleman-Wunsch
                    bt = Aligner.backtrack(B, optloc, M)
                aligned_word_1, aligned_word_2, operations, line = Aligner.align(gen, dict[i], bt)
                print("Sequence length:", len(dict[i]), "| Number of operations:", len(operations))
                alignment_table = [aligned_word_1, line, operations, line, aligned_word_2]
                print(tb.tabulate(alignment_table, tablefmt="orgtbl"))
                print()
        else:
            print()
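# A minimal pure-Python sketch of the Smith-Waterman local-alignment fill that
# Aligner.local_align above presumably performs. The function name and the
# match/mismatch/gap scores here are assumptions, not the project's ScoreParam values.
def smith_waterman_score(a, b, match=2, mismatch=-1, gap=-2):
    """Return the best local-alignment score and the (row, col) cell holding it."""
    rows, cols = len(a) + 1, len(b) + 1
    H = [[0] * cols for _ in range(rows)]
    best, best_loc = 0, (0, 0)
    for i in range(1, rows):
        for j in range(1, cols):
            diag = H[i - 1][j - 1] + (match if a[i - 1] == b[j - 1] else mismatch)
            # local alignment: scores are clamped at 0 so a bad prefix can be dropped
            H[i][j] = max(0, diag, H[i - 1][j] + gap, H[i][j - 1] + gap)
            if H[i][j] > best:
                best, best_loc = H[i][j], (i, j)
    return best, best_loc

# e.g. smith_waterman_score("ACGT", "ACGT") -> (8, (4, 4)) with these toy scores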
def MakeTwoLangCorpus(alignedBooksPath, corpusPath):
    alignedBooks = []
    for alignedBookPath in [os.path.join(alignedBooksPath, x) for x in os.listdir(alignedBooksPath)]:
        alignedBook = Aligner.AlignedMultiText()
        alignedBook.LoadFromFile(alignedBookPath)
        alignedBooks.append(alignedBook)
    corpus = Aligner.MakeAlignedCorpus(alignedBooks)
    corpus.SaveToFile(corpusPath)
def get_weight(training_set):
    weights = [0] * 22  # one weight per feature
    features, weights = Aligner.align(training_set.p_str_tokens, training_set.h_str_tokens, weights)
    print(weights)
def Activated(self):
    # do something here...
    # import kicadStepUptools
    # reload_lib(kicadStepUptools)
    import Caliper
    reload_lib(Caliper)
    Caliper.Cp_undock()
    Caliper.Cp_centerOnScreen(Caliper.CPDockWidget)
    import Mover
    reload_lib(Mover)
    Mover.Mv_undock()
    Mover.Mv_centerOnScreen(Mover.MVDockWidget)
    import Aligner
    reload_lib(Aligner)
    Aligner.Alg_undock()
    Aligner.Alg_centerOnScreen(Aligner.ALGDockWidget)
def setUp(self): self.p = "The current documentation emphasizes that Flask is best suited to smaller projects" self.h = "is flask good for large apps" self.p_str_tokens = word_tokenize(self.p) self.h_str_tokens = word_tokenize(self.h) self.weights = 'default' self.aligner = Aligner.Aligner() self.target = 4
def setUp(self): self.p = "he ate the hats" self.h = "he ate the pears" self.p_str_tokens = word_tokenize(self.p) self.h_str_tokens = word_tokenize(self.h) self.weights = 'default' self.aligner = Aligner.Aligner() self.target = 6
def setUp(self): self.p = '' self.h = '' self.p_str_tokens = word_tokenize(self.p) self.h_str_tokens = word_tokenize(self.h) self.weights = 'default' self.aligner = Aligner.Aligner() self.target = 6
def setUp(self): self.p = "for scripts nginx can also serve as a very capable software load balancer" self.h = "can Nginx be a load balancer?" self.p_str_tokens = word_tokenize(self.p) self.h_str_tokens = word_tokenize(self.h) self.weights = 'default' self.aligner = Aligner.Aligner() self.target = 1
def setUp(self): self.p = "However Flask is just not designed for large applications" self.h = "is flask good for large apps" self.p_str_tokens = word_tokenize(self.p) self.h_str_tokens = word_tokenize(self.h) self.weights = 'default' self.aligner = Aligner.Aligner() self.target = 4
def main():
    # print(len(sys.argv))
    if len(sys.argv) == 7 and sys.argv[1] == '-MakeDictionary':
        firstLang = sys.argv[2]
        firstLangBiblePath = sys.argv[3]
        secondLang = sys.argv[4]
        secondLangBiblePath = sys.argv[5]
        dictionariesPath = sys.argv[6]
        MakeDictionary(firstLang, firstLangBiblePath, secondLang, secondLangBiblePath, dictionariesPath)
    if len(sys.argv) == 3 and sys.argv[1] == '-DownloadLibrary':
        path = sys.argv[2]
        DownloadLibrary(path)
    if len(sys.argv) == 7 and sys.argv[1] == '-AlignBooks':
        libraryPath = sys.argv[2]
        firstLang = sys.argv[3]
        secondLang = sys.argv[4]
        dictionariesPath = sys.argv[5]
        alignedBooksPath = sys.argv[6]
        AlignBooks(libraryPath, firstLang, secondLang, dictionariesPath, alignedBooksPath)
    if len(sys.argv) == 4 and sys.argv[1] == '-MakeTwoLangCorpus':
        alignedBooksPath = sys.argv[2]
        corpusPath = sys.argv[3]
        MakeTwoLangCorpus(alignedBooksPath, corpusPath)
    if len(sys.argv) == 1:
        dictionary1 = Dictionary.Dictionary()
        dictionary1.LoadFromFile("C:\\Users\\Bober\\Desktop\\ботва\\8\\Мат СК\\Dictionaries_2\\EN-RU.xml")
        dictionary2 = Dictionary.Dictionary()
        dictionary2.LoadFromFile("C:\\Users\\Bober\\Desktop\\ботва\\8\\Мат СК\\Dictionaries_2\\RU-EN.xml")
        aligner = Aligner.Aligner("EN", "RU", dictionary1, dictionary2)
        enSentence1 = "The emperor of Lilliput, attended by several of the nobility, comes to see the author in his confinement"
        ruSentence1 = "Император Лилипутии в сопровождении многочисленных вельмож приходит навестить автора в его заключении"
        print('-----Computing alignment value for sentences:')
        print('1. ', enSentence1)
        print('2. ', ruSentence1)
        value1 = aligner._getSencencesAlignmentValue(enSentence1, ruSentence1)
        print(value1)
        enSentence2 = "I took them all in my right hand, put five of them into my coat-pocket; and as to the sixth, I made a countenance as if I would eat him alive"
        ruSentence2 = "Его императорское величество часто обращался ко мне с вопросами, на которые я отвечал ему, но ни он, ни я не понимали ни слова из того, что говорили друг другу"
        print('-----Computing alignment value for sentences:')
        print('1. ', enSentence2)
        print('2. ', ruSentence2)
        value2 = aligner._getSencencesAlignmentValue(enSentence2, ruSentence2)
        print(value2)
def setUp(self):
    # self.p = """
    # Highland Park native was overwhelmed by prospect of prison from charges
    # that he stole MIT articles electronically.
    # """
    # self.h = "the highland park native was overwhelmed."
    self.p = "the cat ate the tasty pizza"
    self.h = 'the cat never ate'
    self.p_str_tokens = word_tokenize(self.p)
    self.h_str_tokens = word_tokenize(self.h)
    self.weights = 'default'
    self.aligner = Aligner.Aligner()
    self.target = 4

def setUp(self):
    self.p = """
    Highland Park native was overwhelmed by prospect of prison from charges
    that he stole MIT articles electronically.
    """
    self.h = "the highland park native was overwhelmed."
    # self.p = "I ate a pizza."
    # self.h = 'I ate food.'
    self.p_str_tokens = word_tokenize(self.p)
    self.h_str_tokens = word_tokenize(self.h)
    self.weights = 'default'
    self.aligner = Aligner.Aligner()
    self.target = 1

def setUp(self):
    self.answer = {0: 'Yes', 1: 'Yes', 2: 'No', 3: 'No', 4: 'No', 5: 'No', 6: 'No'}
    self.p = "Sanger reports that the Stuxnet virus was developed first under President Bush in 2006 under the"
    self.h = "was stuxnet created under President Bush"
    self.p_str_tokens = word_tokenize(self.p)
    self.h_str_tokens = word_tokenize(self.h)
    self.weights = 'default'
    self.aligner = Aligner.Aligner()
    self.target = 1

def setUp(self):
    self.answer = {0: 'Yes', 1: 'Yes', 2: 'No', 3: 'No', 4: 'No', 5: 'No', 6: 'No'}
    self.p = "jj Abrams is directing Star Wars: Episode vii"
    self.h = "is jj abrams directing star wars vii"
    self.p_str_tokens = word_tokenize(self.p)
    self.h_str_tokens = word_tokenize(self.h)
    self.weights = 'default'
    self.aligner = Aligner.Aligner()
    self.target = 1

def setUp(self):
    self.answer = {0: 'Yes', 1: 'Yes', 2: 'No', 3: 'No', 4: 'No', 5: 'No', 6: 'No'}
    self.p = "Coca Cola Drink who invented Coca Cola?"
    self.h = "who invented Coca Cola?"
    self.p_str_tokens = word_tokenize(self.p)
    self.h_str_tokens = word_tokenize(self.h)
    self.weights = 'default'
    self.aligner = Aligner.Aligner()
    self.target = 1

def setUp(self):
    self.answer = {0: 'Yes', 1: 'Yes', 2: 'No', 3: 'No', 4: 'No', 5: 'No', 6: 'No'}
    self.p = "David Gilmour is a guitarist and vocalist with British rock band Pink Floyd, and was voted No."
    self.h = "was david gilmour the guitarist for pink floyd"
    self.p_str_tokens = word_tokenize(self.p)
    self.h_str_tokens = word_tokenize(self.h)
    self.weights = 'default'
    self.aligner = Aligner.Aligner()
    self.target = 1

def setUp(self):
    self.answer = {0: 'Yes', 1: 'Yes', 2: 'No', 3: 'No', 4: 'No', 5: 'No', 6: 'No'}
    self.p = "There's no official Google API for the text to speech"
    self.h = "does google have a text to speech API"
    self.p_str_tokens = word_tokenize(self.p)
    self.h_str_tokens = word_tokenize(self.h)
    self.weights = 'default'
    self.aligner = Aligner.Aligner()
    self.target = 3

def setUp(self):
    self.answer = {0: 'Yes', 1: 'Yes', 2: 'No', 3: 'No', 4: 'No', 5: 'No', 6: 'No'}
    self.p = "When the paramedic inspected him, he realized that Ariel Sharon was having a stroke"
    self.h = "did Ariel Sharon have a stroke?"
    self.p_str_tokens = word_tokenize(self.p)
    self.h_str_tokens = word_tokenize(self.h)
    self.weights = 'default'
    self.aligner = Aligner.Aligner()
    self.target = 1
def learn_weights(training_set, learning_epochs, burn_in_epochs, learning_rate, learning_rate_multiplier):
    weights = [0] * 22  # one weight per feature
    weights_history = []
    for i in range(learning_epochs):
        print('*** Starting epoch %s ***' % i)
        learning_rate *= learning_rate_multiplier
        logging.warning('Starting epoch %s with learning rate %s' % (i, learning_rate))
        shuffle(training_set)
        for index, problem in enumerate(training_set):
            print('* Starting problem %s of %s in epoch %s *' % (index, len(training_set), i))
            gold_features = gold_featurizer.featurize(problem)
            logging.warning('\nStarting weights:\n%s' % weights)
            # logging.warning('Problem:\n%s\n%s' % (problem.p_str_tokens, problem.h_str_tokens))
            # logging.warning('\nGold features:\n%s' % (gold_features))
            predicted_alignment, predicted_features = Aligner.align(
                problem.p_str_tokens, problem.h_str_tokens, weights)
            # logging.warning('\nPredicted features:\n%s' % predicted_features)
            weights = weights + (learning_rate * (gold_features - predicted_features))
            # diff = gold_features - predicted_features
            # logging.warning('\nUnrated weights difference:\n%s' % diff)
            logging.warning('Summed rated weights:\n%s' % weights)
            # use 'w' rather than 'i' so the comprehension does not clobber the epoch counter
            weights = weights / sqrt(sum([w ** 2 for w in weights]))
            logging.warning('L2 normalization:\n%s' % weights)
        weights_history.append(weights)
    logging.warning('\n\nWeights history:\n%s' % weights_history)
    # float division, so the average is not truncated to zero under Python 2
    weights_averaged = 1.0 / (learning_epochs - burn_in_epochs) * sum(weights_history[burn_in_epochs:])
    return weights_averaged
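# A toy numpy sketch of the update rule used in learn_weights above: step the
# weights toward the gold feature vector, L2-normalize, and average the
# post-burn-in epochs. The three-dimensional vectors below are made up for
# illustration; the real featurizers produce 22-dimensional vectors.
import numpy as np

gold = np.array([1.0, 0.0, 2.0])        # pretend gold_featurizer output
predicted = np.array([0.0, 1.0, 1.0])   # pretend features of the predicted alignment
weights = np.zeros(3)
history = []
for epoch in range(4):
    weights = weights + 0.1 * (gold - predicted)        # perceptron step
    weights = weights / np.sqrt((weights ** 2).sum())   # L2 normalization
    history.append(weights)
burn_in = 2
averaged = 1.0 / (len(history) - burn_in) * sum(history[burn_in:])
print(averaged)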
def setUp(self):
    self.answer = {0: 'Yes', 1: 'Yes', 2: 'No', 3: 'No', 4: 'No', 5: 'No', 6: 'No'}
    # self.p = "Marissa Ann Mayer became the CEO of Yahoo on July 17, ... She was previously the vice president of geographic and local services at the search engine company Google."
    # self.p = "Marissa Mayer, one of the top executives at Google, will be the next chief of Yahoo"
    self.p = "Sergey Mikhaylovich Brin (born August 21, 1973) is a Russian -born American computer scientist and Internet entrepreneur who, with Larry Page, co-founded Google, one "
    self.h = "did Sergey Brin co-found google?"
    self.p_str_tokens = word_tokenize(self.p)
    self.h_str_tokens = word_tokenize(self.h)
    self.weights = 'default'
    self.aligner = Aligner.Aligner()
    self.target = 1
def AlignBooks(libraryPath, firstLang, secondLang, dictionariesPath, alignedBooksPath):
    if os.path.exists(alignedBooksPath):
        shutil.rmtree(alignedBooksPath)
    os.makedirs(alignedBooksPath)
    library = Library.Library()
    library.LoadFromFile(os.path.join(libraryPath, 'library.xml'))
    parallelBooksToAlign = []
    for parallelBook in library.ParallelBooks:
        hasFirstLangBook = len([b for b in parallelBook.Books
                                if b.Language == firstLang and b.LocalFilePath is not None]) == 1
        hasSecondLangBook = len([b for b in parallelBook.Books
                                 if b.Language == secondLang and b.LocalFilePath is not None]) == 1
        if hasFirstLangBook and hasSecondLangBook:
            parallelBooksToAlign.append(parallelBook)
    dictionary1 = Dictionary.Dictionary()
    dictionary1.LoadFromFile(os.path.join(dictionariesPath, firstLang + '-' + secondLang + '.xml'))
    dictionary2 = Dictionary.Dictionary()
    dictionary2.LoadFromFile(os.path.join(dictionariesPath, secondLang + '-' + firstLang + '.xml'))
    aligner = Aligner.Aligner(firstLang, secondLang, dictionary1, dictionary2)
    for i, parallelBook in enumerate(parallelBooksToAlign):
        print(i)
        alignedBookPath = os.path.join(alignedBooksPath, str(i) + '.xml')
        firstBook = [b for b in parallelBook.Books if b.Language == firstLang][0]
        secondBook = [b for b in parallelBook.Books if b.Language == secondLang][0]
        alignedBook = aligner.AlignBooks(firstBook, secondBook)
        alignedBook.SaveToFile(alignedBookPath)
def setUp(self):
    self.answer = {0: 'Yes', 1: 'Yes', 2: 'No', 3: 'No', 4: 'No', 5: 'No', 6: 'No'}
    # self.p = "Butler did not defeat Marquette"
    # self.h = "Did Marquette lose to Butler?"
    self.p = "The Communist Party USA was a small Maoist political party which was founded in 1965 by members of the Communist Party around Michael Laski who took the side of China in the Sino-Soviet split."
    # self.h = "the first president of the United States"
    self.h = "Michael Laski was an opponent of China."
    self.p_str_tokens = word_tokenize(self.p)
    self.h_str_tokens = word_tokenize(self.h)
    self.weights = 'default'
    self.aligner = Aligner.Aligner()
    self.target = 6
lines = []
for (seq, sp) in seq2sp_dict.items():
    if seq not in sequenceTodiscard:
        line = "%s:%s\n" % (sp, seq)
        lines.append(line)
with open(FinalSp2Seq, "w") as sp2seqFile:
    sp2seqFile.write("".join(lines))

if not args.realign_ali:
    filteredfasta.write_fasta(FinalAli)
else:
    AfterfilteringFasta = TmpAli
    filteredfasta.write_fasta(AfterfilteringFasta)
    ### Realign the final alignment
    MafftProcess = Aligner.Mafft(TmpAli)
    # MafftProcess.Maxiterate = 2  # too slow
    MafftProcess.AutoOption = True
    MafftProcess.QuietOption = True
    MafftProcess.OutputFile = FinalAli
    if os.path.isfile(TmpAli):
        logger.info("Realigning the filtered alignment")
        _ = MafftProcess.launch()
        StartingAlignment = MafftProcess.OutputFile
    else:
        logger.error("%s is not a file.", TmpAli)
        end(1)

### Build a tree from the final alignment
logger.info("Building a tree from the final alignment")
def main():
    arg_parser = argparse.ArgumentParser(description="Brandeis transition-based AMR parser 1.0")
    build_opts(arg_parser)
    args = arg_parser.parse_args()
    amr_file = args.amr_file
    instances = None
    train_instance = None
    constants.FLAG_COREF = args.coref
    constants.FLAG_PROP = args.prop
    constants.FLAG_RNE = args.rne
    constants.FLAG_VERB = args.verblist
    constants.FLAG_ONTO = args.onto
    constants.FLAG_DEPPARSER = args.depparser

    if args.mode == 'preprocess':
        # use corenlp to preprocess the sentences
        do_preproces(args)
    elif args.mode == 'test_gold_graph':
        # preprocess the JAMR-aligned amr
        do_test_gold_graph(args)
    elif args.mode == 'align':
        # do alignment
        if args.input_file:
            instances = pickle.load(open(args.input_file, 'rb'))
        else:
            raise ValueError("Missing data file! Specify it with --input or run preprocessing first!")
        gold_instances_file = args.input_file.split('.')[0] + '_gold.p'
        print >> log, "Doing alignment..."
        if LOGGED:
            saveerr = sys.stderr
            sys.stderr = open('./log/alignment.log', 'w')
        amr_aligner = Aligner(verbose=args.verbose)
        ref_graphs = []
        begin = args.begin
        counter = 1
        for i in range(len(instances)):
            snt = instances[i].text
            amr = instances[i].amr
            if args.verbose > 1:
                print >> log, counter
                print >> log, "Sentence:"
                print >> log, snt + '\n'
                print >> log, "AMR:"
                print >> log, amr.to_amr_string()
            alresult = amr_aligner.apply_align(snt, amr)
            ref_amr_graph = SpanGraph.init_ref_graph(amr, alresult)
            instances[i].addGoldGraph(ref_amr_graph)
            if args.verbose > 1:
                print >> log, amr_aligner.print_align_result(alresult, amr)
            counter += 1
        pickle.dump(instances, open(gold_instances_file, 'wb'), pickle.HIGHEST_PROTOCOL)
        if LOGGED:
            sys.stderr.close()
            sys.stderr = saveerr
        print >> log, "Done alignment and gold graph generation."
        sys.exit()
    elif args.mode == 'userGuide':
        # test user guide actions
        print 'Read in training instances...'
        train_instances = preprocess(amr_file, False)
        sentID = int(raw_input("Input the sent ID:"))
        amr_parser = Parser()
        amr_parser.testUserGuide(train_instances[sentID])
        sys.exit()
    elif args.mode == 'oracleGuide':
        # test deterministic oracle
        train_instances = preprocess(amr_file, start_corenlp=False,
                                     input_format=args.amrfmt, prp_format=args.prpfmt)
        try:
            hand_alignments = load_hand_alignments(amr_file + str('.hand_aligned'))
        except IOError:
            hand_alignments = []
        start_step = args.start_step
        begin = args.begin
        amr_parser = Parser(oracle_type=DET_T2G_ORACLE_ABT, verbose=args.verbose)
        # ref_graphs = pickle.load(open('./data/ref_graph.p', 'rb'))
        n_correct_total = .0
        n_parsed_total = .0
        n_gold_total = .0
        pseudo_gold_amr = []
        n_correct_tag_total = .0
        n_parsed_tag_total = .0
        n_gold_tag_total = .0
        gold_amr = []
        aligned_instances = []
        for instance in train_instances[begin:]:
            if hand_alignments and instance.comment['id'] not in hand_alignments:
                continue
            state = amr_parser.testOracleGuide(instance, start_step)
            n_correct_arc, n1, n_parsed_arc, n_gold_arc, n_correct_tag, n_parsed_tag, n_gold_tag = state.evaluate()
            if n_correct_arc != n1:
                import pdb
                pdb.set_trace()
            n_correct_total += n_correct_arc
            n_parsed_total += n_parsed_arc
            n_gold_total += n_gold_arc
            p = n_correct_arc / n_parsed_arc if n_parsed_arc else .0
            r = n_correct_arc / n_gold_arc if n_gold_arc else .0
            indicator = 'PROBLEM!' if p < 0.5 else ''
            if args.verbose > 2:
                print >> sys.stderr, "Precision: %s Recall: %s %s\n" % (p, r, indicator)
            n_correct_tag_total += n_correct_tag
            n_parsed_tag_total += n_parsed_tag
            n_gold_tag_total += n_gold_tag
            p1 = n_correct_tag / n_parsed_tag if n_parsed_tag else .0
            r1 = n_correct_tag / n_gold_tag if n_gold_tag else .0
            if args.verbose > 2:
                print >> sys.stderr, "Tagging Precision:%s Recall:%s" % (p1, r1)
            instance.comment['alignments'] += ''.join(
                ' %s-%s|%s' % (idx - 1, idx, instance.amr.get_pid(state.A.abt_node_table[idx]))
                for idx in state.A.abt_node_table if isinstance(idx, int))
            aligned_instances.append(instance)
            pseudo_gold_amr.append(GraphState.get_parsed_amr(state.A))
        pt = n_correct_total / n_parsed_total if n_parsed_total != .0 else .0
        rt = n_correct_total / n_gold_total if n_gold_total != .0 else .0
        ft = 2 * pt * rt / (pt + rt) if pt + rt != .0 else .0
        write_parsed_amr(pseudo_gold_amr, aligned_instances, amr_file, 'pseudo-gold', hand_alignments)
        print "Total Accuracy: %s, Recall: %s, F-1: %s" % (pt, rt, ft)
        tp = n_correct_tag_total / n_parsed_tag_total if n_parsed_tag_total != .0 else .0
        tr = n_correct_tag_total / n_gold_tag_total if n_gold_tag_total != .0 else .0
        print "Tagging Precision:%s Recall:%s" % (tp, tr)
    elif args.mode == 'train':
        do_train(args)
    elif args.mode == 'parse':
        # actual parsing
        test_instances = preprocess(amr_file, start_corenlp=False,
                                    input_format=args.amrfmt, prp_format=args.prpfmt)
        if args.section != 'all':
            print "Choosing corpus section: %s" % (args.section)
            tcr = constants.get_corpus_range(args.section, 'test')
            test_instances = test_instances[tcr[0]:tcr[1]]
        # random.shuffle(test_instances)
        print >> experiment_log, "Loading model: ", args.model
        model = Model.load_model(args.model)
        parser = Parser(model=model, oracle_type=DET_T2G_ORACLE_ABT, action_type=args.actionset,
                        verbose=args.verbose, elog=experiment_log)
        print >> experiment_log, "BEGIN PARSING"
        span_graph_pairs, results = parser.parse_corpus_test(test_instances)
        parsed_suffix = '%s.%s.parsed' % (args.section, args.model.split('.')[-2])
        write_parsed_amr(results, test_instances, amr_file, suffix=parsed_suffix)
        print >> experiment_log, "DONE PARSING"
        if args.smatcheval:
            smatch_path = "./smatch_2.0.2/smatch.py"
            python_path = 'python'
            options = '--pr -f'
            parsed_filename = amr_file + '.' + parsed_suffix
            command = '%s %s %s %s %s' % (python_path, smatch_path, options, parsed_filename, amr_file)
            print 'Evaluation using command: ' + command
            print subprocess.check_output(command, stderr=subprocess.STDOUT, shell=True)
    elif args.mode == 'eval':
        # break-down error analysis
        # TODO: pickled files are used here; replace with parsed AMR and gold AMR
        span_graph_pairs = pickle.load(open(args.eval[0], 'rb'))
        instances = pickle.load(open(args.eval[1], 'rb'))
        amr_parser = Parser(oracle_type=DET_T2G_ORACLE_ABT, verbose=args.verbose)
        error_stat = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
        for spg_pair, instance in zip(span_graph_pairs, instances):
            amr_parser.errorAnalyze(spg_pair[0], spg_pair[1], instance, error_stat)
    else:
        arg_parser.print_help()
command = ["cp", In, Out] logger.debug(" ".join(command)) p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) (out, err) = p.communicate() if err: logger.error(err) return (out, err) if args.realign_ali: ### Realign the input alignment InitialMafftProcess = Aligner.Mafft(StartingAlignment) InitialMafftProcess.Maxiterate = 2 InitialMafftProcess.QuietOption = True InitialMafftProcess.OutputFile = "%s/%s.fa" % (TmpDirName, "RealignAli") if os.path.isfile(StartingAlignment): logger.info("Realign the input alignment") _ = InitialMafftProcess.launch() StartingAlignment = InitialMafftProcess.OutputFile else: logger.error("%s is not a file.", StartingAlignment) end(1) ### Concate all sp2seq files logger.info("Concate all Sp2Seq files") Sp2Seq = "%s/StartingSp2Seq.txt" % (TmpDirName)
# n_seqs = int(sys.argv[1]) if len(sys.argv) >= 2 else 2
# n_seqs = min(n_seqs, len(seqs))
# n = int(ceil(log(n_seqs, 2)))
# for i in range(1, n + 1):
#     for j in range(0, 1 << n, 1 << i):
#         if j + (1 << (i - 1)) >= n_seqs:
#             break
#         print 'aligning', j, j + (1 << (i - 1))
#         align = Aligner(seqs[j], seqs[j + (1 << (i - 1))])
#         align.align()
align = Aligner(seqs[0], seqs[1])
align.align()
data += seqs[0].graphData(arrows=True, vertical=True)
print '01' * 50
print seqs[0]
print '01' * 50
align = Aligner(seqs[2], seqs[3])
align.align()
data += seqs[2].graphData(arrows=True, vertical=True)
print '23' * 50
print seqs[2]
print '23' * 50
align = Aligner(seqs[0], seqs[2])
align.align()
raw_seqs = []
seqs = []
while len(seqs) <= n or n == -1:
    name, seq = args.infile.readline(), args.infile.readline()
    if not name or not seq:
        break
    raw_seqs.append({'seq': seq[:-1], 'name': name[1:-1]})
    seqs.append(Graph(seq=seq[:-1], name=name[1:-1]))
if n == -1:
    n = len(seqs)
for i in range(1, n):
    if args.verbosity >= 1:
        print 'aligning', 0, i
    align = Aligner(seqs[0], seqs[i])
    align.align()
    if args.verbosity >= 5:
        print seqs[0]
# lg = int(ceil(log(n, 2)))
# for i in range(1, lg + 1):
#     for j in range(0, 1 << lg, 1 << i):
#         if j + (1 << (i - 1)) >= n:
#             break
#         print 'aligning', j, j + (1 << (i - 1))
#         align = Aligner(seqs[j], seqs[j + (1 << (i - 1))])
#         align.align()
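# The commented-out loops in the two fragments above encode a balanced,
# tournament-style merge schedule (as opposed to the star alignment against
# seqs[0] actually used). A minimal sketch that just returns the pair order
# that schedule would produce, assuming n_seqs is a power of two; the
# function name is hypothetical.
from math import ceil, log

def merge_schedule(n_seqs):
    pairs = []
    n = int(ceil(log(n_seqs, 2)))
    for i in range(1, n + 1):           # round i merges blocks of width 2**(i-1)
        for j in range(0, 1 << n, 1 << i):
            if j + (1 << (i - 1)) >= n_seqs:
                break
            pairs.append((j, j + (1 << (i - 1))))
    return pairs

# merge_schedule(4) -> [(0, 1), (2, 3), (0, 2)], matching the explicit
# Aligner calls in the first fragment above.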
def main():
    '''
    usage = "Usage:%prog [options] amr_file"
    opt = OptionParser(usage=usage)
    opt.add_option("-v", action="store", dest="verbose", type='int',
                   default=0, help="set up verbose level")
    opt.add_option("-a", action="store_true", dest="align",
                   default=False, help="do alignment between sentence and amr")
    opt.add_option("-b", action="store", dest="begin", type='int',
                   default=0, help="for debugging"
                   "When do alignment, where the alignment begins"
                   "When test oracle, where to begin")
    opt.add_option("-s", action="store", dest="start_step", type='int',
                   default=0, help="where the step begins,for testing oracle")
    opt.add_option("-o", action="store", dest="sentfilep",
                   help="output sentences to file and parse the sentence into dependency graph")
    opt.add_option("-i", action="store", dest="parsedfilep",
                   help="read parsed dependency graph from file")
    opt.add_option("-g", action="store", dest="userActfile",
                   help="read user input action sequences as guide")
    opt.add_option("-d", action="store", dest="oracle", type='int', default=0,
                   help="test the output actions of deterministic oracle: "
                        "1: tree oracle 2: list-based oracle")
    '''
    arg_parser = argparse.ArgumentParser(description="Brandeis transition-based AMR parser 1.0")
    arg_parser.add_argument('-v', '--verbose', type=int, default=0, help='set up verbose level for debug')
    arg_parser.add_argument('-b', '--begin', type=int, default=0,
                            help='specify which sentence to begin the alignment or oracle testing at, for debug')
    arg_parser.add_argument('-s', '--start_step', type=int, default=0,
                            help='specify which step to begin oracle testing at, for debug')
    # arg_parser.add_argument('-i', '--input_file', help='the input: preprocessed data instances file for aligner or training')
    arg_parser.add_argument('-d', '--dev', help='development file')
    arg_parser.add_argument('-as', '--actionset', choices=['basic'], default='basic',
                            help='choose a different action set')
    arg_parser.add_argument('-m', '--mode',
                            choices=['preprocess', 'test_gold_graph', 'align', 'userGuide',
                                     'oracleGuide', 'train', 'parse', 'eval'],
                            help="preprocess: generate pos tags, dependency trees, ner\n"
                                 "align: do alignment between AMR graph and sentence string")
    arg_parser.add_argument('-dp', '--depparser',
                            choices=['stanford', 'stanfordConvert', 'stdconv+charniak', 'clear', 'mate', 'turbo'],
                            default='stdconv+charniak', help='choose the dependency parser')
    arg_parser.add_argument('--coref', action='store_true', help='flag to enable coreference information')
    arg_parser.add_argument('--prop', action='store_true', help='flag to enable semantic role labeling information')
    arg_parser.add_argument('--model', help='specify the model file')
    arg_parser.add_argument('--feat', help='feature template file')
    arg_parser.add_argument('-iter', '--iterations', default=1, type=int, help='training iterations')
    arg_parser.add_argument('amr_file', nargs='?', help='amr annotation file/input sentence file for parsing')
    arg_parser.add_argument('--amrfmt', action='store_true', help='specify that the input file is an AMR annotation file')
    arg_parser.add_argument('-e', '--eval', nargs=2, help='error analysis: give parsed AMR file and gold AMR file')
    arg_parser.add_argument('--section', choices=['proxy', 'all'], default='all',
                            help='choose section of the corpus; only works for the LDC2014T12 dataset')

    args = arg_parser.parse_args()
    amr_file = args.amr_file
    instances = None
    train_instance = None
    constants.FLAG_COREF = args.coref
    constants.FLAG_PROP = args.prop
    constants.FLAG_DEPPARSER = args.depparser

    if args.mode == 'preprocess':
        # use corenlp to preprocess the sentences
        instances = preprocess(amr_file, START_SNLP=True, INPUT_AMR=args.amrfmt)
        print "Done preprocessing!"
    elif args.mode == 'test_gold_graph':
        # preprocess the JAMR-aligned amr
        instances = preprocess(amr_file, False)
        # instances = pickle.load(open('data/gold_edge_graph.pkl', 'rb'))
        gold_amr = []
        for inst in instances:
            GraphState.sent = inst.tokens
            gold_amr.append(GraphState.get_parsed_amr(inst.gold_graph))
        # pseudo_gold_amr = [GraphState.get_parsed_amr(inst.gold_graph) for inst in instances]
        write_parsed_amr(gold_amr, instances, amr_file, 'abt.gold')
        # instances = preprocess_aligned(amr_file)
        print "Done output AMR!"
    elif args.mode == 'align':
        # do alignment
        if args.input_file:
            instances = pickle.load(open(args.input_file, 'rb'))
        else:
            raise ValueError("Missing data file! Specify it with --input or run preprocessing first!")
        gold_instances_file = args.input_file.split('.')[0] + '_gold.p'
        print >> log, "Doing alignment..."
        if LOGGED:
            saveerr = sys.stderr
            sys.stderr = open('./log/alignment.log', 'w')
        amr_aligner = Aligner(verbose=args.verbose)
        ref_graphs = []
        begin = args.begin
        counter = 1
        # for snt, amr in zip(snts[begin:], amrs[begin:]):
        for i in range(len(instances)):
            snt = instances[i].text
            amr = instances[i].amr
            if args.verbose > 1:
                print >> log, counter
                print >> log, "Sentence:"
                print >> log, snt + '\n'
                print >> log, "AMR:"
                print >> log, amr.to_amr_string()
            alresult = amr_aligner.apply_align(snt, amr)
            ref_amr_graph = SpanGraph.init_ref_graph(amr, alresult)
            # ref_graphs.append(ref_amr_graph)
            instances[i].addGoldGraph(ref_amr_graph)
            if args.verbose > 1:
                # print >> log, "Reference tuples:"
                # print >> log, ref_depGraph.print_tuples()
                print >> log, amr_aligner.print_align_result(alresult, amr)
                # raw_input('ENTER to continue')
            counter += 1
        pickle.dump(instances, open(gold_instances_file, 'wb'), pickle.HIGHEST_PROTOCOL)
        # pickle.dump(ref_graphs, open('./data/ref_graph.p', 'wb'), pickle.HIGHEST_PROTOCOL)
        if LOGGED:
            sys.stderr.close()
            sys.stderr = saveerr
        print >> log, "Done alignment and gold graph generation."
        sys.exit()
    elif args.mode == 'userGuide':
        # test user guide actions
        print 'Read in training instances...'
        train_instances = preprocess(amr_file, False)
        sentID = int(raw_input("Input the sent ID:"))
        amr_parser = Parser()
        amr_parser.testUserGuide(train_instances[sentID])
        sys.exit()
    elif args.mode == 'oracleGuide':
        # test deterministic oracle
        train_instances = preprocess(amr_file, START_SNLP=False)
        try:
            hand_alignments = load_hand_alignments(amr_file + str('.hand_aligned'))
        except IOError:
            hand_alignments = []
        start_step = args.start_step
        begin = args.begin
        amr_parser = Parser(oracle_type=DET_T2G_ORACLE_ABT, verbose=args.verbose)
        # ref_graphs = pickle.load(open('./data/ref_graph.p', 'rb'))
        n_correct_total = .0
        n_parsed_total = .0
        n_gold_total = .0
        pseudo_gold_amr = []
        n_correct_tag_total = .0
        n_parsed_tag_total = .0
        n_gold_tag_total = .0
        gold_amr = []
        aligned_instances = []
        # print "shuffling training instances"
        # random.shuffle(train_instances)
        for instance in train_instances[begin:]:
            if hand_alignments and instance.comment['id'] not in hand_alignments:
                continue
            state = amr_parser.testOracleGuide(instance, start_step)
            n_correct_arc, n1, n_parsed_arc, n_gold_arc, n_correct_tag, n_parsed_tag, n_gold_tag = state.evaluate()
            # assert n_correct_arc == n1
            if n_correct_arc != n1:
                import pdb
                pdb.set_trace()
            n_correct_total += n_correct_arc
            n_parsed_total += n_parsed_arc
            n_gold_total += n_gold_arc
            p = n_correct_arc / n_parsed_arc if n_parsed_arc else .0
            r = n_correct_arc / n_gold_arc if n_gold_arc else .0  # guard on the actual denominator
            indicator = 'PROBLEM!' if p < 0.5 else ''
            if args.verbose > 2:
                print >> sys.stderr, "Precision: %s Recall: %s %s\n" % (p, r, indicator)
            n_correct_tag_total += n_correct_tag
            n_parsed_tag_total += n_parsed_tag
            n_gold_tag_total += n_gold_tag
            p1 = n_correct_tag / n_parsed_tag if n_parsed_tag else .0
            r1 = n_correct_tag / n_gold_tag if n_gold_tag else .0  # guard on the actual denominator
            if args.verbose > 2:
                print >> sys.stderr, "Tagging Precision:%s Recall:%s" % (p1, r1)
            instance.comment['alignments'] += ''.join(
                ' %s-%s|%s' % (idx - 1, idx, instance.amr.get_pid(state.A.abt_node_table[idx]))
                for idx in state.A.abt_node_table if isinstance(idx, int))
            aligned_instances.append(instance)
            pseudo_gold_amr.append(GraphState.get_parsed_amr(state.A))
            # gold_amr.append(instance.amr)
            # assert set(state.A.tuples()) == set(instance.gold_graph.tuples())
        pt = n_correct_total / n_parsed_total if n_parsed_total != .0 else .0
        rt = n_correct_total / n_gold_total if n_gold_total != .0 else .0
        ft = 2 * pt * rt / (pt + rt) if pt + rt != .0 else .0
        write_parsed_amr(pseudo_gold_amr, aligned_instances, amr_file, 'pseudo-gold', hand_alignments)
        print "Total Accuracy: %s, Recall: %s, F-1: %s" % (pt, rt, ft)
        tp = n_correct_tag_total / n_parsed_tag_total if n_parsed_tag_total != .0 else .0
        tr = n_correct_tag_total / n_gold_tag_total if n_gold_tag_total != .0 else .0
        print "Tagging Precision:%s Recall:%s" % (tp, tr)
        # amr_parser.record_actions('data/action_set.txt')
    elif args.mode == 'train':
        # training
        print "Parser Config:"
        print "Incorporate Coref Information: %s" % (constants.FLAG_COREF)
        print "Incorporate SRL Information: %s" % (constants.FLAG_PROP)
        print "Dependency parser used: %s" % (constants.FLAG_DEPPARSER)
        train_instances = preprocess(amr_file, START_SNLP=False)
        if args.dev:
            dev_instances = preprocess(args.dev, START_SNLP=False)
        if args.section != 'all':
            print "Choosing corpus section: %s" % (args.section)
            tcr = constants.get_corpus_range(args.section, 'train')
            train_instances = train_instances[tcr[0]:tcr[1]]
            if args.dev:
                dcr = constants.get_corpus_range(args.section, 'dev')
                dev_instances = dev_instances[dcr[0]:dcr[1]]
        feat_template = args.feat if args.feat else None
        model = Model(elog=experiment_log)
        # model.output_feature_generator()
        parser = Parser(model=model, oracle_type=DET_T2G_ORACLE_ABT, action_type=args.actionset,
                        verbose=args.verbose, elog=experiment_log)
        model.setup(action_type=args.actionset, instances=train_instances, parser=parser,
                    feature_templates_file=feat_template)
        print >> experiment_log, "BEGIN TRAINING!"
        for iter in xrange(1, args.iterations + 1):
            print >> experiment_log, "shuffling training instances"
            random.shuffle(train_instances)
            print >> experiment_log, "Iteration:", iter
            begin_updates = parser.perceptron.get_num_updates()
            parser.parse_corpus_train(train_instances)
            parser.perceptron.average_weight()
            # model.save_model(args.model + '-iter' + str(iter) + '-' + str(int(time.time())) + '.m')
            model.save_model(args.model + '-iter' + str(iter) + '.m')
            if args.dev:
                print >> experiment_log, "Result on develop set:"
                _, parsed_amr = parser.parse_corpus_test(dev_instances)
                write_parsed_amr(parsed_amr, dev_instances, args.dev,
                                 args.section + '.' + str(iter) + '.parsed')
        print >> experiment_log, "DONE TRAINING!"
    elif args.mode == 'parse':
        # actual parsing
        test_instances = preprocess(amr_file, START_SNLP=False, INPUT_AMR=False)
        if args.section != 'all':
            print "Choosing corpus section: %s" % (args.section)
            tcr = constants.get_corpus_range(args.section, 'test')
            test_instances = test_instances[tcr[0]:tcr[1]]
        # random.shuffle(test_instances)
        print >> experiment_log, "Loading model: ", args.model
        model = Model.load_model(args.model)
        parser = Parser(model=model, oracle_type=DET_T2G_ORACLE_ABT, action_type=args.actionset,
                        verbose=args.verbose, elog=experiment_log)
        print >> experiment_log, "BEGIN PARSING"
        span_graph_pairs, results = parser.parse_corpus_test(test_instances)
        write_parsed_amr(results, test_instances, amr_file, suffix='%s.parsed' % (args.section))
        # write_span_graph(span_graph_pairs, test_instances, amr_file, suffix='spg.50')
        ################
        #   for eval   #
        ################
        # pickle.dump(span_graph_pairs, open('data/eval/%s_spg_pair.pkl' % (amr_file), 'wb'), pickle.HIGHEST_PROTOCOL)
        # pickle.dump(test_instances, open('data/eval/%s_instances.pkl' % (amr_file), 'wb'), pickle.HIGHEST_PROTOCOL)
        print >> experiment_log, "DONE PARSING"
        # plt.hist(results)
        # plt.savefig('result.png')
    elif args.mode == 'eval':
        # break-down error analysis
        # TODO: pickled files are used here; replace with parsed AMR and gold AMR
        span_graph_pairs = pickle.load(open(args.eval[0], 'rb'))
        instances = pickle.load(open(args.eval[1], 'rb'))
        amr_parser = Parser(oracle_type=DET_T2G_ORACLE_ABT, verbose=args.verbose)
        error_stat = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
        for spg_pair, instance in zip(span_graph_pairs, instances):
            amr_parser.errorAnalyze(spg_pair[0], spg_pair[1], instance, error_stat)
    else:
        arg_parser.print_help()
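# Hedged usage sketch of how the modes of the main() above are typically
# chained from a shell; the flags come from the argparse setup, but the script
# and file names are illustrative assumptions.
#   python amr_parsing.py -m preprocess --amrfmt train.amr
#   python amr_parsing.py -m train --model amr.m -iter 5 -d dev.amr train.amr
#   python amr_parsing.py -m parse --model amr.m-iter5.m test.amr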
def main():
    '''
    usage = "Usage:%prog [options] amr_file"
    opt = OptionParser(usage=usage)
    opt.add_option("-v", action="store", dest="verbose", type='int',
                   default=0, help="set up verbose level")
    opt.add_option("-a", action="store_true", dest="align",
                   default=False, help="do alignment between sentence and amr")
    opt.add_option("-b", action="store", dest="begin", type='int',
                   default=0, help="for debugging"
                   "When do alignment, where the alignment begins"
                   "When test oracle, where to begin")
    opt.add_option("-s", action="store", dest="start_step", type='int',
                   default=0, help="where the step begins,for testing oracle")
    opt.add_option("-o", action="store", dest="sentfilep",
                   help="output sentences to file and parse the sentence into dependency graph")
    opt.add_option("-i", action="store", dest="parsedfilep",
                   help="read parsed dependency graph from file")
    opt.add_option("-g", action="store", dest="userActfile",
                   help="read user input action sequences as guide")
    opt.add_option("-d", action="store", dest="oracle", type='int', default=0,
                   help="test the output actions of deterministic oracle: "
                        "1: tree oracle 2: list-based oracle")
    '''
    arg_parser = argparse.ArgumentParser(description="Brandeis transition-based AMR parser 1.0")
    arg_parser.add_argument('-v', '--verbose', type=int, default=0, help='set up verbose level for debug')
    arg_parser.add_argument('-b', '--begin', type=int, default=0,
                            help='specify which sentence to begin the alignment or oracle testing at, for debug')
    arg_parser.add_argument('-s', '--start_step', type=int, default=0,
                            help='specify which step to begin oracle testing at, for debug')
    # arg_parser.add_argument('-i', '--input_file', help='the input: preprocessed data instances file for aligner or training')
    arg_parser.add_argument('-d', '--dev', help='development file')
    arg_parser.add_argument('-as', '--actionset', choices=['basic'], default='basic',
                            help='choose a different action set')
    arg_parser.add_argument('-m', '--mode',
                            choices=['preprocess', 'test_gold_graph', 'align', 'userGuide',
                                     'oracleGuide', 'train', 'parse'],
                            help="preprocess: generate pos tags, dependency trees, ner\n"
                                 "align: do alignment between AMR graph and sentence")
    arg_parser.add_argument('-dp', '--depparser',
                            choices=['stanford', 'turbo', 'mate', 'malt', 'stdconv+charniak'],
                            default='stanford', help='choose the dependency parser, default: {stanford}')
    arg_parser.add_argument('--model', help='specify the model file')
    arg_parser.add_argument('--feat', help='feature template file')
    arg_parser.add_argument('-iter', '--iterations', type=int, help='training iterations')
    arg_parser.add_argument('amr_file', nargs='?', help='amr bank file for preprocessing')

    args = arg_parser.parse_args()
    amr_file = args.amr_file
    instances = None
    train_instance = None
    constants.FLAG_DEPPARSER = args.depparser

    if args.mode == 'preprocess':
        # use corenlp to preprocess the sentences
        instances = preprocess(amr_file)
        print >> experiment_log, "Done preprocessing!"
    elif args.mode == 'test_gold_graph':
        # preprocess the JAMR-aligned amr
        instances = preprocess(amr_file, False)
        # instances = pickle.load(open('data/gold_edge_graph.pkl', 'rb'))
        pseudo_gold_amr = []
        for inst in instances:
            GraphState.sent = inst.tokens
            pseudo_gold_amr.append(GraphState.get_parsed_amr(inst.gold_graph))
        # pseudo_gold_amr = [GraphState.get_parsed_amr(inst.gold_graph) for inst in instances]
        write_parsed_amr(pseudo_gold_amr, instances, amr_file, 'gold')
        # instances = preprocess_aligned(amr_file)
        print "Done output AMR!"
    elif args.mode == 'align':
        # do alignment
        if args.input_file:
            instances = pickle.load(open(args.input_file, 'rb'))
        else:
            raise ValueError("Missing data file! Specify it with --input or run preprocessing first!")
        gold_instances_file = args.input_file.split('.')[0] + '_gold.p'
        print >> log, "Doing alignment..."
        if LOGGED:
            saveerr = sys.stderr
            sys.stderr = open('./log/alignment.log', 'w')
        amr_aligner = Aligner(verbose=args.verbose)
        ref_graphs = []
        begin = args.begin
        counter = 1
        # for snt, amr in zip(snts[begin:], amrs[begin:]):
        for i in range(len(instances)):
            snt = instances[i].text
            amr = instances[i].amr
            if args.verbose > 1:
                print >> log, counter
                print >> log, "Sentence:"
                print >> log, snt + '\n'
                print >> log, "AMR:"
                print >> log, amr.to_amr_string()
            alresult = amr_aligner.apply_align(snt, amr)
            ref_amr_graph = SpanGraph.init_ref_graph(amr, alresult)
            # ref_graphs.append(ref_amr_graph)
            instances[i].addGoldGraph(ref_amr_graph)
            if args.verbose > 1:
                # print >> log, "Reference tuples:"
                # print >> log, ref_depGraph.print_tuples()
                print >> log, amr_aligner.print_align_result(alresult, amr)
                # raw_input('ENTER to continue')
            counter += 1
        pickle.dump(instances, open(gold_instances_file, 'wb'), pickle.HIGHEST_PROTOCOL)
        # pickle.dump(ref_graphs, open('./data/ref_graph.p', 'wb'), pickle.HIGHEST_PROTOCOL)
        if LOGGED:
            sys.stderr.close()
            sys.stderr = saveerr
        print >> log, "Done alignment and gold graph generation."
        sys.exit()
    elif args.mode == 'userGuide':
        # test user guide actions
        print 'Read in training instances...'
        train_instances = preprocess(amr_file, False)
        sentID = int(raw_input("Input the sent ID:"))
        amr_parser = Parser()
        amr_parser.testUserGuide(train_instances[sentID])
        sys.exit()
    elif args.mode == 'oracleGuide':
        # test deterministic oracle
        train_instances = preprocess(amr_file, False)
        start_step = args.start_step
        begin = args.begin
        amr_parser = Parser(oracle_type=DETERMINE_TREE_TO_GRAPH_ORACLE_SC, verbose=args.verbose)
        # ref_graphs = pickle.load(open('./data/ref_graph.p', 'rb'))
        n_correct_total = .0
        n_parsed_total = .0
        n_gold_total = .0
        pseudo_gold_amr = []
        for instance in train_instances[begin:]:
            state = amr_parser.testOracleGuide(instance, start_step)
            n_correct_arc, n1, n_parsed_arc, n_gold_arc, _, _, _ = state.evaluate()
            assert n_correct_arc == n1
            n_correct_total += n_correct_arc
            n_parsed_total += n_parsed_arc
            n_gold_total += n_gold_arc
            p = n_correct_arc / n_parsed_arc if n_parsed_arc else .0
            indicator = 'PROBLEM!' if p < 0.5 else ''
            if args.verbose > 2:  # the original tested args.dev (a file path) here; verbose matches the later revisions
                print >> sys.stderr, "Accuracy: %s %s\n" % (p, indicator)
            # if instance.sentID == 704:
            #     import pdb
            #     pdb.set_trace()
            pseudo_gold_amr.append(GraphState.get_parsed_amr(state.A))
            # assert set(state.A.tuples()) == set(instance.gold_graph.tuples())
        pt = n_correct_total / n_parsed_total if n_parsed_total != .0 else .0
        rt = n_correct_total / n_gold_total if n_gold_total != .0 else .0
        ft = 2 * pt * rt / (pt + rt) if pt + rt != .0 else .0
        write_parsed_amr(pseudo_gold_amr, train_instances, amr_file, 'pseudo-gold')
        print "Total Accuracy: %s, Recall: %s, F-1: %s" % (pt, rt, ft)
        # amr_parser.record_actions('data/action_set.txt')
    elif args.mode == 'train':
        # training
        train_instances = preprocess(amr_file, False)
        if args.dev:
            dev_instances = preprocess(args.dev, False)
        feat_template = args.feat if args.feat else None
        model = Model(elog=experiment_log)
        model.setup(action_type=args.actionset, instances=train_instances,
                    feature_templates_file=feat_template)
        # model.output_feature_generator()
        parser = Parser(model=model, action_type=args.actionset, verbose=args.verbose, elog=experiment_log)
        print >> experiment_log, "BEGIN TRAINING!"
        for iter in xrange(1, args.iterations + 1):
            print >> experiment_log, "shuffling training instances"
            random.shuffle(train_instances)
            print >> experiment_log, "Iteration:", iter
            begin_updates = parser.perceptron.get_num_updates()
            parser.parse_corpus_train(train_instances)
            parser.perceptron.average_weight()
            # model.save_model(args.model + '-iter' + str(iter) + '-' + str(int(time.time())) + '.m')
            model.save_model(args.model + '-iter' + str(iter) + '.m')
            if args.dev:
                print >> experiment_log, "Result on develop set:"
                parsed_amr = parser.parse_corpus_test(dev_instances)
                write_parsed_amr(parsed_amr, dev_instances, args.dev)
        print >> experiment_log, "DONE TRAINING!"
    elif args.mode == 'parse':
        # actual parsing
        test_instances = preprocess(amr_file, False)
        model = Model.load_model(args.model)
        parser = Parser(model=model, action_type=args.actionset, verbose=args.verbose, elog=experiment_log)
        print >> experiment_log, "BEGIN PARSING"
        results = parser.parse_corpus_test(test_instances)
        write_parsed_amr(results, test_instances, amr_file)
        print >> experiment_log, "DONE PARSING"
        # pickle.dump(results, open('data/gold_edge_graph.pkl', 'wb'), pickle.HIGHEST_PROTOCOL)
        # plt.hist(results)
        # plt.savefig('result.png')
    else:
        arg_parser.print_help()