parser.add_argument('--no_multiwords',action='store_true') args = parser.parse_args(sys.argv[1:]) ApertiumLexformGroupProcessor.set_ignore_multiwords(args.no_multiwords) if args.print_only_tokens: ApertiumLexformGroupProcessor.printOnlyTokens=True if args.bildic_tl_expanded_file: #actual_process bilingual dictionary #we want as a result a dict() in python which contains, for each TL lemma #the different SL lemmas which is mapped from in the bilingual dictionary fileDesc=open(args.bildic_tl_expanded_file,'r') MultipleLineEntriesProcessor.process(SpaEngBilingualProcessor,BilingualDicLineEntry,fileDesc) if args.category == "adj": SpaAdjectivesProcessor.engAdjectivesDict=SpaEngBilingualProcessor.bilingualDictionaryRepresentation elif args.category == "n": SpaNounsProcessor.engNounsDict=SpaEngBilingualProcessor.bilingualDictionaryRepresentation elif args.category == "np": SpaProperNounsProcessor.engDict=SpaEngBilingualProcessor.bilingualDictionaryRepresentation elif args.category == "vblex": SpaVblexProcessor.engVblexDict=SpaEngBilingualProcessor.bilingualDictionaryRepresentation elif args.category == "adv": SpaAdverbsProcessor.engDict=SpaEngBilingualProcessor.bilingualDictionaryRepresentation else: print >> sys.stderr, "Category not defined" fileDesc.close() if args.valencies:
""" Created on 28/05/2013 @author: vitaka """ from lib.abstractLearningLib import MWEReader, ParallelMWE, MWESplitter from lib.portApertiumToGFLib import MultipleLineEntriesProcessor import argparse import sys if __name__ == "__main__": parser = argparse.ArgumentParser(description="Chooses alignment templates.") parser.add_argument("--groups_dir", required=True) args = parser.parse_args(sys.argv[1:]) groupSplitter = MWESplitter(args.groups_dir) MultipleLineEntriesProcessor.process(groupSplitter, ParallelMWE)
self.freq=0 self.sltree=None self.tltree=None def parse(self,rawstr): parts=rawstr.split("|") self.freq=int(parts[0]) self.sltree=parts[1].strip() self.tltree=parts[2].strip() def get_representative(self): return self.sltree class TreeTransformationGrouper(AbstractGroupProcessor): @staticmethod def process(mygroup): totalfreq=0 for entry in mygroup: totalfreq+=entry.freq #print SL side print str(totalfreq)+" | "+mygroup[0].sltree #print different TL sides for entry in mygroup: print "\t"+str(entry.freq)+" "+str(entry.freq*1.0/totalfreq)+" | "+entry.tltree if __name__ == "__main__": MultipleLineEntriesProcessor.process(TreeTransformationGrouper,TreeTransformationEntry)