def __init__(self): print "Loading Morph Dictionary!" tmp_str = {} self.features_dict = tmp_str hostnames = self.get_features alias_cleaned = {} self.morph_dict = alias_cleaned cmp_p = MontyUtils.MontyUtils().find_file(self.morph_dict_filename) if not cmp_p: print "Morph Dictionary not found...Now Building!" self.build_morph_dict() cmp_p = MontyUtils.MontyUtils().find_file(self.morph_dict_filename) case_p = open(cmp_p, 'rb') groups_arr = self.setitem cats = zlib.decompress(case_p.read()).split('\n') for names in cats: output, domain_arr = names.split('=') domain_arr = tuple( map(lambda arg_cleaned: tuple(arg_cleaned.split(',')), domain_arr.split(';'))) domain_arr = map( lambda arg_cleaned: (arg_cleaned[0], hostnames(arg_cleaned[1])), domain_arr) alias_cleaned[output] = domain_arr
def __init__(self, MontyLemmatiser_handle=None, MontyTagger_handle=None): if not MontyLemmatiser_handle: MontyLemmatiser_handle = MontyLemmatiser.MontyLemmatiser() self.theMontyLemmatiser = MontyLemmatiser_handle if not MontyTagger_handle: MontyTagger_handle = MontyTagger.MontyTagger() self.theMontyTagger = MontyTagger_handle self.tag_tokenized = self.theMontyTagger.tag_tokenized self.lemmatise_word = self.theMontyLemmatiser.lemmatise_word ps1 = {} _montylingua_cleaned = MontyUtils.MontyUtils().find_file( self.cssdb_filename) if not _montylingua_cleaned: self.build_cs_selection_db() _montylingua_cleaned = MontyUtils.MontyUtils().find_file( self.cssdb_filename) hash1 = open(_montylingua_cleaned, 'rb') cat_p = self.setitem map( lambda stripped: cat_p(ps1, stripped[0], (stripped[1].split(), stripped[2].split())), map( lambda tmps: tmps.split('|'), filter(lambda tagged_cleaned: tagged_cleaned.strip() != '', zlib.decompress(hash1.read()).split('\n')))) self.cssdb = ps1 print "Commonsense OK!" return
def load_fastlexicon(self):
    """Load the seven pre-built fast-lexicon data files (*_1.MDF .. *_7.MDF).

    File 7 holds the four element counts; files 1-2 hold the packed
    word/POS strings; files 3-6 hold the four binary offset arrays, read
    via self.array_fromfile with their matching java_code tags.
    """
    finder = MontyUtils.MontyUtils()
    # resolve all seven data files up front (1-indexed; paths[0] is unused)
    paths = [None] + [finder.find_file('%s_%d.MDF' % (self.fast_lexicon_filename, i))
                      for i in range(1, 8)]
    counts_file = open(paths[7], 'r')
    try:
        ws_count, we_count, ps_count, pe_count = map(int, counts_file.read().split())
    finally:
        counts_file.close()
    words_file = open(paths[1], 'r')
    try:
        self.packed_words = words_file.read()
    finally:
        words_file.close()
    pos_file = open(paths[2], 'r')
    try:
        self.packed_pos = pos_file.read()
    finally:
        pos_file.close()
    # (path index, target array, element count, format tag)
    array_specs = ((3, self.word_start_arr, ws_count, 'ws'),
                   (4, self.word_end_arr, we_count, 'we'),
                   (5, self.pos_start_arr, ps_count, 'ps'),
                   (6, self.pos_end_arr, pe_count, 'pe'))
    for index, target_arr, count, tag in array_specs:
        data_file = open(paths[index], 'rb')
        try:
            self.array_fromfile(data_file, target_arr, count, self.java_p, java_code=tag)
        finally:
            data_file.close()
def __init__(self): self.word_pos_table = {} if MontyUtils.MontyUtils().find_file( self.custom_lexicon_filename) != '': print "Custom Lexicon Found! Now Loading!" self.load_customlexicon()
def __init__(self): self.contextualrules_filename = MontyUtils.MontyUtils().find_file( self.contextualrules_filename) if self.contextualrules_filename == '': print "ERROR: could not find %s" % self.contextualrules_filename print "in current dir, %MONTYLINGUA% or %PATH%" self.populate_from_file(self.contextualrules_filename) print "ContextualRuleParser OK!" return
def __init__(self, LexiconHandle): self.theLexicon = LexiconHandle self.lexicalrules_filename = MontyUtils.MontyUtils().find_file( self.lexicalrules_filename) if self.lexicalrules_filename == '': print "ERROR: could not find %s" % self.lexicalrules_filename print "in current dir, %MONTYLINGUA% or %PATH%" self.populate_from_file(self.lexicalrules_filename) print 'LexicalRuleParser OK!' return
def __init__(self): self.lexicon_custom=MontyLexiconCustom.MontyLexiconCustom() if MontyUtils.MontyUtils().find_file(self.fast_lexicon_filename+'_1.MDF')!='': print "Fast Lexicon Found! Now Loading!" self.load_fastlexicon() elif self.auto_load_lexicon: print "No Fast Lexicon Detected...Now Building..." self.lexicon_filename=MontyUtils.MontyUtils().find_file(self.lexicon_filename) if self.lexicon_filename=='': print "ERROR: could not find %s" % self.lexicon_filename print "in current dir, %MONTYLINGUA% or %PATH%" self.populate_lexicon_from_file(self.lexicon_filename) self.make_fastlexicon() print "Finished building FASTLEXICON files!" else : print "No Fast Lexicon Detected. Standard Lexicon used." notify.append(-1) return print "Lexicon OK!" return
def load_customlexicon(self):
    """Read the custom lexicon file into self.word_pos_table.

    Each non-trivial line is whitespace-split; the first token is the
    word and the remaining tokens its POS list. Lines with fewer than
    two tokens are ignored.
    """
    lexicon_path = MontyUtils.MontyUtils().find_file(self.custom_lexicon_filename)
    lexicon_file = open(lexicon_path, 'r')
    try:
        raw_text = lexicon_file.read()
    finally:
        # BUGFIX: the handle was previously never closed
        lexicon_file.close()
    for line in raw_text.split('\n'):
        tokens = line.strip().split()
        if len(tokens) >= 2:
            self.word_pos_table[tokens[0]] = tokens[1:]
    return