def createSentenceStructureDict(self):
    """Build ``self.sentence_struct_dict`` from the matching entries of this mapping.

    Every ``(key, value)`` pair of ``self.items()`` whose key contains the
    literal ``'s)/MX1'`` is turned into one record:

    * the value is first passed through ``self.replaceTranRef``;
    * ``cm.creatSentRecogniserPattern(key)`` produces the record's lookup key;
    * ``cm.createSentRecogniserRecord`` is applied to both the key and the
      processed value, each call yielding a ``(record, word_list)`` pair.

    Each record is stored as the tuple
    ``(key, sl_word_list, sl_record, value, tl_word_list, tl_record)``.
    The records are then ordered by descending length of their lookup key and
    wrapped in ``NoCaseDict`` before being assigned to
    ``self.sentence_struct_dict``.

    Returns:
        None. The result is stored on ``self``.
    """

    def isSentStruct(item):
        """Return True when the entry's key carries the structure marker."""
        key, _ = item
        # NOTE(review): the marker is a hard-coded literal; earlier revisions
        # tested df.SENT_STRUCT_START_SYMB instead -- confirm which selection
        # rule is intended before changing it.
        return 's)/MX1' in key

    # Materialise the selection first so that the calls made in the loop below
    # never run while self.items() is still being iterated.
    selected = [item for item in self.items() if isSentStruct(item)]

    records = {}
    for key, value in selected:
        value = self.replaceTranRef(value)
        key_pattern = cm.creatSentRecogniserPattern(key)
        dict_sl_mm, dict_sl_word_list = cm.createSentRecogniserRecord(key)
        dict_tl_mm, dict_tl_word_list = cm.createSentRecogniserRecord(value)
        # A later entry producing the same pattern replaces the earlier one.
        records[key_pattern] = (
            key,
            dict_sl_word_list,
            dict_sl_mm,
            value,
            dict_tl_word_list,
            dict_tl_mm,
        )

    # Longest patterns first; sorted() is stable, so equal-length patterns
    # keep their insertion order.
    ordered = OrderedDict(
        sorted(records.items(), key=lambda x: len(x[0]), reverse=True)
    )
    self.sentence_struct_dict = NoCaseDict(ordered)
def setupRecords(self):
    """Lazily populate the source/target records and the recogniser pattern.

    Each of the following attributes is filled in only when it is currently
    falsy:

    * ``dict_sl_rec`` / ``dict_sl_wordlist`` -- from
      ``cm.createSentRecogniserRecord(self.dict_sl_txt)``;
    * ``recog_pattern`` -- case-insensitive regex compiled from
      ``cm.formPattern(self.dict_sl_wordlist)``;
    * ``dict_tl_rec`` -- from ``cm.createSentRecogniserRecord(self.dict_tl_txt)``
      (the word list returned alongside it is discarded);
    * ``dict_tl_wordlist`` -- from ``self.dict_tl_rec.getSubEntriesAsList()``.

    ``sent_tl_rec`` is then rebuilt as a shallow copy of ``dict_tl_rec`` that
    is cleared and refilled from a deep copy of ``dict_tl_wordlist``.

    The whole setup is best effort: any exception raised along the way is
    logged at DEBUG level and otherwise ignored. Afterwards
    ``is_sent_struct`` is set to ``bool(self.recog_pattern)`` and
    ``self.setupSentSLRecord()`` is called, regardless of whether the setup
    succeeded.

    Returns:
        None. All results are stored on ``self``.
    """
    import logging

    try:
        if not self.dict_sl_rec:
            self.dict_sl_rec, self.dict_sl_wordlist = (
                cm.createSentRecogniserRecord(self.dict_sl_txt)
            )

        if not self.recog_pattern:
            self.recog_pattern = re.compile(
                cm.formPattern(self.dict_sl_wordlist), flags=re.I
            )

        if not self.dict_tl_rec:
            # Only the record is kept; the accompanying word list is unused.
            self.dict_tl_rec, _ = cm.createSentRecogniserRecord(
                self.dict_tl_txt
            )

        if not self.dict_tl_wordlist:
            self.dict_tl_wordlist = self.dict_tl_rec.getSubEntriesAsList()

        # Start from a copy of the target record (same type and state), then
        # replace its contents with an independent copy of the word list.
        sent_tl_list = CP.deepcopy(self.dict_tl_wordlist)
        self.sent_tl_rec = CP.copy(self.dict_tl_rec)
        self.sent_tl_rec.clear()
        self.sent_tl_rec.update(sent_tl_list)
    except Exception:
        # Best effort by design: keep going, but leave a trace for debugging
        # instead of discarding the failure entirely.
        logging.getLogger(__name__).debug(
            "setupRecords: record setup failed", exc_info=True
        )
        # Provisional value; it is recomputed from recog_pattern just below.
        self.is_sent_struct = False

    self.is_sent_struct = bool(self.recog_pattern)
    self.setupSentSLRecord()