Пример #1
0
    def createSentenceStructureDict(self):
        """Build ``self.sentence_struct_dict`` from this mapping's sentence-structure entries.

        An entry is treated as a sentence structure when its key contains the
        marker ``'s)/MX1'``.  For each such ``(key, value)`` pair:

        * ``value`` is passed through ``self.replaceTranRef``;
        * a recogniser pattern is built from ``key`` and used as the new
          dictionary key;
        * recogniser records are built for both ``key`` and the rewritten
          ``value``.

        Each stored value is the tuple
        ``(key, sl_word_list, sl_record, value, tl_word_list, tl_record)``.
        The result is ordered by descending ``len()`` of the pattern key and
        wrapped in ``NoCaseDict`` before being assigned to
        ``self.sentence_struct_dict``.  Nothing is returned.
        """
        sent_struct_marker = 's)/MX1'

        # Materialise the matching entries up front so that whatever
        # ``replaceTranRef`` does cannot disturb iteration over ``self``.
        struct_entries = [
            (key, value) for (key, value) in self.items()
            if sent_struct_marker in key
        ]

        # NOTE: entries are processed in the mapping's own order (as before);
        # when two keys yield the same pattern, the later one wins.
        temp_dict = {}
        for key, value in struct_entries:
            value = self.replaceTranRef(value)
            key_pattern = cm.creatSentRecogniserPattern(key)
            dict_sl_mm, dict_sl_word_list = cm.createSentRecogniserRecord(key)
            dict_tl_mm, dict_tl_word_list = cm.createSentRecogniserRecord(value)

            temp_dict[key_pattern] = (
                key, dict_sl_word_list, dict_sl_mm,
                value, dict_tl_word_list, dict_tl_mm,
            )

        # Longest patterns first; the sort is stable, so equal-length patterns
        # keep their insertion order.
        longest_first = sorted(temp_dict.items(), key=lambda x: len(x[0]), reverse=True)
        self.sentence_struct_dict = NoCaseDict(OrderedDict(longest_first))
Пример #2
0
    def setupRecords(self):
        """Lazily populate the source/target recogniser records of this entry.

        Each piece is only computed when the corresponding attribute is falsy:

        * ``dict_sl_rec`` / ``dict_sl_wordlist`` come from
          ``cm.createSentRecogniserRecord(self.dict_sl_txt)``;
        * ``recog_pattern`` is a case-insensitive regex compiled from
          ``cm.formPattern(self.dict_sl_wordlist)``;
        * ``dict_tl_rec`` comes from
          ``cm.createSentRecogniserRecord(self.dict_tl_txt)``;
        * ``dict_tl_wordlist`` comes from
          ``self.dict_tl_rec.getSubEntriesAsList()``.

        ``sent_tl_rec`` is then rebuilt as a shallow copy of ``dict_tl_rec``
        that is cleared and refilled from a deep copy of ``dict_tl_wordlist``.

        ``is_sent_struct`` is set to ``bool(self.recog_pattern)`` when all of
        the above succeeds, and to ``False`` when any step raises.
        ``setupSentSLRecord()`` is called afterwards in either case.
        """
        try:
            if not self.dict_sl_rec:
                self.dict_sl_rec, self.dict_sl_wordlist = cm.createSentRecogniserRecord(
                    self.dict_sl_txt)

            if not self.recog_pattern:
                self.recog_pattern = re.compile(
                    cm.formPattern(self.dict_sl_wordlist), flags=re.I)

            if not self.dict_tl_rec:
                # The word list returned here is not used; the target word
                # list is taken from the record itself just below.
                self.dict_tl_rec, _ = cm.createSentRecogniserRecord(
                    self.dict_tl_txt)

            if not self.dict_tl_wordlist:
                self.dict_tl_wordlist = self.dict_tl_rec.getSubEntriesAsList()

            sent_tl_list = CP.deepcopy(self.dict_tl_wordlist)
            # Copy the record to get an object of the same type, then replace
            # its contents with the independent deep copy.
            self.sent_tl_rec = CP.copy(self.dict_tl_rec)
            self.sent_tl_rec.clear()
            self.sent_tl_rec.update(sent_tl_list)
        except Exception:
            # Best-effort setup: a failure anywhere above means this entry
            # cannot be used as a sentence structure.  Previously this flag
            # was unconditionally overwritten right after the handler, so a
            # failed setup could still be reported as valid.
            self.is_sent_struct = False
        else:
            self.is_sent_struct = bool(self.recog_pattern)

        self.setupSentSLRecord()