class vertical_splitter:
    """Split message positions into candidate fields ("words").

    The instance keeps the message collection and a ``WholeFieldTypeInfer``
    built from it; that object's ``inferConst`` is used to decide whether a
    ``(start, end)`` span is constant.
    """

    def __init__(self, messages):
        """Store *messages* and build the whole-field type inferrer from them."""
        self.messages = messages
        self.wholeFieldInfer = WholeFieldTypeInfer(self.messages)

    def split_by_words_type(self, datas, T_max_range):
        """Classify every offset below *T_max_range* and merge the results.

        For each offset ``i`` the data returned by ``get_data_bylo(datas, i)``
        is converted to counts, then to frequencies, and tested with
        ``word_infer.is_const_word`` using a 0.95 threshold.

        Returns:
            A ``(words, borders)`` pair: the merged fields produced by
            ``base_merger.merge_words`` and the first element of each merged
            field's ``loc``.
        """
        fields_set = []
        w_infer = word_infer()
        w_merger = base_merger()
        w_convert = Converter()
        b_analyzer = base_analyzer()
        for i in range(T_max_range):
            lo_datas = get_data_bylo(datas, i)
            w_cnt = w_convert.convert_raw_to_count(lo_datas)
            w_frequent = b_analyzer.convert_num_to_frequent(w_cnt)
            is_const = w_infer.is_const_word(w_frequent, 0.95)
            # NOTE(review): 0 and 4 look like field-type codes for
            # "constant" / "other" -- confirm against loc_field.
            fields_set.append(loc_field((i, i), 0 if is_const else 4))
        words_f = w_merger.merge_words(fields_set)
        candidate_borders = [w.loc[0] for w in words_f]
        return words_f, candidate_borders

    def splitWordSimple(self, word):
        """Try to cut *word* ``(start, end)`` into a constant and a non-constant part.

        Every interior position ``j`` is tested; a position qualifies when
        exactly one of ``(start, j)`` and ``(j, end)`` is reported constant by
        ``inferConst``. The last qualifying position wins.

        Returns:
            ``((start, j), (j, end))`` when a split position exists, otherwise
            ``(word, None)``.
        """
        start, end = word[0], word[1]
        if end - start == 1:
            return word, None

        # None (rather than -1) marks "no split found", so a genuine index of
        # -1 can never be mistaken for the sentinel.
        split_at = None
        for j in range(start + 1, end):
            # Evaluate each side once; the original re-queried both sides
            # inside an expanded XOR expression.
            left_const = bool(self.wholeFieldInfer.inferConst((start, j)))
            right_const = bool(self.wholeFieldInfer.inferConst((j, end)))
            if left_const != right_const:
                split_at = j

        if split_at is None:
            return word, None
        return (start, split_at), (split_at, end)

    def splitWordsSimple(self, words):
        """Apply :meth:`splitWordSimple` to every word and return the new list.

        Pieces produced by a split are put back into the list (kept sorted by
        start position) and are themselves candidates for further splitting.
        The caller's *words* list is left untouched; use the return value.
        """
        # Work on a copy: the original mutated the argument until the first
        # re-sort rebound the name, leaving the caller's list half-updated.
        result = list(words)
        i = 0
        while i < len(result):
            first, second = self.splitWordSimple(result[i])
            if second is not None:
                result.remove(result[i])
                result.append(first)
                result.append(second)
                result.sort(key=lambda x: x[0])
            i += 1
        return result
class IcsFieldMerger(base_merger):
    """``base_merger`` subclass that can test words for constness."""

    def __init__(self, messages):
        """Initialise the base merger and build a type inferrer from *messages*."""
        super().__init__()
        self.wholeType = WholeFieldTypeInfer(messages)

    def mergeConstFields(self, words, messages):
        """Return the words that ``inferConst`` reports as constant.

        The previous body called ``list.append()`` with no argument, which
        raises ``TypeError`` as soon as one constant word is found, and it
        never returned the collected list.

        NOTE(review): the method looks unfinished -- the name suggests that
        constant fields should eventually be merged; confirm the intended
        result. *messages* is currently unused and kept only so existing
        call sites keep working.
        """
        return [word for word in words if self.wholeType.inferConst(word)]
class TestWholeField:
    """Run whole-field constness checks over slices of the stored messages."""

    def __init__(self, messages, locs):
        """Keep *messages* and *locs* and create the field type inferrer."""
        self.messages = messages
        self.locs = locs
        # NOTE(review): other call sites in this file pass the messages to
        # WholeFieldTypeInfer; here it is built without arguments -- confirm.
        self.gFieldInfer = WholeFieldTypeInfer()

    def TestConst(self, lo):
        """Slice every long-enough message at *lo* and test the slices.

        A message contributes ``message[lo[0]:lo[1]]`` only when its length is
        strictly greater than ``lo[-1]``. The collected slices are handed to
        ``inferConst`` and its result is returned unchanged.
        """
        # NOTE(review): elsewhere in this file inferConst receives a
        # (start, end) span rather than a list of slices -- confirm the API.
        slices = [
            message[lo[0]:lo[1]]
            for message in self.messages
            if len(message) > lo[-1]
        ]
        return self.gFieldInfer.inferConst(slices)
class ReAjustLogic:
    """Re-adjust word borders by splitting and clustering ``(start, end)`` spans.

    ``self.words`` is modified in place by :meth:`reSplit` and
    :meth:`reCluster`; constness of a span is decided by
    ``WholeFieldTypeInfer.inferConst``.
    """

    def __init__(self, words, msgs):
        """Store *words* and *msgs* and build a type inferrer from the messages."""
        self.words = words
        self.msgs = msgs
        self.wholeTypeInfer = WholeFieldTypeInfer(self.msgs)

    def reSplit(self):
        """Peel the first unit off every word whose head and tail differ in constness.

        ``self.words`` is sorted by start position. A word at least two units
        long is replaced by ``(start, start + 1)`` and ``(start + 1, end)``
        when exactly one of those two spans is constant. The tail is visited
        again later in the same pass, so it may be split further.
        """
        self.words.sort(key=lambda word: word[0])
        i = 0
        # len(self.words) grows by one per split, matching the counter the
        # original maintained by hand.
        while i < len(self.words):
            current = self.words[i]
            start, end = current[0], current[1]
            middle = start + 1
            if end - start >= 2:
                # Query each side once and compare, instead of spelling the
                # XOR out with up to four inferConst calls.
                head_const = bool(self.wholeTypeInfer.inferConst((start, middle)))
                tail_const = bool(self.wholeTypeInfer.inferConst((middle, end)))
                if head_const != tail_const:
                    self.words.remove(current)
                    self.words.append((start, middle))
                    self.words.append((middle, end))
                    self.words.sort(key=lambda word: word[0])
            i += 1

    def reCluster(self):
        """Merge neighbouring words that are both constant.

        ``self.words`` is sorted by start position first so that list
        neighbours are positional neighbours. Two adjacent constant words are
        replaced, in place, by one word running from the first word's start
        to the second word's end; the merged word is then compared with its
        new right-hand neighbour.
        """
        self.words.sort(key=lambda word: word[0])
        i = 0
        while i < len(self.words) - 1:
            now = self.words[i]
            nxt = self.words[i + 1]
            if self.wholeTypeInfer.inferConst(now) and self.wholeTypeInfer.inferConst(nxt):
                # Put the merged span where the pair was. The original
                # appended it to the end of the list without re-sorting, so
                # later iterations compared words that were not neighbours.
                self.words[i:i + 2] = [(now[0], nxt[1])]
                # Stay on index i so the merged word is checked against the
                # next one.
            else:
                i += 1

    def reAjustBorders(self, words, messages):
        """Split *words*, tag every piece by constness and merge the tagged nodes.

        Works only on its arguments; ``self.words`` and ``self.msgs`` are not
        read.

        Returns:
            Whatever ``base_merger.merge_words`` returns for the built nodes.
        """
        vSpliter = vertical_splitter(messages)
        words = vSpliter.splitWordsSimple(words)
        typeInfer = WholeFieldTypeInfer(messages)
        mgerItoms = base_merger()
        Nodes = []
        for word in words:
            # NOTE(review): wType 1 / 6 appear to be the type codes for
            # constant / non-constant words -- confirm against node.
            wType = 1 if typeInfer.inferConst(word) else 6
            Nodes.append(node(loc=word, wType=wType))
        return mgerItoms.merge_words(Nodes)
def reAjustBorders(self, words, messages):
    """Split *words*, tag each piece by constness and merge the tagged nodes.

    *self* is accepted but not used. The words are first refined with
    ``vertical_splitter.splitWordsSimple``; each resulting word becomes a
    ``node`` whose ``wType`` is 1 when ``inferConst`` reports it constant and
    6 otherwise. The nodes are then passed to ``base_merger.merge_words`` and
    that result is returned.
    """
    refined = vertical_splitter(messages).splitWordsSimple(words)
    const_checker = WholeFieldTypeInfer(messages)
    merger = base_merger()
    tagged = [
        node(loc=span, wType=1 if const_checker.inferConst(span) else 6)
        for span in refined
    ]
    return merger.merge_words(tagged)