示例#1
0
class vertical_splitter:
    """Split position ranges ("words") of a message set by inferred field type.

    A word is a ``(start, end)`` pair.  All type decisions are delegated to
    a ``WholeFieldTypeInfer`` built from the messages given to the
    constructor.
    """

    def __init__(self, messages):
        self.messages = messages
        self.wholeFieldInfer = WholeFieldTypeInfer(self.messages)

    def split_by_words_type(self, datas, T_max_range):
        """Classify every offset below ``T_max_range`` and merge the results.

        For each offset ``i`` the data returned by ``get_data_bylo(datas, i)``
        is converted to counts, then to frequencies, and tested with
        ``is_const_word(..., 0.95)``.  Each offset becomes a
        ``loc_field((i, i), code)`` with code ``0`` when the test is truthy
        and ``4`` otherwise.
        NOTE(review): presumably 0 means "constant" and 4 "variable", and
        0.95 is a frequency threshold -- confirm against loc_field/word_infer.

        Returns:
            A tuple ``(words_f, candidate_borders)`` where ``words_f`` is the
            result of ``base_merger.merge_words`` and ``candidate_borders``
            holds ``w.loc[0]`` for every merged word.
        """
        fields_set = []
        w_infer = word_infer()
        w_merger = base_merger()
        w_convert = Converter()
        b_analyzer = base_analyzer()
        for i in range(T_max_range):
            lo_datas = get_data_bylo(datas, i)
            w_cnt = w_convert.convert_raw_to_count(lo_datas)
            w_frequent = b_analyzer.convert_num_to_frequent(w_cnt)
            is_const = w_infer.is_const_word(w_frequent, 0.95)
            fields_set.append(loc_field((i, i), 0 if is_const else 4))
        words_f = w_merger.merge_words(fields_set)
        candidate_borders = [w.loc[0] for w in words_f]
        return words_f, candidate_borders

    def splitWordSimple(self, word):
        """Try to cut ``word`` into a constant part and a non-constant part.

        Every interior position ``j`` is tested; a position qualifies when
        exactly one of ``(start, j)`` and ``(j, end)`` is inferred constant.
        When several positions qualify, the last one is used.

        Returns:
            ``((start, j), (j, end))`` when a cut position exists, otherwise
            ``(word, None)``.
        """
        start, end = word[0], word[1]
        if end - start == 1:
            # A single-unit word has no interior position to cut at.
            return word, None

        cut = None
        for j in range(start + 1, end):
            # Evaluate each side once; the cut is valid only when the two
            # sides disagree (exactly one of them is constant).
            left_const = bool(self.wholeFieldInfer.inferConst((start, j)))
            right_const = bool(self.wholeFieldInfer.inferConst((j, end)))
            if left_const != right_const:
                cut = j

        if cut is None:
            return word, None
        return (start, cut), (cut, end)

    def splitWordsSimple(self, words):
        """Apply :meth:`splitWordSimple` to every word and return the result.

        The input list is left untouched; a new list sorted by start
        position is returned.  After a word is split, its first half is not
        examined again, while its second half is (it becomes the next
        element in the sorted list).
        """
        # Work on a sorted copy so the caller's list is never mutated and
        # every word is examined exactly once, whatever the input order.
        words = sorted(words, key=lambda x: x[0])
        i = 0
        while i < len(words):
            wOne, wTwo = self.splitWordSimple(words[i])
            if wTwo is not None:
                words.pop(i)
                words.append(wOne)
                words.append(wTwo)
                # Stable sort keeps the relative order of equal starts.
                words.sort(key=lambda x: x[0])
            i += 1
        return words
示例#2
0
class IcsFieldMerger(base_merger):
    """A ``base_merger`` subclass that holds a ``WholeFieldTypeInfer`` for its messages."""

    def __init__(self, messages):
        super().__init__()
        # Inference helper built once from the messages; used by
        # mergeConstFields to test each word.
        self.wholeType = WholeFieldTypeInfer(messages)

    def mergeConstFields(self, words, messages):
        """Walk ``words`` and test each one with ``self.wholeType.inferConst``.

        NOTE(review): this method looks unfinished.  ``messages`` is never
        used, ``wordsType`` is never returned, and the method implicitly
        returns ``None``.
        """
        wordsType = []
        for word in words:
            if self.wholeType.inferConst(word):
                # NOTE(review): list.append() requires exactly one argument,
                # so this call raises TypeError as soon as a word is inferred
                # constant.  The value meant to be recorded is not visible
                # here -- confirm the intended element before fixing.
                wordsType.append()
示例#3
0
class TestWholeField:
    """Helper that feeds message slices to a ``WholeFieldTypeInfer``."""

    def __init__(self, messages, locs):
        self.messages = messages
        self.locs = locs
        # NOTE(review): constructed without arguments here -- confirm that
        # WholeFieldTypeInfer supports a no-argument constructor.
        self.gFieldInfer = WholeFieldTypeInfer()

    def TestConst(self, lo):
        """Return ``inferConst`` applied to the ``lo[0]:lo[1]`` slice of each message.

        Only messages whose length is strictly greater than ``lo[-1]``
        contribute a slice; shorter messages are skipped.
        """
        slices = [
            msg[lo[0]:lo[1]]
            for msg in self.messages
            if len(msg) > lo[-1]
        ]
        return self.gFieldInfer.inferConst(slices)
示例#4
0
class ReAjustLogic:
    """Re-adjust word borders using whole-field constant inference.

    ``words`` is a list of ``(start, end)`` pairs and is modified in place
    by :meth:`reSplit` and :meth:`reCluster`.  All type decisions come from
    a ``WholeFieldTypeInfer`` built from ``msgs``.
    """

    def __init__(self, words, msgs):
        self.words = words
        self.msgs = msgs
        self.wholeTypeInfer = WholeFieldTypeInfer(self.msgs)

    def reSplit(self):
        """Peel the first unit off words whose head and tail differ in type.

        ``self.words`` is sorted by start position, then every word at
        least two units wide is tested at ``start + 1``: when exactly one of
        ``(start, start + 1)`` and ``(start + 1, end)`` is inferred constant
        the word is replaced by those two parts.  The tail part is examined
        again on the next iteration, so a word can be peeled repeatedly.
        """
        self.words.sort(key=lambda word: word[0])
        i = 0
        while i < len(self.words):
            current = self.words[i]
            start, end = current[0], current[1]
            middle = start + 1
            if end - start >= 2:
                # Evaluate each side once; split only when they disagree.
                head_const = bool(
                    self.wholeTypeInfer.inferConst((start, middle)))
                tail_const = bool(
                    self.wholeTypeInfer.inferConst((middle, end)))
                if head_const != tail_const:
                    self.words.remove(current)
                    self.words.append((start, middle))
                    self.words.append((middle, end))
                    self.words.sort(key=lambda word: word[0])
            i += 1

    def reCluster(self):
        """Merge each run of neighbouring constant words into one word.

        Two list neighbours that are both inferred constant are replaced by
        ``(first_start, second_end)``.  The merged word stays at the same
        index and is compared with its new right neighbour next, so a run
        of constant words collapses into a single entry.  The list is not
        sorted here; neighbours are taken in the list's current order.
        """
        i = 0
        while i < len(self.words) - 1:
            now = self.words[i]
            nxt = self.words[i + 1]
            if (self.wholeTypeInfer.inferConst(now)
                    and self.wholeTypeInfer.inferConst(nxt)):
                # Splice in place: appending the merged span at the end (as
                # was done before) breaks list order, so later iterations
                # would compare words that are not neighbours.
                self.words[i:i + 2] = [(now[0], nxt[1])]
            else:
                i += 1

    def reAjustBorders(self, words, messages):
        """Split ``words`` against ``messages``, tag them, and merge them.

        Uses only its arguments (not ``self.words`` / ``self.msgs``): the
        words are split with ``vertical_splitter.splitWordsSimple``, each
        resulting word is wrapped in a ``node`` with ``wType=1`` when it is
        inferred constant and ``wType=6`` otherwise, and the nodes are
        passed to ``base_merger.merge_words``, whose result is returned.
        NOTE(review): the meaning of the wType codes 1 and 6 is defined
        elsewhere -- confirm against ``node``.
        """
        vSpliter = vertical_splitter(messages)
        words = vSpliter.splitWordsSimple(words)
        typeInfer = WholeFieldTypeInfer(messages)
        mgerItoms = base_merger()
        Nodes = [
            node(loc=word, wType=1 if typeInfer.inferConst(word) else 6)
            for word in words
        ]
        return mgerItoms.merge_words(Nodes)
示例#5
0
 def reAjustBorders(self, words, messages):
     """Split ``words`` against ``messages``, tag each piece, and merge.

     The words are first split with ``vertical_splitter.splitWordsSimple``.
     Every resulting word becomes a ``node`` whose ``wType`` is 1 when
     ``WholeFieldTypeInfer.inferConst`` is truthy for it and 6 otherwise.
     The nodes are handed to ``base_merger.merge_words`` and its result is
     returned.
     NOTE(review): the meaning of the wType codes 1 and 6 is defined
     elsewhere -- confirm against ``node``.
     """
     splitter = vertical_splitter(messages)
     pieces = splitter.splitWordsSimple(words)
     infer = WholeFieldTypeInfer(messages)
     merger = base_merger()
     tagged = [
         node(loc=piece, wType=1 if infer.inferConst(piece) else 6)
         for piece in pieces
     ]
     return merger.merge_words(tagged)