Пример #1
0
    def create_lookup_table(self, min_freq=1, tokens_file=None):
        """Build the input and output token tables, or load them from a cache file.

        If ``tokens_file`` is given and exists, its list is read back and split
        at the ``'<@@@>'`` marker: entries before the marker form the input
        table, entries after it form the output table.

        Otherwise tokens are counted over the ``'source'`` and ``'target'``
        sequences found in element ``[0]`` of every value of
        ``self.command_data``. Entries whose second field (presumably the
        occurrence count -- depends on ``freq_dict_2_list``) is below
        ``min_freq`` are dropped. When ``tokens_file`` is given, both
        vocabularies are stored there, joined by the marker.

        Args:
            min_freq: Minimum frequency for a token to be kept.
            tokens_file: Optional path of the vocabulary cache file.

        Returns:
            A tuple ``(input_tokens, output_tokens)`` of ``TokenList`` objects.
        """
        separator = '<@@@>'
        cache_requested = tokens_file is not None

        # Fast path: reuse a previously stored vocabulary.
        if cache_requested and os.path.exists(tokens_file):
            print('loading', tokens_file)
            cached = load_list(tokens_file)
            split_at = cached.index(separator)
            return TokenList(cached[:split_at]), TokenList(cached[split_at + 1:])

        # Separate frequency tables for the source side and the target side.
        source_counts, target_counts = {}, {}
        for records in self.command_data.values():
            first = records[0]
            pairs = (
                (first['source'], source_counts),
                (first['target'], target_counts),
            )
            for sequence, counts in pairs:
                for token in parenthesis_split(sequence, self.delimiter,
                                               self.lparen, self.rparen):
                    counts[token] = counts.get(token, 0) + 1

        vocabularies = [
            [token for token, freq in freq_dict_2_list(counts) if freq >= min_freq]
            for counts in (source_counts, target_counts)
        ]
        input_vocab, output_vocab = vocabularies
        input_table = TokenList(input_vocab)
        output_table = TokenList(output_vocab)

        if cache_requested:
            store_list(input_vocab + [separator] + output_vocab, tokens_file)
        return input_table, output_table
Пример #2
0
 def get_source_sequence(self, command_index=None, command=None):
     """Split a source sequence into its tokens.

     The sequence is taken from ``self.command_data`` when ``command_index``
     is given; otherwise the ``command`` argument itself is used as the
     sequence. ``command_index`` takes precedence when both are supplied.

     Args:
         command_index: Index into ``self.command_names`` selecting a stored
             command, or ``None``.
         command: A sequence to split directly, or ``None``.

     Returns:
         Whatever ``parenthesis_split`` returns for the chosen sequence.

     Raises:
         ValueError: If neither ``command_index`` nor ``command`` is given.
     """
     if command_index is not None:
         # NOTE(review): this reads element [1] of the entry, whereas the
         # sibling loaders shown alongside this method read element [0].
         # Confirm this is intentional.
         sequence = self.command_data[self.command_names[command_index]][1]['source']
     elif command is not None:
         sequence = command
     else:
         # The message names the actual parameter (`command`); it previously
         # referred to a non-existent `command_filename` argument.
         raise_from(
             ValueError('provide a command_index or a command. command_index takes precedence'),
             None)
     return parenthesis_split(sequence, self.delimiter, self.lparen, self.rparen)
Пример #3
0
 def load_target(self, command_index):
     """Return the padded target sequence of the command at ``command_index``.

     The ``'target'`` sequence stored in element ``[0]`` of the command's
     entry is tokenised with ``parenthesis_split``, wrapped in a one-element
     batch, passed to ``pad_to_fixed`` with ``self.o_tokens`` and
     ``self.sequence_max_length``, and the result is squeezed.
     """
     name = self.command_names[command_index]
     raw_sequence = self.command_data[name][0]['target']
     tokens = list(
         parenthesis_split(raw_sequence, self.delimiter, self.lparen, self.rparen))
     # pad_to_fixed is handed a batch holding just this one token list.
     padded = pad_to_fixed([tokens], self.o_tokens, self.sequence_max_length)
     return padded.squeeze()
Пример #4
0
 def load_source(self, command_index):
     """Return the padded source sequence of the command at ``command_index``.

     The ``'source'`` sequence stored in element ``[0]`` of the command's
     entry is tokenised with ``parenthesis_split``, wrapped in a one-element
     batch, passed to ``pad_to_fixed`` with ``self.i_tokens`` and
     ``self.sequence_max_length``, and the result is squeezed.
     """
     name = self.command_names[command_index]
     raw_sequence = self.command_data[name][0]['source']
     tokens = list(
         parenthesis_split(raw_sequence, self.delimiter, self.lparen, self.rparen))
     # pad_to_fixed is handed a batch holding just this one token list.
     padded = pad_to_fixed([tokens], self.i_tokens, self.sequence_max_length)
     return padded.squeeze()
Пример #5
0
 def source_length(self, source_index):
     """Return the split source sequence of the command at ``source_index``.

     NOTE(review): despite its name, this method returns the result of
     ``parenthesis_split`` itself, not its length. Confirm what callers
     expect before changing it.
     """
     entry = self.command_data[self.command_names[source_index]]
     return parenthesis_split(entry[0]['source'], self.delimiter,
                              self.lparen, self.rparen)