Пример #1
0
 def __init__(self, config):
     """Read the three corpus splits and then extract the vocabulary.

     Args:
         config: Settings object. This constructor reads ``data_dir``
             (only its first element is used) and ``max_utt_len``.
     """
     self.config = config
     self._path = config.data_dir[0]
     self.max_utt_len = config.max_utt_len
     self.tokenize = get_chat_tokenize()

     # (attribute to fill, label handed to _read_file) for every split.
     splits = (
         ('train_corpus', 'Train'),
         ('valid_corpus', 'Valid'),
         ('test_corpus', 'Test'),
     )
     for attr_name, label in splits:
         setattr(self, attr_name, self._read_file(self._path, label))

     self._extract_vocab()
     print("Done loading corpus")
Пример #2
0
 def __init__(self, config):
     """Read the three kvret JSON splits and then build the vocabulary.

     Args:
         config: Settings object. This constructor reads ``data_dir``
             (only its first element is used), ``max_utt_len`` and
             ``max_vocab_cnt`` (forwarded to ``_build_vocab``).
     """
     self.config = config
     self._path = config.data_dir[0]
     self.max_utt_len = config.max_utt_len
     self.tokenize = get_chat_tokenize()

     # (attribute to fill, file name inside self._path) for every split.
     split_files = (
         ('train_corpus', 'kvret_train_public.json'),
         ('valid_corpus', 'kvret_dev_public.json'),
         ('test_corpus', 'kvret_test_public.json'),
     )
     for attr_name, file_name in split_files:
         setattr(
             self,
             attr_name,
             self._read_file(os.path.join(self._path, file_name)),
         )

     self._build_vocab(config.max_vocab_cnt)
     print("Done loading corpus")
Пример #3
0
 def __init__(self, config):
     """Read the train/dev/test CSV splits and then build the vocabulary.

     Args:
         config: Settings object. This constructor reads ``data_dir``
             (only its first element is used), ``max_utt_len`` and
             ``max_vocab_cnt`` (forwarded to ``_build_vocab``).
     """
     self.config = config
     self._path = config.data_dir[0]
     self.max_utt_len = config.max_utt_len
     self.tokenize = get_chat_tokenize()

     # Every split file lives directly under self._path.
     self.train_corpus = self._read_file(
         os.path.join(self._path, 'train_sent_emo.csv'))
     self.valid_corpus = self._read_file(
         os.path.join(self._path, 'dev_sent_emo.csv'))
     self.test_corpus = self._read_file(
         os.path.join(self._path, 'test_sent_emo.csv'))

     self._build_vocab(config.max_vocab_cnt)
     print("Done loading corpus")
Пример #4
0
    def __init__(self, corpus, config, action2name):
        """Copy vocabulary data from ``corpus`` and limits from ``config``.

        Args:
            corpus: Object exposing ``vocab``, ``vocab_dict``, ``rev_vocab``
                and ``unk_id``; each is stored on this instance under the
                same name.
            config: Settings object; ``backward_size`` and ``max_utt_len``
                are read from it.
            action2name: Not used by this constructor at present; it was
                consumed only by the action-encoder setup that is
                currently disabled.
        """
        super(DialogExchanger, self).__init__()
        self.config = config

        # Mirror the corpus vocabulary attributes one-to-one.
        for field in ('vocab', 'vocab_dict', 'rev_vocab', 'unk_id'):
            setattr(self, field, getattr(corpus, field))
        self.vocab_size = len(self.vocab)

        self.tokenize = get_chat_tokenize()
        self.backward_size = config.backward_size
        # Stored as max_utt_size even though the config field is max_utt_len.
        self.max_utt_size = config.max_utt_len

        # NOTE(review): 'PLACEHOLDE' may be a truncated spelling - confirm.
        self.unk_word = {'PLACEHOLDE'}
        self.gamma = 0.95
Пример #5
0
 def __init__(self, config):
     self.config = config
     self._path = config.data_dir[0]
     self.max_utt_len = config.max_utt_len
     self.tokenize = get_chat_tokenize()
     self.temp_corpus = self._read_file(self._path)