def __init__(self, config):
    """Load the three dataset splits and extract the vocabulary.

    Args:
        config: Settings object. This method reads ``config.data_dir``
            (indexable; only element 0 is used) and ``config.max_utt_len``.
    """
    self.config = config
    self._path = config.data_dir[0]
    self.max_utt_len = config.max_utt_len
    self.tokenize = get_chat_tokenize()

    # Splits are read in train -> valid -> test order, each through
    # ``self._read_file(path, split_name)``; the tokenizer and length limit
    # are already set on ``self`` by the time the first read happens.
    split_table = (
        ('train_corpus', 'Train'),
        ('valid_corpus', 'Valid'),
        ('test_corpus', 'Test'),
    )
    for attr_name, split_name in split_table:
        setattr(self, attr_name, self._read_file(self._path, split_name))

    self._extract_vocab()
    print("Done loading corpus")
def __init__(self, config):
    """Read the kvret train/dev/test JSON files and build the vocabulary.

    Args:
        config: Settings object. This method reads ``config.data_dir``
            (indexable; only element 0 is used), ``config.max_utt_len`` and
            ``config.max_vocab_cnt``.
    """
    self.config = config
    self._path = config.data_dir[0]
    self.max_utt_len = config.max_utt_len
    self.tokenize = get_chat_tokenize()

    def load_split(file_name):
        # Resolve the file inside the data directory, then parse it.
        return self._read_file(os.path.join(self._path, file_name))

    self.train_corpus = load_split('kvret_train_public.json')
    self.valid_corpus = load_split('kvret_dev_public.json')
    self.test_corpus = load_split('kvret_test_public.json')

    # The vocabulary is built only after all three splits are loaded.
    self._build_vocab(config.max_vocab_cnt)
    print("Done loading corpus")
def __init__(self, config):
    """Read the ``*_sent_emo.csv`` splits and build the vocabulary.

    Args:
        config: Settings object. This method reads ``config.data_dir``
            (indexable; only element 0 is used), ``config.max_utt_len`` and
            ``config.max_vocab_cnt``.
    """
    self.config = config
    self._path = config.data_dir[0]
    self.max_utt_len = config.max_utt_len
    # The tokenizer and length limit are set before any file is read.
    self.tokenize = get_chat_tokenize()

    self.train_corpus = self._read_file(
        os.path.join(self._path, 'train_sent_emo.csv'))
    self.valid_corpus = self._read_file(
        os.path.join(self._path, 'dev_sent_emo.csv'))
    self.test_corpus = self._read_file(
        os.path.join(self._path, 'test_sent_emo.csv'))

    # The vocabulary is built only after all three splits are loaded.
    self._build_vocab(config.max_vocab_cnt)
    print("Done loading corpus")
def __init__(self, corpus, config, action2name):
    """Set up the exchanger from an already-loaded corpus.

    Args:
        corpus: Object providing ``vocab``, ``vocab_dict``, ``rev_vocab``
            and ``unk_id``; these are copied onto the instance by reference.
        config: Settings object. This method reads ``config.backward_size``
            and ``config.max_utt_len``.
        action2name: Accepted but not used by the active code; the only
            consumers are the disabled lines noted below.
    """
    super(DialogExchanger, self).__init__()
    self.config = config

    # Vocabulary tables shared with the corpus.
    self.vocab = corpus.vocab
    self.vocab_dict = corpus.vocab_dict
    self.rev_vocab = corpus.rev_vocab
    self.unk_id = corpus.unk_id
    self.vocab_size = len(self.vocab)

    self.tokenize = get_chat_tokenize()
    self.backward_size = config.backward_size

    # Disabled: mapping natural-language actions to latent action ids.
    # self.name2action = revert_action_dict(action2name)
    # self.action_encoder = ActionEncoder(corpus, config, self.name2action)

    self.max_utt_size = config.max_utt_len
    # NOTE(review): the spelling below looks truncated, but it is a runtime
    # value -- confirm against its consumers before changing it.
    self.unk_word = {'PLACEHOLDE'}
    # NOTE(review): presumably a discount factor -- confirm where it is used.
    self.gamma = 0.95
def __init__(self, config):
    """Read a single corpus from the configured data location.

    Args:
        config: Settings object. This method reads ``config.data_dir``
            (indexable; only element 0 is used) and ``config.max_utt_len``.
    """
    self.config = config

    data_location = config.data_dir[0]
    self._path = data_location
    self.max_utt_len = config.max_utt_len
    # The tokenizer is set on ``self`` before the file is read.
    self.tokenize = get_chat_tokenize()

    self.temp_corpus = self._read_file(data_location)