# --- Embedding-matrix cache + vocabulary setup (reformatted from a flattened chunk) ---
# Reserve index 0 for the UNK token in both lookup directions.
w2i[UNK] = 0
i2w[0] = UNK
try:
    # Fast path: reuse a previously pickled embedding weight matrix.
    pre_embd_w = load_pickle(embedding_file)
except:  # NOTE(review): bare except hides real errors (e.g. a typo'd name) — narrow if possible
    # embedding_matrix = word2vec.wv
    # pre_embd_w_full = torch.from_numpy(embedding_matrix).type(torch.FloatTensor)
    # save_pickle(pre_embd_w_full, 'pre_embd_w_full.pickle')
    print('saving full embedding matrix...')
    # Build the embedding weights from the word2vec model for this vocab, then cache them.
    pre_embd_w = load_embd_weights(word2vec, len(w2i), args.embd_size, w2i)
    save_pickle(pre_embd_w, embedding_file)
else:
    # NOTE(review): in the flattened original this `else:` could belong either to the
    # `try` above (runs only when the pickle load succeeded) or to an out-of-view
    # `if` — confirm against the full file before relying on this reconstruction.
    vocab = []
    # Vocabulary is built from the TRAINING file only, so test-set OOV words
    # genuinely stay out-of-vocabulary.
    vocab = preload( fpath_train, vocab )
    # print(vocab)
    training_words = len(vocab)
    master_unk_words = []
    try:
        # Optional list of known-unknown words, one per line; a missing file is fine.
        with open('unknown_words.txt', 'r') as f:
            for line in f:
                line = re.sub('\n', '', line)  # strip the trailing newline
                master_unk_words.append(line)
    except:  # best-effort read: absence of unknown_words.txt is not an error
        pass
    for word in master_unk_words:
        if word not in vocab:
            # (visible source is truncated here — the loop body continues past this chunk)
# --- bAbI dialog task 5/6 data preparation (reformatted from a flattened chunk) ---
# NOTE(review): dangling print from an entity-loading loop whose header is out of
# view — `idx`, `ent_name`, `ent_vals` are bound by that out-of-view code.
print('entities', idx, ent_name, ent_vals[0])

# Only bAbI dialog tasks 5 and 6 are supported by this script.
assert args.task in (5, 6), 'task must be 5 or 6'
if args.task == 5:
    fpath_train = 'dialog-bAbI-tasks/dialog-babi-task5-full-dialogs-trn.txt'
    # fpath_train = 'pretrain_dialogs.txt'
    fpath_test = 'dialog-bAbI-tasks/dialog-babi-task5-full-dialogs-tst-OOV.txt'
elif args.task == 6:
    # this is not working yet
    fpath_train = 'dialog-bAbI-tasks/dialog-babi-task6-dstc2-trn.txt'
    fpath_test = 'dialog-bAbI-tasks/dialog-babi-task6-dstc2-tst.txt'

# Vocabulary is read from the TRAINING split only, so test OOV words stay unknown.
system_acts = [g.SILENT]
vocab = []
vocab, system_acts = preload(fpath_train, vocab, system_acts)
vocab = [g.UNK] + vocab  # reserve index 0 for the UNK token

# Bidirectional word <-> index lookups.
w2i = {w: i for i, w in enumerate(vocab)}
i2w = {i: w for i, w in enumerate(vocab)}

train_data, system_acts = load_data_from_file(fpath_train, entities, w2i, system_acts)
test_data, system_acts = load_data_from_file(fpath_test, entities, w2i, system_acts)
print('vocab size:', len(vocab))
print('action size:', len(system_acts))

# Longest dialog (in turns) across both splits; assumes each d[0] is one
# dialog's turn list — TODO confirm against load_data_from_file.
max_turn_train = max(len(d[0]) for d in train_data)
max_turn_test = max(len(d[0]) for d in test_data)
max_turn = max(max_turn_train, max_turn_test)
print('max turn:', max_turn)

# Map each system action to a class index for the action classifier.
act2i = {act: i for i, act in enumerate(system_acts)}
# NOTE(review): this chunk begins mid-expression — the first two lines below close
# a JSON payload / request call made inside a function whose `def` line is outside
# the visible source (presumably the `room` history command).
        "token": config['token']
    })
    if response.status_code == 200:
        # Success: print the room history, or a hint when the room is empty.
        messages = json.loads(response.text).get('messages', [])
        if not messages:
            print('Room history is empty. Write first message')
        else:
            for message in messages:
                print(f"{message.get('sender')}: {message.get('text')}")
    elif response.status_code == 400:
        # Client/validation error: relay the server-provided message verbatim.
        print(json.loads(response.text).get('message'))
    else:
        # NOTE(review): user-facing typo — "occured" should be "occurred".
        print('Error has occured. Try again')


# Load the persisted CLI configuration (auth token, etc.) at import time.
config = preload()


def main():
    """CLI entry point: dispatch subcommands to their handlers via python-fire."""
    fire.Fire({
        "register": register,
        "login": login,
        "newroom": newroom,
        "subscribe": subscribe,
        "publish": publish,
        "room": room
    })


if __name__ == '__main__':
    main()
# --- bAbI dialog task 5/6 data preparation (reformatted from a flattened chunk) ---
# Only bAbI dialog tasks 5 and 6 are supported by this script.
assert args.task in (5, 6), 'task must be 5 or 6'
if args.task == 5:
    fpath_train = 'dialog-bAbI-tasks/dialog-babi-task5-full-dialogs-trn.txt'
    fpath_test = 'dialog-bAbI-tasks/dialog-babi-task5-full-dialogs-tst-OOV.txt'
elif args.task == 6:
    fpath_train = 'dialog-bAbI-tasks/dialog-babi-task6-dstc2-trn.txt'
    fpath_test = 'dialog-bAbI-tasks/dialog-babi-task6-dstc2-tst.txt'

# Special tokens: SILENT marks a no-utterance system turn, UNK marks OOV words.
SILENT = '<SILENT>'
UNK = '<UNK>'
system_acts = [SILENT]

# Vocabulary is built from the TRAINING file only, so test-set OOV words
# genuinely stay out-of-vocabulary.
vocab = []
vocab, system_acts = preload(
    fpath_train, vocab, system_acts
)
# print(vocab)

# Index words from 1 and reserve slot 0 for UNK in both lookup directions.
w2i = {w: i for i, w in enumerate(vocab, 1)}
i2w = {i: w for i, w in enumerate(vocab, 1)}
w2i[UNK] = 0
i2w[0] = UNK

train_data, system_acts = load_data(fpath_train, entities, w2i, system_acts)
test_data, system_acts = load_data(fpath_test, entities, w2i, system_acts)
print('vocab size:', len(vocab))
print('action size:', len(system_acts))

# Longest dialog (in turns) across both splits; assumes each d[0] is one
# dialog's turn list — TODO confirm against load_data.
max_turn_train = max(len(d[0]) for d in train_data)
max_turn_test = max(len(d[0]) for d in test_data)
max_turn = max(max_turn_train, max_turn_test)
print('max turn:', max_turn)