Пример #1
0
    w2i[UNK] = 0
    i2w[0] = UNK

    # Reuse the pickled embedding weights stored at `embedding_file` when they
    # can be loaded; otherwise rebuild them with load_embd_weights() and store
    # the result at the same path for the next run.
    try:
        pre_embd_w = load_pickle(embedding_file)
    except Exception:
        # `except Exception` keeps the "rebuild on any load failure" fallback
        # but, unlike a bare `except:`, lets KeyboardInterrupt and SystemExit
        # propagate instead of silently triggering a rebuild.
        print('saving full embedding matrix...')
        pre_embd_w = load_embd_weights(word2vec, len(w2i), args.embd_size, w2i)
        save_pickle(pre_embd_w, embedding_file)
else:
    # Build the vocabulary by passing an empty list and the training file to
    # preload(); its return value becomes the vocabulary.
    vocab = []
    vocab = preload(
        fpath_train, vocab
    )  # read only the training file: OOV words must stay out of the vocabulary.
    # print(vocab)
    # Number of vocabulary entries returned for the training file.
    training_words = len(vocab)

    # Load extra words from the optional 'unknown_words.txt' file, one word
    # per line.
    master_unk_words = []
    try:
        with open('unknown_words.txt', 'r') as f:
            for line in f:
                # Drop only the line terminator; any other whitespace is kept.
                master_unk_words.append(line.rstrip('\n'))
    except OSError:
        # The file is optional: if it is missing or unreadable the list simply
        # stays as it is. Only I/O errors are ignored, so unrelated failures
        # are no longer hidden.
        pass

    for word in master_unk_words:
        if word not in vocab:
Пример #2
0
    print('entities', idx, ent_name, ent_vals[0])

# Select the train/test dialog files for the requested task.
assert args.task in (5, 6), 'task must be 5 or 6'
if args.task == 5:
    # Alternative training file that was tried here: 'pretrain_dialogs.txt'
    fpath_train, fpath_test = (
        'dialog-bAbI-tasks/dialog-babi-task5-full-dialogs-trn.txt',
        'dialog-bAbI-tasks/dialog-babi-task5-full-dialogs-tst-OOV.txt',
    )
elif args.task == 6:  # this is not working yet
    fpath_train, fpath_test = (
        'dialog-bAbI-tasks/dialog-babi-task6-dstc2-trn.txt',
        'dialog-bAbI-tasks/dialog-babi-task6-dstc2-tst.txt',
    )

# The action list starts with g.SILENT; preload() and load_data_from_file()
# each hand back the list to use from then on.
system_acts = [g.SILENT]

# Only the training file is used for the vocabulary, so words that appear
# solely in the test file stay out of it.
vocab = []
vocab, system_acts = preload(fpath_train, vocab, system_acts)
vocab = [g.UNK] + vocab  # g.UNK is placed first, at position 0 of the list

# Word <-> index lookup tables over the final vocabulary list.
w2i = {word: idx for idx, word in enumerate(vocab)}
i2w = dict(enumerate(vocab))

train_data, system_acts = load_data_from_file(
    fpath_train, entities, w2i, system_acts)
test_data, system_acts = load_data_from_file(
    fpath_test, entities, w2i, system_acts)
print('vocab size:', len(vocab))
print('action size:', len(system_acts))

# Longest first element over all loaded items (presumably the number of turns
# in a dialog, going by the variable names; confirm against load_data_from_file).
max_turn_train = max(len(dialog[0]) for dialog in train_data)
max_turn_test = max(len(dialog[0]) for dialog in test_data)
max_turn = max(max_turn_train, max_turn_test)
print('max turn:', max_turn)

# Action -> index lookup table.
act2i = {act: idx for idx, act in enumerate(system_acts)}
Пример #3
0
                                 "token": config['token']
                             })
    if response.status_code == 200:
        messages = json.loads(response.text).get('messages', [])
        if not messages:
            print('Room history is empty. Write first message')
        else:
            for message in messages:
                print(f"{message.get('sender')}: {message.get('text')}")
    elif response.status_code == 400:
        print(json.loads(response.text).get('message'))
    else:
        print('Error has occured. Try again')


# Module-level configuration returned by preload(); code in this file reads
# config['token'] from it.
# NOTE(review): this call runs at import time, not only when the script is
# executed directly. Confirm that is intended.
config = preload()


def main():
    """Hand the command table to ``fire.Fire``.

    Each key is the command name passed to Fire and each value is the
    function registered under that name.
    """
    commands = dict(
        register=register,
        login=login,
        newroom=newroom,
        subscribe=subscribe,
        publish=publish,
        room=room,
    )
    fire.Fire(commands)


# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
Пример #4
0
# Select the train/test dialog files for the requested task.
assert args.task in (5, 6), 'task must be 5 or 6'
if args.task == 5:
    fpath_train, fpath_test = (
        'dialog-bAbI-tasks/dialog-babi-task5-full-dialogs-trn.txt',
        'dialog-bAbI-tasks/dialog-babi-task5-full-dialogs-tst-OOV.txt',
    )
elif args.task == 6:
    fpath_train, fpath_test = (
        'dialog-bAbI-tasks/dialog-babi-task6-dstc2-trn.txt',
        'dialog-bAbI-tasks/dialog-babi-task6-dstc2-tst.txt',
    )

# Special tokens: the silent action and the unknown-word marker.
SILENT, UNK = '<SILENT>', '<UNK>'
system_acts = [SILENT]

# Only the training file is used for the vocabulary, so words that appear
# solely in the test file stay out of it.
vocab = []
vocab, system_acts = preload(fpath_train, vocab, system_acts)
# print(vocab)

# Vocabulary words are numbered from 1; index 0 is then assigned to UNK.
w2i = {word: idx for idx, word in enumerate(vocab, start=1)}
w2i[UNK] = 0
i2w = dict(enumerate(vocab, start=1))
i2w[0] = UNK

train_data, system_acts = load_data(fpath_train, entities, w2i, system_acts)
test_data, system_acts = load_data(fpath_test, entities, w2i, system_acts)
print('vocab size:', len(vocab))
print('action size:', len(system_acts))

# Longest first element over all loaded items (presumably the number of turns
# in a dialog, going by the variable names; confirm against load_data).
max_turn_train = max(len(dialog[0]) for dialog in train_data)
max_turn_test = max(len(dialog[0]) for dialog in test_data)
max_turn = max(max_turn_train, max_turn_test)
print('max turn:', max_turn)