Example #1
def test():
    provider = DataProvider()
    model, _ = get_model_and_optimizer()
    criterion = nn.CrossEntropyLoss()

    # Evaluation only: no gradients needed.
    model.eval()

    with torch.no_grad():
        for docs, quests, begin_idxs, end_idxs in provider.dev_batch(batch_size=batch_size):

            if torch.cuda.is_available():
                docs = docs.cuda()
                quests = quests.cuda()
                begin_idxs = begin_idxs.cuda()
                end_idxs = end_idxs.cuda()

            begin_idxs_out, end_idxs_out = model(docs, quests)

            # Mean absolute distance between predicted and gold span indices.
            # TODO How to calculate accuracy? (see the sketch after this example)
            begin_idxs_diff = torch.mean(torch.abs(torch.argmax(begin_idxs_out, dim=1) - begin_idxs).double())
            end_idxs_diff = torch.mean(torch.abs(torch.argmax(end_idxs_out, dim=1) - end_idxs).double())

            loss = criterion(begin_idxs_out, begin_idxs) + criterion(end_idxs_out, end_idxs)
            print(f'Loss: {loss.item()}')
            print(f'begin/end idx diff: {begin_idxs_diff}, {end_idxs_diff}')
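The TODO above asks how to compute accuracy. For span prediction, a simple choice is exact match over the begin/end indices. A minimal sketch; the helper is hypothetical and not part of the original code:

# Hypothetical helper: fraction of examples where both the predicted begin
# and end indices equal the gold indices.
def exact_match_accuracy(begin_idxs_out, end_idxs_out, begin_idxs, end_idxs):
    begin_pred = torch.argmax(begin_idxs_out, dim=1)
    end_pred = torch.argmax(end_idxs_out, dim=1)
    correct = (begin_pred == begin_idxs) & (end_pred == end_idxs)
    return correct.double().mean().item()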
Example #2
def submit():

    data_provider = DataProvider()
    model, _ = get_model_and_optimizer()

    for quests, docs, raw_docs, idx_maps, raw_data in data_provider.test_batch():

        if torch.cuda.is_available():
            quests = quests.cuda()
            docs = docs.cuda()

        begin_idxs_out, end_idxs_out = model(docs, quests)

        begin_idxs = torch.argmax(begin_idxs_out, dim=1).tolist()
        end_idxs = torch.argmax(end_idxs_out, dim=1).tolist()

        answers = []
        for d, bi, ei, idx_map in zip(raw_docs, begin_idxs, end_idxs,
                                      idx_maps):
            raw_ans = d[idx_map[bi]:idx_map[ei] + 1]
            # Inspect each prediction interactively.
            print(f'doc: {d}')
            print(f'answer: {raw_ans}')
            answers.append(raw_ans)
            input()

        # Construct json (Example #6 shows the full submit routine)
        pred = {}
        pred['yesno_answers'] = []
        pred['question'] = raw_data['question']
        pred['question_type'] = raw_data['question_type']
        pred['answers'] = answers
        pred['question_id'] = raw_data['question_id']
Example #3
def train(epochs):

    e = 0
    cnt = 0

    provider = DataProvider()
    model, optimizer = get_model_and_optimizer()
    criterion = nn.CrossEntropyLoss()

    try:
        checkpoint = torch.load(checkpoint_path)
        e = checkpoint['epoch']
        # Restore the saved weights and optimizer state as well,
        # matching the keys written at save time below.
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    except FileNotFoundError:
        print('No checkpoint found.')

    while e < epochs:

        print(f'Epoch: {e}')

        for docs, quests, begin_idxs, end_idxs in provider.train_batch(
                batch_size=batch_size):

            if torch.cuda.is_available():
                docs = docs.cuda()
                quests = quests.cuda()
                begin_idxs = begin_idxs.cuda()
                end_idxs = end_idxs.cuda()

            model.zero_grad()

            print(f'docs len: {docs.shape[1]} ', end='')

            try:
                begin_idxs_out, end_idxs_out = model(docs, quests)
                loss = criterion(begin_idxs_out, begin_idxs) + criterion(
                    end_idxs_out, end_idxs)
                loss.backward()
                optimizer.step()
            except RuntimeError as err:
                # Typically a CUDA OOM or a shape mismatch on long documents.
                print(f'Error when feeding batch into the model: {err}')
                continue

            print(f'Loss: {loss.item()}')
            # FIXME crude per-step logging; see the sketch after this example
            with open('loss.log', 'a') as f:
                f.write(f'{loss.item()}\n')

            cnt += 1
            if cnt == save_per_steps:
                cnt = 0
                torch.save(
                    {
                        'epoch': e,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                    }, checkpoint_path)
                print(f'Saved model at epoch {e}')

        e += 1
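The FIXME above points at the crude per-step file append. One alternative is the standard-library logging module; a minimal sketch, where the format string and file name are illustrative rather than from the original:

import logging

# Configure once at startup instead of reopening loss.log on every step.
logging.basicConfig(filename='loss.log', level=logging.INFO,
                    format='%(asctime)s %(message)s')

# Then, inside the training loop:
# logging.info('loss=%.4f', loss.item())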
Example #4
def test():

    new_ref = True

    if os.path.exists(ref_file):
        print('Found an old ref file. Create a new one? (y/n) ', end='')
        a = input()
        if a == 'y':
            os.remove(ref_file)
            print('Removed old ref file.')
        elif a == 'n':
            new_ref = False
        else:
            print('Invalid input')
            return

    if os.path.exists(pred_file):
        os.remove(pred_file)
        print('Removed old pred file.')

    data_provider = DataProvider()
    model, _ = get_model_and_optimizer()
    yesorno_model, _ = get_model_and_optimizer(yesorno_checkpoint_path)

    for docs, quests, begin_idxs, end_idxs, raw_docs, idx_maps, raw_datas in data_provider.dev_batch(
            batch_size=2, get_raw=True):

        if torch.cuda.is_available():
            quests = quests.cuda()
            docs = docs.cuda()

        # print (docs.shape, quests.shape)

        begin_idxs_out, end_idxs_out = model(docs, quests)

        begin_idxs_pred = torch.argmax(begin_idxs_out, dim=1).tolist()
        end_idxs_pred = torch.argmax(end_idxs_out, dim=1).tolist()

        begin_idxs_yn_pred, end_idxs_yn_pred = None, None

        answer_preds = []

        if use_yesorno_model:
            begin_idxs_yn_out, end_idxs_yn_out = yesorno_model(docs, quests)

            begin_idxs_yn_pred = torch.argmax(begin_idxs_yn_out,
                                              dim=1).tolist()
            end_idxs_yn_pred = torch.argmax(end_idxs_yn_out, dim=1).tolist()

            for d, bi, ei, bi_yn, ei_yn, idx_map, raw_data in zip(
                    raw_docs, begin_idxs_pred, end_idxs_pred,
                    begin_idxs_yn_pred, end_idxs_yn_pred, idx_maps, raw_datas):
                # Use the dedicated model to answer yes/no questions.
                if raw_data['question_type'] == 'YES_NO':
                    raw_ans = d[idx_map[bi_yn]:idx_map[ei_yn] + 1]
                else:
                    raw_ans = d[idx_map[bi]:idx_map[ei] + 1]
                answer_preds.append(raw_ans)

        else:
            for d, bi, ei, idx_map, raw_data in zip(raw_docs, begin_idxs_pred,
                                                    end_idxs_pred, idx_maps,
                                                    raw_datas):
                raw_ans = ''
                raw_ans = d[idx_map[bi]:idx_map[ei] + 1]
                answer_preds.append(raw_ans)

        # TODO select the best answer according to softmax

        for answer, data in zip(answer_preds, raw_datas):
            # Construct pred.json
            if answer == '':
                continue

            pred = {}
            pred['yesno_answers'] = []
            pred['question'] = data['question']
            pred['question_type'] = data['question_type']
            pred['answers'] = [answer]
            pred['question_id'] = data['question_id']
            pred_s = json.dumps(pred, ensure_ascii=False)

            with open(pred_file, 'a') as f:
                f.write(pred_s + '\n')

            # Construct ref.json
            if new_ref:
                ref = {}
                ref['yesno_answers'] = []
                ref['entity_answers'] = [[]]
                ref['source'] = 'search'
                ref['question'] = data['question']
                ref['question_type'] = data['question_type']
                ref['answers'] = data['answers']
                ref['question_id'] = data['question_id']
                ref_s = json.dumps(ref, ensure_ascii=False)

                with open(ref_file, 'a') as f:
                    f.write(ref_s + '\n')
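Both this example and Example #6 carry a TODO about selecting the best answer according to softmax. One common approach is to score each predicted span by the joint softmax probability of its begin and end positions and keep the highest-scoring candidate. A minimal sketch; the helper and its use are assumptions, not part of the original code:

import torch.nn.functional as F

# Hypothetical helper: returns a per-example confidence score for the
# argmax span, so the caller can rank candidate answers across documents.
def span_confidences(begin_idxs_out, end_idxs_out):
    begin_probs = F.softmax(begin_idxs_out, dim=1)
    end_probs = F.softmax(end_idxs_out, dim=1)
    begin_best = begin_probs.max(dim=1).values
    end_best = end_probs.max(dim=1).values
    return (begin_best * end_best).tolist()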
Example #5
import random

import torch.nn as nn
import torch.optim as optim

# DataProvider and CharRNN come from project-local modules.

# Hyperparameters
learning_rate = 0.01
embedding_dim = 256
hidden_size = 128
num_layers = 1
num_epochs = 10000
train_size = 0.8
batch_size = 128
save_per_num_steps = 20

included_extensions = ['.json']
# files = [directory + '/' + fn for fn in os.listdir(directory) if any(fn.endswith(ext) for ext in included_extensions)]
files = ['raw_data/out.json']
random.shuffle(files)

print('Preparing data...')
provider = DataProvider(files, batch_size=batch_size, padding_value=0)

vocab = provider.vocab
vocab_size = len(vocab)

print('Vocab size: ', vocab_size)

model = CharRNN(vocab_size=vocab_size,
                target_size=vocab_size,
                embedding_dim=embedding_dim,
                hidden_size=hidden_size,
                num_layers=num_layers)
criterion = nn.NLLLoss(ignore_index=vocab.padding_idx)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
# optimizer = optim.Adadelta(model.parameters(), lr=learning_rate)
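For context, here is a minimal training-loop sketch wired to the objects defined above. The provider.train_batch interface, the (batch, seq_len) tensor shapes, and CharRNN returning log-probabilities (as nn.NLLLoss expects) are all assumptions:

for epoch in range(num_epochs):
    for inputs, targets in provider.train_batch():  # assumed interface
        model.zero_grad()
        # Assumed output: (batch, seq_len, vocab_size) log-probabilities.
        log_probs = model(inputs)
        loss = criterion(log_probs.reshape(-1, vocab_size), targets.reshape(-1))
        loss.backward()
        optimizer.step()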
Example #6
def submit():

    if os.path.exists(submit_file):
        print('Found an old submit file. Want to create a new one? (y/n) ', end='')
        a = input()
        if a == 'y':
            os.remove(submit_file)
            print('Removed old submit file.')
        elif a == 'n':
            # Keep the old file; new predictions will be appended to it.
            print('Keeping old submit file.')
        else:
            print('Invalid input')
            return

    data_provider = DataProvider()
    model, _ = get_model_and_optimizer()

    yesorno_model = None
    if use_yesorno_model:
        yesorno_model, _ = get_model_and_optimizer(yesorno_checkpoint_path)

    cnt = 0
    # TODO Actually test_batch batch size is 1
    for quests, docs, raw_docs, idx_maps, raw_data in data_provider.test_batch():
    
        if torch.cuda.is_available():
            quests = quests.cuda()
            docs = docs.cuda()

        begin_idxs_out, end_idxs_out = None, None
        if use_yesorno_model and raw_data['question_type'] == 'YES_NO':
            begin_idxs_out, end_idxs_out = yesorno_model(docs, quests) 
        else:
            begin_idxs_out, end_idxs_out = model(docs, quests) 
        
        begin_idxs = torch.argmax(begin_idxs_out, dim=1).tolist()
        end_idxs = torch.argmax(end_idxs_out, dim=1).tolist()

        answers = []
        for d, bi, ei, idx_map in zip(raw_docs, begin_idxs, end_idxs, idx_maps):
            raw_ans = d[idx_map[bi]: idx_map[ei] + 1]
            # print (f'doc: {d}')
            # print (f'answer: {raw_ans}')
            # input ()
            answers.append(raw_ans)

        # TODO select the best answer according to softmax
        # Construct json 
        pred = {}
        pred['yesno_answers'] = []
        pred['question'] = raw_data['question']
        pred['question_type'] = raw_data['question_type']
        pred['answers'] = answers
        pred['question_id'] = raw_data['question_id']
        pred_s = json.dumps(pred, ensure_ascii=False)

        with open(submit_file, 'a') as f:
            f.write(pred_s + '\n')

        cnt += 1
        print(f'\r{cnt} ', end='', flush=True)
Example #7
import os

import tensorflow as tf

# Squash logits to probabilities, then threshold at 0.5: adding 0.5 and
# truncating through the uint8 cast rounds each probability to 0 or 1.
sigmoid_logits = tf.squeeze(tf.nn.sigmoid(logits), -1)
thresholded_logits = tf.cast(tf.cast(sigmoid_logits + 0.5, tf.uint8),
                             tf.float32)

loss = tf.losses.sigmoid_cross_entropy(label_placeholder, logits)
accuracy = tf.reduce_sum(
    tf.cast(tf.equal(thresholded_logits, label_placeholder),
            tf.float32)) / batch_size

tf.summary.scalar('loss', loss)
tf.summary.scalar('accuracy', accuracy)

# optimizer
train_op = tf.train.AdamOptimizer(eta).minimize(loss)

data_provider = DataProvider(batch_size, [1, 5])
full_num_batches = data_provider.num_batches()

if not os.path.isdir('summaries'):
    os.mkdir('summaries')

merged = tf.summary.merge_all()

with tf.Session() as sess:
    train_writer = tf.summary.FileWriter('summaries/model', sess.graph)
    sess.run(tf.global_variables_initializer())

    # full training
    for epoch in range(num_epochs):
        for batch in range(full_num_batches):
            data, labels = data_provider.get_full_data()
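The snippet breaks off inside the inner loop. For completeness, a hedged sketch of how such a TF1 training step typically continues; data_placeholder is a hypothetical name, and the summary-writing cadence is an assumption:

            # Hypothetical continuation, not from the original code.
            summary, _, loss_val = sess.run(
                [merged, train_op, loss],
                feed_dict={data_placeholder: data,
                           label_placeholder: labels})
            train_writer.add_summary(summary, epoch * full_num_batches + batch)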