# Example 1
def dev(model):
    """Evaluate a regression model on the dev split.

    Args:
        model: trained torch model; called as ``model(content)`` and expected
            to return predictions of shape (batch, 1).

    Returns:
        ``(mae, mse)``: per-example mean absolute error and mean squared
        error, both as plain floats.
    """
    data_helper = DataHelper(mode='dev')
    b_size = len(data_helper.label)
    print('*' * 100)
    print('dev set total:', b_size)
    # 'sum' reductions so the totals below can be divided by the example
    # count to obtain per-example errors.
    loss_func = torch.nn.MSELoss(reduction='sum')
    loss_mae = torch.nn.L1Loss(reduction='sum')

    total_pred = 0
    total_mse = 0.0
    total_mae = 0.0
    model.eval()
    with torch.no_grad():  # evaluation only: no gradients needed
        for content, label, _ in data_helper.batch_iter(batch_size=b_size,
                                                        num_epoch=1):
            pred = model(content)
            pred_sq = torch.squeeze(pred, 1)
            # Accumulate across batches. The original overwrote total_pred
            # and kept only the last batch's MSE, which is wrong whenever
            # batch_iter yields more than one batch.
            total_mse += float(loss_func(pred_sq.cpu().data, label.cpu()))
            total_mae += float(loss_mae(pred_sq.cpu().data, label.cpu()))
            total_pred += len(label)

    total_pred = float(total_pred)
    # Per-example MAE first (the caller treats it as the "accuracy"),
    # per-example MSE second — same tuple order as before.
    return (total_mae / total_pred), (total_mse / total_pred)
# Example 2
def dev(model, dataset):
    """Return classification accuracy of *model* on the dev split of *dataset*."""
    helper = DataHelper(dataset, mode='dev')

    n_seen = 0
    n_correct = 0
    for batch, labels, _ in helper.batch_iter(batch_size=64, num_epoch=1):
        model.eval()

        scores = model(batch)
        guesses = torch.argmax(scores, dim=1)

        # Running tally of hits; n_correct becomes a tensor after the first
        # batch, matching the original accumulation behaviour.
        n_correct += torch.sum(guesses == labels)
        n_seen += len(batch)

    # Tensor/float division, exactly as torch.div(correct, total_pred) did.
    return torch.div(n_correct.float(), float(n_seen))
# Example 3
def test(model):
    """Evaluate the saved regression checkpoint on the test split.

    Note: the *model* argument is immediately replaced by the checkpoint
    loaded from './temp_model.pkl'; the parameter is kept only for
    signature compatibility with existing callers.

    Returns:
        Per-example mean absolute error as a float.
    """
    model_name = 'temp_model'
    model = torch.load(os.path.join('.', model_name + '.pkl'))
    data_helper = DataHelper(mode='test')
    b_size = len(data_helper.label)
    print('test set total:', b_size)

    loss_func = torch.nn.MSELoss(reduction='sum')
    total_pred = 0
    abs_error_sum = 0.0
    pre_score = []
    score = []
    model.eval()
    for content, label, _ in data_helper.batch_iter(batch_size=b_size,
                                                    num_epoch=1):
        pred = model(content)
        pred_sq = torch.squeeze(pred, 1)
        loss = loss_func(pred_sq, label.float())  # kept for parity with training
        # mean_absolute_error returns a per-batch *mean*; scale it back to a
        # sum so dividing by the total example count below is correct (the
        # original divided a mean by N, under-reporting the error).
        batch_mae = mean_absolute_error(pred_sq.cpu().data, label.cpu())
        abs_error_sum += batch_mae * len(label)
        total_pred += len(label)  # accumulate (was overwritten each batch)
        pre_score.append(pred_sq.cpu().data)
        score.append(label.cpu())

    # Plain float division: the original torch.div(float, float) call is not
    # valid for two Python floats.
    mae = abs_error_sum / float(total_pred)
    print(mae)
    _ = result_pull(pre_score, score)
    # pre_score / score are lists of tensors, so the original
    # '%.2f' / '%d' format string raised TypeError; print them directly.
    print('pred result:', pre_score, 'true score:', score)

    return mae
# Example 4
def test(model_name, dataset):
    """Load './<model_name>.pkl' and report its accuracy on the test split."""
    model = torch.load(os.path.join('.', model_name + '.pkl'))

    helper = DataHelper(dataset, mode='test')

    seen = 0
    hits = 0
    for batch, labels, _ in helper.batch_iter(batch_size=64, num_epoch=1):
        model.eval()

        scores = model(batch)
        guesses = torch.argmax(scores, dim=1)

        # hits becomes a tensor after the first batch, as in the original.
        hits += torch.sum(guesses == labels)
        seen += len(batch)

    accuracy = torch.div(hits.float(), float(seen))
    return accuracy.to('cpu')
# Example 5
def train(ngram, name, bar, drop_out, dataset, is_cuda=False, edges=True):
    """Train the graph classification model on *dataset* with early stopping.

    Reuses the checkpoint './<name>.pkl' when it exists (unless *name* is
    'temp_model'); otherwise builds a new Model from PMI edge statistics.
    Every NUM_ITER_EVAL iterations the model is scored on the dev split;
    the best-scoring weights are saved, and training stops early once dev
    accuracy has not improved for EARLY_STOP_EPOCH epochs.

    Args:
        ngram: n-gram window size passed to the Model.
        name: checkpoint base name ('temp_model' forces a fresh model and
            is rewritten to include the dataset name).
        bar: when truthy, show a tqdm progress bar between evaluations.
        drop_out: dropout rate for the Model.
        dataset: dataset identifier forwarded to DataHelper / cal_PMI.
        is_cuda: move the model to GPU when True.
        edges: whether the PMI edge weights are trainable.

    Returns:
        The (possibly rewritten) checkpoint base name.
    """
    print('load data helper.')
    data_helper = DataHelper(dataset, mode='train')
    # Resume from disk unless the caller asked for a throwaway model.
    if os.path.exists(os.path.join('.', name+'.pkl')) and name != 'temp_model':
        print('load model from file.')
        model = torch.load(os.path.join('.', name+'.pkl'))
    else:
        print('new model.')
        if name == 'temp_model':
            name = 'temp_model_%s' % dataset
        # edges_num, edges_matrix = edges_mapping(len(data_helper.vocab), data_helper.content, ngram)
        edges_weights, edges_mappings, count = cal_PMI(dataset=dataset)
        
        model = Model(class_num=len(data_helper.labels_str), hidden_size_node=200,
                      vocab=data_helper.vocab, n_gram=ngram, drop_out=drop_out, edges_matrix=edges_mappings, edges_num=count,
                      trainable_edges=edges, pmi=edges_weights, cuda=is_cuda)

    print(model)
    if is_cuda:
        print('cuda')
        model.cuda()
    loss_func = torch.nn.CrossEntropyLoss()

    optim = torch.optim.Adam(model.parameters(), weight_decay=1e-6)

    iter = 0
    if bar:
        pbar = tqdm.tqdm(total=NUM_ITER_EVAL)
    best_acc = 0.0
    last_best_epoch = 0
    start_time = time.time()
    # Running statistics, reset after each evaluation window.
    total_loss = 0.0
    total_correct = 0
    total = 0
    for content, label, epoch in data_helper.batch_iter(batch_size=32, num_epoch=200):
        improved = ''
        model.train()

        logits = model(content)
        loss = loss_func(logits, label)

        pred = torch.argmax(logits, dim=1)

        correct = torch.sum(pred == label)

        # total_correct becomes a tensor here; it is converted with float()
        # when the window accuracy is reported below.
        total_correct += correct
        total += len(label)

        total_loss += loss.item()

        optim.zero_grad()
        loss.backward()
        optim.step()

        iter += 1
        if bar:
            pbar.update()
        # Periodic dev evaluation + checkpointing.
        if iter % NUM_ITER_EVAL == 0:
            if bar:
                pbar.close()

            val_acc = dev(model, dataset=dataset)
            if val_acc > best_acc:
                best_acc = val_acc
                last_best_epoch = epoch
                improved = '*'

                torch.save(model, name + '.pkl')

            # Early stop when dev accuracy has stalled for too many epochs.
            if epoch - last_best_epoch >= EARLY_STOP_EPOCH:
                return name
            msg = 'Epoch: {0:>6} Iter: {1:>6}, Train Loss: {5:>7.2}, Train Acc: {6:>7.2%}' \
                  + 'Val Acc: {2:>7.2%}, Time: {3}{4}' \
                  # + ' Time: {5} {6}'

            print(msg.format(epoch, iter, val_acc, get_time_dif(start_time), improved, total_loss/ NUM_ITER_EVAL,
                             float(total_correct) / float(total)))

            # Reset the window statistics and restart the progress bar.
            total_loss = 0.0
            total_correct = 0
            total = 0
            if bar:
                pbar = tqdm.tqdm(total=NUM_ITER_EVAL)

    return name
# Example 6
            Model.input_y: y_batch,
            Model.dropout_keep_prob: 1.0,
            Model.b_size: len(y_batch)
        }

        step, loss, accuracy = sess.run(
            [global_step, Model.cost, Model.accuracy], feed_dict)
        time_str = datetime.datetime.now().isoformat()
        print("DEV {}: step {}, loss {:g}, acc {:g}".format(
            time_str, step, loss, accuracy))
        summary_op_out = sess.run(train_summary_op, feed_dict=feed_dict)
        train_summary_writer.add_summary(summary_op_out, step)
        return accuracy

    # Generate batches
    batches = datahelper.batch_iter(list(zip(train_set[0], train_set[1])),
                                    FLAGS.batch_size, FLAGS.num_epochs)

    ptr = 0
    max_validation_acc = 0.0
    for nn in range(sum_no_of_batches * FLAGS.num_epochs):
        batch = batches.__next__()
        if len(batch) < 1:
            continue
        x1_batch, y_batch = zip(*batch)
        if len(y_batch) < 1:
            continue
        train_step(x1_batch, y_batch)
        current_step = tf.train.global_step(sess, global_step)
        sum_acc = 0.0
        if current_step % FLAGS.evaluate_every == 0:
            print("\nEvaluation:")
# Example 7
def train(ngram, name, bar, drop_out, dataset, is_cuda=False, edges=False):
    """Train the regression model, checkpointing the best dev-error weights.

    Fixes relative to the original:
      * training also runs when a model is loaded from disk — the whole
        training loop used to sit inside the 'new model' branch, so a
        resumed model never trained (and would have crashed on the
        undefined optimizer);
      * removed the no-op comparison ``name == 'temp_model'``;
      * the best-checkpoint tracker starts at +inf so the lower-is-better
        dev error can actually trigger a save (``0.0`` with ``<`` never did);
      * ``plotter.block()`` is called once after training instead of
        blocking inside every loop iteration.

    Args:
        ngram: n-gram window size for the Model.
        name: checkpoint base name; './<name>.pkl' is loaded when present.
        bar: when truthy, show a tqdm progress bar between evaluations.
        drop_out: dropout rate for the Model.
        dataset: unused here (DataHelper is built without it); kept for
            signature compatibility with callers.
        is_cuda: move the model to GPU when True.
        edges: whether the PMI edge weights are trainable.

    Returns:
        The checkpoint base name.
    """
    plotter = LossAccPlotter(title="This is an example plot",
                             save_to_filepath="/tmp/my_plot.png",
                             show_regressions=True,
                             show_averages=True,
                             show_loss_plot=True,
                             show_acc_plot=True,
                             show_plot_window=False,
                             x_label="Epoch")

    print('load data helper.')
    data_helper = DataHelper(mode='train')
    b_size = len(data_helper.label)
    print('*' * 100)
    print('train set total:', b_size)

    if os.path.exists(os.path.join('.',
                                   name + '.pkl')) and name != 'temp_model':
        print('load model from file')
        model = torch.load(os.path.join('.', name + '.pkl'))
    else:
        print('new model')
        edges_weights, edges_mappings, count = cal_PMI()

        # NOTE(review): class_num receives data_helper.labels_str itself,
        # not len(...) — presumably intentional for the single-score
        # regression head; confirm against Model's constructor.
        model = Model(class_num=data_helper.labels_str,
                      hidden_size_node=200,
                      vocab=data_helper.vocab,
                      n_gram=ngram,
                      drop_out=drop_out,
                      edges_matrix=edges_mappings,
                      edges_num=count,
                      trainable_edges=edges,
                      pmi=edges_weights,
                      cuda=is_cuda)

    print(model)
    if is_cuda:
        print('cuda')
        model.cuda()

    loss_func = torch.nn.MSELoss()
    loss_mae = torch.nn.L1Loss(reduction='sum')
    optim = torch.optim.Adam(model.parameters(), weight_decay=1e-3)

    n_iter = 0
    if bar:
        pbar = tqdm.tqdm(total=NUM_ITER_EVAL)

    best_err = float('inf')  # dev MAE: lower is better
    last_best_epoch = 0
    start_time = time.time()
    # Running statistics, reset after each evaluation window.
    total_loss = 0.0
    running_loss = 0.0
    abs_error_sum = 0.0
    total = 0
    num_epoch = 500
    weight_decays = 1e-4
    for content, label, epoch in data_helper.batch_iter(
            batch_size=32, num_epoch=num_epoch):
        improved = ''
        model.train()
        pred = model(content)
        pred_sq = torch.squeeze(pred, 1)
        # Manual L2 penalty on the edge-weight embedding. It is computed on
        # a detached numpy copy, so it only shifts the reported loss value
        # and contributes no gradient.
        l2_reg = 0.5 * weight_decays * (
            model.seq_edge_w.weight.to('cpu').detach().numpy()**2).sum()

        loss = loss_func(pred_sq, label.float()) + l2_reg

        abs_error_sum += loss_mae(pred_sq.cpu().data, label.cpu())
        total += len(pred)
        total_loss += (loss.item() * len(pred))
        running_loss += loss.item()
        optim.zero_grad()
        loss.backward()
        optim.step()

        n_iter += 1
        if bar:
            pbar.update()

        if n_iter % NUM_ITER_EVAL == 0:
            if bar:
                pbar.close()

            # dev() returns (per-example MAE, per-example MSE).
            val_err, val_loss = dev(model)

            if val_err < best_err:
                best_err = val_err
                last_best_epoch = epoch
                improved = '* '
                torch.save(model, name + '.pkl')

            msg = 'Epoch: {0:>6} Iter: {1:>6}, Train Loss: {5:>7.2}, Train Error: {6:>7.2}' \
                  + 'Val Acc: {2:>7.2}, Time: {3}{4}, val error:{7:>7.2}'

            print(
                msg.format(epoch, n_iter, val_err, get_time_dif(start_time),
                           improved, running_loss / (NUM_ITER_EVAL),
                           float(abs_error_sum) / float(total), val_loss))

            plotter.add_values(epoch,
                               loss_train=running_loss / (NUM_ITER_EVAL),
                               acc_train=float(abs_error_sum) / float(total),
                               loss_val=val_loss,
                               acc_val=best_err)

            # Reset the window statistics and restart the progress bar.
            total_loss = 0.0
            running_loss = 0.0
            abs_error_sum = 0.0
            total = 0
            if bar:
                pbar = tqdm.tqdm(total=NUM_ITER_EVAL)

    # Render the plot window once, after training has finished.
    plotter.block()
    return name