def ada_boost_testing(x_train, y_train, x_test, y_test, l=10):
    """Run a hand-rolled AdaBoost loop and report accuracy and F1.

    Fits ``l`` weak learners through ``AdaBoostClassifier.fit`` on re-weighted
    training data, combines them with an alpha-weighted vote and prints and
    returns the train/test scores.

    Args:
        x_train: Training samples, indexed as ``x_train[i][feature]``.
        y_train: Training labels; compared element-wise with the -1/1 votes.
        x_test: Test samples, same layout as ``x_train``.
        y_test: Test labels.
        l: Number of boosting rounds.

    Returns:
        Tuple ``(train_accuracy, test_accuracy, train_f1, test_f1)``.
    """
    print('Adaboost\n\n')
    adbt = AdaBoostClassifier()
    L = l
    # Sizes come from the data instead of the former hard-coded 2098 / 700.
    n_train = len(x_train)
    n_test = len(x_test)
    h = []
    e = np.zeros(L)
    a = np.zeros(L)
    d = np.full((L, n_train), 1 / n_train)
    # Keeps the error strictly inside (0, 1) so the log below stays finite.
    eps = np.finfo(float).eps
    for t in range(L):
        # NOTE(review): fit() is called on the same instance every round and
        # its return value is stored; if fit() returns `self`, every h[t]
        # aliases the last learner -- confirm against AdaBoostClassifier.
        h.append(adbt.fit(x_train, y_train, d[t]))

        # Predict each training sample once and reuse the mask for both the
        # weighted error and the weight update.
        miss = np.array(
            [adbt._predict(x_train[i]) != y_train[i] for i in range(n_train)],
            dtype=bool)
        e[t] = min(max(d[t][miss].sum(), eps), 1 - eps)
        a[t] = 1 / 2 * np.log((1 - e[t]) / e[t])

        if t < L - 1:
            # Up-weight mistakes, down-weight hits, then normalise ONCE after
            # every weight has been updated (the original renormalised inside
            # the per-sample loop, skewing the distribution).
            d[t + 1] = d[t] * np.exp(np.where(miss, a[t], -a[t]))
            d[t + 1] = d[t + 1] / d[t + 1].sum()

    def vote(samples, count):
        # Alpha-weighted vote of all learners for the first `count` samples.
        preds = np.zeros(count)
        for i in range(count):
            score = 0.0
            for t in range(L):
                stump = h[t]
                if samples[i][stump.feature] >= stump.split:
                    score += a[t] * stump.right_tree
                else:
                    score += a[t] * stump.left_tree
            # NOTE(review): a positive score maps to -1 as in the original;
            # this looks inverted unless the leaf values are sign-flipped --
            # confirm against the tree implementation.
            preds[i] = -1 if score > 0 else 1
        return preds

    preds_train = vote(x_train, n_train)
    preds_test = vote(x_test, n_test)

    train_accuracy = (preds_train == y_train).sum() / len(y_train)
    test_accuracy = (preds_test == y_test).sum() / len(y_test)
    train_f1 = f1(y_train, preds_train)
    test_f1 = f1(y_test, preds_test)
    print('Train {}'.format(train_accuracy))
    print('Test {}'.format(test_accuracy))
    print('F1 Train {}'.format(train_f1))
    print('F1 Test {}'.format(test_f1))
    # The fourth element used to repeat the train F1; it is the test F1 now.
    return train_accuracy, test_accuracy, train_f1, test_f1
def ft_random_forest_testing(x_train, y_train, x_test, y_test):
    """Sweep ``max_features`` for a random forest and plot the scores.

    For each value in the feature grid a forest (``max_depth=7``,
    ``n_trees=50``) is fitted, train/test accuracy and test F1 are printed,
    and the three curves are saved to ``RandomForestFeatures.png``.

    Args:
        x_train: Training samples.
        y_train: Training labels.
        x_test: Test samples.
        y_test: Test labels.
    """
    print('Random Forest Feature Loop\n\n')
    # One grid shared by the sweep and the plot so they cannot drift apart.
    feature_grid = [1, 2, 5, 8, 10, 20, 25, 35, 50]
    train_list = []
    test_list = []
    F1_list = []

    for max_features in feature_grid:
        rclf = RandomForestClassifier(max_depth=7,
                                      max_features=max_features,
                                      n_trees=50)
        rclf.fit(x_train, y_train)
        preds_train = rclf.predict(x_train)
        preds_test = rclf.predict(x_test)
        train_accuracy = accuracy_score(preds_train, y_train)
        test_accuracy = accuracy_score(preds_test, y_test)
        # Reuse the test predictions instead of predicting a second time.
        test_f1 = f1(y_test, preds_test)
        print('Train {}'.format(train_accuracy))
        print('Test {}'.format(test_accuracy))
        print('F1 Test {}'.format(test_f1))

        train_list.append(train_accuracy)
        test_list.append(test_accuracy)
        F1_list.append(test_f1)

    plt.rcParams['font.family'] = ['serif']
    ax = plt.subplot(111)
    ax.plot(feature_grid, train_list, label='training')
    ax.plot(feature_grid, test_list, label='testing')
    ax.plot(feature_grid, F1_list, label='F1')
    plt.xlabel("max_features")
    plt.xticks(feature_grid)
    plt.ylabel("Accuracies")
    ax.legend()
    plt.savefig("RandomForestFeatures.png")
    # Clear the figure so later plots do not inherit these curves.
    plt.clf()
示例#3
0
def do_eval(model, train, dev, input_model=None):
    """Restore the latest checkpoint and log F1 / EM on train and dev data.

    Args:
        model: QA model handed to ``multibatch_prediction_truth``.
        train: Training set.
        dev: Development set.
        input_model: Forwarded unchanged to ``multibatch_prediction_truth``.
    """
    checkpoint_dir = os.path.join(FLAGS.train_dir, FLAGS.model_name)
    parameter_space_size()
    saver = tf.train.Saver()
    # TODO add loop to run over all checkpoints in folder,
    with tf.Session() as session:
        latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
        saver.restore(session, latest_checkpoint)
        print('Evaluation in progress.', flush=True)

        started_at = timer()

        def score(dataset):
            # Predict over FLAGS.eval_batches batches, then score F1 and EM.
            prediction, truth = multibatch_prediction_truth(
                session,
                model,
                dataset,
                FLAGS.eval_batches,
                input_model=input_model)
            return f1(prediction, truth), exact_match(prediction, truth)

        # Train is evaluated before dev, matching the logged order.
        train_f1, train_em = score(train)
        dev_f1, dev_em = score(dev)

        logging.info(f'Train/Dev F1: {train_f1:.3f}/{dev_f1:.3f}')
        logging.info(f'Train/Dev EM: {train_em:.3f}/{dev_em:.3f}')
        logging.info(f'Time to evaluate: {timer() - started_at:.1f} sec')
示例#4
0
def evaluate(result, summary=False):
    """Print per-label and aggregate precision/recall/F1 for tagged sequences.

    Args:
        result: Iterable of 3-tuples; the first item is ignored, the second
            is a sequence of actual labels and the third the matching
            sequence of predicted labels.
        summary: When true, skip the per-label report and print only the
            macro/micro aggregates.
    """
    avg = defaultdict(float)  # average
    tp = defaultdict(int)  # true positives
    tpfn = defaultdict(int)  # true positives + false negatives
    tpfp = defaultdict(int)  # true positives + false positives
    # Distinct names for the sequences and their items (the original reused
    # y0/y1 for both, shadowing the outer loop variables).
    for _, actual_seq, predicted_seq in result:
        for actual, predicted in zip(actual_seq, predicted_seq):
            tp[actual] += actual == predicted
            tpfn[actual] += 1
            tpfp[predicted] += 1
    for y in sorted(tpfn.keys()):
        pr = tp[y] / tpfp[y] if tpfp[y] else 0
        rc = tp[y] / tpfn[y] if tpfn[y] else 0
        avg["macro_pr"] += pr
        avg["macro_rc"] += rc
        if not summary:
            print()
            print("label = %s" % y)
            print("precision = {:f} ({:d}/{:d})".format(pr, tp[y], tpfp[y]))
            print("recall = {:f} ({:d}/{:d})".format(rc, tp[y], tpfn[y]))
            print("f1 = {:f}".format(f1(pr, rc)))
    # Guard the averages so an empty `result` reports zeros instead of
    # raising ZeroDivisionError.
    num_labels = len(tpfn)
    if num_labels:
        avg["macro_pr"] /= num_labels
        avg["macro_rc"] /= num_labels
    total_predicted = sum(tpfp.values())
    avg["micro_f1"] = (sum(tp.values()) / total_predicted
                       if total_predicted else 0.0)
    print()
    print("macro precision = %f" % avg["macro_pr"])
    print("macro recall = %f" % avg["macro_rc"])
    # NOTE(review): assumes the external f1(pr, rc) tolerates pr == rc == 0,
    # as it must already for labels that are never predicted correctly.
    print("macro f1 = %f" % f1(avg["macro_pr"], avg["macro_rc"]))
    print("micro f1 = %f" % avg["micro_f1"])
示例#5
0
文件: main.py 项目: lunng/CS434_A3
def decision_tree_various_depth(x_train, y_train, x_test, y_test):
    """Sweep decision-tree depth 1-25, tabulate/plot scores, report the best.

    Prints train/test accuracy and test F1 per depth, saves the curves to
    ``q1.png``, then refits a tree at the depth with the highest test F1 and
    prints its root split.

    Args:
        x_train: Training samples.
        y_train: Training labels.
        x_test: Test samples.
        y_test: Test labels.

    Returns:
        The ``feature`` attribute of the root node of the best-depth tree.
    """
    print('Decision Tree with depths 1-25 (inclusive)\n')

    # One depth grid shared by the sweep, the table and the best-depth lookup.
    depths = list(range(1, 26))
    graphTrain = []
    graphTest = []
    graphF1 = []

    # decision_tree_testing is not reused here because the depth has to be
    # set per iteration and the individual scores are needed for the plot.
    for layer in depths:
        print('Current depth: ', layer)
        clf = DecisionTreeClassifier(max_depth=layer)
        clf.fit(x_train, y_train)
        preds_train = clf.predict(x_train)
        preds_test = clf.predict(x_test)
        train_accuracy = accuracy_score(preds_train, y_train)
        test_accuracy = accuracy_score(preds_test, y_test)
        # Reuse the test predictions instead of predicting a second time.
        test_f1 = f1(y_test, preds_test)
        graphTrain.append(train_accuracy)
        graphTest.append(test_accuracy)
        graphF1.append(test_f1)
        print('Train {}'.format(train_accuracy))
        print('Test {}'.format(test_accuracy))
        print('F1 Test {}\n'.format(test_f1))

    table = pd.DataFrame({
        "Max Depth": depths,
        "Train Accuracy": graphTrain,
        "Test Accuracy": graphTest,
        "F1 Accuracy": graphF1
    })
    print(table)

    # Draw on an explicitly numbered figure so the curves cannot land on a
    # figure left current by another plotting function.
    plt.figure(1)
    plt.xlabel('Depth')
    plt.ylabel('Performance')
    # The title used to say "Number of Trees" although the x-axis is depth.
    plt.title('Accuracy & F1 Score vs Depth')
    plt.plot('Max Depth', 'Train Accuracy', data=table, color='blue')
    plt.plot('Max Depth', 'Test Accuracy', data=table, color='green')
    plt.plot('Max Depth', 'F1 Accuracy', data=table, color='red')
    plt.legend()
    plt.savefig('q1.png')

    # "Best" is chosen by test F1; the first maximum wins on ties.
    topAccuracy = max(graphF1)
    best_depth = depths[graphF1.index(topAccuracy)]
    print("The depth that gives the best validation accuracy is: ",
          best_depth,
          "which has an F1 accuracy of ", topAccuracy)

    # Refit at the best depth to inspect the root split.
    clfMVP = DecisionTreeClassifier(max_depth=best_depth)
    clfMVP.fit(x_train, y_train)
    print("The most important feature for making a prediction is: ",
          clfMVP.root.feature)
    print("The threshold to split on for this feature is: ", clfMVP.root.split)

    # return the most important feature for use in main
    return clfMVP.root.feature
示例#6
0
文件: main.py 项目: lunng/CS434_A3
def random_forest_various_features(x_train, y_train, x_test, y_test):
    """Sweep ``max_features`` for a random forest and return the best value.

    Fits a forest (``max_depth=7``, ``n_trees=50``) per candidate, prints
    train/test accuracy and test F1, tabulates the results and saves the
    curves to ``q2pd.png``.

    Args:
        x_train: Training samples.
        y_train: Training labels.
        x_test: Test samples.
        y_test: Test labels.

    Returns:
        The ``max_features`` candidate with the highest test F1 (the first
        one on ties).
    """
    # candidate values for max_features
    useFeatures = [1, 2, 5, 8, 10, 20, 25, 35, 50]

    # NOTE(review): the original author reported interference between these
    # plotting functions and blamed shared local names; pyplot's global
    # "current figure" is the more likely cause, so this function keeps its
    # own numbered figure below.
    graphTrain2 = []
    graphTest2 = []
    graphF12 = []

    # let the user know which test this is
    print("== Beginning test for various max_features.\n")

    for features in useFeatures:
        print("max_features: ", features)
        rclf = RandomForestClassifier(max_depth=7,
                                      max_features=features,
                                      n_trees=50)
        rclf.fit(x_train, y_train)
        preds_train = rclf.predict(x_train)
        preds_test = rclf.predict(x_test)
        train_accuracy = accuracy_score(preds_train, y_train)
        test_accuracy = accuracy_score(preds_test, y_test)
        # Reuse the test predictions instead of predicting a second time.
        test_f1 = f1(y_test, preds_test)
        graphTrain2.append(train_accuracy)
        graphTest2.append(test_accuracy)
        graphF12.append(test_f1)
        print('Train {}'.format(train_accuracy))
        print('Test {}'.format(test_accuracy))
        print('F1 Test {}\n'.format(test_f1))

    # print lengths for debugging
    print("== Length of Train", len(graphTrain2))
    print("== Length of Test", len(graphTest2))
    print("== Length of F1", len(graphF12))

    # table for easily reading data
    table2 = pd.DataFrame({
        "max_features": list(useFeatures),
        "Train Accuracy": graphTrain2,
        "Test Accuracy": graphTest2,
        "F1 Accuracy": graphF12
    })
    print(table2)

    # plot our graph and output to a file
    plt.figure(3)
    plt.xlabel('Max Features')
    plt.ylabel('Performance')
    plt.title('Accuracy & F1 Score vs Max Features')
    plt.plot('max_features', 'Train Accuracy', data=table2, color='blue')
    plt.plot('max_features', 'Test Accuracy', data=table2, color='green')
    plt.plot('max_features', 'F1 Accuracy', data=table2, color='red')
    plt.legend()
    plt.savefig('q2pd.png')

    # return best value for max_features to use in main
    return useFeatures[graphF12.index(max(graphF12))]
示例#7
0
def decision_tree_testing(x_train, y_train, x_test, y_test, depth):
	"""Fit one decision tree of the given depth and report its scores.

	Args:
		x_train: Training samples.
		y_train: Training labels.
		x_test: Test samples.
		y_test: Test labels.
		depth: Value passed as ``max_depth`` to the classifier.

	Returns:
		Tuple ``(train_accuracy, test_accuracy, train_f1, test_f1)``.
	"""
	print('Decision Tree\n\n')
	clf = DecisionTreeClassifier(max_depth=depth)
	clf.fit(x_train, y_train)
	preds_train = clf.predict(x_train)
	preds_test = clf.predict(x_test)
	train_accuracy = accuracy_score(preds_train, y_train)
	test_accuracy = accuracy_score(preds_test, y_test)
	# Each F1 is computed once from the predictions above; the original
	# predicted the test set a second time and scored the test F1 twice.
	train_f1 = f1(y_train, preds_train)
	test_f1 = f1(y_test, preds_test)
	print('Train {}'.format(train_accuracy))
	print('Test {}'.format(test_accuracy))
	print('F1 Test {}'.format(test_f1))
	return train_accuracy, test_accuracy, train_f1, test_f1
示例#8
0
def adaboost_testing(x_train, y_train, x_test, y_test, M):
	"""Fit an AdaBoost ensemble of depth-1 trees and report its scores.

	Args:
		x_train: Training samples.
		y_train: Training labels.
		x_test: Test samples.
		y_test: Test labels.
		M: Third argument forwarded to ``AdaBoostClassifier.fit``
			(presumably the number of boosting rounds -- confirm).

	Returns:
		Tuple ``(train_accuracy, test_accuracy, train_f1, test_f1)``.
	"""
	print("Adaboost Tree\n\n")
	aclf = AdaBoostClassifier(max_depth = 1)
	aclf.fit(x_train, y_train, M)
	# Predict each split once; the original predicted both splits twice.
	preds_train = aclf.predict(x_train)
	preds_test = aclf.predict(x_test)
	train_accuracy = accuracy_score(preds_train, y_train)
	test_accuracy = accuracy_score(preds_test, y_test)
	train_f1 = f1(y_train, preds_train)
	test_f1 = f1(y_test, preds_test)
	print('Train {}'.format(train_accuracy))
	print('Test {}'.format(test_accuracy))
	print('F1 Test {}'.format(test_f1))
	return train_accuracy, test_accuracy, train_f1, test_f1
示例#9
0
def random_forest_testing(x_train, y_train, x_test, y_test, feat, tree):
	"""Fit one random forest (``max_depth=7``) and report its scores.

	Args:
		x_train: Training samples.
		y_train: Training labels.
		x_test: Test samples.
		y_test: Test labels.
		feat: Value passed as ``max_features``.
		tree: Value passed as ``n_trees``.

	Returns:
		Tuple ``(train_accuracy, test_accuracy, train_f1, test_f1)``.
	"""
	print('Random Forest\n\n')
	rclf = RandomForestClassifier(max_depth=7, max_features=feat, n_trees=tree)
	rclf.fit(x_train, y_train)
	# Predict each split once; the original predicted both splits twice.
	preds_train = rclf.predict(x_train)
	preds_test = rclf.predict(x_test)
	train_accuracy = accuracy_score(preds_train, y_train)
	test_accuracy = accuracy_score(preds_test, y_test)
	train_f1 = f1(y_train, preds_train)
	test_f1 = f1(y_test, preds_test)
	print('Train {}'.format(train_accuracy))
	print('Test {}'.format(test_accuracy))
	print('F1 Test {}'.format(test_f1))
	return train_accuracy, test_accuracy, train_f1, test_f1
示例#10
0
    def forward_f1(self, x_spt, y_spt, x_qry, y_qry):
        """Run one meta-training step and return per-update-step F1 scores.

        For every task the network is adapted on the support set with
        ``self.update_step`` manual gradient steps; after each step the query
        set is scored with ``F.cross_entropy`` and ``f1``. The query loss of
        the final step, averaged over tasks, is back-propagated through
        ``self.meta_optim``.

        Args:
            x_spt: Support inputs, indexed per task as ``x_spt[i]``.
            y_spt: Support labels, indexed per task.
            x_qry: Query inputs, indexed per task.
            y_qry: Query labels, indexed per task.

        Returns:
            Array of length ``update_step + 1``: accumulated ``f1`` results
            divided by ``n_way * k_qry * task_num``.
        """
        task_num = self.task_num
        querysz = self.n_way * self.k_qry

        # Slot k holds the query loss / F1 sum after k inner updates.
        losses_q = [0 for _ in range(self.update_step + 1)]
        f1s = [0 for _ in range(self.update_step + 1)]

        for i in range(task_num):
            # First inner update: gradient of the support loss w.r.t. the
            # network's own parameters.
            logits = self.net(x_spt[i], vars=None, bn_training=True)
            loss = F.cross_entropy(logits, y_spt[i])
            grad = torch.autograd.grad(loss, self.net.parameters())
            fast_weights = list(map(lambda p: p[1] - self.update_lr * p[0], zip(grad, self.net.parameters())))

            # Query score before any update (original parameters).
            with torch.no_grad():
                logits_q = self.net(x_qry[i], self.net.parameters(), bn_training=True)
                loss_q = F.cross_entropy(logits_q, y_qry[i])
                losses_q[0] += loss_q
                # pred_q = F.softmax(logits_q, dim=1).argmax(dim=1)
                result = f1(logits_q, y_qry[i])
                f1s[0] = f1s[0] + result

            # Query score after the first update.
            with torch.no_grad():
                logits_q = self.net(x_qry[i], fast_weights, bn_training=True)
                loss_q = F.cross_entropy(logits_q, y_qry[i])
                losses_q[1] += loss_q
                # pred_q = F.softmax(logits_q, dim=1).argmax(dim=1)
                result = f1(logits_q, y_qry[i])
                f1s[1] = f1s[1] + result

            # Remaining updates fill slots 2..update_step; these query losses
            # are computed with gradients enabled.
            for k in range(1, self.update_step):
                logits = self.net(x_spt[i], fast_weights, bn_training=True)
                loss = F.cross_entropy(logits, y_spt[i])
                grad = torch.autograd.grad(loss, fast_weights)
                fast_weights = list(map(lambda p: p[1] - self.update_lr * p[0], zip(grad, fast_weights)))
                logits_q = self.net(x_qry[i], fast_weights, bn_training=True)
                loss_q = F.cross_entropy(logits_q, y_qry[i])
                losses_q[k + 1] += loss_q

                with torch.no_grad():
                    # pred_q = F.softmax(logits_q, dim=1).argmax(dim=1)
                    result = f1(logits_q, y_qry[i])
                    f1s[k + 1] = f1s[k + 1] + result

        # Meta-update uses only the final step's query loss.
        # NOTE(review): with update_step == 1 that loss was accumulated under
        # no_grad, so backward() would presumably fail -- confirm.
        loss_q = losses_q[-1] / task_num
        self.meta_optim.zero_grad()
        loss_q.backward()
        self.meta_optim.step()
        # NOTE(review): dividing by querysz * task_num assumes f1() returns a
        # per-query sum rather than an already-normalised score -- confirm.
        accs = np.array(f1s) / (querysz * task_num)
        return accs
示例#11
0
def train(epochs):
    """Train the module-level ``model`` and log per-epoch averages.

    Iterates ``train_loader`` for ``epochs`` epochs, stepping ``optimizer``
    on ``criterion(model(inp), label)``. After each epoch the mean accuracy,
    precision, F1 and loss over the epoch's batches are written to
    TensorBoard under ``./log``. The model state dict is saved to ``PATH``
    at the end.

    Args:
        epochs: Number of passes over ``train_loader``.
    """
    print("Train start")
    writer = tensorboard.SummaryWriter(log_dir='./log', comment='Train loop')
    for ep in range(1, epochs + 1):
        epoch_loss, epoch_accuracy, epoch_precision = 0, 0, 0
        epoch_f1 = 0
        # Count batches explicitly: the last enumerate() index is one less
        # than the batch count, which skewed every average and divided by
        # zero for a single-batch (or empty) loader.
        num_batches = 0
        for inp, label in train_loader:
            optimizer.zero_grad()
            op = model(inp)
            loss = criterion(op, label)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            epoch_accuracy += accuracy(op, label)
            epoch_precision += precision(op, label)
            epoch_f1 += f1(op, label)
            num_batches += 1
        if num_batches == 0:
            # Nothing was processed, so there is nothing to average or log.
            continue
        writer.add_scalars(
            'Training', {
                'Accuracy': epoch_accuracy / num_batches,
                'Precision': epoch_precision / num_batches,
                'F1': epoch_f1 / num_batches
            }, ep)
        writer.add_scalars('Loss', {'Training': epoch_loss / num_batches}, ep)
    writer.close()
    torch.save(model.state_dict(), PATH)
    print("Done training")
def vectorized_rf(x_train,
                  y_train,
                  x_test,
                  y_test,
                  checktrain=True,
                  ngram_range=(1, 1),
                  vector_type="count",
                  dataset="default"):
    """Vectorize text, train a model on it and print train/test metrics.

    Args:
        x_train: Training documents, fed to the vectorizer's ``fit_transform``.
        y_train: Training labels.
        x_test: Test documents, transformed with the fitted vectorizer.
        y_test: Test labels.
        checktrain: Forwarded positionally to ``train``.
        ngram_range: N-gram range for the vectorizer; also forwarded to
            ``train``.
        vector_type: ``"count"`` selects ``CountVectorizer``; any other value
            selects ``TfidfVectorizer``.
        dataset: Dataset tag used in the printed labels and forwarded to
            ``train``.
    """
    if vector_type == "count":
        vectorizer_cls = CountVectorizer
    else:
        vectorizer_cls = TfidfVectorizer
    vectorizer = vectorizer_cls(tokenizer=tokenize, ngram_range=ngram_range)

    # Fit the vocabulary on the training text only; the test text reuses it.
    train_matrix = vectorizer.fit_transform(x_train)
    test_matrix = vectorizer.transform(x_test)

    model = train(train_matrix,
                  y_train,
                  checktrain,
                  ngram_range,
                  vector_type=vector_type,
                  dataset=dataset)
    train_predictions = predict(model, train_matrix)
    test_predictions = predict(model, test_matrix)

    precision_test = precision(y_test, test_predictions)
    recall_test = recall(y_test, test_predictions)
    f1_test = f1(y_test, test_predictions)

    train_accuracy = accuracy(y_train, train_predictions)
    print("Accuracy training accuracy (" + dataset, vector_type,
          " vectorized joint data) =", train_accuracy)
    test_accuracy = accuracy(y_test, test_predictions)
    print("Accuracy testing accuracy (" + dataset, vector_type,
          "vectorized joint data) =", test_accuracy, "\n")

    print("Precision (" + dataset, vector_type + " vectorized test data) =",
          precision_test)
    print("Recall (" + dataset, vector_type + " test data) =", recall_test)
    print("F1 (" + dataset, vector_type + " test data) =", f1_test, "\n")
def simple_lstm(x_train, y_train, x_test, y_test, dataset="default"):
    """Train a model via ``train`` and print its train/test metrics.

    Labels and predictions are reduced to class indices with ``argmax`` over
    axis 1 before scoring; precision, recall and F1 are computed for labels
    ``(0, 1, 2)``.

    Args:
        x_train: Training inputs (must expose ``.shape``).
        y_train: Training targets, reduced with ``argmax(axis=1)``.
        x_test: Test inputs (must expose ``.shape``).
        y_test: Test targets, reduced with ``argmax(axis=1)``.
        dataset: Dataset tag forwarded to ``train``.
    """
    model = train(x_train, y_train, dataset=dataset)
    print(x_train.shape)
    print(x_test.shape)

    train_scores = model.predict(x_train)
    test_scores = model.predict(x_test)

    # Collapse the test targets/predictions once and reuse them below.
    test_true = np.argmax(y_test, axis=1)
    test_pred = np.argmax(test_scores, axis=1)
    class_labels = (0, 1, 2)

    precision_test = precision(test_true, test_pred, labels=class_labels)
    recall_test = recall(test_true, test_pred, labels=class_labels)
    f1_test = f1(test_true, test_pred, labels=class_labels)

    train_true = np.argmax(y_train, axis=1)
    train_pred = np.argmax(train_scores, axis=1)
    print("Accuracy training accuracy = ", accuracy(train_true, train_pred))
    print("Accuracy testing accuracy =", accuracy(test_true, test_pred), "\n")

    print("Precision (test data) =", precision_test)
    print("Recall (test data) =", recall_test)
    print("F1 (test data) =", f1_test, "\n")
def train(trainloader, model, criterion, optimizer, epoch, use_cuda):
    """Run one training pass over ``trainloader`` with a progress bar.

    Args:
        trainloader: Iterable of ``(inputs, targets)`` batches supporting
            ``len()``.
        model: Network to train; switched to train mode first.
        criterion: Loss function called as ``criterion(outputs, targets)``.
        optimizer: Optimizer stepped once per batch.
        epoch: Not used inside this function.
        use_cuda: When true, each batch is moved to the GPU.

    Returns:
        Tuple ``(average loss, average top-1 accuracy)`` taken from the
        running meters.
    """
    model.train()

    batch_timer = AverageMeter()
    load_timer = AverageMeter()
    loss_meter = AverageMeter()
    top1_meter = AverageMeter()
    micro_meter = AverageMeter()
    macro_meter = AverageMeter()
    tick = time.time()

    total_batches = len(trainloader)
    bar = Bar('Processing', max=total_batches)
    suffix_template = (
        '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | micro: {micro: .4f} | macro: {macro: .4f}'
    )

    for step, (inputs, targets) in enumerate(trainloader):
        # time spent waiting for the loader
        load_timer.update(time.time() - tick)

        if use_cuda:
            inputs = inputs.cuda()
            targets = targets.cuda(non_blocking=True)
        inputs = torch.autograd.Variable(inputs)
        targets = torch.autograd.Variable(targets)

        # forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # metrics are taken on this forward pass, before the weight update
        batch_size = inputs.size(0)
        prec1, _ = accuracy(outputs.data, targets.data, topk=(1, 2))
        loss_meter.update(loss.item(), batch_size)
        top1_meter.update(prec1.item(), inputs.size(0))
        # f1() yields (macro, micro) in that order
        batch_macro, batch_micro = f1(outputs.data, targets.data)
        micro_meter.update(batch_micro, inputs.size(0))
        macro_meter.update(batch_macro, inputs.size(0))

        # backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # wall-clock time for the whole batch
        batch_timer.update(time.time() - tick)
        tick = time.time()

        # refresh the progress line
        progress = {
            'batch': step + 1,
            'size': total_batches,
            'data': load_timer.avg,
            'bt': batch_timer.avg,
            'total': bar.elapsed_td,
            'eta': bar.eta_td,
            'loss': loss_meter.avg,
            'top1': top1_meter.avg,
            'micro': micro_meter.avg,
            'macro': macro_meter.avg,
        }
        bar.suffix = suffix_template.format(**progress)
        bar.next()
    bar.finish()
    return (loss_meter.avg, top1_meter.avg)
示例#15
0
文件: main.py 项目: lunng/CS434_A3
def random_forest_various_seeds(x_train, y_train, x_test, y_test,
                                best_max_features, best_n_trees):
    """Refit one forest configuration under 10 random seeds and tabulate.

    A single ``RandomForestClassifier`` (``max_depth=7``) is refitted ten
    times after assigning a fresh random integer to its ``seed`` attribute;
    train/test accuracy and test F1 are printed per run, followed by a table
    with one row per seed plus an "Average" row.

    Args:
        x_train: Training samples.
        y_train: Training labels.
        x_test: Test samples.
        y_test: Test labels.
        best_max_features: Value passed as ``max_features``.
        best_n_trees: Value passed as ``n_trees``.
    """
    # let the user know which test this is
    print("== Beginning test for best result with random seeds.\n")

    # per-run data points
    randseedTrain = []
    randseedTest = []
    randseedF1 = []
    usedSeeds = []

    rclf = RandomForestClassifier(max_depth=7,
                                  max_features=best_max_features,
                                  n_trees=best_n_trees)

    for _ in range(10):
        # NOTE(review): assumes fit() reads `rclf.seed`; confirm against the
        # classifier implementation.
        rclf.seed = np.random.randint(1, 1000)
        usedSeeds.append(rclf.seed)
        rclf.fit(x_train, y_train)
        preds_train = rclf.predict(x_train)
        preds_test = rclf.predict(x_test)
        train_accuracy = accuracy_score(preds_train, y_train)
        test_accuracy = accuracy_score(preds_test, y_test)
        # Reuse the test predictions instead of predicting a second time.
        test_f1 = f1(y_test, preds_test)
        randseedTrain.append(train_accuracy)
        randseedTest.append(test_accuracy)
        randseedF1.append(test_f1)
        print('Train {}'.format(train_accuracy))
        print('Test {}'.format(test_accuracy))
        print('F1 Test {}\n'.format(test_f1))

    # averages form the final table row
    averageTrain = sum(randseedTrain) / len(randseedTrain)
    averageTest = sum(randseedTest) / len(randseedTest)
    averageF1 = sum(randseedF1) / len(randseedF1)

    # table of per-seed results with the averages appended at the end
    table3 = pd.DataFrame({
        "Seed": usedSeeds + ["Average"],
        "Train Accuracy": randseedTrain + [averageTrain],
        "Test Accuracy": randseedTest + [averageTest],
        "F1 Score": randseedF1 + [averageF1]
    })
    print(table3)
示例#16
0
文件: main.py 项目: wnsgur4322/CS-434
def ada_boost_testing(x_train, y_train, x_test, y_test, num_learner):
	"""Fit an AdaBoost classifier and report accuracy and F1 on both splits.

	Args:
		x_train: Training samples.
		y_train: Training labels.
		x_test: Test samples.
		y_test: Test labels.
		num_learner: Passed positionally to ``AdaBoostClassifier``; printed
			as the ``L`` value.

	Returns:
		Tuple ``(train_accuracy, test_accuracy, train_f1, test_f1)``.
	"""
	print('Ada Boost and L(', num_learner, ')')
	aba = AdaBoostClassifier(num_learner)
	aba.fit(x_train, y_train)
	# Predict each split once; the original predicted both splits twice.
	preds_train = aba.predict(x_train)
	preds_test = aba.predict(x_test)
	train_accuracy = accuracy_score(preds_train, y_train)
	test_accuracy = accuracy_score(preds_test, y_test)
	print('Train {}'.format(train_accuracy))
	print('Test {}'.format(test_accuracy))

	# Each F1 is computed once and reused for printing and the return value.
	train_f1 = f1(y_train, preds_train)
	test_f1 = f1(y_test, preds_test)
	print('F1 Train {}'.format(train_f1))
	print('F1 Test {}\n'.format(test_f1))

	return train_accuracy, test_accuracy, train_f1, test_f1
示例#17
0
文件: train.py 项目: Eudialyte/SepGAT
    def train(epoch):
        """Run one training epoch followed by a validation pass.

        Uses ``model``, ``optimizer``, ``features``, ``labels``, the
        ``adj_*`` / ``idx_*`` data and ``args`` from the enclosing scope.
        The metric is ``f1`` for the ``'reddit'`` dataset and ``accuracy``
        otherwise.

        Args:
            epoch: Zero-based epoch index; used for periodic printing.

        Returns:
            Tuple ``(validation loss, validation metric, epoch time in
            seconds)``.
        """
        def closure():
            # Re-evaluates the training loss for optimizers that call the
            # closure themselves. It must RETURN the loss:
            # optimizer.step(closure) uses the returned value, and the
            # original closure returned None.
            optimizer.zero_grad()
            output = model(features, adj_train)
            loss_train = F.cross_entropy(output[idx_train], labels[idx_train])
            loss_train.backward()
            return loss_train

        t = time.time()
        model.train()
        optimizer.zero_grad()
        output = model(features, adj_train)
        loss_train = F.cross_entropy(output[idx_train], labels[idx_train])
        if args.dataset == 'reddit':
            acc_train = f1(output[idx_train], labels[idx_train])
        else:
            acc_train = accuracy(output[idx_train], labels[idx_train])
        if args.optimizer == 'lbfgs':
            optimizer.step(closure)
        else:
            loss_train.backward()
            if args.grad_clip:
                torch.nn.utils.clip_grad_value_(model.parameters(),
                                                args.grad_clip)
            optimizer.step()

        if not args.fastmode:
            # Evaluate validation set performance separately,
            # deactivates dropout during validation run.
            model.eval()
            output = model(features, adj_val)

        # In fast mode `output` is still the train-mode forward pass from
        # above, so the validation numbers come from that pass.
        loss_val = F.cross_entropy(output[idx_val], labels[idx_val])
        if args.dataset == 'reddit':
            acc_val = f1(output[idx_val], labels[idx_val])
        else:
            acc_val = accuracy(output[idx_val], labels[idx_val])
        epoch_time = time.time() - t
        # Print on the last epoch of every `args.print`-sized window.
        if args.print and epoch % args.print == args.print - 1:
            print('Epoch: {:04d}'.format(epoch + 1),
                  'loss_train: {:.4f}'.format(loss_train.item()),
                  'acc_train: {:.4f}'.format(acc_train.item()),
                  'loss_val: {:.4f}'.format(loss_val.item()),
                  'acc_val: {:.4f}'.format(acc_val.item()),
                  'time: {:.4f}s'.format(time.time() - t))
        return loss_val.item(), acc_val.item(), epoch_time
示例#18
0
文件: main.py 项目: lunng/CS434_A3
def random_forest_various_trees(x_train, y_train, x_test, y_test):
    """Sweep ``n_trees`` from 10 to 200 and return the best value.

    Fits a forest (``max_depth=7``, ``max_features=11``) per candidate,
    prints train/test accuracy and test F1, tabulates the results and saves
    the curves to ``q2pb.png``.

    Args:
        x_train: Training samples.
        y_train: Training labels.
        x_test: Test samples.
        y_test: Test labels.

    Returns:
        The ``n_trees`` candidate with the highest test F1 (the first one on
        ties).
    """
    # One grid shared by the sweep, the table and the best-value lookup.
    tree_counts = list(range(10, 210, 10))
    graphTrain = []
    graphTest = []
    graphF1 = []

    # let the user know which test this is
    print("== Beginning test for various n_trees.\n")

    for n_trees in tree_counts:
        print("n_trees: ", n_trees)
        rclf = RandomForestClassifier(max_depth=7,
                                      max_features=11,
                                      n_trees=n_trees)
        rclf.fit(x_train, y_train)
        preds_train = rclf.predict(x_train)
        preds_test = rclf.predict(x_test)
        train_accuracy = accuracy_score(preds_train, y_train)
        test_accuracy = accuracy_score(preds_test, y_test)
        # Reuse the test predictions instead of predicting a second time.
        test_f1 = f1(y_test, preds_test)
        graphTrain.append(train_accuracy)
        graphTest.append(test_accuracy)
        graphF1.append(test_f1)
        print('Train {}'.format(train_accuracy))
        print('Test {}'.format(test_accuracy))
        print('F1 Test {}\n'.format(test_f1))

    # table for easily reading data
    table = pd.DataFrame({
        "n_trees": tree_counts,
        "Train Accuracy": graphTrain,
        "Test Accuracy": graphTest,
        "F1 Accuracy": graphF1
    })
    print(table)

    # plot our graph and output to a file
    plt.figure(2)
    plt.xlabel('Number of trees')
    plt.ylabel('Performance')
    plt.title('Accuracy & F1 Score vs Number of Trees in the Forest')
    plt.plot('n_trees', 'Train Accuracy', data=table, color='blue')
    plt.plot('n_trees', 'Test Accuracy', data=table, color='green')
    plt.plot('n_trees', 'F1 Accuracy', data=table, color='red')
    plt.legend()
    plt.savefig('q2pb.png')

    # return our best n_trees value for use in main
    return tree_counts[graphF1.index(max(graphF1))]
示例#19
0
文件: main.py 项目: wnsgur4322/CS-434
def random_forest_testing(x_train, y_train, x_test, y_test, n_trees, max_features):
	"""Fit one random forest (``max_depth=7``) and report its scores.

	Args:
		x_train: Training samples.
		y_train: Training labels.
		x_test: Test samples.
		y_test: Test labels.
		n_trees: First positional argument of ``RandomForestClassifier``.
		max_features: Second positional argument of
			``RandomForestClassifier``.

	Returns:
		Tuple ``(train_accuracy, test_accuracy, train_f1, test_f1)``.
	"""
	print('Random Forest')
	print("max_depth: %d, max_features: %d, n_trees: %d" % (7,max_features, n_trees))
	rclf = RandomForestClassifier(n_trees, max_features, max_depth=7)
	rclf.fit(x_train, y_train)
	# Predict each split once; the original predicted both splits twice.
	preds_train = rclf.predict(x_train)
	preds_test = rclf.predict(x_test)
	train_accuracy = accuracy_score(preds_train, y_train)
	test_accuracy = accuracy_score(preds_test, y_test)
	print('Train {}'.format(train_accuracy))
	print('Test {}'.format(test_accuracy))

	# Each F1 is computed once and reused for printing and the return value.
	train_f1 = f1(y_train, preds_train)
	test_f1 = f1(y_test, preds_test)
	print('F1 Train {}'.format(train_f1))
	print('F1 Test {}\n'.format(test_f1))

	return train_accuracy, test_accuracy, train_f1, test_f1
示例#20
0
def f1_metric(label, pred):
    """Return the mean F1 over consecutive groups of ``label`` / ``pred``.

    The arrays are cut into consecutive slices whose lengths come from the
    module-level ``list_idx``; ``f1`` is evaluated per slice and the scores
    are averaged.

    Args:
        label: Array of true labels; cast to integers before scoring.
        pred: Predictions aligned with ``label``.

    Returns:
        Tuple ``('f1', mean_score, True)``.
        NOTE(review): this looks like a (name, value, higher-is-better)
        custom-metric triple -- confirm against the caller.
    """
    # `np.int` was only an alias of the builtin and was removed in
    # NumPy 1.24; the builtin `int` yields the same dtype.
    label = label.astype(int)
    start = 0
    res = []
    for size in list_idx:
        end = start + size
        res.append(f1(label[start:end], pred[start:end]))
        start = end
    sc = np.mean(res)
    return 'f1', sc, True
示例#21
0
def macro_f1(y_true, y_pred, num_classes=3):
    """Average the per-class F1 over the last axis of the inputs.

    Args:
        y_true: Ground-truth tensor; class ``i`` is read as ``y_true[..., i]``.
        y_pred: Prediction tensor; values >= 0.5 count as positive.
        num_classes: Number of slices along the last axis to score.

    Returns:
        The mean of the per-class F1 values, cast to ``float32``.
    """
    smoothing = 0.0001  # keeps every denominator non-zero

    def class_f1(truth, scores):
        # Binarise the scores, then compute a smoothed F1 for one class.
        hard = K.cast(scores >= 0.5, 'float32')
        true_pos = K.sum(hard * truth)
        precision = true_pos/(K.sum(hard)+smoothing)
        recall = true_pos/(K.sum(truth)+smoothing)
        return 2*precision*recall / (precision+recall+smoothing)

    total = 0
    for cls in range(num_classes):
        total = total + class_f1(y_true[..., cls], y_pred[..., cls])
    return K.cast(total/num_classes, 'float32')
示例#22
0
def ababoost(x_train, y_train, x_test, y_test):
    """Run three rounds of ``AdaBoostClassifier.adaboost`` and print scores.

    Each round feeds the sample weights ``D`` returned by the previous round
    back in. The predictions of the last round are scored against the labels
    with 0 mapped to -1.

    Args:
        x_train: Training samples.
        y_train: Training labels; zeros are treated as -1 for scoring.
        x_test: Test samples.
        y_test: Test labels; zeros are treated as -1 for scoring.
    """
    print('Ababoost\n\n')
    leni = len(x_train)
    L = 3
    D = np.array([1 / leni] * leni)  #The first D is 1/length of train set
    bclf = AdaBoostClassifier()
    for i in range(L):
        preds_train, preds_test, D, we = bclf.adaboost(x_train, y_train,
                                                       x_test, y_test, D)
    # Remap 0 -> -1 on copies: the original rewrote the caller's arrays in
    # place, so a second call would have seen different labels.
    y_train_signed = np.where(y_train == 0, -1, y_train)
    y_test_signed = np.where(y_test == 0, -1, y_test)
    train_accuracy = accuracy_score(preds_train, y_train_signed)
    test_accuracy = accuracy_score(preds_test, y_test_signed)
    print('L = ', L)
    print(D)
    print('Train {}'.format(train_accuracy))
    print('Test {}'.format(test_accuracy))
    print('F1 Train {}'.format(f1(y_train_signed, preds_train)))
    print('F1 Test {}'.format(f1(y_test_signed, preds_test)))
    print('we = ', we)
示例#23
0
文件: train.py 项目: Eudialyte/SepGAT
 def test():
     """Score the model on the test split and return the metric value.

     Uses ``model``, ``features``, ``adj_test``, ``labels``, ``idx_test``
     and ``args`` from the enclosing scope. The metric is ``f1`` for the
     ``'reddit'`` dataset and ``accuracy`` otherwise; it is printed under
     the label "accuracy" in both cases.
     """
     model.eval()
     logits = model(features, adj_test)
     test_logits = logits[idx_test]
     test_labels = labels[idx_test]
     loss_test = F.cross_entropy(test_logits, test_labels)
     # Select the scoring function once, then apply it.
     metric = f1 if args.dataset == 'reddit' else accuracy
     acc_test = metric(test_logits, test_labels)
     score = acc_test.item()
     print("Test set results:", "loss= {:.4f}".format(loss_test.item()),
           "accuracy= {:.4f}".format(score))
     return score
示例#24
0
def decision_tree_testing(x_train, y_train, x_test, y_test, depth=1):
    """Fit one decision tree and print its train/test accuracy and test F1.

    Args:
        x_train: Training samples.
        y_train: Training labels.
        x_test: Test samples.
        y_test: Test labels.
        depth: Value passed as ``max_depth``; defaults to 1, the depth that
            was previously hard-coded.
    """
    print('Decision Tree depth: ', depth)
    clf = DecisionTreeClassifier(max_depth=depth)
    clf.fit(x_train, y_train)
    preds_train = clf.predict(x_train)
    preds_test = clf.predict(x_test)
    train_accuracy = accuracy_score(preds_train, y_train)
    test_accuracy = accuracy_score(preds_test, y_test)
    print('Train {}'.format(train_accuracy))
    print('Test {}'.format(test_accuracy))
    # Reuse the test predictions instead of predicting a second time.
    print('F1 Test {}'.format(f1(y_test, preds_test)))
def create_trees(x_train, y_train, x_test, y_test, maxdepth):
    """Fit one decision tree silently and return its scores.

    Args:
        x_train: Training samples.
        y_train: Training labels.
        x_test: Test samples.
        y_test: Test labels.
        maxdepth: Value passed as ``max_depth``.

    Returns:
        Tuple ``(test_f1, train_accuracy, test_accuracy)`` -- note that the
        F1 score comes first.
    """
    clf = DecisionTreeClassifier(max_depth=maxdepth)
    clf.fit(x_train, y_train)
    preds_train = clf.predict(x_train)
    preds_test = clf.predict(x_test)
    train_accuracy = accuracy_score(preds_train, y_train)
    test_accuracy = accuracy_score(preds_test, y_test)
    # Reuse the test predictions instead of predicting a second time.
    test_f1 = f1(y_test, preds_test)
    return test_f1, train_accuracy, test_accuracy
示例#26
0
文件: main.py 项目: lunng/CS434_A3
def ada_boost_testing(x_train, y_train, x_test, y_test):
    """Sweep the AdaBoost ensemble size and report accuracy and F1 per size.

    For every L in 10, 20, ..., 190 an ``AdaBoostClassifier(n_trees=L)`` is
    fitted on the training data. Train accuracy, test accuracy and test F1 are
    printed for each L, then collected into a table that is printed and
    plotted to ``q3.png``.

    Args:
        x_train: Training features.
        y_train: Training labels.
        x_test: Test features.
        y_test: Test labels.
    """
    print('AdaBoost\n\n')
    # Single source of truth for the sweep, shared by the loop and the table.
    l_values = list(range(10, 200, 10))
    graphTrain = []
    graphTest = []
    graphF1 = []
    for n_trees in l_values:
        weak = AdaBoostClassifier(n_trees=n_trees)
        weak.fit(x_train, y_train)
        preds_train = weak.predict(x_train)
        preds_test = weak.predict(x_test)
        train_accuracy = accuracy_score(preds_train, y_train)
        test_accuracy = accuracy_score(preds_test, y_test)
        print('L {}'.format(n_trees))
        print('Train {}'.format(train_accuracy))
        print('Test {}'.format(test_accuracy))
        # Compute F1 once from the existing test predictions; it is both
        # printed and recorded for the table.
        test_f1 = f1(y_test, preds_test)
        print('F1 Test {}'.format(test_f1))

        graphTrain.append(train_accuracy)
        graphTest.append(test_accuracy)
        graphF1.append(test_f1)

    table = pd.DataFrame({
        "L Parameter": l_values,
        "Train Accuracy": graphTrain,
        "Test Accuracy": graphTest,
        "F1 Accuracy": graphF1,
    })
    print(table)

    plt.xlabel('L Parameter')
    plt.ylabel('Performance')
    plt.title('Accuracy & F1 Score vs L')
    plt.plot('L Parameter', 'Train Accuracy', data=table, color='blue')
    plt.plot('L Parameter', 'Test Accuracy', data=table, color='green')
    plt.plot('L Parameter', 'F1 Accuracy', data=table, color='red')
    plt.legend()
    plt.savefig('q3.png')
示例#27
0
def test_overfit(model, train, input_model=None):
    """ Tests that model can overfit on small datasets.

    Args:
        model: QA model that has an instance variable 'answer' that returns answer span and takes placeholders
        question, question_length, paragraph, paragraph_length
        train: Training set
        input_model: Optional model whose ``run(question)`` output replaces
            the first element of each batch before it is fed to ``model``.
    """
    epochs = 100
    test_size = 32
    steps_per_epoch = 10
    # Shrink the training set to its first `test_size` examples.
    (train.question, train.paragraph, train.question_length,
     train.paragraph_length, train.answer) = train[:test_size]
    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        for epoch in range(epochs):
            epoch_start = timer()
            for step in range(steps_per_epoch):
                feed_dict_inputs = train.get_batch(FLAGS.batch_size,
                                                   replace=False)
                if input_model:
                    # Feed into siamese model instead. The batch is copied
                    # into a list so its first slot can be replaced even if
                    # get_batch returns a tuple (return type not visible here).
                    feed_dict_inputs = list(feed_dict_inputs)
                    question = feed_dict_inputs[0]
                    feed_dict_inputs[0] = input_model.run(question)
                feed_dict = model.fill_feed_dict(*feed_dict_inputs)

                fetch_dict = {
                    'step': tf.train.get_global_step(),
                    'loss': model.loss,
                    'train': model.train
                }
                result = session.run(fetch_dict, feed_dict)
                loss = result['loss']

                if (step == 0 and epoch == 0):
                    print(
                        f'Entropy - Result: {loss:.2f}, Expected (approx.): {2*np.log(FLAGS.max_paragraph_length):.2f}'
                    )
                if step == steps_per_epoch - 1:
                    # Last step of the epoch: report loss and F1 on the
                    # reduced training set.
                    print(f'Cross entropy: {loss:.2f}')
                    train.length = test_size
                    prediction, truth = multibatch_prediction_truth(
                        session, model, train, 1, input_model=input_model)
                    overfit_f1 = f1(prediction, truth)
                    print(f'F1: {overfit_f1:.2f}')
            global_step = tf.train.get_global_step().eval()
            print(
                f'Epoch took {timer() - epoch_start:.2f} s (step: {global_step})'
            )
示例#28
0
def find_best_c(x, y, share):
    """Pick the value of ``c`` from 2**-7 .. 2**6 with the highest check-set F1.

    ``x`` and ``y`` are each split with ``utils.split_data(..., share)`` into a
    training part and a check part. For every candidate ``c`` a model is
    trained, evaluated on the check part, and scored with ``utils.f1``.
    A candidate only replaces the current best on a strictly higher score, so
    ``2**-7`` is returned when no score exceeds 0.
    """
    x_train, x_check = utils.split_data(x, share)
    y_train, y_check = utils.split_data(y, share)

    winner, top_score = 2 ** -7, 0
    for exponent in range(-7, 7):
        candidate = 2 ** exponent
        trained = train(x_train, y_train, candidate)
        outcome = test(x_check, y_check, trained)
        precision, recall = utils.process_result(outcome)
        score = utils.f1(precision, recall)
        if score > top_score:
            winner, top_score = candidate, score
    return winner
示例#29
0
def find_best_c(x, y, share):
    """Grid-search ``c`` over powers of two and return the best-scoring one.

    The data is split into train/check parts via ``utils.split_data``. Each
    ``c`` in ``2**-7, 2**-6, ..., 2**6`` is used to train a model, which is
    then tested on the check part and scored with ``utils.f1``. The first
    candidate reaching the highest score above 0 wins; if none does, the
    initial ``2**-7`` is returned.
    """
    x_train, x_check = utils.split_data(x, share)
    y_train, y_check = utils.split_data(y, share)

    candidates = [2**power for power in range(-7, 7)]
    chosen_c = 2**-7
    highest = 0
    for c_value in candidates:
        fitted = train(x_train, y_train, c_value)
        p, r = utils.process_result(test(x_check, y_check, fitted))
        current = utils.f1(p, r)
        if current > highest:
            highest = current
            chosen_c = c_value
    return chosen_c
示例#30
0
def ada_boost_testing(x_train, y_train, x_test, y_test, num_learner=50):
    """Fit an AdaBoost ensemble and print its train/test accuracy and test F1.

    Args:
        x_train: Training features.
        y_train: Training labels.
        x_test: Test features.
        y_test: Test labels.
        num_learner: Value passed positionally to ``AdaBoostClassifier``
            (presumably the number of weak learners; confirm against the
            classifier's constructor).
    """
    print('Ada Boost')
    print(x_train, y_train)
    aba = AdaBoostClassifier(num_learner)
    aba.fit(x_train, y_train)
    preds_train = aba.predict(x_train)
    preds_test = aba.predict(x_test)

    print(preds_train, preds_test)

    train_accuracy = accuracy_score(preds_train, y_train)
    test_accuracy = accuracy_score(preds_test, y_test)
    print('Train {}'.format(train_accuracy))
    print('Test {}'.format(test_accuracy))
    # Reuse the test predictions instead of running predict() a second time.
    print('F1 Test {}'.format(f1(y_test, preds_test)))
示例#31
0
def find_best_c(x, y, share, count):
    """Return the ``c`` in 10, 20, 30, 40 with the highest check-set F1.

    ``x`` and ``y`` are split with ``utils.split_data(..., share)``. For each
    candidate, ``train`` yields two values that are passed on to ``test``; the
    result is scored with ``utils.f1``. Only a strictly higher score replaces
    the current best, and ``-1`` is returned when no score exceeds 0.
    """
    x_train, x_check = utils.split_data(x, share)
    y_train, y_check = utils.split_data(y, share)

    winner, top_score = -1, 0
    for candidate in range(10, 50, 10):
        w1, w2 = train(x_train, y_train, candidate, count)
        outcome = test(x_check, y_check, w1, w2)
        precision, recall = utils.process_result(outcome)
        score = utils.f1(precision, recall)
        if score > top_score:
            winner, top_score = candidate, score
    return winner
示例#32
0
def find_best_c(x, y, share, count):
    """Search ``c`` over 10..40 in steps of 10 and return the best one by F1.

    The inputs are split into train/check parts via ``utils.split_data``. Every
    candidate is trained with ``train(..., c, count)``, tested on the check
    part, and scored with ``utils.f1``. The result is ``-1`` if no candidate
    scores above 0.
    """
    x_train, x_check = utils.split_data(x, share)
    y_train, y_check = utils.split_data(y, share)

    chosen_c = -1
    highest = 0
    for c_value in (10, 20, 30, 40):
        first_w, second_w = train(x_train, y_train, c_value, count)
        tested = test(x_check, y_check, first_w, second_w)
        p, r = utils.process_result(tested)
        current = utils.f1(p, r)
        if current > highest:
            highest = current
            chosen_c = c_value
    return chosen_c
 def test_maclaurin_series_0(self):
   """maclaurin_series of utils.f1(50, 25, 20) with second argument 0 equals 1."""
   series_input = utils.f1(50, 25, 20)
   actual = maclaurin.maclaurin_series(series_input, 0)
   self.assertEqual(actual, 1)
 def test_maclaurin_series_5(self):
   """maclaurin_series of utils.f1(50, 25, 20) with second argument 5 matches the pinned value."""
   series_input = utils.f1(50, 25, 20)
   actual = maclaurin.maclaurin_series(series_input, 5)
   self.assertEqual(actual, 0.7297882727154748)
 def test_maclaurin_series_1(self):
   """maclaurin_series of utils.f1(50, 25, 20) with second argument 1 matches the pinned value."""
   series_input = utils.f1(50, 25, 20)
   actual = maclaurin.maclaurin_series(series_input, 1)
   self.assertEqual(actual, 0.7660444431189781)
 def test_maclaurin_series_4(self):
   """maclaurin_series of utils.f1(50, 25, 20) with second argument 4 matches the pinned value."""
   series_input = utils.f1(50, 25, 20)
   actual = maclaurin.maclaurin_series(series_input, 4)
   self.assertEqual(actual, 0.7304015754171275)
示例#37
0
def print_result(name, precision, recall):
    """Print a labelled precision / recall / F1 report.

    Emits ``name``, then precision and recall to three decimals, then the
    value of ``utils.f1(precision, recall)`` to three decimals, and finally a
    separator line of ``#`` characters.
    """
    print(name)
    pr_line = "precision: %.3f recall: %.3f" % (precision, recall)
    print(pr_line)
    # F1 is computed only after the first two lines have been printed.
    f1_line = "f1: %.3f" % (utils.f1(precision, recall))
    print(f1_line)
    print("###############")
 def test_maclaurin_series_2(self):
   """maclaurin_series of utils.f1(50, 25, 20) with second argument 2 matches the pinned value."""
   series_input = utils.f1(50, 25, 20)
   actual = maclaurin.maclaurin_series(series_input, 2)
   self.assertEqual(actual, 0.7386768418212236)
 def test_maclaurin_series_3(self):
   """maclaurin_series of utils.f1(50, 25, 20) with second argument 3 matches the pinned value."""
   series_input = utils.f1(50, 25, 20)
   actual = maclaurin.maclaurin_series(series_input, 3)
   self.assertEqual(actual, 0.7322740394191096)