Example #1
import numpy as np
import matplotlib.pyplot as plt


# NN is the project's network class, defined elsewhere.
def plot(X_tr, y_tr, a_tr, X_val, y_val, a_val):
    class_err = np.zeros((11, 1))
    mae = np.zeros((11, 1))
    count = 0

    # Sweep the trade-off parameter alpha from 0.0 to 1.0 in steps of 0.1
    # (these are the x values plotted below).
    for i in np.arange(0.0, 1.1, 0.1):
        nn = NN(i, 20)
        nn.fit(X_tr, y_tr, a_tr)
        nn.predict(X_tr)
        (class_err_tr, class_err_val, mae_tr, mae_val) = nn.calc_error(
            X_tr, y_tr, a_tr, X_val, y_val, a_val)
        class_err[count] = class_err_val
        mae[count] = mae_val
        count += 1

    fig1 = plt.gcf()
    plt.plot(
        np.arange(0.0, 1.1, 0.1),
        class_err,
        marker='o',
        linestyle='dashed',
        label=r'Observed Classification Error Rate (Y-axis) at '
              r'Trade-off parameter $\alpha$ (X-axis)')
    plt.legend(prop={'size': 22})
    plt.ylabel('Classification Error on Validation Data', fontsize=22)
    plt.yticks(np.arange(0.0, 1.1, 0.1).tolist())
    plt.xlabel(r'Trade-off parameter $\alpha$', fontsize=22)
    plt.xticks(np.arange(0.0, 1.1, 0.1).tolist())
    plt.title('Classification Error vs Trade-off Parameter', fontsize=22)
    mng = plt.get_current_fig_manager()
    mng.window.showMaximized()  # maximize the window; requires the Qt backend
    fig1.savefig('C.png')  # save before show() so the figure is not empty
    plt.show()

    fig2 = plt.gcf()
    plt.plot(
        np.arange(0.0, 1.1, 0.1),
        mae,
        marker='o',
        linestyle='dashed',
        label=r'Observed Mean Absolute Error (Y-axis) at '
              r'Trade-off parameter $\alpha$ (X-axis)')
    plt.legend(prop={'size': 22})
    plt.ylabel('Mean Absolute Error on Validation Data', fontsize=22)
    plt.xlabel(r'Trade-off parameter $\alpha$', fontsize=22)
    plt.xticks(np.arange(0.0, 1.1, 0.1).tolist())
    plt.title('Mean Absolute Error vs Trade-off Parameter', fontsize=22)
    mng = plt.get_current_fig_manager()
    mng.window.showMaximized()  # maximize the window; requires the Qt backend
    fig2.savefig('M.png')  # save before show() so the figure is not empty
    plt.show()
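The plot() helper assumes a project-specific NN class. A hypothetical stub of the interface it relies on (the constructor arguments and return values here are assumptions inferred from the calls above, not the original implementation):

class NN:
    def __init__(self, alpha, hidden_units):
        # alpha: trade-off parameter between the two objectives,
        # hidden_units: hidden-layer width (assumed meanings)
        self.alpha = alpha
        self.hidden_units = hidden_units

    def fit(self, X, y, a):
        ...  # train on images X, class labels y, angles a

    def predict(self, X):
        ...  # return predictions for X

    def calc_error(self, X_tr, y_tr, a_tr, X_val, y_val, a_val):
        ...  # returns [train_class_err, val_class_err, train_mae, val_mae]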
Example #2
import numpy as np
from sklearn.metrics import average_precision_score, roc_auc_score


# load_model and setup_val_data are project helpers, defined elsewhere.
def external_validate(dataset):
    print('External: ')

    # load AD GWAS data, preprocess to match original dataset
    X, y, ref, feats = setup_val_data(dataset)

    # predict on AD GWAS and score
    print('nn: ')
    nn = load_model(dataset, 'nn')
    scores = nn.predict(X)

    AUC = roc_auc_score(y, scores)
    APR = average_precision_score(y, scores)
    print('AUC/APR: ', AUC, APR)

    pcs = np.percentile(scores, [5, 50, 95])
    print('score percentiles (5th/50th/95th):', pcs)
    print('mean score, positives (y == 1):', np.mean(scores[y == 1]))
    print('mean score, negatives (y == 0):', np.mean(scores[y == 0]))

    try:
        print('glm: ')
        glm = load_model(dataset, 'glm')
        scores = glm.predict(X)

        AUC = roc_auc_score(y, scores)
        APR = average_precision_score(y, scores)
        print('AUC/APR: ', AUC, APR)

        pcs = np.percentile(scores, [5, 50, 95])
        print('score percentiles (5th/50th/95th):', pcs)
        print('mean score, positives (y == 1):', np.mean(scores[y == 1]))
        print('mean score, negatives (y == 0):', np.mean(scores[y == 0]))
    except Exception as e:
        print(e)
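For reference, both metrics take true binary labels plus continuous scores, so no thresholding of the model output is needed. A minimal self-contained check:

import numpy as np
from sklearn.metrics import average_precision_score, roc_auc_score

y_true = np.array([0, 0, 1, 1])
y_score = np.array([0.1, 0.4, 0.35, 0.8])
print(roc_auc_score(y_true, y_score))            # 0.75
print(average_precision_score(y_true, y_score))  # ~0.83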
Example #3
def nn_latentspace(self, verbose=False):
    # Fit a k-nearest-neighbours classifier (sklearn's KNeighborsClassifier)
    # on the labelled latent codes and score it on the unlabelled set.
    data_train, _, labels_train = self.labelled_set.get_latent()
    data_test, _, labels_test = self.unlabelled_set.get_latent()
    nn = KNeighborsClassifier()
    nn.fit(data_train, labels_train)
    score = nn.score(data_test, labels_test)
    if verbose:
        print("NN classifier score:", score)
        print("NN classifier tuple:",
              compute_accuracy_tuple(labels_test, nn.predict(data_test)))
    return score
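The same evaluation can be reproduced outside the class. A minimal sketch, assuming the latent codes are plain numpy arrays:

import numpy as np
from sklearn.neighbors import KNeighborsClassifier

rng = np.random.default_rng(0)
latent_train = rng.normal(size=(100, 10))
labels_train = rng.integers(0, 2, size=100)
latent_test = rng.normal(size=(40, 10))
labels_test = rng.integers(0, 2, size=40)

knn = KNeighborsClassifier()  # defaults to k=5 neighbours
knn.fit(latent_train, labels_train)
print(knn.score(latent_test, labels_test))  # mean accuracy on the test codes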
Example #4
import os

import numpy as np


# BestNN is the project's network class, defined elsewhere.
def main():

    DATA_DIR = 'data'
    data = np.load(os.path.join(DATA_DIR, "mnist_rot_train.npz"))
    X_tr, y_tr, a_tr = data["X"], data["labels"], data["angles"]

    data = np.load(os.path.join(DATA_DIR, "mnist_rot_validation.npz"))
    X_val, y_val, a_val = data["X"], data["labels"], data["angles"]

    # Note: test class labels and angles are not provided in the data set,
    # so only the images are loaded here.
    data = np.load(os.path.join(DATA_DIR, "mnist_rot_test.npz"))
    X_te = data["X"]

    #plt.imshow(X_tr[0].reshape((28,28)))
    #plt.show()
    nn = BestNN(1.0, 30)
    #[X_tr, X_val, X_te] = nn.preprocess(X_tr, X_val, X_te)

    nn.fit(X_tr, y_tr, a_tr)
    nn.predict(X_tr)
    nn.calc_error(X_tr, y_tr, a_tr, X_val, y_val, a_val)
    nn.savetestpred(X_te)
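The loaders above expect .npz archives with the keys "X", "labels", and "angles". A minimal sketch of writing a compatible file with dummy arrays (the shapes are illustrative only):

import numpy as np

X = np.zeros((5, 784), dtype=np.float32)  # flattened 28x28 images
labels = np.zeros(5, dtype=np.int64)
angles = np.zeros(5, dtype=np.float32)
np.savez("mnist_rot_train.npz", X=X, labels=labels, angles=angles)

data = np.load("mnist_rot_train.npz")
print(data["X"].shape, data["labels"].shape, data["angles"].shape)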
Example #5
from sklearn import svm, tree
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import NearestCentroid


def experiment_scikitlearn_baselines(train_X, train_Y, test_X, test_Y):
    # Convert the torch tensors to numpy and flatten each 28x50 sample.
    train_X = train_X.numpy().reshape(316, 28 * 50)
    train_Y = train_Y.numpy()

    sv = svm.SVC()
    sv.fit(train_X, train_Y)

    nn = NearestCentroid()
    nn.fit(train_X, train_Y)

    ga = GaussianNB()
    ga.fit(train_X, train_Y)

    dt = tree.DecisionTreeClassifier()
    dt.fit(train_X, train_Y)

    test_X = test_X.numpy().reshape(100, 28 * 50)
    test_Y = test_Y.numpy()
    print("SVM " + str(accuracy_score(test_Y, sv.predict(test_X))))
    print("NN " + str(accuracy_score(test_Y, nn.predict(test_X))))
    print("Gaussian " + str(accuracy_score(test_Y, ga.predict(test_X))))
    print("DT " + str(accuracy_score(test_Y, dt.predict(test_X))))
    print("Warning: the following takes approximately 1.5 hours on an average laptop.")
Example #6
# OneHotEncoder comes from sklearn.preprocessing; X_tr, X_t, nn and the
# loss/optimizer classes are defined earlier in the script.
X_tr = X_tr.reshape(X_tr.shape[0], -1)
X_t = X_t.reshape(X_t.shape[0], -1)

one_hot = OneHotEncoder()

y_tr = train_dataset.targets.numpy().reshape(-1, 1)
y_t = test_dataset.targets.numpy().reshape(-1, 1)

# Fit the encoder on the training labels only, then reuse it on the test
# labels so both splits share the same column ordering.
y_tr1 = one_hot.fit_transform(y_tr).toarray()
y_t1 = one_hot.transform(y_t).toarray()

nn.fit(X_tr,
       y_tr1,
       epochs=1200,
       batch_size=64,
       loss=BinaryCrossEntropy(),
       optimizer=Adam(lr=0.001),
       show_progress=TQDM_TERMINAL)
preds = np.round(nn.predict(X_t))

total = len(preds)

correct = 0

for pred, y in zip(preds, y_t1):
    if pred.argmax() == y.argmax():
        correct += 1

print(f"Accuracy: {float(correct) * 100 / total}%")

nn.overview()
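Since preds and y_t1 are both one-hot shaped arrays, the accuracy loop above collapses to a single vectorized expression:

accuracy = (preds.argmax(axis=1) == y_t1.argmax(axis=1)).mean()
print(f"Accuracy: {accuracy * 100}%")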
Example #7
# torch, torchvision transforms, DataLoader, EfficientNet, numpy (np),
# pandas (pd) and the path/batch-size globals are defined earlier in the file.
def main():
    # if a GPU is available, use it
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print("Using " + str(device))

    # create dataset
    file_list = None
    # with topdown=False the root directory is yielded last, so file_list
    # ends up holding the files directly under test_path
    for path, dirs, files in os.walk(test_path, topdown=False):
        file_list = list(files)

    # preprocessing steps
    transform = transforms.Compose([
        transforms.Resize((512, 512)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
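    # The mean/std values above are the standard ImageNet normalization
    # statistics, matching EfficientNet's ImageNet pre-training.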
    test_dataset = Leaf_test_Dataset(file_list, test_path, transform)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batchSize)

    print("Start testing:")

    # net model
    eff_models = []
    for model_path in eff_model_paths:
        eff_net = EfficientNet.from_name('efficientnet-b4')
        eff_net._fc = nn.Linear(eff_net._fc.in_features, 5)
        eff_net.load_state_dict(torch.load(model_path))
        eff_net = eff_net.to(device)
        eff_net.eval()  # inference mode: freezes dropout and batch-norm
        eff_models.append(eff_net)

    preds = []
    result = None

    with torch.no_grad():
        batch_num = len(test_loader)
        for index, image in enumerate(test_loader):
            image = image.to(device)

            eff_result = []
            for eff_net in eff_models:
                output = eff_net(image)
                output = output.to('cpu')
                pred = output.argmax(dim=1, keepdim=True).flatten()
                eff_result.append(pred)

            if len(preds) == 0:
                preds = np.dstack(eff_result)[0]
            else:
                preds = np.vstack([preds, np.dstack(eff_result)[0]])

        # start train combine model
        df = pd.read_csv(pred_train_csv)

        # remove rows where every model predicted the wrong label
        # get the prediction accuracy for this row
        def get_acc(pred_csv, index):
            label = pred_csv.loc[index, 'label']
            acc = 0
            # each of the five models contributes 0.2 when it is correct
            for i in range(5):
                if pred_csv.loc[index, f'pred_{i}'] == label:
                    acc += 0.2
            return round(acc, 1)

        delete_index = []
        for index in range(len(df)):
            acc = get_acc(df, index)
            # remove noise data
            if acc <= 0:
                delete_index.append(index)

        df = df.drop(delete_index)
        df = df.reset_index(drop=True)

        X = np.array(df[["pred_0", "pred_1", "pred_2", "pred_3", "pred_4"]])
        y = np.array(df[["label"]]).flatten()
        from sklearn.neural_network import MLPClassifier
        # Neural Network
        nn = MLPClassifier(max_iter=2000)
        nn.fit(X, y)
        result = nn.predict(preds)

    pred_result = pd.concat(
        [
            pd.DataFrame(file_list, columns=['image_id']),
            pd.DataFrame(result, columns=['label'])
        ],
        axis=1)
    pred_result.to_csv(output_path + "submission.csv", index=False, sep=',')

    print("Done.")
Example #8
                pbar.update(processed, **training_stat)
            pbar.finish()

    def predict(self, X):
        X_var = make_var(X)
        return self.forward(X_var).data.numpy()


class batch_iterator():
    """Yields (batch_index, batch) pairs of random data; resets after a full pass."""

    def __init__(self):
        self.batches_cnt = 100
        self.cur_batch = -1

    def __next__(self):
        self.cur_batch += 1
        if self.cur_batch >= self.batches_cnt:
            self.cur_batch = -1
            raise StopIteration()
        bsize = np.random.randint(16, 32)
        return self.cur_batch, np.random.randn(bsize, 1000)

    def __iter__(self):
        return self


if __name__ == '__main__':
    nn = DERN(1000)
    nn.train(batch_iterator(), epochs=10)
    X_test = np.random.rand(3, 1000)
    print(nn.predict(X_test))
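Because __next__ resets cur_batch before raising StopIteration, one batch_iterator instance can be consumed once per epoch and then reused. A quick sanity check:

it = batch_iterator()
for epoch in range(2):
    for batch_idx, batch in it:
        assert 16 <= batch.shape[0] < 32 and batch.shape[1] == 1000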