def plot(X_tr, y_tr, a_tr, X_val, y_val, a_val):
    # assumes: import numpy as np, import matplotlib.pyplot as plt, and the NN class defined elsewhere
    alphas = np.arange(0.0, 1.1, 0.1)  # sweep the trade-off parameter alpha from 0.0 to 1.0, matching the plotted x-axis
    class_err = np.zeros((11, 1))
    mae = np.zeros((11, 1))
    for count, alpha in enumerate(alphas):
        nn = NN(alpha, 20)
        nn.fit(X_tr, y_tr, a_tr)
        nn.predict(X_tr)
        class_err_1, class_err_2, mae1, mae2 = nn.calc_error(X_tr, y_tr, a_tr, X_val, y_val, a_val)
        class_err[count] = class_err_2  # validation-set classification error
        mae[count] = mae2               # validation-set mean absolute error

    # classification error vs alpha
    fig1 = plt.gcf()
    plt.plot(alphas, class_err, marker='o', linestyle='dashed',
             label=r'Observed Classification Error Rate (Y-axis) at Trade-off parameter $\alpha$ (X-axis)')
    plt.legend(prop={'size': 22})
    plt.ylabel('Classification Error on Validation Data', fontsize=22)
    plt.yticks(np.arange(0.0, 1.1, 0.1).tolist())
    plt.xlabel(r'Trade-off parameter $\alpha$', fontsize=22)
    plt.xticks(alphas.tolist())
    plt.title('Classification Error vs Trade-off Parameter', fontsize=22)
    mng = plt.get_current_fig_manager()
    mng.window.showMaximized()  # Qt backend only
    plt.show()
    fig1.savefig('C.png')

    # mean absolute error vs alpha, drawn on a fresh figure
    fig2 = plt.figure()
    plt.plot(alphas, mae, marker='o', linestyle='dashed',
             label=r'Observed Mean Absolute Error (Y-axis) at Trade-off parameter $\alpha$ (X-axis)')
    plt.legend(prop={'size': 22})
    plt.ylabel('Mean Absolute Error on Validation Data', fontsize=22)
    plt.xlabel(r'Trade-off parameter $\alpha$', fontsize=22)
    plt.xticks(alphas.tolist())
    plt.title('Mean Absolute Error vs Trade-off Parameter', fontsize=22)
    mng = plt.get_current_fig_manager()
    mng.window.showMaximized()  # Qt backend only
    plt.show()
    fig2.savefig('M.png')
def external_validate(dataset):
    print('External: ')
    # load AD GWAS data, preprocess to match original dataset
    X, y, ref, feats = setup_val_data(dataset)
    # predict on AD GWAS and score
    print('nn: ')
    nn = load_model(dataset, 'nn')
    scores = nn.predict(X)
    AUC = roc_auc_score(y, scores)
    APR = average_precision_score(y, scores)
    print('AUC/APR: ', AUC, APR)
    pcs = np.percentile(scores, [5, 50, 95])
    print(pcs)
    print(np.mean(scores[y == 1]))
    print(np.mean(scores[y == 0]))
    try:
        print('glm: ')
        glm = load_model(dataset, 'glm')
        scores = glm.predict(X)
        AUC = roc_auc_score(y, scores)
        APR = average_precision_score(y, scores)
        print('AUC/APR: ', AUC, APR)
        pcs = np.percentile(scores, [5, 50, 95])
        print(pcs)
        print(np.mean(scores[y == 1]))
        print(np.mean(scores[y == 0]))
    except Exception as e:
        print(e)
def nn_latentspace(self, verbose=False):
    data_train, _, labels_train = self.labelled_set.get_latent()
    data_test, _, labels_test = self.unlabelled_set.get_latent()
    nn = KNeighborsClassifier()
    nn.fit(data_train, labels_train)
    score = nn.score(data_test, labels_test)
    if verbose:
        print("NN classifier score:", score)
        print("NN classifier tuple:", compute_accuracy_tuple(labels_test, nn.predict(data_test)))
    return score
def main():
    DATA_DIR = 'data'
    data = np.load(os.path.join(DATA_DIR, "mnist_rot_train.npz"))
    X_tr, y_tr, a_tr = data["X"], data["labels"], data["angles"]

    data = np.load(os.path.join(DATA_DIR, "mnist_rot_validation.npz"))
    X_val, y_val, a_val = data["X"], data["labels"], data["angles"]

    # Note: test class labels and angles are not provided in the data set
    data = np.load(os.path.join(DATA_DIR, "mnist_rot_test.npz"))
    X_te, y_te, a_te = data["X"], data["labels"], data["angles"]

    # plt.imshow(X_tr[0].reshape((28, 28)))
    # plt.show()

    nn = BestNN(1.0, 30)
    # [X_tr, X_val, X_te] = nn.preprocess(X_tr, X_val, X_te)
    nn.fit(X_tr, y_tr, a_tr)
    nn.predict(X_tr)
    nn.calc_error(X_tr, y_tr, a_tr, X_val, y_val, a_val)
    nn.savetestpred(X_te)
def experiment_scikitlearn_baselines(train_X, train_Y, test_X, test_Y):
    # flatten each 28x50 example into a single feature vector (316 train / 100 test examples)
    train_X = train_X.numpy().reshape(316, 28 * 50)
    train_Y = train_Y.numpy()

    sv = svm.SVC()
    sv.fit(train_X, train_Y)
    nn = NearestCentroid()
    nn.fit(train_X, train_Y)
    ga = GaussianNB()
    ga.fit(train_X, train_Y)
    dt = tree.DecisionTreeClassifier()
    dt.fit(train_X, train_Y)

    test_X = test_X.numpy().reshape(100, 28 * 50)
    test_Y = test_Y.numpy()
    print("SVM " + str(accuracy_score(test_Y, sv.predict(test_X))))
    print("NN " + str(accuracy_score(test_Y, nn.predict(test_X))))
    print("Gaussian " + str(accuracy_score(test_Y, ga.predict(test_X))))
    print("DT " + str(accuracy_score(test_Y, dt.predict(test_X))))
    print("Warning: The following takes approximately 1.5 hours on an average laptop.")
# flatten images to feature vectors
X_tr = X_tr.reshape(X_tr.shape[0], -1)
X_t = X_t.reshape(X_t.shape[0], -1)

# one-hot encode the integer class labels
one_hot = OneHotEncoder()
y_tr = train_dataset.targets.numpy().reshape(-1, 1)
y_t = test_dataset.targets.numpy().reshape(-1, 1)
y_tr1 = one_hot.fit_transform(y_tr).toarray()
y_t1 = one_hot.transform(y_t).toarray()  # reuse the encoder fitted on the training labels

nn.fit(X_tr, y_tr1, epochs=1200, batch_size=64, loss=BinaryCrossEntropy(),
       optimizer=Adam(lr=0.001), show_progress=TQDM_TERMINAL)

preds = np.round(nn.predict(X_t))
total = len(preds)
correct = 0
for pred, y in zip(preds, y_t1):
    if pred.argmax() == y.argmax():
        correct += 1
print(f"Accuracy: {float(correct) * 100 / total}%")
nn.overview()
def main():
    # if GPU is available, use GPU
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print("Use " + str(device))

    # create dataset
    file_list = None
    for path, dirs, files in os.walk(test_path, topdown=False):
        file_list = list(files)

    # preprocessing steps
    transform = transforms.Compose([
        transforms.Resize((512, 512)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    test_dataset = Leaf_test_Dataset(file_list, test_path, transform)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batchSize)
    print("Start testing:")

    # net model: load the five fine-tuned EfficientNet-B4 checkpoints
    eff_models = []
    for model_path in eff_model_paths:
        eff_net = EfficientNet.from_name('efficientnet-b4')
        eff_net._fc = nn.Linear(eff_net._fc.in_features, 5)
        eff_net.load_state_dict(torch.load(model_path))
        eff_net = eff_net.to(device)
        eff_net.eval()
        eff_models.append(eff_net)

    preds = []
    result = None
    with torch.no_grad():
        batch_num = len(test_loader)
        for index, image in enumerate(test_loader):
            image = image.to(device)
            eff_result = []
            for eff_net in eff_models:
                output = eff_net(image)
                output = output.to('cpu')
                pred = output.argmax(dim=1, keepdim=True).flatten()
                eff_result.append(pred)
            # stack per-model predictions into one row per test image
            if len(preds) == 0:
                preds = np.dstack(eff_result)[0]
            else:
                preds = np.vstack([preds, np.dstack(eff_result)[0]])

    # train the combining (stacking) model
    df = pd.read_csv(pred_train_csv)

    # remove rows where every base model predicted the wrong label
    # get the prediction accuracy for this row
    def get_acc(pred_csv, index):
        label = pred_csv.loc[index, 'label']
        acc = 0
        if pred_csv.loc[index, 'pred_0'] == label:
            acc += 0.2
        if pred_csv.loc[index, 'pred_1'] == label:
            acc += 0.2
        if pred_csv.loc[index, 'pred_2'] == label:
            acc += 0.2
        if pred_csv.loc[index, 'pred_3'] == label:
            acc += 0.2
        if pred_csv.loc[index, 'pred_4'] == label:
            acc += 0.2
        return round(acc, 1)

    delete_index = []
    for index in range(len(df)):
        acc = get_acc(df, index)
        # remove noise data
        if acc <= 0:
            delete_index.append(index)
    df = df.drop(delete_index)
    df = df.reset_index(drop=True)

    X = np.array(df[["pred_0", "pred_1", "pred_2", "pred_3", "pred_4"]])
    y = np.array(df[["label"]]).flatten()

    from sklearn.neural_network import MLPClassifier

    # Neural Network meta-classifier (note: reuses the name `nn`, shadowing torch.nn from here on)
    nn = MLPClassifier(max_iter=2000)
    nn.fit(X, y)
    result = nn.predict(preds)

    pred_result = pd.concat([
        pd.DataFrame(file_list, columns=['image_id']),
        pd.DataFrame(result, columns=['label'])
    ], axis=1)
    pred_result.to_csv(output_path + "submission.csv", index=False, sep=',')
    print("Done.")
        # tail of the training loop
        pbar.update(processed, **training_stat)
        pbar.finish()

    def predict(self, X):
        X_var = make_var(X)
        return self.forward(X_var).data.numpy()


class batch_iterator():
    """Dummy iterator yielding (batch_index, random batch) pairs for a smoke test."""

    def __init__(self):
        self.batches_cnt = 100
        self.cur_batch = -1

    def __next__(self):
        if self.cur_batch >= self.batches_cnt:
            self.cur_batch = -1
            raise StopIteration()
        self.cur_batch += 1
        bsize = np.random.randint(16, 32)
        return self.cur_batch, np.random.randn(bsize, 1000)

    next = __next__  # Python 2-style alias

    def __iter__(self):
        return self


if __name__ == '__main__':
    nn = DERN(1000)
    nn.train(batch_iterator(), epochs=10)
    X_test = np.random.rand(3, 1000)
    print(nn.predict(X_test))