def disc_validate(agent, valid_feed, config, sample_shape, batch_cnt=None):
    """Run the discriminator over the entire validation feed and return the
    average validation loss.

    agent        : model wrapper exposing eval()/disc_train() and a .discriminator
    valid_feed   : data feed with epoch_init()/next_batch() (next_batch() yields
                   None when exhausted)
    config       : run configuration; config.gan_type selects the accuracy layout
    sample_shape : shape forwarded to agent.disc_train()
    batch_cnt    : optional counter forwarded to model_sel_loss()
    """
    with torch.no_grad():
        agent.eval()
        valid_feed.epoch_init(config, shuffle=False, verbose=True)
        loss_mgr = LossManager()
        # wgan tracks two scores (real/fake); the plain gan variant six counters
        if config.gan_type == 'wgan':
            acc_totals = np.array([0, 0])
        else:
            acc_totals = np.array([0, 0, 0, 0, 0.0, 0.0])
        n_batches = 0
        # iter() with a None sentinel stops exactly when next_batch() returns None
        for batch in iter(valid_feed.next_batch, None):
            loss, acc = agent.disc_train(sample_shape, batch)
            acc_totals = acc_totals + acc
            loss_mgr.add_loss(loss)
            loss_mgr.add_backward_loss(
                agent.discriminator.model_sel_loss(loss, batch_cnt))
            n_batches += 1
        valid_loss = loss_mgr.avg_loss()
        logger.info(loss_mgr.pprint(valid_feed.name))
        logger.info("Total valid loss {}".format(valid_loss))
        if config.gan_type == 'gan':
            print_accuracy(acc_totals, n_batches, config)
        else:
            logger.info("Wgan Disc Real and Fake Score: {}, {}".format(
                acc_totals[0] / n_batches, acc_totals[1] / n_batches))
        return valid_loss
def evaluate(self, X_va=None, y_va=None):
    """Print top-1/top-5 accuracy on the training set (and on an optional
    validation set) and dump the numbers to a CSV next to the model pickle.

    X_va, y_va : optional validation features/labels; when omitted, only the
                 training row is written.
    """
    # BUG FIX: the defaults were mutable ([]); use None sentinels instead.
    X_va = [] if X_va is None else X_va
    y_va = [] if y_va is None else y_va
    print('Training performance:', end=' ')
    acc1, acc5 = print_accuracy(self.y_tr, self.model.predict_proba(self.X_tr))
    print(f'Top-1 accuracy={acc1:.4f}, Top-5 accuracy={acc5:.4f}')
    d_accuracy = {'top-1': [acc1], 'top-5': [acc5]}
    index = ['train']
    if len(X_va) + len(y_va) > 0:
        print('Evaluation performance', end=' ')
        acc1, acc5 = print_accuracy(y_va, self.model.predict_proba(X_va))
        print(f'Top-1 accuracy={acc1:.4f}, Top-5 accuracy={acc5:.4f}')
        d_accuracy['top-1'].append(acc1)
        d_accuracy['top-5'].append(acc5)
        index.append('val')
    # BUG FIX: index was hard-coded to ['train', 'val'], which raises
    # ValueError when no validation data is supplied (1 row vs 2 labels);
    # build the index to match the rows actually collected.
    pd.DataFrame(d_accuracy, index=index).to_csv(self.fn.replace('.pkl', '.csv'))
def test_model(sess_test, objData):
    """Classify the whole test set in mini-batches and report accuracy.

    sess_test : live TensorFlow session
    objData   : test-data provider (generate_batch()/next_batch_test(), plus
                total_images and minibatch sizes)
    Returns the overall accuracy (fraction of correctly classified images).
    """
    total = objData.total_images
    mbach = objData.minibatch
    # number of mini-batches = ceil(total / mbach), spelled out explicitly
    whole = int(total / mbach)
    itertotal = whole + 1 if (total / mbach) - whole > 0 else whole

    count_success = 0
    count_by_class = np.zeros([num_class, num_class])
    prob_predicted = []

    # One TensorFlow run per mini-batch of 'mini_batch_test' input images.
    print('\n# PHASE: Test classification')
    for _ in range(itertotal):
        # fetch the next batch of images together with their labels
        batch, label = objData.generate_batch()
        # run the graph: softmax output plus the relu6 layer activations
        prob, layer = sess_test.run([vgg.prob, vgg.relu6],
                                    feed_dict={vgg_batch: batch, train_mode: False})
        # save output of a layer
        # utils.save_layer_output(layer, label, name='Train_SNC4_relu6')
        # accumulate per-batch hits, the confusion matrix and the predictions
        count, count_by_class, prob_predicted = utils.print_accuracy(
            label, prob, matrix_confusion=count_by_class, predicted=prob_predicted)
        count_success = count_success + count
        # advance the provider to the next group of images
        objData.next_batch_test()

    # overall accuracy across every image seen
    accuracy_final = count_success / total
    print('\n# STATUS: Confusion Matrix')
    print(count_by_class)
    print(' Success total: ', str(count_success))
    print(' Accuracy total: ', str(accuracy_final))
    # a = objData.labels.tolist()
    # b = prob_predicted
    # cm = confusion_matrix(a, b)
    return accuracy_final
####################### Feature Expansion ################################ if classifier!="nn" and classifier!="bow": X_tr = feature_exp(X_tr) X_te = feature_exp(X_te) D = X_tr.shape[1] print "After Feature Expansion: Training : [Inputs x Features ] = [%d x %d]" % (N_tr,D) print "After Feature Expansion: Test : [Inputs x Features ] = [%d x %d]" % (N_te,D) ###################### Normalizing data ################################## scaler = preprocessing.StandardScaler().fit(X_tr) X_tr_n = scaler.transform(X_tr) X_te_n = scaler.transform(X_te) end = time.time() print "\nTime taken for Data preparation = %f sec" % (end-start) start = time.time() print time.ctime() y_te_p = models(X_tr_n, y_tr, X_te_n, classifier) if isinstance(y_te_p,np.ndarray): if submission != 1: print_accuracy(y_te, y_te_p, "Test") else: save_out(y_te_p,labels_string,sorted_files_te,submission_fname) end = time.time() print "\nTime taken by classifier = %f sec" % (end-start)
# Script fragment: K-fold cross-validation for one target `word`.
# Assumes word, K, wsd, data_dir, res_dir_w, featsset and class_name are bound
# by the enclosing scope (presumably a loop over words) -- not visible here.
os.makedirs(res_dir_w)
acc_list = []
word_acc = 0
fs = "+".join(featsset)
for fold in range(1, K+1):
    #sys.stderr.write("[INFO] Fold %s\n" %str(fold))
    reader = LexSampReader()
    # per-fold train/test splits live under <data_dir>/<word>/xval/fold<k>
    dataset = data_dir + "/" + word + "/xval/fold"+str(fold)
    trainInstances = reader.getInstances(dataset+"/"+word+".train.ls.utf8.xml")
    testInstances = reader.getInstances(dataset+"/"+word+".test.ls.utf8.xml")
    wsd.setTrain(trainInstances)
    wsd.setTest(testInstances)
    wsd.learn()
    preds = wsd.predict()
    # gold labels are the second field of each test instance tuple
    gold = [insLabel for (insId, insLabel, offset, tokens) in testInstances]
    acc = wsd.accuracy(preds, gold)
    acc_list.append(acc)
    word_acc += acc
    sys.stderr.write("[INFO] %s fold %s: %s\n" % (word, str(fold), str(acc)))
    # dump this fold's predictions for later inspection
    pred_filename = word + ".f"+str(fold)+"."+class_name+"."+fs+".out"
    res_file = res_dir_w + "/" + pred_filename
    utils.print_predictions(preds, testInstances, res_file)
# persist all per-fold accuracies, then report the average over K folds
acc_filename = word +"."+class_name+"."+fs+".acc"
acc_file = res_dir_w + "/" + acc_filename
utils.print_accuracy(acc_list, acc_file)
word_acc = float(word_acc) / K
sys.stderr.write("[INFO] %s avg fscore: %s\n" % (word, str(word_acc)))
sys.stderr.write("[INFO] Results stored in %s\n\n" % res_dir_w)
resname = get_parent_path(resfile, 1)[1] res = [pd.read_csv(f) for f in resfile] for ii, rr in enumerate(res): sujid = [] for ff in rr.subject_id: dd = ff.split('/') if dd[-1] is '': dd.pop() nn = len(dd) sujid.append(dd[nn - 3] + '+' + dd[nn - 2] + '+' + dd[nn - 1]) rr.index = sujid res[ii] = rr.loc[labelsujid] # rr.loc[sujid[::-1]] print_accuracy(res, resname, ytrue, prediction_name='prob_y', inverse_prediction=False) print_accuracy_all(res[0:1], resname[0:1], ytrue, prediction_name='prob_y', inverse_prediction=False) # CAT12 rescat = pd.read_csv( '/home/romain.valabregue/datal/QCcnn/CATI_datasets/res_cat12_suj18999.csv') rescat.index = [sss.replace(';', '+') for sss in rescat.sujid] # .values.replace(";","+") rescat = rescat.loc[labelsujid] print_accuracy_df(rescat, ytrue) print_accuracy([rescat], ['IQR'],
def main():
    """Entry point: optionally train a W1/W2/BaryOT optimal-transport model,
    then evaluate domain-adaptation accuracy and render sample visualizations.

    With config.solver == 'none' the whole training branch is skipped and
    model stays None; only the accuracy report runs.
    """
    ## parse flags
    config = Options().parse()
    utils.print_opts(config)

    ## set up folders
    exp_dir = os.path.join(config.exp_dir, config.exp_name)
    model_dir = os.path.join(exp_dir, 'models')
    img_dir = os.path.join(exp_dir, 'images')
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    if not os.path.exists(img_dir):
        os.makedirs(img_dir)

    if config.solver == 'none':
        model = None
    else:
        if config.use_tbx:
            # remove old tensorboardX logs
            logs = glob.glob(os.path.join(exp_dir, 'events.out.tfevents.*'))
            if len(logs) > 0:
                os.remove(logs[0])
            tbx_writer = SummaryWriter(exp_dir)
        else:
            tbx_writer = None

        ## initialize data loaders/generators & model
        r_loader, z_loader = get_loader(config)
        if config.solver == 'w1':
            model = W1(config, r_loader, z_loader)
        elif config.solver == 'w2':
            model = W2(config, r_loader, z_loader)
        elif config.solver == 'bary_ot':
            model = BaryOT(config, r_loader, z_loader)
        cudnn.benchmark = True
        networks = model.get_networks()
        utils.print_networks(networks)

        ## training
        ## stage 1 (dual stage) of bary_ot
        start_time = time.time()
        if config.solver == 'bary_ot':
            print("Starting: dual stage for %d iters." % config.dual_iters)
            for step in range(config.dual_iters):
                model.train_diter_only(config)
                # report stats every 100 iterations
                if ((step + 1) % 100) == 0:
                    stats = model.get_stats(config)
                    end_time = time.time()
                    # elapsed wall-clock minutes since the previous report
                    stats['disp_time'] = (end_time - start_time) / 60.
                    start_time = end_time
                    utils.print_out(stats, step + 1, config.dual_iters, tbx_writer)
            print("dual stage iterations complete.")

        ## main training loop of w1 / w2 or stage 2 (map stage) of bary-ot
        map_iters = config.map_iters if config.solver == 'bary_ot' else config.train_iters
        if config.solver == 'bary_ot':
            print("Starting: map stage for %d iters." % map_iters)
        else:
            print("Starting training...")
        for step in range(map_iters):
            model.train_iter(config)
            # report stats every 100 iterations
            if ((step + 1) % 100) == 0:
                stats = model.get_stats(config)
                end_time = time.time()
                stats['disp_time'] = (end_time - start_time) / 60.
                start_time = end_time
                utils.print_out(stats, step + 1, map_iters, tbx_writer)
            # dump intermediate visualizations every 500 iterations
            if ((step + 1) % 500) == 0:
                images = model.get_visuals(config)
                utils.visualize_iter(images, img_dir, step + 1, config)
        print("Training complete.")
        networks = model.get_networks()
        utils.save_networks(networks, model_dir)

    ## testing
    ## 1) classification accuracy
    print("Calculating domain adaptation accuracy...")
    # NOTE(review): when solver == 'none' this receives model=None --
    # presumably handled inside utils.print_accuracy; verify.
    utils.print_accuracy(config, model)

    ## 2) visualization
    if config.solver != 'none':
        # fixed test batch pickled on disk, direction decides which domain
        root = "./usps_test" if config.direction == 'usps-mnist' else "./mnist_test"
        file = open(os.path.join(root, "data.pkl"), "rb")
        fixed_z = pickle.load(file)
        file.close()
        fixed_z = utils.to_var(fixed_z)
        fixed_gz = model.g(fixed_z).view(*fixed_z.size())
        utils.visualize_single(fixed_gz, os.path.join(img_dir, 'test.png'), config)
def models(X_tr_n, y_tr, X_te_n, classifier):
    # Train the model selected by `classifier`, print training accuracy, and
    # return the test-set predictions.
    #
    # X_tr_n, X_te_n : normalized train/test feature matrices
    # y_tr           : training labels
    # classifier     : string key selecting one of the branches below
    # Returns the test predictions, or False when the key is unknown.
    if(classifier == "c_svm"):
        ###################### C SVM - Accuracy - 0.44503 #############################
        model = SVC()
        model.fit(X_tr_n, y_tr)
        y_tr_p = model.predict(X_tr_n)
        y_te_p = model.predict(X_te_n)
        # save_out(y_te_p,labels_string,sorted_files_te,'submission/testLabels_CSVM.csv')
    elif(classifier == "c_svm_l1"):
        ###################### C SVM L1 - Accuracy - 0.44503 #############################
        # dual=False is required by liblinear for the L1 penalty
        model = LinearSVC(penalty='l1',dual=False)
        model.fit(X_tr_n, y_tr)
        y_tr_p = model.predict(X_tr_n)
        y_te_p = model.predict(X_te_n)
    elif(classifier == "log_reg"):
        ###################### Logistic regression #############################
        model = linear_model.LogisticRegression()
        model.fit(X_tr_n, y_tr)
        y_tr_p = model.predict(X_tr_n)
        y_te_p = model.predict(X_te_n)
    elif(classifier == "c_svm_param"):
        ###################### C SVM Param - Accuracy - 0.50164 #############################
        # grid_search() is a project helper; presumably returns a fitted
        # search object exposing best_params_/predict -- verify against its def
        model = grid_search(X_tr_n,y_tr)
        print "Best params = "
        print model.best_params_
        # model = SVC(C=10,kernel='rbf',gamma=0.001)
        # model.fit(X_tr_n, y_tr)
        y_tr_p = model.predict(X_tr_n)
        y_te_p = model.predict(X_te_n)
    elif(classifier == "knn"):
        ###################### KNN - Accuracy - #############################
        model = KNeighborsClassifier(n_neighbors=20)
        model.fit(X_tr_n, y_tr)
        y_tr_p = model.predict(X_tr_n)
        y_te_p = model.predict(X_te_n)
    elif(classifier == "naive_bayes"):
        ###################### Naive Bayes - Accuracy - #############################
        model = GaussianNB()
        model.fit(X_tr_n, y_tr)
        y_tr_p = model.predict(X_tr_n)
        y_te_p = model.predict(X_te_n)
    elif(classifier == "ols"):
        ###################### OLS - Accuracy - #############################
        # regression output is rounded to the nearest integer class label
        model = linear_model.LinearRegression()
        model.fit(X_tr_n,y_tr)
        y_tr_p = model.predict(X_tr_n)
        y_tr_p = np.round(y_tr_p)
        y_te_p = model.predict(X_te_n)
        y_te_p = np.round(y_te_p)
    elif(classifier == "ridge_reg"):
        ###################### Ridge Regression - Accuracy - #############################
        model = linear_model.Ridge(alpha=0.001)
        model.fit(X_tr_n,y_tr)
        y_tr_p = model.predict(X_tr_n)
        y_tr_p = np.round(y_tr_p)
        y_te_p = model.predict(X_te_n)
        y_te_p = np.round(y_te_p)
    elif(classifier == "lasso"):
        ###################### Lasso - Accuracy - #############################
        # NOTE(review): max_iter=-1 is an SVC convention; sklearn's Lasso
        # expects a positive iteration count -- confirm this runs on the
        # pinned sklearn version.
        model = linear_model.Lasso(alpha=.15,max_iter=-1)
        model.fit(X_tr_n,y_tr)
        y_tr_p = model.predict(X_tr_n)
        y_tr_p = np.round(y_tr_p)
        y_te_p = model.predict(X_te_n)
        y_te_p = np.round(y_te_p)
    elif(classifier == "adaboost"):
        ###################### AdaBoost ###########################################
        # model = AdaBoostClassifier(RandomForestClassifier(max_features=50, n_estimators=10, max_depth=20),
        #                            n_estimators=100,learning_rate=2)
        # discrete SAMME boosting over linear SGD base learners
        model = AdaBoostClassifier(linear_model.SGDClassifier(n_iter=50),n_estimators=100,learning_rate=1, algorithm="SAMME")
        # model = AdaBoostClassifier(n_estimators=100,learning_rate=2)
        model.fit(X_tr_n,y_tr)
        y_tr_p = model.predict(X_tr_n)
        y_te_p = model.predict(X_te_n)
    # elif(classifier == "voting"):
    #     clf1 = DecisionTreeClassifier(max_depth=4)
    #     clf2 = KNeighborsClassifier(n_neighbors=7)
    #     clf3 = SVC(kernel='rbf', probability=True)
    #     model = VotingClassifier(estimators=[('dt', clf1), ('knn', clf2), ('svc', clf3)], voting='soft', weights=[2,1,2])
    #     model.fit(X_tr_n,y_tr)
    #     y_tr_p = model.predict(X_tr_n)
    #     y_te_p = model.predict(X_te_n)
    elif(classifier == "random_forest"):
        ###################### Random Forest ###########################################
        # model = RandomForestClassifier(n_estimators=100,n_jobs=4)
        # Grid search
        clf = RandomForestClassifier(n_jobs=3)
        param_grid = {"max_depth": [10, 20, 30],
                      "max_features": [50, 100, 200],
                      "n_estimators": [10,50,100]}
        # run grid search
        model = GridSearchCV(clf, param_grid=param_grid)
        model.fit(X_tr_n,y_tr)
        print model.best_params_
        y_tr_p = model.predict(X_tr_n)
        y_te_p = model.predict(X_te_n)
    elif(classifier == "nn"):
        ############################### NN ###################################
        # tensorFlowNN(X_tr,y_tr,X_te,y_te)
        # NOTE(review): uses the raw globals X_tr/X_te rather than the
        # normalized arguments -- presumably intentional (the CNN handles its
        # own scaling); verify.
        y_tr_p, y_te_p = keras_CNN(X_tr, y_tr, X_te)
    elif(classifier == "bow"):
        ############################### BOW ###################################
        # bag-of-visual-words features from full-resolution images, then an SVM
        X_tr_full_res, s = read_X_full_res('data/train')
        X_te_full_res, s = read_X_full_res('data/test')
        bow_obj = bow(kmeans_K = 100)
        X_bow_tr = bow_obj.fit_predict(X_tr_full_res)
        X_bow_te = bow_obj.predict(X_te_full_res)
        model = SVC()
        model.fit(X_bow_tr, y_tr)
        y_tr_p = model.predict(X_bow_tr)
        y_te_p = model.predict(X_bow_te)
    else:
        print "No Classifier selected"
        return False
    print_accuracy(y_tr, y_tr_p, "Training")
    return y_te_p