def val(path, model, test_loader, device, criterion, epoch, batch_size):
    """Run one validation pass, accumulate the MSE loss, and collect
    predictions/labels for AUC reporting.

    Assumes `visualize_sum_testing_result` and `auc` are defined elsewhere
    in this module, and that `torch`, `np`, `os`, and `tqdm` are imported.
    """
    model.eval()
    sum_running_loss = 0.0

    with torch.no_grad():
        for batch_idx, data in enumerate(tqdm(test_loader)):
            task = data['task'].to(device).float()  # kept from the original; unused below
            task_label = data['task_label'].to(device).float()

            # All black
            # init = data['init']
            # init[:] = 0
            # init = init.to(device).float()

            # Normal
            init = data['init'].to(device).float()
            label = data['label'].to(device).float()

            # model prediction
            prediction = model(subx=task_label, mainx=init)

            # loss (inside torch.no_grad() there is no need to detach via .data)
            loss_mse = criterion(prediction, label)

            # accumulate loss, weighted by batch size
            sum_running_loss += loss_mse.item() * init.size(0)

            # visualize the summed testing result
            visualize_sum_testing_result(path, init, prediction, task_label,
                                         label, batch_idx, epoch, batch_size)

            # collect outputs; note that later batches are prepended, so the
            # final arrays end up in reverse batch order
            if batch_idx == 0:
                prediction_output = prediction.cpu().detach().numpy()
                label_output = label.cpu().detach().numpy()
                init_output = init.cpu().detach().numpy()
            else:
                prediction_output = np.append(prediction.cpu().detach().numpy(),
                                              prediction_output, axis=0)
                label_output = np.append(label.cpu().detach().numpy(),
                                         label_output, axis=0)
                init_output = np.append(init.cpu().detach().numpy(),
                                        init_output, axis=0)

    sum_running_loss = sum_running_loss / len(test_loader.dataset)
    print('\nTesting phase: epoch: {} Loss: {:.4f}\n'.format(epoch, sum_running_loss))

    auc_path = os.path.join(path, "epoch_" + str(epoch))
    auc(['flow'], [2, 4, 10, 100], [[label_output, prediction_output]], auc_path)

    return sum_running_loss, prediction_output, label_output, init_output
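# A minimal sketch of how val() might be driven from a training script.
# MyModel and make_loader are hypothetical placeholders (not part of the
# original code), and the loader is assumed to yield dicts with 'task',
# 'task_label', 'init', and 'label' tensors as val() expects.
import torch
import torch.nn as nn

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MyModel().to(device)          # assumed model taking subx=/mainx=
test_loader = make_loader('val')      # assumed DataLoader factory
criterion = nn.MSELoss()

best_loss = float('inf')
for epoch in range(10):
    loss, preds, labels, inits = val('./outputs', model, test_loader,
                                     device, criterion, epoch, batch_size=32)
    if loss < best_loss:
        best_loss = loss
        torch.save(model.state_dict(), './outputs/best.pth')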
def test_step(images, labels, tp, tn, fn, fp):
    """Evaluate one batch and update the binary confusion-matrix counts.

    Uses the module-level `model`, `loss_object`, and streaming metrics.
    Note the parameter order (..., fn, fp) differs from the return order
    (..., fp, fn).
    """
    output = model(images, training=False)
    t_loss = loss_object(labels, output)

    test_loss(t_loss)
    test_accuracy(labels, output)
    aucvalue = auc(labels, output, weights=None, num_thresholds=200,
                   name=None, summation_method='trapezoidal')
    test_auc(aucvalue)

    # Convert one-hot outputs/labels to class indices.
    pred = tf.cast(tf.argmax(output, 1), tf.int32)
    true = tf.cast(tf.argmax(labels, 1), tf.int32)

    # Tally the confusion matrix (class 1 is "positive").
    for i in range(len(pred)):
        if pred[i] == true[i] and true[i] == 1:
            tp += 1
        elif pred[i] == true[i] and true[i] == 0:
            tn += 1
        elif pred[i] == 1 and true[i] == 0:
            fp += 1
        else:
            fn += 1

    return tp, tn, fp, fn, output
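# A hypothetical evaluation loop over the counts returned above; `test_ds`
# and the zero-initialised counters are assumptions, not part of the
# original code. Precision/recall/F1 follow directly from the counts.
tp = tn = fp = fn = 0
for images, labels in test_ds:
    # parameters go in as (tp, tn, fn, fp), results come back as (tp, tn, fp, fn)
    tp, tn, fp, fn, _ = test_step(images, labels, tp, tn, fn, fp)

precision = tp / (tp + fp) if (tp + fp) else 0.0
recall = tp / (tp + fn) if (tp + fn) else 0.0
f1 = (2 * precision * recall / (precision + recall)
      if (precision + recall) else 0.0)
print('precision=%.4f recall=%.4f f1=%.4f' % (precision, recall, f1))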
def cal_auc(ans, i, preds):
    """Score each row's 28 predictions against the ground truth and store
    the result in an 'auc' column, moved to the front."""
    preds['auc'] = 0.0
    for index, pred in preds.iterrows():
        truth = ans.loc[index]
        a = auc(pred[0:28], truth)
        preds.at[index, 'auc'] = round(a, 4)

    # Reorder columns so 'auc' comes first.
    cols = preds.columns.tolist()
    preds = preds[cols[-1:] + cols[:-1]]
    # preds.to_csv('../auc/%s-%03d.csv' % (PREFIX, i))
    return preds
def test_step(images, labels):
    output = model(images, training=False)
    t_loss = loss_object(labels, output)

    test_loss(t_loss)
    test_accuracy(labels, output)
    aucvalue = auc(labels, output, weights=None, num_thresholds=200,
                   name=None, summation_method='trapezoidal')
    test_auc(aucvalue)
def train_step(images, labels):
    with tf.GradientTape() as tape:
        output = model(images, training=True)
        loss = loss_object(labels, output)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    train_accuracy(labels, output)
    aucvalue = auc(labels, output, weights=None, num_thresholds=200,
                   name=None, summation_method='trapezoidal')
    train_auc(aucvalue)
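# A minimal sketch of how train_step/test_step might be driven per epoch.
# `train_ds`, `test_ds`, and EPOCHS are placeholders, and the metric
# objects are assumed to be tf.keras.metrics instances (reset_states() is
# the TF 2.x spelling; newer Keras uses reset_state()).
EPOCHS = 5  # placeholder

for epoch in range(EPOCHS):
    # Reset the streaming metrics at the start of each epoch.
    for metric in (train_loss, train_accuracy, train_auc,
                   test_loss, test_accuracy, test_auc):
        metric.reset_states()

    for images, labels in train_ds:
        train_step(images, labels)
    for images, labels in test_ds:
        test_step(images, labels)

    print('epoch %d: loss=%.4f acc=%.4f auc=%.4f | '
          'val loss=%.4f acc=%.4f auc=%.4f'
          % (epoch + 1,
             train_loss.result(), train_accuracy.result(), train_auc.result(),
             test_loss.result(), test_accuracy.result(), test_auc.result()))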
def evaluate_c16_main():
    eva_score = get_testset_score()
    gt_score = get_ground_truth()
    save_testset_score(eva_score, gt_score)

    fig_name = "c16_auc_mean_cnt_%d.jpg" % MEAN_COUNT
    fig_path = os.path.join(dir_c16_outs, fig_name)

    auc_score = auc.auc(gt_score, eva_score, fig_path)
    print("Mean count:%d C16 AUC is :%0.4f" % (MEAN_COUNT, auc_score))
def conv1d_auc(i):
    """Score the conv1d model's predictions for file i against its labels
    and write a per-row AUC CSV."""
    pred_data = pd.read_csv('../con/%03d.csv' % i, index_col='user_id')
    pred_data = scaler.transform(pred_data)
    pred = model.predict(reshape(pred_data))

    pred_df = pd.DataFrame(pred)
    pred_df['auc'] = 0.0

    ans = pd.read_csv('../public/label-%03d.csv' % i, index_col='user_id')
    pi = 0
    for index, ans_row in ans.iterrows():
        a = auc.auc(pred[pi], ans_row)
        pred_df.at[pi, 'auc'] = a
        pi += 1

    # Move the 'auc' column to the front.
    cols = pred_df.columns.tolist()
    pred_df = pred_df[cols[-1:] + cols[:-1]]
    pred_df.to_csv('../conv1d-auc/%s-%03d.csv'
                   % (datetime.now().isoformat(timespec='minutes'), i))
def make_learning_curve(X, Y, min_repeat, max_repeat, ebar, max_point_num,
                        debug=False, useRPMat=False):
    """x, y, e = make_learning_curve(X, Y, min_repeat, max_repeat, ebar,
                                     max_point_num)

    Make the learning curve.
    Inputs:
        X -- data matrix
        Y -- labels
    Returns:
        x -- number of samples
        y -- performance (AUC)
        e -- error bar
    """
    if debug:
        print "ENTER MLC"
        print "Casting X to 64 bit"

    assert Y.any(axis=0).all(), \
        ("Some classes have no example, at least one should be present "
         "for each class")
    X = N.cast['float64'](X)
    Y = N.cast['float64'](Y.copy())

    # Verify dimensions and set target values.
    p, n = X.shape
    if len(Y.shape) == 1:
        Y = (N.ones((1, 1)) * Y).T
    pp, cn = Y.shape
    sep_num = cn
    if pp != p:
        raise Exception('Size mismatch. X has ' + str(p) +
                        ' examples but Y has ' + str(pp) + ' labels')

    # If there are only 2 classes, the 2nd column is the opposite of the
    # first, so a single separation suffices.
    if cn == 2 and N.all(Y.sum(axis=1)):
        Y = (N.ones((1, 1)) * Y[:, 0]).T
        sep_num = 1
    # Y[Y == 0] = -1

    if debug:
        time.sleep(2)
        print "Creating the data matrices"

    # Create the data matrices (Y at this stage is still multi-column).
    D = data_struct(X, Y)
    feat_num = D.X.shape[1]
    K = None
    if not pd_check.pd_check(D.X) and D.X.shape[0] < 2000:
        D = kernelize.kernelize(D)

    # Load random splits (these are the same for everyone).
    RP = None
    if useRPMat and os.path.exists(rp_path) and os.path.isfile(rp_path):
        RP = io.loadmat(rp_path, struct_as_record=False)['RP']
        rp, mr = RP.shape
        if rp < p:
            if debug:
                print 'make_learning_curve::warning: RP too small'
            RP = None
        else:
            max_repeat = min(max_repeat, mr)
            RP = N.ceil(N.cast['float64'](RP) / (float(rp) / float(p)))
            RP = RP.astype(int)
            if debug:
                print ('make_learning_curve: using RP of dim ' + str(rp) +
                       'x' + str(mr) + ' min=' + str(RP.min()) +
                       ' max=' + str(RP.max()) +
                       ', max_repeat=' + str(max_repeat))
    else:
        print 'make_learning_curve::warning: no RP file found\n'

    if debug:
        time.sleep(2)
        print "Computing sample sizes"

    # Sample sizes scaled in log2.
    m = N.floor(math.log(p, 2))
    x = 2. ** N.arange(0, int(m) + 1)
    if x[-1] != p:
        x = N.hstack((x, [p]))
    x = x[0:-1]  # need to remove the last point

    if max_point_num is None:
        point_num = x.shape[0]
    else:
        point_num = min(x.shape[0], max_point_num)

    # Loop over the sample sizes.
    x = N.cast['uint32'](x[0:point_num])
    y = N.zeros(x.shape)
    e = N.zeros(x.shape)
    for k in xrange(0, point_num):
        if debug:
            print '-------------------- Point %d ----------------------' % k
        A = N.zeros((sep_num, 1))
        E = N.zeros((sep_num, 1))
        e[k] = N.Inf

        # Loop over the number of "1 vs all" separations.
        for j in xrange(0, sep_num):
            if debug:
                print ' sep %d -- ' % j
            repnum = 0
            area = []
            # Loop over repeats (floating number of repeats).
            while repnum < min_repeat or \
                    (E[j] > ebar and repnum < max_repeat):
                if debug:
                    print 'repeat %d **' % repnum
                    print 'min_repeat: ' + str(min_repeat)
                    print 'max_repeat: ' + str(max_repeat)

                if RP is None:
                    rp = randperm(p)
                else:
                    rp = RP[0:p, repnum] - 1
                tr_idx = rp[0:x[k]]
                te_idx = rp[x[k]:]

                if debug:
                    print "Obtaining sub arrays"
                    time.sleep(2)
                if pd_check.pd_check(D):
                    # kernelized version
                    if debug:
                        print 'pd_check ok, using kernelized version'
                    Dtr = D.subdim(tr_idx, tr_idx, [j])
                    Dte = D.subdim(te_idx, tr_idx, [j])
                # elif x[k] < feat_num:
                #     # kernelized too (for speed reasons)
                #     if debug:
                #         print 'x[k] < feat_num, using kernelized version'
                #     Dtr = K.subdim(tr_idx, tr_idx, [j])
                #     Dte = K.subdim(te_idx, tr_idx, [j])
                else:
                    # primal version
                    if debug:
                        print 'using non-kernelized version'
                    Dtr = data_struct(D.X[tr_idx, :], D.Y[tr_idx, j])
                    Dte = data_struct(D.X[te_idx, :], D.Y[te_idx, j])

                if debug:
                    time.sleep(2)
                    print "Training classifier"
                d, m = train.train(Dtr)

                if debug:
                    time.sleep(2)
                    print "Computing test values"
                d1 = test.test(m, Dte)
                assert d1.X.shape[0] != 0, "d1.X.shape[0] == 0"
                assert repnum == len(area), "repnum == len(area)"

                if debug:
                    time.sleep(2)
                    print "Computing auc"
                area.append(auc.auc(d1.X, d1.Y, dosigma=False)[0])
                if debug:
                    time.sleep(2)
                    print "done"
                repnum += 1
                E[j] = N.asarray(area).std() / N.sqrt(repnum)
            # end while (repnum loop)

            assert not N.any(N.isnan(area))
            A[j] = N.asarray(area).mean()
            if N.isnan(A[j]):
                assert False, "Invalid area: " + str(area)
        # end for j in xrange(sep_num)

        e[k] = E.mean()
        y[k] = A.mean()
        assert not N.isnan(y[k])
        if debug:
            print ('==> ' + str(repnum) + ' repeats, auc=' + str(y[k]) +
                   '+-' + str(e[k]) + ' -----------------')
    # end for k (loop over sample sizes)

    # Add the point with 0 examples: chance-level AUC of 0.5.
    x = N.concatenate((N.asarray([0]), x))
    P = 0.5
    y = N.concatenate((N.asarray([P]), y))
    e = N.concatenate((N.asarray([N.sqrt(P * (1 - P) / p)]), e))
    return x, y, e
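# The (x, y, e) triple returned above maps naturally onto an error-bar
# plot. A minimal sketch, assuming matplotlib is available;
# plot_learning_curve is a hypothetical helper, not part of the original.
import matplotlib.pyplot as plt

def plot_learning_curve(x, y, e, out_path='learning_curve.png'):
    # x[0] == 0 (the chance-level point), so keep a linear x-axis;
    # a log axis would require dropping that first point.
    plt.figure()
    plt.errorbar(x, y, yerr=e, fmt='o-', capsize=3)
    plt.xlabel('Number of training examples')
    plt.ylabel('AUC')
    plt.savefig(out_path)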
model.load_weights('model_save/deep_fm_sample-ep001-loss0.184-val_loss0.172.h5')
# model = load_model('model_save/deep_fm_sample-ep001-loss0.192-val_loss0.176.h5')

data = pd.read_csv("./data/sample/validation.txt")

# 1. Label-encode the sparse features (note: fitting the encoders on the
#    validation set alone only works if it sees the same value vocabulary
#    as training).
for feat in sparse_features:
    lbe = LabelEncoder()
    data[feat] = lbe.fit_transform(data[feat])

# 2. Count the unique values of each sparse field.
sparse_feature_dim = {feat: data[feat].nunique() for feat in sparse_features}

# 3. Generate the input arrays for the model.
model_input = [data[feat].values for feat in sparse_feature_dim]

pred = model.predict(model_input, batch_size, 1)
label = data[target].values.flatten().tolist()
pred = pred.flatten().tolist()

with open('data/pctr', 'w') as fw:
    for i in range(len(pred)):
        if i % 10000 == 0:
            print('label: %f, pred: %f' % (label[i], pred[i]))
        fw.write(str(i + 1) + ',' + str(label[i]) + ',' + str(pred[i]) + '\n')
# the `with` block closes the file; no explicit fw.close() is needed

AUC = auc.auc(label, pred)
print('auc: %f' % AUC)
print("demo done")
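# Optional sanity check, not in the original script: score the same pairs
# with scikit-learn (assumes the labels are binary 0/1).
from sklearn.metrics import roc_auc_score
print('sklearn auc: %f' % roc_auc_score(label, pred))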
s = "/valid/fluorescence_valid.txt" sn = None sp = "/valid/networkPositions_valid.txt" TV, _, PV = read(s, sn, sp, Knormal) print "reading test..." s = "/test/fluorescence_test.txt" sn = None sp = "/test/networkPositions_test.txt" TT, _, PT = read(s, sn, sp, Knormal) [RN2, RT, RV, RN3] = learnAndPredict(TN1, CN1, [TN2, TT, TV, TN3]) suff = np.random.randint(10000) f = open("./res_ver" + str(VER) + ".csv", 'w') f.write("NET_neuronI_neuronJ,Strength\n") for i in range(1000): for j in range(1000): f.write("valid_" + str(i + 1) + "_" + str(j + 1) + "," + str(RV[i][j]) + "\n") for i in range(1000): for j in range(1000): f.write("test_" + str(i + 1) + "_" + str(j + 1) + "," + str(RT[i][j]) + "\n") f.close() print "Wrote solution of VER ==", str(VER) RN2_ = RN2.flatten().tolist() a = auc.auc(CN2.flatten().tolist(), RN2_) RN3_ = RN3.flatten().tolist() a2 = auc.auc(CN3.flatten().tolist(), RN3_) print("RES: %.2f learning (%.2f, %.2f)" % ((a + a2) * 50, a * 100, a2 * 100))
        X = [
            batch[key + 1].values
            for key in np.hstack((np.arange(0, 6),
                                  np.arange(6, len(sparse_feature_dim))))
        ]
        labels.extend(batch[0].values)
        pred = model.predict_on_batch(X)
        preds.extend(pred.flatten().tolist())
        cnt += batch_size
        if cnt % (batch_size * 100) == 0:  # log every 100 batches
            print(pred[0])

    print('calculating auc ......')
    AUC = auc.auc(labels, preds)
    print('auc: %f' % AUC)

elif mode == 'pred':
    # Earlier checkpoints, kept for the record:
    # model.load_weights('model_save/deep_fm_fn_bs10000-ep001-loss0.155-val_loss0.153.h5')  # auc: 0.714774
    # model.load_weights('model_save/deep_fm_fn_bs15000-ep001-loss0.156-val_loss0.152.h5')  # auc: 0.717083
    # model.load_weights('model_save/deep_fm_fn-ep002-loss0.154-val_loss0.154-bs15000-ee20-hz[128, 128].h5')  # auc: 0.718581
    # model.load_weights('model_save/deep_fm_fn-ep020-loss0.153-val_loss0.153-bs15000-ee20-hz[5, 600].h5')  # auc: 0.719317
    model.load_weights(
        'model_save/deep_fm_fn-ep043-loss0.152-val_loss0.152-bs15000-ee20-hz[3, 600].h5'
    )  # auc: 0.722419

    ctr = []
    reader = pd.read_csv(
        '/home/yezhizi/Documents/python/2018DM_Project/track2/KDD_Track2_solution.csv',
        chunksize=chunk_size)
def auc_scorer(estimator, X, y):
    predicted = estimator.predict_proba(X)[:, 1]
    return auc.auc(y, predicted)
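# auc_scorer matches scikit-learn's (estimator, X, y) scorer signature, so
# it can be passed anywhere a `scoring` callable is accepted. A minimal
# usage sketch on synthetic data; the custom `auc` module is assumed to be
# importable and to return a scalar.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

X, y = make_classification(n_samples=500, random_state=0)
scores = cross_val_score(LogisticRegression(max_iter=1000), X, y,
                         scoring=auc_scorer, cv=5)
print('mean AUC: %.4f' % scores.mean())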
        pred = model.predict_on_batch(X)
        preds.extend(pred.flatten().tolist())
        cnt += batch_size
        if cnt % (batch_size * 100) == 0:  # log every 100 batches
            print(pred[0])
            now_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
            print(now_time)

    print('calculating auc ......')
    print('labels: %d' % len(click))
    print('preds: %d' % len(preds))

    # Empirical CTR (clicks / impressions) serves as the soft label here.
    AUC = auc.auc(np.array(click, dtype=np.float64) / np.array(imp, dtype=np.float64),
                  preds)
    print('auc: %f' % AUC)
    AUC = auc.scoreClickAUC(click, imp, preds)
    print('scoreClickAUC: %f' % AUC)
    now_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    print(now_time)

    # write preds to csv
    with open('data/' + model_name + '.csv', 'w') as fw:
        for i in range(len(preds)):
            if i % 1000000 == 0:
                print('label: %f, pred: %f' % (click[i] / imp[i], preds[i]))
            fw.write(str(preds[i]) + '\n')
    # the `with` block closes the file; no explicit fw.close() is needed
print "reading valid..." s = "/valid/fluorescence_valid.txt" sn = None sp = "/valid/networkPositions_valid.txt" TV, _, PV = read(s,sn,sp, Knormal) print "reading test..." s = "/test/fluorescence_test.txt" sn = None sp = "/test/networkPositions_test.txt" TT, _, PT = read(s,sn,sp, Knormal) [RN2, RT, RV, RN3] = learnAndPredict(TN1, CN1, [TN2, TT, TV, TN3]) suff = np.random.randint(10000) f = open("./res_ver"+str(VER)+".csv", 'w') f.write("NET_neuronI_neuronJ,Strength\n") for i in range (1000): for j in range (1000): f.write("valid_" +str(i+1)+"_"+str(j+1)+","+str(RV[i][j])+"\n") for i in range (1000): for j in range (1000): f.write("test_" +str(i+1)+"_"+str(j+1)+","+str(RT[i][j])+"\n") f.close() print "Wrote solution of VER ==", str(VER) RN2_ = RN2.flatten().tolist() a = auc.auc(CN2.flatten().tolist(),RN2_) RN3_ = RN3.flatten().tolist() a2 = auc.auc(CN3.flatten().tolist(),RN3_) print ("RES: %.2f learning (%.2f, %.2f)" % ((a+a2)*50, a*100, a2*100 ))
sp = "/" + "small" + "/networkPositions_" + name + ".txt" print name TSmall5, CSmall5, PSmall5 = read(s, sn, sp, Knormal) gc.collect() name = "iNet1_Size100_CC06inh" s = "/" + "small" + "/fluorescence_" + name + ".txt" sn = "/" + "small" + "/network_" + name + ".txt" sp = "/" + "small" + "/networkPositions_" + name + ".txt" print name TSmall6, CSmall6, PSmall6 = read(s, sn, sp, Knormal) gc.collect() [RS4, RS6] = learnAndPredict(TSmall5, CSmall5, [TSmall4, TSmall6]) RS4_ = RS4.flatten().tolist() a = auc.auc(CSmall4.flatten().tolist(), RS4_) RS6_ = RS6.flatten().tolist() a2 = auc.auc(CSmall6.flatten().tolist(), RS6_) print("RES: %.2f Small4, %.2f Small6" % (a * 100, a2 * 100)) f = open("res_small_4_6.csv", 'w') f.write("NET_neuronI_neuronJ,Strength\n") for i in range(NN): for j in range(NN): f.write("valid_" + str(i + 1) + "_" + str(j + 1) + "," + str(RS4[i][j]) + "\n") for i in range(NN): for j in range(NN): f.write("test_" + str(i + 1) + "_" + str(j + 1) + "," + str(RS6[i][j]) + "\n") f.close() print "Wrote solution to ./res_Small_4_6.csv"
import pandas as pd

from auc import auc

DATA_PATH = '../public/'

i = 1
ans = pd.read_csv(DATA_PATH + 'label-%03d.csv' % i, index_col='user_id')
preds = pd.read_csv('../predict-%03d.csv' % i, index_col='user_id')

preds['auc'] = 0.0
for index, pred in preds.iterrows():
    truth = ans.loc[index]
    a = auc(pred[0:28], truth)
    preds.at[index, 'auc'] = round(a, 4)

# Move the 'auc' column to the front.
cols = preds.columns.tolist()
preds = preds[cols[-1:] + cols[:-1]]
preds.to_csv('../auc-%03d.csv' % i)
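# Hypothetical batch variant of the script above, assuming label/predict
# files 001..010 exist; it reports the mean per-user AUC per file.
for i in range(1, 11):
    ans = pd.read_csv(DATA_PATH + 'label-%03d.csv' % i, index_col='user_id')
    preds = pd.read_csv('../predict-%03d.csv' % i, index_col='user_id')
    preds['auc'] = [round(auc(row[0:28], ans.loc[idx]), 4)
                    for idx, row in preds.iterrows()]
    print('file %03d: mean per-user AUC %.4f' % (i, preds['auc'].mean()))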
sn = "/" + "small" + "/network_" + name + ".txt" sp = "/" + "small" + "/networkPositions_" + name + ".txt" print name TSmall5, CSmall5, PSmall5 = read(s, sn, sp, Knormal) gc.collect() name = "iNet1_Size100_CC06inh" s = "/" + "small" + "/fluorescence_" + name + ".txt" sn = "/" + "small" + "/network_" + name + ".txt" sp = "/" + "small" + "/networkPositions_" + name + ".txt" print name TSmall6, CSmall6, PSmall6 = read(s, sn, sp, Knormal) gc.collect() [RS4, RS6] = learnAndPredict(TSmall5, CSmall5, [TSmall4, TSmall6]) RS4_ = RS4.flatten().tolist() a = auc.auc(CSmall4.flatten().tolist(), RS4_) RS6_ = RS6.flatten().tolist() a2 = auc.auc(CSmall6.flatten().tolist(), RS6_) print ("RES: %.2f Small4, %.2f Small6" % (a * 100, a2 * 100)) f = open("res_small_4_6.csv", "w") f.write("NET_neuronI_neuronJ,Strength\n") for i in range(NN): for j in range(NN): f.write("valid_" + str(i + 1) + "_" + str(j + 1) + "," + str(RS4[i][j]) + "\n") for i in range(NN): for j in range(NN): f.write("test_" + str(i + 1) + "_" + str(j + 1) + "," + str(RS6[i][j]) + "\n") f.close() print "Wrote solution to ./res_Small_4_6.csv"