def learn_method(self):
    # Build an initial population of 5 random weight vectors.
    population = []
    for i in range(5):
        weights = [random.randint(1, 10) for _ in range(4)]
        cal = calculator.cal_map(weights[0], weights[1], weights[2], weights[3])
        score = cal.get("map") * cal.get("ndcg")
        population.append(Weight(weights, score))
    population.sort()

    # We use an evolutionary algorithm to find the best weights.
    # There are two ways to generate new weights, chosen at random:
    # the first blends two parent vectors using a random alpha,
    # the second splices the two parents at a random cut point.
    for n in range(1000):
        choice = random.randint(0, 1)
        child = [0, 0, 0, 0]
        if choice == 0:
            # Blend: weighted average of two parents from the top 4.
            alpha = random.random()
            c1 = random.randint(0, 3)
            c2 = random.randint(0, 3)
            for i in range(4):
                child[i] = (population[c1].weights[i] * alpha) + (population[c2].weights[i] * (1 - alpha))
        else:
            # Crossover: first `alpha` entries from one parent, the rest from the other.
            alpha = random.randint(1, 3)
            c1 = random.randint(0, 3)
            c2 = random.randint(0, 3)
            for i in range(0, alpha):
                child[i] = population[c1].weights[i]
            for i in range(alpha, 4):
                child[i] = population[c2].weights[i]
        # Here we call Elasticsearch and compute the score, for example
        # score = f(child[0], child[1], child[2], child[3]).
        cal = calculator.cal_map(child[0], child[1], child[2], child[3])
        s = cal.get("map") * cal.get("ndcg")
        population.append(Weight(child, s))
        population.sort()
        population.pop()  # drop the worst candidate to keep the population size fixed
    print("done")
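# A minimal sketch (an assumption, not the original class) of the Weight
# container the evolutionary loop above relies on. The loop sorts the
# population and pops the last element, so ordering is assumed to put
# higher-scoring candidates first; adjust __lt__ if the real class differs.
class Weight:
    def __init__(self, weights, score):
        self.weights = weights  # list of 4 field weights
        self.score = score      # MAP * NDCG fitness value

    def __lt__(self, other):
        # Sort descending by score so .pop() discards the worst candidate.
        return self.score > other.score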
def mini_batch_SGD(eta, batch_size, epochs):
    """Train with mini-batch SGD and report AUC and wall-clock time."""
    MB_start_time = time.time()
    np.random.seed(seed)
    beta_init = np.random.randn(X_train.shape[1], 1)
    w1 = Weight(X_train, y_train, beta_init, eta, epochs, batch_size=batch_size)
    final_betas_MB, _ = w1.train(w1.mini_batch_gradient_descent)
    prob_MB, y_pred_MB = classification(X_test, final_betas_MB, y_test)[0:2]
    false_pos_MB, true_pos_MB = roc_curve(y_test, prob_MB)[0:2]
    AUC_MB = auc(false_pos_MB, true_pos_MB)
    print("Area under curve MB%s: " % batch_size, AUC_MB)
    MB_time = time.time() - MB_start_time
    return AUC_MB, MB_time, false_pos_MB, true_pos_MB
def lmmd(source, target, s_label, t_label, kernel_mul=2.0, kernel_num=5,
         fix_sigma=None, num_classes=10):
    """Local MMD loss between class-conditional source/target distributions."""
    batch_size = source.size()[0]
    weight_ss, weight_tt, weight_st = Weight.cal_weight(
        s_label, t_label, type='visual', class_nums=num_classes)
    weight_ss = torch.from_numpy(weight_ss).cuda()
    weight_tt = torch.from_numpy(weight_tt).cuda()
    weight_st = torch.from_numpy(weight_st).cuda()

    kernels = guassian_kernel(source, target, kernel_mul=kernel_mul,
                              kernel_num=kernel_num, fix_sigma=fix_sigma)
    loss = torch.Tensor([0]).cuda()
    #print(kernels)
    # Guard against NaNs in the kernel matrix (e.g. degenerate bandwidths).
    if torch.sum(torch.isnan(sum(kernels))):
        return loss
    SS = kernels[:batch_size, :batch_size]
    TT = kernels[batch_size:, batch_size:]
    ST = kernels[:batch_size, batch_size:]
    loss += torch.sum(weight_ss * SS + weight_tt * TT - 2 * weight_st * ST)
    return loss
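# A minimal sketch (assumed, not the project's code) of the Weight.cal_weight
# helper used above. For LMMD, each pair of samples is weighted by the product
# of their normalized per-class label weights, summed over classes, so the
# three matrices re-weight the source-source, target-target and source-target
# kernel blocks class by class. s_label is assumed to be hard labels and
# t_label soft predictions (a (batch, class_nums) probability tensor).
import numpy as np

class Weight:
    @staticmethod
    def cal_weight(s_label, t_label, type='visual', class_nums=10):
        # One-hot encode hard source labels; keep soft target predictions.
        s_vec = np.eye(class_nums)[s_label.cpu().numpy()]
        t_vec = t_label.cpu().detach().numpy()
        # Normalize each class column so every class contributes one unit.
        s_sum = s_vec.sum(axis=0, keepdims=True)
        s_sum[s_sum == 0] = 100  # guard against classes absent from the batch
        t_sum = t_vec.sum(axis=0, keepdims=True)
        t_sum[t_sum == 0] = 100
        s_vec = s_vec / s_sum
        t_vec = t_vec / t_sum
        # Pairwise sample weights: sum over classes of the outer products.
        weight_ss = (s_vec @ s_vec.T).astype('float32')
        weight_tt = (t_vec @ t_vec.T).astype('float32')
        weight_st = (s_vec @ t_vec.T).astype('float32')
        return weight_ss, weight_tt, weight_st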
def conditional(self, source, target, s_label, t_label, kernel_mul=2.0,
                kernel_num=5, fix_sigma=None):
    """Conditional MMD loss; method variant of lmmd() above."""
    batch_size = source.size()[0]
    weight_ss, weight_tt, weight_st = Weight.cal_weight(s_label, t_label, type='visual')
    weight_ss = torch.from_numpy(weight_ss).cuda()
    weight_tt = torch.from_numpy(weight_tt).cuda()
    weight_st = torch.from_numpy(weight_st).cuda()
    kernels = self.guassian_kernel(source, target, kernel_mul=kernel_mul,
                                   kernel_num=kernel_num, fix_sigma=fix_sigma)
    loss = torch.Tensor([0]).cuda()
    if torch.sum(torch.isnan(sum(kernels))):
        return loss
    SS = kernels[:batch_size, :batch_size]
    TT = kernels[batch_size:, batch_size:]
    ST = kernels[:batch_size, batch_size:]
    loss += torch.sum(weight_ss * SS + weight_tt * TT - 2 * weight_st * ST)
    return loss
def Best_Parameters(epochs, batch_size, method, etamin, etamax, step, y_val, X_val):
    """Grid search over learning rates on a log scale; returns the best AUC and eta."""
    beta_init = np.random.randn(X_train.shape[1], 1)
    eta_vals = np.logspace(etamin, etamax, step)
    auc_array = np.zeros((2, step))
    for i, eta in enumerate(eta_vals):
        np.random.seed(seed)
        print("Iteration: ", i)
        print("eta: ", eta)
        w = Weight(X_train, y_train, beta_init, eta, epochs, batch_size=batch_size)
        method_ = getattr(w, method)  # e.g. "mini_batch_gradient_descent"
        final_betas, _ = w.train(method_)
        prob = sigmoid(X_val, final_betas)
        auc_array[0][i] = roc_auc_score(y_val, prob)
        auc_array[1][i] = eta
    max_auc = np.max(auc_array[0])
    best_eta = auc_array[1][np.argmax(auc_array[0])]
    return max_auc, best_eta
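# Hypothetical usage of the grid search above (the argument values are
# illustrative assumptions, not taken from the original runs):
max_auc, best_eta = Best_Parameters(
    epochs=100, batch_size=32, method="mini_batch_gradient_descent",
    etamin=-5, etamax=-1, step=20, y_val=y_val, X_val=X_val)
print("Best AUC %.4f at eta %.2e" % (max_auc, best_eta))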
def get(self):
    """Return all stored weights for an email address, newest first."""
    statusCode = 200
    emailAddress = str(self.request.get('emailAddress'))
    all_weight = []
    for weight in Weight.query(Weight.emailAddress == emailAddress):
        all_weight.append(weight)
    all_weight = sorted(all_weight, key=lambda r: r.dateTime, reverse=True)
    return_list = []
    for weight in all_weight:
        return_list.append({'weight': float(weight.weight), 'date': str(weight.dateTime)})
    self.response.write(json.dumps({'statusCode': statusCode, 'weights': return_list}))
def post(self):
    """Store a new weight entry; an optional 'date' field overrides the timestamp."""
    statusCode = 202
    emailAddress = str(self.request.get('emailAddress'))
    content = json.loads(self.request.body)
    logging.info(content)
    weight = float(content['weight'])
    try:
        date = str(content['date'])
    except KeyError:
        date = ''
    if date == '':
        newWeight = Weight(weight=weight, emailAddress=emailAddress)
    else:
        newWeight = Weight(weight=weight, emailAddress=emailAddress,
                           dateTime=datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S.%f"))
    if newWeight:
        newWeight.put()
        statusCode = 200
    self.response.write(json.dumps({'statusCode': statusCode}))
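# A minimal sketch of the datastore model the two handlers above appear to
# assume (the real model may differ; the property names follow the handler code).
from google.appengine.ext import ndb

class Weight(ndb.Model):
    weight = ndb.FloatProperty(required=True)
    emailAddress = ndb.StringProperty(required=True)
    dateTime = ndb.DateTimeProperty(auto_now_add=True)  # default when no 'date' is posted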
def Build_Weight_Database(self, database, version, num_of_kpts, cutoff,
                          force_update=False, K=10, depth=4, nleaves=10000):
    updated = self.Train_Database_Sign(database, version, num_of_kpts,
                                       force_update, K, depth, nleaves)
    # Load the signature for this database version.
    sign_dir = os.path.join(self.SIGN_DIR, 'db_version_' + str(version))
    sign = Signature()
    sign.load_sign(sign_dir, self.SIGN_FILE + str(num_of_kpts))
    print('Signature Loaded')
    wt_file = os.path.join(sign_dir, self.WEIGHT_FILE + str(num_of_kpts) + '_' + str(cutoff))
    wts_file = os.path.join(sign_dir, self.WEIGHT_SIGN_FILE + str(num_of_kpts) + '_' + str(cutoff))
    # Regenerate the weights if training updated the signatures or if either
    # cached file is missing.
    if updated or (not os.path.isfile(wt_file) and not os.path.isfile(wts_file)):
        wt = Weight(cutoff)
        wt.get_weight(sign.sign_database)
        wt.weight_train_database(sign.sign_database)
        wt.save_weights(sign_dir, self.WEIGHT_FILE + str(num_of_kpts) + '_' + str(cutoff))
        wt.save_weighted_sign(sign_dir, self.WEIGHT_SIGN_FILE + str(num_of_kpts) + '_' + str(cutoff))
        print('')
        print('Weighted Sign Generated')
    else:
        print('Weighted Sign Has Already Been Generated')
if args.pseaac:
    feats.pseaac(lam=int(args.pseaac), weight=PSE_WEIGHT)
if args.physico:
    feats.physicochem()
if args.kmer is None and args.pseaac is None and not args.physico:
    print("You must specify at least one feature type (-k, -p, -y).")
else:
    # Weight profiles if requested (see the Weight API sketch below).
    if args.weight:
        # Distance threshold and cluster type for weighting.
        d = args.dist[0]
        cluster_type = args.cluster_type[0]
        # Weight GTA profiles.
        pairwiseGTA = Weight.load(args.weight[0])
        GTA_weight = Weight(gta_profs, pairwiseGTA)
        GTA_clusters = GTA_weight.cluster(cluster_type, d)
        GTA_weight.weight(GTA_clusters)
        # Weight virus profiles.
        pairwiseViral = Weight.load(args.weight[1])
        virus_weight = Weight(viral_profs, pairwiseViral)
        virus_clusters = virus_weight.cluster(cluster_type, d)
        virus_weight.weight(virus_clusters)
    # Create the SVM.
    c = args.c[0]
    kernel = args.kernel[0]
    kernel_var = float(args.kernel[1])
    svm = SVM(gta_profs, viral_profs, c, kernel, kernel_var)
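# A minimal sketch (assumptions, not the project's code) of the Weight API the
# snippet above and the xval() function below rely on: load a pairwise distance
# matrix, cluster profiles at a distance threshold, then spread one unit of
# weight evenly across each cluster so near-duplicate profiles do not dominate
# training. The file format and symmetric dict-of-dicts layout are assumptions.
class Weight:
    def __init__(self, profiles, pairwise):
        self.profiles = profiles   # objects with .org_name and .weight
        self.pairwise = pairwise   # symmetric dict-of-dicts of distances

    @staticmethod
    def load(path):
        """Parse a whitespace-separated pairwise distance file (format assumed)."""
        pairwise = {}
        with open(path) as f:
            for line in f:
                a, b, dist = line.split()
                pairwise.setdefault(a, {})[b] = float(dist)
                pairwise.setdefault(b, {})[a] = float(dist)
        return pairwise

    def cluster(self, cluster_type, d):
        """Greedy complete-linkage ('farthest') grouping at threshold d."""
        clusters = []
        for p in self.profiles:
            for c in clusters:
                if all(self.pairwise[p.org_name][q.org_name] < d for q in c):
                    c.append(p)
                    break
            else:
                clusters.append([p])
        return clusters

    def weight(self, clusters):
        """Each cluster shares one unit of weight among its members."""
        for c in clusters:
            for p in c:
                p.weight = 1.0 / len(c)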
def Plots(epochs, AUC_time_plot=0, ROC_plot=0, Lift_plot_test_NN=0, Lift_plot_train_NN=0,
          GD_plot=0, MB_GD_plot=0, Stoch_GD_plot=0, Newton_plot=0, Scatter_GD_plot=0):
    if ROC_plot == 1 or AUC_time_plot == 1:
        # Full-batch gradient descent.
        GRAD_start_time = time.time()
        np.random.seed(seed)
        beta_init = np.random.randn(X_train.shape[1], 1)
        w = Weight(X_train, y_train, beta_init, 6.892612104349695e-05, epochs)
        final_betas_grad, cost = w.train(w.gradient_descent)
        prob_grad, y_pred_grad = classification(X_test, final_betas_grad, y_test)[0:2]
        false_pos_grad, true_pos_grad = roc_curve(y_test, prob_grad)[0:2]
        AUC_GRAD = auc(false_pos_grad, true_pos_grad)
        print("Area under curve gradient: ", AUC_GRAD)
        GRAD_time = time.time() - GRAD_start_time

        # Stochastic gradient descent.
        SGD_start_time = time.time()
        np.random.seed(seed)
        beta_init = np.random.randn(X_train.shape[1], 1)
        w2 = Weight(X_train, y_train, beta_init, 0.0007924828983539169, epochs)
        final_betas_ST, _ = w2.train(w2.stochastic_gradient_descent)
        prob_ST, y_pred_ST = classification(X_test, final_betas_ST, y_test)[0:2]
        false_pos_ST, true_pos_ST = roc_curve(y_test, prob_ST)[0:2]
        AUC_SGD = auc(false_pos_ST, true_pos_ST)
        print("Area under curve ST: ", AUC_SGD)
        SGD_time = time.time() - SGD_start_time

        """np.random.seed(seed)
        beta_init = np.random.randn(X_train.shape[1], 1)
        w3 = Weight(X_train, y_train, beta_init, 0.001, 20)
        final_betas_Newton, _ = w3.train(w3.newtons_method)
        prob_Newton, y_pred_Newton = classification(X_train, final_betas_Newton, y_test)[0:2]
        false_pos_Newton, true_pos_Newton = roc_curve(y_test, prob_Newton)[0:2]
        print("Area under curve Newton: ", auc(false_pos_Newton, true_pos_Newton))"""

        AUC_MB5 = 0
        MB5_time = 0
        AUC_MB1000 = 0
        MB1000_time = 0
        AUC_MB6000 = 0
        MB6000_time = 0
        AUC_MB = 0
        false_pos_MB = 0
        true_pos_MB = 0
        if AUC_time_plot != 0:
            # Mini-batch SGD at three batch sizes, each with its tuned eta.
            AUC_MB5, MB5_time, _, _ = mini_batch_SGD(0.0038625017292608175, 5, epochs)
            AUC_MB1000, MB1000_time, _, _ = mini_batch_SGD(0.0009501185073181439, 1000, epochs)
            AUC_MB6000, MB6000_time, _, _ = mini_batch_SGD(0.0001999908383831537, 6000, epochs)
            return (AUC_SGD, AUC_GRAD, AUC_MB5, AUC_MB1000, AUC_MB6000,
                    SGD_time, GRAD_time, MB5_time, MB1000_time, MB6000_time)
        else:
            AUC_MB, _, false_pos_MB, true_pos_MB = mini_batch_SGD(0.0038625017292608175, 32, epochs)

        # Scikit-learn-backed SGD for comparison.
        np.random.seed(seed)
        beta_init = np.random.randn(X_train.shape[1], 1)
        w4 = Weight(X_train, y_train, beta_init, 0.0007924828983539169, epochs)
        final_betas_ST_Skl, _ = w4.train(w4.stochastic_gradient_descent_Skl)
        prob_ST_Skl, y_pred_ST_Skl = classification(X_test, final_betas_ST_Skl[0], y_test)[0:2]
        false_pos_ST_Skl, true_pos_ST_Skl = roc_curve(y_test, prob_ST_Skl)[0:2]
        print("Area under curve ST_skl: ", auc(false_pos_ST_Skl, true_pos_ST_Skl))

        # Neural network with the first of the two tuned parameter sets.
        epochs = 20
        batch_size = 25
        eta = 0.1
        lmbd = 0.01
        n_hidden_neurons = 41
        ####################
        # Alternative tuned set:
        # epochs = 20
        # batch_size = 26
        # eta = 3.14230708e+00
        # lmbd = 1.25472709e-02
        # n_hidden_neurons = 66
        np.random.seed(seed)
        n_categories = 1
        dnn = NN(X_train, y_train, eta=eta, lmbd=lmbd, epochs=epochs, batch_size=batch_size,
                 n_hidden_neurons=n_hidden_neurons, n_categories=n_categories,
                 cost_grad='crossentropy', activation='sigmoid', activation_out='sigmoid')
        dnn.train_and_validate()
        y_predict = dnn.predict_probabilities(X_test)
        false_pos_NN, true_pos_NN = roc_curve(y_test, y_predict)[0:2]
        print("AUC score NN: ", auc(false_pos_NN, true_pos_NN))

        # ROC curves for all methods.
        plt.plot([0, 1], [0, 1], "k--")
        plt.plot(false_pos_grad, true_pos_grad, label="Gradient")
        plt.plot(false_pos_ST, true_pos_ST, label="Stoch")
        plt.plot(false_pos_ST_Skl, true_pos_ST_Skl, label="Stoch_Skl")
        plt.plot(false_pos_MB, true_pos_MB, label="Mini")
        # plt.plot(false_pos_Newton, true_pos_Newton, label="Newton")
        plt.plot(false_pos_NN, true_pos_NN, label='NeuralNetwork')
        plt.legend()
        plt.xlabel("False Positive rate")
        plt.ylabel("True Positive rate")
        plt.title("ROC curve")
        plt.show()

    # Creates cumulative gain charts (lift plots) for the neural network.
    # The two optimal parameter sets from tuning are listed below.
    if Lift_plot_test_NN == 1:
        np.random.seed(seed)
        # epochs = 20
        # batch_size = 26
        # eta = 3.14230708e+00
        # lmbd = 1.25472709e-02
        # n_hidden_neurons = 66
        epochs = 20
        batch_size = 25
        eta = 0.1
        lmbd = 0.01
        n_hidden_neurons = 41
        n_categories = 1
        dnn = NN(X_train, y_train, eta=eta, lmbd=lmbd, epochs=epochs, batch_size=batch_size,
                 n_hidden_neurons=n_hidden_neurons, n_categories=n_categories,
                 cost_grad='crossentropy', activation='sigmoid', activation_out='sigmoid')
        dnn.train_and_validate()
        y_predict_proba = dnn.predict_probabilities(X_test)
        y_predict_proba_tuple = np.concatenate((1 - y_predict_proba, y_predict_proba), axis=1)
        pos_true = y_test.sum()
        pos_true_perc = pos_true / len(y_test)
        x = np.linspace(0, 1, len(y_test))
        m = 1 / pos_true_perc
        best_line = np.zeros((len(x)))
        for i in range(len(x)):
            best_line[i] = m * x[i]
            if x[i] > pos_true_perc:
                best_line[i] = 1
        x_, y_ = skplt.helpers.cumulative_gain_curve(y_test, y_predict_proba_tuple[:, 1])
        Score = (np.trapz(y_, x=x_) - 0.5) / (np.trapz(best_line, dx=(1 / len(y_predict_proba))) - 0.5)
        print('Area ratio score(test)', Score)
        # Area ratio score = 0.49129354889528054 (NN, test vs. predicted).
        perc = np.linspace(0, 100, len(y_test))
        plt.plot(x_ * 100, y_ * 100)
        plt.plot(perc, best_line * 100)
        plt.plot(perc, perc, "k--")
        plt.xlabel("Percentage of clients")
        plt.ylabel("Cumulative % of defaults")
        plt.title("Cumulative Gain Chart for Test Data")
        plt.show()

        # Lets you insert a threshold and classify.
        _, y_predict, y_predict_tot = classification(y_prob_input=y_predict_proba, threshold=0.5)
        pos = y_predict.sum()
        neg = len(y_predict) - pos
        pos_perc = pos / len(y_predict)
        neg_perc = neg / len(y_predict)
        print("default: ", pos_perc)
        print("Non-default: ", neg_perc)

    if Lift_plot_train_NN == 1:
        np.random.seed(seed)
        # epochs = 20
        # batch_size = 26
        # eta = 3.14230708e+00
        # lmbd = 1.25472709e-02
        # n_hidden_neurons = 66
        epochs = 20
        batch_size = 25
        eta = 0.1
        lmbd = 0.01
        n_hidden_neurons = 41
        n_categories = 1
        dnn = NN(X_train, y_train, eta=eta, lmbd=lmbd, epochs=epochs, batch_size=batch_size,
                 n_hidden_neurons=n_hidden_neurons, n_categories=n_categories,
                 cost_grad='crossentropy', activation='sigmoid', activation_out='sigmoid')
        dnn.train_and_validate()
        y_predict_proba = dnn.predict_probabilities(X_train)
        y_predict_proba_tuple = np.concatenate((1 - y_predict_proba, y_predict_proba), axis=1)
        pos_true = y_train.sum()
        pos_true_perc = pos_true / len(y_train)
        x = np.linspace(0, 1, len(y_train))
        m = 1 / pos_true_perc
        best_line = np.zeros((len(x)))
        for i in range(len(x)):
            best_line[i] = m * x[i]
            if x[i] > pos_true_perc:
                best_line[i] = 1
        x_, y_ = skplt.helpers.cumulative_gain_curve(y_train, y_predict_proba_tuple[:, 1])
        Score = (np.trapz(y_, x=x_) - 0.5) / (np.trapz(best_line, dx=(1 / len(y_predict_proba))) - 0.5)
        print('Area ratio score(train)', Score)
        perc = np.linspace(0, 100, len(y_train))
        plt.plot(x_ * 100, y_ * 100)
        plt.plot(perc, best_line * 100)
        plt.plot(perc, perc, "k--")
        plt.xlabel("Percentage of clients")
        plt.ylabel("Cumulative % of defaults")
        plt.title("Cumulative Gain Chart for Train Data")
        plt.show()

        # Lets you insert a threshold and classify.
        _, y_predict, y_predict_tot = classification(y_prob_input=y_predict_proba, threshold=0.5)
        pos = y_predict.sum()
        neg = len(y_predict) - pos
        pos_perc = pos / len(y_predict)
        neg_perc = neg / len(y_predict)
        print("default: ", pos_perc)
        print("Non-default: ", neg_perc)

    # Cost-history and scatter plots for the individual optimizers.
    beta_init = np.random.randn(X_train.shape[1], 1)
    w = Weight(X_train, y_train, beta_init, 0.0007924828983539169, epochs)
    if GD_plot == 1:
        _, cost_all = w.train(w.gradient_descent)
        epoch = np.arange(len(cost_all))
        plt.plot(epoch, cost_all)
        plt.show()
    if MB_GD_plot == 1:
        _, cost_all = w.train(w.mini_batch_gradient_descent)
        batch = np.arange(len(cost_all))
        plt.plot(batch, cost_all)
        plt.show()
    if Stoch_GD_plot == 1:
        _, cost_all = w.train(w.stochastic_gradient_descent)
        batch = np.arange(len(cost_all))
        plt.plot(batch, cost_all)
        plt.show()
    if Newton_plot == 1:
        _, cost_all = w.train(w.newtons_method)
        epochs_arr = np.arange(len(cost_all))
        plt.plot(epochs_arr, cost_all)
        plt.show()
    if Scatter_GD_plot == 1:
        final_betas, _ = w.train(w.gradient_descent)
        prob_train = classification(X_train, final_betas)[0]
        x_sigmoid = np.dot(X_train, final_betas)
        plt.scatter(x_sigmoid, prob_train)
        plt.show()
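# A minimal sketch (an assumption about the helper, not the original code) of
# the classification() function used throughout: it turns model outputs into
# probabilities and thresholded class predictions, and supports both the
# (X, betas) and the precomputed-probability call styles seen above.
def classification(X=None, betas=None, y=None, y_prob_input=None, threshold=0.5):
    """Return (probabilities, hard predictions, number of positive predictions)."""
    if y_prob_input is None:
        prob = 1.0 / (1.0 + np.exp(-np.dot(X, betas)))  # sigmoid of the linear score
    else:
        prob = y_prob_input
    y_pred = (prob >= threshold).astype(int)
    return prob, y_pred, y_pred.sum()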
# Create the neurons, layer by layer.
for i in range(0, numberOfInputNeurons):
    tmpNeuron = Neuron("INPUT", "I" + str(i))
    neuronList.append(tmpNeuron)
for i in range(0, numberOfHiddenNeurons):
    tmpNeuron = Neuron("HIDDEN", "H" + str(i))
    neuronList.append(tmpNeuron)
for i in range(0, numberOfOutputNeurons):
    tmpNeuron = Neuron("OUTPUT", "O" + str(i))
    neuronList.append(tmpNeuron)

# Now, generate all of the weights.
# Start from the hidden layer: fully connect inputs to hidden neurons.
for i in range(numberOfInputNeurons, numberOfInputNeurons + numberOfHiddenNeurons):
    outputNeuron = neuronList[i]
    for j in range(0, numberOfInputNeurons):
        inputNeuron = neuronList[j]
        tmpWeight = Weight("W" + str(numberofWeights), random.uniform(0, 1),
                           inputNeuron, outputNeuron)
        inputNeuron.UpdateOutputWeights(tmpWeight)
        outputNeuron.UpdateInputWeights(tmpWeight)
        weightList.append(tmpWeight)
        numberofWeights += 1

# Then connect hidden neurons to output neurons.
for i in range(numberOfInputNeurons + numberOfHiddenNeurons,
               numberOfInputNeurons + numberOfHiddenNeurons + numberOfOutputNeurons):
    outputNeuron = neuronList[i]
    for j in range(numberOfInputNeurons, numberOfInputNeurons + numberOfHiddenNeurons):
        inputNeuron = neuronList[j]
        tmpWeight = Weight("W" + str(numberofWeights), random.uniform(0, 1),
                           inputNeuron, outputNeuron)
        inputNeuron.UpdateOutputWeights(tmpWeight)
        outputNeuron.UpdateInputWeights(tmpWeight)
        weightList.append(tmpWeight)
        numberofWeights += 1
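# Minimal sketches (assumed, based only on the calls above) of the Neuron and
# Weight classes this wiring code expects.
class Weight:
    def __init__(self, name, value, inputNeuron, outputNeuron):
        self.name = name                  # e.g. "W0", "W1", ...
        self.value = value                # initial weight in [0, 1)
        self.inputNeuron = inputNeuron    # source of the connection
        self.outputNeuron = outputNeuron  # destination of the connection

class Neuron:
    def __init__(self, layer, name):
        self.layer = layer                # "INPUT", "HIDDEN" or "OUTPUT"
        self.name = name                  # e.g. "I0", "H3", "O1"
        self.inputWeights = []
        self.outputWeights = []

    def UpdateInputWeights(self, weight):
        self.inputWeights.append(weight)

    def UpdateOutputWeights(self, weight):
        self.outputWeights.append(weight)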
'''
Created on 11-Oct-2014

@author: ghantasa
'''
from Temperature import Temperature
from Distance import Distance
from Memory import Memory
from Weight import Weight

if __name__ == '__main__':
    while True:
        choice = input('Available conversions ... \n' +
                       '1. Temperature\n' +
                       '2. Distance\n' +
                       '3. Memory\n' +
                       '4. Weight\n' +
                       '5. Exit\n' +
                       'Please enter your choice ... ')
        if choice == '1':
            t = Temperature()
            t.convert()
        elif choice == '2':
            d = Distance()
            d.convert()
        elif choice == '3':
            m = Memory()
            m.convert()
        elif choice == '4':
            w = Weight()
            w.convert()
        else:
            print('Exiting now ... Bye!')
            break
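# A minimal sketch (an assumption, not the original Weight module) of the
# converter the menu above instantiates; the unit names and prompts are
# illustrative only.
class Weight:
    FACTORS_TO_KG = {'kg': 1.0, 'g': 0.001, 'lb': 0.45359237, 'oz': 0.028349523125}

    def convert(self):
        value = float(input('Enter the value to convert ... '))
        src = input('From unit (kg/g/lb/oz) ... ').strip().lower()
        dst = input('To unit (kg/g/lb/oz) ... ').strip().lower()
        result = value * self.FACTORS_TO_KG[src] / self.FACTORS_TO_KG[dst]
        print('%.6g %s = %.6g %s' % (value, src, result, dst))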
class TestWeight:
    @pytest.mark.regression
    @pytest.mark.parametrize(
        "weight1, weight2, expected_result",
        [(Weight(10, WeightUnit.G), Weight(10, WeightUnit.G), 20),
         (Weight(1, WeightUnit.KG), Weight(10, WeightUnit.G), 1.01),
         (Weight(10, WeightUnit.KG), Weight(10, WeightUnit.LB), 14.53592),
         (10, Weight(10, WeightUnit.LB), 10.022046244201839)])
    def test_adding(self, weight1: Weight, weight2: Weight, expected_result: float):
        assert weight1 + weight2 == expected_result
        weight1 += weight2
        if isinstance(weight1, Weight):
            assert weight1.weight == expected_result
        else:
            assert weight1 == expected_result

    @pytest.mark.regression
    @pytest.mark.parametrize(
        "weight1, weight2, expected_result",
        [(Weight(10, WeightUnit.G), Weight(10, WeightUnit.G), 0),
         (Weight(1, WeightUnit.KG), Weight(10, WeightUnit.G), 0.99),
         (Weight(10, WeightUnit.KG), Weight(10, WeightUnit.LB), 5.46408),
         (10, Weight(10, WeightUnit.LB), 9.977953755798163)])
    def test_subtracting(self, weight1: Weight, weight2: Weight, expected_result: float):
        assert weight1 - weight2 == expected_result
        weight1 -= weight2
        if isinstance(weight1, Weight):
            assert weight1.weight == expected_result
        else:
            assert weight1 == expected_result
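# A minimal sketch (an assumption, not the library under test) of the Weight
# arithmetic the Weight-plus-Weight cases above exercise: the right operand is
# converted into the left operand's unit, and equality falls through to the
# numeric value. The rounded lb factor below is inferred from the expected
# values; the mixed number-plus-Weight cases rely on the real library's own
# coercion rule, which is not reproduced here.
from enum import Enum

class WeightUnit(Enum):
    G = 0.001      # conversion factor to kilograms
    KG = 1.0
    LB = 0.453592  # rounded factor implied by the expected values above

class Weight:
    def __init__(self, weight, unit):
        self.weight = weight
        self.unit = unit

    def _in(self, unit):
        # Express this weight in the given unit.
        return self.weight * self.unit.value / unit.value

    def __add__(self, other):
        return Weight(self.weight + other._in(self.unit), self.unit)

    def __sub__(self, other):
        return Weight(self.weight - other._in(self.unit), self.unit)

    def __eq__(self, other):
        return self.weight == other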
def Classifier(self, database, version, num_of_kpts, cutoff, top=5, K=10, depth=4):
    num_in_set = database[0]
    num_of_sets = database[1]
    test_database = database[3]
    total = num_in_set * num_of_sets
    num_in_test_set = len(test_database) // num_of_sets
    classify_score = np.zeros(num_of_sets)
    class_name = [[] for i in range(num_of_sets)]

    # Load the weights and the weighted signatures.
    wt = Weight(cutoff)
    sign_dir = os.path.join(self.SIGN_DIR, 'db_version_' + str(version))
    wt.load_weights(sign_dir, self.WEIGHT_FILE + str(num_of_kpts) + '_' + str(cutoff))
    wt.load_weighted_sign(sign_dir, self.WEIGHT_SIGN_FILE + str(num_of_kpts) + '_' + str(cutoff))

    # Load the vocabulary tree.
    tree_dir = os.path.join(self.TREE_DIR, 'db_version_' + str(version))
    tr = vl._vlfeat.VlHIKMTree(0, 0)
    tr.load(os.path.join(tree_dir, str(num_of_kpts) + self.TREE_FILE))

    for k in test_database:
        # Load a test image from the image directory and standardize its size.
        img = Image.open(k[2]).convert('L')
        img = self.StandalizeImage(img, 480)
        img_data = np.asarray(img, dtype=float)

        # Generate keypoints, descriptors, signature and weighted signature.
        kp = Keypoint()
        #kp.load_keypoint(self.KEYPOINT_DIR, self.KEYPOINT_FILE + str(num_of_kpts))
        kp.generate_keypoint(num_of_kpts, img.size[0], img.size[1], self.SIGMA)
        desc = Descriptor()
        desc.generate_desc(img_data, kp.kpt)
        # Very important: the descriptors must be converted to float type.
        #desc_f = np.array(desc.desc, dtype=float)
        sign = Signature()
        s = sign.generate_sign(tr, desc.desc, K, depth)
        weighted_sign = wt.weight_sign(s)

        # Vote: the `top` nearest database signatures each cast one vote.
        d = np.empty(total)
        for i in range(0, total):
            d[i] = self.dist(wt.weighted_sign[i, :], weighted_sign)
        perm = np.argsort(d)
        vote_for = np.floor((perm[0:top]) / num_in_set) + 1
        votes = vl.vl_binsum(np.zeros(num_of_sets), np.ones(top), vote_for)
        #print(votes)
        best = np.argmax(votes)
        if best == k[1]:
            classify_score[k[1]] += 1
            print('=>' + str(k[0]))
        class_name[k[1]] = k[3]

    classify_score = classify_score / num_in_test_set
    return list(zip(class_name, classify_score.tolist()))
def xval(self, nfold=5, nrep=10, pairwiseGTA=None, pairwiseViral=None,
         cluster_type='farthest', d=0.03):
    """n-fold cross validation of the test set.

    Input:
        nfold (int): number of folds for xval
        nrep (int): number of repetitions
    Returns:
        (score0, score1): per-label correct-classification counts,
        averaged over the nrep repetitions
    """
    # keep track of label classification
    score0 = 0.0
    score1 = 0.0
    gta_as_phage = []
    phage_as_gta = []
    # repeat xval results nrep times
    for i in range(nrep):
        if not mini:
            sys.stdout.flush()
            sys.stdout.write("Starting rep: %d\r" % (i + 1))
        # randomly sort profiles
        random.shuffle(self.profiles)
        # split into folds
        split = [self.profiles[i::nfold] for i in range(nfold)]
        # cross val
        for j in range(nfold):
            # Build train and test sets
            train_fold = np.array([x for sublist in (split[:j] + split[j + 1:]) for x in sublist])
            test_fold = split[j]
            trainX = np.array([x.features for x in train_fold])
            testX = np.array([x.features for x in test_fold])
            trainY = np.array([-1.0 if y.label == self.label0 else 1.0 for y in train_fold])
            testY = np.array([-1.0 if y.label == self.label0 else 1.0 for y in test_fold])
            testNames = np.array([x.org_name for x in test_fold])
            # randomize labels
            # random.shuffle(trainY)
            # Get training set weights
            if pairwiseGTA:
                # Reweight based on training set
                # GTA
                GTA_weight = Weight([x for x in train_fold if x.label == self.label0], pairwiseGTA)
                GTA_clusters = GTA_weight.cluster(cluster_type, d)
                GTA_weight.weight(GTA_clusters)
                # Virus
                virus_weight = Weight([x for x in train_fold if x.label != self.label0], pairwiseViral)
                virus_clusters = virus_weight.cluster(cluster_type, d)
                virus_weight.weight(virus_clusters)
                # Grab updated weights
                weights = np.array([x.weight for x in train_fold])
            else:
                weights = np.array([1 for x in train_fold])
            # evaluate results
            predictor = SVMTrain(self.kernel, self.c).train(trainX, trainY, weights)
            for r in range(len(testX)):
                # Positive product is correct classification
                if predictor.predict(testX[r]) * testY[r] > 0:
                    # Update label0 if negative, label1 otherwise
                    if testY[r] < 0:
                        score0 += 1
                    else:
                        score1 += 1
                else:
                    # predicted incorrectly
                    if testY[r] > 0:  # virus as GTA
                        phage_as_gta.append(testNames[r])
                    else:  # GTA as virus
                        gta_as_phage.append(testNames[r])
    if not mini:
        print("\nPhages (%d) misclassified over %d reps: %s" % (len(phage_as_gta), nrep, phage_as_gta))
        print("\nGTA (%d) misclassified over %d reps: %s\n" % (len(gta_as_phage), nrep, gta_as_phage))
    return (score0 / nrep, score1 / nrep)
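# Hypothetical usage of xval() (the variable names follow the argparse snippet
# earlier; the values are assumptions for illustration):
svm = SVM(gta_profs, viral_profs, c, kernel, kernel_var)
score_gta, score_virus = svm.xval(nfold=5, nrep=10,
                                  pairwiseGTA=pairwiseGTA,
                                  pairwiseViral=pairwiseViral,
                                  cluster_type='farthest', d=0.03)
print("Avg correct per rep - GTA: %.1f, virus: %.1f" % (score_gta, score_virus))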
"""计算不同因素的权重""" from Weight import Weight import os def dir_name(dirname): for root, dirs, files in os.walk(dirname): models = [file for file in files if 'model' == file[-5:]] return models if __name__ == '__main__': d_name = os.path.join(os.getcwd(), 'Main') models = dir_name(d_name) TI = Weight('news.csv', 'positive.txt', 'negative.txt', models) TI.run()