def test_classifier_chain_vs_independent_models():
    # Verify that an ensemble of classifier chains (each of length
    # N) can achieve a higher Jaccard similarity score than N independent
    # models
    yeast = fetch_mldata('yeast')
    X = yeast['data']
    Y = yeast['target'].transpose().toarray()
    X_train = X[:2000, :]
    X_test = X[2000:, :]
    Y_train = Y[:2000, :]
    Y_test = Y[2000:, :]

    ovr = OneVsRestClassifier(LogisticRegression())
    ovr.fit(X_train, Y_train)
    Y_pred_ovr = ovr.predict(X_test)

    chain = ClassifierChain(
        LogisticRegression(),
        order=np.array([0, 2, 4, 6, 8, 10, 12, 1, 3, 5, 7, 9, 11, 13]))
    chain.fit(X_train, Y_train)
    Y_pred_chain = chain.predict(X_test)

    assert_greater(jaccard_similarity_score(Y_test, Y_pred_chain),
                   jaccard_similarity_score(Y_test, Y_pred_ovr))
def test_jaccard_similarity_score(self):
    result = self.df.metrics.jaccard_similarity_score()
    expected = metrics.jaccard_similarity_score(self.target, self.pred)
    self.assertEqual(result, expected)

    result = self.df.metrics.jaccard_similarity_score(normalize=False)
    expected = metrics.jaccard_similarity_score(self.target, self.pred,
                                                normalize=False)
    self.assertEqual(result, expected)
def train_and_eval(x_train, y_train, x_test, y_test, model, param_result):
    print("\nTraining and evaluating...")
    for result_list in param_result:
        print("Fitting: " + str(result_list[2]))
        opt_model = result_list[2]
        opt_model.fit(x_train, y_train)
        y_pred = opt_model.predict(x_test)
        print("\nClassification Report:")
        print(metrics.classification_report(y_test, y_pred))
        print("\nAccuracy Score:")
        print(metrics.accuracy_score(y_test, y_pred))
        print("\nConfusion Matrix:")
        print(metrics.confusion_matrix(y_test, y_pred))
        print("\nF1-Score:")
        print(metrics.f1_score(y_test, y_pred))
        print("\nHamming Loss:")
        print(metrics.hamming_loss(y_test, y_pred))
        print("\nJaccard Similarity:")
        print(metrics.jaccard_similarity_score(y_test, y_pred))
        # vvv Not supported due to ValueError: y_true and y_pred have
        # different number of classes 3, 2
        # print('\nLog Loss:')
        # print(metrics.log_loss(y_test, y_pred))
        # vvv multiclass not supported
        # print('\nMatthews Correlation Coefficient:')
        # print(metrics.matthews_corrcoef(y_test, y_pred))
        print("\nPrecision:")
        print(metrics.precision_score(y_test, y_pred))
        # vvv Not supported due to ValueError: y_true and y_pred have
        # different number of classes 3, 2
        # print('\nRecall:')
        # print(metrics.recall_score(y_test, y_pred))
        print()
def calculateSimilarityItems(item1, item2):
    try:
        result = jaccard_similarity_score(Utility[item1], Utility[item2])
    except Warning:
        # print "Exception at %d : %d" % (item1, item2)
        result = 0.5
    return result
def calc_thresholds(self, patches_in, patches_out):
    prediction = self.model.predict(patches_in, batch_size=4)
    avg, trs = [], []
    for i in range(self.out_chan):
        t_prd = prediction[:, :, :, i]
        t_msk = patches_out[:, :, :, i]
        t_prd = t_prd.reshape(t_msk.shape[0] * t_msk.shape[1], t_msk.shape[2])
        t_msk = t_msk.reshape(t_msk.shape[0] * t_msk.shape[1], t_msk.shape[2])
        t_msk = t_msk > 0.5

        # threshold finder
        best_score = 0
        best_threshold = 0
        for j in range(10):
            threshold = (j + 1) / 10.0
            threshold_mask = (t_prd > threshold)
            jk = jaccard_similarity_score(t_msk, threshold_mask)
            if jk > best_score:
                best_score = jk
                best_threshold = threshold
        print " -- output:", i, "best:", best_score, "threshold:", best_threshold
        avg.append(best_score)
        trs.append(best_threshold)
    score = sum(avg) / 10.0
    return score, trs
def analise():
    datasets = load_data_from_pickle()
    classifier = get_conv_classifier()
    given_answers = list(classifier.predict(datasets.test.data)['classes'])

    wrong_answer_buckets = np.zeros(5)
    for i, test_data in enumerate(datasets.test.data):
        right_answer = datasets.test.target[i]
        given_answer = given_answers[i]
        if right_answer != given_answer:
            wrong_answer_buckets[right_answer] += 1
    print(wrong_answer_buckets / sum(wrong_answer_buckets))

    confusion_matrix = metrics.confusion_matrix(datasets.test.target,
                                                given_answers, range(5))
    print(confusion_matrix)

    cohen_kappa_score = metrics.cohen_kappa_score(datasets.test.target,
                                                  given_answers, range(5))
    print(cohen_kappa_score)

    jaccard_similarity_score = metrics.jaccard_similarity_score(
        datasets.test.target, given_answers)
    print(jaccard_similarity_score)

    report = metrics.classification_report(
        datasets.test.target, given_answers, labels=range(5),
        target_names=['NORTH', 'EAST', 'SOUTH', 'WEST', 'STILL'])
    print(report)
def ComputeMetrics(prob, batch_labels, p1, p2, rgb=None, save_path=None, ind=0):
    GT = label(batch_labels.copy())
    PRED = PostProcess(prob, p1, p2)
    lbl = GT.copy()
    pred = PRED.copy()
    aji = AJI_fast(lbl, pred)
    lbl[lbl > 0] = 1
    pred[pred > 0] = 1
    l, p = lbl.flatten(), pred.flatten()
    acc = accuracy_score(l, p)
    roc = roc_auc_score(l, p)
    jac = jaccard_similarity_score(l, p)
    f1 = f1_score(l, p)
    recall = recall_score(l, p)
    precision = precision_score(l, p)
    if rgb is not None:
        xval_n = join(save_path, "xval_{}.png").format(ind)
        yval_n = join(save_path, "yval_{}.png").format(ind)
        prob_n = join(save_path, "prob_{}.png").format(ind)
        pred_n = join(save_path, "pred_{}.png").format(ind)
        c_gt_n = join(save_path, "C_gt_{}.png").format(ind)
        c_pr_n = join(save_path, "C_pr_{}.png").format(ind)
        ## CHECK PLOT FOR PROB AS IT MIGHT BE ILL ADAPTED
        imsave(xval_n, rgb)
        imsave(yval_n, color_bin(GT))
        imsave(prob_n, prob)
        imsave(pred_n, color_bin(PRED))
        imsave(c_gt_n, add_contours(rgb, GT))
        imsave(c_pr_n, add_contours(rgb, PRED))
    return acc, roc, jac, recall, precision, f1, aji
def neighbor_rating(self, neighbor, itemID, sigma, threshold):
    self.ratings_sum = 0
    self.similarity_sum = 0
    ratings = df[(df.userID == neighbor.userID) & (df.itemID == itemID)]
    for index, user_rating in ratings.iterrows():
        similarity = jaccard_similarity_score(neighbor[2:-1], user_rating[2:-1])
        if similarity > threshold:
            self.ratings_sum += user_rating.rating * similarity
            self.similarity_sum += similarity
    try:
        rating = self.ratings_sum / self.similarity_sum
        # rating = ratings.rating.mean()
    except ZeroDivisionError:
        # no neighbor rating passed the similarity threshold
        rating = 0
    return rating
def svmDesc(lab_pred, lab_test, title='Confusion matrix', cmap=plot.cm.Blues,
            taskLabels=taskLabels, normal=True):
    # build confusion matrix itself
    conM = confusion_matrix(lab_test, lab_pred)
    if normal:
        conM = conM.astype('float') / conM.sum(axis=1)[:, np.newaxis]

    # build heatmap graph of matrix
    plot.imshow(conM, interpolation='nearest', cmap=cmap)
    plot.title(title)
    plot.colorbar()
    tick_marks = np.arange(len(taskLabels))
    plot.xticks(tick_marks, taskLabels, rotation=45)
    plot.yticks(tick_marks, taskLabels)
    plot.tight_layout()
    plot.ylabel('True label')
    plot.xlabel('Predicted label')

    # classification report
    creport = classification_report(lab_test, lab_pred)
    print "CLASSIFICATION REPORT: "
    print creport

    # hamming distance
    hamming = hamming_loss(lab_test, lab_pred)
    print "HAMMING DISTANCE: %s" % str(hamming)

    # jaccard similarity score
    jaccard = jaccard_similarity_score(lab_test, lab_pred)
    print "JACCARD SIMILARITY SCORE: %s" % str(jaccard)

    # precision score
    pscore = precision_score(lab_test, lab_pred)
    print "PRECISION SCORE: %s" % str(pscore)
def tribunalTrain(data, predict, tribunal, split=.2, stat=False, statLis=None):
    # data for testing the tribunal performance, not in actual judge training
    dat_train, dat_test, lab_train, lab_test = train_test_split(
        data, predict, test_size=split)
    verdict = []
    print 'Tribunal in session'
    for judge in tribunal:
        jdat_train, jdat_test, jlab_train, jlab_test = train_test_split(
            dat_train, lab_train, test_size=split)
        judge.fit(jdat_train, jlab_train)
        print 'judge trained'
    for d in dat_test:
        votes = []
        for judge in tribunal:
            v = judge.predict(d)
            votes.append(v)
        decision = stats.mode(votes, axis=None)
        verdict.append(decision[0])
    npVerdict = np.array(verdict)
    if stat == False:
        svmDesc(npVerdict, lab_test, title='Tribunal Confusion Matrix')
    else:
        jac = jaccard_similarity_score(npVerdict, lab_test)
        statLis.append(jac)
def calc_jacc(model):
    img = np.load(xtmp_file)
    msk = np.load(ytmp_file)
    prd = model.predict(img, batch_size=4)
    print prd.shape, msk.shape
    avg, trs = [], []
    for i in range(num_classes):
        t_msk = msk[:, i, :, :]
        t_prd = prd[:, i, :, :]
        t_msk = t_msk.reshape(msk.shape[0] * msk.shape[2], msk.shape[3])
        t_prd = t_prd.reshape(msk.shape[0] * msk.shape[2], msk.shape[3])

        m, b_tr = 0, 0
        for j in range(10):
            tr = j / 10.0
            pred_binary_mask = t_prd > tr
            jk = jaccard_similarity_score(t_msk, pred_binary_mask)
            if jk > m:
                m = jk
                b_tr = tr
        print i, m, b_tr
        avg.append(m)
        trs.append(b_tr)
    score = sum(avg) / 10.0
    return score, trs
def jaccard_score(self, row):
    query = row['search_term']
    title = row['product_title']
    corpus = np.array([query, title])
    tfidf_matrix = self.tfidf_vectorizer.fit_transform(corpus)
    return jaccard_similarity_score(tfidf_matrix[0], tfidf_matrix[1])
def test_jaccard_binary_index():
    y_test = np.array([0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0])
    y_pred = np.array([0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0])

    sk_jaccard_score = metrics.jaccard_similarity_score(y_test, y_pred)
    print(sk_jaccard_score)

    jaccard_index = jaccard_binary_index(y_test, y_pred)
    print(jaccard_index)

    assert jaccard_index == 0.5
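# A minimal sketch (hypothetical, since the jaccard_binary_index under test is
# not shown here) of a helper matching the 0.5 assertion above: intersection
# over union of the positive class only. Note the contrast with
# jaccard_similarity_score, which for flat binary arrays reduces to plain
# accuracy (9/11 for the vectors above).
import numpy as np

def jaccard_binary_index(y_true, y_pred):
    y_true = np.asarray(y_true).astype(bool)
    y_pred = np.asarray(y_pred).astype(bool)
    union = np.logical_or(y_true, y_pred).sum()
    # convention: two all-negative vectors are treated as identical
    return 1.0 if union == 0 else np.logical_and(y_true, y_pred).sum() / union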
def test_classifier_chain_crossval_fit_and_predict():
    # Fit classifier chain with cross_val_predict and verify predict
    # performance
    X, Y = generate_multilabel_dataset_with_correlations()
    classifier_chain_cv = ClassifierChain(LogisticRegression(), cv=3)
    classifier_chain_cv.fit(X, Y)

    classifier_chain = ClassifierChain(LogisticRegression())
    classifier_chain.fit(X, Y)

    Y_pred_cv = classifier_chain_cv.predict(X)
    Y_pred = classifier_chain.predict(X)

    assert_equal(Y_pred_cv.shape, Y.shape)
    assert_greater(jaccard_similarity_score(Y, Y_pred_cv), 0.4)
    assert_not_equal(jaccard_similarity_score(Y, Y_pred_cv),
                     jaccard_similarity_score(Y, Y_pred))
def getJaccardSimilarity(user1=None, user2=None):
    if user1.ndim != 1 or user2.ndim != 1:
        print 'Input arrays must be 1-dimensional'
        return
    elif user1.shape != user2.shape:
        print 'Input arrays must have the same length'
        return
    else:
        return jaccard_similarity_score(user1, user2)
def jaccard_driver(a_driver):
    a_driver["DStats"] = (a_driver["DStats"] * 100).round()
    a_driver["Baseline"] = (a_driver["Baseline"] * 100).round()
    a_driver["Predicts"] = []
    for i in range(0, len(a_driver["DStats"])):
        a_driver["Predicts"].append(
            metrics.jaccard_similarity_score(a_driver["DStats"][i],
                                             a_driver["Baseline"]))
    return a_driver["Predicts"]
def jaccard_index(y, y_pred):
    """Computes Jaccard Index, which is the Intersection Over Union metric
    commonly used in image segmentation tasks

    Parameters
    ----------
    y: ground truth array
    y_pred: predicted array
    """
    return jaccard_similarity_score(y, y_pred)
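# Caveat (not part of the original snippet): with flat binary masks,
# jaccard_similarity_score treats matched background zeros as hits, so the
# wrapper above reports pixel accuracy rather than true Intersection Over
# Union. scikit-learn >= 0.21 provides sklearn.metrics.jaccard_score, which
# computes the positive-class IoU that segmentation work usually wants.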
def _get_max_similarity(list1, list2, coassoc_vec):
    n = len(coassoc_vec.keys())
    max_sim = 0
    for i in range(len(list1)):
        checkee = coassoc_vec[coassoc_vec.keys()[list1[i]]]
        for j in range(len(list2)):
            neu = coassoc_vec[coassoc_vec.keys()[list2[j]]]
            jaccard = jaccard_similarity_score(checkee.binarycoassoc_vs,
                                               neu.binarycoassoc_vs)
            if jaccard > max_sim:
                max_sim = jaccard
    return max_sim
def eval_mclf(y, y_hat):
    results = {
        "jaccard": jaccard_similarity_score(numpy.array(y), numpy.array(y_hat)),
        "f1-macro": f1_score(numpy.array(y), numpy.array(y_hat), average='macro'),
        "f1-micro": f1_score(numpy.array(y), numpy.array(y_hat), average='micro')
    }
    return results
def test_classifier_chain_vs_independent_models():
    # Verify that an ensemble of classifier chains (each of length
    # N) can achieve a higher Jaccard similarity score than N independent
    # models
    X, Y = generate_multilabel_dataset_with_correlations()
    X_train = X[:600, :]
    X_test = X[600:, :]
    Y_train = Y[:600, :]
    Y_test = Y[600:, :]

    ovr = OneVsRestClassifier(LogisticRegression())
    ovr.fit(X_train, Y_train)
    Y_pred_ovr = ovr.predict(X_test)

    chain = ClassifierChain(LogisticRegression())
    chain.fit(X_train, Y_train)
    Y_pred_chain = chain.predict(X_test)

    assert_greater(jaccard_similarity_score(Y_test, Y_pred_chain),
                   jaccard_similarity_score(Y_test, Y_pred_ovr))
def evalClassifier(vScore_test, thePredictedScores):
    target_names = ['Low_Risk', 'High_Risk']
    '''
    the way sklearn treats labels is the following:
    first index -> lower index -> 0 -> 'Low'
    next index after first -> next lower index -> 1 -> 'High'
    '''
    print "precision, recall, F-stat"
    print(classification_report(vScore_test, thePredictedScores,
                                target_names=target_names))
    print "*********************"
    # preserve the order: first test (real values from dataset), then
    # predicted (from the classifier)
    '''
    area under the curve values ... ref: http://gim.unmc.edu/dxtests/roc3.htm
    0.80~0.90 -> good, anything less than 0.70 is bad, 0.90~1.00 -> excellent
    '''
    area_roc_output = roc_auc_score(vScore_test, thePredictedScores)
    print "Area under the ROC curve is ", area_roc_output
    print "*********************"
    '''
    mean absolute error (MAE) values ... ref:
    http://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_absolute_error.html
    the smaller the better, ideally expect 0.0
    '''
    mae_output = mean_absolute_error(vScore_test, thePredictedScores)
    print "Mean absolute error output is ", mae_output
    print "*********************"
    '''
    accuracy_score ... ref:
    http://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter
    percentage of correct predictions, ideally 1.0, the higher the better
    '''
    accuracy_score_output = accuracy_score(vScore_test, thePredictedScores)
    print "Accuracy output is ", accuracy_score_output
    print "*********************"
    '''
    hamming_loss ... ref:
    http://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter
    fraction of incorrect predictions, ideally 0.0, the lower the better
    '''
    hamming_loss_output = hamming_loss(vScore_test, thePredictedScores)
    print "Hamming loss output is ", hamming_loss_output
    print "*********************"
    '''
    Jaccard score ... ref:
    http://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter
    percentage of correct predictions, ideally 1.0, the higher the better
    '''
    jaccardian_output = jaccard_similarity_score(vScore_test, thePredictedScores)
    print "Jaccardian output is ", jaccardian_output
    print "*********************"
def neighbor_average(self, neighbor, sigma, threshold):
    ratings_of_neighbor = df[df.userID == neighbor.userID]  # .rating.mean()
    rating_sum = 0
    count = 0
    for index, row in ratings_of_neighbor.iterrows():
        similarity = jaccard_similarity_score(row[2:-1], neighbor[2:-1])
        if similarity > threshold:
            rating_sum += row.rating
            count += 1
    average_rating_in_given_context = rating_sum / count
    return average_rating_in_given_context
def _main():
    base = list(range(0, 30))
    SIZE = 30
    x = random.sample(base, k=SIZE)
    print('x: ', x)
    y = random.sample(base, k=SIZE)
    print('y: ', y)
    result = jaccard_similarity_score(x, y)
    print(result, "\n")
def similarity(featureVector, maxTimeDiff, doctext, doc1, doc2):
    location = 0
    featureVector1 = featureVector[doc1]
    featureVector2 = featureVector[doc2]
    print featureVector1
    jcSim = jaccard_similarity_score(featureVector1, featureVector2)
    if doctext[doc1]["places"] == doctext[doc2]["places"]:
        location += 1
    date = abs(int(doctext[doc1]["date"]) - int(doctext[doc2]["date"]))
    w1 = 1       # Weight for word vector
    w2 = 1       # Weight for location
    w3 = 1       # Weight for time distribution
    alpha = 1.0  # Time decay
    sim = w1 * jcSim + w2 * location
    return sim * math.exp(-alpha * (date) / maxTimeDiff)
def get_hotspot_scores(data):
    distance_matrix = [[-1 for _ in xrange(len(data))] for _ in xrange(len(data))]
    from sklearn.metrics import jaccard_similarity_score
    for i in xrange(len(data)):
        for j in xrange(len(data)):
            if distance_matrix[i][j] == -1 and i != j:
                distance_matrix[i][j] = decimal.Decimal(
                    jaccard_similarity_score(data[i].decisions, data[j].decisions))
                distance_matrix[j][i] = distance_matrix[i][j]
            elif distance_matrix[i][j] == -1 and i == j:
                distance_matrix[j][i] = 1
            else:
                pass
    hotspot_scores = [sum(distance_matrix[i]) for i in xrange(len(data))]
    print "Done calculating hotspot scores"
    return hotspot_scores
def performance(self, preds):
    accuracy = accuracy_score(self.y_test, preds)
    precision = precision_score(self.y_test, preds)
    recall = recall_score(self.y_test, preds)
    f1 = f1_score(self.y_test, preds)
    jss = jaccard_similarity_score(self.y_test, preds)
    hl = hamming_loss(self.y_test, preds)
    zol = zero_one_loss(self.y_test, preds)
    return {'accuracy_score': accuracy,
            'precision_score': precision,
            'recall_score': recall,
            'f1_score': f1,
            'jaccard_similarity_score': jss,
            'hamming_loss': hl,
            'zero_one_loss': zol}
def main(params, train):
    si = ScreenImage()
    if train:
        # Initialization
        trainset = glob(join("face_training", "face*.png"))
        t0 = time()
        print_(verbosity, "Begin collecting training Samples")
        Labels, Samples = get_training_samples(trainset, params)
        print_(verbosity, "Success. Elapsed: %.2f s." % (time() - t0))
        print_(verbosity, "Begin classifier training using %s..." % (params["classifier"]))
        if params["classifier"] == "NB":
            clf = GaussianNB()
        elif params["classifier"] == "RF":
            clf = RandomForestClassifier()
        clf.fit(Samples, Labels)
        pickle.dump([clf, params], open(params["name"], "w"))
    else:
        testset = glob(join("face_testing", "face*.png"))
        print_(verbosity, "Begin classifier prediction...")
        score = np.zeros(len(testset),)
        models = glob("._*")
        for i, testname in enumerate(testset):
            im_orig = imread(testname)
            truthname = get_groundname(testname)
            im_skin = [[] for k in models]
            title = ["" for k in models]
            for j, model in enumerate(models):
                im_truth = rgb2gray(imread(truthname)).astype(np.uint8) * 255
                pkl = pickle.load(open(model, "r"))
                clf = pkl[0]
                params = pkl[1]
                _, _, fvec = im2feature(testname, params)
                im_skin[j] = clf.predict(fvec).reshape(im_truth.shape).astype(np.uint8)
                score = jaccard_similarity_score(im_truth, im_skin[j], normalize=True)
                title[j] = "%s\nClassifier: %s, Thresh: %.2f\nK: %d, Score: %.2f" \
                    % (params["classifier"], params["feature"], params["thresh"],
                       params["n_cluster"], score)
            print_(verbosity, "\tTest %d of %d, Score %.2f\n" % (i + 1, len(testset), score))
            si.show(testname,
                    [im_orig, im_skin[0], im_skin[1], im_skin[2], im_skin[3], im_skin[4]],
                    ["Original\n%s" % testname, title[0], title[1], title[2],
                     title[3], title[4]])
def test_multilabel_jaccard_similarity_score():
    # Dense label indicator matrix format
    y1 = np.array([[0, 1, 1], [1, 0, 1]])
    y2 = np.array([[0, 0, 1], [1, 0, 1]])

    # size(y1 \inter y2) = [1, 2]
    # size(y1 \union y2) = [2, 2]
    assert_equal(jaccard_similarity_score(y1, y2), 0.75)
    assert_equal(jaccard_similarity_score(y1, y1), 1)
    assert_equal(jaccard_similarity_score(y2, y2), 1)
    assert_equal(jaccard_similarity_score(y2, np.logical_not(y2)), 0)
    assert_equal(jaccard_similarity_score(y1, np.logical_not(y1)), 0)
    assert_equal(jaccard_similarity_score(y1, np.zeros(y1.shape)), 0)
    assert_equal(jaccard_similarity_score(y2, np.zeros(y1.shape)), 0)
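# Worked arithmetic behind the 0.75 assertion above: the multilabel score
# averages per-sample intersection-over-union, i.e. (1/2 + 2/2) / 2 = 0.75.
import numpy as np

y1 = np.array([[0, 1, 1], [1, 0, 1]], dtype=bool)
y2 = np.array([[0, 0, 1], [1, 0, 1]], dtype=bool)
inter = np.logical_and(y1, y2).sum(axis=1)  # [1, 2]
union = np.logical_or(y1, y2).sum(axis=1)   # [2, 2]
print((inter / union).mean())               # 0.75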
def _get_incident_matrix_binary(coassoc_vec, fusion_threshold):
    n = len(coassoc_vec.keys())
    incidence_matrix = np.zeros(shape=(n, n))
    for i in range(len(coassoc_vec.keys()) - 1):
        checkee = coassoc_vec[coassoc_vec.keys()[i + 1]]
        if i == len(coassoc_vec.keys()) - 2:
            incidence_matrix[i + 1][i + 1] = 1
        incidence_matrix[i][i] = 1
        for j in range(i + 1):
            neu = coassoc_vec[coassoc_vec.keys()[j]]
            if jaccard_similarity_score(checkee.binarycoassoc_vs,
                                        neu.binarycoassoc_vs) > fusion_threshold:
                incidence_matrix[j][i + 1] = 1
    return incidence_matrix
y = lantsat[[36]].values

# Split the dataset into training dataset and testing dataset
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1)

# =================== Perceptron =========================
from sklearn.multioutput import MultiOutputClassifier
from sklearn.linear_model import Perceptron

ppn = Perceptron(n_iter=40, eta0=0.1, random_state=0)  # y = w.x + b
multi_target_ppn = MultiOutputClassifier(ppn)
y_pred = multi_target_ppn.fit(x_train, y_train).predict(x_test)
print('Perceptron:')
print(classification_report(y_test, y_pred))
print('Accuracy classification score: %.2f' % accuracy_score(y_test, y_pred))
print('Average Hamming loss: %.2f' % hamming_loss(y_test, y_pred))
print('Jaccard similarity coefficient score: %.2f' % jaccard_similarity_score(y_test, y_pred))
print('Matthews correlation coefficient (MCC): %.2f' % matthews_corrcoef(y_test, y_pred))
print('Zero-one classification loss: %.2f' % zero_one_loss(y_test, y_pred))

# =================== SVM =========================
from sklearn.multioutput import MultiOutputClassifier
from sklearn import svm

clf = svm.SVC()  # instantiate SVC()
multi_target_clf = MultiOutputClassifier(clf)
y_pred = multi_target_clf.fit(x_train, y_train).predict(x_test)
print('SVM:')
print(classification_report(y_test, y_pred))
print('Accuracy classification score: %.2f' % accuracy_score(y_test, y_pred))
print('Average Hamming loss: %.2f' % hamming_loss(y_test, y_pred))
print('Jaccard similarity coefficient score: %.2f' % jaccard_similarity_score(y_test, y_pred))
# from sklearn.metrics import jaccard_similarity_score

# 1
st_1 = "dogs chase cats"
st_2 = "dogs hate cats"

# 2
st_1_wrds = set(st_1.split())
st_2_wrds = set(st_2.split())
unq_wrds = st_1_wrds.union(st_2_wrds)

a = [1 if w in st_1_wrds else 0 for w in unq_wrds]
b = [1 if w in st_2_wrds else 0 for w in unq_wrds]
print a
print b
print jaccard_similarity_score(a, b)
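# Cross-check against the set definition |A & B| / |A | B|. Because the
# vocabulary above is exactly the union of the two word sets, the
# accuracy-style score from jaccard_similarity_score coincides with the true
# set Jaccard here (2/4 = 0.5); padding the vocabulary with words absent from
# both sentences would inflate the former but leave the latter unchanged.
st_1_wrds = {"dogs", "chase", "cats"}
st_2_wrds = {"dogs", "hate", "cats"}
print(len(st_1_wrds & st_2_wrds) / float(len(st_1_wrds | st_2_wrds)))  # 0.5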
def jaccardCoefficientLavaMD(errListJaccard):
    expected = []
    read = []
    for err in errListJaccard:
        try:
            readGStr = ''.join(bin(ord(c)).replace('0b', '').rjust(8, '0')
                               for c in struct.pack('!f', err[2]))
            expectedGStr = ''.join(bin(ord(c)).replace('0b', '').rjust(8, '0')
                                   for c in struct.pack('!f', err[3]))
            readGStr2 = ''.join(bin(ord(c)).replace('0b', '').rjust(8, '0')
                                for c in struct.pack('!f', err[4]))
            expectedGStr2 = ''.join(bin(ord(c)).replace('0b', '').rjust(8, '0')
                                    for c in struct.pack('!f', err[5]))
            readGStr3 = ''.join(bin(ord(c)).replace('0b', '').rjust(8, '0')
                                for c in struct.pack('!f', err[6]))
            expectedGStr3 = ''.join(bin(ord(c)).replace('0b', '').rjust(8, '0')
                                    for c in struct.pack('!f', err[7]))
            readGStr4 = ''.join(bin(ord(c)).replace('0b', '').rjust(8, '0')
                                for c in struct.pack('!f', err[8]))
            expectedGStr4 = ''.join(bin(ord(c)).replace('0b', '').rjust(8, '0')
                                    for c in struct.pack('!f', err[9]))
        except OverflowError:
            readGStr = ''.join(bin(ord(c)).replace('0b', '').rjust(8, '0')
                               for c in struct.pack('!d', err[2]))
            expectedGStr = ''.join(bin(ord(c)).replace('0b', '').rjust(8, '0')
                                   for c in struct.pack('!d', err[3]))
            readGStr2 = ''.join(bin(ord(c)).replace('0b', '').rjust(8, '0')
                                for c in struct.pack('!d', err[4]))
            expectedGStr2 = ''.join(bin(ord(c)).replace('0b', '').rjust(8, '0')
                                    for c in struct.pack('!d', err[5]))
            readGStr3 = ''.join(bin(ord(c)).replace('0b', '').rjust(8, '0')
                                for c in struct.pack('!d', err[6]))
            expectedGStr3 = ''.join(bin(ord(c)).replace('0b', '').rjust(8, '0')
                                    for c in struct.pack('!d', err[7]))
            readGStr4 = ''.join(bin(ord(c)).replace('0b', '').rjust(8, '0')
                                for c in struct.pack('!d', err[8]))
            expectedGStr4 = ''.join(bin(ord(c)).replace('0b', '').rjust(8, '0')
                                    for c in struct.pack('!d', err[9]))

        read.extend([n for n in readGStr])
        read.extend([n for n in readGStr2])
        read.extend([n for n in readGStr3])
        read.extend([n for n in readGStr4])
        expected.extend([n for n in expectedGStr])
        expected.extend([n for n in expectedGStr2])
        expected.extend([n for n in expectedGStr3])
        expected.extend([n for n in expectedGStr4])
    try:
        jac = jaccard_similarity_score(expected, read)
        dissimilarity = float(1.0 - jac)
        return dissimilarity
    except:
        return None
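# The sixteen pack-and-binarise expressions above collapse into one helper; a
# sketch in Python 3 syntax (struct.pack returns bytes whose items are already
# ints, so the ord() calls of the Python 2 original become unnecessary):
import struct

def float_to_bits(value, fmt='!f'):
    # '!f' packs a big-endian 32-bit float, '!d' a 64-bit double
    return ''.join(format(byte, '08b') for byte in struct.pack(fmt, value))

print(float_to_bits(1.0))  # 00111111100000000000000000000000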
def test(experiment_path, test_epoch):
    # ========= CONFIG FILE TO READ FROM =======
    config = configparser.RawConfigParser()
    config.read('./' + experiment_path + '/' + experiment_path + '_config.txt')
    # ===========================================
    # run the training on invariant or local
    path_data = config.get('data paths', 'path_local')
    model = config.get('training settings', 'model')

    # original test images (for FOV selection)
    DRIVE_test_imgs_original = path_data + config.get('data paths', 'test_imgs_original')
    test_imgs_orig = load_hdf5(DRIVE_test_imgs_original)
    full_img_height = test_imgs_orig.shape[2]
    full_img_width = test_imgs_orig.shape[3]

    # the border masks provided by the DRIVE
    DRIVE_test_border_masks = path_data + config.get('data paths', 'test_border_masks')
    test_border_masks = load_hdf5(DRIVE_test_border_masks)

    # dimension of the patches
    patch_height = int(config.get('data attributes', 'patch_height'))
    patch_width = int(config.get('data attributes', 'patch_width'))

    # the stride in case output with average
    stride_height = int(config.get('testing settings', 'stride_height'))
    stride_width = int(config.get('testing settings', 'stride_width'))
    assert (stride_height < patch_height and stride_width < patch_width)

    # model name
    name_experiment = config.get('experiment name', 'name')
    path_experiment = './' + name_experiment + '/'

    # N full images to be predicted
    Imgs_to_test = int(config.get('testing settings', 'full_images_to_test'))

    # Grouping of the predicted images
    N_visual = int(config.get('testing settings', 'N_group_visual'))

    # ====== average mode ===========
    average_mode = config.getboolean('testing settings', 'average_mode')

    # N_subimgs = int(config.get('training settings', 'N_subimgs'))
    # batch_size = int(config.get('training settings', 'batch_size'))
    # epoch_size = N_subimgs // (batch_size)

    # # ground truth
    # gtruth = path_data + config.get('data paths', 'test_groundTruth')
    # img_truth = load_hdf5(gtruth)
    # visualize(group_images(test_imgs_orig[0:20, :, :, :], 5), 'original')
    # visualize(group_images(test_border_masks[0:20, :, :, :], 5), 'borders')
    # visualize(group_images(img_truth[0:20, :, :, :], 5), 'gtruth')

    # ============ Load the data and divide in patches
    patches_imgs_test = None
    new_height = None
    new_width = None
    masks_test = None
    patches_masks_test = None
    if average_mode == True:
        patches_imgs_test, new_height, new_width, masks_test = get_data_testing_overlap(
            DRIVE_test_imgs_original=DRIVE_test_imgs_original,  # original
            DRIVE_test_groudTruth=path_data + config.get('data paths', 'test_groundTruth'),  # masks
            Imgs_to_test=int(config.get('testing settings', 'full_images_to_test')),
            patch_height=patch_height,
            patch_width=patch_width,
            stride_height=stride_height,
            stride_width=stride_width)
    else:
        patches_imgs_test, patches_masks_test = get_data_testing_test(
            DRIVE_test_imgs_original=DRIVE_test_imgs_original,  # original
            DRIVE_test_groudTruth=path_data + config.get('data paths', 'test_groundTruth'),  # masks
            Imgs_to_test=int(config.get('testing settings', 'full_images_to_test')),
            patch_height=patch_height,
            patch_width=patch_width)

    # np.save(path_experiment + 'test_patches.npy', patches_imgs_test)
    # visualize(group_images(patches_imgs_test, 100), './' + name_experiment + '/' + "test_patches")

    # ================ Run the prediction of the patches ==================================
    best_last = config.get('testing settings', 'best_last')

    # Load the saved model
    if model == 'UNet':
        net = UNet(n_channels=1, n_classes=2)
    elif model == 'UNet_cat':
        net = UNet_cat(n_channels=1, n_classes=2)
    else:
        net = UNet_level4_our(n_channels=1, n_classes=2)

    # load data
    test_data = data.TensorDataset(torch.tensor(patches_imgs_test),
                                   torch.zeros(patches_imgs_test.shape[0]))
    test_loader = data.DataLoader(test_data, batch_size=1, pin_memory=True, shuffle=False)

    trained_model = path_experiment + 'DRIVE_' + str(test_epoch) + 'epoch.pth'
    print(trained_model)
    # trained_model = path_experiment + 'DRIVE_unet2_B' + str(60 * epoch_size) + '.pth'
    net.load_state_dict(torch.load(trained_model))
    net.eval()
    print('Finished loading model :' + trained_model)
    net = net.cuda()
    cudnn.benchmark = True

    # Calculate the predictions
    predictions_out = np.empty((patches_imgs_test.shape[0], patch_height * patch_width, 2))
    for i_batch, (images, targets) in enumerate(test_loader):
        images = Variable(images.float().cuda())
        out1 = net(images)
        pred = out1.permute(0, 2, 3, 1)
        pred = F.softmax(pred, dim=-1)
        pred = pred.data.view(-1, patch_height * patch_width, 2)
        predictions_out[i_batch] = pred

    # ===== Convert the prediction arrays in corresponding images
    pred_patches_out = pred_to_imgs(predictions_out, patch_height, patch_width, "original")
    # np.save(path_experiment + 'pred_patches_' + str(test_epoch) + "_epoch" + '.npy', pred_patches_out)
    # visualize(group_images(pred_patches_out, 100), './' + name_experiment + '/' + "pred_patches")

    # ========== Elaborate and visualize the predicted images ====================
    pred_imgs_out = None
    orig_imgs = None
    gtruth_masks = None
    if average_mode == True:
        pred_imgs_out = recompone_overlap(pred_patches_out, new_height, new_width,
                                          stride_height, stride_width)
        orig_imgs = my_PreProc(test_imgs_orig[0:pred_imgs_out.shape[0], :, :, :])  # originals
        gtruth_masks = masks_test  # ground truth masks
    else:
        pred_imgs_out = recompone(pred_patches_out, 10, 9)   # predictions
        orig_imgs = recompone(patches_imgs_test, 10, 9)      # originals
        gtruth_masks = recompone(patches_masks_test, 10, 9)  # masks

    # apply the DRIVE masks on the predictions
    # set everything outside the FOV to zero!!
    # DRIVE MASK: only for visualization
    kill_border(pred_imgs_out, test_border_masks)

    # back to original dimensions
    orig_imgs = orig_imgs[:, :, 0:full_img_height, 0:full_img_width]
    pred_imgs_out = pred_imgs_out[:, :, 0:full_img_height, 0:full_img_width]
    gtruth_masks = gtruth_masks[:, :, 0:full_img_height, 0:full_img_width]
    print("Orig imgs shape: " + str(orig_imgs.shape))
    print("pred imgs shape: " + str(pred_imgs_out.shape))
    print("Gtruth imgs shape: " + str(gtruth_masks.shape))
    np.save(path_experiment + 'pred_img_' + str(test_epoch) + "_epoch" + '.npy', pred_imgs_out)

    # visualize(group_images(orig_imgs, N_visual), path_experiment + "all_originals")
    if average_mode == True:
        visualize(group_images(pred_imgs_out, N_visual),
                  path_experiment + "all_predictions_" + str(test_epoch) + "thresh_epoch")
    else:
        visualize(group_images(pred_imgs_out, N_visual),
                  path_experiment + "all_predictions_" + str(test_epoch) + "epoch_no_average")
    visualize(group_images(gtruth_masks, N_visual), path_experiment + "all_groundTruths")

    # visualize results comparing mask and prediction:
    # assert (orig_imgs.shape[0] == pred_imgs_out.shape[0] and orig_imgs.shape[0] == gtruth_masks.shape[0])
    # N_predicted = orig_imgs.shape[0]
    # group = N_visual
    # assert (N_predicted % group == 0)

    # ====== Evaluate the results
    print("\n\n======== Evaluate the results =======================")
    # predictions only inside the FOV
    y_scores, y_true = pred_only_FOV(pred_imgs_out, gtruth_masks, test_border_masks)
    '''
    print("Calculating results only inside the FOV:")
    print("y scores pixels: " + str(y_scores.shape[0]) +
          " (radius 270: 270*270*3.14==228906), including background around retina: " +
          str(pred_imgs_out.shape[0] * pred_imgs_out.shape[2] * pred_imgs_out.shape[3]) +
          " (584*565==329960)")
    print("y true pixels: " + str(y_true.shape[0]) +
          " (radius 270: 270*270*3.14==228906), including background around retina: " +
          str(gtruth_masks.shape[2] * gtruth_masks.shape[3] * gtruth_masks.shape[0]) +
          " (584*565==329960)")
    '''

    # Area under the ROC curve
    fpr, tpr, thresholds = roc_curve(y_true, y_scores)
    AUC_ROC = roc_auc_score(y_true, y_scores)
    # test_integral = np.trapz(tpr, fpr)  # trapz is numpy integration
    print("\nArea under the ROC curve: " + str(AUC_ROC))
    rOc_curve = plt.figure()
    plt.plot(fpr, tpr, '-', label='Area Under the Curve (AUC = %0.4f)' % AUC_ROC)
    plt.title('ROC curve')
    plt.xlabel("FPR (False Positive Rate)")
    plt.ylabel("TPR (True Positive Rate)")
    plt.legend(loc="lower right")
    plt.savefig(path_experiment + "ROC.png")

    # Precision-recall curve
    precision, recall, thresholds = precision_recall_curve(y_true, y_scores)
    precision = np.fliplr([precision])[0]  # so the array is increasing (you won't get negative AUC)
    recall = np.fliplr([recall])[0]        # so the array is increasing (you won't get negative AUC)
    AUC_prec_rec = np.trapz(precision, recall)
    print("\nArea under Precision-Recall curve: " + str(AUC_prec_rec))
    prec_rec_curve = plt.figure()
    plt.plot(recall, precision, '-', label='Area Under the Curve (AUC = %0.4f)' % AUC_prec_rec)
    plt.title('Precision - Recall curve')
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.legend(loc="lower right")
    plt.savefig(path_experiment + "Precision_recall.png")

    # Confusion matrix
    threshold_confusion = 0.5
    print("\nConfusion matrix: Custom threshold (for positive) of " + str(threshold_confusion))
    y_pred = np.empty((y_scores.shape[0]))
    for i in range(y_scores.shape[0]):
        if y_scores[i] >= threshold_confusion:
            y_pred[i] = 1
        else:
            y_pred[i] = 0
    confusion = confusion_matrix(y_true, y_pred)
    print(confusion)
    accuracy = 0
    if float(np.sum(confusion)) != 0:
        accuracy = float(confusion[0, 0] + confusion[1, 1]) / float(np.sum(confusion))
    print("Global Accuracy: " + str(accuracy))
    specificity = 0
    if float(confusion[0, 0] + confusion[0, 1]) != 0:
        specificity = float(confusion[0, 0]) / float(confusion[0, 0] + confusion[0, 1])
    print("Specificity: " + str(specificity))
    sensitivity = 0
    if float(confusion[1, 1] + confusion[1, 0]) != 0:
        sensitivity = float(confusion[1, 1]) / float(confusion[1, 1] + confusion[1, 0])
    print("Sensitivity: " + str(sensitivity))
    precision = 0
    if float(confusion[1, 1] + confusion[0, 1]) != 0:
        precision = float(confusion[1, 1]) / float(confusion[1, 1] + confusion[0, 1])
    print("Precision: " + str(precision))

    # Jaccard similarity index
    jaccard_index = jaccard_similarity_score(y_true, y_pred, normalize=True)
    print("\nJaccard similarity score: " + str(jaccard_index))

    # F1 score
    F1_score = f1_score(y_true, y_pred, labels=None, average='binary', sample_weight=None)
    print("\nF1 score (F-measure): " + str(F1_score))

    #### evaluate the thin vessels
    thin_3pixel_recall_indivi = []
    thin_3pixel_auc_roc = []
    for j in range(pred_imgs_out.shape[0]):
        thick3 = opening(gtruth_masks[j, 0, :, :], square(3))
        thin_gt = gtruth_masks[j, 0, :, :] - thick3
        thin_pred = pred_imgs_out[j, 0, :, :]
        thin_pred[thick3 == 1] = 0
        thin_3pixel_recall_indivi.append(
            round(thin_recall(thin_gt, pred_imgs_out[j, 0, :, :], thresh=0.5), 4))
        thin_3pixel_auc_roc.append(
            round(roc_auc_score(thin_gt.flatten(), thin_pred.flatten()), 4))

    thin_2pixel_recall_indivi = []
    thin_2pixel_auc_roc = []
    for j in range(pred_imgs_out.shape[0]):
        thick = opening(gtruth_masks[j, 0, :, :], square(2))
        thin_gt = gtruth_masks[j, 0, :, :] - thick
        # thin_gt_only = thin_gt[thin_gt == 1]
        # print(thin_gt_only)
        thin_pred = pred_imgs_out[j, 0, :, :]
        # thin_pred = thin_pred[thin_gt == 1]
        thin_pred[thick == 1] = 0
        thin_2pixel_recall_indivi.append(
            round(thin_recall(thin_gt, pred_imgs_out[j, 0, :, :], thresh=0.5), 4))
        thin_2pixel_auc_roc.append(
            round(roc_auc_score(thin_gt.flatten(), thin_pred.flatten()), 4))
    # print("thin 2vessel recall:", thin_2pixel_recall_indivi)
    # print('thin 2vessel auc score', thin_2pixel_auc_roc)

    # Save the results
    with open(path_experiment + 'test_performances_all_epochs.txt', mode='a') as f:
        f.write("\n\n" + path_experiment + " test epoch:" + str(test_epoch)
                + '\naverage mode is:' + str(average_mode)
                + "\nArea under the ROC curve: %.4f" % (AUC_ROC)
                + "\nArea under Precision-Recall curve: %.4f" % (AUC_prec_rec)
                + "\nJaccard similarity score: %.4f" % (jaccard_index)
                + "\nF1 score (F-measure): %.4f" % (F1_score)
                + "\nConfusion matrix:" + str(confusion)
                + "\nACCURACY: %.4f" % (accuracy)
                + "\nSENSITIVITY: %.4f" % (sensitivity)
                + "\nSPECIFICITY: %.4f" % (specificity)
                + "\nPRECISION: %.4f" % (precision)
                + "\nthin 2vessels recall indivi:\n" + str(thin_2pixel_recall_indivi)
                + "\nthin 2vessels recall mean:%.4f" % (np.mean(thin_2pixel_recall_indivi))
                + "\nthin 2vessels auc indivi:\n" + str(thin_2pixel_auc_roc)
                + "\nthin 2vessels auc score mean:%.4f" % (np.mean(thin_2pixel_auc_roc))
                + "\nthin 3vessels recall indivi:\n" + str(thin_3pixel_recall_indivi)
                + "\nthin 3vessels recall mean:%.4f" % (np.mean(thin_3pixel_recall_indivi))
                + "\nthin 3vessels auc indivi:\n" + str(thin_3pixel_auc_roc)
                + "\nthin 3vessels auc score mean:%.4f" % (np.mean(thin_3pixel_auc_roc)))
def jacc(matt, template):
    res = jaccard_similarity_score(matt, template)
    return res
plt.xlabel('Number of Neighbors (K)')
plt.tight_layout()
plt.show()

## Final Model uses k=7
k = 7
# Train Model and Predict
kNN_cls = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
kNN_cls
yhat_knn = kNN_cls.predict(X_test)  # Predict using Test Data

perckNN = metrics.accuracy_score(y_test, yhat_knn)  # store accuracy score
print("KNN Accuracy percentage", perckNN)
JaccardkNN = jaccard_similarity_score(y_test, yhat_knn)
print("KNN Jaccard index: %.2f" % JaccardkNN)
F1ScorekNN = f1_score(y_test, yhat_knn, average='weighted')
print("KNN F1-score: %.2f" % F1ScorekNN)

'''************************************'''
'''            Decision Tree           '''
'''************************************'''
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics

loanTree = DecisionTreeClassifier(criterion="entropy", max_depth=5)  # Classify Decision Tree characteristics
loanTree.fit(X_train, y_train)  # Fit Decision Tree using training set
import numpy as np
from sklearn.datasets import fetch_rcv1
from global_variables import is_leaf_topic
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import jaccard_similarity_score

# Fetch the training dataset
train_data = fetch_rcv1(subset='train')

# Convert the scipy sparse matrix to a dense version usable by sklearn's functions
train_data.target = train_data.target.todense()
is_leaf_topic = np.array(is_leaf_topic)

# Subset the data to choose documents part of leaf nodes
train_data.target = train_data.target[:, is_leaf_topic]

# Train the classifier with the training data
classifier = RandomForestClassifier(n_estimators=10)
classifier.fit(train_data.data, train_data.target)

# Fetch the test data
test_data = fetch_rcv1(subset='test', random_state=42, shuffle=True)
test_data.data = test_data.data[0:1000, :]
test_data.target = test_data.target[0:1000, :]
test_data.target = test_data.target[:, is_leaf_topic]
test_data.target = test_data.target.todense()

test_predict = classifier.predict(test_data.data)
print("The Jaccard Similarity Score is : " +
      str(jaccard_similarity_score(test_data.target, test_predict)))
specificity = float(confusion[0, 0]) / float(confusion[0, 0] + confusion[0, 1])
print("Specificity: " + str(specificity))
sensitivity = 0
if float(confusion[1, 1] + confusion[1, 0]) != 0:
    sensitivity = float(confusion[1, 1]) / float(confusion[1, 1] + confusion[1, 0])
print("Sensitivity: " + str(sensitivity))
precision = 0
if float(confusion[1, 1] + confusion[0, 1]) != 0:
    precision = float(confusion[1, 1]) / float(confusion[1, 1] + confusion[0, 1])
print("Precision: " + str(precision))

# Jaccard similarity index
jaccard_index = jaccard_similarity_score(y_true, y_pred, normalize=True)
print("\nJaccard similarity score: " + str(jaccard_index))

# F1 score
F1_score = f1_score(y_true, y_pred, labels=None, average='binary', sample_weight=None)
print("\nF1 score (F-measure): " + str(F1_score))

# Save the results
file_perf = open(path_experiment + 'performances.txt', 'w')
file_perf.write("Area under the ROC curve: " + str(AUC_ROC)
                + "\nArea under Precision-Recall curve: " + str(AUC_prec_rec)
                + "\nJaccard similarity score: " + str(jaccard_index) +
import numpy as np
from sklearn.metrics import hamming_loss, jaccard_similarity_score

print(hamming_loss(np.array([[0.0, 1.0], [1.0, 1.0]]),
                   np.array([[0.0, 1.0], [1.0, 1.0]])))
print(hamming_loss(np.array([[0.0, 1.0], [1.0, 1.0]]),
                   np.array([[1.0, 1.0], [1.0, 1.0]])))
print(hamming_loss(np.array([[0.0, 1.0], [1.0, 1.0]]),
                   np.array([[0.0, 1.0], [1.0, 1.0]])))
print(jaccard_similarity_score(np.array([[0.0, 1.0], [1.0, 1.0]]),
                               np.array([[0.0, 1.0], [1.0, 1.0]])))
print(jaccard_similarity_score(np.array([[0.0, 1.0], [1.0, 1.0]]),
                               np.array([[1.0, 1.0], [1.0, 1.0]])))
print(jaccard_similarity_score(np.array([[0.0, 1.0], [1.0, 1.0]]),
                               np.array([[1.0, 1.0], [0.0, 1.0]])))
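# Expected output of the six prints above, assuming the 2-D inputs are read as
# multilabel indicators: hamming_loss counts mismatched labels over all four
# entries (0.0, 0.25, 0.0), while jaccard_similarity_score averages per-row
# intersection-over-union (1.0, (0.5 + 1.0) / 2 = 0.75, (0.5 + 0.5) / 2 = 0.5).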
def jaccard_score(gt_lbl, mask):
    lbled_mask = label(mask > CELL_THRESHOLD, background=0)
    return jaccard_similarity_score(lbled_mask.flatten(), gt_lbl.flatten())
    X, Y, center=False)  # center=True (the default) would not work
    # ("ValueError: center=True only allowed for dense data") but should
    # presumably work in general

Jaccardtable = np.zeros((146, 2))
for K in range(1, 146):
    data_X_selected = SelectKBest(score_func=f_regression, k=K).fit_transform(data_X, data_y)

    # Randomly permute the indices of data when splitting into training and
    # test data (90% and 10% of the full data set).
    perm = np.random.permutation(len(data))
    data_train_y = data_y[perm[:-len(data_y) // 10]]
    data_test_y = data_y[perm[-len(data_y) // 10:]]
    data_train_X = data_X_selected[perm[:-len(data_X_selected) // 10]]
    data_test_X = data_X_selected[perm[-len(data_X_selected) // 10:]]

    # Fit and evaluate with Naive Bayes
    from sklearn.naive_bayes import GaussianNB
    from sklearn.metrics import jaccard_similarity_score
    nbayes = GaussianNB()
    nbayes.fit(data_train_X, data_train_y)
    expected = data_test_y
    predicted = nbayes.predict(data_test_X)
    Jaccardtable[K, 0] = K
    Jaccardtable[K, 1] = jaccard_similarity_score(expected, predicted)

np.savetxt("Jaccardtable_NBayes.txt", Jaccardtable)
print("done")
scheduler.step()

# For each mini-batch...
for batch, (data, labels) in enumerate(train_loader, 1):
    # Send to the GPU
    data = data.to(device)
    labels = labels.to(device)

    # Zero the parameter gradients
    optimizer.zero_grad()

    # Forward pass
    outputs = model(data)
    predictions = torch.argmax(outputs, 1)
    running_train_iou += metrics.jaccard_similarity_score(
        labels.cpu().numpy().flatten(),
        predictions.cpu().numpy().flatten())

    # Calculate loss
    loss = criterion(outputs, labels)
    running_loss += loss.item()

    # Calculate gradients
    loss.backward()
    # for name, param in model.named_parameters():
    #     if param.requires_grad and param.grad is not None:
    #         print(name)
    #         print(torch.mean(torch.abs(param.grad)).item())
    #         print(torch.max(torch.abs(param.grad)).item())
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

#%%
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score
print(classification_report(y_test, y_pred))
print("Accuracy :: ", accuracy_score(y_test, y_pred))

# F1 Score
print("F1-Score :: ", f1_score(y_test, y_pred, average="weighted"))

# jaccard_similarity_score (the original printed the F1 score here by mistake)
from sklearn.metrics import jaccard_similarity_score
print("jaccard_similarity_score :: ", jaccard_similarity_score(y_test, y_pred))

# =============================================================================
# # Dump the trained RandomForestClassifier with Pickle
# random_forest_classifier_filename = '../saved-pickles/random_forest_classifier.pkl'
# # Open the file to save as pkl file
# random_forest_classifier_pkl = open(random_forest_classifier_filename, 'wb')
# pickle.dump(random_forest_classifier, random_forest_classifier_pkl)
# # Close the pickle instances
# random_forest_classifier_pkl.close()
# =============================================================================

# Saving model
swc_filename = folderpath + '/' + data_file + '/' + data_file + '.tif.v3dpbd.swc'
num_sample_nodes = 2
imgs, labels, p_encoding, node_ids = sample_nodes_truth(
    swc_filename, img_filename, num_nodes_per_img=num_sample_nodes,
    child_step=1, vis_flag=False)

n_ch, n_x, n_y, n_z = 1, 24, 24, 24
batch = num_sample_nodes
n_label = 48
x_patch = np.zeros((batch, n_ch, n_x, n_y, n_z))
y_patch = np.zeros((batch, n_label))
x_patch[:, 0, 2:-1, 2:-1, 2:-1] = np.array(imgs)
y_patch = np.array(labels)

vis_enlarge_ratio = 5
ypred = model.predict(x_patch, batch_size=10)
ypred[ypred > 0.5] = 1
ypred[ypred < 0.5] = 0

scores = []
for j in range(len(labels)):
    score1 = jaccard_similarity_score(labels[j], ypred[j, :])
    score2 = utils.smoothed_jaccard(labels[j], ypred[j, :])
    scores.append((score1, score2))
print(scores)
print(np.where(ypred[0, :] > 0))
print(np.where(labels[0]))

# for x, y in data_generator_undirected(train_dir, traindatalist):
#     pass
# Loading data
data = pd.read_csv('D:\heart.csv')

# Creating feature and target data set
X = data[['age', 'sex', 'cp', 'trestbps', 'chol', 'thalach']].values
Y = data[['target']].values
print(data.dtypes)

# Creating training and testing data sets
from sklearn.model_selection import train_test_split
XTrain, XTest, YTrain, YTest = train_test_split(X, Y, test_size=0.2, random_state=4)
print('Shape of training set: ', XTrain.shape, YTrain.shape)
print('Shape of testing set: ', XTest.shape, YTest.shape)

# Preparing model
from sklearn import svm
Model = svm.SVC(kernel='rbf')
Model.fit(XTrain, YTrain)

# Using model for prediction
result = Model.predict(XTest)

# Evaluation of accuracy
from sklearn.metrics import jaccard_similarity_score
print('Jaccard similarity score of training set: ',
      jaccard_similarity_score(YTrain, Model.predict(XTrain)))
print('Jaccard similarity score of testing set: ',
      jaccard_similarity_score(YTest, result))

from sklearn.metrics import f1_score
print('F1 score of training set: ',
      f1_score(YTrain, Model.predict(XTrain), average='weighted'))
print('F1 score of testing set: ',
      f1_score(YTest, result, average='weighted'))
yhat = clf.predict(X_test)

from sklearn.metrics import f1_score
f1_score(y_test, yhat, average="weighted")

# In[47]:

from sklearn.linear_model import LogisticRegression
LR = LogisticRegression(C=0.01, solver="liblinear").fit(X_train, y_train)
yhat = LR.predict(X_test)
yhat_prob = LR.predict_proba(X_test)

from sklearn.metrics import jaccard_similarity_score
jaccard_similarity_score(y_test, yhat)

# In[48]:

from sklearn.metrics import jaccard_similarity_score
from sklearn.metrics import f1_score
from sklearn.metrics import log_loss

# In[49]:

get_ipython().system('wget -O loan_test.csv https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/ML0101ENv3/labs/loan_test.csv')
mae = mean_absolute_error(y_test, Y_test, multioutput='uniform_average')
# MAE output is non-negative floating point. The best value is 0.0.
print("Mean Absolute Error: {}".format(mae))

mse = mean_squared_error(y_test, Y_test, multioutput='uniform_average')
# MSE output is non-negative floating point. The best value is 0.0.
print("Mean Squared Error: {}".format(mse))

r2 = r2_score(y_test, Y_test)
# R^2 (coefficient of determination) regression score function.
# Best possible score is 1.0 and it can be negative (because the model can be
# arbitrarily worse). A constant model that always predicts the expected value
# of y, disregarding the input features, would get a R^2 score of 0.0.
print("R - Squared value: {}".format(r2))

print('What percent of predictions are the same: {}'.format(jaccard_similarity_score(y_test, Y_test)))

# Confusion Matrix
print(metrics.confusion_matrix(y_test, Y_test))
print(metrics.classification_report(y_test, Y_test))

actual = y_train
predictions = model.predict(X_train)
false_positive_rate, true_positive_rate, thresholds = roc_curve(actual, predictions)
roc_auc = auc(false_positive_rate, true_positive_rate)
print("Area Under the curve is: {}".format(roc_auc))
plt.title('Receiver Operating Characteristic')
plt.plot(false_positive_rate, true_positive_rate, 'b', label='AUC = %0.2f' % roc_auc)
plt.legend(loc='lower right')
print(classification_report(y_test, yhat))

# In[28]:

from sklearn.metrics import f1_score
f1_score(y_test, yhat, average='weighted')

# In[29]:

from sklearn.metrics import jaccard_similarity_score
jaccard_similarity_score(y_test, yhat)

# # Decision Tree

# In[30]:

# Import the decision tree model
from sklearn.tree import DecisionTreeClassifier

# In[31]:
def compute_performances_for_multiclass(y_test, y_test_predicted, class_names, performances):
    # Compute the accuracy classification score: the fraction of correctly
    # classified samples
    performances.accuracy_score_fraction = accuracy_score(
        y_test, y_test_predicted, normalize=True)
    # Compute the accuracy classification score: the number of correctly
    # classified samples
    performances.accuracy_score_number = accuracy_score(
        y_test, y_test_predicted, normalize=False)
    print("\nAccuracy classification score : ")
    print("    Fraction of correctly classified samples : %.2f"
          % performances.accuracy_score_fraction)
    print("    Number of correctly classified samples: %.2f"
          % performances.accuracy_score_number)

    # Compute the Cohen's kappa score
    performances.cohen_kappa_score = cohen_kappa_score(y_test, y_test_predicted)
    print("\nCohen's kappa score : %.2f" % performances.cohen_kappa_score)

    # Compute the confusion matrix without normalization
    performances.confusion_matrix_without_normalization = confusion_matrix(
        y_test, y_test_predicted)
    # Compute the confusion matrix with normalization
    performances.confusion_matrix_with_normalization = \
        performances.confusion_matrix_without_normalization.astype('float') \
        / performances.confusion_matrix_without_normalization.sum(axis=1)[:, np.newaxis]
    print("\nConfusion matrix : ")
    print("    Confusion matrix without normalization : ")
    square_matrix_size = len(performances.confusion_matrix_without_normalization)
    for i in range(square_matrix_size):
        if i == 0:
            print('    [' + np.array2string(
                performances.confusion_matrix_without_normalization[i]))
        elif i == square_matrix_size - 1:
            print('     ' + np.array2string(
                performances.confusion_matrix_without_normalization[i]) + ']')
        else:
            print('     ' + np.array2string(
                performances.confusion_matrix_without_normalization[i]))
    print("    Confusion matrix with normalization : ")
    square_matrix_size = len(performances.confusion_matrix_with_normalization)
    for i in range(square_matrix_size):
        if i == 0:
            print('    [' + np.array2string(
                performances.confusion_matrix_with_normalization[i]))
        elif i == square_matrix_size - 1:
            print('     ' + np.array2string(
                performances.confusion_matrix_with_normalization[i]) + ']')
        else:
            print('     ' + np.array2string(
                performances.confusion_matrix_with_normalization[i]))

    # Compute the classification report
    performances.classification_report = classification_report(
        y_test, y_test_predicted, target_names=class_names, digits=4)
    print("\nclassification_report : ")
    print(performances.classification_report)

    # Compute the average Hamming loss
    performances.hamming_loss = hamming_loss(y_test, y_test_predicted)
    print("\nAverage Hamming loss : %.2f" % performances.hamming_loss)

    # Compute the Jaccard similarity coefficient score with normalization
    performances.jaccard_similarity_score_with_normalization = jaccard_similarity_score(
        y_test, y_test_predicted, normalize=True)
    # Compute the Jaccard similarity coefficient score without normalization
    performances.jaccard_similarity_score_without_normalization = jaccard_similarity_score(
        y_test, y_test_predicted, normalize=False)
    print("\nJaccard similarity coefficient score : ")
    print("    Average of Jaccard similarity coefficient : %.2f"
          % performances.jaccard_similarity_score_with_normalization)
    print("    Sum of the Jaccard similarity coefficient over the sample set : %.2f"
          % performances.jaccard_similarity_score_without_normalization)

    # Compute the precision
    performances.micro_precision = precision_score(y_test, y_test_predicted, average='micro')
    performances.macro_precision = precision_score(y_test, y_test_predicted, average='macro')
    performances.weighted_precision = precision_score(y_test, y_test_predicted, average='weighted')
    performances.none_precision = precision_score(y_test, y_test_predicted, average=None)
    print("\nPrecision score : ")
    print("    micro : %.2f" % performances.micro_precision)
    print("    macro : %.2f" % performances.macro_precision)
    print("    weighted : %.2f" % performances.weighted_precision)
    print("    None : " + np.array2string(performances.none_precision))
    print("    Classes : " + np.array2string(class_names))

    # Compute the recall
    performances.micro_recall = recall_score(y_test, y_test_predicted, average='micro')
    performances.macro_recall = recall_score(y_test, y_test_predicted, average='macro')
    performances.weighted_recall = recall_score(y_test, y_test_predicted, average='weighted')
    performances.none_recall = recall_score(y_test, y_test_predicted, average=None)
    print("\nRecall score : ")
    print("    micro : %.2f" % performances.micro_recall)
    print("    macro : %.2f" % performances.macro_recall)
    print("    weighted : %.2f" % performances.weighted_recall)
    print("    None : " + np.array2string(performances.none_recall))
    print("    Classes : " + np.array2string(class_names))

    # Compute the F1 score
    performances.micro_f1_score = f1_score(y_test, y_test_predicted, average='micro')
    performances.macro_f1_score = f1_score(y_test, y_test_predicted, average='macro')
    performances.weighted_f1_score = f1_score(y_test, y_test_predicted, average='weighted')
    performances.none_f1_score = f1_score(y_test, y_test_predicted, average=None)
    print("\nF1-score : ")
    print("    micro : %.2f" % performances.micro_f1_score)
    print("    macro : %.2f" % performances.macro_f1_score)
    print("    weighted : %.2f" % performances.weighted_f1_score)
    print("    None : " + np.array2string(performances.none_f1_score))
    print("    Classes : " + np.array2string(class_names))

    # Compute the Matthews correlation coefficient
    performances.matthews_corrcoef = matthews_corrcoef(y_test, y_test_predicted)
    print("\nMatthews correlation coefficient : %.2f" % performances.matthews_corrcoef)

    return performances
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--params", required=True,
                help="Path to store all the configurable variables")
args = vars(ap.parse_args())
params = args["params"]

search = SearchImage(params)
search.setMaterialCode()
search.processQueryImage()
print(search.materialCode)

tmpList = sorted(search.materialCode.items(), key=lambda x: x[1])
for i in tmpList:
    search.targetNames.append(i[0])

print("Actual Values :")
print(search.trueList)
print("\nPredicted Values :")
print(search.predList)
print("\nConfusion Matrix")
print(confusion_matrix(search.trueList, search.predList))
print("\nClassification Report")
print(classification_report(search.trueList, search.predList,
                            target_names=search.targetNames))
print("\nAccuracy Score")
print(accuracy_score(search.trueList, search.predList))
print("\nJaccard Similarity Score")
print(jaccard_similarity_score(search.trueList, search.predList))
def evaluate_model_svm(x, y, learn_path, k=10, thresh=0.5):
    print(len(y), len(y[0]))
    # create a k fold with no unique classes
    count = 0
    while True:
        count += 1
        # print(count, 'Finding a proper KF...')
        kf = list(KFold(n_splits=k, shuffle=True,
                        random_state=randint(0, 100000)).split(x))
        good_folds = True
        for train_index, test_index in kf:
            for i in range(len(y[0])):
                if len(np.unique(y[train_index, i])) < 2:  # or len(np.unique(y[test_index, i])) < 2:
                    # print(y[train_index, i], np.unique(y[train_index, i]))
                    print(i)
                    good_folds = False
                    break
            if not good_folds:
                break
        if good_folds:
            break
    print('Found a good KF in', count, 'try!')

    with open(learn_path + 'topic_classifier-folds.pkl', 'wb') as out_file:
        pickle.dump(kf, out_file)

    fold_num = 0
    stats = QuickDataFrame(['Jaccard (normalised)', 'Accuracy (normalised)', 'Accuracy',
                            'F1_score (micro averaged)', 'F1_score (macro averaged by labels)',
                            'F1_score (averaged by samples)', 'Hamming loss',
                            'Label Ranking loss:'])
    prog = Progresser(k)
    for train_index, test_index in kf:
        # print(train_index, test_index)
        print('___________________________________________________')
        x_train, x_test = x[train_index], x[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # cls = SVC(kernel='linear')
        # cls = SVC(kernel='poly', probability=True, tol=1e-5)
        cls = SVC(kernel='linear', probability=True, tol=1e-5)
        # cls = GaussianNB()
        # cls = RandomForestClassifier(max_features='auto', random_state=1)
        topic_classifier = BinaryRelevance(classifier=cls, require_dense=[True, True])
        try:
            topic_classifier.fit(x_train, y_train)
        except Exception as e:
            print('\nfit error!:', e)
            continue

        # with open(learn_path + 'topic_classifier-SVC' + str(fold_num) + '.pkl', 'wb') as out_file:
        #     pickle.dump(topic_classifier, out_file)

        try:
            # predictions = topic_classifier.predict(x_test)
            predictions = np.zeros((len(x_test), y.shape[1]))
            preds = topic_classifier.predict_proba(x_test)
            for i in range(len(x_test)):
                for j in range(y.shape[1]):
                    predictions[i, j] = 1.0 if preds[i, j] > thresh else 0.0

            s = [jaccard_similarity_score(y_test, predictions, normalize=True),
                 accuracy_score(y_test, predictions, normalize=True),
                 accuracy_score(y_test, predictions, normalize=False),
                 f1_score(y_test, predictions, average='micro'),
                 f1_score(y_test, predictions, average='macro'),
                 f1_score(y_test, predictions, average='samples'),
                 hamming_loss(y_test, predictions),
                 label_ranking_loss(y_test, predictions)]
            stats.append(s)
            print(stats[stats.length - 1])
        except Exception as e:
            print('Eval error!:', e)

        fold_num += 1
        prog.count()

    for col in stats.cols:
        print(col, np.mean(stats[col]))
def _get_jaccard_index(self, test_row_obs_list, train_col_obs_list, jaccard_similarity_score):
    # Flatten the nested observation lists before comparing them element-wise.
    test_row_obs_list = [item for sublist in test_row_obs_list for item in sublist]
    train_col_obs_list = [item for sublist in train_col_obs_list for item in sublist]
    jacc_sim_score = jaccard_similarity_score(test_row_obs_list, train_col_obs_list)
    return jacc_sim_score
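# A minimal usage sketch with hypothetical nested observation lists, assuming
# the scorer injected above is sklearn's (pre-0.23) jaccard_similarity_score:
from sklearn.metrics import jaccard_similarity_score

test_rows = [[1, 0], [1, 1]]   # flattens to [1, 0, 1, 1]
train_cols = [[1, 0], [0, 1]]  # flattens to [1, 0, 0, 1]
flat_test = [item for sublist in test_rows for item in sublist]
flat_train = [item for sublist in train_cols for item in sublist]
# Element-wise agreement on the flattened vectors: 3 of 4 positions match.
print(jaccard_similarity_score(flat_test, flat_train))  # 0.75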
               x='Clump', y='UnifSize', color='Yellow', label="benign", ax=ax)
plt.show()

# Drop rows whose BareNuc value is not numeric, then cast the column to int.
cancer_df = cancer_df[pd.to_numeric(cancer_df['BareNuc'], errors='coerce').notnull()]
cancer_df['BareNuc'] = cancer_df['BareNuc'].astype('int')

features_df = cancer_df[['Clump', 'UnifSize', 'UnifShape', 'MargAdh', 'SingEpiSize',
                         'BareNuc', 'BlandChrom', 'NormNucl', 'Mit']]
X = np.asanyarray(features_df)
cancer_df['Class'] = cancer_df['Class'].astype('int')
y = np.asanyarray(cancer_df['Class'])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4)

clf = svm.SVC(kernel='rbf')
clf.fit(X_train, y_train)
yhat = clf.predict(X_test)
print('f1_score', f1_score(y_test, yhat, average='weighted'))
print('jaccard_similarity_score', jaccard_similarity_score(y_test, yhat))
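# Worth noting: on 1-D label vectors like y_test/yhat above, the old
# jaccard_similarity_score is documented to be equivalent to accuracy_score,
# since each sample's label "set" has exactly one element. A quick check,
# assuming scikit-learn < 0.23 and illustrative 2/4 class labels:
import numpy as np
from sklearn.metrics import accuracy_score, jaccard_similarity_score

y_true = np.array([2, 2, 4, 4])
y_pred = np.array([2, 4, 4, 4])
assert jaccard_similarity_score(y_true, y_pred) == accuracy_score(y_true, y_pred)  # both 0.75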
    neigh = KNeighborsClassifier(n_neighbors=n).fit(x_train, y_train)
    yhat = neigh.predict(x_test)
    mean_acc[n - 1] = metrics.accuracy_score(y_test, yhat)
    std_acc[n - 1] = np.std(yhat == y_test) / np.sqrt(yhat.shape[0])

print("The best accuracy was with", mean_acc.max(), "with k=", mean_acc.argmax() + 1)

# Using an SVM model
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

svm_model = SVC(kernel='linear', random_state=0)
svm_model.fit(x_train, y_train)
y_predict = svm_model.predict(x_test)
cm = confusion_matrix(y_test, y_predict)

# Model improvement: min-max scale the training features
min_train = x_train.min()
range_train = (x_train - min_train).max()
x_train_scaled = (x_train - min_train) / range_train

from sklearn.metrics import f1_score
f1_score(y_test, yhat, average='weighted')

from sklearn.metrics import jaccard_similarity_score
jaccard_similarity_score(y_test, yhat)
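# jaccard_similarity_score was deprecated in scikit-learn 0.21 and removed in
# 0.23. A sketch of the closest call on current versions, assuming the same
# y_test/yhat as above; note this aggregates true per-class Jaccard indices
# and is not numerically identical to the old function, which reduced to
# plain accuracy on 1-D multiclass labels:
from sklearn.metrics import jaccard_score
print(jaccard_score(y_test, yhat, average='weighted'))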
# In[34]:

test_df = pd.read_csv('loan_test.csv')
test_df.head()

# In[35]:

y_test_evaluation = test_df['loan_status'].values

# In[36]:

# Truncate to the 54 rows of the evaluation set; this assumes the first 54
# predictions line up with the loan_test.csv row order.
yhat_knn = yhat_knn[:54]
f1_score_knn = f1_score(y_test_evaluation, yhat_knn, average='weighted')
f1_score_knn
jaccard_score_knn = jaccard_similarity_score(y_test_evaluation, yhat_knn)
jaccard_score_knn

# In[37]:

predTree = predTree[:54]
f1_score_tree = f1_score(y_test_evaluation, predTree, average='weighted')
f1_score_tree
jaccard_score_tree = jaccard_similarity_score(y_test_evaluation, predTree)
jaccard_score_tree

# In[38]:

yhat_vector = yhat_vector[:54]
f1_score_vector = f1_score(y_test_evaluation, yhat_vector, average='weighted')
def test_multilabel_jaccard_similarity_score():
    # Dense label indicator matrix format
    y1 = np.array([[0, 1, 1], [1, 0, 1]])
    y2 = np.array([[0, 0, 1], [1, 0, 1]])

    # size(y1 \inter y2) = [1, 2]
    # size(y1 \union y2) = [2, 2]
    assert_equal(jaccard_similarity_score(y1, y2), 0.75)
    assert_equal(jaccard_similarity_score(y1, y1), 1)
    assert_equal(jaccard_similarity_score(y2, y2), 1)
    assert_equal(jaccard_similarity_score(y2, np.logical_not(y2)), 0)
    assert_equal(jaccard_similarity_score(y1, np.logical_not(y1)), 0)
    assert_equal(jaccard_similarity_score(y1, np.zeros(y1.shape)), 0)
    assert_equal(jaccard_similarity_score(y2, np.zeros(y1.shape)), 0)

    with ignore_warnings():  # sequence of sequences is deprecated
        # List of tuples of labels
        y1 = [(1, 2,), (0, 2,)]
        y2 = [(2,), (0, 2,)]
        assert_equal(jaccard_similarity_score(y1, y2), 0.75)
        assert_equal(jaccard_similarity_score(y1, y1), 1)
        assert_equal(jaccard_similarity_score(y2, y2), 1)
        assert_equal(jaccard_similarity_score(y2, [(), ()]), 0)

        # |y3 inter y4| = [0, 0, 0]
        # |y3 union y4| = [2, 2, 3]
        y3 = [(0,), (1,), (3,)]
        y4 = [(4,), (4,), (5, 6)]
        assert_almost_equal(jaccard_similarity_score(y3, y4), 0)

        # |y5 inter y6| = [0, 1, 1]
        # |y5 union y6| = [2, 1, 3]
        y5 = [(0,), (1,), (2, 3)]
        y6 = [(1,), (1,), (2, 0)]
        assert_almost_equal(jaccard_similarity_score(y5, y6), (1 + 1 / 3) / 3)
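# The 0.75 asserted above can be reproduced by hand: the multilabel score is
# the per-sample |intersection| / |union|, averaged over samples.
import numpy as np

y1 = np.array([[0, 1, 1], [1, 0, 1]])
y2 = np.array([[0, 0, 1], [1, 0, 1]])
inter = np.logical_and(y1, y2).sum(axis=1)  # [1, 2]
union = np.logical_or(y1, y2).sum(axis=1)   # [2, 2]
print(np.mean(inter / union))               # (0.5 + 1.0) / 2 = 0.75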
# Split the training data randomly so that 15% of it is held out for testing accuracy
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=432)
print('Train set:', X_train.shape, y_train.shape)
print('Test set:', X_test.shape, y_test.shape)

############## Training and performance analysis ##############
LR = LogisticRegression(C=0.015, solver='liblinear').fit(X_train, y_train)
print(LR)
yhat = LR.predict(X_test)
yhat_prob = LR.predict_proba(X_test)

print("Jaccard Index: ", jaccard_similarity_score(y_test, yhat))
print("Confusion Matrix:\n", confusion_matrix(y_test, yhat, labels=[1, 0]))

# Compute confusion matrix
cnf_matrix = confusion_matrix(y_test, yhat, labels=[1, 0])
np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=['Survived=1', 'Survived=0'], normalize=False,
                      title='Confusion matrix')
# plt.show()

print(classification_report(y_test, yhat))
print("Log loss: ", log_loss(y_test, yhat_prob))
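# For binary labels, the Jaccard index of the positive class can be read off
# the confusion matrix as TP / (TP + FP + FN). With labels=[1, 0] as above,
# cnf_matrix is laid out [[TP, FN], [FP, TN]], so:
tp, fn = cnf_matrix[0, 0], cnf_matrix[0, 1]
fp = cnf_matrix[1, 0]
print("Jaccard of class 1:", tp / (tp + fp + fn))
# This differs from jaccard_similarity_score above, which on 1-D binary
# labels reduces to accuracy: (TP + TN) / total.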
def add_weight(user_to_video_crosstab, active_user_data):
    # add_weight: input = crosstab, output = weighted crosstab
    active_user_data_attribute = []
    ac_user_num = active_user_data.iloc[0]['user_num']
    ac_user_sex = active_user_data.iloc[0]['sex']
    ac_user_hp = active_user_data.iloc[0]['health_point']
    ac_user_label = active_user_data.iloc[0]['label']

    if ac_user_sex == 'f':
        active_user_data_attribute += [51]
        if ac_user_hp >= 40:
            active_user_data_attribute += [convert_cate_num('h')]
        elif 33 <= ac_user_hp < 40:
            active_user_data_attribute += [convert_cate_num('m')]
        else:
            active_user_data_attribute += [convert_cate_num('l')]
    else:
        active_user_data_attribute += [52]
        if ac_user_hp >= 50:
            active_user_data_attribute += [convert_cate_num('h')]
        elif 44 <= ac_user_hp < 50:
            active_user_data_attribute += [convert_cate_num('m')]
        else:
            active_user_data_attribute += [convert_cate_num('l')]

    active_user_data_attribute += [convert_cate_num(active_user_data.iloc[0]['bodypart'])]
    # active_user_data_attribute = [sex, level, bodypart] -> compared with video attributes

    vid_weight_exponent = pd.DataFrame(0, index=['weight_num'],
                                       columns=user_to_video_crosstab.columns)
    for i in vid_weight_exponent.columns:
        MyDB.execute(
            'select sex, level, bodypart from ROUTINE where video_num = %s '
            'UNION select sex, level, bodypart from EXERCISE where video_num = %s'
            % (str(i), str(i)))
        attribute_list = list(MyDB.fetchone())  # ['sex', 'level', 'bodypart']
        for j in range(3):
            if j == 0:
                if attribute_list[j] == 'm':
                    attribute_list[j] = 52
                elif attribute_list[j] == 'f':
                    attribute_list[j] = 51
                else:
                    attribute_list[j] = convert_cate_num(attribute_list[j])
            else:
                attribute_list[j] = convert_cate_num(attribute_list[j])
        # With normalize=False the score is the number of matching positions
        # (0-3), so this stores a mismatch count per video.
        vid_weight_exponent.loc['weight_num', i] = (
            vid_weight_exponent.iloc[0][i] + 3
            - jaccard_similarity_score(active_user_data_attribute, attribute_list,
                                       normalize=False))

    for i in vid_weight_exponent.columns:
        n = vid_weight_exponent.loc['weight_num'][i]
        # a_{n+1} = 0.8 * a_n + 0.3, which converges to 1.5; closed form below
        a_n = 1.5 - (0.4 * (0.8 ** (n - 1)))
        if n == 0:
            vid_weight_exponent.loc['weight_num', i] = 0.3
        else:
            vid_weight_exponent.loc['weight_num', i] = a_n
        user_to_video_crosstab.loc[:, i] += vid_weight_exponent.loc['weight_num'][i]

    MyDB.execute('select user_num from USER where label = ' + str(ac_user_label))
    same_label_user = MyDB.fetchall()
    same_label_user = [i[0] for i in same_label_user]
    for i in user_to_video_crosstab.index:
        if i in same_label_user:
            user_to_video_crosstab.loc[i] += 0.3

    # Weight videos watched within the last 7 days
    temp_time = datetime.now()
    timegap = timedelta(days=7)
    temp_time = temp_time.date() - timegap
    MyDB.execute('select video_num from HISTORY where user_num = ' + str(ac_user_num)
                 + ' and time >= \'%s\'' % temp_time)
    watched_video = MyDB.fetchall()
    watched_video = [i[0] for i in watched_video]  # fixed: was assigned to watched_Video
    for i in user_to_video_crosstab.columns:
        if i in watched_video:
            user_to_video_crosstab.loc[:, i] += 0.1

    return user_to_video_crosstab
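# Why "3 - score" above is a mismatch count: with normalize=False the old
# jaccard_similarity_score returns the number of positions where two 1-D
# vectors agree. A minimal sketch with hypothetical attribute codes:
from sklearn.metrics import jaccard_similarity_score

user_attrs = [51, 11, 7]   # hypothetical [sex, level, bodypart] codes
video_attrs = [51, 12, 7]
matches = jaccard_similarity_score(user_attrs, video_attrs, normalize=False)
print(matches)       # 2 positions agree
print(3 - matches)   # 1 mismatch -> larger weight exponent n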
def j_score(yTrue, yPred):
    # Mean score over a batch of masks: binarise each true/predicted pair at
    # 0.1, flatten, and average the per-image scores.
    js = []
    for yT, yP in zip(yTrue, yPred):
        js.append(jaccard_similarity_score((yT > 0.1).flatten(),
                                           (yP > 0.1).flatten()))
    js = np.stack(js)
    return np.mean(js)
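# Caveat: on flattened boolean masks the old jaccard_similarity_score counts
# matching pixels including true negatives, i.e. pixel accuracy rather than
# intersection-over-union. A per-image IoU sketch for comparison (iou is a
# hypothetical helper, not part of the original code):
import numpy as np

def iou(y_true_mask, y_pred_mask):
    # |intersection| / |union| over the foreground pixels only.
    t, p = y_true_mask.astype(bool), y_pred_mask.astype(bool)
    union = np.logical_or(t, p).sum()
    return np.logical_and(t, p).sum() / union if union else 1.0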
def get_jaccard_index(y_true, y_pred):
    # Jaccard similarity index, averaged over samples (normalize=True)
    jaccard_index = jaccard_similarity_score(y_true, y_pred, normalize=True)
    print("Jaccard similarity score: " + str(jaccard_index))
    return jaccard_index
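# On scikit-learn >= 0.23 the helper above would need jaccard_score instead.
# For multilabel indicator arrays, average='samples' reproduces the old
# per-sample averaging, up to edge cases where a sample has no labels in
# either y_true or y_pred. A hypothetical drop-in sketch:
from sklearn.metrics import jaccard_score

def get_jaccard_index_modern(y_true, y_pred):
    # Per-sample Jaccard index, averaged over samples.
    jaccard_index = jaccard_score(y_true, y_pred, average='samples')
    print("Jaccard similarity score: " + str(jaccard_index))
    return jaccard_index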