Example No. 1
def test_classifier_chain_vs_independent_models():
    # Verify that an ensemble of classifier chains (each of length
    # N) can achieve a higher Jaccard similarity score than N independent
    # models
    yeast = fetch_mldata('yeast')
    X = yeast['data']
    Y = yeast['target'].transpose().toarray()
    X_train = X[:2000, :]
    X_test = X[2000:, :]
    Y_train = Y[:2000, :]
    Y_test = Y[2000:, :]

    ovr = OneVsRestClassifier(LogisticRegression())
    ovr.fit(X_train, Y_train)
    Y_pred_ovr = ovr.predict(X_test)

    chain = ClassifierChain(LogisticRegression(),
                            order=np.array([0, 2, 4, 6, 8, 10,
                                            12, 1, 3, 5, 7, 9,
                                            11, 13]))
    chain.fit(X_train, Y_train)
    Y_pred_chain = chain.predict(X_test)

    assert_greater(jaccard_similarity_score(Y_test, Y_pred_chain),
                   jaccard_similarity_score(Y_test, Y_pred_ovr))
Example No. 2
    def test_jaccard_similarity_score(self):
        result = self.df.metrics.jaccard_similarity_score()
        expected = metrics.jaccard_similarity_score(self.target, self.pred)
        self.assertEqual(result, expected)

        result = self.df.metrics.jaccard_similarity_score(normalize=False)
        expected = metrics.jaccard_similarity_score(self.target, self.pred, normalize=False)
        self.assertEqual(result, expected)
Example No. 3
    def train_and_eval(x_train, y_train, x_test, y_test, model, param_result):
        print("\nTraining and evaluating...")

        for result_list in param_result:
            print("Fitting: " + str(result_list[2]))

            opt_model = result_list[2]
            opt_model.fit(x_train, y_train)
            y_pred = opt_model.predict(x_test)

            print("\nClassification Report:")
            print(metrics.classification_report(y_test, y_pred))
            print("\nAccuracy Score:")
            print(metrics.accuracy_score(y_test, y_pred))
            print("\nConfusion Matrix:")
            print(metrics.confusion_matrix(y_test, y_pred))
            print("\nF1-Score:")
            print(metrics.f1_score(y_test, y_pred))
            print("\nHamming Loss:")
            print(metrics.hamming_loss(y_test, y_pred))
            print("\nJaccard Similarity:")
            print(metrics.jaccard_similarity_score(y_test, y_pred))
            # vvv Not supported due to ValueError: y_true and y_pred have different number of classes 3, 2
            # print('\nLog Loss:')
            # print(metrics.log_loss(y_test, y_pred))
            # vvv multiclass not supported
            # print('\nMatthews Correlation Coefficient:')
            # print(metrics.matthews_corrcoef(y_test, y_pred))
            print("\nPrecision:")
            print(metrics.precision_score(y_test, y_pred))
            # vvv Not supported due to ValueError: y_true and y_pred have different number of classes 3, 2
            # print('\nRecall:')
            # print(metrics.recall(y_test, y_pred))
            print()
Example No. 4
def calculateSimilarityItems(item1, item2):
  try:
    result = jaccard_similarity_score(Utility[item1], Utility[item2])
  except Warning:
    #print "Exception at %d : %d" % (item1, item2)
    result = 0.5
  return result
Example No. 5
    def calc_thresholds(self, patches_in, patches_out):
        prediction = self.model.predict(patches_in, batch_size=4)
        avg, trs = [], []

        for i in range(self.out_chan):
            t_prd = prediction[:, :, :, i]
            t_msk = patches_out[:, :, :, i]

            t_prd = t_prd.reshape(t_msk.shape[0] * t_msk.shape[1], t_msk.shape[2])
            t_msk = t_msk.reshape(t_msk.shape[0] * t_msk.shape[1], t_msk.shape[2])

            t_msk = t_msk > 0.5 
            # threshold finder
            best_score = 0
            best_threshold = 0
            for j in range(10):
                threshold = (j + 1) / 10.0
                threshold_mask = (t_prd > threshold)

                jk = jaccard_similarity_score(t_msk, threshold_mask)
                if jk > best_score:
                    best_score = jk
                    best_threshold = threshold
            print " -- output:", i, "best:", best_score, "threshold:", best_threshold
            avg.append(best_score)
            trs.append(best_threshold)

        score = sum(avg) / 10.0
        return score, trs
Example No. 6
def analise():
    datasets = load_data_from_pickle()
    classifier = get_conv_classifier()
    given_answers = list(classifier.predict(datasets.test.data)['classes'])

    wrong_answer_buckets = np.zeros(5)
    for i, test_data in enumerate(datasets.test.data):
        right_answer = datasets.test.target[i]
        given_answer = given_answers[i]
        if right_answer != given_answer:
            wrong_answer_buckets[right_answer] += 1
    print(wrong_answer_buckets / sum(wrong_answer_buckets))

    confusion_matrix = metrics.confusion_matrix(datasets.test.target, given_answers, range(5))
    print(confusion_matrix)

    cohen_kappa_score = metrics.cohen_kappa_score(datasets.test.target, given_answers, range(5))
    print(cohen_kappa_score)

    jaccard_similarity_score = metrics.jaccard_similarity_score(datasets.test.target, given_answers)
    print(jaccard_similarity_score)

    report = metrics.classification_report(datasets.test.target, given_answers, labels=range(5),
                                           target_names=['NORTH', 'EAST', 'SOUTH', 'WEST', 'STILL'])
    print(report)
Example No. 7
def ComputeMetrics(prob, batch_labels, p1, p2, rgb=None, save_path=None, ind=0):
    GT = label(batch_labels.copy())
    PRED = PostProcess(prob, p1, p2)
    lbl = GT.copy()
    pred = PRED.copy()
    aji = AJI_fast(lbl, pred)
    lbl[lbl > 0] = 1
    pred[pred > 0] = 1 
    l, p = lbl.flatten(), pred.flatten()
    acc = accuracy_score(l, p)
    roc = roc_auc_score(l, p)
    jac = jaccard_similarity_score(l, p)
    f1 = f1_score(l, p)
    recall = recall_score(l, p)
    precision = precision_score(l, p)
    if rgb is not None:
        xval_n = join(save_path, "xval_{}.png").format(ind)
        yval_n = join(save_path, "yval_{}.png").format(ind)
        prob_n = join(save_path, "prob_{}.png").format(ind)
        pred_n = join(save_path, "pred_{}.png").format(ind)
        c_gt_n = join(save_path, "C_gt_{}.png").format(ind)
        c_pr_n = join(save_path, "C_pr_{}.png").format(ind)
        ## CHECK PLOT FOR PROB AS IT MIGHT BE ILL ADAPTED

        imsave(xval_n, rgb)
        imsave(yval_n, color_bin(GT))
        imsave(prob_n, prob)
        imsave(pred_n, color_bin(PRED))
        imsave(c_gt_n, add_contours(rgb, GT))
        imsave(c_pr_n, add_contours(rgb, PRED))

    return acc, roc, jac, recall, precision, f1, aji
Example No. 8
 def neighbor_rating(self, neighbor, itemID, sigma, threshold):
     self.ratings_sum=0
     self.similarity_sum=0
     #print(type(user))
     #print(user)
     #print(neighbor[2:10])
     #print(df.itemID)
     ratings = df[(df.userID == neighbor.userID) & (df.itemID == itemID)]
     #print(ratings.shape)
     for index, user_rating in ratings.iterrows():
         similarity = jaccard_similarity_score(neighbor[2:-1],user_rating[2:-1])
         #print(similarity)
         if similarity > threshold:
             self.ratings_sum += user_rating.rating * similarity
             self.similarity_sum += similarity
             #print(self.similarity_sum)
         #print(self.ratings_sum)
         #print(neighbor[-10:],user_rating[-10:])
     #print("Rating sum")
     #print(self.ratings_sum)
     #print("Similarity Sum")
     #print(self.similarity_sum)
     try:
         rating = self.ratings_sum / self.similarity_sum
         #rating = ratings.rating.mean()
     except:
         rating = 0
     #print(rating)
     return rating
Example No. 9
 def svmDesc(lab_pred,lab_test, title='Confusion matrix', cmap=plot.cm.Blues,taskLabels=taskLabels,normal=True):
     #build confusion matrix itself
     conM = confusion_matrix(lab_test, lab_pred)
     if normal== True:
         conM = conM.astype('float') / conM.sum(axis=1)[:, np.newaxis]
     #build heatmap graph of matrix
     plot.imshow(conM, interpolation='nearest', cmap=cmap)
     plot.title(title)
     plot.colorbar()
     tick_marks = np.arange(len(taskLabels))
     plot.xticks(tick_marks, taskLabels, rotation=45)
     plot.yticks(tick_marks, taskLabels)
     plot.tight_layout()
     plot.ylabel('True label')
     plot.xlabel('Predicted label')
     
     #classification report
     creport = classification_report(lab_test,lab_pred)
     print "CLASSIFICATION REPORT: "  
     print creport
     
     #hamming distance
     hamming = hamming_loss(lab_test,lab_pred)
     print "HAMMING DISTANCE:              %s" % str(hamming)
     
     #jaccard similarity score
     jaccard = jaccard_similarity_score(lab_test,lab_pred)
     print "JACCARD SIMILARITY SCORE:      %s" % str(jaccard)
     
     #precision score    
     pscore = precision_score(lab_test,lab_pred)
     print "PRECISION SCORE:               %s" % str(pscore)
Example No. 10
 def tribunalTrain(data,predict,tribunal,split=.2,stat=False,statLis=None):
     #data for testing the tribunal performance, not in actual judge training
     dat_train, dat_test, lab_train, lab_test = train_test_split(data,predict, test_size=split)
     verdict = []
      
     print 'Tribunal in session'
     
     for judge in tribunal:
         jdat_train, jdat_test, jlab_train, jlab_test = train_test_split(dat_train,lab_train, test_size=split)
         judge.fit(jdat_train, jlab_train)
         print 'judge trained'
 
     for d in dat_test:
         votes = []
         for judge in tribunal:
             v = judge.predict(d)
             votes.append(v)
         decision = stats.mode(votes,axis=None)
         verdict.append(decision[0])
     npVerdict = np.array(verdict)
     
     if stat == False:        
         svmDesc(npVerdict,lab_test,title='Tribunal Confusion Matrix')
     else:
         jac = jaccard_similarity_score(npVerdict,lab_test)
         statLis.append(jac)
Example No. 11
def calc_jacc(model):
    img = np.load(xtmp_file)
    msk = np.load(ytmp_file)

    prd = model.predict(img, batch_size=4)
    print prd.shape, msk.shape
    avg, trs = [], []

    for i in range(num_classes):
        t_msk = msk[:, i, :, :]
        t_prd = prd[:, i, :, :]
        t_msk = t_msk.reshape(msk.shape[0] * msk.shape[2], msk.shape[3])
        t_prd = t_prd.reshape(msk.shape[0] * msk.shape[2], msk.shape[3])

        m, b_tr = 0, 0
        for j in range(10):
            tr = j / 10.0
            pred_binary_mask = t_prd > tr

            jk = jaccard_similarity_score(t_msk, pred_binary_mask)
            if jk > m:
                m = jk
                b_tr = tr
        print i, m, b_tr
        avg.append(m)
        trs.append(b_tr)

    score = sum(avg) / 10.0
    return score, trs
Example No. 12
 def jaccard_score(self, row):
     query = row['search_term']
     title = row['product_title']
     
     corpus = np.array([query, title])
     tfidf_matrix = self.tfidf_vectorizer.fit_transform(corpus)
     
     return jaccard_similarity_score(tfidf_matrix[0], tfidf_matrix[1])
Example No. 13
def test_jaccard_binary_index():
    y_test = np.array([0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0])
    y_pred = np.array([0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0])
    sk_jaccard_score = metrics.jaccard_similarity_score(y_test, y_pred)
    print(sk_jaccard_score)
    jaccard_index = jaccard_binary_index(y_test, y_pred)
    print(jaccard_index)
    assert jaccard_index == 0.5
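The jaccard_binary_index helper exercised above is not shown in this example; a minimal sketch consistent with the asserted value of 0.5 (an assumed intersection-over-union of the positive class, not the original implementation) could look like this:

import numpy as np

def jaccard_binary_index(y_true, y_pred):
    # Hypothetical implementation: Jaccard index, i.e. intersection over
    # union of the positive class, for two binary label arrays.
    y_true = np.asarray(y_true).astype(bool)
    y_pred = np.asarray(y_pred).astype(bool)
    union = np.logical_or(y_true, y_pred).sum()
    if union == 0:
        return 1.0  # both arrays are all zeros
    return float(np.logical_and(y_true, y_pred).sum()) / union

# For the arrays above: intersection = 2, union = 4, so the index is 0.5.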
Example No. 14
def test_classifier_chain_crossval_fit_and_predict():
    # Fit classifier chain with cross_val_predict and verify predict
    # performance
    X, Y = generate_multilabel_dataset_with_correlations()
    classifier_chain_cv = ClassifierChain(LogisticRegression(), cv=3)
    classifier_chain_cv.fit(X, Y)

    classifier_chain = ClassifierChain(LogisticRegression())
    classifier_chain.fit(X, Y)

    Y_pred_cv = classifier_chain_cv.predict(X)
    Y_pred = classifier_chain.predict(X)

    assert_equal(Y_pred_cv.shape, Y.shape)
    assert_greater(jaccard_similarity_score(Y, Y_pred_cv), 0.4)

    assert_not_equal(jaccard_similarity_score(Y, Y_pred_cv),
                     jaccard_similarity_score(Y, Y_pred))
Example No. 15
def getJaccardSimilarity(user1=None, user2=None):
    if user1.ndim != 1 or user2.ndim != 1:
        print 'Input arrays must be 1-dimensional'
        return
    elif user1.shape != user2.shape:
        print 'Input arrays must have the same length'
        return
    else:
        return jaccard_similarity_score(user1, user2)
Example No. 16
def jaccard_driver(a_driver):

    a_driver["DStats"] = (a_driver["DStats"] * 100).round()
    a_driver["Baseline"] = (a_driver["Baseline"] * 100).round()
    a_driver["Predicts"] = []

    for i in range(0, len(a_driver["DStats"])):
        a_driver["Predicts"].append(metrics.jaccard_similarity_score(a_driver["DStats"][i], a_driver["Baseline"]))

    return a_driver["Predicts"]
Example No. 17
def jaccard_index(y, y_pred):
  """Computes Jaccard Index which is the Intersection Over Union metric
       which is commonly used in image segmentation tasks

      Parameters
      ----------
      y: ground truth array
      y_pred: predicted array
    """
  return jaccard_similarity_score(y, y_pred)
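Note that on flat binary arrays jaccard_similarity_score counts every matching position, including the background zeros, so it behaves like plain accuracy rather than the foreground intersection-over-union the docstring describes. A small illustrative check, with values worked out by hand (the arrays are not from the original code):

import numpy as np
from sklearn.metrics import jaccard_similarity_score

y      = np.array([0, 0, 0, 0, 0, 0, 1, 1])
y_pred = np.array([0, 0, 0, 0, 0, 1, 1, 0])

# 6 of the 8 positions agree, so jaccard_similarity_score returns 0.75,
# while the foreground IoU is |{6}| / |{5, 6, 7}| = 1/3.
print(jaccard_similarity_score(y, y_pred))
iou = float(np.logical_and(y, y_pred).sum()) / np.logical_or(y, y_pred).sum()
print(iou)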
Example No. 18
def jaccard_driver(a_driver):
    
    a_driver['DStats'] = (a_driver['DStats']*100).round()
    a_driver['Baseline'] = (a_driver['Baseline']*100).round()
    a_driver['Predicts'] = []
    
    for i in range (0,len(a_driver['DStats'])):
        a_driver['Predicts'].append(metrics.jaccard_similarity_score(a_driver['DStats'][i],a_driver['Baseline']))
                
    
    return a_driver['Predicts']
Example No. 19
def _get_max_similarity(list1,list2, coassoc_vec):
    n = len(coassoc_vec.keys())
    max = 0
    for i in range(len(list1)):
        checkee = coassoc_vec[coassoc_vec.keys()[list1[i]]]
        for j in range(len(list2)):
            neu = coassoc_vec[coassoc_vec.keys()[list2[j]]]
            jaccard = jaccard_similarity_score(checkee.binarycoassoc_vs,neu.binarycoassoc_vs)
            if jaccard> max:
                max = jaccard
    return max
Example No. 20
def eval_mclf(y, y_hat):
    results = {
        "jaccard": jaccard_similarity_score(numpy.array(y),
                                            numpy.array(y_hat)),
        "f1-macro": f1_score(numpy.array(y), numpy.array(y_hat),
                             average='macro'),
        "f1-micro": f1_score(numpy.array(y), numpy.array(y_hat),
                             average='micro')
    }

    return results
Example No. 21
def test_classifier_chain_vs_independent_models():
    # Verify that an ensemble of classifier chains (each of length
    # N) can achieve a higher Jaccard similarity score than N independent
    # models
    X, Y = generate_multilabel_dataset_with_correlations()
    X_train = X[:600, :]
    X_test = X[600:, :]
    Y_train = Y[:600, :]
    Y_test = Y[600:, :]

    ovr = OneVsRestClassifier(LogisticRegression())
    ovr.fit(X_train, Y_train)
    Y_pred_ovr = ovr.predict(X_test)

    chain = ClassifierChain(LogisticRegression())
    chain.fit(X_train, Y_train)
    Y_pred_chain = chain.predict(X_test)

    assert_greater(jaccard_similarity_score(Y_test, Y_pred_chain),
                   jaccard_similarity_score(Y_test, Y_pred_ovr))
Example No. 22
def evalClassifier(vScore_test, thePredictedScores):
  target_names = ['Low_Risk', 'High_Risk']
  '''
    sklearn maps target_names in index order: the first (lowest) index -> 0 -> 'Low_Risk',
    the next index -> 1 -> 'High_Risk'
  '''
  print "precision, recall, F-stat"
  print(classification_report(vScore_test, thePredictedScores, target_names=target_names))
  print "*********************"
  # preserve the order: first the test set (real values from the dataset), then the predicted values (from the classifier)
  '''
    area under the curve values ... ref: http://gim.unmc.edu/dxtests/roc3.htm
    0.80~0.90 -> good, anything less than 0.70 -> bad, 0.90~1.00 -> excellent
  '''
  area_roc_output = roc_auc_score(vScore_test, thePredictedScores)
  # preserve the order: first the test set (real values from the dataset), then the predicted values (from the classifier)
  print "Area under the ROC curve is ", area_roc_output
  print "*********************"
  '''
    mean absolute error (MAE) ... ref: http://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_absolute_error.html
    the smaller the better, ideally 0.0
  '''
  mae_output = mean_absolute_error(vScore_test, thePredictedScores)
  # preserve the order: first the test set (real values from the dataset), then the predicted values (from the classifier)
  print "Mean absolute error output is ", mae_output
  print "*********************"
  '''
  accuracy_score ... ref: http://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter ... percentage of correct predictions
  ideally 1.0, higher the better
  '''
  accuracy_score_output = accuracy_score(vScore_test, thePredictedScores)
  # preserve the order: first the test set (real values from the dataset), then the predicted values (from the classifier)
  print "Accuracy output is ", accuracy_score_output
  print "*********************"
  
  '''
  hamming_loss ... ref: http://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter ... fraction of labels predicted incorrectly
  ideally 0.0, lower the better
  '''
  hamming_loss_output = hamming_loss(vScore_test, thePredictedScores)
  # preserve the order: first the test set (real values from the dataset), then the predicted values (from the classifier)
  print "Hamming loss output is ", hamming_loss_output
  print "*********************"

  '''
  Jaccard similarity score ... ref: http://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter ... percentage of correct predictions
  ideally 1.0, higher the better
  '''
  jaccardian_output = jaccard_similarity_score(vScore_test, thePredictedScores)
  # preserve the order: first the test set (real values from the dataset), then the predicted values (from the classifier)
  print "Jaccard similarity output is ", jaccardian_output
  print "*********************"
Example No. 23
 def neighbor_average(self,neighbor,sigma,threshold):
     ratings_of_neighbor = df[df.userID == neighbor.userID]#.rating.mean()
     rating_sum = 0
     count = 0
     for index, row in ratings_of_neighbor.iterrows():
         similarity = jaccard_similarity_score(row[2:-1], neighbor[2:-1])
         if similarity > threshold:
             rating_sum+=row.rating
             count+=1
     average_rating_in_given_context = rating_sum/count
     #print(average_rating_in_given_context)
     return average_rating_in_given_context
Example No. 24
def _main():

	base = list(range(0, 30))

	SIZE = 30

	x = random.sample(base, k=SIZE)
	print('x: ', x)

	y = random.sample(base, k=SIZE)
	print('y: ', y)

	result = jaccard_similarity_score(x, y)
	print(result, "\n")
Example No. 25
def similarity(featureVector,maxTimeDiff,doctext,doc1,doc2):
	location = 0
	featureVector1 = featureVector[doc1]
	featureVector2 = featureVector[doc2]
	print featureVector1
	jcSim = jaccard_similarity_score(featureVector1,featureVector2)
	if doctext[doc1]["places"] == doctext[doc2]["places"]:
		location+=1
	date = abs(int(doctext[doc1]["date"])- int(doctext[doc2]["date"]))
	w1 = 1 						#Weight for word vector
	w2 = 1 						#Weight for location
	w3 = 1 						#Weight for time distribution
	alpha = 1.0 				#Time decay
	sim = w1*jcSim+w2*location
	return sim*math.exp(-alpha*(date)/maxTimeDiff)
Example No. 26
def get_hotspot_scores(data):
    distance_matrix = [[-1 for _ in xrange(len(data))] for _ in xrange(len(data))]
    from sklearn.metrics import jaccard_similarity_score
    for i in xrange(len(data)):
        for j in xrange(len(data)):
            if distance_matrix[i][j] == -1 and i != j:
                distance_matrix[i][j] = decimal.Decimal(jaccard_similarity_score(data[i].decisions, data[j].decisions))
                distance_matrix[j][i] = distance_matrix[i][j]
            elif distance_matrix[i][j] == -1 and i == j:
                distance_matrix[j][i] = 1
            else:
                pass
    hotspot_scores = [sum(distance_matrix[i]) for i in xrange(len(data))]
    print "Done calculating hotspot scores"
    return hotspot_scores
Example No. 27
    def performance(self, preds):
        accuracy = accuracy_score(self.y_test, preds)
        precision = precision_score(self.y_test, preds)
        recall = recall_score(self.y_test, preds)
        f1 = f1_score(self.y_test, preds)
        jss = jaccard_similarity_score(self.y_test, preds)
        hl = hamming_loss(self.y_test, preds)
        zol = zero_one_loss(self.y_test, preds)

        return {'accuracy_score': accuracy,
                'precision_score': precision,
                'recall_score': recall,
                'f1_score': f1,
                'jaccard_similarity_score': jss,
                'hamming_loss': hl,
                'zero_one_loss': zol}
Example No. 28
def main(params, train):
    si = ScreenImage()
    if train:
        # Initialization
        trainset = glob(join("face_training", "face*.png"))
        t0 = time()

        print_(verbosity, "Begin collecting training Samples")
        Labels, Samples = get_training_samples(trainset, params)
        print_(verbosity, "Success. Elapsed: %.2f s." % (time() - t0))

        print_(verbosity, "Begin classifier training using %s..."
               % (params["classifier"]))
        if params["classifier"] == "NB":
            clf = GaussianNB()
        elif params["classifier"] == "RF":
            clf = RandomForestClassifier()
        clf.fit(Samples, Labels)
        pickle.dump([clf, params], open(params["name"], "w"))
    else:
        testset = glob(join("face_testing", "face*.png"))
        print_(verbosity, "Begin classifier prediction...")
        score = np.zeros(len(testset),)
        models = glob("._*")

        for i, testname in enumerate(testset):
            im_orig = imread(testname)
            truthname = get_groundname(testname)
            im_skin = [[] for k in models]
            title = ["" for k in models]
            for j, model in enumerate(models):
                im_truth = rgb2gray(imread(truthname)).astype(np.uint8)*255
                pkl = pickle.load(open(model, "r"))
                clf = pkl[0]
                params = pkl[1]
                _, _, fvec = im2feature(testname, params)
                im_skin[j] = clf.predict(fvec).reshape(im_truth.shape).astype(np.uint8)
                score = jaccard_similarity_score(im_truth, im_skin[j], normalize=True)
                title[j] = "%s\nClassifier: %s, Thresh: %.2f\nK: %d, Score: %.2f" \
                    % (params["classifier"], params["feature"], params["thresh"],
                       params["n_cluster"], score)
                print_(verbosity, "\tTest %d of %d, Score %.2f\n" % (i+1, len(testset), score))

            si.show(testname, [im_orig, im_skin[0], im_skin[1],
                               im_skin[2], im_skin[3], im_skin[4]],
                    ["Original\n%s" % testname, title[0], title[1],
                    title[2], title[3], title[4]])
Example No. 29
def test_multilabel_jaccard_similarity_score():
    # Dense label indicator matrix format
    y1 = np.array([[0, 1, 1], [1, 0, 1]])
    y2 = np.array([[0, 0, 1], [1, 0, 1]])

    # size(y1 \inter y2) = [1, 2]
    # size(y1 \union y2) = [2, 2]

    assert_equal(jaccard_similarity_score(y1, y2), 0.75)
    assert_equal(jaccard_similarity_score(y1, y1), 1)
    assert_equal(jaccard_similarity_score(y2, y2), 1)
    assert_equal(jaccard_similarity_score(y2, np.logical_not(y2)), 0)
    assert_equal(jaccard_similarity_score(y1, np.logical_not(y1)), 0)
    assert_equal(jaccard_similarity_score(y1, np.zeros(y1.shape)), 0)
    assert_equal(jaccard_similarity_score(y2, np.zeros(y1.shape)), 0)
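As the comments indicate, the first sample has an intersection of size 1 and a union of size 2, the second has 2 and 2, so the sample-averaged score is (1/2 + 2/2) / 2 = 0.75, which is what the first assertion checks. A minimal sketch of that per-sample computation, independent of sklearn:

import numpy as np

y1 = np.array([[0, 1, 1], [1, 0, 1]])
y2 = np.array([[0, 0, 1], [1, 0, 1]])

# Per-sample Jaccard: |intersection| / |union| for each row, then averaged.
per_sample = (np.logical_and(y1, y2).sum(axis=1).astype(float)
              / np.logical_or(y1, y2).sum(axis=1))
print(per_sample)         # 0.5 for the first sample, 1.0 for the second
print(per_sample.mean())  # 0.75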
Example No. 30
def _get_incident_matrix_binary(coassoc_vec, fusion_threshold):
    n = len(coassoc_vec.keys())
    incidence_matrix = np.zeros(shape=(n,n))
    for i in range(len(coassoc_vec.keys())-1):
        checkee = coassoc_vec[coassoc_vec.keys()[i+1]]

        if i == len(coassoc_vec.keys())-2:
            incidence_matrix[i+1][i+1] = 1

        incidence_matrix[i][i] = 1

        for j in range(i+1):
            neu = coassoc_vec[coassoc_vec.keys()[j]]
            if jaccard_similarity_score(checkee.binarycoassoc_vs,neu.binarycoassoc_vs) > fusion_threshold :
                incidence_matrix[j][i+1] = 1

    return incidence_matrix
Example No. 31
y=lantsat[[36]].values


# Split the dataset into training dataset and testing dataset
x_train, x_test,y_train,y_test= train_test_split(x,y,test_size=0.2,random_state=1)
# #===================Perceptron=========================
from sklearn.multioutput import MultiOutputClassifier
from sklearn.linear_model import Perceptron
ppn = Perceptron(n_iter=40, eta0=0.1, random_state=0)  #y=w.x+b
multi_target_ppn = MultiOutputClassifier(ppn) 
y_pred = multi_target_ppn.fit(x_train, y_train).predict(x_test)
print('Perceptron:')
print(classification_report(y_test,y_pred))
print('Accuracy classification score: %.2f' % accuracy_score(y_test,y_pred))
print('Average Hamming loss: %.2f' % hamming_loss(y_test,y_pred))
print('Jaccard similarity coefficient score: %.2f' % jaccard_similarity_score(y_test,y_pred))
print('Matthews correlation coefficient (MCC): %.2f' % matthews_corrcoef(y_test,y_pred))
print('Zero-one classification loss: %.2f' % zero_one_loss(y_test,y_pred))

# #===================SVM=========================
from sklearn.multioutput import MultiOutputClassifier
from sklearn import svm
# Instantiate SVC()
clf = svm.SVC()
multi_target_clf = MultiOutputClassifier(clf)
y_pred = multi_target_clf.fit(x_train, y_train).predict(x_test)
print('SVM:')
print(classification_report(y_test,y_pred))
print('Accuracy classification score: %.2f' % accuracy_score(y_test,y_pred))
print('Average Hamming loss: %.2f' % hamming_loss(y_test,y_pred))
print('Jaccard similarity coefficient score: %.2f' % jaccard_similarity_score(y_test,y_pred))
Example No. 32
#
from sklearn.metrics import jaccard_similarity_score

# 1
st_1 = "dogs chase cats"
st_2 = "dogs hate cats"

# 2
st_1_wrds = set(st_1.split())
st_2_wrds = set(st_2.split())

unq_wrds = st_1_wrds.union(st_2_wrds)

a = [1 if w in st_1_wrds else 0 for w in unq_wrds]
b = [1 if w in st_2_wrds else 0 for w in unq_wrds]

print a
print b
print jaccard_similarity_score(a, b)
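The same value can be read directly off the word sets: the two sentences share 2 of the 4 distinct words, so |intersection| / |union| = 2/4 = 0.5. In this particular example that coincides with jaccard_similarity_score on the 0/1 presence vectors, although in general the latter also counts positions where both vectors are 0. A set-based sketch (not part of the original snippet):

st_1_wrds = set("dogs chase cats".split())
st_2_wrds = set("dogs hate cats".split())

# |{dogs, cats}| / |{dogs, chase, hate, cats}| = 2 / 4 = 0.5
set_jaccard = len(st_1_wrds & st_2_wrds) / float(len(st_1_wrds | st_2_wrds))
print(set_jaccard)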
Example No. 33
def jaccardCoefficientLavaMD(errListJaccard):
    expected = []
    read = []
    for err in errListJaccard:
        try:
            readGStr = ''.join(
                bin(ord(c)).replace('0b', '').rjust(8, '0')
                for c in struct.pack('!f', err[2]))
            expectedGStr = ''.join(
                bin(ord(c)).replace('0b', '').rjust(8, '0')
                for c in struct.pack('!f', err[3]))
            readGStr2 = ''.join(
                bin(ord(c)).replace('0b', '').rjust(8, '0')
                for c in struct.pack('!f', err[4]))
            expectedGStr2 = ''.join(
                bin(ord(c)).replace('0b', '').rjust(8, '0')
                for c in struct.pack('!f', err[5]))
            readGStr3 = ''.join(
                bin(ord(c)).replace('0b', '').rjust(8, '0')
                for c in struct.pack('!f', err[6]))
            expectedGStr3 = ''.join(
                bin(ord(c)).replace('0b', '').rjust(8, '0')
                for c in struct.pack('!f', err[7]))
            readGStr4 = ''.join(
                bin(ord(c)).replace('0b', '').rjust(8, '0')
                for c in struct.pack('!f', err[8]))
            expectedGStr4 = ''.join(
                bin(ord(c)).replace('0b', '').rjust(8, '0')
                for c in struct.pack('!f', err[9]))
        except OverflowError:
            readGStr = ''.join(
                bin(ord(c)).replace('0b', '').rjust(8, '0')
                for c in struct.pack('!d', err[2]))
            expectedGStr = ''.join(
                bin(ord(c)).replace('0b', '').rjust(8, '0')
                for c in struct.pack('!d', err[3]))
            readGStr2 = ''.join(
                bin(ord(c)).replace('0b', '').rjust(8, '0')
                for c in struct.pack('!d', err[4]))
            expectedGStr2 = ''.join(
                bin(ord(c)).replace('0b', '').rjust(8, '0')
                for c in struct.pack('!d', err[5]))
            readGStr3 = ''.join(
                bin(ord(c)).replace('0b', '').rjust(8, '0')
                for c in struct.pack('!d', err[6]))
            expectedGStr3 = ''.join(
                bin(ord(c)).replace('0b', '').rjust(8, '0')
                for c in struct.pack('!d', err[7]))
            readGStr4 = ''.join(
                bin(ord(c)).replace('0b', '').rjust(8, '0')
                for c in struct.pack('!d', err[8]))
            expectedGStr4 = ''.join(
                bin(ord(c)).replace('0b', '').rjust(8, '0')
                for c in struct.pack('!d', err[9]))

        read.extend([n for n in readGStr])
        read.extend([n for n in readGStr2])
        read.extend([n for n in readGStr3])
        read.extend([n for n in readGStr4])
        expected.extend([n for n in expectedGStr])
        expected.extend([n for n in expectedGStr2])
        expected.extend([n for n in expectedGStr3])
        expected.extend([n for n in expectedGStr4])

    try:
        jac = jaccard_similarity_score(expected, read)
        dissimilarity = float(1.0 - jac)
        return dissimilarity
    except:
        return None
Example No. 34
def test(experiment_path, test_epoch):
    # ========= CONFIG FILE TO READ FROM =======
    config = configparser.RawConfigParser()
    config.read('./' + experiment_path + '/' + experiment_path + '_config.txt')
    # ===========================================
    # run the training on invariant or local
    path_data = config.get('data paths', 'path_local')
    model = config.get('training settings', 'model')
    # original test images (for FOV selection)
    DRIVE_test_imgs_original = path_data + config.get('data paths', 'test_imgs_original')
    test_imgs_orig = load_hdf5(DRIVE_test_imgs_original)
    full_img_height = test_imgs_orig.shape[2]
    full_img_width = test_imgs_orig.shape[3]
    # the border masks provided by the DRIVE
    DRIVE_test_border_masks = path_data + config.get('data paths', 'test_border_masks')
    test_border_masks = load_hdf5(DRIVE_test_border_masks)
    # dimension of the patches
    patch_height = int(config.get('data attributes', 'patch_height'))
    patch_width = int(config.get('data attributes', 'patch_width'))
    # the stride in case output with average
    stride_height = int(config.get('testing settings', 'stride_height'))
    stride_width = int(config.get('testing settings', 'stride_width'))
    assert (stride_height < patch_height and stride_width < patch_width)
    # model name
    name_experiment = config.get('experiment name', 'name')
    path_experiment = './' + name_experiment + '/'
    # N full images to be predicted
    Imgs_to_test = int(config.get('testing settings', 'full_images_to_test'))
    # Grouping of the predicted images
    N_visual = int(config.get('testing settings', 'N_group_visual'))
    # ====== average mode ===========
    average_mode = config.getboolean('testing settings', 'average_mode')
    #N_subimgs = int(config.get('training settings', 'N_subimgs'))
    #batch_size = int(config.get('training settings', 'batch_size'))
    #epoch_size = N_subimgs // (batch_size)
    # #ground truth
    # gtruth= path_data + config.get('data paths', 'test_groundTruth')
    # img_truth= load_hdf5(gtruth)
    # visualize(group_images(test_imgs_orig[0:20,:,:,:],5),'original')#.show()
    # visualize(group_images(test_border_masks[0:20,:,:,:],5),'borders')#.show()
    # visualize(group_images(img_truth[0:20,:,:,:],5),'gtruth')#.show()

    # ============ Load the data and divide in patches
    patches_imgs_test = None
    new_height = None
    new_width = None
    masks_test = None
    patches_masks_test = None

    if average_mode == True:
        patches_imgs_test, new_height, new_width, masks_test= get_data_testing_overlap(
            DRIVE_test_imgs_original = DRIVE_test_imgs_original, #original'DRIVE_datasets_training_testing/test_hard_masks.npy'
            DRIVE_test_groudTruth = path_data + config.get('data paths', 'test_groundTruth'),  #masks
            Imgs_to_test = int(config.get('testing settings', 'full_images_to_test')),
            patch_height = patch_height,
            patch_width = patch_width,
            stride_height = stride_height,
            stride_width = stride_width)
    else:
        patches_imgs_test, patches_masks_test = get_data_testing_test(
            DRIVE_test_imgs_original = DRIVE_test_imgs_original,  #original
            DRIVE_test_groudTruth = path_data + config.get('data paths', 'test_groundTruth'),  #masks
            Imgs_to_test = int(config.get('testing settings', 'full_images_to_test')),
            patch_height = patch_height,
            patch_width = patch_width
        )
    #np.save(path_experiment + 'test_patches.npy', patches_imgs_test)
    #visualize(group_images(patches_imgs_test,100),'./'+name_experiment+'/'+"test_patches")

    # ================ Run the prediction of the patches ==================================
    best_last = config.get('testing settings', 'best_last')
    # Load the saved model
    if model == 'UNet':
        net = UNet(n_channels=1, n_classes=2)
    elif model == 'UNet_cat':
        net = UNet_cat(n_channels=1, n_classes=2)
    else:
        net = UNet_level4_our(n_channels=1, n_classes=2)
    # load data
    test_data = data.TensorDataset(torch.tensor(patches_imgs_test),torch.zeros(patches_imgs_test.shape[0]))
    test_loader = data.DataLoader(test_data, batch_size=1, pin_memory=True, shuffle=False)
    trained_model = path_experiment + 'DRIVE_' + str(test_epoch) + 'epoch.pth'
    print(trained_model)
    # trained_model= path_experiment+'DRIVE_unet2_B'+str(60*epoch_size)+'.pth'
    net.load_state_dict(torch.load(trained_model))
    net.eval()
    print('Finished loading model :' + trained_model)
    net = net.cuda()
    cudnn.benchmark = True
    # Calculate the predictions
    predictions_out = np.empty((patches_imgs_test.shape[0],patch_height*patch_width,2))
    for i_batch, (images, targets) in enumerate(test_loader):
        images = Variable(images.float().cuda())
        out1= net(images)

        pred = out1.permute(0,2,3,1)

        pred = F.softmax(pred, dim=-1)

        pred = pred.data.view(-1,patch_height*patch_width,2)

        predictions_out[i_batch] = pred

    # ===== Convert the prediction arrays in corresponding images
    pred_patches_out = pred_to_imgs(predictions_out, patch_height, patch_width, "original")
    #np.save(path_experiment + 'pred_patches_' + str(test_epoch) + "_epoch" + '.npy', pred_patches_out)
    #visualize(group_images(pred_patches_out,100),'./'+name_experiment+'/'+"pred_patches")


    #========== Elaborate and visualize the predicted images ====================
    pred_imgs_out = None
    orig_imgs = None
    gtruth_masks = None
    if average_mode == True:
        pred_imgs_out = recompone_overlap(pred_patches_out,new_height,new_width, stride_height, stride_width)
        orig_imgs = my_PreProc(test_imgs_orig[0:pred_imgs_out.shape[0],:,:,:])    #originals
        gtruth_masks = masks_test  #ground truth masks
    else:
        pred_imgs_out = recompone(pred_patches_out,10,9)       # predictions
        orig_imgs = recompone(patches_imgs_test,10,9)  # originals
        gtruth_masks = recompone(patches_masks_test,10,9)  #masks

    # apply the DRIVE masks on the predictions: set everything outside the FOV to zero!
    # DRIVE MASK  #only for visualization
    kill_border(pred_imgs_out, test_border_masks)
    # back to original dimensions
    orig_imgs = orig_imgs[:,:,0:full_img_height,0:full_img_width]
    pred_imgs_out = pred_imgs_out[:, :, 0:full_img_height, 0:full_img_width]
    gtruth_masks = gtruth_masks[:, :, 0:full_img_height, 0:full_img_width]

    print ("Orig imgs shape: "+str(orig_imgs.shape))
    print("pred imgs shape: " + str(pred_imgs_out.shape))
    print("Gtruth imgs shape: " + str(gtruth_masks.shape))
    np.save(path_experiment + 'pred_img_' + str(test_epoch) + "_epoch" + '.npy',pred_imgs_out)
    # visualize(group_images(orig_imgs,N_visual),path_experiment+"all_originals")#.show()
    if average_mode == True:
        visualize(group_images(pred_imgs_out, N_visual),
                  path_experiment + "all_predictions_" + str(test_epoch) + "thresh_epoch")
    else:
        visualize(group_images(pred_imgs_out, N_visual),
                  path_experiment + "all_predictions_" + str(test_epoch) + "epoch_no_average")
    visualize(group_images(gtruth_masks, N_visual), path_experiment + "all_groundTruths")

    # visualize results comparing mask and prediction:
    # assert (orig_imgs.shape[0] == pred_imgs_out.shape[0] and orig_imgs.shape[0] == gtruth_masks.shape[0])
    # N_predicted = orig_imgs.shape[0]
    # group = N_visual
    # assert (N_predicted%group == 0)
    

    # ====== Evaluate the results
    print("\n\n========  Evaluate the results =======================")
   
    # predictions only inside the FOV
    y_scores, y_true = pred_only_FOV(pred_imgs_out, gtruth_masks, test_border_masks)  # returns data only inside the FOV
    '''
    print("Calculating results only inside the FOV:")
    print("y scores pixels: " + str(
        y_scores.shape[0]) + " (radius 270: 270*270*3.14==228906), including background around retina: " + str(
        pred_imgs_out.shape[0] * pred_imgs_out.shape[2] * pred_imgs_out.shape[3]) + " (584*565==329960)")
    print("y true pixels: " + str(
        y_true.shape[0]) + " (radius 270: 270*270*3.14==228906), including background around retina: " + str(
        gtruth_masks.shape[2] * gtruth_masks.shape[3] * gtruth_masks.shape[0]) + " (584*565==329960)")
    '''
    # Area under the ROC curve
    fpr, tpr, thresholds = roc_curve((y_true), y_scores)
    AUC_ROC = roc_auc_score(y_true, y_scores)
    # test_integral = np.trapz(tpr,fpr) #trapz is numpy integration
    print("\nArea under the ROC curve: " + str(AUC_ROC))
    rOc_curve = plt.figure()
    plt.plot(fpr, tpr, '-', label='Area Under the Curve (AUC = %0.4f)' % AUC_ROC)
    plt.title('ROC curve')
    plt.xlabel("FPR (False Positive Rate)")
    plt.ylabel("TPR (True Positive Rate)")
    plt.legend(loc="lower right")
    plt.savefig(path_experiment + "ROC.png")

    # Precision-recall curve
    precision, recall, thresholds = precision_recall_curve(y_true, y_scores)
    precision = np.fliplr([precision])[0]  # so the array is increasing (you won't get negative AUC)
    recall = np.fliplr([recall])[0]  # so the array is increasing (you won't get negative AUC)
    AUC_prec_rec = np.trapz(precision, recall)
    print("\nArea under Precision-Recall curve: " + str(AUC_prec_rec))
    prec_rec_curve = plt.figure()
    plt.plot(recall, precision, '-', label='Area Under the Curve (AUC = %0.4f)' % AUC_prec_rec)
    plt.title('Precision - Recall curve')
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.legend(loc="lower right")
    plt.savefig(path_experiment + "Precision_recall.png")

    # Confusion matrix
    threshold_confusion = 0.5
    print("\nConfusion matrix:  Custom threshold (for positive) of " + str(threshold_confusion))
    y_pred = np.empty((y_scores.shape[0]))
    for i in range(y_scores.shape[0]):
        if y_scores[i] >= threshold_confusion:
            y_pred[i] = 1
        else:
            y_pred[i] = 0
    confusion = confusion_matrix(y_true, y_pred)
    print(confusion)
    accuracy = 0
    if float(np.sum(confusion)) != 0:
        accuracy = float(confusion[0, 0] + confusion[1, 1]) / float(np.sum(confusion))
    print("Global Accuracy: " + str(accuracy))
    specificity = 0
    if float(confusion[0, 0] + confusion[0, 1]) != 0:
        specificity = float(confusion[0, 0]) / float(confusion[0, 0] + confusion[0, 1])
    print("Specificity: " + str(specificity))
    sensitivity = 0
    if float(confusion[1, 1] + confusion[1, 0]) != 0:
        sensitivity = float(confusion[1, 1]) / float(confusion[1, 1] + confusion[1, 0])
    print("Sensitivity: " + str(sensitivity))
    precision = 0
    if float(confusion[1, 1] + confusion[0, 1]) != 0:
        precision = float(confusion[1, 1]) / float(confusion[1, 1] + confusion[0, 1])
    print("Precision: " + str(precision))

    # Jaccard similarity index
    jaccard_index = jaccard_similarity_score(y_true, y_pred, normalize=True)
    print("\nJaccard similarity score: " + str(jaccard_index))

    # F1 score
    F1_score = f1_score(y_true, y_pred, labels=None, average='binary', sample_weight=None)
    print("\nF1 score (F-measure): " + str(F1_score))
    ####evaluate the thin vessels
    thin_3pixel_recall_indivi = []
    thin_3pixel_auc_roc = []
    for j in range(pred_imgs_out.shape[0]):
        thick3=opening(gtruth_masks[j, 0, :, :], square(3))
        thin_gt = gtruth_masks[j, 0, :, :] - thick3
        
        thin_pred=pred_imgs_out[j, 0, :, :]
        
        thin_pred[thick3==1]=0
        thin_3pixel_recall_indivi.append(round(thin_recall(thin_gt, pred_imgs_out[j, 0, :, :], thresh=0.5), 4))
        thin_3pixel_auc_roc.append(round(roc_auc_score(thin_gt.flatten(), thin_pred.flatten()), 4))
    thin_2pixel_recall_indivi = []
    thin_2pixel_auc_roc = []
    for j in range(pred_imgs_out.shape[0]):
        thick=opening(gtruth_masks[j, 0, :, :], square(2))
        thin_gt = gtruth_masks[j, 0, :, :] - thick
        #thin_gt_only=thin_gt[thin_gt==1]
        #print(thin_gt_only)
        thin_pred=pred_imgs_out[j, 0, :, :]
        #thin_pred=thin_pred[thin_gt==1]
        thin_pred[thick==1]=0
        thin_2pixel_recall_indivi.append(round(thin_recall(thin_gt, pred_imgs_out[j, 0, :, :], thresh=0.5), 4))
        thin_2pixel_auc_roc.append(round(roc_auc_score(thin_gt.flatten(), thin_pred.flatten()), 4))
    
    #print("thin 2vessel recall:", thin_2pixel_recall_indivi)
    #print('thin 2vessel auc score', thin_2pixel_auc_roc)
    # Save the results
    with open(path_experiment + 'test_performances_all_epochs.txt', mode='a') as f:
        f.write("\n\n" + path_experiment + " test epoch:" + str(test_epoch)
                + '\naverage mode is:' + str(average_mode)
                + "\nArea under the ROC curve: %.4f" % (AUC_ROC)
                + "\nArea under Precision-Recall curve: %.4f" % (AUC_prec_rec)
                + "\nJaccard similarity score: %.4f" % (jaccard_index)
                + "\nF1 score (F-measure): %.4f" % (F1_score)
                + "\nConfusion matrix:"
                + str(confusion)
                + "\nACCURACY: %.4f" % (accuracy)
                + "\nSENSITIVITY: %.4f" % (sensitivity)
                + "\nSPECIFICITY: %.4f" % (specificity)
                + "\nPRECISION: %.4f" % (precision)
                + "\nthin 2vessels recall indivi:\n" + str(thin_2pixel_recall_indivi)
                + "\nthin 2vessels recall mean:%.4f" % (np.mean(thin_2pixel_recall_indivi))
                + "\nthin 2vessels auc indivi:\n" + str(thin_2pixel_auc_roc)
                + "\nthin 2vessels auc score mean:%.4f" % (np.mean(thin_2pixel_auc_roc))
                + "\nthin 3vessels recall indivi:\n" + str(thin_3pixel_recall_indivi)
                + "\nthin 3vessels recall mean:%.4f" % (np.mean(thin_3pixel_recall_indivi))
                + "\nthin 3vessels auc indivi:\n" + str(thin_3pixel_auc_roc)
                + "\nthin 3vessels auc score mean:%.4f" % (np.mean(thin_3pixel_auc_roc))
                )
Example No. 35
def jacc(matt, template):
    res = jaccard_similarity_score(matt, template)
    return res
Example No. 36
plt.xlabel('Number of Neighbors (K)')
plt.tight_layout()
plt.show()

## Final Model uses k=7
k = 7
#Train Model and Predict
kNN_cls = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
kNN_cls

yhat_knn = kNN_cls.predict(X_test)  # Predict using Test Data
perckNN = metrics.accuracy_score(
    y_test, yhat_knn)  # store accuracy score in mean_acc array
print("KNN Accuracy percentage", perckNN)

JaccardkNN = jaccard_similarity_score(y_test, yhat_knn)
print("KNN Jaccard index: %.2f" % JaccardkNN)

F1ScorekNN = f1_score(y_test, yhat_knn, average='weighted')
print("KNN F1-score: %.2f" % F1ScorekNN)
'''************************************'''
''' Decision Tree  '''
'''*************************************'''

from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics

loanTree = DecisionTreeClassifier(
    criterion="entropy", max_depth=5)  # Classify Decision Tree characteristics
loanTree.fit(X_train, y_train)  # Fit Decision Tree using training set
Example No. 37
from sklearn.datasets import fetch_rcv1
from global_variables import is_leaf_topic
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import jaccard_similarity_score

# Fetch the training dataset
train_data = fetch_rcv1(subset='train')

# Convert the scipy sparse matrix to a dense version usable by sklearn's functions
train_data.target = train_data.target.todense()
is_leaf_topic = np.array(is_leaf_topic)

# Subset the data to choose documents part of leaf nodes
train_data.target = train_data.target[:, is_leaf_topic]

# Train the classifier with the training data
classifier = RandomForestClassifier(n_estimators=10)
classifier.fit(train_data.data, train_data.target)

# Fetch the test data
test_data = fetch_rcv1(subset='test', random_state=42, shuffle=True)
test_data.data = test_data.data[0:1000, :]
test_data.target = test_data.target[0:1000, :]
test_data.target = test_data.target[:, is_leaf_topic]
test_data.target = test_data.target.todense()

test_predict = classifier.predict(test_data.data)

print("The Jaccard Similiarity Score is : " +
      str(jaccard_similarity_score(test_data.target, test_predict)))
Example No. 38
    specificity = float(
        confusion[0, 0]) / float(confusion[0, 0] + confusion[0, 1])
print("Specificity: " + str(specificity))
sensitivity = 0
if float(confusion[1, 1] + confusion[1, 0]) != 0:
    sensitivity = float(
        confusion[1, 1]) / float(confusion[1, 1] + confusion[1, 0])
print("Sensitivity: " + str(sensitivity))
precision = 0
if float(confusion[1, 1] + confusion[0, 1]) != 0:
    precision = float(confusion[1,
                                1]) / float(confusion[1, 1] + confusion[0, 1])
print("Precision: " + str(precision))

#Jaccard similarity index
jaccard_index = jaccard_similarity_score(y_true, y_pred, normalize=True)
print("\nJaccard similarity score: " + str(jaccard_index))

#F1 score
F1_score = f1_score(y_true,
                    y_pred,
                    labels=None,
                    average='binary',
                    sample_weight=None)
print("\nF1 score (F-measure): " + str(F1_score))

#Save the results
file_perf = open(path_experiment + 'performances.txt', 'w')
file_perf.write("Area under the ROC curve: " + str(AUC_ROC) +
                "\nArea under Precision-Recall curve: " + str(AUC_prec_rec) +
                "\nJaccard similarity score: " + str(jaccard_index) +
Example No. 39
import numpy as np
from sklearn.metrics import hamming_loss, jaccard_similarity_score

print(hamming_loss(np.array([[0.0, 1.0], [1.0, 1.0]]),
                   np.array([[0.0, 1.0], [1.0, 1.0]])))

print(hamming_loss(np.array([[0.0, 1.0], [1.0, 1.0]]),
                   np.array([[1.0, 1.0], [1.0, 1.0]])))

print(hamming_loss(np.array([[0.0, 1.0], [1.0, 1.0]]),
                   np.array([[0.0, 1.0], [1.0, 1.0]])))

print(jaccard_similarity_score(np.array([[0.0, 1.0], [1.0, 1.0]]),
                               np.array([[0.0, 1.0], [1.0, 1.0]])))

print(jaccard_similarity_score(np.array([[0.0, 1.0], [1.0, 1.0]]),
                                np.array([[1.0, 1.0], [1.0, 1.0]])))

print(jaccard_similarity_score(np.array([[0.0, 1.0], [1.0, 1.0]]),
                                np.array([[1.0, 1.0], [0.0, 1.0]])))
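For reference (worked out by hand under the sample-averaged multilabel definition used by older scikit-learn releases): the three hamming_loss calls print 0.0, 0.25 (one of the four labels differs) and 0.0, while the three jaccard_similarity_score calls print 1.0, (1/2 + 1) / 2 = 0.75 and (1/2 + 1/2) / 2 = 0.5.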
Example No. 40
def jaccard_score(gt_lbl, mask):
    lbled_mask = label(mask > CELL_THRESHOLD, background=0)
    return jaccard_similarity_score(lbled_mask.flatten(), gt_lbl.flatten())
Example No. 41
        X, Y, center=False
    )  #center=True (the default) would not work ("ValueError: center=True only allowed for dense data") but should presumably work in general


Jaccardtable = np.zeros((146, 2))

for K in range(1, 146):
    data_X_selected = SelectKBest(score_func=f_regression,
                                  k=K).fit_transform(data_X, data_y)

    # Randomly permute the indices of data when splitting into training and
    # test data (90% and 10% of the total data).
    perm = np.random.permutation(len(data))
    data_train_y = data_y[perm[:-len(data_y) // 10]]
    data_test_y = data_y[perm[-len(data_y) // 10:]]
    data_train_X = data_X_selected[perm[:-len(data_X_selected) // 10]]
    data_test_X = data_X_selected[perm[-len(data_X_selected) // 10:]]

    # Train and evaluate with Naive Bayes
    from sklearn.naive_bayes import GaussianNB
    from sklearn.metrics import jaccard_similarity_score
    nbayes = GaussianNB()
    nbayes.fit(data_train_X, data_train_y)
    expected = data_test_y
    predicted = nbayes.predict(data_test_X)
    Jaccardtable[K, 0] = K
    Jaccardtable[K, 1] = jaccard_similarity_score(expected, predicted)

np.savetxt("Jaccardtable_NBayes.txt", Jaccardtable)
print("done")
Example No. 42
        scheduler.step()

        # For each mini-batch...
        for batch, (data, labels) in enumerate(train_loader, 1):
            # Send to the GPU
            data = data.to(device)
            labels = labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(data)
            predictions = torch.argmax(outputs, 1)
            running_train_iou += metrics.jaccard_similarity_score(
                labels.cpu().numpy().flatten(),
                predictions.cpu().numpy().flatten())

            # Calculate loss
            loss = criterion(outputs, labels)
            running_loss += loss.item()

            # Calculate gradients
            loss.backward()

            # for name, param in model.named_parameters():
            #     if param.requires_grad and param.grad is not None:
            #         print(name)
            #         print(torch.mean(torch.abs(param.grad)).item())
            #         print(torch.max(torch.abs(param.grad)).item())
Example No. 43
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

#%%
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score
print(classification_report(y_test, y_pred))
print("Accuracy :: ", accuracy_score(y_test, y_pred))

# F1 Score
print("F1-Score :: ", f1_score(y_test, y_pred, average="weighted"))

# jaccard_similarity_score
from sklearn.metrics import jaccard_similarity_score
print("jaccard_similarity_score :: ",
      jaccard_similarity_score(y_test, y_pred))
# =============================================================================
#
# # Dump the trained RandomForestClassifier with Pickle
# random_forest_classifier_filename = '../saved-pickles/random_forest_classifier.pkl'
# # Open the file to save as pkl file
# random_forest_classifier_pkl = open(random_forest_classifier_filename, 'wb')
# pickle.dump(random_forest_classifier, random_forest_classifier_pkl)
# # Close the pickle instances
# random_forest_classifier_pkl.close()
#
# =============================================================================

#Saving model
Example No. 44
    swc_filename = folderpath + '/' + data_file + '/' + data_file + '.tif.v3dpbd.swc'
    num_sample_nodes = 2
    imgs, labels, p_encoding, node_ids = sample_nodes_truth(
        swc_filename,
        img_filename,
        num_nodes_per_img=num_sample_nodes,
        child_step=1,
        vis_flag=False)
    n_ch, n_x, n_y, n_z = 1, 24, 24, 24
    batch = num_sample_nodes
    n_label = 48
    x_patch = np.zeros((batch, n_ch, n_x, n_y, n_z))
    y_patch = np.zeros((batch, n_label))
    x_patch[:, 0, 2:-1, 2:-1, 2:-1] = np.array(imgs)
    y_patch = np.array(labels)
    vis_enlarge_ratio = 5
    ypred = model.predict(x_patch, batch_size=10)
    ypred[ypred > 0.5] = 1
    ypred[ypred < 0.5] = 0
    scores = []
    for j in range(len(labels)):
        score1 = jaccard_similarity_score(labels[j], ypred[j, :])
        score2 = utils.smoothed_jaccard(labels[j], ypred[j, :])
        scores.append((score1, score2))
    print(scores)
    print(np.where(ypred[0, :] > 0))
    print(np.where(labels[0]))

#for x,y in data_generator_undirected(train_dir,traindatalist):
#    pass
Example No. 45
#Loading data
data=pd.read_csv('D:\heart.csv')

#Creating feature and target data sets
X=data[['age','sex','cp','trestbps','chol','thalach']].values
Y=data[['target']].values

print(data.dtypes)

#Creating training and testing data sets
from sklearn.model_selection import train_test_split
XTrain,XTest,YTrain,YTest=train_test_split(X,Y,test_size=0.2,random_state=4)
print('Shape of training set: ',XTrain.shape,YTrain.shape)
print('Shape of testing set: ',XTest.shape,YTest.shape)

#Preparing model
from sklearn import svm
Model=svm.SVC(kernel='rbf')
Model.fit(XTrain,YTrain)

#Using model for prediction
result=Model.predict(XTest)

#Evaluation of accuracy
from sklearn.metrics import jaccard_similarity_score
print('Jaccard similarity score of training set: ',jaccard_similarity_score(YTrain,Model.predict(XTrain)))
print('Jaccard similarity score of testing set: ',jaccard_similarity_score(YTest,result))

from sklearn.metrics import f1_score
print('F1 score of training set: ',f1_score(YTrain,Model.predict(XTrain),average='weighted'))
print('F1 score of testing set: ',f1_score(YTest,result,average='weighted'))
Example No. 46
yhat = clf.predict(X_test)

from sklearn.metrics import f1_score
f1_score(y_test,yhat,average="weighted")


# In[47]:


from sklearn.linear_model import LogisticRegression
LR = LogisticRegression(C=0.01, solver="liblinear").fit(X_train,y_train)
yhat = LR.predict(X_test)
yhat_prob = LR.predict_proba(X_test)

from sklearn.metrics import jaccard_similarity_score
jaccard_similarity_score(y_test,yhat)


# In[48]:


from sklearn.metrics import jaccard_similarity_score
from sklearn.metrics import f1_score
from sklearn.metrics import log_loss


# In[49]:


get_ipython().system('wget -O loan_test.csv https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/ML0101ENv3/labs/loan_test.csv')
Example No. 47
mae = mean_absolute_error(y_test, Y_test, multioutput='uniform_average')
# MAE output is non-negative floating point. The best value is 0.0.
print("Mean Absolute Error: {}".format(mae))

mse = mean_squared_error(y_test, Y_test, multioutput='uniform_average')
# MSE output is non-negative floating point. The best value is 0.0.
print("Mean Squared Error: {}".format(mse))

r2 = r2_score(y_test, Y_test)
# R^2 (coefficient of determination) regression score function.
# Best possible score is 1.0 and it can be negative (because the model can be arbitrarily worse). A constant model that always
# predicts the expected value of y, disregarding the input features, would get a R^2 score of 0.0.
print("R - Squared value: {}".format(r2))

print('What percent of predictions are same: {}'.format(jaccard_similarity_score(y_test, Y_test)))

# Confusion Matrix
print(metrics.confusion_matrix(y_test, Y_test))
print(metrics.classification_report(y_test, Y_test))

actual = y_train
predictions = model.predict(X_train)
false_positive_rate, true_positive_rate, thresholds = roc_curve(actual, predictions)
roc_auc = auc(false_positive_rate, true_positive_rate)
print("Area Under the curve is: {}".format(roc_auc))

plt.title('Receiver Operating Characteristic')
plt.plot(false_positive_rate, true_positive_rate, 'b',
label='AUC = %0.2f'% roc_auc)
plt.legend(loc='lower right')
Example No. 48
print (classification_report(y_test, yhat))


# In[28]:


from sklearn.metrics import f1_score
f1_score(y_test, yhat, average='weighted')


# In[29]:


from sklearn.metrics import jaccard_similarity_score
jaccard_similarity_score(y_test, yhat)


# # Decision Tree

# In[30]:


# Import the decision tree model

from sklearn.tree import DecisionTreeClassifier


# In[31]:

Example No. 49
def compute_performances_for_multiclass(y_test, y_test_predicted, class_names,
                                        performances):

    # Compute the accuracy classification score : return the fraction of correctly classified samples
    performances.accuracy_score_fraction = accuracy_score(y_test,
                                                          y_test_predicted,
                                                          normalize=True)
    # Compute the accuracy classification score : return the number of correctly classified samples
    performances.accuracy_score_number = accuracy_score(y_test,
                                                        y_test_predicted,
                                                        normalize=False)

    # Print information in the console
    print("\nAccuracy classification score : ")
    print("         Fraction of correctly classified samples : %.2f" %
          performances.accuracy_score_fraction)
    print("         Number of correctly classified samples: %.2f" %
          performances.accuracy_score_number)
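
    # Toy illustration (made-up labels): with y_test = [0, 1, 1, 2] and
    # y_test_predicted = [0, 1, 0, 2], normalize=True gives 0.75 (the fraction
    # correct) while normalize=False gives 3 (the count of correct samples).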

    # Compute the Cohen's kappa score
    performances.cohen_kappa_score = cohen_kappa_score(y_test,
                                                       y_test_predicted)

    # Print information in the console
    print("\nCohen's kappa score : %.2f" % performances.cohen_kappa_score)

    # Compute the confusion matrix without normalization
    performances.confusion_matrix_without_normalization = confusion_matrix(
        y_test, y_test_predicted)
    # Compute the confusion matrix with normalization
    performances.confusion_matrix_with_normalization = \
        performances.confusion_matrix_without_normalization.astype('float') \
        / performances.confusion_matrix_without_normalization.sum(axis=1)[:, np.newaxis]

    # Print information in the console
    print("\nConfusion matrix : ")
    print("     Confusion matrix without normalization : ")
    square_matrix_size = len(
        performances.confusion_matrix_without_normalization)
    for i in range(square_matrix_size):
        if i == 0:
            print('                 [' + np.array2string(
                performances.confusion_matrix_without_normalization[i]))
        elif i == square_matrix_size - 1:
            print('                  ' + np.array2string(
                performances.confusion_matrix_without_normalization[i]) + ']')
        else:
            print('                  ' + np.array2string(
                performances.confusion_matrix_without_normalization[i]))
    print("     Confusion matrix with normalization : ")
    square_matrix_size = len(performances.confusion_matrix_with_normalization)
    for i in range(square_matrix_size):
        if i == 0:
            print('                 [' + np.array2string(
                performances.confusion_matrix_with_normalization[i]))
        elif i == square_matrix_size - 1:
            print('                  ' + np.array2string(
                performances.confusion_matrix_with_normalization[i]) + ']')
        else:
            print('                  ' + np.array2string(
                performances.confusion_matrix_with_normalization[i]))
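
    # Toy illustration (made-up counts): row-normalising [[8, 2], [1, 9]] by its
    # row sums gives [[0.8, 0.2], [0.1, 0.9]]; each row then holds the recall of
    # the corresponding true class.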

    # Compute the classification_report
    performances.classification_report = classification_report(
        y_test, y_test_predicted, target_names=class_names, digits=4)

    # Print information in the console
    print("\nclassification_report : ")
    print(performances.classification_report)

    # Compute the average Hamming loss
    performances.hamming_loss = hamming_loss(y_test, y_test_predicted)

    # Print information in the console
    print("\nAverage Hamming loss : %.2f" % performances.hamming_loss)

    # Compute the Jaccard similarity coefficient score with normalization
    performances.jaccard_similarity_score_with_normalization = jaccard_similarity_score(
        y_test, y_test_predicted, normalize=True)
    # Compute the Jaccard similarity coefficient score without normalization
    performances.jaccard_similarity_score_without_normalization = jaccard_similarity_score(
        y_test, y_test_predicted, normalize=False)

    # Print information in the console
    print("\nJaccard similarity coefficient score : ")
    print("     Average of Jaccard similarity coefficient : %.2f" %
          performances.jaccard_similarity_score_with_normalization)
    print(
        "     Sum of the Jaccard similarity coefficient over the sample set : %.2f"
        % performances.jaccard_similarity_score_without_normalization)
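
    # Toy illustration (made-up labels): for 1-D targets the two variants differ
    # only in scaling, e.g. jaccard_similarity_score([0, 1, 1], [0, 1, 0],
    # normalize=True) is 2/3 while normalize=False returns 2 (matching samples).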

    # Compute the precision
    performances.micro_precision = precision_score(y_test,
                                                   y_test_predicted,
                                                   average='micro')
    performances.macro_precision = precision_score(y_test,
                                                   y_test_predicted,
                                                   average='macro')
    performances.weighted_precision = precision_score(y_test,
                                                      y_test_predicted,
                                                      average='weighted')
    performances.none_precision = precision_score(y_test,
                                                  y_test_predicted,
                                                  average=None)

    # Print information in the console
    print("\nPrecision score : ")
    print("     micro : %.2f" % performances.micro_precision)
    print("     macro : %.2f" % performances.macro_precision)
    print("     weighted : %.2f" % performances.weighted_precision)
    print("     None : " + np.array2string(performances.none_precision))
    print("     Classes : " + np.array2string(class_names))

    # Compute the recall
    performances.micro_recall = recall_score(y_test,
                                             y_test_predicted,
                                             average='micro')
    performances.macro_recall = recall_score(y_test,
                                             y_test_predicted,
                                             average='macro')
    performances.weighted_recall = recall_score(y_test,
                                                y_test_predicted,
                                                average='weighted')
    performances.none_recall = recall_score(y_test,
                                            y_test_predicted,
                                            average=None)

    # Print information in the console
    print("\nRecall score : ")
    print("     micro : %.2f" % performances.micro_recall)
    print("     macro : %.2f" % performances.macro_recall)
    print("     weighted : %.2f" % performances.weighted_recall)
    print("     None : " + np.array2string(performances.none_recall))
    print("     Classes : " + np.array2string(class_names))

    # Compute the F1 score
    performances.micro_f1_score = f1_score(y_test,
                                           y_test_predicted,
                                           average='micro')
    performances.macro_f1_score = f1_score(y_test,
                                           y_test_predicted,
                                           average='macro')
    performances.weighted_f1_score = f1_score(y_test,
                                              y_test_predicted,
                                              average='weighted')
    performances.none_f1_score = f1_score(y_test,
                                          y_test_predicted,
                                          average=None)

    # Print information in the console
    print("\nF1-score : ")
    print("     micro : %.2f" % performances.micro_f1_score)
    print("     macro : %.2f" % performances.macro_f1_score)
    print("     weighted : %.2f" % performances.weighted_f1_score)
    print("     None : " + np.array2string(performances.none_f1_score))
    print("     Classes : " + np.array2string(class_names))

    # Compute the Matthews correlation coefficient
    performances.matthews_corrcoef = matthews_corrcoef(y_test,
                                                       y_test_predicted)

    # Print information in the console
    print("\nMatthews correlation coefficient : %.2f" %
          performances.matthews_corrcoef)
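
    # Note: in the binary case the Matthews correlation coefficient equals
    # (TP*TN - FP*FN) / sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)) and ranges from -1
    # to +1, with 0 meaning no better than chance.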

    return performances
Exemplo n.º 50
0
    ap = argparse.ArgumentParser()
    ap.add_argument("-p",
                    "--params",
                    required=True,
                    help="Path to store all the configurable variables")
    args = vars(ap.parse_args())
    params = args["params"]

    search = SearchImage(params)
    search.setMaterialCode()
    search.processQueryImage()
    print(search.materialCode)
    tmpList = sorted(search.materialCode.items(), key=lambda x: x[1])
    for i in tmpList:
        search.targetNames.append(i[0])
    print("Actual Values :")
    print(search.trueList)
    print("\nPredicted Values :")
    print(search.predList)
    print("\nConfusion Matrix")
    print(confusion_matrix(search.trueList, search.predList))
    print("\nClassification Report")
    print(
        classification_report(search.trueList,
                              search.predList,
                              target_names=search.targetNames))
    print("\nAccuracy Score")
    print(accuracy_score(search.trueList, search.predList))
    print("\nJaccard Similarity Score")
    print(jaccard_similarity_score(search.trueList, search.predList))
def evaluate_model_svm(x, y, learn_path, k=10, thresh=0.5):
    print(len(y), len(y[0]))
    # resample the k-fold split until every label column has at least two classes in each training fold
    count = 0
    while True:
        count += 1
        # print(count, 'Finding a proper KF...')
        kf = list(
            KFold(n_splits=k, shuffle=True,
                  random_state=randint(0, 100000)).split(x))
        good_folds = True
        for train_index, test_index in kf:
            for i in range(len(y[0])):
                if len(np.unique(
                        y[train_index,
                          i])) < 2:  # or len(np.unique(y[test_index, i])) < 2:
                    # print(y[train_index, i],np.unique(y[train_index, i]))
                    print(i)
                    good_folds = False
                    break
            if not good_folds:
                break
        if good_folds:
            break
    print('Found a good KF in', count, 'try!')

    with open(learn_path + 'topic_classifier-folds.pkl', 'wb') as out_file:
        pickle.dump(kf, out_file)
    fold_num = 0

    stats = QuickDataFrame([
        'Jaccard (normalised)', 'Accuracy (normalised)', 'Accuracy',
        'F1_score (micro averaged)', 'F1_score (macro averaged by labels)',
        'F1_score (averaged by samples)', 'Hamming loss', 'Label Ranking loss'
    ])

    prog = Progresser(k)
    for train_index, test_index in kf:
        # print(train_index, test_index)
        print('___________________________________________________')
        x_train, x_test = x[train_index], x[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # cls = SVC(kernel='linear')
        # cls = SVC(kernel='poly', probability=True, tol=1e-5)
        cls = SVC(kernel='linear', probability=True, tol=1e-5)
        # cls = GaussianNB()
        # cls = RandomForestClassifier(max_features='auto', random_state=1)

        topic_classifier = BinaryRelevance(classifier=cls,
                                           require_dense=[True, True])

        try:
            topic_classifier.fit(x_train, y_train)
        except Exception as e:
            print('\nfit error!:', e)
            continue

        # with open(learn_path + 'topic_classifier-SVC' + str(fold_num) + '.pkl', 'wb') as out_file:
        #     pickle.dump(topic_classifier, out_file)

        try:
            # predictions = topic_classifier.predict(x_test)
            predictions = np.zeros((len(x_test), y.shape[1]))
            preds = topic_classifier.predict_proba(x_test)
            for i in range(len(x_test)):
                for j in range(y.shape[1]):
                    predictions[i, j] = 1.0 if preds[i, j] > thresh else 0.0
            s = [
                jaccard_similarity_score(y_test, predictions, normalize=True),
                accuracy_score(y_test, predictions, normalize=True),
                accuracy_score(y_test, predictions, normalize=False),
                f1_score(y_test, predictions, average='micro'),
                f1_score(y_test, predictions, average='macro'),
                f1_score(y_test, predictions, average='samples'),
                hamming_loss(y_test, predictions),
                label_ranking_loss(y_test, predictions)
            ]

            stats.append(s)
            print(stats[stats.length - 1])
        except Exception as e:
            print('Eval error!:', e)

        fold_num += 1
        prog.count()

    for col in stats.cols:
        print(col, np.mean(stats[col]))
Exemplo n.º 52
0
def _get_jaccard_index(self, test_row_obs_list, train_col_obs_list, jaccard_similarity_score):
    test_row_obs_list = [item for sublist in test_row_obs_list for item in sublist]
    train_col_obs_list = [item for sublist in train_col_obs_list for item in sublist]
    jacc_sim_score = jaccard_similarity_score(test_row_obs_list, train_col_obs_list)
    return jacc_sim_score
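
# Usage sketch (hypothetical instance `obj`): both nested lists are flattened
# first, so obj._get_jaccard_index([[0, 1], [1]], [[0, 1], [0]],
# jaccard_similarity_score) compares [0, 1, 1] with [0, 1, 0] element-wise and
# returns 2/3.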
Exemplo n.º 53
0
                                              x='Clump',
                                              y='UnifSize',
                                              color='Yellow',
                                              label="benign",
                                              ax=ax)
plt.show()

cancer_df = cancer_df[pd.to_numeric(cancer_df['BareNuc'],
                                    errors='coerce').notnull()]
cancer_df['BareNuc'] = cancer_df['BareNuc'].astype('int')

features_df = cancer_df[[
    'Clump', 'UnifSize', 'UnifShape', 'MargAdh', 'SingEpiSize', 'BareNuc',
    'BlandChrom', 'NormNucl', 'Mit'
]]
X = np.asanyarray(features_df)

cancer_df['Class'] = cancer_df['Class'].astype('int')
y = np.asanyarray(cancer_df['Class'])
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=4)

clf = svm.SVC(kernel='rbf')
clf.fit(X_train, y_train)

yhat = clf.predict(X_test)
print('f1_score', f1_score(y_test, yhat, average='weighted'))
print('jaccard_similarity_score', jaccard_similarity_score(y_test, yhat))
Exemplo n.º 54
0
    neigh = KNeighborsClassifier(n_neighbors=n).fit(x_train, y_train)
    yhat = neigh.predict(x_test)
    mean_acc[n - 1] = metrics.accuracy_score(y_test, yhat)

    std_acc[n - 1] = np.std(yhat == y_test) / np.sqrt(yhat.shape[0])
print("The best accuracy was with", mean_acc.max(), "with k=",
      mean_acc.argmax() + 1)
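
# Note: std_acc holds the standard error of each accuracy estimate (std of the
# per-sample 0/1 correctness divided by sqrt(n_test)). Assuming matplotlib is
# imported as plt, it could be drawn as an uncertainty band, e.g.:
#   plt.fill_between(range(1, len(mean_acc) + 1),
#                    mean_acc - std_acc, mean_acc + std_acc, alpha=0.1)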

#Using SVM Model
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC

svm_model = SVC(kernel='linear', random_state=0)
svm_model.fit(x_train, y_train)
y_predict = svm_model.predict(x_test)
cm = confusion_matrix(y_test, y_predict)

#model improvisation
min_train = x_train.min()
range_train = (x_train - min_train).max()
x_train_scaled = (x_train - min_train) / range_train

from sklearn.metrics import f1_score

f1_score(y_test, yhat, average='weighted')

from sklearn.metrics import jaccard_similarity_score

jaccard_similarity_score(y_test, yhat)
Exemplo n.º 55
0
# In[34]:

test_df = pd.read_csv('loan_test.csv')
test_df.head()

# In[35]:

y_test_evaluation = test_df['loan_status'].values

# In[36]:

yhat_knn = yhat_knn[:54]
f1_score_knn = f1_score(y_test_evaluation, yhat_knn, average='weighted')
f1_score_knn

jaccard_score_knn = jaccard_similarity_score(y_test_evaluation, yhat_knn)
jaccard_score_knn

# In[37]:

predTree = predTree[:54]
f1_score_tree = f1_score(y_test_evaluation, predTree, average='weighted')
f1_score_tree

jaccard_score_tree = jaccard_similarity_score(y_test_evaluation, predTree)
jaccard_score_tree

# In[38]:

yhat_vector = yhat_vector[:54]
f1_score_vector = f1_score(y_test_evaluation, yhat_vector, average='weighted')
Exemplo n.º 56
0
def test_multilabel_jaccard_similarity_score():
    # Dense label indicator matrix format
    y1 = np.array([[0, 1, 1], [1, 0, 1]])
    y2 = np.array([[0, 0, 1], [1, 0, 1]])

    # size(y1 \inter y2) = [1, 2]
    # size(y1 \union y2) = [2, 2]

    assert_equal(jaccard_similarity_score(y1, y2), 0.75)
    assert_equal(jaccard_similarity_score(y1, y1), 1)
    assert_equal(jaccard_similarity_score(y2, y2), 1)
    assert_equal(jaccard_similarity_score(y2, np.logical_not(y2)), 0)
    assert_equal(jaccard_similarity_score(y1, np.logical_not(y1)), 0)
    assert_equal(jaccard_similarity_score(y1, np.zeros(y1.shape)), 0)
    assert_equal(jaccard_similarity_score(y2, np.zeros(y1.shape)), 0)
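
    # Worked check: the per-sample scores are 1/2 (first row) and 2/2 (second
    # row), so their mean is (0.5 + 1.0) / 2 = 0.75, matching the first assertion.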

    with ignore_warnings():  # sequence of sequences is deprecated
        # List of tuple of label
        y1 = [(1, 2,), (0, 2,)]
        y2 = [(2,), (0, 2,)]

        assert_equal(jaccard_similarity_score(y1, y2), 0.75)
        assert_equal(jaccard_similarity_score(y1, y1), 1)
        assert_equal(jaccard_similarity_score(y2, y2), 1)
        assert_equal(jaccard_similarity_score(y2, [(), ()]), 0)

        # |y3 inter y4 | = [0, 0, 0]
        # |y3 union y4 | = [2, 2, 3]
        y3 = [(0,), (1,), (3,)]
        y4 = [(4,), (4,), (5, 6)]
        assert_almost_equal(jaccard_similarity_score(y3, y4), 0)

        # |y5 inter y6 | = [0, 1, 1]
        # |y5 union y6 | = [2, 1, 3]
        y5 = [(0,), (1,), (2, 3)]
        y6 = [(1,), (1,), (2, 0)]

        assert_almost_equal(jaccard_similarity_score(y5, y6), (1 + 1 / 3) / 3)
#Split the training data randomly so that 15% of it will be used for testing accuracy
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.15, random_state=432)
print ('Train set:', X_train.shape,  y_train.shape)
print ('Test set:', X_test.shape,  y_test.shape)


##############Training and Perfomance analysis##############
LR = LogisticRegression(C=0.015, solver='liblinear').fit(X_train,y_train)

print(LR)

yhat = LR.predict(X_test)
yhat_prob = LR.predict_proba(X_test)

print("Jaccard Index: ", jaccard_similarity_score(y_test, yhat))
    
print("Confusion Matrix:\n", confusion_matrix(y_test, yhat, labels=[1,0]))

# Compute confusion matrix
cnf_matrix = confusion_matrix(y_test, yhat, labels=[1,0])
np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=['Survived=1','Survived=0'],normalize= False,  title='Confusion matrix')
#plt.show()

print (classification_report(y_test, yhat))

print("Log loss: ", log_loss(y_test, yhat_prob))
Exemplo n.º 58
0
def add_weight(user_to_video_crosstab, active_user_data):
    # add_weight : input = crosstab, output = weighted crosstab
    active_user_data_attribute = []

    ac_user_num = active_user_data.iloc[0]['user_num']
    ac_user_sex = active_user_data.iloc[0]['sex']
    ac_user_hp = active_user_data.iloc[0]['health_point']
    ac_user_label = active_user_data.iloc[0]['label']

    if ac_user_sex == 'f':
        active_user_data_attribute += [51]
        if ac_user_hp >= 40:
            active_user_data_attribute += [convert_cate_num('h')]
        elif ac_user_hp < 40 and ac_user_hp >= 33:
            active_user_data_attribute += [convert_cate_num('m')]
        else:
            active_user_data_attribute += [convert_cate_num('l')]

    else:
        active_user_data_attribute += [52]
        if ac_user_hp >= 50:
            active_user_data_attribute += [convert_cate_num('h')]
        elif ac_user_hp < 50 and ac_user_hp >= 44:
            active_user_data_attribute += [convert_cate_num('m')]
        else:
            active_user_data_attribute += [convert_cate_num('l')]

    active_user_data_attribute += [
        convert_cate_num(active_user_data.iloc[0]['bodypart'])
    ]
    # active_user_data_attribute = [sex, level, bodypart] -> compared with video attribute

    vid_weight_exponent = pd.DataFrame(0,
                                       index=['weight_num'],
                                       columns=user_to_video_crosstab.columns)

    for i in vid_weight_exponent.columns:
        MyDB.execute(
            'select sex, level, bodypart from ROUTINE where video_num = %s UNION select sex, level, bodypart from EXERCISE where video_num = %s'
            % (str(i), str(i)))
        attribute_list = list(MyDB.fetchone())
        # ['sex', 'level', 'bodypart']

        for j in range(3):
            if j == 0:
                if attribute_list[j] == 'm':
                    attribute_list[j] = 52
                elif attribute_list[j] == 'f':
                    attribute_list[j] = 51
                else:
                    attribute_list[j] = convert_cate_num(attribute_list[j])
            else:
                attribute_list[j] = convert_cate_num(attribute_list[j])

        vid_weight_exponent.loc[
            'weight_num',
            i] = vid_weight_exponent.iloc[0][i] + 3 - jaccard_similarity_score(
                active_user_data_attribute, attribute_list, normalize=False)
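
    # Note: with normalize=False the call above returns how many of the three
    # attribute positions (sex, level, bodypart) match, so weight_num ends up
    # counting the mismatches between the user and the video.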

    for i in vid_weight_exponent.columns:
        n = vid_weight_exponent.loc['weight_num'][i]
        # recurrence: a_(n+1) = a_n * 0.8 + 0.3, which approaches but never exceeds 1.5
        # closed form used below: a_n = 1.5 - 0.4 * 0.8**(n - 1)
        a_n = 1.5 - (0.4 * (0.8**(n - 1)))

        if n == 0:
            vid_weight_exponent.loc['weight_num', i] = 0.3
        else:
            vid_weight_exponent.loc['weight_num', i] = a_n

        user_to_video_crosstab.loc[:, i] += vid_weight_exponent.loc[
            'weight_num'][i]

    MyDB.execute('select user_num from USER where label = ' +
                 str(ac_user_label))
    same_label_user = MyDB.fetchall()
    same_label_user = [i[0] for i in same_label_user]

    for i in user_to_video_crosstab.index:
        if i in same_label_user:
            user_to_video_crosstab.loc[i] += 0.3

    # weight to video watched 7 days ago
    temp_time = datetime.now()
    timegap = timedelta(days=7)
    temp_time = temp_time.date() - timegap

    MyDB.execute('select video_num from HISTORY where user_num = ' +
                 str(ac_user_num) + ' and time >= \'%s\'' % temp_time)
    watched_video = MyDB.fetchall()
    watched_video = [i[0] for i in watched_video]

    for i in user_to_video_crosstab.columns:
        if i in watched_video:
            user_to_video_crosstab.loc[:, i] += 0.1

    return user_to_video_crosstab
def j_score(yTrue, yPred):
    js = []
    for yT, yP in zip(yTrue, yPred):
        js.append(jaccard_similarity_score((yT > 0.1).flatten(), (yP > 0.1).flatten()))
    js = np.stack(js)
    return np.mean(js)
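
# Usage sketch (made-up arrays): j_score binarises both inputs at 0.1 and
# averages the per-pair scores, e.g. for yTrue = [[[0.0, 0.9], [0.8, 0.0]]] and
# yPred = [[[0.0, 0.7], [0.0, 0.0]]] the flattened masks [F, T, T, F] and
# [F, T, F, F] agree on 3 of 4 positions, giving 0.75.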
Exemplo n.º 60
0
def get_jaccard_index(y_true, y_pred):
    # Jaccard similarity index
    jaccard_index = jaccard_similarity_score(y_true, y_pred, normalize=True)
    print("Jaccard similarity score: " + str(jaccard_index))
    return jaccard_index