Example #1
def check_alternative_lrap_implementation(lrap_score, n_classes=5,
                                          n_samples=20, random_state=0):
    _, y_true = make_multilabel_classification(n_features=1,
                                               allow_unlabeled=False,
                                               random_state=random_state,
                                               n_classes=n_classes,
                                               n_samples=n_samples)

    # Score with ties
    y_score = sparse_random_matrix(n_components=y_true.shape[0],
                                   n_features=y_true.shape[1],
                                   random_state=random_state)

    if hasattr(y_score, "toarray"):
        y_score = y_score.toarray()
    score_lrap = label_ranking_average_precision_score(y_true, y_score)
    score_my_lrap = _my_lrap(y_true, y_score)
    assert_almost_equal(score_lrap, score_my_lrap)

    # Uniform score
    random_state = check_random_state(random_state)
    y_score = random_state.uniform(size=(n_samples, n_classes))
    score_lrap = label_ranking_average_precision_score(y_true, y_score)
    score_my_lrap = _my_lrap(y_true, y_score)
    assert_almost_equal(score_lrap, score_my_lrap)
Example #2
 def label_ranking_average_precision_score(self, predictor, batch_size=50):
     from sklearn.metrics import label_ranking_average_precision_score 
     # Compute the predictions
     p = []
     for xq_batch, xa_batch, _ in super(QaPairsTest, self).sampling(batch_size):
         delta = predictor(xq_batch, xa_batch)
         p += delta[0].tolist()
     p = np.array(p)
     # Select the samples that can be used for evaluation:
     # 1. without any positive example the score cannot be computed
     # 2. without any negative example the score is meaningless
     map_record = []
     skip1 = 0
     skip2 = 0
     for question, entry in self.questions.items():
         idx = np.array(entry['idx'])
         if self.y_np[idx].max() == 0:
             skip1 += 1
             continue
         if self.y_np[idx].min() != 0:
             skip2 += 1
             #continue
         score = p[idx].reshape(idx.shape).tolist()
         map = label_ranking_average_precision_score(np.array([entry['label']]), np.array([score]))
         map_record.append(map)
     logging.info('Skip1 %d Skip2 %d' % (skip1, skip2))
     return np.array(map_record).mean()
Example #3
    def forward(self, bottom, top):
        """Compute the label ranking average precision."""
        y_score = bottom[0].data
        y_true = bottom[1].data
        label_rank_avg_prec = metrics.label_ranking_average_precision_score(y_true, y_score)

        top[0].data[...] = label_rank_avg_prec
Example #4
 def label_ranking_average_precision_score2(self, model, batch_size=50): 
     def label_ranking_average_precision_score(label, score):
         assert len(label) == len(score)
         data = zip(label, score)
         data = sorted(data, key=lambda x:x[1],reverse=True)
         count = 0.0
         values = []
         for i in range(len(data)):
             if data[i][0]:
                 count += 1
                 values.append(count / (i + 1))
         assert len(values)
         return sum(values) / count, values[0]
     p = model.predict(
         {'q_input': self.xq_np, 'a_input':self.xa_np},
         batch_size=batch_size
     )
     map_record = []
     for question, entry in self.questions.items():
         idx = np.array(entry['idx'])
         if self.y_np[idx].max() == 0:
             continue
         score = p[idx].reshape(idx.shape).tolist()
         map, _ = label_ranking_average_precision_score(entry['label'], score)
         map_record.append(map)
         self.saveResult(question, map, score)
     map = np.array(map_record).mean()
     self.saveResult('__TOTAL_MAP__', map)
     return map
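For a single sample with untied scores, the hand-rolled average precision above matches scikit-learn's per-sample LRAP. A minimal standalone check (restating the inner helper outside the class, with hypothetical data) could look like this:

import numpy as np
from sklearn.metrics import label_ranking_average_precision_score

def manual_ap(label, score):
    # Same ranking logic as the nested helper above, returning only the AP:
    # precision at the rank of each relevant item, averaged over relevant items.
    data = sorted(zip(label, score), key=lambda x: x[1], reverse=True)
    count, values = 0.0, []
    for i, (lab, _) in enumerate(data):
        if lab:
            count += 1
            values.append(count / (i + 1))
    return sum(values) / count

label = [1, 0, 0, 1]
score = [0.1, 0.9, 0.3, 0.5]
print(manual_ap(label, score))                                                       # 0.5
print(label_ranking_average_precision_score(np.array([label]), np.array([score])))   # 0.5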
Example #5
def print_report(name_classificator, testing_problems, testing_tags, predicted_problems, predicted_tags):
    predicted_problems, predicted_tags = make_right_order(testing_problems, predicted_problems, predicted_tags)
    mlb = MultiLabelBinarizer().fit(testing_tags + predicted_tags)
    testing_tags = mlb.transform(testing_tags)
    predicted_tags = mlb.transform(predicted_tags)
    print(name_classificator)
    print(classification_report(testing_tags, predicted_tags, target_names=mlb.classes_))
    print('label ranking average precision score =',
          label_ranking_average_precision_score(testing_tags, predicted_tags))
    print('\n', ('#'*100), '\n')
Example #6
def evaluate_network(network, X_test, y_test, classes_names, length=1000, batch_size=64):
    resp = network.predict_proba(X_test[:length], batch_size=batch_size, verbose=False)
    resc = network.predict_classes(X_test[:length], batch_size=batch_size, verbose=False)

    a1 = []
    a2 = []
    cpt = 0
    cpt_on = []
    cpt_real = []
    cpt_should = 0
    should = []
    cpt_shouldnt = 0
    shouldnt = []
    for idx, i in enumerate(resc):
        a1.append(i)
        a2.append(np.array(y_test[idx]).argmax())
        if i.tolist() == [0, 0, 0, 0]:
            cpt += 1
            cpt_on.append(resp[idx].argmax())
            cpt_real.append(np.array(y_test[idx]).argmax())
            if cpt_on[-1] == cpt_real[-1]:
                cpt_should += 1
                should.append(resp[idx].argmax())
            else:
                cpt_shouldnt += 1
                shouldnt.append(resp[idx].argmax())
            # print(resp[idx])
    print("No decision: %d / %d  [%.02f%%]" % (cpt, len(resc), (cpt / float(len(resc))) * 100), end="")
    print(cpt_should, cpt_shouldnt)

    print("Accuracy: %.06f" % metrics.label_ranking_average_precision_score(y_test[:length], resp))

    cpt_on = np.array(cpt_on)
    print(metrics.classification_report(a1, a2, target_names=classes_names))

    print("Confusion matrix:")
    cm = confusion_matrix(a1, a2)
    print(cm)
    sns.set_style("ticks")
    sns.mpl.rc("figure", figsize=(8, 4))

    np.set_printoptions(precision=2)
    cm_normalized = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]
    fig = plt.figure()
    plt.imshow(cm_normalized, interpolation="nearest", cmap=plt.cm.Blues)
    plt.title("Normalized confusion matrix")
    plt.colorbar()
    tick_marks = np.arange(len(classes_names))
    plt.xticks(tick_marks, classes_names, rotation=45)
    plt.yticks(tick_marks, classes_names)
    plt.ylabel("True label")
    plt.xlabel("Predicted label")
    plt.tick_params(which="both", direction="in", length=0)
    plt.show()
Example #7
 def ranking_precision(self):
     """
     Label ranking average precision (LRAP) is the average over each 
     ground truth label assigned to each sample, of the ratio of 
     true vs. total labels with lower score. This metric will yield 
     better scores if you are able to give better rank to the labels 
     associated with each sample. The obtained score is always strictly 
     greater than 0, and the best value is 1.
     """
     self.ranking_precision = metrics.label_ranking_average_precision_score(
         self.ground_truth, self.predictions_raw)
     return self.ranking_precision
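As a minimal illustration of the docstring above (a sketch assuming only NumPy and scikit-learn, with made-up scores), ranking the relevant labels higher raises the score, and a perfect ranking gives the best value of 1.0:

import numpy as np
from sklearn.metrics import label_ranking_average_precision_score

y_true = np.array([[1, 0, 0], [0, 0, 1]])
# The relevant label of the first sample is ranked 2nd, that of the second sample 1st,
# so LRAP = (1/2 + 1/1) / 2 = 0.75.
y_score = np.array([[0.7, 0.9, 0.1], [0.2, 0.3, 0.8]])
print(label_ranking_average_precision_score(y_true, y_score))          # 0.75

# A perfect ranking puts every relevant label first and yields 1.0.
y_score_perfect = np.array([[0.9, 0.2, 0.1], [0.1, 0.2, 0.9]])
print(label_ranking_average_precision_score(y_true, y_score_perfect))  # 1.0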
Example #8
def multi_label_evaluate(y, y_prob, threshold):
    statistics = Statistics()
    y_pred = (y_prob >= threshold).astype(int)
    y_pred_50 = (y_prob >= 0.5).astype(int)

    ranking_loss = label_ranking_loss(y, y_pred)
    lraps = label_ranking_average_precision_score(y, y_pred)
    ranking_loss_50 = label_ranking_loss(y, y_pred_50)
    lraps_50 = label_ranking_average_precision_score(y, y_pred_50)

    f1_macro = f1_score(y, y_pred, average='macro')
    f1_macro_50 = f1_score(y, y_pred_50, average='macro')

    statistics.update_statistics("Multi-Label", "Ranking Loss", ranking_loss)
    statistics.update_statistics("Multi-Label", "Ranking Precision", lraps)
    statistics.update_statistics("Multi-Label", "Ranking Loss (t=0.5)", ranking_loss_50)
    statistics.update_statistics("Multi-Label", "Ranking Precision (t=0.5)", lraps_50)

    statistics.update_statistics("Multi-Label", "Macro F1", f1_macro)
    statistics.update_statistics("Multi-Label", "Macro F1 (t=0.5)", f1_macro_50)

    try:
        auc_macro = roc_auc_score(y, y_pred, average='macro')
        auc_macro_50 = roc_auc_score(y, y_pred_50, average='macro')
        auc_pr_macro = roc_auc_score(y, y_prob, average='macro')

        statistics.update_statistics("Multi-Label", "Macro AUC", auc_macro)
        statistics.update_statistics("Multi-Label", "Macro AUC (t=0.5)", auc_macro_50)
        statistics.update_statistics("Multi-Label", "Macro AUC (Pr)", auc_pr_macro)

    except ValueError:
        statistics.update_statistics("Multi-Label", "Macro AUC", np.NaN)
        statistics.update_statistics("Multi-Label", "Macro AUC (t=0.5)", np.NaN)
        statistics.update_statistics("Multi-Label", "Macro AUC (Pr)", np.NaN)

    return statistics
Example #9
def _generate_classification_reports(y_true, y_pred, target_names=None):
    # Calculate additional stats
    total_accuracy = accuracy_score(y_true, y_pred)
    cov_error = coverage_error(y_true, y_pred)
    lrap = label_ranking_average_precision_score(y_true, y_pred)

    report = metrics.multilabel_prediction_report(y_true, y_pred)
    report += '\n\n'
    report += metrics.multilabel_classification_report(y_true, y_pred, target_names=target_names)
    report += '\n\n'
    report += 'coverage error:  %.3f' % cov_error
    report += '\n'
    report += 'LRAP:            %.3f' % lrap
    report += '\n'
    report += 'total accuracy:  %.3f' % total_accuracy
    return report
Example #10
 def _local_evaluate(n_plain_t, n_plain_p):
     c = 0
     for idx, i in enumerate(n_plain_p):
         isit = False
         for idx2, x in enumerate(i):
             if x == 1 and x == n_plain_t[idx][idx2]:
                 isit = True
         if isit:
             c += 1
     acc = float(c) / len(n_plain_p)
     rps = metrics.label_ranking_average_precision_score(n_plain_t, n_plain_p)
     print(
         "\x1b[33mAccuracy: %.02f%%\x1b[0m [%d/%d], \x1b[33mRPS: %.03f\x1b[0m" % (acc * 100, c, len(n_plain_p), rps),
         end="",
     )
     return acc, rps
Example #11
def test_lrap_sample_weighting_zero_labels():
    # Degenerate sample labeling (e.g., zero labels for a sample) is a valid
    # special case for lrap (the sample is considered to achieve perfect
    # precision), but this case is not tested in test_common.
    # For these test samples, the APs are 0.5, 0.75, and 1.0 (default for zero
    # labels).
    y_true = np.array([[1, 0, 0, 0], [1, 0, 0, 1], [0, 0, 0, 0]],
                      dtype=bool)
    y_score = np.array([[0.3, 0.4, 0.2, 0.1], [0.1, 0.2, 0.3, 0.4],
                        [0.4, 0.3, 0.2, 0.1]])
    samplewise_lraps = np.array([0.5, 0.75, 1.0])
    sample_weight = np.array([1.0, 1.0, 0.0])

    assert_almost_equal(
        label_ranking_average_precision_score(y_true, y_score,
                                              sample_weight=sample_weight),
        np.sum(sample_weight * samplewise_lraps) / np.sum(sample_weight))
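The per-sample values quoted in the comment above can be checked by scoring each row separately (a sketch assuming only NumPy and scikit-learn); the all-zero row falls back to the degenerate value of 1.0:

import numpy as np
from sklearn.metrics import label_ranking_average_precision_score

y_true = np.array([[1, 0, 0, 0], [1, 0, 0, 1], [0, 0, 0, 0]], dtype=bool)
y_score = np.array([[0.3, 0.4, 0.2, 0.1], [0.1, 0.2, 0.3, 0.4],
                    [0.4, 0.3, 0.2, 0.1]])
# Scoring one sample at a time recovers the per-sample APs used in the test.
per_sample = [label_ranking_average_precision_score(y_true[i:i + 1], y_score[i:i + 1])
              for i in range(len(y_true))]
print(per_sample)  # [0.5, 0.75, 1.0]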
Example #12
def evaluate(predictions, labels, threshold=0.4, multi_label=True):
    '''
        True Positive  :  Label : 1, Prediction : 1
        False Positive :  Label : 0, Prediction : 1
        False Negative :  Label : 1, Prediction : 0
        True Negative  :  Label : 0, Prediction : 0
        Precision      :  TP/(TP + FP)
        Recall         :  TP/(TP + FN)
        F Score        :  2.P.R/(P + R)
        Ranking Loss   :  The average number of label pairs that are incorrectly ordered given the predictions
        Hamming Loss   :  The fraction of labels that are incorrectly predicted (Hamming distance between predictions and labels)
    '''
    assert predictions.shape == labels.shape, "Shapes: %s, %s" % (predictions.shape, labels.shape,)
    metrics = dict()
    if not multi_label:
        metrics['bae'] = BAE(labels, predictions)
        labels, predictions = np.argmax(labels, axis=1), np.argmax(predictions, axis=1)

        metrics['accuracy'] = accuracy_score(labels, predictions)
        metrics['micro_precision'], metrics['micro_recall'], metrics['micro_f1'], _ = \
            precision_recall_fscore_support(labels, predictions, average='micro')
        metrics['macro_precision'], metrics['macro_recall'], metrics['macro_f1'], metrics['coverage'], \
            metrics['average_precision'], metrics['ranking_loss'], metrics['pak'], metrics['hamming_loss'] \
            = 0, 0, 0, 0, 0, 0, 0, 0

    else:
        metrics['coverage'] = coverage_error(labels, predictions)
        metrics['average_precision'] = label_ranking_average_precision_score(labels, predictions)
        metrics['ranking_loss'] = label_ranking_loss(labels, predictions)
        
        for i in range(predictions.shape[0]):
            predictions[i, :][predictions[i, :] >= threshold] = 1
            predictions[i, :][predictions[i, :] < threshold] = 0

        metrics['bae'] = 0
        metrics['patk'] = patk(predictions, labels)
        metrics['micro_precision'], metrics['micro_recall'], metrics['micro_f1'], metrics['macro_precision'], \
            metrics['macro_recall'], metrics['macro_f1'] = bipartition_scores(labels, predictions)
    return metrics
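A toy check of the confusion-matrix definitions in the docstring above, using hypothetical arrays and plain NumPy (independent of the evaluate() helpers such as BAE, patk, and bipartition_scores):

import numpy as np

labels = np.array([[1, 0, 1], [0, 1, 0]])
preds = np.array([[1, 1, 0], [0, 1, 0]])
tp = np.sum((labels == 1) & (preds == 1))  # 2
fp = np.sum((labels == 0) & (preds == 1))  # 1
fn = np.sum((labels == 1) & (preds == 0))  # 1
precision = tp / (tp + fp)
recall = tp / (tp + fn)
f_score = 2 * precision * recall / (precision + recall)
print(precision, recall, f_score)  # 0.666..., 0.666..., 0.666...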
Example #13
        if not check_outputs(outputs, False):
            warnings.warn(
                "Warning, the ouputs appear to have wrong values!!!!!")

        allPreds = np.append(allPreds, outputs, axis=0)
        allLabels = np.append(allLabels, labels, axis=0)

        total += labels.shape[0]
        numBatches += 1

    try:
        score_accuracy = accuracy_score(
            allLabels,
            indices_to_one_hot(allPreds.argmax(axis=1),
                               len(train_mnist.classes)))
        score_lwlrap = label_ranking_average_precision_score(
            allLabels, allPreds)
        score_mse = math.sqrt(mean_squared_error(allLabels, allPreds))
        score_pr_auc = average_precision_score(allLabels, allPreds)
        score_roc_auc = roc_auc_score(allLabels, allPreds)
    except ValueError as e:
        print("Soemthing wrong with evaluation")
        print(e)

    print("Accuracy =  %f" % (score_accuracy))
    print("Label ranking average precision for train =  %f" % (score_lwlrap))
    print("AUC_ROC score for train = %f" % (score_roc_auc))
    print("AUC_PR score for train = %f" % (score_pr_auc))
    print("MSE score for train = %f" % (score_mse))
Example #14
def train_eval_dummy(dummy_strategy, train_df, eval_df, output_dirp):
    """
    Train and eval test a dummy model
    :param train_df:
    :param eval_df:
    :param output_dirp:
    :return:
    """
    print(train_df.head())

    # Write train
    Path(output_dirp).mkdir(parents=True, exist_ok=True)
    train_fp = Path(output_dirp) / "trainset.tsv"
    train_df.to_csv(train_fp, sep="\t", index=False)

    # reload train for testing
    train_df = pd.read_csv(train_fp,
                           sep="\t",
                           converters={"labels": literal_eval})
    # write and reload eval set for testing
    eval_fp = Path(output_dirp) / "testset.tsv"
    eval_df.to_csv(eval_fp, sep="\t", index=False)
    eval_df = pd.read_csv(eval_fp,
                          sep="\t",
                          converters={"labels": literal_eval})

    # Dataframe to sklearn matrix
    X_train = np.random.rand(train_df["text"].shape[0],
                             )  # random dummy data with same shape as train
    X_eval = np.random.rand(eval_df["text"].shape[0],
                            )  # random dummy data with same shape as train
    y_train = np.array(
        train_df["labels"].to_list())  # train labels in multihot np.array
    y_eval = np.array(
        eval_df["labels"].to_list())  # eval labels in multihot np.array

    # Define model
    if dummy_strategy == "constant":
        c = Counter(
            np.apply_along_axis(lambda x: str(x.tolist()), 1,
                                y_train).tolist())
        most_freq_nn = next(x[0] for x in c.most_common() if "1" in x[0])
        most_freq_nn = np.array(literal_eval(most_freq_nn))
        model = DummyClassifier(strategy=dummy_strategy,
                                constant=most_freq_nn,
                                random_state=settings.RANDOM_STATE)
    else:
        model = DummyClassifier(strategy=dummy_strategy,
                                random_state=settings.RANDOM_STATE)

    # Train the model
    print(f"Training dummy model with strategy: {dummy_strategy}")
    model.fit(X_train, y_train)

    # Evaluate the model on eval set
    y_pred = model.predict(X_eval)
    print(y_pred)
    result = {
        "LRAP": label_ranking_average_precision_score(y_eval, y_pred),
        "eval_loss": label_ranking_loss(y_eval, y_pred),
    }

    # Write model result and outputs
    eval_df["y_pred"] = y_pred.tolist()
    predictions_fp = Path(output_dirp) / "testset_with_predictions.tsv"
    eval_df.to_csv(predictions_fp, sep="\t", index=False)

    with open(Path(output_dirp) / "result.json", "wt") as result_out:
        json.dump(result, result_out)

    return result
Example #15
    allPreds = np.empty((0, 80), float)
    allLabels = np.empty((0, 80), int)

    for batch in trainloader:
        images = batch['spectrogram'].to(device)
        labels = batch['labels'].cpu().numpy()
        outputs = model(images).cpu().numpy()

        allPreds = np.append(allPreds, outputs, axis=0)
        allLabels = np.append(allLabels, labels, axis=0)

        total += labels.shape[0]
        numBatches += 1

    try:
        score_lwlrap = label_ranking_average_precision_score(binarize(allLabels, threshold=0.5), allPreds)
        score_roc_auc = roc_auc_score(binarize(allLabels, threshold=0.5), allPreds)
        score_pr_auc = average_precision_score(binarize(allLabels, threshold=0.5), allPreds)
        score_mse = math.sqrt(mean_squared_error(allLabels, allPreds))
    except ValueError as e:
        print("Soemthing wrong with evaluation")
        print(e)

    print("Label ranking average precision for train =  %f" % (score_lwlrap / numBatches))
    print("AUC_ROC score for train = %f" % (score_roc_auc / numBatches))
    print("AUC_PR score for train = %f" % (score_pr_auc / numBatches))
    print("MSE score for train = %f" % (score_mse / numBatches))



## ------------------------------------------------------------------------
Example #16
def ranking_average_precision(y, X, X_test):
    # Fit fuzzy c-means on X, then score the predicted memberships for X_test against y.
    cntr, u, u0, d, jm, p, fpc = fuzz.cluster.cmeans(X.T, 19, 2, error=0.005, maxiter=1000, init=None)
    u_pred, u0_pred, d_pred, jm_pred, p_pred, fpc_pred = fuzz.cluster.cmeans_predict(X_test.T, cntr, 2, error=0.0005, maxiter=1000)
    ranking_score = label_ranking_average_precision_score(y, u_pred.T)
    return ranking_score
Example #17
    def train_predict_all_output_files(self):
        to_lower_case = None  # Used to manage the lower case
        # parameter in the various configurations
        j = 1  # Iterator
        data_set_file_list = None  # Temporary variable used to build
        # the name of each configuration
        with_lower_case = "_with_to_lower_case."  # Variable used to
        # build the name of each configuration using conversion to
        # lower case
        without_lower_case = "_without_to_lower_case."  # Variable used
        # to build the name of each configuration not using conversion
        # to lower case
        temp = None  # Temporary variable used to build the name of
        # each configuration
        print(os.listdir(self._current_dir))  # Debug
        print(len(os.listdir(self._current_dir)))  # Debug
        for file in os.listdir(self._current_dir):
            #             if j == 2:
            #                 break
            print(file)  # Debug
            if file.endswith(".json") and \
            file != "cleaned_pre_processing_experiment_results.json":
                # print(os.path.join(self._current_dir, file)) # Debug
                start_time = time.time()  # We get the time expressed
                # in seconds since the epoch
                self._data_set_file = os.path.join( \
                self._current_dir, file)
                np.random.seed(0)  # We set the seed
                self._build_data_set()  # We build the data set
                self._data_set_file = file
                for to_lower_case in [False, True]:
                    # We iterate to manage the potential conversion
                    # to lower case
                    temp = self._data_set_file
                    data_set_file_list = self._data_set_file.split(".")
                    if to_lower_case:
                        self._data_set_file = \
                        data_set_file_list[0] + with_lower_case + \
                        data_set_file_list[1]
                    else:
                        self._data_set_file = \
                        data_set_file_list[0] + without_lower_case + \
                        data_set_file_list[1]
                    print_log("##### File name: {} #####" \
                              .format(self._data_set_file)) # Debug
                    print_log("--- {} seconds ---" \
                              .format(time.time() - start_time))
                    i = 1
                    for train_indices, val_indices in self._tscv.split(
                            self._train_set):
                        print_log("********* Evaluation on fold {} *********"\
                        .format(i)) # Debug

                        print_log(
                            "We count the occurrence of each term")  # Debug
                        count_vectorizer = CountVectorizer(lowercase=to_lower_case, \
                        token_pattern=u'(?u)\S+')
                        X_counts = count_vectorizer \
                        .fit_transform(self._train_set.iloc[train_indices]['text'].values)

                        print_log("Use of the TF-IDF model")  # Debug
                        tfidf_transformer = TfidfTransformer(use_idf=False, \
                        smooth_idf=False)
                        print_log(X_counts.shape)  # Debug

                        print_log(
                            "Computation of the weights of the TF-IDF model")
                        X_train = tfidf_transformer.fit_transform(X_counts)
                        y_train = self._train_set.iloc[train_indices][
                            'class'].values
                        print_log(X_train.shape)

                        print_log("--- {} seconds ---".format(time.time() -
                                                              start_time))

                        print_log("Training of the models")  # Debug
                        self._model.fit(X_train, y_train)

                        print_log("--- {} seconds ---".format(time.time() -
                                                              start_time))

                        print_log("We count the occurrence of " + \
                                  "each term in the val. set") # Debug
                        X_val_counts = count_vectorizer \
                        .transform(self._train_set.iloc[val_indices]['text'].values)
                        print_log("Computation of the weights of " + \
                                  "the TF-IDF model for the " + \
                                  "validation set") # Debug
                        X_val = tfidf_transformer. \
                        transform(X_val_counts)
                        y_val = self._train_set. \
                        iloc[val_indices]['class'].values
                        print_log("Making predictions")  # Debug

                        if i == 1:
                            self._configurations_accuracies[
                                self._data_set_file] = []
                            self._configurations_mrr_values[
                                self._data_set_file] = []
                        self._configurations_accuracies[self._data_set_file].append(\
                        np.mean(self._model.predict(X_val) == y_val))

                        found_function = False
                        try:
                            if callable(getattr(self._model, "predict_proba")):
                                #                                 print_log(self._model.classes_)
                                #                                 print_log(self._model.predict_proba(X_val))
                                lb = LabelBinarizer()
                                _ = lb.fit_transform(self._model.classes_)
                                #                                 print_log(lb.classes_)
                                #                                 print_log(y_classes_bin)
                                #                                 print_log(lb.transform(["X"]))
                                y_val_bin = lb.transform(y_val)
                                self._configurations_mrr_values[self._data_set_file].append(\
                                label_ranking_average_precision_score( \
                                y_val_bin, \
                                self._model.predict_proba(X_val)))
                                found_function = True
                        except AttributeError:
                            pass

                        try:
                            if not found_function and callable(
                                    getattr(self._model, "decision_function")):
                                #                                 print_log(self._model.classes_)
                                #                                 print_log(self._model.decision_function(X_val))
                                lb = LabelBinarizer()
                                _ = lb.fit_transform(self._model.classes_)
                                #                                 print_log(lb.classes_)
                                #                                 print_log(y_classes_bin)
                                #                                 print_log(lb.transform(["X"]))
                                y_val_bin = lb.transform(y_val)
                                self._configurations_mrr_values[self._data_set_file].append(\
                                label_ranking_average_precision_score( \
                                y_val_bin, \
                                self._model.decision_function(X_val)))
                                found_function = True
                        except AttributeError:
                            pass
                        print_log("Mean Reciprocal Rank:")
                        print_log(self._configurations_mrr_values[
                            self._data_set_file][-1])
                        print_log("--- {} seconds ---".format(time.time() -
                                                              start_time))

                        i += 1

                    self._data_set_file = temp

                print_log("*** File {} done ***".format(j))  # Debug
                j += 1

        self._results_to_save_to_a_file["avg_accuracy"] = {}
        self._results_to_save_to_a_file["avg_mrr"] = {}

        avg_accuracy = None
        avg_mrr = None

        # Below, we print the average accuracies
        for key, value in self._configurations_accuracies.items():
            print_log("Accuracy of {}".format(key))  # Debug
            print_log("Each fold")
            print_log(value)
            print_log("Average")
            avg_accuracy = sum(value) / len(value)
            self._results_to_save_to_a_file["avg_accuracy"][key] = \
            avg_accuracy
            print_log(avg_accuracy)  # Debug
            print_log("MRR of {}".format(key))  # Debug
            print_log("Each fold")
            mrr = self._configurations_mrr_values[key]
            print_log(mrr)
            print_log("Average")
            avg_mrr = sum(mrr) / len(mrr)
            self._results_to_save_to_a_file["avg_mrr"][key] = \
            avg_mrr
            print_log(avg_mrr)

        self._results_to_save_to_a_file["accuracy_per_fold"] = \
        self._configurations_accuracies
        self._results_to_save_to_a_file["mrr_per_fold"] = \
        self._configurations_mrr_values
        print_log("--- {} seconds ---".format(time.time() - start_time))
        self._train_set = None
        self._test_set = None
        self._configurations_accuracies = {}
        self._configurations_mrr_values = {}
Example #18
print('labels: \n', y_lables)
print('probabilities of each label: \n', y_probas)
print('(Nsamples, Nlables):', np.shape(y_lables))
print('Coverage Error: ', ce)
# If y_lables is modified as shown below, the first row has two 1s: one ranked 2nd and one ranked 3rd, so taking the maximum rank gives 3
y_lables = np.array([[1, 1, 0], [0, 0, 1]])
ce = sm.coverage_error(y_lables, y_probas)
print('labels: \n', y_lables)
print('Coverage Error with modified labels: ', ce)  # (3 + 3) / 2 = 3

print('{0:-^70}'.format('LRAP'))

# Compute LRAP: the per-label ranks used for Coverage Error enter as reciprocals (precisions) and are then averaged
y_lables = np.array([[1, 0, 0], [0, 0, 1]])
# LRAP = 1/2 * (1/3 + 1/2) = 5/12 = 0.4166....
lrap = sm.label_ranking_average_precision_score(y_lables, y_probas)
print('labels: \n', y_lables)
print('probabilities of each label: \n', y_probas)
print('Label ranking average precision: ', lrap)

print('{0:-^70}'.format('Ranking Loss'))

# For the first sample, k can only be 0, because only y[0, 0] = 1;
# l can then only be 2, because only f[0, 0] < f[0, 2] (0.75 < 1), so the first sample has 1 wrongly ordered pair
# For the second sample, k can only be 2, since only y[1, 2] = 1; l can be 0 or 1, because both of those scores are greater than f[1, 2], so this row has 2
# Each sample has exactly one relevant label, so |y_i| = 1; there are 3 labels in total, so the per-sample normaliser is |y_i| * (n_labels - |y_i|) = 1 * 2 = 2
# The final result is therefore Ranking Loss = (1/2 + 2/2) / 2 = 3/4 = 0.75
rl = sm.label_ranking_loss(y_lables, y_probas)
print('labels: \n', y_lables)
print('probabilities of each label: \n', y_probas)
print('Ranking loss: ', rl)
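The hand computations above can be checked directly. A small sketch, assuming the y_probas defined earlier in this script is the scikit-learn documentation example [[0.75, 0.5, 1], [1, 0.2, 0.1]], which is consistent with the ranks used in the comments:

import numpy as np
import sklearn.metrics as sm

y_probas = np.array([[0.75, 0.5, 1], [1, 0.2, 0.1]])  # assumed, see note above
y_lables = np.array([[1, 0, 0], [0, 0, 1]])
print(sm.coverage_error(y_lables, y_probas))                         # (2 + 3) / 2 = 2.5
print(sm.label_ranking_average_precision_score(y_lables, y_probas))  # (1/2 + 1/3) / 2 = 0.4166...
print(sm.label_ranking_loss(y_lables, y_probas))                     # (1/2 + 2/2) / 2 = 0.75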
Example #19
    print(t, ':')
    print('    correct: ', end='')
    for idx in np.where(y_val[t] > 0.5)[0].tolist():
        sys.stdout.write('[' + ntagslist_sorted[idx] + '] ')
    print()
    print('  predicted: ', end='')
    for idx in np.where(predictions[t] > threshold)[0].tolist():
        sys.stdout.write('[' + ntagslist_sorted[idx] + '] ')
    print()

# Scikit-learn has some applicable performance [metrics]
# (http://scikit-learn.org/stable/modules/classes.html#module-sklearn.metrics)
# we can try:

print('Precision: {0:.3f} (threshold: {1:.2f})'.format(
    metrics.precision_score(y_val.flatten(),
                            predictions.flatten() > threshold), threshold))
print('Recall: {0:.3f} (threshold: {1:.2f})'.format(
    metrics.recall_score(y_val.flatten(),
                         predictions.flatten() > threshold), threshold))
print('F1 score: {0:.3f} (threshold: {1:.2f})'.format(
    metrics.f1_score(y_val.flatten(),
                     predictions.flatten() > threshold), threshold))

average_precision = metrics.average_precision_score(y_val.flatten(),
                                                    predictions.flatten())
print('Average precision: {0:.3f}'.format(average_precision))
print('Coverage: {0:.3f}'.format(metrics.coverage_error(y_val, predictions)))
print('LRAP: {0:.3f}'.format(
    metrics.label_ranking_average_precision_score(y_val, predictions)))
Example #20
def multilabel_metrics(pred_list, verbose, extra_vars, split):
    """
    Multilabel classification metrics. See the multilabel ranking metrics in the sklearn library for more info:
        http://scikit-learn.org/stable/modules/model_evaluation.html#multilabel-ranking-metrics

    :param pred_list: list of predicted labels (words) per sample
    :param verbose: if greater than 0 the metric measures are printed out
    :param extra_vars: extra variables
                        extra_vars['word2idx'] - dictionary mapping from words to indices
                        extra_vars['references'] - list of GT labels
    :param split: split on which we are evaluating
    :return: Dictionary of multilabel metrics
    """
    from sklearn import metrics as sklearn_metrics

    word2idx = extra_vars[split]['word2idx']

    # check if an additional dictionary matching raw to basic and general labels is provided
    # in that case a more general evaluation will be considered
    raw2basic = extra_vars[split].get('raw2basic', None)
    if raw2basic is not None:
        logging.info('Applying general evaluation with raw2basic dictionary.')

    if raw2basic is None:
        n_classes = len(word2idx)
    else:
        basic_values = set(raw2basic.values())
        n_classes = len(basic_values)
    n_samples = len(pred_list)

    # Create prediction matrix
    y_pred = np.zeros((n_samples, n_classes))
    for i_s, sample in list(enumerate(pred_list)):
        for word in sample:
            if raw2basic is None:
                y_pred[i_s, word2idx[word]] = 1
            else:
                word = word.strip()
                y_pred[i_s, raw2basic[word]] = 1

    # Prepare GT
    gt_list = extra_vars[split]['references']

    if raw2basic is None:
        y_gt = np.array(gt_list)
    else:
        idx2word = {v: k for k, v in iteritems(word2idx)}
        y_gt = np.zeros((n_samples, n_classes))
        for i_s, sample in list(enumerate(gt_list)):
            for raw_idx, is_active in list(enumerate(sample)):
                if is_active:
                    word = idx2word[raw_idx].strip()
                    y_gt[i_s, raw2basic[word]] = 1

    # Compute Coverage Error
    coverr = sklearn_metrics.coverage_error(y_gt, y_pred)
    # Compute Label Ranking AvgPrec
    avgprec = sklearn_metrics.label_ranking_average_precision_score(y_gt, y_pred)
    # Compute Label Ranking Loss
    rankloss = sklearn_metrics.label_ranking_loss(y_gt, y_pred)
    # Compute Precision, Recall and F1 score
    precision, recall, f1, _ = sklearn_metrics.precision_recall_fscore_support(y_gt, y_pred, average='micro')

    if verbose > 0:
        logging.info(
            '"coverage_error" (best: avg labels per sample = %f): %f' % (float(np.sum(y_gt)) / float(n_samples), coverr))
        logging.info('Label Ranking "average_precision" (best: 1.0): %f' % avgprec)
        logging.info('Label "ranking_loss" (best: 0.0): %f' % rankloss)
        logging.info('precision: %f' % precision)
        logging.info('recall: %f' % recall)
        logging.info('f1: %f' % f1)

    return {'coverage_error': coverr,
            'average_precision': avgprec,
            'ranking_loss': rankloss,
            'precision': precision,
            'recall': recall,
            'f1': f1}
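A hypothetical minimal call of multilabel_metrics, assuming a two-sample split named 'val' with a three-word vocabulary, no raw2basic mapping, and numpy/logging already imported at module level as the function body requires:

extra_vars = {'val': {'word2idx': {'cat': 0, 'dog': 1, 'bird': 2},
                      'references': [[1, 0, 1], [0, 1, 0]]}}
pred_list = [['cat'], ['dog', 'bird']]  # predicted label words for each sample
scores = multilabel_metrics(pred_list, verbose=1, extra_vars=extra_vars, split='val')
print(scores['average_precision'])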
Example #21
                            loss = criterion(output, tags)
                        loss.backward()
                        optimizer.step()

                        output_prob = output
                        if args.loss_type == "cross_entropy":
                            output_prob = F.softmax(output, dim=1)
                        elif args.loss_type == "bce":
                            output_prob = torch.sigmoid(output)

                        predict_vector = np.argmax(to_np(output_prob), axis=1)

                        tags_np = to_np(tags)
                        output_prob_np = to_np(output_prob)

                        ranking_ap_score = label_ranking_average_precision_score(
                            tags_np, output_prob_np)
                        ranking_loss = label_ranking_loss(
                            tags_np, output_prob_np)

                        label_vector = np.argmax(to_np(tags), axis=1)

                        bool_vector = predict_vector == label_vector
                        accuracy = bool_vector.sum() / len(bool_vector)

                        if batch_idx % args.log_interval == 0:
                            print(
                                'Train [{}] Batch {} / {}: Batch Loss {:2.4f} / Batch Acc {:2.4f} / Rank AP {:2.4f} / Rank Loss {:2.4f}'
                                .format(
                                    datetime.now().strftime(
                                        '%Y/%m/%d %H:%M:%S'), batch_idx,
                                    len(dataloader), loss.item(), accuracy,
Example #22
y_val = np.array(encoded_labels_df_val)

# Define model
linsvm = LinearSVC(loss='hinge')
#multi_class='ovr',
#verbose=True,
#max_iter=1000)
model = OneVsRestClassifier(linsvm, n_jobs=-1)

start = time.process_time()
model.fit(X_train, Y_train)
elapsed_fit = time.process_time() - start

print("Time to fit model (min):", elapsed_fit / 60)

start_predict = time.process_time()
### change
y_pred = model.decision_function(x_val)
elapsed_predict = time.process_time() - start_predict

print("Time to predict (min):", elapsed_predict / 60)

# Evaluate
### change
y_true = y_val
LRAP = label_ranking_average_precision_score(y_true, y_pred)

print("LRAP:", LRAP)

print(y_pred[0:3])
Example #23
def multilabel_metrics(pred_list, verbose, extra_vars, split):
    """
    Multilabel classification metrics. See the multilabel ranking metrics in the sklearn library for more info:
        http://scikit-learn.org/stable/modules/model_evaluation.html#multilabel-ranking-metrics

    :param pred_list: list of predicted labels (words) per sample
    :param verbose: if greater than 0 the metric measures are printed out
    :param extra_vars: extra variables
                        extra_vars['word2idx'] - dictionary mapping from words to indices
                        extra_vars['references'] - list of GT labels
    :param split: split on which we are evaluating
    :return: Dictionary of multilabel metrics
    """
    from sklearn import metrics as sklearn_metrics

    word2idx = extra_vars[split]['word2idx']

    # check if an additional dictionary matching raw to basic and general labels is provided
    # in that case a more general evaluation will be considered
    raw2basic = extra_vars[split].get('raw2basic', None)
    if raw2basic is not None:
        logger.info('Applying general evaluation with raw2basic dictionary.')

    if raw2basic is None:
        n_classes = len(word2idx)
    else:
        basic_values = set(raw2basic.values())
        n_classes = len(basic_values)
    n_samples = len(pred_list)

    # Create prediction matrix
    y_pred = np.zeros((n_samples, n_classes))
    for i_s, sample in list(enumerate(pred_list)):
        for word in sample:
            if raw2basic is None:
                y_pred[i_s, word2idx[word]] = 1
            else:
                word = word.strip()
                y_pred[i_s, raw2basic[word]] = 1

    # Prepare GT
    gt_list = extra_vars[split]['references']

    if raw2basic is None:
        y_gt = np.array(gt_list)
    else:
        idx2word = {v: k for k, v in iteritems(word2idx)}
        y_gt = np.zeros((n_samples, n_classes))
        for i_s, sample in list(enumerate(gt_list)):
            for raw_idx, is_active in list(enumerate(sample)):
                if is_active:
                    word = idx2word[raw_idx].strip()
                    y_gt[i_s, raw2basic[word]] = 1

    # Compute Coverage Error
    coverr = sklearn_metrics.coverage_error(y_gt, y_pred)
    # Compute Label Ranking AvgPrec
    avgprec = sklearn_metrics.label_ranking_average_precision_score(y_gt, y_pred)
    # Compute Label Ranking Loss
    rankloss = sklearn_metrics.label_ranking_loss(y_gt, y_pred)
    # Compute Precision, Recall and F1 score
    precision, recall, f1, _ = sklearn_metrics.precision_recall_fscore_support(y_gt, y_pred, average='micro')

    if verbose > 0:
        logger.info(
            '"coverage_error" (best: avg labels per sample = %f): %f' % (float(np.sum(y_gt)) / float(n_samples), coverr))
        logger.info('Label Ranking "average_precision" (best: 1.0): %f' % avgprec)
        logger.info('Label "ranking_loss" (best: 0.0): %f' % rankloss)
        logger.info('precision: %f' % precision)
        logger.info('recall: %f' % recall)
        logger.info('f1: %f' % f1)

    return {'coverage_error': coverr,
            'average_precision': avgprec,
            'ranking_loss': rankloss,
            'precision': precision,
            'recall': recall,
            'f1': f1}
Example #24
def stacking(cfg, files):

    print(list(files.keys()))
    ave_oof, ave_pred = average(cfg, files, True)
    tr_oof_files = [
        np.load(f'../output/{name}oof.npy')[:, :, np.newaxis]
        for name in files.keys()
    ] + [ave_oof[:, :, np.newaxis]]
    tr_oof = np.concatenate(tr_oof_files, axis=-1)
    test_files = [
        np.load(f'../output/{name}pred.npy')[:, :, np.newaxis]
        for name in files.keys()
    ] + [ave_pred[:, :, np.newaxis]]
    test_pred = np.concatenate(test_files, axis=-1)
    df = pd.read_csv(f'../input/train_curated.csv')
    y = split_and_label(df['labels'].values)

    mskfold = MultilabelStratifiedKFold(cfg.n_folds,
                                        shuffle=False,
                                        random_state=66666)
    folds = list(mskfold.split(y, y))

    predictions = np.zeros_like(test_pred)[:, :, 0]
    oof = np.zeros_like((y))
    for fold, (tr_idx, val_idx) in enumerate(folds):
        print('fold ', fold)
        if True:  # init
            K.clear_session()
            model = stacker(cfg, tr_oof.shape[2])
            best_epoch = 0
            best_score = -1

        for epoch in range(1000):
            if epoch - best_epoch > 15:
                break

            tr_x, tr_y = tr_oof[tr_idx], y[tr_idx]
            val_x, val_y = tr_oof[val_idx], y[val_idx]

            val_pred = model.predict(val_x)

            score = label_ranking_average_precision_score(val_y, val_pred)

            if score > best_score:
                best_score = score
                best_epoch = epoch
                oof[val_idx] = val_pred
                model.save_weights(f"../model/stacker{cfg.name}{fold}.h5")

            model.fit(x=tr_x, y=tr_y, batch_size=cfg.bs, verbose=0)
            print(f'{epoch} score {score} ,  best {best_score}...')

        model.load_weights(f"../model/stacker{cfg.name}{fold}.h5")
        predictions += model.predict(test_pred)

    print('lrap: ', label_ranking_average_precision_score(y, oof))
    predictions /= cfg.n_folds
    print(label_ranking_average_precision_score(y, oof))
    test = pd.read_csv('../input/sample_submission.csv')
    test.loc[:, test.columns[1:].tolist()] = predictions
    test.to_csv('submission.csv', index=False)
Example #25
def label_ranking_average_precision(targets, preds, target_threshold=0):
  targets, preds = to_numpy(targets), to_numpy(preds)
  if target_threshold is not None:
    targets = targets > target_threshold
  return metrics.label_ranking_average_precision_score(targets, preds)
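Hypothetical usage of the wrapper above, assuming to_numpy passes NumPy arrays through unchanged and sklearn.metrics is imported as metrics in the surrounding module:

import numpy as np

targets = np.array([[0.0, 1.0], [1.0, 0.0]])
preds = np.array([[0.2, 0.9], [0.7, 0.3]])
# Targets are binarized with target_threshold=0 before scoring.
print(label_ranking_average_precision(targets, preds))  # 1.0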
Example #26
def eval_running_model(dataloader, test=False):
    loss_fct = CrossEntropyLoss()
    model.eval()
    eval_loss, eval_hit_times = 0, 0
    nb_eval_steps, nb_eval_examples = 0, 0
    r10 = r2 = r1 = r5 = 0
    mrr = []
    if test:
        results_out = []
    for step, batch in enumerate(dataloader):
        batch = tuple(t.to(device) for t in batch)
        context_token_ids_list_batch, context_input_masks_list_batch, \
        response_token_ids_list_batch, response_input_masks_list_batch, labels_batch = batch

        with torch.no_grad():
            logits = model(context_token_ids_list_batch,
                           context_input_masks_list_batch,
                           response_token_ids_list_batch,
                           response_input_masks_list_batch)
            loss = loss_fct(logits, torch.argmax(labels_batch, 1))

        r2_indices = torch.topk(logits, 2)[1]  # R 2 @ 100
        r5_indices = torch.topk(logits, 5)[1]  # R 5 @ 100
        r10_indices = torch.topk(logits, 10)[1]  # R 10 @ 100
        r1 += (logits.argmax(-1) == 0).sum().item()
        r2 += ((r2_indices == 0).sum(-1)).sum().item()
        r5 += ((r5_indices == 0).sum(-1)).sum().item()
        r10 += ((r10_indices == 0).sum(-1)).sum().item()
        # mrr
        logits = logits.data.cpu().numpy()
        for logit in logits:
            if test: results_out.append(logit.tolist())
            y_true = np.zeros(len(logit))
            y_true[0] = 1
            mrr.append(label_ranking_average_precision_score([y_true],
                                                             [logit]))
        eval_loss += loss.item()
        nb_eval_examples += labels_batch.size(0)
        nb_eval_steps += 1
    eval_loss = eval_loss / nb_eval_steps
    eval_accuracy = r1 / nb_eval_examples
    if not test:
        result = {
            'train_loss': tr_loss / nb_tr_steps,
            'eval_loss': eval_loss,
            'R1': r1 / nb_eval_examples,
            'R2': r2 / nb_eval_examples,
            'R5': r5 / nb_eval_examples,
            'R10': r10 / nb_eval_examples,
            'MR': np.mean(1 / (np.array(mrr))),
            'MRR': np.mean(mrr),
            'epoch': epoch,
            'global_step': global_step,
        }
    else:
        result = {
            'eval_loss': eval_loss,
            'R1': r1 / nb_eval_examples,
            'R2': r2 / nb_eval_examples,
            'R5': r5 / nb_eval_examples,
            'R10': r10 / nb_eval_examples,
            'MR': np.mean(1 / (np.array(mrr))),
            'MRR': np.mean(mrr),
        }
    if test:
        if args.generate:
            export_scores_json(list(results_out))
        if (args.testset == 'devtest') and not args.generate:
            export_results(result)

    return result
Example #27
                                          cuda_device=device)
    eval_file = os.path.join(eval_path, frame_type + '.tsv')
    eval_df = pd.read_csv(eval_file,
                          sep='\t',
                          converters={'labels': literal_eval})
    labels = list(eval_df.columns)[3:-1]
    predictions, raw_outputs = model.predict(list(eval_df['text']))

    full_y_true = np.array(eval_df[labels].astype(int))
    full_y_pred = np.array(predictions)

    print(full_y_true.shape)
    print(full_y_pred.shape)

    cat_f1 = f1_score(full_y_true, full_y_pred, average='macro')
    cat_lrap = label_ranking_average_precision_score(full_y_true, full_y_pred)
    category_scores.append((frame_type, cat_f1, cat_lrap))

    for i, label in enumerate(labels):
        y_true = np.array(eval_df[label].astype(int))
        y_pred = np.array([predictions[j][i] for j in range(len(predictions))])
        score = f1_score(y_true, y_pred, average='macro')
        print(frame_type, label, score)
        all_scores.append((frame_type, label, score))

df_cat = pd.DataFrame(category_scores,
                      columns=['Frame Type', 'Macro F1', 'LRAP'])
df_cat.to_csv(
    f'/shared/2/projects/framing/models/classify/09-24-20_overall_eval.tsv',
    sep='\t')
Example #28
 def ranking_precision(self):
     self.ranking_precision = metrics.label_ranking_average_precision_score(self.ground_truth, self.predictions_raw)
     return 'Ranking Precision (0, 1]: ' + str(self.ranking_precision)
Example #29
cntr = 1.0
for iCntr in range(len(qList)):
    qCntr = qList[iCntr]

    if args.removeQuery:
        qLabel = qLabelArr[iCntr]
    else:
        qLabel = labelArr[qCntr]
    y_true = np.zeros((dist.shape[1]))
    indices = np.where(labelArr == qLabel)

    y_true[np.where(labelArr == qLabel)] = 1
    y_true = y_true[ind[iCntr, :]]
    maxVal = np.max(dist[iCntr, :])
    if args.removeQuery:
        currAP = label_ranking_average_precision_score(
            [y_true], [maxVal - dist[iCntr, :]])
    else:
        currAP = label_ranking_average_precision_score(
            [y_true[1:]], [maxVal - dist[iCntr, 1:]])

    running_ap += currAP
    cntr += 1
    print('AP for Query:%d Text:%s Occ:%d is %.4f' %
          (qCntr, vocabIdx[qLabel], vocabCntr[qLabel], currAP))

    with open(logs_dir + 'retrieval_accuracy.txt', 'a+') as fLog:
        fLog.write('AP for Query:%d Text:%s Occ:%d is %.4f \n' %
                   (qCntr, vocabIdx[qLabel], vocabCntr[qLabel], currAP))

    if args.printFlag:
        with open(os.path.join(ret_dir, 'CombAcc.txt'), 'a+') as fLog:
Example #30
def eval_turn(Config, model, data_loader, val_version, epoch_num, log_file):

    model.train(False)

    val_corrects1 = 0
    val_corrects2 = 0
    val_corrects3 = 0
    val_size = data_loader.__len__()
    item_count = data_loader.total_item_len
    t0 = time.time()
    get_l1_loss = nn.L1Loss()
    get_ce_loss = nn.CrossEntropyLoss()
    get_ce_sig_loss = nn.BCELoss()
    val_batch_size = data_loader.batch_size
    val_epoch_step = data_loader.__len__()
    num_cls = data_loader.num_cls

    val_loss_recorder = LossRecord(val_batch_size)
    val_celoss_recorder = LossRecord(val_batch_size)
    print('evaluating %s ...' % val_version, flush=True)

    eval_t = locals()
    sum_fbeta = 0
    y_pred, Y_test = [], []
    sum_fbeta = 0
    best_fbeta = 0
    ave_test_accu_final = 0
    test_file = open("./result_log/val.log", "a+")

    with torch.no_grad():
        for batch_cnt_val, data_val in enumerate(data_loader):
            # inputs = Variable(data_val[0].cuda())
            # labels = Variable(torch.LongTensor(np.array(data_val[1])).long().cuda())

            inputs, labels, labels_swap, swap_law, img_names = data_val
            labels_npy = np.array(labels)

            labels_tensor = Variable(
                torch.FloatTensor(np.array(labels)).cuda())

            labels_ = labels_npy.astype(np.uint8)

            inputs = Variable(inputs.cuda())

            outputs = model(inputs)
            loss = 0

            # ce_loss = get_ce_loss(outputs[0], labels).item()
            ce_loss = get_ce_sig_loss(outputs[0], labels_tensor).item()
            loss += ce_loss

            val_loss_recorder.update(loss)
            val_celoss_recorder.update(ce_loss)

            if Config.use_dcl and Config.cls_2xmul:
                outputs_pred = outputs[0] + outputs[1][:, 0:num_cls] + outputs[
                    1][:, num_cls:2 * num_cls]
            else:
                outputs_pred = outputs[0]

            # cal_sigmoid = nn.Sigmoid()
            # outputs_pred_s = cal_sigmoid(outputs_pred)
            ########  MAP is label-ranking based, so it does not need normalization
            # predict_multensor = torch.ge(outputs_pred, 0.5)     ###   values >= 0.5 are set to 1, the rest to 0 (thresholding)
            predict_mul_ = outputs_pred.cpu().numpy()

            temp_fbeta = label_ranking_average_precision_score(
                labels_, predict_mul_)

            predict_multensor = torch.ge(outputs_pred,
                                         0.5)  ###   values >= 0.5 are set to 1, the rest to 0 (thresholding)
            predict_mul = predict_multensor.cpu().numpy()

            sum_fbeta = sum_fbeta + temp_fbeta
            ave_num = batch_cnt_val + 1

            y_pred.extend(predict_mul[:])
            Y_test.extend(labels_[:])

        ave_acc = sum_fbeta / ave_num

        y_pred_ = np.array(y_pred)
        Y_test_ = np.array(Y_test)

        log_file.write(val_version + '\t' + str(val_loss_recorder.get_val()) +
                       '\t' + str(val_celoss_recorder.get_val()) + '\t' +
                       str(ave_acc) + '\n')

        t1 = time.time()
        since = t1 - t0
        print('--' * 30, flush=True)
        print('% 3d %s %s %s-loss: %.4f ||%s-ave@acc: %.4f ||time: %d' %
              (epoch_num, val_version, dt(), val_version,
               val_loss_recorder.get_val(init=True), val_version, ave_acc,
               since),
              flush=True)
        print('--' * 30, flush=True)

        eval_t['metrics_' + str(0.5)] = evaluate_test(predictions=y_pred_,
                                                      labels=Y_test_)

        metrics = eval_t['metrics_' + str(0.5)]

        output = "=> Test : Coverage = {}\n Average Precision = {}\n Micro Precision = {}\n Micro Recall = {}\n Micro F Score = {}\n".format(
            metrics['coverage'], ave_acc, metrics['micro_precision'],
            metrics['micro_recall'], metrics['micro_f1'])
        output += "=> Test : Macro Precision = {}\n Macro Recall = {}\n Macro F Score = {}\n ranking_loss = {}\n hamming_loss = {}\n\n".format(
            metrics['macro_precision'], metrics['macro_recall'],
            metrics['macro_f1'], metrics['ranking_loss'],
            metrics['hamming_loss'])
        # output += "\n=> Test : ma-False_positive_rate(FPR) = {}, mi-False_positive_rate(FPR) = {}\n".format(metrics['ma-FPR'],metrics['mi-FPR'])
        print(output)
        test_file.write('epoch:%d\n' % epoch_num)
        test_file.write(output)
        test_file.close()

    return ave_acc
Example #31
         labels = np.array(labels)
         X_w = np.array(np.split(couples, len(seq)))
         X_t = np.array(np.split(tag_couples, len(seq)))
         if test_labels ==0 :
             # Divide number of examples to rank so that GPU does not cause out of memory error
             splitter = get_min_divisor(len(labels))
             test_y = np.reshape(np.empty_like(labels, dtype = 'float32'),(labels.shape[0],1))
             for j in range(splitter):
                 test_loss, test_y_block = model.test_on_batch([X_w[:,j*(labels.shape[0]/splitter): (j+1)*(labels.shape[0]/splitter) ,:], 
                                                                X_t[:,j*(labels.shape[0]/splitter): (j+1)*(labels.shape[0]/splitter) ,:]],
                                                         labels[j*(labels.shape[0]/splitter): (j+1)*(labels.shape[0]/splitter)]) 
                 test_y[j*(labels.shape[0]/splitter): (j+1)*(labels.shape[0]/splitter)] = test_y_block
         else:
             test_loss, test_y = model.test_on_batch([X_w, X_t], labels) 
         
         lraps.append(label_ranking_average_precision_score(np.reshape(np.array(labels),test_y.shape).T , test_y.T))
         mrr, recall, prec = test_utils.print_accuracy_results(np.array(labels) , np.reshape(test_y, np.array(labels).shape))
         mrrs.append(mrr)
         recalls.append(recall)
         precs.append(prec)
         losses.append(test_loss)
         test_losses.append(test_loss)
         if len(losses) % 100 == 0:
             progbar.update(i, values=[("loss", np.sum(losses))])
             losses = []
         samples_seen += len(labels)
 
     print("\nSkipped="+str(skipped))        
     print("\nlrap="+"{0:.5f}".format(np.mean(np.array(lraps)))+" :loss=" + str(np.mean(test_losses)) + " :Samples seen="+str(test_docs)+ "\n")
     print("mrr=" + "{0:.5f}".format(np.mean(mrrs))+"\n")
     
Example #32
def build_model(config: TrainBertParams):  # TODO: add typing
    """
    Runs the main loop to build the trained Bert model for multi-label emotion classification
    """

    tokenizer = transformers.BertTokenizer.from_pretrained(config.bert_path, do_lower_case=True)

    # TODO: consider adding stratify for multi-label
    dfx, _, _ = load_data_file(config.emotions_train_file)
    df_train, df_valid = model_selection.train_test_split(dfx.sample(n=config.training_sample,
                                                                     random_state=1),
                                                          test_size=config.test_size,
                                                          random_state=42)
    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)
    train_dataset = BERTDataset(text=df_train.text.values,
                                target=df_train[EMOTIONS].values,
                                tokenizer=tokenizer,
                                max_len=MAX_LEN)
    train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                    batch_size=config.train_batch_size,
                                                    num_workers=4)

    valid_dataset = BERTDataset(text=df_valid.text.values,
                                target=df_valid[EMOTIONS].values,
                                tokenizer=tokenizer,
                                max_len=MAX_LEN)

    valid_data_loader = torch.utils.data.DataLoader(valid_dataset,
                                                    batch_size=config.valid_batch_size,
                                                    num_workers=1)

    device = torch.device(config.device)
    model = BERTBaseUncased(config.bert_path, config.bert_base_uncase_params)
    model.to(device)

    param_optimizer = list(model.named_parameters())
    no_decay = config.no_decay_components
    optimizer_parameters = [
        {
            "params": [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
            "weight_decay": config.weight_decay,
        },
        {
            "params": [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        },
    ]
    num_train_steps = int(len(df_train) / config.train_batch_size * config.epochs)
    optimizer = AdamW(optimizer_parameters, lr=_ADAM_LEARNING_RATE)
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=0,
                                                num_training_steps=num_train_steps)

    current_date = datetime.now().date().isoformat()
    output_model_file = os.path.join(config.output_model_path, f'emo_bert_model_{current_date}.bin')
    best_accuracy = 0
    for epoch in tqdm(range(config.epochs), desc='epochs'):
        train_model(train_data_loader, model, optimizer, device, scheduler,
                    config.bert_base_uncase_params.num_labels, epoch)
        outputs, targets = evaluate(valid_data_loader, model, device)
        # binarize the targets at the threshold before scoring with the ranking metric
        targets = (np.array(targets) >= _TARGETS_THRESHOLD).astype(int)
        outputs = np.array(outputs)
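        # LRAP lies in [0, 1] and higher is better; it is used here as the
        # validation "accuracy" that drives checkpoint selection below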
        accuracy = label_ranking_average_precision_score(targets, outputs)
        if accuracy > best_accuracy:
            torch.save(model.state_dict(), output_model_file)
            best_accuracy = accuracy

    TB_WRITER.close()
Exemplo n.º 33
0
    def _evaluate_samples(self, y_true, y_pred, metrics='all'):
        y_pred_binary = y_pred > 0.0

        # define the available metrics as lazy lambda functions
        # so we can execute only the ones actually requested
        all_metrics = {
            'Precision (doc avg)':
            lambda: precision_score(y_true, y_pred_binary, average='samples'),
            'Recall (doc avg)':
            lambda: recall_score(y_true, y_pred_binary, average='samples'),
            'F1 score (doc avg)':
            lambda: f1_score(y_true, y_pred_binary, average='samples'),
            'Precision (subj avg)':
            lambda: precision_score(y_true, y_pred_binary, average='macro'),
            'Recall (subj avg)':
            lambda: recall_score(y_true, y_pred_binary, average='macro'),
            'F1 score (subj avg)':
            lambda: f1_score(y_true, y_pred_binary, average='macro'),
            'Precision (weighted subj avg)':
            lambda: precision_score(y_true, y_pred_binary, average='weighted'),
            'Recall (weighted subj avg)':
            lambda: recall_score(y_true, y_pred_binary, average='weighted'),
            'F1 score (weighted subj avg)':
            lambda: f1_score(y_true, y_pred_binary, average='weighted'),
            'Precision (microavg)':
            lambda: precision_score(y_true, y_pred_binary, average='micro'),
            'Recall (microavg)':
            lambda: recall_score(y_true, y_pred_binary, average='micro'),
            'F1 score (microavg)':
            lambda: f1_score(y_true, y_pred_binary, average='micro'),
            'F1@5':
            lambda: f1_score(
                y_true, filter_pred_top_k(y_pred, 5) > 0.0, average='samples'),
            'NDCG':
            lambda: ndcg_score(y_true, y_pred),
            'NDCG@5':
            lambda: ndcg_score(y_true, y_pred, limit=5),
            'NDCG@10':
            lambda: ndcg_score(y_true, y_pred, limit=10),
            'Precision@1':
            lambda: precision_at_k_score(y_true, y_pred, limit=1),
            'Precision@3':
            lambda: precision_at_k_score(y_true, y_pred, limit=3),
            'Precision@5':
            lambda: precision_at_k_score(y_true, y_pred, limit=5),
            'LRAP':
            lambda: label_ranking_average_precision_score(y_true, y_pred),
            'True positives':
            lambda: true_positives(y_true, y_pred_binary),
            'False positives':
            lambda: false_positives(y_true, y_pred_binary),
            'False negatives':
            lambda: false_negatives(y_true, y_pred_binary),
        }

        if metrics == 'all':
            metrics = all_metrics.keys()

        with warnings.catch_warnings():
            warnings.simplefilter('ignore')

            return {metric: all_metrics[metric]() for metric in metrics}
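The lazy-lambda dictionary above computes only the metrics that are actually requested. A minimal, self-contained sketch of the same pattern, reduced to two sklearn metrics (all names here are illustrative and not part of the class above):

import numpy as np
from sklearn.metrics import f1_score, label_ranking_average_precision_score

def evaluate_selected(y_true, y_pred, metrics='all'):
    # each entry is a zero-argument lambda, so nothing is computed until requested
    available = {
        'F1 score (doc avg)': lambda: f1_score(y_true, y_pred > 0.0, average='samples'),
        'LRAP': lambda: label_ranking_average_precision_score(y_true, y_pred),
    }
    names = available.keys() if metrics == 'all' else metrics
    return {name: available[name]() for name in names}

y_true = np.array([[1, 0, 1], [0, 1, 0]])
y_pred = np.array([[0.9, -0.2, 0.4], [-0.5, 0.8, 0.1]])
print(evaluate_selected(y_true, y_pred, metrics=['LRAP']))   # {'LRAP': 1.0}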
Exemplo n.º 34
0
def score(y_true, y_pred, item_lst, six_db=False, A=1, B=1, C=1, top_k=150, mode='a',
          file_name='results.txt', save_path=''):
    idx_lst = [1]
    if six_db:
        item_lst = ['AraCyc', 'EcoCyc', 'HumanCyc', 'LeishCyc', 'TrypanoCyc', 'YeastCyc']
        idx_lst = [idx for idx in np.arange(len(item_lst))]
    print('\t>> Scores are saved to {0:s}...'.format(str(file_name)))
    for i, idx in enumerate(idx_lst):
        y = y_true
        y_hat = y_pred
        if six_db:
            y = y_true[idx]
            y_hat = y_pred[idx]
            y = y.reshape((1, y.shape[0]))
            y_hat = np.reshape(y_hat, (1, len(y_hat)))
            save_data(data='*** Scores for {0:s}...\n'.format(str(item_lst[i])), file_name=file_name,
                      save_path=save_path, mode=mode, w_string=True, print_tag=False)
        else:
            save_data(data='*** Scores for {0:s}...\n'.format(item_lst[i]), file_name=file_name, save_path=save_path,
                      mode='w', w_string=True, print_tag=False)
        ce_samples = coverage_error(y, y_hat)
        save_data(data='\t\t1)- Coverage error score: {0:.4f}\n'.format(ce_samples), file_name=file_name,
                  save_path=save_path, mode=mode, w_string=True, print_tag=False)

        lrl_samples = label_ranking_loss(y, y_hat)
        save_data(data='\t\t2)- Ranking loss score: {0:.4f}\n'.format(lrl_samples), file_name=file_name,
                  save_path=save_path, mode=mode, w_string=True, print_tag=False)

        lrap_samples = label_ranking_average_precision_score(y, y_hat)
        save_data(data='\t\t3)- Label ranking average precision score: {0:.4f}\n'.format(lrap_samples),
                  file_name=file_name, save_path=save_path, mode=mode, w_string=True, print_tag=False)

        # when y_pred holds scores/probabilities rather than a 0/1 indicator matrix,
        # report the propensity-scored metrics and skip the threshold-based ones below
        if not np.array_equal(y_pred, y_pred.astype(bool)):
            top_k = y_true.shape[1] if top_k > y_true.shape[1] else top_k
            psp_samples = psp(y_prob=y_hat, y_true=y, A=A, B=B, C=C, top_k=top_k)
            save_data(data='\t\t4)- Propensity Scored Precision at {0}: {1:.4f}\n'.format(top_k, psp_samples),
                      file_name=file_name, save_path=save_path, mode=mode, w_string=True, print_tag=False)

            ndcg_samples = psndcg(y_prob=y_hat, y_true=y, A=A, B=B, C=C, top_k=top_k)
            save_data(data='\t\t5)- Propensity Scored nDCG at {0}: {1:.4f}\n'.format(top_k, ndcg_samples),
                      file_name=file_name, save_path=save_path, mode=mode, w_string=True, print_tag=False)
            continue

        hl_samples = hamming_loss(y, y_hat)
        save_data(data='\t\t4)- Hamming-Loss score: {0:.4f}\n'.format(hl_samples), file_name=file_name,
                  save_path=save_path, mode=mode, w_string=True, print_tag=False)

        pr_samples_average = precision_score(y, y_hat, average='samples')
        pr_samples_micro = precision_score(y, y_hat, average='micro')
        pr_samples_macro = precision_score(y, y_hat, average='macro')
        save_data(data='\t\t5)- Precision...\n', file_name=file_name, save_path=save_path, mode=mode, w_string=True,
                  print_tag=False)
        save_data(data='\t\t\t--> Average sample precision: {0:.4f}\n'.format(pr_samples_average), file_name=file_name,
                  save_path=save_path, mode=mode, w_string=True, print_tag=False)
        save_data(data='\t\t\t--> Micro precision: {0:.4f}\n'.format(pr_samples_micro), file_name=file_name,
                  save_path=save_path, mode=mode, w_string=True, print_tag=False)
        save_data(data='\t\t\t--> Macro precision: {0:.4f}\n'.format(pr_samples_macro), file_name=file_name,
                  save_path=save_path, mode=mode, w_string=True, print_tag=False)

        rc_samples_average = recall_score(y, y_hat, average='samples')
        rc_samples_micro = recall_score(y, y_hat, average='micro')
        rc_samples_macro = recall_score(y, y_hat, average='macro')
        save_data(data='\t\t6)- Recall...\n', file_name=file_name, save_path=save_path, mode=mode, w_string=True,
                  print_tag=False)
        save_data(data='\t\t\t--> Average sample recall: {0:.4f}\n'.format(rc_samples_average), file_name=file_name,
                  save_path=save_path, mode=mode, w_string=True, print_tag=False)
        save_data(data='\t\t\t--> Micro recall: {0:.4f}\n'.format(rc_samples_micro), file_name=file_name,
                  save_path=save_path, mode=mode, w_string=True, print_tag=False)
        save_data(data='\t\t\t--> Macro recall: {0:.4f}\n'.format(rc_samples_macro), file_name=file_name,
                  save_path=save_path, mode=mode, w_string=True, print_tag=False)

        f1_samples_average = f1_score(y, y_hat, average='samples')
        f1_samples_micro = f1_score(y, y_hat, average='micro')
        f1_samples_macro = f1_score(y, y_hat, average='macro')
        save_data(data='\t\t7)- F1-score...\n', file_name=file_name, save_path=save_path, mode=mode, w_string=True,
                  print_tag=False)
        save_data(data='\t\t\t--> Average sample f1-score: {0:.4f}\n'.format(f1_samples_average), file_name=file_name,
                  save_path=save_path, mode=mode, w_string=True, print_tag=False)
        save_data(data='\t\t\t--> Micro f1-score: {0:.4f}\n'.format(f1_samples_micro), file_name=file_name,
                  save_path=save_path, mode=mode, w_string=True, print_tag=False)
        save_data(data='\t\t\t--> Macro f1-score: {0:.4f}\n'.format(f1_samples_macro), file_name=file_name,
                  save_path=save_path, mode=mode, w_string=True, print_tag=False)

        js_score_samples = jaccard_score(y, y_hat, average='samples')
        js_score_micro = jaccard_score(y, y_hat, average='micro')
        js_score_macro = jaccard_score(y, y_hat, average='macro')
        js_score_weighted = jaccard_score(y, y_hat, average='weighted')
        save_data(data='\t\t8)- Jaccard score...\n', file_name=file_name, save_path=save_path, mode=mode,
                  w_string=True, print_tag=False)
        save_data(data='\t\t\t--> Jaccard score (samples): {0:.4f}\n'.format(js_score_samples),
                  file_name=file_name, save_path=save_path, mode=mode, w_string=True, print_tag=False)
        save_data(data='\t\t\t--> Jaccard score (micro): {0:.4f}\n'.format(js_score_micro),
                  file_name=file_name, save_path=save_path, mode=mode, w_string=True, print_tag=False)
        save_data(data='\t\t\t--> Jaccard score (macro): {0:.4f}\n'.format(js_score_macro),
                  file_name=file_name, save_path=save_path, mode=mode, w_string=True, print_tag=False)
        save_data(data='\t\t\t--> Jaccard score (weighted): {0:.4f}\n'.format(js_score_weighted),
                  file_name=file_name, save_path=save_path, mode=mode, w_string=True, print_tag=False)

        tn, fp, fn, tp = confusion_matrix(y.flatten(), y_hat.flatten()).ravel()
        save_data(data='\t\t9)- Confusion matrix...\n', file_name=file_name, save_path=save_path, mode=mode,
                  w_string=True, print_tag=False)
        save_data(data='\t\t\t--> True positive: {0}\n'.format(tp), file_name=file_name, save_path=save_path,
                  mode=mode, w_string=True, print_tag=False)
        save_data(data='\t\t\t--> True negative: {0}\n'.format(tn), file_name=file_name, save_path=save_path,
                  mode=mode, w_string=True, print_tag=False)
        save_data(data='\t\t\t--> False positive: {0}\n'.format(fp), file_name=file_name, save_path=save_path,
                  mode=mode, w_string=True, print_tag=False)
        save_data(data='\t\t\t--> False negative: {0}\n'.format(fn), file_name=file_name, save_path=save_path,
                  mode=mode, w_string=True, print_tag=False)
Exemplo n.º 35
0
plrloss = []
plraprecision = []

nscoreone = 0
nscoretwo = 0
correctness = 0
for i in range(0,10):
	p = classifier2.predictor()
	p.learnPredictor()
	n_predicted = p.predict()
	correct = p.mlb.transform(util2.getCorrectGenres(p.testExamples))

	ny_score = np.array(n_predicted)
	y_true = np.array(correct)
	nscoreone += label_ranking_loss(y_true, ny_score)
	nscoretwo += label_ranking_average_precision_score(y_true, ny_score)
	correctness += util2.printCorrectness(p.mlb, p.testExamples, n_predicted, correct)

print "LABEL RANKING LOSS: " + str(float(nscoreone)/10)
print "LABEL RANKING AVERAGE PRECISION: " + str(float(nscoretwo)/10)
print "CORRECTNESS: " + str(float(correctness)/10)
# util2.printAccuracyByGenre(p.mlb, p.testExamples, n_predicted, correct)
# util2.printOutput(p.mlb, p.testExamples, n_predicted, correct)




# print "=========="
# print "PERCENT RESULTS"
# print "LABEL RANKING LOSS:"
# pscoreone = label_ranking_loss(y_true, py_score)
Exemplo n.º 36
0
# make predictions on test data
predicted = clf.predict_proba(Y)
scores = np.reshape(np.concatenate((predicted[0][:,1], predicted[1][:,1], predicted[2][:,1])), (TEST_SIZE, OUTPUTS))

# compute error for multilabel ranking
# coverage error
# the best value is equal to the avg number of labels in test_label per sample
# i.e. 2.0550000000000002
cover_err = coverage_error(test_label, scores)
# cov_errs.append(cover_err)
print("RandomForest - Coverage error: " + str(cover_err))

# label ranking average precision score
# best value is 1
lrap_score = label_ranking_average_precision_score(test_label, scores)
# lrap_scores.append(lrap_score)
print("RandomForest - Label ranking avg precision score: " + str(lrap_score))

# compute label ranking loss
# best value is 0
ranking_loss = label_ranking_loss(test_label, scores)
# ranking_losses.append(ranking_loss)
print("RandomForest - Ranking loss: " + str(ranking_loss))


    # avg_cov_err = np.mean(cover_err)
    # print "RandomForest CV avg coverage error - Estimators " + str(estimators) + " " + str(avg_cov_err)
    # avg_lrap_score = np.mean(lrap_scores)
    # print "RandomForest CV avg lrap score - Estimators " + str(estimators) + " " + str(avg_lrap_score)
    # avg_rank_loss = np.mean(ranking_losses)
Exemplo n.º 37
0
def main():
    #     matplotlib.style.use('ggplot')
    logging.basicConfig(filename='classify.log', filemode='w', \
    level=logging.DEBUG)
    current_dir = os.path.dirname(os.path.abspath( \
    inspect.getfile(inspect.currentframe())))

    data_set_file = "./mozilla_firefox/pre_processing_experiments/output_without_cleaning_without_stemming_without_lemmatizing_without_stop_words_removal_without_punctuation_removal_without_numbers_removal.json"  # The path of the file which
    # "./eclipse_jdt/pre_processing_experiments/output_without_cleaning_without_stemming_without_lemmatizing_without_stop_words_removal_without_punctuation_removal_without_numbers_removal.json"
    # "./mozilla_firefox/pre_processing_experiments/output_without_cleaning_without_stemming_without_lemmatizing_without_stop_words_removal_without_punctuation_removal_without_numbers_removal.json"

    # contains the pre-processed output
    # Below, the path of the file which contains a dictionary related
    # to the mappings of the developers
    developers_dict_file = "../developers_dict.json"
    # Below, the path of the file which contains a list of the
    # relevant distinct developers
    developers_list_file = "../developers_list.json"

    np.random.seed(0)  # We set the seed

    start_time = time.time()  # We get the time expressed in seconds
    # since the epoch

    # First we load the data of the three aforementioned files
    json_data = load_data_set(data_set_file)
    developers_dict_data = None
    #     developers_dict_data = load_developers_mappings(developers_dict_file)
    developers_list_data = None
    #     developers_list_data = load_distinct_developers_list(developers_list_file)

    #     sm = SMOTE(random_state=42)

    # Then, we build a data frame using the loaded data set, the
    # loaded developers mappings, the loaded distinct developers.
    df = build_data_frame(json_data, developers_dict_data,
                          developers_list_data)

    s = " ".join([tr.lower() for tr in df['text'].tolist()])

    wordcloud = WordCloud(max_font_size=40).generate(s)
    fig = plt.figure()
    # Display the generated image:
    # the matplotlib way:
    plt.imshow(wordcloud)
    plt.axis("off")

    save_wordcloud_file = os.path.join(current_dir, "wordcloud.png")

    if save_wordcloud_file:
        plt.savefig(save_wordcloud_file, bbox_inches="tight")
        plt.close(fig)
    else:
        plt.show()


#     TO DO: Fix the lines below later
#     print("Histogram of the the frequencies of each class")
#     pd.value_counts(df['class'], sort=False).hist()
#     pylab.show()

# Below, there is a dictionary to store the names, the classifiers
# used, the parameters sent to the constructor of the classifiers
# and the fitted classifiers
    models = { \
        "RDF": [RandomForestClassifier, {
            "n_estimators": 50,
            "n_jobs": -1
        }, None], \
#         "ExtraTreesClassifier": [ExtraTreesClassifier, {
#             "n_estimators": 50,
#             "n_jobs": -1
#         }, None], \
        "NB": [MultinomialNB, {}, None], \
#         "SVM": [SVC, { \
#             "kernel": "linear", \
#             "probability": True
#         }, None], \
        "Perceptron": [Perceptron, { \
           "n_jobs": -1, \
           "class_weight": "balanced"
        }, None], \
        "PassiveAggressiveClassifier": [PassiveAggressiveClassifier, { \
            "n_jobs": -1, \
            "class_weight": "balanced"
        }, None], \
#         "RidgeClassifier": [RidgeClassifier, { \
#             "solver": "sag", \
#             "normalize": True
#         }, None], \
        "RidgeClassifier (with wrapper)": [OneVsRestClassifier, { \
            "n_jobs": -1, \
            "estimator": RidgeClassifier(solver="sag", \
            normalize=True \
#             class_weight="balanced" # Bug: should be fixed

            )
        }, None], \
        "Linear SVM": [LinearSVC, { \
            "random_state": 0, \
            "class_weight": "balanced" \
        }, None], \
        "Linear SVM (with wrapper)": [OneVsRestClassifier, { \
            "n_jobs": -1, \
            "estimator": LinearSVC(random_state=0, class_weight="balanced") \
        }, None], \
        "CalibratedClassifierCV (Linear SVM with wrapper)": [CalibratedClassifierCV, { \
            "base_estimator": OneVsRestClassifier(n_jobs=-1, \
            estimator=LinearSVC(random_state=0, class_weight="balanced")) \
        }, None], \
        "Logistic Regression": [LogisticRegression, {
            "n_jobs": -1, \
            "class_weight": "balanced" \
#             "multi_class": "multinomial", \
#             "solver": "newton-cg"


        }, None], \
        "Stochastic Gradient Descent": [SGDClassifier, {
            "n_jobs": -1, \
            "n_iter": 50, \
            "shuffle": True, \
            "class_weight": "balanced"
        }, None], \
        "Nearest Centroid": [NearestCentroid, {}, None],
    #         "RadiusNeighborsClassifier": [RadiusNeighborsClassifier, { \
    #         }, None]
    #         "LinearDiscriminantAnalysis": [LinearDiscriminantAnalysis, \
    #         { \
    #         }, None], \
    #         "QuadraticDiscriminantAnalysis": [ \
    #         QuadraticDiscriminantAnalysis, { \
    #         }, None], \
    #         "K Nearest Neighbors": [KNeighborsClassifier, { \
    #         }, None], \
    #         "DecisionTreeClassifier": [DecisionTreeClassifier, { \
    #         }, None], \
    #         "Bagging Linear SVM": [BaggingClassifier, { \
    #             "base_estimator": LinearSVC(random_state=0, \
    #         class_weight="balanced"), \
    #             "max_samples": 0.5, \
    #             "max_features": 0.5, \
    #             "random_state": 0, \
    #             "n_jobs": -1, \
    #             "n_estimators": 100
    #         }, None], \
    #         "Bagging K Nearest Neighbors": [BaggingClassifier, { \
    #             "base_estimator": KNeighborsClassifier() \
    #         }, None]
    #         "AdaBoostClassifier": [AdaBoostClassifier, { \
    #             "base_estimator": SGDClassifier(loss="log", n_jobs=-1, \
    #             n_iter=50, shuffle=True, class_weight="balanced"), \
    #             "n_estimators": 50 \
    #         }, None], \
    #         "GradientBoostingClassifier": [GradientBoostingClassifier, \
    #         { \
    #             "n_estimators": 10 \
    #         }, None] \
    #         "VotingClassifier": [VotingClassifier, { \
    #             "estimators": [ \
    #                 ("pac", PassiveAggressiveClassifier(n_jobs=-1)),
    #                 ("rc", RidgeClassifier(solver="sag")),
    #                 ("lsvc", LinearSVC()),
    #                 ("lr", LogisticRegression(n_jobs=-1)),
    #                 ("sgdc", SGDClassifier(n_jobs=-1, n_iter=50))
    #             ], \
    #             "voting": "hard", \
    #             "n_jobs": -1
    #         }, None],
    #         "VotingClassifier2": [VotingClassifier, { \
    #             "estimators": [ \
    #                 ("lr", LogisticRegression(n_jobs=-1)), \
    #                 ("sgdc", SGDClassifier(loss="modified_huber", \
    #                 n_jobs=-1, n_iter=50)), \
    #                 ("RandomForestClassifier", RandomForestClassifier( \
    #                 n_estimators=50, n_jobs=-1)) \
    #             ], \
    #             "voting": "soft", \
    #             "n_jobs": -1
    #         }, None],
    #         "StackingClassifier": [StackingClassifier, { \
    #             "classifiers": [ \
    #                 SGDClassifier(loss="modified_huber", \
    #                 n_jobs=-1, n_iter=50), \
    #                 CalibratedClassifierCV(base_estimator= \
    #                 OneVsRestClassifier(n_jobs=-1, \
    #                 estimator=LinearSVC(random_state=0))), \
    #                 CalibratedClassifierCV(base_estimator= \
    #                 OneVsRestClassifier(n_jobs=-1, \
    #                 estimator=RidgeClassifier(solver="sag"))) \
    #             ], \
    #             "use_probas": True, \
    #             "average_probas": False, \
    #             "meta_classifier": LogisticRegression(n_jobs=-1)
    #         }, None]
    #         "SoftmaxRegression": [SoftmaxRegression, {}, None], \
    #         "MultiLayerPerceptron": [MultiLayerPerceptron, {}, None]
    }

    # Below, there is a dictionary to store the accuracies for each
    # classifier
    models_predictions = {}

    chi2_feature_selection = SelectKBest(chi2, k="all")

    print_log("Splitting the data set")  # Debug
    #     df = df[-30000:]
    train_set, val_set, test_set = np.split(df, \
        [int(.9*len(df)), int(.9999999999999999999999*len(df))])
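    # NOTE: the second split fraction rounds to 1.0 in floating point, so
    # test_set ends up empty; only train_set and val_set are evaluated below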

    #     train_set, val_set, test_set = np \
    #     .split(df.sample(frac=1), \
    #            [int(.6*len(df)), int(.8*len(df))])

    print_log("Shape of the initial Data Frame")  # Debug
    print_log(df.shape)  # Debug
    print_log(df['class'].value_counts(normalize=True))
    print_log("Shape of the training set")  # Debug
    print_log(train_set.shape)  # Debug
    print_log(train_set['class'].value_counts(normalize=True))
    print_log("Shape of the validation set")  # Debug
    print_log(val_set.shape)  # Debug
    print_log(val_set['class'].value_counts(normalize=True))
    print_log("Shape of the test set")  # Debug
    print_log(test_set.shape)  # Debug
    print_log(test_set['class'].value_counts(normalize=True))
    print_log("We count the occurrence of each term")  # Debug

    count_vectorizer = CountVectorizer( \
        lowercase=False, \
        token_pattern=u"(?u)\S+"
    #        ngram_range=(1,2) \
    #        max_df=0.5, \
    #        max_features=100000
    )
    print_log("Size of the vocabulary")
    X_train_counts = count_vectorizer \
    .fit_transform(df['text'].values)
    print_log(X_train_counts.shape)
    X_train_counts = count_vectorizer \
    .fit_transform(train_set['text'].values)
    print_log(X_train_counts.shape)
    print_log("Use of the TF-IDF model")  # Debug
    tfidf_transformer = TfidfTransformer(use_idf=True, smooth_idf=False)
    # Debug
    print_log("Computation of the weights of the TF-IDF model")
    X_train = tfidf_transformer.fit_transform(X_train_counts)
    y_train = train_set['class'].values

    #     standard_scaler = StandardScaler(with_mean=False)
    #
    #     X_train = standard_scaler.fit_transform(X_train, y_train)
    #
    #     print("Shape of the training set before over sampling") # Debug
    #     print(X_train.shape) # Debug
    #
    #     X_train, y_train = resample(X_train, y_train, random_state=0)

    X_train = chi2_feature_selection.fit_transform(X_train, y_train)
    #     X_train_dense = None
    #     y_train_dense = None
    #
    #     dense_transformer = DenseTransformer()
    #     le = LabelEncoder()

    #     X_train, y_train = sm.fit_sample(X_train.toarray(), y_train)
    #
    #     if hasattr(X_train, 'dtype') and np.issubdtype(X_train.dtype, np.float):
    #         # preserve float family dtype
    #         X_train = sp.csr_matrix(X_train)
    #     else:
    #         # convert counts or binary occurrences to floats
    #         X_train = sp.csr_matrix(X_train, dtype=np.float64)
    #
    #     print("Shape of the training set after over sampling") # Debug
    #     print(X_train.shape)
    #     print(pd.Series(y_train).value_counts(normalize=True))

    print_log("Training of the models")  # Debug
    for key, value in models.items():
        print_log(key)
        #         if key == "SoftmaxRegression" or key == "MultiLayerPerceptron":
        #             X_train_dense = dense_transformer.fit_transform(X_train)
        #             y_train_dense = le.fit_transform(y_train)
        #             models[key][-1] = models[key][0](minibatches=1) \
        #             .fit(X_train_dense, y_train_dense)
        #         else:
        #         if key == "LinearDiscriminantAnalysis":
        #             models[key][-1] = models[key][0](**models[key][1]) \
        #             .fit(X_train.toarray(), y_train)
        #         else:
        models[key][-1] = models[key][0](**models[key][1]) \
        .fit(X_train, y_train)
        print_log("--- {} seconds ---".format(time.time() - start_time))

    print_log("We count the occurrence of each term in the val. " + \
              "set") # Debug
    X_val_counts = count_vectorizer \
    .transform(val_set['text'].values)
    print_log("Computation of the weights of the TF-IDF model " + \
              "for the validation set") # Debug
    X_val = tfidf_transformer.transform(X_val_counts)
    #     X_val = standard_scaler.transform(X_val)
    X_val = chi2_feature_selection.transform(X_val)
    y_val = val_set['class'].values
    #     X_val_dense = None
    #     y_val_dense = None
    print_log("Making predictions")  # Debug

    for key, value in models.items():
        print_log(key)
        #         if key == "SoftmaxRegression" or key == "MultiLayerPerceptron":
        #             X_val_dense = dense_transformer.transform(X_val)
        #             y_val_dense = le.transform(y_val)
        #             models_predictions[key] = np.mean(value[-1] \
        #             .predict(X_val_dense) == y_val_dense)
        #         else:
        #         if key == "LinearDiscriminantAnalysis":
        #             models_predictions[key] = np.mean(value[-1] \
        #             .predict(X_val.toarray()) == y_val)
        #         else:
        models[key].append(value[-1] \
        .predict(X_val))
        models_predictions[key] = np.mean(models[key][-1] == y_val)
        print_log("--- {} seconds ---".format(time.time() - start_time))

    # Below, we print the accuracy of each classifier
    for key, value in models_predictions.items():
        print_log("Accuracy of {}".format(key))  # Debug
        print_log(value)  # Debug
        print_log("Predicted labels")
        print_log(models[key][-1])
        print_log("True labels")
        print_log(y_val)
        try:
            if callable(getattr(models[key][-2], "predict_proba")):

                #                 print_log(models[key][-2].classes_)
                #                 print_log(models[key][-2].predict_proba(X_val))

                lb = LabelBinarizer()

                _ = lb.fit_transform(models[key][-2].classes_)
                #                 print_log(lb.classes_)
                #                 print_log(y_classes_bin)
                #                 print_log(lb.transform(["exclude"]))

                y_val_bin = lb.transform(y_val)
                print_log("Mean Reciprocal Rank:")
                print_log(label_ranking_average_precision_score( \
                y_val_bin, models[key][-2].predict_proba(X_val)))
        except AttributeError:
            pass
        print_log("Detailed report:")
        print_log(classification_report(y_val, models[key][-1]))

    print_log("Confusion matrix of Linear SVM (with wrapper):")
    cm = confusion_matrix(y_val,
                          models["Linear SVM"][-1],
                          labels=df['class'].unique())
    print_log(df['class'].unique())
    print_log(cm)
    df_cm = pd.DataFrame(cm,
                         index=df['class'].unique(),
                         columns=df['class'].unique())
    fig = plt.figure(figsize=(20.0, 12.5))
    sn.set(font_scale=0.5)
    sn.heatmap(df_cm, annot=True, annot_kws={"size": 8})

    save_file = os.path.join(current_dir, "confusion_matrix.png")

    if save_file:
        plt.savefig(save_file, bbox_inches="tight")
        plt.close(fig)
    else:
        plt.show()

    print_log("--- {} seconds ---".format(time.time() - start_time))
    # We dump the data frame
    df.to_csv("pre_processed_data.csv")
Exemplo n.º 38
0
import numpy as np
from sklearn.metrics import label_ranking_average_precision_score

# In our use case 1 means that the image is relevant (same label as the query image)
# And 0 means that the image is irrelevant
y_true = np.array([[1, 1, 0, 0]])

# For each train image we compute the relevance score
''' Example 1 '''
y_score = np.array([[28, 10, 1, 0.5]])
label_ranking_average_precision_score(y_true, y_score)
# In this first example, the two relevant items have the highest score,
# the scoring function returns 1.0
''' Example 2'''
y_score = np.array([[28, 10, 10, 0.5]])
label_ranking_average_precision_score(y_true, y_score)
# returns 0.83333333333333326
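# Where 0.8333... comes from: the first relevant item (score 28) is ranked first,
# giving precision 1/1; the second relevant item is tied at 10 with an irrelevant
# item, so everything scoring >= 10 counts -- 2 relevant out of 3, i.e. 2/3.
# LRAP averages these per-label precisions: (1 + 2/3) / 2 = 5/6.
assert abs(label_ranking_average_precision_score(y_true, y_score) - 5 / 6) < 1e-9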
''' Example 3'''
y_score = np.array([[28, 10, 28, 0.5]])
label_ranking_average_precision_score(y_true, y_score)
# returns 0.58333333333333326
''' Example 4'''
y_score = np.array([[10, 10, 28, 28]])
label_ranking_average_precision_score(y_true, y_score)
# returns 0.5
Exemplo n.º 39
0
                    for j in range(splitter):
                        # integer division keeps the slice bounds ints
                        test_loss, test_y_block = model.test_on_batch([
                            X_w[:, j * (labels.shape[0] // splitter):(j + 1) *
                                (labels.shape[0] // splitter), :],
                            X_t[:, j * (labels.shape[0] // splitter):(j + 1) *
                                (labels.shape[0] // splitter), :]
                        ], labels[j * (labels.shape[0] // splitter):(j + 1) *
                                  (labels.shape[0] // splitter)])
                        test_y[j * (labels.shape[0] // splitter):(j + 1) *
                               (labels.shape[0] // splitter)] = test_y_block
                else:
                    test_loss, test_y = model.test_on_batch([X_w, X_t], labels)

                lraps.append(
                    label_ranking_average_precision_score(
                        np.reshape(np.array(labels), test_y.shape).T,
                        test_y.T))
                mrr, recall, prec = test_utils.print_accuracy_results(
                    np.array(labels), np.reshape(test_y,
                                                 np.array(labels).shape))
                mrrs.append(mrr)
                recalls.append(recall)
                precs.append(prec)
                losses.append(test_loss)
                test_losses.append(test_loss)
                if len(losses) % 100 == 0:
                    progbar.update(i, values=[("loss", np.sum(losses))])
                    losses = []
                samples_seen += len(labels)

            print("\nSkipped=" + str(skipped))
Exemplo n.º 40
0
def eval_turn(Config, model, data_loader, val_version, epoch_num, log_file):

    model.train(False)

    val_corrects1 = 0
    val_corrects2 = 0
    val_corrects3 = 0
    val_size = data_loader.__len__()
    item_count = data_loader.total_item_len
    t0 = time.time()
    get_l1_loss = nn.L1Loss()
    get_ce_loss = nn.CrossEntropyLoss()

    val_batch_size = data_loader.batch_size
    val_epoch_step = data_loader.__len__()
    num_cls = data_loader.num_cls

    val_loss_recorder = LossRecord(val_batch_size)
    val_celoss_recorder = LossRecord(val_batch_size)
    print('evaluating %s ...' % val_version, flush=True)

    sum_fbeta = 0

    with torch.no_grad():
        for batch_cnt_val, data_val in enumerate(data_loader):

            # inputs = Variable(data_val[0].cuda())
            # labels = Variable(torch.LongTensor(np.array(data_val[1])).long().cuda())

            inputs, labels, labels_swap, swap_law, img_names = data_val
            labels_npy = np.array(labels)

            labels_tensor = Variable(torch.FloatTensor(labels_npy).cuda())

            labels_ = labels_npy.astype(np.uint8)

            inputs = Variable(inputs.cuda())

            outputs = model(inputs)
            loss = 0

            # ce_loss = get_ce_loss(outputs[0], labels).item()
            ce_loss = get_sigmoid_ce(outputs[0], labels_tensor).item()
            loss += ce_loss

            val_loss_recorder.update(loss)
            val_celoss_recorder.update(ce_loss)

            if Config.use_dcl and Config.cls_2xmul:
                outputs_pred = outputs[0] + outputs[1][:, 0:num_cls] + outputs[
                    1][:, num_cls:2 * num_cls]
            else:
                outputs_pred = outputs[0]
            ########  MAP here is ranking-based, so the outputs do not need normalization
            # predict_multensor = torch.ge(outputs_pred, 0.5)     ###   values greater than 0.5 are set to 1, the rest to 0 (thresholding)
            predict_mul = outputs_pred.cpu().numpy()

            # despite its name, temp_fbeta holds the per-batch LRAP score, which is
            # accumulated and averaged into ave_acc below
            temp_fbeta = label_ranking_average_precision_score(
                labels_, predict_mul)
            #################################################################  dy modify    Micro precision
            # cor_sum = 0
            # num_sum =0

            # for j in range(10):

            #     query_col = labels_[j,:]
            #     label_col = predict_mul[j,:]

            #     index = np.where(label_col > 0.5)
            #     index_ = index[0]
            #     number_=index_.size

            #     query_binary = query_col[index]
            #     query_label = label_col[index]

            #     batch_corrects1 = np.count_nonzero(query_binary == query_label)

            #     cor_sum = cor_sum + batch_corrects1
            #     num_sum = num_sum + number_

            # temp_fbeta = cor_sum/num_sum
            ##################################################################

            sum_fbeta = sum_fbeta + temp_fbeta
            ave_num = batch_cnt_val + 1

            # top3_val, top3_pos = torch.topk(outputs_pred, 3)

            # print('{:s} eval_batch: {:-6d} / {:d} loss: {:8.4f}'.format(val_version, batch_cnt_val, val_epoch_step, loss), flush=True)

        #     batch_corrects1 = torch.sum((top3_pos[:, 0] == labels)).data.item()
        #     val_corrects1 += batch_corrects1

        #     batch_corrects2 = torch.sum((top3_pos[:, 1] == labels)).data.item()
        #     val_corrects2 += (batch_corrects2 + batch_corrects1)
        #     batch_corrects3 = torch.sum((top3_pos[:, 2] == labels)).data.item()
        #     val_corrects3 += (batch_corrects3 + batch_corrects2 + batch_corrects1)

        # val_acc1 = val_corrects1 / item_count
        # val_acc2 = val_corrects2 / item_count
        # val_acc3 = val_corrects3 / item_count

        # log_file.write(val_version  + '\t' +str(val_loss_recorder.get_val())+'\t' + str(val_celoss_recorder.get_val()) + '\t' + str(val_acc1) + '\t' + str(val_acc3) + '\n')

        # t1 = time.time()
        # since = t1-t0
        # print('--'*30, flush=True)
        # print('% 3d %s %s %s-loss: %.4f ||%s-acc@1: %.4f %s-acc@2: %.4f %s-acc@3: %.4f ||time: %d' % (epoch_num, val_version, dt(), val_version, val_loss_recorder.get_val(init=True), val_version, val_acc1,val_version, val_acc2, val_version, val_acc3, since), flush=True)
        # print('--' * 30, flush=True)

    # return val_acc1, val_acc2, val_acc3

        ave_acc = sum_fbeta / ave_num
        log_file.write(val_version + '\t' + str(val_loss_recorder.get_val()) +
                       '\t' + str(val_celoss_recorder.get_val()) + '\t' +
                       str(ave_acc) + '\n')

        t1 = time.time()
        since = t1 - t0
        print('--' * 30, flush=True)
        print('% 3d %s %s %s-loss: %.4f ||%s-ave@acc: %.4f ||time: %d' %
              (epoch_num, val_version, dt(), val_version,
               val_loss_recorder.get_val(init=True), val_version, ave_acc,
               since),
              flush=True)
        print('--' * 30, flush=True)

    return ave_acc
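Since eval_turn averages LRAP per batch, a smaller final batch gets the same weight as a full one, so scoring all predictions at once can give a slightly different number. A small, self-contained illustration (toy arrays, illustrative names only):

import numpy as np
from sklearn.metrics import label_ranking_average_precision_score

batch_labels = [np.array([[1, 0]]), np.array([[0, 1], [1, 1]])]             # batch sizes 1 and 2
batch_preds = [np.array([[0.1, 0.9]]), np.array([[0.2, 0.7], [0.4, 0.6]])]

per_batch_mean = np.mean([label_ranking_average_precision_score(y, p)
                          for y, p in zip(batch_labels, batch_preds)])
global_score = label_ranking_average_precision_score(np.concatenate(batch_labels),
                                                     np.concatenate(batch_preds))
print(per_batch_mean, global_score)   # 0.75 vs. 0.8333...: they differ when batch sizes differ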
Exemplo n.º 41
0
                # else:
                output_prob = torch.sigmoid(output)

                if output_probs is None:
                    output_probs = to_np(output_prob)
                else:
                    output_probs = np.concatenate([output_probs, to_np(output_prob)], axis=0)

            if total_output_probs is None:
                total_output_probs = output_probs * model_weight
            else:
                total_output_probs += (output_probs * model_weight)

        predict_vector = np.argmax(total_output_probs, axis=1)

        ranking_ap_score = label_ranking_average_precision_score(total_tags, total_output_probs)
        ranking_loss = label_ranking_loss(total_tags, total_output_probs)

        label_vector = np.argmax(total_tags, axis=1)
        bool_vector = predict_vector == label_vector
        accuracy = bool_vector.sum() / len(bool_vector)

        ens_ranking_ap_score = ranking_ap_score
        ens_ranking_loss = ranking_loss

        print(
            'Ens Val [{}] Acc {:2.4f} / Lank AP {:2.4f} / Lank Loss {:2.4f}'.format(
                datetime.now().strftime('%Y/%m/%d %H:%M:%S'),
                accuracy, ens_ranking_ap_score, ens_ranking_loss))

        m_name_list = m_names.split(",")
Exemplo n.º 42
0
def retrieve_closest_images(test_element, test_label, n_samples=10):
    #test_element = x_test[test_element]
    #print(test_element)
    test_label = y_test[test_label]
    learned_codes = encoder.predict(x_train)
    learned_codes = learned_codes.reshape(
        learned_codes.shape[0], learned_codes.shape[1] *
        learned_codes.shape[2] * learned_codes.shape[3])

    test_code = encoder.predict(np.array([test_element]))
    test_code = test_code.reshape(test_code.shape[1] * test_code.shape[2] *
                                  test_code.shape[3])

    distances = []

    for code in learned_codes:
        distance = np.linalg.norm(code - test_code)
        distances.append(distance)
    nb_elements = learned_codes.shape[0]
    distances = np.array(distances)
    learned_code_index = np.arange(nb_elements)
    labels = np.copy(y_train).astype('float32')
    labels[labels != test_label] = -1
    labels[labels == test_label] = 1
    labels[labels == -1] = 0
    labels = labels.reshape(distances.shape)
    distance_with_labels = np.stack((distances, labels, learned_code_index),
                                    axis=-1)
    sorted_distance_with_labels = distance_with_labels[
        distance_with_labels[:, 0].argsort()]

    # flip distances into similarity-style scores (higher = closer); the constant 28
    # is arbitrary, since any monotonically decreasing transform yields the same ranking
    sorted_distances = 28 - sorted_distance_with_labels[:, 0]
    sorted_labels = sorted_distance_with_labels[:, 1]
    sorted_indexes = sorted_distance_with_labels[:, 2]
    kept_indexes = sorted_indexes[:n_samples]

    score = label_ranking_average_precision_score(
        np.array([sorted_labels[:n_samples]]),
        np.array([sorted_distances[:n_samples]]))

    print("Average precision ranking score for tested element is {}".format(
        score))

    #original_image = x_test[70]
    original_image = test_element
    retrieved_images_labels = []
    # cv2.imshow('original_image', original_image)
    #retrieved_images = []
    retrieved_images = x_train[int(kept_indexes[0]), :]
    for i in range(1, n_samples):
        retrieved_images = np.hstack(
            (retrieved_images, x_train[int(kept_indexes[i]), :]))
        #retrieved_images.append(x_train[int(kept_indexes[i]), :])
    for i in range(0, n_samples):
        retrieved_images_labels.append(y_train[int(kept_indexes[i]), :])
    print("Retrieved labels:")
    labels = []
    for label in retrieved_images_labels:
        if label[0] == 9:
            print("truck")
            labels.append("truck")
        elif label[0] == 0:
            print("airplane")
            labels.append("airplane")
        elif label[0] == 1:
            print("automobile")
            labels.append("auto")
        elif label[0] == 2:
            print("bird")
            labels.append("bird")
        elif label[0] == 3:
            print("cat")
            labels.append("cat")
        elif label[0] == 4:
            print("deer")
            labels.append("deer")
        elif label[0] == 5:
            print("dog")
            labels.append("dog")
        elif label[0] == 6:
            print("frog")
            labels.append("frog")
        elif label[0] == 7:
            print("horse")
            labels.append("horse")
        elif label[0] == 8:
            print("ship")
            labels.append("ship")
        else:
            print(label[0])
    # cv2.imshow('Results', retrieved_images)
    # cv2.waitKey(0)

    # cv2.imwrite('E:/Facultate/unsupervises-image-retrieval/test_results_64v3/original_image5.jpg', 255 * cv2.resize(original_image, (0,0), fx=3, fy=3))
    # cv2.imwrite('E:/Facultate/unsupervises-image-retrieval/test_results_64v3/retrieved_results5.jpg', 255 * cv2.resize(retrieved_images, (0,0), fx=2, fy=2))
    return (255 * cv2.resize(original_image, (0, 0), fx=3, fy=3),
            255 * cv2.resize(retrieved_images,
                             (0, 0), fx=2, fy=2), score, labels)