Example #1
0
    def __init__(self, approach, task):

        self.rf_classifier = TrainClassifier()
        self.approach = approach

        if task == "test":
            # self.sr_output = "./data/fever-full/classifier_results/shared_dev_true_docs_"+str(approach)+"_features_new.jsonl"
            # self.sr_output = "./data/fever-full/classifier_results/shared_dev_true_docs_"+str(approach)+"_features_new_k_5.jsonl"
            self.sr_output = "./data/fever-full/classifier_results/shared_dev_true_docs_" + str(
                approach) + "_features_new_k_5_recall.jsonl"
        else:
            # self.sr_output = "./data/fever-full/classifier_results/subsample_train_true_docs_"+str(approach)+"_features_new.jsonl"
            # self.sr_output = "./data/fever-full/classifier_results/subsample_train_true_docs_"+str(approach)+"_features_new_k_5.jsonl"
            self.sr_output = "./data/fever-full/classifier_results/subsample_train_true_docs_" + str(
                approach) + "_features_new_k_5_recall.jsonl"

        self.tfidf = TFIDF()
        self.vs = VectorSpace()
        self.wmd = wordMoverDistance()
Example #2
0
    def __init__(self, task):

        self.task = task
        self.tfidf = TFIDF()
        self.vs = VectorSpace()
        self.wmd = wordMoverDistance()
Example #3
0
class featureCore:
    def __init__(self, task):

        self.task = task
        self.tfidf = TFIDF()
        self.vs = VectorSpace()
        self.wmd = wordMoverDistance()

    def get_tf_idf_score(self, list_of_defactoNlps):

        # print ("nlp mdoes ", list_of_defactoNlps)
        tf_idf_score = 0
        if self.task == 'classification':
            for model in list_of_defactoNlps:
                # print ("model.claim ", model.claim)
                # print ("model sentence ", model.sentences)
                relevant_sentence, score = self.tfidf.apply_tf_idf(
                    model.claim, model.sentences)
                # threshold of 0.2 on the tf-idf similarity score
                if score >= 0.2:
                    model.method_name["tfidf"] = {
                        "Classification": {
                            "pred_label": 1
                        }
                    }
                # 	print ("claim ", model.claim)
                # 	print ("most similar sentence ", relevant_sentence)
                else:
                    model.method_name["tfidf"] = {
                        "Classification": {
                            "pred_label": 0
                        }
                    }

        else:
            for model in list_of_defactoNlps:
                relevant_sentence, score = self.tfidf.apply_tf_idf(
                    model.claim, model.sentences)
                # classification: score >= 0.2 --> Yes (1), score < 0.2 --> NEI (0)
                # detection: score > 0.4 --> Supports (0), 0.2 <= score <= 0.4 --> Refutes (1), score < 0.2 --> NEI (2)
                if score >= 0.2:
                    #detection
                    # Supports
                    if score > 0.4:
                        model.method_name["tfidf"] = {
                            "Detection": {
                                "pred_label": 0
                            }
                        }
                    # refutes
                    else:  # REFUTES
                        model.method_name["tfidf"] = {
                            "Detection": {
                                "pred_label": 1
                            }
                        }
                #label as NEI
                else:
                    model.method_name["tfidf"] = {
                        "Detection": {
                            "pred_label": 2
                        }
                    }

        return list_of_defactoNlps

    def get_vector_space_score(self, list_of_defactoNlps):

        if self.task == 'classification':
            for model in list_of_defactoNlps:

                relevant_sentence, vector_space_score = self.vs.apply_vector_space(
                    model.claim, model.sentences)
                if vector_space_score >= 0.2:
                    model.method_name["vspace"] = {
                        "Classification": {
                            "pred_label": 1
                        }
                    }

                else:
                    model.method_name["vspace"] = {
                        "Classification": {
                            "pred_label": 0
                        }
                    }

        else:
            for model in list_of_defactoNlps:
                relevant_sentence, vector_space_score = self.vs.apply_vector_space(
                    model.claim, model.sentences)
                # classification: vector_space_score >= 0.2 --> Yes (1), otherwise NEI (0)
                # detection: vector_space_score > 0.4 --> Supports (0), 0.1 <= vector_space_score <= 0.4 --> Refutes (1), vector_space_score < 0.1 --> NEI (2)
                if vector_space_score >= 0.1:
                    #detection
                    # Supports
                    if vector_space_score > 0.4:
                        model.method_name["vspace"] = {
                            "Detection": {
                                "pred_label": 0
                            }
                        }
                    # refutes
                    else:  # REFUTES
                        model.method_name["vspace"] = {
                            "Detection": {
                                "pred_label": 1
                            }
                        }
                #label as NEI
                else:
                    model.method_name["vspace"] = {
                        "Detection": {
                            "pred_label": 2
                        }
                    }

        return list_of_defactoNlps

    def get_wmd_score(self, list_of_defactoNlps):

        # print ("nlp mdoes ", list_of_defactoNlps)
        wmd_score = 0
        if self.task == 'classification':
            for model in list_of_defactoNlps:
                relevant_sentence, wmd_score = self.wmd.compute_wm_distance(
                    model.claim, model.sentences)
                if wmd_score < 2.0:

                    model.method_name["wmd"] = {
                        "Classification": {
                            "pred_label": 1
                        }
                    }

                else:
                    model.method_name["wmd"] = {
                        "Classification": {
                            "pred_label": 0
                        }
                    }

        else:
            for model in list_of_defactoNlps:
                relevant_sentence, wmd_score = self.wmd.compute_wm_distance(
                    model.claim, model.sentences)
                # classification: wmd_score < 2.0 --> Yes (1), otherwise NEI (0)
                # detection (WMD is a distance, lower = more similar): wmd_score < 1 --> Supports (0), 1 <= wmd_score <= 2 --> Refutes (1), wmd_score > 2 --> NEI (2)
                if wmd_score <= 2:
                    #detection
                    # Supports
                    if wmd_score < 1:
                        model.method_name["wmd"] = {
                            "Detection": {
                                "pred_label": 0
                            }
                        }
                    # refutes
                    else:  # REFUTES
                        model.method_name["wmd"] = {
                            "Detection": {
                                "pred_label": 1
                            }
                        }
                #label as NEI
                else:
                    model.method_name["wmd"] = {"Detection": {"pred_label": 2}}

        return list_of_defactoNlps
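# --- Hedged usage sketch (illustration only; not part of the original code). ---
# It assumes a minimal stand-in for the DefactoNLP model objects consumed above:
# anything exposing `claim`, `sentences`, and a `method_name` dict works. The real
# TFIDF, VectorSpace and wordMoverDistance classes are expected to be importable
# from the surrounding project.
class FakeDefactoModel:
    def __init__(self, claim, sentences):
        self.claim = claim          # claim text to verify
        self.sentences = sentences  # candidate evidence sentences
        self.method_name = {}       # per-method predictions get written here


core = featureCore(task="classification")
models = [FakeDefactoModel("Some claim.", ["Evidence sentence one.", "Evidence sentence two."])]
models = core.get_tf_idf_score(models)
models = core.get_vector_space_score(models)
models = core.get_wmd_score(models)
print(models[0].method_name)  # e.g. {"tfidf": {...}, "vspace": {...}, "wmd": {...}}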
Example #4
0
    def __init__(self):

        self.tfidf = TFIDF()
        self.vs = VectorSpace()
        # self.wmd = wordMoverDistance()
        self.count_avg_true_evidences = 0
Example #5
0
class SentenceClassifier:
    def __init__(self):

        self.tfidf = TFIDF()
        self.vs = VectorSpace()
        # self.wmd = wordMoverDistance()
        self.count_avg_true_evidences = 0

    def store_tf_idf_results(self, example, approach):

        tmp_dict = {}
        scores = []

        tmp_dict["id"] = example["id"]
        tmp_dict["true_label"] = example["label"]
        tmp_dict["claim"] = example["claim"]
        top_k_sents = 5
        false_negatives_scores = []
        tmp_dict["true_evidences"] = example["true_evidences"]
        tmp_dict["actual_true_positives"] = len(example["true_evidences"])
        tmp_dict["total_sentences"] = len(example["relevant_sentences"])
        tmp_dict["actual_true_negatives"] = len(
            example["relevant_sentences"]) - len(example["true_evidences"])

        for evidence in example["relevant_sentences"]:

            if approach == "tfidf":
                _, similarity_score = self.tfidf.apply_tf_idf(
                    example["claim"], evidence["sentence"])

                # print ("similarity_score ", similarity_score)
                # print ("\n")
                if similarity_score > 0.05:
                    # print (" scores != 0", similarity_score)
                    scores.append(similarity_score)
                else:
                    false_negatives_scores.append(similarity_score)

            elif approach == "vs":
                _, similarity_score = self.vs.apply_vector_space(
                    example["claim"], evidence["sentence"])

                # print ("similarity_score ", similarity_score)
                if similarity_score > 0.1:
                    scores.append(similarity_score)

                else:
                    false_negatives_scores.append(similarity_score)

            else:
                print("no approach matched")

        # take the top k sentences by similarity score
        sorted_indexes = np.argsort(scores)
        filtered_indexes = []

        if len(scores) == 0:
            # no score passed the threshold, so there is no related sentence
            tmp_dict["predicted_sentences"] = "null"
            tmp_dict["predicted_sentences_ids"] = [["null", "null"]]
            tmp_dict["predicted_sentences"] = ["null"]
            # fill the feature slots with a placeholder value
            tmp_dict["tf_idf_features"] = [10000, 10000, 10000, 10000, 10000]

        elif len(scores) >= top_k_sents:
            # print ("stored indexes ", sorted_indexes[-5:])
            # filtered_indexes = [idx for idx in sorted_indexes[-5:] if scores[idx] > 0.05]

            # print ("filtered_indexes ", filtered_indexes)
            tmp_dict["predicted_sentences"] = itemgetter(
                *sorted_indexes[-top_k_sents:])(example["relevant_sentences"])
            tmp_dict["predicted_sentences_ids"] = [[
                sent["id"], sent["line_num"]
            ] for sent in tmp_dict["predicted_sentences"]]
            tmp_dict["predicted_sentences"] = [
                sent["sentence"] for sent in tmp_dict["predicted_sentences"]
            ]
            tmp_dict["tf_idf_features"] = sorted(scores)[-top_k_sents:]

        else:
            # if there are fewer than top_k_sents scores, pad with 10000s so the classifier always sees a fixed-length feature vector
            # print ("sorted_indexes ", sorted_indexes)
            tmp_dict["predicted_sentences"] = itemgetter(*sorted_indexes)(
                example["relevant_sentences"])
            # tmp_dict["predicted_sentences"] = example["relevant_sentences"]
            # print ("tmp dict ", tmp_dict["predicted_sentences"])

            if len(sorted_indexes) == 1:
                tmp_dict["predicted_sentences"] = [
                    tmp_dict["predicted_sentences"]
                ]
                # print ("indexes 0")

            tmp_dict["predicted_sentences_ids"] = [[
                sent["id"], sent["line_num"]
            ] for sent in tmp_dict["predicted_sentences"]]
            tmp_dict["predicted_sentences"] = [
                sent["sentence"] for sent in tmp_dict["predicted_sentences"]
            ]
            tmp_dict["tf_idf_features"] = sorted(scores) + (
                [10000] * (top_k_sents - len(sorted(scores))))

        tmp_dict["accuracy"], t_correct_evds = self.compute_score(
            tmp_dict["true_evidences"], tmp_dict["predicted_sentences_ids"])
        tmp_dict["predicted_true_positives"] = t_correct_evds
        tmp_dict["predicted_false_positives"] = len(scores) - t_correct_evds
        tmp_dict["predicted_true_negatives"] = len(false_negatives_scores)
        tmp_dict["predicted_false_negatives"] = tmp_dict[
            "predicted_true_negatives"] - len(scores) - t_correct_evds

        tmp_dict["Recall"] = self.handle_errors(
            tmp_dict["predicted_true_positives"],
            tmp_dict["actual_true_positives"])
        tmp_dict["Precision"] = self.handle_errors(
            tmp_dict["predicted_true_positives"],
            tmp_dict["predicted_false_positives"] +
            tmp_dict["predicted_true_positives"])
        tmp_dict["accuracy_formula"] = self.handle_errors(
            tmp_dict["predicted_true_positives"],
            tmp_dict["actual_true_positives"])
        tmp_dict["f1_score"] = self.handle_errors(
            2 * tmp_dict["Recall"] * tmp_dict["Precision"],
            tmp_dict["Recall"] + tmp_dict["Precision"])

        return tmp_dict

    # guard the division: several of the ratio computations above can raise ZeroDivisionError
    def handle_errors(self, a, b):

        try:
            z = a / b
        except ZeroDivisionError:
            z = 0

        return z

    def store_features(self, sub_sampled_data, features_path, approach):

        sub_sampled_data = jsonlines.open(sub_sampled_data)
        sub_sampled_data = [example for example in sub_sampled_data]

        # for eg in sub_sampled_data:
        # 	self.count_avg_true_evidences = self.count_avg_true_evidences + len(eg["true_evidences"])
        # print ("total evidences ", self.count_avg_true_evidences)
        # print ("leng of true evidence ", self.count_avg_true_evidences/len(sub_sampled_data))

        print("len of data ", len(sub_sampled_data))
        # features = pd.DataFrame(columns=['S1','S2','S3','S4','S5', 'Label']) # similarity scores of top 5 sents
        approach = [approach] * len(sub_sampled_data)
        print("approach inside store features ", len(approach))
        pool = Pool(processes=10)

        accuracy = []
        precisions = []
        recalls = []
        f1_scores = []
        formula_acc = []

        count_accurate = 0
        with jsonlines.open(features_path, mode='w') as f:
            for tmp_dict in pool.starmap(self.store_tf_idf_results,
                                         zip(sub_sampled_data, approach)):
                # features.loc[index] = df
                # print ("dictionary ", tmp_dict)
                if tmp_dict["accuracy"] == 1.0:
                    count_accurate += 1

                accuracy.append((tmp_dict["accuracy"]))

                precisions.append((tmp_dict["Precision"]))
                recalls.append((tmp_dict["Recall"]))
                f1_scores.append((tmp_dict["f1_score"]))
                formula_acc.append((tmp_dict["accuracy_formula"]))

                f.write(tmp_dict)

        pool.close()

        print("total accurate answers ", count_accurate)

        return accuracy, precisions, recalls, f1_scores, formula_acc

    def store_tf_idf_results_wmd(self, data, f):

        count = 0
        accuracies = []
        precisions = []
        recalls = []
        f1_scores = []
        formula_acc = []

        k = 3

        for example in data:
            # print ("count ", count)

            tmp_dict = {}
            scores = []

            false_negatives_scores = []
            tmp_dict["true_evidences"] = example["true_evidences"]
            tmp_dict["actual_true_positives"] = len(example["true_evidences"])
            tmp_dict["total_sentences"] = len(example["relevant_sentences"])
            tmp_dict["actual_true_negatives"] = len(
                example["relevant_sentences"]) - len(example["true_evidences"])

            tmp_dict["id"] = example["id"]
            tmp_dict["true_label"] = example["label"]
            tmp_dict["claim"] = example["claim"]
            tmp_dict["true_evidences"] = example["true_evidences"]

            for evidence in example["relevant_sentences"]:
                _, similarity_score = self.wmd.compute_wm_distance(
                    example["claim"], evidence["sentence"])
                # print (similarity_score)
                if similarity_score != "inf" and similarity_score < 1.5:
                    # similarity_score = 4
                    scores.append(similarity_score)

                else:
                    false_negatives_scores.append(similarity_score)

            # take the top k (k = 3) sentences with the smallest WMD distance
            sorted_indexes = np.argsort(scores)

            if len(scores) == 0:
                tmp_dict["predicted_sentences"] = example["relevant_sentences"]
                tmp_dict["predicted_sentences_ids"] = [["null", "null"]]
                tmp_dict["predicted_sentences"] = ["null"]
                # a distance of 4 is used as a placeholder meaning the sentences are not similar
                tmp_dict["tf_idf_features"] = [4, 4, 4, 4, 4]

            elif len(scores) >= k:
                tmp_dict["predicted_sentences"] = itemgetter(
                    *sorted_indexes[:k])(example["relevant_sentences"])
                tmp_dict["predicted_sentences_ids"] = [[
                    sent["id"], sent["line_num"]
                ] for sent in tmp_dict["predicted_sentences"]]
                tmp_dict["predicted_sentences"] = [
                    sent["sentence"]
                    for sent in tmp_dict["predicted_sentences"]
                ]
                tmp_dict["tf_idf_features"] = sorted(scores)[:k]

            # df.loc[0] = sorted(scores)[-5:] + [example["label"]]
            else:
                # if there are fewer than k scores, pad with 0s so the classifier gets a fixed-length feature vector
                tmp_dict["predicted_sentences"] = itemgetter(*sorted_indexes)(
                    example["relevant_sentences"])

                if len(sorted_indexes) == 1:
                    tmp_dict["predicted_sentences"] = [
                        tmp_dict["predicted_sentences"]
                    ]

                tmp_dict["predicted_sentences_ids"] = [[
                    sent["id"], sent["line_num"]
                ] for sent in tmp_dict["predicted_sentences"]]
                tmp_dict["predicted_sentences"] = [
                    sent["sentence"]
                    for sent in tmp_dict["predicted_sentences"]
                ]
                tmp_dict["tf_idf_features"] = sorted(scores) + (
                    [0] * (k - len(sorted(scores))))

            tmp_dict["accuracy"], t_correct_evds = self.compute_score(
                tmp_dict["true_evidences"],
                tmp_dict["predicted_sentences_ids"])

            tmp_dict["predicted_true_positives"] = t_correct_evds
            tmp_dict["predicted_false_positives"] = len(
                scores) - t_correct_evds
            tmp_dict["predicted_true_negatives"] = len(false_negatives_scores)
            tmp_dict["predicted_false_negatives"] = tmp_dict[
                "predicted_true_negatives"] - len(scores) - t_correct_evds

            tmp_dict["Recall"] = self.handle_errors(
                tmp_dict["predicted_true_positives"],
                tmp_dict["actual_true_positives"])
            tmp_dict["Precision"] = self.handle_errors(
                tmp_dict["predicted_true_positives"],
                tmp_dict["predicted_false_positives"] +
                tmp_dict["predicted_true_positives"])
            tmp_dict["accuracy_formula"] = self.handle_errors(
                tmp_dict["predicted_true_positives"],
                tmp_dict["actual_true_positives"])
            tmp_dict["f1_score"] = self.handle_errors(
                2 * tmp_dict["Recall"] * tmp_dict["Precision"],
                tmp_dict["Recall"] + tmp_dict["Precision"])

            accuracies.append(tmp_dict["accuracy"])
            precisions.append(tmp_dict["Precision"])
            recalls.append(tmp_dict["Recall"])
            f1_scores.append(tmp_dict["f1_score"])
            formula_acc.append(tmp_dict["accuracy_formula"])

            f.write(tmp_dict)
            count += 1

        return accuracies, precisions, recalls, f1_scores, formula_acc

    def store_features_wmd(self, sub_sampled_data, features_path, approach):

        sub_sampled_data = jsonlines.open(sub_sampled_data)
        sub_sampled_data = [example for example in sub_sampled_data]
        print("len of data ", len(sub_sampled_data))

        with jsonlines.open(features_path, mode='w') as f:
            accuracies, precisions, recalls, f1_scores, formula_acc = self.store_tf_idf_results_wmd(
                sub_sampled_data, f)

        return accuracies, precisions, recalls, f1_scores, formula_acc

    '''
    Preprocess the dataset: return the features and labels used
    to train the classifier.
    '''

    def preprocess_data(self, dataset):

        print("processing the dataset ")

        features = []
        labels = []
        features = pd.DataFrame(columns=['s1', 's2', 's3', 's4', 's5'])

        with jsonlines.open(dataset, mode='r') as f:
            for (index, example) in enumerate(f):
                features.loc[index] = example['tf_idf_features']
                if example['true_label'] == 'SUPPORTS':
                    labels.append(0)
                elif example['true_label'] == 'REFUTES':
                    labels.append(1)
                # NOT ENOUGH INFO
                else:
                    labels.append(2)

        labels = np.array(labels)
        print("feature shape ", len(features))
        print("labels shape ", labels.shape)

        return (features, labels)

    # count shows how many true evidence sets were found among the predicted sentences
    def compute_score(self, true_labels, pred_labels):

        count = 0
        for true_evd_set in true_labels:
            if true_evd_set in pred_labels:
                count += 1

        return (count / len(true_labels)), count

    '''
	evaluate clf 
	'''

    def evaluate_clf(self, x, y_true, model):

        y_pred = model.predict(x)
        print(
            "score of tfidf ",
            precision_recall_fscore_support(y_true, y_pred,
                                            average='weighted'))
        print("accuracy score ", accuracy_score(y_true, y_pred))

    def train_clf(self, X, Y, path):

        print("inside classifier")
        clf = RandomForestClassifier()
        clf.fit(X, Y)

        joblib.dump(clf, path)
        print("model saved")
Example #6
0
class featureCore:
    def __init__(self, task):

        self.task = task
        self.tfidf = TFIDF()
        self.vs = VectorSpace()
        self.wmd = wordMoverDistance()

    def get_tf_idf_score(self, list_of_defactoNlps):

        # print ("nlp mdoes ", list_of_defactoNlps)
        if self.task == 'bin-classification-fever' or self.task == 'bin-classification-google':

            for model in list_of_defactoNlps:
                relevant_sentence, score = self.tfidf.apply_tf_idf(
                    model.claim, model.sentences)
                # threshold on the tf-idf similarity score
                # bin-classification-fever: 1 represents SUPPORTS/REFUTES and 0 represents NEI
                # bin-classification-google: the classes are binary,
                # 0 represents SUPPORTS and 1 represents REFUTES
                if score >= 0.1:
                    model.method_name["tfidf"] = {self.task: {"pred_label": 0}}

                else:
                    model.method_name["tfidf"] = {self.task: {"pred_label": 1}}

        # for the fever-3 and google datasets, i.e.
        # self.task == 'tri-classification-fever' or self.task == 'bin-classification-google'
        else:
            for model in list_of_defactoNlps:
                relevant_sentence, score = self.tfidf.apply_tf_idf(
                    model.claim, model.sentences)

                # tri-classification: score > 0.3 --> Supports (0), 0.2 <= score <= 0.3 --> Refutes (1), score < 0.2 --> NEI (2)
                if score >= 0.2:
                    #detection
                    # Supports
                    if score > 0.3:
                        # print ("score > 0.7")
                        model.method_name["tfidf"] = {
                            self.task: {
                                "pred_label": 0
                            }
                        }
                    # refutes
                    else:  # REFUTES
                        # print ("score < 0.7")
                        model.method_name["tfidf"] = {
                            self.task: {
                                "pred_label": 1
                            }
                        }
                #label as NEI
                else:
                    # print ("score < 0.05")
                    model.method_name["tfidf"] = {self.task: {"pred_label": 2}}

        return list_of_defactoNlps

    def get_vector_space_score(self, list_of_defactoNlps):

        if self.task == 'bin-classification-fever' or self.task == 'bin-classification-google':
            for model in list_of_defactoNlps:

                relevant_sentence, vector_space_score = self.vs.apply_vector_space(
                    model.claim, model.sentences)
                # bin-classification-fever, 1 represents sup, ref and 0 represents nei
                # bin-classification-google classes are binary
                # 0 represents supports, 1 represents refutes
                if vector_space_score >= 0.2:
                    model.method_name["vspace"] = {
                        self.task: {
                            "pred_label": 0
                        }
                    }

                else:
                    model.method_name["vspace"] = {
                        self.task: {
                            "pred_label": 1
                        }
                    }
        # for the fever-3 and google datasets

        else:

            # print ("inside else of vector_space")
            for model in list_of_defactoNlps:
                relevant_sentence, vector_space_score = self.vs.apply_vector_space(
                    model.claim, model.sentences)
                # tri-classification: vector_space_score > 0.3 --> Supports (0), 0.2 <= vector_space_score <= 0.3 --> Refutes (1), vector_space_score < 0.2 --> NEI (2)
                if vector_space_score >= 0.2:
                    #detection
                    # Supports
                    if vector_space_score > 0.3:
                        model.method_name["vspace"] = {
                            self.task: {
                                "pred_label": 0
                            }
                        }
                    # refutes
                    else:  # REFUTES
                        model.method_name["vspace"] = {
                            self.task: {
                                "pred_label": 1
                            }
                        }
                #label as NEI
                else:
                    model.method_name["vspace"] = {
                        self.task: {
                            "pred_label": 2
                        }
                    }

        return list_of_defactoNlps

    def get_wmd_score(self, list_of_defactoNlps):

        # print ("nlp mdoes ", list_of_defactoNlps)
        wmd_score = 0
        if self.task == 'bin-classification-fever' or self.task == 'bin-classification-google':
            # print ("inside wmd score ")
            for model in list_of_defactoNlps:
                relevant_sentence, wmd_score = self.wmd.compute_wm_distance(
                    model.claim, model.sentences)
                # in bin-classification-google: 0 represents SUPPORTS, 1 represents REFUTES
                if wmd_score < 0.8:

                    model.method_name["wmd"] = {self.task: {"pred_label": 0}}

                else:
                    model.method_name["wmd"] = {self.task: {"pred_label": 1}}
        # for the fever-3 and google datasets

        else:
            for model in list_of_defactoNlps:
                relevant_sentence, wmd_score = self.wmd.compute_wm_distance(
                    model.claim, model.sentences)
                # tri-classification (WMD is a distance, lower = more similar): wmd_score < 1 --> Supports (0), 1 <= wmd_score <= 2.2 --> Refutes (1), wmd_score > 2.2 --> NEI (2)
                if wmd_score <= 2.2:
                    #detection
                    # Supports
                    if wmd_score < 1:
                        model.method_name["wmd"] = {
                            self.task: {
                                "pred_label": 0
                            }
                        }
                    # refutes
                    else:  # REFUTES
                        model.method_name["wmd"] = {
                            self.task: {
                                "pred_label": 1
                            }
                        }
                #label as NEI
                else:
                    model.method_name["wmd"] = {self.task: {"pred_label": 2}}

        return list_of_defactoNlps
Example #7
0
class SentenceClassifier:
    def __init__(self):

        self.tfidf = TFIDF()
        self.vs = VectorSpace()
        self.wmd = wordMoverDistance()
        self.count_avg_true_evidences = 0

    def store_tf_idf_results(self, example, approach):

        tmp_dict = {}
        scores = []

        tmp_dict["id"] = example["id"]
        # tmp_dict["true_label"] = example["label"]
        tmp_dict["claim"] = example["claim"]
        top_k_sents = 5
        false_negatives_scores = []
        # tmp_dict["true_evidences"] = example["true_evidences"]
        # tmp_dict["actual_true_positives"] = len(example["true_evidences"])
        # tmp_dict["total_sentences"] = len(example["relevant_sentences"])
        # tmp_dict["actual_true_negatives"] = len(example["relevant_sentences"]) - len(example["true_evidences"])

        for evidence in example["relevant_sentences"]:

            if approach == "tfidf":
                _, similarity_score = self.tfidf.apply_tf_idf(
                    example["claim"], evidence["sentence"])

                # print ("similarity_score ", similarity_score)
                # print ("\n")
                if similarity_score > 0.2:
                    # print (" scores != 0", similarity_score)
                    scores.append(similarity_score)

            elif approach == "vs":
                _, similarity_score = self.vs.apply_vector_space(
                    example["claim"], evidence["sentence"])

                # print ("similarity_score ", similarity_score)
                if similarity_score > 0.2:
                    scores.append(similarity_score)

            else:
                print("no approach matched")

        # take the top k sentences by similarity score
        sorted_indexes = np.argsort(scores)
        filtered_indexes = []

        if len(scores) == 0:
            # no score passed the threshold, so there is no related sentence
            # tmp_dict["predicted_sentences"] = "null"
            tmp_dict["predicted_sentences_ids"] = [["null", "null"]]
            tmp_dict["predicted_sentences"] = ["null"]
            # fill the feature slots with a placeholder value
            tmp_dict["tf_idf_features"] = [10000, 10000, 10000, 10000, 10000]

        elif len(scores) >= top_k_sents:
            # print ("stored indexes ", sorted_indexes[-5:])
            # filtered_indexes = [idx for idx in sorted_indexes[-5:] if scores[idx] > 0.05]

            # print ("filtered_indexes ", filtered_indexes)
            tmp_dict["predicted_sentences"] = itemgetter(
                *sorted_indexes[-top_k_sents:])(example["relevant_sentences"])
            tmp_dict["predicted_sentences_ids"] = [[
                sent["id"], sent["line_num"]
            ] for sent in tmp_dict["predicted_sentences"]]
            tmp_dict["predicted_sentences"] = [
                sent["sentence"] for sent in tmp_dict["predicted_sentences"]
            ]
            tmp_dict["tf_idf_features"] = sorted(scores)[-top_k_sents:]

        else:
            # if there are fewer than top_k_sents scores, pad with 10000s so the classifier always sees a fixed-length feature vector
            # print ("sorted_indexes ", sorted_indexes)
            tmp_dict["predicted_sentences"] = itemgetter(*sorted_indexes)(
                example["relevant_sentences"])
            # tmp_dict["predicted_sentences"] = example["relevant_sentences"]
            # print ("tmp dict ", tmp_dict["predicted_sentences"])

            if len(sorted_indexes) == 1:
                tmp_dict["predicted_sentences"] = [
                    tmp_dict["predicted_sentences"]
                ]
                # print ("indexes 0")

            tmp_dict["predicted_sentences_ids"] = [[
                sent["id"], sent["line_num"]
            ] for sent in tmp_dict["predicted_sentences"]]
            tmp_dict["predicted_sentences"] = [
                sent["sentence"] for sent in tmp_dict["predicted_sentences"]
            ]
            tmp_dict["tf_idf_features"] = sorted(scores) + (
                [10000] * (top_k_sents - len(sorted(scores))))

        # tmp_dict["accuracy"], t_correct_evds = self.compute_score(tmp_dict["true_evidences"], tmp_dict["predicted_sentences_ids"])
        # tmp_dict["predicted_true_positives"] = t_correct_evds
        # tmp_dict["predicted_false_positives"] = len(scores) - t_correct_evds
        # tmp_dict["predicted_true_negatives"] = len(false_negatives_scores)
        # tmp_dict["predicted_false_negatives"] = tmp_dict["predicted_true_negatives"] - len(scores) - t_correct_evds

        # tmp_dict["Recall"] = self.handle_errors(tmp_dict["predicted_true_positives"], tmp_dict["actual_true_positives"])
        # tmp_dict["Precision"] = self.handle_errors(tmp_dict["predicted_true_positives"], tmp_dict["predicted_false_positives"] + tmp_dict["predicted_true_positives"])
        # tmp_dict["accuracy_formula"] = self.handle_errors(tmp_dict["predicted_true_positives"], tmp_dict["actual_true_positives"])
        # tmp_dict["f1_score"] = self.handle_errors(2 * tmp_dict["Recall"] * tmp_dict["Precision"], tmp_dict["Recall"] + tmp_dict["Precision"])

        return tmp_dict

    # guard the division: several of the ratio computations above can raise ZeroDivisionError
    def handle_errors(self, a, b):

        try:
            z = a / b
        except ZeroDivisionError:
            z = 0

        return z

    def store_features(self, sub_sampled_data, features_path, approach):

        sub_sampled_data = jsonlines.open(sub_sampled_data)
        sub_sampled_data = [example for example in sub_sampled_data]

        print("len of data ", len(sub_sampled_data))
        # features = pd.DataFrame(columns=['S1','S2','S3','S4','S5', 'Label']) # similarity scores of top 5 sents
        approach = [approach] * len(sub_sampled_data)
        print("approach inside store features ", len(approach))
        pool = Pool(processes=10)

        accuracy = []
        # precisions = []
        # recalls = []
        # f1_scores = []
        # formula_acc = []

        with jsonlines.open(features_path, mode='w') as f:
            for tmp_dict in pool.starmap(self.store_tf_idf_results,
                                         zip(sub_sampled_data, approach)):
                # features.loc[index] = df
                # print ("dictionary ", tmp_dict)
                # accuracy.append((tmp_dict["accuracy"]))
                # precisions.append((tmp_dict["Precision"]))
                # recalls.append((tmp_dict["Recall"]))
                # f1_scores.append((tmp_dict["f1_score"]))
                # formula_acc.append((tmp_dict["accuracy_formula"]))

                f.write(tmp_dict)

        pool.close()

    def store_tf_idf_results_wmd(self, data, f):

        count = 0
        # accuracies = []
        # precisions = []
        # recalls = []
        # f1_scores = []
        # formula_acc = []

        k = 5

        for example in data:
            print("count ", count)

            tmp_dict = {}
            scores = []

            false_negatives_scores = []
            # tmp_dict["true_evidences"] = example["true_evidences"]
            # tmp_dict["actual_true_positives"] = len(example["true_evidences"])
            # tmp_dict["total_sentences"] = len(example["relevant_sentences"])
            # tmp_dict["actual_true_negatives"] = len(example["relevant_sentences"]) - len(example["true_evidences"])

            tmp_dict["id"] = example["id"]
            # tmp_dict["true_label"] = example["label"]
            tmp_dict["claim"] = example["claim"]
            # tmp_dict["true_evidences"] = example["true_evidences"]

            for evidence in example["relevant_sentences"]:
                _, similarity_score = self.wmd.compute_wm_distance(
                    example["claim"], evidence["sentence"])
                # print (similarity_score)
                if similarity_score != "inf" and similarity_score < 1.5:
                    # similarity_score = 4
                    scores.append(similarity_score)

                else:
                    false_negatives_scores.append(similarity_score)

            # take the top k (k = 5) sentences with the smallest WMD distance
            sorted_indexes = np.argsort(scores)

            if len(scores) == 0:
                # tmp_dict["predicted_sentences"] = example["relevant_sentences"]
                tmp_dict["predicted_sentences_ids"] = [["null", "null"]]
                tmp_dict["predicted_sentences"] = ["null"]
                # a distance of 4 is used as a placeholder meaning the sentences are not similar
                tmp_dict["tf_idf_features"] = [4, 4, 4, 4, 4]

            elif len(scores) >= k:
                tmp_dict["predicted_sentences"] = itemgetter(
                    *sorted_indexes[:k])(example["relevant_sentences"])
                tmp_dict["predicted_sentences_ids"] = [[
                    sent["id"], sent["line_num"]
                ] for sent in tmp_dict["predicted_sentences"]]
                tmp_dict["predicted_sentences"] = [
                    sent["sentence"]
                    for sent in tmp_dict["predicted_sentences"]
                ]
                tmp_dict["tf_idf_features"] = sorted(scores)[:k]

            # df.loc[0] = sorted(scores)[-5:] + [example["label"]]
            else:
                # if there are fewer than k scores, pad with 0s so the classifier gets a fixed-length feature vector
                tmp_dict["predicted_sentences"] = itemgetter(*sorted_indexes)(
                    example["relevant_sentences"])

                if len(sorted_indexes) == 1:
                    tmp_dict["predicted_sentences"] = [
                        tmp_dict["predicted_sentences"]
                    ]

                tmp_dict["predicted_sentences_ids"] = [[
                    sent["id"], sent["line_num"]
                ] for sent in tmp_dict["predicted_sentences"]]
                tmp_dict["predicted_sentences"] = [
                    sent["sentence"]
                    for sent in tmp_dict["predicted_sentences"]
                ]
                tmp_dict["tf_idf_features"] = sorted(scores) + (
                    [0] * (k - len(sorted(scores))))

            # tmp_dict["accuracy"], t_correct_evds = self.compute_score(tmp_dict["true_evidences"], tmp_dict["predicted_sentences_ids"])

            # tmp_dict["predicted_true_positives"] = t_correct_evds
            # tmp_dict["predicted_false_positives"] = len(scores) - t_correct_evds
            # tmp_dict["predicted_true_negatives"] = len(false_negatives_scores)
            # tmp_dict["predicted_false_negatives"] = tmp_dict["predicted_true_negatives"] - len(scores) - t_correct_evds

            # tmp_dict["Recall"] = self.handle_errors(tmp_dict["predicted_true_positives"], tmp_dict["actual_true_positives"])
            # tmp_dict["Precision"] = self.handle_errors(tmp_dict["predicted_true_positives"], tmp_dict["predicted_false_positives"] + tmp_dict["predicted_true_positives"])
            # tmp_dict["accuracy_formula"] = self.handle_errors(tmp_dict["predicted_true_positives"], tmp_dict["actual_true_positives"])
            # tmp_dict["f1_score"] = self.handle_errors(2 * tmp_dict["Recall"] * tmp_dict["Precision"], tmp_dict["Recall"] + tmp_dict["Precision"])

            # accuracies.append(tmp_dict["accuracy"])
            # precisions.append(tmp_dict["Precision"])
            # recalls.append(tmp_dict["Recall"])
            # f1_scores.append(tmp_dict["f1_score"])
            # formula_acc.append(tmp_dict["accuracy_formula"])

            f.write(tmp_dict)
            count += 1

    def store_features_wmd(self, sub_sampled_data, features_path, approach):

        sub_sampled_data = jsonlines.open(sub_sampled_data)
        sub_sampled_data = [example for example in sub_sampled_data]
        print("len of data ", len(sub_sampled_data))

        with jsonlines.open(features_path, mode='w') as f:
            self.store_tf_idf_results_wmd(sub_sampled_data, f)

    # count shows how many true evidence sets were found among the predicted sentences
    def compute_score(self, true_labels, pred_labels):

        count = 0
        for true_evd_set in true_labels:
            if true_evd_set in pred_labels:
                count += 1

        return (count / len(true_labels)), count
Example #8
0
class TestModels:
    def __init__(self, approach, task):

        self.rf_classifier = TrainClassifier()
        self.approach = approach

        if task == "test":
            # self.sr_output = "./data/fever-full/classifier_results/shared_dev_true_docs_"+str(approach)+"_features_new.jsonl"
            # self.sr_output = "./data/fever-full/classifier_results/shared_dev_true_docs_"+str(approach)+"_features_new_k_5.jsonl"
            self.sr_output = "./data/fever-full/classifier_results/shared_dev_true_docs_" + str(
                approach) + "_features_new_k_5_recall.jsonl"
        else:
            # self.sr_output = "./data/fever-full/classifier_results/subsample_train_true_docs_"+str(approach)+"_features_new.jsonl"
            # self.sr_output = "./data/fever-full/classifier_results/subsample_train_true_docs_"+str(approach)+"_features_new_k_5.jsonl"
            self.sr_output = "./data/fever-full/classifier_results/subsample_train_true_docs_" + str(
                approach) + "_features_new_k_5_recall.jsonl"

        self.tfidf = TFIDF()
        self.vs = VectorSpace()
        self.wmd = wordMoverDistance()

    # load sentence retrieval results
    # find claim classifier results
    def load_sr_results(self, cc_results_path):

        read_file = jsonlines.open(self.sr_output, mode="r")
        with jsonlines.open(cc_results_path, mode="w") as f:

            for example in read_file:

                tmp_dict = {}
                tmp_dict["id"] = example["id"]

                if example["true_label"] == "SUPPORTS":
                    tmp_dict["label"] = "SUPPORTS"
                    # tmp_dict["label"] = 1

                elif example["true_label"] == "REFUTES":
                    tmp_dict["label"] = "REFUTES"
                    # tmp_dict["label"] = 2

                else:
                    tmp_dict["label"] = "Not Enough Info"
                    # tmp_dict["label"] = 0

                tmp_dict["claim"] = example["claim"]
                predicted_labels = []

                for sent in example["predicted_sentences"]:

                    if self.approach == "tfidf":

                        _, similarity_score = self.tfidf.apply_tf_idf(
                            example["claim"], sent)

                        #support
                        if similarity_score >= 0.5:
                            predicted_labels.append("SUPPORTS")
                            # predicted_labels.append(1)

                        #refute
                        elif similarity_score >= 0.15 and similarity_score < 0.5:
                            predicted_labels.append("REFUTES")

                            # predicted_labels.append(2)
                        #NEI
                        else:
                            predicted_labels.append("Not Enough Info")
                            # predicted_labels.append(0)

                    elif self.approach == "vs":

                        _, similarity_score = self.vs.apply_vector_space(
                            example["claim"], sent)

                        #support
                        if similarity_score >= 0.5:
                            predicted_labels.append("SUPPORTS")
                            # predicted_labels.append(1)

                        # refute
                        elif similarity_score >= 0.15 and similarity_score < 0.5:
                            predicted_labels.append("REFUTES")
                            # predicted_labels.append(2)
                        #NEI
                        else:
                            predicted_labels.append("Not Enough Info")
                            # predicted_labels.append(0)

                    elif approach == "wmd":

                        _, similarity_score = self.wmd.compute_wm_distance(
                            example["claim"], sent)

                        # print ("similarity_score ", similarity_score)
                        #support
                        # 0.4
                        if similarity_score <= 0.4:
                            predicted_labels.append("SUPPORTS")
                            # predicted_labels.append(1)

                        #refute
                        # similarity_score > 0.4 and similarity_score <=0.9
                        elif similarity_score > 0.4 and similarity_score <= 0.9:
                            predicted_labels.append("REFUTES")
                            # predicted_labels.append(2)
                        #NEI
                        else:
                            predicted_labels.append("Not Enough Info")
                            # predicted_labels.append(0)

                tmp_dict["claims_labels"] = predicted_labels
                tmp_dict["true_evidence"] = example["true_evidences"]
                tmp_dict["predicted_evidence"] = example[
                    "predicted_sentences_ids"]

                f.write(tmp_dict)

    # load the claim classification (cc) results
    # cc_results_path is where the cc results are stored
    # final_results_path is where the final label for each claim will be stored
    # the final label is chosen by majority voting over the per-sentence labels
    def predict_final_label(self, cc_results_path, final_results_path):

        cc_results = jsonlines.open(cc_results_path, mode='r')

        possible_labels = ["Not Enough Info", "SUPPORTS", "REFUTES"]

        with jsonlines.open(final_results_path, mode='w') as f:

            for example in cc_results:
                tmp_dict = {}
                tmp_dict["id"] = example["id"]
                tmp_dict["label"] = example["label"]
                tmp_dict["claim"] = example["claim"]
                # tmp_dict["cc_labels"] = example["claims_labels"]
                tmp_dict["evidence"] = example["true_evidence"]
                tmp_dict["predicted_evidence"] = example["predicted_evidence"]
                count_s_labels = 0
                count_r_labels = 0
                count_nei_labels = 0

                for cc_labels in example["claims_labels"]:
                    # if cc_labels == 1:
                    if cc_labels == "SUPPORTS":
                        count_s_labels += 1
                    # elif cc_labels == 2:
                    elif cc_labels == "REFUTES":
                        count_r_labels += 1
                    else:
                        count_nei_labels += 1

                if count_s_labels > count_r_labels:
                    tmp_dict["predicted_label"] = "SUPPORTS"
                    # tmp_dict["predicted_label"] = 1

                elif count_s_labels == count_r_labels and count_nei_labels == 0:
                    random_number = random.randint(1, 2)
                    tmp_dict["predicted_label"] = possible_labels[
                        random_number]
                    # tmp_dict["predicted_label"] = random_number

                elif count_s_labels > 0 and count_r_labels > 0 and count_s_labels == count_r_labels and count_nei_labels > 0:
                    random_number = random.randint(1, 2)
                    tmp_dict["predicted_label"] = possible_labels[
                        random_number]
                    # tmp_dict["predicted_label"] = random_number

                elif count_s_labels == 0 and count_r_labels == 0 and count_nei_labels > 0:
                    tmp_dict["predicted_label"] = "Not Enough Info"
                    # tmp_dict["predicted_label"] = 0

                elif count_r_labels > count_s_labels:
                    tmp_dict["predicted_label"] = "REFUTES"
                    # tmp_dict["predicted_label"] = 2

                f.write(tmp_dict)

    def compute_score(self, dataset):

        with jsonlines.open(dataset, mode='r') as f:

            true_labels = []
            pred_labels = []

            for example in f:

                true_labels.append(example["label"])
                pred_labels.append(example["predicted_label"])

            true_labels = np.array(true_labels)
            pred_labels = np.array(pred_labels)

        print(
            "score of " + self.approach,
            precision_recall_fscore_support(true_labels,
                                            pred_labels,
                                            average='weighted'))
        print("accuracy score ", accuracy_score(true_labels, pred_labels))