Example #1
import os

import utils_dataset      # project-local helper modules
import utils_taxonomy


def preprocess_before_running_model(source_file_, facts_file_,
                                    confidence_value_computation_info_dir_,
                                    dataitem_index_file_, g_):
    # load source information
    header = False
    # T_actual_: dictionary with the original (ground-truth) trustworthiness
    T_actual_ = utils_dataset.load_sources_info(source_file_, header)
    T_ = utils_dataset.load_sources_info(source_file_, header)
    print(str(len(T_)) + " sources loaded")
    # load fact information
    header = True
    sources_dataItemValues_ = utils_dataset.load_facts(facts_file_, header)
    # load data item set
    D_ = list(sources_dataItemValues_.keys())

    # compute (1) all the facts that are claimed by a source and (2) all the sources that claim a specific fact
    # (1) set of facts that are claimed by a specific source < key = source id, value = set of facts (dataitem + value) >
    # (2) all the sources that claim a specific fact <key = dataitem + value, value = set of source ids>
    print("Fact loading")
    fact_and_source_info_ = utils_dataset.load_fact_and_source_info(
        sources_dataItemValues_)
    F_s_ = fact_and_source_info_[0]
    S_ = fact_and_source_info_[1]

    print("Computing sources for " + str(len(sources_dataItemValues_)) +
          " data items FOR COMPUTATION PURPOSES")
    if not (len(os.listdir(confidence_value_computation_info_dir_))
            == len(D_)):
        # compute the files for belief propagation information
        print("graph nodes " + str(len(g_.nodes)))
        print("LENGH source data item values" +
              str(len(sources_dataItemValues_.values())))
        res = utils_taxonomy.create_value_info_computation(
            g_, sources_dataItemValues_, dataitem_index_file_,
            confidence_value_computation_info_dir_)
        sources_dataItemValues_.clear()
        header = True
        sources_dataItemValues_ = utils_dataset.load_facts(facts_file_, header)
        if res:
            print("Computation DONE")
    # else: the files containing the belief propagation info have already been computed,
    # so load the corresponding dataitem ids in order to use those files
    dataitem_ids_ = utils_dataset.load_dataitem_ids(dataitem_index_file_)
    # load the information
    dataitem_values_info_ = utils_dataset.load_all_dataitem_values_confidence_infos_low_memory(
        dataitem_ids_, confidence_value_computation_info_dir_,
        sources_dataItemValues_)
    # S_prop is a dictionary containing, for each fact, all the sources that must be
    # taken into account in order to leverage the belief propagation framework
    S_prop_ = dataitem_values_info_[2]
    app_conf_dict_ = dataitem_values_info_[3]
    app_source_dict_ = dataitem_values_info_[4]
    # optionally delete the folder confidence_value_computation_info_dir_ and the corresponding index file
    # shutil.rmtree(confidence_value_computation_info_dir_)
    # os.remove(dataitem_index_file_)

    return [
        T_, T_actual_, sources_dataItemValues_, D_, F_s_, S_, S_prop_,
        app_conf_dict_, app_source_dict_
    ]
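
A minimal usage sketch for the function above. The file paths and the graph are hypothetical placeholders (networkx is an assumption; the snippet only needs len(g_.nodes)), and the nine-element return list is unpacked positionally:

import networkx as nx

source_file = "data/sources.txt"                  # hypothetical path
facts_file = "data/facts.txt"                     # hypothetical path
conf_info_dir = "data/conf_value_info/"           # hypothetical path
dataitem_index_file = "data/dataitem_index.txt"   # hypothetical path
g = nx.DiGraph()                                  # taxonomy graph expected by the model

(T, T_actual, sources_dataItemValues, D, F_s, S, S_prop,
 app_conf_dict, app_source_dict) = preprocess_before_running_model(
     source_file, facts_file, conf_info_dir, dataitem_index_file, g)

print(str(len(D)) + " data items, " + str(len(S)) + " distinct facts")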
Example #2
import utils_dataset      # project-local helper module


def preprocess_before_running_model_only_trad(source_file_, facts_file_):
    # load source information
    header = False
    # T_: dictionary with the original trustworthiness of each source
    T_ = utils_dataset.load_sources_info(source_file_, header)
    print(str(len(T_)) + " sources loaded")
    # load fact information
    header = True
    sources_dataItemValues_ = utils_dataset.load_facts(facts_file_, header)

    # compute (1) all the facts that are claimed by a source and (2) all the sources that claim a specific fact
    # (1) set of facts that are claimed by a specific source < key = source id, value = set of facts (dataitem + value) >
    # (2) all the sources that claim a specific fact <key = dataitem + value, value = set of source ids>
    print("Fact loading")
    fact_and_source_info_ = utils_dataset.load_fact_and_source_info(
        sources_dataItemValues_)
    F_s_ = fact_and_source_info_[0]
    S_ = fact_and_source_info_[1]

    sources_dataItemValues_.clear()
    header = True
    sources_dataItemValues_ = utils_dataset.load_facts(facts_file_, header)

    return [T_, sources_dataItemValues_, F_s_, S_]
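
The two dictionaries returned by load_fact_and_source_info are described only in the comments above; here is a toy illustration of the assumed shapes (keys and the "dataitem|value" delimiter are made up for illustration):

F_s = {  # source id -> set of facts (dataitem + value) claimed by that source
    "s1": {"capital_Italy|Rome", "capital_France|Paris"},
    "s2": {"capital_Italy|Milan"},
}
S = {  # dataitem + value -> set of ids of the sources claiming that fact
    "capital_Italy|Rome": {"s1"},
    "capital_Italy|Milan": {"s2"},
    "capital_France|Paris": {"s1"},
}
# S gives constant-time lookup of the sources supporting a candidate value,
# while F_s gives the full claim set of each source.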
Example #3
import os

import utils_dataset      # project-local helper modules
import utils_writing_results


def analysis_trustworthiness_estimations(dataset_dir, dir_path,
                                         output_error_rate_dir):

    if not os.path.exists(output_error_rate_dir):
        os.makedirs(output_error_rate_dir)

    for root, dirs, files in os.walk(dir_path):
        # skip the top-level noise-model folders and anything not three levels deep
        if dirs in (["EXP", "LOW_E", "UNI"], ["EXP"], ["UNI"], ["LOW_E"]):
            continue
        if root.count('/') != 3:
            continue

        for dir_name in dirs:
            #print("dir name :" + str(dir_name))
            dir_name = dir_name.replace("dataset", "")

            n_dataset = dir_name
            if "-" in n_dataset:
                n_dataset = n_dataset.replace("UNI-", "")
                n_dataset = n_dataset.replace("EXP-", "")
                n_dataset = n_dataset.replace("LOW_E-", "")
                n_dataset = n_dataset.replace("-", "")
                n_folder_app = "-" + str(n_dataset[0:2])
                n_dataset = n_dataset[2:]
                n_folder_app = n_folder_app + n_dataset

            else:
                n_dataset = n_dataset.replace("UNI_", "")
                n_dataset = n_dataset.replace("EXP_", "")
                n_dataset = n_dataset.replace("LOW_E_", "")
                n_dataset = n_dataset.replace("_", "")
                n_folder_app = "_"
                n_folder_app = n_folder_app + n_dataset

            # facts file path
            if "UNI" in root:
                subfolder_path_results = "UNI/"
                subfolder_path = "UNI/dataset" + str(n_folder_app) + "/"
                source_file = dataset_dir + "/UNI/dataset" + str(
                    n_folder_app) + "/Output_acc_" + str(n_dataset) + ".txt"

            if "EXP" in root:
                subfolder_path_results = "EXP/"
                subfolder_path = "EXP/dataset" + str(n_folder_app) + "/"
                source_file = dataset_dir + "/EXP/dataset" + str(
                    n_folder_app) + "/Output_acc_" + str(n_dataset) + ".txt"

            if "LOW_E" in root:
                subfolder_path_results = "LOW_E/"
                subfolder_path = "LOW_E/dataset" + str(n_folder_app) + "/"
                source_file = dataset_dir + "/LOW_E/dataset" + str(
                    n_folder_app) + "/Output_acc_" + str(n_dataset) + ".txt"

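            # output_estimations_main_dir is a module-level global in the original project (not shown in this snippet)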
            output_estimations = os.path.join(output_estimations_main_dir,
                                              subfolder_path)
            output_error_rate = os.path.join(output_error_rate_dir,
                                             subfolder_path_results)
            if not os.path.exists(output_error_rate):
                os.makedirs(output_error_rate)

            output_file_path = os.path.join(
                output_error_rate,
                "trust_error_rate_dataset_" + str(n_folder_app) + ".csv")

            header = False
            # T_actual: dictionary with the ground-truth trustworthiness of each source
            T_actual = utils_dataset.load_sources_info(source_file, header)

            trust_file_trad = os.path.join(
                output_estimations,
                "trad_est_trust_" + str(n_dataset) + ".csv")
            trust_trad = utils_dataset.read_trust_estimation_file(
                trust_file_trad)
            error_rate_trad = compute_error_rate_for_trustworthiness(
                T_actual, trust_trad)

            # adapted model 1 (only confidence) results files and dir
            trust_file_adapt = os.path.join(
                output_estimations,
                "adapt_est_trust_" + str(n_dataset) + ".csv")
            trust_adapt = utils_dataset.read_trust_estimation_file(
                trust_file_adapt)
            error_rate_adapt = compute_error_rate_for_trustworthiness(
                T_actual, trust_adapt)

            utils_writing_results.writing_trustworthiness_error_rate_file(
                output_file_path, error_rate_trad, error_rate_adapt)

    return True
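
The prefix-stripping logic above is easier to follow in isolation. A standalone sketch of that parsing (the function name and sample directory names are made up for illustration):

def split_dataset_name(dir_name):
    """Return (n_folder_app, n_dataset) as computed in the loop above."""
    name = dir_name.replace("dataset", "")
    if "-" in name:
        # "-" layout: strip the noise-model prefix, drop the dashes,
        # and treat the first two characters as the folder index
        for prefix in ("UNI-", "EXP-", "LOW_E-"):
            name = name.replace(prefix, "")
        name = name.replace("-", "")
        return "-" + name, name[2:]
    # "_" layout: strip the noise-model prefix and the underscores
    for prefix in ("UNI_", "EXP_", "LOW_E_"):
        name = name.replace(prefix, "")
    name = name.replace("_", "")
    return "_" + name, name


print(split_dataset_name("datasetUNI-10x"))  # ('-10x', 'x')
print(split_dataset_name("datasetEXP_7"))    # ('_7', '7')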
Example #4
			if AdaptedSums_and_Rules_flag:
				res_list = preprocessing_sums_model.preprocess_before_running_model(source_file, facts_file,
																					confidence_value_computation_info_dir,
																					dataitem_index_file,
																					g)
				T = res_list[0]
				T_actual = res_list[1]
				sources_dataItemValues = res_list[2]
				D = res_list[3]
				F_s = res_list[4]
				S = res_list[5]
				S_prop = res_list[6]
				app_conf_dict = res_list[7]
				app_source_dict = res_list[8]  # last element of the nine-item list returned in Example #1

			header = False  # original trustworthiness file
			T_actual = utils_dataset.load_sources_info(source_file, header)

			header = False # load trustworthiness of sources
			T = utils_dataset.load_sources_info(source_file, header)
			print(str(len(T)) + " sources loaded")
			S_set = list(T.keys())

			# load facts
			header = True
			sources_dataItemValues = utils_dataset.load_facts(facts_file, header)
			D = list(sources_dataItemValues.keys())

			# compute (1) all the facts that are claimed by a source and (2) all the sources that claim a specific fact
			# (1) set of facts that are claimed by a specific source < key = source id, value = set of facts (dataitem + value) >
			# (2) all the sources that claim a specific fact <key = dataitem + value, value = set of source ids>
			print("Fact loading")