def preprocess_before_running_model(source_file_, facts_file_, confidence_value_computation_info_dir_, dataitem_index_file_, g_):
    """Load every structure the model needs and make sure the confidence files exist.

    Steps, in order:
      1. Read the sources file twice (header flag False), producing two
         independent objects: the "actual" trustworthiness and a working copy.
      2. Read the facts file (header flag True) and list its data items.
      3. Build the fact/source lookups through
         ``utils_dataset.load_fact_and_source_info``.
      4. If the number of entries in ``confidence_value_computation_info_dir_``
         differs from the number of data items, (re)create the value
         information through ``utils_taxonomy.create_value_info_computation``
         and reload the facts afterwards.
      5. Load the data item ids and the per-value confidence information.

    Parameters:
        source_file_: path handed to ``utils_dataset.load_sources_info``.
        facts_file_: path handed to ``utils_dataset.load_facts``.
        confidence_value_computation_info_dir_: directory listed with
            ``os.listdir``; it has to exist already.
        dataitem_index_file_: index file handed to the taxonomy helper and to
            ``utils_dataset.load_dataitem_ids``.
        g_: graph object exposing ``nodes``; forwarded to the taxonomy helper.

    Returns:
        A list of nine items, in this order: working trustworthiness, actual
        trustworthiness, facts per data item, list of data items, element 0
        and element 1 of the fact/source info (per the original notes: facts
        claimed by each source, and sources claiming each fact), then
        elements 2, 3 and 4 of the loaded confidence information (the
        original notes describe element 2 as the sources each fact must
        consider for belief propagation).
    """
    without_header = False
    with_header = True

    # Two separate loads, so the "actual" values and the working values are
    # distinct objects.
    actual_trust = utils_dataset.load_sources_info(source_file_, without_header)
    working_trust = utils_dataset.load_sources_info(source_file_, without_header)
    loaded_sources = len(working_trust)
    print(str(loaded_sources) + " sources loaded")

    # Facts, and the list of data items they refer to.
    claims_by_item = utils_dataset.load_facts(facts_file_, with_header)
    data_items = list(claims_by_item.keys())

    # Element 0: facts claimed by each source; element 1: sources claiming each
    # fact (as described by the original author's notes).
    print("Fact loading")
    fact_source_maps = utils_dataset.load_fact_and_source_info(claims_by_item)
    facts_per_source = fact_source_maps[0]
    sources_per_fact = fact_source_maps[1]

    item_total = len(claims_by_item)
    print("Computing sources for " + str(item_total) + " data items FOR COMPUTATION PURPOSE")

    # One entry per data item is expected in the directory; any mismatch
    # triggers the computation of the belief propagation files.
    existing_entries = os.listdir(confidence_value_computation_info_dir_)
    if len(existing_entries) != len(data_items):
        node_total = len(g_.nodes)
        print("graph nodes " + str(node_total))
        value_total = len(claims_by_item.values())
        print("LENGH source data item values" + str(value_total))
        computed = utils_taxonomy.create_value_info_computation(
            g_,
            claims_by_item,
            dataitem_index_file_,
            confidence_value_computation_info_dir_,
        )
        # Empty the structure that was handed to the helper and read the facts
        # again from disk before going on.
        claims_by_item.clear()
        claims_by_item = utils_dataset.load_facts(facts_file_, with_header)
        if computed:
            print("Computation DONE")
    # Otherwise the files were produced by a previous run and are reused.

    # Ids that map every data item to its file, then the stored information.
    item_ids = utils_dataset.load_dataitem_ids(dataitem_index_file_)
    confidence_infos = utils_dataset.load_all_dataitem_values_confidence_infos_low_memory(
        item_ids,
        confidence_value_computation_info_dir_,
        claims_by_item,
    )
    propagation_sources = confidence_infos[2]
    conf_lookup = confidence_infos[3]
    source_lookup = confidence_infos[4]

    # The directory and the index file are left on disk: the clean-up step is
    # disabled in this function.
    outcome = [
        working_trust,
        actual_trust,
        claims_by_item,
        data_items,
        facts_per_source,
        sources_per_fact,
        propagation_sources,
        conf_lookup,
        source_lookup,
    ]
    return outcome
def preprocess_before_running_model_only_trad(source_file_, facts_file_):
    """Load only the inputs required by the traditional models.

    Reads the sources file (header flag False) and the facts file (header
    flag True), builds the fact/source lookups with
    ``utils_dataset.load_fact_and_source_info``, then empties the facts
    structure and reads the facts file a second time.

    Parameters:
        source_file_: path handed to ``utils_dataset.load_sources_info``.
        facts_file_: path handed to ``utils_dataset.load_facts``.

    Returns:
        A list of four items, in this order: source trustworthiness, the
        freshly reloaded facts per data item, element 0 and element 1 of the
        fact/source info (per the original notes: facts claimed by each
        source, and sources claiming each fact).
    """
    without_header = False
    with_header = True

    # Trustworthiness of every source.
    trust_by_source = utils_dataset.load_sources_info(source_file_, without_header)
    loaded_sources = len(trust_by_source)
    print(str(loaded_sources) + " sources loaded")

    # Facts, used here to derive the two lookups below.
    claims_by_item = utils_dataset.load_facts(facts_file_, with_header)

    print("Fact loading")
    fact_source_maps = utils_dataset.load_fact_and_source_info(claims_by_item)
    facts_per_source = fact_source_maps[0]
    sources_per_fact = fact_source_maps[1]

    # Empty the structure that was handed to the helper, then read the facts
    # again so that a fresh object is returned.
    claims_by_item.clear()
    claims_by_item = utils_dataset.load_facts(facts_file_, with_header)

    outcome = [trust_by_source, claims_by_item, facts_per_source, sources_per_fact]
    return outcome
F_s = res_list[4] S = res_list[5] S_prop = res_list[6] app_conf_dict = res_list[7] header = False # original trustworthiness file T_actual = utils_dataset.load_sources_info(source_file, header) header = False # load trustworthiness of sources T = utils_dataset.load_sources_info(source_file, header) print(str(len(T)) + " sources loaded") S_set = list(T.keys()) # load facts header = True sources_dataItemValues = utils_dataset.load_facts(facts_file, header) D = list(sources_dataItemValues.keys()) # compute (1) all the facts that are claimed by a source and (2) all the sources that claim a specific fact # (1) set of facts that are claimed by a specific source < key = source id, value = set of facts (dataitem + value) > # (2) all the sources that claim a specific fact <key = dataitem + value, value = set of source ids> print("Fact loading") fact_and_source_info = utils_dataset.load_fact_and_source_info(sources_dataItemValues) F_s = fact_and_source_info[0] S = fact_and_source_info[1] ####################################################################################################### if Sums_and_Rules_flag: #print("Compute boost dict") #compute boosting factor based on EB method boost_dict = utils_rules.compute_boost_dict_EBS(sources_dataItemValues, R_bayes, eligible_rules, valid_values_for_r_and_d)