def store_dataset_k(dataset_k_path, dataset_k, X_k_min_dict, X_t_next_dict, index):
    """Persist the lifetime-k feature matrices of every OSN to an HDF5-style store.

    For each OSN in ``dataset_k`` this writes three DataFrames
    (``X_branching``, ``X_usergraph``, ``X_temporal``) with alphabetically
    sorted feature-name columns, plus a ``utility_arrays`` frame holding the
    per-OSN ``X_k_min``/``X_t_next`` arrays, under ``/data/<osn_name>/...``.

    NOTE(review): ``index`` is accepted but never used here — kept for
    interface compatibility with callers; confirm whether it can be dropped.
    """
    # Map each stored matrix name to the helper that yields its column names.
    matrix_name_getters = (
        ("X_branching", get_branching_feature_names),
        ("X_usergraph", get_usergraph_feature_names),
        ("X_temporal", get_temporal_feature_names),
    )

    h5_store = h5_open(dataset_k_path)
    for osn_name in dataset_k.keys():
        for matrix_name, get_feature_names in matrix_name_getters:
            frame = pd.DataFrame(dataset_k[osn_name][matrix_name],
                                 columns=sorted(list(get_feature_names(osn_name))))
            h5store_at(h5_store, "/data/" + osn_name + "/" + matrix_name, frame)

        utility_arrays = {"X_k_min_array": X_k_min_dict[osn_name],
                          "X_t_next_array": X_t_next_dict[osn_name]}
        h5store_at(h5_store,
                   "/data/" + osn_name + "/utility_arrays",
                   pd.DataFrame(utility_arrays))
    h5_close(h5_store)
def store_dataset_k(dataset_k_path, dataset_k, X_k_min_dict, X_t_next_dict, index):
    """Store the per-OSN lifetime-k matrices and utility arrays.

    Writes, for every OSN key of ``dataset_k``, the branching, user-graph and
    temporal feature matrices (columns sorted alphabetically) and a
    ``utility_arrays`` DataFrame under ``/data/<osn_name>/``.

    NOTE(review): ``index`` is unused in this body; retained so the call
    signature stays compatible with existing callers.
    """
    h5_store = h5_open(dataset_k_path)
    for osn_name in dataset_k.keys():
        base_key = "/data/" + osn_name

        branching_columns = sorted(list(get_branching_feature_names(osn_name)))
        branching_frame = pd.DataFrame(dataset_k[osn_name]["X_branching"],
                                       columns=branching_columns)
        h5store_at(h5_store, base_key + "/X_branching", branching_frame)

        usergraph_columns = sorted(list(get_usergraph_feature_names(osn_name)))
        usergraph_frame = pd.DataFrame(dataset_k[osn_name]["X_usergraph"],
                                       columns=usergraph_columns)
        h5store_at(h5_store, base_key + "/X_usergraph", usergraph_frame)

        temporal_columns = sorted(list(get_temporal_feature_names(osn_name)))
        temporal_frame = pd.DataFrame(dataset_k[osn_name]["X_temporal"],
                                      columns=temporal_columns)
        h5store_at(h5_store, base_key + "/X_temporal", temporal_frame)

        utility_frame = pd.DataFrame({"X_k_min_array": X_k_min_dict[osn_name],
                                      "X_t_next_array": X_t_next_dict[osn_name]})
        h5store_at(h5_store, base_key + "/utility_arrays", utility_frame)
    h5_close(h5_store)
def fill_X_handcrafted_k(dataset_k, h5_store_files, h5_keys, offset, k, X_k_min_dict, X_t_next_dict, osn_name):
    """Delegate to ``concatenate_features.fill_X_handcrafted_k_actual``.

    Supplies the three per-kind feature-name lists for ``osn_name``, each
    sorted alphabetically, alongside the pass-through arguments.
    """
    # Pre-compute the sorted feature-name lists once, for readability.
    branching_names = sorted(list(get_branching_feature_names(osn_name)))
    usergraph_names = sorted(list(get_usergraph_feature_names(osn_name)))
    temporal_names = sorted(list(get_temporal_feature_names(osn_name)))

    concatenate_features.fill_X_handcrafted_k_actual(dataset_k,
                                                     h5_store_files,
                                                     h5_keys,
                                                     offset,
                                                     k,
                                                     X_k_min_dict,
                                                     X_t_next_dict,
                                                     branching_names,
                                                     usergraph_names,
                                                     temporal_names,
                                                     osn_name)
def fill_X_handcrafted_k(dataset_k, h5_store_files, h5_keys, offset, k, X_k_min_dict, X_t_next_dict, osn_name):
    """Thin wrapper around ``concatenate_features.fill_X_handcrafted_k_actual``.

    Builds the alphabetically sorted branching / user-graph / temporal
    feature-name lists for ``osn_name`` and forwards everything else verbatim.
    """
    sorted_name_lists = [
        sorted(list(getter(osn_name)))
        for getter in (get_branching_feature_names,
                       get_usergraph_feature_names,
                       get_temporal_feature_names)
    ]
    concatenate_features.fill_X_handcrafted_k_actual(
        dataset_k, h5_store_files, h5_keys, offset, k,
        X_k_min_dict, X_t_next_dict,
        sorted_name_lists[0],
        sorted_name_lists[1],
        sorted_name_lists[2],
        osn_name)
def select_dataset(self, feature_osn_name_list, target_osn_name):
    """Resolve folders and per-OSN feature-name sets for an experiment.

    Returns ``(uniform_folder, results_folder, branching_feature_dict,
    usergraph_feature_dict, temporal_feature_dict)``.  Each ``*_feature_dict``
    maps an OSN name to a set of feature names: the full set from the
    corresponding ``get_*_feature_names`` helper when the matching
    ``add_*_features`` flag is on, an empty set otherwise.  A recognised
    ``"baseline"`` entry in ``self.experiment_construction_dict`` then
    overrides those sets with a fixed baseline selection.

    NOTE(review): ``target_osn_name`` is unused in this body — kept for
    signature compatibility.
    """
    uniform_folder = self.data_folder + "/features"
    results_folder = self.data_folder + "/results/" + self.osn_name_focus

    branching_feature_dict = dict()
    usergraph_feature_dict = dict()
    temporal_feature_dict = dict()
    for feature_osn_name in feature_osn_name_list:
        if self.add_branching_features:
            branching_feature_dict[feature_osn_name] = get_branching_feature_names(osn_name=feature_osn_name)
        else:
            branching_feature_dict[feature_osn_name] = set()
        if self.add_usergraph_features:
            usergraph_feature_dict[feature_osn_name] = get_usergraph_feature_names(osn_name=feature_osn_name)
        else:
            usergraph_feature_dict[feature_osn_name] = set()
        if self.add_temporal_features:
            temporal_feature_dict[feature_osn_name] = get_temporal_feature_names(osn_name=feature_osn_name)
        else:
            temporal_feature_dict[feature_osn_name] = set()

    # Baseline overrides: map baseline name -> (branching set, usergraph set);
    # ``None`` means "leave that feature kind untouched".
    baseline_overrides = {
        "comments": ({"basic_comment_count"}, None),
        "users": (None, {"user_graph_user_count"}),
        "comments_users": ({"basic_comment_count"}, {"user_graph_user_count"}),
        "simple graph": ({"basic_comment_count", "basic_max_depth",
                          "basic_max_width", "basic_ave_width"},
                         {"user_graph_user_count"}),
    }
    baseline = self.experiment_construction_dict.get("baseline")
    if baseline in baseline_overrides:
        branching_override, usergraph_override = baseline_overrides[baseline]
        for feature_osn_name in feature_osn_name_list:
            if branching_override is not None:
                # Copy so each OSN gets its own independent set.
                branching_feature_dict[feature_osn_name] = set(branching_override)
            if usergraph_override is not None:
                usergraph_feature_dict[feature_osn_name] = set(usergraph_override)

    return uniform_folder,\
        results_folder,\
        branching_feature_dict,\
        usergraph_feature_dict,\
        temporal_feature_dict
def make_feature_matrices(features_folder, osn_focus):
    """Build and persist the full-lifetime and per-k feature datasets.

    Reads the comparison-lifetime list, assembles the full dataset matrix at
    t-infinity and stores it, then for each lifetime ``k`` forms and stores a
    ``dataset_k`` HDF5 file under ``<features_folder>/dataset_k/``.

    Parameters
    ----------
    features_folder : str
        Root folder containing ``k_list``, ``dataset_full`` and ``dataset_k``
        subfolders.
    osn_focus : str
        Name of the focus OSN used to key the feature dictionaries.

    NOTE(review): the k-list path and the h5 store lookup are hard-coded to
    "post" rather than ``osn_focus`` — presumably intentional; confirm.
    """
    # Read comparison lifetimes.
    k_list_file_path = features_folder + "/k_list/focus_" + "post" + ".txt"
    k_list = load_valid_k_list(k_list_file_path)

    # Get feature names for the focus OSN, keyed by OSN name.
    branching_feature_dict = dict()
    usergraph_feature_dict = dict()
    temporal_feature_dict = dict()
    branching_feature_dict[osn_focus] = get_branching_feature_names(osn_name=osn_focus)
    usergraph_feature_dict[osn_focus] = get_usergraph_feature_names(osn_name=osn_focus)
    temporal_feature_dict[osn_focus] = get_temporal_feature_names(osn_name=osn_focus)

    # Alphabetically sorted name lists define the column order everywhere.
    branching_feature_names_list_dict = dict()
    usergraph_feature_names_list_dict = dict()
    temporal_feature_names_list_dict = dict()
    branching_feature_names_list_dict[osn_focus] = sorted(branching_feature_dict[osn_focus])
    usergraph_feature_names_list_dict[osn_focus] = sorted(usergraph_feature_dict[osn_focus])
    temporal_feature_names_list_dict[osn_focus] = sorted(temporal_feature_dict[osn_focus])

    number_of_branching_features_dict = dict()
    number_of_usergraph_features_dict = dict()
    number_of_temporal_features_dict = dict()
    number_of_branching_features_dict[osn_focus] = len(branching_feature_names_list_dict[osn_focus])
    number_of_usergraph_features_dict[osn_focus] = len(usergraph_feature_names_list_dict[osn_focus])
    number_of_temporal_features_dict[osn_focus] = len(temporal_feature_names_list_dict[osn_focus])

    # Make dataset matrix at time t_{\infty}.
    dataset_full_path = features_folder + "/dataset_full/dataset_full.h5"
    h5_stores_and_keys = get_h5_stores_and_keys(features_folder, "post")
    dataset_size = get_dataset_size(h5_stores_and_keys, "post")

    dataset_full,\
        index = form_dataset_full(dataset_size,
                                  h5_stores_and_keys,
                                  osn_focus,
                                  branching_feature_names_list_dict,
                                  usergraph_feature_names_list_dict,
                                  temporal_feature_names_list_dict,
                                  number_of_branching_features_dict,
                                  number_of_usergraph_features_dict,
                                  number_of_temporal_features_dict)
    store_dataset_full(dataset_full_path,
                       dataset_full,
                       index,
                       branching_feature_names_list_dict,
                       usergraph_feature_names_list_dict,
                       temporal_feature_names_list_dict)

    X_k_min_dict = dict()
    X_t_next_dict = dict()
    X_k_min_dict[osn_focus] = np.zeros(dataset_size, dtype=int)
    X_t_next_dict[osn_focus] = np.zeros(dataset_size, dtype=float)

    # Make one dataset matrix per comparison lifetime k.
    for k in k_list:
        dataset_k,\
            X_k_min_dict,\
            X_t_next_dict,\
            index = form_dataset_k(dataset_size,
                                   h5_stores_and_keys,
                                   float(k),
                                   X_k_min_dict,
                                   X_t_next_dict,
                                   feature_osn_name_list=[osn_focus])
        # Explicit type test instead of the old try/except-TypeError around
        # string concatenation: non-str k (e.g. a float) is rendered via
        # repr(), exactly as the exception path did.
        k_token = k if isinstance(k, str) else repr(k)
        dataset_k_path = features_folder + "/dataset_k/" + osn_focus + "_lifetime_" + k_token + "_dataset_k.h5"
        store_dataset_k(dataset_k_path, dataset_k, X_k_min_dict, X_t_next_dict, index)
def make_feature_matrices(features_folder, osn_focus):
    """Assemble and store the full dataset matrix and one matrix per lifetime k.

    Loads the comparison-lifetime list, builds the t-infinity dataset and
    stores it, then iterates over the lifetimes, forming and persisting a
    ``dataset_k`` file for each.
    """
    # Read comparison lifetimes.
    k_list_file_path = features_folder + "/k_list/focus_" + "post" + ".txt"
    k_list = load_valid_k_list(k_list_file_path)

    # Get feature names, keyed by the focus OSN.
    branching_feature_dict = {osn_focus: get_branching_feature_names(osn_name=osn_focus)}
    usergraph_feature_dict = {osn_focus: get_usergraph_feature_names(osn_name=osn_focus)}
    temporal_feature_dict = {osn_focus: get_temporal_feature_names(osn_name=osn_focus)}

    # Sorted name lists and their lengths drive matrix column layout.
    branching_feature_names_list_dict = {osn_focus: sorted(branching_feature_dict[osn_focus])}
    usergraph_feature_names_list_dict = {osn_focus: sorted(usergraph_feature_dict[osn_focus])}
    temporal_feature_names_list_dict = {osn_focus: sorted(temporal_feature_dict[osn_focus])}

    number_of_branching_features_dict = {osn_focus: len(branching_feature_names_list_dict[osn_focus])}
    number_of_usergraph_features_dict = {osn_focus: len(usergraph_feature_names_list_dict[osn_focus])}
    number_of_temporal_features_dict = {osn_focus: len(temporal_feature_names_list_dict[osn_focus])}

    # Make dataset matrix at time t_{\infty}.
    dataset_full_path = features_folder + "/dataset_full/dataset_full.h5"
    h5_stores_and_keys = get_h5_stores_and_keys(features_folder, "post")
    dataset_size = get_dataset_size(h5_stores_and_keys, "post")

    dataset_full, index = form_dataset_full(dataset_size,
                                            h5_stores_and_keys,
                                            osn_focus,
                                            branching_feature_names_list_dict,
                                            usergraph_feature_names_list_dict,
                                            temporal_feature_names_list_dict,
                                            number_of_branching_features_dict,
                                            number_of_usergraph_features_dict,
                                            number_of_temporal_features_dict)
    store_dataset_full(dataset_full_path,
                       dataset_full,
                       index,
                       branching_feature_names_list_dict,
                       usergraph_feature_names_list_dict,
                       temporal_feature_names_list_dict)

    X_k_min_dict = {osn_focus: np.zeros(dataset_size, dtype=int)}
    X_t_next_dict = {osn_focus: np.zeros(dataset_size, dtype=float)}

    for k_index, k in enumerate(k_list):
        (dataset_k,
         X_k_min_dict,
         X_t_next_dict,
         index) = form_dataset_k(dataset_size,
                                 h5_stores_and_keys,
                                 float(k),
                                 X_k_min_dict,
                                 X_t_next_dict,
                                 feature_osn_name_list=[osn_focus])
        try:
            # Works when k is already a string.
            dataset_k_path = features_folder + "/dataset_k/" + osn_focus + "_lifetime_" + k + "_dataset_k.h5"
        except TypeError:
            # Numeric k cannot be concatenated directly; render it via repr().
            dataset_k_path = features_folder + "/dataset_k/" + osn_focus + "_lifetime_" + repr(k) + "_dataset_k.h5"
        store_dataset_k(dataset_k_path, dataset_k, X_k_min_dict, X_t_next_dict, index)
def select_dataset(self, feature_osn_name_list, target_osn_name):
    """Return folder paths and per-OSN feature-name sets for this experiment.

    Builds branching / user-graph / temporal feature dictionaries from the
    ``add_*_features`` flags, then applies a baseline override (fixed small
    feature sets) when ``self.experiment_construction_dict`` carries a
    recognised ``"baseline"`` value.

    NOTE(review): ``target_osn_name`` is not read in this body; retained for
    caller compatibility.
    """
    data_folder = self.data_folder
    uniform_folder = data_folder + "/features"
    results_folder = data_folder + "/results/" + self.osn_name_focus

    branching_feature_dict = {}
    usergraph_feature_dict = {}
    temporal_feature_dict = {}
    for osn in feature_osn_name_list:
        branching_feature_dict[osn] = (get_branching_feature_names(osn_name=osn)
                                       if self.add_branching_features else set())
        usergraph_feature_dict[osn] = (get_usergraph_feature_names(osn_name=osn)
                                       if self.add_usergraph_features else set())
        temporal_feature_dict[osn] = (get_temporal_feature_names(osn_name=osn)
                                      if self.add_temporal_features else set())

    baseline = self.experiment_construction_dict.get("baseline")
    if baseline == "comments":
        for osn in feature_osn_name_list:
            branching_feature_dict[osn] = {"basic_comment_count"}
    elif baseline == "users":
        for osn in feature_osn_name_list:
            usergraph_feature_dict[osn] = {"user_graph_user_count"}
    elif baseline == "comments_users":
        for osn in feature_osn_name_list:
            branching_feature_dict[osn] = {"basic_comment_count"}
            usergraph_feature_dict[osn] = {"user_graph_user_count"}
    elif baseline == "simple graph":
        for osn in feature_osn_name_list:
            branching_feature_dict[osn] = {"basic_comment_count",
                                           "basic_max_depth",
                                           "basic_max_width",
                                           "basic_ave_width"}
            usergraph_feature_dict[osn] = {"user_graph_user_count"}

    return uniform_folder,\
        results_folder,\
        branching_feature_dict,\
        usergraph_feature_dict,\
        temporal_feature_dict