def preprocessing_turnout(n_data_list=None):
    """Preprocess turnout current signals and save prefix subsets to .//data//.

    Combines two sources into one list of [phase_a, phase_b, phase_c]
    signals: experiment fault records (CSV rows with error_code != 0) and
    operation records ranked by isolation-forest anomaly score
    (most anomalous first). Then, for each size in ``n_data_list``, the
    first ``size`` signals are pickled to
    ``.//data//fault_turnout_current_{size}.pkl``.

    Parameters
    ----------
    n_data_list : list of (int or None), optional
        Prefix sizes to save; a ``None`` entry means "all signals".
        Defaults to ``[None]`` (save everything once).
        NOTE: entries that are ``None`` are replaced in place, as in the
        original implementation.
    """
    # Guard: the original raised TypeError when called with no argument.
    if n_data_list is None:
        n_data_list = [None]

    file_processor = LoadSave()
    signal_data_list = []

    # ---- Experiment fault data --------------------------------------
    def liststr_to_listnumeric(list_str):
        """Parse a comma-separated string into a list of floats."""
        return list(map(float, list_str.split(",")))

    signal_fault_data = pd.read_csv(
        "..//demo_dataset//turnout//fault_data.csv",
        nrows=None).query("error_code != 0").reset_index(drop=True)
    # Each phase column holds comma-separated samples; decode all three.
    for phase in ["Phase_A", "Phase_B", "Phase_C"]:
        signal_fault_data[phase] = signal_fault_data[phase].apply(
            liststr_to_listnumeric)
    for i in range(len(signal_fault_data)):
        signal_data_list.append([
            signal_fault_data["Phase_A"].iloc[i],
            signal_fault_data["Phase_B"].iloc[i],
            signal_fault_data["Phase_C"].iloc[i],
        ])

    # ---- Operation fault data ---------------------------------------
    signal_data = file_processor.load_data(
        path="..//demo_dataset//turnout//chengdu5_raw_table.pkl")
    signal_anomaly_scores = file_processor.load_data(
        path="..//demo_dataset//turnout//chengdu5_anomaly_scores.pkl")
    # Left-join the anomaly scores, then put the most anomalous first.
    signal_data = pd.merge(signal_data, signal_anomaly_scores,
                           on=["device_id", "record_id"], how="left")
    signal_data = signal_data.sort_values(
        by="if_score", ascending=False).reset_index(drop=True)
    for i in range(len(signal_data)):
        signal_data_list.append([
            signal_data["phase_a"].iloc[i],
            signal_data["phase_b"].iloc[i],
            signal_data["phase_c"].iloc[i],
        ])

    # ---- Save the preprocessed data ---------------------------------
    # A None entry means "keep every signal".
    for ind in range(len(n_data_list)):
        if n_data_list[ind] is None:
            n_data_list[ind] = len(signal_data_list)
    file_name = [
        ".//data//fault_turnout_current_{}.pkl".format(i)
        for i in n_data_list
    ]
    for ind, item in enumerate(n_data_list):
        file_processor.save_data(path=file_name[ind],
                                 data=signal_data_list[:item])
def traj_data_signal_embedding():
    """Load train + test signal-embedding tables as one DataFrame.

    Reads both pickles from .//tcdata_tmp// and stacks them row-wise
    with a fresh integer index.
    """
    loader = LoadSave()
    embeddings = [
        loader.load_data(path=".//tcdata_tmp//train_signal_embedding.pkl"),
        loader.load_data(path=".//tcdata_tmp//test_signal_embedding.pkl"),
    ]
    return pd.concat(embeddings, axis=0, ignore_index=True)
def preprocessing_HAR(n_data_list=None):
    """Shuffle the HAR dataset and save prefix subsets to .//data//.

    Loads the human-activity-recognition data and labels from the demo
    dataset, shuffles them with a shared random permutation, and for each
    size in ``n_data_list`` pickles ``[data[:size], labels[:size]]`` to
    ``.//data//human_activity_recognition_{size}.pkl``.

    Parameters
    ----------
    n_data_list : list of (int or None), optional
        Prefix sizes to save; a ``None`` entry means "all samples".
        Defaults to ``[None]`` (save everything once).
        NOTE: ``None`` entries are replaced in place, as in the original.
    """
    # Guard: the original raised TypeError when called with no argument.
    if n_data_list is None:
        n_data_list = [None]

    file_processor = LoadSave()
    har_dataset, har_dataset_label = file_processor.load_data(
        path="..//demo_dataset//human_activity_recognition//human_activity_recognition.pkl"
    )
    har_dataset_label = np.array(har_dataset_label)

    # Shuffle data and labels with the same permutation.
    # (permutation == choice-without-replacement over the full range,
    # but states the intent directly.)
    ind = np.random.permutation(len(har_dataset_label))
    har_dataset = har_dataset[ind]
    har_dataset_label = har_dataset_label[ind]

    # A None entry means "keep every sample".
    for i in range(len(n_data_list)):
        if n_data_list[i] is None:
            n_data_list[i] = len(har_dataset)
    file_name = [
        ".//data//human_activity_recognition_{}.pkl".format(i)
        for i in n_data_list
    ]
    # Reuse the loader created above (the original built a second one).
    for i, item in enumerate(n_data_list):
        file_processor.save_data(path=file_name[i],
                                 data=[har_dataset[:item],
                                       har_dataset_label[:item]])
def load_data(name=None):
    """Load a *.pkl file from .//tcdata_tmp//.

    Parameters
    ----------
    name : str
        File name (e.g. ``"foo.pkl"``) relative to ``.//tcdata_tmp//``.

    Raises
    ------
    ValueError
        If ``name`` is None. The original used ``assert``, which is
        stripped under ``python -O``; validate with an explicit raise.
    """
    if name is None:
        raise ValueError("Invalid file name!")
    file_processor = LoadSave()
    return file_processor.load_data(path=".//tcdata_tmp//{}".format(name))
def load_data(name=None):
    """Load a *.pkl file from .//tcdata_tmp//.

    NOTE(review): this re-definition shadows an earlier ``load_data`` in
    the same module; consider keeping only one of them.

    Parameters
    ----------
    name : str
        File name relative to ``.//tcdata_tmp//``.

    Raises
    ------
    ValueError
        If ``name`` is None (the original failed with an opaque
        TypeError on the string concatenation instead).
    """
    if name is None:
        raise ValueError("Invalid file name!")
    file_processor = LoadSave()
    data = file_processor.load_data(path=".//tcdata_tmp//" + name)
    return data
def load_data(path_name=None):
    """Load a pickled object from a full path.

    ``path_name`` is a complete relative path such as
    ``.//data//mnist.pkl``.
    """
    return LoadSave().load_data(path=path_name)
def load_pkl(file_name=None):
    """Load a *.pkl file stored under .//cached_data//."""
    loader = LoadSave()
    target_path = ".//cached_data//{}".format(file_name)
    return loader.load_data(path=target_path)
def load_fishing_ground():
    """Load the fishing-ground pickle from .//tcdata//."""
    return LoadSave().load_data(".//tcdata//fishing_ground.pkl")