def __fit_gmcm_using_matlab(self, gmcm_fitting_k: int,
                            gmcm_max_fitting_iteration: int,
                            gmcm_fitting_attempt: int,
                            debug: bool = False):
    """
    Fit the GMCM with MATLAB and store the result in .mat file(s).

    A model file that can already be found is not fitted again.

    :param gmcm_fitting_k: number of components of the GMCM
    :param gmcm_max_fitting_iteration: max fitting iteration of the GMCM
    :param gmcm_fitting_attempt: number of fitting attempts. With exactly 1,
        the model is saved to self.gmcm_model_file_. Otherwise one model is
        saved per attempt: attempt 0 uses self.gmcm_model_file_ and attempt
        i > 0 uses the same name with "_<i>" inserted before ".mat".
    :param debug: if True, call the plotting methods
        (plot_ndarray_data_in_uniform_and_simulated for the single-attempt
        case and for attempt 0, plot_simulated for later attempts)
    :return:
    """
    # NOTE(review): `eng` and `double` are not defined in this method;
    # presumably a module-level MATLAB engine handle and matlab.double --
    # confirm against the file's imports.
    if gmcm_fitting_attempt == 1:
        # eng = matlab.engine.start_matlab()
        eng.addpath(self.matlab_script_folder_path, nargout=0)
        # Only fit when the model file cannot be found.
        if not try_to_find_file(self.gmcm_model_file_):
            eng.estimate_gmcm_and_save(double(
                self.ndarray_data_in_uniform.tolist()),
                self.gmcm_model_file_,
                gmcm_fitting_k,
                gmcm_max_fitting_iteration,
                nargout=0)
        if debug:
            self.plot_ndarray_data_in_uniform_and_simulated(1_000_000)
        # eng.quit()
    else:
        for i in range(gmcm_fitting_attempt):
            # Attempt 0 keeps the configured file name; later attempts get
            # an "_<i>" suffix in front of the ".mat" extension.
            if i > 0:
                renamed = self.gmcm_model_file_.split(".mat")
                save_name = renamed[0] + "_" + str(i) + ".mat"
            else:
                save_name = self.gmcm_model_file_
            # eng = matlab.engine.start_matlab()
            eng.addpath(self.matlab_script_folder_path, nargout=0)
            # Only fit when this attempt's model file cannot be found.
            if not try_to_find_file(save_name):
                eng.estimate_gmcm_and_save(double(
                    self.ndarray_data_in_uniform.tolist()),
                    save_name,
                    gmcm_fitting_k,
                    gmcm_max_fitting_iteration,
                    nargout=0)
            # Remember the configured file name so it can be restored below.
            protect = self.gmcm_model_file_
            if i > 0:
                save_name = self.gmcm_model_file_.split(
                    f".mat")[0] + "_" + str(i) + ".mat"
            # Temporarily point the instance at this attempt's file;
            # presumably the plotting methods read self.gmcm_model_file_
            # -- confirm.
            self.gmcm_model_file_ = save_name
            if debug:
                if i == 0:
                    self.plot_ndarray_data_in_uniform_and_simulated(
                        200_000, title=f"attempt = {i} ")
                else:
                    self.plot_simulated(200_000, title=f"attempt = {i} ")
            # Restore the configured file name.
            self.gmcm_model_file_ = protect
def energies_paper_train_nilm_models_for_ampds2_dataset(top_n: int = 3):
    """
    Train (or re-load) the NILM models for the ampds2 dataset.

    Only the appliances HPE, FRE and CDE are selected from the training set.
    The models handled here follow the nilmtk standard; the torch-based
    algorithms are handled separately.

    For each model, an already exported '.pkl' file is imported; otherwise
    the model is trained, exported, and the elapsed time is printed.

    :param top_n: number of appliances to use (default 3). Currently unused
        by the body: the three appliance names are hard-coded.
    :return:
    """
    # Prepare the training data (the other two returned items are not used).
    training_set, _unused_test_set, _ = \
        get_training_set_and_test_set_for_ampds2_dataset()
    selected_train_elec = training_set.select_using_appliances(
        original_name=['HPE', 'FRE', 'CDE'])

    # Folder the models are saved to.
    output_folder = Path('../../../Data/Results/Energies_paper/Ampds2')
    output_folder.mkdir(parents=True, exist_ok=True)

    # All models to train.
    candidates = {'CO': CombinatorialOptimisation(), 'FHMM': FHMM()}
    sample_period = 60  # everything is down-sampled to 60 s
    banner = "*" * 20

    for label, model in candidates.items():
        pkl_file = output_folder / (label + '.pkl')

        # Re-use a previously exported model when one is available.
        if try_to_find_file(pkl_file):
            model.import_model(pkl_file)
            continue

        print(banner)
        print(label)
        print(banner)
        # The timing covers both the training and the export.
        started_at = time.time()
        model.train(selected_train_elec, sample_period=sample_period)
        model.export_model(pkl_file)
        finished_at = time.time()
        print("Runtime =", finished_at - started_at, "seconds.")
def load_npy_file(file_path: Path):
    """
    Load a NumPy file.

    :param file_path: location of the file; converted to str before use
    :return: the result of np.load if the file is found, otherwise None
    """
    path_text = str(file_path)
    # Guard clause: nothing to load when the file cannot be found.
    if try_to_find_file(path_text) is False:
        return None
    return np.load(path_text)
def load_pkl_file(file_path: Path):
    """
    Load a pickled object from a '.pkl' file.

    :param file_path: location of the file; must end with '.pkl'
    :return: the unpickled object if the file is found, otherwise None
    :raises Exception: if 'file_path' does not end with '.pkl'
    """
    path_text = str(file_path)
    # Validate the extension before touching the file system.
    if re.match(r".*\.pkl$", path_text) is None:
        raise Exception("'file_path' should end with '.pkl'")
    # Guard clause: nothing to load when the file cannot be found.
    if try_to_find_file(path_text) is False:
        return None
    with open(path_text, "rb") as pkl_stream:
        return pickle.load(pkl_stream)
def fit(self, only_at_edge_idx: int = None,
        gmcm_fitting_attempt: int = 1, *,
        gmcm_fitting_k: int = 8):
    """
    Fit a GMCM pair copula for each edge whose model file cannot be found.

    Edges are visited in the order of self.gmcm_model_files_for_construction.
    For an edge with an empty conditioning set, the two inputs are columns
    of self.ndarray_data_in_uniform. For any other edge, the inputs are
    partial derivatives of the copula CDFs of pair copulas fitted earlier in
    this call.

    :param only_at_edge_idx: if not None, edges with an index greater than
        this value are skipped
    :param gmcm_fitting_attempt: forwarded to GMCM as gmcm_fitting_attempt
    :param gmcm_fitting_k: forwarded to GMCM as gmcm_fitting_k
    :return:
    """
    # Fit the models.
    # NOTE(review): an edge whose model file already exists appends nothing
    # to this list, so the copula indices returned by
    # identify_input_copula_for_tree presumably assume every earlier edge
    # was fitted in this same call -- confirm.
    initialised_pair_copula_of_each_edge = []
    for edge_idx, this_edge_gmcm in enumerate(
            self.gmcm_model_files_for_construction):
        if only_at_edge_idx is not None:
            if edge_idx > only_at_edge_idx:
                continue
        # If the model file does not exist, fit it and write the
        # ndarray_data_in_uniform information.
        if not try_to_find_file(this_edge_gmcm):
            if self.resolved_construction['conditioning'][edge_idx] == ():
                # edge_var_1 and edge_var_2 are the two variables under
                # consideration. At this point 1 still denotes the first
                # dimension, so 1 is subtracted when indexing the data.
                edge_var_1, edge_var_2 = self.resolved_construction[
                    'conditioned'][edge_idx]
                input_left = self.ndarray_data_in_uniform[:, edge_var_1 - 1]
                input_right = self.ndarray_data_in_uniform[:, edge_var_2 - 1]
            else:
                (input_left_copula_idx, input_right_copula_idx,
                 left_conditioning, right_conditioning
                 ) = self.identify_input_copula_for_tree(edge_idx)
                # Compute the left and right inputs.
                input_left = initialised_pair_copula_of_each_edge[
                    input_left_copula_idx].cal_copula_cdf_partial_derivative(
                    use_ndarray_data_in_uniform=True,
                    partial_derivative_var_idx=(left_conditioning, ))
                input_right = initialised_pair_copula_of_each_edge[
                    input_right_copula_idx].cal_copula_cdf_partial_derivative(
                    use_ndarray_data_in_uniform=True,
                    partial_derivative_var_idx=(right_conditioning, ))
            # The GMCM is given the output of all_vars_valid_data applied to
            # the stacked inputs; its name is "GMCM_<conditioned>|<conditioning>".
            initialised_pair_copula_of_each_edge.append(
                GMCM(gmcm_model_file_=this_edge_gmcm,
                     ndarray_data_in_uniform=self.all_vars_valid_data(
                         np.stack((input_left, input_right), axis=1)),
                     gmcm_fitting_k=gmcm_fitting_k,
                     gmcm_max_fitting_iteration=50_000,
                     gmcm_fitting_attempt=gmcm_fitting_attempt,
                     debug=True,
                     str_name='GMCM_{}'.format(
                         str(self.resolved_construction['conditioned']
                             [edge_idx]) + '|' +
                         str(self.resolved_construction['conditioning']
                             [edge_idx]))))
            # Re-assign ndarray_data_in_uniform so that it contains nan and
            # its size is aligned with the input.
            initialised_pair_copula_of_each_edge[
                -1].ndarray_data_in_uniform = np.stack(
                (input_left, input_right), axis=1)
def pair_copula_instance_of_each_edge(self):
    """
    Build one GMCM instance per edge from the stored model files.

    :return: a tuple with one entry per item of
        self.gmcm_model_files_for_construction, in order. An entry is a GMCM
        named "GMCM_<conditioned>|<conditioning>" when the edge's model file
        is found, and None otherwise.
    """

    def _instance_for_edge(position, model_file):
        # Edges without a model file on disk are represented by None.
        if not try_to_find_file(model_file):
            return None
        conditioned_text = str(
            self.resolved_construction['conditioned'][position])
        conditioning_text = str(
            self.resolved_construction['conditioning'][position])
        return GMCM(gmcm_model_file_=model_file,
                    str_name='GMCM_{}'.format(
                        conditioned_text + '|' + conditioning_text))

    return tuple(
        _instance_for_edge(position, model_file)
        for position, model_file in enumerate(
            self.gmcm_model_files_for_construction))
def update_exist_pkl_file_otherwise_run_and_save(file_path: Path):
    """
    Decorator factory that feeds a stored '.pkl' object to the decorated
    function and saves the function's result back to the same file.

    On every call of the decorated function, the object loaded from
    'file_path' is passed as the keyword argument 'existing_file', the
    returned object is saved to 'file_path', and then returned to the caller.

    :param file_path: location of the '.pkl' file; it is asserted to be
        found when the decorator is created
    :return: a decorator
    :raises Exception: if 'file_path' does not end with '.pkl'
    """
    # Validate the extension first, then require that the file is present.
    if re.match(r".*\.pkl$", str(file_path)) is None:
        raise Exception("'file_path' should end with '.pkl'")
    assert try_to_find_file(file_path)

    def decorator(func):
        @functools.wraps(func)
        def wrapped_call(*args, **kwargs):
            # Load the stored object before running the function.
            previous = load_pkl_file(file_path)
            result = func(*args, existing_file=previous, **kwargs)
            save_pkl_file(file_path, result)
            return result

        return wrapped_call

    return decorator