def __init__(self, batch_size):
    """Load the MNIST splits as 28x28x1 image tensors.

    NOTE(review): val and test both use mnist.test — confirm this is intended.
    """
    Dataset.__init__(self, batch_size)
    image_shape = (-1, 28, 28, 1)

    def as_images(split):
        # Reshape each flat 784-vector image into NHWC form.
        return DataValues(x=split.images.reshape(*image_shape), y=split.labels)

    self.set_train(as_images(mnist.train))
    self.set_val(as_images(mnist.test))
    self.set_test(as_images(mnist.test))
def __init__(self, batch_size):
    """Load the MNIST splits reshaped as (N_STEPS, N_STEPS) sequences.

    NOTE(review): val and test both use mnist.test — confirm this is intended.
    """
    Dataset.__init__(self, batch_size)
    sequence_shape = (-1, N_STEPS, N_STEPS)

    def as_sequences(split):
        # Each flat 784-vector image becomes an N_STEPS x N_STEPS sequence.
        return DataValues(x=split.images.reshape(*sequence_shape), y=split.labels)

    self.set_train(as_sequences(mnist.train))
    print("Label size is ", self.get_train().y.shape)
    self.set_val(as_sequences(mnist.test))
    self.set_test(as_sequences(mnist.test))
def aimFunc(self, pop):
    """Evaluate the objective value of every individual in the population.

    Each row of pop.Phen is a 0/1 feature-selection mask. For a non-empty
    mask, a forecast model is trained on the selected features and its loss
    becomes the objective; an all-zero mask gets an infinite loss. Results
    are written back into pop.ObjV as an (n, 1) column.
    """
    phen = pop.Phen
    individual_count = phen.shape[0]
    objective_column = np.zeros(shape=(individual_count, 1))
    for idx in range(individual_count):
        flags = np.array(phen[idx, :])
        if (flags == 0).all():
            # A mask selecting no feature at all is invalid; assign an
            # infinite loss so the optimizer steers away from it.
            fitness = float("inf")
        else:
            combination = FeatureCombination(
                self.datasetX, self.datasetY,
                self.featureNamesAfterCombination,
                {
                    "isPerformCombination": True,
                    "featureFlags": flags
                })
            selected_x, selected_y, _selected_names = combination.getDatasetAfterCombination()
            candidate_dataset = Dataset(selected_x, selected_y, self.datasetParams)
            template = ForecastTemplate(candidate_dataset, self.forecastModelParams)
            fitness = template.getObjective()
            # fitness = np.random.random()
        # Text progress bar, e.g. "3/10\t[==>.......]-30%".
        progress_bar = (str(idx + 1) + "/" + str(individual_count) + "\t["
                        + "=" * idx + ">" + "." * (individual_count - idx - 1)
                        + "]-" + str(round((idx + 1) / individual_count * 100)) + "%")
        print(progress_bar)
        objective_column[idx, 0] = fitness
    pop.ObjV = objective_column
def __init__(self, tree, branch_names, validation_fraction, test_fraction, is_signal, weight_name=None, only_positive_weights=True):
    """Build training/validation/test splits from a tree of events.

    Args:
        tree: input tree, read via treeToArray.
        branch_names: branches to read as the sample features.
        validation_fraction: fraction of events used for validation.
        test_fraction: fraction of events used for testing.
        is_signal: when True every event is labeled 1, otherwise 0.
        weight_name: optional branch holding per-event weights; when None
            all events get unit weight.
        only_positive_weights: when True (and a weight branch is given),
            events with non-positive weight are dropped at read time.

    Raises:
        ValueError: if validation_fraction + test_fraction >= 1.
    """
    # Test whether sensible input is given before touching the tree.
    if (validation_fraction + test_fraction) >= 1:
        raise ValueError(
            'validation and test fractions sum to a value greater or equal to 1!'
        )

    # Read the total dataset from the tree, and only retain positive-weight
    # events if asked. ('x is not None' replaces the non-idiomatic
    # 'not x is None' used previously — same logic.)
    has_weights = weight_name is not None
    reading_cut = '{}>0'.format(weight_name) if (only_positive_weights and has_weights) else ''
    samples_total = treeToArray(tree, branch_names, reading_cut)
    number_of_samples = len(samples_total)
    weights_total = (treeToArray(tree, weight_name, reading_cut) if has_weights
                     else np.ones(number_of_samples))
    labels_total = np.ones(number_of_samples) if is_signal else np.zeros(
        number_of_samples)
    total_dataset = Dataset(samples_total, weights_total, labels_total)

    # Randomly shuffle the dataset to prevent structure before splitting.
    total_dataset.shuffle()

    # Split into training / validation / test by index boundaries.
    max_index_training = int(number_of_samples *
                             (1 - validation_fraction - test_fraction))
    max_index_validation = int(number_of_samples * (1 - test_fraction))
    self.__training_set = total_dataset[:max_index_training]
    self.__validation_set = total_dataset[
        max_index_training:max_index_validation]
    self.__test_set = total_dataset[max_index_validation:]
def main():
    """Entry point: build the training dataloader, model and optimizer, then train."""
    args = DefaultConfig()

    # --- dataset and dataloader ---
    dataset_train = Dataset(args.train_data_path,
                            scale=(args.trainsize, args.trainsize),
                            augmentations=args.augmentations)
    dataloader_train = DataLoader(dataset_train,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  pin_memory=True,
                                  drop_last=True)

    # --- model ---
    os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda
    model = {'MyNet': MyNet()}[args.net_work]
    print(args.net_work)
    cudnn.benchmark = True
    # model._initialize_weights()
    if torch.cuda.is_available() and args.use_gpu:
        # Wrap for multi-GPU execution and move to the GPU.
        model = torch.nn.DataParallel(model).cuda()

    # --- optimizer ---
    if args.optimizer == 'AdamW':
        print("using AdamW")
        optimizer = torch.optim.AdamW(model.parameters(), args.lr, weight_decay=1e-4)
    else:
        print("using SGD")
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

    batches_per_epoch = len(dataloader_train)
    train(args, model, optimizer, dataloader_train, batches_per_epoch)
def __init__(self, orgData, featureParams, datasetParams, forecastModelParams):
    """Run the end-to-end pipeline: feature engineering, dataset build, model fit, forecast."""
    self.orgData = orgData
    self.featureParams = featureParams
    self.datasetParams = datasetParams
    self.forecastModelParams = forecastModelParams

    # Feature generation -> feature selection -> feature combination -> feature optimization.
    feature_pipeline = FeatureTemplate(orgData, featureParams, datasetParams,
                                       forecastModelParams)
    self.datasetX, self.datasetY = feature_pipeline.getFinalDataset()

    # Assemble the dataset from the engineered features.
    self.dataset = Dataset(self.datasetX, self.datasetY, datasetParams)

    # Hyper-parameter optimization followed by model training.
    model_pipeline = ForecastTemplate(self.dataset, forecastModelParams)
    self.forecastModel = model_pipeline.getFinalForecastModel()

    # Produce the final deterministic and probabilistic forecasts.
    self.forecastModel.predict()
    self.forecastModel.getProbabilisticResults()
def create_dataset(data_dir, p=16, k=8):
    """
    create a train or eval dataset

    Args:
        data_dir(string): the path of dataset.
        p(int): randomly choose p classes from all classes.
        k(int): randomly choose k images from each of the chosen p classes.
            p * k is the batchsize.

    Returns:
        dataset yielding batched (image, label1, label2) columns.
    """
    # (Docstring fix: the parameter was documented as 'dataset_path' but is
    # actually named 'data_dir'.)
    dataset = Dataset(data_dir)
    de_dataset = de.GeneratorDataset(dataset, ["image", "label1", "label2"])

    resize_height = config.image_height
    resize_width = config.image_width
    rescale = 1.0 / 255.0
    shift = 0.0

    # Image pipeline: resize -> scale to [0, 1] -> normalize -> HWC to CHW.
    resize_op = CV.Resize((resize_height, resize_width))
    rescale_op = CV.Rescale(rescale, shift)
    # NOTE(review): means (0.486, 0.459, 0.408) are close to, but not exactly,
    # the common ImageNet means (0.485, 0.456, 0.406) — confirm intended values.
    normalize_op = CV.Normalize([0.486, 0.459, 0.408], [0.229, 0.224, 0.225])
    change_swap_op = CV.HWC2CHW()
    trans = [resize_op, rescale_op, normalize_op, change_swap_op]

    type_cast_op_label1 = C.TypeCast(mstype.int32)
    type_cast_op_label2 = C.TypeCast(mstype.float32)

    de_dataset = de_dataset.map(input_columns="label1",
                                operations=type_cast_op_label1)
    de_dataset = de_dataset.map(input_columns="label2",
                                operations=type_cast_op_label2)
    de_dataset = de_dataset.map(input_columns="image", operations=trans)
    # PK-sampling batch: p classes x k images; keep the possibly smaller last batch.
    de_dataset = de_dataset.batch(p * k, drop_remainder=False)

    return de_dataset
import skimage.transform
import numpy as np
import tensorflow as tf
import ssd300
import time
import cv2
from dataset.Dataset import Dataset
'''
SSD detection
'''
# Inference configuration: one image per batch; 'dsod' graph variant.
# NOTE(review): class_size = 22 — cannot confirm the class breakdown from this chunk.
batch_size = 1
class_size = 22
graph_config = 'dsod'
# Test-set reader; the preprocess function subtracts the RGB mean from each image.
# NOTE(review): 'whitened_RGB_mean' is not defined anywhere in this chunk —
# presumably defined elsewhere in the module; verify.
dataset = Dataset(batch_size=batch_size,
                  img_preprocess_fn=lambda x: x - whitened_RGB_mean)


def testing():
    # Run detection on one test batch, restoring trained weights if present.
    # Cap GPU memory usage so other processes can share the device.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        ssd_model = ssd300.SSD300(sess,
                                  False,
                                  class_size=class_size,
                                  graph_config=graph_config)
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(var_list=tf.trainable_variables())
        # Restore previously trained weights when a checkpoint exists.
        # NOTE(review): 'os' is used here but no 'import os' is visible in this
        # chunk — confirm it is imported elsewhere in the file.
        if os.path.exists('./session_params/session.ckpt.index'):
            saver.restore(sess, './session_params/session.ckpt')
        image, actual, file_list = next(dataset.next_test())
        # NOTE(review): the source chunk is truncated mid-call below.
        pred_class, pred_class_val, pred_location = ssd_model.run(
def optimize(datasetX, datasetY, featureNamesAfterCombination, featureOptimizationParams, datasetParams, forecastModelParams):
    """Select the best feature subset, by exhaustion or a genetic algorithm.

    Args:
        datasetX, datasetY: feature matrix and labels after combination.
        featureNamesAfterCombination: names of the candidate features.
        featureOptimizationParams: dict; "isPerformOptimization" toggles the search.
        datasetParams, forecastModelParams: forwarded to Dataset / ForecastTemplate.

    Returns:
        tuple: (X after optimization, Y after optimization,
                feature names after optimization,
                optimization-process dict, or None when the search is skipped).

    Raises:
        Exception: if the optimization ended up selecting no feature at all.
    """
    featureNamesAfterOptimization = []
    featureOptimizationProcess = None
    isPerformOptimization = featureOptimizationParams[
        "isPerformOptimization"]
    if isPerformOptimization:
        dim = len(featureNamesAfterCombination)
        problem = FeatureOptimizationProblem(datasetX, datasetY,
                                             featureNamesAfterCombination,
                                             datasetParams,
                                             forecastModelParams)
        encoding = 'RI'
        popNum = 10
        field = ea.crtfld(encoding, problem.varTypes, problem.ranges,
                          problem.borders)
        pop = ea.Population(encoding, field, popNum)
        if dim < 7:
            # Low dimension: enumerate every non-empty 0/1 feature mask.
            Phen = []
            for i in range(1, pow(2, dim)):
                numStr = MathNormalUtils.toBinaryWithFixedLength(i, dim)
                Phen.append(list(map(int, numStr)))
            # Sub-sampling of the enumeration is currently disabled (see the
            # commented line below); the sample() call is kept so the global
            # RNG stream — fixed by the seeded run — stays unchanged.
            resultlist = random.sample(range(len(Phen)), 3)
            Phen = np.array(Phen)
            # Phen = Phen[resultlist, :]
            pop.Phen = Phen
            problem.aimFunc(pop)
            objTrace, varTrace = pop.ObjV, pop.Phen
            objTrace = NumpyUtils.hStackByCutHead(objTrace, objTrace)
        else:
            # High dimension: SEGA genetic-algorithm search.
            algorithm = ea.soea_SEGA_templet(problem, pop)
            # Maximum number of generations.
            algorithm.MAXGEN = 10
            # 0: no plot; 1: plot; 2: animated plot.
            algorithm.drawing = 1
            [pop, objTrace, varTrace] = algorithm.run()
        featureOptimizationProcess = {
            "objTrace": objTrace,
            "varTrace": varTrace
        }
        # Pick the generation with the best objective and decode its 0/1 mask.
        bestGen = np.argmin(problem.maxormins * objTrace[:, 1])
        bestVar = np.array([int(x) for x in varTrace[bestGen, :]])
        featureCombination = FeatureCombination(
            datasetX, datasetY, featureNamesAfterCombination, {
                "isPerformCombination": True,
                "featureFlags": bestVar
            })
        datasetAfterOptimizationX, datasetAfterOptimizationY, featureNamesAfterOptimization = featureCombination.getDatasetAfterCombination(
        )
    else:
        datasetAfterOptimizationX = datasetX
        datasetAfterOptimizationY = datasetY
        featureNamesAfterOptimization = featureNamesAfterCombination
    # BUGFIX: validate BEFORE using the optimized data. The original built the
    # Dataset first and only raised afterwards, so a None result would crash
    # inside Dataset() instead of producing the intended error message.
    if (datasetAfterOptimizationX is None) or (datasetAfterOptimizationY is
                                               None):
        raise Exception("特征优化中全部特征未被选取,请检查参数!")
    # Re-train on the selected features. getObjective() appears to be called
    # for its side effects (fitting/persisting the final model) — the loss
    # value itself is unused here; TODO confirm against ForecastTemplate.
    dataset = Dataset(datasetAfterOptimizationX, datasetAfterOptimizationY,
                      datasetParams)
    forecastTemplate = ForecastTemplate(dataset, forecastModelParams)
    loss = forecastTemplate.getObjective()
    return datasetAfterOptimizationX, datasetAfterOptimizationY, featureNamesAfterOptimization, featureOptimizationProcess
def create_data_matrix(tsv_path, lang_pair, features, max_len, voc_size, batch_size, config, fixed_length_x=None, fixed_length_y=None, mean_subtraction=False, feature_standardization=False, excluded_concepts=(), only_cognates=False, cognate_detection=False, valtest=False, train_mean=None, train_std=None):
    """Read a word-pair TSV and build the encoded data matrices for one language pair.

    Args:
        tsv_path: path to the TSV corpus (columns DOCULECT, CONCEPT, TOKENS, ID).
        lang_pair: (lang_a, lang_b) tuple of doculect names.
        features: (input_features, output_features) for encode_word.
        max_len: (max_len_x, max_len_y) padding lengths for the RNN matrices.
        voc_size: nested dict voc_size[encoding][language] -> vocabulary size.
        batch_size: forwarded to the returned Dataset.
        config: dict providing "input_encoding" and "output_encoding" keys.
        fixed_length_x, fixed_length_y, only_cognates: accepted for interface
            compatibility; not used in this function body — TODO confirm.
        mean_subtraction: apply per-word mean subtraction (only when
            feature_standardization is off).
        feature_standardization: standardize matrix_x using mean/std.
        excluded_concepts: concepts to skip. BUGFIX: the default was a mutable
            list ([]); it is now an immutable tuple — same membership behavior,
            no shared-state pitfall.
        cognate_detection: keep only one form per concept to keep languages
            synchronized.
        valtest: when True, standardize with the supplied train_mean/train_std
            instead of computing them.
        train_mean, train_std: training statistics used when valtest is True.

    Returns:
        (Dataset, train_mean_calc, train_std_calc) — the statistics are None
        unless they were computed here (training mode with standardization).

    Raises:
        ValueError: if neither language of the pair occurs in the corpus.
    """
    lang_a, lang_b = lang_pair
    i_enc = config["input_encoding"]
    o_enc = config["output_encoding"]

    # Read in TSV file
    df = pd.read_csv(tsv_path,
                     sep="\t",
                     engine="python",
                     skipfooter=3,
                     index_col=False)
    df = df[df["DOCULECT"].isin(lang_pair)]
    if df.empty:
        raise ValueError("The supplied language(s) is/are not in the corpus!")
    concepts = df["CONCEPT"].unique()
    # Sort to have same list of concepts for every language for cognate detection
    concepts = sorted(concepts)

    matrix_x = []
    matrix_x_unnormalized = []
    matrix_y = []
    mask_x = []
    matrix_x_unbounded = []
    matrix_y_unbounded = []
    datafile_ids = []
    word_lengths_unbounded = []
    for concept in concepts:
        if concept in excluded_concepts:
            continue
        concept_entries = df[df["CONCEPT"] == concept]
        lang0_entries = concept_entries[concept_entries["DOCULECT"] == lang_a]
        lang1_entries = concept_entries[concept_entries["DOCULECT"] == lang_b]
        if len(lang0_entries) == 0 or len(lang1_entries) == 0:
            # Concept not available for one of the languages in langpair, skip.
            continue
        # Add word pairs for all possible combinations of words for this concept
        for _, lang0_entry in lang0_entries.iterrows():
            for _, lang1_entry in lang1_entries.iterrows():
                x = lang0_entry["TOKENS"]
                y = lang1_entry["TOKENS"]
                # Save id of line in datafile, so line can later be looked up
                x_id = lang0_entry["ID"]
                y_id = lang1_entry["ID"]
                datafile_ids.append((x_id, y_id))
                # Encode words, for use in RNN data matrix: (max_len, voc_size)
                word_encoded_x, word_mask_x = encode_word(
                    x, features[0], max_len[0], voc_size[i_enc][lang_a])
                word_encoded_y, _ = encode_word(y, features[1], max_len[1],
                                                voc_size[o_enc][lang_b])
                # Encode unbounded words (max len of word pair is max of words
                # in pair), for use in SeqModel data matrix.
                max_len_pair = np.maximum(len(x), len(y))
                # X for SeqModel is encoded
                word_encoded_x_unbounded, _ = encode_word(
                    x, features[0], max_len_pair, voc_size[i_enc][lang_a])
                # Y for SeqModel is not encoded, just filled to maxlen
                word_encoded_y_unbounded = _fill_word(y, max_len_pair)
                # Keep track of word lengths, needed for SeqModel algorithm
                word_lengths_unbounded.append(max_len_pair)

                if mean_subtraction and not feature_standardization:
                    word_encoded_x_norm = perform_mean_subtraction(
                        word_encoded_x)
                    matrix_x.append(word_encoded_x_norm)
                else:
                    matrix_x.append(word_encoded_x)
                matrix_x_unnormalized.append(word_encoded_x)
                matrix_y.append(word_encoded_y)
                mask_x.append(word_mask_x)
                # Unbounded matrix for SeqModel; unbounded X is always unnormalized
                matrix_x_unbounded.append(word_encoded_x_unbounded)
                matrix_y_unbounded.append(word_encoded_y_unbounded)
                # In cognate detection mode: only add one form per concept,
                # to keep languages synchronized
                if cognate_detection:
                    break
            if cognate_detection:
                break

    word_lengths_2 = [len(x) for x in matrix_x_unbounded]
    assert np.sum(word_lengths_2) == np.sum(word_lengths_unbounded)

    # Convert list of NumPy arrays to full NumPy array:
    # (n_samples, max_len, voc_size)
    matrix_x = np.array(matrix_x)
    matrix_x_unnormalized = np.array(matrix_x_unnormalized)
    matrix_y = np.array(matrix_y)
    mask_x = np.array(mask_x)
    assert matrix_x.shape[0] == matrix_y.shape[0]

    # SeqModel matrices: convert to NP array, to enable fancy indexing.
    # Row lengths are uneven, because of uneven word lengths.
    matrix_x_unbounded = np.array(matrix_x_unbounded, dtype=object)
    matrix_y_unbounded = np.array(matrix_y_unbounded, dtype=object)
    word_lengths_unbounded = np.array(word_lengths_unbounded)

    # Feature standardization
    train_mean_calc = None
    train_std_calc = None
    if feature_standardization:
        # During training: standardize using own mean and std, and save the
        # calculated train mean/std to give to the valtest set.
        if not valtest:
            matrix_x, train_mean_calc, train_std_calc = standardize(
                matrix_x_unnormalized)
        # During valtest: standardize using mean and std from train set
        if valtest:
            matrix_x, _, _ = standardize(matrix_x_unnormalized,
                                         valtest=True,
                                         train_mean=train_mean,
                                         train_std=train_std)

    return Dataset(batch_size, matrix_x, matrix_x_unnormalized, matrix_y,
                   mask_x, max_len[0], max_len[1], matrix_x_unbounded,
                   matrix_y_unbounded, tsv_path, datafile_ids,
                   word_lengths_unbounded), train_mean_calc, train_std_calc
def __init__(self, caseParams):
    """Run the whole forecasting case described by caseParams.

    For every run, dataset, and forecast model it performs: data loading,
    feature generation/selection/combination/optimization, dataset
    construction, loading of the optimized model, deterministic and
    probabilistic forecasting, metric output, and plotting. All intermediate
    artifacts are written below resources/<caseFlag>/<timeFlag>/.
    """
    timeFlag = str(int(time.time()))
    # NOTE(review): the timestamp above is immediately overridden by a
    # hard-coded value — looks like a debugging leftover; confirm intended.
    timeFlag = "19941111"
    rootPath = GlobalConstants.getRootPath()
    # Read the base settings.
    baseSetting = caseParams["baseSetting"]
    caseFlag = baseSetting["caseFlag"]
    # NOTE(review): caseDescription is read but not used in this method.
    caseDescription = baseSetting["caseDescription"]
    isFixedRandomSeed = baseSetting["isFixedRandomSeed"]
    if isFixedRandomSeed:
        fixedRandomSeed = baseSetting["fixedRandomSeed"]
        # Randomness is used in many places across the project and its
        # dependencies, and not every package exposes a seeding interface,
        # so a fixed seed may occasionally fail to take effect. The seeds
        # below cover the known sources; reproduction is very likely but
        # not 100% guaranteed.
        random.seed(fixedRandomSeed)
        np.random.seed(fixedRandomSeed)
        set_random_seed(fixedRandomSeed)
    runTimes = baseSetting["runTimes"]
    caseFlagPath = rootPath + "resources/" + caseFlag + "/"
    timeFlagPath = caseFlagPath + timeFlag + "/"
    # Read the dataset settings.
    datasetSettings = caseParams["datasetSettings"]
    # Read the forecast-model settings.
    forecastModelSettings = caseParams["forecastModelSettings"]
    # Case run flow: runs x datasets x forecast models.
    datasetNum = len(datasetSettings)
    forecastModelNum = len(forecastModelSettings)
    for i in range(runTimes):
        # The i-th run.
        runTimePath = timeFlagPath + "run" + str(i + 1) + "/"
        for j in range(datasetNum):
            # The j-th dataset.
            datasetSetting = datasetSettings[j]["datasetSetting"]
            datasetFlag = datasetSetting["datasetFlag"]
            # NOTE(review): datasetDescription is read but not used below.
            datasetDescription = datasetSetting["datasetDescription"]
            datasetPath = rootPath + datasetSetting["datasetPath"]
            datasetSubPath = datasetSetting["datasetSubPath"]
            smallDataParams = datasetSetting["smallDataParams"]
            featureParams = datasetSetting["featureParams"]
            datasetFlagPath = runTimePath + datasetFlag + "/"
            datasetSuffix = "_" + timeFlag + "_" + str(i + 1) + "_" + str(j + 1)
            # Read the raw data.
            orgData = NumpyInputOutputUtils.readExcelDataToNumpy(
                datasetPath, datasetSubPath)
            orgDataPath = datasetFlagPath + "common/" + "orgData" + datasetSuffix + ".xlsx"
            isOutputOrgData = datasetSetting["isOutputOrgData"]
            if isOutputOrgData:
                ResultsOutput.outputOrgData(orgData, orgDataPath, "orgData")
            # Reduce the big dataset to the small working data.
            smallData = SmallDataFactory.getSmallData(
                orgData, smallDataParams)
            smallDataPath = datasetFlagPath + "common/" + "smallData" + datasetSuffix + ".xlsx"
            isOutputSmallData = datasetSetting["isOutputSmallData"]
            if isOutputSmallData:
                ResultsOutput.outputSmallData(smallData, smallDataPath,
                                              "smallData")
            # Generate features and labels from the small data.
            featureGenerationParams = featureParams[
                "featureGenerationParams"]
            featureGeneration = FeatureGeneration(smallData,
                                                  featureGenerationParams)
            self.datasetAfterGenerationX, self.datasetAfterGenerationY, self.featureNamesAfterGeneration = featureGeneration.getDatasetAfterGeneration(
            )
            isOutputDatasetAfterFeatureGeneration = featureParams[
                "isOutputDatasetAfterFeatureGeneration"]
            datasetAfterFeatureGenerationPath = datasetFlagPath + "common/" + "datasetAfterFeatureGeneration" + datasetSuffix + ".xlsx"
            if isOutputDatasetAfterFeatureGeneration:
                ResultsOutput.outputDatasetAfterFeatureOperation(
                    self.datasetAfterGenerationX,
                    self.datasetAfterGenerationY,
                    self.featureNamesAfterGeneration,
                    datasetAfterFeatureGenerationPath,
                    "datasetAfterFeatureGeneration")
            # Preliminary selection of the generated features.
            featureSelectionParams = featureParams[
                "featureSelectionParams"]
            featureSelection = FeatureSelection(
                self.datasetAfterGenerationX, self.datasetAfterGenerationY,
                self.featureNamesAfterGeneration, featureSelectionParams)
            self.datasetAfterSelectionX, self.datasetAfterSelectionY, self.featureNamesAfterSelection, self.metricValues = featureSelection.getDatasetAfterSelection(
            )
            isOutputDatasetAfterFeatureSelection = featureParams[
                "isOutputDatasetAfterFeatureSelection"]
            datasetAfterFeatureSelectionPath = datasetFlagPath + "common/" + "datasetAfterFeatureSelection" + datasetSuffix + ".xlsx"
            if isOutputDatasetAfterFeatureSelection:
                ResultsOutput.outputDatasetAfterFeatureOperation(
                    self.datasetAfterSelectionX,
                    self.datasetAfterSelectionY,
                    self.featureNamesAfterSelection,
                    datasetAfterFeatureSelectionPath,
                    "datasetAfterFeatureSelection")
            isOutputMetricInFeatureSelection = featureParams[
                "isOutputMetricInFeatureSelection"]
            metricInFeatureSelectionPath = datasetFlagPath + "common/" + "metricInFeatureSelection" + datasetSuffix + ".xlsx"
            if isOutputMetricInFeatureSelection:
                ResultsOutput.outputMetricInFeatureSelection(
                    self.metricValues, self.featureNamesAfterGeneration,
                    featureSelectionParams["operateEnums"],
                    metricInFeatureSelectionPath,
                    "metricInFeatureSelection")
            # Manual feature combination.
            featureCombinationParams = featureParams[
                "featureCombinationParams"]
            featureCombination = FeatureCombination(
                self.datasetAfterSelectionX, self.datasetAfterSelectionY,
                self.featureNamesAfterSelection, featureCombinationParams)
            self.datasetAfterCombinationX, self.datasetAfterCombinationY, self.featureNamesAfterCombination = featureCombination.getDatasetAfterCombination(
            )
            isOutputDatasetAfterFeatureCombination = featureParams[
                "isOutputDatasetAfterFeatureCombination"]
            datasetAfterFeatureCombinationPath = datasetFlagPath + "common/" + "datasetAfterFeatureCombination" + datasetSuffix + ".xlsx"
            if isOutputDatasetAfterFeatureCombination:
                ResultsOutput.outputDatasetAfterFeatureOperation(
                    self.datasetAfterCombinationX,
                    self.datasetAfterCombinationY,
                    self.featureNamesAfterCombination,
                    datasetAfterFeatureCombinationPath,
                    "datasetAfterFeatureCombination")
            for k in range(forecastModelNum):
                # The k-th forecast model.
                # Forecast-model settings.
                forecastModelSetting = forecastModelSettings[k][
                    "forecastModelSetting"]
                forecastModelParams = forecastModelSetting[
                    "forecastModelParams"]
                forecastModelEnum = forecastModelParams[
                    "forecastModelEnum"]
                forecastModelSuffix = datasetSuffix + "_" + forecastModelEnum.name
                finalForecastModelPath = datasetFlagPath + forecastModelEnum.name + "/" + "finalForecastModel" + forecastModelSuffix + ".txt"
                bestLossPath = datasetFlagPath + forecastModelEnum.name + "/" + "bestLoss" + forecastModelSuffix + ".txt"
                finalHyperParametersOptimizationProcessPath = datasetFlagPath + forecastModelEnum.name + "/" + "finalHyperParametersOptimizationProcess" + forecastModelSuffix + ".txt"
                GlobalConstants.setFinalForecastModelAndBestLossPath(
                    finalForecastModelPath, bestLossPath)
                GlobalConstants.setFinalHyperParametersOptimizationProcessPath(
                    finalHyperParametersOptimizationProcessPath)
                # Remove stale artifacts from a previous run of this model.
                ToolNormalUtils.deleteFile(bestLossPath)
                ToolNormalUtils.deleteFile(finalForecastModelPath)
                ToolNormalUtils.deleteFile(
                    finalHyperParametersOptimizationProcessPath)
                # Dataset parameters.
                datasetParams = datasetSetting["datasetParams"]
                # Feature optimization: feature optimization and hyper-parameter
                # optimization must run together, so this step also completes
                # hyper-parameter optimization and the final model fit.
                featureOptimizationParams = featureParams[
                    "featureOptimizationParams"]
                featureOptimization = FeatureOptimization(
                    self.datasetAfterCombinationX,
                    self.datasetAfterCombinationY,
                    self.featureNamesAfterCombination,
                    featureOptimizationParams, datasetParams,
                    forecastModelParams)
                self.datasetAfterOptimizationX, self.datasetAfterOptimizationY, self.featureNamesAfterOptimization, self.featureOptimizationProcess = featureOptimization.getDatasetAfterOptimization(
                )
                isOutputDatasetAfterFeatureOptimization = featureParams[
                    "isOutputDatasetAfterFeatureOptimization"]
                datasetAfterFeatureOptimizationPath = datasetFlagPath + forecastModelEnum.name + "/" + "datasetAfterFeatureOptimization" + forecastModelSuffix + ".xlsx"
                if isOutputDatasetAfterFeatureOptimization:
                    ResultsOutput.outputDatasetAfterFeatureOperation(
                        self.datasetAfterOptimizationX,
                        self.datasetAfterOptimizationY,
                        self.featureNamesAfterOptimization,
                        datasetAfterFeatureOptimizationPath,
                        "datasetAfterFeatureOptimization")
                isOutputFeatureOptimizationProcess = featureParams[
                    "isOutputFeatureOptimizationProcess"]
                featureOptimizationProcessPath = datasetFlagPath + forecastModelEnum.name + "/" + "featureOptimizationProcess" + forecastModelSuffix + ".xlsx"
                if isOutputFeatureOptimizationProcess:
                    ResultsOutput.outputFeatureOptimizationProcess(
                        self.featureOptimizationProcess,
                        self.featureNamesAfterCombination,
                        featureOptimizationProcessPath,
                        "featureOptimizationProcess")
                # Feature and hyper-parameter optimization run jointly, and the
                # best result of the joint process is persisted eagerly: because
                # of randomness, re-training later with the best feature set and
                # hyper-parameters would not reproduce the joint optimum exactly —
                # it is only better on average than other configurations.
                # Build the final dataset.
                self.finalDataset = Dataset(self.datasetAfterOptimizationX,
                                            self.datasetAfterOptimizationY,
                                            datasetParams)
                isOutputFinalDataset = datasetParams[
                    "isOutputFinalDataset"]
                finalDatasetPath = datasetFlagPath + forecastModelEnum.name + "/" + "finalDataset" + forecastModelSuffix + ".xlsx"
                if isOutputFinalDataset:
                    ResultsOutput.outputDatasetAfterFeatureOperation(
                        self.finalDataset.trainX2D,
                        self.finalDataset.trainY2D,
                        self.featureNamesAfterOptimization,
                        finalDatasetPath, "trainSet")
                    ResultsOutput.outputDatasetAfterFeatureOperation(
                        self.finalDataset.validationX2D,
                        self.finalDataset.validationY2D,
                        self.featureNamesAfterOptimization,
                        finalDatasetPath, "validationSet")
                # Hyper-parameter optimization and model training were already
                # completed during feature optimization; just load and use them.
                self.finalForecastModel = ToolNormalUtils.loadData(
                    finalForecastModelPath)
                self.hyperParametersOptimizationProcess = ToolNormalUtils.loadData(
                    finalHyperParametersOptimizationProcessPath)
                isPerformOptimization = forecastModelParams[
                    "isPerformOptimization"]
                isOutputHyperParametersOptimizationProcess = forecastModelParams[
                    "isOutputHyperParametersOptimizationProcess"]
                hyperParametersOptimizationProcessPath = datasetFlagPath + forecastModelEnum.name + "/" + "hyperParametersOptimizationProcess" + forecastModelSuffix + ".xlsx"
                if isPerformOptimization and isOutputHyperParametersOptimizationProcess:
                    ResultsOutput.outputHyperParametersOptimizationProcess(
                        self.hyperParametersOptimizationProcess,
                        hyperParametersOptimizationProcessPath)
                isOutputFinalHyperParameters = forecastModelParams[
                    "isOutputFinalHyperParameters"]
                finalHyperParametersPath = datasetFlagPath + forecastModelEnum.name + "/" + "finalHyperParameters" + forecastModelSuffix + ".xlsx"
                if isOutputFinalHyperParameters:
                    ResultsOutput.outputFinalHyperParameters(
                        self.finalForecastModel, finalHyperParametersPath)
                # Produce the final forecast using the best model found during
                # the whole optimization process.
                predictions = self.finalForecastModel.predict(
                    isFlatten=True)
                observations = self.finalForecastModel.dataset.validationY.flatten(
                )
                # Map predictions/observations back to the original label scale.
                predictionsReverse = self.finalDataset.reverseLabel(
                    predictions)
                observationsReverse = self.finalDataset.reverseLabel(
                    observations)
                isOutputDeterministicForecastingResults = forecastModelParams[
                    "isOutputDeterministicForecastingResults"]
                deterministicForecastingResultsPath = datasetFlagPath + forecastModelEnum.name + "/" + "deterministicForecastingResults" + forecastModelSuffix + ".xlsx"
                if isOutputDeterministicForecastingResults:
                    ResultsOutput.outputDeterministicForecastingResults(
                        predictionsReverse, observationsReverse,
                        deterministicForecastingResultsPath)
                isOutputDeterministicForecastingMetrics = forecastModelParams[
                    "isOutputDeterministicForecastingMetrics"]
                forecastMetricsPath = datasetFlagPath + forecastModelEnum.name + "/" + "forecastMetrics" + forecastModelSuffix + ".xlsx"
                if isOutputDeterministicForecastingMetrics:
                    # Point-forecast metrics: R2, RMSE, MSE, MAPE, MAE.
                    pointMetricEnums = [
                        ForecastPerformanceMetricEnum.
                        PointForecastMetricR2,
                        ForecastPerformanceMetricEnum.
                        PointForecastMetricRMSE,
                        ForecastPerformanceMetricEnum.
                        PointForecastMetricMSE,
                        ForecastPerformanceMetricEnum.
                        PointForecastMetricMAPE,
                        ForecastPerformanceMetricEnum.
                        PointForecastMetricMAE
                    ]
                    pointMetrics = []
                    pointMetricNames = []
                    for pointMetricEnum in pointMetricEnums:
                        metric = ForecastPerformanceMetricFactory.getPointForecastMetric(
                            self.finalForecastModel.dataset,
                            pointMetricEnum)
                        pointMetrics.append(metric)
                        pointMetricNames.append(pointMetricEnum.name)
                    ResultsOutput.outputForecastingMetrics(
                        pointMetrics, pointMetricNames,
                        forecastMetricsPath, "点预测指标")
                # Probabilistic forecasting.
                isPerformProbabilisticForecasting = forecastModelParams[
                    "isPerformProbabilisticForecasting"]
                if isPerformProbabilisticForecasting:
                    probabilisticForecastModelParams = forecastModelParams[
                        "probabilisticForecastModelParams"]
                    probabilisticResults = self.finalForecastModel.getProbabilisticResults(
                        probabilisticForecastModelParams)
                    isOutputProbabilisticForecastingResults = forecastModelParams[
                        "isOutputProbabilisticForecastingResults"]
                    probabilisticForecastingResultsPath = datasetFlagPath + forecastModelEnum.name + "/" + "probabilisticForecastingResults" + forecastModelSuffix + ".xlsx"
                    intervalForecastingResultsPath = datasetFlagPath + forecastModelEnum.name + "/" + "intervalForecastingResults" + forecastModelSuffix + ".xlsx"
                    if isOutputProbabilisticForecastingResults:
                        ResultsOutput.outputProbabilisticForecastingResults(
                            probabilisticResults,
                            probabilisticForecastingResultsPath)
                        ResultsOutput.outputIntervalForecastingResults(
                            self.finalDataset, probabilisticResults,
                            intervalForecastingResultsPath)
                    isOutputProbabilisticForecastingMetrics = forecastModelParams[
                        "isOutputProbabilisticForecastingMetrics"]
                    if isOutputProbabilisticForecastingMetrics:
                        # Interval metrics at several confidence levels.
                        intervalMetricEnums = [
                            ForecastPerformanceMetricEnum.
                            IntervalForecastMetricCP,
                            ForecastPerformanceMetricEnum.
                            IntervalForecastMetricMWP,
                            ForecastPerformanceMetricEnum.
                            IntervalForecastMetricCM,
                            ForecastPerformanceMetricEnum.
                            IntervalForecastMetricMC
                        ]
                        intervalMetrics = []
                        intervalMetricNames = []
                        for alpha in [0.8, 0.85, 0.9, 0.95]:
                            for intervalMetricEnum in intervalMetricEnums:
                                metric = ForecastPerformanceMetricFactory.getIntervalForecastMetric(
                                    self.finalForecastModel.dataset,
                                    intervalMetricEnum, alpha)
                                intervalMetrics.append(metric)
                                intervalMetricNames.append(
                                    intervalMetricEnum.name + "_" +
                                    str(alpha))
                        ResultsOutput.outputForecastingMetrics(
                            intervalMetrics, intervalMetricNames,
                            forecastMetricsPath, "区间预测指标")
                        # Full probabilistic metrics: CRPS and PIT.
                        probabilisticMetricEnums = [
                            ForecastPerformanceMetricEnum.
                            ProbabilisticForecastMetricCRPS,
                            ForecastPerformanceMetricEnum.
                            ProbabilisticForecastMetricPIT
                        ]
                        probabilisticMetrics = []
                        probabilisticMetricNames = []
                        for probabilisticMetricEnum in probabilisticMetricEnums:
                            metric = ForecastPerformanceMetricFactory.getProbabilisticForecastMetric(
                                self.finalForecastModel.dataset,
                                probabilisticMetricEnum)
                            probabilisticMetrics.append(metric)
                            probabilisticMetricNames.append(
                                probabilisticMetricEnum.name)
                        ResultsOutput.outputForecastingMetrics(
                            probabilisticMetrics,
                            probabilisticMetricNames, forecastMetricsPath,
                            "概率预测指标")
                # Plotting.
                isShowPredictionPlots = forecastModelParams[
                    "isShowPredictionPlots"]
                isSavePredictionPlots = forecastModelParams[
                    "isSavePredictionPlots"]
                labelName = forecastModelParams["labelName"]
                drawSuffix = "_" + datasetFlag + "_" + forecastModelEnum.name + "_" + timeFlag + "_" + str(
                    i + 1) + "_" + str(j + 1)
                savePredictionPath = datasetFlagPath + "plots/" + "predictions" + drawSuffix + ".jpg"
                if isPerformProbabilisticForecasting:
                    # 90% central interval: 5% and 95% quantiles.
                    lower = ForecastModelBase.getPredictionsByQuantile(
                        probabilisticResults, 0.05)
                    upper = ForecastModelBase.getPredictionsByQuantile(
                        probabilisticResults, 0.95)
                    alpha = "90%"
                else:
                    lower = None
                    upper = None
                    alpha = None
                title = "predictions" + drawSuffix
                Draw.drawPredictions(predictionsReverse,
                                     observationsReverse, lower, upper,
                                     alpha, "时段", labelName, title,
                                     isShowPredictionPlots,
                                     isSavePredictionPlots,
                                     savePredictionPath)
                if isPerformProbabilisticForecasting:
                    isShowProbabilisticPlots = forecastModelParams[
                        "isShowProbabilisticPlots"]
                    isSaveProbabilisticPlots = forecastModelParams[
                        "isSaveProbabilisticPlots"]
                    isShowReliablePlots = forecastModelParams[
                        "isShowReliablePlots"]
                    isSaveReliablePlots = forecastModelParams[
                        "isSaveReliablePlots"]
                    pdfs = probabilisticResults["pdfs"]
                    cdfs = probabilisticResults["cdfs"]
                    sampleNum = len(pdfs)
                    # Plot PDFs/CDFs for the first, middle, and last periods.
                    periods = [0, round(0.5 * sampleNum), sampleNum - 1]
                    for period in periods:
                        pdf = pdfs[period]
                        cdf = cdfs[period]
                        observation = observationsReverse[period]
                        title = "PDF_" + "period_" + str(
                            period) + drawSuffix
                        saveProbabilisticPath = datasetFlagPath + "plots/" + "PDF_" + "period_" + str(
                            period) + drawSuffix + ".jpg"
                        Draw.drawPDForCDF(pdf["x"], pdf["f"], observation,
                                          labelName, "概率密度", title,
                                          isShowProbabilisticPlots,
                                          isSaveProbabilisticPlots,
                                          saveProbabilisticPath)
                        title = "CDF_" + "period_" + str(
                            period) + drawSuffix
                        saveProbabilisticPath = datasetFlagPath + "plots/" + "CDF_" + "period_" + str(
                            period) + drawSuffix + ".jpg"
                        Draw.drawPDForCDF(cdf["x"], cdf["F"], observation,
                                          labelName, "累计分布", title,
                                          isShowProbabilisticPlots,
                                          isSaveProbabilisticPlots,
                                          saveProbabilisticPath)
                    # PIT reliability plot against the uniform distribution.
                    title = "PIT" + drawSuffix
                    saveReliablePath = datasetFlagPath + "plots/" + "PIT" + drawSuffix + ".jpg"
                    pits = ForecastPerformanceMetricFactory.getPIT(
                        cdfs, observationsReverse)
                    Draw.drawPIT(pits,
                                 stats.uniform,
                                 title=title,
                                 isShow=isShowReliablePlots,
                                 isSave=isSaveReliablePlots,
                                 savePath=saveReliablePath)
                # pre5 = finalForecastModel.getPredictionsByQuantile(probabilisticResults, 0.5)
                # daaa = finalForecastModel.dataset
                # daaa.validationP = pre5
                # print("0.5Q:" + str(
                #     ForecastPerformanceMetricFactory.getPointForecastMetric(daaa,
                #     ForecastPerformanceMetricEnum.PointForecastMetricR2))
                # )
                # draw = Draw()
                # draw.drawPredictions(pre5, observations, None, None, None, False, "period", "zd",
                #                      "dataset",
                #                      [1.0, 0.35], True, False, None)
                # NOTE(review): indentation of this trailing debug print is
                # ambiguous in the collapsed source; placed at the model-loop
                # level — confirm against the original file.
                print("CaseProcess-->__init__()中断点1")
def __init__(self, batch_size):
    """Load the flat (784-vector) MNIST splits without reshaping.

    NOTE(review): val and test both use mnist.test — confirm this is intended.
    """
    Dataset.__init__(self, batch_size)
    split_setters = (
        (self.set_train, mnist.train),
        (self.set_val, mnist.test),
        (self.set_test, mnist.test),
    )
    for setter, split in split_setters:
        setter(DataValues(x=split.images, y=split.labels))