def main():
    matrix = util.readMatrix('./data/mod_data/ratings_mod_1000.csv')
    train, test = util.train_test_split(matrix)  # dense matrix

    usim = findSimilarity(train)

    start_time = time.time()

    # Normal CF
    upred = predict_topk(train, usim)
    upred = np.around(upred, decimals=3)  # np.around returns a new array, so assign the result
    upred_save = upred[test.nonzero()].flatten()
    np.savetxt('CFpredwithout.csv', upred_save, delimiter=',')
    print('User Based with bias RMSE : ' + str(util.rmse(upred, test)))
    print('User based with bias Precision top k: ' + str(util.precision_topk(upred, test, 25)))
    # print('User based with bias SC: ' + str(util.spearman_corr(upred, test)))
    print('Took ' + str(time.time() - start_time))

    next_time = time.time()

    # CF with baseline
    upred1 = predict_topk_nobias(train)
    upred1 = np.around(upred1, decimals=3)
    upred1_save = upred1[test.nonzero()].flatten()
    test_save = test[test.nonzero()].flatten()
    np.savetxt('CFpred.csv', upred1_save, delimiter=',')
    np.savetxt('CFtest.csv', test_save, delimiter=',')
    print('User Based without bias RMSE : ' + str(util.rmse(upred1, test)))
    print('User based without bias Precision top k: ' + str(util.precision_topk(upred1, test, 25)))
    # print('User based without bias SC: ' + str(util.spearman_corr(upred1, test)))
    print('Took ' + str(time.time() - next_time))
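# --- Note: util.rmse is not shown in these snippets. Since both CF variants
# compare a dense prediction matrix against a held-out ratings matrix whose
# unrated cells are zero, a plausible reading scores only the nonzero test
# entries. A minimal sketch under that assumption (the real helper may differ):
import numpy as np

def rmse_sketch(pred, test):
    # Hypothetical stand-in for util.rmse: score only the rated cells.
    mask = test.nonzero()
    return np.sqrt(np.mean((pred[mask] - test[mask]) ** 2))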
def main():
    # Read matrix
    mat = util.readMatrix('./data/mod_data/ratings_mod_1000.csv')

    # Number of rows/columns to select: 4x the matrix rank
    k = 4 * np.linalg.matrix_rank(mat)

    train, test = util.train_test_split(mat)

    # Subtract each user's mean rating, computed over rated entries only
    user_bias = train.sum(1) / (train != 0).sum(1)
    user_bias_1d = user_bias.copy()
    user_bias = np.diag(user_bias)
    train_ones = train.copy()
    train_ones[train_ones != 0] = 1
    user_bias = user_bias @ train_ones
    train -= user_bias

    start_time = time.time()
    C, col_ind = select_cols(train, k)
    R, _ = select_rows(train, k)
    W = R[:, col_ind]

    print("CUR Normal")
    U = pseudoInverse(W, reduce=True, energy=0.999)
    Pred = C @ U @ R
    Pred += user_bias_1d[:, np.newaxis]  # add the user means back
    end_time = time.time()

    flatp = Pred[test.nonzero()].flatten()
    flatt = test[test.nonzero()].flatten()
    print(flatp)
    print(flatt)
    print(util.precision_topk(Pred, test, 25))
    print(util.rmse(Pred, test))
    # print(util.correlation_coefficient(flatp, flatt))
    print(end_time - start_time)

    print("CUR With Energy 90%")
    start_time = time.time()
    U = pseudoInverse(W, reduce=True, energy=0.9)
    Pred = C @ U @ R
    Pred += user_bias_1d[:, np.newaxis]
    end_time = time.time()

    flatp = Pred[test.nonzero()].flatten()
    flatt = test[test.nonzero()].flatten()
    print(flatp)
    print(flatt)
    print(end_time - start_time)
    print("Finished Execution")
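# --- Note: the pseudoInverse helper used above is not shown. A minimal sketch
# of an energy-truncated Moore-Penrose pseudo-inverse via SVD, assuming the
# truncation keeps singular values until their squared sum reaches the given
# energy fraction; the name and exact rule are assumptions, not the repo's code:
import numpy as np

def pseudo_inverse_sketch(W, energy=0.999):
    U, s, Vt = np.linalg.svd(W, full_matrices=False)
    cum = np.cumsum(s ** 2) / np.sum(s ** 2)
    k = int(np.searchsorted(cum, energy)) + 1  # smallest k reaching the energy share
    s_inv = np.zeros_like(s)
    s_inv[:k] = 1.0 / s[:k]
    # W^+ = V @ diag(1/s) @ U^T, restricted to the k retained components
    return Vt.T @ np.diag(s_inv) @ U.T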
def main():
    imgs = util.read_and_norm_imgs(IMG_DIR)
    train_imgs, test_imgs = util.train_test_split(imgs)

    print('Start training..')
    model = GAN()
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    trained_model = train(train_imgs, model, sess)

    print('Start testing..')
    test(test_imgs, trained_model, sess)
def train(self, inputs, epochs=10, attention=True, perceptron=True):
    KG, dataset, T = inputs
    train_set, test_set = train_test_split(dataset)

    # Hyperparameter configuration
    self.T = T
    self.KG = KG
    self.initialise()
    self.model.use_attention = attention
    self.model.use_perceptron = perceptron

    train_acc = []
    train_losses = []
    val_acc = []
    val_losses = []
    for i in range(epochs):
        print("\n\n>>>>>>>>>>>> EPOCH: ", i + 1, " / ", epochs)
        train_accuracy, train_loss = self.run_train_op(train_set)
        val_accuracy, val_loss = self.run_val_op(test_set)
        train_acc.append(train_accuracy)
        train_losses.append(train_loss)
        val_acc.append(val_accuracy)
        val_losses.append(val_loss)

        # Save model
        model_name = 'model'
        if attention and perceptron:
            model_name += '_combined'
            model_type = 'combined'
        elif attention and not perceptron:
            model_name += '_att'
            model_type = 'attention'
        elif perceptron and not attention:
            model_name += '_per'
            model_type = 'perceptron'
        model_name += str(i + 1)
        write_model_name(model_name, model_type)

        # Save results: write this epoch's scalars so each CSV row is one epoch,
        # rather than dumping the full accumulated lists every iteration
        results_file = self.save_model_dir + "{}_results.csv".format(model_type)
        with open(results_file, "a+") as f:
            f.write("epoch {}, {}, {}, {}, {}\n".format(
                i, train_accuracy, train_loss, val_accuracy, val_loss))

    return (train_acc, train_losses), (val_acc, val_losses)
def fit(self, inclass_data_known, inclass_data_novelty):
    (inclass_X_known, inclass_Y_known), (inclass_X_novelty, inclass_Y_novelty) \
        = inclass_data_known, inclass_data_novelty
    known_sample_n = get_sample_number_of_inclass_data(inclass_X_known)
    novelty_sample_n = get_sample_number_of_inclass_data(inclass_X_novelty)
    if DEBUG:
        print('known_sample_n, novelty_sample_n: ', known_sample_n, novelty_sample_n)

    test_size = novelty_sample_n
    inclass_X_known_for_classify, inclass_X_known_for_novelty_classify, \
        inclass_Y_known_for_classify, inclass_Y_known_for_novelty_classify \
        = util.train_test_split(inclass_X_known, inclass_Y_known, test_size=test_size)
    if DEBUG:
        print('known sample for novelty classify: ',
              get_sample_number_of_inclass_data(inclass_X_known_for_novelty_classify))

    X_known_for_classify, Y_known_for_classify = inclass_data_to_data(
        inclass_X_known_for_classify, inclass_Y_known_for_classify)
    Y_known_for_classify_reindex = [self.reindex_dict[c] for c in Y_known_for_classify]
    self.classify_svm_model = svm_classify(X_known_for_classify, Y_known_for_classify_reindex)

    if not self.new_algorithm:
        # compute the theta_i values (theta_os)
        for label in self.known_class:
            inclass_X_of_label = get_X_of_label(
                inclass_X_known_for_novelty_classify,
                inclass_Y_known_for_novelty_classify, label)
            X_of_label = inclass_data_to_data(inclass_X_of_label)
            theta = self.get_theta_of_sequence(X_of_label)
            self.theta_os[self.reindex_dict[label]] = theta

    # novelty classification
    novelty_classify_X, novelty_classify_Y = [], []
    for sequence, sequence_label in zip(inclass_X_known_for_novelty_classify,
                                        inclass_Y_known_for_novelty_classify):
        theta_s, reindex_label, probability = self.get_theta_and_result_of_sequence(sequence)
        if self.new_algorithm:
            second_feature = self.theta_os[reindex_label]
        else:
            second_feature = probability
        novelty_classify_X.append([theta_s, second_feature])
        novelty_classify_Y.append(0)
    for sequence, sequence_label in zip(inclass_X_novelty, inclass_Y_novelty):
        # capture probability here as well; it was previously discarded and
        # then referenced, which left it undefined or stale
        theta_s, reindex_label, probability = self.get_theta_and_result_of_sequence(sequence)
        if self.new_algorithm:
            second_feature = self.theta_os[reindex_label]
        else:
            second_feature = probability
        novelty_classify_X.append([theta_s, second_feature])
        novelty_classify_Y.append(1)

    # train the novelty SVM
    self.novelty_model = svm_classify(novelty_classify_X, novelty_classify_Y)
def train(X, y, args, quantiles):
    num_ts, num_periods, num_features = X.shape
    num_quantiles = len(quantiles)
    model = MQRNN(args.seq_len, num_quantiles, num_features, args.embedding_size,
                  args.encoder_hidden_size, args.n_layers, args.decoder_hidden_size)
    optimizer = Adam(model.parameters(), lr=args.lr)
    Xtr, ytr, Xte, yte = util.train_test_split(X, y)
    losses = []

    yscaler = None
    if args.standard_scaler:
        yscaler = util.StandardScaler()
    elif args.log_scaler:
        yscaler = util.LogScaler()
    elif args.mean_scaler:
        yscaler = util.MeanScaler()
    if yscaler is not None:
        ytr = yscaler.fit_transform(ytr)

    num_obs_to_train = args.num_obs_to_train
    seq_len = args.seq_len
    progress = ProgressBar()
    for epoch in progress(range(args.num_epoches)):
        # print("Epoch {} start...".format(epoch))
        for step in range(args.step_per_epoch):
            X_train_batch, y_train_batch, Xf, yf = batch_generator(
                Xtr, ytr, num_obs_to_train, args.seq_len, args.batch_size)
            X_train_tensor = torch.from_numpy(X_train_batch).float()
            y_train_tensor = torch.from_numpy(y_train_batch).float()
            Xf = torch.from_numpy(Xf).float()
            yf = torch.from_numpy(yf).float()
            ypred = model(X_train_tensor, y_train_tensor, Xf)

            # quantile (pinball) loss, summed over all requested quantiles
            loss = torch.zeros_like(yf)
            batch_num_ts = Xf.size(0)  # local name: don't clobber num_ts, reused below
            for q, rho in enumerate(quantiles):
                ypred_rho = ypred[:, :, q].view(batch_num_ts, -1)
                e = ypred_rho - yf
                loss += torch.max(rho * e, (rho - 1) * e)
            loss = loss.mean()

            losses.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    mape_list = []
    X_test = Xte[:, -seq_len - num_obs_to_train:-seq_len, :].reshape(
        (num_ts, -1, num_features))
    Xf_test = Xte[:, -seq_len:, :].reshape((num_ts, -1, num_features))
    y_test = yte[:, -seq_len - num_obs_to_train:-seq_len].reshape((num_ts, -1))
    if yscaler is not None:
        y_test = yscaler.transform(y_test)
    yf_test = yte[:, -seq_len:]
    # the model expects tensors
    X_test = torch.from_numpy(X_test).float()
    y_test = torch.from_numpy(y_test).float()
    Xf_test = torch.from_numpy(Xf_test).float()
    ypred = model(X_test, y_test, Xf_test)  # (1, num_quantiles, output_horizon)
    ypred = ypred.data.numpy()
    if yscaler is not None:
        ypred = yscaler.inverse_transform(ypred)
    ypred = np.maximum(0, ypred)

    # P50 quantile MAPE
    mape = util.MAPE(yf_test, ypred[:, :, 1])
    print("MAPE: {}".format(mape))
    mape_list.append(mape)

    if args.show_plot:
        show_idx = 0
        plt.figure(1)
        plt.plot([k + num_obs_to_train for k in range(seq_len)],
                 ypred[show_idx, :, 1], "r-")
        plt.fill_between(x=[k + num_obs_to_train for k in range(seq_len)],
                         y1=ypred[show_idx, :, 0], y2=ypred[show_idx, :, 2], alpha=0.5)
        plt.title('Prediction uncertainty')
        yplot = yte[show_idx, -seq_len - num_obs_to_train:]
        plt.plot(range(len(yplot)), yplot, "k-")
        plt.legend(["P50 forecast", "true", "P10-P90 quantile"], loc="upper left")
        plt.xlabel("Periods")
        plt.ylabel("Y")
        plt.show()

    return losses, mape_list
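# --- Note: the inner loop above is the standard pinball (quantile) loss,
# max(rho * e, (rho - 1) * e) with e = prediction - target. A small
# self-contained NumPy check of its asymmetry:
import numpy as np

def pinball_loss(y_true, y_pred, rho):
    e = y_pred - y_true  # same sign convention as the trainer above
    return np.maximum(rho * e, (rho - 1) * e).mean()

# With this convention, rho weights positive errors by rho and negative
# errors by (1 - rho):
print(pinball_loss(np.array([10.0]), np.array([11.0]), 0.9))  # over by 1  -> 0.9
print(pinball_loss(np.array([10.0]), np.array([9.0]), 0.9))   # under by 1 -> 0.1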
def train(model, n_epochs, labels, images, batch_size, train_test_ratio):
    train_labels, train_images, test_labels, test_images, valid_labels, valid_images = \
        train_test_split(labels, images, train_test_ratio)

    for epoch in range(n_epochs):
        train_targets, sample_images = create_train_batch(
            batch_size, train_labels, train_images)
        train_cost, train_ler = model.fit(train_targets, sample_images)
        train_ler = train_ler * batch_size

        # test_targets, test_sample_images = create_train_batch(batch_size, test_labels, test_images, False)
        # test_cost, test_ler = model.validate(test_targets, test_sample_images)
        # test_ler = test_ler * batch_size
        #
        # valid_targets, valid_sample_images = create_train_batch(batch_size, valid_labels, valid_images, False)
        # valid_cost, valid_ler = model.validate(valid_targets, valid_sample_images, 'valid')
        # valid_ler = valid_ler * batch_size

        log = ('[Epoch {}] train_cost: {:.3f}, train_ler: {:.3f}, '
               'test_cost: {:.3f}, test_ler: {:.3f}, val_cost: {:.3f}, val_ler: {:.3f}')
        # print(log.format(epoch, train_cost, train_ler, test_cost, test_ler, valid_cost, valid_ler))
        print(log.format(epoch, train_cost, train_ler, 0, 0, 0, 0))

        if train_cost < 0:
            model.save()
acw_features = util.get_features(all_features, 0)
act_features = util.get_features(all_features, 1)
pm_features = util.get_features(all_features, 2)

# get all people ids (wrapped in list() so the keys can be indexed below)
all_people = list(all_features.keys())

# pm
# pad so all windows have the same length
padded_pm_features = util.pad_features(pm_features)
# reduce the frame rate to mex.frames_per_second
reduced_pm_features = util.frame_reduce(padded_pm_features)
for i in range(len(all_people)):
    test_persons = [all_people[i]]
    pm_train_features, pm_test_features = util.train_test_split(
        reduced_pm_features, test_persons)
    pm_train_features, pm_train_labels = util.flatten(pm_train_features)
    pm_test_features, pm_test_labels = util.flatten(pm_test_features)
    pm_train_labels = np_utils.to_categorical(pm_train_labels, len(mex.activity_list))
    pm_test_labels = np_utils.to_categorical(pm_test_labels, len(mex.activity_list))
    util.run_model_2D(pm_train_features, pm_train_labels,
                      pm_test_features, pm_test_labels)

# acw
for i in range(len(all_people)):
    test_persons = [all_people[i]]
def train(X, y, args):
    '''
    Args:
    - X (array like): shape (num_samples, num_features, num_periods)
    - y (array like): shape (num_samples, num_periods)
    - epoches (int): number of epochs to run
    - step_per_epoch (int): steps per epoch to run
    - seq_len (int): output horizon
    - likelihood (str): what type of likelihood to use, default is gaussian
    - num_skus_to_show (int): how many skus to show in test phase
    - num_results_to_sample (int): how many samples in test phase as prediction
    '''
    # rho = args.quantile
    num_ts, num_periods, num_features = X.shape
    model = DFRNN(num_features, args.noise_nlayers, args.noise_hidden_size,
                  args.global_nlayers, args.global_hidden_size, args.n_factors)
    optimizer = Adam(model.parameters(), lr=args.lr)
    random.seed(2)
    Xtr, ytr, Xte, yte = util.train_test_split(X, y)
    losses = []
    cnt = 0

    yscaler = None
    if args.standard_scaler:
        yscaler = util.StandardScaler()
    elif args.log_scaler:
        yscaler = util.LogScaler()
    elif args.mean_scaler:
        yscaler = util.MeanScaler()
    if yscaler is not None:
        ytr = yscaler.fit_transform(ytr)

    # training
    progress = ProgressBar()
    seq_len = args.seq_len
    num_obs_to_train = args.num_obs_to_train
    for epoch in progress(range(args.num_epoches)):
        # print("Epoch {} starts...".format(epoch))
        for step in range(args.step_per_epoch):
            Xtrain, ytrain, Xf, yf = batch_generator(Xtr, ytr, num_obs_to_train,
                                                     seq_len, args.batch_size)
            Xtrain_tensor = torch.from_numpy(Xtrain).float()
            ytrain_tensor = torch.from_numpy(ytrain).float()
            Xf = torch.from_numpy(Xf).float()
            yf = torch.from_numpy(yf).float()
            mu, sigma = model(Xtrain_tensor)
            loss = util.gaussian_likelihood_loss(ytrain_tensor, mu, sigma)
            losses.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            cnt += 1

    # test
    mape_list = []
    X_test = Xte[:, -seq_len - num_obs_to_train:-seq_len, :].reshape(
        (num_ts, -1, num_features))
    Xf_test = Xte[:, -seq_len:, :].reshape((num_ts, -1, num_features))
    y_test = yte[:, -seq_len - num_obs_to_train:-seq_len].reshape((num_ts, -1))
    yf_test = yte[:, -seq_len:].reshape((num_ts, -1))
    if yscaler is not None:
        y_test = yscaler.transform(y_test)
    Xf_test = torch.from_numpy(Xf_test).float()  # model.sample expects a tensor

    result = []
    n_samples = args.sample_size
    for _ in tqdm(range(n_samples)):
        y_pred = model.sample(Xf_test)
        y_pred = y_pred.data.numpy()
        if yscaler is not None:
            y_pred = yscaler.inverse_transform(y_pred)
        result.append(y_pred.reshape((-1, 1)))
    result = np.concatenate(result, axis=1)
    p50 = np.quantile(result, 0.5, axis=1)
    p90 = np.quantile(result, 0.9, axis=1)
    p10 = np.quantile(result, 0.1, axis=1)

    mape = util.MAPE(yf_test, p50)
    print("P50 MAPE: {}".format(mape))
    mape_list.append(mape)

    if args.show_plot:
        plt.figure(1, figsize=(20, 5))
        plt.plot([k + num_obs_to_train for k in range(seq_len)], p50, "r-")
        plt.fill_between(x=[k + num_obs_to_train for k in range(seq_len)],
                         y1=p10, y2=p90, alpha=0.5)
        plt.title('Prediction uncertainty')
        yplot = yte[-1, -seq_len - num_obs_to_train:]
        plt.plot(range(len(yplot)), yplot, "k-")
        plt.legend(["P50 forecast", "true", "P10-P90 quantile"], loc="upper left")
        ymin, ymax = plt.ylim()
        plt.vlines(num_obs_to_train, ymin, ymax, color="blue",
                   linestyles="dashed", linewidth=2)
        plt.ylim(ymin, ymax)
        plt.xlabel("Periods")
        plt.ylabel("Y")
        plt.show()

    return losses, mape_list
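# --- Note: util.gaussian_likelihood_loss is called here (and in the DeepAR
# trainer below) but its body is not shown. A minimal sketch of the usual
# negative Gaussian log-likelihood, assuming mu and sigma are tensors shaped
# like the targets; the real helper may differ in reduction or parameterisation:
import math
import torch

def gaussian_likelihood_loss_sketch(y, mu, sigma, eps=1e-6):
    sigma = sigma + eps  # keep the scale strictly positive
    nll = 0.5 * torch.log(2 * math.pi * sigma ** 2) + (y - mu) ** 2 / (2 * sigma ** 2)
    return nll.mean()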
def train(X, y, args):
    num_ts, num_periods, num_features = X.shape
    Xtr, ytr, Xte, yte = util.train_test_split(X, y)

    yscaler = None
    if args.standard_scaler:
        yscaler = util.StandardScaler()
    elif args.log_scaler:
        yscaler = util.LogScaler()
    elif args.mean_scaler:
        yscaler = util.MeanScaler()
    if yscaler is not None:
        ytr = yscaler.fit_transform(ytr)

    progress = ProgressBar()
    seq_len = args.seq_len
    num_obs_to_train = args.num_obs_to_train

    model = ExtremeModel(num_features)
    optimizer = Adam(model.parameters(), lr=args.lr)
    losses = []
    cnt = 0
    for epoch in progress(range(args.num_epoches)):
        # print("Epoch {} starts...".format(epoch))
        for step in range(args.step_per_epoch):
            Xtrain, ytrain, Xf, yf = batch_generator(Xtr, ytr, num_obs_to_train,
                                                     seq_len, args.batch_size)
            Xtrain_tensor = torch.from_numpy(Xtrain).float()
            ytrain_tensor = torch.from_numpy(ytrain).float()
            Xf = torch.from_numpy(Xf).float()
            yf = torch.from_numpy(yf).float()
            ypred = model(Xtrain_tensor, Xf)
            loss = F.mse_loss(ypred, yf)
            losses.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            cnt += 1

    mape_list = []
    mse_list = []
    rmse_list = []
    mae_list = []
    X_test = Xte[:, -seq_len - num_obs_to_train:-seq_len, :].reshape(
        (num_ts, -1, num_features))
    Xf_test = Xte[:, -seq_len:, :].reshape((num_ts, -1, num_features))
    y_test = yte[:, -seq_len - num_obs_to_train:-seq_len].reshape((num_ts, -1))
    yf_test = yte[:, -seq_len:].reshape((num_ts, -1))
    if yscaler is not None:
        y_test = yscaler.transform(y_test)
    # the model expects tensors
    X_test = torch.from_numpy(X_test).float()
    Xf_test = torch.from_numpy(Xf_test).float()
    ypred = model(X_test, Xf_test)
    ypred = ypred.data.numpy()
    if yscaler is not None:
        ypred = yscaler.inverse_transform(ypred)

    mape = util.MAPE(yf_test, ypred)
    mae = util.MAE(yf_test, ypred)
    mse = util.MSE(yf_test, ypred)
    rmse = util.RMSE(yf_test, ypred)
    print("MAE: {}".format(mae))
    print("RMSE: {}".format(rmse))
    print("MSE: {}".format(mse))
    print("MAPE: {}".format(mape))
    mape_list.append(mape)
    mse_list.append(mse)
    mae_list.append(mae)
    rmse_list.append(rmse)

    if args.show_plot:
        plt.figure(1)
        plt.plot([k + num_obs_to_train for k in range(seq_len)], ypred[-1], "r-")
        plt.title('Prediction uncertainty')
        yplot = yte[-1, -seq_len - num_obs_to_train:]
        plt.plot(range(len(yplot)), yplot, "k-")
        plt.legend(["forecast", "true"], loc="upper left")
        plt.xlabel("Periods")
        plt.ylabel("Y")
        plt.show()

    return losses, mape_list, mse_list, mae_list, rmse_list
from common import *
import util

# one iteration
X = np.array(R)
X, X_test = util.train_test_split(X, train_size=0.9, random_state=util.randint())
# print(X, X_test)
# create_matlab_input(X)
# execute_matlab_code()
from bayes import NaiveBayes
from util import FileOperate
from util import train_test_split
from metrics import accuracy_score

# When running this code, set the playML folder as the source root.

if __name__ == '__main__':
    # 1. Load the data: 'spam' marks spam messages (1), 'ham' non-spam (0)
    data_path = '../input/SMSSpamCollection'
    label = '\t'
    fo = FileOperate(data_path, label)
    X, y = fo.load_data()

    # 2. Split into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25, random_state=666)

    # Train
    nb = NaiveBayes()
    nb.fit(X_train, y_train)

    # Predict
    y_pred = nb.predict(X_test)

    # Score
    score = accuracy_score(y_test, y_pred)
    print('Accuracy:', score)
def train(X, y, args):
    '''
    Args:
    - X (array like): shape (num_samples, num_features, num_periods)
    - y (array like): shape (num_samples, num_periods)
    - epoches (int): number of epochs to run
    - step_per_epoch (int): steps per epoch to run
    - seq_len (int): output horizon
    - likelihood (str): what type of likelihood to use, default is gaussian
    - num_skus_to_show (int): how many skus to show in test phase
    - num_results_to_sample (int): how many samples in test phase as prediction
    '''
    rho = args.quantile  # kept for the commented-out quantile loss below
    num_ts, num_periods, num_features = X.shape
    model = DeepAR(num_features, args.embedding_size, args.hidden_size,
                   args.n_layers, args.lr, args.likelihood)
    optimizer = Adam(model.parameters(), lr=args.lr)
    random.seed(2)
    Xtr, ytr, Xte, yte = util.train_test_split(X, y)
    losses = []
    cnt = 0

    yscaler = None
    if args.standard_scaler:
        yscaler = util.StandardScaler()
    elif args.log_scaler:
        yscaler = util.LogScaler()
    elif args.mean_scaler:
        yscaler = util.MeanScaler()
    if yscaler is not None:
        ytr = yscaler.fit_transform(ytr)

    # training
    seq_len = args.seq_len
    num_obs_to_train = args.num_obs_to_train
    progress = ProgressBar()
    for epoch in progress(range(args.num_epoches)):
        # print("Epoch {} starts...".format(epoch))
        for step in range(args.step_per_epoch):
            Xtrain, ytrain, Xf, yf = batch_generator(Xtr, ytr, num_obs_to_train,
                                                     seq_len, args.batch_size)
            Xtrain_tensor = torch.from_numpy(Xtrain).float()
            ytrain_tensor = torch.from_numpy(ytrain).float()
            Xf = torch.from_numpy(Xf).float()
            yf = torch.from_numpy(yf).float()
            ypred, mu, sigma = model(Xtrain_tensor, ytrain_tensor, Xf)
            # quantile loss:
            # ypred_rho = ypred
            # e = ypred_rho - yf
            # loss = torch.max(rho * e, (rho - 1) * e).mean()
            # gaussian loss
            ytrain_tensor = torch.cat([ytrain_tensor, yf], dim=1)
            loss = util.gaussian_likelihood_loss(ytrain_tensor, mu, sigma)
            losses.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            cnt += 1

    # test
    mape_list = []
    X_test = Xte[:, -seq_len - num_obs_to_train:-seq_len, :].reshape(
        (num_ts, -1, num_features))
    Xf_test = Xte[:, -seq_len:, :].reshape((num_ts, -1, num_features))
    y_test = yte[:, -seq_len - num_obs_to_train:-seq_len].reshape((num_ts, -1))
    yf_test = yte[:, -seq_len:].reshape((num_ts, -1))
    if yscaler is not None:
        y_test = yscaler.transform(y_test)
    # the model expects tensors
    X_test = torch.from_numpy(X_test).float()
    y_test = torch.from_numpy(y_test).float()
    Xf_test = torch.from_numpy(Xf_test).float()
    y_pred, _, _ = model(X_test, y_test, Xf_test)
    y_pred = y_pred.data.numpy()
    if yscaler is not None:
        y_pred = yscaler.inverse_transform(y_pred)

    mape = util.MAPE(yf_test, y_pred)
    print("MAPE: {}".format(mape))
    mape_list.append(mape)

    if args.show_plot:
        plt.figure(1)
        plt.plot([k + num_obs_to_train for k in range(seq_len)], y_pred[-1], "r-")
        plt.title('Prediction uncertainty')
        yplot = yte[-1, -seq_len - num_obs_to_train:]
        plt.plot(range(len(yplot)), yplot, "k-")
        plt.legend(["forecast", "true"], loc="upper left")  # only two series are plotted
        plt.xlabel("Periods")
        plt.ylabel("Y")
        plt.show()

    return losses, mape_list
def train(X, y, args):
    '''
    Args:
    - X (array like): shape (num_samples, num_features, num_periods)
    - y (array like): shape (num_samples, num_periods)
    - epoches (int): number of epochs to run
    - step_per_epoch (int): steps per epoch to run
    - seq_len (int): output horizon
    - likelihood (str): what type of likelihood to use, default is gaussian
    - num_skus_to_show (int): how many skus to show in test phase
    - num_results_to_sample (int): how many samples in test phase as prediction
    '''
    num_ts, num_periods, num_features = X.shape
    model = TPALSTM(1, args.seq_len, args.hidden_size,
                    args.num_obs_to_train, args.n_layers)
    optimizer = Adam(model.parameters(), lr=args.lr)
    random.seed(2)
    Xtr, ytr, Xte, yte = util.train_test_split(X, y)
    losses = []
    cnt = 0

    yscaler = None
    if args.standard_scaler:
        yscaler = util.StandardScaler()
    elif args.log_scaler:
        yscaler = util.LogScaler()
    elif args.mean_scaler:
        yscaler = util.MeanScaler()
    elif args.max_scaler:
        yscaler = util.MaxScaler()
    if yscaler is not None:
        ytr = yscaler.fit_transform(ytr)

    # training
    seq_len = args.seq_len
    obs_len = args.num_obs_to_train
    progress = ProgressBar()
    for epoch in progress(range(args.num_epoches)):
        # print("Epoch {} starts...".format(epoch))
        for step in range(args.step_per_epoch):
            Xtrain, ytrain, Xf, yf = util.batch_generator(Xtr, ytr, obs_len,
                                                          seq_len, args.batch_size)
            Xtrain = torch.from_numpy(Xtrain).float()
            ytrain = torch.from_numpy(ytrain).float()
            Xf = torch.from_numpy(Xf).float()
            yf = torch.from_numpy(yf).float()
            ypred = model(ytrain)
            # loss = util.RSE(ypred, yf)
            loss = F.mse_loss(ypred, yf)
            losses.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            # torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
            optimizer.step()

    # test
    mape_list = []
    X_test = Xte[:, -seq_len - obs_len:-seq_len, :].reshape((num_ts, -1, num_features))
    Xf_test = Xte[:, -seq_len:, :].reshape((num_ts, -1, num_features))
    y_test = yte[:, -seq_len - obs_len:-seq_len].reshape((num_ts, -1))
    yf_test = yte[:, -seq_len:].reshape((num_ts, -1))
    # reuse the scaler fitted on the training targets; re-creating and
    # re-fitting it here (as the original did) would leak test statistics
    if yscaler is not None:
        y_test = yscaler.transform(y_test)

    X_test = torch.from_numpy(X_test).float()
    y_test = torch.from_numpy(y_test).float()
    Xf_test = torch.from_numpy(Xf_test).float()
    ypred = model(y_test)
    ypred = ypred.data.numpy()
    if yscaler is not None:
        ypred = yscaler.inverse_transform(ypred)
    ypred = ypred.ravel()

    loss = np.sqrt(np.sum(np.square(yf_test - ypred)))  # root sum of squared errors
    print("losses: ", loss)

    if args.show_plot:
        plt.figure(1, figsize=(20, 5))
        plt.plot([k + obs_len for k in range(seq_len)], ypred, "r-")
        plt.title('Prediction uncertainty')
        yplot = yte[-1, -seq_len - obs_len:]
        plt.plot(range(len(yplot)), yplot, "k-")
        plt.legend(["prediction", "true"], loc="upper left")
        ymin, ymax = plt.ylim()
        plt.vlines(obs_len, ymin, ymax, color="blue",
                   linestyles="dashed", linewidth=2)
        plt.ylim(ymin, ymax)
        plt.xlabel("Periods")
        plt.ylabel("Y")
        plt.show()

    return losses, mape_list
X = np.c_[np.asarray(hours), np.asarray(dows)]
num_features = X.shape[1]  # 2
num_periods = len(data)  # 721
X = np.asarray(X).reshape((-1, num_periods, num_features))
# "主机CPU平均负载" = average host CPU load
y = np.asarray(data["主机CPU平均负载"]).reshape((-1, num_periods))
# (num_ts=1, num_periods=744, num_features=2)
num_ts, num_periods, num_features = X.shape

model = DeepAR(num_features, embedding_size=10, hidden_size=50, num_layers=1,
               likelihood="nb")
optimizer = Adam(model.parameters(), lr=1e-3)
random.seed(2)
Xtr, ytr, Xte, yte = util.train_test_split(X, y)
losses = []
cnt = 0

# target scaling preprocessing
yscaler = None
# if args.standard_scaler:
#     yscaler = util.StandardScaler()
# elif args.log_scaler:
#     yscaler = util.LogScaler()
# elif args.mean_scaler:
yscaler = util.MeanScaler()
if yscaler is not None:
    ytr = yscaler.fit_transform(ytr)
seq_len = 12
title = r'$GHF - \widehat{GHF}$ on validation set with ' + \
        r'$\rho_{ROI}$ = %d' % roi_density
save_cur_fig('%d-diff-map.png' % roi_density, title=title)

plot_test_pred_linregress(y_test, y_pred, label='GBRT', color='b')
save_cur_fig('%d_linear_correlation.png' % roi_density,
             title=r'$\rho_{ROI}$ = %i, $r^2=%.2f, RMSE=%.2f$' % (roi_density, r2, rmse))

### Main Text Figure 1

## gbrt regression
X = data.drop(['GHF'], axis=1)
y = data.GHF
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=11,
                                                    test_size=0.10)
reg_gbrt = train_gbrt(X_train.drop(['lat', 'lon'], axis=1), y_train)
y_pred_gbrt = reg_gbrt.predict(X_test.drop(['lat', 'lon'], axis=1))

plt.clf()
m = Basemap(projection='robin', lon_0=0, resolution='c')
m.drawlsmask(land_color="#ffffff", ocean_color="#e8f4f8", resolution='l')
diff_cmap = plt.get_cmap('PiYG', 20)
scatter_args = {
    'marker': 'o',
    's': 15,
    'lw': 0.25,
    'edgecolor': 'black',
}