def descriptors_analysis():
    """Compare test-set MAE across descriptor subsets and regressors.

    Ten descriptor configurations (indices 0-9) are evaluated.  According to
    the original author's notes these are: -Crystal, -CW, -Structure,
    -Statistical, Crystal, CW, Structure, Statistical, Crystal+CW and All,
    where a leading '-' names a held-out feature category (the descriptors
    then come from the other three categories).  The mapping from index to
    configuration lives in ``load_and_split_descriptor``.

    For every configuration four models are trained in this order: SVR, FC,
    KRR, XGBoost.  Targets and predictions are passed through ``np.exp``
    before the MAE is computed (presumably the labels are stored in log
    space -- confirm against ``read_label`` / ``transform_data``).

    Args:
        None

    Returns:
        A list of 10 lists, each containing the 4 test MAEs in the order
        SVR, FC, KRR, XGBoost (i.e. a 10 x 4 nested list, not an ndarray).
    """
    data_dir = './data'
    labels = pd.read_csv(
        os.path.join(data_dir, 'labels.2020.1.29.csv')).to_numpy()
    raw_train_data = pd.read_csv(
        os.path.join(data_dir, 'td.2020.1.29.csv')).to_numpy()

    # (model_type, epochs, factory); every factory receives the feature
    # count, although only the fully connected network uses it.
    model_specs = (
        ('SVR', 1, lambda width: svr()),
        ('FC', 200, lambda width: fc(width)),
        ('KRR', 1, lambda width: krr()),
        ('XGBoost', 1, lambda width: xgb_model()),
    )

    result = []
    for descriptor_id in range(10):
        features = load_and_split_descriptor(raw_train_data, descriptor_id)
        n_features = features.shape[1]
        t, l = read_label(features, labels, label_index=0)

        test_maes = []
        for model_type, epochs, build_model in model_specs:
            model = build_model(n_features)
            x_train, x_test, y_train, y_test = transform_data(
                t, l, 0.1, 4, model_type=model_type)

            kappa_model = KappaModel(x_train, x_test, y_train, y_test)
            kappa_model.train_model(model, epochs=epochs)
            fitted_train = kappa_model.predict(model, 'train')
            fitted_test = kappa_model.predict(model, 'test')

            # The train MAE is evaluated but not reported; kept so the call
            # sequence matches the established behaviour.
            train_mae = mae(np.exp(y_train), np.exp(fitted_train))
            test_mae = mae(np.exp(y_test), np.exp(fitted_test))
            print(test_mae)
            test_maes.append(test_mae)

        result.append(test_maes)
    return result
def test(self):
    """Evaluate the model on the test dataset.

    The MAE and the constraint-violation counts are computed on the raw
    targets/predictions.  All remaining error statistics are computed after
    mapping every value ``t`` to ``10 ** -t`` (presumably the targets are
    stored on a negative log10 scale -- confirm against the dataset code).

    Side effects:
        Stores the transformed ``y_true`` / ``y_pred`` arrays and the list of
        violated pairs on ``self`` (``self.y_true``, ``self.y_pred``,
        ``self.violated_pairs``).

    Returns:
        A tuple ``(test_mae, violated_const_model, violated_const_dataset,
        median_ae, mean_ae, rmse, sum_mag_viol)``.
    """
    X, y = self._test_data._dataset
    y_pred = self._model.predict(Ten(X))

    # Metrics on the raw (untransformed) scale.
    test_mae = util.mae(y, y_pred)
    violated_const_dataset = len(util.violated_const(X, y))
    violated_pairs = util.violated_const(X, y_pred)
    violated_const_model = len(violated_pairs)

    # np.array() already builds fresh arrays, so no extra copy is needed.
    y_pred = np.array([10 ** -t for t in y_pred])
    y_true = np.array([10 ** -t for t in y])

    y_diff = np.abs(y_true - y_pred)
    median_ae = np.median(y_diff)
    mean_ae = np.mean(y_diff)
    # Both operands must be on the transformed scale; comparing the raw ``y``
    # with the transformed ``y_pred`` mixed two scales and disagreed with
    # how validation_step computes its RMSE.
    rmse_ = np.sqrt(mse(y_true, y_pred))
    # Total magnitude of the violations among the model's predictions.
    sum_mag_viol = np.sum(
        [abs(y_pred[i] - y_pred[j]) for (i, j) in violated_pairs])

    self.y_true = y_true
    self.y_pred = y_pred
    self.violated_pairs = violated_pairs
    return (
        test_mae,
        violated_const_model,
        violated_const_dataset,
        median_ae,
        mean_ae,
        rmse_,
        sum_mag_viol,
    )
def validation_step(self, epoch):
    """Score the model on the validation dataset and append a log row.

    The MAE and the constraint-violation counts use the raw targets and
    predictions; RMSE, median/mean absolute error and the summed violation
    magnitude use values mapped through ``10 ** -t``.

    Args:
        epoch: Epoch identifier, used only in the verbose printout.

    Side effects:
        Appends ``[val_mae, violated_const_dataset, violated_const_model,
        median_ae, mean_ae, val_rmse, sum_mag_viol]`` to ``self.logs`` and
        prints a summary line when ``self.verbose`` is truthy.
    """

    def _from_neg_log10(values):
        # Element-wise 10 ** -t, materialised as an independent array.
        return copy.deepcopy(np.array([10 ** -item for item in values]))

    features, targets = self._valid_data._dataset
    raw_pred = self._model.predict(Ten(features))

    # Raw-scale metrics.
    val_mae = util.mae(targets, raw_pred)
    n_dataset_violations = len(util.violated_const(features, targets))
    violated_pairs = util.violated_const(features, raw_pred)
    n_model_violations = len(violated_pairs)

    # Transformed-scale metrics.
    pred_lin = _from_neg_log10(raw_pred)
    true_lin = _from_neg_log10(targets)
    val_rmse = np.sqrt(mse(true_lin, pred_lin))
    abs_err = np.abs(true_lin - pred_lin)
    sum_mag_viol = np.sum(
        [abs(pred_lin[a] - pred_lin[b]) for (a, b) in violated_pairs])
    median_ae = np.median(abs_err)
    mean_ae = np.mean(abs_err)

    if self.verbose:
        summary = (
            f"epoch: {epoch}, "
            f"MAE: {val_mae}, "
            f"Violated constraints dataset: {n_dataset_violations}, "
            f"Violated constraints model: {n_model_violations}"
        )
        print(summary)

    log_row = [
        val_mae,
        n_dataset_violations,
        n_model_violations,
        median_ae,
        mean_ae,
        val_rmse,
        sum_mag_viol,
    ]
    self.logs.append(log_row)
def significance_analysis():
    """Estimate a sensitivity score for each of the first 32 feature columns.

    Loads the training arrays from ``./data/trains`` and a pickled model from
    ``./models/ptc_ab.pkl``.  For every column ``k`` and every perturbation
    level ``i`` the column is rescaled twice, the model's MAE against ``y``
    is evaluated for both variants, and the relative change
    ``|delta_kii - delta_ki| / delta_ki`` is accumulated.  The score of a
    column is the average of that quantity over all perturbation levels.

    Returns:
        A list with 32 scores, one per feature column, in column order.
    """
    x = np.load('./data/trains/train_x.npy')
    y = np.load('./data/trains/train_y.npy')
    # Use a context manager so the model file is closed deterministically.
    with open('./models/ptc_ab.pkl', 'rb') as model_file:
        model = pickle.load(model_file)

    perturbations = (-0.2, -0.15, -0.1, -0.05, 0.05, 0.1, 0.15)

    pk_list = []
    for k in range(32):
        total = 0  # named ``total`` to avoid shadowing the builtin ``sum``
        for i in perturbations:
            tmp = np.copy(x)
            tmpp = np.copy(x)
            tmp[:, k] = tmp[:, k] * (1 + i)
            # NOTE(review): this scales the *already perturbed* column from
            # ``tmp``, giving x * (1 + i) * (1 + i + 0.05) rather than
            # x * (1 + i + 0.05).  Kept as-is so published numbers do not
            # change; confirm which was intended.
            tmpp[:, k] = tmp[:, k] * (1 + i + 0.05)

            predict_y = model.predict(tmp)
            predict_yy = model.predict(tmpp)
            delta_ki = mae(predict_y, y)
            delta_kii = mae(predict_yy, y)
            total = total + np.abs(delta_kii - delta_ki) / delta_ki

        # Average over the perturbation levels (7 with the tuple above).
        pk_list.append(total / len(perturbations))
    return pk_list
def test_one_epoch(self, loader, epoch):
    """Run one evaluation pass over ``loader`` and log the metrics.

    Puts ``self.G`` and ``self.D`` into eval mode, predicts every batch,
    accumulates the batch-size-weighted criterion loss and gathers images,
    labels and predictions as NumPy arrays.

    Args:
        loader: Iterable yielding ``(img, label)`` tensor pairs.
        epoch: Epoch identifier forwarded to the logger.

    Side effects:
        Writes the log through ``self.logger`` with ``stage='test'``.  When
        the summed loss is lower than ``self.best_test_loss`` it updates that
        attribute and saves a checkpoint to
        ``<exp_path>/models/model.best.t7``.

    Returns:
        The log dict with keys ``loss`` (mean per example), ``img``,
        ``label``, ``pred_label``, ``pp_r2``, ``mse``, ``rmse``, ``mae``,
        ``pp_mse``, ``pp_rmse``, ``pp_mae`` and ``avg_r2``.
    """
    # Imported locally so the no-grad context below does not depend on how
    # the enclosing module imports torch.
    import torch

    self.G.eval()
    self.D.eval()

    test_loss = 0.0
    num_examples = 0
    imgs = []
    pred_labels = []
    labels = []

    # Evaluation never backpropagates, so skip autograd graph construction.
    with torch.no_grad():
        for img, label in tqdm(loader):
            img = img.to(self.device)
            label = label.to(self.device)

            pred_label = self.predict(img)
            loss = self.criterion(pred_label, label)

            batch_size = img.size(0)
            # Weight by batch size so the final division yields a
            # per-example mean even when the last batch is smaller.
            test_loss += loss.item() * batch_size
            num_examples += batch_size

            imgs.append(img.cpu().numpy())
            labels.append(label.cpu().numpy())
            pred_labels.append(pred_label.detach().cpu().numpy())

    img = np.concatenate(imgs, axis=0)
    label = np.concatenate(labels, axis=0)
    pred_label = np.concatenate(pred_labels, axis=0)

    log = {
        'loss': test_loss / num_examples,
        'img': img,
        'label': label,
        'pred_label': pred_label,
        'pp_r2': pp_r2(pred_label, label),
        'mse': mse(pred_label, label),
        'rmse': rmse(pred_label, label),
        'mae': mae(pred_label, label),
        'pp_mse': pp_mse(pred_label, label).tolist(),
        'pp_rmse': pp_rmse(pred_label, label).tolist(),
        'pp_mae': pp_mae(pred_label, label).tolist(),
    }
    log['avg_r2'] = np.mean(log['pp_r2'])
    self.logger.write(log, epoch=epoch, stage='test')

    # The comparison uses the summed (not averaged) loss.
    if test_loss < self.best_test_loss:
        self.best_test_loss = test_loss
        self.save(os.path.join(self.exp_path, 'models', 'model.best.t7'))
    return log
def analysis(self):
    """Plot how the prediction error is distributed over the selected items.

    Selects the columns listed in ``self.current_prod`` (pairs of column
    index and display name) from ``self.Y_pred`` and ``self.Y_future``,
    averages them over axis 0 and draws two stacked charts:

    * top: grouped bars of mean predicted vs. mean actual value per item;
    * bottom: point plot of the mean error ratio per item, y-axis 0..1.

    The figure is displayed with ``plt.show()``.

    :return: None
    """
    count = len(self.current_prod)
    predicted = np.array(self.Y_pred)
    actual = np.array(self.Y_future)

    # Split the (index, name) pairs into two parallel tuples.
    col_idx, col_names = list(zip(*self.current_prod))
    pred_sel = predicted[:, col_idx]
    actual_sel = actual[:, col_idx]

    pred_mean = np.mean(pred_sel, axis=0)
    actual_mean = np.mean(actual_sel, axis=0)
    # The MAE is evaluated but not used by the plots below.
    _mae = mae(y_pred=pred_sel, y_true=actual_sel, axis=0)
    error_ratio = mean_error_ratio(y_pred=pred_sel, y_true=actual_sel, axis=0)

    def _labelled_frame(values, tag):
        # Column keys: '数值' = value, '标签' = series label,
        # '井号' = well id, '误差' = error.
        frame = pd.DataFrame(values, columns=['数值'])
        frame['标签'] = [tag] * count
        frame['井号'] = col_names
        frame['误差'] = error_ratio
        return frame

    # '预测' = predicted, '实际' = actual.
    combined = pd.concat(
        [_labelled_frame(pred_mean, '预测'),
         _labelled_frame(actual_mean, '实际')],
        axis=0,
    )

    # Top panel: predicted vs. actual means.
    plt.subplot(2, 1, 1)
    sns.barplot(data=combined, x='井号', y='数值', hue='标签')
    plt.xticks([])
    plt.yticks(fontsize=15)

    # Bottom panel: error ratio per item.
    plt.subplot(2, 1, 2)
    sns.pointplot(data=combined, x='井号', y='误差')
    plt.ylim(0, 1)
    plt.xticks(fontsize=15, rotation=15)
    plt.yticks(fontsize=15)
    plt.grid()
    plt.show()
def train_one_epoch(self, loader, epoch):
    """Train ``self.G`` for one pass over ``loader`` and log the metrics.

    The objective per batch is the criterion evaluated on predictions and
    labels scaled by ``self.param_scale``, plus the sum of squared
    ``self.psf``-weighted residuals.

    Args:
        loader: Iterable yielding ``(img, label)`` tensor pairs.
        epoch: Epoch identifier used for logging and the checkpoint name.

    Side effects:
        Steps the scheduler once and the optimizer once per batch, writes the
        log through ``self.logger`` and saves a checkpoint to
        ``<exp_path>/models/model.<epoch>.t7``.

    Returns:
        The log dict with keys ``loss`` (mean per example), ``pp_r2``,
        ``mse``, ``rmse``, ``mae``, ``pp_mse``, ``pp_rmse``, ``pp_mae`` and
        ``avg_r2``.
    """
    self.G.train()
    # NOTE(review): the scheduler is stepped before any optimizer step in
    # this epoch.  Recent PyTorch releases expect scheduler.step() after
    # optimizer.step(); confirm against the PyTorch version in use.
    self.scheduler.step()

    running_loss = 0.0
    seen = 0
    pred_batches = []
    label_batches = []

    for img, label in tqdm(loader):
        img = img.to(self.device)
        label = label.to(self.device)

        self.opt.zero_grad()
        pred_label = self.predict(img)
        pred_batches.append(pred_label.detach().cpu().numpy())
        label_batches.append(label.cpu().numpy())

        scaled_term = self.criterion(
            pred_label * self.param_scale, label * self.param_scale)
        weighted_term = torch.sum((self.psf * (pred_label - label)) ** 2)
        loss = scaled_term + weighted_term
        loss.backward()
        self.opt.step()

        n_batch = img.size(0)
        # Weight by batch size so the final division is a per-example mean.
        running_loss += loss.item() * n_batch
        seen += n_batch

    all_pred = np.concatenate(pred_batches, axis=0)
    all_label = np.concatenate(label_batches, axis=0)

    log = {
        'loss': running_loss / seen,
        'pp_r2': pp_r2(all_pred, all_label),
        'mse': mse(all_pred, all_label),
        'rmse': rmse(all_pred, all_label),
        'mae': mae(all_pred, all_label),
        'pp_mse': pp_mse(all_pred, all_label).tolist(),
        'pp_rmse': pp_rmse(all_pred, all_label).tolist(),
        'pp_mae': pp_mae(all_pred, all_label).tolist(),
    }
    log['avg_r2'] = np.mean(log['pp_r2'])
    self.logger.write(log, epoch=epoch)

    checkpoint_path = os.path.join(
        self.exp_path, 'models', 'model.%d.t7' % epoch)
    self.save(checkpoint_path)
    return log
r2.insert(0, model_type) csv_writer.writerow(r2) if train_on_whole_data: x_train, x_test, y_train, y_test = transform_data( x_data=train_data, y_data=label, test_size=0.1, random_state=4) kappa_model = KappaModel(x_train, x_test, y_train, y_test) kappa_model.train_model(model, epochs=epochs) predict_train = kappa_model.predict(model, 'train') predict_test = kappa_model.predict(model, 'test') r2_train, mae_log_train, rmse_train = metric( y_train, predict_train) r2_test, mae_log_test, rmse_test = metric(y_cal=y_test, y_pred=predict_test) MAEs_train = mae(np.exp(y_train), np.exp(predict_train)) MAEs_test = mae(np.exp(y_test), np.exp(predict_test)) print(mae_log_test, r2_test) metric_list = [ MAEs_train, MAEs_test, r2_train, r2_test, mae_log_train, mae_log_test, rmse_train, rmse_test ] metric_matrix.append(metric_list) if train_on_whole_data: metric_matrix.insert(0, [ 'MAEs of train data', 'MAEs of test data', 'R2 of train data', 'R2 of test data', 'Logarithmic mae of train data', 'Logarithmic mae of test data', 'RMSE_train', 'RMSE_test' ])