def train(self):
    """Fit an NGBoost regressor (decision-tree base learner, Gaussian output
    distribution) on the train split with early stopping on the val split.

    Side effects: sets ``self.model``, saves pred-vs-true scatter plots for
    train/val under ``self.log_dir``, logs metrics.

    Returns:
        dict: validation metrics from ``utils.evaluate_metrics``.
    """
    X_train, y_train, _ = self.load_results_from_result_paths(self.train_paths)
    X_val, y_val, _ = self.load_results_from_result_paths(self.val_paths)

    base_learner_config = self.parse_config("base:")
    param_config = self.parse_config("param:")

    # Train: NGBoost over a friedman_mse decision tree, log-score objective.
    base_learner = DecisionTreeRegressor(criterion='friedman_mse',
                                         random_state=None,
                                         splitter='best',
                                         **base_learner_config)
    self.model = NGBRegressor(Dist=Normal,
                              Base=base_learner,
                              Score=LogScore,
                              verbose=True,
                              **param_config)
    self.model = self.model.fit(
        X_train, y_train,
        X_val=X_val, Y_val=y_val,
        early_stopping_rounds=self.model_config["early_stopping_rounds"])

    # Point predictions only; the predictive variance is not used here.
    train_pred = self.model.predict(X_train)
    val_pred = self.model.predict(X_val)

    fig_train = utils.scatter_plot(np.array(train_pred), np.array(y_train),
                                   xlabel='Predicted', ylabel='True', title='')
    fig_train.savefig(os.path.join(self.log_dir, 'pred_vs_true_train.jpg'))
    plt.close()

    fig_val = utils.scatter_plot(np.array(val_pred), np.array(y_val),
                                 xlabel='Predicted', ylabel='True', title='')
    fig_val.savefig(os.path.join(self.log_dir, 'pred_vs_true_val.jpg'))
    plt.close()

    train_metrics = utils.evaluate_metrics(y_train, train_pred,
                                           prediction_is_first_arg=False)
    valid_metrics = utils.evaluate_metrics(y_val, val_pred,
                                           prediction_is_first_arg=False)

    logging.info('train metrics: %s', train_metrics)
    logging.info('valid metrics: %s', valid_metrics)

    return valid_metrics
def train(self):
    """Train a LightGBM regressor with early stopping on the val split.

    Side effects: sets ``self.model``, saves pred-vs-true scatter plots for
    train/val under ``self.log_dir``, logs metrics.

    Returns:
        dict: validation metrics from ``utils.evaluate_metrics``.
    """
    X_train, y_train, _ = self.load_results_from_result_paths(self.train_paths)
    X_val, y_val, _ = self.load_results_from_result_paths(self.val_paths)

    logging.info(
        "LGBOOST TRAIN: Careful categoricals not specified in dataset conversion"
    )
    dtrain = lgb.Dataset(X_train, label=y_train)
    dval = lgb.Dataset(X_val, label=y_val)

    param_config = self.parse_param_config()
    # Seed the booster so runs are reproducible per-experiment seed.
    param_config["seed"] = self.seed

    self.model = lgb.train(
        param_config,
        dtrain,
        early_stopping_rounds=self.model_config["early_stopping_rounds"],
        verbose_eval=1,
        valid_sets=[dval])

    # Point predictions only; GBDTs here do not model predictive variance.
    train_pred = self.model.predict(X_train)
    val_pred = self.model.predict(X_val)

    fig_train = utils.scatter_plot(np.array(train_pred), np.array(y_train),
                                   xlabel='Predicted', ylabel='True', title='')
    fig_train.savefig(os.path.join(self.log_dir, 'pred_vs_true_train.jpg'))
    plt.close()

    fig_val = utils.scatter_plot(np.array(val_pred), np.array(y_val),
                                 xlabel='Predicted', ylabel='True', title='')
    fig_val.savefig(os.path.join(self.log_dir, 'pred_vs_true_val.jpg'))
    plt.close()

    train_metrics = utils.evaluate_metrics(y_train, train_pred,
                                           prediction_is_first_arg=False)
    valid_metrics = utils.evaluate_metrics(y_val, val_pred,
                                           prediction_is_first_arg=False)

    logging.info('train metrics: %s', train_metrics)
    logging.info('valid metrics: %s', valid_metrics)

    return valid_metrics
def train(self):
    """Train an XGBoost regressor with early stopping on the val split.

    Side effects: sets ``self.model``, saves pred-vs-true scatter plots for
    train/val under ``self.log_dir``, logs metrics.

    Returns:
        dict: validation metrics from ``utils.evaluate_metrics``.
    """
    X_train, y_train, _ = self.load_results_from_result_paths(self.train_paths)
    X_val, y_val, _ = self.load_results_from_result_paths(self.val_paths)

    dtrain = xgb.DMatrix(X_train, label=y_train)
    dval = xgb.DMatrix(X_val, label=y_val)

    param_config = self.parse_param_config()
    # Seed the booster so runs are reproducible per-experiment seed.
    param_config["seed"] = self.seed

    self.model = xgb.train(
        param_config,
        dtrain,
        num_boost_round=self.model_config["param:num_rounds"],
        early_stopping_rounds=self.model_config["early_stopping_rounds"],
        verbose_eval=1,
        evals=[(dval, 'val')])

    # Point predictions only; GBDTs here do not model predictive variance.
    train_pred = self.model.predict(dtrain)
    val_pred = self.model.predict(dval)

    fig_train = utils.scatter_plot(np.array(train_pred), np.array(y_train),
                                   xlabel='Predicted', ylabel='True', title='')
    fig_train.savefig(os.path.join(self.log_dir, 'pred_vs_true_train.jpg'))
    plt.close()

    fig_val = utils.scatter_plot(np.array(val_pred), np.array(y_val),
                                 xlabel='Predicted', ylabel='True', title='')
    fig_val.savefig(os.path.join(self.log_dir, 'pred_vs_true_val.jpg'))
    plt.close()

    train_metrics = utils.evaluate_metrics(y_train, train_pred,
                                           prediction_is_first_arg=False)
    valid_metrics = utils.evaluate_metrics(y_val, val_pred,
                                           prediction_is_first_arg=False)

    logging.info('train metrics: %s', train_metrics)
    logging.info('valid metrics: %s', valid_metrics)

    return valid_metrics
def evaluate(self, result_paths):
    """Evaluate the fitted model on the given result paths.

    Args:
        result_paths: paths understood by ``load_results_from_result_paths``.

    Returns:
        tuple: (metrics dict, predictions, ground-truth targets).
    """
    X_test, y_test, _ = self.load_results_from_result_paths(result_paths)

    # Point predictions only; no predictive variance is computed here.
    test_pred = self.model.predict(X_test)

    test_metrics = utils.evaluate_metrics(y_test, test_pred,
                                          prediction_is_first_arg=False)
    return test_metrics, test_pred, y_test
def test(self):
    """Evaluate the GNN model on the test split.

    Side effects: saves a pred-vs-true scatter plot under ``self.log_dir``
    and logs the metrics.

    Returns:
        dict: test metrics from ``utils.evaluate_metrics``.
    """
    preds = []
    targets = []

    self.model.eval()
    test_queue = self.load_results_from_result_paths(self.test_paths)

    # Inference only: disable autograd so no graph is built per batch.
    with torch.no_grad():
        for step, graph_batch in enumerate(test_queue):
            graph_batch = graph_batch.to(self.device)
            if self.model_config['model'] == 'gnn_vs_gae_classifier':
                # Classifier variant also returns bin logits; only the
                # regression head is needed for the metrics.
                _, pred = self.model(graph_batch=graph_batch)
            else:
                pred = self.model(graph_batch=graph_batch)
            # Model predicts in [0, 1]; targets are accuracies in percent.
            preds.extend(pred.detach().cpu().numpy() * 100)
            targets.extend(graph_batch.y.detach().cpu().numpy())

    fig = utils.scatter_plot(np.array(preds), np.array(targets),
                             xlabel='Predicted', ylabel='True', title='')
    fig.savefig(os.path.join(self.log_dir, 'pred_vs_true_test.jpg'))
    plt.close()

    test_results = utils.evaluate_metrics(np.array(targets), np.array(preds),
                                          prediction_is_first_arg=False)
    logging.info('test metrics %s', test_results)
    return test_results
def validate(self):
    """Evaluate the MLP model on the validation split.

    Side effects: saves a pred-vs-true scatter plot under ``self.log_dir``
    and logs the metrics.

    Returns:
        dict: validation metrics from ``utils.evaluate_metrics``.
    """
    preds = []
    targets = []

    self.model.eval()
    valid_queue = self.load_results_from_result_paths(self.val_paths)

    # Inference only: disable autograd so no graph is built per batch.
    with torch.no_grad():
        for step, (arch_path_enc, y_true) in enumerate(valid_queue):
            arch_path_enc = arch_path_enc.to(self.device).float()
            y_true = y_true.to(self.device).float()

            pred = self.model(arch_path_enc)
            # Model predicts in [0, 1]; targets are accuracies in percent.
            preds.extend(pred.detach().cpu().numpy() * 100)
            targets.extend(y_true.detach().cpu().numpy())

    fig = utils.scatter_plot(np.array(preds), np.array(targets),
                             xlabel='Predicted', ylabel='True', title='')
    fig.savefig(os.path.join(self.log_dir, 'pred_vs_true_valid.jpg'))
    plt.close()

    val_results = utils.evaluate_metrics(np.array(targets), np.array(preds),
                                         prediction_is_first_arg=False)
    logging.info('validation metrics %s', val_results)
    return val_results
def validate(self):
    """Evaluate the fitted model on the validation split.

    Returns:
        dict: validation metrics from ``utils.evaluate_metrics``.
    """
    X_val, y_val, _ = self.load_results_from_result_paths(self.val_paths)

    # Point predictions only; no predictive variance is computed here.
    val_pred = self.model.predict(X_val)

    valid_metrics = utils.evaluate_metrics(y_val, val_pred,
                                           prediction_is_first_arg=False)
    logging.info('validation metrics %s', valid_metrics)
    return valid_metrics
def train(self):
    """Fit ``self.model`` on the train split and report train/val metrics.

    Side effects: fits ``self.model`` in place, saves pred-vs-true scatter
    plots for train/val under ``self.log_dir``, logs metrics.

    Returns:
        dict: validation metrics from ``utils.evaluate_metrics``.
    """
    # Local import to avoid a circular dependency at module load time.
    from nasbench301.surrogate_models import utils

    X_train, y_train, _ = self.load_results_from_result_paths(self.train_paths)
    X_val, y_val, _ = self.load_results_from_result_paths(self.val_paths)

    self.model.fit(X_train, y_train)

    # Point predictions only; no predictive variance is computed here.
    train_pred = self.model.predict(X_train)
    val_pred = self.model.predict(X_val)

    fig_train = utils.scatter_plot(np.array(train_pred), np.array(y_train),
                                   xlabel='Predicted', ylabel='True', title='')
    fig_train.savefig(os.path.join(self.log_dir, 'pred_vs_true_train.jpg'))
    plt.close()

    fig_val = utils.scatter_plot(np.array(val_pred), np.array(y_val),
                                 xlabel='Predicted', ylabel='True', title='')
    fig_val.savefig(os.path.join(self.log_dir, 'pred_vs_true_val.jpg'))
    plt.close()

    train_metrics = utils.evaluate_metrics(y_train, train_pred,
                                           prediction_is_first_arg=False)
    valid_metrics = utils.evaluate_metrics(y_val, val_pred,
                                           prediction_is_first_arg=False)

    logging.info('train metrics: %s', train_metrics)
    logging.info('valid metrics: %s', valid_metrics)

    return valid_metrics
def infer(self, train_queue, valid_queue, model, criterion, optimizer, lr, epoch): objs = utils.AvgrageMeter() # VALIDATION preds = [] targets = [] model.eval() for step, graph_batch in enumerate(valid_queue): graph_batch = graph_batch.to(self.device) if self.model_config['model'] == 'gnn_vs_gae_classifier': pred_bins, pred = self.model(graph_batch=graph_batch) criterion = torch.nn.BCELoss() criterion_2 = torch.nn.MSELoss() bins = self.create_bins(lower_bound=0, width=10, quantity=9) binned_weights = [] for value in graph_batch.y.cpu().numpy(): bin_index = self.find_bin(value, bins) binned_weights.append(bin_index) bins = torch.FloatTensor(binned_weights) make_one_hot = lambda index: torch.eye(self.model_config['no_bins'])[index.view(-1).long()] binns_one_hot = make_one_hot(bins).to(self.device) loss_1 = criterion(pred_bins, binns_one_hot) loss_2 = criterion_2(pred, self.normalize_data(graph_batch.y)) alpha = self.model_config['classification_loss'] beta = self.model_config['regression_loss'] loss = alpha * loss_1 + beta * loss_2 else: pred = self.model(graph_batch=graph_batch) loss = criterion(self.normalize_data(pred), self.normalize_data(graph_batch.y / 100)) preds.extend(pred.detach().cpu().numpy() * 100) targets.extend(graph_batch.y.detach().cpu().numpy()) n = graph_batch.num_graphs objs.update(loss.data.item(), n) if step % self.data_config['report_freq'] == 0: logging.info('valid %03d %e ', step, objs.avg) fig = utils.scatter_plot(np.array(preds), np.array(targets), xlabel='Predicted', ylabel='True', title='') fig.savefig(os.path.join(self.log_dir, 'pred_vs_true_valid_{}.jpg'.format(epoch))) plt.close() val_results = utils.evaluate_metrics(np.array(targets), np.array(preds), prediction_is_first_arg=False) return objs.avg, val_results
def test(self):
    """Evaluate the fitted model on the test split.

    Side effects: saves a pred-vs-true scatter plot under ``self.log_dir``
    and logs the metrics.

    Returns:
        dict: test metrics from ``utils.evaluate_metrics``.
    """
    X_test, y_test, _ = self.load_results_from_result_paths(self.test_paths)

    # Point predictions only; no predictive variance is computed here.
    test_pred = self.model.predict(X_test)

    fig = utils.scatter_plot(np.array(test_pred), np.array(y_test),
                             xlabel='Predicted', ylabel='True', title='')
    fig.savefig(os.path.join(self.log_dir, 'pred_vs_true_test.jpg'))
    plt.close()

    test_metrics = utils.evaluate_metrics(y_test, test_pred,
                                          prediction_is_first_arg=False)
    logging.info('test metrics %s', test_metrics)
    return test_metrics
def evaluate(self, result_paths):
    """Evaluate the GNN model on the given result paths.

    Args:
        result_paths: paths understood by ``load_results_from_result_paths``.

    Returns:
        tuple: (metrics dict, predictions ndarray, targets ndarray).
    """
    # Get evaluation data
    eval_queue = self.load_results_from_result_paths(result_paths)

    preds = []
    targets = []

    self.model.eval()
    # Inference only: disable autograd so no graph is built per batch.
    with torch.no_grad():
        for step, graph_batch in enumerate(eval_queue):
            graph_batch = graph_batch.to(self.device)
            pred = self.model(graph_batch=graph_batch)
            # Model predicts in [0, 1]; targets are accuracies in percent.
            preds.extend(pred.detach().cpu().numpy() * 100)
            targets.extend(graph_batch.y.detach().cpu().numpy())

    test_metrics = utils.evaluate_metrics(np.array(targets), np.array(preds),
                                          prediction_is_first_arg=False)
    return test_metrics, np.array(preds), np.array(targets)
def evaluate(self, result_paths):
    """Evaluate the MLP model on the given result paths.

    Args:
        result_paths: paths understood by ``load_results_from_result_paths``.

    Returns:
        tuple: (metrics dict, predictions ndarray, targets ndarray).
    """
    # Get evaluation data
    eval_queue = self.load_results_from_result_paths(result_paths)

    preds = []
    targets = []

    self.model.eval()
    # Inference only: disable autograd so no graph is built per batch.
    with torch.no_grad():
        for step, (arch_path_enc, y_true) in enumerate(eval_queue):
            arch_path_enc = arch_path_enc.to(self.device).float()
            y_true = y_true.to(self.device).float()

            pred = self.model(arch_path_enc)
            # Model predicts in [0, 1]; targets are accuracies in percent.
            preds.extend(pred.detach().cpu().numpy() * 100)
            targets.extend(y_true.detach().cpu().numpy())

    test_metrics = utils.evaluate_metrics(np.array(targets), np.array(preds),
                                          prediction_is_first_arg=False)
    # BUG FIX: return ndarrays (as the GNN evaluate does) instead of lists —
    # evaluate_ensemble subtracts member targets, which fails on lists.
    return test_metrics, np.array(preds), np.array(targets)
def infer(self, train_queue, valid_queue, model, criterion, optimizer, lr, epoch): objs = utils.AvgrageMeter() # VALIDATION preds = [] targets = [] for step, (arch_path_enc, y_true) in enumerate(valid_queue): arch_path_enc = arch_path_enc.to(self.device).float() y_true = y_true.to(self.device).float() pred = self.model(arch_path_enc) loss = torch.mean( torch.abs((self.normalize_data(pred) / self.normalize_data(y_true / 100)) - 1)) preds.extend(pred.detach().cpu().numpy() * 100) targets.extend(y_true.detach().cpu().numpy()) objs.update(loss.data.item(), len(arch_path_enc)) if step % self.data_config['report_freq'] == 0: logging.info('valid %03d %e ', step, objs.avg) fig = utils.scatter_plot(np.array(preds), np.array(targets), xlabel='Predicted', ylabel='True', title='') fig.savefig( os.path.join(self.log_dir, 'pred_vs_true_valid_{}.jpg'.format(epoch))) plt.close() val_results = utils.evaluate_metrics(np.array(targets), np.array(preds), prediction_is_first_arg=False) return objs.avg, val_results
def evaluate_ensemble(self, result_paths, apply_noise):
    """Evaluate the metrics on the result paths using ensemble predictions.

    Args:
        result_paths: paths passed to each member's ``evaluate``.
        apply_noise: if True, sample each prediction from a Gaussian with
            the ensemble mean and per-point stddev instead of the mean.

    Returns:
        tuple: (metrics dict, ensemble predictions, per-point stddevs,
        targets).

    Raises:
        ValueError: if the ensemble members disagree on the targets.
    """
    preds, targets = [], []

    # Collect individual members' predictions; all members must share the
    # same targets (same result_paths, same ordering).
    for member_model in self.ensemble_members:
        member_metrics, member_preds, member_targets = member_model.evaluate(
            result_paths)
        logging.info("==> Eval member metrics: %s", member_metrics)

        # Coerce to ndarray so the consistency check below works even for
        # members that return plain lists.
        member_targets = np.asarray(member_targets)
        if len(targets) == 0:
            preds.append(member_preds)
            targets = member_targets
            continue

        # BUG FIX: compare the ABSOLUTE deviation. The previous signed check
        # `(targets - member_targets) > 1e-5` never fired when a member's
        # targets were larger than the reference.
        if np.any(np.abs(targets - member_targets) > 1e-5):
            raise ValueError("Ensemble members have different targets!")
        preds.append(member_preds)

    means = np.mean(preds, axis=0)
    stddevs = np.std(preds, axis=0)

    # Apply noise: sample per-point from N(mean, stddev).
    if apply_noise:
        noisy_predictions = [
            np.random.normal(loc=mean, scale=stddev, size=1)[0]
            for mean, stddev in zip(means, stddevs)
        ]
        ensemble_predictions = noisy_predictions
    else:
        ensemble_predictions = means

    # Evaluate metrics
    metrics = utils.evaluate_metrics(targets, ensemble_predictions,
                                     prediction_is_first_arg=False)
    return metrics, ensemble_predictions, stddevs, targets
def train_epoch(self, train_queue, valid_queue, model, criterion, optimizer, lr, epoch):
    """Run one training epoch of the GNN model over ``train_queue``.

    ``valid_queue`` and ``lr`` are unused here; the signature mirrors
    ``infer`` so both can be called uniformly per epoch.

    Returns:
        tuple: (average training loss, metrics dict from
        ``utils.evaluate_metrics``).
    """
    objs = utils.AvgrageMeter()

    # TRAINING
    preds = []
    targets = []

    model.train()
    for step, graph_batch in enumerate(train_queue):
        graph_batch = graph_batch.to(self.device)
        # print(step)
        if self.model_config['model'] == 'gnn_vs_gae_classifier':
            # Classifier variant: a bin-classification head plus a
            # regression head, combined into a weighted loss.
            pred_bins, pred = self.model(graph_batch=graph_batch)
            # NOTE(review): this clobbers the `criterion` argument for the
            # classifier branch — intentional, but surprising.
            criterion = torch.nn.BCELoss()
            criterion_2 = torch.nn.MSELoss()
            # Discretize targets into accuracy bins (width 10, 9 bins).
            bins = self.create_bins(lower_bound=0, width=10, quantity=9)
            binned_weights = []
            for value in graph_batch.y.cpu().numpy():
                bin_index = self.find_bin(value, bins)
                binned_weights.append(bin_index)
            bins = torch.FloatTensor(binned_weights)
            # One-hot encode the bin indices for BCE against the bin logits.
            make_one_hot = lambda index: torch.eye(self.model_config['no_bins'])[index.view(-1).long()]
            binns_one_hot = make_one_hot(bins).to(self.device)
            loss_1 = criterion(pred_bins, binns_one_hot)
            loss_2 = criterion_2(pred, self.normalize_data(graph_batch.y))
            # Weighted sum of classification and regression losses.
            alpha = self.model_config['classification_loss']
            beta = self.model_config['regression_loss']
            loss = alpha * loss_1 + beta * loss_2
        else:
            pred = self.model(graph_batch=graph_batch)
            if self.model_config['loss:loss_log_transform']:
                # Compare in normalized space; targets are percentages.
                loss = criterion(self.normalize_data(pred),
                                 self.normalize_data(graph_batch.y / 100))
            else:
                loss = criterion(pred, graph_batch.y / 100)
        if self.model_config['loss:pairwise_ranking_loss']:
            # Hinge-style pairwise ranking term with margin m: penalize any
            # pair where a higher-accuracy architecture is not predicted at
            # least m above a lower-accuracy one. O(batch^2) in pairs.
            m = 0.1
            '''
            y = list(map(lambda y_i: 1 if y_i == True else -1, graph_batch.y[0: -1] > graph_batch.y[1:]))
            pairwise_ranking_loss = torch.nn.HingeEmbeddingLoss(margin=m)(pred[0:-1] - pred[1:], target=torch.from_numpy(np.array(y)))
            '''
            pairwise_ranking_loss = []
            sort_idx = torch.argsort(graph_batch.y, descending=True)
            for idx, idx_y_i in enumerate(sort_idx):
                for idx_y_i_p1 in sort_idx[idx + 1:]:
                    pairwise_ranking_loss.append(
                        torch.max(torch.tensor(0.0, dtype=torch.float),
                                  m - (pred[idx_y_i] - pred[idx_y_i_p1])))
            pairwise_ranking_loss = torch.mean(torch.stack(pairwise_ranking_loss))
            loss += pairwise_ranking_loss
            if step % self.data_config['report_freq'] == 0:
                logging.info('Pairwise ranking loss {}'.format(pairwise_ranking_loss))

        # Predictions are in [0, 1]; rescale to percent for the metrics.
        preds.extend(pred.detach().cpu().numpy() * 100)
        targets.extend(graph_batch.y.detach().cpu().numpy())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        n = graph_batch.num_graphs
        objs.update(loss.data.item(), n)
        if step % self.data_config['report_freq'] == 0:
            logging.info('train %03d %e', step, objs.avg)

    fig = utils.scatter_plot(np.array(preds), np.array(targets),
                             xlabel='Predicted', ylabel='True', title='')
    fig.savefig(os.path.join(self.log_dir,
                             'pred_vs_true_train_{}.jpg'.format(epoch)))
    plt.close()

    train_results = utils.evaluate_metrics(np.array(targets), np.array(preds),
                                           prediction_is_first_arg=False)
    return objs.avg, train_results
def train_epoch(self, train_queue, valid_queue, model, criterion, optimizer, lr, epoch): objs = utils.AvgrageMeter() # TRAINING preds = [] targets = [] model.train() for step, (arch_path_enc, y_true) in enumerate(train_queue): arch_path_enc = arch_path_enc.to(self.device).float() y_true = y_true.to(self.device).float() pred = self.model(arch_path_enc) if self.model_config['loss:loss_log_transform']: loss = torch.mean( torch.abs((self.normalize_data(pred) / self.normalize_data(y_true / 100)) - 1)) else: loss = criterion(1 - pred, 1 - y_true / 100) if self.model_config['loss:pairwise_ranking_loss']: m = 0.1 pairwise_ranking_loss = [] sort_idx = torch.argsort(y_true, descending=True) for idx, idx_y_i in enumerate(sort_idx): for idx_y_i_p1 in sort_idx[idx + 1:]: pairwise_ranking_loss.append( torch.max(torch.tensor(0.0, dtype=torch.float), m - (pred[idx_y_i] - pred[idx_y_i_p1]))) pairwise_ranking_loss = torch.mean( torch.stack(pairwise_ranking_loss)) loss += pairwise_ranking_loss if step % self.data_config['report_freq'] == 0: logging.info('Pairwise ranking loss {}'.format( pairwise_ranking_loss)) preds.extend(pred.detach().cpu().numpy() * 100) targets.extend(y_true.detach().cpu().numpy()) optimizer.zero_grad() loss.backward() optimizer.step() objs.update(loss.data.item(), len(arch_path_enc)) if step % self.data_config['report_freq'] == 0: logging.info('train %03d %e', step, objs.avg) fig = utils.scatter_plot(np.array(preds), np.array(targets), xlabel='Predicted', ylabel='True', title='') fig.savefig( os.path.join(self.log_dir, 'pred_vs_true_train_{}.jpg'.format(epoch))) plt.close() train_results = utils.evaluate_metrics(np.array(targets), np.array(preds), prediction_is_first_arg=False) return objs.avg, train_results