def evaluate_model(model, train_data, test_data, trainvars,
                   global_settings, choose_data, particles, nodeWise=False):
    """Evaluate a trained model on the train and test samples.

    Parameters:
    ----------
    model : trained model
        For ml_method 'lbn' it predicts from [low_level, high_level]
        inputs; otherwise it predicts from the trainvars columns.
    train_data : pandas DataFrame
        Training events; must contain 'multitarget' and 'evtWeight'.
    test_data : pandas DataFrame
        Testing events; must contain 'multitarget' and 'evtWeight'.
    trainvars : list
        Names of the training variables.
    global_settings : dict
        Global preferences; uses 'ml_method' and 'output_dir'.
    choose_data : str
        Label appended to the output plot names ('_train' / '_test').
    particles : list
        Particle definitions forwarded to the dlt helpers.
    nodeWise : bool
        When True, skip the confusion-matrix and DNN-score plots.

    Returns:
    -------
    (train_info, test_info) : tuple of dict
        Each dict holds 'fpr', 'tpr', 'auc', 'type' and 'prediction'.
    """
    if global_settings['ml_method'] == 'lbn':
        train_predicted_probabilities = model.predict(
            [
                dlt.get_low_level(train_data, particles),
                dlt.get_high_level(train_data, particles, trainvars)
            ],
            batch_size=1024)
        test_predicted_probabilities = model.predict(
            [
                dlt.get_low_level(test_data, particles),
                dlt.get_high_level(test_data, particles, trainvars)
            ],
            batch_size=1024)
    else:
        train_predicted_probabilities = model.predict(
            train_data[trainvars].values)
        test_predicted_probabilities = model.predict(
            test_data[trainvars].values)
    if not nodeWise:
        plot_confusion_matrix(
            test_data, test_predicted_probabilities,
            global_settings["output_dir"], choose_data + '_test')
        plot_confusion_matrix(
            train_data, train_predicted_probabilities,
            global_settings["output_dir"], choose_data + '_train')
        plot_DNNScore(
            train_data, model, global_settings['ml_method'],
            global_settings['output_dir'], trainvars, particles,
            choose_data + '_train')
        plot_DNNScore(
            test_data, model, global_settings['ml_method'],
            global_settings['output_dir'], trainvars, particles,
            choose_data + '_test')
    test_fpr, test_tpr = mt.roc_curve(
        test_data['multitarget'].astype(int),
        test_predicted_probabilities,
        test_data['evtWeight'].astype(float))
    train_fpr, train_tpr = mt.roc_curve(
        train_data['multitarget'].astype(int),
        train_predicted_probabilities,
        train_data['evtWeight'].astype(float))
    # NOTE(review): the 'reorder' keyword was deprecated in scikit-learn
    # 0.20 and removed in 0.22 -- confirm the pinned sklearn version
    # still accepts it, otherwise these calls raise TypeError.
    train_auc = auc(train_fpr, train_tpr, reorder=True)
    test_auc = auc(test_fpr, test_tpr, reorder=True)
    test_info = {
        'fpr': test_fpr,
        'tpr': test_tpr,
        'auc': test_auc,
        'type': 'test',
        'prediction': test_predicted_probabilities
    }
    train_info = {
        'fpr': train_fpr,
        'tpr': train_tpr,
        'auc': train_auc,
        'type': 'train',
        'prediction': train_predicted_probabilities
    }
    return train_info, test_info
def plot_DNNScore(data, model, lbn, output_dir, trainvars, particles, addition):
    """Attach per-event max DNN node position/value to *data* and plot.

    For each process in data['process'], predicts the per-node
    probabilities (either from trainvars columns, or from LBN low/high
    level inputs when lbn == 'lbn'), stores the argmax node index in
    'max_node_pos' and its probability in 'max_node_val', then delegates
    the plotting to hhvt.plot_DNNScore.

    BUGFIX: the original non-lbn branch rebound ``data`` to the current
    process's subset inside the loop, so later iterations operated on a
    shrunken frame and the final plot only contained the first process.
    Both branches now select into a separate per-process frame and write
    back through a boolean mask on the full frame.

    Parameters:
    ----------
    data : pandas DataFrame
        Events to score; must contain a 'process' column and trainvars.
        Mutated in place: gains 'max_node_pos' and 'max_node_val'.
    model : trained model
        Model used for the per-process predictions.
    lbn : str
        The ml_method; 'lbn' selects the low/high-level input path.
    output_dir : str
        Directory for the output plot.
    trainvars : list
        Names of the training variables.
    particles : list
        Particle definitions forwarded to the dlt helpers.
    addition : str
        Label forwarded to hhvt.plot_DNNScore for the plot name.

    Returns:
    -------
    Nothing.
    """
    data["max_node_pos"] = -1
    data["max_node_val"] = -1
    for process in set(data["process"]):
        process_mask = data["process"] == process
        process_only_data = data.loc[process_mask]
        if lbn != 'lbn':
            value = model.predict(process_only_data[trainvars].values)
        else:
            value = model.predict(
                [
                    dlt.get_low_level(process_only_data, particles),
                    dlt.get_high_level(
                        process_only_data, particles, trainvars)
                ],
                batch_size=1024)
        data.loc[process_mask, "max_node_pos"] = np.argmax(value, axis=1)
        data.loc[process_mask, "max_node_val"] = np.amax(value, axis=1)
    hhvt.plot_DNNScore(data, output_dir, addition)
def fit_model(self):
    """Fit the model on the training sample with validation monitoring.

    Builds the [low_level, high_level] input lists for both the training
    and validation samples, trains with per-event 'totalWeight' sample
    weights against the 'multitarget' labels, and, when plot_history is
    set, draws the loss/accuracy curves via hhvt.
    """
    channel_particles = self.particles[self.channel]
    train_inputs = [
        dlt.get_low_level(self.train_data, channel_particles),
        dlt.get_high_level(
            self.train_data, channel_particles, self.trainvars)
    ]
    validation_inputs = [
        dlt.get_low_level(self.val_data, channel_particles),
        dlt.get_high_level(
            self.val_data, channel_particles, self.trainvars)
    ]
    validation_set = (
        validation_inputs,
        self.val_data["multitarget"].values,
        self.val_data["totalWeight"].values
    )
    history = self.model.fit(
        train_inputs,
        self.train_data['multitarget'].values,
        epochs=self.epoch,
        batch_size=self.batch_size,
        sample_weight=self.train_data['totalWeight'].values,
        validation_data=validation_set,
        callbacks=[self.reduce_lr, self.early_stopping]
    )
    if self.plot_history:
        hhvt.plot_loss_accuracy(history, self.output_dir, self.addition)
def evaluate_model(data_dict, global_settings, model):
    """Evaluate the model and return the fitness score.

    Predictions are produced from the LBN-style [low_level, high_level]
    inputs built by the dlt helpers (the original docstring incorrectly
    described this as the XGBoost evaluation path).

    Parameters:
    ----------
    data_dict : dict
        Contains 'trainvars', 'train' and 'test' entries used for the
        evaluation.
    global_settings : dict
        Preferences for the optimization; uses 'channel', 'kappa' and
        'fitness_fn'.
    model : trained model
        Model whose predict() accepts [low_level, high_level] inputs.

    Returns:
    -------
    score : float
        The score calculated according to the fitness_fn.

    Raises:
    ------
    ValueError
        If global_settings['fitness_fn'] is not implemented.
    """
    trainvars = data_dict['trainvars']
    train_data = data_dict['train']
    test_data = data_dict['test']
    particles = PARTICLE_INFO[global_settings['channel']]
    pred_train = model.predict(
        [dlt.get_low_level(train_data, particles),
         dlt.get_high_level(train_data, particles, trainvars)],
        batch_size=1024)
    pred_test = model.predict(
        [dlt.get_low_level(test_data, particles),
         dlt.get_high_level(test_data, particles, trainvars)],
        batch_size=1024)
    kappa = global_settings['kappa']
    if global_settings['fitness_fn'] == 'd_roc':
        return et.calculate_d_roc(
            data_dict, pred_train, pred_test, kappa=kappa, multiclass=True)
    elif global_settings['fitness_fn'] == 'd_ams':
        return et.calculate_d_ams(
            data_dict, pred_train, pred_test, kappa=kappa)
    else:
        # Original message lacked spaces around the fn name
        # ("Thed_roc fitness_fn..."); fixed here.
        raise ValueError(
            'The ' + str(global_settings['fitness_fn'])
            + ' fitness_fn is not implemented'
        )
def predict_from_model(self, data_):
    """Return the model's predictions for *data_*.

    Builds the low-level and high-level input lists for the configured
    channel's particles and feeds them to the underlying model.
    """
    channel_particles = self.particles[self.channel]
    model_inputs = [
        dlt.get_low_level(data_, channel_particles),
        dlt.get_high_level(data_, channel_particles, self.trainvars)
    ]
    return self.model.predict(model_inputs)