def on_epoch_end(self, epoch, logs=None):
    #Only evaluate every n epochs
    if epoch % self.n != 0:
        return

    #Gather the species prediction matrix
    y_pred = self.model.predict(self.eval_dataset)

    #The spectral/spatial submodels return multiple outputs; score the first
    if self.submodel in ["spectral", "spatial"]:
        y_pred = y_pred[0]

    #F1
    macro, micro = metrics.f1_scores(self.y_true, y_pred)
    self.experiment.log_metric("MicroF1", micro)
    self.experiment.log_metric("MacroF1", macro)

    #Log the number of predictions to make sure it is constant
    self.experiment.log_metric("Prediction samples", y_pred.shape[0])

    results = pd.DataFrame({
        "true": np.argmax(self.y_true, 1),
        "predicted": np.argmax(y_pred, 1)
    })
    self.experiment.log_table("results_{}.csv".format(epoch), results.values)
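#The callbacks above and below all rely on metrics.f1_scores, whose
#implementation is not shown here. A minimal sketch, assuming it argmaxes the
#one-hot/softmax matrices and wraps scikit-learn (the name, signature, and
#behavior are assumptions, not the project's actual code):
import numpy as np
from sklearn.metrics import f1_score

def f1_scores(y_true, y_pred):
    #Convert one-hot labels and softmax scores to integer class labels
    true_labels = np.argmax(y_true, axis=1)
    pred_labels = np.argmax(y_pred, axis=1)

    #Macro averages F1 across classes equally; micro pools all predictions
    macro = f1_score(true_labels, pred_labels, average="macro")
    micro = f1_score(true_labels, pred_labels, average="micro")
    return macro, micro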
def test_evaluate(test_config):
    #Create class
    mod = main.AttentionModel()

    #Replace config values for the testing environment
    for key, value in test_config.items():
        for nested_key, nested_value in value.items():
            mod.config[key][nested_key] = nested_value

    #Create the model and read data
    mod.create()
    mod.read_data(validation_split=True)

    #Method 1: the class eval method
    y_pred, y_true = mod.evaluate(mod.val_split)

    test_acc = keras_metrics.CategoricalAccuracy()
    test_acc.update_state(y_true=y_true, y_pred=y_pred)
    method1_eval_accuracy = test_acc.result().numpy()

    assert y_pred.shape == y_true.shape

    #Method 2: the keras eval method
    metric_list = mod.model.evaluate(mod.val_split)
    metric_dict = {}
    for index, value in enumerate(metric_list):
        metric_dict[mod.model.metrics_names[index]] = value

    assert method1_eval_accuracy == metric_dict["acc"]

    #F1 requires integer labels, not softmax
    f1s = metrics.f1_scores(y_true, y_pred)
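#test_evaluate expects a test_config fixture whose nested dict mirrors the
#two-level layout of the model's YAML config. A hedged pytest sketch; the keys
#and values below are illustrative only, not the project's real settings:
import pytest

@pytest.fixture()
def test_config():
    return {
        "train": {
            "epochs": 1,
            "batch_size": 2
        }
    }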
def on_epoch_end(self, epoch, logs=None):
    #Only evaluate every n epochs
    if epoch % self.n != 0:
        return

    #Gather site and species matrix batch by batch
    y_true = []
    y_pred = []
    for data, label in self.eval_dataset:
        pred = self.model.predict(data)
        if self.submodel in ["spectral", "spatial"]:
            y_pred.append(pred[0])
            y_true.append(label[0])
        else:
            y_pred.append(pred)
            y_true.append(label)

    y_true_list = np.concatenate(y_true)
    y_pred_list = np.concatenate(y_pred)

    #F1
    macro, micro = metrics.f1_scores(y_true_list, y_pred_list)
    self.experiment.log_metric("MicroF1", micro)
    self.experiment.log_metric("MacroF1", macro)
def test_evaluate(test_config):
    #Create class
    mod = Houston2018.AttentionModel(config="conf/houston_config.yml")

    #Replace config values for the testing environment
    for key, value in test_config.items():
        for nested_key, nested_value in value.items():
            mod.config[key][nested_key] = nested_value

    #Create the model and read data
    mod.create()
    mod.read_data(validation_split=True)

    #Compile with the metrics the test checks against
    metric_list = [
        keras_metrics.TopKCategoricalAccuracy(k=2, name="top_k"),
        keras_metrics.CategoricalAccuracy(name="acc")
    ]
    mod.model.compile(
        loss="categorical_crossentropy",
        optimizer=tf.keras.optimizers.Adam(
            learning_rate=float(mod.config["train"]["learning_rate"])),
        metrics=metric_list)

    #Method 1: the class eval method
    y_pred, y_true = mod.evaluate(mod.val_split)

    test_acc = keras_metrics.CategoricalAccuracy()
    test_acc.update_state(y_true=y_true, y_pred=y_pred)
    method1_eval_accuracy = test_acc.result().numpy()

    assert y_pred.shape == y_true.shape

    #Method 2: the keras eval method
    metric_list = mod.model.evaluate(mod.val_split)
    metric_dict = {}
    for index, value in enumerate(metric_list):
        metric_dict[mod.model.metrics_names[index]] = value

    assert method1_eval_accuracy == metric_dict["acc"]

    #F1 requires integer labels, not softmax
    f1s = metrics.f1_scores(y_true, y_pred)
def on_epoch_end(self, epoch, logs=None):
    #Only evaluate every n epochs
    if epoch % self.n != 0:
        return

    #Gather the species prediction matrix
    y_pred = self.model.predict(self.eval_dataset)
    if self.submodel in ["spectral", "spatial"]:
        y_pred = y_pred[0]

    #F1
    macro, micro = metrics.f1_scores(self.y_true, y_pred)
    self.experiment.log_metric("MicroF1", micro)
    self.experiment.log_metric("MacroF1", macro)

    #Log the number of predictions to make sure it is constant
    self.experiment.log_metric("Prediction samples", y_pred.shape[0])
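#A callback like the one above is wired into training through model.fit. A
#usage sketch; the class name F1Callback and its constructor arguments are
#hypothetical, inferred from the attributes the method reads:
f1_callback = F1Callback(
    experiment=experiment,      #comet_ml Experiment used for logging
    eval_dataset=val_dataset,   #dataset passed to model.predict
    y_true=y_true_matrix,       #one-hot label matrix aligned with eval_dataset
    submodel=None,              #or "spectral"/"spatial" for multi-output submodels
    n=5)                        #evaluate every 5 epochs

model.fit(train_dataset, epochs=100, callbacks=[f1_callback])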
def on_epoch_end(self, epoch, logs=None):
    #Gather predictions and labels batch by batch
    y_true = []
    y_pred = []
    for image, label in self.dataset:
        pred = self.model.predict(image)
        y_pred.append(pred)
        y_true.append(label)

    y_true = np.vstack(y_true)
    y_pred = np.vstack(y_pred)

    macro, micro = metrics.f1_scores(y_true, y_pred)
    self.experiment.log_metric("MicroF1", micro)
    self.experiment.log_metric("MacroF1", macro)

    self.experiment.log_confusion_matrix(
        y_true,
        y_pred,
        title="Confusion Matrix, Epoch #%d" % (epoch + 1),
        file_name="confusion-matrix-%03d.json" % (epoch + 1),
        labels=self.label_names)
def on_train_end(self, logs=None):
    #Gather the species prediction matrix
    y_pred = self.model.predict(self.eval_dataset)
    if self.submodel in ["spectral", "spatial"]:
        y_pred = y_pred[0]

    #F1
    macro, micro = metrics.f1_scores(self.y_true, y_pred)
    self.experiment.log_metric("Final MicroF1", micro)
    self.experiment.log_metric("Final MacroF1", macro)

    #Log the number of predictions to make sure it is constant
    self.experiment.log_metric("Prediction samples", y_pred.shape[0])

    results = pd.DataFrame({
        "true": np.argmax(self.y_true, 1),
        "predicted": np.argmax(y_pred, 1)
    })

    #Assign labels; the grouped confusion metrics below need taxonID columns
    if self.label_names:
        results["true_taxonID"] = results.true.apply(
            lambda x: self.label_names[x])
        results["predicted_taxonID"] = results.predicted.apply(
            lambda x: self.label_names[x])

        #Within-site confusion
        site_lists = self.train_shp.groupby("taxonID").siteID.unique()
        site_confusion = metrics.site_confusion(
            y_true=results.true_taxonID,
            y_pred=results.predicted_taxonID,
            site_lists=site_lists)
        self.experiment.log_metric(name="Within_site confusion[training]",
                                   value=site_confusion)

        #Within-plot confusion
        plot_lists = self.train_shp.groupby("taxonID").plotID.unique()
        plot_confusion = metrics.site_confusion(
            y_true=results.true_taxonID,
            y_pred=results.predicted_taxonID,
            site_lists=plot_lists)
        self.experiment.log_metric(name="Within_plot confusion[training]",
                                   value=plot_confusion)

        #Within-domain confusion
        domain_lists = self.train_shp.groupby("taxonID").domainID.unique()
        domain_confusion = metrics.site_confusion(
            y_true=results.true_taxonID,
            y_pred=results.predicted_taxonID,
            site_lists=domain_lists)
        self.experiment.log_metric(name="Within_domain confusion[training]",
                                   value=domain_confusion)

        #Genus of all the taxonID variants should be the same; take the first
        scientific_dict = self.train_shp.groupby(
            "taxonID")["scientific"].apply(
                lambda x: x.head(1).values.tolist()).to_dict()
        genus_confusion = metrics.genus_confusion(
            y_true=results.true_taxonID,
            y_pred=results.predicted_taxonID,
            scientific_dict=scientific_dict)
        self.experiment.log_metric(name="Within Genus confusion",
                                   value=genus_confusion)

        #Most confused species pairs
        most_confused = results.groupby(
            ["true_taxonID", "predicted_taxonID"]).size().reset_index(name="count")
        most_confused = most_confused[
            most_confused.true_taxonID != most_confused.predicted_taxonID
        ].sort_values("count", ascending=False)
        self.experiment.log_table("most_confused.csv", most_confused.values)
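#metrics.site_confusion is reused above for site, plot, and domain groupings.
#Its implementation is not shown; one plausible reading, sketched here purely
#as an assumption, is the fraction of misclassifications where the predicted
#taxon co-occurs with the true taxon in at least one shared group:
def site_confusion(y_true, y_pred, site_lists):
    within = 0
    errors = 0
    for true_taxon, pred_taxon in zip(y_true, y_pred):
        if true_taxon == pred_taxon:
            continue
        errors += 1
        #Do the true and predicted taxa share any site/plot/domain?
        if set(site_lists[true_taxon]) & set(site_lists[pred_taxon]):
            within += 1
    return within / errors if errors else 0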
#Get the alpha score for the weighted spectral/spatial average.
#Higher alpha favors the spatial network.
if model.config["train"]["weighted_sum"]:
    estimate_a = model.model.layers[-1].get_weights()
    experiment.log_metric(name="spatial-spectral weight", value=estimate_a[0][0])

##Evaluate
#Evaluation scores; see config.yml for the tfrecords path
y_pred, y_true = model.evaluate(model.val_split)

#Evaluation accuracy
eval_acc = keras_metrics.CategoricalAccuracy()
eval_acc.update_state(y_true, y_pred)
experiment.log_metric("Evaluation Accuracy", eval_acc.result().numpy())

macro, micro = metrics.f1_scores(y_true, y_pred)
experiment.log_metric("MicroF1", micro)
experiment.log_metric("MacroF1", macro)

print("Unique labels in y_true: {}, unique labels in y_pred: {}".format(
    np.unique(np.argmax(y_true, 1)), np.unique(np.argmax(y_pred, 1))))

#Read class labels
labeldf = pd.read_csv(model.classes_file)
experiment.log_confusion_matrix(
    y_true=y_true,
    y_predicted=y_pred,
    labels=list(labeldf.taxonID.values),
    title="Confusion Matrix")

#Save model
model.model.save("{}/{}.h5".format(save_dir, timestamp))
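#The alpha logged above is read from the network's final layer. A minimal
#sketch of such a weighted-sum layer, assuming a single trainable scalar that
#blends the spatial and spectral outputs (the class name and initializer are
#assumptions, not the project's actual code):
import tensorflow as tf

class WeightedSum(tf.keras.layers.Layer):
    def build(self, input_shape):
        #One trainable scalar; get_weights()[0][0] recovers it for logging
        self.alpha = self.add_weight(
            name="alpha",
            shape=(1,),
            initializer=tf.keras.initializers.Constant(0.5),
            trainable=True)

    def call(self, inputs):
        spatial, spectral = inputs
        #Higher alpha favors the spatial network
        return self.alpha * spatial + (1 - self.alpha) * spectral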