def _fit(self, tensor_provider, train_idx, y, verbose=0):
    """
    :param TensorProvider tensor_provider:
    :param list train_idx:
    :param y: Binary class labels (0 = non-claim, 1 = claim) for the samples in train_idx.
    :param int verbose:
    :return:
    """
    # Use model's graph and run initializer
    with self._tf_graph.as_default():
        self._sess.run(tf.global_variables_initializer())

    # Close all figures and make a new one
    fig = None
    if self.results_path is not None:
        plt.close("all")
        plt.ioff()
        print("Making figure")
        fig = plt.figure(figsize=(14, 11))

    if verbose:
        print(verbose * " " + "Fitting {}".format(self.name))
        verbose += 2

    # Get training data
    input_tensor = tensor_provider.load_concat_input_tensors(
        data_keys_or_idx=train_idx,
        word_embedding=self.use_word_embedding,
        char_embedding=self.use_char_embedding,
        pos_tags=self.use_pos_tags)
    input_lengths = tensor_provider.load_data_tensors(
        data_keys_or_idx=train_idx, word_counts=True)["word_counts"]
    train_idx = list(range(len(train_idx)))

    # Get static features if wanted
    static_input_tensor = None
    if self.use_static_features:
        static_input_tensor = tensor_provider.load_concat_input_tensors(
            data_keys_or_idx=train_idx, bow=True)

    # Note learning rates
    if isinstance(self.learning_rate_progression, float):
        learning_rates = [self.learning_rate_progression] * self.n_batches
    else:
        learning_rates = self.learning_rate_progression

    # Calculate sample probability based on class-size
    # TODO: Move this to own function and implement a "batch_strategy" input
    non_claim_if = 1.0 / sum(y == 0)
    claim_if = 1.0 / sum(y == 1)
    sample_weights = np.empty((len(train_idx)))
    sample_weights[y == 0] = non_claim_if
    sample_weights[y == 1] = claim_if
    sample_weights = sample_weights / sum(sample_weights)  # normalize to yield probabilities

    # Run training batches
    costs = []
    batches = []
    start_time = time()
    for batch_nr in range(self.n_batches):
        c_learning_rate = learning_rates[batch_nr]
        c_indices = np.random.choice(train_idx,
                                     self.batch_size,
                                     replace=False,
                                     p=sample_weights)
        c_inputs = input_tensor[c_indices, :, :]
        c_truth = y[c_indices]
        c_truth = np.stack([c_truth == 0, c_truth == 1], axis=1) * 1
        c_input_lengths = input_lengths[c_indices]

        # Feeds
        feed_dict = {
            self.recurrent_inputs: c_inputs,
            self.input_lengths: c_input_lengths,
            self.truth: c_truth,
            self.learning_rate: c_learning_rate,
            self.is_training: True
        }

        # Add static features if needed
        if self.use_static_features:
            feed_dict = {
                **feed_dict,
                self.static_inputs: static_input_tensor[c_indices, :]
            }

        # Fetching
        fetch = [self.optimize_op, self.regularized_cost]
        if self.results_path is not None:
            fetch.append(self._summary_merged)

        # Run batch training
        _, c, *summary = self._sess.run(fetches=fetch, feed_dict=feed_dict)

        # Tensorboard summaries
        if self.results_path is not None:
            self._summary_train_writer.add_summary(summary[0], batch_nr)

        # Note performance
        costs.append(c)
        batches.append(batch_nr + 1)

        if verbose:
            # Plot error and learning rate
            if self.results_path is not None:
                fig.clear()
                primary_secondary_plot(
                    primary_xs=batches,
                    primary_values=costs,
                    secondary_plots=[learning_rates],
                    x_limit=self.n_batches,
                    primary_label="Cost",
                    secondary_label="Learning Rate",
                    x_label="Batch",
                    title="BasicRecurrent: Cost and learning rate",
                    primary_y_limit=self.training_curve_y_limit)
                save_fig(Path(self.results_path, "training_curve"), only_pdf=True)

            # Print validation
            if (batch_nr + 1) % self.display_step == 0 and verbose:
                print(verbose * " ", end="")
                if isinstance(self.learning_rate_progression, float):
                    print_formatter = "Batch {: 8d} / {: 8d}. cost = {:5.3e}."
                else:
                    print_formatter = "Batch {: 8d} / {: 8d}. cost = {:5.3e}. learning_rate = {:.2e}"
                time_label = "{}, {:7.2f}s : ".format(
                    datetime.now().strftime("%H:%M:%S"), time() - start_time)
                print(time_label + print_formatter.format(batch_nr + 1,
                                                          self.n_batches,
                                                          c,
                                                          learning_rates[batch_nr]))

    # Done
    if self.results_path is not None:
        plt.close("all")
        plt.ion()
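
# The class-balancing logic above (marked with a TODO) could be moved into its own helper.
# Below is a minimal module-level sketch of such a helper, mirroring the computation in _fit.
# It assumes numpy is imported as np (as elsewhere in this module) and binary labels in {0, 1};
# the name "balanced_sample_probabilities" is hypothetical and not part of the original code.
def balanced_sample_probabilities(y):
    """Return per-sample probabilities that weigh both classes equally.

    :param np.ndarray y: Binary label array (0 = non-claim, 1 = claim).
    :return: np.ndarray of probabilities summing to 1.
    """
    probabilities = np.empty(len(y), dtype=float)
    probabilities[y == 0] = 1.0 / np.sum(y == 0)  # each non-claim sample gets its inverse class frequency
    probabilities[y == 1] = 1.0 / np.sum(y == 1)  # each claim sample gets its inverse class frequency
    return probabilities / probabilities.sum()    # normalize to a probability distribution

# Example use for drawing a class-balanced batch, analogous to the sampling in _fit:
#     p = balanced_sample_probabilities(y)
#     batch_idx = np.random.choice(len(y), size=batch_size, replace=False, p=p)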
    ax2.tick_params(axis='y', colors=secondary_colors[0])
    ax2.set_ylabel(secondary_label, color=secondary_colors[0])


if __name__ == "__main__":
    plt.close("all")
    n_batches = 1000

    # Make learning rates
    learning_rates = linear_geometric_curve(n=n_batches,
                                            starting_value=1e-2,
                                            end_value=1e-8,
                                            geometric_component=3. / 4,
                                            geometric_end=1.4)

    validation_x = range(0, n_batches, 5)
    validation = [val / n_batches for val in validation_x]

    primary_secondary_plot(primary_xs=validation_x,
                           primary_values=validation,
                           secondary_plots=[learning_rates],
                           x_limit=n_batches,
                           secondary_label="Learning rate",
                           primary_label="Accuracy",
                           title="Validation",
                           x_label="Batch")
    save_fig(Path("delete"))
            plt.imshow(tensor.T, aspect="auto")
            plt.xlabel("Sample")
            plt.ylabel("Features")
        else:
            the_rows = cols = np.math.ceil(np.math.sqrt(tensor.shape[0]))
            if the_rows * (cols - 1) == tensor.shape[0]:
                cols -= 1
            for nr in range(tensor.shape[0]):
                plt.subplot(the_rows, cols, nr + 1)
                plt.imshow(tensor[nr, :, :])
                plt.xlabel(a_key)
                plt.ylabel("Time")

        plt.suptitle(a_key)
        save_fig(Path(results_dir, "figure_{}".format(fig_count)))
        fig_count += 1
    else:
        print(a_key)
        print(test[a_key])

n = len(the_tensor_provider._keys)
assert all([
    len(a_val) == n
    for a_val in [the_tensor_provider._keys, the_tensor_provider.labels,
                  the_tensor_provider.tokens, the_tensor_provider.pos_tags]
]), "Not all resources in TensorProvider have the same length."

###########
# Test that all annotated keys have a label and that all non-annotated do not
def leave_one_program_out_cv(tensor_provider, model_list, path,
                             eval_functions=None, limit=None,
                             return_predictions=False,
                             save_ranked_sentences=True,
                             save_full_predictions=True,
                             save_model_weights=True):
    """
    :param TensorProvider tensor_provider: Class providing all data to models.
    :param list[DetektorModel] model_list: List of model-classes for testing.
    :param list[Evaluation] eval_functions: List of evaluation functions used to test models.
    :param bool return_predictions: If True, the method stores all model test-predictions and returns them as well.
                                    Can be used to determine whether errors are the same across models.
    :param int | None limit: Only perform analysis on some programs (for testing).
                             If None - run on all programs.
    :param Path path: Path for storing results.
    :return:
    """
    ensure_folder(path)

    # TODO: Consider also looping over loss-functions: classic ones and weighed ones
    n_models = len(model_list)

    # Default evaluation score
    if eval_functions is None:
        eval_functions = [
            Accuracy(), F1(), TruePositives(), TrueNegatives(),
            FalsePositives(), FalseNegatives(), Samples(),
            AreaUnderROC(), ROC()
        ]

    # Elements keys
    keys = list(sorted(tensor_provider.accessible_annotated_keys))

    # Get program ids and number of programs
    program_ids = np.array(list(zip(*keys))[0])
    unique_programs = np.array(sorted(set(program_ids)))
    n_programs = len(unique_programs)
    program_names = ["P{:02d}".format(val + 1) for val in range(n_programs)]

    # Dictionary for holding actual predictions (they vary in length which discourages an array)
    test_predictions = dict()

    # Initialize array for holding results
    special_results = dict()
    evaluation_names = [val.name() for val in eval_functions if val.is_single_value]
    classification_results = np.full((n_programs, n_models, len(evaluation_names)), np.nan)
    classification_results = xr.DataArray(
        classification_results,
        name="Loo Results",
        dims=["Program", "Model", "Evaluation"],
        coords=dict(Program=program_names,
                    Model=[model.name for model in model_list],
                    Evaluation=evaluation_names))

    # Initialize file for storing ranked sentences
    if save_ranked_sentences:
        rank_file = Path(path, "ranked_sentences.txt").open("w")

    # Loop over programs
    loo = LeaveOneOut()
    limit = len(unique_programs) if limit is None else limit
    print("\n\nRunning Leave-One-Out Tests.\n" + "-" * 75)
    for program_nr, (train, test) in enumerate(list(loo.split(unique_programs))[:limit]):
        program_name = program_names[program_nr]

        # Get split indices
        train_idx = np.where(program_ids != unique_programs[test])[0]
        test_idx = np.where(program_ids == unique_programs[test])[0]

        # Convert to keys
        train_idx = [keys[val] for val in train_idx]
        test_idx = [keys[val] for val in test_idx]

        # Report
        print("Program {}, using {} training samples and {} test samples.".format(
            program_nr + 1, len(train_idx), len(test_idx)))

        # Make and set BoW-vocabulary
        bow_vocabulary = tensor_provider.extract_programs_vocabulary(train_idx)
        tensor_provider.set_bow_vocabulary(bow_vocabulary)

        # Get truth of test-set
        y_true = tensor_provider.load_labels(data_keys_or_idx=test_idx)

        # Go through models
        for model_nr, model in enumerate(model_list):
            model_name = model.name

            # Initialize model
            model.initialize_model(tensor_provider=tensor_provider)

            # Fit model
            model.fit(tensor_provider=tensor_provider,
                      train_idx=train_idx,
                      verbose=2)

            # Predict on test-data for performance
            y_pred, y_pred_binary = model.predict(tensor_provider=tensor_provider,
                                                  predict_idx=test_idx)
            y_pred = np.squeeze(y_pred)
            y_pred_binary = np.squeeze(y_pred_binary)

            # Store predictions
            if return_predictions:
                test_predictions.setdefault(model_name, dict())[program_name] = y_pred

            # Save the best-ranked sentences (in terms of claim)
            if save_ranked_sentences:
                rank_file.write("Test program: %s \n" % program_names[program_nr])
                rank_file.write(model.summary_to_string())
                ranked_sentences, rank_score, rank_indices \
                    = tensor_provider.get_ranked_predictions(y_pred, test_idx)
                rank_file.write("Sentence, Probability of claim, Truth \n")
                ranked_labels = tensor_provider.load_labels(rank_indices)
                for r in range(len(ranked_sentences)):
                    rank_file.write("%s , %.5f, %i \n" % (ranked_sentences[r],
                                                          rank_score[r],
                                                          ranked_labels[r]))
                rank_file.write("\n")

            # Save predictions on full test set
            if save_full_predictions:
                with Path(path, "%s_predictions.txt" % program_names[program_nr]).open("w") as file:
                    all_sentences = tensor_provider.load_original_sentences(test_idx)
                    for r in range(len(all_sentences)):
                        file.write("%i;%.5f;%s\n" % (y_true[r], y_pred[r], all_sentences[r]))

            # Save model weights in case of logistic regression
            if save_model_weights and model_name == "LogisticRegressionSKLEARN":
                # TODO: Save most important weights in classification
                print(' ')

            # Evaluate with eval_functions
            evaluation_nr = 0
            for evalf in eval_functions:
                assert y_pred.shape == y_true.shape, \
                    "y_pred ({}) and y_true ({}) do not have same shape".format(y_pred.shape, y_true.shape)

                if evalf.is_single_value:
                    evaluation_result = evalf(y_true=y_true,
                                              y_pred=y_pred,
                                              y_pred_binary=y_pred_binary)
                    classification_results[program_nr, model_nr, evaluation_nr] = evaluation_result
                    evaluation_nr += 1
                else:
                    special_results[(model.name, evalf.name(), program_nr)] = evalf(
                        y_true=y_true, y_pred=y_pred, y_pred_binary=y_pred_binary)

    ###
    # Plot ROC curves if wanted

    # Go through models
    models_mean_rocs = []
    for model in model_list:
        rocs = []
        labels = []

        # Go through programs
        for program_nr in range(len(unique_programs)):
            key = (model.name, "ROC", program_nr)
            if key in special_results:
                rocs.append(special_results[key])
                labels.append("Program {}".format(program_nr))

        # Plot ROCs for each program for this model
        plot_multiple_rocs(rocs=rocs, labels=labels, center_line=False)
        mean = mean_rocs(rocs)
        models_mean_rocs.append(mean)
        plot_roc(*mean, title=model.name, label="Mean", color="black", linestyle="--")
        plt.legend()

        # Store figure
        file_name = "ROC_{}".format(model.name)
        save_fig(Path(path, file_name))
        plt.close()

    # Plot mean-ROCs for models
    names = [model.name for model in model_list]
    plot_multiple_rocs(rocs=models_mean_rocs, labels=names, center_line=True,
                       title="Models Mean-ROC")
    plt.legend()
    save_fig(Path(path, "Models_ROC"))
    plt.close()

    if save_ranked_sentences:
        rank_file.close()

    if return_predictions:
        return classification_results, special_results, test_predictions
    return classification_results, special_results
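
# A minimal usage sketch for leave_one_program_out_cv. The model constructors and the
# TensorProvider construction are hypothetical placeholders; only the call signature and the
# return values of leave_one_program_out_cv itself are taken from the function above.
#
#     tensor_provider = TensorProvider(...)                        # hypothetical construction
#     models = [LogisticRegression(...), BasicRecurrent(...)]      # hypothetical model list
#     results, special_results = leave_one_program_out_cv(
#         tensor_provider=tensor_provider,
#         model_list=models,
#         path=Path("results", "loo"),
#         limit=2,                    # only run the first two programs (useful for testing)
#         return_predictions=False)
#     print(results.mean(dim="Program"))   # average each evaluation over programs (xr.DataArray)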
            print(". Validation accuracy: {:.2%}.".format(acc_val))

            # Plot validation
            ax1.plot(val_batch_nr, accs_val, 'b-')
            ax1.set_ylabel('Validation Accuracy', color="blue")
            ax1.set_xlabel('Batches')
            ax1.set_title('Validation', fontsize=20)
            ax1.grid('on')
            ax1.set_ylim(0, 1)
            ax1.set_xlim(0, n_batches)
            plt.draw()
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                plt.pause(1)
            save_fig(Path(output_dir, "training_graph"))

    print("Storing Recurrent Encoder")
    recurrent_speller.save_encoder(sess=sess,
                                   file_path=str(Path(output_dir, "checkpoint", "speller_encode.ckpt")))

    print("Storing Recurrent Decoder")
    recurrent_speller.save_decoder(sess=sess,
                                   file_path=str(Path(output_dir, "checkpoint", "speller_decode.ckpt")))

    print("Running on test-set! (may be too big to handle)")

    # Process validation batch
    c_test_words = random.sample(words_test, test_batch_size)
    (c_word_sample_test, c_input_lengths_test, c_target_lengths_test,
     c_inputs_numerical_test, c_targets_in_numerical_test,
def single_training(tensor_provider, model,
                    test_split, training_split, base_path,
                    eval_functions=None, return_predictions=False,
                    split_is_keys=False, access_restricted_data=False):
    """
    :param TensorProvider tensor_provider: Class providing all data to models.
    :param DetektorModel model: Model-class to train and test.
    :param list | np.ndarray test_split: List of program IDs or sentence-keys used for testing
                                         (depending on split_is_keys).
    :param list | np.ndarray training_split: List of program IDs or sentence-keys used for training
                                             (depending on split_is_keys).
    :param Path base_path: Path of directory where we can put results
                           (in a subdirectory with the model's name).
    :param list[Evaluation] eval_functions: List of evaluation functions used to test models.
    :param bool return_predictions: If True, the method stores all model test-predictions and returns them as well.
                                    Can be used to determine whether errors are the same across models.
    :param bool split_is_keys:
        False: test_split and training_split are program numbers.
        True: test_split and training_split are sentence KEYS (list of (program_id, sentence_id)-tuples).
    :param bool access_restricted_data: If True, also include annotated keys from access-restricted data
                                        when building the key-list (only used when split_is_keys is False).
    """
    # Create model-specific path and ensure directory
    results_path = model.results_path
    if results_path is None:
        results_path = model.create_model_path(results_path=base_path)
    ensure_folder(results_path)

    # Write name
    with Path(results_path, "name.txt").open("w") as file:
        file.write(model.generate_settings_name())

    # Redirect prints to a file and denote script start-time
    redirect_stdout_to_file(Path(results_path, "log.txt"))
    print("Script starting at: {}".format(datetime.now().strftime("%d-%m-%Y %H:%M:%S")))

    # Default evaluation score
    if eval_functions is None:
        eval_functions = [
            Accuracy(), F1(), TruePositives(), TrueNegatives(),
            FalsePositives(), FalseNegatives(), Samples(),
            AreaUnderROC(), ROC()
        ]

    # Initialize array for holding results
    special_results_train = dict()
    evaluation_names = [val.name() for val in eval_functions if val.is_single_value]
    classification_results_train = np.full((1, len(evaluation_names)), np.nan)
    classification_results_train = SDataArray(classification_results_train,
                                              name="Training Results",
                                              dims=["Model", "Evaluation"],
                                              coords=dict(Evaluation=evaluation_names,
                                                          Model=[model.name]))
    special_results_test = dict()
    classification_results_test = np.full((1, len(evaluation_names)), np.nan)
    classification_results_test = SDataArray(classification_results_test,
                                             name="Test Results",
                                             dims=["Model", "Evaluation"],
                                             coords=dict(Evaluation=evaluation_names,
                                                         Model=[model.name]))

    # Check if split is in keys and not programs
    if split_is_keys:
        train_idx = training_split
        test_idx = test_split

    # Otherwise use program-indices to get keys for training and test (the correct and default way)
    else:
        # Sentences keys
        if not access_restricted_data:
            keys = list(sorted(tensor_provider.accessible_annotated_keys))
        else:
            keys = list(sorted(tensor_provider.annotated_keys(access_restricted_data=True)))

        # Get program ids and number of programs
        program_ids = np.array(list(zip(*keys))[0])

        # Get test-indices
        test_idx = np.sum([program_ids == val for val in test_split], axis=0)
        test_idx = np.where(test_idx > 0.5)[0]

        # Get train-indices
        train_idx = np.sum([program_ids == val for val in training_split], axis=0)
        train_idx = np.where(train_idx > 0.5)[0]

        # Convert to keys
        train_idx = [keys[val] for val in train_idx]
        test_idx = [keys[val] for val in test_idx]

    # Sanity check
    assert not set(test_idx).intersection(set(train_idx)), "Overlap between training and test set."

    # Report
    if not split_is_keys:
        print("Test programs {}, using {} training samples and {} test samples.".format(
            test_split, len(train_idx), len(test_idx)))
    else:
        print("Training and testing with specifically selected keys. {} training and {} test.".format(
            len(train_idx), len(test_idx)))

    # Make and set BoW-vocabulary
    bow_vocabulary = tensor_provider.extract_programs_vocabulary(train_idx)
    tensor_provider.set_bow_vocabulary(bow_vocabulary)

    # Get truth of train-set
    y_true_train = tensor_provider.load_labels(data_keys_or_idx=train_idx)

    # Get truth of test-set
    y_true = tensor_provider.load_labels(data_keys_or_idx=test_idx)

    # Initialize model
    model.initialize_model(tensor_provider=tensor_provider)

    # Number of parameters
    if model.save_type == "tf":
        with model._tf_graph.as_default():
            print("Number of trainable parameters: {}".format(tf_number_of_trainable_parameters()))

    # Fit model
    model.fit(tensor_provider=tensor_provider,
              train_idx=train_idx,
              verbose=2)

    # Predict on training-data
    print("\tPredicting on training data")
    y_pred_train, y_pred_train_binary = model.predict(tensor_provider=tensor_provider,
                                                      predict_idx=train_idx)
    y_pred_train = np.squeeze(y_pred_train)
    y_pred_train_binary = np.squeeze(y_pred_train_binary)
    train_predictions = y_pred_train

    # Predict on test-data for performance
    print("\tPredicting on test data")
    y_pred, y_pred_binary = model.predict(tensor_provider=tensor_provider,
                                          predict_idx=test_idx)
    y_pred = np.squeeze(y_pred)
    y_pred_binary = np.squeeze(y_pred_binary)

    # Store predictions
    test_predictions = y_pred

    # Evaluate with eval_functions
    print("\tRunning evaluation functions")
    evaluation_nr = 0
    for evalf in eval_functions:
        # Training evaluation
        assert y_pred_train.shape == y_true_train.shape, \
            "y_pred ({}) and y_true ({}) do not have same shape".format(y_pred_train.shape,
                                                                        y_true_train.shape)
        if evalf.is_single_value:
            evaluation_result = evalf(y_true=y_true_train,
                                      y_pred=y_pred_train,
                                      y_pred_binary=y_pred_train_binary)
            classification_results_train[0, evaluation_nr] = evaluation_result
        else:
            special_results_train[(model.name, evalf.name())] = evalf(
                y_true=y_true_train, y_pred=y_pred_train, y_pred_binary=y_pred_train_binary)

        # Test evaluation
        assert y_pred.shape == y_true.shape, \
            "y_pred ({}) and y_true ({}) do not have same shape".format(y_pred.shape, y_true.shape)
        if evalf.is_single_value:
            evaluation_result = evalf(y_true=y_true,
                                      y_pred=y_pred,
                                      y_pred_binary=y_pred_binary)
            classification_results_test[0, evaluation_nr] = evaluation_result
            evaluation_nr += 1
        else:
            special_results_test[(model.name, evalf.name())] = evalf(
                y_true=y_true, y_pred=y_pred, y_pred_binary=y_pred_binary)

    # Save model
    print("\tSaving model")
    model.save_model()

    # Return list
    returns = [
        classification_results_train, classification_results_test,
        special_results_train, special_results_test,
        model.summary_to_string()
    ]

    # Additional returns
    if return_predictions:
        returns.extend([train_predictions, test_predictions])

    ############################################
    # Print, plot and store!

    # Make summary
    model_summary = model.summary_to_string()

    # Print mean results
    results_train = classification_results_train.to_dataset_split("Model").to_dataframe()
    results_test = classification_results_test.to_dataset_split("Model").to_dataframe()
    with Path(results_path, "results.txt").open("w") as file:
        file.write(model_summary + "\n\n")
        print("Training\n")
        file.write(str(results_train) + "\n\n")
        print("Test\n")
        file.write(str(results_test) + "\n\n")

    # Store results
    pickle.dump(results_train, Path(results_path, "results_train.p").open("wb"))
    pickle.dump(results_test, Path(results_path, "results_test.p").open("wb"))

    # Basic settings
    settings = dict()
    if not split_is_keys:
        settings["test_programs"] = test_split
        settings["training_programs"] = training_split
    else:
        settings["test_programs"] = "specific keys"
        settings["training_programs"] = "specific keys"
    pickle.dump(settings, Path(results_path, "settings.p").open("wb"))

    # Print results for each data-set
    print("\nSingle training Results - TRAINING \n" + "-" * 75)
    print(results_train)
    print("\nSingle training Results - TEST \n" + "-" * 75)
    print(results_test)
    print("\nModel Summary \n" + "-" * 75)
    print(model_summary)

    # Plot ROC of training
    roc_key = (model.name, "ROC")
    if roc_key in special_results_train:
        positive_rate, negative_rate = special_results_train[roc_key]
        plot_roc(tp_rate=positive_rate,
                 fp_rate=negative_rate,
                 title="{} ROC Training".format(model.name))
        save_fig(Path(results_path, "ROC_Train"))

    # Plot ROC of test
    if roc_key in special_results_test:
        positive_rate, negative_rate = special_results_test[roc_key]
        plot_roc(tp_rate=positive_rate,
                 fp_rate=negative_rate,
                 title="{} ROC Test".format(model.name))
        save_fig(Path(results_path, "ROC_Test"))

    # Print ending
    print("Script ended at: {}".format(datetime.now().strftime("%d-%m-%Y %H:%M:%S")))
    close_stdout_file()

    # Write a file called done.txt to mark that the script is done
    with Path(results_path, "done.txt").open("w") as file:
        file.write("The deed is done. ")

    return tuple(returns)
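
# A minimal usage sketch for single_training, assuming program IDs are used for the split
# (split_is_keys=False). The model construction and the provider instance are hypothetical
# placeholders; the keyword arguments and return order follow the function above.
#
#     model = BasicRecurrent(...)                                    # hypothetical model construction
#     outputs = single_training(tensor_provider=the_tensor_provider,  # hypothetical provider instance
#                               model=model,
#                               training_split=[0, 1, 2, 3, 5, 6],    # program IDs used for training
#                               test_split=[4],                       # held-out program ID
#                               base_path=Path("results", "single_train"),
#                               return_predictions=True)
#     (results_train, results_test,
#      special_train, special_test,
#      summary, train_pred, test_pred) = outputs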