def model_search(self):
    """Do a model search with a linear stride scan over the param list.

    Walks the saved parameter files at a fixed stride derived from the
    list length, evaluates each, and keeps the model with the highest
    weighted F1 on the validation set.

    Returns
    -------
    results : pandas.DataFrame
        One row per evaluated model.
    selected_model : pandas.Series
        The winning row, with keys:
            model_file
            model_iteration
            mean_loss
            mean_acc
            f1_weighted
    """
    results = {}
    # Don't allow the zero index; we should never select an
    # untrained model!
    index = 1 if len(self.param_list) > 0 else 0
    end_ind = len(self.param_list) - 1
    logger.info("Linear Model Search from:{} to:{} [total #: {}]".format(
        utils.filebase(self.param_list[index]),
        utils.filebase(self.param_list[end_ind]),
        len(self.param_list) - 1))

    # Stride scales with the log of the list length, clamped to
    # [1, 25]: short lists get stride 1, huge lists stay tractable.
    increment_amount = int(np.round(min(max(
        10**(np.log10(len(self.param_list)) - 1), 1), 25)))

    # NOTE(review): the final index (end_ind) is never evaluated when
    # the stride skips over it — confirm that is intentional.
    while index < end_ind:
        logger.info("Evaluating {}".format(
            utils.filebase(self.param_list[index])))
        if index not in results:
            model = self.param_list[index]
            results[index] = self.evaluate_model(model)
        index += increment_amount

    results_df = pandas.DataFrame.from_dict(results, orient='index')
    # Select the model with the *highest* weighted F1.
    selected_index = results_df['f1_weighted'].idxmax()

    logger.info(
        utils.colored("Selected model index:{} / params: {}".format(
            selected_index,
            utils.filebase(self.param_list[selected_index]))))
    logger.info("For reference, here's the model selection results:")
    logger.info("\n{}".format(results_df.to_string()))
    return results_df, results[selected_index]
def evaluate_model(self, params_file):
    """Evaluate a model defined by a params file over the validation set.

    Parameters
    ----------
    params_file : str
        Path to a serialized .npz parameter file.

    Returns
    -------
    evaluation_results : pandas.Series
        With keys: mean_loss, mean_acc, f1_weighted, model_file,
        model_iteration.
    """
    model = hcnn.train.models.NetworkManager.deserialize_npz(
        params_file)

    # Results contains one point accross the whole dataset
    logger.debug("Evaluating params {}".format(params_file))
    validation_predictions_df = predict.predict_many(
        self.valid_df, model, self.slicer_fx, self.t_len,
        show_progress=True).dropna()

    # np.int was removed from NumPy; the builtin int is equivalent.
    y_true = validation_predictions_df['y_true'].astype(int)
    y_pred = validation_predictions_df['y_pred'].astype(int)
    evaluation_results = pandas.Series({
        "mean_loss": validation_predictions_df['loss'].mean(),
        "mean_acc": sklearn.metrics.accuracy_score(y_true, y_pred),
        "f1_weighted": sklearn.metrics.f1_score(
            y_true, y_pred, average='weighted')
    })

    # Include the metadata in the series.
    # Assumes the filebase carries a 6-char prefix before the
    # iteration token (e.g. "paramsNNNN") — TODO confirm naming.
    model_iteration = utils.filebase(params_file)[6:]
    model_iteration = int(model_iteration) if model_iteration.isdigit() \
        else model_iteration

    # pandas.Series.append was removed in pandas 2.0; concat is the
    # forward-compatible equivalent.
    return pandas.concat([evaluation_results, pandas.Series({
        "model_file": params_file,
        "model_iteration": model_iteration
    })])
def model_search(self):
    """Do a model search with binary search.

    Repeatedly compares the models at the ends of the current interval
    and narrows toward the half holding the better one, until the
    interval collapses to a single index.

    Returns
    -------
    results : pandas.DataFrame
        One row per evaluated model.
    selected_model : pandas.Series
        The winning row, with keys:
            model_file
            model_iteration
            mean_loss
            ...
    """
    results = {}
    # Don't allow the zero index; we should never select an
    # untrained model!
    start_ind = 1 if len(self.param_list) > 0 else 0
    end_ind = len(self.param_list) - 1
    while start_ind != end_ind:
        logger.info("Model Search - L:{} R:{}".format(
            utils.filebase(self.param_list[start_ind]),
            utils.filebase(self.param_list[end_ind])))
        if start_ind not in results:
            model = self.param_list[start_ind]
            results[start_ind] = self.evaluate_model(model)
        if end_ind not in results:
            model = self.param_list[end_ind]
            results[end_ind] = self.evaluate_model(model)
        best_model = self.compare_models(
            results[start_ind], results[end_ind])
        # np.int was removed from NumPy; builtin int(round(...)) uses
        # the same half-to-even rounding as np.round.
        new_ind = int(round((end_ind + start_ind) / 2))
        if (end_ind - start_ind) > 1:
            # NOTE(review): best_model >= 0 presumably means the right
            # endpoint won — confirm against compare_models.
            start_ind, end_ind = (new_ind, end_ind) if best_model >= 0 \
                else (start_ind, new_ind)
        else:
            start_ind, end_ind = (new_ind, new_ind)

    logger.info("Selected model {} / {}".format(
        start_ind, self.param_list[start_ind]))
    return pandas.DataFrame.from_dict(results, orient='index'), \
        results[start_ind]
def test_filebase():
    """Yield-style checks that utils.filebase strips dirs and the last
    extension only."""
    cases = [
        ('y', 'y'),
        ('y.z', 'y'),
        ('x/y.z', 'y'),
        ('x.y.z', 'x.y'),
    ]
    for path, expected in cases:
        yield __eq, utils.filebase(path), expected
def features_path_for_audio(audio_path):
    """Return the .npz features path under write_dir for an audio file."""
    feature_name = "{}.npz".format(utils.filebase(audio_path))
    return os.path.join(write_dir, feature_name)