def plot_series(errs, dir_out, name):
    """
    Plot the mean word and character error rates for a series of PHOCNets.

    Produces two PNG files (one for WER, one for CER) in ``dir_out``,
    named after ``name`` via ``sanity_util.unique_file_name``.

    :param errs: iterable of (PHOCNet path, error dict) tuples; each error
        dict provides ``'mean_w_err'`` and ``'mean_c_err'['mean_pct']``
    :param dir_out: output directory for the plot images
    :param name: plot base-title used to derive the file names
    """
    plot_w_err_path = sanity_util.unique_file_name(dir_out, name + '_w_err', '.png')
    plot_c_err_path = sanity_util.unique_file_name(dir_out, name + '_c_err', '.png')
    # gathering data: one x-tick (net file name) and one value per evaluated net
    x_ticks = []
    w_errs = []
    c_errs = []
    for n_path, e_dict in errs:
        x_ticks.append(os.path.basename(n_path))
        w_errs.append(e_dict['mean_w_err'])
        c_errs.append(e_dict['mean_c_err']['mean_pct'])
    # the WER and CER plots are identical except for label and target path,
    # so the shared plotting code lives in one helper
    _plot_err_series(w_errs, x_ticks, 'WER', plot_w_err_path)
    _plot_err_series(c_errs, x_ticks, 'CER', plot_c_err_path)


def _plot_err_series(values, x_ticks, y_label, out_path):
    """
    Render one error series as a line plot and save it to ``out_path``.

    :param values: error values, plotted against their index
    :param x_ticks: x-axis tick labels (one per value)
    :param y_label: y-axis label (e.g. 'WER' or 'CER')
    :param out_path: file path the figure is saved to
    """
    plt.plot(np.arange(len(values)), values)
    plt.xticks(np.arange(len(values)), x_ticks, rotation=25, rotation_mode="anchor",
               horizontalalignment='right', verticalalignment='top')
    fig = plt.gcf()
    fig.set_size_inches(15, 10)
    plt.gca().set_ylabel(y_label)
    plt.savefig(out_path)
    # close the figure and clear pyplot state so consecutive plots don't overlap
    plt.close(fig)
    plt.clf()
def save(self, dir_out, train=None, test=None, pfx=''):
    """
    Persist the network weights and the training setup meta-data.

    :param dir_out: target directory (created if missing, prior data kept)
    :param train: training ids; accepted for interface compatibility
    :param test: test ids; accepted for interface compatibility
    :param pfx: file-name prefix identifying this model
    """
    # NOTE(review): train/test are not written out in this method — confirm
    # whether they are meant to be part of the saved setup
    # make sure the target directory exists without deleting prior data
    sanity_util.safe_dir_path(dir_out)
    nn_path = sanity_util.unique_file_name(dir=dir_out, fn='nn_{}'.format(pfx), suffix='.pth')
    setup_path = sanity_util.unique_file_name(dir=dir_out, fn='setup_{}'.format(pfx), suffix='.json')
    # serialize the network weights
    torch.save(self.nn.state_dict(), nn_path)
    # dump the training configuration next to the weights
    with open(setup_path, 'w') as f_json:
        json.dump(self.set_up(), f_json)
def save(self, dir, name='estimator'):
    """
    Pickle this estimator to disk.

    NOTE: pre-existing data/estimators will NOT be overwritten; please
    clean up outdated estimators manually.

    :param dir: directory to pickle into
    :param name: base name of the pickle file
    """
    target_dir = sanity_util.safe_dir_path(dir)
    target_file = sanity_util.unique_file_name(dir=target_dir, fn=name, suffix='.pkl')
    with open(target_file, 'wb') as f_out:
        pickle.dump(self, f_out)
def new_logger(dir_out, name):
    """
    Initialize a file-backed logger for training.

    :param dir_out: directory the log file is written to (created if missing)
    :param name: logger name; also the log file's base name
    :return: configured ``logging.Logger`` at INFO level
    """
    log = logging.getLogger(name)
    safe_dir = sanity_util.safe_dir_path(dir_out)
    log_path = sanity_util.unique_file_name(dir=safe_dir, fn=name, suffix='.log')
    # log to a fresh file with timestamped, level-tagged lines
    handler = logging.FileHandler(log_path)
    handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
    log.addHandler(handler)
    log.setLevel(logging.INFO)
    return log
def save(dir_out, json_dict, name):
    """
    Write the collected errors to a JSON file.

    :param dir_out: directory to save the output file at
    :param json_dict: JSON-serializable dictionary containing the errors
    :param name: base name; the file is saved as ``<name>_ERR.json``
    """
    # ensure the output directory exists
    sanity_util.safe_dir_path(dir_out)
    out_file = sanity_util.unique_file_name(dir=dir_out, fn='{}_ERR'.format(name), suffix='.json')
    with open(out_file, 'w') as f_json:
        json.dump(json_dict, f_json)
# --- evaluation driver: load data/model, compute error rates, persist + plot ---
dir_out = args['dir_out']
dir_out = sanity_util.safe_dir_path(dir_out)  # ensure the output dir exists
name = args['model_name']
t_phocnet = args['PHOCNet_type'].lower()
k_fold = int(args['k_fold'])
n_codes_lvl = int(args['n_codes_lvl'])
## loading dataset
dset, train, test = get_dsets(name_dset, gt_path, imgs_path, alphabet, scale, k_fold)
# vocabulary is the union of the train and test words
words = list(set(train.words).union(test.words))
## estimator
estimator = get_estimator(str_est, words, alphabet)
## loading PHOCNet
phocnet = get_PHOCNet(t_phocnet, alphabet)
## collect error rates
logger = new_logger(dir_out, name)
# NOTE(review): `dir` is the builtin function unless it was assigned earlier
# outside this excerpt — confirm it holds the directory of nets to evaluate
errs = evaluate_dir(dir, device, test, estimator, phocnet, logger, s_batch=s_batch, n_codes_lvl=n_codes_lvl)
# save error rates for plotting
file_path = sanity_util.unique_file_name(dir_out, name, '.json')
with open(file_path, 'w') as f_out:
    json.dump(errs, f_out)
# plotting and saving the plot
plot_series(errs, dir_out, name)
# (continuation of a trainer-construction call that opens before this excerpt)
net_log_dir=os.path.join(dir_out, 'tmp', model_pfx, ''),  # temp dir for intermediate saves
             device=device, logger=log, loss=loss, s_batch=s_batch, augmented=augment_dset,
             tmp_save_mod=intv_save, alphabet=alphabet, phoc_lvls=phoc_lvls,
             quant_aug=t_augment, FP=FP)
"""run training"""
# NOTE(review): `shuffle` is parsed here but not visibly passed to train_on
# in this excerpt — confirm it is consumed elsewhere
shuffle = args['shuffle'].lower() in ['true', '1', 't', 'yes', 'y']
trainer.train_on(d_set=train_set, optimizer=optimizer, n_iter=max_iter)
"""save net"""
# persist the trained network together with the train/test split ids
ids_train = train_set.ids
ids_test = test_set.ids
trainer.save(dir_out=dir_out, train=ids_train, test=ids_test, pfx=model_pfx)
"""saving args, so you have a reference to the training-config of your model"""
sanity_util.safe_dir_path(dir_out)  # ensure the output dir exists
file_path = sanity_util.unique_file_name(dir=dir_out, fn='args_{}'.format(model_pfx), suffix='.json')
with open(file_path, 'w') as args_out:
    # record start/end timestamps alongside the training configuration
    t_end_training = time.asctime()
    args['date'] = {'started': t_start_training, 'ended': t_end_training}
    json.dump(args, args_out)
def save(self, dir, name='estimator'):
    """
    Pickle the estimator (via the base class) and additionally persist
    its configuration as JSON.

    :param dir: directory to save into
    :param name: base name of the output files
    """
    super().save(dir, name)
    # keep the regularization and dimensionality settings next to the pickle
    cfg_path = sanity_util.unique_file_name(dir, name, '.json')
    cfg = {'reg': self.reg, 'n_dim': self.n_dim}
    with open(cfg_path, 'w') as f_config:
        json.dump(cfg, f_config)