import pickle


def load_bottleneck_data(training_file, validation_file, breadth):
    """
    Utility function to load bottleneck features.

    Arguments:
        training_file - String, path to the pickled training features
        validation_file - String, path to the pickled validation features
        breadth - Int
    """
    print("Training file", training_file)
    print("Validation file", validation_file)
    print("Output breadth", breadth)

    with open(training_file, 'rb') as f:
        train_data = pickle.load(f)
    with open(validation_file, 'rb') as f:
        validation_data = pickle.load(f)

    X_train = train_data['features']
    y_train = train_data['labels']
    X_val = validation_data['features']
    y_val = validation_data['labels']

    D_train = Dataset('Training', Data(X_train),
                      Likelihoods(y_train, breadth))
    D_val = Dataset('Validation', Data(X_val),
                    Likelihoods(y_val, breadth))

    return (D_train, D_val)
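# Minimal usage sketch (hypothetical file names; assumes each pickle holds a
# dict with 'features' and 'labels' entries, as the loader above expects):
#
#     D_train, D_val = load_bottleneck_data('train_bottleneck.p',
#                                           'val_bottleneck.p', breadth=10)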
def distances_dist(datasets, conditions, scale=1):
    plt.close("all")
    fig, ax = plt.subplots()
    bins = np.linspace(0, 1, num=11)
    for i, dataset in enumerate(sorted(datasets)):
        data = Data(dataset).get_df()
        sm_cols = [col for col in data.col_action
                   if data.col_action[col] == 'se']
        print(dataset)
        for sm_col in sm_cols[:1]:
            print('Column name: %s' % sm_col)
            A = data.df[sm_col][:10000].astype(str)
            B = data.df[sm_col].unique().astype(str)
            sm = similarity_matrix(A, B, conditions['Distance'], -1)
            x = np.linspace(0, 1, 11)
            # Plot ball_elements counts, normalized by the number of rows
            plt.semilogy(x, ball_elements(sm, bins) / sm.shape[0],
                         label=dataset)
    plt.legend(fontsize=14)
    plt.xlim([0, 1])
    plt.ylim([1, 2000])
    ax.set_xlabel('Similarity', fontsize=16)
    ax.tick_params(axis='x', which='major', labelsize=14)
    filename = 'DistanceDist_' + '_'.join(
        [key + '-' + conditions[key] for key in conditions]) + '.pdf'
    plt.savefig(os.path.join(os.getcwd(), '..', 'figures', filename),
                transparent=False, bbox_inches='tight', pad_inches=0.2)
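# Hypothetical stand-in documenting the quantity plotted above; the project's
# real ball_elements may differ. For each bin edge b, count the
# similarity-matrix entries with similarity >= b (dividing by the number of
# rows, as above, gives an average neighborhood size per value).
def ball_elements_sketch(sm, bins):
    return np.array([(sm >= b).sum() for b in bins])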
def word_freq(datasets, conditions):
    plt.close("all")
    fig, ax = plt.subplots()
    values = sorted(datasets)
    sns.set_palette(set_colors(values, 'Dataset'))
    for i, dataset in enumerate(sorted(datasets)):
        data = Data(dataset).get_df()
        sm_cols = [col for col in data.col_action
                   if data.col_action[col] == 'se']
        for sm_col in sm_cols[:1]:
            counts = data.df[sm_col].value_counts()
            # Plot the class-frequency curve on a log scale, interpolating
            # the counts onto a common [0, 1] x-axis.
            indexes = list(counts.index)
            vals = counts.values
            f = interpolate.interp1d(np.linspace(0, 1, len(indexes)), vals)
            x = np.linspace(0, 1, 1000)
            plt.semilogy(x, f(x), label=dataset, linewidth=3.0)
    plt.legend(fontsize=14)
    plt.ylim([1, ax.get_ylim()[1]])
    sns.despine(bottom=True, right=False, trim=True)
    sns.despine()
    plt.xlim([-.03, 1.03])
    plt.ylim([pow(10, -.2), ax.get_ylim()[1]])
    plt.xticks([0, 1], ['', ''])
    plt.minorticks_off()
    ax.set_xlabel('Classes', fontsize=16)
    ax.set_ylabel('log(Frequency)', fontsize=16)
    ax.tick_params(axis='both', which='major', labelsize=14)
    filename = ('ClassFreq_' + '_'.join(
        [key + '-' + conditions[key] for key in conditions]) + '.pdf')
    plt.savefig(os.path.join(os.getcwd(), '..', 'figures', filename),
                transparent=False, bbox_inches='tight', pad_inches=0.2)
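# Toy illustration of the interp1d trick above: class counts are placed at
# evenly spaced positions on [0, 1] so datasets with different numbers of
# classes share one x-axis (numpy/scipy only; hypothetical counts):
#
#     counts = np.array([500, 120, 40, 7, 1])
#     f = interpolate.interp1d(np.linspace(0, 1, len(counts)), counts)
#     print(f(np.linspace(0, 1, 9)))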
def test_datasets(dataset):
    data = Data(dataset).get_df()
    # There is exactly one target variable
    assert len([col for col in data.col_action
                if data.col_action[col] == 'y']) == 1
    # There is exactly one 'se' variable
    assert len([col for col in data.col_action
                if data.col_action[col] == 'se']) == 1
    # Fetch only columns in data.col_action
    assert len(data.df.columns) == len(data.col_action)
    for name, action in data.col_action.items():
        assert action in ['y', 'se', 'num', 'ohe', 'ohe-1']
        if action == 'num':
            assert data.df[name].dtype in [np.dtype('int64'),
                                           np.dtype('float64')]
        # No missing values
        assert data.df[name].isna().sum() == 0, \
            ("Error in: dataset '%s', column '%s'" % (dataset, name))
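# These checks are meant for a test runner such as pytest; a typical hook-up
# (hypothetical dataset names) would be:
#
#     import pytest
#
#     @pytest.mark.parametrize('dataset', ['employee_salaries',
#                                          'medical_charge'])
#     def test_datasets_all(dataset):
#         test_datasets(dataset)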
def fit_predict_categorical_encoding(datasets, n_jobs, n_splits, test_size,
                                     encoders, str_preprocess,
                                     dimension_reductions, results_path,
                                     model_path=None):
    '''
    Learning with dirty categorical variables.
    '''
    logger = logs.getLogger('{},{}'.format(
        __name__, inspect.currentframe().f_code.co_name))
    path = get_data_folder()
    results_path = os.path.join(path, results_path)
    if model_path is not None:
        model_path = os.path.join(path, model_path)
    if not os.path.exists(results_path):
        os.makedirs(results_path)
    for dataset in datasets:
        n_rows = choose_nrows(dataset_name=dataset)
        for encoder in encoders:
            logger.debug('Dataset:{}'.format(dataset))
            data = Data(dataset).get_df()
            data.preprocess(n_rows=n_rows, str_preprocess=str_preprocess)
            for dimension_reduction in dimension_reductions:
                logger.debug('Data shape: %d, %d' % data.df.shape)
                ss = select_shuffle_split(data.clf_type, n_splits, test_size)
                scaler = preprocessing.StandardScaler(with_mean=False)

                # Define classifiers
                clfs = instanciate_estimators(
                    data.clf_type,
                    y=data.df.loc[:, data.ycol].values,
                    model_path=model_path,
                    dropout=dropout)

                for clf in clfs:
                    logger.info(
                        '{}: {} \n{}: {} \n{}: {} \n{}: {} \n{}: {},{}'.format(
                            'Prediction column', data.ycol,
                            'Task', str(data.clf_type),
                            'Classifier', clf,
                            'Encoder', encoder,
                            'Dimension reduction', dimension_reduction[0],
                            dimension_reduction[1]))
                    if not isinstance(clf, NNetEstimator):
                        if 'random_state' in clf.get_params():
                            clf.set_params(random_state=clf_seed)
                    results_dict = {
                        'dataset': data.name,
                        'n_splits': n_splits,
                        'test_size': test_size,
                        'n_rows': n_rows,
                        'encoder': encoder,
                        'str_preprocess': str_preprocess,
                        'clf': [clf.__class__.__name__, clf.get_params()],
                        'ShuffleSplit': [ss.__class__.__name__],
                        'scaler': [scaler.__class__.__name__,
                                   scaler.get_params()],
                        'sample_seed': sample_seed,
                        'shuffleseed': shuffle_seed,
                        'col_action': data.col_action,
                        'clf_type': data.clf_type,
                        'dimension_reduction': dimension_reduction,
                    }
                    if verify_if_exists(results_path, results_dict):
                        print('Prediction already exists.\n')
                        continue
                    start = time.time()
                    MX, y = (data.df.loc[:, data.xcols].values,
                             data.df.loc[:, data.ycol].values)
                    data.make_configs(encoder=encoder)
                    pred = Parallel(n_jobs=n_jobs)(
                        delayed(fit_predict_fold)(
                            MX, y, train_index, test_index, data.col_action,
                            data.xcols, data.name, encoder, fold, n_splits,
                            clf, data.clf_type, scaler, dimension_reduction,
                            configs=data.configs)
                        for (train_index, test_index), fold
                        in zip(ss.split(MX, y), range(1, n_splits + 1)))
                    pred = list(itertools.chain.from_iterable(pred))
                    pred = np.array(pred)
                    results = {'fold': list(pred[:, 0]),
                               'n_train_samples': list(pred[:, 1]),
                               'n_train_features': list(pred[:, 2]),
                               'score': list(pred[:, 3]),
                               'encoding_time': list(pred[:, 4]),
                               'training_time': list(pred[:, 5])}
                    results_dict['results'] = results

                    # Saving results
                    pc_name = socket.gethostname()
                    now = ''.join([c for c in str(datetime.datetime.now())
                                   if c.isdigit()])
                    results_file = os.path.join(
                        results_path, pc_name + '_' + now + '.json')
                    write_json(results_dict, results_file)
                    print('prediction time: %.1f s.' % (time.time() - start))
                    print('Saving results to: %s\n' % results_file)
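# Example invocation (hypothetical values; encoder and dimension-reduction
# names must match what instanciate_estimators/fit_predict_fold understand):
#
#     fit_predict_categorical_encoding(
#         datasets=['employee_salaries'],
#         n_jobs=4, n_splits=20, test_size=.2,
#         encoders=['3gram_SimilarityEncoder'],
#         str_preprocess=True,
#         dimension_reductions=[['-', -1]],
#         results_path='results')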
    'beer_reviews': 'beer\nreviews',
    'beer_reviews2': 'beer\nreviews 2',
    'midwest_survey': 'midwest\nsurvey',
    'docs_payments': 'open\npayments',
    'medical_charge': 'medical\ncharges',
    'road_safety': 'road\nsafety'}

clfs = ['Ridge', 'GradientBoosting']
for clf in clfs:
    plt.close('all')
    df_all = pd.DataFrame()
    score_type = {}
    dimSE_nored = {}  # change this for a friendlier method
    for dataset in datasets:
        data = Data(dataset)
        if dataset in ['docs_payments', 'crime_data', 'beer_reviews2',
                       'traffic_violations']:
            n_rows = 100000  # -1 if using all rows for prediction
        elif dataset in ['beer_reviews', 'road_safety']:
            n_rows = 10000
        else:
            n_rows = -1
        if dataset in ['adult', 'adult2', 'adult3']:
            typo_prob = .1
        else:
            typo_prob = 0
        conditions = {'dataset': data.name,
                      'n_splits': 100,
                      'test_size': .2,
                      'n_rows': n_rows,
    shutil.move(path_ori, path_target)
    print(f'Validation dataset build finished! '
          f'face: {face_val}, background: {background_val}')


if __name__ == "__main__":
    PROJECT_ROOT = os.path.dirname(os.path.realpath(__file__))
    IOU_pos, IOU_neg = 0.7, 0.3
    path_train = ''.join(
        [PROJECT_ROOT, '/data/FDDB_crop/iou_', str(IOU_pos), '/train/'])
    path_val = ''.join(
        [PROJECT_ROOT, '/data/FDDB_crop/iou_', str(IOU_pos), '/val/'])
    for path in [path_train, path_val]:
        for label in ['0/', '1/']:
            # Create the per-label folder (not just its parent)
            if not os.path.exists(path + label):
                os.makedirs(path + label)
    print("Start to prepare dataset")
    annotations = read_from_file(PROJECT_ROOT + "/data/FDDB/FDDB-folds/")
    datasets = Data(annotations)
    prepare_data(datasets, annotations,
                 threthoud_pos=IOU_pos,
                 threthoud_neg=IOU_neg,
                 save_path=path_train)
    dataset_split(path_train + '1/', path_train + '0/', path_val)
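# For reference, a minimal IoU computation consistent with the 0.7/0.3
# thresholds above (hypothetical helper, not used by this script; boxes are
# (x1, y1, x2, y2) corner tuples):
def iou_sketch(box_a, box_b):
    ix1, iy1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    ix2, iy2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / float(area_a + area_b - inter)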
    '3gram_SimilarityEncoder': '3-gram'}

datasets_name = {
    'employee_salaries': 'employee\nsalaries',
    'traffic_violations': 'traffic\nviolations',
    'beer_reviews': 'beer\nreviews',
    'midwest_survey': 'midwest\nsurvey',
    'docs_payments': 'open\npayments',
    'medical_charge': 'medical\ncharges',
    'road_safety': 'road\nsafety'}

X = {dataset: dict() for dataset in datasets}
median = {dataset: dict() for dataset in datasets}
for dataset in datasets:
    print(dataset)
    data = Data(dataset).get_df(preprocess_df=True)
    if dataset in ['docs_payments', 'crime_data', 'beer_reviews2',
                   'traffic_violations']:
        n_rows = 100000  # -1 if using all rows for prediction
    elif dataset in ['beer_reviews', 'road_safety']:
        n_rows = 10000
    else:
        n_rows = -1
    df = data.df.sample(frac=1,
                        random_state=5).reset_index(drop=True)[:n_rows]
    SE_var = [col for col in data.col_action
              if data.col_action[col] == 'se'][0]
    SE_cats = df[SE_var].unique()
    m = len(SE_cats)
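# Hedged sketch of the 3-gram similarity named in the encoder labels above
# (hypothetical helper; the project's own similarity_matrix may differ):
# the Jaccard overlap between the character n-gram sets of two strings.
def ngram_similarity_sketch(a, b, n=3):
    grams_a = {a[i:i + n] for i in range(len(a) - n + 1)}
    grams_b = {b[i:i + n] for i in range(len(b) - n + 1)}
    if not grams_a or not grams_b:
        return float(a == b)
    return len(grams_a & grams_b) / len(grams_a | grams_b)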
def score_plot(datasets, conditions, condition, score,
               percentile_thresh=1, delta_text=0, delta_top=0,
               percentile_dict={'levenshtein-ratio': -1,
                                'jaro-winkler': -1,
                                '3-gram': -1}):
    plt.close("all")
    fig, ax = plt.subplots()
    df_all = pd.DataFrame()
    for dataset in sorted(datasets):
        data = Data(dataset)
        results_path = os.path.join(data.path, 'output', 'results')
        figures_path = os.path.join(data.path, 'output', 'figures')
        create_folder(data.path, 'output/figures')
        files = glob.glob(os.path.join(results_path, '*'))
        files, params = file_meet_conditions(dataset, files, conditions)
        print('Relevant files:')
        for f in files:
            print(f.split('..')[-1])
            df = pd.read_csv(f)
            df = df.drop_duplicates(subset=df.columns[1:])
            df_ohe = df[df.threshold == 1.0].set_index('fold')[['score']]
            df_ohe.rename(columns={'score': 'score(ohe)'}, inplace=True)
            df = df.join(df_ohe, on='fold')
            df['score-score(ohe)'] = df['score'] - df['score(ohe)']
            df['Dataset'] = data.name
            df['Classifier'] = results_parameters(f)['Classifier'][:-4]
            df['Distance'] = results_parameters(f)['Distance']
            df['TyposProb'] = results_parameters(f)['TyposProb']
            percentiles = percentile_dict[results_parameters(f)['Distance']]
            percentiles[10] = 100
            if percentile_thresh == 1:
                for i in range(len(df)):
                    df.loc[i, 'threshold'] = percentiles[
                        int(df.loc[i, 'threshold'] * 10)]
            name = f.split('/')[-1]
            name = name.split('_')
            dict_name = {}
            for n in name:
                key, value = [n.split('-')[0], '-'.join(n.split('-')[1:])]
                dict_name[key] = value
            df_all = pd.concat([df_all, df], axis=0)
    if percentile_thresh == 1:
        df_all = df_all.drop_duplicates(
            subset=['threshold', 'Distance', 'fold'])

    # Plot scores
    values = df_all[condition].unique()
    sns.set_palette(set_colors(values, condition))
    sns.tsplot(data=df_all, time='threshold', unit='fold',
               condition=condition, value=score, ci=95, ax=ax,
               marker='.', markersize=10)
    max_all = df_all[score].max()
    min_all = df_all[score].min()
    if min_all <= 0:
        ax.axhline(y=0, xmin=-10, xmax=110, linewidth=1, color='grey')
    plt.ylim([min_all, max_all])
    sns.despine(bottom=True, right=False, trim=True)
    sns.despine()
    plt.xlim([-10, 110])
    plt.ylim([min_all - (max_all - min_all) * .1,
              max_all + (max_all - min_all) * .1 + delta_top])
    mean_score_ohe = np.mean(df[score][df.threshold == 1])
    ax.text(0, min_all + delta_text, 'Raw\nsimilarity\nencoding',
            fontsize=14, horizontalalignment='center',
            verticalalignment='top', color='gray')
    ax.text(100, min_all + delta_text, 'One-hot\nencoding',
            fontsize=14, color='gray', horizontalalignment='center',
            verticalalignment='top')
    ax.set_xlabel('Hard-thresholding value', fontsize=16)
    if score == 'score':
        ax.set_ylabel('Score', fontsize=16)
    elif score == 'score-score(ohe)':
        ax.set_ylabel('Score - Score(one-hot-encoding)', fontsize=16)
    ax.tick_params(axis='both', which='major', labelsize=14)
    leg = ax.legend(fontsize=14, ncol=1)
    leg.set_title(condition, prop={'size': 16})
    return ax
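# Example call (hypothetical conditions; keys must match the result-file
# naming scheme parsed by file_meet_conditions/results_parameters):
#
#     ax = score_plot(datasets=['employee_salaries', 'medical_charge'],
#                     conditions={'Distance': '3-gram'},
#                     condition='Dataset', score='score')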
params = {'mathtext.fontset': 'cm',
          'mathtext.rm': 'serif',
          'mathtext.bf': 'serif:bold',
          'mathtext.it': 'serif:italic',
          'mathtext.sf': 'sans\\-serif',
          'font.family': 'serif',
          'font.serif': "Times New Roman",  # or "Times"
          'text.latex.preamble': [r'\usepackage{siunitx}',
                                  r'\usepackage{amsmath}',
                                  r'\usepackage{libertine}',
                                  r'\usepackage[libertine]{newtxmath}']}
plt.rcParams.update(params)

fig, ax = plt.subplots(figsize=(9, 6))
fontsize = 25
for dataset in datasets:
    print(dataset)
    data = Data(dataset).get_df(preprocess_df=False)
    data.df = data.df.sample(frac=1, random_state=5).reset_index(drop=True)
    cat_variable = [x for x in data.col_action
                    if data.col_action[x] == 'se']
    nrows_log10 = np.log10(data.df.shape[0])
    X = np.logspace(2, int(nrows_log10), int(nrows_log10) - 1)
    X = np.append(X, pow(10, nrows_log10))
    Y = [len(np.unique(data.df[cat_variable].astype(str).values[:int(x)]))
         for x in X]
    ax.plot(X, Y, color=palette[dataset_cm[dataset]], linewidth=2,
            marker=list(markers)[dataset_cm[dataset]], markersize=10,
            zorder=3)
    del data
plt.savefig(os.path.join(path, figname), transparent=False,
            bbox_inches='tight', pad_inches=0.2)
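# Toy version of the category-growth curve plotted above: the number of
# distinct values seen in the first k rows, for logarithmically spaced k
# (numpy only; hypothetical heavy-tailed column):
#
#     rng = np.random.RandomState(5)
#     col = rng.zipf(2.0, size=100000).astype(str)
#     for k in np.logspace(2, 5, 4).astype(int):
#         print(k, len(np.unique(col[:k])))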
def train(config):
    # rng
    rng = np.random.RandomState(config["seed"])
    torch.cuda.manual_seed(config["seed"])
    torch.cuda.manual_seed_all(config["seed"])

    # occupy
    occ = Occupier()
    if config["occupy"]:
        occ.occupy()

    # Compute input shape
    c = UNet.get_optimal_shape(
        output_shape_lower_bound=config["output_size"],
        steps=config["num_unet_steps"],
        num_convs=config["num_unet_convs"])
    input_size = [int(ci) for ci in c["input"]]
    config['margin'] = np.asarray(input_size) - np.asarray(
        config["output_size"])
    if len(np.unique(config["margin"])) > 1:
        raise RuntimeError("Beware: this might not work?")

    data = Data(config)

    # writer
    writer = SummaryWriter(log_dir="output/logs/" + config["force_hash"])
    board = {
        'dataset': data.loss_label,
        'loss': config['loss'],
        'writer': writer,
    }

    # Save config file, for reference
    os.system('cp {} {}/{}'.format(
        config["config_filename"], config["output"],
        config["config_filename"].split('/')[-1]))
    fn = config["output"] + "/config.h5"
    print("Storing config file: '{}'".format(fn))
    dd.io.save(fn, config)

    if config["model"] == "UNet":
        print("Instantiating UNet")
        model = UNet(
            steps=config["num_unet_steps"],
            num_input_channels=data.num_channels,
            first_layer_channels=config["num_unet_filters"],
            num_classes=data.num_classes,
            num_convs=config["num_unet_convs"],
            output_size=config["output_size"],
            pooling=config["pooling"],
            activation=config["activation"],
            use_dropout=config["use_dropout"],
            use_batchnorm=config["use_batchnorm"],
            init_type=config["init_type"],
            final_unit=config["final_unit"],
        )

        # Need to overwrite this
        if model.is_3d:
            config["input_size"] = model.input_size
        else:
            config["input_size"] = [model.input_size[1],
                                    model.input_size[2]]
        print("UNet -> Input size: {}. Output size: {}".format(
            config["input_size"], config["output_size"]))
    else:
        raise RuntimeError("Unknown model")

    model.cuda()

    # Sanity check: input patches must fit inside the data
    for j in range(len(data.train_images_optim)):
        s = data.train_images_optim[j].shape
        for i in range(len(s) - 1):
            if model.input_size[i] > s[i + 1]:
                raise RuntimeError('Input patch larger than training data '
                                   '({}>{}) for dim #{}, sample #{}'.format(
                                       model.input_size[i], s[i + 1], i, j))
    if data.val_images_mirrored:
        for j in range(len(data.val_images_mirrored)):
            s = data.val_images_mirrored[j].shape
            for i in range(len(s) - 1):
                if model.input_size[i] > s[i + 1]:
                    raise RuntimeError(
                        'Input patch larger than validation data '
                        '({}>{}) for dim #{}, sample #{}'.format(
                            model.input_size[i], s[i + 1], i, j))
    if data.test_images_mirrored:
        for j in range(len(data.test_images_mirrored)):
            s = data.test_images_mirrored[j].shape
            for i in range(len(s) - 1):
                if model.input_size[i] > s[i + 1]:
                    raise RuntimeError(
                        'Input patch larger than test data '
                        '({}>{}) for dim #{}, sample #{}'.format(
                            model.input_size[i], s[i + 1], i, j))

    if config["optimizer"] == "Adam":
        optimizer = optim.Adam(
            model.parameters(),
            lr=config["learning_rate"],
            weight_decay=config["weight_decay"],
        )
    elif config["optimizer"] == "SGD":
        optimizer = optim.SGD(
            model.parameters(),
            lr=config["learning_rate"],
            weight_decay=config["weight_decay"],
            momentum=config["momentum"],
        )
    elif config["optimizer"] == "RMSprop":
        optimizer = optim.RMSprop(
            model.parameters(),
            lr=config["learning_rate"],
            weight_decay=config["weight_decay"],
            momentum=config["momentum"],
        )
    else:
        raise RuntimeError("Unsupported optimizer")

    # Load state
    first_batch = 0
    fn = config["output"] + "/state.h5"
    if isfile(fn):
        state = dd.io.load(fn)
        first_batch = state["cur_batch"] + 1
    else:
        state = {}

    # Load model
    fn = "{}/model-last.pth".format(config["output"])
    if isfile(fn):
        print("Loading model: '{}'".format(fn))
        model.load_state_dict(torch.load(fn))
    else:
        print("No model to load")

    # Load optimizer
    fn = "{}/optim-last.pth".format(config["output"])
    if isfile(fn):
        optimizer.load_state_dict(torch.load(fn))
    else:
        print("No optimizer to load")

    state.setdefault("epoch", 0)
    state.setdefault("cur_batch", 0)
    state.setdefault("loss", np.zeros(config["max_steps"]))
    state.setdefault("res_train", {"batch": [], "metrics": []})
    for t in config["test_thresholds"]:
        state.setdefault("res_train_th_{}".format(t),
                         {"batch": [], "metrics": []})
        state.setdefault("res_val_th_{}".format(t),
                         {"batch": [], "metrics": []})
        state.setdefault("res_test_th_{}".format(t),
                         {"batch": [], "metrics": []})

    # TODO Learn to sample and update this accordingly
    if config["loss"] == "classification":
        loss_criterion = F.nll_loss
    elif config["loss"] == "regression":
        raise RuntimeError("TODO")
    elif config['loss'] == 'jaccard' or config['loss'] == 'dice':
        from loss import OverlapLoss
        loss_criterion = OverlapLoss(config['loss'],
                                     config['overlap_loss_smoothness'],
                                     config['overlap_fp_factor'])
    else:
        raise RuntimeError("TODO")

    if model.is_3d:
        batch = torch.Tensor(
            config["batch_size"],
            data.num_channels,
            config["input_size"][0],
            config["input_size"][1],
            config["input_size"][2],
        )
        if not data.dot_annotations:
            labels = torch.LongTensor(
                config["batch_size"],
                config["output_size"][0],
                config["output_size"][1],
                config["output_size"][2],
            )
        else:
            labels = []
    else:
        batch = torch.Tensor(
config["batch_size"], data.num_channels, config["input_size"][0], config["input_size"][1], ) # labels = torch.ByteTensor( if not data.dot_annotations: labels = torch.LongTensor( config["batch_size"], config["output_size"][0], config["output_size"][1], ) else: labels = [] do_save_state = False model.train() # Sampler print("Instantiating sampler") sampler = Sampler( model.is_3d, { "images": data.train_images_optim, "labels": data.train_labels_optim, "mean": data.train_mean, "std": data.train_std }, config, rng, data.dot_annotations, ) if occ.is_busy(): occ.free() # Loop for state["cur_batch"] in range(first_batch, config["max_steps"]): # Sample ts = time() coords = [] elastic = [] for i in range(config["batch_size"]): b, l, cur_coords, cur_elastic = sampler.sample() batch[i] = torch.from_numpy(b) if not data.dot_annotations: labels[i] = torch.from_numpy(l) else: labels.append(torch.from_numpy(l)) coords.append(cur_coords) elastic.append(cur_elastic) # Forward pass inputs = Variable(batch).cuda() outputs = model(inputs) optimizer.zero_grad() if config['loss'] == 'jaccard' or config['loss'] == 'dice': targets = Variable(labels.float()).cuda() o = F.softmax(outputs, dim=1)[:, 1, :, :] loss = loss_criterion.forward(o, targets) loss = sum(loss) / len(loss) elif config['loss'] == 'classification': targets = Variable(labels).cuda() if data.is_3d: # Do it slice by slice. Ugly but it works! loss = [] for z in range(outputs.shape[2]): loss.append( loss_criterion(F.log_softmax(outputs[:, :, z, :, :], dim=1), targets[:, z, :, :], weight=data.weights.cuda(), reduce=True, ignore_index=2)) loss = sum(loss) / len(loss) else: # f(reduce=True) is equivalent to f(reduce=False).mean() # no need to average over the batch size then loss = loss_criterion(F.log_softmax(outputs, dim=1), targets, weight=data.weights.cuda(), reduce=True, ignore_index=2) else: raise RuntimeError('Bad loss type') # Sanity check # if not data.dot_annotations and loss.data.cpu().sum() > 10: # print("very high loss?") # embed() # Backward pass loss.backward() optimizer.step() # Get class stats ws = [0, 0] for l in labels: ws[0] += (l == 0).sum() ws[1] += (l == 1).sum() # Update state cur_loss = loss.data.cpu().sum() state["loss"][state["cur_batch"]] = cur_loss board['writer'].add_scalar(board['dataset'] + '-loss-' + board['loss'], cur_loss, state['cur_batch']) print( "Batch {it:d} -> Avg. loss {loss:.05f}: [{t:.02f} s.] 
(Range: {rg:.1f})" .format( it=state["cur_batch"] + 1, loss=cur_loss, t=time() - ts, rg=outputs.data.max() - outputs.data.min(), )) # Cross-validation force_eval = False if config["check_val_every"] > 0 and data.evaluate_val: if (state["cur_batch"] + 1) % config["check_val_every"] == 0: res = model.inference( { "images": data.val_images_mirrored, "mean": data.val_mean, "std": data.val_std, }, config['batch_size'], config['use_lcn'], ) is_best = model.validation_by_classification( images=data.val_images, gt=data.val_labels_th, prediction=res, state=state, board=board, output_folder=config['output'], xval_metric=config['xval_metric'], dilation_thresholds=config['test_thresholds'], subset='val', make_stack=data.plot_make_stack, force_save=False, ) # Save models if they are the best at any test threshold for k, v in is_best.items(): if v is True: save_model(config, state, model, optimizer, 'best_th_{}'.format(k)) do_save_state = True # Force testing on train/test force_eval = any(is_best.keys()) # Test on the training data if config["check_train_every"] > 0 and data.evaluate_train: if ((state["cur_batch"] + 1) % config["check_train_every"] == 0) or force_eval: res = model.inference( { "images": data.train_images_mirrored[:data.num_train_orig], "mean": data.train_mean, "std": data.train_std, }, config['batch_size'], config['use_lcn'], ) model.validation_by_classification( images=data.train_images[:data.num_train_orig], gt=data.train_labels_th, prediction=res, state=state, board=board, output_folder=config['output'], xval_metric=config['xval_metric'], dilation_thresholds=config['test_thresholds'], subset='train', make_stack=data.plot_make_stack, force_save=force_eval, ) # Test on the test data if config["check_test_every"] > 0 and data.evaluate_test: if ((state["cur_batch"] + 1) % config["check_test_every"] == 0) or force_eval: res = model.inference( { "images": data.test_images_mirrored, "mean": data.test_mean, "std": data.test_std, }, config['batch_size'], config['use_lcn'], ) model.validation_by_classification( images=data.test_images, gt=data.test_labels_th, prediction=res, state=state, board=board, output_folder=config['output'], xval_metric=config['xval_metric'], dilation_thresholds=config['test_thresholds'], subset='test', make_stack=data.plot_make_stack, force_save=force_eval, ) # Also save models periodically, to resume executions if config["save_models_every"] > 0: if (state["cur_batch"] + 1) % config["save_models_every"] == 0: save_model(config, state, model, optimizer, 'last') do_save_state = True # Save training state periodically (or if forced) if do_save_state: save_state(config, state) do_save_state = False board['writer'].close()
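# Sketch of resuming a run (assumes a config dict shaped like the one train()
# itself saves above; the path and filename below are hypothetical):
#
#     config = dd.io.load('output/some_run/config.h5')
#     config["config_filename"] = 'configs/some_run.yaml'  # hypothetical
#     train(config)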