def load_bottleneck_data(training_file, validation_file, breadth):
    """
    Utility function to load bottleneck features.

    Arguments:
        training_file - String
        validation_file - String
    """
    print("Training file", training_file)
    print("Validation file", validation_file)
    print("Output breadth", breadth)

    with open(training_file, 'rb') as f:
        train_data = pickle.load(f)
    with open(validation_file, 'rb') as f:
        validation_data = pickle.load(f)

    X_train = train_data['features']
    y_train = train_data['labels']
    X_val = validation_data['features']
    y_val = validation_data['labels']

    D_train = Dataset('Training', Data(X_train), Likelihoods(y_train, breadth))
    D_val = Dataset('Validation', Data(X_val), Likelihoods(y_val, breadth))

    return (D_train, D_val)
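
# A minimal usage sketch (the file names are hypothetical; Dataset, Data
# and Likelihoods are project-specific helpers defined elsewhere):
#
#     D_train, D_val = load_bottleneck_data('train_bottleneck.p',
#                                           'val_bottleneck.p',
#                                           breadth=10)
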
def distances_dist(datasets, conditions, scale=1):
    plt.close("all")
    fig, ax = plt.subplots()
    bins = np.linspace(0, 1, num=11)
    for i, dataset in enumerate(sorted(datasets)):
        data = Data(dataset).get_df()
        sm_cols = [
            col for col in data.col_action if data.col_action[col] == 'se'
        ]
        print(dataset)
        distances = []
        for sm_col in sm_cols[:1]:
            print('Column name: %s' % sm_col)
            A = data.df[sm_col][:10000].astype(str)
            B = data.df[sm_col].unique().astype(str)
            sm = similarity_matrix(A, B, conditions['Distance'], -1)
    #         print(sm.shape)
    #         # take the 10% highest distances for each value
    #         sm_nmax = np.array([sorted(row)[:-1]
    #                             for row in sm])
    #         distances += list(sm_nmax.ravel())
    #     bin_counts = [0 for bin in bins]
    #     bin_width = 1/(len(bins)-1)
    #     distances2 = np.zeros(len(distances))
    #     for i, distance in enumerate(distances):
    #         bin_number = int(distance // bin_width)
    #         bin_counts[bin_number] += 1
    #         distances2[i] = bin_number * bin_width
    #     bin_counts = np.array(bin_counts) #/len(distances)
    #     s = interpolate.interp1d(bins, bin_counts)
    #     kernel = stats.gaussian_kde(distances2, bw_method=.6)
        x = np.linspace(0, 1, 11)
        # y = list(reversed(list(accumulate(list(reversed(bin_counts))))))
        # plt.semilogy(x, s(x)*scale, label=dataset)
        plt.semilogy(x, ball_elements(sm, bins) / sm.shape[0], label=dataset)
    plt.legend(fontsize=14)
    plt.xlim([0, 1])
    plt.ylim([1, 2000])
    # sns.despine(bottom=False, left=False, right=True, trim=True)
    # plt.yticks([], [])
    # y_ticks = np.array([val/10 for val in ax.get_yticks()])
    # ax.set_yticklabels(y_ticks)
    ax.set_xlabel('Similarity', fontsize=16)
    ax.tick_params(axis='x', which='major', labelsize=14)
    filename = 'DistanceDist_' + '_'.join(
        [key + '-' + conditions[key] for key in conditions]) + '.pdf'
    plt.savefig(os.path.join(os.getcwd(), '..', 'figures', filename),
                transparent=False,
                bbox_inches='tight',
                pad_inches=0.2)
def word_freq(datasets, conditions):
    plt.close("all")
    fig, ax = plt.subplots()
    values = sorted(datasets)
    sns.set_palette(set_colors(values, 'Dataset'))
    for i, dataset in enumerate(sorted(datasets)):

        data = Data(dataset).get_df()
        sm_cols = [
            col for col in data.col_action if data.col_action[col] == 'se'
        ]

        for sm_col in sm_cols[:1]:
            counts = data.df[sm_col].value_counts()

            # Plot histogram using matplotlib bar().
            indexes = list(counts.index)
            vals = counts.values
            f = interpolate.interp1d(np.linspace(0, 1, len(indexes)), vals)
            x = np.linspace(0, 1, 1000)
            plt.semilogy(x, f(x), label=dataset, linewidth=3.0)
    plt.legend(fontsize=14)
    plt.ylim([1, ax.get_ylim()[1]])
    sns.despine(bottom=True, right=False, trim=True)
    sns.despine()
    plt.xlim([-.03, 1.03])
    plt.ylim([pow(10, -.2), ax.get_ylim()[1]])
    plt.xticks([0, 1], ['', ''])
    plt.minorticks_off()
    ax.set_xlabel('Classes', fontsize=16)
    ax.set_ylabel('log(Frequency)', fontsize=16)
    ax.tick_params(axis='both', which='major', labelsize=14)
    filename = ('ClassFreq_' +
                '_'.join([key + '-' + conditions[key]
                          for key in conditions]) + '.pdf')
    plt.savefig(os.path.join(os.getcwd(), '..', 'figures', filename),
                transparent=False,
                bbox_inches='tight',
                pad_inches=0.2)
Example #4
def test_datasets(dataset):
    data = Data(dataset).get_df()

    # There is one target variable
    assert len([col for col in data.col_action
                if data.col_action[col] == 'y']) == 1

    # There is only one 'se' variable
    assert len(
        [col for col in data.col_action if data.col_action[col] == 'se']) == 1

    # Fetch only columns in data.col_action
    assert len(data.df.columns) == len(data.col_action)

    for name, action in data.col_action.items():
        assert action in ['y', 'se', 'num', 'ohe', 'ohe-1']
        if action == 'num':
            assert data.df[name].dtype in [
                np.dtype('int64'), np.dtype('float64')
            ]
        # Missing values
        assert (data.df[name].isna().sum() == 0), \
            ("Error in: dataset '%s', column '%s'" % (dataset, name))
Example #5
def fit_predict_categorical_encoding(datasets, n_jobs, n_splits, test_size,
                                     encoders, str_preprocess,
                                     dimension_reductions, results_path,
                                     model_path=None):
    '''
    Fit and evaluate estimators for learning with dirty categorical
    variables, looping over datasets, encoders and dimension reductions.
    '''
    logger = logs.getLogger('{},{}'.format(
        __name__, inspect.currentframe().f_code.co_name))
    path = get_data_folder()
    results_path = os.path.join(path, results_path)
    model_path = os.path.join(path, model_path)
    if not os.path.exists(results_path):
        os.makedirs(results_path)
    for dataset in datasets:
        n_rows = choose_nrows(dataset_name=dataset)
        for encoder in encoders:
            logger.debug('Dataset:{}'.format(dataset))
            data = Data(dataset).get_df()
            data.preprocess(n_rows=n_rows, str_preprocess=str_preprocess)
            for dimension_reduction in dimension_reductions:
                logger.debug('Data shape: %d, %d' % data.df.shape)
                ss = select_shuffle_split(data.clf_type, n_splits, test_size)
                scaler = preprocessing.StandardScaler(with_mean=False)

                # Define classifiers (dropout, like clf_seed, sample_seed
                # and shuffle_seed below, presumably comes from module-level
                # configuration elided from this excerpt)
                clfs = instanciate_estimators(
                    data.clf_type,
                    y=data.df.loc[:, data.ycol].values,
                    model_path=model_path, dropout=dropout)

                for clf in clfs:
                    logger.info(
                        '{}: {} \n{}: {} \n{}: {} \n{}: {} \n{}: {},{}'.format(
                            'Prediction column', data.ycol,
                            'Task', str(data.clf_type),
                            'Classifier', clf,
                            'Encoder', encoder,
                            'Dimension reduction', dimension_reduction[0],
                            dimension_reduction[1]))

                    if not isinstance(clf, NNetEstimator):
                        if 'random_state' in clf.get_params():
                            clf.set_params(random_state=clf_seed)
                    results_dict = {'dataset': data.name,
                                    'n_splits': n_splits,
                                    'test_size': test_size,
                                    'n_rows': n_rows,
                                    'encoder': encoder,
                                    'str_preprocess': str_preprocess,
                                    'clf': [clf.__class__.__name__,
                                            clf.get_params()],
                                    'ShuffleSplit':
                                        [ss.__class__.__name__],
                                    'scaler': [scaler.__class__.__name__,
                                               scaler.get_params()],
                                    'sample_seed': sample_seed,
                                    'shuffleseed': shuffle_seed,
                                    'col_action': data.col_action,
                                    'clf_type': data.clf_type,
                                    'dimension_reduction':
                                        dimension_reduction
                                    }
                    if verify_if_exists(results_path, results_dict):
                        print('Prediction already exists.\n')
                        continue
                    start = time.time()
                    MX, y = (data.df.loc[:, data.xcols].values,
                             data.df.loc[:, data.ycol].values)
                    data.make_configs(encoder=encoder)
                    pred = Parallel(n_jobs=n_jobs)(
                        delayed(fit_predict_fold)(
                            MX, y, train_index, test_index,
                            data.col_action, data.xcols, data.name, encoder,
                            fold, n_splits, clf, data.clf_type, scaler,
                            dimension_reduction, configs=data.configs)
                        for (train_index, test_index), fold
                        in zip(ss.split(MX, y), range(1, n_splits + 1)))
                    pred = list(itertools.chain.from_iterable(pred))
                    pred = np.array(pred)
                    results = {'fold': list(pred[:, 0]),
                               'n_train_samples': list(pred[:, 1]),
                               'n_train_features': list(pred[:, 2]),
                               'score': list(pred[:, 3]),
                               'encoding_time': list(pred[:, 4]),
                               'training_time': list(pred[:, 5])}
                    results_dict['results'] = results

                    # Saving results
                    pc_name = socket.gethostname()
                    now = ''.join([c for c in str(datetime.datetime.now())
                                   if c.isdigit()])
                    results_file = os.path.join(
                        results_path, pc_name + '_' + now + '.json')

                    write_json(results_dict, results_file)
                    print('prediction time: %.1f s.' % (time.time() - start))
                    print('Saving results to: %s\n' % results_file)
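
# A hedged call sketch; the argument values below are illustrative only,
# with encoder and dataset names reused from elsewhere in this file:
#
#     fit_predict_categorical_encoding(
#         datasets=['employee_salaries'],
#         n_jobs=4, n_splits=100, test_size=.2,
#         encoders=['3gram_SimilarityEncoder'],
#         str_preprocess=True,
#         dimension_reductions=[['-', -1]],
#         results_path='results', model_path='models')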
Example #6
                 'beer_reviews': 'beer\nreviews',
                 'beer_reviews2': 'beer\nreviews 2',
                 'midwest_survey': 'midwest\nsurvey',
                 'docs_payments': 'open\npayments',
                 'medical_charge': 'medical\ncharges',
                 'road_safety': 'road\nsafety'
                 }

clfs = ['Ridge', 'GradientBoosting']
for clf in clfs:
    plt.close('all')
    df_all = pd.DataFrame()
    score_type = {}
    dimSE_nored = {}  # TODO: replace with a friendlier mechanism
    for dataset in datasets:
        data = Data(dataset)
        if dataset in ['docs_payments', 'crime_data', 'beer_reviews2',
                       'traffic_violations']:
            n_rows = 100000  # -1 if using all rows for prediction
        elif dataset in ['beer_reviews', 'road_safety']:
            n_rows = 10000
        else:
            n_rows = -1
        if dataset in ['adult', 'adult2', 'adult3']:
            typo_prob = .1
        else:
            typo_prob = 0
        conditions = {'dataset': data.name,
                      'n_splits': 100,
                      'test_size': .2,
                      'n_rows': n_rows,
Example #7
                shutil.move(path_ori, path_target)
    print(
        f'Validation dataset build finished! face: {face_val}, background: {background_val}'
    )


if __name__ == "__main__":
    PROJECT_ROOT = os.path.dirname(os.path.realpath(__file__))
    IOU_pos, IOU_neg = 0.7, 0.3

    path_train = ''.join(
        [PROJECT_ROOT, '/data/FDDB_crop/iou_',
         str(IOU_pos), '/train/'])
    path_val = ''.join(
        [PROJECT_ROOT, '/data/FDDB_crop/iou_',
         str(IOU_pos), '/val/'])

    for path in [path_train, path_val]:
        for label in ['0/', '1/']:
            if not os.path.exists(path + label):
                os.makedirs(path + label)

    print("Start to prepare dataset")
    annotations = read_from_file(PROJECT_ROOT + "/data/FDDB/FDDB-folds/")
    datasets = Data(annotations)
    prepare_data(datasets,
                 annotations,
                 threthoud_pos=IOU_pos,
                 threthoud_neg=IOU_neg,
                 save_path=path_train)
    dataset_split(path_train + '1/', path_train + '0/', path_val)
Example #8
    '3gram_SimilarityEncoder': '3-gram'
}
datasets_name = {
    'employee_salaries': 'employee\nsalaries',
    'traffic_violations': 'traffic\nviolations',
    'beer_reviews': 'beer\nreviews',
    'midwest_survey': 'midwest\nsurvey',
    'docs_payments': 'open\npayments',
    'medical_charge': 'medical\ncharges',
    'road_safety': 'road\nsafety'
}
X = {dataset: dict() for dataset in datasets}
median = {dataset: dict() for dataset in datasets}
for dataset in datasets:
    print(dataset)
    data = Data(dataset).get_df(preprocess_df=True)
    if dataset in [
            'docs_payments', 'crime_data', 'beer_reviews2',
            'traffic_violations'
    ]:
        n_rows = 100000  # -1 if using all rows for prediction
    elif dataset in ['beer_reviews', 'road_safety']:
        n_rows = 10000
    else:
        n_rows = -1
    df = data.df.sample(frac=1, random_state=5).reset_index(drop=True)
    if n_rows > 0:  # n_rows == -1 means: keep all rows
        df = df[:n_rows]

    SE_var = [col for col in data.col_action
              if data.col_action[col] == 'se'][0]
    SE_cats = df[SE_var].unique()
    m = len(SE_cats)
def score_plot(datasets,
               conditions,
               condition,
               score,
               percentile_thresh=1,
               delta_text=0,
               delta_top=0,
               percentile_dict={
                   'levenshtein-ratio': -1,
                   'jaro-winkler': -1,
                   '3-gram': -1
               }):
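    # NOTE: the -1 defaults above are placeholders; callers must supply an
    # indexable array of percentiles per distance (see percentiles[...] below).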
    plt.close("all")
    fig, ax = plt.subplots()

    # # add similarity distribution ###########################################
    # scale = .035
    # bins = np.linspace(0, 1, num=11)
    # for i, dataset in enumerate(sorted(datasets)):
    #     data = Data(dataset).get_df()
    #     sm_cols = [col for col in data.col_action
    #                if data.col_action[col] == 'se']
    #     print(dataset)
    #     distances = []
    #     for sm_col in sm_cols[:1]:
    #         print('Column name: %s' % sm_col)
    #         A = data.df[sm_col][:10000].astype(str)
    #         B = data.df[sm_col].unique().astype(str)
    #         sm = similarity_matrix(A, B, conditions['Distance'], -1)
    #         print(sm.shape)
    #         # take the 10% highest distances for each value
    #         sm_nmax = np.array([sorted(row)[:-1]
    #                             for row in sm])
    #         distances += list(sm_nmax.ravel())
    #     bin_counts = [0 for bin in bins]
    #     bin_width = 1/(len(bins)-1)
    #     distances2 = np.zeros(len(distances))
    #     for i, distance in enumerate(distances):
    #         bin_number = int(distance // bin_width)
    #         bin_counts[bin_number] += 1
    #         distances2[i] = bin_number * bin_width
    #     bin_counts = np.array(bin_counts)/len(distances)
    #     s = interpolate.interp1d(bins, bin_counts)
    #     kernel = stats.gaussian_kde(distances2, bw_method=.6)
    #     x = np.linspace(0, 1, 201)
    #     plt.semilogy(x, s(x)*scale-.02, '--')
    # #########################################################################

    df_all = pd.DataFrame()
    for dataset in sorted(datasets):
        data = Data(dataset)
        results_path = os.path.join(data.path, 'output', 'results')
        figures_path = os.path.join(data.path, 'output', 'figures')
        create_folder(data.path, 'output/figures')

        files = glob.glob(os.path.join(results_path, '*'))
        files, params = file_meet_conditions(dataset, files, conditions)
        print('Relevant files:')
        for f in files:
            print(f.split('..')[-1])
            df = pd.read_csv(f)
            df = df.drop_duplicates(subset=df.columns[1:])
            df_ohe = df[df.threshold == 1.0].set_index('fold')[['score']]
            df_ohe.rename(columns={'score': 'score(ohe)'}, inplace=True)
            df = df.join(df_ohe, on='fold')
            df['score-score(ohe)'] = df['score'] - df['score(ohe)']
            df['Dataset'] = data.name
            df['Classifier'] = results_parameters(f)['Classifier'][:-4]
            df['Distance'] = results_parameters(f)['Distance']
            df['TyposProb'] = results_parameters(f)['TyposProb']
            percentiles = percentile_dict[results_parameters(f)['Distance']]
            percentiles[10] = 100
            if percentile_thresh == 1:
                for i in range(len(df)):
                    df.loc[i, 'threshold'] = percentiles[int(
                        df.loc[i, 'threshold'] * 10)]

            name = f.split('/')[-1]
            name = name.split('_')
            dict_name = {}
            for n in name:
                key, value = [n.split('-')[0], '-'.join(n.split('-')[1:])]
                dict_name[key] = value
            df_all = pd.concat([df_all, df], axis=0)
            if percentile_thresh == 1:
                df_all = df_all.drop_duplicates(
                    subset=['threshold', 'Distance', 'fold'])
    # plot scores
    values = df_all[condition].unique()
    sns.set_palette(set_colors(values, condition))
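
    # NOTE: sns.tsplot is deprecated and removed in recent seaborn;
    # sns.lineplot is the modern replacement for the call below.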

    sns.tsplot(data=df_all,
               time='threshold',
               unit='fold',
               condition=condition,
               value=score,
               ci=95,
               ax=ax,
               marker='.',
               markersize=10)

    max_all = df_all[score].max()
    min_all = df_all[score].min()
    if min_all <= 0:
        # axhline's xmin/xmax are axes fractions in [0, 1], not data coords
        ax.axhline(y=0, linewidth=1, color='grey')
    plt.ylim([min_all, max_all])
    sns.despine(bottom=True, right=False, trim=True)
    sns.despine()
    plt.xlim([-10, 110])
    plt.ylim([
        min_all - (max_all - min_all) * .1,
        max_all + (max_all - min_all) * .1 + delta_top
    ])

    mean_score_ohe = np.mean(df[score][df.threshold == 1])
    ax.text(0,
            min_all + delta_text,
            'Raw\nsimilarity\nencoding',
            fontsize=14,
            horizontalalignment='center',
            verticalalignment='top',
            color='gray')
    ax.text(100,
            min_all + delta_text,
            'One-hot\nencoding',
            fontsize=14,
            color='gray',
            horizontalalignment='center',
            verticalalignment='top')
    ax.set_xlabel('Hard-thresholding value', fontsize=16)
    if score == 'score':
        ax.set_ylabel('Score', fontsize=16)
    elif score == 'score-score(ohe)':
        ax.set_ylabel('Score - Score(one-hot-encoding)', fontsize=16)

    ax.tick_params(axis='both', which='major', labelsize=14)

    leg = ax.legend(fontsize=14, ncol=1)
    leg.set_title(condition, prop={'size': 16})

    # sns.axes_style()
    # sns._orig_rc_params
    return ax
Example #10
params = {'mathtext.fontset': 'cm',
          'mathtext.rm': 'serif',
          'mathtext.bf': 'serif:bold',
          'mathtext.it': 'serif:italic',
          'mathtext.sf': 'sans\\-serif',
          'font.family': 'serif',
          'font.serif': "Times New Roman",  # or "Times"
          'text.latex.preamble':  # newer matplotlib expects a single string
              r'\usepackage{siunitx}\usepackage{amsmath}'
              r'\usepackage{libertine}\usepackage[libertine]{newtxmath}'}
plt.rcParams.update(params)
fig, ax = plt.subplots(figsize=(9, 6))
fontsize = 25
for dataset in datasets:
    print(dataset)
    data = Data(dataset).get_df(preprocess_df=False)
    data.df = data.df.sample(frac=1, random_state=5).reset_index(drop=True)
    cat_variable = [x for x in data.col_action if data.col_action[x] == 'se']
    nrows_log10 = np.log10(data.df.shape[0])
    X = np.logspace(2, int(nrows_log10), int(nrows_log10)-1)
    X = np.append(X, pow(10, nrows_log10))
    Y = [len(np.unique(data.df[cat_variable].astype(str).values[:int(x)]))
         for x in X]
    ax.plot(X, Y, color=palette[dataset_cm[dataset]],
            linewidth=2,
            marker=list(markers)[dataset_cm[dataset]], markersize=10,
            zorder=3)
    del data

plt.savefig(os.path.join(path, figname),
            transparent=False, bbox_inches='tight', pad_inches=0.2)
Example #11
def train(config):
    # rng
    rng = np.random.RandomState(config["seed"])
    torch.cuda.manual_seed(config["seed"])
    torch.cuda.manual_seed_all(config["seed"])

    # occupy
    occ = Occupier()
    if config["occupy"]:
        occ.occupy()

    # Compute input shape
    c = UNet.get_optimal_shape(output_shape_lower_bound=config["output_size"],
                               steps=config["num_unet_steps"],
                               num_convs=config["num_unet_convs"])
    input_size = [int(ci) for ci in c["input"]]
    config['margin'] = np.asarray(input_size) - np.asarray(
        config["output_size"])
    # m = np.asarray(input_size) - np.asarray(config["output_size"])
    # if len(np.unique(m)) == 1:
    #     config["margin"] = m[0]
    # else:
    #     raise RuntimeError("Should never be here?")
    if len(np.unique(config["margin"])) > 1:
        raise RuntimeError("Beware: this might not work?")
    data = Data(config)

    # writer
    writer = SummaryWriter(log_dir="output/logs/" + config["force_hash"])
    board = {
        'dataset': data.loss_label,
        'loss': config['loss'],
        'writer': writer,
    }

    # Save config file, for reference
    os.system('cp {} {}/{}'.format(config["config_filename"], config["output"],
                                   config["config_filename"].split('/')[-1]))
    fn = config["output"] + "/config.h5"
    print("Storing config file: '{}'".format(fn))
    dd.io.save(fn, config)

    if config["model"] == "UNet":
        print("Instantiating UNet")

        model = UNet(
            steps=config["num_unet_steps"],
            num_input_channels=data.num_channels,
            first_layer_channels=config["num_unet_filters"],
            num_classes=data.num_classes,
            num_convs=config["num_unet_convs"],
            output_size=config["output_size"],
            pooling=config["pooling"],
            activation=config["activation"],
            use_dropout=config["use_dropout"],
            use_batchnorm=config["use_batchnorm"],
            init_type=config["init_type"],
            final_unit=config["final_unit"],
        )

        # Need to overwrite this
        if model.is_3d:
            config["input_size"] = model.input_size
        else:
            config["input_size"] = [model.input_size[1], model.input_size[2]]
        # config["margin"] = model.margin
        print("UNet -> Input size: {}. Output size: {}".format(
            config["input_size"], config["output_size"]))
    else:
        raise RuntimeError("Unknown model")
    model.cuda()

    # Sanity check
    for j in range(len(data.train_images_optim)):
        s = data.train_images_optim[j].shape
        for i in range(len(s) - 1):
            if model.input_size[i] > s[i + 1]:
                raise RuntimeError('Input patch larger than training data '
                                   '({}>{}) for dim #{}, sample #{}'.format(
                                       model.input_size[i], s[i + 1], i, j))
    if data.val_images_mirrored:
        for j in range(len(data.val_images_mirrored)):
            s = data.val_images_mirrored[j].shape
            for i in range(len(s) - 1):
                if model.input_size[i] > s[i + 1]:
                    raise RuntimeError(
                        'Input patch larger than validation data '
                        '({}>{}) for dim #{}, sample #{}'.format(
                            model.input_size[i], s[i + 1], i, j))
    if data.test_images_mirrored:
        for j in range(len(data.test_images_mirrored)):
            s = data.test_images_mirrored[j].shape
            for i in range(len(s) - 1):
                if model.input_size[i] > s[i + 1]:
                    raise RuntimeError(
                        'Input patch larger than test data '
                        '({}>{}) for dim #{}, sample #{}'.format(
                            model.input_size[i], s[i + 1], i, j))

    if config["optimizer"] == "Adam":
        optimizer = optim.Adam(
            model.parameters(),
            lr=config["learning_rate"],
            weight_decay=config["weight_decay"],
        )
    elif config["optimizer"] == "SGD":
        optimizer = optim.SGD(
            model.parameters(),
            lr=config["learning_rate"],
            weight_decay=config["weight_decay"],
            momentum=config["momentum"],
        )
    elif config["optimizer"] == "RMSprop":
        optimizer = optim.RMSprop(
            model.parameters(),
            lr=config["learning_rate"],
            weight_decay=config["weight_decay"],
            momentum=config["momentum"],
        )
    else:
        raise RuntimeError("Unsupported optimizer")

    # Load state
    first_batch = 0
    fn = config["output"] + "/state.h5"
    if isfile(fn):
        # print("Loading state: '{}'".format(fn))
        # with open(fn, "rb") as handle:
        #     state = pickle.load(handle)
        state = dd.io.load(fn)
        first_batch = state["cur_batch"] + 1
    else:
        state = {}

    # Load model
    fn = "{}/model-last.pth".format(config["output"])
    if isfile(fn):
        print("Loading model: '{}'".format(fn))
        model.load_state_dict(torch.load(fn))
    else:
        print("No model to load")

    # Load optimizer
    fn = "{}/optim-last.pth".format(config["output"])
    if isfile(fn):
        optimizer.load_state_dict(torch.load(fn))
    else:
        print("No optimizer to load")

    state.setdefault("epoch", 0)
    state.setdefault("cur_batch", 0)
    state.setdefault("loss", np.zeros(config["max_steps"]))
    state.setdefault("res_train", {"batch": [], "metrics": []})
    for t in config["test_thresholds"]:
        state.setdefault("res_train_th_{}".format(t), {
            "batch": [],
            "metrics": []
        })
        state.setdefault("res_val_th_{}".format(t), {
            "batch": [],
            "metrics": []
        })
        state.setdefault("res_test_th_{}".format(t), {
            "batch": [],
            "metrics": []
        })

    # TODO Learn to sample and update this accordingly
    if config["loss"] == "classification":
        # loss_criterion = torch.nn.NLLLoss(data.weights.cuda(), reduce=False)
        loss_criterion = F.nll_loss
    elif config["loss"] == "regression":
        raise RuntimeError("TODO")
    elif config['loss'] == 'jaccard' or config['loss'] == 'dice':
        from loss import OverlapLoss
        loss_criterion = OverlapLoss(config['loss'],
                                     config['overlap_loss_smoothness'],
                                     config['overlap_fp_factor'])
    else:
        raise RuntimeError("TODO")

    if model.is_3d:
        batch = torch.Tensor(
            config["batch_size"],
            data.num_channels,
            config["input_size"][0],
            config["input_size"][1],
            config["input_size"][2],
        )
        # labels = torch.ByteTensor(
        if not data.dot_annotations:
            labels = torch.LongTensor(
                config["batch_size"],
                config["output_size"][0],
                config["output_size"][1],
                config["output_size"][2],
            )
        else:
            labels = []
    else:
        batch = torch.Tensor(
            config["batch_size"],
            data.num_channels,
            config["input_size"][0],
            config["input_size"][1],
        )
        # labels = torch.ByteTensor(
        if not data.dot_annotations:
            labels = torch.LongTensor(
                config["batch_size"],
                config["output_size"][0],
                config["output_size"][1],
            )
        else:
            labels = []

    do_save_state = False
    model.train()

    # Sampler
    print("Instantiating sampler")
    sampler = Sampler(
        model.is_3d,
        {
            "images": data.train_images_optim,
            "labels": data.train_labels_optim,
            "mean": data.train_mean,
            "std": data.train_std
        },
        config,
        rng,
        data.dot_annotations,
    )

    if occ.is_busy():
        occ.free()

    # Main loop; the loop variable is stored directly in state["cur_batch"]
    # so that interrupted runs resume from the correct batch.
    for state["cur_batch"] in range(first_batch, config["max_steps"]):
        # Sample
        ts = time()
        coords = []
        elastic = []
        for i in range(config["batch_size"]):
            b, l, cur_coords, cur_elastic = sampler.sample()
            batch[i] = torch.from_numpy(b)
            if not data.dot_annotations:
                labels[i] = torch.from_numpy(l)
            else:
                labels.append(torch.from_numpy(l))
            coords.append(cur_coords)
            elastic.append(cur_elastic)

        # Forward pass (torch.autograd.Variable is a no-op since PyTorch 0.4)
        inputs = Variable(batch).cuda()
        outputs = model(inputs)
        optimizer.zero_grad()
        if config['loss'] == 'jaccard' or config['loss'] == 'dice':
            targets = Variable(labels.float()).cuda()
            o = F.softmax(outputs, dim=1)[:, 1, :, :]
            loss = loss_criterion.forward(o, targets)
            loss = sum(loss) / len(loss)
        elif config['loss'] == 'classification':
            targets = Variable(labels).cuda()
            if data.is_3d:
                # Do it slice by slice. Ugly but it works!
                loss = []
                for z in range(outputs.shape[2]):
                    loss.append(
                        loss_criterion(F.log_softmax(outputs[:, :, z, :, :],
                                                     dim=1),
                                       targets[:, z, :, :],
                                       weight=data.weights.cuda(),
                                       reduce=True,
                                       ignore_index=2))
                loss = sum(loss) / len(loss)
            else:
                # f(reduce=True) is equivalent to f(reduce=False).mean(), so
                # no extra averaging over the batch is needed. (The `reduce`
                # argument is deprecated in recent PyTorch; use `reduction`.)
                loss = loss_criterion(F.log_softmax(outputs, dim=1),
                                      targets,
                                      weight=data.weights.cuda(),
                                      reduce=True,
                                      ignore_index=2)
        else:
            raise RuntimeError('Bad loss type')

        # Sanity check
        # if not data.dot_annotations and loss.data.cpu().sum() > 10:
        #     print("very high loss?")
        #     embed()

        # Backward pass
        loss.backward()
        optimizer.step()

        # Get class stats
        ws = [0, 0]
        for l in labels:
            ws[0] += (l == 0).sum()
            ws[1] += (l == 1).sum()

        # Update state
        cur_loss = loss.data.cpu().sum()
        state["loss"][state["cur_batch"]] = cur_loss
        board['writer'].add_scalar(board['dataset'] + '-loss-' + board['loss'],
                                   cur_loss, state['cur_batch'])

        print(
            "Batch {it:d} -> Avg. loss {loss:.05f}: [{t:.02f} s.] (Range: {rg:.1f})"
            .format(
                it=state["cur_batch"] + 1,
                loss=cur_loss,
                t=time() - ts,
                rg=outputs.data.max() - outputs.data.min(),
            ))

        # Cross-validation
        force_eval = False
        if config["check_val_every"] > 0 and data.evaluate_val:
            if (state["cur_batch"] + 1) % config["check_val_every"] == 0:
                res = model.inference(
                    {
                        "images": data.val_images_mirrored,
                        "mean": data.val_mean,
                        "std": data.val_std,
                    },
                    config['batch_size'],
                    config['use_lcn'],
                )

                is_best = model.validation_by_classification(
                    images=data.val_images,
                    gt=data.val_labels_th,
                    prediction=res,
                    state=state,
                    board=board,
                    output_folder=config['output'],
                    xval_metric=config['xval_metric'],
                    dilation_thresholds=config['test_thresholds'],
                    subset='val',
                    make_stack=data.plot_make_stack,
                    force_save=False,
                )

                # Save models if they are the best at any test threshold
                for k, v in is_best.items():
                    if v is True:
                        save_model(config, state, model, optimizer,
                                   'best_th_{}'.format(k))
                        do_save_state = True

                # Force testing on train/test
                force_eval = any(is_best.values())

        # Test on the training data
        if config["check_train_every"] > 0 and data.evaluate_train:
            if ((state["cur_batch"] + 1) % config["check_train_every"]
                    == 0) or force_eval:
                res = model.inference(
                    {
                        "images":
                        data.train_images_mirrored[:data.num_train_orig],
                        "mean": data.train_mean,
                        "std": data.train_std,
                    },
                    config['batch_size'],
                    config['use_lcn'],
                )

                model.validation_by_classification(
                    images=data.train_images[:data.num_train_orig],
                    gt=data.train_labels_th,
                    prediction=res,
                    state=state,
                    board=board,
                    output_folder=config['output'],
                    xval_metric=config['xval_metric'],
                    dilation_thresholds=config['test_thresholds'],
                    subset='train',
                    make_stack=data.plot_make_stack,
                    force_save=force_eval,
                )

        # Test on the test data
        if config["check_test_every"] > 0 and data.evaluate_test:
            if ((state["cur_batch"] + 1) % config["check_test_every"]
                    == 0) or force_eval:
                res = model.inference(
                    {
                        "images": data.test_images_mirrored,
                        "mean": data.test_mean,
                        "std": data.test_std,
                    },
                    config['batch_size'],
                    config['use_lcn'],
                )

                model.validation_by_classification(
                    images=data.test_images,
                    gt=data.test_labels_th,
                    prediction=res,
                    state=state,
                    board=board,
                    output_folder=config['output'],
                    xval_metric=config['xval_metric'],
                    dilation_thresholds=config['test_thresholds'],
                    subset='test',
                    make_stack=data.plot_make_stack,
                    force_save=force_eval,
                )

        # Also save models periodically, to resume executions
        if config["save_models_every"] > 0:
            if (state["cur_batch"] + 1) % config["save_models_every"] == 0:
                save_model(config, state, model, optimizer, 'last')
                do_save_state = True

        # Save training state periodically (or if forced)
        if do_save_state:
            save_state(config, state)
            do_save_state = False

    board['writer'].close()
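
# A minimal, illustrative config sketch covering the keys train() reads;
# every value below is an assumption for demonstration, not the original
# settings ('margin' and 'input_size' are filled in by train() itself):
#
#     config = {
#         'seed': 0, 'occupy': False,
#         'model': 'UNet', 'output_size': [388, 388],
#         'num_unet_steps': 4, 'num_unet_convs': 2, 'num_unet_filters': 64,
#         'pooling': 'max', 'activation': 'relu',
#         'use_dropout': False, 'use_batchnorm': True,
#         'init_type': 'xavier', 'final_unit': 'none',
#         'optimizer': 'Adam', 'learning_rate': 1e-4,
#         'weight_decay': 0, 'momentum': .9,
#         'loss': 'classification', 'batch_size': 4, 'max_steps': 100000,
#         'test_thresholds': [0.5], 'xval_metric': 'f1', 'use_lcn': False,
#         'check_val_every': 500, 'check_train_every': 0,
#         'check_test_every': 0, 'save_models_every': 1000,
#         'force_hash': 'debug', 'output': 'output/debug',
#         'config_filename': 'configs/debug.yaml',
#         'overlap_loss_smoothness': 1, 'overlap_fp_factor': 1,
#     }
#     train(config)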