Example #1
def get_dict_by_id(id):
    from incense import ExperimentLoader

    loader = ExperimentLoader(
        # Use None if MongoDB is running on localhost, or "mongodb://mongo:27017"
        # when running in a devcontainer.
        mongo_uri=None,
        db_name='GPBayes')

    exp = loader.find_by_id(id)

    max_pow = exp.config['max_pow']

    scores = exp.info['scores']
    scores = {
        key: value
        for key, value in scores.items() if key != 'brownian_qda'
    }

    theoretical_mean = exp.info['theoretical_mean']
    theoretical_std = exp.info['theoretical_std']

    return {
        'max_pow': max_pow,
        'scores': scores,
        'theoretical_mean': theoretical_mean,
        'theoretical_std': theoretical_std,
        'optimal_accuracy': 1
    }
Example #2
import os
import pickle
import tempfile

from incense import ExperimentLoader
# EvaluationResults is a project-specific class; it must be importable from
# the module that originally pickled the artifacts.


def extract_model_outputs(mongo_db, db_name, run_id=None):
    loader = ExperimentLoader(mongo_uri=mongo_db, db_name=db_name)
    if run_id is None:
        experiments = loader.find(query={"status": "COMPLETED"})
    else:
        experiments = [loader.find_by_id(run_id)]

    model_output = {}
    for e in experiments:
        for k in e.artifacts.keys():

            if 'evaluation_results' in k:
                art = e.artifacts[k]
                art.save(tempfile.gettempdir())
                tmp_filename = os.path.join(tempfile.gettempdir(),
                                            art._make_filename())
                with open(tmp_filename, 'rb') as f:
                    exp_res: EvaluationResults = pickle.load(f)
                os.remove(tmp_filename)

                model_output[exp_res.subset_path + "_" + exp_res.machine_id +
                             "_" + str(exp_res.start_time) + "_" +
                             str(e.id)] = exp_res

    return model_output
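A minimal invocation might look like the sketch below; the URI and database
name are placeholders, and the artifacts must have been pickled by the same
project that defines EvaluationResults.

outputs = extract_model_outputs("mongodb://localhost:27017", "my_experiments")
for key in outputs:
    print(key)  # subset_path, machine_id, start_time and run id, joined by "_"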
Example #3
def get_dict_by_id(id):
    from incense import ExperimentLoader

    loader = ExperimentLoader(
        # Use None if MongoDB is running on localhost, or "mongodb://mongo:27017"
        # when running in a devcontainer.
        mongo_uri=None,
        db_name='GPBayes'
    )

    exp = loader.find_by_id(id)

    max_pow = exp.config['max_pow']

    scores = exp.info['scores']
    scores = {
        'optimal': scores['optimal'],
        'brownian_qda': scores['brownian_qda'],
        'qda': scores['qda']
    }

    return {
        'max_pow': max_pow,
        'scores': scores,
        'optimal_accuracy': 1}
Example #4
def extract_plot_data_conv_ae(mongo_db, aggregate=True):
    db_name = 'dcase_task2_fully_conv_ae'
    loader = ExperimentLoader(mongo_uri=mongo_db, db_name=db_name)

    experiments = loader.find(query={
        "status": "COMPLETED",
        "config.num_epochs": 100
    })

    # e1.config['preprocessing_params']['n_mels'] is not varied in this experiment.
    # arch_params may differ between experiments; results per channel multiplier
    # should be compared against the baseline.
    results = pd.DataFrame([
        (e1.config['machine_type'],
         e1.config['machine_id'],
         e1.config['arch_params']['channel_multiplier'],
         e1.config.get('apply_normalization', False),
         e1.metrics['eval_rocauc'][0] * 100,
         e1.metrics['eval_p_rocauc'][0] * 100) for e1 in experiments
    ])
    results = results.rename(
        columns={
            0: 'machine_type',
            1: 'machine_id',
            2: 'channel_multiplier',
            3: 'apply_normalization',
            4: 'rocauc',
            5: 'p_rocauc'
        })

    group_by_args = [
        'machine_type', 'machine_id', 'channel_multiplier',
        'apply_normalization'
    ]
    selected_columns = copy.deepcopy(group_by_args)
    selected_columns.extend(['rocauc', 'p_rocauc'])

    results = results[results.columns.intersection(selected_columns)]

    plot_data = aggregate_data(results, group_by_args, aggregate)

    plot_data['ID'] = plot_data[['machine_type',
                                 'machine_id']].apply(lambda x: ' '.join(x),
                                                      axis=1)

    plot_data['channel_multiplier'] = plot_data['channel_multiplier'].astype(
        str)
    plot_data['apply_normalization'] = plot_data['apply_normalization'].astype(
        str)

    plot_data['Class'] = plot_data[[
        'channel_multiplier', 'apply_normalization'
    ]].apply(lambda x: ' '.join(x), axis=1)
    plot_data['Class'] = plot_data['Class'] + "_" + db_name

    return plot_data
Example #5
    # Method snippet from a class whose definition is omitted in the source;
    # it loads a completed run and resolves the path to its best checkpoint.
    def __init__(self, mongo_db_uri, db_name, model_path, run_id):

        # load the experiment from MongoDB
        loader = ExperimentLoader(mongo_uri=mongo_db_uri, db_name=db_name)

        self.preds = None
        self.run_id = run_id
        self.e = loader.find_by_id(run_id)

        self.model_path = path.join(model_path, self.e.config['uid'],
                                    'best_model.ckpt')
Example #6
def extract_exp_data_flows(mongo_db, db_name, run_id=None):
    loader = ExperimentLoader(mongo_uri=mongo_db, db_name=db_name)

    if run_id is not None:
        experiments = loader.find({"status": "COMPLETED", "_id": run_id})
    else:
        experiments = loader.find({"status": "COMPLETED"})

    col_names = [
        'run_id', 'apply_normalization', 'norm_per_set', 'transpose_flatten',
        'frames_per_snippet', 'batch_size', 'arch_params.n_hidden',
        'fixed_flow_evaluation', 'arch_params.hidden_size',
        'arch_params.n_blocks', 'arch_params.flow_model_type',
        'arch_params.cond_label_size', 'optimizer', 'optimizer_params.lr',
        'status', 'valid_loss'
    ]

    results = pd.DataFrame(columns=col_names)

    for e in experiments:
        print("Processing experiment {}".format(e.id))
        losses = e.metrics['valid_loss']
        new_row = pd.DataFrame([[
            e.id,
            e.config['apply_normalization'],
            e.config.get('norm_per_set', False),
            e.config.get('transpose_flatten', False),
            e.config['frames_per_snippet'],
            e.config['batch_size'],
            e.config['arch_params']['n_hidden'],
            e.config.get('fixed_flow_evaluation', False),
            e.config['arch_params']['hidden_size'],
            e.config['arch_params']['n_blocks'],
            e.config['arch_params']['flow_model_type'],
            e.config['arch_params'].get('cond_label_size', 6),
            e.config['optimizer'],
            e.config['optimizer_params']['lr'],
            e.status,
            # Validation loss at the early-stopping point
            # (last logged epoch minus the patience window).
            losses[len(losses) - 1 - e.config.get('early_stopping_patience', 0)],
        ]], columns=col_names)
        # DataFrame.append was removed in pandas 2.0; concat is the replacement.
        results = pd.concat([results, new_row], ignore_index=True)

    return results
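A hypothetical call against a flows database (the URI is a placeholder; the
database name is borrowed from the extract_plot_data_flows example further
down):

df = extract_exp_data_flows("mongodb://localhost:27017",
                            "dcase2020_task2_flows_maf")
print(df[['run_id', 'valid_loss']].head())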
Example #7
def plot_experiments(ids, titles, data_path, **kwargs):
    from incense import ExperimentLoader

    loader = ExperimentLoader(
        # Use None if MongoDB is running on localhost, or "mongodb://mongo:27017"
        # when running in a devcontainer.
        mongo_uri=None,
        db_name='GPBayes'
    )

    configure_matplotlib()

    n_experiments = len(ids)
    default_figsize = matplotlib.rcParams['figure.figsize']

    fig, axes = plt.subplots(2, n_experiments, figsize=(
        default_figsize[0] * n_experiments, default_figsize[1] * 3))

    for i, id in enumerate(ids):
        exp = loader.find_by_id(id)

        max_pow = exp.config['max_pow']

        compare_tesla = exp.config['compare_tesla']
        compare_gm = exp.config['compare_gm']
        compare_bmw = exp.config['compare_bmw']
        asset_labels_used = get_asset_labels_used(
            compare_tesla=compare_tesla,
            compare_gm=compare_gm,
            compare_bmw=compare_bmw)

        real_data = get_real_data(data_path, max_pow)
        real_data = filter_data(real_data, asset_labels_used)

        plot_trajectories(real_data, max_pow,
                          print_left=(i == 0),
                          axes=axes[0, i])
        axes[0, i].set_title(titles[i])

    return plot_experiments_common(ids, get_dict_by_id, axes=axes[1],
                                   top=0.95, bottom=0.15, **kwargs)
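A hypothetical call, pairing run ids with panel titles (both made up for
illustration):

fig = plot_experiments(ids=[1, 2],
                       titles=['First comparison', 'Second comparison'],
                       data_path='data/')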
Example #8
def get_dict_by_id(id):
    from incense import ExperimentLoader

    loader = ExperimentLoader(
        # Use None if MongoDB is running on localhost, or "mongodb://mongo:27017"
        # when running in a devcontainer.
        mongo_uri=None,
        db_name='GPBayes')

    exp = loader.find_by_id(id)

    max_pow = exp.config['max_pow']
    end_position = exp.config['end_position']

    scores = exp.info['scores']

    return {
        'max_pow': max_pow,
        'scores': scores,
        'optimal_accuracy': 1 - bayes_error(end_position)
    }
Example #9
def plot_confusion_matrix_stat(id,
                               stat,
                               title=None,
                               plot_y_label=True,
                               ylim_top=None):
    from incense import ExperimentLoader

    loader = ExperimentLoader(
        # Use None if MongoDB is running on localhost, or "mongodb://mongo:27017"
        # when running in a devcontainer.
        mongo_uri=None,
        db_name='GPBayes')

    exp = loader.find_by_id(id)

    max_pow = exp.config['max_pow']

    confusion_matrices = exp.info['confusion_matrices']

    titles_dict = {
        'brownian_step': 'Brownian step example',
        'brownian_bridge': 'Brownian bridge example',
        'brownian_variances': 'Brownian variances example',
        'cars': 'Cars experiment'
    }

    # Only derive the title from the experiment name when none was passed in;
    # the original snippet unconditionally overwrote the title argument.
    if title is None:
        title = titles_dict[exp.experiment.name]

    stat_dict = get_confusion_matrix_stat(confusion_matrices, stat)

    fig = plot_scores(max_pow=max_pow,
                      scores=stat_dict,
                      _run=None,
                      optimal_accuracy=0,
                      plot_y_label=plot_y_label,
                      ylim_top=ylim_top)

    fig.axes[0].set_title(title)

    return fig
Example #10
def extract_plot_data_baseline(mongo_db, aggregate=True):
    db_name = 'dcase2020_task2_baseline'
    loader = ExperimentLoader(mongo_uri=mongo_db, db_name=db_name)

    experiments = loader.find(query={
        "status": "COMPLETED",
        "_id": {
            "$lt": 329
        }
    })  # metrics were renamed from run 329 onward, so only earlier runs are used

    # arch_params might be different for different experiments
    # results per channel multiplier should be compared to baseline
    results = pd.DataFrame([
        (e1.config['machine_type'], e1.config['machine_id'],
         e1.metrics['eval_rocauc'][0] * 100,
         e1.metrics['eval_p_rocauc'][0] * 100) for e1 in experiments
    ])
    results = results.rename(columns={
        0: 'machine_type',
        1: 'machine_id',
        2: 'rocauc',
        3: 'p_rocauc'
    })

    group_by_args = ['machine_type', 'machine_id']
    selected_columns = copy.deepcopy(group_by_args)
    selected_columns.extend(['rocauc', 'p_rocauc'])

    results = results[results.columns.intersection(selected_columns)]

    plot_data = aggregate_data(results, group_by_args, aggregate)

    plot_data['ID'] = plot_data[['machine_type',
                                 'machine_id']].apply(lambda x: ' '.join(x),
                                                      axis=1)
    plot_data['Class'] = 'Baseline'

    return plot_data
Example #11
def loader():
    loader = ExperimentLoader(mongo_uri=None, db_name="incense_test")
    return loader
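In incense's test suite this function serves as a pytest fixture (the
@pytest.fixture decorator is stripped in the snippet). A minimal sketch of a
test that consumes it, assuming a run with id 1 exists in the incense_test
database:

import pytest
from incense import ExperimentLoader


@pytest.fixture
def loader():
    return ExperimentLoader(mongo_uri=None, db_name="incense_test")


def test_find_by_id_returns_requested_run(loader):
    exp = loader.find_by_id(1)  # assumes run 1 exists in the test database
    assert exp.id == 1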
Example #12
from incense import ExperimentLoader
import pandas as pd

loader = ExperimentLoader(mongo_uri='rechenknecht2.cp.jku.at:37373',
                          db_name='dcase2020_task2_baseline_v')

experiments = loader.find(query={"status": "COMPLETED"})
results = pd.DataFrame([
    (e1.config['machine_type'], e1.config['machine_id'],
     e1.config['preprocessing_params']['n_mels'],
     e1.metrics['eval_rocauc'][0] * 100, e1.metrics['eval_p_rocauc'][0] * 100)
    for e1 in experiments
])
results = results.rename(columns={
    0: 'machine_type',
    1: 'machine_id',
    2: 'n_mels',
    3: 'rocauc',
    4: 'p_rocauc'
})

summary = results.groupby(['machine_type', 'machine_id',
                           'n_mels']).aggregate(['mean', 'std', 'count'])
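The .aggregate(['mean', 'std', 'count']) call above leaves a column
MultiIndex such as ('rocauc', 'mean'). For export or plotting, the frame can
be flattened with plain pandas, e.g.:

summary.columns = ['_'.join(col) for col in summary.columns]
summary = summary.reset_index()  # turn the group keys back into columns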
Example #13
def get_loader(uri=mongo_uri, db=db_name):
    loader = ExperimentLoader(mongo_uri=uri, db_name=db)
    return loader
Example #14
def extract_plot_data_flows(mongo_db, aggregate=True, grouping_vars=None):
    db_name = 'dcase2020_task2_flows_maf'
    loader = ExperimentLoader(mongo_uri=mongo_db, db_name=db_name)

    # experiments = loader.find_all()
    experiments = loader.find({"status": "COMPLETED"})

    col_names = [
        'run_id', 'machine_type', 'machine_id', 'rocauc', 'p_rocauc',
        'apply_normalization', 'frames_per_snippet', 'n_hidden',
        'fixed_flow_evaluation', 'hidden_size', 'n_blocks', 'optimizer',
        'optimizer_params.lr', 'status'
    ]

    if grouping_vars is None:
        grouping_vars = col_names[5:]

    results = pd.DataFrame(columns=col_names)

    for e in experiments:
        for type in all_devtest_machines:
            for id in all_devtest_machines[type]:
                print(e.id, type + '_' + id + "_rocauc")

                # Default to [None] so runs that never logged this metric are
                # skipped below; a default of [0] would record a score of 0 and
                # make the None checks unreachable.
                rocauc = e.metrics.get(type + '_' + id + "_rocauc", [None])[0]
                if rocauc is not None:
                    rocauc = rocauc * 100
                p_rocauc = e.metrics.get(type + '_' + id + "_p_rocauc",
                                         [None])[0]
                if p_rocauc is not None:
                    p_rocauc = p_rocauc * 100

                if rocauc is None or p_rocauc is None:
                    print("Skipping experiment", e.id)
                    continue

                new_row = pd.DataFrame([[
                    e.id,
                    type.split('/')[1],
                    id,
                    rocauc,
                    p_rocauc,
                    e.config['apply_normalization'],
                    e.config['frames_per_snippet'],
                    e.config['arch_params']['n_hidden'],
                    e.config.get('fixed_flow_evaluation', False),
                    e.config['arch_params']['hidden_size'],
                    e.config['arch_params']['n_blocks'],
                    e.config['optimizer'],
                    e.config['optimizer_params']['lr'],
                    e.status,
                ]], columns=col_names)
                # DataFrame.append was removed in pandas 2.0.
                results = pd.concat([results, new_row], ignore_index=True)

    group_by_args = ['run_id', 'machine_type', 'machine_id']
    group_by_args.extend(grouping_vars)

    selected_columns = copy.deepcopy(group_by_args)
    selected_columns.extend(['rocauc', 'p_rocauc'])

    results = results[results.columns.intersection(selected_columns)]

    plot_data = aggregate_data(results, group_by_args, aggregate)

    plot_data['ID'] = plot_data[['machine_type',
                                 'machine_id']].apply(lambda x: ' '.join(x),
                                                      axis=1)

    for var in grouping_vars:
        plot_data[var] = plot_data[var].astype(str)

    plot_data['Class'] = plot_data[grouping_vars].apply(lambda x: ' '.join(x),
                                                        axis=1)
    plot_data['Class'] = plot_data['Class'] + "_" + db_name

    return plot_data
Example #15
def loader():
    loader = ExperimentLoader(mongo_uri=MONGO_URI, db_name=TEST_DB_NAME)
    return loader
Example #16
def get_loader(db_name='sacred'):
    loader = ExperimentLoader(mongo_uri=mongo_uri, db_name=db_name)
    return loader
Example #17
def info_db_loader_pickled():
    loader = ExperimentLoader(mongo_uri=MONGO_URI,
                              db_name=INFO_DB_NAME,
                              unpickle=False)
    return loader
Example #18
def get_experiment(exp_id):
    uri, database = get_uri_db_pair()
    loader = ExperimentLoader(mongo_uri=uri, db_name=database)
    ex = loader.find_by_id(exp_id)
    return ex
Example #19
def get_experiment_loader():

    return ExperimentLoader(mongo_uri=constants.MONGO_URI,
                            db_name=constants.DB_NAME)
Example #20
def recent_db_loader():
    loader = ExperimentLoader(mongo_uri=MONGO_URI, db_name=RECENT_DB_NAME)
    return loader
Example #21
def delete_db_loader():
    loader = ExperimentLoader(mongo_uri=MONGO_URI, db_name=DELETE_DB_NAME)
    return loader
Example #22
def heterogenous_db_loader():
    loader = ExperimentLoader(mongo_uri=MONGO_URI,
                              db_name=HETEROGENOUS_DB_NAME)
    return loader
Example #23
def plot_confusion_matrix(id,
                          n_samples,
                          ylim_top=None,
                          optimal_accuracy=(1, 0, 0, 1),
                          theoretical_accuracy=None,
                          title=None):
    from incense import ExperimentLoader

    configure_matplotlib()

    loader = ExperimentLoader(
        # Use None if MongoDB is running on localhost, or "mongodb://mongo:27017"
        # when running in a devcontainer.
        mongo_uri=None,
        db_name='GPBayes')

    exp = loader.find_by_id(id)

    max_pow = exp.config['max_pow']

    confusion_matrices = exp.info['confusion_matrices']

    confusion_matrices = {
        key: value
        for key, value in confusion_matrices.items() if key != 'brownian_qda'
    }

    if title is None:
        title = exp.experiment.name

        titles_dict = {
            'brownian_step': 'Brownian step example',
            'brownian_bridge': 'Brownian bridge example',
            'brownian_variances': 'Brownian variances example',
            'cars': 'Cars experiment'
        }

        title = titles_dict[title]

    default_figsize = matplotlib.rcParams['figure.figsize']

    fig, axes = plt.subplots(2,
                             2,
                             figsize=(default_figsize[0] * 2.2,
                                      default_figsize[1] * 3))

    true_pos = get_confusion_matrix_stat(confusion_matrices, lambda x: x[0, 0])
    false_pos = get_confusion_matrix_stat(confusion_matrices,
                                          lambda x: x[0, 1])
    false_neg = get_confusion_matrix_stat(confusion_matrices,
                                          lambda x: x[1, 0])
    true_neg = get_confusion_matrix_stat(confusion_matrices, lambda x: x[1, 1])

    for i, (scores, index, optimal) in enumerate(
            zip([true_pos, false_pos, false_neg, true_neg], [(0, 0), (0, 1),
                                                             (1, 0), (1, 1)],
                optimal_accuracy)):
        plot_scores(max_pow=max_pow,
                    scores=scores,
                    _run=None,
                    optimal_accuracy=optimal * n_samples // 2,
                    plot_y_label=False,
                    ylim_top=ylim_top,
                    ylim_bottom=0,
                    plot_legend=False,
                    theoretical_mean=theoretical_accuracy[i] * n_samples //
                    2 if theoretical_accuracy is not None else None,
                    axes=axes[index],
                    start_pow=0)

    axes[0, 0].set_xlabel(None)
    axes[0, 1].set_xlabel(None)

    axes[0, 0].set_ylabel('Class 0')
    axes[1, 0].set_ylabel('Class 1')

    axes[0, 0].set_title('Predicted class 0')
    axes[0, 1].set_title('Predicted class 1')

    fig.suptitle(title)

    fig.tight_layout()
    fig.subplots_adjust(top=0.89, bottom=0.15, hspace=0.1)
    handles, labels = axes[0, 0].get_legend_handles_labels()

    leg = fig.legend(handles,
                     labels,
                     loc="lower center",
                     bbox_to_anchor=(0.5, 0),
                     bbox_transform=fig.transFigure,
                     ncol=7)
    leg.get_frame().set_alpha(1)

    return fig
Example #24
    # The head of export(exp_id, loader, path) is omitted in the source snippet;
    # it presumably loads the run and binds `exp`, `config`, and `metrics`.
    with open(path + 'config.json', mode='w') as f:
        json.dump(json.loads(bson_dumps(config)), f, indent=4)

    with open(path + 'metrics.json', mode='w') as f:
        json.dump(json.loads(bson_dumps(metrics)), f, indent=4)

    for k in exp.artifacts:
        artifact_content = torch.load(BytesIO(exp.artifacts[k].content))

        with open(path + k, mode='wb') as f:
            torch.save(artifact_content, f)


if __name__ == "__main__":
    args = parser.parse_args()

    path = args.path

    loader = ExperimentLoader(mongo_uri=args.mongo_uri, db_name=args.db)

    if args.r:
        exp_ids_starts = args.expids[::2]
        exp_ids_ends = args.expids[1::2]
        expids = chain(
            *[range(s, e + 1) for s, e in zip(exp_ids_starts, exp_ids_ends)])

    else:
        expids = args.expids

    for exp_id in expids:
        export(exp_id, loader, path)
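The args.r branch interprets the id list as (start, end) pairs and expands
them into inclusive ranges. A standalone illustration of that expansion, with
made-up ids:

from itertools import chain

expids = [3, 5, 10, 12]  # two (start, end) pairs: 3..5 and 10..12
starts, ends = expids[::2], expids[1::2]
print(list(chain(*[range(s, e + 1) for s, e in zip(starts, ends)])))
# -> [3, 4, 5, 10, 11, 12]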
Example #25
def info_db_loader():
    # Unregister handlers to simulate that sacred is not currently imported.
    jsonpickle.handlers.unregister(np.ndarray)
    jsonpickle.handlers.unregister(pd.DataFrame)
    loader = ExperimentLoader(mongo_uri=MONGO_URI, db_name=INFO_DB_NAME)
    return loader