Пример #1
0
def test_pickle_yaml():
    """Round-trip a list of dicts through the pickle and YAML helpers.

    Dumps the same structure with ``pickle_dump``/``yaml_dump``, loads it
    back, and asserts that the list/dict structure, key insertion order,
    and values all survive the round trip. Temporary files are always
    removed, even when an assertion fails.
    """
    a = {'one': 1, 'two': [2, 3]}
    b = {'three': 3, 'four': [4, 5]}
    c = [a, b]

    def check(x):
        # Structure, insertion order, and values must match the original.
        assert isinstance(x, list)
        assert len(x) == 2
        assert all([isinstance(i, dict) for i in x])
        assert list(x[0].keys()) == ['one', 'two']
        assert list(x[1].keys()) == ['three', 'four']
        assert list(x[0].values()) == [1, [2, 3]]
        assert list(x[1].values()) == [3, [4, 5]]

    pickle_dump(c, '.tmp_pickle')
    try:
        check(pickle_load('.tmp_pickle.pkl'))
    finally:
        # Clean up even if check() raises, so no temp file is leaked.
        os.unlink('.tmp_pickle.pkl')

    yaml_dump(c, '.tmp_yaml')
    try:
        check(yaml_load('.tmp_yaml.yml'))
    finally:
        os.unlink('.tmp_yaml.yml')
Пример #2
0
def read_xy(log_folder,
            file_name,
            get_x,
            get_y,
            smooth_out=False,
            smooth_kws=None,
            point_step=1):
    """Collect (x, y) curves from experiment log folders into one DataFrame.

    Expects ``log_folder`` to contain digit-named run folders, each of which
    contains digit-named seed folders holding a pickled log file named
    ``file_name`` plus a ``config.yml`` at the run level.

    Args:
        log_folder: root folder containing digit-named run sub-folders.
        file_name: name of the pickled log file inside each seed folder.
        get_x: callable extracting the x value from one log entry.
        get_y: callable extracting the y value from one log entry.
        smooth_out: if True, smooth each seed's y curve with
            ``smooth_filter``.
        smooth_kws: keyword arguments for ``smooth_filter``; defaults to
            ``{'window_length': 51, 'polyorder': 3}``.
        point_step: keep only every ``point_step``-th point (subsampling).

    Returns:
        A single ``pd.DataFrame`` with columns ``x``, ``y`` and one column
        per config key, config values padded down over all rows of a run.
    """
    # Resolve the smoothing defaults once, not per run folder.
    if smooth_out and smooth_kws is None:
        smooth_kws = {'window_length': 51, 'polyorder': 3}

    def digit_dirs(folder):
        # Sub-directories whose names are pure digits (run/seed ids).
        return [
            p for p in folder.glob('*/')
            if p.is_dir() and str(p.name).isdigit()
        ]

    dfs = []
    for id_folder in digit_dirs(Path(log_folder)):
        x = []
        y = []
        for seed_folder in digit_dirs(id_folder):
            logs = pickle_load(seed_folder / file_name)
            x.append([get_x(log) for log in logs])
            y.append([get_y(log) for log in logs])
        new_x, ys = interp_curves(x, y)  # all seeds share same x values

        if smooth_out:
            ys = [smooth_filter(y, **smooth_kws) for y in ys]

        if point_step > 1:
            # Subsample every point_step-th point along the curve.
            idx = np.arange(0, new_x.size, step=point_step)
            new_x = new_x[idx, ...]
            ys = [y[idx, ...] for y in ys]

        # Stack all seeds: repeat x for each seed curve, concatenate ys.
        df = pd.DataFrame({'x': np.tile(new_x, len(ys)), 'y': np.hstack(ys)})
        config = yaml_load(id_folder / 'config.yml')
        config = pd.DataFrame([config.values()], columns=config.keys())
        # Lists are unhashable; convert to tuples so grouping/dedup works.
        config = config.applymap(lambda x: tuple(x)
                                 if isinstance(x, list) else x)
        df = pd.concat([df, config], axis=1, ignore_index=False)
        # ffill() replaces the deprecated fillna(method='pad'):
        # pad config values (present only on row 0) down over all rows.
        df = df.ffill()
        dfs.append(df)
    dfs = pd.concat(dfs, axis=0, ignore_index=True)
    return dfs