Пример #1
0
def train(base_log_path, benchmark_name, task):
    """Run every trial of a benchmark task and collect the logged results.

    For each trial index a new directory named
    ``<benchmark_name>_<env_id>_<trial>`` is created under ``base_log_path``
    (``os.makedirs`` raises if it already exists), the matching trainer is
    run with ``SEEDS[trial]`` as its seed, and the results are read back from
    that directory with ``load_results``.

    :param base_log_path: directory under which the per-trial log directories
        are created
    :param benchmark_name: benchmark name; a name starting with ``'mujoco'``
        (case-insensitive) selects ``train_mujoco``, anything else
        ``train_atari``
    :param task: mapping providing ``'trials'``, ``'env_id'`` and
        ``'num_timesteps'``
    :return: list with one loaded result per trial, each annotated with the
        ``'trial'`` index and the ``'seed'`` that was used
    """
    collected = []
    for trial_idx in range(task['trials']):
        run_name = '{}_{}_{}'.format(benchmark_name, task['env_id'],
                                     trial_idx)
        logdir = os.path.join(base_log_path, run_name)
        os.makedirs(logdir)

        # Pick the trainer from the benchmark family, then call it with the
        # same keyword arguments in either case.
        is_mujoco = benchmark_name.lower().startswith('mujoco')
        trainer = train_mujoco if is_mujoco else train_atari
        trainer(task['env_id'],
                num_timesteps=task['num_timesteps'],
                seed=SEEDS[trial_idx],
                logdir=logdir)

        trial_results = load_results(logdir)
        trial_results['trial'] = trial_idx
        trial_results['seed'] = SEEDS[trial_idx]
        collected.append(trial_results)

    return collected
Пример #2
0
def plot_single_directory(env_id,
                          directory,
                          method_name,
                          num_folds,
                          EPISODES_WINDOW=100):
    """Plot the mean +/- std learning curve over several folds of one method.

    Results are read from ``<directory>/logs_<env_id>_<i>`` for
    ``i in range(num_folds)``. Every fold is resampled onto the sorted union
    of all folds' x values with ``np.interp``, the resampled curves are
    smoothed with ``window_func`` and the mean and one-standard-deviation
    band (both taken along axis 0) are drawn on the current matplotlib axes.

    :param env_id: environment id used in the log directory names
    :param directory: parent directory containing the ``logs_<env_id>_<i>``
        directories
    :param method_name: legend label for the mean curve
    :param num_folds: number of fold directories to read
    :param EPISODES_WINDOW: window size handed to ``window_func``
    :return: None; if the directory of fold 0 does not exist a warning is
        printed and nothing is plotted
    """
    directory_name = directory + '/' + 'logs_' + env_id + '_'
    if not os.path.isdir(directory_name + '0'):
        print('Warning: directory ' + directory_name + '0' +
              ' does not exist, skipping...')
        return

    results_x = []
    results_y = []
    results_x_all = []
    for i in range(num_folds):
        current_results = load_results(directory_name + str(i))
        current_results_x, current_results_y = ts2xy(current_results)
        results_x.append(current_results_x)
        results_y.append(current_results_y)
        results_x_all.extend(current_results_x)
    results_x_all = np.sort(results_x_all)

    # The previous code called np.append() on each fold to pad it up to the
    # largest x, but discarded the returned array, so nothing was padded.
    # No padding is needed: np.interp already returns the last y value for
    # query points to the right of a fold's last x.
    results_y_all = [
        np.interp(results_x_all, fold_x, fold_y)
        for fold_x, fold_y in zip(results_x, results_y)
    ]

    results_x_all, results_y_all = window_func(results_x_all, results_y_all,
                                               EPISODES_WINDOW, np.mean)
    y_mean = np.mean(results_y_all, 0)
    y_std = np.std(results_y_all, 0)
    plt.plot(results_x_all, y_mean, label=method_name)
    plt.fill_between(results_x_all, y_mean - y_std, y_mean + y_std, alpha=0.3)
Пример #3
0
def log_reward_statistics(vec_env, num_last_eps=100, prefix=""):
    """Log summary statistics of the most recent episodes of ``vec_env``.

    The episode results are loaded with ``load_results`` from the directory
    containing ``vec_env.results_writer.f.name``. Because that file may not
    exist yet, loading is attempted up to 10 times, sleeping one second after
    each ``FileNotFoundError``. If every attempt fails nothing is logged.

    :param vec_env: object exposing ``results_writer.f.name``
    :param num_last_eps: number of most recent episodes the statistics are
        computed over
    :param prefix: string prepended to every logged key
    """
    all_stats = None
    for _ in range(10):
        try:
            all_stats = load_results(osp.dirname(
                vec_env.results_writer.f.name))
        except FileNotFoundError:
            time.sleep(1)
        else:
            # Stop retrying once the results were read; without this the
            # file was re-read on every one of the 10 iterations.
            break
    if all_stats is None:
        return

    episode_rewards = all_stats["r"]
    episode_lengths = all_stats["l"]

    recent_episode_rewards = episode_rewards[-num_last_eps:]
    recent_episode_lengths = episode_lengths[-num_last_eps:]

    # Min/max/mean are undefined on an empty selection, so the per-episode
    # statistics are only logged when at least one episode is available.
    if len(recent_episode_rewards) > 0:
        kvs = {
            prefix + "AverageReturn": np.mean(recent_episode_rewards),
            prefix + "MinReturn": np.min(recent_episode_rewards),
            prefix + "MaxReturn": np.max(recent_episode_rewards),
            prefix + "StdReturn": np.std(recent_episode_rewards),
            prefix + "AverageEpisodeLength":
            np.mean(recent_episode_lengths),
            prefix + "MinEpisodeLength": np.min(recent_episode_lengths),
            prefix + "MaxEpisodeLength": np.max(recent_episode_lengths),
            prefix + "StdEpisodeLength": np.std(recent_episode_lengths),
        }
        logger.logkvs(kvs)
    logger.logkv(prefix + "TotalNEpisodes", len(episode_rewards))
Пример #4
0
def plot_results(dirs, num_timesteps, xaxis, task_name):
    """Load results from each directory, truncate them and plot the curves.

    :param dirs: iterable of directories passed to ``load_results``
    :param num_timesteps: rows are kept while the cumulative sum of the
        ``l`` column is at most this value
    :param xaxis: x-axis selector forwarded to ``ts2xy`` and ``plot_curves``
    :param task_name: forwarded to ``plot_curves``
    """
    def _load_truncated(path):
        frame = load_results(path)
        return frame[frame.l.cumsum() <= num_timesteps]

    # Load everything first, then convert, matching the two-phase order.
    truncated = [_load_truncated(path) for path in dirs]
    curves = [ts2xy(frame, xaxis) for frame in truncated]
    plot_curves(curves, xaxis, task_name)
Пример #5
0
def plot_results(dirs, num_timesteps, xaxis, task_name):
    """Plot one curve per results directory, cut off at ``num_timesteps``.

    :param dirs: iterable of directories passed to ``load_results``
    :param num_timesteps: only rows whose cumulative ``l`` value does not
        exceed this number are kept
    :param xaxis: x-axis selector forwarded to ``ts2xy`` and ``plot_curves``
    :param task_name: forwarded to ``plot_curves``
    """
    kept = []
    for results_dir in dirs:
        loaded = load_results(results_dir)
        within_budget = loaded.l.cumsum() <= num_timesteps
        kept.append(loaded[within_budget])

    xy_pairs = []
    for loaded in kept:
        xy_pairs.append(ts2xy(loaded, xaxis))
    plot_curves(xy_pairs, xaxis, task_name)
Пример #6
0
 def load_results(self, dirs, max_timesteps=1e8, x_axis=X_TIMESTEPS, episode_window=100):
     """Load, truncate and optionally smooth the results of several directories.

     The bare ``load_results(...)`` call inside this method resolves to a
     module-level name, not to this method.

     :param dirs: iterable of directories to load
     :param max_timesteps: rows are kept while the cumulative sum of the
         ``l`` column is at most this value
     :param x_axis: x-axis selector forwarded to ``self._ts2xy``
     :param episode_window: window passed to ``self._window_func`` with
         ``np.mean``; a falsy value skips the smoothing
     :return: list of (x, y) pairs, one per directory
     """
     truncated = []
     for path in dirs:
         frame = load_results(path)
         truncated.append(frame[frame.l.cumsum() <= max_timesteps])

     curves = [self._ts2xy(frame, x_axis) for frame in truncated]
     if not episode_window:
         return curves
     return [self._window_func(x, y, episode_window, np.mean) for x, y in curves]
Пример #7
0
def plot_results_single(ax, input_dir, num_timesteps, xaxis):
    """Draw the raw and the smoothed curve of one results directory on ``ax``.

    The raw curve is drawn semi-transparent; the smoothed curve (mean over
    ``EPISODES_WINDOW`` via ``window_func``) is drawn on top with the label
    ``'normal'``. The mean of the last 100 smoothed values is printed.

    :param ax: axes-like object providing ``plot``
    :param input_dir: directory passed to ``load_results``
    :param num_timesteps: rows are kept while the cumulative sum of the
        ``l`` column is at most this value
    :param xaxis: x-axis selector forwarded to ``ts2xy``
    """
    frame = load_results(input_dir)
    frame = frame[frame.l.cumsum() <= num_timesteps]
    xy = ts2xy(frame, xaxis)
    raw_x, raw_y = xy[0], xy[1]

    colour = sns.color_palette()[0]
    ax.plot(raw_x, raw_y, alpha=0.4, linewidth=0.8, c=colour)

    # Average over the last EPISODES_WINDOW episodes.
    smooth_x, smooth_y = window_func(raw_x, raw_y, EPISODES_WINDOW, np.mean)
    print("avg_100: %.1f" % np.mean(smooth_y[-100:]))
    ax.plot(smooth_x, smooth_y, linewidth=0.8, c=colour, label='normal')
Пример #8
0
def plot_results_compare(dirs, num_timesteps, xaxis, title):
    """Plot the 'noisy' and 'surrogate' runs on the current figure.

    For each of the two runs the raw curve is drawn semi-transparent and the
    smoothed curve (mean over ``EPISODES_WINDOW`` via ``window_func``) is
    drawn on top with the run name as legend label. The noisy run uses
    palette colour 1 and the surrogate run palette colour 2.

    :param dirs: mapping with the keys ``"noisy"`` and ``"surrogate"`` giving
        the directories passed to ``load_results``
    :param num_timesteps: rows are kept while the cumulative sum of the
        ``l`` column is at most this value
    :param xaxis: x-axis selector forwarded to ``ts2xy``; also the x label
    :param title: figure title
    """
    import seaborn as sns
    sns.set()
    sns.set_color_codes()

    for palette_idx, variant in enumerate(("noisy", "surrogate"), start=1):
        frame = load_results(dirs[variant])
        frame = frame[frame.l.cumsum() <= num_timesteps]
        xy = ts2xy(frame, xaxis)
        raw_x, raw_y = xy[0], xy[1]

        colour = sns.color_palette()[palette_idx]
        plt.plot(raw_x, raw_y, alpha=0.4, linewidth=0.8, c=colour)

        # Average over the last EPISODES_WINDOW episodes.
        smooth_x, smooth_y = window_func(raw_x, raw_y, EPISODES_WINDOW,
                                         np.mean)
        plt.plot(smooth_x, smooth_y, linewidth=0.8, c=colour, label=variant)

    plt.title(title)
    plt.xlabel(xaxis)
    plt.ylabel("Episode Rewards")
    plt.legend()
    plt.tight_layout()
def plot_results(log_dir, dirs, num_timesteps, xaxis, yaxis, task_name, agent_infos):
    """Load grouped results, truncate them and hand them to ``plot_curves``.

    :param log_dir: forwarded to ``plot_curves``
    :param dirs: iterable of groups, each group an iterable of directories
        passed to ``load_results``
    :param num_timesteps: rows are kept while the cumulative sum of the
        ``l`` column is at most this value; also fixes the length of the
        generated time axis
    :param xaxis: x-axis selector forwarded to ``ts2xy`` and ``plot_curves``
    :param yaxis: y-axis selector forwarded to ``ts2xy`` and ``plot_curves``
    :param task_name: forwarded to ``plot_curves``
    :param agent_infos: forwarded to ``plot_curves``
    """
    # Time axis 1000, 2000, ... with int(num_timesteps / 1000) entries.
    time_step = 1000
    n_points = int(num_timesteps / time_step)
    time_steps = np.arange(0, n_points) * time_step + time_step

    xy_lists = []
    for run_group in dirs:
        truncated = []
        for run_dir in run_group:
            frame = load_results(run_dir)
            truncated.append(frame[frame.l.cumsum() <= num_timesteps])
        xy_lists.append([ts2xy(frame, xaxis, yaxis) for frame in truncated])

    plot_curves(xy_lists, xaxis, yaxis, task_name, log_dir, agent_infos, time_steps)
Пример #10
0
def plot_results(dirs, num_timesteps, xaxis, task_name):
    """
    Load the results of every folder and plot them as curves.

    :param dirs: ([str]) the folders the results are loaded from
    :param num_timesteps: (int or None) keep only the rows whose cumulative
        ``l`` value is at most this number; None keeps every row
    :param xaxis: (str) the axis for the x and y output
        (can be X_TIMESTEPS='timesteps', X_EPISODES='episodes' or X_WALLTIME='walltime_hrs')
    :param task_name: (str) the title of the task to plot
    """

    def _truncate(frame):
        # No limit requested: return the loaded results untouched.
        if num_timesteps is None:
            return frame
        return frame[frame.l.cumsum() <= num_timesteps]

    loaded = [_truncate(load_results(folder)) for folder in dirs]
    curves = [ts2xy(frame, xaxis) for frame in loaded]
    plot_curves(curves, xaxis, task_name)
Пример #11
0
def visualize(dir, seeds, name, color, lines):
    """Plot the mean +/- std "episode_successes" curve over several seeds.

    Results are loaded from ``"<dir>-seed<seed>"`` for every seed. Each
    series is passed through ``smooth_reward_curve`` and ``fix_point``, all
    series are cut to the length of the shortest one, and the mean curve
    with a one-standard-deviation band is drawn with matplotlib.

    :param dir: common prefix of the per-seed result directories
    :param seeds: iterable of seeds to load
    :param name: legend label of the mean curve
    :param color: colour of the mean curve and of the band
    :param lines: list the plotted line handle is appended to
    :return: ``lines``, after appending the new handle
    """
    per_seed = []
    for seed in seeds:
        loaded = load_results("{}-seed{}".format(dir, seed))
        per_seed.append(loaded["episode_successes"])

    xs, ys = [], []
    for series in per_seed:
        sx, sy = smooth_reward_curve(np.arange(0, len(series)), series)
        sx, sy = fix_point(sx, sy, 10)
        xs.append(sx)
        ys.append(sy)

    # Cut every curve to the shortest one so they stack into a 2-D array.
    shortest = min([len(sx) for sx in xs])
    xs = np.asarray([sx[:shortest] for sx in xs])
    ys = np.asarray([sy[:shortest] for sy in ys])

    x = np.mean(xs, axis=0)
    y_mean = np.mean(ys, axis=0)
    y_std = np.std(ys, axis=0)

    plt.fill_between(x,
                     list(y_mean - y_std),
                     list(y_mean + y_std),
                     interpolate=True,
                     facecolor=color,
                     linewidth=0.0,
                     alpha=0.3)
    handles = plt.plot(x, list(y_mean), label=name, color=color)
    lines.append(handles[0])
    return lines
Пример #12
0
import os, sys, shutil, argparse
sys.path.append(os.getcwd())

import numpy as np

#from baselines import bench
from baselines.bench.monitor import LoadMonitorResultsError
from baselines.bench.monitor import load_results
# Directory of the run whose logged results are summarised below.
dir_path = "/home/mansimov/logdir/ppo-mpi/Humanoid-v1-seed41/"

# Load the results and pull out the two per-episode series.
results = load_results(dir_path)
episode_rewards, episode_lengths = (results["episode_rewards"],
                                    results["episode_lengths"])

# Sum of all episode lengths.
print(sum(episode_lengths))

# Episode count and mean reward over the last 100 episodes.
summary = "Num episodes {}, Mean 100 episode reward {}"
print(summary.format(len(episode_rewards), np.mean(episode_rewards[-100:])))
Пример #13
0
def load_results(root_dir_or_dirs,
                 running_agents,
                 enable_progress=True,
                 enable_monitor=True,
                 verbose=False):
    '''
    load summaries of runs from a list of directories (including subdirectories)
    Arguments:

    root_dir_or_dirs: str or iterable of str - root directory (or directories) to walk; '~' is expanded

    running_agents: int or None - if not None, a directory whose path matches the run-name pattern
        '/<name>_<N>[_<M>]_(no_exp|exp_freq<K>)-<digit>' is skipped unless N equals running_agents
        (N is presumably the number of active agents - confirm against the code that names the run
        directories). Directories whose path does not match the pattern are never filtered out.

    enable_progress: bool - if True, will attempt to load data from progress.csv files (data saved by logger). Default: True

    enable_monitor: bool - if True, will attempt to load data from monitor.csv files (data saved by Monitor environment wrapper). Default: True

    verbose: bool - if True, will print out list of directories from which the data is loaded. Default: False


    Returns:
    List of Result objects with the following fields:
         - dirname - path to the directory data was loaded from
         - metadata - run metadata (such as command-line arguments and anything else in metadata.json file
         - monitor - if enable_monitor is True, this field contains pandas dataframe with loaded monitor.csv file (or aggregate of all *.monitor.csv files in the directory)
         - progress - if enable_progress is True, this field contains pandas dataframe with loaded progress.csv file

    Raises:
    AssertionError if one of the root directories does not exist
    '''
    import re
    # Both patterns are loop-invariant, so compile them once up front instead
    # of once per visited directory.
    run_dir_re = re.compile(
        r"\/(\D+)_(\d+)(?:_(\d+))?_(?:no_exp|exp_freq(\d+))-(\d)")
    # also match monitor files like 0.1.monitor.csv
    monitor_re = re.compile(r'(\d+\.)?(\d+\.)?monitor\.csv')
    data_files = {
        'metadata.json', 'monitor.json', 'progress.json', 'progress.csv'
    }

    if isinstance(root_dir_or_dirs, str):
        rootdirs = [osp.expanduser(root_dir_or_dirs)]
    else:
        rootdirs = [osp.expanduser(d) for d in root_dir_or_dirs]
    allresults = []
    for rootdir in rootdirs:
        assert osp.exists(rootdir), "%s doesn't exist" % rootdir
        for dirname, dirs, files in os.walk(rootdir):
            if running_agents is not None:
                run_meta_data = run_dir_re.search(dirname)
                # Group 2 is the first number after the run name. A path
                # that does not match the pattern is not filtered.
                if run_meta_data is not None and int(
                        run_meta_data.group(2)) != running_agents:
                    continue
            if '-proc' in dirname:
                # Clearing `files` does not affect os.walk's recursion;
                # nested directories are skipped because their path also
                # contains '-proc'.
                files[:] = []
                continue
            if data_files.intersection(files) or \
               any(monitor_re.match(f) for f in files):
                # Subdirectories are still descended into even when data
                # files are found here (pruning with dirs[:] = [] is
                # deliberately disabled).
                result = {'dirname': dirname}
                if "metadata.json" in files:
                    with open(osp.join(dirname, "metadata.json"), "r") as fh:
                        result['metadata'] = json.load(fh)
                progjson = osp.join(dirname, "progress.json")
                progcsv = osp.join(dirname, "progress.csv")
                if enable_progress:
                    # progress.json takes precedence over progress.csv.
                    if osp.exists(progjson):
                        result['progress'] = pandas.DataFrame(
                            read_json(progjson))
                    elif osp.exists(progcsv):
                        try:
                            # Formerly an unconditional debug print.
                            if verbose:
                                print(progcsv)
                            result['progress'] = read_csv(progcsv)
                        except pandas.errors.EmptyDataError:
                            print('skipping progress file in ', dirname,
                                  'empty data')
                    else:
                        if verbose:
                            print('skipping %s: no progress file' % dirname)

                if enable_monitor:
                    try:
                        result['monitor'] = pandas.DataFrame(
                            monitor.load_results(dirname))
                    except monitor.LoadMonitorResultsError:
                        print('skipping %s: no monitor files' % dirname)
                    except Exception as e:
                        # Best effort: one unreadable monitor file must not
                        # abort the whole scan.
                        print('exception loading monitor file in %s: %s' %
                              (dirname, e))

                # Keep the directory only if some data was actually loaded.
                if result.get('monitor') is not None or result.get(
                        'progress') is not None:
                    allresults.append(Result(**result))
                    if verbose:
                        print('successfully loaded %s' % dirname)

    if verbose: print('loaded %i results' % len(allresults))
    return allresults
Пример #14
0
import tensorflow as tf, numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes
from baselines.bench.monitor import load_results

# Load the monitor results of each log directory.
df_train = load_results('/home/lihepeng/Documents/Github/tmp/ev/cpo/train')
df_test = load_results('/home/lihepeng/Documents/Github/tmp/ev/cpo/test')

df_train_d_is_1 = load_results(
    '/home/lihepeng/Documents/Github/tmp/ev/cpo_v1/train/cpo_d_is_1')
df_test_d_is_1 = load_results(
    '/home/lihepeng/Documents/Github/tmp/ev/cpo_v1/test/cpo_d_is_1')

df_train_d_is_2 = load_results(
    '/home/lihepeng/Documents/Github/tmp/ev/cpo_v1/train/cpo_d_is_2')
df_test_d_is_2 = load_results(
    '/home/lihepeng/Documents/Github/tmp/ev/cpo_v1/test/cpo_d_is_2')

# Plain-text returns file; `logger_dir` keeps pointing at it afterwards.
logger_dir = '/home/lihepeng/Documents/Github/tmp/ev/sp/sp_returns.txt'
f_sp = np.loadtxt(logger_dir)

xmax = 3000000

# Rolling window over the "r" column of the training results.
rolling_window = 365 * 1
rolling_reward = pd.Series(df_train["r"]).rolling(rolling_window)
Пример #15
0
import tensorflow as tf, numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes
from baselines.bench.monitor import load_results

# Monitor results of the CPO train and test log directories.
df_train = load_results('/home/lihepeng/Documents/Github/tmp/ev/cpo/train')
df_test = load_results('/home/lihepeng/Documents/Github/tmp/ev/cpo/test')

# TO
f_to = np.loadtxt(
    '/home/lihepeng/Documents/Github/tmp/ev/to/optimal_returns.txt')

# SP
f_sp = np.loadtxt('/home/lihepeng/Documents/Github/tmp/ev/sp/sp_returns.txt')

# MPC
f_mpc = np.loadtxt(
    '/home/lihepeng/Documents/Github/tmp/ev/mpc/mpc_returns.txt')
s_mpc = np.loadtxt(
    '/home/lihepeng/Documents/Github/tmp/ev/mpc/mpc_safeties.txt')

# DDPG
ddpg_r_01 = np.load(
    '/home/lihepeng/Documents/Github/tmp/ev/ddpg/test/returns_0.1.npy')

# Left pointing at the next file to be read.
logger_dir = '/home/lihepeng/Documents/Github/tmp/ev/ddpg/test/safeties_0.1.npy'
Пример #16
0
import os
from glob import glob
from baselines.bench.monitor import load_results
from baselines.logger import TensorBoardOutputFormat
from collections import deque
import numpy as np

# Every monitor.csv below ../../result (relative to this script) is replayed
# into a TensorBoard log written to a 'tb2' directory next to it.
monitor_files = glob(os.path.join(os.path.dirname(__file__), '../../result',
                                  '**/monitor.csv'),
                     recursive=True)

# Number of preceding episodes averaged for each logged point.
length = 100

for monitor_path in monitor_files:

    run_dir = os.path.dirname(monitor_path)
    episodes = load_results(run_dir)
    tb = TensorBoardOutputFormat(os.path.join(run_dir, 'tb2'))

    # try/finally so the writer is closed even if a column is missing or a
    # write fails part-way through.
    try:
        t_values = episodes.t.values
        r_values = episodes.r.values
        l_values = episodes.l.values
        exec_values = episodes.best_exec.values

        for i in range(length, episodes.r.size):
            # Window of the `length` episodes before episode i, logged at
            # the time stamp of episode i.
            window = slice(i - length, i)
            kv = {
                'EpExecMean': np.mean(exec_values[window] * 1000),  # seconds to ms
                'EpRewMean': np.mean(r_values[window]),
                'EpLenMean': np.mean(l_values[window]),
            }
            tb.writekvs_wt(kv, t_values[i])
    finally:
        tb.close()
Пример #17
0
def load_results(root_dir_or_dirs, enable_progress=True, enable_monitor=True, verbose=False):
    '''
    Walk one or more root directories and collect a Result for every directory holding run data.

    Arguments:

    root_dir_or_dirs: str or iterable of str - root directory (or directories) to walk; '~' is expanded

    enable_progress: bool - if True, try to load progress.json or, failing that, progress.csv (data saved by logger). Default: True

    enable_monitor: bool - if True, try to load the monitor files (data saved by Monitor environment wrapper). Default: True

    verbose: bool - if True, print the directories the data is loaded from. Default: False


    Returns:
    List of Result objects with the following fields:
         - dirname - path to the directory data was loaded from
         - metadata - run metadata (such as command-line arguments and anything else in metadata.json file
         - monitor - if enable_monitor is True, pandas dataframe with the loaded monitor.csv file (or aggregate of all *.monitor.csv files in the directory)
         - progress - if enable_progress is True, pandas dataframe with the loaded progress file
    '''
    import re
    # also match monitor files like 0.1.monitor.csv
    monitor_re = re.compile(r'(\d+\.)?(\d+\.)?monitor\.csv')
    marker_files = set(['metadata.json', 'monitor.json', 'progress.json', 'progress.csv'])

    roots = [root_dir_or_dirs] if isinstance(root_dir_or_dirs, str) else root_dir_or_dirs
    rootdirs = [osp.expanduser(root) for root in roots]

    allresults = []
    for rootdir in rootdirs:
        assert osp.exists(rootdir), "%s doesn't exist"%rootdir
        for dirname, dirs, files in os.walk(rootdir):
            if '-proc' in dirname:
                files[:] = []
                continue

            looks_like_run = bool(marker_files.intersection(files)) or \
                any(monitor_re.match(f) for f in files)
            if not looks_like_run:
                continue

            # Subdirectories are still visited even though data files were
            # found here (pruning with dirs[:] = [] is deliberately disabled).
            result = {'dirname' : dirname}
            if "metadata.json" in files:
                with open(osp.join(dirname, "metadata.json"), "r") as fh:
                    result['metadata'] = json.load(fh)

            if enable_progress:
                progjson = osp.join(dirname, "progress.json")
                progcsv = osp.join(dirname, "progress.csv")
                # progress.json takes precedence over progress.csv.
                if osp.exists(progjson):
                    result['progress'] = pandas.DataFrame(read_json(progjson))
                elif osp.exists(progcsv):
                    try:
                        result['progress'] = read_csv(progcsv)
                    except pandas.errors.EmptyDataError:
                        print('skipping progress file in ', dirname, 'empty data')
                elif verbose:
                    print('skipping %s: no progress file'%dirname)

            if enable_monitor:
                try:
                    result['monitor'] = pandas.DataFrame(monitor.load_results(dirname))
                except monitor.LoadMonitorResultsError:
                    print('skipping %s: no monitor files'%dirname)
                except Exception as e:
                    print('exception loading monitor file in %s: %s'%(dirname, e))

            # Keep the directory only if some data was actually loaded.
            if result.get('monitor') is None and result.get('progress') is None:
                continue
            allresults.append(Result(**result))
            if verbose:
                print('successfully loaded %s'%dirname)

    if verbose: print('loaded %i results'%len(allresults))
    return allresults
Пример #18
0
def load_results(
    root_dir_or_dirs, enable_progress=True, enable_monitor=True, verbose=False
):
    """
    Collect run summaries from a directory tree (or several trees).

    Arguments:

    root_dir_or_dirs: str or iterable of str - root directory (or directories) to walk; '~' is expanded

    enable_progress: bool - if True, try to load progress.json or, failing that, progress.csv (data saved by logger). Default: True

    enable_monitor: bool - if True, try to load the monitor files (data saved by Monitor environment wrapper). Default: True

    verbose: bool - if True, print the directories the data is loaded from. Default: False


    Returns:
    List of Result objects with the following fields:
         - dirname - path to the directory data was loaded from
         - metadata - run metadata (such as command-line arguments and anything else in metadata.json file
         - monitor - if enable_monitor is True, pandas dataframe with the loaded monitor.csv file (or aggregate of all *.monitor.csv files in the directory)
         - progress - if enable_progress is True, pandas dataframe with the loaded progress file
    """
    import re

    # also match monitor files like 0.1.monitor.csv
    monitor_pattern = re.compile(r"(\d+\.)?(\d+\.)?monitor\.csv")
    marker_files = {"metadata.json", "monitor.json", "progress.json", "progress.csv"}

    def _has_run_data(filenames):
        """Tell whether a directory listing contains any known data file."""
        if marker_files.intersection(filenames):
            return True
        return any(monitor_pattern.match(name) for name in filenames)

    def _attach_progress(result, dirname):
        """Store the progress frame in ``result``; JSON wins over CSV."""
        json_path = osp.join(dirname, "progress.json")
        csv_path = osp.join(dirname, "progress.csv")
        if osp.exists(json_path):
            result["progress"] = pandas.DataFrame(read_json(json_path))
        elif osp.exists(csv_path):
            try:
                result["progress"] = read_csv(csv_path)
            except pandas.errors.EmptyDataError:
                print("skipping progress file in ", dirname, "empty data")
        elif verbose:
            print("skipping %s: no progress file" % dirname)

    def _attach_monitor(result, dirname):
        """Store the monitor frame in ``result``; failures are only reported."""
        try:
            result["monitor"] = pandas.DataFrame(monitor.load_results(dirname))
        except monitor.LoadMonitorResultsError:
            print("skipping %s: no monitor files" % dirname)
        except Exception as e:
            print("exception loading monitor file in %s: %s" % (dirname, e))

    if isinstance(root_dir_or_dirs, str):
        rootdirs = [osp.expanduser(root_dir_or_dirs)]
    else:
        rootdirs = [osp.expanduser(d) for d in root_dir_or_dirs]

    allresults = []
    for rootdir in rootdirs:
        assert osp.exists(rootdir), "%s doesn't exist" % rootdir
        for dirname, dirs, files in os.walk(rootdir):
            if "-proc" in dirname:
                files[:] = []
                continue
            if not _has_run_data(files):
                continue

            # Subdirectories are still visited even though data files were
            # found here (pruning with dirs[:] = [] is deliberately disabled).
            result = {"dirname": dirname}
            if "metadata.json" in files:
                with open(osp.join(dirname, "metadata.json"), "r") as fh:
                    result["metadata"] = json.load(fh)

            if enable_progress:
                _attach_progress(result, dirname)
            if enable_monitor:
                _attach_monitor(result, dirname)

            # Keep the directory only if some data was actually loaded.
            loaded_something = (
                result.get("monitor") is not None
                or result.get("progress") is not None
            )
            if loaded_something:
                allresults.append(Result(**result))
                if verbose:
                    print("successfully loaded %s" % dirname)

    if verbose:
        print("loaded %i results" % len(allresults))
    return allresults