Example #1
    gforce_threshold=1.0,

    # https://iopscience.iop.org/article/10.1088/0143-0807/37/6/065008/pdf
    # Importantly, they give the threshold for admissible acceleration
    # onset (jerk) as j = 15 g/s, i.e. ~150 m/s^3.
    jerk_threshold=150.0,  # 15g/s
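    # (Unit check: 15 g/s * 9.81 (m/s^2)/g ≈ 147 m/s^3, which is where the
    # ~150 m/s^3 figure above comes from.)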
    incent_win=True,
    constrain_controls=False,
    incent_yield_to_oncoming_traffic=True,
    physics_steps_per_observation=12,
    discrete_actions=COMFORTABLE_ACTIONS,
)

net_config = dict(hidden_units=(256, 256), activation=torch.nn.Tanh)

eg = ExperimentGrid(name=experiment_name)
eg.add('env_name', env_config['env_name'], '', False)
# eg.add('seed', 0)
eg.add(
    'resume',
    '/home/c2/src/tmp/spinningup/data/intersection_from_scratch_discrete_steer_and_accel/intersection_from_scratch_discrete_steer_and_accel_s0_2020_04-22_13-12.42.071188'
)
# eg.add('reinitialize_optimizer_on_resume', True)
# eg.add('num_inputs_to_add', 0)
# eg.add('pi_lr', 3e-6)
# eg.add('vf_lr', 1e-5)
# eg.add('boost_explore', 5)
eg.add('epochs', 20000)
eg.add('steps_per_epoch', 4000)
eg.add('ac_kwargs:hidden_sizes', net_config['hidden_units'], 'hid')
eg.add('ac_kwargs:activation', net_config['activation'], '')
Example #2
        for setting in settings:
            division = int(total / len(setting))
            index = int(remainder / division)
            remainder = remainder % division
            indexes.append(index)
            total = division
        actual_setting = {}
        for j in range(len(indexes)):
            actual_setting[setting_names[j]] = settings[j][indexes[j]]
        return indexes, actual_setting

    indexes, actual_setting = get_setting(args.setting, total, settings, setting_names)
####################################################################################################

    ## Use eg.add to add parameters from the settings, or parameters that apply to all jobs.
    eg = ExperimentGrid(name=exp_name)
    eg.add('ue_seed', 21, 'ues', True)
    eg.add('lr', actual_setting['lr'], 'lr', True)
    eg.add('border', actual_setting['border'], 'border', True)
    eg.add('wd', 0, 'wd', True)
    eg.add('buffer_type', 'FinalSigma0.5', 'Buf-', True)
    eg.add('buffer_size', '500K', '', True)
    eg.add('eval_freq', 500)
    eg.add('max_timesteps', 100000)
    eg.add('env_set', actual_setting['env_set'], '', True)
    eg.add('seed', actual_setting['seed'])

    eg.run(bc_ue_learn, num_cpu=args.cpu)

    print('\n###################################### GRID EXP END ######################################')
    print('total time for grid experiment:', time.time() - start_time)
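For context, the get_setting tail above decodes a single flat job index into
one concrete hyperparameter combination by treating the index as a mixed-radix
number over the settings lists. A minimal self-contained sketch of the same
decoding (the settings values below are made up for illustration):

settings = [[3e-4, 1e-4], ['Hopper-v2', 'Walker2d-v2'], [0, 10, 20]]
setting_names = ['lr', 'env_name', 'seed']

def get_setting(setting_number, total, settings, setting_names):
    indexes, remainder = [], setting_number
    for setting in settings:
        division = total // len(setting)  # block size for this setting
        indexes.append(remainder // division)
        remainder %= division
        total = division
    actual_setting = {name: settings[j][indexes[j]]
                      for j, name in enumerate(setting_names)}
    return indexes, actual_setting

total = 2 * 2 * 3  # product of all list lengths = number of jobs
print(get_setting(7, total, settings, setting_names))
# -> ([1, 0, 1], {'lr': 0.0001, 'env_name': 'Hopper-v2', 'seed': 10})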
Example #3
def parse_and_execute_grid_search(cmd, args):
    """Interprets algorithm name and cmd line args into an ExperimentGrid."""

    if cmd in BASE_ALGO_NAMES:
        backend = DEFAULT_BACKEND[cmd]
        print('\n\nUsing default backend (%s) for %s.\n'%(backend, cmd))
        cmd = cmd + '_' + backend

    algo = eval('spinup.'+cmd)

    # Before all else, check to see if any of the flags is 'help'.
    valid_help = ['--help', '-h', 'help']
    if any([arg in valid_help for arg in args]):
        print('\n\nShowing docstring for spinup.'+cmd+':\n')
        print(algo.__doc__)
        sys.exit()

    def process(arg):
        # Process an arg by eval-ing it, so users can specify more
        # than just strings at the command line (eg allows for
        # users to give functions as args).
        try:
            return eval(arg)
        except:
            return arg
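        # For example, '--act torch.nn.ReLU' evals to the class object
        # (assuming torch is importable in this scope), while a token like
        # 'Walker2d-v2' fails to eval and stays a plain string.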

    # Make first pass through args to build base arg_dict. Anything
    # with a '--' in front of it is an argument flag and everything after,
    # until the next flag, is a possible value.
    arg_dict = dict()
    for i, arg in enumerate(args):
        assert i > 0 or '--' in arg, \
            friendly_err("You didn't specify a first flag.")
        if '--' in arg:
            arg_key = arg.lstrip('-')
            arg_dict[arg_key] = []
        else:
            arg_dict[arg_key].append(process(arg))

    # Make second pass through, to catch flags that have no vals.
    # Assume such flags indicate that a boolean parameter should have
    # value True.
    for k,v in arg_dict.items():
        if len(v) == 0:
            v.append(True)

    # Third pass: check for user-supplied shorthands, where a key has
    # the form --keyname[kn]. The thing in brackets, 'kn', is the
    # shorthand. NOTE: modifying a dict while looping through its
    # contents is dangerous, and breaks in 3.6+. We loop over a fixed list
    # of keys to avoid this issue.
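    # Example: '--steps_per_epoch[spe] 4000' stores the shorthand 'spe'
    # for the key 'steps_per_epoch'.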
    given_shorthands = dict()
    fixed_keys = list(arg_dict.keys())
    for k in fixed_keys:
        p1, p2 = k.find('['), k.find(']')
        if p1 >= 0 and p2 >= 0:
            # Both '[' and ']' found, so shorthand has been given
            k_new = k[:p1]
            shorthand = k[p1+1:p2]
            given_shorthands[k_new] = shorthand
            arg_dict[k_new] = arg_dict[k]
            del arg_dict[k]

    # Penultimate pass: sugar. Allow some special shortcuts in arg naming,
    # eg treat "env" the same as "env_name". This is super specific
    # to Spinning Up implementations, and may be hard to maintain.
    # These special shortcuts are described by SUBSTITUTIONS.
    for special_name, true_name in SUBSTITUTIONS.items():
        if special_name in arg_dict:
            # swap it in arg dict
            arg_dict[true_name] = arg_dict[special_name]
            del arg_dict[special_name]

        if special_name in given_shorthands:
            # point the shortcut to the right name
            given_shorthands[true_name] = given_shorthands[special_name]
            del given_shorthands[special_name]

    # Final pass: check for the special args that go to the 'run' command
    # for an experiment grid, separate them from the arg dict, and make sure
    # that they have unique values. The special args are given by RUN_KEYS.
    run_kwargs = dict()
    for k in RUN_KEYS:
        if k in arg_dict:
            val = arg_dict[k]
            assert len(val) == 1, \
                friendly_err("You can only provide one value for %s."%k)
            run_kwargs[k] = val[0]
            del arg_dict[k]

    # Determine experiment name. If not given by user, will be determined
    # by the algorithm name.
    if 'exp_name' in arg_dict:
        assert len(arg_dict['exp_name']) == 1, \
            friendly_err("You can only provide one value for exp_name.")
        exp_name = arg_dict['exp_name'][0]
        del arg_dict['exp_name']
    else:
        exp_name = 'cmd_' + cmd

    # Make sure that if num_cpu > 1, the algorithm being used is compatible
    # with MPI.
    if 'num_cpu' in run_kwargs and not(run_kwargs['num_cpu'] == 1):
        assert cmd in add_with_backends(MPI_COMPATIBLE_ALGOS), \
            friendly_err("This algorithm can't be run with num_cpu > 1.")

    # Special handling for environment: make sure that env_name is a real,
    # registered gym environment.
    valid_envs = [e.id for e in list(gym.envs.registry.all())]
    assert 'env_name' in arg_dict, \
        friendly_err("You did not give a value for --env_name! Add one and try again.")
    for env_name in arg_dict['env_name']:
        err_msg = dedent("""

            %s is not registered with Gym.

            Recommendations:

                * Check for a typo (did you include the version tag?)

                * View the complete list of valid Gym environments at

                    https://gym.openai.com/envs/

            """%env_name)
        assert env_name in valid_envs, err_msg


    # Construct and execute the experiment grid.
    eg = ExperimentGrid(name=exp_name)
    for k,v in arg_dict.items():
        eg.add(k, v, shorthand=given_shorthands.get(k))
    eg.run(algo, **run_kwargs)
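A hedged usage sketch for the parser above (the flag values are illustrative;
it assumes spinup's SUBSTITUTIONS maps 'env' to 'env_name' and 'hid' to
'ac_kwargs:hidden_sizes', and that RUN_KEYS includes 'num_cpu', as in the
Spinning Up source):

cmd = 'ppo'
args = ['--env', 'CartPole-v0',
        '--hid[h]', '[32,32]', '[64,64]',  # two grid values, shorthand 'h'
        '--seed', '0', '10',
        '--num_cpu', '2']
parse_and_execute_grid_search(cmd, args)  # 2 sizes x 2 seeds = 4 experiments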
Example #4
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--cpu', type=int, default=4)
parser.add_argument('--num_runs', type=int, default=1)
parser.add_argument('--psp_type',
                    type=str,
                    default='Rand',
                    help='Rand, Ones, Binary, Proposed, Sanity')
parser.add_argument('--hidden_sizes',
                    type=int,
                    nargs='+',
                    default=(160, 160, 160, 160, 160, 160))
args = parser.parse_args()
hidden_sizes_name = '_'.join([str(num) for num in args.hidden_sizes])
#eg = ExperimentGrid(name='superpos_sac-MT10_with_bias_%s_context_q_%s' % (args.psp_type, hidden_sizes_name))
eg = ExperimentGrid(name='TIMETEST')
eg.add('env_name', 'MT10Helper-v0', '', True)
eg.add('num_tasks', 10)
eg.add('batch_size', 128)  # per-task batch size; the effective batch is 128 x 10
eg.add('psp_type', args.psp_type)
eg.add('seed', [10 * i for i in range(args.num_runs)])
eg.add('epochs', 1000)
eg.add('steps_per_epoch', TASK_HORIZON * PATHS_PER_TASK * NUM_TASKS)
eg.add('update_after', TASK_HORIZON * NUM_TASKS * PATHS_PER_TASK)
eg.add('lr', [3e-4])
eg.add('start_steps', TASK_HORIZON * PATHS_PER_TASK * NUM_TASKS)
#eg.add('update_every', NUM_TASKS * )
eg.add('num_test_episodes', 10 * NUM_TASKS)
eg.add('ac_kwargs:hidden_sizes', [tuple(args.hidden_sizes)], 'hid')
eg.add('ac_kwargs:activation', [torch.nn.ReLU], '')
eg.run(psp_sac_pytorch, num_cpu=args.cpu)
Example #5
            logger.log_tabular('Time', time.time() - start_time)
            logger.dump_tabular()


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', type=str, default='HalfCheetah-v2')
    parser.add_argument('--h', type=int, default=300)
    parser.add_argument('--l', type=int, default=1)
    parser.add_argument('--num_runs', '-n', type=int, default=3)
    parser.add_argument('--steps_per_epoch', '-s', type=int, default=5000)
    parser.add_argument('--total_steps', '-t', type=int, default=int(5e4))
    args = parser.parse_args()

    def td3_with_actor_critic(**kwargs):
        td3(ac_kwargs=dict(hidden_sizes=[args.h] * args.l),
            start_steps=5000,
            max_ep_len=150,
            batch_size=64,
            polyak=0.95,
            **kwargs)

    eg = ExperimentGrid(name='ex2-3_td3')
    eg.add('replay_size', int(args.total_steps))
    eg.add('env_name', args.env, '', True)
    eg.add('seed', [10 * i for i in range(args.num_runs)])
    eg.add('epochs', int(args.total_steps / args.steps_per_epoch))
    eg.add('steps_per_epoch', args.steps_per_epoch)
    eg.add('remove_action_clip', [False, True])
    eg.run(td3_with_actor_critic, datestamp=True)
Example #6
import gym
import tensorflow as tf

from spinup.algos.ddpg.ddpg import ddpg
from spinup.utils.run_utils import ExperimentGrid

env_fn = lambda: gym.make('Pendulum-v0')

network_kwargs = dict(hidden_sizes=[400, 300], activation=tf.nn.relu)
logger_kwargs = dict(output_dir='logging/NAF', exp_name='naf - tests')

steps_per_epoch = 1000
epochs = 100
start_steps = 50
algorithm = 'naf'

if __name__ == '__main__':

    eg = ExperimentGrid(name='ddpg-bench-long')
    eg.add('env_name', 'Pendulum-v0', '', True)
    eg.add('seed', [10 * i for i in range(4)])
    eg.add('epochs', 20)
    eg.add('steps_per_epoch', 1000)
    eg.add('ac_kwargs:hidden_sizes', [(100, 100), (400, 300)], 'hid')
    eg.add('ac_kwargs:activation', [tf.nn.relu], '')
    eg.run(ddpg, num_cpu=4, data_dir='logging/DDPG')
#
# # agent = naf(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=100, epochs=25, logger_kwargs=logger_kwargs)
#
# # agent = spinup.ddpg(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=500, epochs=250, logger_kwargs=logger_kwargs,
# #                     start_steps=start_steps)

# tf.reset_default_graph()
# naf(env_fn=env_fn, ac_kwargs=network_kwargs, steps_per_epoch=steps_per_epoch, epochs=epochs, logger_kwargs=logger_kwargs,
Example #7
from spinup.utils.run_utils import ExperimentGrid

# LunarLanderContinuous-v2
# MontezumaRevenge-ram-v0
# Enduro-ram-v0
# MsPacman-ram-v0
# Ant-v2
# HumanoidStandup-v2

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--cpu', type=int, default=4)
    parser.add_argument('--num_runs', type=int, default=10)
    # parser.add_argument('--clip_ratio', type=int, )
    args = parser.parse_args()

    MontezumaRevenge = ExperimentGrid(name='vpg-nueve-singular')
    MontezumaRevenge.add('env_name', 'MontezumaRevenge-ram-v0', '', True)
    # eg.add('clip_ratio', [0.1,0.2])
    MontezumaRevenge.add('seed', [10 * i for i in range(args.num_runs)])
    MontezumaRevenge.add('epochs', 10)
    MontezumaRevenge.add('steps_per_epoch', [4000, 100])
    MontezumaRevenge.add('optimizer', [
        'GradientDescentOptimizer', 'MomentumOptimizer',
        'ProximalAdagradOptimizer', 'ProximalGradientDescentOptimizer',
        'RMSPropOptimizer', 'AdaMaxOptimizer', 'AdamGSOptimizer',
        'AdamWOptimizer', 'AddSignOptimizer', 'GGTOptimizer', 'LARSOptimizer',
        'LazyAdamGSOptimizer', 'LazyAdamOptimizer', 'MomentumWOptimizer',
        'NadamOptimizer', 'PowerSignOptimizer', 'RegAdagradOptimizer',
        'ShampooOptimizer'
    ])
    MontezumaRevenge.add('ac_kwargs:hidden_sizes', [(32, ), (64, 64)], 'hid')
Example #8
import tensorflow as tf
from spinup.utils.run_utils import ExperimentGrid

# LunarLanderContinuous-v2
# MontezumaRevenge-ram-v0
# Enduro-ram-v0
# MsPacman-ram-v0
# Ant-v2
# HumanoidStandup-v2

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--cpu', type=int, default=4)
    parser.add_argument('--num_runs', type=int, default=10)
    # parser.add_argument('--clip_ratio', type=int, )
    args = parser.parse_args()

    LunarLanderContinuous = ExperimentGrid(name='vpg-ocho')
    LunarLanderContinuous.add('env_name', 'LunarLanderContinuous-v2', '', True)
    # eg.add('clip_ratio', [0.1,0.2])
    LunarLanderContinuous.add('seed', [10 * i for i in range(args.num_runs)])
    LunarLanderContinuous.add('epochs', 10)
    LunarLanderContinuous.add('steps_per_epoch', [4000, 100])
    LunarLanderContinuous.add('optimizer', [
        'NadamOptimizer', 'PowerSignOptimizer', 'RegAdagradOptimizer',
        'ShampooOptimizer'
    ])
    LunarLanderContinuous.add('ac_kwargs:hidden_sizes', [(32, ), (64, 64)],
                              'hid')
    LunarLanderContinuous.add('ac_kwargs:activation', [
        tf.nn.relu, tf.nn.relu6, tf.nn.crelu, tf.nn.elu, tf.nn.selu,
        tf.nn.softplus, tf.nn.softsign, tf.sigmoid, tf.tanh
    ], '')
Example #9
from spinup.utils.run_utils import ExperimentGrid
from spinup import ppo
import torch

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--cpu', type=int, default=4)
    parser.add_argument('--num_runs', type=int, default=3)
    args = parser.parse_args()

    eg = ExperimentGrid(name='ppo-bench')
    eg.add('env_name', 'CartPole-v0', '', True)
    eg.add('seed', [10 * i for i in range(args.num_runs)])
    eg.add('epochs', 10)
    eg.add('steps_per_epoch', 4000)
    eg.add('ac_kwargs:hidden_sizes', [(32, ), (64, 64)], 'hid')
    eg.add('ac_kwargs:activation', [torch.tanh, torch.relu], '')
    eg.run(ppo, num_cpu=args.cpu)
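The grid above expands to 3 seeds x 2 hidden-size settings x 2 activations =
12 variants, each launched as its own experiment. A small sketch for
inspecting a grid before running it (ExperimentGrid.print and
ExperimentGrid.variants exist in spinup.utils.run_utils):

eg = ExperimentGrid(name='ppo-bench-preview')
eg.add('env_name', 'CartPole-v0', '', True)
eg.add('seed', [0, 10, 20])
eg.add('ac_kwargs:hidden_sizes', [(32,), (64, 64)], 'hid')
eg.print()                 # pretty-print every parameter and its values
print(len(eg.variants()))  # 3 seeds x 2 hidden sizes = 6 variants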
Example #10
from spinup.utils.run_utils import ExperimentGrid
from spinup import fgym_trunk_sac_discrete_v2
import torch

if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--cpu', type=int, default=1)
    parser.add_argument('--num_runs', type=int, default=2)
    args = parser.parse_args()

    eg = ExperimentGrid(name='sac-pyt')
    from src.Financial_gym.financial_env.fgym import *

    env = lambda: Assembly_Fin_for_pic(
        data_path='src/Financial_gym/data/pic/',  # directory where the data files are stored
        game=continus_Daily_Fin_Futures_holding_reward_pic,
        seed=123,
        windows=50,
        init_capital=1000000,
        show_statistics=True,
        drawdown=0.1)

    eg.add('environment', env)
    eg.add('show_kwargs_json', True)
    eg.add('env_name', 'Financial_gym_pic_daily', '', True)

    eg.add('seed', [10 * i for i in range(args.num_runs)])
    eg.add('epochs', 400)
    eg.add('save_freq', 5)  # save a checkpoint every 5 epochs
Example #11
import os

from spinup.utils.run_utils import ExperimentGrid
from spinup import ppo_pytorch
import torch
from gym_match_input_continuous.experiments import utils

experiment_name = os.path.basename(__file__)[:-3]
notes = """
Garbage into RL treasure out
"""

env_config = dict(env_name='match-input-continuous-v0', )

net_config = dict(hidden_units=(32, 32), activation=torch.nn.Tanh)

eg = ExperimentGrid(name=experiment_name)
eg.add('env_name', env_config['env_name'], '', False)
# eg.add('gamma', 0.999)  # Lower gamma so seconds of effective horizon remains at 10s with current physics steps = 12 * 1/60s * 1 / (1-gamma)
eg.add('epochs', 1000)
eg.add('steps_per_epoch', 500)
eg.add('try_rollouts', 2)
eg.add('shift_advs_pct', 90)
eg.add('take_worst_rollout', True)
eg.add('steps_per_try_rollout', 1)
eg.add('ac_kwargs:hidden_sizes', net_config['hidden_units'], 'hid')
eg.add('ac_kwargs:activation', net_config['activation'], '')
eg.add('notes', notes, '')
eg.add('run_filename', os.path.realpath(__file__), '')
eg.add('env_config', env_config, '')

Example #12
# Import packages and environment
import numpy as np
from spinup.utils.run_utils import ExperimentGrid
from spinup import soc_pytorch
# from spinup import sac_pytorch
# from spinup import ppo_pytorch
import torch as th

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_runs', type=int, default=3)
    args = parser.parse_args()

    eg = ExperimentGrid(name='hac-hopper-soc')
    # eg.add('env_name', 'Walker2DBulletEnv-v0', '', True)
    eg.add('env_name', 'HopperBulletEnv-v0', '', True)

    eg.add('seed', [10 * i for i in range(args.num_runs)])
    eg.add('epochs', 250)
    eg.add('N_options', [2])  # 2,3
    eg.add('ac_kwargs:hidden_sizes', [[128, 256, 128]], 'hid')
    eg.add('alpha', [0.1, 0.2])
    eg.add('c', [0.2])  # 0.1, 0.2, 0.3 and possibly 0.0
    eg.run(soc_pytorch)
Example #13
            if args.save_model_interval > 0 and (i_iter+1) % args.save_model_interval == 0:
                to_device(torch.device('cpu'), policy_net, value_net)
                pickle.dump((policy_net, value_net, running_state),
                            open(os.path.join(assets_dir(), 'learned_models/{}_ppo.p'.format(args.env_name)), 'wb'))
                to_device(device, policy_net, value_net)

        #     """clean up gpu memory"""
            torch.cuda.empty_cache()
        return agent.evaluate()

    print('a')
    print(config)
    print(args)
    return main_loop(config)

def mock_train(**kwargs):
    config = {
        "lr": kwargs['lr'],
        "gamma": kwargs['gamma']
    }
    print('a')
    print(config)
    print(args)


eg = ExperimentGrid('hopper')
eg.add('lr', [1e-4])
eg.add('gamma', [0.99, 0.95])
eg.run(train)

Example #14
    expect_normalized_action_deltas=False,
    jerk_penalty_coeff=0.20 / (60*100),
    gforce_penalty_coeff=0.06,
    collision_penalty_coeff=4,
    gforce_threshold=None,
    incent_win=True,
    constrain_controls=False,
    incent_yield_to_oncoming_traffic=True,
)

net_config = dict(
    hidden_units=(256, 256),
    activation=torch.nn.Tanh
)

eg = ExperimentGrid(name=experiment_name)
eg.add('env_name', env_config['env_name'], '', False)
# eg.add('seed', 0)
eg.add('resume', '/home/c2/src/tmp/spinningup/data/intersection_2_agents_fine_tune_add_left_yield/intersection_2_agents_fine_tune_add_left_yield_s0_2020_03-23_13-16.15')
eg.add('reinitialize_optimizer_on_resume', False)  # Old optimizer had only 28 inputs despite NN having 29!
eg.add('num_inputs_to_add', 0)
eg.add('pi_lr', 3e-6)
eg.add('vf_lr', 1e-5)
eg.add('boost_explore', 5)
eg.add('epochs', 8000)
eg.add('steps_per_epoch', 32000)
eg.add('ac_kwargs:hidden_sizes', net_config['hidden_units'], 'hid')
eg.add('ac_kwargs:activation', net_config['activation'], '')
eg.add('notes', notes, '')
eg.add('run_filename', os.path.realpath(__file__), '')
eg.add('env_config', env_config, '')
Example #15
from spinup.utils.run_utils import ExperimentGrid
from spinup import ppo_pytorch

import torch

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--cpu', type=int, default=4)
    parser.add_argument('--num_runs', type=int, default=3)
    args = parser.parse_args()

    eg = ExperimentGrid(name='ppo-test-lunar')

    #eg.add('env_name', 'MountainCar-v0', '', True)
    #eg.add('env_name', 'CartPole-v0', '', True)
    #eg.add('env_name', 'gym_multiagent_control:foo-v0', '', True)
    eg.add('env_name', 'gym_multiagent_control:foo-v2', '', True)
    eg.add('seed', [10 * i for i in range(args.num_runs)])
    eg.add('epochs', 10)
    eg.add('steps_per_epoch', 4000)
    eg.add('ac_kwargs:hidden_sizes', [(32, ), (64, 64)], 'hid')
    eg.add('ac_kwargs:activation', [torch.nn.Tanh, torch.nn.ReLU], '')
    eg.run(ppo_pytorch, num_cpu=args.cpu)
Example #16
        eg.add('epochs', 10)
        eg.add('steps_per_epoch', 5000)

        # Use default hidden sizes in actor_critic function, comment below out
        eg.add('ac_kwargs:hidden_sizes', [(32,)], 'hid')
        eg.add('ac_kwargs:activation', [tf.nn.relu], '')
        
        eg.run(algo[i], num_cpu=args.cpu)
'''



# Training
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--cpu', type=int, default=1)
    parser.add_argument('--num_runs', type=int, default=1)
    args = parser.parse_args()    

    eg = ExperimentGrid(name='ex4_trpo_30ep')
    eg.add('env_name', 'Acrobot-v1', '', True)
    eg.add('seed', [10*i for i in range(args.num_runs)])
    eg.add('epochs', 30)
    #eg.add('steps_per_epoch', 4000)
    eg.add('max_ep_len', 1500)
    eg.add('ac_kwargs:activation', [tf.nn.relu], '')
    eg.add('ac_kwargs:hidden_sizes', [(16,),(16,16),(8,),(8,8),(4,),(4,4)], 'hid')
    eg.run(trpo, num_cpu=args.cpu)

Example #17
env_config = dict(
    env_name='deepdrive-2d-intersection-w-gs-allow-decel-v0',
    is_intersection_map=True,
    expect_normalized_action_deltas=False,
    jerk_penalty_coeff=1.5 * 0.20 / (60 * 100),
    gforce_penalty_coeff=1.5 * 0.06,
    collision_penalty_coeff=1.5,
    end_on_harmful_gs=False,
    incent_win=True,
    constrain_controls=False,
)

net_config = dict(hidden_units=(256, 256), activation=torch.nn.Tanh)

eg = ExperimentGrid(name=experiment_name)
eg.add('env_name', env_config['env_name'], '', False)
# eg.add('seed', 0)
eg.add(
    'resume',
    '/home/c2/src/tmp/spinningup/data/intersection_2_agents_fine_tune_collision_resume_add_comfort1/intersection_2_agents_fine_tune_collision_resume_add_comfort1_s0_2020_03-13_21-45.39'
)
eg.add('reinitialize_optimizer_on_resume', True)
# Lowering the learning rates doesn't seem to have an effect, but we play it
# safe since we're not restoring the Adam optimizer state.
eg.add('pi_lr', 3e-6)
eg.add('vf_lr', 1e-5)
eg.add('epochs', 8000)
eg.add('steps_per_epoch', 16000)
Example #18
    parser.add_argument('--cpu', type=str, default='auto')
    parser.add_argument('--num_runs', type=int, default=3)
    args = parser.parse_args()

    # Build architectures of depth 1..x at each width: [64], [64, 64], ...,
    # then likewise for 128, 256, and 512 (same order as the original loops).
    x = 10
    layers_itr = []
    for width in (64, 128, 256, 512):
        layers = []
        for _ in range(x):
            layers.append(width)
            layers_itr.append(list(layers))

    eg = ExperimentGrid(name='td3-bench')
    eg.add('env_name', 'MountainCarContinuous-v0', '', True)
    eg.add('seed', [10 * i for i in range(args.num_runs)])
    eg.add('epochs', 10)
    eg.add('steps_per_epoch', 4000)
    eg.add('ac_kwargs:hidden_sizes', layers_itr, 'hid')
    eg.add('ac_kwargs:activation', [tf.nn.relu], '')
    eg.run(td3, num_cpu=args.cpu)
Example #19
def train():
    eg = ExperimentGrid(name=experiment_name)
    eg.add('env_name', env_config['env_name'], '', False)
    # eg.add('seed', 0)
    eg.add('epochs', 8000)
    eg.add('gamma', 0.95)
    eg.add('lam', 0.835)
    # eg.add('steps_per_epoch', 4000)
    eg.add('ac_kwargs:hidden_sizes', (256, 256), 'hid')
    eg.add('ac_kwargs:activation', torch.nn.Tanh, '')
    eg.add('notes', notes, '')
    eg.add('run_filename', os.path.realpath(__file__), '')
    eg.add('env_config', env_config, '')
    eg.run(ppo_pytorch)
Example #20
from spinup import vpg_pytorch
from spinup.utils.run_utils import ExperimentGrid
import torch

if __name__ == '__main__':
    grid = ExperimentGrid(name='vpg-torch-cart-bench')
    grid.add('env_name', 'CartPole-v0')
    grid.add('seed', [0])
    grid.add('epochs', 2)
    grid.add('steps_per_epoch', 100)
    grid.add('gamma', [0, 0.5, 1])
    grid.add('ac_kwargs:hidden_sizes', [(32, ), (64, 64)], 'hid')
    grid.add('ac_kwargs:activation', [torch.nn.Tanh], '')

    grid.run(vpg_pytorch, num_cpu=4)
Example #21
env_config = dict(
    env_name='deepdrive-2d-intersection-w-gs-allow-decel-v0',
    is_intersection_map=True,
    expect_normalized_action_deltas=False,
    jerk_penalty_coeff=0.20 / (60 * 100),
    gforce_penalty_coeff=0.06,
    collision_penalty_coeff=4,
    end_on_harmful_gs=False,
    incent_win=True,
    constrain_controls=False,
    incent_yield_to_oncoming_traffic=True,
)

net_config = dict(hidden_units=(256, 256), activation=torch.nn.Tanh)

eg = ExperimentGrid(name=experiment_name)
eg.add('env_name', env_config['env_name'], '', False)
# eg.add('seed', 0)
eg.add(
    'resume',
    '/home/c2/src/tmp/spinningup/data/intersection_2_agents_fine_tune_add_left_yield2/intersection_2_agents_fine_tune_add_left_yield2_s0_2020_03-23_22-40.11'
)
eg.add('reinitialize_optimizer_on_resume', True)
eg.add('num_inputs_to_add', 0)
eg.add('pi_lr', 3e-6)
eg.add('vf_lr', 1e-5)
# eg.add('boost_explore', 5)
eg.add('epochs', 8000)
eg.add('steps_per_epoch', 32000)
eg.add('ac_kwargs:hidden_sizes', net_config['hidden_units'], 'hid')
eg.add('ac_kwargs:activation', net_config['activation'], '')
Example #22
        eg.add('steps_per_epoch', 5000)

        # Use default hidden sizes in actor_critic function, comment below out
        eg.add('ac_kwargs:hidden_sizes', [(16,16)], 'hid')
        eg.add('ac_kwargs:activation', [tf.nn.relu], '')
        
        eg.run(algo[i], num_cpu=args.cpu)
'''



# Training
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--cpu', type=int, default=1)
    parser.add_argument('--num_runs', type=int, default=1)
    args = parser.parse_args()    

    eg = ExperimentGrid(name=ex_number+'_ddpg_100ep')
    eg.add('env_name', env_name, '', True)
    eg.add('seed', [10*i for i in range(args.num_runs)])
    eg.add('epochs', 100)
    #eg.add('steps_per_epoch', 4000)
    eg.add('max_ep_len', 1500)
    eg.add('ac_kwargs:activation', [tf.nn.relu], '')
    eg.add('ac_kwargs:hidden_sizes', [(64,64)], 'hid')
    eg.run(ddpg, num_cpu=args.cpu)

Example #23
env_config = dict(
    env_name='deepdrive-2d-intersection-w-gs-allow-decel-v0',
    is_intersection_map=True,
    expect_normalized_action_deltas=False,
    jerk_penalty_coeff=0,
    gforce_penalty_coeff=0,
    collision_penalty_coeff=1,
    end_on_harmful_gs=False,
    incent_win=True,
    constrain_controls=False,
)

net_config = dict(hidden_units=(256, 256), activation=torch.nn.Tanh)

eg = ExperimentGrid(name=experiment_name)
eg.add('env_name', env_config['env_name'], '', False)
# eg.add('seed', 0)
eg.add(
    'resume',
    '/home/c2/src/tmp/spinningup/data/deepdrive-2d-intersection-no-constrained-controls-example/deepdrive-2d-intersection-no-constrained-controls-example_s0_2020_03-12_18-09.49'
)
eg.add('reinitialize_optimizer_on_resume', True)
# Lowering the learning rates doesn't seem to have an effect, but we play it
# safe since we're not restoring the Adam optimizer state.
eg.add('pi_lr', 3e-5)
eg.add('vf_lr', 1e-4)
eg.add('epochs', 8000)
# eg.add('steps_per_epoch', 4000)
Example #24
    lane_margin=0.2,

    # https://iopscience.iop.org/article/10.1088/0143-0807/37/6/065008/pdf
    # Importantly, they give the threshold for admissible acceleration
    # onset (jerk) as j = 15 g/s, i.e. ~150 m/s^3.
    jerk_threshold=150.0,  # 15g/s
    incent_win=True,
    constrain_controls=False,
    incent_yield_to_oncoming_traffic=True,
    physics_steps_per_observation=12,
    discrete_actions=COMFORTABLE_ACTIONS2,
)

net_config = dict(hidden_units=(256, 256), activation=torch.nn.Tanh)

eg = ExperimentGrid(name=experiment_name)
eg.add('env_name', env_config['env_name'], '', False)
# eg.add('seed', 0)
eg.add(
    'resume',
    '/home/c2/src/tmp/spinningup/data/intersection_discrete_micro_turn_lower_lane_pen2_diag_lane19/intersection_discrete_micro_turn_lower_lane_pen2_diag_lane19_s0_2020_05-15_18-26.32.741210'
)
# eg.add('reinitialize_optimizer_on_resume', True)
# eg.add('num_inputs_to_add', 0)
eg.add('pi_lr', 3e-4)  # default pi_lr=3e-4
eg.add('vf_lr', 1e-3)  # default vf_lr=1e-3,
# eg.add('boost_explore', 5)
eg.add('epochs', 20000)
eg.add('steps_per_epoch', 8000)
eg.add('ac_kwargs:hidden_sizes', net_config['hidden_units'], 'hid')
eg.add('ac_kwargs:activation', net_config['activation'], '')
Example #25
def run_experiment(args):
    def env_fn():
        import HumanoidRL
        return gym.make(args.env_name)

    eg = ExperimentGrid(name=args.exp_name)
    eg.add('env_fn', env_fn)
    eg.add('seed', [10 * i for i in range(args.num_runs)])
    eg.add('epochs', 500)
    eg.add('steps_per_epoch', 10000)
    eg.add('save_freq', 20)
    eg.add('max_ep_len', 200)
    eg.add('ac_kwargs:activation', tf.tanh, '')
    eg.run(ppo_tf1)
Example #26
    # Importantly, they give the threshold for admissible acceleration
    # onset (jerk) as j = 15 g/s, i.e. ~150 m/s^3.
    jerk_threshold=150.0,  # 15g/s
    incent_win=True,
    constrain_controls=False,
    incent_yield_to_oncoming_traffic=True,
    physics_steps_per_observation=12,
    discrete_actions=COMFORTABLE_ACTIONS2,
)

net_config = dict(
    hidden_units=(256, 256),
    activation=torch.nn.Tanh
)

eg = ExperimentGrid(name=experiment_name)
eg.add('env_name', env_config['env_name'], '', False)
# eg.add('seed', 0)
eg.add('resume', '/home/c2/src/tmp/spinningup/data/intersection_discrete_micro_turn_lower_lane_pen2_diag_lane17_2/intersection_discrete_micro_turn_lower_lane_pen2_diag_lane17_2_s0_2020_05-13_14-46.12.823730_snapshot2')
# eg.add('reinitialize_optimizer_on_resume', True)
# eg.add('num_inputs_to_add', 0)
# eg.add('pi_lr', 3e-6)
# eg.add('vf_lr', 1e-5)
# eg.add('boost_explore', 5)
eg.add('epochs', 20000)
eg.add('steps_per_epoch', 4000)
eg.add('ac_kwargs:hidden_sizes', net_config['hidden_units'], 'hid')
eg.add('ac_kwargs:activation', net_config['activation'], '')
eg.add('notes', notes, '')
eg.add('run_filename', os.path.realpath(__file__), '')
eg.add('env_config', env_config, '')
Example #27
    # Importantly, they give the threshold for admissible acceleration
    # onset (jerk) as j = 15 g/s, i.e. ~150 m/s^3.
    jerk_threshold=150.0,  # 15g/s
    incent_win=True,
    constrain_controls=False,
    incent_yield_to_oncoming_traffic=True,
    physics_steps_per_observation=12,
    discrete_actions=COMFORTABLE_ACTIONS2,
)

net_config = dict(
    hidden_units=(256, 256),
    activation=torch.nn.Tanh
)

eg = ExperimentGrid(name=experiment_name)
eg.add('env_name', env_config['env_name'], '', False)
# eg.add('seed', 0)
# eg.add('resume', '/home/c2/src/tmp/spinningup/data/intersection_2_agents_fine_tune_add_left_yield2/intersection_2_agents_fine_tune_add_left_yield2_s0_2020_03-23_22-40.11')
# eg.add('reinitialize_optimizer_on_resume', True)
# eg.add('num_inputs_to_add', 0)
# eg.add('pi_lr', 3e-6)
# eg.add('vf_lr', 1e-5)
# eg.add('boost_explore', 5)
eg.add('epochs', 20000)
eg.add('steps_per_epoch', 4000)
eg.add('ac_kwargs:hidden_sizes', net_config['hidden_units'], 'hid')
eg.add('ac_kwargs:activation', net_config['activation'], '')
eg.add('notes', notes, '')
eg.add('run_filename', os.path.realpath(__file__), '')
eg.add('env_config', env_config, '')
Example #28
def train():
    eg = ExperimentGrid(name=experiment_name)
    eg.add('env_name', env_config['env_name'], '', False)
    # eg.add('seed', 0)
    eg.add(
        'resume',
        '/home/c2/src/tmp/spinningup/data/intersection_2_agents_lower_gamma_snapshot/intersection_2_agents_lower_gamma_s0_2020_03-12_12-07.37'
    )
    eg.add('reinitialize_optimizer_on_resume', False)
    # Lowering the learning rates doesn't seem to have an effect, but we play
    # it safe since we're not restoring the Adam optimizer state.
    eg.add('pi_lr', 3e-5)
    eg.add('vf_lr', 1e-4)
    eg.add('epochs', 8000)
    eg.add('gamma', 0.95)
    eg.add('lam', 0.835)
    # eg.add('steps_per_epoch', 4000)
    eg.add('ac_kwargs:hidden_sizes', (256, 256), 'hid')
    eg.add('ac_kwargs:activation', torch.nn.Tanh, '')
    eg.add('notes', notes, '')
    eg.add('run_filename', os.path.realpath(__file__), '')
    eg.add('env_config', env_config, '')
    eg.run(ppo_pytorch)
Example #29
    jerk_penalty_coeff=3.3e-5,
    gforce_penalty_coeff=0.006 * 5,
    collision_penalty_coeff=4,
    lane_penalty_coeff=0.02,
    speed_reward_coeff=0.50,
    gforce_threshold=None,
    incent_win=True,
    constrain_controls=False,
    incent_yield_to_oncoming_traffic=True,
    physics_steps_per_observation=12,
    discrete_actions=COMFORTABLE_ACTIONS,
)

net_config = dict(hidden_units=(64, 64), activation=torch.nn.Tanh)

eg = ExperimentGrid(name=experiment_name)
eg.add('env_name', env_config['env_name'], '', False)
pso = env_config['physics_steps_per_observation']
effective_horizon_seconds = 10
eg.add(
    'gamma', 1 - pso / (effective_horizon_seconds * FPS)
)  # Lower gamma so seconds of effective horizon remains at 10s with current physics steps = 12 * 1/60s * 1 / (1-gamma)
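# (Worked check, assuming FPS == 60: gamma = 1 - 12 / (10 * 60) = 0.98, so the
# discounted horizon is 1 / (1 - gamma) = 50 observations, and
# 50 observations * 12 physics steps * (1/60) s = 10 s, as intended.)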
eg.add('epochs', 10000)
eg.add('steps_per_epoch', 8000)
eg.add('ac_kwargs:hidden_sizes', net_config['hidden_units'], 'hid')
eg.add('ac_kwargs:activation', net_config['activation'], '')
eg.add('notes', notes, '')
eg.add('run_filename', os.path.realpath(__file__), '')
eg.add('env_config', env_config, '')

Example #30
        remainder = setting_number
        for setting in settings:
            division = int(total / len(setting))
            index = int(remainder / division)
            remainder = remainder % division
            indexes.append(index)
            total = division
        actual_setting = {}
        for j in range(len(indexes)):
            actual_setting[setting_names[j]] = settings[j][indexes[j]]
        return indexes, actual_setting

    indexes, actual_setting = get_setting(args.setting, total, settings,
                                          setting_names)

    eg = ExperimentGrid(name=EXPERIMENT_NAME)
    # Use eg.add to add parameters from the settings, or parameters that apply
    # to all jobs. This part is now automated: as long as the settings were
    # added correctly to the arrays at the start of this program, they are
    # added to the experiment automatically.
    for i in range(len(actual_setting)):
        setting_name = setting_names[i]
        if setting_name != 'env_name' and setting_name != 'seed':
            eg.add(setting_name, actual_setting[setting_name],
                   setting_savename_prefix[i], whether_add_to_savename[i])

    eg.add('env_name', actual_setting['env_name'], '', True)
    eg.add('seed', actual_setting['seed'])

    eg.run(function_to_run, num_cpu=args.cpu, data_dir=save_data_dir)

    print(
        '\n###################################### GRID EXP END ######################################'
    )