示例#1
0
def run_experiment(**config):
    """Train an MPC controller backed by an MLP dynamics model.

    The experiment directory is ``<cwd>/data/<EXP_NAME>/<exp_name>``. The
    logger is pointed at it and the full configuration is written there as
    ``params.json`` before any component is built.

    Args:
        **config: Experiment settings passed as keyword arguments. Keys read
            here: ``exp_name`` (optional, defaults to ``''``), ``env``
            (callable that builds the environment), ``task``,
            ``learning_rate``, ``hidden_sizes``, ``valid_split_ratio``,
            ``rolling_average_persitency``, ``hidden_nonlinearity``,
            ``batch_size``, ``discount``, ``n_candidates``, ``horizon``,
            ``use_cem``, ``num_cem_iters``, ``num_rollouts``,
            ``max_path_length``, ``n_parallel``, ``n_itr``,
            ``initial_random_samples`` and ``dynamic_model_epochs``.

    Raises:
        KeyError: If any required key is missing from ``config``.
    """
    exp_dir = os.getcwd() + '/data/' + EXP_NAME + '/' + config.get(
        'exp_name', '')
    logger.configure(dir=exp_dir,
                     format_strs=['stdout', 'log', 'csv'],
                     snapshot_mode='last')

    # Persist the configuration next to the logs. The context manager makes
    # sure the file is flushed and closed even if serialization fails.
    # NOTE(review): assumes logger.configure has created exp_dir - confirm.
    with open(exp_dir + '/params.json', 'w') as params_file:
        json.dump(config,
                  params_file,
                  indent=2,
                  sort_keys=True,
                  cls=ClassEncoder)

    env = normalize(config['env'](reset_every_episode=True,
                                  task=config['task']))

    # 'rolling_average_persitency' (sic) is the keyword and config key used
    # throughout this function; do not "fix" the spelling in one place only.
    dynamics_model = MLPDynamicsModel(
        name="dyn_model",
        env=env,
        learning_rate=config['learning_rate'],
        hidden_sizes=config['hidden_sizes'],
        valid_split_ratio=config['valid_split_ratio'],
        rolling_average_persitency=config['rolling_average_persitency'],
        hidden_nonlinearity=config['hidden_nonlinearity'],
        batch_size=config['batch_size'],
    )

    policy = MPCController(
        name="policy",
        env=env,
        dynamics_model=dynamics_model,
        discount=config['discount'],
        n_candidates=config['n_candidates'],
        horizon=config['horizon'],
        use_cem=config['use_cem'],
        num_cem_iters=config['num_cem_iters'],
    )

    sampler = Sampler(
        env=env,
        policy=policy,
        num_rollouts=config['num_rollouts'],
        max_path_length=config['max_path_length'],
        n_parallel=config['n_parallel'],
    )

    sample_processor = ModelSampleProcessor(recurrent=False)

    # Note the config key is 'dynamic_model_epochs' while the Trainer keyword
    # is 'dynamics_model_max_epochs'.
    algo = Trainer(
        env=env,
        policy=policy,
        dynamics_model=dynamics_model,
        sampler=sampler,
        dynamics_sample_processor=sample_processor,
        n_itr=config['n_itr'],
        initial_random_samples=config['initial_random_samples'],
        dynamics_model_max_epochs=config['dynamic_model_epochs'],
    )
    algo.train()
示例#2
0
def run_experiment(config):
    """Train an RNN-based MPC controller on a recurrent dynamics model.

    The experiment directory is ``<cwd>/data/<EXP_NAME>/<exp_name>``. The
    logger is pointed at it and the full configuration is written there as
    ``params.json`` before any component is built.

    Args:
        config: Mapping of experiment settings. Keys read here: ``exp_name``
            (optional, defaults to ``''``), ``env`` (callable that builds the
            environment), ``task``, ``hidden_sizes``, ``learning_rate``,
            ``backprop_steps``, ``cell_type``, ``batch_size``, ``discount``,
            ``n_candidates``, ``horizon``, ``use_cem``, ``num_cem_iters``,
            ``num_rollouts``, ``max_path_length``, ``n_parallel``, ``n_itr``,
            ``initial_random_samples`` and ``dynamic_model_epochs``.

    Raises:
        KeyError: If any required key is missing from ``config``.
    """
    exp_dir = os.getcwd() + '/data/' + EXP_NAME + '/' + config.get('exp_name', '')
    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'], snapshot_mode='last')

    # Persist the configuration next to the logs. The context manager makes
    # sure the file is flushed and closed even if serialization fails.
    # NOTE(review): assumes logger.configure has created exp_dir - confirm.
    with open(exp_dir + '/params.json', 'w') as params_file:
        json.dump(config, params_file, indent=2, sort_keys=True, cls=ClassEncoder)

    # Port handed to the environment built below, and the separate list of
    # ports handed to the sampler.
    # NOTE(review): presumably one simulator connection per port - confirm.
    port_policy = 21001
    ports = [19999, 20001, 22001, 23001, 24001]

    _env = normalize(config['env'](reset_every_episode=True, task=config['task'], port=port_policy))

    dynamics_model = RNNDynamicsModel(
        name="dyn_model",
        env=_env,
        hidden_sizes=config['hidden_sizes'],
        learning_rate=config['learning_rate'],
        backprop_steps=config['backprop_steps'],
        cell_type=config['cell_type'],
        batch_size=config['batch_size'],
    )

    policy = RNNMPCController(
        name="policy",
        env=_env,
        dynamics_model=dynamics_model,
        discount=config['discount'],
        n_candidates=config['n_candidates'],
        horizon=config['horizon'],
        use_cem=config['use_cem'],
        num_cem_iters=config['num_cem_iters'],
    )

    sampler = Sampler(
        env=_env,
        policy=policy,
        num_rollouts=config['num_rollouts'],
        max_path_length=config['max_path_length'],
        n_parallel=config['n_parallel'],
        ports=ports,
    )

    sample_processor = ModelSampleProcessor(recurrent=True)

    # Note the config key is 'dynamic_model_epochs' while the Trainer keyword
    # is 'dynamics_model_max_epochs'.
    algo = Trainer(
        env=_env,
        policy=policy,
        dynamics_model=dynamics_model,
        sampler=sampler,
        sample_processor=sample_processor,
        n_itr=config['n_itr'],
        initial_random_samples=config['initial_random_samples'],
        dynamics_model_max_epochs=config['dynamic_model_epochs'],
    )
    algo.train()
示例#3
0
def run_experiment(config):
    """Train an MPC controller on a meta-learned MLP dynamics model.

    The experiment directory is ``<cwd>/data/<EXP_NAME>/<exp_name>``. The
    logger is pointed at it and the full configuration is written there as
    ``params.json`` before any component is built.

    Args:
        config: Mapping of experiment settings. Keys read here: ``exp_name``
            (optional, defaults to ``''``), ``env`` (callable that builds the
            environment), ``task``, ``meta_batch_size``,
            ``inner_learning_rate``, ``learning_rate``,
            ``hidden_sizes_model``, ``valid_split_ratio``,
            ``rolling_average_persitency``, ``hidden_nonlinearity_model``,
            ``adapt_batch_size``, ``discount``, ``n_candidates``,
            ``horizon``, ``use_cem``, ``num_cem_iters``, ``n_parallel``,
            ``max_path_length``, ``num_rollouts``, ``n_itr``,
            ``initial_random_samples`` and ``dynamic_model_epochs``.

    Raises:
        KeyError: If any required key is missing from ``config``.
    """
    exp_dir = os.getcwd() + '/data/' + EXP_NAME + '/' + config.get(
        'exp_name', '')
    logger.configure(dir=exp_dir,
                     format_strs=['stdout', 'log', 'csv'],
                     snapshot_mode='last')

    # Persist the configuration next to the logs. The context manager makes
    # sure the file is flushed and closed even if serialization fails.
    # NOTE(review): assumes logger.configure has created exp_dir - confirm.
    with open(exp_dir + '/params.json', 'w') as params_file:
        json.dump(config,
                  params_file,
                  indent=2,
                  sort_keys=True,
                  cls=ClassEncoder)

    # Port handed to the environment built below, and the separate list of
    # ports handed to the sampler.
    # NOTE(review): presumably one simulator connection per port - confirm.
    port_policy = 21001
    ports = [19999, 20001, 22001, 23001, 24001]

    # Environment instance used for the policy and the other components.
    _env = normalize(config['env'](reset_every_episode=True,
                                   task=config['task'],
                                   port=port_policy))

    # The model's batch_size is fed from 'adapt_batch_size', the same value
    # the sampler receives below.
    dynamics_model = MetaMLPDynamicsModel(
        name="dyn_model",
        env=_env,
        meta_batch_size=config['meta_batch_size'],
        inner_learning_rate=config['inner_learning_rate'],
        learning_rate=config['learning_rate'],
        hidden_sizes=config['hidden_sizes_model'],
        valid_split_ratio=config['valid_split_ratio'],
        rolling_average_persitency=config['rolling_average_persitency'],
        hidden_nonlinearity=config['hidden_nonlinearity_model'],
        batch_size=config['adapt_batch_size'],
    )

    policy = MPCController(
        name="policy",
        env=_env,
        dynamics_model=dynamics_model,
        discount=config['discount'],
        n_candidates=config['n_candidates'],
        horizon=config['horizon'],
        use_cem=config['use_cem'],
        num_cem_iters=config['num_cem_iters'],
    )

    sampler = Sampler(
        env=_env,
        policy=policy,
        n_parallel=config['n_parallel'],
        max_path_length=config['max_path_length'],
        num_rollouts=config['num_rollouts'],
        # Comment this out and it won't adapt during rollout.
        adapt_batch_size=config['adapt_batch_size'],
        ports=ports,
    )

    sample_processor = ModelSampleProcessor(recurrent=True)

    # Note the config key is 'dynamic_model_epochs' while the Trainer keyword
    # is 'dynamics_model_max_epochs'.
    algo = Trainer(
        env=_env,
        policy=policy,
        dynamics_model=dynamics_model,
        sampler=sampler,
        sample_processor=sample_processor,
        n_itr=config['n_itr'],
        initial_random_samples=config['initial_random_samples'],
        dynamics_model_max_epochs=config['dynamic_model_epochs'],
    )

    algo.train()