def main():
    """Entry point for the (deprecated) DQN-transfer pretraining script.

    Stacks the three config layers onto one argument parser, optionally
    attaches a file log handler, resolves the base policy module named on
    the command line, and dispatches to ``train``.
    """
    # Compose base -> ecco -> dqn_transfer configuration on one parser.
    parser = dqn_transfer_config.get_dqn_transfer_config(
        ecco_config.get_ecco_config(base_config.get_base_config()))
    args = base_config.make_parser(parser)

    if args.write_log:
        logger.set_file_handler(path=args.output_dir,
                                prefix='ecco_ecco' + args.task,
                                time_str=args.exp_id)

    print('DQN_TRANSFER_MAIN.PY is Deprecated, do not use')
    print('Training starts at {}'.format(init_path.get_abs_base_dir()))

    # Heavy project modules are imported only when training actually starts.
    from trainer import dqn_transfer_trainer
    from runners import dqn_transfer_task_sampler
    from runners.workers import dqn_transfer_worker
    from policy import ecco_pretrain
    from policy import dqn_base, a2c_base
    from policy import ecco_transfer

    # Unknown --base_policy values raise KeyError, same as before.
    base_policies = {'dqn': dqn_base, 'a2c': a2c_base}
    base_model = base_policies[args.base_policy]

    models = {
        'final': ecco_pretrain.model,
        'transfer': ecco_transfer.model,
        'base': base_model.model,
    }

    pretrain_weights = None

    train(dqn_transfer_trainer.trainer, dqn_transfer_task_sampler,
          dqn_transfer_worker, models, args, pretrain_weights)
# ---- Example 2 ----
def main():
    """Entry point for the baseline PPO training script.

    Parses CLI args, prepares the output/log directory, dumps the parsed
    arguments to ``args.json`` for reproducibility, and dispatches to
    ``train`` with either the sparse or the consolidated PPO policy.
    """
    parser = base_config.get_base_config()
    params = base_config.make_parser(parser)

    # BUG FIX: the original named this local 'dir', shadowing the builtin.
    log_dir = get_dir(osp.join('../log/baseline_' + params.task,
                               params.output_dir))
    # exist_ok avoids the check-then-create race of osp.exists + makedirs.
    os.makedirs(log_dir, exist_ok=True)

    if params.write_log:
        logger.set_file_handler(log_dir, time_str=params.exp_id)

    # Persist the full argument namespace so the run can be reproduced.
    import json
    with open(osp.join(log_dir, 'args.json'), 'w') as f:
        json.dump(vars(params), f)

    print('Training starts at {}'.format(init_path.get_abs_base_dir()))

    # separate_train selects the sparse policy variant; either way the
    # plain PPO policy is passed alongside as the baseline.
    if params.separate_train:
        train(trainer.Trainer, ppo_runner, base_worker,
              sparse_ppo_policy.SparsePPOPolicy, ppo_policy.PPOPolicy, params)
    else:
        train(trainer.Trainer, ppo_runner, base_worker,
              consolidated_ppo_policy.ConsolidatedPPOPolicy,
              ppo_policy.PPOPolicy, params)
# ---- Example 3 ----
def main():
    """Entry point for ECCO pretraining.

    Assembles the base + ecco argument parsers, optionally redirects the
    logger to a file, then launches ``train`` with the pretraining model.
    """
    # Compose the two configuration layers on a single parser.
    args = base_config.make_parser(
        ecco_config.get_ecco_config(base_config.get_base_config()))

    if args.write_log:
        logger.set_file_handler(path=args.output_dir,
                                prefix='ecco_ecco' + args.task,
                                time_str=args.exp_id)

    print('Training starts at {}'.format(init_path.get_abs_base_dir()))

    # Deferred imports: only needed once training actually starts.
    from trainer import ecco_trainer
    from runners import task_sampler
    from runners.workers import base_worker
    from policy import ecco_pretrain

    train(ecco_trainer.trainer, task_sampler, base_worker, ecco_pretrain.model,
          args)
def main():
    """Entry point for DQN-transfer training with an optional env cache.

    Builds the combined parser, resolves the base/transfer/final policy
    modules, optionally pre-loads cached environments from disk, and
    dispatches everything to ``train``.
    """
    # Layer the three config groups onto one parser, then parse.
    parser = base_config.get_base_config()
    parser = ecco_config.get_ecco_config(parser)
    parser = dqn_transfer_config.get_dqn_transfer_config(parser)
    args = base_config.make_parser(parser)

    if args.write_log:
        logger.set_file_handler(path=args.output_dir,
                                prefix='ecco_ecco' + args.task,
                                time_str=args.exp_id)

    # Heavy project modules are imported lazily at launch time.
    from trainer import dqn_transfer_trainer, dqn_transfer_jwt
    from runners import dqn_transfer_task_sampler
    from runners.workers import dqn_transfer_worker
    from policy import ecco_pretrain
    from policy import dqn_base, a2c_base
    from policy import ecco_transfer

    # Resolve the baseline policy module from the CLI choice
    # (unknown values raise KeyError, as before).
    base_model = {'dqn': dqn_base, 'a2c': a2c_base}[args.base_policy]

    models = {
        'final': ecco_pretrain.model,
        'transfer': ecco_transfer.model,
        'base': base_model.model,
    }

    from env.env_utils import load_environments

    # Optionally pre-load a cache of environments from disk.
    environments_cache = None
    if args.load_environments is not None:
        environments_cache = load_environments(
            args.load_environments, args.num_cache, args.task,
            args.episode_length, args.seed)

    # NOTE(review): 'pretrain' is not defined in this visible chunk; it is
    # presumably a module-level name elsewhere in the file.
    train(dqn_transfer_trainer.trainer, dqn_transfer_task_sampler,
          dqn_transfer_worker, models, args,
          {'pretrain_fnc': pretrain, 'pretrain_thread': dqn_transfer_jwt},
          environments_cache)
# ---- Example 5 ----
    )
    # Install the provided starting weights on the freshly-built agent
    # before handing it back. NOTE(review): the call this ')' closes is
    # above the visible region of this chunk.
    rollout_agent.set_policy_weights(starting_weights)
    return rollout_agent


# Script entry point: parse the run configuration, set up logging, flag
# dm_control tasks, prepare optional visdom plots, and derive the episode
# length for gym tasks.
if __name__ == '__main__':

    # get the configuration
    logger.info('New environments available : {}'.format(
        register.get_name_list()))
    args = get_config()
    # args.use_nervenet = 0

    if args.write_log:
        logger.set_file_handler(
            path=args.output_dir,
            prefix='mujoco_' + args.task, time_str=args.time_id
        )

    # Mark dm_control tasks so the gym-specific branch below is skipped.
    if args.task in dm_control_util.DM_ENV_INFO:
        args.dm = 1

    # optional visdom plotting: one window handle per tracked metric,
    # created lazily (None until the first plot call).
    if args.viz:
        viz_item = ['avg_reward', 'entropy', 'kl', 'surr_loss',
                    'vf_loss', 'weight_l2_loss', 'learning_rate']
        viz_win = {}
        for item in viz_item:
            viz_win[item] = None

    # NOTE(review): assumes get_config() gives args.dm a default (falsy)
    # value — confirm; also spec().timestep_limit is removed in newer gym
    # versions (replaced by max_episode_steps), so this pins an old gym.
    if not args.dm:
        args.max_pathlength = gym.spec(args.task).timestep_limit
import __init_path
from util import logger

if __name__ == '__main__':
    # Smoke-test the logger: attach the default file handler, then emit
    # the same message once at each severity level.
    logger.set_file_handler()
    for emit in (logger.info, logger.debug, logger.warning, logger.error):
        emit('it is a test')
# ---- Example 7 ----
import argparse
import os

if __name__ == '__main__':
    # Command-line interface: GPU id, optional restore checkpoint, and a
    # switch between the TI-GAN and DC-GAN networks.
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', default=0)
    parser.add_argument('--restore', help='the path of model to restore',
                        default=None)
    # BUG FIX: with a bare default=False, ANY supplied value — including
    # the literal string 'False' — was truthy. Parse the usual boolean
    # spellings explicitly so '--dcgan False' really selects TI-GAN.
    parser.add_argument('--dcgan', default=False,
                        type=lambda s: str(s).lower() in ('1', 'true', 'yes'))

    args = parser.parse_args()

    # init the logger, just save the network ----------------------------------
    if not args.dcgan:
        logger.set_file_handler(prefix='TIGAN_')
        gan_net = TI_GAN(config)
        logger.info('Training TIGAN')
    else:
        logger.set_file_handler(prefix='DCGAN_')
        gan_net = DC_GAN(config)
        logger.info('Training DCGAN')

    # build the network and data loader ---------------------------------------
    sess = tf.Session()
    # tf.device('/gpu:' + str(args.gpu))
    logger.info('Session starts, using gpu: {}'.format(str(args.gpu)))

    gan_net.build_models()
    gan_net.init_training(sess, args.restore)
# ---- Example 8 ----
    # NOTE(review): this chunk starts mid-script; 'parser', 'config',
    # 'change_debug_config', 'tf', 'logger' and 'dqn_agent' are defined
    # above the visible region.
    parser.add_argument('--restore',
                        help='the path of model to restore',
                        default=None)
    parser.add_argument('--env_name',
                        help='the game to play, add the deterministic flag',
                        default='Breakout-v0')

    # NOTE(review): help text below was copy-pasted from --env_name and
    # does not describe this flag; also a bare default=False makes any
    # supplied value truthy (moot here because of the override below).
    parser.add_argument('--debug',
                        help='the game to play, add the deterministic flag',
                        default=False)

    args = parser.parse_args()
    # Debug mode is force-enabled here, so the '--debug' CLI flag is
    # effectively ignored and the debug branch below always runs.
    args.debug = True
    config.TRAIN.training_start_episode = 1000
    # init the logger, just save the network
    logger.set_file_handler(prefix='gym_')

    # if debug, make some changes to the config file
    if args.debug:
        config = change_debug_config(config)
        args.env_name = 'CorridorSmall-v5'

    # build the network
    sess = tf.Session()
    tf.device('/gpu:' + str(args.gpu))
    logger.info('Session starts, using gpu: {}'.format(str(args.gpu)))
    game_agent = dqn_agent.qlearning_agent(sess,
                                           config,
                                           args.env_name,
                                           restore_path=args.restore)