def main():
    """Entry point for the (deprecated) DQN-transfer training script."""
    # Assemble the argument parser from the layered config modules.
    parser = base_config.get_base_config()
    parser = ecco_config.get_ecco_config(parser)
    parser = dqn_transfer_config.get_dqn_transfer_config(parser)
    args = base_config.make_parser(parser)

    if args.write_log:
        logger.set_file_handler(path=args.output_dir,
                                prefix='ecco_ecco' + args.task,
                                time_str=args.exp_id)

    print('DQN_TRANSFER_MAIN.PY is Deprecated, do not use')
    print('Training starts at {}'.format(init_path.get_abs_base_dir()))

    # Deferred imports: the heavy training modules are only pulled in once
    # configuration has been parsed successfully.
    from trainer import dqn_transfer_trainer
    from runners import dqn_transfer_task_sampler
    from runners.workers import dqn_transfer_worker
    from policy import ecco_pretrain
    from policy import dqn_base, a2c_base
    from policy import ecco_transfer

    # Pick the base policy implementation requested on the command line.
    base_policy_module = {'dqn': dqn_base, 'a2c': a2c_base}[args.base_policy]
    model_table = {
        'final': ecco_pretrain.model,
        'transfer': ecco_transfer.model,
        'base': base_policy_module.model,
    }

    # No pretrained weights are supplied in this (deprecated) entry point.
    pretrain_weights = None
    train(dqn_transfer_trainer.trainer, dqn_transfer_task_sampler,
          dqn_transfer_worker, model_table, args, pretrain_weights)
def main():
    """Entry point for baseline training: parse args, set up logging and
    argument persistence, then dispatch to the selected PPO policy."""
    import json

    parser = base_config.get_base_config()
    params = base_config.make_parser(parser)

    # Renamed from `dir`, which shadowed the builtin of the same name.
    log_dir = osp.join('../log/baseline_' + params.task, params.output_dir)
    log_dir = get_dir(log_dir)
    # exist_ok replaces the racy osp.exists()/os.makedirs() check.
    os.makedirs(log_dir, exist_ok=True)

    if params.write_log:
        logger.set_file_handler(log_dir, time_str=params.exp_id)

    # Persist the full argument set alongside the logs for reproducibility.
    argparse_dict = vars(params)
    with open(osp.join(log_dir, 'args.json'), 'w') as f:
        json.dump(argparse_dict, f)

    print('Training starts at {}'.format(init_path.get_abs_base_dir()))

    # separate_train selects the sparse PPO policy; otherwise consolidated.
    if params.separate_train:
        train(trainer.Trainer, ppo_runner, base_worker,
              sparse_ppo_policy.SparsePPOPolicy, ppo_policy.PPOPolicy, params)
    else:
        train(trainer.Trainer, ppo_runner, base_worker,
              consolidated_ppo_policy.ConsolidatedPPOPolicy,
              ppo_policy.PPOPolicy, params)
def main():
    """Parse the ECCO pretrain configuration and launch training."""
    args = base_config.make_parser(
        ecco_config.get_ecco_config(base_config.get_base_config())
    )

    if args.write_log:
        logger.set_file_handler(path=args.output_dir,
                                prefix='ecco_ecco' + args.task,
                                time_str=args.exp_id)

    print('Training starts at {}'.format(init_path.get_abs_base_dir()))

    # Import the training components lazily so configuration errors
    # surface before any heavy module is loaded.
    from trainer import ecco_trainer
    from runners import task_sampler
    from runners.workers import base_worker
    from policy import ecco_pretrain

    train(ecco_trainer.trainer, task_sampler, base_worker,
          ecco_pretrain.model, args)
def main():
    """Entry point for DQN-transfer training, with an optional cache of
    pre-built environments loaded from disk."""
    parser = base_config.get_base_config()
    parser = ecco_config.get_ecco_config(parser)
    parser = dqn_transfer_config.get_dqn_transfer_config(parser)
    args = base_config.make_parser(parser)

    if args.write_log:
        logger.set_file_handler(path=args.output_dir,
                                prefix='ecco_ecco' + args.task,
                                time_str=args.exp_id)

    # Deferred imports: training components load only after args are parsed.
    from trainer import dqn_transfer_trainer, dqn_transfer_jwt
    from runners import dqn_transfer_task_sampler
    from runners.workers import dqn_transfer_worker
    from policy import ecco_pretrain
    from policy import dqn_base, a2c_base
    from policy import ecco_transfer

    # Choose the base policy implementation requested on the command line.
    base_policy_module = {'dqn': dqn_base, 'a2c': a2c_base}[args.base_policy]
    model_table = {
        'final': ecco_pretrain.model,
        'transfer': ecco_transfer.model,
        'base': base_policy_module.model,
    }

    # Optionally preload a cache of environments from disk.
    from env.env_utils import load_environments
    if args.load_environments is not None:
        environments_cache = load_environments(
            args.load_environments, args.num_cache, args.task,
            args.episode_length, args.seed
        )
    else:
        environments_cache = None

    train(dqn_transfer_trainer.trainer, dqn_transfer_task_sampler,
          dqn_transfer_worker, model_table, args,
          {'pretrain_fnc': pretrain, 'pretrain_thread': dqn_transfer_jwt},
          environments_cache)
    )
    # Tail of an agent-factory function whose head is outside this chunk:
    # initialise the freshly-built agent with the given weights and hand
    # it back to the caller. TODO confirm against the full file.
    rollout_agent.set_policy_weights(starting_weights)
    return rollout_agent


if __name__ == '__main__':
    # get the configuration
    logger.info('New environments available : {}'.format(
        register.get_name_list()))
    args = get_config()
    # args.use_nervenet = 0

    if args.write_log:
        logger.set_file_handler(
            path=args.output_dir,
            prefix='mujoco_' + args.task,
            time_str=args.time_id
        )

    # Flag dm_control tasks so later code can branch on args.dm.
    if args.task in dm_control_util.DM_ENV_INFO:
        args.dm = 1

    # optional visdom plotting
    if args.viz:
        # One visdom window slot per tracked metric; created lazily (None)
        # and presumably filled in by the training loop further down.
        viz_item = ['avg_reward', 'entropy', 'kl', 'surr_loss', 'vf_loss',
                    'weight_l2_loss', 'learning_rate']
        viz_win = {}
        for item in viz_item:
            viz_win[item] = None

    # For gym (non-dm_control) tasks, take the episode cap from the env spec.
    if not args.dm:
        args.max_pathlength = gym.spec(args.task).timestep_limit
import __init_path
from util import logger


if __name__ == '__main__':
    # Smoke-test the logger: emit one message at each severity level.
    logger.set_file_handler()
    for emit in (logger.info, logger.debug, logger.warning, logger.error):
        emit('it is a test')
import argparse
import os


if __name__ == '__main__':
    # the parser
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', default=0)
    parser.add_argument('--restore', help='the path of model to restore',
                        default=None)
    # BUG FIX: with `default=False` and no `type`, any supplied value was a
    # truthy string — even `--dcgan False` enabled DCGAN. Parse the common
    # true spellings instead; the default and `--dcgan True` keep working.
    parser.add_argument(
        '--dcgan', default=False,
        type=lambda s: str(s).lower() in ('1', 'true', 'yes', 'y'))
    args = parser.parse_args()

    # init the logger, just save the network ----------------------------------
    if not args.dcgan:
        logger.set_file_handler(prefix='TIGAN_')
        gan_net = TI_GAN(config)
        logger.info('Training TIGAN')
    else:
        logger.set_file_handler(prefix='DCGAN_')
        gan_net = DC_GAN(config)
        logger.info('Training DCGAN')

    # build the network and data loader ---------------------------------------
    sess = tf.Session()
    # tf.device('/gpu:' + str(args.gpu))
    logger.info('Session starts, using gpu: {}'.format(str(args.gpu)))
    gan_net.build_models()
    gan_net.init_training(sess, args.restore)
# Continuation of a CLI-parsing script whose parser is created above this
# chunk — TODO confirm against the full file.
parser.add_argument('--restore', help='the path of model to restore',
                    default=None)
parser.add_argument('--env_name',
                    help='the game to play, add the deterministic flag',
                    default='Breakout-v0')
# NOTE(review): this help string looks copy-pasted from --env_name; also,
# with `default=False` and no `type`, any supplied value is a truthy string.
parser.add_argument('--debug',
                    help='the game to play, add the deterministic flag',
                    default=False)
args = parser.parse_args()
# NOTE(review): hard-coded override — debug mode is forced on regardless of
# the command-line value parsed just above.
args.debug = True
config.TRAIN.training_start_episode = 1000

# init the logger, just save the network
logger.set_file_handler(prefix='gym_')

# if debug, make some changes to the config file
if args.debug:
    config = change_debug_config(config)
    # debug runs use a small corridor environment instead of the chosen game
    args.env_name = 'CorridorSmall-v5'

# build the network
sess = tf.Session()
# NOTE(review): a bare tf.device(...) call (not used as a `with` context)
# does not place ops on the device — confirm intent.
tf.device('/gpu:' + str(args.gpu))
logger.info('Session starts, using gpu: {}'.format(str(args.gpu)))
game_agent = dqn_agent.qlearning_agent(sess, config, args.env_name,
                                       restore_path=args.restore)