import argparse
import logging
import os

# Runner and VERSION are assumed to come from the surrounding project.
def main():
    parser = argparse.ArgumentParser(
        description="HasukoRPG parser version: {}".format(VERSION))
    parser.add_argument('-d', '--debug', action='store_true', help="Debug mode")
    parser.add_argument('-t', '--token', help="Specify the Discord bot token")
    parser.add_argument('-u', '--user', help="Specify the database user")
    parser.add_argument('-p', '--password', help="Specify the database password")
    parser.add_argument('-n', '--db-name', help="Specify the database name")
    args = parser.parse_args()

    log_path = "logs/"
    os.makedirs(log_path, exist_ok=True)  # equivalent to the exists()/makedirs() dance

    level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s',
                        level=level)

    runner = Runner(args)
    runner.run()
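# Hedged usage sketch for the CLI above: the flag names come straight from the
# argparse definitions; the script name "bot.py" and all values are placeholders.
#
#   python bot.py --debug --token <DISCORD_BOT_TOKEN> \
#       --user dbuser --password dbpass --db-name hasukorpg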
def main():
    # 0. Fixed settings
    cfg = Config().fromfile('../slcv/cfg/cfg_resnet18_dogcat.py')  # a relative path is required

    # 1. Data
    # transform = data_transform(
    #     train=True,
    #     input_size=cfg.input_size,
    #     mean=cfg.mean,
    #     std=cfg.std)
    trainset = DogCat(
        cfg.train_root,
        transform=None,  # use the DogCat dataset's default transform
        train=True,
        test=False)
    trainloader = DataLoader(
        trainset,
        batch_size=cfg.batch_size,
        shuffle=True,
        num_workers=2)

    # 2. Model
    model = pretrained_models(model_name='resnet18', num_classes=cfg.num_classes)
    if torch.cuda.device_count() > 0 and len(cfg.gpus) == 1:
        model = model.cuda()
    elif torch.cuda.device_count() > 1 and len(cfg.gpus) > 1:
        # data-parallel model
        model = torch.nn.DataParallel(model, device_ids=cfg.gpus).cuda()
    optimizer = cfg.optimizer

    # 3. Training
    runner = Runner(trainloader, model, optimizer, cfg)  # pass cfg in too; the hooks will likely need its parameters
    runner.register_hooks(
        cfg.optimizer_config,
        cfg.checkpoint_config,
        cfg.logger_config)

    # Resume training
    if cfg.resume_from is not None:
        runner.resume(cfg.resume_from,
                      resume_optimizer=True,
                      map_location='default')  # make sure map_location matches the cfg definition
    # Or load a model for inference
    elif cfg.load_from is not None:
        runner.load_checkpoint(cfg.load_from)

    runner.train()
def do_test_example_sets(self, algorithm):
    runner = Runner(algorithm)
    for i in self.TESTS_TO_RUN:
        input_file = self.EXAMPLE_FILES[i]
        output_file = "%s.tour" % input_file
        expected_total = self.EXPECTED_TOTALS[i]
        expected_time = self.EXPECTED_TIMES[i]

        # Run the algorithm and generate the output file
        sys.argv[1:3] = ['-f', input_file]  # extend argv safely instead of indexing into it
        runner.load()
        start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
        runner.run()
        elapsed_time = time.perf_counter() - start_time

        # Verify that all cities are visited
        (all_match, problems) = visit.main(input_file, output_file)
        message = "For %s, possible problems include:\n" % output_file[3:]
        for each in problems:
            message = "%s%s\n" % (message, problems[each])
        self.assertTrue(all_match, message)

        # Verify correct solution length
        cities = self.readinstance(input_file)
        solution = self.readsolution(output_file)
        self.checksolution(cities, solution[0][0], solution[1])

        # Verify that the problem was solved within the allotted time frame
        message = "For %s, execution time of %f exceeded allotted time of %f" % (
            output_file[3:], elapsed_time, expected_time)
        self.assertTrue(elapsed_time < expected_time, message)

        # Verify that the solution is reasonably optimal if an optimal value is posted
        if expected_total >= 0:
            message = "For %s, the total distance %f exceeded the allowed %f" % (
                output_file[3:], solution[0][0], expected_total)
            self.assertTrue(solution[0][0] <= expected_total, message)
        print("%s passed with distance %d/%d and time %d/%d" % (
            self.EXAMPLE_FILES[i][3:], solution[0][0], expected_total,
            elapsed_time, expected_time))
reward_sess = tf.compat.v1.Session(graph=graph)
reward_model = RewardModel(actions_size=5, policy=agent, sess=reward_sess,
                           name=model_name)

# Initialize the models' variables
init = tf.compat.v1.global_variables_initializer()
reward_sess.run(init)

# If we want, we can use an already trained reward model
if fixed_reward_model:
    reward_model.load_model(reward_model_name)
    print("Model loaded!")

# Create the runner
runner = Runner(agent=agent, frequency=frequency, env=env,
                save_frequency=save_frequency, logging=logging,
                total_episode=total_episode, curriculum=curriculum,
                frequency_mode=frequency_mode, reward_model=reward_model,
                reward_frequency=reward_frequency, dems_name=dems_name,
                fixed_reward_model=fixed_reward_model)
try:
    runner.run()
finally:
    env.close()
def running():
    """
    Static execution method; without @staticmethod this would have to be
    called as Main().run_ranzhi() instead.
    :return:
    """
    print("start-test")
    Runner().run_test()
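# Hedged sketch of the docstring's point: a @staticmethod is callable on the
# class itself, no instance needed. The class name "Main" comes from the
# docstring; the decorator placement and Runner import are assumptions.
class Main:
    @staticmethod
    def running():
        print("start-test")
        Runner().run_test()

Main.running()  # without @staticmethod this would require an instance, e.g. Main().running()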
class AgentThread(StoppableThread, RunnerListener):
    def __init__(self, threadId, sess, graph):
        StoppableThread.__init__(self)
        self.threadId = threadId
        self.sess = sess
        self.graph = graph
        with self.graph.as_default():
            if args.atari:
                env = gym_env(args.game + 'NoFrameskip-v0')
                env = WarmUp(env, min_step=0, max_step=30)
                env = ActionRepeat(env, 4)
                proproc = PreProPipeline([GrayPrePro(), ResizePrePro(modelOps.INPUT_SIZE)])
                rewproc = PreProPipeline([RewardClipper(-1, 1)])
                #q_model = A3CModel(modelOps)
            else:
                if args.game == "Grid":
                    env = GridEnv()
                else:
                    env = gym_env(args.game)
                proproc = None
                rewproc = None
                #q_model = TabularQModel(modelOps)
            for trans in args.env_transforms:
                env = globals()[trans](env)
            if 'shared_model' in kargs and kargs['shared_model']:
                q_model = model
            else:
                q_model = globals()[args.model](modelOps)
                q_model.model_update = model.model
                q_model.set_weights(model.get_weights())
            summary_writer = tf.summary.FileWriter(
                args.logdir + '/thread-' + str(threadId),
                K.get_session().graph) if args.logdir is not None else None

            agentOps = DqnAgentOps()
            agentOps.double_dqn = args.double_dqn
            agentOps.REPLAY_START_SIZE = 1
            #agentOps.INITIAL_EXPLORATION = 0
            agentOps.TARGET_NETWORK_UPDATE_FREQUENCY = 1e10

            #replay_buffer = ReplayBuffer(int(1e6), 4, 4, agentOps.REPLAY_START_SIZE, 32)
            replay_buffer = None
            #if threadId > 0:
            if args.nstep > 0:
                replay_buffer = NStepBuffer(modelOps.AGENT_HISTORY_LENGTH, args.nstep)
            else:
                replay_buffer = ReplayBuffer(args.replay_buffer_size,
                                             modelOps.AGENT_HISTORY_LENGTH,
                                             args.update_frequency,
                                             args.replay_start_size,
                                             args.batch_size)
            #print(kargs['agent'])
            if kargs['agent'] == 'ActorCriticAgent':
                agent = ActorCriticAgent(env.action_space, q_model, replay_buffer,
                                         rewproc, agentOps, summary_writer,
                                         ac_model_update=model)
            else:
                agent = DqnAgent(env.action_space, q_model, replay_buffer, rewproc,
                                 agentOps, summary_writer, model_eval=model_eval)

            egreedyAgent = None
            if threadId > 0 and kargs['agent'] != 'ActorCriticAgent':  # the first thread is reserved for testing
                egreedyOps = EGreedyOps()
                egreedyOps.REPLAY_START_SIZE = replay_buffer.REPLAY_START_SIZE
                #egreedyOps.FINAL_EXPLORATION_FRAME = int(args.egreedy_final_step / args.thread_count)
                #if args.egreedy_decay < 1:
                #    egreedyAgent = EGreedyAgentExp(env.action_space, egreedyOps, agent)
                #else:
                if len(args.egreedy_props) > 1 and args.egreedy_props[0] == round(args.egreedy_props[0]):
                    cs = np.cumsum(np.array(args.egreedy_props))
                    idx = np.searchsorted(cs, threadId)
                    print('Egreedyagent selected', idx, args.egreedy_final[idx],
                          args.egreedy_decay[idx], args.egreedy_final_step[idx])
                    egreedyAgent = MultiEGreedyAgent(
                        env.action_space, egreedyOps, agent,
                        [1], [args.egreedy_final[idx]],
                        [args.egreedy_decay[idx]], [args.egreedy_final_step[idx]])
                else:
                    egreedyAgent = MultiEGreedyAgent(
                        env.action_space, egreedyOps, agent,
                        args.egreedy_props, args.egreedy_final,
                        args.egreedy_decay, args.egreedy_final_step)

            self.runner = Runner(env,
                                 egreedyAgent if egreedyAgent is not None else agent,
                                 proproc, modelOps.AGENT_HISTORY_LENGTH)
            if replay_buffer is not None:
                self.runner.listen(replay_buffer, proproc)
            self.runner.listen(agent, None)
            if egreedyAgent is not None:
                self.runner.listen(egreedyAgent, None)
            self.runner.listen(self, proproc)
            self.agent = agent
            self.q_model = q_model

    def run(self):
        with self.graph.as_default():
            self.runner.run()

    def on_step(self, ob, action, next_ob, reward, done):
        global T
        global model, model_eval
        with tLock:
            T = T + 1
            if T % target_network_update_freq == 0 and kargs['agent'] != 'ActorCriticAgent':
                #print('CLONE TARGET: ' + str(T))
                model_eval.set_weights(model.get_weights())
                for agent in agents:
                    agent.model_eval = model_eval
            if T % SAVE_FREQ == 0 and args.mode == "train":
                if args.output_dir is not None:
                    model.model.save_weights(args.output_dir + '/weights_{0}.h5'.format(T))
            #if T % 1000 == 0:
            #    print('STEP', T)
            #if self.threadId == 0 and T % 10 == 0:
            #    self.q_model.set_weights(model.get_weights())
        if T % args.render_step == 0 and ENABLE_RENDER:
            viewer.imshow(np.repeat(np.reshape(ob, ob.shape + (1,)), 3, axis=2))
        if T > args.max_step:
            self.stop()
        #print(T)

    def stop(self):
        super(AgentThread, self).stop()
        self.runner.stop()
def run(self):
    Runner().runner()
try: cfg["packet"].sport = int(input.split()[1]) except Exception as e: print("set source port failed. " + str(e)) continue if input.split()[0] == "set-dport": try: cfg["packet"].dport = int(input.split()[1]) except Exception as e: print("set dest port failed. " + str(e)) continue if input.split()[0] == "run-incomming-packet": if (not cfg["iptables"]) or (not cfg["ipaddrs"]) or (not cfg["iproutes"]) or (not cfg["ipsets"]): print("please run 'load' cmds to load configurations.") continue Runner(cfg["ipaddrs"], cfg["non_local_ip"], cfg["iproutes"], cfg["ipsets"], cfg["iptables"]).RunIncommingPacket(cfg["packet"]) continue if input.split()[0] == "run-localgen-packet": if (not cfg["iptables"]) or (not cfg["ipaddrs"]) or (not cfg["iproutes"]) or (not cfg["ipsets"]): print("please run 'load' cmds to load configurations.") continue Runner(cfg["ipaddrs"], cfg["non_local_ip"], cfg["iproutes"], cfg["ipsets"], cfg["iptables"]).RunLocalGenPacket(cfg["packet"]) continue # p.set_source("192.168.199.10").set_dest("192.168.199.14").dport = 2379 # p.iface = "cali30b5015dbf7" # p.set_source("192.16.1.51").set_dest("192.16.1.29").dport = 2379 # p.set_source("192.16.1.51").set_dest("10.254.0.1").dport = 443 # Runner(addrs, non_local_ip, routes, sets, tables).RunIncommingPacket(p) # answer = prompt('Give me some input: ', bottom_toolbar=bottom_statusbar) # print('You said: %s' % answer)
class AgentThread(StoppableThread, RunnerListener):
    def __init__(self, threadId, sess, graph):
        StoppableThread.__init__(self)
        self.threadId = threadId
        self.sess = sess
        self.graph = graph
        with self.graph.as_default():
            if args.game == "Grid":
                env = GridEnv()
            else:
                env = gym_env(args.game)
                env = Penalizer(env)
            proproc = None
            rewproc = None
            q_model = CartPoleModel(modelOps)
            q_model.model_update = model.model
            q_model.set_weights(model.get_weights())
            summary_writer = tf.summary.FileWriter(
                args.logdir + '/thread-' + str(threadId),
                K.get_session().graph) if args.logdir is not None else None

            agentOps = DqnAgentOps()
            agentOps.double_dqn = args.double_dqn
            agentOps.TARGET_NETWORK_UPDATE_FREQUENCY = 20
            agentOps.REPLAY_START_SIZE = 1
            #agentOps.INITIAL_EXPLORATION = 0
            agentOps.TARGET_NETWORK_UPDATE_FREQUENCY = 1e10  # overwrites the 20 above, effectively disabling the periodic clone

            #replay_buffer = ReplayBuffer(int(1e6), 4, 4, agentOps.REPLAY_START_SIZE, 32)
            replay_buffer = NStepBuffer(modelOps.AGENT_HISTORY_LENGTH, args.nstep)
            agent = DqnAgent(env.action_space, q_model, replay_buffer, rewproc,
                             agentOps, summary_writer, model_eval=model_eval)

            egreedyOps = EGreedyOps()
            egreedyOps.REPLAY_START_SIZE = 1
            egreedyOps.FINAL_EXPLORATION_FRAME = 5000
            egreedyOps.FINAL_EXPLORATION = 0.01
            egreedyOps.DECAY = 0.999
            egreedyAgent = MultiEGreedyAgent(env.action_space, egreedyOps, agent,
                                             [0.4, 0.3, 0.3], [0.1, 0.01, 0.5])

            self.runner = Runner(env, egreedyAgent, proproc,
                                 modelOps.AGENT_HISTORY_LENGTH)
            self.runner.listen(replay_buffer, proproc)
            self.runner.listen(agent, None)
            self.runner.listen(egreedyAgent, None)
            self.runner.listen(self, proproc)

    def run(self):
        with self.graph.as_default():
            self.runner.run()

    def on_step(self, ob, action, next_ob, reward, done):
        global T
        global model, model_eval
        with tLock:
            T = T + 1
            #if T % 1000 == 0:
            #    print('STEP', T)
            if T % target_network_update_freq == 0:
                print('CLONE TARGET')
                model_eval.set_weights(model.get_weights())
                for agent in agents:
                    agent.model_eval = model_eval
        if T % args.render_step == 0 and ENABLE_RENDER:
            viewer.imshow(np.repeat(np.reshape(ob, ob.shape + (1,)), 3, axis=2))
        if T % SAVE_FREQ == 0 and args.mode == "train":
            if args.output_dir is not None:
                model.model.save_weights(args.output_dir + '/weights_{0}.h5'.format(T))
        #print(T)

    def stop(self):
        super(AgentThread, self).stop()
        self.runner.stop()
import configparser  # named "ConfigParser" on Python 2, as in the original

def run(conf):
    cf = configparser.ConfigParser()
    cf.read(conf)
    runner = Runner(cf)
    runner.run()
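# Minimal usage sketch for run() above; "app.conf" is a hypothetical INI file
# containing whatever sections this project's Runner expects.
if __name__ == '__main__':
    run('app.conf')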
"device": device, "cache": cache, "cache_valid": cache_valid, "cache_test": cache_test, "value_model": value_model, "value_model_valid": value_model_valid, "value_model_test": value_model_test, "attribute": attribute, "selection_criterion": args.selection_criterion, } if args.log_wandb: runner_config["wandb_run"] = run total_dims = reader.get_dimensionality() runner = Runner(runner_config) selected_results = runner.main_loop(max_iter=args.max_iter) # Draw graphs graphs = runner.draw_graphs(selected_results) mi_fig = graphs["mi"] normalized_mi_fig = graphs["normalized_mi"] accuracy_fig = graphs["accuracy"] scatter_fig = runner.plot_dims( selected_results[0]["candidate_dim"], selected_results[1]["candidate_dim"], test_data=True, log_prob_dim_pool=list(selected_results[-1]["candidate_dim_pool"])) # You can uncomment these lines to output scatter plots for any pair of dimensions you need. #
def start_testing(self):
    run = Runner()
    run.runner()
#print(env.observation_space.n)
modelOps = DqnOps(env.action_count)
modelOps.dueling_network = args.dueling_dqn
modelOps.INPUT_SIZE = env.observation_space.n
modelOps.LEARNING_RATE = 0.2

q_model = TabularQModel(modelOps)

summary_writer = tf.summary.FileWriter(
    args.logdir, K.get_session().graph) if args.logdir is not None else None

agentOps = DqnAgentOps()
agentOps.double_dqn = args.double_dqn

replay_buffer = NStepBuffer(1, args.nstep)
agent = DqnAgent(env.action_space, q_model, replay_buffer, None, agentOps,
                 summary_writer)

egreedyOps = EGreedyOps()
egreedyOps.REPLAY_START_SIZE = 1
egreedyOps.FINAL_EXPLORATION_FRAME = 10000
egreedyAgent = EGreedyAgent(env.action_space, egreedyOps, agent)

runner = Runner(env, egreedyAgent, None, 1)
runner.listen(replay_buffer, None)
runner.listen(agent, None)
runner.listen(egreedyAgent, None)
runner.run()
agentOps = DqnAgentOps()
agentOps.double_dqn = args.double_dqn
agentOps.TARGET_NETWORK_UPDATE_FREQUENCY = 20
#agentOps.REPLAY_START_SIZE = 100
#agentOps.FINAL_EXPLORATION_FRAME = 10000

replay_buffer = ReplayBuffer(int(2000), 1, 1, 1000, 64)
#replay_buffer = NStepBuffer(modelOps.AGENT_HISTORY_LENGTH, 8)
agent = DqnAgent(env.action_space, q_model, replay_buffer, rewproc, agentOps,
                 summary_writer)

egreedyOps = EGreedyOps()
egreedyOps.REPLAY_START_SIZE = replay_buffer.REPLAY_START_SIZE
egreedyOps.FINAL_EXPLORATION_FRAME = 10000
egreedyOps.FINAL_EXPLORATION = 0.01
egreedyOps.DECAY = 0.999
egreedyAgent = EGreedyAgentExp(env.action_space, egreedyOps, agent)

runner = Runner(env, egreedyAgent, proproc, 1)
runner.listen(replay_buffer, proproc)
runner.listen(agent, None)
runner.listen(egreedyAgent, None)
if viewer is not None:
    runner.listen(viewer, None)
if args.logdir is not None:
    networkSaver = NetworkSaver(50000, args.logdir, q_model.model)
    runner.listen(networkSaver, None)
runner.run()
def run_dqn(**kargs):
    if kargs['output_dir'] is None and kargs['logdir'] is not None:
        kargs['output_dir'] = kargs['logdir']
    q_model_initial = kargs['q_model_initial'] if 'q_model_initial' in kargs else None

    from collections import namedtuple
    args = namedtuple("DQNParams", kargs.keys())(*kargs.values())

    if 'dont_init_tf' not in kargs.keys() or not kargs['dont_init_tf']:
        #init_nn_library(True, "1")
        init_nn_library("gpu" in kargs and kargs["gpu"] is not None,
                        kargs["gpu"] if "gpu" in kargs else "1")

    #if args.atari:
    #    env = gym_env(args.game + 'NoFrameskip-v0')
    #    env = WarmUp(env, min_step=0, max_step=30)
    #    env = ActionRepeat(env, 4)
    #    #q_model = A3CModel(modelOps)
    #else:
    #    if args.game == "Grid":
    #        env = GridEnv()
    #    else:
    #        env = gym_env(args.game)
    #    #q_model = TabularQModel(modelOps)
    #for trans in args.env_transforms:
    #    env = globals()[trans](env)
    if 'use_env' in kargs and kargs['use_env'] is not None:
        env = kargs['use_env']
    else:
        env = get_env(args.game, args.atari, args.env_transforms,
                      kargs['monitor_dir'] if 'monitor_dir' in kargs else None)

    if 'env_model' in kargs and kargs['env_model'] is not None and kargs['env_weightfile'] is not None:
        print('Using simulated environment')
        envOps = EnvOps(env.observation_space.shape, env.action_space.n,
                        args.learning_rate)
        env_model = globals()[kargs['env_model']](envOps)
        env_model.model.load_weights(kargs['env_weightfile'])
        env = SimulatedEnv(env, env_model,
                           use_reward='env_reward' in kargs and kargs['env_reward'])

    modelOps = DqnOps(env.action_count)
    modelOps.dueling_network = args.dueling_dqn

    viewer = None
    if args.enable_render:
        viewer = EnvViewer(env, args.render_step, 'human')
    if args.atari:
        proproc = PreProPipeline([GrayPrePro(), ResizePrePro(modelOps.INPUT_SIZE)])
        rewproc = PreProPipeline([RewardClipper(-1, 1)])
    else:
        if env.observation_space.__class__.__name__ == 'Discrete':  # the original used "is", comparing string identity
            modelOps.INPUT_SIZE = env.observation_space.n
        else:
            modelOps.INPUT_SIZE = env.observation_space.shape
        modelOps.AGENT_HISTORY_LENGTH = 1
        proproc = None
        rewproc = None

    modelOps.LEARNING_RATE = args.learning_rate
    if q_model_initial is None:
        q_model = globals()[args.model](modelOps)
    else:
        q_model = q_model_initial
    if args.load_weightfile is not None:
        q_model.model.load_weights(args.load_weightfile)

    summary_writer = tf.summary.FileWriter(
        args.logdir, K.get_session().graph) if args.logdir is not None else None

    agentOps = DqnAgentOps()
    agentOps.double_dqn = args.double_dqn
    agentOps.mode = args.mode
    if args.mode == "train":
        agentOps.TARGET_NETWORK_UPDATE_FREQUENCY = args.target_network_update

    replay_buffer = None
    if args.replay_buffer_size > 0:
        if 'load_trajectory' in kargs and kargs['load_trajectory'] is not None:
            replay_buffer = TrajectoryReplay(kargs['load_trajectory'],
                                             kargs['batch_size'],
                                             args.update_frequency,
                                             args.replay_start_size)
        else:
            replay_buffer = ReplayBuffer(args.replay_buffer_size,
                                         modelOps.AGENT_HISTORY_LENGTH,
                                         args.update_frequency,
                                         args.replay_start_size,
                                         args.batch_size)
    #replay_buffer = NStepBuffer(modelOps.AGENT_HISTORY_LENGTH, 8)
    agent = DqnAgent(env.action_space, q_model, replay_buffer, rewproc, agentOps,
                     summary_writer)

    egreedyOps = EGreedyOps()
    if replay_buffer is not None:
        egreedyOps.REPLAY_START_SIZE = replay_buffer.REPLAY_START_SIZE
    egreedyOps.mode = args.mode
    egreedyOps.test_epsilon = args.test_epsilon
    #egreedyOps.FINAL_EXPLORATION_FRAME = 10000
    if args.mode == "train":
        egreedyOps.FINAL_EXPLORATION_FRAME = args.egreedy_final_step

    if args.mode == "train":
        if args.egreedy_decay < 1:
            egreedyOps.DECAY = args.egreedy_decay
            egreedyAgent = EGreedyAgentExp(env.action_space, egreedyOps, agent)
        else:
            egreedyAgent = MultiEGreedyAgent(
                env.action_space, egreedyOps, agent,
                args.egreedy_props, args.egreedy_final,
                final_exp_frame=args.egreedy_final_step)
    else:
        egreedyAgent = EGreedyAgent(env.action_space, egreedyOps, agent)

    runner = Runner(env, egreedyAgent, proproc, modelOps.AGENT_HISTORY_LENGTH,
                    max_step=args.max_step, max_episode=args.max_episode)
    if replay_buffer is not None:
        runner.listen(replay_buffer, proproc)
    runner.listen(agent, None)
    runner.listen(egreedyAgent, None)
    if viewer is not None:
        runner.listen(viewer, None)
    if args.output_dir is not None:
        networkSaver = NetworkSaver(
            50000 if 'save_interval' not in kargs else kargs['save_interval'],
            args.output_dir, q_model.model)
        runner.listen(networkSaver, None)
    return runner, agent
from runner.runner import Runner
from algorithm.collective_algorithm import RealTSPAlgorithm

runner = Runner(RealTSPAlgorithm())
runner.load()
runner.run()