Example #1
def main():
    parser = argparse.ArgumentParser(
        description="HasukoRPG parser version: {}".format(VERSION))
    parser.add_argument('-d',
                        '--debug',
                        action='store_true',
                        help="Debug mode")
    parser.add_argument('-t', '--token', help="Specify the discord bot token")
    parser.add_argument('-u',
                        '--user',
                        help="Specify the user to the database")
    parser.add_argument('-p',
                        '--password',
                        help="Specify the password to the database")
    parser.add_argument('-n', '--db-name', help="Specify the database name")

    args = parser.parse_args()

    log_path = "logs/"
    if not os.path.exists(os.path.dirname(log_path)):
        os.makedirs(os.path.dirname(log_path))

    if args.debug:
        level = logging.DEBUG
    else:
        level = logging.INFO

    logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s',
                        level=level)

    runner = Runner(args)
    runner.run()
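
A small hedged illustration of how the flags above parse, feeding the parser an explicit argv list instead of the real command line (all values are placeholders):

    # hypothetical smoke test for the flags defined above (would sit inside main())
    args = parser.parse_args(['--debug', '--token', 'XXXX',
                              '--user', 'bot', '--password', 'secret',
                              '--db-name', 'hasuko'])
    print(args.debug, args.db_name)   # -> True hasuko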
Example #2
    def __init__(self, threadId, sess, graph):
        StoppableThread.__init__(self)
        self.threadId = threadId
        self.sess = sess
        self.graph = graph
        with self.graph.as_default():

            if args.game == "Grid":
                env = GridEnv()
            else:
                env = gym_env(args.game)
                env = Penalizer(env)

            proproc = None
            rewproc = None
            q_model = CartPoleModel(modelOps)

            q_model.model_update = model.model
            q_model.set_weights(model.get_weights())
            summary_writer = tf.summary.FileWriter(
                args.logdir + '/thread-' + str(threadId),
                K.get_session().graph) if args.logdir is not None else None

            agentOps = DqnAgentOps()
            agentOps.double_dqn = args.double_dqn
            agentOps.TARGET_NETWORK_UPDATE_FREQUENCY = 20
            agentOps.REPLAY_START_SIZE = 1
            #agentOps.INITIAL_EXPLORATION = 0
            agentOps.TARGET_NETWORK_UPDATE_FREQUENCY = 1e10

            #replay_buffer = ReplayBuffer(int(1e6), 4, 4, agentOps.REPLAY_START_SIZE, 32)
            replay_buffer = NStepBuffer(modelOps.AGENT_HISTORY_LENGTH,
                                        args.nstep)
            agent = DqnAgent(env.action_space,
                             q_model,
                             replay_buffer,
                             rewproc,
                             agentOps,
                             summary_writer,
                             model_eval=model_eval)  #

            egreedyOps = EGreedyOps()
            egreedyOps.REPLAY_START_SIZE = 1
            egreedyOps.FINAL_EXPLORATION_FRAME = 5000
            egreedyOps.FINAL_EXPLORATION = 0.01
            egreedyOps.DECAY = 0.999
            egreedyAgent = MultiEGreedyAgent(env.action_space, egreedyOps,
                                             agent, [0.4, 0.3, 0.3],
                                             [0.1, 0.01, 0.5])

            self.runner = Runner(env, egreedyAgent, proproc,
                                 modelOps.AGENT_HISTORY_LENGTH)
            self.runner.listen(replay_buffer, proproc)
            self.runner.listen(agent, None)
            self.runner.listen(egreedyAgent, None)
            self.runner.listen(self, proproc)
        pass
Example #3
def main():
    # 0. Fixed settings
    cfg = Config().fromfile('../slcv/cfg/cfg_resnet18_dogcat.py')  # a relative path is required here
    
    # 1. Data
#    transform = data_transform(
#        train=True, 
#        input_size = cfg.input_size, 
#        mean = cfg.mean, 
#        std = cfg.std)
    trainset = DogCat(
        cfg.train_root,
        transform=None,    # use the DogCat dataset's default transform
        train=True,
        test=False)
    trainloader = DataLoader(
        trainset,
        batch_size=cfg.batch_size,
        shuffle=True,
        num_workers=2)
    
    # 2. Model
    model = pretrained_models(model_name='resnet18', num_classes=cfg.num_classes)
    if torch.cuda.device_count() > 0 and len(cfg.gpus) == 1:
        model = model.cuda()
    elif torch.cuda.device_count() > 1 and len(cfg.gpus) > 1:  # data-parallel model
        model = torch.nn.DataParallel(model, device_ids=cfg.gpus).cuda()
    optimizer = cfg.optimizer
    
    # 3. Training
    runner = Runner(trainloader, model, optimizer, cfg)  # pass the cfg object in as well; it is needed for attaching parameters
    runner.register_hooks(
            cfg.optimizer_config,
            cfg.checkpoint_config,
            cfg.logger_config
            )
    # Resume training
    if cfg.resume_from is not None:
        runner.resume(cfg.resume_from, resume_optimizer=True, map_location='default')  # make sure map_location matches the cfg definition
    # Load a model for inference
    elif cfg.load_from is not None:
        runner.load_checkpoint(cfg.load_from)
    
    runner.train()
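
For reference, a minimal sketch of what the cfg file loaded above might contain, reconstructed only from the attributes this example reads; every value is a placeholder, not the project's actual configuration:

# cfg_resnet18_dogcat.py -- hypothetical contents
train_root = '/path/to/dogcat/train'   # placeholder dataset path
input_size = 224
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
batch_size = 32
num_classes = 2
gpus = [0]                             # one id -> model.cuda(); several -> DataParallel
optimizer = dict(type='SGD', lr=0.001) # exact format depends on how Runner consumes it
optimizer_config = dict()              # hook configs passed to runner.register_hooks
checkpoint_config = dict(interval=1)
logger_config = dict(interval=10)
resume_from = None                     # set to a checkpoint path to resume training
load_from = None                       # set to a checkpoint path to only load weights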
Example #4
    def do_test_example_sets(self, algorithm):
        runner = Runner(algorithm)

        for i in self.TESTS_TO_RUN:
            input_file = self.EXAMPLE_FILES[i]
            output_file = "%s.tour" % input_file
            expected_total = self.EXPECTED_TOTALS[i]
            expected_time = self.EXPECTED_TIMES[i]

            # Run the algorithm and generate output file
            sys.argv[1] = '-f'
            sys.argv[2] = input_file
            runner.load()
            start_time = time.perf_counter()
            runner.run()
            elapsed_time = time.perf_counter() - start_time

            # verify that all cities are visited
            (all_match, problems) = visit.main(input_file, output_file)
            message = "For %s, possible problems include:\n" % output_file[3:]
            for each in problems:
                message = "%s%s\n" % (message, problems[each])
            self.assertTrue(all_match, message)

            # verify correct solution length
            cities = self.readinstance(input_file)
            solution = self.readsolution(output_file)
            self.checksolution(cities, solution[0][0], solution[1])

            # verify that the problem was solved within the allotted time frame
            message = "For %s, execution time of %f exceeded allotted time of %f" % (output_file[3:],
                                                                                     elapsed_time, expected_time)
            self.assertTrue(elapsed_time < expected_time, message)

            # verify that the solution is reasonably optimal if an optimal value is posted
            if expected_total >= 0:
                message = "For %s, the total distance %f exceeded the allowed %f" % (output_file[3:],
                                                                                     solution[0][0], expected_total)
                self.assertTrue(solution[0][0] <= expected_total, message)

            print "%s passed with distance %d/%d and time %d/%d" % (self.EXAMPLE_FILES[i][3:], solution[0][0],
                                                                expected_total, elapsed_time, expected_time)
Example #5
        reward_sess = tf.compat.v1.Session(graph=graph)
        reward_model = RewardModel(actions_size=5,
                                   policy=agent,
                                   sess=reward_sess,
                                   name=model_name)
        # Initialize variables of models
        init = tf.compat.v1.global_variables_initializer()
        reward_sess.run(init)
        # If we want, we can use an already trained reward model
        if fixed_reward_model:
            reward_model.load_model(reward_model_name)
            print("Model loaded!")

# Create runner
runner = Runner(agent=agent,
                frequency=frequency,
                env=env,
                save_frequency=save_frequency,
                logging=logging,
                total_episode=total_episode,
                curriculum=curriculum,
                frequency_mode=frequency_mode,
                reward_model=reward_model,
                reward_frequency=reward_frequency,
                dems_name=dems_name,
                fixed_reward_model=fixed_reward_model)
try:
    runner.run()
finally:
    env.close()
Example #6
 def running():
     """ 静态的执行方法,如果没有用 @staticmethod 需要 Main().run_ranzhi()
     :return:
     """
     print("start-test")
     Runner().run_test()
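
The docstring above is about @staticmethod call semantics; a minimal self-contained sketch of the difference (class and method names are illustrative only):

class Main:
    @staticmethod
    def running():
        print("start-test")

    def run_ranzhi(self):          # plain instance method, for comparison
        print("start-test")

Main.running()       # callable on the class itself thanks to @staticmethod
Main().running()     # an instance also works
Main().run_ranzhi()  # without @staticmethod an instance is required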
Example #7
        def __init__(self, threadId, sess, graph):
            StoppableThread.__init__(self)
            self.threadId = threadId
            self.sess = sess
            self.graph = graph
            with self.graph.as_default():
                if args.atari:
                    env = gym_env(args.game + 'NoFrameskip-v0')
                    env = WarmUp(env, min_step=0, max_step=30)
                    env = ActionRepeat(env, 4)
                    proproc = PreProPipeline(
                        [GrayPrePro(),
                         ResizePrePro(modelOps.INPUT_SIZE)])
                    rewproc = PreProPipeline([RewardClipper(-1, 1)])
                    #q_model = A3CModel(modelOps)
                else:
                    if args.game == "Grid":
                        env = GridEnv()
                    else:
                        env = gym_env(args.game)
                    proproc = None
                    rewproc = None
                    #q_model = TabularQModel(modelOps)
                for trans in args.env_transforms:
                    env = globals()[trans](env)

                if 'shared_model' in kargs and kargs['shared_model']:
                    q_model = model
                else:
                    q_model = globals()[args.model](modelOps)
                    q_model.model_update = model.model
                    q_model.set_weights(model.get_weights())
                summary_writer = tf.summary.FileWriter(
                    args.logdir + '/thread-' + str(threadId),
                    K.get_session().graph) if args.logdir is not None else None

                agentOps = DqnAgentOps()
                agentOps.double_dqn = args.double_dqn
                agentOps.REPLAY_START_SIZE = 1
                #agentOps.INITIAL_EXPLORATION = 0
                agentOps.TARGET_NETWORK_UPDATE_FREQUENCY = 1e10

                #replay_buffer = ReplayBuffer(int(1e6), 4, 4, agentOps.REPLAY_START_SIZE, 32)
                replay_buffer = None
                #if threadId > 0:
                if args.nstep > 0:
                    replay_buffer = NStepBuffer(modelOps.AGENT_HISTORY_LENGTH,
                                                args.nstep)
                else:
                    replay_buffer = ReplayBuffer(args.replay_buffer_size,
                                                 modelOps.AGENT_HISTORY_LENGTH,
                                                 args.update_frequency,
                                                 args.replay_start_size,
                                                 args.batch_size)

                #print(kargs['agent'])
                if kargs['agent'] == 'ActorCriticAgent':
                    agent = ActorCriticAgent(env.action_space,
                                             q_model,
                                             replay_buffer,
                                             rewproc,
                                             agentOps,
                                             summary_writer,
                                             ac_model_update=model)  #
                else:
                    agent = DqnAgent(env.action_space,
                                     q_model,
                                     replay_buffer,
                                     rewproc,
                                     agentOps,
                                     summary_writer,
                                     model_eval=model_eval)  #

                egreedyAgent = None

                if threadId > 0 and kargs[
                        'agent'] != 'ActorCriticAgent':  # first thread is for testing
                    egreedyOps = EGreedyOps()
                    egreedyOps.REPLAY_START_SIZE = replay_buffer.REPLAY_START_SIZE
                    #egreedyOps.FINAL_EXPLORATION_FRAME = int(args.egreedy_final_step / args.thread_count)
                    #if args.egreedy_decay<1:
                    #	egreedyAgent = EGreedyAgentExp(env.action_space, egreedyOps, agent)
                    #else:
                    if len(args.egreedy_props
                           ) > 1 and args.egreedy_props[0] == round(
                               args.egreedy_props[0]):
                        cs = np.array(args.egreedy_props)
                        cs = np.cumsum(cs)
                        idx = np.searchsorted(cs, threadId)
                        print('Egreedyagent selected', idx,
                              args.egreedy_final[idx], args.egreedy_decay[idx],
                              args.egreedy_final_step[idx])
                        egreedyAgent = MultiEGreedyAgent(
                            env.action_space, egreedyOps, agent, [1],
                            [args.egreedy_final[idx]],
                            [args.egreedy_decay[idx]],
                            [args.egreedy_final_step[idx]])
                    else:
                        egreedyAgent = MultiEGreedyAgent(
                            env.action_space, egreedyOps, agent,
                            args.egreedy_props, args.egreedy_final,
                            args.egreedy_decay, args.egreedy_final_step)

                self.runner = Runner(
                    env, egreedyAgent if egreedyAgent is not None else agent,
                    proproc, modelOps.AGENT_HISTORY_LENGTH)
                if replay_buffer is not None:
                    self.runner.listen(replay_buffer, proproc)
                self.runner.listen(agent, None)
                if egreedyAgent is not None:
                    self.runner.listen(egreedyAgent, None)
                self.runner.listen(self, proproc)
                self.agent = agent
                self.q_model = q_model
            pass
Example #8
    class AgentThread(StoppableThread, RunnerListener):
        def __init__(self, threadId, sess, graph):
            StoppableThread.__init__(self)
            self.threadId = threadId
            self.sess = sess
            self.graph = graph
            with self.graph.as_default():
                if args.atari:
                    env = gym_env(args.game + 'NoFrameskip-v0')
                    env = WarmUp(env, min_step=0, max_step=30)
                    env = ActionRepeat(env, 4)
                    proproc = PreProPipeline(
                        [GrayPrePro(),
                         ResizePrePro(modelOps.INPUT_SIZE)])
                    rewproc = PreProPipeline([RewardClipper(-1, 1)])
                    #q_model = A3CModel(modelOps)
                else:
                    if args.game == "Grid":
                        env = GridEnv()
                    else:
                        env = gym_env(args.game)
                    proproc = None
                    rewproc = None
                    #q_model = TabularQModel(modelOps)
                for trans in args.env_transforms:
                    env = globals()[trans](env)

                if 'shared_model' in kargs and kargs['shared_model']:
                    q_model = model
                else:
                    q_model = globals()[args.model](modelOps)
                    q_model.model_update = model.model
                    q_model.set_weights(model.get_weights())
                summary_writer = tf.summary.FileWriter(
                    args.logdir + '/thread-' + str(threadId),
                    K.get_session().graph) if args.logdir is not None else None

                agentOps = DqnAgentOps()
                agentOps.double_dqn = args.double_dqn
                agentOps.REPLAY_START_SIZE = 1
                #agentOps.INITIAL_EXPLORATION = 0
                agentOps.TARGET_NETWORK_UPDATE_FREQUENCY = 1e10

                #replay_buffer = ReplayBuffer(int(1e6), 4, 4, agentOps.REPLAY_START_SIZE, 32)
                replay_buffer = None
                #if threadId > 0:
                if args.nstep > 0:
                    replay_buffer = NStepBuffer(modelOps.AGENT_HISTORY_LENGTH,
                                                args.nstep)
                else:
                    replay_buffer = ReplayBuffer(args.replay_buffer_size,
                                                 modelOps.AGENT_HISTORY_LENGTH,
                                                 args.update_frequency,
                                                 args.replay_start_size,
                                                 args.batch_size)

                #print(kargs['agent'])
                if kargs['agent'] == 'ActorCriticAgent':
                    agent = ActorCriticAgent(env.action_space,
                                             q_model,
                                             replay_buffer,
                                             rewproc,
                                             agentOps,
                                             summary_writer,
                                             ac_model_update=model)  #
                else:
                    agent = DqnAgent(env.action_space,
                                     q_model,
                                     replay_buffer,
                                     rewproc,
                                     agentOps,
                                     summary_writer,
                                     model_eval=model_eval)  #

                egreedyAgent = None

                if threadId > 0 and kargs[
                        'agent'] != 'ActorCriticAgent':  # first thread is for testing
                    egreedyOps = EGreedyOps()
                    egreedyOps.REPLAY_START_SIZE = replay_buffer.REPLAY_START_SIZE
                    #egreedyOps.FINAL_EXPLORATION_FRAME = int(args.egreedy_final_step / args.thread_count)
                    #if args.egreedy_decay<1:
                    #	egreedyAgent = EGreedyAgentExp(env.action_space, egreedyOps, agent)
                    #else:
                    if len(args.egreedy_props
                           ) > 1 and args.egreedy_props[0] == round(
                               args.egreedy_props[0]):
                        cs = np.array(args.egreedy_props)
                        cs = np.cumsum(cs)
                        idx = np.searchsorted(cs, threadId)
                        print('Egreedyagent selected', idx,
                              args.egreedy_final[idx], args.egreedy_decay[idx],
                              args.egreedy_final_step[idx])
                        egreedyAgent = MultiEGreedyAgent(
                            env.action_space, egreedyOps, agent, [1],
                            [args.egreedy_final[idx]],
                            [args.egreedy_decay[idx]],
                            [args.egreedy_final_step[idx]])
                    else:
                        egreedyAgent = MultiEGreedyAgent(
                            env.action_space, egreedyOps, agent,
                            args.egreedy_props, args.egreedy_final,
                            args.egreedy_decay, args.egreedy_final_step)

                self.runner = Runner(
                    env, egreedyAgent if egreedyAgent is not None else agent,
                    proproc, modelOps.AGENT_HISTORY_LENGTH)
                if replay_buffer is not None:
                    self.runner.listen(replay_buffer, proproc)
                self.runner.listen(agent, None)
                if egreedyAgent is not None:
                    self.runner.listen(egreedyAgent, None)
                self.runner.listen(self, proproc)
                self.agent = agent
                self.q_model = q_model
            pass

        def run(self):
            with self.graph.as_default():
                self.runner.run()

        def on_step(self, ob, action, next_ob, reward, done):
            global T
            global model, model_eval
            with tLock:
                T = T + 1
                if T % target_network_update_freq == 0 and kargs[
                        'agent'] != 'ActorCriticAgent':
                    #print('CLONE TARGET: ' + str(T))
                    model_eval.set_weights(model.get_weights())
                    for agent in agents:
                        agent.model_eval = model_eval
                if T % SAVE_FREQ == 0 and args.mode == "train":
                    if args.output_dir is not None:
                        model.model.save_weights(args.output_dir +
                                                 '/weights_{0}.h5'.format(T))
            #if T % 1000 == 0:
            #	print('STEP', T)
            #if self.threadId == 0 and T % 10 == 0:
            #	self.q_model.set_weights(model.get_weights())
            if T % args.render_step == 0 and ENABLE_RENDER:
                viewer.imshow(
                    np.repeat(np.reshape(ob, ob.shape + (1, )), 3, axis=2))
            if T > args.max_step:
                self.stop()
            #print(T)
        def stop(self):
            super(AgentThread, self).stop()
            self.runner.stop()
Example #9
 def run(self):
     Runner().runner()
Example #10
            try:
                cfg["packet"].sport = int(input.split()[1])
            except Exception as e:
                print("set source port failed. " + str(e))
            continue
        if input.split()[0] == "set-dport":
            try:
                cfg["packet"].dport = int(input.split()[1])
            except Exception as e:
                print("set dest port failed. " + str(e))
            continue
        if input.split()[0] == "run-incomming-packet":
            if (not cfg["iptables"]) or (not cfg["ipaddrs"]) or (not cfg["iproutes"]) or (not cfg["ipsets"]):
                print("please run 'load' cmds to load configurations.")
                continue
            Runner(cfg["ipaddrs"], cfg["non_local_ip"], cfg["iproutes"], cfg["ipsets"], cfg["iptables"]).RunIncommingPacket(cfg["packet"])
            continue
        if input.split()[0] == "run-localgen-packet":
            if (not cfg["iptables"]) or (not cfg["ipaddrs"]) or (not cfg["iproutes"]) or (not cfg["ipsets"]):
                print("please run 'load' cmds to load configurations.")
                continue
            Runner(cfg["ipaddrs"], cfg["non_local_ip"], cfg["iproutes"], cfg["ipsets"], cfg["iptables"]).RunLocalGenPacket(cfg["packet"])
            continue

    # p.set_source("192.168.199.10").set_dest("192.168.199.14").dport = 2379
    # p.iface = "cali30b5015dbf7"
    # p.set_source("192.16.1.51").set_dest("192.16.1.29").dport = 2379
    # p.set_source("192.16.1.51").set_dest("10.254.0.1").dport = 443
    # Runner(addrs, non_local_ip, routes, sets, tables).RunIncommingPacket(p)
    # answer = prompt('Give me some input: ', bottom_toolbar=bottom_statusbar)
    # print('You said: %s' % answer)
Example #11
class AgentThread(StoppableThread, RunnerListener):
    def __init__(self, threadId, sess, graph):
        StoppableThread.__init__(self)
        self.threadId = threadId
        self.sess = sess
        self.graph = graph
        with self.graph.as_default():

            if args.game == "Grid":
                env = GridEnv()
            else:
                env = gym_env(args.game)
                env = Penalizer(env)

            proproc = None
            rewproc = None
            q_model = CartPoleModel(modelOps)

            q_model.model_update = model.model
            q_model.set_weights(model.get_weights())
            summary_writer = tf.summary.FileWriter(
                args.logdir + '/thread-' + str(threadId),
                K.get_session().graph) if args.logdir is not None else None

            agentOps = DqnAgentOps()
            agentOps.double_dqn = args.double_dqn
            agentOps.TARGET_NETWORK_UPDATE_FREQUENCY = 20
            agentOps.REPLAY_START_SIZE = 1
            #agentOps.INITIAL_EXPLORATION = 0
            agentOps.TARGET_NETWORK_UPDATE_FREQUENCY = 1e10

            #replay_buffer = ReplayBuffer(int(1e6), 4, 4, agentOps.REPLAY_START_SIZE, 32)
            replay_buffer = NStepBuffer(modelOps.AGENT_HISTORY_LENGTH,
                                        args.nstep)
            agent = DqnAgent(env.action_space,
                             q_model,
                             replay_buffer,
                             rewproc,
                             agentOps,
                             summary_writer,
                             model_eval=model_eval)  #

            egreedyOps = EGreedyOps()
            egreedyOps.REPLAY_START_SIZE = 1
            egreedyOps.FINAL_EXPLORATION_FRAME = 5000
            egreedyOps.FINAL_EXPLORATION = 0.01
            egreedyOps.DECAY = 0.999
            egreedyAgent = MultiEGreedyAgent(env.action_space, egreedyOps,
                                             agent, [0.4, 0.3, 0.3],
                                             [0.1, 0.01, 0.5])

            self.runner = Runner(env, egreedyAgent, proproc,
                                 modelOps.AGENT_HISTORY_LENGTH)
            self.runner.listen(replay_buffer, proproc)
            self.runner.listen(agent, None)
            self.runner.listen(egreedyAgent, None)
            self.runner.listen(self, proproc)
        pass

    def run(self):
        with self.graph.as_default():
            self.runner.run()

    def on_step(self, ob, action, next_ob, reward, done):
        global T
        global model, model_eval
        with tLock:
            T = T + 1
        #if T % 1000 == 0:
        #	print('STEP', T)
        if T % target_network_update_freq == 0:
            print('CLONE TARGET')
            model_eval.set_weights(model.get_weights())
            for agent in agents:
                agent.model_eval = model_eval
        if T % args.render_step == 0 and ENABLE_RENDER:
            viewer.imshow(
                np.repeat(np.reshape(ob, ob.shape + (1, )), 3, axis=2))
        if T % SAVE_FREQ == 0 and args.mode == "train":
            if args.output_dir is not None:
                model.model.save_weights(args.output_dir +
                                         '/weights_{0}.h5'.format(T))
        #print(T)
    def stop(self):
        super(AgentThread, self).stop()
        self.runner.stop()
Example #12
def run(conf):
    cf = ConfigParser.ConfigParser()
    cf.read(conf)
    runner = Runner(cf)
    runner.run()
Example #13
        "device": device,
        "cache": cache,
        "cache_valid": cache_valid,
        "cache_test": cache_test,
        "value_model": value_model,
        "value_model_valid": value_model_valid,
        "value_model_test": value_model_test,
        "attribute": attribute,
        "selection_criterion": args.selection_criterion,
    }

    if args.log_wandb:
        runner_config["wandb_run"] = run

    total_dims = reader.get_dimensionality()
    runner = Runner(runner_config)
    selected_results = runner.main_loop(max_iter=args.max_iter)

    # Draw graphs
    graphs = runner.draw_graphs(selected_results)
    mi_fig = graphs["mi"]
    normalized_mi_fig = graphs["normalized_mi"]
    accuracy_fig = graphs["accuracy"]
    scatter_fig = runner.plot_dims(
        selected_results[0]["candidate_dim"],
        selected_results[1]["candidate_dim"],
        test_data=True,
        log_prob_dim_pool=list(selected_results[-1]["candidate_dim_pool"]))

    # You can uncomment these lines to output scatter plots for any pair of dimensions you need.
    #
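As a hedged illustration of the comment above, the same call for a different pair of selected dimensions (indices 2 and 3 are placeholders and assume enough iterations were run):

    # hypothetical extra scatter plot over another pair of dimensions
    extra_fig = runner.plot_dims(
        selected_results[2]["candidate_dim"],
        selected_results[3]["candidate_dim"],
        test_data=True,
        log_prob_dim_pool=list(selected_results[-1]["candidate_dim_pool"]))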
Example #14
 def start_testing(self):
     run = Runner()
     run.runner()
Example #15

#print(env.observation_space.n)

modelOps = DqnOps(env.action_count)
modelOps.dueling_network = args.dueling_dqn
modelOps.INPUT_SIZE = env.observation_space.n
modelOps.LEARNING_RATE = 0.2

q_model = TabularQModel(modelOps)

summary_writer = tf.summary.FileWriter(args.logdir, K.get_session().graph) if args.logdir is not None else None

agentOps = DqnAgentOps()
agentOps.double_dqn = args.double_dqn

replay_buffer = NStepBuffer(1, args.nstep)
agent = DqnAgent(env.action_space, q_model, replay_buffer, None, agentOps, summary_writer)

egreedyOps = EGreedyOps()
egreedyOps.REPLAY_START_SIZE = 1
egreedyOps.FINAL_EXPLORATION_FRAME = 10000
egreedyAgent = EGreedyAgent(env.action_space, egreedyOps, agent)

runner = Runner(env, egreedyAgent, None, 1)
runner.listen(replay_buffer, None)
runner.listen(agent, None)
runner.listen(egreedyAgent, None)

runner.run()
Example #16
agentOps = DqnAgentOps()
agentOps.double_dqn = args.double_dqn
agentOps.TARGET_NETWORK_UPDATE_FREQUENCY = 20
#agentOps.REPLAY_START_SIZE = 100
#agentOps.FINAL_EXPLORATION_FRAME = 10000

replay_buffer = ReplayBuffer(int(2000), 1, 1, 1000, 64)
#replay_buffer = NStepBuffer(modelOps.AGENT_HISTORY_LENGTH, 8)
agent = DqnAgent(env.action_space, q_model, replay_buffer, rewproc, agentOps,
                 summary_writer)

egreedyOps = EGreedyOps()
egreedyOps.REPLAY_START_SIZE = replay_buffer.REPLAY_START_SIZE
egreedyOps.FINAL_EXPLORATION_FRAME = 10000
egreedyOps.FINAL_EXPLORATION = 0.01
egreedyOps.DECAY = 0.999
egreedyAgent = EGreedyAgentExp(env.action_space, egreedyOps, agent)

runner = Runner(env, egreedyAgent, proproc, 1)
runner.listen(replay_buffer, proproc)
runner.listen(agent, None)
runner.listen(egreedyAgent, None)
if viewer is not None:
    runner.listen(viewer, None)

if args.logdir is not None:
    networkSaver = NetworkSaver(50000, args.logdir, q_model.model)
    runner.listen(networkSaver, None)

runner.run()
Example #17
def run_dqn(**kargs):
    if kargs['output_dir'] is None and kargs['logdir'] is not None:
        kargs['output_dir'] = kargs['logdir']

    q_model_initial = kargs[
        'q_model_initial'] if 'q_model_initial' in kargs else None

    from collections import namedtuple
    args = namedtuple("DQNParams", kargs.keys())(*kargs.values())

    if 'dont_init_tf' not in kargs.keys() or not kargs['dont_init_tf']:
        #init_nn_library(True, "1")
        init_nn_library("gpu" in kargs and kargs["gpu"] is not None,
                        kargs["gpu"] if "gpu" in kargs else "1")

    #if args.atari:
    #	env = gym_env(args.game + 'NoFrameskip-v0')
    #	env = WarmUp(env, min_step=0, max_step=30)
    #	env = ActionRepeat(env, 4)
    #	#q_model = A3CModel(modelOps)
    #else:
    #	if args.game == "Grid":
    #		env = GridEnv()
    #	else:
    #		env = gym_env(args.game)
    #	#q_model = TabularQModel(modelOps)
    #for trans in args.env_transforms:
    #	env = globals()[trans](env)
    if 'use_env' in kargs and kargs['use_env'] is not None:
        env = kargs['use_env']
    else:
        env = get_env(args.game, args.atari, args.env_transforms,
                      kargs['monitor_dir'] if 'monitor_dir' in kargs else None)
        if 'env_model' in kargs and kargs['env_model'] is not None and kargs[
                'env_weightfile'] is not None:
            print('Using simulated environment')
            envOps = EnvOps(env.observation_space.shape, env.action_space.n,
                            args.learning_rate)
            env_model = globals()[kargs['env_model']](envOps)
            env_model.model.load_weights(kargs['env_weightfile'])
            env = SimulatedEnv(env,
                               env_model,
                               use_reward='env_reward' in kargs
                               and kargs['env_reward'])

    modelOps = DqnOps(env.action_count)
    modelOps.dueling_network = args.dueling_dqn

    viewer = None
    if args.enable_render:
        viewer = EnvViewer(env, args.render_step, 'human')
    if args.atari:
        proproc = PreProPipeline(
            [GrayPrePro(), ResizePrePro(modelOps.INPUT_SIZE)])
        rewproc = PreProPipeline([RewardClipper(-1, 1)])
    else:
        if env.observation_space.__class__.__name__ == 'Discrete':
            modelOps.INPUT_SIZE = env.observation_space.n
        else:
            modelOps.INPUT_SIZE = env.observation_space.shape
        modelOps.AGENT_HISTORY_LENGTH = 1
        proproc = None
        rewproc = None

    modelOps.LEARNING_RATE = args.learning_rate
    if q_model_initial is None:
        q_model = globals()[args.model](modelOps)
    else:
        q_model = q_model_initial

    if args.load_weightfile is not None:
        q_model.model.load_weights(args.load_weightfile)

    summary_writer = tf.summary.FileWriter(
        args.logdir,
        K.get_session().graph) if args.logdir is not None else None

    agentOps = DqnAgentOps()
    agentOps.double_dqn = args.double_dqn
    agentOps.mode = args.mode
    if args.mode == "train":
        agentOps.TARGET_NETWORK_UPDATE_FREQUENCY = args.target_network_update

    replay_buffer = None
    if args.replay_buffer_size > 0:
        if 'load_trajectory' in kargs and kargs['load_trajectory'] is not None:
            replay_buffer = TrajectoryReplay(kargs['load_trajectory'],
                                             kargs['batch_size'],
                                             args.update_frequency,
                                             args.replay_start_size)
        else:
            replay_buffer = ReplayBuffer(args.replay_buffer_size,
                                         modelOps.AGENT_HISTORY_LENGTH,
                                         args.update_frequency,
                                         args.replay_start_size,
                                         args.batch_size)
    #replay_buffer = NStepBuffer(modelOps.AGENT_HISTORY_LENGTH, 8)
    agent = DqnAgent(env.action_space, q_model, replay_buffer, rewproc,
                     agentOps, summary_writer)

    egreedyOps = EGreedyOps()
    if replay_buffer is not None:
        egreedyOps.REPLAY_START_SIZE = replay_buffer.REPLAY_START_SIZE
    egreedyOps.mode = args.mode
    egreedyOps.test_epsilon = args.test_epsilon
    #egreedyOps.FINAL_EXPLORATION_FRAME = 10000
    if args.mode == "train":
        egreedyOps.FINAL_EXPLORATION_FRAME = args.egreedy_final_step

    if args.mode == "train":
        if args.egreedy_decay < 1:
            egreedyOps.DECAY = args.egreedy_decay
            egreedyAgent = EGreedyAgentExp(env.action_space, egreedyOps, agent)
        else:
            egreedyAgent = MultiEGreedyAgent(
                env.action_space,
                egreedyOps,
                agent,
                args.egreedy_props,
                args.egreedy_final,
                final_exp_frame=args.egreedy_final_step)
    else:
        egreedyAgent = EGreedyAgent(env.action_space, egreedyOps, agent)

    runner = Runner(env,
                    egreedyAgent,
                    proproc,
                    modelOps.AGENT_HISTORY_LENGTH,
                    max_step=args.max_step,
                    max_episode=args.max_episode)
    if replay_buffer is not None:
        runner.listen(replay_buffer, proproc)
    runner.listen(agent, None)
    runner.listen(egreedyAgent, None)
    if viewer is not None:
        runner.listen(viewer, None)

    if args.output_dir is not None:
        networkSaver = NetworkSaver(
            50000 if 'save_interval' not in kargs else kargs['save_interval'],
            args.output_dir, q_model.model)
        runner.listen(networkSaver, None)

    return runner, agent
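
A hypothetical call sketch for run_dqn; the keyword names are taken from the attribute accesses inside the function, while every value below is a placeholder rather than a recommended setting:

# hedged usage sketch -- adjust the environment, model name and hyperparameters to your setup
runner, agent = run_dqn(
    game='CartPole-v0', atari=False, env_transforms=[],
    model='CartPoleModel', mode='train',
    learning_rate=0.001, dueling_dqn=False, double_dqn=False,
    target_network_update=1000,
    replay_buffer_size=10000, replay_start_size=1000,
    update_frequency=4, batch_size=32,
    egreedy_props=[1.0], egreedy_final=[0.01],
    egreedy_decay=1.0, egreedy_final_step=10000,
    test_epsilon=0.01, enable_render=False, render_step=1,
    max_step=100000, max_episode=10000,
    load_weightfile=None, logdir=None, output_dir=None, monitor_dir=None)
runner.run()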
Example #18
from runner.runner import Runner
from algorithm.collective_algorithm import RealTSPAlgorithm


runner = Runner(RealTSPAlgorithm())
runner.load()
runner.run()