Example #1
File: train.py  Project: chshong/rmaddpg
    def init(self, arglist, env):
        num_thread = 1
        tf_config = tf.ConfigProto(inter_op_parallelism_threads=num_thread,
                                   intra_op_parallelism_threads=num_thread)
        self.sess = tf.InteractiveSession(config=tf_config)

        # To make sure that training and testing are based on diff seeds
        if arglist.restore:
            create_seed(np.random.randint(2))
        else:
            create_seed(arglist.seed)

        # Create agent trainers
        self.obs_shape_n = [
            env.observation_space[i].shape for i in range(env.n)
        ]
        self.num_adversaries = min(env.n, arglist.num_adversaries)
        self.trainers = get_trainers(env, self.num_adversaries,
                                     self.obs_shape_n, arglist)
        print('Using good policy {} and adv policy {}'.format(
            arglist.good_policy, arglist.adv_policy))

        # Initialize
        U.initialize()

        # Load previous results, if necessary
        if arglist.load_dir == "":
            arglist.load_dir = arglist.save_dir
        if arglist.restore or arglist.benchmark:
            print('Loading previous state...')
            U.load_state(arglist.load_dir)

        self.episode_rewards = [0.0]  # sum of rewards for all agents
        self.agent_rewards = [[0.0]
                              for _ in range(env.n)]  # individual agent reward
        self.final_ep_rewards = []  # sum of rewards for training curve
        self.final_ep_ag_rewards = []  # agent rewards for training curve
        self.agent_info = [[[]]]  # placeholder for benchmarking info
        self.saver = tf.train.Saver()
        self.obs_n = env.reset()
        self.train_step = 0
        self.t_start = time.time()
        self.new_episode = True  # start of a new episode (used for replay buffer)
        self.start_saving_comm = False

        if arglist.graph:
            print("Setting up graph writer!")
            self.writer = tf.summary.FileWriter("learning_curves/graph",
                                                self.sess.graph)

        if arglist.analysis:
            print("Starting analysis on {}...".format(arglist.analysis))
            if arglist.analysis != 'video':
                analyze.run_analysis(arglist, env, self.trainers)
            return  # should be a single run
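Note: create_seed is called in Examples #1 and #5 but not defined in these snippets. A minimal sketch of what such a helper might do, assuming it only seeds the standard RNGs (the project's actual implementation may differ):

import random

import numpy as np
import tensorflow as tf

def create_seed(seed):
    # Hypothetical helper: make runs reproducible by seeding every RNG in use.
    random.seed(seed)
    np.random.seed(seed)
    tf.set_random_seed(seed)  # TF1-style API, matching tf.ConfigProto above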
Example #2
    def post(self):
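        # __validate_post_args (not shown here) is assumed to return the parsed
        # input on success and None when the request body is invalid.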
        im = self.__validate_post_args()
        if im is None:
            return

        result = analyze.run_analysis(im)
        self.write(utils.serialize(result))
        self.set_header('Content-Type', 'application/json; charset=UTF-8')
        self.set_status(200)
Example #3
def add():
    #pdb.set_trace()
    if request.method == 'POST':
        # download html, parse text into individual words
        #print "generating response for URL: " + response.form['site']
        article_bag = bag.get_bag(request.form['site'])

        # analysis is done as part of generating output

        data = {
            'input' : request.form,
            'output' : analyze.run_analysis(article_bag)
        }
        js = json.dumps(data)

        return Response(js, status=200, mimetype='application/json')
    else:
        return "Must POST for this method to work"
Example #4
def test_analyze():
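  # 'a' is assumed to be the analysis module imported under an alias,
  # e.g. import analyze as a (the import is not shown in this snippet).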
  prefix = "results/single_trial_test"
  res = a.run_analysis(prefix)
  print(res)
Example #5
def train(arglist):
    # To make sure that training and testing are based on diff seeds
    if arglist.restore:
        create_seed(np.random.randint(2))
    else:
        create_seed(arglist.seed)

    with U.single_threaded_session() as sess:
        # Create environment
        env = make_env(arglist.scenario, arglist, arglist.benchmark)
        # Create agent trainers
        obs_shape_n = [env.observation_space[i].shape for i in range(env.n)]

        num_adversaries = min(env.n, arglist.num_adversaries)
        trainers = get_trainers(env, num_adversaries, obs_shape_n, arglist)
        print('Using good policy {} and adv policy {}'.format(arglist.good_policy, arglist.adv_policy))

        # Initialize
        U.initialize()

        # Load previous results, if necessary
        if arglist.load_dir == "":
            arglist.load_dir = arglist.save_dir
        if arglist.restore or arglist.benchmark:
            print('Loading previous state...')
            U.load_state(arglist.load_dir)

        if arglist.analysis:
            print("Starting analysis on {}...".format(arglist.analysis))
            if arglist.analysis != 'video':
                analyze.run_analysis(arglist, env, trainers)
            return # should be a single run

        episode_rewards = [0.0]  # sum of rewards for all agents
        agent_rewards = [[0.0] for _ in range(env.n)]  # individual agent reward
        final_ep_rewards = []  # sum of rewards for training curve
        final_ep_ag_rewards = []  # agent rewards for training curve
        agent_info = [[[]]]  # placeholder for benchmarking info
        saver = tf.train.Saver()
        obs_n = env.reset()
        episode_step = 0
        train_step = 0
        t_start = time.time()
        new_episode = True # start of a new episode (used for replay buffer)
        start_saving_comm = False

        if arglist.graph:
            print("Setting up graph writer!")
            writer = tf.summary.FileWriter("learning_curves/graph",sess.graph)

        print('Starting iterations...')
        while True:
            if arglist.actor_lstm:
                # get critic input states
                p_in_c_n, p_in_h_n = get_lstm_states('p', trainers) # num_trainers x 1 x 1 x 64
            if arglist.critic_lstm:
                q_in_c_n, q_in_h_n = get_lstm_states('q', trainers) # num_trainers x 1 x 1 x 64

            # get action
            action_n = [agent.action(obs) for agent, obs in zip(trainers,obs_n)]
            if arglist.critic_lstm:
                # get critic output states
                p_states = [p_in_c_n, p_in_h_n] if arglist.actor_lstm else []
                update_critic_lstm(trainers, obs_n, action_n, p_states)
                q_out_c_n, q_out_h_n = get_lstm_states('q', trainers) # num_trainers x 1 x 1 x 64
            if arglist.actor_lstm:
                p_out_c_n, p_out_h_n = get_lstm_states('p', trainers) # num_trainers x 1 x 1 x 64

            # environment step
            new_obs_n, rew_n, done_n, info_n = env.step(action_n)
            episode_step += 1
            done = all(done_n)
            terminal = (episode_step >= arglist.max_episode_len)
            # collect experience
            for i, agent in enumerate(trainers):
                num_episodes = len(episode_rewards)
                # do this every iteration
                if arglist.critic_lstm and arglist.actor_lstm:
                    agent.experience(obs_n[i], action_n[i], rew_n[i],
                                    new_obs_n[i], done_n[i], # terminal,
                                    p_in_c_n[i][0], p_in_h_n[i][0],
                                    p_out_c_n[i][0], p_out_h_n[i][0],
                                    q_in_c_n[i][0], q_in_h_n[i][0],
                                    q_out_c_n[i][0], q_out_h_n[i][0], new_episode)
                elif arglist.critic_lstm:
                    agent.experience(obs_n[i], action_n[i], rew_n[i],
                                    new_obs_n[i], done_n[i], # terminal,
                                    q_in_c_n[i][0], q_in_h_n[i][0],
                                    q_out_c_n[i][0], q_out_h_n[i][0],new_episode)
                elif arglist.actor_lstm:
                    agent.experience(obs_n[i], action_n[i], rew_n[i],
                                    new_obs_n[i], done_n[i], # terminal,
                                    p_in_c_n[i][0], p_in_h_n[i][0],
                                    p_out_c_n[i][0], p_out_h_n[i][0],
                                    new_episode)
                else:
                    agent.experience(obs_n[i], action_n[i], rew_n[i],
                                    new_obs_n[i], done_n[i], # terminal,
                                    new_episode)

                obs_n = new_obs_n

            # Adding rewards
            if arglist.tracking:
                for i, a in enumerate(trainers):
                    if arglist.num_episodes - len(episode_rewards) <= 1000:
                        a.tracker.record_information("goal", np.array(env.world.landmarks[0].state.p_pos))
                        a.tracker.record_information("position",np.array(env.world.agents[i].state.p_pos))
                    a.tracker.record_information("ag_reward", rew_n[i])
                    a.tracker.record_information("team_dist_reward", info_n["team_dist"][i])
                    a.tracker.record_information("team_diff_reward", info_n["team_diff"][i])

            # Closing graph writer
            if arglist.graph:
                writer.close()
            for i, rew in enumerate(rew_n):
                episode_rewards[-1] += rew
                agent_rewards[i][-1] += rew

            if done or terminal:
                new_episode = True
                num_episodes = len(episode_rewards)
                obs_n = env.reset()
                # reset trainers
                if arglist.actor_lstm or arglist.critic_lstm:
                    for agent in trainers:
                        agent.reset_lstm()
                if arglist.tracking:
                    for agent in trainers:
                        agent.tracker.reset()
                episode_step = 0
                episode_rewards.append(0)
                for a in agent_rewards:
                    a.append(0)
                agent_info.append([[]])
            else:
                new_episode=False

            # increment global step counter
            train_step += 1

            # for benchmarking learned policies
            if arglist.benchmark:
                for i, info in enumerate(info_n):
                    agent_info[-1][i].append(info_n['n'])
                if train_step > arglist.benchmark_iters and (done or terminal):
                    file_name = arglist.benchmark_dir + arglist.exp_name + '.pkl'
                    print('Finished benchmarking, now saving...')
                    with open(file_name, 'wb') as fp:
                        pickle.dump(agent_info[:-1], fp)
                    break
                continue

            # update all trainers, if not in display or benchmark mode
            loss = None

            # get same episode sampling
            if arglist.sync_sampling:
                inds = [random.randint(0, len(trainers[0].replay_buffer._storage)-1) for i in range(arglist.batch_size)]
            else:
                inds = None

            for agent in trainers:
                # if arglist.lstm:
                #     agent.preupdate(inds=inds)
                # else:
                agent.preupdate(inds)
            for agent in trainers:
                loss = agent.update(trainers, train_step)
                if loss is None: continue

            # for displaying learned policies
            if arglist.display:
                env.render()
                # continue

            # save model, display training output
            if terminal and (len(episode_rewards) % arglist.save_rate == 0):
                U.save_state(arglist.save_dir, saver=saver)
                # print statement depends on whether or not there are adversaries
                if num_adversaries == 0:
                    print("steps: {}, episodes: {}, mean episode reward: {}, time: {}".format(
                        train_step, len(episode_rewards), np.mean(episode_rewards[-arglist.save_rate:]), round(time.time()-t_start, 3)))
                else:
                    print("steps: {}, episodes: {}, mean episode reward: {}, agent episode reward: {}, time: {}".format(
                        train_step, len(episode_rewards), np.mean(episode_rewards[-arglist.save_rate:]),
                        [np.mean(rew[-arglist.save_rate:]) for rew in agent_rewards], round(time.time()-t_start, 3)))
                t_start = time.time()
                # Keep track of final episode reward
                final_ep_rewards.append(np.mean(episode_rewards[-arglist.save_rate:]))
                for rew in agent_rewards:
                    final_ep_ag_rewards.append(np.mean(rew[-arglist.save_rate:]))

            # saves final episode reward for plotting training curve later
            if len(episode_rewards) > arglist.num_episodes:
                # U.save_state(arglist.save_dir, saver=saver)
                if arglist.tracking:
                    for agent in trainers:
                        agent.tracker.save()

                rew_file_name = "rewards/" + arglist.commit_num + "_rewards.pkl"
                with open(rew_file_name, 'wb') as fp:
                    pickle.dump(final_ep_rewards, fp)
                agrew_file_name = "rewards/" + arglist.commit_num + "_agrewards.pkl"
                # agrew_file_name = arglist.plots_dir + arglist.exp_name + '_agrewards.pkl'
                with open(agrew_file_name, 'wb') as fp:
                    pickle.dump(final_ep_ag_rewards, fp)
                print('...Finished total of {} episodes.'.format(len(episode_rewards)))
                break
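train() in Example #5 expects a parsed arglist; a typical entry point would look roughly like this, where parse_args is an assumed helper (not shown) that builds a namespace with the flags used above:

if __name__ == '__main__':
    arglist = parse_args()  # assumed helper returning an argparse.Namespace
    train(arglist)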
Example #6
def kickoff():
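	# run_analysis (imported elsewhere) is assumed to write
	# '<fbid>_facebook_analysis.csv', which is read back into a DataFrame below;
	# fbid, start_date and end_date are presumably module-level globals.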
	run_analysis(fbid, start_date, end_date)
	summary = pd.read_csv('%s_facebook_analysis.csv' % fbid)
	return render_template('index.html.jinja', summary = summary)