class ModelInterface:
    """Thin wrapper around a trained actor/critic pair loaded from checkpoints."""

    def __init__(self):
        self.agent = Agent(state_size=STATE_SIZE,
                           action_size=ACTION_SIZE,
                           random_seed=10)
        self.agent.actor_local.load_state_dict(
            torch.load('model/checkpoint_actor.pth', map_location='cpu'))
        self.agent.critic_local.load_state_dict(
            torch.load('model/checkpoint_critic.pth', map_location='cpu'))
        self.agent.actor_local.eval()
        self.agent.critic_local.eval()

    def get_action_q(self, state, action):
        # The critic expects a batch of 128; pad the single (state, action)
        # pair into zero-filled batches and read back the first Q-value.
        s = np.zeros((128, 6))
        s[0, :] = state
        a = np.zeros((128, 2))
        a[0, :] = action
        state = torch.Tensor(s)
        action = torch.Tensor(a)
        return self.agent.critic_local(state, action).detach().numpy()[0, 0]

    def get_action(self, state):
        return self.agent.act(state)
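
# A minimal usage sketch; the 6-dim state and 2-dim action follow the
# (128, 6) / (128, 2) padding above, and the values are illustrative:
# interface = ModelInterface()
# state = np.zeros(6)
# action = interface.get_action(state)        # actor's action for this state
# q = interface.get_action_q(state, action)   # critic's Q(s, a) estimate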
def train_sarsa(num_trials, num_episodes, lr, gamma, total_trials,
                alarm_consistency, epsilon):
    undiscounted_returns = np.zeros(shape=(total_trials, num_episodes))
    decayed_epsilon = epsilon
    environment = Environment.Environment(Constants.observations_file,
                                          Constants.log_crash_prior,
                                          alarm_consistency)
    agent = Agent.Agent(epsilon=epsilon)
    for trial in range(num_trials[0], num_trials[1]):
        agent.reset(epsilon)
        for episode in range(num_episodes):
            # if episode % Constants.epsilon_decay_episode:
            #     decayed_epsilon /= 2
            #     agent.set_epsilon(decayed_epsilon)
            print("Trial: %d, Episode: %d" % (trial, episode))
            environment.reset()
            current_state = environment.current_state
            current_action = agent.get_action(current_state)
            total_undiscounted_reward = 0
            while not environment.done:
                next_state, reward, done = environment.step(current_action)
                next_action = agent.get_action(next_state)
                # print current_state, current_action, reward, next_state, next_action
                agent.sarsa_update(current_state, current_action, reward,
                                   next_state, next_action, lr, gamma)
                current_state = next_state
                current_action = next_action
                total_undiscounted_reward += reward
            undiscounted_returns[trial][episode] = total_undiscounted_reward
    return undiscounted_returns
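
# A minimal sketch of the one-step on-policy update that agent.sarsa_update
# presumably implements; the dict-based Q-table is an illustrative
# assumption, not the repository's data structure:
def sarsa_update(Q, s, a, r, s_next, a_next, lr, gamma):
    """SARSA: Q(s, a) += lr * (r + gamma * Q(s', a') - Q(s, a))."""
    Q[(s, a)] += lr * (r + gamma * Q[(s_next, a_next)] - Q[(s, a)])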
def add_rep():
    agency = request.form.get("agency")
    agent = request.form.get("agent")
    email = request.form.get("email")
    phone = request.form.get("phone")

    user_id = session["current_user"]
    user = User.query.get(user_id)
    user_agencies = [a.agency for a in user.agencies]
    print(user_agencies)
    print(agency)

    if agency not in user_agencies:
        kwargs = dict(user_id=user_id, agency=agency)
        db.session.add(Agency(**kwargs))
        db.session.commit()
        print("agency added")

    agency_id = Agency.query.filter_by(agency=agency).first().agency_id
    kwargs = dict(user_id=user_id, agency_id=agency_id, agent=agent,
                  email=email, phone=phone)
    db.session.add(Agent(**kwargs))
    db.session.commit()

    return redirect("/users/{}/my_contact".format(user_id))
def load_agents():
    """Load agents from seed data into database."""
    with open("seed_data/agents.txt") as agents:
        for row in agents:
            fields = row.rstrip().split("|")
            hidden = fields[6] == "True"
            kwargs = dict(
                agent_id=fields[0],
                user_id=fields[1],
                agency_id=fields[2],
                agent=fields[3],
                email=fields[4],
                phone=fields[5],
                hidden=hidden,
            )
            # Drop empty fields so the model falls back to column defaults.
            for key in [k for k, v in kwargs.items() if v == ""]:
                del kwargs[key]
            agent = Agent(**kwargs)
            db.session.add(agent)
            db.session.commit()
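
# Hypothetical example of one agents.txt row, inferred from the field
# order above (not taken from the real seed file):
# 7|3|2|Jane Doe|jane@example.com|555-0100|False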
def main():
    config = Config()
    print('*' * 20, config.envname, config.method, '*' * 20)
    env = config.env
    if config.seed:
        env.seed(config.seed)
        torch.manual_seed(config.seed)
    agent = Agent(env.observation_space.shape[0],
                  env.action_space.shape[0]).to(config.device)
    expert = load_policy(config.expert_path + config.envname + '.pkl')
    method = config.method
    if method == 'BC':
        agent = BehavioralCloning(config, agent, expert)
    elif method == 'DA':
        agent = DAgger(config, agent, expert)
    else:
        raise NotImplementedError(method)
    avrg_mean, avrg_std = Eval(config, expert)
    print('[expert] avrg_mean:{:.2f} avrg_std:{:.2f}'.format(avrg_mean, avrg_std))
    avrg_mean, avrg_std = Eval(config, agent)
    print('[agent] avrg_mean:{:.2f} avrg_std:{:.2f}'.format(avrg_mean, avrg_std))
def __init__(self, **kwargs):
    super(SupervisedTrainer, self).__init__(**kwargs)
    self.targets = tf.placeholder(tf.int32, [None, None])
    self.targets_length = tf.placeholder(tf.int32, [None])
    self.slot_targets = tf.placeholder(tf.int32, [None, None])
    self.slot_any_targets = tf.placeholder(tf.int32, [None, None])
    self.action_targets = tf.placeholder(tf.int32, [None])
    self.value_targets = tf.placeholder(tf.float32, [None])
    self._decoder_sampling_p = tf.placeholder(tf.float32, [])
    self._loss_mixture_weights = tf.placeholder(tf.float32, [None])
    self._dropout = tf.placeholder(tf.float32, [])
    self.agent = Agent(n_slots=self._n_slots,
                       n_actions=self._n_actions,
                       word_embeddings_shape=self._word_embeddings_shape,
                       hidden_size=self._hidden_size,
                       dropout=self._dropout,
                       decoder_helper_initializer=self._decoder_helper())
    self._loss()
    self._optimizer()
    self._metrics_writers()
    print('Dilatation rates:', self.DILATATION_RATES)
def dashboard_summary():
    summary = {
        'agent_count': Agent.count(),
        'service_count': SInfo.count(),
        'alarm_count': 0,
        'sample_count': 0
    }
    return dump_json(summary)
def _build_agent(self):
    self.agent = Agent(
        word_embeddings_shape=[len(self._word_embeddings), 300],
        n_slots=len(self._slot_embeddings),
        n_actions=len(self._action_embbeddings),
        hidden_size=self._hidden_size)
    self.agent.saver.restore(self._sess, self._checkpoint)
def get_agents():
    mhost = os.getenv('MASTER_HOST', None) or socket.gethostname()
    mport = _CONFIG['master']['server']['port']
    master_addr = '%s:%s' % (mhost, mport)
    agents = Agent.query(orderby='last_msg_at DESC')
    # An agent is "active" if it has reported within the last 5 minutes.
    thresh = datetime.utcnow() - timedelta(minutes=5)
    for a in agents:
        a.status = 'active' if a.last_msg_at and a.last_msg_at >= thresh else 'inactive'
    return dump_json({'agents': agents, 'master_addr': master_addr})
def main():
    data_size = 2000
    agent = Agent(1, envs).to(device)
    optimizer = optim.Adam(agent.parameters(), lr=0.0005, eps=1e-5)
    for epoch in range(data_size):
        rollouts = get_rollouts(trajectories, 64)
        losses = []
        for states, rewards, actions in rollouts:
            x = torch.from_numpy(states).float()
            y = torch.from_numpy(actions).long()
            loss = train(agent, x, y, optimizer, epochs=2)
            losses.append(loss)
        if epoch % 100 == 0:
            os.makedirs("models", exist_ok=True)
            torch.save(agent.state_dict(), "models/agent-il.pt")
            print(f"epoch: {epoch} loss: {np.mean(losses)}")
def main():
    args = get_args()
    args.critic_layers = literal_eval(args.critic_layers)
    args.actor_layers = literal_eval(args.actor_layers)

    save_dir = os.path.join('tests')
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    state_transform = NormState(args.prosthetic)
    # state_transform = StateVelCentr(obstacles_mode='standard',
    #                                 exclude_centr=True,
    #                                 vel_states=[])
    env = RunEnv2(state_transform,
                  integrator_accuracy=args.accuracy,
                  model=args.modeldim,
                  prosthetic=args.prosthetic,
                  difficulty=args.difficulty,
                  skip_frame=1)
    env.change_model(args.modeldim, args.prosthetic, args.difficulty)
    num_actions = env.get_action_space_size()
    del env

    model_params = {
        'state_size': state_transform.state_size,
        'num_act': num_actions,
        'gamma': 0,
        'actor_layers': args.actor_layers,
        'critic_layers': args.critic_layers,
        'actor_lr': 0,
        'critic_lr': 0,
        'layer_norm': args.layer_norm
    }
    actor_fn, params_actor, params_crit, actor_lr, critic_lr = \
        build_model_test(**model_params)
    actor = Agent(actor_fn, params_actor, params_crit)
    actor.load(args.weights)
    weights = [p.get_value() for p in params_actor]

    global_step = 0
    test_agent(args, state_transform, args.episodes, actor, weights,
               global_step, save_dir)
def submit_agent(args, model_params):
    actor_fn, params_actor, params_crit = build_model_test(**model_params)
    weights = [p.get_value() for p in params_actor]
    actor = Agent(actor_fn, params_actor, params_crit)
    actor.set_actor_weights(weights)
    if args.weights is not None:
        actor.load(args.weights)

    env = RunEnv2(model=args.modeldim,
                  prosthetic=args.prosthetic,
                  difficulty=args.difficulty,
                  skip_frame=3)

    # Settings
    remote_base = "http://grader.crowdai.org:1729"
    token = args.token
    client = Client(remote_base)

    # Create environment
    di = client.env_create(token, env_id="ProstheticsEnv")

    stat = []
    ep = 1
    ii = 0
    reward_sum = 0
    print('\n\n#################################################\n\n')
    while True:
        ii += 1
        proj = env.dict_to_vec(di)
        action = actor.act(proj)
        action += np.random.rand(len(action)) / 10.
        [di, reward, done, info] = client.env_step(action.tolist(), True)
        reward_sum += reward
        print('ep: ' + str(ep) + ' >> step: ' + str(int(ii)) +
              ' >> reward: ' + format(reward, '.2f') + ' \t' + str(int(reward_sum)) +
              '\t >> pelvis X Y Z: \t' + format(di['body_pos']['pelvis'][0], '.2f') +
              '\t' + format(di['body_pos']['pelvis'][1], '.2f') +
              '\t' + format(di['body_pos']['pelvis'][2], '.2f'))
        if done:
            print('\n\n#################################################\n\n')
            stat.append([ep, ii, reward_sum])
            di = client.env_reset()
            ep += 1
            ii = 0
            reward_sum = 0
            if not di:
                break

    for e in stat:
        print(e)
    print('\n\nclient.submit()\n\n')
    client.submit()
    print('\n\n#################################################\n\n')
    print('DONE\n\n')
def _build_agent(self):
    self.agent = Agent(
        word_embeddings_shape=[len(self._word_embeddings), 300],
        n_slots=len(self._slot_embeddings),
        n_actions=len(self._action_embbeddings),
        hidden_size=self._hidden_size,
        scope='target_agent',
        decoder_max_iter=50)
    self.agent.saver.restore(self._sess, self._checkpoint)
def test_agent(args, testing, state_transform, num_test_episodes, model_params,
               weights, best_reward, updates, global_step, save_dir):
    env = RunEnv2(state_transform,
                  visualize=args.test,
                  integrator_accuracy=args.accuracy,
                  model=args.modeldim,
                  prosthetic=args.prosthetic,
                  difficulty=args.difficulty,
                  skip_frame=1)
    test_rewards = []

    train_fn, actor_fn, target_update_fn, params_actor, params_crit, actor_lr, critic_lr = \
        build_model(**model_params)
    actor = Agent(actor_fn, params_actor, params_crit)
    actor.set_actor_weights(weights)
    if args.weights is not None:
        actor.load(args.weights)

    for ep in range(num_test_episodes):
        seed = random.randrange(2**32 - 2)
        state = env.reset(seed=seed, difficulty=2)
        test_reward = 0
        while True:
            state = np.asarray(state, dtype='float32')
            action = actor.act(state)
            state, reward, terminal, _ = env._step(action)
            test_reward += reward
            if terminal:
                break
        test_rewards.append(test_reward)

    mean_reward = np.mean(test_rewards)
    std_reward = np.std(test_rewards)
    test_str = 'global step {}; test reward mean: {:.2f}, std: {:.2f}, all: {} '.format(
        global_step.value, float(mean_reward), float(std_reward), test_rewards)
    print(test_str)
    with open(os.path.join(save_dir, 'test_report.log'), 'a') as f:
        f.write(test_str + '\n')

    if mean_reward > best_reward.value or mean_reward > 30 * env.reward_mult:
        if mean_reward > best_reward.value:
            best_reward.value = mean_reward
        fname = os.path.join(
            save_dir, 'weights_updates_{}_reward_{:.2f}.pkl'.format(
                updates.value, mean_reward))
        actor.save(fname)
    testing.value = 0
class ESWorker(BaseESWorker):
    def prepare(self):
        self.agent = None

    def _prepare(self, config):
        self.env = make_vec_env(SerialVecEnv, make_gym_env, config['env.id'],
                                config['train.N'], 0)
        self.env = VecClipAction(self.env)
        if config['env.standardize']:
            self.env = VecStandardize(self.env,
                                      use_obs=True,
                                      use_reward=False,
                                      clip_obs=10.0,
                                      clip_reward=10.0,
                                      gamma=0.99,
                                      eps=1e-08)
        self.env_spec = EnvSpec(self.env)
        self.device = torch.device('cpu')
        self.agent = Agent(config, self.env_spec, self.device)

    def f(self, config, solution):
        if self.agent is None:
            self._prepare(config)
        solution = torch.from_numpy(np.asarray(solution)).float().to(self.device)
        assert solution.numel() == self.agent.num_params
        # Load solution params to agent
        self.agent.from_vec(solution)
        runner = EpisodeRunner(config, self.agent, self.env)
        with torch.no_grad():
            D = runner(self.env_spec.T)
        mean_return = D.numpy_rewards.sum(-1).mean()
        # ES does minimization, so use negative returns
        function_value = -mean_return
        return function_value
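
# A minimal sketch of a single fitness evaluation with this worker; the
# config dict and flat parameter vector here are placeholders, not values
# from the source:
# worker = ESWorker()
# worker.prepare()
# fitness = worker.f(config, solution)  # returns -mean_return (lower is better)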
def del_agent(aid):
    connectType = request.args.get('connect_type')
    u = request.args.get('username')
    p = request.args.get('password')
    agent = Agent.get_by_id(aid)
    logging.info('remove agent on %s@%s', u, agent)
    with NodeConnector(agent.host, u, p) as nc:
        nc.remove_agent()
        agent.remove()
    logging.info('agent removed on %s@%s finished.', u, agent)
    return dump_json(agent)
def infection_events(env: simpy.Environment, infected: Agent, rng: np.random.Generator):
    print(f'@t={env.now} - {infected}->{State.INFECTED.name}')
    infected.state = State.INFECTED
    yield env.timeout(delay=rng.normal(loc=4.6, scale=0.3))

    print(f'@t={env.now} - {infected}->{State.INFECTIOUS.name}')
    infected.state = State.INFECTIOUS
    if rng.uniform() < p_asymptomatic:
        # Asymptomatic
        yield env.timeout(delay=rng.normal(loc=6.5, scale=0.4))
        print(f'@t={env.now} - {infected}->{State.REMOVED.name}')
        infected.state = State.REMOVED
    else:
        # Symptomatic
        yield env.timeout(delay=0.5)
        print(f'@t={env.now} - {infected}->{State.SYMPTOMATIC_INFECTIOUS.name}')
        infected.state = State.SYMPTOMATIC_INFECTIOUS
        yield env.timeout(delay=rng.normal(loc=6.0, scale=0.4))
        print(f'@t={env.now} - {infected}->{State.REMOVED.name}')
        infected.state = State.REMOVED
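
# A minimal sketch of scheduling this generator with simpy; the stand-in
# agent object and the assumption that p_asymptomatic is defined at module
# level are illustrative, not from the source:
# env = simpy.Environment()
# rng = np.random.default_rng(seed=1)
# patient_zero = Agent()        # assumes Agent() is default-constructible
# env.process(infection_events(env, patient_zero, rng))
# env.run(until=30)             # simulate 30 time units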
def test_agent(args, testing, num_test_episodes, model_params, weights,
               best_reward, updates, global_step, save_dir):
    env = RunEnv2(model=args.modeldim,
                  prosthetic=args.prosthetic,
                  difficulty=args.difficulty,
                  skip_frame=3)
    test_rewards_all = []
    test_pelvis_X_all = []

    train_fn, actor_fn, target_update_fn, params_actor, params_crit, actor_lr, critic_lr = \
        build_model(**model_params)
    actor = Agent(actor_fn, params_actor, params_crit)
    actor.set_actor_weights(weights)
    # if args.weights is not None:
    #     actor.load(args.weights)

    for ep in range(num_test_episodes):
        seed = random.randrange(2**32 - 2)
        state = env.reset(seed=seed, difficulty=0)
        test_reward = 0
        while True:
            state = np.asarray(state, dtype='float32')
            action = actor.act(state)
            state, reward, terminal, info = env._step(action)
            test_reward += reward
            if terminal:
                break
        test_rewards_all.append(test_reward)
        test_pelvis_X_all.append(info['pelvis_X'])

    test_reward_mean = np.mean(test_rewards_all)
    mean_pelvis_X = np.mean(test_pelvis_X_all)
    std_reward = np.std(test_rewards_all)
    test_str = ('global step {}; test_reward_mean: {:.2f}, test_rewards_all: {}; '
                'mean_pelvis_X: {:.2f}, test_pelvis_X_all: {} ').format(
        global_step.value, float(test_reward_mean), test_rewards_all,
        float(mean_pelvis_X), test_pelvis_X_all)
    print(test_str)
    try:
        with open(os.path.join(save_dir, 'test_report.log'), 'a') as f:
            f.write(test_str + '\n')
    except OSError:
        print('could not write test_report.log')

    if test_reward_mean > best_reward.value or test_reward_mean > 30 * env.reward_mult:
        if test_reward_mean > best_reward.value:
            best_reward.value = test_reward_mean
        fname = os.path.join(
            save_dir, 'weights_updates_{}_reward_{:.1f}_pelvis_X_{:.1f}.pkl'.format(
                updates.value, test_reward_mean, mean_pelvis_X))
        actor.save(fname)
    testing.value = 0
def main():
    # Agent setting
    agent = Agent()
    # Learning
    agent.learn()
    # Test
    agent.test()
    agent.sess.close()
def load_agents():
    """Load agents from agents.txt to database."""
    with open("seed_data/agents.txt") as f:
        for row in f:
            agent_id, name, password, phone_number, email, tier = \
                row.strip().split("|")
            agent = Agent(id=agent_id.strip(),
                          name=name.strip(),
                          password=password.strip(),
                          phone_number=phone_number.strip(),
                          email=email.strip(),
                          tier=int(tier.strip()))
            db.session.add(agent)
            db.session.commit()
def main():
    env = gym.make("LunarLander-v2")
    agent = Agent(env)
    agent.load_state_dict(torch.load("./models/agent.pt"))
    agent.eval()

    obs = env.reset()
    done = False
    for i in range(10000):
        env.render()
        obs = torch.from_numpy(obs).float()
        action, _, _ = agent.get_action(obs)
        obs, rew, done, info = env.step(action.cpu().numpy())
        sleep(0.001)
        if done:
            obs = env.reset()
def test_agent(args, num_test_episodes, model_params):
    env = RunEnv2(visualize=True,
                  model=args.modeldim,
                  prosthetic=args.prosthetic,
                  difficulty=args.difficulty,
                  skip_frame=3)
    test_rewards = []

    actor_fn, params_actor, params_crit = build_model_test(**model_params)
    weights = [p.get_value() for p in params_actor]
    actor = Agent(actor_fn, params_actor, params_crit)
    actor.set_actor_weights(weights)
    if args.weights is not None:
        actor.load(args.weights)

    for ep in range(num_test_episodes):
        seed = random.randrange(2**32 - 2)
        state = env.reset(seed=seed, difficulty=0)
        test_reward = 0
        while True:
            state = np.asarray(state, dtype='float32')
            action = actor.act(state)
            state, reward, terminal, _ = env._step(action)
            test_reward += reward
            if terminal:
                break
        test_rewards.append(test_reward)

    mean_reward = np.mean(test_rewards)
    std_reward = np.std(test_rewards)
    global_step = 0
    test_str = 'global step {}; test reward mean: {:.2f}, std: {:.2f}, all: {} '.format(
        global_step, float(mean_reward), float(std_reward), test_rewards)
    print(test_str)
    with open('test_report.log', 'a') as f:
        f.write(test_str + '\n')
def main():
    data_size = [1, 100, 100, 100, 100, 100, 100]
    for size in data_size:
        x = []
        y = []
        student = Agent(env)
        for episode in range(size):
            states, actions, rewards = generate_rollout(expert, env)
            x.extend(states)
            y.extend(actions)
        x = torch.stack(x)
        y = torch.cat(y)
        train(student, x, y)

        episode_rewards = []
        for _ in range(50):
            states, actions, rewards = generate_rollout(student, env)
            total_reward = np.sum(rewards)
            episode_rewards.append(total_reward)
        print(f'Number of training rollouts: {size}: '
              f'reward mean: {np.mean(episode_rewards)}\n')
"""
Created on Tue Oct 16 23:57:38 2018

@author: Abhista
"""
import gym
from model import Agent
from utils import plotLearning
from gym import wrappers
import numpy as np

if __name__ == '__main__':
    env = gym.make('SpaceInvaders-v0')
    brain = Agent(gamma=0.95, epsilon=1.0, alpha=0.003,
                  maxMemorySize=5000, replace=None)

    # Fill replay memory with random-play transitions before learning.
    while brain.memCntr < brain.memSize:
        observation = env.reset()
        done = False
        while not done:
            # 0 no action, 1 fire, 2 move right, 3 move left,
            # 4 move right fire, 5 move left fire
            action = env.action_space.sample()
            observation_, reward, done, info = env.step(action)
            if done and info['ale.lives'] == 0:
                reward = -100
            brain.storeTransition(
                np.mean(observation[15:200, 30:125], axis=2), action, reward,
                np.mean(observation_[15:200, 30:125], axis=2))
            observation = observation_
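
# The repeated np.mean(frame[15:200, 30:125], axis=2) calls above crop the
# playfield and average the RGB channels to grayscale; a named helper
# (illustrative, not part of the original script) makes that explicit:
def preprocess(frame):
    """Crop the Space Invaders playfield and convert RGB to grayscale."""
    return np.mean(frame[15:200, 30:125], axis=2)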
def train():
    print()
    print("RUNNING THE MINECRAFT SIMULATION")
    print()

    RENDER = False
    # RENDER = True
    LOAD_MODEL = False
    # LOAD_MODEL = True
    start_eps = 0.8
    WRAP = False
    GRID_SIZE = 7
    LOCAL_GRID_SIZE = 9  # Has to be an odd number (I think...)
    SEED = 1
    FOOD_COUNT = 1
    OBSTACLE_COUNT = 0
    # MAP_PATH = "./Maps/Grid{}/map2.txt".format(GRID_SIZE)
    MAP_PATH = None

    env = Environment(wrap=WRAP,
                      grid_size=GRID_SIZE,
                      rate=80,
                      max_time=30,
                      food_count=FOOD_COUNT,
                      obstacle_count=OBSTACLE_COUNT,
                      lava_count=0,
                      zombie_count=0,
                      action_space=5,
                      map_path=MAP_PATH)
    brain = Agent(gamma=0.99, epsilon=start_eps, alpha=0.01,
                  maxMemorySize=10000, replace=10)

    if LOAD_MODEL:
        try:
            path = "./Models/Torch/my_model.pth"
            brain.load_model(path)
            print("Model loaded from path:", path)
            print()
            brain.EPSILON = 0.05
        except Exception:
            print('Could not load model')
            print('Press <ENTER> to continue with random initialisation')
            print()
            input()

    if RENDER:
        env.prerender()

    games_played = 0
    print("INITIALISING REPLAY MEMORY")
    while brain.memCntr < brain.memSize:
        observation, _ = env.reset()
        done = False
        if RENDER:
            env.render()  # Render first screen
        while not done:
            action = brain.chooseAction(observation)
            observation_, reward, done, info = env.step(action)
            if done:
                games_played += 1
            brain.storeTransition(observation, action, reward, done, observation_)
            observation = observation_
            if RENDER:
                env.render()
    print("Done initialising replay memory. Played {} games".format(games_played))

    scores = []
    epsHistory = []
    numGames = 100000
    print_episode = 100
    batch_size = 16
    avg_score = 0
    avg_time = 0
    avg_loss = 0

    print()
    print("TRAINING MODEL")
    print()
    for i in range(numGames):
        epsHistory.append(brain.EPSILON)
        done = False
        observation, _ = env.reset()
        score = 0
        lastAction = 0
        if RENDER:
            env.render()  # Render first screen
        while not done:
            action = brain.chooseAction(observation)
            observation_, reward, done, info = env.step(action)
            brain.storeTransition(observation, action, reward, done, observation_)
            observation = observation_
            loss = brain.learn(batch_size)
            lastAction = action
            if RENDER:
                env.render()

        avg_score += info["score"]
        avg_time += info["time"]
        avg_loss += loss.item()

        if (i % print_episode == 0 and i != 0) or i == numGames - 1:
            print("Episode", i,
                  "\tavg time: {0:.3f}".format(avg_time / print_episode),
                  "\tavg score: {0:.3f}".format(avg_score / print_episode),
                  "\tavg loss: {0:.3f}".format(avg_loss / print_episode),
                  "\tepsilon: %.4f" % brain.EPSILON)
            brain.save_model("./Models/Torch/my_model{}.pth".format(i))
            avg_loss = 0
            avg_score = 0
            avg_time = 0

    brain.save_model("./Models/Torch/my_model.pth")
def infer(game, vid, agent, arbit=False):
    for k in range(5):
        done = False
        state = game.reset()
        while not done:
            game.render()
            # vid.capture_frame()
            if not arbit:
                action = agent.get_action(state, sample=False)
            else:
                action = random.choice(range(action_size))
            next_state, reward, done, _ = game.step(action)
            state = next_state
            time.sleep(0.015)


if __name__ == '__main__':
    # pip install gym[Box2D] -- maybe needed
    game = gym.make('LunarLander-v2')
    vid = None
    # vid = gym.wrappers.monitoring.video_recorder.VideoRecorder(game, path='./random.mp4')
    action_size = game.action_space.n
    print("Action size ", action_size)
    state_size = game.observation_space.shape[0]
    print("State size ", state_size)
    num_episodes = 2000
    agent = Agent(state_size, action_size, gamma=0.99, fc1=64, fc2=64)
    # train(game, num_episodes, agent)
    infer(game, vid, agent, arbit=False)
    # game.close()
def run_agent(model_params, weights, state_transform, data_queue, weights_queue,
              process, global_step, updates, best_reward, param_noise_prob,
              save_dir, max_steps=10000000):
    train_fn, actor_fn, target_update_fn, params_actor, params_crit, actor_lr, critic_lr = \
        build_model(**model_params)
    actor = Agent(actor_fn, params_actor, params_crit)
    actor.set_actor_weights(weights)
    env = RunEnv2(state_transform,
                  max_obstacles=config.num_obstacles,
                  skip_frame=config.skip_frames)
    random_process = OrnsteinUhlenbeckProcess(theta=.1, mu=0., sigma=.2,
                                              size=env.noutput,
                                              sigma_min=0.05,
                                              n_steps_annealing=1e6)
    # prepare buffers for data
    states = []
    actions = []
    rewards = []
    terminals = []

    total_episodes = 0
    start = time()
    action_noise = True
    while global_step.value < max_steps:
        seed = random.randrange(2**32 - 2)
        state = env.reset(seed=seed, difficulty=2)
        random_process.reset_states()

        total_reward = 0.
        total_reward_original = 0.
        terminal = False
        steps = 0

        while not terminal:
            state = np.asarray(state, dtype='float32')
            action = actor.act(state)
            if action_noise:
                action += random_process.sample()

            next_state, reward, next_terminal, info = env.step(action)
            total_reward += reward
            total_reward_original += info['original_reward']
            steps += 1
            global_step.value += 1

            # add data to buffers
            states.append(state)
            actions.append(action)
            rewards.append(reward)
            terminals.append(terminal)

            state = next_state
            terminal = next_terminal

        total_episodes += 1

        # add data to buffers after episode end
        states.append(state)
        actions.append(np.zeros(env.noutput))
        rewards.append(0)
        terminals.append(terminal)

        states_np = np.asarray(states).astype(np.float32)
        data = (states_np,
                np.asarray(actions).astype(np.float32),
                np.asarray(rewards).astype(np.float32),
                np.asarray(terminals))

        weight_send = None
        if total_reward > best_reward.value:
            weight_send = actor.get_actor_weights()
        # send data for training
        data_queue.put((process, data, weight_send, total_reward))

        # receive weights and set params to weights
        weights = weights_queue.get()

        report_str = 'Global step: {}, steps/sec: {:.2f}, updates: {}, episode len {}, ' \
                     'reward: {:.2f}, original_reward {:.4f}; best reward: {:.2f} noise {}'. \
            format(global_step.value, 1. * global_step.value / (time() - start),
                   updates.value, steps, total_reward, total_reward_original,
                   best_reward.value, 'actions' if action_noise else 'params')
        print(report_str)
        with open(os.path.join(save_dir, 'train_report.log'), 'a') as f:
            f.write(report_str + '\n')

        actor.set_actor_weights(weights)
        action_noise = np.random.rand() < 1 - param_noise_prob
        if not action_noise:
            set_params_noise(actor, states_np, random_process.current_sigma)

        # clear buffers
        del states[:]
        del actions[:]
        del rewards[:]
        del terminals[:]

        if total_episodes % 100 == 0:
            env = RunEnv2(state_transform,
                          max_obstacles=config.num_obstacles,
                          skip_frame=config.skip_frames)
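
# For reference, a minimal sketch of the Euler-step update an
# Ornstein-Uhlenbeck process performs per sample; the library's sigma
# annealing (sigma_min, n_steps_annealing) is omitted, and dt is folded
# into the constants:
def ou_sample(x, theta=0.1, mu=0.0, sigma=0.2):
    """x_next = x + theta * (mu - x) + sigma * N(0, 1)."""
    return x + theta * (mu - x) + sigma * np.random.randn(*np.shape(x))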
p.flip_left_right(probability=0.5)
p.zoom_random(probability=0.5, percentage_area=0.95)
p.resize(probability=1.0, width=246, height=205)
p.sample(n_augment)
print("Data augmentation done on directory '%s'." % com_anel_directory)

print("Found %d items on directory '%s', starting data augmentation..."
      % (len(glob.glob(sem_anel_directory + "/*.bmp")), sem_anel_directory))
p = Augmentor.Pipeline(sem_anel_directory, output_directory="augmented")
p.rotate(probability=1, max_left_rotation=5, max_right_rotation=5)
p.flip_left_right(probability=0.5)
p.zoom_random(probability=0.5, percentage_area=0.95)
p.resize(probability=1.0, width=246, height=205)
p.sample(n_augment)
print("Data augmentation done on directory '%s'." % sem_anel_directory)

model = Agent()

# Build the training set: label 1.0 for "com anel" (with ring) images and
# 0.0 for "sem anel" (without ring) images, with pixels scaled to [0, 1].
imgs = []
labels = []
for filename in glob.glob(com_anel_directory + "/augmented/*.bmp"):
    img = cv2.imread(filename)
    img = img / 255.0
    imgs.append(img)
    labels.append(1.0)
for filename in glob.glob(sem_anel_directory + "/augmented/*.bmp"):
    img = cv2.imread(filename)
    img = img / 255.0
    imgs.append(img)
    labels.append(0.0)