def __init__(self, task):
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.exploration_mu = 0
    self.exploration_theta = 0.15
    self.exploration_sigma = 0.2
    self.noise = OUNoise(self.action_size, self.exploration_mu,
                         self.exploration_theta, self.exploration_sigma)

    # Replay memory
    self.buffer_size = 1000000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.01    # for soft update of target parameters
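# The `tau` parameter above is consumed by a soft-update step that is not shown
# in this snippet. A minimal sketch of how it is typically implemented for these
# Keras-backed models (the method name `soft_update` is an assumption):
def soft_update(self, local_model, target_model):
    """Blend target weights toward local weights: w_target <- tau*w_local + (1-tau)*w_target."""
    local_weights = local_model.model.get_weights()
    target_weights = target_model.model.get_weights()
    new_weights = [self.tau * lw + (1.0 - self.tau) * tw
                   for lw, tw in zip(local_weights, target_weights)]
    target_model.model.set_weights(new_weights)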
def __init__(self, state_size, action_size, random_seed):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        random_seed (int): random seed
    """
    self.state_size = state_size
    self.action_size = action_size
    self.seed = random.seed(random_seed)
    np.random.seed(random_seed)  # set the numpy seed

    # Actor Network (w/ Target Network)
    self.actor_local = Actor(state_size, action_size, random_seed).to(device)
    self.actor_target = Actor(state_size, action_size, random_seed).to(device)
    self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)

    # Critic Network (w/ Target Network)
    self.critic_local = Critic(state_size, action_size, random_seed).to(device)
    self.critic_target = Critic(state_size, action_size, random_seed).to(device)
    self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC,
                                       weight_decay=WEIGHT_DECAY)

    # Replay memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed, device)
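# The `ReplayBuffer` used above is not shown. A minimal sketch matching the
# constructor signature `ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed, device)`
# (the internals are an assumption; `action_size` is kept only to match the call):
import random
from collections import deque, namedtuple

import numpy as np
import torch

class ReplayBuffer:
    """Fixed-size buffer that stores experience tuples and samples random minibatches."""

    def __init__(self, action_size, buffer_size, batch_size, seed, device):
        self.action_size = action_size
        self.memory = deque(maxlen=buffer_size)
        self.batch_size = batch_size
        self.device = device
        self.experience = namedtuple("Experience",
                                     ["state", "action", "reward", "next_state", "done"])
        random.seed(seed)

    def add(self, state, action, reward, next_state, done):
        """Append a new experience to memory."""
        self.memory.append(self.experience(state, action, reward, next_state, done))

    def sample(self):
        """Randomly sample a minibatch and move it to the configured device."""
        experiences = random.sample(self.memory, k=self.batch_size)
        states = torch.from_numpy(np.vstack([e.state for e in experiences])).float().to(self.device)
        actions = torch.from_numpy(np.vstack([e.action for e in experiences])).float().to(self.device)
        rewards = torch.from_numpy(np.vstack([e.reward for e in experiences])).float().to(self.device)
        next_states = torch.from_numpy(np.vstack([e.next_state for e in experiences])).float().to(self.device)
        dones = torch.from_numpy(np.vstack([e.done for e in experiences]).astype(np.uint8)).float().to(self.device)
        return states, actions, rewards, next_states, dones

    def __len__(self):
        return len(self.memory)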
def __init__(self, state_size, action_size, random_seed, actor_layers, critic_layers):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): size of the environment state
        action_size (int): size of the environment action
        random_seed (int): seed for the random number generators
        actor_layers (array[int]): size of each layer of the actor network
        critic_layers (array[int]): size of each layer of the critic network
    """
    self.state_size = state_size
    self.action_size = action_size
    self.random_seed = random_seed
    random.seed(random_seed)
    np.random.seed(random_seed)

    print(f'Agent running on {DEVICE}')

    # Actor
    self.actor_local = Actor(self.state_size, self.action_size, self.random_seed, *actor_layers).to(DEVICE)
    self.actor_target = Actor(self.state_size, self.action_size, self.random_seed, *actor_layers).to(DEVICE)
    self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)

    # Critic
    self.critic_local = Critic(self.state_size, self.action_size, self.random_seed, *critic_layers).to(DEVICE)
    self.critic_target = Critic(self.state_size, self.action_size, self.random_seed, *critic_layers).to(DEVICE)
    self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC,
                                       weight_decay=WEIGHT_DECAY)

    # Noise
    self.noise = OrnsteinUhlenbeck(self.action_size, self.random_seed)

    # Replay Buffer
    self.memory = ReplayBuffer(self.action_size, BUFFER_SIZE, BATCH_SIZE, self.random_seed)
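# The `OrnsteinUhlenbeck` noise process constructed above is not defined in this
# snippet. A minimal sketch of a typical implementation, matching the
# `(size, seed)` call signature and the usual DDPG defaults (mu=0, theta=0.15,
# sigma=0.2); the parameter defaults are an assumption:
import copy
import random

import numpy as np

class OrnsteinUhlenbeck:
    """Ornstein-Uhlenbeck process: temporally correlated exploration noise."""

    def __init__(self, size, seed, mu=0.0, theta=0.15, sigma=0.2):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        random.seed(seed)
        self.reset()

    def reset(self):
        """Reset the internal state to the mean."""
        self.state = copy.copy(self.mu)

    def sample(self):
        """Advance the process by dx = theta*(mu - x) + sigma*N(0, 1) and return the new state."""
        x = self.state
        dx = self.theta * (self.mu - x) + self.sigma * np.array(
            [random.gauss(0, 1) for _ in range(len(x))])
        self.state = x + dx
        return self.state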
def __init__(self, env, hp):
    self.env = env
    self.hp = hp

    obs_size = env.observation_space.shape[0]
    act_size = env.action_space.shape[0]

    self.critic = Critic(obs_size, act_size, hp)
    self.target_critic = Critic(obs_size, act_size, hp)
    self.actor = Actor(obs_size, act_size, env.action_space.high[0], hp)
    self.target_actor = Actor(obs_size, act_size, env.action_space.high[0], hp)

    self.dataset = ReplayBuffer(self.hp['batch_size'], self.hp['max_buffer_size'])

    self.noise = OrnsteinUhlenbeckProcess(act_size, sigma=self.hp['noise_sigma'])
    self.noise.reset_states()
def add_actor():
    """Add a new actor."""
    data = get_request_data()
    ### YOUR CODE HERE ###

    # parse the date of birth and reformat it to the RFC 1123 style
    # string used in the 200 response
    cr_date = dt.strptime(data['date_of_birth'], '%d.%m.%Y')
    cr_date = cr_date.strftime('%a, %d %b %Y %H:%M:%S GMT')

    new_record = Actor(name=data['name'], gender=data['gender'], date_of_birth=cr_date)
    new_actor = {k: v for k, v in new_record.__dict__.items() if k in ACTOR_FIELDS}
    return make_response(jsonify(new_actor), 200)
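# A hypothetical client-side call against the endpoint above. The route path
# '/actor' and the host are assumptions; only the payload keys ('name',
# 'gender', 'date_of_birth' in '%d.%m.%Y' format) come from the handler itself:
import requests

resp = requests.post(
    "http://localhost:5000/actor",
    json={"name": "Jane Doe", "gender": "female", "date_of_birth": "14.02.1981"},
)
print(resp.status_code, resp.json())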
def create_actors_acts_in_and_directors():
    movies = Movies.load_all()
    for movie in movies:
        tmdb_movie = tmdb.Movies(movie.id)
        tmdb_movie.credits()  # populates tmdb_movie.cast and tmdb_movie.crew

        for person in tmdb_movie.crew:
            if person['job'] == 'Director':
                director = Director(movie.id, person['name'], person['profile_path'])
                if not Director.find_by_movie_id(movie.id):
                    director.save_to_db()

        for person in tmdb_movie.cast[:4]:
            if not Actor.load_by_id(person['id']):
                actor = Actor(person['id'], person['name'], person['profile_path'])
                actor.save_to_db()
                relation = Movie_Actors(movie.id, person['id'])
                relation.save_to_db()
def load(self, transformed_response):
    print("loading...\n")
    result = {}

    if transformed_response.get('Person'):
        self.entities += [Person().extract(transformed_response['Person'])]
    if transformed_response.get('Author'):
        self.entities += [Author().extract(transformed_response['Author'])]
    if transformed_response.get('Actor'):
        self.entities += [Actor().extract(transformed_response['Actor'])]
    if transformed_response.get('BusinessPerson'):
        self.entities += [BusinessPerson().extract(transformed_response['BusinessPerson'])]
    if transformed_response.get('League'):
        self.entities += [League().extract(transformed_response['League'])]
    if transformed_response.get('SportsTeam'):
        self.entities += [SportsTeam().extract(transformed_response['SportsTeam'])]
    if transformed_response.get('Description'):
        self.entities += [Description().extract(transformed_response['Description'])]

    # build a header listing every extracted entity type except Description
    header = str(self.query) + "("
    for entity in self.entities:
        if entity.__class__.__name__ != "Description":
            header = header + " " + str(entity.__class__.__name__)
    header = header + ")"

    print("----------------------------------")
    print(header)
    print("----------------------------------")

    for entity in self.entities:
        entity.print_box()
        result[entity.__class__.__name__] = entity
    return result
hyper_ps = {
    # ... earlier entries not shown in the source ...
    'critic_threshold': 17.5,
    'critic_suffices_required': 1,
    'critic_steps_start': 200,
    'critic_steps_end': 200,
    'actor_steps_start': 1000,
    'actor_steps_end': 1000,
    'batch_size': 256,
    'seed': 123456,
    'replay_fill_threshold': 1.,
    'random_exploration': True,
    'test_iterations': 30,
    'validation_epoch_mod': 3,
}

# configuring the environment
environment = gym.make('Humanoid-v3')
# environment._max_episode_steps = 600

# setting up the training components
agent = AWRAgent
actor = Actor()
critic = Critic()

# training and testing
Training.train((actor, critic), agent, environment, hyper_ps,
               save=True, debug_type=DebugType.NONE)
def main():
    env = DialogEnvironment()
    experiment_name = args.logdir.split('/')[1]  # model name
    torch.manual_seed(args.seed)  # TODO

    actor = Actor(hidden_size=args.hidden_size, num_layers=args.num_layers, device='cuda',
                  input_size=args.input_size, output_size=args.input_size)
    actor.to(device)
    actor_optim = optim.Adam(actor.parameters(), lr=args.learning_rate)

    # load demonstrations
    writer = SummaryWriter(args.logdir)

    if args.load_model is not None:  # TODO
        saved_ckpt_path = os.path.join(os.getcwd(), 'save_model', str(args.load_model))
        ckpt = torch.load(saved_ckpt_path)
        actor.load_state_dict(ckpt['actor'])

    episodes = 0
    for iter in range(args.max_iter_num):
        actor.eval()
        steps = 0
        states = []
        expert_actions = []
        similarity_scores = []

        # collect a batch of (state, expert_action) pairs from the environment
        while steps < args.batch_size:
            state, expert_action, raw_state, raw_expert_action = env.reset()
            state = state[:args.seq_len, :].to(device)
            expert_action = expert_action[:args.seq_len, :].to(device)
            states.append(state)
            expert_actions.append(expert_action)
            steps += 1
            episodes += 1

        states = torch.stack(states)
        expert_actions = torch.stack(expert_actions)
        actions_pred, _ = actor(states)

        # measure cosine similarity between predicted and expert actions
        for action, expert_action in zip(actions_pred, expert_actions):
            similarity_scores.append(
                get_cosine_sim(expert=expert_action, action=action.squeeze(), seq_len=5))
        similarity_score_avg = np.mean([float(s) for s in similarity_scores])
        print('{}:: {} episode similarity score is {:.2f}'.format(iter, episodes, similarity_score_avg))

        # behavior cloning: regress the actor's actions onto the expert's,
        # and this is basically all we need to do here
        actor.train()
        loss = F.mse_loss(actions_pred, expert_actions)
        actor_optim.zero_grad()
        loss.backward()
        actor_optim.step()

        writer.add_scalar('log/similarity_score', float(similarity_score_avg), iter)
        writer.add_text('log/raw_state', raw_state[0], iter)
        raw_action = get_raw_action(actions_pred[-1])  # TODO
        writer.add_text('log/raw_action', raw_action, iter)
        writer.add_text('log/raw_expert_action', raw_expert_action, iter)

        if iter % 100 == 0:
            # append this iteration's sample to the experiment log
            with open(experiment_name + '.txt', 'a') as file_object:
                result_str = str(iter) + '|' + raw_state[0] + '|' + raw_action + '|' + raw_expert_action + '\n'
                file_object.write(result_str)

            model_path = os.path.join(os.getcwd(), 'save_model')
            if not os.path.isdir(model_path):
                os.makedirs(model_path)

            ckpt_path = os.path.join(model_path,
                                     experiment_name + '_ckpt_' + str(similarity_score_avg) + '.pth.tar')
            save_checkpoint({
                'actor': actor.state_dict(),
                'args': args,
                'score': similarity_score_avg,
            }, filename=ckpt_path)
def main():
    env = DialogEnvironment()
    experiment_name = args.logdir.split('/')[1]  # model name
    torch.manual_seed(args.seed)  # TODO

    actor = Actor(hidden_size=args.hidden_size, num_layers=args.num_layers, device='cuda',
                  input_size=args.input_size, output_size=args.input_size)
    critic = Critic(hidden_size=args.hidden_size, num_layers=args.num_layers,
                    input_size=args.input_size, seq_len=args.seq_len)
    discrim = Discriminator(hidden_size=args.hidden_size, num_layers=args.num_layers,
                            input_size=args.input_size, seq_len=args.seq_len)
    actor.to(device), critic.to(device), discrim.to(device)

    actor_optim = optim.Adam(actor.parameters(), lr=args.learning_rate)
    critic_optim = optim.Adam(critic.parameters(), lr=args.learning_rate, weight_decay=args.l2_rate)
    discrim_optim = optim.Adam(discrim.parameters(), lr=args.learning_rate)

    # load demonstrations
    writer = SummaryWriter(args.logdir)

    if args.load_model is not None:  # TODO
        saved_ckpt_path = os.path.join(os.getcwd(), 'save_model', str(args.load_model))
        ckpt = torch.load(saved_ckpt_path)
        actor.load_state_dict(ckpt['actor'])
        critic.load_state_dict(ckpt['critic'])
        discrim.load_state_dict(ckpt['discrim'])

    episodes = 0
    train_discrim_flag = True

    for iter in range(args.max_iter_num):
        actor.eval(), critic.eval()
        memory = deque()

        steps = 0
        scores = []
        similarity_scores = []

        while steps < args.total_sample_size:
            state, expert_action, raw_state, raw_expert_action = env.reset()
            score = 0
            similarity_score = 0

            state = state[:args.seq_len, :].to(device)
            expert_action = expert_action[:args.seq_len, :].to(device)

            for _ in range(10000):
                steps += 1
                mu, std = actor(state.view(1, args.seq_len, args.input_size))
                action = get_action(mu.cpu(), std.cpu())[0]

                # zero out the action from the first all-zero expert embedding onward
                for i in range(5):
                    emb_sum = expert_action[i, :].sum().cpu().item()
                    if emb_sum == 0:
                        action[i:, :] = 0  # manual padding
                        break

                done = env.step(action)
                irl_reward = get_reward(discrim, state, action, args)
                mask = 0 if done else 1
                memory.append([state, torch.from_numpy(action).to(device), irl_reward, mask, expert_action])

                score += irl_reward
                similarity_score += get_cosine_sim(expert=expert_action, action=action.squeeze(), seq_len=5)

                if done:
                    break

            episodes += 1
            scores.append(score)
            similarity_scores.append(similarity_score)

        score_avg = np.mean(scores)
        similarity_score_avg = np.mean(similarity_scores)
        print('{}:: {} episode score is {:.2f}'.format(iter, episodes, score_avg))
        print('{}:: {} episode similarity score is {:.2f}'.format(iter, episodes, similarity_score_avg))

        actor.train(), critic.train(), discrim.train()

        if train_discrim_flag:
            expert_acc, learner_acc = train_discrim(discrim, memory, discrim_optim, args)
            print("Expert: %.2f%% | Learner: %.2f%%" % (expert_acc * 100, learner_acc * 100))
            writer.add_scalar('log/expert_acc', float(expert_acc), iter)
            writer.add_scalar('log/learner_acc', float(learner_acc), iter)
            writer.add_scalar('log/avg_acc', float(learner_acc + expert_acc) / 2, iter)
            if args.suspend_accu_exp is not None:  # only check when a threshold is given
                if expert_acc > args.suspend_accu_exp and learner_acc > args.suspend_accu_gen:
                    train_discrim_flag = False

        train_actor_critic(actor, critic, memory, actor_optim, critic_optim, args)

        writer.add_scalar('log/score', float(score_avg), iter)
        writer.add_scalar('log/similarity_score', float(similarity_score_avg), iter)
        writer.add_text('log/raw_state', raw_state[0], iter)
        raw_action = get_raw_action(action)  # TODO
        writer.add_text('log/raw_action', raw_action, iter)
        writer.add_text('log/raw_expert_action', raw_expert_action, iter)

        if iter % 100 == 0:
            score_avg = int(score_avg)
            # append this iteration's sample to the experiment log
            with open(experiment_name + '.txt', 'a') as file_object:
                result_str = str(iter) + '|' + raw_state[0] + '|' + raw_action + '|' + raw_expert_action + '\n'
                file_object.write(result_str)

            model_path = os.path.join(os.getcwd(), 'save_model')
            if not os.path.isdir(model_path):
                os.makedirs(model_path)

            ckpt_path = os.path.join(model_path, experiment_name + '_ckpt_' + str(score_avg) + '.pth.tar')
            save_checkpoint({
                'actor': actor.state_dict(),
                'critic': critic.state_dict(),
                'discrim': discrim.state_dict(),
                'args': args,
                'score': score_avg,
            }, filename=ckpt_path)
from datetime import date

# NOTE: the exact module paths for Session/engine/Base, Actor, and
# ContactDetails are assumed from the project layout
from models.base import Session, engine, Base
from models.movie import Movie
from models.actor import Actor
from models.contact_details import ContactDetails
from models.stuntman import Stuntman

# 2 - generate database schema
Base.metadata.create_all(engine)

# 3 - create a new session
session = Session()

# 4 - create movies
bourne_identity = Movie("The Bourne Identity", date(2002, 10, 11))
furious_7 = Movie("Furious 7", date(2015, 4, 2))
pain_and_gain = Movie("Pain & Gain", date(2013, 8, 23))

# 5 - create actors
matt_damon = Actor("Matt Damon", date(1970, 10, 8))
dwayne_johnson = Actor("Dwayne Johnson", date(1972, 5, 2))
mark_wahlberg = Actor("Mark Wahlberg", date(1971, 6, 5))

# 6 - add actors to movies
bourne_identity.actors = [matt_damon]
furious_7.actors = [dwayne_johnson]
pain_and_gain.actors = [dwayne_johnson, mark_wahlberg]

# 7 - add contact details to actors
matt_contact = ContactDetails("415 555 2671", "Burbank, CA", matt_damon)
dwayne_contact = ContactDetails("423 555 5623", "Glendale, CA", dwayne_johnson)
dwayne_contact_2 = ContactDetails("421 444 2323", "West Hollywood, CA", dwayne_johnson)
mark_contact = ContactDetails("421 333 9428", "Glendale, CA", mark_wahlberg)
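# A sketch of the likely next steps, continuing the numbered workflow above
# (the source stops at step 7): persist the objects and commit the session.
# The cascade behavior from movies to actors is an assumption about the mappings.

# 8 - save objects to the session
session.add(bourne_identity)
session.add(furious_7)
session.add(pain_and_gain)
session.add(matt_contact)
session.add(dwayne_contact)
session.add(dwayne_contact_2)
session.add(mark_contact)

# 9 - commit and close the session
session.commit()
session.close()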