Example #1
    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.2
        self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 1000000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.01  # for soft update of target parameters
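The tau above drives a soft update that nudges each target network toward its local counterpart after every learning step. A minimal sketch of that update, assuming the Keras-style .model wrappers used in this example (the soft_update helper name is illustrative, not part of the original agent):

def soft_update(local_model, target_model, tau):
    """Blend target weights toward local weights: w_target <- tau*w_local + (1-tau)*w_target."""
    local_weights = local_model.model.get_weights()
    target_weights = target_model.model.get_weights()
    new_weights = [tau * lw + (1.0 - tau) * tw
                   for lw, tw in zip(local_weights, target_weights)]
    target_model.model.set_weights(new_weights)

Such a helper would typically be called as soft_update(self.critic_local, self.critic_target, self.tau) and soft_update(self.actor_local, self.actor_target, self.tau) at the end of each learning step.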
Example #2
    def __init__(self, state_size, action_size, random_seed):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(random_seed)
        np.random.seed(random_seed)  # set the numpy seed

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size,
                                 random_seed).to(device)
        self.actor_target = Actor(state_size, action_size,
                                  random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        # Critic Network (w/ Target Network)
        self.critic_local = Critic(state_size, action_size,
                                   random_seed).to(device)
        self.critic_target = Critic(state_size, action_size,
                                    random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                   random_seed, device)
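Examples #2 and #3 keep their target networks close to the local networks with a Polyak (soft) update after each learning step. A minimal sketch of that update for PyTorch modules; the helper name and the TAU constant are assumptions for illustration, not taken from the original agents:

TAU = 1e-3  # interpolation factor (assumed value)

def soft_update(local_model, target_model, tau=TAU):
    # theta_target <- tau * theta_local + (1 - tau) * theta_target, parameter by parameter
    for target_param, local_param in zip(target_model.parameters(),
                                         local_model.parameters()):
        target_param.data.copy_(tau * local_param.data +
                                (1.0 - tau) * target_param.data)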
Example #3
    def __init__(self, state_size, action_size, random_seed, actor_layers,
                 critic_layers):
        """ Initialize an Agent object.

        Params
        ======
            state_size (int): size of the environment state
            action_size (int): size of the environment action
            random_seed (int): seed for the random number generators
            actor_layers (array[int]): array containing the size of each layer of the actor network
            critic_layers (array[int]): array containing the size of each layer of the critic network
        """

        self.state_size = state_size
        self.action_size = action_size
        self.random_seed = random_seed
        random.seed(random_seed)
        np.random.seed(random_seed)

        # Actor
        print(f'Agent running on {DEVICE}')
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.random_seed, *actor_layers).to(DEVICE)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.random_seed, *actor_layers).to(DEVICE)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        # Critic
        self.critic_local = Critic(self.state_size, self.action_size,
                                   self.random_seed, *critic_layers).to(DEVICE)
        self.critic_target = Critic(self.state_size, self.action_size,
                                    self.random_seed,
                                    *critic_layers).to(DEVICE)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)

        # Noise
        self.noise = OrnsteinUhlenbeck(self.action_size, self.random_seed)

        # Replay Buffer
        self.memory = ReplayBuffer(self.action_size, BUFFER_SIZE, BATCH_SIZE,
                                   self.random_seed)
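Examples #3 and #4 construct an Ornstein-Uhlenbeck process to generate temporally correlated exploration noise. A minimal sketch of such a process, matching the (size, seed) constructor used in Example #3; the class name and the mu/theta/sigma defaults (borrowed from Example #1) are illustrative:

import copy

import numpy as np

class OUNoise:
    """Ornstein-Uhlenbeck process for temporally correlated exploration noise."""

    def __init__(self, size, seed, mu=0.0, theta=0.15, sigma=0.2):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.rng = np.random.RandomState(seed)
        self.reset()

    def reset(self):
        """Reset the internal state to the long-running mean."""
        self.state = copy.copy(self.mu)

    def sample(self):
        """Advance the process one step and return the new noise state."""
        dx = self.theta * (self.mu - self.state) + self.sigma * self.rng.standard_normal(len(self.state))
        self.state = self.state + dx
        return self.state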
Example #4
    def __init__(self, env, hp):

        self.env = env
        self.hp = hp
        self.critic = Critic(env.observation_space.shape[0],
                             env.action_space.shape[0], hp)

        self.target_critic = Critic(env.observation_space.shape[0],
                                    env.action_space.shape[0], hp)

        self.actor = Actor(env.observation_space.shape[0],
                           env.action_space.shape[0], env.action_space.high[0],
                           hp)

        self.target_actor = Actor(env.observation_space.shape[0],
                                  env.action_space.shape[0],
                                  env.action_space.high[0], hp)

        self.dataset = ReplayBuffer(self.hp['batch_size'],
                                    self.hp['max_buffer_size'])

        self.noise = OrnsteinUhlenbeckProcess(env.action_space.shape[0],
                                              sigma=self.hp['noise_sigma'])
        self.noise.reset_states()
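At action-selection time these agents query the deterministic actor and perturb its output with the noise process. A minimal sketch of such an act step, assuming a PyTorch actor as in Examples #2 and #3 and a noise object with a sample() method; the helper itself and the [-1, 1] clipping range are assumptions (Example #4 would clip to env.action_space.low/high instead):

import numpy as np
import torch

def act(actor_local, state, noise, device, add_noise=True):
    """Select an action for one state, optionally adding exploration noise."""
    state_t = torch.from_numpy(state).float().to(device)
    actor_local.eval()
    with torch.no_grad():
        action = actor_local(state_t).cpu().data.numpy()
    actor_local.train()
    if add_noise:
        action += noise.sample()
    return np.clip(action, -1.0, 1.0)  # assumes actions normalized to [-1, 1]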
Example #5
def add_actor():
    """
    Add new actor
    """
    data = get_request_data()
    ### YOUR CODE HERE ###

    # parse the date of birth and re-format it as an HTTP (GMT) date string
    cr_date = data['date_of_birth']
    cr_date = dt.strptime(cr_date, '%d.%m.%Y')
    cr_date = cr_date.strftime("%a, %d %b %Y %H:%M:%S GMT")

    # build the Actor record and return only the whitelisted fields (200 response)
    new_record = Actor(name=data['name'], gender=data['gender'], date_of_birth=cr_date)
    new_actor = {k: v for k, v in new_record.__dict__.items() if k in ACTOR_FIELDS}

    return make_response(jsonify(new_actor), 200)
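A quick way to exercise this handler is Flask's built-in test client. In the sketch below the app import path, the /api/actors route, and the JSON payload keys are assumptions based on the handler above, not details from the original project:

# smoke test via Flask's test client
from app import app  # assumed import path for the Flask application

with app.test_client() as client:
    resp = client.post('/api/actors', json={
        'name': 'Megan Fox',
        'gender': 'female',
        'date_of_birth': '16.05.1986',   # matches the %d.%m.%Y format parsed above
    })
    print(resp.status_code)   # expected: 200
    print(resp.get_json())    # the whitelisted actor fields, with a GMT-formatted birth date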
Example #6
def create_actors_acts_in_and_directors():
    movies = Movies.load_all()

    for movie in movies:
        moviee = tmdb.Movies(movie.id)
        moviee.credits()  # populates moviee.cast and moviee.crew
        for person in moviee.crew:
            if person['job'] == 'Director':
                director = Director(movie.id, person['name'],
                                    person['profile_path'])
                if not Director.find_by_movie_id(movie.id):
                    director.save_to_db()

        for person in moviee.cast[:4]:
            if not Actor.load_by_id(person['id']):
                actor = Actor(person['id'], person['name'],
                              person['profile_path'])
                actor.save_to_db()
            relation = Movie_Actors(movie.id, person['id'])
            relation.save_to_db()
Example #7
    def load(self, transformed_response):
        print("loading...\n")

        result = {}
        if transformed_response.get('Person'):
            self.entities += [Person().extract(transformed_response['Person'])]
        if transformed_response.get('Author'):
            self.entities += [Author().extract(transformed_response['Author'])]
        if transformed_response.get('Actor'):
            self.entities += [Actor().extract(transformed_response['Actor'])]
        if transformed_response.get('BusinessPerson'):
            self.entities += [
                BusinessPerson().extract(
                    transformed_response['BusinessPerson'])
            ]
        if transformed_response.get('League'):
            self.entities += [League().extract(transformed_response['League'])]
        if transformed_response.get('SportsTeam'):
            self.entities += [
                SportsTeam().extract(transformed_response['SportsTeam'])
            ]
        if transformed_response.get('Description'):
            self.entities += [
                Description().extract(transformed_response['Description'])
            ]

        header = str(self.query) + "("
        for entity in self.entities:
            if entity.__class__.__name__ != "Description":
                header = header + "  " + str(entity.__class__.__name__)
        header = header + ")"
        print "----------------------------------"
        print header
        print "----------------------------------"

        for entity in self.entities:
            entity.print_box()
            result[entity.__class__.__name__] = entity

        return result
Example #8
    'critic_threshold': 17.5,
    'critic_suffices_required': 1,
    'critic_steps_start': 200,
    'critic_steps_end': 200,
    'actor_steps_start': 1000,
    'actor_steps_end': 1000,
    'batch_size': 256,
    'seed': 123456,
    'replay_fill_threshold': 1.,
    'random_exploration': True,
    'test_iterations': 30,
    'validation_epoch_mod': 3,
}

# configuring the environment
environment = gym.make('Humanoid-v3')
# environment._max_episode_steps = 600

# setting up the training components
agent = AWRAgent
actor = Actor()
critic = Critic()

# training and testing
Training.train((actor, critic),
               agent,
               environment,
               hyper_ps,
               save=True,
               debug_type=DebugType.NONE)
Example #9
def main():
    env = DialogEnvironment()
    experiment_name = args.logdir.split('/')[1] #model name

    torch.manual_seed(args.seed)

    #TODO
    actor = Actor(hidden_size=args.hidden_size, num_layers=args.num_layers,
                  device='cuda', input_size=args.input_size,
                  output_size=args.input_size)
    
    actor.to(device)
    
    actor_optim = optim.Adam(actor.parameters(), lr=args.learning_rate)
    # load demonstrations

    writer = SummaryWriter(args.logdir)

    if args.load_model is not None: #TODO
        saved_ckpt_path = os.path.join(os.getcwd(), 'save_model', str(args.load_model))
        ckpt = torch.load(saved_ckpt_path)

        actor.load_state_dict(ckpt['actor'])
 

    
    episodes = 0


    for iter in range(args.max_iter_num):
        actor.eval()
 

        steps = 0
        states = []
        expert_actions = []
        while steps < args.batch_size:
            state, expert_action, raw_state, raw_expert_action = env.reset()

            state = state[:args.seq_len, :].to(device)
            expert_action = expert_action[:args.seq_len, :].to(device)
            states.append(state)
            expert_actions.append(expert_action)

            steps += 1
            episodes += 1

        states = torch.stack(states)
        expert_actions = torch.stack(expert_actions)
        actions_pred, _ = actor(states)

        # cosine similarity between each predicted action and its expert action
        similarity_scores = [get_cosine_sim(expert=e, action=a.squeeze().detach(), seq_len=5)
                             for e, a in zip(expert_actions, actions_pred)]
        similarity_score_avg = np.mean(similarity_scores)
        print('{}:: {} episode similarity score is {:.2f}'.format(iter, episodes, similarity_score_avg))

        actor.train()
        loss = F.mse_loss(actions_pred, expert_actions)
        actor_optim.zero_grad()
        loss.backward()
        actor_optim.step()
        # behaviour cloning: a single MSE regression step toward the expert actions



        writer.add_scalar('log/loss', float(loss.item()), iter)
        writer.add_scalar('log/similarity_score', float(similarity_score_avg), iter)
        writer.add_text('log/raw_state', raw_state[0], iter)
        raw_action = get_raw_action(actions_pred[0].detach())  # decode the first predicted action back to text
        writer.add_text('log/raw_action', raw_action, iter)
        writer.add_text('log/raw_expert_action', raw_expert_action, iter)

        if iter % 100 == 0:
            # append this iteration's sample to the experiment log
            result_str = str(iter) + '|' + raw_state[0] + '|' + raw_action + '|' + raw_expert_action + '\n'
            with open(experiment_name + '.txt', 'a') as file_object:
                file_object.write(result_str)

            model_path = os.path.join(os.getcwd(), 'save_model')
            if not os.path.isdir(model_path):
                os.makedirs(model_path)

            ckpt_path = os.path.join(model_path, experiment_name + '_ckpt_' + str(iter) + '.pth.tar')

            save_checkpoint({
                'actor': actor.state_dict(),
                'args': args,
                'score': similarity_score_avg,
            }, filename=ckpt_path)
Example #10
def main():
    env = DialogEnvironment()
    experiment_name = args.logdir.split('/')[1] #model name

    torch.manual_seed(args.seed)

    #TODO
    actor = Actor(hidden_size=args.hidden_size, num_layers=args.num_layers,
                  device='cuda', input_size=args.input_size,
                  output_size=args.input_size)
    critic = Critic(hidden_size=args.hidden_size, num_layers=args.num_layers,
                    input_size=args.input_size, seq_len=args.seq_len)
    discrim = Discriminator(hidden_size=args.hidden_size, num_layers=args.num_layers,
                            input_size=args.input_size, seq_len=args.seq_len)
    
    actor.to(device), critic.to(device), discrim.to(device)
    
    actor_optim = optim.Adam(actor.parameters(), lr=args.learning_rate)
    critic_optim = optim.Adam(critic.parameters(), lr=args.learning_rate, 
                              weight_decay=args.l2_rate) 
    discrim_optim = optim.Adam(discrim.parameters(), lr=args.learning_rate)

    # load demonstrations

    writer = SummaryWriter(args.logdir)

    if args.load_model is not None: #TODO
        saved_ckpt_path = os.path.join(os.getcwd(), 'save_model', str(args.load_model))
        ckpt = torch.load(saved_ckpt_path)

        actor.load_state_dict(ckpt['actor'])
        critic.load_state_dict(ckpt['critic'])
        discrim.load_state_dict(ckpt['discrim'])


    
    episodes = 0
    train_discrim_flag = True

    for iter in range(args.max_iter_num):
        actor.eval(), critic.eval()
        memory = deque()

        steps = 0
        scores = []
        similarity_scores = []
        while steps < args.total_sample_size: 
            state, expert_action, raw_state, raw_expert_action = env.reset()
            score = 0
            similarity_score = 0
            state = state[:args.seq_len,:]
            expert_action = expert_action[:args.seq_len,:]
            state = state.to(device)
            expert_action = expert_action.to(device)
            for _ in range(10000): 

                steps += 1

                mu, std = actor(state.unsqueeze(0))  # add a batch dimension: [1, seq_len, input_size]
                action = get_action(mu.cpu(), std.cpu())[0]
                # zero out the action beyond the expert's padding boundary
                for i in range(5):
                    emb_sum = expert_action[i, :].sum().cpu().item()
                    if emb_sum == 0:
                        action[i:, :] = 0  # manual padding
                        break

                done = env.step(action)
                irl_reward = get_reward(discrim, state, action, args)
                mask = 0 if done else 1


                memory.append([state, torch.from_numpy(action).to(device), irl_reward, mask,expert_action])
                score += irl_reward
                similarity_score += get_cosine_sim(expert=expert_action,action=action.squeeze(),seq_len=5)
                #print(get_cosine_sim(s1=expert_action,s2=action.squeeze(),seq_len=5),'sim')
                if done:
                    break

            episodes += 1
            scores.append(score)
            similarity_scores.append(similarity_score)

        score_avg = np.mean(scores)
        similarity_score_avg = np.mean(similarity_scores)
        print('{}:: {} episode score is {:.2f}'.format(iter, episodes, score_avg))
        print('{}:: {} episode similarity score is {:.2f}'.format(iter, episodes, similarity_score_avg))

        actor.train(), critic.train(), discrim.train()
        if train_discrim_flag:
            expert_acc, learner_acc = train_discrim(discrim, memory, discrim_optim, args) 
            print("Expert: %.2f%% | Learner: %.2f%%" % (expert_acc * 100, learner_acc * 100))
            writer.add_scalar('log/expert_acc', float(expert_acc), iter) #logg
            writer.add_scalar('log/learner_acc', float(learner_acc), iter) #logg
            writer.add_scalar('log/avg_acc', float(learner_acc + expert_acc)/2, iter) #logg
            if args.suspend_accu_exp is not None: #only if not None do we check.
                if expert_acc > args.suspend_accu_exp and learner_acc > args.suspend_accu_gen:
                    train_discrim_flag = False

        train_actor_critic(actor, critic, memory, actor_optim, critic_optim, args)
        writer.add_scalar('log/score', float(score_avg), iter)
        writer.add_scalar('log/similarity_score', float(similarity_score_avg), iter)
        writer.add_text('log/raw_state', raw_state[0],iter)
        raw_action = get_raw_action(action) #TODO
        writer.add_text('log/raw_action', raw_action,iter)
        writer.add_text('log/raw_expert_action', raw_expert_action,iter)

        if iter % 100 == 0:
            score_avg = int(score_avg)
            # append this iteration's sample to the experiment log
            result_str = str(iter) + '|' + raw_state[0] + '|' + raw_action + '|' + raw_expert_action + '\n'
            with open(experiment_name + '.txt', 'a') as file_object:
                file_object.write(result_str)

            model_path = os.path.join(os.getcwd(),'save_model')
            if not os.path.isdir(model_path):
                os.makedirs(model_path)

            ckpt_path = os.path.join(model_path, experiment_name + '_ckpt_'+ str(score_avg)+'.pth.tar')

            save_checkpoint({
                'actor': actor.state_dict(),
                'critic': critic.state_dict(),
                'discrim': discrim.state_dict(),
                'args': args,
                'score': score_avg,
            }, filename=ckpt_path)
Example #11
from models.movie import Movie
from models.stuntman import Stuntman
from models.actor import Actor                      # module path assumed to mirror models.movie
from models.contact_details import ContactDetails   # module path assumed to mirror models.movie

# 2 - generate database schema
Base.metadata.create_all(engine)

# 3 - create a new session
session = Session()

# 4 - create movies
bourne_identity = Movie("The Bourne Identity", date(2002, 10, 11))
furious_7 = Movie("Furious 7", date(2015, 4, 2))
pain_and_gain = Movie("Pain & Gain", date(2013, 8, 23))

# 5 - create actors
matt_damon = Actor("Matt Damon", date(1970, 10, 8))
dwayne_johnson = Actor("Dwayne Johnson", date(1972, 5, 2))
mark_wahlberg = Actor("Mark Wahlberg", date(1971, 6, 5))

# 6 - add actors to movies
bourne_identity.actors = [matt_damon]
furious_7.actors = [dwayne_johnson]
pain_and_gain.actors = [dwayne_johnson, mark_wahlberg]

# 7 - add contact details to actors
matt_contact = ContactDetails("415 555 2671", "Burbank, CA", matt_damon)
dwayne_contact = ContactDetails("423 555 5623", "Glendale, CA", dwayne_johnson)
dwayne_contact_2 = ContactDetails("421 444 2323", "West Hollywood, CA",
                                  dwayne_johnson)
mark_contact = ContactDetails("421 333 9428", "Glendale, CA", mark_wahlberg)
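Nothing above reaches the database until the objects are attached to the session and the session is committed. A minimal sketch of that closing step, continuing the numbered comments (the step number is illustrative; the actors are persisted through the default save-update cascade on the relationships):

# 8 - persist the objects and close the session
session.add_all([bourne_identity, furious_7, pain_and_gain])
session.add_all([matt_contact, dwayne_contact, dwayne_contact_2, mark_contact])
session.commit()
session.close()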