Example #1
    def __init__(self, agent, policy_model, state_value_model, total_reward):
        self.agent = agent
        self.policy_model = policy_model
        self.state_value_model = state_value_model
        self.total_reward = total_reward

        # Compute the MLE loss; MLE is used to initialize the parameters before REINFORCE
        self.mle_policy_gradient = MaximumLikelihoodEstimation(agent, policy_model)

        # Compute the REINFORCE loss and its entropy penalty
        loss_reinforce, entropy_penalty = self.calc_loss(
            policy_model.model_output, policy_model.model_output_indices, policy_model.target)

        # self.rl_learning_rate is assumed to be defined elsewhere in the class
        optimizer = tf.train.AdamOptimizer(self.rl_learning_rate)

        # Optionally clip gradients by norm to stabilize training
        using_grad_clip = True
        grad_clip_val = 5.0
        if not using_grad_clip:
            train_step = optimizer.minimize(loss_reinforce)
        else:
            gvs = optimizer.compute_gradients(loss_reinforce)
            capped_gvs = [(tf.clip_by_norm(grad, grad_clip_val), var)
                          if grad is not None else (grad, var) for grad, var in gvs]
            train_step = optimizer.apply_gradients(capped_gvs)

        # Create summaries for training
        summary_loss = tf.summary.scalar("Loss", loss_reinforce)
        summary_target_min = tf.summary.scalar("Target Min", tf.reduce_min(self.policy_model.target))
        summary_target_max = tf.summary.scalar("Target Max", tf.reduce_max(self.policy_model.target))
        summary_target_mean = tf.summary.scalar("Target Mean", tf.reduce_mean(self.policy_model.target))
        summary_entropy_penalty = tf.summary.scalar("Entropy Penalty", entropy_penalty)
        update_summaries = [summary_loss, summary_target_min,
                            summary_target_max, summary_target_mean, summary_entropy_penalty]

        AbstractLearning.__init__(self, policy_model, loss_reinforce, train_step, update_summaries)
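The constructor above relies on a calc_loss method that weights the log-likelihood
of each sampled action by its return and adds an entropy penalty. A minimal
TF1-style sketch of such a method follows; the tensor shapes and the 0.1 entropy
coefficient are assumptions, not taken from the original class:

    def calc_loss(self, model_output, model_output_indices, target):
        # model_output: [batch, num_actions] unnormalized action logits
        # model_output_indices: [batch] indices of the sampled actions
        # target: [batch] returns (or advantages) used to scale the gradient
        log_probs = tf.nn.log_softmax(model_output)
        batch_range = tf.range(tf.shape(model_output)[0])
        indices = tf.stack([batch_range, model_output_indices], axis=1)
        chosen_log_probs = tf.gather_nd(log_probs, indices)

        # REINFORCE: maximize the return-weighted log-likelihood
        loss = -tf.reduce_mean(chosen_log_probs * target)

        # Entropy penalty keeps the policy stochastic during training
        probs = tf.nn.softmax(model_output)
        entropy = -tf.reduce_mean(tf.reduce_sum(probs * log_probs, axis=1))
        entropy_penalty = -0.1 * entropy  # assumed coefficient

        return loss + entropy_penalty, entropy_penalty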
Example #2
    def __init__(self, agent, policy_model):
        self.agent = agent
        self.policy_model = policy_model

        # Replay memory
        max_replay_memory_size = 2000
        self.replay_memory = collections.deque(maxlen=max_replay_memory_size)
        rho = 0.5
        self.ps = prioritized_sweeping.PrioritizedSweeping(0, rho)

        optimizer = tf.train.AdamOptimizer(self.mle_learning_rate)
        loss = MaximumLikelihoodEstimation.calc_loss(
            self.policy_model.model_output, self.policy_model.model_output_indices)

        using_grad_clip = True
        grad_clip_val = 5.0
        if not using_grad_clip:
            train_step = optimizer.minimize(loss)
        else:
            gvs = optimizer.compute_gradients(loss)
            capped_gvs = [(tf.clip_by_norm(grad, grad_clip_val), var)
                          if grad is not None else (grad, var) for grad, var in gvs]
            train_step = optimizer.apply_gradients(capped_gvs)

        # Create summaries for training
        summary_loss = tf.summary.scalar("Loss", loss)
        update_summaries = [summary_loss]

        AbstractLearning.__init__(self, policy_model, loss, train_step, update_summaries)
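MaximumLikelihoodEstimation.calc_loss is called here with only logits and action
indices, which suggests a plain cross-entropy against the demonstrated actions. A
minimal sketch under that assumption:

    @staticmethod
    def calc_loss(model_output, model_output_indices):
        # Cross-entropy between the policy distribution and the expert actions
        return tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=model_output, labels=model_output_indices))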
Example #3
    def __init__(self, model, action_space, meta_data_util, config, constants, tensorboard):
        self.max_epoch = 100  # constants["max_epochs"]
        self.model = model
        self.action_space = action_space
        self.meta_data_util = meta_data_util
        self.config = config
        self.constants = constants
        self.tensorboard = tensorboard
        self.entropy = None
        self.cross_entropy = None
        self.entropy_coef = constants["entropy_coefficient"]
        self.optimizer = optim.Adam(model.get_parameters(),
                                    lr=constants["learning_rate"])

        # Confusion-matrix bookkeeping over 63 classes
        self.confusion_num_count = []
        self.confusion_denom_count = []

        for i in range(0, 63):
            self.confusion_num_count.append([0.0] * 63)
            self.confusion_denom_count.append([0.0] * 63)

        AbstractLearning.__init__(self, self.model, self.calc_loss,
                                  self.optimizer, self.config, self.constants)
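The 63x63 counters above look like confusion-matrix statistics accumulated during
training. One plausible, purely hypothetical update (rows indexed by the gold class,
columns by the prediction, so num/denom yields a row-normalized confusion matrix):

    def update_confusion(self, gold_ix, predicted_ix):
        # Hypothetical helper; not part of the original class
        self.confusion_num_count[gold_ix][predicted_ix] += 1.0
        for j in range(63):
            self.confusion_denom_count[gold_ix][j] += 1.0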
Example #4
    def __init__(self, agent):
        self.agent = agent
        self.replay_memory = None
        self.batch_size = None
        self.null_previous_action = None
        self.ps = None
        AbstractLearning.__init__(self, agent)
Example #5
    def __init__(self, model, config, constants, tensorboard):
        self.max_epoch = constants["max_epochs"]
        self.model = model
        self.config = config
        self.constants = constants
        self.tensorboard = tensorboard
        self.entropy_coef = constants["entropy_coefficient"]
        self.optimizer = optim.Adam(model.get_parameters(),
                                    lr=constants["learning_rate"])
        AbstractLearning.__init__(self, self.model, self.calc_loss,
                                  self.optimizer, self.config, self.constants,
                                  self.tensorboard)
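Several of the PyTorch constructors (Examples #3, #5, #6, and #8) hand a bound
calc_loss and an entropy coefficient to AbstractLearning. A minimal sketch of an
entropy-regularized policy-gradient loss; the tensor shapes and argument names are
assumptions:

    import torch

    def calc_loss(self, log_probs, actions, advantages):
        # log_probs: [batch, num_actions] log-probabilities from the policy
        # actions: [batch] sampled action indices
        # advantages: [batch] return or advantage estimates
        chosen = log_probs.gather(1, actions.unsqueeze(1)).squeeze(1)
        policy_loss = -(chosen * advantages).mean()

        # Entropy bonus, weighted by the configured coefficient
        entropy = -(log_probs.exp() * log_probs).sum(dim=1).mean()
        self.entropy = entropy
        return policy_loss - self.entropy_coef * entropy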
Example #6
    def __init__(self, model, action_space, meta_data_util, config, constants):
        self.max_epoch = constants["max_epochs"]
        self.model = model
        self.action_space = action_space
        self.meta_data_util = meta_data_util
        self.config = config
        self.constants = constants
        self.tensorboard = Tensorboard()
        self.entropy_coef = constants["entropy_coefficient"]
        self.optimizer = optim.Adam(model.get_parameters(),
                                    lr=constants["learning_rate"])
        AbstractLearning.__init__(self, self.model, self.calc_loss,
                                  self.optimizer, self.config, self.constants)
Example #7
    def __init__(self, model, action_space, meta_data_util, config, constants,
                 tensorboard):
        self.max_epoch = constants["max_epochs"]
        self.model = model
        self.action_space = action_space
        self.meta_data_util = meta_data_util
        self.config = config
        self.constants = constants
        self.tensorboard = tensorboard
        self.global_replay_memory = collections.deque(maxlen=2000)
        self.optimizer = optim.Adam(model.get_parameters(),
                                    lr=constants["learning_rate"])
        AbstractLearning.__init__(self, self.model, self.calc_loss,
                                  self.optimizer, self.config, self.constants,
                                  self.tensorboard)
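Examples #7 and #10 share a global replay memory backed by a bounded deque. A
hypothetical uniform-sampling helper (the method name and batch handling are
assumptions):

    import random

    def sample_from_global_memory(self, batch_size):
        # Uniformly sample up to batch_size transitions from the shared deque
        k = min(batch_size, len(self.global_replay_memory))
        return random.sample(list(self.global_replay_memory), k)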
Example #8
    def __init__(self, model, action_space, meta_data_util, config, constants,
                 tensorboard):
        self.max_epoch = 100  # constants["max_epochs"]
        self.model = model
        self.action_space = action_space
        self.meta_data_util = meta_data_util
        self.config = config
        self.constants = constants
        self.tensorboard = tensorboard
        self.entropy_coef = constants["entropy_coefficient"]
        self.optimizer = optim.Adam(model.get_parameters(),
                                    lr=constants["learning_rate"])
        self.linguistic_prior = LinguisticPrior()
        # self.alignment_reward = AlignmentReward()
        self.entropy = None
        AbstractLearning.__init__(self, self.model, self.calc_loss,
                                  self.optimizer, self.config, self.constants)
Example #9
    def __init__(self, agent, q_network, target_q_network):
        """ Creates constructor for an abstract learning setup """

        self.agent = agent
        self.loss = None
        self.q_network = q_network
        self.target_q_network = target_q_network

        # Define the epsilon-greedy behaviour policy
        epsilon = 1.0
        min_epsilon = 0.1
        self.behaviour_policy = egp.EpsilonGreedyPolicy(epsilon, min_epsilon)

        # Replay memory and prioritized sweeping for sampling from the replay memory
        max_replay_memory_size = 2000
        self.replay_memory = collections.deque(maxlen=max_replay_memory_size)
        rho = 0.5
        self.ps = prioritized_sweeping.PrioritizedSweeping(0, rho)

        optimizer = tf.train.AdamOptimizer(self.rl_learning_rate)
        loss = self.calc_loss(self.q_network.model_output,
                              self.q_network.model_output_indices,
                              self.q_network.target)

        using_grad_clip = True
        grad_clip_val = 5.0
        if not using_grad_clip:
            train_step = optimizer.minimize(loss)
        else:
            gvs = optimizer.compute_gradients(loss)
            capped_gvs = [(tf.clip_by_norm(grad, grad_clip_val),
                           var) if grad is not None else (grad, var)
                          for grad, var in gvs]
            train_step = optimizer.apply_gradients(capped_gvs)

        # Create summaries for training
        summary_loss = tf.summary.scalar("Loss", loss)
        update_summaries = [summary_loss]

        AbstractLearning.__init__(self, q_network, loss, train_step,
                                  update_summaries)
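The behaviour policy above starts fully exploratory (epsilon = 1.0) and is bounded
below by min_epsilon = 0.1. The original EpsilonGreedyPolicy is not shown; a minimal
sketch with an assumed multiplicative annealing schedule:

    import random

    class EpsilonGreedyPolicy(object):
        def __init__(self, epsilon, min_epsilon, decay=0.999):
            self.epsilon = epsilon
            self.min_epsilon = min_epsilon
            self.decay = decay

        def sample_action(self, q_values):
            # Anneal epsilon, then explore with probability epsilon
            self.epsilon = max(self.min_epsilon, self.epsilon * self.decay)
            if random.random() < self.epsilon:
                return random.randrange(len(q_values))
            return max(range(len(q_values)), key=lambda a: q_values[a])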
Example #10
    def __init__(self, model, action_space, meta_data_util, config, constants,
                 tensorboard):
        self.max_epoch = constants["max_epochs"]
        self.model = model
        self.action_space = action_space
        self.meta_data_util = meta_data_util
        self.config = config
        self.constants = constants
        self.tensorboard = tensorboard
        self.global_replay_memory = collections.deque(maxlen=2000)
        self.optimizer = optim.Adam(model.get_parameters(),
                                    lr=constants["learning_rate"])

        # 48 headings spaced 7.5 degrees apart cover the full 360-degree circle
        theta_values = []
        for i in range(0, 48):
            theta_values.append([i * 7.5])
        self.theta_values = cuda_var(torch.from_numpy(
            np.array(theta_values))).float()

        AbstractLearning.__init__(self, self.model, self.calc_loss,
                                  self.optimizer, self.config, self.constants,
                                  self.tensorboard)
Example #11
    def __init__(self, model, action_space, meta_data_util, config, constants, tensorboard, resnet_detection_model):
        self.max_epoch = constants["max_epochs"]
        self.model = model
        self.resnet_detection_model = resnet_detection_model
        self.action_space = action_space
        self.meta_data_util = meta_data_util
        self.config = config
        self.constants = constants
        self.tensorboard = tensorboard
        self.discretize = nav_drone_symbolic_instructions.BUCKET_WIDTH
        self.num_buckets = nav_drone_symbolic_instructions.NO_BUCKETS
        self.global_replay_memory = collections.deque(maxlen=2000)
        self.optimizer = optim.Adam(model.get_parameters(),
                                    lr=constants["learning_rate"])

        # Same angular bucketing as Example #10, parameterized by the symbolic-instruction constants
        theta_values = []
        for i in range(0, self.num_buckets):
            theta_values.append([i * self.discretize])
        self.theta_values = cuda_var(torch.from_numpy(np.array(theta_values))).float()

        AbstractLearning.__init__(self, self.model, self.calc_loss,
                                  self.optimizer, self.config, self.constants, self.tensorboard)
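The theta_values tensor enumerates one angle per bucket (bucket index times bucket
width). A hypothetical use is mapping a predicted bucket back to its angle; the
method name and logits shape are assumptions:

    def predicted_theta(self, bucket_logits):
        # bucket_logits: [batch, num_buckets] -> angle of the most likely bucket
        ix = bucket_logits.argmax(dim=1)         # [batch]
        return self.theta_values[ix].squeeze(1)  # [batch] angles in degrees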