Example #1
def load_data(dataset, ground_truth, test_rate):
    # Load the degraded inputs and their ground-truth counterparts from .npz archives.
    data = np.load(dataset)
    ground_truth = np.load(ground_truth)
    processed = data['low']
    original = ground_truth['gt']

    #original = np.array([pro[9+10*i] for i in range(len(pro)//10)])
    #processed = np.array([pro[0+10*i] for i in range(len(pro)//10)])

    original = utils.normal(original)
    processed = utils.normal(processed)
    print(np.max(original))
    # expand the data to 3 channels
    original = np.expand_dims(original, axis=3)
    original = np.concatenate((original, original, original), axis=-1)
    processed = np.expand_dims(processed, axis=3)
    processed = np.concatenate((processed, processed, processed), axis=-1)

    # reshape them to [batch, imgdata]
    original = np.float16(np.reshape(original, (original.shape[0], -1))) / 255
    processed = np.float16(np.reshape(processed, (processed.shape[0], -1))) / 255

    # the first test_rate fraction of samples becomes the test set, the rest the training set
    test_size = int(original.shape[0] * test_rate)
    test_data = processed[0:test_size, :]
    test_ground = original[0:test_size, :]
    train_data = processed[test_size:, :]
    train_ground = original[test_size:, :]
    print(train_ground.shape)
    print(test_ground.shape)
    print(train_ground.dtype)
    print(train_data.dtype)

    return test_data, test_ground, train_data, train_ground
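
For context, a hypothetical call could look like the sketch below; the archive names and the 20% split are assumptions for illustration, not taken from the original project (only the 'low' and 'gt' array keys come from the code above).

# Hypothetical usage of load_data; file names and split ratio are made up.
test_data, test_ground, train_data, train_ground = load_data(
    'noisy_images.npz',   # expected to contain a 'low' array
    'clean_images.npz',   # expected to contain a 'gt' array
    test_rate=0.2,        # first 20% of samples become the test set
)
print(train_data.shape, test_data.shape)
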
Example #2
def init_params(options):
    params = OrderedDict()
    params['W_users'] = sharedX(normal((options['n_users'],options['n_factors'])),
                                name='W_users')
    params['W_items'] = sharedX(normal((options['n_items'],options['n_factors'])),
                                name='W_items')
    params['b_users'] = sharedX(np.zeros((options['n_users'], 1)), name='b_users')
    params['b_items'] = sharedX(np.zeros((options['n_items'], 1)), name='b_items')
    params['b'] = sharedX(np.zeros(1), name='b')
    return params
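
As a rough illustration, init_params takes a plain options dict; the sizes below are arbitrary, and sharedX/normal are assumed to be the project's Theano-style helpers (shared-variable wrapper and Gaussian initializer).

# Hypothetical call; the option values are made up.
options = {'n_users': 1000, 'n_items': 500, 'n_factors': 20}
params = init_params(options)
print(list(params.keys()))  # ['W_users', 'W_items', 'b_users', 'b_items', 'b']
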
Example #3
    def _calculate_normals(self):
        """
            The method calculates a normal for each point in self.points.
        """
        self.normals = []

        for ind in range(len(self.points.points)):
            # average the normals of the two segments adjacent to this point;
            # (ind+1) % 40 wraps around the closed 40-point contour
            n1 = utils.normal(self.points.points[ind-1, :], self.points.points[ind, :])
            n2 = utils.normal(self.points.points[ind, :], self.points.points[(ind+1) % 40, :])

            self.normals.append((n1 + n2)/2)
Example #4
    def _calculate_normals(self):
        """
            The method calculates the normals for each points in self.points
        """
        self.normals = []

        for ind in range(len(self.points.points)):
            n1 = utils.normal(self.points.points[ind - 1, :],
                              self.points.points[ind, :])
            n2 = utils.normal(self.points.points[ind, :],
                              self.points.points[(ind + 1) % 40, :])

            self.normals.append((n1 + n2) / 2)
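
Examples #3 and #4 rely on a utils.normal(p1, p2) helper that is not shown; presumably it returns the normal of the 2D segment between two contour points. A minimal stand-in under that assumption (the name segment_normal is mine) could be:

import numpy as np

def segment_normal(p1, p2):
    """Unit normal of the 2D segment p1 -> p2 (a sketch of what utils.normal is assumed to do)."""
    d = np.asarray(p2, dtype=float) - np.asarray(p1, dtype=float)
    n = np.array([-d[1], d[0]])              # rotate the segment direction by 90 degrees
    return n / (np.linalg.norm(n) + 1e-12)   # normalize, guarding against zero-length segments
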
Example #5
def get_mcs_macro(product, name):
    """
        Return a macro space macro from the given mcs product.
        
        Parameters:
            product: The mcs product to return a macro made from. With triple parens (one because it'll be unwrapped, to to end up in the result).
#        """
    #        print('get_mcs_macro:')
    #        print(product)
    #        print('end get_mcs_macro')
    return utils.paren([
        utils.normal(name),
        utils.paren([utils.normal(name)]),
        utils.paren([product])
    ])
Example #6
    def __calculate_normal(self, p_prev, p_next):
        """Calculates the normal at a model point.

        Args:
            p_prev: The previous model point.
            p_next: The next model point.

        Returns:
            The normal in the given model point.

        """
        n1 = utils.normal(p_prev, self.model_point)
        n2 = utils.normal(self.model_point, p_next)
        n = (n1 + n2) / 2
        return n / np.linalg.norm(n)
Example #7
    def __calculate_normal(self, p_prev, p_next):
        """Calculates the normal at a model point.

        Args:
            p_prev: The previous model point.
            p_next: The next model point.

        Returns:
            The normal in the given model point.

        """
        n1 = utils.normal(p_prev, self.model_point)
        n2 = utils.normal(self.model_point, p_next)
        n = (n1 + n2) / 2
        return n / np.linalg.norm(n)
Example #8
def init_params(options):
    params = OrderedDict()
    # LF model params
    params['W_users'] = sharedX(normal((options['n_users'],options['n_factors'])),
                                name='W_users')
    params['W_items'] = sharedX(normal((options['n_items'],options['n_factors'])),
                                name='W_items')
    params['b_users'] = sharedX(np.zeros((options['n_users'],)), name='b_users')
    params['b_items'] = sharedX(np.zeros((options['n_items'],)), name='b_items')
    params['b'] = sharedX(0., name='b')

    # distributed BOW params
    params['W_bow'] = sharedX(normal((options['n_factors'],options['vocab_size'])),
                              name='W_bow')
    params['b_bow'] = sharedX(np.zeros((options['vocab_size'],)), name='b_bow')
    return params
Example #9
def sample_action(continuous, mu_multi, sigma_multi, device, test=False):
    if continuous:
        mu = torch.clamp(mu_multi, -1.0, 1.0)
        sigma = F.softplus(sigma_multi) + 1e-5
        eps = torch.randn(mu.size())
        pi = np.array([math.pi])
        pi = torch.from_numpy(pi).float()
        eps = Variable(eps).to(device)
        pi = Variable(pi).to(device)
        action = (mu + sigma.sqrt() * eps).data
        act = Variable(action)
        prob = normal(act, mu, sigma, device)
        action = torch.clamp(action, -1.0, 1.0)
        entropy = 0.5 * ((sigma * 2 * pi.expand_as(sigma)).log() + 1)  # 0.5 * (log(2*pi*sigma) + 1)
        log_prob = (prob + 1e-6).log()
        action_env = action.cpu().numpy()
    else:  # discrete
        logit = mu_multi
        prob = F.softmax(logit, dim=1)
        log_prob = F.log_softmax(logit, dim=1)
        entropy = -(log_prob * prob).sum(1, keepdim=True)
        if test:
            action = prob.max(1)[1].data
        else:
            action = prob.multinomial(1).data
            log_prob = log_prob.gather(1, Variable(action))
        action_env = np.squeeze(action.cpu().numpy())

    return action_env, entropy, log_prob
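
The A3C-style snippets here (and in several examples below) call a normal(act, mu, sigma, ...) helper that is not shown. A minimal sketch of the Gaussian density it presumably computes, treating sigma as the variance (the code samples with sigma.sqrt()), is given below; the real helper's signature may differ, and the device/gpu arguments are kept only to mirror the call sites.

import math
import torch

def normal(x, mu, sigma, device=None, gpu=False):
    """Element-wise Gaussian density N(x; mu, sigma) with sigma interpreted as the variance.
    Sketch only; device/gpu are unused here and exist just to match the calls above."""
    pi = torch.tensor([math.pi], dtype=x.dtype, device=x.device)
    a = (-((x - mu) ** 2) / (2 * sigma)).exp()           # exp(-(x - mu)^2 / (2 * sigma))
    b = 1.0 / (2 * sigma * pi.expand_as(sigma)).sqrt()   # 1 / sqrt(2 * pi * sigma)
    return a * b
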
Example #10
    def action_train(self):
        if self.args.model == 'CONV':
            self.state = self.state.unsqueeze(0)
        value, mu, sigma, (self.hx, self.cx) = self.model(
            (self.state, (self.hx, self.cx)))
        mu = torch.clamp(mu, -1.0, 1.0)
        sigma = F.softplus(sigma) + 1e-5
        eps = torch.randn(mu.size())
        pi = np.array([math.pi])
        pi = torch.from_numpy(pi).float()
        if self.gpu_id >= 0:
            with torch.cuda.device(self.gpu_id):
                eps = eps.cuda()
                pi = pi.cuda()

        act = (mu + sigma.sqrt() * eps).detach()
        prob = normal(act, mu, sigma, self.gpu_id, gpu=self.gpu_id >= 0)
        action = torch.clamp(act, -1.0, 1.0)
        entropy = 0.5 * ((sigma * 2 * pi.expand_as(sigma)).log() + 1)
        self.entropies.append(entropy)
        log_prob = (prob + 1e-6).log()
        self.log_probs.append(log_prob)
        state, reward, self.done, self.info = self.env.step(
            action.cpu().numpy()[0])
        reward = max(min(float(reward), 1.0), -1.0)
        self.state = torch.from_numpy(state).float()
        if self.gpu_id >= 0:
            with torch.cuda.device(self.gpu_id):
                self.state = self.state.cuda()
        self.eps_len += 1
        self.done = self.done or self.eps_len >= self.args.max_episode_length
        self.values.append(value)
        self.rewards.append(reward)
        return self
Example #11
    def load_time(self, id):
        '''
        Return the time at which ship number <id>
        finishes loading its cargo.
        '''
        u, o = self.cargo_params[id]
        return self.time + normal(u, o) * 60
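
Here normal(u, o) presumably draws a random load duration from a Gaussian with mean u and standard deviation o (e.g. numpy.random.normal), scaled by 60. A hedged usage sketch with made-up cargo parameters:

from numpy.random import normal  # assumed source of normal() in this snippet

# Hypothetical per-ship (mean, std) load-time parameters and a current simulation time.
cargo_params = {0: (2.0, 0.5)}
current_time = 0.0
u, o = cargo_params[0]
finish_time = current_time + normal(u, o) * 60  # same scaling as load_time above
print(finish_time)
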
Example #12
    def action_train(self):
        value, mu, sigma, (self.hx, self.cx) = self.model(
            (Variable(self.state), (self.hx, self.cx)))
        mu = torch.clamp(mu, -1.0, 1.0)
        sigma = F.softplus(sigma) + 1e-5
        eps = torch.randn(mu.size())
        pi = np.array([math.pi])
        pi = torch.from_numpy(pi).float()
        eps = Variable(eps)
        pi = Variable(pi)

        action = (mu + sigma.sqrt() * eps).data
        act = Variable(action)
        prob = normal(act, mu, sigma)
        action = torch.clamp(action, -1.0, 1.0)
        entropy = 0.5 * ((sigma * 2 * pi.expand_as(sigma)).log() + 1)
        self.entropies.append(entropy)
        log_prob = (prob + 1e-6).log()
        self.log_probs.append(log_prob)
        state, reward, self.done, self.info = self.env.step(
            action.cpu().numpy()[0])
        reward = max(min(float(reward), 1.0), -1.0)
        self.state = torch.from_numpy(state).float()
        self.eps_len += 1
        self.done = self.done or self.eps_len >= self.args.max_episode_length
        self.values.append(value)
        self.rewards.append(reward)
        return self
Example #13
def sample_action(action_type, mu_multi, sigma_multi, test=False, gpu_id=-1):
    if 'discrete' in action_type:
        logit = mu_multi
        prob = F.softmax(logit, dim=1)
        log_prob = F.log_softmax(logit, dim=1)
        entropy = -(log_prob * prob).sum(1)
        if test:
            action = prob.max(1)[1].data
        else:
            action = prob.multinomial(1).data
            log_prob = log_prob.gather(1, Variable(action))
        action_env_multi = np.squeeze(action.cpu().numpy())
    else:  # continuous
        mu = torch.clamp(mu_multi, -1.0, 1.0)
        sigma = F.softplus(sigma_multi) + 1e-5
        eps = torch.randn(mu.size())
        pi = np.array([math.pi])
        pi = torch.from_numpy(pi).float()
        if gpu_id >= 0:
            with torch.cuda.device(gpu_id):
                eps = Variable(eps).cuda()
                pi = Variable(pi).cuda()
        else:
            eps = Variable(eps)
            pi = Variable(pi)
        # sample, clamp, and score the continuous action (runs for both the GPU and CPU branches)
        action = (mu + sigma.sqrt() * eps).data
        act = Variable(action)
        prob = normal(act, mu, sigma, gpu_id, gpu=gpu_id >= 0)
        action = torch.clamp(action, -1.0, 1.0)
        entropy = 0.5 * ((sigma * 2 * pi.expand_as(sigma)).log() + 1)  # 0.5 * (log(2*pi*sigma) + 1)
        log_prob = (prob + 1e-6).log()
        action_env_multi = action.cpu().numpy()
    return action_env_multi, entropy, log_prob
Example #14
    def action_train(self):  #
        self.time_step += 1
        # The model is A3C; this is the forward pass: the value network maps the state to a value,
        # and the policy network maps the state to a mean and standard deviation (theta).
        value, mu_learned, sigma_learned = self.model(Variable(self.state))

        if self.args.use_prior:
            mu_prior, sigma_prior = self.prior.forward(
                Variable(self.state), self.time_step,
                self.args)  # the prior network maps the state to a mean and standard deviation (h)
            sigma_prior = sigma_prior.diag()

        sigma_learned = sigma_learned.diag()

        self.reset_flag = False

        if self.args.use_prior:  # sigma_prior corresponds to sigma_h in the paper, sigma_learned to sigma_theta
            sigma = (sigma_learned.inverse() +
                     sigma_prior.inverse()).inverse()  # behavior-policy variance, equation (23)
            temp = torch.matmul(sigma_learned.inverse(), mu_learned) + \
                torch.matmul(sigma_prior.inverse(), mu_prior)
            mu = torch.matmul(sigma, temp)  # behavior-policy mean, equation (24)
        else:
            sigma = sigma_learned
            mu = mu_learned

        sigma = sigma.diag()  # sigma_behavior(behavior policy)
        sigma_learned = sigma_learned.diag()

        eps = torch.randn(mu.size())  # randn draws standard-normal random numbers
        pi = np.array([math.pi])
        pi = torch.from_numpy(pi).float()
        eps = Variable(eps)
        pi = Variable(pi)
        action = (mu + sigma.sqrt() * eps).data  # sample the action as mean plus std-scaled noise

        act = Variable(action)
        prob = normal(act, mu, sigma)  # Gaussian density of the behavior policy, equation (22)
        # execute the action
        action = torch.clamp(action, self.env.action_space.low[0],
                             self.env.action_space.high[0])
        # expand_as(): expands a tensor to the same shape as the given tensor
        entropy = 0.5 * \
            ((sigma_learned * 2 * pi.expand_as(sigma_learned)).log() + 1)
        self.entropies.append(entropy)
        log_prob = (prob + 1e-6).log()
        self.log_probs.append(log_prob)
        state, reward, self.done, self.info = self.env.step(
            action.cpu().numpy())

        # self.env.render()
        self.state = torch.from_numpy(state).float()
        self.eps_len += 1
        self.done = self.done

        self.values.append(value)
        self.rewards.append(reward)
        self.infos.append(self.info)
        return self
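
The comments above describe a precision-weighted fusion of the learned policy Gaussian and the prior Gaussian (the paper's equations 23 and 24). A self-contained numpy sketch of that fusion, with made-up diagonal covariances and means, is:

import numpy as np

# Made-up diagonal covariances and means for the learned policy (theta) and the prior (h).
sigma_theta = np.diag([0.2, 0.3])
sigma_h = np.diag([0.5, 0.5])
mu_theta = np.array([0.1, -0.4])
mu_h = np.array([0.0, 0.0])

# Behavior-policy covariance: (Sigma_theta^-1 + Sigma_h^-1)^-1                  (cf. eq. 23)
sigma_b = np.linalg.inv(np.linalg.inv(sigma_theta) + np.linalg.inv(sigma_h))
# Behavior-policy mean: Sigma_b @ (Sigma_theta^-1 mu_theta + Sigma_h^-1 mu_h)   (cf. eq. 24)
mu_b = sigma_b @ (np.linalg.inv(sigma_theta) @ mu_theta + np.linalg.inv(sigma_h) @ mu_h)
print(mu_b, np.diag(sigma_b))
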
Example #15
File: player_util.py, Project: NGU12138/LwH
    def action_train(self):
        self.time_step += 1
        value, mu_learned, sigma_learned = self.model(Variable(self.state))

        if self.args.use_prior:
            mu_prior, sigma_prior = self.prior.forward(Variable(self.state),
                                                       self.time_step,
                                                       self.args)
            sigma_prior = sigma_prior.diag()

        sigma_learned = sigma_learned.diag()

        self.reset_flag = False

        if self.args.use_prior:
            sigma = (sigma_learned.inverse() + sigma_prior.inverse()).inverse()
            temp = torch.matmul(sigma_learned.inverse(),
                                mu_learned) + torch.matmul(
                                    sigma_prior.inverse(), mu_prior)
            mu = torch.matmul(sigma, temp)
        else:
            sigma = sigma_learned
            mu = mu_learned

        sigma = sigma.diag()
        sigma_learned = sigma_learned.diag()

        eps = torch.randn(mu.size())
        pi = np.array([math.pi])
        pi = torch.from_numpy(pi).float()
        eps = Variable(eps)
        pi = Variable(pi)

        action = (mu + sigma.sqrt() * eps).data

        act = Variable(action)
        prob = normal(act, mu, sigma)
        action = torch.clamp(action, self.env.action_space.low[0],
                             self.env.action_space.high[0])
        entropy = 0.5 * (
            (sigma_learned * 2 * pi.expand_as(sigma_learned)).log() + 1)
        self.entropies.append(entropy)
        log_prob = (prob + 1e-6).log()
        self.log_probs.append(log_prob)
        state, reward, self.done, self.info = self.env.step(
            action.cpu().numpy())

        self.state = torch.from_numpy(state).float()
        self.eps_len += 1
        self.done = self.done

        self.values.append(value)
        self.rewards.append(reward)
        self.infos.append(self.info)
        return self
Example #16
    def generate_particles(self, num_particles):
        """
        Creates the initial set of particles for SLAM
        Each particle starts at (0,0) since we build the map relative to the drone's
        initial position, but with some noise

        :param num_particles: the number of particles to generate
        """
        self.particles = [Particle(abs(utils.normal(0, 0.1)),
                                   abs(utils.normal(0, 0.1)),
                                   self.z,
                                   abs(utils.normal(math.pi, 0.01))) for _ in range(num_particles)]

        # Reset SLAM variables in case of restart
        self.num_particles = num_particles
        self.key_kp, self.key_des, self.most_recent_map = None, None, None
        self.new_result = False
        self.weight = PROB_THRESHOLD

        return estimate_pose(self.particles)
Example #17
    def generate_particles(self, num_particles):
        """
        Creates the initial set of particles for SLAM.

        Each particle should start at (0, 0) since we build the map relative to the drone's
        initial position, but with a little Gaussian noise so the particles differ.

        Some samples could come out negative, which is impossible, so they are passed through abs().
        """
        # since the localization code treats pi as the forward-facing yaw, it is probably safer
        # to initialize the heading around pi
        self.particles = [
            Particle(abs(utils.normal(0, 0.01)), abs(utils.normal(0, 0.01)),
                     self.z, abs(utils.normal(math.pi, 0.01)))
            for _ in range(num_particles)
        ]

        self.num_particles = num_particles
        self.key_kp, self.key_des = None, None

        return self.estimate_pose()
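
In both generate_particles variants, utils.normal(mean, std) is presumably a Gaussian sampler, and abs() folds the samples so positions stay non-negative. A minimal stand-in under that assumption:

import numpy as np

def normal(mean, std):
    """Sketch of the assumed utils.normal: one sample from N(mean, std**2)."""
    return np.random.normal(mean, std)
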
Example #18
    def action_train(self):
        if self.args.model == 'CONV':
            self.state = self.state.unsqueeze(0)
        value, mu, sigma, (self.hx, self.cx) = self.model(
            (Variable(self.state), (self.hx, self.cx)))
        mu = torch.clamp(mu, -1.0, 1.0)
        sigma = F.softplus(sigma) + 1e-5
        eps = torch.randn(mu.size())
        pi = np.array([math.pi])
        pi = torch.from_numpy(pi).float()
        if self.gpu_id >= 0:
            with torch.cuda.device(self.gpu_id):
                eps = Variable(eps).cuda()
                pi = Variable(pi).cuda()
        else:
            eps = Variable(eps)
            pi = Variable(pi)

        action = (mu + sigma.sqrt() * eps).data
        act = Variable(action)
        prob = normal(act, mu, sigma, self.gpu_id, gpu=self.gpu_id >= 0)
        action = torch.clamp(action, -1.0, 1.0)
        entropy = 0.5 * ((sigma * 2 * pi.expand_as(sigma)).log() + 1)
        self.entropies.append(entropy)
        log_prob = (prob + 1e-6).log()
        self.log_probs.append(log_prob)
        state, reward, self.done, self.info = self.env.step(
            action.cpu().numpy()[0])
        reward = max(min(float(reward), 1.0), -1.0)
        self.state = torch.from_numpy(state).float()
        if self.gpu_id >= 0:
            with torch.cuda.device(self.gpu_id):
                self.state = self.state.cuda()
        self.eps_len += 1

        # update position history
        self.position_history.push(self.env.env.hull.position.x)
        # check for stagnation
        if self._is_stagnating():
            self.done = True
            self.reward = -100

        self.done = self.done or self.eps_len >= self.args.max_episode_length
        self.values.append(value)
        self.rewards.append(reward)
        return self
Example #19
    def action_train(self, print_log=False):
        self.state = self.state.unsqueeze(0)
        value, mu, sigma, (self.hx, self.cx) = self.model(
            (Variable(self.state), (self.hx, self.cx)))
        mu = torch.clamp(mu, -1.0, 1.0)
        sigma = sigma + 1e-3
        eps = torch.randn(mu.size())
        pi = np.array([math.pi])
        pi = torch.from_numpy(pi).float()

        if self.gpu_id >= 0:
            with torch.cuda.device(self.gpu_id):
                eps = Variable(eps).cuda()
                pi = Variable(pi).cuda()
        else:
            eps = Variable(eps)
            pi = Variable(pi)

        action = (mu + sigma.sqrt() * eps).data
        if print_log:
            print(mu.cpu().detach().numpy())
        # print(sigma.cpu().detach().numpy())
        act = Variable(action)
        prob = normal(act, mu, sigma, self.gpu_id, gpu=self.gpu_id >= 0)
        action = torch.clamp(action, -1.0, 1.0)
        entropy = 0.5 * ((sigma * 2 * pi.expand_as(sigma)).log() + 1)
        self.entropies.append(entropy)
        log_prob = (prob + 1e-6).log()
        self.log_probs.append(log_prob)
        state, self.reward, self.done, self.info = self.env.step(
            action.cpu().numpy()[0])

        self.state = torch.from_numpy(state).float()
        if self.gpu_id >= 0:
            with torch.cuda.device(self.gpu_id):
                self.state = self.state.cuda()

        self.reward = max(min(self.reward, 1), -1)
        # print ("Train: ", self.reward, "Done", self.done)
        self.values.append(value)
        self.log_probs.append(log_prob)
        self.rewards.append(self.reward)
        self.eps_len += 1
        return self
Example #20
    def action_train(self):

        self.state = self.state.unsqueeze(0)
        value, mu, sigma, (self.hx, self.cx), terminal_prediction, reward_prediction = self.model((Variable(self.state), (self.hx, self.cx)))
        mu = torch.clamp(mu, -1.0, 1.0)
        sigma = F.softplus(sigma) + 1e-5
        eps = torch.randn(mu.size())
        pi = np.array([math.pi])
        pi = torch.from_numpy(pi).float()
        if self.gpu_id >= 0:
            with torch.cuda.device(self.gpu_id):
                eps = Variable(eps).cuda()
                pi = Variable(pi).cuda()
        else:
            eps = Variable(eps)
            pi = Variable(pi)

        if terminal_prediction is not None:
            self.terminal_predictions.append(terminal_prediction)

        if reward_prediction is not None:
            self.reward_predictions.append(reward_prediction) # does this need to be a Variable?

        action = (mu + sigma.sqrt() * eps).data
        act = Variable(action)
        prob = normal(act, mu, sigma, self.gpu_id, gpu=self.gpu_id >= 0)
        action = torch.clamp(action, -1.0, 1.0)
        entropy = 0.5 * ((sigma * 2 * pi.expand_as(sigma)).log() + 1)
        self.entropies.append(entropy)
        log_prob = (prob + 1e-6).log()
        self.log_probs.append(log_prob)
        state, reward, self.done, self.info = self.env.step(
            action.cpu().numpy()[0])
        reward = max(min(float(reward), 1.0), -1.0)
        self.state = torch.from_numpy(state).float()
        if self.gpu_id >= 0:
            with torch.cuda.device(self.gpu_id):
                self.state = self.state.cuda()
        self.eps_len += 1
        self.done = self.done or self.eps_len >= self.args.max_episode_length
        self.values.append(value)
        self.rewards.append(reward)
        return self
Example #21
    def process_rollout(self):
        """
        Interact with the envirnomant for a few time steps
        and build the loss
        """
        if self.env.done:
            self.env.reset()

            self.model_state = copy.deepcopy(
                self.local_model.init_state(self.device))

        log_probs, rewards, values, entropies = [], [], [], []

        for _ in range(self.cfg.ROLLOUT_STEPS):
            #while not self.env.done:

            state = self.env.get_state()

            state = Variable(state.to(self.device))

            policy_mu, policy_sigma, value, n_model_state = self.local_model(
                state.unsqueeze(0), self.model_state, self.device)

            #mu = F.softsign(policy_mu)
            mu = torch.clamp(policy_mu, -1.0, 1.0)
            #mu = F.tanh(policy_mu)
            sigma = F.softplus(policy_sigma, beta=1.0) + np.finfo(
                np.float32).eps.item()
            """
            # Does not work well # https://discuss.pytorch.org/t/backpropagation-through-sampling-a-normal-distribution/3164
            action_dist = torch.distributions.Normal(mu, sigma.sqrt())
            action = action_dist.rsample().data
            action_log_prob = action_dist.log_prob(action)
            entropy = action_dist.entropy()

            action = torch.clamp(action, -1.0, 1.0)
            """
            noise = Variable(torch.randn(mu.size()).to(self.device))
            pi = Variable(torch.FloatTensor([math.pi]).to(self.device))

            action = (mu + sigma.sqrt() * noise).data
            act = Variable(action)
            action_prob = ut.normal(act, mu, sigma, self.device)
            action_log_prob = (action_prob + 1e-6).log()
            entropy = 0.5 * ((sigma * 2 * pi.expand_as(sigma)).log() + 1)

            action = torch.clamp(action, -1.0, 1.0)

            reward = self.env.step(action.cpu().numpy()[0])

            if self.cfg.CLIP_REWARDS:
                # reward clipping
                r = max(min(float(reward), 1.0), -1.0)
            else:
                r = reward

            log_probs.append(action_log_prob)
            rewards.append(r)
            values.append(value)
            entropies.append(entropy)

            self.model_state = n_model_state

            if self.env.done:

                if self.cfg.DECAY_LR:
                    self.lr_scheduler.step(self.episode_count)

                self.total_reward += self.env.total_reward
                self.episode_count += 1
                self.logger.log_episode(self.worker_name, self.episode_count,
                                        self.env.total_reward)

                break

        if self.env.done:
            R = torch.zeros(1, 1).to(self.device)
        else:
            state = self.env.get_state()

            state = Variable(state.to(self.device))

            _, _, value, _ = self.local_model(state.unsqueeze(0),
                                              self.model_state, self.device)

            R = value.data

        R = Variable(R)
        values.append(R)

        # computing loss
        policy_loss = 0.0
        value_loss = 0.0

        #rewards_ = []
        #for i in reversed(range(len(rewards))):
        #    R = self.cfg.GAMMA * R + rewards[i]
        #    rewards_.append(R)

        #rewards = torch.Tensor(rewards_).to(self.device)

        # reward standardization
        #if self.cfg.STD_REWARDS and len(rewards) > 1:
        #    rewards = (rewards - rewards.mean()) / (rewards.std() + np.finfo(np.float32).eps.item())

        if self.cfg.USE_GAE:
            gae = torch.zeros(1, 1).to(self.device)

        for i in reversed(range(len(rewards))):
            R = self.cfg.GAMMA * R + rewards[i]
            advantage = R - values[i]
            #advantage = rewards[i] - values[i]

            value_loss = value_loss + 0.5 * advantage.pow(2)

            if self.cfg.USE_GAE:
                delta = rewards[i] + self.cfg.GAMMA * \
                        values[i+1].data - values[i].data

                gae = gae * self.cfg.GAMMA * self.cfg.TAU + delta

            else:
                gae = R - values[i].data  #advantage

            policy_loss = policy_loss - \
                          (log_probs[i].sum() * Variable(gae)) - \
                          (self.cfg.ENTROPY_BETA * entropies[i].sum())

        self.logger.log_value('policy_loss',
                              self.step,
                              policy_loss.item(),
                              print_value=False,
                              to_file=False)
        self.logger.log_value('value_loss',
                              self.step,
                              value_loss.item(),
                              print_value=False,
                              to_file=False)

        return policy_loss + self.cfg.VALUE_LOSS_MULT * value_loss
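
The GAE branch above accumulates delta terms backwards through the rollout. A self-contained numpy sketch of that recursion (the gamma and tau defaults here are arbitrary, not taken from cfg):

import numpy as np

def compute_gae(rewards, values, gamma=0.99, tau=1.0):
    """Generalized advantage estimation over one rollout.
    values must hold one extra bootstrap entry, i.e. len(values) == len(rewards) + 1."""
    gae = 0.0
    advantages = np.zeros(len(rewards))
    for i in reversed(range(len(rewards))):
        delta = rewards[i] + gamma * values[i + 1] - values[i]  # TD residual
        gae = gae * gamma * tau + delta                         # discounted sum of residuals
        advantages[i] = gae
    return advantages

# Example: a three-step rollout with a bootstrap value appended.
print(compute_gae([1.0, 0.0, 1.0], [0.5, 0.4, 0.6, 0.0]))
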
Example #22
    def random(self):
        return normal(size=self.s.shape) * self.inv_s
Example #23
    def reset_parameters(self):
        normal(self.weight, 0, 0.1)
        normal(self.bias, 0, 0.1)
Example #24
    def random(self):
        n = normal(size=self.L.shape[0])
        return dot(self.L, n)
Example #25
def expd_product(mappings, interpreter):
    a = get(mappings, 'a')
    return True, [utils.normal(c) for c in a.val]
Example #26
vec_mod_day = [0 for x in range(0, 7)]
vec_mod_hour = [0 for x in range(0, 24)]
rows = []
# query all the data for this mall
sql = "SELECT data_id,wifi_ssid,wifi_db,time_stamp,wifi_conn,DAYOFWEEK(time_stamp),HOUR(time_stamp),MINUTE(time_stamp) FROM data_test_final WHERE mall_id='%s' ORDER BY data_id,wifi_ssid " % mall_id
cur.execute(sql)
row = cur.fetchone()
v = vec[:]
vec_day = vec_mod_day[:]
vec_day[row[5] - 1] = weight_day
vec_hour = vec_mod_hour[:]
hour = (row[6] + 1) if row[7] >= 30 else row[6]
vec_hour[0 if hour > 23 else hour] = weight_hour
row_id = row[0]
if row[1] in wifis:
    v[wifis.index(row[1])] = utils.normal(row[2])
for r in cur.fetchall():
    # start a new feature vector whenever the data_id differs from the previous row's
    if r[0] != row_id:
        matrix.append(v)
        matrix_day.append(vec_day)
        matrix_hour.append(vec_hour)
        rows.append(row_id)
        v = vec[:]
        vec_day = vec_mod_day[:]
        vec_day[r[5] - 1] = weight_day
        vec_hour = vec_mod_hour[:]
        hour = (r[6] + 1) if r[7] >= 30 else r[6]
        vec_hour[0 if hour > 23 else hour] = weight_hour
        row_id = r[0]
    if r[1] in wifis:
        v[wifis.index(r[1])] = utils.normal(r[2])  # mirrors the handling of the first row above
Example #27
    def random(self):
        return normal(size=self.s.shape) * self.inv_s
Example #28
    def random(self):
        n = normal(size=self.L.shape[0])
        return dot(self.L, n)
Example #29
    def random(self):
        n = normal(size=self.L.shape[0])
        return solve(self.L.T, n)
Example #30
    def random(self):
        return self.Ldot(normal(size=self.n))
Example #31
    def visit_unicode(self, node, children) -> Any:
        return add_node(normal(int(node.value[2:], 16)), shape=Shape.BOX)
Example #32
    def random(self):
        return self.Ldot(normal(size=self.n))
Example #33
        pre_y = self.model.predict(test_x, batch_size=256)
        return pre_y

if __name__ == '__main__':
    # Three inputs: the loan, know, and attribute matrices are combined and then used for training.
    Y = np.load('embedding_y.npy')
    X_attr = np.load('embedding_x_attr.npy')
    X_loan = load_file('embeding_matrix', tail='loan')
    X_chaxun = load_file('embeding_matrix', tail='chaxun')
    Y = np.where(Y == 'good', 1, 0)
    X_train1, X_test1, X_train2, X_test2, X_train3, X_test3, y_train_or, y_test = train_test_split(
        X_attr, X_loan, X_chaxun, Y, test_size=0.2, random_state=4, shuffle=False)
    X_train1, y_train = balance_data(X_train1, y_train_or)
    X_train2, _ = balance_data(X_train2, y_train_or)
    X_train3, _ = balance_data(X_train3, y_train_or)
    X_train1, X_test1 = normal(X_train1, X_test1)
    X_train2, X_test2 = normal(X_train2, X_test2)
    X_train3, X_test3 = normal(X_train3, X_test3)
    # print(X_train1.shape)
    # X_train1, y_train = up_sample(X_train1, y_train_or, 2)
    # X_train2, _ = up_sample(X_train2, y_train_or, 2)
    # X_train3, _ = up_sample(X_train3, y_train_or, 2)
    # print(X_train1.shape, X_train2.shape, X_train3.shape)
    y_train = to_categorical(y_train, 2)
    y_test = to_categorical(y_test, 2)
    # feed the inputs into the model
    model = Model_Graph([X_train1, X_train2, X_train3], y_train)
    model.mutilin_model()
    model.model_fit([X_test1, X_test2, X_test3], y_test)
    pre_y = model.predict([X_test1, X_test2, X_test3])
    # single input
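
In this script, normal(X_train, X_test) appears to fit a normalization on the training split and apply it to both splits. A minimal sketch under that assumption (z-score scaling is a guess; the real helper may differ):

import numpy as np

def normal(train, test):
    """Hypothetical stand-in: standardize both splits with statistics of the training split only."""
    mean = train.mean(axis=0)
    std = train.std(axis=0) + 1e-8   # avoid division by zero
    return (train - mean) / std, (test - mean) / std
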
Example #34
    def random(self):
        n = normal(size=self.L.shape[0])
        return solve(self.L.T, n)
Example #35
    def visit_symbol_in_range(self, node, children) -> Any:
        return add_node(normal(ord(node.value)), shape=Shape.BOX)
Example #36
    def visit_character_set(self, node, children) -> Any:
        negated = children[0] == '^'
        if negated:
            children = children[1:]

        classes, values = set(), set()
        if children[0] in ['-', ']']:
            values.add(ord(children[0]))
            children = children[1:]
        for child in children:
            if isinstance(child, dict):
                ident = child['top']
                label = child['nodes'][ident]['label']
                try:
                    values.update(order(label))
                except TypeError:
                    classes.add(label)
            else:
                values.update(child)

        graph = add_node(
            f"{'^' if negated else ''}charset",
            font=Font.ITALIC,
            shape=Shape.TRAPEZIUM,
            color=NEGATED if negated else None,
        )
        source = graph['top']

        for class_ in sorted(classes):
            child = add_node(class_, shape=Shape.BOX, style=Style.FILLED)
            graph = merge(graph, child)
            graph = add_edge(source, child['top'], graph)

        for group, symbol in [
            (BUT_SPACE, '\\S'),
            (BUT_DIGIT, '\\D'),
            (BUT_WORD, '\\W'),
            (WORD, '\\w'),
            (DIGIT, '\\d'),
            (SPACE, '\\s'),
        ]:
            if group in values:
                child = add_node(symbol, shape=Shape.BOX, style=Style.FILLED)
                graph = merge(graph, child)
                graph = add_edge(source, child['top'], graph)
                values -= group

        start, last = None, None
        for value in sorted(values):
            if last is None:
                start, last = value, value
            elif value == last + 1:
                last = value
            else:
                label = (normal(start) if last == start
                         else f"{normal(start)}-{normal(last)}")
                child = add_node(label, shape=Shape.BOX)
                graph = merge(graph, child)
                graph = add_edge(source, child['top'], graph)
                start, last = None, None

        if last is not None:
            label = (normal(start) if last == start
                     else f"{normal(start)}-{normal(last)}")
            child = add_node(label, shape=Shape.BOX)
            graph = merge(graph, child)
            graph = add_edge(source, child['top'], graph)

        return graph