Example #1
File: main.py Project: Sha-Lab/qmc
def get_policy(args, env):
    N = env.observation_space.shape[0]
    M = env.action_space.shape[0]
    if args.init_policy == 'optimal':
        K = env.optimal_controller()
        mean_network = nn.Linear(*K.shape[::-1], bias=False)
        mean_network.weight.data = tensor(K)
    elif args.init_policy == 'linear':
        K = np.random.randn(M, N)
        mean_network = nn.Linear(*K.shape[::-1], bias=False)
        mean_network.weight.data = tensor(K)
    elif args.init_policy == 'linear_bias':
        K = np.random.randn(M, N)
        mean_network = nn.Linear(*K.shape[::-1], bias=True)
        mean_network.weight.data = tensor(K)
    elif args.init_policy == 'mlp':
        mean_network = get_mlp((N, ) + tuple(args.hidden_sizes) + (M, ),
                               gate=nn.Tanh)
    else:
        raise Exception('unsupported policy type')
    return GaussianPolicy(N,
                          M,
                          mean_network,
                          learn_std=not args.fix_std,
                          gate_output=args.gate_output)
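A quick usage sketch for the helper above, with a hypothetical argument namespace (the field names simply mirror what the function reads; `env` is assumed to expose Gym-style `Box` observation and action spaces):

# Hypothetical call site; the args fields mirror those read by get_policy().
from argparse import Namespace

args = Namespace(init_policy='linear',    # 'optimal' | 'linear' | 'linear_bias' | 'mlp'
                 hidden_sizes=(64, 64),   # only consulted for 'mlp'
                 fix_std=True,            # freeze the Gaussian std
                 gate_output=False)
policy = get_policy(args, env)            # env: Gym-style Box spaces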
Example #2
def zero_transition(t: Transition):
    result = Transition(
                state=tensor(torch.zeros(t['state'].shape)),
                policy=tensor(torch.ones(t['policy'].shape)/t['policy'].shape[0]),
                action=tensor(0).long(),
                reward=tensor(0.),
                new_state=tensor(torch.zeros(t['new_state'].shape)))

    return result
Example #3
    def min_noise_D_unfaithful(self, D):
        noise = cp.Variable(1)
        iden = np.eye(self.dim)
        state = self.make_state(noise)
        Na = utils._make_cp_matrix((self.dim, self.dim), False)
        Nb = utils._make_cp_matrix((self.dim, self.dim), False)

        constraints = [noise >= 0, noise <= 1, Na >> 0, Nb >> 0]
        constraints += [utils.tensor(Na, iden) + utils.tensor(iden, Nb) >> state]
        constraints += [cp.trace(Na)*iden >> (D-1)*Na]
        constraints += [cp.trace(Nb)*iden >> (D-1)*Nb]
        constraints += [cp.trace(Na) + cp.trace(Nb) == D-1]
        pr = cp.Problem(cp.Minimize(noise), constraints)
        pr.solve(solver=cp.MOSEK)
        return noise.value[0]
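Read back as mathematics, the program above solves the following SDP (a transcription of the constraints as written, where rho(v) denotes the state built by make_state(v) and 1 the d-dimensional identity):

\begin{aligned}
\min_{v,\,N_A,\,N_B}\ & v \\
\text{s.t. }\ & 0 \le v \le 1,\quad N_A \succeq 0,\quad N_B \succeq 0,\\
& N_A \otimes \mathbb{1} + \mathbb{1} \otimes N_B \succeq \rho(v),\\
& \operatorname{tr}(N_A)\,\mathbb{1} \succeq (D-1)\,N_A,\quad \operatorname{tr}(N_B)\,\mathbb{1} \succeq (D-1)\,N_B,\\
& \operatorname{tr}(N_A) + \operatorname{tr}(N_B) = D-1 .
\end{aligned}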
Example #4
 def __call__(self, *args, **kwargs):
     this_op = self.compute(*args, **kwargs)
     if self.tensor_with is not None:
         ops = [T if T is not None else this_op for T in self.tensor_with]
         return tensor(ops)
     else:
         return this_op
Example #5
 def create_batch(batch, n):
     train_in = tensor(P.cuda_device, n, *P.image_input_size)
     labels_in = tensor_t(torch.LongTensor, P.cuda_device, n)
     for j, (im, lab, _) in enumerate(batch):
         train_in[j] = trans(im)
         labels_in[j] = labels.index(lab)
     return [train_in], [labels_in]
Example #6
def get_embeddings(net, dataset, device, out_size):
    trans = P.test_trans
    if P.test_pre_proc:
        trans = transforms.Compose([])

    if not P.embeddings_classify:
        # remove classifier and add back later
        classifier = net.classifier
        net.classifier = nn.Sequential()

    def batch(last, i, is_final, batch):
        embeddings = last
        n = len(batch)
        test_in = tensor(P.cuda_device, n, *P.image_input_size)
        for j, (testIm, _, _) in enumerate(batch):
            test_in[j] = trans(testIm)
        out = net(Variable(test_in, volatile=True))
        # we have the classification values. just normalize
        out = NormalizeL2Fun()(out)
        out = out.data
        for j in range(n):
            embeddings[i + j] = out[j]
        return embeddings

    init = tensor(device, len(dataset), out_size)
    embeddings = fold_batches(batch, init, dataset, P.test_batch_size)
    if not P.embeddings_classify:
        net.classifier = classifier
    return embeddings
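fold_batches itself is not shown on this page; judging from the callback signature (last, i, is_final, batch) and the embeddings[i + j] indexing used throughout these examples, it is presumably a left-fold over consecutive dataset slices. A sketch under that assumption (the name fold_batches_sketch is hypothetical):

def fold_batches_sketch(f, init, dataset, batch_size):
    # Left-fold f over consecutive slices of the dataset.
    # f receives (accumulator, start_index, is_final, batch) and returns the new accumulator.
    acc = init
    for start in range(0, len(dataset), batch_size):
        chunk = dataset[start:start + batch_size]
        acc = f(acc, start, start + batch_size >= len(dataset), chunk)
    return acc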
Example #7
def get_embeddings(net, dataset, device, out_size):
    test_trans = P.test_trans
    if P.test_pre_proc:
        test_trans = transforms.Compose([])

    def batch(last, i, is_final, batch):
        embeddings = last
        im_trans = test_trans(batch[0][0])
        test_in = move_device(im_trans.unsqueeze(0), P.cuda_device)
        out = net(Variable(test_in, volatile=True))[0].data
        # first, determine location of highest maximal activation
        max_pred, _ = out.max(1)
        max_pred1, max_i1 = max_pred.max(2)
        _, max_i2 = max_pred1.max(3)
        i2 = max_i2.view(-1)[0]
        i1 = max_i1.view(-1)[i2]
        # we have the indexes of the highest maximal activation,
        # get the classification values at this point and normalize
        out = out[:, :, i1, i2]
        out = NormalizeL2Fun()(Variable(out, volatile=True))
        out = out.data
        embeddings[i] = out[0]
        return embeddings

    init = tensor(device, len(dataset), out_size)
    return fold_batches(batch, init, dataset, 1)
Example #8
 def create_batch(batch, n, epoch, similarities):
     # one image at a time. batch is always of size 1
     train_in1 = tensor(P.cuda_device, n, *P.image_input_size)
     train_in2 = tensor(P.cuda_device, n, *P.image_input_size)
     train_in3 = tensor(P.cuda_device, n, *P.image_input_size)
     labels_in = tensor_t(torch.LongTensor, P.cuda_device, n)
     # we get positive couples. find negatives for them
     for j, (lab, (i1, i2), (im1, im2)) in enumerate(batch):
         im3 = None
         # choose a semi-hard negative. see FaceNet
         # paper by Schroff et al for details.
         # essentially, choose hardest negative that is still
         # easier than the positive. this should avoid
         # collapsing the model at beginning of training
         ind_exl = lab_indicators[lab]
         sim_pos = similarities[i1, i2]
         if epoch < P.train_epoch_switch:
             # exclude all positives as well as any that are
             # more similar than sim_pos
             ind_exl = ind_exl | similarities[i1].ge(sim_pos)
         if ind_exl.sum() >= similarities.size(0):
             p = 'cant find semi-hard neg for'
             s = 'falling back to random neg'
             n_pos = lab_indicators[lab].sum()
             n_ge = similarities[i1].ge(sim_pos).sum()
             n_tot = similarities.size(0)
             print('{0} {1}-{2}-{3} (#pos:{4}, #ge:{5}, #total:{6}), {7}'.
                   format(p, i1, i2, lab, n_pos, n_ge, n_tot, s))
         else:
             # similarities must be in [-1, 1]
             # set all similarities of excluded indexes to -2
             # then take argmax (highest similarity not excluded)
             sims = similarities[i1].clone()
             sims[ind_exl] = -2
             _, k = sims.max(0)
             im3 = train_set[k[0]][0]
         if im3 is None:
             # default to random negative
             im3 = choose_rand_neg(train_set, lab)
         # one image at a time
         train_in1[j] = train_trans(im1)
         train_in2[j] = train_trans(im2)
         train_in3[j] = train_trans(im3)
         labels_in[j] = labels.index(lab)
     # return input tensors and labels
     return [train_in1, train_in2, train_in3], [labels_in]
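The comments above describe FaceNet-style semi-hard negative mining. A self-contained sketch of just the selection step, with hypothetical names (sims holds the anchor's similarities to all candidates, exclude is a boolean mask of invalid candidates):

import torch

def pick_semi_hard_negative(sims: torch.Tensor, exclude: torch.Tensor, sim_pos: float):
    # Highest-similarity candidate that is still less similar than the positive pair.
    mask = exclude | (sims >= sim_pos)   # drop positives and any negative at least as similar as the positive
    if bool(mask.all()):
        return None                      # caller falls back to a random negative
    masked = sims.clone()
    masked[mask] = -2.0                  # similarities live in [-1, 1], so -2 sorts below everything
    return int(masked.argmax())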
Example #9
 def __init__(self,
              N,
              batch_shape,
              echo_pulse=True,
              name='conditional_displacement'):
     super().__init__(name=name)
     self.displace = ops.DisplacementOperator(
         N, tensor_with=[ops.identity(2), None])
     self.P = {
         i: utils.tensor([ops.projector(i, 2),
                          ops.identity(N)])
         for i in [0, 1]
     }
     self.batch_shape = batch_shape
     self.qubit_op = utils.tensor([ops.sigma_x(),
                                   ops.identity(N)
                                   ]) if echo_pulse else ops.identity(2 * N)
Example #10
 def batch(last, i, is_final, batch):
     embeddings = last
     test_in = tensor(P.cuda_device, len(batch), *P.image_input_size)
     for j, (im, _, _) in enumerate(batch):
         test_in[j] = test_trans(im)
     out = net(Variable(test_in, volatile=True)).data
     for j, embedding in enumerate(out):
         embeddings[i + j] = embedding
     return embeddings
Example #11
 def batch(last, i, is_final, batch):
     embeddings = last
     n = len(batch)
     test_in = tensor(P.cuda_device, n, *P.image_input_size)
     for j, (testIm, _, _) in enumerate(batch):
         test_in[j] = trans(testIm)
     out = net(Variable(test_in, volatile=True))
     # we have the classification values. just normalize
     out = NormalizeL2Fun()(out)
     out = out.data
     for j in range(n):
         embeddings[i + j] = out[j]
     return embeddings
Example #12
 def __init__(
     self,
     state_dim,
     action_dim,
     mean_network,
     learn_std=True,
     gate_output=False,
 ):
     super().__init__()
     self._mean = mean_network
     if learn_std: self._std = nn.Parameter(torch.zeros(action_dim))
     else: self._std = tensor(np.ones(action_dim))
     self.gate_output = gate_output
     self.learn_std = learn_std
     self.to(Config.DEVICE)
Example #13
 def eval_batch_test(last, i, is_final, batch):
     correct, total = last
     n = len(batch)
     test_in = tensor(P.cuda_device, n, *P.image_input_size)
     for j, (testIm, _, _) in enumerate(batch):
         test_in[j] = trans(testIm)
     out = net(Variable(test_in, volatile=True)).data
     # first get all maximal values for classification
     # then, use the spatial region with the highest maximal value
     # to make a prediction
     _, predicted = torch.max(out, 1)
     total += n
     correct += sum(
         labels.index(testLabel) == predicted[j][0]
         for j, (_, testLabel, _) in enumerate(batch))
     return correct, total
Example #14
def get_embeddings(net, dataset, device, out_size):
    test_trans = P.test_trans
    if P.test_pre_proc:
        test_trans = transforms.Compose([])

    def batch(last, i, is_final, batch):
        embeddings = last
        test_in = tensor(P.cuda_device, len(batch), *P.image_input_size)
        for j, (im, _, _) in enumerate(batch):
            test_in[j] = test_trans(im)
        out = net(Variable(test_in, volatile=True)).data
        for j, embedding in enumerate(out):
            embeddings[i + j] = embedding
        return embeddings

    init = tensor(device, len(dataset), out_size)
    return fold_batches(batch, init, dataset, P.test_batch_size)
Example #15
def get_embeddings(net, dataset, device, out_size):
    test_trans = P.test_trans
    if P.test_pre_proc:
        test_trans = transforms.Compose([])

    def batch(last, i, is_final, batch):
        embeddings = last
        # one image at a time
        test_in = move_device(
            test_trans(batch[0][0]).unsqueeze(0), P.cuda_device)

        out = net(Variable(test_in, volatile=True)).data
        embeddings[i] = out[0]
        return embeddings

    init = tensor(device, len(dataset), out_size)
    return fold_batches(batch, init, dataset, 1)
Example #16
    def min_noise_to_have_rank_D_or_less(self, D):
        # Saves some memory by not redefining a cvxpy state here, and rewriting the constraints
        dT = self.dim**2 * D**2
        iden = np.eye(self.dim)
        proj = utils.tensor(iden, np.sqrt(D)*utils.max_entangled_ket(D), iden)

        noise = cp.Variable(1)
        state = self.make_state(noise)
        sigma = utils._make_cp_matrix((dT, dT), False)

        constraints = [noise >= 0, noise <= 1]
        constraints += [sigma >> 0, cp.trace(sigma) == D]
        constraints += [proj @ sigma @ proj.T == state]
        constraints += [utils.partial_transpose(sigma, [0], [self.dim*D, self.dim*D]) >> 0]

        pr = cp.Problem(cp.Minimize(noise), constraints)
        pr.solve(solver=cp.MOSEK)
        return noise.value[0]
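Reading the constraints back, the snippet minimizes the noise v over a dT-dimensional extension sigma, with P the matrix built by the proj line (1 ⊗ √D⟨Φ_D| ⊗ 1) and T_A the partial transpose on the first factor:

\begin{aligned}
\min_{v,\,\sigma}\ & v\\
\text{s.t. }\ & 0 \le v \le 1,\quad \sigma \succeq 0,\quad \operatorname{tr}\sigma = D,\\
& P\,\sigma\,P^{\mathsf T} = \rho(v),\qquad \sigma^{T_A} \succeq 0 .
\end{aligned}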
Example #17
    observation = env.reset()
    buff = []
    done = False
    score = 0
    enemy_score = 0

    while not done:
        
        env.render()
        
        if len(buff) < 4:
            observation, reward, done, info = env.step(env.action_space.sample())
            buff.append(observation)
            continue

        x = tensor(preprocess(buff), args.device)[None]
        action = int(torch.argmax(model(x).detach().cpu()))

        observation, reward, done, info = env.step(action)

        buff.pop(0)
        buff.append(observation)
        
        if reward>0:
            score+=reward
        else:
            enemy_score-=reward

    print('Enemy Score - {}, Our Score - {}'.format(enemy_score, score))
    time.sleep(10)
Example #18
    def _step(self):
        config = self.config
        if self.state is None:
            self.random_process.reset_states()
            self.state = self.task.reset()
            self.state = config.state_normalizer(self.state)

        if self.total_steps < config.warm_up:
            action = to_np(self.task.action_space.sample())
        else:
            action = self.network(self.state)
            action = to_np(action)
            action += self.random_process.sample()
        action = np.clip(action, int(self.task.action_space.low),
                         int(self.task.action_space.high))
        next_state, reward, done, info = self.task.step(action)
        next_state = self.config.state_normalizer(next_state)
        reward = norm_reward = self.config.reward_normalizer(reward)

        experiences = list(
            zip(self.state, action, norm_reward, next_state, done))
        self.replay.feed_batch(experiences)
        if done[0]:
            self.random_process.reset_states()
        self.state = next_state
        self.total_steps += 1

        if (self.replay.size() >= config.warm_up):
            experiences = self.replay.sample()
            states, actions, rewards, next_states, terminals = experiences
            states = tensor(states)
            actions = tensor(actions)
            rewards = tensor(rewards).unsqueeze(-1)
            next_states = tensor(next_states)
            mask = tensor(1 - terminals).unsqueeze(-1)

            phi_next = self.target_network.feature(next_states)
            a_next = self.target_network.actor(phi_next)
            q_next = self.target_network.critic(phi_next, a_next)
            q_next = self.config.discount * mask * q_next
            q_next.add_(rewards)
            q_next = q_next.detach()
            phi = self.network.feature(states)
            q = self.network.critic(phi, actions)
            critic_loss = (q - q_next).pow(2).mul(0.5).sum(-1).mean()

            self.network.zero_grad()
            critic_loss.backward()
            self.network.critic_opt.step()

            phi = self.network.feature(states)
            action = self.network.actor(phi)
            policy_loss = -self.network.critic(phi.detach(), action).mean()

            self.network.zero_grad()
            policy_loss.backward()
            self.network.actor_opt.step()

            self.soft_update(self.target_network, self.network)

        return reward, not all(done)
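soft_update itself is not part of this example; a common Polyak-averaging implementation, written here as an assumption about what that call does (the mixing rate tau is hypothetical):

import torch

def soft_update(target_network: torch.nn.Module, src_network: torch.nn.Module, tau: float = 1e-3):
    # target <- tau * src + (1 - tau) * target, parameter by parameter
    with torch.no_grad():
        for target_p, src_p in zip(target_network.parameters(), src_network.parameters()):
            target_p.mul_(1.0 - tau).add_(src_p, alpha=tau)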
Example #19
 def forward(self, obs, noise):
     obs = tensor(obs)
     action = self.mean(obs) + tensor(noise) * self.std
     return action.cpu().detach().numpy()
Example #20
 def distribution(self, obs):
     obs = tensor(obs)
     dist = torch.distributions.Normal(self.mean(obs), self.std)
     return dist
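For reference, the object returned above is a torch.distributions.Normal; a minimal, self-contained sketch of how such a diagonal Gaussian is typically consumed downstream (generic usage, not taken from this project):

import torch

dist = torch.distributions.Normal(loc=torch.zeros(4), scale=torch.ones(4))
action = dist.sample()                     # one sample per action dimension
log_prob = dist.log_prob(action).sum(-1)   # sum over action dimensions
entropy = dist.entropy().sum(-1)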
Example #21
    def train_rollout(self, total_step):
        storage = Storage(self.episode_C['rollout_length'])
        state = self.env._copy_state(*self.state)
        step_times = []
        # Sync.
        self.gnn.load_state_dict(self.shared_gnn.state_dict())
        for rollout_step in range(self.episode_C['rollout_length']):
            start_step_time = time.time()
            prediction = self.env.propagate(self.gnn, [state])
            action = prediction['a'].cpu().numpy()[0]
            next_state, reward, done, achieved_goal = self.env.step(action, self.ep_step, state)

            self.ep_step += 1
            if done:
                # Sync local model with shared model at start of each ep
                self.gnn.load_state_dict(self.shared_gnn.state_dict())
                self.ep_step = 0

            storage.add(prediction)
            storage.add({'r': tensor(reward, self.device).unsqueeze(-1).unsqueeze(-1),
                         'm': tensor(1 - done, self.device).unsqueeze(-1).unsqueeze(-1),
                         's': state})

            state = self.env._copy_state(*next_state)

            total_step += 1

            end_step_time = time.time()
            step_times.append(end_step_time - start_step_time)

        self.state = self.env._copy_state(*state)

        prediction = self.env.propagate(self.gnn, [state])
        storage.add(prediction)
        storage.placeholder()

        advantages = tensor(np.zeros((1, 1)), self.device)
        returns = prediction['v'].detach()
        for i in reversed(range(self.episode_C['rollout_length'])):
            # Disc. Return
            returns = storage.r[i] + self.agent_C['discount'] * storage.m[i] * returns
            # GAE
            td_error = storage.r[i] + self.agent_C['discount'] * storage.m[i] * storage.v[i + 1] - storage.v[i]
            advantages = advantages * self.agent_C['gae_tau'] * self.agent_C['discount'] * storage.m[i] + td_error
            storage.adv[i] = advantages.detach()
            storage.ret[i] = returns.detach()

        # print(returns.shape, td_error.shape, advantages.shape, storage.adv[-1].shape, storage.ret[-1].shape)

        actions, log_probs_old, returns, advantages = storage.cat(['a', 'log_pi_a', 'ret', 'adv'])
        states = [storage.s[i] for i in range(storage.size)]

        actions = actions.detach()
        log_probs_old = log_probs_old.detach()
        advantages = (advantages - advantages.mean()) / advantages.std()

        # Train
        self.gnn.train()
        batch_times = []
        train_pred_times = []
        for _ in range(self.agent_C['optimization_epochs']):
            # Sync. at start of each epoch
            self.gnn.load_state_dict(self.shared_gnn.state_dict())
            sampler = random_sample(np.arange(len(states)), self.agent_C['minibatch_size'])
            for batch_indices in sampler:
                start_batch_time = time.time()

                batch_indices_tensor = tensor(batch_indices, self.device).long()

                # Important note: these are tensors but don't have a grad
                sampled_states = [states[i] for i in batch_indices]
                sampled_actions = actions[batch_indices_tensor]
                sampled_log_probs_old = log_probs_old[batch_indices_tensor]
                sampled_returns = returns[batch_indices_tensor]
                sampled_advantages = advantages[batch_indices_tensor]

                start_pred_time = time.time()
                prediction = self.env.propagate(self.gnn, sampled_states, sampled_actions)
                end_pred_time = time.time()
                train_pred_times.append(end_pred_time - start_pred_time)

                # Calc. Loss
                ratio = (prediction['log_pi_a'] - sampled_log_probs_old).exp()

                obj = ratio * sampled_advantages
                obj_clipped = ratio.clamp(1.0 - self.agent_C['ppo_ratio_clip'],
                                          1.0 + self.agent_C['ppo_ratio_clip']) * sampled_advantages

                # policy loss and value loss are scalars
                policy_loss = -torch.min(obj, obj_clipped).mean() - self.agent_C['entropy_weight'] * prediction['ent'].mean()

                value_loss = self.agent_C['value_loss_coef'] * (sampled_returns - prediction['v']).pow(2).mean()

                self.opt.zero_grad()
                (policy_loss + value_loss).backward()
                if self.agent_C['clip_grads']:
                    nn.utils.clip_grad_norm_(self.gnn.parameters(), self.agent_C['gradient_clip'])
                ensure_shared_grads(self.gnn, self.shared_gnn)
                self.opt.step()
                end_batch_time = time.time()
                batch_times.append(end_batch_time - start_batch_time)
        self.gnn.eval()
        return total_step, np.array(step_times).mean(), np.array(batch_times).mean(), np.array(train_pred_times).mean()
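In the notation of the code above (m_t = 1 - done, gamma = discount, lambda = gae_tau, epsilon = ppo_ratio_clip), the backward loop and the clipped loss compute the standard discounted return, GAE advantage, and PPO objective:

\begin{aligned}
R_t &= r_t + \gamma\, m_t\, R_{t+1}, \\
\delta_t &= r_t + \gamma\, m_t\, V(s_{t+1}) - V(s_t), \\
\hat A_t &= \delta_t + \gamma\lambda\, m_t\, \hat A_{t+1}, \\
\rho_t &= \exp\!\big(\log \pi(a_t\mid s_t) - \log \pi_{\text{old}}(a_t\mid s_t)\big), \\
L &= -\,\mathbb{E}\big[\min\!\big(\rho_t \hat A_t,\ \operatorname{clip}(\rho_t,\,1-\epsilon,\,1+\epsilon)\,\hat A_t\big)\big]
    \;-\; \beta\,\mathbb{E}[H_t] \;+\; c\,\mathbb{E}\big[(R_t - V(s_t))^2\big],
\end{aligned}

where beta is entropy_weight and c is value_loss_coef.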
Example #22
def plot_phase_space(state,
                     tensorstate,
                     phase_space_rep='wigner',
                     lim=4,
                     pts=81,
                     title=None):
    """
    Plot phase space representation of the state. Converts a batch of states
    to density matrix.
    
    Args:
        state (tf.Tensor([B,N], c64)): batched state vector
        tensorstate (bool): flag if tensored with qubit
        phase_space_rep (str): either 'wigner' or 'CF'
        lim (float): plot limit in displacement units
        pts (int): number of pixels in each direction 
        title (str): figure title (optional)
    
    """

    assert len(state.shape) >= 2 and state.shape[1] > 1

    # create operators
    if tensorstate:
        N = int(state.shape[1] / 2)
        parity = utils.tensor([ops.identity(2), ops.parity(N)])
        D = ops.DisplacementOperator(N, tensor_with=[ops.identity(2), None])
    else:
        N = state.shape[1]
        D = ops.DisplacementOperator(N)
        parity = ops.parity(N)

    # project every trajectory onto |g> subspace
    if tensorstate:
        P0 = utils.tensor([ops.projector(0, 2), ops.identity(N)])
        state, _ = utils.normalize(tf.linalg.matvec(P0, state))

    # make a density matrix
    dm = utils.density_matrix(state)

    # Generate a grid of phase space points
    x = np.linspace(-lim, lim, pts)
    y = np.linspace(-lim, lim, pts)

    xs_mesh, ys_mesh = np.meshgrid(x, y, indexing='ij')
    grid = tf.cast(xs_mesh + 1j * ys_mesh, c64)
    grid_flat = tf.reshape(grid, [-1])

    matmul = tf.linalg.matmul

    # Calculate and plot the phase space representation
    if phase_space_rep == 'wigner':
        displaced_parity = matmul(D(grid_flat), matmul(parity, D(-grid_flat)))
        W = 1 / pi * tf.linalg.trace(matmul(displaced_parity, dm))
        W_grid = tf.reshape(W, grid.shape)

        fig, ax = plt.subplots(1, 1, dpi=200)
        fig.suptitle(title)
        ax.pcolormesh(x,
                      y,
                      np.transpose(W_grid.numpy().real),
                      cmap='RdBu_r',
                      vmin=-1 / pi,
                      vmax=1 / pi)
        ax.set_aspect('equal')

    if phase_space_rep == 'CF':

        C = tf.linalg.trace(matmul(D(grid_flat), dm))
        C_grid = tf.reshape(C, grid.shape)

        fig, axes = plt.subplots(1, 2, sharey=True, dpi=200)
        fig.suptitle(title)
        axes[0].pcolormesh(x,
                           y,
                           np.transpose(C_grid.numpy().real),
                           cmap='RdBu_r',
                           vmin=-1,
                           vmax=1)
        axes[1].pcolormesh(x,
                           y,
                           np.transpose(C_grid.numpy().imag),
                           cmap='RdBu_r',
                           vmin=-1,
                           vmax=1)
        axes[0].set_title('Re')
        axes[1].set_title('Im')
        axes[0].set_aspect('equal')
        axes[1].set_aspect('equal')

    plt.tight_layout()
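The two branches above evaluate, on the displacement grid, the Wigner function and the characteristic function of the density matrix rho (Pi is the photon-number parity and D(alpha) the displacement operator):

W(\alpha) = \frac{1}{\pi}\,\operatorname{Tr}\!\left[D(\alpha)\,\Pi\,D(-\alpha)\,\rho\right],
\qquad
C(\alpha) = \operatorname{Tr}\!\left[D(\alpha)\,\rho\right].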
Example #23
File: main.py Project: Sha-Lab/qmc
def compare_cost(args):
    set_seed(args.seed)
    env = LQR(
        #N=20,
        #M=12,
        init_scale=1.0,
        max_steps=args.H,  # 10, 20
        Sigma_s_kappa=1.0,
        Q_kappa=1.0,
        P_kappa=1.0,
        A_norm=1.0,
        B_norm=1.0,
        Sigma_s_scale=0.0,
    )
    K = env.optimal_controller()
    mean_network = nn.Linear(*K.shape[::-1], bias=False)
    mean_network.weight.data = tensor(K)
    policy = GaussianPolicy(*K.shape[::-1],
                            mean_network,
                            learn_std=False,
                            gate_output=False)

    # mc
    mc_costs = []  # individual
    mc_means = []  # cumulative
    for i in tqdm(range(args.n_trajs), 'mc'):
        noises = np.random.randn(env.max_steps, env.M)
        _, _, rewards, _, _ = rollout(env, policy, noises)
        mc_costs.append(-rewards.sum())
        mc_means.append(np.mean(mc_costs))

    # rqmc
    rqmc_costs = []
    rqmc_means = []
    rqmc_noises = get_rqmc_noises(args.n_trajs, env.max_steps, env.M,
                                  'trajwise')
    for i in tqdm(range(args.n_trajs), 'rqmc'):
        _, _, rewards, _, _ = rollout(env, policy, rqmc_noises[i])
        rqmc_costs.append(-rewards.sum())
        rqmc_means.append(np.mean(rqmc_costs))

    # array rqmc
    arqmc_costs_dict = {}
    arqmc_means_dict = {}
    arqmc_noises = get_rqmc_noises(args.n_trajs, env.max_steps, env.M, 'ssj')
    #arqmc_noises = get_rqmc_noises(args.n_trajs, env.max_steps, env.M, 'array')

    for sorter in args.sorter:
        arqmc_costs = []
        arqmc_means = []
        sort_f = get_sorter(sorter, env)

        data = ArrayRQMCSampler(env, args.n_trajs,
                                sort_f=sort_f).sample(policy, arqmc_noises)
        for traj in data:
            rewards = np.asarray(traj['rewards'])
            arqmc_costs.append(-rewards.sum())
            arqmc_means.append(np.mean(arqmc_costs))
        arqmc_costs_dict[sorter] = arqmc_costs
        arqmc_means_dict[sorter] = arqmc_means

    expected_cost = env.expected_cost(K, np.diag(np.ones(env.M)))

    mc_errors = np.abs(mc_means - expected_cost)
    rqmc_errors = np.abs(rqmc_means - expected_cost)
    arqmc_errors_dict = {
        sorter: np.abs(arqmc_means - expected_cost)
        for sorter, arqmc_means in arqmc_means_dict.items()
    }
    logger.info('mc: {}, rqmc: {} '.format(mc_errors[-1], rqmc_errors[-1]) + \
        ' '.join(['arqmc ({}): {}'.format(sorter, arqmc_errors[-1]) for sorter, arqmc_errors in arqmc_errors_dict.items()]))
    info = {
        **vars(args), 'mc_costs': mc_costs,
        'rqmc_costs': rqmc_costs,
        'arqmc_costs': arqmc_costs
    }
    if args.save_fn is not None:
        with open(args.save_fn, 'wb') as f:
            dill.dump(
                dict(mc_errors=mc_errors,
                     rqmc_errors=rqmc_errors,
                     arqmc_errors_dict=arqmc_errors_dict,
                     info=info), f)
    if args.show_fig:
        data = pd.concat([
            pd.DataFrame({
                'name': 'mc',
                'x': np.arange(len(mc_errors)),
                'error': mc_errors,
            }),
            pd.DataFrame({
                'name': 'rqmc',
                'x': np.arange(len(rqmc_errors)),
                'error': rqmc_errors,
            }),
            pd.concat([
                pd.DataFrame({
                    'name': 'arqmc_{}'.format(sorter),
                    'x': np.arange(len(arqmc_errors)),
                    'error': arqmc_errors,
                }) for sorter, arqmc_errors in arqmc_errors_dict.items()
            ]),
        ])
        plot = sns.lineplot(x='x', y='error', hue='name', data=data)
        plot.set(yscale='log')
        plt.show()
    return mc_errors, rqmc_errors, arqmc_errors_dict, info
Example #24
    prev_buff = []
    done = False

    while not done:

        prev_buff = buff

        if len(buff) < 4:
            observation, reward, done, info = env.step(env.action_space.sample())
            buff.append(observation)
            continue

        previous_state = preprocess(prev_buff)

        if args.resume_episode != 0 and args.epsilon < random.random():
            x = tensor(previous_state, args.device)[None]
            action = int(torch.argmax(model(x).detach().cpu()))
        else:
            action = env.action_space.sample()

        observation, reward, done, info = env.step(action)

        buff.pop(0)
        buff.append(observation)
            
        next_state = preprocess(buff)

        REPLAY_MEMORY.append([previous_state, action, reward, next_state, done])

REPLAY_MEMORY = REPLAY_MEMORY[-args.replay_size:]
Example #25
 def min_noise_reducible(self):
     sigma = self.make_state(0)
     sigmaB = utils.partial_trace(sigma, [0], [self.dim, self.dim])
     sigmaB_ext = utils.tensor(np.eye(self.dim), sigmaB)
     eigval = min(np.real(np.linalg.eigvals(sigmaB_ext - sigma)))
     return eigval / (eigval - (self.dim - 1)/self.dim**2)
Example #26
 def feature(self, obs):
     obs = tensor(obs)
     return self.phi_body(obs)
Example #27
    def create_operators(self):
        N = self.N

        # oscillator fixed operators
        self.I = tensor([ops.identity(2), ops.identity(N)])
        self.a = tensor([ops.identity(2), ops.destroy(N)])
        self.a_dag = tensor([ops.identity(2), ops.create(N)])
        self.q = tensor([ops.identity(2), ops.position(N)])
        self.p = tensor([ops.identity(2), ops.momentum(N)])
        self.n = tensor([ops.identity(2), ops.num(N)])
        self.parity = tensor([ops.identity(2), ops.parity(N)])

        # qubit fixed operators
        self.sx = tensor([ops.sigma_x(), ops.identity(N)])
        self.sy = tensor([ops.sigma_y(), ops.identity(N)])
        self.sz = tensor([ops.sigma_z(), ops.identity(N)])
        self.sm = tensor([ops.sigma_m(), ops.identity(N)])
        self.H = tensor([ops.hadamard(), ops.identity(N)])

        # oscillator parameterized operators
        tensor_with = [ops.identity(2), None]
        self.displace = ops.DisplacementOperator(N, tensor_with=tensor_with)
        self.rotate = ops.RotationOperator(N, tensor_with=tensor_with)

        # qubit parameterized operators
        tensor_with = [None, ops.identity(N)]
        self.rotate_qb_xy = ops.QubitRotationXY(tensor_with=tensor_with)
        self.rotate_qb_z = ops.QubitRotationZ(tensor_with=tensor_with)

        # qubit sigma_z measurement projector
        self.P = {
            i: tensor([ops.projector(i, 2),
                       ops.identity(N)])
            for i in [0, 1]
        }
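In the quantum examples on this page (e.g. here, in #9, and in the plot_phase_space example), tensor/utils.tensor takes a list of operators and composes them on the joint Hilbert space; presumably a Kronecker product, which the following NumPy sketch imitates (the name tensor_sketch is hypothetical):

from functools import reduce
import numpy as np

def tensor_sketch(operators):
    # Kronecker product of a list of operators, e.g. tensor_sketch([sigma_x, identity(N)])
    return reduce(np.kron, operators)

# A qubit sigma_x acting trivially on a 5-level oscillator:
sx_full = tensor_sketch([np.array([[0., 1.], [1., 0.]]), np.eye(5)])   # shape (10, 10)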
Example #28
optimizer = optim.SGD(params, lr=1e-3, momentum=.9, weight_decay=1e-6)

writer = SummaryWriter()

reward_normalizer = RewardNormalizer()
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

global_t = 0
for ep in range(10000):

    # episode loop
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    trajectory = []
    ep_t = 0
    state = tensor(env.reset())
    ep_return = 0
    while True:
        with torch.no_grad():
            policy = policy_net(state)

        action = np.random.choice(env.action_space.n, p=policy.cpu().numpy())

        new_state, reward, done, _ = env.step(action)
        new_state = tensor(new_state)

        ep_return += reward
        # reward = reward_normalizer.transform_reward(reward)

        transition = Transition(state=state,
                                policy=policy,
Example #29
File: main.py Project: Sha-Lab/qmc
def compare_grad(args):
    set_seed(args.seed)
    env = LQR(
        N=args.xu_dim[0],
        M=args.xu_dim[1],
        lims=100,
        init_scale=1.0,
        max_steps=args.H,
        Sigma_s_kappa=1.0,
        Q_kappa=1.0,
        P_kappa=1.0,
        A_norm=1.0,
        B_norm=1.0,
        Sigma_s_scale=args.noise,
    )
    #K = env.optimal_controller()
    K = np.random.randn(env.M, env.N)
    mean_network = nn.Linear(*K.shape[::-1], bias=False)
    mean_network.weight.data = tensor(K)
    policy = GaussianPolicy(*K.shape[::-1],
                            mean_network,
                            learn_std=False,
                            gate_output=False)
    out_set = set()  # here

    Sigma_a = np.diag(np.ones(env.M))
    mc_grads = []
    for i in tqdm(range(args.n_trajs), 'mc'):
        noises = np.random.randn(env.max_steps, env.M)
        states, actions, rewards, _, _ = rollout(env, policy, noises)
        if len(states) < args.H:
            out_set.add('mc')
            break
        mc_grads.append(
            get_gaussian_policy_gradient(states, actions, rewards, policy,
                                         variance_reduced_loss))
    mc_grads = np.asarray(mc_grads)
    mc_means = np.cumsum(mc_grads, axis=0) / np.arange(
        1,
        len(mc_grads) + 1)[:, np.newaxis, np.newaxis]

    rqmc_grads = []
    #loc = torch.zeros(env.max_steps * env.M)
    #scale = torch.ones(env.max_steps * env.M)
    #rqmc_noises = Normal_RQMC(loc, scale).sample(torch.Size([args.n_trajs])).data.numpy()
    rqmc_noises = uniform2normal(
        random_shift(
            ssj_uniform(
                args.n_trajs,
                args.H * env.M,
            ).reshape(args.n_trajs, args.H, env.M),
            0,
        ))
    for i in tqdm(range(args.n_trajs), 'rqmc'):
        states, actions, rewards, _, _ = rollout(
            env, policy, rqmc_noises[i].reshape(env.max_steps, env.M))
        if len(states) < args.H:
            out_set.add('rqmc')
            break
        rqmc_grads.append(
            get_gaussian_policy_gradient(states, actions, rewards, policy,
                                         variance_reduced_loss))
    rqmc_grads = np.asarray(rqmc_grads)
    rqmc_means = np.cumsum(rqmc_grads, axis=0) / np.arange(
        1,
        len(rqmc_grads) + 1)[:, np.newaxis, np.newaxis]

    arqmc_means_dict = {}
    #arqmc_noises = get_rqmc_noises(args.n_trajs, args.H, env.M, 'array')
    uniform_noises = ssj_uniform(args.n_trajs, env.M)  # n_trajs , action_dim
    arqmc_noises = uniform2normal(
        random_shift(np.expand_dims(uniform_noises, 1).repeat(args.H, 1),
                     0))  # n_trajs, horizon, action_dim
    for sorter in args.sorter:
        arqmc_grads = []
        sort_f = get_sorter(sorter, env, K)
        data = ArrayRQMCSampler(env, args.n_trajs,
                                sort_f=sort_f).sample(policy, arqmc_noises)
        for traj in data:
            states, actions, rewards = np.asarray(traj['states']), np.asarray(
                traj['actions']), np.asarray(traj['rewards'])
            if len(states) < args.H:
                out_set.add('arqmc_{}'.format(sorter))
                break
            arqmc_grads.append(
                get_gaussian_policy_gradient(states, actions, rewards, policy,
                                             variance_reduced_loss))
        arqmc_grads = np.asarray(arqmc_grads)
        arqmc_means = np.cumsum(arqmc_grads, axis=0) / np.arange(
            1,
            len(arqmc_grads) + 1)[:, np.newaxis, np.newaxis]
        arqmc_means_dict[sorter] = arqmc_means

    expected_grad = env.expected_policy_gradient(K, Sigma_a)

    mc_errors = [np.nan] if 'mc' in out_set else ((
        mc_means - expected_grad)**2).reshape(mc_means.shape[0], -1).mean(
            1)  # why the sign is reversed?
    rqmc_errors = [np.nan] if 'rqmc' in out_set else (
        (rqmc_means -
         expected_grad)**2).reshape(rqmc_means.shape[0], -1).mean(1)
    arqmc_errors_dict = {
        sorter: [np.nan] if 'arqmc_{}'.format(sorter) in out_set else
        ((arqmc_means -
          expected_grad)**2).reshape(arqmc_means.shape[0], -1).mean(1)
        for sorter, arqmc_means in arqmc_means_dict.items()
    }
    info = {
        **vars(args),
        'out': out_set,
        'expected_grad': expected_grad,
        'means': {
            'mc': mc_means,
            'rqmc': rqmc_means,
            **arqmc_means_dict,
        },
    }
    if args.save_fn is not None:
        with open(args.save_fn, 'wb') as f:
            dill.dump(
                dict(mc_errors=mc_errors,
                     rqmc_errors=rqmc_errors,
                     arqmc_errors_dict=arqmc_errors_dict,
                     info=info), f)
    if args.show_fig:
        mc_data = pd.DataFrame({
            'name': 'mc',
            'x': np.arange(len(mc_errors)),
            'error': mc_errors,
        })
        rqmc_data = pd.DataFrame({
            'name': 'rqmc',
            'x': np.arange(len(rqmc_errors)),
            'error': rqmc_errors,
        })
        arqmc_data = pd.concat([
            pd.DataFrame({
                'name': 'arqmc_{}'.format(sorter),
                'x': np.arange(len(arqmc_errors)),
                'error': arqmc_errors,
            }) for sorter, arqmc_errors in arqmc_errors_dict.items()
        ])
        plot = sns.lineplot(x='x',
                            y='error',
                            hue='name',
                            data=pd.concat([mc_data, rqmc_data, arqmc_data]))
        plot.set(yscale='log')
        plt.show()
    return mc_errors, rqmc_errors, arqmc_errors_dict, info