Example #1
import numpy as np
import syft as sy

def main(args):
    """Main run function."""

    # Build the gridworld
    transitions, rewards = gridworld()

    # print('transitions: {}'.format(transitions))
    print('transitions shape: {}'.format(transitions.shape))
    # print('rewards: {}'.format(rewards))
    print('rewards shape: {}'.format(rewards.shape))

    # Tensors now live on the remote workers
    transitions = transitions.fix_precision().share(bob, alice)
    rewards = rewards.fix_precision().share(bob, alice)

    num_actions = rewards.shape[0]
    num_states = rewards.shape[1]
    print('Number of actions: {}'.format(num_actions))
    print('Number of states: {}'.format(num_states))

    # Initialize a policy to hold the optimal policy
    policy = sy.zeros(num_states)
    # Initialize a value function to hold the long-term value of state, s
    values = sy.zeros(num_states)
    policy = policy.fix_precision().share(bob, alice)
    values = values.fix_precision().share(bob, alice)

    # Get theta and gamma from args and check value
    gamma = args.gamma * sy.ones(1)
    theta = args.theta * sy.ones(1)
    # check theta stopping condition
    assert float(theta) > 0, "Theta must be greater than 0."

    # Share theta and gamma for learning
    gamma = gamma.fix_precision().share(bob, alice)
    theta = theta.fix_precision().share(bob, alice)

    # run value iteration
    values, policy = value_iteration(
        values=values,
        policy=policy,
        transitions=transitions,
        rewards=rewards,
        gamma=gamma,
        theta=theta,
        max_iter=args.max_iter,
    )
    values = values.get().decode()
    policy = policy.get().decode()

    # print results
    print('\n************************')
    d_state = (int(np.sqrt(num_states)), int(np.sqrt(num_states)))
    print('Optimized Values:\n {}'.format(np.reshape(list(values), d_state)))
    print('Optimized Policy:\n {}'.format(np.reshape(list(policy), d_state)))
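The value_iteration helper called above is defined elsewhere in the project. As a reference for what it computes, here is a plaintext NumPy sketch of standard value iteration; the function name and the max-delta stopping test on theta are illustrative assumptions, not the project's encrypted implementation:

import numpy as np

def value_iteration_plain(transitions, rewards, gamma=0.9, theta=1e-4, max_iter=1000):
    """Plaintext value iteration over T[a, s, s'] and R[a, s, s']."""
    num_actions, num_states, _ = transitions.shape
    values = np.zeros(num_states)
    for _ in range(max_iter):
        # Q[a, s] = sum over s' of T[a, s, s'] * (R[a, s, s'] + gamma * V[s'])
        q = (transitions * (rewards + gamma * values[None, None, :])).sum(axis=2)
        new_values = q.max(axis=0)    # greedy Bellman backup
        delta = np.abs(new_values - values).max()
        values = new_values
        if delta < theta:             # stop once updates fall below theta
            break
    policy = q.argmax(axis=0)         # greedy policy w.r.t. the final values
    return values, policy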
Example #2
    def testDotProduct(self):
        # Generate a Paillier keypair
        pk, sk = Paillier()

        # One encrypted and one plaintext vector of ones
        x = pk.ones(10)
        y = sy.ones(10)

        # The dot product should commute and decrypt to 10 either way
        out1 = y.dot(x).decrypt(sk)
        out2 = x.dot(y).decrypt(sk)

        self.assertEqual(out1, 10)
        self.assertEqual(out2, 10)
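For reference, the plaintext identity this test encodes (a trivial NumPy check, not part of the test suite):

import numpy as np
assert np.dot(np.ones(10), np.ones(10)) == 10  # what out1 and out2 decrypt to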
Example #3
import syft as sy

def gridworld():
    """4x4 gridworld example."""
    # number of states
    S = 16

    # number of actions
    A = 4
    # indices of the actions
    up, down, right, left = range(A)

    # Transitions.
    T = sy.zeros((A, S, S))

    # Grid transitions.
    grid_transitions = {
        # from_state: ((action, to_state), ...)
        0: ((up, 0), (down, 0), (right, 0), (left, 0)),
        1: ((up, 1), (down, 5), (right, 2), (left, 0)),
        2: ((up, 2), (down, 6), (right, 3), (left, 1)),
        3: ((up, 3), (down, 7), (right, 3), (left, 2)),
        4: ((up, 0), (down, 8), (right, 5), (left, 4)),
        5: ((up, 1), (down, 9), (right, 6), (left, 4)),
        6: ((up, 2), (down, 10), (right, 7), (left, 5)),
        7: ((up, 3), (down, 11), (right, 7), (left, 6)),
        8: ((up, 4), (down, 12), (right, 9), (left, 8)),
        9: ((up, 5), (down, 13), (right, 10), (left, 8)),
        10: ((up, 6), (down, 14), (right, 11), (left, 9)),
        11: ((up, 7), (down, 15), (right, 11), (left, 10)),
        12: ((up, 8), (down, 12), (right, 13), (left, 12)),
        13: ((up, 9), (down, 13), (right, 14), (left, 12)),
        14: ((up, 10), (down, 14), (right, 15), (left, 13)),
        15: ((up, 15), (down, 15), (right, 15), (left, 15))
    }
    for i, moves in grid_transitions.items():
        for a, j in moves:
            T[a, i, j] = 1.0

    # Rewards.
    R = sy.ones((A, S, S)).mul(-1)
    R[:, 0, :] = 0
    R[:, 15, :] = 0

    return T, R
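A quick sanity check on gridworld()'s output (a sketch: it assumes the syft tensors convert to NumPy via np.asarray, or that sy.zeros/sy.ones are swapped for their np equivalents when testing):

import numpy as np

T, R = gridworld()
T, R = np.asarray(T), np.asarray(R)  # assumption: tensors convert to ndarrays
assert T.shape == (4, 16, 16) and R.shape == (4, 16, 16)
# Moves are deterministic, so every (action, from_state) row is one-hot.
assert (T.sum(axis=2) == 1).all()
# Every step costs -1 except out of the absorbing corner states 0 and 15.
assert (R[:, 0, :] == 0).all() and (R[:, 15, :] == 0).all()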
Example #4
    def test_ones(self):
        self.assertTrue((syft.ones(5).data == np.ones(5)).all())
Example #5
    def ones(self, dim):
        """Returns an encrypted tensor of ones"""
        return syft.ones(dim).encrypt(self)
Example #6
    def ones(self, dim):
        """Returns an encrypted tensor of ones"""
        return PaillierTensor(self, syft.ones(dim))
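Examples #5 and #6 are two implementations of the same public-key helper: the first encrypts a plaintext syft tensor, while the second wraps it in a PaillierTensor directly. A minimal round-trip sketch, using only calls that appear in these examples:

pk, sk = Paillier()      # keypair, as in testDotProduct (Example #2)
enc = pk.ones(5)         # encrypted tensor of five ones
print(enc.decrypt(sk))   # decrypting recovers the plaintext ones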