Exemplo n.º 1
0
 def test_rewards2(self):
     """Check sampled rewards against a strongly correlated 3x3 target matrix.

     Converts the target correlation matrix and unit standard deviations to
     a covariance matrix via ``rewards.cor2cov``, requests rewards from
     ``rewards.mvnrewards`` for 5000 states and 20 actions, and delegates the
     comparison to ``self.checkCorrelations``.
     """
     n_states, n_actions = 5000, 20
     means = [10, 10, 10]
     stdevs = [1, 1, 1]
     # Target correlation structure (symmetric, unit diagonal).
     target_corr = np.asarray([
         [1.0, -0.7, -0.5],
         [-0.7, 1.0, 0.8],
         [-0.5, 0.8, 1.0],
     ])
     covariance = rewards.cor2cov(target_corr, stdevs)
     sample = rewards.mvnrewards(n_states, n_actions, means, covariance)
     self.checkCorrelations(target_corr, sample)
Exemplo n.º 2
0
 def test_rewards1(self):
     """Check sampled rewards against a moderately correlated 3x3 target matrix.

     Uses distinct means and standard deviations per reward dimension,
     converts the target correlation matrix to a covariance matrix via
     ``rewards.cor2cov``, requests rewards from ``rewards.mvnrewards`` for
     1000 states and 5 actions, and delegates the comparison to
     ``self.checkCorrelations``.
     """
     n_states, n_actions = 1000, 5
     means = [20, 0, 50]
     stdevs = [5, 5, 10]
     # Target correlation structure (symmetric, unit diagonal).
     target_corr = np.asarray([
         [1.0, 0.4, -0.4],
         [0.4, 1.0, 0.6],
         [-0.4, 0.6, 1.0],
     ])
     covariance = rewards.cor2cov(target_corr, stdevs)
     sample = rewards.mvnrewards(n_states, n_actions, means, covariance)
     self.checkCorrelations(target_corr, sample)
Exemplo n.º 3
0
 def test_rewards3(self):
     """Check sampled rewards against a 4x4 target matrix with zero entries.

     Converts the target correlation matrix and per-dimension standard
     deviations to a covariance matrix via ``rewards.cor2cov``, requests
     rewards from ``rewards.mvnrewards`` for 200 states and 8 actions, and
     delegates the comparison to ``self.checkCorrelations``.
     """
     n_states, n_actions = 200, 8
     means = [0, -10, 30, 0]
     stdevs = [5, 0.5, 10, 2.0]
     # Target correlation structure (symmetric, unit diagonal); the fourth
     # dimension is uncorrelated with the first two.
     target_corr = np.asarray([
         [1.0, 0.2, -0.5, 0.0],
         [0.2, 1.0, 0.4, 0.0],
         [-0.5, 0.4, 1.0, 0.6],
         [0.0, 0.0, 0.6, 1.0],
     ])
     covariance = rewards.cor2cov(target_corr, stdevs)
     sample = rewards.mvnrewards(n_states, n_actions, means, covariance)
     self.checkCorrelations(target_corr, sample)
Exemplo n.º 4
0
            params = {'frond_probability': args.frond_probability,
                      'frond_size': args.frond_size,
                      'frond_actions': args.frond_actions}
        
        if args.graph_type == 'lobster':
            args.actions = 3
        
        transition_graph = grp.create_graph(args.graph_type, args.states, args.actions, params)

        # some of the graph types might add nodes and edges, so we should recalculate the
        # graph size before continuing
        args.states = transition_graph.number_of_nodes()
        args.actions = max(transition_graph.out_degree().values())

        # create the reward structures
        rewards = rwd.mvnrewards(args.states, args.actions, args.rmeans, cov)

        io.write_instance(transition_graph, rewards)
        print('# type={}, states={}, actions={}, correlation={}, stdev={}'.
              format(args.type, args.states, args.actions, args.correlation.tolist(), args.stdev.tolist()))

        # calculate some summary statistics on the instance
        rewards_est = rwd.correlation(rewards)
        print('\n# Summary Statistics')
        print('# actual R={}'.format(rewards_est.tolist()))

        values.annotate_discrete_graph(transition_graph)
        if args.transitions_dot:
            #io.output_dot(transition_graph, args.transitions_dot)
            with open(args.transitions_dot, 'w') as f:
                nx.write_dot(transition_graph, f)