def calculate_values(grid, max_iterations=4):
    """Estimate state values for ``grid`` with a bounded number of sweeps.

    Every state returned by ``grid.all_states()`` starts at 0.  Each sweep
    walks ``grid.non_terminal_states()`` and overwrites ``V[s]`` in place
    with the value returned by ``best_action_value``:

        V[s] = max[a]{ sum[s',r] { p(s',r|s,a)[r + gamma*V[s']] } }

    Sweeping stops as soon as the largest change seen during one sweep is
    below ``SMALL_ENOUGH``, or after ``max_iterations`` sweeps, whichever
    comes first.  After every sweep that has not converged, the current
    values are printed with ``print_values``.

    Args:
        grid: environment exposing ``all_states()`` and
            ``non_terminal_states()``.
        max_iterations: upper bound on the number of sweeps.  Defaults to
            4, the limit that was previously hard-coded.

    Returns:
        dict mapping each state to its estimated value.
    """
    # initialize V(s)
    V = {s: 0 for s in grid.all_states()}

    for _ in range(max_iterations):
        # biggest_change is referred to by the mathematical symbol delta
        # in equations
        biggest_change = 0
        for s in grid.non_terminal_states():
            old_v = V[s]
            _, new_v = best_action_value(grid, V, s)
            # Updated in place, so later states in this sweep already see it.
            V[s] = new_v
            biggest_change = max(biggest_change, np.abs(old_v - new_v))

        if biggest_change < SMALL_ENOUGH:
            break
        print("values:")
        print_values(V, grid)
    return V
示例#2
0
def main():
    """Run ``monte_carlo`` on the standard grid and display what it returns.

    Prints the grid rewards first, then the values and the policy returned
    by ``monte_carlo``, and finally plots the third returned sequence
    through ``plt``.
    """
    grid = standard_grid(obey_prob=1.0, step_cost=None)
    print_values(grid.rewards, grid)

    # monte_carlo hands back three results: values, policy and deltas.
    values, policy, deltas = monte_carlo(grid)

    print_values(values, grid)
    print_policy(policy, grid)

    plt.plot(deltas)
    plt.show()
示例#3
0
def visit(inst, s, solved, values):
    """Visit state ``s`` as one step of LRTDP (not implemented yet).

    TODO: add your code here.
    Make use of compute_greedy_action_and_value, sample_successor, and
    check_solved.

    Returns:
        The updated labeling ``solved`` and the updated value function
        ``values`` (the caller unpacks the result as ``solved, values``).

    Raises:
        NotImplementedError: always, until the body has been written.
    """
    # A def whose body is only comments is a syntax error; fail loudly and
    # explicitly instead so the rest of the module can still be imported.
    raise NotImplementedError("visit() has not been implemented yet")


"""
Run the LRTDP algorithm until it converges.
"""
def lrtdp(inst, values):
    """Call ``visit`` from the initial state until that state is solved.

    All states in ``inst.states`` start out unsolved.  Before each
    iteration the user is prompted through ``wait_for_input``; afterwards
    the current values and solved labels are printed.

    Returns:
        The value function as returned by the last ``visit`` call.
    """
    solved = dict.fromkeys(inst.states, False)
    iteration = 0
    while not solved[inst.init]:
        iteration += 1
        # The prompt has no placeholder, so it is passed through unformatted.
        wait_for_input("Press enter for another iteration of LRTDP...")
        solved, values = visit(inst, inst.init, solved, values)

        print("Values after iteration {}: ".format(iteration))
        print_values(inst, values)

        print("Solved after iteration {}: ".format(iteration))
        print_solved(inst, solved)
    return values


if __name__ == "__main__":
    # Command-line entry point: choose rtdp or lrtdp, run it on the example
    # instance, then print the final values and the greedy policy for them.
    parser = argparse.ArgumentParser()
    parser.add_argument('algorithm', choices=['rtdp', 'lrtdp'],
                        help="Choose the algorithm.")
    args = parser.parse_args()

    inst = instance.get_example_instance()
    print(inst)

    # Every state starts from its heuristic value.
    values = dict((s, heuristic(inst, s)) for s in inst.states)

    print("")
    print("Initial state-values:")
    print_values(inst, values)

    # Guard first; argparse's ``choices`` should already prevent this.
    if args.algorithm not in ('rtdp', 'lrtdp'):
        sys.exit("Unknown algorithm")
    if args.algorithm == 'rtdp':
        values = rtdp(inst, values)
    else:
        values = lrtdp(inst, values)
    print("")

    print("Final values:")
    print_values(inst, values)

    policy = get_greedy_policy(inst, values)
    print("Corresponding final policy:")
    print_policy(inst, policy)
示例#4
0
def lrtdp(inst, values):
    """Iterate ``visit`` on ``inst.init`` until it is labeled solved.

    The solved labeling starts as False for every state in ``inst.states``.
    Each round waits for user input, performs one ``visit`` and prints the
    resulting values and labels.

    Returns:
        The value function produced by the final ``visit`` call.
    """
    solved = {state: False for state in inst.states}
    iteration = 1
    while True:
        if solved[inst.init]:
            return values

        # The prompt contains no replacement field, so no formatting is needed.
        wait_for_input("Press enter for another iteration of LRTDP...")
        solved, values = visit(inst, inst.init, solved, values)

        print("Values after iteration {}: ".format(iteration))
        print_values(inst, values)
        print("Solved after iteration {}: ".format(iteration))
        print_solved(inst, solved)

        iteration += 1
def main():
  """Run ``monte_carlo`` on the standard grid, then print and plot the results.

  Output order: the grid rewards, the returned values, the returned policy,
  and finally a plot of the returned deltas drawn through ``plt``.
  """
  grid = standard_grid(obey_prob=1.0, step_cost=None)

  # Rewards are shown before anything is learned.
  print("rewards:")
  print_values(grid.rewards, grid)

  # monte_carlo returns three results: values, policy and deltas.
  values, learned_policy, delta_history = monte_carlo(grid)

  print("final values:")
  print_values(values, grid)

  print("final policy:")
  print_policy(learned_policy, grid)

  plt.plot(delta_history)
  plt.show()
示例#6
0
def test_data_store_variable(variable, name, indent=''):
    """Print a description of ``variable`` and probe its boundaries.

    Prints the variable's name, unit and URI, runs
    ``test_data_store_variable_index`` on the indices just outside and just
    inside ``[0, values_count)``, and prints ``variable.values(run)`` for
    every run index from -2 up to and including ``runs_count()``.

    Args:
        variable: object exposing ``name()``, ``unit()``, ``uri()``,
            ``values_count()``, ``runs_count()`` and ``values(run)``.
        name: label printed in the header line.
        indent: prefix placed in front of every printed line.
    """
    print('%s - Test %s:' % (indent, name))
    print('%s    - Name: %s' % (indent, variable.name()))
    print('%s    - Unit: %s' % (indent, variable.unit()))
    print('%s    - URI: %s' % (indent, variable.uri()))

    values_count = variable.values_count()

    # One index below the range, both ends of the range, one index above it.
    for index in (-1, 0, values_count - 1, values_count):
        test_data_store_variable_index(variable, index, indent)

    # Run indices deliberately start at -2 and end at runs_count().
    for run in range(-2, variable.runs_count() + 1):
        print('%s    - values(%d): ' % (indent, run), end='')
        utils.print_values(variable.values(run))
示例#7
0
def rtdp(inst, values):
    """Repeat ``perform_trial`` until the values change by less than EPSILON.

    Each iteration waits for user input, runs one trial, prints the new
    values and compares them with a copy of the previous ones using
    ``compute_max_difference``.

    Returns:
        The value function from the last trial.
    """
    iteration = 0
    converged = False
    while not converged:
        iteration += 1
        wait_for_input("Press enter for another iteration of RTDP...")

        # Snapshot the values before the trial so the change can be measured.
        previous = dict(values)
        values = perform_trial(inst, values)

        print("Values after iteration {}: ".format(iteration))
        print_values(inst, values)

        converged = compute_max_difference(previous, values) < EPSILON
        if converged:
            print("Converged in iteration {}".format(iteration))
    return values
示例#8
0
    def forward(self, x):
        """Apply either the plain layer stack or the noise-perturbation path.

        When ``self.filter_size > 0`` the input goes straight through
        ``self.layers`` (original note: image, conv, batchnorm, relu).

        Otherwise ``self.noise * self.level`` is added to ``x`` after a new
        axis is inserted at dim 2, the result is reshaped to
        ``(-1, in_channels * nmasks, input_size, input_size)`` and passed
        through ``self.layers`` and, if ``self.mix_maps`` is set,
        ``self.mix_layers`` (original note: image, perturb, (relu?),
        conv1x1, batchnorm, relu + mix_maps (conv1x1, batchnorm relu)).
        """
        if self.filter_size > 0:
            return self.layers(x)

        # Example shapes from the original author:
        # (10, 3, 1, 32, 32) + (1, 3, 128, 32, 32) --> (10, 3, 128, 32, 32)
        perturbed = torch.add(x.unsqueeze(2), self.noise * self.level)

        if self.debug:
            print_values(x, self.noise, perturbed, self.unique_masks)

        # Fold the mask axis into the channel axis before the layer stack.
        merged_channels = self.in_channels * self.nmasks
        reshaped = perturbed.view(-1, merged_channels, self.input_size,
                                  self.input_size)
        out = self.layers(reshaped)

        if not self.mix_maps:
            return out
        return self.mix_layers(out)
示例#9
0
from neutronclient.v2_0 import client
from credentials import get_credentials
from utils import print_values

# Build a Neutron client from the credentials returned by get_credentials()
# (expanded as keyword arguments), fetch the port listing and print it.
credentials = get_credentials()
neutron = client.Client(**credentials)
ports = neutron.list_ports()
# 'ports' is passed to print_values alongside the listing.
print_values(ports, 'ports')
示例#10
0
    }
    return keystone_client.Client(**params)

tests = [
    {"user": "******", "password": "******", "tenant": "admin"},
]

for test in tests:
    print "Attempting authentication for tenant %s by user %s" % (
        test['tenant'], test['user']
    ),
    try:
        ks = get_keystone_client(**test)
        print "Authorized"
    except:
        print "Denied"
#
#
#
#Listing All the networks from OpenStack environment.
#
#
print "Listing All Networks."

credentials = get_credentials()
neutron = client.Client(**credentials)
list_network = neutron.list_networks()

print_values(list_network, 'networks')

示例#11
0
from neutronclient.v2_0 import client
from credentials import get_credentials_tenant_one
from utils import print_values

# Build a Neutron client from get_credentials_tenant_one() and print the
# network listing it returns.
# NOTE(review): the meaning of the three positional arguments is not visible
# here (presumably user, password, project) -- confirm against
# credentials.get_credentials_tenant_one.
credentials = get_credentials_tenant_one("user1", "user1", "user1-project")
neutron = client.Client(**credentials)
netw = neutron.list_networks()

# 'networks' is passed to print_values alongside the listing.
print_values(netw, 'networks')
示例#12
0
tests = [
    {
        "user": "******",
        "password": "******",
        "tenant": "admin"
    },
]

for test in tests:
    print "Attempting authentication for tenant %s by user %s" % (
        test['tenant'], test['user']),
    try:
        ks = get_keystone_client(**test)
        print "Authorized"
    except:
        print "Denied"
#
#
#
#Listing All the networks from OpenStack environment.
#
#
print "Listing All Networks."

credentials = get_credentials()
neutron = client.Client(**credentials)
list_network = neutron.list_networks()

print_values(list_network, 'networks')
示例#13
0
from neutronclient.v2_0 import client
from credentials import get_credentials
from utils import print_values

# List the routers through a Neutron client and print them.  There is no
# ``except`` clause, so any error still propagates; the ``finally`` message
# is printed whether or not the listing succeeded.
try:
    credentials = get_credentials()
    neutron = client.Client(**credentials)
    # retrieve_all=True is forwarded to list_routers as given.
    routers_list = neutron.list_routers(retrieve_all=True)
    print_values(routers_list, 'routers')
finally:
    print 'Execution Completed'
示例#14
0
from neutronclient.v2_0 import client
from credentials import get_credentials
from utils import print_values


# Build a Neutron client from the credentials returned by get_credentials()
# (expanded as keyword arguments), fetch the port listing and print it.
credentials = get_credentials()
neutron = client.Client(**credentials)
ports = neutron.list_ports()
# 'ports' is passed to print_values alongside the listing.
print_values(ports, 'ports')
示例#15
0
def list_the_ports():
	ports = neutron.list_ports()
	print print_values(ports, 'ports')
	return
示例#16
0
                Q[s][a] = np.mean(returns[sa])
                biggest_change = max(biggest_change, np.abs(old_q - Q[s][a]))
                seen_state_action_pairs.add(sa)
        deltas.append(biggest_change)

        for s in policy.keys():
            a, _ = max_dict(Q[s])
            policy[s] = a
    V = {}
    for s in policy.keys():
        V[s] = max_dict(Q[s])[1]

    return V, policy, deltas


if __name__ == '__main__':
    # Script entry point: run monte_carlo on the standard grid, print the
    # rewards, the resulting values and policy, then plot the deltas.
    grid = standard_grid(obey_prob=1.0, step_cost=None)

    print("rewards:")
    print_values(grid.rewards, grid)

    V, policy, deltas = monte_carlo(grid)

    # Each result table is printed under its own heading.
    for heading, render, table in (
        ("final values:", print_values, V),
        ("final policy:", print_policy, policy),
    ):
        print(heading)
        render(table, grid)

    plt.plot(deltas)
    plt.show()
示例#17
0
    for i in range(N):
        visited_states = set()
        states_and_returns = play_episode(standard_grid(), pi)
        for s, g in states_and_returns:
            if s not in visited_states:
                visited_states.add(s)
                if s not in all_returns:
                    all_returns[s] = []
                all_returns[s].append(g)
                V[s] = np.mean(all_returns[s])
    return V


ALL_POSSIBLE_ACTIONS = ['U', 'D', 'L', 'R']
if __name__ == '__main__':
    # Fixed policy to evaluate: one action letter per state tuple, kept in
    # the original insertion order.
    pi = dict([
        ((0, 0), 'R'),
        ((0, 1), 'R'),
        ((0, 2), 'R'),
        ((1, 2), 'U'),
        ((2, 2), 'U'),
        ((2, 1), 'L'),
        ((2, 0), 'U'),
        ((1, 0), 'U'),
        ((2, 3), 'L'),
    ])

    grid = standard_grid()
    print_policy(pi, grid)

    # Evaluate the policy (second argument 100), then print the values.
    V = first_visit_monte_carlo_prediction(pi, 100)
    print_values(V, grid)
示例#18
0
#!/usr/bin/env python

from neutronclient.v2_0 import client
from credentials import get_credentials
from utils import print_values

# List the routers (presumably those created by a companion create-router
# script -- confirm) and print them.  There is no ``except`` clause, so any
# error still propagates; the ``finally`` message is printed either way.

try:
    credentials = get_credentials()
    neutron = client.Client(**credentials)
    # retrieve_all=True is forwarded to list_routers as given.
    routers_list = neutron.list_routers(retrieve_all=True)
    print_values(routers_list, 'routers')
finally:
    print("Execution completed")

# List the subnets by calling neutron.list_subnets().  The credentials and
# client are created again here rather than reused from the block above, and
# the raw result is printed directly instead of going through print_values.

credentials = get_credentials()
neutron = client.Client(**credentials)
subnets = neutron.list_subnets()
print(subnets)
示例#19
0
if __name__ == "__main__":
    # Command-line entry point: parse the algorithm name, run it on the
    # example instance and report the final values and greedy policy.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'algorithm',
        choices=['rtdp', 'lrtdp'],
        help="Choose the algorithm.",
    )
    args = parser.parse_args()

    inst = instance.get_example_instance()
    print(inst)

    # Every state starts from its heuristic value.
    values = dict((s, heuristic(inst, s)) for s in inst.states)

    print("")
    print("Initial state-values:")
    print_values(inst, values)

    # The two names are mutually exclusive, so the test order is irrelevant;
    # the final branch is a safety net behind argparse's ``choices``.
    if args.algorithm == 'lrtdp':
        values = lrtdp(inst, values)
    elif args.algorithm == 'rtdp':
        values = rtdp(inst, values)
    else:
        sys.exit("Unknown algorithm")
    print("")

    print("Final values:")
    print_values(inst, values)

    policy = get_greedy_policy(inst, values)
    print("Corresponding final policy:")
    print_policy(inst, policy)