Пример #1
0
 def learn(self):
   """Solve the MDP with the linear-programming solver and cache the result.

   Builds the argument dict expected by ``lp`` (states, actions, transition
   probability function, reward function, start state) and stores the value
   returned by the solver in ``self.v``.
   """
   args = {}
   args['S'] = self.mdp.getStates()
   # assume the same actions are available for all states
   args['A'] = self.mdp.getPossibleActions(self.mdp.state)

   def transition(s, a, sp):
     # Probability of reaching sp after taking a in s; 0 if sp is not a
     # listed successor. (The original used a Python-2-only tuple-unpacking
     # lambda with filter(), which is a SyntaxError on Python 3 and would
     # also break on len() of the filter iterator.)
     for state, prob in self.mdp.getTransitionStatesAndProbs(s, a):
       if state == sp:
         return prob
     return 0

   args['T'] = transition
   args['r'] = self.mdp.getReward
   args['s0'] = self.mdp.state
   self.v = lp(**args)
Пример #2
0
def specific(G, best_val, k, s_max):
    """Try to beat ``best_val`` for a single room count ``k``.

    Uses exhaustive search for the handful of k values where it is feasible
    (n == 20 or n == 50 with k near the extremes) and the LP solver
    otherwise. Returns (best value found, room assignment, best k), with
    best_k == -1 when ``best_val`` was not improved.
    """
    n = G.order()

    # Pull per-pair edge attributes into nested lookup tables keyed i < j.
    happiness = {i: {} for i in range(n)}
    stress = {i: {} for i in range(n)}
    for i in range(n):
        for j in range(i + 1, n):
            data = G.get_edge_data(i, j)
            happiness[i][j] = data['happiness']
            stress[i][j] = data['stress']

    answer, rooms, best_k = best_val, {}, -1

    # Room counts that are cheap enough to brute-force, per problem size.
    brute_ks = {20: (1, 2, 17, 18, 19), 50: (1, 48, 49)}
    if k in brute_ks.get(n, ()):
        print("(", end='', flush=True)
        val, assignment = bruteforce.bruteforce_k(happiness, stress, n, s_max, k)
        if val > answer:
            print("*", end=",", flush=True)
            answer, rooms, best_k = val, assignment, k
        else:
            print("X", end=",", flush=True)
        print(")", end="", flush=True)
        return answer, rooms, best_k

    # Prune: any pair whose stress alone exceeds the per-room budget can
    # never share a room.
    budget = s_max / k
    pruned = {(u, v): stress[u][v]
              for u in range(n)
              for v in range(u + 1, n)
              if stress[u][v] > budget}

    # print(pruned)
    print("(", end="", flush=True)
    val, assignment, _ = lp.lp(happiness, stress, s_max, n, k, answer,
                               pruned)
    if val > answer:
        print("*", end="", flush=True)
        answer, rooms, best_k = val, assignment, k
    else:
        print("X", end="", flush=True)
    print(")", end=" ", flush=True)
    return answer, rooms, best_k
Пример #3
0
def lp_cutoff(happiness, stress, s_max, n, cutoff):
    """Search all room counts k in [1, n) for the best assignment.

    Brute-forces the extreme k values where that is tractable (n == 20 or
    n == 50); every other k goes through the LP solver with ``answer`` as a
    cutoff. Returns (best value, room assignment, best k), best_k == -1 if
    ``cutoff`` was never beaten. Also reports k values the solver left
    suboptimal.
    """
    answer = cutoff
    best_k = -1
    rooms = {}

    # k values handled exhaustively, chosen per problem size; instances
    # with n <= 10 skip brute force entirely (testing convenience).
    brute_by_n = {20: [1, 2, 17, 18, 19], 50: [1, 48, 49]}
    bruteforce_nums = brute_by_n.get(n, [])
    suboptimal = []

    lp_nums = [k for k in range(1, n) if k not in bruteforce_nums]

    print("Bruteforce...", end=" ", flush=True)
    for k in bruteforce_nums:
        val, arr = bruteforce.bruteforce_k(happiness, stress, n, s_max, k)
        if val > answer:
            print("*", end="", flush=True)
            answer, rooms, best_k = val, arr, k
        print("", end=" ", flush=True)
    print()

    print("Gurobi...", end=" ", flush=True)
    for k in lp_nums:
        # Prune pairs whose stress alone exceeds the per-room budget.
        budget = s_max / k
        pruned = {(u, v): stress[u][v]
                  for u in range(n)
                  for v in range(u + 1, n)
                  if stress[u][v] > budget}
        print("(", end="", flush=True)
        val, arr, not_optimal = lp.lp(happiness, stress, s_max, n, k, answer,
                                      pruned)
        if not_optimal:
            suboptimal.append(k)
        if val > answer:
            #print(val, arr)
            print("*", end="", flush=True)
            answer, rooms, best_k = val, arr, k
        print(")", end=" ", flush=True)
    print()
    print("SUBOPTIMAL K:", suboptimal)
    return answer, rooms, best_k
Пример #4
0
    def learn(self):
        """Solve the MDP via linear programming and cache the value function.

        Assembles the keyword arguments expected by ``lp`` (states, actions,
        transition probability function, reward function, start state) and
        stores the result in ``self.v``.
        """
        args = {}
        args['S'] = self.mdp.getStates()
        args['A'] = self.mdp.getPossibleActions(
            self.mdp.state
        )  # assume state actions are available for all states

        def transition(s, a, sp):
            # P(sp | s, a); 0 when sp is not a listed successor.
            # (The original used a Python-2-only tuple-unpacking lambda with
            # filter(), a SyntaxError on Python 3 — and len() on a filter
            # iterator would fail there anyway.)
            for state, prob in self.mdp.getTransitionStatesAndProbs(s, a):
                if state == sp:
                    return prob
            return 0

        args['T'] = transition
        args['r'] = self.mdp.getReward
        args['s0'] = self.mdp.state
        self.v = lp(**args)
Пример #5
0
        "--algorithm",
        help=
        "Algorithm - vi, hpi or lp - to calculate optimal value and policy.",
        required=True)
    args = parser.parse_args()

    # Parse the MDP description file named by --mdp into its components.
    numStates, numActions, startState, endStates, transitions, mdpType, discount = helper.readMdpFile(
        args.mdp)

    # print(f'Number of states: {numStates}')
    # print(f'Number of actions: {numActions}')

    V, policy = None, None

    # Dispatch on the --algorithm flag: value iteration ('vi'), policy
    # iteration ('hpi'), or linear programming ('lp'). All three solvers
    # share the same signature and return (value function, policy).
    if args.algorithm == 'vi':
        V, policy = vi(numStates, numActions, startState, endStates,
                       transitions, mdpType, discount)
    elif args.algorithm == 'hpi':
        V, policy = hpi(numStates, numActions, startState, endStates,
                        transitions, mdpType, discount)
    elif args.algorithm == 'lp':
        V, policy = lp(numStates, numActions, startState, endStates,
                       transitions, mdpType, discount)
    else:
        raise ValueError(
            f'Illegal arguments specified for algorithm - {args.algorithm}')

    helper.printValuePolicy(V, policy)
Пример #6
0
import numpy as np
import lp

# Small LP example: optimize c.x subject to the resource constraints
# encoded by A and b. (NOTE(review): whether lp.lp minimizes or maximizes
# is not visible here — the name `zmin` suggests minimization; confirm
# against the lp module.)
A = np.array([[1., 1.], [16., 8.], [9000., 5000.]])
b = np.array([44., 512., 300000.])
c = np.array([30000., 20000.])

optx, zmin, is_bounded, sol, basis = lp.lp(c, A, b)

# Fixed: the original used Python-2 print statements, which are a
# SyntaxError on Python 3; print() calls work on both.
print(basis)
print(zmin)
print(optx)
Пример #7
0
import numpy as np
import lp

# Small LP example: optimize c.x subject to the resource constraints
# encoded by A and b. (NOTE(review): whether lp.lp minimizes or maximizes
# is not visible here — the name `zmin` suggests minimization; confirm
# against the lp module.)
A = np.array([[1., 1.], [16., 8.], [9000., 5000.]])
b = np.array([44., 512., 300000.])
c = np.array([30000., 20000.])

optx, zmin, is_bounded, sol, basis = lp.lp(c, A, b)

# Fixed: the original used Python-2 print statements, which are a
# SyntaxError on Python 3; print() calls work on both.
print(zmin)
print(optx)