def learn(self):
    """Solve the MDP by linear programming and store the value function in self.v.

    Builds the (S, A, T, r, s0) keyword arguments expected by lp() from
    self.mdp and calls it.
    """
    args = {}
    args['S'] = self.mdp.getStates()
    # assume state actions are available for all states
    args['A'] = self.mdp.getPossibleActions(self.mdp.state)

    def transition(s, a, sp):
        """Return P(sp | s, a), or 0 if sp is not a listed successor of (s, a)."""
        trans = self.mdp.getTransitionStatesAndProbs(s, a)
        # Fix for Python 3: tuple-parameter lambdas (`lambda (state, prob):`)
        # were removed (PEP 3113), and filter() now returns an iterator, so
        # the original `len(filter(...))` / `filter(...)[0]` would fail.
        probs = [prob for state, prob in trans if state == sp]
        return probs[0] if probs else 0

    args['T'] = transition
    args['r'] = self.mdp.getReward
    args['s0'] = self.mdp.state
    self.v = lp(**args)
def specific(G, best_val, k, s_max):
    """Evaluate a single room count k on graph G against the incumbent best_val.

    Extreme k values (for n == 20 or n == 50) are brute-forced; every other k
    is handed to the LP solver after pruning pairs whose stress alone exceeds
    the per-room budget. Progress markers are written to stdout.

    Returns (best value, rooms assignment, best k); best k is -1 when nothing
    beat best_val.
    """
    n = G.order()
    # Upper-triangular edge-attribute tables: attr[i][j] defined for i < j.
    happiness = {i: {} for i in range(n)}
    stress = {i: {} for i in range(n)}
    for i in range(n):
        for j in range(i + 1, n):
            edge = G.get_edge_data(i, j)
            happiness[i][j] = edge['happiness']
            stress[i][j] = edge['stress']

    answer, rooms, best_k = best_val, {}, -1

    if (n == 20 and k in [1, 2, 17, 18, 19]) or (n == 50 and k in [1, 48, 49]):
        # Cheap-to-enumerate k: exact brute force.
        print("(", end='', flush=True)
        val, arr = bruteforce.bruteforce_k(happiness, stress, n, s_max, k)
        if val > answer:
            print("*", end=",", flush=True)
            answer, rooms, best_k = val, arr, k
        else:
            print("X", end=",", flush=True)
        print(")", end="", flush=True)
        return answer, rooms, best_k

    # Prune: any pair whose stress already busts the per-room budget s_max / k.
    pruned = {(u, v): stress[u][v]
              for u in range(n)
              for v in range(u + 1, n)
              if stress[u][v] > s_max / k}
    # print(pruned)

    print("(", end="", flush=True)
    # auto optimize (the solver's not_optimal flag is ignored here)
    val, arr, not_optimal = lp.lp(happiness, stress, s_max, n, k, answer, pruned)
    if val > answer:
        print("*", end="", flush=True)
        answer, rooms, best_k = val, arr, k
    else:
        print("X", end="", flush=True)
    print(")", end=" ", flush=True)
    return answer, rooms, best_k
def lp_cutoff(happiness, stress, s_max, n, cutoff):
    """Search every room count k in [1, n) for the best assignment beating cutoff.

    Brute force is used where it is tractable:
      - n == 20: k in {1, 2, 17, 18, 19}
      - n == 50: k in {1, 48, 49}
      - n <= 10: nothing is brute-forced (for testing purposes)
    All remaining k go through the Gurobi LP after pruning over-stressed pairs.

    Returns (best value, rooms assignment, best k); best k is -1 when nothing
    beat the cutoff. k values whose LP terminated without proven optimality
    are printed at the end.
    """
    answer, rooms, best_k = cutoff, {}, -1
    suboptimal = []

    if n == 20:
        brute_ks = [1, 2, 17, 18, 19]
    elif n == 50:
        brute_ks = [1, 48, 49]
    else:
        brute_ks = []
    lp_ks = [i for i in range(1, n) if i not in brute_ks]

    print("Bruteforce...", end=" ", flush=True)
    for k in brute_ks:
        val, arr = bruteforce.bruteforce_k(happiness, stress, n, s_max, k)
        if val > answer:
            print("*", end="", flush=True)
            answer, rooms, best_k = val, arr, k
        print("", end=" ", flush=True)
    print()

    print("Gurobi...", end=" ", flush=True)
    for k in lp_ks:
        # Prune: pairs whose stress alone exceeds the per-room budget s_max / k.
        pruned = {(u, v): stress[u][v]
                  for u in range(n)
                  for v in range(u + 1, n)
                  if stress[u][v] > s_max / k}
        print("(", end="", flush=True)
        val, arr, not_optimal = lp.lp(happiness, stress, s_max, n, k, answer, pruned)
        if not_optimal:
            suboptimal.append(k)
        if val > answer:
            # print(val, arr)
            print("*", end="", flush=True)
            answer, rooms, best_k = val, arr, k
        print(")", end=" ", flush=True)
    print()

    print("SUBOPTIMAL K:", suboptimal)
    return answer, rooms, best_k
def learn(self):
    """Solve the MDP via linear programming; the resulting values land in self.v.

    Assembles the (S, A, T, r, s0) arguments for lp() from self.mdp.
    """
    args = {}
    args['S'] = self.mdp.getStates()
    args['A'] = self.mdp.getPossibleActions(
        self.mdp.state
    )  # assume state actions are available for all states

    def transition(s, a, sp):
        """Return P(sp | s, a); 0 when sp is not among the (s, a) successors."""
        trans = self.mdp.getTransitionStatesAndProbs(s, a)
        # Fix for Python 3: the original tuple-parameter lambda is a
        # SyntaxError (PEP 3113), and filter() yields an iterator, breaking
        # the len()/subscript that followed.
        probs = [prob for state, prob in trans if state == sp]
        return probs[0] if probs else 0

    args['T'] = transition
    args['r'] = self.mdp.getReward
    args['s0'] = self.mdp.state
    self.v = lp(**args)
"--algorithm", help= "Algorithm - vi, hpi or lp - to calculate optimal value and policy.", required=True) args = parser.parse_args() # Read the mdp file numStates, numActions, startState, endStates, transitions, mdpType, discount = helper.readMdpFile( args.mdp) # print(f'Number of states: {numStates}') # print(f'Number of actions: {numActions}') V, policy = None, None # Call to the respective function if args.algorithm == 'vi': V, policy = vi(numStates, numActions, startState, endStates, transitions, mdpType, discount) elif args.algorithm == 'hpi': V, policy = hpi(numStates, numActions, startState, endStates, transitions, mdpType, discount) elif args.algorithm == 'lp': V, policy = lp(numStates, numActions, startState, endStates, transitions, mdpType, discount) else: raise ValueError( f'Illegal arguments specified for algorithm - {args.algorithm}') helper.printValuePolicy(V, policy)
import numpy as np

import lp

# Example LP instance: objective coefficients c, constraint matrix A, and
# right-hand side b (presumably A x <= b — confirm against lp.lp's contract).
A = np.array([[1., 1.], [16., 8.], [9000., 5000.]])
b = np.array([44., 512., 300000.])
c = np.array([30000., 20000.])

optx, zmin, is_bounded, sol, basis = lp.lp(c, A, b)
# Fix: the original used Python 2 print statements (`print basis`), which are
# a SyntaxError under Python 3; the other snippets in this file are Python 3.
print(basis)
print(zmin)
print(optx)
import numpy as np

import lp

# Example LP instance: objective coefficients c, constraint matrix A, and
# right-hand side b (presumably A x <= b — confirm against lp.lp's contract).
A = np.array([[1., 1.], [16., 8.], [9000., 5000.]])
b = np.array([44., 512., 300000.])
c = np.array([30000., 20000.])

optx, zmin, is_bounded, sol, basis = lp.lp(c, A, b)
# Fix: the original used Python 2 print statements (`print zmin`), which are
# a SyntaxError under Python 3; the other snippets in this file are Python 3.
print(zmin)
print(optx)