def MPC_exact(self, CPLEXPATH=None):
    t = self.time
    # 1. collect inputs and build the .dat file for the MPC model
    demandAttr = [(i, j, tt, self.demand[i, j][tt], self.price[i, j][tt])
                  for i, j in self.demand for tt in range(t, t + self.T)
                  if self.demand[i, j][tt] > 1e-3]
    accTuple = [(n, self.acc[n][t]) for n in self.acc]
    daccTuple = [(n, tt, self.dacc[n][tt]) for n in self.acc for tt in range(t, t + self.T)]
    edgeAttr = [(i, j, self.G.edges[i, j]['time']) for i, j in self.G.edges]
    modPath = os.getcwd().replace('\\', '/') + '/mod/'
    MPCPath = os.getcwd().replace('\\', '/') + '/MPC/'
    if not os.path.exists(MPCPath):
        os.makedirs(MPCPath)
    datafile = MPCPath + 'data_{}.dat'.format(t)
    resfile = MPCPath + 'res_{}.dat'.format(t)
    with open(datafile, 'w') as file:
        file.write('path="' + resfile + '";\r\n')
        file.write('t0=' + str(t) + ';\r\n')
        file.write('T=' + str(self.T) + ';\r\n')
        file.write('beta=' + str(self.beta) + ';\r\n')
        file.write('demandAttr=' + mat2str(demandAttr) + ';\r\n')
        file.write('edgeAttr=' + mat2str(edgeAttr) + ';\r\n')
        file.write('accInitTuple=' + mat2str(accTuple) + ';\r\n')
        file.write('daccAttr=' + mat2str(daccTuple) + ';\r\n')
    # 2. execute the .mod file and redirect solver output to file
    modfile = modPath + 'MPC.mod'
    if CPLEXPATH is None:
        CPLEXPATH = "C:/Program Files/ibm/ILOG/CPLEX_Studio1210/opl/bin/x64_win64/"
    my_env = os.environ.copy()
    my_env["LD_LIBRARY_PATH"] = CPLEXPATH
    out_file = MPCPath + 'out_{}.dat'.format(t)
    with open(out_file, 'w') as output_f:
        subprocess.check_call([CPLEXPATH + "oplrun", modfile, datafile],
                              stdout=output_f, env=my_env)
    # 3. collect passenger and rebalancing flows from the result file
    paxFlow = defaultdict(float)
    rebFlow = defaultdict(float)
    with open(resfile, 'r', encoding="utf8") as file:
        for row in file:
            item = row.replace('e)', ')').strip().strip(';').split('=')
            if item[0] == 'flow':
                values = item[1].strip(')]').strip('[(').split(')(')
                for v in values:
                    if len(v) == 0:
                        continue
                    i, j, f1, f2 = v.split(',')
                    paxFlow[int(i), int(j)] = float(f1)
                    rebFlow[int(i), int(j)] = float(f2)
    paxAction = [paxFlow[i, j] if (i, j) in paxFlow else 0 for i, j in self.edges]
    rebAction = [rebFlow[i, j] if (i, j) in rebFlow else 0 for i, j in self.edges]
    return paxAction, rebAction
def matching(self, CPLEXPATH=None, PATH='', platform='linux'):
    t = self.time
    # 1. collect inputs and build the .dat file for the matching model
    demandAttr = [(i, j, self.demand[i, j][t], self.price[i, j][t])
                  for i, j in self.demand if self.demand[i, j][t] > 1e-3]
    accTuple = [(n, self.acc[n][t + 1]) for n in self.acc]
    modPath = os.getcwd().replace('\\', '/') + '/mod/'
    matchingPath = os.getcwd().replace('\\', '/') + '/matching/' + PATH
    if not os.path.exists(matchingPath):
        os.makedirs(matchingPath)
    datafile = matchingPath + 'data_{}.dat'.format(t)
    resfile = matchingPath + 'res_{}.dat'.format(t)
    with open(datafile, 'w') as file:
        file.write('path="' + resfile + '";\r\n')
        file.write('demandAttr=' + mat2str(demandAttr) + ';\r\n')
        file.write('accInitTuple=' + mat2str(accTuple) + ';\r\n')
    # 2. execute the .mod file and redirect solver output to file
    modfile = modPath + 'matching.mod'
    if CPLEXPATH is None:
        CPLEXPATH = "C:/Program Files/ibm/ILOG/CPLEX_Studio1210/opl/bin/x64_win64/"
    my_env = os.environ.copy()
    if platform == 'mac':
        my_env["DYLD_LIBRARY_PATH"] = CPLEXPATH
    else:
        my_env["LD_LIBRARY_PATH"] = CPLEXPATH
    out_file = matchingPath + 'out_{}.dat'.format(t)
    with open(out_file, 'w') as output_f:
        subprocess.check_call([CPLEXPATH + "oplrun", modfile, datafile],
                              stdout=output_f, env=my_env)
    # 3. collect matching flows from the result file
    flow = defaultdict(float)
    with open(resfile, 'r', encoding="utf8") as file:
        for row in file:
            item = row.replace('e)', ')').strip().strip(';').split('=')
            if item[0] == 'flow':
                values = item[1].strip(')]').strip('[(').split(')(')
                for v in values:
                    if len(v) == 0:
                        continue
                    i, j, f = v.split(',')
                    flow[int(i), int(j)] = float(f)
    paxAction = [flow[i, j] if (i, j) in flow else 0 for i, j in self.edges]
    return paxAction
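# For illustration, the data file written above for a toy three-region instance
# would look roughly as follows, assuming `mat2str` serializes tuple lists into
# OPL tuple-set literals (see the sketch after `solveRebFlow` below); all
# values here are hypothetical:
#
#   path="/home/user/matching/res_0.dat";
#   demandAttr={<0,1,4.0,2.5>,<1,2,6.0,3.0>};
#   accInitTuple={<0,10>,<1,8>,<2,12>};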
def solveRebFlow(env, res_path, desiredAcc, CPLEXPATH):
    t = env.time
    # 1. collect inputs and build .dat file
    accRLTuple = [(n, desiredAcc[n]) for n in desiredAcc]
    accTuple = [(n, int(env.acc[n][t])) for n in env.acc]
    edgeAttr = [(i, j, env.G.edges[i, j]['time']) for i, j in env.G.edges]
    modPath = os.getcwd().replace('\\', '/') + '/mod/'
    OPTPath = os.getcwd().replace('\\', '/') + '/MPC/' + res_path
    if not os.path.exists(OPTPath):
        os.makedirs(OPTPath)
    datafile = OPTPath + f'data_{t}.dat'
    resfile = OPTPath + f'res_{t}.dat'
    with open(datafile, 'w') as file:
        file.write('path="' + resfile + '";\r\n')
        file.write('edgeAttr=' + mat2str(edgeAttr) + ';\r\n')
        file.write('accInitTuple=' + mat2str(accTuple) + ';\r\n')
        file.write('accRLTuple=' + mat2str(accRLTuple) + ';\r\n')
    # 2. execute .mod file and write result on file
    modfile = modPath + 'minRebDistRebOnly.mod'
    if CPLEXPATH is None:
        CPLEXPATH = "/opt/ibm/ILOG/CPLEX_Studio128/opl/bin/x86-64_linux/"
    my_env = os.environ.copy()
    my_env["LD_LIBRARY_PATH"] = CPLEXPATH
    out_file = OPTPath + f'out_{t}.dat'
    with open(out_file, 'w') as output_f:
        subprocess.check_call([CPLEXPATH + "oplrun", modfile, datafile],
                              stdout=output_f, env=my_env)
    # 3. collect results from file
    flow = defaultdict(float)
    with open(resfile, 'r', encoding="utf8") as file:
        for row in file:
            item = row.strip().strip(';').split('=')
            if item[0] == 'flow':
                values = item[1].strip(')]').strip('[(').split(')(')
                for v in values:
                    if len(v) == 0:
                        continue
                    i, j, f = v.split(',')
                    flow[int(i), int(j)] = float(f)
    action = [flow[i, j] for i, j in env.edges]
    return action
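# `mat2str` is used throughout this file but not defined here. A minimal
# sketch of a compatible serializer, assuming it turns a Python list of tuples
# into an OPL tuple-set literal (e.g. [(0, 5), (1, 3)] -> '{<0, 5>, <1, 3>}');
# the project's actual helper may differ.
def mat2str(mat):
    return (str(mat).replace("'", '"').replace('(', '<')
                    .replace(')', '>').replace('[', '{').replace(']', '}'))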
coded_CG_densi = classify(CG_densi, **construct_LR(scheme2))
_ = plt.hist(coded_CG_densi, alpha=0.3, bins=np.arange(-0.5, 5.5, 0.25), label='scheme2')
plt.legend()
plt.ylabel('Count of emission')
plt.xlabel('Emission index')
figname = 'Q5_fig2.pdf'
print("Saving figure to: %s" % figname)
plt.savefig(figname)

# In[9]:

mdl = util.Var_hmm.read_model(fname='Q3.hmm', emission_list=[str(x) for x in range(1, 6)])
print(mdl)
lst = []
print('\nScheme 1')
coded_CG_densi = classify(CG_densi, **construct_LR(scheme1))
mdl.emission_likelihood([coded_CG_densi], debug=1)
lst.append(coded_CG_densi)
print('\nScheme 2')
coded_CG_densi = classify(CG_densi, **construct_LR(scheme2))
_ = mdl.emission_likelihood([coded_CG_densi], debug=1)
lst.append(coded_CG_densi)
print(util.mat2str(np.vstack(lst)), file=open('Q3.echain', 'w'))
# mdl.unicode()
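# `classify` and `construct_LR` are defined elsewhere in the assignment. A
# minimal sketch of a compatible pair, assuming a "scheme" is a sequence of
# density thresholds and `classify` quantizes each CG-density value into an
# emission index by thresholding; the names, keyword packaging, and exact
# binning are hypothetical.
def construct_LR(scheme):
    # package the thresholds as keyword arguments for classify(...)
    return {'thresholds': np.asarray(scheme, dtype=float)}

def classify(values, thresholds):
    # emission index = number of thresholds the value exceeds (0..len(thresholds))
    return np.digitize(values, thresholds)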
def testing(scenario, env, dqn, sidx):
    # Test episodes
    test_episodes = 100
    epochs = trange(test_episodes)  # build tqdm iterator for loop visualization
    np.random.seed(10)
    max_steps = 100  # maximum length of episode
    # book-keeping variables
    test_rewards = []
    test_revenue = []
    test_served_demand = []
    test_rebalancing_cost = []
    test_operating_cost = []
    for episode in epochs:
        try:
            obs = env.reset()
            state = torch.tensor(dqn.decode_state(obs)).to(device).view(1, -1).float()
            episode_reward = 0
            episode_revenue = 0
            episode_served_demand = 0
            episode_rebalancing_cost = 0
            episode_operating_cost = 0
            for step in range(max_steps):
                # 1.1 select the greedy RL action
                dqn.policy_net.eval()
                action_rl = select_action(state, dqn, test=True)
                # 1.2 get actual vehicle distributions vi (i.e. (x1*x2*..*xn)*num_vehicles)
                v_d = dqn.get_desired_distribution(action_rl)
                # 1.3 Solve ILP - Minimal Distance Problem
                # 1.3.1 collect inputs and build .dat file
                t = dqn.env.time
                accTuple = [(n, int(dqn.env.acc[n][t])) for n in dqn.env.acc]
                accRLTuple = [(n, int(v_d_n)) for n, v_d_n in enumerate(v_d)]
                edgeAttr = [(i, j, dqn.env.G.edges[i, j]['time']) for i, j in dqn.env.G.edges]
                modPath = os.getcwd().replace('\\', '/') + '/mod/'
                OPTPath = os.getcwd().replace('\\', '/') + '/OPT/DQN/Test/'
                if not os.path.exists(OPTPath):
                    os.makedirs(OPTPath)
                datafile = OPTPath + f'data_{t}_{sidx}_testing.dat'
                resfile = OPTPath + f'res_{t}_{sidx}_testing.dat'
                with open(datafile, 'w') as file:
                    file.write('path="' + resfile + '";\r\n')
                    file.write('edgeAttr=' + mat2str(edgeAttr) + ';\r\n')
                    file.write('accInitTuple=' + mat2str(accTuple) + ';\r\n')
                    file.write('accRLTuple=' + mat2str(accRLTuple) + ';\r\n')
                # 2. execute .mod file and write result on file
                modfile = modPath + 'minRebDistRebOnly.mod'
                my_env = os.environ.copy()
                if platform == 'mac':
                    my_env["DYLD_LIBRARY_PATH"] = CPLEXPATH
                else:
                    my_env["LD_LIBRARY_PATH"] = CPLEXPATH
                out_file = OPTPath + f'out_{t}_{sidx}_testing.dat'
                with open(out_file, 'w') as output_f:
                    subprocess.check_call([CPLEXPATH + "oplrun", modfile, datafile],
                                          stdout=output_f, env=my_env)
                # 3. collect results from file
                flow = defaultdict(float)
                with open(resfile, 'r', encoding="utf8") as file:
                    for row in file:
                        item = row.strip().strip(';').split('=')
                        if item[0] == 'flow':
                            values = item[1].strip(')]').strip('[(').split(')(')
                            for v in values:
                                if len(v) == 0:
                                    continue
                                i, j, f = v.split(',')
                                flow[int(i), int(j)] = float(f)
                rebAction = [flow[i, j] for i, j in dqn.env.edges]
                # Take step
                new_obs, reward, done, info = env.step(rebAction, isMatching=True,
                                                       CPLEXPATH=CPLEXPATH, PATH="DQN/Test/")
                new_state = torch.tensor(dqn.decode_state(new_obs)).to(device).view(1, -1).float()
                # track performance over episode
                episode_reward += reward
                episode_revenue += info['revenue']
                episode_served_demand += info['served_demand']
                episode_rebalancing_cost += info['rebalancing_cost']
                episode_operating_cost += info['operating_cost']
                obs, state = deepcopy(new_obs), deepcopy(new_state)
                # end episode if conditions reached
                if done:
                    break
            epochs.set_description(
                f"Episode {episode+1} | Reward: {episode_reward:.2f}"
                f" | Revenue: {episode_revenue:.2f} | ServedDemand: {episode_served_demand:.2f}"
                f" | Oper. Cost: {episode_operating_cost:.2f}")
            # record the episode totals
            test_rewards.append(episode_reward)
            test_revenue.append(episode_revenue)
            test_served_demand.append(episode_served_demand)
            test_rebalancing_cost.append(episode_rebalancing_cost)
            test_operating_cost.append(episode_operating_cost)
        except KeyboardInterrupt:
            break
    # Plot results
    fig = plt.figure(figsize=(12, 32))
    fig.add_subplot(411)
    plt.plot(test_rewards, label="Reward")
    plt.title("Episode Rewards")
    plt.xlabel("Episode")
    plt.ylabel("J")
    plt.legend()
    fig.add_subplot(412)
    plt.plot(test_revenue, label="Revenue")
    plt.title("Episode Revenue")
    plt.xlabel("Episode")
    plt.ylabel("Revenue")
    plt.legend()
    fig.add_subplot(413)
    plt.plot(test_served_demand, label="Served Demand")
    plt.title("Episode Served Demand")
    plt.xlabel("Episode")
    plt.ylabel("Served Demand")
    plt.legend()
    fig.add_subplot(414)
    plt.plot(test_rebalancing_cost, label="Reb. Cost")
    plt.title("Episode Reb. Cost")
    plt.xlabel("Episode")
    plt.ylabel("Cost")
    plt.legend()
    plt.show()
    fig.savefig(f'{sidx}_testing.png')
    print("Average Performance: \n")
    print(f'Avg Reward: {np.mean(test_rewards):.2f}')
    print(f'Total Revenue: {np.mean(test_revenue):.2f}')
    print(f'Total Served Demand: {np.mean(test_served_demand):.2f}')
    print(f'Total Rebalancing Cost: {np.mean(test_rebalancing_cost):.2f}')
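# `select_action` is used by both testing() and training() but is not defined
# in this file. A minimal epsilon-greedy sketch in the style of the PyTorch
# DQN tutorial, assuming module-level EPS_START/EPS_END/EPS_DECAY constants, a
# module-level `steps_done` counter, and a hypothetical `dqn.nA` action count:
def select_action(state, dqn, test=False):
    global steps_done
    if not test:
        # annealed exploration rate, matching the formula shown in training()
        eps_threshold = EPS_END + (EPS_START - EPS_END) * math.exp(-1. * steps_done / EPS_DECAY)
        steps_done += 1
        if np.random.rand() < eps_threshold:
            # explore: uniform random action index
            return torch.tensor([[np.random.randint(dqn.nA)]], device=device, dtype=torch.long)
    with torch.no_grad():
        # exploit: action with the largest predicted Q-value
        return dqn.policy_net(state).max(1)[1].view(1, 1)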
def training(scenario, env, dqn, sidx):
    # book-keeping variables
    training_rewards = []
    training_revenue = []
    training_served_demand = []
    training_rebalancing_cost = []
    training_operating_cost = []
    last_t_update = 0
    train_episodes = 200  # num_of_episodes_with_same_epsilon x num_of_q_tables x num_epsilons
    max_steps = 100  # maximum length of episode
    epochs = trange(train_episodes)  # build tqdm iterator for loop visualization
    for i_episode in epochs:
        obs = env.reset()
        state = torch.tensor(dqn.decode_state(obs)).to(device).view(1, -1).float()
        episode_reward = 0
        episode_revenue = 0
        episode_served_demand = 0
        episode_rebalancing_cost = 0
        episode_operating_cost = 0
        for step in range(max_steps):
            # 1.1 Select and perform an RL action
            dqn.policy_net.eval()
            action_rl = select_action(state, dqn)
            # 1.2 get actual vehicle distributions vi (i.e. (x1*x2*..*xn)*num_vehicles)
            v_d = dqn.get_desired_distribution(action_rl)
            # 1.3 Solve ILP - Minimal Distance Problem
            # 1.3.1 collect inputs and build .dat file
            t = dqn.env.time
            accTuple = [(n, int(dqn.env.acc[n][t])) for n in dqn.env.acc]
            accRLTuple = [(n, int(v_d_n)) for n, v_d_n in enumerate(v_d)]
            edgeAttr = [(i, j, dqn.env.G.edges[i, j]['time']) for i, j in dqn.env.G.edges]
            modPath = os.getcwd().replace('\\', '/') + '/mod/'
            OPTPath = os.getcwd().replace('\\', '/') + '/OPT/DQN/Train/'
            if not os.path.exists(OPTPath):
                os.makedirs(OPTPath)
            datafile = OPTPath + f'data_{t}_{sidx}_training.dat'
            resfile = OPTPath + f'res_{t}_{sidx}_training.dat'
            with open(datafile, 'w') as file:
                file.write('path="' + resfile + '";\r\n')
                file.write('edgeAttr=' + mat2str(edgeAttr) + ';\r\n')
                file.write('accInitTuple=' + mat2str(accTuple) + ';\r\n')
                file.write('accRLTuple=' + mat2str(accRLTuple) + ';\r\n')
            # 2. execute .mod file and write result on file
            modfile = modPath + 'minRebDistRebOnly.mod'
            my_env = os.environ.copy()
            if platform == 'mac':
                my_env["DYLD_LIBRARY_PATH"] = CPLEXPATH
            else:
                my_env["LD_LIBRARY_PATH"] = CPLEXPATH
            out_file = OPTPath + f'out_{t}_{sidx}_training.dat'
            with open(out_file, 'w') as output_f:
                subprocess.check_call([CPLEXPATH + "oplrun", modfile, datafile],
                                      stdout=output_f, env=my_env)
            # 3. collect results from file
            flow = defaultdict(float)
            with open(resfile, 'r', encoding="utf8") as file:
                for row in file:
                    item = row.strip().strip(';').split('=')
                    if item[0] == 'flow':
                        values = item[1].strip(')]').strip('[(').split(')(')
                        for v in values:
                            if len(v) == 0:
                                continue
                            i, j, f = v.split(',')
                            flow[int(i), int(j)] = float(f)
            rebAction = [flow[i, j] for i, j in dqn.env.edges]
            # Take step
            new_obs, reward, done, info = env.step(rebAction, isMatching=True,
                                                   CPLEXPATH=CPLEXPATH, PATH="DQN/Train/")
            new_state = torch.tensor(dqn.decode_state(new_obs)).to(device).view(1, -1).float()
            reward = torch.tensor([reward], device=device).float()
            # Store the transition in memory
            dqn.memory.push(state, action_rl, new_state, reward)
            # track performance over episode
            episode_reward += reward.item()
            episode_revenue += info['revenue']
            episode_served_demand += info['served_demand']
            episode_rebalancing_cost += info['rebalancing_cost']
            episode_operating_cost += info['operating_cost']
            # Move to the next state
            obs, state = deepcopy(new_obs), deepcopy(new_state)
            # Perform one step of the optimization (on the policy network)
            optimize_model(dqn)
            if done:
                break
        # Update the target network, copying all weights and biases in DQN
        if i_episode % TARGET_UPDATE == 0:
            dqn.target_net.load_state_dict(dqn.policy_net.state_dict())
            last_t_update = i_episode
        epochs.set_description(
            f"Episode {i_episode+1} | Reward: {episode_reward:.2f}"
            f" | Revenue: {episode_revenue:.2f} | ServedDemand: {episode_served_demand:.2f}"
            f" | Reb. Cost: {episode_rebalancing_cost:.2f} | Oper. Cost: {episode_operating_cost:.2f}"
            f" | Epsilon: {EPS_END + (EPS_START - EPS_END) * math.exp(-1. * steps_done / EPS_DECAY)},"
            f" Idx: {0} | Last Target Update {last_t_update}")
        # record the episode totals
        training_rewards.append(episode_reward)
        training_revenue.append(episode_revenue)
        training_served_demand.append(episode_served_demand)
        training_rebalancing_cost.append(episode_rebalancing_cost)
        training_operating_cost.append(episode_operating_cost)
    torch.save(dqn.policy_net.state_dict(), f"policy_net_{sidx}_training")
    # Plot results
    fig = plt.figure(figsize=(12, 32))
    fig.add_subplot(411)
    plt.plot(training_rewards, label="Reward")
    plt.title("Episode Rewards")
    plt.xlabel("Episode")
    plt.ylabel("J")
    plt.legend()
    fig.add_subplot(412)
    plt.plot(training_revenue, label="Revenue")
    plt.title("Episode Revenue")
    plt.xlabel("Episode")
    plt.ylabel("Revenue")
    plt.legend()
    fig.add_subplot(413)
    plt.plot(training_served_demand, label="Served Demand")
    plt.title("Episode Served Demand")
    plt.xlabel("Episode")
    plt.ylabel("Served Demand")
    plt.legend()
    fig.add_subplot(414)
    plt.plot(training_rebalancing_cost, label="Reb. Cost")
    plt.title("Episode Reb. Cost")
    plt.xlabel("Episode")
    plt.ylabel("Cost")
    plt.legend()
    plt.show()
    fig.savefig(f'{sidx}_training.png')
    print("Average Performance: \n")
    print(f'Avg Reward: {np.mean(training_rewards):.2f}')
    print(f'Total Revenue: {np.mean(training_revenue):.2f}')
    print(f'Total Served Demand: {np.mean(training_served_demand):.2f}')
    print(f'Total Rebalancing Cost: {np.mean(training_rebalancing_cost):.2f}')
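# `optimize_model` is called once per step in training() but is not defined in
# this file. A minimal sketch of a standard DQN update (PyTorch DQN tutorial
# style), assuming module-level BATCH_SIZE/GAMMA constants, a hypothetical
# `dqn.optimizer`, and a replay memory of Transition namedtuples; since
# training() pushes every transition with a valid next state, no
# terminal-state mask is applied here.
from collections import namedtuple
import torch.nn.functional as F

Transition = namedtuple('Transition', ('state', 'action', 'next_state', 'reward'))

def optimize_model(dqn):
    if len(dqn.memory) < BATCH_SIZE:
        return
    transitions = dqn.memory.sample(BATCH_SIZE)
    batch = Transition(*zip(*transitions))
    state_batch = torch.cat(batch.state)
    action_batch = torch.cat(batch.action)
    reward_batch = torch.cat(batch.reward)
    next_state_batch = torch.cat(batch.next_state)
    # Q(s_t, a): value of the action actually taken
    q_values = dqn.policy_net(state_batch).gather(1, action_batch)
    # max_a Q_target(s_{t+1}, a): bootstrapped value from the frozen target net
    next_values = dqn.target_net(next_state_batch).max(1)[0].detach()
    expected_q = reward_batch + GAMMA * next_values
    loss = F.smooth_l1_loss(q_values, expected_q.unsqueeze(1))
    dqn.optimizer.zero_grad()
    loss.backward()
    dqn.optimizer.step()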
def policy(self, obs, params, train=True, isMatching=False, CPLEXPATH=None, res_path=None):
    """
    Apply the current policy.

    Parameters
    ----------
    params : dict
        Training settings for cascaded learning.

    Returns
    -------
    action : list
        Rebalancing flow for each edge in the environment.
    action_rl : list
        Action index selected by each Q-table in the cascade.
    """
    # STEP 1 - Select desired distribution of idle vehicles through RL
    # 1.1 Pick actions for all Q tables with the following logic:
    #   (i)   if the Q table is under training: either epsilon-greedy or max-Q
    #   (ii)  for all untrained Q tables, select the default action (.5, .5)
    #   (iii) for all trained Q tables, select the greedy action argmax_a Q(s, a)
    num_nodes = len(self.nodes)
    action_rl = []  # RL action for all nodes
    if train:
        # Allow for epsilon-greedy exploration during training
        training_round_len = params["training_round_len"]
        epsilon = params["epsilon"]
        k = params["k"]
        default_action = params["default_action"]
        idx = (k // training_round_len) % num_nodes  # Q-table index (initially, top-most node)
        for i in range(num_nodes):
            state_i = self.encode_state(self.decode_state(obs[0], obs[1])[i])
            if i == idx:
                if np.random.rand() < epsilon:
                    # Epsilon-greedy exploration for the table under training
                    action_rl.append(np.random.randint(low=0, high=self.nA))
                else:
                    # Apply current policy
                    action_rl.append(np.argmax(self.Q[self.nodes[i]][state_i, :]))
            else:
                # for all other nodes, select either the default action or take argmax_a Q(s, a)
                if (k // (training_round_len * num_nodes) < 1) and (k // 100 < i):
                    action_rl.append(default_action)
                else:
                    action_rl.append(np.argmax(self.Q[self.nodes[i]][state_i, :]))
    else:
        # At test time, simply use the learnt argmax policy
        for i in range(num_nodes):
            state_i = self.encode_state(self.decode_state(obs[0], obs[1])[i])
            action_rl.append(np.argmax(self.Q[self.nodes[i]][state_i, :]))
    # 1.2 get actual vehicle distributions vi (i.e. (x1*x2*..*xn)*num_vehicles)
    v_d = self.get_desired_distribution(action_rl)
    # 1.3 Solve ILP - Minimal Distance Problem
    # 1.3.1 collect inputs and build .dat file
    t = self.env.time
    accTuple = [(n, int(self.env.acc[n][t])) for n in self.env.acc]
    accRLTuple = [(n, int(v_d_n)) for n, v_d_n in enumerate(v_d)]
    edgeAttr = [(i, j, self.env.G.edges[i, j]['time']) for i, j in self.env.G.edges]
    modPath = os.getcwd().replace('\\', '/') + '/mod/'
    OPTPath = os.getcwd().replace('\\', '/') + '/OPT/CQL/' + res_path
    if not os.path.exists(OPTPath):
        os.makedirs(OPTPath)
    datafile = OPTPath + f'data_{t}.dat'
    resfile = OPTPath + f'res_{t}.dat'
    with open(datafile, 'w') as file:
        file.write('path="' + resfile + '";\r\n')
        file.write('edgeAttr=' + mat2str(edgeAttr) + ';\r\n')
        file.write('accInitTuple=' + mat2str(accTuple) + ';\r\n')
        file.write('accRLTuple=' + mat2str(accRLTuple) + ';\r\n')
    # 2. execute .mod file and write result on file
    modfile = modPath + 'minRebDistRebOnly.mod'
    if CPLEXPATH is None:
        CPLEXPATH = "/opt/ibm/ILOG/CPLEX_Studio128/opl/bin/x86-64_linux/"
    my_env = os.environ.copy()
    my_env["LD_LIBRARY_PATH"] = CPLEXPATH
    out_file = OPTPath + f'out_{t}.dat'
    with open(out_file, 'w') as output_f:
        subprocess.check_call([CPLEXPATH + "oplrun", modfile, datafile],
                              stdout=output_f, env=my_env)
    # 3. collect results from file
    flow = defaultdict(float)
    with open(resfile, 'r', encoding="utf8") as file:
        for row in file:
            item = row.strip().strip(';').split('=')
            if item[0] == 'flow':
                values = item[1].strip(')]').strip('[(').split(')(')
                for v in values:
                    if len(v) == 0:
                        continue
                    i, j, f = v.split(',')
                    flow[int(i), int(j)] = float(f)
    action = [flow[i, j] for i, j in self.env.edges]
    return action, action_rl
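# `get_desired_distribution` is referenced in policy(), testing(), and
# training() but not defined in this file. A plausible method sketch, assuming
# each per-node action index selects a weight on a fixed grid that is then
# normalized and scaled by the current fleet size; the project's actual
# mapping may differ.
def get_desired_distribution(self, action_rl):
    # map action index a in {0, .., nA-1} to weight (a+1)/nA, then normalize
    weights = np.array([(a + 1) / self.nA for a in action_rl], dtype=float)
    num_vehicles = sum(int(self.env.acc[n][self.env.time]) for n in self.env.acc)
    return weights / weights.sum() * num_vehicles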
print "Calculating likelihood for chain:", echain _ = h1.emission_likelihood(echain, debug=1) # from forward import * def _make_Nbeta_initial(self): self.beta_initial = np.ones((1, self.internal_space), dtype='float') self.Nbeta_initial = self.beta_initial / self.internal_space pass util.Var_hmm._make_Nbeta_initial = _make_Nbeta_initial if __name__ == '__main__': h1 = util.Var_hmm() h1._make_Nbeta_initial() print "initial beta:\n", util.mat2str(h1.beta_initial) print "initial normed beta:\n", util.mat2str(h1.Nbeta_initial) def _backward(self, emission_idx, return_d=False, as_norm=True, rescale=True, hold=False, debug=0): ''' Input an emission index, based on which "self.Nbeta" and "self.d" is updated Params: hold: BOOL, whether time step shall be updated return_d: whether return d as ouput