def main():
    args = parse_arguments()  # parsed CLI args; currently unused below
    for env_name in ENV_LIST:
        print('\nTesting functionality for {}'.format(env_name))
        try:
            env = or_gym.make(env_name)
            print('{} initialized successfully'.format(env_name))
            # Smoke-test the env: sample an action, step, then reset.
            action = None  # guard against NameError if sampling fails
            try:
                action = env.action_space.sample()
                print('Action {} selected'.format(action))
            except Exception:
                print('Error sampling action for env = {}'.format(env_name))
            try:
                _ = env.step(action)
                print('Step successful')
            except Exception:
                print('Error encountered during step for action {}.'.format(action))
            try:
                env.reset()
                print('Reset successful for env = {}'.format(env_name))
            except Exception:
                print('Reset error encountered for env = {}'.format(env_name))
        except Exception:
            print('Error encountered initializing env = {}'.format(env_name))
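# A minimal sketch of the pieces this script assumes but does not define:
# ENV_LIST and parse_arguments live elsewhere in the repo, so the versions
# below are hypothetical stand-ins, not the repo's actual definitions.
import argparse

import or_gym

ENV_LIST = ['Knapsack-v0', 'InvManagement-v0',
            'InvManagement-v1', 'InvManagement-v2']  # assumed env IDs

def parse_arguments():
    # Hypothetical: the real parser may define flags not shown here.
    parser = argparse.ArgumentParser(description='Smoke-test or-gym environments')
    return parser.parse_args()

if __name__ == '__main__':
    main()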
import or_gym
from ray import tune

def check_config(env_name, model_name=None, *args, **kwargs):
    if model_name is None:
        model_name = 'or_gym_tune'
    env = or_gym.make(env_name)
    try:
        vf_clip_param = env._max_rewards
    except AttributeError:
        vf_clip_param = 10
    # TODO: Add grid search capabilities
    rl_config = {
        "env": env_name,
        "num_workers": 2,
        "env_config": {
            'mask': True
        },
        # "lr": 1e-5,
        # "entropy_coeff": 1e-4,
        "vf_clip_param": vf_clip_param,
        "lr": tune.grid_search([1e-4, 1e-5]),  # 1e-6, 1e-7]),
        "entropy_coeff": tune.grid_search([1e-2]),  # , 1e-4]),
        # "critic_lr": tune.grid_search([1e-3, 1e-4, 1e-5]),
        # "actor_lr": tune.grid_search([1e-3, 1e-4, 1e-5]),
        # "lambda": tune.grid_search([0.95, 0.9]),
        "kl_target": tune.grid_search([0.01]),
        # "sgd_minibatch_size": tune.grid_search([128, 512, 1024]),
        # "train_batch_size": tune.grid_search([])
        "model": {
            "vf_share_layers": False,
            # "custom_model": model_name,
            "fcnet_activation": "elu",
            "fcnet_hiddens": [128, 128, 128]
        }
    }
    return rl_config
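# A hedged usage sketch: with Ray's 1.x-era tune.run API, the dictionary
# returned by check_config can be handed to a trial directly. The trainer
# name and the stopping rule below are illustrative assumptions, not taken
# from this snippet.
import ray
from ray import tune

ray.init(ignore_reinit_error=True)
config = check_config('Knapsack-v0')
analysis = tune.run(
    'PPO',                            # assumed trainer
    config=config,
    stop={'training_iteration': 50},  # assumed stopping rule
)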
import os
import sys

import numpy as np
import torch.nn as nn

import or_gym

if __name__ == '__main__':
    N = 10
    # print(net)
    # param = list(net.parameters())
    # print(param[0].size())
    env_config = {
        'N': N,
        'max_weight': 200,
        'current_weight': 0,
        'mask': True,
        'randomize_params_on_reset': True,
    }
    env = or_gym.make('Knapsack-v0', env_config=env_config)
    # seed for reproducibility; currently drawn from fresh OS entropy
    # rather than a fixed value
    env.set_seed(int.from_bytes(os.urandom(4), sys.byteorder))
    net = actor(N)  # `actor` is defined elsewhere in the repo
    print(net)
    K = 7
    nb_episodes = 1000
    criterion = nn.CrossEntropyLoss()
    directory = 'linear_only_actor,CrossEntropy,oracletrainer,batch_updates'
    LR = np.array([10**i for i in np.arange(-2, 3, 1, dtype='float')])
    failure = os.system('mkdir ./results/' + directory)  # non-zero if mkdir fails
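# The actor network is defined elsewhere in the repo; for reference, a
# minimal linear-only policy consistent with the directory name
# ('linear_only_actor') might look like the hypothetical sketch below.
# The observation width is an assumption, not taken from this snippet.
import torch.nn as nn

class actor(nn.Module):
    # Hypothetical stand-in for the repo's actual actor class. Assumes the
    # knapsack observation flattens to obs_dim features and the policy emits
    # one logit per item, to be paired with nn.CrossEntropyLoss.
    def __init__(self, n_items, obs_dim=None):
        super().__init__()
        obs_dim = obs_dim if obs_dim is not None else 2 * n_items + 1  # assumed width
        self.linear = nn.Linear(obs_dim, n_items)

    def forward(self, obs):
        return self.linear(obs)  # raw logits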
import or_gym

env_name = 'InvManagement-v2'
env = or_gym.make(env_name)
print(env.step(env.action_space.sample()))
print(env.state.shape[0] == env.obs_dim)
env.plot_network()
import numpy as np
import or_gym

def im_dfo_model(x, env, online):
    '''
    Compute negative of the expected profit for a sample path. This function is
    used in an unconstrained optimization algorithm (scipy.optimize.minimize).

    x = [integer list; dimension |Stages| - 1] total inventory levels at each node.
    env = [InvManagementEnv] current simulation environment.
    online = [Boolean] should the optimization be run online?
    '''
    # assert env.spec.id == 'InvManagement-v0', \
    #     '{} received. Heuristic designed for InvManagement-v0.'.format(env.spec.id)
    x = np.array(x)   # inventory level at each node
    z = np.cumsum(x)  # base stock levels
    m = env.num_stages
    try:
        dimz = len(z)
    except TypeError:
        dimz = 1
    assert dimz == m - 1, "Wrong dimension on base stock level vector. Should be #Stages - 1."

    # create simulation environment (copy it if in offline mode)
    sim_kwargs = {
        'I0': x,     # set initial inventory to full base stock
        'p': env.p,  # extract all other parameters from env
        'r': env.r,
        'k': env.k,
        'h': env.h,
        'c': env.c,
        'L': env.L,
        'backlog': env.backlog,
        'dist_param': env.dist_param,
        'alpha': env.alpha,
        'seed_int': env.seed_int
    }
    demand_dist = env.demand_dist  # extract demand distribution function from env
    if online:
        # extract args to pass to re-simulation
        sim_kwargs['periods'] = env.period         # simulate up to the current period in online mode
        sim_kwargs['dist'] = 5                     # set distribution to manual mode
        sim_kwargs['user_D'] = env.D[:env.period]  # copy historical demands from env
    else:
        sim_kwargs['periods'] = env.num_periods  # copy num_periods from env
        sim_kwargs['dist'] = env.dist            # copy dist from env

    # build simulation environment (this is just a clean copy if in offline mode)
    if env.backlog:
        sim = or_gym.make("InvManagement-v0", env_config=sim_kwargs)
    else:
        sim = or_gym.make("InvManagement-v1", env_config=sim_kwargs)

    # run simulation
    for t in range(sim.num_periods):
        # take a step in the simulation using critical ratio base stock
        sim.step(action=sim.base_stock_action(z=z))

    # probability for demand at each period
    prob = demand_dist.pmf(sim.D, **sim.dist_param)

    # expected profit
    return -1 / sim.num_periods * np.sum(prob * sim.P)
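# Per the docstring, this objective is meant to be passed to
# scipy.optimize.minimize. A hedged offline-mode sketch; the initial guess
# and the derivative-free method choice are assumptions.
import numpy as np
import or_gym
from scipy.optimize import minimize

env = or_gym.make('InvManagement-v0')
x0 = 20 * np.ones(env.num_stages - 1)  # assumed initial inventory guess
res = minimize(im_dfo_model, x0, args=(env, False),  # offline mode
               method='Powell')  # assumed derivative-free method
print(res.x, -res.fun)  # optimized inventory levels and expected profit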
model = "DLP" mode = "SH" window = 30 solver = "glpk" #create file names filename = path+"in"+ver+".pkl" D = pickle.load(open(filename,'rb')) filesave = path+model+"_"+mode+"_"+ver+"/" #solve shrinking horizon model for i in range(100): #create environmnet with user specified demand user_D = {(1,0): D[:,i]} #assign scenario to retail/market link sample_path = {(1,0): True} #note that the specified demand is sampled from the prob distribution env = or_gym.make("InvManagement-"+ver, env_config={'user_D': user_D, 'sample_path': sample_path}) #loop through each period in the simulation, optimizing along the way for t in range(env.num_periods): #create model m=net_im_lp_model(env,window_size=window,use_expectation=True) #select solver s=SolverFactory(solver) #solve model res=s.solve(m, tee=False) #check result is optimal if res['Solver'][0]['Termination condition'][:] != 'optimal': print("ERROR: NOT OPTIMAL") break #extract reorder quantities Ropt=m.R.get_values() #pass action for the current timestep only (i.e. t=0)
def _build_env(self, env_name):
    env = or_gym.make(env_name)
    return env
import or_gym

def knapsack_env():
    env = or_gym.make('Knapsack-v0')
    # TODO: take the constraints from the or_gym environment and put them
    # into the class knapsack environment.
    raise NotImplementedError('This still has to be done!')
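# A possible starting point for the TODO above: a hedged sketch that pulls
# the knapsack parameters out of the or-gym env. The attribute names
# (item_weights, item_values, max_weight, N) are believed to match or-gym's
# Knapsack-v0 but should be verified against the installed version.
def knapsack_constraints():
    env = or_gym.make('Knapsack-v0')
    return {
        'weights': env.item_weights,  # per-item weights
        'values': env.item_values,    # per-item values
        'capacity': env.max_weight,   # knapsack capacity
        'n_items': env.N,             # number of items
    }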
import or_gym
import numpy as np
import pandas as pd
from pyomo.opt import SolverFactory
from or_gym.algos.supply_network.math_prog import *
from or_gym.algos.supply_network.stoch_prog import *

# solve perfect information model
env1 = or_gym.make("InvManagement-v2")
m1 = net_im_lp_model(env1, perfect_information=True)
s1 = SolverFactory('glpk')
res1 = s1.solve(m1)
print(np.sum(list(m1.P.get_values().values())))

# solve shrinking horizon model at t=0
env3 = or_gym.make("InvManagement-v2")
m3 = net_im_lp_model(env3)
s3 = SolverFactory('glpk')
res3 = s3.solve(m3)
print(np.sum(list(m3.P.get_values().values())))

# solve perfect information model with average demand
D = 20 * np.ones(30)
env4 = or_gym.make("InvManagement-v2", env_config={'user_D': {(1, 0): D}})
# env4.graph.edges[(1,0)]['demand_dist'] = [20 for i in range(env4.num_periods)]
m4 = net_im_lp_model(env4, perfect_information=True)
s4 = SolverFactory('glpk')
res4 = s4.solve(m4)
print(np.sum(list(m4.P.get_values().values())))

# solve shrinking horizon model