def __init__(self, sess, settings, netConfigOverride, stateShape, actionSize, nTrajs=1, **kwargs):
    """
    Builds the network and the nested sub-method that will be updated.

    Parameters
    ----------
    sess : Tensorflow Session
        Initialized Tensorflow session. Forwarded to the nested method.
    settings : dict
        Run settings. The keys "NumOptions", "NetworkConfig" and
        "SubMethod" are read here; the whole dict is forwarded to the
        nested method.
    netConfigOverride : dict (presumably; forwarded untouched)
        Network configuration overrides passed to NetworkBuilder and to
        the nested method.
    stateShape : list
        List of integers of the inputs shape size. Ex [39,39,6]
        Forwarded to the nested method.
    actionSize : int
        Accepted for interface compatibility. The nested method is sized
        by settings["NumOptions"], not by this value.
    nTrajs : int (Optional)
        Number of trajectories, forwarded to the nested method.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    N/A
    """
    # Previously `N` and `dFeatures` were read without ever being assigned in
    # this method. They are now derived the same way the sibling constructors
    # do it: the option count comes from the settings and the input shape is
    # the one the caller passed in.
    N = settings["NumOptions"]

    # Creating nested Method that will be updated.
    # `network` is not used afterwards; the call is kept because
    # NetworkBuilder may register variables as a side effect.
    network = NetworkBuilder(networkConfig=settings["NetworkConfig"],
                             netConfigOverride=netConfigOverride,
                             actionSize=N)
    Method = GetFunction(settings["SubMethod"])
    self.nestedMethod = Method(sess,
                               settings,
                               netConfigOverride,
                               stateShape=stateShape,
                               actionSize=N,
                               nTrajs=nTrajs)
break with open(runConfigFile) as json_file: settings = json.load(json_file) settings.update(configOverride) #Creating the Networks and Methods of the Run. gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=settings["GPUCapacitty"], allow_growth=True) config = tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False, allow_soft_placement=True) sess = tf.Session(config=config) with tf.device(args.processor): Method = GetFunction(settings["Method"]) workers = Method(sess=sess, settings=settings, netConfigOverride=netConfigOverride) InitializeVariables( sess) #Included to catch if there are any uninitalized variables. #Saving config files in the model directory EXP_NAME = settings["RunName"] LOG_PATH = './logs/' + EXP_NAME CreatePath(LOG_PATH) with open(LOG_PATH + '/runSettings.json', 'w') as outfile: json.dump(settings, outfile) with open(LOG_PATH + '/netConfigOverride.json', 'w') as outfile: json.dump(netConfigOverride, outfile)
def __init__(self, sess, settings, netConfigOverride, stateShape, actionSize, env, nTrajs=1, **kwargs):
    """
    Loads or builds the option value maps, then creates the nested method.

    When "LoadQMaps" is present in settings the options are read from
    './models/<LoadQMaps>/options.npz'. Otherwise states are collected from
    `env` with uniformly random actions, embedded with a pre-trained
    network loaded from './models/<LoadName>/model.h5', a subset of the
    embeddings is selected according to settings["Selection"], and the
    eigenvectors of the resulting square matrix are turned into options
    through `env.ConstructAllSamples` / `env.ReformatSamples`.

    Parameters
    ----------
    sess : Tensorflow Session
        Initialized Tensorflow session. Stored on `self.sess` and forwarded
        to the nested method.
    settings : dict
        Run settings. Keys read here: "RunName", "LoadName", "NumOptions",
        "NetworkConfig", "SubMethod"; when options are built also
        "SampleEpisodes", "MAX_EP_STEPS", "SFNetworkConfig", "Selection",
        "TotalSamples" and "PlotOptions". The optional keys "LoadQMaps" and
        "LoadSamples" switch behaviour by their presence.
    netConfigOverride : dict (presumably; forwarded untouched)
        Passed to NetworkBuilder and to the nested method.
    stateShape : list
        List of integers of the inputs shape size. Ex [39,39,6]
        Forwarded to the nested method.
    actionSize : int
        Number of discrete actions; used for random sampling and passed to
        buildNetwork.
    env : environment object
        Stored on `self.env`. Must provide reset(), step(),
        ConstructAllSamples() and ReformatSamples().
    nTrajs : int (Optional)
        Forwarded to the nested method.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    N/A
    """
    EXP_NAME = settings["RunName"]
    LoadName = settings["LoadName"]
    # MODEL_PATH_ (trailing underscore) is where this run writes;
    # MODEL_PATH is where the pre-trained model is read from.
    MODEL_PATH_ = './models/' + EXP_NAME + '/'
    MODEL_PATH = './models/' + LoadName + '/'
    LOG_PATH = './logs/' + EXP_NAME + '/'
    CreatePath(LOG_PATH)
    CreatePath(MODEL_PATH_)
    self.sess = sess
    self.env = env
    N = settings["NumOptions"]

    #Create the Q Maps
    if "LoadQMaps" in settings:
        #Loading the Q-tables for the sub-policies
        loadedData = np.load('./models/' + settings["LoadQMaps"] + '/options.npz')
        opt = loadedData["options"]
        options = []
        # Split the stacked array back into one array per option.
        for i in range(opt.shape[0]):
            options.append(opt[i, :, :, :, :])
    else:
        if "LoadSamples" in settings:
            # NOTE(review): nothing is loaded on this path, so `s` is never
            # assigned although `np.vstack(s)` below reads it -- confirm.
            pass
        else:
            print("Creating Samples")

            #Creating Instance of environment and running through it to generate samples
            def GetAction(state):
                """
                Draws one uniformly random action index per state.

                A state with three dimensions is treated as a single state;
                otherwise one action is drawn per entry along axis 0.
                """
                p = 1 / actionSize
                if len(state.shape) == 3:
                    probs = np.full((1, actionSize), p)
                else:
                    probs = np.full((state.shape[0], actionSize), p)
                actions = np.array([
                    np.random.choice(probs.shape[1], p=prob / sum(prob))
                    for prob in probs
                ])
                return actions

            # Collect every state visited under the random policy, skipping
            # states `arreq_in_list` reports as already collected.
            s = []
            for i in range(settings["SampleEpisodes"]):
                s0 = env.reset()
                for j in range(settings["MAX_EP_STEPS"] + 1):
                    a = GetAction(state=s0)
                    s1, r, done, _ = env.step(a)
                    if arreq_in_list(s0, s):
                        pass
                    else:
                        s.append(s0)
                    s0 = s1
                    if done:
                        break

        # Network overrides saved alongside the model that is being loaded.
        with open(MODEL_PATH + 'netConfigOverride.json') as json_file:
            networkOverrides = json.load(json_file)
        # if "DefaultParams" not in networkOverrides:
        #     networkOverrides["DefaultParams"] = {}
        # networkOverrides["DefaultParams"]["Trainable"]=False
        # print(settings["SFNetworkConfig"])
        # print(networkOverrides)
        # Only SF2, SF3 and SF5 are used below; weights are loaded through SF5.
        SF1, SF2, SF3, SF4, SF5 = buildNetwork(settings["SFNetworkConfig"], actionSize, networkOverrides, scope="Global")
        SF5.load_weights(MODEL_PATH + "model.h5")

        #Selecting the samples:
        psi = SF2.predict(np.vstack(s))  # [X,SF Dim]

        #test for approximate equality (for floating point types)
        def arreqclose_in_list(myarr, list_arrays):
            return next((True for elem in list_arrays if elem.size == myarr.size and np.allclose(elem, myarr, atol=1E-6)), False)

        print("Selecting Samples")
        if settings["Selection"] == "First":
            # Take rows of psi in order, keeping only those not already
            # (approximately) present.
            # NOTE(review): `i` is not bounded by psi.shape[0]; this indexes
            # past the end if psi holds too few distinct rows -- confirm.
            samples = []
            points = []
            i = 0
            while len(samples) < settings["TotalSamples"]:
                if not arreqclose_in_list(psi[i, :], samples):
                    samples.append(psi[i, :])
                    points.append(i)
                i += 1
        elif settings["Selection"] == "Random":
            # Draw random rows until enough distinct ones have been found.
            # NOTE(review): `randint` is imported outside this block; the
            # lower bound of 1 means row 0 is never drawn -- confirm intent.
            samples = []
            points = []
            while len(samples) < settings["TotalSamples"]:
                idx = randint(1, psi.shape[0] - 1)
                if not arreqclose_in_list(psi[idx, :], samples):
                    samples.append(psi[idx, :])
                    points.append(idx)
        elif settings["Selection"] == "Random_sampling":
            #PCA Decomp to dimension:
            import pandas as pd
            from sklearn.decomposition import PCA
            feat_cols = ['pixel' + str(i) for i in range(psi.shape[1])]
            df = pd.DataFrame(psi, columns=feat_cols)
            np.random.seed(42)
            # `rndperm` is not read again in this method.
            rndperm = np.random.permutation(df.shape[0])
            pca = PCA(n_components=2)
            pca_result = pca.fit_transform(df[feat_cols].values)
            from SampleSelection import SampleSelection_v1
            points = SampleSelection_v1(pca_result, settings["TotalSamples"], returnIndicies=True)
        elif settings["Selection"] == "Hull_pca":
            #PCA Decomp to dimension:
            import pandas as pd
            from sklearn.decomposition import PCA
            feat_cols = ['pixel' + str(i) for i in range(psi.shape[1])]
            df = pd.DataFrame(psi, columns=feat_cols)
            np.random.seed(42)
            rndperm = np.random.permutation(df.shape[0])
            pca = PCA(n_components=4)
            pca_result = pca.fit_transform(df[feat_cols].values)
            from SampleSelection import SampleSelection_v2
            points = SampleSelection_v2(pca_result, settings["TotalSamples"], returnIndicies=True)
        elif settings["Selection"] == "Hull_tsne":
            #t-SNE embedding to three dimensions:
            import pandas as pd
            from sklearn.manifold import TSNE
            feat_cols = ['pixel' + str(i) for i in range(psi.shape[1])]
            df = pd.DataFrame(psi, columns=feat_cols)
            np.random.seed(42)
            rndperm = np.random.permutation(df.shape[0])
            tsne = TSNE(n_components=3, verbose=1, perplexity=10, n_iter=1000)
            tsne_results = tsne.fit_transform(df[feat_cols].values)
            from SampleSelection import SampleSelection_v2
            points = SampleSelection_v2(tsne_results, settings["TotalSamples"], returnIndicies=True)
        elif settings["Selection"] == "Hull_cluster":
            #PCA Decomp to dimension:
            import pandas as pd
            from sklearn.decomposition import PCA
            feat_cols = ['pixel' + str(i) for i in range(psi.shape[1])]
            df = pd.DataFrame(psi, columns=feat_cols)
            np.random.seed(42)
            rndperm = np.random.permutation(df.shape[0])
            pca = PCA(n_components=4)
            pca_result = pca.fit_transform(df[feat_cols].values)
            from SampleSelection import SampleSelection_v3
            points = SampleSelection_v3(pca_result, settings["TotalSamples"], returnIndicies=True)
        else:
            print("Invalid Method selected")
            exit()

        # Gather the selected embeddings by index.
        psiSamples = []
        for point in points:
            psiSamples.append(psi[point, :])
        # Repeat the selection (doubling each pass) until there are at least
        # as many rows as embedding dimensions, so a square matrix can be cut.
        while len(psiSamples) < len(psiSamples[0]):
            psiSamples.extend(psiSamples)
        samps = np.stack(psiSamples)
        # Keep the first `SF Dim` rows so that samps2 is square for eig().
        samps2 = samps[0:samps.shape[1], :]
        w_g, v_g = np.linalg.eig(samps2)
        # print("here")
        dim = samps2.shape[1]

        #Creating Sub-policies
        offset = 0
        options = []
        # QMapStructure = self.env.GetQMapStructure()
        print("Getting data for a Q-Map")
        grids = self.env.ConstructAllSamples()
        phis = SF3.predict(grids)
        # Each eigenvector yields two options (the map and the second value
        # returned by ReformatSamples), hence N / 2 iterations.
        for sample in range(int(N / 2)):
            print("Creating Option", sample)
            # No eigenvector left for this slot.
            if sample + offset >= dim:
                continue
            v_option, v_option_inv = self.env.ReformatSamples(
                np.real(np.matmul(phis, v_g[:, sample + offset])))
            options.append(v_option)
            options.append(v_option_inv)
            # A complex eigenvalue advances the offset so the next iteration
            # skips one index (presumably its conjugate partner -- confirm).
            if np.iscomplex(w_g[sample + offset]):
                offset += 1
            if settings["PlotOptions"]:
                # NOTE(review): `offset` may have just been incremented, so
                # the eigenvalue in the title can differ from the one used to
                # build this option (and the index can reach `dim`) -- confirm.
                imgplot = plt.imshow(v_option)
                plt.title(" Option " + str(sample) + " Value Estimate | Eigenvalue:" + str(w_g[sample + offset]))
                plt.savefig(LOG_PATH + "/option" + str(sample) + ".png")
                plt.close()

        #Saving the different options. to log:
        np.savez_compressed(MODEL_PATH_ + "options.npz", options=np.stack(options))
    self.options = options

    # Creating nested Method that will be updated.
    # `network` is not read again in this method.
    network = NetworkBuilder(networkConfig=settings["NetworkConfig"], netConfigOverride=netConfigOverride, actionSize=N)
    Method = GetFunction(settings["SubMethod"])
    self.nestedMethod = Method(sess, settings, netConfigOverride, stateShape=stateShape, actionSize=N, nTrajs=nTrajs)
def __init__(self,sess,settings,netConfigOverride,stateShape,actionSize,nTrajs=1,**kwargs):
    """
    Creates the environment, loads or builds smoothed option value maps,
    then creates the nested method.

    The environment config file is looked up by name under
    "configs/environment" and handed to CreateEnvironment. When "LoadQMaps"
    is present in settings the options are read from
    './models/<LoadQMaps>/options.npz'. Otherwise states are collected with
    uniformly random actions, embedded with a pre-trained network loaded
    from './models/<LoadName>/model.h5', a subset is selected according to
    settings["Selection"], and each eigenvector of the resulting square
    matrix is projected onto a grid, smoothed, and stored together with its
    negation.

    Parameters
    ----------
    sess : Tensorflow Session
        Initialized Tensorflow session. Stored on `self.sess` and forwarded
        to the nested method.
    settings : dict
        Run settings. Keys read here: "RunName", "LoadName", "NumOptions",
        "EnvConfig", "NetworkConfig", "SubMethod"; when options are built
        also "SampleEpisodes", "MAX_EP_STEPS", "SFNetworkConfig",
        "Selection" and "TotalSamples". The optional keys "LoadQMaps" and
        "LoadSamples" switch behaviour by their presence.
    netConfigOverride : dict (presumably; forwarded untouched)
        Passed to NetworkBuilder and to the nested method.
    stateShape : list
        Not read in this method; the shape returned by CreateEnvironment
        is used instead.
    actionSize : int
        Not read in this method; the action count returned by
        CreateEnvironment is used instead.
    nTrajs : int (Optional)
        Overwritten by the value returned from CreateEnvironment before it
        is used.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    N/A
    """
    EXP_NAME = settings["RunName"]
    LoadName = settings["LoadName"]
    # MODEL_PATH is where the pre-trained model is read from;
    # MODEL_PATH_ (trailing underscore) is where this run writes.
    MODEL_PATH = './models/'+LoadName+ '/'
    IMAGE_PATH = './images/SF/'+EXP_NAME+'/'
    MODEL_PATH_ = './models/'+EXP_NAME+'/'
    LOG_PATH = './logs/CTF_1v1/'+EXP_NAME
    CreatePath(LOG_PATH)
    CreatePath(IMAGE_PATH)
    CreatePath(MODEL_PATH)
    CreatePath(MODEL_PATH_)
    self.sess=sess
    N = settings["NumOptions"]

    # Locate the environment config file by file name.
    # NOTE(review): `break` leaves only the inner loop, so the walk continues
    # and a later match would win; if nothing matches, `envConfigFile` is
    # never assigned and the `open` below fails -- confirm.
    for (dirpath, dirnames, filenames) in os.walk("configs/environment"):
        for filename in filenames:
            if settings["EnvConfig"] == filename:
                envConfigFile = os.path.join(dirpath,filename)
                break
    with open(envConfigFile) as json_file:
        envSettings = json.load(json_file)

    # The `nTrajs` argument is replaced by the environment's value here.
    env,dFeatures,nActions,nTrajs = CreateEnvironment(envSettings)

    #Create the Q Maps
    if "LoadQMaps" in settings:
        #Loading the Q-tables for the sub-policies
        loadedData = np.load('./models/'+settings["LoadQMaps"]+ '/options.npz')
        opt = loadedData["options"]
        options=[]
        # Split the stacked array back into one array per option.
        for i in range(opt.shape[0]):
            options.append(opt[i,:,:,:,:])
    else:
        if "LoadSamples" in settings:
            # NOTE(review): nothing is loaded on this path, so `s` is never
            # assigned although `np.vstack(s)` below reads it -- confirm.
            pass
        else:
            #Creating Instance of environment and running through it to generate samples
            def GetAction(state):
                """
                Draws one uniformly random action index per state.

                A state with three dimensions is treated as a single state;
                otherwise one action is drawn per entry along axis 0.
                """
                p = 1/nActions
                if len(state.shape)==3:
                    probs =np.full((1,nActions),p)
                else:
                    probs =np.full((state.shape[0],nActions),p)
                actions = np.array([np.random.choice(probs.shape[1], p=prob / sum(prob)) for prob in probs])
                return actions

            # Collect every state visited under the random policy, skipping
            # states `arreq_in_list` reports as already collected.
            s = []
            for i in range(settings["SampleEpisodes"]):
                s0 = env.reset()
                for j in range(settings["MAX_EP_STEPS"]+1):
                    a = GetAction(state=s0)
                    s1,r,done,_ = env.step(a)
                    if arreq_in_list(s0,s):
                        pass
                    else:
                        s.append(s0)
                    s0 = s1
                    if done:
                        break

        #Creating and smoothing Q Maps
        def ConstructSamples(env,position2):
            """
            Builds one observation per grid cell from the current observation.

            Channel 4 is cleared at its max and min locations, the grid is
            repeated once per cell, a 5 is written into channel 4 at a
            different cell of each copy, and -5 is written at `position2`
            in every copy.
            """
            # NOTE(review): `get_obs_blue` is accessed without a call --
            # presumably a property; confirm. The array is modified in place
            # below, before it is copied by np.repeat.
            grid = env.get_obs_blue
            locX,locY = np.unravel_index(np.argmax(grid[:,:,4], axis=None), grid[:,:,0].shape)
            locX2,locY2 = np.unravel_index(np.argmin(grid[:,:,4], axis=None), grid[:,:,0].shape)
            #Removing the agent
            grid[locX,locY,4] = 0
            grid[locX2,locY2,4] = 0
            stacked_grids = np.repeat(np.expand_dims(grid,0), grid.shape[0]*grid.shape[1],0)
            # Copy number i*W+j gets the marker at row (W-i-1), column j,
            # where W is stacked_grids.shape[2].
            for i in range(stacked_grids.shape[1]):
                for j in range(stacked_grids.shape[2]):
                    stacked_grids[i*stacked_grids.shape[2]+j,stacked_grids.shape[2]-i-1,j,4] = 5
            stacked_grids[:,position2[0],position2[1],4] = -5
            return stacked_grids

        def SmoothOption(option_, gamma =0.9):
            """
            Smooths each 2-D slice option_[:,:,i2,j2] over its non-zero cells.

            For every slice a transition matrix between 4-neighbouring
            non-zero cells is built (0.25 per neighbour, remainder on the
            diagonal) and the slice values are multiplied by
            inv(I - gamma * x). Cells that are zero stay zero.
            """
            # option[option<0.0] = 0.0
            #Create the Adjacency Matric
            v_option=np.full((dFeatures[0],dFeatures[1],dFeatures[0],dFeatures[1]),0,dtype=np.float32)
            for i2,j2 in itertools.product(range(dFeatures[0]),range(dFeatures[1])):
                option = option_[:,:,i2,j2]
                # Map a running index to the [row, col] of every non-zero cell.
                states_ = {}
                count = 0
                for i in range(option.shape[0]):
                    for j in range(option.shape[1]):
                        if option[i,j] != 0:
                            states_[count] = [i,j]
                            # states_.append([count, [i,j]])
                            count+=1
                states=len(states_.keys())
                x = np.zeros((states,states))
                for i in range(len(states_)):
                    [locx,locy] = states_[i]
                    # `sum` shadows the builtin inside this helper only.
                    sum = 0
                    for j in range(len(states_)):
                        if states_[j] == [locx+1,locy]:
                            x[i,j] = 0.25
                            sum += 0.25
                        if states_[j] == [locx-1,locy]:
                            x[i,j] = 0.25
                            sum += 0.25
                        if states_[j] == [locx,locy+1]:
                            x[i,j] = 0.25
                            sum += 0.25
                        if states_[j] == [locx,locy-1]:
                            x[i,j] = 0.25
                            sum += 0.25
                    # Remaining probability mass stays on the cell itself so
                    # that every row sums to one.
                    x[i,i]= 1.0-sum

                #Create W
                w = np.zeros((states))
                for count,loc in states_.items():
                    w[count] = option[loc[0],loc[1]]

                # (I-gamma*Q)^-1
                I = np.identity(states)
                psi = np.linalg.inv(I-gamma*x)

                smoothedOption = np.zeros_like(option,dtype=float)

                value = np.matmul(psi,w)
                # Scatter the smoothed values back onto the grid.
                for j,loc in states_.items():
                    smoothedOption[loc[0],loc[1]] = value[j]

                v_option[:,:,i2,j2] = smoothedOption
            return v_option

        # Only SF2, SF3 and SF5 are used below; weights are loaded through SF5.
        SF1,SF2,SF3,SF4,SF5 = buildNetwork(settings["SFNetworkConfig"],nActions,{},scope="Global")
        SF5.load_weights('./models/'+LoadName+ '/'+"model.h5")

        #Selecting the samples:
        psi = SF2.predict(np.vstack(s)) # [X,SF Dim]

        #test for approximate equality (for floating point types)
        def arreqclose_in_list(myarr, list_arrays):
            return next((True for elem in list_arrays if elem.size == myarr.size and np.allclose(elem, myarr,atol=1E-6)), False)

        if settings["Selection"]=="First":
            # Take rows of psi in order, keeping only those not already
            # (approximately) present.
            # NOTE(review): `i` is not bounded by psi.shape[0]; this indexes
            # past the end if psi holds too few distinct rows -- confirm.
            samples = [];points=[]
            i =0
            while len(samples) < settings["TotalSamples"]:
                if not arreqclose_in_list(psi[i,:], samples):
                    samples.append(psi[i,:])
                    points.append(i)
                i+=1
        elif settings["Selection"]=="Random":
            # Draw random rows until enough distinct ones have been found.
            # NOTE(review): `randint` is imported outside this block. If it
            # is `random.randint` the upper bound is inclusive and
            # psi[idx,:] can index past the end; row 0 is never drawn
            # either way -- confirm.
            samples = [];points=[]
            while len(samples) < settings["TotalSamples"]:
                idx = randint(1,psi.shape[0])
                if not arreqclose_in_list(psi[idx,:], samples):
                    samples.append(psi[idx,:])
                    points.append(idx)
        elif settings["Selection"]=="Random_sampling":
            #PCA Decomp to dimension:
            import pandas as pd
            from sklearn.decomposition import PCA
            feat_cols = [ 'pixel'+str(i) for i in range(psi.shape[1]) ]
            df = pd.DataFrame(psi,columns=feat_cols)
            np.random.seed(42)
            # `rndperm` is not read again in this method.
            rndperm = np.random.permutation(df.shape[0])
            pca = PCA(n_components=2)
            pca_result = pca.fit_transform(df[feat_cols].values)
            from SampleSelection import SampleSelection_v1
            points = SampleSelection_v1(pca_result,settings["TotalSamples"],returnIndicies=True)
        elif settings["Selection"]=="Hull_pca":
            #PCA Decomp to dimension:
            import pandas as pd
            from sklearn.decomposition import PCA
            feat_cols = [ 'pixel'+str(i) for i in range(psi.shape[1]) ]
            df = pd.DataFrame(psi,columns=feat_cols)
            np.random.seed(42)
            rndperm = np.random.permutation(df.shape[0])
            pca = PCA(n_components=4)
            pca_result = pca.fit_transform(df[feat_cols].values)
            from SampleSelection import SampleSelection_v2
            points = SampleSelection_v2(pca_result,settings["TotalSamples"],returnIndicies=True)
        elif settings["Selection"]=="Hull_tsne":
            #t-SNE embedding to three dimensions:
            import pandas as pd
            from sklearn.manifold import TSNE
            feat_cols = [ 'pixel'+str(i) for i in range(psi.shape[1]) ]
            df = pd.DataFrame(psi,columns=feat_cols)
            np.random.seed(42)
            rndperm = np.random.permutation(df.shape[0])
            tsne = TSNE(n_components=3, verbose=1, perplexity=10, n_iter=1000)
            tsne_results = tsne.fit_transform(df[feat_cols].values)
            from SampleSelection import SampleSelection_v2
            points = SampleSelection_v2(tsne_results,settings["TotalSamples"],returnIndicies=True)
        elif settings["Selection"]=="Hull_cluster":
            #PCA Decomp to dimension:
            import pandas as pd
            from sklearn.decomposition import PCA
            feat_cols = [ 'pixel'+str(i) for i in range(psi.shape[1]) ]
            df = pd.DataFrame(psi,columns=feat_cols)
            np.random.seed(42)
            rndperm = np.random.permutation(df.shape[0])
            pca = PCA(n_components=4)
            pca_result = pca.fit_transform(df[feat_cols].values)
            from SampleSelection import SampleSelection_v3
            points = SampleSelection_v3(pca_result,settings["TotalSamples"],returnIndicies=True)
        else:
            print("Invalid Method selected")
            exit()

        # Gather the selected embeddings by index.
        psiSamples=[]
        for point in points:
            psiSamples.append(psi[point,:])
        # Repeat the selection (doubling each pass) until there are at least
        # as many rows as embedding dimensions, so a square matrix can be cut.
        while len(psiSamples) < len(psiSamples[0]):
            psiSamples.extend(psiSamples)
        samps = np.stack(psiSamples)
        # Keep the first `SF Dim` rows so that samps2 is square for eig().
        samps2 = samps[0:samps.shape[1],:]
        w_g,v_g = np.linalg.eig(samps2)
        # print("here")
        dim = samps2.shape[1]

        #Creating Sub-policies
        offset = 0
        options = []
        # Each eigenvector yields two options (the smoothed map and its
        # negation), hence N/2 iterations.
        for sample in range(int(N/2)):
            print("Creating Option",sample)
            v_option=np.full((dFeatures[0],dFeatures[1],dFeatures[0],dFeatures[1]),0,dtype=np.float32)
            # One projected map per position [i2,j2] passed to ConstructSamples.
            for i2,j2 in itertools.product(range(dFeatures[0]),range(dFeatures[1])):
                # No eigenvector left for this slot; the map stays all zero.
                if sample+offset >= dim:
                    continue
                grids = ConstructSamples(env,[i2,j2])
                phi= SF3.predict(grids)
                v_option[:,:,i2,j2]=np.real(np.matmul(phi,v_g[:,sample+offset])).reshape([dFeatures[0],dFeatures[1]])
            # A complex eigenvalue advances the offset so the next iteration
            # skips one index (presumably its conjugate partner -- confirm).
            # NOTE(review): this index is not guarded by `dim` the way the
            # inner loop is -- confirm it cannot go out of range.
            if np.iscomplex(w_g[sample+offset]):
                offset+=1
            print("Smoothing Option")
            v_option_ = SmoothOption(v_option)
            options.append(v_option_)
            options.append(-v_option_)
            #Plotting the first couple samples with random enemy positions:
            # NOTE(review): the fixed indices 10, 17 and 2 assume both grid
            # axes have at least 18 cells; `offset` may also have just been
            # incremented, so the eigenvalue in the titles can differ from
            # the one used to build this option -- confirm.
            v_map = v_option_[:,:,10,10]
            imgplot = plt.imshow(v_map)
            plt.title(" Option "+str(sample)+" Value Estimate | Eigenvalue:" +str(w_g[sample+offset]))
            plt.savefig(IMAGE_PATH+"/option"+str(sample)+"_"+str(1)+".png")
            plt.close()
            v_map = v_option_[:,:,10,17]
            imgplot = plt.imshow(v_map)
            plt.title(" Option "+str(sample)+" Value Estimate | Eigenvalue:" +str(w_g[sample+offset]))
            plt.savefig(IMAGE_PATH+"/option"+str(sample)+"_"+str(2)+".png")
            plt.close()
            v_map = v_option_[:,:,17,10]
            imgplot = plt.imshow(v_map)
            plt.title(" Option "+str(sample)+" Value Estimate | Eigenvalue:" +str(w_g[sample+offset]))
            plt.savefig(IMAGE_PATH+"/option"+str(sample)+"_"+str(3)+".png")
            plt.close()
            v_map = v_option_[:,:,10,2]
            imgplot = plt.imshow(v_map)
            plt.title(" Option "+str(sample)+" Value Estimate | Eigenvalue:" +str(w_g[sample+offset]))
            plt.savefig(IMAGE_PATH+"/option"+str(sample)+"_"+str(4)+".png")
            plt.close()
            v_map = v_option_[:,:,2,10]
            imgplot = plt.imshow(v_map)
            plt.title(" Option "+str(sample)+" Value Estimate | Eigenvalue:" +str(w_g[sample+offset]))
            plt.savefig(IMAGE_PATH+"/option"+str(sample)+"_"+str(5)+".png")
            plt.close()

        #Saving the different options. to log:
        np.savez_compressed(MODEL_PATH_ +"options.npz", options=np.stack(options))
    self.options = options

    # Creating nested Method that will be updated.
    # `network` is not read again in this method.
    network = NetworkBuilder(networkConfig=settings["NetworkConfig"],netConfigOverride=netConfigOverride,actionSize=N)
    Method = GetFunction(settings["SubMethod"])
    self.nestedMethod = Method(sess,settings,netConfigOverride,stateShape=dFeatures,actionSize=N,nTrajs=nTrajs)
elif [0,int(locX-1),int(locY),3] == 1: actionValue.append(-999) else: actionValue.append(options[int(subpolicyNum)][int(locX-1),int(locY),int(locX2),int(locY2) ]) # Go Left #Selecting Action with Highest Value. Will always take a movement. return actionValue.index(max(actionValue)) from networks.network import Network #Creating High level policy with tf.device(args.processor): global_step = tf.Variable(0, trainable=False, name='global_step') global_step_next = tf.assign_add(global_step,1) network = Network(settings["NetworkConfig"],N,netConfigOverride) Method = GetFunction(settings["Method"]) net = Method(network,sess,scope="net",stateShape=dFeatures,actionSize=N,HPs=settings["NetworkHPs"],nTrajs=nTrajs) #Creating Auxilary Functions for logging and saving. writer = tf.summary.FileWriter(LOG_PATH,graph=sess.graph) saver = tf.train.Saver(max_to_keep=3, var_list=net.getVars+[global_step]) net.InitializeVariablesFromFile(saver,MODEL_PATH_) InitializeVariables(sess) #Included to catch if there are any uninitalized variables. progbar = tf.keras.utils.Progbar(None, unit_name='Training',stateful_metrics=["Reward"]) loggingFunctions=[] for loggingFunc in settings["LoggingFunctions"]: func = GetFunction(loggingFunc) loggingFunctions.append(func(env,net,IMAGE_PATH))
# Report which device the run targets and which GPUs are visible to it.
log.info("Running the expeiment with {}. CUDA_VISIBLE_DEVICES={}".format(
    args.processor, os.getenv("CUDA_VISIBLE_DEVICES")))

# Seed TensorFlow and NumPy only when a seed was actually supplied.
# The guard used to read `is None`, which seeded exclusively in the case
# where there was no seed to apply and ignored every explicit seed.
if args.seed is not None:
    tf.random.set_seed(args.seed)
    np.random.seed(args.seed)

try:
    dataset = LoadDataset(settings)
    model = LoadMethod(settings, dataset, networkConfig=args.network)

    # Initialize Callbacks
    # Each entry names a factory ("Name") and its keyword arguments
    # ("Arguments"). The loop variable is no longer called `dict`, which
    # rebound the builtin for the rest of the module.
    # NOTE(review): callbacks receive `logger` while this block logs through
    # `log` -- confirm both names are defined at this point.
    callbacks = []
    if "Callbacks" in settings:
        for callbackSpec in settings["Callbacks"]:
            func = GetFunction(callbackSpec["Name"])
            callbacks.append(func(logger, dataset, **callbackSpec["Arguments"]))

    if args.test:
        # Skipping Training and going straight to testing.
        # This calls "On Train End" Callbacks
        model.Test(callbacks=callbacks)
    else:
        model.Train(dataset.trainData, callbacks=callbacks)
except Exception as e:
    # Top-level boundary: record the failure with its traceback instead of
    # letting the run die with an unlogged exception.
    log.warning("Closing environment due to error")
    log.error('Error', exc_info=e)
except KeyboardInterrupt:
    # KeyboardInterrupt is not an Exception subclass, so it needs its own arm.
    log.warning("Closing environment due to Keyboard Interrupt")
def ApplyWrappers(env, wrapperList):
    """
    Wraps `env` with every wrapper described in `wrapperList`, in order.

    Each entry supplies the wrapper's name under "WrapperName" (resolved
    through GetFunction and echoed to stdout) and its keyword arguments
    under "WrapperParameters". The innermost environment is the one passed
    in; the outermost wrapper is the last entry of the list.

    Returns the fully wrapped environment, or `env` itself when the list is
    empty.
    """
    wrapped = env
    for spec in wrapperList:
        print(spec["WrapperName"])
        factory = GetFunction(spec["WrapperName"])
        params = spec["WrapperParameters"]
        wrapped = factory(wrapped, **params)
    return wrapped
def LoadMethod(settingDict, dataset, **kwargs):
    """
    Instantiates the method named by settingDict["Method"].

    The name is resolved through GetFunction and the result is called with
    the settings, the dataset and any extra keyword arguments. The object
    it returns is handed back unchanged.
    """
    methodFactory = GetFunction(settingDict["Method"])
    return methodFactory(settingDict, dataset, **kwargs)