CreatePath(MODEL_PATH)

# Creating the Environment
env, dFeatures, nActions, nTrajs = CreateEnvironment(envSettings)

# Creating the Networks and Methods of the Run.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=settings["GPUCapacitty"],
                            allow_growth=True)
config = tf.ConfigProto(gpu_options=gpu_options,
                        log_device_placement=False,
                        allow_soft_placement=True)
sess = tf.Session(config=config)

with tf.device(args.processor):
    SF1, SF2, SF3, SF4, SF5 = buildNetwork(settings["NetworkConfig"], nActions,
                                           netConfigOverride, scope="Global")

if args.load == "all":
    SF5.load_weights(MODEL_PATH + "/model.h5")
elif args.load == "phi":
    SF1.load_weights(MODEL_PATH + "/model_phi.h5")
elif args.load == "psi":
    SF2.load_weights(MODEL_PATH + "/model_psi.h5")
elif args.load == "phi/psi":
    SF1.load_weights(MODEL_PATH + "/model_phi.h5")
    SF2.load_weights(MODEL_PATH + "/model_psi.h5")
else:
    print("Did not load weights")
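
# ---------------------------------------------------------------------------
# For reference, a minimal sketch of the `settings` dictionary consumed by this
# script and by the option-construction methods below. The keys mirror the
# lookups that appear in the code; every value here is a placeholder and is not
# taken from any real configuration file.
# ---------------------------------------------------------------------------
_example_settings = {
    "RunName": "SF_Options_Run",           # experiment name used for model/log/image paths
    "LoadName": "SF_Pretrain_Run",         # name of the run whose pretrained weights are loaded
    "EnvConfig": "CTF_1v1.json",           # environment configuration file name
    "GPUCapacitty": 0.25,                  # per-process GPU memory fraction (key spelled as in the code)
    "NetworkConfig": "network.json",       # network configuration for the sub-policy method
    "SFNetworkConfig": "sf_network.json",  # network configuration for the successor-feature model
    "NumOptions": 8,                       # number of options (Q maps) to construct
    "SampleEpisodes": 100,                 # episodes of random play used to gather states
    "MAX_EP_STEPS": 150,                   # step cap per sampling episode
    "Selection": "Hull_pca",               # sample-selection method (First/Random/Random_sampling/Hull_*)
    "TotalSamples": 256,                   # number of successor-feature samples to select
    "SubMethod": "PPO",                    # nested training method built over the options (placeholder)
}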
def __init__(self, sess, settings, netConfigOverride, stateShape, actionSize, nTrajs=1, **kwargs):
    """
    Initializes a training method for a neural network.

    Parameters
    ----------
    sess : tf.Session
        Initialized Tensorflow session.
    settings : dict
        Dictionary of run settings, including all hyperparameters used in the method's training.
    netConfigOverride : dict
        Dictionary of values that override entries of the network configuration file.
    stateShape : list
        List of integers describing the input shape. Ex: [39, 39, 6]
    actionSize : int
        Output size of the network.
    nTrajs : int (Optional)
        Number of trajectories to be created for collecting training data.

    Returns
    -------
    N/A
    """
    EXP_NAME = settings["RunName"]
    LoadName = settings["LoadName"]
    MODEL_PATH = './models/' + LoadName + '/'
    IMAGE_PATH = './images/SF/' + EXP_NAME + '/'
    MODEL_PATH_ = './models/' + EXP_NAME + '/'
    LOG_PATH = './logs/CTF_1v1/' + EXP_NAME
    CreatePath(LOG_PATH)
    CreatePath(IMAGE_PATH)
    CreatePath(MODEL_PATH)
    CreatePath(MODEL_PATH_)
    self.sess = sess

    N = settings["NumOptions"]

    # Locate the environment configuration file and build the environment.
    for (dirpath, dirnames, filenames) in os.walk("configs/environment"):
        for filename in filenames:
            if settings["EnvConfig"] == filename:
                envConfigFile = os.path.join(dirpath, filename)
                break
    with open(envConfigFile) as json_file:
        envSettings = json.load(json_file)
    env, dFeatures, nActions, nTrajs = CreateEnvironment(envSettings)

    # Create the Q Maps
    if "LoadQMaps" in settings:
        # Loading the Q-tables for the sub-policies
        loadedData = np.load('./models/' + settings["LoadQMaps"] + '/options.npz')
        opt = loadedData["options"]
        options = []
        for i in range(opt.shape[0]):
            options.append(opt[i, :, :, :, :])
    else:
        if "LoadSamples" in settings:
            pass
        else:
            # Creating an instance of the environment and running through it to generate samples.
            def GetAction(state):
                """
                Selects a uniformly random action for the given state(s); used here only
                to generate exploratory samples (no network is run).
""" p = 1/nActions if len(state.shape)==3: probs =np.full((1,nActions),p) else: probs =np.full((state.shape[0],nActions),p) actions = np.array([np.random.choice(probs.shape[1], p=prob / sum(prob)) for prob in probs]) return actions s = [] for i in range(settings["SampleEpisodes"]): s0 = env.reset() for j in range(settings["MAX_EP_STEPS"]+1): a = GetAction(state=s0) s1,r,done,_ = env.step(a) if arreq_in_list(s0,s): pass else: s.append(s0) s0 = s1 if done: break #Creating and smoothing Q Maps def ConstructSamples(env,position2): grid = env.get_obs_blue locX,locY = np.unravel_index(np.argmax(grid[:,:,4], axis=None), grid[:,:,0].shape) locX2,locY2 = np.unravel_index(np.argmin(grid[:,:,4], axis=None), grid[:,:,0].shape) #Removing the agent grid[locX,locY,4] = 0 grid[locX2,locY2,4] = 0 stacked_grids = np.repeat(np.expand_dims(grid,0), grid.shape[0]*grid.shape[1],0) for i in range(stacked_grids.shape[1]): for j in range(stacked_grids.shape[2]): stacked_grids[i*stacked_grids.shape[2]+j,stacked_grids.shape[2]-i-1,j,4] = 5 stacked_grids[:,position2[0],position2[1],4] = -5 return stacked_grids def SmoothOption(option_, gamma =0.9): # option[option<0.0] = 0.0 #Create the Adjacency Matric v_option=np.full((dFeatures[0],dFeatures[1],dFeatures[0],dFeatures[1]),0,dtype=np.float32) for i2,j2 in itertools.product(range(dFeatures[0]),range(dFeatures[1])): option = option_[:,:,i2,j2] states_ = {} count = 0 for i in range(option.shape[0]): for j in range(option.shape[1]): if option[i,j] != 0: states_[count] = [i,j] # states_.append([count, [i,j]]) count+=1 states=len(states_.keys()) x = np.zeros((states,states)) for i in range(len(states_)): [locx,locy] = states_[i] sum = 0 for j in range(len(states_)): if states_[j] == [locx+1,locy]: x[i,j] = 0.25 sum += 0.25 if states_[j] == [locx-1,locy]: x[i,j] = 0.25 sum += 0.25 if states_[j] == [locx,locy+1]: x[i,j] = 0.25 sum += 0.25 if states_[j] == [locx,locy-1]: x[i,j] = 0.25 sum += 0.25 x[i,i]= 1.0-sum #Create W w = np.zeros((states)) for count,loc in states_.items(): w[count] = option[loc[0],loc[1]] # (I-gamma*Q)^-1 I = np.identity(states) psi = np.linalg.inv(I-gamma*x) smoothedOption = np.zeros_like(option,dtype=float) value = np.matmul(psi,w) for j,loc in states_.items(): smoothedOption[loc[0],loc[1]] = value[j] v_option[:,:,i2,j2] = smoothedOption return v_option SF1,SF2,SF3,SF4,SF5 = buildNetwork(settings["SFNetworkConfig"],nActions,{},scope="Global") SF5.load_weights('./models/'+LoadName+ '/'+"model.h5") #Selecting the samples: psi = SF2.predict(np.vstack(s)) # [X,SF Dim] #test for approximate equality (for floating point types) def arreqclose_in_list(myarr, list_arrays): return next((True for elem in list_arrays if elem.size == myarr.size and np.allclose(elem, myarr,atol=1E-6)), False) if settings["Selection"]=="First": samples = [];points=[] i =0 while len(samples) < settings["TotalSamples"]: if not arreqclose_in_list(psi[i,:], samples): samples.append(psi[i,:]) points.append(i) i+=1 elif settings["Selection"]=="Random": samples = [];points=[] while len(samples) < settings["TotalSamples"]: idx = randint(1,psi.shape[0]) if not arreqclose_in_list(psi[idx,:], samples): samples.append(psi[idx,:]) points.append(idx) elif settings["Selection"]=="Random_sampling": #PCA Decomp to dimension: import pandas as pd from sklearn.decomposition import PCA feat_cols = [ 'pixel'+str(i) for i in range(psi.shape[1]) ] df = pd.DataFrame(psi,columns=feat_cols) np.random.seed(42) rndperm = np.random.permutation(df.shape[0]) pca = PCA(n_components=2) pca_result = 
            from SampleSelection import SampleSelection_v1
            points = SampleSelection_v1(pca_result, settings["TotalSamples"], returnIndicies=True)
        elif settings["Selection"] == "Hull_pca":
            # PCA decomposition to dimension:
            import pandas as pd
            from sklearn.decomposition import PCA
            feat_cols = ['pixel' + str(i) for i in range(psi.shape[1])]
            df = pd.DataFrame(psi, columns=feat_cols)
            np.random.seed(42)
            rndperm = np.random.permutation(df.shape[0])
            pca = PCA(n_components=4)
            pca_result = pca.fit_transform(df[feat_cols].values)
            from SampleSelection import SampleSelection_v2
            points = SampleSelection_v2(pca_result, settings["TotalSamples"], returnIndicies=True)
        elif settings["Selection"] == "Hull_tsne":
            # t-SNE decomposition to dimension:
            import pandas as pd
            from sklearn.manifold import TSNE
            feat_cols = ['pixel' + str(i) for i in range(psi.shape[1])]
            df = pd.DataFrame(psi, columns=feat_cols)
            np.random.seed(42)
            rndperm = np.random.permutation(df.shape[0])
            tsne = TSNE(n_components=3, verbose=1, perplexity=10, n_iter=1000)
            tsne_results = tsne.fit_transform(df[feat_cols].values)
            from SampleSelection import SampleSelection_v2
            points = SampleSelection_v2(tsne_results, settings["TotalSamples"], returnIndicies=True)
        elif settings["Selection"] == "Hull_cluster":
            # PCA decomposition to dimension:
            import pandas as pd
            from sklearn.decomposition import PCA
            feat_cols = ['pixel' + str(i) for i in range(psi.shape[1])]
            df = pd.DataFrame(psi, columns=feat_cols)
            np.random.seed(42)
            rndperm = np.random.permutation(df.shape[0])
            pca = PCA(n_components=4)
            pca_result = pca.fit_transform(df[feat_cols].values)
            from SampleSelection import SampleSelection_v3
            points = SampleSelection_v3(pca_result, settings["TotalSamples"], returnIndicies=True)
        else:
            print("Invalid Method selected")
            exit()

        psiSamples = []
        for point in points:
            psiSamples.append(psi[point, :])
        while len(psiSamples) < len(psiSamples[0]):
            psiSamples.extend(psiSamples)

        samps = np.stack(psiSamples)
        samps2 = samps[0:samps.shape[1], :]
        w_g, v_g = np.linalg.eig(samps2)
        # print("here")
        dim = samps2.shape[1]

        # Creating Sub-policies
        offset = 0
        options = []
        for sample in range(int(N / 2)):
            print("Creating Option", sample)
            v_option = np.full((dFeatures[0], dFeatures[1], dFeatures[0], dFeatures[1]), 0, dtype=np.float32)
            for i2, j2 in itertools.product(range(dFeatures[0]), range(dFeatures[1])):
                if sample + offset >= dim:
                    continue
                grids = ConstructSamples(env, [i2, j2])
                phi = SF3.predict(grids)
                v_option[:, :, i2, j2] = np.real(np.matmul(phi, v_g[:, sample + offset])).reshape([dFeatures[0], dFeatures[1]])
            if np.iscomplex(w_g[sample + offset]):
                offset += 1

            print("Smoothing Option")
            v_option_ = SmoothOption(v_option)
            options.append(v_option_)
            options.append(-v_option_)

            # Plotting the first couple samples with random enemy positions:
            v_map = v_option_[:, :, 10, 10]
            imgplot = plt.imshow(v_map)
            plt.title(" Option " + str(sample) + " Value Estimate | Eigenvalue:" + str(w_g[sample + offset]))
            plt.savefig(IMAGE_PATH + "/option" + str(sample) + "_" + str(1) + ".png")
            plt.close()
            v_map = v_option_[:, :, 10, 17]
            imgplot = plt.imshow(v_map)
            plt.title(" Option " + str(sample) + " Value Estimate | Eigenvalue:" + str(w_g[sample + offset]))
            plt.savefig(IMAGE_PATH + "/option" + str(sample) + "_" + str(2) + ".png")
            plt.close()
            v_map = v_option_[:, :, 17, 10]
            imgplot = plt.imshow(v_map)
            plt.title(" Option " + str(sample) + " Value Estimate | Eigenvalue:" + str(w_g[sample + offset]))
            plt.savefig(IMAGE_PATH + "/option" + str(sample) + "_" + str(3) + ".png")
            plt.close()
            v_map = v_option_[:, :, 10, 2]
            imgplot = plt.imshow(v_map)
            plt.title(" Option " + str(sample) + " Value Estimate | Eigenvalue:" + str(w_g[sample + offset]))
            plt.savefig(IMAGE_PATH + "/option" + str(sample) + "_" + str(4) + ".png")
            plt.close()
            v_map = v_option_[:, :, 2, 10]
            imgplot = plt.imshow(v_map)
            plt.title(" Option " + str(sample) + " Value Estimate | Eigenvalue:" + str(w_g[sample + offset]))
            plt.savefig(IMAGE_PATH + "/option" + str(sample) + "_" + str(5) + ".png")
            plt.close()

        # Saving the different options to the log:
        np.savez_compressed(MODEL_PATH_ + "options.npz", options=np.stack(options))

    self.options = options

    # Creating nested Method that will be updated.
    network = NetworkBuilder(networkConfig=settings["NetworkConfig"],
                             netConfigOverride=netConfigOverride, actionSize=N)
    Method = GetFunction(settings["SubMethod"])
    self.nestedMethod = Method(sess, settings, netConfigOverride, stateShape=dFeatures,
                               actionSize=N, nTrajs=nTrajs)
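
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original pipeline): SmoothOption above
# treats the non-zero cells of an option value map as states of a random walk
# (0.25 probability per available neighbour, remaining mass as a self-loop) and
# propagates the raw values with the resolvent (I - gamma*P)^-1, i.e. a
# discounted sum of values along random-walk paths. The helper name below is
# hypothetical and exists only to make that computation explicit.
# ---------------------------------------------------------------------------
def _smooth_value_map_demo(value_map, gamma=0.9):
    """Smooth one 2D value map the same way SmoothOption smooths each slice."""
    import numpy as np

    # Enumerate the non-zero cells; only these participate in the random walk.
    cells = [(i, j) for i in range(value_map.shape[0])
             for j in range(value_map.shape[1]) if value_map[i, j] != 0]
    n = len(cells)
    index = {cell: k for k, cell in enumerate(cells)}

    # Random-walk transition matrix: 0.25 per in-set neighbour, rest stays put.
    P = np.zeros((n, n))
    for k, (i, j) in enumerate(cells):
        mass = 0.0
        for neighbour in [(i + 1, j), (i - 1, j), (i, j + 1), (i, j - 1)]:
            if neighbour in index:
                P[k, index[neighbour]] = 0.25
                mass += 0.25
        P[k, k] = 1.0 - mass

    # Discounted propagation of the raw cell values.
    w = np.array([value_map[i, j] for (i, j) in cells])
    value = np.linalg.inv(np.eye(n) - gamma * P) @ w

    smoothed = np.zeros_like(value_map, dtype=float)
    for k, (i, j) in enumerate(cells):
        smoothed[i, j] = value[k]
    return smoothed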
def __init__(self, sess, settings, netConfigOverride, stateShape, actionSize, env, nTrajs=1, **kwargs):
    """
    Initializes a training method for a neural network.

    Parameters
    ----------
    sess : tf.Session
        Initialized Tensorflow session.
    settings : dict
        Dictionary of run settings, including all hyperparameters used in the method's training.
    netConfigOverride : dict
        Dictionary of values that override entries of the network configuration file.
    stateShape : list
        List of integers describing the input shape. Ex: [39, 39, 6]
    actionSize : int
        Output size of the network.
    env : Environment
        Initialized environment used to collect samples and to reformat the Q maps.
    nTrajs : int (Optional)
        Number of trajectories to be created for collecting training data.

    Returns
    -------
    N/A
    """
    EXP_NAME = settings["RunName"]
    LoadName = settings["LoadName"]
    MODEL_PATH_ = './models/' + EXP_NAME + '/'
    MODEL_PATH = './models/' + LoadName + '/'
    LOG_PATH = './logs/' + EXP_NAME + '/'
    CreatePath(LOG_PATH)
    CreatePath(MODEL_PATH_)
    self.sess = sess
    self.env = env

    N = settings["NumOptions"]

    # Create the Q Maps
    if "LoadQMaps" in settings:
        # Loading the Q-tables for the sub-policies
        loadedData = np.load('./models/' + settings["LoadQMaps"] + '/options.npz')
        opt = loadedData["options"]
        options = []
        for i in range(opt.shape[0]):
            options.append(opt[i, :, :, :, :])
    else:
        if "LoadSamples" in settings:
            pass
        else:
            print("Creating Samples")

            # Creating an instance of the environment and running through it to generate samples.
            def GetAction(state):
                """
                Selects a uniformly random action for the given state(s); used here only
                to generate exploratory samples (no network is run).
                """
                p = 1 / actionSize
                if len(state.shape) == 3:
                    probs = np.full((1, actionSize), p)
                else:
                    probs = np.full((state.shape[0], actionSize), p)
                actions = np.array([np.random.choice(probs.shape[1], p=prob / sum(prob))
                                    for prob in probs])
                return actions

            s = []
            for i in range(settings["SampleEpisodes"]):
                s0 = env.reset()
                for j in range(settings["MAX_EP_STEPS"] + 1):
                    a = GetAction(state=s0)
                    s1, r, done, _ = env.step(a)
                    if arreq_in_list(s0, s):
                        pass
                    else:
                        s.append(s0)
                    s0 = s1
                    if done:
                        break

        with open(MODEL_PATH + 'netConfigOverride.json') as json_file:
            networkOverrides = json.load(json_file)
        # if "DefaultParams" not in networkOverrides:
        #     networkOverrides["DefaultParams"] = {}
        # networkOverrides["DefaultParams"]["Trainable"] = False
        # print(settings["SFNetworkConfig"])
        # print(networkOverrides)
        SF1, SF2, SF3, SF4, SF5 = buildNetwork(settings["SFNetworkConfig"], actionSize,
                                               networkOverrides, scope="Global")
        SF5.load_weights(MODEL_PATH + "model.h5")

        # Selecting the samples:
        psi = SF2.predict(np.vstack(s))  # [X, SF Dim]

        # Test for approximate equality (for floating point types).
        def arreqclose_in_list(myarr, list_arrays):
            return next((True for elem in list_arrays
                         if elem.size == myarr.size and np.allclose(elem, myarr, atol=1E-6)), False)

        print("Selecting Samples")
        if settings["Selection"] == "First":
            samples = []
            points = []
            i = 0
            while len(samples) < settings["TotalSamples"]:
                if not arreqclose_in_list(psi[i, :], samples):
                    samples.append(psi[i, :])
                    points.append(i)
                i += 1
        elif settings["Selection"] == "Random":
            samples = []
            points = []
            while len(samples) < settings["TotalSamples"]:
                idx = randint(1, psi.shape[0] - 1)
                if not arreqclose_in_list(psi[idx, :], samples):
                    samples.append(psi[idx, :])
                    points.append(idx)
        elif settings["Selection"] == "Random_sampling":
            # PCA decomposition to dimension:
            import pandas as pd
            from sklearn.decomposition import PCA
            feat_cols = ['pixel' + str(i) for i in range(psi.shape[1])]
            df = pd.DataFrame(psi, columns=feat_cols)
            np.random.seed(42)
            rndperm = np.random.permutation(df.shape[0])
            pca = PCA(n_components=2)
            pca_result = pca.fit_transform(df[feat_cols].values)
            from SampleSelection import SampleSelection_v1
            points = SampleSelection_v1(pca_result, settings["TotalSamples"], returnIndicies=True)
        elif settings["Selection"] == "Hull_pca":
            # PCA decomposition to dimension:
            import pandas as pd
            from sklearn.decomposition import PCA
            feat_cols = ['pixel' + str(i) for i in range(psi.shape[1])]
            df = pd.DataFrame(psi, columns=feat_cols)
            np.random.seed(42)
            rndperm = np.random.permutation(df.shape[0])
            pca = PCA(n_components=4)
            pca_result = pca.fit_transform(df[feat_cols].values)
            from SampleSelection import SampleSelection_v2
            points = SampleSelection_v2(pca_result, settings["TotalSamples"], returnIndicies=True)
        elif settings["Selection"] == "Hull_tsne":
            # t-SNE decomposition to dimension:
            import pandas as pd
            from sklearn.manifold import TSNE
            feat_cols = ['pixel' + str(i) for i in range(psi.shape[1])]
            df = pd.DataFrame(psi, columns=feat_cols)
            np.random.seed(42)
            rndperm = np.random.permutation(df.shape[0])
            tsne = TSNE(n_components=3, verbose=1, perplexity=10, n_iter=1000)
            tsne_results = tsne.fit_transform(df[feat_cols].values)
            from SampleSelection import SampleSelection_v2
            points = SampleSelection_v2(tsne_results, settings["TotalSamples"], returnIndicies=True)
        elif settings["Selection"] == "Hull_cluster":
            # PCA decomposition to dimension:
            import pandas as pd
            from sklearn.decomposition import PCA
            feat_cols = ['pixel' + str(i) for i in range(psi.shape[1])]
            df = pd.DataFrame(psi, columns=feat_cols)
            np.random.seed(42)
            rndperm = np.random.permutation(df.shape[0])
            pca = PCA(n_components=4)
            pca_result = pca.fit_transform(df[feat_cols].values)
            from SampleSelection import SampleSelection_v3
            points = SampleSelection_v3(pca_result, settings["TotalSamples"], returnIndicies=True)
        else:
            print("Invalid Method selected")
            exit()

        psiSamples = []
        for point in points:
            psiSamples.append(psi[point, :])
        while len(psiSamples) < len(psiSamples[0]):
            psiSamples.extend(psiSamples)

        samps = np.stack(psiSamples)
        samps2 = samps[0:samps.shape[1], :]
        w_g, v_g = np.linalg.eig(samps2)
        # print("here")
        dim = samps2.shape[1]

        # Creating Sub-policies
        offset = 0
        options = []
        # QMapStructure = self.env.GetQMapStructure()
        print("Getting data for a Q-Map")
        grids = self.env.ConstructAllSamples()
        phis = SF3.predict(grids)
        for sample in range(int(N / 2)):
            print("Creating Option", sample)
            if sample + offset >= dim:
                continue
            v_option, v_option_inv = self.env.ReformatSamples(
                np.real(np.matmul(phis, v_g[:, sample + offset])))
            options.append(v_option)
            options.append(v_option_inv)
            if np.iscomplex(w_g[sample + offset]):
                offset += 1

            if settings["PlotOptions"]:
                imgplot = plt.imshow(v_option)
                plt.title(" Option " + str(sample) + " Value Estimate | Eigenvalue:" + str(w_g[sample + offset]))
                plt.savefig(LOG_PATH + "/option" + str(sample) + ".png")
                plt.close()

        # Saving the different options to the log:
        np.savez_compressed(MODEL_PATH_ + "options.npz", options=np.stack(options))

    self.options = options

    # Creating nested Method that will be updated.
    network = NetworkBuilder(networkConfig=settings["NetworkConfig"],
                             netConfigOverride=netConfigOverride, actionSize=N)
    Method = GetFunction(settings["SubMethod"])
    self.nestedMethod = Method(sess, settings, netConfigOverride, stateShape=stateShape,
                               actionSize=N, nTrajs=nTrajs)
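
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original code): the option construction
# above stacks selected successor-feature vectors into a square matrix, takes
# its eigendecomposition, and scores every state by the projection of its
# feature vector phi(s) onto an eigenvector; each eigenvector yields one option
# value map and its negation a second one. All names below are hypothetical.
# ---------------------------------------------------------------------------
def _eigenoption_value_maps_demo(psi_samples, phi_states, num_options):
    """Return per-state value vectors, one pair per retained eigenvector.

    psi_samples : (D, D) array of selected successor-feature samples (rows).
    phi_states  : (S, D) array of state features, one row per state.
    """
    import numpy as np

    # The original pipeline also keeps the eigenvalues (w_g) to detect and skip
    # complex-conjugate pairs; this sketch only uses the eigenvectors.
    _, eigvecs = np.linalg.eig(psi_samples)

    maps = []
    for k in range(min(num_options, eigvecs.shape[1])):
        values = np.real(phi_states @ eigvecs[:, k])  # projection of each state onto eigenvector k
        maps.append(values)
        maps.append(-values)                          # the mirrored option
    return maps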
def ApeXWorkers(sess, settings, netConfigOverride):
    EXP_NAME = settings["RunName"]
    LoadName = settings["LoadName"]
    MODEL_PATH = './models/' + LoadName + '/'
    IMAGE_PATH = './images/SF/' + EXP_NAME + '/'
    MODEL_PATH_ = './models/' + EXP_NAME + '/'
    LOG_PATH = './logs/CTF_1v1/' + EXP_NAME
    CreatePath(LOG_PATH)
    CreatePath(IMAGE_PATH)
    CreatePath(MODEL_PATH)
    CreatePath(MODEL_PATH_)
    self.sess = sess

    N = settings["NumOptions"]

    with open("configs/environment/" + settings["EnvConfig"]) as json_file:
        envSettings = json.load(json_file)
    env, dFeatures, nActions, nTrajs = CreateEnvironment(envSettings)

    # Create the Q Maps
    if "LoadQMaps" in settings:
        # Loading the Q-tables for the sub-policies
        loadedData = np.load('./models/' + settings["LoadQMaps"] + '/options.npz')
        opt = loadedData["options"]
        options = []
        for i in range(opt.shape[0]):
            options.append(opt[i, :, :, :, :])
    else:
        if "LoadSamples" in settings:
            pass
        else:
            # Creating an instance of the environment and running through it to generate samples.
            def GetAction(state):
                """
                Selects a uniformly random action for the given state(s); used here only
                to generate exploratory samples (no network is run).
                """
                p = 1 / nActions
                if len(state.shape) == 3:
                    probs = np.full((1, nActions), p)
                else:
                    probs = np.full((state.shape[0], nActions), p)
                actions = np.array([np.random.choice(probs.shape[1], p=prob / sum(prob))
                                    for prob in probs])
                return actions

            s = []
            for i in range(settings["SampleEpisodes"]):
                s0 = env.reset()
                for j in range(settings["MAX_EP_STEPS"] + 1):
                    a = GetAction(state=s0)
                    s1, r, done, _ = env.step(a)
                    if arreq_in_list(s0, s):
                        pass
                    else:
                        s.append(s0)
                    s0 = s1
                    if done:
                        break

        # Creating and smoothing Q Maps
        def ConstructSamples(env, position2):
            grid = env.get_obs_blue
            locX, locY = np.unravel_index(np.argmax(grid[:, :, 4], axis=None), grid[:, :, 0].shape)
            locX2, locY2 = np.unravel_index(np.argmin(grid[:, :, 4], axis=None), grid[:, :, 0].shape)
            # Removing the agent
            grid[locX, locY, 4] = 0
            grid[locX2, locY2, 4] = 0

            stacked_grids = np.repeat(np.expand_dims(grid, 0), grid.shape[0] * grid.shape[1], 0)
            for i in range(stacked_grids.shape[1]):
                for j in range(stacked_grids.shape[2]):
                    stacked_grids[i * stacked_grids.shape[2] + j, stacked_grids.shape[2] - i - 1, j, 4] = 5
            stacked_grids[:, position2[0], position2[1], 4] = -5
            return stacked_grids

        def SmoothOption(option_, gamma=0.9):
            # option[option<0.0] = 0.0
            # Create the adjacency matrix of a random walk over the non-zero cells and
            # propagate the raw values with (I - gamma*A)^-1.
            v_option = np.full((dFeatures[0], dFeatures[1], dFeatures[0], dFeatures[1]), 0, dtype=np.float32)
            for i2, j2 in itertools.product(range(dFeatures[0]), range(dFeatures[1])):
                option = option_[:, :, i2, j2]
                states_ = {}
                count = 0
                for i in range(option.shape[0]):
                    for j in range(option.shape[1]):
                        if option[i, j] != 0:
                            states_[count] = [i, j]
                            # states_.append([count, [i,j]])
                            count += 1
                states = len(states_.keys())
                x = np.zeros((states, states))
                for i in range(len(states_)):
                    [locx, locy] = states_[i]
                    sum = 0
                    for j in range(len(states_)):
                        if states_[j] == [locx + 1, locy]:
                            x[i, j] = 0.25
                            sum += 0.25
                        if states_[j] == [locx - 1, locy]:
                            x[i, j] = 0.25
                            sum += 0.25
                        if states_[j] == [locx, locy + 1]:
                            x[i, j] = 0.25
                            sum += 0.25
                        if states_[j] == [locx, locy - 1]:
                            x[i, j] = 0.25
                            sum += 0.25
                    x[i, i] = 1.0 - sum

                # Create W
                w = np.zeros((states))
                for count, loc in states_.items():
                    w[count] = option[loc[0], loc[1]]

                # (I-gamma*Q)^-1
                I = np.identity(states)
                psi = np.linalg.inv(I - gamma * x)
                smoothedOption = np.zeros_like(option, dtype=float)

                value = np.matmul(psi, w)
                for j, loc in states_.items():
                    smoothedOption[loc[0], loc[1]] = value[j]

                v_option[:, :, i2, j2] = smoothedOption
            return v_option

        SF1, SF2, SF3, SF4, SF5 = buildNetwork(settings["SFNetworkConfig"], nActions, {}, scope="Global")
        SF5.load_weights('./models/' + LoadName + '/' + "model.h5")

        # Selecting the samples:
        psi = SF2.predict(np.vstack(s))  # [X, SF Dim]

        # Test for approximate equality (for floating point types).
        def arreqclose_in_list(myarr, list_arrays):
            return next((True for elem in list_arrays
                         if elem.size == myarr.size and np.allclose(elem, myarr, atol=1E-6)), False)

        if settings["Selection"] == "First":
            samples = []
            points = []
            i = 0
            while len(samples) < settings["TotalSamples"]:
                if not arreqclose_in_list(psi[i, :], samples):
                    samples.append(psi[i, :])
                    points.append(i)
                i += 1
        elif settings["Selection"] == "Random":
            samples = []
            points = []
            while len(samples) < settings["TotalSamples"]:
                idx = randint(1, psi.shape[0] - 1)  # -1 keeps the (inclusive) upper bound in range
                if not arreqclose_in_list(psi[idx, :], samples):
                    samples.append(psi[idx, :])
                    points.append(idx)
        elif settings["Selection"] == "Hull_pca":
            # PCA decomposition to dimension:
            import pandas as pd
            from sklearn.decomposition import PCA
            feat_cols = ['pixel' + str(i) for i in range(psi.shape[1])]
            df = pd.DataFrame(psi, columns=feat_cols)
            np.random.seed(42)
            rndperm = np.random.permutation(df.shape[0])
            pca = PCA(n_components=4)
            pca_result = pca.fit_transform(df[feat_cols].values)
            from SampleSelection import SampleSelection_v2
            points = SampleSelection_v2(pca_result, settings["TotalSamples"], returnIndicies=True)
        elif settings["Selection"] == "Hull_tsne":
            # t-SNE decomposition to dimension:
            import pandas as pd
            from sklearn.manifold import TSNE
            feat_cols = ['pixel' + str(i) for i in range(psi.shape[1])]
            df = pd.DataFrame(psi, columns=feat_cols)
            np.random.seed(42)
            rndperm = np.random.permutation(df.shape[0])
            tsne = TSNE(n_components=3, verbose=1, perplexity=10, n_iter=1000)
            tsne_results = tsne.fit_transform(df[feat_cols].values)
            from SampleSelection import SampleSelection_v2
            points = SampleSelection_v2(tsne_results, settings["TotalSamples"], returnIndicies=True)
        elif settings["Selection"] == "Hull_cluster":
            # PCA decomposition to dimension:
            import pandas as pd
            from sklearn.decomposition import PCA
            feat_cols = ['pixel' + str(i) for i in range(psi.shape[1])]
            df = pd.DataFrame(psi, columns=feat_cols)
            np.random.seed(42)
            rndperm = np.random.permutation(df.shape[0])
            pca = PCA(n_components=4)
            pca_result = pca.fit_transform(df[feat_cols].values)
            from SampleSelection import SampleSelection_v3
            points = SampleSelection_v3(pca_result, settings["TotalSamples"], returnIndicies=True)
        else:
            print("Invalid Method selected")
            exit()

        psiSamples = []
        for point in points:
            psiSamples.append(psi[point, :])
        while len(psiSamples) < len(psiSamples[0]):
            psiSamples.extend(psiSamples)

        samps = np.stack(psiSamples)
        samps2 = samps[0:samps.shape[1], :]
        w_g, v_g = np.linalg.eig(samps2)
        # print("here")
        dim = samps2.shape[1]

        # Creating Sub-policies
        offset = 0
        options = []
        for sample in range(int(N / 2)):
            print("Creating Option", sample)
            v_option = np.full((dFeatures[0], dFeatures[1], dFeatures[0], dFeatures[1]), 0, dtype=np.float32)
            for i2, j2 in itertools.product(range(dFeatures[0]), range(dFeatures[1])):
                if sample + offset >= dim:
                    continue
                grids = ConstructSamples(env, [i2, j2])
                phi = SF3.predict(grids)
                v_option[:, :, i2, j2] = np.real(np.matmul(phi, v_g[:, sample + offset])).reshape([dFeatures[0], dFeatures[1]])
            if np.iscomplex(w_g[sample + offset]):
                offset += 1

            print("Smoothing Option")
            v_option_ = SmoothOption(v_option)
            options.append(v_option_)
            options.append(-v_option_)

            # Plotting the first couple samples with random enemy positions:
            v_map = v_option_[:, :, 10, 10]
            imgplot = plt.imshow(v_map)
plt.title(" Option " + str(sample) + " Value Estimate | Eigenvalue:" + str(w_g[sample + offset])) plt.savefig(IMAGE_PATH + "/option" + str(sample) + "_" + str(1) + ".png") plt.close() v_map = v_option_[:, :, 10, 17] imgplot = plt.imshow(v_map) plt.title(" Option " + str(sample) + " Value Estimate | Eigenvalue:" + str(w_g[sample + offset])) plt.savefig(IMAGE_PATH + "/option" + str(sample) + "_" + str(2) + ".png") plt.close() v_map = v_option_[:, :, 17, 10] imgplot = plt.imshow(v_map) plt.title(" Option " + str(sample) + " Value Estimate | Eigenvalue:" + str(w_g[sample + offset])) plt.savefig(IMAGE_PATH + "/option" + str(sample) + "_" + str(3) + ".png") plt.close() v_map = v_option_[:, :, 10, 2] imgplot = plt.imshow(v_map) plt.title(" Option " + str(sample) + " Value Estimate | Eigenvalue:" + str(w_g[sample + offset])) plt.savefig(IMAGE_PATH + "/option" + str(sample) + "_" + str(4) + ".png") plt.close() v_map = v_option_[:, :, 2, 10] imgplot = plt.imshow(v_map) plt.title(" Option " + str(sample) + " Value Estimate | Eigenvalue:" + str(w_g[sample + offset])) plt.savefig(IMAGE_PATH + "/option" + str(sample) + "_" + str(5) + ".png") plt.close() #Saving the different options. to log: np.savez_compressed(MODEL_PATH_ + "options.npz", options=np.stack(options)) self.options = options EXP_NAME = settings["RunName"] MODEL_PATH = './models/' + EXP_NAME LOG_PATH = './logs/' + EXP_NAME CreatePath(LOG_PATH) CreatePath(MODEL_PATH) with open("configs/environment/" + settings["EnvConfig"]) as json_file: envSettings = json.load(json_file) progbar = tf.keras.utils.Progbar(None, unit_name='Training', stateful_metrics=["Reward"]) writer = tf.summary.FileWriter(LOG_PATH, graph=sess.graph) global_step = tf.Variable(0, trainable=False, name='global_step') global_step_next = tf.assign_add(global_step, 1) workers = [] sharedBuffer = ApexBuffer() _, dFeatures, nActions, nTrajs = CreateEnvironment(envSettings, multiprocessing=1) network = NetworkBuilder(settings["NetworkConfig"], netConfigOverride, scope="Global", actionSize=nActions) targetNetwork = NetworkBuilder(settings["NetworkConfig"], netConfigOverride, scope="target", actionSize=nActions) Updater = ApeX(network, sess, stateShape=dFeatures, actionSize=nActions, scope="Global", HPs=settings["NetworkHPs"], sharedBuffer=sharedBuffer, targetNetwork=targetNetwork) Updater.Model.summary() saver = tf.train.Saver(max_to_keep=3, var_list=Updater.getVars + [global_step]) Updater.InitializeVariablesFromFile(saver, MODEL_PATH) workers.append( WorkerLearner(Updater, sess, global_step, global_step_next, settings, progbar, writer, MODEL_PATH, saver)) i_name = "prioritizer" network = NetworkBuilder(settings["NetworkConfig"], netConfigOverride, scope=i_name, actionSize=nActions) localNetwork = ApeX(network, sess, stateShape=dFeatures, actionSize=nActions, scope=i_name, HPs=settings["NetworkHPs"], globalAC=Updater, nTrajs=nTrajs, sharedBuffer=sharedBuffer) localNetwork.InitializeVariablesFromFile(saver, MODEL_PATH) workers.append(WorkerPrioritizer(localNetwork, sess, global_step, settings)) # Create workers for i in range(settings["NumberENV"]): i_name = 'W_%i' % i # worker name network = NetworkBuilder(settings["NetworkConfig"], netConfigOverride, scope=i_name, actionSize=nActions) localNetwork = ApeX(network, sess, stateShape=dFeatures, actionSize=nActions, scope=i_name, HPs=settings["NetworkHPs"], globalAC=Updater, nTrajs=nTrajs, sharedBuffer=sharedBuffer) localNetwork.InitializeVariablesFromFile(saver, MODEL_PATH) env, _, _, _ = CreateEnvironment(envSettings, 
        workers.append(WorkerActor(localNetwork, env, sess, global_step, global_step_next,
                                   settings, progbar, writer, MODEL_PATH, saver))

    return workers
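
# ---------------------------------------------------------------------------
# Usage sketch (assumptions labelled, not confirmed by this file): ApeXWorkers
# wires one WorkerLearner (updates the shared network), one WorkerPrioritizer
# (presumably assigns priorities to transitions in the shared ApexBuffer), and
# settings["NumberENV"] WorkerActor instances, each with its own environment and
# local ApeX network, around the shared replay buffer. A typical driver would
# run each worker's loop in its own thread; the Work() entry point below is an
# assumption about the worker API and is shown commented out for that reason:
#
#     import threading
#     workers = ApeXWorkers(sess, settings, netConfigOverride)
#     threads = [threading.Thread(target=w.Work) for w in workers]
#     for t in threads:
#         t.start()
#     for t in threads:
#         t.join()
# ---------------------------------------------------------------------------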