# Imports reconstructed from usage in the scripts below; the project-local
# classes (NavigationTask, SimplePolicy, the forward models, planners, data
# loaders, GumbelSoftmax, Utils, etc.) are assumed to live in this repository's
# own modules, which are not shown here.
import os
import sys
import pickle
import random as r

import numpy as np
import numpy.random as npr
import tensorflow as tf
import torch
from torch.autograd import Variable as avar


def main():
    # Hyper-parameters
    env = NavigationTask()
    input_size = np.shape(env.getStateRep(True))
    hparams = {'input_size': input_size,
               'num_actions': 10,
               'episode_length': 7}
    # Environment params
    eparams = {'num_batches': 100, 'ep_per_batch': 128}
    numIts = 5
    print("Starting Policy Gradient")
    for i in range(numIts):
        print("######################")
        print("Try Number: ", i)
        with tf.Graph().as_default(), tf.Session() as sess:
            pi = SimplePolicy(hparams['input_size'], hparams['num_actions'])
            sess.run(tf.global_variables_initializer())
            for batch in range(eparams['num_batches']):
                num = 0
                total = 0
                for ep in range(eparams['ep_per_batch']):
                    obs, acts, rews = policyRollout(pi, hparams)
                    num += 1 if 1 in rews else 0
                    total += 1
                    pi.train_step(obs, acts, rews)
                if batch % 50 == 0:
                    _, loss = pi.train_step(obs, acts, rews)
                    print("Accuracy ", batch, " : ", str(num / total))
def generateTask(px, py, orien, gx, gy):
    direction = NavigationTask.oriens[orien]
    gs = np.array([gx, gy])
    env = NavigationTask(agent_start_pos=[np.array([px, py]), direction],
                         goal_pos=gs)
    return env
def policyRollout(agent, hparams):
    """Runs one episode and returns its observations, one-hot actions, and rewards."""
    episode_length = hparams['episode_length']
    env = NavigationTask()
    obs, acts, rews = [], [], []
    for i in range(episode_length):
        state = env.getStateRep(True)
        obs.append(state)
        actionProb, sampleAction = agent.act_inference(state)
        action = actionProb.argmax()
        sampleActionIndex = sampleAction.argmax()
        env.performAction(action)
        newState = env.getStateRep()
        reward = env.getReward()
        values = [action]
        acts.append(np.squeeze(np.eye(hparams['num_actions'])[values]))
        rews.append(reward)
    return obs, acts, rews
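# ---------------------------------------------------------------------------
# SimplePolicy and its train_step are defined elsewhere in the project and are
# not shown here.  The class below is only a minimal sketch of the kind of
# REINFORCE-style update that a rollout of (obs, one-hot acts, rews) like the
# one produced by policyRollout is typically fed into; the network, names, and
# loss are illustrative assumptions, not the project's actual implementation.
# ---------------------------------------------------------------------------
import numpy as np
import torch
import torch.nn as nn


class ReinforcePolicySketch(nn.Module):
    def __init__(self, input_size, num_actions, lr=1e-3):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(input_size, 64), nn.ReLU(),
                                 nn.Linear(64, num_actions))
        self.opt = torch.optim.Adam(self.parameters(), lr=lr)

    def train_step(self, obs, acts, rews):
        ret = float(np.sum(rews))  # undiscounted episode return
        logits = self.net(torch.as_tensor(np.asarray(obs), dtype=torch.float32))
        log_probs = torch.log_softmax(logits, dim=1)
        acts = torch.as_tensor(np.asarray(acts), dtype=torch.float32)
        # REINFORCE: maximize the return-weighted log-likelihood of taken actions
        loss = -(log_probs * acts).sum(dim=1).mul(ret).mean()
        self.opt.zero_grad()
        loss.backward()
        self.opt.step()
        return loss.item()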
def mainQ():
    ts = "navigation-data-train-sequence-singularDiscrete.pickle"
    vs = "navigation-data-test-sequence-singularDiscrete.pickle"
    exampleEnv = NavigationTask()
    #trainSeqs = SingDiscSeqData(ts, exampleEnv)
    validSeqs = SingDiscSeqData(vs, exampleEnv)
    f = ForwardModelLSTM_SD(exampleEnv)
def main():
    ts = "navigation-data-state_to_reward-train.pickle"
    vs = "navigation-data-state_to_reward-valid.pickle"
    ############
    print('Reading Data')
    with open(ts, 'rb') as inFile:
        print('\tReading', ts)
        trainSet = pickle.load(inFile)
    with open(vs, 'rb') as inFile:
        print('\tReading', vs)
        validSet = pickle.load(inFile)
    env = NavigationTask()
    greedyvp = GreedyValuePredictor(env)
    greedyvp.train(trainSet, validSet)

    def generateTask(px, py, orien, gx, gy):
        direction = NavigationTask.oriens[orien]
        gs = np.array([gx, gy])
        env = NavigationTask(agent_start_pos=[np.array([px, py]), direction],
                             goal_pos=gs)
        return env

    env = generateTask(0, 1, 2, 3, 2)
    state = avar(torch.FloatTensor(env.getStateRep()),
                 requires_grad=False).view(1, -1)
    print(state.shape)
    greedyvp.forward(state).data.numpy()
    torch.save(greedyvp.state_dict(), "greedy_value_predictor")
def main():
    f_model_name = 'LSTM_FM_1_99'
    gvp_model_name = "greedy_value_predictor_3"
    numRepeats = 5
    tasks = [[6, generateTask(0, 0, 0, 12, 10)]]
    exampleEnv = NavigationTask()
    ForwardModel = LSTMForwardModel(74, 64)
    ForwardModel.load_state_dict(torch.load(f_model_name))
    GreedyVP = GreedyValuePredictor(exampleEnv)
    GreedyVP.load_state_dict(torch.load(gvp_model_name))
    print("Running the tasks")
    for i, task in enumerate(tasks):
        for j in range(numRepeats):
            task_state = task[1].getStateRep(oneHotOutput=False)
            px = int(task_state[0])
            py = int(task_state[1])
            orien = np.argmax(task_state[2:6])
            gx = int(task_state[-2])
            gy = int(task_state[-1])
            print("$$###############################")
            print("Repeat " + str(j) + " for " + str(gx) + " , " + str(gy))
            #print('www', px, py, orien, gx, gy)
            cenv = generateTask(px, py, orien, gx, gy)
            SimPolicy = SimulationPolicy(cenv)
            SimPolicy.trainSad(ForwardModel, GreedyVP, maxDepth=task[0],
                               niters=2000)
            s_0 = torch.unsqueeze(avar(torch.FloatTensor(cenv.getStateRep())),
                                  dim=0)
            tree = Tree(s_0, ForwardModel, SimPolicy, greedy_valueF, cenv,
                        task[0], 2)
            states, actions = tree.getBestPlan()
            for k in range(len(actions)):
                cenv.performAction(actions[k][0].data.numpy().argmax())
            r = cenv.getReward()
            correct = (r == 1)
            #print('Correct?', correct)
            if correct:
                print('Correct final state', str(gx), str(gy))
                torch.save(SimPolicy.state_dict(),
                           "SimPolicy_solve_" + str(gx) + "_" + str(gy) + "_" + str(j))
def mainOld():
    ####################################################
    useFFANN = True
    trainingFFANN = False      # 1
    henaffHyperSearch = False  # 4
    runHenaffFFANN = False     # 5
    manualTest = False         # 2
    ####################################################
    if len(sys.argv) > 1:
        if sys.argv[1] == '1': trainingFFANN = True
        if sys.argv[1] == '2': manualTest = True
        if sys.argv[1] == '3': autoTest = True
        if sys.argv[1] == '4': henaffHyperSearch = True
        if sys.argv[1] == '5': runHenaffFFANN = True
    if useFFANN:
        f_model_name = 'forward-ffann-singDisc-noisy-2.pt'
        exampleEnv = NavigationTask()
        f = ForwardModelFFANN(exampleEnv)
    ################################################################################################################
    if trainingFFANN:
        ############
        ts = "navigation-data-train-single-singularDiscrete.pickle"
        vs = "navigation-data-test-single-singularDiscrete.pickle"
        preload_name = None
        saveName = 'forward-ffann-singDisc-noisy-3.pt'
        ############
        print('Reading Data')
        with open(ts, 'rb') as inFile:
            print('\tReading', ts)
            trainSet = pickle.load(inFile)
        with open(vs, 'rb') as inFile:
            print('\tReading', vs)
            validSet = pickle.load(inFile)
        if preload_name is not None:
            print('Loading from', f_model_name)
            f.load_state_dict(torch.load(f_model_name))
        f.runTraining(trainSet, validSet, maxIters=50000,
                      modelFilenameToSave=saveName, testEvery=100)
    if manualTest:  # 2
        print('Loading from', f_model_name)
        f.load_state_dict(torch.load(f_model_name))
        print('Environment states')
        ###
        start_px = 7
        start_py = 9
        start_orien = 1
        action = [5, 1, 5]
        ###
        cstate = avar(torch.FloatTensor(
            exampleEnv.singularDiscreteStateFromInts(
                start_px, start_py, start_orien))).unsqueeze(0)
        for act in action:
            action1h = avar(torch.FloatTensor(
                exampleEnv._intToOneHot(act, 10))).unsqueeze(0)
            inputVal = torch.cat([cstate, action1h], dim=1)
            cstate = f.forward(inputVal)
        print(cstate)
        print("sx,sy,sorien =", start_px, ',', start_py, ',', start_orien)
        print("As =", ",".join([NavigationTask.actions[a] for a in action]))
        print("px,py,orien =",
              f.env.singularDiscreteStateToInts(cstate.squeeze(0).data.numpy()))
    ################################################################################################################
    if runHenaffFFANN:  # 5
        print('Loading from', f_model_name)
        f.load_state_dict(torch.load(f_model_name))
        print('Environment states')
        start_px = 0
        start_py = 0
        start_orien = 0
        start_state = exampleEnv.singularDiscreteStateFromInts(
            start_px, start_py, start_orien)
        goal_state = [0, 2]
        print('Building planner')
        planner = HenaffPlanner(f, maxNumActions=2)
        print('Starting generation')
        actions = planner.generatePlan(
            start_state,         # The starting state of the agent (one-hot singDisc)
            goal_state,          # The goal state of the agent as two ints (e.g. [gx,gy])
            eta=0.01,            # The learning rate given to ADAM
            noiseSigma=None,     # Noise strength on inputs. Overwrites the default setting from the init
            niters=None,         # Number of optimization iterations. Overwrites the default setting from the init
            useCE=False,         # Specifies use of the cross-entropy loss, taken over subvectors of the state
            verbose=False,       # Specifies verbosity
            extraVerbose=False,  # Specifies extra verbosity
            useGumbel=False,     # Whether to use Gumbel-Softmax in the action sampling
            temp=0.01,           # The temperature of the Gumbel-Softmax method
            lambda_h=0.0         # Strength of entropy regularization (negative values encourage entropy)
        )
        print('START STATE:', start_px, start_py, start_orien)
        print('FINAL ACTIONS:',
              ", ".join([str(a) + ' (' + NavigationTask.actions[a] + ')'
                         for a in actions]))
        print('GOAL STATE:', goal_state)
        newEnv = NavigationTask(
            agent_start_pos=[np.array([start_px, start_py]),
                             NavigationTask.oriens[start_orien]],
            goal_pos=np.array(goal_state))
        for action in actions:
            newEnv.performAction(action)
        state = newEnv.getStateRep(oneHotOutput=False)
        pred_x = state[0]
        pred_y = state[1]
        pred_orien = NavigationTask.oriens[np.argmax(state[2:6])]
        print('PREDICTED FINAL STATE:', pred_x, pred_y, pred_orien)
    ################################################################################################################
    if henaffHyperSearch:
        print('Loading ', f_model_name)
        f.load_state_dict(torch.load(f_model_name))
        ##################### Hyper-params #####################
        # lambda_hs = [0.0,0.01,-0.01,0.05,-0.05,0.005,-0.005]            # Entropy strength
        # etas = [0.5,0.25,0.1,0.05,0.025,0.01,0.005,0.001,0.0005]        # Learning rate
        # useGumbels = [True,False]                                       # Whether to use Gumbel-softmax
        # temperatures = [0.1,0.01,0.001,1.0]                             # Temperature for Gumbel-softmax
        # noiseSigmas = [0.0,0.01,0.02,0.05,0.1,0.25,0.5,0.75,1.0,1.25]   # Noise strength on input
        ## Init try
        # lambda_hs = [0.0,0.005,-0.005]                                  # Entropy strength
        # etas = [0.5,0.25,0.1,0.05,0.025,0.01,0.005,0.001,0.0005]        # Learning rate
        # useGumbels = [True,False]                                       # Whether to use Gumbel-softmax
        # temperatures = [0.1,0.01,0.001]                                 # Temperature for Gumbel-softmax
        # noiseSigmas = [0.0,0.05,0.1,0.5,1.0]                            # Noise strength on input
        ## Only use ones with decent results
        lambda_hs = [0.0, -0.005]       # Entropy strength
        etas = [0.5, 0.25, 0.1, 0.005]  # Learning rate
        useGumbels = [True, False]      # Whether to use Gumbel-softmax
        temperatures = [0.1, 0.001]     # Temperature for Gumbel-softmax
        noiseSigmas = [0.5, 1.0]        # Noise strength on input
        ########################################################
        ###### Settings ######
        niters = 75
        verbose = False
        extraVerbose = False
        numRepeats = 10
        fileToWriteTo = 'hyper-param-results.txt'  # Set to None to do no writing
        distType = 1  # 0 = MSE, 1 = CE, 2 = dist
        ######################

        # Build an env with the given INT inputs
        def generateTask(px, py, orien, gx, gy):
            direction = NavigationTask.oriens[orien]
            gs = np.array([gx, gy])
            env = NavigationTask(agent_start_pos=[np.array([px, py]), direction],
                                 goal_pos=gs)
            return env

        # Function for running a single suite of tests (on one hyper-param set)
        def runTests(lh, eta, noiseLevel, ug, cnum, temp=None, distType=0):
            # Define tasks
            tasks = [[1, generateTask(0, 0, 0, 0, 2)],
                     [1, generateTask(5, 5, 1, 8, 5)],
                     [1, generateTask(3, 2, 2, 3, 0)],
                     [1, generateTask(9, 9, 3, 7, 9)],
                     [2, generateTask(0, 0, 0, 0, 6)],
                     [2, generateTask(0, 0, 0, 0, 8)],
                     [2, generateTask(2, 3, 0, 2, 8)],
                     [2, generateTask(0, 0, 0, 0, 10)],
                     [3, generateTask(1, 1, 0, 2, 2)]]
            # Choose dist type
            if distType == 0:
                useCE = False
                intDist = False
            elif distType == 1:
                useCE = True
                intDist = False
            elif distType == 2:
                useCE = False
                intDist = True
            # Display status
            wstring = (cnum + ',lambda_h=' + str(lh) + ',eta=' + str(eta) +
                       ',sigma=' + str(noiseLevel) + ',dType=' + str(distType) +
                       ',ug=' + str(ug))
            if ug:
                wstring += ',temp=' + str(temp)
            # For each task, repeated a few times, attempt to solve the problem
            score, tot = 0, 0
            for i, task in enumerate(tasks):
                #print(i)
                for _ in range(numRepeats):
                    planner = HenaffPlanner(f, maxNumActions=task[0])
                    cenv = task[1]
                    actions = planner.generatePlan(
                        cenv.getStateRep(oneHotOutput=True),
                        eta=eta,
                        noiseSigma=noiseLevel,
                        niters=niters,
                        goal_state=None,
                        useCE=True,
                        verbose=verbose,
                        extraVerbose=extraVerbose,
                        useGumbel=ug,
                        temp=temp,
                        lambda_h=lh,
                        useIntDistance=intDist)
                    # Check for correctness
                    for a in actions:
                        cenv.performAction(a)
                    r = cenv.getReward()
                    correct = (r == 1)
                    tot += 1
                    if correct: score += 1
            wstring += ' -> Score:' + str(score) + '/' + str(tot)
            print(wstring)
            # Write output
            if fileToWriteTo is not None:
                with open(fileToWriteTo, 'a') as filehandle:
                    filehandle.write(wstring + '\n')

        # Run tasks over all hyper-parameter settings
        N_p = len(lambda_hs) * len(etas) * len(noiseSigmas) * (1 + len(temperatures))
        cp = 1
        for lambda_h in lambda_hs:
            for eta in etas:
                for noiseLevel in noiseSigmas:
                    for ug in useGumbels:
                        if ug:
                            for temp in temperatures:
                                ps = str(cp) + '/' + str(N_p)
                                runTests(lambda_h, eta, noiseLevel, ug, ps, temp,
                                         distType=distType)
                                cp += 1
                        else:
                            ps = str(cp) + '/' + str(N_p)
                            runTests(lambda_h, eta, noiseLevel, ug, ps,
                                     distType=distType)
                            cp += 1
def main():
    ####### Settings #######
    preloadModel = True
    runTraining = True  # Task 1
    testHenaff = False  # Task 2
    testFM = False      # Task 3
    ########################
    ############################ External Files ############################
    ts = "navigation-data-train-sequence-singularDiscrete.pickle"
    vs = "navigation-data-test-sequence-singularDiscrete.pickle"
    f_model_name_to_preload = 'forward-lstm-singDisc-TF0-ns-7.pt'
    f_model_name_to_save = 'forward-lstm-singDisc-TF0-ns-9.pt'  # For training
    ########################################################################
    # Define shell environment and empty forward model
    exampleEnv = NavigationTask()
    f = ForwardModelLSTM_SD(exampleEnv)
    # Preload the forward model, if wanted
    if preloadModel and f_model_name_to_preload is not None:
        f.load_state_dict(torch.load(f_model_name_to_preload))
    # Run training if desired
    if runTraining:
        trainSeqs = SingDiscSeqData(ts, exampleEnv)
        validSeqs = SingDiscSeqData(vs, exampleEnv)
        f.runTraining(trainSeqs, validSeqs,
                      modelFilenameToSave=f_model_name_to_save,
                      noiseSigma=0.01)  # Note: does nothing
    if testFM:
        # Start location
        start_px = 1
        start_py = 1
        start_orien = 0
        start_state = exampleEnv.singularDiscreteStateFromInts(
            start_px, start_py, start_orien)
        # Actions
        actions = np.zeros((5, 10))
        actions[0, 8] = 1.0
        actions[1, 7] = 1.0
        actions[2, 2] = 1.0
        actions[3, 7] = 1.0
        actions[4, 8] = 1.0
        actions = avar(torch.FloatTensor(actions))
        #print(actions)
        #print(actions[0])
        # Get prediction
        start_state = avar(torch.FloatTensor(start_state))
        outputs, hidden = f.runOnActionSequence(start_state.unsqueeze(0),
                                                actions, hidden=None)
        finalOutput = outputs[-1]
        print(finalOutput.max())
        print('Pred final state',
              f.env.singularDiscreteStateToInts(finalOutput.data.numpy()[0]))
    #
    if testHenaff:
        print('Environment states')
        start_px = 0
        start_py = 0
        start_orien = 0
        start_state = exampleEnv.singularDiscreteStateFromInts(
            start_px, start_py, start_orien)
        goal_state = [0, 2]
        maxNumActions = 2
        print('Building planner')
        planner = HenaffPlanner(f, maxNumActions=maxNumActions)
        print('Starting generation')
        actions = planner.generatePlan(
            start_state,      # The starting state of the agent (one-hot singDisc)
            goal_state,       # The goal state of the agent as two ints (e.g. [gx,gy])
            eta=0.01,         # The learning rate given to ADAM
            noiseSigma=0.25,  # Noise strength on inputs. Overwrites the default setting from the init
            niters=100,       # Number of optimization iterations. Overwrites the default setting from the init
            useGumbel=True,   # Whether to use Gumbel-Softmax in the action sampling
            temp=1.0,         # The temperature of the Gumbel-Softmax method
            lambda_h=0.0      # Strength of entropy regularization (negative values encourage entropy)
        )
        print('START STATE:', start_px, start_py, start_orien)
        print('FINAL ACTIONS:',
              ", ".join([str(a) + ' (' + NavigationTask.actions[a] + ')'
                         for a in actions]))
        print('GOAL STATE:', goal_state)
        newEnv = NavigationTask(
            agent_start_pos=[np.array([start_px, start_py]),
                             NavigationTask.oriens[start_orien]],
            goal_pos=np.array(goal_state))
        for action in actions:
            newEnv.performAction(action)
        state = newEnv.getStateRep(oneHotOutput=False)
        pred_x = state[0]
        pred_y = state[1]
        pred_orien = NavigationTask.oriens[np.argmax(state[2:6])]
        print('PREDICTED FINAL STATE:', pred_x, pred_y, pred_orien)
def navmain():
    env = NavigationTask()  # (stochasticity=0.2)
    state_i = env.getStateRep()
    # Build the goal state: replace the first position vector with the goal position vector
    state_f = env.getStateRep()
    inds = np.cumsum([0, env.w, env.h, len(env.oriens), env.w, env.h])
    state_f[inds[0]:inds[1]] = env._intToOneHot(env.goal_pos[0], env.w)
    state_f[inds[1]:inds[2]] = env._intToOneHot(env.goal_pos[1], env.h)
    state_i = env.getStateRep()  # Get the initial state
    with tf.Graph().as_default(), tf.Session() as sess:
        numActions = 2
        nIters = 100
        # Initialize the Henaff planning method
        hp = Henaff_Planning(numActions, 10, 64, nIters, 0.0000001)
        init = tf.variables_initializer(hp.trainable_vars)
        sess.run(init)
        print(state_i, state_f)
        action_sequence = hp.optimize(state_i, state_f, env)
        # Convert the action sequence to [num_actions,] action id numbers
        action_sequence = np.argmax(action_sequence, 1)
        action_sequence = np.reshape(action_sequence, [len(action_sequence), ])
        for action in action_sequence:
            print('\n')
            print('-Initial State-')
            env.display()
            print('-Action Taken-')
            env.performAction(action)
            print(env.actions[action])
            print('-Resultant State-')
            env.display()
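# ---------------------------------------------------------------------------
# Henaff_Planning (above) and HenaffPlanner (elsewhere in these scripts) are
# project classes not shown here.  The function below is only a minimal sketch
# of the core idea they implement: treat the action sequence as free parameters,
# roll it through a differentiable forward model, and minimize the distance
# between the predicted final state and the goal state before discretizing with
# argmax.  The names and the optimization details here are assumptions for
# illustration, not the project's actual implementation.
# ---------------------------------------------------------------------------
import torch


def henaff_plan_sketch(forward_model, s_0, s_goal, num_actions=2,
                       action_dim=10, niters=100, eta=0.01):
    """Gradient-based action-sequence optimization through a forward model."""
    logits = torch.zeros(num_actions, action_dim, requires_grad=True)
    opt = torch.optim.Adam([logits], lr=eta)
    for _ in range(niters):
        state = s_0
        for t in range(num_actions):
            soft_action = torch.softmax(logits[t], dim=0)            # relaxed action
            state = forward_model(torch.cat([state, soft_action]))   # predicted next state
        loss = torch.sum((state - s_goal) ** 2)                      # distance to goal
        opt.zero_grad()
        loss.backward()
        opt.step()
    return logits.argmax(dim=1).tolist()                             # discretize at the end

# Example (illustrative), with a stand-in linear forward model:
#   toy_model = torch.nn.Linear(64 + 10, 64)
#   s_0, s_goal = torch.zeros(64), torch.zeros(64); s_goal[2] = 1.0
#   plan = henaff_plan_sketch(toy_model, s_0, s_goal)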
        print('--')
    # Return the final action sequence
    return [x.max(0)[1].data[0] for x in x_t]


# Build an env with the given INT inputs
def generateTask(px, py, orien, gx, gy):
    direction = NavigationTask.oriens[orien]
    gs = np.array([gx, gy])
    env = NavigationTask(agent_start_pos=[np.array([px, py]), direction],
                         goal_pos=gs)
    return env


# Function for running a single suite of tests (on one hyper-param set)
env = NavigationTask()


def runTests(lh, eta, noiseLevel, ug, cnum, temp=None, distType=0,
             difficulty='Hard', tasks=None, verbose=False, extraVerbose=False):
    numRepeats = 5
def main():
    ####################################################
    useFFANN = True
    trainingFFANN = False      # 1
    manualTest = False         # 2
    autoTest = False           # 3
    henaffHyperSearch = False  # 4
    runHenaffFFANN = False     # 5
    ####################################################
    print(sys.argv)
    if len(sys.argv) > 1:
        if sys.argv[1] == '1': trainingFFANN = True
        if sys.argv[1] == '2': manualTest = True
        if sys.argv[1] == '3': autoTest = True
        if sys.argv[1] == '4': henaffHyperSearch = True
        if sys.argv[1] == '5': runHenaffFFANN = True
    if useFFANN:
        f_model_name = 'forward-ffann-noisy-wan-1.pt'  # 6 gets 99% on 0.1% noise
        exampleEnv = NavigationTask()
        f = ForwardModelFFANN(exampleEnv)
    ################################################################################################################
    if trainingFFANN:
        ############
        ts = "navigation-data-train-single-small.pickle"
        vs = "navigation-data-test-single-small.pickle"
        tsx_noisy = "noisier-actNoise-navigation-data-single.pickle"
        preload_name = f_model_name
        saveName = 'forward-ffann-noisy-wan-2.pt'
        ############
        print('Reading Data')
        with open(ts, 'rb') as inFile:
            print('\tReading', ts)
            trainSet = pickle.load(inFile)
        with open(vs, 'rb') as inFile:
            print('\tReading', vs)
            validSet = pickle.load(inFile)
        if preload_name is not None:
            print('Loading from', f_model_name)
            f.load_state_dict(torch.load(f_model_name))
        f.train(trainSet, validSet, noisyDataSetTxLoc=tsx_noisy,
                f_model_name=saveName)
        print('Saving to', saveName)
        torch.save(f.state_dict(), saveName)
    ################################################################################################################
    elif manualTest:
        ###
        #f_model_name = 'forward-ffann-noisy6.pt'
        ###
        f.load_state_dict(torch.load(f_model_name))
        start = np.zeros(74, dtype=np.float32)
        start[0 + 4] = 1
        start[15 + 6] = 1
        start[15 + 15 + 0] = 1
        start[15 + 15 + 4 + 8] = 1
        start[15 + 15 + 4 + 15 + 7] = 1
        start[15 + 15 + 4 + 15 + 15 + 4] = 1.0
        f.test(start)
        print('-----\n', 'Starting manualTest loop')
        for i in range(5):
            width, height = 15, 15
            p_0 = np.array([npr.randint(0, width), npr.randint(0, height)])
            start_pos = [p_0, r.choice(NavigationTask.oriens)]
            goal_pos = np.array([npr.randint(0, width), npr.randint(0, height)])
            checkEnv = NavigationTask(width=width, height=height,
                                      agent_start_pos=start_pos,
                                      goal_pos=goal_pos,
                                      track_history=True,
                                      stochasticity=0.0,
                                      maxSteps=10)
            s_0 = checkEnv.getStateRep()
            #a1, a2 = np.zeros(10), np.zeros(10)
            #a1[ npr.randint(0,10) ] = 1
            #a2[ npr.randint(0,10) ] = 1
            numActions = 3
            currState = avar(torch.FloatTensor(s_0).unsqueeze(0))
            print('Start State')
            f.printState(currState[0])
            actionSet = []
            for j in range(numActions):
                action = np.zeros(10)
                action[npr.randint(0, 10)] = 1
                action += npr.randn(10) * 0.1
                action = Utils.softmax(action)
                print('\tSoft Noisy Action ', j, '=', action)
                #### Apply Gumbel Softmax ####
                temperature = 0.01
                logProbAction = torch.log(avar(torch.FloatTensor(action)))
                actiong = GumbelSoftmax.gumbel_softmax(logProbAction, temperature)
                ##############################
                print('\tGumbel Action ', j, '=', actiong.data.numpy())
                actionSet.append(actiong)
                checkEnv.performAction(np.argmax(action))
                a = actiong  # avar( torch.FloatTensor(actiong) )
                currState = f.forward(torch.cat([currState[0], a]).unsqueeze(0))
                print("Intermediate State", j)
                f.printState(currState[0])
            #checkEnv.performAction(np.argmax(a1))
            #checkEnv.performAction(np.argmax(a2))
            s_1 = checkEnv.getStateRep()
            #inval = np.concatenate( (s_0,a1) )
            #outval1 = f.forward( avar(torch.FloatTensor(inval).unsqueeze(0)) )
            #print(outval1.shape)
            #print(a2.shape)
            #inval2 = np.concatenate( (outval1[0].data.numpy(),a2) )
            #outval2 = f.forward( avar(torch.FloatTensor(inval2).unsqueeze(0)) )
            for action in actionSet:
                f.printAction(action)
            print('Predicted')
            f.printState(currState[0])
            print('Actual')
            s1 = avar(torch.FloatTensor(s_1).unsqueeze(0))
            f.printState(s1[0])
            print("Rough accuracy", torch.sum((currState - s1).pow(2)).data[0])
            #print('Predicted',currState.data[0].numpy())
            #print('Actual',s_1)
            #outval1 = f.test(inval,s_1)
            print('----\n')
    if autoTest:
        print('Loading from', f_model_name)
        f.load_state_dict(torch.load(f_model_name))
        # TODO
    ################################################################################################################
    if runHenaffFFANN:
        print('Loading from', f_model_name)
        f.load_state_dict(torch.load(f_model_name))
        start = np.zeros(64)
        start[0] = 1
        start[15] = 1
        start[15 + 15] = 1
        start[15 + 15 + 4 + 0] = 1
        start[15 + 15 + 4 + 15 + 4] = 1
        print(f.env.deconcatenateOneHotStateVector(start))
        print('Building planner')
        planner = HenaffPlanner(f, maxNumActions=2)
        print('Starting generation')
        actions = planner.generatePlan(start,
                                       eta=0.1,
                                       noiseSigma=0.5,
                                       niters=500,
                                       goal_state=None,
                                       useCE=True,
                                       verbose=True,
                                       extraVerbose=False,
                                       useGumbel=True,
                                       temp=0.1,
                                       lambda_h=-0.005,
                                       useIntDistance=False)
        print('FINAL ACTIONS:', actions)
    ################################################################################################################
    if henaffHyperSearch:
        print('Loading ', f_model_name)
        f.load_state_dict(torch.load(f_model_name))
        ##################### Hyper-params #####################
        # lambda_hs = [0.0,0.01,-0.01,0.05,-0.05,0.005,-0.005]            # Entropy strength
        # etas = [0.5,0.25,0.1,0.05,0.025,0.01,0.005,0.001,0.0005]        # Learning rate
        # useGumbels = [True,False]                                       # Whether to use Gumbel-softmax
        # temperatures = [0.1,0.01,0.001,1.0]                             # Temperature for Gumbel-softmax
        # noiseSigmas = [0.0,0.01,0.02,0.05,0.1,0.25,0.5,0.75,1.0,1.25]   # Noise strength on input
        ## Init try
        # lambda_hs = [0.0,0.005,-0.005]                                  # Entropy strength
        # etas = [0.5,0.25,0.1,0.05,0.025,0.01,0.005,0.001,0.0005]        # Learning rate
        # useGumbels = [True,False]                                       # Whether to use Gumbel-softmax
        # temperatures = [0.1,0.01,0.001]                                 # Temperature for Gumbel-softmax
        # noiseSigmas = [0.0,0.05,0.1,0.5,1.0]                            # Noise strength on input
        ## Only use ones with decent results
        lambda_hs = [0.0, -0.005]       # Entropy strength
        etas = [0.5, 0.25, 0.1, 0.005]  # Learning rate
        useGumbels = [True, False]      # Whether to use Gumbel-softmax
        temperatures = [0.1, 0.001]     # Temperature for Gumbel-softmax
        noiseSigmas = [0.5, 1.0]        # Noise strength on input
        ########################################################
        ###### Settings ######
        niters = 75
        verbose = False
        extraVerbose = False
        numRepeats = 10
        fileToWriteTo = 'hyper-param-results.txt'  # Set to None to do no writing
        distType = 1  # 0 = MSE, 1 = CE, 2 = dist
        ######################

        # Build an env with the given INT inputs
        def generateTask(px, py, orien, gx, gy):
            direction = NavigationTask.oriens[orien]
            gs = np.array([gx, gy])
            env = NavigationTask(agent_start_pos=[np.array([px, py]), direction],
                                 goal_pos=gs)
            return env

        # Function for running a single suite of tests (on one hyper-param set)
        def runTests(lh, eta, noiseLevel, ug, cnum, temp=None, distType=0):
            # Define tasks
            tasks = [[1, generateTask(0, 0, 0, 0, 2)],
                     [1, generateTask(5, 5, 1, 8, 5)],
                     [1, generateTask(3, 2, 2, 3, 0)],
                     [1, generateTask(9, 9, 3, 7, 9)],
                     [2, generateTask(0, 0, 0, 0, 6)],
                     [2, generateTask(0, 0, 0, 0, 8)],
                     [2, generateTask(2, 3, 0, 2, 8)],
                     [2, generateTask(0, 0, 0, 0, 10)],
                     [3, generateTask(1, 1, 0, 2, 2)]]
            # Choose dist type
            if distType == 0:
                useCE = False
                intDist = False
            elif distType == 1:
                useCE = True
                intDist = False
            elif distType == 2:
                useCE = False
                intDist = True
            # Display status
            wstring = (cnum + ',lambda_h=' + str(lh) + ',eta=' + str(eta) +
                       ',sigma=' + str(noiseLevel) + ',dType=' + str(distType) +
                       ',ug=' + str(ug))
            if ug:
                wstring += ',temp=' + str(temp)
            # For each task, repeated a few times, attempt to solve the problem
            score, tot = 0, 0
            for i, task in enumerate(tasks):
                #print(i)
                for _ in range(numRepeats):
                    planner = HenaffPlanner(f, maxNumActions=task[0])
                    cenv = task[1]
                    actions = planner.generatePlan(
                        cenv.getStateRep(oneHotOutput=True),
                        eta=eta,
                        noiseSigma=noiseLevel,
                        niters=niters,
                        goal_state=None,
                        useCE=True,
                        verbose=verbose,
                        extraVerbose=extraVerbose,
                        useGumbel=ug,
                        temp=temp,
                        lambda_h=lh,
                        useIntDistance=intDist)
                    # Check for correctness
                    for a in actions:
                        cenv.performAction(a)
                    r = cenv.getReward()
                    correct = (r == 1)
                    tot += 1
                    if correct: score += 1
            wstring += ' -> Score:' + str(score) + '/' + str(tot)
            print(wstring)
            # Write output
            if fileToWriteTo is not None:
                with open(fileToWriteTo, 'a') as filehandle:
                    filehandle.write(wstring + '\n')

        # Run tasks over all hyper-parameter settings
        N_p = len(lambda_hs) * len(etas) * len(noiseSigmas) * (1 + len(temperatures))
        cp = 1
        for lambda_h in lambda_hs:
            for eta in etas:
                for noiseLevel in noiseSigmas:
                    for ug in useGumbels:
                        if ug:
                            for temp in temperatures:
                                ps = str(cp) + '/' + str(N_p)
                                runTests(lambda_h, eta, noiseLevel, ug, ps, temp,
                                         distType=distType)
                                cp += 1
                        else:
                            ps = str(cp) + '/' + str(N_p)
                            runTests(lambda_h, eta, noiseLevel, ug, ps,
                                     distType=distType)
                            cp += 1
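# ---------------------------------------------------------------------------
# gumbel_softmax / GumbelSoftmax.gumbel_softmax used in the manualTest loop
# above come from a project helper that is not shown here.  The function below
# is a minimal standalone sketch of the standard Gumbel-Softmax relaxation
# (Jang et al.; Maddison et al.) for reference; it is an assumption about the
# helper's behavior, not its actual code.
# ---------------------------------------------------------------------------
import torch


def gumbel_softmax_sketch(log_probs, temperature=0.1):
    """Draw a differentiable, approximately one-hot sample from a categorical."""
    # Sample Gumbel(0, 1) noise: -log(-log(U)) with U ~ Uniform(0, 1)
    u = torch.rand_like(log_probs)
    gumbel_noise = -torch.log(-torch.log(u + 1e-20) + 1e-20)
    # Perturb the log-probabilities and apply a temperature-scaled softmax
    return torch.softmax((log_probs + gumbel_noise) / temperature, dim=-1)

# Example (illustrative): relax a noisy 10-way action distribution, as in the
# manualTest loop above:
#   probs = torch.full((10,), 0.05); probs[3] = 0.55
#   soft_onehot = gumbel_softmax_sketch(torch.log(probs), temperature=0.01)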
def main():
    ####################################################
    trainingLSTM = False
    overwrite = False
    runHenaff = False
    testFM = False
    ###
    useFFANN = True
    trainingFFANN = False
    ####################################################
    if useFFANN:
        f_model_name = 'forward-ffann-stochastic.pt'
        exampleEnv = NavigationTask()
        f = ForwardModelFFANN(exampleEnv)
        if trainingFFANN:
            ts = "navigation-data-train-single-small.pickle"
            vs = "navigation-data-test-single-small.pickle"
            print('Reading Data')
            with open(ts, 'rb') as inFile:
                print('\tReading', ts)
                trainSet = pickle.load(inFile)
            with open(vs, 'rb') as inFile:
                print('\tReading', vs)
                validSet = pickle.load(inFile)
            f.train(trainSet, validSet)
            print('Saving to', f_model_name)
            torch.save(f.state_dict(), f_model_name)
        else:
            f.load_state_dict(torch.load(f_model_name))
            start = np.zeros(74, dtype=np.float32)
            start[0 + 4] = 1
            start[15 + 6] = 1
            start[15 + 15 + 0] = 1
            start[15 + 15 + 4 + 8] = 1
            start[15 + 15 + 4 + 15 + 7] = 1
            start[15 + 15 + 4 + 15 + 15 + 4] = 1.0
            f.test(start)
            for i in range(10):
                width, height = 15, 15
                p_0 = np.array([npr.randint(0, width), npr.randint(0, height)])
                start_pos = [p_0, r.choice(NavigationTask.oriens)]
                goal_pos = np.array([npr.randint(0, width), npr.randint(0, height)])
                checkEnv = NavigationTask(width=width, height=height,
                                          agent_start_pos=start_pos,
                                          goal_pos=goal_pos,
                                          track_history=True,
                                          stochasticity=0.0,
                                          maxSteps=10)
                s_0 = checkEnv.getStateRep()
                a = np.zeros(10)
                a[npr.randint(0, 10)] = 1
                inval = np.concatenate((s_0, a))
                checkEnv.performAction(np.argmax(a))
                s_1 = checkEnv.getStateRep()
                f.test(inval, s_1)
                print('----')
    else:
        f_model_name = 'forward-lstm-stochastic.pt'
        s = 'navigation'  # 'transport'
        trainf, validf = s + "-data-train-small.pickle", s + "-data-test-small.pickle"
        print('Reading Data')
        train, valid = SeqData(trainf), SeqData(validf)
        f = ForwardModelLSTM(train.lenOfInput, train.lenOfState)
        if trainingLSTM:
            if os.path.exists(f_model_name) and not overwrite:
                print('Loading from', f_model_name)
                f.load_state_dict(torch.load(f_model_name))
            else:
                f.train(train, valid)
                print('Saving to', f_model_name)
                torch.save(f.state_dict(), f_model_name)
            print('Q-test')
            bdata, blabels, _ = valid.next(2000, nopad=True)
            acc1, _ = f._accuracyBatch(bdata, blabels, valid.env)
            print(acc1)
        if runHenaff:
            print('Loading from', f_model_name)
            f.load_state_dict(torch.load(f_model_name))
            # seq,label = train.randomTrainingPair()
            # start = seq[0][0:64]
            # start[63] = 0
            # start[63-15] = 0
            # start[15+15+4+5] = 1
            # start[15+15+4+15+5] = 1
            # start
            start = np.zeros(64)
            start[0] = 1
            start[15] = 1
            start[15 + 15] = 1
            start[15 + 15 + 4 + 0] = 1
            start[15 + 15 + 4 + 15 + 2] = 1
            print(train.env.deconcatenateOneHotStateVector(start))
            #sys.exit(0)
            print('Building planner')
            planner = HenaffPlanner(f)
            print('Starting generation')
            planner.generatePlan(start, train.env, niters=150)
        if testFM:
            f.load_state_dict(torch.load(f_model_name))
            start = np.zeros(64)
            start[0 + 2] = 1
            start[15 + 3] = 1
            start[15 + 15 + 0] = 1
            start[15 + 15 + 4 + 5] = 1
            start[15 + 15 + 4 + 15 + 5] = 1
            action = np.zeros(10)
            deconRes = train.env.deconcatenateOneHotStateVector(start)
            print('Start state')
            print('px', np.argmax(deconRes[0]))
            print('py', np.argmax(deconRes[1]))
            print('orien', np.argmax(deconRes[2]))
            print('gx', np.argmax(deconRes[3]))
            print('gy', np.argmax(deconRes[4]))
            action[5] = 1.0
            stateAction = [torch.cat([torch.FloatTensor(start),
                                      torch.FloatTensor(action)])]
            #print('SA:',stateAction)
            #print('Start State')
            #printState( stateAction[0][0:-10], train.env )
            print('Action', NavigationTask.actions[np.argmax(action)])
            f.reInitialize()
            # [seqlen x batchlen x hidden_size]
            seq = avar(torch.cat(stateAction).view(len(stateAction), 1, -1))
            result = f.forward(seq)
            print('PredState')
            printState(result, train.env)
def main():
    ####### Settings #######
    preloadModel = True
    runTraining = True          # Task 1
    testHenaff = False          # Task 2
    testFM = False              # Task 3
    henaffHyperSearch = False   # Task 4
    ########################
    ############################ External Files ############################
    ts = "navigation-data-train-sequence-singularDiscrete.pickle"
    vs = "navigation-data-test-sequence-singularDiscrete.pickle"
    f_model_name_to_preload = 'forward-lstm-singDisc-scratch-pt0d5-3.pt'
    f_model_name_to_save = 'forward-lstm-singDisc-scratch-pt0d5-4.pt'  # For training
    ########################################################################
    # Define shell environment and empty forward model
    exampleEnv = NavigationTask()
    f = ForwardModelLSTM_SD(exampleEnv)
    # Preload the forward model, if wanted
    if preloadModel and f_model_name_to_preload is not None:
        f.load_state_dict(torch.load(f_model_name_to_preload))
    # Run training if desired
    if runTraining:
        trainSeqs = SingDiscSeqData(ts, exampleEnv)
        validSeqs = SingDiscSeqData(vs, exampleEnv)
        print('Saving to', f_model_name_to_save)
        f.runTraining(trainSeqs, validSeqs,
                      modelFilenameToSave=f_model_name_to_save)
    if testFM:
        # Start location
        start_px = 1
        start_py = 1
        start_orien = 0
        start_state = exampleEnv.singularDiscreteStateFromInts(
            start_px, start_py, start_orien)
        # Actions
        actions = np.zeros((5, 10))
        actions[0, 8] = 1.0
        actions[1, 7] = 1.0
        actions[2, 2] = 1.0
        actions[3, 7] = 1.0
        actions[4, 8] = 1.0
        actions = avar(torch.FloatTensor(actions))
        #print(actions)
        #print(actions[0])
        # Get prediction
        start_state = avar(torch.FloatTensor(start_state))
        outputs, hidden = f.runOnActionSequence(start_state.unsqueeze(0),
                                                actions, hidden=None)
        finalOutput = outputs[-1]
        print(finalOutput.max())
        print('Pred final state',
              f.env.singularDiscreteStateToInts(finalOutput.data.numpy()[0]))
    #
    if testHenaff:
        print('Environment states')
        start_px = 0
        start_py = 0
        start_orien = 0
        start_state = exampleEnv.singularDiscreteStateFromInts(
            start_px, start_py, start_orien)
        goal_state = [0, 2]
        maxNumActions = 2
        print('Building planner')
        planner = HenaffPlanner(f, maxNumActions=maxNumActions)
        print('Starting generation')
        actions = planner.generatePlan(
            start_state,      # The starting state of the agent (one-hot singDisc)
            goal_state,       # The goal state of the agent as two ints (e.g. [gx,gy])
            eta=0.01,         # The learning rate given to ADAM
            noiseSigma=0.25,  # Noise strength on inputs. Overwrites the default setting from the init
            niters=100,       # Number of optimization iterations. Overwrites the default setting from the init
            useGumbel=True,   # Whether to use Gumbel-Softmax in the action sampling
            temp=1.0,         # The temperature of the Gumbel-Softmax method
            lambda_h=0.0      # Strength of entropy regularization (negative values encourage entropy)
        )
        print('START STATE:', start_px, start_py, start_orien)
        print('FINAL ACTIONS:',
              ", ".join([str(a) + ' (' + NavigationTask.actions[a] + ')'
                         for a in actions]))
        print('GOAL STATE:', goal_state)
        newEnv = NavigationTask(
            agent_start_pos=[np.array([start_px, start_py]),
                             NavigationTask.oriens[start_orien]],
            goal_pos=np.array(goal_state))
        for action in actions:
            newEnv.performAction(action)
        state = newEnv.getStateRep(oneHotOutput=False)
        pred_x = state[0]
        pred_y = state[1]
        pred_orien = NavigationTask.oriens[np.argmax(state[2:6])]
        print('PREDICTED FINAL STATE:', pred_x, pred_y, pred_orien)
    if henaffHyperSearch:
        # Note: this branch reuses the preloaded forward-model file
        print('Loading ', f_model_name_to_preload)
        f.load_state_dict(torch.load(f_model_name_to_preload))
        ##################### Hyper-params #####################
        lambda_hs = [0.0, -0.005]       # Entropy strength
        etas = [0.5, 0.25, 0.1, 0.005]  # Learning rate
        useGumbels = [True, False]      # Whether to use Gumbel-softmax
        temperatures = [0.1, 0.001]     # Temperature for Gumbel-softmax
        noiseSigmas = [0.5, 1.0]        # Noise strength on input
        ########################################################
        ###### Settings ######
        niters = 75
        verbose = False
        extraVerbose = False
        numRepeats = 10
        fileToWriteTo = 'hyper-param-results.txt'  # Set to None to do no writing
        distType = 1  # 0 = MSE, 1 = CE, 2 = dist
        ######################

        # Build an env with the given INT inputs
        def generateTask(px, py, orien, gx, gy):
            direction = NavigationTask.oriens[orien]
            gs = np.array([gx, gy])
            env = NavigationTask(agent_start_pos=[np.array([px, py]), direction],
                                 goal_pos=gs)
            return env

        # Function for running a single suite of tests (on one hyper-param set)
        def runTests(lh, eta, noiseLevel, ug, cnum, temp=None, distType=0):
            # Define tasks
            tasks = [[1, generateTask(0, 0, 0, 0, 2)],
                     [1, generateTask(5, 5, 1, 8, 5)],
                     [1, generateTask(3, 2, 2, 3, 0)],
                     [1, generateTask(9, 9, 3, 7, 9)],
                     [2, generateTask(0, 0, 0, 0, 6)],
                     [2, generateTask(0, 0, 0, 0, 8)],
                     [2, generateTask(2, 3, 0, 2, 8)],
                     [2, generateTask(0, 0, 0, 0, 10)],
                     [3, generateTask(1, 1, 0, 2, 2)]]
            # Choose dist type
            if distType == 0:
                useCE = False
                intDist = False
            elif distType == 1:
                useCE = True
                intDist = False
            elif distType == 2:
                useCE = False
                intDist = True
            # Display status
            wstring = (cnum + ',lambda_h=' + str(lh) + ',eta=' + str(eta) +
                       ',sigma=' + str(noiseLevel) + ',dType=' + str(distType) +
                       ',ug=' + str(ug))
            if ug:
                wstring += ',temp=' + str(temp)
            # For each task, repeated a few times, attempt to solve the problem
            score, tot = 0, 0
            for i, task in enumerate(tasks):
                #print(i)
                for _ in range(numRepeats):
                    planner = HenaffPlanner(f, maxNumActions=task[0])
                    cenv = task[1]
                    actions = planner.generatePlan(
                        cenv.getStateRep(oneHotOutput=True),
                        eta=eta,
                        noiseSigma=noiseLevel,
                        niters=niters,
                        goal_state=None,
                        useCE=True,
                        verbose=verbose,
                        extraVerbose=extraVerbose,
                        useGumbel=ug,
                        temp=temp,
                        lambda_h=lh,
                        useIntDistance=intDist)
                    # Check for correctness
                    for a in actions:
                        cenv.performAction(a)
                    r = cenv.getReward()
                    correct = (r == 1)
                    tot += 1
                    if correct: score += 1
            wstring += ' -> Score:' + str(score) + '/' + str(tot)
            print(wstring)
            # Write output
            if fileToWriteTo is not None:
                with open(fileToWriteTo, 'a') as filehandle:
                    filehandle.write(wstring + '\n')

        # Run tasks over all hyper-parameter settings
        N_p = len(lambda_hs) * len(etas) * len(noiseSigmas) * (1 + len(temperatures))
        cp = 1
        for lambda_h in lambda_hs:
            for eta in etas:
                for noiseLevel in noiseSigmas:
                    for ug in useGumbels:
                        if ug:
                            for temp in temperatures:
                                ps = str(cp) + '/' + str(N_p)
                                runTests(lambda_h, eta, noiseLevel, ug, ps, temp,
                                         distType=distType)
                                cp += 1
                        else:
                            ps = str(cp) + '/' + str(N_p)
                            runTests(lambda_h, eta, noiseLevel, ug, ps,
                                     distType=distType)
                            cp += 1
def main():
    ####################################################
    trainingLSTM = False
    overwrite = False
    runHenaff = False
    testFM = False
    ###
    useFFANN = True
    trainingFFANN = False
    manualTest = False
    autoTest = False
    henaffHyperSearch = False
    runHenaffFFANN = True  # True
    ####################################################
    if useFFANN:
        f_model_name = 'forward-ffann-noisy-wan-1.pt'  # 6 gets 99% on 0.1% noise
        exampleEnv = NavigationTask()
        f = ForwardModelFFANN(exampleEnv)
        if trainingFFANN:
            ############
            ts = "navigation-data-train-single-small.pickle"
            vs = "navigation-data-test-single-small.pickle"
            tsx_noisy = "noisier-actNoise-navigation-data-single.pickle"
            preload_name = f_model_name
            saveName = 'forward-ffann-noisy-wan-2.pt'
            ############
            print('Reading Data')
            with open(ts, 'rb') as inFile:
                print('\tReading', ts)
                trainSet = pickle.load(inFile)
            with open(vs, 'rb') as inFile:
                print('\tReading', vs)
                validSet = pickle.load(inFile)
            if preload_name is not None:
                print('Loading from', f_model_name)
                f.load_state_dict(torch.load(f_model_name))
            f.train(trainSet, validSet, noisyDataSetTxLoc=tsx_noisy,
                    f_model_name=saveName)
            print('Saving to', saveName)
            torch.save(f.state_dict(), saveName)
        elif manualTest:

            def softmax(x):
                e_x = np.exp(x - np.max(x))
                return e_x / e_x.sum()

            ###
            #f_model_name = 'forward-ffann-noisy6.pt'
            ###
            f.load_state_dict(torch.load(f_model_name))
            start = np.zeros(74, dtype=np.float32)
            start[0 + 4] = 1
            start[15 + 6] = 1
            start[15 + 15 + 0] = 1
            start[15 + 15 + 4 + 8] = 1
            start[15 + 15 + 4 + 15 + 7] = 1
            start[15 + 15 + 4 + 15 + 15 + 4] = 1.0
            f.test(start)
            print('-----\n', 'Starting manualTest loop')
            for i in range(5):
                width, height = 15, 15
                p_0 = np.array([npr.randint(0, width), npr.randint(0, height)])
                start_pos = [p_0, r.choice(NavigationTask.oriens)]
                goal_pos = np.array([npr.randint(0, width), npr.randint(0, height)])
                checkEnv = NavigationTask(width=width, height=height,
                                          agent_start_pos=start_pos,
                                          goal_pos=goal_pos,
                                          track_history=True,
                                          stochasticity=0.0,
                                          maxSteps=10)
                s_0 = checkEnv.getStateRep()
                #a1, a2 = np.zeros(10), np.zeros(10)
                #a1[ npr.randint(0,10) ] = 1
                #a2[ npr.randint(0,10) ] = 1
                numActions = 3
                currState = avar(torch.FloatTensor(s_0).unsqueeze(0))
                print('Start State')
                f.printState(currState[0])
                actionSet = []
                for j in range(numActions):
                    action = np.zeros(10)
                    action[npr.randint(0, 10)] = 1
                    action += npr.randn(10) * 0.1
                    action = softmax(action)
                    print('\tSoft Noisy Action ', j, '=', action)
                    #### Apply Gumbel Softmax ####
                    temperature = 0.01
                    logProbAction = torch.log(avar(torch.FloatTensor(action)))
                    actiong = gumbel_softmax(logProbAction, temperature)
                    ##############################
                    print('\tGumbel Action ', j, '=', actiong.data.numpy())
                    actionSet.append(actiong)
                    checkEnv.performAction(np.argmax(action))
                    a = actiong  # avar( torch.FloatTensor(actiong) )
                    currState = f.forward(torch.cat([currState[0], a]).unsqueeze(0))
                    print("Intermediate State", j)
                    f.printState(currState[0])
                #checkEnv.performAction(np.argmax(a1))
                #checkEnv.performAction(np.argmax(a2))
                s_1 = checkEnv.getStateRep()
                #inval = np.concatenate( (s_0,a1) )
                #outval1 = f.forward( avar(torch.FloatTensor(inval).unsqueeze(0)) )
                #print(outval1.shape)
                #print(a2.shape)
                #inval2 = np.concatenate( (outval1[0].data.numpy(),a2) )
                #outval2 = f.forward( avar(torch.FloatTensor(inval2).unsqueeze(0)) )
                for action in actionSet:
                    f.printAction(action)
                print('Predicted')
                f.printState(currState[0])
                print('Actual')
                s1 = avar(torch.FloatTensor(s_1).unsqueeze(0))
                f.printState(s1[0])
                print("Rough accuracy", torch.sum((currState - s1).pow(2)).data[0])
                #print('Predicted',currState.data[0].numpy())
                #print('Actual',s_1)
                #outval1 = f.test(inval,s_1)
                print('----\n')
        if autoTest:
            print('Loading from', f_model_name)
            f.load_state_dict(torch.load(f_model_name))
        if runHenaffFFANN:
            print('Loading from', f_model_name)
            f.load_state_dict(torch.load(f_model_name))
            start = np.zeros(64)
            start[0] = 1
            start[15] = 1
            start[15 + 15] = 1
            start[15 + 15 + 4 + 0] = 1
            start[15 + 15 + 4 + 15 + 4] = 1
            print(f.env.deconcatenateOneHotStateVector(start))
            #sys.exit(0)
            print('Building planner')
            planner = HenaffPlanner(f, maxNumActions=2)
            print('Starting generation')
            actions = planner.generatePlan(start, niters=100, extraVerbose=False)
        if henaffHyperSearch:
            print('Loading from', f_model_name)
            f.load_state_dict(torch.load(f_model_name))
            ### Hyper-params ###
            lambda_h = 0.01  # Entropy strength
            eta = 0.5        # Learning rate
            ###
    else:
        f_model_name = 'forward-lstm-stochastic.pt'
        s = 'navigation'  # 'transport'
        trainf, validf = s + "-data-train-small.pickle", s + "-data-test-small.pickle"
        print('Reading Data')
        train, valid = SeqData(trainf), SeqData(validf)
        f = ForwardModelLSTM(train.lenOfInput, train.lenOfState)
        if trainingLSTM:
            if os.path.exists(f_model_name) and not overwrite:
                print('Loading from', f_model_name)
                f.load_state_dict(torch.load(f_model_name))
            else:
                f.train(train, valid)
                print('Saving to', f_model_name)
                torch.save(f.state_dict(), f_model_name)
            print('Q-test')
            bdata, blabels, _ = valid.next(2000, nopad=True)
            acc1, _ = f._accuracyBatch(bdata, blabels, valid.env)
            print(acc1)
        if runHenaff:
            print('Loading from', f_model_name)
            f.load_state_dict(torch.load(f_model_name))
            # seq,label = train.randomTrainingPair()
            # start = seq[0][0:64]
            # start[63] = 0
            # start[63-15] = 0
            # start[15+15+4+5] = 1
            # start[15+15+4+15+5] = 1
            # start
            start = np.zeros(64)
            start[0] = 1
            start[15] = 1
            start[15 + 15] = 1
            start[15 + 15 + 4 + 0] = 1
            start[15 + 15 + 4 + 15 + 2] = 1
            print(train.env.deconcatenateOneHotStateVector(start))
            #sys.exit(0)
            print('Building planner')
            planner = HenaffPlanner(f)
            print('Starting generation')
            planner.generatePlan(start, train.env, niters=150)
        if testFM:
            f.load_state_dict(torch.load(f_model_name))
            start = np.zeros(64)
            start[0 + 2] = 1
            start[15 + 3] = 1
            start[15 + 15 + 0] = 1
            start[15 + 15 + 4 + 5] = 1
            start[15 + 15 + 4 + 15 + 5] = 1
            action = np.zeros(10)
            deconRes = train.env.deconcatenateOneHotStateVector(start)
            print('Start state')
            print('px', np.argmax(deconRes[0]))
            print('py', np.argmax(deconRes[1]))
            print('orien', np.argmax(deconRes[2]))
            print('gx', np.argmax(deconRes[3]))
            print('gy', np.argmax(deconRes[4]))
            action[5] = 1.0
            stateAction = [torch.cat([torch.FloatTensor(start),
                                      torch.FloatTensor(action)])]
            #print('SA:',stateAction)
            #print('Start State')
            #printState( stateAction[0][0:-10], train.env )
            print('Action', NavigationTask.actions[np.argmax(action)])
            f.reInitialize()
            # [seqlen x batchlen x hidden_size]
            seq = avar(torch.cat(stateAction).view(len(stateAction), 1, -1))
            result = f.forward(seq)
            print('PredState')
            printState(result, train.env)