print('\nInit Policies')

# Earlier single-policy setup, kept for reference:
# agent = a2c(model_dict)
# param_file = home+'/Documents/tmp/breakout_2frames/BreakoutNoFrameskip-v4/A2C/seed0/model_params/model_params9999360.pt'
# load_policy = 1

# Build one CNNPolicy per entry found under policies_dir and load its saved
# state dict from <entry>/model_params3/model_params9999360.pt.
policies = []
policies_dir = home + '/Documents/tmp/multiple_seeds_of_policies/BreakoutNoFrameskip-v4/A2C/'

# os.listdir() returns entries in arbitrary order and the loop below stops
# after the first one, so sort the listing to keep the loaded policy the same
# from run to run.
for f in sorted(os.listdir(policies_dir)):
    print(f)

    # presumably (input frames/channels, number of actions) -- TODO confirm
    # against the CNNPolicy constructor.
    policy = CNNPolicy(2, 4)  # .cuda()

    # policies_dir already ends with '/', so this yields the same path string
    # as the previous manual concatenation.
    param_file = os.path.join(policies_dir, f, 'model_params3', 'model_params9999360.pt')
    param_dict = torch.load(param_file)

    policy.load_state_dict(param_dict)
    # policy = torch.load(param_file).cuda()
    print('loaded params', param_file)
    policy.cuda()

    policies.append(policy)

    # just one for now
    break

# Previous single-checkpoint loading path, kept for reference:
# if load_policy:
#     param_file = home+'/Documents/tmp/breakout_2frames_leakyrelu2/BreakoutNoFrameskip-v4/A2C/seed0/model_params3/model_params3999840.pt'
#     print (param_dict.keys())
#     for key in param_dict.keys():
#         print (param_dict[key].size())
# Optionally restore the expert policy's weights from a saved checkpoint,
# then create a fresh imitator policy.
load_policy = 1

if load_policy:
    # Alternative (earlier) checkpoint:
    # param_file = home+'/Documents/tmp/breakout_2frames_leakyrelu2/BreakoutNoFrameskip-v4/A2C/seed0/model_params3/model_params3999840.pt'
    param_file = home+'/Documents/tmp/breakout_2frames_leakyrelu2/BreakoutNoFrameskip-v4/A2C/seed0/model_params3/model_params9999360.pt'
    param_dict = torch.load(param_file)

    # Debug aid: compare checkpoint entries with the model's own state dict.
    # print (param_dict.keys())
    # for key in param_dict.keys():
    #     print (param_dict[key].size())
    # print (policy.state_dict().keys())
    # for key in policy.state_dict().keys():
    #     print (policy.state_dict()[key].size())

    expert_policy.load_state_dict(param_dict)
    # policy = torch.load(param_file).cuda()
    print('loaded params', param_file)

# Move the expert to the GPU whether or not a checkpoint was loaded; the
# imitator below is also placed on the GPU.
expert_policy.cuda()

print('Init Imitator Policy')
# presumably the same (input frames/channels, number of actions) arguments as
# the expert -- TODO confirm against where expert_policy is constructed.
imitator_policy = CNNPolicy(2, 4).cuda()

# def save_params(save_location, model):
# Init policy, not agent: build the network, restore its weights from a saved
# checkpoint, move it to the GPU, then create the environment.
print('init policy')
policy = CNNPolicy(2*3, 18)  # frames*channels, action size

# Load params
# Alternative (earlier) checkpoint:
# param_file = exp_dir + '/' +env_name+ 'NoFrameskip-v4/A2C/seed0/model_params3/model_params2000000.pt'
param_file = exp_dir + '/' + env_name + 'NoFrameskip-v4/A2C/seed0/model_params3/model_params3999840.pt'
param_dict = torch.load(param_file)

policy.load_state_dict(param_dict)
# policy = torch.load(param_file).cuda()
print('loaded params', param_file)
policy.cuda()

# Init env
# NOTE(review): the checkpoint path is built from env_name while the
# environment is built from env_name2 -- confirm this difference is intended.
env = make_env_basic(env_name2)
# Optionally restore the expert policy's weights from a saved checkpoint,
# then create a fresh imitator policy.
load_policy = 1

if load_policy:
    # Alternative (earlier) checkpoint:
    # param_file = home+'/Documents/tmp/breakout_2frames_leakyrelu2/BreakoutNoFrameskip-v4/A2C/seed0/model_params3/model_params3999840.pt'
    param_file = home + '/Documents/tmp/breakout_2frames_leakyrelu2/BreakoutNoFrameskip-v4/A2C/seed0/model_params3/model_params9999360.pt'
    param_dict = torch.load(param_file)

    # Debug aid: compare checkpoint entries with the model's own state dict.
    # print (param_dict.keys())
    # for key in param_dict.keys():
    #     print (param_dict[key].size())
    # print (policy.state_dict().keys())
    # for key in policy.state_dict().keys():
    #     print (policy.state_dict()[key].size())

    expert_policy.load_state_dict(param_dict)
    # policy = torch.load(param_file).cuda()
    print('loaded params', param_file)

# Move the expert to the GPU whether or not a checkpoint was loaded; the
# imitator below is also placed on the GPU.
expert_policy.cuda()

print('Init Imitator Policy')
# presumably the same (input frames/channels, number of actions) arguments as
# the expert -- TODO confirm against where expert_policy is constructed.
imitator_policy = CNNPolicy(2, 4).cuda()

# def save_params(save_location, model):
#     #saves all params in recommended way
#     save_path = os.path.join(save_dir, 'model_params3')
#     try:
#         os.makedirs(save_path)
#     except OSError: