def train(rank, args, shared_model, counter, lock, optimizer=None):
    FloatTensor = torch.cuda.FloatTensor if args.use_cuda else torch.FloatTensor

    env = gym.make("FetchPickAndPlace-v1")
    env2 = gym.wrappers.FlattenDictWrapper(
        env, dict_keys=['observation', 'desired_goal'])

    model = Actor()
    if args.use_cuda:
        model.cuda()
        torch.cuda.manual_seed_all(12)

    if optimizer is None:
        optimizer = optim.Adam(shared_model.parameters(), lr=args.lr)

    # Freeze the first two fully connected layers; only the remaining layers are updated.
    for p in model.fc1.parameters():
        p.requires_grad_(False)
    for p in model.fc2.parameters():
        p.requires_grad_(False)

    model.train()
    done = True

    for num_iter in count():
        with lock:
            counter.value += 1

        lastObs = env.reset()
        goal = lastObs['desired_goal']
        objectPos = lastObs['observation'][3:6]
        object_rel_pos = lastObs['observation'][6:9]
        object_oriented_goal = object_rel_pos.copy()
        object_oriented_goal[2] += 0.03  # first make the gripper go slightly above the object
        timeStep = 0  # count the total number of timesteps

        if rank == 0 and num_iter % args.save_interval == 0 and num_iter > 0:
            torch.save(shared_model.state_dict(), args.save_path1)
        # Second saver in case the first process crashes.
        if rank == 1 and num_iter % (args.save_interval * 2.5) == 0 and num_iter > 0:
            torch.save(shared_model.state_dict(), args.save_path1)

        model.load_state_dict(shared_model.state_dict())
        criterion = nn.MSELoss()

        # Phase 1 (scripted): move the gripper to a point slightly above the object.
        while np.linalg.norm(object_oriented_goal) >= 0.015 and timeStep <= env._max_episode_steps:
            action = [0, 0, 0, 0, 0, 0]
            object_oriented_goal = object_rel_pos.copy()
            object_oriented_goal[2] += 0.03

            for i in range(len(object_oriented_goal)):
                action[i] = object_oriented_goal[i] * 6
            action[3] = 0.05  # keep the gripper open

            obsDataNew, reward, done, info = env.step(action)
            timeStep += 1

            objectPos = obsDataNew['observation'][3:6]
            object_rel_pos = obsDataNew['observation'][6:9]

        # Phase 2 (scripted): descend onto the object and close the gripper.
        while np.linalg.norm(object_rel_pos) >= 0.005 and timeStep <= env._max_episode_steps:
            action = [0, 0, 0, 0, 0, 0]
            for i in range(len(object_rel_pos)):
                action[i] = object_rel_pos[i] * 6
            action[3] = -0.01
            action[4] = obsDataNew['observation'][13] / 8

            obsDataNew, reward, done, info = env.step(action)
            timeStep += 1

            objectPos = obsDataNew['observation'][3:6]
            object_rel_pos = obsDataNew['observation'][6:9]

        state_inp = torch.from_numpy(env2.observation(obsDataNew)).type(FloatTensor)

        # Phase 3 (learned): the policy carries the object towards the goal while being
        # supervised, axis by axis, against the scripted proportional controller.
        while np.linalg.norm(goal - objectPos) >= 0.01 and timeStep <= env._max_episode_steps:
            action = [0, 0, 0, 0, 0, 0]
            act_tensor, _ = act(state_inp, model, False, True)
            # error = torch.zeros(3).type(FloatTensor)

            for i in range(len(goal - objectPos)):
                optimizer.zero_grad()
                expected = torch.from_numpy(
                    np.array((goal - objectPos)[i] * 6)).type(FloatTensor)
                action[i] = act_tensor[i].cpu().detach().numpy()
                error = criterion(act_tensor[i], expected)
                error.backward(retain_graph=True)
                ensure_shared_grads(model, shared_model)
                optimizer.step()

            # Alternative: accumulate the per-axis errors and take a single update.
            # optimizer.zero_grad()
            # loss = torch.sum(error)
            # loss.backward(retain_graph=True)
            # ensure_shared_grads(model, shared_model)
            # optimizer.step()

            action[3] = -0.01
            obsDataNew, reward, done, info = env.step(action)
            timeStep += 1

            objectPos = obsDataNew['observation'][3:6]
            object_rel_pos = obsDataNew['observation'][6:9]
            state_inp = torch.from_numpy(env2.observation(obsDataNew)).type(FloatTensor)

        objectPos = obsDataNew['observation'][3:6]
        object_rel_pos = obsDataNew['observation'][6:9]

        # Phase 4: keep the gripper closed until the episode reaches its fixed duration.
        while True:
            action = [0, 0, 0, 0, 0, 0]
            action[3] = -0.01  # keep the gripper closed

            obsDataNew, reward, done, info = env.step(action)
            timeStep += 1

            objectPos = obsDataNew['observation'][3:6]
            object_rel_pos = obsDataNew['observation'][6:9]
            if timeStep >= env._max_episode_steps:
                break
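
# The ensure_shared_grads() call in the training loop above copies the worker's
# gradients onto the shared model before optimizer.step(). Its definition is not
# part of this section; the function below is only a minimal illustrative sketch
# in the style common to A3C-like PyTorch code. The _sketch suffix marks it as
# hypothetical: it is not referenced by train() or test().
def _ensure_shared_grads_sketch(model, shared_model):
    for param, shared_param in zip(model.parameters(), shared_model.parameters()):
        # If the shared parameter already holds a gradient, assume it has been
        # wired up by an earlier call and stop.
        if shared_param.grad is not None:
            return
        # Hand the locally computed gradient tensor to the shared parameter.
        shared_param._grad = param.grad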
ep_numb += 1
lastObs = env.reset()
goal = lastObs['desired_goal']
objectPos = lastObs['observation'][3:6]
object_rel_pos = lastObs['observation'][6:9]
object_oriented_goal = object_rel_pos.copy()
object_oriented_goal[2] += 0.03  # first make the gripper go slightly above the object
timeStep = 0  # count the total number of timesteps
state_inp = torch.from_numpy(env2.observation(lastObs)).type(FloatTensor)
Ratio = []

# Approach phase driven by the policy; record the ratio values returned by act().
while np.linalg.norm(object_oriented_goal) >= 0.015 and timeStep <= env._max_episode_steps:
    env.render()
    action = [0, 0, 0, 0, 0, 0]
    act_tensor, ratio = act(state_inp, model, True, False)
    Ratio.append(ratio.cpu().detach().numpy())

    for i in range(len(object_oriented_goal)):
        action[i] = act_tensor[i].cpu().detach().numpy()

    object_oriented_goal = object_rel_pos.copy()
    object_oriented_goal[2] += 0.03
    action[3] = 0.05

    obsDataNew, reward, done, info = env.step(action)
    timeStep += 1

    objectPos = obsDataNew['observation'][3:6]
    object_rel_pos = obsDataNew['observation'][6:9]
    state_inp = torch.from_numpy(env2.observation(obsDataNew)).type(FloatTensor)
def test(rank, args, shared_model, counter):
    FloatTensor = torch.cuda.FloatTensor if args.use_cuda else torch.FloatTensor

    env = gym.make("FetchPickAndPlace-v1")
    env2 = gym.wrappers.FlattenDictWrapper(
        env, dict_keys=['observation', 'desired_goal'])

    model = Actor()
    if args.use_cuda:
        model.cuda()
    model.eval()
    done = True

    savefile = os.getcwd() + '/train/mario_curves.csv'
    title = ['No. episodes', 'No. of success']
    with open(savefile, 'a', newline='') as sfile:
        writer = csv.writer(sfile)
        writer.writerow(title)

    while True:
        model.load_state_dict(shared_model.state_dict())
        ep_num = 0
        num_ep = counter.value
        success = 0

        while ep_num < 100:
            ep_num += 1
            lastObs = env.reset()
            goal = lastObs['desired_goal']
            objectPos = lastObs['observation'][3:6]
            object_rel_pos = lastObs['observation'][6:9]
            object_oriented_goal = object_rel_pos.copy()
            object_oriented_goal[2] += 0.03  # first make the gripper go slightly above the object
            timeStep = 0
            model.load_state_dict(shared_model.state_dict())

            # Phase 1 (scripted): approach a point slightly above the object.
            while np.linalg.norm(object_oriented_goal) >= 0.015 and timeStep <= env._max_episode_steps:
                env.render()
                action = [0, 0, 0, 0, 0, 0]
                object_oriented_goal = object_rel_pos.copy()
                object_oriented_goal[2] += 0.03

                for i in range(len(object_oriented_goal)):
                    action[i] = object_oriented_goal[i] * 6
                action[3] = 0.05  # keep the gripper open

                obsDataNew, reward, done, info = env.step(action)
                timeStep += 1

                objectPos = obsDataNew['observation'][3:6]
                object_rel_pos = obsDataNew['observation'][6:9]

            # Phase 2 (scripted): descend onto the object and close the gripper.
            while np.linalg.norm(object_rel_pos) >= 0.005 and timeStep <= env._max_episode_steps:
                env.render()
                action = [0, 0, 0, 0, 0, 0]
                for i in range(len(object_rel_pos)):
                    action[i] = object_rel_pos[i] * 6
                action[4] = obsDataNew['observation'][13] / 8
                action[3] = -0.01

                obsDataNew, reward, done, info = env.step(action)
                timeStep += 1

                objectPos = obsDataNew['observation'][3:6]
                object_rel_pos = obsDataNew['observation'][6:9]

            state_inp = torch.from_numpy(env2.observation(obsDataNew)).type(FloatTensor)

            # Phase 3 (learned): the policy carries the object to the goal.
            while np.linalg.norm(goal - objectPos) >= 0.01 and timeStep <= env._max_episode_steps:
                env.render()
                action = [0, 0, 0, 0, 0, 0]
                act_tensor, ratio = act(state_inp, model, False, True)

                for i in range(3):
                    action[i] = act_tensor[i].cpu().detach().numpy()
                action[3] = -0.01

                obsDataNew, reward, done, info = env.step(action)
                timeStep += 1

                state_inp = torch.from_numpy(env2.observation(obsDataNew)).type(FloatTensor)
                objectPos = obsDataNew['observation'][3:6]
                object_rel_pos = obsDataNew['observation'][6:9]
                if timeStep >= env._max_episode_steps:
                    break

            # Phase 4: keep the gripper closed until the episode reaches its fixed duration.
            while True:
                env.render()
                action = [0, 0, 0, 0, 0, 0]
                action[3] = -0.01  # keep the gripper closed

                obsDataNew, reward, done, info = env.step(action)
                timeStep += 1

                objectPos = obsDataNew['observation'][3:6]
                object_rel_pos = obsDataNew['observation'][6:9]
                if timeStep >= env._max_episode_steps:
                    break

            if info['is_success'] == 1.0:
                success += 1

            if done and ep_num % 100 == 0:
                print("num episodes {}, success {}".format(num_ep, success))

        data = [counter.value, success]
        with open(savefile, 'a', newline='') as sfile:
            writer = csv.writer(sfile)
            writer.writerows([data])
        time.sleep(20)
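
# --- Illustrative launcher (assumption, not part of the original project) ---
# A minimal sketch of how train() and test() above are typically wired together
# in an A3C-style setup: one Actor kept in shared memory, several worker
# processes running train(), and one evaluation process running test(). The
# argument names mirror those used above (use_cuda, lr, save_interval,
# save_path1); the values here are placeholders, not the project's real config.
if __name__ == "__main__":
    import argparse
    import torch.multiprocessing as mp

    mp.set_start_method('spawn', force=True)  # needed if CUDA tensors are shared

    args = argparse.Namespace(
        use_cuda=torch.cuda.is_available(),
        lr=1e-4,                                      # placeholder learning rate
        save_interval=100,                            # placeholder checkpoint frequency
        save_path1=os.getcwd() + '/train/actor.pt',   # placeholder checkpoint path
    )

    shared_model = Actor()
    if args.use_cuda:
        shared_model.cuda()
    shared_model.share_memory()  # workers push gradients into this copy

    counter = mp.Value('i', 0)   # shared episode counter incremented by train()
    lock = mp.Lock()

    num_workers = 3              # placeholder number of training processes
    processes = []
    for rank in range(num_workers):
        p = mp.Process(target=train, args=(rank, args, shared_model, counter, lock))
        p.start()
        processes.append(p)
    p = mp.Process(target=test, args=(num_workers, args, shared_model, counter))
    p.start()
    processes.append(p)
    for p in processes:
        p.join()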