def run_set_actions(waitTime=0, verbose=True): rospy.init_node("test") env = BaxterEnvironment() rospy.on_shutdown(env.close) cv2.namedWindow("Feed", cv2.WINDOW_NORMAL) # Display starting environment env._update_state() if verbose: print "" print "horiz: " + str(env.horiz_dist) print "vert: " + str(env.vert_dist) cv2.imshow("Feed", env.image) cv2.waitKey(waitTime) # Create list of actions actions = [] actions.append([0.9, 0.0]) actions.append([-0.9, 0.0]) actions.append([-0.9, 0.0]) actions.append([0.9, 0.0]) actions.append([0.0, 0.9]) actions.append([0.0, -0.9]) actions.append([0.0, -0.9]) actions.append([0.0, 0.9]) # Run actions untill interupted i = -1 while not rospy.is_shutdown(): i = (i + 1) % len(actions) action = actions[i] s = time.time() state, reward, done = env.step(actions[i]) e = time.time() if verbose: print "" print "action: " + str(action) print "horiz: " + str(state[0]) print "vert: " + str(state[1]) print "reward: " + str(reward) print "done: " + str(done) print "time: " + str(e - s) # Display camera image cv2.imshow("Feed", env.image) cv2.waitKey(waitTime)
def run_random(waitTime=0, verbose=True): rospy.init_node("test") env = BaxterEnvironment() env.reset() rospy.on_shutdown(env.close) cv2.namedWindow("Feed", cv2.WINDOW_NORMAL) # Run randomly untill interupteds = time.time() done = True while not rospy.is_shutdown(): if done: # Reset state = env.reset() if verbose: print "" print "horiz: " + str(state[0]) print "vert: " + str(state[1]) done = False else: # Random action action = [random.uniform(-1, 1), random.uniform(-1, 1)] s = time.time() state, reward, done = env.step(action) e = time.time() if verbose: print "" print "action: " + str(action) print "horiz: " + str(state[0]) print "vert: " + str(state[1]) print "reward: " + str(reward) print "done: " + str(done) print "time: " + str(e - s) # Display camera image cv2.imshow("Feed", env.image) cv2.waitKey(waitTime)
def run(): rospy.init_node("test") env = BaxterEnvironment((1280, 800)) retina = Retina(1280, 800) env.reset() rospy.on_shutdown(env.close) cv2.namedWindow("Feed", cv2.WINDOW_NORMAL) # Key action mapping actions = {ord('a'): [0.1, 0], ord('s'): [-0.1, 0], ord('d'): [0, 0.1], ord('f'): [0, -0.1], ord('q'): [1, 0], ord('w'): [-1, 0], ord('e'): [0, 1], ord('r'): [0, -1]} # Run until interupted or 'esc' pressed done = True key = None ep_step = 0 while not rospy.is_shutdown(): if done: # Reset environment state = env.reset() print "" print "horiz: " + str(state[0]) print "vert: " + str(state[1]) print "x pos: " + str(state[2]) print "y pos: " + str(state[3]) done = False ep_step = 0 else: if key in actions: ep_step = ep_step + 1 # If valid key pressed execute corresponding action action = actions[key] state, reward, done = env.step(action) print "" print "action: " + str(action) print "horiz: " + str(state[0]) print "vert: " + str(state[1]) print "x pos: " + str(state[2]) print "y pos: " + str(state[3]) print "reward: " + str(reward) print "done: " + str(done) done = done or (ep_step == 15) # Display image until key pressed cv2.imshow("Feed", retina.sample(env.image)) key = cv2.waitKey(0) # If 'esc' pressed exit program if key == 27: break
state_dict = state_dict + "retina_" state_dict = (state_dict + args.network + "/state_dicts/net_" + str(args.epoch)) resnet.load_state_dict(torch.load(state_dict)) # Replace final layer with flatten layer layers = list(resnet.children())[:-1] layers.append(Flatten()) resnet = nn.Sequential(*layers) # Environment variables STATE_DIM = features + 2 ACTION_DIM = 2 # Training variables ENVIRONMENT = BaxterEnvironment((1280, 800), True) INIT_EXPLORE = 2000 MAX_STEPS = 100000 MAX_EP_STEPS = 15 UPDATES_PER_STEP = 1 DATA = (os.path.dirname(os.path.realpath(__file__)) + "/baxter_center/data/") DATA = None MODEL = (os.path.dirname(os.path.realpath(__file__)) + "/baxter_center/mlp_") RESULT = (os.path.dirname(os.path.realpath(__file__)) + "/baxter_center/mlp_") if args.use_retina: MODEL = MODEL + "retina_" RESULT = RESULT + "retina_" MODEL = MODEL + str(features) + "vector/state_dicts/"
# Create folders to save data DATA_FOLDER = (os.path.dirname( os.path.realpath(__file__)) + "/baxter_center/image_data/") NORM_IMAGES = DATA_FOLDER + "images/" RETINA_IMAGES = DATA_FOLDER + "retina_images/" if not os.path.isdir(NORM_IMAGES): os.makedirs(NORM_IMAGES) if not os.path.isdir(RETINA_IMAGES): os.makedirs(RETINA_IMAGES) # Create preprocessors preprocessor = BaxterPreprocessor() # Create environment rospy.init_node('data_gathering_process') env = BaxterEnvironment((1280, 800)) retina = Retina(1280, 800) rospy.on_shutdown(env.close) # Create function for exploration noise_function = NormalActionNoise(actions=ACTION_DIM) # Create tensors for storing data states = torch.ones((SIZE, STATE_DIM), dtype=torch.float) timestep_ep = 0 done = True state = None image = None index = 0 while index < SIZE:
PREPROCESSOR = BaxterImagePreprocessor(resnet, args.visualise) S_NORMALIZER = FeatureNormalizer(2) R_NORMALIZER = None MODEL_FOLDER = (os.path.dirname(os.path.realpath(__file__)) + "/baxter_center/mlp_normRs/state_dicts/") CHECKPOINT = 100000 # Load agent from checkpoint agent = Ddpg(REPLAY_SIZE, BATCH_SIZE, NOISE_FUNCTION, INIT_NOISE, FINAL_NOISE, EXPLORATION_LEN, REWARD_SCALE, ACTOR, ACTOR_OPTIM, CRITIC, CRITIC_OPTIM, PREPROCESSOR, S_NORMALIZER, R_NORMALIZER) agent.load_checkpoint(MODEL_FOLDER, CHECKPOINT) # Set seeds torch.manual_seed(15) torch.cuda.manual_seed(15) np.random.seed(15) random.seed(15) # Create environment if args.network is not None: env = BaxterEnvironment((1280, 800), True) else: env = BaxterEnvironment() rospy.on_shutdown(env.close) # Evaluate and print performance print "Performance: %0.3f +- %0.3f" % (agent.evaluate( env, MAX_STEPS, EVAL_EP))
if __name__ == '__main__': rospy.init_node("training process") # Set seeds torch.manual_seed(15) torch.cuda.manual_seed(15) np.random.seed(15) random.seed(15) # Environment variables STATE_DIM = 4 ACTION_DIM = 2 # Training variables ENVIRONMENT = BaxterEnvironment() INIT_EXPLORE = 2000 MAX_STEPS = 300000 MAX_EP_STEPS = 15 UPDATES_PER_STEP = 1 DATA = (os.path.dirname(os.path.realpath(__file__)) + "/baxter_center/data/") DATA = None MODEL = (os.path.dirname(os.path.realpath(__file__)) + "/baxter_center/mlp_normRs/state_dicts/") RESULT = (os.path.dirname(os.path.realpath(__file__)) + "/baxter_center/mlp_normRs/results/") EVAL_FREQ = 2000 EVAL_EP = 20 CHECKPOINT = None
STATE_DIM = 4 ACTION_DIM = 2 MAX_EP_STEPS = 100 # Create folder to save data DATA_FOLDER = (os.path.dirname(os.path.realpath(__file__)) + "/baxter_center/data/") if not os.path.isdir(DATA_FOLDER): os.makedirs(DATA_FOLDER) # Create preprocessors preprocessor = BaxterPreprocessor() # Create environment rospy.init_node('data_gathering_process') env = BaxterEnvironment() rospy.on_shutdown(env.close) # Create function for exploration noise_function = NOISE_FUNCTION = NormalActionNoise(actions=ACTION_DIM) # Create tensors for storing data device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") states = torch.ones((SIZE, STATE_DIM), dtype=torch.float, device=device) actions = torch.ones((SIZE, ACTION_DIM), dtype=torch.float, device=device) next_states = torch.ones((SIZE, STATE_DIM), dtype=torch.float, device=device) rewards = torch.ones((SIZE, 1), dtype=torch.float, device=device) dones = torch.ones((SIZE, 1), dtype=torch.float, device=device)