示例#1
0
def run_set_actions(waitTime=0, verbose=True):
    rospy.init_node("test")
    env = BaxterEnvironment()
    rospy.on_shutdown(env.close)
    cv2.namedWindow("Feed", cv2.WINDOW_NORMAL)

    # Display starting environment
    env._update_state()
    if verbose:
        print ""
        print "horiz: " + str(env.horiz_dist)
        print "vert: " + str(env.vert_dist)
    cv2.imshow("Feed", env.image)
    cv2.waitKey(waitTime)

    # Create list of actions
    actions = []
    actions.append([0.9, 0.0])
    actions.append([-0.9, 0.0])
    actions.append([-0.9, 0.0])
    actions.append([0.9, 0.0])
    actions.append([0.0, 0.9])
    actions.append([0.0, -0.9])
    actions.append([0.0, -0.9])
    actions.append([0.0, 0.9])

    # Run actions untill interupted
    i = -1
    while not rospy.is_shutdown():
        i = (i + 1) % len(actions)
        action = actions[i]
        s = time.time()
        state, reward, done = env.step(actions[i])
        e = time.time()
        if verbose:
            print ""
            print "action: " + str(action)
            print "horiz: " + str(state[0])
            print "vert: " + str(state[1])
            print "reward: " + str(reward)
            print "done: " + str(done)
            print "time: " + str(e - s)
        # Display camera image
        cv2.imshow("Feed", env.image)
        cv2.waitKey(waitTime)
示例#2
0
def run_random(waitTime=0, verbose=True):
    rospy.init_node("test")
    env = BaxterEnvironment()
    env.reset()
    rospy.on_shutdown(env.close)
    cv2.namedWindow("Feed", cv2.WINDOW_NORMAL)

    # Run randomly untill interupteds = time.time()
    done = True
    while not rospy.is_shutdown():
        if done:
            # Reset
            state = env.reset()
            if verbose:
                print ""
                print "horiz: " + str(state[0])
                print "vert: " + str(state[1])
            done = False
        else:
            # Random action
            action = [random.uniform(-1, 1), random.uniform(-1, 1)]
            s = time.time()
            state, reward, done = env.step(action)
            e = time.time()
            if verbose:
                print ""
                print "action: " + str(action)
                print "horiz: " + str(state[0])
                print "vert: " + str(state[1])
                print "reward: " + str(reward)
                print "done: " + str(done)
                print "time: " + str(e - s)
        # Display camera image
        cv2.imshow("Feed", env.image)
        cv2.waitKey(waitTime)
示例#3
0
def run():
    rospy.init_node("test")
    env = BaxterEnvironment((1280, 800))
    retina = Retina(1280, 800)
    env.reset()
    rospy.on_shutdown(env.close)
    cv2.namedWindow("Feed", cv2.WINDOW_NORMAL)

    # Key action mapping
    actions = {ord('a'): [0.1, 0], ord('s'): [-0.1, 0],
               ord('d'): [0, 0.1], ord('f'): [0, -0.1],
               ord('q'): [1, 0], ord('w'): [-1, 0],
               ord('e'): [0, 1], ord('r'): [0, -1]}

    # Run until interupted or 'esc' pressed
    done = True
    key = None
    ep_step = 0
    while not rospy.is_shutdown():
        if done:
            # Reset environment
            state = env.reset()
            print ""
            print "horiz: " + str(state[0])
            print "vert: " + str(state[1])
            print "x pos: " + str(state[2])
            print "y pos: " + str(state[3])
            done = False
            ep_step = 0
        else:
            if key in actions:
                ep_step = ep_step + 1

                # If valid key pressed execute corresponding action
                action = actions[key]
                state, reward, done = env.step(action)
                print ""
                print "action: " + str(action)
                print "horiz: " + str(state[0])
                print "vert: " + str(state[1])
                print "x pos: " + str(state[2])
                print "y pos: " + str(state[3])
                print "reward: " + str(reward)
                print "done: " + str(done)

                done = done or (ep_step == 15)

        # Display image until key pressed
        cv2.imshow("Feed", retina.sample(env.image))
        key = cv2.waitKey(0)

        # If 'esc' pressed exit program
        if key == 27:
            break
        state_dict = state_dict + "retina_"
    state_dict = (state_dict + args.network + "/state_dicts/net_"
                  + str(args.epoch))
    resnet.load_state_dict(torch.load(state_dict))

    # Replace final layer with flatten layer
    layers = list(resnet.children())[:-1]
    layers.append(Flatten())
    resnet = nn.Sequential(*layers)

    # Environment variables
    STATE_DIM = features + 2
    ACTION_DIM = 2

    # Training variables
    ENVIRONMENT = BaxterEnvironment((1280, 800), True)
    INIT_EXPLORE = 2000
    MAX_STEPS = 100000
    MAX_EP_STEPS = 15
    UPDATES_PER_STEP = 1
    DATA = (os.path.dirname(os.path.realpath(__file__))
            + "/baxter_center/data/")
    DATA = None
    MODEL = (os.path.dirname(os.path.realpath(__file__))
             + "/baxter_center/mlp_")
    RESULT = (os.path.dirname(os.path.realpath(__file__))
              + "/baxter_center/mlp_")
    if args.use_retina:
        MODEL = MODEL + "retina_"
        RESULT = RESULT + "retina_"
    MODEL = MODEL + str(features) + "vector/state_dicts/"
    # Create folders to save data
    DATA_FOLDER = (os.path.dirname(
        os.path.realpath(__file__)) + "/baxter_center/image_data/")
    NORM_IMAGES = DATA_FOLDER + "images/"
    RETINA_IMAGES = DATA_FOLDER + "retina_images/"
    if not os.path.isdir(NORM_IMAGES):
        os.makedirs(NORM_IMAGES)
    if not os.path.isdir(RETINA_IMAGES):
        os.makedirs(RETINA_IMAGES)

    # Create preprocessors
    preprocessor = BaxterPreprocessor()

    # Create environment
    rospy.init_node('data_gathering_process')
    env = BaxterEnvironment((1280, 800))
    retina = Retina(1280, 800)
    rospy.on_shutdown(env.close)

    # Create function for exploration
    noise_function = NormalActionNoise(actions=ACTION_DIM)

    # Create tensors for storing data
    states = torch.ones((SIZE, STATE_DIM), dtype=torch.float)

    timestep_ep = 0
    done = True
    state = None
    image = None
    index = 0
    while index < SIZE:
示例#6
0
        PREPROCESSOR = BaxterImagePreprocessor(resnet, args.visualise)
    S_NORMALIZER = FeatureNormalizer(2)
    R_NORMALIZER = None
    MODEL_FOLDER = (os.path.dirname(os.path.realpath(__file__)) +
                    "/baxter_center/mlp_normRs/state_dicts/")
    CHECKPOINT = 100000

    # Load agent from checkpoint
    agent = Ddpg(REPLAY_SIZE, BATCH_SIZE, NOISE_FUNCTION, INIT_NOISE,
                 FINAL_NOISE, EXPLORATION_LEN, REWARD_SCALE, ACTOR,
                 ACTOR_OPTIM, CRITIC, CRITIC_OPTIM, PREPROCESSOR, S_NORMALIZER,
                 R_NORMALIZER)
    agent.load_checkpoint(MODEL_FOLDER, CHECKPOINT)

    # Set seeds
    torch.manual_seed(15)
    torch.cuda.manual_seed(15)
    np.random.seed(15)
    random.seed(15)

    # Create environment
    if args.network is not None:
        env = BaxterEnvironment((1280, 800), True)
    else:
        env = BaxterEnvironment()
    rospy.on_shutdown(env.close)

    # Evaluate and print performance
    print "Performance: %0.3f +- %0.3f" % (agent.evaluate(
        env, MAX_STEPS, EVAL_EP))
示例#7
0
if __name__ == '__main__':
    rospy.init_node("training process")

    # Set seeds
    torch.manual_seed(15)
    torch.cuda.manual_seed(15)
    np.random.seed(15)
    random.seed(15)

    # Environment variables
    STATE_DIM = 4
    ACTION_DIM = 2

    # Training variables
    ENVIRONMENT = BaxterEnvironment()
    INIT_EXPLORE = 2000
    MAX_STEPS = 300000
    MAX_EP_STEPS = 15
    UPDATES_PER_STEP = 1
    DATA = (os.path.dirname(os.path.realpath(__file__)) +
            "/baxter_center/data/")
    DATA = None
    MODEL = (os.path.dirname(os.path.realpath(__file__)) +
             "/baxter_center/mlp_normRs/state_dicts/")
    RESULT = (os.path.dirname(os.path.realpath(__file__)) +
              "/baxter_center/mlp_normRs/results/")
    EVAL_FREQ = 2000
    EVAL_EP = 20
    CHECKPOINT = None
    STATE_DIM = 4
    ACTION_DIM = 2
    MAX_EP_STEPS = 100

    # Create folder to save data
    DATA_FOLDER = (os.path.dirname(os.path.realpath(__file__)) +
                   "/baxter_center/data/")
    if not os.path.isdir(DATA_FOLDER):
        os.makedirs(DATA_FOLDER)

    # Create preprocessors
    preprocessor = BaxterPreprocessor()

    # Create environment
    rospy.init_node('data_gathering_process')
    env = BaxterEnvironment()
    rospy.on_shutdown(env.close)

    # Create function for exploration
    noise_function = NOISE_FUNCTION = NormalActionNoise(actions=ACTION_DIM)

    # Create tensors for storing data
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    states = torch.ones((SIZE, STATE_DIM), dtype=torch.float, device=device)
    actions = torch.ones((SIZE, ACTION_DIM), dtype=torch.float, device=device)
    next_states = torch.ones((SIZE, STATE_DIM),
                             dtype=torch.float,
                             device=device)
    rewards = torch.ones((SIZE, 1), dtype=torch.float, device=device)
    dones = torch.ones((SIZE, 1), dtype=torch.float, device=device)