Example No. 1
def _enjoyWindow(env, dagger=False):
    model = WindowModel(action_dim=2, max_action=1.)

    try:
        if dagger:
            state_dict = torch.load('./models/dagger_windowimitate.pt')
        else:
            state_dict = torch.load('./models/windowimitate.pt')
        model.load_state_dict(state_dict)
    except Exception:
        print('failed to load model')
        exit()

    model.eval().to(device)

    obs = env.reset()

    obsWindow = np.zeros((12, 160, 120))
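    # obsWindow holds the 4 most recent RGB frames stacked along the channel
    # axis (4 x 3 = 12 channels); each step it is shifted by one frame below.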

    successes = 0
    count = 0
    written = False

    while count < NUM_TESTS:
        obsWindow[:9, :, :] = obsWindow[3:, :, :]
        obsWindow[9:12, :, :] = obs
        obs = torch.from_numpy(obsWindow).float().to(device).unsqueeze(0)

        action = model(obs)
        action = action.squeeze().data.cpu().numpy()

        obs, reward, done, info = env.step(action)
        env.render()

        if done:
            count += 1

            if info['Simulator']['done_code'] == 'lap-completed':
                print('*** SUCCESS ***')
                successes += 1
            else:
                print('*** FAILED ***')

            obs = env.reset()
            env.render()

        # Every 50 completed episodes, append the running success count to a
        # per-map results file (written once per multiple of 50).
        if count != 0 and count % 50 == 0 and not written:
            suffix = "_dagger" if dagger else ""
            with open("../window_test_{}{}.txt".format(env.map_name, suffix),
                      "a") as f:
                f.write("{} {}".format(env.map_name, count))
                f.write("\n{}/{}\n\n".format(successes, NUM_TESTS))
            written = True
        elif count % 50 != 0:
            written = False
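Unlike the variant in the next example, this version expects an already-wrapped
environment. A minimal driver sketch, assuming the same launch_env1 wrapper
stack used in the later examples and that NUM_TESTS, device and WindowModel
are defined at module level as in these snippets:

env = launch_env1()
env = ResizeWrapper(env)
env = NormalizeWrapper(env)
env = ImgWrapper(env)
env = ActionWrapper(env)
env = DtRewardWrapper(env)

# evaluate the DAgger-refined window model over NUM_TESTS episodes
_enjoyWindow(env, dagger=True)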
Example No. 2
def _enjoyWindow():
    model = WindowModel(action_dim=2, max_action=1.)

    try:
        state_dict = torch.load('./models/windowimitate.pt')
        model.load_state_dict(state_dict)
    except Exception:
        print('failed to load model')
        exit()

    model.eval().to(device)

    env = launch_env1()
    env = ResizeWrapper(env)
    env = NormalizeWrapper(env)
    env = ImgWrapper(env)
    env = ActionWrapper(env)
    env = DtRewardWrapper(env)

    obs = env.reset()

    obsWindow = np.zeros((12, 160, 120))

    while True:
        obsWindow[:9, :, :] = obsWindow[3:, :, :]
        obsWindow[9:12, :, :] = obs
        obs = torch.from_numpy(obsWindow).float().to(device).unsqueeze(0)

        action = model(obs)
        action = action.squeeze().data.cpu().numpy()

        obs, reward, done, info = env.step(action)
        env.render()

        if done:
            if reward < 0:
                print('*** FAILED ***')
                time.sleep(0.7)

            obs = env.reset()
            env.render()
Example No. 3
def _dagger():
    model = Model(action_dim=2, max_action=1.)

    try:
        state_dict = torch.load('./models/imitate.pt')
        model.load_state_dict(state_dict)
    except Exception:
        print('failed to load model')
        exit()

    model.eval().to(device)

    env = launch_env1()
    # Register a keyboard handler

    env = ResizeWrapper(env)
    env = NormalizeWrapper(env)
    env = ImgWrapper(env)
    env = ActionWrapper(env)
    env = DtRewardWrapper(env)

    obs = env.reset()
    env.render()
    key_handler = key.KeyStateHandler()
    env.unwrapped.window.push_handlers(key_handler)

    print(env.map_name)

    obsHistory = []
    actionHistory = []

    while True:
        obs = torch.from_numpy(obs).float().to(device).unsqueeze(0)

        action = model(obs)
        action = action.squeeze().data.cpu().numpy()

        obs, reward, done, info = env.step(action)

        daggerAction = np.array([0.0, 0.0])
        if key_handler[key.UP]:
            daggerAction = np.array([1.00, 0.0])
            #action = np.array([0.44, 0.0])
        if key_handler[key.DOWN]:
            daggerAction = np.array([-1.00, 0])
            #action = np.array([-0.44, 0])
        if key_handler[key.LEFT]:
            daggerAction = np.array([0.35, +1])
        if key_handler[key.RIGHT]:
            daggerAction = np.array([0.35, -1])
        if key_handler[key.SPACE]:
            # save the expert-labelled history collected so far
            obsHistoryArray = np.array(obsHistory)
            actionHistoryArray = np.array(actionHistory)
            np.save('./dagger/obs_{}.npy'.format(env.map_name), obsHistoryArray)
            np.save('./dagger/actions_{}.npy'.format(env.map_name),
                    actionHistoryArray)

        print(daggerAction)
        obsHistory.append(obs)
        actionHistory.append(daggerAction)

        env.render()

        if done:
            if reward < 0:
                print('*** FAILED ***')
                time.sleep(0.7)

            obs = env.reset()
            env.render()
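The arrays written when SPACE is pressed are presumably the DAgger rollouts
that the training function in Example No. 5 loads per map. A quick sanity
check of one recorded rollout might look like this; the map name in the paths
is only an illustration, matching the np.save calls above:

import numpy as np

# hypothetical file names; the actual ones depend on env.map_name at save time
obs = np.load('./dagger/obs_loop_obstacles.npy')
acts = np.load('./dagger/actions_loop_obstacles.npy')
print(obs.shape, acts.shape)  # expected: (N, 3, 160, 120) and (N, 2)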
Example No. 4
def _train(args):
    env = launch_env1()
    env1 = ResizeWrapper(env)
    env2 = NormalizeWrapper(env) 
    env3 = ImgWrapper(env)
    env = ActionWrapper(env)
    env = DtRewardWrapper(env)
    print("Initialized Wrappers")

    def transformObs(obs):
        obs = env1.observation(obs)
        obs = env2.observation(obs)
        obs = env3.observation(obs)
        return obs

    actions = None
    rawObs = None
    for map_name in MAP_NAMES:
        if map_name == "loop_obstacles":
            episodes = 3
        else:
            episodes = 2

        print(map_name)
        for episode in range(episodes):
            actionFile = "actions_{}.npy".format(episode)
            action = np.load(TRAINING_DATA_PATH.format(map_name, actionFile))
            print(action.shape)

            observationFile = "obs_{}.npy".format(episode)
            observation = np.load(
                TRAINING_DATA_PATH.format(map_name, observationFile))

            if actions is None:
                actions = action
                rawObs = observation
            else:
                actions = np.concatenate((actions, action), axis=0)
                rawObs = np.concatenate((rawObs, observation), axis=0)
            print(actions.shape)
        print(actions.shape)
        print("---")

    observations = np.zeros((rawObs.shape[0], 3, 160, 120))
    for i, obs in enumerate(rawObs):
        observations[i] = transformObs(obs)

    
    '''
    # Create an imperfect demonstrator
    expert = PurePursuitExpert(env=env)

    observations = []
    actions = []

    # let's collect our samples
    for episode in range(0, 2):
    #for episode in range(0, args.episodes):
        print("Starting episode", episode)
        #for steps in range(0, args.steps):
        for steps in range(0, 4):
            # use our 'expert' to predict the next action.
            action = expert.predict(None)
            observation, reward, done, info = env.step(action)
            observations.append(observation)
            actions.append(action)
        env.reset()

    actions = np.array(actions)
    observations = np.array(observations)
    print(observations.shape)
    '''

    env.close()
    #raise Exception("Done with testing")

    model = Model(action_dim=2, max_action=1.)
    model.train().to(device)

    # weight_decay is L2 regularization, helps avoid overfitting
    optimizer = optim.SGD(
        model.parameters(),
        lr=0.0004,
        weight_decay=1e-3
    )

    loss_list = []
    avg_loss = 0
    for epoch in range(args.epochs):
        optimizer.zero_grad()

        batch_indices = np.random.randint(0, observations.shape[0], (args.batch_size))
        obs_batch = torch.from_numpy(observations[batch_indices]).float().to(device)
        # the expert actions are continuous (velocity, steering), so keep them as floats
        act_batch = torch.from_numpy(actions[batch_indices]).float().to(device)

        model_actions = model(obs_batch)

        loss = (model_actions - act_batch).norm(2).mean()
        loss.backward()
        optimizer.step()

        #loss = loss.data[0]
        loss = loss.item()
        avg_loss = avg_loss * 0.995 + loss * 0.005

        print('epoch %d, loss=%.3f' % (epoch, loss))
        loss_list.append(loss)

        # Periodically save the trained model
        if epoch % 50 == 0:
            print("Saving...")
            torch.save(model.state_dict(), 'imitation/pytorch/models/imitate.pt')
            save_loss(loss_list, 'imitation/pytorch/loss.npy')

    print("Saving...")
    torch.save(model.state_dict(), 'imitation/pytorch/models/imitate.pt')
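save_loss is not defined in these snippets. A minimal sketch of what it could
look like, assuming it simply dumps the running loss history to a .npy file,
consistent with how it is called above:

import numpy as np

def save_loss(loss_list, path):
    # Persist the per-epoch losses so training curves can be plotted later.
    # This is a guessed implementation, not the repository's actual helper.
    np.save(path, np.array(loss_list, dtype=np.float32))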
Example No. 5
def _train(args):
    actions = None
    observations = None
    for map_name in MAP_NAMES:
        print(map_name)
        actionFile = "actions_{}.npy".format(map_name)
        action = np.load(TRAINING_DATA_PATH.format(actionFile))
        print(action.shape)

        observationFile = "obs_{}.npy".format(map_name)
        observation = np.load(TRAINING_DATA_PATH.format(observationFile))

        if actions is None:
            actions = action
            observations = observation
        else:
            actions = np.concatenate((actions, action), axis=0)
            observations = np.concatenate((observations, observation), axis=0)
        print("---")

    model = Model(action_dim=2, max_action=1.)
    try:
        state_dict = torch.load('./models/imitate.pt')
        model.load_state_dict(state_dict)
    except Exception:
        print('failed to load model')
        exit()
    model.train().to(device)

    # weight_decay is L2 regularization, helps avoid overfitting
    optimizer = optim.SGD(model.parameters(), lr=0.0004, weight_decay=1e-3)

    length = min(len(actions), len(observations))
    actions = actions[:length, :]
    observations = observations[:length, :, :, :]
    loss_list = []
    avg_loss = 0
    for epoch in range(args.epochs):
        optimizer.zero_grad()

        batch_indices = np.random.randint(0, observations.shape[0],
                                          (args.batch_size))
        obs_batch = torch.from_numpy(
            observations[batch_indices]).float().to(device)
        # keep the continuous expert actions as floats for the regression loss
        act_batch = torch.from_numpy(actions[batch_indices]).float().to(device)

        model_actions = model(obs_batch)

        loss = (model_actions - act_batch).norm(2).mean()
        loss.backward()
        optimizer.step()

        #loss = loss.data[0]
        loss = loss.item()
        avg_loss = avg_loss * 0.995 + loss * 0.005

        print('epoch %d, loss=%.3f' % (epoch, loss))
        loss_list.append(loss)

        # Periodically save the trained model
        if epoch % 50 == 0:
            print("Saving...")
            torch.save(model.state_dict(),
                       'imitation/pytorch/models/dagger_imitate.pt')
            save_loss(loss_list, 'imitation/pytorch/dagger_loss.npy')

    print("Saving...")
    torch.save(model.state_dict(),
               'imitation/pytorch/models/dagger_imitate.pt')
Example No. 6
def dagger(dt, actionHistory, obsHistory, count, env, model):
    """
    This function is called at every frame to handle
    movement/stepping and redrawing
    """

    step_count = env.unwrapped.step_count

    daggerAction = np.array([0.0, 0.0])
    if key_handler[key.UP]:
        daggerAction = np.array([1.00, 0.0])
        #action = np.array([0.44, 0.0])
    if key_handler[key.DOWN]:
        daggerAction = np.array([-1.00, 0])
        #action = np.array([-0.44, 0])
    if key_handler[key.LEFT]:
        daggerAction = np.array([0.35, +1])
    if key_handler[key.RIGHT]:
        daggerAction = np.array([0.35, -1])
    if key_handler[key.SPACE]:
        obsHistoryArray = np.array(obsHistory)
        actionHistoryArray = np.array(actionHistory)
        np.save('./dagger/obs_{}.npy'.format(env.map_name), obsHistoryArray)
        np.save('./dagger/actions_{}.npy'.format(env.map_name),
                actionHistoryArray)

    print(daggerAction)
    actionHistory.append(daggerAction)

    if step_count == 0:
        obs = np.zeros((3, 160, 120))
    else:
        obs = obsHistory[step_count - 1]

    obs = torch.from_numpy(obs).float().to(device).unsqueeze(0)

    action = model(obs)
    action = action.squeeze().data.cpu().numpy()
    obs, reward, done, info = env.step(action)
    print('step_count = %s, reward=%.3f' % (env.unwrapped.step_count, reward))
    obsHistory.append(obs)

    if key_handler[key.RETURN]:
        from PIL import Image
        im = Image.fromarray(obs)

        im.save('screen.png')

    if done:
        print('done!')
        '''
        if info['Simulator']['done_code'] == 'lap-completed' and args.record:
            obsHistoryArray = np.array(obsHistory)
            print(obsHistoryArray.shape)
            np.save('./data/{}/obs_{}.npy'.format(args.map_name, len(count)), obsHistoryArray)
            actionHistoryArray = np.array(actionHistory)
            print(actionHistoryArray.shape)
            np.save('./data/{}/actions_{}.npy'.format(args.map_name, len(count)), actionHistoryArray)
            count.append(0)
        '''
        #raise Exception("Stopping the program")
        env.reset()
        #obsHistory.clear()
        #actionHistory.clear()
        env.render()

    env.render()
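The dt parameter and the per-frame docstring suggest this callback is meant to
be driven by pyglet's clock, as in gym-duckietown's manual-control scripts.
A sketch of how it might be wired up; the wrapper stack, model loading and
file paths simply mirror the earlier examples, key_handler is assumed to be a
module-level pyglet KeyStateHandler, and the 30 Hz rate is an arbitrary choice:

import pyglet
from pyglet.window import key

env = launch_env1()
env = ResizeWrapper(env)
env = NormalizeWrapper(env)
env = ImgWrapper(env)
env = ActionWrapper(env)
env = DtRewardWrapper(env)
env.reset()
env.render()

key_handler = key.KeyStateHandler()
env.unwrapped.window.push_handlers(key_handler)

model = Model(action_dim=2, max_action=1.)
model.load_state_dict(torch.load('./models/imitate.pt'))
model.eval().to(device)

actionHistory, obsHistory, count = [], [], []
# pyglet passes dt as the first argument, followed by the extra args given here
pyglet.clock.schedule_interval(dagger, 1.0 / 30,
                               actionHistory, obsHistory, count, env, model)
pyglet.app.run()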