Example #1
def test(rank, args, shared_model, counter):
    torch.manual_seed(args.seed + rank)

    FloatTensor = torch.cuda.FloatTensor if args.use_cuda else torch.FloatTensor
    DoubleTensor = torch.cuda.DoubleTensor if args.use_cuda else torch.DoubleTensor
    ByteTensor = torch.cuda.ByteTensor if args.use_cuda else torch.ByteTensor

    env = create_mario_env(args.env_name)
    """ 
        need to implement Monitor wrapper with env.change_level
    """
    # expt_dir = 'video'
    # env = wrappers.Monitor(env, expt_dir, force=True, video_callable=lambda count: count % 10 == 0)

    env.seed(args.seed + rank)

    model = ActorCritic(env.observation_space.shape[0], len(ACTIONS))
    if args.use_cuda:
        model.cuda()
    model.eval()

    state = env.reset()
    state = torch.from_numpy(state)
    reward_sum = 0
    done = True
    savefile = os.getcwd() + '/save/mario_curves.csv'

    title = ['Time', 'No. Steps', 'Total Reward', 'Episode Length']
    with open(savefile, 'a', newline='') as sfile:
        writer = csv.writer(sfile)
        writer.writerow(title)

    start_time = time.time()

    # a quick hack to prevent the agent from getting stuck
    actions = deque(maxlen=4000)
    episode_length = 0
    while True:
        episode_length += 1
        ep_start_time = time.time()
        # Sync with the shared model
        if done:
            model.load_state_dict(shared_model.state_dict())
            cx = Variable(torch.zeros(1, 512), volatile=True).type(FloatTensor)
            hx = Variable(torch.zeros(1, 512), volatile=True).type(FloatTensor)

        else:
            cx = Variable(cx.data, volatile=True).type(FloatTensor)
            hx = Variable(hx.data, volatile=True).type(FloatTensor)

        state_inp = Variable(state.unsqueeze(0),
                             volatile=True).type(FloatTensor)
        value, logit, (hx, cx) = model((state_inp, (hx, cx)))
        prob = F.softmax(logit, dim=-1)
        action = prob.max(-1, keepdim=True)[1].data

        action_out = ACTIONS[action][0, 0]
        # print("Process: Test Action: {}".format(str(action_out)))

        state, reward, done, _ = env.step(action_out)
        env.render()
        done = done or episode_length >= args.max_episode_length
        reward_sum += reward

        # a quick hack to prevent the agent from getting stuck
        actions.append(action[0, 0])
        if actions.count(actions[0]) == actions.maxlen:
            done = True

        if done:
            print(
                "Time {}, num steps {}, FPS {:.0f}, episode reward {}, episode length {}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    counter.value, counter.value / (time.time() - start_time),
                    reward_sum, episode_length))

            data = [
                time.time() - ep_start_time, counter.value, reward_sum,
                episode_length
            ]

            with open(savefile, 'a', newline='') as sfile:
                writer = csv.writer(sfile)
                writer.writerows([data])

            reward_sum = 0
            episode_length = 0
            actions.clear()
            time.sleep(60)
            env.locked_levels = [False] + [True] * 31
            env.change_level(0)
            state = env.reset()

        state = torch.from_numpy(state)
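
For context, this evaluation worker is normally spawned with torch.multiprocessing alongside the A3C training workers, all of them sharing the model parameters and a global step counter. The launch script below is a minimal, assumed sketch: the hyper-parameter values, the train() worker, and the rank handed to test() are placeholders, while create_mario_env, ActorCritic, ACTIONS, and test() are the names used in the example above.

import argparse

import torch.multiprocessing as mp

if __name__ == '__main__':
    # hypothetical hyper-parameters; the real project builds these with argparse
    args = argparse.Namespace(env_name='SuperMarioBros-1-1-v0', seed=1,
                              use_cuda=False, max_episode_length=10000,
                              num_processes=4)

    env = create_mario_env(args.env_name)
    shared_model = ActorCritic(env.observation_space.shape[0], len(ACTIONS))
    shared_model.share_memory()   # make the parameters visible to every worker
    counter = mp.Value('i', 0)    # global step counter incremented by the trainers

    processes = []
    # one evaluation process running the test() shown above
    p = mp.Process(target=test, args=(args.num_processes, args, shared_model, counter))
    p.start()
    processes.append(p)
    # plus the training workers (a train() function not shown in these examples)
    for rank in range(args.num_processes):
        p = mp.Process(target=train, args=(rank, args, shared_model, counter))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()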
Example #2
def test(rank, args, shared_model, counter):
    torch.manual_seed(args.seed + rank)

    FloatTensor = torch.FloatTensor  # torch.cuda.FloatTensor if args.use_cuda else torch.FloatTensor
    DoubleTensor = torch.DoubleTensor  # torch.cuda.DoubleTensor if args.use_cuda else torch.DoubleTensor
    ByteTensor = torch.ByteTensor  # torch.cuda.ByteTensor if args.use_cuda else torch.ByteTensor

    env = create_mario_env(args.env_name, args.reward_type)
    """ 
        need to implement Monitor wrapper with env.change_level
    """
    # expt_dir = 'video'
    # env = wrappers.Monitor(env, expt_dir, force=True, video_callable=lambda count: count % 10 == 0)
    
    #env.seed(args.seed + rank)

    model = ActorCritic(env.observation_space.shape[0], len(ACTIONS))
    model.eval()

    state = env.reset()
    state = torch.from_numpy(state)
    reward_sum = 0
    done = True
    savefile = os.getcwd() + '/save/scmemi_'+ args.reward_type +'/mario_curves.csv'
    
    title = ['Time', 'No. Steps', 'Total Reward', 'final_position', 'Episode Length']
    with open(savefile, 'a', newline='') as sfile:
        writer = csv.writer(sfile)
        writer.writerow(title)    

    start_time = time.time()

    # a quick hack to prevent the agent from getting stuck
    actions = deque(maxlen=400)
    positions = deque(maxlen=400)
    episode_length = 0
    while True:
        episode_length += 1
        ep_start_time = time.time()
        # Sync with the shared model
        if done:
            model.load_state_dict(shared_model.state_dict())
            # start every episode with a fresh LSTM hidden/cell state; the
            # evaluation process never needs gradients
            cx = torch.zeros(1, 512).type(FloatTensor)
            hx = torch.zeros(1, 512).type(FloatTensor)
        else:
            # carry the recurrent state over, detached from any previous graph
            cx = cx.detach()
            hx = hx.detach()

        state_inp = state.unsqueeze(0).type(FloatTensor)
        with torch.no_grad():
            value, logit, (hx, cx) = model((state_inp, (hx, cx)))
        prob = F.softmax(logit, dim=-1)
        action = prob.max(-1, keepdim=True)[1].data
        action_out = int(action[0, 0].data.numpy())
        state, reward, done, info = env.step(action_out)
        #env.render()
        done = done or episode_length >= args.max_episode_length
        reward_sum += reward

        # a quick hack to prevent the agent from getting stuck
        actions.append(action[0, 0])
        if actions.count(actions[0]) == actions.maxlen:
            done = True
            print('Ending episode: the same action was repeated for the whole window')
        if args.pos_stuck:
            # also end the episode if Mario's x position has stayed within
            # +/-20 pixels of its current value for at least 200 recorded frames
            positions.append(info['x_pos'])
            pos_ar = np.array(positions)
            if (len(positions) >= 200) and (pos_ar < pos_ar[-1] + 20).all() and (pos_ar > pos_ar[-1] - 20).all():
                done = True

        if done:
            print("Time {}, num steps {}, FPS {:.0f}, episode reward {:.3f}, distance covered {:.3f}, episode length {}".format(
                time.strftime("%Hh %Mm %Ss",
                              time.gmtime(time.time() - start_time)), 
                counter.value, counter.value / (time.time() - start_time),
                reward_sum, info['x_pos']/x_norm, episode_length))
            
            data = [time.time() - ep_start_time,
                    counter.value, reward_sum, info['x_pos']/x_norm, episode_length]
            
            with open(savefile, 'a', newline='') as sfile:
                writer = csv.writer(sfile)
                writer.writerows([data])
            
            reward_sum = 0
            episode_length = 0
            actions.clear()
            positions.clear()
            time.sleep(60)
#             env.locked_levels = [False] + [True] * 31
#             env.change_level(0)
            state = env.reset()

        state = torch.from_numpy(state)
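
The second example extends the repeated-action check with a position-based stall check driven by info['x_pos']. The sketch below factors that sliding-window test into a small standalone helper; the helper name is hypothetical, and the 400-frame deque, the 200-frame minimum window, and the ±20-pixel tolerance mirror the constants used above.

from collections import deque

import numpy as np

def is_stuck(positions, min_window=200, tolerance=20):
    # True once at least `min_window` x positions have been recorded and every
    # one of them lies strictly within `tolerance` pixels of the newest value.
    if len(positions) < min_window:
        return False
    pos_ar = np.array(positions)
    return bool((np.abs(pos_ar - pos_ar[-1]) < tolerance).all())

positions = deque(maxlen=400)       # same window length as in the example
for x_pos in [40] * 250:            # Mario stays at x = 40 for 250 consecutive frames
    positions.append(x_pos)
print(is_stuck(positions))          # True: the episode would be ended early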