Example No. 1
def trainJump(save, save_as=None, curr_checkpoint=None):
    model.train()
    for episode in range(num_episodes):
        time.sleep(1.2)
        prev_score = getScore()

        if episode % 10 == 0:
            print("Score:", prev_score)
            print("-----------------------------------------")
            print("Episode:", episode)

        # Get state and move channels first (NHWC -> NCHW) for the conv net
        state = torch.Tensor(getLessProcessed()).unsqueeze(0)
        state = state.permute(0, 3, 1, 2)  # view() here would scramble the pixels

        # Construct distribution based on calculated mean and variance
        mean = model(state)
        variance = final_variance + (initial_variance - final_variance) * \
                        math.exp(-1. * episode / variance_decay)
        if episode % 10 == 0:
            print("Mean:", float(mean), "Deviation:", float(variance))
        m = Normal(mean, variance)  # Normal(loc, scale): "variance" is used as a std dev

        # Sample and perform action
        action = m.sample()
        if episode % 10 == 0:
            print("Action:", action)
        # adb long-press: same start/end point, so the duration (ms) sets the jump power
        os.system("adb shell input swipe 500 500 500 500 " + str(int(action)))
        time.sleep(0.5)

        # Get reward and optimize model
        reward = getReward(prev_score)
        if reward >= 2:
            reward = 10
        elif reward == 1:
            reward = 0.3
        elif reward < 0:
            reward = -10
            onDeath()
        print("Reward:", reward)  # `episode % 1 == 0` was always true; print every episode
        loss = -m.log_prob(action) * reward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if episode % 10 == 0:
            print("-----------------------------------------")

        if save:
            if (episode + 1) % 1001 == 0:
                save_file = {
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                }
                file_name = save_as + str((episode // 1000) +
                                          curr_checkpoint) + ".pth"
                torch.save(save_file, file_name)
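
The examples in this listing rely on module-level state that is never shown. Below is a minimal sketch of what Example No. 1 assumes; the network architecture, hyperparameter values, and imports here are illustrative guesses, not the repository's actual definitions.

import math
import os
import time

import torch
import torch.nn as nn
from torch.distributions import Normal

# Hypothetical hyperparameters; the real values live at module level in the repo.
num_episodes = 5000
initial_variance = 200.0  # wide exploration early on...
final_variance = 10.0     # ...annealing toward a narrow policy
variance_decay = 1000.0

# Placeholder policy network: maps an (N, C, H, W) screen tensor to a single
# scalar mean press duration. The real architecture is not shown.
model = nn.Sequential(
    nn.Conv2d(3, 8, kernel_size=5, stride=4),
    nn.ReLU(),
    nn.Flatten(),
    nn.LazyLinear(1),
)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)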
Example No. 2
def trainJump(save, save_as=None, curr_checkpoint=None):
    Agent.model.eval()
    for episode in range(num_episodes):
        time.sleep(1.2)
        prev_score = getScore()

        if episode % 10 == 0:
            print("Score:", prev_score)
            print("-----------------------------------------")
            print("Episode:", episode)

        # Get state and move channels first (NHWC -> NCHW) for the conv net
        state = torch.Tensor(getLessProcessed()).unsqueeze(0)
        state = state.permute(0, 3, 1, 2)  # view() here would scramble the pixels

        # Calculate mean and variance
        mean = Agent.a2c(state)
        variance = final_variance + (initial_variance - final_variance) * \
                        math.exp(-1. * episode / variance_decay)

        # Construct normal distribution and sample action
        if episode % 10 == 0:
            print("Mean:", float(mean), "Deviation:", float(variance))
        m = Normal(mean, variance)
        action = m.sample()
        log_prob = m.log_prob(action)
        if episode % 10 == 0:
            print("Action:", action)
        os.system("adb shell input swipe 500 500 500 500 " + str(int(action)))

        # Get reward and push action, state, log_prob, and reward to memory
        time.sleep(0.5)
        reward = getReward(prev_score)
        if reward >= 2:
            reward = 10
        elif reward == 1:
            reward = 0.1
        elif reward < 0:
            reward = -10
            onDeath()
        Agent.add_mem(action, state, log_prob, reward)
        if episode % 10 == 0:
            print("Reward:", reward)
            print("-----------------------------------------")

        # Optimize model
        if (episode + 1) % (k + 1) == 0:
            Agent.optimize_model(k, variance)

        if save:
            if (episode + 1) % 1001 == 0:
                Agent.save_agent(save_as + str(curr_checkpoint +
                                               (episode // 1000)) + ".pth")
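
Example No. 2 routes everything through an Agent object whose class is not shown. The skeleton below is a hypothetical reconstruction consistent with the calls Agent.a2c(state), Agent.add_mem(...), Agent.optimize_model(k, variance), and Agent.save_agent(path); the actual class (and whether optimize_model uses a critic baseline, as the a2c name suggests) may differ.

import torch

class A2CAgent:
    """Hypothetical skeleton matching how Example No. 2 calls `Agent`."""

    def __init__(self, a2c_model, optimizer):
        self.a2c = a2c_model    # called as Agent.a2c(state) to get the action mean
        self.model = a2c_model  # alias so Agent.model.eval() works
        self.optimizer = optimizer
        self.memory = []        # (action, state, log_prob, reward) tuples

    def add_mem(self, action, state, log_prob, reward):
        self.memory.append((action, state, log_prob, reward))

    def optimize_model(self, k, variance):  # variance accepted to match the call site
        # Plain REINFORCE update over the last k transitions (assumption;
        # a true A2C would subtract a learned value baseline here).
        losses = [-log_prob * reward
                  for _, _, log_prob, reward in self.memory[-k:]]
        loss = torch.stack(losses).sum()
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        self.memory.clear()

    def save_agent(self, path):
        torch.save({'state_dict': self.model.state_dict(),
                    'optimizer': self.optimizer.state_dict()}, path)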
Example No. 3
def onDeath():
    time.sleep(1.9)
    if getScore() == -10:
        os.system("adb shell input tap 550 1700")
        time.sleep(0.5)
    else:  # Occasionally a stray pop-up appears; this dismisses it
        image = getImage()
        if (image[1000, 200] == [255, 255, 255]).all():
            os.system("adb shell input tap 200 1400")
            time.sleep(0.1)
            os.system("adb shell input tap 550 1700")
def trainJump(save, save_as=None, curr_checkpoint=None):
    model.train()
    global variance
    for episode in range(num_episodes):
        print("-----------------------------------------")
        print("Episode:", episode)

        # Get state
        state = get_distance()
        prev_score = getScore()
        print("Distance:", state)
        state = torch.tensor([state], dtype=torch.float32)

        # Calculate mean and variance
        mean = model(state)
        variance = final_variance + (initial_variance - final_variance) * \
                        math.exp(-1. * episode / variance_decay)
        print("Mean:", float(mean), "Deviation:", float(variance))

        # Construct a normal distribution from the mean and variance and sample from it
        m = Normal(mean, variance)
        action = m.sample()

        # Perform action
        print("Action:", action)
        os.system("adb shell input swipe 500 500 500 500 " + str(int(action)))

        # Get reward and optimize model
        time.sleep(0.5)
        reward = getReward(prev_score)
        if reward >= 2:
            reward = 10
        elif reward < 0:  # on death, keep the raw negative reward as the penalty
            onDeath()
        print("Reward:", reward)
        loss = -m.log_prob(action) * reward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if save:
            if (episode + 1) % 501 == 0:
                save_file = {
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                }
                # // 500 matches the 501-episode save cadence; // 1000 would reuse names
                file_name = save_as + str((episode // 500) +
                                          curr_checkpoint) + ".pth"
                torch.save(save_file, file_name)
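
Examples 1 through 3 share the same exponential exploration schedule. Factored into a helper (with the same hypothetical hyperparameters as in the sketch after Example No. 1), it looks like this; note that although the variable is named variance, it is passed to Normal(loc, scale), so PyTorch treats it as a standard deviation.

import math

def exploration_std(episode, initial=200.0, final=10.0, decay=1000.0):
    """Anneal the sampling spread from `initial` toward `final`.

    Hypothetical defaults; the repo's initial_variance, final_variance,
    and variance_decay are not shown in this listing.
    """
    return final + (initial - final) * math.exp(-episode / decay)

# Worked values with these defaults:
#   episode 0    -> 200.0
#   episode 1000 -> ~79.9  (final + (initial - final) / e)
#   episode 5000 -> ~11.3  (nearly converged to `final`)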
Example No. 5
def trainJump(save, save_as=None, curr_checkpoint=None):
    model.train()
    for episode in range(num_episodes):
        prev_score = getScore()
        print("-----------------------------------------")
        print("Episode:", episode)

        # Get state; get_distance() returns a scalar (see Example No. 3), so wrap
        # it in a list -- a bare number would not build a one-element tensor
        state = torch.Tensor([get_distance()])

        # Select action based off state
        node_activated, action = select_action(state)
        os.system("adb shell input swipe 500 500 500 500 " + str(action))

        # Optimize the model now, during the post-swipe wait, to save wall-clock time
        optimize_model()

        # Get reward and push state, node_activated, and actual_reward to memory
        time.sleep(0.5)
        actual_reward = getReward(prev_score)
        print("Node Activated:", node_activated, "Action:", action)
        if actual_reward >= 2:
            actual_reward = 10
        elif actual_reward < 0:
            onDeath()
        memory.push(state, node_activated, actual_reward)

        # Print difference between predicted and actual rewards
        predicted_reward = model(state).view(16)[node_activated]
        print("Predicted Reward:", float(predicted_reward), "Actual Reward:",
              actual_reward)
        print("-----------------------------------------")

        if save:
            if (episode + 1) % 1001 == 0:
                save_file = {
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                }
                file_name = save_as + str((episode // 1000) +
                                          curr_checkpoint) + ".pth"
                torch.save(save_file, file_name)
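
Examples 5 and 6 switch from a sampled continuous press duration to 16 discrete output nodes (model(state).view(16)), with select_action and memory defined elsewhere. A plausible epsilon-greedy selector consistent with those calls is sketched below; the node-to-milliseconds mapping and the epsilon value are assumptions.

import random
import torch

def select_action(state, eps=0.1, num_nodes=16, base_ms=300, step_ms=50):
    # Explore with probability eps, otherwise take the node whose predicted
    # reward is highest. The duration mapping below is hypothetical.
    if random.random() < eps:
        node = random.randrange(num_nodes)
    else:
        with torch.no_grad():
            node = int(model(state).view(num_nodes).argmax())
    return node, base_ms + step_ms * node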
Example No. 6
def trainJump(save, save_as=None, curr_checkpoint=None):
    model.train()
    for episode in range(num_episodes):
        prev_score = getScore()
        print("-----------------------------------------")
        print("Episode:", episode)

        # Get state (NHWC -> NCHW) and select action based on state
        state = torch.Tensor(getLessProcessed()).unsqueeze(0)
        state = state.permute(0, 3, 1, 2)  # view() here would scramble the pixels
        node_activated, action = select_action(state)
        os.system("adb shell input swipe 500 500 500 500 " + str(action))

        optimize_model()  # Optimize now, during the post-swipe wait, to save time

        # Get actual reward and push state, node_activated, and actual_reward to memory
        actual_reward = getReward(prev_score)
        print("Node Activated:", node_activated, "Action:", action)
        if actual_reward >= 2:
            actual_reward = 10
        elif actual_reward < 0:
            onDeath()
        memory.push(state, node_activated, actual_reward)

        # Print predicted and actual rewards
        predicted_reward = model(state).view(16)[node_activated]
        print("Predicted Reward:", float(predicted_reward), "Actual Reward:",
              actual_reward)
        print("-----------------------------------------")

        if save:
            if (episode + 1) % 1001 == 0:
                save_file = {
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                }
                file_name = save_as + str((episode // 1000) +
                                          curr_checkpoint) + ".pth"
                torch.save(save_file, file_name)
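
Every variant checkpoints the same {'state_dict', 'optimizer'} dictionary, so one loading routine covers them all. A sketch of the counterpart (the function name is ours; the repo's loader is not shown):

import torch

def load_checkpoint(path):
    # Restore a checkpoint written by any of the trainJump variants above.
    checkpoint = torch.load(path)
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])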