Example #1
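                        # NOTE: this fragment sits inside a nested loop over grid cells
                        # and headings; it assumes `import math`, `import numpy as np`,
                        # and project helpers (`agent`, `config`, `stateProcessor`)
                        # defined earlier in the script.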
                        # target offset rotated into the agent's local frame at heading
                        # phi; the `dx` line is truncated in the source and is assumed
                        # to mirror the `dy` rotation
                        dx = distance[0] * math.cos(phi) + distance[1] * math.sin(phi)
                        dy = distance[0] * math.sin(phi) - distance[1] * math.cos(phi)
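                        # clip the target vector to targetClipLength while keeping its direction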
                        if math.sqrt(dx**2 + dy**2) > agent.env.agent.targetClipLength:
                            angle = math.atan2(dy, dx)
                            dx = agent.env.agent.targetClipLength * math.cos(angle)
                            dy = agent.env.agent.targetClipLength * math.sin(angle)
                        #dx = agent.env.agent.targetClipMap(dx) if dx > 0 else -agent.env.agent.targetClipMap(-dx)
                        #dy = agent.env.agent.targetClipMap(dy) if dy > 0 else -agent.env.agent.targetClipMap(-dy)
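                        # state seen by the policy network: raw sensor reading plus the
                        # clipped target vector in the agent's local frame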
                        state = {
                            'sensor': sensorInfo,
                            'target': np.array([dx, dy])
                        }
                        policy[i, j] = agent.getPolicy(state)
                        Qvalue = agent.policyNet(
                            stateProcessor([state], config['device'])[0])
                        value[i, j] = Qvalue[0, policy[i, j]].cpu().item()
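            # dump the pre-training policy and value maps for this heading index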
            np.savetxt('DynamicMazePolicyBeforeTrain' + config['mapName'] +
                       'phiIdx' + str(phiIdx) + '.txt',
                       policy,
                       fmt='%d',
                       delimiter='\t')
            np.savetxt('DynamicMazeValueBeforeTrain' + config['mapName'] +
                       'phiIdx' + str(phiIdx) + '.txt',
                       value,
                       fmt='%.3f',
                       delimiter='\t')
    # plotPolicy(policy, N_A)

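    # train the agent, then roll out the learned policy from a fixed start state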
    agent.train()
    agent.env.currentState = np.array([0.0, agent.env.mapWidth / 2 + 1])
    state['state'] = np.array([0.0, agent.env.mapWidth / 2 + 1])

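    # greedy rollout: a negative epsilon presumably disables exploration in select_action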
    for step in range(nSteps):
        action = agent.select_action(agent.policyNet, state, -0.1)
        nextState, reward, done, info = agent.env.step(action)

        state = nextState

        if done:
            print('finish step:', agent.env.stepCount)
            print(agent.env.currentState)
            break


policyFlag = True

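# after training, record the greedy action and its Q-value at every grid cell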
if policyFlag:
    policy = np.zeros((env.mapHeight, env.mapWidth), dtype=np.int32)
    value = np.zeros((env.mapHeight, env.mapWidth), dtype=np.float64)
    for i in range(policy.shape[0]):
        for j in range(policy.shape[1]):
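            # grid indices are normalized by the environment's length scale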
            state = {'state': np.array([i / agent.env.lengthScale,
                                        j / agent.env.lengthScale])}
            policy[i, j] = agent.select_action(agent.policyNet, state, -0.1)
            stateTorch = stateProcessor([state], config['device'])[0]
            Qvalue = agent.policyNet(stateTorch)
            value[i, j] = Qvalue[0, policy[i, j]].cpu().item()
    np.savetxt('SimpleMazePolicyAfterTrain_stage.txt', policy, fmt='%d', delimiter='\t')
    np.savetxt('SimpleMazeValueAfterTrain_stage.txt', value, fmt='%f', delimiter='\t')