Exemplo n.º 1
0
                        dy = distance[0] * math.sin(
                            phi) - distance[1] * math.cos(phi)
                        if math.sqrt(dx**2 +
                                     dy**2) > agent.env.agent.targetClipLength:
                            angle = math.atan2(dy, dx)
                            dx = agent.env.agent.targetClipLength * math.cos(
                                angle)
                            dy = agent.env.agent.targetClipLength * math.sin(
                                angle)
                        #dx = agent.env.agent.targetClipMap(dx) if dx > 0 else -agent.env.agent.targetClipMap(-dx)
                        #dy = agent.env.agent.targetClipMap(dy) if dy > 0 else -agent.env.agent.targetClipMap(-dy)
                        state = {
                            'sensor': sensorInfo,
                            'target': np.array([dx, dy])
                        }
                        policy[i, j] = agent.getPolicy(state)
                        Qvalue = agent.policyNet(
                            stateProcessor([state], config['device'])[0])
                        value[i, j] = Qvalue[0, policy[i, j]].cpu().item()
            np.savetxt('DynamicMazePolicyBeforeTrain' + config['mapName'] +
                       'phiIdx' + str(phiIdx) + '.txt',
                       policy,
                       fmt='%d',
                       delimiter='\t')
            np.savetxt('DynamicMazeValueBeforeTrain' + config['mapName'] +
                       'phiIdx' + str(phiIdx) + '.txt',
                       value,
                       fmt='%.3f',
                       delimiter='\t')
    # plotPolicy(policy, N_A)
# Build the regression network and an identical target copy for DQN updates.
policyNet = MultiLayerNetRegression(
    netParameter['n_feature'],
    netParameter['n_hidden'],
    netParameter['n_output'],
)
targetNet = deepcopy(policyNet)

# Adam optimizer drives the policy network only; the target net is refreshed
# by copying inside the agent.
optimizer = optim.Adam(policyNet.parameters(), lr=config['learningRate'])

# Per-sample losses (reduction='none') let the agent weight transitions itself.
agent = DQNAgent(config, policyNet, targetNet, env, optimizer,
                 torch.nn.MSELoss(reduction='none'), N_A)

# Snapshot the greedy policy over the 1-D state interval [-1, 1] before training.
xSet = np.linspace(-1, 1, 100)
policy = np.zeros_like(xSet)
for idx, xVal in enumerate(xSet):
    policy[idx] = agent.getPolicy(np.array([xVal]))

np.savetxt('StabilizerPolicyBeforeTrain.txt', policy, fmt='%d')

#agent.perform_random_exploration(10)
agent.train()
#storeMemory = ReplayMemory(100000)
agent.testPolicyNet(100)
#storeMemory.write_to_text('testPolicyMemory.txt')


def customPolicy(state):
    """Hand-crafted policy stub: push the 1-D state toward negative values.

    NOTE(review): this function appears truncated in this scraped fragment —
    `action` is assigned but never returned, and no branch handles
    state[0] <= 0.1, so as written the function always returns None.
    Presumably the remaining branches and a `return action` were lost in
    extraction; confirm against the original source before relying on it.
    """
    x = state[0]
    # move towards negative
    if x > 0.1:
        action = 2
# Double-Q agent over the simple maze; default (mean-reduced) MSE loss.
agent = DQNAgent(policyNet, targetNet, env, optimizer, torch.nn.MSELoss(),
                 N_S, N_A, config=config)

# Render the pre-training greedy policy on the maze grid: wall cells
# (env.map == 0) are marked -1, free cells get the greedy action for their
# (row, col) coordinate state.
policy = deepcopy(env.map)
numRows, numCols = policy.shape
for row in range(numRows):
    for col in range(numCols):
        if env.map[row, col] == 0:
            policy[row, col] = -1
        else:
            policy[row, col] = agent.getPolicy(np.array([row, col]))

np.savetxt('DoubleQSimpleMazePolicyBeforeTrain' + mapName + '.txt',
           policy, fmt='%d', delimiter='\t')

plotPolicy(policy, N_A)

agent.train()
# Re-extract the policy grid after training, for comparison with the
# pre-training snapshot written above; walls (env.map == 0) are marked -1.
policy = deepcopy(env.map)
for i in range(policy.shape[0]):
    for j in range(policy.shape[1]):
        if env.map[i, j] == 0:
            policy[i, j] = -1
            # NOTE(review): the matching `else` branch (querying
            # agent.getPolicy for free cells, as in the pre-training loop)
            # appears to have been cut off in this scraped fragment —
            # confirm against the original source.
Exemplo n.º 4
0
# Dynamic-maze agent: states are sensor readings rather than raw coordinates.
agent = DQNAgent(policyNet, targetNet, env, optimizer, torch.nn.MSELoss(),
                 N_A, config=config)

# Pre-training policy snapshot: obstacle cells (env.mapMat == 1) map to -1,
# free cells to the greedy action for the sensor view at that position.
policy = deepcopy(env.mapMat)
rowCount, colCount = policy.shape
for row in range(rowCount):
    for col in range(colCount):
        if env.mapMat[row, col] == 1:
            policy[row, col] = -1
        else:
            obs = env.agent.getSensorInfoFromPos(np.array([row, col]))
            policy[row, col] = agent.getPolicy(obs)

np.savetxt('DynamicMazePolicyBeforeTrain' + mapName + '.txt',
           policy, fmt='%d', delimiter='\t')
#
# plotPolicy(policy, N_A)

agent.train()

for i in range(policy.shape[0]):
    for j in range(policy.shape[1]):
        if env.mapMat[i, j] == 1:
            policy[i, j] = -1
        else: