Exemplo n.º 1
0
                 torch.nn.MSELoss(reduction='none'),
                 N_A,
                 stateProcessor=stateProcessor,
                 experienceProcessor=experienceProcessor)

trainFlag = True
testFlag = True

if trainFlag:

    # Optionally restore previously saved weights: the file named by
    # config['saveModelFile'] is read with torch.load, and its
    # 'model_state_dict' / 'optimizer_state_dict' entries are loaded into the
    # agent's policy network and optimizer respectively.
    # NOTE(review): torch.load unpickles the file, so it should only be
    # pointed at trusted checkpoint files.
    if config['loadExistingModel']:
        checkpoint = torch.load(config['saveModelFile'])
        agent.policyNet.load_state_dict(checkpoint['model_state_dict'])
        agent.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    # Independent second option: let the agent restore its own checkpoint from
    # the configured prefix.
    # NOTE(review): what agent.load_checkpoint restores is not visible here;
    # if both flags are set, confirm it is meant to run after (and possibly
    # override) the state loaded above.
    if config['loadCheckpointFlag']:
        agent.load_checkpoint(config['loadCheckpointPrefix'])

    plotPolicyFlag = True
    if plotPolicyFlag:

        for phiIdx in range(8):
            phi = phiIdx * np.pi / 4.0
            # Integer-typed copy of the map; its cells are overwritten in the
            # loops below (cells where env.mapMat == 1 are set to -1).
            # The builtin `int` is used instead of `np.long`: that name was
            # only an alias of `int`, deprecated in NumPy 1.20 and removed in
            # NumPy 1.24, where it raises AttributeError.
            policy = deepcopy(env.mapMat).astype(int)
            value = deepcopy(env.mapMat)
            for i in range(policy.shape[0]):
                for j in range(policy.shape[1]):
                    if env.mapMat[i, j] == 1:
                        policy[i, j] = -1
                        value[i, j] = -1
                    else:
                        sensorInfo = env.agent.getSensorInfoFromPos(
Exemplo n.º 2
0
                 stateProcessor=stateProcessor, config=config)



trainFlag = True
testFlag = True

if trainFlag:

    # Optionally restore previously saved weights: the file named by
    # config['saveModelFile'] is read with torch.load, and its
    # 'model_state_dict' / 'optimizer_state_dict' entries are loaded into the
    # agent's policy network and optimizer respectively.
    # NOTE(review): torch.load unpickles the file, so it should only be
    # pointed at trusted checkpoint files.
    if config['loadExistingModel']:
        checkpoint = torch.load(config['saveModelFile'])
        agent.policyNet.load_state_dict(checkpoint['model_state_dict'])
        agent.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    # Independent second option: let the agent restore its own checkpoint,
    # identified by config['checkpointTag'].
    # NOTE(review): what agent.load_checkpoint restores is not visible here;
    # if both flags are set, confirm it is meant to run after (and possibly
    # override) the state loaded above.
    if config['loadCheckpoint']:
        agent.load_checkpoint(config['checkpointTag'])

    plotPolicyFlag = True
    if plotPolicyFlag:

        for phiIdx in range(8):
            phi = phiIdx * np.pi/4.0
            policy = deepcopy(env.mapMat)
            for i in range(policy.shape[0]):
                  for j in range(policy.shape[1]):
                      if env.mapMat[i, j] == 1:
                          policy[i, j] = -1
                      else:
                          sensorInfo = env.agent.getSensorInfoFromPos(np.array([i, j, phi]))
                          distance = np.array(config['targetState']) - np.array([i, j])
                          dx = distance[0] * math.cos(phi) + distance[1] * math.sin(phi)