torch.nn.MSELoss(reduction='none'), N_A, stateProcessor=stateProcessor, experienceProcessor=experienceProcessor) trainFlag = True testFlag = True if trainFlag: if config['loadExistingModel']: checkpoint = torch.load(config['saveModelFile']) agent.policyNet.load_state_dict(checkpoint['model_state_dict']) agent.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) if config['loadCheckpointFlag']: agent.load_checkpoint(config['loadCheckpointPrefix']) plotPolicyFlag = True if plotPolicyFlag: for phiIdx in range(8): phi = phiIdx * np.pi / 4.0 policy = deepcopy(env.mapMat).astype(np.long) value = deepcopy(env.mapMat) for i in range(policy.shape[0]): for j in range(policy.shape[1]): if env.mapMat[i, j] == 1: policy[i, j] = -1 value[i, j] = -1 else: sensorInfo = env.agent.getSensorInfoFromPos(
stateProcessor=stateProcessor, config=config) trainFlag = True testFlag = True if trainFlag: if config['loadExistingModel']: checkpoint = torch.load(config['saveModelFile']) agent.policyNet.load_state_dict(checkpoint['model_state_dict']) agent.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) if config['loadCheckpoint']: agent.load_checkpoint(config['checkpointTag']) plotPolicyFlag = True if plotPolicyFlag: for phiIdx in range(8): phi = phiIdx * np.pi/4.0 policy = deepcopy(env.mapMat) for i in range(policy.shape[0]): for j in range(policy.shape[1]): if env.mapMat[i, j] == 1: policy[i, j] = -1 else: sensorInfo = env.agent.getSensorInfoFromPos(np.array([i, j, phi])) distance = np.array(config['targetState']) - np.array([i, j]) dx = distance[0] * math.cos(phi) + distance[1] * math.sin(phi)