Example #1
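These snippets are excerpts of the same DDPG setup boilerplate, and several begin mid-script with actorNet or criticNet already constructed. A minimal sketch of the imports they rely on (an assumption, not part of the source) follows; the Actor, Critic, CriticConvNet, and DDPGAgent classes and the prepared netParameter, config, env, N_A, stateProcessor, and experienceProcessor objects are defined in the surrounding project:

from copy import deepcopy

import numpy as np
import torch
import torch.optim as optim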
actorTargetNet = deepcopy(actorNet)

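# the critic scores (state, action) pairs, so its input width is the state
# feature count plus the N_A action components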
criticNet = Critic(netParameter['n_feature'] + N_A, netParameter['n_hidden'])

criticTargetNet = deepcopy(criticNet)

actorOptimizer = optim.Adam(actorNet.parameters(),
                            lr=config['actorLearningRate'])
criticOptimizer = optim.Adam(criticNet.parameters(),
                             lr=config['criticLearningRate'])

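# bundle the online networks with their target copies and hand everything to
# the DDPG agent, using mean-squared error as the critic loss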
actorNets = {'actor': actorNet, 'target': actorTargetNet}
criticNets = {'critic': criticNet, 'target': criticTargetNet}
optimizers = {'actor': actorOptimizer, 'critic': criticOptimizer}
agent = DDPGAgent(config, actorNets, criticNets, env, optimizers,
                  torch.nn.MSELoss(reduction='mean'), N_A)

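# optionally sweep a grid of target positions and record the learned policy
# and value at each point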
plotPolicyFlag = True
N = 100
if plotPolicyFlag:
    phi = 0.0

    xSet = np.linspace(-10, 10, N)
    ySet = np.linspace(-10, 10, N)
    policy = np.zeros((N, N))

    value = np.zeros((N, N))
    for i, x in enumerate(xSet):
        for j, y in enumerate(ySet):
            # (x, y) is the target position; (0, 0, 0) is the particle configuration
            distance = np.array([x, y])
Example #2
criticTargetNet = deepcopy(criticNet)

actorOptimizer = optim.Adam(actorNet.parameters(),
                            lr=config['actorLearningRate'])
criticOptimizer = optim.Adam(criticNet.parameters(),
                             lr=config['criticLearningRate'])

actorNets = {'actor': actorNet, 'target': actorTargetNet}
criticNets = {'critic': criticNet, 'target': criticTargetNet}
optimizers = {'actor': actorOptimizer, 'critic': criticOptimizer}
agent = DDPGAgent(config,
                  actorNets,
                  criticNets,
                  env,
                  optimizers,
                  torch.nn.MSELoss(reduction='mean'),
                  N_A,
                  stateProcessor=stateProcessor,
                  experienceProcessor=experienceProcessor)

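# restore trained actor weights for evaluation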
checkpoint = torch.load('../../Log/Epoch15000_checkpoint.pt')

agent.actorNet.load_state_dict(checkpoint['actorNet_state_dict'])

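# evaluation-time overrides; note that the second currentState assignment
# below immediately overrides the first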
config['randomMoveFlag'] = True
config['dynamicInitialStateFlag'] = False
config['dynamicTargetFlag'] = False
config['currentState'] = [-20, -20, 1, 1, 0, 0]
config['currentState'] = [0, 0, 1, 1, 0, 0]
Example #3
criticTargetNet = deepcopy(criticNet)

actorOptimizer = optim.Adam(actorNet.parameters(),
                            lr=config['actorLearningRate'])
criticOptimizer = optim.Adam(criticNet.parameters(),
                             lr=config['criticLearningRate'])

actorNets = {'actor': actorNet, 'target': actorTargetNet}
criticNets = {'critic': criticNet, 'target': criticTargetNet}
optimizers = {'actor': actorOptimizer, 'critic': criticOptimizer}
agent = DDPGAgent(config,
                  actorNets,
                  criticNets,
                  env,
                  optimizers,
                  torch.nn.MSELoss(reduction='mean'),
                  N_A,
                  stateProcessor=stateProcessor,
                  experienceProcessor=experienceProcessor)

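# optionally warm-start both the online and target networks from a saved
# state_dict checkpoint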
if config['loadExistingModel']:
    checkpoint = torch.load(config['loadExistingModelCheckPoint'])
    agent.actorNet.load_state_dict(checkpoint['actorNet_state_dict'])
    agent.actorNet_target.load_state_dict(checkpoint['actorNet_state_dict'])
    agent.criticNet.load_state_dict(checkpoint['criticNet_state_dict'])
    agent.criticNet_target.load_state_dict(checkpoint['criticNet_state_dict'])

plotPolicyFlag = False
N = 100
if plotPolicyFlag:
Example #4
# the first lines of this snippet were lost in extraction; judging from the
# dangling argument, the actor network was presumably constructed like this:
actorNet = Actor(netParameter['n_feature'],
                 netParameter['n_hidden'],
                 netParameter['n_output'])

actorTargetNet = deepcopy(actorNet)

criticNet = Critic(netParameter['n_feature'] + N_A,
                   netParameter['n_hidden'])

criticTargetNet = deepcopy(criticNet)

actorOptimizer = optim.Adam(actorNet.parameters(), lr=config['actorLearningRate'])
criticOptimizer = optim.Adam(criticNet.parameters(), lr=config['criticLearningRate'])

actorNets = {'actor': actorNet, 'target': actorTargetNet}
criticNets = {'critic': criticNet, 'target': criticTargetNet}
optimizers = {'actor': actorOptimizer, 'critic': criticOptimizer}
agent = DDPGAgent(config, actorNets, criticNets, env, optimizers,
                  torch.nn.MSELoss(reduction='mean'), N_A)


checkpoint = torch.load('Log/Finalepoch20000_checkpoint.pt')
agent.actorNet.load_state_dict(checkpoint['actorNet_state_dict'])

plotPolicyFlag = True
N = 100
if plotPolicyFlag:
    phi = 0.0

    xSet = np.linspace(-10, 10, N)
    ySet = np.linspace(-10, 10, N)
    policyX = np.zeros((N, N))
Example #5
actorTargetNet = deepcopy(actorNet)

criticNet = CriticConvNet(netParameter['n_feature'],
                          netParameter['n_hidden'],
                          netParameter['n_output'])

criticTargetNet = deepcopy(criticNet)

actorOptimizer = optim.Adam(actorNet.parameters(), lr=config['actorLearningRate'])
criticOptimizer = optim.Adam(criticNet.parameters(), lr=config['criticLearningRate'])

actorNets = {'actor': actorNet, 'target': actorTargetNet}
criticNets = {'critic': criticNet, 'target': criticTargetNet}
optimizers = {'actor': actorOptimizer, 'critic': criticOptimizer}
agent = DDPGAgent(config, actorNets, criticNets, env, optimizers,
                  torch.nn.MSELoss(reduction='mean'), N_A,
                  stateProcessor=stateProcessor)


checkpoint = torch.load('Log/Finalepoch2500_checkpoint.pt')
agent.actorNet.load_state_dict(checkpoint['actorNet_state_dict'])

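# evaluation setup: fixed 2-D start and target, with trajectory output every
# 10 steps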
config['dynamicInitialStateFlag'] = False
config['dynamicTargetFlag'] = False
config['currentState'] = [15, 15]
config['targetState'] = [10, 15]
config['filetag'] = 'test'
config['trajOutputInterval'] = 10
config['trajOutputFlag'] = True
config['customExploreFlag'] = False

with open('config_test.json', 'w') as f:
    # the with-block body was truncated in the source; a likely completion
    # (requires "import json") is to serialize the test configuration:
    json.dump(config, f)
Example #6
criticTargetNet = deepcopy(criticNet)

actorOptimizer = optim.Adam(actorNet.parameters(),
                            lr=config['actorLearningRate'])
criticOptimizer = optim.Adam(criticNet.parameters(),
                             lr=config['criticLearningRate'])

actorNets = {'actor': actorNet, 'target': actorTargetNet}
criticNets = {'critic': criticNet, 'target': criticTargetNet}
optimizers = {'actor': actorOptimizer, 'critic': criticOptimizer}
agent = DDPGAgent(config,
                  actorNets,
                  criticNets,
                  env,
                  optimizers,
                  torch.nn.MSELoss(reduction='mean'),
                  N_A,
                  stateProcessor=stateProcessor,
                  experienceProcessor=experienceProcessor)

plotPolicyFlag = False
N = 100
if plotPolicyFlag:
    phi = 0.0

    xSet = np.linspace(-10, 10, N)
    ySet = np.linspace(-10, 10, N)
    policy = np.zeros((N, N))

    value = np.zeros((N, N))
Example #7
actorTargetNet = deepcopy(actorNet)

criticNet = Critic(netParameter['n_feature'] + N_A, netParameter['n_hidden'])

criticTargetNet = deepcopy(criticNet)

actorOptimizer = optim.Adam(actorNet.parameters(),
                            lr=config['actorLearningRate'])
criticOptimizer = optim.Adam(criticNet.parameters(),
                             lr=config['criticLearningRate'])

actorNets = {'actor': actorNet, 'target': actorTargetNet}
criticNets = {'critic': criticNet, 'target': criticTargetNet}
optimizers = {'actor': actorOptimizer, 'critic': criticOptimizer}
agent = DDPGAgent(config, actorNets, criticNets, env, optimizers,
                  torch.nn.MSELoss(reduction='mean'), N_A)

plotPolicyFlag = False
N = 100
if plotPolicyFlag:
    phi = 0.0

    xSet = np.linspace(-10, 10, N)
    ySet = np.linspace(-10, 10, N)
    policy = np.zeros((N, N))

    value = np.zeros((N, N))
    for i, x in enumerate(xSet):
        for j, y in enumerate(ySet):
            # (x, y) is the target position; (0, 0, 0) is the particle configuration
            distance = np.array([x, y])
Example #8
actorTargetNet = deepcopy(actorNet)

criticNet = CriticConvNet(netParameter['n_feature'],
                          netParameter['n_hidden'],
                          netParameter['n_output'],
                          config['n_channels'])

criticTargetNet = deepcopy(criticNet)

actorOptimizer = optim.Adam(actorNet.parameters(), lr=config['actorLearningRate'])
criticOptimizer = optim.Adam(criticNet.parameters(), lr=config['criticLearningRate'])

actorNets = {'actor': actorNet, 'target': actorTargetNet}
criticNets = {'critic': criticNet, 'target': criticTargetNet}
optimizers = {'actor': actorOptimizer, 'critic': criticOptimizer}
agent = DDPGAgent(config, actorNets, criticNets, env, optimizers,
                  torch.nn.MSELoss(reduction='mean'), N_A,
                  stateProcessor=stateProcessor)

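# restore the agent from a saved checkpoint prefix via its own helper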
if config['loadCheckpointFlag']:
    agent.load_checkpoint(config['loadCheckpointPrefix'])

plotPolicyFlag = False
N = 100
if plotPolicyFlag:
    phi = 0.0

    xSet = np.linspace(-10, 10, N)
    ySet = np.linspace(-10, 10, N)
    policy = np.zeros((N, N))
Example #9
actorTargetNet = deepcopy(actorNet)

criticNet = CriticConvNet(netParameter['n_feature'],
                          netParameter['n_hidden'],
                          netParameter['n_output'])

criticTargetNet = deepcopy(criticNet)

actorOptimizer = optim.Adam(actorNet.parameters(), lr=config['actorLearningRate'])
criticOptimizer = optim.Adam(criticNet.parameters(), lr=config['criticLearningRate'])

actorNets = {'actor': actorNet, 'target': actorTargetNet}
criticNets = {'critic': criticNet, 'target': criticTargetNet}
optimizers = {'actor': actorOptimizer, 'critic': criticOptimizer}
agent = DDPGAgent(config, actorNets, criticNets, env, optimizers,
                  torch.nn.MSELoss(reduction='mean'), N_A,
                  stateProcessor=stateProcessor,
                  experienceProcessor=experienceProcessor)

if config['loadExistingModel']:
    checkpoint = torch.load(config['loadExistingModelCheckPoint'])
    agent.actorNet.load_state_dict(checkpoint['actorNet_state_dict'])
    agent.actorNet_target.load_state_dict(checkpoint['actorNet_state_dict'])
    agent.criticNet.load_state_dict(checkpoint['criticNet_state_dict'])
    agent.criticNet_target.load_state_dict(checkpoint['criticNet_state_dict'])


if config['loadCheckpointFlag']:
    agent.load_checkpoint(config['loadCheckpointPrefix'])

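# explicitly attach the freshly built optimizers to the agent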
agent.actor_optimizer = optimizers['actor']
agent.critic_optimizer = optimizers['critic']