Example #1
from __future__ import print_function
import numpy as np
from Environment_V2 import environment
from Model_dpg import PolicyGradient

use_cuda = False  # set to True if training with GPU

ENV = environment.env([21, 14], [45, 87], 999)  # positional args: start_loc, target, time

action_dic = ['up', 'upright', 'right', 'rightdown', 'down', 'downleft', 'left', 'leftup']  # the 8 discrete actions (A_DIM=8)
GAMMA = 0.99  # reward discount factor


# args = GetConfiguration()
# args.model_dir = os.path.abspath(os.path.join(os.path.dirname(__file__),os.path.pardir)) + '/SmartST/model_saved_rl/'
# args.result_dir = os.path.abspath(os.path.join(os.path.dirname(__file__),os.path.pardir)) + '/SmartST/result_saved_rl/'

PG = PolicyGradient(A_DIM=8, lr=0.001, reward_decay=GAMMA)
value_point = ENV.data_base.value_point

if __name__ == '__main__':

    episode = 0
    PG.build_net()

    while True:
        current_state = np.array(ENV.reset(start_loc=value_point[15], target=[48, 46], time=1), dtype='float32')
        print(current_state.shape)
        step = 0

        for step in range(10000):
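            # The source listing is cut off at this point. Below is a minimal,
            # hedged sketch of how a REINFORCE-style inner loop is usually
            # completed; choose_action / store_transition / learn and the
            # step() return values are assumed names, not shown in the excerpt.
            action = PG.choose_action(current_state)                 # sample an action index
            next_state, reward, done = ENV.step(action_dic[action])  # assumed step() signature
            PG.store_transition(current_state, action, reward)       # buffer the transition
            current_state = np.array(next_state, dtype='float32')
            if done:
                PG.learn()   # policy update from the stored episode (Monte-Carlo returns)
                episode += 1
                break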
Example #2
# Imports this excerpt relies on (not shown in the original listing):
import os

import torch
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable

from Environment_V2 import environment  # import path as used in Example #1

# `args` (a configuration object providing lrate, GAMMA, use_plt, ...; cf. the
# GetConfiguration() comment in Example #1) and the PolicyGradient class are
# assumed to be defined elsewhere in the original script.

args.model_dir = os.path.abspath(
    os.path.join(os.path.dirname(__file__),
                 os.path.pardir)) + '/SmartST/model_saved_rl/'
args.result_dir = os.path.abspath(
    os.path.join(os.path.dirname(__file__),
                 os.path.pardir)) + '/SmartST/result_saved_rl/'

ac_dic = [
    'up', 'upright', 'right', 'rightdown', 'down', 'downleft', 'left', 'leftup'
]
PG = PolicyGradient(A_DIM=8, S_DIM=3, lr=args.lrate,
                    reward_decay=args.GAMMA).cuda()
Optimizer = optim.Adam(PG.parameters(), lr=args.lrate)
# env signature: start_loc, target, time, alpha=0.5, time_factor=0.1, plot=True, sleep=0.5
ENV = environment.env(start_loc=[2, 51],
                      target=[48, 46],
                      time=1,
                      plot=args.use_plt)

if __name__ == '__main__':
    value_point = ENV.data_base.value_point
    episode = 0
    PG.train()

    while True:
        s = ENV.reset(start_loc=value_point[22], target=[48, 46], time=1)

        cx = Variable(torch.zeros(1, 256)).cuda()  # LSTM cell state
        hx = Variable(torch.zeros(1, 256)).cuda()  # LSTM hidden state

        step = 0
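        # The source listing is cut off at this point. Below is a minimal,
        # hedged sketch of a typical recurrent-policy rollout; the PG forward
        # signature, the step() return values and the use of F.softmax are
        # assumptions, not shown in the excerpt.
        while True:
            s_t = Variable(torch.FloatTensor(s).unsqueeze(0)).cuda()
            logits, (hx, cx) = PG(s_t, (hx, cx))           # assumed forward: state + LSTM state
            prob = F.softmax(logits, dim=-1)
            action = int(prob.data.multinomial(1)[0, 0])   # sample one of the 8 actions
            s, reward, done = ENV.step(ac_dic[action])     # assumed step() signature
            step += 1
            if done:
                break
        # (the policy-gradient update via Optimizer would follow once the
        #  episode terminates; it is omitted from this sketch)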
Example #3
import torch
import torch.optim as optim
from torch.autograd import Variable
import matplotlib.pyplot as plt
import torch.nn.functional as F
import random

from Agent.ac import Actor, Critic
from Environment_V2 import environment  # import path as used in Example #1

actor = Actor(A_DIM=8).cuda()
critic = Critic().cuda()

a_opt = optim.Adam(actor.parameters(), lr=0.001)
c_opt = optim.Adam(critic.parameters(), lr=0.001)

ENV = environment.env([21, 14], [45, 87], 999, plot=False)  # start_loc, target, time

action_dic = ['up', 'upright', 'right', 'rightdown', 'down', 'downleft', 'left', 'leftup']
saved_dict = "saved_model"
saved_fig = "saved_figure"

GAMMA = 0.99    # reward discount factor
TAU = 1.0
EnCOEF = 0.01   # entropy regularization coefficient (presumably)
max_times = 100

if __name__ == '__main__':
    actor.train()
    critic.train()

    value_point = ENV.data_base.value_point
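    # The source listing is cut off at this point. Below is a minimal, hedged
    # sketch of one advantage actor-critic episode; the Actor/Critic forward
    # signatures and the env's reset()/step() return values are assumptions,
    # and plain discounted returns are used here instead of GAE.
    for episode in range(max_times):
        # illustrative: start from a random valid point, same target as above
        s = ENV.reset(start_loc=random.choice(value_point), target=[45, 87], time=999)
        log_probs, values, rewards, entropies = [], [], [], []
        done = False

        while not done:
            s_t = Variable(torch.FloatTensor(s).unsqueeze(0)).cuda()
            logits = actor(s_t)                            # assumed: (1, 8) action logits
            value = critic(s_t).view(-1)                   # assumed: a single state value
            log_prob = F.log_softmax(logits, dim=-1)
            prob = F.softmax(logits, dim=-1)
            action = int(prob.data.multinomial(1)[0, 0])   # sample one of the 8 actions
            s, r, done = ENV.step(action_dic[action])      # assumed step() signature

            log_probs.append(log_prob[0, action])
            values.append(value)
            rewards.append(r)
            entropies.append(-(prob * log_prob).sum())

        # discounted returns, with the critic value as a baseline for the advantage
        R, returns = 0.0, []
        for r in reversed(rewards):
            R = r + GAMMA * R
            returns.insert(0, R)

        a_loss, c_loss = 0.0, 0.0
        for log_p, v, ret, ent in zip(log_probs, values, returns, entropies):
            advantage = ret - float(v.data[0])
            a_loss = a_loss - log_p * advantage - EnCOEF * ent
            c_loss = c_loss + (v - ret).pow(2)

        a_opt.zero_grad()
        c_opt.zero_grad()
        (a_loss + c_loss.sum()).backward()
        a_opt.step()
        c_opt.step()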