# --- Script 1: REINFORCE-style training with the Model_dpg PolicyGradient agent ---
from __future__ import print_function

import numpy as np

from Environment_V2 import environment
from Model_dpg import PolicyGradient

use_cuda = False  # set to True if training with GPU

# Environment: start location [21, 14], target [45, 87], start time 999.
ENV = environment.env([21, 14], [45, 87], 999)
action_dic = ['up', 'upright', 'right', 'rightdown',
              'down', 'downleft', 'left', 'leftup']
GAMMA = 0.99  # discount factor for future rewards

# args = GetConfiguration()
# args.model_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)) + '/SmartST/model_saved_rl/'
# args.result_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)) + '/SmartST/result_saved_rl/'

PG = PolicyGradient(A_DIM=8, lr=0.001, reward_decay=GAMMA)
value_point = ENV.data_base.value_point  # valid start locations on the map

if __name__ == '__main__':
    episode = 0
    PG.build_net()
    while True:
        current_state = np.array(
            ENV.reset(start_loc=value_point[15], target=[48, 46], time=1),
            dtype='float32')
        print(current_state.shape)
        step = 0
        for step in range(10000):
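            # The original script is truncated at this point. What follows is a
            # minimal sketch of a typical REINFORCE rollout body, assuming a
            # choose_action / store_transition / learn interface on PG and that
            # ENV.step(action_index) returns (next_state, reward, done); these
            # names and signatures are assumptions, not confirmed by the source.
            action = PG.choose_action(current_state)     # sample an action index in [0, 7]
            next_state, reward, done = ENV.step(action)  # assumed step signature
            PG.store_transition(current_state, action, reward)
            current_state = np.array(next_state, dtype='float32')
            if done:
                PG.learn()  # Monte-Carlo policy-gradient update over the stored episode
                episode += 1
                break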
# --- Script 2: recurrent GPU variant driven by a parsed configuration (mid-file
# --- excerpt; the import block below is restored, with assumptions noted) ---
import os

import torch
import torch.optim as optim
from torch.autograd import Variable

from Environment_V2 import environment
from Model_dpg import PolicyGradient  # assumed import path; not shown in the excerpt

# `args` comes from a GetConfiguration() call (see the commented-out lines in
# script 1); its definition is not part of this excerpt.
args.model_dir = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.path.pardir)) + '/SmartST/model_saved_rl/'
args.result_dir = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.path.pardir)) + '/SmartST/result_saved_rl/'

ac_dic = ['up', 'upright', 'right', 'rightdown',
          'down', 'downleft', 'left', 'leftup']

PG = PolicyGradient(A_DIM=8, S_DIM=3, lr=args.lrate, reward_decay=args.GAMMA).cuda()
Optimizer = optim.Adam(PG.parameters(), lr=args.lrate)

# env signature: start_loc, target, time, alpha=0.5, time_factor=0.1, plot=True, sleep=0.5
ENV = environment.env(start_loc=[2, 51], target=[48, 46], time=1, plot=args.use_plt)

if __name__ == '__main__':
    value_point = ENV.data_base.value_point
    episode = 0
    PG.train()  # switch the network to training mode
    while True:
        s = ENV.reset(start_loc=value_point[22], target=[48, 46], time=1)
        # fresh LSTM hidden/cell states at the start of each episode
        cx = Variable(torch.zeros(1, 256)).cuda()
        hx = Variable(torch.zeros(1, 256)).cuda()
        step = 0
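        # Truncated in the original. A sketch of how such a recurrent rollout
        # typically continues, assuming PG maps (state, (hx, cx)) to action
        # logits plus the next LSTM state, and that ENV.step(action_index)
        # returns (next_state, reward, done) -- assumed interfaces only.
        while True:
            state_t = torch.as_tensor(s, dtype=torch.float32).unsqueeze(0).cuda()
            logits, (hx, cx) = PG((Variable(state_t), (hx, cx)))
            prob = torch.softmax(logits, dim=1)
            action = prob.multinomial(1).item()  # sample an action index
            s, r, done = ENV.step(action)        # assumed step signature
            step += 1
            if done:
                episode += 1
                break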
# --- Script 3: actor-critic setup with separate Actor and Critic networks ---
import random

import torch
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
import matplotlib.pyplot as plt

from Agent.ac import Actor, Critic
from Environment_V2 import environment  # needed for environment.env below

actor = Actor(A_DIM=8).cuda()
critic = Critic().cuda()
a_opt = optim.Adam(actor.parameters(), lr=0.001)
c_opt = optim.Adam(critic.parameters(), lr=0.001)

ENV = environment.env([21, 14], [45, 87], 999, plot=False)
action_dic = ['up', 'upright', 'right', 'rightdown',
              'down', 'downleft', 'left', 'leftup']

saved_dict = "saved_model"   # directory for model checkpoints
saved_fig = "saved_figure"   # directory for training figures

GAMMA = 0.99     # discount factor
TAU = 1.0        # presumably the GAE smoothing parameter (lambda)
EnCOEF = 0.01    # entropy-bonus coefficient
max_times = 100  # maximum steps per episode

if __name__ == '__main__':
    actor.train()
    critic.train()
    value_point = ENV.data_base.value_point
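    # The script is truncated here. Below is a minimal sketch of an advantage
    # actor-critic update loop under assumed interfaces: actor(state) returns
    # action logits, critic(state) returns a (1, 1) value, and
    # ENV.step(action_index) returns (next_state, reward, done). None of these
    # signatures are confirmed by the source; for brevity the sketch uses a
    # plain advantage rather than GAE, so TAU is unused here.
    episode = 0
    while True:
        s = ENV.reset(start_loc=value_point[random.randint(0, len(value_point) - 1)],
                      target=[45, 87], time=1)
        log_probs, values, rewards, entropies = [], [], [], []
        done = False
        for t in range(max_times):
            state = torch.as_tensor(s, dtype=torch.float32).unsqueeze(0).cuda()
            logits = actor(state)
            value = critic(state)
            prob = F.softmax(logits, dim=1)
            log_prob = F.log_softmax(logits, dim=1)
            entropies.append(-(log_prob * prob).sum(1, keepdim=True))
            action = prob.multinomial(1).detach()
            s, r, done = ENV.step(action.item())  # assumed step signature
            log_probs.append(log_prob.gather(1, action))
            values.append(value)
            rewards.append(r)
            if done:
                break
        # bootstrap the return from the critic if the episode was cut off
        if done:
            R = torch.zeros(1, 1).cuda()
        else:
            state = torch.as_tensor(s, dtype=torch.float32).unsqueeze(0).cuda()
            R = critic(state).detach()
        actor_loss, critic_loss = 0.0, 0.0
        for i in reversed(range(len(rewards))):
            R = rewards[i] + GAMMA * R
            advantage = R - values[i]
            critic_loss = critic_loss + 0.5 * advantage.pow(2)
            actor_loss = actor_loss \
                - log_probs[i] * advantage.detach() - EnCOEF * entropies[i]
        a_opt.zero_grad()
        c_opt.zero_grad()
        (actor_loss + critic_loss).sum().backward()
        a_opt.step()
        c_opt.step()
        episode += 1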