class AgentServer(WebSocket):
    agent = CnnDqnAgent()
    agent_initialized = False
    cycle_counter = 0
    thread_event = threading.Event()
    log_file = args.log_file
    reward_sum = 0
    depth_image_dim = 32 * 32
    depth_image_count = 1

    def send_action(self, action):
        dat = msgpack.packb({"command": str(action)})
        self.send(dat, binary=True)

    def received_message(self, m):
        payload = m.data
        dat = msgpack.unpackb(payload)

        image = []
        for i in xrange(self.depth_image_count):
            image.append(Image.open(io.BytesIO(bytearray(dat['image'][i]))))
        depth = []
        for i in xrange(self.depth_image_count):
            d = Image.open(io.BytesIO(bytearray(dat['depth'][i])))
            depth.append(np.array(ImageOps.grayscale(d)).reshape(self.depth_image_dim))

        observation = {"image": image, "depth": depth}
        reward = dat['reward']
        end_episode = dat['endEpisode']

        if not self.agent_initialized:
            self.agent_initialized = True
            print("initializing agent...")
            self.agent.agent_init(
                use_gpu=args.gpu,
                depth_image_dim=self.depth_image_dim * self.depth_image_count)

            action = self.agent.agent_start(observation)
            self.send_action(action)
            with open(self.log_file, 'w') as the_file:
                the_file.write('cycle, episode_reward_sum \n')
        else:
            self.thread_event.wait()
            self.cycle_counter += 1
            self.reward_sum += reward

            if end_episode:
                self.agent.agent_end(reward)
                action = self.agent.agent_start(observation)  # TODO
                self.send_action(action)
                with open(self.log_file, 'a') as the_file:
                    the_file.write(str(self.cycle_counter) + ',' + str(self.reward_sum) + '\n')
                self.reward_sum = 0
            else:
                action, eps, q_now, obs_array = self.agent.agent_step(reward, observation)
                self.send_action(action)
                self.agent.agent_step_update(reward, action, eps, q_now, obs_array)

        self.thread_event.set()
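For context, a minimal sketch of how a handler like AgentServer is typically mounted, assuming ws4py's CherryPy integration; the port and route below are illustrative, not taken from the source.

# Hypothetical server bootstrap for AgentServer (sketch, ws4py + CherryPy).
import cherrypy
from ws4py.server.cherrypyserver import WebSocketPlugin, WebSocketTool

class Root(object):
    @cherrypy.expose
    def ws(self):
        pass  # the websocket tool upgrades this endpoint to a WebSocket

WebSocketPlugin(cherrypy.engine).subscribe()
cherrypy.tools.websocket = WebSocketTool()
cherrypy.config.update({'server.socket_port': 8765})  # illustrative port
cherrypy.quickstart(Root(), '/', config={
    '/ws': {'tools.websocket.on': True,
            'tools.websocket.handler_cls': AgentServer}})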
class AgentController:
    def __init__(self):
        self.agent = CnnDqnAgent()
        self.agent_initialized = False
        self.cycle_counter = 0
        self.log_file = 'reward.log'
        self.reward_sum = 0
        # five command slots; each is later set to one of
        # "press", "up", "down", "right", "left", or "none"
        self.commands = ["none", "none", "none", "none", "none"]

    def get_commands(self):
        return self.commands

    def set_commands_from_action(self, action):
        command_candidate = ["press", "up", "down", "right", "left", "none"]
        self.commands = [command_candidate[a] for a in action]

    def update(self, message):
        image = message["image"]
        pad_states = message["pad_states"]
        end_episode = message['end_episode']
        observation = {"image": image, "pad_states": pad_states}
        reward = message['reward']

        if not self.agent_initialized:
            self.agent_initialized = True
            print("initializing agent...")
            self.agent.agent_init(use_gpu=args.gpu, pad_states_dim=len(pad_states))

            action = self.agent.agent_start(observation)
            self.set_commands_from_action(action)
            with open(self.log_file, 'w') as the_file:
                the_file.write('cycle, episode_reward_sum \n')
        else:
            self.cycle_counter += 1
            self.reward_sum += reward

            if end_episode:
                self.agent.agent_end(reward)
                with open(self.log_file, 'a') as the_file:
                    the_file.write(str(self.cycle_counter) + ',' + str(self.reward_sum) + '\n')
                self.reward_sum = 0
            else:
                action, eps, q_now, obs_array = self.agent.agent_step(reward, observation)
                self.set_commands_from_action(action)
                self.agent.agent_step_update(reward, action, eps, q_now, obs_array)
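A minimal sketch of how a caller might drive AgentController. The message dict shape (image, pad_states, reward, end_episode) follows update() above; the message producer and command sink are hypothetical names, not part of the source.

# Hypothetical driver loop for AgentController (sketch).
controller = AgentController()
while True:
    message = receive_message_from_env()   # assumed: yields the dict update() expects
    controller.update(message)
    commands = controller.get_commands()   # e.g. ["none", "up", "none", "none", "press"]
    send_commands_to_env(commands)         # assumed transport back to the environment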
import argparse
import threading

import numpy as np

from cnn_dqn_agent import CnnDqnAgent  # assumed module path for CnnDqnAgent

parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('--gpu', '-g', default=-1, type=int,
                    help='GPU ID (negative value indicates CPU)')
parser.add_argument('--log-file', '-l', default='reward.log', type=str,
                    help='reward log file name')
args = parser.parse_args()

agent = CnnDqnAgent()
agent_initialized = False
cycle_counter = 0
thread_event = threading.Event()
log_file = args.log_file
reward_sum = 0
depth_image_dim = 32 * 32
depth_image_count = 1
total_episode = 10000
episode_count = 0

while episode_count <= total_episode:
    if not agent_initialized:
        agent_initialized = True
        print("initializing agent...")
        # the second argument is completed here following the sibling variants
        agent.agent_init(use_gpu=args.gpu,
                         depth_image_dim=depth_image_dim * depth_image_count)
    # (remainder of the episode loop not shown in the source)
def agent_process(gpu_id, log_file, q_from_parent, q_to_parent):
    # initialization
    depth_image_dim = 32 * 32
    depth_image_count = 1
    has_started = False
    cycle_counter = 0
    reward_sum = 0

    agent = CnnDqnAgent()
    print("initializing agent...")
    agent.agent_init(
        use_gpu=gpu_id,
        depth_image_dim=depth_image_dim * depth_image_count,
    )
    with open(log_file, 'w') as the_file:
        the_file.write('cycle, episode_reward_sum \n')

    # step loop: a None sentinel from the parent terminates the process
    byte_data = q_from_parent.get()
    while byte_data is not None:
        # data extraction
        dat = msgpack.unpackb(byte_data)
        image = [
            Image.open(io.BytesIO(bytearray(dat[b'image'][i])))
            for i in range(depth_image_count)
        ]
        depth = [
            np.array(ImageOps.grayscale(
                Image.open(io.BytesIO(bytearray(dat[b'depth'][i])))
            )).reshape(depth_image_dim)
            for i in range(depth_image_count)
        ]
        observation = {"image": image, "depth": depth}
        reward = dat[b'reward']
        end_episode = dat[b'endEpisode']

        # action-making
        ret = None
        if not has_started:
            has_started = True
            ret = agent.agent_start(observation)
        else:
            cycle_counter += 1
            reward_sum += reward
            if end_episode:
                agent.agent_end(reward)
                with open(log_file, 'a') as the_file:
                    the_file.write('%d, %f\n' % (cycle_counter, reward_sum))
                reward_sum = 0
                ret = agent.agent_start(observation)
            else:
                action, eps, q_now, new_feature_vec, deg_interest = agent.agent_step(
                    reward, observation)
                agent.agent_step_update(reward, action, eps, q_now,
                                        new_feature_vec, deg_interest)
                ret = (action, deg_interest)

        q_to_parent.put(ret)
        byte_data = q_from_parent.get()
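A minimal sketch of how a parent might host agent_process, using the standard library's multiprocessing queues; this wiring is not in the source, and the payload variable is an assumed placeholder.

# Hypothetical parent-side wiring for agent_process (sketch).
import multiprocessing as mp

if __name__ == '__main__':
    q_to_child = mp.Queue()
    q_from_child = mp.Queue()
    proc = mp.Process(target=agent_process,
                      args=(-1, 'reward.log', q_to_child, q_from_child))
    proc.start()

    # packed_observation: msgpack bytes as received from Unity (assumed placeholder)
    q_to_child.put(packed_observation)
    action = q_from_child.get()  # agent's reply for this step

    q_to_child.put(None)  # None sentinel shuts the worker down
    proc.join()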
class AgentServer(WebSocket):
    test_num = 5000  # number of test episodes to run per model
    agent = CnnDqnAgent()  # instance of the CnnDqnAgent class in cnn_dqn_agent.py
    agent_initialized = False
    thread_event = threading.Event()  # provides wait()/set() for synchronization
    reward_sum = 0
    depth_image_dim = 32 * 32  # defined here; used to reshape the received depth image
    image_count = 1  # number of images sent from the Unity agent per cycle
    log_file = args.log_file
    gpu = args.gpu
    draw = args.draw
    test = args.test
    episode_num = args.episode  # number of episodes completed so far
    folder = args.folder
    model_num = args.model_num
    cycle_counter = 0

    print "------------------------------------------------"
    print "check that the ./%s directory exists" % (folder)
    print "check that the log file is the right one"
    print "------------------------------------------------"

    def send_action(self, action):
        dat = msgpack.packb({"command": str(action)})
        self.send(dat, binary=True)

    def received_message(self, m):
        try:
            payload = m.data
            dat = msgpack.unpackb(payload)

            image = []
            for i in xrange(self.image_count):
                image.append(Image.open(io.BytesIO(bytearray(dat['image'][i]))))
            depth = []
            for i in xrange(self.image_count):
                d = Image.open(io.BytesIO(bytearray(dat['depth'][i])))
                # reshape each depth image into a 1-D vector
                depth.append(np.array(ImageOps.grayscale(d)).reshape(self.depth_image_dim))

            observation = {"image": image, "depth": depth}
            reward = dat['reward']
            end_episode = dat['endEpisode']
            lastZ = dat['score']

            if not self.agent_initialized:
                self.agent_initialized = True
                print("initializing agent...")
                # the only place depth_image_dim is passed as an argument
                self.agent.agent_init(depth_image_dim=self.depth_image_dim,
                                      image_count=self.image_count,
                                      use_gpu=self.gpu,
                                      test=self.test,
                                      folder=self.folder,
                                      model_num=self.model_num)

                action = self.agent.agent_start(observation)
                self.send_action(action)
                print "send"

                # write the log header (self.succeed is expected to be set
                # elsewhere, e.g. as a resume flag)
                if not self.succeed:
                    with open(self.log_file, 'w') as the_file:
                        the_file.write('Cycle,Score,Episode \n')
                if (args.draw):
                    self.fig, self.ax1 = plt.subplots(1, 1)
            else:
                self.thread_event.wait()
                self.cycle_counter += 1
                self.reward_sum += reward

                if end_episode:
                    self.agent.agent_end(reward)
                    # write to the log file
                    with open(self.log_file, 'a') as the_file:
                        the_file.write(str(self.cycle_counter) + ',' + str(lastZ) +
                                       ',' + str(self.episode_num) + '\n')
                    print "Last Player's Z is %d" % (lastZ)
                    self.reward_sum = 0

                    if (args.test and self.episode_num % self.test_num == 0):
                        self.model_num += 10000
                        # self.velocity is expected to be set elsewhere
                        self.agent.q_net.load_model(self.model_num, self.velocity)

                    self.episode_num += 1
                    print "----------------------------------"
                    print "Episode %d Start" % (self.episode_num)
                    print "----------------------------------"
                    action = self.agent.agent_start(observation)  # TODO
                    self.send_action(action)
                else:
                    action, eps, q_now, obs_array = self.agent.agent_step(observation)
                    self.send_action(action)
                    self.agent.agent_step_update(reward, action, eps, q_now)
                    if args.draw:
                        self.pause_Q_plot(q_now.ravel())

            self.thread_event.set()
        except:
            import traceback
            import sys
            traceback.print_exc()
            sys.exit()

    def pause_Image_plot(self, img):
        plt.cla()
        plt.imshow(img)
        plt.pause(1.0 / 10**10)  # argument is the sleep time

    # plot of the Q-function
    def pause_Q_plot(self, q):
        self.ax1.cla()
        actions = range(3)
        q = q[:3]
        max_q_abs = max(abs(q))
        if max_q_abs != 0:
            q = q / float(max_q_abs)

        self.ax1.set_xticks(actions)
        self.ax1.set_xticklabels(['Left', 'Forward', 'Right'], rotation=0,
                                 fontsize='small')
        self.ax1.set_xlabel("Action")  # x-axis label
        self.ax1.set_ylabel("Q_Value")  # y-axis label
        self.ax1.set_ylim(-1.1, 1.1)  # clamp y to [-1.1, 1.1]
        self.ax1.set_xlim(-1, 4)
        self.ax1.hlines(y=0, xmin=-1, xmax=4, colors='r', linewidths=2)  # line at y=0
        self.ax1.bar(actions, q, align="center")
        plt.pause(1.0 / 10**10)  # argument is the sleep time
class AgentServer(WebSocket):
    agent = CnnDqnAgent()
    agent_initialized = False
    cycle_counter = 1
    rgb_image_count = 1
    depth_image_count = 0
    depth_image_dim = 0
    ir_count = 1
    ground_count = 0
    compass_count = 1
    target_count = 1
    if args.mode_distribute:
        thread_event = threading.Event()

    def send_action(self, action):
        dat = msgpack.packb({"command": "".join(map(str, action))})
        self.send(dat, binary=True)

    def received_message(self, m):
        payload = m.data
        dat = msgpack.unpackb(payload, encoding='utf-8')

        image = []
        depth = []
        agent_count = len(dat['image'])
        for i in range(agent_count):
            image.append(Image.open(io.BytesIO(bytearray(dat['image'][i]))))
            if (self.depth_image_count == 1):
                depth_dim = len(dat['depth'][0])
                temp = Image.open(io.BytesIO(bytearray(dat['depth'][i])))
                depth.append(np.array(ImageOps.grayscale(temp)).reshape(self.depth_image_dim))

        if (self.ir_count == 1):
            ir = dat['ir']
            ir_dim = len(ir[0])
        else:
            ir = []
            ir_dim = 0

        if (self.ground_count == 1):
            ground = dat['ground']
            ground_dim = len(ground[0])
        else:
            ground = []
            ground_dim = 0

        if (self.compass_count == 1):
            compass = dat['compass']
            compass_dim = len(compass[0])
        else:
            compass = []
            compass_dim = 0

        if (self.target_count == 1):
            target = dat['target']
            target_dim = len(target[0])
        else:
            target = []
            target_dim = 0

        observation = {
            "image": image,
            "depth": depth,
            "ir": ir,
            "ground": ground,
            "compass": compass,
            "target": target
        }
        reward = np.array(dat['reward'], dtype=np.float32)
        end_episode = np.array(dat['endEpisode'], dtype=np.bool)

        if not self.agent_initialized:
            self.agent_initialized = True
            print("initializing agent...")
            self.agent.agent_init(use_gpu=args.gpu,
                                  agent_count=agent_count,
                                  rgb_image_count=self.rgb_image_count,
                                  depth_image_dim=self.depth_image_count * self.depth_image_dim,
                                  ir_dim=self.ir_count * ir_dim,
                                  ground_dim=self.ground_count * ground_dim,
                                  compass_dim=self.compass_count * compass_dim,
                                  target_dim=self.target_count * target_dim,
                                  model=args.model)

            self.reward_sum = np.zeros((agent_count), dtype=np.float32)

            dateinfo = datetime.datetime.now()
            self.logDirPath = args.log_file + dateinfo.strftime("%Y%m%d%H%M%S") + "/"
            os.makedirs(self.logDirPath)
            self.log_file = self.logDirPath + "reward.log"
            with open(self.log_file, 'w') as the_file:
                the_file.write('cycle, episode_reward_sum \n')

            self.agent.q_net.model.to_cpu()
            self.model_log = self.logDirPath + "model_" + str(self.agent.time - 1) + ".pkl"
            pickle.dump(self.agent.q_net.model, open(self.model_log, "wb"), -1)
            self.agent.q_net.model.to_gpu()
            self.agent.q_net.optimizer.setup(self.agent.q_net.model)

            action, q_now = self.agent.agent_start(observation, reward)
            self.send_action(action)

            self.q_log = self.logDirPath + "q.pkl"
            pickle.dump(q_now, open(self.q_log, "wb"), -1)
        else:
            if args.mode_distribute:
                self.thread_event.wait()
            self.cycle_counter += 1
            self.reward_sum += reward

            if end_episode:
                self.agent.agent_end(reward)
                with open(self.log_file, 'a') as the_file:
                    the_file.write(str(self.agent.time - 1) + ',' + str(self.reward_sum) + '\n')

                self.agent.q_net.model.to_cpu()
                self.model_log = self.logDirPath + "model_" + str(self.agent.time - 1) + ".pkl"
                pickle.dump(self.agent.q_net.model, open(self.model_log, "wb"), -1)
                self.agent.q_net.model.to_gpu()
                self.agent.q_net.optimizer.setup(self.agent.q_net.model)

                self.reward_sum = np.zeros((agent_count), dtype=np.float32)
                action = self.agent.agent_start(observation, reward)  # TODO
                self.send_action(action)
            else:
                action, eps, q_now = self.agent.agent_step(reward, observation)
                self.send_action(action)
                self.agent.agent_step_update(reward, action, eps)
                pickle.dump(q_now, open(self.q_log, "ab"), -1)

        if args.mode_distribute:
            self.thread_event.set()
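Because q.pkl is opened with mode "ab", one pickle record is appended per step. A small sketch of reading such a log back; the loop below is the generic repeated-pickle.load idiom, not part of the source.

# Generic idiom for reading a file of concatenated pickle records,
# such as the q.pkl log written above (sketch).
import pickle

def read_pickle_log(path):
    records = []
    with open(path, 'rb') as f:
        while True:
            try:
                records.append(pickle.load(f))
            except EOFError:
                break  # no more records in the file
    return records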
class AgentServer(WebSocket):
    agent = CnnDqnAgent()
    agent_initialized = False
    cycle_counter = 0
    thread_event = threading.Event()
    log_file = args.log_file
    reward_sum = 0
    depth_image_dim = 32 * 32
    depth_image_count = 1

    now = datetime.datetime.now()
    otherStyleTime = now.strftime("%Y-%m-%d %H_%M_%S")
    cur_dir = r'C:\Users\hosilab\Desktop\ls\ls\python-agent'  # raw string so backslashes stay literal
    folder_name = 'RGB'
    directory = cur_dir + '\\' + folder_name
    if os.path.isdir(cur_dir) and not os.path.exists(directory):
        os.makedirs(directory)

    def send_action(self, action):
        dat = msgpack.packb({"command": str(action)})
        self.send(dat, binary=True)

    def received_message(self, m):
        payload = m.data
        dat = msgpack.unpackb(payload)

        image = []
        for i in xrange(self.depth_image_count):
            image_ = Image.open(io.BytesIO(bytearray(dat['image'][i])))
            # image_.save("./RGB/" + "img_" + str(self.cycle_counter) + ".png")
            image.append(image_)
        # depth input is disabled in this variant:
        # depth = []
        # for i in xrange(self.depth_image_count):
        #     d = Image.open(io.BytesIO(bytearray(dat['depth'][i])))
        #     depth.append(np.array(ImageOps.grayscale(d)).reshape(self.depth_image_dim))

        observation = {"image": image}  # no "depth" key in this variant
        reward = np.asanyarray(dat['reward'], dtype=np.float32)
        end_episode = dat['endEpisode']

        if not self.agent_initialized:
            self.agent_initialized = True
            print("initializing agent...")
            self.agent.agent_init(use_gpu=args.gpu)  # depth_image_dim is not passed here

            # the very first action is free: no reward has been observed yet
            action = self.agent.agent_start(observation)
            self.send_action(action)
            with open(self.log_file, 'w') as the_file:
                the_file.write('cycle, episode_reward_sum \n')
        else:
            # agent already initialized: an observation arrived and an action is due
            self.thread_event.wait()
            self.cycle_counter += 1
            self.reward_sum += reward  # TODO: the reward may need adjusting here

            if end_episode:
                # episode over: restart from the initial state
                self.agent.agent_end(reward)
                action = self.agent.agent_start(observation)  # TODO
                self.send_action(action)
                with open(self.log_file, 'a') as the_file:
                    the_file.write(str(self.cycle_counter) + ',' + str(self.reward_sum) + '\n')
                self.reward_sum = 0
            else:
                # episode continues
                action, eps, q_now, obs_array = self.agent.agent_step(reward, observation)
                # reward shaping: penalize any action other than action index 2,
                # unless the raw reward is already 1
                if action != self.agent.actions.index(2) and reward != 1.:
                    reward -= 0.1
                self.send_action(action)
                # updates q_net.stock_experience and q_net.experience_replay
                # inside CnnDqnAgent
                self.agent.agent_step_update(reward, action, eps, q_now, obs_array)

        self.thread_event.set()
class Agent:
    agent_initialized = False
    ga = GeneGenerator()  # add Naka
    agent_id = -1  # add Naka
    cycle_counter = 0
    thread_event = threading.Event()
    reward_sum = 0
    depth_image_dim = 32 * 32
    depth_image_count = 1
    gene_count = 3  # number of genes (add Naka)
    scale_x = 1
    scale_y = 1
    scale_z = 1

    def __init__(self, args):
        print "start to load cnn model"
        self.args = args
        self.cnnDqnAgent = CnnDqnAgent(use_gpu=self.args.gpu,
                                       depth_image_dim=self.depth_image_dim *
                                       self.depth_image_count,
                                       agent_id=self.agent_id)
        print 'finish loading cnn model'
        self.cnnDqnAgent.agent_init()
        print 'finish init cnn dqn agent'

    def received_message(self, agentServer, dat):
        image = []
        for i in xrange(self.depth_image_count):
            image.append(Image.open(io.BytesIO(bytearray(dat['image'][i]))))
        depth = []
        for i in xrange(self.depth_image_count):
            d = Image.open(io.BytesIO(bytearray(dat['depth'][i])))
            depth.append(np.array(ImageOps.grayscale(d)).reshape(self.depth_image_dim))

        observation = {
            "image": image,
            "depth": depth,
            "scale": [dat['x_s'], dat['y_s'], dat['z_s']]
        }
        self.scale_x = dat['x_s']
        self.scale_y = dat['y_s']
        self.scale_z = dat['z_s']

        gene = []  # add Naka
        for i in xrange(len(dat['gene'])):
            gene.append(dat['gene'][i])

        reward = dat['reward']
        rewards = dat['rewards']  # add Naka
        self.agent_id = dat['agent_id']  # add Naka
        end_episode = dat['endEpisode']

        if not self.agent_initialized:
            print 'connected and agent started..'
            self.agent_initialized = True
            action = self.cnnDqnAgent.agent_start(observation)
            agentServer.send_action(action)
            if not os.path.exists(self.args.log_file):
                with open(self.args.log_file, 'w') as the_file:
                    the_file.write('cycle, episode_reward_sum \n')
        else:
            self.thread_event.wait()
            self.cycle_counter += 1
            self.reward_sum += reward

            if end_episode:
                self.cnnDqnAgent.agent_end(reward, self.agent_id)
                action = self.cnnDqnAgent.agent_start(observation)  # TODO
                self.gene = self.ga.gene_updater(gene, rewards)  # add Naka
                print self.agent_id, self.gene
                agentServer.send_actionAndgene(action, self.gene[self.agent_id])  # add Naka
                with open(self.args.log_file, 'a') as the_file:
                    the_file.write(str(self.cycle_counter) + ',' + str(self.reward_sum) +
                                   ',' + str(self.scale_x) + ',' + str(self.scale_y) +
                                   ',' + str(self.scale_z) + '\n')
                self.reward_sum = 0
            else:
                action, eps, obs_array = self.cnnDqnAgent.agent_step(reward, observation)
                agentServer.send_action(action)
                self.cnnDqnAgent.agent_step_update(reward, action, eps, obs_array,
                                                   self.agent_id)

        self.thread_event.set()
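Agent expects an agentServer object exposing send_action and send_actionAndgene. A minimal sketch of such a wrapper, following the msgpack convention of the other server variants; the method bodies and the "gene" field name are assumptions inferred from the caller, not the source.

# Hypothetical transport wrapper assumed by Agent.received_message (sketch).
class AgentServer(WebSocket):
    def send_action(self, action):
        # same msgpack packing convention as the other server variants
        self.send(msgpack.packb({"command": str(action)}), binary=True)

    def send_actionAndgene(self, action, gene):
        # "gene" field name is an assumption based on the caller above
        self.send(msgpack.packb({"command": str(action), "gene": gene}), binary=True)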