class Train:
    """Training and evaluation driver for ``JointNetWork_V2`` on ``BipedalWalkerHardcore``.

    Hyper-parameters are read from the module-level ``P`` mapping. Each
    training iteration first collects transitions from the environment into
    ``self.memory`` and then runs a number of network updates on batches
    drawn from that memory.
    """

    def __init__(self):
        """Read settings from ``P``, build/restore the network and create the env."""
        self.batch_size = P['batch_size']
        self.train_one_eps = P['train_one_eps']
        self.train_all_eps = P['train_all_eps']
        self.memory_updata_size = P['memory_updata_size']

        tf.reset_default_graph()
        # NOTE(review): 24 and 4 presumably are the state and action sizes;
        # they match the column slices used in ``online_train`` -- confirm.
        self.network = JointNetWork_V2(24, 4)
        self.sess = tf.Session()
        self.writer = tf.summary.FileWriter(P['TF_Log'], self.sess.graph)
        self.network.restore(self.sess)
        # Freeze the graph so no ops can be added after this point.
        self.sess.graph.finalize()

        self.memory = Memory()
        # Alternative environment: gym.make('BipedalWalker-v2')
        self.env = BipedalWalkerHardcore()
        self.UO_noise = UONoise()

    def online_train(self):
        """Run ``train_one_eps`` update steps on batches sampled from memory.

        Each step runs the policy update, then the Q update (together with the
        merged summaries and the global step), then the target update. The
        summary is written every 100 global steps.
        """
        for _ in range(self.train_one_eps):
            batch = self.memory.get(self.batch_size)
            # Record layout by column:
            # [0:24] state, [24:28] action, [28:29] reward, [29:53] next state.
            online_state = batch[:, :24]
            action = batch[:, 24:28]
            R_input = batch[:, 28:29]
            target_state = batch[:, 29:53]

            self.sess.run(
                self.network.policyUpdate,
                feed_dict={
                    self.network.online_state_input: online_state,
                    self.network.action: action,
                    self.network.con_training_q: False,
                    self.network.bn_training: True
                })

            merged, _, step = self.sess.run(
                [
                    self.network.merged,
                    self.network.Qupdate,
                    self.network.global_step
                ],
                feed_dict={
                    self.network.online_state_input: online_state,
                    self.network.target_state_input: target_state,
                    self.network.R_input: R_input,
                    self.network.action: action,
                    self.network.con_training_q: True,
                    self.network.bn_training: True
                })

            self.sess.run(self.network.target_update)

            if step % 100 == 0:
                self.writer.add_summary(merged, global_step=step)

    def __action(self, state):
        """Return the network's action for ``state`` (first row of the output).

        When ``P['mode'] == 0`` the raw network output is echoed to stdout and
        the returned action is passed through ``self.UO_noise``.
        """
        action = self.sess.run(
            self.network.online_action,
            feed_dict={
                self.network.online_state_input: state,
                self.network.con_training_q: False,
                # Placeholder value: the action input is fed with zeros here.
                self.network.action: np.zeros([self.batch_size, 4]),
                self.network.bn_training: False
            })
        if P['mode'] == 0:
            print(u'\r online_action: {} '.format(action), end='')
            return self.UO_noise(action[0])
        return action[0]

    def memory_updata(self):
        """Collect ``memory_updata_size`` transitions into memory.

        Returns:
            The total reward collected divided by the number of episodes that
            finished during collection (divided by 1 if none finished).
        """
        state = self.env.reset()
        steps = 0
        total_reward = 0
        finished_episodes = 0
        while steps < self.memory_updata_size:
            # Add a leading batch axis for the network input.
            state = np.expand_dims(np.array(state, dtype=np.float32), 0)
            action = self.__action(state)
            assert action.shape == (4, )
            next_state, R, done, _info = self.env.step(action)
            total_reward += R
            self.memory.push(state[0], action, R, next_state)
            state = next_state
            steps += 1
            if done:
                state = self.env.reset()
                finished_episodes += 1
        # Avoid dividing by zero when no episode finished in this window.
        if finished_episodes == 0:
            finished_episodes = 1
        return total_reward / finished_episodes

    def check(self, show=True):
        """Play one episode and return its total reward.

        Args:
            show: When true, render every step and close the env at the end.
        """
        state = self.env.reset()
        total_reward = 0
        while True:
            state = np.expand_dims(np.array(state, dtype=np.float32), 0)
            action = self.__action(state)
            assert action.shape == (4, )
            next_state, R, done, _info = self.env.step(action)
            total_reward += R
            if show:
                self.env.render()
            state = next_state
            if done:
                if show:
                    self.env.close()
                break
        return total_reward

    def train(self):
        """Alternate memory collection and network updates for ``train_all_eps`` rounds.

        Prints timing and a rolling mean of the per-round reward, and saves the
        network and the memory every 100 rounds.
        """
        up_cont = 10  # size of the rolling reward window
        rewards = np.zeros([up_cont])
        for i in range(self.train_all_eps):
            s1 = time.time()
            round_reward = self.memory_updata()
            # Index with the window size itself so the two cannot drift apart.
            rewards[i % up_cont] = round_reward
            s2 = time.time()
            self.online_train()
            s3 = time.time()
            # Average only the slots written so far; otherwise the initial
            # zeros drag the reported mean down during the first rounds.
            filled = min(i + 1, up_cont)
            print(
                'index {}: memory update time: {:.2f} train time:{:.2f} reword:{:.2f}'
                .format(i, s2 - s1, s3 - s2, np.mean(rewards[:filled])))

            if (i + 1) % 100 == 0:
                self.network.save(self.sess)
                self.memory.save()

    def test(self, times=100):
        """Play ``times`` rendered episodes on random seeds and report the worst one.

        Args:
            times: Number of episodes to play.
        """
        MinR = 10000
        Minseed = 0
        for _ in range(times):
            seed = np.random.randint(1, high=10000)
            self.env.seed(seed=seed)
            print(seed)
            reward = self.check(show=True)
            print('reward:{}  seed:{} '.format(reward, seed))
            if reward < MinR:
                MinR = reward
                Minseed = seed
        print('minreward:{}  minseed:{}'.format(MinR, Minseed))
class R86:
    """Machine state and instruction helpers for a small assembly interpreter.

    Holds the register sets, the memory, the loaded code lines and the label
    table, and implements the arithmetic, comparison and jump helpers for
    mnemonics such as ``incl``, ``addl`` and ``jne``.
    """

    @staticmethod
    def _build_unary_operations():
        """Return a fresh mnemonic -> one-argument function table."""
        return {
            "incl": lambda x: x + 1,
            "decl": lambda x: x - 1,
            "negl": lambda x: -x,
            "notl": lambda x: ~x,
            "shrl": lambda x: x >> 1,
            # Shift LEFT by one (was mistakenly a right shift, same as shrl).
            "shll": lambda x: x << 1,
        }

    @staticmethod
    def _build_binary_operations():
        """Return a fresh mnemonic -> ``f(dest_value, source)`` function table."""
        return {
            "addl": lambda x, y: x + y,
            "subl": lambda x, y: x - y,
            "imull": lambda x, y: x * y,
            "orl": lambda x, y: x | y,
            "andl": lambda x, y: x & y,
            "xorl": lambda x, y: x ^ y,
            "shrl": lambda x, y: x >> y,
            "shll": lambda x, y: x << y,
        }

    def __init__(self):
        """Create the register sets, memory and per-instance operation tables."""
        self.segment_register = SegmentRegister()
        self.integer_register = IntegerRegister()
        self.special_register = SpecialRegister()
        self.memory = Memory()
        self.code_segment = []
        self.label_table = {}

        self.unary_operation_dict = self._build_unary_operations()
        self.binary_operation_dict = self._build_binary_operations()

        # One lookup table over all three register sets.
        self.register_table = self.segment_register.register_table.copy()
        self.register_table.update(self.integer_register.register_table)
        self.register_table.update(self.special_register.register_table)

    def unary_operate(self, ins, dest):
        """Apply unary ``ins`` to ``dest`` in place and update ZF/SF."""
        result = self.unary_operation_dict[ins](self.get(dest))
        self.set(result, dest)
        self.set_condition_code(result)

    def binary_operate(self, ins, source, dest):
        """Store ``ins(value of dest, source)`` into ``dest`` and update ZF/SF.

        ``source`` is used as a value; ``dest`` is a register name or address.
        """
        result = self.binary_operation_dict[ins](self.get(dest), source)
        self.set(result, dest)
        self.set_condition_code(result)

    def compare(self, ins, second_source, first_source):
        """Set ZF/SF from ``first_source - second_source`` (``ins`` is unused)."""
        self.set_condition_code(first_source - second_source)

    def test(self, ins, second_source, first_source):
        """Set ZF/SF from ``first_source & second_source`` (``ins`` is unused)."""
        self.set_condition_code(first_source & second_source)

    def jump_to_table(self, label, offset):
        """Jump through a table of ``.long <label>`` lines.

        ``eip`` is first set to the table label's position plus ``offset``;
        the code line after that position is read, its ``.long`` prefix is
        dropped, and ``eip`` is set to the position of the label it names.
        """
        self.set_reg(self.label_table[label] + offset, "eip")
        target_label_line = self.code_segment[self.get_reg("eip") + 1]
        target_label = target_label_line[len(".long"):].strip()
        label_pos = self.label_table[target_label]
        self.set_reg(label_pos, "eip")

    def conditional_jump(self, ins, label):
        """Set ``eip`` to ``label``'s position if jump ``ins`` is taken.

        The decision uses only the ZF and SF registers.
        """
        should_jump = {
            "jmp": True,
            "je": self.get_reg("ZF"),
            "jne": not self.get_reg("ZF"),
            "jl": self.get_reg("SF"),
            "jle": self.get_reg("SF") or self.get_reg("ZF"),
            "jg": not (self.get_reg("SF") or self.get_reg("ZF")),
            "jge": not self.get_reg("SF"),
            "js": self.get_reg("SF"),
            "jns": not self.get_reg("SF"),
        }[ins]
        if should_jump:
            self.set_reg(self.label_table[label], "eip")

    def set_condition_code(self, result):
        """Set ZF to 1 iff ``result == 0`` and SF to 1 iff ``result < 0``."""
        if result == 0:
            self.set_reg(1, "ZF")
            self.set_reg(0, "SF")
        elif result < 0:
            self.set_reg(0, "ZF")
            self.set_reg(1, "SF")
        else:  # result > 0
            self.set_reg(0, "ZF")
            self.set_reg(0, "SF")

    def set(self, source_value, dest):
        """Write ``source_value`` to ``dest``.

        ``dest`` is treated as a register if it is in the integer register
        name list, otherwise as a memory address.
        """
        if dest in self.integer_register.name_list:
            self.set_reg(source_value, dest)
        else:
            self.set_memory(source_value, dest)

    def get(self, dest):
        """Read ``dest``: an integer register name, otherwise a memory address."""
        if dest in self.integer_register.name_list:
            return self.get_reg(dest)
        else:
            return self.get_memory(dest)

    def set_reg(self, _value, reg):
        """Set register ``reg``; print a message and exit if the lookup fails."""
        try:
            self.register_table[reg].set_value(_value)
        except LookupError:
            print("***\nRegister not found: [" + reg + "]\n***")
            exit()

    def get_reg(self, reg):
        """Return register ``reg``'s value; print a message and exit if the lookup fails."""
        try:
            return self.register_table[reg].get_value()
        except LookupError:
            print("***\nRegister not found: [" + reg + "]\n***")
            exit()

    def init_memory(self, _min, _max):
        """Initialise the memory with the given bounds (delegates to ``Memory.init``)."""
        self.memory.init(_min, _max)

    def set_memory(self, _value, _address):
        """Store ``_value`` at ``_address`` in memory."""
        self.memory.set(_value, _address)

    def get_memory(self, _address):
        """Return the value stored at ``_address`` in memory."""
        return self.memory.get(_address)

    def print_register(self):
        """Print the integer, special and segment register sets, in that order."""
        self.integer_register.print_self()
        self.special_register.print_self()
        self.segment_register.print_self()

    def print_memory(self):
        """Print the memory contents."""
        self.memory.print_self()

    def print_self(self):
        """Print the registers, the label table and the memory."""
        self.print_register()
        print("LABEL TABLE")
        print(self.label_table)
        print()
        self.print_memory()