Exemplo n.º 1
0
class Train:
    """Training and evaluation driver for a ``JointNetWork_V2`` agent.

    Collects transitions from a ``BipedalWalkerHardcore`` environment into a
    ``Memory`` buffer, runs policy / Q / target updates on sampled batches in
    a TensorFlow session, and can roll out episodes for evaluation.

    Hyper-parameters are read from the module-level ``P`` mapping.
    """

    # Sizes used to slice sampled records and to validate actions.
    # NOTE(review): these mirror the ``JointNetWork_V2(24, 4)`` arguments,
    # presumably (state size, action size) -- confirm against the network.
    _STATE_DIM = 24
    _ACTION_DIM = 4

    def __init__(self):
        """Read hyper-parameters, build/restore the network, create the env."""
        self.batch_size = P['batch_size']
        self.train_one_eps = P['train_one_eps']
        self.train_all_eps = P['train_all_eps']
        self.memory_updata_size = P['memory_updata_size']

        tf.reset_default_graph()
        self.network = JointNetWork_V2(self._STATE_DIM, self._ACTION_DIM)
        self.sess = tf.Session()
        self.writer = tf.summary.FileWriter(P['TF_Log'], self.sess.graph)
        self.network.restore(self.sess)
        # Make the graph read-only so later code cannot add ops to it.
        self.sess.graph.finalize()

        self.memory = Memory()
        self.env = BipedalWalkerHardcore()
        self.UO_noise = UONoise()

    def online_train(self):
        """Run ``train_one_eps`` update steps on batches sampled from memory.

        Every step runs, in order: the policy update, the Q update (also
        fetching the merged summary and the global step) and the target
        update. The summary is written whenever the global step is a
        multiple of 100.
        """
        net = self.network
        state_end = self._STATE_DIM
        action_end = state_end + self._ACTION_DIM
        reward_end = action_end + 1
        next_state_end = reward_end + self._STATE_DIM

        for _ in range(self.train_one_eps):
            batch = self.memory.get(self.batch_size)
            # Column layout: state | action | reward | next state. This is
            # the argument order of ``memory.push`` in ``memory_updata``;
            # presumably Memory stores them concatenated -- TODO confirm.
            online_state = batch[:, :state_end]
            action = batch[:, state_end:action_end]
            R_input = batch[:, action_end:reward_end]
            target_state = batch[:, reward_end:next_state_end]

            self.sess.run(net.policyUpdate,
                          feed_dict={
                              net.online_state_input: online_state,
                              net.action: action,
                              net.con_training_q: False,
                              net.bn_training: True,
                          })
            merged, _, step = self.sess.run(
                [net.merged, net.Qupdate, net.global_step],
                feed_dict={
                    net.online_state_input: online_state,
                    net.target_state_input: target_state,
                    net.R_input: R_input,
                    net.action: action,
                    net.con_training_q: True,
                    net.bn_training: True,
                })
            self.sess.run(net.target_update)

            if step % 100 == 0:
                self.writer.add_summary(merged, global_step=step)

    def __action(self, state):
        """Return the online network's action for ``state``.

        ``state`` is fed as-is to ``online_state_input``; only the first row
        of the resulting action batch is returned. When ``P['mode'] == 0``
        the raw action is printed and the returned action is passed through
        ``self.UO_noise`` first.
        """
        action = self.sess.run(
            self.network.online_action,
            feed_dict={
                self.network.online_state_input: state,
                self.network.con_training_q: False,
                # NOTE(review): the action input is fed zeros; presumably the
                # graph requires a value even though it is unused on this
                # path -- confirm against JointNetWork_V2.
                self.network.action:
                np.zeros([self.batch_size, self._ACTION_DIM]),
                self.network.bn_training: False,
            })
        if P['mode'] == 0:
            print(
                u'\r online_action: {}                        '.format(action),
                end='')
            return self.UO_noise(action[0])
        return action[0]

    def memory_updata(self):
        """Collect ``memory_updata_size`` transitions into the memory.

        The environment is reset whenever an episode ends.

        Returns:
            The total reward collected divided by the number of finished
            episodes (or by 1 when no episode finished).
        """
        state = self.env.reset()
        total_reward = 0
        finished_episodes = 0
        for _ in range(self.memory_updata_size):
            state = np.expand_dims(np.array(state, dtype=np.float32), 0)
            action = self.__action(state)
            assert action.shape == (self._ACTION_DIM, )

            next_state, R, done, _info = self.env.step(action)
            total_reward += R
            self.memory.push(state[0], action, R, next_state)
            state = next_state
            if done:
                state = self.env.reset()
                finished_episodes += 1
        return total_reward / max(finished_episodes, 1)

    def check(self, show=True):
        """Roll out one episode and return its total reward.

        Args:
            show: when true, render every step and close the environment
                once the episode is done.
        """
        state = self.env.reset()
        total_reward = 0
        done = False
        while not done:
            state = np.expand_dims(np.array(state, dtype=np.float32), 0)
            action = self.__action(state)
            assert action.shape == (self._ACTION_DIM, )

            state, R, done, _info = self.env.step(action)
            total_reward += R
            if show:
                self.env.render()
        if show:
            self.env.close()
        return total_reward

    def train(self):
        """Alternate data collection and training for ``train_all_eps`` rounds.

        After every round the timings and the mean of the most recent (up to
        10) collection rewards are printed. Network and memory are saved
        every 100 rounds.
        """
        window = 10
        rewards = np.zeros([window])
        for i in range(self.train_all_eps):
            s1 = time.time()
            rewards[i % window] = self.memory_updata()
            s2 = time.time()
            self.online_train()
            s3 = time.time()
            # Average only the slots written so far; otherwise the zeros of
            # the still-empty slots drag the mean down in the first rounds.
            filled = min(i + 1, window)
            print(
                'index {}: memory update time: {:.2f} train time:{:.2f} reword:{:.2f}'
                .format(i, s2 - s1, s3 - s2, np.mean(rewards[:filled])))
            if (i + 1) % 100 == 0:
                self.network.save(self.sess)
                self.memory.save()

    def test(self, times=100):
        """Run ``times`` rendered episodes with random seeds.

        Prints every seed and reward, and finally the lowest reward seen
        together with the seed that produced it.
        """
        MinR = 10000
        Minseed = 0
        for _ in range(times):
            seed = np.random.randint(1, high=10000)
            self.env.seed(seed=seed)
            print(seed)
            reward = self.check(show=True)
            print('reward:{} seed:{} '.format(reward, seed))
            if reward < MinR:
                MinR = reward
                Minseed = seed
        print('minreward:{} minseed:{}'.format(MinR, Minseed))
Exemplo n.º 2
0
Arquivo: R86.py Projeto: RahnX/R86
class R86:
	"""State and instruction helpers for a small x86-style machine.

	Holds three register files, a memory, the code segment (list of lines)
	and a label table, and implements arithmetic/logic instructions,
	comparisons and jumps on top of them. Mnemonics follow the ``incl`` /
	``addl`` style. Only two condition flags are modelled: ``ZF`` and ``SF``.
	"""

	# Single-operand instructions, keyed by mnemonic.
	_UNARY_OPERATIONS = {
		"incl": lambda x: x + 1,
		"decl": lambda x: x - 1,
		"negl": lambda x: -x,
		"notl": lambda x: ~x,
		"shrl": lambda x: x >> 1,
		# Shift LEFT by one (previously shifted right, duplicating "shrl").
		"shll": lambda x: x << 1,
	}

	# Two-operand instructions, called as op(dest_value, source_value).
	_BINARY_OPERATIONS = {
		"addl": lambda x, y: x + y,
		"subl": lambda x, y: x - y,
		"imull": lambda x, y: x * y,
		"orl": lambda x, y: x | y,
		"andl": lambda x, y: x & y,
		"xorl": lambda x, y: x ^ y,
		"shrl": lambda x, y: x >> y,
		"shll": lambda x, y: x << y,
	}

	def __init__(self):
		"""Create empty registers/memory and build the lookup tables."""
		self.segment_register = SegmentRegister()
		self.integer_register = IntegerRegister()
		self.special_register = SpecialRegister()
		self.memory = Memory()

		self.code_segment = []
		self.label_table = {}

		# Per-instance copies, so mutating one machine's table does not
		# affect other instances or the class-level defaults.
		self.unary_operation_dict = dict(self._UNARY_OPERATIONS)
		self.binary_operation_dict = dict(self._BINARY_OPERATIONS)

		# One name -> register mapping covering all three register files.
		self.register_table = self.segment_register.register_table.copy()
		self.register_table.update(self.integer_register.register_table)
		self.register_table.update(self.special_register.register_table)

	def unary_operate(self, ins, dest):
		"""Apply unary instruction ``ins`` to ``dest`` in place and set flags.

		Raises KeyError if ``ins`` is not a known unary mnemonic.
		"""
		result = self.unary_operation_dict[ins](self.get(dest))
		self.set(result, dest)
		self.set_condition_code(result)

	def binary_operate(self, ins, source, dest):
		"""Store ``ins(dest_value, source)`` into ``dest`` and set flags.

		``source`` is an already-resolved value; ``dest`` is a register name
		or memory address. Raises KeyError for an unknown mnemonic.
		"""
		result = self.binary_operation_dict[ins](self.get(dest), source)
		self.set(result, dest)
		self.set_condition_code(result)

	def compare(self, ins, second_source, first_source):
		"""Set flags from ``first_source - second_source``; ``ins`` is unused."""
		self.set_condition_code(first_source - second_source)

	def test(self, ins, second_source, first_source):
		"""Set flags from ``first_source & second_source``; ``ins`` is unused."""
		self.set_condition_code(first_source & second_source)

	def jump_to_table(self, label, offset):
		"""Jump indirectly through a table that starts at ``label``.

		Sets ``eip`` to ``label_table[label] + offset``, reads the code
		segment line one past that position, treats everything after its
		first ``len(".long")`` characters (stripped) as a label name and
		sets ``eip`` to that label's position.
		"""
		self.set_reg(self.label_table[label] + offset, "eip")
		# NOTE(review): the prefix is dropped by length only; the line is
		# presumably always of the form ".long <label>" -- not verified here.
		target_label_line = self.code_segment[self.get_reg("eip") + 1]
		target_label = target_label_line[len(".long"):].strip()
		self.set_reg(self.label_table[target_label], "eip")

	def conditional_jump(self, ins, label):
		"""Set ``eip`` to ``label``'s position if jump ``ins`` is taken.

		The decision uses only ``ZF`` and ``SF``. Raises KeyError for an
		unknown jump mnemonic, or for an unknown label when jumping.
		"""
		zf = self.get_reg("ZF")
		sf = self.get_reg("SF")
		should_jump = {
			"jmp": True,
			"je": zf,
			"jne": not zf,
			"jl": sf,
			"jle": sf or zf,
			"jg": not (sf or zf),
			"jge": not sf,
			"js": sf,
			"jns": not sf,
		}[ins]

		if should_jump:
			self.set_reg(self.label_table[label], "eip")

	def set_condition_code(self, result):
		"""Set ``ZF`` to 1 iff ``result == 0`` and ``SF`` to 1 iff ``result < 0``."""
		self.set_reg(1 if result == 0 else 0, "ZF")
		self.set_reg(1 if result < 0 else 0, "SF")

	def set(self, source_value, dest):
		"""Write ``source_value`` to ``dest``.

		``dest`` is treated as a register when it is one of the integer
		register names, otherwise as a memory address.
		"""
		if dest in self.integer_register.name_list:
			self.set_reg(source_value, dest)
		else:
			self.set_memory(source_value, dest)

	def get(self, dest):
		"""Read ``dest``: an integer register name, else a memory address."""
		if dest in self.integer_register.name_list:
			return self.get_reg(dest)
		return self.get_memory(dest)

	def set_reg(self, _value, reg):
		"""Set register ``reg`` to ``_value``.

		On LookupError an error message is printed and the program exits.
		"""
		try:
			self.register_table[reg].set_value(_value)
		except LookupError:
			print("***\nRegister not found: [" + reg + "]\n***")
			exit()

	def get_reg(self, reg):
		"""Return the value of register ``reg``.

		On LookupError an error message is printed and the program exits.
		"""
		try:
			return self.register_table[reg].get_value()
		except LookupError:
			print("***\nRegister not found: [" + reg + "]\n***")
			exit()

	def init_memory(self, _min, _max):
		"""Initialise the memory with the bounds ``_min`` and ``_max``."""
		self.memory.init(_min, _max)

	def set_memory(self, _value, _address):
		"""Store ``_value`` at memory ``_address``."""
		self.memory.set(_value, _address)

	def get_memory(self, _address):
		"""Return the value stored at memory ``_address``."""
		return self.memory.get(_address)

	def print_register(self):
		"""Print the integer, special and segment registers, in that order."""
		self.integer_register.print_self()
		self.special_register.print_self()
		self.segment_register.print_self()

	def print_memory(self):
		"""Print the memory contents."""
		self.memory.print_self()

	def print_self(self):
		"""Print registers, the label table, a blank line and the memory."""
		self.print_register()
		print("LABEL TABLE")
		print(self.label_table)
		print()
		self.print_memory()