def __init__(self, env, batch_size):
    """Set up a DDPG agent for *env*: hyperparameters, actor/critic
    networks with frozen target copies, replay memory, and optimizers.
    """
    self.batch_size = batch_size
    self.tau = 1e-2      # soft-update rate used when blending target nets
    self.gamma = 0.99    # discount factor
    memory_size = 1000000
    actor_learning_rate = 1e-4
    critic_learning_rate = 1e-3

    self.critic_loss_fn = nn.MSELoss()

    obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]

    # Actor and its target copy, initialised to identical weights.
    self.actor = DdpgActor(obs_dim, act_dim,
                           env.action_space.high, env.action_space.low)
    self.actor_target = DdpgActor(obs_dim, act_dim,
                                  env.action_space.high, env.action_space.low)
    self.copy_networks(self.actor, self.actor_target)

    # Critic and its target copy, initialised to identical weights.
    self.critic = Critic(obs_dim, act_dim)
    self.critic_target = Critic(obs_dim, act_dim)
    self.copy_networks(self.critic, self.critic_target)

    self.memory = Memory(memory_size)
    self.actor_optimizer = optim.Adam(self.actor.parameters(),
                                      lr=actor_learning_rate)
    self.critic_optimizer = optim.Adam(self.critic.parameters(),
                                       lr=critic_learning_rate)
def __init__(self):
    """Wire up the view, backing memory and cursor, build the page/name
    tables, then open the default input-monitor page.
    """
    self.capture_view = CaptureView.instance()
    self.memory = Memory()
    self.cursor = Cursor()
    # Page and name-table setup must happen before set_page() below.
    self.setup_pages()
    self.setup_name_table()
    # Default selection: monitor 'a', channel 1.
    self.monitor = 'a'
    self.channel = 1
    self.set_page('input_monitor.' + self.monitor)
def __load_memory(self, data):
    """Restore the code-memory map from a saved-session dict.

    Databases written by older versions have no "mem_code" key; in that
    case the freshly created Memory is simply left empty.
    """
    self.mem = Memory()
    try:
        # Only a missing key is expected here — a bare except would also
        # hide genuine bugs (AttributeError, TypeError, ...).
        self.mem.code = data["mem_code"]
    except KeyError:
        # Not available in previous versions, this try will be
        # removed in the future
        pass
def __init__(self, filename, raw_type, raw_base, raw_big_endian, database):
    """Build the disassembly context: load the binary, share state with
    *database*, and create a configured capstone disassembler.

    Raises ExcArch when the binary's architecture is unrecognised.
    """
    import capstone as CAPSTONE

    # Per-address cache of decoded capstone instructions.
    self.capstone_inst = {}

    # Reuse the memory map of an already-loaded database, otherwise
    # create a fresh one and hand it back to the database.
    if database.loaded:
        self.mem = database.mem
    else:
        self.mem = Memory()
        database.mem = self.mem

    self.binary = Binary(self.mem, filename, raw_type, raw_base,
                         raw_big_endian)
    self.binary.load_section_names()

    arch, mode = self.binary.get_arch()
    if arch is None or mode is None:
        raise ExcArch(self.binary.get_arch_string())

    # These are all database-owned structures shared by reference.
    self.jmptables = database.jmptables
    self.user_inline_comments = database.user_inline_comments
    self.internal_inline_comments = database.internal_inline_comments
    self.user_previous_comments = database.user_previous_comments
    self.internal_previous_comments = database.internal_previous_comments
    self.functions = database.functions
    self.func_id = database.func_id
    self.end_functions = database.end_functions
    self.xrefs = database.xrefs

    # TODO: is it a global constant or $gp can change during the execution ?
    self.mips_gp = database.mips_gp

    # Symbols/imports: take them from the database when available,
    # otherwise parse them from the binary and store them back.
    if database.loaded:
        self.binary.symbols = database.symbols
        self.binary.reverse_symbols = database.reverse_symbols
        self.binary.imports = database.imports
    else:
        self.binary.load_symbols()
        database.symbols = self.binary.symbols
        database.reverse_symbols = self.binary.reverse_symbols
        database.imports = self.binary.imports

    self.capstone = CAPSTONE
    self.md = CAPSTONE.Cs(arch, mode)
    self.md.detail = True
    self.arch = arch
    self.mode = mode

    for sec in self.binary.iter_sections():
        sec.big_endian = self.mode & self.capstone.CS_MODE_BIG_ENDIAN
        # TODO: useful ?
        if not database.loaded:
            self.mem.add(sec.start, sec.end, MEM_UNK)
def __load_memory(self, data):
    """Restore the memory map from a saved-session dict.

    Version -1 databases stored the map under "mem_code"; each entry gets
    a -1 appended (presumably a placeholder for a field added later —
    TODO confirm against Memory's entry layout). Current databases use
    the "mem" key. Databases older than both have neither key, in which
    case the freshly created Memory is simply left empty.
    """
    self.mem = Memory()
    try:
        # Only a missing key is expected here — a bare except would also
        # hide genuine bugs (AttributeError, TypeError, ...).
        if self.version == -1:
            self.mem.mm = data["mem_code"]
            for ad in self.mem.mm:
                self.mem.mm[ad].append(-1)
            return
        self.mem.mm = data["mem"]
    except KeyError:
        # Not available in previous versions, this try will be
        # removed in the future
        pass
def __init__(self, env, batch_size):
    """Set up a SAC agent for *env*: hyperparameters, learned entropy
    temperature, the actor, and two Q-networks with target copies.
    """
    self.batch_size = batch_size
    self.tau = 1e-2      # soft-update rate for the target networks
    self.gamma = 0.99    # discount factor
    self.q_lr = 3e-4
    self.actor_lr = 3e-4
    self.alpha_lr = 3e-3
    self.update_step = 0
    self.delay_step = 2
    memory_size = 1000000

    self.action_range = [env.action_space.low, env.action_space.high]
    self.memory = Memory(memory_size)

    # Entropy temperature: alpha is optimised through log_alpha.
    self.alpha = 0.2
    self.target_entropy = -torch.prod(
        torch.Tensor(env.action_space.shape)).item()
    self.log_alpha = torch.zeros(1, requires_grad=True)
    self.alpha_optim = optim.Adam([self.log_alpha], lr=self.alpha_lr)

    obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]

    self.actor = SacActor(obs_dim, act_dim)
    self.actor_optimizer = optim.Adam(self.actor.parameters(),
                                      lr=self.actor_lr)

    # First Q-network and its target copy.
    self.q_net_1 = Critic(obs_dim, act_dim)
    self.q_net_1_target = Critic(obs_dim, act_dim)
    self.copy_networks(self.q_net_1, self.q_net_1_target)
    self.q_net_1_optimizer = optim.Adam(self.q_net_1.parameters(),
                                        lr=self.q_lr)

    # Second Q-network and its target copy.
    self.q_net_2 = Critic(obs_dim, act_dim)
    self.q_net_2_target = Critic(obs_dim, act_dim)
    self.copy_networks(self.q_net_2, self.q_net_2_target)
    self.q_net_2_optimizer = optim.Adam(self.q_net_2.parameters(),
                                        lr=self.q_lr)
import unittest
from math import inf

from lib.memory import Memory
from lib.types import Volume

mem = Memory()


class TestMemory(unittest.TestCase):
    """Exercises Memory get/set/erase on a single shared instance."""

    @classmethod
    def setUpClass(cls):
        cls.mem = mem
        # Register a name so the capture view knows about address 0.
        cls.mem.capture_view.add_name_to_table(
            'input_monitor.a.channel.1.volume')

    def test_memory_get_empty(self):
        # A slot that was never written reads back as None.
        self.assertIsNone(self.mem.get(0))

    def test_memory_set_get_erase(self):
        # Writes land in their own slots and don't clobber neighbours.
        self.mem.set(0, 1)
        self.assertEqual(self.mem.get(0), 1)
        self.mem.set(1, 2)
        self.assertEqual(self.mem.get(0), 1)
        self.assertEqual(self.mem.get(1), 2)
        # Overwriting slot 0 leaves slot 1 untouched.
        self.mem.set(0, 3)
        self.assertEqual(self.mem.get(0), 3)
        self.assertEqual(self.mem.get(1), 2)
        # Erased slots read back as None again.
        self.mem.erase(0)
        self.mem.erase(1)
        self.assertIsNone(self.mem.get(0))
def setUp(self):
    """Create a fresh single-slot Memory before each test."""
    self.memory = Memory(1)
def _rebuild_trainer(memory_bank, champion, competitor, old_trainer):
    """Create a fresh Trainer on a new game, carrying over the current
    epsilon / returns / winners parameters from *old_trainer*."""
    return Trainer(Game(GAME_LENGTH, GAME_STEP_TIME), memory_bank,
                   champion, competitor, old_trainer.epsilon, MIN_EPSILON,
                   EPSILON_DECAY, GAMMA, RETURNS_DECAY, WINNERS_GROWTH,
                   returns_parameter=old_trainer.returns_parameter,
                   winners_parameter=old_trainer.winners_parameter,
                   batch_size=BATCH_SIZE)


def main():
    """Self-play training loop.

    Each session: play GAMES_PER_TRAINING_SESSION games, checkpoint the
    champion, then evaluate champion vs competitor. A negative test score
    promotes the competitor to champion; either way the competitor is
    reloaded from a recent checkpoint for the next session.
    """
    memory_bank = Memory(MEMORY_SIZE)
    pong_game = Game(GAME_LENGTH, GAME_STEP_TIME)
    champion = Network(3, 7, hidden_layer_size=HIDDEN_LAYER_SIZE,
                       no_hidden_layers=NO_HIDDEN_LAYERS,
                       learning_rate=LEARNING_RATE)
    competitor = Network(3, 7, hidden_layer_size=HIDDEN_LAYER_SIZE,
                         no_hidden_layers=NO_HIDDEN_LAYERS)
    trainer = Trainer(pong_game, memory_bank, champion, competitor,
                      MAX_EPSILON, MIN_EPSILON, EPSILON_DECAY, GAMMA,
                      RETURNS_DECAY, WINNERS_GROWTH, batch_size=BATCH_SIZE)
    champion.save_network(DIRECTORY + '/version_' + str(STARTING_VERSION))

    for version in range(STARTING_VERSION,
                         STARTING_VERSION + NUMBER_OF_TRAINING_SESSIONS):
        start_time = time.time()
        for _ in range(GAMES_PER_TRAINING_SESSION):
            print('New game')
            trainer.run_game()
            # Each game gets a fresh board.
            trainer.game = Game(GAME_LENGTH, GAME_STEP_TIME)
        print("Time taken for training session: ", time.time() - start_time)
        champion.save_network(DIRECTORY + '/version_' + str(version + 1))

        # Re-create the trainer on a fresh game before evaluating.
        trainer = _rebuild_trainer(memory_bank, champion, competitor, trainer)
        test_score = trainer.test_game()
        if test_score < 0:
            print('Competitor wins, score was ' + str(test_score))
            # Promote the competitor: round-trip through disk so the
            # champion network adopts its weights.
            competitor.save_network(DIRECTORY + '/competitor_save')
            champion.load_network(DIRECTORY + '/competitor_save')
        else:
            print('Champion continues, score was ' + str(test_score))

        # Next opponent: a random checkpoint from the last few versions.
        new_competitor_version = random.randint(max(0, version - 5), version)
        print('New competitor version: ' + str(new_competitor_version))
        competitor.load_network(DIRECTORY + '/version_' +
                                str(new_competitor_version))

        print('epsilon is ' + str(trainer.epsilon))
        trainer = _rebuild_trainer(memory_bank, champion, competitor, trainer)