Example #1
    def __init__(self,
                 controller_class,
                 input_size,
                 output_size,
                 memory_locations=256,
                 memory_word_size=64,
                 memory_read_heads=4,
                 shift_range=1,
                 batch_size=1):
        """
        constructs a complete NTM architecture as described in the "Neural Turing Machines" paper
        https://arxiv.org/abs/1410.5401

        Parameters:
        -----------
        controller_class: BaseController
            a concrete implementation of the BaseController class
        input_size: int
            the size of the input vector
        output_size: int
            the size of the output vector
        memory_locations: int
            the number of words that can be stored in memory
        memory_word_size: int
            the size of an individual word in memory
        memory_read_heads: int
            the number of read heads in the memory
        shift_range: int
            the range of allowed integer location shifts (e.g. shift_range=1 allows shifts of -1, 0, and +1)
        batch_size: int
            the size of the data batch
        """
        self.input_size = input_size
        self.output_size = output_size
        self.memory_locations = memory_locations
        self.word_size = memory_word_size
        self.read_heads = memory_read_heads
        self.batch_size = batch_size
        self.shift_range = shift_range

        self.memory = Memory(self.memory_locations, self.word_size,
                             self.read_heads, self.batch_size)
        self.controller = controller_class(self.input_size, self.output_size,
                                           self.read_heads, self.word_size,
                                           self.shift_range, self.batch_size)

        # input data placeholders

        self.input_data = tf.compat.v1.placeholder(
            tf.float32, [batch_size, None, input_size], name='input')
        self.target_output = tf.compat.v1.placeholder(
            tf.float32, [batch_size, None, output_size], name='targets')
        self.sequence_length = tf.compat.v1.placeholder(tf.int32,
                                                        name='sequence_length')

        self.build_graph()
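
For orientation, a minimal instantiation sketch; the enclosing class name (NTM here) and FeedforwardController are assumptions, not part of the snippet above:

# Hypothetical usage; NTM and FeedforwardController are assumed names.
machine = NTM(FeedforwardController,
              input_size=10,
              output_size=10,
              memory_locations=128,
              memory_word_size=20,
              memory_read_heads=1,
              shift_range=1,
              batch_size=1)
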
Example #2
    def __init__(self,
                 controller_class,
                 input_size,
                 output_size,
                 max_sequence_length,
                 memory_words_num=256,
                 memory_word_size=64,
                 memory_read_heads=4,
                 batch_size=1):
        """
        constructs a complete DNC architecture as described in the DNC paper
        http://www.nature.com/nature/journal/vaop/ncurrent/full/nature20101.html

        Parameters:
        -----------
        controller_class: BaseController
            a concrete implementation of the BaseController class
        input_size: int
            the size of the input vector
        output_size: int
            the size of the output vector
        max_sequence_length: int
            the maximum length of an input sequence
        memory_words_num: int
            the number of words that can be stored in memory
        memory_word_size: int
            the size of an individual word in memory
        memory_read_heads: int
            the number of read heads in the memory
        batch_size: int
            the size of the data batch
        """

        self.input_size = input_size
        self.output_size = output_size
        self.max_sequence_length = max_sequence_length
        self.words_num = memory_words_num
        self.word_size = memory_word_size
        self.read_heads = memory_read_heads
        self.batch_size = batch_size

        self.memory = Memory(self.words_num, self.word_size, self.read_heads,
                             self.batch_size)
        self.controller = controller_class(self.input_size, self.output_size,
                                           self.read_heads, self.word_size,
                                           self.batch_size)

        # input data placeholders
        self.input_data = tf.compat.v1.placeholder(
            tf.float32, [batch_size, None, input_size], name='input')
        self.target_output = tf.compat.v1.placeholder(
            tf.float32, [batch_size, None, output_size], name='targets')
        self.sequence_length = tf.compat.v1.placeholder(tf.int32,
                                                        name='sequence_length')

        self.build_graph()
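
Because the constructor builds TF1-style placeholders, executing the graph needs a feed_dict. A minimal sketch, assuming the enclosing class is named DNC, that a FeedforwardController subclass exists, and that build_graph exposes a loss tensor:

import numpy as np
import tensorflow as tf

# Hypothetical run; DNC, FeedforwardController and dnc.loss are assumed names.
dnc = DNC(FeedforwardController, input_size=6, output_size=6,
          max_sequence_length=10)
with tf.compat.v1.Session() as session:
    session.run(tf.compat.v1.global_variables_initializer())
    loss_value = session.run(dnc.loss, feed_dict={
        dnc.input_data: np.zeros((1, 10, 6), dtype=np.float32),
        dnc.target_output: np.zeros((1, 10, 6), dtype=np.float32),
        dnc.sequence_length: 10,
    })
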
Example #3
    def __init__(self, params):
        # originally hinted as: params.actions = env.actions()
        self.num_actions = params.actions  # assumed to carry the environment's action count
        self.episodes = params.episodes
        self.steps = params.steps
        self.train_steps = params.train_steps
        self.update_freq = params.update_freq
        self.save_weights = params.save_weights
        self.history_length = params.history_length
        self.discount = params.discount
        self.eps = params.init_eps
        self.eps_delta = (params.init_eps - params.final_eps) / params.final_eps_frame
        self.replay_start_size = params.replay_start_size
        self.eps_endt = params.final_eps_frame
        self.random_starts = params.random_starts
        self.batch_size = params.batch_size
        self.ckpt_file = params.ckpt_dir+'/'+params.game

        self.global_step = tf.Variable(0, trainable=False)
        if params.lr_anneal:
            self.lr = tf.train.exponential_decay(params.lr, self.global_step, params.lr_anneal, 0.96, staircase=True)
        else:
            self.lr = params.lr

        self.buffer = Buffer(params)
        self.memory = Memory(params.size, self.batch_size)

        with tf.variable_scope("train") as self.train_scope:
            self.train_net = ConvNet(params, trainable=True)
        with tf.variable_scope("target") as self.target_scope:
            self.target_net = ConvNet(params, trainable=False)

        self.optimizer = tf.train.RMSPropOptimizer(self.lr, params.decay_rate, 0.0, self.eps)

        self.actions = tf.placeholder(tf.float32, [None, self.num_actions])
        self.q_target = tf.placeholder(tf.float32, [None])
        self.q_train = tf.reduce_max(tf.multiply(self.train_net.y, self.actions), axis=1)
        self.diff = tf.subtract(self.q_target, self.q_train)

        half = tf.constant(0.5)
        if params.clip_delta > 0:
            abs_diff = tf.abs(self.diff)
            clipped_diff = tf.clip_by_value(abs_diff, 0, params.clip_delta)
            linear_part = abs_diff - clipped_diff
            quadratic_part = tf.square(clipped_diff)
            self.diff_square = tf.multiply(half, tf.add(quadratic_part, linear_part))
        else:
            self.diff_square = tf.multiply(half, tf.square(self.diff))

        if params.accumulator == 'sum':
            self.loss = tf.reduce_sum(self.diff_square)
        else:
            self.loss = tf.reduce_mean(self.diff_square)

        # backprop step: minimize the loss with RMSProp
        self.task = self.optimizer.minimize(self.loss, global_step=self.global_step)
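
The clip_delta branch above computes a Huber-style loss: quadratic inside [-delta, delta], linear outside. A minimal NumPy sketch of the same piecewise form (a check of the math, not the author's code):

import numpy as np

def clipped_loss(diff, delta=1.0):
    # quadratic inside [-delta, delta], linear beyond, mirroring the branch above
    abs_diff = np.abs(diff)
    clipped = np.clip(abs_diff, 0, delta)
    linear = abs_diff - clipped
    return 0.5 * (clipped ** 2 + linear)

print(clipped_loss(np.array([0.5, 2.0])))  # -> [0.125 1.   ]: quadratic, then linear regime
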
Example #4
 def __init__(self, program1, program2):
     if len(program1) * 32 > 2048 or len(program2) * 32 > 2048:
         raise ValueError("each program must fit in its 2048-unit half of memory")
     self.memory = Memory(4096)
     self.memory.load(program1, 0)
     self.memory.load(program2, 2048)
     self.process_queue = [[Process()], [Process()]]
     self.player1 = self.process_queue[0]  # process queue of program1
     self.player2 = self.process_queue[1]  # process queue of program2
     self.state = None
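
A hedged usage sketch; the enclosing class name (BattleVM here) is an assumption. Each program must fit in its 2048-unit half of memory, i.e. at most 64 thirty-two-bit words:

# Hypothetical usage; BattleVM is an assumed name for the class above.
imp = [0x12345678] * 4      # 4 words * 32 = 128 <= 2048
dwarf = [0x9ABCDEF0] * 8
vm = BattleVM(imp, dwarf)   # loads imp at offset 0 and dwarf at offset 2048
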
Example #5
 def __init__(self):
     self.quads = []                  # quadruple list
     self.data = 0
     self.fd = FunctionDirectory()    # function directory
     self.memory = Memory()
     self.vt = VarTable()             # variable table
     self.start_time = 0
     self.end_time = 0
     self.adidtg = AddressIdTable()   # address-to-id table
     self.faux = FuncionAux()         # auxiliary function helper
Example #6
    def test_get(self):

        target = 'b'
        x, y = (1, 1)

        memory = Memory(['abc', 'd' + target + 'd'])

        result = memory.get(x, y)

        self.assertEqual(result, target, "get result doesn't match target")
Example #7
    def __init__(self, sess, eps_schedule, lr_schedule):
        self.dqn_online = Agent._make_dqn('online')
        self.dqn_target = Agent._make_dqn('target')

        self.sess = sess
        self.eps_schedule = eps_schedule
        self.lr_schedule = lr_schedule
        self.memory = Memory(MEMORY_SIZE)

        self.step = 0
Example #8
 def test1(self):
     a = Memory(ph_cells=4)
     a.addprocess(2)
     a.addprocess(4)
     for i in range(0, 10):
         process, page = a.choosepagevm()
         self.assertGreaterEqual(page, 0)
         self.assertGreaterEqual(
             len(a.vm.processes[process].allocation) - 1, page)
         self.assertIn(process, [0, 1])
Example #9
    def __init__(self, config):
        self.MS_PER_UPDATE = 1000.0 / 60
        self.config = config
        self.screen = pygame.display.get_surface()

        self.game_done = False
        self.CLOCK = pygame.time.Clock()

        self.memory = Memory()
        self.cpu = CPU(self.memory, self.config.debug)
Example #10
def era_operation(current_quad):
    current_function_name.push(current_quad.getResult())
    current_era = ActivationRecord(globalScope.functionDirectory.local_memory)
    local_memory_handler.push(current_era)

    # globalScope.functionDirectory.local_memory.clear_Memory()

    new_memory = Memory("Local/Temporal", 2000, 3999)
    new_era = ActivationRecord(new_memory)
    local_memory_handler.push(new_era)
Example #11
 def __init__(self, fn):
     self.main = True
     self.mode = Mode.edit
     self.mem = Memory(fn)
     self.cursor = (1, 0)
     self.win_y_loc = 0
     self.lr = 16
     self.height = 4
     self.selected_area = None
     self.buffer = b''
Example #12
    def test_play_matches_fake_neural_network(self):

        memory = Memory(config.MEMORY_SIZE)

        player1 = Agent('random_agent_1', config.GRID_SHAPE[0] * config.GRID_SHAPE[1], config.GRID_SHAPE[1], config.MCTS_SIMS, config.CPUCT, GenRandomModel())
        player2 = Agent('random_agent_2', config.GRID_SHAPE[0] * config.GRID_SHAPE[1], config.GRID_SHAPE[1], config.MCTS_SIMS, config.CPUCT, GenRandomModel())
        
        logger = lg.logger_main
        logger.setLevel(logging.DEBUG)
        scores, memory, points, sp_scores = play_matches.playMatches(
            player1, player2, config.EPISODES, logger,
            turns_until_tau0=config.TURNS_UNTIL_TAU0, memory=memory)
Example #13
def test_cache_write_byte():
    mem = Memory(lines=2**8, delay=0)
    cache = Cache(lines=2**4, words_per_line=1, delay=0, next_level=mem)

    cache.write(0x0, 0x0, only_byte=True)
    cache.write(0x1, 0x1, only_byte=True)
    cache.write(0x2, 0x2, only_byte=True)
    cache.write(0x3, 0x3, only_byte=True)
    assert (cache.data[0][1] == 0x03020100)
    assert (mem.data[0] == 0x03020100)
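
The assertions depend on little-endian byte packing: bytes 0x00 through 0x03 written at addresses 0x0 through 0x3 assemble into one 32-bit word, 0x03020100, with the lowest address in the least significant byte. A self-contained check of that packing, independent of the Cache and Memory classes:

import struct

# bytes at addresses 0x0..0x3, in address order
word = struct.unpack('<I', bytes([0x00, 0x01, 0x02, 0x03]))[0]
assert word == 0x03020100  # lowest address lands in the least significant byte
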
Example #14
 def load_checkpoint(self, filename):
     with open(filename, "r") as json_file:
         data = json.load(json_file)
     self.last_step = data['last_step']
     self.current_episode = data['current_episode']
     self.model_name = data['model_name']
     if self.memory is None:
         self.config = Config(self.config_file)
         self.memory = Memory(max_len=self.config.max_queue_length)
     self.memory.load(data['memory'])
Example #15
    def exit_process(self, process):
        "Exit the current process, for any reason"
        process_memory = Memory(lower_bound=process.pcb.lower_bound,
                                upper_bound=process.pcb.upper_bound)

        self.recycle_memory_algo(self.empty_memory, process_memory)

        self.used_memory.remove(process_memory)

        return
Example #16
def test_cache_read_byte():
    mem = Memory(lines=2**8, delay=1)
    mem.data[0] = 1
    # mem has data: 0x0: 1, 0x1: 0, 0x2: 0, 0x3: 0
    cache = Cache(lines=2**4, words_per_line=4, delay=0, next_level=mem)
    assert (cache.read(0, only_byte=True) == 'wait')
    assert (cache.read(0, only_byte=True)[0] == 1)
    assert (cache.read(1, only_byte=True)[0] == 0)
    assert (cache.read(2, only_byte=True)[0] == 0)
    assert (cache.read(3, only_byte=True)[0] == 0)
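
With delay=1 on the backing memory, the first access misses and returns the sentinel string 'wait'; once the line is filled, reads return a tuple whose first element is the byte, hence the [0] indexing. A hedged helper sketch, assuming that same read signature:

def read_byte_blocking(cache, addr):
    # poll until the miss in flight completes, then unpack the byte
    result = cache.read(addr, only_byte=True)
    while result == 'wait':
        result = cache.read(addr, only_byte=True)
    return result[0]
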
Example #17
    def __init__(self, state_size, action_size, replay_size=32):
        """
		state_size : int
			状態空間の次元数
		action_size : int
			行動空間の次元数
		"""
        self.brain = Brain(state_size, action_size)
        self.memory = Memory()
        self.replay_size = replay_size
Example #18
    def reset(self):
        """Función que resetea los valores viejos de ejecuciones 
		pasadas que fueron guardados en recursos compartidos
		"""
        global_vars.init()
        self.virtual_machine.mem = Memory()
        st.SymbolsTable.function_dictionary = {}
        self.canvas.delete("all")
        self.console.delete(1.0, tk.END)
        q.Quadruple.quadruple_list = []
Example #19
 def __init__(self, input_shape, action_count, steps=0, model_path=None):
     self.steps = steps
     self.epsilon = MAX_EPSILON if steps == 0 else self.__calc_epsilon(steps)
     self.brain = Brain(action_count,
                        input_shape=input_shape,
                        model_path=model_path)
     self.memory = Memory(MEMORY_CAPACITY)
     self.input_shape = input_shape
     self.action_count = action_count
Example #20
    def __init__(self, library, program_name):
        self.computer_name = 'HAL'

        self.cpu = CPU()
        self.io = IO()
        self.memory = Memory()
        self.program = Program(library[program_name])

        self.halt_condition = False
        self.ip = 0
Example #21
 def initialize(self, base_instructions):
     self.memory = Memory(20)
     self.error = Error()
     self.internal_state = Internal_State()
     self.io = IO()
     self.base_instructions = base_instructions
     self.max_mem_pages = 2
     self.remaining_mem_pages = 2
     self.new_memory = [0] * self.max_mem_pages
     print(self.new_memory)
Example #22
 def setUp(self):
     self.sys = MemSys("Test Platform", 64)
     self.memory = Memory(self.sys, 4, 4, 4, 2, 64, 1866, True, None)
     self.l2c = Cache(self.sys, 1037000, 0, 1, 16, 8, 534288, 64, 3, True,
                      self.memory.get_component_id())
     self.l1c0 = Cache(self.sys, 1037000, 0, 0, 16, 8, 16384, 64, 1, True,
                       self.l2c.get_component_id())
     self.cu0 = ComputeUnit(self.sys, 1037000, 0, True,
                            self.l1c0.get_component_id())
     self.sys.build_map()
Example #23
def main():
    price_db = leveldb.LevelDB(config.PRICE_DB)
    trade_db = leveldb.LevelDB(config.TRADE_DB)

    price_memory = Memory(
        price_db, trade_db
    )  # latest-period price data, possibly at multiple frequencies and period lengths
    pool = Pool(100000, 0)
    my_hand = Hand(pool)
    my_brain = brain.Brain(price_memory, my_hand)
Example #24
 def reset(self):
     super().reset()
     if self.log is not None:
         self.log.close()
     self.log = open(
         f'{os.path.abspath(os.getcwd())}'
         f'\\data\\logs\\log_{self.__class__.__name__}_game_{self.episodes-1}.txt',
         'w')
     self.game_step = 0
     self.memory = Memory()
Example #25
def run(seed, noise_type, layer_norm, **kwargs):
    """Configure things."""
    rank = MPI.COMM_WORLD.Get_rank()
    if rank != 0: logger.set_level(logger.DISABLED)

    """Create Simulation envs."""
    # env = PegintoHoles()

    """Create True envs"""
    env = Env_robot_control()

    """Parse noise_type"""
    action_noise = None
    param_noise = None
    nb_actions = env.action_dim

    for current_noise_type in noise_type.split(','):
        current_noise_type = current_noise_type.strip()
        if current_noise_type == 'none':
            pass
        elif 'adaptive-param' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            # action_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(nb_actions),
            #                                             sigma=float(0.05) * np.ones(nb_actions))
            param_noise = AdaptiveParamNoiseSpec(initial_stddev=float(stddev),
                                                 desired_action_stddev=float(stddev))
        elif 'normal' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = NormalActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions))
        elif 'ou' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(nb_actions),
                                                        sigma=float(stddev) * np.ones(nb_actions))
        else:
            raise RuntimeError('unknown noise type "{}"'.format(current_noise_type))

    """Configure components."""
    memory = Memory(limit=int(1e5), action_shape=env.action_dim, observation_shape=env.state_dim)
    critic = Critic(layer_norm=layer_norm)
    actor = Actor(nb_actions, layer_norm=layer_norm)

    """Seed everything to make things reproducible."""
    seed = seed + 1000000 * rank
    logger.info('rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir()))
    tf.reset_default_graph()
    set_global_seeds(seed)

    """Disable logging to avoid noise."""
    start_time = time.time()

    """Train the model"""
    training.train(env=env, param_noise=param_noise,
                   action_noise=action_noise, actor=actor, critic=critic, memory=memory, **kwargs)

    """Eval the result"""

    logger.info('total runtime: {}s'.format(time.time() - start_time))
Example #26
    def __init__(self, memory_size=1024 * 8):

        # *************************************************************
        # MEMORY CONTROL AREA
        # Initialize the system's memory and the partner
        # structures used for memory management
        self.empty_memory = [
            Memory(lower_bound=0, upper_bound=memory_size - 1)
        ]
        self.used_memory = []
        self.space_enough = True

        self.threshold = 16
        # Cap the maximum size of a PCB, imitating a real OS
        self.pcb_max_size = 128

        # *************************************************************
        # The next process ID to be allocated
        self.pid = 0

        # *************************************************************
        # ALGORITHM AREA
        self.apply_memory_algorithm = algorithm_memory_apply_best_adapt
        self.recycle_memory_algo = algorithm_memory_recycle

        # *************************************************************
        # EVENT CONTROL AREA
        self.clock = YidanTime(0)
        self.time_period = YidanTime(0)

        self.events = [
            Event(name="event1", semaphore=2),
            Event(name="event2", semaphore=1),
            Event(name="event3", semaphore=2),
            Event(name="event4", semaphore=1),
            Event(name="event5", semaphore=2)
        ]
        self.current_event = []

        self.event_sustain = YidanTime(0)

        self.menu_show = True

        # *************************************************************
        # LIST MANAGEMENT AREA

        self.blocked_list = {}
        for event in self.events:
            self.blocked_list[event.name] = []

        self.ready_list = {}
        for event in self.events:
            self.ready_list[event.name] = []

        self.running_process = None
Example #27
    def __init__(self, no_display=False, zoom=1):
        self.ram = Memory(0x2000,
                          offset=0x8000,
                          randomized=True,
                          name="Display RAM")

        self.vblank_duration = 1.1
        self.fps = 59.7

        self.frames = 0
        self.frame_start = time.perf_counter()  # time.clock() was removed in Python 3.8

        # pseudo-registers
        self.LY = 0
        self.SCY = 0
        self.SCX = 0
        self.BGPAL = 0
        self._LCDCONT = 0

        self.scanlines = 154
        self.turned_on = False

        # Actual display area
        self.vwidth = 256
        self.vheight = 256

        # Shown area
        self.width = 160
        self.height = 144

        if not no_display:
            self.window = HostDisplay("GameBoy", zoom=zoom)
            self.tile_window = HostDisplay("Tiles", zoom=zoom, height=128)
        else:
            self.window = NoDisplay()
            self.tile_window = NoDisplay()

        self.window.show()

        self.window.clear(0x474741)
        self.tile_window.clear(0x474741)

        self.window.update()
        self.tile_window.update()

        self.palette = {
            0: 0xffffff,
            1: 0xaaaaaa,
            2: 0x555555,
            3: 0x000000,
        }

        # Maps color to (x,y) pairs. Stupid, but that's how SDL wants it.
        self.pixels = {0: [], 1: [], 2: [], 3: []}
        self.tile_pixels = {0: [], 1: [], 2: [], 3: []}
Example #28
def process(program, test_case_num):

    pid = str(uuid.uuid1())

    if not isinstance(program, Program):
        program = Program.deserialize(program)

    program_length = program.length
    program_trace = {str(n): [] for n in range(program_length+1)}
    sampled_test_cases = random.sample(program.test_cases, test_case_num)
    for tid, test_case in enumerate(sampled_test_cases):
        memory = Memory()
        inputs = test_case.inputs
        for i in inputs:
            entry = MemoryEntry(
                name=i["variable_name"],
                value=i["value"],
                data_type=i["data_type"],
                opt=None
            )
            memory.write(i["variable_name"], entry)
        expressions = program.expressions()

        idx = 0
        while idx < len(expressions):
            expression = expressions[idx]
            curr_trace = str(program_length - idx)
            program_trace[curr_trace].append({
                "test_case_id": tid,
                "step": int(curr_trace),
                "output": test_case.output,
                "func": expression[1],
                "args": expression[2:],
                "memory": memory.serialize()
            })
            interpreter.interpret(memory, expression)
            idx += 1
        program_trace["0"].append({
            "test_case_id": tid,
            "step": 0,
            "output": test_case.output,
            "func": None,
            "args": [],
            "memory": memory.serialize()
        })

    result = dict()
    for key, value in program_trace.items():
        result[key] = {
            "program_id": pid,
            "program_length": program.length,
            "detail": value
        }

    return result
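
For orientation, a sketch of the returned shape for a one-expression program and a single test case; field values are illustrative placeholders, not output from a real run:

# Hypothetical shape of process(...)'s return value (program_length == 1)
example_trace = {
    "1": {"program_id": "<uuid>", "program_length": 1,
          "detail": [{"test_case_id": 0, "step": 1, "output": "<expected output>",
                      "func": "<op>", "args": ["<arg>"], "memory": "<serialized memory>"}]},
    "0": {"program_id": "<uuid>", "program_length": 1,
          "detail": [{"test_case_id": 0, "step": 0, "output": "<expected output>",
                      "func": None, "args": [], "memory": "<serialized memory>"}]},
}
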
Example #29
def test_simulator():

    # Create a simulator
    sim = Simulator()

    # Test for pipelineing being off
    sim.enable_pipeline = False

    # Set Memory
    DRAM = Memory(lines=2**12, delay=10)
    L2 = Cache(lines=32,
               words_per_line=4,
               delay=1,
               associativity=1,
               next_level=DRAM,
               name="L2")
    L1 = Cache(lines=8,
               words_per_line=4,
               delay=0,
               associativity=1,
               next_level=L2,
               name="L1")
    sim.memory_heirarchy = [L1, L2, DRAM]

    # Set initial register values for debugging
    sim.R = list(range(0, 32))

    # Load some instructions
    with open('test/IF_test.asm') as f:
        file_contents = f.read()

    instructions, data = assemble_to_numerical(file_contents)
    sim.set_instructions(instructions)

    # Step a few times; it will take 10 + 1 + 1 steps to load the first instruction
    for i in range(12):
        assert (sim.buffer == [
            sim.IF_NOOP, sim.ID_NOOP, sim.EX_NOOP, sim.MEM_NOOP
        ])
        sim.step()

    # After the initial compulsory miss, with four words per line,
    # our first four fetches should now be cache hits.
    # Ensure that the first buffer contains the next instruction after each step
    for i in range(4):
        # assert(sim.buffer[0] == [instructions[i], (i + 1) * 4])
        sim.step()

    # $r4 has initial value of 4. After LW completes, it should have value 0
    # This should take 10 cycles to load the next block of instructions,
    # then five cycles for the instruction to go through the pipeline
    # and 11 delay cycles to load the word from memory
    print(sim.memory_heirarchy[0].data)
    for i in range(50):
        sim.step()
Example #30
    def run(self):

        while self.global_ep.value < max_episode:
            self.local_model.pull_from_global_model(self.global_model)
            done = False
            score = 0
            steps = 0

            state = self.env.reset()
            state = torch.Tensor(state)
            state = state.unsqueeze(0)
            memory = Memory(n_step)

            while True:
                policy, value = self.local_model(state)
                action = self.get_action(policy, self.num_actions)

                next_state, reward, done, _ = self.env.step(action)
                next_state = torch.Tensor(next_state)
                next_state = next_state.unsqueeze(0)

                mask = 0 if done else 1
                reward = reward if not done or score == 499 else -1
                action_one_hot = torch.zeros(2)
                action_one_hot[action] = 1
                memory.push(state, next_state, action_one_hot, reward, mask)

                score += reward
                state = next_state

                if len(memory) == n_step or done:
                    batch = memory.sample()
                    loss = self.local_model.push_to_global_model(
                        batch, self.global_model, self.global_optimizer)
                    self.local_model.pull_from_global_model(self.global_model)
                    memory = Memory(n_step)

                    if done:
                        running_score = self.record(score, loss)
                        break

        self.res_queue.put(None)