Example #1
    def __init__(self, screen, program=None, id=0):

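        # Dispatch table mapping opcodes to their handler methods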
        self.commands = {
            1: self.addition,
            2: self.multiplication,
            3: self.wait_for_input,
            4: self.output,
            5: self.jump_if_true,
            6: self.jump_if_false,
            7: self.less_than,
            8: self.equals,
            9: self.adjust_base,
            99: self.finalize
        }
        self.screen = screen
        self.id = id
        self.pc = 0
        self.p_mem = Memory()
        self.input_address = None
        self.output_value = [None, None, None]
        self.output_in_progress = 0
        self.relative_base = 0
        if program:
            self.store_mem = Memory(program)
            self.reset()
Example #2
    def __init__(self, env, config, demo_transitions=None):
        self.sess = tf.InteractiveSession()
        self.config = config
        # replay_memory stores both demo data and generated data, while demo_memory only stores demo data
        self.replay_memory = Memory(capacity=self.config.replay_buffer_size, permanent_data=len(demo_transitions))
        self.demo_memory = Memory(capacity=self.config.demo_buffer_size, permanent_data=self.config.demo_buffer_size)
        self.add_demo_to_memory(demo_transitions=demo_transitions)  # add demo data to both demo_memory & replay_memory
        self.time_step = 0
        self.epsilon = self.config.INITIAL_EPSILON

        self.state_dim = 735
        self.action_dim = env.action_space.n

        self.action_batch = tf.placeholder("int32", [None])
        self.y_input = tf.placeholder("float", [None, self.action_dim])
        self.ISWeights = tf.placeholder("float", [None, 1])
        self.n_step_y_input = tf.placeholder("float", [None, self.action_dim])  # for n-step reward
        self.isdemo = tf.placeholder("float", [None])
        self.eval_input = tf.placeholder("float", [None, self.state_dim])
        self.select_input = tf.placeholder("float", [None, self.state_dim])

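        # Referencing these attributes presumably builds the corresponding graph ops
        # via property/lazy-property definitions elsewhere in the class.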
        self.Q_evaluation
        self.Q_selection

        self.loss
        self.optimize
        self.update_target_net
        self.abs_errors

        self.saver = tf.train.Saver()

        self.sess.run(tf.global_variables_initializer())

        self.save_model()
        self.restore_model()
Example #3
 def performERA(self, quad):
     # Instantiate the memory of the next context and append it to the call stack
     next_local_mem = Memory(self.func_table[quad.op1].address_dir.local)
     next_temp_mem = Memory(self.func_table[quad.op1].address_dir.temp)
     next_context = quad.op1
     self.call_stack.append(
         Cache(None, next_context, next_local_mem, next_temp_mem))
Example #4
    def __init__(self, filename=''):
        self.filename = filename   
        
        screenShape = QtGui.QDesktopWidget().screenGeometry()
        self.width = screenShape.width()/3 + 100
        self.height = screenShape.height() - 100
        
        #Create the activities before creating the caregivers
        self.activities_list = ActivitiesList(account=self)
        
        #Store memory objects
        self.memories = [] 
        self.memories2 = []
        #create a bunch of fake data
        people = ['Bill', 'Frank', 'Jess', 'Penelope', 'Faith', 'Kale', 'JJ']
        for i in range(10):
            a = 'Event Title ' + str(i)
            b = QtCore.QDate.currentDate()
            c = 'Description ' + str(i) + ': Generic description of the event, add more details, more details, descriptions, more descriptions, it was fun'
            d = [people[random.randint(0, 6)], people[random.randint(0, 6)], people[random.randint(0, 6)]]
            e = 'stockphoto' + str(i) + '.png'
            l = 'Location ' + str(i)
            self.memories.append(Memory(title=a, date=b, loc=l, descr=c, tags=d, pic_filename=e))
            self.memories2.append(Memory(title=a, date=b, loc=l, descr=c, tags=d, pic_filename=e))
            self.memories[-1].resize_frame(width=self.width, height=3*self.height/6)
            self.memories2[-1].resize_frame(width=self.width, height=3*self.height/6)
        self.memory_browse = mywidgets.MemoryBrowse(elements=self.memories, tags=self.get_tags(), locs=self.get_locations(), account=self)
        self.memory_browse_patient = mywidgets.MemoryBrowse(elements=self.memories2, tags=self.get_tags(), locs=self.get_locations(), account=self, small=True)
        
        #populate with fake data -- eventually either read in data or start from scratch
        self.caregivers = []
        self.caregivers.append(Caregiver(name='Diana', availability=[0, 1, 0, 0, 0, 1, 0], account=self))
        self.caregivers.append(Caregiver(name='Caregiver 1', availability=[0, 0, 1, 0, 0, 0, 0], account=self))
        self.caregivers.append(Caregiver(name='Caregiver 2', availability=[0, 0, 0, 1, 0, 0, 0], account=self))
        self.caregivers.append(Caregiver(name='Caregiver 3', availability=[0, 0, 0, 0, 1, 0, 0], account=self))

        #create a list of colors corresponding to each caregiver
        self.colors = []
        for caregiver in self.caregivers:
            self.colors.append(QtGui.QColor(random.randint(0,255), random.randint(0,255), random.randint(0,255), 150))
            caregiver.browseClicked.connect(self.open_browse_memories)
            caregiver.availabilityChanged.connect(self.update_calendar)
        
        #create the screens
        self.create_caregiver_screen()
        
        #stack for regular caregiver screen, browse memories
        self.cw = QtGui.QStackedLayout() 
        self.cw.addWidget(self.cs)
        self.cw.addWidget(self.memory_browse)
        
        self.caregiver_screen = mywidgets.BaseFrame(width=self.width, height=self.height+100)    
        self.caregiver_screen.grid.addLayout(self.cw, 0, 0)
        
        #suggest an activity
        self.caregivers[self.current_caregiver()].suggest_activity(self.activities_list.get_activity())
        
        #create the patient
        self.patient = Patient(width=self.width, height=self.height, account=self, memory_browse=self.memory_browse_patient)
Example #5
    def add_memory(self, title='', date=None, loc='', descr='', tags=None, pic_filename=''):
        self.memories.append(Memory(title=title, date=date, loc=loc, descr=descr, tags=tags, pic_filename=pic_filename))      
        self.memories[-1].resize_frame(width=self.width, height=3*self.height/5)      
        self.memory_browse.add_element(self.memories[-1], tags=self.get_tags(), locs=self.get_locations())

        self.memories2.append(Memory(title=title, date=date, loc=loc, descr=descr, tags=tags, pic_filename=pic_filename))         
        self.memories2[-1].resize_frame(width=self.width, height=3*self.height/5)          
        self.memory_browse_patient.add_element(self.memories2[-1], tags=self.get_tags(), locs=self.get_locations())
Example #6
 def __init__(self,
              initialPC=Bus(64),
              IMem=Memory(True),
              DMem=Memory(False)):
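     # Note: the default Bus/Memory arguments are created once, when the function is
     # defined, so instances built with the defaults share the same objects.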
     self.ALU = ALU()
     self.IMem = IMem
     self.DMem = DMem
     self.signExtender = SignExtender()
     self.regFile = RegisterFile()
     self.Control = Control()
     self.nextPCLogic = NextPCLogic()
     self.PC = initialPC
     self.aluZero = 0
Example #7
    def __init__(self,
                 retina_length,
                 num_bits_addr=2,
                 memory_is_cumulative=True,
                 ignore_zero_addr=False,
                 random_positions=True,
                 seed=424242):

        if (not isinstance(retina_length, int)):
            raise Exception('retina_length must be an integer')

        if (not isinstance(num_bits_addr, int)):
            raise Exception('num_bits_addr must be an integer')

        if (not isinstance(memory_is_cumulative, bool)):
            raise Exception('memory_is_cumulative must be a boolean')

        if (not isinstance(ignore_zero_addr, bool)):
            raise Exception('ignore_zero_addr must be a boolean')

        if (not isinstance(random_positions, bool)):
            raise Exception('random_positions must be a boolean')

        if (not isinstance(seed, int)):
            raise Exception('seed must be an integer')

        self.__retina_length = retina_length
        self.__num_bits_addr = num_bits_addr

        self.__mapping_positions = np.arange(retina_length)
        if random_positions:
            np.random.seed(seed)
            np.random.shuffle(self.__mapping_positions)

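        # One Memory per group of num_bits_addr retina positions; any leftover
        # positions get a smaller Memory of their own below.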
        num_memories = self.__retina_length//self.__num_bits_addr
        self.__memories = []
        for i in range(0, num_memories):
            m = Memory(num_bits_addr=self.__num_bits_addr,
                       is_cummulative=memory_is_cumulative,
                       ignore_zero_addr=ignore_zero_addr)
            self.__memories.append(m)

        # if there is rest positions
        if retina_length % num_bits_addr > 0:
            num_rest_positions = retina_length % num_bits_addr
            m = Memory(num_bits_addr=num_rest_positions,
                       is_cummulative=memory_is_cumulative,
                       ignore_zero_addr=ignore_zero_addr)
            self.__memories.append(m)
Example #8
 def run():
     cpu = Cpu()
     cpu.updateDatabase()
     mem = Memory()
     mem.updateDatabase()
     proc = Processes()
     proc.updateDatabase()
Example #9
 def __init__(self, num_states=4, num_actions=2):
     self.num_states = num_states
     self.num_actions = num_actions
     self.main_q_net = self._build_network("main")
     self.trgt_q_net = self._build_network('target')
     self.memory = Memory(MAX_MEMORY)
     self.run_counter = 0
Example #10
 def __init__(self, mem_size: int = 10000):
     """
             The following is an abstraction for a bank of values
             such as valA, which will be used during each cycle.
             It's set up as an object to avoid circular import.
     """
     self.ValBank = ValBank()
     """
     The following are functional units like memory,
     registers, or flags
     """
     self.Memory = Memory(mem_size)
     self.RegisterBank = RegisterBank()
     self.ZF = CCFlag("ZF")  # zero flag
     self.OF = CCFlag("OF")  # overflow flag
     self.SF = CCFlag("SF")  # sign flag
     self.ErrorFlag = StateFlag("Error Flag", error_lib)
     self.StateFlag = StateFlag("State Flag", state_lib)
     self.ALU = ALU(self.ValBank, self.StateFlag, self.ErrorFlag, self.SF, self.OF, self.ZF)
     """
     The following are functional abstractions of operations
     that the processor performs
     """
     self.Fetcher = Fetcher(self.ValBank, self.RegisterBank, self.Memory, self.StateFlag, self.ErrorFlag)
     self.Decoder = Decoder(self.ValBank, self.RegisterBank, self.Memory)
     self.Executor = Executor(self.ValBank, self.ALU, self.OF, self.ZF, self.SF)
     self.Memorizer = Memorizer(self.ValBank, self.Memory)
     self.RegWriter = RegWriter(self.RegisterBank, self.ValBank)
     self.PCUpdater = PCUpdater(self.RegisterBank, self.ValBank)
Example #11
 def reset(self):
     self.p_mem = self.store_mem.copy()  # restore working memory from the stored program
     self.pc = 0
     self.relative_base = 0
     self.input_address = None
     self.output_value = None
Example #12
    def __init__(self,
                 actions,
                 network_input_shape,
                 replay_memory_size=1024,
                 minibatch_size=32,
                 learning_rate=0.00025,
                 discount_factor=0.9,
                 dropout_prob=0.1,
                 epsilon=1,
                 epsilon_decrease_rate=0.99,
                 min_epsilon=0.1,
                 load_path=None,
                 logger=None):

        # Parameters
        self.network_input_shape = network_input_shape  # Shape of the DQN input
        self.actions = actions  # Size of the discrete action space
        self.learning_rate = learning_rate  # Learning rate for the DQN
        self.dropout_prob = dropout_prob  # Dropout probability of the DQN
        self.load_path = load_path  # Path from which to load the DQN's weights
        self.replay_memory_size = replay_memory_size  # Size of replay memory
        self.minibatch_size = minibatch_size  # Size of a DQN minibatch
        self.discount_factor = discount_factor  # Discount factor of the MDP
        self.epsilon = epsilon  # Probability of taking a random action
        self.epsilon_decrease_rate = epsilon_decrease_rate  # See update_epsilon
        self.min_epsilon = min_epsilon  # Minimum value for epsilon
        self.logger = logger

        # Replay memory
        self.max_loss_memory = Memory(capacity=self.replay_memory_size)
        self.training_count = 0

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        session = tf.Session(config=config)

        KTF.set_session(session)

        # Instantiate the deep Q-networks
        # Main DQN
        self.DQN = DQNetwork(self.actions,
                             self.network_input_shape,
                             learning_rate=self.learning_rate,
                             discount_factor=self.discount_factor,
                             minibatch_size=self.minibatch_size,
                             dropout_prob=self.dropout_prob,
                             load_path=self.load_path,
                             logger=self.logger)

        # Target DQN used to generate targets
        self.DQN_target = DQNetwork(self.actions,
                                    self.network_input_shape,
                                    learning_rate=self.learning_rate,
                                    discount_factor=self.discount_factor,
                                    minibatch_size=self.minibatch_size,
                                    dropout_prob=self.dropout_prob,
                                    load_path=self.load_path,
                                    logger=self.logger)
        # Reset target DQN
        self.DQN_target.model.set_weights(self.DQN.model.get_weights())
Example #13
 def __init__(self,
              state_shape,
              action_size,
              use_per=True,
              use_target_net=True,
              use_duel=True,
              discount_factor=0.99,
              epsilon=1.0,
              epsilon_decay=0.98,
              mem_size=20001,
              batch_size=32,
              tau=0.05):
     self.state_shape = state_shape
     self.action_size = action_size
     self.epsilon = epsilon
     self.epsilon_min = 0.05
     self.epsilon_decay = epsilon_decay
     # self.epsilon_speed = episodes * 2
     self.discount_factor = discount_factor
     self.tau = tau
     self.use_per = use_per
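     # Prioritized replay (PER) when enabled, otherwise a plain Memory replay buffer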
     self.memory = PER(mem_size, beta=0.4) if use_per else Memory(mem_size)
     self.batch_size = batch_size
     self.optimizer = Adam()
     self.use_duel = use_duel
     self.model = self._build_model()
     self.use_target_net = use_target_net  # double q networks
     if use_target_net:
         self.target_model = self._build_model()
Example #14
def main(args):

    training = int(args[1])
    test_interval = int(args[2])
    load = int(args[3])

    env = gym.make('BipedalWalker-v2')
    memory = None

    if training == 1:
        memory = Memory(MAX_BUFFER)
        prepopulate_memory(memory, env)

    rewards = []
    start_time = time.time()
    max_reward = 0

    trainer = ActorCritic(env.observation_space.shape[0],
                          env.action_space.shape[0], memory, load)

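    # Train each episode (when enabled) and run an evaluation pass every
    # test_interval episodes, recording the running average of evaluation rewards.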
    for episode in np.arange(MAX_EPISODES):
        if training == 1:
            env_run(env, episode, trainer, memory, True)
        if episode % test_interval == 0:
            max_reward += env_run(env, episode, trainer, None, False)
            rewards.append(max_reward / ((episode / test_interval) + 1))
    plt.plot(rewards)
    plt.show()
Example #15
def assemble(file_name):
    symbols = []
    memory = Memory()
    instructions = Instructions()
    sap1_parser = Parser()

    print("Assemble {}".format(file_name))
    segments = sap1_parser.parse_file(file_name)

    if segments == []:
        print("ERROR: No code found in source file")
        exit(-2)

    # Extract all the labels from the segments to create a symbol table
    for segment in segments:
        for label in segment.labels:
            symbols.append(label)

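    # Assemble each segment now that the complete symbol table is available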
    for segment in segments:
        segment.assemble(symbols, instructions)

    code_segment = None
    for segment in segments:
        if segment.is_code():
            code_segment = segment
        memory = segment.load_memory(memory)

    memory.dump(symbols, code_segment)
Example #16
    def __init__(self, state_size, action_size, seed):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)

        # Q-Network
        self.qnetwork_local = QNetwork(state_size, action_size,
                                       seed).to(device)
        self.qnetwork_target = QNetwork(state_size, action_size,
                                        seed).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        # Replay memory
        self.memory = Memory(BUFFER_SIZE)
        self.experience = namedtuple(
            "Experience",
            field_names=["state", "action", "reward", "next_state", "done"])

        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0

        # Here we'll deal with the empty memory problem: we pre-populate our memory
        # by taking random actions and storing the experience.
        self.tree_idx = None
Example #17
 def __init__(self, shape=(84, 84), num_actions=4):
     self.shape = (shape[0], shape[1], 1)
     self.num_actions = num_actions
     self.main_q_net = self._build_network("main")
     self.trgt_q_net = self._build_network('target')
     self.memory = Memory(MAX_MEMORY)
     self.run_counter = 0
Example #18
    def __init__(self, actions, gamma=0.1, e_greedy=0.9):
        state_size = 1
        neurons = 24

        self.actions = actions
        self.gamma = gamma
        self.epsilon = e_greedy
        self.lr = 0.1
        self.count = 0
        self.epochs = 5

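        # Support of the value distribution: `atoms` evenly spaced points from v_min to v_max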
        self.v_max = 10
        self.v_min = -10
        self.atoms = 51
        self.delta_z = (self.v_max - self.v_min) / (self.atoms - 1)
        self.z = [self.v_min + i * self.delta_z for i in range(self.atoms)]

        self.m = Build_Model(state_size,
                             neurons,
                             len(actions),
                             atoms=self.atoms)
        self.model = self.m.model
        self.dump_model = copy.copy(self.model)

        self.capacity = 300
        self.memory = Memory(self.capacity)
Example #19
 def __init__(self, coef_memory=0.1, dropout_seq=0.9):
     super(KAST, self).__init__()
     self.kernel = 13
     self.dropout_seq = dropout_seq
     self.transformation = Transformation(trainable=False)
     self.resnet = ResNet()
     self.rkn = RKNModel()
     self.memory = Memory(unit=200, kernel=self.kernel)
     self.corr_cost = tfa.layers.CorrelationCost(
         kernel_size=1,
         max_displacement=self.kernel // 2,
         stride_1=1,
         stride_2=1,
         pad=self.kernel // 2,
         data_format="channels_last")
     self.corr_cost_stride = tfa.layers.CorrelationCost(
         kernel_size=1,
         max_displacement=(self.kernel // 2) * 2,
         stride_1=1,
         stride_2=2,
         pad=(self.kernel // 2) * 2,
         data_format="channels_last")
     #self.memory = tf.keras.Sequential()
     #self.memory.add(tf.keras.layers.Input(input_shape=((None, None, 256)), batch_input_shape=[4]))
     #self.memory.add(tf.keras.layers.RNN(self.memory_cell, stateful=True))
     self.coef_memory = coef_memory
     self.description = 'KAST'
     self.mem_write = True
     self.mem0 = None
     self.mem5 = None
     self.k0 = None
     self.v0 = None
     self.last_v = None
Example #20
 def __init__(self):
     # main memory object
     self.memory = Memory()
     # program counter
     self.pc = 0
     # accumulator
     self.accumulator = 0
Example #21
 def __init__(self, filename):
     self.filename = filename
     self.quadruples = []
     self.instructionPointer = [0]
     self.functionLocalStack = []
     self.functionTempStack = []
     self.retVal = 0
     self.paramStack = []
     self.parse_quadruples()
     self.memory = MemoryGenerator.decode(self.filename + ".json")
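     # Allocate global, temporary, and constant memory segments from the decoded layout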
     self.globalMemory = Memory(self.memory["program"]["locals"])
     self.globalTemps = Memory(self.memory["program"]["temps"])
     self.constants = Memory(self.memory["constants"]["repr"])
     for [addr, val] in self.memory["constants"]["vals"]:
         self.constants.set_value(addr, val)
     self.matrices_calls = [{}]
Example #22
def train_mcar():
    env_name = 'MountainCar-v0'
    env = gym.make(env_name)

    num_states = env.env.observation_space.shape[0]
    num_actions = env.env.action_space.n

    model = Model(num_states, num_actions, BATCH_SIZE)
    mem = Memory(50000)

    with tf.Session() as sess:
        sess.run(model.var_init)
        mc = MCar(sess, model, env, mem, MAX_EPSILON, MIN_EPSILON, LAMBDA)
        # change the number of episodes as needed
        num_episodes = 300
        cnt = 0
        while cnt < num_episodes:
            if cnt % 10 == 0:
                print('Episode {} of {}'.format(cnt + 1, num_episodes))
            mc.run()
            cnt += 1

        plt.plot(mc.reward_store)
        plt.show()
        plt.close("all")
        plt.plot(mc.max_x_store)
        plt.show()
Example #23
def train_mcar():
    env_name = 'MountainCar-v0'
    env = gym.make(env_name)

    num_states = env.env.observation_space.shape[0]
    num_actions = env.env.action_space.n

    model = Model(num_states, num_actions, BATCH_SIZE)
    mem = Memory(50000)

    with tf.Session() as sess:
        sess.run(model.var_init)
        mc = MCar(sess, model, env, mem, MAX_EPSILON, MIN_EPSILON, LAMBDA)
        # change the number of episodes as needed
        num_episodes = 300
        cnt = 0
        while cnt < num_episodes:
            if cnt % 10 == 0:
                print('Episode {} of {}'.format(cnt+1, num_episodes))
            mc.run()
            cnt += 1
        print("Total award is :", np.sum(mc.reward_store))
        print("Average x-position is :", np.average(mc.max_x_store))
        plt.plot(mc.reward_store)
        plt.savefig(r"D:\USU\Assignments\IntelligentSystems\hw06\plot0.png")
        #plt.show()
        #plt.close("all")
        plt.plot(mc.max_x_store)
        plt.savefig(r"D:\USU\Assignments\IntelligentSystems\hw06\plot1.png")
Example #24
    def __init__(self, asm_filename, obj_filename="../test.o"):
        self.memory = Memory()
        self.registers = Registers()
        # self.executer = Executioner()
        self.assembler = Assembler()

        self.asm_filename = asm_filename
        self.obj_filename = obj_filename
Example #25
 def __init__(self):
     self.alu = Alu()
     self.clock = CPUclock()
     self.memory = Memory()
     self.ram = Ram()
     self.rom = Rom()
     self.registers = Registers()
     self.cu = CU()
Example #26
 def test_write(self):
     a = Memory([1, 2, 3])
     a[2] = 5
     self.assertEqual(a[2], 5, "Retrieve/save failed")
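     # Writing past the current end should grow the memory and zero-fill the gap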
     a[10] = 7
     self.assertEqual(a[10], 7, "Retrieve/save beyond range failed")
     self.assertEqual(a[9], 0, "Retrieve/save beyond range failed")
     self.assertEqual(len(a), 11, "Invalid length")
Example #27
    def __init__(self, state_count, action_count):
        self.state_count = state_count
        self.action_count = action_count

        self.brain = Brain(state_count, action_count)
        self.memory = Memory(MEMORY_CAPACITY)

        self.epsilon = MAX_EPSILON
        self.steps = 0
Example #28
    def __init__(self, bTrain):

        # Settings
        self.directory = '/tmp/TrainedQNetwork'
        self.num_actions = 9
        self.im_height = 84
        self.im_width = 84
        self.discount_factor = 0.99
        self.minibatch_size = 32
        self.initial_epsilon = 1.0
        self.final_epsilon = 0.1
        self.epsilon_frames = 1000000
        self.replay_start_size = 50000
        self.policy_start_size = self.replay_start_size
        self.k = 4  # action repeat (frame skipping)
        self.u = 4  # update frequency
        self.m = 4  # number of frames to include in sequence
        self.c = 10000  # number of actions selected before updating the network used to generate the targets

        # Internal Variables
        self.bTrain = bTrain
        self.ki = 0
        self.ui = 0
        self.mi = 0
        self.frame = 0
        self.ci = 0
        self.sequence = []
        self.prev_phi = np.array([])
        self.phi = np.array([])
        self.epsilon_increment = (self.initial_epsilon -
                                  self.final_epsilon) / self.epsilon_frames
        self.epsilon = self.initial_epsilon
        self.action = 0
        self.reward = 0
        self.memory = Memory()
        self.minibatch = MiniBatch()
        self.targets = np.zeros(self.minibatch_size)
        self.bTrial_over = False
        self.bStartLearning = False
        self.bStartPolicy = False
        self.ti = 0

        random.seed(0)

        # Construct tensorflow graphs
        self.q_graph = QGraph(self.im_width, self.im_height, self.m,
                              self.num_actions, self.directory)

        if (self.bTrain):
            self.q_graph.SaveGraphAndVariables()
            self.q_graph_targets = QTargetGraph(self.im_width, self.im_height,
                                                self.m, self.num_actions,
                                                self.directory)
        else:
            self.q_graph.LoadGraphAndVariables()

Example #29
 def test_copy(self):
     a = Memory(range(0, 10))
     b = a.copy()
     print(type(b))
     a[3] = 77
     self.assertEqual(b[3], 3, "Failed copy")
     a[99] = 77
     self.assertEqual(len(b), 10, "Failed copy - invalid length")
     self.assertEqual(b[99], 0, "Failed copy")
Example #30
 def __init__(self,
              environment,
              learningRateVar,
              dynamicAlphaVar,
              discountVar,
              nStepVar,
              nPlanVar,
              onPolicyVar,
              updateByExpectationVar,
              behaviorEpsilonVar,
              behaviorEpsilonDecayRateVar,
              targetEpsilonVar,
              targetEpsilonDecayRateVar,
              initialActionvalueMean=0,
              initialActionvalueSigma=0,
              predefinedAlgorithm=None,
              actionPlan=[]):
     self.environment = environment
     if predefinedAlgorithm:
         # TODO: set missing params accordingly
         pass
     self.learningRateVar = learningRateVar
     self.dynamicAlphaVar = dynamicAlphaVar
     self.discountVar = discountVar
     self.behaviorPolicy = EpsilonGreedyPolicy(self, behaviorEpsilonVar,
                                               behaviorEpsilonDecayRateVar)
     self.targetPolicy = EpsilonGreedyPolicy(self, targetEpsilonVar,
                                             targetEpsilonDecayRateVar)
     self.onPolicyVar = onPolicyVar
     self.updateByExpectationVar = updateByExpectationVar
     self.nStepVar = nStepVar
     self.nPlanVar = nPlanVar
     self.initialActionvalueMean = initialActionvalueMean  # TODO: Set this in GUI
     self.initialActionvalueSigma = initialActionvalueSigma  # TODO: Set this in GUI
     self.Qvalues = np.empty_like(self.environment.get_grid())
     self.greedyActions = np.empty_like(self.environment.get_grid())
     self.initialize_Qvalues()
     self.stateActionPairCounts = np.empty_like(self.environment.get_grid())
     self.initialize_stateActionPairCounts()
     # Strictly speaking, the agent has no model at all and therefore in the beginning knows nothing about the environment, including its shape.
     # But to avoid technical details in implementation that would anyway not change the Agent behavior at all,
     # the agent will be given that the states can be structured in a matrix that has the same shape as the environment
     # and that the actionspace is constant for all possible states.
     self.episodicTask = None  # TODO: not used so far
     self.state = None
     self.episodeFinished = False
     self.return_ = None  # underscore to avoid naming conflict with return keyword
     self.episodeReturns = []
     self.memory = Memory(self)
     self.hasChosenExploratoryMove = None
     self.hasMadeExploratoryMove = None
     self.targetAction = None
     self.targetActionvalue = None
     self.iSuccessivePlannings = None
     # Debug variables:
     self.actionPlan = actionPlan
     self.actionHistory = []