def __init__(self, screen, program=None, id=0):
    self.commands = {
        1: self.addition,
        2: self.multiplication,
        3: self.wait_for_input,
        4: self.output,
        5: self.jump_if_true,
        6: self.jump_if_false,
        7: self.less_than,
        8: self.equals,
        9: self.adjust_base,
        99: self.finalize
    }
    self.screen = screen
    self.id = id
    self.pc = 0
    self.p_mem = Memory()
    self.input_address = None
    self.output_value = [None, None, None]
    self.output_in_progress = 0
    self.relative_base = 0
    if program:
        self.store_mem = Memory(program)
        self.reset()
def __init__(self, env, config, demo_transitions=None):
    self.sess = tf.InteractiveSession()
    self.config = config
    # replay_memory stores both demo data and generated data, while demo_memory only stores demo data
    self.replay_memory = Memory(capacity=self.config.replay_buffer_size,
                                permanent_data=len(demo_transitions))
    self.demo_memory = Memory(capacity=self.config.demo_buffer_size,
                              permanent_data=self.config.demo_buffer_size)
    self.add_demo_to_memory(demo_transitions=demo_transitions)  # add demo data to both demo_memory & replay_memory
    self.time_step = 0
    self.epsilon = self.config.INITIAL_EPSILON
    self.state_dim = 735
    self.action_dim = env.action_space.n

    self.action_batch = tf.placeholder("int32", [None])
    self.y_input = tf.placeholder("float", [None, self.action_dim])
    self.ISWeights = tf.placeholder("float", [None, 1])
    self.n_step_y_input = tf.placeholder("float", [None, self.action_dim])  # for n-step reward
    self.isdemo = tf.placeholder("float", [None])
    self.eval_input = tf.placeholder("float", [None, self.state_dim])
    self.select_input = tf.placeholder("float", [None, self.state_dim])

    # Bare attribute reads: these trigger construction of lazily built graph
    # pieces (see the lazy-property sketch after this snippet).
    self.Q_evaluation
    self.Q_selection
    self.loss
    self.optimize
    self.update_target_net
    self.abs_errors

    self.saver = tf.train.Saver()
    self.sess.run(tf.global_variables_initializer())
    self.save_model()
    self.restore_model()
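# A hedged aside, not from the original source: the bare attribute reads above
# (self.Q_evaluation, self.loss, ...) only make sense if those names are
# properties that build their piece of the graph on first access, a common
# TensorFlow 1.x idiom. A minimal sketch of such a decorator:
import functools

def lazy_property(fn):
    attr = '_cache_' + fn.__name__

    @property
    @functools.wraps(fn)
    def wrapper(self):
        # Build once on first access, then return the cached result.
        if not hasattr(self, attr):
            setattr(self, attr, fn(self))
        return getattr(self, attr)
    return wrapper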
def performERA(self, quad):
    # Instantiate the memory of the next context and append it to the call stack
    next_local_mem = Memory(self.func_table[quad.op1].address_dir.local)
    next_temp_mem = Memory(self.func_table[quad.op1].address_dir.temp)
    next_context = quad.op1
    self.call_stack.append(
        Cache(None, next_context, next_local_mem, next_temp_mem))
def __init__(self, filename=''):
    self.filename = filename
    screenShape = QtGui.QDesktopWidget().screenGeometry()
    self.width = screenShape.width()/3 + 100
    self.height = screenShape.height() - 100
    # Create the activities before creating the caregivers
    self.activities_list = ActivitiesList(account=self)
    # Store memory objects
    self.memories = []
    self.memories2 = []
    # Create a bunch of fake data
    people = ['Bill', 'Frank', 'Jess', 'Penelope', 'Faith', 'Kale', 'JJ']
    for i in range(10):
        a = 'Event Title ' + str(i)
        b = QtCore.QDate.currentDate()
        c = 'Description ' + str(i) + ': Generic description of the event, add more details, more details, descriptions, more descriptions, it was fun'
        d = [people[random.randint(0, 6)], people[random.randint(0, 6)], people[random.randint(0, 6)]]
        e = 'stockphoto' + str(i) + '.png'
        l = 'Location ' + str(i)
        self.memories.append(Memory(title=a, date=b, loc=l, descr=c, tags=d, pic_filename=e))
        self.memories2.append(Memory(title=a, date=b, loc=l, descr=c, tags=d, pic_filename=e))
        self.memories[-1].resize_frame(width=self.width, height=3*self.height/6)
        self.memories2[-1].resize_frame(width=self.width, height=3*self.height/6)
    self.memory_browse = mywidgets.MemoryBrowse(elements=self.memories, tags=self.get_tags(),
                                                locs=self.get_locations(), account=self)
    self.memory_browse_patient = mywidgets.MemoryBrowse(elements=self.memories2, tags=self.get_tags(),
                                                        locs=self.get_locations(), account=self, small=True)
    # Populate with fake data -- eventually either read in data or start from scratch
    self.caregivers = []
    self.caregivers.append(Caregiver(name='Diana', availability=[0, 1, 0, 0, 0, 1, 0], account=self))
    self.caregivers.append(Caregiver(name='Caregiver 1', availability=[0, 0, 1, 0, 0, 0, 0], account=self))
    self.caregivers.append(Caregiver(name='Caregiver 2', availability=[0, 0, 0, 1, 0, 0, 0], account=self))
    self.caregivers.append(Caregiver(name='Caregiver 3', availability=[0, 0, 0, 0, 1, 0, 0], account=self))
    # Create a list of colors corresponding to each caregiver
    self.colors = []
    for caregiver in self.caregivers:
        self.colors.append(QtGui.QColor(random.randint(0, 255), random.randint(0, 255),
                                        random.randint(0, 255), 150))
        caregiver.browseClicked.connect(self.open_browse_memories)
        caregiver.availabilityChanged.connect(self.update_calendar)
    # Create the screens
    self.create_caregiver_screen()
    # Stack for regular caregiver screen, browse memories
    self.cw = QtGui.QStackedLayout()
    self.cw.addWidget(self.cs)
    self.cw.addWidget(self.memory_browse)
    self.caregiver_screen = mywidgets.BaseFrame(width=self.width, height=self.height+100)
    self.caregiver_screen.grid.addLayout(self.cw, 0, 0)
    # Suggest an activity
    self.caregivers[self.current_caregiver()].suggest_activity(self.activities_list.get_activity())
    # Create the patient
    self.patient = Patient(width=self.width, height=self.height, account=self,
                           memory_browse=self.memory_browse_patient)
def add_memory(self, title='', date=None, loc='', descr='', tags=None, pic_filename=''):
    self.memories.append(Memory(title=title, date=date, loc=loc, descr=descr,
                                tags=tags, pic_filename=pic_filename))
    self.memories[-1].resize_frame(width=self.width, height=3*self.height/5)
    self.memory_browse.add_element(self.memories[-1], tags=self.get_tags(), locs=self.get_locations())
    self.memories2.append(Memory(title=title, date=date, loc=loc, descr=descr,
                                 tags=tags, pic_filename=pic_filename))
    self.memories2[-1].resize_frame(width=self.width, height=3*self.height/5)
    self.memory_browse_patient.add_element(self.memories2[-1], tags=self.get_tags(), locs=self.get_locations())
def __init__(self, initialPC=None, IMem=None, DMem=None):
    # Build fresh default objects per call rather than as default arguments,
    # which Python evaluates only once and then shares across all instances.
    self.ALU = ALU()
    self.IMem = IMem if IMem is not None else Memory(True)
    self.DMem = DMem if DMem is not None else Memory(False)
    self.signExtender = SignExtender()
    self.regFile = RegisterFile()
    self.Control = Control()
    self.nextPCLogic = NextPCLogic()
    self.PC = initialPC if initialPC is not None else Bus(64)
    self.aluZero = 0
def __init__(self, retina_length, num_bits_addr=2, memory_is_cumulative=True,
             ignore_zero_addr=False, random_positions=True, seed=424242):
    if not isinstance(retina_length, int):
        raise Exception('retina_length must be an integer')
    if not isinstance(num_bits_addr, int):
        raise Exception('num_bits_addr must be an integer')
    if not isinstance(memory_is_cumulative, bool):
        raise Exception('memory_is_cumulative must be a boolean')
    if not isinstance(ignore_zero_addr, bool):
        raise Exception('ignore_zero_addr must be a boolean')
    if not isinstance(random_positions, bool):
        raise Exception('random_positions must be a boolean')
    if not isinstance(seed, int):
        raise Exception('seed must be an integer')

    self.__retina_length = retina_length
    self.__num_bits_addr = num_bits_addr
    self.__mapping_positions = np.arange(retina_length)
    if random_positions:
        np.random.seed(seed)
        np.random.shuffle(self.__mapping_positions)

    num_memories = self.__retina_length // self.__num_bits_addr
    self.__memories = []
    for i in range(0, num_memories):
        m = Memory(num_bits_addr=self.__num_bits_addr,
                   is_cummulative=memory_is_cumulative,
                   ignore_zero_addr=ignore_zero_addr)
        self.__memories.append(m)

    # If the retina length is not an exact multiple of num_bits_addr,
    # the leftover positions get one extra, smaller memory
    if retina_length % num_bits_addr > 0:
        num_rest_positions = retina_length % num_bits_addr
        m = Memory(num_bits_addr=num_rest_positions,
                   is_cummulative=memory_is_cumulative,
                   ignore_zero_addr=ignore_zero_addr)
        self.__memories.append(m)
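# A quick illustration (not from the original source) of the partitioning above:
# with retina_length=10 and num_bits_addr=3, three full 3-bit memories cover
# nine positions and the one leftover position gets a fourth, 1-bit memory,
# so every retina position is addressed exactly once.
retina_length, num_bits_addr = 10, 3
full_memories = retina_length // num_bits_addr   # 3
rest_positions = retina_length % num_bits_addr   # 1
assert full_memories * num_bits_addr + rest_positions == retina_length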
def run():
    cpu = Cpu()
    cpu.updateDatabase()
    mem = Memory()
    mem.updateDatabase()
    proc = Processes()
    proc.updateDatabase()
def __init__(self, num_states=4, num_actions=2):
    self.num_states = num_states
    self.num_actions = num_actions
    self.main_q_net = self._build_network("main")
    self.trgt_q_net = self._build_network("target")
    self.memory = Memory(MAX_MEMORY)
    self.run_counter = 0
def __init__(self, mem_size: int = 10000):
    """
    The following is an abstraction for a bank of values such as valA,
    which will be used during each cycle. It's set up as an object to
    avoid circular imports.
    """
    self.ValBank = ValBank()
    # The following are functional units like memory, registers, or flags
    self.Memory = Memory(mem_size)
    self.RegisterBank = RegisterBank()
    self.ZF = CCFlag("ZF")  # zero flag
    self.OF = CCFlag("OF")  # overflow flag
    self.SF = CCFlag("SF")  # sign flag
    self.ErrorFlag = StateFlag("Error Flag", error_lib)
    self.StateFlag = StateFlag("State Flag", state_lib)
    self.ALU = ALU(self.ValBank, self.StateFlag, self.ErrorFlag,
                   self.SF, self.OF, self.ZF)
    # The following are functional abstractions of operations that the
    # processor performs
    self.Fetcher = Fetcher(self.ValBank, self.RegisterBank, self.Memory,
                           self.StateFlag, self.ErrorFlag)
    self.Decoder = Decoder(self.ValBank, self.RegisterBank, self.Memory)
    self.Executor = Executor(self.ValBank, self.ALU, self.OF, self.ZF, self.SF)
    self.Memorizer = Memorizer(self.ValBank, self.Memory)
    self.RegWriter = RegWriter(self.RegisterBank, self.ValBank)
    self.PCUpdater = PCUpdater(self.RegisterBank, self.ValBank)
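# A hedged sketch, not part of the original class: one way the stage objects
# above might be driven each cycle. The per-unit run() methods are assumptions;
# only the attribute names come from the constructor above.
def step(self):
    # One fetch-decode-execute-memory-writeback-PC-update cycle
    self.Fetcher.run()
    self.Decoder.run()
    self.Executor.run()
    self.Memorizer.run()
    self.RegWriter.run()
    self.PCUpdater.run()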
def reset(self):
    # Restore program memory from the stored copy and clear all runtime state
    self.p_mem = self.store_mem.copy()
    self.pc = 0
    self.relative_base = 0
    self.input_address = None
    self.output_value = None
def __init__(self, actions, network_input_shape, replay_memory_size=1024,
             minibatch_size=32, learning_rate=0.00025, discount_factor=0.9,
             dropout_prob=0.1, epsilon=1, epsilon_decrease_rate=0.99,
             min_epsilon=0.1, load_path=None, logger=None):
    # Parameters
    self.network_input_shape = network_input_shape  # Shape of the DQN input
    self.actions = actions  # Size of the discrete action space
    self.learning_rate = learning_rate  # Learning rate for the DQN
    self.dropout_prob = dropout_prob  # Dropout probability of the DQN
    self.load_path = load_path  # Path from which to load the DQN's weights
    self.replay_memory_size = replay_memory_size  # Size of replay memory
    self.minibatch_size = minibatch_size  # Size of a DQN minibatch
    self.discount_factor = discount_factor  # Discount factor of the MDP
    self.epsilon = epsilon  # Probability of taking a random action
    self.epsilon_decrease_rate = epsilon_decrease_rate  # See update_epsilon
    self.min_epsilon = min_epsilon  # Minimum value for epsilon
    self.logger = logger

    # Replay memory
    self.max_loss_memory = Memory(capacity=self.replay_memory_size)
    self.training_count = 0

    # Let TensorFlow allocate GPU memory on demand rather than all at once
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    session = tf.Session(config=config)
    KTF.set_session(session)

    # Instantiate the deep Q-networks
    # Main DQN
    self.DQN = DQNetwork(self.actions, self.network_input_shape,
                         learning_rate=self.learning_rate,
                         discount_factor=self.discount_factor,
                         minibatch_size=self.minibatch_size,
                         dropout_prob=self.dropout_prob,
                         load_path=self.load_path,
                         logger=self.logger)
    # Target DQN used to generate targets
    self.DQN_target = DQNetwork(self.actions, self.network_input_shape,
                                learning_rate=self.learning_rate,
                                discount_factor=self.discount_factor,
                                minibatch_size=self.minibatch_size,
                                dropout_prob=self.dropout_prob,
                                load_path=self.load_path,
                                logger=self.logger)
    # Reset target DQN to the main DQN's initial weights
    self.DQN_target.model.set_weights(self.DQN.model.get_weights())
def __init__(self, state_shape, action_size, use_per=True, use_target_net=True,
             use_duel=True, discount_factor=0.99, epsilon=1.0, epsilon_decay=0.98,
             mem_size=20001, batch_size=32, tau=0.05):
    self.state_shape = state_shape
    self.action_size = action_size
    self.epsilon = epsilon
    self.epsilon_min = 0.05
    self.epsilon_decay = epsilon_decay
    # self.epsilon_speed = episodes * 2
    self.discount_factor = discount_factor
    self.tau = tau
    self.use_per = use_per
    self.memory = PER(mem_size, beta=0.4) if use_per else Memory(mem_size)
    self.batch_size = batch_size
    self.optimizer = Adam()
    self.use_duel = use_duel
    self.model = self._build_model()
    self.use_target_net = use_target_net
    # double Q networks
    if use_target_net:
        self.target_model = self._build_model()
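# A hedged sketch (not from the original class) of the Polyak soft update that
# a tau like the one above usually drives; it assumes the Keras-style
# get_weights()/set_weights() interface of the models built by _build_model().
def soft_update_target(self):
    # theta_target <- tau * theta_main + (1 - tau) * theta_target
    main_w = self.model.get_weights()
    target_w = self.target_model.get_weights()
    self.target_model.set_weights(
        [self.tau * mw + (1.0 - self.tau) * tw
         for mw, tw in zip(main_w, target_w)])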
def main(args):
    training = int(args[1])
    test_interval = int(args[2])
    load = int(args[3])
    env = gym.make('BipedalWalker-v2')
    memory = None
    if training == 1:
        memory = Memory(MAX_BUFFER)
        prepopulate_memory(memory, env)
    rewards = []
    start_time = time.time()
    max_reward = 0
    trainer = ActorCritic(env.observation_space.shape[0],
                          env.action_space.shape[0], memory, load)
    for episode in np.arange(MAX_EPISODES):
        if training == 1:
            env_run(env, episode, trainer, memory, True)
        if episode % test_interval == 0:
            max_reward += env_run(env, episode, trainer, None, False)
            rewards.append(max_reward / ((episode / test_interval) + 1))
    plt.plot(rewards)
    plt.show()
def assemble(file_name):
    symbols = []
    memory = Memory()
    instructions = Instructions()
    sap1_parser = Parser()
    print("Assemble {}".format(file_name))
    segments = sap1_parser.parse_file(file_name)
    if segments == []:
        print("ERROR: No code found in source file")
        exit(-2)
    # Extract all the labels from the segments to create a symbol table
    for segment in segments:
        for label in segment.labels:
            symbols.append(label)
    for segment in segments:
        segment.assemble(symbols, instructions)
    code_segment = None
    for segment in segments:
        if segment.is_code():
            code_segment = segment
        memory = segment.load_memory(memory)
    memory.dump(symbols, code_segment)
def __init__(self, state_size, action_size, seed):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        seed (int): random seed
    """
    self.state_size = state_size
    self.action_size = action_size
    self.seed = random.seed(seed)

    # Q-Network
    self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
    self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device)
    self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

    # Replay memory
    self.memory = Memory(BUFFER_SIZE)
    self.experience = namedtuple(
        "Experience",
        field_names=["state", "action", "reward", "next_state", "done"])
    # Initialize time step (for updating every UPDATE_EVERY steps)
    self.t_step = 0
    # Here we'll deal with the empty memory problem: we pre-populate our memory
    # by taking random actions and storing the experience.
    self.tree_idx = None
def __init__(self, shape=(84, 84), num_actions=4):
    self.shape = (shape[0], shape[1], 1)
    self.num_actions = num_actions
    self.main_q_net = self._build_network("main")
    self.trgt_q_net = self._build_network("target")
    self.memory = Memory(MAX_MEMORY)
    self.run_counter = 0
def __init__(self, actions, gamma=0.1, e_greedy=0.9):
    state_size = 1
    neurons = 24
    self.actions = actions
    self.gamma = gamma
    self.epsilon = e_greedy
    self.lr = 0.1
    self.count = 0
    self.epochs = 5
    # Distributional (C51) support: atoms evenly spaced over [v_min, v_max]
    self.v_max = 10
    self.v_min = -10
    self.atoms = 51
    self.delta_z = (self.v_max - self.v_min) / (self.atoms - 1)
    self.z = [self.v_min + i * self.delta_z for i in range(self.atoms)]
    self.m = Build_Model(state_size, neurons, len(actions), atoms=self.atoms)
    self.model = self.m.model
    self.dump_model = copy.copy(self.model)
    self.capacity = 300
    self.memory = Memory(self.capacity)
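# A quick check (not from the original source) of the C51 support above:
# delta_z = (10 - (-10)) / (51 - 1) = 0.4, so the atoms run from -10 to 10
# in steps of 0.4 and the last atom lands on v_max (up to float rounding).
v_min, v_max, atoms = -10, 10, 51
delta_z = (v_max - v_min) / (atoms - 1)          # 0.4
z = [v_min + i * delta_z for i in range(atoms)]
assert z[0] == v_min and abs(z[-1] - v_max) < 1e-9 and len(z) == atoms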
def __init__(self, coef_memory=0.1, dropout_seq=0.9):
    super(KAST, self).__init__()
    self.kernel = 13
    self.dropout_seq = dropout_seq
    self.transformation = Transformation(trainable=False)
    self.resnet = ResNet()
    self.rkn = RKNModel()
    self.memory = Memory(unit=200, kernel=self.kernel)
    self.corr_cost = tfa.layers.CorrelationCost(
        kernel_size=1,
        max_displacement=self.kernel // 2,
        stride_1=1,
        stride_2=1,
        pad=self.kernel // 2,
        data_format="channels_last")
    self.corr_cost_stride = tfa.layers.CorrelationCost(
        kernel_size=1,
        max_displacement=(self.kernel // 2) * 2,
        stride_1=1,
        stride_2=2,
        pad=(self.kernel // 2) * 2,
        data_format="channels_last")
    # self.memory = tf.keras.Sequential()
    # self.memory.add(tf.keras.layers.Input(input_shape=((None, None, 256)), batch_input_shape=[4]))
    # self.memory.add(tf.keras.layers.RNN(self.memory_cell, stateful=True))
    self.coef_memory = coef_memory
    self.description = 'KAST'
    self.mem_write = True
    self.mem0 = None
    self.mem5 = None
    self.k0 = None
    self.v0 = None
    self.last_v = None
def __init__(self):
    # main memory object
    self.memory = Memory()
    # program counter
    self.pc = 0
    # accumulator
    self.accumulator = 0
def __init__(self, filename):
    self.filename = filename
    self.quadruples = []
    self.instructionPointer = [0]
    self.functionLocalStack = []
    self.functionTempStack = []
    self.retVal = 0
    self.paramStack = []
    self.parse_quadruples()
    self.memory = MemoryGenerator.decode(self.filename + ".json")
    self.globalMemory = Memory(self.memory["program"]["locals"])
    self.globalTemps = Memory(self.memory["program"]["temps"])
    self.constants = Memory(self.memory["constants"]["repr"])
    for addr, val in self.memory["constants"]["vals"]:
        self.constants.set_value(addr, val)
    self.matrices_calls = [{}]
def train_mcar():
    env_name = 'MountainCar-v0'
    env = gym.make(env_name)
    num_states = env.env.observation_space.shape[0]
    num_actions = env.env.action_space.n
    model = Model(num_states, num_actions, BATCH_SIZE)
    mem = Memory(50000)
    with tf.Session() as sess:
        sess.run(model.var_init)
        mc = MCar(sess, model, env, mem, MAX_EPSILON, MIN_EPSILON, LAMBDA)
        # change the number of episodes as needed
        num_episodes = 300
        cnt = 0
        while cnt < num_episodes:
            if cnt % 10 == 0:
                print('Episode {} of {}'.format(cnt + 1, num_episodes))
            mc.run()
            cnt += 1
        plt.plot(mc.reward_store)
        plt.show()
        plt.close("all")
        plt.plot(mc.max_x_store)
        plt.show()
def train_mcar():
    # Variant of the train_mcar above that prints summary statistics and
    # saves the plots to disk instead of showing them.
    env_name = 'MountainCar-v0'
    env = gym.make(env_name)
    num_states = env.env.observation_space.shape[0]
    num_actions = env.env.action_space.n
    model = Model(num_states, num_actions, BATCH_SIZE)
    mem = Memory(50000)
    with tf.Session() as sess:
        sess.run(model.var_init)
        mc = MCar(sess, model, env, mem, MAX_EPSILON, MIN_EPSILON, LAMBDA)
        # change the number of episodes as needed
        num_episodes = 300
        cnt = 0
        while cnt < num_episodes:
            if cnt % 10 == 0:
                print('Episode {} of {}'.format(cnt + 1, num_episodes))
            mc.run()
            cnt += 1
        print("Total reward is:", np.sum(mc.reward_store))
        print("Average x-position is:", np.average(mc.max_x_store))
        plt.plot(mc.reward_store)
        plt.savefig(r"D:\USU\Assignments\IntelligentSystems\hw06\plot0.png")
        # plt.show()
        # plt.close("all")
        plt.clf()  # start a fresh figure so the second plot is not drawn over the first
        plt.plot(mc.max_x_store)
        plt.savefig(r"D:\USU\Assignments\IntelligentSystems\hw06\plot1.png")
def __init__(self, asm_filename, obj_filename="../test.o"):
    self.memory = Memory()
    self.registers = Registers()
    # self.executer = Executioner()
    self.assembler = Assembler()
    self.asm_filename = asm_filename
    self.obj_filename = obj_filename
def __init__(self):
    self.alu = Alu()
    self.clock = CPUclock()
    self.memory = Memory()
    self.ram = Ram()
    self.rom = Rom()
    self.registers = Registers()
    self.cu = CU()
def test_write(self):
    a = Memory([1, 2, 3])
    a[2] = 5
    self.assertEqual(a[2], 5, "Retrieve/save failed")
    a[10] = 7
    self.assertEqual(a[10], 7, "Retrieve/save beyond range failed")
    self.assertEqual(a[9], 0, "Retrieve/save beyond range failed")
    self.assertEqual(len(a), 11, "Invalid length")
def __init__(self, state_count, action_count):
    self.state_count = state_count
    self.action_count = action_count
    self.brain = Brain(state_count, action_count)
    self.memory = Memory(MEMORY_CAPACITY)
    self.epsilon = MAX_EPSILON
    self.steps = 0
def __init__(self, bTrain):
    # Settings
    self.directory = '/tmp/TrainedQNetwork'
    self.num_actions = 9
    self.im_height = 84
    self.im_width = 84
    self.discount_factor = 0.99
    self.minibatch_size = 32
    self.initial_epsilon = 1.0
    self.final_epsilon = 0.1
    self.epsilon_frames = 1000000
    self.replay_start_size = 50000
    self.policy_start_size = self.replay_start_size
    self.k = 4      # action repeat (frame skipping)
    self.u = 4      # update frequency
    self.m = 4      # number of frames to include in sequence
    self.c = 10000  # number of actions selected before updating the network used to generate the targets

    # Internal Variables
    self.bTrain = bTrain
    self.ki = 0
    self.ui = 0
    self.mi = 0
    self.frame = 0
    self.ci = 0
    self.sequence = []
    self.prev_phi = np.array([])
    self.phi = np.array([])
    self.epsilon_increment = (self.initial_epsilon - self.final_epsilon) / self.epsilon_frames
    self.epsilon = self.initial_epsilon
    self.action = 0
    self.reward = 0
    self.memory = Memory()
    self.minibatch = MiniBatch()
    self.targets = np.zeros(self.minibatch_size)
    self.bTrial_over = False
    self.bStartLearning = False
    self.bStartPolicy = False
    self.ti = 0
    random.seed(0)

    # Construct tensorflow graphs
    self.q_graph = QGraph(self.im_width, self.im_height, self.m,
                          self.num_actions, self.directory)
    if self.bTrain:
        self.q_graph.SaveGraphAndVariables()
        self.q_graph_targets = QTargetGraph(self.im_width, self.im_height,
                                            self.m, self.num_actions,
                                            self.directory)
    else:
        self.q_graph.LoadGraphAndVariables()
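# A quick check (not from the original source) of the epsilon schedule above:
# (1.0 - 0.1) / 1,000,000 = 9e-7 per frame, so subtracting epsilon_increment
# once per frame anneals epsilon linearly from 1.0 down to 0.1 over exactly
# epsilon_frames frames.
increment = (1.0 - 0.1) / 1000000
assert abs((1.0 - 1000000 * increment) - 0.1) < 1e-12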
def test_copy(self):
    a = Memory(range(0, 10))
    b = a.copy()
    a[3] = 77
    self.assertEqual(b[3], 3, "Failed copy")
    a[99] = 77
    self.assertEqual(len(b), 10, "Failed copy - invalid length")
    self.assertEqual(b[99], 0, "Failed copy")
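# A minimal sketch (assumed, not the original Memory class) of a store that
# satisfies test_write and test_copy above: list-backed, zero-fills on writes
# past the end, reads absent cells as 0, and copies independently.
class Memory:
    def __init__(self, data=None):
        self._cells = list(data) if data is not None else []

    def __getitem__(self, addr):
        return self._cells[addr] if addr < len(self._cells) else 0

    def __setitem__(self, addr, value):
        if addr >= len(self._cells):
            # Grow with zeros so intermediate addresses read back as 0
            self._cells.extend([0] * (addr + 1 - len(self._cells)))
        self._cells[addr] = value

    def __len__(self):
        return len(self._cells)

    def copy(self):
        # New backing list, so mutations of the original don't leak through
        return Memory(self._cells)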
def __init__(self, environment, learningRateVar, dynamicAlphaVar, discountVar,
             nStepVar, nPlanVar, onPolicyVar, updateByExpectationVar,
             behaviorEpsilonVar, behaviorEpsilonDecayRateVar, targetEpsilonVar,
             targetEpsilonDecayRateVar, initialActionvalueMean=0,
             initialActionvalueSigma=0, predefinedAlgorithm=None, actionPlan=None):
    self.environment = environment
    if predefinedAlgorithm:
        # TODO: set missing params accordingly
        pass
    self.learningRateVar = learningRateVar
    self.dynamicAlphaVar = dynamicAlphaVar
    self.discountVar = discountVar
    self.behaviorPolicy = EpsilonGreedyPolicy(self, behaviorEpsilonVar, behaviorEpsilonDecayRateVar)
    self.targetPolicy = EpsilonGreedyPolicy(self, targetEpsilonVar, targetEpsilonDecayRateVar)
    self.onPolicyVar = onPolicyVar
    self.updateByExpectationVar = updateByExpectationVar
    self.nStepVar = nStepVar
    self.nPlanVar = nPlanVar
    self.initialActionvalueMean = initialActionvalueMean  # TODO: set this in GUI
    self.initialActionvalueSigma = initialActionvalueSigma  # TODO: set this in GUI
    self.Qvalues = np.empty_like(self.environment.get_grid())
    self.greedyActions = np.empty_like(self.environment.get_grid())
    self.initialize_Qvalues()
    self.stateActionPairCounts = np.empty_like(self.environment.get_grid())
    self.initialize_stateActionPairCounts()
    # Strictly speaking, the agent has no model at all and initially knows
    # nothing about the environment, including its shape. To avoid technical
    # detail that would not change the agent's behavior, the agent is given
    # that the states can be arranged in a matrix of the same shape as the
    # environment and that the action space is the same for all states.
    self.episodicTask = None  # TODO: not used so far
    self.state = None
    self.episodeFinished = False
    self.return_ = None  # underscore to avoid naming conflict with the return keyword
    self.episodeReturns = []
    self.memory = Memory(self)
    self.hasChosenExploratoryMove = None
    self.hasMadeExploratoryMove = None
    self.targetAction = None
    self.targetActionvalue = None
    self.iSuccessivePlannings = None
    # Debug variables (default built per call to avoid the shared mutable-default pitfall):
    self.actionPlan = actionPlan if actionPlan is not None else []
    self.actionHistory = []