def gdl_changed(self, signal_name, device_udi, *args):
    """This method is called when a HAL device is added or removed."""
    # play sound
    playsound_removed = ''
    playsound_added = ''
    if signal_name == "DeviceAdded":
        #print "\nDeviceAdded, udi=%s" % (device_udi)
        self.add_device_signal_recv(device_udi)
        self.update_device_dict()
        required, actions, properties = self.properties_rules(device_udi)
        # get the tray icon position as a tuple of (x, y) coordinates
        coordinates = self.get_tray_coordinates(self.trayicon.tray)
        actor = Actor(actions, required, properties, self.msg_render,
                      self.config, self.voice, coordinates=coordinates)
        actor.on_added()
    elif signal_name == "DeviceRemoved":
        #print "\nDeviceRemoved, udi=%s" % (device_udi)
        required, actions, properties = self.properties_rules(device_udi)
        # get the tray icon position as a tuple of (x, y) coordinates
        coordinates = self.get_tray_coordinates(self.trayicon.tray)
        actor = Actor(actions, required, properties, self.msg_render,
                      self.config, self.voice, coordinates=coordinates)
        actor.on_removed()
        self.remove_device_signal_recv(device_udi)
        self.virtual_root.pop(device_udi)
        self.trayicon.on_rem_udi(device_udi)
    elif signal_name == "NewCapability":
        [cap] = args
        #print "\nNewCapability, cap=%s, udi=%s" % (cap, device_udi)
    # not a HAL signal: needed to add new partitions
    elif signal_name == "VolumeAdded":
        print "\nVolumeAdded, udi=%s" % (device_udi)
        device_dict = args[0]
        print device_dict.__class__
        self.trayicon.on_add_udi(device_udi, device_dict)
        #for i in device_dict[0]["vm.info.childs"]:
        #    for key in i.keys():
        #        print i[key]
    else:
        print "*** Unknown signal %s" % signal_name
def property_modified(self, device_udi, num_changes, change_list):
    """This method is called when a signal on the Device interface is received."""
    # device_udi: the device identifier
    # change_list:
    #   ex: when the volume has been mounted
    #   [('volume.mount_point', False, False), ('volume.is_mounted', False, False)]
    #   it will read
    #   1) ('volume.mount_point', False, False)
    #      i[0] == volume.mount_point  [property_name] [key]
    #      i[1] == False               [removed]       [rem=0|1]
    #      i[2] == False               [added]         [add=0|1]
    #   2) ('volume.is_mounted', False, False)
    #      i[0] == volume.is_mounted   [property_name]
    #      i[1] == False               [removed]
    #      i[2] == False               [added]
    print "\nPropertyModified, device=%s, num=%s" % (device_udi, num_changes)
    for i in change_list:
        property_name = i[0]
        removed = i[1]
        added = i[2]
        print property_name, removed, added
        #print "  key=%s, rem=%d, add=%d" % (property_name, removed, added)
        if property_name == "info.parent":
            self.update_device_list()
        else:
            device_udi_obj = self.bus.get_object("org.freedesktop.Hal", device_udi)
            properties = self.udi_to_properties(device_udi)
            # if, from the udi of the device, the modified property can be found,
            # value = the value of the HAL key, ex:
            #   key=volume.mount_point
            #   value=/media/usbdisk
            if device_udi_obj.PropertyExists(property_name,
                                             dbus_interface="org.freedesktop.Hal.Device"):
                properties[property_name] = device_udi_obj.GetProperty(
                    property_name, dbus_interface="org.freedesktop.Hal.Device")
                print "  value=%s" % (properties[property_name])
                rules = RulesParser(filename=self.appdir + "/rules.xml", input=properties)
                #############################################################
                # ACTOR                                                     #
                #                                                           #
                # if mount is true the volume.mount_point property is       #
                # modified                                                  #
                #############################################################
                if "mount" in rules.actions and rules.actions["mount"]:
                    if property_name == "volume.mount_point":
                        actor = Actor(rules.actions, rules.required, properties,
                                      self.msg_render, self.config, self.voice)
                        # if the value is empty don't do anything
                        actor.on_modified_mount(properties[property_name])
                    else:
                        if property_name in rules.required and \
                           str(properties[property_name]) == str(rules.required[property_name]):
                            pass
                        else:
                            rules.actions = {}
                            rules.required = {}
            else:
                # the property no longer exists on the device: drop the cached
                # value (the original referenced an undefined `device_obj`; the
                # properties dict is what is available in this scope)
                try:
                    del properties[property_name]
                except KeyError:
                    pass
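# For context, a minimal sketch of how the two handlers above might be wired to
# HAL over D-Bus, assuming the standard dbus-python API with a GLib main loop.
# The handler names come from the methods above; `connect_hal_signals`,
# `watch_device`, and the exact registration details are illustrative
# assumptions, not this project's actual setup code.
import dbus
from dbus.mainloop.glib import DBusGMainLoop


def connect_hal_signals(manager):
    """Hook `manager.gdl_changed` and `manager.property_modified` up to HAL."""
    DBusGMainLoop(set_as_default=True)
    bus = dbus.SystemBus()

    # Manager-level signals carry the device udi; the signal name is prepended
    # so gdl_changed(signal_name, device_udi, *args) can dispatch on it.
    for sig in ("DeviceAdded", "DeviceRemoved", "NewCapability"):
        bus.add_signal_receiver(
            lambda *args, **kw: manager.gdl_changed(kw["member"], *args),
            signal_name=sig,
            dbus_interface="org.freedesktop.Hal.Manager",
            bus_name="org.freedesktop.Hal",
            path="/org/freedesktop/Hal/Manager",
            member_keyword="member")

    # Per-device PropertyModified signals deliver (num_changes, change_list);
    # the udi is bound in so the handler signature above matches.
    def watch_device(device_udi):
        device_obj = bus.get_object("org.freedesktop.Hal", device_udi)
        device_obj.connect_to_signal(
            "PropertyModified",
            lambda num, changes: manager.property_modified(device_udi, num, changes),
            dbus_interface="org.freedesktop.Hal.Device")

    return bus, watch_device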
class DDPG():
    """Reinforcement Learning agent that learns using DDPG."""

    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high
        self.action_range = self.action_high - self.action_low

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.2 * self.action_range
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters (CartPole)
        # self.gamma = 0.99  # discount factor
        # self.tau = 0.01    # for soft update of target parameters

        # Algorithm parameters (Quadcopter)
        self.gamma = 0.99  # discount factor
        self.tau = 0.01    # for soft update of target parameters

    def reset_episode(self):
        self.noise.reset()
        state = self.task.reset()
        self.last_state = state
        return state

    def step(self, action, reward, next_state, done):
        # Save experience / reward
        self.memory.add(self.last_state, action, reward, next_state, done)

        # Learn, if enough samples are available in memory
        if len(self.memory) > self.batch_size:
            experiences = self.memory.sample()
            self.learn(experiences)

        # Roll over last state and action
        self.last_state = next_state

    def act(self, state, enable_exploration):
        """Returns actions for given state(s) as per current policy."""
        state = np.reshape(state, [-1, self.state_size])
        action = self.actor_local.model.predict(state)[0]
        noise = np.zeros(self.action_size)
        if enable_exploration:
            noise = self.noise.sample()
        return list(action + noise)

    def learn(self, experiences):
        """Update policy and value parameters using given batch of experience tuples."""
        # Convert experience tuples to separate arrays for each element
        # (states, actions, rewards, etc.)
        states = np.vstack([e.state for e in experiences if e is not None])
        actions = np.array([e.action for e in experiences if e is not None]
                           ).astype(np.float32).reshape(-1, self.action_size)
        rewards = np.array([e.reward for e in experiences if e is not None]
                           ).astype(np.float32).reshape(-1, 1)
        dones = np.array([e.done for e in experiences if e is not None]
                         ).astype(np.uint8).reshape(-1, 1)
        next_states = np.vstack(
            [e.next_state for e in experiences if e is not None])

        # Get predicted next-state actions and Q values from target models
        #     Q_targets_next = critic_target(next_state, actor_target(next_state))
        actions_next = self.actor_target.model.predict_on_batch(next_states)
        Q_targets_next = self.critic_target.model.predict_on_batch(
            [next_states, actions_next])

        # Compute Q targets for current states and train critic model (local)
        Q_targets = rewards + self.gamma * Q_targets_next * (1 - dones)
        self.critic_local.model.train_on_batch(x=[states, actions], y=Q_targets)

        # Train actor model (local)
        action_gradients = np.reshape(
            self.critic_local.get_action_gradients([states, actions, 0]),
            (-1, self.action_size))
        self.actor_local.train_fn([states, action_gradients, 1])  # custom training function

        # Soft-update target models
        self.soft_update(self.critic_local.model, self.critic_target.model)
        self.soft_update(self.actor_local.model, self.actor_target.model)

    def soft_update(self, local_model, target_model):
        """Soft update model parameters: target <- tau * local + (1 - tau) * target."""
        local_weights = local_model.get_weights()
        target_weights = target_model.get_weights()

        assert len(local_weights) == len(target_weights), \
            "Local and target model parameters must have the same size"

        # Blend each weight tensor individually
        new_weights = [self.tau * lw + (1 - self.tau) * tw
                       for lw, tw in zip(local_weights, target_weights)]
        target_model.set_weights(new_weights)

    def load_model(self, actor_filename, critic_filename):
        self.actor_local.load_model(actor_filename)
        self.critic_local.load_model(critic_filename)
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())

    def save_model(self, actor_filename, critic_filename):
        self.actor_local.save_model(actor_filename)
        self.critic_local.save_model(critic_filename)
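# The DDPG agent above relies on OUNoise and ReplayBuffer helpers that are not
# shown here. Below is a minimal sketch consistent with how they are used
# (constructor arguments, reset/sample, add/sample/__len__, and experience
# fields e.state, e.action, e.reward, e.next_state, e.done); the project's
# actual classes may differ in detail.
import random
import copy
from collections import namedtuple, deque

import numpy as np


class OUNoise:
    """Ornstein-Uhlenbeck process used for exploration noise."""

    def __init__(self, size, mu, theta, sigma):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.reset()

    def reset(self):
        """Reset the internal state to the mean."""
        self.state = copy.copy(self.mu)

    def sample(self):
        """Advance the process one step and return the new noise sample."""
        x = self.state
        dx = self.theta * (self.mu - x) + self.sigma * np.random.randn(len(x))
        self.state = x + dx
        return self.state


class ReplayBuffer:
    """Fixed-size buffer of experience tuples."""

    def __init__(self, buffer_size, batch_size):
        self.memory = deque(maxlen=buffer_size)
        self.batch_size = batch_size
        self.experience = namedtuple(
            "Experience",
            field_names=["state", "action", "reward", "next_state", "done"])

    def add(self, state, action, reward, next_state, done):
        """Append a new experience tuple to memory."""
        self.memory.append(self.experience(state, action, reward, next_state, done))

    def sample(self):
        """Return a random batch of experiences."""
        return random.sample(self.memory, k=self.batch_size)

    def __len__(self):
        return len(self.memory)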
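# A rough usage sketch of how the reset_episode / act / step API of the agent
# might be driven. The task is assumed to expose a Gym-style
# step(action) -> (next_state, reward, done); that method is not defined in the
# code above, so treat this loop as illustrative rather than the project's
# actual training script.
def train(agent, task, num_episodes=500):
    for episode in range(1, num_episodes + 1):
        state = agent.reset_episode()       # also resets the OU noise process
        total_reward = 0.0
        done = False
        while not done:
            action = agent.act(state, enable_exploration=True)
            next_state, reward, done = task.step(action)   # assumed task API
            agent.step(action, reward, next_state, done)   # store and (maybe) learn
            state = next_state
            total_reward += reward
        print("Episode {:4d}  reward: {:7.3f}".format(episode, total_reward))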