def get_intent(self, sentence):
    """Return the multi-hot vector of intents identified in a sentence.

    Arguments:
        sentence {string} -- The sentence spoken by the user.

    Returns:
        A multi-hot vector of length NO_INTENTS marking every intent
        detected in the sentence.

    Raises:
        Exception: If `sentence` is None (dialogue reading failed).
    """
    # Guard clause instead of wrapping the whole body in `if ... is not None`.
    if sentence is None:
        raise Exception("Some Error in dialogue reading")
    sentence = sentence.lower()
    # The intent module returns either a single intent name or several
    # names joined with '+'.
    intent = self.intent.intent_module(sentence)
    print(intent)
    if intent.find("+") >= 0:
        data = intent.split("+")
        print(data)
        # Map each intent name to its index (comprehension replaces the
        # manual append loop; removed the unused local `a = 0`).
        intents = [impdicts.intents2indx[i] for i in data]
    else:
        # Single intent: a bare index — utils.multi_hot is given either a
        # list of indices or one index, matching the original behavior.
        intents = impdicts.intents2indx[intent]
    return utils.multi_hot(intents, NO_INTENTS)
def meta_step_end3(self, option):
    """Meta-controller step for modes: intent-state-mod2, meta-reward-2, meta-state-1.

    Addition over meta_step_end2: after every option it checks whether the
    slots of any remaining intent are already completely filled and, if so,
    removes that intent from the intent state.

    :param option: Option index picked by the meta policy
                   (5 == "switch to the next intent group").
    :return: (latest_start_confidence_start, current_slot_state,
              updated current_intent_state, reward, done)
    """
    done = False
    reward = 0
    print("The option picked up is : {}".format(option))
    if option == 5:
        # Switching while intents are still active is penalized with -w2;
        # otherwise the user-agent reward applies.
        if np.sum(self.current_intent_state) > 0.01:
            reward = -self.w2
        else:
            reward = self.user_agent_reward2()
        # BUGFIX: was `self.starting_slot_state_intent_group - self.current_slot_state.copy()`,
        # a no-op subtraction expression; reset() shows an assignment is intended.
        self.starting_slot_state_intent_group = self.current_slot_state.copy()
        # Number of intent groups we need to cycle through.
        intent_groups = len(self.current_obj_intent_groups)
        self.current_intent_group_no += 1
        if self.current_intent_group_no >= intent_groups:
            done = True
        else:
            self.current_intent_state = utils.multi_hot(
                self.current_obj_intent_groups[self.current_intent_group_no],
                5)
        return (self.latest_start_confidence_start, self.current_slot_state,
                self.current_intent_state, reward, done)
    else:
        # An option that is not in the intent state gets the negative
        # per-iteration reward.
        if self.current_intent_state[option] < 0.01:  # i.e. zero
            reward = -self.w2
        else:
            reward = self.calculate_external_reward(
                np.copy(self.latest_start_confidence_start),
                self.current_slot_state, option) * self.w2
        # Remove the served option from the intent state.
        self.current_intent_state[option] = 0.0
        # @29/3/19: also drop any remaining intent whose slots are already
        # all confidence-satisfied — it no longer needs serving.
        for i in range(self.intent_space_size):
            if self.current_intent_state[i] > 0.01:
                if self.check_confidence_state(i):
                    self.current_intent_state[i] = 0.0
        return (self.latest_start_confidence_start, self.current_slot_state,
                self.current_intent_state, reward, done)
def multi_hot(self):
    """Rebuild the current intent state for the active intent group.

    Encodes the active group as a multi-hot vector, then clears every
    intent whose slots are already confidence-satisfied so that only
    intents still needing service remain set.

    :return: The refreshed ``current_intent_state`` vector.
    """
    print("in multi hot")
    active_group = self.current_obj_intent_groups[self.current_intent_group_no]
    self.current_intent_state = utils.multi_hot(active_group, 5)
    for intent_idx in range(self.intent_space_size):
        is_pending = self.current_intent_state[intent_idx] > 0.01
        # Short-circuit keeps check_confidence_state calls identical to
        # the original nested-if form.
        if is_pending and self.check_confidence_state(intent_idx):
            # All slots for this intent are filled — drop it.
            self.current_intent_state[intent_idx] = 0.0
    return self.current_intent_state
def meta_step_end2(self, option):
    """Meta-controller step for modes: intent-state-mod2, meta-reward-2, meta-state-1.

    :param option: Option index picked by the meta policy
                   (5 == "switch to the next intent group").
    :return: (latest_start_confidence_start, current_slot_state,
              updated current_intent_state, reward, done)
    """
    done = False
    reward = 0
    print("The option picked up is : {}".format(option))
    if option == 5:
        # Switching while intents are still active is penalized with -w2;
        # otherwise the user-agent reward applies.
        if np.sum(self.current_intent_state) > 0.01:
            reward = -self.w2
        else:
            reward = self.user_agent_reward2()
        # BUGFIX: was `self.starting_slot_state_intent_group - self.current_slot_state.copy()`,
        # a no-op subtraction expression; reset() shows an assignment is intended.
        self.starting_slot_state_intent_group = self.current_slot_state.copy()
        # Number of intent groups we need to cycle through.
        intent_groups = len(self.current_obj_intent_groups)
        self.current_intent_group_no += 1
        if self.current_intent_group_no >= intent_groups:
            done = True
        else:
            self.current_intent_state = utils.multi_hot(
                self.current_obj_intent_groups[self.current_intent_group_no],
                5)
        return (self.latest_start_confidence_start, self.current_slot_state,
                self.current_intent_state, reward, done)
    else:
        # FIXME 19.2.19 / TODO (kept from original): a wrong option's
        # handling is still being refined.
        # An option that is not in the intent state gets the negative
        # per-iteration reward (note: -w1 here, unlike meta_step_end3's -w2).
        if self.current_intent_state[option] < 0.01:  # i.e. zero
            reward = -self.w1
        else:
            reward = self.calculate_external_reward(
                np.copy(self.latest_start_confidence_start),
                self.current_slot_state, option) * self.w2
        # Remove the served option from the intent state.
        self.current_intent_state[option] = 0.0
        return (self.latest_start_confidence_start, self.current_slot_state,
                self.current_intent_state, reward, done)
def reset(self, prob_random_start: float = 0.5):
    """Reset the environment with a fresh random set of intents.

    Usage: ``confidence_state, intent_state = env.reset()``

    :param prob_random_start: Probability of initializing the slot state
        randomly (random_state_init) instead of deterministically (state_init).
    :return: [current_slot_state, current_intent_state] — the slot state
        with confidence values and the intent state of the first group.
    """
    self.current_obj_intent = []
    # Number of intents for this episode: 1 .. intent_space_size inclusive.
    self.no_intents = np.random.randint(1, self.intent_space_size + 1)
    # Draw that many distinct intents; the draw order is the scheduling
    # order the agent must follow.
    temp_intents = list(range(0, self.intent_space_size))
    for _ in range(self.no_intents):  # was `iter`, which shadowed the builtin
        indx = np.random.randint(0, len(temp_intents))
        self.current_obj_intent.append(temp_intents[indx])
        del temp_intents[indx]
    if random.random() < prob_random_start:
        self.random_state_init()  # random slot-state init
    else:
        self.state_init()
    self.current_obj_intent_groups = self.create_intent_group()
    # Track the intent group currently being served and keep a buffer of
    # intent states; the first group's multi-hot vector is the start state.
    self.current_intent_group_no = 0
    self.intent_states = np.array([
        utils.multi_hot(
            self.current_obj_intent_groups[self.current_intent_group_no],
            self.intent_space_size)
    ])
    self.current_intent_state = self.intent_states[-1]
    self.starting_slot_state_intent_group = self.current_slot_state.copy()
    return [self.current_slot_state, self.current_intent_state]
def meta_step_end(self, option):
    """Meta-controller step: reward the picked option and advance the group.

    :param option: Option index chosen by the meta policy; 5 means switch
        to the next intent group.
    :return: (latest_start_confidence_start, current_slot_state,
              current_intent_state, reward, done)
    """
    done = False
    reward = 0
    print("The option picked up is : {}".format(option))
    if option == 5:
        # Switch to the next set of intents: reward slot-filling progress
        # for the finished group, then move the group pointer forward.
        total_groups = len(self.current_obj_intent_groups)
        reward = self.user_agent_reward()
        self.current_intent_group_no += 1
        if self.current_intent_group_no >= total_groups:
            done = True
        else:
            self.current_intent_state = utils.multi_hot(
                self.current_obj_intent_groups[self.current_intent_group_no],
                5)
        # TODO: also penalize extra steps so the agent prefers the most
        # efficient process for a given task.
    else:
        # FIXME 19.2.19 / TODO (kept from original): a wrong option is not
        # yet penalized here, and the served intent should eventually be
        # removed from the intent state.
        reward = self.calculate_external_reward(
            np.copy(self.latest_start_confidence_start),
            self.current_slot_state, option)
        # current_intent_state is intentionally left unchanged here.
    return (self.latest_start_confidence_start, self.current_slot_state,
            self.current_intent_state, reward, done)