def _create_context_from_sentence(self, sentence):
    """
    Builds the Context that precedes the newest non-bot sentence of a dialogue.

    Loads the newest sentences of the dialogue `sentence` belongs to (two per
    context step), drops the newest sentences up to and including the first
    one not said by the bot, strips bot sentences from the old end and turns
    the remainder into a Context.

    :param sentence: sentence whose dialogue (`said_in`) provides the history
    :return: the Context built from the remaining sentences, or None if the
             loaded window contains no sentence said by someone else than the bot
    """
    # each context needs 2 sentences to be built
    wanted = Configuration.get_active().context_length * 2
    newest_first = Sentence.objects.filter(
        said_in=sentence.said_in).order_by('-said_on')
    available = newest_first.count()
    window = newest_first[:min(wanted, available)]
    # oldest sentence first, newest sentence last
    history = list(reversed(window))

    # we need to find the first user made sentence (searching from the newest)
    while True:
        try:
            trigger = history.pop()
        except IndexError:
            # there never was a user-said sentence... skip the response!
            return
        if trigger.said_by != self._bot_user:
            break

    # the first sentence needs to be user-said
    while history and history[0].said_by == self._bot_user:
        del history[0]

    logger.debug('Reacting to sentence {}'.format(trigger))
    logger.debug('Have these sentences as context: {}'.format(history))
    turns = Turn.sentences_to_turns(history, self._bot_user)
    return Context.get_single_context(
        turns, Configuration.get_active().context_length)
def _init_model(self, load_model_dir=None):
    """
    Initializes a DeepMind inspired model. Use the load_model_dir parameter
    to load a previously trained model.

    Model names have the form '<model_base_name>_v<NNN>', where NNN is derived
    from the number of directories in the log dir whose name contains
    `self.model_base_name`.

    :param load_model_dir: directory of a previously trained and saved model,
        leave None if you want a fresh one,
        use 'latest' for the last trained model with the given base name
    """
    log_dir = Configuration.get_active().log_dir
    # exist_ok avoids the check-then-create race of isdir() + makedirs()
    os.makedirs(log_dir, exist_ok=True)

    num_models = sum(
        1 for name in os.listdir(log_dir)
        if os.path.isdir(os.path.join(log_dir, name))
        and self.model_base_name in name)

    if load_model_dir == 'latest':
        # Without any existing model there is no "latest" one; fall back to
        # version 0 instead of producing the bogus version number -1.
        model_number = max(num_models - 1, 0)
        model_name = '{}_v{:03d}'.format(self.model_base_name, model_number)
    elif load_model_dir is None:
        model_number = num_models
        model_name = '{}_v{:03d}'.format(self.model_base_name, model_number)
    else:
        model_name = load_model_dir

    self._model = get_deep_mind_model_no_context(name=model_name)
    self._target = get_deep_mind_model_no_context()
    # init target weights
    self._target.set_weights(self._model.get_weights())
def as_matrix(self) -> numpy.ndarray:
    """
    Gets this context in matrix representation.
    Concatenates state and action (in vector form) for each context step and
    pads the result with zero rows up to `self.context_length` rows.

    :return: matrix representation for this context,
             shape of (CONTEXT_LENGTH, STATE_SHAPE[0]+NUM_ACTIONS)
    :raises ValueError: (from numpy) if more context steps are present than
                        `self.context_length` allows
    """
    # a single context vector entry consists of a state and action,
    # both as vectors and concatenated
    entries = []
    if self.states is not None and self.actions is not None:
        entries = [
            numpy.concatenate((state.as_vector(), action.as_vector()))
            for state, action in zip(self.states, self.actions)
        ]

    # Pad by the number of rows actually missing. Counting the built entries
    # (instead of len(self.actions)) also covers states/actions being None.
    missing_context = self.context_length - len(entries)
    config = Configuration.get_active()
    padding = numpy.zeros(
        (missing_context, config.state_shape[0] + config.number_actions))

    if not entries:
        return padding
    return numpy.concatenate((numpy.array(entries), padding))
# Re-evaluates the `terminal` flag of sentences within a single dialogue.
#
# Visible behavior: iterates over `dialogue.sentence_set.all()`; checks whether
# a sentence has an intent whose template is named 'common.bye' while
# `override` is truthy; measures the time between a sentence and the next one
# (timedelta `.seconds` plus `.days * seconds_per_day` from the active
# Configuration); sets `sentence.terminal` to True when that gap is greater
# than `seconds_for_terminal` and to False otherwise; calls `sentence.save()`
# when `save` is truthy.
#
# NOTE(review): this definition has lost its line breaks and indentation, so
# the nesting of the if/else chain (in particular which statements are guarded
# by the 'common.bye'/`override` check, and where `if save:` sits) cannot be
# determined from this text. Restore the layout from version control before
# changing the logic.
# NOTE(review): the final branch logs 'setting "{}" to terminal', but no
# assignment to `sentence.terminal` follows it in the visible code - confirm
# whether that is intended.
def update_terminals_for_single_dialogue(dialogue: Dialogue, override=False, save=True): sentences = dialogue.sentence_set.all() num_sentences = len(sentences) for i, sentence in enumerate(sentences): if sentence.intent is not None and sentence.intent.template.name == 'common.bye' and override: logger.debug( 'Considering sentence "{}" to be terminal.'.format(sentence)) next_i = i + 1 if next_i < num_sentences: next_sentence = sentences[next_i] if next_sentence.intent is not None: duration_to_next = next_sentence.said_on - sentence.said_on seconds_between_sentences = duration_to_next.seconds seconds_between_sentences += duration_to_next.days * Configuration.get_active( ).seconds_per_day if seconds_between_sentences > Configuration.get_active( ).seconds_for_terminal: logger.debug( 'Got no other message for {} seconds, sentence is terminal!' .format(seconds_between_sentences)) sentence.terminal = True else: logger.debug( 'Got message "{}" after {} seconds, so sentence is not terminal!' .format(next_sentence, seconds_between_sentences)) sentence.terminal = False if save: sentence.save() else: logger.debug( 'There was no other message in this dialogue, setting "{}" to terminal' .format(sentence))
def vector_from_name(action_name: str):
    """
    Creates the one-hot vector for the action with the given name.

    :param action_name: name of an action intent of the active configuration
    :return: vector of length `number_actions`, 1. at the action's index and
             0. everywhere else
    :raises ValueError: if the active configuration has no action intent
                        named `action_name`
    """
    is_known = Configuration.get_active().is_action_intent(action_name)
    if not is_known:
        raise ValueError(
            'There is no Action with name "{}"'.format(action_name))

    one_hot = numpy.zeros(Configuration.get_active().number_actions)
    hot_index = Configuration.get_active().action_index_for_name(action_name)
    one_hot[hot_index] = 1.
    return one_hot
def _intent_vector_from_sentence(sentence: Sentence) -> ndarray:
    """
    Creates the one-hot intent vector for a sentence.

    A sentence without intent is treated as having the intent
    'common.unknown'. If the intent name is not a state intent of the active
    configuration, the vector stays all zeros.

    :param sentence: the sentence to read the intent from
    :return: vector of length `number_state_intents`
    """
    intent = sentence.intent
    intent_name = 'common.unknown' if intent is None else intent.template.name

    one_hot = numpy.zeros(Configuration.get_active().number_state_intents)
    if not Configuration.get_active().is_state_intent(intent_name):
        return one_hot

    hot_index = Configuration.get_active().state_index_for_name(intent_name)
    one_hot[hot_index] = 1.
    return one_hot
def list_available_models() -> List[Tuple[str, Optional[Dict[str, int]]]]:
    """
    Lists every entry of the configured weights directory together with the
    statistics loaded from its 'stats.pickle' file.

    :return: list of (entry name, stats) tuples, where stats is whatever
             AbstractBot._load_stats_by_path returns for
             '<weights_dir>/<entry name>/stats.pickle'
    """
    return [
        (entry,
         AbstractBot._load_stats_by_path(
             os.path.join(Configuration.get_active().weights_dir, entry,
                          'stats.pickle')))
        for entry in os.listdir(Configuration.get_active().weights_dir)
    ]
def _episode_to_range(episode_number) -> Tuple[int, int]:
    """
    Translates an episode number into a (start, end) range of sentence positions.

    Episode n covers [n * episode_size, (n + 1) * episode_size). For negative
    episode numbers the total number of sentences is added to both bounds, so
    they count from the end.

    :param episode_number: number of the episode, may be negative
    :return: tuple (start, end)
    :raises NoDataException: if there are no sentences at all
    """
    total_sentences = Sentence.objects.count()
    if total_sentences == 0:
        raise NoDataException

    lower = episode_number * Configuration.get_active().episode_size
    upper = (episode_number + 1) * Configuration.get_active().episode_size
    # negative episodes are relative to the end of the data
    shift = total_sentences if episode_number < 0 else 0
    return lower + shift, upper + shift
def on_message(self, sentence):
    """
    Reacts to a new sentence directed at the bot.

    Picks an action (by querying the bot, or the hard coded
    "didn't understand" action for unknown/missing intents), creates the
    response Message, Intent and Sentence, updates the user profile of the
    dialogue and finally triggers a training run.

    No response is created if the bot would have to be queried but no context
    could be built for the sentence.

    :param sentence: the Sentence that was said to the bot
    """
    assert isinstance(sentence, Sentence)
    logger.debug(
        "Received notification on new message to bot, processing...")
    factory = self._get_and_update_factory(sentence)
    context = self._create_context_from_sentence(sentence)
    config = Configuration.get_active()

    # A sentence without an intent is treated like one with the unknown
    # intent instead of crashing on `sentence.intent.template`.
    intent_name = (sentence.intent.template.name
                   if sentence.intent is not None else None)
    understood = (intent_name is not None
                  and intent_name != config.unknown_intent.name)

    if understood:
        if context is None:
            # _create_context_from_sentence found no user-said sentence
            logger.debug(
                'Could not build a context for sentence {}, skipping response.'
                .format(sentence))
            return
        state = State(sentence)
        action_name = self.bot.query({
            'state_input': numpy.array([state.as_vector()]),
            'context_input': numpy.array([context.as_matrix()])
        })
    else:
        # mapping unknown_intent -> "Sorry didn't understand that" is hard
        # coded, to avoid confusion
        action_name = config.didnt_understand_intent.name

    human_user = User.objects.get(username=sentence.said_by)
    chat = Chat.objects.get(initiator=human_user, receiver=self._bot_user)
    sentence_value = factory.create_response(action_name)
    message = Message.objects.create(value=sentence_value,
                                     sent_by=self._bot_user,
                                     sent_in=chat)
    dialogue = sentence.said_in
    intent_template = IntentTemplate.objects.get(name=action_name)
    intent = Intent.objects.create(template=intent_template)
    response = Sentence.objects.create(value=sentence_value,
                                       said_in=dialogue,
                                       said_by=self._bot_user,
                                       raw_sentence=message,
                                       intent=intent,
                                       sentiment=0,
                                       created_by=self.bot.model_base_name)
    update_user_profile_for_single_dialogue(response.said_in)
    response.refresh_from_db()
    logger.info(
        'Responded to sentence {}(intent="{}") with action "{}". Done processing message!'
        .format(
            sentence,
            intent_name if intent_name is not None else 'Unknown intent',
            action_name))
    self.bot.train()
def all_transitions_from_turns(turns: List[Turn], context_length=None):
    """
    Creates transitions from a list of turns. Reverses that list of turns,
    so put in a list of turns with ascending "dates" of these turns.

    :param turns: List of Turns
    :param context_length: length of context for each turn; defaults to the
        context length of the configuration that is active at call time
    :return: a list of Transitions
    """
    # Resolve the default per call: a default evaluated in the signature is
    # computed once at definition time and would never see a configuration
    # change (and would require the configuration to exist at import time).
    if context_length is None:
        context_length = Configuration.get_active().context_length

    newest_first = turns[::-1]
    # one slice needs to have context_length turns for the context
    # and one for each State s_0 (current state) as well as State s_1 (future state)
    slice_size = context_length + 2

    transitions = []
    for start in range(len(newest_first)):
        # slicing clamps at the end of the list, so trailing slices get shorter
        turns_slice = newest_first[start:start + slice_size]
        if len(turns_slice) >= 2:
            # need at least 2 states for a proper transition
            transitions.append(
                Transition.single_transition_from_turns(
                    turns_slice, context_length))
    return transitions
def vector_from_sentence(sentence: Sentence):
    """
    Creates the action vector for the intent of the given sentence.

    :param sentence: sentence whose intent names the action
    :return: the result of Action.vector_from_name for the intent's template name
    :raises NoIntentError: if the sentence has no intent
    :raises NoActionIntentError: if the intent is not an action intent of the
                                 active configuration
    """
    intent = sentence.intent
    if intent is None:
        raise NoIntentError(sentence)

    intent_name = intent.template.name
    if Configuration.get_active().is_action_intent(intent_name):
        return Action.vector_from_name(intent_name)
    raise NoActionIntentError(intent, sentence)
def __init__(self):
    """
    Sets up the response templates, the (initially empty) context, the table
    of tracked parameter names per intent and the table that maps intent
    names to the methods building the response.
    """
    templates = Configuration.get_active().response_templates.all()
    self._base_sentences = templates
    for response_template in templates:
        response_template.prepare()

    self._context = {}

    # intent name -> names of the parameters listed for that intent
    self._tracking_table = {
        'query.player.height': ['player'],
        'query.player.information.age': ['player'],
        'query.player.information.goals': ['player'],
        'query.player.information.shoe': ['player'],
        'query.player.news': ['content-type', 'player'],
        'query.player.news.more': ['player', 'content-type'],
        'userprofile.response.name': ['given-name'],
        'userprofile.response.favorite_player': ['player'],
        'userprofile.response.age': ['age'],
    }

    # intent name -> bound method registered for that intent
    self._response_builders = {
        'common.hi': self._greeting,
        'common.thanks': self._thanks,
        'common.how_are_you': self._how_are_you,
        'response.player.news': self._content,
        'response.player.information.age': self._player_age,
        'response.player.information.height': self._player_height,
        'response.player.information.goals': self._player_goals,
        'response.player.information.shoe': self._player_shoe,
        'offer.player.news': self._offer_content,
    }

    self._content_interface = SimpleContentInterface()
def _init_callbacks(self):
    """
    Initializes callbacks needed for this model. Implement to specify
    callbacks used. Defaults to using only the TensorBoard callback, logging
    into '<log_dir>/<model name>'.
    """
    tensorboard_dir = os.path.join(Configuration.get_active().log_dir,
                                   self._model.name)
    self._callbacks = [TensorBoard(log_dir=tensorboard_dir)]
def get_single_context(turns, context_length: int = None):
    """
    Creates a context object from turns, useful for training

    :param turns: the Turns that make up the context, e.g. the turns prior to
        the current Sentence/State
    :param context_length: number of steps into the past; defaults to the
        context length of the configuration that is active at call time
    :return: a Context object see __init__
    :raises AssertionError: if more turns than `context_length` are given
    """
    # Resolve the default per call: a default evaluated in the signature is
    # computed once at definition time and would never see a configuration
    # change (and would require the configuration to exist at import time).
    if context_length is None:
        context_length = Configuration.get_active().context_length

    assert len(turns) <= context_length
    states = [turn.user for turn in turns]
    actions = [turn.bot for turn in turns]
    return Context(states, actions, context_length)
def __init__(self, sentence: Sentence = None):
    """
    creates a new state, if sentence is given, the fields will be populated
    with values from this sentence. Without a sentence no field is set.

    A sentence without intent is given the name of the configured unknown
    intent, mirroring `_intent_vector_from_sentence`, which accepts sentences
    without intent as well.

    :param sentence: an object of type Sentence, which will be used to
                     populate the state
    """
    if sentence is None:
        return
    assert isinstance(sentence, Sentence)

    if sentence.intent is not None:
        self.intent_name = sentence.intent.template.name
    else:
        # no intent at all --> treat like the unknown intent instead of
        # failing on `sentence.intent.template`
        self.intent_name = Configuration.get_active().unknown_intent.name
    self.intent_vector = State._intent_vector_from_sentence(sentence)

    try:
        self._intent_index = Configuration.get_active().state_index_for_name(
            self.intent_name)
    except ValueError:
        # not in list --> unknown intent
        self._intent_index = Configuration.get_active().state_index_for_name(
            Configuration.get_active().unknown_intent.name)

    self.sentiment = float(sentence.sentiment)
    self.user_profile = sentence.user_profile
    self.user_profile_vector = State._convert_user_profile(
        sentence.user_profile)
def predict(self, inputs: Dict[str, numpy.ndarray]):
    """
    Simple delegate for the keras.model.predict function, executed with this
    bot's graph as default graph and the configured batch size.

    :param inputs: input as dict of input_name->numpy.ndarray
    :return: an array of quality vectors for each input row
             (state/context pair), not processed, see keras.model.predict
    """
    graph_scope = self._graph.as_default()
    with graph_scope:
        qualities = self._model.predict(
            inputs, batch_size=Configuration.get_active().batch_size)
        return qualities
def single_transition_from_turns(turns: List[Turn], context_length=None):
    """
    Creates a single Transition from a list of turns.

    The last turn provides the future state, the second to last turn the
    current state and action, all turns before those form the context.

    :param turns: List of Turns, at least 2
    :param context_length: length of the context; defaults to the context
        length of the configuration that is active at call time
    :return: a Transition
    :raises AssertionError: if fewer than 2 turns are given
    """
    assert len(
        turns
    ) >= 2, 'You need at least 2 turns for a Transition (= s_0->s_1 with no context)'

    # Resolve the default per call: a default evaluated in the signature is
    # computed once at definition time and would never see a configuration
    # change (and would require the configuration to exist at import time).
    if context_length is None:
        context_length = Configuration.get_active().context_length

    *context_turns, current_turn, final_turn = turns
    context = Context.get_single_context(context_turns, context_length)
    return Transition(current_turn.user, current_turn.bot, context,
                      final_turn.user)
def __init__(self, sentence: Sentence = None):
    """
    Creates an action from a sentence: stores the action vector, the index of
    the action intent as well as the sentence's reward and terminal flag.

    :param sentence: sentence to get the action from
    :raises NoIntentError: if the given sentence has no intent
    :raises NoActionIntentError: if the given sentence has an intent, that
                                 cannot be interpreted as action
    """
    self._action_vector = Action.vector_from_sentence(sentence)

    intent_name = sentence.intent.template.name
    self._action_index = Configuration.get_active().action_index_for_name(
        intent_name)

    self.reward = float(sentence.reward)
    self.terminal = bool(sentence.terminal)
def run(self):
    """
    Endless loop that marks sentences as terminal once no newer sentence
    arrived for `seconds_for_terminal` seconds.

    Each round runs the raw query below, flags every returned sentence that
    is old enough and not yet terminal, saves it and then sleeps for half of
    `seconds_for_terminal`. Never returns.
    """
    # TODO: why?
    sleep(5)
    logger.info("Started TurnsTerminator.")
    logger.info("TurnsTerminator checking for unterminated sentences.")

    while True:
        # NOTE(review): presumably meant to yield the newest sentence of each
        # dialogue; which row a GROUP BY returns here depends on the database
        # engine - confirm.
        candidates = Sentence.objects.raw(
            'SELECT * FROM turns_sentence GROUP BY said_in_id ORDER BY said_on DESC;'
        )
        for candidate in candidates:
            silence = timezone.now() - candidate.said_on
            silent_seconds = (
                silence.seconds +
                silence.days * Configuration.get_active().seconds_per_day)

            too_recent = (silent_seconds <
                          Configuration.get_active().seconds_for_terminal)
            if too_recent or candidate.terminal:
                continue

            logger.info(
                "Got no new sentence after {} seconds, {} has to be a terminal sentence"
                .format(silent_seconds, candidate.value))
            candidate.terminal = True
            candidate.save()

        # Nyquist frequency, so we don't miss terminals
        sleep(Configuration.get_active().seconds_for_terminal / 2)
def train(self, validate=True):
    """
    Trains for a single episode

    Runs `steps_per_episode` training steps on sampled batches while holding
    the training lock, optionally evaluates one more sampled batch, then
    increases `episodes_seen`, saves stats and weights and reports the
    episode logs.

    :param validate: if True, a further batch is sampled after training and
        'val_loss', 'est_value' and 'true_value' are added to the episode logs
        (only if that batch is not empty)
    :return: a bool indicating whether or not training was possible and
             successful.
    """
    if not self.training_allowed:
        return False

    logger.debug('Acquiring lock...')
    with self._training_lock:
        if not self._can_sample_batch(self.episodes_seen):
            logger.debug('No more episodes to train on are available!')
            return False

        with self._graph.as_default():
            episode_logs = {}
            training_loss = 0
            batches_trained = 0
            logger.info(
                'Bot training on episode #{}'.format(self.episodes_seen + 1))

            for _ in range(Configuration.get_active().steps_per_episode):
                num_samples, x, y = self._sample_batch(self.episodes_seen)
                if num_samples <= 0:
                    # no batch could be sampled, abort this episode
                    return False
                loss = self._perform_training_step((x, y))
                batch_logs = {'loss': loss}
                self._on_batch_end(batches_trained, batch_logs)
                # read back from the logs, the callback gets the same dict
                training_loss += batch_logs['loss']
                batches_trained += 1
                self.samples_seen += num_samples

            if batches_trained == 0:
                # steps_per_episode <= 0: nothing was trained, so there is no
                # mean loss to compute (would be a division by zero)
                logger.debug('No training step was performed in this episode!')
                return False

            episode_logs['loss'] = training_loss / batches_trained

            if validate:
                num_samples, x, y = self._sample_batch(self.episodes_seen)
                if num_samples > 0:
                    episode_logs['val_loss'] = self._model.test_on_batch(x, y)
                    estimated = self._model.predict_on_batch(x)
                    episode_logs['est_value'] = estimated.sum() / num_samples
                    episode_logs['true_value'] = (
                        y['quality_output'].sum() / num_samples)

            self.episodes_seen += 1
            self._save_stats()
            self._save_weights()
            self._on_episode_end(self.episodes_seen, episode_logs)
            return True
def __str__(self) -> str:
    """
    :return: '<Action NAME>', where NAME is the name of the configured action
             intent at this action's index
    """
    action_intent = Configuration.get_active().action_intents[
        self._action_index]
    return '<Action {}>'.format(action_intent.name)
def __init__(self):
    """
    Initializes the parent class with the `epsilon` value of the active
    configuration, converted to float.
    """
    epsilon = float(Configuration.get_active().epsilon)
    super().__init__(epsilon)
def _sample_batch(self, episode_number) -> Tuple[int, Dict, Dict]:
    """
    Samples a training batch of transitions for the given episode.

    Samples `batch_size` bot sentences from the episode's range, builds a
    Transition (state, action and context at t0 and t1) for each of them and
    computes the target qualities as
    `reward + discount * max(target prediction for t1)`, where the future
    part is dropped for terminal transitions.

    :param episode_number: number of the episode to sample from
    :return: tuple (number of samples, model inputs, model targets); inputs
             has the keys 'state_input' and 'context_input', targets the key
             'quality_output'. (0, {}, {}) if fewer transitions than
             `batch_size` were built.
    :raises IntentError: re-raised after logging, if a state or action could
                         not be built from a sampled sentence
    """
    start, stop = DeepMindNoContextBot._episode_to_range(episode_number)
    sampled = [
        Sentence.sample_sentence_in_range(self._bot_user.username, start, stop)
        for _ in range(0, Configuration.get_active().batch_size)
    ]

    transitions = []
    for sentence in sampled:
        # a transition needs at least 4 sentences: s0, a0, s1, a1
        while True:
            assert sentence.said_by == self._bot_user.username
            newest_first = Sentence.objects.filter(
                said_in=sentence.said_in).filter(
                    said_on__lte=sentence.said_on).order_by('-said_on')[
                        :Configuration.get_active().context_length * 2 + 4]
            context_sentences = list(reversed(newest_first))
            if len(context_sentences) >= 4:
                break
            # re-sample, since this sample has not enough context
            sentence = Sentence.sample_sentence_in_range(
                self._bot_user.username, start, stop)

        try:
            # the newest two sentences form action and state at t1
            a1 = Action(context_sentences.pop())
            s1 = State(context_sentences.pop())
            turns = Turn.sentences_to_turns(context_sentences, self._bot_user)
            # context turns list should only be CONTEXT_LENGTH long
            raw_context_t1 = turns[0:Configuration.get_active().context_length]
            context_t1 = Context.get_single_context(
                raw_context_t1, Configuration.get_active().context_length)

            # the next two sentences form action and state at t0
            a0 = Action(context_sentences.pop())
            s0 = State(context_sentences.pop())
            turns = Turn.sentences_to_turns(context_sentences, self._bot_user)
            context_t0 = Context.get_single_context(
                turns, Configuration.get_active().context_length)
        except IntentError as e:
            logger.error(
                'Error occurred while processing sampled sentence {}. See below.'
                .format(sentence))
            raise e

        transitions.append(Transition(s0, a0, context_t0, s1, a1, context_t1))

    if len(transitions) < Configuration.get_active().batch_size:
        return 0, {}, {}
    assert len(transitions) == Configuration.get_active().batch_size

    actions = [t.action_t0 for t in transitions]
    states = numpy.array([t.state_t0.as_vector() for t in transitions])
    contexts = numpy.array([t.context_t0.as_matrix() for t in transitions])
    future_states = numpy.array(
        [t.state_t1.as_vector() for t in transitions])
    future_contexts = numpy.array(
        [t.context_t1.as_matrix() for t in transitions])
    # mask: 0 for terminal transitions (no future quality), 1 otherwise
    terminals = numpy.array([0 if t.terminal else 1 for t in transitions])
    rewards = numpy.array([t.action_t0.reward for t in transitions])

    assert future_states.shape == (Configuration.get_active(
    ).batch_size, ) + Configuration.get_active().state_shape
    assert contexts.shape == (Configuration.get_active().batch_size,
                              ) + Configuration.get_active().context_shape
    assert rewards.shape == (Configuration.get_active().batch_size, )

    target_quality = self._target.predict({
        'state_input': future_states,
        'context_input': future_contexts
    })
    assert target_quality.shape == (
        Configuration.get_active().batch_size,
        Configuration.get_active().number_actions)

    # in-place operations on purpose, they keep the dtype of the prediction
    quality_batch = target_quality.max(axis=1).flatten()
    quality_batch *= self.discount
    quality_batch *= terminals
    logger.debug("Future qualities are {}".format(quality_batch))
    logger.debug('Rewards are {}'.format(rewards))
    quality_batch = rewards + quality_batch
    logger.debug("Working with qualities {}".format(quality_batch))

    # only the entry of the action that was actually taken gets a target value
    target_quality = numpy.zeros(
        (Configuration.get_active().batch_size,
         Configuration.get_active().number_actions))
    for row, (action, quality) in enumerate(zip(actions, quality_batch)):
        target_quality[row][action.intent_index] = quality

    model_inputs = {'state_input': states, 'context_input': contexts}
    model_targets = {'quality_output': target_quality}
    return (len(states), model_inputs, model_targets)
def model_directory(self):
    """
    :return: path of this model's directory, i.e. '<weights_dir>/<model name>'
    """
    weights_root = Configuration.get_active().weights_dir
    return os.path.join(weights_root, self._model.name)
def __init__(self):
    """
    Initializes the parent class with the `discount` value of the active
    configuration, converted to float.
    """
    discount = float(Configuration.get_active().discount)
    super().__init__(discount)
def _prediction_to_action_name(self, prediction: numpy.ndarray) -> str:
    """
    Maps a prediction to the name of the action intent with the highest value.

    :param prediction: array of predictions, only its first entry is used
    :return: name of the configured action intent at the argmax position of
             the first prediction
    """
    first_prediction = prediction[0]
    action_intents = list(Configuration.get_active().action_intents.all())
    best_index = first_prediction.argmax()
    return action_intents[best_index].name