class DomainIntent(Base):
    """An intent declared in the domain, including its entity settings."""

    __tablename__ = "domain_intent"

    intent_id = sa.Column(
        sa.Integer, utils.create_sequence(__tablename__), primary_key=True
    )
    domain_id = sa.Column(sa.Integer, sa.ForeignKey("domain.id"))
    intent = sa.Column(sa.String)
    triggered_action = sa.Column(sa.String)
    # Both columns hold JSON text which is decoded in `as_dict`.
    use_entities = sa.Column(sa.Text)
    ignore_entities = sa.Column(sa.Text)

    domain = relationship("Domain", back_populates="intents")

    def as_dict(self) -> Dict[Text, Any]:
        """Return this intent as a single-entry dict `{intent name: properties}`.

        Returns:
            Dict mapping the intent name to its decoded `use_entities` and
            `ignore_entities` settings, plus a `triggers` entry if a triggered
            action is set.
        """
        # Unset columns fall back to the JSON literals `true` and `[]`.
        use_entities = json.loads(self.use_entities or "true")
        ignore_entities = json.loads(self.ignore_entities or "[]")

        properties = {
            "use_entities": use_entities,
            "ignore_entities": ignore_entities,
        }
        if self.triggered_action:
            properties["triggers"] = self.triggered_action

        return {self.intent: properties}
class TrainingDataEntity(Base):
    """An entity annotation attached to an NLU training example."""

    __tablename__ = "nlu_training_data_entity"

    id = sa.Column(sa.Integer, utils.create_sequence(__tablename__), primary_key=True)
    example_id = sa.Column(sa.Integer, sa.ForeignKey("nlu_training_data.id"))
    example = relationship("TrainingData", back_populates="entities")
    entity = sa.Column(sa.String)
    value = sa.Column(sa.String)
    original_value = sa.Column(sa.String, index=True)
    start = sa.Column(sa.Integer)
    end = sa.Column(sa.Integer)
    extractor = sa.Column(sa.String)
    # The reference is set to NULL when the referenced entity synonym is deleted.
    entity_synonym_id = sa.Column(
        sa.Integer, sa.ForeignKey("entity_synonym.id", ondelete="SET NULL")
    )

    def as_dict(self) -> Dict[Text, Text]:
        """Return a JSON-like representation of this entity annotation.

        Returns:
            Dict with the entity's span, name, value and synonym ID. The
            `extractor` key is only present if an extractor is set.
        """
        serialized = dict(
            start=self.start,
            end=self.end,
            entity=self.entity,
            value=self.value,
            entity_synonym_id=self.entity_synonym_id,
        )
        if not self.extractor:
            return serialized

        serialized["extractor"] = self.extractor
        return serialized
class Intent(Base):
    """An intent record (currently only temporary intents are stored)."""

    __tablename__ = "intent"

    id = sa.Column(sa.Integer, utils.create_sequence(__tablename__), primary_key=True)
    name = sa.Column(sa.String)
    mapped_to = sa.Column(sa.String)
    is_temporary = sa.Column(sa.Boolean, default=True)
    project_id = sa.Column(sa.String, sa.ForeignKey("project.project_id"))

    temporary_examples = relationship(
        "TemporaryIntentExample",
        cascade="all, delete-orphan",
        back_populates="temporary_intent",
    )

    def as_dict(self, include_example_hashes: bool = True) -> Dict[Text, Any]:
        """Return a JSON-like representation of this intent.

        Args:
            include_example_hashes: If `True`, add an `example_hashes` list with
                the hash of every temporary example of this intent.

        Returns:
            Dict containing the intent's attributes.
        """
        serialized = {
            "intent": self.name,
            "is_temporary": self.is_temporary,
            "mapped_to": self.mapped_to,
        }
        if not include_example_hashes:
            return serialized

        hashes = []
        for example in self.temporary_examples:
            hashes.append(example.example_hash)
        serialized["example_hashes"] = hashes
        return serialized
class NluEvaluation(Base):
    """The result of evaluating an NLU model."""

    __tablename__ = "nlu_evaluation"

    id = sa.Column(sa.Integer, utils.create_sequence(__tablename__), primary_key=True)
    # References the model by its name rather than by its numeric ID.
    model_id = sa.Column(sa.String, sa.ForeignKey("model.name"))
    model = relationship("Model", back_populates="nlu_evaluations")
    report = sa.Column(sa.Text)
    precision = sa.Column(sa.Float)
    f1 = sa.Column(sa.Float)
    accuracy = sa.Column(sa.Float)
    # Time of the evaluation as unix timestamp.
    timestamp = sa.Column(sa.Float)

    predictions = relationship(
        "NluEvaluationPrediction", cascade="all", back_populates="evaluation"
    )

    def as_dict(self) -> Dict[Text, Dict[Text, Any]]:
        """Return a JSON-like representation of this evaluation.

        Returns:
            Dict with the scores, report and serialized predictions under
            `intent_evaluation`, and the evaluated model under `model`.
        """
        intent_evaluation = {
            "report": self.report,
            "f1_score": self.f1,
            "accuracy": self.accuracy,
            "precision": self.precision,
            "predictions": [
                prediction.as_dict() for prediction in self.predictions
            ],
            "timestamp": self.timestamp,
        }
        return {"intent_evaluation": intent_evaluation, "model": self.model_id}
class GitRepository(Base):
    """A connected git repository, including its credentials."""

    __tablename__ = "git_repository"

    id = sa.Column(sa.Integer, utils.create_sequence(__tablename__), primary_key=True)
    name = sa.Column(sa.String)
    repository_url = sa.Column(sa.Text)
    ssh_key = sa.Column(sa.Text)
    git_service = sa.Column(sa.String)
    git_service_access_token = sa.Column(sa.Text)
    target_branch = sa.Column(sa.String)
    is_target_branch_protected = sa.Column(sa.Boolean)
    first_annotator_id = sa.Column(sa.String, sa.ForeignKey("rasa_x_user.username"))
    # Annotation time as unix timestamp.
    first_annotated_at = sa.Column(sa.Float)
    project_id = sa.Column(sa.String, sa.ForeignKey("project.project_id"))

    def as_dict(self) -> Dict[Text, Union[Text, int]]:
        """Return a JSON-like representation of this repository.

        The SSH key and the git service access token are not part of the
        result.

        Returns:
            Dict containing the repository's ID, name, URL, git service and
            target branch settings.
        """
        exported_attributes = (
            "id",
            "name",
            "repository_url",
            "git_service",
            "target_branch",
            "is_target_branch_protected",
        )
        return {
            attribute: getattr(self, attribute) for attribute in exported_attributes
        }
class TrainingData(Base):
    """An annotated NLU training example."""

    __tablename__ = "nlu_training_data"

    id = sa.Column(sa.Integer, utils.create_sequence(__tablename__), primary_key=True)
    hash = sa.Column(sa.String, index=True)
    text = sa.Column(sa.Text)
    intent = sa.Column(sa.String)
    annotator_id = sa.Column(sa.String, sa.ForeignKey("rasa_x_user.username"))
    project_id = sa.Column(sa.String, sa.ForeignKey("project.project_id"))
    # Annotation time as unix timestamp.
    annotated_at = sa.Column(sa.Float)
    filename = sa.Column(sa.String)

    # Entities are loaded together with the example (`lazy="joined"`).
    entities = relationship(
        "TrainingDataEntity",
        lazy="joined",
        cascade="all, delete-orphan",
        back_populates="example",
    )

    def as_dict(self) -> Dict[Text, Any]:
        """Return a JSON-like representation of this training example.

        Returns:
            Dict with the example's ID, text, intent, serialized entities, hash
            and an `annotation` entry holding the annotator and annotation time.
        """
        annotation = {"user": self.annotator_id, "time": self.annotated_at}
        serialized_entities = [entity.as_dict() for entity in self.entities]
        return {
            "id": self.id,
            "text": self.text,
            "intent": self.intent,
            "entities": serialized_entities,
            "hash": self.hash,
            "annotation": annotation,
        }
class TemporaryIntentExample(Base):
    """Links a training example (by its hash) to a mapped temporary intent."""

    __tablename__ = "temporary_intent_example"

    id = sa.Column(sa.Integer, utils.create_sequence(__tablename__), primary_key=True)
    # The temporary intent this example belongs to.
    intent_id = sa.Column(sa.Integer, sa.ForeignKey("intent.id"))
    temporary_intent = relationship("Intent", back_populates="temporary_examples")
    example_hash = sa.Column(sa.String)
class DomainEntity(Base):
    """An entity declared in the domain."""

    __tablename__ = "domain_entity"

    entity_id = sa.Column(
        sa.Integer, utils.create_sequence(__tablename__), primary_key=True
    )
    # The domain this entity is declared in.
    domain_id = sa.Column(sa.Integer, sa.ForeignKey("domain.id"))
    entity = sa.Column(sa.String)

    domain = relationship("Domain", back_populates="entities")
class LookupTable(Base):
    """A lookup table belonging to the NLU training data."""

    __tablename__ = "lookup_table"

    id = sa.Column(sa.Integer, utils.create_sequence(__tablename__), primary_key=True)
    project_id = sa.Column(sa.String, sa.ForeignKey("project.project_id"))
    name = sa.Column(sa.String)
    number_of_elements = Column(sa.Integer)
    # Deferred column: the content is only loaded when it is actually accessed.
    elements = deferred(Column(sa.Text))
    # Stored in the database column named "filename".
    referencing_nlu_file = sa.Column("filename", sa.String)

    def as_dict(self, should_include_filename: bool = False) -> Dict[Text, Text]:
        """Return a JSON-like representation of this lookup table.

        Args:
            should_include_filename: If `True`, also add a `filename` entry
                holding the lookup table's relative file path.

        Returns:
            Dict containing the lookup table's attributes.
        """
        serialized = {"id": self.id, "name": os.path.basename(self.name)}
        # The exported training data does not contain the actual elements of
        # the lookup table; it only links to the file which holds them.
        serialized["elements"] = self.relative_file_path
        serialized["number_of_elements"] = self.number_of_elements

        if should_include_filename:
            serialized["filename"] = self.relative_file_path

        return serialized

    @property
    def relative_file_path(self) -> Text:
        """Path of the lookup table file; bare file names get the data directory."""
        has_directory_part = os.path.basename(self.name) != self.name
        if has_directory_part:
            return self.name

        # A bare file name is assumed to live in the default data directory.
        return str(Path(config.data_dir) / self.name)

    @property
    def absolute_file_path(self) -> Text:
        """The relative file path joined onto the project directory."""
        from rasax.community import utils as rasax_utils

        project_directory = rasax_utils.get_project_directory()
        return str(project_directory / self.relative_file_path)
class ConversationEvent(Base):
    """A single event which happened during a conversation."""

    __tablename__ = "conversation_event"

    id = sa.Column(sa.Integer, utils.create_sequence(__tablename__), primary_key=True)
    conversation_id = sa.Column(
        sa.String,
        sa.ForeignKey("conversation.sender_id"),
        index=True,
        nullable=False,
    )
    conversation = relationship("Conversation", back_populates="events")

    type_name = sa.Column(sa.String, nullable=False)
    # Time of the event as unix timestamp.
    timestamp = sa.Column(sa.Float, index=True, nullable=False)
    intent_name = sa.Column(sa.String)
    action_name = sa.Column(sa.String)
    slot_name = sa.Column(sa.String)
    slot_value = sa.Column(sa.Text)
    policy = sa.Column(sa.String)
    is_flagged = sa.Column(sa.Boolean, default=False, nullable=False)
    # JSON text of the Rasa event; decoded in `as_rasa_dict`.
    data = sa.Column(sa.Text)
    message_log = relationship("MessageLog", back_populates="event", uselist=False)
    evaluation = sa.Column(sa.Text)
    rasa_environment = sa.Column(sa.String, default=DEFAULT_RASA_ENVIRONMENT)

    def as_rasa_dict(self) -> Dict[Text, Any]:
        """Return the Rasa (framework) event stored in this `ConversationEvent`.

        The event is decoded from `data`, and Rasa X specific information is
        attached to its metadata.

        Returns:
            A JSON-like representation of the Rasa event referenced by this
            database entity.
        """
        event = json.loads(self.data)

        # Rasa X specific entries are namespaced with "rasa_x_".
        rasa_x_metadata = {"rasa_x_flagged": self.is_flagged, "rasa_x_id": self.id}

        # A missing or empty `metadata` value is replaced by a fresh dict.
        event_metadata = event.get("metadata") or {}
        event_metadata.update(rasa_x_metadata)
        event["metadata"] = event_metadata

        return event
class RegexFeature(Base):
    """A regex feature belonging to the NLU training data."""

    __tablename__ = "regex_feature"

    id = sa.Column(sa.Integer, utils.create_sequence(__tablename__), primary_key=True)
    project_id = sa.Column(sa.String, sa.ForeignKey("project.project_id"))
    name = sa.Column(sa.String)
    pattern = sa.Column(sa.String)
    filename = sa.Column(sa.String)

    def as_dict(self) -> Dict[Text, Text]:
        """Return the ID, name and pattern of this regex feature as a dict."""
        serialized = {"id": self.id}
        serialized["name"] = self.name
        serialized["pattern"] = self.pattern
        return serialized
class UserGoal(Base):
    """A user goal which intents can belong to."""

    __tablename__ = "user_goal"

    id = sa.Column(sa.Integer, utils.create_sequence(__tablename__), primary_key=True)
    name = sa.Column(sa.String)
    project_id = sa.Column(sa.String, sa.ForeignKey("project.project_id"))

    intents = relationship(
        "UserGoalIntent", cascade="all, delete-orphan", back_populates="user_goal"
    )

    def as_dict(self) -> Dict[Text, Any]:
        """Return the goal's name together with the names of its intents."""
        intent_names = [goal_intent.intent_name for goal_intent in self.intents]
        return {"name": self.name, "intents": intent_names}
class DomainSlot(Base):
    """A slot declared in the domain."""

    __tablename__ = "domain_slot"

    slot_id = sa.Column(
        sa.Integer, utils.create_sequence(__tablename__), primary_key=True
    )
    # The domain this slot is declared in.
    domain_id = sa.Column(sa.Integer, sa.ForeignKey("domain.id"))
    slot = sa.Column(sa.String)
    auto_fill = sa.Column(sa.Boolean, default=True)
    initial_value = sa.Column(sa.String)
    type = sa.Column(sa.String, default="rasa.core.slots.UnfeaturizedSlot")
    values = sa.Column(sa.String)

    domain = relationship("Domain", back_populates="slots")
class MessageLog(Base):
    """The intent classification result for a user message."""

    __tablename__ = "message_log"

    id = sa.Column(sa.Integer, utils.create_sequence(__tablename__), primary_key=True)
    hash = sa.Column(sa.String)
    model_id = sa.Column(sa.Integer, sa.ForeignKey("model.id"))
    model = relationship("Model", back_populates="message_logs")
    archived = sa.Column(sa.Boolean, default=False)
    # Time of the log as unix timestamp.
    time = sa.Column(sa.Float)
    text = sa.Column(sa.Text)
    intent = sa.Column(sa.String)
    confidence = sa.Column(sa.Float)
    # Both columns hold JSON text which is decoded in `as_dict`.
    intent_ranking = sa.Column(sa.Text)
    entities = sa.Column(sa.Text)
    event_id = sa.Column(sa.Integer, sa.ForeignKey("conversation_event.id"))
    event = relationship(
        "ConversationEvent", uselist=False, back_populates="message_log"
    )

    __table_args__ = (
        sa.Index("message_log_idx_archived_text", "archived", "text"),
    )

    def as_dict(self) -> Dict[Text, Any]:
        """Return a JSON-like representation of this message log.

        Returns:
            Dict with the log's attributes and the parsed user input. The
            `model`, `project_id` and `team` entries are `None` if the log has
            no model.
        """
        # Without a model, all model-derived entries are `None`.
        if self.model:
            model_name = self.model.name
            project_id = self.model.project_id
            team = self.model.project.team
        else:
            model_name = project_id = team = None

        user_input = {
            "text": self.text,
            "intent": {"name": self.intent, "confidence": self.confidence},
            "intent_ranking": json.loads(self.intent_ranking),
            "entities": json.loads(self.entities),
        }

        return {
            "id": self.id,
            "time": self.time,
            "model": model_name,
            "hash": self.hash,
            "project_id": project_id,
            "team": team,
            "user_input": user_input,
        }
class ConversationTag(Base):
    """A tag which can be attached to conversations."""

    __tablename__ = "conversation_tag"

    id = sa.Column(sa.Integer, utils.create_sequence(__tablename__), primary_key=True)
    value = sa.Column(sa.String, nullable=False, index=True)
    color = sa.Column(sa.String, nullable=False)

    def as_dict(self) -> Dict[Text, Union[Text, int, List[Text]]]:
        """Return a JSON-like representation of this tag.

        Returns:
            Dict with the tag's ID, value and color, plus the `sender_id` of
            every conversation in `self.conversations`.
        """
        # NOTE(review): `conversations` is not declared in this class; it is
        # presumably provided by a relationship backref defined elsewhere.
        sender_ids = [conversation.sender_id for conversation in self.conversations]
        return {
            "id": self.id,
            "value": self.value,
            "color": self.color,
            "conversations": sender_ids,
        }
class Story(Base):
    """A Rasa Core training story."""

    __tablename__ = "story"

    id = sa.Column(sa.Integer, utils.create_sequence(__tablename__), primary_key=True)
    name = sa.Column(sa.String)
    story = sa.Column(sa.Text)
    user = sa.Column(sa.String, sa.ForeignKey("rasa_x_user.username"))
    # Annotation time as unix timestamp.
    annotated_at = sa.Column(sa.Float)
    filename = sa.Column(sa.String)

    def as_dict(self) -> Dict[Text, Any]:
        """Return a JSON-like representation of this story.

        Returns:
            Dict with the story's ID, name, content and file name, and an
            `annotation` entry holding the annotating user and time.
        """
        annotation = {"user": self.user, "time": self.annotated_at}
        serialized = {"id": self.id, "name": self.name, "story": self.story}
        serialized["annotation"] = annotation
        serialized["filename"] = self.filename
        return serialized
class Response(Base):
    """A bot response."""

    __tablename__ = "response"

    id = sa.Column(sa.Integer, utils.create_sequence(__tablename__), primary_key=True)
    response_name = sa.Column(sa.String, nullable=False)
    text = sa.Column(sa.Text)
    # JSON text of the response; decoded in `as_dict`.
    content = sa.Column(sa.Text)
    annotator_id = sa.Column(sa.String, sa.ForeignKey("rasa_x_user.username"))
    # Annotation time as unix timestamp.
    annotated_at = sa.Column(sa.Float)
    project_id = sa.Column(sa.String, sa.ForeignKey("project.project_id"))
    edited_since_last_training = sa.Column(sa.Boolean, default=True)

    domain = relationship("Domain", back_populates="responses")
    domain_id = sa.Column(sa.Integer, sa.ForeignKey("domain.id"))

    def as_dict(self) -> Dict[Text, Any]:
        """Return the decoded response content, extended by annotation details.

        Returns:
            The decoded `content` with the response name, ID, annotator,
            annotation time, project and edit state added to it.
        """
        result = json.loads(self.content)

        extra_entries = (
            (RESPONSE_NAME_KEY, self.response_name),
            ("id", self.id),
            ("annotator_id", self.annotator_id),
            ("annotated_at", self.annotated_at),
            ("project_id", self.project_id),
            ("edited_since_last_training", self.edited_since_last_training),
        )
        for key, value in extra_entries:
            result[key] = value

        return result

    @staticmethod
    def get_stripped_value(response: Dict[Text, Any], key: Text) -> Optional[Text]:
        """Look up `key` in `response` and strip surrounding whitespace.

        Args:
            response: Response dictionary to read the value from.
            key: Key of the value to look up.

        Returns:
            The stripped value if it is a string, otherwise `None`.
        """
        value = response.get(key)
        if not isinstance(value, str):
            return None

        return value.strip()
class DomainAction(Base):
    """An action declared in the domain."""

    __tablename__ = "domain_action"

    action_id = sa.Column(
        sa.Integer, utils.create_sequence(__tablename__), primary_key=True
    )
    domain_id = sa.Column(sa.Integer, sa.ForeignKey("domain.id"))
    action = sa.Column(sa.String)
    # Marks actions which are forms.
    is_form = sa.Column(sa.Boolean, default=False)

    domain = relationship("Domain", back_populates="actions")

    def as_dict(self) -> Dict[Text, Union[Text, bool]]:
        """Return a JSON-like representation of this action.

        Returns:
            Dict with the action's ID, domain ID, name and form flag.
        """
        serialized = {"id": self.action_id, "domain_id": self.domain_id}
        serialized["name"] = self.action
        serialized["is_form"] = self.is_form
        return serialized
class NluEvaluationPrediction(Base):
    """A single prediction made as part of an NLU evaluation."""

    __tablename__ = "nlu_evaluation_prediction"

    id = sa.Column(sa.Integer, utils.create_sequence(__tablename__), primary_key=True)
    evaluation_id = sa.Column(sa.Integer, sa.ForeignKey("nlu_evaluation.id"))
    evaluation = relationship("NluEvaluation", back_populates="predictions")
    text = sa.Column(sa.String)
    intent = sa.Column(sa.String)
    predicted = sa.Column(sa.String)
    confidence = sa.Column(sa.Float)

    def as_dict(self) -> Dict[Text, Union[Text, sa.Float]]:
        """Return the text, intent, prediction and confidence as a dict."""
        exported_attributes = ("text", "intent", "predicted", "confidence")
        return {
            attribute: getattr(self, attribute) for attribute in exported_attributes
        }
class EntitySynonymValue(Base):
    """A value mapped to an entity synonym.

    This mapping (relationship) is what effectively creates a synonym, i.e. one
    term can be replaced for another.
    """

    __tablename__ = "entity_synonym_value"

    id = sa.Column(sa.Integer, utils.create_sequence(__tablename__), primary_key=True)
    entity_synonym_id = sa.Column(sa.Integer, sa.ForeignKey("entity_synonym.id"))
    entity_synonym = relationship("EntitySynonym", back_populates="synonym_values")
    name = sa.Column(sa.String, index=True)

    def as_dict(self, use_count: int) -> Dict[Text, Any]:
        """Return a JSON-like representation of this synonym value.

        Args:
            use_count: Number of times this particular value appears in the NLU
                training data.

        Returns:
            Dict containing the value, its ID and the given use count.
        """
        serialized = {"value": self.name, "id": self.id}
        serialized["nlu_examples_count"] = use_count
        return serialized
class EntitySynonym(Base):
    """An entity synonym belonging to the NLU training data."""

    __tablename__ = "entity_synonym"

    id = sa.Column(sa.Integer, utils.create_sequence(__tablename__), primary_key=True)
    name = sa.Column(sa.String)

    # The mapped values are returned ordered by their ID.
    synonym_values = relationship(
        "EntitySynonymValue",
        cascade="all, delete-orphan",
        order_by=lambda: EntitySynonymValue.id.asc(),
        back_populates="entity_synonym",
    )
    project_id = sa.Column(sa.String, sa.ForeignKey("project.project_id"))
    filename = sa.Column(sa.String)

    def as_dict(
        self, value_use_counts: Optional[Dict[Text, int]] = None
    ) -> Dict[Text, Any]:
        """Return a JSON-like representation of this entity synonym.

        Args:
            value_use_counts: Dictionary containing the number of times each
                value mapped to this entity synonym is used in the NLU training
                data. It is looked up by the ID of each mapped value.

        Returns:
            Dict containing the synonym's ID and name. `mapped_values` is only
            included if `value_use_counts` is given and not empty.
        """
        serialized = {"id": self.id, "synonym_reference": self.name}
        if not value_use_counts:
            return serialized

        mapped_values = []
        for synonym_value in self.synonym_values:
            use_count = value_use_counts[synonym_value.id]
            mapped_values.append(synonym_value.as_dict(use_count))
        serialized["mapped_values"] = mapped_values

        return serialized
class Model(Base):
    """Metadata about a trained model."""

    __tablename__ = "model"

    id = sa.Column(sa.Integer, utils.create_sequence(__tablename__), primary_key=True)
    hash = sa.Column(sa.String, nullable=False)
    name = sa.Column(sa.String, nullable=False, unique=True)
    path = sa.Column(sa.String, nullable=False, unique=True)
    project_id = sa.Column(sa.String, sa.ForeignKey("project.project_id"))
    project = relationship("Project", back_populates="models")
    version = sa.Column(sa.String)
    # Time when the training was performed as unix timestamp.
    trained_at = sa.Column(sa.Float)

    tags = relationship(
        "ModelTag", cascade="all, delete-orphan", back_populates="model"
    )
    nlu_evaluations = relationship(
        "NluEvaluation", back_populates="model", cascade="all, delete-orphan"
    )
    # Deliberately no `cascade="delete"`: the message logs must not be removed
    # when the model gets removed.
    message_logs = relationship("MessageLog", back_populates="model")

    def as_dict(self) -> Dict[Text, Any]:
        """Return a JSON-like representation of this model.

        Returns:
            Dict with the model's hash, name, path, project, training time,
            version and the list of its tags.
        """
        tag_names = [model_tag.tag for model_tag in self.tags]
        serialized = {"hash": self.hash, "model": self.name, "path": self.path}
        serialized["project"] = self.project_id
        serialized["trained_at"] = self.trained_at
        serialized["version"] = self.version
        serialized["tags"] = tag_names
        return serialized
# Stores mapping between Conversation and ConversationTag: each row links one
# conversation (by its `sender_id`) to one tag (by its `id`).
conversation_to_tag_mapping = sa.Table(
    "conversation_to_tag_mapping",
    Base.metadata,
    sa.Column(
        "conversation_id",
        sa.String,
        sa.ForeignKey("conversation.sender_id"),
        nullable=False,
        index=True,
    ),
    sa.Column(
        "tag_id",
        # Must have the same type as the referenced `conversation_tag.id`
        # column, which is an Integer primary key. A mismatching column type
        # makes the foreign key invalid on databases that check column types.
        sa.Integer,
        utils.create_sequence("conversation_to_tag_mapping"),
        sa.ForeignKey("conversation_tag.id"),
        nullable=False,
        index=True,
    ),
)
class Domain(Base):
    """Stores the domain of the currently deployed Core model."""

    __tablename__ = "domain"

    id = sa.Column(sa.Integer, utils.create_sequence(__tablename__), primary_key=True)
    project_id = sa.Column(sa.String, sa.ForeignKey("project.project_id"))
    store_entities_as_slots = sa.Column(sa.Boolean, default=True)
    path = sa.Column(sa.String, nullable=True)
    session_expiration_time = sa.Column(sa.Float)
    carry_over_slots = sa.Column(sa.Boolean)

    # Every child collection is ordered by the child's own primary key so that
    # the items are returned in a stable order.
    actions = relationship(
        "DomainAction",
        cascade="all, delete-orphan",
        back_populates="domain",
        order_by=lambda: DomainAction.action_id.asc(),
    )
    intents = relationship(
        "DomainIntent",
        cascade="all, delete-orphan",
        back_populates="domain",
        # Order by `intent_id`, not `domain_id`: all intents of one domain share
        # the same `domain_id`, so ordering by it leaves their order undefined.
        order_by=lambda: DomainIntent.intent_id.asc(),
    )
    entities = relationship(
        "DomainEntity",
        cascade="all, delete-orphan",
        back_populates="domain",
        order_by=lambda: DomainEntity.entity_id.asc(),
    )
    slots = relationship(
        "DomainSlot",
        cascade="all, delete-orphan",
        back_populates="domain",
        order_by=lambda: DomainSlot.slot_id.asc(),
    )
    responses = relationship(
        "Response",
        cascade="all, delete-orphan",
        back_populates="domain",
        order_by=lambda: Response.id.asc(),
    )

    @staticmethod
    def remove_annotations(response_dict: Dict[Text, Any]) -> Dict[Text, Any]:
        """Removes keys from responses that are irrelevant for the dumped domain.

        The dictionary is modified in place and returned.

        Args:
            response_dict: Response dictionary to remove the annotation keys from.
                The keys to remove are defined by `RESPONSE_ANNOTATION_KEYS`.

        Returns:
            The response dictionary with removed annotation keys.
        """
        for key in RESPONSE_ANNOTATION_KEYS:
            if key in response_dict:
                del response_dict[key]

        return response_dict

    def dump_responses(self) -> Dict[Text, List[Any]]:
        """Groups the decoded contents of all responses by response name.

        Returns:
            Dict mapping each response name to the list of its decoded response
            contents, with the annotation keys removed.
        """
        response_list = [
            {t.response_name: self.remove_annotations(json.loads(t.content))}
            for t in self.responses
        ]

        response_dict = defaultdict(list)
        for response in response_list:
            for k, v in response.items():
                response_dict[k].append(v)

        return dict(response_dict)

    def _get_session_config(self) -> Dict[Text, Union[bool, float]]:
        """Collects the session settings which are set on this domain.

        Returns:
            Dict containing the session expiration time and the carry-over-slots
            flag; a setting which is `None` is left out.
        """
        session_config = {}

        if self.session_expiration_time is not None:
            # use an `int` if the `float` value is actually an `int`
            session_config[SESSION_EXPIRATION_TIME_KEY] = (
                int(self.session_expiration_time)
                if self.session_expiration_time == int(self.session_expiration_time)
                else self.session_expiration_time
            )

        if self.carry_over_slots is not None:
            session_config[CARRY_OVER_SLOTS_KEY] = self.carry_over_slots

        return session_config

    def as_dict(self) -> Dict[Text, Any]:
        """Returns a JSON-like representation of this domain.

        Returns:
            Dict containing the domain's config, actions, forms, entities,
            intents, slots and responses. The path and the session config are
            only included if they are set.
        """
        slots = {}
        for s in self.slots:
            name = s.slot
            slots[name] = {"auto_fill": s.auto_fill, "type": s.type}

            # `initial_value` and `values` are stored as JSON text
            if s.initial_value:
                slots[name]["initial_value"] = json.loads(s.initial_value)

            # `values` are only exported for categorical slot types
            if "categorical" in s.type.lower() and s.values:
                slots[name]["values"] = json.loads(s.values)

        domain_dict = {
            "config": {"store_entities_as_slots": self.store_entities_as_slots},
            "actions": [e.action for e in self.actions if not e.is_form],
            "forms": [e.action for e in self.actions if e.is_form],
            "entities": [e.entity for e in self.entities],
            "intents": [i.as_dict() for i in self.intents],
            "slots": slots,
            "responses": self.dump_responses(),
        }

        if self.path:
            domain_dict["path"] = self.path

        session_config = self._get_session_config()
        if session_config:
            domain_dict[SESSION_CONFIG_KEY] = session_config

        return domain_dict

    def as_rasa_domain(self) -> RasaDomain:
        """Builds a `RasaDomain` from the dict representation of this domain."""
        return RasaDomain.from_dict(self.as_dict())

    def is_empty(self) -> bool:
        """Returns `True` if the domain has no actions, responses, entities,
        intents or slots."""
        return not any(
            [self.actions, self.responses, self.entities, self.intents, self.slots]
        )