Example #1
    def __init__(
        self,
        should_run_liveness_endpoint: bool = False,
        session: Optional["Session"] = None,
    ) -> None:
        """Abstract event consumer that implements a liveness endpoint.

        Args:
            should_run_liveness_endpoint: If `True`, runs a Sanic server as a
                background process that can be used to probe liveness of this service.
                The service will be exposed at a port defined by the
                `SELF_PORT` environment variable (5673 by default).
            session: SQLAlchemy session to use.

        """
        self.liveness_endpoint: Optional["Process"] = None
        self.start_liveness_endpoint_process(should_run_liveness_endpoint)

        self._session = session or db_utils.get_database_session(config.LOCAL_MODE)

        self.event_service = EventService(self._session)
        self.analytics_service = AnalyticsService(self._session)
        self.logs_service = LogsService(self._session)

        self.pending_events: Deque[PendingEvent] = deque(maxlen=MAX_PENDING_EVENTS)
Example #2
def _has_access_to_conversation(event_service: EventService,
                                conversation_id: Text,
                                user: Dict[Text, Any]) -> bool:
    """Check if `user` can access the conversation with `conversation_id`.

    Args:
        event_service: an `EventService` instance.
        conversation_id: ID of the conversation.
        user: Rasa X user who sent the HTTP request.

    Returns:
        `True` if the user has access to the conversation, `False` otherwise.
    """
    if user_service.has_role(user, user_service.ADMIN):
        return True

    if (user_service.has_role(user, user_service.GUEST)
            and user[USERNAME_KEY] == conversation_id):
        # The username is a chat token in this case, and this token
        # allows its holder to initiate a conversation.
        return True

    conversation = event_service.get_conversation(conversation_id)
    if conversation and conversation.created_by == user.get(USERNAME_KEY):
        return True

    return False
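A hedged usage sketch: gating a conversation endpoint with the check above. The route, the `request["user"]` injection by an authentication middleware, and the error payload are assumptions, not from the source; `REQUEST_DB_SESSION_KEY` and `as_dict()` are taken from the other examples in this collection.

from sanic import response

async def get_conversation(request, conversation_id):
    # Assumption: an auth middleware stores the Rasa X user dict on the request.
    user = request["user"]
    event_service = EventService(request[REQUEST_DB_SESSION_KEY])

    if not _has_access_to_conversation(event_service, conversation_id, user):
        return response.json({"error": "Access denied."}, status=403)

    conversation = event_service.get_conversation(conversation_id)
    # `as_dict()` is the serialization method shown in Example #5 below.
    return response.json(conversation.as_dict())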
Example #3
def _replay_tracker_events(tracker: DialogueStateTracker,
                           event_service: EventService) -> None:
    """Migrates the `events`, `logs`, `sessions` collections."""

    for event in tracker.events:
        event_dict = event.as_dict()
        # add sender id to event
        event_dict["sender_id"] = tracker.sender_id
        stringified_event = json.dumps(event_dict)
        # Update events and most of the conversation metadata
        _ = event_service.save_event(stringified_event)
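A minimal driver for this helper, sketched under the same assumptions as the migration examples below: `old_tracker_store` is any tracker store exposing `keys()` and `retrieve()`, and the database session setup mirrors Example #4. This is a sketch, not the source's entry point.

db_session = db_utils.get_database_session(True)
event_service = EventService(db_session)

for sender_id in old_tracker_store.keys():
    tracker = old_tracker_store.retrieve(sender_id)
    _replay_tracker_events(tracker, event_service)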
Example #4
def _migrate_tracker_store_to_rasa_x(
        endpoints_file: Text, max_number_of_trackers: Optional[int]) -> None:
    old_tracker_store = _get_old_tracker_store(endpoints_file)
    rasa_x_tracker_store = _get_rasa_x_tracker_store()

    # Disable warnings about nonexistent slots
    logging.getLogger("rasa.core.trackers").setLevel(logging.CRITICAL)

    if rasa_x_tracker_store.keys():
        should_migrate = questionary.confirm(
            "Found existing trackers in your Rasa X tracker store. Do you "
            "still want to migrate the new trackers?").ask()

        if not should_migrate:
            exit(1)

    db_session = db_utils.get_database_session(True, create_tables=True)
    sql_migrations.run_migrations(db_session)
    event_service = EventService(db_session)

    sender_ids = old_tracker_store.keys()

    if max_number_of_trackers:
        sender_ids = sender_ids[:max_number_of_trackers]

    print_success("Start migrating {} trackers.".format(len(sender_ids)))

    nr_skipped_trackers = 0

    for sender_id in tqdm(sender_ids):
        if rasa_x_tracker_store.retrieve(sender_id):
            nr_skipped_trackers += 1
            logging.debug(
                "Tracker for sender '{}' already exists. Skipping the "
                "migration for it.".format(sender_id))
        else:
            tracker = old_tracker_store.retrieve(sender_id)

            # Migrate tracker store to new tracker store format
            rasa_x_tracker_store.save(tracker)

            # Replay events of tracker
            _replay_tracker_events(tracker, event_service)

    # Set the latest event ID so that the `SQLiteEventConsumer` only consumes
    # events that have not already been migrated.
    set_latest_event_id(db_session, rasa_x_tracker_store)

    print_success(
        "Finished migrating trackers ({} were skipped since they were "
        "already migrated).".format(nr_skipped_trackers))
Example #5
    def as_dict(self) -> Dict[Text, Any]:
        from rasax.community.services.event_service import EventService

        result = {
            "sender_id": self.sender_id,
            "sender_name": EventService.get_sender_name(self),  # displayed in the UI
            "latest_event_time": self.latest_event_time,
            "latest_input_channel": self.latest_input_channel,
            "intents": [i.intent for i in self.unique_intents],
            "actions": [a.action for a in self.unique_actions],
            "minimum_action_confidence": self.minimum_action_confidence,
            "maximum_action_confidence": self.maximum_action_confidence,
            "minimum_intent_confidence": self.minimum_intent_confidence,
            "maximum_intent_confidence": self.maximum_intent_confidence,
            "in_training_data": self.in_training_data,
            "policies": [p.policy for p in self.unique_policies],
            "n_user_messages": self.number_user_messages,
            "has_flagged_messages": self.has_flagged_messages,
            "corrected_messages": [
                {"message_timestamp": c.message_timestamp, "intent": c.intent}
                for c in self.corrected_messages
            ],
            "interactive": self.interactive,
            "tags": list(self.tags_set()),
            "created_by": self.created_by,
        }

        return result
Example #6
def _replay_tracker_events(
    tracker: DialogueStateTracker,
    event_service: EventService,
    logs_service: LogsService,
    analytics_service: AnalyticsService,
) -> None:
    """Migrates the `events`, `logs`, `sessions` collections."""

    for event in tracker.events:
        event_dict = event.as_dict()
        # add sender id to event
        event_dict["sender_id"] = tracker.sender_id
        stringified_event = json.dumps(event_dict)
        # Update events and most of the conversation metadata
        saved_event = event_service.save_event(stringified_event)
        # Save logs from conversations
        _save_nlu_log(logs_service, event_dict, saved_event.id)
        # Update analytics
        analytics_service.save_analytics(stringified_event,
                                         sender_id=saved_event.conversation_id)
Example #7
def _event_service(request: Request) -> EventService:
    return EventService(request[REQUEST_DB_SESSION_KEY])
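A hedged sketch of how this per-request helper pattern could back an HTTP endpoint, here combined with `_get_project_status_event` from Example #8 below. The blueprint, route, and handler names are assumptions.

from sanic import Blueprint, response
from sanic.request import Request

status_blueprint = Blueprint("status")  # hypothetical blueprint

@status_blueprint.route("/status", methods=["GET"])
async def project_status(request: Request):
    # Reuse the per-request database session, as `_event_service` does above.
    status = await _get_project_status_event(request[REQUEST_DB_SESSION_KEY])
    return response.json(status)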
Example #8
async def _get_project_status_event(
        session: Session,
        project_id: Text = config.project_name) -> Dict[Text, Any]:
    """Collect data used in `status` event.

    Args:
        session: Database session.
        project_id: The project ID.

    Returns:
        A dictionary containing statistics describing the current project's status.
    """

    from rasax.community.services.event_service import EventService
    from rasax.community.services.domain_service import DomainService
    from rasax.community.services.model_service import ModelService
    from rasax.community.services.data_service import DataService
    from rasax.community.services.story_service import StoryService
    from rasax.community.services.settings_service import SettingsService
    import rasax.community.services.test_service as test_service
    from rasax.community.services import stack_service

    event_service = EventService(session)
    domain_service = DomainService(session)
    model_service = ModelService(config.rasa_model_dir, session)
    data_service = DataService(session)
    story_service = StoryService(session)
    settings_service = SettingsService(session)

    domain = domain_service.get_domain(project_id) or {}
    nlu_data = data_service.get_nlu_training_data_object(project_id=project_id)
    stories = story_service.fetch_stories()

    num_conversations = (
        event_service.get_conversation_metadata_for_all_clients().count)
    num_events = event_service.get_events_count()
    num_models = model_service.get_model_count()
    lookup_tables = data_service.get_lookup_tables(
        project_id, include_filenames=True)
    num_lookup_table_files = len(
        {table["filename"] for table in lookup_tables})
    num_lookup_table_entries = sum(
        table.get("number_of_elements", 0) for table in lookup_tables)
    synonyms = data_service.get_entity_synonyms(project_id)
    num_synonyms = sum(len(entry["synonyms"]) for entry in synonyms)
    num_regexes = data_service.get_regex_features(project_id).count

    rasa_services = settings_service.stack_services(project_id)
    version_responses = await stack_service.collect_version_calls(
        rasa_services, timeout_in_seconds=ENVIRONMENT_LIVE_TIMEOUT)

    environment_names = _environment_names(rasa_services)

    tags = event_service.get_all_conversation_tags()
    conversations_with_tags = set()
    for tag in tags:
        conversations_with_tags.update(tag["conversations"])

    e2e_tests = test_service.get_tests_from_file()

    return {
        # Use the SHA256 of the project ID in case its value contains
        # information about the user's use of Rasa X. On the analytics side,
        # having the original value or the hash makes no difference. This
        # reasoning is also applied to other values sent in this module.
        "project": hashlib.sha256(project_id.encode("utf-8")).hexdigest(),
        "local_mode": config.LOCAL_MODE,
        "rasa_x": __version__,
        "rasa_open_source": _rasa_version(version_responses),
        "num_intent_examples": len(nlu_data.intent_examples),
        "num_entity_examples": len(nlu_data.entity_examples),
        "num_actions": len(domain.get("actions", [])),
        # Old nomenclature from when 'responses' were still called
        # 'templates' in the domain.
        "num_templates": len(domain.get("responses", [])),
        "num_slots": len(domain.get("slots", [])),
        "num_forms": len(domain.get("forms", [])),
        "num_intents": len(domain.get("intents", [])),
        "num_entities": len(domain.get("entities", [])),
        "num_stories": len(stories),
        "num_conversations": num_conversations,
        "num_events": num_events,
        "num_models": num_models,
        "num_lookup_table_files": num_lookup_table_files,
        "num_lookup_table_entries": num_lookup_table_entries,
        "num_synonyms": num_synonyms,
        "num_regexes": num_regexes,
        "num_environments": len(environment_names),
        "environment_names": environment_names,
        "num_live_environments": _number_of_live_rasa_environments(version_responses),
        "uptime_seconds": utils.get_uptime(),
        "num_tags": len(tags),
        "num_conversations_with_tags": len(conversations_with_tags),
        "num_e2e_tests": len(e2e_tests),
    }
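The comment at the top of the returned dictionary motivates hashing the project ID; a quick worked example of that one-way transform (the ID value is invented):

import hashlib

project_id = "my-project"  # illustrative value
digest = hashlib.sha256(project_id.encode("utf-8")).hexdigest()
# `digest` is always a 64-character hex string, and the original ID cannot
# be recovered from it, which is the point of sending only the hash.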
Example #9
def _migrate_tracker_store_to_rasa_x(
    old_endpoints_file: Text,
    new_endpoints_file: Optional[Text],
    is_local: bool,
    max_number_of_trackers: Optional[int],
) -> None:
    old_tracker_store = _get_tracker_store_from_endpoints_config(
        old_endpoints_file)

    reuse_old_tracker_store = old_endpoints_file == new_endpoints_file
    if reuse_old_tracker_store:
        print(
            "Old and new endpoints files are the same. "
            "I will skip migrating the tracker store and only migrate the "
            "events to Rasa X."
        )

    # Initialize Rasa X tracker store in any case
    rasa_x_tracker_store = _get_rasa_x_tracker_store(new_endpoints_file)

    # Disable warnings about nonexistent slots
    logging.getLogger("rasa.core.trackers").setLevel(logging.CRITICAL)

    if not reuse_old_tracker_store and rasa_x_tracker_store.keys():
        should_migrate = questionary.confirm(
            "Found existing trackers in your Rasa X tracker store. Do you "
            "still want to migrate the new trackers?").ask()

        if not should_migrate:
            exit(1)

    db_session = db_utils.get_database_session(is_local)
    sql_migrations.run_migrations(db_session)
    event_service = EventService(db_session)

    sender_ids = old_tracker_store.keys()

    if max_number_of_trackers:
        sender_ids = sender_ids[:max_number_of_trackers]

    print_success("Start migrating {} trackers.".format(len(sender_ids)))

    nr_skipped_trackers = 0

    for sender_id in tqdm(sender_ids):
        tracker = old_tracker_store.retrieve(sender_id)

        if not reuse_old_tracker_store:
            if rasa_x_tracker_store.retrieve(sender_id):
                nr_skipped_trackers += 1
                logging.debug(
                    "Tracker for sender '{}' already exists. Skipping the "
                    "migration for it.".format(sender_id))

            else:
                # Migrate tracker store to new tracker store format
                rasa_x_tracker_store.save(tracker)

        # Replay events of tracker
        _replay_tracker_events(tracker, event_service)

    # Set the latest event ID so that the `SQLiteEventConsumer` only consumes
    # events that have not already been migrated.
    set_latest_event_id(db_session, rasa_x_tracker_store)

    print_success(
        "Finished migrating trackers ({} were skipped since they were "
        "already migrated).".format(nr_skipped_trackers))
Example #10
class EventConsumer:
    """Abstract base class for all event consumers."""

    type_name = None

    def __init__(
        self,
        should_run_liveness_endpoint: bool = False,
        session: Optional["Session"] = None,
    ) -> None:
        """Abstract event consumer that implements a liveness endpoint.

        Args:
            should_run_liveness_endpoint: If `True`, runs a Sanic server as a
                background process that can be used to probe liveness of this service.
                The service will be exposed at a port defined by the
                `SELF_PORT` environment variable (5673 by default).
            session: SQLAlchemy session to use.

        """
        self.liveness_endpoint: Optional["Process"] = None
        self.start_liveness_endpoint_process(should_run_liveness_endpoint)

        self._session = session or db_utils.get_database_session(config.LOCAL_MODE)

        self.event_service = EventService(self._session)
        self.analytics_service = AnalyticsService(self._session)
        self.logs_service = LogsService(self._session)

        self.pending_events: Deque[PendingEvent] = deque(maxlen=MAX_PENDING_EVENTS)

    def __enter__(self) -> None:
        pass

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        self._session.close()

    @staticmethod
    def _run_liveness_endpoint_process(consumer_type: Text) -> "Process":
        """Run a Sanic app as a multiprocessing.Process and return it.

        Args:
            consumer_type: Event consumer type.

        Returns:
            Sanic endpoint app as a multiprocessing.Process.

        """
        port = int(os.environ.get("SELF_PORT", "5673"))
        p = utils.run_in_process(
            fn=_run_liveness_app, args=(port, consumer_type), daemon=True
        )

        logger.info(f"Started Sanic liveness endpoint at port '{port}'.")

        return p

    def start_liveness_endpoint_process(
        self, should_run_liveness_endpoint: bool
    ) -> None:
        """Start liveness endpoint multiprocessing.Process if
        `should_run_liveness_endpoint` is `True`, else do nothing."""

        if should_run_liveness_endpoint:
            self.liveness_endpoint = self._run_liveness_endpoint_process(self.type_name)

    def kill_liveness_endpoint_process(self) -> None:
        """Kill liveness endpoint multiprocessing.Process if it is active."""

        if self.liveness_endpoint and self.liveness_endpoint.is_alive():
            self.liveness_endpoint.terminate()
            logger.info(
                f"Terminated event consumer liveness endpoint process "
                f"with PID '{self.liveness_endpoint.pid}'."
            )

    def log_event(
        self,
        data: Union[Text, bytes],
        sender_id: Optional[Text] = None,
        event_number: Optional[int] = None,
        origin: Optional[Text] = None,
        import_process_id: Optional[Text] = None,
    ) -> None:
        """Handle an incoming event forwarding it to necessary services and handlers.

        Args:
            data: Event to be logged.
            sender_id: Conversation ID sending the event.
            event_number: Event number associated with the event.
            origin: Rasa environment origin of the event.
            import_process_id: Unique ID if the event comes from a `rasa export`
                process.

        """

        log_operation = self._event_log_operation(
            data, sender_id, event_number, origin, import_process_id
        )

        try:
            log_operation()

            self._session.commit()

            self._process_pending_events()
        except sqlalchemy.exc.IntegrityError as e:
            logger.warning(
                f"Saving event failed due to an 'IntegrityError'. This "
                f"means that the event is already stored in the "
                f"database. The event data was '{data}'. {e}"
            )
            self._session.rollback()
        except Exception as e:
            logger.error(e)
            self._save_event_as_pending(data, log_operation)
            self._session.rollback()

    def _event_log_operation(
        self,
        data: Union[Text, bytes],
        sender_id: Optional[Text] = None,
        event_number: Optional[int] = None,
        origin: Optional[Text] = None,
        import_process_id: Optional[Text] = None,
    ) -> Callable[[], None]:
        def _log() -> None:
            event = self.event_service.save_event(
                data,
                sender_id=sender_id,
                event_number=event_number,
                origin=origin,
                import_process_id=import_process_id,
            )
            self.logs_service.save_nlu_logs_from_event(data, event.id)
            self.analytics_service.save_analytics(data, sender_id=event.conversation_id)

            if utils.is_enterprise_installed():
                from rasax.enterprise import reporting  # pytype: disable=import-error

                reporting.report_event(json.loads(data), event.conversation_id)

        return _log

    def _save_event_as_pending(
        self,
        raw_event: Union[Text, bytes],
        on_save: Optional[Callable[[], None]] = None,
    ) -> None:
        """Add `ConversationEvent` to pending events.

        Args:
            raw_event: Consumed event which has to be saved later since the last try
                failed.
            on_save: `Callable` that will be called to persist the event.
        """
        if len(self.pending_events) >= MAX_PENDING_EVENTS:
            pending_event = self.pending_events.popleft()
            warnings.warn(
                f"`PendingEvents` deque has reached its maximum length of "
                f"{MAX_PENDING_EVENTS}. The oldest event with data "
                f"{pending_event.raw_event} was removed."
            )

        self.pending_events.append(PendingEvent(raw_event, on_save))

    def _process_pending_events(self) -> None:
        """Process all pending events."""

        for pending_event in list(self.pending_events):
            try:
                pending_event.on_save()
                self._session.commit()
                self.pending_events.remove(pending_event)
            except Exception as e:
                self._session.rollback()
                logger.debug(
                    f"Cannot process the pending event with "
                    f"the following data: '{pending_event.raw_event}'. "
                    f"Exception: {e}."
                )

    def consume(self):
        """Consume events."""
        raise NotImplementedError(
            "Each event consumer needs to implement the `consume()` method."
        )
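To make the contract concrete, here is a minimal sketch of a consumer built only on the base-class API shown above. The in-memory queue source is a made-up example, not one of Rasa X's actual consumers.

import queue

class InMemoryQueueEventConsumer(EventConsumer):
    """Hypothetical consumer that drains events from a `queue.Queue`."""

    type_name = "in_memory_queue"

    def __init__(self, event_queue: queue.Queue, **kwargs: Any) -> None:
        super().__init__(**kwargs)
        self.event_queue = event_queue

    def consume(self) -> None:
        while True:
            data = self.event_queue.get()  # blocks until an event arrives
            self.log_event(data)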