示例#1
0
def test_process_results_overall(seed: int):
    """Checks that overall counts and collected turn numbers match the generated data.

    Feeds randomly generated per-session marker results into `MarkerStatistics`
    and verifies the aggregated "applied at least once" counts and the collected
    "preceding user turn numbers" against the raw generated data.
    """
    rng = np.random.default_rng(seed=seed)
    (
        per_session_results,
        preceding_user_turn_numbers_used_per_marker,
    ) = _generate_random_examples(num_markers=3, rng=rng)

    stats = MarkerStatistics()
    for position, session_result in enumerate(per_session_results):
        stats.process(
            session_idx=position,
            sender_id=str(rng.choice(100)),
            meta_data_on_relevant_events_per_marker=session_result,
        )

    assert stats.num_sessions == len(per_session_results)
    for marker in sorted(preceding_user_turn_numbers_used_per_marker.keys()):
        turn_number_lists = preceding_user_turn_numbers_used_per_marker[marker]
        # a marker counts as "applied" in a session iff we generated a
        # non-empty list of turn numbers for that session
        expected_count = sum(1 for numbers in turn_number_lists if numbers)
        assert stats.count_if_applied_at_least_once[marker] == expected_count
        # all generated "preceding user turn numbers" must have been
        # collected, in session order
        expected_numbers = [
            number for numbers in turn_number_lists for number in numbers
        ]
        assert stats.num_preceding_user_turns_collected[
            marker] == expected_numbers
示例#2
0
def test_process_results_per_session(seed: int):
    """Checks that per-session statistics and session identifiers are recorded.

    Feeds randomly generated per-session marker results into `MarkerStatistics`
    (with random sender ids and session indices) and verifies that
    - the recorded per-session statistics match `compute_statistics` applied to
      the generated "preceding user turn numbers", and
    - the `(sender_id, session_idx)` pairs are stored in processing order.
    """
    rng = np.random.default_rng(seed=seed)

    (
        per_session_results,
        preceding_user_turn_numbers_used_per_marker,
    ) = _generate_random_examples(num_markers=3, rng=rng)
    markers = sorted(preceding_user_turn_numbers_used_per_marker.keys())
    num_sessions = len(per_session_results)

    stats = MarkerStatistics()
    sender_ids = []
    session_indices = []
    # Note: the session indices passed to `process` are random on purpose, i.e.
    # they need not match the position of the session in `per_session_results`.
    for results in per_session_results:
        sender_id = str(rng.choice(100))
        session_idx = int(rng.choice(100))
        stats.process(
            session_idx=session_idx,
            sender_id=sender_id,
            meta_data_on_relevant_events_per_marker=results,
        )
        sender_ids.append(sender_id)
        session_indices.append(session_idx)

    assert stats.num_sessions == num_sessions
    for marker in markers:
        for idx in range(num_sessions):
            expected_stats = compute_statistics(
                preceding_user_turn_numbers_used_per_marker[marker][idx])
            for stat_name, stat_value in expected_stats.items():
                # Bug fix: `assert pytest.approx(a, b)` is always truthy
                # (the second positional argument is `rel`), so the original
                # assertion could never fail. The comparison must use `==`.
                # `nan_ok=True` because statistics over empty sessions are NaN.
                assert stats.session_results[marker][stat_name][
                    idx] == pytest.approx(stat_value, nan_ok=True)
    for idx in range(num_sessions):
        assert stats.session_identifier[idx] == (sender_ids[idx],
                                                 session_indices[idx])
示例#3
0
def test_per_session_statistics_to_csv(tmp_path: Path, seed: int):
    """Checks that the per-session CSV dump lists every recorded statistic."""
    rng = np.random.default_rng(seed=seed)
    (
        per_session_results,
        preceding_user_turn_numbers_used_per_marker,
    ) = _generate_random_examples(num_markers=3,
                                  rng=rng,
                                  num_sessions_min=10,
                                  num_sessions_max=20)

    stats = MarkerStatistics()
    for position, session_result in enumerate(per_session_results):
        stats.process(
            session_idx=position,
            sender_id=str(rng.choice(100)),
            meta_data_on_relevant_events_per_marker=session_result,
        )

    csv_file = tmp_path / "test.csv"
    stats.per_session_statistics_to_csv(path=csv_file)

    with csv_file.open(mode="r") as f:
        rows = list(csv.DictReader(f))

    actual_information = {
        (row["sender_id"], row["session_idx"], row["marker"],
         row["statistic"]): row["value"]
        for row in rows
    }

    num_digits = 3

    def _format(value) -> str:
        # NaN is dumped as-is; everything else is rounded to `num_digits`
        return str(value) if np.isnan(value) else str(round(value, num_digits))

    expected_information = {}
    for marker_name in sorted(
            preceding_user_turn_numbers_used_per_marker.keys()):
        for stat_name, values in stats.session_results[marker_name].items():
            column = MarkerStatistics._add_num_user_turns_str_to(stat_name)
            for (sender_id,
                 session_idx), value in zip(stats.session_identifier, values):
                expected_information[(sender_id, str(session_idx), marker_name,
                                      column)] = _format(value)

    assert actual_information == expected_information
示例#4
0
    def evaluate_trackers(
        self,
        trackers: Iterator[Optional[DialogueStateTracker]],
        output_file: Path,
        session_stats_file: Optional[Path] = None,
        overall_stats_file: Optional[Path] = None,
    ) -> None:
        """Collect markers for each dialogue in each tracker loaded.

        Args:
            trackers: An iterator over the trackers from which we want to extract
                markers.
            output_file: Path to write out the extracted markers.
            session_stats_file: (Optional) Path to write out statistics about the
                extracted markers for each session separately.
            overall_stats_file: (Optional) Path to write out statistics about the
                markers extracted from all session data.

        Raises:
            `FileExistsError` if any of the specified files already exists
            `NotADirectoryError` if any of the specified files is supposed to be
                contained in a directory that does not exist
        """
        # Validate all target paths up front, before the costly tracker sweep.
        for candidate in (session_stats_file, overall_stats_file, output_file):
            if candidate is None:
                continue
            if candidate.is_file():
                raise FileExistsError(
                    f"Expected that no file {candidate} already exists.")
            if not candidate.parent.is_dir():
                raise NotADirectoryError(
                    f"Expected directory {candidate.parent} to exist.")

        # Evaluate every session of every tracker and persist the raw results.
        processed_trackers: Dict[Text, List[SessionEvaluation]] = {
            tracker.sender_id: self.evaluate_events(tracker.events)
            for tracker in trackers
            if tracker
        }

        processed_trackers_count = len(processed_trackers)
        telemetry.track_markers_extracted(processed_trackers_count)
        Marker._save_results(output_file, processed_trackers)

        if not session_stats_file and not overall_stats_file:
            return

        # Statistics are only needed (and imported) when explicitly requested.
        from rasa.core.evaluation.marker_stats import MarkerStatistics

        stats = MarkerStatistics()
        for sender_id, session_results in processed_trackers.items():
            for session_idx, session_result in enumerate(session_results):
                stats.process(
                    sender_id=sender_id,
                    session_idx=session_idx,
                    meta_data_on_relevant_events_per_marker=session_result,
                )

        telemetry.track_markers_stats_computed(processed_trackers_count)
        if overall_stats_file:
            stats.overall_statistic_to_csv(path=overall_stats_file)
        if session_stats_file:
            stats.per_session_statistics_to_csv(path=session_stats_file)
示例#5
0
def test_overall_statistics_to_csv(tmp_path: Path, seed: int):
    """Checks the overall CSV dump: session count, per-marker counts and stats."""
    rng = np.random.default_rng(seed=seed)
    (
        per_session_results,
        preceding_user_turn_numbers_used_per_marker,
    ) = _generate_random_examples(num_markers=3,
                                  rng=rng,
                                  num_sessions_min=10,
                                  num_sessions_max=20)
    markers = sorted(preceding_user_turn_numbers_used_per_marker.keys())
    num_sessions = len(per_session_results)

    stats = MarkerStatistics()
    for position, session_result in enumerate(per_session_results):
        stats.process(
            session_idx=position,
            sender_id=str(rng.choice(100)),
            meta_data_on_relevant_events_per_marker=session_result,
        )

    csv_file = tmp_path / "test.csv"
    stats.overall_statistic_to_csv(path=csv_file)

    with csv_file.open(mode="r") as f:
        rows = list(csv.DictReader(f))

    def _expected_row(marker: str, statistic: str, value: str) -> dict:
        # every "overall" row shares the same sender/session placeholders
        return {
            "sender_id": "all",
            "session_idx": "nan",
            "marker": marker,
            "statistic": statistic,
            "value": value,
        }

    num_digits = 3

    # first row: the total number of processed sessions
    assert rows[0] == _expected_row("-", "total_number_of_sessions",
                                    str(num_sessions))

    # then, per marker: absolute and relative "applied at least once" counts
    row_idx = 1
    for marker_name in markers:
        count = stats.count_if_applied_at_least_once[marker_name]
        assert rows[row_idx] == _expected_row(
            marker_name,
            "number_of_sessions_where_marker_applied_at_least_once",
            str(count),
        )
        row_idx += 1
        assert rows[row_idx] == _expected_row(
            marker_name,
            "percentage_of_sessions_where_marker_applied_at_least_once",
            str(round(count / num_sessions * 100, num_digits)),
        )
        row_idx += 1

    # finally, per marker: statistics over all collected preceding user turns
    for marker_name in markers:
        expected_stats = compute_statistics(
            stats.num_preceding_user_turns_collected[marker_name])
        for stat_name, stat_value in expected_stats.items():
            assert rows[row_idx] == _expected_row(
                marker_name,
                MarkerStatistics._add_num_user_turns_str_to(stat_name),
                str(round(stat_value, num_digits)),
            )
            row_idx += 1