Exemplo n.º 1
0
def test_marker_from_path_raises(tmp_path: Path,
                                 path_config_tuples: List[Tuple[Text, Any]]):
    """Check that loading the given set of marker configs raises.

    Every `(relative path, config)` pair is written as a YAML file below
    `tmp_path`; `Marker.from_path` is then expected to raise
    `InvalidMarkerConfig` for that directory.

    Args:
        tmp_path: Temporary directory that receives the config files.
        path_config_tuples: Pairs of a path relative to `tmp_path` and the
            config that is written to that path.
    """
    for path_to_yaml, config in path_config_tuples:
        full_path = tmp_path / path_to_yaml
        # Several configs may live in the same sub-folder and paths may be
        # nested more than one level deep, so create any missing parents and
        # tolerate folders that already exist (this is a no-op for files that
        # sit directly in `tmp_path`).
        full_path.parent.mkdir(parents=True, exist_ok=True)
        rasa.shared.utils.io.write_yaml(data=config, target=full_path)
    with pytest.raises(InvalidMarkerConfig):
        Marker.from_path(tmp_path)
Exemplo n.º 2
0
def test_marker_from_path_adds_special_or_marker(tmp_path: Path, configs: Any):
    """Check that loaded markers are wrapped in a single `OrMarker`.

    The given `configs` are written to one YAML file inside `tmp_path`. The
    marker loaded from that directory must be an `OrMarker` named
    `Marker.ANY_MARKER` that holds one `IntentDetectedMarker` per entry of
    `configs`.

    Args:
        tmp_path: Temporary directory that receives the config file.
        configs: Marker configuration to write and load again.
    """
    config_path = tmp_path / "config.yml"
    rasa.shared.utils.io.write_yaml(data=configs, target=config_path)

    combined = Marker.from_path(tmp_path)

    # The loader is expected to add one OR marker on top of everything.
    assert isinstance(combined, OrMarker)
    assert combined.name == Marker.ANY_MARKER

    children = combined.sub_markers
    assert len(children) == len(configs)
    for child in children:
        assert isinstance(child, IntentDetectedMarker)
Exemplo n.º 3
0
def test_marker_from_path_only_reads_yamls(tmp_path: Path):
    """Check that only files with a YAML suffix are loaded as markers.

    One marker config is written per suffix; the loaded marker must contain
    exactly the markers stored in the files whose suffix is flagged as
    allowed.

    Args:
        tmp_path: Temporary directory that receives the config files.
    """
    # Each entry pairs a file suffix with whether it is expected to be read.
    suffixes = [("yaml", True), ("yml", True), ("yaeml", False),
                ("config", False)]

    expected_names = set()
    for position, (extension, is_allowed) in enumerate(suffixes):
        marker_name = f"marker-{position}"
        marker_config = {
            marker_name: {
                IntentDetectedMarker.positive_tag(): "intent"
            }
        }
        rasa.shared.utils.io.write_yaml(
            data=marker_config,
            target=tmp_path / f"config-{position}.{extension}",
        )
        if is_allowed:
            expected_names.add(marker_name)
    expected_count = sum(1 for _, is_allowed in suffixes if is_allowed)

    loaded = Marker.from_path(tmp_path)

    assert len(loaded.sub_markers) == expected_count
    loaded_names = {sub_marker.name for sub_marker in loaded.sub_markers}
    assert loaded_names == expected_names
Exemplo n.º 4
0
def _run_markers(
    seed: Optional[int],
    count: Optional[int],
    endpoint_config: Path,
    domain_path: Optional[Text],
    strategy: Text,
    config: Path,
    output_filename: Path,
    stats_file_prefix: Optional[Path] = None,
) -> None:
    """Extract markers from the trackers of a tracker store.

    Loads the marker definitions from `config`, validates them against the
    domain if one is given, reports telemetry about the run and about the
    parsed markers, and then evaluates the markers on the selected trackers.

    Args:
        seed: (Optional) The seed to initialise the random number generator
            for use with the 'sample' strategy.
        count: (Optional) Number of trackers to extract from (for any
            strategy except 'all').
        endpoint_config: Path to the endpoint configuration defining the
            tracker store to use.
        domain_path: Path to the domain specification to use when validating
            the marker definitions. Validation is skipped if it is empty.
        strategy: Strategy to use when selecting trackers to extract from.
        config: Path to the markers definition file to use.
        output_filename: Path to write the extracted markers to.
        stats_file_prefix: (Optional) A prefix used to create the paths of
            the files that statistics on the marker extraction results are
            written to. It must consist of the path to the folder where
            those files should be stored and the common file prefix, e.g.
            '<path-to-stats-folder>/statistics'. Statistics derived from all
            marker extractions will be stored in
            '<path-to-stats-folder>/statistics-overall.csv', while the
            statistics computed per session will be stored in
            '<path-to-stats-folder>/statistics-per-session.csv'.
    """
    # NOTE(review): `only_extract` is True when a stats prefix IS given, which
    # reads inverted relative to the flag's name - confirm against the
    # telemetry definition before changing it.
    telemetry.track_markers_extraction_initiated(
        strategy=strategy,
        only_extract=stats_file_prefix is not None,
        seed=seed is not None,
        count=count,
    )

    domain = None
    if domain_path:
        domain = Domain.load(domain_path)

    markers = Marker.from_path(config)
    if domain and not markers.validate_against_domain(domain):
        rasa.shared.utils.cli.print_error_and_exit(
            "Validation errors were found in the markers definition. "
            "Please see errors listed above and fix before running again."
        )

    # Telemetry about the parsed markers. All loaded markers are combined
    # with one virtual OR over all markers, hence the depth is reduced by one.
    num_markers = len(markers.sub_markers)
    max_depth = markers.max_depth() - 1
    # Number of direct children of every operator found in any loaded marker;
    # the largest one is the maximum branching.
    operator_widths = [
        len(node.sub_markers)
        for top_level_marker in markers.sub_markers
        for node in top_level_marker.flatten()
        if isinstance(node, OperatorMarker)
    ]
    branching_factor = max(operator_widths, default=0)

    telemetry.track_markers_parsed_count(num_markers, max_depth, branching_factor)

    tracker_loader = _create_tracker_loader(
        endpoint_config, strategy, domain, count, seed
    )

    def _stats_path(suffix: Text) -> Optional[Path]:
        # Append the suffix to the file name of the prefix, not to a folder.
        if not stats_file_prefix:
            return None
        return stats_file_prefix.parent / (stats_file_prefix.name + suffix)

    try:
        markers.evaluate_trackers(
            trackers=tracker_loader.load(),
            output_file=output_filename,
            session_stats_file=_stats_path(STATS_SESSION_SUFFIX),
            overall_stats_file=_stats_path(STATS_OVERALL_SUFFIX),
        )
    except (FileExistsError, NotADirectoryError) as error:
        rasa.shared.utils.cli.print_error_and_exit(message=str(error))