示例#1
0
async def load_data(
    resource_name: Union[Text, "TrainingDataImporter"],
    domain: "Domain",
    remove_duplicates: bool = True,
    unique_last_num_states: Optional[int] = None,
    augmentation_factor: int = 50,
    tracker_limit: Optional[int] = None,
    use_story_concatenation: bool = True,
    debug_plots: bool = False,
    exclusion_percentage: Optional[int] = None,
) -> List["DialogueStateTracker"]:
    """
    Load training data from a resource.

    Args:
        resource_name: resource to load the data from. either a path or an importer
        domain: domain used for loading
        remove_duplicates: should duplicated training examples be removed?
        unique_last_num_states: number of states in a conversation that make the
            a tracker unique (this is used to identify duplicates)
        augmentation_factor:
            by how much should the story training data be augmented
        tracker_limit:
            maximum number of trackers to generate during augmentation
        use_story_concatenation:
            should stories be concatenated when doing data augmentation
        debug_plots:
            generate debug plots during loading
        exclusion_percentage:
            how much data to exclude

    Returns:
        list of loaded trackers
    """
    from rasa.core.training.generator import TrainingDataGenerator
    from rasa.importers.importer import TrainingDataImporter

    if resource_name:
        if isinstance(resource_name, TrainingDataImporter):
            graph = await resource_name.get_stories(
                exclusion_percentage=exclusion_percentage)
        else:
            graph = await extract_story_graph(
                resource_name,
                domain,
                exclusion_percentage=exclusion_percentage)

        g = TrainingDataGenerator(
            graph,
            domain,
            remove_duplicates,
            unique_last_num_states,
            augmentation_factor,
            tracker_limit,
            use_story_concatenation,
            debug_plots,
        )
        return g.generate()
    else:
        return []
示例#2
0
async def load_data(
    resource_name: Text,
    domain: "Domain",
    remove_duplicates: bool = True,
    unique_last_num_states: Optional[int] = None,
    augmentation_factor: int = 20,
    tracker_limit: Optional[int] = None,
    use_story_concatenation: bool = True,
    debug_plots=False,
    exclusion_percentage: int = None,
) -> List["DialogueStateTracker"]:
    from rasa.core.training import extract_story_graph
    from rasa.core.training.generator import TrainingDataGenerator

    if resource_name:
        graph = await extract_story_graph(
            resource_name, domain, exclusion_percentage=exclusion_percentage)

        g = TrainingDataGenerator(
            graph,
            domain,
            remove_duplicates,
            unique_last_num_states,
            augmentation_factor,
            tracker_limit,
            use_story_concatenation,
            debug_plots,
        )
        return g.generate()
    else:
        return []
示例#3
0
async def visualize_stories(
        story_steps: List[StoryStep],
        domain: Domain,
        output_file: Optional[Text],
        max_history: int,
        interpreter: NaturalLanguageInterpreter = RegexInterpreter(),
        nlu_training_data: Optional["TrainingData"] = None,
        should_merge_nodes: bool = True,
        fontsize: int = 12,
        silent: bool = False):
    """Given a set of stories, generates a graph visualizing the flows in the
    stories.

    Visualization is always a trade off between making the graph as small as
    possible while
    at the same time making sure the meaning doesn't change to "much". The
    algorithm will
    compress the graph generated from the stories to merge nodes that are
    similar. Hence,
    the algorithm might create paths through the graph that aren't actually
    specified in the
    stories, but we try to minimize that.

    Output file defines if and where a file containing the plotted graph
    should be stored.

    The history defines how much 'memory' the graph has. This influences in
    which situations the
    algorithm will merge nodes. Nodes will only be merged if they are equal
    within the history, this
    means the larger the history is we take into account the less likely it
    is we merge any nodes.

    The training data parameter can be used to pass in a Rasa NLU training
    data instance. It will
    be used to replace the user messages from the story file with actual
    messages from the training data."""

    story_graph = StoryGraph(story_steps)

    g = TrainingDataGenerator(story_graph,
                              domain,
                              use_story_concatenation=False,
                              tracker_limit=100,
                              augmentation_factor=0)
    completed_trackers = g.generate(silent)
    event_sequences = [t.events for t in completed_trackers]

    graph = await visualize_neighborhood(None,
                                         event_sequences,
                                         output_file,
                                         max_history,
                                         interpreter,
                                         nlu_training_data,
                                         should_merge_nodes,
                                         max_distance=1,
                                         fontsize=fontsize)
    return graph
示例#4
0
文件: __init__.py 项目: eternius/asm
    async def train(self):
        """Train the engine.
        """
        nltk.download('punkt')
        lang = self.config['language']
        if not os.path.exists('data/' + self.config['skill-id']):
            _LOGGER.info("Starting Skill training.")
            _LOGGER.info("Generating stories.")
            data, domain_data, stories = await GenerateStories.run(
                self.config['skill-id'], self.config['language'], self.asm)
            training_data = TrainingData(training_examples=data)
            nlu_config = RasaNLUModelConfig({
                "language": lang,
                "pipeline": self.config['pipeline'],
                "data": None
            })

            trainer = Trainer(nlu_config, None, True)
            _LOGGER.info("Training Arcus NLU")
            trainer.train(training_data)
            trainer.persist("data/" + self.config['skill-id'], None, 'nlu')

            # Rasa core
            domain = Domain.from_dict(domain_data)

            reader = StoryFileReader(domain, RegexInterpreter(), None, False)
            story_steps = await reader.process_lines(stories)
            graph = StoryGraph(story_steps)

            g = TrainingDataGenerator(
                graph,
                domain,
                remove_duplicates=True,
                unique_last_num_states=None,
                augmentation_factor=20,
                tracker_limit=None,
                use_story_concatenation=True,
                debug_plots=False,
            )

            training_trackers = g.generate()
            policy_list = SimplePolicyEnsemble.from_dict(
                {"policies": self.config['policies']})
            policy_ensemble = SimplePolicyEnsemble(policy_list)

            _LOGGER.info("Training Arcus Core")
            policy_ensemble.train(training_trackers, domain)
            policy_ensemble.persist(
                "data/" + self.config['skill-id'] + "/core", False)
            domain.persist("data/" + self.config['skill-id'] + "/core/model")
            domain.persist_specification("data/" + self.config['skill-id'] +
                                         "/core")
示例#5
0
async def _generate_trackers(resource_name, agent,
                             max_stories=None,
                             use_e2e=False):
    from rasa.core.training.generator import TrainingDataGenerator

    from rasa.core import training
    story_graph = await training.extract_story_graph(
        resource_name, agent.domain, agent.interpreter, use_e2e)
    g = TrainingDataGenerator(story_graph, agent.domain,
                              use_story_concatenation=False,
                              augmentation_factor=0,
                              tracker_limit=max_stories)
    return g.generate()
示例#6
0
    def verify_story_structure(self,
                               ignore_warnings: bool = True,
                               max_history: Optional[int] = None) -> bool:
        """Verifies that the bot behaviour in stories is deterministic.

        Args:
            ignore_warnings: When `True`, return `True` even if conflicts were found.
            max_history: Maximal number of events to take into account for conflict identification.

        Returns:
            `False` is a conflict was found and `ignore_warnings` is `False`.
            `True` otherwise.
        """

        logger.info("Story structure validation...")

        trackers = TrainingDataGenerator(
            self.story_graph,
            domain=self.domain,
            remove_duplicates=False,
            augmentation_factor=0,
        ).generate()

        # Create a list of `StoryConflict` objects
        conflicts = rasa.core.training.story_conflict.find_story_conflicts(
            trackers, self.domain, max_history)

        if not conflicts:
            logger.info("No story structure conflicts found.")
        else:
            for conflict in conflicts:
                logger.warning(conflict)

        return ignore_warnings or not conflicts
示例#7
0
文件: test.py 项目: yueyedeai/rasa
async def _generate_trackers(
    resource_name: Text,
    agent: "Agent",
    max_stories: Optional[int] = None,
    use_e2e: bool = False,
) -> List[Any]:
    from rasa.core.training.generator import TrainingDataGenerator

    from rasa.core import training

    story_graph = await training.extract_story_graph(resource_name,
                                                     agent.domain, use_e2e)
    g = TrainingDataGenerator(
        story_graph,
        agent.domain,
        use_story_concatenation=False,
        augmentation_factor=0,
        tracker_limit=max_stories,
    )
    return g.generate_story_trackers()
async def _setup_trackers_for_testing(
        domain_path: Text, training_data_file: Text
) -> Tuple[List[TrackerWithCachedStates], Domain]:
    importer = RasaFileImporter(domain_path=domain_path,
                                training_data_paths=[training_data_file])
    validator = await Validator.from_importer(importer)

    trackers = TrainingDataGenerator(
        validator.story_graph,
        domain=validator.domain,
        remove_duplicates=False,
        augmentation_factor=0,
    ).generate()

    return trackers, validator.domain