示例#1
0
    def train(self, data: TrainingData, **kwargs: Any) -> "Interpreter":
        """Train each pipeline component in order and return an interpreter.

        Args:
            data: Training data; it is stored on the trainer and then
                validated.
            **kwargs: Initial entries of the context dictionary that is
                handed to every component.

        Returns:
            An ``Interpreter`` built from the pipeline and the final context.
        """
        self.training_data = data
        self.training_data.validate()

        # The keyword-argument dict itself serves as the shared context.
        shared_context = kwargs

        def absorb(extra: Any) -> None:
            # Fold a component's contribution into the context; falsy
            # results are skipped.
            if extra:
                shared_context.update(extra)

        # Let every component contribute to the context up front.
        for component in self.pipeline:
            absorb(component.provide_context())

        # Check the pipeline against the data before any training starts,
        # unless validation was explicitly disabled.
        if not self.skip_validation:
            components.validate_required_components_from_data(
                self.pipeline, self.training_data
            )

        # Training modifies the data internally, so work on a deep copy.
        training_copy: TrainingData = copy.deepcopy(data)

        for position, component in enumerate(self.pipeline):
            # From the first extractor/classifier onwards the working data
            # is replaced by its variant without empty e2e examples.
            if isinstance(component, (EntityExtractor, IntentClassifier)):
                training_copy = training_copy.without_empty_e2e_examples()

            logger.info(f"Starting to train component {component.name}")
            # Only the components that precede this one are passed along.
            preceding = self.pipeline[:position]
            component.prepare_partial_processing(preceding, shared_context)
            train_result = component.train(
                training_copy, self.config, **shared_context
            )
            logger.info("Finished training component.")
            absorb(train_result)

        return Interpreter(self.pipeline, shared_context)
示例#2
0
    def train(self, data: TrainingData, **kwargs: Any) -> "Interpreter":
        """Train each pipeline component in order and return an interpreter.

        Args:
            data: Training data; it is stored on the trainer and then
                validated.
            **kwargs: Initial entries of the context dictionary that is
                handed to every component.

        Returns:
            An ``Interpreter`` built from the pipeline and the final context.
        """
        self.training_data = data
        self.training_data.validate()

        # The context is the keyword-argument dict itself; components extend
        # it below and receive it during training.
        shared_context = kwargs

        def absorb(extra: Any) -> None:
            # Fold a component's contribution into the context; falsy
            # results are skipped.
            if extra:
                shared_context.update(extra)

        # Let every component contribute to the context up front.
        for component in self.pipeline:
            absorb(component.provide_context())

        # Check the pipeline against the data before any training starts,
        # unless validation was explicitly disabled.
        if not self.skip_validation:
            components.validate_required_components_from_data(
                self.pipeline, self.training_data
            )

        # Training modifies the data internally, so work on a deep copy.
        training_copy = copy.deepcopy(data)

        for position, component in enumerate(self.pipeline):
            logger.info(f"Starting to train component {component.name}")
            # Only the components that precede this one are passed along.
            preceding = self.pipeline[:position]
            component.prepare_partial_processing(preceding, shared_context)
            train_result = component.train(
                training_copy, self.config, **shared_context
            )
            logger.info("Finished training component.")
            absorb(train_result)

        return Interpreter(self.pipeline, shared_context)
示例#3
0
    def train(self, data: TrainingData, **kwargs: Any) -> "Interpreter":
        """Train each pipeline component in order and return an interpreter.

        Args:
            data: Training data; it is stored on the trainer and then
                validated.
            **kwargs: Initial entries of the context dictionary that is
                handed to every component.

        Returns:
            An ``Interpreter`` built from the pipeline and the context. The
            return value of each component's ``train`` call is not merged
            into the context here.
        """
        self.training_data = data
        self.training_data.validate()

        # The keyword-argument dict itself serves as the shared context.
        shared_context = kwargs

        # Let every component contribute to the context up front; falsy
        # contributions are skipped.
        for component in self.pipeline:
            provided = component.provide_context()
            if provided:
                shared_context.update(provided)

        # Check the pipeline against the data before any training starts,
        # unless validation was explicitly disabled.
        if not self.skip_validation:
            components.validate_required_components_from_data(
                self.pipeline, self.training_data
            )

        # Emit warnings for obvious cases of competing entity extractors.
        components.warn_of_competing_extractors(self.pipeline)
        components.warn_of_competition_with_regex_extractor(
            self.pipeline, self.training_data
        )

        # Training modifies the data internally, so work on a deep copy.
        training_copy: TrainingData = copy.deepcopy(data)

        for position, component in enumerate(self.pipeline):
            logger.info(f"Starting to train component {component.name}")
            # Only the components that precede this one are passed along.
            preceding = self.pipeline[:position]
            component.prepare_partial_processing(preceding, shared_context)
            component.train(training_copy, self.config, **shared_context)
            logger.info("Finished training component.")

        return Interpreter(self.pipeline, shared_context)
示例#4
0
def test_validate_required_components_from_data(
        config_path: Text, data_path: Text,
        expected_warning_excerpts: List[Text]):
    """Check that validating a pipeline against data emits one warning.

    Builds a ``Trainer`` from the config at ``config_path``, loads the
    training data at ``data_path`` and runs the required-components
    validation. Exactly one ``UserWarning`` must be recorded, and its message
    must contain every string in ``expected_warning_excerpts``.
    """
    loaded_config = config.load(config_path)
    trainer = Trainer(loaded_config)
    training_data = load_data(data_path)
    with pytest.warns(UserWarning) as record:
        components.validate_required_components_from_data(
            trainer.pipeline, training_data)
    assert len(record) == 1
    warning_text = record[0].message.args[0]
    # Test the membership booleans themselves. Wrapping each one in a
    # one-element list would always be truthy and make the check vacuous.
    assert all(excerpt in warning_text
               for excerpt in expected_warning_excerpts)