def train(self, data: TrainingData, **kwargs: Any) -> "Interpreter":
    """Train each pipeline component in order and return an interpreter.

    Args:
        data: Training data. It is stored on ``self.training_data`` and
            validated; the components themselves train on a deep copy.
        **kwargs: Initial entries of the context dict that is shared
            between (and extended by) the components.

    Returns:
        An ``Interpreter`` built from the pipeline and the final context.
    """
    self.training_data = data
    self.training_data.validate()

    # The keyword-argument dict itself serves as the shared context.
    context = kwargs
    for component in self.pipeline:
        provided = component.provide_context()
        if not provided:
            continue
        context.update(provided)

    # Run the required-components check before any training happens,
    # unless validation has been switched off.
    if not self.skip_validation:
        components.validate_required_components_from_data(
            self.pipeline, self.training_data
        )

    # Training modifies the data internally, so components get a copy.
    working_data: TrainingData = copy.deepcopy(data)

    for position, component in enumerate(self.pipeline):
        # Entity extractors and intent classifiers are trained on the data
        # with empty end-to-end examples removed; the filtered data is also
        # what every later component receives.
        if isinstance(component, (EntityExtractor, IntentClassifier)):
            working_data = working_data.without_empty_e2e_examples()

        logger.info(f"Starting to train component {component.name}")
        preceding = self.pipeline[:position]
        component.prepare_partial_processing(preceding, context)
        trained = component.train(working_data, self.config, **context)
        logger.info("Finished training component.")

        # Anything truthy returned by train() is merged into the context.
        if trained:
            context.update(trained)

    return Interpreter(self.pipeline, context)
def train(self, data: TrainingData, **kwargs: Any) -> "Interpreter":
    """Run training for the whole pipeline on the given training data.

    Args:
        data: Training data; stored on ``self.training_data`` and validated.
            A deep copy of it is what the components are trained on.
        **kwargs: Starting content of the context shared by all components.

    Returns:
        An ``Interpreter`` wrapping the pipeline and the accumulated context.
    """
    self.training_data = data
    self.training_data.validate()

    # The context starts out as the caller's keyword arguments and is
    # extended with whatever each component's provide_context() returns.
    context = kwargs
    for component in self.pipeline:
        contributed = component.provide_context()
        if contributed:
            context.update(contributed)

    # Before training starts: check that all arguments are provided.
    # NOTE(review): an earlier note says this check covers the response
    # selector - confirm against components.validate_required_components_from_data.
    validation_enabled = not self.skip_validation
    if validation_enabled:
        components.validate_required_components_from_data(
            self.pipeline, self.training_data
        )

    # The data gets modified during training, hence the copy.
    training_copy = copy.deepcopy(data)

    for index, component in enumerate(self.pipeline):
        logger.info(f"Starting to train component {component.name}")
        component.prepare_partial_processing(self.pipeline[:index], context)
        result = component.train(training_copy, self.config, **context)
        logger.info("Finished training component.")
        if not result:
            continue
        # Truthy results from train() become part of the shared context.
        context.update(result)

    return Interpreter(self.pipeline, context)
def train(self, data: TrainingData, **kwargs: Any) -> "Interpreter":
    """Train the pipeline components one after another.

    Args:
        data: Training data; stored on ``self.training_data`` and validated.
            Components receive a deep copy of it.
        **kwargs: Initial entries of the context handed to every component.

    Returns:
        An ``Interpreter`` constructed from the pipeline and the context.
    """
    self.training_data = data
    self.training_data.validate()

    # Collect context contributions from all components up front; the
    # keyword-argument dict is reused as the context.
    context = kwargs
    for component in self.pipeline:
        extra = component.provide_context()
        if not extra:
            continue
        context.update(extra)

    # Before training starts: check that all arguments are provided.
    if not self.skip_validation:
        components.validate_required_components_from_data(
            self.pipeline, self.training_data
        )

    # Warn if there is an obvious case of competing entity extractors.
    components.warn_of_competing_extractors(self.pipeline)
    components.warn_of_competition_with_regex_extractor(
        self.pipeline, self.training_data
    )

    # The data is modified internally during training, hence the copy.
    working_data: TrainingData = copy.deepcopy(data)

    for step, component in enumerate(self.pipeline):
        logger.info(f"Starting to train component {component.name}")
        earlier_components = self.pipeline[:step]
        component.prepare_partial_processing(earlier_components, context)
        # The return value of train() is not used in this variant.
        component.train(working_data, self.config, **context)
        logger.info("Finished training component.")

    return Interpreter(self.pipeline, context)
def test_validate_required_components_from_data(
    config_path: Text, data_path: Text, expected_warning_excerpts: List[Text]
):
    """Check that validating a pipeline against data emits the expected warning.

    Args:
        config_path: Path of the configuration to load and build a
            ``Trainer`` from.
        data_path: Path of the training data to load.
        expected_warning_excerpts: Substrings that must all occur in the
            message of the single emitted ``UserWarning``.
    """
    loaded_config = config.load(config_path)
    trainer = Trainer(loaded_config)
    training_data = load_data(data_path)

    with pytest.warns(UserWarning) as record:
        components.validate_required_components_from_data(
            trainer.pipeline, training_data
        )

    # Exactly one warning is expected.
    assert len(record) == 1

    warning_text = record[0].message.args[0]
    # Test each membership result directly. Wrapping it in a one-element list
    # (``[excerpt in text]``) is always truthy and would make this assertion
    # pass regardless of the warning's content.
    assert all(excerpt in warning_text for excerpt in expected_warning_excerpts)