# Imports assumed for these tests (FEDOT module paths as of the 0.x API; may differ by version):
import pytest

from fedot.core.data.data_split import train_test_data_setup
from fedot.core.pipelines.node import PrimaryNode, SecondaryNode
from fedot.core.pipelines.pipeline import Pipeline


def test_pipeline_hierarchy_fit_correct(data_setup):
    data = data_setup
    train, _ = train_test_data_setup(data)

    # Build a diamond-shaped DAG of four logistic-regression nodes
    first = PrimaryNode(operation_type='logit')
    second = SecondaryNode(operation_type='logit', nodes_from=[first])
    third = SecondaryNode(operation_type='logit', nodes_from=[first])
    final = SecondaryNode(operation_type='logit', nodes_from=[second, third])

    pipeline = Pipeline()
    for node in [first, second, third, final]:
        pipeline.add_node(node)

    pipeline.unfit()
    train_predicted = pipeline.fit(input_data=train)

    assert pipeline.root_node.descriptive_id == (
        '((/n_logit_default_params;)/'
        'n_logit_default_params;;(/'
        'n_logit_default_params;)/'
        'n_logit_default_params;)/'
        'n_logit_default_params')
    assert pipeline.length == 4
    assert pipeline.depth == 3
    assert train_predicted.predict.shape[0] == train.target.shape[0]
    assert final.fitted_operation is not None
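# Topology of the pipeline built above (illustrative sketch; node labels match
# the test variables):
#
#                first (primary, logit)
#               /                      \
#      second (logit)              third (logit)
#               \                      /
#                final (root, logit)
#
# `length` counts all four nodes; `depth` is the longest path from a primary
# node to the root (first -> second -> final), i.e. 3 levels, which is what
# the assertions above check.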
def test_pipeline_unfit(data_fixture, request):
    data = request.getfixturevalue(data_fixture)
    pipeline = Pipeline(PrimaryNode('logit'))
    pipeline.fit(data)
    assert pipeline.is_fitted

    pipeline.unfit()
    assert not pipeline.is_fitted
    assert not pipeline.root_node.fitted_operation

    # Predicting with an unfitted pipeline must raise
    with pytest.raises(ValueError):
        pipeline.predict(data)
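# A minimal, self-contained sketch of the pytest pattern used above, where a
# fixture is resolved by its string name at runtime via request.getfixturevalue.
# The fixture and test names below are hypothetical, for illustration only.
import pytest


@pytest.fixture
def sample_data():
    return [1, 2, 3]


@pytest.mark.parametrize('data_fixture', ['sample_data'])
def test_getfixturevalue_pattern(data_fixture, request):
    # request.getfixturevalue looks up the fixture whose name is passed in
    data = request.getfixturevalue(data_fixture)
    assert data == [1, 2, 3]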
# Imports assumed by this method (FEDOT module paths may differ by version):
# import gc
# from typing import Any, Optional, Tuple, Union
# from fedot.core.repository.quality_metrics_repository import MetricsRepository
def composer_metric(self, metrics,
                    train_data: Union[InputData, MultiModalData],
                    test_data: Union[InputData, MultiModalData],
                    pipeline: Pipeline) -> Optional[Tuple[Any, ...]]:
    try:
        validate(pipeline)
        pipeline.log = self.log

        # Normalise a single metric into a list
        if not isinstance(metrics, list):
            metrics = [metrics]

        if self.cache is not None:
            # TODO improve cache
            pipeline.fit_from_cache(self.cache)

        if not pipeline.is_fitted:
            self.log.debug(f'Pipeline {pipeline.root_node.descriptive_id} fit started')
            pipeline.fit(input_data=train_data,
                         time_constraint=self.composer_requirements.max_pipeline_fit_time)
            try:
                # Guard against a missing cache instead of relying on the
                # except-branch below to swallow the AttributeError
                if self.cache is not None:
                    self.cache.save_pipeline(pipeline)
            except Exception as ex:
                self.log.info(f'Cache can not be saved: {ex}. Continue.')

        # Each metric is either a callable or an id resolved via the repository
        evaluated_metrics = ()
        for metric in metrics:
            metric_func = metric if callable(metric) else MetricsRepository().metric_by_id(metric)
            evaluated_metrics = evaluated_metrics + (metric_func(pipeline, reference_data=test_data),)

        self.log.debug(f'Pipeline {pipeline.root_node.descriptive_id} '
                       f'with metrics: {list(evaluated_metrics)}')

        # Enforce memory cleaning
        pipeline.unfit()
        gc.collect()
    except Exception as ex:
        self.log.info(f'Pipeline assessment warning: {ex}. Continue.')
        evaluated_metrics = None

    return evaluated_metrics
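# A standalone sketch of the metric-dispatch pattern in composer_metric: each
# entry is either a callable or an id looked up in a repository. The registry
# and function below are hypothetical stand-ins for MetricsRepository, not
# part of the FEDOT API.
from typing import Any, Tuple


_METRIC_REGISTRY = {'roc_auc': lambda pipeline, reference_data: 0.5}  # stub score


def evaluate_metrics(metrics, pipeline, test_data) -> Tuple[Any, ...]:
    evaluated: Tuple[Any, ...] = ()
    for metric in metrics:
        # Callables pass through; ids are resolved through the registry
        metric_func = metric if callable(metric) else _METRIC_REGISTRY[metric]
        evaluated += (metric_func(pipeline, reference_data=test_data),)
    return evaluated


# Usage: mixing a registered id with an ad-hoc callable
print(evaluate_metrics(['roc_auc', lambda pipeline, reference_data: 1.0], None, None))
# -> (0.5, 1.0)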