def test_pipeline_first_fit_stage(self):
    """Check the fit event recorded for the first pipeline stage (PCA).

    The stage's model, dataframe, spec and feature columns are each
    compared against a hand-built expected ``modeldb_types`` struct.
    """
    first_stage_event = self.pipeline_event.fitStages[0].fe

    # Model: a 'PCA' transformer tagged 'decomposition PCA'.
    actual_model = first_stage_event.model
    utils.is_equal_transformer(
        actual_model,
        modeldb_types.Transformer(-1, 'PCA', 'decomposition PCA'),
        self)

    # Dataframe: two int64 columns, tagged 'digits-dataset'.
    actual_df = first_stage_event.df
    expected_columns = [
        modeldb_types.DataFrameColumn(column_name, 'int64')
        for column_name in ('A', 'B')
    ]
    utils.is_equal_dataframe(
        actual_df,
        modeldb_types.DataFrame(-1, expected_columns, 100, 'digits-dataset'),
        self)

    # Spec: each row is (name, value, type) of one expected hyperparameter.
    actual_spec = first_stage_event.spec
    hyperparameter_rows = (
        ('copy', 'True', 'bool'),
        ('n_components', 'None', 'NoneType'),
        ('whiten', 'False', 'bool'),
    )
    expected_hyperparameters = [
        modeldb_types.HyperParameter(name, value, type_name, FMIN, FMAX)
        for name, value, type_name in hyperparameter_rows
    ]
    utils.is_equal_transformer_spec(
        actual_spec,
        modeldb_types.TransformerSpec(
            -1, 'PCA', expected_hyperparameters, 'decomposition PCA'),
        self)

    # Feature columns are compared as an ordered list.
    self.assertEqual(first_stage_event.featureColumns, ['A', 'B'])
def test_pipeline_second_fit_stage(self):
    """Check the fit event recorded for the second pipeline stage.

    The stage is expected to be a 'LinearRegression' transformer tagged
    'basic linear reg'. Its dataframe is expected to have no columns
    and an empty tag.
    """
    second_stage_event = self.pipeline_event.fitStages[1].fe

    # Model of the second stage.
    actual_model = second_stage_event.model
    utils.is_equal_transformer(
        actual_model,
        modeldb_types.Transformer(
            -1, 'LinearRegression', 'basic linear reg'),
        self)

    # Dataframe: empty column list and empty tag.
    actual_df = second_stage_event.df
    utils.is_equal_dataframe(
        actual_df,
        modeldb_types.DataFrame(-1, [], 100, ''),
        self)

    # Spec: each row is (name, value, type) of one expected hyperparameter.
    actual_spec = second_stage_event.spec
    hyperparameter_rows = (
        ('copy_X', 'True', 'bool'),
        ('normalize', 'False', 'bool'),
        ('n_jobs', '1', 'int'),
        ('fit_intercept', 'True', 'bool'),
    )
    expected_hyperparameters = [
        modeldb_types.HyperParameter(name, value, type_name, FMIN, FMAX)
        for name, value, type_name in hyperparameter_rows
    ]
    utils.is_equal_transformer_spec(
        actual_spec,
        modeldb_types.TransformerSpec(
            -1, 'LinearRegression', expected_hyperparameters,
            'basic linear reg'),
        self)
def test_transformer_spec(self):
    """Check the spec attached to ``self.fit_event``.

    The spec is expected to describe a 'LinearRegression' transformer
    tagged 'linear reg' with four hyperparameters.
    """
    actual_spec = self.fit_event.spec

    # Each row is (name, value, type) of one expected hyperparameter.
    hyperparameter_rows = (
        ('copy_X', 'True', 'bool'),
        ('normalize', 'False', 'bool'),
        ('n_jobs', '1', 'int'),
        ('fit_intercept', 'True', 'bool'),
    )
    expected_hyperparameters = [
        modeldb_types.HyperParameter(name, value, type_name, FMIN, FMAX)
        for name, value, type_name in hyperparameter_rows
    ]

    utils.is_equal_transformer_spec(
        actual_spec,
        modeldb_types.TransformerSpec(
            -1, 'LinearRegression', expected_hyperparameters, 'linear reg'),
        self)
def test_overall_pipeline_fit_event(self):
    """Check the fit event recorded for the pipeline as a whole.

    Validates the struct of ``self.pipeline_event.pipelineFit`` and then
    compares its model, dataframe, spec and feature columns against
    hand-built expected ``modeldb_types`` structs.
    """
    fit_event = self.pipeline_event.pipelineFit
    utils.validate_fit_event_struct(fit_event, self)

    # Model: a 'Pipeline' transformer tagged 'pipeline with pca + logistic'.
    transformer = fit_event.model
    expected_transformer = modeldb_types.Transformer(
        -1, 'Pipeline', 'pipeline with pca + logistic')
    utils.is_equal_transformer(transformer, expected_transformer, self)

    # Dataframe: two int64 columns, tagged 'digits-dataset'.
    df = fit_event.df
    expected_df = modeldb_types.DataFrame(
        -1,
        [
            modeldb_types.DataFrameColumn('A', 'int64'),
            modeldb_types.DataFrameColumn('B', 'int64'),
        ],
        100,
        'digits-dataset')
    utils.is_equal_dataframe(df, expected_df, self)

    # Spec: each row is (name, value, type) of one expected hyperparameter.
    # The long values are the string forms of the step list and of the
    # individual estimators; adjacent literals concatenate to the exact
    # expected text.
    spec = fit_event.spec
    hyperparameter_rows = (
        ('logistic__n_jobs', '1', 'int'),
        ('pca__copy', 'True', 'bool'),
        ('pca__n_components', 'None', 'NoneType'),
        ('logistic__fit_intercept', 'True', 'bool'),
        ('pca__whiten', 'False', 'bool'),
        ('steps',
         "[('pca', PCA(copy=True, n_components=None, whiten=False)), "
         "('logistic', LinearRegression(copy_X=True, fit_intercept=True, "
         "n_jobs=1, normalize=False))]",
         'list'),
        ('logistic',
         'LinearRegression(copy_X=True, fit_intercept=True, '
         'n_jobs=1, normalize=False)',
         'LinearRegression'),
        ('pca',
         'PCA(copy=True, n_components=None, whiten=False)',
         'PCA'),
        ('logistic__normalize', 'False', 'bool'),
        ('logistic__copy_X', 'True', 'bool'),
    )
    expected_spec = modeldb_types.TransformerSpec(
        -1,
        'Pipeline',
        [
            modeldb_types.HyperParameter(name, value, type_name, FMIN, FMAX)
            for name, value, type_name in hyperparameter_rows
        ],
        'pipeline with pca + logistic')
    utils.is_equal_transformer_spec(spec, expected_spec, self)

    # Order-insensitive comparison of the feature columns.
    # assertItemsEqual exists only on Python 2 (it is assertCountEqual on
    # Python 3); comparing sorted lists checks the same multiset equality
    # and runs on both.
    self.assertEqual(sorted(fit_event.featureColumns), sorted(['A', 'B']))