def determine_filters(name='', version=None, strict=True, **kwargs):
    '''
    Stateless helper that decides which filters to use when searching for
    an already existing persistable.

    :param name: persistable name to filter on
    :param version: when not None, the filters are just name + version and
        nothing else is consulted
    :param strict: only relevant when version is None. When truthy, the
        registered class is instantiated and its hash is added to the
        filters; otherwise name + registered class name are used alone
    :param registered_name: (read from kwargs) Class name registered in
        SimpleML
    :return: tuple of (database class, filter dictionary)
    '''
    # An explicit version short-circuits every other consideration
    if version is not None:
        return BaseRawDataset, {'name': name, 'version': version}

    class_key = kwargs.get('registered_name')

    if not strict:
        # Loose match: same name and same registered class
        return BaseRawDataset, {'name': name, 'registered_name': class_key}

    # Strict match: instantiate a throwaway object purely to obtain its hash.
    # kwargs are forwarded untouched, so they still include registered_name.
    dataset_cls = SIMPLEML_REGISTRY.get(class_key)
    candidate = dataset_cls(name=name, **kwargs)
    lookup = {
        'name': name,
        'registered_name': class_key,
        'hash_': candidate._hash()
    }
    return BaseRawDataset, lookup
def retrieve_from_registry(registered_name):
    '''
    Stateless lookup of a class definition in the registry, with error
    handling for missing entries.

    :param registered_name: key to look up in SIMPLEML_REGISTRY
    :return: whatever the registry holds for registered_name
    :raises TrainingError: when the registry lookup yields None
    '''
    registry_entry = SIMPLEML_REGISTRY.get(registered_name)
    if registry_entry is not None:
        return registry_entry

    # Nothing registered under that name
    raise TrainingError(
        'Referenced class unregistered: {}'.format(registered_name))
def create_new(registered_name, **kwargs):
    '''
    Stateless factory: instantiate the registered class, build its
    dataframe, save it and hand it back.

    :param registered_name: Class name registered in SimpleML
    :param kwargs: forwarded unchanged to the class constructor
    :return: the newly created (built and saved) object
    '''
    dataset_cls = SIMPLEML_REGISTRY.get(registered_name)
    created = dataset_cls(**kwargs)

    # Build first, then save
    created.build_dataframe()
    created.save()
    return created
def test_register_on_import(self):
    # Checks that merely defining a class that uses the MetaRegistry
    # metaclass makes it retrievable from SIMPLEML_REGISTRY by class name.
    expected_key = 'BLAHBLAHTESTCLASS'

    def define_registered_class():
        # The class statement itself is what is being exercised here
        class BLAHBLAHTESTCLASS(with_metaclass(MetaRegistry, object)):
            __abstract__ = True

        return BLAHBLAHTESTCLASS

    # Precondition: the name must not be present before the class is defined
    self.assertNotIn(expected_key, SIMPLEML_REGISTRY.registry)

    # Register
    created_class = define_registered_class()

    # Test
    self.assertIn(expected_key, SIMPLEML_REGISTRY.registry)
    self.assertEqual(created_class, SIMPLEML_REGISTRY.get(expected_key))
def determine_filters(cls, name='', version=None, strict=True, **kwargs):
    '''
    Stateless helper that decides which filters to use when searching for
    an already existing persistable.

    :param name: persistable name to filter on
    :param version: when not None, the filters are just name + version and
        kwargs are left untouched
    :param strict: only relevant when version is None. When truthy, a dummy
        object is created, given the pipeline, has its dataframe built and
        its hash is used as a filter. Otherwise the combination of name,
        registered class name and pipeline id is assumed to be unique
    :param registered_name: (required in kwargs when version is None)
        Class name registered in SimpleML
    :param dataset_pipeline: (optional, in kwargs) pipeline object to use
    :param dataset_pipeline_kwargs: (optional, in kwargs) reference passed
        to cls.retrieve_pipeline when no pipeline object was supplied
    :return: tuple of (database class, filter dictionary)
    '''
    if version is not None:
        return BaseProcessedDataset, {'name': name, 'version': version}

    class_key = kwargs.pop('registered_name')

    # Prefer a pipeline object passed in directly; only fall back to
    # resolving the dependency reference when none was given
    pipeline = kwargs.pop('dataset_pipeline', None)
    if pipeline is None:
        pipeline_reference = kwargs.pop('dataset_pipeline_kwargs', {})
        pipeline = cls.retrieve_pipeline(**pipeline_reference)

    if not strict:
        # Loose match on name + class + pipeline
        lookup = {
            'name': name,
            'registered_name': class_key,
            'pipeline_id': pipeline.id
        }
        return BaseProcessedDataset, lookup

    # Strict match: build a throwaway object so its hash can be searched for
    dataset_cls = SIMPLEML_REGISTRY.get(class_key)
    candidate = dataset_cls(name=name, **kwargs)
    candidate.add_pipeline(pipeline)
    candidate.build_dataframe()
    lookup = {
        'name': name,
        'registered_name': class_key,
        'hash_': candidate._hash()
    }
    return BaseProcessedDataset, lookup
def determine_filters(cls, name=None, model_id=None, strict=False, **kwargs):
    '''
    Stateless helper that decides which filters to use when searching for
    an already existing persistable.

    :param name: metric name; used directly as a filter only when model_id
        is also given
    :param model_id: when given together with name, the filters are just
        name + model_id and kwargs are left untouched
    :param strict: when truthy, score() is called on the dummy metric
        before its hash is taken. In theory identical inputs and classes
        should deterministically give identical outputs (up to random
        iter), so scoring first is not required to consider two objects
        the same
    :param registered_name: (required in kwargs on the hash path) Class
        name registered in SimpleML
    :param model: (optional, in kwargs) model object to attach
    :param model_kwargs: (optional, in kwargs) reference passed to
        cls.retrieve_model when no model object was supplied
    :return: tuple of (database class, filter dictionary)
    '''
    # The default name cannot be relied on because metric names are hard
    # coded to reflect dataset split + class, so the shortcut needs both
    # an explicit name and a model id
    if name is not None and model_id is not None:
        return BaseMetric, {'name': name, 'model_id': model_id}

    # Prefer a model object passed in directly; otherwise resolve the
    # dependency reference
    model = kwargs.pop('model', None)
    if model is None:
        model_reference = kwargs.pop('model_kwargs', {})
        model = cls.retrieve_model(**model_reference)

    # Build a throwaway metric so its hash can be searched for
    class_key = kwargs.pop('registered_name')
    metric_cls = SIMPLEML_REGISTRY.get(class_key)
    candidate = metric_cls(name=name, **kwargs)
    candidate.add_model(model)
    if strict:
        candidate.score()

    # The name filter comes from the constructed object, not the argument
    lookup = {
        'name': candidate.name,
        'registered_name': class_key,
        'hash_': candidate._hash()
    }
    return BaseMetric, lookup
def create_new(cls, registered_name, model=None, **kwargs):
    '''
    Stateless factory: instantiate the registered class, attach the model,
    score, save and hand the object back.

    :param registered_name: Class name registered in SimpleML
    :param model: model object to attach; when None it is resolved through
        cls.retrieve_model using the 'model_kwargs' entry popped from kwargs
    :param kwargs: remaining entries are forwarded to the class constructor
    :return: the newly created (scored and saved) object
    '''
    if model is None:
        # No object supplied: resolve it from the dependency reference
        model_reference = kwargs.pop('model_kwargs', {})
        model = cls.retrieve_model(**model_reference)

    metric_cls = SIMPLEML_REGISTRY.get(registered_name)
    created = metric_cls(**kwargs)
    created.add_model(model)

    created.score()
    created.save()
    return created
def create_new(cls, registered_name, dataset=None, **kwargs):
    '''
    Stateless factory: instantiate the registered class, attach the
    dataset, fit, save and hand the object back.

    :param registered_name: Class name registered in SimpleML
    :param dataset: dataset object to attach; when None it is resolved
        through cls.retrieve_dataset using the 'dataset_kwargs' entry
        popped from kwargs
    :param kwargs: remaining entries are forwarded to the class constructor
    :return: the newly created (fitted and saved) object
    '''
    if dataset is None:
        # No object supplied: resolve it from the dependency reference
        dataset_reference = kwargs.pop('dataset_kwargs', {})
        dataset = cls.retrieve_dataset(**dataset_reference)

    pipeline_cls = SIMPLEML_REGISTRY.get(registered_name)
    created = pipeline_cls(**kwargs)
    created.add_dataset(dataset)

    created.fit()
    created.save()
    return created
def create_new(cls, registered_name, pipeline=None, **kwargs):
    '''
    Stateless factory: instantiate the registered class, attach the
    pipeline, fit, save and hand the object back.

    :param registered_name: Class name registered in SimpleML
    :param pipeline: pipeline object to attach; when None it is resolved
        through cls.retrieve_pipeline using the 'pipeline_kwargs' entry
        popped from kwargs
    :param kwargs: remaining entries are forwarded to the class constructor
    :return: the newly created (fitted and saved) object
    '''
    if pipeline is None:
        # No object supplied: resolve it from the dependency reference
        pipeline_reference = kwargs.pop('pipeline_kwargs', {})
        pipeline = cls.retrieve_pipeline(**pipeline_reference)

    model_cls = SIMPLEML_REGISTRY.get(registered_name)
    created = model_cls(**kwargs)
    created.add_pipeline(pipeline)

    created.fit()
    created.save()
    return created
def determine_filters(cls, name='', version=None, strict=False, **kwargs):
    '''
    Stateless helper that decides which filters to use when searching for
    an already existing persistable.

    :param name: persistable name to filter on
    :param version: when not None, the filters are just name + version and
        kwargs are left untouched
    :param strict: when truthy, fit() is called on the dummy model before
        its hash is taken. In theory identical inputs and classes should
        deterministically give identical outputs (up to random iter), so
        fitting first is not required to consider two objects the same
    :param registered_name: (required in kwargs when version is None)
        Class name registered in SimpleML
    :param pipeline: (optional, in kwargs) pipeline object to attach
    :param pipeline_kwargs: (optional, in kwargs) reference passed to
        cls.retrieve_pipeline when no pipeline object was supplied
    :return: tuple of (database class, filter dictionary)
    '''
    if version is not None:
        return BaseModel, {'name': name, 'version': version}

    # Prefer a pipeline object passed in directly; otherwise resolve the
    # dependency reference
    pipeline = kwargs.pop('pipeline', None)
    if pipeline is None:
        pipeline_reference = kwargs.pop('pipeline_kwargs', {})
        pipeline = cls.retrieve_pipeline(**pipeline_reference)

    # Build a throwaway model so its hash can be searched for
    class_key = kwargs.pop('registered_name')
    model_cls = SIMPLEML_REGISTRY.get(class_key)
    candidate = model_cls(name=name, **kwargs)
    candidate.add_pipeline(pipeline)
    if strict:
        candidate.fit()

    lookup = {
        'name': name,
        'registered_name': class_key,
        'hash_': candidate._hash()
    }
    return BaseModel, lookup
def _load_class(self):
    '''
    Thin wrapper around the global registry of imported class names.

    :return: the result of looking up self.registered_name in
        SIMPLEML_REGISTRY
    '''
    registry_lookup = SIMPLEML_REGISTRY.get
    return registry_lookup(self.registered_name)