Example #1
    def test_custom_layers(self):
        """Test adding custom layers to the definition"""

        # Make a simple model
        model = _make_simple_model()

        tmpdir = mkdtemp()
        try:
            # Save it
            model_path = os.path.join(tmpdir, 'model.hd5')
            model.save(model_path)

            # Create the metadata
            metadata = KerasModel.create_model(
                model_path, ['y'],
                custom_objects={'Dense': keras.layers.Dense})
            metadata.set_title('test').set_name('test')

            # Make sure it has the custom object definitions
            self.assertIn('Dense',
                          metadata['servable']['options']['custom_objects'])

            # Validate it against DLHub schema
            validate_against_dlhub_schema(metadata.to_dict(), 'servable')
        finally:
            shutil.rmtree(tmpdir)

        # Test the errors
        with self.assertRaises(ValueError) as exc:
            metadata.add_custom_object('BadLayer', float)
        self.assertIn('subclass', str(exc.exception))
Example #2
    def test_multinetwork(self):
        model = MultiNetwork()

        with TemporaryDirectory() as tp:
            model_path = os.path.join(tp, 'model.pth')
            torch.save(model, model_path)

            metadata = TorchModel.create_model(model_path, [(None, 4)]*2, [(None, 1)]*2,
                                               input_type='float', output_type=['float', 'float'])
            metadata.set_name('t').set_title('t')

            # Test the output shapes
            self.assertEqual(metadata['servable']['methods']['run']['input'],
                             {'type': 'tuple', 'description': 'Tuple of tensors',
                              'element_types': [{'type': 'ndarray',
                                                 'description': 'Tensor', 'shape': [None, 4],
                                                 'item_type': {'type': 'float'}},
                                                {'type': 'ndarray',
                                                 'description': 'Tensor', 'shape': [None, 4],
                                                 'item_type': {'type': 'float'}}]})
            self.assertEqual(metadata['servable']['methods']['run']['output'],
                             {'type': 'tuple', 'description': 'Tuple of tensors',
                              'element_types': [{'type': 'ndarray',
                                                 'description': 'Tensor', 'shape': [None, 1],
                                                 'item_type': {'type': 'float'}},
                                                {'type': 'ndarray',
                                                 'description': 'Tensor', 'shape': [None, 1],
                                                 'item_type': {'type': 'float'}}]})

            validate_against_dlhub_schema(metadata.to_dict(), 'servable')
Example #3
    def test_pickle(self):
        pickle_path = os.path.abspath(
            os.path.join(os.path.dirname(__file__), 'model.pkl'))

        # Make the model
        model = PythonClassMethodModel.create_model(pickle_path,
                                                    'predict_proba',
                                                    {'fake': 'kwarg'})
        model.set_title('Python example').set_name("class_method")

        # Make sure it throws value errors if inputs are not set
        with self.assertRaises(ValueError):
            model.to_dict()

        # Define the input and output types
        model.set_inputs('ndarray', 'Features for each entry', shape=[None, 4])
        model.set_outputs('ndarray',
                          'Predicted probabilities of being each iris species',
                          shape=[None, 3])

        # Make sure attempting to set "unpack" fails
        with self.assertRaises(ValueError):
            model.set_unpack_inputs(True)

        # Add some requirements
        model.add_requirement('scikit-learn', 'detect')
        model.add_requirement('numpy', 'detect')
        model.add_requirement(
            'sklearn', 'latest')  # Dummy project, version # shouldn't change

        # Check the model output
        output = model.to_dict()
        assert output['dlhub']['files'] == {'pickle': pickle_path}
        assert output['dlhub']['dependencies']['python'] == {
            'scikit-learn': skl_version,
            'numpy': numpy_version,
            'sklearn': '0.0'
        }
        assert output['servable']['shim'] == 'python.PythonClassMethodServable'
        assert 'run' in output['servable']['methods']
        assert output['servable']['methods']['run']['input'] == {
            'type': 'ndarray',
            'description': 'Features for each entry',
            'shape': [None, 4]
        }
        assert output['servable']['methods']['run']['output'] == {
            'type': 'ndarray',
            'description':
            'Predicted probabilities of being each iris species',
            'shape': [None, 3]
        }
        assert (output['servable']['methods']['run']['method_details']
                ['class_name'].endswith('.SVC'))
        assert (output['servable']['methods']['run']['method_details']
                ['method_name'] == 'predict_proba')

        self.assertEqual([pickle_path], model.list_files())
        validate_against_dlhub_schema(output, 'servable')
Example #4
def test_multinetwork(tmpdir):
    model = MultiNetwork()

    model_path = os.path.join(tmpdir, 'model.pth')
    torch.save(model, model_path)

    metadata = TorchModel.create_model(model_path, [(None, 4)] * 2,
                                       [(None, 1)] * 2,
                                       input_type='float',
                                       output_type=['float', 'float'])
    metadata.set_name('t').set_title('t')

    # Test the output shapes
    assert metadata.servable.methods['run'].input.dict(exclude_none=True) == {
        'type':
        'tuple',
        'description':
        'Tuple of tensors',
        'element_types': [{
            'type': 'ndarray',
            'description': 'Tensor',
            'shape': [None, 4],
            'item_type': {
                'type': 'float'
            }
        }, {
            'type': 'ndarray',
            'description': 'Tensor',
            'shape': [None, 4],
            'item_type': {
                'type': 'float'
            }
        }]
    }
    assert metadata.servable.methods["run"].output.dict(exclude_none=True) == {
        'type':
        'tuple',
        'description':
        'Tuple of tensors',
        'element_types': [{
            'type': 'ndarray',
            'description': 'Tensor',
            'shape': [None, 1],
            'item_type': {
                'type': 'float'
            }
        }, {
            'type': 'ndarray',
            'description': 'Tensor',
            'shape': [None, 1],
            'item_type': {
                'type': 'float'
            }
        }]
    }

    validate_against_dlhub_schema(metadata.to_dict(), 'servable')
Example #5
    def test_torch_single_input(self):
        # Make a simple Torch model
        model = _make_simple_model()

        # Save it to disk
        tempdir = mkdtemp()
        try:
            model_path = os.path.join(tempdir, 'model.pt')
            torch.save(model, model_path)

            # Create a model
            metadata = TorchModel.create_model(model_path, (2, 4), (3, 5))
            metadata.set_title('Torch Test')
            metadata.set_name('mlp')

            output = metadata.to_dict()
            self.assertEqual(output, {
                "datacite": {"creators": [], "titles": [{"title": "Torch Test"}],
                             "publisher": "DLHub", "publicationYear": _year,
                             "identifier": {"identifier": "10.YET/UNASSIGNED",
                                            "identifierType": "DOI"},
                             "resourceType": {"resourceTypeGeneral": "InteractiveResource"},
                             "descriptions": [],
                             "fundingReferences": [],
                             "relatedIdentifiers": [],
                             "alternateIdentifiers": [],
                             "rightsList": []},
                "dlhub": {"version": __version__, "domains": [],
                          "visible_to": ["public"],
                          'type': 'servable',
                          "name": "mlp", "files": {"model": model_path},
                          "dependencies": {"python": {
                              'torch': torch.__version__
                          }}},
                "servable": {"methods": {"run": {
                    "input": {"type": "ndarray", "description": "Tensor", "shape": [2, 4],
                              "item_type": {"type": "float"}},
                    "output": {"type": "ndarray", "description": "Tensor",
                               "shape": [3, 5], "item_type": {"type": "float"}}, "parameters": {},
                    "method_details": {
                        "method_name": "__call__"
                    }}},
                    "type": "Torch Model",
                    "shim": "torch.TorchServable",
                    "model_type": "Deep NN",
                    "model_summary": """Net(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=800, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=10, bias=True)
)"""}})

            # Validate against schema
            validate_against_dlhub_schema(output, 'servable')
        finally:
            shutil.rmtree(tempdir)
Example #6
    def publish_servable(self, model):
        """Submit a servable to DLHub

        If this servable has not been published before, it will be assigned a unique identifier.

        If it has been published before (DLHub detects if it has an identifier), then DLHub
        will update the servable to the new version.

        Args:
            model (BaseMetadataModel): Servable to be submitted
        Returns:
            (string): Task ID of this submission, used for checking for success
        """

        # Get the metadata
        metadata = model.to_dict(simplify_paths=True)

        # Mark the method used to submit the model
        metadata['dlhub']['transfer_method'] = {'POST': 'file'}

        # Validate against the servable schema
        validate_against_dlhub_schema(metadata, 'servable')

        # Wipe the fx cache so we don't keep reusing an old servable
        self.clear_funcx_cache()

        # Get the data to be submitted as a ZIP file
        fp, zip_filename = mkstemp('.zip')
        os.close(fp)
        os.unlink(zip_filename)
        try:
            model.get_zip_file(zip_filename)

            # Get the authorization headers
            headers = {}
            self.authorizer.set_authorization_header(headers)

            # Submit data to DLHub service
            with open(zip_filename, 'rb') as zf:
                reply = requests.post(slash_join(self.base_url, 'publish'),
                                      headers=headers,
                                      files={
                                          'json':
                                          ('dlhub.json', json.dumps(metadata),
                                           'application/json'),
                                          'file': ('servable.zip', zf,
                                                   'application/octet-stream')
                                      })

            # Return the task id
            if reply.status_code != 200:
                raise Exception(reply.text)
            return reply.json()['task_id']
        finally:
            os.unlink(zip_filename)
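A minimal usage sketch for the publish method above (not part of the original examples). It assumes the method lives on dlhub_sdk's DLHubClient and that Globus credentials are already configured; the servable description mirrors the norm example shown later on this page.

from dlhub_sdk.client import DLHubClient
from dlhub_sdk.models.servables.python import PythonStaticMethodModel

# Describe a simple servable, as in the other examples on this page
model = PythonStaticMethodModel.create_model('numpy.linalg', 'norm')
model.set_name('1d_norm').set_title('Norm of a 1D Array')
model.set_inputs('ndarray', 'Array to be normed', shape=[None])
model.set_outputs('number', 'Norm of the array')

# Submit it: publish_servable validates the metadata against the servable
# schema, zips the model files, POSTs them to the DLHub service, and
# returns a task ID that can be polled for the submission status
client = DLHubClient()  # assumed client class; credentials must already be set up
task_id = client.publish_servable(model)
print(task_id)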
Example #7
    def test_tf(self):
        # Make a model and save it to disk
        self.make_model()

        # Create the description
        model = TensorFlowModel.create_model(tf_export_path).set_title('TF Test')\
            .set_name('tf-test')

        # Generate the metadata for the test
        metadata = model.to_dict(simplify_paths=True)

        # Make sure the expected files are present
        self.assertEqual({'other': ['saved_model.pb',
                                    os.path.join('variables', 'variables.data-00000-of-00001'),
                                    os.path.join('variables', 'variables.index')]},
                         metadata['dlhub']['files'])
        self.assertEqual(metadata['dlhub']['dependencies'],
                         {'python': {'tensorflow': tf.__version__}})
        self.assertEqual(metadata['servable'],
                         {'methods':
                             {'run': {
                                 'input': {'type': 'ndarray', 'description': 'x',
                                           'shape': [None, 3], 'item_type': {'type': 'float'}},
                                 'output': {'type': 'ndarray', 'description': 'y',
                                            'shape': [None, 3], 'item_type': {'type': 'float'}},
                                 'parameters': {},
                                 'method_details': {'input_nodes': ['Input:0'],
                                                    'output_nodes': ['add:0']}
                             }, 'length': {
                                 'input': {'type': 'ndarray', 'description': 'x',
                                           'shape': [None, 3], 'item_type': {'type': 'float'}},
                                 'output': {'type': 'float', 'description': 'len'},
                                 'parameters': {},
                                 'method_details': {'input_nodes': ['Input:0'],
                                                    'output_nodes': ['Sum:0']}
                             }, 'scalar_multiply': {
                                 'input': {'type': 'tuple', 'description': 'Arguments',
                                           'element_types': [
                                               {'type': 'ndarray', 'description': 'x',
                                                'shape': [None, 3], 'item_type': {'type': 'float'}},
                                               {'type': 'float', 'description': 'z'}
                                           ]},
                                 'output': {'type': 'ndarray', 'description': 'scale_mult',
                                            'shape': [None, 3], 'item_type': {'type': 'float'}},
                                 'parameters': {},
                                 'method_details': {'input_nodes': ['Input:0', 'ScalarMultiple:0'],
                                                    'output_nodes': ['scale_mult:0']}
                             }},
                             'shim': 'tensorflow.TensorFlowServable',
                             'type': 'TensorFlow Model'})

        validate_against_dlhub_schema(metadata, 'servable')
Example #8
    def test_codemeta(self):
        m = TabularDataset()

        # Read in the codemeta file
        odir = os.getcwd()
        try:
            os.chdir(os.path.dirname(__file__))
            m.read_codemeta_file()
        finally:
            os.chdir(odir)

        # Check that it produces valid datacite
        validate_against_dlhub_schema(m['datacite'], 'datacite-v4.1')
Example #9
def test_visibility():
    model = PythonStaticMethodModel.create_model('numpy.linalg', 'norm')
    model.set_name('1d_norm')
    model.set_title('Norm of a 1D Array')
    model.set_inputs('ndarray', 'Array to be normed', shape=[None])
    model.set_outputs('number', 'Norm of the array')

    model.set_visibility(users=['bec215bc-9169-4be9-af49-4872b5e11ef8'
                                ])  # Setting visibility to a user
    validate_against_dlhub_schema(model.to_dict(), 'servable')
    assert model.dlhub.visible_to[0].startswith('urn:globus:auth:identity:')

    model.set_visibility(groups=['fdb38a24-03c1-11e3-86f7-12313809f035'
                                 ])  # Setting visibility to a group
    validate_against_dlhub_schema(model.to_dict(), 'servable')
    assert len(
        model.dlhub.visible_to) == 1  # Ensure was replaced, not appended
    assert model.dlhub.visible_to[0].startswith('urn:globus:groups:id:')

    model.set_visibility(users=['foo'])  # Test using a non-UUID for user
    with raises(ValidationError):
        validate_against_dlhub_schema(model.to_dict(), 'servable')

    model.set_visibility()  # Default visibility is "public"
    validate_against_dlhub_schema(model.to_dict(), 'servable')
    assert model.dlhub.visible_to == ['public']
Example #10
    def test_multiarg(self):
        """Test making descriptions with more than one argument"""

        # Initialize the model
        model = PythonStaticMethodModel.from_function_pointer(max)
        model.set_name('test').set_title('test')

        # Define the inputs and outputs
        model.set_inputs('tuple',
                         'Two numbers',
                         element_types=[
                             compose_argument_block('float', 'A number'),
                             compose_argument_block('float', 'A second number')
                         ])
        model.set_outputs('float', 'Maximum of the two numbers')

        # Mark that the inputs should be unpacked
        model.set_unpack_inputs(True)

        # Check the description
        self.assertEqual(
            model['servable']['methods']['run'], {
                'input': {
                    'type':
                    'tuple',
                    'description':
                    'Two numbers',
                    'element_types': [{
                        'type': 'float',
                        'description': 'A number'
                    }, {
                        'type': 'float',
                        'description': 'A second number'
                    }]
                },
                'output': {
                    'type': 'float',
                    'description': 'Maximum of the two numbers'
                },
                'method_details': {
                    'module': 'builtins',
                    'method_name': 'max',
                    'unpack': True,
                    'autobatch': False
                },
                'parameters': {}
            })

        validate_against_dlhub_schema(model.to_dict(), 'servable')
Example #11
    def test_load_model(self):
        model_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'model.pkl'))

        # Load the model
        model_info = ScikitLearnModel.create_model(model_path, n_input_columns=4, classes=3)
        model_info.set_title('Sklearn example').set_name('sklearn')

        # Print out the metadata
        metadata = model_info.to_dict()

        # Test key components
        assert metadata['dlhub']['dependencies']['python'] == {
            'scikit-learn': '0.19.1'  # The version used to save the model
        }
        assert metadata['servable']['shim'] == 'sklearn.ScikitLearnServable'
        assert metadata['servable']['model_type'] == 'SVC'
        assert metadata['servable']['methods']['run'] == {
            "input": {
                "type": "ndarray",
                "shape": [None, 4],
                "description": ("List of records to evaluate with model. "
                                "Each record is a list of 4 variables."),
                "item_type": {
                    "type": "float"
                }
            },
            "output": {
                "type": "ndarray",
                "shape": [None, 3],
                "description": "Probabilities for membership in each of 3 classes",
                "item_type": {
                    "type": "float"
                }
            },
            "parameters": {},
            "method_details": {
                "method_name": "_predict_proba"
            }
        }
        assert metadata['servable']['model_summary'].startswith('SVC(')
        assert metadata['servable']['options'] == {
            'is_classifier': True,
            'serialization_method': 'pickle',
            'classes': ['Class 1', 'Class 2', 'Class 3']
        }

        # Check the schema validation
        validate_against_dlhub_schema(model_info.to_dict(), 'servable')
Example #12
    def test_keras_multioutput(self):
        # Make a Keras model
        input_layer = keras.layers.Input(shape=(4, ))
        dense = keras.layers.Dense(16, activation='relu')(input_layer)
        output_1 = keras.layers.Dense(1, activation='relu')(dense)
        output_2 = keras.layers.Dense(2, activation='softmax')(dense)
        model = keras.models.Model([input_layer], [output_1, output_2])
        model.compile(optimizer='rmsprop', loss='mse')

        # Save it to disk
        tempdir = mkdtemp()
        try:
            model_path = os.path.join(tempdir, 'model.hd5')
            model.save(model_path)

            # Create a model
            metadata = KerasModel.create_model(model_path,
                                               [['y'], ['yes', 'no']])
            metadata.set_title('Keras Test')
            metadata.set_name('mlp')

            self.assertEqual(
                metadata['servable']['methods']['run']['output'], {
                    'type':
                    'tuple',
                    'description':
                    'Tuple of tensors',
                    'element_types': [{
                        'type': 'ndarray',
                        'description': 'Tensor',
                        'shape': [None, 1]
                    }, {
                        'type': 'ndarray',
                        'description': 'Tensor',
                        'shape': [None, 2]
                    }]
                })

            output = metadata.to_dict()

            # Validate against schema
            validate_against_dlhub_schema(output, 'servable')
        finally:
            shutil.rmtree(tempdir)
Example #13
    def test_keras_single_input(self):
        # Make a Keras model
        model = _make_simple_model()

        # Save it to disk
        tempdir = mkdtemp()
        try:
            model_path = os.path.join(tempdir, 'model.hd5')
            model.save(model_path)

            # Create a model
            metadata = KerasModel.create_model(model_path, ["y"])
            metadata.set_title('Keras Test')
            metadata.set_name('mlp')

            # Validate against schema
            output = metadata.to_dict()
            validate_against_dlhub_schema(output, 'servable')
        finally:
            shutil.rmtree(tempdir)
Example #14
    def test_tf(self):
        # Make a model and save it to disk
        if tf.__version__ < '2':
            _make_model_v1()
        else:
            _make_model_v2()

        # Create the description
        model = TensorFlowModel.create_model(tf_export_path).set_title('TF Test')\
            .set_name('tf-test')

        # Generate the metadata for the test
        metadata = model.to_dict(simplify_paths=True)

        # Make sure the files are there
        my_files = metadata['dlhub']['files']['other']
        assert 'saved_model.pb' in my_files
        assert os.path.join('variables', 'variables.data-00000-of-00001') in my_files
        assert os.path.join('variables', 'variables.index') in my_files

        # Check the tensorflow version
        self.assertEqual(metadata['dlhub']['dependencies'],
                         {'python': {'tensorflow': tf.__version__}})

        # Check the input types; 'x' should be listed first for the multiple-input method
        my_methods = metadata['servable']['methods']
        assert my_methods['run']['input']['type'] == 'ndarray'
        assert my_methods['run']['input']['shape'] == [None, 3]
        assert my_methods['run']['input']['item_type'] == {'type': 'float'}

        assert my_methods['scalar_multiply']['input']['type'] == 'tuple'
        assert my_methods['scalar_multiply']['input']['element_types'][0]['shape'] == [None, 3]
        assert my_methods['scalar_multiply']['input']['element_types'][1]['shape'] == []

        assert 'length' in my_methods
        assert 'scalar_multiply' in my_methods

        # Check the shim
        assert metadata['servable']['shim'] == 'tensorflow.TensorFlowServable'

        validate_against_dlhub_schema(metadata, 'servable')
Example #15
def test_function():
    f = math.sqrt

    # Make the model
    model = PythonStaticMethodModel.from_function_pointer(f, autobatch=True)
    model.set_name("static_method").set_title('Python example')

    # Describe the inputs/outputs
    model.set_inputs('list', 'List of numbers', item_type='float')
    model.set_outputs('float', 'Square root of the number')

    # Generate the output
    output = model.to_dict()
    assert output['servable'] == {
        'type': 'Python static method',
        'shim': 'python.PythonStaticMethodServable',
        'options': {},
        'methods': {
            'run': {
                'input': {
                    'type': 'list',
                    'description': 'List of numbers',
                    'item_type': {
                        'type': 'float'
                    }
                },
                'output': {
                    'type': 'float',
                    'description': 'Square root of the number'
                },
                'parameters': {},
                'method_details': {
                    'module': 'math',
                    'method_name': 'sqrt',
                    'autobatch': True
                }
            }
        }
    }
    validate_against_dlhub_schema(output, 'servable')
Example #16
def test_multiargs_pickle(tmpdir):
    # Make an example class
    x = ExampleClass(2)

    # Save a pickle
    filename = str(tmpdir / 'test.pkl')
    with open(filename, 'wb') as fp:
        pkl.dump(x, fp)

    # Make the metadata file
    model = PythonClassMethodModel.create_model(filename, 'f')
    model.set_title('Example function')
    model.set_name('function')
    model.set_inputs('tuple', 'inputs',
                     element_types=[compose_argument_block('float', 'Number')] * 2)
    model.set_outputs('float', 'Output')
    model.set_unpack_inputs(True)

    # Make the servable
    validate_against_dlhub_schema(model.to_dict(), 'servable')
    servable = PythonClassMethodServable(**model.to_dict())

    # Test the servable
    assert 4 == servable.run([1, 2])[0]
Example #17
def test_torch_single_input(tmpdir):
    # Make a simple Torch model
    model = _make_simple_model()

    # Save it to disk
    model_path = os.path.join(tmpdir, 'model.pt')
    torch.save(model, model_path)

    # Create a model
    metadata = TorchModel.create_model(model_path, (2, 4), (3, 5))
    metadata.set_title('Torch Test')
    metadata.set_name('mlp')

    output = metadata.to_dict()
    assert output["dlhub"] == {
        "version": __version__,
        "domains": [],
        "visible_to": ['public'],
        'type': 'servable',
        "name": "mlp",
        "files": {
            "model": model_path
        },
        "dependencies": {
            "python": {
                'torch': torch.__version__
            }
        }
    }
    print("\n" + output["servable"]["model_summary"])
    assert output["servable"] == {
        "methods": {
            "run": {
                "input": {
                    "type": "ndarray",
                    "description": "Tensor",
                    "shape": [2, 4],
                    "item_type": {
                        "type": "float"
                    }
                },
                "output": {
                    "type": "ndarray",
                    "description": "Tensor",
                    "shape": [3, 5],
                    "item_type": {
                        "type": "float"
                    }
                },
                "parameters": {},
                "method_details": {
                    "method_name": "__call__"
                }
            }
        },
        "type": "Torch Model",
        "shim": "torch.TorchServable",
        "model_type": "Deep NN",
        "model_summary": """Net(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=800, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=10, bias=True)
)""",
        "options": {}
    }

    # Validate against schema
    validate_against_dlhub_schema(output, 'servable')
Example #18
    def test_pipeline(self):
        """Make a pipeline composed of two numpy steps"""

        # Generate the two steps
        step1 = PythonStaticMethodModel.create_model('numpy', 'max', function_kwargs={'axis': 1})\
            .set_name('step1')
        step2 = PythonStaticMethodModel.create_model('numpy',
                                                     'mean').set_name('step2')

        # Make the pipeline
        pipeline = PipelineModel().set_title(
            'Average of Column Maximums').set_name('numpy_test')
        pipeline.add_step('username', step1.name, 'Maximum of each column',
                          {'axis': 0})
        pipeline.add_step('username', step2.name, 'Average of the maximums')

        # Generate the pipeline metadata
        metadata = pipeline.to_dict()
        correct_metadata = {
            'datacite': {
                'creators': [],
                'titles': [{
                    'title': 'Average of Column Maximums'
                }],
                'publisher': 'DLHub',
                'publicationYear': _year,
                'identifier': {
                    'identifier': '10.YET/UNASSIGNED',
                    'identifierType': 'DOI'
                },
                'resourceType': {
                    'resourceTypeGeneral': 'InteractiveResource'
                },
                "descriptions": [],
                "fundingReferences": [],
                "relatedIdentifiers": [],
                "alternateIdentifiers": [],
                "rightsList": []
            },
            'dlhub': {
                'version': __version__,
                'domains': [],
                'visible_to': ['public'],
                'name': 'numpy_test',
                'type': 'pipeline',
                'files': {}
            },
            'pipeline': {
                'steps': [{
                    'author': 'username',
                    'name': step1.name,
                    'description': 'Maximum of each column',
                    'parameters': {
                        'axis': 0
                    }
                }, {
                    'author': 'username',
                    'name': step2.name,
                    'description': 'Average of the maximums'
                }]
            }
        }
        self.assertEqual(metadata, correct_metadata)
        validate_against_dlhub_schema(metadata, 'pipeline')
Example #19
    def test_pickle(self):
        pickle_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'model.pkl'))

        # Make the model
        model = PythonClassMethodModel.create_model(pickle_path, 'predict_proba', {'fake': 'kwarg'})
        model.set_title('Python example').set_name("class_method")

        # Make sure it throws value errors if inputs are not set
        with self.assertRaises(ValueError):
            model.to_dict()

        # Define the input and output types
        model.set_inputs('ndarray', 'Features for each entry', shape=[None, 4])
        model.set_outputs('ndarray', 'Predicted probabilities of being each iris species',
                          shape=[None, 3])

        # Make sure attempting to set "unpack" fails
        with self.assertRaises(ValueError):
            model.set_unpack_inputs(True)

        # Add some requirements
        model.add_requirement('scikit-learn', 'detect')
        model.add_requirement('numpy', 'detect')
        model.add_requirement('sklearn', 'latest')  # Dummy project, version # shouldn't change

        # Check the model output
        output = model.to_dict()
        correct_output = {
            'datacite': {
                'creators': [],
                'titles': [{
                    'title': 'Python example'
                }],
                'publisher': 'DLHub',
                'resourceType': {
                    'resourceTypeGeneral': 'InteractiveResource'
                },
                'identifier': {
                    'identifier': '10.YET/UNASSIGNED',
                    'identifierType': 'DOI'
                },
                'publicationYear': _year,
                "descriptions": [],
                "fundingReferences": [],
                "relatedIdentifiers": [],
                "alternateIdentifiers": [],
                "rightsList": []
            },
            'dlhub': {
                'version': __version__,
                'domains': [],
                'visible_to': ['public'],
                'name': 'class_method',
                'type': 'servable',
                'files': {
                    'pickle': pickle_path
                },
                'dependencies': {
                    'python': {
                        'scikit-learn': skl_version,
                        'numpy': numpy_version,
                        'sklearn': '0.0'
                    }
                }
            },
            'servable': {
                'type': 'Python class method',
                'shim': 'python.PythonClassMethodServable',
                'methods': {
                    'run': {
                        'input': {
                            'type': 'ndarray',
                            'description': 'Features for each entry',
                            'shape': [None, 4]
                        },
                        'output': {
                            'type': 'ndarray',
                            'description': 'Predicted probabilities of being each iris species',
                            'shape': [None, 3]
                        },
                        'parameters': {
                            'fake': 'kwarg'
                        },
                        'method_details': {
                            'class_name': 'sklearn.svm.classes.SVC',
                            'method_name': 'predict_proba'
                        },
                    }
                }
            }
        }
        self.assertEqual(output, correct_output)
        self.assertEqual([pickle_path], model.list_files())
        validate_against_dlhub_schema(output, 'servable')
Example #20
    def test_keras_single_input(self):
        # Make a Keras model
        model = _make_simple_model()

        # Save it to disk
        tempdir = mkdtemp()
        try:
            model_path = os.path.join(tempdir, 'model.hd5')
            model.save(model_path)

            # Create a model
            metadata = KerasModel.create_model(model_path, ["y"])
            metadata.set_title('Keras Test')
            metadata.set_name('mlp')

            output = metadata.to_dict()
            self.assertEqual(
                output, {
                    "datacite": {
                        "creators": [],
                        "titles": [{
                            "title": "Keras Test"
                        }],
                        "publisher": "DLHub",
                        "publicationYear": _year,
                        "identifier": {
                            "identifier": "10.YET/UNASSIGNED",
                            "identifierType": "DOI"
                        },
                        "resourceType": {
                            "resourceTypeGeneral": "InteractiveResource"
                        },
                        "descriptions": [],
                        "fundingReferences": [],
                        "relatedIdentifiers": [],
                        "alternateIdentifiers": [],
                        "rightsList": []
                    },
                    "dlhub": {
                        "version": __version__,
                        "domains": [],
                        "visible_to": ["public"],
                        'type': 'servable',
                        "name": "mlp",
                        "files": {
                            "model": model_path
                        },
                        "dependencies": {
                            "python": {
                                'keras': keras_version,
                                'h5py': h5py_version,
                                'tensorflow': tf_version
                            }
                        }
                    },
                    "servable": {
                        "methods": {
                            "run": {
                                "input": {
                                    "type": "ndarray",
                                    "description": "Tensor",
                                    "shape": [None, 1]
                                },
                                "output": {
                                    "type": "ndarray",
                                    "description": "Tensor",
                                    "shape": [None, 1]
                                },
                                "parameters": {},
                                "method_details": {
                                    "method_name": "predict",
                                    "classes": ["y"]
                                }
                            }
                        },
                        "type":
                        "Keras Model",
                        "shim":
                        "keras.KerasServable",
                        "model_type":
                        "Deep NN",
                        "model_summary":
                        """_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
hidden (Dense)               (None, 16)                32        
_________________________________________________________________
output (Dense)               (None, 1)                 17        
=================================================================
Total params: 49
Trainable params: 49
Non-trainable params: 0
_________________________________________________________________
"""
                    }
                })  # noqa: W291 (trailing whitespace needed for text match)

            # Validate against schema
            validate_against_dlhub_schema(output, 'servable')
        finally:
            shutil.rmtree(tempdir)
Example #21
    def test_tabular_dataset(self):
        data_path = os.path.abspath(
            os.path.join(os.path.dirname(__file__), 'test.csv'))
        m = TabularDataset.create_model(data_path)

        # Add some nonsense
        m.set_title('Example dataset')
        m.set_name('example_dataset')
        with self.assertRaises(ValueError):
            m.set_name('has whitespace')
        m.mark_inputs(['x'])
        m.mark_labels(['y'])
        m.annotate_column('x', description='Input variable', units='cm')
        m.annotate_column('y', data_type='scalar')
        self.assertEqual(
            m.to_dict(), {
                "datacite": {
                    "titles": [{
                        'title': "Example dataset"
                    }],
                    "creators": [],
                    "publisher": "DLHub",
                    "resourceType": {
                        "resourceTypeGeneral": "Dataset"
                    },
                    "publicationYear": _year,
                    'identifier': {
                        'identifier': '10.YET/UNASSIGNED',
                        'identifierType': 'DOI'
                    },
                    "descriptions": [],
                    "fundingReferences": [],
                    "relatedIdentifiers": [],
                    "alternateIdentifiers": [],
                    "rightsList": [],
                },
                "dlhub": {
                    "version": __version__,
                    "visible_to": ["public"],
                    "domains": [],
                    "name": "example_dataset",
                    "type": "dataset",
                    "files": {
                        'data': data_path
                    }
                },
                "dataset": {
                    "columns": [{
                        "name": "x",
                        "description": "Input variable",
                        "type": "integer",
                        "units": "cm"
                    }, {
                        "name": "y",
                        "type": "scalar"
                    }],
                    "inputs": ["x"],
                    "labels": ["y"],
                    "format":
                    "csv",
                    "read_options": {}
                }
            })
        validate_against_dlhub_schema(m.to_dict(), "dataset")

        # Test the simplification of files
        metadata = m.to_dict(simplify_paths=True)
        self.assertEqual({'data': 'test.csv'}, metadata['dlhub']['files'])

        # Make sure the paths saved in the object have not changed
        self.assertEqual({'data': data_path}, m['dlhub']['files'])
Example #22
    def test_load_model(self):
        model_path = os.path.abspath(
            os.path.join(os.path.dirname(__file__), 'model.pkl'))

        # Load the model
        model_info = ScikitLearnModel.create_model(model_path,
                                                   n_input_columns=4,
                                                   classes=3)
        model_info.set_title('Sklearn example').set_name('sklearn')
        expected = {
            'datacite': {
                'creators': [],
                'publisher': 'DLHub',
                'titles': [{
                    'title': 'Sklearn example'
                }],
                'resourceType': {
                    'resourceTypeGeneral': "InteractiveResource"
                },
                'identifier': {
                    'identifier': '10.YET/UNASSIGNED',
                    'identifierType': 'DOI'
                },
                'publicationYear': _year,
                "descriptions": [],
                "fundingReferences": [],
                "relatedIdentifiers": [],
                "alternateIdentifiers": [],
                "rightsList": []
            },
            "dlhub": {
                "version": __version__,
                "visible_to": ["public"],
                'name': 'sklearn',
                "domains": [],
                'type': 'servable',
                'files': {
                    'model': model_path
                },
                'dependencies': {
                    'python': {
                        'scikit-learn': '0.19.1'
                    }
                }
            },
            'servable': {
                'type':
                'Scikit-learn estimator',
                'model_type':
                'SVC',
                'shim':
                'sklearn.ScikitLearnServable',
                'methods': {
                    'run': {
                        "input": {
                            "type":
                            "ndarray",
                            "shape": [None, 4],
                            "description":
                            ("List of records to evaluate with model. "
                             "Each record is a list of 4 variables."),
                            "item_type": {
                                "type": "float"
                            }
                        },
                        "output": {
                            "type": "ndarray",
                            "shape": [None, 3],
                            "description":
                            "Probabilities for membership in each of 3 classes",
                            "item_type": {
                                "type": "float"
                            }
                        },
                        "parameters": {},
                        "method_details": {
                            "method_name": "_predict_proba"
                        }
                    }
                },
                'model_summary':
                ("SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,\n"
                 "    decision_function_shape='ovr', degree=3, gamma='auto', "
                 "kernel='linear',\n"
                 "    max_iter=-1, probability=True, random_state=None,"
                 " shrinking=True, tol=0.001,\n"
                 "    verbose=False)"),
                'options': {
                    'is_classifier': True,
                    'serialization_method': 'pickle',
                    'classes': ['Class 1', 'Class 2', 'Class 3']
                }
            }
        }
        self.assertEqual(model_info.to_dict(), expected)
        validate_against_dlhub_schema(model_info.to_dict(), 'servable')
model.set_title("Classification Model for AMDET Properties")
model.set_name("deep-smiles_model")
model.set_abstract(
    "A deep learning model that predicts AMDET properties given a SMILES string of a molecule."
)
model.add_alternate_identifier("https://github.com/MengyuanZhu/Deep-SMILES",
                               "URL")

#  Add requirements
model.add_requirement('tensorflow', 'detect')
model.add_requirement('keras', 'detect')

# Sanity Check: Make sure it fits the schema
metadata = model.to_dict()
print(json.dumps(metadata, indent=2))
validate_against_dlhub_schema(metadata, 'servable')
with open('model_metadata.json', 'w') as fp:
    json.dump(metadata, fp, indent=2)

# Describe the encoding step
#  The first step is to turn a string into a list of integers
string_length = model.input['shape'][-1]
model = PythonStaticMethodModel('app',
                                'encode_string',
                                function_kwargs={'length': string_length},
                                autobatch=True)

#  Describe the inputs and outputs
model.set_inputs('list', 'List of SMILES strings', item_type='string')
model.set_outputs('list',
                  'List of encoded strings.',
Example #24
 def test_dataset(self):
     m = Dataset().set_authors(["Ward, Logan"], ["University of Chicago"])\
         .set_title("Example dataset").add_alternate_identifier("10.11", "DOI")\
         .add_related_identifier("10.11", "DOI", 'IsDescribedBy')\
         .add_funding_reference("ANL LDRD", '1', 'ISNI', '201801',
                                'DLHub', 'http://funding.uri')\
         .set_version(1)\
         .add_rights("https://www.gnu.org/licenses/gpl-3.0.en.html", "GPL v3.0")\
         .set_abstract("Abstract").set_methods("Methods")\
         .set_visibility(['public']).set_domains(["materials science"]).set_name("example_data")
     correct_entry = {
         "datacite": {
             "creators": [{
                 "givenName": "Logan",
                 "familyName": "Ward",
                 "affiliations": "University of Chicago"
             }],
             "titles": [{
                 'title': "Example dataset"
             }],
             "publisher":
             'DLHub',
             "publicationYear":
             _year,
             "version":
             '1',
             "resourceType": {
                 "resourceTypeGeneral": "Dataset"
             },
             "descriptions": [{
                 "description": "Abstract",
                 "descriptionType": "Abstract"
             }, {
                 "description": "Methods",
                 "descriptionType": "Methods"
             }],
             "fundingReferences": [{
                 "awardNumber": {
                     "awardNumber": "201801",
                     "awardURI": "http://funding.uri"
                 },
                 "awardTitle": "DLHub",
                 "funderIdentifier": {
                     'funderIdentifier': '1',
                     'funderIdentifierType': 'ISNI'
                 },
                 "funderName": "ANL LDRD"
             }],
             "relatedIdentifiers": [{
                 "relatedIdentifier": "10.11",
                 "relatedIdentifierType": "DOI",
                 "relationType": "IsDescribedBy"
             }],
             "alternateIdentifiers": [{
                 "alternateIdentifier": "10.11",
                 "alternateIdentifierType": "DOI"
             }],
             "rightsList": [{
                 "rightsURI":
                 "https://www.gnu.org/licenses/gpl-3.0.en.html",
                 "rights": "GPL v3.0"
             }],
             'identifier': {
                 'identifier': '10.YET/UNASSIGNED',
                 'identifierType': 'DOI'
             },
         },
         "dlhub": {
             "version": __version__,
             "visible_to": ["public"],
             "domains": ["materials science"],
             "name": "example_data",
             'type': 'dataset',
             "files": {}
         },
         "dataset": {}
     }
     self.assertEqual(m.to_dict(), correct_entry)
     validate_against_dlhub_schema(m.to_dict(), "dataset")
Example #25
    def test_function(self):
        f = math.sqrt

        # Make the model
        model = PythonStaticMethodModel.from_function_pointer(f, autobatch=True)
        model.set_name("static_method").set_title('Python example')

        # Describe the inputs/outputs
        model.set_inputs('list', 'List of numbers', item_type='float')
        model.set_outputs('float', 'Square root of the number')

        # Generate the output
        output = model.to_dict()
        correct_output = {
            'datacite': {
                'creators': [],
                'titles': [{
                    'title': 'Python example'
                }],
                'publisher': 'DLHub',
                'resourceType': {
                    'resourceTypeGeneral': 'InteractiveResource'
                },
                'identifier': {
                    'identifier': '10.YET/UNASSIGNED',
                    'identifierType': 'DOI'
                },
                'publicationYear': _year,
                "descriptions": [],
                "fundingReferences": [],
                "relatedIdentifiers": [],
                "alternateIdentifiers": [],
                "rightsList": []
            },
            'dlhub': {
                'version': __version__,
                'domains': [],
                'visible_to': ['public'],
                "name": "static_method",
                'type': 'servable',
                'files': {}
            },
            'servable': {
                'type': 'Python static method',
                'shim': 'python.PythonStaticMethodServable',
                'methods': {
                    'run': {
                        'input': {
                            'type': 'list',
                            'description': 'List of numbers',
                            'item_type': {
                                'type': 'float'
                            }
                        },
                        'output': {
                            'type': 'float',
                            'description': 'Square root of the number'
                        },
                        'parameters': {},
                        'method_details': {
                            'module': 'math',
                            'method_name': 'sqrt',
                            'autobatch': True
                        }
                    }
                }
            }
        }
        self.assertEqual(output, correct_output)
        validate_against_dlhub_schema(output, 'servable')
def dump_metadata(model, path):
    metadata = model.to_dict()
    print(json.dumps(metadata, indent=2))
    validate_against_dlhub_schema(metadata, 'servable')
    with open(path, 'w') as fp:
        json.dump(metadata, fp, indent=2)