def __init__(self, model, description=''):
    if not isinstance(model, graphlab.Model) and not is_path(model):
        raise TypeError('Model must be a GraphLab model or a path to a model.')

    super(ModelPredictiveObject, self).__init__(description)
    self._set_model(model)
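# Usage sketch (illustrative only, not part of the class): a ModelPredictiveObject
# can wrap either an in-memory GraphLab model or a path to a previously saved
# model. The variable 'my_model' and the S3 path below are hypothetical.
#
#   po = ModelPredictiveObject(my_model, description='scores incoming queries')
#   po = ModelPredictiveObject('s3://my-bucket/saved_model', description='loaded later')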
def _set_model(self, model):
    self.dependencies = {'model': model}

    if is_path(model):
        # This is a path; we can't introspect the model until it is loaded.
        self._model_methods = {}
        return

    self._model_methods = model._get_queryable_methods()
    if type(self._model_methods) != dict:
        raise RuntimeError("_get_queryable_methods for model %s should return a "
                           "dictionary" % model.__class__)

    for (method, description) in self._model_methods.iteritems():
        if type(description) != dict:
            raise RuntimeError("model %s _get_queryable_methods should use dict as "
                               "method description." % model.__class__)
        for (param_name, param_type) in description.iteritems():
            if param_type not in ['sframe', 'sarray']:
                raise RuntimeError("model %s _get_queryable_methods should only use "
                                   "'sframe' or 'sarray' type. %s is not supported"
                                   % (model.__class__, param_type))
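# Sketch of the contract _set_model enforces (illustrative names only): a model's
# _get_queryable_methods() is expected to return a dict mapping each queryable
# method name to a dict of parameter names and their types, where every type is
# either 'sframe' or 'sarray'. For example, a hypothetical model might return:
#
#   {'predict': {'dataset': 'sframe'},
#    'query':   {'item_ids': 'sarray'}}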
def _save_imp(self, po_path, dependency_path, aws_credentials):
    '''Save the predictive object to a directory

    The files for a predictive object are laid out the following way:

      po_path/definition/meta       -- serialized JSON file about the predictive
                                       object, including: description, dependencies, etc.
      po_path/definition/definition -- cloudpickle-serialized PredictiveObject
      dependency_path               -- all dependent GraphLab objects, each in its
                                       own directory:
        dependency_path/uri1/       -- serialized GraphLab object with uri1
        dependency_path/uri2/       -- serialized GraphLab object with uri2
    '''
    fu.create_directory(po_path)

    describe = {
        'description': self.description,
        'dependencies': {},
        'schema_version': self.schema_version
    }

    for (uri, gl_obj) in self.dependencies.iteritems():
        # If the dependency is an in-memory object, save it to a temporary local
        # directory first so it can be copied like any other saved object.
        temp_path = None
        try:
            if not fu.is_path(gl_obj):
                obj_type = self._get_graphlab_object_type(gl_obj)
                temp_path = tempfile.mkdtemp()
                __logger__.info("Saving dependent GraphLab %s (%s) locally to '%s' "
                                % (obj_type, uri, temp_path))
                gl_obj.save(temp_path)
                gl_obj = temp_path
            else:
                obj_type = get_graphlab_object_type(gl_obj)

            # Copy the saved object to its destination without loading it.
            save_path = os.path.join(dependency_path, uri)
            __logger__.info("Copying dependent GraphLab %s (%s) from '%s' to '%s' "
                            % (obj_type, uri, gl_obj, save_path))

            if fu.is_s3_path(gl_obj) and fu.is_s3_path(save_path):
                fu.intra_s3_copy_model(gl_obj, save_path, aws_credentials)
            elif fu.is_local_path(gl_obj) and fu.is_s3_path(save_path):
                fu.s3_copy_model(gl_obj, save_path, aws_credentials)
            elif fu.is_local_path(gl_obj) and fu.is_local_path(save_path):
                # Useful for unit tests
                shutil.copytree(gl_obj, save_path)
            else:
                raise RuntimeError("Copying a GraphLab object from S3 to a local path "
                                   "is not supported. GraphLab object path: %s, "
                                   "save path: %s" % (gl_obj, save_path))
        finally:
            if temp_path:
                shutil.rmtree(temp_path)

        # Record this dependency in the global describe dictionary.
        describe['dependencies'][uri] = {
            'path': save_path,
            'type': obj_type
        }

    # Persist the global description.
    describe_path = self._get_describe_path(po_path)
    self._save_object(describe_path, describe)

    # Persist the definition of this predictive object.
    definition_path = self._get_definition_path(po_path)
    try:
        with open(definition_path, 'wb') as f:
            _cloudpickle.dump(self, f)
    except Exception as e:
        __logger__.error('Unable to save object: %s' % (e.message))
        raise e
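# Sketch of the metadata _save_imp persists to po_path/definition/meta (all values
# below are hypothetical; only the key structure follows from the code above):
#
#   {'description': 'scores incoming queries',
#    'schema_version': 2,
#    'dependencies': {
#        'model': {'path': '<dependency_path>/model', 'type': 'Model'}}}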