Example #1
    def _deserialize_output(self, task):
        """
        Deserialize the output from a task.

        Parameters
        ----------
        task
            The task definition of interest.

        Returns
        -------
        The output of the run-time task associated with the task definition.
        """
        filepath = self._task_output_paths[task]

        non_hdfs_file_path = filepath

        # The unpickler does not support passing in an additional HADOOP_CONF_DIR,
        # so we download the HDFS folder first before handing it to the unpickler.
        if _file_util.is_hdfs_path(filepath):
            non_hdfs_file_path = _make_temp_directory("job_output_")
            _file_util.download_from_hdfs(filepath, non_hdfs_file_path,
                hadoop_conf_dir=self.environment.hadoop_conf_dir, is_dir=True)

        unpickler = gl_pickle.GLUnpickler(non_hdfs_file_path)

        # We cannot delete this temporary file path because SFrame lazily loads
        # its content from disk. The temporary folder will be removed
        # eventually when the Python session goes away.

        return unpickler.load()
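For orientation, here is a minimal sketch of the write side this method assumes: the task output is pickled with gl_pickle.GLPickler (the pickling counterpart of GLUnpickler) to the path recorded in self._task_output_paths. The method name and exact calls below are illustrative assumptions, not part of the original code.

    def _serialize_output(self, task, output):
        # Hypothetical counterpart sketch: pickle the task output to its
        # recorded path so _deserialize_output can read it back later.
        filepath = self._task_output_paths[task]
        pickler = gl_pickle.GLPickler(filepath)
        pickler.dump(output)
        pickler.close()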
Example #2
def load_artifact(filename):
    """
    Load an artifact object.

    Parameters
    ----------
    filename : str
        A GLPickle archive filename.

    """
    unpickler = gl_pickle.GLUnpickler(filename)

    # Get the version
    version = unpickler.load()

    # Load the constructor that knows how to load an empty object
    post_import_cls = unpickler.load()

    # Construct an empty object by resolving the module and then the constructor
    module = eval(post_import_cls.__module__)   # requires graphlab to have been imported
    cls = getattr(module, post_import_cls.__name__)

    # Load the object with the right version.
    obj = cls._load_version(unpickler, version)

    unpickler.close()

    # Return the object
    return obj
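For contrast, a hypothetical save-side counterpart of load_artifact, assuming the artifact exposes _get_version() and _save_impl(pickler) hooks as the loader above implies; the function name and those hooks are assumptions for illustration only.

def save_artifact(obj, filename):
    # Illustrative sketch: mirrors the load order used by load_artifact.
    pickler = gl_pickle.GLPickler(filename)
    pickler.dump(obj._get_version())   # version, read back first by load_artifact
    pickler.dump(obj.__class__)        # class, used to locate _load_version
    obj._save_impl(pickler)            # object-specific state
    pickler.close()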
Example #3
def _deserialize(file_path):
    """
    Takes a path to a serialized job file. Returns the deserialized
    job object.
    """
    unpickler = gl_pickle.GLUnpickler(file_path)
    ret = unpickler.load()
    unpickler.close()
    return ret
Example #4
    def _load_local(cls, path):
        path = fu.expand_full_path(path)
        if not os.path.exists(path):
            raise RuntimeError("Path %s does not exist." % path)

        try:
            unpickler = _gl_pickle.GLUnpickler(path)
            po_schema_version = unpickler.load()
            required_files = unpickler.load()

            # Lay out the required files before loading the function.
            # Required files have been managed separately from the Predictive
            # Object since schema version 4 (jumped to 6 to be in line with the
            # PS version), so we no longer need to deserialize them as part of load.
            if po_schema_version <= 3:
                cls._deserialize_required_files(required_files)
            else:
                # No need to load the dependent files after schema version 3
                # because the loading is handled at a higher level by the
                # Predictive Service.
                pass
            po_obj = unpickler.load()
            unpickler.close()

        except Exception as e:
            import traceback
            trace = traceback.format_exc()
            err_msg = "Traceback\n %s\n" % trace
            err_msg += "Error type    : %s\n" % e.__class__.__name__
            err_msg += "Error message : %s\n" % str(e)

            raise RuntimeError('Unable to load predictive object. Error: %s' %
                               err_msg)

        if po_schema_version > _PREDICTIVE_OBJECT_SCHEMA_VERSION:
            raise RuntimeError("Your GraphLab Create only supports Predictive "
                "Objects with schema version up to '%s', the Predictive Object "
                "you are trying to load has schema version '%s'. Please upgrade your "
                "GraphLab Create version to most up-to-date one." % \
                (_PREDICTIVE_OBJECT_SCHEMA_VERSION, po_schema_version))

        return po_obj
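For reference, a hypothetical sketch (not GraphLab's actual save routine) of the on-disk layout _load_local expects, matching the order of its three unpickler.load() calls; the helper name is an assumption.

def _save_local_sketch(po_obj, path, required_files):
    # Write the pieces in the order _load_local reads them back.
    pickler = _gl_pickle.GLPickler(path)
    pickler.dump(_PREDICTIVE_OBJECT_SCHEMA_VERSION)  # po_schema_version
    pickler.dump(required_files)                     # required-files manifest
    pickler.dump(po_obj)                             # the Predictive Object itself
    pickler.close()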
Example #5
        def model_wrapper(unity_proxy):

            # Load the proxy object. This returns a proxy object with
            # 'temp_file' set to where the object is pickled.
            model_proxy = proxy_wrapper(unity_proxy)
            temp_file = model_proxy.temp_file

            # Setup the unpickler.
            unpickler = gl_pickle.GLUnpickler(temp_file)

            # Get the version
            version = unpickler.load()

            # Load the class name.
            cls_name = unpickler.load()
            cls = _get_class_from_name(cls_name)

            # Load the object with the right version.
            obj = cls._load_version(unpickler, version)

            # Return the object
            return obj
Example #6
    def _get_map_job_results(self, _silent=True):
        '''
        Get the results of all map jobs.

        Returns
        -------
        job outputs : list
            A list of results from the job. If a certain job failed, its
            result is None.
        '''

        result_folder = self.get_path_join_method()(self._exec_dir, 'output')
        __LOGGER__.info("Retrieving job results from %s..." % result_folder)
        if _file_util.is_local_path(result_folder):
            local_folder = result_folder
        else:
            local_folder = self._download_remote_folder_to_local(result_folder, silent=True)

        output = []
        for t in self._stages[0]:
            try:
                task_output_file = self._task_output_paths[t]
                local_file = self.get_path_join_method()(
                    local_folder,
                    _os.path.split(task_output_file)[1])

                unpickler = gl_pickle.GLUnpickler(local_file)
                output.append(unpickler.load())
            except Exception as e:
                if not _silent:
                    __LOGGER__.warning("Ignored exception when retrieving result for task %s, error: %s" % (t.name, e))
                output.append(None)

        # Note: we cannot remove the temporary result folder because the result
        # SFrame may depend on those files existing on disk.

        return output
Example #7
def load_model(location):
    """
    Load any GraphLab Create model that was previously saved.

    This function assumes the model (can be any model) was previously saved in
    GraphLab Create model format with model.save(filename).

    Parameters
    ----------
    location : string
        Location of the model to load. Can be a local path or a remote URL.
        Because models are saved as directories, there is no file extension.

    Examples
    --------
    >>> model.save('my_model_file')
    >>> loaded_model = gl.load_model('my_model_file')
    """
    _mt._get_metric_tracker().track('toolkit.model.load_model')

    # Check whether the location is a dir_archive; if not, use GLUnpickler to
    # load it as a pure Python model.

    # We need to fix this sometime, but here is the explanation of the stupid
    # check below:
    #
    # If the location is a http location, skip the check, and directly proceed
    # to load model as dir_archive. This is because
    # 1) exists() does not work with http protocol, and
    # 2) GLUnpickler does not support http
    protocol = file_util.get_protocol(location)
    dir_archive_exists = False
    if protocol == '':
        model_path = file_util.expand_full_path(location)
        dir_archive_exists = file_util.exists(
            os.path.join(model_path, 'dir_archive.ini'))
    else:
        model_path = location
        if protocol in ['http', 'https']:
            dir_archive_exists = True
        else:
            import posixpath
            dir_archive_exists = file_util.exists(
                posixpath.join(model_path, 'dir_archive.ini'))

    if not dir_archive_exists:
        # Not a ToolkitError so try unpickling the model.
        unpickler = gl_pickle.GLUnpickler(location)

        # Get the version
        version = unpickler.load()

        # Load the class name.
        cls_name = unpickler.load()
        cls = _get_class_from_name(cls_name)

        # Load the object with the right version.
        model = cls._load_version(unpickler, version)

        unpickler.close()

        # Return the model
        return model
    else:
        _internal_url = _make_internal_url(location)
        return glconnect.get_unity().load_model(_internal_url)
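Finally, a minimal sketch of the class-side protocol that the pure-Python branch of load_model (and model_wrapper in Example #5) relies on: the pickle stream begins with a version and a class name, and the resolved class rebuilds itself via a _load_version classmethod. The class and its stored state below are assumptions for illustration.

class MyCustomModel(object):
    _CURRENT_VERSION = 1

    def __init__(self, state):
        self.state = state

    @classmethod
    def _load_version(cls, unpickler, version):
        # Read back whatever the matching save routine dumped for this version.
        state = unpickler.load()
        return cls(state)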