Example #1
0
    def _get_progress_from_file(self):
        log_location = '/'.join([self._working_dir, 'progress.log'])
        if _file_util.exists(log_location):
            content = _file_util.read(log_location)
            if content:
                # Strip the trailing '$' end-of-content marker.
                return content[:-1]
        else:
            __LOGGER__.warning('Progress log file cannot be found')
        return ""
Example #2
0
def load_model(location):
    """
    Load any GraphLab Create model that was previously saved.

    This function assumes the model (which can be of any type) was previously
    saved in the GraphLab Create model format with model.save(filename).

    Parameters
    ----------
    location : string
        Location of the model to load. Can be a local path or a remote URL.
        Because models are saved as directories, there is no file extension.

    Examples
    --------
    >>> model.save('my_model_file')
    >>> loaded_model = gl.load_model('my_model_file')
    """
    _mt._get_metric_tracker().track('toolkit.model.load_model')

    # Check if the location is a dir_archive; if not, use GLUnpickler to load
    # it as a pure Python model.

    # We need to fix this sometime, but here is the explanation of the
    # check below:
    #
    # If the location is an http location, skip the check and proceed directly
    # to loading the model as a dir_archive. This is because
    # 1) exists() does not work with the http protocol, and
    # 2) GLUnpickler does not support http.
    if (not file_util.get_protocol(location) in ['http', 'https']) and \
            (not file_util.exists(location + '/dir_archive.ini')):
        # Not a ToolkitError so try unpickling the model.
        unpickler = gl_pickle.GLUnpickler(location)

        # Get the version
        version = unpickler.load()

        # Load the class name.
        cls_name = unpickler.load()
        cls = _get_class_from_name(cls_name)

        # Load the object with the right version.
        model = cls._load_version(unpickler, version)

        unpickler.close()

        # Return the model
        return model
    else:
        _internal_url = _make_internal_url(location)
        return glconnect.get_unity().load_model(_internal_url)
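The unpickle sequence above implies a matching save-side order: the version first, then the fully qualified class name, then the versioned model payload. A hypothetical sketch of that counterpart (GLPickler, _get_version, and _save_impl are illustrative assumptions, not APIs confirmed by this source):

# Hypothetical save-side counterpart mirroring the load order above.
pickler = gl_pickle.GLPickler(location)        # assumed writer twin of GLUnpickler
cls = model.__class__
cls_name = cls.__module__ + '.' + cls.__name__ # dotted name for _get_class_from_name (assumption)
pickler.dump(model._get_version())             # 1) version, read back first
pickler.dump(cls_name)                         # 2) class name, read back second
model._save_impl(pickler)                      # 3) versioned payload for _load_version
pickler.close()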
Example #3
0
def get_log_metric_server_address(log_server_address_file, timeout=120):
    starttime = time.time()
    try:
        while not file_util.exists(log_server_address_file):
            time.sleep(.05)
            if (time.time() - starttime) > timeout:
                __logger__.warning('Unable to get server log (timeout reached)')
                return ""
        ret_str = file_util.read(log_server_address_file)
        if ret_str.endswith('$'):
            return ret_str[:-1]
    except Exception as e:
        __logger__.warning(e)
    return ""
Example #4
0
def get_log_metric_server_address(log_server_address_file, timeout=120):
    starttime = time.time()
    try:
        while not file_util.exists(log_server_address_file):
            time.sleep(.05)
            if (time.time() - starttime) > timeout:
                __logger__.warning(
                    'Unable to get server log (timeout reached)')
                return ""
        ret_str = file_util.read(log_server_address_file)
        if ret_str.endswith('$'):
            return ret_str[:-1]
    except Exception as e:
        __logger__.warning(e)
    return ""
Example #5
0
    def receive_from_file(self):
        try:
            if file_util.exists(self.file_url):
                __logger__.debug("Read from %s" % self.file_url)
                content = file_util.read(self.file_url)
                leftover_progress_content = content[len(self.total_received_message):]
                # Final log file incomplete
                if not leftover_progress_content.endswith('$'):
                    return False

                if len(leftover_progress_content):
                    self.out.write(leftover_progress_content[:-1])  # ignore $
                    self.out.flush()
                self.total_received_message += leftover_progress_content
                return True
        except Exception as e:
            __logger__.warning(e)
        return False
Example #6
0
    def receive_from_file(self):
        try:
            if file_util.exists(self.file_url):
                __logger__.debug("Read from %s" % self.file_url)
                content = file_util.read(self.file_url)
                leftover_progress_content = content[
                    len(self.total_received_message):]
                # Final log file incomplete
                if not leftover_progress_content.endswith('$'):
                    return False

                if len(leftover_progress_content):
                    self.out.write(leftover_progress_content[:-1])  # ignore $
                    self.out.flush()
                self.total_received_message += leftover_progress_content
                return True
        except Exception as e:
            __logger__.warning(e)
        return False
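Several of these helpers treat a trailing '$' as an end-of-content sentinel: a read counts as complete only when the file ends with '$', and the sentinel is stripped before the content is used. The writer side is not shown in this source; a minimal sketch of what it could look like (write_with_sentinel is a hypothetical helper, not part of the GraphLab API):

def write_with_sentinel(path, content):
    # Illustrative only: append a literal '$' after the payload so a reader
    # can distinguish a complete file from a partially written one.
    with open(path, 'w') as f:
        f.write(content + '$')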
Example #7
0
    def _get_job_log_server_address(self, timeout=10):
        if self._log_server_address:
            return self._log_server_address
        log_server_address_file = '/'.join([self._working_dir,
                                            'metric_server_address'])
        starttime = time.time()
        # Track expiry with a separate flag; the original code reassigned
        # `timeout = False`, shadowing the parameter and making the comparison
        # below time out immediately.
        timed_out = False
        __LOGGER__.info('Waiting for log server address to be available')
        while not _file_util.exists(log_server_address_file):
            time.sleep(1)
            if (time.time() - starttime) > timeout:
                __LOGGER__.info('Timeout waiting for log server address')
                timed_out = True
                break
        if not timed_out:
            ret_str = _file_util.read(log_server_address_file)
            if ret_str.endswith('$'):
                self._log_server_address = ret_str[:-1] + "/progress"
                __LOGGER__.info('Log server address: %s' %
                                self._log_server_address)
                return self._log_server_address
        return ""
Example #8
0
def dml_exec(function_name, data, env='auto', verbose=True, **kwargs):
    """
    Execute a distributed ML function.

    Parameters
    ----------
    function_name : str
        Name of the distributed function to be executed. The function symbol
        must exist in the unity distributed shared library.

    data : dict
        Key-value arguments to the function, stored in a dictionary.

    env : DMLEnvironment
        Contains job environment parameters and a job submit function.

    verbose : bool, optional
        If True, attach the progress and worker log streams and print them
        while the job runs.

    **kwargs : dict
        Additional options. See _get_worker_args and _get_commander_args.
        - check_hdfs : {0, 1} Perform sanity check for HDFS read and write
        - startup_timeout : int Timeout in seconds for cluster setup

    Returns
    -------
    (success, message, result_path) : bool, str, str
    """
    from graphlab.extensions import dml_function_invocation, init_dml_class_registry
    init_dml_class_registry()

    if env == 'auto':
        env = DMLRemoteEnvironment()

    if not file_util.exists(env.working_dir):
        _log.debug('Creating working directory: %s' % env.working_dir)
        file_util.mkdir(env.working_dir)
    else:
        _log.debug('Using existing working directory: %s' % env.working_dir)

    _log.info('Running distributed execution with %d workers. '
              'Working directory: %s' % (env.num_workers, env.working_dir))

    success = False
    message = ""
    result_path = None

    # Job function arguments
    try:
        _log.info('Serializing arguments to %s' % env.working_dir)
        args = dml_function_invocation()
        data_copy = copy(data)
        internal_working_dir = _make_internal_url(env.working_dir)
        data_copy['__base_path__'] = internal_working_dir
        args.from_dict(data_copy, internal_working_dir)
        json_data = args.to_str()

        # Sanitize the base path url before logging.
        sanitized_json_data = json_data
        if file_util.is_s3_path(json_data):
            sanitized_json_data = _sanitize_internal_s3_url(json_data)

        _log.info('Serialized arguments: %s' % sanitized_json_data)
    except Exception as e:
        success = False
        message = 'Error serializing arguments. %s' % str(e)
        return (success, message, None)

    # Submit job
    try:
        job = dml_submit(function_name, json_data, env,
                         metric_server_address_file=COMMANDER_LOG_SERVER_ADDRESS_FILE,
                         logprogress_file=PROGRESS_LOG_FILE,
                         **kwargs)
    except KeyboardInterrupt:
        message = 'Canceled by user'
        return (success, message, None)

    _log.info('Waiting for workers to start ... ')
    logprinter = None
    if verbose:
        log_server_address_path = os.path.join(env.working_dir,
                                               COMMANDER_LOG_SERVER_ADDRESS_FILE)
        log_server_address = get_log_metric_server_address(log_server_address_path,
                                                           timeout=INIT_TIMEOUT_PER_WORKER * env.num_workers)
        if len(log_server_address) > 0:
            tmp_log_dir = tempfile.mkdtemp(prefix='graphlab_dml_log_')
            fd_list = []
            logprinter = LogPrinter()
            # Attach log progress stream
            logprinter.add_stream(LogStream(log_server_address + '/progress',
                                            os.path.join(env.working_dir, PROGRESS_LOG_FILE),
                                            sys.stdout))
            # Attach commander log stream
            local_commander_log = open(os.path.join(tmp_log_dir, COMMANDER_LOG_FILE), 'w')
            fd_list.append(local_commander_log)
            logprinter.add_stream(LogStream(log_server_address + '/commander',
                                            os.path.join(env.working_dir, COMMANDER_LOG_FILE),
                                            local_commander_log))
            # Attach worker log streams
            for i in range(env.num_workers):
                local_worker_log = open(os.path.join(tmp_log_dir, WORKER_LOG_FILE(i)), 'w')
                fd_list.append(local_worker_log)
                logprinter.add_stream(LogStream(log_server_address + '/worker%d' % i,
                                                os.path.join(env.working_dir, WORKER_LOG_FILE(i)),
                                                local_worker_log))
            logprinter.start()
            _log.info('Success. Worker logs are available at %s' % tmp_log_dir)

    _log.debug('Wait for job to finish')
    (success, message) = _wait_and_parse_job_result(job)

    if logprinter:
        logprinter.stop()
        for fd in fd_list:
            fd.close()

    if success:
        try:
            result_path = os.path.join(env.working_dir, env.output_name)
            ret_str = file_util.read(result_path)
            sanitized_ret_str = _sanitize_internal_s3_url(ret_str)
            _log.debug('Deserializing results: %s' % sanitized_ret_str)

            args.from_str(ret_str)
            response = args.to_dict()

            # Check toolkit response for "result" key or "exception" key.
            if 'result' in response:
                return (success, message, response['result'])
            elif 'exception' in response:
                return (False, response['exception'], None)
            else:
                raise ValueError('Invalid toolkit response. Must have '
                                 '"result" or "exception" as key')
        except Exception as e:
            success = False
            message = 'Error deserializing results. %s' % str(e)
            return (success, message, None)
    else:
        return (success, message, None)
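For context, dml_exec returns the (success, message, result) triple documented above; a minimal hypothetical caller (the function name and argument dictionary are illustrative, not taken from this source) might look like:

# Hypothetical usage of dml_exec's return convention.
success, message, result = dml_exec('distributed_train',
                                    {'data': training_data_url})
if not success:
    raise RuntimeError('Distributed job failed: %s' % message)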
Example #9
0
def dml_submit(function_name, str_data, env, **kwargs):
    """
    Submit a distributed ML function for execution.

    Parameters
    ----------
    function_name : str
        Name of the distributed function to be executed. The function symbol
        must exist in the unity distributed shared library.

    str_data : str
        Arguments, as a serialized string, to be passed to the distributed
        function.

    env : DMLEnvironment
        Contains job environment parameters and a job submit function.

    **kwargs : dict
        Additional options. See _get_worker_args and _get_commander_args.
        - check_hdfs : {0, 1} Perform sanity check for HDFS read and write
        - startup_timeout : int Timeout in seconds for cluster setup

    Returns
    -------
    job : map_job
    """
    _log.debug('Submitting job')

    if not file_util.exists(env.working_dir):
        file_util.mkdir(env.working_dir)

    map_job_args = _get_dml_exec_args(function_name, str_data, env,
                                      output_name=env.output_name,
                                      **kwargs)

    _log.debug('job arguments: %s' % str(map_job_args))

    # The following code achieves the same as
    # """return env.submit(subprocess_exe, map_job_args)"""
    # but requires one less container. (Having the commander take up an entire
    # container is wasteful.)

    # It uses group_exec and packs the commander function and the first worker
    # function into one map task. The remaining workers stay the same. Because
    # group_exec returns a list of results, the output is a nested list, so we
    # overload the job.get_results function to flatten the results.

    def commander_exec():
        return lambda: subprocess_exe(**map_job_args[0])

    def worker_exec(i):
        return lambda: subprocess_exe(**map_job_args[i + 1])

    worker_to_function_group = [[worker_exec(i)] for i in range(env.num_workers)]
    worker_to_function_group[0].insert(0, commander_exec())
    job = env.submit(group_exec, [{'lambdas': fgroup} for fgroup in worker_to_function_group])

    # Decorate the job get_results function to flatten the results
    def flatten_results(packed_results):
        return [item for sublist in packed_results for item in sublist]

    def decorate_with_flatten_results(f_original):
        def f_decorated():
            results = f_original()
            return flatten_results(results)
        return f_decorated
    job.get_results = decorate_with_flatten_results(job.get_results)
    return job
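The flattening decoration above is small enough to check in isolation; a standalone sketch (fake_get_results stands in for job.get_results and is purely illustrative):

def flatten_results(packed_results):
    # Flatten [[a, b], [c]] into [a, b, c].
    return [item for sublist in packed_results for item in sublist]

def decorate_with_flatten_results(f_original):
    def f_decorated():
        return flatten_results(f_original())
    return f_decorated

def fake_get_results():
    # Stand-in for job.get_results: one group holds the commander plus the
    # first worker, the remaining groups hold one worker each.
    return [['commander_result', 'worker_0_result'], ['worker_1_result']]

print(decorate_with_flatten_results(fake_get_results)())
# -> ['commander_result', 'worker_0_result', 'worker_1_result']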
Example #10
0
def load_model(location):
    """
    Load any GraphLab Create model that was previously saved.

    This function assumes the model (which can be of any type) was previously
    saved in the GraphLab Create model format with model.save(filename).

    Parameters
    ----------
    location : string
        Location of the model to load. Can be a local path or a remote URL.
        Because models are saved as directories, there is no file extension.

    Examples
    --------
    >>> model.save('my_model_file')
    >>> loaded_model = gl.load_model('my_model_file')
    """
    _mt._get_metric_tracker().track('toolkit.model.load_model')

    # Check if the location is a dir_archive; if not, use GLUnpickler to load
    # it as a pure Python model.

    # We need to fix this sometime, but here is the explanation of the
    # check below:
    #
    # If the location is an http location, skip the check and proceed directly
    # to loading the model as a dir_archive. This is because
    # 1) exists() does not work with the http protocol, and
    # 2) GLUnpickler does not support http.
    protocol = file_util.get_protocol(location)
    dir_archive_exists = False
    if protocol == '':
        model_path = file_util.expand_full_path(location)
        dir_archive_exists = file_util.exists(
            os.path.join(model_path, 'dir_archive.ini'))
    else:
        model_path = location
        if protocol in ['http', 'https']:
            dir_archive_exists = True
        else:
            import posixpath
            dir_archive_exists = file_util.exists(
                posixpath.join(model_path, 'dir_archive.ini'))

    if not dir_archive_exists:
        # Not a ToolkitError so try unpickling the model.
        unpickler = gl_pickle.GLUnpickler(location)

        # Get the version
        version = unpickler.load()

        # Load the class name.
        cls_name = unpickler.load()
        cls = _get_class_from_name(cls_name)

        # Load the object with the right version.
        model = cls._load_version(unpickler, version)

        unpickler.close()

        # Return the model
        return model
    else:
        _internal_url = _make_internal_url(location)
        return glconnect.get_unity().load_model(_internal_url)
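A note on the path handling above: local paths go through os.path.join, while remote, non-http URLs use posixpath.join, since os.path.join would insert a backslash separator on Windows and corrupt the URL. A quick runnable illustration:

import posixpath

# On Windows, os.path.join('s3://bucket/model', 'dir_archive.ini') yields
# 's3://bucket/model\\dir_archive.ini'; posixpath.join always uses '/'.
print(posixpath.join('s3://bucket/model', 'dir_archive.ini'))
# -> s3://bucket/model/dir_archive.ini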