Example #1
    def __get_log_file_content(self, url, handler):
        """
        Get and return the log file content
        """
        log_file_path = str(handler.get_argument("log", None))
        job = self.__load_job()
        content = ""
        max_size = long(1048576)  # max size is 1mb
        status_code = 200
        if log_file_path:
            try:
                if _file_util.is_local_path(log_file_path):
                    if _os.path.getsize(log_file_path) > max_size:
                        raise RuntimeError(
                            "Cannot read file greater than max size.")
                    else:
                        content = self.__load_local_log_file(log_file_path)
                elif _file_util.is_s3_path(log_file_path):
                    content = _file_util.read_file_to_string_s3(
                        log_file_path, max_size,
                        job.environment.get_credentials())
                elif _file_util.is_hdfs_path(log_file_path):
                    content = _file_util.read_file_to_string_hdfs(
                        log_file_path, max_size,
                        job.environment.hadoop_conf_dir)
                else:
                    status_code = 404
                    content = "Log file path (%s) is not valid." % log_file_path
            except RuntimeError:
                status_code = 413
                content = "File size too large. Please load log file manually at %s." % log_file_path

        handler.set_status(status_code)
        handler.set_header("Content-Type", "text/plain")
        handler.write(content)
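
The handler above chooses a reader by path type (local, S3, or HDFS), caps reads at one megabyte, and maps failures onto HTTP status codes (404 for an unrecognized path, 413 for an oversized file). Below is a minimal, self-contained sketch of that same size-guard-and-dispatch pattern using only the standard library; the URL-scheme checks and the 501 branch are assumptions standing in for the _file_util helpers.

import os

MAX_SIZE = 1048576  # 1 MB cap, matching max_size in the handler above


def read_log_preview(log_file_path):
    """Return (status_code, content) for a log path; only the local branch is real."""
    if log_file_path.startswith(("s3://", "hdfs://")):
        # Remote reads would go through helpers like read_file_to_string_s3.
        return 501, "Remote log reading is not implemented in this sketch."
    if not os.path.isfile(log_file_path):
        return 404, "Log file path (%s) is not valid." % log_file_path
    if os.path.getsize(log_file_path) > MAX_SIZE:
        return 413, ("File size too large. Please load log file manually "
                     "at %s." % log_file_path)
    with open(log_file_path) as f:
        return 200, f.read()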
Example #2
    def __get_log_file_content(self, url, handler):
        """
        Get and return the log file content
        """
        log_file_path = str(handler.get_argument("log", None))
        job = self.__load_job()
        content = ""
        max_size = 1048576L # max size is 1mb
        status_code = 200
        if log_file_path:
            try:
                if _file_util.is_local_path(log_file_path):
                    if _os.path.getsize(log_file_path) > max_size:
                        raise RuntimeError("Cannot read file greater than max size.")
                    else:
                        content = self.__load_local_log_file(log_file_path)
                elif _file_util.is_s3_path(log_file_path):
                    content = _file_util.read_file_to_string_s3(log_file_path,
                                                        max_size,
                                                        job.environment.get_credentials())
                elif _file_util.is_hdfs_path(log_file_path):
                    content = _file_util.read_file_to_string_hdfs(log_file_path,
                                                        max_size,
                                                        job.environment.hadoop_conf_dir)
                else:
                    status_code = 404
                    content = "Log file path (%s) is not valid." % log_file_path
            except RuntimeError:
                status_code = 413
                content = "File size too large. Please load log file manually at %s." % log_file_path

        handler.set_status(status_code)
        handler.set_header("Content-Type", "text/plain")
        handler.write(content)
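
Since the handler writes plain text and distinguishes 200, 404, and 413, a caller only needs the status code and the body. The sketch below is a hypothetical client: the base URL and route are assumptions, and only the "log" query parameter name comes from the handler above.

import urllib.error
import urllib.parse
import urllib.request


def fetch_log(base_url, log_path):
    # Build "?log=<path>" exactly as handler.get_argument("log", ...) expects.
    query = urllib.parse.urlencode({"log": log_path})
    try:
        with urllib.request.urlopen("%s?%s" % (base_url, query)) as resp:
            return resp.getcode(), resp.read().decode("utf-8")
    except urllib.error.HTTPError as e:
        # 404 (invalid path) and 413 (file too large) carry a plain-text body.
        return e.code, e.read().decode("utf-8")


if __name__ == "__main__":
    # The host, port, and "/log" route are placeholders.
    status, text = fetch_log("http://localhost:8000/log", "/var/log/job.log")
    print(status, text[:200])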
Example #3
def show_available_packages(turi_dist_path, hadoop_conf_dir=None):
    '''
    Show all available packages in a Hadoop Turi Distributed installation.

    Parameters
    ----------
    turi_dist_path : str
        The location where Turi Distributed is installed. This usually comes
        from your Hadoop Administrator. This path must be a valid HDFS path.

    hadoop_conf_dir : str, optional
        Hadoop configuration directory where the Hadoop configuration files
        are stored. If not given, the configuration files are searched for
        automatically on your CLASSPATH. hadoop_conf_dir must be a local file
        path.

    Returns
    -------
    out : dict
        Dict of two lists: default_packages, in the format

            "rsa==3.1.4",
            "scikit-learn==0.16.1",
            "scipy==0.15.1"

        and user_packages, additional PyPI packages which have been uploaded
        to the Turi Distributed installation. user_packages has the format:

            "names-0.3.0.tar.gz",
            "boto-2.33.0-py2.py3-none-any.whl",
            ...

    '''
    hadoop_conf_dir = _file_util.expand_full_path(
        hadoop_conf_dir) if hadoop_conf_dir else None
    _validate_turi_distr_param(turi_dist_path, hadoop_conf_dir)

    conda_list = turi_dist_path + HadoopCluster._DIST_CONDA_LIST
    user_list = turi_dist_path + HadoopCluster._DIST_USER_PKG
    packages = _file_util.read_file_to_string_hdfs(
        conda_list, hadoop_conf_dir=hadoop_conf_dir)
    if packages is None:
        raise RuntimeError(
            "It seems like you do not have a valid Turi Distributed"
            " installation. Please contact your Hadoop administrator.")

    lines = packages.split(_os.linesep)
    output_lines = []
    for line in lines:
        parts = line.split()
        if len(parts) == 3:
            output_lines.append('%s==%s' % (parts[0], parts[1]))

    result = {'default_packages': output_lines}
    user_add = _file_util.list_hdfs(user_list, hadoop_conf_dir=hadoop_conf_dir)
    user = [_os.path.basename(x['path']) for x in user_add]
    result['user_packages'] = user
    return result
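
A usage sketch for the function above; the HDFS install path and the local Hadoop configuration directory are placeholder values, not paths from the source.

# Hypothetical paths, for illustration only.
packages = show_available_packages(
    "hdfs://namenode:8020/user/hadoop/turi_dist",
    hadoop_conf_dir="/etc/hadoop/conf")

print(packages["default_packages"][:3])  # e.g. ['rsa==3.1.4', ...]
print(packages["user_packages"][:3])     # e.g. ['names-0.3.0.tar.gz', ...]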
Example #4
def show_available_packages(dato_dist_path, hadoop_conf_dir=None):
    '''
    Show all available packages in a Hadoop Dato Distributed installation.

    Parameters
    ----------
    dato_dist_path : str
        The location where Dato Distributed is installed. This usually comes
        from your Hadoop Administrator. This path must be a valid HDFS path.

    hadoop_conf_dir : str, optional
        Hadoop configuration directory where the Hadoop configuration files
        are stored. If not given, the configuration files are searched for
        automatically on your CLASSPATH. hadoop_conf_dir must be a local file
        path.

    Returns
    -------
    out : dict
        Dict of two lists: default_packages, in the format

            "rsa==3.1.4",
            "scikit-learn==0.16.1",
            "scipy==0.15.1"

        and user_packages, additional PyPI packages which have been uploaded
        to the Dato Distributed installation. user_packages has the format:

            "names-0.3.0.tar.gz",
            "boto-2.33.0-py2.py3-none-any.whl",
            ...

    '''
    hadoop_conf_dir = _file_util.expand_full_path(
        hadoop_conf_dir) if hadoop_conf_dir else None
    _validate_dato_distr_param(dato_dist_path, hadoop_conf_dir)

    conda_list = dato_dist_path + HadoopCluster._DIST_CONDA_LIST
    user_list = dato_dist_path + HadoopCluster._DIST_USER_PKG
    packages = _file_util.read_file_to_string_hdfs(
        conda_list, hadoop_conf_dir=hadoop_conf_dir)
    if packages is None:
        raise RuntimeError(
            "It seems like you do not have a valid Dato Distributed"
            " installation. Please contact your Hadoop administrator.")

    lines = packages.split(_os.linesep)
    output_lines = []
    for line in lines:
        parts = line.split()
        if len(parts) == 3:
            output_lines.append('%s==%s' % (parts[0], parts[1]))

    result = {'default_packages': output_lines}
    user_add = _file_util.list_hdfs(user_list, hadoop_conf_dir=hadoop_conf_dir)
    user = [_os.path.basename(x['path']) for x in user_add]
    result['user_packages'] = user
    return result
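
The package-list parsing is the only pure-Python step in both versions: each line of the conda listing is split on whitespace, three-field lines are kept, and the first two fields are joined with "==". A self-contained illustration on made-up sample text:

import os

# Sample text in the "name  version  build" layout the loop above expects;
# the rows are made up for demonstration.
sample = os.linesep.join([
    "# packages in environment:",
    "rsa            3.1.4      py27_0",
    "scikit-learn   0.16.1     np19py27_0",
    "scipy          0.15.1     np19py27_0",
])

default_packages = []
for line in sample.split(os.linesep):
    parts = line.split()
    if len(parts) == 3:
        default_packages.append("%s==%s" % (parts[0], parts[1]))

print(default_packages)
# ['rsa==3.1.4', 'scikit-learn==0.16.1', 'scipy==0.15.1']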