Python as_unicode示例，commoncode.text.as_unicode Python示例

示例#1

0

显示文件

def get_env(base_vars=None, lib_dir=None):
    """
    Build and return the mapping of environment variables to use when
    executing a command.

    The mapping starts as a copy of `base_vars` when provided. When `lib_dir`
    is provided and we run on POSIX, the LD_LIBRARY_PATH and DYLD_LIBRARY_PATH
    entries are set to the value computed by update_path_var() from the
    current process environment value and `lib_dir`: entries for these two
    variables coming from `base_vars` are then replaced.

    Keys and values are converted to bytes on Python 2 and to unicode
    otherwise.
    """
    env_vars = dict(base_vars) if base_vars else {}

    if lib_dir and on_posix:
        new_path = '%s' % (lib_dir,)
        # LD_LIBRARY_PATH is for Linux/POSIX and DYLD_LIBRARY_PATH is for Mac
        # (though LD_LIBRARY_PATH should work there too)
        for var_name in (LD_LIBRARY_PATH, DYLD_LIBRARY_PATH):
            current_value = os.environ.get(var_name)
            env_vars[var_name] = update_path_var(current_value, new_path)

    if not py2:
        return {
            text.as_unicode(name): text.as_unicode(value)
            for name, value in env_vars.items()
        }

    def as_bytes(value):
        # bytes are kept as-is: anything else is encoded as UTF-8
        if isinstance(value, bytes):
            return value
        return value.encode('utf-8')

    return {as_bytes(name): as_bytes(value) for name, value in env_vars.items()}

示例#2

0

显示文件

文件： command.py 项目： nexB/commoncode

def get_env(base_vars=None, lib_dir=None):
    """
    Return a mapping of environment variables suitable for running a command.

    The optional `base_vars` mapping is used as the starting point. On POSIX
    and when `lib_dir` is provided, the LD_LIBRARY_PATH and DYLD_LIBRARY_PATH
    entries are set from the current process environment value combined with
    `lib_dir` using update_path_var(): in this case, any such entries present
    in `base_vars` are overwritten.

    All keys and values are passed through text.as_unicode().
    """
    merged = dict(base_vars) if base_vars else {}

    if lib_dir and on_posix:
        lib_path = f'{lib_dir}'
        # used on Linux/POSIX
        merged[LD_LIBRARY_PATH] = update_path_var(
            os.environ.get(LD_LIBRARY_PATH), lib_path)
        # used on Mac, though LD_LIBRARY_PATH should work there too
        merged[DYLD_LIBRARY_PATH] = update_path_var(
            os.environ.get(DYLD_LIBRARY_PATH), lib_path)

    return {
        text.as_unicode(name): text.as_unicode(value)
        for name, value in merged.items()
    }

示例#3

0

显示文件

文件： test_text.py 项目： nexB/commoncode

def test_as_unicode():
    """
    Check text.as_unicode() with an empty string, bytes, None and a list.
    A list argument is expected to fail with an AssertionError.
    """
    assert text.as_unicode('') == ''
    assert isinstance(text.as_unicode(b'some bytes'), str)
    # None is a singleton: compare by identity rather than equality
    assert text.as_unicode(None) is None
    try:
        text.as_unicode(['foo'])
    except AssertionError:
        pass
    else:
        # only reached when the call above did not raise
        raise Exception('Exception should have been raised')

示例#4

0

显示文件

def test_as_unicode():
    """
    Check text.as_unicode() with an empty string, bytes, None and a list.
    A list argument is expected to fail with an AssertionError.
    """
    assert '' == text.as_unicode('')
    assert isinstance(text.as_unicode(b'some bytes'), compat.unicode)
    # None is a singleton: compare by identity rather than equality
    assert text.as_unicode(None) is None
    try:
        text.as_unicode(['foo'])
    except AssertionError:
        pass
    else:
        # only reached when the call above did not raise
        raise Exception('Exception should have been raised')

示例#5

0

显示文件

def safe_path(path, posix=False):
    """
    Return a safe and portable POSIX path built from `path` and usable on
    multiple OSes.

    The `path` is converted to unicode if needed, stripped and resolved with
    resolve(). Each non-empty segment is made portable with
    portable_filename(). The string '_' is returned when there is no segment
    left.

    A `path` that is not recognized by is_posixpath() is converted with
    as_winpath() and handled as a Windows path with backslash separators,
    whatever the value of `posix`.

    NOTE(review): this was previously documented as returning an ASCII-only
    byte string that is itself relative: confirm against the helpers.
    """
    # work on unicode text rather than bytes
    if not isinstance(path, unicode):
        path = as_unicode(path)

    cleaned = path.strip()

    if not is_posixpath(cleaned):
        cleaned = as_winpath(cleaned)
        posix = False

    resolved = resolve(cleaned, posix)
    _pathmod, separator = path_handlers(resolved, posix)

    segments = []
    for raw_segment in resolved.split(separator):
        if raw_segment.strip():
            segments.append(portable_filename(raw_segment.strip()))

    if not segments:
        return '_'

    # the result always uses POSIX separators
    joiner = u'/' if isinstance(resolved, unicode) else b'/'
    return as_posixpath(joiner.join(segments))

示例#6

0

显示文件

文件： paths.py 项目： ocabrisses/scancode-toolkit

def safe_path(path, posix=False):
    """
    Convert `path` to a safe and portable POSIX path usable on multiple OSes
    and return it.

    Processing steps: convert to unicode if needed, strip, resolve with
    resolve(), split in segments, drop empty segments and pass each remaining
    segment to portable_filename(). Return '_' if no segment remains.

    If is_posixpath() does not recognize `path`, it is converted with
    as_winpath() and treated as a Windows path with backslash separators even
    if `posix` is True.

    NOTE(review): earlier documentation stated that the returned path is an
    ASCII-only byte string and is relative: confirm against the helpers.
    """
    path = path if isinstance(path, unicode) else as_unicode(path)
    path = path.strip()

    if not is_posixpath(path):
        posix = False
        path = as_winpath(path)

    path = resolve(path, posix)
    _pathmod, path_sep = path_handlers(path, posix)

    stripped_parts = (part.strip() for part in path.split(path_sep))
    segments = [portable_filename(part) for part in stripped_parts if part]

    if segments:
        # always join with a POSIX separator of the same type as the path
        if isinstance(path, unicode):
            sep = u'/'
        else:
            sep = b'/'
        return as_posixpath(sep.join(segments))

    return '_'

示例#7

0

显示文件

 def hunk_data(hnk):
     """
     Return a mapping of the start and line attributes of the `hnk` hunk
     object and of its description converted with text.as_unicode().
     """
     return {
         'startsrc': hnk.startsrc,
         'linessrc': hnk.linessrc,
         'starttgt': hnk.starttgt,
         'linestgt': hnk.linestgt,
         'desc': text.as_unicode(hnk.desc),
     }

示例#8

0

显示文件

文件： utils.py 项目： lerwine/license-scan-git-commit

def path_as_unicode(path):
    """
    Return `path` as a unicode string.

    A unicode `path` is returned unchanged. Otherwise it is decoded using the
    encoding returned by get_fs_encoding(), falling back to as_unicode() if
    this decoding fails with a UnicodeDecodeError.
    """
    if not isinstance(path, unicode):
        try:
            path = path.decode(get_fs_encoding())
        except UnicodeDecodeError:
            path = as_unicode(path)
    return path

示例#9

0

显示文件

文件： contenttype.py 项目： SmartsYoung/FenixscanX

def get_text_file_start(location, length=4096):
    """
    Return a unicode string with up to the first `length` characters from the
    text file at `location`, or None if the file content cannot be obtained.

    The file is first read in text mode. If this fails for any reason, up to
    `length` bytes are read in binary mode instead and converted with
    text.as_unicode().
    """
    try:
        with io.open(location, 'r') as f:
            return f.read(length)
    except Exception:
        # best effort: fall through and retry as bytes below
        pass

    try:
        # try again as bytes and force unicode
        with open(location, 'rb') as f:
            return text.as_unicode(f.read(length))
    except Exception:
        # Still best effort, but without a `return` in a `finally` clause
        # that would also silence KeyboardInterrupt and SystemExit.
        return None

示例#10

0

显示文件

def is_markup(location):
    """
    Return True if the file at `location` is some kind of markup, such as
    HTML, XML, PHP, etc.

    Files smaller than 64 (as reported by the file type size) and non-text
    files are never markup. A file whose name ends with one of the known
    `extensions` or whose first 1024 bytes start with '<' is markup.
    Otherwise the decision is based on the proportion and balance of '<' and
    '>' characters found in these first 1024 bytes.
    """
    T = get_type(location)

    # small files and non-text files are not considered
    if T.size < 64 or not T.is_text:
        return False

    if location.endswith(extensions):
        return True

    with open(location, 'rb') as f:
        start = as_unicode(f.read(1024))

    if start.startswith('<'):
        return True

    # remove all whitespaces
    no_spaces = ''.join(start.split())

    # count the opening and closing tag markers
    counts = Counter()
    for char in no_spaces:
        if char in '<>':
            counts[char] += 1

    # both markers must be present
    if '<' not in counts or '>' not in counts:
        return False

    if not all(counts.values()):
        return False

    # at least ~ 5 percent of the characters should be <> tag markers
    markers_ratio = sum(counts.values()) / len(no_spaces)
    has_tags = markers_ratio > 0.05

    # the ratio of closing to opening markers should approach 1: accept a
    # 20% drift
    close_to_open = counts['>'] / counts['<']
    balanced = abs(1 - close_to_open) < .2

    return has_tags and balanced

示例#11

0

显示文件

 def __init__(self,
              rc=None,
              archive_struct=None,
              archive_func=None,
              root_ex=None):
     """
     Initialize the rc, errno, msg and func attributes either from a
     `root_ex` ArchiveException when provided, or from the `rc` return code,
     the `archive_struct` and the `archive_func` function otherwise.
     """
     self.root_ex = root_ex

     if root_ex and isinstance(root_ex, ArchiveException):
         # copy the details of the root exception
         self.rc = root_ex.rc
         self.errno = root_ex.errno
         messages = [text.as_unicode(arg) for arg in (root_ex.args or [])]
         self.msg = u'\n'.join(messages) or None
         self.func = root_ex.func
         return

     self.rc = rc

     if archive_struct:
         # a falsy error number or message is treated as missing
         self.errno = errno(archive_struct) or None
         message = err_msg(archive_struct) or ''
     else:
         self.errno = None
         message = ''

     if message:
         self.msg = text.as_unicode(message) or 'Unknown error'
     else:
         self.msg = 'Unknown error'

     if archive_func:
         self.func = archive_func.__name__ or None
     else:
         self.func = None

示例#12

0

显示文件

    def get_path(self, func, func_w):
        """
        Return a path obtained by calling the path function `func` on the
        entry struct, or by calling the wide char equivalent `func_w` if
        `func` did not provide a path.

        On Python 2, a unicode path is encoded to UTF-8 bytes. On Python 3, a
        path that is not unicode is converted with text.as_unicode().
        """
        path = func(self.entry_struct) or func_w(self.entry_struct)

        if py2:
            if isinstance(path, compat.unicode):
                # FIXME: encoding MAY fail if the encoding is NOT UTF-8!
                # .... should we transliterate there?
                path = path.encode('utf-8')

        if py3:
            if not isinstance(path, compat.unicode):
                path = text.as_unicode(path)

        return path

示例#13

0

显示文件

文件： contenttype.py 项目： xavierfigueroav/scancode-toolkit

def get_pygments_lexer(location):
    """
    Given an input file `location`, return a Pygments lexer appropriate for
    lexing this file content, or None if the file is binary or if no lexer
    is found.

    The lexer is first looked up from the file name. If none is found, it is
    guessed from the first 4096 characters (or bytes) of the file content.
    """
    try:
        T = _registry[location]
        if T.is_binary:
            return None
    except KeyError:
        if is_binary(location):
            return None

    try:
        # FIXME: Latest Pygments versions should work fine
        # win32_bug_on_s_files = dejacode.on_windows and location.endswith('.s')

        # NOTE: we use only the location for its file name here, we could use
        # lowercase location may be
        return get_lexer_for_filename(location, stripnl=False, stripall=False)
    except LexerClassNotFound:
        # no lexer for this file name: guess from the content below
        pass

    # read the first 4K of the file
    try:
        with io.open(location, 'r') as f:
            content = f.read(4096)
    except Exception:
        # Try again as bytes and force unicode. Only Exception is caught so
        # that KeyboardInterrupt and SystemExit are not swallowed.
        with open(location, 'rb') as f:
            content = text.as_unicode(f.read(4096))

    try:
        return guess_lexer(content)
    except LexerClassNotFound:
        # if Pygments does not guess we should not carry forward
        return None

示例#14

0

显示文件

def pe_info(location):
    """
    Return a mapping of common data available for a Windows dll or exe PE
    (portable executable) at `location`.

    Return an empty mapping if `location` is empty or if the file is not a
    Windows executable. Otherwise, the returned mapping has an entry for each
    of the PE_INFO_KEYS, with a None value when no data was collected for a
    key.

    Also collect extra data found if any, returned as a dictionary under the
    'extra_data' key in the returned mapping.
    """
    if not location:
        return {}

    T = contenttype.get_type(location)

    if not T.is_winexe:
        return {}

    # we always return something for all keys
    result = dict.fromkeys(PE_INFO_KEYS)
    extra_data = result['extra_data'] = {}

    with closing(pefile.PE(location)) as pe:
        if not hasattr(pe, 'FileInfo'):
            # No fileinfo section: we return just empties
            return result

        # >>> pe.FileInfo: this is a list of list of Structure objects:
        # [[<Structure: [VarFileInfo] >,  <Structure: [StringFileInfo]>]]
        file_info = pe.FileInfo
        if not file_info or not isinstance(file_info, list):
            if TRACE:
                logger.debug('pe_info: not file_info')
            return result

        # here we have a non-empty list
        file_info = file_info[0]
        if TRACE:
            # the logged value needs a placeholder in the message: without
            # it, logging fails to format the record
            logger.debug('pe_info: file_info: %r', file_info)

        string_file_info = [
            x for x in file_info if type(x) == pefile.Structure
            and hasattr(x, 'name') and x.name == 'StringFileInfo'
        ]

        if not string_file_info:
            # No stringfileinfo section: we return just empties
            if TRACE:
                logger.debug('pe_info: not string_file_info')
            return result

        string_file_info = string_file_info[0]

        if not hasattr(string_file_info, 'StringTable'):
            # No fileinfo.StringTable section: we return just empties
            if TRACE:
                logger.debug('pe_info: not StringTable')
            return result

        string_table = string_file_info.StringTable
        if not string_table or not isinstance(string_table, list):
            return result

        string_table = string_table[0]

        if TRACE:
            logger.debug(
                'pe_info: Entries keys: %s', set(string_table.entries))

            logger.debug('pe_info: Entry values:')
            for k, v in string_table.entries.items():
                logger.debug('  %s: %r%r', k, type(v), v)

        for k, v in string_table.entries.items():
            # normalize keys and values as stripped unicode text
            key = text.as_unicode(k).strip()
            value = text.as_unicode(v).strip()
            value = fix_text(value)
            if key in PE_INFO_KEYSET:
                result[key] = value
            else:
                extra_data[key] = value

    return result

示例#15

0

显示文件

def pe_info(location, include_extra_data=False):
    """
    Return a mapping of common data available for a Windows dll or exe PE
    (portable executable) at `location`.

    Return an empty mapping if `location` is empty or if the file is not a
    Windows executable. Otherwise, the returned mapping has an entry for each
    of the PE_INFO_KEYS, with a None value when no data was collected for a
    key. Any exception raised while collecting data is re-raised.

    Extra data found if any is returned as a mapping sorted by key under the
    'extra_data' key in the returned mapping. Note that `include_extra_data`
    is accepted but is not used: extra data is always collected.
    """
    if not location:
        return {}

    T = contenttype.get_type(location)

    if not T.is_winexe:
        return {}

    # FIXME: WTF: we initialize with empty values, as we must always
    # return something for all values
    result = OrderedDict((k, None) for k in PE_INFO_KEYS)
    result['extra_data'] = OrderedDict()

    try:
        with closing(pefile.PE(location)) as pe:
            if not hasattr(pe, 'FileInfo'):
                # No fileinfo section: we return just empties
                return result

            # >>> pe.FileInfo: this is a list of list of Structure objects:
            # [[<Structure: [VarFileInfo] >,  <Structure: [StringFileInfo]>]]
            pefi = pe.FileInfo
            if not pefi or not isinstance(pefi, list):
                if TRACE:
                    logger.debug('pe_info: not pefi')
                return result

            # here we have a non-empty list
            pefi = pefi[0]
            if TRACE:
                # the logged value needs a placeholder in the message:
                # without it, logging fails to format the record
                logger.debug('pe_info: pefi: %r', pefi)

            sfi = [
                x for x in pefi if type(x) == pefile.Structure
                and hasattr(x, 'name') and x.name == 'StringFileInfo'
            ]

            if not sfi:
                # No stringfileinfo section: we return just empties
                if TRACE:
                    logger.debug('pe_info: not sfi')
                return result

            sfi = sfi[0]

            if not hasattr(sfi, 'StringTable'):
                # No fileinfo.StringTable section: we return just empties
                if TRACE:
                    logger.debug('pe_info: not StringTable')
                return result

            strtab = sfi.StringTable
            if not strtab or not isinstance(strtab, list):
                return result

            strtab = strtab[0]

            if TRACE:
                logger.debug('pe_info: Entries keys: %s', set(strtab.entries))
                logger.debug('pe_info: Entry values:')
                for k, v in strtab.entries.items():
                    logger.debug('  %s: %r%r', k, type(v), v)

            for k, v in strtab.entries.items():
                # normalize keys and values as stripped unicode text
                key = text.as_unicode(k).strip()
                value = text.as_unicode(v).strip()
                value = fix_text(value)
                if key in PE_INFO_KEYSET:
                    result[key] = value
                else:
                    # collect extra_data if any:
                    result['extra_data'][key] = value

    except Exception as e:
        # trace the failure first: this was unreachable when placed after
        # the raise
        if TRACE:
            logger.debug('pe_info: Failed to collect infos: ' + repr(e))
        raise

    # the ordering of extra_data is not guaranteed on Python 2 because the dict is not ordered
    result['extra_data'] = OrderedDict(sorted(result['extra_data'].items()))
    return result