Python toascii示例，commoncode.text.toascii Python示例

示例#1

0

显示文件

文件： command.py 项目： ocabrisses/scancode-toolkit

def execute(cmd, args, root_dir=None, cwd=None, env=None, to_files=False):
    """
    Run a `cmd` external command with the `args` arguments list and return a
    (return code, stdout, stderr) tuple.

    To avoid RAM exhaustion, stdout and stderr are always written to files in
    a fresh temporary directory.

    If `to_files` is False, return the stripped content of stdout and stderr
    converted with `text.toascii`. Otherwise, return the locations of the
    stdout and stderr temporary files.

    Resolve the `cmd` location with `get_locations(cmd, root_dir)`; when no
    location is resolved, `cmd` is used as-is.

    Run the command using the `cwd` current working directory (defaulting to
    `curr_dir`) with an `env` dict of environment variables. `args` may be
    None or empty to run the command without arguments.
    """
    assert cmd
    cmd_loc, bin_dir, lib_dir = get_locations(cmd, root_dir)
    # The parentheses matter: `[x] + args or []` parses as `([x] + args) or []`
    # and fails with a TypeError when `args` is None.
    full_cmd = [cmd_loc or cmd] + (args or [])
    env = get_env(env, lib_dir) or None
    cwd = cwd or curr_dir

    # temp files for stderr and stdout
    tmp_dir = fileutils.get_temp_dir(base_dir='cmd')
    sop = os.path.join(tmp_dir, 'stdout')
    sep = os.path.join(tmp_dir, 'stderr')

    # shell==True is DANGEROUS but we are not running arbitrary commands
    # though we can execute command that just happen to be in the path
    shell = True if on_windows else False

    # NOTE: the message is formatted from locals(): keep the names cmd,
    # full_cmd, env, shell, cwd, sop and sep in sync with the format string.
    logger.debug('Executing command %(cmd)r as %(full_cmd)r with: env=%(env)r, '
                 'shell=%(shell)r, cwd=%(cwd)r, stdout=%(sop)r, stderr=%(sep)r.'
                 % locals())

    proc = None
    try:
        with open(sop, 'wb') as stdout_file, open(sep, 'wb') as stderr_file:
            # -1 defaults bufsize to system bufsize
            pargs = dict(cwd=cwd, env=env, stdout=stdout_file, stderr=stderr_file,
                         shell=shell, bufsize=-1, universal_newlines=True)
            proc = subprocess.Popen(full_cmd, **pargs)
            # Both streams go to files, so communicate() has nothing to
            # return: it is only used to wait for the process to finish.
            proc.communicate()
            rc = proc.returncode
    finally:
        # proc is still None here if Popen itself failed
        close(proc)

    if not to_files:
        # return output as ASCII string loaded from the output files, closing
        # each file handle as soon as its content has been read
        with open(sop, 'rb') as stdout_file:
            sop = text.toascii(stdout_file.read().strip())
        with open(sep, 'rb') as stderr_file:
            sep = text.toascii(stderr_file.read().strip())
    return rc, sop, sep

示例#2

0

显示文件

文件： extract_cli.py 项目： ocabrisses/scancode-toolkit

    def display_extract_summary():
        """
        Print each extraction error and warning to stderr, followed by a final
        status line colored after the worst severity seen: red if there was
        any error, yellow if there were only warnings, green otherwise.
        """
        has_errors = False
        has_warnings = False

        for xev in extract_results:
            if xev.errors:
                has_errors = True
            if xev.warnings:
                has_warnings = True

            source = fileutils.as_posixpath(xev.source)
            if not isinstance(source, unicode):
                ascii_source = toascii(source, translit=True)
                source = ascii_source.decode('utf-8', 'replace')
                # NOTE(review): the path is only made relative for non-unicode
                # sources; confirm this is intended.
                source = utils.get_relative_path(
                    path=source,
                    len_base_path=len_base_path,
                    base_is_dir=base_is_dir,
                )

            for e in xev.errors:
                message = 'ERROR extracting: %(source)s: %(e)s' % dict(source=source, e=e)
                echo_stderr(message, fg='red')

            for warn in xev.warnings:
                message = 'WARNING extracting: %(source)s: %(warn)s' % dict(source=source, warn=warn)
                echo_stderr(message, fg='yellow')

        # errors take precedence over warnings for the final status color
        if has_errors:
            summary_color = 'red'
        elif has_warnings:
            summary_color = 'yellow'
        else:
            summary_color = 'green'

        echo_stderr('Extracting done.', fg=summary_color, reset=True)

示例#3

0

显示文件

文件： strings.py 项目： balusarakesh/dje_license_search

def strings_in_file(location, filt=filter_string):
    """
    Yield the strings extracted from the file at `location` that pass the
    `is_good` check with the `filt` filter and are not empty once stripped.
    Each yielded string is converted with `toascii`.
    """
    for candidate in file_strings(location):
        if not is_good(candidate, filt):
            continue
        stripped = candidate.strip()
        if not stripped:
            continue
        yield toascii(stripped)

示例#4

0

显示文件

文件： extract_cli.py 项目： ocabrisses/scancode-toolkit

 def extract_event(item):
     """
     Return the progress line to display for the extract event `item`, or an
     empty string when nothing should be displayed (quiet mode, empty item,
     or a completed item in verbose mode).
     """
     if quiet or not item:
         return ''

     source = item.source
     if not isinstance(source, unicode):
         source = toascii(source, translit=True).decode('utf-8', 'replace')

     if verbose and item.done:
         return ''

     if not source:
         line = ''
     elif verbose:
         # verbose mode shows the path relative to the base path
         relative = utils.get_relative_path(
             path=source,
             len_base_path=len_base_path,
             base_is_dir=base_is_dir,
         )
         line = relative or ''
     else:
         # otherwise only the file name is shown
         line = fileutils.file_name(source) or ''

     if not isinstance(line, unicode):
         line = toascii(line, translit=True).decode('utf-8', 'replace')

     return 'Extracting: %(line)s' % dict(line=line)

示例#5

0

显示文件

文件： cli.py 项目： ocabrisses/scancode-toolkit

 def scan_event(item):
     """
     Return the styled progress line displayed each time a file is scanned,
     or an empty string when quiet, when `item` is empty or when there is no
     `display_fn`. `item` is a (scan success flag, scanned path) pair.
     """
     if quiet:
         return ''
     if not item or not display_fn:
         return ''

     success, scanned = item
     scanned = unicode(toascii(scanned))

     if verbose:
         shown = scanned
     else:
         # non-verbose mode shortens the path to a fixed display width
         shown = fixed_width_file_name(scanned, max_file_name_len)

     color = 'green' if success else 'red'
     return style('Scanned: ') + style(shown, fg=color)

示例#6

0

显示文件

文件： strings.py 项目： ocabrisses/scancode-toolkit

def strings_from_file(location, buff_size=1024 * 1024, ascii=False, clean=True, min_len=MIN_LEN):
    """
    Yield the strings found by `strings_from_string` in the file at `location`.

    The file is read in binary mode in chunks of `buff_size` bytes to limit
    memory usage. Each chunk is processed on its own.
    NOTE(review): a string straddling two chunks is therefore presumably
    reported as separate pieces; confirm against `strings_from_string`.

    `clean` and `min_len` are passed through to `strings_from_string`.

    If `ascii` is True, each string is converted with `toascii` and stripped,
    and strings that are then empty or shorter than `min_len` are skipped.
    """
    # The `min_len` argument is honored as passed by the caller: it used to be
    # unconditionally reset to MIN_LEN here, making the parameter a no-op.
    with open(location, 'rb') as f:
        while True:
            buf = f.read(buff_size)
            if not buf:
                # end of file
                break
            for s in strings_from_string(buf, clean=clean, min_len=min_len):
                if ascii:
                    s = toascii(s).strip()
                    # the ASCII conversion and stripping may shorten the string
                    if not s or len(s) < min_len:
                        continue
                yield s

示例#7

0

显示文件

文件： paths.py 项目： 10imaging/scancode-toolkit

def safe_path(path, lowered=True, resolved=True):
    """
    Return the path-like string `path` converted to a posix path string that
    is safer to use as a file path.

    The path is stripped, translated with the `path_safe` table, converted
    with `text.toascii` and then with `fileutils.as_posixpath`.
    If `lowered` is True, the result is lowercased.
    If `resolved` is True, the result is passed through `resolve`.
    """
    # TODO: replace COM/PRN/LPT windows special names
    # TODO: resolve 'UNC' windows paths
    # TODO: strip leading windows drives

    # remove any unsafe chars
    cleaned = path.strip().translate(path_safe)
    posix_path = fileutils.as_posixpath(text.toascii(cleaned))

    if lowered:
        posix_path = posix_path.lower()

    return resolve(posix_path) if resolved else posix_path

示例#8

0

显示文件

文件： test_text.py 项目： nexB/commoncode

def test_toascii_works_with_empty_unicode_or_bytes():
    """
    Check that toascii returns an empty unicode string for empty bytes and
    empty unicode input, both with and without transliteration.
    """
    # The original repeated the same two assertions twice and never checked
    # bytes with translit=True nor unicode with translit=False: cover the
    # full cross product instead.
    for empty in (b'', u''):
        for translit in (False, True):
            assert text.toascii(empty, translit=translit) == u''

示例#9

0

显示文件

文件： test_text.py 项目： nexB/commoncode

def test_toascii():
    """
    Check the toascii conversion of a string of accented and special
    characters, without and then with transliteration.
    """
    accented = u"ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüýÿẞß®©œŒØøÆæ₵₡￠¢Žž"
    without_translit = r'AAAAAACEEEEIIIINOOOOOUUUUYaaaaaaceeeeiiiinooooouuuuyyZz'
    with_translit = r'AAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyySsss(r)(c)oeOEOoAEae_CL/CC/Zz'

    assert text.toascii(accented, translit=False) == without_translit
    assert text.toascii(accented, translit=True) == with_translit

示例#10

0

显示文件

def portable_filename(filename):
    """
    Return a new name for `filename` that is portable across operating systems.

    In particular the returned file name is guaranteed to be:
    - a portable name on most OSses using a limited ASCII characters set including
      some limited punctuation.
    - a valid name on Linux, Windows and Mac.

    Unicode file names are transliterated to plain ASCII.

    See for more details:
    - http://www.opengroup.org/onlinepubs/007904975/basedefs/xbd_chap03.html
    - https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx
    - http://www.boost.org/doc/libs/1_36_0/libs/filesystem/doc/portability_guide.htm

    Also inspired by Werkzeug:
    https://raw.githubusercontent.com/pallets/werkzeug/8c2d63ce247ba1345e1b9332a68ceff93b2c07ab/werkzeug/utils.py

    For example:
    >>> expected = 'A___file__with_Spaces.mov'
    >>> assert expected == portable_filename("A:\\ file/ with Spaces.mov")

    Unresolved relative paths will be treated as a single filename. Use
    resolve instead if you want to resolve paths:
    >>> expected = '___.._.._etc_passwd'
    >>> assert expected == portable_filename("../../../etc/passwd")

    Unicode name are transliterated:
    >>> expected = 'This_contain_UMLAUT_umlauts.txt'
    >>> assert expected == portable_filename(u'This contain UMLAUT \xfcml\xe4uts.txt')
    """
    ascii_name = toascii(filename, translit=True)
    if not ascii_name:
        return '_'

    portable = replace_illegal_chars('_', ascii_name)

    # Windows device names: illegal in any letter case and with or without an
    # extension. An underscore is inserted after the base name of such names.
    reserved_on_windows = {
        'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
        'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
        'aux', 'con', 'nul', 'prn',
    }
    stem, dot, remainder = portable.partition('.')
    if stem.lower() in reserved_on_windows:
        portable = stem + '_' + dot + remainder

    # a name made only of dots gets one 'dot' word per dot instead
    if set(portable) == {'.'}:
        portable = 'dot' * len(portable)

    # turn each leading dotdot into a pair of underscores
    if portable != '..':
        while portable.startswith('..'):
            portable = '__' + portable[2:]

    return portable

示例#11

0

显示文件

文件： copyright_summary.py 项目： xavierfigueroav/scancode-toolkit

 def transliterate(self):
     """Replace `self.key` with its `toascii` transliteration."""
     transliterated = toascii(self.key, translit=True)
     self.key = transliterated

示例#12

0

显示文件

文件： command.py 项目： SmartsYoung/FenixscanX

def execute2(cmd_loc, args, lib_dir=None, cwd=None, env=None, to_files=False, log=TRACE):
    """
    Run a `cmd_loc` command with the `args` arguments list and return a
    (return code, stdout, stderr) tuple.

    To avoid RAM exhaustion, stdout and stderr are always written to files in
    a fresh temporary directory.

    If `to_files` is False, return the stripped content of stdout and stderr
    converted with `text.toascii`. Otherwise, return the locations of the
    stdout and stderr temporary files.

    Run the command from within `pushd(lib_dir)`, using the `cwd` current
    working directory (defaulting to `curr_dir`) with an `env` dict of
    environment variables built by `get_env(env, lib_dir)`.

    If `log` is true, the full command and its settings are reported with
    `logger.debug` when TRACE is set, or printed otherwise.
    """
    assert cmd_loc
    full_cmd = [cmd_loc] + (args or [])

    env = get_env(env, lib_dir) or None
    cwd = cwd or curr_dir

    # temp files for stderr and stdout
    tmp_dir = get_temp_dir(prefix='cmd-')

    # file names are bytes on Linux with Python 2 and text otherwise
    if on_linux and py2:
        stdout_name = b'stdout'
        stderr_name = b'stderr'
    else:
        stdout_name = 'stdout'
        stderr_name = 'stderr'

    sop = path.join(tmp_dir, stdout_name)
    sep = path.join(tmp_dir, stderr_name)

    # shell==True is DANGEROUS but we are not running arbitrary commands
    # though we can execute commands that just happen to be in the path
    shell = True if on_windows else False

    if log:
        printer = logger.debug if TRACE else lambda x: print(x)
        # NOTE: the message is formatted from locals(): keep the names cmd_loc,
        # full_cmd, env, shell, cwd, sop and sep in sync with the format string.
        printer(
            'Executing command %(cmd_loc)r as:\n%(full_cmd)r\nwith: env=%(env)r\n'
            'shell=%(shell)r\ncwd=%(cwd)r\nstdout=%(sop)r\nstderr=%(sep)r'
            % locals())

    proc = None
    rc = 100

    # Always defined, even if neither the py2 nor the py3 flag is set.
    okwargs = dict(mode='w', encoding='utf-8') if py3 else dict(mode='wb')

    try:
        with io.open(sop, **okwargs) as stdout_file, io.open(sep, **okwargs) as stderr_file:
            with pushd(lib_dir):
                popen_args = dict(
                    cwd=cwd,
                    env=env,
                    stdout=stdout_file,
                    stderr=stderr_file,
                    shell=shell,
                    # -1 defaults bufsize to system bufsize
                    bufsize=-1,
                    universal_newlines=True,
                )

                proc = subprocess.Popen(full_cmd, **popen_args)
                # Both streams go to files, so communicate() has nothing to
                # return: it is only used to wait for the process to finish.
                proc.communicate()
                rc = proc.returncode

    finally:
        # proc is still None here if Popen itself failed
        close(proc)

    if not to_files:
        # return output as ASCII string loaded from the output files, closing
        # each file handle as soon as its content has been read
        with open(sop, 'rb') as stdout_file:
            sop = text.toascii(stdout_file.read().strip())
        with open(sep, 'rb') as stderr_file:
            sep = text.toascii(stderr_file.read().strip())
    return rc, sop, sep

示例#13

0

显示文件

文件： paths.py 项目： ocabrisses/scancode-toolkit

def portable_filename(filename):
    """
    Return a new name for `filename` that is portable across operating systems.

    In particular the returned file name is guaranteed to be:
    - a portable name on most OSses using a limited ASCII characters set including
      some limited punctuation.
    - a valid name on Linux, Windows and Mac.

    Unicode file names are transliterated to plain ASCII.

    See for more details:
    - http://www.opengroup.org/onlinepubs/007904975/basedefs/xbd_chap03.html
    - https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx
    - http://www.boost.org/doc/libs/1_36_0/libs/filesystem/doc/portability_guide.htm

    Also inspired by Werkzeug:
    https://raw.githubusercontent.com/pallets/werkzeug/8c2d63ce247ba1345e1b9332a68ceff93b2c07ab/werkzeug/utils.py

    For example:
    >>> expected = 'A___file__with_Spaces.mov'
    >>> assert expected == portable_filename("A:\\ file/ with Spaces.mov")

    Unresolved relative paths will be treated as a single filename. Use
    resolve instead if you want to resolve paths:
    >>> expected = '___.._.._etc_passwd'
    >>> assert expected == portable_filename("../../../etc/passwd")

    Unicode name are transliterated:
    >>> expected = 'This_contain_UMLAUT_umlauts.txt'
    >>> assert expected == portable_filename(u'This contain UMLAUT \xfcml\xe4uts.txt')
    """
    ascii_name = toascii(filename, translit=True)
    if not ascii_name:
        return '_'

    portable = replace_illegal_chars('_', ascii_name)

    # Windows device names: illegal in any letter case and with or without an
    # extension. An underscore is inserted after the base name of such names.
    reserved_on_windows = {
        'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
        'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
        'aux', 'con', 'nul', 'prn',
    }
    stem, dot, remainder = portable.partition('.')
    if stem.lower() in reserved_on_windows:
        portable = stem + '_' + dot + remainder

    # a name made only of dots gets one 'dot' word per dot instead
    if set(portable) == {'.'}:
        portable = 'dot' * len(portable)

    # turn each leading dotdot into a pair of underscores
    if portable != '..':
        while portable.startswith('..'):
            portable = '__' + portable[2:]

    return portable

示例#14

0

显示文件

文件： command.py 项目： nexB/commoncode

def execute(cmd_loc, args, cwd=None, env=None, to_files=False, log=TRACE):
    """
    Run the command at `cmd_loc` with the `args` arguments list and return a
    (return code, stdout, stderr) tuple.

    The stdout and stderr streams are always written to files in a temporary
    directory to avoid RAM exhaustion.

    When `to_files` is False, stdout and stderr are the content of these
    files converted with `text.toascii` and stripped. Otherwise, they are the
    locations of the stdout and stderr temporary files.

    The command runs with the `cwd` current working directory (defaulting to
    `curr_dir`) and an `env` dict of environment variables.
    """
    assert cmd_loc
    full_cmd = [cmd_loc] + (args or [])

    # any shared object should be either in the PATH, the rpath or
    # side-by-side with the executable
    cmd_dir = os.path.dirname(cmd_loc)
    env = get_env(env, lib_dir=cmd_dir) or None
    cwd = cwd or curr_dir

    # temp files for stderr and stdout
    tmp_dir = get_temp_dir(prefix='cmd-')
    sop = path.join(tmp_dir, 'stdout')
    sep = path.join(tmp_dir, 'stderr')

    # shell==True is DANGEROUS but we are not running arbitrary commands
    # though we can execute commands that just happen to be in the path
    # See why we need it on Windows https://bugs.python.org/issue8557
    shell = bool(on_windows)

    if log:
        message = (
            'Executing command %(cmd_loc)r as:\n%(full_cmd)r\nwith: env=%(env)r\n'
            'shell=%(shell)r\ncwd=%(cwd)r\nstdout=%(sop)r\nstderr=%(sep)r'
            % dict(
                cmd_loc=cmd_loc,
                full_cmd=full_cmd,
                env=env,
                shell=shell,
                cwd=cwd,
                sop=sop,
                sep=sep,
            )
        )
        if TRACE:
            logger.debug(message)
        else:
            print(message)

    proc = None
    rc = 100

    popen_kwargs = dict(
        cwd=cwd,
        env=env,
        shell=shell,
        # -1 defaults bufsize to system bufsize
        bufsize=-1,
        universal_newlines=True,
    )

    try:
        with io.open(sop, 'wb') as out_file, io.open(sep, 'wb') as err_file, pushd(cmd_dir):
            proc = subprocess.Popen(full_cmd, stdout=out_file, stderr=err_file, **popen_kwargs)
            # the streams go to files: communicate() only waits for completion
            proc.communicate()
            if proc:
                rc = proc.returncode
            else:
                rc = 0
    finally:
        close(proc)

    if to_files:
        return rc, sop, sep

    # return output as ASCII string loaded from the output files
    with open(sop, 'rb') as out_file:
        out_text = text.toascii(out_file.read()).strip()

    with open(sep, 'rb') as err_file:
        err_text = text.toascii(err_file.read()).strip()

    return rc, out_text, err_text

示例#15

0

显示文件

文件： win_pe.py 项目： vsurge/barista

def pe_info(location, include_extra_data=False):
    """
    Return a mapping of common data available for a Windows dll or exe
    PE (portable executable) at `location`.

    Return an empty dict if `location` is empty or is not detected as a
    Windows executable by `contenttype.get_type`.

    Otherwise return an ordered mapping with one entry for each of
    PE_INFO_KEYS (None when no value was collected) plus an 'extra_data'
    dict holding any string table entry whose key is not in PE_INFO_KEYSET.

    NOTE: `include_extra_data` is accepted but currently not consulted:
    extra data is always collected under the 'extra_data' key.

    Any exception raised while parsing the PE is logged when TRACE is set
    and then re-raised.
    """
    if not location:
        return {}

    T = contenttype.get_type(location)

    if not T.is_winexe:
        return {}

    # FIXME: WTF: we initialize with empty values, as we must always
    # return something for all values
    peinf = OrderedDict(
        [(k, None) for k in PE_INFO_KEYS] + [('extra_data', {})]
    )

    try:
        with closing(pefile.PE(location)) as pe:
            if not hasattr(pe, 'FileInfo'):
                # No fileinfo section: we return just empties
                return peinf

            # >>> pe.FileInfo: this is a list of list of Structure objects:
            # [[<Structure: [VarFileInfo] >,  <Structure: [StringFileInfo]>]]
            pefi = pe.FileInfo
            if not pefi or not isinstance(pefi, list):
                if TRACE:
                    logger.debug('pe_info: not pefi')
                return peinf

            # only the first list of structures is considered
            pefi = pefi[0]

            # exact type match kept as-is: subclasses of Structure are skipped
            sfi = [
                x for x in pefi if type(x) == pefile.Structure
                and hasattr(x, 'name') and x.name == 'StringFileInfo'
            ]

            if not sfi:
                # No stringfileinfo section: we return just empties
                if TRACE:
                    logger.debug('pe_info: not sfi')
                return peinf

            sfi = sfi[0]

            if not hasattr(sfi, 'StringTable'):
                # No fileinfo.StringTable section: we return just empties
                if TRACE:
                    logger.debug('pe_info: not StringTable')
                return peinf

            strtab = sfi.StringTable
            if not strtab or not isinstance(strtab, list):
                return peinf

            # only the first string table is considered
            strtab = strtab[0]

            if TRACE:
                logger.debug('pe_info: Entries keys: ' +
                             str(set(strtab.entries)))
                logger.debug('pe_info: Entry values:')
                for k, v in strtab.entries.items():
                    logger.debug('  ' + str(k) + ': ' + repr(v))

            for k, v in strtab.entries.items():
                # convert unicode to a safe ASCII representation
                value = unicode(text.toascii(v).strip())
                if k in PE_INFO_KEYSET:
                    peinf[k] = value
                else:
                    # collect extra_data if any:
                    peinf['extra_data'][k] = value

    except Exception as e:
        # Log before re-raising: the logging used to sit after the bare
        # `raise` and could never run.
        if TRACE:
            logger.debug('pe_info: Failed to collect infos: ' + repr(e))
        raise

    return peinf