def execute(cmd, args, root_dir=None, cwd=None, env=None, to_files=False):
    """
    Run a `cmd` external command with the `args` arguments list and return a
    tuple of (return code, stdout, stderr).

    To avoid RAM exhaustion, the stdout and stderr streams are always written
    to temporary files. If `to_files` is False, return the content of stdout
    and stderr converted with `text.toascii`. Otherwise, return the locations
    of the stdout and stderr temporary files.

    Resolve the `cmd` location with `get_locations` using `root_dir`; when no
    location is resolved, `cmd` is used as-is.

    `args` may be None or empty, in which case the command is run without
    arguments.

    Run the command using the `cwd` current working directory (defaulting to
    `curr_dir`) with an `env` dict of environment variables.
    """
    assert cmd
    cmd_loc, _bin_dir, lib_dir = get_locations(cmd, root_dir)
    # The parentheses matter: without them `+` binds tighter than `or`
    # and a None `args` fails with a TypeError.
    full_cmd = [cmd_loc or cmd] + (args or [])
    env = get_env(env, lib_dir) or None
    cwd = cwd or curr_dir

    # temp files for stderr and stdout
    tmp_dir = fileutils.get_temp_dir(base_dir='cmd')
    sop = os.path.join(tmp_dir, 'stdout')
    sep = os.path.join(tmp_dir, 'stderr')

    # shell==True is DANGEROUS but we are not running arbitrary commands
    # though we can execute command that just happen to be in the path
    shell = True if on_windows else False

    logger.debug(
        'Executing command %(cmd)r as %(full_cmd)r with: env=%(env)r, '
        'shell=%(shell)r, cwd=%(cwd)r, stdout=%(sop)r, stderr=%(sep)r.'
        % dict(cmd=cmd, full_cmd=full_cmd, env=env, shell=shell, cwd=cwd,
               sop=sop, sep=sep)
    )

    proc = None
    try:
        with open(sop, 'wb') as stdout, open(sep, 'wb') as stderr:
            proc = subprocess.Popen(
                full_cmd,
                cwd=cwd,
                env=env,
                stdout=stdout,
                stderr=stderr,
                shell=shell,
                # -1 defaults bufsize to system bufsize
                bufsize=-1,
                universal_newlines=True,
            )
            # Output goes to the files: communicate() only waits for the end.
            proc.communicate()
            rc = proc.returncode if proc else 0
    finally:
        close(proc)

    if not to_files:
        # return output as ASCII string loaded from the output files,
        # closing each file as soon as it has been read
        with open(sop, 'rb') as so:
            sop = text.toascii(so.read().strip())
        with open(sep, 'rb') as se:
            sep = text.toascii(se.read().strip())
    return rc, sop, sep
def display_extract_summary():
    """
    Display a summary of warnings and errors if any.

    For each event in `extract_results`, echo every error in red and every
    warning in yellow to stderr, prefixed with the event source path made
    relative with `utils.get_relative_path`. Then echo a final
    'Extracting done.' line colored red if any error was seen, yellow if only
    warnings were seen and green otherwise.
    """
    has_warnings = False
    has_errors = False

    for xev in extract_results:
        has_errors = has_errors or bool(xev.errors)
        has_warnings = has_warnings or bool(xev.warnings)

        source = fileutils.as_posixpath(xev.source)
        if not isinstance(source, unicode):
            source = toascii(source, translit=True).decode('utf-8', 'replace')
        source = utils.get_relative_path(
            path=source,
            len_base_path=len_base_path,
            base_is_dir=base_is_dir,
        )

        for e in xev.errors:
            echo_stderr(
                'ERROR extracting: %(source)s: %(e)s' % dict(source=source, e=e),
                fg='red',
            )
        for warn in xev.warnings:
            echo_stderr(
                'WARNING extracting: %(source)s: %(warn)s' % dict(source=source, warn=warn),
                fg='yellow',
            )

    # errors take precedence over warnings for the final color
    summary_color = 'green'
    if has_warnings:
        summary_color = 'yellow'
    if has_errors:
        summary_color = 'red'

    echo_stderr('Extracting done.', fg=summary_color, reset=True)
def strings_in_file(location, filt=filter_string):
    """
    Yield ASCII strings extracted from the file at `location`.

    Each candidate string returned by `file_strings` is kept only if
    `is_good` accepts it with the `filt` filter. Kept strings are stripped,
    empty ones are skipped, and the rest are passed through `toascii` before
    being yielded.
    """
    for candidate in file_strings(location):
        if not is_good(candidate, filt):
            continue
        stripped = candidate.strip()
        if not stripped:
            continue
        yield toascii(stripped)
def extract_event(item):
    """
    Return the text to display for an extract event `item`.

    Return an empty string when `quiet` is set, when `item` is empty, or when
    `verbose` is set and the event is flagged as done. Otherwise return an
    'Extracting: ...' line showing the relative path of the event source in
    verbose mode, or only its file name in non-verbose mode.
    """
    if quiet or not item:
        return ''

    source = item.source
    if not isinstance(source, unicode):
        source = toascii(source, translit=True).decode('utf-8', 'replace')

    if verbose:
        if item.done:
            return ''
        if source:
            shown = utils.get_relative_path(
                path=source,
                len_base_path=len_base_path,
                base_is_dir=base_is_dir,
            )
        else:
            shown = ''
    else:
        shown = fileutils.file_name(source) if source else ''

    # any falsy result is displayed as an empty string
    shown = shown or ''

    if not isinstance(shown, unicode):
        shown = toascii(shown, translit=True).decode('utf-8', 'replace')

    return 'Extracting: %(line)s' % {'line': shown}
def scan_event(item):
    """
    Return the progress text displayed each time a file is scanned.

    `item` is unpacked as a (success flag, scanned path) pair. Return an empty
    string when `quiet` is set, when `item` is empty or when `display_fn` is
    not set. The path is shown in full in verbose mode and shortened with
    `fixed_width_file_name` otherwise; it is styled green on success and red
    on failure.
    """
    if quiet or not item or not display_fn:
        return ''

    succeeded, scanned = item
    scanned = unicode(toascii(scanned))

    if verbose:
        shown = scanned
    else:
        shown = fixed_width_file_name(scanned, max_file_name_len)

    color = 'green' if succeeded else 'red'
    prefix = style('Scanned: ')
    return prefix + style(shown, fg=color)
def strings_from_file(location, buff_size=1024 * 1024, ascii=False, clean=True, min_len=MIN_LEN):
    """
    Yield strings made only of ASCII characters found in the file at
    `location`.

    The file is read in binary mode and processed in chunks of `buff_size`
    bytes to limit memory usage. Each chunk is passed on its own to
    `strings_from_string`, together with `clean` and `min_len`.

    If `ascii` is True, each string is converted with `toascii` before being
    yielded. Strings are stripped, and strings that are empty or shorter than
    `min_len` after stripping are skipped.

    NOTE(review): because chunks are processed independently, a string that
    spans a chunk boundary is presumably seen as two separate pieces; verify
    against `strings_from_string`.
    """
    # `min_len` used to be overwritten with MIN_LEN here, which silently
    # discarded the caller's value. The argument is now honored.
    with open(location, 'rb') as f:
        # iter() with a sentinel stops on the first empty read (end of file)
        for buf in iter(lambda: f.read(buff_size), b''):
            for s in strings_from_string(buf, clean=clean, min_len=min_len):
                if ascii:
                    s = toascii(s)
                s = s.strip()
                if not s or len(s) < min_len:
                    continue
                yield s
def safe_path(path, lowered=True, resolved=True):
    """
    Return a posix path string built from the path-like string `path` that is
    safer to use as a file path on all OSes.

    The path is stripped, translated with the `path_safe` table to replace
    unsafe characters, converted with `text.toascii` and then converted to a
    posix path. The result is lowercased if `lowered` is True and passed
    through `resolve` if `resolved` is True.
    """
    # TODO: replace COM/PRN/LPT windows special names
    # TODO: resolve 'UNC' windows paths
    # TODO: strip leading windows drives

    # remove any unsafe chars, then normalize to an ASCII posix path
    translated = path.strip().translate(path_safe)
    cleaned = fileutils.as_posixpath(text.toascii(translated))

    if lowered:
        cleaned = cleaned.lower()
    if not resolved:
        return cleaned
    return resolve(cleaned)
def test_toascii_works_with_empty_unicode_or_bytes():
    """
    Check that `text.toascii` returns an empty string for empty bytes and
    empty unicode inputs, with and without transliteration.
    """
    # The second pair of assertions used to repeat the first pair verbatim.
    # Each (input type, translit) combination is now checked once.
    assert text.toascii(b'', translit=False) == u''
    assert text.toascii(u'', translit=True) == u''
    assert text.toascii(b'', translit=True) == u''
    assert text.toascii(u'', translit=False) == u''
def test_toascii():
    """
    Check `text.toascii` on a string of accented and special characters, both
    without and with transliteration.
    """
    accented = u"ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüýÿẞß®©œŒØøÆæ₵₡¢¢Žž"

    # without transliteration
    without_translit = r'AAAAAACEEEEIIIINOOOOOUUUUYaaaaaaceeeeiiiinooooouuuuyyZz'
    assert text.toascii(accented, translit=False) == without_translit

    # with transliteration
    with_translit = r'AAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyySsss(r)(c)oeOEOoAEae_CL/CC/Zz'
    assert text.toascii(accented, translit=True) == with_translit
def portable_filename(filename):
    """
    Return a new name for `filename` that is portable across operating systems.

    In particular the returned file name is guaranteed to be:
    - a portable name on most OSses using a limited ASCII characters set including
      some limited punctuation.
    - a valid name on Linux, Windows and Mac.

    Unicode file names are transliterated to plain ASCII.

    See for more details:
    - http://www.opengroup.org/onlinepubs/007904975/basedefs/xbd_chap03.html
    - https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx
    - http://www.boost.org/doc/libs/1_36_0/libs/filesystem/doc/portability_guide.htm

    Also inspired by Werkzeug:
    https://raw.githubusercontent.com/pallets/werkzeug/8c2d63ce247ba1345e1b9332a68ceff93b2c07ab/werkzeug/utils.py

    For example:
    >>> expected = 'A___file__with_Spaces.mov'
    >>> assert expected == portable_filename("A:\\ file/ with Spaces.mov")

    Unresolved relative paths will be treated as a single filename. Use
    resolve instead if you want to resolve paths:
    >>> expected = '___.._.._etc_passwd'
    >>> assert expected == portable_filename("../../../etc/passwd")

    Unicode name are transliterated:
    >>> expected = 'This_contain_UMLAUT_umlauts.txt'
    >>> assert expected == portable_filename(u'This contain UMLAUT \xfcml\xe4uts.txt')
    """
    name = toascii(filename, translit=True)
    if not name:
        return '_'

    name = replace_illegal_chars('_', name)

    # These device names are illegal on Windows in any letter case and with
    # or without an extension: an underscore is inserted after the base name.
    reserved_on_windows = frozenset((
        'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
        'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
        'aux', 'con', 'nul', 'prn',
    ))

    stem, dot, extension = name.partition('.')
    if stem.lower() in reserved_on_windows:
        name = stem + '_' + dot + extension

    # A name made only of dots is spelled out, one 'dot' per character.
    if set(name) == {'.'}:
        name = 'dot' * len(name)

    # Replace each leading '..' pair with '__', one pair at a time.
    if name != '..' and name.startswith('..'):
        while name.startswith('..'):
            name = name.replace('..', '__', 1)

    return name
def transliterate(self):
    """
    Replace this object `key` in place with the result of calling `toascii`
    on it with transliteration enabled.
    """
    transliterated = toascii(self.key, translit=True)
    self.key = transliterated
def execute2(cmd_loc, args, lib_dir=None, cwd=None, env=None, to_files=False, log=TRACE):
    """
    Run a `cmd_loc` command with the `args` arguments list and return a tuple
    of (return code, stdout, stderr).

    To avoid RAM exhaustion, the stdout and stderr streams are always written
    to temporary files. If `to_files` is False, return the content of stdout
    and stderr converted with `text.toascii`. Otherwise, return the locations
    of the stdout and stderr temporary files.

    `args` may be None or empty. The environment is built with `get_env` from
    `env` and `lib_dir`, and the command is started from within a
    `pushd(lib_dir)` context.

    Run the command using the `cwd` current working directory (defaulting to
    `curr_dir`) with an `env` dict of environment variables. If `log` is true,
    the full command and its settings are logged or printed first.
    """
    assert cmd_loc
    full_cmd = [cmd_loc] + (args or [])

    env = get_env(env, lib_dir) or None
    cwd = cwd or curr_dir

    # temp files for stderr and stdout
    tmp_dir = get_temp_dir(prefix='cmd-')

    # On Linux with Python 2 the file names are joined as bytes.
    if on_linux and py2:
        stdout_name = b'stdout'
        stderr_name = b'stderr'
    else:
        stdout_name = 'stdout'
        stderr_name = 'stderr'

    sop = path.join(tmp_dir, stdout_name)
    sep = path.join(tmp_dir, stderr_name)

    # shell==True is DANGEROUS but we are not running arbitrary commands
    # though we can execute commands that just happen to be in the path
    shell = True if on_windows else False

    if log:
        printer = logger.debug if TRACE else lambda x: print(x)
        printer(
            'Executing command %(cmd_loc)r as:\n%(full_cmd)r\nwith: env=%(env)r\n'
            'shell=%(shell)r\ncwd=%(cwd)r\nstdout=%(sop)r\nstderr=%(sep)r'
            % dict(cmd_loc=cmd_loc, full_cmd=full_cmd, env=env, shell=shell,
                   cwd=cwd, sop=sop, sep=sep)
        )

    proc = None
    rc = 100

    # Always bind the open() arguments: binary mode unless running on Python 3.
    if py3:
        okwargs = dict(mode='w', encoding='utf-8')
    else:
        okwargs = dict(mode='wb')

    try:
        with io.open(sop, **okwargs) as stdout, io.open(sep, **okwargs) as stderr:
            with pushd(lib_dir):
                proc = subprocess.Popen(
                    full_cmd,
                    cwd=cwd,
                    env=env,
                    stdout=stdout,
                    stderr=stderr,
                    shell=shell,
                    # -1 defaults bufsize to system bufsize
                    bufsize=-1,
                    universal_newlines=True,
                )
                # Output goes to the files: communicate() only waits for the end.
                proc.communicate()
                rc = proc.returncode if proc else 0
    finally:
        close(proc)

    if not to_files:
        # return output as ASCII string loaded from the output files,
        # closing each file as soon as it has been read
        with open(sop, 'rb') as so:
            sop = text.toascii(so.read().strip())
        with open(sep, 'rb') as se:
            sep = text.toascii(se.read().strip())
    return rc, sop, sep
def execute(cmd_loc, args, cwd=None, env=None, to_files=False, log=TRACE):
    """
    Run the command at `cmd_loc` with the `args` arguments list and return a
    tuple of (return code, stdout, stderr).

    The stdout and stderr streams are always written to temporary files to
    avoid RAM exhaustion. When `to_files` is False, the content of these files
    is returned converted with `text.toascii` and stripped. Otherwise the
    locations of the stdout and stderr temporary files are returned.

    The command runs with `cwd` as current working directory (defaulting to
    `curr_dir`) and with an environment built by `get_env` from `env` and the
    directory of `cmd_loc`. If `log` is true, the full command and its
    settings are logged or printed first.
    """
    assert cmd_loc
    command = [cmd_loc] + (args or [])

    # any shared object should be either in the PATH, the rpath or
    # side-by-side with the executable
    cmd_dir = os.path.dirname(cmd_loc)
    env = get_env(env, lib_dir=cmd_dir) or None
    cwd = cwd or curr_dir

    # temp files for stderr and stdout
    tmp_dir = get_temp_dir(prefix='cmd-')
    out_loc = path.join(tmp_dir, 'stdout')
    err_loc = path.join(tmp_dir, 'stderr')

    # shell==True is DANGEROUS but we are not running arbitrary commands
    # though we can execute commands that just happen to be in the path
    # See why we need it on Windows https://bugs.python.org/issue8557
    shell = True if on_windows else False

    if log:
        if TRACE:
            emit = logger.debug
        else:
            emit = lambda x: print(x)
        message = (
            'Executing command %(cmd_loc)r as:\n%(full_cmd)r\nwith: env=%(env)r\n'
            'shell=%(shell)r\ncwd=%(cwd)r\nstdout=%(sop)r\nstderr=%(sep)r'
        )
        emit(message % dict(
            cmd_loc=cmd_loc,
            full_cmd=command,
            env=env,
            shell=shell,
            cwd=cwd,
            sop=out_loc,
            sep=err_loc,
        ))

    proc = None
    rc = 100

    try:
        with io.open(out_loc, 'wb') as out_file, io.open(err_loc, 'wb') as err_file, pushd(cmd_dir):
            popen_kwargs = dict(
                cwd=cwd,
                env=env,
                stdout=out_file,
                stderr=err_file,
                shell=shell,
                # -1 defaults bufsize to system bufsize
                bufsize=-1,
                universal_newlines=True,
            )
            proc = subprocess.Popen(command, **popen_kwargs)
            proc.communicate()
            rc = proc.returncode if proc else 0
    finally:
        close(proc)

    if to_files:
        return rc, out_loc, err_loc

    # return output as ASCII string loaded from the output files
    with open(out_loc, 'rb') as so:
        raw_out = so.read()
    with open(err_loc, 'rb') as se:
        raw_err = se.read()

    return rc, text.toascii(raw_out).strip(), text.toascii(raw_err).strip()
def pe_info(location, include_extra_data=False):
    """
    Return a mapping of common data available for a Windows dll or exe PE
    (portable executable) at `location`.

    Return an empty mapping if `location` is empty or if the file is not
    detected as a Windows executable. Otherwise return an ordered mapping with
    one entry for each of PE_INFO_KEYS (None when no value was found) and an
    'extra_data' mapping holding any string table entry that is not in
    PE_INFO_KEYSET.

    NOTE: `include_extra_data` is currently not used: extra data are always
    collected under the 'extra_data' key.

    Any exception raised while parsing the PE file is logged when TRACE is
    enabled and then re-raised.
    """
    if not location:
        return {}

    T = contenttype.get_type(location)

    if not T.is_winexe:
        return {}

    # FIXME: WTF: we initialize with empty values, as we must always
    # return something for all values
    peinf = OrderedDict(
        [(k, None) for k in PE_INFO_KEYS] + [('extra_data', {})]
    )

    try:
        with closing(pefile.PE(location)) as pe:
            if not hasattr(pe, 'FileInfo'):
                # No fileinfo section: we return just empties
                return peinf

            # >>> pe.FileInfo: this is a list of list of Structure objects:
            # [[<Structure: [VarFileInfo] >, <Structure: [StringFileInfo]>]]
            pefi = pe.FileInfo
            if not pefi or not isinstance(pefi, list):
                if TRACE:
                    logger.debug('pe_info: not pefi')
                return peinf

            pefi = pefi[0]

            sfi = [
                x for x in pefi
                if type(x) == pefile.Structure
                and hasattr(x, 'name')
                and x.name == 'StringFileInfo'
            ]

            if not sfi:
                # No stringfileinfo section: we return just empties
                if TRACE:
                    logger.debug('pe_info: not sfi')
                return peinf

            sfi = sfi[0]

            if not hasattr(sfi, 'StringTable'):
                # No fileinfo.StringTable section: we return just empties
                if TRACE:
                    logger.debug('pe_info: not StringTable')
                return peinf

            strtab = sfi.StringTable
            if not strtab or not isinstance(strtab, list):
                return peinf

            # only the first string table is used
            strtab = strtab[0]

            if TRACE:
                logger.debug('pe_info: Entries keys: ' + str(set(k for k in strtab.entries)))
                logger.debug('pe_info: Entry values:')
                for k, v in strtab.entries.items():
                    logger.debug('  ' + str(k) + ': ' + repr(v))

            for k, v in strtab.entries.items():
                # convert unicode to a safe ASCII representation
                value = unicode(text.toascii(v).strip())
                if k in PE_INFO_KEYSET:
                    peinf[k] = value
                else:
                    # collect extra_data if any:
                    peinf['extra_data'][k] = value

    except Exception as e:
        # Log before re-raising: the trace used to sit after the bare
        # `raise` and could never run.
        if TRACE:
            logger.debug('pe_info: Failed to collect infos: ' + repr(e))
        raise

    return peinf