def scan(input_path,
         scanners,
         verbose=False, quiet=False,
         processes=1, timeout=DEFAULT_TIMEOUT,
         diag=False,
         scans_cache_class=None,
         strip_root=False,
         full_root=False,
         ignore=None):
    """
    Return a tuple of (files_count, scan_results, success) where
    scan_results is an iterable and success is a boolean.

    Run each requested scan proper: each individual file scan is cached
    on disk to free memory. Then the whole set of scans is loaded from
    the cache and streamed at the end.

    Arguments:
    - `input_path`: the path to scan.
    - `scanners`: a mapping of scan name -> tuple whose first item is a
      flag telling whether that scan is requested.
    - `verbose`: if True, progress shows the scanned path as-is rather
      than a fixed-width file name.
    - `quiet`: if True, nothing is displayed on stderr.
    - `processes`: number of processes requested from the pool.
    - `timeout`, `diag`: passed through to each individual file scan.
      `diag` also selects the detailed per-file error report.
    - `scans_cache_class`: required; called without arguments to obtain
      a scans cache object.
    - `strip_root`, `full_root`: passed through to `_get_root_dir`.
    - `ignore`: passed through to `resource_paths`; an empty mapping is
      used when not provided.

    Raise an AssertionError if `scans_cache_class` is not provided.
    """
    assert scans_cache_class
    scan_summary = OrderedDict()
    scan_summary['scanned_path'] = input_path
    scan_summary['processes'] = processes

    # Display scan start details
    ############################
    # FIXME: it does not make sense to use tuple and positional values
    scans = [k for k, v in scanners.items() if v[0]]
    _scans = ', '.join(scans)
    if not quiet:
        echo_stderr(
            'Scanning files for: %(_scans)s with %(processes)d process(es)...' % locals())

    scan_summary['scans'] = scans[:]
    scan_start = time()
    indexing_time = 0
    # FIXME: It does not make sense to use tuple and positional values
    with_licenses, _ = scanners.get('licenses', (False, ''))
    if with_licenses:
        # build index outside of the main loop for speed
        # this also ensures that forked processes will get the index on POSIX naturally
        if not quiet:
            echo_stderr('Building license detection index...', fg='green', nl=False)
        from licensedcode.cache import get_index
        get_index(False)
        indexing_time = time() - scan_start
        if not quiet:
            echo_stderr('Done.', fg='green', nl=True)

    scan_summary['indexing_time'] = indexing_time

    # TODO: handle pickling errors as in ./scancode -cilp samples/ -n3: note they are only caused by a FanoutCache
    # TODO: handle other exceptions properly to avoid any hanging

    # maxtasksperchild helps with recycling processes in case of leaks
    pool = get_pool(processes=processes, maxtasksperchild=1000)

    ignore = ignore or {}
    resources = resource_paths(input_path, ignore)
    logfile_path = scans_cache_class().cache_files_log

    paths_with_error = []
    files_count = 0
    with codecs.open(logfile_path, 'w', encoding='utf-8') as logfile_fd:

        logged_resources = _resource_logger(logfile_fd, resources)

        scanit = partial(_scanit, scanners=scanners, scans_cache_class=scans_cache_class,
                         diag=diag, timeout=timeout)

        max_file_name_len = compute_fn_max_len()
        # do not display a file name in progress bar if there is less than 5 chars available.
        display_fn = bool(max_file_name_len > 10)
        try:
            # Using chunksize is documented as much more efficient in the Python doc.
            # Yet "1" still provides a better and more progressive feedback.
            # With imap_unordered, results are returned as soon as ready and out of order.
            scanned_files = pool.imap_unordered(scanit, logged_resources, chunksize=1)
            pool.close()

            if not quiet:
                echo_stderr('Scanning files...', fg='green')

            def scan_event(item):
                """Progress event displayed each time a file is scanned"""
                if quiet or not item or not display_fn:
                    return ''
                _scan_success, _scanned_path = item
                if verbose:
                    _progress_line = _scanned_path
                else:
                    _progress_line = fixed_width_file_name(
                        _scanned_path, max_file_name_len)
                return style('Scanned: ') + style(
                    _progress_line, fg='green' if _scan_success else 'red')

            with progressmanager(scanned_files, item_show_func=scan_event,
                                 show_pos=True, verbose=verbose, quiet=quiet,
                                 file=sys.stderr) as scanned:
                while True:
                    try:
                        result = scanned.next()
                        scan_success, scanned_rel_path = result
                        if not scan_success:
                            paths_with_error.append(scanned_rel_path)
                        files_count += 1
                    except StopIteration:
                        break
                    except KeyboardInterrupt:
                        # stop scanning but still report on what was scanned so far
                        print('\nAborted with Ctrl+C!')
                        pool.terminate()
                        break
        finally:
            # ensure the pool is really dead to work around a Python 2.7.3 bug:
            # http://bugs.python.org/issue15101
            pool.terminate()

    # TODO: add stats to results somehow

    # Compute stats
    ##########################
    scan_summary['files_count'] = files_count
    scan_summary['files_with_errors'] = paths_with_error
    total_time = time() - scan_start
    scanning_time = total_time - indexing_time
    scan_summary['total_time'] = total_time
    scan_summary['scanning_time'] = scanning_time

    # The elapsed time can be zero with a coarse clock and a very small
    # scan (or negative if the wall clock is adjusted): report a speed
    # of zero rather than failing with a ZeroDivisionError.
    if scanning_time > 0:
        files_scanned_per_second = round(float(files_count) / scanning_time, 2)
    else:
        files_scanned_per_second = 0.0
    scan_summary['files_scanned_per_second'] = files_scanned_per_second

    if not quiet:
        # Display stats
        ##########################
        echo_stderr('Scanning done.', fg='red' if paths_with_error else 'green')
        if paths_with_error:
            if diag:
                echo_stderr('Some files failed to scan properly:', fg='red')
                # iterate cached results to collect all scan errors
                cached_scan = scans_cache_class()
                root_dir = _get_root_dir(input_path, strip_root, full_root)
                scan_results = cached_scan.iterate(
                    scans, root_dir, paths_subset=paths_with_error)
                for scan_result in scan_results:
                    errored_path = scan_result.get('path', '')
                    echo_stderr('Path: ' + errored_path, fg='red')
                    for error in scan_result.get('scan_errors', []):
                        for emsg in error.splitlines(False):
                            echo_stderr(' ' + emsg)
                    echo_stderr('')
            else:
                echo_stderr(
                    'Some files failed to scan properly. Use the --diag option for additional details:',
                    fg='red')
                for errored_path in paths_with_error:
                    echo_stderr(' ' + errored_path, fg='red')

        echo_stderr(
            'Scan statistics: %(files_count)d files scanned in %(total_time)ds.' % locals())
        echo_stderr(
            'Scan options: %(_scans)s with %(processes)d process(es).' % locals())
        echo_stderr(
            'Scanning speed: %(files_scanned_per_second)s files per sec.' % locals())
        echo_stderr('Scanning time: %(scanning_time)ds.' % locals())
        echo_stderr('Indexing time: %(indexing_time)ds.' % locals(), reset=True)

    success = not paths_with_error
    # finally return an iterator on cached results
    cached_scan = scans_cache_class()
    root_dir = _get_root_dir(input_path, strip_root, full_root)
    return files_count, cached_scan.iterate(scans, root_dir), success
def scan(input_path, copyright=True, license=True, package=True,  # @ReservedAssignment
         email=False, url=False, info=True, verbose=False, quiet=False):  # @ReservedAssignment
    """
    Scan every resource found under `input_path` and return a list of
    results, one ordered mapping per resource.

    Each mapping starts with a `location` key (the resource path made
    relative to the original input) and is then updated with whatever
    `scan_one` returns for that resource. The boolean arguments select
    which scanners are enabled; `verbose` and `quiet` drive the
    progress display.
    """
    # keep both forms of the input so that reported paths can be made
    # relative to what was originally provided
    original_input = fileutils.as_posixpath(input_path)
    expanded_input = os.path.abspath(os.path.expanduser(input_path))
    abs_input = fileutils.as_posixpath(expanded_input)

    # a disabled scanner maps to its falsy flag, an enabled one to its function
    scanners = {
        'copyrights': copyright and get_copyrights,
        'licenses': license and get_licenses,
        'packages': package and get_package_infos,
        'emails': email and get_emails,
        'urls': url and get_urls,
        'infos': info and get_file_infos,
    }

    # progress display callbacks are nested to close over `verbose`

    def scan_start():
        """Message displayed when the scan starts."""
        return style('Scanning files...', fg='green')

    def scan_event(item):
        """Message displayed each time a file is scanned."""
        if not item:
            return None
        if verbose:
            line = item
        else:
            line = fileutils.file_name(item) or ''
        return 'Scanning: %(line)s' % {'line': line}

    def scan_end():
        """Message displayed when the scan is done."""
        # no warnings or errors are tracked yet: these flags are placeholders
        has_warnings = False
        has_errors = False
        summary_color = 'green'
        if has_warnings:
            summary_color = 'yellow'
        if has_errors:
            summary_color = 'red'
        messages = [style('Scanning done.', fg=summary_color, reset=True)]
        return '\n'.join(messages)

    def scan_resource(resource):
        """Return the ordered scan result mapping for one resource."""
        location = fileutils.as_posixpath(resource)
        # fix paths: keep the location as relative to the original input
        relative_path = utils.get_relative_path(original_input, abs_input, location)
        entry = OrderedDict(location=relative_path)
        entry.update(scan_one(location, scanners))
        return entry

    ignored = partial(ignore.is_ignored, ignores=ignore.ignores_VCS, unignores={})
    resources = fileutils.resource_iter(abs_input, ignored=ignored)

    progress = utils.progressmanager(
        resources,
        item_show_func=scan_event,
        start_show_func=scan_start,
        finish_show_func=scan_end,
        verbose=verbose,
        show_pos=True,
        quiet=quiet,
    )
    with progress as progressive_resources:
        # Should we yield instead?
        results = [scan_resource(resource) for resource in progressive_resources]

    # TODO: eventually merge scans for the same resource location...
    # TODO: fix absolute paths as relative to original input argument...
    return results
def extractcode(ctx, input, verbose, quiet, shallow, *args, **kwargs):  # @ReservedAssignment
    """extract archives and compressed files found in the <input> file or directory tree.

    Use this command before scanning proper as an <input> preparation step.
    Archives found inside an extracted archive are extracted recursively.
    Extraction is done in-place in a directory named '-extract' side-by-side with an archive.
    """
    # Exit through `ctx.exit` with a return code of 1 if any extraction
    # reported errors and 0 otherwise. Nested archives are not extracted
    # when `shallow` is true.

    abs_location = fileutils.as_posixpath(os.path.abspath(os.path.expanduser(input)))

    # used for relative paths computation by the nested functions below
    len_base_path = len(abs_location)
    base_is_dir = filetype.is_dir(abs_location)

    def as_unicode(text):
        """Return `text` as unicode, transliterating byte strings."""
        if isinstance(text, unicode):
            return text
        return toascii(text, translit=True).decode('utf-8', 'replace')

    def relative_to_base(path):
        """Return `path` made relative to the extraction base location."""
        return utils.get_relative_path(
            path=path, len_base_path=len_base_path, base_is_dir=base_is_dir)

    def extract_event(item):
        """
        Return the progress message for an extract event.
        """
        if quiet or not item:
            return ''
        source = as_unicode(item.source)
        if verbose:
            if item.done:
                return ''
            shorten = relative_to_base
        else:
            shorten = fileutils.file_name

        if source:
            line = shorten(source) or ''
        else:
            line = ''
        return 'Extracting: %(line)s' % {'line': as_unicode(line)}

    def display_extract_summary():
        """
        Display a summary of warnings and errors if any.
        """
        seen_warnings = False
        seen_errors = False
        for event in extract_results:
            if event.errors:
                seen_errors = True
            if event.warnings:
                seen_warnings = True
            source = relative_to_base(as_unicode(fileutils.as_posixpath(event.source)))
            for e in event.errors:
                echo_stderr('ERROR extracting: %(source)s: %(e)s'
                            % {'source': source, 'e': e}, fg='red')
            for warn in event.warnings:
                echo_stderr('WARNING extracting: %(source)s: %(warn)s'
                            % {'source': source, 'warn': warn}, fg='yellow')

        # errors take precedence over warnings for the final color
        if seen_errors:
            summary_color = 'red'
        elif seen_warnings:
            summary_color = 'yellow'
        else:
            summary_color = 'green'
        echo_stderr('Extracting done.', fg=summary_color, reset=True)

    extract_results = []
    has_extract_errors = False

    if not quiet:
        echo_stderr('Extracting archives...', fg='green')

    archive_events = extract_archives(abs_location, recurse=not shallow)
    with utils.progressmanager(archive_events, item_show_func=extract_event,
                               verbose=verbose, quiet=quiet) as extraction_events:
        for xev in extraction_events:
            if not xev.done:
                continue
            if not (xev.warnings or xev.errors):
                continue
            # only completed events with something to report are kept
            if xev.errors:
                has_extract_errors = True
            extract_results.append(xev)

    if not quiet:
        display_extract_summary()

    ctx.exit(1 if has_extract_errors else 0)
def scan(input_path, output_file, scanners, license_score=0, license_text=False,
         verbose=False, quiet=False, processes=1, timeout=DEFAULT_TIMEOUT,
         diag=False, scans_cache_class=None, strip_root=False):
    """
    Return a tuple of (files_count, scan_results) where
    scan_results is an iterable.

    Run each requested scan proper: each individual file scan is cached
    on disk to free memory. Then the whole set of scans is loaded from
    the cache and streamed at the end.

    Arguments:
    - `input_path`: the path to scan.
    - `output_file`: a file object with a `name` attribute. It is passed
      to `save_logs` with each progress message, and the base name of
      its path (without extension) is the `scanhistory` row id updated
      in the `data.db` SQLite database.
    - `scanners`: a mapping of scan name -> tuple whose first item is a
      flag telling whether that scan is requested.
    - `license_score`, `license_text`: only used to build a partial that
      is currently not applied (see the note in the body).
    - `verbose`: if True, progress shows the scanned path as-is rather
      than a fixed-width file name.
    - `quiet`: if True, nothing is displayed nor logged.
    - `processes`: number of processes requested from the pool.
    - `timeout`, `diag`: passed through to each individual file scan.
    - `scans_cache_class`: required; called without arguments to obtain
      a scans cache object.
    - `strip_root`: passed through to `_get_root_dir`.

    Raise an AssertionError if `scans_cache_class` is not provided.
    """
    assert scans_cache_class
    scan_summary = OrderedDict()
    scan_summary['scanned_path'] = input_path
    scan_summary['processes'] = processes

    # NOTE(review): this partial is built but never passed to the
    # scanners, so license_score and license_text have no effect in this
    # function -- confirm whether it should replace the licenses scanner.
    get_licenses_with_score = partial(get_licenses, min_score=license_score,
                                      include_text=license_text, diag=diag)

    # Display scan start details
    ############################
    # FIXME: it does not make sense to use tuple and positional values
    scans = [k for k, v in scanners.items() if v[0]]
    _scans = ', '.join(scans)
    if not quiet:
        echo_stderr('Scanning files for: %(_scans)s with %(processes)d process(es)...' % locals())
        save_logs('Scanning files for: %(_scans)s with %(processes)d process(es)...' % locals(), output_file)

    scan_summary['scans'] = scans[:]
    scan_start = time()
    indexing_time = 0
    # FIXME: it does not make sense to use tuple and positional values
    with_licenses, _ = scanners.get('licenses', (False, ''))
    if with_licenses:
        # build index outside of the main loop
        # this also ensures that forked processes will get the index on POSIX naturally
        if not quiet:
            echo_stderr('Building license detection index...', fg='green', nl=False)
            save_logs('Building license detection index...', output_file)
        from licensedcode.index import get_index
        _idx = get_index()
        indexing_time = time() - scan_start
        if not quiet:
            echo_stderr('Done.', fg='green', nl=True)
            save_logs('Done.', output_file)

    scan_summary['indexing_time'] = indexing_time

    # TODO: handle pickling errors as in ./scancode -cilp samples/ -n3: note they are only caused by a FanoutCache
    # TODO: handle other exceptions properly to avoid any hanging

    # maxtasksperchild helps with recycling processes in case of leaks
    pool = get_pool(processes=processes, maxtasksperchild=1000)
    resources = resource_paths(input_path)
    logfile_path = scans_cache_class().cache_files_log
    with open(logfile_path, 'wb') as logfile_fd:

        logged_resources = _resource_logger(logfile_fd, resources)

        scanit = partial(_scanit, scanners=scanners, scans_cache_class=scans_cache_class,
                         diag=diag, timeout=timeout)

        try:
            # Using chunksize is documented as much more efficient in the Python doc.
            # Yet "1" still provides a better and more progressive feedback.
            # With imap_unordered, results are returned as soon as ready and out of order.
            scanned_files = pool.imap_unordered(scanit, logged_resources, chunksize=1)
            pool.close()

            if not quiet:
                echo_stderr('Scanning files...', fg='green')
                save_logs('Scanning files...', output_file)

            def scan_event(item):
                """Progress event displayed each time a file is scanned"""
                if quiet:
                    return ''
                if item:
                    _scan_success, _scanned_path = item
                    if verbose and _scanned_path:
                        _progress_line = _scanned_path
                    else:
                        _progress_line = fixed_width_file_name(_scanned_path)
                    save_logs('Scanned: ' + _progress_line, output_file)
                    return style('Scanned: ') + style(
                        _progress_line, fg='green' if _scan_success else 'red')

            scanning_errors = []
            files_count = 0
            with utils.progressmanager(scanned_files, item_show_func=scan_event,
                                       show_pos=True, verbose=verbose, quiet=quiet,
                                       file=sys.stderr) as scanned:
                while True:
                    try:
                        result = scanned.next()
                        scan_success, scanned_rel_path = result
                        if not scan_success:
                            scanning_errors.append(scanned_rel_path)
                        files_count += 1
                    except StopIteration:
                        break
                    except KeyboardInterrupt:
                        # stop scanning but still report on what was scanned so far
                        print('\nAborted with Ctrl+C!')
                        pool.terminate()
                        break
        finally:
            # ensure the pool is really dead to work around a Python 2.7.3 bug:
            # http://bugs.python.org/issue15101
            pool.terminate()

    # TODO: add stats to results somehow

    # Compute stats
    ##########################
    scan_summary['files_count'] = files_count
    scan_summary['files_with_errors'] = scanning_errors
    total_time = time() - scan_start
    scanning_time = total_time - indexing_time
    scan_summary['total_time'] = total_time
    scan_summary['scanning_time'] = scanning_time

    # The elapsed time can be zero with a coarse clock and a very small
    # scan (or negative if the wall clock is adjusted): report a speed
    # of zero rather than failing with a ZeroDivisionError.
    if scanning_time > 0:
        files_scanned_per_second = round(float(files_count) / scanning_time, 2)
    else:
        files_scanned_per_second = 0.0
    scan_summary['files_scanned_per_second'] = files_scanned_per_second

    if not quiet:
        # Display stats
        ##########################
        echo_stderr('Scanning done.', fg='red' if scanning_errors else 'green')
        save_logs('Scanning done.', output_file)
        if scanning_errors:
            echo_stderr('Some files failed to scan properly. See scan for details:', fg='red')
            save_logs('Some files failed to scan properly. See scan for details:', output_file)
            for errored_path in scanning_errors:
                echo_stderr(' ' + errored_path, fg='red')
                save_logs(' ' + errored_path, output_file)

        echo_stderr('Scan statistics: %(files_count)d files scanned in %(total_time)ds.' % locals())

        # NOTE(review): as in the original statement order, the scan
        # history is only updated when not quiet -- confirm this is intended.
        filename = os.path.basename(output_file.name).rsplit('.', 1)[0]
        if isinstance(filename, bytes):
            # sqlite3 rejects non-ASCII byte strings as bound parameters
            filename = filename.decode('utf-8', 'replace')
        conn = sqlite3.connect('data.db')
        try:
            # Bound parameters instead of string formatting: a file name
            # holding a quote can neither break nor alter the statement.
            conn.execute(
                'UPDATE scanhistory SET number=?, scantime=? WHERE id=?',
                (files_count, int(total_time), filename))
            conn.commit()
        finally:
            # always release the database, including when the update fails
            conn.close()

        save_logs('Scan statistics: %(files_count)d files scanned in %(total_time)ds.' % locals(), output_file)
        echo_stderr('Scan options: %(_scans)s with %(processes)d process(es).' % locals())
        save_logs('Scan options: %(_scans)s with %(processes)d process(es).' % locals(), output_file)
        echo_stderr('Scanning speed: %(files_scanned_per_second)s files per sec.' % locals())
        save_logs('Scanning speed: %(files_scanned_per_second)s files per sec.' % locals(), output_file)
        echo_stderr('Scanning time: %(scanning_time)ds.' % locals())
        save_logs('Scanning time: %(scanning_time)ds.' % locals(), output_file)
        echo_stderr('Indexing time: %(indexing_time)ds.' % locals(), reset=True)
        save_logs('Indexing time: %(indexing_time)ds.' % locals(), output_file)

    # finally return an iterator on cached results
    cached_scan = scans_cache_class()
    root_dir = _get_root_dir(input_path, strip_root)
    return files_count, cached_scan.iterate(scans, root_dir)
def scan(input_path,
         scanners,
         verbose=False, quiet=False,
         processes=1, timeout=DEFAULT_TIMEOUT,
         diag=False,
         scans_cache_class=None,
         strip_root=False,
         full_root=False,
         pre_scan_plugins=()):
    """
    Return a tuple of (files_count, scan_results, success) where
    scan_results is an iterable and success is a boolean.

    Run each requested scan proper: each individual file scan is cached
    on disk to free memory. Then the whole set of scans is loaded from
    the cache and streamed at the end.

    Arguments:
    - `input_path`: the path to scan.
    - `scanners`: a mapping of scan name -> tuple whose first item is a
      flag telling whether that scan is requested.
    - `verbose`: if True, progress shows the scanned path as-is rather
      than a fixed-width file name.
    - `quiet`: if True, nothing is displayed on stderr.
    - `processes`: number of processes requested from the pool. With a
      zero (or otherwise false) value no pool is created and files are
      scanned in the current process.
    - `timeout`, `diag`: passed through to each individual file scan.
      `diag` also selects the detailed per-file error report.
    - `scans_cache_class`: required; called without arguments to obtain
      a scans cache object.
    - `strip_root`, `full_root`: passed through to `_get_root_dir`.
    - `pre_scan_plugins`: passed through to `resource_paths`.

    Raise an AssertionError if `scans_cache_class` is not provided.
    """
    assert scans_cache_class
    scan_summary = OrderedDict()
    scan_summary['scanned_path'] = input_path
    scan_summary['processes'] = processes

    # Display scan start details
    ############################
    # FIXME: it does not make sense to use tuple and positional values
    scans = [k for k, v in scanners.items() if v[0]]
    _scans = ', '.join(scans)
    if not quiet:
        echo_stderr('Scanning files for: %(_scans)s with %(processes)d process(es)...' % locals())

    scan_summary['scans'] = scans[:]
    scan_start = time()
    indexing_time = 0
    # FIXME: It does not make sense to use tuple and positional values
    with_licenses, _ = scanners.get('licenses', (False, ''))
    if with_licenses:
        # build index outside of the main loop for speed
        # this also ensures that forked processes will get the index on POSIX naturally
        if not quiet:
            echo_stderr('Building license detection index...', fg='green', nl=False)
        from licensedcode.cache import get_index
        get_index(False)
        indexing_time = time() - scan_start
        if not quiet:
            echo_stderr('Done.', fg='green', nl=True)

    scan_summary['indexing_time'] = indexing_time

    pool = None

    resources = resource_paths(input_path, diag, scans_cache_class,
                               pre_scan_plugins=pre_scan_plugins)
    paths_with_error = []
    files_count = 0

    logfile_path = scans_cache_class().cache_files_log
    if on_linux:
        file_logger = partial(open, logfile_path, 'wb')
    else:
        file_logger = partial(codecs.open, logfile_path, 'w', encoding='utf-8')

    with file_logger() as logfile_fd:

        logged_resources = _resource_logger(logfile_fd, resources)

        scanit = partial(_scanit, scanners=scanners, scans_cache_class=scans_cache_class,
                         diag=diag, timeout=timeout, processes=processes)

        max_file_name_len = compute_fn_max_len()
        # do not display a file name in progress bar if there is less than 5 chars available.
        display_fn = bool(max_file_name_len > 10)
        try:
            if processes:
                # maxtasksperchild helps with recycling processes in case of leaks
                pool = get_pool(processes=processes, maxtasksperchild=1000)
                # Using chunksize is documented as much more efficient in the Python doc.
                # Yet "1" still provides a better and more progressive feedback.
                # With imap_unordered, results are returned as soon as ready and out of order.
                scanned_files = pool.imap_unordered(scanit, logged_resources, chunksize=1)
                pool.close()
            else:
                # no multiprocessing with processes=0
                scanned_files = imap(scanit, logged_resources)
                if not quiet:
                    echo_stderr('Disabling multi-processing and multi-threading...', fg='yellow')

            if not quiet:
                echo_stderr('Scanning files...', fg='green')

            def scan_event(item):
                """Progress event displayed each time a file is scanned"""
                if quiet or not item or not display_fn:
                    return ''
                _scan_success, _scanned_path = item
                _scanned_path = unicode(toascii(_scanned_path))
                if verbose:
                    _progress_line = _scanned_path
                else:
                    _progress_line = fixed_width_file_name(_scanned_path, max_file_name_len)
                return style('Scanned: ') + style(
                    _progress_line, fg='green' if _scan_success else 'red')

            with progressmanager(
                scanned_files, item_show_func=scan_event, show_pos=True,
                verbose=verbose, quiet=quiet, file=sys.stderr) as scanned:
                while True:
                    try:
                        result = scanned.next()
                        scan_success, scanned_rel_path = result
                        if not scan_success:
                            paths_with_error.append(scanned_rel_path)
                        files_count += 1
                    except StopIteration:
                        break
                    except KeyboardInterrupt:
                        # stop scanning but still report on what was scanned so far
                        print('\nAborted with Ctrl+C!')
                        if pool:
                            pool.terminate()
                        break
        finally:
            if pool:
                # ensure the pool is really dead to work around a Python 2.7.3 bug:
                # http://bugs.python.org/issue15101
                pool.terminate()

    # TODO: add stats to results somehow

    # Compute stats
    ##########################
    scan_summary['files_count'] = files_count
    scan_summary['files_with_errors'] = paths_with_error
    total_time = time() - scan_start
    scanning_time = total_time - indexing_time
    scan_summary['total_time'] = total_time
    scan_summary['scanning_time'] = scanning_time

    # The elapsed time can be zero with a coarse clock and a very small
    # scan (or negative if the wall clock is adjusted): report a speed
    # of zero rather than failing with a ZeroDivisionError.
    if scanning_time > 0:
        files_scanned_per_second = round(float(files_count) / scanning_time, 2)
    else:
        files_scanned_per_second = 0.0
    scan_summary['files_scanned_per_second'] = files_scanned_per_second

    if not quiet:
        # Display stats
        ##########################
        echo_stderr('Scanning done.', fg='red' if paths_with_error else 'green')
        if paths_with_error:
            if diag:
                echo_stderr('Some files failed to scan properly:', fg='red')
                # iterate cached results to collect all scan errors
                cached_scan = scans_cache_class()
                root_dir = _get_root_dir(input_path, strip_root, full_root)
                scan_results = cached_scan.iterate(scans, root_dir, paths_subset=paths_with_error)
                for scan_result in scan_results:
                    errored_path = scan_result.get('path', '')
                    echo_stderr('Path: ' + errored_path, fg='red')
                    for error in scan_result.get('scan_errors', []):
                        for emsg in error.splitlines(False):
                            echo_stderr(' ' + emsg)
                    echo_stderr('')
            else:
                echo_stderr('Some files failed to scan properly. Use the --diag option for additional details:', fg='red')
                for errored_path in paths_with_error:
                    echo_stderr(' ' + errored_path, fg='red')

        echo_stderr('Scan statistics: %(files_count)d files scanned in %(total_time)ds.' % locals())
        echo_stderr('Scan options: %(_scans)s with %(processes)d process(es).' % locals())
        echo_stderr('Scanning speed: %(files_scanned_per_second)s files per sec.' % locals())
        echo_stderr('Scanning time: %(scanning_time)ds.' % locals())
        echo_stderr('Indexing time: %(indexing_time)ds.' % locals(), reset=True)

    success = not paths_with_error
    # finally return an iterator on cached results
    cached_scan = scans_cache_class()
    root_dir = _get_root_dir(input_path, strip_root, full_root)
    return files_count, cached_scan.iterate(scans, root_dir), success
def extractcode(ctx, input, verbose, quiet, *args, **kwargs):  # @ReservedAssignment
    """extract archives and compressed files found in the <input> file or directory tree.

    Use this command before scanning proper, as an <input> preparation step.
    Archives found inside an extracted archive are extracted recursively.
    Extraction is done in-place in a directory named '-extract' side-by-side with an archive.
    """
    # Exit through `ctx.exit` with a return code of 1 if any extraction
    # reported errors and 0 otherwise.

    original_input = fileutils.as_posixpath(input)
    expanded_input = os.path.abspath(os.path.expanduser(input))
    abs_input = fileutils.as_posixpath(expanded_input)

    # completed extract events that carry warnings or errors
    extract_results = []

    # note: the display functions are nested to close on local variables

    def relative_to_input(path):
        """Return `path` made relative to the original input."""
        return utils.get_relative_path(original_input, abs_input, path)

    def extract_start():
        """Return the message displayed when extraction starts."""
        return style('Extracting archives...', fg='green')

    def extract_event(item):
        """
        Return the progress message for an extract event.
        """
        if not item:
            return ''
        if not verbose:
            line = fileutils.file_name(item.source) or ''
        elif item.done:
            return ''
        else:
            line = relative_to_input(as_posixpath(item.source)) or ''
        return 'Extracting: %(line)s' % {'line': line}

    def extract_end():
        """
        Return a summary of warnings and errors if any.
        """
        seen_warnings = False
        seen_errors = False
        messages = []
        for event in extract_results:
            if event.errors:
                seen_errors = True
            if event.warnings:
                seen_warnings = True
            source = relative_to_input(as_posixpath(event.source))
            messages.extend(
                style('ERROR extracting: %(source)s: %(e)r' % {'source': source, 'e': e},
                      fg='red', reset=False)
                for e in event.errors)
            messages.extend(
                style('WARNING extracting: %(source)s: %(warn)r' % {'source': source, 'warn': warn},
                      fg='yellow', reset=False)
                for warn in event.warnings)

        # errors take precedence over warnings for the final color
        if seen_errors:
            summary_color = 'red'
        elif seen_warnings:
            summary_color = 'yellow'
        else:
            summary_color = 'green'
        messages.append(style('Extracting done.', fg=summary_color, reset=True))
        return '\n'.join(messages)

    has_extract_errors = False

    progress = utils.progressmanager(
        extract_archives(abs_input),
        item_show_func=extract_event,
        start_show_func=extract_start,
        finish_show_func=extract_end,
        verbose=verbose,
        quiet=quiet,
    )
    with progress as extraction_events:
        for xev in extraction_events:
            if not xev.done:
                continue
            if not (xev.warnings or xev.errors):
                continue
            if xev.errors:
                has_extract_errors = True
            extract_results.append(xev)

    ctx.exit(1 if has_extract_errors else 0)
def scan(input_path, copyright=True, license=True, package=True,  # @ReservedAssignment
         info=True, verbose=False, quiet=False):  # @ReservedAssignment
    """
    Scan every resource found under `input_path` and return a list of
    results, one ordered mapping per resource.

    Each mapping starts with a `location` key (the resource path made
    relative to the original input) and is then updated with whatever
    `scan_one` returns for that resource. The boolean arguments select
    which scanners are enabled; `verbose` and `quiet` drive the
    progress display.
    """
    # keep both forms of the input so that reported paths can be made
    # relative to what was originally provided
    original_input = fileutils.as_posixpath(input_path)
    absolute = os.path.abspath(os.path.expanduser(input_path))
    abs_input = fileutils.as_posixpath(absolute)

    # a disabled scanner maps to its falsy flag, an enabled one to its function
    scanners = {
        'copyrights': copyright and get_copyrights,
        'licenses': license and get_licenses,
        'packages': package and get_package_infos,
        'infos': info and get_file_infos,
    }

    # progress display callbacks are nested to close over `verbose`

    def scan_start():
        """Message displayed when the scan starts."""
        return style('Scanning files...', fg='green')

    def scan_event(item):
        """Message displayed each time a file is scanned."""
        if not item:
            return None
        shown = item if verbose else (fileutils.file_name(item) or '')
        return 'Scanning: %(line)s' % {'line': shown}

    def scan_end():
        """Message displayed when the scan is done."""
        # no warnings or errors are tracked yet: these flags are placeholders
        has_warnings = False
        has_errors = False
        if has_errors:
            summary_color = 'red'
        elif has_warnings:
            summary_color = 'yellow'
        else:
            summary_color = 'green'
        return '\n'.join([style('Scanning done.', fg=summary_color, reset=True)])

    ignored = partial(ignore.is_ignored, ignores=ignore.ignores_VCS, unignores={})
    resources = fileutils.resource_iter(abs_input, ignored=ignored)

    results = []
    progress_options = dict(
        item_show_func=scan_event,
        start_show_func=scan_start,
        finish_show_func=scan_end,
        verbose=verbose,
        show_pos=True,
        quiet=quiet,
    )
    with utils.progressmanager(resources, **progress_options) as progressive_resources:
        for resource in progressive_resources:
            location = fileutils.as_posixpath(resource)
            # fix paths: keep the location as relative to the original input
            entry = OrderedDict(
                location=utils.get_relative_path(original_input, abs_input, location))
            # Should we yield instead?
            entry.update(scan_one(location, scanners))
            results.append(entry)

    # TODO: eventually merge scans for the same resource location...
    # TODO: fix absolute paths as relative to original input argument...
    return results
def extractcode(ctx, input, verbose, quiet, *args, **kwargs):  # @ReservedAssignment
    """extract archives and compressed files found in the <input> file or directory tree.

    Use this command before scanning proper, as an <input> preparation step.
    Archives found inside an extracted archive are extracted recursively.
    Extraction is done in-place in a directory named '-extract' side-by-side with an archive.
    """
    # Exit through `ctx.exit` with a return code of 1 if any extraction
    # reported errors and 0 otherwise.

    original_input = fileutils.as_posixpath(input)
    abs_input = fileutils.as_posixpath(
        os.path.abspath(os.path.expanduser(input)))

    # note: the functions below are nested so they can close on local variables

    def extract_start():
        """Return the message displayed when extraction starts."""
        return style('Extracting archives...', fg='green')

    def extract_event(item):
        """
        Return the progress message for an extract event.
        """
        if not item:
            return ''
        if verbose and item.done:
            return ''
        if verbose:
            posix_source = as_posixpath(item.source)
            shown = utils.get_relative_path(original_input, abs_input, posix_source)
        else:
            shown = fileutils.file_name(item.source)
        return 'Extracting: %(line)s' % {'line': shown or ''}

    def extract_end():
        """
        Return a summary of warnings and errors if any.
        """
        error_seen = False
        warning_seen = False
        lines = []
        for result in extract_results:
            error_seen = error_seen or bool(result.errors)
            warning_seen = warning_seen or bool(result.warnings)

            source = utils.get_relative_path(
                original_input, abs_input, as_posixpath(result.source))

            for e in result.errors:
                message = 'ERROR extracting: %(source)s: %(e)r' % {'source': source, 'e': e}
                lines.append(style(message, fg='red', reset=False))
            for warn in result.warnings:
                message = 'WARNING extracting: %(source)s: %(warn)r' % {'source': source, 'warn': warn}
                lines.append(style(message, fg='yellow', reset=False))

        # errors take precedence over warnings for the final color
        summary_color = 'red' if error_seen else ('yellow' if warning_seen else 'green')
        lines.append(style('Extracting done.', fg=summary_color, reset=True))
        return '\n'.join(lines)

    # completed extract events that carry warnings or errors
    extract_results = []
    has_extract_errors = False

    with utils.progressmanager(
            extract_archives(abs_input),
            item_show_func=extract_event,
            start_show_func=extract_start,
            finish_show_func=extract_end,
            verbose=verbose,
            quiet=quiet) as extraction_events:
        for xev in extraction_events:
            reportable = xev.done and (xev.warnings or xev.errors)
            if reportable:
                if xev.errors:
                    has_extract_errors = True
                extract_results.append(xev)

    if has_extract_errors:
        rc = 1
    else:
        rc = 0
    ctx.exit(rc)