def get_results(codebase, as_list=False, **kwargs):
    """
    Return an ordered mapping of scan results collected from a `codebase`.
    If `as_list` is True, consume the "files" iterator in a list sequence.
    """
    codebase.add_files_count_to_current_header()

    # headers come first in the results mapping
    results = {'headers': codebase.get_headers()}

    # add codebase toplevel attributes such as summaries
    if codebase.attributes:
        results.update(codebase.attributes.to_dict())

    files = OutputPlugin.get_files(codebase, **kwargs)
    if as_list:
        files = list(files)

    if TRACE:
        logger_debug('get_results: files')
        # Materialize before logging: listing a generator consumes it, so we
        # must store the materialized list in `results` (below) rather than
        # the now-exhausted iterator.
        files = list(files)
        from pprint import pformat
        logger_debug(pformat(files))

    results['files'] = files
    return results
def write_results(codebase, output_file, pretty=False, **kwargs): """ Write headers, files, and other attributes from `codebase` to `output_file` Enable JSON indentation if `pretty` is True """ # Set indentation for JSON output if `pretty` is True # We use a separate dict for jsonstream kwargs since we are passing # this function's kwargs as arguments to OutputPlugin.get_files() if pretty: jsonstreams_kwargs = dict(indent=2, pretty=True) else: jsonstreams_kwargs = dict(indent=None, pretty=False) # If `output_file` is a path string, open the file at path `output_file` and use it as `output_file` close_fd = False if isinstance(output_file, str): output_file = open(output_file, 'w') close_fd = True # Begin wri'w' JSON to `output_file` with jsonstreams.Stream( jsonstreams.Type.OBJECT, fd=output_file, close_fd=close_fd, **jsonstreams_kwargs ) as s: # Write headers codebase.add_files_count_to_current_header() codebase_headers = codebase.get_headers() s.write('headers', codebase_headers) # Write attributes if codebase.attributes: for attribute_key, attribute_value in codebase.attributes.to_dict().items(): s.write(attribute_key, attribute_value) # Write files codebase_files = OutputPlugin.get_files(codebase, **kwargs) # OutputPlugin.get_files() returns a generator, not JSON-serializable codebase_files = list(codebase_files) s.write('files', codebase_files)
def build_copyright_paragraphs(codebase, **kwargs):
    """
    Yield Debian copyright-format paragraphs built from the codebase:
    first a header paragraph, then one files paragraph per scanned file.
    The codebase is assumed to contain license and copyright detections.
    """
    codebase.add_files_count_to_current_header()

    # header paragraph declaring the copyright-format version and a notice
    header_para = CopyrightHeaderParagraph(
        format='https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/',
        # TODO: add some details, but not all these
        # comment=saneyaml.dump(codebase.get_headers()),
        comment=notice,
    )
    yield header_para

    # TODO: create CopyrightLicenseParagraph for common licenses
    # TODO: group files that share copyright and license
    # TODO: infer files patterns as in decopy
    # for now this is dumb and will generate one paragraph per scanned file
    for scanned_file in OutputPlugin.get_files(codebase, **kwargs):
        # directories carry no license/copyright of their own; skip them
        if scanned_file['type'] == 'directory':
            continue
        dfiles = scanned_file['path']
        dlicense = build_license(scanned_file)
        dcopyright = build_copyright_field(scanned_file)
        file_para = CopyrightFilesParagraph.from_dict(
            dict(
                files=dfiles,
                license=dlicense,
                copyright=dcopyright,
            ))
        yield file_para