class EmailScanner(ScanPlugin):
    """
    Scan plugin collecting email addresses found in a Resource.
    """
    # one list of detected emails per resource
    resource_attributes = {'emails': attr.ib(default=attr.Factory(list))}

    sort_order = 8

    options = [
        CommandLineOption(
            ('-e', '--email',),
            is_flag=True,
            default=False,
            help='Scan <input> for emails.',
            help_group=OTHER_SCAN_GROUP,
        ),
        CommandLineOption(
            ('--max-email',),
            type=int,
            default=50,
            metavar='INT',
            show_default=True,
            required_options=['email'],
            help='Report only up to INT emails found in a file. Use 0 for no limit.',
            help_group=SCAN_OPTIONS_GROUP,
        ),
    ]

    def is_enabled(self, email, **kwargs):
        # enabled only when --email was requested
        return email

    def get_scanner(self, max_email=50, **kwargs):
        # bind the reporting threshold into the scanner callable
        from scancode.api import get_emails
        scanner = partial(get_emails, threshold=max_email)
        return scanner
class PackageScanner(ScanPlugin):
    """
    Detect package manifests in a Resource and report them as "packages" at
    the appropriate file or directory level.
    """
    resource_attributes = OrderedDict(
        [('packages', attr.ib(default=attr.Factory(list), repr=False))])

    sort_order = 6

    # license detection must run before package detection
    required_plugins = ['scan:licenses', ]

    options = [
        CommandLineOption(
            ('-p', '--package',),
            is_flag=True,
            default=False,
            help='Scan <input> for package manifests and build scripts.',
            help_group=SCAN_GROUP,
            sort_order=20,
        ),
        CommandLineOption(
            ('--list-packages',),
            is_flag=True,
            is_eager=True,
            callback=print_packages,
            help='Show the list of supported package types and exit.',
            help_group=DOC_GROUP,
        ),
    ]

    def is_enabled(self, package, **kwargs):
        return package

    def get_scanner(self, **kwargs):
        """
        Return a scanner callable to scan a Resource for packages.
        """
        from scancode.api import get_package_info
        return get_package_info

    def process_codebase(self, codebase, **kwargs):
        """
        Set the package root given a package "type".
        """
        # What if we scanned a single file and we do not have a root proper?
        if codebase.has_single_resource:
            return
        # bottom-up walk so children are processed before their parents
        for res in codebase.walk(topdown=False):
            set_packages_root(res, codebase)
class ScanSummary(PostScanPlugin):
    """
    Post-scan plugin producing a codebase-level summary of scan results.
    """
    sort_order = 10

    # codebase-level mapping holding the computed summary
    codebase_attributes = {'summary': attr.ib(default=attr.Factory(OrderedDict))}

    options = [
        CommandLineOption(
            ('--summary',),
            is_flag=True,
            default=False,
            help='Summarize license, copyright and other scans at the codebase level.',
            help_group=POST_SCAN_GROUP,
        ),
    ]

    def is_enabled(self, summary, **kwargs):
        return summary

    def process_codebase(self, codebase, summary, **kwargs):
        if TRACE_LIGHT:
            logger_debug('ScanSummary:process_codebase')
        # codebase-level summary only: no per-file details kept
        summarize_codebase(codebase, keep_details=False, **kwargs)
class ScanKeyFilesSummary(PostScanPlugin):
    """
    Post-scan plugin summarizing a scan at the codebase level, restricted to
    key files only.
    """
    sort_order = 150

    # mapping of summary data at the codebase level for key files
    codebase_attributes = {
        'summary_of_key_files': attr.ib(default=attr.Factory(OrderedDict)),
    }

    options = [
        CommandLineOption(
            ('--summary-key-files',),
            is_flag=True,
            default=False,
            help='Summarize license, copyright and other scans for key, '
                 'top-level files. Key files are top-level codebase files such '
                 'as COPYING, README and package manifests as reported by the '
                 '--classify option "is_legal", "is_readme", "is_manifest" '
                 'and "is_top_level" flags.',
            help_group=POST_SCAN_GROUP,
            # relies on classification flags and the base summary
            required_options=['classify', 'summary'],
        ),
    ]

    def is_enabled(self, summary_key_files, **kwargs):
        return summary_key_files

    def process_codebase(self, codebase, summary_key_files, **kwargs):
        summarize_codebase_key_files(codebase, **kwargs)
class ScanSummaryWithDetails(PostScanPlugin):
    """
    Post-scan plugin summarizing a scan at the codebase level while keeping
    intermediate file- and directory-level summary details.
    """
    # mapping of summary data at the codebase level for the whole codebase
    codebase_attributes = {'summary': attr.ib(default=attr.Factory(OrderedDict))}

    # store summaries at the file and directory level in this attribute when
    # keep details is True
    resource_attributes = {'summary': attr.ib(default=attr.Factory(OrderedDict))}

    sort_order = 100

    options = [
        CommandLineOption(
            ('--summary-with-details',),
            is_flag=True,
            default=False,
            help='Summarize license, copyright and other scans at the codebase level, '
                 'keeping intermediate details at the file and directory level.',
            help_group=POST_SCAN_GROUP,
        ),
    ]

    def is_enabled(self, summary_with_details, **kwargs):
        return summary_with_details

    def process_codebase(self, codebase, summary_with_details, **kwargs):
        # same summarization as ScanSummary, but per-resource details are kept
        summarize_codebase(codebase, keep_details=True, **kwargs)
class GeneratedCodeDetector(ScanPlugin):
    """
    Scan plugin flagging files that look automatically generated.
    """
    resource_attributes = {
        'is_generated': Boolean(
            help='True if this file is likely an automatically generated file.'),
    }

    sort_order = 50

    options = [
        CommandLineOption(
            ('--generated',),
            is_flag=True,
            default=False,
            help='Classify automatically generated code files with a flag.',
            help_group=OTHER_SCAN_GROUP,
            sort_order=50,
        ),
    ]

    def is_enabled(self, generated, **kwargs):
        return generated

    def get_scanner(self, **kwargs):
        # module-level scanner callable; no per-scan configuration needed
        return generated_scanner
class KeywordsLinesScanner(ScanPlugin):
    """
    Count lines of code and lines matching keywords loaded from a YAML file.

    NOTE(review): another class with this same name exists elsewhere in this
    file (flag-based variant) — confirm only one is registered.
    """
    resource_attributes = OrderedDict(
        codelines=attr.ib(default=attr.Factory(int), repr=False),
        keywordsline=attr.ib(default=attr.Factory(int), repr=False),
        matchedlines=attr.ib(default=attr.Factory(list), repr=False),
    )

    options = [
        CommandLineOption(
            ('--keyword-scan',),
            type=click.Path(
                exists=True,
                file_okay=True,
                dir_okay=False,
                readable=True,
                path_type=PATH_TYPE,
            ),
            metavar='FILE',
            help='Use this yml file to read the keywords',
            help_group=SCAN_GROUP,
            sort_order=100,
        ),
    ]

    def is_enabled(self, keyword_scan, **kwargs):
        # enabled when a keywords YAML file path was supplied
        return keyword_scan

    def get_scanner(self, **kwargs):
        # bind the keywords file path into the scanner callable
        return partial(get_keywordsscan, keyword_scan=kwargs['keyword_scan'])
class KeywordsLinesScanner(ScanPlugin):
    """
    Count lines of code and lines matching keywords.

    NOTE(review): another class with this same name exists elsewhere in this
    file (file-option variant) — confirm only one is registered.
    """
    resource_attributes = OrderedDict(
        codelines=attr.ib(default=attr.Factory(int), repr=False),
        keywordsline=attr.ib(default=attr.Factory(int), repr=False),
        matchedlines=attr.ib(default=attr.Factory(list), repr=False),
    )

    options = [
        CommandLineOption(
            ('--keywordsscan',),
            is_flag=True,
            default=False,
            help=' Scan the number of lines of code and search for keywords.',
            help_group=SCAN_GROUP,
            sort_order=100,
        ),
    ]

    def is_enabled(self, keywordsscan, **kwargs):
        return keywordsscan

    def get_scanner(self, **kwargs):
        # no configuration to bind: return the scanner function directly
        return get_keywordsscan
class ScanByFacetSummary(PostScanPlugin):
    """
    Post-scan plugin summarizing a scan at the codebase level, grouping the
    results by facet.
    """
    sort_order = 200

    codebase_attributes = {'summary_by_facet': attr.ib(default=attr.Factory(list))}

    options = [
        CommandLineOption(
            ('--summary-by-facet',),
            is_flag=True,
            default=False,
            help='Summarize license, copyright and other scans and group the '
                 'results by facet.',
            help_group=POST_SCAN_GROUP,
            # needs facet assignment and the base summary to exist
            required_options=['facet', 'summary'],
        ),
    ]

    def is_enabled(self, summary_by_facet, **kwargs):
        return summary_by_facet

    def process_codebase(self, codebase, summary_by_facet, **kwargs):
        if TRACE_LIGHT:
            logger_debug('ScanByFacetSummary:process_codebase')
        summarize_codebase_by_facet(codebase, **kwargs)
class LicenseClarityScore(PostScanPlugin):
    """
    Post-scan plugin computing a license clarity score for the whole codebase.
    """
    codebase_attributes = {
        'license_clarity_score': Mapping(
            help='Computed license clarity score as mapping containing the score '
                 'proper and each scoring elements.'),
    }

    sort_order = 110

    options = [
        CommandLineOption(
            ('--license-clarity-score',),
            is_flag=True,
            default=False,
            help='Compute a summary license clarity score at the codebase level.',
            help_group=POST_SCAN_GROUP,
            # scoring needs classification, license and copyright data
            required_options=['classify', 'license', 'copyright'],
        ),
    ]

    def is_enabled(self, license_clarity_score, **kwargs):
        return license_clarity_score

    def process_codebase(self, codebase, license_clarity_score, **kwargs):
        if TRACE:
            logger_debug('LicenseClarityScore:process_codebase')
        # merge the computed scoring elements into the codebase attribute
        scoring_elements = compute_license_score(codebase, **kwargs)
        codebase.attributes.license_clarity_score.update(scoring_elements)
class CopyrightScanner(ScanPlugin):
    """
    Scan plugin collecting copyright statements, holders and authors from a
    Resource.
    """
    resource_attributes = OrderedDict([
        ('copyrights', attr.ib(default=attr.Factory(list))),
        ('holders', attr.ib(default=attr.Factory(list))),
        ('authors', attr.ib(default=attr.Factory(list))),
    ])

    sort_order = 4

    options = [
        CommandLineOption(
            ('-c', '--copyright',),
            is_flag=True,
            default=False,
            help='Scan <input> for copyrights.',
            help_group=SCAN_GROUP,
            sort_order=50,
        ),
    ]

    def is_enabled(self, copyright, **kwargs):  # NOQA
        return copyright

    def get_scanner(self, **kwargs):
        from scancode.api import get_copyrights
        return get_copyrights
class SpdxRdfOutput(OutputPlugin):
    """
    Output plugin writing scan results as SPDX RDF to a file.
    """
    options = [
        CommandLineOption(
            ('--spdx-rdf',),
            type=FileOptionType(lazy=True, mode='w', encoding='utf-8'),
            metavar='FILE',
            help='Write scan output as SPDX RDF to FILE.',
            help_group=OUTPUT_GROUP,
        ),
    ]

    def is_enabled(self, spdx_rdf, **kwargs):
        return spdx_rdf

    def process_codebase(self, codebase, spdx_rdf, **kwargs):
        # SPDX requires SHA1 checksums to be present
        check_sha1(codebase)
        scanned_files = self.get_files(codebase, **kwargs)
        header = codebase.get_or_create_current_header()
        input = kwargs.get('input', '')  # NOQA
        write_spdx(
            spdx_rdf,
            scanned_files,
            header.tool_name,
            header.tool_version,
            header.notice,
            input,
            as_tagvalue=False,
        )
class JsonPrettyOutput(OutputPlugin):
    """
    Output plugin writing scan results as pretty-printed JSON to a file.
    """
    options = [
        CommandLineOption(
            ('--json-pp', 'output_json_pp',),
            type=FileOptionType(mode='wb', lazy=True),
            metavar='FILE',
            help='Write scan output as pretty-printed JSON to FILE.',
            help_group=OUTPUT_GROUP,
            sort_order=10,
        ),
    ]

    def is_enabled(self, output_json_pp, **kwargs):
        return output_json_pp

    def process_codebase(self, codebase, output_json_pp, **kwargs):
        scanned_files = self.get_files(codebase, **kwargs)
        write_json(
            codebase,
            scanned_files,
            output_file=output_json_pp,
            pretty=True,
            **kwargs)
class RedundantCluesFilter(PostScanPlugin):
    """
    Post-scan plugin removing redundant clues (copyrights, authors, emails and
    urls) that are already contained in a more important scan result.
    """
    sort_order = 1

    options = [
        CommandLineOption(
            ('--filter-clues',),
            is_flag=True,
            default=False,
            help='Filter redundant duplicated clues already contained in '
                 'detected license and copyright texts and notices.',
            help_group=POST_SCAN_GROUP,
        ),
    ]

    def is_enabled(self, filter_clues, **kwargs):
        return filter_clues

    def process_codebase(self, codebase, **kwargs):
        """
        Drop clues already found in another detected clue for every resource
        of the codebase, saving only resources that actually changed.
        """
        if TRACE:
            logger_debug('RedundantFilter:process_codebase')

        from licensedcode.cache import get_index
        rules_by_id = {rule.identifier: rule
                       for rule in get_index().rules_by_rid}

        for res in codebase.walk():
            updated = filter_ignorable_resource_clues(res, rules_by_id)
            if updated:
                updated.save(codebase)
class IgnoreCopyrights(OutputFilterPlugin):
    """
    Output filter excluding files whose copyright holder or author matches one
    of the supplied regular-expression patterns. Has no effect unless the
    --copyright scan is requested.
    """
    options = [
        CommandLineOption(
            ('--ignore-copyright-holder',),
            multiple=True,
            metavar='<pattern>',
            help='Ignore a file (and all its findings) if a copyright holder '
                 'contains a match to the <pattern> regular expression. '
                 'Note that this will ignore a file even if it has other scanned '
                 'data such as a license or errors.',
            help_group=OUTPUT_FILTER_GROUP,
        ),
        CommandLineOption(
            ('--ignore-author',),
            multiple=True,
            metavar='<pattern>',
            help='Ignore a file (and all its findings) if an author '
                 'contains a match to the <pattern> regular expression. '
                 'Note that this will ignore a file even if it has other findings '
                 'such as a license or errors.',
            help_group=OUTPUT_FILTER_GROUP,
        ),
    ]

    def is_enabled(self, ignore_copyright_holder, ignore_author, **kwargs):  # NOQA
        return bool(ignore_copyright_holder or ignore_author)

    def process_codebase(self, codebase, ignore_copyright_holder,
                         ignore_author, **kwargs):
        # compile each pattern once, up front
        holder_patterns = [re.compile(p) for p in ignore_copyright_holder]
        author_patterns = [re.compile(p) for p in ignore_author]

        for resource in codebase.walk():
            holders = {c['value'] for c in getattr(resource, 'holders', [])}
            authors = {c['value'] for c in getattr(resource, 'authors', [])}
            if TRACE:
                logger_debug('holders:', holders)
                logger_debug('authors:', authors)
            matched = (is_ignored(holder_patterns, holders)
                       or is_ignored(author_patterns, authors))
            if matched:
                resource.is_filtered = True
                codebase.save_resource(resource)
class IsLicenseText(PostScanPlugin):
    """
    Set the "is_license_text" flag to true for at the file level for text files
    that contain mostly (as 90% of their size) license texts or notices.
    Has no effect unless --license, --license-text and --info scan data
    are available.
    """
    # per-file flag, False unless the 90% threshold below is met
    resource_attributes = dict(
        is_license_text=attr.ib(default=False, type=bool, repr=False))

    sort_order = 80

    options = [
        CommandLineOption(
            ('--is-license-text', ),
            is_flag=True,
            default=False,
            required_options=['info', 'license_text'],
            help='Set the "is_license_text" flag to true for files that contain '
            'mostly license texts and notices (e.g over 90% of the content). [EXPERIMENTAL]',
            help_group=POST_SCAN_GROUP)
    ]

    def is_enabled(self, is_license_text, **kwargs):
        return is_license_text

    def process_codebase(self, codebase, is_license_text, **kwargs):
        """
        Set the `is_license_text` flag to True for text files whose detected
        license texts cover 90% or more of the file size (coverage-weighted).
        """
        for resource in codebase.walk():
            if not resource.is_text:
                continue
            # keep unique (text, line range, coverage) tuples since the same
            # text can be repeated for each matched license
            license_texts = set(
                (lic['matched_text'], lic['start_line'], lic['end_line'],
                 lic.get('matched_rule', {}).get('match_coverage', 0))
                for lic in resource.licenses)

            # use coverage to skew the actual matched length
            license_texts_size = 0
            for txt, _, _, cov in license_texts:
                # these are the meta characters used to mark non-matched parts
                # of the matched text; do not count them towards the size
                txt = txt.replace('[', '').replace(']', '')
                license_texts_size += len(txt) * (cov / 100)

            if TRACE:
                logger_debug('IsLicenseText: license size:',
                             license_texts_size, 'size:', resource.size,
                             'license_texts_size >= (resource.size * 0.9)',
                             license_texts_size >= (resource.size * 0.9),
                             'resource.size * 0.9:', resource.size * 0.9)

            # 90% threshold: matched license text must cover most of the file
            if license_texts_size >= (resource.size * 0.9):
                resource.is_license_text = True
                resource.save(codebase)
class LicenceModifications(PostScanPlugin):
    """
    Add the "licence_modifications" attribute to a resource: a mapping with a
    modification note when a detected license does not match at a 100% score,
    or an empty mapping when no license scan data is available.
    """
    resource_attributes = dict(licence_modifications=attr.ib(
        default=attr.Factory(dict)))

    options = [
        CommandLineOption(
            ('--licence-modifications', ),
            is_flag=True,
            default=False,
            help='Generate a list of files in case of modified license',
            help_group=POST_SCAN_GROUP),
    ]

    def is_enabled(self, licence_modifications, **kwargs):
        return licence_modifications

    def process_codebase(self, codebase, licence_modifications, **kwargs):
        """
        Populate a licence_modifications mapping with a license modification
        note for every file Resource with a below-100% license match score.
        """
        if not self.is_enabled(licence_modifications):
            return
        for resource in codebase.walk(topdown=True):
            if not resource.is_file:
                continue
            try:
                licence_scores = set(
                    entry.get('score') for entry in resource.licenses)
            except AttributeError:
                # No license scan data on this resource: still set an empty
                # mapping so the attribute is present for every file.
                logger.info(
                    "Adding licence_modifications regardless if there is license modification info or not"
                )
                resource.licence_modifications = {}
                codebase.save_resource(resource)
                continue
            for score in licence_scores:
                # FIX: the original compared the numeric score to the string
                # '100.0', which is always unequal; compare numerically so an
                # exact 100% match is excluded up front.
                if score != 100.0:
                    modification_score = 100.00 - score
                    if modification_score != 0.0:
                        resource.licence_modifications = {
                            "modinfo": "license is %s percent modified "
                                       % (modification_score)
                        }
                        codebase.save_resource(resource)
class AddFacet(PreScanPlugin):
    """
    Assign one or more "facet" to each file (and NOT to directories). Facets
    are a way to qualify that some part of the scanned code may be core code
    vs. test vs. data, etc.
    """
    resource_attributes = {
        'facets': attr.ib(default=attr.Factory(list), repr=False),
    }

    sort_order = 20

    options = [
        CommandLineOption(
            ('--facet',),
            multiple=True,
            metavar='<facet>=<pattern>',
            callback=validate_facets,
            help='Add the <facet> to files with a path matching <pattern>.',
            help_group=PRE_SCAN_GROUP,
            sort_order=80,
        ),
    ]

    def is_enabled(self, facet, **kwargs):
        if TRACE:
            logger_debug('is_enabled: facet:', facet)
        return bool(facet)

    def process_codebase(self, codebase, facet=(), **kwargs):
        """
        Apply the `facet` definitions to every file resource. Each entry in
        the `facet` sequence is a <facet>=<pattern> definition string.
        """
        if not facet:
            return

        facet_definitions, _invalid_facet_definitions = build_facets(facet)
        if TRACE:
            logger_debug('facet_definitions:', facet_definitions)

        # files only: directories never carry facets
        for res in codebase.walk(topdown=True):
            if not res.is_file:
                continue
            computed = compute_path_facets(res.path, facet_definitions)
            # default to the core facet when no definition matched
            res.facets = computed if computed else [FACET_CORE]
            res.save(codebase)
class CustomTemplateOutput(OutputPlugin):
    """
    Output plugin rendering scan results through a user-supplied Jinja
    template. Both --custom-output and --custom-template must be provided.
    """
    options = [
        CommandLineOption(
            ('--custom-output',),
            type=FileOptionType(mode='w', encoding='utf-8', lazy=True),
            required_options=['custom_template'],
            metavar='FILE',
            help='Write scan output to FILE formatted with '
                 'the custom Jinja template file.',
            help_group=OUTPUT_GROUP,
            sort_order=60,
        ),
        CommandLineOption(
            ('--custom-template',),
            type=click.Path(
                exists=True,
                file_okay=True,
                dir_okay=False,
                readable=True,
                path_type=PATH_TYPE,
            ),
            required_options=['custom_output'],
            metavar='FILE',
            help='Use this Jinja template FILE as a custom template.',
            help_group=OUTPUT_GROUP,
            sort_order=65,
        ),
    ]

    def is_enabled(self, custom_output, custom_template, **kwargs):
        return custom_output and custom_template

    def process_codebase(self, codebase, custom_output, custom_template, **kwargs):
        scanned_files = self.get_files(codebase, **kwargs)
        tool_version = codebase.get_or_create_current_header().tool_version
        # on Linux the template path must be bytes for the filesystem APIs
        if on_linux:
            custom_template = fsencode(custom_template)
        write_templated(custom_output, scanned_files, tool_version,
                        template_loc=custom_template)
class OnlyLicensesTitles(PostScanPlugin):
    """
    Add the "only_licenses_titles" attribute to a resource: a mapping of the
    start and end lines of its detected licenses, or an empty mapping when no
    license scan data is available.
    """
    resource_attributes = dict(only_licenses_titles=attr.ib(
        default=attr.Factory(dict)))

    sort_order = 9

    options = [
        CommandLineOption(
            ('--only-licenses-titles', ),
            is_flag=True,
            default=False,
            help='Generate a list of files with only license titles',
            help_group=POST_SCAN_GROUP)
    ]

    def is_enabled(self, only_licenses_titles, **kwargs):
        return only_licenses_titles

    def process_codebase(self, codebase, only_licenses_titles, **kwargs):
        """
        Populate an only_licenses_titles mapping with the start and end line
        sets of detected licenses for each file Resource.
        """
        if not self.is_enabled(only_licenses_titles):
            return
        for resource in codebase.walk(topdown=True):
            if not resource.is_file:
                continue
            try:
                resource_start_line = set(
                    [entry.get('start_line') for entry in resource.licenses])
                resource_end_line = set(
                    [entry.get('end_line') for entry in resource.licenses])
            except AttributeError:
                # No license scan data: still set an (empty) mapping so the
                # attribute is present on every file.
                resource.only_licenses_titles = {}
                codebase.save_resource(resource)
                continue
            # FIX: the original looped over resource_start_line assigning the
            # identical mapping and re-saving once per element; a single
            # conditional assignment produces the same final state.
            if resource_start_line:
                resource.only_licenses_titles = {
                    "LineStart": resource_start_line,
                    "LineEnd": resource_end_line
                }
                codebase.save_resource(resource)
class ProcessIgnore(PreScanPlugin):
    """
    Ignore files matching the supplied pattern.
    """
    options = [
        CommandLineOption(('--ignore', ),
                          multiple=True,
                          metavar='<pattern>',
                          help='Ignore files matching <pattern>.',
                          sort_order=10,
                          help_group=PRE_SCAN_GROUP)
    ]

    def is_enabled(self, ignore, **kwargs):
        return ignore

    def process_codebase(self, codebase, ignore=(), **kwargs):
        """
        Remove ignored Resources from the resource tree.
        """
        if not ignore:
            return
        # map each user pattern to a fixed reason string, as expected by the
        # is_ignored(ignores=...) helper
        ignores = {
            pattern: 'User ignore: Supplied by --ignore'
            for pattern in ignore
        }
        ignorable = partial(is_ignored, ignores=ignores)
        rids_to_remove = []
        remove_resource = codebase.remove_resource
        # First, walk the codebase from the top-down and collect the rids of
        # Resources that can be removed.
        for resource in codebase.walk(topdown=True):
            if ignorable(resource.path):
                # direct children are marked too; deeper descendants are
                # presumably removed with their ancestor below — TODO confirm
                # remove_resource drops the whole subtree
                for child in resource.children(codebase):
                    rids_to_remove.append(child.rid)
                rids_to_remove.append(resource.rid)
        # Then, walk bottom-up and remove the ignored Resources from the
        # Codebase if the Resource's rid is in our list of rid's to remove.
        # Bottom-up order ensures children are removed before their parents.
        for resource in codebase.walk(topdown=False):
            resource_rid = resource.rid
            if resource_rid in rids_to_remove:
                # drop the rid from the worklist so each resource is removed
                # exactly once
                rids_to_remove.remove(resource_rid)
                remove_resource(resource)
class NoLicenses(PostScanPlugin):
    """
    Add the "no_licenses" attribute to a resource: a presence note for each
    detected license short name, or an empty mapping when no license scan data
    is available.
    """
    resource_attributes = dict(no_licenses=attr.ib(default=attr.Factory(dict)))

    sort_order = 9

    options = [
        CommandLineOption(('--no-licenses', ),
                          is_flag=True,
                          default=False,
                          help='Generate a list of no licences files',
                          help_group=POST_SCAN_GROUP),
    ]

    def is_enabled(self, no_licenses, **kwargs):
        return no_licenses

    def process_codebase(self, codebase, no_licenses, **kwargs):
        """
        Populate the no_licenses attribute for each file Resource based on its
        detected license short names.
        """
        if not self.is_enabled(no_licenses):
            return
        for resource in codebase.walk(topdown=True):
            if not resource.is_file:
                continue
            try:
                resource_no_licenses = set(
                    [entry.get('short_name') for entry in resource.licenses])
            except AttributeError:
                # No license scan data: still set an (empty) mapping so the
                # attribute is present on every file.
                # FIX: was `logger.dubug` which raised AttributeError inside
                # this very handler, breaking the fallback path.
                logger.debug(
                    "add no_licenses regardless if there is license info or not"
                )
                resource.no_licenses = {}
                codebase.save_resource(resource)
                continue
            # renamed from `license` to avoid shadowing the builtin
            for short_name in resource_no_licenses:
                if short_name:
                    resource.no_licenses = "%s is Present" % (short_name)
                    codebase.save_resource(resource)
class MarkSource(PostScanPlugin):
    """
    Set the "is_source" flag to true for directories that contain over 90% of
    source files as direct children. Has no effect unless the --info scan is
    requested.
    """
    # number of source files found under a directory (files keep the default 0)
    resource_attributes = {
        'source_count': attr.ib(default=0, type=int, repr=False),
    }

    sort_order = 8

    options = [
        CommandLineOption(
            ('--mark-source',),
            is_flag=True,
            default=False,
            required_options=['info'],
            help='Set the "is_source" to true for directories that contain '
                 'over 90% of source files as children and descendants. '
                 'Count the number of source files in a directory as a new source_file_counts attribute',
            help_group=POST_SCAN_GROUP,
        ),
    ]

    def is_enabled(self, mark_source, info, **kwargs):
        return mark_source and info

    def process_codebase(self, codebase, mark_source, **kwargs):
        """
        Mark directories as source when over 90% of the files they contain, at
        full depth, are source files.
        """
        # bottom-up so child directories have their source_count set before
        # their parents aggregate it
        for res in codebase.walk(topdown=False):
            if res.is_file:
                continue
            kids = res.children(codebase)
            if not kids:
                continue
            direct_sources = sum(1 for c in kids if c.is_file and c.is_source)
            nested_sources = sum(c.source_count for c in kids if not c.is_file)
            total = direct_sources + nested_sources
            flagged = is_source_directory(total, res.files_count)
            if total and flagged:
                res.is_source = flagged
                res.source_count = total
                codebase.save_resource(res)
class JsonLinesOutput(OutputPlugin):
    """
    Output plugin writing scan results as JSON Lines: one JSON object per
    line (headers first, then codebase attributes, then one line per file).
    """
    options = [
        CommandLineOption(
            ('--json-lines', 'output_json_lines',),
            type=FileOptionType(mode=mode, lazy=True),
            metavar='FILE',
            help='Write scan output as JSON Lines to FILE.',
            help_group=OUTPUT_GROUP,
            sort_order=15,
        ),
    ]

    def is_enabled(self, output_json_lines, **kwargs):
        return output_json_lines

    # TODO: reuse the json output code and merge that in a single plugin
    def process_codebase(self, codebase, output_json_lines, **kwargs):
        # NOTE: we write as binary, not text
        scanned_files = self.get_files(codebase, **kwargs)
        codebase.add_files_count_to_current_header()

        dumps_kwargs = dict(
            iterable_as_array=True,
            encoding='utf-8',
            separators=(comma, colon,),
        )
        write = output_json_lines.write

        # first line: the scan headers
        headers = OrderedDict(headers=codebase.get_headers())
        write(simplejson.dumps(headers, **dumps_kwargs))
        write(eol)

        # one line per non-empty codebase-level attribute
        for name, value in codebase.attributes.to_dict().items():
            if not value:
                continue
            write(simplejson.dumps({name: value}, **dumps_kwargs))
            write(eol)

        # one line per scanned file
        for scanned_file in scanned_files:
            write(simplejson.dumps({file_key: [scanned_file]}, **dumps_kwargs))
            write(eol)
class JsonCompactOutput(OutputPlugin):
    """
    Output plugin writing scan results as compact (single-line) JSON to a file.
    """
    options = [
        CommandLineOption(
            ('--json', 'output_json',),
            type=FileOptionType(mode='wb', lazy=True),
            metavar='FILE',
            help='Write scan output as compact JSON to FILE.',
            help_group=OUTPUT_GROUP,
            sort_order=10,
        ),
    ]

    def is_enabled(self, output_json, **kwargs):
        return output_json

    def process_codebase(self, codebase, output_json, **kwargs):
        scan_results = get_results(codebase, as_list=False, **kwargs)
        write_json(scan_results, output_file=output_json, pretty=False)
class CsvOutput(OutputPlugin):
    """
    Output plugin writing scan results as CSV to a file.
    """
    options = [
        CommandLineOption(
            ('--csv',),
            type=FileOptionType(mode='wb', lazy=True),
            metavar='FILE',
            help='Write scan output as CSV to FILE.',
            help_group=OUTPUT_GROUP,
            sort_order=30,
        ),
    ]

    def is_enabled(self, csv, **kwargs):
        return csv

    def process_codebase(self, codebase, csv, **kwargs):
        scanned_files = self.get_files(codebase, **kwargs)
        write_csv(scanned_files, csv)
class FingerprintScanner(ScanPlugin):
    """
    Scan plugin generating a simhash fingerprint for a file Resource, usable
    for similarity matching.
    """
    resource_attributes = {'fingerprint': attr.ib(default=None, repr=False)}

    sort_order = 1

    options = [
        CommandLineOption(
            ('-f', '--fingerprint'),
            is_flag=True,
            default=False,
            help='Scan <input> to generate simhash fingerprints for similarity matching.',
            help_group=OTHER_SCAN_GROUP,
        ),
    ]

    def is_enabled(self, fingerprint, **kwargs):
        return fingerprint

    def get_scanner(self, **kwargs):
        return get_fingerprint
class HtmlOutput(OutputPlugin):
    """
    Output plugin rendering scan results as HTML using the bundled template.
    """
    options = [
        CommandLineOption(
            ('--html',),
            type=FileOptionType(mode='w', encoding='utf-8', lazy=True),
            metavar='FILE',
            help='Write scan output as HTML to FILE.',
            help_group=OUTPUT_GROUP,
            sort_order=50,
        ),
    ]

    def is_enabled(self, html, **kwargs):
        return html

    def process_codebase(self, codebase, html, **kwargs):
        scanned_files = self.get_files(codebase, **kwargs)
        tool_version = codebase.get_or_create_current_header().tool_version
        # render with the stock HTML template shipped with the tool
        template_loc = join(TEMPLATES_DIR, 'html', 'template.html')
        write_templated(html, scanned_files, tool_version, template_loc)
class InfoScanner(ScanPlugin):
    """
    Scan plugin collecting miscellaneous file information such as mime/filetype
    and basic checksums for a file Resource.
    """
    resource_attributes = OrderedDict([
        # timestamps and checksums
        ('date', attr.ib(default=None, repr=False)),
        ('sha1', attr.ib(default=None, repr=False)),
        ('md5', attr.ib(default=None, repr=False)),
        ('sha256', attr.ib(default=None, repr=False)),
        # file typing
        ('mime_type', attr.ib(default=None, repr=False)),
        ('file_type', attr.ib(default=None, repr=False)),
        ('programming_language', attr.ib(default=None, repr=False)),
        # classification flags
        ('is_binary', attr.ib(default=False, type=bool, repr=False)),
        ('is_text', attr.ib(default=False, type=bool, repr=False)),
        ('is_archive', attr.ib(default=False, type=bool, repr=False)),
        ('is_media', attr.ib(default=False, type=bool, repr=False)),
        ('is_source', attr.ib(default=False, type=bool, repr=False)),
        ('is_script', attr.ib(default=False, type=bool, repr=False)),
    ])

    sort_order = 0

    options = [
        CommandLineOption(
            ('-i', '--info'),
            is_flag=True,
            default=False,
            help='Scan <input> for file information (size, checksums, etc).',
            help_group=OTHER_SCAN_GROUP,
            sort_order=10,
        ),
    ]

    def is_enabled(self, info, **kwargs):
        return info

    def get_scanner(self, **kwargs):
        from scancode.api import get_file_info
        return get_file_info
class LKMClueScanner(ScanPlugin):
    """
    Scan plugin collecting LKM (Linux Kernel Module) clue information from a
    Resource.
    """
    resource_attributes = OrderedDict(
        lkm_clue=attr.ib(default=attr.Factory(OrderedDict), repr=False),
    )

    options = [
        CommandLineOption(
            ('--lkmclue',),
            is_flag=True,
            default=False,
            help='Collect LKM module clues and type indicating a possible Linux Kernel Module. (formerly lkm_hint and lkm_line).',
            help_group=SCAN_GROUP,
            sort_order=100,
        ),
    ]

    def is_enabled(self, lkmclue, **kwargs):
        return lkmclue

    def get_scanner(self, **kwargs):
        return get_lkm_clues