Пример #1
0
class UrlScanner(ScanPlugin):
    """
    Scan a Resource for URLs.
    """

    # Declares a "urls" attribute whose default is a new empty list.
    resource_attributes = dict(urls=attr.ib(default=attr.Factory(list)))

    sort_order = 10

    options = [
        PluggableCommandLineOption(
            ('-u', '--url',),
            is_flag=True,
            default=False,
            help='Scan <input> for urls.',
            help_group=OTHER_SCAN_GROUP,
        ),
        PluggableCommandLineOption(
            ('--max-url',),
            type=int,
            default=50,
            metavar='INT',
            show_default=True,
            required_options=['url'],
            help='Report only up to INT urls found in a file. Use 0 for no limit.',
            help_group=SCAN_OPTIONS_GROUP,
        ),
    ]

    def is_enabled(self, url, **kwargs):
        """
        Return the value of the `url` option as the enabled state.
        """
        return url

    def get_scanner(self, max_url=50, **kwargs):
        """
        Return the `get_urls` callable with its `threshold` argument bound to
        `max_url`.
        """
        # Imported when the scanner is requested, not at module import time.
        from scancode.api import get_urls
        url_scanner = partial(get_urls, threshold=max_url)
        return url_scanner
Пример #2
0
class EmailScanner(ScanPlugin):
    """
    Scan a Resource for emails.
    """

    # Declares an "emails" attribute whose default is a new empty list.
    resource_attributes = dict(emails=attr.ib(default=attr.Factory(list)))

    sort_order = 8

    options = [
        PluggableCommandLineOption(
            ('-e', '--email',),
            is_flag=True,
            default=False,
            help='Scan <input> for emails.',
            help_group=OTHER_SCAN_GROUP,
        ),
        PluggableCommandLineOption(
            ('--max-email',),
            type=int,
            default=50,
            metavar='INT',
            required_options=['email'],
            show_default=True,
            help='Report only up to INT emails found in a file. Use 0 for no limit.',
            help_group=SCAN_OPTIONS_GROUP,
        ),
    ]

    def is_enabled(self, email, **kwargs):
        """
        Return the value of the `email` option as the enabled state.
        """
        return email

    def get_scanner(self, max_email=50, test_slow_mode=False, test_error_mode=False, **kwargs):
        """
        Return the `get_emails` callable with `threshold` bound to `max_email`
        and the two test mode flags passed through unchanged.
        """
        # Imported when the scanner is requested, not at module import time.
        from scanengine.api import get_emails

        bound_arguments = dict(
            threshold=max_email,
            test_slow_mode=test_slow_mode,
            test_error_mode=test_error_mode,
        )
        return partial(get_emails, **bound_arguments)
class PackageScanner(ScanPlugin):
    """
    Scan a Resource for Package manifests and report these as "packages" at the
    right file or directory level.
    """

    # Declares a "packages" attribute: a new empty list by default, excluded
    # from the repr.
    resource_attributes = OrderedDict([
        ('packages', attr.ib(default=attr.Factory(list), repr=False)),
    ])

    sort_order = 6

    required_plugins = [
        'scan:licenses',
    ]

    options = [
        PluggableCommandLineOption(
            ('-p', '--package',),
            is_flag=True,
            default=False,
            help='Scan <input> for package manifests and build scripts.',
            help_group=SCAN_GROUP,
            sort_order=20,
        ),
        PluggableCommandLineOption(
            ('--list-packages',),
            is_flag=True,
            is_eager=True,
            callback=print_packages,
            help='Show the list of supported package types and exit.',
            help_group=DOC_GROUP,
        ),
    ]

    def is_enabled(self, package, **kwargs):
        """
        Return the value of the `package` option as the enabled state.
        """
        return package

    def get_scanner(self, **kwargs):
        """
        Return a scanner callable to scan a Resource for packages.
        """
        # Imported when the scanner is requested, not at module import time.
        from scancode.api import get_package_info
        return get_package_info

    def process_codebase(self, codebase, **kwargs):
        """
        Set the package root given a package "type".

        Walk the codebase bottom-up and call `set_packages_root` on each
        resource. Do nothing when the codebase has a single resource.
        """
        # What if we scanned a single file and we do not have a root proper?
        if not codebase.has_single_resource:
            for resource in codebase.walk(topdown=False):
                set_packages_root(resource, codebase)
Пример #4
0
class IgnoreCopyrights(OutputFilterPlugin):
    """
    Filter findings that match given copyright holder or author patterns.
    Has no effect unless the --copyright scan is requested.
    """

    options = [
        PluggableCommandLineOption(
            ('--ignore-copyright-holder',),
            multiple=True,
            metavar='<pattern>',
            help=(
                'Ignore a file (and all its findings) if a copyright holder '
                'contains a match to the <pattern> regular expression. '
                'Note that this will ignore a file even if it has other scanned '
                'data such as a license or errors.'
            ),
            help_group=OUTPUT_FILTER_GROUP,
        ),
        PluggableCommandLineOption(
            ('--ignore-author',),
            multiple=True,
            metavar='<pattern>',
            help=(
                'Ignore a file (and all its findings) if an author '
                'contains a match to the <pattern> regular expression. '
                'Note that this will ignore a file even if it has other findings '
                'such as a license or errors.'
            ),
            help_group=OUTPUT_FILTER_GROUP,
        ),
    ]

    def is_enabled(self, ignore_copyright_holder, ignore_author, **kwargs):  # NOQA
        """
        Return True when at least one holder or author pattern was provided.
        """
        return bool(ignore_copyright_holder or ignore_author)

    def process_codebase(self, codebase, ignore_copyright_holder,
                         ignore_author, **kwargs):
        """
        Set `is_filtered` to True and save every resource of `codebase` for
        which `is_ignored` reports a match between the compiled holder
        patterns and its holders, or between the compiled author patterns
        and its authors.
        """
        # Compile each pattern once, before walking the codebase.
        holder_regexes = [re.compile(pattern) for pattern in ignore_copyright_holder]
        author_regexes = [re.compile(pattern) for pattern in ignore_author]

        for resource in codebase.walk():
            # A resource without "holders" or "authors" is treated as having
            # none of them.
            holders = {
                entry['holder'] for entry in getattr(resource, 'holders', [])
            }
            authors = {
                entry['author'] for entry in getattr(resource, 'authors', [])
            }
            if TRACE:
                logger_debug('holders:', holders)
                logger_debug('authors:', authors)

            if not (is_ignored(holder_regexes, holders)
                    or is_ignored(author_regexes, authors)):
                continue

            resource.is_filtered = True
            codebase.save_resource(resource)
Пример #5
0
class DebianCopyrightOutput(OutputPlugin):
    """
    Output plugin to write scan results in machine-readable Debian copyright
    format.
    """

    options = [
        PluggableCommandLineOption(
            ('--debian', 'output_debian',),
            type=FileOptionType(mode='w', lazy=True),
            metavar='FILE',
            help='Write scan output in machine-readable Debian copyright format to FILE.',
            help_group=OUTPUT_GROUP,
            # This is temporary: we should not need these options explicitly
            # but should instead adapt to the available data.
            required_options=['copyright', 'license', 'license_text'],
            sort_order=60,
        ),
    ]

    def is_enabled(self, output_debian, **kwargs):
        """
        Return the value of the `output_debian` option as the enabled state.
        """
        return output_debian

    def process_codebase(self, codebase, output_debian, **kwargs):
        """
        Build a Debian copyright from `codebase` and write it to the
        `output_debian` file.
        """
        write_debian_copyright(
            debian_copyright=build_debian_copyright(codebase, **kwargs),
            output_file=output_debian,
        )
Пример #6
0
class SpdxRdfOutput(OutputPlugin):
    """
    Output plugin to write scan results as SPDX RDF.
    """

    options = [
        PluggableCommandLineOption(
            ('--spdx-rdf',),
            type=FileOptionType(mode='w', encoding='utf-8', lazy=True),
            metavar='FILE',
            help='Write scan output as SPDX RDF to FILE.',
            help_group=OUTPUT_GROUP,
        ),
    ]

    def is_enabled(self, spdx_rdf, **kwargs):
        """
        Return the value of the `spdx_rdf` option as the enabled state.
        """
        return spdx_rdf

    def process_codebase(self, codebase, spdx_rdf, **kwargs):
        """
        Write the files of `codebase` to the `spdx_rdf` file using
        `write_spdx` with `as_tagvalue=False`. The tool name, tool version and
        notice come from the current codebase header, and the scanned input
        from the `input` keyword argument (an empty string when absent).
        """
        check_sha1(codebase)
        files = self.get_files(codebase, **kwargs)
        header = codebase.get_or_create_current_header()

        write_spdx(
            spdx_rdf,
            files,
            header.tool_name,
            header.tool_version,
            header.notice,
            kwargs.get('input', ''),
            as_tagvalue=False,
        )
Пример #7
0
class ScanByFacetSummary(PostScanPlugin):
    """
    Summarize a scan at the codebase level grouping by facets.
    """
    sort_order = 200

    # Declares a codebase-level "summary_by_facet" attribute whose default is
    # a new empty list.
    codebase_attributes = dict(
        summary_by_facet=attr.ib(default=attr.Factory(list)),
    )

    options = [
        PluggableCommandLineOption(
            ('--summary-by-facet',),
            is_flag=True,
            default=False,
            required_options=['facet', 'summary'],
            help=(
                'Summarize license, copyright and other scans and group the '
                'results by facet.'
            ),
            help_group=POST_SCAN_GROUP,
        ),
    ]

    def is_enabled(self, summary_by_facet, **kwargs):
        """
        Return the value of the `summary_by_facet` option as the enabled state.
        """
        return summary_by_facet

    def process_codebase(self, codebase, summary_by_facet, **kwargs):
        """
        Delegate to `summarize_codebase_by_facet`, passing `codebase` and the
        remaining keyword arguments.
        """
        if TRACE_LIGHT:
            logger_debug('ScanByFacetSummary:process_codebase')
        summarize_codebase_by_facet(codebase, **kwargs)
class RedundantCluesFilter(PostScanPlugin):
    """
    Filter redundant clues (copyrights, authors, emails, and urls) that are already
    contained in another more important scan result.
    """
    sort_order = 1

    options = [
        PluggableCommandLineOption(
            ('--filter-clues',),
            is_flag=True,
            default=False,
            help=(
                'Filter redundant duplicated clues already contained in '
                'detected license and copyright texts and notices.'
            ),
            help_group=POST_SCAN_GROUP,
        ),
    ]

    def is_enabled(self, filter_clues, **kwargs):
        """
        Return the value of the `filter_clues` option as the enabled state.
        """
        return filter_clues

    def process_codebase(self, codebase, **kwargs):
        """
        Update detected clues to remove redundant clues already found in another
        detected clue for all the resources of codebase.
        """
        if TRACE:
            logger_debug('RedundantFilter:process_codebase')

        # Imported when processing starts, not at module import time.
        from licensedcode.cache import get_index

        # Map each rule of the license index to its identifier.
        rules_by_id = dict(
            (rule.identifier, rule) for rule in get_index().rules_by_rid
        )

        for resource in codebase.walk():
            filtered = filter_ignorable_resource_clues(resource, rules_by_id)
            # Only a truthy result is saved back to the codebase.
            if not filtered:
                continue
            filtered.save(codebase)
Пример #9
0
class Tallies(PostScanPlugin):
    """
    Compute tallies for license, copyright and other scans at the codebase level
    """
    sort_order = 10

    # Declares a codebase-level "tallies" attribute whose default is a new
    # empty dict.
    codebase_attributes = dict(tallies=attr.ib(default=attr.Factory(dict)))

    options = [
        PluggableCommandLineOption(
            ('--tallies',),
            is_flag=True,
            default=False,
            help='Compute tallies for license, copyright and other scans at the codebase level.',
            help_group=POST_SCAN_GROUP,
        ),
    ]

    def is_enabled(self, tallies, **kwargs):
        """
        Return the value of the `tallies` option as the enabled state.
        """
        return tallies

    def process_codebase(self, codebase, tallies, **kwargs):
        """
        Compute the tallies of `codebase` with `keep_details=False` and merge
        them into the codebase-level `tallies` attribute.
        """
        if TRACE_LIGHT:
            logger_debug('Tallies:process_codebase')

        computed_tallies = compute_codebase_tallies(
            codebase,
            keep_details=False,
            **kwargs
        )
        codebase.attributes.tallies.update(computed_tallies)
Пример #10
0
class ScanKeyFilesSummary(PostScanPlugin):
    """
    Summarize a scan at the codebase level for only key files.
    """
    sort_order = 150

    # mapping of summary data at the codebase level for key files
    codebase_attributes = dict(
        summary_of_key_files=attr.ib(default=attr.Factory(dict)),
    )

    options = [
        PluggableCommandLineOption(
            ('--summary-key-files',),
            is_flag=True,
            default=False,
            required_options=['classify', 'summary'],
            help=(
                'Summarize license, copyright and other scans for key, '
                'top-level files. Key files are top-level codebase files such '
                'as COPYING, README and package manifests as reported by the '
                '--classify option "is_legal", "is_readme", "is_manifest" '
                'and "is_top_level" flags.'
            ),
            help_group=POST_SCAN_GROUP,
        ),
    ]

    def is_enabled(self, summary_key_files, **kwargs):
        """
        Return the value of the `summary_key_files` option as the enabled
        state.
        """
        return summary_key_files

    def process_codebase(self, codebase, summary_key_files, **kwargs):
        """
        Delegate to `summarize_codebase_key_files`, passing `codebase` and the
        remaining keyword arguments.
        """
        summarize_codebase_key_files(codebase, **kwargs)
Пример #11
0
class LicenseClarityScore(PostScanPlugin):
    """
    Compute a License clarity score at the codebase level.
    """
    codebase_attributes = dict(
        license_clarity_score=Mapping(
            help='Computed license clarity score as mapping containing the score '
                 'proper and each scoring elements.',
        ),
    )

    sort_order = 110

    options = [
        PluggableCommandLineOption(
            ('--license-clarity-score',),
            is_flag=True,
            default=False,
            required_options=['classify'],
            help='Compute a summary license clarity score at the codebase level.',
            help_group=POST_SCAN_GROUP,
        ),
    ]

    def is_enabled(self, license_clarity_score, **kwargs):
        """
        Return the value of the `license_clarity_score` option as the enabled
        state.
        """
        return license_clarity_score

    def process_codebase(self, codebase, license_clarity_score, **kwargs):
        """
        Compute the license score of `codebase` and merge the result into the
        codebase-level `license_clarity_score` attribute.
        """
        if TRACE:
            logger_debug('LicenseClarityScore:process_codebase')

        codebase.attributes.license_clarity_score.update(
            compute_license_score(codebase)
        )
Пример #12
0
class CycloneDxXmlOutput(OutputPlugin):
    """
    Output plugin to write scan results in CycloneDX XML format.
    For additional information on the format,
    please see https://cyclonedx.org/specification/overview/
    """

    options = [
        PluggableCommandLineOption(
            ('--cyclonedx-xml', 'output_cyclonedx_xml',),
            type=FileOptionType(mode='w', encoding='utf-8', lazy=True),
            metavar='FILE',
            help='Write scan output in CycloneDX XML format to FILE.',
            help_group=OUTPUT_GROUP,
            sort_order=70,
        ),
    ]

    def is_enabled(self, output_cyclonedx_xml, **kwargs):
        """
        Return the value of the `output_cyclonedx_xml` option as the enabled
        state.
        """
        return output_cyclonedx_xml

    def process_codebase(self, codebase, output_cyclonedx_xml, **kwargs):
        """
        Build a `CycloneDxBom` from `codebase` and write it as XML to the
        `output_cyclonedx_xml` file.
        """
        CycloneDxBom.from_codebase(codebase).write_xml(
            output_file=output_cyclonedx_xml,
        )
class IsLicenseText(PostScanPlugin):
    """
    Set the "is_license_text" flag to true for at the file level for text files
    that contain mostly (as 90% of their size) license texts or notices.
    Has no effect unless --license, --license-text and --info scan data
    are available.
    """

    # Declares a boolean "is_license_text" attribute: False by default and
    # excluded from the repr.
    resource_attributes = dict(is_license_text=attr.ib(default=False, type=bool, repr=False))

    sort_order = 80

    options = [
        PluggableCommandLineOption(('--is-license-text',),
            is_flag=True, default=False,
            required_options=['info', 'license_text'],
            help='Set the "is_license_text" flag to true for files that contain '
                 'mostly license texts and notices (e.g over 90% of the content).'
                 '[DEPRECATED] this is now built-in in the --license-text option '
                 'with  a "percentage_of_license_text" attribute.',
            help_group=POST_SCAN_GROUP)
    ]

    def is_enabled(self, is_license_text, **kwargs):
        """
        Return the value of the `is_license_text` option as the enabled state.
        """
        return is_license_text

    def process_codebase(self, codebase, is_license_text, **kwargs):
        """
        Set the `is_license_text` to True for files that contain over 90% of
        detected license texts.

        Only text resources are considered. The license text size is
        estimated as the sum of the matched text lengths, each weighted by its
        match coverage percentage, and compared against 90% of the resource
        size. Resources without any license match are left untouched.
        """

        for resource in codebase.walk():
            if not resource.is_text:
                continue

            # Tolerate resources without license data so that the plugin has
            # no effect on them, as documented on the class.
            licenses = getattr(resource, 'licenses', None) or ()

            # keep unique texts/line ranges since we repeat this for each matched licenses
            license_texts = set()
            for lic in licenses:
                # A missing or None matched text, matched rule or coverage is
                # treated as contributing nothing instead of raising.
                matched_rule = lic.get('matched_rule') or {}
                license_texts.add(
                    (lic.get('matched_text') or '',
                     lic.get('start_line', 0),
                     lic.get('end_line', 0),
                     matched_rule.get('match_coverage') or 0)
                )

            if not license_texts:
                # Without any license match a file cannot be mostly license
                # text: this also avoids flagging zero-size files, for which
                # the "0 >= 0" comparison below would otherwise be true.
                continue

            # use coverage to weight and estimate of the the actual matched length
            # (divide by a float so the weight is not truncated by integer
            # division on Python 2)
            license_texts_size = 0
            for txt, _, _, cov in license_texts:
                license_texts_size += len(txt) * (cov / 100.0)

            threshold = resource.size * 0.9
            mostly_license_text = license_texts_size >= threshold

            if TRACE:
                logger_debug(
                    'IsLicenseText: license size:', license_texts_size,
                    'size:', resource.size,
                    'license_texts_size >= (resource.size * 0.9)', mostly_license_text,
                    'resource.size * 0.9:', threshold
                )

            if mostly_license_text:
                resource.is_license_text = True
                resource.save(codebase)
Пример #14
0
class OnlyFindings(OutputFilterPlugin):
    """
    Filter files or directories without scan findings for the requested scans.
    """

    options = [
        PluggableCommandLineOption(
            ('--only-findings',),
            is_flag=True,
            help=(
                'Only return files or directories with findings for the '
                'requested scans. Files and directories without findings are '
                'omitted (file information is not treated as findings).'
            ),
            help_group=OUTPUT_FILTER_GROUP,
        ),
    ]

    def is_enabled(self, only_findings, **kwargs):
        """
        Return the value of the `only_findings` option as the enabled state.
        """
        return only_findings

    def process_codebase(self, codebase, resource_attributes_by_plugin, **kwargs):
        """
        Set Resource.is_filtered to True for resources from the codebase that do
        not have findings, e.g. if they have no scan data and no errors.

        The attributes checked for findings are "scan_errors" plus every
        attribute listed in `resource_attributes_by_plugin`, except for those
        of the "scan:info" plugin.
        """
        finding_attributes = {'scan_errors'}
        for plugin_qname, attribute_names in resource_attributes_by_plugin.items():
            # skip info resource_attributes
            if plugin_qname != 'scan:info':
                finding_attributes.update(attribute_names)

        for resource in codebase.walk():
            if not has_findings(resource, finding_attributes):
                resource.is_filtered = True
                codebase.save_resource(resource)
Пример #15
0
class FacetTallies(PostScanPlugin):
    """
    Compute tallies for a scan at the codebase level, grouping by facets.
    """
    sort_order = 200

    # Declares a codebase-level "tallies_by_facet" attribute whose default is
    # a new empty list.
    codebase_attributes = dict(
        tallies_by_facet=attr.ib(default=attr.Factory(list)),
    )

    options = [
        PluggableCommandLineOption(
            ('--tallies-by-facet',),
            is_flag=True,
            default=False,
            required_options=['facet', 'tallies'],
            help=(
                'Compute tallies for license, copyright and other scans and group the '
                'results by facet.'
            ),
            help_group=POST_SCAN_GROUP,
        ),
    ]

    def is_enabled(self, tallies_by_facet, **kwargs):
        """
        Return the value of the `tallies_by_facet` option as the enabled state.
        """
        return tallies_by_facet

    def process_codebase(self, codebase, tallies_by_facet, **kwargs):
        """
        Delegate to `tally_codebase_by_facet`, passing `codebase` and the
        remaining keyword arguments.
        """
        if TRACE_LIGHT:
            logger_debug('FacetTallies:process_codebase')
        tally_codebase_by_facet(codebase, **kwargs)
Пример #16
0
class ScanSummary(PostScanPlugin):
    """
    Summarize a scan at the codebase level.
    """
    sort_order = 10

    # Declares a codebase-level "summary" attribute whose default is a new
    # empty OrderedDict.
    codebase_attributes = OrderedDict(
        summary=attr.ib(default=attr.Factory(OrderedDict)),
    )

    options = [
        PluggableCommandLineOption(
            ('--summary',),
            is_flag=True,
            default=False,
            help='Summarize license, copyright and other scans at the codebase level.',
            help_group=POST_SCAN_GROUP,
        ),
    ]

    def is_enabled(self, summary, **kwargs):
        """
        Return the value of the `summary` option as the enabled state.
        """
        return summary

    def process_codebase(self, codebase, summary, **kwargs):
        """
        Delegate to `summarize_codebase` with `keep_details=False`, passing
        `codebase` and the remaining keyword arguments.
        """
        if TRACE_LIGHT:
            logger_debug('ScanSummary:process_codebase')
        summarize_codebase(codebase, keep_details=False, **kwargs)
Пример #17
0
class GWTScanner(ScanPlugin):
    """
    Parse GWT (Google Web Toolkit) ".symbolMap" files to extract compilation/debug
    symbols. Used to infer the relationship between the compiled JavaScript and
    the original Java Source code.
    """

    # Declares a "gwt" attribute: a new empty list by default, excluded from
    # the repr.
    resource_attributes = dict(
        gwt=attr.ib(default=attr.Factory(list), repr=False),
    )

    options = [
        PluggableCommandLineOption(
            ('--gwt',),
            is_flag=True,
            default=False,
            help='Parse GWT (Google Web Toolkit) ".symbolMap" files to extract compilation/debug symbols. Used to infer the relationship between the compiled JavaScript and the original Java Source code.',
            help_group=SCAN_GROUP,
            sort_order=100,
        ),
    ]

    def is_enabled(self, gwt, **kwargs):
        """
        Return the value of the `gwt` option as the enabled state.
        """
        return gwt

    def get_scanner(self, **kwargs):
        """
        Return the `gwt_scan` scanner callable.
        """
        return gwt_scan
Пример #18
0
class ScanSummaryWithDetails(PostScanPlugin):
    """
    Summarize a scan at the codebase level and keep file and directory details.
    """
    sort_order = 100

    # mapping of summary data at the codebase level for the whole codebase
    codebase_attributes = dict(summary=attr.ib(default=attr.Factory(dict)))

    # store summaries at the file and directory level in this attribute when
    # keep details is True
    resource_attributes = dict(summary=attr.ib(default=attr.Factory(dict)))

    options = [
        PluggableCommandLineOption(
            ('--summary-with-details',),
            is_flag=True,
            default=False,
            help=(
                'Summarize license, copyright and other scans at the codebase level, '
                'keeping intermediate details at the file and directory level.'
            ),
            help_group=POST_SCAN_GROUP,
        ),
    ]

    def is_enabled(self, summary_with_details, **kwargs):
        """
        Return the value of the `summary_with_details` option as the enabled
        state.
        """
        return summary_with_details

    def process_codebase(self, codebase, summary_with_details, **kwargs):
        """
        Delegate to `summarize_codebase` with `keep_details=True`, passing
        `codebase` and the remaining keyword arguments.
        """
        summarize_codebase(codebase, keep_details=True, **kwargs)
Пример #19
0
class CopyrightScanner(ScanPlugin):
    """
    Scan a Resource for copyrights.
    """

    # Declares three attributes, each defaulting to a new empty list, in this
    # order: copyrights, holders, authors.
    resource_attributes = dict(
        (attribute_name, attr.ib(default=attr.Factory(list)))
        for attribute_name in ('copyrights', 'holders', 'authors')
    )

    sort_order = 4

    options = [
        PluggableCommandLineOption(
            ('-c', '--copyright',),
            is_flag=True,
            default=False,
            help='Scan <input> for copyrights.',
            help_group=SCAN_GROUP,
            sort_order=50,
        ),
    ]

    def is_enabled(self, copyright, **kwargs):  # NOQA
        """
        Return the value of the `copyright` option as the enabled state.
        """
        return copyright

    def get_scanner(self, **kwargs):
        """
        Return the `get_copyrights` scanner callable.
        """
        # Imported when the scanner is requested, not at module import time.
        from scancode.api import get_copyrights
        return get_copyrights
Пример #20
0
class GeneratedCodeDetector(ScanPlugin):
    """
    Tag a file as generated.
    """
    resource_attributes = dict(
        is_generated=Boolean(
            help='True if this file is likely an automatically generated file.',
        ),
    )

    sort_order = 50

    options = [
        PluggableCommandLineOption(
            ('--generated',),
            is_flag=True,
            default=False,
            help='Classify automatically generated code files with a flag.',
            help_group=OTHER_SCAN_GROUP,
            sort_order=50,
        ),
    ]

    def is_enabled(self, generated, **kwargs):
        """
        Return the value of the `generated` option as the enabled state.
        """
        return generated

    def get_scanner(self, **kwargs):
        """
        Return the `generated_scanner` scanner callable.
        """
        return generated_scanner
Пример #21
0
class LicenseClarityScore(PostScanPlugin):
    """
    Compute a License clarity score at the codebase level.
    """

    # Declares a codebase-level "summary" attribute whose default is a new
    # empty dict.
    codebase_attributes = dict(summary=attr.ib(default=attr.Factory(dict)))

    sort_order = 5

    options = [
        PluggableCommandLineOption(
            ('--license-clarity-score',),
            is_flag=True,
            default=False,
            required_options=['classify'],
            help='Compute a summary license clarity score at the codebase level.',
            help_group=POST_SCAN_GROUP,
        ),
    ]

    def is_enabled(self, license_clarity_score, **kwargs):
        """
        Return the value of the `license_clarity_score` option as the enabled
        state.
        """
        return license_clarity_score

    def process_codebase(self, codebase, license_clarity_score, **kwargs):
        """
        Compute the license score of `codebase` and store the declared license
        expression and the scoring elements (as a dict) in the codebase-level
        `summary` attribute.
        """
        if TRACE:
            logger_debug('LicenseClarityScore:process_codebase')

        scoring_elements, declared_license_expression = compute_license_score(codebase)

        summary = codebase.attributes.summary
        summary['declared_license_expression'] = declared_license_expression
        summary['license_clarity_score'] = scoring_elements.to_dict()
Пример #22
0
class LicensePolicy(PostScanPlugin):
    """
    Add the "license_policy" attribute to a resource if it contains a
    detected license key that is found in the license_policy.yml file
    """

    # Declares a "license_policy" attribute whose default is a new empty dict.
    resource_attributes = dict(license_policy=attr.ib(default=attr.Factory(dict)))

    sort_order = 9

    options = [
        PluggableCommandLineOption(
            ('--license-policy',),
            multiple=False,
            metavar='FILE',
            help=(
                'Load a License Policy file and apply it to the scan at the '
                'Resource level.'
            ),
            help_group=POST_SCAN_GROUP,
        ),
    ]

    def is_enabled(self, license_policy, **kwargs):
        """
        Return the value of the `license_policy` option as the enabled state.
        """
        return license_policy

    def process_codebase(self, codebase, license_policy, **kwargs):
        """
        Populate a license_policy mapping with four attributes: license_key, label,
        icon, and color_code at the File Resource level.

        Append an error to `codebase.errors` and stop when the policy file
        contains duplicate entries.
        """
        if not self.is_enabled(license_policy):
            return

        if has_policy_duplicates(license_policy):
            codebase.errors.append('ERROR: License Policy file contains duplicate entries.\n')
            return

        # get a list of unique license policies from the license_policy file
        loaded_policy = load_license_policy(license_policy)
        policies = loaded_policy.get('license_policies', [])

        # apply policy to Resources if they contain an offending license
        for resource in codebase.walk(topdown=True):
            if not resource.is_file:
                continue

            try:
                detected_keys = set([lic.get('key') for lic in resource.licenses])
            except AttributeError:
                # add license_policy regardless if there is license info or not
                resource.license_policy = {}
                codebase.save_resource(resource)
                continue

            for detected_key in detected_keys:
                # Every matching policy is applied and saved in turn, so the
                # last matching one is the one left on the resource.
                matching_policies = (
                    candidate for candidate in policies
                    if detected_key == candidate.get('license_key')
                )
                for policy in matching_policies:
                    # Apply the policy to the Resource
                    resource.license_policy = policy
                    codebase.save_resource(resource)
Пример #23
0
class AddFacet(PreScanPlugin):
    """
    Assign one or more "facet" to each file (and NOT to directories). Facets are
    a way to qualify that some part of the scanned code may be core code vs.
    test vs. data, etc.
    """

    # Declares a "facets" attribute: a new empty list by default, excluded
    # from the repr.
    resource_attributes = dict(
        facets=attr.ib(default=attr.Factory(list), repr=False),
    )

    sort_order = 20

    options = [
        PluggableCommandLineOption(
            ('--facet',),
            multiple=True,
            metavar='<facet>=<pattern>',
            callback=validate_facets,
            help='Add the <facet> to files with a path matching <pattern>.',
            help_group=PRE_SCAN_GROUP,
            sort_order=80,
        ),
    ]

    def is_enabled(self, facet, **kwargs):
        """
        Return True when at least one facet definition was provided.
        """
        if TRACE:
            logger_debug('is_enabled: facet:', facet)

        return bool(facet)

    def process_codebase(self, codebase, facet=(), **kwargs):
        """
        Add facets to file resources using the `facet` definition of facets.
        Each entry in the `facet` sequence is a string as in <facet>=<pattern>

        Files whose path yields no facet get the single FACET_CORE facet.
        Directories are skipped.
        """
        if not facet:
            return

        # The second returned item (invalid definitions) is not used here.
        facet_definitions, _ = build_facets(facet)

        if TRACE:
            logger_debug('facet_definitions:', facet_definitions)

        # Walk the codebase and set the facets for each file (and only files)
        for resource in codebase.walk(topdown=True):
            if not resource.is_file:
                continue
            path_facets = compute_path_facets(resource.path, facet_definitions)
            resource.facets = path_facets or [FACET_CORE]
            resource.save(codebase)
Пример #24
0
class CustomTemplateOutput(OutputPlugin):
    """
    Output plugin to write scan results formatted with a custom Jinja
    template file.
    """

    options = [
        PluggableCommandLineOption(
            ('--custom-output',),
            type=FileOptionType(mode='w', encoding='utf-8', lazy=True),
            required_options=['custom_template'],
            metavar='FILE',
            help=(
                'Write scan output to FILE formatted with '
                'the custom Jinja template file.'
            ),
            help_group=OUTPUT_GROUP,
            sort_order=60,
        ),
        PluggableCommandLineOption(
            ('--custom-template',),
            type=click.Path(
                exists=True,
                file_okay=True,
                dir_okay=False,
                readable=True,
                path_type=PATH_TYPE,
            ),
            required_options=['custom_output'],
            metavar='FILE',
            help='Use this Jinja template FILE as a custom template.',
            help_group=OUTPUT_GROUP,
            sort_order=65,
        ),
    ]

    def is_enabled(self, custom_output, custom_template, **kwargs):
        """
        Return a true value only when both the `custom_output` and
        `custom_template` options are set.
        """
        return custom_output and custom_template

    def process_codebase(self, codebase, custom_output, custom_template,
                         **kwargs):
        """
        Write the files of `codebase` and the tool version of the current
        header to the `custom_output` file, using the `custom_template`
        template location.
        """
        results = self.get_files(codebase, **kwargs)
        version = codebase.get_or_create_current_header().tool_version

        # The template path is passed through fsencode only on Linux with
        # Python 2.
        if on_linux and py2:
            template_loc = fsencode(custom_template)
        else:
            template_loc = custom_template

        write_templated(custom_output, results, version, template_loc)
Пример #25
0
class MarkSource(PostScanPlugin):
    """
    Set the "is_source" flag to true for directories that contain
    over 90% of source files as children and descendants.
    Has no effect unless the --info scan is requested.
    """

    # Declares an integer "source_count" attribute: 0 by default and excluded
    # from the repr.
    resource_attributes = dict(
        source_count=attr.ib(default=0, type=int, repr=False))

    sort_order = 8

    options = [
        PluggableCommandLineOption(
            ('--mark-source', ),
            is_flag=True,
            default=False,
            required_options=['info'],
            help='Set the "is_source" to true for directories that contain '
            'over 90% of source files as children and descendants. '
            'Count the number of source files in a directory as a new source_file_counts attribute',
            help_group=POST_SCAN_GROUP)
    ]

    def is_enabled(self, mark_source, info, **kwargs):
        """
        Return a true value only when both the `mark_source` and `info`
        options are set.
        """
        return mark_source and info

    def process_codebase(self, codebase, mark_source, **kwargs):
        """
        Set the `is_source` to True in directories if they contain over 90% of
        source code files at full depth.

        The codebase is walked bottom-up so that the `source_count` of each
        sub-directory is available when its parent is processed. The count is
        stored on every directory that contains at least one source file,
        whether or not the directory itself qualifies as a source directory.
        """
        for resource in codebase.walk(topdown=False):
            if resource.is_file:
                continue

            children = resource.children(codebase)
            if not children:
                continue

            # Direct source files plus the counts already accumulated on the
            # sub-directories.
            src_count = sum(1 for c in children if c.is_file and c.is_source)
            src_count += sum(c.source_count for c in children if not c.is_file)
            is_source = is_source_directory(src_count, resource.files_count)

            if not src_count:
                continue

            # Always record the count: storing it only on directories that
            # pass the threshold would make the ancestors ignore every source
            # file located under a below-threshold sub-directory, while
            # `files_count` still includes all the files of that subtree.
            resource.source_count = src_count
            if is_source:
                resource.is_source = is_source
            codebase.save_resource(resource)
Пример #26
0
class JsonLinesOutput(OutputPlugin):
    """
    Output plugin that writes the scan as JSON Lines: one compact JSON
    document per line.
    """

    options = [
        PluggableCommandLineOption(
            ('--json-lines', 'output_json_lines',),
            type=FileOptionType(mode=mode, lazy=True),
            metavar='FILE',
            help='Write scan output as JSON Lines to FILE.',
            help_group=OUTPUT_GROUP,
            sort_order=15,
        ),
    ]

    def is_enabled(self, output_json_lines, **kwargs):
        """
        Return the --json-lines option value: truthy when an output FILE was
        requested.
        """
        return output_json_lines

    # TODO: reuse the json output code and merge that in a single plugin
    def process_codebase(self, codebase, output_json_lines, **kwargs):
        """
        Write the `codebase` scan to the `output_json_lines` file-like object.

        The lines are written in this order:
        - one line with the headers under a "headers" key,
        - one line for each codebase-level attribute that has a truthy value,
        - one line for each scanned file, as a one-item list under `file_key`.
        """
        # NOTE: we write as binary, not text
        scanned_files = self.get_files(codebase, **kwargs)

        codebase.add_files_count_to_current_header()
        header_line = OrderedDict(headers=codebase.get_headers())

        dumps_options = dict(
            iterable_as_array=True,
            encoding='utf-8',
            separators=(comma, colon,),
        )

        def write_line(document):
            # Serialize one document and terminate it with the line ending.
            output_json_lines.write(
                simplejson.dumps(document, **dumps_options))
            output_json_lines.write(eol)

        write_line(header_line)

        for attribute_name, attribute_value in codebase.attributes.to_dict().items():
            # Codebase attributes without a value are left out of the output.
            if not attribute_value:
                continue
            write_line({attribute_name: attribute_value})

        for scanned_file in scanned_files:
            write_line({file_key: [scanned_file]})
Пример #27
0
class JsonPrettyOutput(OutputPlugin):
    """
    Output plugin that writes the scan as pretty-printed JSON.
    """

    options = [
        PluggableCommandLineOption(
            ('--json-pp', 'output_json_pp',),
            type=FileOptionType(mode='w', encoding='utf-8', lazy=True),
            metavar='FILE',
            help='Write scan output as pretty-printed JSON to FILE.',
            help_group=OUTPUT_GROUP,
            sort_order=10,
        ),
    ]

    def is_enabled(self, output_json_pp, **kwargs):
        """
        Return the --json-pp option value: truthy when an output FILE was
        requested.
        """
        return output_json_pp

    def process_codebase(self, codebase, output_json_pp, **kwargs):
        """
        Delegate to `write_results` to write `codebase` to the
        `output_json_pp` file with the `pretty` flag turned on.
        """
        write_results(
            codebase,
            output_file=output_json_pp,
            pretty=True,
            **kwargs
        )
class LicensesReference(PostScanPlugin):
    """
    Add a reference list of all licenses data and text.
    """
    # Codebase-level list that receives one mapping of license details for
    # each license key referenced in the scan.
    codebase_attributes = dict(licenses_reference=attr.ib(default=attr.Factory(list)))

    sort_order = 500

    options = [
        PluggableCommandLineOption(('--licenses-reference',),
            is_flag=True, default=False,
            help='Include a reference of all the licenses referenced in this '
                 'scan with the data details and full texts.',
            help_group=POST_SCAN_GROUP)
    ]

    def is_enabled(self, licenses_reference, **kwargs):
        """
        Return the --licenses-reference flag value.
        """
        return licenses_reference

    def process_codebase(self, codebase, licenses_reference, **kwargs):
        """
        Collect the license keys found in the `license_expressions` of every
        resource and in the `license_expression` of every codebase-level
        package, then append the details of each of these licenses, sorted by
        key, to the `licenses_reference` codebase attribute.

        Resources are only read here: none is modified or saved.
        """
        # Imported locally as in the original code; presumably to defer a
        # costly import or avoid an import cycle -- TODO confirm.
        from licensedcode.cache import get_licenses_db
        licensing = Licensing()

        license_keys = set()

        for resource in codebase.walk():
            licexps = getattr(resource, 'license_expressions', []) or []
            for expression in licexps:
                if expression:
                    license_keys.update(licensing.license_keys(expression))

        packages = getattr(codebase, 'packages', []) or []
        for package in packages:
            # FIXME: license_expression attribute name is changing soon
            expression = package.get('license_expression')
            if expression:
                # Only collect keys: the former `resource.save(codebase)` call
                # here reused the variable leaked by the walk loop above and
                # failed with a NameError when that walk yielded nothing.
                license_keys.update(licensing.license_keys(expression))

        db = get_licenses_db()
        for key in sorted(license_keys):
            license_details = db[key].to_dict(
                include_ignorables=False,
                include_text=True,
            )
            codebase.attributes.licenses_reference.append(license_details)
Пример #29
0
class CsvOutput(OutputPlugin):
    """
    Output plugin that writes the scan as CSV.
    """

    options = [
        PluggableCommandLineOption(
            ('--csv',),
            type=FileOptionType(mode='wb', lazy=True),
            metavar='FILE',
            help='Write scan output as CSV to FILE.',
            help_group=OUTPUT_GROUP,
            sort_order=30,
        ),
    ]

    def is_enabled(self, csv, **kwargs):
        """
        Return the --csv option value: truthy when an output FILE was
        requested.
        """
        return csv

    def process_codebase(self, codebase, csv, **kwargs):
        """
        Pass the files of `codebase` to `write_csv` together with the `csv`
        output file.
        """
        scanned_files = self.get_files(codebase, **kwargs)
        write_csv(scanned_files, csv)
Пример #30
0
class HtmlOutput(OutputPlugin):
    """
    Output plugin that writes the scan as HTML using the built-in
    'html/template.html' template.
    """

    options = [
        PluggableCommandLineOption(
            ('--html',),
            type=FileOptionType(mode='w', encoding='utf-8', lazy=True),
            metavar='FILE',
            help='Write scan output as HTML to FILE.',
            help_group=OUTPUT_GROUP,
            sort_order=50,
        ),
    ]

    def is_enabled(self, html, **kwargs):
        """
        Return the --html option value: truthy when an output FILE was
        requested.
        """
        return html

    def process_codebase(self, codebase, html, **kwargs):
        """
        Render the files of `codebase` to the `html` output file with
        `write_templated`, passing the tool version from the current header.
        """
        scanned_files = self.get_files(codebase, **kwargs)
        tool_version = codebase.get_or_create_current_header().tool_version
        html_template = join(TEMPLATES_DIR, 'html', 'template.html')
        write_templated(html, scanned_files, tool_version, html_template)