Пример #1
0
class EmailScanner(ScanPlugin):
    """
    Scan plugin reporting the emails found in a Resource.
    """
    sort_order = 8

    # Adds an "emails" attribute to each Resource, defaulting to a new list.
    resource_attributes = {
        'emails': attr.ib(default=attr.Factory(list)),
    }

    options = [
        CommandLineOption(
            ('-e', '--email'),
            is_flag=True,
            default=False,
            help='Scan <input> for emails.',
            help_group=OTHER_SCAN_GROUP,
        ),
        CommandLineOption(
            ('--max-email',),
            type=int,
            default=50,
            metavar='INT',
            show_default=True,
            required_options=['email'],
            help='Report only up to INT emails found in a file. Use 0 for no limit.',
            help_group=SCAN_OPTIONS_GROUP,
        ),
    ]

    def is_enabled(self, email, **kwargs):
        """
        Return the `email` option value, used as the enabled flag.
        """
        return email

    def get_scanner(self, max_email=50, **kwargs):
        """
        Return the `get_emails` callable with its `threshold` argument bound
        to `max_email`.
        """
        # The import is deliberately done at call time, inside the method.
        from scancode.api import get_emails

        scanner = partial(get_emails, threshold=max_email)
        return scanner
class PackageScanner(ScanPlugin):
    """
    Scan plugin for Package manifests: results are reported as "packages" at
    the right file or directory level.
    """

    sort_order = 6

    required_plugins = ['scan:licenses']

    # Each Resource gets a "packages" list that is left out of its repr.
    resource_attributes = OrderedDict([
        ('packages', attr.ib(default=attr.Factory(list), repr=False)),
    ])

    options = [
        CommandLineOption(
            ('-p', '--package'),
            is_flag=True,
            default=False,
            help='Scan <input> for package manifests and build scripts.',
            help_group=SCAN_GROUP,
            sort_order=20,
        ),
        CommandLineOption(
            ('--list-packages',),
            is_flag=True,
            is_eager=True,
            callback=print_packages,
            help='Show the list of supported package types and exit.',
            help_group=DOC_GROUP,
        ),
    ]

    def is_enabled(self, package, **kwargs):
        """
        Return the `package` option value, used as the enabled flag.
        """
        return package

    def get_scanner(self, **kwargs):
        """
        Return the callable used to scan a Resource for packages.
        """
        # The import is deliberately done at call time, inside the method.
        from scancode.api import get_package_info

        return get_package_info

    def process_codebase(self, codebase, **kwargs):
        """
        Call `set_packages_root` on every Resource of `codebase`, walking
        bottom-up. Do nothing when the codebase has a single Resource.
        """
        # What if we scanned a single file and we do not have a root proper?
        if not codebase.has_single_resource:
            for res in codebase.walk(topdown=False):
                set_packages_root(res, codebase)
Пример #3
0
class ScanSummary(PostScanPlugin):
    """
    Post-scan plugin summarizing a scan at the codebase level.
    """
    # Codebase-level "summary" attribute, an OrderedDict by default.
    codebase_attributes = {
        'summary': attr.ib(default=attr.Factory(OrderedDict)),
    }

    sort_order = 10

    options = [
        CommandLineOption(
            ('--summary',),
            is_flag=True,
            default=False,
            help='Summarize license, copyright and other scans at the codebase level.',
            help_group=POST_SCAN_GROUP,
        ),
    ]

    def is_enabled(self, summary, **kwargs):
        """
        Return the `summary` option value, used as the enabled flag.
        """
        return summary

    def process_codebase(self, codebase, summary, **kwargs):
        """
        Run `summarize_codebase` on `codebase` without keeping details,
        forwarding the remaining keyword arguments.
        """
        if TRACE_LIGHT:
            logger_debug('ScanSummary:process_codebase')

        summarize_codebase(codebase, keep_details=False, **kwargs)
Пример #4
0
class ScanKeyFilesSummary(PostScanPlugin):
    """
    Post-scan plugin summarizing a scan at the codebase level, for key files
    only.
    """
    # Codebase-level mapping holding the summary data for key files.
    codebase_attributes = {
        'summary_of_key_files': attr.ib(default=attr.Factory(OrderedDict)),
    }

    sort_order = 150

    options = [
        CommandLineOption(
            ('--summary-key-files',),
            is_flag=True,
            default=False,
            required_options=['classify', 'summary'],
            help_group=POST_SCAN_GROUP,
            help=(
                'Summarize license, copyright and other scans for key, '
                'top-level files. Key files are top-level codebase files such '
                'as COPYING, README and package manifests as reported by the '
                '--classify option "is_legal", "is_readme", "is_manifest" '
                'and "is_top_level" flags.'
            ),
        ),
    ]

    def is_enabled(self, summary_key_files, **kwargs):
        """
        Return the `summary_key_files` option value, used as the enabled flag.
        """
        return summary_key_files

    def process_codebase(self, codebase, summary_key_files, **kwargs):
        """
        Run `summarize_codebase_key_files` on `codebase`, forwarding the
        remaining keyword arguments.
        """
        summarize_codebase_key_files(codebase, **kwargs)
Пример #5
0
class ScanSummaryWithDetails(PostScanPlugin):
    """
    Post-scan plugin summarizing a scan at the codebase level while keeping
    the file and directory details.
    """
    sort_order = 100

    # Summary data for the whole codebase.
    codebase_attributes = {
        'summary': attr.ib(default=attr.Factory(OrderedDict)),
    }

    # Summaries at the file and directory level are stored in this attribute
    # when keep details is True.
    resource_attributes = {
        'summary': attr.ib(default=attr.Factory(OrderedDict)),
    }

    options = [
        CommandLineOption(
            ('--summary-with-details',),
            is_flag=True,
            default=False,
            help_group=POST_SCAN_GROUP,
            help=(
                'Summarize license, copyright and other scans at the codebase level, '
                'keeping intermediate details at the file and directory level.'
            ),
        ),
    ]

    def is_enabled(self, summary_with_details, **kwargs):
        """
        Return the `summary_with_details` option value, used as the enabled
        flag.
        """
        return summary_with_details

    def process_codebase(self, codebase, summary_with_details, **kwargs):
        """
        Run `summarize_codebase` on `codebase` with details kept, forwarding
        the remaining keyword arguments.
        """
        summarize_codebase(codebase, keep_details=True, **kwargs)
Пример #6
0
class GeneratedCodeDetector(ScanPlugin):
    """
    Scan plugin tagging a file as generated.
    """
    sort_order = 50

    # Boolean "is_generated" attribute added to each Resource.
    resource_attributes = {
        'is_generated': Boolean(
            help='True if this file is likely an automatically generated file.',
        ),
    }

    options = [
        CommandLineOption(
            ('--generated',),
            is_flag=True,
            default=False,
            sort_order=50,
            help_group=OTHER_SCAN_GROUP,
            help='Classify automatically generated code files with a flag.',
        ),
    ]

    def is_enabled(self, generated, **kwargs):
        """
        Return the `generated` option value, used as the enabled flag.
        """
        return generated

    def get_scanner(self, **kwargs):
        """
        Return the `generated_scanner` callable.
        """
        return generated_scanner
Пример #7
0
class KeywordsLinesScanner(ScanPlugin):
    """
    Scan the number of lines of code and the lines matching keywords.
    """
    # Per-Resource counters and matched lines, all hidden from the repr.
    resource_attributes = OrderedDict(
        codelines=attr.ib(repr=False, default=attr.Factory(int)),
        keywordsline=attr.ib(repr=False, default=attr.Factory(int)),
        matchedlines=attr.ib(repr=False, default=attr.Factory(list)),
    )

    options = [
        CommandLineOption(
            ('--keyword-scan',),
            type=click.Path(
                exists=True,
                file_okay=True,
                dir_okay=False,
                readable=True,
                path_type=PATH_TYPE,
            ),
            metavar='FILE',
            help='Use this yml file to read the keywords',
            help_group=SCAN_GROUP,
            sort_order=100,
        ),
    ]

    def is_enabled(self, keyword_scan, **kwargs):
        """
        Return the `keyword_scan` option value, used as the enabled flag.
        """
        return keyword_scan

    def get_scanner(self, **kwargs):
        """
        Return `get_keywordsscan` with its `keyword_scan` argument bound to
        the `keyword_scan` entry of `kwargs`. A missing entry raises KeyError.
        """
        keywords_file = kwargs['keyword_scan']
        return partial(get_keywordsscan, keyword_scan=keywords_file)
class KeywordsLinesScanner(ScanPlugin):
    """
    Scan the number of lines of code and the lines matching keywords.
    """
    # Per-Resource counters and matched lines, all hidden from the repr.
    resource_attributes = OrderedDict(
        codelines=attr.ib(repr=False, default=attr.Factory(int)),
        keywordsline=attr.ib(repr=False, default=attr.Factory(int)),
        matchedlines=attr.ib(repr=False, default=attr.Factory(list)),
    )

    options = [
        CommandLineOption(
            ('--keywordsscan',),
            is_flag=True,
            default=False,
            sort_order=100,
            help_group=SCAN_GROUP,
            help='  Scan the number of lines of code and search for keywords.',
        ),
    ]

    def is_enabled(self, keywordsscan, **kwargs):
        """
        Return the `keywordsscan` option value, used as the enabled flag.
        """
        return keywordsscan

    def get_scanner(self, **kwargs):
        """
        Return the `get_keywordsscan` callable.
        """
        return get_keywordsscan
Пример #9
0
class ScanByFacetSummary(PostScanPlugin):
    """
    Post-scan plugin summarizing a scan at the codebase level, grouping by
    facets.
    """
    # Codebase-level "summary_by_facet" attribute, a list by default.
    codebase_attributes = {
        'summary_by_facet': attr.ib(default=attr.Factory(list)),
    }

    sort_order = 200

    options = [
        CommandLineOption(
            ('--summary-by-facet',),
            is_flag=True,
            default=False,
            required_options=['facet', 'summary'],
            help_group=POST_SCAN_GROUP,
            help=(
                'Summarize license, copyright and other scans and group the '
                'results by facet.'
            ),
        ),
    ]

    def is_enabled(self, summary_by_facet, **kwargs):
        """
        Return the `summary_by_facet` option value, used as the enabled flag.
        """
        return summary_by_facet

    def process_codebase(self, codebase, summary_by_facet, **kwargs):
        """
        Run `summarize_codebase_by_facet` on `codebase`, forwarding the
        remaining keyword arguments.
        """
        if TRACE_LIGHT:
            logger_debug('ScanByFacetSummary:process_codebase')

        summarize_codebase_by_facet(codebase, **kwargs)
Пример #10
0
class LicenseClarityScore(PostScanPlugin):
    """
    Post-scan plugin computing a License clarity score at the codebase level.
    """
    sort_order = 110

    codebase_attributes = {
        'license_clarity_score': Mapping(
            help='Computed license clarity score as mapping containing the score '
                 'proper and each scoring elements.',
        ),
    }

    options = [
        CommandLineOption(
            ('--license-clarity-score',),
            is_flag=True,
            default=False,
            required_options=['classify', 'license', 'copyright'],
            help_group=POST_SCAN_GROUP,
            help='Compute a summary license clarity score at the codebase level.',
        ),
    ]

    def is_enabled(self, license_clarity_score, **kwargs):
        """
        Return the `license_clarity_score` option value, used as the enabled
        flag.
        """
        return license_clarity_score

    def process_codebase(self, codebase, license_clarity_score, **kwargs):
        """
        Compute the scoring elements with `compute_license_score` and merge
        them into the codebase `license_clarity_score` attribute.
        """
        if TRACE:
            logger_debug('LicenseClarityScore:process_codebase')

        elements = compute_license_score(codebase, **kwargs)
        codebase.attributes.license_clarity_score.update(elements)
Пример #11
0
class CopyrightScanner(ScanPlugin):
    """
    Scan plugin reporting the copyrights found in a Resource.
    """

    sort_order = 4

    # Three list attributes are added to each Resource.
    resource_attributes = OrderedDict(
        copyrights=attr.ib(default=attr.Factory(list)),
        holders=attr.ib(default=attr.Factory(list)),
        authors=attr.ib(default=attr.Factory(list)),
    )

    options = [
        CommandLineOption(
            ('-c', '--copyright'),
            is_flag=True,
            default=False,
            sort_order=50,
            help_group=SCAN_GROUP,
            help='Scan <input> for copyrights.',
        ),
    ]

    def is_enabled(self, copyright, **kwargs):  # NOQA
        """
        Return the `copyright` option value, used as the enabled flag.
        """
        return copyright

    def get_scanner(self, **kwargs):
        """
        Return the `get_copyrights` callable.
        """
        # The import is deliberately done at call time, inside the method.
        from scancode.api import get_copyrights

        return get_copyrights
Пример #12
0
class SpdxRdfOutput(OutputPlugin):
    """
    Output plugin writing scan results as SPDX RDF.
    """

    options = [
        CommandLineOption(
            ('--spdx-rdf',),
            type=FileOptionType(lazy=True, mode='w', encoding='utf-8'),
            metavar='FILE',
            help='Write scan output as SPDX RDF to FILE.',
            help_group=OUTPUT_GROUP,
        ),
    ]

    def is_enabled(self, spdx_rdf, **kwargs):
        """
        Return the `spdx_rdf` option value, used as the enabled flag.
        """
        return spdx_rdf

    def process_codebase(self, codebase, spdx_rdf, **kwargs):
        """
        Write the files of `codebase` to `spdx_rdf` through `write_spdx`, not
        as tag/value. Tool name, tool version and notice come from the current
        codebase header; the input comes from the `input` keyword argument,
        defaulting to an empty string.
        """
        check_sha1(codebase)
        scanned_files = self.get_files(codebase, **kwargs)

        current_header = codebase.get_or_create_current_header()
        name = current_header.tool_name
        version = current_header.tool_version
        notice_text = current_header.notice
        scanned_input = kwargs.get('input', '')

        write_spdx(
            spdx_rdf,
            scanned_files,
            name,
            version,
            notice_text,
            scanned_input,
            as_tagvalue=False,
        )
Пример #13
0
class JsonPrettyOutput(OutputPlugin):
    """
    Output plugin writing scan results as pretty-printed JSON.
    """

    options = [
        CommandLineOption(
            ('--json-pp', 'output_json_pp'),
            type=FileOptionType(mode='wb', lazy=True),
            metavar='FILE',
            sort_order=10,
            help_group=OUTPUT_GROUP,
            help='Write scan output as pretty-printed JSON to FILE.',
        ),
    ]

    def is_enabled(self, output_json_pp, **kwargs):
        """
        Return the `output_json_pp` option value, used as the enabled flag.
        """
        return output_json_pp

    def process_codebase(self, codebase, output_json_pp, **kwargs):
        """
        Write the files of `codebase` to `output_json_pp` through
        `write_json` in pretty mode, forwarding the remaining keyword
        arguments.
        """
        scanned_files = self.get_files(codebase, **kwargs)
        write_json(
            codebase, scanned_files,
            output_file=output_json_pp, pretty=True, **kwargs)
Пример #14
0
class RedundantCluesFilter(PostScanPlugin):
    """
    Post-scan plugin filtering redundant clues (copyrights, authors, emails,
    and urls) that are already contained in another more important scan
    result.
    """
    sort_order = 1

    options = [
        CommandLineOption(
            ('--filter-clues',),
            is_flag=True,
            default=False,
            help_group=POST_SCAN_GROUP,
            help=(
                'Filter redundant duplicated clues already contained in '
                'detected license and copyright texts and notices.'
            ),
        ),
    ]

    def is_enabled(self, filter_clues, **kwargs):
        """
        Return the `filter_clues` option value, used as the enabled flag.
        """
        return filter_clues

    def process_codebase(self, codebase, **kwargs):
        """
        For every Resource of `codebase`, drop the clues that are redundant
        with another detected clue and save the filtered result, if any.
        """
        if TRACE:
            logger_debug('RedundantFilter:process_codebase')

        # The import is deliberately done at call time, inside the method.
        from licensedcode.cache import get_index

        # Map each rule identifier to its rule object.
        rules_by_id = {}
        for rule in get_index().rules_by_rid:
            rules_by_id[rule.identifier] = rule

        for res in codebase.walk():
            updated = filter_ignorable_resource_clues(res, rules_by_id)
            if not updated:
                continue
            updated.save(codebase)
Пример #15
0
class IgnoreCopyrights(OutputFilterPlugin):
    """
    Output filter for findings matching the given copyright holder or author
    patterns.
    Has no effect unless the --copyright scan is requested.
    """

    options = [
        CommandLineOption(
            ('--ignore-copyright-holder',),
            multiple=True,
            metavar='<pattern>',
            help_group=OUTPUT_FILTER_GROUP,
            help=(
                'Ignore a file (and all its findings) if a copyright holder '
                'contains a match to the <pattern> regular expression. '
                'Note that this will ignore a file even if it has other scanned '
                'data such as a license or errors.'
            ),
        ),
        CommandLineOption(
            ('--ignore-author',),
            multiple=True,
            metavar='<pattern>',
            help_group=OUTPUT_FILTER_GROUP,
            help=(
                'Ignore a file (and all its findings) if an author '
                'contains a match to the <pattern> regular expression. '
                'Note that this will ignore a file even if it has other findings '
                'such as a license or errors.'
            ),
        ),
    ]

    def is_enabled(self, ignore_copyright_holder, ignore_author,
                   **kwargs):  # NOQA
        """
        Return True when at least one of the two pattern options is non-empty.
        """
        return bool(ignore_copyright_holder or ignore_author)

    def process_codebase(self, codebase, ignore_copyright_holder,
                         ignore_author, **kwargs):
        """
        Flag as filtered, and save, every Resource of `codebase` for which
        `is_ignored` reports a match between the compiled holder patterns and
        its holders, or between the compiled author patterns and its authors.
        """
        holder_patterns = list(map(re.compile, ignore_copyright_holder))
        author_patterns = list(map(re.compile, ignore_author))

        for res in codebase.walk():
            # Resources without these attributes are treated as having none.
            holders = {h['value'] for h in getattr(res, 'holders', [])}
            authors = {a['value'] for a in getattr(res, 'authors', [])}
            if TRACE:
                logger_debug('holders:', holders)
                logger_debug('authors:', authors)

            matched = (
                is_ignored(holder_patterns, holders)
                or is_ignored(author_patterns, authors)
            )
            if not matched:
                continue

            res.is_filtered = True
            codebase.save_resource(res)
Пример #16
0
class IsLicenseText(PostScanPlugin):
    """
    Set the "is_license_text" flag to true at the file level for text files
    that contain mostly (as 90% of their size) license texts or notices.
    Has no effect unless --license, --license-text and --info scan data
    are available.
    """

    sort_order = 80

    resource_attributes = {
        'is_license_text': attr.ib(default=False, type=bool, repr=False),
    }

    options = [
        CommandLineOption(
            ('--is-license-text',),
            is_flag=True,
            default=False,
            required_options=['info', 'license_text'],
            help_group=POST_SCAN_GROUP,
            help=(
                'Set the "is_license_text" flag to true for files that contain '
                'mostly license texts and notices (e.g over 90% of the content). [EXPERIMENTAL]'
            ),
        ),
    ]

    def is_enabled(self, is_license_text, **kwargs):
        """
        Return the `is_license_text` option value, used as the enabled flag.
        """
        return is_license_text

    def process_codebase(self, codebase, is_license_text, **kwargs):
        """
        Set `is_license_text` to True, and save, each text Resource whose
        coverage-weighted matched license text length reaches 90% of its size.
        """
        for res in codebase.walk():
            if not res.is_text:
                continue

            # Keep unique texts/line ranges since the same text is repeated
            # for each matched license.
            unique_matches = {
                (
                    match['matched_text'],
                    match['start_line'],
                    match['end_line'],
                    match.get('matched_rule', {}).get('match_coverage', 0),
                )
                for match in res.licenses
            }

            # Weight each matched text length by its match coverage.
            license_texts_size = 0
            for text, _start, _end, coverage in unique_matches:
                # Square brackets are the meta characters used to mark the
                # non-matched parts: they are not counted.
                text = text.replace('[', '').replace(']', '')
                license_texts_size += len(text) * (coverage / 100)

            threshold = res.size * 0.9
            is_mostly_license = license_texts_size >= threshold

            if TRACE:
                logger_debug('IsLicenseText: license size:',
                             license_texts_size, 'size:', res.size,
                             'license_texts_size >= (resource.size * 0.9)',
                             is_mostly_license,
                             'resource.size * 0.9:', threshold)

            if is_mostly_license:
                res.is_license_text = True
                res.save(codebase)
class LicenceModifications(PostScanPlugin):
    """
    Add a "licence_modifications" mapping to file Resources whose detected
    license score is not 100, i.e. whose license text looks modified.
    """

    resource_attributes = dict(licence_modifications=attr.ib(
        default=attr.Factory(dict)))

    options = [
        CommandLineOption(
            ('--licence-modifications', ),
            is_flag=True,
            default=False,
            help='Generate a list of files in case of modified license',
            help_group=POST_SCAN_GROUP),
    ]

    def is_enabled(self, licence_modifications, **kwargs):
        """
        Return the `licence_modifications` option value, used as the enabled
        flag.
        """
        return licence_modifications

    def process_codebase(self, codebase, licence_modifications, **kwargs):
        """
        For each file Resource, set `licence_modifications` to a mapping with
        a "modinfo" message giving the modification percentage (100 minus the
        license match score) when that percentage is not zero.

        File Resources without a `licenses` attribute get an empty mapping.
        Scores that are missing or not numeric are skipped.
        """
        if not self.is_enabled(licence_modifications):
            return

        for resource in codebase.walk(topdown=True):
            if not resource.is_file:
                continue

            try:
                scores = {entry.get('score') for entry in resource.licenses}
            except AttributeError:
                # add licence_modifications regardless if there is license
                # modification info or not
                logger.info(
                    "Adding licence_modifications regardless if there is license modification info or not"
                )
                resource.licence_modifications = {}
                codebase.save_resource(resource)
                continue

            modified = False
            for score in scores:
                # A license entry without a score yields None: there is
                # nothing to compute in that case. Coercing with float() also
                # accepts a score provided as a numeric string.
                try:
                    modification_score = 100.00 - float(score)
                except (TypeError, ValueError):
                    continue

                if modification_score != 0.0:
                    resource.licence_modifications = {
                        "modinfo":
                        "license is %s percent modified " %
                        (modification_score)
                    }
                    modified = True

            # Save once per Resource rather than once per score.
            if modified:
                codebase.save_resource(resource)
Пример #18
0
class AddFacet(PreScanPlugin):
    """
    Assign one or more "facet" to each file (and NOT to directories). Facets
    are a way to qualify that some part of the scanned code may be core code
    vs. test vs. data, etc.
    """

    sort_order = 20

    # Each Resource gets a "facets" list that is left out of its repr.
    resource_attributes = {
        'facets': attr.ib(default=attr.Factory(list), repr=False),
    }

    options = [
        CommandLineOption(
            ('--facet',),
            multiple=True,
            metavar='<facet>=<pattern>',
            callback=validate_facets,
            sort_order=80,
            help_group=PRE_SCAN_GROUP,
            help='Add the <facet> to files with a path matching <pattern>.',
        ),
    ]

    def is_enabled(self, facet, **kwargs):
        """
        Return True when at least one facet definition was provided.
        """
        if TRACE:
            logger_debug('is_enabled: facet:', facet)

        return bool(facet)

    def process_codebase(self, codebase, facet=(), **kwargs):
        """
        Set the facets of each file Resource of `codebase` from the `facet`
        sequence of definitions (the option metavar is `<facet>=<pattern>`).
        Files for which no facet is computed get the FACET_CORE facet.
        """
        if not facet:
            return

        definitions, _invalid_definitions = build_facets(facet)

        if TRACE:
            logger_debug('facet_definitions:', definitions)

        # Only files receive facets: directories are skipped.
        for res in codebase.walk(topdown=True):
            if res.is_file:
                computed = compute_path_facets(res.path, definitions)
                res.facets = computed if computed else [FACET_CORE]
                res.save(codebase)
Пример #19
0
class CustomTemplateOutput(OutputPlugin):
    """
    Output plugin writing scan results formatted with a custom Jinja template
    file.
    """

    options = [
        CommandLineOption(
            ('--custom-output',),
            type=FileOptionType(mode='w', encoding='utf-8', lazy=True),
            required_options=['custom_template'],
            metavar='FILE',
            sort_order=60,
            help_group=OUTPUT_GROUP,
            help=(
                'Write scan output to FILE formatted with '
                'the custom Jinja template file.'
            ),
        ),
        CommandLineOption(
            ('--custom-template',),
            type=click.Path(
                exists=True,
                file_okay=True,
                dir_okay=False,
                readable=True,
                path_type=PATH_TYPE,
            ),
            required_options=['custom_output'],
            metavar='FILE',
            sort_order=65,
            help_group=OUTPUT_GROUP,
            help='Use this Jinja template FILE as a custom template.',
        ),
    ]

    def is_enabled(self, custom_output, custom_template, **kwargs):
        """
        Return a truthy value only when both the output and the template
        options are set.
        """
        return custom_output and custom_template

    def process_codebase(self, codebase, custom_output, custom_template,
                         **kwargs):
        """
        Write the files of `codebase` to `custom_output` through
        `write_templated`, using the `custom_template` location and the tool
        version of the current codebase header.
        """
        scanned_files = self.get_files(codebase, **kwargs)
        tool_version = codebase.get_or_create_current_header().tool_version

        # On Linux the template location is passed through fsencode first.
        location = fsencode(custom_template) if on_linux else custom_template

        write_templated(custom_output, scanned_files, tool_version, location)
class OnlyLicensesTitles(PostScanPlugin):
    """
    Add the "only_licenses_titles" attribute to file Resources, holding the
    start and end lines of their detected licenses.
    """

    resource_attributes = dict(only_licenses_titles=attr.ib(
        default=attr.Factory(dict)))

    sort_order = 9

    options = [
        CommandLineOption(
            ('--only-licenses-titles', ),
            is_flag=True,
            default=False,
            help='Generate a list of files with only license titles',
            help_group=POST_SCAN_GROUP)
    ]

    def is_enabled(self, only_licenses_titles, **kwargs):
        """
        Return the `only_licenses_titles` option value, used as the enabled
        flag.
        """
        return only_licenses_titles

    def process_codebase(self, codebase, only_licenses_titles, **kwargs):
        """
        For each file Resource with detected licenses, set
        `only_licenses_titles` to a mapping of "LineStart" and "LineEnd" to
        the sets of license start and end lines, then save the Resource.

        File Resources without a `licenses` attribute get an empty mapping.
        """
        if not self.is_enabled(only_licenses_titles):
            return

        for resource in codebase.walk(topdown=True):
            if not resource.is_file:
                continue

            try:
                start_lines = {
                    entry.get('start_line') for entry in resource.licenses}
                end_lines = {
                    entry.get('end_line') for entry in resource.licenses}
            except AttributeError:
                resource.only_licenses_titles = {}
                codebase.save_resource(resource)
                continue

            # The mapping is the same whatever the number of start lines:
            # assign and save it once instead of once per start line.
            if start_lines:
                resource.only_licenses_titles = {
                    "LineStart": start_lines,
                    "LineEnd": end_lines
                }
                codebase.save_resource(resource)
Пример #21
0
class ProcessIgnore(PreScanPlugin):
    """
    Ignore files matching the supplied pattern.
    """

    options = [
        CommandLineOption(('--ignore', ),
                          multiple=True,
                          metavar='<pattern>',
                          help='Ignore files matching <pattern>.',
                          sort_order=10,
                          help_group=PRE_SCAN_GROUP)
    ]

    def is_enabled(self, ignore, **kwargs):
        """
        Return the `ignore` option value, used as the enabled flag.
        """
        return ignore

    def process_codebase(self, codebase, ignore=(), **kwargs):
        """
        Remove from `codebase` the Resources whose path matches one of the
        `ignore` patterns, together with their direct children.
        """

        if not ignore:
            return

        ignores = {
            pattern: 'User ignore: Supplied by --ignore'
            for pattern in ignore
        }

        ignorable = partial(is_ignored, ignores=ignores)
        # A set gives constant-time membership tests and removals: the same
        # rid may be collected twice (as a child and as an ignored Resource)
        # and the second walk tests every Resource of the codebase.
        rids_to_remove = set()
        remove_resource = codebase.remove_resource

        # First, walk the codebase from the top-down and collect the rids of
        # Resources that can be removed.
        for resource in codebase.walk(topdown=True):
            if ignorable(resource.path):
                rids_to_remove.update(
                    child.rid for child in resource.children(codebase))
                rids_to_remove.add(resource.rid)

        # Then, walk bottom-up and remove the ignored Resources from the
        # Codebase if the Resource's rid is in our set of rids to remove.
        for resource in codebase.walk(topdown=False):
            resource_rid = resource.rid
            if resource_rid in rids_to_remove:
                rids_to_remove.discard(resource_rid)
                remove_resource(resource)
Пример #22
0
class NoLicenses(PostScanPlugin):
    """
    Populate the "no_licenses" attribute of file Resources from their
    detected licenses.
    """
    # NOTE(review): the attribute and option names suggest files WITHOUT a
    # license, but the value written below records that a license is present;
    # the attribute also defaults to a dict while a string is assigned.
    # Confirm the intent with the plugin author.

    resource_attributes = dict(no_licenses=attr.ib(default=attr.Factory(dict)))

    sort_order = 9

    options = [
        CommandLineOption(('--no-licenses', ),
                          is_flag=True,
                          default=False,
                          help='Generate a list of no licences files',
                          help_group=POST_SCAN_GROUP),
    ]

    def is_enabled(self, no_licenses, **kwargs):
        """
        Return the `no_licenses` option value, used as the enabled flag.
        """
        return no_licenses

    def process_codebase(self, codebase, no_licenses, **kwargs):
        """
        For each file Resource, set `no_licenses` to a "<short_name> is
        Present" message for its detected licenses and save the Resource.

        File Resources without a `licenses` attribute get an empty mapping.
        """
        if not self.is_enabled(no_licenses):
            return

        for resource in codebase.walk(topdown=True):
            if not resource.is_file:
                continue

            try:
                short_names = {
                    entry.get('short_name') for entry in resource.licenses}
            except AttributeError:
                # add no_licenses regardless if there is license info or not
                logger.debug(
                    "add no_licenses regardless if there is license info or not"
                )
                resource.no_licenses = {}
                codebase.save_resource(resource)
                continue

            for short_name in short_names:
                # Entries without a short name are skipped.
                if short_name:
                    resource.no_licenses = "%s is Present" % (short_name)
                    codebase.save_resource(resource)
Пример #23
0
class MarkSource(PostScanPlugin):
    """
    Set the "is_source" flag to true for directories that contain
    over 90% of source files as direct children.
    Has no effect unless the --info scan is requested.
    """

    sort_order = 8

    resource_attributes = {
        'source_count': attr.ib(default=0, type=int, repr=False),
    }

    options = [
        CommandLineOption(
            ('--mark-source',),
            is_flag=True,
            default=False,
            required_options=['info'],
            help_group=POST_SCAN_GROUP,
            help=(
                'Set the "is_source" to true for directories that contain '
                'over 90% of source files as children and descendants. '
                'Count the number of source files in a directory as a new source_file_counts attribute'
            ),
        ),
    ]

    def is_enabled(self, mark_source, info, **kwargs):
        """
        Return a truthy value only when both `mark_source` and `info` are set.
        """
        return mark_source and info

    def process_codebase(self, codebase, mark_source, **kwargs):
        """
        Walk `codebase` bottom-up and, for each directory that
        `is_source_directory` accepts given its source file count and its
        `files_count`, set `is_source` and `source_count` then save it.
        """
        for res in codebase.walk(topdown=False):
            if res.is_file:
                continue

            children = res.children(codebase)
            if not children:
                continue

            # Source files that are direct children, plus the counts already
            # stored on the child directories by the bottom-up walk.
            direct_sources = [c for c in children if c.is_file and c.is_source]
            nested_counts = [c.source_count for c in children if not c.is_file]
            src_count = len(direct_sources) + sum(nested_counts)

            is_source = is_source_directory(src_count, res.files_count)
            if not (src_count and is_source):
                continue

            res.is_source = is_source
            res.source_count = src_count
            codebase.save_resource(res)
class JsonLinesOutput(OutputPlugin):
    """
    Output plugin writing scan results as JSON Lines.
    """

    options = [
        CommandLineOption(
            ('--json-lines', 'output_json_lines'),
            type=FileOptionType(mode=mode, lazy=True),
            metavar='FILE',
            sort_order=15,
            help_group=OUTPUT_GROUP,
            help='Write scan output as JSON Lines to FILE.',
        ),
    ]

    def is_enabled(self, output_json_lines, **kwargs):
        """
        Return the `output_json_lines` option value, used as the enabled flag.
        """
        return output_json_lines

    # TODO: reuse the json output code and merge that in a single plugin
    def process_codebase(self, codebase, output_json_lines, **kwargs):
        """
        Write to `output_json_lines`, one JSON document per line: the
        codebase headers, then each non-empty codebase attribute, then each
        scanned file.
        """
        # NOTE: we write as binary, not text
        scanned_files = self.get_files(codebase, **kwargs)

        codebase.add_files_count_to_current_header()

        headers = OrderedDict(headers=codebase.get_headers())

        dump_options = {
            'iterable_as_array': True,
            'encoding': 'utf-8',
            'separators': (comma, colon),
        }

        def write_line(data):
            # Serialize `data` then terminate the line.
            output_json_lines.write(simplejson.dumps(data, **dump_options))
            output_json_lines.write(eol)

        write_line(headers)

        # Codebase-level attributes with an empty value are not written.
        for attr_name, attr_value in codebase.attributes.to_dict().items():
            if not attr_value:
                continue
            write_line({attr_name: attr_value})

        for scanned in scanned_files:
            write_line({file_key: [scanned]})
Пример #25
0
class JsonCompactOutput(OutputPlugin):
    """
    Output plugin writing the scan results as compact JSON (i.e. not
    pretty-printed).
    """

    options = [
        CommandLineOption(
            ('--json', 'output_json',),
            type=FileOptionType(mode='wb', lazy=True),
            metavar='FILE',
            help='Write scan output as compact JSON to FILE.',
            help_group=OUTPUT_GROUP,
            sort_order=10,
        ),
    ]

    def is_enabled(self, output_json, **kwargs):
        """
        Return the `output_json` option value: the plugin is enabled when this
        value is truthy.
        """
        return output_json

    def process_codebase(self, codebase, output_json, **kwargs):
        """
        Collect the results for `codebase` and write them to the
        `output_json` file with pretty-printing disabled.
        """
        write_json(
            get_results(codebase, as_list=False, **kwargs),
            output_file=output_json,
            pretty=False,
        )
Пример #26
0
class CsvOutput(OutputPlugin):
    """
    Output plugin writing the scanned files as CSV.
    """

    options = [
        CommandLineOption(
            ('--csv',),
            type=FileOptionType(mode='wb', lazy=True),
            metavar='FILE',
            help='Write scan output as CSV to FILE.',
            help_group=OUTPUT_GROUP,
            sort_order=30,
        ),
    ]

    def is_enabled(self, csv, **kwargs):
        """
        Return the `csv` option value: the plugin is enabled when this value
        is truthy.
        """
        return csv

    def process_codebase(self, codebase, csv, **kwargs):
        """
        Write the files of `codebase` to the `csv` output file.
        """
        # NOTE: `csv` is the option value (the output file), not the csv module
        scanned_files = self.get_files(codebase, **kwargs)
        write_csv(scanned_files, csv)
class FingerprintScanner(ScanPlugin):
    """
    Scan plugin generating a fingerprint for a file Resource.
    """
    # a single `fingerprint` attribute, defaulting to None
    resource_attributes = {
        'fingerprint': attr.ib(default=None, repr=False),
    }

    sort_order = 1

    options = [
        CommandLineOption(
            ('-f', '--fingerprint'),
            is_flag=True,
            default=False,
            help='Scan <input> to generate simhash fingerprints for similarity matching.',
            help_group=OTHER_SCAN_GROUP,
        ),
    ]

    def is_enabled(self, fingerprint, **kwargs):
        """
        Return the `fingerprint` flag value: the plugin is enabled when this
        value is truthy.
        """
        return fingerprint

    def get_scanner(self, **kwargs):
        """
        Return the scanner callable used by this plugin.
        """
        return get_fingerprint
Пример #28
0
class HtmlOutput(OutputPlugin):
    """
    Output plugin writing the scanned files as HTML using the
    'html/template.html' template found under TEMPLATES_DIR.
    """

    options = [
        CommandLineOption(
            ('--html',),
            type=FileOptionType(mode='w', encoding='utf-8', lazy=True),
            metavar='FILE',
            help='Write scan output as HTML to FILE.',
            help_group=OUTPUT_GROUP,
            sort_order=50,
        ),
    ]

    def is_enabled(self, html, **kwargs):
        """
        Return the `html` option value: the plugin is enabled when this value
        is truthy.
        """
        return html

    def process_codebase(self, codebase, html, **kwargs):
        """
        Write the files of `codebase` to the `html` output file, passing along
        the tool version taken from the current header of `codebase`.
        """
        scanned_files = self.get_files(codebase, **kwargs)
        tool_version = codebase.get_or_create_current_header().tool_version
        template_path = join(TEMPLATES_DIR, 'html', 'template.html')
        write_templated(html, scanned_files, tool_version, template_path)
class InfoScanner(ScanPlugin):
    """
    Scan plugin collecting miscellaneous information about a file Resource,
    such as its mime and file types and basic checksums.
    """
    # the insertion order below is the order of the attributes
    resource_attributes = OrderedDict()

    # attributes that default to None
    resource_attributes['date'] = attr.ib(default=None, repr=False)
    resource_attributes['sha1'] = attr.ib(default=None, repr=False)
    resource_attributes['md5'] = attr.ib(default=None, repr=False)
    resource_attributes['sha256'] = attr.ib(default=None, repr=False)
    resource_attributes['mime_type'] = attr.ib(default=None, repr=False)
    resource_attributes['file_type'] = attr.ib(default=None, repr=False)
    resource_attributes['programming_language'] = attr.ib(default=None, repr=False)

    # boolean flags that default to False
    resource_attributes['is_binary'] = attr.ib(default=False, type=bool, repr=False)
    resource_attributes['is_text'] = attr.ib(default=False, type=bool, repr=False)
    resource_attributes['is_archive'] = attr.ib(default=False, type=bool, repr=False)
    resource_attributes['is_media'] = attr.ib(default=False, type=bool, repr=False)
    resource_attributes['is_source'] = attr.ib(default=False, type=bool, repr=False)
    resource_attributes['is_script'] = attr.ib(default=False, type=bool, repr=False)

    sort_order = 0

    options = [
        CommandLineOption(
            ('-i', '--info'),
            is_flag=True, default=False,
            help='Scan <input> for file information (size, checksums, etc).',
            help_group=OTHER_SCAN_GROUP,
            sort_order=10,
        ),
    ]

    def is_enabled(self, info, **kwargs):
        """
        Return the `info` flag value: the plugin is enabled when this value is
        truthy.
        """
        return info

    def get_scanner(self, **kwargs):
        """
        Return the scanner callable used by this plugin.
        """
        # imported here, at call time, rather than at module import time
        from scancode.api import get_file_info
        return get_file_info
Пример #30
0
class LKMClueScanner(ScanPlugin):
    """
    Scan plugin collecting lkm-clue information from a Resource.
    """
    # a single `lkm_clue` attribute; each instance gets its own OrderedDict
    resource_attributes = OrderedDict([
        ('lkm_clue', attr.ib(default=attr.Factory(OrderedDict), repr=False)),
    ])

    options = [
        CommandLineOption(
            ('--lkmclue',),
            is_flag=True, default=False,
            help='Collect LKM module clues and type indicating a possible Linux Kernel Module. (formerly lkm_hint and lkm_line).',
            help_group=SCAN_GROUP,
            sort_order=100,
        ),
    ]

    def is_enabled(self, lkmclue, **kwargs):
        """
        Return the `lkmclue` flag value: the plugin is enabled when this value
        is truthy.
        """
        return lkmclue

    def get_scanner(self, **kwargs):
        """
        Return the scanner callable used by this plugin.
        """
        return get_lkm_clues