示例#1
0
def language_summarizer(resource, children, keep_details=False):
    """
    Summarize the programming language of `resource` and of its direct
    `children` Resources.

    The summary is a list of mappings such as
        {value: "programming_language", count: "count of occurrences"}
    ordered by sorted_counter(). It is stored on `resource` with
    set_resource_summary() and also returned.

    `keep_details` is passed as the `as_attribute` argument when reading the
    children summaries and when storing the summary of `resource`.
    """
    PROG_LANG = 'programming_language'

    own_language = getattr(resource, PROG_LANG, [])
    if own_language:
        languages = [own_language]
    elif resource.is_file:
        # a file without a detected language is still counted, as None
        languages = [None]
    else:
        languages = []

    # Expand each summary entry of the direct children back to `count`
    # repetitions of its value
    for child in children:
        entries = get_resource_summary(
            child, key=PROG_LANG, as_attribute=keep_details)
        for entry in entries or []:
            languages += [entry['value']] * entry['count']

    # summarize proper
    summarized = sorted_counter(summarize_languages(languages))
    set_resource_summary(
        resource,
        key=PROG_LANG,
        value=summarized,
        as_attribute=keep_details,
    )
    return summarized
示例#2
0
def license_summarizer(resource, children, keep_details=False):
    """
    Populate a license_expressions list of mappings such as
        {value: "expression", count: "count of occurrences"}
    sorted by decreasing count.

    The summary combines the license expressions of `resource` itself with
    the existing summaries of its direct `children`. It is stored on
    `resource` with set_resource_summary() and also returned.

    `keep_details` is passed as the `as_attribute` argument when reading the
    children summaries and when storing the summary of `resource`.
    """
    LIC_EXP = 'license_expressions'
    license_expressions = []

    # Collect current data. The getattr() default only applies when the
    # attribute is missing: normalize a present-but-None attribute to an
    # empty list so that a non-file resource does not fail on extend(None).
    lic_expressions = getattr(resource, LIC_EXP, None) or []
    if lic_expressions:
        license_expressions.extend(lic_expressions)
    elif resource.is_file:
        # also count files with no detection
        license_expressions.append(None)

    # Collect direct children expression summary
    for child in children:
        child_summaries = get_resource_summary(
            child, key=LIC_EXP, as_attribute=keep_details) or []
        for child_summary in child_summaries:
            # TODO: review this: this feels rather weird
            # Each summary entry is expanded back to `count` repetitions of
            # its value so that the counting can be redone at this level.
            values = [child_summary['value']] * child_summary['count']
            license_expressions.extend(values)

    # summarize proper
    licenses_counter = summarize_licenses(license_expressions)
    summarized = sorted_counter(licenses_counter)
    set_resource_summary(resource,
                         key=LIC_EXP,
                         value=summarized,
                         as_attribute=keep_details)
    return summarized
def build_summary(resource,
                  children,
                  attribute,
                  summarizer,
                  keep_details=False):
    """
    Summarize the `attribute` values (such as copyrights, etc.) of the
    `resource` Resource together with the existing summaries of its
    `children` Resources. Store the summary on `resource` and return it.

     - `attribute` is the name of the attribute ('copyrights', 'holders' etc.)
     - `summarizer` is a function that takes a list of texts and returns
        summarized texts with counts
     - `keep_details` is passed as the `as_attribute` argument when reading
        the children summaries and when storing the summary of `resource`.
    """
    own_entries = getattr(resource, attribute, [])

    # count of things without any detection for this attribute
    undetected = 0

    candidate_texts = []
    if own_entries:
        # the data of the resource itself is kept as its plain 'value' items
        for entry in own_entries:
            candidate_texts.append(entry.get('value'))
    elif resource.is_file:
        undetected = 1

    # Fold in the existing summaries of the direct children
    for child in children:
        child_summaries = get_resource_summary(
            child, key=attribute, as_attribute=keep_details)
        for child_summary in child_summaries or []:
            count, value = child_summary['count'], child_summary['value']
            if not value:
                # an entry without a value is a count of non-detections
                undetected += count
                continue
            candidate_texts.append(Text(value, value, count))

    # summarize proper using the provided function
    summarized = summarizer(candidate_texts)

    # add back the counter of things without detection
    if undetected:
        summarized.update({None: undetected})

    summarized = sorted_counter(summarized)
    if TRACE:
        logger_debug('COPYRIGHT summarized:', summarized)
    set_resource_summary(
        resource,
        key=attribute,
        value=summarized,
        as_attribute=keep_details,
    )
    return summarized
示例#4
0
def summarize_codebase_key_files(codebase, **kwargs):
    """
    Summarize the key files of `codebase` and store the result as a mapping
    of {attribute name: sorted summary} in
    `codebase.attributes.summary_of_key_files`.

    A key file is a top-level file flagged as a readme, a legal or a manifest
    file.
    """
    summarizable_attributes = codebase.attributes.summary.keys()
    if TRACE: logger_debug('summarizable_attributes:', summarizable_attributes)

    # TODO: we cannot summarize packages with "key files" for now
    supported_attributes = {
        'license_expressions',
        'copyrights',
        'holders',
        'authors',
        'programming_language',
        # 'packages',
    }
    summarizable_attributes = [
        attr for attr in summarizable_attributes
        if attr in supported_attributes
    ]

    # one list of collected values for each summarized attribute
    values_by_key = OrderedDict()
    for attr in summarizable_attributes:
        values_by_key[attr] = []

    for resource in codebase.walk(topdown=True):
        # only key files are considered
        is_key_file = (
            resource.is_file
            and resource.is_top_level
            and (resource.is_readme
                 or resource.is_legal
                 or resource.is_manifest)
        )
        if not is_key_file:
            continue

        for key, collected in values_by_key.items():
            # note we assume things are stored as extra-data, not as direct
            # Resource attributes
            entries = get_resource_summary(
                resource, key=key, as_attribute=False)
            for entry in entries or []:
                # each entry is a mapping with value/count: expand it back to
                # `count` repetitions of the value
                collected.extend([entry['value']] * entry['count'])

    # first summarize every attribute, then sort every summary
    counters = [
        (key, summarize_values(collected, key))
        for key, collected in values_by_key.items()
    ]

    sorted_summaries = OrderedDict()
    for key, counter in counters:
        sorted_summaries[key] = sorted_counter(counter)

    codebase.attributes.summary_of_key_files = sorted_summaries

    if TRACE: logger_debug('codebase summary_of_key_files:', sorted_summaries)
示例#5
0
def summarize_codebase_by_facet(codebase, **kwargs):
    """
    Summarize codebase by facet.

    For each facet of `facet.FACETS`, build a mapping with a `facet` and a
    `summary` key and append these mappings to
    `codebase.attributes.summary_by_facet`.
    """
    from summarycode import facet as facet_module

    summarizable = codebase.attributes.summary.keys()
    if TRACE:
        logger_debug('summarize_codebase_by_facet for attributes:',
                     summarizable)

    # one mapping of {attribute: list of values} for each known facet
    values_by_key_by_facet = {
        facet: {key: [] for key in summarizable}
        for facet in facet_module.FACETS
    }

    for resource in codebase.walk(topdown=True):
        if not resource.is_file:
            continue

        for facet in resource.facets:
            # note: this will fail loudly if the facet is not a known one
            values_by_key = values_by_key_by_facet[facet]
            for key, collected in values_by_key.items():
                # note we assume things are stored as extra-data, not as direct
                # Resource attributes
                entries = get_resource_summary(
                    resource, key=key, as_attribute=False)
                for entry in entries or []:
                    # each entry is a mapping with value/count: expand it back
                    # to discrete values, skipping entries without a value
                    value = entry.get('value')
                    if not value:
                        continue
                    collected.extend([value] * entry['count'])

    final_summaries = []
    for facet, values_by_key in values_by_key_by_facet.items():
        sorted_summaries = {
            key: sorted_counter(summarize_values(collected, key))
            for key, collected in values_by_key.items()
        }
        final_summaries.append({'facet': facet, 'summary': sorted_summaries})

    codebase.attributes.summary_by_facet.extend(final_summaries)

    if TRACE: logger_debug('codebase summary_by_facet:', final_summaries)
示例#6
0
def summarize_codebase_key_files(codebase, **kwargs):
    """
    Summarize codebase key files.

    A key file is a top-level file flagged as a readme, a legal or a manifest
    file. For each attribute of the codebase summary that is also listed in
    SUMMARIZABLE_ATTRS, the non-empty summary values of the key files are
    collected and summarized again. The resulting mapping of
    {attribute name: sorted summary} is stored in
    `codebase.attributes.summary_of_key_files`.
    """
    summarizables = codebase.attributes.summary.keys()
    if TRACE: logger_debug('summarizables:', summarizables)

    # TODO: we cannot summarize packages with "key files" for now
    summarizables = [k for k in summarizables if k in SUMMARIZABLE_ATTRS]

    # create one list of collected values for each summarized attribute
    summarizable_values_by_key = {key: [] for key in summarizables}

    # filter to get only key files
    key_files = (res for res in codebase.walk(topdown=True)
                 if (res.is_file and res.is_top_level and (
                     res.is_readme or res.is_legal or res.is_manifest)))

    for resource in key_files:
        for key, values in summarizable_values_by_key.items():
            # note we assume things are stored as extra-data, not as direct
            # Resource attributes
            res_summaries = get_resource_summary(
                resource, key=key, as_attribute=False) or []
            for summary in res_summaries:
                # each summary is a mapping with value/count: we transform
                # back to values, skipping entries without a value
                sum_value = summary.get('value')
                if sum_value:
                    values.extend([sum_value] * summary['count'])

    # Every key was already filtered against SUMMARIZABLE_ATTRS above, so no
    # second membership check is needed here.
    summary_counters = [
        (key, summarize_values(values, key))
        for key, values in summarizable_values_by_key.items()
    ]

    sorted_summaries = {
        key: sorted_counter(counter) for key, counter in summary_counters
    }

    codebase.attributes.summary_of_key_files = sorted_summaries

    if TRACE: logger_debug('codebase summary_of_key_files:', sorted_summaries)
示例#7
0
def package_summarizer(resource, children, keep_details=False):
    """
    Build the packages summary of `resource` as a list of package mappings:
    the packages of `resource` itself, as returned by add_files(), followed
    by the packages summaries of its direct `children`. Store this list on
    `resource` with set_resource_summary() and return it.

    Note: `keep_details` is never used, as we are not keeping details of
    packages as this has no value.
    """
    # Packages of the resource itself
    own_packages = getattr(resource, 'packages') or []

    if TRACE_LIGHT and own_packages:
        from packagedcode.models import Package
        logger_debug('package_summarizer: for:', resource,
                     'current_packages are:',
                     [Package.create(**p) for p in own_packages])

    packages = list(add_files(own_packages, resource))

    if TRACE_LIGHT and packages:
        logger_debug()
        from packagedcode.models import Package  # NOQA
        logger_debug('package_summarizer: for:', resource, 'packages are:',
                     [Package.create(**p) for p in packages])

    # Append the packages summary of each direct child
    for child in children:
        packages.extend(
            get_resource_summary(child, key='packages', as_attribute=False)
            or []
        )

    # summarize proper
    set_resource_summary(
        resource,
        key='packages',
        value=packages,
        as_attribute=False,
    )
    return packages