def get_error_messages(errors, quiet=False, verbose=False):
    """
    Return a tuple of (list of error message strings to report,
    severe_errors_count) given an `errors` list of Error objects and using
    the `quiet` and `verbose` flags.
    """
    errors = unique(errors)
    severe_errors = filter_errors(errors, WARNING)
    severe_errors_count = len(severe_errors)

    messages = []

    if severe_errors and not quiet:
        error_msg = 'Command completed with {} errors or warnings.'.format(
            severe_errors_count)
        messages.append(error_msg)

    for severity, message in errors:
        sevcode = severities.get(severity) or 'UNKNOWN'
        msg = '{sevcode}: {message}'.format(**locals())
        if not quiet:
            if verbose:
                messages.append(msg)
            elif severity >= WARNING:
                messages.append(msg)

    return messages, severe_errors_count
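# A minimal usage sketch for get_error_messages(). The import path and the
# sample messages below are assumptions for illustration only; Error objects
# and the WARNING/ERROR severity constants are expected to come from the
# attributecode package.
#
#   from attributecode import Error, ERROR, WARNING
#
#   errs = [Error(ERROR, 'missing about_resource'),
#           Error(WARNING, 'unknown license key'),
#           Error(ERROR, 'missing about_resource')]  # duplicate, dropped by unique()
#   messages, severe_count = get_error_messages(errs, quiet=False, verbose=False)
#   # messages starts with the summary line, then one "SEVERITY: message"
#   # entry per error at WARNING level or above; severe_count would be 2 in
#   # this sketch, assuming filter_errors() keeps WARNING and higher.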
def check_duplicated_columns(location):
    """
    Return a list of errors for duplicated column names in a CSV file at
    `location`.
    """
    location = add_unc(location)
    with codecs.open(location, 'rb', encoding='utf-8-sig', errors='replace') as csvfile:
        reader = csv.reader(csvfile)
        columns = next(reader)
        columns = [col for col in columns]

    seen = set()
    dupes = OrderedDict()
    for col in columns:
        c = col.lower()
        if c in seen:
            if c in dupes:
                dupes[c].append(col)
            else:
                dupes[c] = [col]
        seen.add(c)

    errors = []
    if dupes:
        dup_msg = []
        for name, names in dupes.items():
            names = u', '.join(names)
            msg = '%(name)s with %(names)s' % locals()
            dup_msg.append(msg)
        dup_msg = u', '.join(dup_msg)
        msg = ('Duplicated column name(s): %(dup_msg)s\n' % locals() +
               'Please correct the input and re-run.')
        errors.append(Error(ERROR, msg))
    return unique(errors)
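# Hedged usage sketch: check_duplicated_columns() only reads the header row,
# so a small CSV is enough to exercise it. The file name and content below
# are purely illustrative.
#
#   with open('inventory.csv', 'w', encoding='utf-8') as f:
#       f.write('about_resource,name,Name\n./foo,foo,foo\n')
#   errors = check_duplicated_columns('inventory.csv')
#   # One ERROR is returned because "name" and "Name" collide
#   # case-insensitively; an empty list means the header is clean.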
def test_unique_can_handle_About_object(self):
    base_dir = 'some_dir'
    test = {
        'about_resource': '.',
        'author': '',
        'copyright': 'Copyright (c) 2013-2014 nexB Inc.',
        'custom1': 'some custom',
        'custom_empty': '',
        'description': 'AboutCode is a tool\nfor files.',
        'license': 'apache-2.0',
        'name': 'AboutCode',
        'owner': 'nexB Inc.'
    }

    a = model.About()
    a.load_dict(test, base_dir)

    c = model.About()
    c.load_dict(test, base_dir)

    b = model.About()
    test.update(dict(about_resource='asdasdasd'))
    b.load_dict(test, base_dir)

    abouts = [a, b]
    results = util.unique(abouts)
    assert [a] == results
def inventory(location, output, format, quiet, verbose):  # NOQA
    """
    Collect the inventory of .ABOUT file data as CSV or JSON.

    LOCATION: Path to an .ABOUT file or a directory with .ABOUT files.

    OUTPUT: Path to the JSON or CSV inventory file to create.
    """
    if not quiet:
        print_version()
        click.echo('Collecting inventory from ABOUT files...')

    if location.lower().endswith('.zip'):
        # accept zipped ABOUT files as input
        location = extract_zip(location)

    errors, abouts = collect_inventory(location)
    write_errors = write_output(abouts=abouts, location=output, format=format)
    errors.extend(write_errors)

    errors = unique(errors)
    errors_count = report_errors(errors, quiet, verbose,
                                 log_file_loc=output + '-error.log')
    if not quiet:
        msg = 'Inventory collected in {output}.'.format(**locals())
        click.echo(msg)
    sys.exit(errors_count)
def check(location, verbose):
    """
    Check .ABOUT file(s) at LOCATION for validity and print error messages.

    LOCATION: Path to a file or directory containing .ABOUT files.
    """
    print_version()
    click.echo('Checking ABOUT files...')
    errors, _abouts = collect_inventory(location)
    errors = unique(errors)
    severe_errors_count = report_errors(errors, quiet=False, verbose=verbose)
    sys.exit(severe_errors_count)
def report_errors(errors, quiet, verbose, log_file_loc=None):
    """
    Report the `errors` list of Error objects to screen based on the `quiet`
    and `verbose` flags.

    If a `log_file_loc` file location is provided, also write a verbose log
    to this file.

    Return the count of severe errors reported.
    """
    errors = unique(errors)
    messages, severe_errors_count = get_error_messages(errors, quiet, verbose)
    for msg in messages:
        click.echo(msg)
    if log_file_loc:
        log_msgs, _ = get_error_messages(errors, quiet=False, verbose=True)
        with io.open(log_file_loc, 'w', encoding='utf-8') as lf:
            lf.write('\n'.join(log_msgs))
    return severe_errors_count
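# Hedged sketch of how the CLI commands in this module use report_errors():
# echo the filtered messages, optionally write a verbose log next to the
# output file, and exit with the count of severe errors. The log file name
# is illustrative.
#
#   errors_count = report_errors(errors, quiet=False, verbose=False,
#                                log_file_loc='inventory.csv-error.log')
#   sys.exit(errors_count)  # non-zero exit when severe errors were reported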
def transform(location, output, configuration, quiet, verbose):  # NOQA
    """
    Transform the CSV/JSON file at LOCATION by applying renamings, filters
    and checks and write a new CSV/JSON file to OUTPUT.

    LOCATION: Path to a CSV/JSON file.

    OUTPUT: Path to the CSV/JSON inventory file to create.
    """
    from attributecode.transform import transform_csv_to_csv
    from attributecode.transform import transform_json_to_json
    from attributecode.transform import Transformer

    if not configuration:
        transformer = Transformer.default()
    else:
        transformer = Transformer.from_file(configuration)

    if location.endswith('.csv') and output.endswith('.csv'):
        errors = transform_csv_to_csv(location, output, transformer)
    elif location.endswith('.json') and output.endswith('.json'):
        errors = transform_json_to_json(location, output, transformer)
    else:
        msg = 'The input and output file extensions must be the same.'
        click.echo(msg)
        sys.exit(1)

    if not quiet:
        print_version()
        click.echo('Transforming...')

    errors = unique(errors)
    errors_count = report_errors(errors, quiet, verbose,
                                 log_file_loc=output + '-error.log')
    if not quiet and not errors:
        msg = 'Transformed file written to {output}.'.format(**locals())
        click.echo(msg)
    sys.exit(errors_count)
def attrib(location, output, template, vartext, quiet, verbose):
    """
    Generate an attribution document at OUTPUT using .ABOUT files at LOCATION.

    LOCATION: Path to a file, directory or .zip archive containing .ABOUT files.

    OUTPUT: Path where to write the attribution document.
    """
    if not quiet:
        print_version()
        click.echo('Generating attribution...')

    # accept zipped ABOUT files as input
    if location.lower().endswith('.zip'):
        location = extract_zip(location)

    errors, abouts = collect_inventory(location)

    attrib_errors, rendered = generate_attribution_doc(
        abouts=abouts,
        output_location=output,
        template_loc=template,
        variables=vartext,
    )
    errors.extend(attrib_errors)

    errors = unique(errors)
    errors_count = report_errors(errors, quiet, verbose,
                                 log_file_loc=output + '-error.log')

    if not quiet:
        if rendered:
            msg = 'Attribution generated in: {output}'.format(**locals())
            click.echo(msg)
        else:
            msg = 'Attribution generation failed.'
            click.echo(msg)
    sys.exit(errors_count)
def gen(location, output, android, fetch_license, reference, quiet, verbose):
    """
    Generate .ABOUT files in OUTPUT from a JSON or CSV inventory at LOCATION.

    LOCATION: Path to a JSON or CSV inventory file.

    OUTPUT: Path to a directory where ABOUT files are generated.
    """
    if not quiet:
        print_version()
        click.echo('Generating .ABOUT files...')

    # FIXME: This should be checked in the `click` option validation
    if not location.endswith(('.csv', '.json',)):
        raise click.UsageError(
            'ERROR: Invalid input file extension: must be .csv or .json.')

    errors, abouts = generate_about_files(
        location=location,
        base_dir=output,
        android=android,
        reference_dir=reference,
        fetch_license=fetch_license,
    )

    errors = unique(errors)
    errors_count = report_errors(errors, quiet, verbose,
                                 log_file_loc=output + '-error.log')
    if not quiet:
        abouts_count = len(abouts)
        msg = '{abouts_count} .ABOUT files generated in {output}.'.format(
            **locals())
        click.echo(msg)
    sys.exit(errors_count)
def generate(location, base_dir, android=None, reference_dir=None, fetch_license=False):
    """
    Load ABOUT data from a CSV inventory at `location`. Write ABOUT files to
    `base_dir`. Return a list of errors and a list of About objects.
    """
    not_exist_errors = []
    notice_dict = {}
    api_url = ''
    api_key = ''
    gen_license = False

    # FIXME: use two different arguments: key and url
    # Check if fetch_license contains valid arguments
    if fetch_license:
        # Strip the ' and " from the api_url and api_key input
        api_url = fetch_license[0].strip("'").strip('"')
        api_key = fetch_license[1].strip("'").strip('"')
        gen_license = True

    # TODO: WHY use posix??
    bdir = to_posix(base_dir)

    errors, abouts = load_inventory(
        location=location,
        base_dir=bdir,
        reference_dir=reference_dir)

    if gen_license:
        license_dict, err = model.pre_process_and_fetch_license_dict(
            abouts, api_url, api_key)
        if err:
            for e in err:
                # Avoid adding the same error multiple times
                if e not in errors:
                    errors.append(e)

    for about in abouts:
        if about.about_file_path.startswith('/'):
            about.about_file_path = about.about_file_path.lstrip('/')
        dump_loc = join(bdir, about.about_file_path.lstrip('/'))

        # Check if any directory name in the path ends with a space
        split_path = about.about_file_path.split('/')
        dir_endswith_space = False
        for segment in split_path:
            if segment.endswith(' '):
                msg = (u'File path : '
                       u'%(dump_loc)s '
                       u'contains a directory name that ends with spaces which is not '
                       u'allowed. Generation skipped.' % locals())
                errors.append(Error(ERROR, msg))
                dir_endswith_space = True
                break
        if dir_endswith_space:
            # Continue to work on the next about object
            continue

        try:
            # Generate a value for 'about_resource' if it does not exist
            if not about.about_resource.value:
                about.about_resource.value = OrderedDict()
                about_resource_value = ''
                if about.about_file_path.endswith('/'):
                    about_resource_value = u'.'
                else:
                    about_resource_value = basename(about.about_file_path)
                about.about_resource.value[about_resource_value] = None
                about.about_resource.present = True

                # Check for the existence of the 'about_resource'.
                # If the input already has the 'about_resource' field, it will
                # be validated when creating the about object.
                loc = util.to_posix(dump_loc)
                about_file_loc = loc
                path = join(dirname(util.to_posix(about_file_loc)), about_resource_value)
                if not exists(path):
                    path = util.to_posix(path.strip(UNC_PREFIX_POSIX))
                    path = normpath(path)
                    msg = (u'Field about_resource: '
                           u'%(path)s '
                           u'does not exist' % locals())
                    not_exist_errors.append(msg)

            if gen_license:
                # Write the generated LICENSE file
                license_key_name_context_url_list = about.dump_lic(dump_loc, license_dict)
                if license_key_name_context_url_list:
                    # use value not "presence"
                    if not about.license_file.present:
                        about.license_file.value = OrderedDict()
                        for lic_key, lic_name, lic_context, lic_url in license_key_name_context_url_list:
                            gen_license_name = lic_key + u'.LICENSE'
                            about.license_file.value[gen_license_name] = lic_context
                            about.license_file.present = True
                            if not about.license_name.present:
                                about.license_name.value.append(lic_name)
                            if not about.license_url.present:
                                about.license_url.value.append(lic_url)
                        if about.license_url.value:
                            about.license_url.present = True
                        if about.license_name.value:
                            about.license_name.present = True

            about.dump(dump_loc)

            if android:
                # Create MODULE_LICENSE_XXX and collect the text needed to
                # create a NOTICE file following the Android Open Source
                # Project conventions.
                import os
                parent_path = os.path.dirname(util.to_posix(dump_loc))

                about.android_module_license(parent_path)
                notice_path, notice_context = about.android_notice(parent_path)
                if notice_path in notice_dict.keys():
                    notice_dict[notice_path] += '\n\n' + notice_context
                else:
                    notice_dict[notice_path] = notice_context

            for e in not_exist_errors:
                errors.append(Error(INFO, e))

        except Exception as e:
            # only keep the first 100 characters of the exception
            # TODO: truncated errors are likely making diagnostics harder
            emsg = repr(e)[:100]
            msg = (u'Failed to write .ABOUT file at : '
                   u'%(dump_loc)s '
                   u'with error: %(emsg)s' % locals())
            errors.append(Error(ERROR, msg))

    if android:
        # Check if there is already a NOTICE file present
        for path in notice_dict.keys():
            if os.path.exists(path):
                msg = (u'NOTICE file already exists at: %s' % path)
                errors.append(Error(ERROR, msg))
            else:
                about.dump_android_notice(path, notice_dict[path])

    return unique(errors), abouts
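# Hedged usage sketch for generate(): the inventory path and output directory
# are placeholders. When fetch_license is used, it carries the license API
# URL and the API key, in that order.
#
#   errors, abouts = generate(
#       location='inventory.csv',
#       base_dir='out/about_files',
#       android=False,
#       reference_dir=None,
#       fetch_license=False,
#   )
#   for severity, message in errors:
#       print(severity, message)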
def load_inventory(location, base_dir, reference_dir=None):
    """
    Load the inventory file at `location` for ABOUT and LICENSE files stored
    in the `base_dir`. Return a list of errors and a list of About objects
    validated against the `base_dir`.

    Optionally use `reference_dir` as the directory location of extra
    reference license and notice files to reuse.
    """
    errors = []
    abouts = []
    base_dir = util.to_posix(base_dir)
    # FIXME: do not mix up CSV and JSON
    if location.endswith('.csv'):
        # FIXME: this should not be done here.
        dup_cols_err = check_duplicated_columns(location)
        if dup_cols_err:
            errors.extend(dup_cols_err)
            return errors, abouts
        inventory = util.load_csv(location)
    else:
        inventory = util.load_json(location)

    try:
        # FIXME: this should not be done here.
        dup_about_resource_err = check_duplicated_about_resource(inventory)
        if dup_about_resource_err:
            errors.extend(dup_about_resource_err)
            return errors, abouts

        newline_in_file = check_newline_in_file_field(inventory)
        if newline_in_file:
            errors.extend(newline_in_file)
            return errors, abouts
    except Exception as e:
        # TODO: why catch ALL Exception
        msg = "The essential field 'about_resource' is not found in the <input>"
        errors.append(Error(CRITICAL, msg))
        return errors, abouts

    for i, fields in enumerate(inventory):
        # Check that the input contains the required fields
        required_fields = model.About.required_fields

        for f in required_fields:
            if f not in fields:
                msg = "Required field: %(f)r not found in the <input>" % locals()
                errors.append(Error(ERROR, msg))
                return errors, abouts
        afp = fields.get(model.About.ABOUT_RESOURCE_ATTR)

        # FIXME: this should not be a failure condition
        if not afp or not afp.strip():
            msg = 'Empty column: %(afp)r. Cannot generate .ABOUT file.' % locals()
            errors.append(Error(ERROR, msg))
            continue
        else:
            afp = util.to_posix(afp)
            loc = join(base_dir, afp)
        about = model.About(about_file_path=afp)
        about.location = loc

        # Update the value for 'about_resource':
        # keep only the filename, or '.' if it is a directory
        if 'about_resource' in fields:
            updated_resource_value = u''
            resource_path = fields['about_resource']
            if resource_path.endswith(u'/'):
                updated_resource_value = u'.'
            else:
                updated_resource_value = basename(resource_path)
            fields['about_resource'] = updated_resource_value

        ld_errors = about.load_dict(
            fields,
            base_dir,
            running_inventory=False,
            reference_dir=reference_dir,
        )
        """
        # 'about_resource' field will be generated during the process.
        # No error need to be raise for the missing 'about_resource'.
        for e in ld_errors:
            if e.message == 'Field about_resource is required':
                ld_errors.remove(e)
        """
        for e in ld_errors:
            if e not in errors:
                errors.append(e)
        abouts.append(about)

    return unique(errors), abouts
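# Hedged usage sketch for load_inventory(): paths are placeholders. The
# returned About objects are validated against base_dir but are not written
# out; generate() above is what dumps them to disk.
#
#   errors, abouts = load_inventory(
#       location='inventory.csv',
#       base_dir='out/about_files',
#       reference_dir='reference_licenses',
#   )
#   assert all(isinstance(a, model.About) for a in abouts)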
def test_unique_does_deduplicate_and_keep_ordering(self):
    items = ['a', 'b', 'd', 'b', 'c', 'a']
    expected = ['a', 'b', 'd', 'c']
    results = util.unique(items)
    assert expected == results
def attributecode(input, output, configuration, djc, scancode, min_license_score,
                  reference, template, vartext, quiet, verbose):
    """
    Generate attribution from a JSON, CSV or Excel file.
    """
    if scancode:
        if not input.endswith('.json'):
            msg = 'The input file from ScanCode toolkit needs to be in JSON format.'
            click.echo(msg)
            sys.exit(1)
        if not min_license_score:
            min_license_score = DEFAULT_LICENSE_SCORE

    if min_license_score:
        if not scancode:
            msg = ('This option requires a JSON file generated by ScanCode toolkit '
                   'as the input. The "--scancode" option is required.')
            click.echo(msg)
            sys.exit(1)

    errors, abouts = load_inventory(
        location=input,
        configuration=configuration,
        scancode=scancode,
        reference_dir=reference)

    license_dict, lic_errors = pre_process_and_fetch_license_dict(
        abouts, djc, scancode, reference)
    errors.extend(lic_errors)
    sorted_license_dict = sorted(license_dict)

    # Read the license_file and notice_file and store the texts in a dictionary
    for about in abouts:
        if about.license_file.value or about.notice_file.value:
            if not reference:
                msg = ('"license_file" / "notice_file" field contains a value. '
                       'Use `--reference` to indicate its parent directory.')
                click.echo(msg)
                sys.exit(1)
            if about.license_file.value:
                file_name = about.license_file.value
                error, text = get_file_text(file_name, reference)
                if not error:
                    about.license_file.value = {}
                    about.license_file.value[file_name] = text
                else:
                    errors.append(error)
            if about.notice_file.value:
                file_name = about.notice_file.value
                error, text = get_file_text(file_name, reference)
                if not error:
                    about.notice_file.value = {}
                    about.notice_file.value[file_name] = text
                else:
                    errors.append(error)

    rendered = ''
    if abouts:
        attrib_errors, rendered = generate_attribution_doc(
            abouts=abouts,
            license_dict=dict(sorted(license_dict.items())),
            output_location=output,
            min_license_score=min_license_score,
            template_loc=template,
            variables=vartext,
        )
        errors.extend(attrib_errors)

    errors = unique(errors)
    errors_count = report_errors(errors, quiet, verbose,
                                 log_file_loc=output + '-error.log')

    if rendered:
        # Check if the default template is used
        import filecmp
        default_template = os.path.join(
            os.path.abspath(os.path.dirname(__file__)),
            '../../templates/default_html.template')
        if filecmp.cmp(default_template, template):
            num_comps = number_of_component_generated_from_default_template(output)
            msg = ('{num_comps} component(s) included in the generated '
                   'attribution at {output}'.format(**locals()))
        else:
            msg = 'Attribution generated at: {output}'.format(**locals())
        click.echo(msg)
    else:
        msg = 'Attribution generation failed.'
        click.echo(msg)
    sys.exit(errors_count)