def get_error_messages(errors, quiet=False, verbose=False):
    """
    Return a tuple of (list of error message strings to report,
    severe_errors_count) given an `errors` list of Error objects and using
    the `quiet` and `verbose` flags.
    """
    errors = unique(errors)
    severe_errors = filter_errors(errors, WARNING)
    severe_errors_count = len(severe_errors)

    messages = []

    if severe_errors and not quiet:
        error_msg = 'Command completed with {} errors or warnings.'.format(
            severe_errors_count)
        messages.append(error_msg)

    for severity, message in errors:
        sevcode = severities.get(severity) or 'UNKNOWN'
        msg = '{sevcode}: {message}'.format(**locals())
        if not quiet:
            if verbose:
                messages.append(msg)
            elif severity >= WARNING:
                messages.append(msg)

    return messages, severe_errors_count
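# A minimal usage sketch for get_error_messages(). The import path and the
# sample messages below are assumptions for illustration only; Error objects
# and the WARNING/ERROR severity constants are expected to come from the
# attributecode package.
#
#   from attributecode import Error, ERROR, WARNING
#
#   errs = [Error(ERROR, 'missing about_resource'),
#           Error(WARNING, 'unknown license key'),
#           Error(ERROR, 'missing about_resource')]  # duplicate, dropped by unique()
#   messages, severe_count = get_error_messages(errs, quiet=False, verbose=False)
#   # messages starts with the summary line, then one "SEVERITY: message"
#   # entry per error at WARNING level or above; severe_count would be 2 in
#   # this sketch, assuming filter_errors() keeps WARNING and higher.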
def check_duplicated_columns(location):
    """
    Return a list of errors for duplicated column names in a CSV file at
    `location`.
    """
    location = add_unc(location)
    with codecs.open(location, 'rb', encoding='utf-8-sig', errors='replace') as csvfile:
        reader = csv.reader(csvfile)
        columns = next(reader)
        columns = [col for col in columns]

    seen = set()
    dupes = OrderedDict()
    for col in columns:
        c = col.lower()
        if c in seen:
            if c in dupes:
                dupes[c].append(col)
            else:
                dupes[c] = [col]
        seen.add(c)

    errors = []
    if dupes:
        dup_msg = []
        for name, names in dupes.items():
            names = u', '.join(names)
            msg = '%(name)s with %(names)s' % locals()
            dup_msg.append(msg)
        dup_msg = u', '.join(dup_msg)
        msg = ('Duplicated column name(s): %(dup_msg)s\n' % locals() +
               'Please correct the input and re-run.')
        errors.append(Error(ERROR, msg))
    return unique(errors)
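# Hedged usage sketch: check_duplicated_columns() only reads the header row,
# so a small CSV is enough to exercise it. The file name and content below
# are purely illustrative.
#
#   with open('inventory.csv', 'w', encoding='utf-8') as f:
#       f.write('about_resource,name,Name\n./foo,foo,foo\n')
#   errors = check_duplicated_columns('inventory.csv')
#   # One ERROR is returned because "name" and "Name" collide
#   # case-insensitively; an empty list means the header is clean.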
def test_unique_can_handle_About_object(self):
    base_dir = 'some_dir'
    test = {
        'about_resource': '.',
        'author': '',
        'copyright': 'Copyright (c) 2013-2014 nexB Inc.',
        'custom1': 'some custom',
        'custom_empty': '',
        'description': 'AboutCode is a tool\nfor files.',
        'license': 'apache-2.0',
        'name': 'AboutCode',
        'owner': 'nexB Inc.'
    }

    a = model.About()
    a.load_dict(test, base_dir)

    c = model.About()
    c.load_dict(test, base_dir)

    b = model.About()
    test.update(dict(about_resource='asdasdasd'))
    b.load_dict(test, base_dir)

    abouts = [a, b]
    results = util.unique(abouts)
    assert [a] == results
def inventory(location, output, format, quiet, verbose):  # NOQA
    """
    Collect the inventory of .ABOUT file data as CSV or JSON.

    LOCATION: Path to an .ABOUT file or a directory with .ABOUT files.

    OUTPUT: Path to the JSON or CSV inventory file to create.
    """
    if not quiet:
        print_version()
        click.echo('Collecting inventory from ABOUT files...')

    if location.lower().endswith('.zip'):
        # accept zipped ABOUT files as input
        location = extract_zip(location)

    errors, abouts = collect_inventory(location)
    write_errors = write_output(abouts=abouts, location=output, format=format)
    errors.extend(write_errors)

    errors = unique(errors)
    errors_count = report_errors(errors, quiet, verbose,
                                 log_file_loc=output + '-error.log')
    if not quiet:
        msg = 'Inventory collected in {output}.'.format(**locals())
        click.echo(msg)
    sys.exit(errors_count)
def check(location, verbose):
    """
    Check .ABOUT file(s) at LOCATION for validity and print error messages.

    LOCATION: Path to a file or directory containing .ABOUT files.
    """
    print_version()
    click.echo('Checking ABOUT files...')
    errors, _abouts = collect_inventory(location)
    errors = unique(errors)
    severe_errors_count = report_errors(errors, quiet=False, verbose=verbose)
    sys.exit(severe_errors_count)
def report_errors(errors, quiet, verbose, log_file_loc=None):
    """
    Report the `errors` list of Error objects to screen based on the `quiet`
    and `verbose` flags.

    If a `log_file_loc` file location is provided, also write a verbose log
    to this file.

    Return the count of severe errors reported.
    """
    errors = unique(errors)
    messages, severe_errors_count = get_error_messages(errors, quiet, verbose)
    for msg in messages:
        click.echo(msg)
    if log_file_loc:
        log_msgs, _ = get_error_messages(errors, quiet=False, verbose=True)
        with io.open(log_file_loc, 'w', encoding='utf-8') as lf:
            lf.write('\n'.join(log_msgs))
    return severe_errors_count
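# Hedged sketch of how the CLI commands in this module use report_errors():
# echo the filtered messages, optionally write a verbose log next to the
# output file, and exit with the count of severe errors. The log file name
# is illustrative.
#
#   errors_count = report_errors(errors, quiet=False, verbose=False,
#                                log_file_loc='inventory.csv-error.log')
#   sys.exit(errors_count)  # non-zero exit when severe errors were reported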
def transform(location, output, configuration, quiet, verbose):  # NOQA
    """
    Transform the CSV/JSON file at LOCATION by applying renamings, filters
    and checks and write a new CSV/JSON file to OUTPUT.

    LOCATION: Path to a CSV/JSON file.

    OUTPUT: Path to the CSV/JSON inventory file to create.
    """
    from attributecode.transform import transform_csv_to_csv
    from attributecode.transform import transform_json_to_json
    from attributecode.transform import Transformer

    if not configuration:
        transformer = Transformer.default()
    else:
        transformer = Transformer.from_file(configuration)

    if location.endswith('.csv') and output.endswith('.csv'):
        errors = transform_csv_to_csv(location, output, transformer)
    elif location.endswith('.json') and output.endswith('.json'):
        errors = transform_json_to_json(location, output, transformer)
    else:
        msg = 'The input and output file extensions must be the same.'
        click.echo(msg)
        sys.exit(1)

    if not quiet:
        print_version()
        click.echo('Transforming...')

    errors = unique(errors)
    errors_count = report_errors(errors, quiet, verbose,
                                 log_file_loc=output + '-error.log')
    if not quiet and not errors:
        msg = 'Transformed file written to {output}.'.format(**locals())
        click.echo(msg)
    sys.exit(errors_count)
def attrib(location, output, template, vartext, quiet, verbose):
    """
    Generate an attribution document at OUTPUT using .ABOUT files at LOCATION.

    LOCATION: Path to a file, directory or .zip archive containing .ABOUT files.

    OUTPUT: Path where to write the attribution document.
    """
    if not quiet:
        print_version()
        click.echo('Generating attribution...')

    # accept zipped ABOUT files as input
    if location.lower().endswith('.zip'):
        location = extract_zip(location)

    errors, abouts = collect_inventory(location)

    attrib_errors, rendered = generate_attribution_doc(
        abouts=abouts,
        output_location=output,
        template_loc=template,
        variables=vartext,
    )
    errors.extend(attrib_errors)

    errors = unique(errors)
    errors_count = report_errors(errors, quiet, verbose,
                                 log_file_loc=output + '-error.log')

    if not quiet:
        if rendered:
            msg = 'Attribution generated in: {output}'.format(**locals())
            click.echo(msg)
        else:
            msg = 'Attribution generation failed.'
            click.echo(msg)
    sys.exit(errors_count)
def gen(location, output, android, fetch_license, reference, quiet, verbose):
    """
    Generate .ABOUT files in OUTPUT from a JSON or CSV inventory at LOCATION.

    LOCATION: Path to a JSON or CSV inventory file.

    OUTPUT: Path to a directory where ABOUT files are generated.
    """
    if not quiet:
        print_version()
        click.echo('Generating .ABOUT files...')

    # FIXME: This should be checked in the `click` option validation
    if not location.endswith(('.csv', '.json',)):
        raise click.UsageError(
            'ERROR: Invalid input file extension: must be .csv or .json.')

    errors, abouts = generate_about_files(
        location=location,
        base_dir=output,
        android=android,
        reference_dir=reference,
        fetch_license=fetch_license,
    )

    errors = unique(errors)
    errors_count = report_errors(errors, quiet, verbose,
                                 log_file_loc=output + '-error.log')
    if not quiet:
        abouts_count = len(abouts)
        msg = '{abouts_count} .ABOUT files generated in {output}.'.format(
            **locals())
        click.echo(msg)
    sys.exit(errors_count)
def generate(location, base_dir, android=None, reference_dir=None, fetch_license=False):
    """
    Load ABOUT data from a CSV inventory at `location`. Write ABOUT files to
    `base_dir`. Return a list of errors and a list of About objects.
    """
    not_exist_errors = []
    notice_dict = {}
    api_url = ''
    api_key = ''
    gen_license = False

    # FIXME: use two different arguments: key and url
    # Check if fetch_license contains valid arguments
    if fetch_license:
        # Strip the ' and " from the api_url and api_key input
        api_url = fetch_license[0].strip("'").strip('"')
        api_key = fetch_license[1].strip("'").strip('"')
        gen_license = True

    # TODO: WHY use posix??
    bdir = to_posix(base_dir)

    errors, abouts = load_inventory(
        location=location,
        base_dir=bdir,
        reference_dir=reference_dir)

    if gen_license:
        license_dict, err = model.pre_process_and_fetch_license_dict(
            abouts, api_url, api_key)
        if err:
            for e in err:
                # Avoid adding the same error multiple times
                if e not in errors:
                    errors.append(e)

    for about in abouts:
        if about.about_file_path.startswith('/'):
            about.about_file_path = about.about_file_path.lstrip('/')
        dump_loc = join(bdir, about.about_file_path.lstrip('/'))

        # Check if any directory name in the path ends with a space
        split_path = about.about_file_path.split('/')
        dir_endswith_space = False
        for segment in split_path:
            if segment.endswith(' '):
                msg = (u'File path : '
                       u'%(dump_loc)s '
                       u'contains a directory name that ends with spaces which is not '
                       u'allowed. Generation skipped.' % locals())
                errors.append(Error(ERROR, msg))
                dir_endswith_space = True
                break
        if dir_endswith_space:
            # Continue to work on the next about object
            continue

        try:
            # Generate a value for 'about_resource' if it does not exist
            if not about.about_resource.value:
                about.about_resource.value = OrderedDict()
                about_resource_value = ''
                if about.about_file_path.endswith('/'):
                    about_resource_value = u'.'
                else:
                    about_resource_value = basename(about.about_file_path)
                about.about_resource.value[about_resource_value] = None
                about.about_resource.present = True

                # Check for the existence of the 'about_resource'.
                # If the input already has the 'about_resource' field, it will
                # be validated when creating the about object.
                loc = util.to_posix(dump_loc)
                about_file_loc = loc
                path = join(dirname(util.to_posix(about_file_loc)), about_resource_value)
                if not exists(path):
                    path = util.to_posix(path.strip(UNC_PREFIX_POSIX))
                    path = normpath(path)
                    msg = (u'Field about_resource: '
                           u'%(path)s '
                           u'does not exist' % locals())
                    not_exist_errors.append(msg)

            if gen_license:
                # Write the generated LICENSE file
                license_key_name_context_url_list = about.dump_lic(dump_loc, license_dict)
                if license_key_name_context_url_list:
                    # use value not "presence"
                    if not about.license_file.present:
                        about.license_file.value = OrderedDict()
                        for lic_key, lic_name, lic_context, lic_url in license_key_name_context_url_list:
                            gen_license_name = lic_key + u'.LICENSE'
                            about.license_file.value[gen_license_name] = lic_context
                            about.license_file.present = True
                            if not about.license_name.present:
                                about.license_name.value.append(lic_name)
                            if not about.license_url.present:
                                about.license_url.value.append(lic_url)
                        if about.license_url.value:
                            about.license_url.present = True
                        if about.license_name.value:
                            about.license_name.present = True

            about.dump(dump_loc)

            if android:
                # Create MODULE_LICENSE_XXX and collect the text needed to
                # create a NOTICE file following the Android Open Source
                # Project conventions.
                import os
                parent_path = os.path.dirname(util.to_posix(dump_loc))

                about.android_module_license(parent_path)
                notice_path, notice_context = about.android_notice(parent_path)
                if notice_path in notice_dict.keys():
                    notice_dict[notice_path] += '\n\n' + notice_context
                else:
                    notice_dict[notice_path] = notice_context

            for e in not_exist_errors:
                errors.append(Error(INFO, e))

        except Exception as e:
            # only keep the first 100 characters of the exception
            # TODO: truncated errors are likely making diagnostics harder
            emsg = repr(e)[:100]
            msg = (u'Failed to write .ABOUT file at : '
                   u'%(dump_loc)s '
                   u'with error: %(emsg)s' % locals())
            errors.append(Error(ERROR, msg))

    if android:
        # Check if there is already a NOTICE file present
        for path in notice_dict.keys():
            if os.path.exists(path):
                msg = (u'NOTICE file already exists at: %s' % path)
                errors.append(Error(ERROR, msg))
            else:
                about.dump_android_notice(path, notice_dict[path])

    return unique(errors), abouts
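# Hedged usage sketch for generate(): the inventory path and output directory
# are placeholders. When fetch_license is used, it carries the license API
# URL and the API key, in that order.
#
#   errors, abouts = generate(
#       location='inventory.csv',
#       base_dir='out/about_files',
#       android=False,
#       reference_dir=None,
#       fetch_license=False,
#   )
#   for severity, message in errors:
#       print(severity, message)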
def load_inventory(location, base_dir, reference_dir=None):
    """
    Load the inventory file at `location` for ABOUT and LICENSE files stored
    in the `base_dir`. Return a list of errors and a list of About objects
    validated against the `base_dir`.

    Optionally use `reference_dir` as the directory location of extra
    reference license and notice files to reuse.
    """
    errors = []
    abouts = []
    base_dir = util.to_posix(base_dir)
    # FIXME: do not mix up CSV and JSON
    if location.endswith('.csv'):
        # FIXME: this should not be done here.
        dup_cols_err = check_duplicated_columns(location)
        if dup_cols_err:
            errors.extend(dup_cols_err)
            return errors, abouts
        inventory = util.load_csv(location)
    else:
        inventory = util.load_json(location)

    try:
        # FIXME: this should not be done here.
        dup_about_resource_err = check_duplicated_about_resource(inventory)
        if dup_about_resource_err:
            errors.extend(dup_about_resource_err)
            return errors, abouts

        newline_in_file = check_newline_in_file_field(inventory)
        if newline_in_file:
            errors.extend(newline_in_file)
            return errors, abouts
    except Exception as e:
        # TODO: why catch ALL Exception
        msg = "The essential field 'about_resource' is not found in the <input>"
        errors.append(Error(CRITICAL, msg))
        return errors, abouts

    for i, fields in enumerate(inventory):
        # Check that the input contains the required fields
        required_fields = model.About.required_fields

        for f in required_fields:
            if f not in fields:
                msg = "Required field: %(f)r not found in the <input>" % locals()
                errors.append(Error(ERROR, msg))
                return errors, abouts
        afp = fields.get(model.About.ABOUT_RESOURCE_ATTR)

        # FIXME: this should not be a failure condition
        if not afp or not afp.strip():
            msg = 'Empty column: %(afp)r. Cannot generate .ABOUT file.' % locals()
            errors.append(Error(ERROR, msg))
            continue
        else:
            afp = util.to_posix(afp)
            loc = join(base_dir, afp)
        about = model.About(about_file_path=afp)
        about.location = loc

        # Update the value for 'about_resource':
        # keep only the filename, or '.' if it is a directory
        if 'about_resource' in fields:
            updated_resource_value = u''
            resource_path = fields['about_resource']
            if resource_path.endswith(u'/'):
                updated_resource_value = u'.'
            else:
                updated_resource_value = basename(resource_path)
            fields['about_resource'] = updated_resource_value

        ld_errors = about.load_dict(
            fields,
            base_dir,
            running_inventory=False,
            reference_dir=reference_dir,
        )
        """
        # 'about_resource' field will be generated during the process.
        # No error need to be raise for the missing 'about_resource'.
        for e in ld_errors:
            if e.message == 'Field about_resource is required':
                ld_errors.remove(e)
        """
        for e in ld_errors:
            if e not in errors:
                errors.append(e)
        abouts.append(about)

    return unique(errors), abouts
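# Hedged usage sketch for load_inventory(): paths are placeholders. The
# returned About objects are validated against base_dir but are not written
# out; generate() above is what dumps them to disk.
#
#   errors, abouts = load_inventory(
#       location='inventory.csv',
#       base_dir='out/about_files',
#       reference_dir='reference_licenses',
#   )
#   assert all(isinstance(a, model.About) for a in abouts)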
def test_unique_does_deduplicate_and_keep_ordering(self):
    items = ['a', 'b', 'd', 'b', 'c', 'a']
    expected = ['a', 'b', 'd', 'c']
    results = util.unique(items)
    assert expected == results
def attributecode(input, output, configuration, djc, scancode, min_license_score,
                  reference, template, vartext, quiet, verbose):
    """
    Generate attribution from a JSON, CSV or Excel file.
    """
    if scancode:
        if not input.endswith('.json'):
            msg = 'The input file from ScanCode toolkit needs to be in JSON format.'
            click.echo(msg)
            sys.exit(1)
        if not min_license_score:
            min_license_score = DEFAULT_LICENSE_SCORE

    if min_license_score:
        if not scancode:
            msg = ('This option requires a JSON file generated by ScanCode toolkit '
                   'as the input. The "--scancode" option is required.')
            click.echo(msg)
            sys.exit(1)

    errors, abouts = load_inventory(
        location=input,
        configuration=configuration,
        scancode=scancode,
        reference_dir=reference)

    license_dict, lic_errors = pre_process_and_fetch_license_dict(
        abouts, djc, scancode, reference)
    errors.extend(lic_errors)
    sorted_license_dict = sorted(license_dict)

    # Read the license_file and notice_file and store the texts in a dictionary
    for about in abouts:
        if about.license_file.value or about.notice_file.value:
            if not reference:
                msg = ('"license_file" / "notice_file" field contains a value. '
                       'Use `--reference` to indicate its parent directory.')
                click.echo(msg)
                sys.exit(1)
            if about.license_file.value:
                file_name = about.license_file.value
                error, text = get_file_text(file_name, reference)
                if not error:
                    about.license_file.value = {}
                    about.license_file.value[file_name] = text
                else:
                    errors.append(error)
            if about.notice_file.value:
                file_name = about.notice_file.value
                error, text = get_file_text(file_name, reference)
                if not error:
                    about.notice_file.value = {}
                    about.notice_file.value[file_name] = text
                else:
                    errors.append(error)

    rendered = ''
    if abouts:
        attrib_errors, rendered = generate_attribution_doc(
            abouts=abouts,
            license_dict=dict(sorted(license_dict.items())),
            output_location=output,
            min_license_score=min_license_score,
            template_loc=template,
            variables=vartext,
        )
        errors.extend(attrib_errors)

    errors = unique(errors)
    errors_count = report_errors(errors, quiet, verbose,
                                 log_file_loc=output + '-error.log')

    if rendered:
        # Check if the default template is used
        import filecmp
        default_template = os.path.join(
            os.path.abspath(os.path.dirname(__file__)),
            '../../templates/default_html.template')
        if filecmp.cmp(default_template, template):
            num_comps = number_of_component_generated_from_default_template(output)
            msg = ('{num_comps} component(s) included in the generated '
                   'attribution at {output}'.format(**locals()))
        else:
            msg = 'Attribution generated at: {output}'.format(**locals())
        click.echo(msg)
    else:
        msg = 'Attribution generation failed.'
        click.echo(msg)
    sys.exit(errors_count)