def check_ignorable_clues(rule):
    """
    Validate that all ignorable clues defined in a `rule` Rule object are
    properly detected in that rule text file.

    The rule text file is scanned with the scancode API for copyrights, URLs
    and emails. The detected values (with line numbers dropped, de-duplicated
    and sorted) are compared to the `ignorable_*` attributes of `rule`. Empty
    lists are ignored on both sides.

    Raise an AssertionError when the expected and detected clues differ. The
    failing assertion compares YAML dumps and the detected side carries
    file:// URLs for the rule data file and text file.
    """
    from scancode import api

    text_file = rule.text_file

    # scan clues
    scan_data = {}
    scan_data.update(api.get_copyrights(text_file))
    scan_data.update(api.get_urls(text_file, threshold=0))
    scan_data.update(api.get_emails(text_file, threshold=0))

    # line numbers are not part of the ignorable clues: skip these keys
    line_keys = ('start_line', 'end_line')

    results = OrderedDict()
    for what, detections in scan_data.items():
        # keep only the detected values e.g. a list of detected copyrights,
        # emails, etc
        values = set(
            value
            for detected in detections
            for key, value in detected.items()
            if key not in line_keys
        )
        results['ignorable_' + what] = sorted(values)

    # collect ignorables
    expected = OrderedDict([
        ('ignorable_copyrights', rule.ignorable_copyrights or []),
        ('ignorable_holders', rule.ignorable_holders or []),
        ('ignorable_authors', rule.ignorable_authors or []),
        ('ignorable_urls', rule.ignorable_urls or []),
        ('ignorable_emails', rule.ignorable_emails or []),
    ])

    # drop empty entries and use the same key ordering on both sides
    results = OrderedDict([(k, v) for k, v in sorted(results.items()) if v])
    expected = OrderedDict([(k, v) for k, v in sorted(expected.items()) if v])

    try:
        assert expected == results
    except AssertionError:
        # Only a failed comparison is handled here: anything else (such as a
        # KeyboardInterrupt) must propagate untouched.
        # On failure, we compare again to get additional failure details such
        # as a clickable text_file path
        data_file = rule.data_file
        if not data_file:
            data_file = text_file.replace('.LICENSE', '.yml')

        results['files'] = [
            'file://{data_file}'.format(data_file=data_file),
            'file://{text_file}'.format(text_file=text_file),
        ]
        # this assert will always fail and provide a more detailed failure
        # trace
        assert saneyaml.dump(expected) == saneyaml.dump(results)
def test_get_copyrights_include_copyrights_and_authors(self):
    # api.get_copyrights() is expected to yield one detection for the
    # copyright statement (line 2) and a separate one for the author
    # (line 11) of the test file.
    location = self.get_test_loc('api/copyright/iproute.c')

    copyright_detection = OrderedDict([
        (u'statements', [u'Copyright (c) 2010 Patrick McHardy']),
        (u'holders', [u'Patrick McHardy']),
        (u'authors', []),
        (u'start_line', 2),
        (u'end_line', 2),
    ])
    author_detection = OrderedDict([
        (u'statements', []),
        (u'holders', []),
        (u'authors', [u'Patrick McHardy <*****@*****.**>']),
        (u'start_line', 11),
        (u'end_line', 11),
    ])

    detected = list(api.get_copyrights(location))
    assert [copyright_detection, author_detection] == detected
def test_get_copyrights_include_copyrights_and_authors(self):
    # api.get_copyrights() is expected to return a mapping with one list of
    # detected clues for each of the copyrights, holders and authors keys.

    def clue(value, line):
        # build one expected single-line detection
        return OrderedDict([
            (u'value', value),
            (u'start_line', line),
            (u'end_line', line),
        ])

    location = self.get_test_loc('api/copyright/iproute.c')
    detected = api.get_copyrights(location)

    expected = OrderedDict()
    expected['copyrights'] = [clue(u'Copyright (c) 2010 Patrick McHardy', 2)]
    expected['holders'] = [clue(u'Patrick McHardy', 2)]
    expected['authors'] = [clue(u'Patrick McHardy <*****@*****.**>', 11)]

    assert expected == detected
def scan_one(input_file, copyright, license, verbose=False):
    """
    Scan the single file at `input_file` and return a mapping of scan data.

    The mapping always has a 'location' key set to `input_file`. A
    'copyrights' list is added when `copyright` is true and a 'licenses' list
    is added when `license` is true. Progress messages are printed when
    `verbose` is true.
    """

    def progress(message, color):
        # progress messages stay on the same line and show only if verbose
        if verbose:
            click.secho(message, nl=False, fg=color)

    progress('Scanning: %s: ' % (input_file,), 'blue')
    scanned = dict(location=input_file)

    if copyright:
        progress('copyrights. ', 'green')
        scanned['copyrights'] = list(get_copyrights(input_file))

    if license:
        progress('licenses. ', 'green')
        scanned['licenses'] = list(get_licenses(input_file))

    if verbose:
        # terminate the progress line
        click.secho('', nl=True)

    return scanned
def scan_one(input_file, copyright, license, verbose=False):  # @ReservedAssignment
    """
    Return the data scanned from one file.

    The returned mapping holds the `input_file` under 'location', plus a
    'copyrights' list if `copyright` is set and a 'licenses' list if `license`
    is set. If `verbose` is set, print the scan progress as we go.
    """
    if verbose:
        banner = 'Scanning: %(input_file)s: ' % {'input_file': input_file}
        click.secho(banner, nl=False, fg='blue')

    results = {}
    results['location'] = input_file

    if copyright:
        if verbose:
            click.secho('copyrights. ', nl=False, fg='green')
        copyrights = list(get_copyrights(input_file))
        results['copyrights'] = copyrights

    if license:
        if verbose:
            click.secho('licenses. ', nl=False, fg='green')
        licenses = list(get_licenses(input_file))
        results['licenses'] = licenses

    if verbose:
        # close the progress line
        click.secho('', nl=True)

    return results