def test_get_urls(self):
    """
    Check that api.get_urls() reports the same three URLs, each on a single
    line, both when called without a threshold and with threshold=0.
    """
    location = self.get_test_loc('api/url/IMarkerActionFilter.java')

    # (url, line) pairs: every expected URL starts and ends on the same line
    urls_and_lines = [
        (u'http://www.eclipse.org/legal/epl-v10.html', 2),
        (u'https://github.com/rpm-software-management', 4),
        (u'https://gitlab.com/Conan_Kudo', 6),
    ]
    expected = dict(urls=[
        OrderedDict([
            (u'url', url),
            (u'start_line', line),
            (u'end_line', line),
        ])
        for url, line in urls_and_lines
    ])

    # without an explicit threshold
    results = api.get_urls(location)
    assert expected == results

    # with an explicit zero threshold
    results = api.get_urls(location, threshold=0)
    assert expected == results
def test_get_urls_with_threshold(self):
    """
    Check that api.get_urls() called with threshold=1 reports a single URL
    for the test file.
    """
    location = self.get_test_loc('api/url/IMarkerActionFilter.java')
    results = api.get_urls(location, threshold=1)

    # the only URL expected to be reported with this threshold
    only_url = OrderedDict([
        (u'url', u'http://www.eclipse.org/legal/epl-v10.html'),
        (u'start_line', 2),
        (u'end_line', 2),
    ])
    expected = dict(urls=[only_url])
    assert expected == results
def check_ignorable_clues(rule):
    """
    Validate that all ignorable clues defined in a `rule` Rule object are
    properly detected in that rule text file.

    The `rule.text_file` is scanned for copyrights, URLs and emails. The
    detected values, grouped under an "ignorable_" prefixed key, are compared
    with the corresponding `ignorable_*` attributes of `rule`; empty groups
    are ignored on both sides.

    Raise an AssertionError if the detected and declared clues differ. The
    failure message is a comparison of the YAML dumps of both sides and
    includes file:// links to the rule data and text files.
    """
    # NOTE(review): kept as function-level imports as in the original code,
    # presumably to avoid an import cycle -- confirm before moving them.
    from itertools import chain
    from scancode import api

    text_file = rule.text_file

    # scan clues
    scan_data = {}
    scan_data.update(api.get_copyrights(text_file))
    scan_data.update(api.get_urls(text_file, threshold=0))
    scan_data.update(api.get_emails(text_file, threshold=0))

    # line numbers are not part of the ignorable clues
    line_keys = ('start_line', 'end_line')

    results = OrderedDict()
    for what, detections in scan_data.items():
        # drop keys and line numbers and keep only the unique, sorted values
        # e.g. a list of detected copyrights, emails, etc
        values = chain.from_iterable(
            (value for key, value in detected.items() if key not in line_keys)
            for detected in detections
        )
        results['ignorable_' + what] = sorted(set(values))

    # collect ignorables
    expected = OrderedDict([
        ('ignorable_copyrights', rule.ignorable_copyrights or []),
        ('ignorable_holders', rule.ignorable_holders or []),
        ('ignorable_authors', rule.ignorable_authors or []),
        ('ignorable_urls', rule.ignorable_urls or []),
        ('ignorable_emails', rule.ignorable_emails or []),
    ])

    # keep only non-empty entries, in sorted key order, on both sides
    results = OrderedDict([(k, v) for k, v in sorted(results.items()) if v])
    expected = OrderedDict([(k, v) for k, v in sorted(expected.items()) if v])

    try:
        assert expected == results
    except AssertionError:
        # On failure, we compare again to get additional failure details such
        # as a clickable text_file path
        data_file = rule.data_file
        if not data_file:
            data_file = text_file.replace('.LICENSE', '.yml')

        results['files'] = [
            'file://{data_file}'.format(data_file=data_file),
            'file://{text_file}'.format(text_file=text_file),
        ]
        # this assert will always fail and provide a more detailed failure
        # trace
        assert saneyaml.dump(expected) == saneyaml.dump(results)