def __init__(self):
    """
    Load the licenses from the default location of load_licenses() and from
    the 'non-english' data directory, deprecated licenses included, and keep
    each set by license key together with the result of
    get_licenses_by_spdx_key() for that set.
    """
    licenses = load_licenses(with_deprecated=True)
    self.by_key = licenses
    self.by_spdx_key = get_licenses_by_spdx_key(licenses.values())

    # TODO: not yet used
    non_english_dir = join(
        licensedcode.models.data_dir, 'non-english', 'licenses')
    non_english = load_licenses(non_english_dir, with_deprecated=True)
    self.non_english_by_key = non_english
    self.non_english_by_spdx_key = get_licenses_by_spdx_key(
        non_english.values())
def test_dump_license(self):
    # Dump every license found in the test directory (requested with
    # copy=True), then reload the directory and compare with the expected JSON.
    licenses_dir = self.get_test_loc('models/licenses', copy=True)
    loaded = models.load_licenses(licenses_dir, with_deprecated=True)
    for lic in loaded.values():
        lic.dump()

    # Note: one license is obsolete and not loaded. Other are various
    # exception/version cases
    reloaded = models.load_licenses(licenses_dir, with_deprecated=True)
    results = as_sorted_mapping_seq(reloaded.values())
    expected = self.get_test_loc('models/licenses.dump.expected.json')
    check_json(expected, results)
def test_relocate_license_with_key(self):
    # Relocate every test license to a temp directory under a prefixed key,
    # then reload that directory and compare with the expected JSON.
    source_dir = self.get_test_loc('models/licenses')
    originals = models.load_licenses(source_dir, with_deprecated=True)
    target_dir = self.get_temp_dir('relocate_lics')
    for lic in originals.values():
        prefixed_key = 'new_key-' + lic.key
        lic.relocate(target_dir, new_key=prefixed_key)

    # Note: one license is obsolete and not loaded. Other are various
    # exception/version cases
    relocated = models.load_licenses(target_dir, with_deprecated=True)
    results = as_sorted_mapping_seq(relocated.values())
    expected = self.get_test_loc('models/licenses-new-key.expected.json')
    check_json(expected, results)
def __init__(self, src_dir, match_text=False, match_approx=False):
    """
    `src_dir` is where the License objects are dumped.

    The '-update', '-new' and '-del' sibling directories of `src_dir` are
    created when missing. The ScanCode, composite and non-english licenses
    are loaded and each set is also indexed by lowercased SPDX license key.
    """

    def by_lowercased_spdx_key(licenses_by_key):
        # Licenses without an SPDX license key are left out.
        return {
            lic.spdx_license_key.lower(): lic
            for lic in licenses_by_key.values()
            if lic.spdx_license_key
        }

    self.src_dir = src_dir = os.path.realpath(src_dir)
    self.match_text = match_text
    self.match_approx = match_approx

    # An existing src_dir is flagged as already fetched; a missing one is
    # created and flagged as not fetched.
    # NOTE(review): the original comment said "fetch ONLY if the directory is
    # empty" but only the existence of the directory is checked -- confirm.
    self.fetched = os.path.exists(src_dir)
    if not self.fetched:
        os.mkdir(src_dir)

    base_dir = src_dir.rstrip('\\/')
    sibling_dirs = (
        ('update_dir', '-update'),
        ('new_dir', '-new'),
        ('del_dir', '-del'),
    )
    for attr_name, suffix in sibling_dirs:
        sibling_dir = base_dir + suffix
        setattr(self, attr_name, sibling_dir)
        if not os.path.exists(sibling_dir):
            os.mkdir(sibling_dir)

    self.scancodes_by_key = get_licenses_db()
    self.scancodes_by_spdx_key = by_lowercased_spdx_key(self.scancodes_by_key)

    composites_dir = os.path.join(
        licensedcode.data_dir, 'composites', 'licenses')
    self.composites_by_key = load_licenses(
        composites_dir, with_deprecated=True)
    self.composites_by_spdx_key = by_lowercased_spdx_key(
        self.composites_by_key)

    foreign_dir = os.path.join(
        licensedcode.data_dir, 'non-english', 'licenses')
    self.non_english_by_key = load_licenses(foreign_dir, with_deprecated=True)
    self.non_english_by_spdx_key = by_lowercased_spdx_key(
        self.non_english_by_key)
def test_relocate_license(self):
    # Relocate every test license to a temp directory, then reload that
    # directory and compare with the expected JSON.
    source_dir = self.get_test_loc('models/licenses')
    originals = models.load_licenses(source_dir, with_deprecated=True)
    target_dir = self.get_temp_dir('relocate_lics')
    for lic in originals.values():
        lic.relocate(target_dir)

    # Note: one license is obsolete and not loaded. Other are various
    # exception/version cases
    relocated = models.load_licenses(target_dir, with_deprecated=True)
    # NOTE(review): this orders the to_dict() mappings by comparing them
    # directly; plain dicts are not orderable on Python 3 -- confirm which
    # interpreter and mapping type this test targets.
    results = [lic.to_dict() for lic in relocated.values()]
    results.sort()
    expected = self.get_test_loc('models/licenses.expected.json')
    check_json(expected, results)
def test_rules_from_licenses(self):
    # The test licenses yield four rules and each rule text mentions
    # "distribut" in some letter case.
    licenses_dir = self.get_test_loc('models/licenses')
    licenses = models.load_licenses(licenses_dir)
    rules = list(models.rules_from_licenses(licenses))
    assert 4 == len(rules)
    assert all('distribut' in rule.text().lower() for rule in rules)
def test_build_rules_from_licenses(self):
    # Build rules from the test licenses and compare their sorted mapping
    # form with the expected JSON.
    licenses_dir = self.get_test_loc('models/licenses')
    licenses = models.load_licenses(licenses_dir)
    rules = models.build_rules_from_licenses(licenses)
    # NOTE(review): this orders the to_dict() mappings by comparing them
    # directly; plain dicts are not orderable on Python 3 -- confirm which
    # interpreter and mapping type this test targets.
    results = [rule.to_dict() for rule in rules]
    results.sort()
    expected = self.get_test_loc('models/license_rules.expected.json')
    check_json(expected, results)
def test_get_rules_from_license_texts(self):
    # The test licenses yield four rules and each rule text mentions
    # "distribut" in some letter case.
    test_dir = self.get_test_loc('models/licenses')
    lics = models.load_licenses(test_dir)
    rules = list(models.get_rules_from_license_texts(lics))
    self.assertEqual(4, len(rules))
    for rule in rules:
        # assertIn reports both operands on failure, unlike assertTrue(x in y)
        self.assertIn('distribut', rule.text.lower())
def add_spdx_key_rules():
    """
    Check that every known SPDX license key is properly detected exactly by a
    license rule. If not, create a new rule
    """
    licenses_by_key = load_licenses(with_deprecated=True)
    licenses_by_spdx_key = synclic.get_licenses_by_spdx_key(
        licenses_by_key.values(), include_other=True)

    click.echo("Checking all SPDX ids.")

    # first accumulate non-matches: the ids listed in very_common_ids are
    # never checked, and is_matched() is only called for the remaining ids
    unmatched_licenses = {
        spdx_key: lic
        for spdx_key, lic in sorted(licenses_by_spdx_key.items())
        if spdx_key not in very_common_ids
        and not is_matched(spdx_key, lic.key)
    }

    click.echo("")
    click.echo("{} SPDX ids not matched.".format(len(unmatched_licenses)))

    # then create all rules at once
    for spdx_key, lic in sorted(unmatched_licenses.items()):
        add_rule(spdx_key, lic)
def test_load_license(self):
    # Load the test licenses and compare them with the expected JSON.
    licenses_dir = self.get_test_loc('models/licenses')
    licenses = models.load_licenses(licenses_dir)

    # Note: one license is obsolete and not loaded. Other are various
    # exception/version cases
    results = as_sorted_mapping_seq(licenses.values())
    expected = self.get_test_loc('models/licenses.load.expected.json')
    check_json(expected, results)
def build_index(licenses_db=None, licenses_data_dir=None, rules_data_dir=None):
    """
    Return a LicenseIndex built from the rules and licenses directories.

    Falsy `licenses_data_dir` and `rules_data_dir` arguments are replaced by
    the corresponding licensedcode.models directories. A falsy `licenses_db`
    is replaced by the licenses loaded from `licenses_data_dir`.
    """
    from licensedcode.index import LicenseIndex
    from licensedcode.models import (
        get_rules,
        get_all_spdx_key_tokens,
        get_license_tokens,
        licenses_data_dir as ldd,
        rules_data_dir as rdd,
        load_licenses,
    )
    from licensedcode.legalese import common_license_words

    if not licenses_data_dir:
        licenses_data_dir = ldd
    if not rules_data_dir:
        rules_data_dir = rdd
    if not licenses_db:
        licenses_db = load_licenses(licenses_data_dir=licenses_data_dir)

    return LicenseIndex(
        get_rules(licenses_db=licenses_db, rules_data_dir=rules_data_dir),
        _legalese=common_license_words,
        _spdx_tokens=set(get_all_spdx_key_tokens(licenses_db)),
        _license_tokens=set(get_license_tokens()),
    )
def test_validate_license_library_can_return_errors(self):
    # Validate a deliberately broken license library and check the exact
    # errors, warnings and infos that are reported.
    licenses_dir = self.get_test_loc('models/validate')
    licenses = models.load_licenses(licenses_dir)
    errors, warnings, infos = models.License.validate(
        licenses, no_dupe_urls=True, verbose=True)

    expected_errors = {
        'GLOBAL': [
            'Duplicate texts in multiple licenses:apache-2.0: TEXT, bsd-ack-carrot2: TEXT',
            'Duplicate short name:GPL 1.0 in licenses:gpl-1.0-plus, gpl-1.0',
            'Duplicate name:GNU General Public License 1.0 in licenses:gpl-1.0-plus, gpl-1.0',
        ],
        'bsd-ack-carrot2': [
            'No short name',
            'No name',
            'No category',
            'No owner',
        ],
        'gpl-1.0': ['Unknown license category: GNU Copyleft'],
        'w3c-docs-19990405': ['Unknown license category: Permissive Restricted'],
    }
    expected_warnings = {
        'gpl-1.0': [
            'Some empty text_urls values',
            'Some empty other_urls values',
            'Homepage URL also in text_urls',
            'Homepage URL also in other_urls',
            'Homepage URL same as faq_url',
            'Homepage URL same as osi_url',
            'osi_url same as faq_url',
            'Some duplicated URLs',
        ],
    }
    expected_infos = {'w3c-docs-19990405': [u'No license text']}

    assert expected_errors == errors
    assert expected_warnings == warnings
    assert expected_infos == infos
def test_get_rules_from_license_texts(self):
    # The test licenses yield four rules and each rule text mentions
    # "distribut" in some letter case.
    licenses_dir = self.get_test_loc('models/licenses')
    licenses = models.load_licenses(licenses_dir)
    rules = list(models.get_rules_from_license_texts(licenses))
    assert 4 == len(rules)
    assert all('distribut' in rule.text.lower() for rule in rules)
def test_load_licenses_fails_if_directory_contains_orphaned_files(self):
    # Loading a directory that contains orphaned license files must raise an
    # exception whose message says so.
    test_dir = self.get_test_loc('models/orphaned_licenses')
    try:
        list(models.load_licenses(test_dir))
    except Exception as e:
        assert 'Some License data or text files are orphaned' in str(e)
    else:
        # Kept out of the try body: the AssertionError raised by fail() would
        # otherwise be caught by the handler above and be reported as a
        # confusing message mismatch instead of "Exception not raised".
        self.fail('Exception not raised')
def test_build_rules_from_licenses(self):
    # Build rules from the test licenses and compare their sorted mapping
    # form with the expected JSON.
    licenses_dir = self.get_test_loc('models/licenses')
    licenses = models.load_licenses(licenses_dir)
    rules = models.build_rules_from_licenses(licenses)
    # NOTE(review): this orders the to_dict() mappings by comparing them
    # directly; plain dicts are not orderable on Python 3 -- confirm which
    # interpreter and mapping type this test targets.
    results = [rule.to_dict() for rule in rules]
    results.sort()
    expected = self.get_test_loc('models/rules.expected.json')
    check_json(expected, results)
def get_licenses(self, scancode_licenses):
    """
    Fetch the external licenses, write each fetched text to the `text_file`
    of its license and dump that license, copy `self.original_dir` to
    `self.update_dir`, and return the licenses loaded from `self.update_dir`
    (deprecated ones included).

    NOTE(review): the nesting of `dump()` and `raise` was reconstructed from
    whitespace-collapsed source -- confirm against the original layout.
    """
    print('Fetching and storing external licenses in:', self.original_dir)

    stored = []
    for fetched, fetched_text in self.fetch_licenses(scancode_licenses):
        try:
            with io.open(fetched.text_file, 'w', encoding='utf-8') as text_file:
                text_file.write(fetched_text)
            fetched.dump()
            stored.append(fetched)
        except:
            # show the offending license when tracing, then always re-raise
            if TRACE:
                print()
                print(repr(fetched))
            raise

    print('Stored %d external licenses in: %r.' % (
        len(stored),
        self.original_dir,
    ))

    print('Modified (or not modified) external licenses will be in: %r.' % (self.update_dir, ))
    fileutils.copytree(self.original_dir, self.update_dir)

    print('New external licenses will be in: %r.' % (self.new_dir, ))

    return load_licenses(self.update_dir, with_deprecated=True)
def test_load_license(self):
    # Load the test licenses and compare their sorted mapping form with the
    # expected JSON.
    licenses_dir = self.get_test_loc('models/licenses')
    licenses = models.load_licenses(licenses_dir)

    # Note: one license is obsolete and not loaded. Other are various
    # exception/version cases
    # NOTE(review): this orders the to_dict() mappings by comparing them
    # directly; plain dicts are not orderable on Python 3 -- confirm which
    # interpreter and mapping type this test targets.
    results = [lic.to_dict() for lic in licenses.values()]
    results.sort()
    expected = self.get_test_loc('models/licenses.expected.json')
    check_json(expected, results)
def test_build_rules_from_licenses(self):
    # Build rules from the test licenses and compare them with the expected
    # JSON.
    licenses_dir = self.get_test_loc('models/licenses')
    licenses = models.load_licenses(licenses_dir)
    built_rules = models.build_rules_from_licenses(licenses)
    results = as_sorted_mapping_seq(built_rules)
    expected = self.get_test_loc('models/license_rules.expected.json')
    check_json(expected, results)
def __init__(self, src_dir, match_text=False, match_approx=False):
    """
    `src_dir` is where the License objects are dumped.

    The '-update', '-new' and '-del' sibling directories of `src_dir` are
    created when missing. The ScanCode, composite and non-english licenses
    are loaded and each set is also indexed by lowercased SPDX license key.
    """

    def ensure_sibling_dir(suffix):
        # Return the path of the `src_dir` sibling with this suffix, creating
        # the directory first when it does not exist.
        sibling_dir = self.src_dir.rstrip('\\/') + suffix
        if not os.path.exists(sibling_dir):
            os.mkdir(sibling_dir)
        return sibling_dir

    def index_by_spdx_key(licenses_by_key):
        # Licenses without an SPDX license key are left out.
        indexed = {}
        for lic in licenses_by_key.values():
            if lic.spdx_license_key:
                indexed[lic.spdx_license_key.lower()] = lic
        return indexed

    src_dir = os.path.realpath(src_dir)
    self.src_dir = src_dir
    self.match_text = match_text
    self.match_approx = match_approx

    # An existing src_dir is flagged as already fetched; a missing one is
    # created and flagged as not fetched.
    # NOTE(review): the original comment said "fetch ONLY if the directory is
    # empty" but only the existence of the directory is checked -- confirm.
    self.fetched = os.path.exists(src_dir)
    if not self.fetched:
        os.mkdir(src_dir)

    self.update_dir = ensure_sibling_dir('-update')
    self.new_dir = ensure_sibling_dir('-new')
    self.del_dir = ensure_sibling_dir('-del')

    self.scancodes_by_key = get_licenses_db()
    self.scancodes_by_spdx_key = index_by_spdx_key(self.scancodes_by_key)

    composites_dir = os.path.join(
        licensedcode.data_dir, 'composites', 'licenses')
    self.composites_by_key = load_licenses(
        composites_dir, with_deprecated=True)
    self.composites_by_spdx_key = index_by_spdx_key(self.composites_by_key)

    foreign_dir = os.path.join(
        licensedcode.data_dir, 'non-english', 'licenses')
    self.non_english_by_key = load_licenses(foreign_dir, with_deprecated=True)
    self.non_english_by_spdx_key = index_by_spdx_key(self.non_english_by_key)
def get_licenses(self):
    """
    Return the licenses loaded from `self.update_dir`, deprecated ones
    included.

    When `self.fetched` is false, the external licenses are first fetched
    and `self.src_dir` is copied to `self.update_dir`.
    """
    if self.fetched:
        print('Reusing (possibly modified) external licenses stored in:', self.update_dir)
    else:
        print('Fetching and storing external licenses in:', self.src_dir)
        fetched_by_key = {lic.key: lic for lic in self.fetch_licenses()}
        print('Stored %d external licenses in: %r.' % (len(fetched_by_key), self.src_dir,))

        fileutils.copytree(self.src_dir, self.update_dir)
        print('Modified external licenses will be in: %r.' % (self.update_dir,))
        print('New external licenses will be in: %r.' % (self.new_dir,))
        print('Deleted external licenses will be in: %r.' % (self.del_dir,))

    return load_licenses(self.update_dir, with_deprecated=True)
def get_licenses():
    """
    Return the result of `load_licenses()` called without arguments.

    NOTE(review): the previous docstring described a mapping of `key` to
    `license` objects cached in memory after the first call. No caching is
    visible in this function body; if the result is cached, that must come
    from a decorator or from `load_licenses()` itself -- confirm.
    """
    licenses = load_licenses()
    return licenses
def test_validate_license_library_data(self):
    # Validating the non-deprecated licenses of the default library reports
    # no errors, no warnings and at least some infos.
    active_licenses = models.load_licenses(with_deprecated=False)
    errors, warnings, infos = models.License.validate(
        licenses=active_licenses, verbose=False)
    assert errors == {}
    assert warnings == {}
    assert infos
def test_get_spdx_symbols_fails_on_duplicated_other_spdx_keys(self):
    # Building the SPDX symbols from licenses that share an "other" SPDX key
    # must raise a ValueError that names the problem.
    licenses_dir = self.get_test_loc('spdx/db-dupe-other')
    from licensedcode.models import load_licenses
    duplicated_licenses = load_licenses(licenses_dir)
    try:
        cache.get_spdx_symbols(_test_licenses=duplicated_licenses)
    except ValueError as e:
        assert 'Duplicated "other" SPDX license key' in str(e)
    else:
        self.fail('ValueError not raised!')
def build_unknown_spdx_symbol(licenses_db=None):
    """
    Return a LicenseSymbolLike wrapping the 'unknown-spdx' entry of the
    `licenses_db` mapping of {key: License}. A falsy `licenses_db` is
    replaced by the result of load_licenses().
    """
    from license_expression import LicenseSymbolLike
    from licensedcode.models import load_licenses

    if not licenses_db:
        licenses_db = load_licenses()
    unknown_spdx = licenses_db['unknown-spdx']
    return LicenseSymbolLike(unknown_spdx)
def test_load_license(self):
    # Check the keys, the value type and one text sample of the licenses
    # loaded from the test directory.
    licenses_dir = self.get_test_loc('models/licenses')
    licenses = models.load_licenses(licenses_dir)

    # one license is obsolete and not loaded
    expected_keys = [
        u'apache-2.0',
        u'bsd-ack-carrot2',
        u'w3c-docs-19990405',
    ]
    assert expected_keys == sorted(licenses.keys())

    for lic in licenses.values():
        assert isinstance(lic, models.License)

    # test a sample of a licenses field
    w3c_license = licenses[u'w3c-docs-19990405']
    assert '1994-2002 World Wide Web Consortium' in w3c_license.text
def test_validate_license_library_can_return_errors(self):
    # Validate a deliberately broken license library and check the exact
    # errors, warnings and infos that are reported.
    licenses_dir = self.get_test_loc('models/validate')
    licenses = models.load_licenses(licenses_dir)
    errors, warnings, infos = models.License.validate(
        licenses, no_dupe_urls=True, verbose=False)

    # Tail shared by the two "Unknown license category" messages below.
    valid_categories_hint = (
        '.\nUse one of these valid categories:\n'
        'Commercial\nCopyleft\nCopyleft Limited\nFree Restricted\n'
        'Patent License\nPermissive\nProprietary Free\nPublic Domain\nSource-available\nUnstated License'
    )

    expected_errors = {
        'GLOBAL': [
            'Duplicate texts in multiple licenses: apache-2.0: TEXT, bsd-ack-carrot2: TEXT',
            'Duplicate short name (ignoring case): gpl 1.0 in licenses: gpl-1.0-plus, gpl-1.0',
            'Duplicate name (ignoring case): gnu general public license 1.0 in licenses: gpl-1.0-plus, gpl-1.0',
        ],
        'bsd-ack-carrot2': [
            'No short name',
            'No name',
            'No category: Use "Unstated License" if not known.',
            'No owner: Use "Unspecified" if not known.',
            'No SPDX license key',
        ],
        'foo-2.0': [
            'Unknown language: foobar',
            'No SPDX license key',
        ],
        'gpl-1.0': [
            'Unknown license category: GNU Copyleft' + valid_categories_hint,
            'No SPDX license key',
        ],
        'w3c-docs-19990405': [
            'Unknown license category: Permissive Restricted' + valid_categories_hint,
            'No SPDX license key',
        ],
    }
    expected_warnings = {
        'gpl-1.0': [
            'Some empty text_urls values',
            'Some empty other_urls values',
            'Homepage URL also in text_urls',
            'Homepage URL also in other_urls',
            'Homepage URL same as faq_url',
            'Homepage URL same as osi_url',
            'osi_url same as faq_url',
            'Some duplicated URLs',
        ],
    }
    expected_infos = {
        'foo-2.0': ['No license text'],
        'w3c-docs-19990405': ['No license text'],
    }

    assert errors == expected_errors
    assert warnings == expected_warnings
    assert infos == expected_infos
def test_load_license(self):
    # Check the keys, the value type and one text sample of the licenses
    # loaded from the test directory.
    loaded = models.load_licenses(self.get_test_loc('models/licenses'))

    # one license is obsolete and not loaded
    expected = [u'apache-2.0', u'bsd-ack-carrot2', u'w3c-docs-19990405']
    loaded_keys = sorted(loaded.keys())
    assert expected == loaded_keys

    assert all(isinstance(lic, models.License) for lic in loaded.values())

    # test a sample of a licenses field
    sample_text = loaded[u'w3c-docs-19990405'].text
    assert '1994-2002 World Wide Web Consortium' in sample_text
def build_licenses_db(licenses_data_dir=None):
    """
    Return the result of load_licenses() for ``licenses_data_dir``,
    described upstream as a mapping of license key -> license object.
    A falsy ``licenses_data_dir`` is replaced by
    ``licensedcode.models.licenses_data_dir``.
    """
    from licensedcode.models import load_licenses

    if licenses_data_dir:
        return load_licenses(licenses_data_dir)

    from licensedcode.models import licenses_data_dir as ldd
    return load_licenses(ldd)
def build_licensing(licenses_db=None):
    """
    Return a `license_expression.Licensing` object built from one
    LicenseSymbolLike per value of the `licenses_db` mapping of
    {key: License}. A falsy `licenses_db` is replaced by the result of
    load_licenses().
    """
    from license_expression import LicenseSymbolLike
    from license_expression import Licensing
    from licensedcode.models import load_licenses

    if not licenses_db:
        licenses_db = load_licenses()

    symbols = (LicenseSymbolLike(lic) for lic in licenses_db.values())
    return Licensing(symbols)
def get_licenses_db(licenses_data_dir=None):
    """
    Return the module-level `_LICENSES` licenses, described upstream as a
    mapping of license key -> license object.

    While `_LICENSES` is falsy it is loaded from `licenses_data_dir`, or
    from `licensedcode.models.licenses_data_dir` when that argument is
    falsy. Once `_LICENSES` is truthy it is returned as-is and
    `licenses_data_dir` is ignored.
    """
    global _LICENSES
    if _LICENSES:
        return _LICENSES

    from licensedcode.models import load_licenses
    if not licenses_data_dir:
        from licensedcode.models import licenses_data_dir as ldd
        licenses_data_dir = ldd
    _LICENSES = load_licenses(licenses_data_dir)
    return _LICENSES
def get_licenses_db(licenses_data_dir=None):
    """
    Return the module-level `_LICENSES` licenses, described upstream as a
    mapping of license key -> license object, loading them first while
    `_LICENSES` is falsy.

    The licenses are loaded from `licenses_data_dir`, or from
    `licensedcode.models.licenses_data_dir` when that argument is falsy.
    The argument has no effect once `_LICENSES` is truthy.
    """
    global _LICENSES

    already_loaded = bool(_LICENSES)
    if not already_loaded:
        from licensedcode.models import load_licenses

        data_dir = licenses_data_dir
        if not data_dir:
            from licensedcode.models import licenses_data_dir as ldd
            data_dir = ldd

        _LICENSES = load_licenses(data_dir)

    return _LICENSES
def _patch_license_list():
    """
    Update the SPDX library `LICENSE_MAP` with the result of
    get_licenses_by_spdx_key() for all licenses, deprecated ones included.

    The module-level `_spdx_list_is_patched` flag is set afterwards; nothing
    is done when that flag is already truthy.
    """
    global _spdx_list_is_patched
    if _spdx_list_is_patched:
        return

    from spdx.config import LICENSE_MAP
    from licensedcode.models import load_licenses

    all_licenses = load_licenses(with_deprecated=True)
    LICENSE_MAP.update(get_licenses_by_spdx_key(all_licenses.values()))
    _spdx_list_is_patched = True
def get_licenses(self):
    """
    Return the licenses loaded from `self.update_dir`, deprecated ones
    included.

    When `self.fetched` is true the stored licenses are reused. Otherwise the
    external licenses are fetched first and `self.src_dir` is copied to
    `self.update_dir`.
    """
    if not self.fetched:
        print('Fetching and storing external licenses in:', self.src_dir)

        fetched_by_key = {}
        for lic in self.fetch_licenses():
            fetched_by_key[lic.key] = lic

        print('Stored %d external licenses in: %r.' % (
            len(fetched_by_key),
            self.src_dir,
        ))

        fileutils.copytree(self.src_dir, self.update_dir)

        print('Modified external licenses will be in: %r.' % (self.update_dir, ))
        print('New external licenses will be in: %r.' % (self.new_dir, ))
        print('Deleted external licenses will be in: %r.' % (self.del_dir, ))
    else:
        print('Reusing (possibly modified) external licenses stored in:', self.update_dir)

    return load_licenses(self.update_dir, with_deprecated=True)
def cli(license_dir, verbose):
    """
    Create one SPDX tag-value document for each non-SPDX ScanCode licenses.
    Store these in the DIR directory
    """
    # Scan options shared by every run_scan() call below.
    base_kwargs = dict(
        license=True,
        license_text=True,
        info=True,
        strip_root=True,
        quiet=True,
        return_results=False,
    )

    licenses_by_key = load_licenses(with_deprecated=False)

    for lic in licenses_by_key.values():
        ld = lic.to_dict()

        # Licenses that already have an SPDX key need no document.
        if lic.spdx_license_key:
            if verbose:
                click.echo(
                    "Skipping ScanCode: {key} that is an SPDX license: {spdx_license_key}"
                    .format(**ld))
            continue

        # The license text file is the scan input: skip when it is missing.
        if not lic.text_file or not os.path.exists(lic.text_file):
            if verbose:
                click.echo("Skipping license without text: {key}".format(**ld))
            continue

        if lic.category not in FOSS_CATEGORIES:
            if verbose:
                click.echo("Skipping non FOSS license: {key}".format(**ld))
            continue

        output = "licenseref-scancode-{key}.spdx".format(**ld)
        output = os.path.join(license_dir, output)

        if verbose:
            click.echo(
                "Creating SPDX document for license: {key}".format(**ld))
            click.echo("at: {output}".format(output=output))

        # The scan writes the SPDX tag-value document to the open file.
        with io.open(output, "w", encoding="utf-8") as output_file:
            kwargs = dict(input=lic.text_file, spdx_tv=output_file)
            kwargs.update(base_kwargs)
            run_scan(**kwargs)
def test_get_spdx_symbols_from_dir(self):
    # Map each SPDX symbol built from the test licenses to the key of its
    # license and compare with the expected mapping.
    licenses_dir = self.get_test_loc('spdx/db')
    from licensedcode.models import load_licenses
    spdx_licenses = load_licenses(licenses_dir)

    symbols = cache.get_spdx_symbols(_test_licenses=spdx_licenses)
    result = {}
    for spdx_key, symbol in symbols.items():
        result[spdx_key] = symbol.key

    expected = {
        u'bar': u'xxd',
        u'foo': u'xxd',
        u'qt-lgpl-exception-1.1': u'qt-lgpl-exception-1.1',
        u'xskat': u'xskat',
    }
    assert expected == result
def get_licenses_db(licenses_data_dir=None, _test_mode=False):
    """
    Return the licenses, described upstream as a mapping of license key ->
    license object.

    The module-level `_LICENSES_BY_KEY` is returned when it is truthy and
    `_test_mode` is false. Otherwise the licenses are loaded from
    `licenses_data_dir`, or from `licensedcode.models.licenses_data_dir` when
    that argument is falsy. Freshly loaded licenses are stored in
    `_LICENSES_BY_KEY` unless `_test_mode` is true.
    """
    global _LICENSES_BY_KEY
    if _LICENSES_BY_KEY and not _test_mode:
        return _LICENSES_BY_KEY

    from licensedcode.models import load_licenses
    if not licenses_data_dir:
        from licensedcode.models import licenses_data_dir as ldd
        licenses_data_dir = ldd

    lics_by_key = load_licenses(licenses_data_dir)

    # Do not cache when testing
    if not _test_mode:
        _LICENSES_BY_KEY = lics_by_key
    return lics_by_key
def build_spdx_symbols(licenses_db=None):
    """
    Return a mapping of {lowercased SPDX license key: LicenseSymbolLike}
    built from the main and "other" SPDX license keys of each License in the
    `licenses_db` mapping of {key: License}. A falsy `licenses_db` is
    replaced by the result of load_licenses().

    Raise a ValueError when a lowercased SPDX key is already in the mapping.
    """
    from license_expression import LicenseSymbolLike
    from licensedcode.models import load_licenses

    if not licenses_db:
        licenses_db = load_licenses()

    symbols_by_spdx_key = {}

    def add_symbol(slk, symbol, lic, message):
        # Store `symbol` under `slk`, or raise if that key is already taken.
        existing = symbols_by_spdx_key.get(slk)
        if existing:
            raise ValueError(
                message % dict(slk=slk, lic=lic, existing=existing))
        symbols_by_spdx_key[slk] = symbol

    for lic in licenses_db.values():
        if not lic.spdx_license_key and not lic.other_spdx_license_keys:
            continue

        # The main key and all the other keys share one symbol.
        symbol = LicenseSymbolLike(lic)

        if lic.spdx_license_key:
            add_symbol(
                lic.spdx_license_key.lower(), symbol, lic,
                'Duplicated SPDX license key: %(slk)r defined in '
                '%(lic)r and %(existing)r')

        for other_spdx in lic.other_spdx_license_keys:
            # empty or blank "other" keys are ignored
            if other_spdx and other_spdx.strip():
                add_symbol(
                    other_spdx.lower(), symbol, lic,
                    'Duplicated "other" SPDX license key: %(slk)r defined '
                    'in %(lic)r and %(existing)r')

    return symbols_by_spdx_key
def test_get_texts(self):
    # Every test license has a text that mentions "distribut" in some
    # letter case.
    licenses_dir = self.get_test_loc('models/licenses')
    licenses = models.load_licenses(licenses_dir)
    assert all('distribut' in lic.text.lower() for lic in licenses.values())