def check_spdx_license(licenseText):
    """Check the license text against the SPDX license list.

    The SPDX license ids and texts are read from the Redis database (which is
    populated through ``build_spdx_licenses`` when it holds no keys) and
    compared with ``licenseText`` via ``get_close_matches``.

    Args:
        licenseText: Escaped byte string with the license text. It is
            unescaped with ``string_escape`` and decoded as UTF-8, so this
            function relies on Python 2 string semantics.

    Returns:
        A ``(matchedLicenseIds, matchType)`` tuple:

        * ``(None, 'No match')`` when there are no close matches.
        * ``(<all matched ids>, 'Perfect match')`` when any score is exactly
          1.0 or every score is above 0.99.
        * ``(<license id>, 'Standard License match')`` for the first close
          match accepted by ``checkTextStandardLicense``.
        * ``(<best scoring id>, 'Close match')`` otherwise.
    """
    licenseText = unicode(licenseText.decode('string_escape'), 'utf-8')
    r = redis.StrictRedis(host=getRedisHost(), port=6379, db=0)

    # If redis is empty build the spdx license list in the redis database.
    if not r.keys('*'):
        build_spdx_licenses()
    spdxLicenseIds = r.keys()
    spdxLicenseTexts = r.mget(spdxLicenseIds)
    licenseData = dict(zip(spdxLicenseIds, spdxLicenseTexts))
    matches = get_close_matches(licenseText, licenseData)

    if not matches:
        return None, 'No match'

    scores = matches.values()
    if 1.0 in scores or all(0.99 < score for score in scores):
        return matches.keys(), 'Perfect match'

    for licenseID in matches:
        listedLicense = getListedLicense(licenseID)
        # A falsy result is what this code treats as "text matches the
        # standard license".
        # NOTE(review): presumably the helper returns a difference
        # description when the texts differ -- confirm against its definition.
        if not checkTextStandardLicense(listedLicense, licenseText):
            # Stop at the first standard match. Without this early exit a
            # later non-matching candidate overwrote the result, making the
            # outcome depend on dict iteration order.
            return licenseID, 'Standard License match'

    # No candidate matched a standard license: report the best scoring one.
    return max(matches, key=matches.get), 'Close match'
def check_new_licenses_and_rejected_licenses(inputLicenseText, urlType):
    """Match the license text against rejected and not-yet-approved licenses.

    The issues for rejected licenses and for licenses that are not yet
    approved are collected for ``urlType``, turned into license data with
    ``get_license_data`` and compared with ``inputLicenseText`` through
    ``get_close_matches``.

    Args:
        inputLicenseText: License text to look up.
        urlType: Passed unchanged to the issue-fetching helpers.

    Returns:
        A ``(matches, issueUrl)`` tuple. ``matches`` is the list of close
        match ids; ``issueUrl`` is the issue URL looked up for the first of
        them, or ``''`` when there are no matches.
    """
    issues = get_rejected_licenses_issues(urlType)
    issues.extend(get_yet_not_approved_licenses_issues(urlType))
    licenseData = get_license_data(issues)
    matches = get_close_matches(inputLicenseText, licenseData)

    # Materialise the keys as a real list: on Python 3 ``dict.keys()`` is a
    # view that cannot be indexed, so ``matches[0]`` below would raise
    # TypeError. On Python 2 this is equivalent to the plain ``keys()`` call.
    matches = list(matches.keys())
    if not matches:
        return matches, ''

    issueUrl = get_issue_url_by_id(matches[0], issues)
    return matches, issueUrl
def matcher(text_file, threshold, build):
    """Match a license text file against the SPDX license list.

    The file is read, the SPDX license texts are fetched from Redis and close
    matches are computed with ``get_close_matches``. If
    ``get_matching_string`` yields a non-empty message it is printed;
    otherwise the best scoring license is reported together with its
    similarity percentage and a coloured diff against the input text.

    Args:
        text_file: Path of the file holding the license text to check.
        threshold: Forwarded to ``get_close_matches`` as its third argument.
        build: When truthy, the SPDX license list is (re)built before
            matching. It is also built when ``is_keys_empty()`` is truthy.

    Returns:
        None. All results are written with ``click.echo``.
    """
    try:
        # For python 2
        with codecs.open(text_file, 'r', encoding='string_escape') as handle:
            inputText = handle.read()
        inputText = unicode(inputText, 'utf-8')
    except Exception:
        # For python 3 ('string_escape' and ``unicode`` do not exist there).
        # ``Exception`` keeps the original best-effort fallback while letting
        # KeyboardInterrupt / SystemExit propagate.
        with codecs.open(text_file, 'r', encoding='unicode_escape') as handle:
            inputText = handle.read()

    if build or is_keys_empty():
        click.echo('Building SPDX License List. This may take a while...')
        build_spdx_licenses()

    # Positional arguments: on Python 2 ``os.environ.get`` names its second
    # parameter ``failobj``, so passing ``default=`` raises TypeError there.
    redisHost = os.environ.get("SPDX_REDIS_HOST", "localhost")
    r = redis.StrictRedis(host=redisHost, port=6379, db=0)
    keys = list(r.keys())
    values = r.mget(keys)
    licenseData = dict(zip(keys, values))

    matches = get_close_matches(inputText, licenseData, threshold)
    matchingString = get_matching_string(matches, inputText)
    if matchingString != '':
        click.echo(colors(matchingString, 92))
        return

    licenseID = max(matches, key=matches.get)
    spdxLicenseText = get_spdx_license_text(licenseID)
    similarityPercent = get_similarity_percent(spdxLicenseText, inputText)
    click.echo(
        colors(
            '\nThe given license text matches {}% with that of {} based on Levenstein distance.'
            .format(similarityPercent, licenseID), 94))

    # Colour code passed to ``colors`` for each diff line prefix.
    diffColors = {'+': 92, '-': 91, '@': 90}
    differences = generate_diff(spdxLicenseText, inputText)
    for line in differences:
        # ``line[:1]`` (not ``line[0]``) so an empty line cannot raise
        # IndexError; the prefix is read once, before any colouring.
        colorCode = diffColors.get(line[:1])
        if colorCode is not None:
            line = colors(line, colorCode)
        click.echo(line)