def get_translation_object(self):
    """
    Return the translation object for this object's language, using the
    resource slug of its license as the translation domain.
    """
    # The domain is read first, then the CC language code is converted to
    # the code Django uses.
    translation_domain = self.license.resource_slug
    django_code = cc_to_django_language_code(self.language_code)
    return get_translation_object(
        django_language_code=django_code,
        domain=translation_domain,
    )
def name_local(legal_code):
    """
    Return the "name_local" entry of the language info for the language of
    the given legal code.
    """
    django_code = cc_to_django_language_code(legal_code.language_code)
    language_info = get_language_info(django_code)
    return language_info["name_local"]
def handle(self, input_directory, **options):
    """
    Load CC0 and BY (3.0 and 4.0) legal code HTML files found in
    ``input_directory``.

    Works in two passes. First, each candidate HTML filename is parsed with
    parse_legalcode_filename() and, unless it is filtered out, a License and
    a LegalCode record are found or created for it. Second, the HTML of every
    selected legal code is parsed; for every version other than 3.0 the
    extracted messages are written out as .po and .mo files, using the
    English text as the msgid.

    Options read from ``options``:
      - "versions": comma-separated versions to include; falsy means all.
      - "languages": comma-separated CC language codes to include; falsy
        means all. English ("en") is always added to the requested set.
      - "unwrapped": if true, effectively disables wrapping in .po files.

    Raises ValueError if a file's language code is not in settings.LANG_INFO
    or if no English legal code was selected for import, and
    NotImplementedError for a license this command cannot handle.
    """
    if options["versions"]:
        versions_to_include = options["versions"].split(",")
    else:
        versions_to_include = None
    if options["languages"]:
        # English is always included: it is imported first and supplies the
        # message keys for every other language.
        languages_to_include = {"en"} | set(options["languages"].split(","))
    else:
        languages_to_include = None
    self.unwrapped = options["unwrapped"]

    legalcodes_to_import = []

    # Get list of html filenames for CC0 and any BY license (any version).
    # We'll filter out the filenames for unwanted versions later.
    html_filenames = sorted(
        f
        for f in os.listdir(input_directory)
        if f.startswith(("by", "zero_1.0")) and f.endswith(".html")
    )
    for filename in html_filenames:
        metadata = parse_legalcode_filename(filename)

        basename = os.path.splitext(filename)[0]
        fullpath = os.path.join(input_directory, filename)

        license_code = metadata["license_code"]
        version = metadata["version"]
        jurisdiction_code = metadata["jurisdiction_code"]
        cc_language_code = metadata[
            "cc_language_code"
        ] or get_default_language_for_jurisdiction(jurisdiction_code)
        # Make sure this is a valid language code (one we know about)
        django_language_code = cc_to_django_language_code(cc_language_code)
        if django_language_code not in settings.LANG_INFO:
            raise ValueError(f"Invalid language_code={cc_language_code}")

        # Just CC0, BY 3.0, & 4.0, and apply any command line options
        include = (
            (
                (license_code in BY_LICENSE_CODES and version in {"3.0", "4.0"})
                or license_code in CC0_LICENSE_CODES
            )
            and (versions_to_include is None or version in versions_to_include)
            and (
                languages_to_include is None
                or cc_language_code in languages_to_include
            )
        )
        if not include:
            continue

        about_url = metadata["about_url"]

        # These are valid for BY only
        license_code_parts = license_code.split("-")
        if "by" in license_code_parts:
            # NOTE(review): every permits_* flag below is tied to the absence
            # of "nd". Confirm that ND licenses are really meant to be stored
            # as not permitting reproduction/distribution/sharing, rather
            # than only derivative works.
            permits_derivative_works = "nd" not in license_code_parts
            permits_reproduction = "nd" not in license_code_parts
            permits_distribution = "nd" not in license_code_parts
            permits_sharing = "nd" not in license_code_parts
            requires_share_alike = "sa" in license_code_parts
            requires_notice = True
            requires_attribution = True
            requires_source_code = False  # GPL, LGPL only, I think
            prohibits_commercial_use = "nc" in license_code_parts
            prohibits_high_income_nation_use = False  # Not any BY 4.0 license
        elif license_code == "CC0":
            # permits anything, requires nothing, prohibits nothing
            permits_derivative_works = True
            permits_reproduction = True
            permits_distribution = True
            permits_sharing = True
            requires_share_alike = False
            requires_notice = False
            requires_attribution = False
            requires_source_code = False
            prohibits_commercial_use = False
            prohibits_high_income_nation_use = False
        else:
            raise NotImplementedError(basename)

        # Find or create a License object
        license, _ = License.objects.get_or_create(
            about=about_url,
            defaults=dict(
                license_code=license_code,
                version=version,
                jurisdiction_code=jurisdiction_code,
                permits_derivative_works=permits_derivative_works,
                permits_reproduction=permits_reproduction,
                permits_distribution=permits_distribution,
                permits_sharing=permits_sharing,
                requires_share_alike=requires_share_alike,
                requires_notice=requires_notice,
                requires_attribution=requires_attribution,
                requires_source_code=requires_source_code,
                prohibits_commercial_use=prohibits_commercial_use,
                prohibits_high_income_nation_use=prohibits_high_income_nation_use,
            ),
        )

        # Find or create a LegalCode object. Already-existing legal codes are
        # (re)imported as well, not only newly created ones.
        legalcode, _ = LegalCode.objects.get_or_create(
            license=license,
            language_code=cc_language_code,
            defaults=dict(
                html_file=fullpath,
            ),
        )
        legalcodes_to_import.append(legalcode)

    # NOW parse the HTML and output message files
    legalcodes_to_import = LegalCode.objects.filter(
        pk__in=[lc.pk for lc in legalcodes_to_import]
    )

    # What are the language codes we have HTML files for?
    cc_language_codes = sorted({lc.language_code for lc in legalcodes_to_import})

    english_by_license_code_version = {}

    # We have to do English first. Django gets confused if you try to load
    # another language and it can't find English, I guess it's looking for
    # something to fall back to.
    if "en" not in cc_language_codes:
        # Same exception type list.remove() would raise, with a usable message.
        raise ValueError(
            "No English ('en') legal code was selected for import; English "
            "must be imported before any other language."
        )
    cc_language_codes.remove("en")
    for cc_language_code in ["en"] + cc_language_codes:
        for legalcode in legalcodes_to_import.filter(
            language_code=cc_language_code,
        ).order_by(
            "-license__version",
            "license__license_code",
            "license__jurisdiction_code",
        ):
            license = legalcode.license
            license_code = license.license_code
            version = license.version
            with open(legalcode.html_file, "r", encoding="utf-8") as f:
                content = f.read()

            if version == "4.0":
                messages_text = self.import_by_40_license_html(
                    content=content,
                    legalcode=legalcode,
                )
            elif version == "3.0":
                if license.jurisdiction_code:
                    # Ported license: we just save the HTML for now
                    legalcode.html = self.import_by_30_ported_license_html(
                        content=content,
                        legalcode=legalcode,
                    )
                    legalcode.save()
                    continue
                else:
                    # Unported license: we parse out the messages like 4.0
                    messages_text = self.import_by_30_unported_license_html(
                        content=content,
                        legalcode=legalcode,
                    )
            elif license_code == "CC0":
                messages_text = self.import_cc0_license_html(
                    content=content,
                    legalcode=legalcode,
                )
            else:
                raise NotImplementedError(
                    f"Have not implemented parsing for {license_code} {version} licenses."
                )

            if version != "3.0":
                # 3.0 doesn't have any translation files - might be the same for other versions
                key = f"{license_code}|{version}"
                if cc_language_code == "en":
                    english_by_license_code_version[key] = messages_text
                english_messages = english_by_license_code_version[key]

                pofile = POFile()
                # The syntax used to wrap messages in a .po file is difficult if you ever
                # want to copy/paste the messages, so if --unwrapped was passed, set a
                # wrap width that will essentially disable wrapping.
                if self.unwrapped:
                    pofile.wrapwidth = 999999
                pofile.metadata = {
                    "Project-Id-Version": f"{license_code}-{version}",
                    "Language": cc_language_code,
                    "MIME-Version": "1.0",
                    "Content-Type": "text/plain; charset=utf-8",
                    "Content-Transfer-Encoding": "8bit",
                }

                # Use the English message text as the message key
                for internal_key, translation in messages_text.items():
                    if cc_language_code == "en":
                        message_key = translation.strip()
                        message_value = ""
                    else:
                        # WORKAROUND - by-nc-nd 4.0 NL has an extra item under s3a.
                        # https://github.com/creativecommons/creativecommons.org/pull/1160
                        if (
                            internal_key == "s3a4_if_you_share_adapted_material"
                            and internal_key not in english_messages
                        ):
                            message_key = (
                                "If You Share Adapted Material You produce, the Adapter's "
                                "License You apply must not prevent recipients of the Adapted "
                                "Material from complying with this Public License."
                            )
                        else:
                            message_key = english_messages[internal_key]
                        message_value = translation

                    pofile.append(
                        POEntry(
                            msgid=clean_string(message_key),
                            msgstr=clean_string(message_value),
                        )
                    )

                po_filename = legalcode.translation_filename()
                # exist_ok avoids the check-then-create race of isdir() + makedirs().
                po_dir = os.path.dirname(po_filename)
                os.makedirs(po_dir, exist_ok=True)
                # Save mofile ourself. We could call 'compilemessages' but it wants to
                # compile everything, which is both overkill and can fail if the venv
                # or project source is not writable. We know this dir is writable, so
                # just save this pofile and mofile ourselves.
                save_pofile_as_pofile_and_mofile(pofile, po_filename)
def parse_legalcode_filename(filename):
    """
    Derive license metadata from the name of a legal code HTML file.

    ``filename`` must be a bare file name (no directory part); a trailing
    ".html" is optional. The name is split on "_" into license, version and
    then, when present, a jurisdiction (only for non-CC0 licenses with a
    version below 4.0) and a language.

    Returns a dict with the keys "license_code", "version",
    "jurisdiction_code", "cc_language_code", "url" and "about_url".

    Raises ValueError if no language can be determined, or if the language
    code does not map to an entry in settings.LANG_INFO.

    COPIED FROM
    https://github.com/creativecommons/cc-link-checker/blob/6bb2eae4151c5f7949b73f8d066c309f2413c4a5/link_checker.py#L231
    and modified a great deal.
    """
    stem = filename[:-5] if filename.endswith(".html") else filename
    pieces = stem.split("_")

    # File names cannot carry "+", so the sampling licenses are spelled out.
    renamed_licenses = {
        "samplingplus": "sampling+",
        "nc-samplingplus": "nc-sampling+",
    }
    first_piece = pieces.pop(0)
    license_name = renamed_licenses.get(first_piece, first_piece)
    version = pieces.pop(0)

    jurisdiction = None
    if license_name.startswith("zero"):
        code_to_return, path_base = "CC0", "publicdomain"
    else:
        code_to_return, path_base = license_name, "licenses"
        # Only non-CC0 licenses older than 4.0 have a jurisdiction component.
        if pieces and float(version) < 4.0:
            jurisdiction = pieces.pop(0)

    language = pieces.pop(0) if pieces else None

    url = posixpath.join(
        "http://creativecommons.org", path_base, license_name, version
    )
    if jurisdiction:
        url = posixpath.join(url, jurisdiction)
        cc_language_code = language or get_default_language_for_jurisdiction(
            jurisdiction, ""
        )
    else:
        cc_language_code = language or DEFAULT_LANGUAGE_CODE

    # With an explicit language the URL names the legalcode document;
    # otherwise it ends with a slash.
    if language:
        url = posixpath.join(url, f"legalcode.{language}")
    else:
        url = f"{url}/"

    if not cc_language_code:
        raise ValueError(f"What language? filename={filename}")

    # Make sure this is a valid language code (one we know about)
    django_language_code = cc_to_django_language_code(cc_language_code)
    if django_language_code not in settings.LANG_INFO:
        raise ValueError(
            f"Invalid language_code={cc_language_code} dj={django_language_code}"
        )

    jurisdiction_code = jurisdiction or ""
    return {
        "license_code": code_to_return,
        "version": version,
        "jurisdiction_code": jurisdiction_code,
        "cc_language_code": cc_language_code,
        "url": url,
        "about_url": compute_about_url(license_name, version, jurisdiction_code),
    }