Пример #1
0
def lang_codes_names_and_resource_types(
    working_dir: str = settings.working_dir(),
    translations_json_location: str = settings.TRANSLATIONS_JSON_LOCATION,
) -> Iterable[model.CodeNameTypeTriplet]:
    """
    Yield one CodeNameTypeTriplet per language found in the source data.

    Each triplet carries the language code, the language name, and the
    list of resource type codes collected from that language's
    "contents" entries. Contents entries without a "code" are skipped.
    A language whose "contents" is empty is still yielded, with an
    empty resource type list.

    Example usage in repl:
    >>> from document.domain import resource_lookup
    >>> data = resource_lookup.lang_codes_names_and_resource_types()
    Lookup the resource types available for zh
    >>> [pair[2] for pair in data if pair[0] == "zh"]
    [['cuv', 'tn', 'tq', 'tw']]
    """
    # Using jsonpath in a loop here was prohibitively slow so we
    # use the dictionary in this case.
    for lang in fetch_source_data(working_dir, translations_json_location):
        resource_types: list[str] = []
        for resource_type_dict in lang["contents"]:
            try:
                resource_types.append(resource_type_dict["code"])
            except (KeyError, TypeError):
                # Entry has no usable "code" (missing key, or not a
                # mapping); presumably the data is decoded JSON, for
                # which these are the only failures a string-key lookup
                # can raise -- TODO confirm against fetch_source_data.
                continue
        # Yield once per language, after all of its resource types have
        # been collected. Yielding inside the inner loop produced one
        # partially filled triplet per resource type.
        yield model.CodeNameTypeTriplet(
            lang_code=lang["code"],
            lang_name=lang["name"],
            resource_types=resource_types,
        )
Пример #2
0
def lang_codes_names_and_contents_codes(
    working_dir: str = settings.working_dir(),
    translations_json_location: str = settings.TRANSLATIONS_JSON_LOCATION,
) -> Sequence[tuple[str, str, str]]:
    """
    Return (language code, language name, contents code) tuples, one
    per language in the source data.

    This is a convenience/test helper. The contents code is the "code"
    of the first entry under "contents" in translations.json, e.g.:

    [
        {
        "name": "Abadi",
        "code": "kbt",
        "direction": "ltr",
        "contents": [
        {
            "name": "Bible",
            "code": "reg",    <---- contents > code
            "subcontents": [
            ...

    When that first contents code cannot be read, the string "nil" is
    used in its place.

    Example usage in repl:
    >>> from document.domain import resource_lookup
    >>> data = resource_lookup.lang_codes_names_and_contents_codes()
    >>> [(pair[0], pair[1]) for pair in data if pair[2] == "nil"]
    [('grc', 'Ancient Greek'), ('acq', 'لهجة تعزية-عدنية'), ('gaj-x-ymnk', 'Gadsup Yomunka'), ('mve', 'مارواري (Pakistan)'), ('lus', 'Lushai'), ('mor', 'Moro'), ('tig', 'Tigre')]

    Other possible queries:
    >>> [(pair[0], pair[1]) for pair in data if pair[2] == "reg"]
    >>> [(pair[0], pair[1]) for pair in data if pair[2] == "ulb"]
    >>> [(pair[0], pair[1]) for pair in data if pair[2] == "cuv"]
    >>> [(pair[0], pair[1]) for pair in data if pair[2] == "udb"]

    Lookup the contents code recorded for zh:
    >>> [pair[2] for pair in data if pair[0] == "zh"]
    ['cuv']

    Group by contents code ('nil' stands for a missing code):
    >>> data = sorted(data, key=lambda tuple: tuple[2])
    >>> import itertools
    >>> [tuple[0] for tuple in list(itertools.groupby(data, key=lambda tuple: tuple[2]))]
    ['cuv', 'dkl', 'kar', 'nil', 'pdb', 'reg', 'rg', 'tn', 'tw', 'udb', 'ugnt', 'uhb', 'ulb', 'ulb-wa', 'ust']
    >>> for resource_type in [tuple[0] for tuple in list(itertools.groupby(data, key=lambda tuple: tuple[2]))]:
    ...   [(resource_type, pair[0], pair[1]) for pair in data if pair[2] == resource_type]
    ...

    See <project dir>/lang_codes_names_and_contents_codes_groups.json
    for this output dumped to json format.
    """

    def first_contents_code(lang_entry: Any) -> str:
        """Return the first contents code of lang_entry, or "nil"."""
        try:
            return lang_entry["contents"][0]["code"]
        except Exception:
            return "nil"

    # Using jsonpath in a loop here was prohibitively slow so we
    # walk the decoded data directly.
    return [
        (lang_entry["code"], lang_entry["name"], first_contents_code(lang_entry))
        for lang_entry in fetch_source_data(working_dir, translations_json_location)
    ]
Пример #3
0
def lang_codes_names_resource_types_and_resource_codes(
    working_dir: str = settings.working_dir(),
    translations_json_location: str = settings.TRANSLATIONS_JSON_LOCATION,
) -> Iterable[tuple[str, str, Sequence[tuple[str, Sequence[str]]]]]:
    """
    Yield one tuple per language containing the language code, the
    language name, and a list of (resource type, resource codes) pairs
    available for that language.

    The resource codes of a resource type are the "code" values of the
    entries under that contents entry's "subcontents". Contents entries
    without a "code" are left out of the result. A language whose
    "contents" is empty is still yielded, with an empty list.

    Example usage in repl:
    >>> from document.domain import resource_lookup
    >>> data = resource_lookup.lang_codes_names_resource_types_and_resource_codes()
    Lookup the resource type available for zh
    >>> [pair[2] for pair in data if pair[0] == "zh"]
    [[('cuv', ['gen', 'exo', 'lev', 'num', 'deu', 'jos', 'jdg', 'rut',
    '1sa', '2sa', '1ki', '2ki', '1ch', '2ch', 'ezr', 'neh', 'est',
    'job', 'psa', 'pro', 'ecc', 'sng', 'isa', 'jer', 'lam', 'ezk',
    'dan', 'hos', 'jol', 'amo', 'oba', 'jon', 'mic', 'nam', 'hab',
    'zep', 'hag', 'zec', 'mal', 'mat', 'mrk', 'luk', 'jhn', 'act',
    'rom', '1co', '2co', 'gal', 'eph', 'php', 'col', '1th', '2th',
    '1ti', '2ti', 'tit', 'phm', 'heb', 'jas', '1pe', '2pe', '1jn',
    '2jn', '3jn', 'jud', 'rev']), ('tn', []), ('tq', []), ('tw',
    [])]]
    """
    # Using jsonpath in a loop here was prohibitively slow so we
    # use the dictionary in this case.
    for lang in fetch_source_data(working_dir, translations_json_location):
        resource_types: list[tuple[str, list[str]]] = []
        for resource_type_dict in lang["contents"]:
            # Shape note (taken from a debugging session on this data):
            # "subcontents" is a list of dicts such as
            # {'name': '2 Corinthians', 'category': 'bible-nt',
            #  'code': '2co', 'sort': 48, 'links': [...]}
            # and only each entry's 'code' is used here.
            try:
                resource_type = resource_type_dict["code"]
            except (KeyError, TypeError):
                # No usable "code" on this contents entry; presumably
                # the data is decoded JSON, for which these are the only
                # failures a string-key lookup can raise -- TODO confirm.
                resource_type = None
            resource_codes: list[str] = [
                resource_code_dict["code"]
                for resource_code_dict in resource_type_dict["subcontents"]
            ]
            if resource_type is not None:
                resource_types.append((resource_type, resource_codes))
        # Yield once per language, after all contents entries have been
        # processed. Yielding inside the inner loop produced one
        # partially filled tuple per contents entry.
        yield (lang["code"], lang["name"], resource_types)
Пример #4
0
def resource_directory(
        lang_code: str,
        resource_type: str,
        working_dir: str = settings.working_dir(),
) -> str:
    """
    Return the path "<working_dir>/<lang_code>_<resource_type>".

    The path is only computed; nothing is created or checked on disk.
    """
    directory_name = f"{lang_code}_{resource_type}"
    return os.path.join(working_dir, directory_name)
Пример #5
0
def lang_codes(
    working_dir: str = settings.working_dir(),
    translations_json_location: str = settings.TRANSLATIONS_JSON_LOCATION,
) -> Iterable[Any]:
    """
    Lazily yield the "code" of every language in the source data.

    Intended as a convenience for a UI, e.g. to populate a drop-down
    menu of the language codes available through the API.
    """
    yield from (
        lang_entry["code"]
        for lang_entry in fetch_source_data(working_dir, translations_json_location)
    )
Пример #6
0
def _lookup(
    json_path: str,
    working_dir: str = settings.working_dir(),
    translations_json_location: str = settings.TRANSLATIONS_JSON_LOCATION,
) -> Any:
    """
    Return the distinct values matched by json_path in the source data.

    Matches are de-duplicated through a set, so the order of the
    returned list is not the match order. Presumably an empty list is
    returned when nothing matches -- that depends on jp.match yielding
    no results for a missing node.
    """
    source_data = fetch_source_data(working_dir, translations_json_location)
    unique_matches = set(jp.match(json_path, source_data))
    return list(unique_matches)
Пример #7
0
def lang_codes_and_names(
    working_dir: str = settings.working_dir(),
    translations_json_location: str = settings.TRANSLATIONS_JSON_LOCATION,
) -> list[tuple[str, str]]:
    """
    Return (language code, display label) tuples sorted by label.

    The label has the form "<name> (language code: <code>)". Intended
    as a convenience for a UI, e.g. to populate a drop-down menu.
    """
    labelled_codes = [
        (
            lang_entry["code"],
            "{} (language code: {})".format(lang_entry["name"], lang_entry["code"]),
        )
        for lang_entry in fetch_source_data(working_dir, translations_json_location)
    ]
    # Order by the display label, not by the code.
    labelled_codes.sort(key=lambda entry: entry[1])
    return labelled_codes
Пример #8
0
def tw_resource_dir(lang_code: str) -> Optional[str]:
    """
    Return the TW resource asset directory for lang_code, or None.

    The directory is found by convention rather than passed in: it is
    the first glob match of
    "<working dir>/<lang_code>_<TW>*/<lang_code>_<TW>*".
    """
    # Deriving the path from a naming convention is a little hacky, but
    # it means only a lang_code and the TW constant are needed. That
    # keeps TWResource decoupled from the other Resource subclasses,
    # which would otherwise need TWResource's resource_dir handed to
    # them. It is a design tradeoff.
    lang_tw_prefix = "{}_{}".format(lang_code, TW)
    search_pattern = "{}/{}*/{}*".format(
        settings.working_dir(), lang_tw_prefix, lang_tw_prefix
    )
    matches = glob(search_pattern)
    if not matches:
        # No match means the user did not request a TW resource as part
        # of their document request, which is a valid state of affairs.
        return None
    return matches[0]
Пример #9
0
def convert_html_to_pdf(
    document_request_key: str,
    book_content_units: Iterable[model.BookContent],
    unfound_resource_lookup_dtos: Iterable[model.ResourceLookupDto],
    unloaded_resource_lookup_dtos: Iterable[model.ResourceLookupDto],
    output_dir: str = settings.output_dir(),
    logo_image_path: str = settings.LOGO_IMAGE_PATH,
    working_dir: str = settings.working_dir(),
    wkhtmltopdf_options: Mapping[str,
                                 Optional[str]] = settings.WKHTMLTOPDF_OPTIONS,
    docker_container_pdf_output_dir: str = settings.
    DOCKER_CONTAINER_PDF_OUTPUT_DIR,
    in_container: bool = settings.IN_CONTAINER,
    book_names: Mapping[str, str] = bible_books.BOOK_NAMES,
) -> None:
    """
    Generate a PDF, with a cover page, from the document's HTML file.

    Reads "<output_dir>/<document_request_key>.html", renders a cover
    page from the "cover" template into "<working_dir>/cover.html", and
    converts both to a PDF with pdfkit. When in_container is true the
    PDF is then copied to docker_container_pdf_output_dir with cp.

    Args:
        document_request_key: Base name of the HTML input file; also
            passed to pdf_output_filename to get the PDF path.
        book_content_units: Units whose lang_name and resource_code
            make up the cover title.
        unfound_resource_lookup_dtos: Lookups listed on the cover as
            unfound.
        unloaded_resource_lookup_dtos: Lookups listed on the cover as
            unloaded (also logged at debug level).
        output_dir: Directory holding the HTML input.
        logo_image_path: Image file embedded base64-encoded in the cover.
        working_dir: Directory the cover HTML is written to.
        wkhtmltopdf_options: Options handed to pdfkit.
        docker_container_pdf_output_dir: Copy destination for the PDF.
        in_container: Whether to copy the PDF after generation.
        book_names: Mapping from resource code to book name.

    Raises:
        AssertionError: If the HTML input or the generated PDF does not
            exist (kept as asserts so the exception type is unchanged;
            note these checks are stripped under python -O).
        KeyError: If a unit's resource_code is not in book_names.
    """

    def dto_summary(dtos: Iterable[model.ResourceLookupDto]) -> str:
        """Return sorted, de-duplicated "lang-type-code" keys, joined."""
        return COMMASPACE.join(
            sorted({
                "{}-{}-{}".format(dto.lang_code, dto.resource_type,
                                  dto.resource_code)
                for dto in dtos
            }))

    now = datetime.datetime.now()
    revision_date = "Generated on: {}-{}-{}".format(now.year, now.month,
                                                    now.day)
    title = COMMASPACE.join(
        sorted({
            "{}: {}".format(
                book_content_unit.lang_name,
                book_names[book_content_unit.resource_code],
            )
            for book_content_unit in book_content_units
        }))
    unfound = dto_summary(unfound_resource_lookup_dtos)
    unloaded = dto_summary(unloaded_resource_lookup_dtos)
    if unloaded:
        logger.debug("Resource requests that could not be loaded: %s",
                     unloaded)
    html_file_path = "{}.html".format(
        os.path.join(output_dir, document_request_key))
    assert os.path.exists(html_file_path)
    output_pdf_file_path = pdf_output_filename(document_request_key)
    with open(logo_image_path, "rb") as fin:
        base64_encoded_logo_image = base64.b64encode(fin.read())
    images: dict[str, str | bytes] = {
        "logo": base64_encoded_logo_image,
    }
    # Use Jinja2 to instantiate the cover page.
    cover = instantiated_template(
        "cover",
        model.CoverPayload(
            title=title,
            unfound=unfound,
            unloaded=unloaded,
            revision_date=revision_date,
            images=images,
        ),
    )
    cover_filepath = os.path.join(working_dir, "cover.html")
    # The title can contain non-ASCII language names, so do not rely on
    # the platform default encoding.
    # NOTE(review): assumes the cover template is meant to be UTF-8.
    with open(cover_filepath, "w", encoding="utf-8") as fout:
        fout.write(cover)
    pdfkit.from_file(
        html_file_path,
        output_pdf_file_path,
        options=wkhtmltopdf_options,
        cover=cover_filepath,
    )
    assert os.path.exists(output_pdf_file_path)
    logger.debug("IN_CONTAINER: %s", in_container)
    if in_container:
        # Pass an argument list rather than a shell string so that paths
        # containing spaces or shell metacharacters are copied verbatim.
        copy_command = [
            "cp",
            output_pdf_file_path,
            docker_container_pdf_output_dir,
        ]
        logger.info("About to cp PDF to from Docker volume to host")
        logger.debug("Copy PDF command: %s", " ".join(copy_command))
        return_code = subprocess.call(copy_command)
        if return_code != 0:
            # Still best-effort (no exception), but no longer silent.
            logger.warning("Copy PDF command exited with status %s",
                           return_code)
Пример #10
0
    def transform_tn_missing_resource_code_markdown_links(self, source: str) -> str:
        """
        Rewrite translation note links that are relative to the current
        book (i.e. that carry no resource code of their own).

        For every match of
        TN_MARKDOWN_RELATIVE_TO_CURRENT_BOOK_SCRIPTURE_LINK_RE the
        resource code is taken from the first resource request for this
        instance's language whose resource type contains TN. If the
        note's markdown file exists under the working directory, the
        match is replaced by an anchor link to the note; otherwise, or
        when no such TN resource request exists, the parenthesized match
        is replaced by the plain scripture reference text so that it is
        not clickable.

        Returns the transformed source.
        """
        # The TN resource request depends only on instance state, not on
        # the individual link, so look it up once instead of once per
        # match.
        matching_resource_requests: list[model.ResourceRequest] = [
            resource_request
            for resource_request in self._resource_requests
            if resource_request.lang_code == self._lang_code
            and TN in resource_request.resource_type
        ]
        matching_resource_request: Optional[model.ResourceRequest] = (
            matching_resource_requests[0] if matching_resource_requests else None
        )

        for match in re.finditer(
            link_regexes.TN_MARKDOWN_RELATIVE_TO_CURRENT_BOOK_SCRIPTURE_LINK_RE, source
        ):
            scripture_ref = match.group("scripture_ref")
            chapter_num = match.group("chapter_num")
            verse_ref = match.group("verse_ref")

            note_exists = False
            if matching_resource_request is not None:
                resource_code = matching_resource_request.resource_code
                # Build a file path to the TN note being requested.
                first_resource_path_segment = "{}_{}".format(
                    matching_resource_request.lang_code,
                    matching_resource_request.resource_type,
                )
                second_resource_path_segment = "{}_tn".format(
                    matching_resource_request.lang_code
                )
                path = "{}.md".format(
                    os.path.join(
                        settings.working_dir(),
                        first_resource_path_segment,
                        second_resource_path_segment,
                        resource_code,
                        chapter_num,
                        verse_ref,
                    )
                )
                note_exists = os.path.exists(path)

            if note_exists:
                # Create anchor link to translation note
                new_link = settings.TRANSLATION_NOTE_ANCHOR_LINK_FMT_STR.format(
                    scripture_ref,
                    self._lang_code,
                    bible_books.BOOK_NUMBERS[resource_code].zfill(3),
                    chapter_num.zfill(3),
                    verse_ref.zfill(3),
                )
                # Replace the match text with the new anchor link
                source = source.replace(
                    match.group(0),  # The whole match
                    "({})".format(new_link),
                )
            else:
                # Either the TN note file does not exist, or the TN
                # resource was not part of the DocumentRequest. Replace
                # the whole match plus surrounding parentheses with the
                # link text only so that it is not clickable.
                # NOTE(review): this branch replaces "(match)" while the
                # branch above replaces the bare match -- confirm against
                # the regex that this asymmetry is intended.
                source = source.replace(
                    "({})".format(match.group(0)), scripture_ref
                )

        return source
Пример #11
0
    def transform_tn_prefixed_markdown_links(self, source: str) -> str:
        """
        Rewrite translation note links that name their own language and
        resource code.

        For every match of TN_MARKDOWN_SCRIPTURE_LINK_RE: if a TN
        resource request for the linked language and resource code
        exists and the note's markdown file is present under the working
        directory, the match becomes an anchor link to that note. In
        every other case the match is replaced by the plain scripture
        reference text.

        Returns the transformed source.
        """
        for link in re.finditer(link_regexes.TN_MARKDOWN_SCRIPTURE_LINK_RE, source):
            whole_link = link.group(0)
            scripture_ref = link.group("scripture_ref")
            lang_code = link.group("lang_code")
            resource_code = link.group("resource_code")
            chapter_num = link.group("chapter_num")
            verse_ref = link.group("verse_ref")

            # NOTE(id:check_for_resource_request) The TN asset file is
            # only worth looking up when the user actually requested the
            # TN resource identified by the link's
            # lang_code/resource_type/resource_code combination in the
            # DocumentRequest.
            candidates: list[model.ResourceRequest] = [
                request
                for request in self._resource_requests
                if request.lang_code == lang_code
                and TN in request.resource_type
                and request.resource_code == resource_code
            ]

            # Default: strip the link down to its text. This covers both
            # "TN resource not requested" and "TN note file missing".
            replacement = scripture_ref
            if candidates:
                tn_request: model.ResourceRequest = candidates[0]
                # File path of the TN note the link points at.
                note_path = "{}.md".format(
                    os.path.join(
                        settings.working_dir(),
                        "{}_{}".format(
                            tn_request.lang_code, tn_request.resource_type
                        ),
                        "{}_tn".format(tn_request.lang_code),
                        resource_code,
                        chapter_num,
                        verse_ref,
                    )
                )
                if os.path.exists(note_path):
                    # The note exists: point at its anchor instead.
                    anchor_link = settings.TRANSLATION_NOTE_ANCHOR_LINK_FMT_STR.format(
                        scripture_ref,
                        tn_request.lang_code,
                        bible_books.BOOK_NUMBERS[tn_request.resource_code].zfill(3),
                        chapter_num.zfill(3),
                        verse_ref.zfill(3),
                    )
                    replacement = "({})".format(anchor_link)

            source = source.replace(whole_link, replacement)

        return source