def test_update_index_layer():
    """Check that a base update carries the annotation and index layers over.

    A scratch OPF is assembled from the v1 layers and index plus the v2 base,
    ``PechaBaseUpdate`` is run on it, and the resulting layers and index are
    compared with the v2 fixtures.
    """
    src_opf_path = data_path / "v1" / "v1.opf"
    dst_opf_path = data_path / "v1_base_edited" / "v1_base_edited.opf"
    expected_opf_path = data_path / "v2" / "v2.opf"

    # edit v1 base
    dst_opf_path.mkdir(exist_ok=True, parents=True)
    try:
        shutil.copytree(str(src_opf_path / "layers"),
                        str(dst_opf_path / "layers"),
                        dirs_exist_ok=True)
        shutil.copy(str(src_opf_path / "index.yml"),
                    str(dst_opf_path / "index.yml"))
        shutil.copytree(str(expected_opf_path / "base"),
                        str(dst_opf_path / "base"),
                        dirs_exist_ok=True)

        pecha = PechaBaseUpdate(src_opf_path, dst_opf_path)
        pecha.update()

        # test annotations layers
        for layer in ["title", "yigchung", "quotes", "tsawa", "sapche"]:
            result_layer, expected_layer = get_layer(layer, "v1_base_edited", "v2")
            is_layer_same(result_layer, expected_layer)

        # test index layer
        result_index_layer = load_yaml(dst_opf_path / "index.yml")
        expected_index_layer = load_yaml(expected_opf_path / "index.yml")

        is_index_same(result_index_layer, expected_index_layer)
    finally:
        # Always remove the scratch copy: with dirs_exist_ok=True a directory
        # left behind by a failed run would be merged into the next run.
        shutil.rmtree(str(dst_opf_path.parent))
Exemplo n.º 2
0
 def update_layers(self, vol_id, updater):
     """
     Rewrite every layer file found in the directory of volume `vol_id`.

     Each file is loaded, passed to `update_ann_layer` together with
     `updater`, and dumped back to the same path. The return value of
     `update_ann_layer` is ignored, so it presumably mutates the layer
     in place.
     """
     vol_layers_dir = self.layer_path / vol_id
     for layer_file in vol_layers_dir.iterdir():
         ann_layer = load_yaml(layer_file)
         update_ann_layer(ann_layer, updater)
         dump_yaml(ann_layer, layer_file)
Exemplo n.º 3
0
 def get_meta_data(self):
     """
     Return the content of `meta.yml` from the opf directory.

     Any exception raised while building the path or loading the file is
     reported with a printed message and an empty dict is returned instead.
     """
     opf_root = self.opf_path
     try:
         return load_yaml(opf_root / "meta.yml")
     except Exception:
         print("Meta data not Found!!!")
         return {}
Exemplo n.º 4
0
 def get_old_layers(self, new_layers):
     """
     Collect the layer files that already exist for the given layer names.

     Returns a defaultdict mapping layer name -> {volume dir name: loaded
     layer}. A layer name only gets an entry when at least one volume
     directory contains a `<layer>.yml` file.
     """
     existing = defaultdict(dict)
     for layer_name in new_layers:
         for vol_dir in self.dirs["layers_path"].iterdir():
             layer_file = vol_dir / f"{layer_name}.yml"
             if layer_file.is_file():
                 existing[layer_name][vol_dir.name] = load_yaml(layer_file)
     return existing
Exemplo n.º 5
0
    def update_index_layer(self):
        """Update every span of the index layer and write it back.

        The index file at ``self.index_path`` is loaded, the span of each
        annotation and of each of its parts is passed to
        ``self.update_text_span``, and the layer is dumped to the same path.
        ``layer["annotations"]`` is iterated directly, so each element must
        itself provide ``"span"`` and ``"parts"``.
        """
        index_layer = load_yaml(self.index_path)
        for text_ann in index_layer["annotations"]:
            # span of the text itself
            self.update_text_span(text_ann["span"])

            # spans of its sub-texts
            for part in text_ann["parts"]:
                self.update_text_span(part["span"])

        dump_yaml(index_layer, self.index_path)
Exemplo n.º 6
0
 def get_text_spans(self, text_id, index_layer):
     """
     Map volume keys to the spans that belong to `text_id`.

     When `index_layer` is falsy it is loaded from `index.yml` in the opf
     directory. Keys have the form ``v`` + zero-padded 3-digit volume
     number. Spans come from matching parts and from the spans of matching
     annotations; a later span for the same volume replaces an earlier one.
     """
     if not index_layer:
         index_layer = load_yaml(self.opf_path / "index.yml")

     spans_by_vol = {}
     for annotation in index_layer["annotations"].values():
         # parts may be empty/None, in which case there is nothing to scan
         for part in annotation["parts"] or ():
             if part["work_id"] != text_id:
                 continue
             part_span = part["span"]
             spans_by_vol[f'v{part_span["vol"]:03}'] = part_span
         if annotation["work_id"] != text_id:
             continue
         for vol_span in annotation["span"]:
             spans_by_vol[f'v{vol_span["vol"]:03}'] = vol_span
     return spans_by_vol
Exemplo n.º 7
0
    def serialize(self):
        """Serialize the opf to html so it can be shown in the editor workspace.

        The layers are applied first and "Pagination" is dropped from
        ``self.layers``. When "Footnote" is among the layers, the footnote
        references built from that base's ``Footnote.yml`` are appended to
        the body.

        Yields:
            str, str: base file name, serialized html of that base file
        """
        self.apply_layers()
        self.layers = [name for name in self.layers if name != "Pagination"]

        for base_name, body in self.get_result().items():
            if "Footnote" in self.layers:
                footnote_layer = load_yaml(
                    self.opf_path / "layers" / base_name / "Footnote.yml")
                footnotes = self.get_footnote_references(
                    footnote_layer["annotations"])
            else:
                footnotes = ""
            body = self.p_tag_adder(body)
            yield (
                base_name,
                f"<html>\n<head>\n<title></title>\n</head>\n<body>\n{body}{footnotes}</body>\n</html>",
            )
Exemplo n.º 8
0
 def apply_layers(self):
     """
     Apply the index and then every layer in self.layers to each volume.

     If no index layer is set it is loaded from `index.yml` when that file
     exists; the index is applied whenever one is available. If self.layers
     is empty it is filled from `self.get_all_layer(vol_id)`. "Pagination",
     when present, is moved to the end so it is applied last.
     """
     if self.index_layer:
         self.apply_index()
     else:
         index_fn = self.opf_path / "index.yml"
         if index_fn.is_file():
             self.index_layer = load_yaml(index_fn)
             self.apply_index()

     for vol_id in self.base_layers:
         if not self.layers:
             self.layers = self.get_all_layer(vol_id)
         if "Pagination" in self.layers:
             # move the first "Pagination" entry to the end of the list
             self.layers.remove("Pagination")
             self.layers.append("Pagination")
         for layer_id in self.layers:
             self.apply_layer(vol_id, layer_id)
Exemplo n.º 9
0
 def apply_layer(self, vol_id, layer_id):
     """
     Apply the annotations of one layer file to a volume.

     Reads `<opf_path>/layers/<vol_id>/<layer_id>.yml` and calls
     `self.apply_annotation` for each annotation, in file order, whose span
     overlaps the text span recorded in `self.text_spans[vol_id]`. Does
     nothing when the layer file does not exist.

     Each applied annotation is tagged in place with `type` (the layer's
     `annotation_type`) and `id` (its key in the layer).
     """
     layer_fn = self.opf_path / "layers" / vol_id / f"{layer_id}.yml"
     if not layer_fn.is_file():
         return
     layer = load_yaml(layer_fn)

     # `local_ids` is optional and identical for every annotation, so it is
     # looked up once here; an absent key falls back to "" as before.
     uuid2localid = layer.get("local_ids", "")

     for ann_id, ann in layer["annotations"].items():
         text_span = self.text_spans[vol_id]
         ann_span = ann["span"]
         # text begins in middle of the page: keep any annotation that
         # overlaps the text span, not only those fully inside it
         if ann_span["end"] < text_span["start"]:
             continue
         if ann_span["start"] > text_span["end"]:
             continue
         ann["type"] = layer["annotation_type"]
         ann["id"] = ann_id
         self.apply_annotation(vol_id, ann, uuid2localid)
Exemplo n.º 10
0
 def get_index_layer(self, text_id, index_layer):
     """
     Build an index layer restricted to the text `text_id`.

     When `index_layer` is falsy it is loaded from `index.yml` in the opf
     directory. The `id`, `annotation_type` and `revision` fields are
     copied over. An annotation whose own `work_id` matches is kept as is;
     otherwise, if some of its parts match, a reduced annotation is stored
     with the matching `work_id`, an empty `parts` list and the list of
     spans of the matching parts.
     """
     if not index_layer:
         index_layer = load_yaml(self.opf_path / "index.yml")

     text_index_layer = defaultdict(str)
     for field in ("id", "annotation_type", "revision"):
         text_index_layer[field] = index_layer[field]

     selected = defaultdict(str)
     for ann_id, ann in index_layer["annotations"].items():
         if ann["work_id"] == text_id:
             selected[ann_id] = ann
             continue
         if not ann["parts"]:
             continue
         matching_parts = [
             part for part in ann["parts"] if part["work_id"] == text_id
         ]
         if matching_parts:
             selected[ann_id] = {
                 "work_id": matching_parts[-1]["work_id"],
                 "parts": [],
                 "span": [part["span"] for part in matching_parts],
             }

     text_index_layer["annotations"] = selected
     return text_index_layer
Exemplo n.º 11
0
    def serialize(self, toc_levels=None, output_path="./output/epub_output"):
        """Serialize the opf to an .epub file with calibre's ``ebook-convert``.

        The layers are applied, the result is wrapped into an html document
        and written to ``<pecha id>.html`` in the current directory, the css
        template is downloaded to ``template.css``, and ``ebook-convert`` is
        run on them. Both scratch files are removed afterwards.

        Args:
            toc_levels (dict, optional): table-of-content XPath expressions
                keyed by level (1, 2, 3). When empty or omitted,
                ``Tsadra_template.toc_levels`` is used.
            output_path (str or Path): directory the ``<pecha id>.epub`` file
                is written to.

        Returns:
            Path: path of the epub for the first volume returned by
            ``self.get_result()``; ``None`` when there is no volume.
        """
        # Imported locally so this method does not depend on a module-level
        # import being present.
        import subprocess

        output_path = Path(output_path)
        out_html_fn = f"{self.meta['id']}.html"
        pecha_title = self.meta["source_metadata"].get("title", "")
        cover_image = self.meta["source_metadata"].get("cover", "")

        self.apply_layers()
        self.layers = [layer for layer in self.layers if layer != "Pagination"]

        results = self.get_result()
        for vol_id, result in results.items():
            result = f"{self.get_front_page()}{result}"
            footnote_ref_tag = ""
            if "Footnote" in self.layers:
                footnote_fn = self.opf_path / "layers" / vol_id / "Footnote.yml"
                footnote_layer = load_yaml(footnote_fn)
                footnote_ref_tag = self.get_footnote_references(
                    footnote_layer["annotations"])
            result = self.p_tag_adder(result)
            result = self.indentation_adjustment(result)
            serialized_html = (
                f"<html>\n<head>\n\t<title>{pecha_title}</title>\n</head>\n<body>\n"
            )
            serialized_html += f"{result}{footnote_ref_tag}</body>\n</html>"

            html_path = Path(out_html_fn)
            css_path = Path("template.css")
            cover_path = self.opf_path / f"assets/image/{cover_image}"
            out_epub_fn = output_path / f"{self.meta['id']}.epub"
            font_family = "Monlam Uni Ouchan2"
            try:
                # Explicit UTF-8 so the text does not depend on the
                # platform's default encoding.
                html_path.write_text(serialized_html, encoding="utf-8")
                # Downloading css template file from ebook template repo and saving it
                template = requests.get(
                    "https://raw.githubusercontent.com/OpenPecha/ebook-template/master/tsadra_template.css"
                )
                css_path.write_bytes(template.content)

                # XPath expressions to detect chapter titles.
                if not toc_levels:
                    toc_levels = Tsadra_template.toc_levels
                toc_levels = self.set_toc_level(toc_levels, serialized_html)
                level1_toc_Xpath = toc_levels.get(1, "")
                level2_toc_Xpath = toc_levels.get(2, "")
                level3_toc_Xpath = toc_levels.get(3, "")

                # Running ebook-convert (from calibre) to convert the html
                # file to .epub. The arguments are passed as a list, without
                # a shell, so paths and XPath expressions containing spaces
                # or shell metacharacters reach the command unchanged.
                command = [
                    "ebook-convert",
                    out_html_fn,
                    str(out_epub_fn),
                    "--extra-css=./template.css",
                    f"--embed-font-family={font_family}",
                    f"--page-breaks-before={Tsadra_template.book_title_Xpath}",
                ]
                if cover_path.is_file():
                    command.append(f"--cover={cover_path}")
                command += [
                    "--flow-size=0",
                    f"--level1-toc={level1_toc_Xpath}",
                    f"--level2-toc={level2_toc_Xpath}",
                    f"--level3-toc={level3_toc_Xpath}",
                    "--use-auto-toc",
                    "--disable-font-rescaling",
                ]
                try:
                    # check=False: a failing conversion is not an error here;
                    # the existence of the epub is tested below instead.
                    subprocess.run(command, check=False)
                except FileNotFoundError:
                    # A missing converter is reported and then treated like
                    # a failed conversion, as it was when run via the shell.
                    print("ebook-convert command not found, epub not created")
            finally:
                # Removing html file and template file, also when a step
                # above raised.
                for scratch_file in (html_path, css_path):
                    if scratch_file.is_file():
                        scratch_file.unlink()

            if out_epub_fn.is_file():
                self.embed_ibook_specific_font(out_epub_fn)
            # NOTE(review): returning inside the loop means only the first
            # volume is ever converted - confirm this is intended.
            return out_epub_fn
Exemplo n.º 12
0
 def read_index_file(self) -> Dict:
     """
     Load and return the content of the index file at `self.index_fn`.

     Raises:
         FileNotFoundError: if `self.index_fn` is not an existing file; the
             message names the missing path.
     """
     if not self.index_fn.is_file():
         raise FileNotFoundError(f"index file not found: {self.index_fn}")
     return load_yaml(self.index_fn)
Exemplo n.º 13
0
 def read_meta_file(self) -> Dict:
     """Load and return the content of the meta file at `self.meta_fn`."""
     meta = load_yaml(self.meta_fn)
     return meta
Exemplo n.º 14
0
 def read_layers_file(self, base_name: str,
                      layer_name: LayerEnum) -> Union[Dict, None]:
     """
     Load the layer `layer_name` of the base `base_name`.

     The file is looked up at `<layers_path>/<base_name>/<layer_name>.yml`.
     Returns its loaded content, or None when no such file exists.
     """
     layer_fn = self.layers_path / base_name / f"{layer_name}.yml"
     if not layer_fn.is_file():
         return None
     return load_yaml(layer_fn)