Exemplo n.º 1
0
    def serialize(self, sink: Union[IO, str, None], cas: Cas, pretty_print=True) -> Union[str, None]:
        """Serialize ``cas`` as an XMI document.

        Args:
            sink: Destination handed unchanged to ``ElementTree.write``. When it
                is ``None`` the document is rendered into an in-memory buffer
                and returned as text instead.
            cas: The CAS whose feature structures, sofas and views are written.
            pretty_print: Forwarded to ``ElementTree.write``.

        Returns:
            The UTF-8 decoded XMI document if ``sink`` is ``None``, otherwise
            ``None``.
        """
        root = etree.Element(
            etree.QName(self._nsmap["xmi"], "XMI"),
            nsmap=self._nsmap,
            **{"{http://www.omg.org/XMI}version": "2.0"},
        )

        self._serialize_cas_null(root)

        def xmi_id_of(feature_structure):
            return feature_structure.xmiID

        # Collect every feature structure, including those that are not
        # directly added to a sofa, and emit them ordered by XMI id.
        all_feature_structures = list(cas._find_all_fs())
        all_feature_structures.sort(key=xmi_id_of)
        for feature_structure in all_feature_structures:
            self._serialize_feature_structure(cas, root, feature_structure)

        for sofa in cas.sofas:
            self._serialize_sofa(root, sofa)

        for view in cas.views:
            self._serialize_view(root, view)

        tree = etree.ElementTree(root)
        etree.cleanup_namespaces(tree, top_nsmap=self._nsmap)

        write_options = {
            "xml_declaration": True,
            "pretty_print": pretty_print,
            "encoding": "UTF-8",
        }

        if sink is not None:
            tree.write(sink, **write_options)
            return None

        # No sink supplied: render into memory and hand the text back.
        buffer = BytesIO()
        tree.write(buffer, **write_options)
        return buffer.getvalue().decode("utf-8")
Exemplo n.º 2
0
    def serialize(
        self,
        sink: Union[IO, str, None],
        cas: Cas,
        pretty_print: bool = True,
        ensure_ascii: bool = False,
        type_system_mode: TypeSystemMode = TypeSystemMode.FULL,
    ) -> Union[str, None]:
        """Serialize ``cas`` as a JSON document.

        The document contains up to three top-level entries, in this order:
        the types (unless ``type_system_mode`` is ``TypeSystemMode.NONE``),
        the feature structures, and the views.

        Args:
            sink: Stream to write to. A text stream (``TextIOBase``) is written
                to directly; any other stream is treated as binary and is
                wrapped temporarily in a UTF-8 text layer. When no sink is
                given (``None`` or otherwise falsy) the JSON text is returned.
            cas: The CAS to serialize.
            pretty_print: Indent the output by two spaces when true.
            ensure_ascii: Forwarded to ``json.dump`` / ``json.dumps``.
            type_system_mode: ``FULL`` embeds every type of the CAS type
                system, ``MINIMAL`` only the transitive closure of the types
                actually used, ``NONE`` omits the types entry altogether.

        Returns:
            The JSON text if no sink was given, otherwise ``None``.

        Raises:
            ValueError: If ``type_system_mode`` is not one of the supported
                modes, or if the data contains NaN/infinite floats (the JSON
                encoder runs with ``allow_nan=False``).
        """
        feature_structures = []

        views = {}
        for view in cas.views:
            views[view.sofa.sofaID] = self._serialize_view(view)

            # The sofa (and its array, when it has one) go into the feature
            # structure list ahead of everything else.
            if view.sofa.sofaArray:
                feature_structures.append(self._serialize_feature_structure(view.sofa.sofaArray))
            feature_structures.append(self._serialize_feature_structure(view.sofa))

        # Find all fs, even the ones that are not directly added to a sofa
        used_types = set()
        for fs in sorted(cas._find_all_fs(include_inlinable_arrays_and_lists=True), key=lambda a: a.xmiID):
            used_types.add(fs.type)
            feature_structures.append(self._serialize_feature_structure(fs))

        data = {}
        if type_system_mode is not TypeSystemMode.NONE:
            if type_system_mode is TypeSystemMode.MINIMAL:
                # Build transitive closure of used types by following parents, features, etc.
                types_to_include = cas.typesystem.transitive_closure(used_types)
            elif type_system_mode is TypeSystemMode.FULL:
                types_to_include = cas.typesystem.get_types()
            else:
                # Fail with a clear message instead of an UnboundLocalError below.
                raise ValueError(f"Unsupported type system mode: {type_system_mode!r}")

            types = {}
            for type_ in types_to_include:
                if type_.name == TYPE_NAME_DOCUMENT_ANNOTATION:
                    continue
                json_type = self._serialize_type(type_)
                types[json_type[NAME_FIELD]] = json_type
            data[TYPES_FIELD] = types

        data[FEATURE_STRUCTURES_FIELD] = feature_structures
        data[VIEWS_FIELD] = views

        dump_options = {
            "sort_keys": False,
            "indent": 2 if pretty_print else None,
            "ensure_ascii": ensure_ascii,
            "allow_nan": False,
        }

        if not sink:
            return json.dumps(data, **dump_options)

        if isinstance(sink, TextIOBase):
            # The caller owns this text stream (it may itself be a
            # TextIOWrapper, e.g. from open(..., "w")); write to it and leave
            # it attached so the caller can keep using and closing it.
            json.dump(data, sink, **dump_options)
            return None

        # Binary sink: add a temporary UTF-8 text layer on top of it.
        text_sink = TextIOWrapper(sink, encoding="utf-8", write_through=True)
        try:
            json.dump(data, text_sink, **dump_options)
        finally:
            # Always detach, even on failure, so that the wrapper does not
            # close the caller's binary stream when it is finalized.
            text_sink.detach()

        return None