示例#1
0
def get_clusters(tree: etree._ElementTree) -> ty.Dict[str, ty.Set[str]]:
    """Build the coreference clusters of a TEI tree, singletons included.

    Every ``link`` of the first ``linkGrp`` of type ``schema`` becomes one
    cluster, keyed by the link's ``xml:id``. Its members are the
    whitespace-separated tokens of the ``target`` attribute with their first
    character dropped (presumably a leading ``#`` -- confirm against the
    corpus). Every mention span whose id is not a member of any such cluster
    is added as a one-element cluster keyed by its own id.

    Raises:
        IndexError: if the tree has no schema link group.
        KeyError: if a link lacks its id or target attribute, or a mention
            span lacks its id.
        Exception: if two distinct clusters share at least one member.
    """
    schema_groups = tree.xpath(
        './tei:standOff/tei:annotation[@tei:type="coreference"]/tei:linkGrp[@tei:type="schema"]',
        namespaces=NSMAP,
    )
    chains_grp = schema_groups[0]

    mention_spans = tree.xpath(
        './tei:standOff/tei:annotation[@tei:type="coreference"]'
        '/tei:spanGrp[@tei:subtype="mention"]/tei:span',
        namespaces=NSMAP,
    )

    clusters = {}
    for link in chains_grp.iter(f"{TEI}link"):
        raw_targets = link.attrib[f"{TEI}target"]
        clusters[link.attrib[f"{XML}id"]] = {ref[1:] for ref in raw_targets.split()}

    # Mentions that belong to no chain become singleton clusters.
    chained_ids = set().union(*clusters.values())
    for span in mention_spans:
        span_id = span.attrib[f"{XML}id"]
        if span_id not in chained_ids:
            clusters[span_id] = {span_id}

    # Clusters must be pairwise disjoint; report the first offending pair.
    for a_id, a in clusters.items():
        for b_id, b in clusters.items():
            if b is a:
                continue
            intersect = a.intersection(b)
            if intersect:
                raise Exception(
                    f"Schemas {a_id} and {b_id} are not disjoints: {intersect}")
    return clusters
示例#2
0
    def _report_status_checks(self, processed_report_doc: etree._ElementTree,
                              embedded: bool):
        """Run the parent's checks, then sanity-check a non-embedded report.

        For embedded reports nothing beyond the parent's checks is done.
        Otherwise the number of elements below ``/Report/Main`` is counted:
        fewer than 3 raises, exactly 3 prints a hint about adding more
        content. Independently, a hint is printed when no ``Text`` element
        exists below ``/Report/Main/Page``.

        :param processed_report_doc: the processed report document to inspect
        :param embedded: when true, skip the additional checks
        :raises InvalidReportError: if the element count is below 3
        """
        super()._report_status_checks(processed_report_doc, embedded)

        # check for any unsupported local features, e.g. DataTable
        # NOTE - we could eventually have different validators for local and uploaded reports
        if embedded:
            return

        # TODO - validate at least a single element
        element_count = processed_report_doc.xpath("count(/Report/Main//*)")
        if element_count < 3:
            raise InvalidReportError(
                "Empty report - must contain at least one asset/block")
        if element_count < 4:
            url = "https://docs.datapane.com/reports/blocks/layout-pages-and-selects"
            plain_hint = f"Your report only contains a single element - did you know you can include additional plots, tables and text in a report? Check out {url} for more info"
            markdown_hint = f"Your report only contains a single element - did you know you can include additional plots, tables and text in a report? Check out [the docs]({url}) for more info"
            display_msg(text=plain_hint, md=markdown_hint)

        if not processed_report_doc.xpath("boolean(/Report/Main/Page//Text)"):
            display_msg(
                "Your report doesn't contain any text - consider using TextReport to upload assets and add text to your report from your browser"
            )
示例#3
0
def possible_smx_tags(lang1: str, pos: str,
                      tree: _ElementTree) -> Iterator[Tuple[str, List[str]]]:
    """Pair each right-hand bidix lemma with the semtags of its left-hand lemma.

    Args:
        lang1: language whose semtags are looked up through ``lang_tags``.
        pos: part of speech; selects the ``s`` symbols whose ``n`` attribute
            equals it.
        tree: an etree holding the content of an apertium bidix file.

    Yields:
        ``(right_lemma, sorted_semtags)`` for every pair whose left lemma has
        a non-empty semtag entry and whose right element has text.
    """
    # TODO: Merge semtags
    # Lemma -> semtags lookup; a later duplicate lemma overwrites an earlier one.
    tags_by_lemma = {
        lemma: tags for lemma, tags in lang_tags(lang1, pos)
    }

    for pos_symbol in tree.xpath(f'.//p/l/s[@n="{pos}"]'):
        # s -> l -> p: climb two levels to reach the pair element.
        entry = pos_symbol.getparent().getparent()
        lemma_tags = tags_by_lemma.get(entry.find('l').text)
        if not lemma_tags:
            continue
        other_lemma = entry.find('r').text
        if other_lemma is None:
            continue
        yield (other_lemma, sorted(lemma_tags))
示例#4
0
    def parse_bundle_relations(self, xml_tree: etree._ElementTree) -> dict:
        """Group bundle relations by the id found in their cell's ``target``.

        Every element matched by the four ``rel_*bundle*`` tag names is
        inspected together with its first ``mxCell`` descendant. Relations
        where either the relation element or the cell has no attributes are
        ignored.

        :param xml_tree: parsed XML document to search
        :return: dict mapping ``int(mxCell@target)`` to a list of
            ``{"destination": int(mxCell@source)}`` entries, in document order
        :raises AttributeError: if a matched relation has no ``mxCell``
            descendant
        :raises KeyError: if the cell lacks ``source`` or ``target``
        :raises ValueError: if ``source`` or ``target`` is not an integer
        """
        relation_xpath = """
        //rel_abstract_bundle |
        //rel_bundle_abstract |
        //rel_concrete_bundle |
        //rel_bundle_concrete
        """

        source_xpath = ".//mxCell"

        relations = {}
        for relation in xml_tree.xpath(relation_xpath):
            relation_dict = dict(relation.items())
            relation_source_dict = dict(relation.find(source_xpath).items())

            if relation_dict and relation_source_dict:
                # The cell's "target" is used as the grouping key and its
                # "source" as the destination.
                source_id = int(relation_source_dict["target"])
                relations.setdefault(source_id, []).append(
                    {
                        "destination": int(relation_source_dict["source"]),
                    }
                )

        return relations
def generate_session_class(
    omc_interface_xml: etree._ElementTree,
) -> Code:
    """Generate the source of the ``OMCSession`` class.

    The class body starts with ``OpenModelica = OpenModelica`` and then gets
    one ``<last identifier> = <full name>`` alias per non-package child of
    the ``classes`` element of the node whose ``id`` is
    ``OpenModelica.Scripting``. Aliases for elements rejected by
    ``is_supported_element`` are emitted as ``#`` comments.

    Raises:
        ValueError: if the document does not contain exactly one element
            with ``id="OpenModelica.Scripting"``.
    """
    elements_code = Code()
    code = Code(
        "class OMCSession(",
        CodeWithIndent("OMCSessionBase,"),
        "):",
        CodeWithIndent(elements_code),
    )
    elements_code.append("OpenModelica = OpenModelica")

    # Single-target unpacking doubles as an "exactly one match" check.
    (scripting_package,) = omc_interface_xml.xpath(
        '//*[@id="OpenModelica.Scripting"]'
    )
    for modelica_class in scripting_package.xpath('./classes/*'):
        if modelica_class.tag != "package":
            className = TypeName(modelica_class.attrib["id"])
            # Unsupported elements stay visible, but commented out.
            prefix = "" if is_supported_element(modelica_class) else "# "
            elements_code.append(
                f"{prefix}{className.last_identifier} = {className}"
            )

    return code
示例#6
0
    def xmlGetTextNodes(self, doc: etree._ElementTree, xpath: str,
                        namespaces: dict):
        """Join every result of an XPath query into one comma-separated string.

        :param lxml.etree._ElementTree doc: XML document to query
        :param str xpath: XPath expression; its results must be strings
        :param dict namespaces: prefix-to-URI mapping passed to the query
        :return: the results joined with ``", "`` (empty string if none)
        """
        matches = doc.xpath(xpath, namespaces=namespaces)
        separator = ", "
        return separator.join(matches)
示例#7
0
def parse_additional_resources(etree: ET) -> Tuple[Tuple[str, str], ...]:
    """Parse tuple of additional resources.

    One ``(text, link)`` pair is produced per ``p`` element below the node
    with id ``additional-resources``: the text is the paragraph's text
    content passed through ``clean``, the link is ``first`` applied to the
    ``href`` values of the anchors inside the paragraph.
    """
    paragraphs = etree.xpath("//*[@id='additional-resources']//p")
    return tuple(
        (clean(paragraph.text_content()), first(paragraph.xpath(".//a/@href")))
        for paragraph in paragraphs
    )
示例#8
0
    def parse_root(self, xml_tree: etree._ElementTree) -> dict:
        """Return id and name of the first ``root`` element that has attributes.

        :param xml_tree: parsed XML document to search
        :return: ``{"id": int(root@id), "name": root@label}``
        :raises ValueError: if no ``root`` element with attributes exists, or
            if the ``id`` attribute is not an integer
        :raises KeyError: if the selected element lacks ``id`` or ``label``
        """
        xpath = "//root"

        for root in xml_tree.xpath(xpath):
            root_dict = dict(root.items())
            # Attribute-less <root> elements are skipped, not treated as errors.
            if root_dict:
                return {
                    "id": int(root_dict["id"]),
                    "name": root_dict["label"],
                }

        raise ValueError(
            "No <root> element with attributes found in the XML tree"
        )
示例#9
0
def get_fs(tree: etree._ElementTree) -> ty.Dict[str, FeatureStructure]:
    """Parse every ``tei:fs`` element found anywhere in `tree`.

    Return
    -------

    A dict mapping each feature structure's id (as given by ``xmlid``) to
    the result of ``parse_fs`` for that element.

    Raise
    -----

    ElementNotFoundError if `tree` contains no feature structure element.
    """
    feature_structures = tree.xpath("//tei:fs", namespaces=NSMAP)
    if feature_structures:
        parsed = {}
        for fs_elt in feature_structures:
            # Resolve the id before parsing, as the original comprehension did.
            fs_id = xmlid(fs_elt)
            parsed[fs_id] = parse_fs(fs_elt)
        return parsed

    raise ElementNotFoundError(
        "There are no feature structure elements in this tree"
    )
def generate_module_py(
    omc_interface_xml: etree._ElementTree,
) -> Code:
    """Assemble the generated module from its parts.

    In order: an empty line, the import statements, two empty lines, the
    code for the nested Modelica classes (built from every element carrying
    an ``id`` attribute), two empty lines, and the session class.
    """
    # Each part is built in the order it appears in the generated module.
    import_section = generate_import_statements()
    gap_after_imports = empty_line * 2
    identified_elements = omc_interface_xml.xpath('//*[@id]')
    class_section = generate_nested_modelica_class(identified_elements).to_code()
    gap_after_classes = empty_line * 2
    session_section = generate_session_class(omc_interface_xml)

    return Code(
        empty_line,
        import_section,
        gap_after_imports,
        class_section,
        gap_after_classes,
        session_section,
    )
示例#11
0
    def _collect_action_list(et: etree._ElementTree, action_list_name: str) \
            -> Tuple[Union[None, etree._Element], Union[None, etree._Element], List[Union[None, etree._Element]]]:
        """Look up an action list, its condition and the referenced state objects.

        :param et: tree to search
        :param action_list_name: value of the ``name`` attribute of the
            wanted ``actionList``; also matched against the text of
            ``condition/actionListName``
        :return: ``(action_list, condition, state_objects)``. When either the
            action list or a matching condition is missing, the result is
            ``(None, None, [None])``. ``state_objects`` holds the
            ``stateObject`` elements named by the condition's
            ``stateCondition/stateObjectName`` children; names that cannot be
            resolved are silently skipped.
        """
        al_elem = et.find(f"*actionList[@name='{action_list_name}']")
        # xpath() returns a list: indexing it unconditionally raised
        # IndexError when no condition matched, which made the None check
        # below unreachable for the condition.
        conditions = et.xpath(f"*/condition/actionListName[text()='{action_list_name}']/..")
        condition = conditions[0] if conditions else None

        if al_elem is None or condition is None:
            return None, None, [None]

        # Collect affected state objects
        state_objects = []
        for state_object_name in condition.xpath("stateCondition/stateObjectName"):
            state_object = et.find(f"*stateObject[@name='{state_object_name.text}']")
            if state_object is not None:
                state_objects.append(state_object)

        return al_elem, condition, state_objects
示例#12
0
    def xmlGetTextTag(self, doc: etree._ElementTree, xpath: str,
                      namespaces: dict, key: str):
        """Read one attribute from the first node matching an XPath expression.

        :param lxml.etree._ElementTree doc: XML document to query
        :param str xpath: XPath expression to evaluate
        :param dict namespaces: prefix-to-URI mapping passed to the query
        :param key: attribute name to read, e.g. 'codeListValue'
        :return: the attribute value of the first match (``None`` when that
            node lacks the attribute), or the literal string ``"None"`` when
            nothing matches
        """
        matches = doc.xpath(xpath, namespaces=namespaces)
        if len(matches) == 0:
            # Note: the string "None", not the None singleton.
            return "None"
        return matches[0].get(key, None)
示例#13
0
    def parse_features(self, xml_tree: etree._ElementTree) -> dict:
        """Index the ``concrete`` and ``abstract`` elements by integer id.

        Elements without attributes, and elements whose ``id`` contains
        ``FEATURE_CLON_SUFIX``, are left out.

        :param xml_tree: parsed XML document to search
        :return: dict mapping ``int(id)`` to ``{"id": ..., "name": label}``
        :raises KeyError: if a kept element lacks ``id`` or ``label``
        :raises ValueError: if a kept ``id`` is not an integer
        """
        xpath = """
        //concrete |
        //abstract
        """

        features = {}
        for feature in xml_tree.xpath(xpath):
            attributes = dict(feature.items())
            if not attributes:
                continue
            if FEATURE_CLON_SUFIX in attributes["id"]:
                continue
            feature_id = int(attributes["id"])
            features[feature_id] = {
                "id": feature_id,
                "name": attributes["label"],
            }

        return features
示例#14
0
def get_mentions(tree: etree._ElementTree,) -> ty.Dict[ty.Tuple[str, str], Mention]:
    """Extract the mentions from an ANCOR-TEI document.

    Each mention span of the coreference annotation is turned into a
    ``Mention`` via ``Mention.from_urs``. Spans for which that conversion
    raises ``ValueError`` are logged and skipped, as are mentions whose
    ``span_type`` is not in ``MENTION_TYPES``.

    Return
    -------

    A dict keyed by ``(id of the first target, id of the last target)``; a
    later mention with the same key replaces an earlier one.

    Raise
    -----

    ValueError if `tree` has no mention span or no ``text`` element.
    Whatever ``get_fs`` raises is propagated unchanged.
    """
    mentions = tree.xpath(
        (
            './tei:standOff/tei:annotation[@tei:type="coreference"]'
            '/tei:spanGrp[@tei:subtype="mention"]/tei:span'
        ),
        namespaces=NSMAP,
    )
    if not mentions:
        raise ValueError("`tree` has no mention spans")

    features = get_fs(tree)

    texts_lst = tree.findall(f"{TEI}text")
    if not texts_lst:
        raise ValueError(
            "Attempting to extract mentions from a document without a text"
        )

    # Id -> element lookup over every token-like element of every text.
    tokens_id_store = {
        xmlid(elt): elt for text in texts_lst for elt in text.iter(*TOKEN_TAGS)
    }

    res = dict()
    for m_elt in mentions:
        try:
            m = Mention.from_urs(m_elt, tokens_id_store.get, features.get)
        except ValueError as e:
            # `m` is not bound when from_urs fails (or still refers to the
            # previous mention), so the span element itself must be named.
            logger.warning(f"Skipping span {xmlid(m_elt)}: {e}")
            continue
        if m.span_type not in MENTION_TYPES:
            if m.span_type in IGNORED_MENTION_TYPES:
                logger.debug(
                    f"Ignoring span {m.identifier!r} with mention type {m.span_type!r}"
                )
            else:
                logger.warning(
                    f"Span {m.identifier!r} has an invalid mention type ({m.span_type!r})"
                )
            continue
        res[(xmlid(m.targets[0]), xmlid(m.targets[-1]))] = m
    return res
示例#15
0
def get_chains(tree: etree._ElementTree) -> ty.Dict[str, ty.Set[str]]:
    """Read the coreference chains from the first schema link group of `tree`.

    Return
    -------

    A dict mapping each link's id (as given by ``xmlid``) to the set obtained
    by applying ``target_to_id`` to every whitespace-separated token of its
    ``target`` attribute.

    Raise
    -----

    IndexError if `tree` has no schema link group, ValueError if a link has
    no ``target`` attribute. When several schema groups exist, a warning is
    logged and only the first one is read.
    """
    schema_groups = tree.xpath(
        './tei:standOff/tei:annotation[@tei:type="coreference"]/tei:linkGrp[@tei:type="schema"]',
        namespaces=NSMAP,
    )
    chains_grp = schema_groups[0]
    if len(schema_groups) > 1:
        logger.warning(
            "There are more than one schema group in this document"
            f", only {xmlid(chains_grp)!r} will be taken into account"
        )

    chains = {}
    for link in chains_grp.iter(f"{TEI}link"):
        link_id = xmlid(link)
        raw_targets = link.get(f"{TEI}target")
        if raw_targets is None:
            raise ValueError(f"Schema {link_id!r} has no target attribute")
        chains[link_id] = {target_to_id(ref) for ref in raw_targets.split()}
    return chains
示例#16
0
    def parse_feature_relations(self, xml_tree: etree._ElementTree) -> dict:
        """Group feature relations by their source feature id.

        Every element matched by the six ``rel_*`` tag names is inspected
        together with its first ``mxCell`` descendant; relations where either
        has no attributes are ignored. For ``relType == "requires"`` the
        cell's ``source`` is the grouping key and ``target`` the destination;
        for every other type the two roles are swapped.

        :param xml_tree: parsed XML document to search
        :return: dict mapping a source id to a list of
            ``{"destination": int, "constraint_type": relType}`` entries
        :raises AttributeError: if a matched relation has no ``mxCell``
            descendant
        :raises KeyError: if ``relType``, ``source`` or ``target`` is missing
        :raises ValueError: if ``source`` or ``target`` is not an integer
        """
        relation_xpath = """
        //rel_concrete_root |
        //rel_abstract_root |
        //rel_concrete_abstract |
        //rel_concrete_concrete |
        //rel_abstract_concrete |
        //rel_abstract_abstract
        """

        cell_xpath = ".//mxCell"

        relations = {}
        for relation in xml_tree.xpath(relation_xpath):
            relation_attrs = dict(relation.items())
            cell_attrs = dict(relation.find(cell_xpath).items())
            if not (relation_attrs and cell_attrs):
                continue

            rel_type = relation_attrs["relType"]
            # Account for inverted direction in requires relations
            if rel_type == "requires":
                source_key, destination_key = "source", "target"
            else:
                source_key, destination_key = "target", "source"
            source_id = int(cell_attrs[source_key])
            destination_id = int(cell_attrs[destination_key])

            relations.setdefault(source_id, []).append(
                {
                    "destination": destination_id,
                    "constraint_type": rel_type,
                }
            )

        return relations
示例#17
0
    def parse_bundles(self, xml_tree: etree._ElementTree) -> dict:
        """Index the ``bundle`` elements by integer id.

        A bundle whose lower-cased ``bundleType`` is ``range`` is reported as
        ``group_cardinality`` together with its ``lowRange`` / ``highRange``
        values (kept as strings); any other bundle is reported with its
        lower-cased type only. Elements without attributes are skipped.

        :param xml_tree: parsed XML document to search
        :return: dict mapping ``int(id)`` to the bundle description
        :raises KeyError: if a required attribute is missing
        :raises ValueError: if ``id`` is not an integer
        """
        xpath = "//bundle"

        bundles = {}
        for bundle in xml_tree.xpath(xpath):
            attributes = dict(bundle.items())
            if not attributes:
                continue

            bundle_id = int(attributes["id"])
            bundle_type = attributes["bundleType"].lower()

            if bundle_type != "range":
                bundles[bundle_id] = {
                    "id": int(bundle_id),
                    "bundle_type": bundle_type,
                }
                continue

            bundles[bundle_id] = {
                "id": int(bundle_id),
                "bundle_type": "group_cardinality",
                "low_threshold": attributes["lowRange"],
                "high_threshold": attributes["highRange"],
            }

        return bundles
示例#18
0
def parse_abstract(etree: ET) -> str:
    """Parse abstract.

    Returns the raw XPath result for the text nodes of every paragraph below
    a ``div`` whose class contains ``field-type-text-with-summary``.
    """
    query = "//div[contains(@class, 'field-type-text-with-summary')]//p//text()"
    return etree.xpath(query)
示例#19
0
def parse_authors(etree: ET) -> str:
    """Parse all authors.

    Returns the raw XPath result for every ``div`` whose id is ``info``.
    """
    query = "//div[@id='info']"
    return etree.xpath(query)
示例#20
0
def parse_references(etree: ET) -> TS:
    """Parse tuple of reference.

    Returns the raw XPath result for every paragraph below the element whose
    id is ``bibliography``.
    """
    query = "//*[@id='bibliography']//p"
    return etree.xpath(query)
示例#21
0
def parse_canonical(etree: ET) -> str:
    """Parse canonical.

    Returns the raw XPath result for the ``href`` of every ``link`` element
    whose ``rel`` attribute contains ``canonical``.
    """
    query = "//link[contains(@rel, 'canonical')]/@href"
    return etree.xpath(query)
示例#22
0
def parse_keywords(etree: ET) -> TS:
    """Parse tuple of keywords.

    Returns the raw XPath result for the text nodes of every list item below
    the element whose id is ``keywords``.
    """
    query = "//*[@id='keywords']//li//text()"
    return etree.xpath(query)
示例#23
0
def parse_learning_objectives(etree: ET) -> TS:
    """Parse tuple of learning objectives.

    Returns the raw XPath result for every list item below a ``div`` whose
    class contains ``field-name-field-learning-objectives``.
    """
    container = "//div[contains(@class, 'field-name-field-learning-objectives')]"
    query = container + "//li"
    return etree.xpath(query)
示例#24
0
def parse_instructional_assessment_questions(etree: ET) -> TS:
    """Parse tuple of instructional assessment questions.

    Returns the raw XPath result for the items of the ordered list directly
    inside a ``div`` whose class contains ``even``, itself below a ``div``
    whose class contains ``field-name-field-learning-questions``.
    """
    container = "//div[contains(@class, 'field-name-field-learning-questions')]"
    items = "//div[contains(@class, 'even')]/ol/li"
    return etree.xpath(container + items)
示例#25
0
def parse_related_topics(etree: ET) -> TS:
    """Parse tuple of related topics.

    Returns the raw XPath result for the ``href`` of every anchor below the
    element whose id is ``related-topics``.
    """
    query = "//*[@id='related-topics']//a//@href"
    return etree.xpath(query)
 def get_crumb(cls, page: _ElementTree) -> PaliCrumb:
     """Build a ``PaliCrumb`` from the ``href`` of the last ``CRUMBS/a`` link.

     Raises ``IndexError`` when the page has no such link.
     """
     crumb_links = page.xpath("//CRUMBS/a")
     last_href: _Element = crumb_links[-1]
     return PaliCrumb(last_href.get("href"))
 def get_paragraphs(cls, page: _ElementTree) -> List[_Element]:
     """Return the XPath result for every ``p`` element below ``body``."""
     paragraphs = page.xpath("//body//p")
     return paragraphs
示例#28
0
def validate_xpath(xmlnode: etree._ElementTree,
                   xpath: str,
                   ns: str,
                   attr: str,
                   valrow: Dict,
                   failcat: str = "ERROR") -> Dict:
    """ Populates valrow with validation results

    Evaluates ``xpath`` on ``xmlnode`` and records in ``valrow`` whether
    exactly one node was found and whether that node carries the requested
    value. ``valrow`` is modified in place and also returned.

    Args:
        xmlnode (etree._ElementTree): root or parent xmlnode
        xpath (str): xpath expression to search for
        ns (str): namespace for xpath. When not empty, every ``/`` in
            ``xpath`` is replaced by ``/ns:`` and ``ns`` is bound to the
            ``ns`` prefix.
        attr (str): name of the attribute whose value is retrieved. If
            empty, the text of the node is used instead.
        valrow (Dict): initialized validation row to populate. Receives the
            keys ``XPATH``, ``VALIOUT``, ``VALIMSG`` and, only when exactly
            one node is found, ``VALUE``.
        failcat (str): validation output category used when validation
            fails (i.e., ERROR or WARNING)
    Returns:
        Dict: Validation row populated with the validation results.
    """

    valrow["XPATH"] = xpath
    if ns == "":
        valnodes = xmlnode.xpath(xpath)
    else:
        valnodes = xmlnode.xpath(xpath.replace("/", "/ns:"),
                                 namespaces={"ns": ns})

    # Pessimistic defaults, overwritten by every branch below.
    valrow["VALIOUT"] = "ERROR"
    valrow["VALIMSG"] = \
        "Validation unknown error parsing xpath expression in XML"

    match_count = len(valnodes)
    if match_count != 1:
        valrow["VALIOUT"] = failcat
        if match_count > 1:
            valrow["VALIMSG"] = "Multiple nodes in XML"
        else:
            valrow["VALIMSG"] = "Node not found"
        return valrow

    valnode = valnodes[0]
    if attr == "":
        value = valnode.text
        missing_msg = "Node found but value is missing or empty string"
    else:
        value = valnode.get(attr)
        missing_msg = "Node found but attribute is missing"

    if value is None:
        value = ""
        valrow["VALIOUT"] = failcat
        valrow["VALIMSG"] = missing_msg
    else:
        valrow["VALIOUT"] = "PASSED"
        valrow["VALIMSG"] = ""
    valrow["VALUE"] = value

    return valrow
示例#29
0
def Check(root: etree._ElementTree) -> int:
    """Return 0 when `root` passes all checks, else the code of the first failure.

    Codes, tested in this order:
      1 -- the number of ``sign`` text nodes is not exactly one
      2 -- there is more than one ``clef-octave-change`` text node
      3 -- the number of ``fifths`` text nodes is not exactly one
    """
    # (query, predicate on the match count that signals a failure, code)
    rules = (
        ('.//sign/text()', lambda count: count != 1, 1),
        ('.//clef-octave-change/text()', lambda count: count > 1, 2),
        ('.//fifths/text()', lambda count: count != 1, 3),
    )
    for query, is_violated, code in rules:
        if is_violated(len(root.xpath(query))):
            return code
    return 0
示例#30
0
def parse_topic_description(etree: ET) -> TS:
    """Parse tuple of topic description.

    Returns the raw XPath result for the text nodes of every anchor inside an
    ordered list below the element whose id is ``toc``.
    """
    query = "//*[@id='toc']//ol//a//text()"
    return etree.xpath(query)