Пример #1
0
def _append_signature_matches(doc, elem, signatures: dict, matches) -> None:
    """Append one match child to *elem* per known signature in *matches*.

    Accessions in *matches* are visited in sorted order. An accession that
    is not a key of *signatures* is skipped; every other accession gets a
    child element built by ``_create_match``.

    Args:
        doc: DOM document used as the element factory.
        elem: Parent element that receives the match children.
        signatures: Mapping of signature accession to signature data.
        matches: Mapping of accession to match data for one sequence.
    """
    for signature_acc in sorted(matches):
        try:
            signature = signatures[signature_acc]
        except KeyError:
            # InterPro entry
            continue

        elem.appendChild(
            _create_match(doc, signature, matches[signature_acc]))


def _write_match_tmp(signatures: dict, u2variants: dict, p_proteins: str,
                     p_uniprot2matches: str, start: str, stop: Optional[str],
                     output: str):
    """Write XML ``protein`` elements with their matches to *output*.

    Every protein yielded by ``proteins.range(start, stop)`` is written as a
    ``protein`` element (even when it has no matches), immediately followed
    by one ``protein`` element per variant listed for it in *u2variants*.

    Args:
        signatures: Mapping of signature accession to signature data;
            accessions absent from this mapping are not written.
        u2variants: Mapping of UniProt accession to an iterable of
            ``(variant, length, crc64, matches)`` tuples.
        p_proteins: Argument passed to ``Store`` to open the proteins store.
        p_uniprot2matches: Argument passed to ``Store`` to open the
            accession-to-matches store.
        start: First bound passed to ``proteins.range``.
        stop: Second bound passed to ``proteins.range`` (may be ``None``).
        output: Path of the UTF-8 text file to create.
    """
    proteins = Store(p_proteins)
    u2matches = Store(p_uniprot2matches)
    with open(output, "wt", encoding="utf-8") as fh:
        doc = getDOMImplementation().createDocument(None, None, None)

        for uniprot_acc, protein in proteins.range(start, stop):
            elem = doc.createElement("protein")
            elem.setAttribute("id", uniprot_acc)
            elem.setAttribute("name", protein["identifier"])
            elem.setAttribute("length", str(protein["length"]))
            elem.setAttribute("crc64", protein["crc64"])

            try:
                protein_entries = u2matches[uniprot_acc]
            except KeyError:
                # No matches stored for this protein: the element is still
                # written below, just without children.
                pass
            else:
                _append_signature_matches(doc, elem, signatures,
                                          protein_entries)

            # Serialize only once the element is complete. Doing this in a
            # ``finally`` clause would emit a half-built element whenever
            # building a match raised an unexpected error.
            elem.writexml(fh, addindent="  ", newl="\n")

            protein_variants = u2variants.get(uniprot_acc, [])
            for variant, length, crc64, matches in protein_variants:
                elem = doc.createElement("protein")
                # Variants use their own accession as both id and name.
                elem.setAttribute("id", variant)
                elem.setAttribute("name", variant)
                elem.setAttribute("length", str(length))
                elem.setAttribute("crc64", crc64)

                _append_signature_matches(doc, elem, signatures, matches)

                elem.writexml(fh, addindent="  ", newl="\n")
Пример #2
0
def _write_feature_tmp(features: dict, p_proteins: str,
                       p_uniprot2features: str, start: str,
                       stop: Optional[str], output: str):
    """Write XML ``protein`` elements with their feature matches to *output*.

    Iterates over ``u2features.range(start, stop)``, so only accessions
    present in the features store are written. Each protein's details are
    looked up by accession in the proteins store.

    Args:
        features: Mapping of feature accession to a mapping providing the
            ``"name"``, ``"database"`` and ``"evidence"`` keys.
        p_proteins: Argument passed to ``Store`` to open the proteins store.
        p_uniprot2features: Argument passed to ``Store`` to open the
            accession-to-features store.
        start: First bound passed to the features store's ``range``.
        stop: Second bound passed to the features store's ``range`` (may be
            ``None``).
        output: Path of the UTF-8 text file to create.
    """
    protein_store = Store(p_proteins)
    feature_store = Store(p_uniprot2features)

    with open(output, "wt", encoding="utf-8") as out:
        dom = getDOMImplementation().createDocument(None, None, None)

        for accession, hits in feature_store.range(start, stop):
            info = protein_store[accession]

            protein_node = dom.createElement("protein")
            protein_attrs = (
                ("id", accession),
                ("name", info["identifier"]),
                ("length", str(info["length"])),
                ("crc64", info["crc64"]),
            )
            for attr_name, attr_value in protein_attrs:
                protein_node.setAttribute(attr_name, attr_value)

            for feature_acc in sorted(hits):
                meta = features[feature_acc]
                hit = hits[feature_acc]

                match_node = dom.createElement("match")
                match_attrs = (
                    ("id", feature_acc),
                    ("name", meta["name"]),
                    ("dbname", meta["database"]),
                    ("status", 'T'),
                    ("model", feature_acc),
                    ("evd", meta["evidence"]),
                )
                for attr_name, attr_value in match_attrs:
                    match_node.setAttribute(attr_name, attr_value)

                # There is only one fragment per location, so each location
                # is a plain (start, end, sequence feature) triple.
                ordered_locations = sorted(hit["locations"])
                for pos_start, pos_end, seq_feature in ordered_locations:
                    location_node = dom.createElement("lcn")
                    location_node.setAttribute("start", str(pos_start))
                    location_node.setAttribute("end", str(pos_end))

                    # The attribute is omitted when the value is falsy.
                    if seq_feature:
                        location_node.setAttribute("sequence-feature",
                                                   seq_feature)

                    match_node.appendChild(location_node)

                protein_node.appendChild(match_node)

            protein_node.writexml(out, addindent="  ", newl="\n")