Exemplo n.º 1
0
class BioPAXModel:
    """Paxtools model for BioPAX data and utils.

    The class starts a JVM (through JPype) with the bundled Paxtools jar on
    the class path, keeps handles to the BioPAX Level 3 interface classes
    used for type checks, and offers helpers that look elements up by URI
    in the loaded model and extract plain Python values from them.

    Attributes set by ``__init__``:
        java_io_: JPype handle to the ``java.io`` package.
        paxtools_: JPype handle to the ``org.biopax.paxtools`` package.
        io_: ``SimpleIOHandler`` configured for BioPAX Level 3.
        *_class_: ``java.lang.Class`` objects of Level 3 interfaces,
            compared against ``getModelInterface()`` of model elements.
        model_: the loaded Paxtools model, ``None`` until ``load`` is called.
    """

    # Java package that contains the BioPAX Level 3 model interfaces.
    _LEVEL3_PACKAGE = "org.biopax.paxtools.model.level3"

    def __init__(self):
        """Initialize Java and load BioPAX classes of interest."""
        if not isJVMStarted():
            jar_path = os.path.join(
                os.path.dirname(__file__), "paxtools/paxtools.jar")
            startJVM(
                getDefaultJVMPath(),
                "-ea",
                "-Xmx1g",
                "-Djava.class.path=%s" % jar_path)

        self.java_io_ = JPackage("java.io")
        self.paxtools_ = JPackage("org.biopax.paxtools")
        self.io_ = self.paxtools_.io.SimpleIOHandler(
            self.paxtools_.model.BioPAXLevel.L3)

        self.protein_class_ = self._level3_class("Protein")
        self.protein_reference_class_ = self._level3_class(
            "ProteinReference")
        self.fragment_feature_class_ = self._level3_class("FragmentFeature")
        self.modification_feature_class_ = self._level3_class(
            "ModificationFeature")
        self.small_molecule_class_ = self._level3_class("SmallMolecule")
        self.small_molecule_reference_class_ = self._level3_class(
            "SmallMoleculeReference")
        self.rna_class_ = self._level3_class("Rna")
        self.rna_reference_class_ = self._level3_class("RnaReference")
        self.complex_class_ = self._level3_class("Complex")
        self.dna_class_ = self._level3_class("Dna")
        self.dna_reference_class_ = self._level3_class("DnaReference")
        self.catalysis_class_ = self._level3_class("Catalysis")
        self.biochemical_reaction_class_ = self._level3_class(
            "BiochemicalReaction")
        self.complex_assembly_class_ = self._level3_class("ComplexAssembly")
        self.model_ = None

    @classmethod
    def _level3_class(cls, simple_name):
        """Return the ``java.lang.Class`` of a BioPAX Level 3 interface.

        ``simple_name`` is the unqualified interface name (e.g. "Protein");
        the class is resolved and initialized via the system class loader.
        """
        return java.lang.Class.forName(
            "%s.%s" % (cls._LEVEL3_PACKAGE, simple_name), True,
            java.lang.ClassLoader.getSystemClassLoader())

    def _feature_owner_uris(self, feature):
        """Collect URIs identifying the owners of an entity feature.

        For every element returned by ``feature.getFeatureOf()``: a complex
        contributes its own URI, anything else contributes the URI of its
        entity reference.
        """
        owners = set()
        for el in feature.getFeatureOf():
            if el.getModelInterface() == self.complex_class_:
                owners.add(el.getUri())
            else:
                owners.add(el.getEntityReference().getUri())
        return owners

    def load(self, filename):
        """Import a BioPAX model from the file.

        The parsed model is stored in ``self.model_``. The input stream is
        closed even when parsing fails; the parsing error still propagates.
        """
        file_is = self.java_io_.FileInputStream(filename)
        try:
            self.model_ = self.io_.convertFromOWL(file_is)
        finally:
            file_is.close()

    def is_protein_family(self, reference_id):
        """Check if the protein object is a family.

        Returns True when the reference with URI ``reference_id`` has member
        entity references, or when any physical entity it is the reference
        of has member physical entities; False otherwise.
        """
        reference = self.model_.getByID(reference_id)
        if len(reference.getMemberEntityReference()) > 0:
            return True
        return any(
            len(entity.getMemberPhysicalEntity()) > 0
            for entity in reference.getEntityReferenceOf())

    def is_fragment(self, protein_id):
        """Check if the protein object is a fragment (region).

        Returns True when at least one feature of the entity with URI
        ``protein_id`` has the FragmentFeature model interface.
        """
        protein = self.model_.getByID(protein_id)
        return any(
            f.getModelInterface() == self.fragment_feature_class_
            for f in protein.getFeature())

    def get_modifications(self, physical_entity_uri, ignore_features=None):
        """Get all residues and state flags of the physical entity.

        Only features with the ModificationFeature model interface are
        considered. Those that have a feature location are reported as
        residues, the rest as flags.

        Args:
            physical_entity_uri: URI of the physical entity in the model.
            ignore_features: optional container of feature URIs to skip.

        Returns:
            A dict ``{"residues": set_of_uris, "flags": set_of_uris}``.
        """
        residues = set()
        flags = set()
        entity = self.model_.getByID(physical_entity_uri)
        for f in entity.getFeature():
            if ignore_features is not None and f.getUri() in ignore_features:
                continue
            if f.getModelInterface() != self.modification_feature_class_:
                continue
            if f.getFeatureLocation() is not None:
                residues.add(f.getUri())
            else:
                flags.add(f.getUri())
        return {"residues": residues, "flags": flags}

    def residue_in_region(self, residue_id, region_id):
        """Test whether residue lies in the given region.

        Returns True when the residue position is within the inclusive
        [begin, end] interval of the region. Returns False when the region
        has no location, when either interval bound is missing, or when the
        residue has no location.
        """
        residue = self.model_.getByID(residue_id)
        region = self.model_.getByID(region_id)

        region_location = region.getFeatureLocation()
        if region_location is None:
            # A region without a location cannot contain anything.
            return False
        region_start = region_location.getSequenceIntervalBegin()
        region_end = region_location.getSequenceIntervalEnd()
        residue_location = residue.getFeatureLocation()
        if region_start is None or region_end is None or\
           residue_location is None:
            return False

        x = residue_location.getSequencePosition()
        start = region_start.getSequencePosition()
        end = region_end.getSequencePosition()
        return start <= x <= end

    def fetch_protein_reference(self, uri):
        """Extract identifiers, names and locations of a protein reference.

        A warning is issued when no xref with "uniprot" in its database name
        is found.

        Returns:
            A tuple ``(uniprotid, result_xrefs, name, locations)`` where
            ``uniprotid`` is the id of a UniProt xref (``None`` if absent;
            if several exist, the last one iterated wins), ``result_xrefs``
            maps the database name of every other xref to its id (an xref
            without a database name is stored under the key ``None``),
            ``name`` is the list of names of the reference, and
            ``locations`` is the set of cellular location terms of all
            physical entities that use the reference.
        """
        protein_reference = self.model_.getByID(uri)
        uniprotid = None
        result_xrefs = {}
        for ref in protein_reference.getXref():
            db = ref.getDb()
            # Guard against xrefs that carry no database name.
            if db is not None and "uniprot" in db.lower():
                uniprotid = ref.getId()
            else:
                result_xrefs[db] = ref.getId()

        if uniprotid is None:
            warnings.warn(
                "Protein reference '{}' does not contain a UniProt ID !".format(
                    protein_reference.getName()))

        name = list(protein_reference.getName())
        locations = set()
        for entity in protein_reference.getEntityReferenceOf():
            location = entity.getCellularLocation()
            if location is not None:
                locations.update(list(location.getTerm()))
        return (uniprotid, result_xrefs, name, locations)

    def fetch_region_reference(self, uri):
        """Return the ``(start, end)`` positions of a region feature.

        Either value is ``None`` when the corresponding interval bound is
        missing; both are ``None`` when the feature has no location at all.
        """
        region_feature = self.model_.getByID(uri)
        location = region_feature.getFeatureLocation()
        if location is None:
            return (None, None)

        start_ref = location.getSequenceIntervalBegin()
        end_ref = location.getSequenceIntervalEnd()
        start = None if start_ref is None else start_ref.getSequencePosition()
        end = None if end_ref is None else end_ref.getSequencePosition()
        return (start, end)

    def fetch_residue_reference(self, uri):
        """Return ``(aa, loc)`` for a residue modification feature.

        ``aa`` is whatever ``resolve_aa`` yields for the modification type of
        the feature and ``loc`` is its sequence position (``None`` when the
        feature has no location). A warning is issued when the feature is
        attached to more than one owner.
        """
        residue = self.model_.getByID(uri)
        references = self._feature_owner_uris(residue)
        if len(references) > 1:
            warnings.warn(
                "Residue (%s) references to more than one protein!" %
                (uri))
        location = residue.getFeatureLocation()
        loc = None if location is None else location.getSequencePosition()
        aa = resolve_aa(residue.getModificationType())
        return (aa, loc)

    def fetch_state_reference(self, uri):
        """Return the single state term of a state-flag feature.

        A warning is issued when the feature is attached to more than one
        owner. When the modification type does not have exactly one term
        (or is missing), a warning is issued and ``None`` is returned.
        """
        flag = self.model_.getByID(uri)
        references = self._feature_owner_uris(flag)
        if len(references) > 1:
            warnings.warn(
                "State flag (%s) references to more than one protein!" %
                (uri))

        modification_type = flag.getModificationType()
        if modification_type is None:
            states = []
        else:
            states = list(modification_type.getTerm())
        if len(states) == 1:
            return states[0]
        warnings.warn("Ambiguous state (%s)! Cannot convert to node" %
                      states)
        return None

    def get_by_id(self, uri):
        """Return the model element with the given URI."""
        return self.model_.getByID(uri)