def store_bio_searchio_hit(self, searchio_hit: Hit, target: str) -> None:
    """Store a searchio hit as a feature together with its cross-references.

    The hit is saved as a ``Feature`` typed with ``self.so_term_protein_match``
    and attached to a shared "multispecies" organism (created on first use).
    Every entry of ``searchio_hit.dbxrefs`` is then linked to that feature:
    ``GO:`` entries through ``FeatureCvterm`` and all others through
    ``FeatureDbxref``.

    Args:
        searchio_hit: The hit to store. Its ``id`` is used as the feature
            uniquename and as the accession of the feature's main dbxref;
            its ``accession`` is used as the feature name. When the hit has
            no ``accession`` attribute, one is set to ``None`` on the object.
            ``dbxrefs`` entries are expected in ``"DB:ACCESSION"`` form.
        target: ``"InterPro"`` takes the database name from
            ``searchio_hit.attributes["Target"]``; any other value uses
            ``self.db``.

    Raises:
        ValueError: If an entry of ``searchio_hit.dbxrefs`` contains no ":".

    Side effects:
        GO entries whose Db, Dbxref or Cvterm is not already stored are
        added to ``self.ignored_goterms`` instead of being linked.
    """
    organism_obj, _ = Organism.objects.get_or_create(
        abbreviation="multispecies",
        genus="multispecies",
        species="multispecies",
        common_name="multispecies",
    )

    if not hasattr(searchio_hit, "accession"):
        searchio_hit.accession = None

    if target == "InterPro":
        # interproscan-xml parsing: the db name comes from Hit.attributes.
        db_name = searchio_hit.attributes["Target"].upper()
        # Prevents the creation of multiple databases for SIGNALP.
        if db_name.startswith("SIGNALP"):
            db_name = "SIGNALP"
        db, _ = Db.objects.get_or_create(name=db_name)
    else:
        # blast-xml parsing: the db is the loader's own self.db.
        db = self.db

    dbxref, _ = Dbxref.objects.get_or_create(db=db, accession=searchio_hit.id)

    # Take a single timestamp so both time columns of a new feature agree.
    now = datetime.now(timezone.utc)
    feature, _ = Feature.objects.get_or_create(
        organism=organism_obj,
        uniquename=searchio_hit.id,
        type_id=self.so_term_protein_match.cvterm_id,
        name=searchio_hit.accession,
        dbxref=dbxref,
        defaults={
            "is_analysis": False,
            "is_obsolete": False,
            "timeaccessioned": now,
            "timelastmodified": now,
        },
    )

    for aux_dbxref in searchio_hit.dbxrefs:
        aux_db, aux_term = aux_dbxref.split(":", 1)

        if aux_db == "GO":
            # GO terms are never created here: they must already be stored,
            # otherwise the reference is recorded as ignored.
            try:
                go_db = Db.objects.get(name="GO")
                go_dbxref = Dbxref.objects.get(db=go_db, accession=aux_term)
                cvterm = Cvterm.objects.get(dbxref=go_dbxref)
            except ObjectDoesNotExist:
                self.ignored_goterms.add(aux_dbxref)
            else:
                FeatureCvterm.objects.get_or_create(
                    feature=feature,
                    cvterm=cvterm,
                    pub=self.pub,
                    is_not=False,
                    rank=0,
                )
        else:
            # Any other database and accession is created on demand.
            term_db, _ = Db.objects.get_or_create(name=aux_db.upper())
            term_dbxref, _ = Dbxref.objects.get_or_create(
                db=term_db, accession=aux_term
            )
            FeatureDbxref.objects.get_or_create(
                feature=feature, dbxref=term_dbxref, is_current=1
            )
def test_store_bio_searchio_hit(self):
    """Tests - a stored searchio hit yields the feature, its dbxref and its GO term."""
    # Flags shared by every cvterm fixture below.
    term_flags = {"is_obsolete": 0, "is_relationshiptype": 0}

    # RO term: contained in
    ro_db = Db.objects.create(name="RO")
    ro_dbxref = Dbxref.objects.create(accession="00002", db=ro_db)
    relationship_cv = Cv.objects.create(name="relationship")
    Cvterm.objects.create(
        name="contained in", cv=relationship_cv, dbxref=ro_dbxref, **term_flags
    )

    # SO terms: protein_match and polypeptide
    sequence_cv = Cv.objects.create(name="sequence")
    so_db = Db.objects.create(name="SO")
    so_terms = (("00001", "protein_match"), ("00002", "polypeptide"))
    for so_accession, so_name in so_terms:
        so_dbxref = Dbxref.objects.create(accession=so_accession, db=so_db)
        Cvterm.objects.create(
            name=so_name, cv=sequence_cv, dbxref=so_dbxref, **term_flags
        )

    # GO term referenced by the hit
    go_db = Db.objects.create(name="GO")
    go_dbxref = Dbxref.objects.create(accession="1234", db=go_db)
    process_cv = Cv.objects.create(name="biological_process")
    Cvterm.objects.create(
        name="GO:1234", cv=process_cv, dbxref=go_dbxref, **term_flags
    )

    # The hit under test, shaped like an InterProScan result.
    hit = Hit()
    hit.id = "PF1234"
    hit.accession = "PFAM mock domain"
    hit.attributes["Target"] = "PFAM"
    hit.dbxrefs = ["GO:1234", "IPR:IPR012345", "Reactome:R-HSA-12345"]

    Organism.objects.create(genus="test", species="organism")

    # Run the loader with the InterPro target.
    loader = FeatureLoader(filename="file.name", source="InterproScan_source")
    loader.store_bio_searchio_hit(hit, "InterPro")

    # The feature carries the hit accession as its name.
    stored_feature = Feature.objects.get(uniquename="PF1234")
    self.assertEqual("PFAM mock domain", stored_feature.name)

    # The IPR cross-reference is linked and marked current.
    ipr_dbxref = Dbxref.objects.get(accession="IPR012345")
    feature_dbxref = FeatureDbxref.objects.get(
        feature=stored_feature, dbxref=ipr_dbxref
    )
    self.assertEqual(True, feature_dbxref.is_current)

    # The GO term is linked with rank 0.
    go_cvterm = Cvterm.objects.get(name="GO:1234")
    feature_cvterm = FeatureCvterm.objects.get(
        feature=stored_feature, cvterm=go_cvterm
    )
    self.assertEqual(0, feature_cvterm.rank)