def test_store_organism_publication(self):
    """Tests - store organism publication."""
    doi = "10.1186/s12864-016-2535-300002"
    mouse = Organism.objects.create(genus="Mus", species="musculus")

    # Mock bibtexparser database holding the single publication to link.
    bib_db = BibDatabase()
    bib_db.entries = [
        dict(
            journal="Nice Journal",
            comments="A comment",
            pages="12--23",
            month="jan",
            abstract=(
                "This is an abstract. "
                "This line should be long enough to test multilines..."
            ),
            title="An amazing title",
            year="2013",
            doi=doi,
            volume="12",
            ID="Teste2018",
            author="Foo, b. and Foo1, b. and Foo b.",
            keyword="keyword1, keyword2",
            ENTRYTYPE="article",
        )
    ]
    for bib_entry in bib_db.entries:
        PublicationLoader().store_bibtex_entry(bib_entry)

    # Link the organism to the publication through its DOI.
    OrganismLoader().store_organism_publication(organism="Mus musculus", doi=doi)

    organism_pub = OrganismPub.objects.get(organism=mouse)
    self.assertEqual("An amazing title", organism_pub.pub.title)
def test_store_biopython_seq_record_DOI(self):
    """Tests - __init__ and store_biopython_seq_record with DOI."""
    doi = "10.1186/s12864-016-2535-300002"
    fasta_name = "sequence_doi.fasta"

    # Store the publication that the sequence file will reference.
    bib_db = BibDatabase()
    bib_db.entries = [
        dict(
            journal="Nice Journal",
            comments="A comment",
            pages="12--23",
            month="jan",
            abstract=(
                "This is an abstract. "
                "This line should be long enough to test multilines..."
            ),
            title="An amazing title",
            year="2013",
            doi=doi,
            volume="12",
            ID="Teste2018",
            author="Foo, b. and Foo1, b. and Foo b.",
            keyword="keyword1, keyword2",
            ENTRYTYPE="article",
        )
    ]
    for bib_entry in bib_db.entries:
        PublicationLoader().store_bibtex_entry(bib_entry)

    # The stored publication must be cross-referenced by its DOI.
    pub = Pub.objects.get(uniquename="Teste2018")
    pub_xref_link = PubDbxref.objects.get(pub=pub)
    pub_xref = Dbxref.objects.get(dbxref_id=pub_xref_link.dbxref_id)
    self.assertEqual(doi, pub_xref.accession)

    # Load one sequence record with a loader bound to the same DOI.
    Organism.objects.create(genus="Mus", species="musculus")
    seq_loader = SequenceLoader(filename=fasta_name, doi=doi)
    chromosome = SeqRecord(
        Seq("acgtgtgtgcatgctagatcgatgcatgca"),
        id="chr2",
        description="chromosome 2",
    )
    seq_loader.store_biopython_seq_record(chromosome, "assembly", "Mus musculus")

    stored_feature = Feature.objects.get(name="chromosome 2")
    self.assertEqual("chr2", stored_feature.uniquename)

    # Walk feature -> publication -> dbxref and check the DOI again.
    feature_pub = FeaturePub.objects.get(pub_id=pub.pub_id)
    linked_pub_xref = PubDbxref.objects.get(pub_id=feature_pub.pub_id)
    linked_xref = Dbxref.objects.get(dbxref_id=linked_pub_xref.dbxref_id)
    self.assertEqual(doi, linked_xref.accession)

    # test remove_file
    self.assertTrue(Dbxrefprop.objects.filter(value=fasta_name).exists())
    call_command("remove_file", "--name=sequence_doi.fasta", "--verbosity=0")
    self.assertFalse(Dbxrefprop.objects.filter(value=fasta_name).exists())
def test_store_pub_record(self):
    """Tests - __init__ and store_pub_record."""
    # First case: a hand-built entry stored directly by PublicationLoader.
    manual_entry = {
        "ENTRYTYPE": "article",
        "ID": "Chado2006",
        "title": "A mock test title",
        "year": "2006",
        "pages": "12000",
        "doi": "10.1111/s12122-012-1313-4",
        "author": "Foo, b. and Foo1, b. and Foo b.",
        "volume": "v2",
        "journal": "Journal of Testing",
    }
    PublicationLoader().store_bibtex_entry(manual_entry)
    manual_pub = Pub.objects.get(uniquename="Chado2006")
    self.assertEqual("v2", manual_pub.volume)

    # Second case: a mock bibtexparser database whose entry omits "pages".
    removable_doi = "10.1111/s12122-012-1313-5"
    bib_db = BibDatabase()
    bib_db.entries = [
        dict(
            journal="Nice Journal",
            comments="A comment",
            month="jan",
            abstract=(
                "This is an abstract. "
                "This line should be long enough to test multilines..."
            ),
            title="An amazing title",
            year="2013",
            doi=removable_doi,
            volume="12",
            ID="Cesar2013",
            author="Foo, b. and Foo1, b. and Foo b.",
            keyword="keyword1, keyword2",
            ENTRYTYPE="article",
        )
    ]
    for bib_entry in bib_db.entries:
        PublicationLoader().store_bibtex_entry(bib_entry)

    parsed_pub = Pub.objects.get(uniquename="Cesar2013")
    self.assertEqual("12", parsed_pub.volume)
    self.assertEqual(None, parsed_pub.pages)
    parsed_pub_xref = PubDbxref.objects.get(pub_id=parsed_pub.pub_id)
    self.assertEqual(parsed_pub.pub_id, parsed_pub_xref.pub_id)

    # test remove publication (with cascade enabled)
    self.assertTrue(Pub.objects.filter(uniquename="Cesar2013").exists())
    call_command(
        "remove_publication",
        "--doi=10.1111/s12122-012-1313-5",
        "--verbosity=0",
    )
    self.assertFalse(Pub.objects.filter(uniquename="Cesar2013").exists())

    # The dbxref itself is expected to survive the publication removal.
    self.assertTrue(Dbxref.objects.filter(accession=removable_doi).exists())
def handle(self, file=str, verbosity: int = 1, cpu: int = 1, **options):
    """Execute the main function.

    Validate and parse a BibTeX file, then store every parsed entry through
    a single shared ``PublicationLoader`` using a pool of worker threads.

    Args:
        file: path of the BibTeX file; it is passed to ``FileValidator``
            and to ``open``.
            NOTE(review): ``file=str`` looks like it was meant to be the
            annotation ``file: str``; the signature is left untouched so
            existing callers are unaffected - confirm and fix separately.
        verbosity: progress messages and the progress bar are emitted only
            when this is greater than zero.
        cpu: maximum number of worker threads in the pool.
        **options: remaining keyword options; not used here.

    Raises:
        CommandError: if validation raises ``ImportingError``, if parsing
            raises ``ValueError``, or if storing an entry raises
            ``ImportingError``.
    """
    if verbosity > 0:
        self.stdout.write("Preprocessing")

    try:
        FileValidator().validate(file)
    except ImportingError as e:
        raise CommandError(e) from e

    try:
        # Context manager so the file handle is closed even if parsing fails.
        with open(file) as bibtex_file:
            bib_database = bibtexparser.load(bibtex_file)
    except ValueError as e:
        # Must be raised: returning the exception object would silently
        # report success to the caller.
        raise CommandError(e) from e

    bibtex = PublicationLoader()
    # The with-block guarantees the pool is shut down on every exit path,
    # including when a failed task is converted into CommandError below.
    with ThreadPoolExecutor(max_workers=cpu) as pool:
        # Only entries with a non-empty ENTRYTYPE are submitted for storage.
        tasks = [
            pool.submit(bibtex.store_bibtex_entry, entry)
            for entry in bib_database.entries
            if entry["ENTRYTYPE"]
        ]

        if verbosity > 0:
            self.stdout.write("Loading")
        for task in tqdm(
            as_completed(tasks),
            total=len(tasks),
            disable=verbosity <= 0,
        ):
            try:
                # result() re-raises any exception raised inside the worker.
                task.result()
            except ImportingError as e:
                raise CommandError(e) from e

    if verbosity > 0:
        self.stdout.write(self.style.SUCCESS("Done"))
def test_store_feature_publication(self):
    """Tests - store feature publication."""

    def add_cvterm(name, accession, db, cv):
        """Create a Dbxref in ``db`` and a non-obsolete Cvterm using it."""
        xref = Dbxref.objects.create(accession=accession, db=db)
        return Cvterm.objects.create(
            name=name,
            cv=cv,
            dbxref=xref,
            is_obsolete=0,
            is_relationshiptype=0,
        )

    # RO term: contained in
    ro_db = Db.objects.create(name="RO")
    relationship_cv = Cv.objects.create(name="relationship")
    add_cvterm("contained in", "00002", ro_db, relationship_cv)

    # SO terms: gene, polypeptide and protein_match
    so_db = Db.objects.create(name="SO")
    sequence_cv = Cv.objects.create(name="sequence")
    gene_term = add_cvterm("gene", "12345", so_db, sequence_cv)
    add_cvterm("polypeptide", "123456", so_db, sequence_cv)
    add_cvterm("protein_match", "123455", so_db, sequence_cv)

    # The gene feature that will be linked to the publication.
    mouse = Organism.objects.create(genus="Mus", species="musculus")
    gff_db = Db.objects.create(name="GFF_SOURCE")
    gene_xref = Dbxref.objects.create(accession="feat_gene", db=gff_db)
    gene_feature = Feature.objects.create(
        organism=mouse,
        uniquename="feat_gene",
        dbxref=gene_xref,
        is_analysis=False,
        type_id=gene_term.cvterm_id,
        is_obsolete=False,
        timeaccessioned=datetime.now(timezone.utc),
        timelastmodified=datetime.now(timezone.utc),
    )

    # Store the publication through a mock bibtexparser database.
    doi = "10.1186/s12864-016-2535-300002"
    bib_db = BibDatabase()
    bib_db.entries = [
        dict(
            journal="Nice Journal",
            comments="A comment",
            pages="12--23",
            month="jan",
            abstract=(
                "This is an abstract. "
                "This line should be long enough to test multilines..."
            ),
            title="An amazing title",
            year="2013",
            doi=doi,
            volume="12",
            ID="Teste2018",
            author="Foo, b. and Foo1, b. and Foo b.",
            keyword="keyword1, keyword2",
            ENTRYTYPE="article",
        )
    ]
    for bib_entry in bib_db.entries:
        PublicationLoader().store_bibtex_entry(bib_entry)

    # Link feature and publication, then read the link back.
    feature_loader = FeatureLoader(filename="file.name", source="GFF_loader")
    feature_loader.store_feature_publication(
        feature="feat_gene",
        soterm="gene",
        doi=doi,
    )
    feature_pub = FeaturePub.objects.get(feature=gene_feature)
    self.assertEqual("An amazing title", feature_pub.pub.title)
def test_store_tabix_GFF_feature(self):
    """Tests - store tabix feature / store relationships."""

    def add_cvterm(name, accession, db, cv):
        """Create a Dbxref in ``db`` and a non-obsolete Cvterm using it."""
        xref = Dbxref.objects.create(accession=accession, db=db)
        return Cvterm.objects.create(
            name=name,
            cv=cv,
            dbxref=xref,
            is_obsolete=0,
            is_relationshiptype=0,
        )

    # _global terms: exact (synonym_type cv) and part_of (sequence cv)
    global_db = Db.objects.create(name="_global")
    synonym_cv = Cv.objects.create(name="synonym_type")
    add_cvterm("exact", "exact", global_db, synonym_cv)
    sequence_cv = Cv.objects.create(name="sequence")
    add_cvterm("part_of", "part_of", global_db, sequence_cv)

    # SO terms: assembly, gene, exon, polypeptide and protein_match
    so_db = Db.objects.create(name="SO")
    assembly_term = add_cvterm("assembly", "00001", so_db, sequence_cv)
    add_cvterm("gene", "00002", so_db, sequence_cv)
    add_cvterm("exon", "00003", so_db, sequence_cv)
    add_cvterm("polypeptide", "00004", so_db, sequence_cv)
    add_cvterm("protein_match", "00005", so_db, sequence_cv)

    # RO term: contained in
    ro_db = Db.objects.create(name="RO")
    relationship_cv = Cv.objects.create(name="relationship")
    add_cvterm("contained in", "00002", ro_db, relationship_cv)

    # Organism and the source feature (contig) the GFF records sit on.
    mouse = Organism.objects.create(genus="Mus", species="musculus")
    fasta_db = Db.objects.create(name="FASTA_SOURCE")
    contig_xref = Dbxref.objects.create(accession="contig1", db=fasta_db)
    contig = Feature.objects.create(
        dbxref=contig_xref,
        organism=mouse,
        name="contig1",
        type=assembly_term,
        uniquename="contig1",
        is_analysis=False,
        is_obsolete=False,
        timeaccessioned=datetime.now(timezone.utc),
        timelastmodified=datetime.now(timezone.utc),
    )

    # DOI TESTING
    doi = "10.1186/s12864-016-2535-300002"
    bib_db = BibDatabase()
    bib_db.entries = [
        dict(
            journal="Nice Journal",
            comments="A comment",
            pages="12--23",
            month="jan",
            abstract=(
                "This is an abstract. "
                "This line should be long enough to test multilines..."
            ),
            title="An amazing title",
            year="2013",
            doi=doi,
            volume="12",
            ID="Teste2018",
            author="Foo, b. and Foo1, b. and Foo b.",
            keyword="keyword1, keyword2",
            ENTRYTYPE="article",
        )
    ]
    for bib_entry in bib_db.entries:
        PublicationLoader().store_bibtex_entry(bib_entry)

    pub = Pub.objects.get(uniquename="Teste2018")
    pub_xref_link = PubDbxref.objects.get(pub=pub)
    pub_xref = Dbxref.objects.get(dbxref_id=pub_xref_link.dbxref_id)
    self.assertEqual(doi, pub_xref.accession)

    # DOI: try to link feature to publication's DOI
    feature_pub = None
    if contig and pub_xref_link:
        feature_pub = FeaturePub.objects.create(
            feature_id=contig.feature_id,
            pub_id=pub_xref_link.pub_id,
        )
    linked_pub = Pub.objects.get(pub_id=feature_pub.pub_id)
    self.assertEqual("An amazing title", linked_pub.title)
    linked_pub_xref = PubDbxref.objects.get(pub=linked_pub)
    linked_xref = Dbxref.objects.get(dbxref_id=linked_pub_xref.dbxref_id)
    self.assertEqual(doi, linked_xref.accession)

    # Mock tabix records: a gene and an exon whose parent is that gene.
    class TabixFeature(object):
        """mock tabix feature."""

    def make_tabix(**fields):
        """Return a TabixFeature carrying ``fields`` as attributes."""
        record = TabixFeature()
        for attr, value in fields.items():
            setattr(record, attr, value)
        return record

    gene_record = make_tabix(
        contig="contig1",
        feature="gene",
        start="10",
        end="100",
        strand="+",
        frame="1",
        attributes="id=id1;name=name1",
    )
    exon_record = make_tabix(
        contig="contig1",
        feature="exon",
        start="10",
        end="100",
        strand="-",
        frame="2",
        attributes="id=id2;name=name2;parent=id1",
    )

    # Instantiate the loader and store both records.
    feature_loader = FeatureLoader(filename="file.name", source="GFF_source")
    organism = "Mus musculus"
    qtl = False
    for record in (gene_record, exon_record):
        feature_loader.store_tabix_GFF_feature(record, organism, qtl)

    # Store the relationships collected while loading.
    for relation in feature_loader.relationships:
        feature_loader.store_relationship(
            organism,
            relation["subject_id"],
            relation["object_id"],
        )

    exon = Feature.objects.get(uniquename="id2")
    exon_loc = Featureloc.objects.get(feature=exon)
    exon_relationship = FeatureRelationship.objects.get(object=exon.feature_id)
    related_feature = Feature.objects.get(
        feature_id=exon_relationship.subject.feature_id
    )
    self.assertEqual("name2", exon.name)
    self.assertEqual(10, exon_loc.fmin)
    self.assertEqual("id1", related_feature.uniquename)
def test_store_tabix_VCF_feature(self):
    """Tests - store tabix VCF feature / store relationships.

    Builds the ontology terms, organism, source contig and publication,
    stores two mock VCF records through ``FeatureLoader`` and checks the
    feature locations created for the second record ("id2").
    """

    def add_cvterm(name, accession, db, cv):
        """Create a Dbxref in ``db`` and a non-obsolete Cvterm using it."""
        xref = Dbxref.objects.create(accession=accession, db=db)
        return Cvterm.objects.create(
            name=name,
            cv=cv,
            dbxref=xref,
            is_obsolete=0,
            is_relationshiptype=0,
        )

    # _global terms: exact (synonym_type cv) and part_of (sequence cv)
    global_db = Db.objects.create(name="_global")
    synonym_cv = Cv.objects.create(name="synonym_type")
    add_cvterm("exact", "exact", global_db, synonym_cv)
    sequence_cv = Cv.objects.create(name="sequence")
    add_cvterm("part_of", "part_of", global_db, sequence_cv)

    # SO terms: assembly, snv, snp, polypeptide, protein_match and
    # quality_value
    so_db = Db.objects.create(name="SO")
    assembly_term = add_cvterm("assembly", "00001", so_db, sequence_cv)
    add_cvterm("snv", "00002", so_db, sequence_cv)
    add_cvterm("snp", "00003", so_db, sequence_cv)
    add_cvterm("polypeptide", "00004", so_db, sequence_cv)
    add_cvterm("protein_match", "00005", so_db, sequence_cv)
    add_cvterm("quality_value", "00006", so_db, sequence_cv)

    # RO term: contained in
    ro_db = Db.objects.create(name="RO")
    relationship_cv = Cv.objects.create(name="relationship")
    add_cvterm("contained in", "00002", ro_db, relationship_cv)

    # Organism and the source feature (contig) the variants sit on.
    mouse = Organism.objects.create(genus="Mus", species="musculus")
    fasta_db = Db.objects.create(name="FASTA_SOURCE")
    contig_xref = Dbxref.objects.create(accession="contig1", db=fasta_db)
    contig = Feature.objects.create(
        dbxref=contig_xref,
        organism=mouse,
        name="contig1",
        type=assembly_term,
        uniquename="contig1",
        is_analysis=False,
        is_obsolete=False,
        timeaccessioned=datetime.now(timezone.utc),
        timelastmodified=datetime.now(timezone.utc),
    )

    # DOI TESTING
    doi = "10.1186/s12864-016-2535-300002"
    bib_db = BibDatabase()
    bib_db.entries = [
        {
            "journal": "Nice Journal",
            "comments": "A comment",
            "pages": "12--23",
            "month": "jan",
            "abstract": "This is an abstract. This line should be "
            "long enough to test multilines...",
            "title": "An amazing title",
            "year": "2013",
            "doi": doi,
            "volume": "12",
            "ID": "Teste2018",
            "author": "Foo, b. and Foo1, b. and Foo b.",
            "keyword": "keyword1, keyword2",
            "ENTRYTYPE": "article",
        }
    ]
    for bib_entry in bib_db.entries:
        PublicationLoader().store_bibtex_entry(bib_entry)

    pub = Pub.objects.get(uniquename="Teste2018")
    pub_xref_link = PubDbxref.objects.get(pub=pub)
    pub_xref = Dbxref.objects.get(dbxref_id=pub_xref_link.dbxref_id)
    self.assertEqual(doi, pub_xref.accession)

    # DOI: try to link feature to publication's DOI
    feature_pub = None
    if contig and pub_xref_link:
        feature_pub = FeaturePub.objects.create(
            feature_id=contig.feature_id,
            pub_id=pub_xref_link.pub_id,
        )
    linked_pub = Pub.objects.get(pub_id=feature_pub.pub_id)
    self.assertEqual("An amazing title", linked_pub.title)
    linked_pub_xref = PubDbxref.objects.get(pub=linked_pub)
    linked_xref = Dbxref.objects.get(dbxref_id=linked_pub_xref.dbxref_id)
    self.assertEqual(doi, linked_xref.accession)

    # Mock tabix VCF records.
    class TabixFeature(object):
        """mock tabix feature."""

    def make_tabix(**fields):
        """Return a TabixFeature carrying ``fields`` as attributes."""
        record = TabixFeature()
        for attr, value in fields.items():
            setattr(record, attr, value)
        return record

    first_variant = make_tabix(
        contig="contig1",
        feature="snp",
        pos=10,
        id="id1",
        ref="A",
        alt="T,C",
        info="TSA=snv",
        qual=10,
    )
    second_variant = make_tabix(
        contig="contig1",
        feature="snv",
        pos=100,
        id="id2",
        ref="G",
        alt="C,A",
        info="VC=snp;SAO=0",
        qual=20,
    )

    # Instantiate the loader and store both records.
    feature_loader = FeatureLoader(filename="file.name", source="VCF_SOURCE")
    organism = "Mus musculus"
    feature_loader.store_tabix_VCF_feature(first_variant, organism)
    feature_loader.store_tabix_VCF_feature(second_variant, organism)

    stored_variant = Feature.objects.get(uniquename="id2")
    # Order explicitly by rank and evaluate once: an unordered queryset has
    # no guaranteed row order, and indexing it lazily runs one query per
    # index, so positional assertions could otherwise compare the wrong row.
    variant_locs = list(
        Featureloc.objects.filter(feature=stored_variant).order_by("rank")
    )
    self.assertEqual(100, variant_locs[0].fmin)
    self.assertEqual("G", variant_locs[0].residue_info)
    self.assertEqual("C", variant_locs[1].residue_info)
    self.assertEqual("A", variant_locs[2].residue_info)
    self.assertEqual(0, variant_locs[0].rank)
    self.assertEqual(1, variant_locs[1].rank)
    self.assertEqual(2, variant_locs[2].rank)
    self.assertEqual("contig1", variant_locs[0].srcfeature.uniquename)