def protein_manager(self, database="datanator"):
    ''' Build a protein query manager from this object's connection settings

        Args:
            database (:obj:`str`): name of the database handed to the manager

        Returns:
            (:obj:`query_protein.QueryProtein`): newly constructed manager
    '''
    # Every connection option comes from the instance; only the database
    # name is chosen by the caller.
    connection_options = {
        'username': self.username,
        'password': self.password,
        'server': self.server,
        'authSource': self.authDB,
        'readPreference': self.read_preference,
        'replicaSet': self.repl,
        'database': database,
    }
    return query_protein.QueryProtein(**connection_options)
def data_from_mongo_protein(self, server, db, username, password, verbose=False,
                            readPreference='nearest', authSource='admin',
                            projection=None, query=None):
    ''' Acquire documents from protein collection in datanator

        Args:
            server (:obj:`str`): mongodb ip address
            db (:obj:`str`): database name
            username (:obj:`str`): username for mongodb login
            password (:obj:`str`): password for mongodb login
            verbose (:obj:`bool`): display verbose messages
            readPreference (:obj:`str`): mongodb readpreference
            authSource (:obj:`str`): database login info is authenticating against
            projection (:obj:`dict`): mongodb query projection; ``None``
                (the default) means ``{'_id': 0}``
            query (:obj:`dict`): mongodb query filter; ``None`` (the default)
                means ``{}``, i.e. no filtering

        Returns:
            (:obj:`tuple`): tuple containing, in this order:

                count (:obj:`int`): number of documents matching ``query``;
                docs (:obj:`pymongo.Cursor`): pymongo cursor over the
                documents matching ``query``
    '''
    # Build the defaults per call instead of sharing one mutable dict
    # object between every invocation of this method.
    if projection is None:
        projection = {'_id': 0}
    if query is None:
        query = {}

    protein_manager = query_protein.QueryProtein(
        server=server, database=db, verbose=verbose, username=username,
        authSource=authSource, password=password,
        readPreference=readPreference)
    docs = protein_manager.collection.find(filter=query, projection=projection)
    # The count is obtained with a separate server-side query so the cursor
    # is left unconsumed for the caller.
    count = protein_manager.collection.count_documents(query)
    return (count, docs)
def __init__(self, server, src_db='datanator', des_db='datanator',
             collection_str='uniprot', username=None, password=None,
             readPreference='nearest', authSource='admin', verbose=True,
             max_entries=float('inf')):
    ''' Connect to the destination collection and create the query managers

        Args:
            server (:obj:`str`): mongodb server address, used for the parent
                class connection and for every manager created here
            src_db (:obj:`str`): database the query managers read from
            des_db (:obj:`str`): database used by the parent class connection
                and by the uniprot nosql manager
            collection_str (:obj:`str`): name of the collection opened through
                ``con_db``
            username (:obj:`str`): username for mongodb login
            password (:obj:`str`): password for mongodb login
            readPreference (:obj:`str`): mongodb readpreference
            authSource (:obj:`str`): database login info is authenticating against
            verbose (:obj:`bool`): display verbose messages
            max_entries (:obj:`float`): entry limit forwarded to the parent
                class and to the managers that accept it
    '''
    login = dict(username=username, password=password, authSource=authSource)
    limits = dict(max_entries=max_entries, verbose=verbose)

    super().__init__(MongoDB=server, db=des_db, readPreference=readPreference,
                     **limits, **login)
    self.collection_str = collection_str
    self.max_entries = max_entries
    self.verbose = verbose

    # Handle on the destination collection.
    connection = self.con_db(collection_str)
    self.des_client, self.des_db, self.des_collection = connection

    # Keyword arguments shared by every manager reading from the source database.
    source = dict(server=server, database=src_db,
                  readPreference=readPreference, **login)
    self.koc_manager = query_kegg_organism_code.QueryKOC(
        collection_str='kegg_organism_code', **source)
    self.uniprot_manager = query_uniprot.QueryUniprot(
        collection_str='uniprot', **source)
    self.kegg_manager = query_kegg_orthology.QueryKO(**source, **limits)
    self.protein_manager = query_protein.QueryProtein(**source, **limits)

    self.uniprot_nosql_manager = uniprot_nosql.UniprotNoSQL(
        MongoDB=server, db=des_db, **limits, **login)
    self.endpoint = 'https://www.kegg.jp/ssdb-bin/ssdb_best?org_gene='
def __init__(self, db=None, MongoDB=None, cache_directory=None, verbose=False,
             max_entries=float('inf'), replicaSet=None, username=None,
             password=None, authSource='admin'):
    ''' Open the sabio collections and create the helper managers

        Args:
            db (:obj:`str`): mongodb database name
            MongoDB (:obj:`str`): MongoDB server address and login
                e.g. 'mongodb://mongo:27017/'
            cache_directory (:obj:`str`): JSON file (converted from sqlite)
                directory
            verbose (:obj:`bool`): display verbose messages
            max_entries (:obj:`float`): entry limit forwarded to the parent
                class and to the query managers
            replicaSet (:obj:`str`): replica set name forwarded to the parent
                class
            username (:obj:`str`): username for mongodb login
            password (:obj:`str`): password for mongodb login
            authSource (:obj:`str`): database login info is authenticating against
    '''
    self.db = db
    self.MongoDB = MongoDB
    self.cache_directory = cache_directory
    self.verbose = verbose
    self.max_entries = max_entries
    self.collection_str = 'sabio_rk_old'

    login = dict(username=username, password=password, authSource=authSource)
    limits = dict(verbose=verbose, max_entries=max_entries)

    super(SabioRkNoSQL, self).__init__(
        cache_dirname=cache_directory, MongoDB=MongoDB, replicaSet=replicaSet,
        db=db, **limits, **login)

    connection = self.con_db(self.collection_str)
    self.client, self.db_obj, self.collection = connection

    # Sibling collections living in the same database.
    database = self.db_obj
    self.sabio_reaction_entries = database['sabio_reaction_entries']
    self.kegg_collection = database['kegg_orthology']

    self.sabiork_manager = query_sabiork.QuerySabio(
        MongoDB=MongoDB, db=db, collection_str='sabio_rk', **limits, **login)
    self.protein_manager = query_protein.QueryProtein(
        server=MongoDB, database=db, collection_str='uniprot',
        readPreference='nearest', **limits, **login)

    self.ec = self.db_obj['ec']
    self.chem_manager = chem_util.ChemUtil()
    self.tax_manager = query_taxon_tree.QueryTaxonTree(
        username=username, MongoDB=MongoDB, password=password)
    self.file_manager = file_util.FileUtil()
def __init__(self, username=None, password=None, server=None, authSource='admin',
             src_database='datanator', max_entries=float('inf'), verbose=True,
             collection='protein', destination_database='datanator',
             cache_dir=None):
    ''' Create the source managers and open the destination collection

        Args:
            username (:obj:`str`): username for mongodb login
            password (:obj:`str`): password for mongodb login
            server (:obj:`str`): mongodb server address
            authSource (:obj:`str`): database login info is authenticating against
            src_database (:obj:`str`): name of database in which source
                collections reside
            max_entries (:obj:`float`): entry limit forwarded to the managers
                that accept it
            verbose (:obj:`bool`): display verbose messages
            collection (:obj:`str`): name of the collection opened in
                ``destination_database``
            destination_database (:obj:`str`): name of database to put the
                aggregated collection
            cache_dir (:obj:`str`): stored on the instance as ``cache_dir``
    '''
    self.max_entries = max_entries
    self.verbose = verbose
    self.cache_dir = cache_dir
    self.mongo_manager = mongo_util.MongoUtil(
        MongoDB=server, username=username, password=password,
        authSource=authSource, db=src_database)
    self.pax_manager = query_pax.QueryPax(
        MongoDB=server, db=src_database, collection_str='pax',
        verbose=verbose, max_entries=max_entries, username=username,
        password=password, authSource=authSource)
    self.kegg_manager = query_kegg_orthology.QueryKO(
        server=server, database=src_database, verbose=verbose,
        max_entries=max_entries, username=username, password=password,
        authSource=authSource)
    self.taxon_manager = query_taxon_tree.QueryTaxonTree(
        collection_str='taxon_tree', verbose=verbose,
        max_entries=max_entries, username=username, MongoDB=server,
        password=password, db=src_database, authSource=authSource)
    # authSource and verbose are forwarded like for every other manager, so
    # a non-default authentication database or verbosity is honoured here too.
    self.protein_manager = query_protein.QueryProtein(
        username=username, password=password, server=server,
        authSource=authSource, collection_str='protein',
        max_entries=max_entries, verbose=verbose, database=src_database)
    # Destination collection that receives the aggregated documents.
    self.client, self.db, self.col = mongo_util.MongoUtil(
        MongoDB=server, username=username, password=password,
        authSource=authSource, db=destination_database).con_db(collection)
    self.bad_kinlawid = [
        24416, 24417, 24418, 24419, 24420, 24421, 24422, 24423
    ]
    self.collation = Collation(locale='en',
                               strength=CollationStrength.SECONDARY)
def setUpClass(cls):
    ''' Connect to the test server and seed the ``test_query_protein`` collection

        Three query managers are created: ``cls.src`` on the seeded test
        collection, plus ``cls.src_1`` and ``cls.src_2`` on the 'datanator'
        and 'datanator-test' databases. The test collection is dropped and
        refilled with the documents defined below.
    '''
    cls.db = 'test'
    conf = config.TestConfig()
    cls.MongoDB = conf.SERVER
    cls.username = conf.USERNAME
    cls.password = conf.PASSWORD

    login = dict(server=cls.MongoDB, username=cls.username,
                 password=cls.password, verbose=True)
    cls.src = query_protein.QueryProtein(
        database=cls.db, max_entries=20,
        collection_str='test_query_protein', readPreference='primary',
        **login)
    cls.src_1 = query_protein.QueryProtein(
        database='datanator', readPreference='nearest', **login)
    cls.src_2 = query_protein.QueryProtein(
        database='datanator-test', readPreference='nearest', **login)

    # Start from an empty collection on every run.
    cls.src.db_obj.drop_collection('test_query_protein')

    mock_docs = [
        {'uniprot_id': 'MOCK_0',
         'ancestor_taxon_id': [105, 104, 103, 102, 101],
         'ancestor_name': ['name_5', 'name_4', 'name_3', 'name_2', 'name_1'],
         'ko_number': 'MOCK_0', 'ncbi_taxonomy_id': 100, 'abundances': 0},
        # missing ancestor_taxon_id
        {'uniprot_id': 'MOCK_1', 'ko_number': 'MOCK_0'},
        {'uniprot_id': 'MOCK_2',
         'ancestor_taxon_id': [105, 104, 103],
         'ancestor_name': ['name_5', 'name_4', 'name_3'],
         'ko_number': 'MOCK_0', 'ncbi_taxonomy_id': 102, 'abundances': 2},
        # different ko_number
        {'uniprot_id': 'MOCK_3',
         'ancestor_taxon_id': [105, 104],
         'ancestor_name': ['name_5', 'name_4'],
         'ko_number': 'MOCK_1', 'ncbi_taxonomy_id': 103, 'abundances': 3},
        {'uniprot_id': 'MOCK_4',
         'ancestor_taxon_id': [105],
         'ancestor_name': ['name_5'],
         'ko_number': 'MOCK_0', 'ncbi_taxonomy_id': 104, 'abundances': 4},
        {'uniprot_id': 'MOCK_5',
         'ancestor_taxon_id': [105],
         'ancestor_name': ['name_5'],
         'ncbi_taxonomy_id': 104, 'abundances': 5},
        {'uniprot_id': 'MOCK_6',
         'ancestor_taxon_id': [105],
         'ancestor_name': ['name_5'],
         'ko_number': 'MOCK_0', 'ncbi_taxonomy_id': 104, 'abundances': 6},
    ]

    # Listed in insertion order.
    taxon_docs = [
        {'ncbi_taxonomy_id': 0, 'species_name': 's0',
         'ancestor_taxon_id': [5, 4, 3, 2, 1],
         'ancestor_name': ['s5', 's4', 's3', 's2', 's1'],
         'ko_number': 'KO0', 'uniprot_id': 'uniprot0',
         'protein_name': 'special name one',
         'kinetics': [{'ncbi_taxonomy_id': 100, 'kinlaw_id': 1},
                      {'ncbi_taxonomy_id': 101, 'kinlaw_id': 2}],
         'abundances': [], 'ko_name': ['KO0 name']},
        {'ncbi_taxonomy_id': 1, 'species_name': 's1',
         'ancestor_taxon_id': [5, 4, 3, 2],
         'ancestor_name': ['s5', 's4', 's3', 's2'],
         'ko_number': 'KO0', 'uniprot_id': 'uniprot1',
         'protein_name': 'nonspeciali name one'},
        {'ncbi_taxonomy_id': 2, 'species_name': 's2',
         'ancestor_taxon_id': [5, 4, 3],
         'ancestor_name': ['s5', 's4', 's3'],
         'ko_number': 'KO0', 'uniprot_id': 'uniprot2',
         'protein_name': 'nonspeciali name two'},
        {'ncbi_taxonomy_id': 3, 'species_name': 's3',
         'ancestor_taxon_id': [5, 4],
         'ancestor_name': ['s5', 's4'],
         'ko_number': 'ko3', 'uniprot_id': 'uniprot3',
         'protein_name': 'your name one'},
        {'ncbi_taxonomy_id': 4, 'species_name': 's4',
         'ancestor_taxon_id': [5],
         'ancestor_name': ['s5'],
         'ko_number': 'KO0', 'uniprot_id': 'uniprot4'},
        {'ncbi_taxonomy_id': 5, 'species_name': 's5',
         'ancestor_taxon_id': [],
         'ancestor_name': [],
         'ko_number': 'KO0', 'uniprot_id': 'uniprot5'},
        {'ncbi_taxonomy_id': 6, 'species_name': 's6',
         'ancestor_taxon_id': [5, 4, 3, 2],
         'ancestor_name': ['s5', 's4', 's3', 's2'],
         'ko_number': 'KO0', 'uniprot_id': 'uniprot6',
         'protein_name': 'your name two',
         'ko_name': 'ko name 0', 'abundances': []},
        {'ncbi_taxonomy_id': 7, 'species_name': 's7',
         'ancestor_taxon_id': [5, 4, 3, 2, 6],
         'ancestor_name': ['s5', 's4', 's3', 's2', 's6'],
         'ko_number': 'KO0', 'uniprot_id': 'uniprot7',
         'protein_name': 'special name two'},
        {'ncbi_taxonomy_id': 8, 'species_name': 's8',
         'ancestor_taxon_id': [5, 4, 3, 2, 6, 7],
         'ancestor_name': ['s5', 's4', 's3', 's2', 's6', 's7'],
         'ko_number': 'KO0', 'uniprot_id': 'uniprot8'},
        {'ncbi_taxonomy_id': 9, 'species_name': 's9',
         'ancestor_taxon_id': [5, 4, 3],
         'ancestor_name': ['s5', 's4', 's3'],
         'ko_number': 'KO0', 'uniprot_id': 'uniprot9'},
        {'ncbi_taxonomy_id': 10, 'species_name': 's10',
         'ancestor_taxon_id': [5, 4, 3, 9],
         'ancestor_name': ['s5', 's4', 's3', 's9'],
         'ko_number': 'KO0', 'uniprot_id': 'uniprot10'},
        {'ncbi_taxonomy_id': 11, 'species_name': 's11',
         'ancestor_taxon_id': [5, 4, 3, 2, 1, 0],
         'ancestor_name': ['s5', 's4', 's3', 's2', 's1', 's0'],
         'ko_number': 'KO0', 'uniprot_id': 'uniprot11'},
        {'ncbi_taxonomy_id': 12, 'species_name': 's12',
         'ancestor_taxon_id': [5, 4, 3, 2, 1, 0],
         'ancestor_name': ['s5', 's4', 's3', 's2', 's1', 's0'],
         'ko_number': 'KO0', 'uniprot_id': 'uniprot12'},
        {'ncbi_taxonomy_id': 13, 'species_name': 's13',
         'ancestor_taxon_id': [5, 4, 3, 2, 1],
         'ancestor_name': ['s5', 's4', 's3', 's2', 's1'],
         'ko_number': 'KO0', 'uniprot_id': 'uniprot13',
         'kinetics': [{'ncbi_taxonomy_id': 100, 'kinlaw_id': 1},
                      {'ncbi_taxonomy_id': 101, 'kinlaw_id': 2}]},
        {'ncbi_taxonomy_id': 14, 'species_name': 's6 something',
         'ancestor_taxon_id': [5, 4, 3, 2],
         'ancestor_name': ['s5', 's4', 's3', 's2'],
         'ko_number': 'KO0', 'uniprot_id': 'uniprot6',
         'protein_name': 'your name three'},
        {'ncbi_taxonomy_id': 6, 'species_name': 's6',
         'ancestor_taxon_id': [5, 4, 3, 2],
         'ancestor_name': ['s5', 's4', 's3', 's2'],
         'ko_number': 'KO1', 'uniprot_id': 'uniprot15',
         'protein_name': 'your name fifteen',
         'ko_name': ['ko name 1']},
        {'ncbi_taxonomy_id': 6, 'species_name': 's6',
         'ancestor_taxon_id': [5, 4, 3, 2],
         'ancestor_name': ['s5', 's4', 's3', 's2'],
         'ko_number': 'KO1', 'uniprot_id': 'uniprot16',
         'protein_name': 'your name fifteen'},
    ]

    cls.src.collection.insert_many(mock_docs)
    cls.src.collection.insert_many(taxon_docs)