def __init__(self, graph_type, are_bnodes_skolemized, tax_ids=None):
    """Set up the BioGRID ingest.

    :param graph_type: passed unchanged to the parent constructor
    :param are_bnodes_skolemized: passed unchanged to the parent constructor
    :param tax_ids: taxon ids stored on the instance; ``None`` selects
        the default list ``[9606, 10090, 7955]``
    """
    super().__init__(
        graph_type,
        are_bnodes_skolemized,
        'biogrid',
        ingest_title=(
            'Biological General Repository for Interaction Datasets'),
        ingest_url='http://thebiogrid.org',
        license_url=(
            'https://wiki.thebiogrid.org/doku.php/terms_and_conditions'),
    )

    # A wider candidate list that is not in use:
    # [9606, 10090, 10116, 7227, 7955, 6239, 8355]
    self.tax_ids = [9606, 10090, 7955] if tax_ids is None else tax_ids

    gene_ids_configured = (
        'test_ids' in config.get_config()
        and 'gene' in config.get_config()['test_ids'])
    if gene_ids_configured:
        self.test_ids = config.get_config()['test_ids']['gene']
    else:
        logger.warning("not configured with gene test ids.")

    # Source-specific caveat; meant to go away once the issue is cleared.
    logger.warning(
        "several MI experimental codes do not exactly map to ECO; "
        "using approximations.")
def __init__(self, graph_type, are_bnodes_skolemized, tax_ids=None):
    """Set up the Gene Ontology ingest.

    :param graph_type: passed unchanged to the parent constructor
    :param are_bnodes_skolemized: passed unchanged to the parent constructor
    :param tax_ids: taxon ids stored on the instance; ``None`` selects
        the default list ``[9606, 10090, 7955]``
    """
    super().__init__(
        graph_type,
        are_bnodes_skolemized,
        'go',
        ingest_title='Gene Ontology',
        ingest_url='http://www.geneontology.org',
        license_url=None,
        data_rights='http://geneontology.org/page/use-and-license',
    )

    self.tax_ids = tax_ids
    self.test_ids = []

    # Log the effective list (self.tax_ids), not the raw argument: in the
    # default branch the argument is None and would be reported as such.
    if self.tax_ids is None:
        self.tax_ids = [9606, 10090, 7955]
        logger.info("No taxa set. Defaulting to %s", str(self.tax_ids))
    else:
        logger.info(
            "Filtering on the following taxa: %s", str(self.tax_ids))

    if 'test_ids' in config.get_config() and \
            'gene' in config.get_config()['test_ids']:
        self.test_ids = config.get_config()['test_ids']['gene']
    else:
        logger.warning("not configured with gene test ids.")

    # Build the uniprot -> gene id map a single time, up front.
    self.uniprot_entrez_id_map = self.get_uniprot_entrez_id_map()
    self.eco_map = self.get_eco_map(self.map_files['eco_map'])
def __init__(self, tax_ids=None, gene_ids=None):
    """Set up the Ensembl ingest (legacy ``Source`` constructor form).

    :param tax_ids: taxon ids stored on the instance; ``None`` selects
        the default list ``[9606, 10090, 7955]``
    :param gene_ids: stored briefly, then replaced (see note below)
    """
    Source.__init__(self, 'ensembl')
    self.tax_ids, self.gene_ids = tax_ids, gene_ids
    self.load_bindings()

    self.dataset = Dataset(
        'ensembl', 'ENSEMBL', 'http://www.ensembl.org', None)

    if self.tax_ids is None:
        self.tax_ids = [9606, 10090, 7955]

    # NOTE(review): the gene_ids argument is overwritten here with either
    # an empty list or the configured test ids -- confirm this is intended.
    self.gene_ids = []
    if 'test_ids' in config.get_config() \
            and 'gene' in config.get_config()['test_ids']:
        self.gene_ids = config.get_config()['test_ids']['gene']
    else:
        logger.warning("not configured with gene test ids.")

    self.properties = Feature.properties
    logger.setLevel(logging.INFO)
def __init__(self, graph_type, are_bnodes_skolemized, method, tax_ids=None):
    """Set up the OMA orthology ingest.

    :param graph_type: passed unchanged to the parent constructor
    :param are_bnodes_skolemized: passed unchanged to the parent constructor
    :param method: accepted but not used in this constructor; the ingest
        name handed to the parent is the fixed string ``'oma'``
    :param tax_ids: taxon ids stored on the instance as given
    """
    super().__init__(
        graph_type,
        are_bnodes_skolemized,
        'oma',
        # NOTE(review): 'Ortholgous' looks like a typo for 'Orthologous';
        # left untouched because the title is emitted as dataset metadata.
        'Ortholgous MAtrix Hierarchical Orthologous Groups',
        'https://omabrowser.org/',
        license_url=None,
        data_rights="https://creativecommons.org/licenses/by-sa/2.5/",
    )
    self.tax_ids = tax_ids

    # Orthology group codes mapped to their relation terms.
    self._map_orthology_code_to_RO_FOO = {
        'orthologGroup': OrthologyAssoc.ortho_rel['orthologous'],
        'paralogGroup': OrthologyAssoc.ortho_rel['paralogous'],
    }

    if 'test_ids' in config.get_config() \
            and 'protein' in config.get_config()['test_ids']:
        self.test_ids = config.get_config()['test_ids']['protein']
    else:
        # The key checked is 'protein', so the message must say so.
        logger.warning("not configured with protein test ids.")
def __init__(self, graph_type, are_bnodes_skolemized):
    """Set up the Coriell ingest and log its standing caveats.

    Both arguments are passed unchanged to the parent constructor. Two
    source-specific warnings are always logged. An error is logged, but
    nothing is raised, when the configuration has no ``dbauth`` /
    ``coriell`` entry.
    """
    super().__init__(
        graph_type,
        are_bnodes_skolemized,
        'coriell',
        ingest_title='Coriell Institute for Medical Research',
        ingest_url='https://ccr.coriell.org/',
        # Not supplied: license_url, data_rights, file_handle.
        # website disclaimer:
        #   https://www.coriell.org/1/About-Us/Legal-Notice
        # wet material:
        #   https://www.coriell.org/1/NINDS/About/Shared-Usage-Guidelines
    )

    # Source-specific caveats; meant to go away once the issues are cleared.
    for caveat in (
            'We assume that if a species is not provided, '
            'that it is a Human-derived cell line',
            'We map all omim ids as a disease/phenotype entity, '
            'but should be fixed in the future'):
        LOG.warning(caveat)

    credentials_present = (
        'dbauth' in config.get_config()
        and 'coriell' in config.get_config()['dbauth'])
    if not credentials_present:
        # TODO: raise here instead of only logging.
        LOG.error("not configured with FTP user/password.")
def __init__(self, graph_type, are_bnodes_skolemized,
             tax_ids=None, gene_ids=None):
    """Set up the HGNC ingest.

    :param graph_type: passed unchanged to the parent constructor
    :param are_bnodes_skolemized: passed unchanged to the parent constructor
    :param tax_ids: taxon ids stored on the instance as given
    :param gene_ids: stored, then replaced (see note below)
    """
    super().__init__(
        graph_type,
        are_bnodes_skolemized,
        'hgnc',
        ingest_title='HGNC',
        ingest_url='https://www.genenames.org/',
        license_url=(
            'ftp://ftp.ebi.ac.uk/pub/databases/genenames/README.txt'),
    )

    self.tax_ids = tax_ids
    self.gene_ids = gene_ids

    # NOTE(review): the gene_ids argument is overwritten here with either
    # an empty list or the configured test ids -- confirm this is intended.
    self.gene_ids = []
    if 'test_ids' in config.get_config() \
            and 'gene' in config.get_config()['test_ids']:
        self.gene_ids = config.get_config()['test_ids']['gene']
    else:
        logger.warning("not configured with gene test ids.")

    # The lookup key had been mangled to 'H**o sapiens' (a text-censoring
    # artefact); the species label is 'Homo sapiens'.
    self.hs_txid = self.globaltt['Homo sapiens']
def __init__(self, graph_type, are_bnodes_skolemized):
    """Set up the GeneReviews ingest.

    Both arguments are passed unchanged to the parent constructor. The
    dataset citation is set, the book containers and OMIM lookup tables
    start empty, and ``test_ids`` comes from the configured disease test
    ids (an empty list when none are configured).
    """
    super().__init__(
        graph_type,
        are_bnodes_skolemized,
        'genereviews',
        ingest_title='Gene Reviews',
        ingest_url='http://genereviews.org/',
        license_url=None,
        data_rights='http://www.ncbi.nlm.nih.gov/books/NBK138602/',
    )
    self.dataset.set_citation('GeneReviews:NBK1116')

    self.book_ids = set()
    self.all_books = {}

    if 'test_ids' in config.get_config() and \
            'disease' in config.get_config()['test_ids']:
        # NOTE(review): an earlier comment promised OMIM-only filtering,
        # but the whole disease list is taken as-is.
        self.test_ids = config.get_config()['test_ids']['disease']
    else:
        LOG.warning("not configured with disease test ids.")
        self.test_ids = []

    self.omim_replaced = {}  # id_num -> set of id nums
    self.omim_type = {}      # id_num -> onto_term
def fetch(self, is_dl_forced=False):
    """Fetch the DISCO tables and files, then stamp the dataset version.

    The connection settings are built from fixed host/database/port values
    plus the user and password found in the configuration. The version is
    the UTC date taken from ``ST_CTIME`` of the raw file
    ``dvp.pr_nlx_157874_1``.

    :param is_dl_forced: passed unchanged to ``self.get_files``
    """
    disco_user = config.get_config()['user']['disco']
    cxn = {
        'host': 'nif-db.crbs.ucsd.edu',
        'database': 'disco_crawler',
        'port': '5432',
        'user': disco_user,
        'password': config.get_config()['keys'][disco_user],
    }

    access_url = 'jdbc:postgresql://{host}:{port}/{database}'.format(**cxn)
    self.dataset.setFileAccessUrl(access_url, is_object_literal=True)

    # Pull the tables; for a quick trial run a row limit can be supplied:
    # self.fetch_from_pgdb(self.tables, cxn, 100)
    self.fetch_from_pgdb(self.tables, cxn)
    self.get_files(is_dl_forced)

    # FIXME: Everything needed for data provenance?
    stamp_path = self.rawdir + '/' + 'dvp.pr_nlx_157874_1'
    changed_at = os.stat(stamp_path)[ST_CTIME]
    self.dataset.setVersion(
        datetime.utcfromtimestamp(changed_at).strftime("%Y-%m-%d"))
def __init__(self):
    """Set up the HPO annotation ingest (legacy ``Source`` form).

    Loads the bindings, builds the dataset record, zeroes the replaced-id
    counter, and takes ``test_ids`` from the configured disease test ids
    (an empty list when none are configured).
    """
    Source.__init__(self, 'hpoa')
    self.load_bindings()

    self.dataset = Dataset(
        'hpoa',
        'Human Phenotype Ontology',
        'http://www.human-phenotype-ontology.org',
        None,
        'http://www.human-phenotype-ontology.org/'
        'contao/index.php/legal-issues.html')

    self.replaced_id_count = 0

    if 'test_ids' in config.get_config() \
            and 'disease' in config.get_config()['test_ids']:
        self.test_ids = config.get_config()['test_ids']['disease']
    else:
        logger.warning("not configured with disease test ids.")
        self.test_ids = []

    # Source-specific caveat; meant to go away once the issue is cleared.
    logger.warning(
        "note that some ECO classes are missing for ICE, PCS, and ITM;"
        " using temporary mappings.")
def __init__(self, graph_type, are_bnodes_skolemized):
    """Set up the Coriell ingest and log its standing caveats.

    Both arguments are passed unchanged to the parent constructor. Two
    source-specific warnings are always logged. A missing ``dbauth`` /
    ``coriell`` configuration entry is logged as an error, not raised.
    """
    super().__init__(
        graph_type,
        are_bnodes_skolemized,
        'coriell',
        ingest_title='Coriell Institute for Medical Research',
        ingest_url='https://ccr.coriell.org/',
        # Not supplied: license_url, data_rights, file_handle.
        # website disclaimer:
        #   https://www.coriell.org/1/About-Us/Legal-Notice
        # wet material:
        #   https://www.coriell.org/1/NINDS/About/Shared-Usage-Guidelines
    )

    # Source-specific caveats; meant to go away once the issues are cleared.
    species_caveat = (
        'We assume that if a species is not provided, '
        'that it is a Human-derived cell line')
    omim_caveat = (
        'We map all omim ids as a disease/phenotype entity, '
        'but should be fixed in the future')
    LOG.warning(species_caveat)
    LOG.warning(omim_caveat)

    if not ('dbauth' in config.get_config()
            and 'coriell' in config.get_config()['dbauth']):
        # TODO: raise here instead of only logging.
        LOG.error("not configured with FTP user/password.")
def __init__(self, graph_type, are_bnodes_skolemized, tax_ids=None):
    """Set up the Gene Ontology ingest.

    :param graph_type: passed unchanged to the parent constructor
    :param are_bnodes_skolemized: passed unchanged to the parent constructor
    :param tax_ids: taxon ids stored on the instance; ``None`` selects
        the default list ``[9606, 10090, 7955]``
    """
    super().__init__(graph_type, are_bnodes_skolemized, 'go')

    self.tax_ids = tax_ids
    # Log the effective list (self.tax_ids), not the raw argument: in the
    # default branch the argument is None and would be reported as such.
    if self.tax_ids is None:
        self.tax_ids = [9606, 10090, 7955]
        logger.info("No taxa set. Defaulting to %s", str(self.tax_ids))
    else:
        logger.info(
            "Filtering on the following taxa: %s", str(self.tax_ids))

    # Replace the dataset record with the details of this resource.
    self.dataset = Dataset(
        'go', 'GeneOntology', 'http://www.geneontology.org', None,
        "https://creativecommons.org/licenses/by/4.0/legalcode",
        'http://geneontology.org/page/use-and-license')

    if 'test_ids' in config.get_config() and \
            'gene' in config.get_config()['test_ids']:
        self.test_ids = config.get_config()['test_ids']['gene']
    else:
        logger.warning("not configured with gene test ids.")
def __init__(self, graph_type, are_bnodes_skolemized,
             tax_ids=None, gene_ids=None):
    """Set up the Ensembl ingest.

    :param graph_type: passed unchanged to the parent constructor
    :param are_bnodes_skolemized: passed unchanged to the parent constructor
    :param tax_ids: taxon ids stored on the instance; ``None`` selects
        the default list ``[9606, 10090, 7955]``
    :param gene_ids: stored, then replaced (see note below)
    """
    super().__init__(graph_type, are_bnodes_skolemized, 'ensembl')
    self.tax_ids, self.gene_ids = tax_ids, gene_ids

    self.dataset = Dataset(
        'ensembl', 'ENSEMBL', 'http://uswest.ensembl.org', None)

    if self.tax_ids is None:
        self.tax_ids = [9606, 10090, 7955]

    # NOTE(review): the gene_ids argument is overwritten here with either
    # an empty list or the configured test ids -- confirm this is intended.
    self.gene_ids = []
    if 'test_ids' in config.get_config() \
            and 'gene' in config.get_config()['test_ids']:
        self.gene_ids = config.get_config()['test_ids']['gene']
    else:
        logger.warning("not configured with gene test ids.")

    self.properties = Feature.properties
    logger.setLevel(logging.INFO)
def __init__(self, tax_ids=None):
    """Set up the BioGRID ingest (legacy single-argument parent form).

    :param tax_ids: taxon ids stored on the instance; ``None`` selects
        the default list ``[9606, 10090, 7955]``
    """
    super().__init__('biogrid')
    self.tax_ids = tax_ids
    self.load_bindings()

    self.dataset = Dataset(
        'biogrid',
        'The BioGrid',
        'http://thebiogrid.org/',
        None,
        'http://wiki.thebiogrid.org/doku.php/terms_and_conditions')

    # A wider candidate list that is not in use:
    # [9606, 10090, 10116, 7227, 7955, 6239, 8355]
    if self.tax_ids is None:
        self.tax_ids = [9606, 10090, 7955]

    gene_ids_configured = (
        'test_ids' in config.get_config()
        and 'gene' in config.get_config()['test_ids'])
    if gene_ids_configured:
        self.test_ids = config.get_config()['test_ids']['gene']
    else:
        logger.warning("not configured with gene test ids.")

    # Source-specific caveat; meant to go away once the issue is cleared.
    logger.warning(
        "several MI experimental codes do not exactly map to ECO; "
        "using approximations.")
def __init__(self, graph_type, are_bnodes_skolemized,
             tax_ids=None, gene_ids=None):
    """Set up the Ensembl ingest.

    :param graph_type: passed unchanged to the parent constructor
    :param are_bnodes_skolemized: passed unchanged to the parent constructor
    :param tax_ids: taxon ids stored on the instance; ``None`` selects
        the default list ``[9606, 10090, 7955]``
    :param gene_ids: stored, then replaced (see note below)
    """
    super().__init__(
        graph_type,
        are_bnodes_skolemized,
        'ensembl',
        ingest_title='ENSEMBL',
        ingest_url='http://uswest.ensembl.org',
        # Not supplied: license_url, data_rights, file_handle.
    )
    self.tax_ids, self.gene_ids = tax_ids, gene_ids

    if self.tax_ids is None:
        self.tax_ids = [9606, 10090, 7955]

    # NOTE(review): the gene_ids argument is overwritten here with either
    # an empty list or the configured test ids -- confirm this is intended.
    self.gene_ids = []
    if 'test_ids' in config.get_config() \
            and 'gene' in config.get_config()['test_ids']:
        self.gene_ids = config.get_config()['test_ids']['gene']
    else:
        logger.warning("not configured with gene test ids.")

    logger.setLevel(logging.INFO)
def __init__(self, graph_type, are_bnodes_skolemized):
    """Set up the KEGG ingest.

    Both arguments are passed unchanged to the parent constructor. Any
    configured disease test ids are appended to ``self.test_ids['disease']``
    (presumably initialised by the parent class -- verify). The label and
    id-mapping tables start empty.
    """
    super().__init__(
        graph_type,
        are_bnodes_skolemized,
        'kegg',
        ingest_title='Kyoto Encyclopedia of Genes and Genomes',
        ingest_url='http://www.genome.jp/kegg/',
        license_url='http://www.kegg.jp/kegg/legal.html',
        # Not supplied: data_rights, file_handle.
    )

    # Extend the disease test ids from the configuration, or warn if the
    # configuration has none.
    if 'test_ids' in config.get_config() and \
            'disease' in config.get_config()['test_ids']:
        self.test_ids['disease'] += \
            config.get_config()['test_ids']['disease']
    else:
        LOG.warning("not configured with disease test ids.")

    self.label_hash = {}
    self.omim_disease_hash = {}  # holds the omim:kegg id mappings
    self.kegg_disease_hash = {}  # holds the kegg:omim id mappings
def __init__(self, tax_ids=None):
    """Set up the BioGRID ingest (legacy single-argument parent form).

    :param tax_ids: taxon ids stored on the instance; ``None`` selects
        the default list ``[9606, 10090, 7955]``
    """
    super().__init__("biogrid")
    self.tax_ids = tax_ids
    self.load_bindings()

    self.dataset = Dataset(
        "biogrid",
        "The BioGrid",
        "http://thebiogrid.org/",
        None,
        "http://wiki.thebiogrid.org/doku.php/terms_and_conditions",
    )

    # A wider candidate list that is not in use:
    # [9606, 10090, 10116, 7227, 7955, 6239, 8355]
    if self.tax_ids is None:
        self.tax_ids = [9606, 10090, 7955]

    # Logger.warn is a deprecated alias; Logger.warning is the supported
    # spelling and emits the same record.
    if "test_ids" in config.get_config() \
            and "gene" in config.get_config()["test_ids"]:
        self.test_ids = config.get_config()["test_ids"]["gene"]
    else:
        logger.warning("not configured with gene test ids.")

    # Source-specific caveat; meant to go away once the issue is cleared.
    logger.warning(
        "several MI experimental codes do not exactly map to ECO; "
        "using approximations.")
def __init__(self, graph_type, are_bnodes_skolemized,
             tax_ids=None, gene_ids=None):
    """Set up the NCBI Gene ingest.

    :param graph_type: passed unchanged to the parent constructor
    :param are_bnodes_skolemized: passed unchanged to the parent constructor
    :param tax_ids: taxon ids stored on the instance; ``None`` selects
        the default list ``[9606, 10090, 7955]``
    :param gene_ids: stored, then replaced (see note below)
    """
    super().__init__(graph_type, are_bnodes_skolemized, 'ncbigene')

    self.tax_ids = tax_ids
    self.gene_ids = gene_ids
    self.filter = 'taxids'

    self.dataset = Dataset(
        'ncbigene',
        'National Center for Biotechnology Information',
        'http://ncbi.nih.nlm.gov/gene',
        None,
        'http://www.ncbi.nlm.nih.gov/About/disclaimer.html',
        'https://creativecommons.org/publicdomain/mark/1.0/')

    # Log the effective list (self.tax_ids), not the raw argument: in the
    # default branch the argument is None and would be reported as such.
    if self.tax_ids is None:
        self.tax_ids = [9606, 10090, 7955]
        logger.info("No taxa set. Defaulting to %s", str(self.tax_ids))
    else:
        logger.info(
            "Filtering on the following taxa: %s", str(self.tax_ids))

    # NOTE(review): the gene_ids argument is overwritten here with either
    # an empty list or the configured test ids -- confirm this is intended.
    self.gene_ids = []
    if 'test_ids' in config.get_config() and \
            'gene' in config.get_config()['test_ids']:
        self.gene_ids = config.get_config()['test_ids']['gene']
    else:
        logger.warning("not configured with gene test ids.")

    self.properties = Feature.properties
    self.class_or_indiv = {}
def __init__(self, graph_type, are_bnodes_skolemized,
             data_release_version=None):
    """Set up the OMIM ingest.

    :param graph_type: passed to the parent constructor
    :param are_bnodes_skolemized: passed to the parent constructor
    :param data_release_version: passed to the parent constructor

    ``test_ids`` holds the local (post-``OMIM:``) part of every disease
    test id that carries the ``OMIM:`` prefix, or is empty when no disease
    test ids are available in ``self.all_test_ids``.
    """
    super().__init__(
        graph_type=graph_type,
        are_bnodes_skolemized=are_bnodes_skolemized,
        data_release_version=data_release_version,
        name='omim',
        ingest_title='Online Mendelian Inheritance in Man',
        ingest_url='http://www.omim.org',
        ingest_logo='source-omim.png',
        license_url=None,
        data_rights='http://omim.org/help/agreement',
    )

    self.omim_ncbigene_idmap = {}

    # The key is missing if EITHER level is absent. The previous `and`
    # could never report the problem and raised KeyError on the second
    # operand whenever 'keys' itself was missing.
    if 'keys' not in config.get_config() or \
            'omim' not in config.get_config()['keys']:
        LOG.error("not configured with API key.")

    if 'disease' in self.all_test_ids:
        # Keep only the local (numeric) portion of the OMIM identifiers.
        self.test_ids = [
            x[5:] for x in self.all_test_ids['disease']
            if x[:5] == 'OMIM:']
    else:
        # This branch is about disease ids; the message used to say "gene".
        LOG.warning("not configured with disease test ids.")
        self.test_ids = []

    self.disorder_regex = re.compile(r'(.*), (\d{6})\s*(?:\((\d+)\))?')
    self.nogene_regex = re.compile(r'(.*)\s+\((\d+)\)')
def __init__(self, graph_type, are_bnodes_skolemized,
             tax_ids=None, gene_ids=None):
    """Set up the Ensembl ingest.

    :param graph_type: passed unchanged to the parent constructor
    :param are_bnodes_skolemized: passed unchanged to the parent constructor
    :param tax_ids: taxon ids stored on the instance; ``None`` selects
        the default list ``[9606, 10090, 7955]``
    :param gene_ids: stored, then replaced (see note below)
    """
    super().__init__(graph_type, are_bnodes_skolemized, 'ensembl')
    self.tax_ids, self.gene_ids = tax_ids, gene_ids

    self.dataset = Dataset(
        'ensembl',
        'ENSEMBL',
        'http://uswest.ensembl.org',
        None)

    if self.tax_ids is None:
        self.tax_ids = [9606, 10090, 7955]

    # NOTE(review): the gene_ids argument is overwritten here with either
    # an empty list or the configured test ids -- confirm this is intended.
    self.gene_ids = []
    gene_ids_configured = (
        'test_ids' in config.get_config()
        and 'gene' in config.get_config()['test_ids'])
    if gene_ids_configured:
        self.gene_ids = config.get_config()['test_ids']['gene']
    else:
        logger.warning("not configured with gene test ids.")

    self.properties = Feature.properties
    logger.setLevel(logging.INFO)
def fetch(self, is_dl_forced=False):
    """Fetch the DISCO tables and files, then stamp the dataset version.

    Connection settings combine fixed host/database/port values with the
    user and password read from the configuration. The dataset version is
    the UTC date derived from ``ST_CTIME`` of the raw file
    ``dvp.pr_nlx_157874_1``.

    :param is_dl_forced: passed unchanged to ``self.get_files``
    """
    cxn = {
        'host': 'nif-db.crbs.ucsd.edu',
        'database': 'disco_crawler',
        'port': '5432',
    }
    cxn['user'] = config.get_config()['user']['disco']
    cxn['password'] = config.get_config()['keys'][cxn['user']]

    jdbc_url = ''.join((
        'jdbc:postgresql://', cxn['host'], ':', cxn['port'],
        '/', cxn['database']))
    self.dataset.setFileAccessUrl(jdbc_url, is_object_literal=True)

    # Pull the tables; for a quick trial run a row limit can be supplied:
    # self.fetch_from_pgdb(self.tables, cxn, 100)
    self.fetch_from_pgdb(self.tables, cxn)
    self.get_files(is_dl_forced)

    # FIXME: Everything needed for data provenance?
    raw_stat = os.stat('/'.join((self.rawdir, 'dvp.pr_nlx_157874_1')))
    version_date = datetime.utcfromtimestamp(raw_stat[ST_CTIME])
    self.dataset.setVersion(version_date.strftime("%Y-%m-%d"))
def __init__(self, graph_type, are_bnodes_skolemized):
    """Set up the OMIM ingest.

    Both arguments are passed unchanged to the parent constructor. An
    error is logged when no OMIM API key is configured. Configured disease
    test ids with the ``OMIM:`` prefix are appended, prefix removed, to
    ``self.test_ids`` (presumably initialised by the parent class --
    verify).
    """
    super().__init__(graph_type, are_bnodes_skolemized, 'omim')

    self.dataset = Dataset(
        'omim', 'Online Mendelian Inheritance in Man',
        'http://www.omim.org', None, 'http://omim.org/help/agreement')

    self.omim_ncbigene_idmap = {}

    # The key is missing if EITHER level is absent. The previous `and`
    # could never report the problem and raised KeyError on the second
    # operand whenever 'keys' itself was missing.
    if 'keys' not in config.get_config() or \
            'omim' not in config.get_config()['keys']:
        logger.error("not configured with API key.")

    if 'test_ids' in config.get_config() and \
            'disease' in config.get_config()['test_ids']:
        # Keep only the test ids that are OMIM ids, without the prefix.
        self.test_ids += [
            obj.replace('OMIM:', '')
            for obj in config.get_config()['test_ids']['disease']
            if re.match(r'OMIM:', obj)]
    else:
        logger.warning("not configured with disease test ids.")
def __init__(self, graph_type, are_bnodes_skolemized, tax_ids=None):
    """Set up the BioGRID ingest.

    :param graph_type: passed unchanged to the parent constructor
    :param are_bnodes_skolemized: passed unchanged to the parent constructor
    :param tax_ids: taxon ids stored on the instance; ``None`` selects
        the default list ``[9606, 10090, 7955]``
    """
    super().__init__(graph_type, are_bnodes_skolemized, 'biogrid')

    # A wider candidate list that is not in use:
    # [9606, 10090, 10116, 7227, 7955, 6239, 8355]
    self.tax_ids = tax_ids

    self.dataset = Dataset(
        'biogrid',
        'The BioGrid',
        'http://thebiogrid.org/',
        None,
        'http://wiki.thebiogrid.org/doku.php/terms_and_conditions')

    if self.tax_ids is None:
        self.tax_ids = [9606, 10090, 7955]

    if 'test_ids' in config.get_config() and \
            'gene' in config.get_config()['test_ids']:
        self.test_ids = config.get_config()['test_ids']['gene']
    else:
        logger.warning("not configured with gene test ids.")

    # Source-specific caveat; meant to go away once the issue is cleared.
    logger.warning(
        "several MI experimental codes do not exactly map to ECO; "
        "using approximations.")
def __init__(self, tax_ids=None, gene_ids=None):
    """Set up the NCBI Gene ingest (legacy ``Source`` constructor form).

    :param tax_ids: taxon ids stored on the instance; ``None`` selects
        the default list ``[9606, 10090, 7955]``
    :param gene_ids: stored, then replaced (see note below)
    """
    Source.__init__(self, 'ncbigene')

    self.tax_ids = tax_ids
    self.gene_ids = gene_ids
    self.filter = 'taxids'

    self.load_bindings()

    self.dataset = Dataset(
        'ncbigene',
        'National Center for Biotechnology Information',
        'http://ncbi.nih.nlm.gov/gene',
        None,
        'http://www.ncbi.nlm.nih.gov/About/disclaimer.html',
        'https://creativecommons.org/publicdomain/mark/1.0/')

    # Log the effective list (self.tax_ids), not the raw argument: in the
    # default branch the argument is None and would be reported as such.
    if self.tax_ids is None:
        self.tax_ids = [9606, 10090, 7955]
        logger.info("No taxa set. Defaulting to %s", str(self.tax_ids))
    else:
        logger.info(
            "Filtering on the following taxa: %s", str(self.tax_ids))

    # NOTE(review): the gene_ids argument is overwritten here with either
    # an empty list or the configured test ids -- confirm this is intended.
    self.gene_ids = []
    if 'test_ids' in config.get_config() \
            and 'gene' in config.get_config()['test_ids']:
        self.gene_ids = config.get_config()['test_ids']['gene']
    else:
        # Logger.warn is a deprecated alias of Logger.warning.
        logger.warning("not configured with gene test ids.")

    self.properties = Feature.properties
def __init__(self, graph_type, are_bnodes_skolemized, method, tax_ids=None):
    """Set up an orthology ingest whose name is given by ``method``.

    :param graph_type: passed unchanged to the parent constructor
    :param are_bnodes_skolemized: passed unchanged to the parent constructor
    :param method: passed to the parent constructor as its third argument
    :param tax_ids: taxon ids stored on the instance as given
    """
    super().__init__(graph_type, are_bnodes_skolemized, method)
    self.tax_ids = tax_ids

    # Orthology group codes mapped to their relation terms.
    self._map_orthology_code_to_RO = {
        'orthologGroup': OrthologyAssoc.ortho_rel['orthologous'],
        'paralogGroup': OrthologyAssoc.ortho_rel['paralogous'],
    }

    if 'test_ids' in config.get_config() \
            and 'protein' in config.get_config()['test_ids']:
        self.test_ids = config.get_config()['test_ids']['protein']
    else:
        # The key checked is 'protein', so the message must say so.
        logger.warning("not configured with protein test ids.")
def __init__(self, graph_type, are_bnodes_skolemized):
    """Set up the Orphanet ingest.

    Both arguments are passed unchanged to the parent constructor. A
    warning is logged when the configuration has no disease test ids;
    nothing is read from them here.
    """
    super().__init__(graph_type, are_bnodes_skolemized, 'orphanet')

    self.dataset = Dataset(
        'orphanet',
        'Orphanet',
        'http://www.orpha.net',
        None,
        'http://creativecommons.org/licenses/by-nd/3.0/',
        'http://omim.org/help/agreement')

    # Only warn when the configuration carries no disease test ids.
    disease_ids_configured = (
        'test_ids' in config.get_config()
        and 'disease' in config.get_config()['test_ids'])
    if not disease_ids_configured:
        logger.warning("not configured with disease test ids.")
def fetch(self, is_dl_forced=False):
    """Fetch the DISCO tables and files, then stamp the dataset version.

    Connection settings start from the ``dbauth`` / ``disco`` entry of the
    configuration and are completed with fixed host, database and port
    values. The dataset version is the UTC date derived from ``ST_CTIME``
    of the raw file ``dvp.pr_nlx_157874_1``.

    :param is_dl_forced: passed unchanged to ``self.get_files``
    """
    # Work on a copy: calling update() on the configuration's own mapping
    # would leak host/database/port into the shared configuration.
    cxn = dict(config.get_config()['dbauth']['disco'])
    cxn.update({
        'host': 'nif-db.crbs.ucsd.edu',
        'database': 'disco_crawler',
        'port': 5432,
    })

    self.dataset.setFileAccessUrl(
        ''.join((
            'jdbc:postgresql://', cxn['host'], ':', str(cxn['port']),
            '/', cxn['database'])),
        is_object_literal=True)

    # Pull the tables; for a quick trial run a row limit can be supplied:
    # self.fetch_from_pgdb(self.tables, cxn, 100)
    self.fetch_from_pgdb(self.tables, cxn)
    self.get_files(is_dl_forced)

    # FIXME: Everything needed for data provenance?
    st = os.stat('/'.join((self.rawdir, 'dvp.pr_nlx_157874_1')))
    filedate = datetime.utcfromtimestamp(st[ST_CTIME]).strftime("%Y-%m-%d")
    self.dataset.setVersion(filedate)
def __init__(self):
    """Set up the Coriell ingest (legacy ``Source`` constructor form).

    Loads the bindings, builds the dataset record and logs two standing
    source-specific warnings. A missing ``dbauth`` / ``coriell``
    configuration entry is logged as an error, not raised.
    """
    Source.__init__(self, 'coriell')
    self.load_bindings()

    self.dataset = Dataset(
        'coriell', 'Coriell', 'http://ccr.coriell.org/', None)

    # Source-specific caveats; meant to go away once the issues are
    # cleared. Logger.warn is a deprecated alias, so Logger.warning is used.
    logger.warning(
        'We assume that if a species is not provided, '
        'that it is a Human-derived cell line')
    logger.warning(
        'We map all omim ids as a disease/phenotype entity, '
        'but should be fixed in the future')

    # Without credentials the ingest cannot authenticate; report it.
    if 'dbauth' not in config.get_config() \
            or 'coriell' not in config.get_config()['dbauth']:
        logger.error("not configured with FTP user/password.")
def __init__(self):
    """Set up the Orphanet ingest (legacy ``Source`` constructor form).

    Loads the bindings and builds the dataset record. A warning is logged
    when the configuration has no disease test ids.
    """
    Source.__init__(self, 'orphanet')
    self.load_bindings()

    self.dataset = Dataset(
        'orphanet',
        'Orphanet',
        'http://www.orpha.net',
        None,
        'http://creativecommons.org/licenses/by-nd/3.0/',
        'http://omim.org/help/agreement')

    # Only warn when the configuration carries no disease test ids.
    if not ('test_ids' in config.get_config()
            and 'disease' in config.get_config()['test_ids']):
        logger.warning("not configured with disease test ids.")
def __init__(self):
    """Set up the EOM ingest (legacy ``Source`` constructor form).

    Merges the curie map into ``self.namespaces`` and builds the dataset
    record. A missing ``dbauth`` / ``disco`` configuration entry is logged
    as an error, not raised.
    """
    Source.__init__(self, 'eom')
    self.namespaces.update(curie_map.get())

    # Dataset details for this resource.
    # TODO put this into a conf file?
    self.dataset = Dataset(
        'eom',
        'EOM',
        'http://elementsofmorphology.nih.gov',
        None,
        'http://www.genome.gov/copyright.cfm',
        'https://creativecommons.org/publicdomain/mark/1.0/')

    # Without credentials the ingest cannot authenticate; report it.
    credentials_present = (
        'dbauth' in config.get_config()
        and 'disco' in config.get_config()['dbauth'])
    if not credentials_present:
        logger.error("not configured with PG user/password.")
def __init__(self, tax_ids=None, gene_ids=None):
    """Set up the HGNC ingest (legacy ``Source`` constructor form).

    :param tax_ids: taxon ids stored on the instance as given
    :param gene_ids: stored, then replaced (see note below)
    """
    Source.__init__(self, 'hgnc')

    self.tax_ids = tax_ids
    self.gene_ids = gene_ids

    self.load_bindings()

    self.dataset = Dataset('hgnc', 'HGNC', 'http://www.genenames.org', None)

    # NOTE(review): the gene_ids argument is overwritten here with either
    # an empty list or the configured test ids -- confirm this is intended.
    self.gene_ids = []
    if 'test_ids' in config.get_config() \
            and 'gene' in config.get_config()['test_ids']:
        self.gene_ids = config.get_config()['test_ids']['gene']
    else:
        # Logger.warn is a deprecated alias of Logger.warning.
        logger.warning("not configured with gene test ids.")

    self.properties = Feature.properties
def __init__(self, graph_type, are_bnodes_skolemized):
    """Set up the EOM ingest.

    Both arguments are passed unchanged to the parent constructor. A
    missing ``dbauth`` / ``disco`` configuration entry is logged as an
    error, not raised.
    """
    super().__init__(graph_type, are_bnodes_skolemized, 'eom')

    # Dataset details for this resource.
    # TODO put this into a conf file?
    self.dataset = Dataset(
        'eom',
        'EOM',
        'http://elementsofmorphology.nih.gov',
        None,
        'http://www.genome.gov/copyright.cfm',
        'https://creativecommons.org/publicdomain/mark/1.0/')

    # Without credentials the ingest cannot authenticate; report it.
    if not ('dbauth' in config.get_config()
            and 'disco' in config.get_config()['dbauth']):
        logger.error("not configured with PG user/password.")
def __init__(self):
    """Set up the GWAS Catalog ingest (legacy ``Source`` constructor form).

    Loads the bindings and builds the dataset record. When gene test ids
    are configured, ``test_ids`` is set to the whole configured
    ``test_ids`` mapping, not just its ``gene`` entry.
    """
    Source.__init__(self, 'gwascatalog')
    self.load_bindings()

    # TODO add 'http://www.ebi.ac.uk/gwas/docs/about'
    self.dataset = Dataset(
        'gwascatalog',
        'GWAS Catalog',
        'http://www.ebi.ac.uk/gwas/',
        'The NHGRI-EBI Catalog of published genome-wide association studies',
        'http://creativecommons.org/licenses/by/3.0/',
        None)

    gene_ids_configured = (
        'test_ids' in config.get_config()
        and 'gene' in config.get_config()['test_ids'])
    if gene_ids_configured:
        self.test_ids = config.get_config()['test_ids']
    else:
        logger.warning("not configured with gene test ids.")
def __init__(self, graph_type, are_bnodes_skolemized, method, tax_ids=None):
    """Set up an orthology ingest whose name is given by ``method``.

    :param graph_type: passed unchanged to the parent constructor
    :param are_bnodes_skolemized: passed unchanged to the parent constructor
    :param method: passed to the parent constructor as its third argument
    :param tax_ids: taxon ids stored on the instance as given
    """
    super().__init__(graph_type, are_bnodes_skolemized, method)
    self.tax_ids = tax_ids

    # Orthology group codes mapped to their relation terms.
    ortho_rel = OrthologyAssoc.ortho_rel
    self._map_orthology_code_to_RO = {
        'orthologGroup': ortho_rel['orthologous'],
        'paralogGroup': ortho_rel['paralogous'],
    }

    if 'test_ids' in config.get_config() \
            and 'protein' in config.get_config()['test_ids']:
        self.test_ids = config.get_config()['test_ids']['protein']
    else:
        # The key checked is 'protein', so the message must say so.
        logger.warning("not configured with protein test ids.")
def __init__(self):
    """Set up the KEGG ingest (legacy ``Source`` constructor form).

    Builds the dataset record and appends any configured disease test ids
    to ``self.test_ids['disease']`` (presumably initialised by the parent
    class -- verify). The label and id-mapping tables start empty.
    """
    Source.__init__(self, 'kegg')

    # Dataset details for this resource.
    self.dataset = Dataset(
        'kegg', 'KEGG', 'http://www.genome.jp/kegg/', None, None,
        'http://www.kegg.jp/kegg/legal.html')

    # Extend the disease test ids from the configuration, or warn if the
    # configuration has none.
    if 'test_ids' in config.get_config() \
            and 'disease' in config.get_config()['test_ids']:
        self.test_ids['disease'] += \
            config.get_config()['test_ids']['disease']
    else:
        # Logger.warn is a deprecated alias of Logger.warning.
        logger.warning("not configured with disease test ids.")

    self.label_hash = {}
    self.omim_disease_hash = {}  # holds the omim:kegg id mappings
    self.kegg_disease_hash = {}  # holds the kegg:omim id mappings
def __init__(self):
    """Set up the Orphanet ingest (legacy ``Source`` constructor form).

    Loads the bindings and builds the dataset record. A warning is logged
    when the configuration has no disease test ids.
    """
    Source.__init__(self, "orphanet")
    self.load_bindings()

    self.dataset = Dataset(
        "orphanet",
        "Orphanet",
        "http://www.orpha.net",
        None,
        "http://creativecommons.org/licenses/by-nd/3.0/",
        "http://omim.org/help/agreement",
    )

    # Only warn when the configuration carries no disease test ids.
    # Logger.warn is a deprecated alias, so Logger.warning is used.
    if "test_ids" not in config.get_config() \
            or "disease" not in config.get_config()["test_ids"]:
        logger.warning("not configured with disease test ids.")
def __init__(self, graph_type, are_bnodes_skolemized, tax_ids=None):
    """Set up the PANTHER ingest.

    :param graph_type: passed unchanged to the parent constructor
    :param are_bnodes_skolemized: passed unchanged to the parent constructor
    :param tax_ids: taxon ids stored on the instance as given; no default
        list is applied, so the attribute may stay ``None``
    """
    super().__init__(graph_type, are_bnodes_skolemized, 'panther')
    self.tax_ids = tax_ids

    self.dataset = Dataset(
        'panther',
        'Protein ANalysis THrough Evolutionary Relationships',
        'http://pantherdb.org/',
        None,
        'http://www.pantherdb.org/terms/disclaimer.jsp')

    if 'test_ids' in config.get_config() \
            and 'protein' in config.get_config()['test_ids']:
        self.test_ids = config.get_config()['test_ids']['protein']
    else:
        # The key checked is 'protein', so the message must say so.
        logger.warning("not configured with protein test ids.")
def __init__(self, graph_type, are_bnodes_skolemized,
             tax_ids=None, gene_ids=None):
    """Set up the HGNC ingest.

    :param graph_type: passed unchanged to the parent constructor
    :param are_bnodes_skolemized: passed unchanged to the parent constructor
    :param tax_ids: taxon ids stored on the instance as given
    :param gene_ids: stored, then replaced (see note below)
    """
    super().__init__(graph_type, are_bnodes_skolemized, 'hgnc')
    self.tax_ids, self.gene_ids = tax_ids, gene_ids

    self.dataset = Dataset(
        'hgnc',
        'HGNC',
        'http://www.genenames.org',
        None)

    # NOTE(review): the gene_ids argument is overwritten here with either
    # an empty list or the configured test ids -- confirm this is intended.
    self.gene_ids = []
    gene_ids_configured = (
        'test_ids' in config.get_config()
        and 'gene' in config.get_config()['test_ids'])
    if gene_ids_configured:
        self.gene_ids = config.get_config()['test_ids']['gene']
    else:
        logger.warning("not configured with gene test ids.")

    self.properties = Feature.properties
def __init__(self, graph_type, are_bnodes_skolemized):
    """Set up the DECIPHER ingest.

    Both arguments are passed unchanged to the parent constructor.
    ``test_ids`` comes from the configured disease test ids (an empty list
    when none are configured). ``Genotype`` and ``Model`` helpers are
    created on the instance graph.
    """
    super().__init__(graph_type, are_bnodes_skolemized, 'decipher')

    self.dataset = Dataset(
        'decipher',
        'Development Disorder Genotype – Phenotype Database',
        'https://decipher.sanger.ac.uk/',
        None,
        'https://decipher.sanger.ac.uk/legal')

    if 'test_ids' in config.get_config() \
            and 'disease' in config.get_config()['test_ids']:
        self.test_ids = config.get_config()['test_ids']['disease']
    else:
        logger.warning("not configured with disease test ids.")
        self.test_ids = []

    # Short alias for the graph, shared by both helpers below.
    self.g = self.graph
    self.geno = Genotype(self.g)
    self.model = Model(self.g)
def __init__(self, graph_type, are_bnodes_skolemized):
    """Set up the Coriell ingest and log its standing caveats.

    Both arguments are passed unchanged to the parent constructor. A
    missing ``dbauth`` / ``coriell`` configuration entry is logged as an
    error, not raised.
    """
    super().__init__(graph_type, are_bnodes_skolemized, 'coriell')

    self.dataset = Dataset(
        'coriell', 'Coriell', 'http://ccr.coriell.org/', None)

    # Source-specific caveats; meant to go away once the issues are cleared.
    for caveat in (
            'We assume that if a species is not provided, '
            'that it is a Human-derived cell line',
            'We map all omim ids as a disease/phenotype entity, '
            'but should be fixed in the future'):
        logger.warning(caveat)

    # TODO: error out, rather than only logging, when credentials are absent.
    credentials_present = (
        'dbauth' in config.get_config()
        and 'coriell' in config.get_config()['dbauth'])
    if not credentials_present:
        logger.error("not configured with FTP user/password.")
def __init__(self, graph_type, are_bnodes_skolemized):
    """Set up the GeneReviews ingest.

    Both arguments are passed unchanged to the parent constructor. The
    dataset record and its citation are set, the book containers start
    empty, and ``test_ids`` comes from the configured disease test ids
    (an empty list when none are configured).
    """
    super().__init__(graph_type, are_bnodes_skolemized, 'genereviews')

    self.dataset = Dataset(
        'genereviews',
        'Gene Reviews',
        'http://genereviews.org/',
        None,
        'http://www.ncbi.nlm.nih.gov/books/NBK138602/')
    self.dataset.set_citation('GeneReviews:NBK1116')

    self.book_ids = set()
    self.all_books = {}

    if 'test_ids' in config.get_config() and \
            'disease' in config.get_config()['test_ids']:
        # NOTE(review): an earlier comment promised OMIM-only filtering,
        # but the whole disease list is taken as-is.
        self.test_ids = config.get_config()['test_ids']['disease']
    else:
        logger.warning("not configured with disease test ids.")
        self.test_ids = []
def __init__(self, graph_type, are_bnodes_skolemized):
    """Set up the Orphanet ingest.

    Both arguments are passed unchanged to the parent constructor along
    with the ingest metadata. A warning is logged when the configuration
    has no disease test ids.
    """
    super().__init__(
        graph_type,
        are_bnodes_skolemized,
        'orphanet',
        ingest_title='Orphanet',
        ingest_url='http://www.orpha.net',
        license_url='http://creativecommons.org/licenses/by-nd/3.0/',
        data_rights='http://omim.org/help/agreement',
    )

    # Only warn when the configuration carries no disease test ids.
    # TODO remove
    disease_ids_configured = (
        'test_ids' in config.get_config()
        and 'disease' in config.get_config()['test_ids'])
    if not disease_ids_configured:
        logger.warning("not configured with disease test ids.")
def __init__(self, graph_type, are_bnodes_skolemized):
    """Set up the GeneReviews ingest.

    Both arguments are passed unchanged to the parent constructor.
    ``test_ids`` is the configured disease test id list, or an empty list
    (with a warning) when none is configured.
    """
    super().__init__(graph_type, are_bnodes_skolemized, 'genereviews')

    dataset = Dataset(
        'genereviews', 'Gene Reviews', 'http://genereviews.org/', None,
        'http://www.ncbi.nlm.nih.gov/books/NBK138602/')
    self.dataset = dataset
    self.dataset.set_citation('GeneReviews:NBK1116')

    self.book_ids = set()
    self.all_books = {}

    disease_ids_configured = (
        'test_ids' in config.get_config()
        and 'disease' in config.get_config()['test_ids'])
    if disease_ids_configured:
        # NOTE(review): an earlier comment promised OMIM-only filtering,
        # but the whole disease list is taken as-is.
        self.test_ids = config.get_config()['test_ids']['disease']
    else:
        logger.warning("not configured with disease test ids.")
        self.test_ids = []
def __init__(self, graph_type, are_bnodes_skolemized):
    """Set up the Elements of Morphology ingest.

    Both arguments are passed unchanged to the parent constructor along
    with the ingest metadata. A missing ``dbauth`` / ``disco``
    configuration entry is logged as an error, not raised.
    """
    super().__init__(
        graph_type,
        are_bnodes_skolemized,
        'eom',
        ingest_title='Elements of Morphology',
        ingest_url='http://elementsofmorphology.nih.gov',
        data_rights='http://www.genome.gov/copyright.cfm',
        license_url='https://creativecommons.org/publicdomain/mark/1.0/',
    )

    # Without credentials the ingest cannot authenticate; report it.
    if not ('dbauth' in config.get_config()
            and 'disco' in config.get_config()['dbauth']):
        logger.error("not configured with PG user/password.")
def __init__(self, tax_ids=None):
    """Set up the PANTHER ingest (legacy single-argument parent form).

    :param tax_ids: taxon ids stored on the instance as given; no default
        list is applied, so the attribute may stay ``None``
    """
    super().__init__('panther')
    self.tax_ids = tax_ids
    self.load_bindings()

    self.dataset = Dataset(
        'panther',
        'Protein ANalysis THrough Evolutionary Relationships',
        'http://pantherdb.org/',
        None,
        'http://www.pantherdb.org/terms/disclaimer.jsp')

    if 'test_ids' in config.get_config() \
            and 'protein' in config.get_config()['test_ids']:
        self.test_ids = config.get_config()['test_ids']['protein']
    else:
        # Logger.warn is a deprecated alias of Logger.warning, and the key
        # checked is 'protein', so the message must say so.
        logger.warning("not configured with protein test ids.")
def __init__(self, graph_type, are_bnodes_skolemized,
             tax_ids=None, gene_ids=None):
    """Set up the ClinVar ingest.

    :param graph_type: passed unchanged to the parent constructor
    :param are_bnodes_skolemized: passed unchanged to the parent constructor
    :param tax_ids: taxon ids stored on the instance as given
    :param gene_ids: stored on the instance; replaced by the configured
        gene test ids when those exist

    ``disease_ids`` is only assigned when disease test ids are configured.
    """
    super().__init__(graph_type, are_bnodes_skolemized, 'clinvar')

    self.tax_ids, self.gene_ids = tax_ids, gene_ids
    self.filter = 'taxids'

    self.dataset = Dataset(
        'ClinVar',
        'National Center for Biotechnology Information',
        'http://www.ncbi.nlm.nih.gov/clinvar/',
        None,
        'http://www.ncbi.nlm.nih.gov/About/disclaimer.html',
        'https://creativecommons.org/publicdomain/mark/1.0/')

    if 'test_ids' in config.get_config() and \
            'gene' in config.get_config()['test_ids']:
        self.gene_ids = config.get_config()['test_ids']['gene']
    else:
        logger.warning("not configured with gene test ids.")

    if 'test_ids' in config.get_config() and \
            'disease' in config.get_config()['test_ids']:
        self.disease_ids = config.get_config()['test_ids']['disease']
    else:
        logger.warning("not configured with disease test ids.")

    self.properties = Feature.properties
def __init__(self):
    """
    Set up the CTD ingest.

    Initializes the source as 'ctd', attaches its Dataset record, loads the
    gene and disease test ids from the configuration (falling back to empty
    lists, with a warning, when they are not configured) and creates the
    graph helper objects.
    """
    Source.__init__(self, 'ctd')

    self.dataset = Dataset(
        'ctd',
        'CTD',
        'http://ctdbase.org',
        None,
        'http://ctdbase.org/about/legal.jsp')

    conf = config.get_config()
    configured_ids = conf['test_ids'] if 'test_ids' in conf else {}

    if 'gene' in configured_ids:
        self.test_geneids = configured_ids['gene']
    else:
        logger.warning("not configured with gene test ids.")
        self.test_geneids = []

    if 'disease' in configured_ids:
        self.test_diseaseids = configured_ids['disease']
    else:
        logger.warning("not configured with disease test ids.")
        self.test_diseaseids = []

    self.gu = GraphUtils(curie_map.get())
    self.g = self.graph
    self.geno = Genotype(self.g)
def __init__(self, graph_type, are_bnodes_skolemized):
    """
    Set up the Comparative Toxicogenomics Database ('ctd') ingest.

    Passes the graph settings and ingest metadata to the parent
    initializer, loads gene and disease test ids from the configuration
    (empty lists plus a warning when missing), and builds the Genotype and
    Pathway helpers on this source's graph.

    :param graph_type: forwarded to the parent initializer
    :param are_bnodes_skolemized: forwarded to the parent initializer
    """
    super().__init__(
        graph_type,
        are_bnodes_skolemized,
        'ctd',
        ingest_title='Comparative Toxicogenomics Database',
        ingest_url='http://ctdbase.org',
        license_url='http://ctdbase.org/about/legal.jsp'
    )

    conf = config.get_config()
    have_test_ids = 'test_ids' in conf

    if have_test_ids and 'gene' in conf['test_ids']:
        self.test_geneids = conf['test_ids']['gene']
    else:
        logger.warning("not configured with gene test ids.")
        self.test_geneids = []

    if have_test_ids and 'disease' in conf['test_ids']:
        self.test_diseaseids = conf['test_ids']['disease']
    else:
        logger.warning("not configured with disease test ids.")
        self.test_diseaseids = []

    self.geno = Genotype(self.graph)
    self.pathway = Pathway(self.graph)
def __init__(self, graph_type, are_bnodes_skolemized):
    """
    Set up the CTD ingest.

    Initializes the parent with the source name 'ctd', attaches the Dataset
    record, loads gene and disease test ids from the configuration (empty
    lists plus a warning when missing), and builds the Genotype and Pathway
    helpers on this source's graph.

    :param graph_type: forwarded to the parent initializer
    :param are_bnodes_skolemized: forwarded to the parent initializer
    """
    super().__init__(graph_type, are_bnodes_skolemized, 'ctd')

    self.dataset = Dataset(
        'ctd',
        'CTD',
        'http://ctdbase.org',
        None,
        'http://ctdbase.org/about/legal.jsp')

    conf = config.get_config()
    gene_configured = 'test_ids' in conf and 'gene' in conf['test_ids']
    if not gene_configured:
        logger.warning("not configured with gene test ids.")
        self.test_geneids = []
    else:
        self.test_geneids = conf['test_ids']['gene']

    disease_configured = \
        'test_ids' in conf and 'disease' in conf['test_ids']
    if not disease_configured:
        logger.warning("not configured with disease test ids.")
        self.test_diseaseids = []
    else:
        self.test_diseaseids = conf['test_ids']['disease']

    # self.g is kept as an alias of the graph.
    self.g = self.graph
    self.geno = Genotype(self.graph)
    self.pathway = Pathway(self.graph)
def __init__(self, graph_type, are_bnodes_skolemized):
    """
    Set up the KEGG ingest.

    Initializes the parent with the source name 'kegg', attaches the
    Dataset record for this resource, appends any configured disease test
    ids to ``self.test_ids['disease']`` and creates the empty lookup tables
    used for labels and OMIM/KEGG disease mappings.

    :param graph_type: forwarded to the parent initializer
    :param are_bnodes_skolemized: forwarded to the parent initializer
    """
    super().__init__(graph_type, are_bnodes_skolemized, 'kegg')

    # details about this resource
    self.dataset = Dataset(
        'kegg',
        'KEGG',
        'http://www.genome.jp/kegg/',
        None,
        None,
        'http://www.kegg.jp/kegg/legal.html')

    # Extend the existing disease test ids with the configured ones;
    # warn when the configuration has none.
    conf = config.get_config()
    if 'test_ids' in conf and 'disease' in conf['test_ids']:
        self.test_ids['disease'] += conf['test_ids']['disease']
    else:
        logger.warning("not configured with disease test ids.")

    self.label_hash = {}
    # mappings of omim:kegg ids
    self.omim_disease_hash = {}
    # mappings of kegg:omim ids
    self.kegg_disease_hash = {}
def __init__(self):
    """
    Set up the DECIPHER ingest.

    Initializes the source as 'decipher', loads bindings, attaches the
    Dataset record, reads the disease test ids from the configuration
    (empty list plus a warning when missing) and creates the graph helper
    objects.
    """
    Source.__init__(self, 'decipher')
    self.load_bindings()

    self.dataset = Dataset(
        'decipher',
        'Development Disorder Genotype – Phenotype Database',
        'https://decipher.sanger.ac.uk/',
        None,
        'https://decipher.sanger.ac.uk/legal')

    conf = config.get_config()
    if 'test_ids' in conf and 'disease' in conf['test_ids']:
        self.test_ids = conf['test_ids']['disease']
    else:
        logger.warning("not configured with disease test ids.")
        self.test_ids = []

    self.gu = GraphUtils(curie_map.get())
    self.g = self.graph
    self.geno = Genotype(self.g)
def __init__(self, graph_type, are_bnodes_skolemized,
             tax_ids=None, gene_ids=None):
    """
    Set up the NCBI Gene ingest.

    :param graph_type: forwarded to the parent initializer
    :param are_bnodes_skolemized: forwarded to the parent initializer
    :param tax_ids: taxa to filter on; defaults to [9606, 10090, 7955]
        when None
    :param gene_ids: accepted, but not retained: ``self.gene_ids`` is reset
        to an empty list and then filled from the configured gene test ids
        (behavior kept from the original code).
        NOTE(review): confirm whether the argument should be honored.
    """
    super().__init__(
        graph_type,
        are_bnodes_skolemized,
        'ncbigene',
        ingest_title='National Center for Biotechnology Information',
        ingest_url='http://ncbi.nih.nlm.gov/gene',
        # ingest_desc=None,
        license_url='https://creativecommons.org/publicdomain/mark/1.0/',
        data_rights='http://www.ncbi.nlm.nih.gov/About/disclaimer.html'
        # file_handle=None
    )

    self.tax_ids = tax_ids
    self.id_filter = 'taxids'  # the original comment lists 'geneids' too

    # Defaults
    if self.tax_ids is None:
        self.tax_ids = [9606, 10090, 7955]
        # Log the value actually applied; the bare parameter is None here.
        logger.info("No taxa set. Defaulting to %s", str(self.tax_ids))
    else:
        logger.info(
            "Filtering on the following taxa: %s", str(self.tax_ids))

    self.gene_ids = []
    conf = config.get_config()
    if 'test_ids' not in conf or 'gene' not in conf['test_ids']:
        logger.warning("not configured with gene test ids.")
    else:
        self.gene_ids = conf['test_ids']['gene']

    self.class_or_indiv = {}

    return
def __init__(self):
    """
    Set up the OMIM ingest.

    Initializes the source as 'omim', loads bindings, attaches the Dataset
    record, verifies that an OMIM API key is configured (logging an error
    when it is not) and appends the OMIM-prefixed disease test ids from the
    configuration to ``self.test_ids`` with the 'OMIM:' prefix removed.
    """
    Source.__init__(self, 'omim')
    self.load_bindings()

    self.dataset = Dataset(
        'omim',
        'Online Mendelian Inheritance in Man',
        'http://www.omim.org',
        None,
        'http://omim.org/help/agreement')

    conf = config.get_config()

    # Check that the API key is configured; let the user know otherwise.
    # This must be `or`: with `and`, a missing 'keys' section raised
    # KeyError on the second operand, and a present one short-circuited
    # to False so the error was never reported.
    if 'keys' not in conf or 'omim' not in conf['keys']:
        logger.error("not configured with API key.")

    # Check whether any disease test ids are configured; otherwise, warn.
    if 'test_ids' not in conf or 'disease' not in conf['test_ids']:
        logger.warning("not configured with disease test ids.")
    else:
        # select only those test ids that are OMIM's
        self.test_ids += [
            disease_id.replace('OMIM:', '')
            for disease_id in conf['test_ids']['disease']
            if disease_id.startswith('OMIM:')]

    return
def __init__(self, graph_type, are_bnodes_skolemized):
    """
    Set up the GWAS Catalog ingest.

    :param graph_type: forwarded to the parent initializer; must be
        'rdf_graph'
    :param are_bnodes_skolemized: forwarded to the parent initializer
    :raises ValueError: if graph_type is not 'rdf_graph'
    """
    super().__init__(graph_type, are_bnodes_skolemized, 'gwascatalog')

    if graph_type != 'rdf_graph':
        # The message previously named "UDP", a different source.
        raise ValueError("GWASCatalog requires a rdf_graph")

    self.dataset = Dataset(
        'gwascatalog',
        'GWAS Catalog',
        'http://www.ebi.ac.uk/gwas/',
        'The NHGRI-EBI Catalog of published genome-wide association studies',
        'http://creativecommons.org/licenses/by/3.0/',
        None)
    # 'http://www.ebi.ac.uk/gwas/docs/about'  # TODO add this

    conf = config.get_config()
    if 'test_ids' not in conf or 'gene' not in conf['test_ids']:
        logger.warning("not configured with gene test ids.")
    else:
        # The whole 'test_ids' mapping is stored, not just the gene list.
        self.test_ids = conf['test_ids']

    # build a dictionary of genomic location to identifiers,
    # to try to get the equivalences
    self.id_location_map = dict()

    return
def __init__(self):
    """
    Set up the GeneReviews ingest.

    Initializes the source as 'genereviews', loads bindings, attaches the
    Dataset record and its citation, creates the book bookkeeping
    containers, and reads the disease test ids from the configuration
    (empty list plus a warning when missing).
    """
    Source.__init__(self, 'genereviews')
    self.load_bindings()

    self.dataset = Dataset(
        'genereviews',
        'Gene Reviews',
        'http://genereviews.org/',
        None,
        'http://www.ncbi.nlm.nih.gov/books/NBK138602/')
    self.dataset.set_citation('GeneReviews:NBK1116')

    self.gu = GraphUtils(curie_map.get())

    self.book_ids = set()
    self.all_books = {}

    conf = config.get_config()
    if 'test_ids' in conf and 'disease' in conf['test_ids']:
        # all configured disease test ids are taken, unfiltered
        self.test_ids = conf['test_ids']['disease']
    else:
        logger.warning("not configured with disease test ids.")
        self.test_ids = list()