def create_release(self):
    """
    Look up, or create if missing, the Wikidata item for this Disease Ontology release.

    The release title embeds ``self.date`` formatted as ``YYYY-MM-DD``; the same
    string is passed to ``wdi_helpers.Release`` as its third positional argument.
    ``self.version`` is used as the archive URL. On success the value returned by
    ``get_or_create`` is stored on ``self.release``.

    :raises ValueError: if ``get_or_create`` returns a falsy value
    """
    edition = self.date.strftime('%Y-%m-%d')
    # Only the date part is formatted, so the time fields render as zeros.
    published = self.date.date().strftime('+%Y-%m-%dT%H:%M:%SZ')

    release_helper = wdi_helpers.Release(
        'Disease Ontology release {}'.format(edition),
        'Release of the Disease Ontology',
        edition,
        archive_url=self.version,
        edition_of_wdid='Q5282129',
        pub_date=published)

    release_item = release_helper.get_or_create(self.login)
    if not release_item:
        raise ValueError("unable to create release")
    self.release = release_item
def create_release(self, login):
    """
    Build the release helper for this source and resolve it to a Wikidata item.

    The ``wdi_helpers.Release`` object is kept on ``self.release``; the value
    returned by its ``get_or_create`` call is stored on ``self.release_qid``.

    :param login: passed straight through to ``Release.get_or_create``
    :raises ValueError: if ``get_or_create`` returns a falsy value
    """
    title = '{} release {}'.format(self.NAME, self.edition)
    description = 'Release of the {}'.format(self.NAME)
    # Only the date part is formatted, so the time fields render as zeros.
    published = self.date.date().strftime('+%Y-%m-%dT%H:%M:%SZ')

    # The helper is stored before get_or_create runs, so it stays available
    # on the instance even when the lookup/creation below fails.
    self.release = wdi_helpers.Release(
        title,
        description,
        self.edition,
        archive_url=self.version,
        edition_of_wdid=self.QID,
        pub_date=published,
        sparql_endpoint_url=self.sparql_endpoint_url,
        mediawiki_api_url=self.mediawiki_api_url)

    release_item = self.release.get_or_create(login)
    if not release_item:
        raise ValueError("unable to create release")
    self.release_qid = release_item
def create_release(self):
    """
    Look up, or create if missing, the Wikidata release item for ``self.ontology_qid``.

    The ontology's label is fetched with ``Graph.get_item_label`` and printed, then
    used to build the release title and description. ``self.date`` formatted as
    ``YYYY-MM-DD`` is used both in the title and as the third positional argument
    of ``wdi_helpers.Release``. On success the value returned by ``get_or_create``
    is stored on ``self.release_qid``.

    :raises ValueError: if ``get_or_create`` returns a falsy value
    """
    # The label of the ontology item drives the release title/description.
    label = Graph.get_item_label(self.ontology_qid)
    print(label)

    edition = self.date.strftime('%Y-%m-%d')
    # Only the date part is formatted, so the time fields render as zeros.
    published = self.date.date().strftime('+%Y-%m-%dT%H:%M:%SZ')

    release_helper = wdi_helpers.Release(
        '{} release {}'.format(label, edition),
        'Release of {}'.format(label),
        edition,
        archive_url=self.version,
        edition_of_wdid=self.ontology_qid,
        pub_date=published)

    release_item = release_helper.get_or_create(self.login)
    if not release_item:
        raise ValueError("unable to create release")
    self.release_qid = release_item
def make_ref_source(source_doc, id_prop, identifier, login=None):
    """
    Build the list of reference statements for a value taken from a known source.

    Two shapes are produced, depending on whether ``source_doc`` has a
    ``'release'`` key:

    * with a release: ``[stated in (P248) -> release item, id link]``; the release
      item is obtained through ``wdi_helpers.Release(...).get_or_create(login)``
    * without a release: ``[stated in (P248) -> source item, retrieved (P813), id link]``,
      where the retrieved date is parsed from ``source_doc['timestamp']`` (``YYYYMMDD``)

    Note: when a release is present, ``source_doc['release']`` is overwritten
    in place with its ``str()`` form.

    :param source_doc: mapping describing the source; the code reads the ``'id'`` key
        plus either ``'release'`` or ``'timestamp'``, e.g.
        ``{'id': 'uniprot', 'timestamp': '20161006'}`` or ``{'id': 'ensembl', 'release': '86'}``
    :param id_prop: property number used for the identifier link; must start with "P"
    :param identifier: identifier within the source; converted with ``str()``
    :param login: handed to ``Release.get_or_create`` (release branch only)
    :return: list of reference statements
    :raises ValueError: if ``source_doc['id']`` is not a key of ``source_items``
    """
    source = source_doc['id']
    if source not in source_items:
        raise ValueError("Unknown source for reference creation: {}".format(source))
    assert id_prop.startswith("P")

    id_link = wdi_core.WDString(value=str(identifier), prop_nr=id_prop, is_reference=True)

    if "release" not in source_doc:
        # No release number: cite the source itself plus the retrieval date.
        retrieved_on = datetime.strptime(source_doc['timestamp'], "%Y%m%d")
        source_stmt = wdi_core.WDItemID(value=source_items[source], prop_nr='P248', is_reference=True)
        retrieved_stmt = wdi_core.WDTime(retrieved_on.strftime('+%Y-%m-%dT00:00:00Z'), prop_nr='P813',
                                         is_reference=True)
        return [source_stmt, retrieved_stmt, id_link]

    # Release number available: cite the specific release item instead.
    release_label = str(source_doc['release'])
    source_doc['release'] = release_label  # normalised in place, as callers may observe
    release_helper = wdi_helpers.Release(
        "{} Release {}".format(source, release_label),
        "Release {} of {}".format(release_label, source),
        release_label,
        edition_of_wdid=source_items[source])
    release_item = release_helper.get_or_create(login)
    release_stmt = wdi_core.WDItemID(value=release_item, prop_nr='P248', is_reference=True)
    return [release_stmt, id_link]
args = parser.parse_args()

# When neither bot was selected, run both.
if not args.protein and not args.items:
    args.protein = True
    args.items = True

# Fall back to the default log directory when none (or an empty one) is given.
log_dir = args.log_dir or "./logs"

login = wdi_login.WDLogin(user=WDUSER, pwd=WDPASS)

version_date = date_parse(args.interpro_date)
version_num = args.interpro_version

# Get or create the Wikidata item describing this InterPro release.
release_title = "InterPro Release {}".format(version_num)
release_description = "Release {} of the InterPro database & software".format(version_num)
release_archive = "ftp://ftp.ebi.ac.uk/pub/databases/interpro/{}/".format(version_num)
release = wdi_helpers.Release(
    title=release_title,
    description=release_description,
    edition_of_wdid="Q3047275",
    edition=version_num,
    pub_date=version_date,
    archive_url=release_archive)
release_wdid = release.get_or_create(login)
print("release_wdid: {}".format(release_wdid))

if args.items:
    print("running item bot")
    # --dummy disables writing.
    ItemsBot.main(login, release_wdid, log_dir=log_dir, run_one=args.run_one, write=not args.dummy)
def main(metadata, log_dir="./logs", fast_run=True, write=True):
    """
    Collect HomoloGene ortholog groups from MyGene and hand every gene, together
    with its orthologs in other taxa, to ``do_item``.

    Steps: log in and set up logging; build the Entrez / HomoloGene / taxonomy id
    mappings; download the protein-coding MyGene records that have a ``homologene``
    field; group orthologs per taxon and gene; get or create the HomoloGene release
    item used in references; then process each gene. Per-gene failures are logged
    and counted, they do not stop the run.

    :param metadata: looks like: {"ensembl" : 84, "cpdb" : 31, "netaffy" : "na35", "ucsc" : "20160620", .. }
                     ``metadata['homologene']`` is read as the HomoloGene build number.
    :type metadata: dict
    :param log_dir: dir to store logs
    :type log_dir: str
    :param fast_run: use fast run mode (accepted for interface compatibility; this
                     function does not consult it itself)
    :type fast_run: bool
    :param write: actually perform write (forwarded to ``do_item``)
    :type write: bool
    :return: None
    """
    # login
    login = wdi_login.WDLogin(user=WDUSER, pwd=WDPASS)
    wdi_core.WDItemEngine.setup_logging(log_dir=log_dir, logger_name='WD_logger', log_name=log_name,
                                        header=json.dumps(__metadata__))

    # get all ids mappings
    entrez_wdid = wdi_helpers.id_mapper(PROPS['Entrez Gene ID'])
    wdid_entrez = {v: k for k, v in entrez_wdid.items()}
    homo_wdid = wdi_helpers.id_mapper(PROPS['HomoloGene ID'], return_as_set=True)
    # invert: one HomoloGene id maps to a set of items, we need item -> HomoloGene id
    wdid_homo = dict()
    for homologene_id, wdids in homo_wdid.items():
        for wdid in wdids:
            wdid_homo[wdid] = homologene_id
    entrez_homo = {
        wdid_entrez[wdid]: homologene_id
        for wdid, homologene_id in wdid_homo.items()
        if wdid in wdid_entrez
    }
    taxon_wdid = wdi_helpers.id_mapper(PROPS['NCBI Taxonomy ID'])

    # only do certain records
    mgd = MyGeneDownloader(
        q="_exists_:homologene AND type_of_gene:protein-coding",
        fields=','.join(['taxid', 'homologene', 'entrezgene']))
    docs, total = mgd.query()
    docs = list(tqdm(docs, total=total))
    records = HelperBot.tag_mygene_docs(docs, metadata)

    # group together all orthologs
    # d[taxid][entrezgene] = { set of entrezgene ids for orthologs }
    d = defaultdict(lambda: defaultdict(set))
    entrez_taxon = dict()  # keep this for the qualifier on the statements
    for doc in records:
        this_taxid = doc['taxid']['@value']
        this_entrez = doc['entrezgene']['@value']
        entrez_taxon[str(this_entrez)] = str(this_taxid)
        if str(this_entrez) not in entrez_wdid:
            continue
        for taxid, entrez in doc['homologene']['@value']['genes']:
            if taxid == 4932 and this_taxid == 559292:
                # ridiculous workaround because entrez has the taxid for the strain and homologene has it for the species
                # TODO: This needs to be fixed if you want to use other things that may have species/strains .. ?
                continue
            if taxid != this_taxid and str(entrez) in entrez_wdid:
                d[str(this_taxid)][str(this_entrez)].add(str(entrez))

    print("taxid: # of genes  : {}".format({k: len(v) for k, v in d.items()}))

    homogene_ver = metadata['homologene']
    release = wdi_helpers.Release(
        "HomoloGene build{}".format(homogene_ver),
        "Version of HomoloGene",
        homogene_ver,
        edition_of_wdid='Q468215',
        archive_url='ftp://ftp.ncbi.nih.gov/pub/HomoloGene/build{}/'.format(homogene_ver)).get_or_create(login)

    def reference(homogeneid):
        # Reference factory handed to do_item: stated in <release item> + HomoloGene ID.
        return [
            wdi_core.WDItemID(release, PROPS['stated in'], is_reference=True),
            wdi_core.WDExternalID(homogeneid, PROPS['HomoloGene ID'], is_reference=True)
        ]

    ec = 0
    for taxid, subd in tqdm(d.items()):
        for entrezgene, orthologs in tqdm(subd.items(), leave=False):
            try:
                do_item(entrezgene, orthologs, reference, entrez_homo, entrez_taxon, taxon_wdid, entrez_wdid, login,
                        write)
            except Exception as e:
                # Best effort per gene: record the failure and keep going.
                msg = wdi_helpers.format_msg(entrezgene, PROPS['Entrez Gene ID'], None, str(e), type(e))
                wdi_core.WDItemEngine.log("ERROR", msg)
                ec += 1
        # clear the fast run store once we move on to the next taxon
        wdi_core.WDItemEngine.fast_run_store = []
        wdi_core.WDItemEngine.fast_run_container = None

    print("Completed succesfully with {} exceptions".format(ec))