def bioconcept_alias_starter():
    """Yield alias rows for bioconcepts from GO synonyms and phenotype CV terms.

    Rows are produced in three passes: GO synonyms, CV-term synonyms and
    CV-term dbxrefs. Each row is a dict with ``display_name``, ``source``
    (the 'SGD' source), ``bioconcept_id``, ``date_created`` and
    ``created_by``; dbxref rows also carry ``category``. When the target
    bioconcept is not present in the NEX database a message is printed and
    ``None`` is yielded instead.

    Both sessions are closed in a ``finally`` clause so they are released
    when the generator finishes, raises, or is closed before exhaustion.
    """
    bud_session = bud_session_maker()
    nex_session = nex_session_maker()
    try:
        key_to_bioconcept = {x.unique_key(): x for x in nex_session.query(Bioconcept).all()}
        key_to_source = {x.unique_key(): x for x in nex_session.query(Source).all()}

        def observable_key(cvterm_name):
            # The CV term named 'observable' is looked up under the name 'ypo'.
            name = cvterm_name.lower()
            if name == 'observable':
                name = 'ypo'
            return (create_format_name(name), 'OBSERVABLE')

        # GO aliases
        go_query = bud_session.query(Go).options(joinedload('go_gosynonyms'))
        for old_goterm in make_db_starter(go_query, 1000)():
            go_key = (get_go_format_name(old_goterm.go_go_id), 'GO')
            if go_key in key_to_bioconcept:
                for go_gosynonym in old_goterm.go_gosynonyms:
                    synonym = go_gosynonym.gosynonym
                    yield {'display_name': synonym.name,
                           'source': key_to_source['SGD'],
                           'bioconcept_id': key_to_bioconcept[go_key].id,
                           'date_created': synonym.date_created,
                           'created_by': synonym.created_by}
            else:
                print('Go term not found: ' + str(go_key))
                yield None

        # Phenotype aliases
        # NOTE(review): cv_no == 6 presumably selects the phenotype ontology -- confirm.
        synonym_query = bud_session.query(CVTermSynonym).join(CVTerm).filter(CVTerm.cv_no == 6)
        for cvtermsynonym in synonym_query.all():
            phenotype_key = observable_key(cvtermsynonym.cvterm.name)
            if phenotype_key in key_to_bioconcept:
                yield {'display_name': cvtermsynonym.synonym,
                       'source': key_to_source['SGD'],
                       'bioconcept_id': key_to_bioconcept[phenotype_key].id,
                       'date_created': cvtermsynonym.date_created,
                       'created_by': cvtermsynonym.created_by}
            else:
                print('Phenotype not found: ' + str(phenotype_key))
                yield None

        dbxref_query = bud_session.query(CVTermDbxref).join(CVTerm).filter(CVTerm.cv_no == 6).options(joinedload('dbxref'))
        for cvterm_dbxref in dbxref_query.all():
            phenotype_key = observable_key(cvterm_dbxref.cvterm.name)
            if phenotype_key in key_to_bioconcept:
                yield {'display_name': cvterm_dbxref.dbxref.dbxref_id,
                       'source': key_to_source['SGD'],
                       'category': cvterm_dbxref.dbxref.dbxref_type,
                       'bioconcept_id': key_to_bioconcept[phenotype_key].id,
                       'date_created': cvterm_dbxref.dbxref.date_created,
                       'created_by': cvterm_dbxref.dbxref.created_by}
            else:
                print('Phenotype not found: ' + str(phenotype_key))
                yield None
    finally:
        bud_session.close()
        nex_session.close()
def create_phenotype_format_name(observable, qualifier):
    """Build a phenotype format name from an observable and an optional qualifier.

    Without a qualifier the name is derived from the lower-cased observable
    alone; with one it is derived from ``qualifier + '_' + observable`` (both
    lower-cased). A missing observable is replaced by ``'.'`` in both cases.

    :param observable: observable name, or None.
    :param qualifier: qualifier name, or None.
    :return: whatever ``create_format_name`` returns for the combined name.
    """
    # Normalize once, up front, so a missing observable is handled the same
    # way whether or not a qualifier is present.
    if observable is None:
        observable = '.'
    if qualifier is None:
        return create_format_name(observable.lower())
    return create_format_name(qualifier.lower() + '_' + observable.lower())
def phenotype_starter():
    """Yield phenotype rows built from BUD phenotypes.

    First pass: one row per BUD ``Phenotype`` whose observable exists in NEX.

    Second pass: for phenotype features whose observable is listed in
    ``chemical_phenotypes``, the generic observable name is specialised with
    the chemical names of the feature's experiment, and a row (including a
    ``description``) is yielded when that specialised observable exists in
    NEX. ``None`` is yielded for a feature that has no experiment or whose
    experiment has no chemicals, and the feature is then skipped.

    Both sessions are closed in a ``finally`` clause so they are released
    when the generator finishes, raises, or is closed before exhaustion.
    """
    # old observable -> (placeholder replaced by the chemical, description prefix)
    chemical_rules = {
        'resistance to chemicals': ('chemicals', 'The level of resistance to exposure to '),
        'chemical compound accumulation': ('chemical compound', 'The production and/or storage of '),
        'chemical compound excretion': ('chemical compound', 'The excretion from the cell of '),
    }

    bud_session = bud_session_maker()
    nex_session = nex_session_maker()
    try:
        key_to_source = {x.unique_key(): x for x in nex_session.query(Source).all()}
        key_to_observable = {x.unique_key(): x for x in nex_session.query(Observable).all()}

        for bud_obj in bud_session.query(Phenotype).all():
            observable_key = (create_format_name(bud_obj.observable).lower(), 'OBSERVABLE')
            if observable_key in key_to_observable:
                yield {'source': key_to_source['SGD'],
                       'observable': key_to_observable[observable_key],
                       'qualifier': bud_obj.qualifier,
                       'date_created': bud_obj.date_created,
                       'created_by': bud_obj.created_by}

        feature_query = bud_session.query(PhenotypeFeature).join(PhenotypeFeature.phenotype).filter(
            Phenotype.observable.in_(chemical_phenotypes))
        for bud_obj in make_db_starter(feature_query, 1000)():
            if bud_obj.experiment is None:
                yield None
                # Without this the next line dereferenced None when the
                # generator was resumed.
                continue
            chemicals = bud_obj.experiment.chemicals
            if len(chemicals) == 0:
                yield None
                # Nothing to substitute into the observable name; skip.
                continue

            chemical = ' and '.join([x[0] for x in chemicals])
            old_observable = bud_obj.phenotype.observable
            rule = chemical_rules.get(old_observable)
            if rule is None:
                # Not one of the observables that gets specialised.
                continue

            placeholder, description_prefix = rule
            new_observable = old_observable.replace(placeholder, chemical)
            description = description_prefix + chemical + '.'

            observable_key = (create_format_name(new_observable).lower(), 'OBSERVABLE')
            if observable_key in key_to_observable:
                yield {'source': key_to_source['SGD'],
                       'observable': key_to_observable[observable_key],
                       'qualifier': bud_obj.phenotype.qualifier,
                       'description': description,
                       'date_created': bud_obj.date_created,
                       'created_by': bud_obj.created_by}
    finally:
        bud_session.close()
        nex_session.close()
def author_reference_starter():
    """Yield author/reference link rows copied from the BUD database.

    One dict is yielded per BUD author-reference record whose author and
    reference both exist in NEX; the row uses the 'PubMed' source and takes
    its creation metadata from the BUD author. For any other record a
    message is printed and nothing is yielded.

    Both sessions are closed in a ``finally`` clause so they are released
    when the generator finishes, raises, or is closed before exhaustion.
    """
    bud_session = bud_session_maker()
    nex_session = nex_session_maker()
    try:
        id_to_reference = {x.id: x for x in nex_session.query(Reference).all()}
        key_to_author = {x.unique_key(): x for x in nex_session.query(Author).all()}
        key_to_source = {x.unique_key(): x for x in nex_session.query(Source)}

        for old_author_reference in bud_session.query(OldAuthorReference).all():
            author_key = create_format_name(old_author_reference.author.name)
            reference_id = old_author_reference.reference_id
            if author_key in key_to_author and reference_id in id_to_reference:
                yield {'id': old_author_reference.id,
                       'source': key_to_source['PubMed'],
                       'author': key_to_author[author_key],
                       'reference': id_to_reference[reference_id],
                       'order': old_author_reference.order,
                       'author_type': old_author_reference.type,
                       'date_created': old_author_reference.author.date_created,
                       'created_by': old_author_reference.author.created_by}
            else:
                print('Author or reference not found: ' + str(author_key) + ' ' + str(reference_id))
    finally:
        bud_session.close()
        nex_session.close()
def __init__(self, obj_json):
    """Populate the instance from ``obj_json`` and derive format name and link.

    A display name of 'observable' is treated specially: it is renamed to
    'Yeast Phenotype Ontology' with format name 'ypo' and the ontology
    overview link. Every other display name gets a format name derived from
    its lower-cased form and an ``/observable/.../overview`` link.
    """
    UpdateByJsonMixin.__init__(self, obj_json)

    if self.display_name != 'observable':
        self.format_name = create_format_name(self.display_name.lower())
        self.link = ''.join(('/observable/', self.format_name, '/overview'))
        return

    # Special-cased term: fixed display name, format name and link.
    self.display_name = 'Yeast Phenotype Ontology'
    self.format_name = 'ypo'
    self.link = '/ontology/phenotype/ypo/overview'
def reftype_starter():
    """Yield reference-type rows copied from the BUD database.

    One dict is yielded per BUD ``RefType``. ``source`` is the NEX source
    matching the formatted BUD source name, or ``None`` when there is no
    such source.

    Both sessions are closed in a ``finally`` clause so they are released
    when the generator finishes, raises, or is closed before exhaustion.
    """
    bud_session = bud_session_maker()
    nex_session = nex_session_maker()
    try:
        key_to_source = {x.unique_key(): x for x in nex_session.query(Source)}

        for old_reftype in bud_session.query(RefType).all():
            source_key = create_format_name(old_reftype.source)
            yield {'id': old_reftype.id,
                   'display_name': old_reftype.name,
                   'source': key_to_source.get(source_key),
                   'date_created': old_reftype.date_created,
                   'created_by': old_reftype.created_by}
    finally:
        bud_session.close()
        nex_session.close()
def __init__(self, obj_json):
    """Populate the instance from ``obj_json`` and derive format name and link.

    ``obj_json["display_name"]`` is required (a missing key raises
    ``KeyError``); the link has the form ``/tag/<format_name>/overview``.
    """
    UpdateByJsonMixin.__init__(self, obj_json)
    tag_name = obj_json["display_name"]
    self.format_name = create_format_name(tag_name)
    self.link = "".join(("/tag/", self.format_name, "/overview"))
def __init__(self, obj_json):
    """Populate the instance from ``obj_json`` and derive format name and link.

    The format name is the formatted display name with every '.' removed;
    the link has the form ``/strain/<format_name>/overview``.
    """
    UpdateByJsonMixin.__init__(self, obj_json)
    formatted = create_format_name(obj_json.get("display_name"))
    self.format_name = formatted.replace(".", "")
    self.link = "".join(("/strain/", self.format_name, "/overview"))
def reference_starter():
    """Yield reference rows copied from the BUD database, newest id first.

    Each row is a dict describing one BUD reference, with its journal, book
    and source resolved to NEX objects (``None`` when no match exists) and
    its DOI / PubMed Central id taken from the reference's URLs.

    When the reference's source is unknown a message is printed and ``None``
    is yielded; the row itself is still yielded afterwards with
    ``source=None``.

    Both sessions are closed in a ``finally`` clause so they are released
    when the generator finishes, raises, or is closed before exhaustion.
    """
    bud_session = bud_session_maker()
    nex_session = nex_session_maker()
    try:
        key_to_journal = {x.unique_key(): x for x in nex_session.query(Journal).all()}
        key_to_book = {x.unique_key(): x for x in nex_session.query(Book).all()}
        key_to_source = {x.unique_key(): x for x in nex_session.query(Source).all()}

        # One pass over the reference URLs fills both lookups (the same query
        # used to be issued twice).
        reference_id_to_doi = {}
        reference_id_to_pmcid = {}
        for ref_url in bud_session.query(Ref_URL).options(joinedload('url')).all():
            url_type = ref_url.url.url_type
            if url_type == 'DOI full text':
                # NOTE(review): [18:] presumably strips an 18-character
                # resolver prefix such as 'http://dx.doi.org/' -- confirm.
                reference_id_to_doi[ref_url.reference_id] = ref_url.url.url[18:]
            elif url_type == 'PMC full text':
                # Drop the PMC article prefix and the final character
                # (presumably a trailing '/').
                reference_id_to_pmcid[ref_url.reference_id] = ref_url.url.url.replace(
                    'http://www.ncbi.nlm.nih.gov/pmc/articles/', '')[:-1]

        reference_query = bud_session.query(Reference).order_by(Reference.id.desc()).options(
            joinedload('book'), joinedload('journal'))
        for old_reference in reference_query.all():
            citation = create_citation(old_reference.citation)
            display_name = create_display_name(citation)

            new_journal = None
            old_journal = old_reference.journal
            if old_journal is not None:
                abbreviation = old_journal.abbreviation
                if old_journal.issn == '0948-5023':
                    # This ISSN is always keyed under a fixed abbreviation.
                    abbreviation = 'J Mol Model (Online)'
                new_journal = key_to_journal.get((old_journal.full_name, abbreviation))

            new_book = None
            old_book = old_reference.book
            if old_book is not None:
                new_book = key_to_book.get((old_book.title, old_book.volume_title))

            year = None
            if old_reference.year is not None:
                year = int(old_reference.year)

            source_key = create_format_name(old_reference.source)
            source = None
            if source_key in key_to_source:
                source = key_to_source[source_key]
            else:
                print('Source not found: ' + source_key)
                # NOTE(review): the row below is still yielded with
                # source=None after this marker; kept as-is -- confirm that
                # consumers expect both.
                yield None

            yield {'id': old_reference.id,
                   'display_name': display_name,
                   'sgdid': old_reference.dbxref_id,
                   'source': source,
                   'ref_status': old_reference.status,
                   'pubmed_id': old_reference.pubmed_id,
                   'fulltext_status': old_reference.pdf_status,
                   'citation': citation,
                   'year': year,
                   'date_published': old_reference.date_published,
                   'date_revised': old_reference.date_revised,
                   'issue': old_reference.issue,
                   'page': old_reference.page,
                   'volume': old_reference.volume,
                   'title': old_reference.title,
                   'journal': new_journal,
                   'book': new_book,
                   'doi': reference_id_to_doi.get(old_reference.id),
                   'pubmed_central_id': reference_id_to_pmcid.get(old_reference.id),
                   'date_created': old_reference.date_created,
                   'created_by': old_reference.created_by}
    finally:
        bud_session.close()
        nex_session.close()
def __init__(self, obj_json):
    """Populate the instance from ``obj_json`` and derive its format name.

    The format name is computed from the display name that the mixin
    initialiser leaves on the instance.
    """
    UpdateByJsonMixin.__init__(self, obj_json)
    display_name = self.display_name
    self.format_name = create_format_name(display_name)
def __init__(self, obj_json):
    """Populate the instance from ``obj_json`` and derive format name and link.

    The format name is computed from the instance's display name; the link
    has the form ``/author/<format_name>/overview``.
    """
    UpdateByJsonMixin.__init__(self, obj_json)
    display_name = self.display_name
    self.format_name = create_format_name(display_name)
    self.link = ''.join(('/author/', self.format_name, '/overview'))
def bioentity_url_starter():
    """Yield URL rows for bioentities.

    Rows come from three places, in order:

    1. BUD feature URLs (``FeatUrl``), one row per web display of the URL.
    2. BUD dbxref URLs (``DbxrefFeat``), one row per web display of each URL
       of the dbxref.
    3. A fixed set of SGD links generated for every NEX ``Locus``.

    In the first two passes the ``_SUBSTITUTE_THIS_`` placeholder in the URL
    is replaced according to the URL type. ``None`` is yielded when the
    feature is not a known bioentity, and also (after printing a message)
    when the URL type is not recognised.

    Both sessions are closed in a ``finally`` clause so they are released
    when the generator finishes, raises, or is closed before exhaustion.
    """
    placeholder = '_SUBSTITUTE_THIS_'

    bud_session = bud_session_maker()
    nex_session = nex_session_maker()
    try:
        key_to_source = {x.unique_key(): x for x in nex_session.query(Source).all()}
        id_to_bioentity = {x.id: x for x in nex_session.query(Bioentity).all()}

        def locus_row(locus, display_name, link, category):
            # Fixed SGD-sourced link attached to a locus.
            return {'display_name': display_name,
                    'link': link,
                    'source': key_to_source['SGD'],
                    'category': category,
                    'bioentity_id': locus.id}

        # 1. URLs attached directly to features.
        format_name_types = ('query by SGD ORF name with anchor',
                             'query by SGD ORF name',
                             'query by ID assigned by database')
        feat_url_query = bud_session.query(FeatUrl).options(joinedload('url'))
        for bud_obj in make_db_starter(feat_url_query, 1000)():
            old_url = bud_obj.url
            url_type = old_url.url_type
            link = old_url.url
            bioentity_id = bud_obj.feature_id
            for old_webdisplay in old_url.displays:
                if bioentity_id not in id_to_bioentity:
                    yield None
                    continue

                bioentity = id_to_bioentity[bioentity_id]
                if url_type == 'query by SGDID':
                    link = link.replace(placeholder, str(bioentity.sgdid))
                elif url_type in format_name_types:
                    link = link.replace(placeholder, str(bioentity.format_name))
                else:
                    print("Can't handle this url. " + str(old_url.url_type))
                    # NOTE(review): the row below is still yielded with the
                    # unsubstituted link after this marker; kept as-is --
                    # confirm that this is intended.
                    yield None

                category = None if old_webdisplay.label_location not in category_mapping else category_mapping[old_webdisplay.label_location]
                yield {'display_name': old_webdisplay.label_name,
                       'link': link,
                       'source': key_to_source[create_format_name(old_url.source)],
                       'category': category,
                       'bioentity_id': bioentity_id,
                       'date_created': old_url.date_created,
                       'created_by': old_url.created_by}

        # 2. URLs reached through a feature's dbxrefs.
        dbxref_query = bud_session.query(DbxrefFeat).options(joinedload('dbxref'), joinedload('dbxref.dbxref_urls'))
        for bud_obj in make_db_starter(dbxref_query, 1000)():
            old_urls = bud_obj.dbxref.urls
            dbxref_id = bud_obj.dbxref.dbxref_id
            bioentity_id = bud_obj.feature_id
            for old_url in old_urls:
                for old_webdisplay in old_url.displays:
                    if bioentity_id not in id_to_bioentity:
                        yield None
                        continue

                    bioentity = id_to_bioentity[bioentity_id]
                    url_type = old_url.url_type
                    link = old_url.url
                    if url_type == 'query by SGD ORF name with anchor' or url_type == 'query by SGD ORF name':
                        link = link.replace(placeholder, bioentity.format_name)
                    elif url_type == 'query by ID assigned by database':
                        # Unlike the first pass, the external id is used here.
                        link = link.replace(placeholder, str(dbxref_id))
                    elif url_type == 'query by SGDID':
                        link = link.replace(placeholder, bioentity.sgdid)
                    else:
                        print("Can't handle this url. " + str(old_url.url_type))
                        # NOTE(review): same fall-through as in the first pass.
                        yield None

                    category = None if old_webdisplay.label_location not in category_mapping else category_mapping[old_webdisplay.label_location]
                    yield {'display_name': old_webdisplay.label_name,
                           'link': link,
                           'source': key_to_source[create_format_name(old_url.source)],
                           'category': category,
                           'bioentity_id': bioentity_id,
                           'date_created': old_url.date_created,
                           'created_by': old_url.created_by}

        # 3. Fixed links generated for every locus.
        for locus in nex_session.query(Locus).all():
            yield locus_row(locus, 'SPELL',
                            'http://spell.yeastgenome.org/search/show_results?search_string=' + locus.format_name,
                            'LOCUS_EXPRESSION')
            yield locus_row(locus, 'Gene/Sequence Resources',
                            '/cgi-bin/seqTools?back=1&seqname=' + locus.format_name,
                            'LOCUS_SEQUENCE')
            yield locus_row(locus, 'ORF Map',
                            '/cgi-bin/ORFMAP/ORFmap?dbid=' + locus.sgdid,
                            'LOCUS_SEQUENCE')
            yield locus_row(locus, 'GBrowse',
                            'http://browse.yeastgenome.org/fgb2/gbrowse/scgenome/?name=' + locus.format_name,
                            'LOCUS_SEQUENCE')
            yield locus_row(locus, 'BLASTN',
                            '/cgi-bin/blast-sgd.pl?name=' + locus.format_name,
                            'LOCUS_SEQUENCE_SECTION')
            yield locus_row(locus, 'BLASTP',
                            '/cgi-bin/blast-sgd.pl?name=' + locus.format_name + '&suffix=prot',
                            'LOCUS_SEQUENCE_SECTION')
            yield locus_row(locus, 'Variant Viewer',
                            '/variant-viewer#/' + locus.sgdid,
                            'LOCUS_SEQUENCE_OTHER_STRAINS')
            yield locus_row(locus, 'Yeast Phenotype Ontology',
                            '/ontology/phenotype/ypo/overview',
                            'LOCUS_PHENOTYPE_ONTOLOGY')
    finally:
        bud_session.close()
        nex_session.close()
def __init__(self, obj_json):
    """Populate the instance from ``obj_json`` and derive its format name.

    The format name is ``None`` when ``obj_json`` has no display name (or it
    is ``None``); otherwise it is computed from the display name.
    """
    UpdateByJsonMixin.__init__(self, obj_json)
    display_name = obj_json.get('display_name')
    if display_name is None:
        self.format_name = None
    else:
        self.format_name = create_format_name(display_name)
def __init__(self, obj_json):
    """Populate the instance from ``obj_json`` and derive format name and link.

    Without a display name both ``format_name`` and ``link`` are ``None``.
    Otherwise the format name is the formatted display name cut to at most
    95 characters, and the link is ``/chemical/<format_name>/overview``.
    """
    UpdateByJsonMixin.__init__(self, obj_json)

    display_name = obj_json.get('display_name')
    if display_name is None:
        self.format_name = None
    else:
        self.format_name = create_format_name(display_name)[:95]

    if self.format_name is None:
        self.link = None
    else:
        self.link = ''.join(('/chemical/', self.format_name, '/overview'))
def bioconcept_relation_starter():
    """Yield parent/child relation rows between bioconcepts.

    Rows are produced in five passes:

    1. GO relations from BUD ``GoPath`` records with ``generation == 1``.
    2. 'GO_SLIM' relations: for every child in the NEX 'is a' relations, its
       ancestors are walked level by level and a row is yielded for each
       ancestor that belongs to the 'Yeast GO-Slim' set.
    3. Phenotype relations from BUD ``CVTermRel`` records.
    4. 'is a' relations linking each chemical-specific observable to its
       generic observable.
    5. 'PHENOTYPE_SLIM' relations linking each NEX phenotype to its nearest
       ancestor observable (following the first parent) that is in the
       phenotype slim set.

    ``None`` is yielded (after printing a message) when the bioconcepts of a
    GO or chemical-phenotype relation cannot be found, and silently when a
    phenotype feature has no experiment.

    Both sessions are closed in a ``finally`` clause so they are released
    when the generator finishes, raises, or is closed before exhaustion.
    """
    # GO ids that are never added to the slim set.
    # NOTE(review): presumably the three GO root terms -- confirm.
    excluded_slim_go_ids = ('GO:0008150', 'GO:0003674', 'GO:0005575')

    bud_session = bud_session_maker()
    nex_session = nex_session_maker()
    try:
        key_to_bioconcept = {x.unique_key(): x for x in nex_session.query(Bioconcept).all()}
        key_to_source = {x.unique_key(): x for x in nex_session.query(Source).all()}

        # 1. GO relations
        gopath_query = bud_session.query(GoPath).filter(GoPath.generation == 1).options(
            joinedload('child'), joinedload('ancestor'))
        for gopath in make_db_starter(gopath_query, 1000)():
            parent_key = (get_go_format_name(gopath.ancestor.go_go_id), 'GO')
            child_key = (get_go_format_name(gopath.child.go_go_id), 'GO')
            if parent_key in key_to_bioconcept and child_key in key_to_bioconcept:
                yield {'source': key_to_source['SGD'],
                       'relation_type': gopath.relationship_type,
                       'parent_id': key_to_bioconcept[parent_key].id,
                       'child_id': key_to_bioconcept[child_key].id}
            else:
                print('Could not find go. Parent: ' + str(parent_key) + ' Child: ' + str(child_key))
                yield None

        # Collect the ids of the GO-Slim terms.
        old_gosets = bud_session.query(GoSet).filter(GoSet.name == 'Yeast GO-Slim').options(joinedload('go')).all()
        slim_ids = set()
        for old_goset in old_gosets:
            go_key = (get_go_format_name(old_goset.go.go_go_id), 'GO')
            if go_key[0] not in excluded_slim_go_ids and go_key in key_to_bioconcept:
                slim_ids.add(key_to_bioconcept[go_key].id)
            else:
                print('GO term not found: ' + str(go_key))

        # 2. GO Slim
        go_child_id_to_parent_ids = {}
        for go_relation in nex_session.query(Bioconceptrelation).filter(Bioconceptrelation.relation_type == 'is a'):
            go_child_id_to_parent_ids.setdefault(go_relation.child_id, []).append(go_relation.parent_id)

        for child_id in go_child_id_to_parent_ids:
            # Walk up one generation per iteration until no parents remain.
            parent_ids = go_child_id_to_parent_ids[child_id]
            while len(parent_ids) > 0:
                new_parent_ids = set()
                for parent_id in parent_ids:
                    if parent_id in slim_ids:
                        yield {'source': key_to_source['SGD'],
                               'parent_id': parent_id,
                               'child_id': child_id,
                               'relation_type': 'GO_SLIM'}
                    if parent_id in go_child_id_to_parent_ids:
                        new_parent_ids.update(go_child_id_to_parent_ids[parent_id])
                parent_ids = new_parent_ids

        # 3. Phenotype relations
        for cvtermrel in bud_session.query(CVTermRel).options(joinedload('child'), joinedload('parent')).all():
            parent_key = (create_format_name(cvtermrel.parent.name.lower()), 'OBSERVABLE')
            child_key = (create_format_name(cvtermrel.child.name.lower()), 'OBSERVABLE')
            if parent_key == ('observable', 'OBSERVABLE'):
                # The 'observable' parent is looked up under the name 'ypo'.
                parent_key = ('ypo', 'OBSERVABLE')
            if parent_key in key_to_bioconcept and child_key in key_to_bioconcept:
                yield {'source': key_to_source['SGD'],
                       'relation_type': cvtermrel.relationship_type,
                       'parent_id': key_to_bioconcept[parent_key].id,
                       'child_id': key_to_bioconcept[child_key].id,
                       'date_created': cvtermrel.date_created,
                       'created_by': cvtermrel.created_by}

        # 4. Chemical-specific observables are children of the generic one.
        chemical_query = bud_session.query(OldPhenotype).filter(OldPhenotype.observable.in_(chemical_phenotypes)).options(
            joinedload('phenotype_features'), joinedload('phenotype_features.experiment'))
        for old_phenotype in make_db_starter(chemical_query, 1000)():
            for phenotype_feature in old_phenotype.phenotype_features:
                if phenotype_feature.experiment is None:
                    # A feature without an experiment has no chemicals to
                    # name; skip it instead of failing on None.
                    yield None
                    continue

                chemical = ' and '.join([x[0] for x in phenotype_feature.experiment.chemicals])
                old_observable = old_phenotype.observable
                if old_observable == 'resistance to chemicals':
                    new_observable = old_observable.replace('chemicals', chemical)
                else:
                    new_observable = old_observable.replace('chemical compound', chemical)

                parent_key = (create_format_name(old_observable.lower()), 'OBSERVABLE')
                child_key = (create_format_name(new_observable.lower()), 'OBSERVABLE')
                if parent_key in key_to_bioconcept and child_key in key_to_bioconcept:
                    yield {'source': key_to_source['SGD'],
                           'relation_type': 'is a',
                           'parent_id': key_to_bioconcept[parent_key].id,
                           'child_id': key_to_bioconcept[child_key].id}
                else:
                    print('Could not find phenotype. Parent: ' + str(parent_key) + ' Child: ' + str(child_key))
                    yield None

        # 5. Phenotype Slim
        phenotype_slim = {'cell_death', 'chromosome-plasmid_maintenance', 'intracellular_transport',
                          'mitotic_cell_cycle', 'prion_state', 'stress_resistance', 'budding',
                          'filamentous_growth', 'lifespan', 'sexual_cycle', 'viable', 'inviable',
                          'competitive_fitness', 'viability', 'haploinsufficient', 'haploproficient',
                          'metabolism_and_growth', 'cellular_morphology', 'culture_appearance', 'ypo'}
        for phenotype in nex_session.query(Phenotype).all():
            # Climb through the first parent until a slim term is reached or
            # the chain ends.
            ancestor = phenotype.observable
            while ancestor is not None and ancestor.format_name not in phenotype_slim:
                if len(ancestor.parents) > 0:
                    ancestor = ancestor.parents[0].parent
                else:
                    ancestor = None
            if ancestor is not None:
                yield {'source': key_to_source['SGD'],
                       'parent_id': ancestor.id,
                       'child_id': phenotype.id,
                       'relation_type': 'PHENOTYPE_SLIM'}
    finally:
        bud_session.close()
        nex_session.close()
def __init__(self, obj_json):
    """Populate the instance from ``obj_json`` and derive format name and link.

    The format name is the formatted display name with every '.' removed;
    the link has the form ``/strain/<format_name>/overview``.
    """
    UpdateByJsonMixin.__init__(self, obj_json)
    formatted = create_format_name(obj_json.get('display_name'))
    self.format_name = formatted.replace('.', '')
    self.link = ''.join(('/strain/', self.format_name, '/overview'))
def __init__(self, obj_json):
    """Populate the instance from ``obj_json`` and derive format name and link.

    ``obj_json['display_name']`` is required (a missing key raises
    ``KeyError``); the link has the form ``/tag/<format_name>/overview``.
    """
    UpdateByJsonMixin.__init__(self, obj_json)
    tag_name = obj_json['display_name']
    self.format_name = create_format_name(tag_name)
    self.link = ''.join(('/tag/', self.format_name, '/overview'))
def __init__(self, obj_json):
    """Populate the instance from ``obj_json`` and derive display/format names.

    The display name is the title. The format name is computed from the
    title, followed by ``'_' + volume_title`` when a volume title is set.
    """
    UpdateByJsonMixin.__init__(self, obj_json)
    self.display_name = self.title
    # The suffix is computed separately on purpose: written inline as
    # ``title + '' if ... else ('_' + volume_title)`` the conditional applies
    # to the whole sum, which dropped the title whenever a volume title
    # was present.
    volume_suffix = '' if self.volume_title is None else '_' + self.volume_title
    self.format_name = create_format_name(self.title + volume_suffix)
def __init__(self, obj_json):
    """Populate the instance from ``obj_json`` and derive format name and category.

    The format name is computed from ``obj_json``'s display name. The
    category is only assigned when the ``eco_id`` of ``obj_json`` has an
    entry in ``eco_id_to_category``; otherwise it is left untouched.
    """
    UpdateByJsonMixin.__init__(self, obj_json)
    self.format_name = create_format_name(obj_json.get('display_name'))

    eco_id = obj_json.get('eco_id')
    if eco_id in eco_id_to_category:
        self.category = eco_id_to_category[eco_id]
def __init__(self, obj_json):
    """Populate the instance from ``obj_json`` and derive display/format names.

    The display name is the title, falling back to the abbreviation
    (``med_abbr``) when there is no title. The format name is computed from
    the first 99 characters of the display name when there is no
    abbreviation, and otherwise from the first 50 characters of the display
    name, an underscore, and the first 49 characters of the abbreviation.
    """
    UpdateByJsonMixin.__init__(self, obj_json)

    if self.title is not None:
        self.display_name = self.title
    else:
        self.display_name = self.med_abbr

    if self.med_abbr is None:
        raw_name = self.display_name[:99]
    else:
        raw_name = self.display_name[:50] + '_' + self.med_abbr[:49]
    self.format_name = create_format_name(raw_name)