def load(self, context, debug=0):
    """Load every value group and its values from the database.

    context -- database context providing get_cursor()
    debug   -- unused; retained for backward compatibility
    """
    cursor = context.get_cursor()
    # Load all value groups
    cursor.execute('SELECT * FROM dv_group')
    fields = [d[0] for d in cursor.description]
    desc = dtuple.TupleDescriptor([[f] for f in fields])
    rows = cursor.fetchall()
    for row in rows:
        # FIX: removed the old "if row == None: break" guard -- fetchall()
        # never yields None rows, so it was dead code.
        row = dtuple.DatabaseTuple(desc, row)
        group = ValueGroup(row['group_name'], row['description'],
            int(row['dv_group_id']))
        # ...then every value belonging to this group
        cursor.execute("""
            SELECT * FROM dv_values
            WHERE dv_group_id = %s
            """, group.value_group_id)
        val_fields = [d[0] for d in cursor.description]
        val_desc = dtuple.TupleDescriptor([[f] for f in val_fields])
        val_rows = cursor.fetchall()
        for val_row in val_rows:
            val_row = dtuple.DatabaseTuple(val_desc, val_row)
            value = Value(int(val_row['dv_group_id']),
                int(val_row['serial_number']),
                val_row['description'], int(val_row['dv_id']))
            group.add_value(value)
        self.add_group(group)
def load(self, context):
    """Load this category/concept link from the database.

    If uid is unknown (-1), the row is looked up by the
    (category_id, concept_id) pair and uid/is_broad/is_default are
    filled in.  Otherwise the row is looked up by uid and
    concept_id/is_broad/is_default are filled in.  Afterwards the
    linked group ids and the Concept object are loaded.
    """
    cursor = context.get_cursor()
    if self.uid == -1:
        if not self.concept_id == -1 \
            and not self.category_id == -1:
            cursor.execute("""
                SELECT * FROM category_concepts
                WHERE category_id = %s
                AND concept_id = %s
                """, (self.category_id, self.concept_id))
            fields = [d[0] for d in cursor.description]
            desc = dtuple.TupleDescriptor([[f] for f in fields])
            row = cursor.fetchone()
            if row:
                row = dtuple.DatabaseTuple(desc, row)
                self.uid = row['uid']
                self.is_broad = row['is_broad']
                self.is_default = row['is_default']
            else:
                self.logger.debug('No matched rows')
                return
        else:
            self.logger.debug('Not enough info')
            return
    else:
        cursor.execute("""
            SELECT * FROM category_concepts
            WHERE uid = %s
            """, self.uid)
        fields = [d[0] for d in cursor.description]
        desc = dtuple.TupleDescriptor([[f] for f in fields])
        row = cursor.fetchone()
        if row:
            row = dtuple.DatabaseTuple(desc, row)
            self.concept_id = row['concept_id']
            self.is_broad = row['is_broad']
            self.is_default = row['is_default']
    # NOTE(review): nesting reconstructed from a collapsed source line;
    # the group/concept loading below is assumed to run for both lookup
    # paths (both fall through here on success) -- confirm against
    # version control.
    cursor.execute("""
        SELECT * FROM category_concept_groups
        WHERE category_concept_id = %s
        """, self.uid)
    fields = [d[0] for d in cursor.description]
    desc = dtuple.TupleDescriptor([[f] for f in fields])
    rows = cursor.fetchall()
    for row in rows:
        row = dtuple.DatabaseTuple(desc, row)
        self.groups.append(row['category_group_id'])
    self.concept = Concept(context, self.concept_id)
def load(self, context, load_synonyms=True):
    """Load this concept's preferred name and source codes from the
    UMLS tables, and optionally its synonym terms."""
    if self.uid == -1:
        return
    # Is it already loaded?  Convenience check for client calls
    # don't need to verify loads from the cache.
    if context.config.use_cache:
        try:
            if self.term:
                # Already loaded
                return
        except AttributeError:
            # Not already loaded, so continue
            pass
    cursor = context.get_cursor()
    cursor.execute("""
        SELECT umls_concepts.preferred_name,
        umls_concepts_sources.umls_source_id,
        umls_concepts_sources.umls_source_code
        FROM umls_concepts, umls_concepts_sources
        WHERE umls_concepts_sources.umls_concept_id = umls_concepts.umls_concept_id
        AND umls_concepts.umls_concept_id = %s
        """, self.uid)
    fields = [d[0] for d in cursor.description]
    desc = dtuple.TupleDescriptor([[f] for f in fields])
    rows = cursor.fetchall()
    for row in rows:
        row = dtuple.DatabaseTuple(desc, row)
        self.term = row['preferred_name']
        self.sources.append(
            (row['umls_source_id'], row['umls_source_code']))
        # NOTE(review): these two attributes end up holding the values
        # from the *last* source row of the result set -- presumably
        # intentional, but confirm.
        self.concept_source_id = row['umls_source_id']
        self.concept_source_code = row['umls_source_code']
    if load_synonyms:
        # NOTE: Is there any value in using umls_term_id? It's ignored here.
        cursor.execute("""
            SELECT term
            FROM umls_terms
            WHERE umls_concept_id = %s
            """, self.uid)
        fields = [d[0] for d in cursor.description]
        desc = dtuple.TupleDescriptor([[f] for f in fields])
        rows = cursor.fetchall()
        for row in rows:
            row = dtuple.DatabaseTuple(desc, row)
            synonym = row['term']
            if not synonym in self.synonyms:
                self.synonyms.append(synonym)
def load(self, context, load_concepts=False):
    """Load this category, its groups, and optionally its concepts.

    context       -- database context providing get_cursor()
    load_concepts -- when True, also load each linked CategoryConcept
    """
    if self.uid == -1:
        return
    cursor = context.get_cursor()
    cursor.execute("""
        SELECT * FROM categories
        WHERE uid = %s
        """, self.uid)
    fields = [d[0] for d in cursor.description]
    desc = dtuple.TupleDescriptor([[f] for f in fields])
    row = cursor.fetchone()
    # FIX: guard against a missing row -- the old code passed None
    # straight into DatabaseTuple and raised an unhelpful error.
    if row is None:
        return
    row = dtuple.DatabaseTuple(desc, row)
    self.name = row['name']
    self.set_types(row['concept_types'])
    # Load this category's groups
    cursor.execute("""
        SELECT * FROM category_groups
        WHERE category_id = %s
        """, self.uid)
    fields = [d[0] for d in cursor.description]
    desc = dtuple.TupleDescriptor([[f] for f in fields])
    rows = cursor.fetchall()
    for row in rows:
        row = dtuple.DatabaseTuple(desc, row)
        group = CategoryGroup(uid=row['uid'],
            category_id=self.uid, name=row['name'])
        self.add_group(group)
    if load_concepts:
        cursor.execute("""
            SELECT * FROM category_concepts
            WHERE category_id = %s
            """, self.uid)
        fields = [d[0] for d in cursor.description]
        desc = dtuple.TupleDescriptor([[f] for f in fields])
        rows = cursor.fetchall()
        for row in rows:
            row = dtuple.DatabaseTuple(desc, row)
            cat_concept = CategoryConcept(uid=row['uid'],
                category_id=self.uid,
                concept_id=row['concept_id'])
            cat_concept.is_broad = row['is_broad']
            cat_concept.is_default = row['is_default']
            cat_concept.load(context)
            self.add_concept(cat_concept)
def load(self, context, show='unfinished', start=0, size=25):
    """ Load a batch and its queued records.

    show  -- 'unfinished', 'unclaimed', or 'all'; any other value is
             now treated as 'all' (previously it crashed, see below)
    start -- offset of the first record in the paging window
    size  -- page size, or 'all' to disable the LIMIT clause
    """
    if self.uid == -1:
        return
    cursor = context.get_cursor()
    cursor.execute("""
        SELECT * FROM queued_batches
        WHERE uid = %s
        """, self.uid)
    fields = [d[0] for d in cursor.description]
    desc = dtuple.TupleDescriptor([[f] for f in fields])
    rows = cursor.fetchall()
    for row in rows:
        row = dtuple.DatabaseTuple(desc, row)
        for field in fields:
            self.set(field, row[field])
    show = str(show)
    if show == 'unfinished':
        show_clause = ' AND status < %s ' % QueuedRecord.STATUS_CURATED
    elif show == 'unclaimed':
        show_clause = ' AND status = %s ' % QueuedRecord.STATUS_UNCLAIMED
    else:
        # FIX: 'all' keeps its old behavior, but any unrecognized value
        # previously left show_clause unbound and raised NameError in
        # the query below.
        show_clause = ' AND 1 '
    if str(size) == 'all':
        limit_clause = ''
    else:
        limit_clause = ' LIMIT %s, %s ' % (int(start), int(size))
    cursor.execute("""
        SELECT * FROM queued_records
        WHERE queued_batch_id = %s
        """ + show_clause + """
        ORDER BY uid
        """ + limit_clause, (int(self.uid)))
    fields = [d[0] for d in cursor.description]
    desc = dtuple.TupleDescriptor([[f] for f in fields])
    rows = cursor.fetchall()
    for row in rows:
        row = dtuple.DatabaseTuple(desc, row)
        record = QueuedRecord(context, row['uid'])
        self.queued_records[record.uid] = record
    self.num_records = len(self.queued_records)
def load(self, context):
    """Load this human study's reference and comments, writing the
    loaded object back to the cache when caching is enabled."""
    if self.uid == -1:
        return
    # Is it already loaded?  Convenience check for client calls
    # don't need to verify loads from the cache.
    if context.config.use_cache:
        try:
            if getattr(self, self.CACHE_CHECK_KEY):
                # Already loaded
                return
        except AttributeError:
            # Not already loaded, so continue
            pass
    cursor = context.get_cursor()
    cursor.execute("""
        SELECT reference, comments
        FROM human_studies
        WHERE uid = %s
        """, self.uid)
    fields = [d[0] for d in cursor.description]
    desc = dtuple.TupleDescriptor([[f] for f in fields])
    row = cursor.fetchone()
    # FIX: guard against an unknown uid -- the old code passed None to
    # DatabaseTuple and raised an opaque error.
    if row is None:
        return
    row = dtuple.DatabaseTuple(desc, row)
    self.reference = row['reference']
    self.comments = row['comments']
    if context.config.use_cache:
        context.cache_set('%s:%s' % (self.CACHE_KEY, self.uid), self)
def load_from_issn (self, context, issn):
    """Populate this journal from medline_journals by ISSN, preferring
    a cached copy when one is available."""
    if not issn:
        return
    # Cache shortcut: copy the attributes across from a cached journal.
    if context.config.use_cache:
        cached = context.cache_get('%s:%s' % (self.ALT_CACHE_KEY, issn))
        if cached \
            and cached.journal_title:
            for att in ('uid', 'journal_title', 'abbreviation', 'issn',
                'eissn', 'iso_abbr', 'nlm_id'):
                self.set(att, getattr(cached, att))
            return
    cursor = context.get_cursor()
    cursor.execute("""
        SELECT uid, journal_title, abbreviation, nlm_id
        FROM medline_journals
        WHERE issn = %s
        """, issn)
    col_names = [col[0] for col in cursor.description]
    descriptor = dtuple.TupleDescriptor([[name] for name in col_names])
    matches = cursor.fetchall()
    if not matches:
        return
    record = dtuple.DatabaseTuple(descriptor, matches[0])
    for name, value in record.items():
        self.set(name, value)
    self.issn = issn
    # Refresh both cache entries (by uid and by issn).
    if context.config.use_cache:
        context.cache_set('%s:%s' % (self.CACHE_KEY, self.uid), self)
        context.cache_set('%s:%s' % (self.ALT_CACHE_KEY, self.issn), self)
def summary_set_from_concept(context, concept_id, concept_type): """ For a given concept_id with a particular concept_type, return the set of summary/study pairs for that concept. """ if concept_type in ('exposure', 'outcome'): table_name = concept_type + 's' elif concept_type == 'species': table_name = concept_type else: return None pairs = [] try: cursor = context.get_cursor() query = """ SELECT summary_concepts.*, %s.study_id FROM %s, summary_concepts WHERE %s.uid = summary_concepts.study_concept_id AND %s.concept_id = """ % (table_name, table_name, table_name, table_name) cursor.execute(query + " %s ", int(concept_id)) fields = [d[0] for d in cursor.description] desc = dtuple.TupleDescriptor([[f] for f in fields]) for row in cursor.fetchall(): row = dtuple.DatabaseTuple(desc, row) summary = Summary(context, row['summary_id']) st = study.Study(context, row['study_id']) pairs.append((summary, st)) except Exception, e: context.logger.error(e) return None
def load (self, context):
    """Populate this journal's fields from medline_journals by uid."""
    if self.uid == -1:
        return
    # Is it already loaded?  Convenience check for client calls
    # don't need to verify loads from the cache.
    if context.config.use_cache:
        try:
            if self.journal_title:
                # Already loaded
                return
        except AttributeError:
            # Not loaded yet -- fall through to the query
            pass
    cursor = context.get_cursor()
    cursor.execute("""
        SELECT journal_title, abbreviation, issn, nlm_id
        FROM medline_journals
        WHERE uid = %s
        """, self.uid)
    col_names = [col[0] for col in cursor.description]
    descriptor = dtuple.TupleDescriptor([[name] for name in col_names])
    for raw in cursor.fetchall():
        record = dtuple.DatabaseTuple(descriptor, raw)
        for name, value in record.items():
            self.set(name, value)
    if context.config.use_cache:
        context.cache_set('%s:%s' % (self.ALT_CACHE_KEY, self.issn), self)
def load (self, context, load_terms=True):
    """Load this source's fields, and (unless load_terms is False)
    all of its terms into self.terms / self.term_tokens."""
    if self.uid == -1:
        return
    cursor = context.get_cursor()
    cursor.execute("""
        SELECT * FROM sources
        WHERE uid = %s
        """, self.uid)
    col_names = [col[0] for col in cursor.description]
    descriptor = dtuple.TupleDescriptor([[name] for name in col_names])
    source_rows = cursor.fetchall()
    if source_rows:
        first = dtuple.DatabaseTuple(descriptor, source_rows[0])
        for name in col_names:
            self.set(name, first[name])
    if not load_terms:
        return
    cursor.execute("""
        SELECT * FROM terms
        WHERE source_id = %s
        """, self.uid)
    col_names = [col[0] for col in cursor.description]
    descriptor = dtuple.TupleDescriptor([[name] for name in col_names])
    for raw in cursor.fetchall():
        record = dtuple.DatabaseTuple(descriptor, raw)
        term = Term()
        for name in col_names:
            term.set(name, record[name])
        # Index the term by uid and by its token for fast lookup.
        self.terms[term.uid] = term
        self.term_tokens[term.token] = term.uid
def load(self, context):
    """Fill in this object's fields from its gazeteer row."""
    cursor = context.get_cursor()
    cursor.execute("""
        SELECT * FROM gazeteer
        WHERE uid = %s
        """, self.uid)
    col_names = [col[0] for col in cursor.description]
    descriptor = dtuple.TupleDescriptor([[name] for name in col_names])
    raw = cursor.fetchone()
    if raw:
        record = dtuple.DatabaseTuple(descriptor, raw)
        for name in col_names:
            self.set(name, record[name])
def load(self, context):
    """Load every queued batch, ordered by uid and date added, into
    self.batches."""
    cursor = context.get_cursor()
    cursor.execute("""
        SELECT *
        FROM queued_batches
        ORDER BY uid, date_added
        """)
    col_names = [col[0] for col in cursor.description]
    descriptor = dtuple.TupleDescriptor([[name] for name in col_names])
    for raw in cursor.fetchall():
        record = dtuple.DatabaseTuple(descriptor, raw)
        batch = Batch()
        for name in col_names:
            batch.set(name, record[name])
        self.batches.append(batch)
def load (self, context):
    """Load this term's fields from its row in the terms table."""
    if self.uid == -1:
        return
    cursor = context.get_cursor()
    cursor.execute("""
        SELECT * FROM terms
        WHERE uid = %s
        """, self.uid)
    term_rows = cursor.fetchall()
    col_names = [col[0] for col in cursor.description]
    descriptor = dtuple.TupleDescriptor([[name] for name in col_names])
    if term_rows:
        record = dtuple.DatabaseTuple(descriptor, term_rows[0])
        for name in col_names:
            self.set(name, record[name])
def get_studies(context):
    """Return a list of all HumanStudy objects, ordered by reference."""
    cursor = context.get_cursor()
    cursor.execute("""
        SELECT uid
        FROM human_studies
        ORDER BY reference
        """)
    col_names = [col[0] for col in cursor.description]
    descriptor = dtuple.TupleDescriptor([[name] for name in col_names])
    return [HumanStudy(context, dtuple.DatabaseTuple(descriptor, raw)['uid'])
        for raw in cursor.fetchall()]
def find_references(context, token=''):
    """Return HumanStudy objects whose reference starts with token,
    ordered by reference; with no token, fall back to every study."""
    if not token:
        return get_studies(context)
    cursor = context.get_cursor()
    cursor.execute("""
        SELECT uid
        FROM human_studies
        WHERE reference LIKE %s
        ORDER BY reference
        """, '%s%s' % (token, '%'))
    col_names = [col[0] for col in cursor.description]
    descriptor = dtuple.TupleDescriptor([[name] for name in col_names])
    matches = []
    for raw in cursor.fetchall():
        record = dtuple.DatabaseTuple(descriptor, raw)
        matches.append(HumanStudy(context, record['uid']))
    return matches
def load(self, context): if self.uid == -1: return # Is it already loaded? Convenience check for client calls # don't need to verify loads from the cache. if context.config.use_cache: try: if getattr(self, self.CACHE_CHECK_KEY): # Already loaded return except AttributeError: # Note already loaded, so continue pass try: cursor = context.get_cursor() # Summary, load thyself cursor.execute( """ SELECT * FROM summaries WHERE uid = %s """, self.uid) fields = [d[0] for d in cursor.description] desc = dtuple.TupleDescriptor([[f] for f in fields]) rows = cursor.fetchall() if not rows: print 'Tried to load summary with id', self.uid print traceback.print_exc() raise ValueError row = dtuple.DatabaseTuple(desc, rows[0]) for f in fields: self.set(f, row[f]) # ...and thy concepts cursor.execute( """ SELECT * FROM summary_concepts WHERE summary_id = %s """, self.uid) fields = [d[0] for d in cursor.description] desc = dtuple.TupleDescriptor([[f] for f in fields]) rows = cursor.fetchall() for row in rows: row = dtuple.DatabaseTuple(desc, row) summary_concept = SummaryConcept(uid=row['uid']) summary_concept.summary_id = self.uid summary_concept.concept_type = row['concept_type'] summary_concept.study_concept_id = row['study_concept_id'] getattr(self, summary_concept.CONCEPT_TYPES[ row['concept_type']]).append(summary_concept) # ...and thy references cursor.execute( """ SELECT human_study_id FROM summary_human_refs WHERE summary_id = %s ORDER by uid """, self.uid) fields = [d[0] for d in cursor.description] desc = dtuple.TupleDescriptor([[f] for f in fields]) rows = cursor.fetchall() for row in rows: self.human_refs.append(row[0]) self.cache_set(context) #except ValueError: # raise ValueError except: print traceback.print_exc()
def load(self, context, load_metadata=True):
    """ Load a queued record.

    Note that if a source is not specified, every term will be looked-up
    again from the DB (rather than read from memory).
    """
    # Is it already loaded?  Convenience check for client calls
    # don't need to verify loads from the cache.  It's possible it's
    # already loaded without metadata, in which case: reload.
    if context.config.use_cache:
        try:
            if self.queued_batch_id >= 0 \
                and self.metadata:
                # Already loaded
                return
        except AttributeError:
            # Not already loaded, so continue
            pass
    cursor = context.get_cursor()
    try:
        # To be safe, specify full table.field names because names overlap
        # NOTE(review): queued_records.user_id is selected twice -- looks
        # like a copy/paste slip; harmless but worth cleaning up.
        cursor.execute("""
            SELECT queued_records.uid,
            queued_records.queued_batch_id,
            queued_records.status,
            queued_records.user_id,
            queued_records.user_id,
            queued_records.study_id,
            queued_records.title,
            queued_records.source,
            queued_records.unique_identifier,
            queued_records.duplicate_score,
            queued_records.needs_paper,
            queued_batches.source_id AS batch_source_id
            FROM queued_records, queued_batches
            WHERE queued_records.uid = %s
            AND queued_batches.uid = queued_records.queued_batch_id
            """, int(self.uid))
        fields = [d[0] for d in cursor.description]
        desc = dtuple.TupleDescriptor([[f] for f in fields])
        rows = cursor.fetchall()
        if not rows:
            raise ValueError('No records found')
        row = dtuple.DatabaseTuple(desc, rows[0])
        # remove source_id from fields, it's not a proper attribute on self
        fields.remove('batch_source_id')
        # but save it for later!
        # NOTE(review): batch_source_id is not used anywhere in the
        # visible body -- presumably consumed by code outside this
        # chunk; confirm before removing.
        batch_source_id = row['batch_source_id']
        for field in fields:
            self.set(field, row[field])
        source_catalog = context.get_source_catalog()
        if load_metadata:
            # NOTE: the "ORDER BY sequence_position" might be a bad hack,
            # but it should preserve author name order.
            # FIXME if not. And TESTME!
            cursor.execute("""
                SELECT *
                FROM queued_record_metadata
                WHERE queued_record_id = %s
                ORDER BY sequence_position
                """, self.uid)
            fields = [d[0] for d in cursor.description]
            desc = dtuple.TupleDescriptor([[f] for f in fields])
            rows = cursor.fetchall()
            for row in rows:
                row = dtuple.DatabaseTuple(desc, row)
                term = source_catalog.get_term(row['term_id'])
                self.add_metadata(term, row['value'], extra=row['extra'])
    except ValueError:
        # Re-raised with a friendlier message for callers.
        raise ValueError('Record not found')
# use pmid_list so pmids can be sorted for easier side-by-side comparison pmid_list = [] for line in open(options.filename): pmid_list.append(int(line.strip())) pmid_list.sort() for pmid in pmid_list: cursor.execute(""" SELECT studies.uid AS study_id, queued_records.uid AS queued_record_id FROM queued_records, studies WHERE queued_records.study_id = studies.uid AND studies.status = 2 AND queued_records.unique_identifier = %s """, pmid) fields = [d[0] for d in cursor.description] desc = dtuple.TupleDescriptor([[f] for f in fields]) row = cursor.fetchone() if row: row = dtuple.DatabaseTuple(desc, row) study = Study(row['study_id']) study.load(cursor) studies.append((study, pmid)) else: # sort by unique_identifier for easier side-by-side comparison cursor.execute(""" SELECT studies.uid AS study_id, queued_records.uid AS queued_record_id, queued_records.unique_identifier FROM studies, queued_records WHERE studies.uid = queued_records.study_id AND studies.status = 2
def search(self, context, feature_name, region='', country='', params={}):
    """Full-text search of the gazeteer for feature_name, optionally
    narrowed by region and/or country name prefixes.

    Returns up to 100 Feature objects; exact (case-insensitive) name
    matches are moved to the front of the list.

    NOTE(review): params is unused but kept for interface
    compatibility; its mutable default is harmless as it is never
    read or written.
    """
    cursor = context.get_cursor()
    results = []
    # (FIX: dropped the unused search_token local.)
    if region:
        region_name = region.strip() + '%'
        if country:
            country_name = country.strip() + '%'
            cursor.execute("""
                SELECT gazeteer.uid, gazeteer.name, gazeteer.country_code,
                gazeteer.adm1, gazeteer.feature_type,
                gazeteer.latitude, gazeteer.longitude
                FROM gazeteer, gazeteer_countries, gazeteer_fips_codes
                WHERE gazeteer_countries.code = gazeteer.country_code
                AND gazeteer_fips_codes.fips_code = gazeteer.adm1
                AND MATCH (gazeteer.name) AGAINST (%s)
                AND gazeteer_countries.name LIKE %s
                AND gazeteer_fips_codes.name LIKE %s
                LIMIT 100
                """, (feature_name, country_name, region_name))
        else:
            cursor.execute("""
                SELECT gazeteer.uid, gazeteer.name, gazeteer.country_code,
                gazeteer.adm1, gazeteer.feature_type,
                gazeteer.latitude, gazeteer.longitude
                FROM gazeteer, gazeteer_fips_codes
                WHERE gazeteer_fips_codes.fips_code = gazeteer.adm1
                AND MATCH (gazeteer.name) AGAINST (%s)
                AND gazeteer_fips_codes.name LIKE %s
                LIMIT 100
                """, (feature_name, region_name))
    elif country:
        country_name = country.strip() + '%'
        cursor.execute("""
            SELECT gazeteer.uid, gazeteer.name, gazeteer.country_code,
            gazeteer.adm1, gazeteer.feature_type,
            gazeteer.latitude, gazeteer.longitude
            FROM gazeteer, gazeteer_countries
            WHERE gazeteer_countries.code = gazeteer.country_code
            AND MATCH (gazeteer.name) AGAINST (%s)
            AND gazeteer_countries.name LIKE %s
            LIMIT 100
            """, (feature_name, country_name))
    else:
        cursor.execute("""
            SELECT * FROM gazeteer
            WHERE MATCH (name) AGAINST (%s)
            LIMIT 100
            """, feature_name)
    # FIX: fields/desc are invariant for a given result set; hoisted
    # them out of the per-row fetch loop.
    fields = [d[0] for d in cursor.description]
    desc = dtuple.TupleDescriptor([[f] for f in fields])
    while 1:
        row = cursor.fetchone()
        if row is None:
            break
        row = dtuple.DatabaseTuple(desc, row)
        feature = Feature()
        for field in fields:
            feature.set(field, row[field])
        results.append(feature)
    # Try to bump up coarse "relevance" of exact matches.
    # FIX: iterate over a copy -- the old code removed items from the
    # list it was iterating, which silently skipped the element right
    # after each match.
    target = feature_name.lower()
    for result in results[:]:
        if result.name.lower() == target:
            results.remove(result)
            results.insert(0, result)
    return results
def load_by_pmid(self, context, pubmed_id):
    """Load a sentinel study by PubMed id, together with its associated
    reference details: MeSH headings, outcomes, methodologies,
    exposures, and species."""
    cursor = context.get_cursor()
    cursor.execute("""
        SELECT * FROM sentinel_studies
        WHERE pubmed_id = %s
        """, pubmed_id)
    col_names = [col[0] for col in cursor.description]
    descriptor = dtuple.TupleDescriptor([[name] for name in col_names])
    raw = cursor.fetchone()
    if raw:
        record = dtuple.DatabaseTuple(descriptor, raw)
        for name in col_names:
            self.set(name, record[name])
        # MeSH headings
        cursor.execute("""
            SELECT mesh_heading
            FROM reference_mesh
            WHERE reference_id = %s
            """, self.reference_id)
        for mesh_row in cursor.fetchall():
            self.mesh.append(SubjectHeading(mesh_row[0]))
        # Outcomes: (disease, relevance) pairs
        cursor.execute("""
            SELECT human_disease, nature_of_relevance
            FROM reference_disease
            WHERE reference_id = %s
            """, self.reference_id)
        for disease_row in cursor.fetchall():
            self.outcomes.append((disease_row[0], disease_row[1]))
        # Methodologies
        cursor.execute("""
            SELECT methodology
            FROM reference_methodology
            WHERE reference_id = %s
            """, self.reference_id)
        for method_row in cursor.fetchall():
            self.methodologies.append(method_row[0])
        # Exposures
        cursor.execute("""
            SELECT exposure_agent
            FROM reference_exposure
            WHERE reference_id = %s
            """, self.reference_id)
        for exposure_row in cursor.fetchall():
            self.exposures.append(exposure_row[0])
        # Species
        cursor.execute("""
            SELECT species_name
            FROM reference_species
            WHERE reference_id = %s
            """, self.reference_id)
        for species_row in cursor.fetchall():
            self.species.append(species_row[0])
# Demonstration script: fetch a few columns from the StdPage table over
# ODBC and show the three ways a dtuple DatabaseTuple row can be
# addressed (attribute, subscript, and mapping access).
import dtuple
import mx.ODBC.Windows as odbc

# The column list is reused to build both the SQL and the descriptor.
flist = ["Name", "Num", "LinkText"]
descr = dtuple.TupleDescriptor([[n] for n in flist])

conn = odbc.connect("HoldenWebSQL") # Connect to a database
curs = conn.cursor() # Create a cursor

sql = """SELECT %s FROM StdPage
    WHERE PageSet='Std' AND Num<25
    ORDER BY PageSet, Num""" % ", ".join(flist)
print sql
curs.execute(sql)
rows = curs.fetchall()
for row in rows:
    row = dtuple.DatabaseTuple(descr, row)
    # Same value three ways; "or 0" guards against a NULL Num column.
    print "Attribute: Name: %s Number: %d" % (row.Name, row.Num or 0)
    print "Subscript: Name: %s Number: %d" % (row[0], row[1] or 0)
    print "Mapping: Name: %s Number: %d" % (row["Name"], row["Num"] or 0)
conn.close()
def find_concepts(context, search_term):
    """Find UMLS concepts matching search_term.

    search_term may be an integer concept id (exact lookup) or a string
    (prefix-style LIKE match against term names).  Returns a list of
    Concept objects; for text searches, exact matches (by preferred
    name or synonym) are moved to the front of the list.
    """
    cursor = context.get_cursor()
    concepts = {}
    if isinstance(search_term, types.IntType):
        cursor.execute("""
            SELECT umls_terms.umls_concept_id, term, preferred_name, umls_source_id
            FROM umls_terms, umls_concepts, umls_concepts_sources
            WHERE umls_concepts.umls_concept_id = %s
            AND umls_concepts.umls_concept_id = umls_terms.umls_concept_id
            AND umls_concepts_sources.umls_concept_id = umls_concepts.umls_concept_id
            GROUP BY umls_concepts.umls_concept_id
            ORDER BY term, preferred_name
            """, search_term)
    else:
        # Assumes search_term is text
        if not search_term:
            # FIX: the old code skipped the query for an empty term but
            # still fell through to read cursor.description, failing on
            # whatever state the cursor happened to be in.
            return []
        # Turn "heart attack" into "heart% attack%" for LIKE matching.
        query_term = search_term.strip().replace(' ', '% ') + '%'
        cursor.execute("""
            SELECT umls_terms.umls_concept_id, term, preferred_name, umls_source_id
            FROM umls_terms, umls_concepts, umls_concepts_sources
            WHERE term LIKE %s
            AND umls_concepts.umls_concept_id = umls_terms.umls_concept_id
            AND umls_concepts_sources.umls_concept_id = umls_concepts.umls_concept_id
            GROUP BY umls_concepts.umls_concept_id
            ORDER BY term, preferred_name
            """, query_term)
    fields = [d[0] for d in cursor.description]
    desc = dtuple.TupleDescriptor([[f] for f in fields])
    rows = cursor.fetchall()
    for row in rows:
        row = dtuple.DatabaseTuple(desc, row)
        # Concepts are keyed by (concept id, source id); repeated rows
        # for the same key only contribute new synonyms.
        key = (row['umls_concept_id'], row['umls_source_id'])
        if not key in concepts:
            concept = Concept(uid=row['umls_concept_id'])
            concept.concept_source_id = row['umls_source_id']
            concept.term = row['preferred_name']
            concept.synonyms.append(row['term'])
        else:
            concept = concepts[key]
            if not row['term'] in concept.synonyms:
                concept.synonyms.append(row['term'])
        concepts[key] = concept
    if not isinstance(search_term, types.IntType):
        # Try to bump up coarse "relevance" of exact matches.
        # FIX: iterate over a copy -- the old code removed elements from
        # the list it was iterating, which skips the element following
        # each match.
        concepts_ranked = concepts.values()
        lowered = search_term.lower()
        for concept in concepts_ranked[:]:
            if concept.term.lower() == lowered \
                or lowered in [syn.lower() for syn in concept.synonyms]:
                concepts_ranked.remove(concept)
                concepts_ranked.insert(0, concept)
        return concepts_ranked
    return concepts.values()