def get(self, db, collection=None, id=None):
    """Write a JSON view of a source database, collection or document.

    What is returned depends on which arguments are given:

    * no ``collection``: the collection names of database ``db``;
    * ``collection == 'fs'``: the file listing of the GridFS stored in ``db``;
    * ``collection`` and ``id``: the single document whose ``_id`` equals
      ``id`` (JSON ``null`` when there is no such document);
    * ``collection`` only: up to ``size`` documents, taken from a random
      offset unless the request carries ``random=false``.

    Request arguments (read through ``self.get_argument``):

    * ``random``: any value other than the string ``'false'`` (including
      the argument being absent) enables the random offset.
    * ``size``: number of documents to return, default 10. A value that
      is not an integer makes ``int()`` raise ``ValueError``.

    The result is written with ``self.write`` after setting a JSON
    ``Content-Type`` header; nothing is returned.

    NOTE(review): ``self`` looks like a Tornado ``RequestHandler`` and
    ``json`` is expected to be imported at module level - confirm.
    """
    import random

    from biothings.utils.mongo import get_src_conn

    # Random sampling is the default: only an explicit "false" disables it.
    get_random = self.get_argument('random', None) != 'false'
    size = int(self.get_argument('size', 10))
    conn = get_src_conn()

    if not collection:
        # No collection requested: list all collections in this db.
        # NOTE(review): collection_names() is deprecated in recent PyMongo
        # releases - confirm the driver version before upgrading.
        out = conn[db].collection_names()
    elif collection == 'fs':
        import gridfs

        out = gridfs.GridFS(conn[db]).list()
    else:
        # Use a separate name so the ``collection`` parameter keeps holding
        # the requested collection name.
        src_collection = conn[db][collection]
        if id:
            out = src_collection.find_one({"_id": id})
        elif get_random:
            # Pick an offset that still leaves ``size`` documents after it
            # (offset 0 when the collection holds fewer than ``size``).
            # NOTE(review): count() is deprecated in recent PyMongo
            # releases - confirm the driver version before upgrading.
            cnt = src_collection.count()
            offset = random.randint(0, max(cnt - size, 0))
            out = list(src_collection.find().skip(offset).limit(size))
        else:
            out = list(src_collection.find().limit(size))

    def date_handler(obj):
        """Convert values json cannot encode natively into strings."""
        if hasattr(obj, 'isoformat'):
            return obj.isoformat()
        # Returning ``obj`` unchanged here would make json.dumps raise
        # "Circular reference detected", so fall back to the object's
        # string form instead of failing the whole request.
        return str(obj)

    _json_data = json.dumps(out, default=date_handler)
    self.set_header("Content-Type", "application/json; charset=UTF-8")
    self.write(_json_data)
def prepare(self, state=None):
    """Sync uploader information with database (or given state dict).

    Does nothing when ``self.prepared`` is already true.

    When a non-empty ``state`` mapping is given, every key currently in
    ``self._state`` is overwritten with ``state[key]`` and the method
    returns immediately: the database is not contacted, and
    ``self.prepared``, ``self.logfile`` and ``self.data_folder`` are left
    untouched. Keys of ``state`` that are not in ``self._state`` are
    ignored.

    Otherwise the ``conn``, ``db``, ``collection``, ``src_dump``,
    ``src_master`` and ``logger`` entries of ``self._state`` are rebuilt,
    ``self.logfile`` and ``self.data_folder`` are set, and the instance
    is flagged as prepared.

    Args:
        state: optional mapping of previously saved state values.
            ``None`` or an empty mapping triggers the database path.

    Raises:
        KeyError: if ``state`` is given but lacks a key of ``self._state``.
    """
    if self.prepared:
        return
    if state:
        # let's be explicit, _state takes what it wants
        for key in self._state:
            self._state[key] = state[key]
        return

    conn = get_src_conn()
    database = conn[self.__class__.__database__]
    self._state["conn"] = conn
    self._state["db"] = database
    self._state["collection"] = database[self.collection_name]
    self._state["src_dump"] = self.prepare_src_dump()
    self._state["src_master"] = get_src_master()
    self._state["logger"], self.logfile = self.setup_log()
    # Prefer the data folder recorded under "download", falling back to
    # the top-level "data_folder" key of the source document.
    self.data_folder = (
        self.src_doc.get("download", {}).get("data_folder")
        or self.src_doc.get("data_folder")
    )
    # flag ready
    self.prepared = True
'uniprot', 'uniprot.uniprot_pdb', # 'uniprot.uniprot_ipi', # IPI is now discontinued, last update is still in the db, but won't be updated. 'uniprot.uniprot_pir' ], 'pharmgkb': ['pharmgkb'], 'reporter': ['reporter'], 'ucsc': ['ucsc.ucsc_exons'], 'exac': ['exac.broadinstitute_exac'], 'cpdb': ['cpdb'], 'reagent': ['reagent'], } __sources__ = None # should be a list defined at runtime conn = get_src_conn() doc_register = {} class GeneDocSourceMaster(dict): '''A class to manage various genedoc data sources.''' __collection__ = DATA_SRC_MASTER_COLLECTION __database__ = DATA_SRC_DATABASE use_dot_notation = True use_schemaless = True structure = { 'name': str, 'timestamp': datetime.datetime, }