def create_backend(db_col_names, name_only=False, **kwargs):
    """
    Guess what's inside 'db_col_names' and return the corresponding backend.
    - It could be a string (by default, will look up a mongo collection in the target database)
    - or a tuple ("target|src", "col_name")
    - or a ("mongodb://*****:*****@host", "db", "col_name") URI
    - or a ("es_host:port", "index_name", "doc_type") tuple
    If name_only is true, just return the name uniquely identifying the collection
    or index URI connection.
    """
    col = None
    db = None
    is_mongo = True
    if type(db_col_names) == str:
        db = mongo.get_target_db()
        col = db[db_col_names]
        # normalize params
        db_col_names = ["%s:%s" % (db.client.HOST, db.client.PORT), db.name, col.name]
    elif db_col_names[0].startswith("mongodb://"):
        assert len(db_col_names) == 3, "Missing connection information for %s" % repr(db_col_names)
        conn = mongo.MongoClient(db_col_names[0])
        db = conn[db_col_names[1]]
        col = db[db_col_names[2]]
        # normalize params
        db_col_names = ["%s:%s" % (db.client.HOST, db.client.PORT), db.name, col.name]
    elif len(db_col_names) == 3 and ":" in db_col_names[0]:
        is_mongo = False
        idxr = ESIndexer(index=db_col_names[1], doc_type=db_col_names[2],
                         es_host=db_col_names[0], **kwargs)
        db = idxr
        col = db_col_names[1]
    else:
        assert len(db_col_names) == 2, "Missing connection information for %s" % repr(db_col_names)
        db = db_col_names[0] == "target" and mongo.get_target_db() or mongo.get_src_db()
        col = db[db_col_names[1]]
        # normalize params (0: host, 1: port)
        db_col_names = ["%s:%s" % (db.client.address[0], db.client.address[1]), db.name, col.name]
    assert col is not None, "Could not create collection object from %s" % repr(db_col_names)
    if name_only:
        if is_mongo:
            return "mongo_%s_%s_%s" % (db_col_names[0].replace(":", "_"),
                                       db_col_names[1], db_col_names[2])
        else:
            return "es_%s_%s_%s" % (db_col_names[0].replace(":", "_"),
                                    db_col_names[1], db_col_names[2])
    else:
        if is_mongo:
            return DocMongoBackend(db, col)
        else:
            return DocESBackend(db)
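# Usage sketch for create_backend(), assuming the hub's target/src MongoDB and an
# Elasticsearch instance are reachable. All hosts, collection names and index names
# below are hypothetical, purely for illustration of the four accepted input forms.
backend = create_backend("mynews_20210101_abcdef12")            # target collection, by name
backend = create_backend(("src", "clinvar"))                    # ("target|src", "col_name") tuple
backend = create_backend(("mongodb://user:pass@localhost:27017", "tmpdb", "tmpcol"))  # MongoDB URI
ident = create_backend(("localhost:9200", "mynews_index", "news"), name_only=True)
# name_only=True returns an identifier such as "es_localhost_9200_mynews_index_news"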
def do(index):
    def snapshot_launched(f):
        try:
            self.logger.info("Snapshot launched: %s" % f.result())
        except Exception as e:
            self.logger.error("Error while launching snapshot: %s" % e)
            fut.set_exception(e)

    if "snapshot" in steps:
        pinfo = {
            "category": "index",
            "source": index,
            "step": "snapshot",
            "description": es_snapshot_host
        }
        self.logger.info("Creating snapshot for index '%s' on host '%s', repository '%s'"
                         % (index, es_snapshot_host, btconfig.SNAPSHOT_REPOSITORY))
        job = yield from self.job_manager.defer_to_thread(
            pinfo,
            partial(idxr.snapshot, btconfig.SNAPSHOT_REPOSITORY, snapshot, mode=mode))
        job.add_done_callback(snapshot_launched)
        yield from job
        while True:
            state = get_status()
            if state in ["INIT", "IN_PROGRESS", "STARTED"]:
                yield from asyncio.sleep(getattr(btconfig, "MONITOR_SNAPSHOT_DELAY", 60))
            else:
                if state == "SUCCESS":
                    # if "meta" is required, it will set the result later
                    if "meta" not in steps:
                        fut.set_result(state)
                    self.logger.info(
                        "Snapshot '%s' successfully created (host: '%s', repository: '%s')"
                        % (snapshot, es_snapshot_host, btconfig.SNAPSHOT_REPOSITORY),
                        extra={"notify": True})
                else:
                    e = IndexerException("Snapshot '%s' failed: %s" % (snapshot, state))
                    fut.set_exception(e)
                    self.logger.error(
                        "Failed creating snapshot '%s' (host: %s, repository: %s), state: %s"
                        % (snapshot, es_snapshot_host, btconfig.SNAPSHOT_REPOSITORY, state),
                        extra={"notify": True})
                    raise e
                break

    if "meta" in steps:
        try:
            esb = DocESBackend(idxr)
            self.logger.info("Generating JSON metadata for full release '%s'" % esb.version)
            repo = idxr._es.snapshot.get_repository(btconfig.URL_SNAPSHOT_REPOSITORY)
            # generate json metadata about this release
            full_meta = {
                "type": "full",
                "build_version": esb.version,
                "app_version": None,
                "metadata": {
                    "repository": repo,
                    "snapshot_name": snapshot
                }
            }
            assert esb.version, "Can't retrieve a version from index '%s'" % index
            build_info = "%s.json" % esb.version
            build_info_path = os.path.join(btconfig.DIFF_PATH, build_info)
            json.dump(full_meta, open(build_info_path, "w"))
            # override lastmodified header with our own timestamp
            local_ts = dtparse(idxr.get_mapping_meta()["_meta"]["timestamp"])
            utc_epoch = str(int(time.mktime(local_ts.timetuple())))
            # it's a full release, but all build info metadata (full and incremental)
            # goes to the diff bucket (this is the main entry)
            s3key = os.path.join(btconfig.S3_DIFF_FOLDER, build_info)
            aws.send_s3_file(build_info_path, s3key,
                             aws_key=btconfig.AWS_KEY,
                             aws_secret=btconfig.AWS_SECRET,
                             s3_bucket=btconfig.S3_DIFF_BUCKET,
                             metadata={"lastmodified": utc_epoch},
                             overwrite=True)
            url = aws.get_s3_url(s3key,
                                 aws_key=btconfig.AWS_KEY,
                                 aws_secret=btconfig.AWS_SECRET,
                                 s3_bucket=btconfig.S3_DIFF_BUCKET)
            self.logger.info("Full release metadata published for version: '%s'" % url)
            publish_data_version(esb.version)
            self.logger.info("Registered version '%s'" % esb.version)
            fut.set_result("SUCCESS")
        except Exception as e:
            self.logger.error("Error while publishing metadata for snapshot '%s': %s" % (snapshot, e))
            fut.set_exception(e)
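# For reference, a hedged sketch of the JSON document the "meta" step writes and
# uploads (all values below are hypothetical). It is saved under DIFF_PATH as
# "<build_version>.json" and pushed to the diff S3 bucket, with "lastmodified"
# overridden by the timestamp taken from the index mapping's _meta section.
example_full_meta = {
    "type": "full",
    "build_version": "20210101",
    "app_version": None,
    "metadata": {
        "repository": {"my_repo": {"type": "s3", "settings": {"bucket": "my-snapshots-bucket"}}},
        "snapshot_name": "mynews_20210101",
    },
}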
def create_backend(db_col_names, name_only=False, follow_ref=False, **kwargs):
    """
    Guess what's inside 'db_col_names' and return the corresponding backend.
    - It could be a string (will first check the src_build doc for a backend_url
      field; if nothing there, will look up a mongo collection in the target database)
    - or a tuple ("target|src", "col_name")
    - or a ("mongodb://*****:*****@host", "db", "col_name") URI
    - or a ("es_host:port", "index_name", "doc_type") tuple
    If name_only is true, just return the name uniquely identifying the collection
    or index URI connection.
    """
    col = None
    db = None
    is_mongo = True
    if type(db_col_names) == str:
        # first check the build doc: if there's a "backend_url" key, use it instead of
        # directly using db_col_names as the target collection (see LinkDataBuilder)
        bdoc = get_src_build().find_one({"_id": db_col_names})
        if follow_ref and bdoc and bdoc.get("backend_url") \
                and bdoc["backend_url"] != db_col_names:
            return create_backend(bdoc["backend_url"], name_only=name_only,
                                  follow_ref=follow_ref, **kwargs)
        else:
            db = mongo.get_target_db()
            col = db[db_col_names]
            # normalize params
            db_col_names = ["%s:%s" % (db.client.HOST, db.client.PORT), db.name, col.name]
    elif db_col_names[0].startswith("mongodb://"):
        assert len(db_col_names) == 3, "Missing connection information for %s" % repr(db_col_names)
        conn = mongo.MongoClient(db_col_names[0])
        db = conn[db_col_names[1]]
        col = db[db_col_names[2]]
        # normalize params
        db_col_names = ["%s:%s" % (db.client.HOST, db.client.PORT), db.name, col.name]
    elif len(db_col_names) == 3 and ":" in db_col_names[0]:
        is_mongo = False
        idxr = ESIndexer(index=db_col_names[1], doc_type=db_col_names[2],
                         es_host=db_col_names[0], **kwargs)
        db = idxr
        col = db_col_names[1]
    else:
        assert len(db_col_names) == 2, "Missing connection information for %s" % repr(db_col_names)
        db = db_col_names[0] == "target" and mongo.get_target_db() or mongo.get_src_db()
        col = db[db_col_names[1]]
        # normalize params (0: host, 1: port)
        db_col_names = ["%s:%s" % (db.client.address[0], db.client.address[1]), db.name, col.name]
    assert col is not None, "Could not create collection object from %s" % repr(db_col_names)
    if name_only:
        if is_mongo:
            return "mongo_%s_%s_%s" % (db_col_names[0].replace(":", "_"),
                                       db_col_names[1], db_col_names[2])
        else:
            return "es_%s_%s_%s" % (db_col_names[0].replace(":", "_"),
                                    db_col_names[1], db_col_names[2])
    else:
        if is_mongo:
            return DocMongoBackend(db, col)
        else:
            return DocESBackend(db)
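# Hypothetical illustration of follow_ref: when the src_build document for a build
# carries a "backend_url" (as set by LinkDataBuilder), the call is transparently
# redirected to that referenced backend. The build name below is made up.
backend = create_backend("mynews_20210101_abcdef12", follow_ref=True)
# if that build doc's "backend_url" were, say, ("src", "mynews"), this call would be
# equivalent to create_backend(("src", "mynews"))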