def get_or_create_index(destination_settings, source):
    """
    Return an ElasticSearch handle for the destination described by
    `destination_settings`, creating the index first when nothing matches.

    The entries returned by `get_aliases()` are filtered to those whose
    `alias` equals `destination_settings.index`:

    * no match      - the schema is taken from `source.get_schema()` and
                      `ElasticSearch.create_index()` is called with it
    * one match     - `destination_settings.alias` is set to the requested
                      name and `destination_settings.index` to the matched
                      entry's `index` (the settings object is mutated)
    * several       - the problem is reported through `Log.error()`

    :param destination_settings: settings of the destination cluster/index
    :param source: object providing `get_schema()`; only used when the
                   index has to be created
    :return: a new `ElasticSearch(destination_settings)`
    """
    # COLLECT EVERY ENTRY WHOSE ALIAS IS THE REQUESTED NAME
    matches = []
    for candidate in ElasticSearch(destination_settings).get_aliases():
        if candidate.alias == destination_settings.index:
            matches.append(candidate)

    match_count = len(matches)
    if match_count > 1:
        Log.error("do not know how to replicate to more than one index")
    elif match_count == 1:
        existing = matches[0]
        # NOTE(review): `!= None` IS KEPT ON PURPOSE (NOT `is not None`);
        # THE ENTRY MAY BE A WRAPPER WHOSE EMPTY VALUE ONLY *EQUALS* None - CONFIRM
        if existing.alias != None:
            # THE REQUESTED NAME IS AN ALIAS: POINT THE SETTINGS AT THE REAL INDEX
            destination_settings.alias = destination_settings.index
            destination_settings.index = existing.index
    else:
        # NOTHING MATCHED: CREATE THE INDEX USING THE SOURCE'S SCHEMA
        schema = source.get_schema()
        assert schema.settings
        assert schema.mappings
        ElasticSearch.create_index(destination_settings, schema, limit_replicas=True)

    return ElasticSearch(destination_settings)
def open_test_instance(name, settings):
    """
    Open the store used by a test and return it.

    When `settings.filename` is truthy a `Fake_ES(settings)` is returned.
    Otherwise `ElasticSearch.delete_index(settings)` is called, the schema
    is read from `settings.schema_file`, and the result of
    `ElasticSearch.create_index()` is returned.

    :param name: label for this instance; only used in the log message
    :param settings: connection/file settings for the instance
    :return: `Fake_ES` instance, or the value returned by `create_index()`
    """
    if not settings.filename:
        Log.note("Using ES cluster at {{host}} as {{type}}", {
            "host": settings.host,
            "type": name
        })

        # delete_index THEN create_index, WITH THE SCHEMA READ FROM FILE IN BETWEEN
        ElasticSearch.delete_index(settings)
        schema_json = File(settings.schema_file).read()
        schema = CNV.JSON2object(schema_json, flexible=True, paths=True)
        return ElasticSearch.create_index(settings, schema, limit_replicas=True)

    # FILE-BACKED INSTANCE
    Log.note("Using {{filename}} as {{type}}", {
        "filename": settings.filename,
        "type": name
    })
    return Fake_ES(settings)
def main(settings):
    """
    Fill the destination index, either from a file or from a source ES index.

    When `settings.source.filename` is set the destination is built from
    that file; otherwise the destination is synchronised with the source
    ES index described by `settings.source`.

    :param settings: configuration with `source`, `destination` and `param`
    :return: None
    """
    # NOTE(review): `!= None` IS KEPT ON PURPOSE (NOT `is not None`);
    # A MISSING SETTING MAY BE A WRAPPER THAT ONLY *EQUALS* None - CONFIRM
    if settings.source.filename != None:
        _load_index_from_file(settings)
    else:
        _sync_index_from_es(settings)


def _load_index_from_file(settings):
    """
    Create a new destination index, load it from the source file, then
    attach the alias to it.

    `settings.destination` is mutated: the configured index name becomes
    the alias, and the index name is replaced by
    `ElasticSearch.proto_name(alias)`.
    """
    settings.destination.alias = settings.destination.index
    settings.destination.index = ElasticSearch.proto_name(settings.destination.alias)

    schema = CNV.JSON2object(File(settings.source.schema_filename).read())
    if transform_bugzilla.USE_ATTACHMENTS_DOT:
        # RENAME "attachments_" TO "attachments." BY ROUND-TRIPPING THE SCHEMA THROUGH JSON TEXT
        as_json = CNV.object2JSON(schema)
        schema = CNV.JSON2object(as_json.replace("attachments_", "attachments."))

    target = ElasticSearch.create_index(settings.destination, schema, limit_replicas=True)

    # REFRESH INTERVAL IS -1 DURING THE LOAD AND 1 AFTERWARDS
    # (presumably -1 disables refresh while loading - confirm)
    target.set_refresh_interval(-1)
    extract_from_file(settings.source, target)
    target.set_refresh_interval(1)

    # presumably removes the other indexes behind the alias - confirm
    target.delete_all_but(settings.destination.alias, settings.destination.index)
    target.add_alias(settings.destination.alias)


def _sync_index_from_es(settings):
    """
    Replicate pending changes from the source ES index into the destination
    and record when this run started.

    The starting point is `MIN()` of the time stored in the
    `settings.param.last_replication_time` file (when that file exists) and
    the value of `get_last_updated(destination)`; when that yields nothing,
    `CNV.milli2datetime(0)` is used.
    """
    source = ElasticSearch(settings.source)
    destination = get_or_create_index(settings["destination"], source)

    # WORK OUT WHERE THE PREVIOUS RUN LEFT OFF
    time_file = File(settings.param.last_replication_time)
    if time_file.exists:
        from_file = CNV.milli2datetime(CNV.value2int(time_file.read()))
    else:
        from_file = None
    from_es = get_last_updated(destination)
    earliest = MIN(from_file, from_es)
    last_updated = nvl(earliest, CNV.milli2datetime(0))

    # TAKEN BEFORE REPLICATION STARTS; THIS IS THE VALUE WRITTEN TO THE FILE BELOW
    current_time = datetime.utcnow()

    pending = get_pending(source, last_updated)
    with ThreadedQueue(destination, size=1000) as data_sink:
        replicate(source, data_sink, pending, last_updated)

    # REMEMBER THE START OF THIS RUN FOR NEXT TIME
    time_file.write(unicode(CNV.datetime2milli(current_time)))
File(settings.param.first_run_time).delete() return setup_es(settings, db, es, es_comments) else: # START ETL FROM BEGINNING, MAKE NEW INDEX last_run_time = 0 if not es: # BUG VERSIONS schema = File(settings.es.schema_file).read() if transform_bugzilla.USE_ATTACHMENTS_DOT: schema = schema.replace("attachments_", "attachments\\.") schema=CNV.JSON2object(schema, paths=True) schema.settings=jsons.expand_dot(schema.settings) if not settings.es.alias: settings.es.alias = settings.es.index settings.es.index = ElasticSearch.proto_name(settings.es.alias) es = ElasticSearch.create_index(settings.es, schema, limit_replicas=True) # BUG COMMENTS comment_schema = File(settings.es_comments.schema_file).read() comment_schema=CNV.JSON2object(comment_schema, paths=True) comment_schema.settings=jsons.expand_dot(comment_schema.settings) if not settings.es_comments.alias: settings.es_comments.alias = settings.es_comments.index settings.es_comments.index = ElasticSearch.proto_name(settings.es_comments.alias) es_comments = ElasticSearch.create_index(settings.es_comments, comment_schema, limit_replicas=True) File(settings.param.first_run_time).write(unicode(CNV.datetime2milli(current_run_time))) return current_run_time, es, es_comments, last_run_time
return setup_es(settings, db, es, es_comments) else: # START ETL FROM BEGINNING, MAKE NEW INDEX last_run_time = 0 if not es: # BUG VERSIONS schema = File(settings.es.schema_file).read() if transform_bugzilla.USE_ATTACHMENTS_DOT: schema = schema.replace("attachments_", "attachments\\.") schema = CNV.JSON2object(schema, paths=True) schema.settings = jsons.expand_dot(schema.settings) if not settings.es.alias: settings.es.alias = settings.es.index settings.es.index = ElasticSearch.proto_name(settings.es.alias) es = ElasticSearch.create_index(settings.es, schema, limit_replicas=True) # BUG COMMENTS comment_schema = File(settings.es_comments.schema_file).read() comment_schema = CNV.JSON2object(comment_schema, paths=True) comment_schema.settings = jsons.expand_dot(comment_schema.settings) if not settings.es_comments.alias: settings.es_comments.alias = settings.es_comments.index settings.es_comments.index = ElasticSearch.proto_name( settings.es_comments.alias) es_comments = ElasticSearch.create_index(settings.es_comments, comment_schema, limit_replicas=True) File(settings.param.first_run_time).write(