def main(settings):
    """Replicate Bugzilla records into an ElasticSearch index.

    Two modes, selected by the settings:
      1. File mode (settings.source.filename is set): build a brand-new
         index from a schema file, bulk-load it, then swap the alias over.
      2. Sync mode: incrementally replicate from a source ES index into a
         destination index, resuming from the last recorded timestamp.

    settings - configuration object with .source, .destination and .param
               sections (shape assumed from usage here - confirm at caller).
    """
    # USE A FILE
    if settings.source.filename is not None:  # idiomatic None test (was `!= None`)
        # Create a fresh, uniquely-named index behind the alias so the old
        # index can be replaced atomically after the load finishes.
        settings.destination.alias = settings.destination.index
        settings.destination.index = ElasticSearch.proto_name(settings.destination.alias)
        schema = CNV.JSON2object(File(settings.source.schema_filename).read())
        if transform_bugzilla.USE_ATTACHMENTS_DOT:
            # Rewrite property names to dotted form when that convention is on
            schema = CNV.JSON2object(CNV.object2JSON(schema).replace("attachments_", "attachments."))

        dest = ElasticSearch.create_index(settings.destination, schema, limit_replicas=True)
        dest.set_refresh_interval(-1)  # suspend refresh for faster bulk load
        extract_from_file(settings.source, dest)
        dest.set_refresh_interval(1)  # restore refresh once loading is done

        # Point the alias at the new index and remove any stale indexes
        dest.delete_all_but(settings.destination.alias, settings.destination.index)
        dest.add_alias(settings.destination.alias)
        return

    # SYNCH WITH source ES INDEX
    source = ElasticSearch(settings.source)
    destination = get_or_create_index(settings["destination"], source)

    # GET LAST UPDATED: resume from the earlier of the file-recorded time
    # and the newest record in the destination; fall back to epoch (0)
    time_file = File(settings.param.last_replication_time)
    from_file = None
    if time_file.exists:
        from_file = CNV.milli2datetime(CNV.value2int(time_file.read()))
    from_es = get_last_updated(destination)
    last_updated = nvl(MIN(from_file, from_es), CNV.milli2datetime(0))
    # Timestamp taken BEFORE replication so the next run re-covers anything
    # modified while this run was in progress (at-least-once semantics)
    current_time = datetime.utcnow()

    pending = get_pending(source, last_updated)
    # ThreadedQueue batches writes (1000 per bulk request) to the destination
    with ThreadedQueue(destination, size=1000) as data_sink:
        replicate(source, data_sink, pending, last_updated)

    # RECORD LAST UPDATED
    time_file.write(unicode(CNV.datetime2milli(current_time)))
Пример #2
0
            Log.warning("can not resume ETL, restarting", e)
            File(settings.param.first_run_time).delete()
            return setup_es(settings, db, es, es_comments)
    else:
        # START ETL FROM BEGINNING, MAKE NEW INDEX
        last_run_time = 0
        if not es:
            # BUG VERSIONS
            schema = File(settings.es.schema_file).read()
            if transform_bugzilla.USE_ATTACHMENTS_DOT:
                schema = schema.replace("attachments_", "attachments\\.")
            schema=CNV.JSON2object(schema, paths=True)
            schema.settings=jsons.expand_dot(schema.settings)
            if not settings.es.alias:
                settings.es.alias = settings.es.index
                settings.es.index = ElasticSearch.proto_name(settings.es.alias)
            es = ElasticSearch.create_index(settings.es, schema, limit_replicas=True)

            # BUG COMMENTS
            comment_schema = File(settings.es_comments.schema_file).read()
            comment_schema=CNV.JSON2object(comment_schema, paths=True)
            comment_schema.settings=jsons.expand_dot(comment_schema.settings)
            if not settings.es_comments.alias:
                settings.es_comments.alias = settings.es_comments.index
                settings.es_comments.index = ElasticSearch.proto_name(settings.es_comments.alias)
            es_comments = ElasticSearch.create_index(settings.es_comments, comment_schema, limit_replicas=True)

        File(settings.param.first_run_time).write(unicode(CNV.datetime2milli(current_run_time)))

    return current_run_time, es, es_comments, last_run_time
Пример #3
0
            Log.warning("can not resume ETL, restarting", e)
            File(settings.param.first_run_time).delete()
            return setup_es(settings, db, es, es_comments)
    else:
        # START ETL FROM BEGINNING, MAKE NEW INDEX
        last_run_time = 0
        if not es:
            # BUG VERSIONS
            schema = File(settings.es.schema_file).read()
            if transform_bugzilla.USE_ATTACHMENTS_DOT:
                schema = schema.replace("attachments_", "attachments\\.")
            schema = CNV.JSON2object(schema, paths=True)
            schema.settings = jsons.expand_dot(schema.settings)
            if not settings.es.alias:
                settings.es.alias = settings.es.index
                settings.es.index = ElasticSearch.proto_name(settings.es.alias)
            es = ElasticSearch.create_index(settings.es,
                                            schema,
                                            limit_replicas=True)

            # BUG COMMENTS
            comment_schema = File(settings.es_comments.schema_file).read()
            comment_schema = CNV.JSON2object(comment_schema, paths=True)
            comment_schema.settings = jsons.expand_dot(comment_schema.settings)
            if not settings.es_comments.alias:
                settings.es_comments.alias = settings.es_comments.index
                settings.es_comments.index = ElasticSearch.proto_name(
                    settings.es_comments.alias)
            es_comments = ElasticSearch.create_index(settings.es_comments,
                                                     comment_schema,
                                                     limit_replicas=True)