    def setUp(self):
        settings = startup.read_settings(filename="leak_check_settings.json")
        Log.start(settings.debug)
        self.private = ElasticSearch(settings.private)
        self.public = ElasticSearch(settings.public)
        self.public_comments = ElasticSearch(settings.public_comments)
        self.settings = settings
    def random_sample_of_bugs(self):
        """
        I USE THIS TO FIND BUGS THAT CAUSE MY CODE PROBLEMS.  OF COURSE, IT ONLY WORKS
        WHEN I HAVE A REFERENCE TO COMPARE TO
        """
        NUM_TO_TEST = 100
        MAX_BUG_ID = 900000

        with DB(self.settings.bugzilla) as db:
            candidate = elasticsearch.make_test_instance(
                "candidate", self.settings.candidate)
            reference = ElasticSearch(self.settings.private_bugs_reference)

            #GO FASTER BY CACHING PRIVATE BUGS TO A LOCAL FILE
            local_cache = File(self.settings.param.temp_dir +
                               "/private_bugs.json")
            if local_cache.exists:
                private_bugs = set(CNV.JSON2object(local_cache.read()))
            else:
                with Timer("get private bugs"):
                    private_bugs = compare_es.get_private_bugs(reference)
                    local_cache.write(CNV.object2JSON(private_bugs))

            while True:
                some_bugs = [
                    b for b in
                    [Random.int(MAX_BUG_ID) for _ in range(NUM_TO_TEST)]
                    if b not in private_bugs
                ]

                Log.note("Test with the following bug_ids: {{bugs}}",
                         {"bugs": some_bugs})

                #SET UP RUN PARAMETERS
                param = Struct()
                param.end_time = CNV.datetime2milli(get_current_time(db))
                param.start_time = 0
                param.start_time_str = extract_bugzilla.milli2string(db, 0)
                param.alias_file = self.settings.param.alias_file

                try:
                    with ThreadedQueue(candidate, 100) as output:
                        etl(db, output, param, please_stop=None)

                    #COMPARE ALL BUGS
                    Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
                    found_errors = compare_both(candidate, reference,
                                                self.settings, some_bugs)
                    if found_errors:
                        Log.note("Errors found")
                        break
                except Exception as e:
                    Log.warning(
                        "Total failure during compare of bugs {{bugs}}",
                        {"bugs": some_bugs}, e)
def get_or_create_index(destination_settings, source):
    #CHECK IF INDEX, OR ALIAS, EXISTS
    es = ElasticSearch(destination_settings)
    aliases = es.get_aliases()

    indexes = [a for a in aliases if a.alias == destination_settings.index]
    if not indexes:
        #CREATE INDEX
        schema = source.get_schema()
        assert schema.settings
        assert schema.mappings
        ElasticSearch.create_index(destination_settings, schema, limit_replicas=True)
    elif len(indexes) > 1:
        Log.error("do not know how to replicate to more than one index")
    elif indexes[0].alias is not None:
        destination_settings.alias = destination_settings.index
        destination_settings.index = indexes[0].index

    return ElasticSearch(destination_settings)
def test_replication():
    try:
        settings = startup.read_settings(filename="replication_settings.json")
        Log.start(settings.debug)

        source = ElasticSearch(settings.source)
        destination = replicate.get_or_create_index(settings["destination"],
                                                    source)

        replicate.replicate(source, destination, [537285],
                            CNV.string2datetime("19900101", "%Y%m%d"))
    finally:
        Log.stop()
def main(settings):
    #USE A FILE
    if settings.source.filename is not None:
        settings.destination.alias = settings.destination.index
        settings.destination.index = ElasticSearch.proto_name(settings.destination.alias)
        schema = CNV.JSON2object(File(settings.source.schema_filename).read())
        if transform_bugzilla.USE_ATTACHMENTS_DOT:
            schema = CNV.JSON2object(CNV.object2JSON(schema).replace("attachments_", "attachments."))

        dest = ElasticSearch.create_index(settings.destination, schema, limit_replicas=True)
        dest.set_refresh_interval(-1)
        extract_from_file(settings.source, dest)
        dest.set_refresh_interval(1)

        dest.delete_all_but(settings.destination.alias, settings.destination.index)
        dest.add_alias(settings.destination.alias)
        return

    # SYNC WITH source ES INDEX
    source = ElasticSearch(settings.source)
    destination = get_or_create_index(settings["destination"], source)

    # GET LAST UPDATED
    time_file = File(settings.param.last_replication_time)
    from_file = None
    if time_file.exists:
        from_file = CNV.milli2datetime(CNV.value2int(time_file.read()))
    from_es = get_last_updated(destination)
    # USE THE EARLIER OF THE TWO TIMESTAMPS SO NO UPDATES ARE MISSED
    last_updated = nvl(MIN(from_file, from_es), CNV.milli2datetime(0))
    current_time = datetime.utcnow()

    pending = get_pending(source, last_updated)
    with ThreadedQueue(destination, size=1000) as data_sink:
        replicate(source, data_sink, pending, last_updated)

    # RECORD LAST UPDATED
    time_file.write(unicode(CNV.datetime2milli(current_time)))
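
# A hedged, standalone sketch of the checkpoint-file pattern used by main()
# above (plain stdlib stands in for pyLibrary's File/CNV helpers; all names
# here are illustrative):
import os
from datetime import datetime

EPOCH = datetime(1970, 1, 1)

def read_checkpoint_millis(path):
    # RETURN THE LAST REPLICATION TIME IN MILLIS, OR None ON FIRST RUN
    if not os.path.exists(path):
        return None
    with open(path) as f:
        return int(f.read().strip())

def write_checkpoint(path, current_time):
    # RECORD THIS RUN'S START TIME SO THE NEXT RUN RESUMES FROM HERE
    millis = int((current_time - EPOCH).total_seconds() * 1000)
    with open(path, "w") as f:
        f.write(str(millis))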
def setup_es(settings, db, es, es_comments):
    """
    SETUP ES CONNECTIONS TO REFLECT IF WE ARE RESUMING, INCREMENTAL, OR STARTING OVER
    """
    current_run_time = get_current_time(db)

    if (File(settings.param.first_run_time).exists and
            File(settings.param.last_run_time).exists):
        # INCREMENTAL UPDATE; DO NOT MAKE NEW INDEX
        last_run_time = long(File(settings.param.last_run_time).read())
        if not es:
            es = ElasticSearch(settings.es)
            es_comments = ElasticSearch(settings.es_comments)
    elif File(settings.param.first_run_time).exists:
        # DO NOT MAKE NEW INDEX, CONTINUE INITIAL FILL
        try:
            last_run_time = 0
            current_run_time = long(File(settings.param.first_run_time).read())
            if not es:
                if not settings.es.alias:
                    temp = ElasticSearch(settings.es).get_proto(
                        settings.es.index)
                    settings.es.alias = settings.es.index
                    settings.es.index = temp.last()
                es = ElasticSearch(settings.es)
                # REQUIRED SO WE CAN SEE WHAT BUGS HAVE BEEN LOADED ALREADY
                es.set_refresh_interval(1)

                if not settings.es_comments.alias:
                    temp = ElasticSearch(settings.es_comments).get_proto(
                        settings.es_comments.index)
                    settings.es_comments.alias = settings.es_comments.index
                    settings.es_comments.index = temp.last()
                es_comments = ElasticSearch(settings.es_comments)
        except Exception as e:
            Log.warning("cannot resume ETL, restarting", e)
            File(settings.param.first_run_time).delete()
            return setup_es(settings, db, es, es_comments)