Example #1
def __init__(
    self,
    host,
    index,
    port=9200,
    type="log",
    queue_size=1000,
    batch_size=100,
    kwargs=None,
):
    """
    settings ARE FOR THE ELASTICSEARCH INDEX
    """
    kwargs.timeout = Duration(coalesce(kwargs.timeout, "30second")).seconds
    kwargs.retry.times = coalesce(kwargs.retry.times, 3)
    kwargs.retry.sleep = Duration(coalesce(kwargs.retry.sleep, MINUTE)).seconds

    self.es = Cluster(kwargs).get_or_create_index(
        schema=json2value(value2json(SCHEMA), leaves=True),
        limit_replicas=True,
        typed=True,
        kwargs=kwargs,
    )
    self.batch_size = batch_size
    self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
    self.queue = Queue("debug logs to es", max=queue_size, silent=True)

    self.worker = Thread.run("add debug logs to es", self._insert_loop)
Example #2
class TextLog_usingElasticSearch(TextLog):

    @use_settings
    def __init__(self, host, index, type="log", max_size=1000, batch_size=100, settings=None):
        """
        settings ARE FOR THE ELASTICSEARCH INDEX
        """
        self.es = Cluster(settings).get_or_create_index(
            schema=convert.json2value(convert.value2json(SCHEMA), leaves=True),
            limit_replicas=True,
            tjson=False,
            settings=settings
        )
        self.es.add_alias("debug")
        self.queue = self.es.threaded_queue(max_size=max_size, batch_size=batch_size)

    def write(self, template, params):
        if params.get("template"):
            # DETECTED INNER TEMPLATE, ASSUME TRACE IS ON, SO DO NOT NEED THE OUTER TEMPLATE
            self.queue.add({"value": params})
        else:
            template = strings.limit(template, 2000)
            self.queue.add({"value": {"template": template, "params": params}}, timeout=3*MINUTE)
        return self

    def stop(self):
        try:
            self.queue.add(Thread.STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT
        except Exception:
            pass

        try:
            self.queue.close()
        except Exception:
            pass
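A minimal usage sketch for the class above (not from the source): it assumes an Elasticsearch node reachable on localhost:9200 and that the @use_settings decorator assembles the keyword arguments into the `settings` object the constructor reads. The host and index values are illustrative placeholders.

# Hedged usage sketch; host/index are hypothetical.
log = TextLog_usingElasticSearch(
    host="http://localhost:9200",
    index="debug-logs",
)
log.write("hello {{name}}", {"name": "world"})  # queued, flushed in batches
log.stop()  # adds Thread.STOP so the rest of the queue drains, then closes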
Example #3
def __init__(self,
             host,
             index,
             type="query",
             max_size=10,
             batch_size=10,
             kwargs=None):
    """
    settings ARE FOR THE ELASTICSEARCH INDEX
    """
    es = Cluster(kwargs).get_or_create_index(
        schema=convert.json2value(convert.value2json(SCHEMA), leaves=True),
        limit_replicas=True,
        kwargs=kwargs)
    # ENSURE THE TYPE EXISTS FOR PROBING
    try:
        es.add({
            "id": "dummy",
            "value": {
                "hash": "dummy",
                "create_time": Date.now(),
                "last_used": Date.now(),
                "query": {}
            }
        })
    except Exception as e:
        Log.warning("Problem saving query", cause=e)
Example #4
def open_test_instance(name, filename=None, es=None, kwargs=None):
    if filename is not None:
        Log.note(
            "Using {{filename}} as {{type}}",
            filename=filename,
            type=name
        )
        return FakeES(filename=filename)
    else:
        Log.note(
            "Using ES cluster at {{host}} as {{type}}",
            host=es.host,
            type=name
        )
        cluster = Cluster(es)
        try:
            old_index = cluster.get_index(es)
            cluster.delete_index(old_index.settings.index)
        except Exception as e:
            if "Can not find index" not in e:
                Log.error("unexpected", cause=e)

        output = cluster.create_index(limit_replicas=True, limit_replicas_warning=False, kwargs=es)
        output.delete_all_but_self()
        output.add_alias(es.index)
        return output
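A hedged sketch of calling the helper above; `wrap` is assumed to come from mo_dots (it is used the same way elsewhere in these examples), and the host/index values are illustrative placeholders, not from the source.

# Hypothetical invocation: point the test harness at a live cluster.
# Any existing index with this name is deleted and a fresh one returned.
es_settings = wrap({"host": "http://localhost:9200", "index": "testing-logs"})
test_index = open_test_instance("test logs", es=es_settings)
test_index.add({"value": {"message": "smoke test"}})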
Example #5
class TextLog_usingElasticSearch(TextLog):
    @use_settings
    def __init__(self, host, index, type="log", max_size=1000, batch_size=100, settings=None):
        """
        settings ARE FOR THE ELASTICSEARCH INDEX
        """
        self.es = Cluster(settings).get_or_create_index(
            schema=convert.json2value(convert.value2json(SCHEMA), leaves=True),
            limit_replicas=True,
            tjson=True,
            settings=settings
        )
        self.batch_size = batch_size
        self.es.add_alias(coalesce(settings.alias, settings.index))
        self.queue = Queue("debug logs to es", max=max_size, silent=True)
        self.es.settings.retry.times = coalesce(self.es.settings.retry.times, 3)
        self.es.settings.retry.sleep = Duration(coalesce(self.es.settings.retry.sleep, MINUTE))
        Thread.run("add debug logs to es", self._insert_loop)

    def write(self, template, params):
        if params.get("template"):
            # DETECTED INNER TEMPLATE, ASSUME TRACE IS ON, SO DO NOT NEED THE OUTER TEMPLATE
            self.queue.add({"value": params})
        else:
            template = strings.limit(template, 2000)
            self.queue.add({"value": {"template": template, "params": params}}, timeout=3 * MINUTE)
        return self

    def _insert_loop(self, please_stop=None):
        bad_count = 0
        while not please_stop:
            try:
                Thread.sleep(seconds=1)
                messages = wrap(self.queue.pop_all())
                if messages:
                    # for m in messages:
                    #     m.value.params = leafer(m.value.params)
                    #     m.value.error = leafer(m.value.error)
                    for g, mm in jx.groupby(messages, size=self.batch_size):
                        self.es.extend(mm)
                    bad_count = 0
            except Exception as e:
                Log.warning("Problem inserting logs into ES", cause=e)
                bad_count += 1
                if bad_count > 5:
                    break
        Log.warning("Given up trying to write debug logs to ES index {{index}}", index=self.es.settings.index)

        # CONTINUE TO DRAIN THIS QUEUE
        while not please_stop:
            try:
                Thread.sleep(seconds=1)
                self.queue.pop_all()
            except Exception as e:
                Log.warning("Should not happen", cause=e)
Example #6
class TextLog_usingElasticSearch(TextLog):
    @use_settings
    def __init__(self, host, index, type="log", max_size=1000, batch_size=100, settings=None):
        """
        settings ARE FOR THE ELASTICSEARCH INDEX
        """
        self.es = Cluster(settings).get_or_create_index(
            schema=convert.json2value(convert.value2json(SCHEMA), leaves=True),
            limit_replicas=True,
            tjson=True,
            settings=settings,
        )
        self.batch_size = batch_size
        self.es.add_alias(coalesce(settings.alias, settings.index))
        self.queue = Queue("debug logs to es", max=max_size, silent=True)
        self.es.settings.retry.times = coalesce(self.es.settings.retry.times, 3)
        self.es.settings.retry.sleep = Duration(coalesce(self.es.settings.retry.sleep, MINUTE))
        Thread.run("add debug logs to es", self._insert_loop)

    def write(self, template, params):
        if params.get("template"):
            # DETECTED INNER TEMPLATE, ASSUME TRACE IS ON, SO DO NOT NEED THE OUTER TEMPLATE
            self.queue.add({"value": params})
        else:
            template = strings.limit(template, 2000)
            self.queue.add({"value": {"template": template, "params": params}}, timeout=3 * MINUTE)
        return self

    def _insert_loop(self, please_stop=None):
        bad_count = 0
        while not please_stop:
            try:
                Thread.sleep(seconds=1)
                messages = wrap(self.queue.pop_all())
                if messages:
                    # for m in messages:
                    #     m.value.params = leafer(m.value.params)
                    #     m.value.error = leafer(m.value.error)
                    for g, mm in jx.groupby(messages, size=self.batch_size):
                        self.es.extend(mm)
                    bad_count = 0
            except Exception as e:
                Log.warning("Problem inserting logs into ES", cause=e)
                bad_count += 1
                if bad_count > 5:
                    break
        Log.warning("Given up trying to write debug logs to ES index {{index}}", index=self.es.settings.index)

        # CONTINUE TO DRAIN THIS QUEUE
        while not please_stop:
            try:
                Thread.sleep(seconds=1)
                self.queue.pop_all()
            except Exception as e:
                Log.warning("Should not happen", cause=e)
Example #7
def __init__(self,
             host,
             index,
             type="log",
             max_size=1000,
             batch_size=100,
             settings=None):
    """
    settings ARE FOR THE ELASTICSEARCH INDEX
    """
    self.es = Cluster(settings).get_or_create_index(
        schema=convert.json2value(convert.value2json(SCHEMA), paths=True),
        limit_replicas=True,
        settings=settings)
    self.queue = self.es.threaded_queue(max_size=max_size,
                                        batch_size=batch_size)
Example #8
 def __init__(self, host, index, type="log", max_size=1000, batch_size=100, settings=None):
     """
     settings ARE FOR THE ELASTICSEARCH INDEX
     """
     self.es = Cluster(settings).get_or_create_index(
         schema=convert.json2value(convert.value2json(SCHEMA), leaves=True),
         limit_replicas=True,
         tjson=True,
         settings=settings
     )
     self.batch_size = batch_size
     self.es.add_alias(coalesce(settings.alias, settings.index))
     self.queue = Queue("debug logs to es", max=max_size, silent=True)
     self.es.settings.retry.times = coalesce(self.es.settings.retry.times, 3)
     self.es.settings.retry.sleep = Duration(coalesce(self.es.settings.retry.sleep, MINUTE))
     Thread.run("add debug logs to es", self._insert_loop)
Example #9
def __init__(
    self,
    host,
    index,
    port=9200,
    type="log",
    queue_size=1000,
    batch_size=100,
    kwargs=None,
):
    """
    settings ARE FOR THE ELASTICSEARCH INDEX
    """
    kwargs.timeout = Duration(coalesce(kwargs.timeout, "30second")).seconds
    kwargs.retry.times = coalesce(kwargs.retry.times, 3)
    kwargs.retry.sleep = Duration(coalesce(kwargs.retry.sleep, MINUTE)).seconds
    kwargs.host = Random.sample(listwrap(host), 1)[0]

    schema = json2value(value2json(SCHEMA), leaves=True)
    schema.mappings[type].properties["~N~"].type = "nested"
    self.es = Cluster(kwargs).get_or_create_index(
        schema=schema,
        limit_replicas=True,
        typed=True,
        kwargs=kwargs,
    )
    self.batch_size = batch_size
    self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
    self.queue = Queue("debug logs to es", max=queue_size, silent=True)

    self.worker = Thread.run("add debug logs to es", self._insert_loop)
Example #10
class Log_usingElasticSearch(BaseLog):

    @use_settings
    def __init__(self, host, index, type="log", max_size=1000, batch_size=100, settings=None):
        """
        settings ARE FOR THE ELASTICSEARCH INDEX
        """
        self.es = Cluster(settings).get_or_create_index(
            schema=convert.json2value(convert.value2json(SCHEMA), paths=True),
            limit_replicas=True,
            settings=settings
        )
        self.queue = self.es.threaded_queue(max_size=max_size, batch_size=batch_size)

    def write(self, template, params):
        try:
            if params.get("template"):
                # DETECTED INNER TEMPLATE, ASSUME TRACE IS ON, SO DO NOT NEED THE OUTER TEMPLATE
                self.queue.add({"value": params})
            else:
                if len(template) > 2000:
                    template = template[:1997] + "..."
                self.queue.add({"value": {"template": template, "params": params}})
            return self
        except Exception as e:
            raise e  # OH NO!
Example #11
class Log_usingElasticSearch(BaseLog):
    @use_settings
    def __init__(self,
                 host,
                 index,
                 type="log",
                 max_size=1000,
                 batch_size=100,
                 settings=None):
        """
        settings ARE FOR THE ELASTICSEARCH INDEX
        """
        self.es = Cluster(settings).get_or_create_index(
            schema=convert.json2value(convert.value2json(SCHEMA), paths=True),
            limit_replicas=True,
            settings=settings)
        self.queue = self.es.threaded_queue(max_size=max_size,
                                            batch_size=batch_size)

    def write(self, template, params):
        try:
            if params.get("template"):
                # DETECTED INNER TEMPLATE, ASSUME TRACE IS ON, SO DO NOT NEED THE OUTER TEMPLATE
                self.queue.add({"value": params})
            else:
                if len(template) > 2000:
                    template = template[:1997] + "..."
                self.queue.add(
                    {"value": {
                        "template": template,
                        "params": params
                    }})
            return self
        except Exception as e:
            raise e  # OH NO!
Example #12
def __init__(self, host, index, type="log", max_size=1000, batch_size=100, settings=None):
    """
    settings ARE FOR THE ELASTICSEARCH INDEX
    """
    self.es = Cluster(settings).get_or_create_index(
        schema=convert.json2value(convert.value2json(SCHEMA), paths=True),
        limit_replicas=True,
        settings=settings
    )
    self.queue = self.es.threaded_queue(max_size=max_size, batch_size=batch_size)
Example #13
def __init__(self,
             host,
             index,
             type=DATA_TYPE,
             max_size=10,
             batch_size=10,
             kwargs=None):
    """
    settings ARE FOR THE ELASTICSEARCH INDEX
    """
    es = Cluster(kwargs).get_or_create_index(
        schema=json2value(convert.value2json(SCHEMA), leaves=True),
        limit_replicas=True,
        typed=False,
        kwargs=kwargs)
    es.add_alias(index)
    self.queue = es.threaded_queue(max_size=max_size,
                                   batch_size=batch_size,
                                   period=1)
    self.es = jx_elasticsearch.new_instance(es.settings)
Example #14
def open_test_instance(name, filename=None, es=None, kwargs=None):
    if filename is not None:
        Log.note(
            "Using {{filename}} as {{type}}",
            filename=filename,
            type=name
        )
        return FakeES(filename=filename)
    else:
        Log.note(
            "Using ES cluster at {{host}} as {{type}}",
            host=es.host,
            type=name
        )
        cluster = Cluster(es)
        try:
            old_index = cluster.get_index(es)
            cluster.delete_index(old_index.settings.index)
        except Exception as e:
            if "Can not find index" not in e:
                Log.error("unexpected", cause=e)

        output = cluster.create_index(limit_replicas=True, limit_replicas_warning=False, kwargs=es)
        output.delete_all_but_self()
        output.add_alias(es.index)
        return output
Example #15
def __init__(self, host, index, type="log", max_size=1000, batch_size=100, settings=None):
    """
    settings ARE FOR THE ELASTICSEARCH INDEX
    """
    self.es = Cluster(settings).get_or_create_index(
        schema=convert.json2value(convert.value2json(SCHEMA), leaves=True),
        limit_replicas=True,
        tjson=True,
        settings=settings
    )
    self.batch_size = batch_size
    self.es.add_alias("debug")
    self.queue = Queue("debug logs to es", max=max_size, silent=True)
    Thread.run("add debug logs to es", self._insert_loop)
Example #16
def open_test_instance(name, settings):
    if settings.filename:
        Log.note("Using {{filename}} as {{type}}",
                 filename=settings.filename,
                 type=name)
        return FakeES(settings)
    else:
        Log.note("Using ES cluster at {{host}} as {{type}}",
                 host=settings.host,
                 type=name)
        cluster = Cluster(settings)
        try:
            old_index = cluster.get_index(kwargs=settings)
            old_index.delete()
        except Exception as e:
            if "Can not find index" not in e:
                Log.error("unexpected", cause=e)

        es = cluster.create_index(limit_replicas=True,
                                  limit_replicas_warning=False,
                                  kwargs=settings)
        es.delete_all_but_self()
        es.add_alias(settings.index)
        return es
Example #17
def open_test_instance(name, settings):
    if settings.filename:
        Log.note("Using {{filename}} as {{type}}",
                 filename=settings.filename,
                 type=name)
        return Fake_ES(settings)
    else:
        Log.note("Using ES cluster at {{host}} as {{type}}",
                 host=settings.host,
                 type=name)

        Index(settings).delete()

        es = Cluster(settings).create_index(settings, limit_replicas=True)
        return es
Example #18
def __init__(self, host, index, port=9200, type="log", max_size=1000, batch_size=100, kwargs=None):
    """
    settings ARE FOR THE ELASTICSEARCH INDEX
    """
    self.es = Cluster(kwargs).get_or_create_index(
        schema=mo_json.json2value(value2json(SCHEMA), leaves=True),
        limit_replicas=True,
        tjson=True,
        kwargs=kwargs
    )
    self.batch_size = batch_size
    self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
    self.queue = Queue("debug logs to es", max=max_size, silent=True)
    self.es.settings.retry.times = coalesce(self.es.settings.retry.times, 3)
    self.es.settings.retry.sleep = Duration(coalesce(self.es.settings.retry.sleep, MINUTE))
    Thread.run("add debug logs to es", self._insert_loop)
Example #19
def main():
    settings = startup.read_settings(defs={
        "name": ["--restart", "--reset", "--redo"],
        "help": "force a reprocessing of all data",
        "action": "store_true",
        "dest": "restart"
    })
    Log.start(settings.debug)

    try:
        with startup.SingleInstance(flavor_id=settings.args.filename):
            if settings.args.restart:
                reviews = Cluster(settings.destination).create_index(settings.destination)
            else:
                reviews = Cluster(settings.destination).get_proto(settings.destination)

            bugs = Cluster(settings.source).get_index(settings.source)

            with FromES(bugs) as esq:
                es_max_bug = esq.query({
                    "from": "private_bugs",
                    "select": {"name": "max_bug", "value": "bug_id", "aggregate": "maximum"}
                })

            # PROBE WHAT RANGE OF BUGS IS LEFT TO DO (IN EVENT OF FAILURE)
            with FromES(reviews) as esq:
                es_min_bug = esq.query({
                    "from": "reviews",
                    "select": {"name": "min_bug", "value": "bug_id", "aggregate": "minimum"}
                })

            batch_size = coalesce(bugs.settings.batch_size, settings.size, 1000)
            threads = coalesce(settings.threads, 4)
            Log.note(str(settings.min_bug))
            min_bug = int(coalesce(settings.min_bug, 0))
            max_bug = int(coalesce(settings.max_bug, Math.min(es_min_bug + batch_size * threads, es_max_bug)))

            with ThreadedQueue(reviews, batch_size=coalesce(reviews.settings.batch_size, 100)) as sink:
                func = functools.partial(full_etl, settings, sink)
                with Multithread(func, threads=threads) as m:
                    m.inbound.silent = True
                    Log.note("bugs from {{min}} to {{max}}, step {{step}}", {
                        "min": min_bug,
                        "max": max_bug,
                        "step": batch_size
                    })
                    m.execute(reversed([{"bugs": range(s, e)} for s, e in qb.intervals(min_bug, max_bug, size=1000)]))

            if settings.args.restart:
                reviews.add_alias()
                reviews.delete_all_but_self()
    finally:
        Log.stop()
Example #20
def main():
    try:
        settings = startup.read_settings(defs=[{
            "name": ["--no_restart", "--no_reset", "--no_redo", "--norestart", "--noreset", "--noredo"],
            "help": "do not allow creation of new index (for debugging rouge resets)",
            "action": "store_true",
            "dest": "no_restart"
        }, {
            "name": ["--restart", "--reset", "--redo"],
            "help": "force a reprocessing of all data",
            "action": "store_true",
            "dest": "restart"
        }, {
            "name": ["--file", "--scan_file", "--scanfile", "--use_file", "--usefile"],
            "help": "scan file for missing ids",
            "action": "store_true",
            "dest": "scan_file"
        }, {
            "name": ["--nofile", "--no_file", "--no-file"],
            "help": "do not scan file for missing ids",
            "action": "store_false",
            "dest": "scan_file"
        }])
        Log.start(settings.debug)

        with startup.SingleInstance(flavor_id=settings.args.filename):
            settings.production.threads = nvl(settings.production.threads, 1)
            settings.param.output_file = nvl(settings.param.output_file, "./results/raw_json_blobs.tab")

            transformer = DZ_to_ES(settings.pushlog)

            #RESET ONLY IF NEW Transform IS USED
            if settings.args.restart:
                es = Cluster(settings.elasticsearch).create_index(settings.elasticsearch)
                es.add_alias()
                es.delete_all_but_self()
                extract_from_datazilla_using_id(es, settings, transformer)
            else:
                es = Cluster(settings.elasticsearch).get_or_create_index(settings.elasticsearch)
                extract_from_datazilla_using_id(es, settings, transformer)
    except Exception as e:
        Log.error("Problem with etl", e)
Example #21
class StructuredLogger_usingElasticSearch(StructuredLogger):
    @override
    def __init__(
        self,
        host,
        index,
        port=9200,
        type="log",
        queue_size=1000,
        batch_size=100,
        kwargs=None,
    ):
        """
        settings ARE FOR THE ELASTICSEARCH INDEX
        """
        kwargs.timeout = Duration(coalesce(kwargs.timeout, "30second")).seconds
        kwargs.retry.times = coalesce(kwargs.retry.times, 3)
        kwargs.retry.sleep = Duration(coalesce(kwargs.retry.sleep, MINUTE)).seconds

        self.es = Cluster(kwargs).get_or_create_index(
            schema=json2value(value2json(SCHEMA), leaves=True),
            limit_replicas=True,
            typed=True,
            kwargs=kwargs,
        )
        self.batch_size = batch_size
        self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
        self.queue = Queue("debug logs to es", max=queue_size, silent=True)

        self.worker = Thread.run("add debug logs to es", self._insert_loop)

    def write(self, template, params):
        try:
            params.template = strings.limit(params.template, 2000)
            params.format = None
            self.queue.add({"value": _deep_json_to_string(params, 3)}, timeout=3 * 60)
        except Exception as e:
            sys.stdout.write(text_type(Except.wrap(e)))
        return self

    def _insert_loop(self, please_stop=None):
        bad_count = 0
        while not please_stop:
            try:
                messages = wrap(self.queue.pop_all())
                if not messages:
                    Till(seconds=PAUSE_AFTER_GOOD_INSERT).wait()
                    continue

                for g, mm in jx.groupby(messages, size=self.batch_size):
                    scrubbed = []
                    for i, message in enumerate(mm):
                        if message is THREAD_STOP:
                            please_stop.go()
                            continue
                        try:
                            messages = flatten_causal_chain(message.value)
                            scrubbed.append(
                                {"value": [_deep_json_to_string(m, depth=3) for m in messages]}
                            )
                        except Exception as e:
                            Log.warning("Problem adding to scrubbed list", cause=e)

                    self.es.extend(scrubbed)
                    bad_count = 0
            except Exception as f:
                Log.warning("Problem inserting logs into ES", cause=f)
                bad_count += 1
                if bad_count > MAX_BAD_COUNT:
                    Log.warning(
                        "Given up trying to write debug logs to ES index {{index}}",
                        index=self.es.settings.index,
                    )
                Till(seconds=PAUSE_AFTER_BAD_INSERT).wait()

        self.es.flush()

        # CONTINUE TO DRAIN THIS QUEUE
        while not please_stop:
            try:
                Till(seconds=PAUSE_AFTER_GOOD_INSERT).wait()
                self.queue.pop_all()
            except Exception as e:
                Log.warning("Should not happen", cause=e)

    def stop(self):
        with suppress_exception:
            self.queue.add(THREAD_STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT

        with suppress_exception:
            self.queue.close()
        self.worker.join()
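A hedged usage sketch for the logger above, not taken from the source: it assumes @override maps these keyword arguments onto `kwargs`, that `wrap` comes from mo_dots, and that the host/index values are placeholders. Note that write() reads the template out of `params`, so the positional template argument is left as None here.

# Hypothetical setup; writes are queued and a worker thread batches them
# into the index, so stop() is required for a clean flush.
logger = StructuredLogger_usingElasticSearch(
    host="http://localhost:9200",   # illustrative cluster address
    index="debug-etl",              # illustrative index name
    queue_size=1000,
    batch_size=100,
)
logger.write(None, wrap({"template": "{{machine}} is down", "params": {"machine": "web-3"}}))
logger.stop()  # drain the queue, flush ES, join the worker thread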
Example #22
class StructuredLogger_usingElasticSearch(StructuredLogger):
    @override
    def __init__(self,
                 host,
                 index,
                 port=9200,
                 type="log",
                 max_size=1000,
                 batch_size=100,
                 kwargs=None):
        """
        settings ARE FOR THE ELASTICSEARCH INDEX
        """
        self.es = Cluster(kwargs).get_or_create_index(
            schema=mo_json.json2value(value2json(SCHEMA), leaves=True),
            limit_replicas=True,
            tjson=True,
            kwargs=kwargs)
        self.batch_size = batch_size
        self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
        self.queue = Queue("debug logs to es", max=max_size, silent=True)
        self.es.settings.retry.times = coalesce(self.es.settings.retry.times,
                                                3)
        self.es.settings.retry.sleep = Duration(
            coalesce(self.es.settings.retry.sleep, MINUTE))
        Thread.run("add debug logs to es", self._insert_loop)

    def write(self, template, params):
        if params.get("template"):
            # DETECTED INNER TEMPLATE, ASSUME TRACE IS ON, SO DO NOT NEED THE OUTER TEMPLATE
            self.queue.add({"value": params})
        else:
            template = strings.limit(template, 2000)
            self.queue.add({"value": {
                "template": template,
                "params": params
            }},
                           timeout=3 * MINUTE)
        return self

    def _insert_loop(self, please_stop=None):
        bad_count = 0
        while not please_stop:
            try:
                Till(seconds=1).wait()
                messages = wrap(self.queue.pop_all())
                if not messages:
                    continue

                for g, mm in jx.groupby(messages, size=self.batch_size):
                    scrubbed = []
                    try:
                        for i, message in enumerate(mm):
                            if message is THREAD_STOP:
                                please_stop.go()
                                return
                            scrubbed.append(
                                _deep_json_to_string(message, depth=3))
                    finally:
                        self.es.extend(scrubbed)
                    bad_count = 0
            except Exception as e:
                Log.warning("Problem inserting logs into ES", cause=e)
                bad_count += 1
                if bad_count > MAX_BAD_COUNT:
                    Log.warning(
                        "Given up trying to write debug logs to ES index {{index}}",
                        index=self.es.settings.index)
                Till(seconds=30).wait()

        # CONTINUE TO DRAIN THIS QUEUE
        while not please_stop:
            try:
                Till(seconds=1).wait()
                self.queue.pop_all()
            except Exception as e:
                Log.warning("Should not happen", cause=e)

    def stop(self):
        with suppress_exception:
            self.queue.add(
                THREAD_STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT

        with suppress_exception:
            self.queue.close()
Example #23

container_types = Data(elasticsearch=ESUtils)

try:
    # read_alternate_settings
    filename = os.environ.get("TEST_CONFIG")
    if filename:
        test_jx.global_settings = mo_json_config.get("file://" + filename)
    else:
        Log.alert(
            "No TEST_CONFIG environment variable to point to config file.  Using "
            + DEFAULT_TEST_CONFIG)
        test_jx.global_settings = mo_json_config.get("file://" +
                                                     DEFAULT_TEST_CONFIG)
    constants.set(test_jx.global_settings.constants)
    Log.start(test_jx.global_settings.debug)

    if not test_jx.global_settings.use:
        Log.error('Must have a {"use": type} set in the config file')

    test_jx.global_settings.elasticsearch.version = Cluster(
        test_jx.global_settings.elasticsearch).version
    test_jx.utils = container_types[test_jx.global_settings.use](
        test_jx.global_settings)
except Exception as e:
    Log.warning("problem", cause=e)

Log.alert("Resetting test count")
NEXT = 0
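The bootstrap above expects the config file to name a container type under "use". A hypothetical shape follows, for illustration only; the real schema lives in the project's own test configs.

# Illustrative config contents, expressed as the Python dict the JSON decodes to.
EXAMPLE_CONFIG = {
    "use": "elasticsearch",   # selects ESUtils from container_types
    "elasticsearch": {"host": "http://localhost", "port": 9200, "index": "testdata"},  # hypothetical
    "constants": {},          # values consumed by constants.set()
    "debug": {"trace": True}  # passed to Log.start()
}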
Example #24
class StructuredLogger_usingElasticSearch(StructuredLogger):
    @override
    def __init__(
        self,
        host,
        index,
        port=9200,
        type="log",
        queue_size=1000,
        batch_size=100,
        kwargs=None,
    ):
        """
        settings ARE FOR THE ELASTICSEARCH INDEX
        """
        kwargs.timeout = Duration(coalesce(kwargs.timeout, "30second")).seconds
        kwargs.retry.times = coalesce(kwargs.retry.times, 3)
        kwargs.retry.sleep = Duration(coalesce(kwargs.retry.sleep, MINUTE)).seconds
        kwargs.host = Random.sample(listwrap(host), 1)[0]

        schema = json2value(value2json(SCHEMA), leaves=True)
        schema.mappings[type].properties["~N~"].type = "nested"
        self.es = Cluster(kwargs).get_or_create_index(
            schema=schema,
            limit_replicas=True,
            typed=True,
            kwargs=kwargs,
        )
        self.batch_size = batch_size
        self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
        self.queue = Queue("debug logs to es", max=queue_size, silent=True)

        self.worker = Thread.run("add debug logs to es", self._insert_loop)

    def write(self, template, params):
        try:
            params.template = strings.limit(params.template, 2000)
            params.format = None
            self.queue.add({"value": _deep_json_to_string(params, 3)}, timeout=3 * 60)
        except Exception as e:
            sys.stdout.write(text_type(Except.wrap(e)))
        return self

    def _insert_loop(self, please_stop=None):
        bad_count = 0
        while not please_stop:
            try:
                messages = wrap(self.queue.pop_all())
                if not messages:
                    Till(seconds=PAUSE_AFTER_GOOD_INSERT).wait()
                    continue

                for g, mm in jx.groupby(messages, size=self.batch_size):
                    scrubbed = []
                    for i, message in enumerate(mm):
                        if message is THREAD_STOP:
                            please_stop.go()
                            continue
                        try:
                            messages = flatten_causal_chain(message.value)
                            scrubbed.append(
                                {
                                    "value": [
                                        _deep_json_to_string(m, depth=3)
                                        for m in messages
                                    ]
                                }
                            )
                        except Exception as e:
                            Log.warning("Problem adding to scrubbed list", cause=e)

                    self.es.extend(scrubbed)
                    bad_count = 0
            except Exception as f:
                Log.warning("Problem inserting logs into ES", cause=f)
                bad_count += 1
                if bad_count > MAX_BAD_COUNT:
                    Log.warning(
                        "Given up trying to write debug logs to ES index {{index}}",
                        index=self.es.settings.index,
                    )
                Till(seconds=PAUSE_AFTER_BAD_INSERT).wait()

        self.es.flush()

        # CONTINUE TO DRAIN THIS QUEUE
        while not please_stop:
            try:
                Till(seconds=PAUSE_AFTER_GOOD_INSERT).wait()
                self.queue.pop_all()
            except Exception as e:
                Log.warning("Should not happen", cause=e)

    def stop(self):
        with suppress_exception:
            self.queue.add(THREAD_STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT

        with suppress_exception:
            self.queue.close()
        self.worker.join()
class StructuredLogger_usingElasticSearch(StructuredLogger):
    @override
    def __init__(self, host, index, port=9200, type="log", max_size=1000, batch_size=100, kwargs=None):
        """
        settings ARE FOR THE ELASTICSEARCH INDEX
        """
        self.es = Cluster(kwargs).get_or_create_index(
            schema=mo_json.json2value(value2json(SCHEMA), leaves=True),
            limit_replicas=True,
            tjson=True,
            kwargs=kwargs
        )
        self.batch_size = batch_size
        self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
        self.queue = Queue("debug logs to es", max=max_size, silent=True)
        self.es.settings.retry.times = coalesce(self.es.settings.retry.times, 3)
        self.es.settings.retry.sleep = Duration(coalesce(self.es.settings.retry.sleep, MINUTE))
        Thread.run("add debug logs to es", self._insert_loop)

    def write(self, template, params):
        if params.get("template"):
            # DETECTED INNER TEMPLATE, ASSUME TRACE IS ON, SO DO NOT NEED THE OUTER TEMPLATE
            self.queue.add({"value": params})
        else:
            template = strings.limit(template, 2000)
            self.queue.add({"value": {"template": template, "params": params}}, timeout=3 * MINUTE)
        return self

    def _insert_loop(self, please_stop=None):
        bad_count = 0
        while not please_stop:
            try:
                Till(seconds=1).wait()
                messages = wrap(self.queue.pop_all())
                if not messages:
                    continue

                for g, mm in jx.groupby(messages, size=self.batch_size):
                    scrubbed = []
                    try:
                        for i, message in enumerate(mm):
                            if message is THREAD_STOP:
                                please_stop.go()
                                return
                            scrubbed.append(_deep_json_to_string(message, depth=3))
                    finally:
                        self.es.extend(scrubbed)
                    bad_count = 0
            except Exception as e:
                Log.warning("Problem inserting logs into ES", cause=e)
                bad_count += 1
                if bad_count > MAX_BAD_COUNT:
                    Log.warning("Given up trying to write debug logs to ES index {{index}}", index=self.es.settings.index)
                Till(seconds=30).wait()

        # CONTINUE TO DRAIN THIS QUEUE
        while not please_stop:
            try:
                Till(seconds=1).wait()
                self.queue.pop_all()
            except Exception as e:
                Log.warning("Should not happen", cause=e)

    def stop(self):
        with suppress_exception:
            self.queue.add(THREAD_STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT

        with suppress_exception:
            self.queue.close()