Example #1
def main():
    try:
        settings = startup.read_settings()
        with startup.SingleInstance(settings.args.filename):
            constants.set(settings.constants)
            Log.start(settings.debug)

            extractor = Extract(settings)

            def extract(please_stop):
                with MySQL(**settings.snowflake.database) as db:
                    with db.transaction():
                        for kwargs in extractor.queue:
                            if please_stop:
                                break
                            try:
                                extractor.extract(db=db,
                                                  please_stop=please_stop,
                                                  **kwargs)
                            except Exception as e:
                                Log.warning("Could not extract", cause=e)
                                extractor.queue.add(kwargs)

            for i in range(settings.extract.threads):
                Thread.run("extract #" + text_type(i), extract)

            please_stop = Signal()
            Thread.wait_for_shutdown_signal(please_stop=please_stop,
                                            allow_exit=True,
                                            wait_forever=False)
    except Exception as e:
        Log.warning("Problem with data extraction", e)
    finally:
        Log.stop()
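Example #1 shows the pattern that recurs throughout this page: Thread.run(name, target, ...) starts a worker function and hands it a please_stop Signal so it can exit cleanly. A minimal sketch of that contract, built only from pieces seen in these examples (the worker body and names are illustrative):

from mo_threads import Signal, Thread, Till

def worker(please_stop):
    # please_stop IS A Signal PROVIDED BY Thread.run; LOOP UNTIL IT GOES
    while not please_stop:
        # DO ONE UNIT OF WORK, THEN PAUSE WITHOUT BLOCKING SHUTDOWN
        (Till(seconds=1) | please_stop).wait()

please_stop = Signal()
thread = Thread.run("example worker", worker, please_stop=please_stop)
please_stop.go()  # REQUEST A CLEAN STOP
thread.join()     # WAIT FOR THE WORKER TO EXIT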
Example #2
    def __exit__(self, exc_type, exc_val, exc_tb):
        from mo_threads import Thread

        Thread.run("delete file " + self.name,
                   delete_daemon,
                   file=self,
                   caller_stack=get_stacktrace(1))
Example #3
    def __init__(self, stream):
        assert stream

        use_UTF8 = False

        if isinstance(stream, basestring):
            if stream.startswith("sys."):
                use_UTF8 = True  # sys.* ARE OLD AND CAN NOT HANDLE unicode
            self.stream = eval(stream)
            name = stream
        else:
            self.stream = stream
            name = "stream"

        # WRITE TO STREAMS CAN BE *REALLY* SLOW, WE WILL USE A THREAD
        from mo_threads import Queue

        if use_UTF8:
            def utf8_appender(value):
                if isinstance(value, unicode):
                    value = value.encode('utf8')
                self.stream.write(value)

            appender = utf8_appender
        else:
            appender = self.stream.write

        self.queue = Queue("queue for " + self.__class__.__name__ + "(" + name + ")", max=10000, silent=True)
        self.thread = Thread("log to " + self.__class__.__name__ + "(" + name + ")", time_delta_pusher, appender=appender, queue=self.queue, interval=0.3)
        self.thread.parent.remove_child(self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
        self.thread.start()
Example #4
def es_bulksetop(esq, frum, query):
    abs_limit = MIN([query.limit, MAX_DOCUMENTS])
    guid = randoms.base64(32, extra="-_")

    schema = frum.schema
    all_paths, split_decoders, var_to_columns = pre_process(query)
    new_select, split_select, flatten = get_selects(query)
    op, split_wheres = setop_to_es_queries(query, all_paths, split_select, var_to_columns)
    es_query = es_query_proto(split_select, op, split_wheres, schema)
    es_query.size = MIN([query.chunk_size, MAX_CHUNK_SIZE])
    es_query.sort = jx_sort_to_es_sort(query.sort, schema)
    if not es_query.sort:
        es_query.sort = ["_doc"]

    formatter = formatters[query.format](abs_limit, new_select, query)

    Thread.run(
        "Download " + guid,
        extractor,
        guid,
        abs_limit,
        esq,
        es_query,
        formatter,
        parent_thread=Null,
    ).release()

    output = to_data(
        {
            "url": URL_PREFIX / (guid + ".json"),
            "status": URL_PREFIX / (guid + ".status.json"),
            "meta": {"format": query.format, "es_query": es_query, "limit": abs_limit},
        }
    )
    return output
Example #5
    def __init__(self,
                 host,
                 index,
                 port=9200,
                 type="log",
                 queue_size=1000,
                 batch_size=100,
                 kwargs=None):
        """
        settings ARE FOR THE ELASTICSEARCH INDEX
        """
        kwargs.timeout = Duration(coalesce(kwargs.timeout, "30second")).seconds
        kwargs.retry.times = coalesce(kwargs.retry.times, 3)
        kwargs.retry.sleep = Duration(coalesce(kwargs.retry.sleep,
                                               MINUTE)).seconds

        self.es = Cluster(kwargs).get_or_create_index(
            schema=mo_json.json2value(value2json(SCHEMA), leaves=True),
            limit_replicas=True,
            tjson=True,
            kwargs=kwargs)
        self.batch_size = batch_size
        self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
        self.queue = Queue("debug logs to es", max=queue_size, silent=True)

        Thread.run("add debug logs to es", self._insert_loop)
Example #6
def queue_consumer(pull_queue, please_stop=None):
    queue = aws.Queue(pull_queue)
    time_offset = None
    request_count = 0

    while not please_stop:
        request = queue.pop(till=please_stop)
        if please_stop:
            break
        if not request:
            Log.note("Nothing in queue, pausing for 5 seconds...")
            (please_stop | Till(seconds=5)).wait()
            continue

        if SKIP_TRY_REQUESTS and 'try' in request.where['and'].eq.branch:
            Log.note("Skipping try revision.")
            queue.commit()
            continue

        now = Date.now().unix
        if time_offset is None:
            time_offset = now - request.meta.request_time

        next_request = request.meta.request_time + time_offset
        if next_request > now:
            Log.note("Next request in {{wait_time}}", wait_time=Duration(seconds=next_request - now))
            Till(till=next_request).wait()

        Thread.run("request "+text_type(request_count), one_request, request)
        request_count += 1
        queue.commit()
Example #7
    def test_lock_and_till(self):
        locker = Lock("prime lock")
        got_lock = Signal()
        a_is_ready = Signal("a lock")
        b_is_ready = Signal("b lock")

        def loop(is_ready, please_stop):
            with locker:
                while not got_lock:
                    # Log.note("{{thread}} is waiting", thread=Thread.current().name)
                    locker.wait(till=Till(seconds=0))
                    is_ready.go()
                locker.wait()
                Log.note("thread is expected to get here")
        thread_a = Thread.run("a", loop, a_is_ready)
        thread_b = Thread.run("b", loop, b_is_ready)

        a_is_ready.wait()
        b_is_ready.wait()
        with locker:
            got_lock.go()
            Till(seconds=0.1).wait()  # MUST WAIT FOR a AND b TO PERFORM locker.wait()
            Log.note("leaving")
            pass
        with locker:
            Log.note("leaving again")
            pass
        Till(seconds=1).wait()

        self.assertTrue(bool(thread_a.stopped), "Thread should be done by now")
        self.assertTrue(bool(thread_b.stopped), "Thread should be done by now")
Example #8
    def __init__(self, rate=None, amortization_period=None, source=None, database=None, kwargs=None):
        self.amortization_period = coalesce(amortization_period, AMORTIZATION_PERIOD)
        self.rate = coalesce(rate, HG_REQUEST_PER_SECOND)
        self.cache_locker = Lock()
        self.cache = {}  # MAP FROM url TO (ready, headers, response, timestamp) PAIR
        self.no_cache = {}  # VERY SHORT TERM CACHE
        self.workers = []
        self.todo = Queue(APP_NAME+" todo")
        self.requests = Queue(APP_NAME + " requests", max=int(self.rate * self.amortization_period.seconds))
        self.url = URL(source.url)
        self.db = Sqlite(database)
        self.inbound_rate = RateLogger("Inbound")
        self.outbound_rate = RateLogger("hg.mo")

        if not self.db.query("SELECT name FROM sqlite_master WHERE type='table'").data:
            with self.db.transaction() as t:
                t.execute(
                    "CREATE TABLE cache ("
                    "   path TEXT PRIMARY KEY, "
                    "   headers TEXT, "
                    "   response TEXT, "
                    "   timestamp REAL "
                    ")"
                )

        self.threads = [
            Thread.run(APP_NAME+" worker" + text_type(i), self._worker)
            for i in range(CONCURRENCY)
        ]
        self.limiter = Thread.run(APP_NAME+" limiter", self._rate_limiter)
        self.cleaner = Thread.run(APP_NAME+" cleaner", self._cache_cleaner)
Example #9
    def test_queue_speed(self):
        SCALE = 1000*10

        done = Signal("done")
        slow = Queue()
        q = ThreadedQueue("test queue", queue=slow)

        def empty(please_stop):
            while not please_stop:
                item = q.pop()
                if item is THREAD_STOP:
                    break

            done.go()

        Thread.run("empty", empty)

        timer = Timer("add {{num}} to queue", param={"num": SCALE})
        with timer:
            for i in range(SCALE):
                q.add(i)
            q.add(THREAD_STOP)
            Log.note("Done insert")
            done.wait()

        self.assertLess(timer.duration.seconds, 1.5, "Expecting queue to be fast")
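Example #9 relies on THREAD_STOP, a sentinel value: pushing it onto a queue tells the consumer to drain remaining items and exit. A condensed sketch of the idiom (process() is a placeholder):

from mo_threads import Queue, Thread, THREAD_STOP

def process(item):
    print(item)  # PLACEHOLDER FOR REAL WORK

q = Queue("work")

def consumer(please_stop):
    while not please_stop:
        item = q.pop()
        if item is THREAD_STOP:  # SENTINEL MARKS THE END OF WORK
            break
        process(item)

Thread.run("consumer", consumer)
for i in range(10):
    q.add(i)
q.add(THREAD_STOP)  # LET THE CONSUMER DRAIN, THEN EXIT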
Example #10
    def __init__(self, name):
        self.name = name
        self.lock = Lock("rate locker")
        self.request_rate = 0.0
        self.last_request = Date.now()

        Thread.run("rate logger", self._daemon)
Example #12
    def __init__(self, stream):
        assert stream

        if is_text(stream):
            name = stream
            stream = self.stream = eval(stream)
            if name.startswith("sys.") and PY3:
                self.stream = Data(write=lambda d: stream.write(d.decode('utf8')))
        else:
            name = "stream"
            self.stream = stream

        # WRITE TO STREAMS CAN BE *REALLY* SLOW, WE WILL USE A THREAD
        from mo_threads import Queue

        def utf8_appender(value):
            if is_text(value):
                value = value.encode('utf8')
            self.stream.write(value)

        appender = utf8_appender

        self.queue = Queue("queue for " + self.__class__.__name__ + "(" + name + ")", max=10000, silent=True)
        self.thread = Thread("log to " + self.__class__.__name__ + "(" + name + ")", time_delta_pusher, appender=appender, queue=self.queue, interval=0.3)
        self.thread.parent.remove_child(self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
        self.thread.start()
Example #14
def queue_consumer(pull_queue, please_stop=None):
    queue = aws.Queue(pull_queue)
    time_offset = None
    request_count = 0

    while not please_stop:
        request = queue.pop(till=please_stop)
        if please_stop:
            break
        if not request:
            Log.note("Nothing in queue, pausing for 5 seconds...")
            (please_stop | Till(seconds=5)).wait()
            continue

        if SKIP_TRY_REQUESTS and 'try' in request.where['and'].eq.branch:
            Log.note("Skipping try revision.")
            queue.commit()
            continue

        now = Date.now().unix
        if time_offset is None:
            time_offset = now - request.meta.request_time

        next_request = request.meta.request_time + time_offset
        if next_request > now:
            Log.note("Next request in {{wait_time}}",
                     wait_time=Duration(seconds=next_request - now))
            Till(till=next_request).wait()

        Thread.run("request " + text_type(request_count), one_request, request)
        request_count += 1
        queue.commit()
Example #15
def capture_termination_signal(please_stop):
    """
    WILL SIGNAL please_stop WHEN THIS AWS INSTANCE IS DUE FOR SHUTDOWN
    """
    def worker(please_stop):
        while not please_stop:
            try:
                response = requests.get(
                    "http://169.254.169.254/latest/meta-data/spot/termination-time"
                )
                if response.status_code not in [400, 404]:
                    Log.alert("Shutdown AWS Spot Node {{name}} {{type}}",
                              name=machine_metadata.name,
                              type=machine_metadata.aws_instance_type)
                    please_stop.go()
            except Exception as e:
                e = Except.wrap(e)
                if "Failed to establish a new connection: [Errno 10060]" in e or "A socket operation was attempted to an unreachable network" in e:
                    Log.note(
                        "AWS Spot Detection has shutdown, probably not a spot node, (http://169.254.169.254 is unreachable)"
                    )
                    return
                else:
                    Log.warning("AWS shutdown detection has problems", cause=e)
                (Till(seconds=61) | please_stop).wait()
            (Till(seconds=11) | please_stop).wait()

    Thread.run("listen for termination", worker)
Example #16
def capture_termination_signal(please_stop):
    """
    WILL SIGNAL please_stop WHEN THIS AWS INSTANCE IS DUE FOR SHUTDOWN
    """
    def worker(please_stop):
        seen_problem = False
        while not please_stop:
            request_time = (time.time() - timer.START)/60  # MINUTES
            try:
                response = requests.get("http://169.254.169.254/latest/meta-data/spot/termination-time")
                seen_problem = False
                if response.status_code not in [400, 404]:
                    Log.alert("Shutdown AWS Spot Node {{name}} {{type}}", name=machine_metadata.name, type=machine_metadata.aws_instance_type)
                    please_stop.go()
            except Exception as e:
                e = Except.wrap(e)
                if "Failed to establish a new connection: [Errno 10060]" in e or "A socket operation was attempted to an unreachable network" in e:
                    Log.note("AWS Spot Detection has shutdown, probably not a spot node, (http://169.254.169.254 is unreachable)")
                    return
                elif seen_problem:
                    # IGNORE THE FIRST PROBLEM
                    Log.warning("AWS shutdown detection has more than one consecutive problem: (last request {{time|round(1)}} minutes since startup)", time=request_time, cause=e)
                seen_problem = True

                (Till(seconds=61) | please_stop).wait()
            (Till(seconds=11) | please_stop).wait()

    Thread.run("listen for termination", worker)
Example #17
    def __init__(self, host, index, alias=None, name=None, port=9200, kwargs=None):
        global _elasticsearch
        if hasattr(self, "settings"):
            return

        from pyLibrary.queries.containers.list_usingPythonList import ListContainer
        from pyLibrary.env import elasticsearch as _elasticsearch

        self.settings = kwargs
        self.default_name = coalesce(name, alias, index)
        self.default_es = _elasticsearch.Cluster(kwargs=kwargs)
        self.todo = Queue("refresh metadata", max=100000, unique=True)

        self.es_metadata = Null
        self.last_es_metadata = Date.now() - OLD_METADATA

        self.meta = Data()
        table_columns = metadata_tables()
        column_columns = metadata_columns()
        self.meta.tables = ListContainer("meta.tables", [], wrap({c.names["."]: c for c in table_columns}))
        self.meta.columns = ColumnList()
        self.meta.columns.insert(column_columns)
        self.meta.columns.insert(table_columns)
        # TODO: fix monitor so it does not bring down ES
        if ENABLE_META_SCAN:
            self.worker = Thread.run("refresh metadata", self.monitor)
        else:
            self.worker = Thread.run("refresh metadata", self.not_monitor)
        return
Example #18
    def __init__(self, name, config):
        config = to_data(config)
        if config.debug.logs:
            Log.error("not allowed to configure logging on other process")

        Log.note("begin process")
        # WINDOWS REQUIRED shell, WHILE LINUX NOT
        shell = "windows" in platform.system().lower()
        self.process = Process(
            name, [PYTHON, "-u", "mo_threads" + os.sep + "python_worker.py"],
            debug=False,
            cwd=os.getcwd(),
            shell=shell)
        self.process.stdin.add(
            value2json(set_default({}, config, {"debug": {
                "trace": True
            }})))
        status = self.process.stdout.pop()
        if status != '{"out":"ok"}':
            Log.error("could not start python\n{{error|indent}}",
                      error=self.process.stderr.pop_all() + [status] +
                      self.process.stdin.pop_all())
        self.lock = Lock("wait for response from " + name)
        self.current_task = DONE
        self.current_response = None
        self.current_error = None

        self.daemon = Thread.run("", self._daemon)
        self.errors = Thread.run("", self._stderr)
Example #20
    def __init__(self, logger):
        if not isinstance(logger, StructuredLogger):
            Log.error("Expecting a StructuredLogger")

        self.queue = Queue("Queue for " + self.__class__.__name__,
                           max=10000,
                           silent=True,
                           allow_add_after_close=True)
        self.logger = logger

        def worker(logger, please_stop):
            try:
                while not please_stop:
                    Till(seconds=1).wait()
                    logs = self.queue.pop_all()
                    for log in logs:
                        if log is THREAD_STOP:
                            please_stop.go()
                        else:
                            logger.write(**log)
            finally:
                logger.stop()

        self.thread = Thread("Thread for " + self.__class__.__name__, worker,
                             logger)
        self.thread.parent.remove_child(
            self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
        self.thread.start()
Example #21
class StructuredLogger_usingThread(StructuredLogger):
    def __init__(self, logger, period=PERIOD):
        if not isinstance(logger, StructuredLogger):
            Log.error("Expecting a StructuredLogger")

        self.logger = logger
        self.queue = Queue(
            "Queue for " + self.__class__.__name__,
            max=10000,
            silent=True,
            allow_add_after_close=True,
        )
        self.thread = Thread("Thread for " + self.__class__.__name__, worker,
                             logger, self.queue, period)
        # worker WILL BE RESPONSIBLE FOR THREAD stop()
        self.thread.parent.remove_child(self.thread)
        self.thread.start()

    def write(self, template, params):
        try:
            self.queue.add({"template": template, "params": params})
            return self
        except Exception as e:
            e = Except.wrap(e)
            raise e  # OH NO!

    def stop(self):
        try:
            self.queue.add(
                THREAD_STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()
        except Exception as e:
            Log.note("problem in threaded logger" + str(e))
Example #22
    def _get_queue(self, row):
        row = wrap(row)
        if row.json:
            row.value, row.json = json2value(row.json), None
        timestamp = Date(self.rollover_field(row.value))
        if timestamp == None:
            return Null
        elif timestamp < Date.today() - self.rollover_max:
            return DATA_TOO_OLD

        rounded_timestamp = timestamp.floor(self.rollover_interval)
        with self.locker:
            queue = self.known_queues.get(rounded_timestamp.unix)
        if queue == None:
            candidates = sort_using_key(
                filter(
                    lambda r: re.match(
                        re.escape(self.settings.index) + r"\d\d\d\d\d\d\d\d_\d\d\d\d\d\d$",
                        r['index']
                    ),
                    self.cluster.get_aliases()
                ),
                key=lambda r: r['index']
            )
            best = None
            for c in candidates:
                c = wrap(c)
                c.date = unicode2Date(c.index[-15:], elasticsearch.INDEX_DATE_FORMAT)
                if timestamp > c.date:
                    best = c
            if not best or rounded_timestamp > best.date:
                if rounded_timestamp < wrap(candidates[-1]).date:
                    es = self.cluster.get_or_create_index(read_only=False, alias=best.alias, index=best.index, kwargs=self.settings)
                else:
                    try:
                        es = self.cluster.create_index(create_timestamp=rounded_timestamp, kwargs=self.settings)
                        es.add_alias(self.settings.index)
                    except Exception as e:
                        e = Except.wrap(e)
                        if "IndexAlreadyExistsException" not in e:
                            Log.error("Problem creating index", cause=e)
                        return self._get_queue(row)  # TRY AGAIN
            else:
                es = self.cluster.get_or_create_index(read_only=False, alias=best.alias, index=best.index, kwargs=self.settings)

            def refresh(please_stop):
                try:
                    es.set_refresh_interval(seconds=60 * 10, timeout=5)
                except Exception:
                    Log.note("Could not set refresh interval for {{index}}", index=es.settings.index)

            Thread.run("refresh", refresh)

            self._delete_old_indexes(candidates)
            threaded_queue = es.threaded_queue(max_size=self.settings.queue_size, batch_size=self.settings.batch_size, silent=True)
            with self.locker:
                queue = self.known_queues[rounded_timestamp.unix] = threaded_queue
        return queue
Example #23
def main():
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)
        please_stop = Signal("main stop signal")
        Thread.wait_for_shutdown_signal(please_stop)
    except Exception as e:
        Log.error("Problem with etl", cause=e)
Example #24
class StructuredLogger_usingThreadedStream(StructuredLogger):
    # stream CAN BE AN OBJECT WITH write() METHOD, OR A STRING
    # WHICH WILL eval() TO ONE
    def __init__(self, stream):
        assert stream

        use_UTF8 = False

        if isinstance(stream, basestring):
            if stream.startswith("sys."):
                use_UTF8 = True  # sys.* ARE OLD AND CAN NOT HANDLE unicode
            self.stream = eval(stream)
            name = stream
        else:
            self.stream = stream
            name = "stream"

        # WRITE TO STREAMS CAN BE *REALLY* SLOW, WE WILL USE A THREAD
        from mo_threads import Queue

        if use_UTF8:
            def utf8_appender(value):
                if isinstance(value, unicode):
                    value = value.encode('utf8')
                self.stream.write(value)

            appender = utf8_appender
        else:
            appender = self.stream.write

        self.queue = Queue("queue for " + self.__class__.__name__ + "(" + name + ")", max=10000, silent=True)
        self.thread = Thread("log to " + self.__class__.__name__ + "(" + name + ")", time_delta_pusher, appender=appender, queue=self.queue, interval=0.3)
        self.thread.parent.remove_child(self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
        self.thread.start()

    def write(self, template, params):
        try:
            self.queue.add({"template": template, "params": params})
            return self
        except Exception as e:
            raise e  # OH NO!

    def stop(self):
        try:
            self.queue.add(THREAD_STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()
        except Exception as e:
            if DEBUG_LOGGING:
                raise e

        try:
            self.queue.close()
        except Exception as f:
            if DEBUG_LOGGING:
                raise f
Example #25
class StructuredLogger_usingThread(StructuredLogger):
    def __init__(self, logger):
        if not isinstance(logger, StructuredLogger):
            Log.error("Expecting a StructuredLogger")

        self.queue = Queue("Queue for " + self.__class__.__name__,
                           max=10000,
                           silent=True,
                           allow_add_after_close=True)
        self.logger = logger

        def worker(logger, please_stop):
            try:
                while not please_stop:
                    logs = self.queue.pop_all()
                    if not logs:
                        (Till(seconds=1) | please_stop).wait()
                        continue
                    for log in logs:
                        if log is THREAD_STOP:
                            please_stop.go()
                        else:
                            logger.write(**log)
            except Exception as e:
                print("problem in " + StructuredLogger_usingThread.__name__ +
                      ": " + str(e))
            finally:
                Log.note("stop the child")
                logger.stop()

        self.thread = Thread("Thread for " + self.__class__.__name__, worker,
                             logger)
        self.thread.parent.remove_child(
            self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
        self.thread.start()

    def write(self, template, params):
        try:
            self.queue.add({"template": template, "params": params})
            return self
        except Exception as e:
            e = Except.wrap(e)
            raise e  # OH NO!

    def stop(self):
        try:
            self.queue.add(
                THREAD_STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()
            Log.note("joined on thread")
        except Exception as e:
            Log.note("problem in threaded logger" + str(e))

        with suppress_exception:
            self.queue.close()
Example #26
def inners():
    for t in data.hits.hits:
        for i in t.inner_hits[literal_field(query_path)].hits.hits:
            t._inner = i._source
            for k, e in post_expressions.items():
                t[k] = e(t)
            yield t
    if more_filter:
        Thread.join(need_more)
        for t in more[0].hits.hits:
            yield t
Example #28
    def __exit__(self, exc_type, exc_val, exc_tb):
        Log.note("clean pulse exit")
        self.please_stop.go()
        with suppress_exception:
            self.target_queue.close()
            Log.note("stop put into queue")

        try:
            self.pulse.disconnect()
        except Exception as e:
            Log.warning("Can not disconnect during pulse exit, ignoring", e)
        Thread.__exit__(self, exc_type, exc_val, exc_tb)
Example #30
class StructuredLogger_usingThreadedStream(StructuredLogger):
    # stream CAN BE AN OBJECT WITH write() METHOD, OR A STRING
    # WHICH WILL eval() TO ONE
    def __init__(self, stream):
        assert stream

        if is_text(stream):
            name = stream
            stream = self.stream = eval(stream)
            if name.startswith("sys.") and PY3:
                self.stream = Data(write=lambda d: stream.write(d.decode('utf8')))
        else:
            name = "stream"
            self.stream = stream

        # WRITE TO STREAMS CAN BE *REALLY* SLOW, WE WILL USE A THREAD
        from mo_threads import Queue

        def utf8_appender(value):
            if is_text(value):
                value = value.encode('utf8')
            self.stream.write(value)

        appender = utf8_appender

        self.queue = Queue("queue for " + self.__class__.__name__ + "(" + name + ")", max=10000, silent=True)
        self.thread = Thread("log to " + self.__class__.__name__ + "(" + name + ")", time_delta_pusher, appender=appender, queue=self.queue, interval=0.3)
        self.thread.parent.remove_child(self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
        self.thread.start()

    def write(self, template, params):
        try:
            self.queue.add({"template": template, "params": params})
            return self
        except Exception as e:
            raise e  # OH NO!

    def stop(self):
        try:
            self.queue.add(THREAD_STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()
        except Exception as e:
            if DEBUG_LOGGING:
                raise e

        try:
            self.queue.close()
        except Exception as f:
            if DEBUG_LOGGING:
                raise f
Example #31
    def __init__(self):
        self.out_of_memory_restart = False

        self.total_locker = Lock()
        self.total_files_requested = 0
        self.total_tuids_mapped = 0

        self.threads_locker = Lock()
        self.waiting = 0
        self.threads_waiting = 0

        self.requests_locker = Lock()
        self.requests_total = 0
        self.requests_complete = 0
        self.requests_incomplete = 0
        self.requests_passed = 0
        self.requests_failed = 0

        self.prev_mem = 0
        self.curr_mem = 0
        self.initial_growth = {}

        Thread.run("pc-daemon", self.run_pc_daemon)
        Thread.run("threads-daemon", self.run_threads_daemon)
        Thread.run("memory-daemon", self.run_memory_daemon)
        Thread.run("requests-daemon", self.run_requests_daemon)
Example #32
    def test_and_signals(self):
        acc = []
        locker = Lock()

        def worker(please_stop):
            with locker:
                acc.append("worker")
        a = Thread.run("a", worker)
        b = Thread.run("b", worker)
        c = Thread.run("c", worker)

        (a.stopped & b.stopped & c.stopped).wait()
        acc.append("done")
        self.assertEqual(acc, ["worker", "worker", "worker", "done"])
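This test depends on signal composition: | yields a Signal that goes when either operand goes (used as (Till(...) | please_stop) throughout these examples), while & yields one that goes only after all operands have. A small sketch (signal names are illustrative):

from mo_threads import Signal

a = Signal("a")
b = Signal("b")

either = a | b  # GOES AS SOON AS ONE OF a, b GOES
both = a & b    # GOES ONLY AFTER BOTH a AND b HAVE GONE

a.go()
either.wait()  # RETURNS IMMEDIATELY
b.go()
both.wait()    # NOW BOTH HAVE GONE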
Example #33
class StructuredLogger_usingThreadedStream(StructuredLogger):
    # stream CAN BE AN OBJECT WITH write() METHOD, OR A STRING
    # WHICH WILL eval() TO ONE
    def __init__(self, stream):
        assert stream

        if isinstance(stream, text_type):
            name = stream
            stream = self.stream = eval(stream)
            if name.startswith("sys.") and PY3:
                self.stream = Data(write=lambda d: stream.write(d.decode('utf8')))
        else:
            name = "stream"
            self.stream = stream

        # WRITE TO STREAMS CAN BE *REALLY* SLOW, WE WILL USE A THREAD
        from mo_threads import Queue

        def utf8_appender(value):
            if isinstance(value, text_type):
                value = value.encode('utf8')
            self.stream.write(value)

        appender = utf8_appender

        self.queue = Queue("queue for " + self.__class__.__name__ + "(" + name + ")", max=10000, silent=True)
        self.thread = Thread("log to " + self.__class__.__name__ + "(" + name + ")", time_delta_pusher, appender=appender, queue=self.queue, interval=0.3)
        self.thread.parent.remove_child(self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
        self.thread.start()

    def write(self, template, params):
        try:
            self.queue.add({"template": template, "params": params})
            return self
        except Exception as e:
            raise e  # OH NO!

    def stop(self):
        try:
            self.queue.add(THREAD_STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()
        except Exception as e:
            if DEBUG_LOGGING:
                raise e

        try:
            self.queue.close()
        except Exception as f:
            if DEBUG_LOGGING:
                raise f
Example #35
    def __init__(self,
                 host,
                 index,
                 sql_file='metadata.sqlite',
                 alias=None,
                 name=None,
                 port=9200,
                 kwargs=None):
        if hasattr(self, "settings"):
            return

        self.too_old = TOO_OLD
        self.settings = kwargs
        self.default_name = coalesce(name, alias, index)
        self.es_cluster = elasticsearch.Cluster(kwargs=kwargs)

        self.index_does_not_exist = set()
        self.todo = Queue("refresh metadata", max=100000, unique=True)

        self.index_to_alias = Relation_usingList()

        self.es_metadata = Null
        self.metadata_last_updated = Date.now() - OLD_METADATA

        self.meta = Data()
        self.meta.columns = ColumnList()

        self.alias_to_query_paths = {
            "meta.columns": [['.']],
            "meta.tables": [['.']]
        }
        self.alias_last_updated = {
            "meta.columns": Date.now(),
            "meta.tables": Date.now()
        }
        table_columns = metadata_tables()
        self.meta.tables = ListContainer(
            "meta.tables",
            [
                # TableDesc("meta.columns", None, ".", Date.now()),
                # TableDesc("meta.tables", None, ".", Date.now())
            ],
            jx_base.Schema(".", table_columns))
        self.meta.columns.extend(table_columns)
        # TODO: fix monitor so it does not bring down ES
        if ENABLE_META_SCAN:
            self.worker = Thread.run("refresh metadata", self.monitor)
        else:
            self.worker = Thread.run("refresh metadata", self.not_monitor)
        return
Example #36
    def __init__(self, conn=None, tuid_service=None, kwargs=None):
        try:
            self.config = kwargs

            self.conn = conn if conn else sql.Sql(self.config.database.name)
            self.hg_cache = HgMozillaOrg(
                kwargs=self.config.hg_cache,
                use_cache=True) if self.config.hg_cache else Null

            self.tuid_service = tuid_service if tuid_service else tuid.service.TUIDService(
                database=None,
                hg=None,
                kwargs=self.config,
                conn=self.conn,
                clogger=self)
            self.rev_locker = Lock()
            self.working_locker = Lock()

            self.init_db()
            self.next_revnum = coalesce(
                self.conn.get_one("SELECT max(revnum)+1 FROM csetLog")[0], 1)
            self.csets_todo_backwards = Queue(
                name="Clogger.csets_todo_backwards")
            self.deletions_todo = Queue(name="Clogger.deletions_todo")
            self.maintenance_signal = Signal(name="Clogger.maintenance_signal")
            self.config = self.config.tuid

            self.disable_backfilling = False
            self.disable_tipfilling = False
            self.disable_deletion = False
            self.disable_maintenance = False

            # Make sure we are filled before allowing queries
            numrevs = self.conn.get_one("SELECT count(revnum) FROM csetLog")[0]
            if numrevs < MINIMUM_PERMANENT_CSETS:
                Log.note("Filling in csets to hold {{minim}} csets.",
                         minim=MINIMUM_PERMANENT_CSETS)
                oldest_rev = 'tip'
                with self.conn.transaction() as t:
                    tmp = t.query(
                        "SELECT min(revnum), revision FROM csetLog").data[0][1]
                    if tmp:
                        oldest_rev = tmp
                self._fill_in_range(MINIMUM_PERMANENT_CSETS - numrevs,
                                    oldest_rev,
                                    timestamp=False)

            Log.note(
                "Table is filled with atleast {{minim}} entries. Starting workers...",
                minim=MINIMUM_PERMANENT_CSETS)

            Thread.run('clogger-tip', self.fill_forward_continuous)
            Thread.run('clogger-backfill', self.fill_backward_with_list)
            Thread.run('clogger-maintenance', self.csetLog_maintenance)
            Thread.run('clogger-deleter', self.csetLog_deleter)

            Log.note("Started clogger workers.")
        except Exception as e:
            Log.warning("Cannot setup clogger: {{cause}}", cause=str(e))
Example #37
class StructuredLogger_usingThread(StructuredLogger):

    def __init__(self, logger):
        if not isinstance(logger, StructuredLogger):
            Log.error("Expecting a StructuredLogger")

        self.queue = Queue("Queue for " + self.__class__.__name__, max=10000, silent=True, allow_add_after_close=True)
        self.logger = logger

        def worker(logger, please_stop):
            try:
                while not please_stop:
                    logs = self.queue.pop_all()
                    if not logs:
                        (Till(seconds=1) | please_stop).wait()
                        continue
                    for log in logs:
                        if log is THREAD_STOP:
                            please_stop.go()
                        else:
                            logger.write(**log)
            except Exception as e:
                print("problem in " + StructuredLogger_usingThread.__name__ + ": " + str(e))
            finally:
                Log.note("stop the child")
                logger.stop()

        self.thread = Thread("Thread for " + self.__class__.__name__, worker, logger)
        self.thread.parent.remove_child(self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
        self.thread.start()

    def write(self, template, params):
        try:
            self.queue.add({"template": template, "params": params})
            return self
        except Exception as e:
            e = Except.wrap(e)
            raise e  # OH NO!

    def stop(self):
        try:
            self.queue.add(THREAD_STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()
            Log.note("joined on thread")
        except Exception as e:
            Log.note("problem in threaded logger" + str(e))

        with suppress_exception:
            self.queue.close()
Example #38
    def __init__(self, name, config):
        config = wrap(config)
        if config.debug.logs:
            Log.error("not allowed to configure logging on other process")

        self.process = Process(name, [PYTHON, "mo_threads" + os.sep + "python_worker.py"], shell=True)
        self.process.stdin.add(value2json(set_default({"debug": {"trace": True}}, config)))

        self.lock = Lock("wait for response from "+name)
        self.current_task = None
        self.current_response = None
        self.current_error = None

        self.daemon = Thread.run("", self._daemon)
        self.errors = Thread.run("", self._stderr)
Example #39
    def __init__(self, name):
        Table.__init__(self, "meta.columns")
        self.db_file = File("metadata." + name + ".sqlite")
        self.data = {}  # MAP FROM ES_INDEX TO (abs_column_name to COLUMNS)
        self.locker = Lock()
        self._schema = None
        self.db = sqlite3.connect(
            database=self.db_file.abspath, check_same_thread=False, isolation_level=None
        )
        self.last_load = Null
        self.todo = Queue(
            "update columns to db"
        )  # HOLD (action, column) PAIR, WHERE action in ['insert', 'update']
        self._db_load()
        Thread.run("update " + name, self._db_worker)
Example #40
    def test_till_in_loop(self):

        def loop(please_stop):
            counter = 0
            while not please_stop:
                (Till(seconds=0.001) | please_stop).wait()
                counter += 1
                Log.note("{{count}}", count=counter)

        please_stop = Signal("please_stop")
        Thread.run("loop", loop, please_stop=please_stop)
        Till(seconds=1).wait()
        with please_stop.lock:
            self.assertLessEqual(len(please_stop.job_queue), 1, "Expecting only one pending job on go")
        please_stop.go()
Example #41
    def __init__(
        self,
        host,
        index,
        port=9200,
        type="log",
        queue_size=1000,
        batch_size=100,
        kwargs=None,
    ):
        """
        settings ARE FOR THE ELASTICSEARCH INDEX
        """
        kwargs.timeout = Duration(coalesce(kwargs.timeout, "30second")).seconds
        kwargs.retry.times = coalesce(kwargs.retry.times, 3)
        kwargs.retry.sleep = Duration(coalesce(kwargs.retry.sleep, MINUTE)).seconds
        kwargs.host = Random.sample(listwrap(host), 1)[0]

        schema = json2value(value2json(SCHEMA), leaves=True)
        schema.mappings[type].properties["~N~"].type = "nested"
        self.es = Cluster(kwargs).get_or_create_index(
            schema=schema,
            limit_replicas=True,
            typed=True,
            kwargs=kwargs,
        )
        self.batch_size = batch_size
        self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
        self.queue = Queue("debug logs to es", max=queue_size, silent=True)

        self.worker = Thread.run("add debug logs to es", self._insert_loop)
Example #42
    def __init__(self, logger):
        if not isinstance(logger, StructuredLogger):
            Log.error("Expecting a StructuredLogger")

        self.queue = Queue("Queue for " + self.__class__.__name__, max=10000, silent=True, allow_add_after_close=True)
        self.logger = logger

        def worker(logger, please_stop):
            try:
                while not please_stop:
                    logs = self.queue.pop_all()
                    if not logs:
                        (Till(seconds=1) | please_stop).wait()
                        continue
                    for log in logs:
                        if log is THREAD_STOP:
                            please_stop.go()
                        else:
                            logger.write(**log)
            except Exception as e:
                print("problem in " + StructuredLogger_usingThread.__name__ + ": " + str(e))
            finally:
                Log.note("stop the child")
                logger.stop()

        self.thread = Thread("Thread for " + self.__class__.__name__, worker, logger)
        self.thread.parent.remove_child(self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
        self.thread.start()
Example #43
    def __init__(self, host, index, port=9200, type="log", max_size=1000, batch_size=100, kwargs=None):
        """
        settings ARE FOR THE ELASTICSEARCH INDEX
        """
        self.es = Cluster(kwargs).get_or_create_index(
            schema=mo_json.json2value(value2json(SCHEMA), leaves=True),
            limit_replicas=True,
            tjson=True,
            kwargs=kwargs
        )
        self.batch_size = batch_size
        self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
        self.queue = Queue("debug logs to es", max=max_size, silent=True)
        self.es.settings.retry.times = coalesce(self.es.settings.retry.times, 3)
        self.es.settings.retry.sleep = Duration(coalesce(self.es.settings.retry.sleep, MINUTE))
        Thread.run("add debug logs to es", self._insert_loop)
Example #44
    def __init__(self, stream):
        assert stream

        if isinstance(stream, text_type):
            name = stream
            stream = self.stream = eval(stream)
            if name.startswith("sys.") and PY3:
                self.stream = Data(write=lambda d: stream.write(d.decode('utf8')))
        else:
            name = "stream"
            self.stream = stream

        # WRITE TO STREAMS CAN BE *REALLY* SLOW, WE WILL USE A THREAD
        from mo_threads import Queue

        def utf8_appender(value):
            if isinstance(value, text_type):
                value = value.encode('utf8')
            self.stream.write(value)

        appender = utf8_appender

        self.queue = Queue("queue for " + self.__class__.__name__ + "(" + name + ")", max=10000, silent=True)
        self.thread = Thread("log to " + self.__class__.__name__ + "(" + name + ")", time_delta_pusher, appender=appender, queue=self.queue, interval=0.3)
        self.thread.parent.remove_child(self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
        self.thread.start()
Example #45
    def _find_revision(self, revision):
        please_stop = False
        locker = Lock()
        output = []
        queue = Queue("branches", max=2000)
        queue.extend(b for b in self.branches if b.locale == DEFAULT_LOCALE and b.name in ["try", "mozilla-inbound", "autoland"])
        queue.add(THREAD_STOP)

        problems = []
        def _find(please_stop):
            for b in queue:
                if please_stop:
                    return
                try:
                    url = b.url + "json-info?node=" + revision
                    rev = self.get_revision(Revision(branch=b, changeset={"id": revision}))
                    with locker:
                        output.append(rev)
                    Log.note("Revision found at {{url}}", url=url)
                except Exception as f:
                    problems.append(f)

        threads = []
        for i in range(3):
            threads.append(Thread.run("find changeset " + text_type(i), _find, please_stop=please_stop))

        for t in threads:
            with assert_no_exception:
                t.join()

        return output
Example #46
    def setup(
        self,
        instance,   # THE boto INSTANCE OBJECT FOR THE MACHINE TO SETUP
        utility     # THE utility OBJECT FOUND IN CONFIG
    ):
        with self.locker:
            if not self.settings.setup_timeout:
                Log.error("expecting instance.setup_timeout to prevent setup from locking")

            def worker(please_stop):
                cpu_count = int(round(utility.cpu))

                with hide('output'):
                    Log.note("setup {{instance}}", instance=instance.id)
                    self._config_fabric(instance)
                    Log.note("update packages on {{instance}} ip={{ip}}", instance=instance.id, ip=instance.ip_address)
                    try:
                        self._update_ubuntu_packages()
                    except Exception as e:
                        Log.warning("Can not setup {{instance}}, type={{type}}", instance=instance.id, type=instance.instance_type, cause=e)
                        return
                    Log.note("setup etl on {{instance}}", instance=instance.id)
                    self._setup_etl_code()
                    Log.note("setup grcov on {{instance}}", instance=instance.id)
                    self._setup_grcov()
                    Log.note("add config file on {{instance}}", instance=instance.id)
                    self._add_private_file()
                    Log.note("setup supervisor on {{instance}}", instance=instance.id)
                    self._setup_etl_supervisor(cpu_count)
                    Log.note("setup done {{instance}}", instance=instance.id)
            worker_thread = Thread.run("etl setup started at "+unicode(Date.now().format()), worker)
            (Till(timeout=Duration(self.settings.setup_timeout).seconds) | worker_thread.stopped).wait()
            if not worker_thread.stopped:
                Log.error("critical failure in thread {{name|quote}}", name=worker_thread.name)
            worker_thread.join()
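The tail of this example is a reusable timeout idiom: race a Till against the worker thread's stopped signal, then check which one fired. Condensed (the 10-second limit is illustrative):

worker_thread = Thread.run("slow job", worker)
(Till(seconds=10) | worker_thread.stopped).wait()
if not worker_thread.stopped:
    Log.error("worker did not finish in time")
worker_thread.join()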
Example #47
    def __init__(self, host, index, sql_file='metadata.sqlite', alias=None, name=None, port=9200, kwargs=None):
        if hasattr(self, "settings"):
            return

        self.too_old = TOO_OLD
        self.settings = kwargs
        self.default_name = coalesce(name, alias, index)
        self.es_cluster = elasticsearch.Cluster(kwargs=kwargs)

        self.index_does_not_exist = set()
        self.todo = Queue("refresh metadata", max=100000, unique=True)

        self.index_to_alias = Relation_usingList()

        self.es_metadata = Null
        self.metadata_last_updated = Date.now() - OLD_METADATA

        self.meta = Data()
        self.meta.columns = ColumnList()

        self.alias_to_query_paths = {
            "meta.columns": [['.']],
            "meta.tables": [['.']]
        }
        self.alias_last_updated = {
            "meta.columns": Date.now(),
            "meta.tables": Date.now()
        }
        table_columns = metadata_tables()
        self.meta.tables = ListContainer(
            "meta.tables",
            [
                # TableDesc("meta.columns", None, ".", Date.now()),
                # TableDesc("meta.tables", None, ".", Date.now())
            ],
            jx_base.Schema(".", table_columns)
        )
        self.meta.columns.extend(table_columns)
        # TODO: fix monitor so it does not bring down ES
        if ENABLE_META_SCAN:
            self.worker = Thread.run("refresh metadata", self.monitor)
        else:
            self.worker = Thread.run("refresh metadata", self.not_monitor)
        return
Example #48
    def __init__(
        self,
        hg=None,        # CONNECT TO hg
        repo=None,      # CONNECTION INFO FOR ES CACHE
        branches=None,  # CONNECTION INFO FOR ES CACHE
        use_cache=False,   # True IF WE WILL USE THE ES FOR DOWNLOADING BRANCHES
        timeout=30 * SECOND,
        kwargs=None
    ):
        if not _hg_branches:
            _late_imports()

        self.es_locker = Lock()
        self.todo = mo_threads.Queue("todo for hg daemon", max=DAEMON_QUEUE_SIZE)

        self.settings = kwargs
        self.timeout = Duration(timeout)

        # VERIFY CONNECTIVITY
        with Explanation("Test connect with hg"):
            response = http.head(self.settings.hg.url)

        if branches == None:
            self.branches = _hg_branches.get_branches(kwargs=kwargs)
            self.es = None
            return

        self.last_cache_miss = Date.now()

        set_default(repo, {"schema": revision_schema})
        self.es = elasticsearch.Cluster(kwargs=repo).get_or_create_index(kwargs=repo)

        def setup_es(please_stop):
            with suppress_exception:
                self.es.add_alias()

            with suppress_exception:
                self.es.set_refresh_interval(seconds=1)

        Thread.run("setup_es", setup_es)
        self.branches = _hg_branches.get_branches(kwargs=kwargs)
        self.timeout = timeout
        Thread.run("hg daemon", self._daemon)
Example #49
def _setup():
    threads = Data()
    signals = Data()

    db = Sqlite()
    db.query("CREATE TABLE my_table (value TEXT)")

    for name in ["a", "b"]:
        signals[name] = [{"begin": Signal(), "done": Signal()} for _ in range(4)]
        threads[name] = Thread.run(name, _work, name, db, signals[name])

    return db, threads, signals
Example #50
    def __init__(self, filename=None, db=None, upgrade=True):
        """
        :param db:  Optional, wrap a sqlite db in a thread
        :return: Multithread-safe database
        """
        if upgrade and not _upgraded:
            _upgrade()

        self.filename = filename
        self.db = db
        self.queue = Queue("sql commands")   # HOLD (command, result, signal) PAIRS
        self.worker = Thread.run("sqlite db thread", self._worker)
        self.get_trace = DEBUG
        self.upgrade = upgrade
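Examples #8 and #49 show this wrapper in use; a condensed sketch of the API (the table and values are illustrative):

db = Sqlite("metadata.sqlite")
with db.transaction() as t:
    t.execute("CREATE TABLE my_table (value TEXT)")
    t.execute("INSERT INTO my_table VALUES ('hello')")
result = db.query("SELECT value FROM my_table")  # COMMANDS ARE SERIALIZED THROUGH THE db THREAD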
Example #51
def capture_termination_signal(please_stop):
    """
    WILL SIGNAL please_stop WHEN THIS AWS INSTANCE IS DUE FOR SHUTDOWN
    """

    def worker(please_stop):
        while not please_stop:
            try:
                response = requests.get("http://169.254.169.254/latest/meta-data/spot/termination-time")
                if response.status_code not in [400, 404]:
                    Log.alert("Shutdown AWS Spot Node {{name}} {{type}}", name=machine_metadata.name, type=machine_metadata.aws_instance_type)
                    please_stop.go()
            except Exception as e:
                e = Except.wrap(e)
                if "Failed to establish a new connection: [Errno 10060]" in e or "A socket operation was attempted to an unreachable network" in e:
                    Log.note("AWS Spot Detection has shutdown, probably not a spot node, (http://169.254.169.254 is unreachable)")
                    return
                else:
                    Log.warning("AWS shutdown detection has problems", cause=e)
                (Till(seconds=61) | please_stop).wait()
            (Till(seconds=11) | please_stop).wait()

    Thread.run("listen for termination", worker, please_stop=please_stop)
Example #52
    def __init__(
        self,
        exchange,  # name of the Pulse exchange
        topic,  # message name pattern to subscribe to  ('#' is wildcard)
        target=None,  # WILL BE CALLED WITH PULSE PAYLOADS AND ack() IF COMPLETED WITHOUT EXCEPTION
        target_queue=None,  # (aka self.queue) WILL BE FILLED WITH PULSE PAYLOADS
        host='pulse.mozilla.org',  # url to connect to
        port=5671,  # tcp port
        user=None,
        password=None,
        vhost="/",
        start=0,  # USED AS STARTING POINT FOR ASSIGNING THE _meta.count ATTRIBUTE
        ssl=True,
        applabel=None,
        heartbeat=False,  # True to also get the Pulse heartbeat message
        durable=False,  # True to keep queue after shutdown
        serializer='json',
        broker_timezone='GMT',
        kwargs=None
    ):
        global count
        count = coalesce(start, 0)

        self.target_queue = target_queue
        self.pulse_target = target
        if (target_queue == None and target == None) or (target_queue != None and target != None):
            Log.error("Expecting a queue (for fast digesters) or a target (for slow digesters)")

        Thread.__init__(self, name="Pulse consumer for " + kwargs.exchange, target=self._worker)
        self.settings = kwargs
        kwargs.callback = self._got_result
        kwargs.user = coalesce(kwargs.user, kwargs.username)
        kwargs.applabel = coalesce(kwargs.applabel, kwargs.queue, kwargs.queue_name)
        kwargs.topic = topic

        self.pulse = ModifiedGenericConsumer(kwargs, connect=True, **kwargs)
        self.start()
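
A hedged construction sketch for the consumer above, in its fast-digester form (a target callback, ack'd when it returns cleanly); the class name PulseConsumer, the exchange, and the credentials are placeholders:

def on_pulse(message):
    Log.note("pulse message {{count}}", count=message._meta.count)

consumer = PulseConsumer(            # hypothetical name for the class above
    exchange="exchange/build/",      # placeholder exchange
    topic="#",                       # '#' subscribes to every message
    target=on_pulse,                 # exactly one of target/target_queue allowed
    user="some-user",
    password="some-password",
)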
Example #53
    def note(
        cls,
        template,
        default_params={},
        stack_depth=0,
        log_context=None,
        **more_params
    ):
        """
        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any* more parameters (which will overwrite default_params)
        :return:
        """
        if not isinstance(template, text_type):
            Log.error("Log.note was expecting a unicode template")

        if len(template) > 10000:
            template = template[:10000]

        params = dict(unwrap(default_params), **more_params)

        log_params = set_default({
            "template": template,
            "params": params,
            "timestamp": datetime.utcnow(),
            "machine": machine_metadata
        }, log_context, {"context": exceptions.NOTE})

        if not template.startswith("\n") and template.find("\n") > -1:
            template = "\n" + template

        if cls.trace:
            log_template = "{{machine.name}} (pid {{machine.pid}}) - {{timestamp|datetime}} - {{thread.name}} - \"{{location.file}}:{{location.line}}\" ({{location.method}}) - " + template.replace("{{", "{{params.")
            f = sys._getframe(stack_depth + 1)
            log_params.location = {
                "line": f.f_lineno,
                "file": text_type(f.f_code.co_filename.split(os.sep)[-1]),
                "method": text_type(f.f_code.co_name)
            }
            thread = _Thread.current()
            log_params.thread = {"name": thread.name, "id": thread.id}
        else:
            log_template = "{{timestamp|datetime}} - " + template.replace("{{", "{{params.")

        cls.main_log.write(log_template, log_params)
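
A quick usage sketch of the template convention documented above: {{name}} placeholders are filled from default_params/more_params, and filters like |datetime (seen in the trace template) format the value.

from datetime import datetime

Log.note("processed {{num}} rows at {{when|datetime}}", num=42, when=datetime.utcnow())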
Example #54
    def query(self, command):
        """
        WILL BLOCK CALLING THREAD UNTIL THE command IS COMPLETED
        :param command: COMMAND FOR SQLITE
        :return: list OF RESULTS
        """
        if not self.worker:
            self.worker = Thread.run("sqlite db thread", self._worker)

        signal = Signal()
        result = Data()
        self.queue.add((command, result, signal, None))
        signal.wait()
        if result.exception:
            Log.error("Problem with Sqlite call", cause=result.exception)
        return result
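
And the caller side of the same pattern, reusing the hypothetical _sketch_worker and Result from the sketch after Example #50: enqueue the tuple, block on the signal, then re-raise any captured exception, mirroring the method above.

import queue
import threading

commands = queue.Queue()
threading.Thread(target=_sketch_worker, args=(None, commands), daemon=True).start()

result, done = Result(), threading.Event()
commands.put(("SELECT 1", result, done))
done.wait()                          # WILL BLOCK CALLING THREAD, as documented
if result.exception:
    raise result.exception
print(result.data)                   # [(1,)]
commands.put((None, None, None))     # poison pill stops the worker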
Example #55
def one_request(request, please_stop):
    and_op = request.where['and']

    files = []
    for a in and_op:
        if a['in'].path:
            files = a['in'].path
        elif a.eq.path:
            files = [a.eq.path]

    with Timer("Make TUID request from {{timestamp|datetime}}", {"timestamp": request.meta.request_time}):
        try:
            result = http.post_json(
                "http://localhost:5000/tuid",
                json=request,
                timeout=30
            )
            if result is None or len(result.data) != len(files):
                Log.note("incomplete response for {{thread}}", thread=Thread.current().name)
        except Exception as e:
            Log.warning("Request failure", cause=e)
Example #56
def time_delta_pusher(please_stop, appender, queue, interval):
    """
    appender - THE FUNCTION THAT ACCEPTS A STRING
    queue - FILLED WITH LOG ENTRIES {"template":template, "params":params} TO WRITE
    interval - seconds (float) between batch writes
    USE IN A THREAD TO BATCH LOGS BY TIME INTERVAL
    """

    next_run = time() + interval

    while not please_stop:
        profiler = Thread.current().cprofiler
        profiler.disable()
        (Till(till=next_run) | please_stop).wait()
        profiler.enable()

        next_run = time() + interval
        logs = queue.pop_all()
        if not logs:
            continue

        lines = []
        for log in logs:
            try:
                if log is THREAD_STOP:
                    please_stop.go()
                    next_run = time()
                else:
                    expanded = expand_template(log.get("template"), log.get("params"))
                    lines.append(expanded)
            except Exception as e:
                location = log.get('params', {}).get('location', {})
                Log.warning("Trouble formatting log from {{location}}", location=location, cause=e)
                # SWALLOW ERROR, GOT TO KEEP RUNNING
        try:
            appender(u"\n".join(lines) + u"\n")
        except Exception as e:
            sys.stderr.write(str("Trouble with appender: ") + str(e.__class__.__name__) + str("\n"))
Example #57
    def _annotate(
        cls,
        item,
        timestamp,
        stack_depth
    ):
        """
        :param itemt:  A LogItemTHE TYPE OF MESSAGE
        :param stack_depth: FOR TRACKING WHAT LINE THIS CAME FROM
        :return:
        """
        item.timestamp = timestamp
        item.machine = machine_metadata
        item.template = strings.limit(item.template, 10000)

        item.format = strings.limit(item.format, 10000)
        if item.format == None:
            format = text_type(item)
        else:
            format = item.format.replace("{{", "{{params.")
        if not format.startswith(CR) and format.find(CR) > -1:
            format = CR + format

        if cls.trace:
            log_format = item.format = "{{machine.name}} (pid {{machine.pid}}) - {{timestamp|datetime}} - {{thread.name}} - \"{{location.file}}:{{location.line}}\" - ({{location.method}}) - " + format
            f = sys._getframe(stack_depth + 1)
            item.location = {
                "line": f.f_lineno,
                "file": text_type(f.f_code.co_filename),
                "method": text_type(f.f_code.co_name)
            }
            thread = _Thread.current()
            item.thread = {"name": thread.name, "id": thread.id}
        else:
            log_format = item.format = "{{timestamp|datetime}} - " + format

        cls.main_log.write(log_format, item.__data__())
Example #58
            Log.note("Skipping try revision.")
            queue.commit()
            continue

        now = Date.now().unix
        if time_offset is None:
            time_offset = now - request.meta.request_time

        next_request = request.meta.request_time + time_offset
        if next_request > now:
            Log.note("Next request in {{wait_time}}", wait_time=Duration(seconds=next_request - now))
            Till(till=next_request).wait()

        Thread.run("request "+text_type(request_count), one_request, request)
        request_count += 1
        queue.commit()


if __name__ == '__main__':
    try:
        tmp_signal = Signal()
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)

        queue_consumer(kwargs=config, please_stop=tmp_signal)
        worker = Thread.run("sqs consumer", queue_consumer, kwargs=config)
        MAIN_THREAD.wait_for_shutdown_signal(allow_exit=True, please_stop=worker.stopped)
    except BaseException as e:
        Log.error("Serious problem with consumer construction! Shutdown!", cause=e)