Example #1
    def error(
            cls,
            template,  # human readable template
            default_params={},  # parameters for template
            cause=None,  # possible cause
            stack_depth=0,
            **more_params):
        """
        raise an exception with a trace for the cause too

        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param cause: *Exception* for chaining
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any* more parameters (which will overwrite default_params)
        :return:
        """
        if not is_text(template):
            # sys.stderr.write(str("Log.error was expecting a unicode template"))
            Log.error("Log.error was expecting a unicode template")

        if default_params and isinstance(
                listwrap(default_params)[0], BaseException):
            cause = default_params
            default_params = {}

        params = Data(dict(default_params, **more_params))

        add_to_trace = False
        if cause == None:
            causes = None
        elif is_list(cause):
            causes = []
            for c in listwrap(
                    cause
            ):  # CAN NOT USE LIST-COMPREHENSION IN PYTHON3 (EXTRA STACK DEPTH FROM THE IN-LINED GENERATOR)
                causes.append(Except.wrap(c, stack_depth=1))
            causes = FlatList(causes)
        elif isinstance(cause, BaseException):
            causes = Except.wrap(cause, stack_depth=1)
        else:
            causes = None
            Log.error("can only accept Exception, or list of exceptions")

        trace = exceptions.get_stacktrace(stack_depth + 1)

        if add_to_trace:
            cause[0].trace.extend(trace[1:])

        e = Except(
            context=exceptions.ERROR,
            template=template,
            params=params,
            cause=causes,
            trace=trace,
        )
        raise_from_none(e)
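A minimal usage sketch, assuming this is mo-logs' `Log.error` (the `mo_logs` import path and the job name are illustrative): placeholders in the template are filled from keyword parameters, and `cause` chains the original trace onto the raised `Except`.

    from mo_logs import Log

    def risky_operation():
        raise ValueError("boom")

    try:
        risky_operation()
    except Exception as cause:
        # {{name}} is filled from the keyword parameter below; `cause` is
        # wrapped so its stack trace is reported alongside the new error
        Log.error("failed while processing {{name}}", name="job-42", cause=cause)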
Example #2
    def error(
        cls,
        template,  # human readable template
        default_params={},  # parameters for template
        cause=None,  # possible cause
        stack_depth=0,
        **more_params
    ):
        """
        raise an exception with a trace for the cause too

        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param cause: *Exception* for chaining
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any* more parameters (which will overwrite default_params)
        :return:
        """
        if not isinstance(template, text_type):
            sys.stderr.write(str("Log.error was expecting a unicode template"))
            Log.error("Log.error was expecting a unicode template")

        if default_params and isinstance(listwrap(default_params)[0], BaseException):
            cause = default_params
            default_params = {}

        params = dict(unwrap(default_params), **more_params)

        add_to_trace = False
        if cause == None:
            causes = None
        elif isinstance(cause, list):
            causes = []
            for c in listwrap(cause):  # CAN NOT USE LIST-COMPREHENSION IN PYTHON3 (EXTRA STACK DEPTH FROM THE IN-LINED GENERATOR)
                causes.append(Except.wrap(c, stack_depth=1))
            causes = FlatList(causes)
        elif isinstance(cause, BaseException):
            causes = Except.wrap(cause, stack_depth=1)
        else:
            causes = None
            Log.error("can only accept Exception, or list of exceptions")

        trace = exceptions.extract_stack(stack_depth + 1)

        if add_to_trace:
            cause[0].trace.extend(trace[1:])

        e = Except(type=exceptions.ERROR, template=template, params=params, cause=causes, trace=trace)
        raise_from_none(e)
Example #3
    def query(self, sql, param=None, stream=False, row_tuples=False):
        """
        RETURN LIST OF dicts
        """
        if not self.cursor:  # ALLOW NON-TRANSACTIONAL READS
            Log.error("must perform all queries inside a transaction")
        self._execute_backlog()

        try:
            if param:
                sql = expand_template(sql, quote_param(param))
            sql = self.preamble + outdent(sql)
            self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))

            self.cursor.execute(sql)
            if row_tuples:
                if stream:
                    result = self.cursor
                else:
                    result = wrap(list(self.cursor))
            else:
                columns = [utf8_to_unicode(d[0]) for d in coalesce(self.cursor.description, [])]
                if stream:
                    result = (wrap({c: utf8_to_unicode(v) for c, v in zip(columns, row)}) for row in self.cursor)
                else:
                    result = wrap([{c: utf8_to_unicode(v) for c, v in zip(columns, row)} for row in self.cursor])

            return result
        except Exception as e:
            e = Except.wrap(e)
            if "InterfaceError" in e:
                Log.error("Did you close the db connection?", e)
            Log.error("Problem executing SQL:\n{{sql|indent}}", sql=sql, cause=e, stack_depth=1)
Example #4
 def write(self, template, params):
     try:
         self.queue.add({"template": template, "params": params})
         return self
     except Exception as e:
         e = Except.wrap(e)
         raise e  # OH NO!
Example #5
def utf82unicode(value):
    """
    WITH EXPLANATION FOR FAILURE
    """
    try:
        return value.decode("utf8")
    except Exception as e:
        if not _Log:
            _late_import()

        if not is_binary(value):
            _Log.error("Can not convert {{type}} to unicode because it's not bytes",  type= type(value).__name__)

        e = _Except.wrap(e)
        for i, c in enumerate(value):
            try:
                c.decode("utf8")
            except Exception as f:
                _Log.error("Can not convert charcode {{c}} in string index {{i}}", i=i, c=ord(c), cause=[e, _Except.wrap(f)])

        try:
            latin1 = text_type(value.decode("latin1"))
            _Log.error("Can not explain conversion failure, but seems to be latin1", e)
        except Exception:
            pass

        _Log.error("Can not explain conversion failure of " + type(value).__name__ + "!", e)
Example #6
    def warning(cls,
                template,
                default_params={},
                cause=None,
                stack_depth=0,
                log_context=None,
                **more_params):
        """
        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param cause: *Exception* for chaining
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any* more parameters (which will overwrite default_params)
        :return:
        """
        if isinstance(default_params, BaseException):
            cause = default_params
            default_params = {}

        if "values" in more_params.keys():
            Log.error("Can not handle a logging parameter by name `values`")
        params = dict(unwrap(default_params), **more_params)
        cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
        trace = exceptions.extract_stack(stack_depth + 1)

        e = Except(exceptions.WARNING, template, params, cause, trace)
        Log.note("{{error|unicode}}",
                 error=e,
                 log_context=set_default({"context": exceptions.WARNING},
                                         log_context),
                 stack_depth=stack_depth + 1)
Example #7
    def worker(please_stop):
        seen_problem = False
        while not please_stop:
            request_time = (time.time() - timer.START) / 60  # MINUTES
            try:
                response = requests.get(
                    "http://169.254.169.254/latest/meta-data/spot/termination-time"
                )
                seen_problem = False
                if response.status_code not in [400, 404]:
                    Log.alert("Shutdown AWS Spot Node {{name}} {{type}}",
                              name=machine_metadata.name,
                              type=machine_metadata.aws_instance_type)
                    please_stop.go()
            except Exception as e:
                e = Except.wrap(e)
                if "Failed to establish a new connection: [Errno 10060]" in e or "A socket operation was attempted to an unreachable network" in e:
                    Log.note(
                        "AWS Spot Detection has shutdown, this is probably not a spot node, (http://169.254.169.254 is unreachable)"
                    )
                    return
                elif seen_problem:
                    # IGNORE THE FIRST PROBLEM
                    Log.warning(
                        "AWS shutdown detection has more than one consecutive problem: (last request {{time|round(1)}} minutes since startup)",
                        time=request_time,
                        cause=e)
                seen_problem = True

                (Till(seconds=61) | please_stop).wait()
            (Till(seconds=11) | please_stop).wait()
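A dependency-light sketch of the same termination poll, using only `requests` and the standard library (the `Till` and `please_stop` signal types above come from mo-threads):

    import time
    import requests

    TERMINATION = "http://169.254.169.254/latest/meta-data/spot/termination-time"

    def poll_spot_termination(should_stop, interval=11):
        # returns True once AWS publishes a termination time for this node
        while not should_stop():
            try:
                response = requests.get(TERMINATION, timeout=2)
                if response.status_code not in (400, 404):
                    return True
            except requests.exceptions.ConnectionError:
                return False  # endpoint unreachable: probably not a spot node
            time.sleep(interval)
        return False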
Example #8
    def _got_result(self, data, message):
        global count

        data = wrap(data)
        with count_locker:
            Log.note("{{count}} from {{exchange}}", count=count, exchange=self.pulse.exchange)
            data._meta.count = count
            data._meta.exchange = self.pulse.exchange
            count += 1

        if self.settings.debug:
            Log.note("{{data}}", data=data)
        if self.target_queue != None:
            try:
                self.target_queue.add(data)
                message.ack()
            except Exception as e:
                e = Except.wrap(e)
                if not self.target_queue.closed:  # EXPECTED TO HAPPEN, THIS THREAD MAY HAVE BEEN AWAY FOR A WHILE
                    raise e
        else:
            try:
                self.pulse_target(data)
                message.ack()
            except Exception as e:
                Log.warning("Problem processing pulse (see `data` in structured log)", data=data, cause=e)
Example #9
def post_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    if 'json' in kwargs:
        kwargs['data'] = value2json(kwargs['json']).encode('utf8')
        del kwargs['json']
    elif 'data' in kwargs:
        kwargs['data'] = value2json(kwargs['data']).encode('utf8')
    else:
        Log.error(u"Expecting `json` parameter")
    response = post(url, **kwargs)
    details = json2value(response.content.decode('utf8'))
    if response.status_code not in [200, 201, 202]:

        if "template" in details:
            Log.error(u"Bad response code {{code}}",
                      code=response.status_code,
                      cause=Except.wrap(details))
        else:
            Log.error(u"Bad response code {{code}}\n{{details}}",
                      code=response.status_code,
                      details=details)
    else:
        return details
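Roughly the same round-trip written directly against `requests`, for comparison (the URL in the comment is made up):

    import requests

    def post_json_sketch(url, payload):
        # POST a JSON body and return the decoded JSON response
        response = requests.post(url, json=payload)
        if response.status_code not in (200, 201, 202):
            raise RuntimeError("bad response code %d" % response.status_code)
        return response.json()

    # details = post_json_sketch("https://example.com/api", {"query": "test"})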
Example #10
    def _got_result(self, data, message):
        global count

        data = wrap(data)
        with count_locker:
            Log.note("{{count}} from {{exchange}}",
                     count=count,
                     exchange=self.pulse.exchange)
            data._meta.count = count
            data._meta.exchange = self.pulse.exchange
            count += 1

        if self.settings.debug:
            Log.note("{{data}}", data=data)
        if self.target_queue != None:
            try:
                self.target_queue.add(data)
                message.ack()
            except Exception as e:
                e = Except.wrap(e)
                if not self.target_queue.closed:  # EXPECTED TO HAPPEN, THIS THREAD MAY HAVE BEEN AWAY FOR A WHILE
                    raise e
        else:
            try:
                self.pulse_target(data)
                message.ack()
            except Exception as e:
                Log.warning(
                    "Problem processing pulse (see `data` in structured log)",
                    data=data,
                    cause=e)
Example #11
 def write(self, template, params):
     try:
         self.queue.add({"template": template, "params": params})
         return self
     except Exception as e:
         e = Except.wrap(e)
         raise e  # OH NO!
Example #12
def wrap_function(cache_store, func_):
    attr_name = "_cache_for_" + func_.__name__

    if func_.__code__.co_argcount > 0 and func_.__code__.co_varnames[0] == "self":
        using_self = True
        func = lambda self, *args: func_(self, *args)
    else:
        using_self = False
        func = lambda self, *args: func_(*args)

    def output(*args):
        with cache_store.locker:
            if using_self:
                self = args[0]
                args = args[1:]
            else:
                self = cache_store

            now = Date.now()
            try:
                _cache = getattr(self, attr_name)
            except Exception:
                _cache = {}
                setattr(self, attr_name, _cache)

            if Random.int(100) == 0:
                # REMOVE OLD CACHE
                _cache = {
                    k: v
                    for k, v in _cache.items() if v[0] == None or v[0] > now
                }
                setattr(self, attr_name, _cache)

            timeout, key, value, exception = _cache.get(
                args, (Null, Null, Null, Null))

        if now >= timeout:
            value = func(self, *args)
            with cache_store.locker:
                _cache[args] = (now + cache_store.timeout, args, value, None)
            return value

        if value == None:
            if exception == None:
                try:
                    value = func(self, *args)
                    with cache_store.locker:
                        _cache[args] = (now + cache_store.timeout, args, value,
                                        None)
                    return value
                except Exception as e:
                    e = Except.wrap(e)
                    with cache_store.locker:
                        _cache[args] = (now + cache_store.timeout, args, None,
                                        e)
                    raise e
            else:
                raise exception
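Each cache entry above is a plain 4-tuple `(timeout, key, value, exception)`, so failures are cached and re-raised just like successes. A minimal sketch of the expiring-memoize idea without the library types (successful results only):

    import time

    def cache_for(seconds):
        # memoize a function's results for a fixed number of seconds
        def decorator(func):
            _cache = {}  # args -> (expires_at, value)
            def wrapper(*args):
                now = time.time()
                hit = _cache.get(args)
                if hit is not None and now < hit[0]:
                    return hit[1]
                value = func(*args)
                _cache[args] = (now + seconds, value)
                return value
            return wrapper
        return decorator

    @cache_for(60)
    def slow_square(x):
        return x * x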
Example #13
    def _get_from_elasticsearch(self, revision, locale=None, get_diff=False):
        rev = revision.changeset.id
        if self.es.cluster.version.startswith("1.7."):
            query = {
                "query": {"filtered": {
                    "query": {"match_all": {}},
                    "filter": {"and": [
                        {"term": {"changeset.id12": rev[0:12]}},
                        {"term": {"branch.name": revision.branch.name}},
                        {"term": {"branch.locale": coalesce(locale, revision.branch.locale, DEFAULT_LOCALE)}},
                        {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}}
                    ]}
                }},
                "size": 2000
            }
        else:
            query = {
                "query": {"bool": {"must": [
                    {"term": {"changeset.id12": rev[0:12]}},
                    {"term": {"branch.name": revision.branch.name}},
                    {"term": {"branch.locale": coalesce(locale, revision.branch.locale, DEFAULT_LOCALE)}},
                    {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}}
                ]}},
                "size": 2000
            }

        for attempt in range(3):
            try:
                with self.es_locker:
                    docs = self.es.search(query).hits.hits
                break
            except Exception as e:
                e = Except.wrap(e)
                if "NodeNotConnectedException" in e:
                    # WE LOST A NODE, THIS MAY TAKE A WHILE
                    (Till(seconds=Random.int(5 * 60))).wait()
                    continue
                elif "EsRejectedExecutionException[rejected execution (queue capacity" in e:
                    (Till(seconds=Random.int(30))).wait()
                    continue
                else:
                    Log.warning("Bad ES call, fall back to TH", cause=e)
                    return None

        best = docs[0]._source
        if len(docs) > 1:
            for d in docs:
                if d._id.endswith(d._source.branch.locale):
                    best = d._source
            Log.warning("expecting no more than one document")

        if not GET_DIFF and not get_diff:
            return best
        elif best.changeset.diff:
            return best
        elif not best.changeset.files:
            return best  # NOT EXPECTING A DIFF, RETURN IT ANYWAY
        else:
            return None
Example #14
 def write(self, template, params):
     try:
         params.template = strings.limit(params.template, 2000)
         params.format = None
         self.queue.add({"value": _deep_json_to_string(params, 3)}, timeout=3 * 60)
     except Exception as e:
         sys.stdout.write(text_type(Except.wrap(e)))
     return self
Example #15
    def _get_queue(self, row):
        row = wrap(row)
        if row.json:
            row.value, row.json = json2value(row.json), None
        timestamp = Date(self.rollover_field(row.value))
        if timestamp == None:
            return Null
        elif timestamp < Date.today() - self.rollover_max:
            return DATA_TOO_OLD

        rounded_timestamp = timestamp.floor(self.rollover_interval)
        with self.locker:
            queue = self.known_queues.get(rounded_timestamp.unix)
        if queue == None:
            candidates = sort_using_key(
                filter(
                    lambda r: re.match(
                        re.escape(self.settings.index) + r"\d\d\d\d\d\d\d\d_\d\d\d\d\d\d$",
                        r['index']
                    ),
                    self.cluster.get_aliases()
                ),
                key=lambda r: r['index']
            )
            best = None
            for c in candidates:
                c = wrap(c)
                c.date = unicode2Date(c.index[-15:], elasticsearch.INDEX_DATE_FORMAT)
                if timestamp > c.date:
                    best = c
            if not best or rounded_timestamp > best.date:
                if rounded_timestamp < wrap(candidates[-1]).date:
                    es = self.cluster.get_or_create_index(read_only=False, alias=best.alias, index=best.index, kwargs=self.settings)
                else:
                    try:
                        es = self.cluster.create_index(create_timestamp=rounded_timestamp, kwargs=self.settings)
                        es.add_alias(self.settings.index)
                    except Exception as e:
                        e = Except.wrap(e)
                        if "IndexAlreadyExistsException" not in e:
                            Log.error("Problem creating index", cause=e)
                        return self._get_queue(row)  # TRY AGAIN
            else:
                es = self.cluster.get_or_create_index(read_only=False, alias=best.alias, index=best.index, kwargs=self.settings)

            def refresh(please_stop):
                try:
                    es.set_refresh_interval(seconds=60 * 10, timeout=5)
                except Exception:
                    Log.note("Could not set refresh interval for {{index}}", index=es.settings.index)

            Thread.run("refresh", refresh)

            self._delete_old_indexes(candidates)
            threaded_queue = es.threaded_queue(max_size=self.settings.queue_size, batch_size=self.settings.batch_size, silent=True)
            with self.locker:
                queue = self.known_queues[rounded_timestamp.unix] = threaded_queue
        return queue
Example #16
    def output(*args, **kwargs):
        if kwargs:
            Log.error(
                "Sorry, caching only works with ordered parameter, not keyword arguments"
            )

        with cache_store.locker:
            if using_self:
                self = args[0]
                args = args[1:]
            else:
                self = cache_store

            now = Date.now()
            try:
                _cache = getattr(self, attr_name)
            except Exception:
                _cache = {}
                setattr(self, attr_name, _cache)

            if Random.int(100) == 0:
                # REMOVE OLD CACHE
                _cache = {
                    k: v
                    for k, v in _cache.items()
                    if v.timeout == None or v.timeout > now
                }
                setattr(self, attr_name, _cache)

            timeout, key, value, exception = _cache.get(
                args, (Null, Null, Null, Null))

        if now >= timeout:
            value = func(self, *args)
            with cache_store.locker:
                _cache[args] = CacheElement(now + cache_store.timeout, args,
                                            value, None)
            return value

        if value == None:
            if exception == None:
                try:
                    value = func(self, *args)
                    with cache_store.locker:
                        _cache[args] = CacheElement(now + cache_store.timeout,
                                                    args, value, None)
                    return value
                except Exception as e:
                    e = Except.wrap(e)
                    with cache_store.locker:
                        _cache[args] = CacheElement(now + cache_store.timeout,
                                                    args, None, e)
                    raise e
            else:
                raise exception
        else:
            return value
Example #17
 def output(*args, **kwargs):
     while True:
         try:
             return func(*args, **kwargs)
         except Exception as e:
             e = Except.wrap(e)
             if "Request limit exceeded" in e:
                 Log.warning("AWS Problem", cause=e)
                 continue
             else:
                 Log.error("Problem with call to AWS", cause=e)
Example #18
 def output(*args, **kwargs):
     while True:
         try:
             return func(*args, **kwargs)
         except Exception as e:
             e = Except.wrap(e)
             if "Request limit exceeded" in e:
                 Log.warning("AWS Problem", cause=e)
                 continue
             else:
                 Log.error("Problem with call to AWS", cause=e)
Example #19
def wrap_function(cache_store, func_):
    attr_name = "_cache_for_" + func_.__name__

    if func_.__code__.co_argcount > 0 and func_.__code__.co_varnames[0] == "self":
        using_self = True
        func = lambda self, *args: func_(self, *args)
    else:
        using_self = False
        func = lambda self, *args: func_(*args)

    def output(*args):
        with cache_store.locker:
            if using_self:
                self = args[0]
                args = args[1:]
            else:
                self = cache_store

            now = Date.now()
            try:
                _cache = getattr(self, attr_name)
            except Exception:
                _cache = {}
                setattr(self, attr_name, _cache)

            if Random.int(100) == 0:
                # REMOVE OLD CACHE
                _cache = {k: v for k, v in _cache.items() if v[0]==None or v[0] > now}
                setattr(self, attr_name, _cache)

            timeout, key, value, exception = _cache.get(args, (Null, Null, Null, Null))

        if now >= timeout:
            value = func(self, *args)
            with cache_store.locker:
                _cache[args] = (now + cache_store.timeout, args, value, None)
            return value

        if value == None:
            if exception == None:
                try:
                    value = func(self, *args)
                    with cache_store.locker:
                        _cache[args] = (now + cache_store.timeout, args, value, None)
                    return value
                except Exception as e:
                    e = Except.wrap(e)
                    with cache_store.locker:
                        _cache[args] = (now + cache_store.timeout, args, None, e)
                    raise e
            else:
                raise exception
        else:
            return value
Example #20
    def _kill(self):
        try:
            self.service.kill()
        except Exception as e:
            ee = Except.wrap(e)
            if 'The operation completed successfully' in ee:
                return
            if 'No such process' in ee:
                return

            Log.warning("Failure to kill process {{process|quote}}", process=self.name, cause=ee)
Example #21
    def _kill(self):
        try:
            self.service.kill()
            Log.note("Service was successfully terminated.")
        except Exception as e:
            ee = Except.wrap(e)
            if 'The operation completed successfully' in ee:
                return
            if 'No such process' in ee:
                return

            Log.warning("Failure to kill process {{process|quote}}", process=self.name, cause=ee)
Example #22
        def worker_bee(please_stop):
            def stopper():
                self.add(THREAD_STOP)

            please_stop.on_go(stopper)

            _buffer = []
            _post_push_functions = []
            now = time()
            next_push = Till(till=now + period)  # THE TIME WE SHOULD DO A PUSH
            last_push = now - period

            def push_to_queue():
                queue.extend(_buffer)
                del _buffer[:]
                for f in _post_push_functions:
                    f()
                del _post_push_functions[:]

            while not please_stop:
                try:
                    if not _buffer:
                        item = self.pop()
                        now = time()
                        if now > last_push + period:
                            # _Log.note("delay next push")
                            next_push = Till(till=now + period)
                    else:
                        item = self.pop(till=next_push)
                        now = time()

                    if item is THREAD_STOP:
                        push_to_queue()
                        please_stop.go()
                        break
                    elif isinstance(item, types.FunctionType):
                        _post_push_functions.append(item)
                    elif item is not None:
                        _buffer.append(item)

                except Exception as e:
                    e = _Except.wrap(e)
                    if error_target:
                        try:
                            error_target(e, _buffer)
                        except Exception as f:
                            _Log.warning(
                                "`error_target` should not throw, just deal",
                                name=name,
                                cause=f)
                    else:
                        _Log.warning("Unexpected problem", name=name, cause=e)
Example #23
    def _get_from_elasticsearch(self, revision, locale=None, get_diff=False, get_moves=True):
        rev = revision.changeset.id
        if self.es.cluster.version.startswith("1.7."):
            query = {
                "query": {"filtered": {
                    "query": {"match_all": {}},
                    "filter": {"and": [
                        {"term": {"changeset.id12": rev[0:12]}},
                        {"term": {"branch.name": revision.branch.name}},
                        {"term": {"branch.locale": coalesce(locale, revision.branch.locale, DEFAULT_LOCALE)}},
                        {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}}
                    ]}
                }},
                "size": 20
            }
        else:
            query = {
                "query": {"bool": {"must": [
                    {"term": {"changeset.id12": rev[0:12]}},
                    {"term": {"branch.name": revision.branch.name}},
                    {"term": {"branch.locale": coalesce(locale, revision.branch.locale, DEFAULT_LOCALE)}},
                    {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}}
                ]}},
                "size": 20
            }

        for attempt in range(3):
            try:
                with self.es_locker:
                    docs = self.es.search(query).hits.hits
                if len(docs) == 0:
                    return None
                best = docs[0]._source
                if len(docs) > 1:
                    for d in docs:
                        if d._id.endswith(d._source.branch.locale):
                            best = d._source
                    Log.warning("expecting no more than one document")
                return best
            except Exception as e:
                e = Except.wrap(e)
                if "EsRejectedExecutionException[rejected execution (queue capacity" in e:
                    (Till(seconds=Random.int(30))).wait()
                    continue
                else:
                    Log.warning("Bad ES call, waiting for {{num}} seconds", num=WAIT_AFTER_NODE_FAILURE, cause=e)
                    Till(seconds=WAIT_AFTER_NODE_FAILURE).wait()
                    continue

        Log.warning("ES did not deliver, fall back to HG")
        return None
Example #24
    def write(self, template, params):
        bad = []
        for m in self.many:
            try:
                m.write(template, params)
            except Exception as e:
                e = Except.wrap(e)
                bad.append(m)
                Log.warning("Logger {{type|quote}} failed! It will be removed.", type=m.__class__.__name__, cause=e)
        with suppress_exception:
            for b in bad:
                self.many.remove(b)

        return self
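This is a fan-out sink: a failing logger is evicted rather than taking down the rest. A minimal sketch of the same idea, with the mo-logs types stripped out:

    class ManyLogs:
        # forward each record to every sink, dropping sinks that raise
        def __init__(self, sinks):
            self.sinks = list(sinks)

        def write(self, template, params):
            bad = []
            for sink in self.sinks:
                try:
                    sink.write(template, params)
                except Exception:
                    bad.append(sink)
            for sink in bad:
                self.sinks.remove(sink)
            return self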
Example #25
    def output(*args):
        with cache_store.locker:
            if using_self:
                self = args[0]
                args = args[1:]
            else:
                self = cache_store

            now = Date.now()
            try:
                _cache = getattr(self, attr_name)
            except Exception:
                _cache = {}
                setattr(self, attr_name, _cache)

            if Random.int(100) == 0:
                # REMOVE OLD CACHE
                _cache = {
                    k: v
                    for k, v in _cache.items() if v[0] == None or v[0] > now
                }
                setattr(self, attr_name, _cache)

            timeout, key, value, exception = _cache.get(
                args, (Null, Null, Null, Null))

        if now >= timeout:
            value = func(self, *args)
            with cache_store.locker:
                _cache[args] = (now + cache_store.timeout, args, value, None)
            return value

        if value == None:
            if exception == None:
                try:
                    value = func(self, *args)
                    with cache_store.locker:
                        _cache[args] = (now + cache_store.timeout, args, value,
                                        None)
                    return value
                except Exception as e:
                    e = Except.wrap(e)
                    with cache_store.locker:
                        _cache[args] = (now + cache_store.timeout, args, None,
                                        e)
                    raise e
            else:
                raise exception
        else:
            return value
Example #26
 def begin(self):
     if self.transaction_level == 0:
         self.cursor = self.db.cursor()
     self.transaction_level += 1
     self.execute("SET TIME_ZONE='+00:00'")
     if EXECUTE_TIMEOUT:
         try:
             self.execute("SET MAX_EXECUTION_TIME=" + text_type(EXECUTE_TIMEOUT))
             self._execute_backlog()
         except Exception as e:
             e = Except.wrap(e)
             if "Unknown system variable 'MAX_EXECUTION_TIME'" in e:
                 globals()['EXECUTE_TIMEOUT'] = 0  # THIS VERSION OF MYSQL DOES NOT HAVE SESSION LEVEL VARIABLE
             else:
                 raise e
Example #27
    def _get_queue(self, row):
        row = wrap(row)
        if row.json:
            row.value, row.json = json2value(row.json), None
        timestamp = Date(self.rollover_field(row.value))
        if timestamp == None:
            return Null
        elif timestamp < Date.today() - self.rollover_max:
            return DATA_TOO_OLD

        rounded_timestamp = timestamp.floor(self.rollover_interval)
        with self.locker:
            queue = self.known_queues.get(rounded_timestamp.unix)
        if queue == None:
            candidates = jx.run({
                "from": ListContainer(".", self.cluster.get_aliases()),
                "where": {"regex": {"index": self.settings.index + "\d\d\d\d\d\d\d\d_\d\d\d\d\d\d"}},
                "sort": "index"
            })
            best = None
            for c in candidates:
                c = wrap(c)
                c.date = unicode2Date(c.index[-15:], elasticsearch.INDEX_DATE_FORMAT)
                if timestamp > c.date:
                    best = c
            if not best or rounded_timestamp > best.date:
                if rounded_timestamp < wrap(candidates[-1]).date:
                    es = self.cluster.get_or_create_index(read_only=False, alias=best.alias, index=best.index, kwargs=self.settings)
                else:
                    try:
                        es = self.cluster.create_index(create_timestamp=rounded_timestamp, kwargs=self.settings)
                        es.add_alias(self.settings.index)
                    except Exception as e:
                        e = Except.wrap(e)
                        if "IndexAlreadyExistsException" not in e:
                            Log.error("Problem creating index", cause=e)
                        return self._get_queue(row)  # TRY AGAIN
            else:
                es = self.cluster.get_or_create_index(read_only=False, alias=best.alias, index=best.index, kwargs=self.settings)

            with suppress_exception:
                es.set_refresh_interval(seconds=60 * 5, timeout=5)

            self._delete_old_indexes(candidates)
            threaded_queue = es.threaded_queue(max_size=self.settings.queue_size, batch_size=self.settings.batch_size, silent=True)
            with self.locker:
                queue = self.known_queues[rounded_timestamp.unix] = threaded_queue
        return queue
Example #28
    def _get_queue(self, row):
        row = wrap(row)
        if row.json:
            row.value, row.json = json2value(row.json), None
        timestamp = Date(self.rollover_field(row.value))
        if timestamp == None:
            return Null
        elif timestamp < Date.today() - self.rollover_max:
            return DATA_TOO_OLD

        rounded_timestamp = timestamp.floor(self.rollover_interval)
        with self.locker:
            queue = self.known_queues.get(rounded_timestamp.unix)
        if queue == None:
            candidates = jx.run({
                "from": ListContainer(".", self.cluster.get_aliases()),
                "where": {"regex": {"index": self.settings.index + "\d\d\d\d\d\d\d\d_\d\d\d\d\d\d"}},
                "sort": "index"
            })
            best = None
            for c in candidates:
                c = wrap(c)
                c.date = unicode2Date(c.index[-15:], elasticsearch.INDEX_DATE_FORMAT)
                if timestamp > c.date:
                    best = c
            if not best or rounded_timestamp > best.date:
                if rounded_timestamp < wrap(candidates[-1]).date:
                    es = self.cluster.get_or_create_index(read_only=False, alias=best.alias, index=best.index, kwargs=self.settings)
                else:
                    try:
                        es = self.cluster.create_index(create_timestamp=rounded_timestamp, kwargs=self.settings)
                        es.add_alias(self.settings.index)
                    except Exception as e:
                        e = Except.wrap(e)
                        if "IndexAlreadyExistsException" not in e:
                            Log.error("Problem creating index", cause=e)
                        return self._get_queue(row)  # TRY AGAIN
            else:
                es = self.cluster.get_or_create_index(read_only=False, alias=best.alias, index=best.index, kwargs=self.settings)

            with suppress_exception:
                es.set_refresh_interval(seconds=60 * 5, timeout=5)

            self._delete_old_indexes(candidates)
            threaded_queue = es.threaded_queue(max_size=self.settings.queue_size, batch_size=self.settings.batch_size, silent=True)
            with self.locker:
                queue = self.known_queues[rounded_timestamp.unix] = threaded_queue
        return queue
Example #29
 def _worker(self, please_stop):
     try:
         # MAIN EXECUTION LOOP
         while not please_stop:
             command_item = self.queue.pop(till=please_stop)
             if command_item is None:
                 break
             self._process_command_item(command_item)
     except Exception as e:
         e = Except.wrap(e)
         if not please_stop:
             Log.warning("Problem with sql", cause=e)
     finally:
         self.closed = True
         DEBUG and Log.note("Database is closed")
         self.db.close()
Example #30
 def worker(please_stop):
     while not please_stop:
         try:
             response = requests.get("http://169.254.169.254/latest/meta-data/spot/termination-time")
             if response.status_code not in [400, 404]:
                 Log.alert("Shutdown AWS Spot Node {{name}} {{type}}", name=machine_metadata.name, type=machine_metadata.aws_instance_type)
                 please_stop.go()
         except Exception as e:
             e = Except.wrap(e)
             if "Failed to establish a new connection: [Errno 10060]" in e or "A socket operation was attempted to an unreachable network" in e:
                 Log.note("AWS Spot Detection has shutdown, probably not a spot node, (http://169.254.169.254 is unreachable)")
                 return
             else:
                 Log.warning("AWS shutdown detection has problems", cause=e)
             (Till(seconds=61) | please_stop).wait()
         (Till(seconds=11) | please_stop).wait()
Example #31
 def begin(self):
     if self.transaction_level == 0:
         self.cursor = self.db.cursor()
     self.transaction_level += 1
     self.execute("SET TIME_ZONE='+00:00'")
     if EXECUTE_TIMEOUT:
         try:
             self.execute("SET MAX_EXECUTION_TIME=" + text(EXECUTE_TIMEOUT))
             self._execute_backlog()
         except Exception as e:
             e = Except.wrap(e)
             if "Unknown system variable 'MAX_EXECUTION_TIME'" in e:
                 globals()['EXECUTE_TIMEOUT'] = 0  # THIS VERSION OF MYSQL DOES NOT HAVE SESSION LEVEL VARIABLE
             else:
                 raise e
Example #32
    def output(*args, **kwargs):
        if kwargs:
            Log.error("Sorry, caching only works with ordered parameter, not keyword arguments")

        with cache_store.locker:
            if using_self:
                self = args[0]
                args = args[1:]
            else:
                self = cache_store

            now = Date.now()
            try:
                _cache = getattr(self, attr_name)
            except Exception:
                _cache = {}
                setattr(self, attr_name, _cache)

            if Random.int(100) == 0:
                # REMOVE OLD CACHE
                _cache = {k: v for k, v in _cache.items() if v.timeout == None or v.timeout > now}
                setattr(self, attr_name, _cache)

            timeout, key, value, exception = _cache.get(args, (Null, Null, Null, Null))

        if now >= timeout:
            value = func(self, *args)
            with cache_store.locker:
                _cache[args] = CacheElement(now + cache_store.timeout, args, value, None)
            return value

        if value == None:
            if exception == None:
                try:
                    value = func(self, *args)
                    with cache_store.locker:
                        _cache[args] = CacheElement(now + cache_store.timeout, args, value, None)
                    return value
                except Exception as e:
                    e = Except.wrap(e)
                    with cache_store.locker:
                        _cache[args] = CacheElement(now + cache_store.timeout, args, None, e)
                    raise e
            else:
                raise exception
        else:
            return value
Example #33
    def fatal(
        cls,
        template,  # human readable template
        default_params={},  # parameters for template
        cause=None,  # possible cause
        stack_depth=0,
        log_context=None,
        **more_params
    ):
        """
        SEND TO STDERR

        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param cause: *Exception* for chaining
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any* more parameters (which will overwrite default_params)
        :return:
        """
        if default_params and isinstance(listwrap(default_params)[0], BaseException):
            cause = default_params
            default_params = {}

        params = dict(unwrap(default_params), **more_params)

        cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
        trace = exceptions.extract_stack(stack_depth + 1)

        e = Except(exceptions.ERROR, template, params, cause, trace)
        str_e = unicode(e)

        error_mode = cls.error_mode
        with suppress_exception:
            if not error_mode:
                cls.error_mode = True
                Log.note(
                    "{{error|unicode}}",
                    error=e,
                    log_context=set_default({"context": exceptions.FATAL}, log_context),
                    stack_depth=stack_depth + 1
                )
        cls.error_mode = error_mode

        sys.stderr.write(str_e.encode('utf8'))
Example #34
    def query(self, sql, param=None, stream=False, row_tuples=False):
        """
        RETURN LIST OF dicts
        """
        if not self.cursor:  # ALLOW NON-TRANSACTIONAL READS
            Log.error("must perform all queries inside a transaction")
        self._execute_backlog()

        try:
            if param:
                sql = expand_template(sql, quote_param(param))
            sql = self.preamble + outdent(sql)
            self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))

            self.cursor.execute(sql)
            if row_tuples:
                if stream:
                    result = self.cursor
                else:
                    result = wrap(list(self.cursor))
            else:
                columns = [
                    utf8_to_unicode(d[0])
                    for d in coalesce(self.cursor.description, [])
                ]
                if stream:
                    result = (wrap(
                        {c: utf8_to_unicode(v)
                         for c, v in zip(columns, row)})
                              for row in self.cursor)
                else:
                    result = wrap(
                        [{c: utf8_to_unicode(v)
                          for c, v in zip(columns, row)}
                         for row in self.cursor])

            return result
        except Exception as e:
            e = Except.wrap(e)
            if "InterfaceError" in e:
                Log.error("Did you close the db connection?", e)
            Log.error("Problem executing SQL:\n{{sql|indent}}",
                      sql=sql,
                      cause=e,
                      stack_depth=1)
Example #35
 def _iter():
     g = 0
     out = FlatList()
     try:
         for i, d in enumerate(data):
             out.append(d)
             if (i + 1) % max_size == 0:
                 yield g, out
                 g += 1
                 out = FlatList()
         if out:
             yield g, out
     except Exception as e:
         e = Except.wrap(e)
         if out:
             # AT LEAST TRY TO RETURN WHAT HAS BEEN PROCESSED SO FAR
             yield g, out
         Log.error("Problem inside jx.groupby", e)
Example #36
 def _iter():
     g = 0
     out = FlatList()
     try:
         for i, d in enumerate(data):
             out.append(d)
             if (i + 1) % max_size == 0:
                 yield g, out
                 g += 1
                 out = FlatList()
         if out:
             yield g, out
     except Exception as e:
         e = Except.wrap(e)
         if out:
             # AT LEAST TRY TO RETURN WHAT HAS BEEN PROCESSED SO FAR
             yield g, out
         Log.error("Problem inside jx.groupby", e)
Example #37
def post_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    if 'json' in kwargs:
        kwargs['data'] = unicode2utf8(value2json(kwargs['json']))
        del kwargs['json']
    elif 'data' in kwargs:
        kwargs['data'] = unicode2utf8(value2json(kwargs['data']))
    else:
        Log.error(u"Expecting `json` parameter")
    response = post(url, **kwargs)
    details = json2value(utf82unicode(response.content))
    if response.status_code not in [200, 201]:
        Log.error(u"Bad response code {{code}}",
                  code=response.status_code,
                  cause=Except.wrap(details))
    else:
        return details
Example #38
    def assertRaises(self, problem, function, *args, **kwargs):
        try:
            function(*args, **kwargs)
        except Exception as e:
            f = Except.wrap(e)
            if isinstance(problem, basestring):
                if problem in f:
                    return
                Log.error(
                    "expecting an exception returning {{problem|quote}} got something else instead",
                    problem=problem,
                    cause=f
                )
            elif not isinstance(f, problem) and not isinstance(e, problem):
                Log.error("expecting an exception of type {{type}} to be raised", type=problem)
            else:
                return

        Log.error("Expecting an exception to be raised")
Example #39
    def assertRaises(self, problem, function, *args, **kwargs):
        try:
            function(*args, **kwargs)
        except Exception as e:
            f = Except.wrap(e)
            if isinstance(problem, basestring):
                if problem in f:
                    return
                Log.error(
                    "expecting an exception returning {{problem|quote}} got something else instead",
                    problem=problem,
                    cause=f)
            elif not isinstance(f, problem) and not isinstance(e, problem):
                Log.error(
                    "expecting an exception of type {{type}} to be raised",
                    type=problem)
            else:
                return

        Log.error("Expecting an exception to be raised")
Example #40
    def close(self):
        if self.transaction_level > 0:
            if self.readonly:
                self.commit()  # AUTO-COMMIT
            else:
                Log.error("expecting commit() or rollback() before close")
        self.cursor = None  # NOT NEEDED
        try:
            self.db.close()
        except Exception as e:
            e = Except.wrap(e)
            if "Already closed" in e:
                return

            Log.warning("can not close()", e)
        finally:
            try:
                all_db.remove(self)
            except Exception as e:
                Log.error("not expected", cause=e)
Example #41
    def close(self):
        if self.transaction_level > 0:
            if self.readonly:
                self.commit()  # AUTO-COMMIT
            else:
                Log.error("expecting commit() or rollback() before close")
        self.cursor = None  # NOT NEEDED
        try:
            self.db.close()
        except Exception as e:
            e = Except.wrap(e)
            if "Already closed" in e:
                return

            Log.warning("can not close()", e)
        finally:
            try:
                all_db.remove(self)
            except Exception as e:
                Log.error("not expected", cause=e)
Example #42
    def query(self, _query):
        try:
            query = QueryOp.wrap(_query, schema=self)

            for n in self.namespaces:
                query = n.convert(query)
            if self.typed:
                query = Typed().convert(query)

            for s in listwrap(query.select):
                if not aggregates1_4.get(s.aggregate):
                    Log.error(
                        "ES can not aggregate {{name}} because {{aggregate|quote}} is not a recognized aggregate",
                        name=s.name,
                        aggregate=s.aggregate
                    )

            frum = query["from"]
            if isinstance(frum, QueryOp):
                result = self.query(frum)
                q2 = query.copy()
                q2.frum = result
                return jx.run(q2)

            if is_deepop(self._es, query):
                return es_deepop(self._es, query)
            if is_aggsop(self._es, query):
                return es_aggsop(self._es, frum, query)
            if is_setop(self._es, query):
                return es_setop(self._es, query)
            if es09_setop.is_setop(query):
                return es09_setop.es_setop(self._es, None, query)
            if es09_aggop.is_aggop(query):
                return es09_aggop.es_aggop(self._es, None, query)
            Log.error("Can not handle")
        except Exception as e:
            e = Except.wrap(e)
            if "Data too large, data for" in e:
                http.post(self._es.cluster.path+"/_cache/clear")
                Log.error("Problem (Tried to clear Elasticsearch cache)", e)
            Log.error("problem", e)
Example #43
def post_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    if b"json" in kwargs:
        kwargs[b"data"] = convert.unicode2utf8(convert.value2json(kwargs[b"json"]))
    elif b'data' in kwargs:
        kwargs[b"data"] = convert.unicode2utf8(convert.value2json(kwargs[b"data"]))
    else:
        Log.error("Expecting `json` parameter")

    response = post(url, **kwargs)
    c = response.content
    try:
        details = mo_json.json2value(convert.utf82unicode(c))
    except Exception as e:
        Log.error("Unexpected return value {{content}}", content=c, cause=e)

    if response.status_code not in [200, 201]:
        Log.error("Bad response", cause=Except.wrap(details))

    return details
Example #44
    def warning(
        cls,
        template,
        default_params={},
        cause=None,
        stack_depth=0,
        log_context=None,
        **more_params
    ):
        """
        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param cause: *Exception* for chaining
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any* more parameters (which will overwrite default_params)
        :return:
        """
        timestamp = datetime.utcnow()
        if not is_text(template):
            Log.error("Log.warning was expecting a unicode template")

        if isinstance(default_params, BaseException):
            cause = default_params
            default_params = {}

        if "values" in more_params.keys():
            Log.error("Can not handle a logging parameter by name `values`")

        params = Data(dict(default_params, **more_params))
        cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
        trace = exceptions.extract_stack(stack_depth + 1)

        e = Except(exceptions.WARNING, template=template, params=params, cause=cause, trace=trace)
        Log._annotate(
            e,
            timestamp,
            stack_depth+1
        )
Example #45
    def worker(please_stop):
        seen_problem = False
        while not please_stop:
            request_time = (time.time() - timer.START)/60  # MINUTES
            try:
                response = requests.get("http://169.254.169.254/latest/meta-data/spot/termination-time")
                seen_problem = False
                if response.status_code not in [400, 404]:
                    Log.alert("Shutdown AWS Spot Node {{name}} {{type}}", name=machine_metadata.name, type=machine_metadata.aws_instance_type)
                    please_stop.go()
            except Exception as e:
                e = Except.wrap(e)
                if "Failed to establish a new connection: [Errno 10060]" in e or "A socket operation was attempted to an unreachable network" in e:
                    Log.note("AWS Spot Detection has shutdown, probably not a spot node, (http://169.254.169.254 is unreachable)")
                    return
                elif seen_problem:
                    # IGNORE THE FIRST PROBLEM
                    Log.warning("AWS shutdown detection has more than one consecutive problem: (last request {{time|round(1)}} minutes since startup)", time=request_time, cause=e)
                seen_problem = True

                (Till(seconds=61) | please_stop).wait()
            (Till(seconds=11) | please_stop).wait()
Example #46
    def error(
        cls,
        template,  # human readable template
        default_params={},  # parameters for template
        cause=None,  # possible cause
        stack_depth=0,
        **more_params
    ):
        """
        raise an exception with a trace for the cause too

        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param cause: *Exception* for chaining
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any* more parameters (which will overwrite default_params)
        :return:
        """
        if not isinstance(template, unicode):
            sys.stderr.write("Log.error was expecting a unicode template")
            Log.error("Log.error was expecting a unicode template")

        if default_params and isinstance(listwrap(default_params)[0], BaseException):
            cause = default_params
            default_params = {}

        params = dict(unwrap(default_params), **more_params)

        add_to_trace = False
        cause = wrap(unwraplist([Except.wrap(c, stack_depth=1) for c in listwrap(cause)]))
        trace = exceptions.extract_stack(stack_depth + 1)

        if add_to_trace:
            cause[0].trace.extend(trace[1:])

        e = Except(exceptions.ERROR, template, params, cause, trace)
        raise e
Example #47
    def _update_cardinality(self, column):
        """
        QUERY ES TO FIND CARDINALITY AND PARTITIONS FOR A SIMPLE COLUMN
        """
        if column.es_index in self.index_does_not_exist:
            return

        if column.jx_type in STRUCT:
            Log.error("not supported")
        try:
            if column.es_index == "meta.columns":
                partitions = jx.sort([g[column.es_column] for g, _ in jx.groupby(self.meta.columns, column.es_column) if g[column.es_column] != None])
                self.meta.columns.update({
                    "set": {
                        "partitions": partitions,
                        "count": len(self.meta.columns),
                        "cardinality": len(partitions),
                        "multi": 1,
                        "last_updated": Date.now()
                    },
                    "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
                })
                return
            if column.es_index == "meta.tables":
                partitions = jx.sort([g[column.es_column] for g, _ in jx.groupby(self.meta.tables, column.es_column) if g[column.es_column] != None])
                self.meta.columns.update({
                    "set": {
                        "partitions": partitions,
                        "count": len(self.meta.tables),
                        "cardinality": len(partitions),
                        "multi": 1,
                        "last_updated": Date.now()
                    },
                    "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
                })
                return

            es_index = column.es_index.split(".")[0]

            is_text = [cc for cc in self.meta.columns if cc.es_column == column.es_column and cc.es_type == "text"]
            if is_text:
                # text IS A MULTIVALUE STRING THAT CAN ONLY BE FILTERED
                result = self.es_cluster.post("/" + es_index + "/_search", data={
                    "aggs": {
                        "count": {"filter": {"match_all": {}}}
                    },
                    "size": 0
                })
                count = result.hits.total
                cardinality = max(1001, count)
                multi = 1001
            elif column.es_column == "_id":
                result = self.es_cluster.post("/" + es_index + "/_search", data={
                    "query": {"match_all": {}},
                    "size": 0
                })
                count = cardinality = result.hits.total
                multi = 1
            elif column.es_type == BOOLEAN:
                result = self.es_cluster.post("/" + es_index + "/_search", data={
                    "aggs": {
                        "count": _counting_query(column)
                    },
                    "size": 0
                })
                count = result.hits.total
                cardinality = 2
                multi = 1
            else:
                result = self.es_cluster.post("/" + es_index + "/_search", data={
                    "aggs": {
                        "count": _counting_query(column),
                        "multi": {"max": {"script": "doc[" + quote(column.es_column) + "].values.size()"}}
                    },
                    "size": 0
                })
                agg_results = result.aggregations
                count = result.hits.total
                cardinality = coalesce(agg_results.count.value, agg_results.count._nested.value, agg_results.count.doc_count)
                multi = int(coalesce(agg_results.multi.value, 1))
                if cardinality == None:
                   Log.error("logic error")

            query = Data(size=0)

            if column.es_column == "_id":
                self.meta.columns.update({
                    "set": {
                        "count": cardinality,
                        "cardinality": cardinality,
                        "multi": 1,
                        "last_updated": Date.now()
                    },
                    "clear": ["partitions"],
                    "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
                })
                return
            elif cardinality > 1000 or (count >= 30 and cardinality == count) or (count >= 1000 and cardinality / count > 0.99):
                DEBUG and Log.note("{{table}}.{{field}} has {{num}} parts", table=column.es_index, field=column.es_column, num=cardinality)
                self.meta.columns.update({
                    "set": {
                        "count": count,
                        "cardinality": cardinality,
                        "multi": multi,
                        "last_updated": Date.now()
                    },
                    "clear": ["partitions"],
                    "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
                })
                return
            elif column.es_type in elasticsearch.ES_NUMERIC_TYPES and cardinality > 30:
                DEBUG and Log.note("{{table}}.{{field}} has {{num}} parts", table=column.es_index, field=column.es_column, num=cardinality)
                self.meta.columns.update({
                    "set": {
                        "count": count,
                        "cardinality": cardinality,
                        "multi": multi,
                        "last_updated": Date.now()
                    },
                    "clear": ["partitions"],
                    "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
                })
                return
            elif len(column.nested_path) != 1:
                query.aggs["_"] = {
                    "nested": {"path": column.nested_path[0]},
                    "aggs": {"_nested": {"terms": {"field": column.es_column}}}
                }
            elif cardinality == 0:
                query.aggs["_"] = {"terms": {"field": column.es_column}}
            else:
                query.aggs["_"] = {"terms": {"field": column.es_column, "size": cardinality}}

            result = self.es_cluster.post("/" + es_index + "/_search", data=query)

            aggs = result.aggregations._
            if aggs._nested:
                parts = jx.sort(aggs._nested.buckets.key)
            else:
                parts = jx.sort(aggs.buckets.key)

            self.meta.columns.update({
                "set": {
                    "count": count,
                    "cardinality": cardinality,
                    "multi": multi,
                    "partitions": parts,
                    "last_updated": Date.now()
                },
                "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
            })
        except Exception as e:
            # CAN NOT IMPORT: THE TEST MODULES SETS UP LOGGING
            # from tests.test_jx import TEST_TABLE
            e = Except.wrap(e)
            TEST_TABLE = "testdata"
            is_missing_index = any(w in e for w in ["IndexMissingException", "index_not_found_exception"])
            is_test_table = column.es_index.startswith((TEST_TABLE_PREFIX, TEST_TABLE))
            if is_missing_index and is_test_table:
                # WE EXPECT TEST TABLES TO DISAPPEAR
                self.meta.columns.update({
                    "clear": ".",
                    "where": {"eq": {"es_index": column.es_index}}
                })
                self.index_does_not_exist.add(column.es_index)
            else:
                self.meta.columns.update({
                    "set": {
                        "last_updated": Date.now()
                    },
                    "clear": [
                        "count",
                        "cardinality",
                        "multi",
                        "partitions",
                    ],
                    "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
                })
                Log.warning("Could not get {{col.es_index}}.{{col.es_column}} info", col=column, cause=e)
Example #48
def request(method, url, zip=None, retry=None, **kwargs):
    """
    JUST LIKE requests.request() BUT WITH DEFAULT HEADERS AND FIXES
    DEMANDS data IS ONE OF:
    * A JSON-SERIALIZABLE STRUCTURE, OR
    * LIST OF JSON-SERIALIZABLE STRUCTURES, OR
    * None

    Parameters
     * zip - ZIP THE REQUEST BODY, IF BIG ENOUGH
     * json - JSON-SERIALIZABLE STRUCTURE
     * retry - {"times": x, "sleep": y} STRUCTURE

    THE BYTE_STRINGS (b"") ARE NECESSARY TO PREVENT httplib.py FROM **FREAKING OUT**
    IT APPEARS requests AND httplib.py SIMPLY CONCATENATE STRINGS BLINDLY, WHICH
    INCLUDES url AND headers
    """
    global _warning_sent
    if not default_headers and not _warning_sent:
        _warning_sent = True
        Log.warning(
            "The pyLibrary.env.http module was meant to add extra "
            "default headers to all requests, specifically the 'Referer' "
            "header with a URL to the project. Use the `pyLibrary.debug.constants.set()` "
            "function to set `pyLibrary.env.http.default_headers`"
        )

    if isinstance(url, list):
        # TRY MANY URLS
        failures = []
        for remaining, u in jx.countdown(url):
            try:
                response = request(method, u, zip=zip, retry=retry, **kwargs)
                if Math.round(response.status_code, decimal=-2) not in [400, 500]:
                    return response
                if not remaining:
                    return response
            except Exception as e:
                e = Except.wrap(e)
                failures.append(e)
        Log.error("Tried {{num}} urls", num=len(url), cause=failures)

    if b"session" in kwargs:
        session = kwargs[b"session"]
        del kwargs[b"session"]
    else:
        session = sessions.Session()
    session.headers.update(default_headers)

    if zip is None:
        zip = ZIP_REQUEST

    if isinstance(url, unicode):
        # httplib.py WILL **FREAK OUT** IF IT SEES ANY UNICODE
        url = url.encode("ascii")

    _to_ascii_dict(kwargs)
    timeout = kwargs[b'timeout'] = coalesce(kwargs.get(b'timeout'), default_timeout)

    if retry == None:
        retry = Data(times=1, sleep=0)
    elif isinstance(retry, Number):
        retry = Data(times=retry, sleep=1)
    else:
        retry = wrap(retry)
        if isinstance(retry.sleep, Duration):
            retry.sleep = retry.sleep.seconds
        set_default(retry, {"times": 1, "sleep": 0})

    if b'json' in kwargs:
        kwargs[b'data'] = convert.value2json(kwargs[b'json']).encode("utf8")
        del kwargs[b'json']

    try:
        headers = kwargs[b"headers"] = unwrap(coalesce(wrap(kwargs)[b"headers"], {}))
        set_default(headers, {b"accept-encoding": b"compress, gzip"})

        if zip and len(coalesce(kwargs.get(b"data"))) > 1000:
            compressed = convert.bytes2zip(kwargs[b"data"])
            headers[b'content-encoding'] = b'gzip'
            kwargs[b"data"] = compressed

            _to_ascii_dict(headers)
        else:
            _to_ascii_dict(headers)
    except Exception as e:
        Log.error("Request setup failure on {{url}}", url=url, cause=e)

    errors = []
    for r in range(retry.times):
        if r:
            Till(seconds=retry.sleep).wait()

        try:
            if DEBUG:
                Log.note("http {{method}} to {{url}}", method=method, url=url)
            return session.request(method=method, url=url, **kwargs)
        except Exception as e:
            errors.append(Except.wrap(e))

    if " Read timed out." in errors[0]:
        Log.error("Tried {{times}} times: Timeout failure (timeout was {{timeout}}", timeout=timeout, times=retry.times, cause=errors[0])
    else:
        Log.error("Tried {{times}} times: Request failure of {{url}}", url=url, times=retry.times, cause=errors[0])
Example #49
    def _normalize_revision(self, r, found_revision, push, get_diff, get_moves):
        new_names = set(r.keys()) - KNOWN_TAGS
        if new_names and not r.tags:
            Log.warning(
                "hg is returning new property names {{names|quote}} for {{changeset}} from {{url}}",
                names=new_names,
                changeset=r.node,
                url=found_revision.branch.url
            )

        changeset = Changeset(
            id=r.node,
            id12=r.node[0:12],
            author=r.user,
            description=strings.limit(coalesce(r.description, r.desc), 2000),
            date=parse_hg_date(r.date),
            files=r.files,
            backedoutby=r.backedoutby if r.backedoutby else None,
            bug=self._extract_bug_id(r.description)
        )
        rev = Revision(
            branch=found_revision.branch,
            index=r.rev,
            changeset=changeset,
            parents=unwraplist(list(set(r.parents))),
            children=unwraplist(list(set(r.children))),
            push=push,
            phase=r.phase,
            bookmarks=unwraplist(r.bookmarks),
            landingsystem=r.landingsystem,
            etl={"timestamp": Date.now().unix, "machine": machine_metadata}
        )

        r.pushuser = None
        r.pushdate = None
        r.pushid = None
        r.node = None
        r.user = None
        r.desc = None
        r.description = None
        r.date = None
        r.files = None
        r.backedoutby = None
        r.parents = None
        r.children = None
        r.bookmarks = None
        r.landingsystem = None

        set_default(rev, r)

        # ADD THE DIFF
        if get_diff:
            rev.changeset.diff = self._get_json_diff_from_hg(rev)
        if get_moves:
            rev.changeset.moves = self._get_moves_from_hg(rev)

        try:
            _id = coalesce(rev.changeset.id12, "") + "-" + rev.branch.name + "-" + coalesce(rev.branch.locale, DEFAULT_LOCALE)
            with self.es_locker:
                self.es.add({"id": _id, "value": rev})
        except Exception as e:
            e = Except.wrap(e)
            Log.warning("Did not save to ES, waiting {{duration}} seconds", duration=WAIT_AFTER_NODE_FAILURE, cause=e)
            Till(seconds=WAIT_AFTER_NODE_FAILURE).wait()
            if "FORBIDDEN/12/index read-only" in e:
                pass  # KNOWN FAILURE MODE

        return rev
Example #50
def _esfilter2sqlwhere(db, esfilter):
    """
    CONVERT ElasticSearch FILTER TO SQL FILTER
    db - REQUIRED TO PROPERLY QUOTE VALUES AND COLUMN NAMES
    """
    esfilter = wrap(esfilter)

    if esfilter is True:
        return SQL_TRUE
    elif esfilter["and"]:
        return sql_iso(SQL_AND.join([esfilter2sqlwhere(db, a) for a in esfilter["and"]]))
    elif esfilter["or"]:
        return sql_iso(SQL_OR.join([esfilter2sqlwhere(db, a) for a in esfilter["or"]]))
    elif esfilter["not"]:
        return SQL_NOT + sql_iso(esfilter2sqlwhere(db, esfilter["not"]))
    elif esfilter.term:
        return sql_iso(SQL_AND.join([
            quote_column(col) + SQL("=") + quote_value(val)
            for col, val in esfilter.term.items()
        ]))
    elif esfilter.terms:
        for col, v in esfilter.terms.items():
            if len(v) == 0:
                return "FALSE"

            try:
                int_list = convert.value2intlist(v)
                has_null = False
                for vv in v:
                    if vv == None:
                        has_null = True
                        break
                if int_list:
                    filter = int_list_packer(col, int_list)
                    if has_null:
                        return esfilter2sqlwhere(db, {"or": [{"missing": col}, filter]})
                    elif 'terms' in filter and set(filter['terms'].get(col, []))==set(int_list):
                        return quote_column(col) + " in " + quote_list(int_list)
                    else:
                        return esfilter2sqlwhere(db, filter)
                else:
                    if has_null:
                        return esfilter2sqlwhere(db, {"missing": col})
                    else:
                        return "false"
            except Exception as e:
                e = Except.wrap(e)  # IGNORED; FALL BACK TO THE SIMPLE IN-CLAUSE BELOW
            return quote_column(col) + " in " + quote_list(v)
    elif esfilter.script:
        return sql_iso(esfilter.script)
    elif esfilter.range:
        name2sign = {
            "gt": SQL(">"),
            "gte": SQL(">="),
            "lte": SQL("<="),
            "lt": SQL("<")
        }

        def single(col, r):
            min = coalesce(r["gte"], r[">="])
            max = coalesce(r["lte"], r["<="])
            if min != None and max != None:
                # SPECIAL CASE (BETWEEN)
                sql = quote_column(col) + SQL(" BETWEEN ") + quote_value(min) + SQL_AND + quote_value(max)
            else:
                sql = SQL_AND.join(
                    quote_column(col) + name2sign[sign] + quote_value(value)
                    for sign, value in r.items()
                )
            return sql

        terms = [single(col, ranges) for col, ranges in esfilter.range.items()]
        if len(terms) == 1:
            output = terms[0]
        else:
            output = sql_iso(SQL_AND.join(terms))
        return output
    elif esfilter.missing:
        if isinstance(esfilter.missing, text_type):
            return sql_iso(quote_column(esfilter.missing) + SQL_IS_NULL)
        else:
            return sql_iso(quote_column(esfilter.missing.field) + SQL_IS_NULL)
    elif esfilter.exists:
        if isinstance(esfilter.exists, text_type):
            return sql_iso(quote_column(esfilter.exists) + SQL_IS_NOT_NULL)
        else:
            return sql_iso(quote_column(esfilter.exists.field) + SQL_IS_NOT_NULL)
    elif esfilter.match_all:
        return SQL_TRUE
    elif esfilter.instr:
        return sql_iso(SQL_AND.join(["instr" + sql_iso(quote_column(col) + ", " + quote_value(val)) + ">0" for col, val in esfilter.instr.items()]))
    else:
        Log.error("Can not convert esfilter to SQL: {{esfilter}}", esfilter=esfilter)
Example #51
def post_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    if 'json' in kwargs:
        kwargs['data'] = unicode2utf8(value2json(kwargs['json']))
        del kwargs['json']
    elif 'data' in kwargs:
        kwargs['data'] = unicode2utf8(value2json(kwargs['data']))
    else:
        Log.error(u"Expecting `json` parameter")
    response = post(url, **kwargs)
    details = json2value(utf82unicode(response.content))
    if response.status_code not in [200, 201, 202]:
        if "template" in details:
            Log.error(u"Bad response code {{code}}", code=response.status_code, cause=Except.wrap(details))
        else:
            Log.error(u"Bad response code {{code}}\n{{details}}", code=response.status_code, details=details)
    else:
        return details
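
Typical usage, posting a JSON body and getting the decoded response back; on a non-2xx status the decoded body is re-raised through Log.error (chained as an Except when it carries a "template" field):

details = post_json(
    "https://example.com/api",  # PLACEHOLDER URL
    json={"name": "test"},
)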
Example #52
def request(method, url, headers=None, zip=None, retry=None, **kwargs):
    """
    JUST LIKE requests.request() BUT WITH DEFAULT HEADERS AND FIXES
    DEMANDS data IS ONE OF:
    * A JSON-SERIALIZABLE STRUCTURE, OR
    * LIST OF JSON-SERIALIZABLE STRUCTURES, OR
    * None

    Parameters
     * zip - ZIP THE REQUEST BODY, IF BIG ENOUGH
     * json - JSON-SERIALIZABLE STRUCTURE
     * retry - {"times": x, "sleep": y} STRUCTURE

    THE BYTE_STRINGS (b"") ARE NECESSARY TO PREVENT httplib.py FROM **FREAKING OUT**
    IT APPEARS requests AND httplib.py SIMPLY CONCATENATE STRINGS BLINDLY, WHICH
    INCLUDES url AND headers
    """
    global _warning_sent
    global request_count

    if not _warning_sent and not default_headers:
        Log.warning(text_type(
            "The pyLibrary.env.http module was meant to add extra " +
            "default headers to all requests, specifically the 'Referer' " +
            "header with a URL to the project. Use the `pyLibrary.debug.constants.set()` " +
            "function to set `pyLibrary.env.http.default_headers`"
        ))
    _warning_sent = True

    if is_list(url):
        # TRY MANY URLS
        failures = []
        for remaining, u in jx.countdown(url):
            try:
                response = request(method, u, retry=retry, **kwargs)
                if mo_math.round(response.status_code, decimal=-2) not in [400, 500]:
                    return response
                if not remaining:
                    return response
            except Exception as e:
                e = Except.wrap(e)
                failures.append(e)
        Log.error(u"Tried {{num}} urls", num=len(url), cause=failures)

    if 'session' in kwargs:
        session = kwargs['session']
        del kwargs['session']
        sess = Null
    else:
        sess = session = sessions.Session()

    with closing(sess):
        if PY2 and is_text(url):
            # httplib.py WILL **FREAK OUT** IF IT SEES ANY UNICODE
            url = url.encode('ascii')

        try:
            set_default(kwargs, {"zip":zip, "retry": retry}, DEFAULTS)
            _to_ascii_dict(kwargs)

            # HEADERS
            headers = kwargs['headers'] = unwrap(set_default(headers, session.headers, default_headers))
            _to_ascii_dict(headers)
            del kwargs['headers']

            # RETRY
            retry = wrap(kwargs['retry'])
            if isinstance(retry, Number):
                retry = set_default({"times":retry}, DEFAULTS['retry'])
            if isinstance(retry.sleep, Duration):
                retry.sleep = retry.sleep.seconds
            del kwargs['retry']

            # JSON
            if 'json' in kwargs:
                kwargs['data'] = value2json(kwargs['json']).encode('utf8')
                del kwargs['json']

            # ZIP
            set_default(headers, {'Accept-Encoding': 'compress, gzip'})

            if kwargs['zip'] and len(coalesce(kwargs.get('data'))) > 1000:
                compressed = convert.bytes2zip(kwargs['data'])
                headers['content-encoding'] = 'gzip'
                kwargs['data'] = compressed
            del kwargs['zip']
        except Exception as e:
            Log.error(u"Request setup failure on {{url}}", url=url, cause=e)

        errors = []
        for r in range(retry.times):
            if r:
                Till(seconds=retry.sleep).wait()

            try:
                DEBUG and Log.note(u"http {{method|upper}} to {{url}}", method=method, url=text_type(url))
                request_count += 1
                return session.request(method=method, headers=headers, url=str(url), **kwargs)
            except Exception as e:
                e = Except.wrap(e)
                if retry['http'] and str(url).startswith("https://") and "EOF occurred in violation of protocol" in e:
                    url = URL("http://" + str(url)[8:])
                    Log.note("Changed {{url}} to http due to SSL EOF violation.", url=str(url))
                errors.append(e)

        if " Read timed out." in errors[0]:
            Log.error(u"Tried {{times}} times: Timeout failure (timeout was {{timeout}}", timeout=kwargs['timeout'], times=retry.times, cause=errors[0])
        else:
            Log.error(u"Tried {{times}} times: Request failure of {{url}}", url=url, times=retry.times, cause=errors[0])
Example #53
    def _worker(self, please_stop):
        global _load_extension_warning_sent

        if DEBUG:
            Log.note("Sqlite version {{version}}", version=sqlite3.sqlite_version)
        if Sqlite.canonical:
            self.db = Sqlite.canonical
        else:
            self.db = sqlite3.connect(coalesce(self.filename, ':memory:'))

            library_loc = File.new_instance(sys.modules[__name__].__file__, "../..")
            full_path = File.new_instance(library_loc, "vendor/sqlite/libsqlitefunctions.so").abspath
            try:
                trace = extract_stack(0)[0]
                if self.upgrade:
                    if os.name == 'nt':
                        file = File.new_instance(trace["file"], "../../vendor/sqlite/libsqlitefunctions.so")
                    else:
                        file = File.new_instance(trace["file"], "../../vendor/sqlite/libsqlitefunctions")

                    full_path = file.abspath
                    self.db.enable_load_extension(True)
                    self.db.execute("SELECT load_extension(" + self.quote_value(full_path) + ")")
            except Exception as e:
                if not _load_extension_warning_sent:
                    _load_extension_warning_sent = True
                    Log.warning("Could not load {{file}}}, doing without. (no SQRT for you!)", file=full_path, cause=e)

        try:
            while not please_stop:
                command, result, signal, trace = self.queue.pop(till=please_stop)

                if DEBUG_INSERT and command.strip().lower().startswith("insert"):
                    Log.note("Running command\n{{command|indent}}", command=command)
                if DEBUG and not command.strip().lower().startswith("insert"):
                    Log.note("Running command\n{{command|indent}}", command=command)
                with Timer("Run command", debug=DEBUG):
                    if signal is not None:
                        try:
                            curr = self.db.execute(command)
                            self.db.commit()
                            result.meta.format = "table"
                            result.header = [d[0] for d in curr.description] if curr.description else None
                            result.data = curr.fetchall()
                            if DEBUG and result.data:
                                text = convert.table2csv(list(result.data))
                                Log.note("Result:\n{{data}}", data=text)
                        except Exception as e:
                            e = Except.wrap(e)
                            result.exception = Except(ERROR, "Problem with\n{{command|indent}}", command=command, cause=e)
                        finally:
                            signal.go()
                    else:
                        try:
                            self.db.execute(command)
                            self.db.commit()
                        except Exception as e:
                            e = Except.wrap(e)
                            e.cause = Except(
                                type=ERROR,
                                template="Bad call to Sqlite",
                                trace=trace
                            )
                            Log.warning("Failure to execute", cause=e)

        except Exception as e:
            if not please_stop:
                Log.error("Problem with sql thread", e)
        finally:
            if DEBUG:
                Log.note("Database is closed")
            self.db.commit()
            self.db.close()
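
The worker consumes (command, result, signal, trace) tuples from self.queue. A caller that wants rows back presumably enqueues a result holder plus a signal, then blocks on it; a rough sketch under that assumption (Signal and Queue semantics as in mo-threads, db standing for the Sqlite instance):

result = Data()    # THE WORKER FILLS IN .header, .data AND .meta.format
signal = Signal()  # THE WORKER CALLS signal.go() WHEN THE COMMAND IS DONE
db.queue.add(("SELECT name FROM sqlite_master;", result, signal, extract_stack(0)))
signal.wait()      # BLOCK UNTIL THE WORKER COMMITS AND POPULATES result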