def error(
    cls,
    template,  # human readable template
    default_params={},  # parameters for template
    cause=None,  # plausible cause
    stack_depth=0,
    **more_params
):
    """
    raise an exception with a trace for the cause too

    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param cause: *Exception* for chaining
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any more parameters (which will overwrite default_params)
    :return:
    """
    if not is_text(template):
        sys.stderr.write(str("Log.error was expecting a unicode template"))
        Log.error("Log.error was expecting a unicode template")

    if default_params and isinstance(listwrap(default_params)[0], BaseException):
        cause = default_params
        default_params = {}

    params = Data(dict(default_params, **more_params))

    add_to_trace = False
    if cause == None:
        causes = None
    elif is_list(cause):
        causes = []
        for c in listwrap(cause):  # CAN NOT USE LIST-COMPREHENSION IN PYTHON3 (EXTRA STACK DEPTH FROM THE IN-LINED GENERATOR)
            causes.append(Except.wrap(c, stack_depth=1))
        causes = FlatList(causes)
    elif isinstance(cause, BaseException):
        causes = Except.wrap(cause, stack_depth=1)
    else:
        causes = None
        Log.error("can only accept Exception, or list of exceptions")

    trace = exceptions.get_stacktrace(stack_depth + 1)

    if add_to_trace:
        cause[0].trace.extend(trace[1:])

    e = Except(context=exceptions.ERROR, template=template, params=params, cause=causes, trace=trace)
    raise_from_none(e)
def __exit__(self, exc_type, exc_val, exc_tb):
    causes = []
    try:
        if isinstance(exc_val, Exception):
            causes.append(Except.wrap(exc_val))
            self.rollback()
        else:
            self.commit()
    except Exception as e:
        causes.append(Except.wrap(e))
        Log.error("Transaction failed", cause=unwraplist(causes))
def error(
    cls,
    template,  # human readable template
    default_params={},  # parameters for template
    cause=None,  # plausible cause
    stack_depth=0,
    **more_params
):
    """
    raise an exception with a trace for the cause too

    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param cause: *Exception* for chaining
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any more parameters (which will overwrite default_params)
    :return:
    """
    if not isinstance(template, text_type):
        sys.stderr.write(str("Log.error was expecting a unicode template"))
        Log.error("Log.error was expecting a unicode template")

    if default_params and isinstance(listwrap(default_params)[0], BaseException):
        cause = default_params
        default_params = {}

    params = dict(unwrap(default_params), **more_params)

    add_to_trace = False
    if cause == None:
        causes = None
    elif isinstance(cause, list):
        causes = []
        for c in listwrap(cause):  # CAN NOT USE LIST-COMPREHENSION IN PYTHON3 (EXTRA STACK DEPTH FROM THE IN-LINED GENERATOR)
            causes.append(Except.wrap(c, stack_depth=1))
        causes = FlatList(causes)
    elif isinstance(cause, BaseException):
        causes = Except.wrap(cause, stack_depth=1)
    else:
        causes = None
        Log.error("can only accept Exception, or list of exceptions")

    trace = exceptions.extract_stack(stack_depth + 1)

    if add_to_trace:
        cause[0].trace.extend(trace[1:])

    e = Except(type=exceptions.ERROR, template=template, params=params, cause=causes, trace=trace)
    raise_from_none(e)
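A minimal usage sketch for the `Log.error` classmethod above, assuming the modern `mo_logs` package layout; the file name and the wrapped `IOError` are invented for illustration:

from mo_logs import Log
from mo_logs.exceptions import Except

try:
    try:
        raise IOError("file is missing")  # HYPOTHETICAL LOW-LEVEL FAILURE
    except Exception as cause:
        # TEMPLATE PLACEHOLDERS ARE FILLED FROM KEYWORD PARAMETERS;
        # cause= CHAINS THE ORIGINAL EXCEPTION, KEEPING ITS TRACE
        Log.error("can not read {{file}}", file="config.json", cause=cause)
except Exception as e:
    e = Except.wrap(e)
    assert "can not read" in e  # `in` SEARCHES THE TEMPLATE AND THE CAUSAL CHAIN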
def get_raw_json(path):
    active_data_timer = Timer("total duration")
    body = flask.request.get_data()
    try:
        with active_data_timer:
            args = wrap(Data(**flask.request.args))
            limit = args.limit if args.limit else 10
            args.limit = None
            frum = wrap_from(path)
            result = jx.run(
                {
                    "from": path,
                    "where": {"eq": args},
                    "limit": limit,
                    "format": "list"
                },
                frum
            )

            if isinstance(result, Container):  # TODO: REMOVE THIS CHECK, jx SHOULD ALWAYS RETURN Containers
                result = result.format("list")

        result.meta.active_data_response_time = active_data_timer.duration

        response_data = convert.unicode2utf8(convert.value2json(result.data, pretty=True))
        Log.note("Response is {{num}} bytes", num=len(response_data))
        return Response(response_data, status=200)
    except Exception as e:
        e = Except.wrap(e)
        return _send_error(active_data_timer, body, e)
def query(self, _query):
    try:
        query = QueryOp.wrap(_query, table=self)

        for n in self.namespaces:
            query = n.convert(query)

        for s in listwrap(query.select):
            if not aggregates.get(s.aggregate):
                Log.error(
                    "ES can not aggregate {{name}} because {{aggregate|quote}} is not a recognized aggregate",
                    name=s.name,
                    aggregate=s.aggregate
                )

        frum = query["from"]
        if isinstance(frum, QueryOp):
            result = self.query(frum)
            q2 = query.copy()
            q2.frum = result
            return jx.run(q2)

        if is_deepop(self._es, query):
            return es_deepop(self._es, query)
        if is_aggsop(self._es, query):
            return es_aggsop(self._es, frum, query)
        if is_setop(self._es, query):
            return es_setop(self._es, query)
        Log.error("Can not handle")
    except Exception as e:
        e = Except.wrap(e)
        if "Data too large, data for" in e:
            http.post(self._es.cluster.path + "/_cache/clear")
            Log.error("Problem (Tried to clear Elasticsearch cache)", e)
        Log.error("problem", e)
def _got_result(self, data, message):
    global count

    data = wrap(data)
    with count_locker:
        Log.note("{{count}} from {{exchange}}", count=count, exchange=self.pulse.exchange)
        data._meta.count = count
        data._meta.exchange = self.pulse.exchange
        count += 1

    if self.settings.debug:
        Log.note("{{data}}", data=data)

    if self.target_queue != None:
        try:
            self.target_queue.add(data)
            message.ack()
        except Exception as e:
            e = Except.wrap(e)
            if not self.target_queue.closed:  # EXPECTED TO HAPPEN, THIS THREAD MAY HAVE BEEN AWAY FOR A WHILE
                raise e
    else:
        try:
            self.pulse_target(data)
            message.ack()
        except Exception as e:
            Log.warning("Problem processing pulse (see `data` in structured log)", data=data, cause=e)
def query(self, sql, param=None, stream=False, row_tuples=False):
    """
    RETURN LIST OF dicts
    """
    if not self.cursor:  # ALLOW NON-TRANSACTIONAL READS
        Log.error("must perform all queries inside a transaction")
    self._execute_backlog()

    try:
        if param:
            sql = expand_template(sql, quote_param(param))
        sql = self.preamble + outdent(sql)
        self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))

        self.cursor.execute(sql)
        if row_tuples:
            if stream:
                result = self.cursor
            else:
                result = wrap(list(self.cursor))
        else:
            columns = [utf8_to_unicode(d[0]) for d in coalesce(self.cursor.description, [])]
            if stream:
                result = (wrap({c: utf8_to_unicode(v) for c, v in zip(columns, row)}) for row in self.cursor)
            else:
                result = wrap([{c: utf8_to_unicode(v) for c, v in zip(columns, row)} for row in self.cursor])
        return result
    except Exception as e:
        e = Except.wrap(e)
        if "InterfaceError" in e:
            Log.error("Did you close the db connection?", e)
        Log.error("Problem executing SQL:\n{{sql|indent}}", sql=sql, cause=e, stack_depth=1)
def worker(please_stop):
    seen_problem = False
    while not please_stop:
        request_time = (time.time() - timer.START) / 60  # MINUTES
        try:
            response = requests.get("http://169.254.169.254/latest/meta-data/spot/termination-time")
            seen_problem = False
            if response.status_code not in [400, 404]:
                Log.alert("Shutdown AWS Spot Node {{name}} {{type}}", name=machine_metadata.name, type=machine_metadata.aws_instance_type)
                please_stop.go()
        except Exception as e:
            e = Except.wrap(e)
            if "Failed to establish a new connection: [Errno 10060]" in e or "A socket operation was attempted to an unreachable network" in e:
                Log.note("AWS Spot Detection has shutdown, this is probably not a spot node, (http://169.254.169.254 is unreachable)")
                return
            elif seen_problem:
                # IGNORE THE FIRST PROBLEM
                Log.warning("AWS shutdown detection has more than one consecutive problem: (last request {{time|round(1)}} minutes since startup)", time=request_time, cause=e)
            seen_problem = True

            (Till(seconds=61) | please_stop).wait()
        (Till(seconds=11) | please_stop).wait()
def write(self, template, params):
    try:
        self.queue.add({"template": template, "params": params})
        return self
    except Exception as e:
        e = Except.wrap(e)
        raise e  # OH NO!
def post_json(url, **kwargs):
    """
    ASSUME RESPONSE IS JSON
    """
    if 'json' in kwargs:
        kwargs['data'] = value2json(kwargs['json']).encode('utf8')
        del kwargs['json']
    elif 'data' in kwargs:
        kwargs['data'] = value2json(kwargs['data']).encode('utf8')
    else:
        Log.error(u"Expecting `json` parameter")

    response = post(url, **kwargs)
    details = json2value(response.content.decode('utf8'))
    if response.status_code not in [200, 201, 202]:
        if "template" in details:
            Log.error(u"Bad response code {{code}}", code=response.status_code, cause=Except.wrap(details))
        else:
            Log.error(u"Bad response code {{code}}\n{{details}}", code=response.status_code, details=details)
    else:
        return details
def utf82unicode(value):
    """
    WITH EXPLANATION FOR FAILURE
    """
    try:
        return value.decode("utf8")
    except Exception as e:
        if not _Log:
            _late_import()

        if not is_binary(value):
            _Log.error("Can not convert {{type}} to unicode because it's not bytes", type=type(value).__name__)

        e = _Except.wrap(e)
        for i, c in enumerate(value):
            try:
                c.decode("utf8")
            except Exception as f:
                _Log.error("Can not convert charcode {{c}} in string index {{i}}", i=i, c=ord(c), cause=[e, _Except.wrap(f)])

        try:
            latin1 = text_type(value.decode("latin1"))
            _Log.error("Can not explain conversion failure, but seems to be latin1", e)
        except Exception:
            pass

        try:
            a = text_type(value.decode("latin1"))
            _Log.error("Can not explain conversion failure, but seems to be latin1", e)
        except Exception:
            pass

        _Log.error("Can not explain conversion failure of " + type(value).__name__ + "!", e)
def __init__(
    self,
    host,
    user=None,
    port=None,
    config=None,
    gateway=None,
    forward_agent=None,
    connect_timeout=None,
    connect_kwargs=None,
    inline_ssh_env=None,
    key_filename=None,  # part of connect_kwargs
    kwargs=None,
):
    connect_kwargs = set_default({}, connect_kwargs, {"key_filename": [File(f).abspath for f in listwrap(key_filename)]})
    key_filenames = connect_kwargs.key_filename

    self.stdout = LogStream(host, "stdout")
    self.stderr = LogStream(host, "stderr")
    config = Config(**unwrap(set_default(
        {},
        config,
        {"overrides": {"run": {
            # "hide": True,
            "out_stream": self.stdout,
            "err_stream": self.stderr,
        }}},
    )))

    self.warn = False
    cause = Except("expecting some private key to connect")
    for key_file in key_filenames:
        try:
            connect_kwargs.key_filename = File(key_file).abspath
            self.conn = _Connection(
                host,
                user,
                port,
                config,
                gateway,
                forward_agent,
                connect_timeout,
                connect_kwargs,
                inline_ssh_env,
            )
            self.conn.run("echo")  # verify we can connect
            return
        except Exception as e:
            cause = e
    Log.error("could not connect", cause=cause)
def wrap_function(cache_store, func_):
    attr_name = "_cache_for_" + func_.__name__

    if func_.func_code.co_argcount > 0 and func_.func_code.co_varnames[0] == "self":
        using_self = True
        func = lambda self, *args: func_(self, *args)
    else:
        using_self = False
        func = lambda self, *args: func_(*args)

    def output(*args):
        with cache_store.locker:
            if using_self:
                self = args[0]
                args = args[1:]
            else:
                self = cache_store

            now = Date.now()
            try:
                _cache = getattr(self, attr_name)
            except Exception:
                _cache = {}
                setattr(self, attr_name, _cache)

            if Random.int(100) == 0:
                # REMOVE OLD CACHE
                _cache = {k: v for k, v in _cache.items() if v[0] == None or v[0] > now}
                setattr(self, attr_name, _cache)

            timeout, key, value, exception = _cache.get(args, (Null, Null, Null, Null))

        if now >= timeout:
            value = func(self, *args)
            with cache_store.locker:
                _cache[args] = (now + cache_store.timeout, args, value, None)
            return value

        if value == None:
            if exception == None:
                try:
                    value = func(self, *args)
                    with cache_store.locker:
                        _cache[args] = (now + cache_store.timeout, args, value, None)
                    return value
                except Exception as e:
                    e = Except.wrap(e)
                    with cache_store.locker:
                        _cache[args] = (now + cache_store.timeout, args, None, e)
                    raise e
            else:
                raise exception
        else:
            return value

    return output
def _get_from_elasticsearch(self, revision, locale=None, get_diff=False):
    rev = revision.changeset.id
    if self.es.cluster.version.startswith("1.7."):
        query = {
            "query": {"filtered": {
                "query": {"match_all": {}},
                "filter": {"and": [
                    {"term": {"changeset.id12": rev[0:12]}},
                    {"term": {"branch.name": revision.branch.name}},
                    {"term": {"branch.locale": coalesce(locale, revision.branch.locale, DEFAULT_LOCALE)}},
                    {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}}
                ]}
            }},
            "size": 2000
        }
    else:
        query = {
            "query": {"bool": {"must": [
                {"term": {"changeset.id12": rev[0:12]}},
                {"term": {"branch.name": revision.branch.name}},
                {"term": {"branch.locale": coalesce(locale, revision.branch.locale, DEFAULT_LOCALE)}},
                {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}}
            ]}},
            "size": 2000
        }

    for attempt in range(3):
        try:
            with self.es_locker:
                docs = self.es.search(query).hits.hits
            break
        except Exception as e:
            e = Except.wrap(e)
            if "NodeNotConnectedException" in e:
                # WE LOST A NODE, THIS MAY TAKE A WHILE
                (Till(seconds=Random.int(5 * 60))).wait()
                continue
            elif "EsRejectedExecutionException[rejected execution (queue capacity" in e:
                (Till(seconds=Random.int(30))).wait()
                continue
            else:
                Log.warning("Bad ES call, fall back to TH", cause=e)
                return None

    best = docs[0]._source
    if len(docs) > 1:
        for d in docs:
            if d._id.endswith(d._source.branch.locale):
                best = d._source
        Log.warning("expecting no more than one document")

    if not GET_DIFF and not get_diff:
        return best
    elif best.changeset.diff:
        return best
    elif not best.changeset.files:
        return best  # NOT EXPECTING A DIFF, RETURN IT ANYWAY
    else:
        return None
def write(self, template, params):
    try:
        params.template = strings.limit(params.template, 2000)
        params.format = None
        self.queue.add({"value": _deep_json_to_string(params, 3)}, timeout=3 * 60)
    except Exception as e:
        sys.stdout.write(text_type(Except.wrap(e)))
    return self
def write(self, template, params):
    try:
        params.template = strings.limit(params.template, 2000)
        params.format = None
        self.queue.add({"value": _deep_json_to_string(params, 3)}, timeout=3 * 60)
    except Exception as e:
        sys.stdout.write(text(Except.wrap(e)))
    return self
def _get_queue(self, row):
    row = wrap(row)
    if row.json:
        row.value, row.json = json2value(row.json), None
    timestamp = Date(self.rollover_field(row.value))
    if timestamp == None:
        return Null
    elif timestamp < Date.today() - self.rollover_max:
        return DATA_TOO_OLD

    rounded_timestamp = timestamp.floor(self.rollover_interval)
    with self.locker:
        queue = self.known_queues.get(rounded_timestamp.unix)
    if queue == None:
        candidates = sort_using_key(
            filter(
                lambda r: re.match(
                    re.escape(self.settings.index) + r"\d\d\d\d\d\d\d\d_\d\d\d\d\d\d$",
                    r['index']
                ),
                self.cluster.get_aliases()
            ),
            key=lambda r: r['index']
        )
        best = None
        for c in candidates:
            c = wrap(c)
            c.date = unicode2Date(c.index[-15:], elasticsearch.INDEX_DATE_FORMAT)
            if timestamp > c.date:
                best = c
        if not best or rounded_timestamp > best.date:
            if rounded_timestamp < wrap(candidates[-1]).date:
                es = self.cluster.get_or_create_index(read_only=False, alias=best.alias, index=best.index, kwargs=self.settings)
            else:
                try:
                    es = self.cluster.create_index(create_timestamp=rounded_timestamp, kwargs=self.settings)
                    es.add_alias(self.settings.index)
                except Exception as e:
                    e = Except.wrap(e)
                    if "IndexAlreadyExistsException" not in e:
                        Log.error("Problem creating index", cause=e)
                    return self._get_queue(row)  # TRY AGAIN
        else:
            es = self.cluster.get_or_create_index(read_only=False, alias=best.alias, index=best.index, kwargs=self.settings)

        def refresh(please_stop):
            try:
                es.set_refresh_interval(seconds=60 * 10, timeout=5)
            except Exception:
                Log.note("Could not set refresh interval for {{index}}", index=es.settings.index)

        Thread.run("refresh", refresh)

        self._delete_old_indexes(candidates)
        threaded_queue = es.threaded_queue(max_size=self.settings.queue_size, batch_size=self.settings.batch_size, silent=True)
        with self.locker:
            queue = self.known_queues[rounded_timestamp.unix] = threaded_queue
    return queue
def utf82unicode(value):
    """
    WITH EXPLANATION FOR FAILURE
    """
    try:
        return value.decode("utf8")
    except Exception as e:
        if not _Log:
            _late_import()

        if not isinstance(value, binary_type):
            _Log.error("Can not convert {{type}} to unicode because it's not bytes", type=type(value).__name__)

        e = _Except.wrap(e)
        for i, c in enumerate(value):
            try:
                c.decode("utf8")
            except Exception as f:
                _Log.error("Can not convert charcode {{c}} in string index {{i}}", i=i, c=ord(c), cause=[e, _Except.wrap(f)])

        try:
            latin1 = text_type(value.decode("latin1"))
            _Log.error("Can not explain conversion failure, but seems to be latin1", e)
        except Exception:
            pass

        try:
            a = text_type(value.decode("latin1"))
            _Log.error("Can not explain conversion failure, but seems to be latin1", e)
        except Exception:
            pass

        _Log.error("Can not explain conversion failure of " + type(value).__name__ + "!", e)
def output(*args, **kwargs):
    if kwargs:
        Log.error("Sorry, caching only works with ordered parameter, not keyword arguments")

    with cache_store.locker:
        if using_self:
            self = args[0]
            args = args[1:]
        else:
            self = cache_store

        now = Date.now()
        try:
            _cache = getattr(self, attr_name)
        except Exception:
            _cache = {}
            setattr(self, attr_name, _cache)

        if Random.int(100) == 0:
            # REMOVE OLD CACHE
            _cache = {k: v for k, v in _cache.items() if v.timeout == None or v.timeout > now}
            setattr(self, attr_name, _cache)

        timeout, key, value, exception = _cache.get(args, (Null, Null, Null, Null))

    if now >= timeout:
        value = func(self, *args)
        with cache_store.locker:
            _cache[args] = CacheElement(now + cache_store.timeout, args, value, None)
        return value

    if value == None:
        if exception == None:
            try:
                value = func(self, *args)
                with cache_store.locker:
                    _cache[args] = CacheElement(now + cache_store.timeout, args, value, None)
                return value
            except Exception as e:
                e = Except.wrap(e)
                with cache_store.locker:
                    _cache[args] = CacheElement(now + cache_store.timeout, args, None, e)
                raise e
        else:
            raise exception
    else:
        return value
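The `output` closure above is the body of a caching decorator; `cache_store`, `func`, `attr_name`, and `CacheElement` come from the enclosing `wrap_function` shown earlier. A self-contained sketch of the same pattern, with simplified stand-ins (not the library's API) so it can run on its own:

from collections import namedtuple
from threading import Lock
from time import time

# SIMPLIFIED STAND-INS FOR THE REAL cache_store/CacheElement (ASSUMPTIONS)
CacheElement = namedtuple("CacheElement", ["timeout", "key", "value", "exception"])

class CacheStore(object):
    def __init__(self, timeout=60):
        self.timeout = timeout
        self.locker = Lock()

def cache(timeout=60):
    store = CacheStore(timeout)

    def wrapper(func):
        _cache = {}

        def output(*args, **kwargs):
            if kwargs:
                raise TypeError("caching only works with ordered parameters")
            now = time()
            with store.locker:
                hit = _cache.get(args)
            if hit and now < hit.timeout:
                # SERVE CACHED RESULT, GOOD OR BAD, UNTIL IT EXPIRES
                if hit.exception:
                    raise hit.exception
                return hit.value
            try:
                value = func(*args)
                with store.locker:
                    _cache[args] = CacheElement(now + store.timeout, args, value, None)
                return value
            except Exception as e:
                # CACHE THE FAILURE TOO, SO REPEAT CALLS FAIL FAST
                with store.locker:
                    _cache[args] = CacheElement(now + store.timeout, args, None, e)
                raise
        return output
    return wrapper

@cache(timeout=10)
def slow_square(x):
    return x * x

assert slow_square(4) == 16  # SECOND CALL WITHIN 10s IS SERVED FROM _cache
assert slow_square(4) == 16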
def fatal(
    cls,
    template,  # human readable template
    default_params={},  # parameters for template
    cause=None,  # plausible cause
    stack_depth=0,
    log_context=None,
    **more_params
):
    """
    SEND TO STDERR

    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param cause: *Exception* for chaining
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any more parameters (which will overwrite default_params)
    :return:
    """
    if default_params and isinstance(listwrap(default_params)[0], BaseException):
        cause = default_params
        default_params = {}

    params = dict(unwrap(default_params), **more_params)
    cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
    trace = exceptions.extract_stack(stack_depth + 1)

    e = Except(type=exceptions.ERROR, template=template, params=params, cause=cause, trace=trace)

    error_mode = cls.error_mode
    with suppress_exception:
        if not error_mode:
            cls.error_mode = True
            Log.note(
                "{{error|unicode}}",
                error=e,
                log_context=set_default({"context": exceptions.FATAL}, log_context),
                stack_depth=stack_depth + 1
            )
    cls.error_mode = error_mode

    sys.stderr.write(str(e))
def _kill(self):
    try:
        self.service.kill()
    except Exception as e:
        ee = Except.wrap(e)
        if 'The operation completed successfully' in ee:
            return
        if 'No such process' in ee:
            return
        Log.warning("Failure to kill process {{process|quote}}", process=self.name, cause=ee)
def output(*args, **kwargs):
    while True:
        try:
            return func(*args, **kwargs)
        except Exception as e:
            e = Except.wrap(e)
            if "Request limit exceeded" in e:
                Log.warning("AWS Problem", cause=e)
                continue
            else:
                Log.error("Problem with call to AWS", cause=e)
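A hedged sketch of the kind of decorator that would produce the retry loop above; `retry_on_throttle` is an invented name, and the plain string test stands in for the `"Request limit exceeded" in e` check that `Except` supports:

import functools

def retry_on_throttle(func):
    # KEEP CALLING func UNTIL IT STOPS RAISING THROTTLE ERRORS (INVENTED HELPER)
    @functools.wraps(func)
    def output(*args, **kwargs):
        while True:
            try:
                return func(*args, **kwargs)
            except Exception as e:
                if "Request limit exceeded" in str(e):
                    continue  # AWS THROTTLED US; TRY AGAIN
                raise
    return output

calls = {"n": 0}

@retry_on_throttle
def flaky():
    calls["n"] += 1
    if calls["n"] < 3:
        raise Exception("Request limit exceeded")  # SIMULATED AWS THROTTLE
    return "ok"

assert flaky() == "ok" and calls["n"] == 3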
def _kill(self):
    try:
        self.service.kill()
        Log.note("Service was successfully terminated.")
    except Exception as e:
        ee = Except.wrap(e)
        if 'The operation completed successfully' in ee:
            return
        if 'No such process' in ee:
            return
        Log.warning("Failure to kill process {{process|quote}}", process=self.name, cause=ee)
def error(
    cls,
    template,  # human readable template
    default_params={},  # parameters for template
    cause=None,  # plausible cause
    stack_depth=0,
    **more_params
):
    """
    raise an exception with a trace for the cause too

    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param cause: *Exception* for chaining
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any more parameters (which will overwrite default_params)
    :return:
    """
    if not isinstance(template, unicode):
        sys.stderr.write("Log.error was expecting a unicode template")
        Log.error("Log.error was expecting a unicode template")

    if default_params and isinstance(listwrap(default_params)[0], BaseException):
        cause = default_params
        default_params = {}

    params = dict(unwrap(default_params), **more_params)

    add_to_trace = False
    cause = wrap(unwraplist([Except.wrap(c, stack_depth=1) for c in listwrap(cause)]))
    trace = exceptions.extract_stack(stack_depth + 1)

    if add_to_trace:
        cause[0].trace.extend(trace[1:])

    e = Except(exceptions.ERROR, template, params, cause, trace)
    raise e
def worker_bee(please_stop):
    def stopper():
        self.add(THREAD_STOP)

    please_stop.on_go(stopper)

    _buffer = []
    _post_push_functions = []
    now = time()
    next_push = Till(till=now + period)  # THE TIME WE SHOULD DO A PUSH
    last_push = now - period

    def push_to_queue():
        queue.extend(_buffer)
        del _buffer[:]
        for f in _post_push_functions:
            f()
        del _post_push_functions[:]

    while not please_stop:
        try:
            if not _buffer:
                item = self.pop()
                now = time()
                if now > last_push + period:
                    # _Log.note("delay next push")
                    next_push = Till(till=now + period)
            else:
                item = self.pop(till=next_push)
                now = time()

            if item is THREAD_STOP:
                push_to_queue()
                please_stop.go()
                break
            elif isinstance(item, types.FunctionType):
                _post_push_functions.append(item)
            elif item is not None:
                _buffer.append(item)
        except Exception as e:
            e = _Except.wrap(e)
            if error_target:
                try:
                    error_target(e, _buffer)
                except Exception as f:
                    _Log.warning("`error_target` should not throw, just deal", name=name, cause=f)
            else:
                _Log.warning("Unexpected problem", name=name, cause=e)
def query(self, query):
    try:
        with self.esq:
            self.esq.query(query)
            return None
    except Exception as e:
        f = Except(ERROR, text(e), trace=extract_tb(1))
        try:
            details = str(f)
            query = json2value(strings.between(details, ">>>>", "<<<<"))
            return query
        except Exception as g:
            Log.error("problem", f)
def warning(cls, template, default_params={}, cause=None, stack_depth=0, log_context=None, **more_params):
    """
    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param cause: *Exception* for chaining
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any more parameters (which will overwrite default_params)
    :return:
    """
    timestamp = datetime.utcnow()
    if not is_text(template):
        Log.error("Log.warning was expecting a unicode template")

    if isinstance(default_params, BaseException):
        cause = default_params
        default_params = {}

    if "values" in more_params.keys():
        Log.error("Can not handle a logging parameter by name `values`")

    params = Data(dict(default_params, **more_params))
    cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
    trace = exceptions.get_stacktrace(stack_depth + 1)

    e = Except(
        exceptions.WARNING,
        template=template,
        params=params,
        cause=cause,
        trace=trace,
    )
    Log._annotate(e, timestamp, stack_depth + 1)
def _get_from_elasticsearch(self, revision, locale=None, get_diff=False, get_moves=True):
    rev = revision.changeset.id
    if self.es.cluster.version.startswith("1.7."):
        query = {
            "query": {"filtered": {
                "query": {"match_all": {}},
                "filter": {"and": [
                    {"term": {"changeset.id12": rev[0:12]}},
                    {"term": {"branch.name": revision.branch.name}},
                    {"term": {"branch.locale": coalesce(locale, revision.branch.locale, DEFAULT_LOCALE)}},
                    {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}}
                ]}
            }},
            "size": 20
        }
    else:
        query = {
            "query": {"bool": {"must": [
                {"term": {"changeset.id12": rev[0:12]}},
                {"term": {"branch.name": revision.branch.name}},
                {"term": {"branch.locale": coalesce(locale, revision.branch.locale, DEFAULT_LOCALE)}},
                {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}}
            ]}},
            "size": 20
        }

    for attempt in range(3):
        try:
            with self.es_locker:
                docs = self.es.search(query).hits.hits
            if len(docs) == 0:
                return None
            best = docs[0]._source
            if len(docs) > 1:
                for d in docs:
                    if d._id.endswith(d._source.branch.locale):
                        best = d._source
                Log.warning("expecting no more than one document")
            return best
        except Exception as e:
            e = Except.wrap(e)
            if "EsRejectedExecutionException[rejected execution (queue capacity" in e:
                (Till(seconds=Random.int(30))).wait()
                continue
            else:
                Log.warning("Bad ES call, waiting for {{num}} seconds", num=WAIT_AFTER_NODE_FAILURE, cause=e)
                Till(seconds=WAIT_AFTER_NODE_FAILURE).wait()
                continue

    Log.warning("ES did not deliver, fall back to HG")
    return None
def output(*args):
    with cache_store.locker:
        if using_self:
            self = args[0]
            args = args[1:]
        else:
            self = cache_store

        now = Date.now()
        try:
            _cache = getattr(self, attr_name)
        except Exception:
            _cache = {}
            setattr(self, attr_name, _cache)

        if Random.int(100) == 0:
            # REMOVE OLD CACHE
            _cache = {k: v for k, v in _cache.items() if v[0] == None or v[0] > now}
            setattr(self, attr_name, _cache)

        timeout, key, value, exception = _cache.get(args, (Null, Null, Null, Null))

    if now >= timeout:
        value = func(self, *args)
        with cache_store.locker:
            _cache[args] = (now + cache_store.timeout, args, value, None)
        return value

    if value == None:
        if exception == None:
            try:
                value = func(self, *args)
                with cache_store.locker:
                    _cache[args] = (now + cache_store.timeout, args, value, None)
                return value
            except Exception as e:
                e = Except.wrap(e)
                with cache_store.locker:
                    _cache[args] = (now + cache_store.timeout, args, None, e)
                raise e
        else:
            raise exception
    else:
        return value
def write(self, template, params):
    bad = []
    for m in self.many:
        try:
            m.write(template, params)
        except Exception as e:
            e = Except.wrap(e)
            bad.append(m)
            Log.warning("Logger {{type|quote}} failed! It will be removed.", type=m.__class__.__name__, cause=e)
    with suppress_exception:
        for b in bad:
            self.many.remove(b)

    return self
def begin(self):
    if self.transaction_level == 0:
        self.cursor = self.db.cursor()
    self.transaction_level += 1
    self.execute("SET TIME_ZONE='+00:00'")
    if EXECUTE_TIMEOUT:
        try:
            self.execute("SET MAX_EXECUTION_TIME=" + text_type(EXECUTE_TIMEOUT))
            self._execute_backlog()
        except Exception as e:
            e = Except.wrap(e)
            if "Unknown system variable 'MAX_EXECUTION_TIME'" in e:
                globals()['EXECUTE_TIMEOUT'] = 0  # THIS VERSION OF MYSQL DOES NOT HAVE SESSION LEVEL VARIABLE
            else:
                raise e
def _get_queue(self, row):
    row = wrap(row)
    if row.json:
        row.value, row.json = json2value(row.json), None
    timestamp = Date(self.rollover_field(row.value))
    if timestamp == None:
        return Null
    elif timestamp < Date.today() - self.rollover_max:
        return DATA_TOO_OLD

    rounded_timestamp = timestamp.floor(self.rollover_interval)
    with self.locker:
        queue = self.known_queues.get(rounded_timestamp.unix)
    if queue == None:
        candidates = jx.run({
            "from": ListContainer(".", self.cluster.get_aliases()),
            "where": {"regex": {"index": self.settings.index + "\d\d\d\d\d\d\d\d_\d\d\d\d\d\d"}},
            "sort": "index"
        })
        best = None
        for c in candidates:
            c = wrap(c)
            c.date = unicode2Date(c.index[-15:], elasticsearch.INDEX_DATE_FORMAT)
            if timestamp > c.date:
                best = c
        if not best or rounded_timestamp > best.date:
            if rounded_timestamp < wrap(candidates[-1]).date:
                es = self.cluster.get_or_create_index(read_only=False, alias=best.alias, index=best.index, kwargs=self.settings)
            else:
                try:
                    es = self.cluster.create_index(create_timestamp=rounded_timestamp, kwargs=self.settings)
                    es.add_alias(self.settings.index)
                except Exception as e:
                    e = Except.wrap(e)
                    if "IndexAlreadyExistsException" not in e:
                        Log.error("Problem creating index", cause=e)
                    return self._get_queue(row)  # TRY AGAIN
        else:
            es = self.cluster.get_or_create_index(read_only=False, alias=best.alias, index=best.index, kwargs=self.settings)

        with suppress_exception:
            es.set_refresh_interval(seconds=60 * 5, timeout=5)

        self._delete_old_indexes(candidates)
        threaded_queue = es.threaded_queue(max_size=self.settings.queue_size, batch_size=self.settings.batch_size, silent=True)
        with self.locker:
            queue = self.known_queues[rounded_timestamp.unix] = threaded_queue
    return queue
def worker(please_stop):
    while not please_stop:
        try:
            response = requests.get("http://169.254.169.254/latest/meta-data/spot/termination-time")
            if response.status_code not in [400, 404]:
                Log.alert("Shutdown AWS Spot Node {{name}} {{type}}", name=machine_metadata.name, type=machine_metadata.aws_instance_type)
                please_stop.go()
        except Exception as e:
            e = Except.wrap(e)
            if "Failed to establish a new connection: [Errno 10060]" in e or "A socket operation was attempted to an unreachable network" in e:
                Log.note("AWS Spot Detection has shutdown, probably not a spot node, (http://169.254.169.254 is unreachable)")
                return
            else:
                Log.warning("AWS shutdown detection has problems", cause=e)
            (Till(seconds=61) | please_stop).wait()
        (Till(seconds=11) | please_stop).wait()
def begin(self):
    if self.transaction_level == 0:
        self.cursor = self.db.cursor()
    self.transaction_level += 1
    self.execute("SET TIME_ZONE='+00:00'")
    if EXECUTE_TIMEOUT:
        try:
            self.execute("SET MAX_EXECUTION_TIME=" + text(EXECUTE_TIMEOUT))
            self._execute_backlog()
        except Exception as e:
            e = Except.wrap(e)
            if "Unknown system variable 'MAX_EXECUTION_TIME'" in e:
                globals()['EXECUTE_TIMEOUT'] = 0  # THIS VERSION OF MYSQL DOES NOT HAVE SESSION LEVEL VARIABLE
            else:
                raise e
def _worker(self, please_stop):
    try:
        # MAIN EXECUTION LOOP
        while not please_stop:
            command_item = self.queue.pop(till=please_stop)
            if command_item is None:
                break
            self._process_command_item(command_item)
    except Exception as e:
        e = Except.wrap(e)
        if not please_stop:
            Log.warning("Problem with sql", cause=e)
    finally:
        self.closed = True
        DEBUG and Log.note("Database is closed")
        self.db.close()
def fatal(
    cls,
    template,  # human readable template
    default_params={},  # parameters for template
    cause=None,  # plausible cause
    stack_depth=0,
    log_context=None,
    **more_params
):
    """
    SEND TO STDERR

    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param cause: *Exception* for chaining
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any more parameters (which will overwrite default_params)
    :return:
    """
    if default_params and isinstance(listwrap(default_params)[0], BaseException):
        cause = default_params
        default_params = {}

    params = dict(unwrap(default_params), **more_params)
    cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
    trace = exceptions.extract_stack(stack_depth + 1)

    e = Except(exceptions.ERROR, template, params, cause, trace)
    str_e = unicode(e)

    error_mode = cls.error_mode
    with suppress_exception:
        if not error_mode:
            cls.error_mode = True
            Log.note(
                "{{error|unicode}}",
                error=e,
                log_context=set_default({"context": exceptions.FATAL}, log_context),
                stack_depth=stack_depth + 1
            )
    cls.error_mode = error_mode

    sys.stderr.write(str_e.encode('utf8'))
def _iter():
    g = 0
    out = FlatList()
    try:
        for i, d in enumerate(data):
            out.append(d)
            if (i + 1) % max_size == 0:
                yield g, out
                g += 1
                out = FlatList()
        if out:
            yield g, out
    except Exception as e:
        e = Except.wrap(e)
        if out:
            # AT LEAST TRY TO RETURN WHAT HAS BEEN PROCESSED SO FAR
            yield g, out
        Log.error("Problem inside jx.groupby", e)
def _iter():
    g = 0
    out = []
    try:
        for i, d in enumerate(data):
            out.append(d)
            if (i + 1) % size == 0:
                yield g, FlatList(vals=out)
                g += 1
                out = []
        if out:
            yield g, FlatList(vals=out)
    except Exception as e:
        e = Except.wrap(e)
        if out:
            # AT LEAST TRY TO RETURN WHAT HAS BEEN PROCESSED SO FAR
            yield g, out
        Log.error("Problem inside jx.chunk", e)
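A self-contained sketch of the chunking pattern `_iter` implements, using a plain list in place of `FlatList`:

def chunk(data, size):
    # YIELD (group_number, sub_list) PAIRS, size ITEMS AT A TIME
    g = 0
    out = []
    for i, d in enumerate(data):
        out.append(d)
        if (i + 1) % size == 0:
            yield g, out
            g += 1
            out = []
    if out:
        yield g, out  # FINAL, POSSIBLY SHORT, GROUP

assert list(chunk("abcde", 2)) == [(0, ["a", "b"]), (1, ["c", "d"]), (2, ["e"])]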
def assertRaises(self, problem, function, *args, **kwargs):
    try:
        function(*args, **kwargs)
    except Exception as e:
        f = Except.wrap(e)
        if isinstance(problem, basestring):
            if problem in f:
                return
            Log.error(
                "expecting an exception returning {{problem|quote}} got something else instead",
                problem=problem,
                cause=f
            )
        elif not isinstance(f, problem) and not isinstance(e, problem):
            Log.error("expecting an exception of type {{type}} to be raised", type=problem)
        else:
            return

    Log.error("Expecting an exception to be raised")
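Usage sketch for the assertRaises helper above; FuzzyTestCase from mo-testing is the likely home of this method, but treat the import path as an assumption:

from mo_testing.fuzzytestcase import FuzzyTestCase  # ASSUMED LOCATION

def divide(a, b):
    return a / b

class ExampleTest(FuzzyTestCase):
    def test_divide_by_zero(self):
        # PASSES IF THE RAISED EXCEPTION CONTAINS THE GIVEN SUBSTRING
        self.assertRaises("division", divide, 1, 0)
        # ALSO ACCEPTS AN EXCEPTION TYPE
        self.assertRaises(ZeroDivisionError, divide, 1, 0)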
def close(self):
    if self.transaction_level > 0:
        if self.readonly:
            self.commit()  # AUTO-COMMIT
        else:
            Log.error("expecting commit() or rollback() before close")
    self.cursor = None  # NOT NEEDED
    try:
        self.db.close()
    except Exception as e:
        e = Except.wrap(e)
        if "Already closed" in e:
            return
        Log.warning("can not close()", e)
    finally:
        try:
            all_db.remove(self)
        except Exception as e:
            Log.error("not expected", cause=e)
def query(self, _query):
    try:
        query = QueryOp.wrap(_query, schema=self)

        for n in self.namespaces:
            query = n.convert(query)
        if self.typed:
            query = Typed().convert(query)

        for s in listwrap(query.select):
            if not aggregates1_4.get(s.aggregate):
                Log.error(
                    "ES can not aggregate {{name}} because {{aggregate|quote}} is not a recognized aggregate",
                    name=s.name,
                    aggregate=s.aggregate
                )

        frum = query["from"]
        if isinstance(frum, QueryOp):
            result = self.query(frum)
            q2 = query.copy()
            q2.frum = result
            return jx.run(q2)

        if is_deepop(self._es, query):
            return es_deepop(self._es, query)
        if is_aggsop(self._es, query):
            return es_aggsop(self._es, frum, query)
        if is_setop(self._es, query):
            return es_setop(self._es, query)
        if es09_setop.is_setop(query):
            return es09_setop.es_setop(self._es, None, query)
        if es09_aggop.is_aggop(query):
            return es09_aggop.es_aggop(self._es, None, query)
        Log.error("Can not handle")
    except Exception as e:
        e = Except.wrap(e)
        if "Data too large, data for" in e:
            http.post(self._es.cluster.path + "/_cache/clear")
            Log.error("Problem (Tried to clear Elasticsearch cache)", e)
        Log.error("problem", e)
def post_json(url, **kwargs):
    """
    ASSUME RESPONSE IS JSON
    """
    if b"json" in kwargs:
        kwargs[b"data"] = convert.unicode2utf8(convert.value2json(kwargs[b"json"]))
    elif b'data' in kwargs:
        kwargs[b"data"] = convert.unicode2utf8(convert.value2json(kwargs[b"data"]))
    else:
        Log.error("Expecting `json` parameter")

    response = post(url, **kwargs)
    c = response.content
    try:
        details = mo_json.json2value(convert.utf82unicode(c))
    except Exception as e:
        Log.error("Unexpected return value {{content}}", content=c, cause=e)

    if response.status_code not in [200, 201]:
        Log.error("Bad response", cause=Except.wrap(details))

    return details
def warning(
    cls,
    template,
    default_params={},
    cause=None,
    stack_depth=0,
    log_context=None,
    **more_params
):
    """
    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param cause: *Exception* for chaining
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any more parameters (which will overwrite default_params)
    :return:
    """
    timestamp = datetime.utcnow()
    if not is_text(template):
        Log.error("Log.warning was expecting a unicode template")

    if isinstance(default_params, BaseException):
        cause = default_params
        default_params = {}

    if "values" in more_params.keys():
        Log.error("Can not handle a logging parameter by name `values`")

    params = Data(dict(default_params, **more_params))
    cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
    trace = exceptions.extract_stack(stack_depth + 1)

    e = Except(exceptions.WARNING, template=template, params=params, cause=cause, trace=trace)
    Log._annotate(e, timestamp, stack_depth + 1)
def worker(please_stop):
    seen_problem = False
    while not please_stop:
        request_time = (time.time() - timer.START) / 60  # MINUTES
        try:
            response = requests.get("http://169.254.169.254/latest/meta-data/spot/termination-time")
            seen_problem = False
            if response.status_code not in [400, 404]:
                Log.alert("Shutdown AWS Spot Node {{name}} {{type}}", name=machine_metadata.name, type=machine_metadata.aws_instance_type)
                please_stop.go()
        except Exception as e:
            e = Except.wrap(e)
            if "Failed to establish a new connection: [Errno 10060]" in e or "A socket operation was attempted to an unreachable network" in e:
                Log.note("AWS Spot Detection has shutdown, probably not a spot node, (http://169.254.169.254 is unreachable)")
                return
            elif seen_problem:
                # IGNORE THE FIRST PROBLEM
                Log.warning("AWS shutdown detection has more than one consecutive problem: (last request {{time|round(1)}} minutes since startup)", time=request_time, cause=e)
            seen_problem = True

            (Till(seconds=61) | please_stop).wait()
        (Till(seconds=11) | please_stop).wait()
def post_json(url, **kwargs):
    """
    ASSUME RESPONSE IS JSON
    """
    if 'json' in kwargs:
        kwargs['data'] = unicode2utf8(value2json(kwargs['json']))
        del kwargs['json']
    elif 'data' in kwargs:
        kwargs['data'] = unicode2utf8(value2json(kwargs['data']))
    else:
        Log.error(u"Expecting `json` parameter")

    response = post(url, **kwargs)
    details = json2value(utf82unicode(response.content))
    if response.status_code not in [200, 201, 202]:
        if "template" in details:
            Log.error(u"Bad response code {{code}}", code=response.status_code, cause=Except.wrap(details))
        else:
            Log.error(u"Bad response code {{code}}\n{{details}}", code=response.status_code, details=details)
    else:
        return details
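A usage sketch for post_json; the endpoint and payload are invented for illustration:

# SERIALIZES THE PAYLOAD, MOVES IT TO data=, AND PARSES THE JSON RESPONSE;
# NON-2xx RESPONSES BECOME Log.error CALLS (WHICH RAISE)
details = post_json(
    "http://localhost:9200/my_index/_search",  # HYPOTHETICAL ENDPOINT
    json={"query": {"match_all": {}}, "size": 1},
)
print(details.hits.total)  # json2value RETURNS Data, SO DOT ACCESS WORKS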
def request(method, url, zip=None, retry=None, **kwargs):
    """
    JUST LIKE requests.request() BUT WITH DEFAULT HEADERS AND FIXES
    DEMANDS data IS ONE OF:
    * A JSON-SERIALIZABLE STRUCTURE, OR
    * LIST OF JSON-SERIALIZABLE STRUCTURES, OR
    * None

    Parameters
    * zip - ZIP THE REQUEST BODY, IF BIG ENOUGH
    * json - JSON-SERIALIZABLE STRUCTURE
    * retry - {"times": x, "sleep": y} STRUCTURE

    THE BYTE_STRINGS (b"") ARE NECESSARY TO PREVENT httplib.py FROM **FREAKING OUT**
    IT APPEARS requests AND httplib.py SIMPLY CONCATENATE STRINGS BLINDLY, WHICH
    INCLUDES url AND headers
    """
    global _warning_sent
    if not default_headers and not _warning_sent:
        _warning_sent = True
        Log.warning(
            "The pyLibrary.env.http module was meant to add extra "
            "default headers to all requests, specifically the 'Referer' "
            "header with a URL to the project. Use the `pyLibrary.debug.constants.set()` "
            "function to set `pyLibrary.env.http.default_headers`"
        )

    if isinstance(url, list):
        # TRY MANY URLS
        failures = []
        for remaining, u in jx.countdown(url):
            try:
                response = request(method, u, zip=zip, retry=retry, **kwargs)
                if Math.round(response.status_code, decimal=-2) not in [400, 500]:
                    return response
                if not remaining:
                    return response
            except Exception as e:
                e = Except.wrap(e)
                failures.append(e)
        Log.error("Tried {{num}} urls", num=len(url), cause=failures)

    if b"session" in kwargs:
        session = kwargs[b"session"]
        del kwargs[b"session"]
    else:
        session = sessions.Session()
    session.headers.update(default_headers)

    if zip is None:
        zip = ZIP_REQUEST

    if isinstance(url, unicode):
        # httplib.py WILL **FREAK OUT** IF IT SEES ANY UNICODE
        url = url.encode("ascii")

    _to_ascii_dict(kwargs)
    timeout = kwargs[b'timeout'] = coalesce(kwargs.get(b'timeout'), default_timeout)

    if retry == None:
        retry = Data(times=1, sleep=0)
    elif isinstance(retry, Number):
        retry = Data(times=retry, sleep=1)
    else:
        retry = wrap(retry)
        if isinstance(retry.sleep, Duration):
            retry.sleep = retry.sleep.seconds
        set_default(retry, {"times": 1, "sleep": 0})

    if b'json' in kwargs:
        kwargs[b'data'] = convert.value2json(kwargs[b'json']).encode("utf8")
        del kwargs[b'json']

    try:
        headers = kwargs[b"headers"] = unwrap(coalesce(wrap(kwargs)[b"headers"], {}))
        set_default(headers, {b"accept-encoding": b"compress, gzip"})

        if zip and len(coalesce(kwargs.get(b"data"))) > 1000:
            compressed = convert.bytes2zip(kwargs[b"data"])
            headers[b'content-encoding'] = b'gzip'
            kwargs[b"data"] = compressed
            _to_ascii_dict(headers)
        else:
            _to_ascii_dict(headers)
    except Exception as e:
        Log.error("Request setup failure on {{url}}", url=url, cause=e)

    errors = []
    for r in range(retry.times):
        if r:
            Till(seconds=retry.sleep).wait()

        try:
            if DEBUG:
                Log.note("http {{method}} to {{url}}", method=method, url=url)
            return session.request(method=method, url=url, **kwargs)
        except Exception as e:
            errors.append(Except.wrap(e))

    if " Read timed out." in errors[0]:
        Log.error("Tried {{times}} times: Timeout failure (timeout was {{timeout}}", timeout=timeout, times=retry.times, cause=errors[0])
    else:
        Log.error("Tried {{times}} times: Request failure of {{url}}", url=url, times=retry.times, cause=errors[0])
def request(method, url, headers=None, zip=None, retry=None, **kwargs):
    """
    JUST LIKE requests.request() BUT WITH DEFAULT HEADERS AND FIXES
    DEMANDS data IS ONE OF:
    * A JSON-SERIALIZABLE STRUCTURE, OR
    * LIST OF JSON-SERIALIZABLE STRUCTURES, OR
    * None

    Parameters
    * zip - ZIP THE REQUEST BODY, IF BIG ENOUGH
    * json - JSON-SERIALIZABLE STRUCTURE
    * retry - {"times": x, "sleep": y} STRUCTURE

    THE BYTE_STRINGS (b"") ARE NECESSARY TO PREVENT httplib.py FROM **FREAKING OUT**
    IT APPEARS requests AND httplib.py SIMPLY CONCATENATE STRINGS BLINDLY, WHICH
    INCLUDES url AND headers
    """
    global _warning_sent
    global request_count

    if not _warning_sent and not default_headers:
        Log.warning(text_type(
            "The pyLibrary.env.http module was meant to add extra " +
            "default headers to all requests, specifically the 'Referer' " +
            "header with a URL to the project. Use the `pyLibrary.debug.constants.set()` " +
            "function to set `pyLibrary.env.http.default_headers`"
        ))
        _warning_sent = True

    if is_list(url):
        # TRY MANY URLS
        failures = []
        for remaining, u in jx.countdown(url):
            try:
                response = request(method, u, retry=retry, **kwargs)
                if mo_math.round(response.status_code, decimal=-2) not in [400, 500]:
                    return response
                if not remaining:
                    return response
            except Exception as e:
                e = Except.wrap(e)
                failures.append(e)
        Log.error(u"Tried {{num}} urls", num=len(url), cause=failures)

    if 'session' in kwargs:
        session = kwargs['session']
        del kwargs['session']
        sess = Null
    else:
        sess = session = sessions.Session()

    with closing(sess):
        if PY2 and is_text(url):
            # httplib.py WILL **FREAK OUT** IF IT SEES ANY UNICODE
            url = url.encode('ascii')

        try:
            set_default(kwargs, {"zip": zip, "retry": retry}, DEFAULTS)
            _to_ascii_dict(kwargs)

            # HEADERS
            headers = kwargs['headers'] = unwrap(set_default(headers, session.headers, default_headers))
            _to_ascii_dict(headers)
            del kwargs['headers']

            # RETRY
            retry = wrap(kwargs['retry'])
            if isinstance(retry, Number):
                retry = set_default({"times": retry}, DEFAULTS['retry'])
            if isinstance(retry.sleep, Duration):
                retry.sleep = retry.sleep.seconds
            del kwargs['retry']

            # JSON
            if 'json' in kwargs:
                kwargs['data'] = value2json(kwargs['json']).encode('utf8')
                del kwargs['json']

            # ZIP
            set_default(headers, {'Accept-Encoding': 'compress, gzip'})
            if kwargs['zip'] and len(coalesce(kwargs.get('data'))) > 1000:
                compressed = convert.bytes2zip(kwargs['data'])
                headers['content-encoding'] = 'gzip'
                kwargs['data'] = compressed
            del kwargs['zip']
        except Exception as e:
            Log.error(u"Request setup failure on {{url}}", url=url, cause=e)

        errors = []
        for r in range(retry.times):
            if r:
                Till(seconds=retry.sleep).wait()

            try:
                DEBUG and Log.note(u"http {{method|upper}} to {{url}}", method=method, url=text_type(url))
                request_count += 1
                return session.request(method=method, headers=headers, url=str(url), **kwargs)
            except Exception as e:
                e = Except.wrap(e)
                if retry['http'] and str(url).startswith("https://") and "EOF occurred in violation of protocol" in e:
                    url = URL("http://" + str(url)[8:])
                    Log.note("Changed {{url}} to http due to SSL EOF violation.", url=str(url))
                errors.append(e)

        if " Read timed out." in errors[0]:
            Log.error(u"Tried {{times}} times: Timeout failure (timeout was {{timeout}}", timeout=kwargs['timeout'], times=retry.times, cause=errors[0])
        else:
            Log.error(u"Tried {{times}} times: Request failure of {{url}}", url=url, times=retry.times, cause=errors[0])
def _normalize_revision(self, r, found_revision, push, get_diff, get_moves):
    new_names = set(r.keys()) - KNOWN_TAGS
    if new_names and not r.tags:
        Log.warning(
            "hg is returning new property names {{names|quote}} for {{changeset}} from {{url}}",
            names=new_names,
            changeset=r.node,
            url=found_revision.branch.url
        )

    changeset = Changeset(
        id=r.node,
        id12=r.node[0:12],
        author=r.user,
        description=strings.limit(coalesce(r.description, r.desc), 2000),
        date=parse_hg_date(r.date),
        files=r.files,
        backedoutby=r.backedoutby if r.backedoutby else None,
        bug=self._extract_bug_id(r.description)
    )
    rev = Revision(
        branch=found_revision.branch,
        index=r.rev,
        changeset=changeset,
        parents=unwraplist(list(set(r.parents))),
        children=unwraplist(list(set(r.children))),
        push=push,
        phase=r.phase,
        bookmarks=unwraplist(r.bookmarks),
        landingsystem=r.landingsystem,
        etl={"timestamp": Date.now().unix, "machine": machine_metadata}
    )

    r.pushuser = None
    r.pushdate = None
    r.pushid = None
    r.node = None
    r.user = None
    r.desc = None
    r.description = None
    r.date = None
    r.files = None
    r.backedoutby = None
    r.parents = None
    r.children = None
    r.bookmarks = None
    r.landingsystem = None

    set_default(rev, r)

    # ADD THE DIFF
    if get_diff:
        rev.changeset.diff = self._get_json_diff_from_hg(rev)
    if get_moves:
        rev.changeset.moves = self._get_moves_from_hg(rev)

    try:
        _id = coalesce(rev.changeset.id12, "") + "-" + rev.branch.name + "-" + coalesce(rev.branch.locale, DEFAULT_LOCALE)
        with self.es_locker:
            self.es.add({"id": _id, "value": rev})
    except Exception as e:
        e = Except.wrap(e)
        Log.warning("Did not save to ES, waiting {{duration}} seconds", duration=WAIT_AFTER_NODE_FAILURE, cause=e)
        Till(seconds=WAIT_AFTER_NODE_FAILURE).wait()
        if "FORBIDDEN/12/index read-only" in e:
            pass  # KNOWN FAILURE MODE

    return rev
def _worker(self, please_stop):
    global _load_extension_warning_sent

    if DEBUG:
        Log.note("Sqlite version {{version}}", version=sqlite3.sqlite_version)

    if Sqlite.canonical:
        self.db = Sqlite.canonical
    else:
        self.db = sqlite3.connect(coalesce(self.filename, ':memory:'))

        library_loc = File.new_instance(sys.modules[__name__].__file__, "../..")
        full_path = File.new_instance(library_loc, "vendor/sqlite/libsqlitefunctions.so").abspath
        try:
            trace = extract_stack(0)[0]
            if self.upgrade:
                if os.name == 'nt':
                    file = File.new_instance(trace["file"], "../../vendor/sqlite/libsqlitefunctions.so")
                else:
                    file = File.new_instance(trace["file"], "../../vendor/sqlite/libsqlitefunctions")
                full_path = file.abspath
                self.db.enable_load_extension(True)
                self.db.execute("SELECT load_extension(" + self.quote_value(full_path) + ")")
        except Exception as e:
            if not _load_extension_warning_sent:
                _load_extension_warning_sent = True
                Log.warning("Could not load {{file}}, doing without. (no SQRT for you!)", file=full_path, cause=e)

    try:
        while not please_stop:
            command, result, signal, trace = self.queue.pop(till=please_stop)

            if DEBUG_INSERT and command.strip().lower().startswith("insert"):
                Log.note("Running command\n{{command|indent}}", command=command)
            if DEBUG and not command.strip().lower().startswith("insert"):
                Log.note("Running command\n{{command|indent}}", command=command)
            with Timer("Run command", debug=DEBUG):
                if signal is not None:
                    try:
                        curr = self.db.execute(command)
                        self.db.commit()
                        result.meta.format = "table"
                        result.header = [d[0] for d in curr.description] if curr.description else None
                        result.data = curr.fetchall()
                        if DEBUG and result.data:
                            text = convert.table2csv(list(result.data))
                            Log.note("Result:\n{{data}}", data=text)
                    except Exception as e:
                        e = Except.wrap(e)
                        result.exception = Except(ERROR, "Problem with\n{{command|indent}}", command=command, cause=e)
                    finally:
                        signal.go()
                else:
                    try:
                        self.db.execute(command)
                        self.db.commit()
                    except Exception as e:
                        e = Except.wrap(e)
                        e.cause = Except(
                            type=ERROR,
                            template="Bad call to Sqlite",
                            trace=trace
                        )
                        Log.warning("Failure to execute", cause=e)
    except Exception as e:
        if not please_stop:
            Log.error("Problem with sql thread", e)
    finally:
        if DEBUG:
            Log.note("Database is closed")
        self.db.commit()
        self.db.close()
def _esfilter2sqlwhere(db, esfilter):
    """
    CONVERT ElasticSearch FILTER TO SQL FILTER
    db - REQUIRED TO PROPERLY QUOTE VALUES AND COLUMN NAMES
    """
    esfilter = wrap(esfilter)

    if esfilter is True:
        return SQL_TRUE
    elif esfilter["and"]:
        return sql_iso(SQL_AND.join([esfilter2sqlwhere(db, a) for a in esfilter["and"]]))
    elif esfilter["or"]:
        return sql_iso(SQL_OR.join([esfilter2sqlwhere(db, a) for a in esfilter["or"]]))
    elif esfilter["not"]:
        return SQL_NOT + sql_iso(esfilter2sqlwhere(db, esfilter["not"]))
    elif esfilter.term:
        return sql_iso(SQL_AND.join([
            quote_column(col) + SQL("=") + quote_value(val)
            for col, val in esfilter.term.items()
        ]))
    elif esfilter.terms:
        for col, v in esfilter.terms.items():
            if len(v) == 0:
                return "FALSE"
            try:
                int_list = convert.value2intlist(v)
                has_null = False
                for vv in v:
                    if vv == None:
                        has_null = True
                        break
                if int_list:
                    filter = int_list_packer(col, int_list)
                    if has_null:
                        return esfilter2sqlwhere(db, {"or": [{"missing": col}, filter]})
                    elif 'terms' in filter and set(filter['terms'].get(col, [])) == set(int_list):
                        return quote_column(col) + " in " + quote_list(int_list)
                    else:
                        return esfilter2sqlwhere(db, filter)
                else:
                    if has_null:
                        return esfilter2sqlwhere(db, {"missing": col})
                    else:
                        return "false"
            except Exception as e:
                e = Except.wrap(e)
                pass
            return quote_column(col) + " in " + quote_list(v)
    elif esfilter.script:
        return sql_iso(esfilter.script)
    elif esfilter.range:
        name2sign = {
            "gt": SQL(">"),
            "gte": SQL(">="),
            "lte": SQL("<="),
            "lt": SQL("<")
        }

        def single(col, r):
            min = coalesce(r["gte"], r[">="])
            max = coalesce(r["lte"], r["<="])
            if min != None and max != None:
                # SPECIAL CASE (BETWEEN)
                sql = quote_column(col) + SQL(" BETWEEN ") + quote_value(min) + SQL_AND + quote_value(max)
            else:
                sql = SQL_AND.join(
                    quote_column(col) + name2sign[sign] + quote_value(value)
                    for sign, value in r.items()
                )
            return sql

        terms = [single(col, ranges) for col, ranges in esfilter.range.items()]
        if len(terms) == 1:
            output = terms[0]
        else:
            output = sql_iso(SQL_AND.join(terms))
        return output
    elif esfilter.missing:
        if isinstance(esfilter.missing, text_type):
            return sql_iso(quote_column(esfilter.missing) + SQL_IS_NULL)
        else:
            return sql_iso(quote_column(esfilter.missing.field) + SQL_IS_NULL)
    elif esfilter.exists:
        if isinstance(esfilter.exists, text_type):
            return sql_iso(quote_column(esfilter.exists) + SQL_IS_NOT_NULL)
        else:
            return sql_iso(quote_column(esfilter.exists.field) + SQL_IS_NOT_NULL)
    elif esfilter.match_all:
        return SQL_TRUE
    elif esfilter.instr:
        return sql_iso(SQL_AND.join([
            "instr" + sql_iso(quote_column(col) + ", " + quote_value(val)) + ">0"
            for col, val in esfilter.instr.items()
        ]))
    else:
        Log.error("Can not convert esfilter to SQL: {{esfilter}}", esfilter=esfilter)
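For orientation, a sketch of what `_esfilter2sqlwhere` might produce for a small filter; exact quoting comes from `quote_column`/`quote_value` and the `db` connection, so the commented output is indicative only:

# INPUT: AN ES-STYLE FILTER (FIELD NAMES ARE INVENTED)
esfilter = {"and": [
    {"term": {"build.type": "opt"}},
    {"range": {"run.timestamp": {"gte": 1500000000, "lte": 1600000000}}}
]}

# INDICATIVE OUTPUT, ASSUMING MySQL-STYLE QUOTING:
#   ((`build`.`type`='opt') AND `run`.`timestamp` BETWEEN 1500000000 AND 1600000000)
sql = _esfilter2sqlwhere(db, esfilter)  # db IS AN OPEN CONNECTION (ASSUMPTION)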
def _update_cardinality(self, column):
    """
    QUERY ES TO FIND CARDINALITY AND PARTITIONS FOR A SIMPLE COLUMN
    """
    if column.es_index in self.index_does_not_exist:
        return

    if column.jx_type in STRUCT:
        Log.error("not supported")
    try:
        if column.es_index == "meta.columns":
            partitions = jx.sort([g[column.es_column] for g, _ in jx.groupby(self.meta.columns, column.es_column) if g[column.es_column] != None])
            self.meta.columns.update({
                "set": {
                    "partitions": partitions,
                    "count": len(self.meta.columns),
                    "cardinality": len(partitions),
                    "multi": 1,
                    "last_updated": Date.now()
                },
                "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
            })
            return
        if column.es_index == "meta.tables":
            partitions = jx.sort([g[column.es_column] for g, _ in jx.groupby(self.meta.tables, column.es_column) if g[column.es_column] != None])
            self.meta.columns.update({
                "set": {
                    "partitions": partitions,
                    "count": len(self.meta.tables),
                    "cardinality": len(partitions),
                    "multi": 1,
                    "last_updated": Date.now()
                },
                "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
            })
            return

        es_index = column.es_index.split(".")[0]

        is_text = [cc for cc in self.meta.columns if cc.es_column == column.es_column and cc.es_type == "text"]
        if is_text:
            # text IS A MULTIVALUE STRING THAT CAN ONLY BE FILTERED
            result = self.es_cluster.post("/" + es_index + "/_search", data={
                "aggs": {
                    "count": {"filter": {"match_all": {}}}
                },
                "size": 0
            })
            count = result.hits.total
            cardinality = max(1001, count)
            multi = 1001
        elif column.es_column == "_id":
            result = self.es_cluster.post("/" + es_index + "/_search", data={
                "query": {"match_all": {}},
                "size": 0
            })
            count = cardinality = result.hits.total
            multi = 1
        elif column.es_type == BOOLEAN:
            result = self.es_cluster.post("/" + es_index + "/_search", data={
                "aggs": {
                    "count": _counting_query(column)
                },
                "size": 0
            })
            count = result.hits.total
            cardinality = 2
            multi = 1
        else:
            result = self.es_cluster.post("/" + es_index + "/_search", data={
                "aggs": {
                    "count": _counting_query(column),
                    "multi": {"max": {"script": "doc[" + quote(column.es_column) + "].values.size()"}}
                },
                "size": 0
            })
            agg_results = result.aggregations
            count = result.hits.total
            cardinality = coalesce(agg_results.count.value, agg_results.count._nested.value, agg_results.count.doc_count)
            multi = int(coalesce(agg_results.multi.value, 1))
            if cardinality == None:
                Log.error("logic error")

        query = Data(size=0)

        if column.es_column == "_id":
            self.meta.columns.update({
                "set": {
                    "count": cardinality,
                    "cardinality": cardinality,
                    "multi": 1,
                    "last_updated": Date.now()
                },
                "clear": ["partitions"],
                "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
            })
            return
        elif cardinality > 1000 or (count >= 30 and cardinality == count) or (count >= 1000 and cardinality / count > 0.99):
            DEBUG and Log.note("{{table}}.{{field}} has {{num}} parts", table=column.es_index, field=column.es_column, num=cardinality)
            self.meta.columns.update({
                "set": {
                    "count": count,
                    "cardinality": cardinality,
                    "multi": multi,
                    "last_updated": Date.now()
                },
                "clear": ["partitions"],
                "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
            })
            return
        elif column.es_type in elasticsearch.ES_NUMERIC_TYPES and cardinality > 30:
            DEBUG and Log.note("{{table}}.{{field}} has {{num}} parts", table=column.es_index, field=column.es_column, num=cardinality)
            self.meta.columns.update({
                "set": {
                    "count": count,
                    "cardinality": cardinality,
                    "multi": multi,
                    "last_updated": Date.now()
                },
                "clear": ["partitions"],
                "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
            })
            return
        elif len(column.nested_path) != 1:
            query.aggs["_"] = {
                "nested": {"path": column.nested_path[0]},
                "aggs": {"_nested": {"terms": {"field": column.es_column}}}
            }
        elif cardinality == 0:
            query.aggs["_"] = {"terms": {"field": column.es_column}}
        else:
            query.aggs["_"] = {"terms": {"field": column.es_column, "size": cardinality}}

        result = self.es_cluster.post("/" + es_index + "/_search", data=query)

        aggs = result.aggregations._
        if aggs._nested:
            parts = jx.sort(aggs._nested.buckets.key)
        else:
            parts = jx.sort(aggs.buckets.key)

        self.meta.columns.update({
            "set": {
                "count": count,
                "cardinality": cardinality,
                "multi": multi,
                "partitions": parts,
                "last_updated": Date.now()
            },
            "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
        })
    except Exception as e:
        # CAN NOT IMPORT: THE TEST MODULES SETS UP LOGGING
        # from tests.test_jx import TEST_TABLE
        e = Except.wrap(e)
        TEST_TABLE = "testdata"
        is_missing_index = any(w in e for w in ["IndexMissingException", "index_not_found_exception"])
        is_test_table = column.es_index.startswith((TEST_TABLE_PREFIX, TEST_TABLE))
        if is_missing_index and is_test_table:
            # WE EXPECT TEST TABLES TO DISAPPEAR
            self.meta.columns.update({
                "clear": ".",
                "where": {"eq": {"es_index": column.es_index}}
            })
            self.index_does_not_exist.add(column.es_index)
        else:
            self.meta.columns.update({
                "set": {
                    "last_updated": Date.now()
                },
                "clear": [
                    "count",
                    "cardinality",
                    "multi",
                    "partitions",
                ],
                "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
            })
            Log.warning("Could not get {{col.es_index}}.{{col.es_column}} info", col=column, cause=e)