def record_request(request, query_, data, error):
    try:
        if request_log_queue == None:
            return
        if data and len(data) > 10000:
            data = data[:10000]
        log = wrap({
            "timestamp": Date.now(),
            "http_user_agent": request.headers.get("user_agent"),
            "http_accept_encoding": request.headers.get("accept_encoding"),
            "referer": request.headers.get("x-referer"),
            "path": request.headers.environ["werkzeug.request"].full_path,
            "content_length": request.headers.get("content_length"),
            "remote_addr": coalesce(request.headers.get("x-remote-addr"), request.remote_addr),
            "query_text": value2json(query_) if query_ else None,
            "data": data if data else None,
            "error": value2json(error) if error else None
        })
        log["from"] = request.headers.get('from')
        request_log_queue.add({"value": log})
    except Exception as e:
        Log.warning("Can not record", cause=e)

def __json__(self):
    separator = "{"
    for k, v in self.__dict__.items():
        yield separator
        separator = ","
        yield value2json(k) + ": " + value2json(v)
    yield "}"

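A minimal consumption sketch (assuming `obj` is an instance of a class that defines __json__ as above): the generator yields JSON fragments that are joined into one string.

# hypothetical usage of the __json__ generator above
json_text = "".join(obj.__json__())
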
def test_rest_get(self):
    data = [
        {"a": 0, "b": 0},
        {"a": 0, "b": 1},
        {"a": 1, "b": 0},
        {"a": 1, "b": 1}
    ]

    test = wrap({
        "data": data,
        "query": {"from": TEST_TABLE},
        "expecting_list": {"data": data}
    })
    self.utils.execute_tests(test)  # LOAD, AND SET meta.testing=True

    url = URL(self.utils.testing.query)
    url.path = "json/" + test.query['from']
    url.query = {"a": 1}
    response = self.utils.try_till_response(str(url), data=b"")
    self.assertEqual(response.status_code, 200)

    # ORDER DOES NOT MATTER, TEST EITHER
    expected1 = value2json([{"a": 1, "b": 0}, {"a": 1, "b": 1}], pretty=True).encode('utf8')
    expected2 = value2json([{"a": 1, "b": 1}, {"a": 1, "b": 0}], pretty=True).encode('utf8')
    try:
        self.assertEqual(response.all_content, expected1)
    except Exception:
        self.assertEqual(response.all_content, expected2)

def close(self):
    self.please_stop.go()
    with self.lock:
        if self.db is None:
            return

        self.add(THREAD_STOP)

        if self.db.status.end == self.start:
            DEBUG and Log.note("persistent queue clear and closed")
            self.file.delete()
        else:
            DEBUG and Log.note("persistent queue closed with {{num}} items left", num=len(self))
            try:
                self._add_pending({"add": {"status.start": self.start}})
                for i in range(self.db.status.start, self.start):
                    self._add_pending({"remove": str(i)})
                self.file.write(
                    mo_json.value2json({"add": self.db})
                    + "\n"
                    + ("\n".join(mo_json.value2json(p) for p in self.pending))
                    + "\n"
                )
                self._apply_pending()
            except Exception as e:
                raise e
        self.db = None

def test_json(self):
    some_dict = TEST_DICT
    some_json = mo_json.value2json(some_dict)
    prop = Data()
    prop.a.b.c = TEST_VAL
    prop_json = mo_json.value2json(prop)
    self.assertEqual(some_json, prop_json)

def post_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    if 'json' in kwargs:
        kwargs['data'] = value2json(kwargs['json']).encode('utf8')
        del kwargs['json']
    elif 'data' in kwargs:
        kwargs['data'] = value2json(kwargs['data']).encode('utf8')
    else:
        Log.error(u"Expecting `json` parameter")

    response = post(url, **kwargs)
    details = json2value(response.content.decode('utf8'))
    if response.status_code not in [200, 201, 202]:
        if "template" in details:
            Log.error(u"Bad response code {{code}}", code=response.status_code, cause=Except.wrap(details))
        else:
            Log.error(u"Bad response code {{code}}\n{{details}}", code=response.status_code, details=details)
    else:
        return details

def _shorten(source_key, value, source):
    if source.name.startswith("active-data-test-result"):
        value.result.subtests = [s for s in value.result.subtests if s.ok is False]
        value.result.missing_subtests = True
        value.repo.changeset.files = None

    shorter_length = len(value2json(value))
    if shorter_length > MAX_RECORD_LENGTH:
        result_size = len(value2json(value.result))
        if source.name == "active-data-test-result":
            if result_size > MAX_RECORD_LENGTH:
                Log.warning(
                    "Epic test failure in {{name}} results in big record for {{id}} of length {{length}}",
                    id=value._id,
                    name=source.name,
                    length=shorter_length
                )
            else:
                pass  # NOT A PROBLEM
        else:
            Log.warning(
                "Monstrous {{name}} record {{id}} of length {{length}}",
                id=source_key,
                name=source.name,
                length=shorter_length
            )

def _deep_json_to_string(value, depth):
    """
    :param value: SOME STRUCTURE
    :param depth: THE MAX DEPTH OF PROPERTIES, DEEPER WILL BE STRING-IFIED
    :return: FLATTER STRUCTURE
    """
    if is_data(value):
        if depth == 0:
            return strings.limit(value2json(value), LOG_STRING_LENGTH)
        return {k: _deep_json_to_string(v, depth - 1) for k, v in value.items()}
    elif is_sequence(value):
        return strings.limit(value2json(value), LOG_STRING_LENGTH)
    elif isinstance(value, number_types):
        return value
    elif is_text(value):
        return strings.limit(value, LOG_STRING_LENGTH)
    elif is_binary(value):
        return strings.limit(bytes2base64(value), LOG_STRING_LENGTH)
    elif isinstance(value, (date, datetime)):
        return datetime2unix(value)
    else:
        return strings.limit(value2json(value), LOG_STRING_LENGTH)

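For illustration, a hypothetical call (output shown approximately): with depth=1 the top level is kept as a dict while anything deeper is rendered as a truncated JSON string.

# hypothetical call; output shown approximately
_deep_json_to_string({"a": {"b": {"c": 1}}}, depth=1)
# -> {"a": '{"b": {"c": 1}}'}
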
def command_loop(local):
    DEBUG and Log.note("mo-python process running with {{config|json}}", config=local['config'])
    while not please_stop:
        line = sys.stdin.readline()
        try:
            command = json2value(line.decode('utf8'))
            DEBUG and Log.note("got {{command}}", command=command)

            if "import" in command:
                dummy = {}
                if is_text(command['import']):
                    exec("from " + command['import'] + " import *", dummy, context)
                else:
                    exec(
                        "from " + command['import']['from'] + " import " +
                        ",".join(listwrap(command['import']['vars'])),
                        dummy,
                        context
                    )
                STDOUT.write(DONE)
            elif "set" in command:
                for k, v in command.set.items():
                    context[k] = v
                STDOUT.write(DONE)
            elif "get" in command:
                STDOUT.write(value2json({
                    "out": coalesce(local.get(command['get']), context.get(command['get']))
                }))
                STDOUT.write('\n')
            elif "stop" in command:
                STDOUT.write(DONE)
                please_stop.go()
            elif "exec" in command:
                if not is_text(command['exec']):
                    Log.error("exec expects only text")
                exec(command['exec'], context, local)
                STDOUT.write(DONE)
            else:
                for k, v in command.items():
                    if is_list(v):
                        exec("_return = " + k + "(" + ",".join(map(value2json, v)) + ")", context, local)
                    else:
                        exec(
                            "_return = " + k + "(" +
                            ",".join(kk + "=" + value2json(vv) for kk, vv in v.items()) + ")",
                            context,
                            local
                        )
                STDOUT.write(value2json({"out": local['_return']}))
                STDOUT.write('\n')
        except Exception as e:
            STDOUT.write(value2json({"err": e}))
            STDOUT.write('\n')
        finally:
            STDOUT.flush()

def save_session(self, app, session, response):
    if not session or not session.keys():
        return
    if not session.session_id:
        session.session_id = generate_sid()
        session.permanent = True
    DEBUG and Log.note("save session {{session}}", session=session)

    now = Date.now().unix
    session_id = session.session_id
    result = self.db.query(sql_query({
        "from": self.table,
        "where": {"eq": {"session_id": session_id}}
    }))
    saved_record = first(Data(zip(result.header, r)) for r in result.data)
    expires = min(session.expires, now + self.cookie.inactive_lifetime.seconds)
    if saved_record:
        DEBUG and Log.note("found session {{session}}", session=saved_record)

        saved_record.data = value2json(session)
        saved_record.expires = expires
        saved_record.last_used = now
        with self.db.transaction() as t:
            t.execute(
                "UPDATE " + quote_column(self.table) + SQL_SET +
                sql_list(sql_eq(**{k: v}) for k, v in saved_record.items()) +
                SQL_WHERE + sql_eq(session_id=session_id)
            )
    else:
        new_record = {
            "session_id": session_id,
            "data": value2json(session),
            "expires": expires,
            "last_used": now,
        }
        DEBUG and Log.note("new record for db {{session}}", session=new_record)
        with self.db.transaction() as t:
            t.execute(sql_insert(self.table, new_record))

    response.set_cookie(app.session_cookie_name, session_id, expires=expires)

def test_default_python(self):
    test = {"add": Data(start="".join([" ", "â€"]))}
    output = value2json(test)
    expecting = '{"add":{"start":" â€"}}'
    self.assertEqual(expecting, output, "expecting correct json")

def test_branch_count(self):
    if self.not_real_service():
        return

    test = wrap({"query": {
        "from": {
            "type": "elasticsearch",
            "settings": {
                "host": ES_CLUSTER_LOCATION,
                "index": "unittest",
                "type": "test_result"
            }
        },
        "select": [
            {"aggregate": "count"},
        ],
        "edges": [
            "build.branch"
        ],
        "where": {"or": [
            {"missing": "build.id"}
            # {"gte": {"timestamp": Date.floor(Date.now() - (Duration.DAY * 7), Duration.DAY).milli / 1000}}
        ]},
        "format": "table"
    }})
    query = value2json(test.query).encode('utf8')

    # EXECUTE QUERY
    with Timer("query"):
        response = http.get(self.testing.query, data=query)
        if response.status_code != 200:
            error(response)
    result = json2value(response.all_content.decode('utf8'))

    Log.note("result\n{{result|indent}}", {"result": result})

def test_double1(self):
    test = {"value": 5.2025595183536973e-07}
    output = value2json(test)
    self.assertIn(
        output,
        [
            u'{"value":5.202559518353697e-7}',
            u'{"value":5.202559518353697e-07}'
        ],
        "expecting correct value"
    )

def process_task(mail, please_stop=None):
    try:
        if not this.call:
            this.call = this.celery.Task.__call__
            this.dummy = this.celery.Task()

        name = mail.sender.name
        args = (mail,) if mail.sender.bind else tuple()

        this._status_update(mail, states.STARTED, {"response": {"start_time": Date.now().format()}})
        fun = this.celery._tasks[name]
        mail.result = this.call(this.dummy, fun, *args, **unwrap(mail.message))
        mail.status = states.SUCCESS
    except Exception as e:
        mail.result = Except.wrap(e)
        mail.status = states.FAILURE
        # mail = wrap({"request": {"id": mail.request.id}, "sender": {"name": "mail.sender.name"}})
        Log.warning("worker failed to process {{mail}}", mail=mail, cause=e)

    mail.response.end_time = Date.now().format()
    if isinstance(mail.result, Exception):
        mail.result = Except.wrap(mail.result)
    mail.receiver.thread = None

    Log.note(
        "Add {{id}} ({{name}}) to response queue\n{{result}}",
        id=mail.request.id,
        name=mail.sender.name,
        result=mail
    )
    this.response_queue.add(value2json(mail))

    with work_list_lock:
        del work_list[mail.request.id]

def _worker(self, please_stop):
    while not please_stop:
        pair = self.requests.pop(till=please_stop)
        if please_stop:
            break
        ready, method, path, req_headers, timestamp = pair

        try:
            url = self.url / path
            self.outbound_rate.add(Date.now())
            response = http.request(method, url, req_headers)

            del response.headers['transfer-encoding']
            resp_headers = value2json(response.headers)
            resp_content = response.raw.read()

            please_cache = self.please_cache(path)
            if please_cache:
                with self.db.transaction() as t:
                    t.execute(
                        "INSERT INTO cache (path, headers, response, timestamp) VALUES" +
                        quote_list((path, resp_headers, resp_content.decode('latin1'), timestamp))
                    )
            with self.cache_locker:
                self.cache[path] = (ready, resp_headers, resp_content, timestamp)
        except Exception as e:
            Log.warning("problem with request to {{path}}", path=path, cause=e)
            with self.cache_locker:
                ready, headers, response = self.cache[path]
                del self.cache[path]
        finally:
            ready.go()

def __init__(
    self,
    host,
    index,
    port=9200,
    type="log",
    queue_size=1000,
    batch_size=100,
    kwargs=None,
):
    """
    settings ARE FOR THE ELASTICSEARCH INDEX
    """
    kwargs.timeout = Duration(coalesce(kwargs.timeout, "30second")).seconds
    kwargs.retry.times = coalesce(kwargs.retry.times, 3)
    kwargs.retry.sleep = Duration(coalesce(kwargs.retry.sleep, MINUTE)).seconds

    self.es = Cluster(kwargs).get_or_create_index(
        schema=json2value(value2json(SCHEMA), leaves=True),
        limit_replicas=True,
        typed=True,
        kwargs=kwargs,
    )
    self.batch_size = batch_size
    self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
    self.queue = Queue("debug logs to es", max=queue_size, silent=True)

    self.worker = Thread.run("add debug logs to es", self._insert_loop)

def _db_insert_column(self, column):
    try:
        self.db.execute(
            "INSERT INTO" + db_table_name + sql_iso(all_columns) + "VALUES" +
            sql_iso(sql_list([
                quote_value(column[c.name])
                if c.name not in ("nested_path", "partitions")
                else quote_value(value2json(column[c.name]))
                for c in METADATA_COLUMNS
            ]))
        )
    except Exception as e:
        e = Except.wrap(e)
        if "UNIQUE constraint failed" in e or " are not unique" in e:
            # THIS CAN HAPPEN BECAUSE todo HAS OLD COLUMN DATA
            self.todo.add((UPDATE, column), force=True)
        else:
            Log.error("do not know how to handle", cause=e)

def write(self, template, params):
    output = {
        "Timestamp": (Decimal(datetime2unix(params.timestamp)) * Decimal(1e9)).to_integral_exact(),  # NANOSECONDS
        "Type": params.template,
        "Logger": params.machine.name,
        "Hostname": self.app_name,
        "EnvVersion": "2.0",
        "Severity": severity_map.get(params.context, 3),  # https://en.wikipedia.org/wiki/Syslog#Severity_levels
        "Pid": params.machine.pid,
        "Fields": {k: _deep_json_to_string(v, 0) for k, v in to_data(params).leaves()}
    }
    self.stream.write(value2json(output).encode('utf8'))
    self.stream.write(b'\n')

def __init__(
    self,
    host,
    index,
    port=9200,
    type="log",
    queue_size=1000,
    batch_size=100,
    kwargs=None,
):
    """
    settings ARE FOR THE ELASTICSEARCH INDEX
    """
    kwargs.timeout = Duration(coalesce(kwargs.timeout, "30second")).seconds
    kwargs.retry.times = coalesce(kwargs.retry.times, 3)
    kwargs.retry.sleep = Duration(coalesce(kwargs.retry.sleep, MINUTE)).seconds
    kwargs.host = Random.sample(listwrap(host), 1)[0]

    schema = json2value(value2json(SCHEMA), leaves=True)
    schema.mappings[type].properties["~N~"].type = "nested"

    self.es = Cluster(kwargs).get_or_create_index(
        schema=schema,
        limit_replicas=True,
        typed=True,
        kwargs=kwargs,
    )
    self.batch_size = batch_size
    self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
    self.queue = Queue("debug logs to es", max=queue_size, silent=True)

    self.worker = Thread.run("add debug logs to es", self._insert_loop)

def commit(self):
    with self.lock:
        if self.closed:
            Log.error("Queue is closed, commit not allowed")

        try:
            self._add_pending({"add": {"status.start": self.start}})
            for i in range(self.db.status.start, self.start):
                self._add_pending({"remove": str(i)})

            if self.db.status.end - self.start < 10 or Random.range(0, 1000) == 0:  # FORCE RE-WRITE TO LIMIT FILE SIZE
                # SIMPLY RE-WRITE FILE
                if DEBUG:
                    Log.note("Re-write {{num_keys}} keys to persistent queue", num_keys=self.db.status.end - self.start)
                    for k in self.db.keys():
                        if k == "status" or int(k) >= self.db.status.start:
                            continue
                        Log.error("Not expecting {{key}}", key=k)
                self._commit()
                self.file.write(mo_json.value2json({"add": self.db}) + "\n")
            else:
                self._commit()
        except Exception as e:
            raise e

def test_one_tuple(self):
    test = to_data({
        "data": [{"a": 1}, {"a": 2}],
        "query": {"from": TEST_TABLE},
        # "expecting_list": [{"a": 1}, {"a": 2}]
    })
    self.utils.fill_container(test)  # FILL AND MAKE DUMMY QUERY

    query = value2json({"tuple": {
        "from": test.query['from'],
        "where": {"eq": {"a": 1}},
        "format": "list",
        "meta": {"testing": True}
    }}).encode('utf8')
    # SEND QUERY
    response = self.utils.try_till_response(self.utils.testing.query, data=query)

    if response.status_code != 200:
        error(response)
    result = json2value(response.all_content.decode('utf8'))

    self.assertEqual(result, {"data": [
        {"data": [{"a": 1}]},
    ]})

def value2url_param(value):
    """
    :param value:
    :return: ascii URL
    """
    if value == None:
        Log.error("Can not encode None into a URL")

    if is_data(value):
        from mo_json import value2json

        value_ = wrap(value)
        output = "&".join([
            value2url_param(k) + "=" + (value2url_param(v) if is_text(v) else value2url_param(value2json(v)))
            for k, v in value_.leaves()
        ])
    elif is_text(value):
        output = "".join(_map2url[c] for c in value.encode('utf8'))
    elif is_binary(value):
        output = "".join(_map2url[c] for c in value)
    elif hasattr(value, "__iter__"):
        output = ",".join(value2url_param(v) for v in value)
    else:
        output = str(value)

    return output

def __init__(self, name, config):
    config = to_data(config)
    if config.debug.logs:
        Log.error("not allowed to configure logging on other process")
    Log.note("begin process")

    # WINDOWS REQUIRED shell, WHILE LINUX NOT
    shell = "windows" in platform.system().lower()
    self.process = Process(
        name,
        [PYTHON, "-u", "mo_threads" + os.sep + "python_worker.py"],
        debug=False,
        cwd=os.getcwd(),
        shell=shell
    )
    self.process.stdin.add(value2json(set_default({}, config, {"debug": {"trace": True}})))
    status = self.process.stdout.pop()
    if status != '{"out":"ok"}':
        Log.error(
            "could not start python\n{{error|indent}}",
            error=self.process.stderr.pop_all() + [status] + self.process.stdin.pop_all()
        )

    self.lock = Lock("wait for response from " + name)
    self.current_task = DONE
    self.current_response = None
    self.current_error = None

    self.daemon = Thread.run("", self._daemon)
    self.errors = Thread.run("", self._stderr)

def login(self, path=None):
    """
    EXPECT AN ACCESS TOKEN, RETURN A SESSION TOKEN
    """
    now = Date.now().unix
    try:
        access_token = get_token_auth_header()
        # if access_token.error:
        #     Log.error("{{error}}: {{error_description}}", access_token)
        if len(access_token.split(".")) == 3:
            access_details = self.verify_jwt_token(access_token)
            session.scope = access_details["scope"]

        # ADD TO SESSION
        self.session_manager.setup_session(session)
        user_details = self.verify_opaque_token(access_token)
        session.user = self.permissions.get_or_create_user(user_details)
        session.last_used = now

        self.markup_user()

        return Response(value2json(self.session_manager.make_cookie(session)), status=200)
    except Exception as e:
        session.user = None
        session.last_used = None
        Log.error("failure to authorize", cause=e)

def get_raw_json(path):
    active_data_timer = Timer("total duration")
    body = flask.request.get_data()
    try:
        with active_data_timer:
            args = scrub_args(flask.request.args)
            limit = args.limit if args.limit else 10
            args.limit = None

            frum = find_container(path, after=None)
            result = jx.run(
                {
                    "from": path,
                    "where": {"eq": args},
                    "limit": limit,
                    "format": "list"
                },
                frum
            )

            if isinstance(result, Container):  # TODO: REMOVE THIS CHECK, jx SHOULD ALWAYS RETURN Containers
                result = result.format("list")

        result.meta.active_data_response_time = active_data_timer.duration

        response_data = value2json(result.data, pretty=True).encode('utf8')
        Log.note("Response is {{num}} bytes", num=len(response_data))
        return Response(response_data, status=200)
    except Exception as e:
        e = Except.wrap(e)
        return send_error(active_data_timer, body, e)

def __init__(
    self,
    host,
    index,
    port=9200,
    type="log",
    queue_size=1000,
    batch_size=100,
    refresh_interval="1second",
    kwargs=None,
):
    """
    settings ARE FOR THE ELASTICSEARCH INDEX
    """
    kwargs.timeout = Duration(coalesce(kwargs.timeout, "30second")).seconds
    kwargs.retry.times = coalesce(kwargs.retry.times, 3)
    kwargs.retry.sleep = Duration(coalesce(kwargs.retry.sleep, MINUTE)).seconds
    kwargs.host = randoms.sample(listwrap(host), 1)[0]

    rollover_interval = coalesce(kwargs.rollover.interval, kwargs.rollover.max, "year")
    rollover_max = coalesce(kwargs.rollover.max, kwargs.rollover.interval, "year")

    schema = set_default(
        kwargs.schema,
        {"mappings": {kwargs.type: {"properties": {"~N~": {"type": "nested"}}}}},
        json2value(value2json(SCHEMA), leaves=True),
    )

    self.es = RolloverIndex(
        rollover_field={"get": [{"first": "."}, {"literal": "timestamp"}]},
        rollover_interval=rollover_interval,
        rollover_max=rollover_max,
        schema=schema,
        limit_replicas=True,
        typed=True,
        read_only=False,
        kwargs=kwargs,
    )
    self.batch_size = batch_size
    self.queue = Queue("debug logs to es", max=queue_size, silent=True)

    self.worker = Thread.run("add debug logs to es", self._insert_loop)

def bytes(self):
    if self.pre:
        yield self.pre
    else:
        self.pre = b",\n"
        yield b"{\"meta\":{\"format\":\"table\"},\"header\":"
        yield value2json(self.header).encode('utf8')
        yield b",\n\"data\":[\n"

    comma = b""
    for r in self.result.data:
        yield comma
        comma = b",\n"
        yield value2json(r).encode('utf8')
        self.count += 1
        if self.count >= self.abs_limit:
            yield DONE

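A minimal sketch of how a byte generator like the one above could be streamed to a client (Flask is shown only for illustration; the handler and formatter names are assumptions, not from the source):

from flask import Response

def stream_table(formatter):
    # Flask accepts any iterable of bytes as a streaming response body
    return Response(formatter.bytes(), status=200, headers={"Content-Type": "application/json"})
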
def test_unicode3(self):
    output = value2json({"comment": "testing accented char ŕáâăäĺćçčéęëěíîďđńňóôőö÷řůúűüýţ˙"})
    assert output == '{"comment":"testing accented char ŕáâăäĺćçčéęëěíîďđńňóôőö÷řůúűüýţ˙"}'
    if not isinstance(output, text):
        Log.error("expecting unicode json")

def __init__(self, abs_limit, select, query):
    self.count = 0
    self.abs_limit = abs_limit
    self.formatter = row_formatter(select)
    self.rows = None
    self.pre = (
        b"{\"meta\":{\"format\":\"table\"},\"header\":"
        + value2json(format_table_header(select, query)).encode('utf8')
        + b",\n\"data\":[\n"
    )

def test_unicode2(self):
    output = value2json({"comment": "testing accented char àáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"})
    assert output == '{"comment":"testing accented char àáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"}'
    if not isinstance(output, text):
        Log.error("expecting unicode json")

def heartbeat():
    try:
        backend_check()
        return Response(status=200)
    except Exception as e:
        Log.warning("heartbeat failure", cause=e)
        return Response(
            unicode2utf8(value2json(e)),
            status=500,
            headers={"Content-Type": "application/json"}
        )

def _deep_json_to_string(value, depth):
    """
    :param value: SOME STRUCTURE
    :param depth: THE MAX DEPTH OF PROPERTIES, DEEPER WILL BE STRING-IFIED
    :return: FLATTER STRUCTURE
    """
    if isinstance(value, Mapping):
        if depth == 0:
            return strings.limit(value2json(value), LOG_STRING_LENGTH)
        return {k: _deep_json_to_string(v, depth - 1) for k, v in value.items()}
    elif isinstance(value, list):
        return strings.limit(value2json(value), LOG_STRING_LENGTH)
    elif isinstance(value, (float, int, long)):
        return value
    elif isinstance(value, basestring):
        return strings.limit(value, LOG_STRING_LENGTH)
    else:
        return strings.limit(value2json(value), LOG_STRING_LENGTH)

def _stream_list(files):
    if not files:
        yield b'{"format":"list", "data":[]}'
        return

    sep = b'{"format":"list", "data":['
    for f, pairs in files:
        yield sep
        yield value2json({"path": f, "tuids": map_to_array(pairs)}).encode('utf8')
        sep = b","
    yield b']}'

def post_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    if 'json' in kwargs:
        kwargs['data'] = unicode2utf8(value2json(kwargs['json']))
        del kwargs['json']
    elif 'data' in kwargs:
        kwargs['data'] = unicode2utf8(value2json(kwargs['data']))
    else:
        Log.error(u"Expecting `json` parameter")

    response = post(url, **kwargs)
    details = json2value(utf82unicode(response.content))
    if response.status_code not in [200, 201, 202]:
        if "template" in details:
            Log.error(u"Bad response code {{code}}", code=response.status_code, cause=Except.wrap(details))
        else:
            Log.error(u"Bad response code {{code}}\n{{details}}", code=response.status_code, details=details)
    else:
        return details

def relay_post(path):
    try:
        return cache.request("post", path, flask.request.headers)
    except Exception as e:
        e = Except.wrap(e)
        Log.warning("could not handle request", cause=e)
        return Response(
            unicode2utf8(value2json(e, pretty=True)),
            status=400,
            headers={"Content-Type": "text/html"}
        )

def __init__(self, name, config):
    config = wrap(config)
    if config.debug.logs:
        Log.error("not allowed to configure logging on other process")

    self.process = Process(name, [PYTHON, "mo_threads" + os.sep + "python_worker.py"], shell=True)
    self.process.stdin.add(value2json(set_default({"debug": {"trace": True}}, config)))

    self.lock = Lock("wait for response from " + name)
    self.current_task = None
    self.current_response = None
    self.current_error = None

    self.daemon = Thread.run("", self._daemon)
    self.errors = Thread.run("", self._stderr)

def write(self, template, params):
    output = {
        "Timestamp": (Decimal(datetime2unix(params.timestamp)) * Decimal(1e9)).to_integral_exact(),  # NANOSECONDS
        "Type": params.template,
        "Logger": params.machine.name,
        "Hostname": self.app_name,
        "EnvVersion": "2.0",
        "Severity": severity_map.get(params.context, 3),  # https://en.wikipedia.org/wiki/Syslog#Severity_levels
        "Pid": params.machine.pid,
        "Fields": {k: _deep_json_to_string(v, 0) for k, v in wrap(params).leaves()}
    }
    self.stream.write(value2json(output).encode('utf8'))
    self.stream.write(b'\n')

def __init__(self, host, index, port=9200, type="log", max_size=1000, batch_size=100, kwargs=None):
    """
    settings ARE FOR THE ELASTICSEARCH INDEX
    """
    self.es = Cluster(kwargs).get_or_create_index(
        schema=mo_json.json2value(value2json(SCHEMA), leaves=True),
        limit_replicas=True,
        tjson=True,
        kwargs=kwargs
    )
    self.batch_size = batch_size
    self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
    self.queue = Queue("debug logs to es", max=max_size, silent=True)

    self.es.settings.retry.times = coalesce(self.es.settings.retry.times, 3)
    self.es.settings.retry.sleep = Duration(coalesce(self.es.settings.retry.sleep, MINUTE))

    Thread.run("add debug logs to es", self._insert_loop)

def update(self, command):
    """
    EXPECTING command == {"set":term, "where":where}
    THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
    THE where CLAUSE IS AN ES FILTER
    """
    command = wrap(command)
    schema = self.es.get_properties()

    # GET IDS OF DOCUMENTS
    results = self.es.search({
        "fields": listwrap(schema._routing.path),
        "query": {"filtered": {
            "filter": jx_expression(command.where).to_esfilter(Null)
        }},
        "size": 10000
    })

    # SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT)
    scripts = FlatList()
    for k, v in command.set.items():
        if not is_variable_name(k):
            Log.error("Only support simple paths for now")
        if isinstance(v, Mapping) and v.doc:
            scripts.append({"doc": v.doc})
        else:
            v = scrub(v)
            scripts.append({"script": "ctx._source." + k + " = " + jx_expression(v).to_es_script(schema).script(schema)})

    if results.hits.hits:
        updates = []
        for h in results.hits.hits:
            for s in scripts:
                updates.append({"update": {"_id": h._id, "_routing": unwraplist(h.fields[literal_field(schema._routing.path)])}})
                updates.append(s)
        content = ("\n".join(value2json(c) for c in updates) + "\n")

        response = self.es.cluster.post(
            self.es.path + "/_bulk",
            data=content,
            headers={"Content-Type": "application/json"},
            timeout=self.settings.timeout,
            params={"wait_for_active_shards": self.settings.wait_for_active_shards}
        )
        if response.errors:
            Log.error(
                "could not update: {{error}}",
                error=[e.error for i in response["items"] for e in i.values() if e.status not in (200, 201)]
            )

def _execute(self, command):
    with self.lock:
        if self.current_task is not None:
            self.current_task.wait()
        self.current_task = Signal()
        self.current_response = None
        self.current_error = None

    self.process.stdin.add(value2json(command))
    self.current_task.wait()

    with self.lock:
        try:
            if self.current_error:
                Log.error("problem with process call", cause=Except.new_instance(self.current_error))
            else:
                return self.current_response
        finally:
            self.current_task = None
            self.current_response = None
            self.current_error = None

def refresh(self, *args, **kwargs):
    data_as_json = mo_json.value2json(self.data, pretty=True)
    self.file.write(data_as_json)

def get_tuids(self, branch, revision, files):
    """
    GET TUIDS FROM ENDPOINT, AND STORE IN DB
    :param branch: BRANCH TO FIND THE REVISION/FILE
    :param revision: THE REVISION NUMBER
    :param files: THE FULL PATHS TO THE FILES
    :return: MAP FROM FILENAME TO TUID LIST
    """
    # SCRUB INPUTS
    revision = revision[:12]
    files = [file.lstrip('/') for file in files]

    with Timer(
        "ask tuid service for {{num}} files at {{revision|left(12)}}",
        {"num": len(files), "revision": revision},
        silent=not self.enabled
    ):
        response = self.db.query(
            "SELECT file, tuids FROM tuid WHERE revision=" + quote_value(revision) +
            " AND file IN " + quote_list(files)
        )
        found = {file: json2value(tuids) for file, tuids in response.data}

        try:
            remaining = set(files) - set(found.keys())
            new_response = None
            if remaining:
                request = wrap({
                    "from": "files",
                    "where": {"and": [
                        {"eq": {"revision": revision}},
                        {"in": {"path": remaining}},
                        {"eq": {"branch": branch}}
                    ]},
                    "branch": branch,
                    "meta": {
                        "format": "list",
                        "request_time": Date.now()
                    }
                })
                if self.push_queue is not None:
                    if DEBUG:
                        Log.note("record tuid request to SQS: {{timestamp}}", timestamp=request.meta.request_time)
                    self.push_queue.add(request)
                else:
                    if DEBUG:
                        Log.note("no recorded tuid request")

                if not self.enabled:
                    return found

                new_response = http.post_json(
                    self.endpoint,
                    json=request,
                    timeout=self.timeout
                )

                with self.db.transaction() as transaction:
                    command = "INSERT INTO tuid (revision, file, tuids) VALUES " + sql_list(
                        quote_list((revision, r.path, value2json(r.tuids)))
                        for r in new_response.data
                        if r.tuids != None
                    )
                    if not command.endswith(" VALUES "):
                        transaction.execute(command)

            self.num_bad_requests = 0
            found.update({r.path: r.tuids for r in new_response.data} if new_response else {})
            return found
        except Exception as e:
            self.num_bad_requests += 1
            Till(seconds=SLEEP_ON_ERROR).wait()
            if self.enabled and self.num_bad_requests >= 3:
                self.enabled = False
                Log.error("TUID service has problems.", cause=e)
            return found

def request(method, url, headers=None, zip=None, retry=None, **kwargs):
    """
    JUST LIKE requests.request() BUT WITH DEFAULT HEADERS AND FIXES
    DEMANDS data IS ONE OF:
    * A JSON-SERIALIZABLE STRUCTURE, OR
    * LIST OF JSON-SERIALIZABLE STRUCTURES, OR
    * None

    Parameters
    * zip - ZIP THE REQUEST BODY, IF BIG ENOUGH
    * json - JSON-SERIALIZABLE STRUCTURE
    * retry - {"times": x, "sleep": y} STRUCTURE

    THE BYTE_STRINGS (b"") ARE NECESSARY TO PREVENT httplib.py FROM **FREAKING OUT**
    IT APPEARS requests AND httplib.py SIMPLY CONCATENATE STRINGS BLINDLY, WHICH
    INCLUDES url AND headers
    """
    global _warning_sent
    global request_count

    if not _warning_sent and not default_headers:
        Log.warning(text_type(
            "The pyLibrary.env.http module was meant to add extra " +
            "default headers to all requests, specifically the 'Referer' " +
            "header with a URL to the project. Use the `pyLibrary.debug.constants.set()` " +
            "function to set `pyLibrary.env.http.default_headers`"
        ))
    _warning_sent = True

    if is_list(url):
        # TRY MANY URLS
        failures = []
        for remaining, u in jx.countdown(url):
            try:
                response = request(method, u, retry=retry, **kwargs)
                if mo_math.round(response.status_code, decimal=-2) not in [400, 500]:
                    return response
                if not remaining:
                    return response
            except Exception as e:
                e = Except.wrap(e)
                failures.append(e)
        Log.error(u"Tried {{num}} urls", num=len(url), cause=failures)

    if 'session' in kwargs:
        session = kwargs['session']
        del kwargs['session']
        sess = Null
    else:
        sess = session = sessions.Session()

    with closing(sess):
        if PY2 and is_text(url):
            # httplib.py WILL **FREAK OUT** IF IT SEES ANY UNICODE
            url = url.encode('ascii')

        try:
            set_default(kwargs, {"zip": zip, "retry": retry}, DEFAULTS)
            _to_ascii_dict(kwargs)

            # HEADERS
            headers = kwargs['headers'] = unwrap(set_default(headers, session.headers, default_headers))
            _to_ascii_dict(headers)
            del kwargs['headers']

            # RETRY
            retry = wrap(kwargs['retry'])
            if isinstance(retry, Number):
                retry = set_default({"times": retry}, DEFAULTS['retry'])
            if isinstance(retry.sleep, Duration):
                retry.sleep = retry.sleep.seconds
            del kwargs['retry']

            # JSON
            if 'json' in kwargs:
                kwargs['data'] = value2json(kwargs['json']).encode('utf8')
                del kwargs['json']

            # ZIP
            set_default(headers, {'Accept-Encoding': 'compress, gzip'})

            if kwargs['zip'] and len(coalesce(kwargs.get('data'))) > 1000:
                compressed = convert.bytes2zip(kwargs['data'])
                headers['content-encoding'] = 'gzip'
                kwargs['data'] = compressed
            del kwargs['zip']
        except Exception as e:
            Log.error(u"Request setup failure on {{url}}", url=url, cause=e)

        errors = []
        for r in range(retry.times):
            if r:
                Till(seconds=retry.sleep).wait()

            try:
                DEBUG and Log.note(u"http {{method|upper}} to {{url}}", method=method, url=text_type(url))
                request_count += 1
                return session.request(method=method, headers=headers, url=str(url), **kwargs)
            except Exception as e:
                e = Except.wrap(e)
                if retry['http'] and str(url).startswith("https://") and "EOF occurred in violation of protocol" in e:
                    url = URL("http://" + str(url)[8:])
                    Log.note("Changed {{url}} to http due to SSL EOF violation.", url=str(url))
                errors.append(e)

        if " Read timed out." in errors[0]:
            Log.error(
                u"Tried {{times}} times: Timeout failure (timeout was {{timeout}}",
                timeout=kwargs['timeout'],
                times=retry.times,
                cause=errors[0]
            )
        else:
            Log.error(
                u"Tried {{times}} times: Request failure of {{url}}",
                url=url,
                times=retry.times,
                cause=errors[0]
            )

def add(self, message):
    message = wrap(message)
    m = Message()
    m.set_body(value2json(message))
    self.queue.write(m)

def _db_worker(self, please_stop):
    while not please_stop:
        try:
            with self._db_transaction():
                result = self._query(
                    SQL_SELECT
                    + all_columns
                    + SQL_FROM
                    + db_table_name
                    + SQL_WHERE
                    + "last_updated > "
                    + quote_value(self.last_load)
                    + SQL_ORDERBY
                    + sql_list(map(quote_column, ["es_index", "name", "es_column"]))
                )

            with self.locker:
                for r in result.data:
                    c = row_to_column(result.header, r)
                    self._add(c)
                    if c.last_updated > self.last_load:
                        self.last_load = c.last_updated

            updates = self.todo.pop_all()
            DEBUG and updates and Log.note("{{num}} columns to push to db", num=len(updates))
            for action, column in updates:
                while not please_stop:
                    try:
                        with self._db_transaction():
                            DEBUG and Log.note(
                                "{{action}} db for {{table}}.{{column}}",
                                action=action,
                                table=column.es_index,
                                column=column.es_column,
                            )
                            if action is EXECUTE:
                                self.db.execute(column)
                            elif action is UPDATE:
                                self.db.execute(
                                    "UPDATE"
                                    + db_table_name
                                    + "SET"
                                    + sql_list([
                                        "count=" + quote_value(column.count),
                                        "cardinality=" + quote_value(column.cardinality),
                                        "multi=" + quote_value(column.multi),
                                        "partitions=" + quote_value(value2json(column.partitions)),
                                        "last_updated=" + quote_value(column.last_updated),
                                    ])
                                    + SQL_WHERE
                                    + SQL_AND.join([
                                        "es_index = " + quote_value(column.es_index),
                                        "es_column = " + quote_value(column.es_column),
                                        "last_updated < " + quote_value(column.last_updated),
                                    ])
                                )
                            elif action is DELETE:
                                self.db.execute(
                                    "DELETE FROM"
                                    + db_table_name
                                    + SQL_WHERE
                                    + SQL_AND.join([
                                        "es_index = " + quote_value(column.es_index),
                                        "es_column = " + quote_value(column.es_column),
                                    ])
                                )
                            else:
                                self._db_insert_column(column)
                        break
                    except Exception as e:
                        e = Except.wrap(e)
                        if "database is locked" in e:
                            Log.note("metadata database is locked")
                            Till(seconds=1).wait()
                            break
                        else:
                            Log.warning("problem updating database", cause=e)
        except Exception as e:
            Log.warning("problem updating database", cause=e)

        (Till(seconds=10) | please_stop).wait()

def value2url_param(value):
    """
    :param value:
    :return: ascii URL
    """
    if value == None:
        Log.error("Can not encode None into a URL")

    if isinstance(value, Mapping):
        value_ = wrap(value)
        output = b"&".join([
            value2url_param(k) + b"=" + (value2url_param(v) if isinstance(v, basestring) else value2url_param(value2json(v)))
            for k, v in value_.leaves()
        ])
    elif isinstance(value, unicode):
        output = b"".join(_map2url[c] for c in value.encode('utf8'))
    elif isinstance(value, str):
        output = b"".join(_map2url[c] for c in value)
    elif hasattr(value, "__iter__"):
        output = b",".join(value2url_param(v) for v in value)
    else:
        output = str(value)

    return output

def value2url_param(value):
    """
    :param value:
    :return: ascii URL
    """
    if value == None:
        Log.error("Can not encode None into a URL")

    if is_data(value):
        value_ = wrap(value)
        output = "&".join([
            value2url_param(k) + "=" + (value2url_param(v) if is_text(v) else value2url_param(value2json(v)))
            for k, v in value_.leaves()
        ])
    elif is_text(value):
        output = "".join(_map2url[c] for c in value.encode('utf8'))
    elif is_binary(value):
        output = "".join(_map2url[c] for c in value)
    elif hasattr(value, "__iter__"):
        output = ",".join(value2url_param(v) for v in value)
    else:
        output = str(value)

    return output

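A rough standard-library analogue of the same idea (urlencode is shown only to illustrate the intent; it is not what value2url_param uses): nested values are serialized to JSON before being percent-encoded, mirroring the value2json fallback above.

import json
from urllib.parse import urlencode

params = {"a": 1, "filter": {"eq": {"b": 2}}}
query_string = urlencode({k: v if isinstance(v, str) else json.dumps(v) for k, v in params.items()})
# e.g. 'a=1&filter=%7B%22eq%22%3A+%7B%22b%22%3A+2%7D%7D'
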
def _commit(self):
    self.file.append("\n".join(mo_json.value2json(p) for p in self.pending))
    self._apply_pending()

def tuid_endpoint(path):
    with RegisterThread():
        try:
            service.statsdaemon.update_requests(requests_total=1)

            if flask.request.headers.get("content-length", "") in ["", "0"]:
                # ASSUME A BROWSER HIT THIS POINT, SEND text/html RESPONSE BACK
                service.statsdaemon.update_requests(requests_complete=1, requests_passed=1)
                return Response(
                    EXPECTING_QUERY,
                    status=400,
                    headers={"Content-Type": "text/html"}
                )
            elif int(flask.request.headers["content-length"]) > QUERY_SIZE_LIMIT:
                service.statsdaemon.update_requests(requests_complete=1, requests_passed=1)
                return Response(
                    unicode2utf8("request too large"),
                    status=400,
                    headers={"Content-Type": "text/html"}
                )

            request_body = flask.request.get_data().strip()
            query = json2value(utf82unicode(request_body))

            # ENSURE THE QUERY HAS THE CORRECT FORM
            if query['from'] != 'files':
                Log.error("Can only handle queries on the `files` table")

            ands = listwrap(query.where['and'])
            if len(ands) != 3:
                Log.error(
                    'expecting a simple where clause with following structure\n{{example|json}}',
                    example={"and": [
                        {"eq": {"branch": "<BRANCH>"}},
                        {"eq": {"revision": "<REVISION>"}},
                        {"in": {"path": ["<path1>", "<path2>", "...", "<pathN>"]}}
                    ]}
                )

            rev = None
            paths = None
            branch_name = None
            for a in ands:
                rev = coalesce(rev, a.eq.revision)
                paths = unwraplist(coalesce(paths, a['in'].path, a.eq.path))
                branch_name = coalesce(branch_name, a.eq.branch)
            paths = listwrap(paths)

            if len(paths) == 0:
                response, completed = [], True
            elif service.conn.pending_transactions > TOO_BUSY:  # CHECK IF service IS VERY BUSY
                # TODO: BE SURE TO UPDATE STATS TOO
                Log.note("Too many open transactions")
                response, completed = [], False
            elif service.get_thread_count() > TOO_MANY_THREADS:
                Log.note("Too many threads open")
                response, completed = [], False
            else:
                # RETURN TUIDS
                with Timer("tuid internal response time for {{num}} files", {"num": len(paths)}):
                    response, completed = service.get_tuids_from_files(
                        revision=rev, files=paths, going_forward=True, repo=branch_name
                    )

                if not completed:
                    Log.note(
                        "Request for {{num}} files is incomplete for revision {{rev}}.",
                        num=len(paths), rev=rev
                    )

            if query.meta.format == 'list':
                formatter = _stream_list
            else:
                formatter = _stream_table

            service.statsdaemon.update_requests(
                requests_complete=1 if completed else 0,
                requests_incomplete=1 if not completed else 0,
                requests_passed=1
            )

            return Response(
                formatter(response),
                status=200 if completed else 202,
                headers={"Content-Type": "application/json"}
            )
        except Exception as e:
            e = Except.wrap(e)
            service.statsdaemon.update_requests(requests_incomplete=1, requests_failed=1)
            Log.warning("could not handle request", cause=e)
            return Response(
                unicode2utf8(value2json(e, pretty=True)),
                status=400,
                headers={"Content-Type": "text/html"}
            )

def DataClass(name, columns, constraint=None):
    """
    Use the DataClass to define a class, but with some extra features:
    1. restrict the datatype of property
    2. restrict if `required`, or if `nulls` are allowed
    3. generic constraints on object properties

    It is expected that this class become a real class (or be removed) in the
    long term because it is expensive to use and should only be good for
    verifying program correctness, not user input.

    :param name: Name of the class we are creating
    :param columns: Each columns[i] has properties {
            "name",     - (required) name of the property
            "required", - False if it must be defined (even if None)
            "nulls",    - True if property can be None, or missing
            "default",  - A default value, if none is provided
            "type"      - a Python datatype
        }
    :param constraint: a JSON query Expression for extra constraints (return true if all constraints are met)
    :return: The class that has been created
    """
    columns = wrap([
        {"name": c, "required": True, "nulls": False, "type": object} if is_text(c) else c
        for c in columns
    ])
    slots = columns.name
    required = wrap(filter(lambda c: c.required and not c.nulls and not c.default, columns)).name
    nulls = wrap(filter(lambda c: c.nulls, columns)).name
    defaults = {c.name: coalesce(c.default, None) for c in columns}
    types = {c.name: coalesce(c.jx_type, object) for c in columns}

    code = expand_template(
        """
from __future__ import unicode_literals
from mo_future import is_text, is_binary
from collections import Mapping

meta = None
types_ = {{types}}
defaults_ = {{defaults}}

class {{class_name}}(Mapping):
    __slots__ = {{slots}}

    def _constraint(row, rownum, rows):
        try:
            return {{constraint_expr}}
        except Exception as e:
            return False

    def __init__(self, **kwargs):
        if not kwargs:
            return

        for s in {{slots}}:
            object.__setattr__(self, s, kwargs.get(s, {{defaults}}.get(s, None)))

        missed = {{required}}-set(kwargs.keys())
        if missed:
            Log.error("Expecting properties {"+"{missed}}", missed=missed)

        illegal = set(kwargs.keys())-set({{slots}})
        if illegal:
            Log.error("{"+"{names}} are not a valid properties", names=illegal)

        if not self._constraint(0, [self]):
            Log.error("constraint not satisfied {"+"{expect}}\\n{"+"{value|indent}}", expect={{constraint}}, value=self)

    def __getitem__(self, item):
        return getattr(self, item)

    def __setitem__(self, item, value):
        setattr(self, item, value)
        return self

    def __setattr__(self, item, value):
        if item not in {{slots}}:
            Log.error("{"+"{item|quote}} not valid attribute", item=item)
        object.__setattr__(self, item, value)
        if not self._constraint(0, [self]):
            Log.error("constraint not satisfied {"+"{expect}}\\n{"+"{value|indent}}", expect={{constraint}}, value=self)

    def __getattr__(self, item):
        Log.error("{"+"{item|quote}} not valid attribute", item=item)

    def __hash__(self):
        return object.__hash__(self)

    def __eq__(self, other):
        if isinstance(other, {{class_name}}) and dict(self)==dict(other) and self is not other:
            Log.error("expecting to be same object")
        return self is other

    def __dict__(self):
        return {k: getattr(self, k) for k in {{slots}}}

    def items(self):
        return ((k, getattr(self, k)) for k in {{slots}})

    def __copy__(self):
        _set = object.__setattr__
        output = object.__new__({{class_name}})
        {{assign}}
        return output

    def __iter__(self):
        return {{slots}}.__iter__()

    def __len__(self):
        return {{len_slots}}

    def __str__(self):
        return str({{dict}})
""",
        {
            "class_name": name,
            "slots": "(" + (", ".join(quote(s) for s in slots)) + ")",
            "required": "{" + (", ".join(quote(s) for s in required)) + "}",
            "nulls": "{" + (", ".join(quote(s) for s in nulls)) + "}",
            "defaults": Literal(defaults).to_python(),
            "len_slots": len(slots),
            "dict": "{" + (", ".join(quote(s) + ": self." + s for s in slots)) + "}",
            "assign": "; ".join("_set(output, " + quote(s) + ", self." + s + ")" for s in slots),
            "types": "{" + (",".join(quote(k) + ": " + v.__name__ for k, v in types.items())) + "}",
            "constraint_expr": Python[jx_expression(constraint)].to_python(),
            "constraint": value2json(constraint),
        },
    )

    output = _exec(code, name)
    register_data(output)
    return output

def _stream_table(files):
    yield b'{"format":"table", "header":["path", "tuids"], "data":['
    sep = b""
    for f, pairs in files:
        # SEPARATE ROWS WITH COMMAS SO THE data ARRAY REMAINS VALID JSON (MIRRORS _stream_list)
        yield sep
        yield value2json([f, map_to_array(pairs)]).encode('utf8')
        sep = b","
    yield b']}'

def __str__(self):
    return value2json(self.to_es)