def assertAlmostEqualValue(test, expected, digits=None, places=None, msg=None, delta=None):
    """
    Assert that `test` matches `expected`, allowing a numeric tolerance.

    Snagged from unittest/case.py, then modified (Aug2014)

    :param test: the value being checked
    :param expected: the reference value; a null `expected` means no expectation
    :param digits: pass when log10 of the absolute difference is below this
    :param places: pass when the difference is `places` orders of magnitude
                   below `test` (15 is used when no tolerance is given)
    :param msg: optional text placed in front of the failure message
    :param delta: largest absolute difference that still passes
    :raises AssertionError: when the values do not match
    :raises TypeError: when more than one of digits, places, delta is given
    """
    if expected.__class__.__name__ == "NullOp":
        if test == None:
            return
        raise AssertionError(expand_template("{{test}} != {{expected}}", locals()))

    # None HAS NO EXPECTATIONS, AND AN EXACT MATCH IS A SHORTCUT
    if expected == None or test == expected:
        return

    if not Math.is_number(expected):
        # SOME SPECIAL CASES, EXPECTING EMPTY CONTAINERS IS THE SAME AS EXPECTING NULL
        expecting_empty_list = isinstance(expected, list) and len(expected) == 0
        if expecting_empty_list and test == None:
            return
        expecting_empty_map = isinstance(expected, Mapping) and not expected.keys()
        if expecting_empty_map and test == None:
            return
        if test != expected:
            raise AssertionError(expand_template("{{test}} != {{expected}}", locals()))
        return

    num_param = len([p for p in (digits, places, delta) if p != None])
    if num_param > 1:
        raise TypeError("specify only one of digits, places or delta")

    if digits is not None:
        with suppress_exception:
            diff = Math.log10(abs(test-expected))
            if diff < digits:
                return
        template = "{{test}} != {{expected}} within {{digits}} decimal places"
    elif delta is not None:
        if abs(test - expected) <= delta:
            return
        template = "{{test}} != {{expected}} within {{delta}} delta"
    else:
        if places is None:
            places = 15
        with suppress_exception:
            diff = Math.log10(abs(test-expected))
            if diff < Math.ceiling(Math.log10(abs(test)))-places:
                return
        template = "{{test|json}} != {{expected|json}} within {{places}} places"

    standardMsg = expand_template(template, locals())
    raise AssertionError(coalesce(msg, "") + ": (" + standardMsg + ")")
def __new__(cls, value=None, **kwargs):
    """
    Make a Duration from a number, text, another Duration, or timedelta keywords.

    :param value: a number (multiplied by 1000 and stored as milli, so presumably
                  seconds), text (handed to `parse`), or a Duration to copy
    :param kwargs: only used when `value` is null; passed to `datetime.timedelta`
    :return: the Duration; None when there is no value and no kwargs, or when
             `value` is a NaN float
    """
    output = object.__new__(cls)

    if value == None:
        if not kwargs:
            return None
        output.milli = datetime.timedelta(**kwargs).total_seconds() * 1000
        output.month = 0
        return output

    if Math.is_number(value):
        # THIS BRANCH WRITES _milli DIRECTLY, THE OTHERS ASSIGN milli
        output._milli = float(value) * 1000
        output.month = 0
        return output
    if isinstance(value, text_type):
        return parse(value)
    if isinstance(value, Duration):
        output.milli = value.milli
        output.month = value.month
        return output
    if isinstance(value, float) and Math.is_nan(value):
        return None

    from mo_logs import Log
    Log.error("Do not know type of object (" + get_module("mo_json").value2json(value) + ")of to make a Duration")
def __div__(self, amount):
    """
    Divide this duration by a number or by another Duration.

    :param amount: a number, or a Duration
    :return: a new Duration when `amount` is a number; otherwise a ratio
    """
    by_months = isinstance(amount, Duration) and amount.month
    if not by_months:
        if Math.is_number(amount):
            scaled = Duration(0)
            scaled.milli = self.milli / amount
            scaled.month = self.month / amount
            return scaled
        return self.milli / amount.milli

    months = self.month
    remainder = self.milli

    # DO NOT CONSIDER TIME OF DAY
    tod = remainder % MILLI_VALUES.day
    remainder = remainder - tod

    if months == 0 and remainder > (MILLI_VALUES.year / 3):
        # NO MONTHS RECORDED, BUT LONG ENOUGH TO ESTIMATE THEM FROM milli
        months = Math.floor(12 * self.milli / MILLI_VALUES.year)
        remainder -= (months / 12) * MILLI_VALUES.year
    else:
        remainder = remainder - (self.month * MILLI_VALUES.month)
        if remainder >= MILLI_VALUES.day * 31:
            from mo_logs import Log
            Log.error("Do not know how to handle")

    # THE PARTIAL MONTH IS CAPPED AT 29/30
    fraction = MIN([29 / 30, (remainder + tod) / (MILLI_VALUES.day * 30)])
    return Math.floor(months / amount.month) + fraction
def __new__(cls, value=None, **kwargs):
    """
    Construct a Duration.

    :param value: null, a number (scaled by 1000 into milli), text (delegated
                  to `parse`), or an existing Duration (copied)
    :param kwargs: `datetime.timedelta` keywords, consulted only when `value` is null
    :return: a Duration, or None when nothing can be built (no value and no
             kwargs, or a NaN float)
    """
    output = object.__new__(cls)

    if value == None:
        if kwargs:
            delta = datetime.timedelta(**kwargs)
            output.milli = delta.total_seconds() * 1000
            output.month = 0
            return output
        return None

    if Math.is_number(value):
        # NOTE: SETS _milli, NOT milli
        output._milli = float(value) * 1000
        output.month = 0
        return output

    if isinstance(value, text_type):
        return parse(value)

    if isinstance(value, Duration):
        output.milli = value.milli
        output.month = value.month
        return output

    if isinstance(value, float) and Math.is_nan(value):
        return None

    from mo_logs import Log
    Log.error("Do not know type of object (" + get_module("mo_json").value2json(value) + ")of to make a Duration")
def _make_range_domain(self, domain, column_name):
    """
    Build the SQL text that generates the values of a range domain.

    Numbers are produced by joining the `__digits__` table to itself, one
    alias (a, b, c, ...) for each decimal digit needed.

    :param domain: object with `min`, `max` and `interval` attributes
    :param column_name: text placed after the value expression in the SELECT
    :return: the SQL text
    """
    def alias(position):
        return text_type(chr(ord(b'a') + position))

    width = (domain.max - domain.min) / domain.interval
    digits = Math.floor(Math.log10(width - 1))
    if digits == 0:
        value = "a.value"
    else:
        terms = ["1" + ("0" * j) + "*" + alias(j) + ".value" for j in range(digits + 1)]
        value = "+".join(terms)

    unit_interval = domain.interval == 1
    zero_based = domain.min == 0
    if unit_interval and zero_based:
        sql = "SELECT " + value + " " + column_name + "\nFROM __digits__ a"
    elif unit_interval:
        sql = "SELECT (" + value + ") + " + quote_value(domain.min) + " " + column_name + "\nFROM __digits__ a"
    elif zero_based:
        sql = "SELECT " + value + " * " + quote_value(domain.interval) + " " + column_name + "\nFROM __digits__ a"
    else:
        sql = "SELECT (" + value + " * " + quote_value(domain.interval) + ") + " + quote_value(domain.min) + " " + column_name + "\nFROM __digits__ a"

    # ONE EXTRA JOIN FOR EACH DIGIT BEYOND THE FIRST
    for j in range(digits):
        sql += "\nJOIN __digits__ " + alias(j + 1) + " ON 1=1"
    sql += "\nWHERE " + value + " < " + quote_value(width)
    return sql
def _make_range_domain(self, domain, column_name):
    """
    Build the SQL that generates the values of a range domain.

    Numbers are produced by joining the `__digits__` table to itself, one
    alias (a, b, c, ...) for each decimal digit needed.

    A space is emitted between the value expression and `column_name`, and
    between `__digits__` and each join alias, so the tokens cannot run
    together in the generated statement.

    :param domain: object with `min`, `max` and `interval` attributes
    :param column_name: SQL placed after the value expression in the SELECT
    :return: the SQL for the domain
    """
    width = (domain.max - domain.min) / domain.interval
    digits = Math.floor(Math.log10(width - 1))
    if digits == 0:
        value = "a.value"
    else:
        value = SQL("+").join("1" + ("0" * j) + "*" + text_type(chr(ord(b'a') + j)) + ".value" for j in range(digits + 1))

    if domain.interval == 1:
        if domain.min == 0:
            domain = (
                SQL_SELECT + value + " " + column_name +
                SQL_FROM + "__digits__ a"
            )
        else:
            domain = (
                SQL_SELECT + sql_iso(value) + " + " + quote_value(domain.min) + " " + column_name +
                SQL_FROM + "__digits__ a"
            )
    else:
        if domain.min == 0:
            domain = (
                SQL_SELECT + value + " * " + quote_value(domain.interval) + " " + column_name +
                SQL_FROM + "__digits__ a"
            )
        else:
            domain = (
                SQL_SELECT + sql_iso(value + " * " + quote_value(domain.interval)) + " + " + quote_value(domain.min) + " " + column_name +
                SQL_FROM + "__digits__ a"
            )

    # ONE EXTRA JOIN FOR EACH DIGIT BEYOND THE FIRST; THE ALIAS MUST BE A SEPARATE TOKEN
    for j in range(digits):
        domain += SQL_INNER_JOIN + "__digits__ " + text_type(chr(ord(b'a') + j + 1)) + " ON " + SQL_TRUE
    domain += SQL_WHERE + value + " < " + quote_value(width)
    return domain
def wrap(query, schema=None):
    """
    NORMALIZE QUERY SO IT CAN STILL BE JSON

    :param query: a QueryOp or null (both returned untouched), or a query
                  structure to normalize
    :param schema: optional schema; when missing it is taken from the
                   normalized `from` clause, if that provides one
    :return: a QueryOp with every clause normalized
    """
    if isinstance(query, QueryOp) or query == None:
        return query

    query = wrap(query)

    output = QueryOp("from", None)
    output.format = query.format

    from jx_python import wrap_from
    frum = wrap_from(query["from"], schema=schema)
    output.frum = frum

    # FALL BACK TO WHATEVER SCHEMA THE SOURCE CAN OFFER
    if not schema and isinstance(output.frum, Schema):
        schema = output.frum
    if not schema and hasattr(output.frum, "schema"):
        schema = output.frum.schema

    if query.select or isinstance(query.select, (Mapping, list)):
        output.select = _normalize_selects(query.select, query.frum, schema=schema)
    elif query.edges or query.groupby:
        # AGGREGATING WITHOUT A SELECT MEANS COUNT
        output.select = Data(name="count", value=jx_expression("."), aggregate="count", default=0)
    else:
        output.select = _normalize_selects(".", query.frum)

    if query.groupby and query.edges:
        Log.error(
            "You can not use both the `groupby` and `edges` clauses in the same query!"
        )
    elif query.edges:
        output.edges = _normalize_edges(query.edges, schema=schema)
        output.groupby = Null
    elif query.groupby:
        output.edges = Null
        output.groupby = _normalize_groupby(query.groupby, schema=schema)
    else:
        output.edges = Null
        output.groupby = Null

    output.where = _normalize_where(query.where, schema=schema)
    windows = []
    for w in listwrap(query.window):
        windows.append(_normalize_window(w))
    output.window = windows
    output.having = None
    output.sort = _normalize_sort(query.sort)

    output.limit = Math.min(MAX_LIMIT, coalesce(query.limit, DEFAULT_LIMIT))
    limit_ok = Math.is_integer(output.limit) and not output.limit < 0
    if not limit_ok:
        Log.error("Expecting limit >= 0")

    output.isLean = query.isLean
    return output
def wrap(query, container, namespace):
    """
    NORMALIZE QUERY SO IT CAN STILL BE JSON

    :param query: a QueryOp or null (both returned untouched), or a query
                  structure to normalize
    :param container: provides `get_table()` for the query's `from` clause
    :param namespace: not used by this function
    :return: a QueryOp with every clause normalized
    """
    if isinstance(query, QueryOp) or query == None:
        return query

    query = wrap(query)
    table = container.get_table(query['from'])
    schema = table.schema
    output = QueryOp(
        op="from",
        frum=table,
        format=query.format,
        limit=Math.min(MAX_LIMIT, coalesce(query.limit, DEFAULT_LIMIT))
    )

    if query.select or isinstance(query.select, (Mapping, list)):
        output.select = _normalize_selects(query.select, query.frum, schema=schema)
    elif query.edges or query.groupby:
        output.select = DEFAULT_SELECT
    else:
        output.select = _normalize_selects(".", query.frum)

    if query.groupby and query.edges:
        Log.error(
            "You can not use both the `groupby` and `edges` clauses in the same query!"
        )
    elif query.edges:
        output.edges = _normalize_edges(query.edges, limit=output.limit, schema=schema)
        output.groupby = Null
    elif query.groupby:
        output.edges = Null
        output.groupby = _normalize_groupby(query.groupby, limit=output.limit, schema=schema)
    else:
        output.edges = Null
        output.groupby = Null

    output.where = _normalize_where(query.where, schema=schema)
    windows = []
    for w in listwrap(query.window):
        windows.append(_normalize_window(w))
    output.window = windows
    output.having = None
    output.sort = _normalize_sort(query.sort)

    # THE LIMIT WAS SET AT CONSTRUCTION, VALIDATE IT NOW
    limit_ok = Math.is_integer(output.limit) and not output.limit < 0
    if not limit_ok:
        Log.error("Expecting limit >= 0")

    output.isLean = query.isLean
    return output
def intervals(_min, _max=None, size=1):
    """
    RETURN (min, max) PAIRS OF GIVEN SIZE, WHICH COVER THE _min, _max RANGE
    THE LAST PAIR MAY BE SMALLER

    :param _min: start of the range (or the end, when `_max` is not given)
    :param _max: end of the range; when null the range is 0.._min
    :param size: step between the starts of consecutive pairs
    :return: a generator of (start, end) tuples
    """
    if _max == None:
        # SINGLE ARGUMENT FORM: THE ONLY ARGUMENT IS THE UPPER BOUND
        _min, _max = 0, _min

    upper = int(Math.ceiling(_max))
    lower = int(Math.floor(_min))

    return (
        (start, min(start + size, upper))
        for start in __builtin__.range(lower, upper, size)
    )
def intervals(_min, _max=None, size=1):
    """
    RETURN (min, max) PAIRS OF GIVEN SIZE, WHICH COVER THE _min, _max RANGE
    THE LAST PAIR MAY BE SMALLER

    :param _min: start of the range (or the end, when `_max` is not given)
    :param _max: end of the range; when null the range is 0.._min
    :param size: step between the starts of consecutive pairs
    :return: a generator of (start, end) tuples
    """
    if _max == None:
        # SINGLE ARGUMENT FORM: THE ONLY ARGUMENT IS THE UPPER BOUND
        _min, _max = 0, _min

    upper = int(Math.ceiling(_max))
    lower = int(Math.floor(_min))

    return (
        (start, min(start + size, upper))
        for start in _range(lower, upper, size)
    )
def wrap(query, schema=None):
    """
    NORMALIZE QUERY SO IT CAN STILL BE JSON

    :param query: a QueryOp or null (both returned untouched), or a query
                  structure to normalize
    :param schema: optional schema; when missing it is taken from the
                   normalized `from` clause, if that provides one
    :return: a QueryOp with every clause normalized
    """
    if isinstance(query, QueryOp) or query == None:
        return query

    query = wrap(query)

    output = QueryOp("from", None)
    output.format = query.format
    frum = wrap_from(query["from"], schema=schema)
    output.frum = frum

    # FALL BACK TO WHATEVER SCHEMA THE SOURCE CAN OFFER
    if not schema and isinstance(output.frum, Schema):
        schema = output.frum
    if not schema and hasattr(output.frum, "schema"):
        schema = output.frum.schema

    if query.select or isinstance(query.select, (Mapping, list)):
        output.select = _normalize_selects(query.select, query.frum, schema=schema)
    elif query.edges or query.groupby:
        # AGGREGATING WITHOUT A SELECT MEANS COUNT
        output.select = Data(name="count", value=jx_expression("."), aggregate="count", default=0)
    else:
        output.select = _normalize_selects(".", query.frum)

    if query.groupby and query.edges:
        Log.error("You can not use both the `groupby` and `edges` clauses in the same query!")
    elif query.edges:
        output.edges = _normalize_edges(query.edges, schema=schema)
        output.groupby = Null
    elif query.groupby:
        output.edges = Null
        output.groupby = _normalize_groupby(query.groupby, schema=schema)
    else:
        output.edges = Null
        output.groupby = Null

    output.where = _normalize_where(query.where, schema=schema)
    windows = []
    for w in listwrap(query.window):
        windows.append(_normalize_window(w))
    output.window = windows
    output.having = None
    output.sort = _normalize_sort(query.sort)

    output.limit = Math.min(MAX_LIMIT, coalesce(query.limit, DEFAULT_LIMIT))
    limit_ok = Math.is_integer(output.limit) and not output.limit < 0
    if not limit_ok:
        Log.error("Expecting limit >= 0")

    output.isLean = query.isLean
    return output
def int2Partition(value):
    """
    Map an integer to a partition of `edge.domain`.

    The first four and the last two characters of `str(value)` are passed to
    `datetime` (presumably a year and a month - confirm against the caller),
    the result is shifted by `offset`, then looked up in the domain.

    :param value: the integer key; a value that rounds to zero maps to the
                  domain's NULL part
    :return: the matching part of `edge.domain`
    """
    if Math.round(value) == 0:
        return edge.domain.NULL

    text = str(value)
    head = text[:4:]
    tail = text[-2:]
    shifted = datetime(head, tail, 1).addMilli(offset)
    return edge.domain.getPartByKey(shifted)
def quote_value(value):
    """
    convert values to mysql code for the same
    mostly delegate directly to the mysql lib, but some exceptions exist

    :param value: null, SQL, text, Mapping, number, datetime, Date, iterable
                  or any other object (converted to text first)
    :return: the SQL for the literal
    """
    date_format = "', '%Y%m%d%H%i%s.%f')"
    try:
        if value == None:
            return SQL_NULL
        if isinstance(value, SQL):
            return quote_sql(value.template, value.param)
        if isinstance(value, text_type):
            escaped = "".join(ESCAPE_DCT.get(c, c) for c in value)
            return SQL("'" + escaped + "'")
        if isinstance(value, Mapping):
            # STRUCTURES ARE STORED AS JSON TEXT
            return quote_value(json_encode(value))
        if Math.is_number(value):
            return SQL(text_type(value))
        if isinstance(value, datetime):
            return SQL("str_to_date('" + value.strftime("%Y%m%d%H%M%S.%f") + date_format)
        if isinstance(value, Date):
            return SQL("str_to_date('" + value.format("%Y%m%d%H%M%S.%f") + date_format)
        if hasattr(value, '__iter__'):
            return quote_value(json_encode(value))
        return quote_value(text_type(value))
    except Exception as e:
        Log.error("problem quoting SQL {{value}}", value=repr(value), cause=e)
def __init__(self, edge, query, limit):
    """
    Set up the decoder for an edge whose domain is defined by dimension fields.

    :param edge: the edge being decoded; its domain supplies the fields
    :param query: the query; its limit is the fallback for the domain limit
    :param limit: passed through to `AggsDecoder.__init__`
    """
    AggsDecoder.__init__(self, edge, query, limit)
    self.fields = edge.domain.dimension.fields
    domain = self.domain = self.edge.domain
    # DOMAIN LIMIT, THEN QUERY LIMIT, THEN 10; NEVER MORE THAN MAX_LIMIT
    requested = coalesce(domain.limit, query.limit, 10)
    domain.limit = Math.min(requested, MAX_LIMIT)
    self.parts = []
def parse(*args):
    """
    Convert the arguments to a Date.

    A single argument may be a datetime/date, a Date, a number, or text.
    Numbers (and text of 9, 10, 12 or 13 characters that is an integer)
    larger than 9999999999 are divided by 1000 before conversion.
    Several arguments are either text for `unicode2Date`, or the
    components for the `datetime` constructor.

    :return: the result of `_unix2Date` or `unicode2Date`
    """
    def from_number(number):
        number = float(number)
        if number > 9999999999:  # WAY TOO BIG IF IT WAS A UNIX TIMESTAMP
            return _unix2Date(number / 1000)
        return _unix2Date(number)

    try:
        if len(args) != 1:
            if isinstance(args[0], text_type):
                return unicode2Date(*args)
            return _unix2Date(datetime2unix(datetime(*args)))

        a0 = args[0]
        if isinstance(a0, (datetime, date)):
            return _unix2Date(datetime2unix(a0))
        if isinstance(a0, Date):
            return _unix2Date(a0.unix)
        if isinstance(a0, (int, long, float, Decimal)):
            return from_number(a0)
        if isinstance(a0, text_type) and len(a0) in [9, 10, 12, 13] and Math.is_integer(a0):
            return from_number(a0)
        if isinstance(a0, text_type):
            return unicode2Date(a0)
        return _unix2Date(datetime2unix(datetime(*args)))
    except Exception as e:
        from mo_logs import Log

        Log.error("Can not convert {{args}} to Date", args=args, cause=e)
def quote_value(self, value):
    """
    convert values to mysql code for the same
    mostly delegate directly to the mysql lib, but some exceptions exist

    :param value: null, SQL, text, Mapping, number, datetime, Date, iterable
                  or any other object (given to `self.db.literal`)
    :return: the quoted value
    """
    date_format = "', '%Y%m%d%H%i%s.%f')"
    try:
        if value == None:
            return SQL("NULL")
        if isinstance(value, SQL):
            if not value.param:
                # value.template CAN BE MORE THAN A TEMPLATE STRING
                return self.quote_sql(value.template)
            param = dict((k, self.quote_sql(v)) for k, v in value.param.items())
            return SQL(expand_template(value.template, param))
        if isinstance(value, basestring):
            return SQL(self.db.literal(value))
        if isinstance(value, Mapping):
            return SQL(self.db.literal(json_encode(value)))
        if Math.is_number(value):
            return SQL(text_type(value))
        if isinstance(value, datetime):
            return SQL("str_to_date('" + value.strftime("%Y%m%d%H%M%S.%f") + date_format)
        if isinstance(value, Date):
            return SQL("str_to_date('" + value.format("%Y%m%d%H%M%S.%f") + date_format)
        if hasattr(value, '__iter__'):
            return SQL(self.db.literal(json_encode(value)))
        # NOTE: THIS LAST CASE IS NOT WRAPPED IN SQL()
        return self.db.literal(value)
    except Exception as e:
        Log.error("problem quoting SQL", e)
def quote_value(self, value):
    """
    convert values to mysql code for the same
    mostly delegate directly to the mysql lib, but some exceptions exist

    :param value: null, SQL, text, Mapping, number, datetime, Date, iterable
                  or any other object (given to `self.db.literal`)
    :return: the quoted value
    """
    date_format = "', '%Y%m%d%H%i%s.%f')"
    try:
        if value == None:
            return SQL("NULL")
        if isinstance(value, SQL):
            if not value.param:
                # value.template CAN BE MORE THAN A TEMPLATE STRING
                return self.quote_sql(value.template)
            param = dict((k, self.quote_sql(v)) for k, v in value.param.items())
            return SQL(expand_template(value.template, param))
        if isinstance(value, basestring):
            return SQL(self.db.literal(value))
        if isinstance(value, Mapping):
            return SQL(self.db.literal(json_encode(value)))
        if Math.is_number(value):
            return SQL(unicode(value))
        if isinstance(value, datetime):
            return SQL("str_to_date('" + value.strftime("%Y%m%d%H%M%S.%f") + date_format)
        if isinstance(value, Date):
            return SQL("str_to_date('" + value.format("%Y%m%d%H%M%S.%f") + date_format)
        if hasattr(value, '__iter__'):
            return SQL(self.db.literal(json_encode(value)))
        # NOTE: THIS LAST CASE IS NOT WRAPPED IN SQL()
        return self.db.literal(value)
    except Exception as e:
        Log.error("problem quoting SQL", e)
def quote_value(self, value):
    """
    convert values to mysql code for the same
    mostly delegate directly to the mysql lib, but some exceptions exist

    Text is wrapped in single quotes, with embedded single quotes doubled.

    :param value: null, SQL, text, Mapping, number, datetime, Date, iterable
                  or any other object (converted to text first)
    :return: the SQL for the literal
    """
    date_format = "', '%Y%m%d%H%i%s.%f')"
    try:
        if value == None:
            return SQL_NULL
        if isinstance(value, SQL):
            return self.quote_sql(value.template, value.param)
        if isinstance(value, text_type):
            doubled = value.replace("'", "''")
            return SQL("'" + doubled + "'")
        if isinstance(value, Mapping):
            # STRUCTURES ARE STORED AS JSON TEXT
            return self.quote_value(json_encode(value))
        if Math.is_number(value):
            return SQL(text_type(value))
        if isinstance(value, datetime):
            return SQL("str_to_date('" + value.strftime("%Y%m%d%H%M%S.%f") + date_format)
        if isinstance(value, Date):
            return SQL("str_to_date('" + value.format("%Y%m%d%H%M%S.%f") + date_format)
        if hasattr(value, '__iter__'):
            return self.quote_value(json_encode(value))
        return self.quote_value(text_type(value))
    except Exception as e:
        Log.error("problem quoting SQL {{value}}", value=repr(value), cause=e)
def convert(self, expr):
    """
    EXPAND INSTANCES OF name TO value

    :param expr: a literal, variable name, Date, QueryOp, Mapping or sequence
    :return: the expression with known dimension names replaced
    """
    if expr is True or expr == None or expr is False:
        return expr
    if Math.is_number(expr):
        return expr
    if expr == ".":
        return "."
    if is_variable_name(expr):
        return coalesce(self.dimensions[expr], expr)
    if isinstance(expr, basestring):
        Log.error("{{name|quote}} is not a valid variable name", name=expr)
    elif isinstance(expr, Date):
        return expr
    elif isinstance(expr, QueryOp):
        return self._convert_query(expr)
    elif isinstance(expr, Mapping):
        if expr["from"]:
            return self._convert_query(expr)
        if len(expr) >= 2:
            # ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION
            converted = {name: self.convert(value) for name, value in expr.leaves()}
            return wrap(converted)
        # ASSUME SINGLE-CLAUSE EXPRESSION
        k, v = expr.items()[0]
        handler = converter_map.get(k, self._convert_bop)
        return handler(self, k, v)
    elif isinstance(expr, (list, set, tuple)):
        return wrap([self.convert(value) for value in expr])
    else:
        return expr
def end(self):
    """
    Return statistics for the middle portion of the collected samples.

    An equal number of the smallest and largest samples are ignored, based
    on `self.middle`. The returned object's `samples` is then replaced with
    a copy of all the samples.

    :return: a `stats.Stats`; an empty one when everything would be ignored
    """
    total = len(self.samples)
    ignore = Math.ceiling(total * (1 - self.middle) / 2)
    if ignore * 2 >= total:
        return stats.Stats()

    ordered = sorted(self.samples)
    output = stats.Stats(samples=ordered[ignore:total - ignore:])
    output.samples = list(self.samples)
    return output
def __init__(self, edge, query, limit):
    """
    Set up the decoder for an edge whose domain is defined by dimension fields.

    Nulls are turned off on the edge.

    :param edge: the edge being decoded; its domain supplies the fields
    :param query: the query; its limit is the fallback for the domain limit
    :param limit: passed through to `AggsDecoder.__init__`
    """
    AggsDecoder.__init__(self, edge, query, limit)
    edge.allowNulls = False
    self.fields = edge.domain.dimension.fields
    domain = self.domain = self.edge.domain
    # DOMAIN LIMIT, THEN QUERY LIMIT, THEN 10; NEVER MORE THAN MAX_LIMIT
    requested = coalesce(domain.limit, query.limit, 10)
    domain.limit = Math.min(requested, MAX_LIMIT)
    self.parts = []
def __init__(self, edge, query, limit):
    """
    Set up the decoder for an edge with a scripted value.

    :param edge: the edge being decoded
    :param query: the query; supplies the fallback limit and the sort clauses
    :param limit: passed through to `AggsDecoder.__init__`
    """
    AggsDecoder.__init__(self, edge, query, limit)
    domain = self.domain = edge.domain
    # DOMAIN LIMIT, THEN QUERY LIMIT, THEN 10; NEVER MORE THAN MAX_LIMIT
    domain.limit = Math.min(coalesce(domain.limit, query.limit, 10), MAX_LIMIT)
    self.parts = []
    self.key2index = {}
    self.computed_domain = False

    simplified = self.edge.value.partial_eval()
    self.script = simplified.to_es14_script(self.schema)
    self.pull = pull_functions[self.script.data_type]
    self.missing = self.script.miss.partial_eval()
    self.exists = NotOp("not", self.missing).partial_eval()

    # WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM
    sort_candidates = [s for s in self.query.sort if s.value == self.edge.value]
    if not sort_candidates:
        self.es_order = None
    else:
        direction = {1: "asc", -1: "desc"}
        self.es_order = {"_term": direction[sort_candidates[0].sort]}
def convert(self, expr):
    """
    EXPAND INSTANCES OF name TO value

    :param expr: a literal, variable name, Date, QueryOp, Mapping or sequence
    :return: the expression with known dimension names replaced
    """
    if expr is True or expr == None or expr is False:
        return expr
    if Math.is_number(expr):
        return expr
    if expr == ".":
        return "."
    if is_variable_name(expr):
        return coalesce(self.dimensions[expr], expr)
    if isinstance(expr, text_type):
        Log.error("{{name|quote}} is not a valid variable name", name=expr)
    elif isinstance(expr, Date):
        return expr
    elif isinstance(expr, QueryOp):
        return self._convert_query(expr)
    elif isinstance(expr, Mapping):
        if expr["from"]:
            return self._convert_query(expr)
        if len(expr) >= 2:
            # ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION
            converted = {name: self.convert(value) for name, value in expr.leaves()}
            return wrap(converted)
        # ASSUME SINGLE-CLAUSE EXPRESSION
        k, v = expr.items()[0]
        handler = converter_map.get(k, self._convert_bop)
        return handler(self, k, v)
    elif isinstance(expr, (list, set, tuple)):
        return wrap([self.convert(value) for value in expr])
    else:
        return expr
def _normalize_sort(sort=None):
    """
    CONVERT SORT PARAMETERS TO A NORMAL FORM SO EASIER TO USE

    :param sort: null, or one or many of: text, Expression, integer,
                 {name: direction} mapping, or {"value"/"field", "sort"} mapping
    :return: FlatList of {"value": expression, "sort": direction}
    """
    if sort == None:
        return FlatList.EMPTY

    output = FlatList()
    for s in listwrap(sort):
        if isinstance(s, basestring):
            expr = jx_expression(s)
            output.append({"value": expr, "sort": 1})
            continue
        if isinstance(s, Expression):
            output.append({"value": s, "sort": 1})
            continue
        if Math.is_integer(s):
            # AN INTEGER REFERS TO A COLUMN BY OFFSET
            output.append({"value": OffsetOp("offset", s), "sort": 1})
            continue

        if all(d in sort_direction for d in s.values()) and not s.sort and not s.value:
            # {name: direction} FORM
            for v, d in s.items():
                expr = jx_expression(v)
                output.append({"value": expr, "sort": sort_direction[d]})
        else:
            expr = jx_expression(coalesce(s.value, s.field))
            output.append({"value": expr, "sort": coalesce(sort_direction[s.sort], 1)})
    return output
def wrap(query, container, namespace):
    """
    NORMALIZE QUERY SO IT CAN STILL BE JSON

    :param query: a QueryOp or null (both returned untouched), or a query
                  structure to normalize
    :param container: provides `get_table()` for the query's `from` clause
    :param namespace: not used by this function
    :return: a QueryOp with every clause normalized
    """
    if isinstance(query, QueryOp) or query == None:
        return query

    query = wrap(query)
    table = container.get_table(query['from'])
    schema = table.schema
    output = QueryOp(
        op="from",
        frum=table,
        format=query.format,
        limit=Math.min(MAX_LIMIT, coalesce(query.limit, DEFAULT_LIMIT))
    )

    if query.select or isinstance(query.select, (Mapping, list)):
        output.select = _normalize_selects(query.select, query.frum, schema=schema)
    elif query.edges or query.groupby:
        output.select = DEFAULT_SELECT
    else:
        output.select = _normalize_selects(".", query.frum)

    if query.groupby and query.edges:
        Log.error("You can not use both the `groupby` and `edges` clauses in the same query!")
    elif query.edges:
        output.edges = _normalize_edges(query.edges, limit=output.limit, schema=schema)
        output.groupby = Null
    elif query.groupby:
        output.edges = Null
        output.groupby = _normalize_groupby(query.groupby, limit=output.limit, schema=schema)
    else:
        output.edges = Null
        output.groupby = Null

    output.where = _normalize_where(query.where, schema=schema)
    windows = []
    for w in listwrap(query.window):
        windows.append(_normalize_window(w))
    output.window = windows
    output.having = None
    output.sort = _normalize_sort(query.sort)

    # THE LIMIT WAS SET AT CONSTRUCTION, VALIDATE IT NOW
    limit_ok = Math.is_integer(output.limit) and not output.limit < 0
    if not limit_ok:
        Log.error("Expecting limit >= 0")

    output.isLean = query.isLean
    return output
def icompressed2ibytes(source):
    """
    :param source: GENERATOR OF COMPRESSED BYTES
    :return: GENERATOR OF BYTES
    """
    decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)
    reported = 0  # Track the last byte count, so we do not show too many debug lines
    total = 0
    for chunk in source:
        data = decompressor.decompress(chunk)
        total += len(data)
        # ONLY NOTE PROGRESS WHEN THE TOTAL MOVES INTO A NEW 1000000 BUCKET
        crossed = Math.floor(reported, 1000000) != Math.floor(total, 1000000)
        if crossed:
            reported = total
            DEBUG and Log.note("bytes={{bytes}}", bytes=total)
        yield data
def value2query(value):
    """
    Convert a Python value to the form used inside a query.

    :param value: datetime, Duration, number, or anything `quote` accepts
    :return: milliseconds for a datetime or Duration, the number itself for
             a number, otherwise `quote(value)`
    """
    if isinstance(value, datetime):
        result = convert.datetime2milli(value)
    elif isinstance(value, Duration):
        result = value.milli
    elif Math.is_number(value):
        result = value
    else:
        result = quote(value)
    return result
def icompressed2ibytes(source):
    """
    :param source: GENERATOR OF COMPRESSED BYTES
    :return: GENERATOR OF BYTES
    """
    decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)
    reported = 0  # Track the last byte count, so we do not show too many debug lines
    total = 0
    for chunk in source:
        try:
            data = decompressor.decompress(chunk)
        except Exception as e:
            Log.error("problem", cause=e)
        total += len(data)
        # ONLY NOTE PROGRESS WHEN THE TOTAL MOVES INTO A NEW 1000000 BUCKET
        crossed = Math.floor(reported, 1000000) != Math.floor(total, 1000000)
        if crossed:
            reported = total
            if DEBUG:
                Log.note("bytes={{bytes}}", bytes=total)
        yield data
def required_utility(self):
    """
    Return how much utility is required right now.

    The answer is the largest of: the configured minimum utility, a
    time-of-day minimum (100 when the current hour is outside 4..11), and
    one unit for every 30 messages pending in the work queue (rounded up).

    Missing candidates are dropped before comparing, because `max()` can
    not order `None` against numbers on Python 3.

    :return: the required utility
    """
    queue = aws.Queue(self.settings.work_queue)
    pending = len(queue)

    tod_minimum = None
    if Date.now().hour not in [4, 5, 6, 7, 8, 9, 10, 11]:
        tod_minimum = 100

    candidates = [
        self.settings.minimum_utility,
        tod_minimum,
        Math.ceiling(pending / 30)
    ]
    # `!= None` ALSO DROPS NULL-LIKE VALUES THAT COMPARE EQUAL TO None
    return max(c for c in candidates if c != None)
def floor(self, interval=None):
    """
    Round this duration down to a whole multiple of `interval`.

    :param interval: a Duration; an error is logged for anything else
    :return: a new Duration
    """
    if not isinstance(interval, Duration):
        from mo_logs import Log
        Log.error("Expecting an interval as a Duration object")

    output = Duration(0)
    if not interval.month:
        step = interval.milli
        output.milli = Math.floor(self.milli / (step)) * (step)
        return output

    if self.month:
        periods = Math.floor(self.month / interval.month)
    else:
        # A MONTH OF DURATION IS BIGGER THAN A CANONICAL MONTH
        periods = Math.floor(self.milli * 12 / MILLI_VALUES["year"] / interval.month)

    output.month = int(periods * interval.month)
    output.milli = output.month * MILLI_VALUES.month
    return output
def pop(self, wait=SECOND, till=None):
    """
    Read one message from the queue and return its decoded JSON body.

    The raw message is appended to `self.pending`.

    :param wait: how long to wait for a message; `wait.seconds` is floored
    :param till: optional Signal; only its type is checked here
    :return: the decoded body, or None when no message was read
    """
    if till is not None and not isinstance(till, Signal):
        Log.error("Expecting a signal")

    seconds = Math.floor(wait.seconds)
    m = self.queue.read(wait_time_seconds=seconds)
    if not m:
        return None

    self.pending.append(m)
    return mo_json.json2value(m.get_body())
def value2MVEL(value):
    """
    FROM PYTHON VALUE TO MVEL EQUIVALENT

    :param value: datetime, Duration, number, or anything `quote` accepts
    :return: text; times and durations are milliseconds followed by a
             comment holding the readable form
    """
    if isinstance(value, datetime):
        # TIME
        millis = str(convert.datetime2milli(value))
        return millis + " /*" + value.format("yyNNNdd HHmmss") + "*/"
    if isinstance(value, Duration):
        # DURATION
        millis = str(convert.timedelta2milli(value))
        return millis + " /*" + str(value) + "*/"
    if Math.is_number(value):
        return str(value)
    return quote(value)
def to_sql(self, schema, not_null=False, boolean=False):
    """
    Return the SQL for this literal, keyed by a one-letter type code.

    :param schema: not used here
    :param not_null: not used here
    :param boolean: not used here
    :return: wrapped list holding one {"name": ".", "sql": {code: sql}};
             the "sql" entry is absent when the quoted value is null.
             Codes are "s" (text), "n" (number), "b" (boolean), "j" (json).
    """
    value = self.value
    v = quote_value(value)
    if v == None:
        return wrap([{"name": "."}])

    if isinstance(value, text_type):
        sql = {"s": quote_value(value)}
    elif Math.is_number(v):
        sql = {"n": quote_value(value)}
    elif v in [True, False]:
        sql = {"b": quote_value(value)}
    else:
        # ANYTHING ELSE IS STORED AS ITS JSON
        sql = {"j": quote_value(self.json)}
    return wrap([{"name": ".", "sql": sql}])
def to_sql(self, schema, not_null=False, boolean=False):
    """
    Return the SQL for this literal, keyed by a one-letter type code.

    The value is decoded from `self.json`.

    :param schema: not used here
    :param not_null: not used here
    :param boolean: not used here
    :return: wrapped list holding one {"name": ".", "sql": {code: sql}};
             the "sql" entry is absent when the quoted value is null.
             Codes are "s" (text), "n" (number), "b" (boolean), "j" (json).
    """
    value = json2value(self.json)
    v = sql_quote(value)
    if v == None:
        return wrap([{"name": "."}])

    if isinstance(value, unicode):
        sql = {"s": sql_quote(value)}
    elif Math.is_number(v):
        sql = {"n": sql_quote(value)}
    elif v in [True, False]:
        sql = {"b": sql_quote(value)}
    else:
        # ANYTHING ELSE IS STORED AS ITS JSON
        sql = {"j": sql_quote(self.json)}
    return wrap([{"name": ".", "sql": sql}])
def get_json(url, **kwargs):
    """
    ASSUME RESPONSE IN IN JSON

    :param url: the address to GET
    :param kwargs: passed through to `get`
    :return: the decoded JSON content of the response
    """
    response = get(url, **kwargs)
    try:
        return json2value(utf82unicode(response.all_content))
    except Exception as e:
        # STATUS ROUNDED TO THE NEAREST HUNDRED TELLS US WHO TO BLAME
        rounded = Math.round(response.status_code, decimal=-2)
        if rounded not in [400, 500]:
            Log.error(u"Good GET requests, but bad JSON", cause=e)
        else:
            Log.error(u"Bad GET response: {{code}}", code=response.status_code)
def __getslice__(self, i, j):
    """
    Read the bytes [i, j) from the file and return them decoded.

    :param i: offset of the first byte
    :param j: offset just past the last byte; capped at `len(self)`
    :return: the decoded text
    """
    j = Math.min(j, len(self))
    # GUARD THE SLICE LENGTH (j - i), WHICH IS WHAT THE MESSAGE REPORTS,
    # NOT THE END OFFSET
    if j - i > 2 ** 28:
        Log.error("Slice of {{num}} bytes is too big", num=j - i)
    try:
        self.file.seek(i)
        output = self.file.read(j - i).decode(self.encoding)
        return output
    except Exception as e:
        Log.error(
            "Can not read file slice at {{index}}, with encoding {{encoding}}",
            index=i,
            encoding=self.encoding,
            cause=e
        )
def next(self, value):
    """
    Accept the next row, resetting the child whenever a new batch starts.

    :param value: a sequence; the first item is converted to a Date, the
                  rest is handed to `self.child.next`
    :return: `[batch]` followed by whatever the child returns
    """
    v = Date(value[0])
    if self.last_value.floor(self.duration) > v:
        Log.error("Expecting strictly increasing")
    self.last_value = v

    # BATCH NUMBER: HOW MANY DURATIONS SINCE THE START
    elapsed = v.floor(self.duration) - self.start
    key = Math.round(elapsed / self.duration, decimal=0)
    if key != self.batch:
        self.child.reset()
        self.batch = key

    rest = self.child.next(value[1:])
    return [self.batch] + rest
def pop_message(self, wait=SECOND, till=None):
    """
    RETURN TUPLE (message, payload)
    CALLER IS RESPONSIBLE FOR CALLING message.delete() WHEN DONE

    :param wait: how long to wait for a message; `wait.seconds` is floored
    :param till: optional Signal; only its type is checked here
    :return: (message, payload), or None when no message was read
    """
    if till is not None and not isinstance(till, Signal):
        Log.error("Expecting a signal")

    seconds = Math.floor(wait.seconds)
    message = self.queue.read(wait_time_seconds=seconds)
    if not message:
        return None

    def _delete():
        return self.queue.delete_message(message)

    message.delete = _delete

    payload = mo_json.json2value(message.get_body())
    return message, payload
def _convert_query(self, query):
    """
    Convert every clause of `query` and return the result as a new QueryOp.

    :param query: the query structure
    :return: the converted QueryOp
    """
    # if not isinstance(query["from"], Container):
    #     Log.error('Expecting from clause to be a Container')
    query = wrap(query)

    output = QueryOp("from", None)
    output["from"] = self._convert_from(query["from"])
    output.format = query.format

    if query.select:
        output.select = convert_list(self._convert_select, query.select)
    elif query.edges or query.groupby:
        output.select = {"name": "count", "value": ".", "aggregate": "count", "default": 0}
    else:
        output.select = {"name": "__all__", "value": "*", "aggregate": "none"}

    if query.groupby and query.edges:
        Log.error("You can not use both the `groupby` and `edges` clauses in the same query!")
    elif query.edges:
        output.edges = convert_list(self._convert_edge, query.edges)
        output.groupby = None
    elif query.groupby:
        output.edges = None
        output.groupby = convert_list(self._convert_group, query.groupby)
    else:
        output.edges = []
        output.groupby = None

    output.where = self.convert(query.where)
    output.window = convert_list(self._convert_window, query.window)
    output.sort = self._convert_sort(query.sort)

    output.limit = coalesce(query.limit, DEFAULT_LIMIT)
    limit_ok = Math.is_integer(output.limit) and not output.limit < 0
    if not limit_ok:
        Log.error("Expecting limit >= 0")

    output.isLean = query.isLean

    # DEPTH ANALYSIS - LOOK FOR COLUMN REFERENCES THAT MAY BE DEEPER THAN
    # THE from SOURCE IS.
    referenced = get_all_vars(output, exclude_where=True)  # WE WILL EXCLUDE where VARIABLES
    for c in query.columns:
        too_deep = c.name in referenced and len(c.nested_path) != 1
        if too_deep:
            Log.error("This query, with variable {{var_name}} is too deep", var_name=c.name)

    output.having = convert_list(self._convert_having, query.having)

    return output
def __init__(self, edge, query, limit):
    """
    Set up the decoder and work out whether the query sorts by this edge.

    :param edge: the edge being decoded
    :param query: the query; supplies the fallback limit and the sort clauses
    :param limit: passed through to `AggsDecoder.__init__`
    """
    AggsDecoder.__init__(self, edge, query, limit)
    domain = self.domain = edge.domain
    # DOMAIN LIMIT, THEN QUERY LIMIT, THEN 10; NEVER MORE THAN MAX_LIMIT
    domain.limit = Math.min(coalesce(domain.limit, query.limit, 10), MAX_LIMIT)
    self.parts = []
    self.key2index = {}
    self.computed_domain = False

    # WE ASSUME IF THE VARIABLES MATCH, THEN THE SORT TERM AND EDGE TERM MATCH, AND WE SORT BY TERM
    self.sorted = None
    direction = {1: "asc", -1: "desc"}
    edge_var = edge.value.vars()
    for s in query.sort:
        unmatched = edge_var - s.value.vars()
        if unmatched:
            continue
        # THE LAST MATCHING SORT CLAUSE WINS
        self.sorted = direction[s.sort]
def setup(
    self,
    instance,  # THE boto INSTANCE OBJECT FOR THE MACHINE TO SETUP
    utility  # THE utility OBJECT FOUND IN CONFIG
):
    """
    Run every install step on the given machine, while holding `self.locker`.

    :param instance: the machine to set up
    :param utility: its `memory` (floored) is passed to the ES install step
    """
    with self.locker:
        self.instance = instance
        memory_size = Math.floor(utility.memory)
        Log.note("setup {{instance}}", instance=instance.id)

        with hide('output'):
            self._config_fabric(instance)
            self._install_indexer()
            self._install_es(memory_size)
            self._install_supervisor()
            self._start_supervisor()
def __getslice__(self, i, j):
    """
    Read the bytes [i, j) from the file and return them decoded.

    :param i: offset of the first byte
    :param j: offset just past the last byte; capped at `len(self)`
    :return: the decoded text
    """
    j = Math.min(j, len(self))
    # GUARD THE SLICE LENGTH (j - i), WHICH IS WHAT THE MESSAGE REPORTS,
    # NOT THE END OFFSET
    if j - i > 2 ** 28:
        Log.error("Slice of {{num}} bytes is too big", num=j - i)
    try:
        self.file.seek(i)
        output = self.file.read(j - i).decode(self.encoding)
        return output
    except Exception as e:
        Log.error(
            "Can not read file slice at {{index}}, with encoding {{encoding}}",
            index=i,
            encoding=self.encoding,
            cause=e
        )
def compressed_bytes2ibytes(compressed, size):
    """
    CONVERT AN ARRAY OF BYTES TO A BYTE-BLOCK GENERATOR

    USEFUL IN THE CASE WHEN WE WANT TO LIMIT HOW MUCH WE FEED ANOTHER
    GENERATOR (LIKE A DECOMPRESSOR)

    :param compressed: the compressed bytes
    :param size: how many compressed bytes to decompress at a time
    :return: generator of decompressed blocks
    """
    decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)

    end = Math.ceiling(len(compressed), size)
    for start in range(0, end, size):
        try:
            piece = compressed[start:start + size]
            yield decompressor.decompress(piece)
        except Exception as e:
            Log.error("Not expected", e)
def compressed_bytes2ibytes(compressed, size):
    """
    CONVERT AN ARRAY OF BYTES TO A BYTE-BLOCK GENERATOR

    USEFUL IN THE CASE WHEN WE WANT TO LIMIT HOW MUCH WE FEED ANOTHER
    GENERATOR (LIKE A DECOMPRESSOR)

    :param compressed: the compressed bytes
    :param size: how many compressed bytes to decompress at a time
    :return: generator of decompressed blocks
    """
    decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)

    stop = Math.ceiling(len(compressed), size)
    for offset in range(0, stop, size):
        try:
            block = compressed[offset: offset + size]
            yield decompressor.decompress(block)
        except Exception as e:
            Log.error("Not expected", e)
def __init__(self, edge, query, limit):
    """
    Set up the decoder for an edge with a scripted value.

    :param edge: the edge being decoded
    :param query: the query; supplies the fallback limit and the sort clauses
    :param limit: passed through to `AggsDecoder.__init__`
    """
    AggsDecoder.__init__(self, edge, query, limit)
    domain = self.domain = edge.domain
    # DOMAIN LIMIT, THEN QUERY LIMIT, THEN 10; NEVER MORE THAN MAX_LIMIT
    domain.limit = Math.min(coalesce(domain.limit, query.limit, 10), MAX_LIMIT)
    self.parts = []
    self.key2index = {}
    self.computed_domain = False

    simplified = self.edge.value.partial_eval()
    self.script = simplified.to_es_script(self.schema)
    self.pull = pull_functions[self.script.data_type]
    self.missing = self.script.miss.partial_eval()
    self.exists = NotOp("not", self.missing).partial_eval()

    # WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM
    sort_candidates = [s for s in self.query.sort if s.value == self.edge.value]
    if not sort_candidates:
        self.es_order = None
    else:
        direction = {1: "asc", -1: "desc"}
        self.es_order = {"_term": direction[sort_candidates[0].sort]}
def __init__(self, edge, query, limit):
    """
    Set up the decoder for an edge whose value covers all leaves under a prefix.

    The edge's domain is replaced with one built from the leaf columns found
    in the query's source schema.

    :param edge: the edge being decoded; its value names the prefix
    :param query: the query; supplies the schema and the fallback limit
    :param limit: passed through to `AggsDecoder.__init__`
    """
    AggsDecoder.__init__(self, edge, query, limit)

    if isinstance(edge.value, LeavesOp):
        prefix = edge.value.term.var

        def flatter(k):
            return literal_field(relative_field(k, prefix))
    else:
        prefix = edge.value.var

        def flatter(k):
            return relative_field(k, prefix)

    pairs = []
    for c in query.frum.schema.leaves(prefix):
        pairs.append((flatter(untype_path(c.names["."])), c.es_column))
    self.put, self.fields = transpose(*pairs)

    domain = wrap({"dimension": {"fields": self.fields}})
    self.domain = domain
    self.edge.domain = domain
    # DOMAIN LIMIT, THEN QUERY LIMIT, THEN 10; NEVER MORE THAN MAX_LIMIT
    domain.limit = Math.min(coalesce(domain.limit, query.limit, 10), MAX_LIMIT)
    self.parts = []
    self.key2index = {}
    self.computed_domain = False
def normalize_sort(sort=None):
    """
    CONVERT SORT PARAMETERS TO A NORMAL FORM SO EASIER TO USE

    :param sort: nothing, or one or many of: text, integer,
                 {name: direction} mapping, or {"field"/"value", "sort"} mapping
    :return: Null when `sort` is empty, otherwise the wrapped list of
             {"value": ..., "sort": direction}
    """
    if not sort:
        return Null

    output = FlatList()
    for s in listwrap(sort):
        if isinstance(s, basestring) or Math.is_integer(s):
            output.append({"value": s, "sort": 1})
            continue

        if not s.field and not s.value and s.sort==None:
            # ASSUME {name: sort} FORM
            for n, v in s.items():
                output.append({"value": n, "sort": sort_direction[v]})
        else:
            target = coalesce(s.field, s.value)
            output.append({"value": target, "sort": coalesce(sort_direction[s.sort], 1)})
    return wrap(output)
def _normalize_sort(sort=None):
    """
    CONVERT SORT PARAMETERS TO A NORMAL FORM SO EASIER TO USE

    :param sort: null, or one or many of: text, Expression, integer,
                 {name: direction} mapping, or {"value"/"field", "sort"} mapping
    :return: FlatList of {"value": expression, "sort": direction}
    """
    if sort==None:
        return FlatList.EMPTY

    output = FlatList()
    for s in listwrap(sort):
        if isinstance(s, basestring):
            expr = jx_expression(s)
            output.append({"value": expr, "sort": 1})
            continue
        if isinstance(s, Expression):
            output.append({"value": s, "sort": 1})
            continue
        if Math.is_integer(s):
            # AN INTEGER REFERS TO A COLUMN BY OFFSET
            output.append({"value": OffsetOp("offset", s), "sort": 1})
            continue

        if all(d in sort_direction for d in s.values()) and not s.sort and not s.value:
            # {name: direction} FORM
            for v, d in s.items():
                expr = jx_expression(v)
                output.append({"value": expr, "sort": sort_direction[d]})
        else:
            expr = jx_expression(coalesce(s.value, s.field))
            output.append({"value": expr, "sort": coalesce(sort_direction[s.sort], 1)})
    return output
def _scrub(r): try: if r == None: return None elif isinstance(r, basestring): if r == "": return None return r elif Math.is_number(r): return convert.value2number(r) elif isinstance(r, Mapping): if isinstance(r, Data): r = object.__getattribute__(r, "_dict") output = {} for k, v in r.items(): v = _scrub(v) if v != None: output[k.lower()] = v if len(output) == 0: return None return output elif hasattr(r, '__iter__'): if isinstance(r, FlatList): r = r.list output = [] for v in r: v = _scrub(v) if v != None: output.append(v) if not output: return None elif len(output) == 1: return output[0] else: return output else: return r except Exception as e: Log.warning("Can not scrub: {{json}}", json=r, cause=e)