def _dict2json(value, sub_schema, path, net_new_properties, buffer):
    """
    Emit a typed-JSON object for `value` into `buffer`.

    Properties are written in sorted-key order; None and empty-string values
    are skipped entirely.  Keys not yet in `sub_schema` are added to it and
    recorded in `net_new_properties`.  An EXISTS marker is always appended.
    """
    wrote_property = False
    for key, item in sort_using_key(value.items(), lambda r: r[0]):
        # skip missing values entirely
        if item == None or item == "":
            continue
        append(buffer, COMMA if wrote_property else "{")
        wrote_property = True
        if is_binary(key):
            key = key.decode("utf8")
        if not is_text(key):
            Log.error("Expecting property name to be a string")
        if key not in sub_schema:
            # never-seen property: extend the schema and report it
            sub_schema[key] = {}
            net_new_properties.append(path + [key])
        append(buffer, quote(encode_property(key)))
        append(buffer, COLON)
        typed_encode(item, sub_schema[key], path + [key], net_new_properties, buffer)
    # close the object, always including the EXISTS marker
    if wrote_property:
        append(buffer, COMMA)
    else:
        append(buffer, "{")
    append(buffer, QUOTED_EXISTS_KEY)
    append(buffer, "1}")
def to_ruby(self, schema):
    """Translate this variable reference into a Ruby/script expression."""
    if self.var == ".":
        # the whole document
        return "_source"
    if self.var == "_id":
        # _id is stored inside the _uid field as "<type>#<id>"
        return Ruby(
            type=STRING,
            expr='doc["_uid"].value.substring(doc["_uid"].value.indexOf(\'#\')+1)',
            frum=self,
        )
    scripts = []
    for col in schema.values(self.var):
        source = Variable(col.es_column)
        scripts.append(Ruby(
            miss=source.missing(),
            type=col.type,
            expr="doc[" + quote(col.es_column) + "].values",
            frum=source,
            many=True,
        ))
    if not scripts:
        return NULL.to_ruby(schema)
    if len(scripts) == 1:
        return scripts[0]
    return CoalesceOp("coalesce", scripts).to_ruby(schema)
def unicode_key(key):
    """CONVERT PROPERTY NAME TO ITS QUOTED JSON FORM"""
    is_valid = isinstance(key, basestring)
    if not is_valid:
        # non-string key: report it (import deferred to the error path)
        from mo_logs import Log
        Log.error("{{key|quote}} is not a valid key", key=key)
    return quote(unicode(key))
def unicode_key(key):
    """Quote `key` for use as a JSON property name."""
    if isinstance(key, basestring):
        return quote(unicode(key))
    # invalid key type: report, then attempt the conversion anyway
    from mo_logs import Log
    Log.error("{{key|quote}} is not a valid key", key=key)
    return quote(unicode(key))
def unicode_key(key):
    """Return property name `key` as a quoted JSON string."""
    acceptable = isinstance(key, (text, binary_type))
    if not acceptable:
        # anything other than text/bytes is a fatal caller error
        from mo_logs import Log
        Log.error("{{key|quote}} is not a valid key", key=key)
    return quote(text(key))
def to_ruby(self, schema, not_null=False, boolean=True):
    """Script that evaluates true when this expression's value is missing."""
    if not isinstance(self.expr, Variable):
        # non-variable: delegate to the expression's own missing() logic
        return self.expr.missing().partial_eval().to_ruby(schema)
    if self.expr.var == "_id":
        # _id is never missing
        return Ruby(type=BOOLEAN, expr="false", frum=self)
    # a variable is missing only when every backing column is empty
    empties = [
        Ruby(type=BOOLEAN, expr="doc[" + quote(c.es_column) + "].isEmpty()", frum=self)
        for c in schema.leaves(self.expr.var)
    ]
    if len(empties) == 1:
        return empties[0]
    return AndOp("and", empties).partial_eval().to_ruby(schema)
def _convert(v):
    """
    Map a Python constant to its equivalent Ruby/script expression.

    Order of checks matters: True/False are identity-tested before the
    int check (bool is a subclass of int).  Types with no branch below
    fall through and return None.
    """
    if v is None:
        return NULL.to_ruby(schema)
    if v is True:
        return Ruby(type=BOOLEAN, expr="true", frum=self)
    if v is False:
        return Ruby(type=BOOLEAN, expr="false", frum=self)
    if isinstance(v, text_type):
        return Ruby(type=STRING, expr=quote(v), frum=self)
    if isinstance(v, int):
        return Ruby(type=INTEGER, expr=text_type(v), frum=self)
    if isinstance(v, float):
        return Ruby(type=NUMBER, expr=text_type(v), frum=self)
    if isinstance(v, dict):
        # FIX: use _convert(vv).expr (the script text), matching the list
        # branch below; the original concatenated the Ruby object itself
        # into the string
        pairs = [quote(k) + ": " + _convert(vv).expr for k, vv in v.items()]
        return Ruby(type=OBJECT, expr="[" + ", ".join(pairs) + "]", frum=self)
    if isinstance(v, (list, tuple)):
        elements = [_convert(vv).expr for vv in v]
        return Ruby(type=OBJECT, expr="[" + ", ".join(elements) + "]", frum=self)
def value2quote(value):
    """RETURN PRETTY PYTHON CODE FOR THE SAME (strings get JSON quoting)"""
    if not is_text(value):
        return text_type(repr(value))
    return mo_json.quote(value)
def pretty_json(value):
    """
    RETURN `value` AS HUMAN-READABLE, INDENTED JSON TEXT (Python 2 version)

    Handles booleans, Mappings, strings (with best-effort character-by-
    character escaping when quote() fails), and lists (packing short items
    into aligned columns).  Types with no branch below return None.

    FIX: the outer try: had no matching except clause in the original
    (a syntax error); a terminal handler is added at the bottom.
    """
    try:
        if value is False:
            return "false"
        elif value is True:
            return "true"
        elif isinstance(value, Mapping):
            try:
                if not value:
                    return "{}"
                items = list(value.items())
                if len(items) == 1:
                    # single property: keep the object on one line
                    return "{" + unicode_key(items[0][0]) + ": " + pretty_json(items[0][1]).strip() + "}"
                # py2 sorted(): second positional argument is the cmp function
                items = sorted(items, lambda a, b: value_compare(a[0], b[0]))
                values = [
                    unicode_key(k) + ": " + indent(pretty_json(v)).strip()
                    for k, v in items
                    if v != None  # `!= None` also drops Null-like values
                ]
                return "{\n" + INDENT + (",\n" + INDENT).join(values) + "\n}"
            except Exception as e:
                from mo_logs import Log
                from mo_math import OR
                if OR(not isinstance(k, basestring) for k in value.keys()):
                    Log.error(
                        "JSON must have string keys: {{keys}}:",
                        keys=[k for k in value.keys()],
                        cause=e
                    )
                Log.error(
                    "problem making dict pretty: keys={{keys}}:",
                    keys=[k for k in value.keys()],
                    cause=e
                )
        elif value in (None, Null):
            return "null"
        elif isinstance(value, basestring):
            if isinstance(value, str):
                # py2 bytes: decode to unicode before quoting
                value = utf82unicode(value)
            try:
                return quote(value)
            except Exception as e:
                from mo_logs import Log
                try:
                    # fallback: escape character-by-character, skipping bad ones
                    Log.note("try explicit convert of string with length {{length}}", length=len(value))
                    acc = [u"\""]
                    for c in value:
                        try:
                            try:
                                c2 = ESCAPE_DCT[c]
                            except Exception:
                                c2 = c
                            c3 = unicode(c2)
                            acc.append(c3)
                        except BaseException:
                            pass
                            # Log.warning("odd character {{ord}} found in string.  Ignored.", ord=ord(c), cause=g)
                    acc.append(u"\"")
                    output = u"".join(acc)
                    Log.note("return value of length {{length}}", length=len(output))
                    return output
                except BaseException as f:  # was `except BaseException, f:` (py2-only syntax)
                    Log.warning("can not even explicit convert {{type}}", type=f.__class__.__name__, cause=f)
                    return "null"
        elif isinstance(value, list):
            if not value:
                return "[]"
            if ARRAY_MAX_COLUMNS == 1:
                return "[\n" + ",\n".join([indent(pretty_json(v)) for v in value]) + "\n]"
            if len(value) == 1:
                j = pretty_json(value[0])
                if j.find("\n") >= 0:
                    return "[\n" + indent(j) + "\n]"
                else:
                    return "[" + j + "]"
            js = [pretty_json(v) for v in value]
            max_len = max(*[len(j) for j in js])
            if max_len <= ARRAY_ITEM_MAX_LENGTH and max(*[j.find("\n") for j in js]) == -1:
                # ALL TINY VALUES: pack into aligned columns
                num_columns = max(
                    1,
                    min(
                        ARRAY_MAX_COLUMNS,
                        int(floor((ARRAY_ROW_LENGTH + 2.0) / float(max_len + 2)))
                    )
                )  # +2 TO COMPENSATE FOR COMMAS
                if len(js) <= num_columns:  # DO NOT ADD \n IF ONLY ONE ROW
                    return "[" + ", ".join(js) + "]"
                if num_columns == 1:  # DO NOT rjust IF THERE IS ONLY ONE COLUMN
                    return "[\n" + ",\n".join([indent(pretty_json(v)) for v in value]) + "\n]"
                content = ",\n".join(
                    ", ".join(j.rjust(max_len) for j in js[r:r + num_columns])
                    for r in xrange(0, len(js), num_columns)
                )
                return "[\n" + indent(content) + "\n]"
            pretty_list = js
            output = ["[\n"]
            for i, p in enumerate(pretty_list):
                try:
                    if i > 0:
                        output.append(",\n")
                    output.append(indent(p))
                except Exception:
                    from mo_logs import Log
                    Log.warning(
                        "problem concatenating string of length {{len1}} and {{len2}}",
                        len1=len("".join(output)),
                        len2=len(p)
                    )
            output.append("\n]")
            try:
                return "".join(output)
            except Exception as e:
                from mo_logs import Log
                Log.error("not expected", cause=e)
    except Exception as e:
        # terminal handler (was missing): report rather than propagate raw
        from mo_logs import Log
        Log.error("problem making JSON pretty", cause=e)
def pretty_json(value):
    """
    FORMAT `value` AS INDENTED, HUMAN-FRIENDLY JSON (Python 2 version)

    Booleans, Mappings, strings and lists are handled explicitly; short
    list items are packed into right-justified columns.  Any type with no
    branch below falls through and returns None.

    FIX: the original's outer try: had no except clause (syntax error);
    a terminal handler is added at the end of the function.
    """
    try:
        if value is False:
            return "false"
        elif value is True:
            return "true"
        elif isinstance(value, Mapping):
            try:
                if not value:
                    return "{}"
                items = list(value.items())
                if len(items) == 1:
                    # one property fits on a single line
                    return "{" + unicode_key(items[0][0]) + ": " + pretty_json(items[0][1]).strip() + "}"
                # py2 sorted() takes the cmp function as second positional arg
                items = sorted(items, lambda a, b: value_compare(a[0], b[0]))
                values = [
                    unicode_key(k) + ": " + indent(pretty_json(v)).strip()
                    for k, v in items
                    if v != None  # also filters Null-like values
                ]
                return "{\n" + INDENT + (",\n" + INDENT).join(values) + "\n}"
            except Exception as e:
                from mo_logs import Log
                from mo_math import OR
                if OR(not isinstance(k, basestring) for k in value.keys()):
                    Log.error(
                        "JSON must have string keys: {{keys}}:",
                        keys=[k for k in value.keys()],
                        cause=e
                    )
                Log.error(
                    "problem making dict pretty: keys={{keys}}:",
                    keys=[k for k in value.keys()],
                    cause=e
                )
        elif value in (None, Null):
            return "null"
        elif isinstance(value, basestring):
            if isinstance(value, str):
                # decode py2 bytes to unicode first
                value = utf82unicode(value)
            try:
                return quote(value)
            except Exception as e:
                from mo_logs import Log
                try:
                    # best-effort fallback: escape each character, skip failures
                    Log.note("try explicit convert of string with length {{length}}", length=len(value))
                    acc = [u"\""]
                    for c in value:
                        try:
                            try:
                                c2 = ESCAPE_DCT[c]
                            except Exception:
                                c2 = c
                            c3 = unicode(c2)
                            acc.append(c3)
                        except BaseException:
                            pass
                            # Log.warning("odd character {{ord}} found in string.  Ignored.", ord=ord(c), cause=g)
                    acc.append(u"\"")
                    output = u"".join(acc)
                    Log.note("return value of length {{length}}", length=len(output))
                    return output
                except BaseException as f:  # modernized from py2-only `except BaseException, f:`
                    Log.warning("can not even explicit convert {{type}}", type=f.__class__.__name__, cause=f)
                    return "null"
        elif isinstance(value, list):
            if not value:
                return "[]"
            if ARRAY_MAX_COLUMNS == 1:
                return "[\n" + ",\n".join([indent(pretty_json(v)) for v in value]) + "\n]"
            if len(value) == 1:
                j = pretty_json(value[0])
                if j.find("\n") >= 0:
                    return "[\n" + indent(j) + "\n]"
                else:
                    return "[" + j + "]"
            js = [pretty_json(v) for v in value]
            max_len = max(*[len(j) for j in js])
            if max_len <= ARRAY_ITEM_MAX_LENGTH and max(*[j.find("\n") for j in js]) == -1:
                # ALL TINY VALUES
                num_columns = max(
                    1,
                    min(
                        ARRAY_MAX_COLUMNS,
                        int(floor((ARRAY_ROW_LENGTH + 2.0) / float(max_len + 2)))
                    )
                )  # +2 TO COMPENSATE FOR COMMAS
                if len(js) <= num_columns:  # DO NOT ADD \n IF ONLY ONE ROW
                    return "[" + ", ".join(js) + "]"
                if num_columns == 1:  # DO NOT rjust IF THERE IS ONLY ONE COLUMN
                    return "[\n" + ",\n".join([indent(pretty_json(v)) for v in value]) + "\n]"
                content = ",\n".join(
                    ", ".join(j.rjust(max_len) for j in js[r:r + num_columns])
                    for r in xrange(0, len(js), num_columns)
                )
                return "[\n" + indent(content) + "\n]"
            pretty_list = js
            output = ["[\n"]
            for i, p in enumerate(pretty_list):
                try:
                    if i > 0:
                        output.append(",\n")
                    output.append(indent(p))
                except Exception:
                    from mo_logs import Log
                    Log.warning(
                        "problem concatenating string of length {{len1}} and {{len2}}",
                        len1=len("".join(output)),
                        len2=len(p)
                    )
            output.append("\n]")
            try:
                return "".join(output)
            except Exception as e:
                from mo_logs import Log
                Log.error("not expected", cause=e)
    except Exception as e:
        # restored terminal handler for the function-level try
        from mo_logs import Log
        Log.error("problem making JSON pretty", cause=e)
def to_python(self, not_null=False, boolean=False, many=False):
    """Return Python source that regex-matches the variable's full value."""
    # anchor the pattern at the end; re.match already anchors the start
    anchored = quote(json2value(self.pattern.json) + "$")
    return "re.match(" + anchored + ", " + self.var.to_python() + ")"
def typed_encode(value, sub_schema, path, net_new_properties, buffer):
    """
    Encode `value` as "typed" JSON: every primitive is wrapped in an object
    whose key (~b~, ~n~, ~s~, ~N~, ~e~, ...) names its type.

    :param value: THE DATA STRUCTURE TO ENCODE
    :param sub_schema: dict FROM PATH TO Column DESCRIBING THE TYPE
    :param path: list OF CURRENT PATH
    :param net_new_properties: list FOR ADDING NEW PROPERTIES NOT FOUND IN sub_schema
    :param buffer: UnicodeBuilder OBJECT
    :return:
    """
    try:
        # from jx_base import Column
        # duck-typed Column check (avoids importing jx_base here)
        if sub_schema.__class__.__name__ == "Column":
            value_json_type = python_type_to_json_type[value.__class__]
            column_json_type = es_type_to_json_type[sub_schema.es_type]
            if value_json_type == column_json_type:
                pass  # ok
            elif value_json_type == ARRAY and all(
                python_type_to_json_type[v.__class__] == column_json_type
                for v in value
                if v != None
            ):
                pass  # empty arrays can be anything
            else:
                from mo_logs import Log
                Log.error(
                    "Can not store {{value}} in {{column|quote}}",
                    value=value,
                    column=sub_schema.name,
                )
            # wrap the Column so the rest of the function sees a plain dict
            sub_schema = {
                json_type_to_inserter_type[value_json_type]: sub_schema
            }
        if value == None and path:
            from mo_logs import Log
            Log.error("can not encode null (missing) values")
        elif value is True:
            if BOOLEAN_KEY not in sub_schema:
                sub_schema[BOOLEAN_KEY] = {}
                net_new_properties.append(path + [BOOLEAN_KEY])
            append(buffer, "{")
            append(buffer, QUOTED_BOOLEAN_KEY)
            append(buffer, "true}")
            return
        elif value is False:
            if BOOLEAN_KEY not in sub_schema:
                sub_schema[BOOLEAN_KEY] = {}
                net_new_properties.append(path + [BOOLEAN_KEY])
            append(buffer, "{")
            append(buffer, QUOTED_BOOLEAN_KEY)
            append(buffer, "false}")
            return
        # dispatch on exact class (not isinstance) for the remaining cases
        _type = value.__class__
        if _type in (dict, Data):
            if sub_schema.__class__.__name__ == "Column":
                from mo_logs import Log
                Log.error("Can not handle {{column|json}}", column=sub_schema)
            if ARRAY_KEY in sub_schema:
                # PREFER NESTED, WHEN SEEN BEFORE
                if value:
                    append(buffer, "{")
                    append(buffer, QUOTED_ARRAY_KEY)
                    append(buffer, "[")
                    _dict2json(
                        value,
                        sub_schema[ARRAY_KEY],
                        path + [ARRAY_KEY],
                        net_new_properties,
                        buffer,
                    )
                    append(buffer, "]" + COMMA)
                    append(buffer, QUOTED_EXISTS_KEY)
                    # EXISTS carries the object count
                    append(buffer, text(len(value)))
                    append(buffer, "}")
                else:
                    # SINGLETON LIST
                    append(buffer, "{")
                    append(buffer, QUOTED_ARRAY_KEY)
                    append(buffer, "[{")
                    append(buffer, QUOTED_EXISTS_KEY)
                    append(buffer, "1}]")
                    append(buffer, COMMA)
                    append(buffer, QUOTED_EXISTS_KEY)
                    append(buffer, "1}")
            else:
                if EXISTS_KEY not in sub_schema:
                    sub_schema[EXISTS_KEY] = {}
                    net_new_properties.append(path + [EXISTS_KEY])
                if value:
                    _dict2json(value, sub_schema, path, net_new_properties, buffer)
                else:
                    # empty object: only the EXISTS marker
                    append(buffer, "{")
                    append(buffer, QUOTED_EXISTS_KEY)
                    append(buffer, "1}")
        elif _type is binary_type:
            if STRING_KEY not in sub_schema:
                sub_schema[STRING_KEY] = True
                net_new_properties.append(path + [STRING_KEY])
            append(buffer, "{")
            append(buffer, QUOTED_STRING_KEY)
            append(buffer, '"')
            try:
                v = value.decode("utf8")
            except Exception as e:
                raise problem_serializing(value, e)
            for c in v:
                append(buffer, ESCAPE_DCT.get(c, c))
            append(buffer, '"}')
        elif _type is text:
            if STRING_KEY not in sub_schema:
                sub_schema[STRING_KEY] = True
                net_new_properties.append(path + [STRING_KEY])
            append(buffer, "{")
            append(buffer, QUOTED_STRING_KEY)
            append(buffer, '"')
            for c in value:
                append(buffer, ESCAPE_DCT.get(c, c))
            append(buffer, '"}')
        elif _type in integer_types:
            if NUMBER_KEY not in sub_schema:
                sub_schema[NUMBER_KEY] = True
                net_new_properties.append(path + [NUMBER_KEY])
            append(buffer, "{")
            append(buffer, QUOTED_NUMBER_KEY)
            append(buffer, text(value))
            append(buffer, "}")
        elif _type in (float, Decimal):
            if NUMBER_KEY not in sub_schema:
                sub_schema[NUMBER_KEY] = True
                net_new_properties.append(path + [NUMBER_KEY])
            append(buffer, "{")
            append(buffer, QUOTED_NUMBER_KEY)
            append(buffer, float2json(value))
            append(buffer, "}")
        elif _type in (set, list, tuple, FlatList):
            if len(value) == 0:
                # empty sequence: EXISTS count of zero
                append(buffer, "{")
                append(buffer, QUOTED_EXISTS_KEY)
                append(buffer, "0}")
            elif any(
                v.__class__ in (Data, dict, set, list, tuple, FlatList)
                for v in value
            ):
                # at least one container element: encode as nested array
                if len(value) == 1:
                    if ARRAY_KEY in sub_schema:
                        append(buffer, "{")
                        append(buffer, QUOTED_ARRAY_KEY)
                        _list2json(
                            value,
                            sub_schema[ARRAY_KEY],
                            path + [ARRAY_KEY],
                            net_new_properties,
                            buffer,
                        )
                        append(buffer, "}")
                    else:
                        # NO NEED TO NEST, SO DO NOT DO IT
                        typed_encode(value[0], sub_schema, path, net_new_properties, buffer)
                else:
                    if ARRAY_KEY not in sub_schema:
                        sub_schema[ARRAY_KEY] = {}
                        net_new_properties.append(path + [ARRAY_KEY])
                    append(buffer, "{")
                    append(buffer, QUOTED_ARRAY_KEY)
                    _list2json(
                        value,
                        sub_schema[ARRAY_KEY],
                        path + [ARRAY_KEY],
                        net_new_properties,
                        buffer,
                    )
                    append(buffer, "}")
            else:
                # ALLOW PRIMITIVE MULTIVALUES
                value = [v for v in value if v != None]
                types = list(
                    set(python_type_to_json_type_key[v.__class__] for v in value)
                )
                if len(types) == 0:  # HANDLE LISTS WITH Nones IN THEM
                    append(buffer, "{")
                    append(buffer, QUOTED_ARRAY_KEY)
                    append(buffer, "[]}")
                elif len(types) > 1:
                    # mixed primitive types: fall back to a typed array
                    _list2json(
                        value,
                        sub_schema,
                        path + [ARRAY_KEY],
                        net_new_properties,
                        buffer,
                    )
                else:
                    element_type = types[0]
                    if element_type not in sub_schema:
                        sub_schema[element_type] = True
                        net_new_properties.append(path + [element_type])
                    append(buffer, "{")
                    append(buffer, quote(element_type))
                    append(buffer, COLON)
                    _multivalue2json(
                        value,
                        sub_schema[element_type],
                        path + [element_type],
                        net_new_properties,
                        buffer,
                    )
                    append(buffer, "}")
        elif _type is date:
            if NUMBER_KEY not in sub_schema:
                sub_schema[NUMBER_KEY] = True
                net_new_properties.append(path + [NUMBER_KEY])
            append(buffer, "{")
            append(buffer, QUOTED_NUMBER_KEY)
            # dates are stored as unix timestamps
            append(buffer, float2json(datetime2unix(value)))
            append(buffer, "}")
        elif _type is datetime:
            if NUMBER_KEY not in sub_schema:
                sub_schema[NUMBER_KEY] = True
                net_new_properties.append(path + [NUMBER_KEY])
            append(buffer, "{")
            append(buffer, QUOTED_NUMBER_KEY)
            append(buffer, float2json(datetime2unix(value)))
            append(buffer, "}")
        elif _type is Date:
            if NUMBER_KEY not in sub_schema:
                sub_schema[NUMBER_KEY] = True
                net_new_properties.append(path + [NUMBER_KEY])
            append(buffer, "{")
            append(buffer, QUOTED_NUMBER_KEY)
            append(buffer, float2json(value.unix))
            append(buffer, "}")
        elif _type is timedelta:
            if NUMBER_KEY not in sub_schema:
                sub_schema[NUMBER_KEY] = True
                net_new_properties.append(path + [NUMBER_KEY])
            append(buffer, "{")
            append(buffer, QUOTED_NUMBER_KEY)
            # durations are stored as seconds
            append(buffer, float2json(value.total_seconds()))
            append(buffer, "}")
        elif _type is Duration:
            if NUMBER_KEY not in sub_schema:
                sub_schema[NUMBER_KEY] = True
                net_new_properties.append(path + [NUMBER_KEY])
            append(buffer, "{")
            append(buffer, QUOTED_NUMBER_KEY)
            append(buffer, float2json(value.seconds))
            append(buffer, "}")
        elif _type is NullType:
            append(buffer, "null")
        elif hasattr(value, "__data__"):
            # project objects expose __data__() for serialization
            typed_encode(value.__data__(), sub_schema, path, net_new_properties, buffer)
        elif hasattr(value, "__iter__"):
            # generic iterable: encode as a typed array
            if ARRAY_KEY not in sub_schema:
                sub_schema[ARRAY_KEY] = {}
                net_new_properties.append(path + [ARRAY_KEY])
            append(buffer, "{")
            append(buffer, QUOTED_ARRAY_KEY)
            _iter2json(
                value,
                sub_schema[ARRAY_KEY],
                path + [ARRAY_KEY],
                net_new_properties,
                buffer,
            )
            append(buffer, "}")
        else:
            from mo_logs import Log
            Log.error(text(repr(value)) + " is not JSON serializable")
    except Exception as e:
        from mo_logs import Log
        Log.error(text(repr(value)) + " is not JSON serializable", cause=e)
append(buffer, "{") append(buffer, QUOTED_EXISTS_KEY) append(buffer, "1}") IS_TYPE_KEY = re.compile(r"^~[bintdsaje]~$") BOOLEAN_KEY = "~b~" NUMBER_KEY = "~n~" INTEGER_KEY = "~i~" STRING_KEY = "~s~" ARRAY_KEY = "~N~" EXISTS_KEY = "~e~" append = UnicodeBuilder.append QUOTED_BOOLEAN_KEY = quote(BOOLEAN_KEY) + COLON QUOTED_NUMBER_KEY = quote(NUMBER_KEY) + COLON QUOTED_INTEGER_KEY = quote(INTEGER_KEY) + COLON QUOTED_STRING_KEY = quote(STRING_KEY) + COLON QUOTED_ARRAY_KEY = quote(ARRAY_KEY) + COLON QUOTED_EXISTS_KEY = quote(EXISTS_KEY) + COLON inserter_type_to_json_type = { BOOLEAN_KEY: BOOLEAN, NUMBER_KEY: NUMBER, INTEGER_KEY: INTEGER, STRING_KEY: STRING, } json_type_to_inserter_type = { BOOLEAN: BOOLEAN_KEY,
def value2quote(value):
    """RETURN PRETTY PYTHON CODE FOR THE SAME"""
    if not isinstance(value, basestring):
        return repr(value)
    return mo_json.quote(value)
def string2quote(value):
    """Return the quoted form of `value`; the literal text "None" when missing."""
    # `== None` (not `is`) follows this codebase's missing-value convention
    if value != None:
        return quote(value)
    return "None"
def to_python(self, not_null=False, boolean=False, many=False):
    """Python source testing whether `substring` occurs in the variable (False when missing)."""
    haystack = self.var.to_python()
    needle = quote(self.substring)
    return "((" + needle + " in " + haystack + ") if " + haystack + "!=None else False)"
def value2quote(value):
    # RETURN PRETTY PYTHON CODE FOR THE SAME
    return mo_json.quote(value) if isinstance(value, basestring) else repr(value)
def string2quote(value):
    # MISSING VALUES BECOME THE LITERAL TEXT "None"
    return "None" if value == None else quote(value)
def pretty_json(value):
    """
    Render `value` as human-friendly, indented JSON text.

    Handles booleans, data/dict objects, strings/bytes (multi-line strings
    become a {"$concat": ...} form), lists (short items are packed into
    right-justified columns), objects exposing __data__/__json__, iterables,
    callables, and numeric fallbacks.  Serialization problems are routed to
    problem_serializing().
    """
    try:
        if value is False:
            return "false"
        elif value is True:
            return "true"
        elif value == None:
            return "null"
        elif is_data(value):
            try:
                value = unwrap(value)
                items = sort_using_key(value.items(), lambda r: r[0])
                # `v != None` also drops Null-like values
                values = [
                    quote(k) + PRETTY_COLON + pretty_json(v)
                    for k, v in items
                    if v != None
                ]
                if not values:
                    return "{}"
                elif len(values) == 1:
                    # single property stays on one line
                    return "{" + values[0] + "}"
                else:
                    return "{\n" + ",\n".join(indent(v) for v in values) + "\n}"
            except Exception as cause:
                cause = Except.wrap(cause)
                from mo_logs import Log
                from mo_math import OR
                if OR(not is_text(k) for k in value.keys()):
                    Log.error(
                        "JSON must have string keys: {{keys}}:",
                        keys=[k for k in value.keys()],
                        cause=cause,
                    )
                Log.error(
                    "problem making dict pretty: keys={{keys}}:",
                    keys=list(value.keys()),
                    cause=cause,
                )
        elif value.__class__ in (binary_type, text):
            if is_binary(value):
                # decode bytes before quoting
                value = value.decode("utf8")
            try:
                if "\n" in value and value.strip():
                    # multi-line strings render as a $concat of their lines
                    return pretty_json({
                        "$concat": value.split("\n"),
                        "separator": "\n",
                    })
                else:
                    return quote(value)
            except Exception as cause:
                from mo_logs import Log
                try:
                    # fallback: escape character-by-character, skipping bad ones
                    Log.note(
                        "try explicit convert of string with length {{length}}",
                        length=len(value),
                    )
                    acc = [QUOTE]
                    for c in value:
                        try:
                            try:
                                c2 = ESCAPE_DCT[c]
                            except Exception:
                                c2 = c
                            c3 = text(c2)
                            acc.append(c3)
                        except BaseException:
                            pass
                            # Log.warning("odd character {{ord}} found in string.  Ignored.", ord=ord(c), cause=g)
                    acc.append(QUOTE)
                    output = "".join(acc)
                    Log.note("return value of length {{length}}", length=len(output))
                    return output
                except BaseException as f:
                    Log.warning(
                        "can not convert {{type}} to json",
                        type=f.__class__.__name__,
                        cause=f,
                    )
                    return "null"
        elif is_list(value):
            if not value:
                return "[]"
            if ARRAY_MAX_COLUMNS == 1:
                return (
                    "[\n"
                    + ",\n".join([indent(pretty_json(v)) for v in value])
                    + "\n]"
                )
            if len(value) == 1:
                j = pretty_json(value[0])
                return "[" + j + "]"
            js = [pretty_json(v) for v in value]
            max_len = max(*[len(j) for j in js])
            if (
                len(js) < ARRAY_MIN_ITEMS
                and max_len <= ARRAY_ITEM_MAX_LENGTH
                and not any("\n" in j for j in js)
            ):
                # ALL TINY VALUES: pack into aligned columns
                num_columns = max(
                    1,
                    min(
                        ARRAY_MAX_COLUMNS,
                        int(floor((ARRAY_ROW_LENGTH + 2.0) / float(max_len + 2))),
                    ),
                )  # +2 TO COMPENSATE FOR COMMAS
                if len(js) <= num_columns:  # DO NOT ADD \n IF ONLY ONE ROW
                    return "[" + PRETTY_COMMA.join(js) + "]"
                if num_columns == 1:  # DO NOT rjust IF THERE IS ONLY ONE COLUMN
                    return (
                        "[\n"
                        + ",\n".join([indent(pretty_json(v)) for v in value])
                        + "\n]"
                    )
                content = ",\n".join(
                    PRETTY_COMMA.join(j.rjust(max_len) for j in js[r : r + num_columns])
                    for r in xrange(0, len(js), num_columns)
                )
                return "[\n" + indent(content) + "\n]"
            pretty_list = js
            output = ["[\n"]
            for i, p in enumerate(pretty_list):
                try:
                    if i > 0:
                        output.append(",\n")
                    output.append(indent(p))
                except Exception:
                    from mo_logs import Log
                    Log.warning(
                        "problem concatenating string of length {{len1}} and {{len2}}",
                        len1=len("".join(output)),
                        len2=len(p),
                    )
            output.append("\n]")
            try:
                return "".join(output)
            except Exception as cause:
                from mo_logs import Log
                Log.error("not expected", cause=cause)
        elif hasattr(value, "__data__"):
            d = value.__data__()
            return pretty_json(d)
        elif hasattr(value, "__json__"):
            j = value.__json__()
            if j == None:
                return " null "  # TODO: FIND OUT WHAT CAUSES THIS
            return pretty_json(json_decoder(j))
        elif scrub(value) is None:
            return "null"
        elif hasattr(value, "__iter__"):
            # generic iterable: materialize and recurse
            return pretty_json(list(value))
        elif hasattr(value, "__call__"):
            return "null"
        else:
            # numeric fallbacks: prefer the simplest faithful representation
            try:
                if int(value) == value:
                    return text(int(value))
            except Exception:
                pass
            try:
                if float(value) == value:
                    return text(float(value))
            except Exception:
                pass
            return pypy_json_encode(value)
    except Exception as cause:
        problem_serializing(value, cause)