def partial_eval(self):
    """
    Simplify this equality test before rendering to a query.

    Folds literal==literal into TRUE/FALSE, normalizes operand order so any
    literal lands on the right-hand side, coerces string literals compared
    against boolean expressions, rejects comparisons between incompatible
    known types, and pushes equality into a nested query's where clause.
    """
    lhs = ES52[self.lhs].partial_eval()
    rhs = ES52[self.rhs].partial_eval()

    if is_literal(lhs):
        if is_literal(rhs):
            # both sides constant: value_compare() == 0 means equal
            return FALSE if value_compare(lhs.value, rhs.value) else TRUE
        else:
            lhs, rhs = rhs, lhs  # FLIP SO WE CAN USE TERMS FILTER

    if is_literal(rhs) and same_json_type(lhs.type, BOOLEAN):
        # SPECIAL CASE true == "T"
        rhs = string2boolean(rhs.value)
        if rhs is None:
            # literal is not a recognizable boolean; equality can never hold
            return FALSE
        rhs = Literal(rhs)
        return EqOp([lhs, rhs])

    if lhs.type != OBJECT and rhs.type != OBJECT and not same_json_type(
        lhs.type, rhs.type
    ):
        # OBJECT MEANS WE REALLY DO NOT KNOW THE TYPE
        return FALSE

    if is_op(lhs, NestedOp):
        # fold the equality into the nested query's filter
        return self.lang[NestedOp(
            path=lhs.frum,
            where=AndOp([lhs.where, EqOp([lhs.select, rhs])])
        )]

    return EqOp([lhs, rhs])
def to_bq(self, schema, not_null=False, boolean=False):
    """
    Render this expression as BigQuery SQL that yields a NUMBER.

    If the wrapped term is already numeric it is returned untouched;
    otherwise it is wrapped in CAST(... AS FLOAT64).
    """
    term_sql = BQLang[self.term].to_bq(schema)
    if same_json_type(term_sql.data_type, NUMBER):
        # already numeric; no conversion needed
        return term_sql

    cast_expr = sql_call("CAST", ConcatSQL(term_sql, SQL_AS, SQL_FLOAT64))
    return BQLScript(
        data_type=NUMBER,
        expr=cast_expr,
        frum=self,
        miss=self.missing(),
        many=False,
        schema=schema,
    )
def __init__(self, term, **clauses):
    """
    Build a WHEN <term> THEN <then> ELSE <else> expression.

    The resulting data_type is taken from whichever branch exists; when both
    branches exist with compatible JSON types they are merged, otherwise the
    type falls back to OBJECT (unknown).
    """
    Expression.__init__(self, [term])
    self.when = term
    then_branch = coalesce(clauses.get("then"), NULL)
    else_branch = coalesce(clauses.get("else"), NULL)
    self.then = then_branch
    self.els_ = else_branch

    # derive the expression type from the surviving branch(es)
    if then_branch is NULL:
        self.data_type = else_branch.type
    elif else_branch is NULL:
        self.data_type = then_branch.type
    elif same_json_type(then_branch.type, else_branch.type):
        self.data_type = merge_json_type(then_branch.type, else_branch.type)
    else:
        # branches disagree on type; we really do not know
        self.data_type = OBJECT
def to_es(self, schema):
    """
    Render this equality as an Elasticsearch filter clause.

    Fast path: <variable> == <literal> becomes a "term"/"terms" filter on a
    column of matching type.  Anything else is rewritten as a CaseOp that
    encodes the missing-value semantics, then rendered.
    """
    if is_op(self.lhs, Variable_) and is_literal(self.rhs):
        rhs = self.rhs.value
        lhs = self.lhs.var
        cols = schema.leaves(lhs)
        if not cols:
            Log.warning(
                "{{col}} does not exist while processing {{expr}}",
                col=lhs,
                expr=self.__data__(),
            )

        if is_container(rhs):
            if len(rhs) == 1:
                # unwrap singleton; handled as a scalar below
                rhs = rhs[0]
            else:
                types = Data()  # MAP JSON TYPE TO LIST OF LITERALS
                for r in rhs:
                    types[python_type_to_json_type[r.__class__]] += [r]
                if len(types) == 1:
                    # homogeneous list: single "terms" filter on a matching column
                    jx_type, values = first(types.items())
                    for c in cols:
                        if same_json_type(jx_type, c.jx_type):
                            return {"terms": {c.es_column: values}}
                    return FALSE.to_es(schema)
                else:
                    # mixed-type list: one EqOp per JSON type, OR'd together
                    return (OrOp([
                        EqOp([self.lhs, values])
                        for t, values in types.items()
                    ]).partial_eval().to_es(schema))

        for c in cols:
            if c.jx_type == BOOLEAN:
                # coerce literal to boolean for boolean columns
                rhs = pull_functions[c.jx_type](rhs)
            rhs_type = python_type_to_json_type[rhs.__class__]
            if rhs_type == c.jx_type or (
                rhs_type in NUMBER_TYPES and c.jx_type in NUMBER_TYPES
            ):
                return {"term": {c.es_column: rhs}}
        # no column of a compatible type exists; equality can never hold
        return FALSE.to_es(schema)
    else:
        # general case: equal when both missing, unequal when only one missing
        return ES52[CaseOp([
            WhenOp(self.lhs.missing(), **{"then": self.rhs.missing()}),
            WhenOp(self.rhs.missing(), **{"then": FALSE}),
            BasicEqOp([self.lhs, self.rhs]),
        ]).partial_eval()].to_es(schema)
def typed_encode(value, sub_schema, path, net_new_properties, buffer):
    """
    Encode `value` as "typed" JSON: every primitive is wrapped in an object
    keyed by an inserter-type marker (boolean/number/string/nested/exists),
    while discovering and recording schema additions along the way.

    :param value: THE DATA STRUCTURE TO ENCODE
    :param sub_schema: dict FROM PATH TO Column DESCRIBING THE TYPE
    :param path: list OF CURRENT PATH
    :param net_new_properties: list FOR ADDING NEW PROPERTIES NOT FOUND IN sub_schema
    :param buffer: UnicodeBuilder OBJECT
    :return:
    """
    try:
        # from jx_base import Column
        if sub_schema.__class__.__name__ == 'Column':
            # schema leaf: verify the value matches the column's type, then
            # replace the Column with a one-entry dict so the branches below
            # can treat it like any other sub-schema
            value_json_type = python_type_to_json_type[value.__class__]
            column_json_type = es_type_to_json_type[sub_schema.es_type]

            if same_json_type(value_json_type, column_json_type):
                pass  # ok
            elif value_json_type == NESTED and all(
                python_type_to_json_type[v.__class__] == column_json_type
                for v in value
                if v != None
            ):
                pass  # empty arrays can be anything
            else:
                from mo_logs import Log
                Log.error(
                    "Can not store {{value}} in {{column|quote}}",
                    value=value,
                    column=sub_schema.name
                )

            sub_schema = {json_type_to_inserter_type[value_json_type]: sub_schema}

        # NOTE: `== None` (not `is None`) is deliberate here; Data/NullType
        # override equality so missing values also compare equal to None
        if value == None and path:
            from mo_logs import Log
            Log.error("can not encode null (missing) values")
        elif value is True:
            if BOOLEAN_TYPE not in sub_schema:
                sub_schema[BOOLEAN_TYPE] = {}
                net_new_properties.append(path + [BOOLEAN_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_BOOLEAN_TYPE)
            append(buffer, 'true}')
            return
        elif value is False:
            if BOOLEAN_TYPE not in sub_schema:
                sub_schema[BOOLEAN_TYPE] = {}
                net_new_properties.append(path + [BOOLEAN_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_BOOLEAN_TYPE)
            append(buffer, 'false}')
            return

        _type = value.__class__
        if _type in (dict, Data):
            if sub_schema.__class__.__name__ == 'Column':
                from mo_logs import Log
                Log.error("Can not handle {{column|json}}", column=sub_schema)

            if NESTED_TYPE in sub_schema:
                # PREFER NESTED, WHEN SEEN BEFORE
                if value:
                    append(buffer, '{')
                    append(buffer, QUOTED_NESTED_TYPE)
                    append(buffer, '[')
                    _dict2json(value, sub_schema[NESTED_TYPE], path + [NESTED_TYPE], net_new_properties, buffer)
                    append(buffer, ']' + COMMA)
                    append(buffer, QUOTED_EXISTS_TYPE)
                    # exists-count records how many members the object has
                    append(buffer, text(len(value)))
                    append(buffer, '}')
                else:
                    # SINGLETON LIST
                    append(buffer, '{')
                    append(buffer, QUOTED_NESTED_TYPE)
                    append(buffer, '[{')
                    append(buffer, QUOTED_EXISTS_TYPE)
                    append(buffer, '1}]')
                    append(buffer, COMMA)
                    append(buffer, QUOTED_EXISTS_TYPE)
                    append(buffer, '1}')
            else:
                if EXISTS_TYPE not in sub_schema:
                    sub_schema[EXISTS_TYPE] = {}
                    net_new_properties.append(path + [EXISTS_TYPE])

                if value:
                    _dict2json(value, sub_schema, path, net_new_properties, buffer)
                else:
                    # empty object: record existence only
                    append(buffer, '{')
                    append(buffer, QUOTED_EXISTS_TYPE)
                    append(buffer, '1}')
        elif _type is binary_type:
            if STRING_TYPE not in sub_schema:
                sub_schema[STRING_TYPE] = True
                net_new_properties.append(path + [STRING_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_STRING_TYPE)
            append(buffer, '"')
            try:
                v = value.decode('utf8')
            except Exception as e:
                raise problem_serializing(value, e)
            for c in v:
                append(buffer, ESCAPE_DCT.get(c, c))
            append(buffer, '"}')
        elif _type is text:
            if STRING_TYPE not in sub_schema:
                sub_schema[STRING_TYPE] = True
                net_new_properties.append(path + [STRING_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_STRING_TYPE)
            append(buffer, '"')
            for c in value:
                append(buffer, ESCAPE_DCT.get(c, c))
            append(buffer, '"}')
        elif _type in integer_types:
            if NUMBER_TYPE not in sub_schema:
                sub_schema[NUMBER_TYPE] = True
                net_new_properties.append(path + [NUMBER_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_NUMBER_TYPE)
            append(buffer, text(value))
            append(buffer, '}')
        elif _type in (float, Decimal):
            if NUMBER_TYPE not in sub_schema:
                sub_schema[NUMBER_TYPE] = True
                net_new_properties.append(path + [NUMBER_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_NUMBER_TYPE)
            append(buffer, float2json(value))
            append(buffer, '}')
        elif _type in (set, list, tuple, FlatList):
            if len(value) == 0:
                # empty list: exists-count of zero
                append(buffer, '{')
                append(buffer, QUOTED_EXISTS_TYPE)
                append(buffer, '0}')
            elif any(v.__class__ in (Data, dict, set, list, tuple, FlatList) for v in value):
                # THIS IS NOT DONE BECAUSE
                if len(value) == 1:
                    if NESTED_TYPE in sub_schema:
                        append(buffer, '{')
                        append(buffer, QUOTED_NESTED_TYPE)
                        _list2json(value, sub_schema[NESTED_TYPE], path + [NESTED_TYPE], net_new_properties, buffer)
                        append(buffer, '}')
                    else:
                        # NO NEED TO NEST, SO DO NOT DO IT
                        typed_encode(value[0], sub_schema, path, net_new_properties, buffer)
                else:
                    if NESTED_TYPE not in sub_schema:
                        sub_schema[NESTED_TYPE] = {}
                        net_new_properties.append(path + [NESTED_TYPE])
                    append(buffer, '{')
                    append(buffer, QUOTED_NESTED_TYPE)
                    _list2json(value, sub_schema[NESTED_TYPE], path + [NESTED_TYPE], net_new_properties, buffer)
                    append(buffer, '}')
            else:
                # ALLOW PRIMITIVE MULTIVALUES
                value = [v for v in value if v != None]
                types = list(set(
                    json_type_to_inserter_type[python_type_to_json_type[v.__class__]]
                    for v in value
                ))
                if len(types) == 0:
                    # HANDLE LISTS WITH Nones IN THEM
                    append(buffer, '{')
                    append(buffer, QUOTED_NESTED_TYPE)
                    append(buffer, '[]}')
                elif len(types) > 1:
                    # NOTE(review): this path passes sub_schema (not a nested
                    # sub-schema) and emits no wrapping braces here -
                    # presumably _list2json produces them; confirm
                    _list2json(value, sub_schema, path + [NESTED_TYPE], net_new_properties, buffer)
                else:
                    element_type = types[0]
                    if element_type not in sub_schema:
                        sub_schema[element_type] = True
                        net_new_properties.append(path + [element_type])
                    append(buffer, '{')
                    append(buffer, quote(element_type))
                    append(buffer, COLON)
                    _multivalue2json(value, sub_schema[element_type], path + [element_type], net_new_properties, buffer)
                    append(buffer, '}')
        elif _type is date:
            # dates are stored as unix seconds (via datetime2unix)
            if NUMBER_TYPE not in sub_schema:
                sub_schema[NUMBER_TYPE] = True
                net_new_properties.append(path + [NUMBER_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_NUMBER_TYPE)
            append(buffer, float2json(datetime2unix(value)))
            append(buffer, '}')
        elif _type is datetime:
            if NUMBER_TYPE not in sub_schema:
                sub_schema[NUMBER_TYPE] = True
                net_new_properties.append(path + [NUMBER_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_NUMBER_TYPE)
            append(buffer, float2json(datetime2unix(value)))
            append(buffer, '}')
        elif _type is Date:
            if NUMBER_TYPE not in sub_schema:
                sub_schema[NUMBER_TYPE] = True
                net_new_properties.append(path + [NUMBER_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_NUMBER_TYPE)
            append(buffer, float2json(value.unix))
            append(buffer, '}')
        elif _type is timedelta:
            # durations stored as total seconds
            if NUMBER_TYPE not in sub_schema:
                sub_schema[NUMBER_TYPE] = True
                net_new_properties.append(path + [NUMBER_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_NUMBER_TYPE)
            append(buffer, float2json(value.total_seconds()))
            append(buffer, '}')
        elif _type is Duration:
            if NUMBER_TYPE not in sub_schema:
                sub_schema[NUMBER_TYPE] = True
                net_new_properties.append(path + [NUMBER_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_NUMBER_TYPE)
            append(buffer, float2json(value.seconds))
            append(buffer, '}')
        elif _type is NullType:
            append(buffer, 'null')
        elif hasattr(value, '__data__'):
            # project objects expose __data__(); encode that representation
            typed_encode(value.__data__(), sub_schema, path, net_new_properties, buffer)
        elif hasattr(value, '__iter__'):
            # generic iterable (e.g. generator): treat as nested list
            if NESTED_TYPE not in sub_schema:
                sub_schema[NESTED_TYPE] = {}
                net_new_properties.append(path + [NESTED_TYPE])
            append(buffer, '{')
            append(buffer, QUOTED_NESTED_TYPE)
            _iter2json(value, sub_schema[NESTED_TYPE], path + [NESTED_TYPE], net_new_properties, buffer)
            append(buffer, '}')
        else:
            from mo_logs import Log
            Log.error(text(repr(value)) + " is not JSON serializable")
    except Exception as e:
        from mo_logs import Log
        Log.error(text(repr(value)) + " is not JSON serializable", cause=e)