Exemplo n.º 1
0
    def to_esfilter(self, schema):
        """
        Convert this expression to an Elasticsearch filter.

        :param schema: used to resolve the variable name to its columns
                       (via `schema.leaves`)
        :return: `{"term": ...}` when `self.superset` is a literal holding a
                 single (non-sequence) value, otherwise `{"terms": ...}`.
                 When `self.value` is not a variable, the expression is
                 converted to a Painless script and that script's filter
                 is returned instead.
        """
        if not is_op(self.value, Variable_):
            # NOT A SIMPLE VARIABLE, FALL BACK TO SCRIPTING
            return Painless[self].to_es_script(schema).to_esfilter(schema)

        var = self.value.var
        cols = schema.leaves(var)
        if not cols:
            Log.error("expecting {{var}} to be a column", var=var)
        col = first(cols)
        var = col.es_column

        # A LITERAL WITH A SINGLE VALUE BECOMES A `term`, ANYTHING ELSE A `terms`
        single = is_literal(self.superset) and not is_sequence(
            self.superset.value)

        if col.jx_type == BOOLEAN:
            if single:
                return {"term": {var: value2boolean(self.superset.value)}}
            # BUILD A REAL LIST: ON PYTHON 3 map() RETURNS A LAZY ITERATOR,
            # WHICH IS NOT A VALID VALUE FOR THE FILTER
            return {
                "terms": {
                    var: [value2boolean(v) for v in self.superset.value]
                }
            }

        if single:
            return {"term": {var: self.superset.value}}
        return {"terms": {var: self.superset.value}}
Exemplo n.º 2
0
 def define(cls, expr):
     """
     Build a `BetweenOp` from the `between` clause of `expr`.

     Two forms are accepted for `expr.between`:
     * a sequence `[value, prefix, suffix]`, each item a JSON expression
     * a mapping `{var: [prefix, suffix]}`, where `var` becomes a Variable
       and `prefix`/`suffix` become Literals

     `expr.default` and `expr.start` are converted with `jx_expression`
     in both forms.

     :param expr: the JSON expression holding the `between` clause
     :return: the `BetweenOp`, looked up through `cls.lang`
     """
     term = expr.between
     if is_sequence(term):
         return cls.lang[BetweenOp(
             value=jx_expression(term[0]),
             prefix=jx_expression(term[1]),
             suffix=jx_expression(term[2]),
             default=jx_expression(expr.default),
             start=jx_expression(expr.start),
         )]

     if is_data(term):
         # list() SO THIS ALSO WORKS WHEN items() RETURNS A NON-INDEXABLE
         # VIEW (A PLAIN dict ON PYTHON 3)
         var, vals = list(term.items())[0]
         if is_sequence(vals) and len(vals) == 2:
             return cls.lang[BetweenOp(
                 value=Variable(var),
                 prefix=Literal(vals[0]),
                 suffix=Literal(vals[1]),
                 default=jx_expression(expr.default),
                 start=jx_expression(expr.start),
             )]

     # NEITHER ACCEPTED FORM MATCHED
     Log.error(
         "`between` parameters are expected to be in {var: [prefix, suffix]} form"
     )
Exemplo n.º 3
0
def value2key(keys, val):
    """
    Convert `val` to a tuple key, one element per name in `keys`.

    :param keys: the key names
    :param val: a data object (looked up by key name), a sequence (taken
                positionally), or, when there is only one key, a bare value
    :return: a tuple; a 1-tuple when `keys` has exactly one name
    """
    single = len(keys) == 1

    if is_data(val):
        if single:
            return (get_attr(val, keys[0]),)
        return tuple(val[k] for k in keys)

    if is_sequence(val):
        if single:
            return (val[0],)
        return tuple(val)

    if single:
        # A BARE VALUE IS ONLY ACCEPTABLE FOR A SINGLE KEY
        return (val,)
    Log.error("do not know what to do here")
Exemplo n.º 4
0
def value2key(keys, val):
    """
    Convert `val` to a tuple key, one element per name in `keys`.

    :param keys: the key names
    :param val: a data object (looked up by key name), a sequence (taken
                positionally), or, when there is only one key, a bare value
    :return: a tuple; a 1-tuple when `keys` has exactly one name
    """
    single = len(keys) == 1

    if is_data(val):
        if single:
            return (get_attr(val, keys[0]),)
        return tuple(val[k] for k in keys)

    if is_sequence(val):
        if single:
            return (val[0],)
        return tuple(val)

    if single:
        # A BARE VALUE IS ONLY ACCEPTABLE FOR A SINGLE KEY
        return (val,)
    Log.error("do not know what to do here")
Exemplo n.º 5
0
def value2key(keys, val):
    """
    Convert `val` to a key for the given `keys`.

    With exactly one key the result is the bare value: `val[keys[0]]` for
    a data object, `val[0]` for a sequence, or `val` itself otherwise.
    With several keys the result is `datawrap()` of a dict keyed by
    `keys`: values are looked up by name in a data object, or paired
    positionally with a sequence.

    :param keys: the key names
    :param val: data object, sequence, or (single key only) a bare value
    :return: the bare key value, or a wrapped dict for compound keys
    """
    single = len(keys) == 1

    if is_data(val):
        if single:
            return val[keys[0]]
        return datawrap(dict((k, val[k]) for k in keys))

    if is_sequence(val):
        if single:
            return val[0]
        return datawrap(dict(zip(keys, val)))

    if single:
        return val
    Log.error("do not know what to do here")
Exemplo n.º 6
0
    def __getitem__(self, index):
        """
        Index into the cube.

        :param index: a single index, a slice, or a sequence of indexes
        :return: * single index: `self.cube[index]`
                 * slice: a new one-dimensional Matrix over the sliced cube
                 * empty sequence: the cube itself
                 * sequence: whatever `_getitem` selects; the bare value when
                   no dimensions remain, otherwise a new Matrix
        """
        if is_sequence(index):
            if len(index) == 0:
                return self.cube

            dims, cube = _getitem(self.cube, index)
            if len(dims) == 0:
                return cube  # SIMPLE VALUE

            result = Matrix(dims=[])
            result.num = len(dims)
            result.dims = dims
            result.cube = cube
            return result

        if not isinstance(index, slice):
            return self.cube[index]

        # A SLICE GIVES A ONE-DIMENSIONAL Matrix
        sub = self.cube[index]
        result = Matrix()
        result.num = 1
        result.dims = (len(sub), )
        result.cube = sub
        return result
Exemplo n.º 7
0
def _deep_json_to_string(value, depth):
    """
    Flatten `value` so nothing is nested deeper than `depth` properties.

    :param value: SOME STRUCTURE
    :param depth: THE MAX DEPTH OF PROPERTIES, DEEPER WILL BE STRING-IFIED
    :return: FLATTER STRUCTURE; strings are limited to LOG_STRING_LENGTH
    """

    def limited_json():
        # JSON TEXT OF value, TRUNCATED TO THE LOG LIMIT
        return strings.limit(value2json(value), LOG_STRING_LENGTH)

    if is_data(value):
        if depth == 0:
            return limited_json()
        flatter = {}
        for k, v in value.items():
            flatter[k] = _deep_json_to_string(v, depth - 1)
        return flatter
    if is_sequence(value):
        return limited_json()
    if isinstance(value, number_types):
        return value
    if is_text(value):
        return strings.limit(value, LOG_STRING_LENGTH)
    if is_binary(value):
        return strings.limit(bytes2base64(value), LOG_STRING_LENGTH)
    if isinstance(value, (date, datetime)):
        return datetime2unix(value)
    return limited_json()
Exemplo n.º 8
0
 def __init__(self, concat):
     """
     :param concat: the SQL fragments to hold; anything that is not already
                    a sequence is first copied into a list
     """
     SQL.__init__(self)
     parts = concat if is_sequence(concat) else list(concat)
     # ONLY VERIFY THE PARTS WHEN DEBUGGING
     if DEBUG and not all(isinstance(part, SQL) for part in parts):
         Log.error("Can only join other SQL")
     self.concat = parts
Exemplo n.º 9
0
    def __getitem__(self, index):
        """
        Index into the cube.

        :param index: a single index, a slice, or a sequence of indexes
        :return: * single index: `self.cube[index]`
                 * slice: a new one-dimensional Matrix over the sliced cube
                 * empty sequence: the cube itself
                 * sequence: whatever `_getitem` selects; the bare value when
                   no dimensions remain, otherwise a new Matrix
        """
        if is_sequence(index):
            if len(index) == 0:
                return self.cube

            dims, cube = _getitem(self.cube, index)
            if len(dims) == 0:
                return cube  # SIMPLE VALUE

            result = Matrix(dims=[])
            result.num = len(dims)
            result.dims = dims
            result.cube = cube
            return result

        if not isinstance(index, slice):
            return self.cube[index]

        # A SLICE GIVES A ONE-DIMENSIONAL Matrix
        sub = self.cube[index]
        result = Matrix()
        result.num = 1
        result.dims = (len(sub), )
        result.cube = sub
        return result
Exemplo n.º 10
0
 def __init__(self, terms):
     """
     :param terms: either a two-item sequence `[lhs, rhs]`, or a mapping
                   with one entry, whose key is assigned to `self.rhs` and
                   whose value is assigned to `self.lhs`
     """
     Expression.__init__(self, terms)
     if is_sequence(terms):
         self.lhs, self.rhs = terms
     elif is_data(terms):
         # list() SO THIS ALSO WORKS WHEN items() RETURNS A NON-INDEXABLE
         # VIEW (A PLAIN dict ON PYTHON 3)
         self.rhs, self.lhs = list(terms.items())[0]
     else:
         Log.error("logic error")
Exemplo n.º 11
0
def parse_hg_date(date):
    """
    Convert a date value to a `Date`.

    :param date: either text, or a sequence whose first item is the timestamp
    :return: `Date` built from the text, or from the first item of the sequence
    """
    if is_text(date):
        return Date(date)

    if not is_sequence(date):
        Log.error("Can not deal with date like {{date|json}}", date=date)
        return None

    # FIRST IN TUPLE (timestamp, time_zone) TUPLE, WHERE timestamp IS GMT
    timestamp = date[0]
    return Date(timestamp)
Exemplo n.º 12
0
 def __call__(self, row, rownum=None, rows=None):
     """
     Walk `row` along the path named by `self.var`.

     :param row: the object to read from; each step uses its `.get()`
     :param rownum: not used here
     :param rows: not used here
     :return: None as soon as any step yields None; the single item of a
              one-item sequence; otherwise the value found
     """
     current = row
     for step in split_field(self.var):
         current = current.get(step)
         if current is None:
             return None

     # A ONE-ITEM SEQUENCE IS REPLACED BY ITS ONLY ITEM
     is_singleton = is_sequence(current) and len(current) == 1
     return current[0] if is_singleton else current
Exemplo n.º 13
0
    def __getitem__(self, key):
        """
        Look up the items stored under `key`.

        :param key: anything `value2key` accepts for `self._keys`
        :return: a wrapped copy of the stored items (of an empty list when
                 nothing is stored). A sequence key shorter than
                 `self._keys` is not implemented. Every failure, including
                 that one, is reported through `Log.error`.
        """
        try:
            is_partial = is_sequence(key) and len(key) < len(self._keys)
            if is_partial:
                # RETURN ANOTHER Index
                raise NotImplementedError()

            lookup = value2key(self._keys, key)
            found = self._data.get(lookup, [])
            return wrap(copy(found))
        except Exception as cause:
            Log.error("something went wrong", cause)
Exemplo n.º 14
0
    def __getitem__(self, key):
        """
        Look up the items stored under `key`.

        :param key: anything `value2key` accepts for `self._keys`
        :return: a wrapped copy of the stored items (of an empty list when
                 nothing is stored). A sequence key shorter than
                 `self._keys` is not implemented. Every failure, including
                 that one, is reported through `Log.error`.
        """
        try:
            is_partial = is_sequence(key) and len(key) < len(self._keys)
            if is_partial:
                # RETURN ANOTHER Index
                raise NotImplementedError()

            lookup = value2key(self._keys, key)
            found = self._data.get(lookup, [])
            return wrap(copy(found))
        except Exception as cause:
            Log.error("something went wrong", cause)
Exemplo n.º 15
0
def _replace_locals(node, doc_path):
    """
    Return a copy of `node` with its `$ref` and `$concat` properties resolved.

    For a data node:
    * `$concat` returns immediately with the joined string, using the node's
      "separator" property (default "") as the joiner
    * properties equal to None are dropped
    * every other property is resolved recursively
    * `$ref` (if any) is resolved after the loop; its `.fragment` is either
      relative (leading dots) or absolute (resolved against `doc_path[-1]`)

    Lists are resolved item by item; any other value is returned as-is.

    :param node: the structure to resolve
    :param doc_path: list of enclosing documents; each recursion puts the
                     value being descended into at the front, and the last
                     item is used as the root for absolute references
                     (presumably the caller starts with `[root]` - TODO confirm)
    :return: the resolved copy
    """
    if is_data(node):
        # RECURS, DEEP COPY
        ref = None
        output = {}
        for k, v in node.items():
            if k == "$ref":
                ref = v
            elif k == "$concat":
                if not is_sequence(v):
                    Log.error("$concat expects an array of strings")
                # RETURNS RIGHT AWAY; ANY OTHER PROPERTIES OF node ARE IGNORED
                return coalesce(node.get("separator"), "").join(v)
            elif v == None:
                continue
            else:
                output[k] = _replace_locals(v, [v] + doc_path)

        if not ref:
            return output

        # REFER TO SELF
        frag = ref.fragment
        if frag[0] == ".":
            # RELATIVE
            # i IS THE NUMBER OF LEADING DOTS, WHICH SELECTS doc_path[i - 1];
            # THE REST OF THE FRAGMENT IS LOOKED UP INSIDE THAT DOCUMENT
            for i, p in enumerate(frag):
                if p != ".":
                    if i > len(doc_path):
                        Log.error(
                            "{{frag|quote}} reaches up past the root document",
                            frag=frag)
                    new_value = get_attr(doc_path[i - 1], frag[i::])
                    break
            else:
                # FRAGMENT IS ALL DOTS: REFERS TO AN ENCLOSING DOCUMENT ITSELF
                new_value = doc_path[len(frag) - 1]
        else:
            # ABSOLUTE
            new_value = get_attr(doc_path[-1], frag)

        # THE REFERENCED VALUE MAY CONTAIN REFERENCES OF ITS OWN
        new_value = _replace_locals(new_value, [new_value] + doc_path)

        if not output:
            return new_value  # OPTIMIZATION FOR CASE WHEN node IS {}
        else:
            # MERGE THE REFERENCED VALUE WITH THE NODE'S OWN PROPERTIES
            # (presumably output's properties win - confirm set_default semantics)
            return unwrap(set_default(output, new_value))

    elif is_list(node):
        candidate = [_replace_locals(n, [n] + doc_path) for n in node]
        # if all(p[0] is p[1] for p in zip(candidate, node)):
        #     return node
        return candidate

    return node
Exemplo n.º 16
0
    def __init__(self, terms, **clauses):
        """
        :param terms: non-empty list of `when` sub-clauses; all but the last
                      must be a `WhenOp` whose `els_` is NULL
        :param clauses: accepted, but not used here
        """
        if not is_sequence(terms):
            Log.error("case expression requires a list of `when` sub-clauses")
        Expression.__init__(self, terms)
        if not len(terms):
            Log.error("Expecting at least one clause")

        # ONLY THE LAST CLAUSE IS EXEMPT FROM THE CHECK
        leading = terms[:-1]
        for clause in leading:
            if is_op(clause, WhenOp) and clause.els_ is NULL:
                continue
            Log.error(
                "case expression does not allow `else` clause in `when` sub-clause"
            )
        self.whens = terms
Exemplo n.º 17
0
def _jx_expression(expr, lang):
    """
    WRAP A JSON EXPRESSION WITH OBJECT REPRESENTATION

    Conversion rules, in the order they are tried:
    * an existing expression is passed through `lang[...]`
    * None becomes TRUE
    * text becomes a Variable
    * booleans and numbers become a Literal
    * a Date becomes a Literal of its `unix` value
    * a sequence becomes a TupleOp of the converted items
    * anything else is treated as `{operator: term}`; the first property name
      found in `operators` decides which class' `define()` builds the result

    :param expr: the JSON expression (or an already-built expression)
    :param lang: the language used to look up operator classes
    :return: the expression object; NULL for an object with no properties
    """
    if is_expression(expr):
        # CONVERT TO lang
        new_op = lang[expr]
        if not new_op:
            # CAN NOT BE FOUND, TRY SOME PARTIAL EVAL
            return language[expr.get_id()].partial_eval()
        # NOTE: expr ITSELF IS RETURNED, NOT new_op
        return expr
        # return new_op(expr.args)  # THIS CAN BE DONE, BUT IT NEEDS MORE CODING, AND I WOULD EXPECT IT TO BE SLOW

    if expr is None:
        return TRUE
    elif is_text(expr):
        return Variable(expr)
    # None WAS ALREADY HANDLED ABOVE; `expr == None` MAY STILL MATCH OTHER
    # NULL-LIKE VALUES (presumably mo_dots Null - confirm)
    elif expr in (True, False, None) or expr == None or is_number(expr):
        return Literal(expr)
    elif expr.__class__ is Date:
        return Literal(expr.unix)
    elif is_sequence(expr):
        return lang[TupleOp([_jx_expression(e, lang) for e in expr])]

    # expr = to_data(expr)
    try:
        items = items_(expr)

        for op, term in items:
            # ONE OF THESE IS THE OPERATOR
            full_op = operators.get(op)
            if full_op:
                class_ = lang.ops[full_op.get_id()]
                if class_:
                    return class_.define(expr)

                # THIS LANGUAGE DOES NOT SUPPORT THIS OPERATOR, GOTO BASE LANGUAGE AND GET THE MACRO
                # NOTE(review): `op` is the property name used as the lookup key
                # above, while `full_op` is the operator found; `op.get_id()`
                # looks like it should be `full_op.get_id()` - confirm
                class_ = language[op.get_id()]
                output = class_.define(expr).partial_eval()
                return _jx_expression(output, lang)
        else:
            # NO PROPERTY NAMED A KNOWN OPERATOR
            if not items:
                return NULL
            raise Log.error("{{instruction|json}} is not known",
                            instruction=expr)

    except Exception as e:
        # EVERY FAILURE ABOVE IS REPORTED AS A PROGRAMMER ERROR, WITH THE CAUSE ATTACHED
        Log.error("programmer error expr = {{value|quote}}",
                  value=expr,
                  cause=e)
Exemplo n.º 18
0
 def __init__(self, args):
     """
     Verify that `args` looks like reasonable parameters.

     :param args: one of
                  * a sequence whose items are each an expression (or None)
                  * a mapping of `{<variable>: <literal>}`
                  * None
                  * a single expression
     """
     self.simplified = False

     # SOME BASIC VERIFICATION THAT THESE ARE REASONABLE PARAMETERS
     if is_sequence(args):
         bad = []
         for t in args:
             if t != None and not is_expression(t):
                 bad.append(t)
         if bad:
             Log.error("Expecting an expression, not {{bad}}", bad=bad)
         return

     if is_data(args):
         if any(not (is_op(k, Variable) and is_literal(v)) for k, v in args.items()):
             Log.error("Expecting an {<variable>: <literal>}")
         return

     if args == None:
         return

     if not is_expression(args):
         Log.error("Expecting an expression")
Exemplo n.º 19
0
def get_type(v):
    """
    Name the type of the value `v`.

    :param v: any value
    :return: BOOLEAN, STRING, OBJECT, NUMBER or NESTED; None when `v` is
             None, is a float that is NaN or infinite, or is unrecognized
    """
    if v == None:
        return None
    # bool IS TESTED BEFORE int BECAUSE bool IS A SUBCLASS OF int
    if isinstance(v, bool):
        return BOOLEAN
    if is_text(v):
        return STRING
    if is_data(v):
        return OBJECT
    if isinstance(v, float):
        is_finite = not (isnan(v) or abs(v) == POS_INF)
        return NUMBER if is_finite else None
    if isinstance(v, (int, Date)):
        return NUMBER
    return NESTED if is_sequence(v) else None
Exemplo n.º 20
0
    def replacer(found):
        """
        Expand one match of the template.

        The matched text has the form `path|formatter|formatter(args)...`.
        The path picks a value out of `seq` and each formatter is then
        applied in turn.

        :param found: regex match; `group(1)` is the text to expand
        :return: the expanded text, or a
                 "[template expansion error: (...)]" marker on failure
        """
        ops = found.group(1).split("|")

        path = ops[0]
        var = path.lstrip(".")
        # THE NUMBER OF LEADING DOTS (AT LEAST 1, AT MOST len(seq)) PICKS
        # HOW FAR BACK IN seq TO LOOK
        depth = min(len(seq), max(1, len(path) - len(var)))
        try:
            val = seq[-depth]
            if var:
                if is_sequence(val) and float(var) == _round(float(var), 0):
                    # INTEGER INDEX INTO A SEQUENCE
                    val = val[int(var)]
                else:
                    val = val[var]
            for func_name in ops[1:]:
                parts = func_name.split("(")
                if len(parts) > 1:
                    # NOTE(security): eval() OF TEXT TAKEN FROM THE TEMPLATE;
                    # TEMPLATES MUST COME FROM A TRUSTED SOURCE
                    val = eval(parts[0] + "(val, " + "(".join(parts[1::]))
                else:
                    val = FORMATTERS[func_name](val)
            val = toString(val)
            return val
        except Exception as e:
            from mo_logs import Except

            e = Except.wrap(e)
            try:
                # str.find() RETURNS -1 (TRUTHY) WHEN THE TEXT IS MISSING,
                # SO A MEMBERSHIP TEST IS REQUIRED HERE
                if "is not JSON serializable" in e.message:
                    # WORK HARDER
                    val = toString(val)
                    return val
            except Exception:
                if not _Log:
                    _late_import()

                _Log.warning(
                    "Can not expand " + "|".join(ops) +
                    " in template: {{template_|json}}",
                    template_=template,
                    cause=e,
                )
            return "[template expansion error: (" + str(e.message) + ")]"
Exemplo n.º 21
0
    def replacer(found):
        """
        Expand one match of the template.

        The matched text has the form `path|formatter|formatter(args)...`.
        The path picks a value out of `seq` and each formatter is then
        applied in turn.

        :param found: regex match; `group(1)` is the text to expand
        :return: the expanded text, or a
                 "[template expansion error: (...)]" marker on failure
        """
        ops = found.group(1).split("|")

        path = ops[0]
        var = path.lstrip(".")
        # THE NUMBER OF LEADING DOTS (AT LEAST 1, AT MOST len(seq)) PICKS
        # HOW FAR BACK IN seq TO LOOK
        depth = min(len(seq), max(1, len(path) - len(var)))
        try:
            val = seq[-depth]
            if var:
                if is_sequence(val) and float(var) == _round(float(var), 0):
                    # INTEGER INDEX INTO A SEQUENCE
                    val = val[int(var)]
                else:
                    val = val[var]
            for func_name in ops[1:]:
                parts = func_name.split('(')
                if len(parts) > 1:
                    # NOTE(security): eval() OF TEXT TAKEN FROM THE TEMPLATE;
                    # TEMPLATES MUST COME FROM A TRUSTED SOURCE
                    val = eval(parts[0] + "(val, " + ("(".join(parts[1::])))
                else:
                    val = FORMATTERS[func_name](val)
            val = toString(val)
            return val
        except Exception as e:
            from mo_logs import Except

            e = Except.wrap(e)
            try:
                # str.find() RETURNS -1 (TRUTHY) WHEN THE TEXT IS MISSING,
                # SO A MEMBERSHIP TEST IS REQUIRED HERE
                if "is not JSON serializable" in e.message:
                    # WORK HARDER
                    val = toString(val)
                    return val
            except Exception:
                if not _Log:
                    _late_import()

                _Log.warning(
                    "Can not expand " + "|".join(ops) + " in template: {{template_|json}}",
                    template_=template,
                    cause=e
                )
            return "[template expansion error: (" + str(e.message) + ")]"
Exemplo n.º 22
0
def _deep_json_to_string(value, depth):
    """
    Flatten `value` so nothing is nested deeper than `depth` properties.

    :param value: SOME STRUCTURE
    :param depth: THE MAX DEPTH OF PROPERTIES, DEEPER WILL BE STRING-IFIED
    :return: FLATTER STRUCTURE; strings are limited to LOG_STRING_LENGTH
    """

    def limited_json():
        # JSON TEXT OF value, TRUNCATED TO THE LOG LIMIT
        return strings.limit(value2json(value), LOG_STRING_LENGTH)

    if is_data(value):
        if depth == 0:
            return limited_json()
        flatter = {}
        for k, v in value.items():
            flatter[k] = _deep_json_to_string(v, depth - 1)
        return flatter
    if is_sequence(value):
        return limited_json()
    if isinstance(value, number_types):
        return value
    if is_text(value):
        return strings.limit(value, LOG_STRING_LENGTH)
    if is_binary(value):
        return strings.limit(bytes2base64(value), LOG_STRING_LENGTH)
    if isinstance(value, (date, datetime)):
        return datetime2unix(value)
    return limited_json()
Exemplo n.º 23
0
    def _test_contains(self, key):
        """
        Test whether `key` leads to something in `self._data`.

        :param key: anything `value2key` accepts; a sequence shorter than
                    `self._keys` is matched against the leading keys only
        :return: True when every part of the key can be followed through
                 the nested `self._data`, False as soon as one step fails.
                 Any other failure is reported through `Log.error`.
        """
        try:
            if is_sequence(key) and len(key) < len(self._keys):
                # RETURN ANOTHER Index
                length = len(key)
                key = value2key(self._keys[0:length:], key)
                steps = key[:length]
            else:
                key = value2key(self._keys, key)
                steps = key

            node = self._data
            for step in steps:
                try:
                    node = node[step]
                except Exception:
                    return False
            return True
        except Exception as cause:
            Log.error("something went wrong", cause)
Exemplo n.º 24
0
    def _test_contains(self, key):
        """
        Test whether `key` leads to something in `self._data`.

        :param key: anything `value2key` accepts; a sequence shorter than
                    `self._keys` is matched against the leading keys only
        :return: True when every part of the key can be followed through
                 the nested `self._data`, False as soon as one step fails.
                 Any other failure is reported through `Log.error`.
        """
        try:
            if is_sequence(key) and len(key) < len(self._keys):
                # RETURN ANOTHER Index
                length = len(key)
                key = value2key(self._keys[0:length:], key)
                steps = key[:length]
            else:
                key = value2key(self._keys, key)
                steps = key

            node = self._data
            for step in steps:
                try:
                    node = node[step]
                except Exception:
                    return False
            return True
        except Exception as cause:
            Log.error("something went wrong", cause)
Exemplo n.º 25
0
def request(method,
            url,
            headers=None,
            data=None,
            json=None,
            zip=None,
            retry=None,
            timeout=None,
            session=None,
            kwargs=None):
    """
    JUST LIKE requests.request() BUT WITH DEFAULT HEADERS AND FIXES
    DEMANDS data IS ONE OF:
    * A JSON-SERIALIZABLE STRUCTURE, OR
    * LIST OF JSON-SERIALIZABLE STRUCTURES, OR
    * None

    BEHAVIOUR VISIBLE IN THIS FUNCTION:
    * WHEN url IS A LIST, EACH URL IS TRIED IN TURN; THE FIRST RESPONSE THAT
      IS NOT IN THE 400 OR 500 RANGE IS RETURNED (OR THE RESPONSE OF THE
      LAST URL, WHATEVER ITS STATUS)
    * WHEN NO session IS GIVEN, A NEW ONE IS MADE AND CLOSED AFTERWARD;
      A GIVEN session IS LEFT OPEN
    * WHEN json IS GIVEN, IT REPLACES data WITH ITS UTF8-ENCODED JSON
    * THE REQUEST IS ATTEMPTED retry.times TIMES, WAITING retry.sleep
      SECONDS BETWEEN ATTEMPTS; IF ALL FAIL, THE FIRST ERROR IS REPORTED
      THROUGH Log.error

    :param method: GET, POST, etc
    :param url: URL (OR A LIST OF URLS TO TRY IN ORDER)
    :param headers: dict OF HTTP REQUEST HEADERS
    :param data: BYTES (OR GENERATOR OF BYTES)
    :param json: JSON-SERIALIZABLE STRUCTURE
    :param zip: ZIP THE REQUEST BODY, IF BIG ENOUGH
    :param retry: {"times": x, "sleep": y} STRUCTURE, OR JUST THE NUMBER OF TIMES
    :param timeout: SECONDS TO WAIT FOR RESPONSE
    :param session: Session OBJECT, IF YOU HAVE ONE
    :param kwargs: ALL PARAMETERS (DO NOT USE)
    :return: THE RESPONSE FROM _session_request
    """
    global _warning_sent
    global request_count

    # WARN ONLY ONCE ABOUT MISSING DEFAULT HEADERS
    if not _warning_sent and not default_headers:
        Log.warning(
            text(
                "The mo_http.http module was meant to add extra " +
                "default headers to all requests, specifically the 'Referer' "
                +
                "header with a URL to the project. Use the `pyLibrary.debug.constants.set()` "
                + "function to set `mo_http.http.default_headers`"))
    _warning_sent = True

    if is_list(url):
        # TRY MANY URLS
        failures = []
        for remaining, u in countdown(url):
            try:
                # NOTE(review): only url and kwargs are passed here; presumably a
                # decorator outside this view fills the other parameters from kwargs - confirm
                response = request(url=u, kwargs=kwargs)
                # ROUND THE STATUS TO THE NEAREST HUNDRED TO TEST FOR 4xx AND 5xx
                if mo_math.round(response.status_code,
                                 decimal=-2) not in [400, 500]:
                    return response
                if not remaining:
                    # LAST URL: RETURN WHAT WE GOT, EVEN IF IT IS AN ERROR RESPONSE
                    return response
            except Exception as e:
                e = Except.wrap(e)
                failures.append(e)
        Log.error(u"Tried {{num}} urls", num=len(url), cause=failures)

    # ONLY CLOSE THE SESSION IF IT WAS MADE HERE
    if session:
        close_after_response = Null
    else:
        close_after_response = session = sessions.Session()

    with closing(close_after_response):
        if PY2 and is_text(url):
            # httplib.py WILL **FREAK OUT** IF IT SEES ANY UNICODE
            url = url.encode('ascii')

        try:
            set_default(kwargs, DEFAULTS)

            # HEADERS
            headers = unwrap(
                set_default(headers, session.headers, default_headers))
            _to_ascii_dict(headers)

            # RETRY
            # ACCEPTS None (USE DEFAULTS), A NUMBER (THE TIMES), OR A STRUCTURE
            retry = wrap(retry)
            if retry == None:
                retry = set_default({}, DEFAULTS['retry'])
            elif isinstance(retry, Number):
                retry = set_default({"times": retry}, DEFAULTS['retry'])
            elif isinstance(retry.sleep, Duration):
                retry.sleep = retry.sleep.seconds

            # JSON
            if json != None:
                data = value2json(json).encode('utf8')

            # ZIP
            zip = coalesce(zip, DEFAULTS['zip'])
            set_default(headers, {'Accept-Encoding': 'compress, gzip'})

            if zip:
                if is_sequence(data):
                    # A SEQUENCE OF BYTES IS ALWAYS COMPRESSED
                    compressed = ibytes2icompressed(data)
                    headers['content-encoding'] = 'gzip'
                    data = compressed
                elif len(coalesce(data)) > 1000:
                    # A SINGLE BODY IS COMPRESSED ONLY WHEN LONGER THAN 1000
                    compressed = bytes2zip(data)
                    headers['content-encoding'] = 'gzip'
                    data = compressed
        except Exception as e:
            Log.error(u"Request setup failure on {{url}}", url=url, cause=e)

        errors = []
        for r in range(retry.times):
            if r:
                # WAIT BEFORE EVERY ATTEMPT BUT THE FIRST
                Till(seconds=retry.sleep).wait()

            try:
                request_count += 1
                with Timer("http {{method|upper}} to {{url}}",
                           param={
                               "method": method,
                               "url": text(url)
                           },
                           verbose=DEBUG):
                    # json WAS ALREADY ENCODED INTO data ABOVE
                    return _session_request(session,
                                            url=str(url),
                                            headers=headers,
                                            data=data,
                                            json=None,
                                            kwargs=kwargs)
            except Exception as e:
                e = Except.wrap(e)
                # OPTIONAL FALLBACK FROM https TO http FOR THE NEXT ATTEMPT
                if retry['http'] and str(url).startswith(
                        "https://"
                ) and "EOF occurred in violation of protocol" in e:
                    url = URL("http://" + str(url)[8:])
                    Log.note(
                        "Changed {{url}} to http due to SSL EOF violation.",
                        url=str(url))
                errors.append(e)

        # ALL ATTEMPTS FAILED; REPORT THE FIRST ERROR
        # NOTE(review): if retry.times is 0 the loop never runs and errors is
        # empty, so errors[0] would raise IndexError - confirm whether that can happen
        if " Read timed out." in errors[0]:
            Log.error(
                u"Tried {{times}} times: Timeout failure (timeout was {{timeout}}",
                timeout=timeout,
                times=retry.times,
                cause=errors[0])
        else:
            Log.error(u"Tried {{times}} times: Request failure of {{url}}",
                      url=url,
                      times=retry.times,
                      cause=errors[0])