Example #1
def get_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    response = get(url, **kwargs)
    try:
        c = response.all_content
        return json2value(utf82unicode(c))
    except Exception as e:
        if mo_math.round(response.status_code, decimal=-2) in [400, 500]:
            Log.error(u"Bad GET response: {{code}}", code=response.status_code)
        else:
            Log.error(u"Good GET requests, but bad JSON", cause=e)
Example #2
def get_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    response = get(url, **kwargs)
    try:
        c = response.all_content
        return json2value(c.decode('utf8'))
    except Exception as e:
        if mo_math.round(response.status_code, decimal=-2) in [400, 500]:
            Log.error(u"Bad GET response: {{code}}", code=response.status_code)
        else:
            Log.error(u"Good GET requests, but bad JSON", cause=e)
Example #3
def get_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    response = get(url, **kwargs)
    try:
        c = response.all_content
        path = URL(url).path
        if path.endswith(".zip"):
            buff = StringIO(c)
            archive = zipfile.ZipFile(buff, mode='r')
            c = archive.read(archive.namelist()[0])
        elif path.endswith(".gz"):
            c = zip2bytes(c)

        return json2value(c.decode('utf8'))
    except Exception as e:
        if mo_math.round(response.status_code, decimal=-2) in [400, 500]:
            Log.error(u"Bad GET response: {{code}}", code=response.status_code)
        else:
            Log.error(u"Good GET requests, but bad JSON", cause=e)
Example #4
def round(self, interval, decimal=0):
    output = self / interval
    output = round(output, decimal)
    return output
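
This method rounds a duration to a multiple of some interval: dividing by the interval yields a count of intervals, which is then rounded. A hedged stand-alone sketch with datetime.timedelta (the names are mine; the original operates on the library's Duration type):

from datetime import timedelta

def round_duration(duration, interval, decimal=0):
    # How many intervals fit in the duration, rounded to `decimal` places.
    return round(duration / interval, decimal)

print(round_duration(timedelta(minutes=95), timedelta(hours=1), 1))  # 1.6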
Example #5
def request(method,
            url,
            headers=None,
            data=None,
            json=None,
            zip=None,
            retry=None,
            timeout=None,
            session=None,
            kwargs=None):
    """
    JUST LIKE requests.request() BUT WITH DEFAULT HEADERS AND FIXES
    DEMANDS data IS ONE OF:
    * A JSON-SERIALIZABLE STRUCTURE, OR
    * LIST OF JSON-SERIALIZABLE STRUCTURES, OR
    * None

    :param method: GET, POST, etc
    :param url: URL
    :param headers: dict OF HTTP REQUEST HEADERS
    :param data: BYTES (OR GENERATOR OF BYTES)
    :param json: JSON-SERIALIZABLE STRUCTURE
    :param zip: ZIP THE REQUEST BODY, IF BIG ENOUGH
    :param retry: {"times": x, "sleep": y} STRUCTURE
    :param timeout: SECONDS TO WAIT FOR RESPONSE
    :param session: Session OBJECT, IF YOU HAVE ONE
    :param kwargs: ALL PARAMETERS (DO NOT USE)
    :return:
    """
    global _warning_sent
    global request_count

    if not _warning_sent and not default_headers:
        Log.warning(text(
            "The mo_http.http module was meant to add extra "
            "default headers to all requests, specifically the 'Referer' "
            "header with a URL to the project. Use the `pyLibrary.debug.constants.set()` "
            "function to set `mo_http.http.default_headers`"))
    _warning_sent = True

    if is_list(url):
        # TRY MANY URLS
        failures = []
        for remaining, u in countdown(url):
            try:
                response = request(url=u, kwargs=kwargs)
                if mo_math.round(response.status_code,
                                 decimal=-2) not in [400, 500]:
                    return response
                if not remaining:
                    return response
            except Exception as e:
                e = Except.wrap(e)
                failures.append(e)
        Log.error(u"Tried {{num}} urls", num=len(url), cause=failures)

    if session:
        close_after_response = Null
    else:
        close_after_response = session = sessions.Session()

    with closing(close_after_response):
        if PY2 and is_text(url):
            # httplib.py WILL **FREAK OUT** IF IT SEES ANY UNICODE
            url = url.encode('ascii')

        try:
            set_default(kwargs, DEFAULTS)

            # HEADERS
            headers = unwrap(
                set_default(headers, session.headers, default_headers))
            _to_ascii_dict(headers)

            # RETRY
            retry = wrap(retry)
            if retry == None:
                retry = set_default({}, DEFAULTS['retry'])
            elif isinstance(retry, Number):
                retry = set_default({"times": retry}, DEFAULTS['retry'])
            elif isinstance(retry.sleep, Duration):
                retry.sleep = retry.sleep.seconds

            # JSON
            if json != None:
                data = value2json(json).encode('utf8')

            # ZIP
            zip = coalesce(zip, DEFAULTS['zip'])
            set_default(headers, {'Accept-Encoding': 'compress, gzip'})

            if zip:
                if is_sequence(data):
                    compressed = ibytes2icompressed(data)
                    headers['content-encoding'] = 'gzip'
                    data = compressed
                elif len(coalesce(data)) > 1000:
                    compressed = bytes2zip(data)
                    headers['content-encoding'] = 'gzip'
                    data = compressed
        except Exception as e:
            Log.error(u"Request setup failure on {{url}}", url=url, cause=e)

        errors = []
        for r in range(retry.times):
            if r:
                Till(seconds=retry.sleep).wait()

            try:
                request_count += 1
                with Timer("http {{method|upper}} to {{url}}",
                           param={
                               "method": method,
                               "url": text(url)
                           },
                           verbose=DEBUG):
                    return _session_request(session,
                                            url=str(url),
                                            headers=headers,
                                            data=data,
                                            json=None,
                                            kwargs=kwargs)
            except Exception as e:
                e = Except.wrap(e)
                if retry['http'] and str(url).startswith(
                        "https://"
                ) and "EOF occurred in violation of protocol" in e:
                    url = URL("http://" + str(url)[8:])
                    Log.note(
                        "Changed {{url}} to http due to SSL EOF violation.",
                        url=str(url))
                errors.append(e)

        if " Read timed out." in errors[0]:
            Log.error(
                u"Tried {{times}} times: Timeout failure (timeout was {{timeout}}",
                timeout=timeout,
                times=retry.times,
                cause=errors[0])
        else:
            Log.error(u"Tried {{times}} times: Request failure of {{url}}",
                      url=url,
                      times=retry.times,
                      cause=errors[0])
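
The retry handling above accepts several shapes for the same parameter. A sketch of the normalization (the default values here are illustrative; the real ones live in DEFAULTS['retry']):

RETRY_DEFAULTS = {"times": 1, "sleep": 0}

def normalize_retry(retry):
    # None -> defaults; bare number -> that many times; dict -> fill gaps.
    if retry is None:
        return dict(RETRY_DEFAULTS)
    if isinstance(retry, (int, float)):
        return {**RETRY_DEFAULTS, "times": retry}
    return {**RETRY_DEFAULTS, **retry}

print(normalize_retry(3))             # {'times': 3, 'sleep': 0}
print(normalize_retry({"sleep": 5}))  # {'times': 1, 'sleep': 5}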
Example #6
def agg_formula(acc, formula, query_path, schema):
    # DUPLICATED FOR SCRIPTS, MAYBE THIS CAN BE PUT INTO A LANGUAGE?
    for i, s in enumerate(formula):
        canonical_name = s.name
        s_path = [
            k for k, v in split_expression_by_path(
                s.value, schema=schema, lang=Painless).items() if v
        ]
        if len(s_path) == 0:
            # FOR CONSTANTS
            nest = NestedAggs(query_path)
            acc.add(nest)
        elif len(s_path) == 1:
            nest = NestedAggs(first(s_path))
            acc.add(nest)
        else:
            raise Log.error("do not know how to handle")

        if is_op(s.value, TupleOp):
            if s.aggregate == "count":
                # TUPLES ALWAYS EXIST, SO COUNTING THEM IS EASY
                s.pull = jx_expression_to_function("doc_count")
            elif s.aggregate in ('max', 'maximum', 'min', 'minimum'):
                if s.aggregate in ('max', 'maximum'):
                    dir = 1
                    op = "max"
                else:
                    dir = -1
                    op = 'min'

                nully = Painless[TupleOp(
                    [NULL] *
                    len(s.value.terms))].partial_eval().to_es_script(schema)
                selfy = text(
                    Painless[s.value].partial_eval().to_es_script(schema))

                script = {
                    "scripted_metric": {
                        'init_script':
                        'params._agg.best = ' + nully + '.toArray();',
                        'map_script':
                        'params._agg.best = ' + expand_template(
                            MAX_OF_TUPLE, {
                                "expr1": "params._agg.best",
                                "expr2": selfy,
                                "dir": dir,
                                "op": op
                            }) + ";",
                        'combine_script':
                        'return params._agg.best',
                        'reduce_script':
                        'return params._aggs.stream().' + op + '(' +
                        expand_template(COMPARE_TUPLE, {
                            "dir": dir,
                            "op": op
                        }) + ').get()',
                    }
                }
                nest.add(
                    NestedAggs(query_path).add(
                        ExprAggs(canonical_name, script, s)))
                s.pull = jx_expression_to_function("value")
            else:
                Log.error("{{agg}} is not a supported aggregate over a tuple",
                          agg=s.aggregate)
        elif s.aggregate == "count":
            nest.add(
                ExprAggs(
                    canonical_name, {
                        "value_count": {
                            "script":
                            text(Painless[s.value].partial_eval().to_es_script(
                                schema))
                        }
                    }, s))
            s.pull = jx_expression_to_function("value")
        elif s.aggregate == "median":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")
            nest.add(
                ExprAggs(
                    key, {
                        "percentiles": {
                            "script": text(
                                Painless[s.value].to_es_script(schema)),
                            "percents": [50]
                        }
                    }, s))
            s.pull = jx_expression_to_function(join_field(["50.0"]))
        elif s.aggregate in ("and", "or"):
            key = literal_field(canonical_name + " " + s.aggregate)
            op = aggregates[s.aggregate]
            nest.add(
                ExprAggs(
                    key, {
                        op: {
                            "script":
                            text(Painless[NumberOp(
                                s.value)].to_es_script(schema))
                        }
                    }, s))
            # get_name = concat_field(canonical_name, "value")
            s.pull = jx_expression_to_function({
                "case": [{
                    "when": {
                        "eq": {
                            "value": 1
                        }
                    },
                    "then": True
                }, {
                    "when": {
                        "eq": {
                            "value": 0
                        }
                    },
                    "then": False
                }]
            })
        elif s.aggregate == "percentile":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")
            percent = mo_math.round(s.percentile * 100, decimal=6)
            nest.add(
                ExprAggs(
                    key, {
                        "percentiles": {
                            "script": text(
                                Painless[s.value].to_es_script(schema)),
                            "percents": [percent]
                        }
                    }, s))
            s.pull = jx_expression_to_function(
                join_field(["values", text(percent)]))
        elif s.aggregate == "cardinality":
            # ES USES DIFFERENT METHOD FOR CARDINALITY
            key = canonical_name + " cardinality"
            nest.add(
                ExprAggs(
                    key, {
                        "cardinality": {
                            "script": text(
                                Painless[s.value].to_es_script(schema))
                        }
                    }, s))
            s.pull = jx_expression_to_function("value")
        elif s.aggregate == "stats":
            # REGULAR STATS
            nest.add(
                ExprAggs(
                    canonical_name, {
                        "extended_stats": {
                            "script": text(
                                Painless[s.value].to_es_script(schema))
                        }
                    }, s))
            s.pull = get_pull_stats()

            # GET MEDIAN TOO!
            select_median = s.copy()
            select_median.pull = jx_expression_to_function(
                {"select": [{
                    "name": "median",
                    "value": "values.50\\.0"
                }]})

            nest.add(
                ExprAggs(
                    canonical_name + "_percentile", {
                        "percentiles": {
                            "script": text(
                                Painless[s.value].to_es_script(schema)),
                            "percents": [50]
                        }
                    }, select_median))
            s.pull = get_pull_stats()
        elif s.aggregate == "union":
            # USE TERMS AGGREGATE TO SIMULATE union
            nest.add(
                TermsAggs(canonical_name, {
                    "script_field":
                    text(Painless[s.value].to_es_script(schema))
                }, s))
            s.pull = jx_expression_to_function("key")
        else:
            # PULL VALUE OUT OF THE stats AGGREGATE
            s.pull = jx_expression_to_function(aggregates[s.aggregate])
            nest.add(
                ExprAggs(
                    canonical_name, {
                        "extended_stats": {
                            "script":
                            text(
                                NumberOp(s.value).partial_eval().to_es_script(
                                    schema))
                        }
                    }, s))
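
The percentile branches above round the requested percent to six decimal places before using it twice: once in the "percents" list sent to Elasticsearch and once in the path used to pull the result back out. Rounding pins the float to a single text form so both sides agree. A sketch (the key format is my reading of the code above):

percentile = 0.875
percent = round(percentile * 100, 6)  # 87.5
agg = {"percentiles": {"script": "...", "percents": [percent]}}
pull_path = "values." + str(percent)  # "values.87.5" keys the ES response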
Example #7
def sql_query(path):
    with RegisterThread():
        query_timer = Timer("total duration")
        request_body = None
        try:
            with query_timer:
                preamble_timer = Timer("preamble", silent=True)
                with preamble_timer:
                    if flask.request.headers.get("content-length",
                                                 "") in ["", "0"]:
                        # ASSUME A BROWSER HIT THIS POINT, SEND text/html RESPONSE BACK
                        return Response(BLANK,
                                        status=400,
                                        headers={"Content-Type": "text/html"})
                    elif int(flask.request.headers["content-length"]
                             ) > QUERY_SIZE_LIMIT:
                        Log.error("Query is too large")

                    request_body = flask.request.get_data().strip()
                    text = utf82unicode(request_body)
                    data = json2value(text)
                    record_request(flask.request, data, None, None)

                translate_timer = Timer("translate", silent=True)
                with translate_timer:
                    if not data.sql:
                        Log.error("Expecting a `sql` parameter")
                    jx_query = parse_sql(data.sql)
                    frum = find_container(jx_query['from'])
                    if data.meta.testing:
                        test_mode_wait(jx_query)
                    result = jx.run(jx_query, container=frum)
                    if isinstance(
                            result, Container
                    ):  # TODO: REMOVE THIS CHECK, jx SHOULD ALWAYS RETURN Containers
                        result = result.format(jx_query.format)
                    result.meta.jx_query = jx_query

                save_timer = Timer("save")
                with save_timer:
                    if data.meta.save:
                        try:
                            result.meta.saved_as = save_query.query_finder.save(
                                data)
                        except Exception as e:
                            Log.warning("Unexpected save problem", cause=e)

                result.meta.timing.preamble = mo_math.round(
                    preamble_timer.duration.seconds, digits=4)
                result.meta.timing.translate = mo_math.round(
                    translate_timer.duration.seconds, digits=4)
                result.meta.timing.save = mo_math.round(
                    save_timer.duration.seconds, digits=4)
                result.meta.timing.total = "{{TOTAL_TIME}}"  # TIMING PLACEHOLDER

                with Timer("jsonification", silent=True) as json_timer:
                    response_data = unicode2utf8(value2json(result))

            with Timer("post timer", silent=True):
                # IMPORTANT: WE WANT TO TIME OF THE JSON SERIALIZATION, AND HAVE IT IN THE JSON ITSELF.
                # WE CHEAT BY DOING A (HOPEFULLY FAST) STRING REPLACEMENT AT THE VERY END
                timing_replacement = b'"total": ' + str(mo_math.round(query_timer.duration.seconds, digits=4)) +\
                                     b', "jsonification": ' + str(mo_math.round(json_timer.duration.seconds, digits=4))
                response_data = response_data.replace(
                    b'"total":"{{TOTAL_TIME}}"', timing_replacement)
                Log.note("Response is {{num}} bytes in {{duration}}",
                         num=len(response_data),
                         duration=query_timer.duration)

                return Response(
                    response_data,
                    status=200,
                    headers={"Content-Type": result.meta.content_type})
        except Exception as e:
            e = Except.wrap(e)
            return send_error(query_timer, request_body, e)
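
The "{{TOTAL_TIME}}" placeholder exists because the total time cannot be known until after the result has been serialized; the measured value is spliced into the encoded bytes afterwards. A self-contained sketch of the trick (values and keys here are illustrative):

import json
import time

start = time.time()
result = {"data": [1, 2, 3], "timing": {"total": "{{TOTAL_TIME}}"}}
payload = json.dumps(result).encode("utf8")
# Patch the placeholder in the serialized bytes with the real total.
total = round(time.time() - start, 4)
payload = payload.replace(b'"{{TOTAL_TIME}}"', str(total).encode("utf8"))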
Example #8
def request(method, url, headers=None, zip=None, retry=None, **kwargs):
    """
    JUST LIKE requests.request() BUT WITH DEFAULT HEADERS AND FIXES
    DEMANDS data IS ONE OF:
    * A JSON-SERIALIZABLE STRUCTURE, OR
    * LIST OF JSON-SERIALIZABLE STRUCTURES, OR
    * None

    Parameters
     * zip - ZIP THE REQUEST BODY, IF BIG ENOUGH
     * json - JSON-SERIALIZABLE STRUCTURE
     * retry - {"times": x, "sleep": y} STRUCTURE

    THE BYTE_STRINGS (b"") ARE NECESSARY TO PREVENT httplib.py FROM **FREAKING OUT**
    IT APPEARS requests AND httplib.py SIMPLY CONCATENATE STRINGS BLINDLY, WHICH
    INCLUDES url AND headers
    """
    global _warning_sent
    global request_count

    if not _warning_sent and not default_headers:
        Log.warning(text(
            "The pyLibrary.env.http module was meant to add extra "
            "default headers to all requests, specifically the 'Referer' "
            "header with a URL to the project. Use the `pyLibrary.debug.constants.set()` "
            "function to set `pyLibrary.env.http.default_headers`"))
    _warning_sent = True

    if is_list(url):
        # TRY MANY URLS
        failures = []
        for remaining, u in jx.countdown(url):
            try:
                response = request(method, u, retry=retry, **kwargs)
                if mo_math.round(response.status_code,
                                 decimal=-2) not in [400, 500]:
                    return response
                if not remaining:
                    return response
            except Exception as e:
                e = Except.wrap(e)
                failures.append(e)
        Log.error(u"Tried {{num}} urls", num=len(url), cause=failures)

    if 'session' in kwargs:
        session = kwargs['session']
        del kwargs['session']
        sess = Null
    else:
        sess = session = sessions.Session()

    with closing(sess):
        if PY2 and is_text(url):
            # httplib.py WILL **FREAK OUT** IF IT SEES ANY UNICODE
            url = url.encode('ascii')

        try:
            set_default(kwargs, {"zip": zip, "retry": retry}, DEFAULTS)
            _to_ascii_dict(kwargs)

            # HEADERS
            headers = kwargs['headers'] = unwrap(
                set_default(headers, session.headers, default_headers))
            _to_ascii_dict(headers)
            del kwargs['headers']

            # RETRY
            retry = wrap(kwargs['retry'])
            if isinstance(retry, Number):
                retry = set_default({"times": retry}, DEFAULTS['retry'])
            if isinstance(retry.sleep, Duration):
                retry.sleep = retry.sleep.seconds
            del kwargs['retry']

            # JSON
            if 'json' in kwargs:
                kwargs['data'] = value2json(kwargs['json']).encode('utf8')
                del kwargs['json']

            # ZIP
            set_default(headers, {'Accept-Encoding': 'compress, gzip'})

            if kwargs['zip'] and len(coalesce(kwargs.get('data'))) > 1000:
                compressed = convert.bytes2zip(kwargs['data'])
                headers['content-encoding'] = 'gzip'
                kwargs['data'] = compressed
            del kwargs['zip']
        except Exception as e:
            Log.error(u"Request setup failure on {{url}}", url=url, cause=e)

        errors = []
        for r in range(retry.times):
            if r:
                Till(seconds=retry.sleep).wait()

            try:
                DEBUG and Log.note(u"http {{method|upper}} to {{url}}",
                                   method=method,
                                   url=text(url))
                request_count += 1
                return session.request(method=method,
                                       headers=headers,
                                       url=str(url),
                                       **kwargs)
            except Exception as e:
                e = Except.wrap(e)
                if retry['http'] and str(url).startswith(
                        "https://"
                ) and "EOF occurred in violation of protocol" in e:
                    url = URL("http://" + str(url)[8:])
                    Log.note(
                        "Changed {{url}} to http due to SSL EOF violation.",
                        url=str(url))
                errors.append(e)

        if " Read timed out." in errors[0]:
            Log.error(
                u"Tried {{times}} times: Timeout failure (timeout was {{timeout}}",
                timeout=kwargs['timeout'],
                times=retry.times,
                cause=errors[0])
        else:
            Log.error(u"Tried {{times}} times: Request failure of {{url}}",
                      url=url,
                      times=retry.times,
                      cause=errors[0])
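
The ZIP branch compresses the request body only when it is large enough to be worth the overhead, and advertises this in the headers. A stdlib sketch of the same idea (the 1000-byte threshold comes from the code above):

import gzip

def maybe_compress(data, headers, threshold=1000):
    # Gzip the body and flag it, but only past the size threshold.
    if data and len(data) > threshold:
        headers["content-encoding"] = "gzip"
        data = gzip.compress(data)
    return data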
Example #9
def jx_query(path):
    try:
        with Timer("total duration", verbose=DEBUG) as query_timer:
            preamble_timer = Timer("preamble", silent=True)
            with preamble_timer:
                if flask.request.headers.get("content-length", "") in ["", "0"]:
                    # ASSUME A BROWSER HIT THIS POINT, SEND text/html RESPONSE BACK
                    return Response(
                        BLANK,
                        status=400,
                        headers={
                            "Content-Type": "text/html"
                        }
                    )
                elif int(flask.request.headers["content-length"]) > QUERY_SIZE_LIMIT:
                    Log.error(QUERY_TOO_LARGE)

                request_body = flask.request.get_data().strip()
                text = request_body.decode('utf8')
                data = json2value(text)
                record_request(flask.request, data, None, None)
                if data.meta.testing:
                    test_mode_wait(data, MAIN_THREAD.please_stop)

            find_table_timer = Timer("find container", verbose=DEBUG)
            with find_table_timer:
                frum = find_container(data['from'], after=None)

            translate_timer = Timer("translate", verbose=DEBUG)
            with translate_timer:
                result = jx.run(data, container=frum)

                if isinstance(result, Container):  # TODO: REMOVE THIS CHECK, jx SHOULD ALWAYS RETURN Containers
                    result = result.format(data.format)

            save_timer = Timer("save", verbose=DEBUG)
            with save_timer:
                if data.meta.save:
                    try:
                        result.meta.saved_as = save_query.query_finder.save(data)
                    except Exception as e:
                        Log.warning("Unexpected save problem", cause=e)

            result.meta.timing.find_table = mo_math.round(find_table_timer.duration.seconds, digits=4)
            result.meta.timing.preamble = mo_math.round(preamble_timer.duration.seconds, digits=4)
            result.meta.timing.translate = mo_math.round(translate_timer.duration.seconds, digits=4)
            result.meta.timing.save = mo_math.round(save_timer.duration.seconds, digits=4)
            result.meta.timing.total = "{{TOTAL_TIME}}"  # TIMING PLACEHOLDER

            with Timer("jsonification", verbose=DEBUG) as json_timer:
                response_data = value2json(result).encode('utf8')

        with Timer("post timer", verbose=DEBUG):
            # IMPORTANT: WE WANT TO TIME OF THE JSON SERIALIZATION, AND HAVE IT IN THE JSON ITSELF.
            # WE CHEAT BY DOING A (HOPEFULLY FAST) STRING REPLACEMENT AT THE VERY END
            timing_replacement = (
                b'"total":' + binary_type(mo_math.round(query_timer.duration.seconds, digits=4)) +
                b', "jsonification":' + binary_type(mo_math.round(json_timer.duration.seconds, digits=4))
            )
            response_data = response_data.replace(b'"total":"{{TOTAL_TIME}}"', timing_replacement)
            Log.note("Response is {{num}} bytes in {{duration}}", num=len(response_data), duration=query_timer.duration)

            return Response(
                response_data,
                status=200,
                headers={
                    "Content-Type": result.meta.content_type
                }
            )
    except Exception as e:
        e = Except.wrap(e)
        return send_error(query_timer, request_body, e)
Example #10
    def add_instances(self, net_new_utility, remaining_budget):
        prices = self.pricing()

        for p in prices:
            if net_new_utility <= 0 or remaining_budget <= 0:
                break

            if p.current_price == None:
                Log.note("{{type}} has no current price",
                         type=p.type.instance_type)
                continue

            if self.settings.utility[p.type.instance_type].blacklist or \
                p.availability_zone in listwrap(self.settings.utility[p.type.instance_type].blacklist_zones):
                Log.note("{{type}} in {{zone}} skipped due to blacklist",
                         type=p.type.instance_type,
                         zone=p.availability_zone)
                continue

            # DO NOT BID HIGHER THAN WHAT WE ARE WILLING TO PAY
            max_acceptable_price = p.type.utility * self.settings.max_utility_price + p.type.discount
            max_bid = mo_math.min(p.higher_price, max_acceptable_price,
                                  remaining_budget)
            min_bid = p.price_80

            if min_bid > max_acceptable_price:
                Log.note(
                    "Price of ${{price}}/hour on {{type}}: Over remaining acceptable price of ${{remaining}}/hour",
                    type=p.type.instance_type,
                    price=min_bid,
                    remaining=max_acceptable_price)
                continue
            elif min_bid > remaining_budget:
                Log.note(
                    "Did not bid ${{bid}}/hour on {{type}}: Over budget of ${{remaining_budget}}/hour",
                    type=p.type.instance_type,
                    bid=min_bid,
                    remaining_budget=remaining_budget)
                continue
            elif min_bid > max_bid:
                Log.error("not expected")

            naive_number_needed = int(
                mo_math.round(float(net_new_utility) / float(p.type.utility),
                              decimal=0))
            limit_total = None
            if self.settings.max_percent_per_type < 1:
                current_count = sum(
                    1 for a in self.active
                    if a.launch_specification.instance_type ==
                    p.type.instance_type and a.launch_specification.placement
                    == p.availability_zone)
                all_count = sum(
                    1 for a in self.active
                    if a.launch_specification.placement == p.availability_zone)
                all_count = max(all_count, naive_number_needed)
                limit_total = int(
                    mo_math.floor(
                        (all_count * self.settings.max_percent_per_type -
                         current_count) /
                        (1 - self.settings.max_percent_per_type)))

            num = mo_math.min(naive_number_needed, limit_total,
                              self.settings.max_requests_per_type)
            if num < 0:
                Log.note(
                    "{{type}} is over {{limit|percent}} of instances, no more requested",
                    limit=self.settings.max_percent_per_type,
                    type=p.type.instance_type)
                continue
            elif num == 1:
                min_bid = mo_math.min(
                    mo_math.max(p.current_price * 1.1, min_bid),
                    max_acceptable_price)
                price_interval = 0
            else:
                price_interval = mo_math.min(min_bid / 10,
                                             (max_bid - min_bid) / (num - 1))

            for i in range(num):
                bid_per_machine = min_bid + (i * price_interval)
                if bid_per_machine < p.current_price:
                    Log.note(
                        "Did not bid ${{bid}}/hour on {{type}}: Under current price of ${{current_price}}/hour",
                        type=p.type.instance_type,
                        bid=bid_per_machine - p.type.discount,
                        current_price=p.current_price)
                    continue
                if bid_per_machine - p.type.discount > remaining_budget:
                    Log.note(
                        "Did not bid ${{bid}}/hour on {{type}}: Over remaining budget of ${{remaining}}/hour",
                        type=p.type.instance_type,
                        bid=bid_per_machine - p.type.discount,
                        remaining=remaining_budget)
                    continue

                last_no_capacity_message = self.no_capacity.get(
                    p.type.instance_type, Null)
                if last_no_capacity_message > Date.now() - CAPACITY_NOT_AVAILABLE_RETRY:
                    Log.note(
                        "Did not bid on {{type}}: \"No capacity\" last seen at {{last_time|datetime}}",
                        type=p.type.instance_type,
                        last_time=last_no_capacity_message)
                    continue

                try:
                    if self.settings.ec2.request.count == None or self.settings.ec2.request.count != 1:
                        Log.error("Spot Manager can only request one machine at a time")

                    new_requests = self._request_spot_instances(
                        price=bid_per_machine,
                        availability_zone_group=p.availability_zone,
                        instance_type=p.type.instance_type,
                        kwargs=copy(self.settings.ec2.request))
                    Log.note(
                        "Request {{num}} instance {{type}} in {{zone}} with utility {{utility}} at ${{price}}/hour",
                        num=len(new_requests),
                        type=p.type.instance_type,
                        zone=p.availability_zone,
                        utility=p.type.utility,
                        price=bid_per_machine)
                    net_new_utility -= p.type.utility * len(new_requests)
                    remaining_budget -= (bid_per_machine -
                                         p.type.discount) * len(new_requests)
                    with self.net_new_locker:
                        for ii in new_requests:
                            self.net_new_spot_requests.add(ii)
                except Exception as e:
                    Log.warning(
                        "Request instance {{type}} failed because {{reason}}",
                        type=p.type.instance_type,
                        reason=e.message,
                        cause=e)

                    if "Max spot instance count exceeded" in e.message:
                        Log.note("No further spot requests will be attempted.")
                        return net_new_utility, remaining_budget

        return net_new_utility, remaining_budget
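
The sizing arithmetic in Example #10 turns a utility shortfall into a whole machine count, then spreads bids over a small price ladder. A sketch of the count step (the numbers are illustrative):

net_new_utility = 13.0  # utility still needed
per_machine_utility = 4.0
# Round the ratio to a whole number of instances to request.
naive_number_needed = int(round(net_new_utility / per_machine_utility))  # 3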
Example #11
def es_aggsop(es, frum, query):
    query = query.copy()  # WE WILL MARK UP THIS QUERY
    schema = frum.schema
    query_path = schema.query_path[0]
    select = listwrap(query.select)

    new_select = Data()  # MAP FROM canonical_name (USED FOR NAMES IN QUERY) TO SELECT MAPPING
    formula = []
    for s in select:
        if is_op(s.value, Variable_):
            s.query_path = query_path
            if s.aggregate == "count":
                new_select["count_" + literal_field(s.value.var)] += [s]
            else:
                new_select[literal_field(s.value.var)] += [s]
        elif s.aggregate:
            split_select = split_expression_by_path(s.value,
                                                    schema,
                                                    lang=Painless)
            for si_key, si_value in split_select.items():
                if si_value:
                    if s.query_path:
                        Log.error(
                            "can not handle more than one depth per select")
                    s.query_path = si_key
            formula.append(s)

    acc = Aggs()
    for _, many in new_select.items():
        for s in many:
            canonical_name = s.name
            if s.aggregate in ("value_count", "count"):
                columns = frum.schema.values(s.value.var,
                                             exclude_type=(OBJECT, NESTED))
            else:
                columns = frum.schema.values(s.value.var)

            if s.aggregate == "count":
                canonical_names = []
                for column in columns:
                    es_name = column.es_column + "_count"
                    if column.jx_type == EXISTS:
                        if column.nested_path[0] == query_path:
                            canonical_names.append("doc_count")
                            acc.add(
                                NestedAggs(column.nested_path[0]).add(
                                    CountAggs(s)))
                    else:
                        canonical_names.append("value")
                        acc.add(
                            NestedAggs(column.nested_path[0]).add(
                                ExprAggs(es_name, {
                                    "value_count": {
                                        "field": column.es_column
                                    }
                                }, s)))
                if len(canonical_names) == 1:
                    s.pull = jx_expression_to_function(canonical_names[0])
                else:
                    s.pull = jx_expression_to_function(
                        {"add": canonical_names})
            elif s.aggregate == "median":
                columns = [
                    c for c in columns if c.jx_type in (NUMBER, INTEGER)
                ]
                if len(columns) != 1:
                    Log.error(
                        "Do not know how to perform median on columns with more than one type (script probably)"
                    )
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = canonical_name + " percentile"
                acc.add(
                    ExprAggs(
                        key, {
                            "percentiles": {
                                "field": first(columns).es_column,
                                "percents": [50]
                            }
                        }, s))
                s.pull = jx_expression_to_function("values.50\\.0")
            elif s.aggregate == "percentile":
                columns = [
                    c for c in columns if c.jx_type in (NUMBER, INTEGER)
                ]
                if len(columns) != 1:
                    Log.error(
                        "Do not know how to perform percentile on columns with more than one type (script probably)"
                    )
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = canonical_name + " percentile"
                if is_text(s.percentile) or s.percentile < 0 or 1 < s.percentile:
                    Log.error(
                        "Expecting percentile to be a float from 0.0 to 1.0")
                percent = mo_math.round(s.percentile * 100, decimal=6)

                acc.add(
                    ExprAggs(
                        key, {
                            "percentiles": {
                                "field": first(columns).es_column,
                                "percents": [percent],
                                "tdigest": {
                                    "compression": 2
                                }
                            }
                        }, s))
                s.pull = jx_expression_to_function(
                    join_field(["values", text_type(percent)]))
            elif s.aggregate == "cardinality":
                for column in columns:
                    path = column.es_column + "_cardinality"
                    acc.add(
                        ExprAggs(path,
                                 {"cardinality": {
                                     "field": column.es_column
                                 }}, s))
                s.pull = jx_expression_to_function("value")
            elif s.aggregate == "stats":
                columns = [
                    c for c in columns if c.jx_type in (NUMBER, INTEGER)
                ]
                if len(columns) != 1:
                    Log.error(
                        "Do not know how to perform stats on columns with more than one type (script probably)"
                    )
                # REGULAR STATS
                acc.add(
                    ExprAggs(canonical_name, {
                        "extended_stats": {
                            "field": first(columns).es_column
                        }
                    }, s))
                s.pull = get_pull_stats()

                # GET MEDIAN TOO!
                select_median = s.copy()
                select_median.pull = jx_expression_to_function(
                    {"select": [{
                        "name": "median",
                        "value": "values.50\\.0"
                    }]})

                acc.add(
                    ExprAggs(
                        canonical_name + "_percentile", {
                            "percentiles": {
                                "field": first(columns).es_column,
                                "percents": [50]
                            }
                        }, select_median))

            elif s.aggregate == "union":
                for column in columns:
                    script = {
                        "scripted_metric": {
                            'init_script':
                            'params._agg.terms = new HashSet()',
                            'map_script':
                            'for (v in doc[' + quote(column.es_column) +
                            '].values) params._agg.terms.add(v);',
                            'combine_script':
                            'return params._agg.terms.toArray()',
                            'reduce_script':
                            'HashSet output = new HashSet(); for (a in params._aggs) { if (a!=null) for (v in a) {output.add(v)} } return output.toArray()',
                        }
                    }
                    stats_name = column.es_column
                    acc.add(
                        NestedAggs(column.nested_path[0]).add(
                            ExprAggs(stats_name, script, s)))
                s.pull = jx_expression_to_function("value")
            elif s.aggregate == "count_values":
                # RETURN MAP FROM VALUE TO THE NUMBER OF TIMES FOUND IN THE DOCUMENTS
                # NOT A NESTED DOC, RATHER A MULTIVALUE FIELD
                for column in columns:
                    script = {
                        "scripted_metric": {
                            'params': {
                                "_agg": {}
                            },
                            'init_script':
                            'params._agg.terms = new HashMap()',
                            'map_script':
                            'for (v in doc[' + quote(column.es_column) +
                            '].values) params._agg.terms.put(v, Optional.ofNullable(params._agg.terms.get(v)).orElse(0)+1);',
                            'combine_script':
                            'return params._agg.terms',
                            'reduce_script':
                            '''
                            HashMap output = new HashMap(); 
                            for (agg in params._aggs) {
                                if (agg!=null){
                                    for (e in agg.entrySet()) {
                                        String key = String.valueOf(e.getKey());
                                        output.put(key, e.getValue() + Optional.ofNullable(output.get(key)).orElse(0));
                                    } 
                                }
                            } 
                            return output;
                        '''
                        }
                    }
                    stats_name = encode_property(column.es_column)
                    acc.add(
                        NestedAggs(column.nested_path[0]).add(
                            ExprAggs(stats_name, script, s)))
                s.pull = jx_expression_to_function("value")
            else:
                if not columns:
                    s.pull = jx_expression_to_function(NULL)
                else:
                    for c in columns:
                        acc.add(
                            NestedAggs(c.nested_path[0]).add(
                                ExprAggs(
                                    canonical_name,
                                    {"extended_stats": {
                                        "field": c.es_column
                                    }}, s)))
                    s.pull = jx_expression_to_function(aggregates[s.aggregate])

    for i, s in enumerate(formula):
        s_path = [
            k for k, v in split_expression_by_path(
                s.value, schema=schema, lang=Painless).items() if v
        ]
        if len(s_path) == 0:
            # FOR CONSTANTS
            nest = NestedAggs(query_path)
            acc.add(nest)
        elif len(s_path) == 1:
            nest = NestedAggs(first(s_path))
            acc.add(nest)
        else:
            Log.error("do not know how to handle")

        canonical_name = s.name
        if is_op(s.value, TupleOp):
            if s.aggregate == "count":
                # TUPLES ALWAYS EXIST, SO COUNTING THEM IS EASY
                s.pull = jx_expression_to_function("doc_count")
            elif s.aggregate in ('max', 'maximum', 'min', 'minimum'):
                if s.aggregate in ('max', 'maximum'):
                    dir = 1
                    op = "max"
                else:
                    dir = -1
                    op = 'min'

                nully = Painless[TupleOp(
                    [NULL] *
                    len(s.value.terms))].partial_eval().to_es_script(schema)
                selfy = text_type(
                    Painless[s.value].partial_eval().to_es_script(schema))

                script = {
                    "scripted_metric": {
                        'init_script':
                        'params._agg.best = ' + nully + ';',
                        'map_script':
                        'params._agg.best = ' + expand_template(
                            MAX_OF_TUPLE, {
                                "expr1": "params._agg.best",
                                "expr2": selfy,
                                "dir": dir,
                                "op": op
                            }) + ";",
                        'combine_script':
                        'return params._agg.best',
                        'reduce_script':
                        'return params._aggs.stream().' + op + '(' +
                        expand_template(COMPARE_TUPLE, {
                            "dir": dir,
                            "op": op
                        }) + ').get()',
                    }
                }
                nest.add(
                    NestedAggs(query_path).add(
                        ExprAggs(canonical_name, script, s)))
                s.pull = jx_expression_to_function("value")
            else:
                Log.error("{{agg}} is not a supported aggregate over a tuple",
                          agg=s.aggregate)
        elif s.aggregate == "count":
            nest.add(
                ExprAggs(
                    canonical_name, {
                        "value_count": {
                            "script":
                            text_type(Painless[
                                s.value].partial_eval().to_es_script(schema))
                        }
                    }, s))
            s.pull = jx_expression_to_function("value")
        elif s.aggregate == "median":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")
            nest.add(
                ExprAggs(
                    key, {
                        "percentiles": {
                            "script":
                            text_type(Painless[s.value].to_es_script(schema)),
                            "percents": [50]
                        }
                    }, s))
            s.pull = jx_expression_to_function(join_field(["50.0"]))
        elif s.aggregate == "percentile":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")
            percent = mo_math.round(s.percentile * 100, decimal=6)
            nest.add(
                ExprAggs(
                    key, {
                        "percentiles": {
                            "script":
                            text_type(Painless[s.value].to_es_script(schema)),
                            "percents": [percent]
                        }
                    }, s))
            s.pull = jx_expression_to_function(
                join_field(["values", text_type(percent)]))
        elif s.aggregate == "cardinality":
            # ES USES DIFFERENT METHOD FOR CARDINALITY
            key = canonical_name + " cardinality"
            nest.add(
                ExprAggs(
                    key, {
                        "cardinality": {
                            "script":
                            text_type(Painless[s.value].to_es_script(schema))
                        }
                    }, s))
            s.pull = jx_expression_to_function("value")
        elif s.aggregate == "stats":
            # REGULAR STATS
            nest.add(
                ExprAggs(
                    canonical_name, {
                        "extended_stats": {
                            "script":
                            text_type(Painless[s.value].to_es_script(schema))
                        }
                    }, s))
            s.pull = get_pull_stats()

            # GET MEDIAN TOO!
            select_median = s.copy()
            select_median.pull = jx_expression_to_function(
                {"select": [{
                    "name": "median",
                    "value": "values.50\\.0"
                }]})

            nest.add(
                ExprAggs(
                    canonical_name + "_percentile", {
                        "percentiles": {
                            "script":
                            text_type(Painless[s.value].to_es_script(schema)),
                            "percents": [50]
                        }
                    }, select_median))
            s.pull = get_pull_stats()
        elif s.aggregate == "union":
            # USE TERMS AGGREGATE TO SIMULATE union
            nest.add(
                TermsAggs(
                    canonical_name, {
                        "script_field":
                        text_type(Painless[s.value].to_es_script(schema))
                    }, s))
            s.pull = jx_expression_to_function("key")
        else:
            # PULL VALUE OUT OF THE stats AGGREGATE
            s.pull = jx_expression_to_function(aggregates[s.aggregate])
            nest.add(
                ExprAggs(
                    canonical_name, {
                        "extended_stats": {
                            "script":
                            text_type(
                                NumberOp(s.value).partial_eval().to_es_script(
                                    schema))
                        }
                    }, s))

    acc = NestedAggs(query_path).add(acc)
    split_decoders = get_decoders_by_path(query)
    split_wheres = split_expression_by_path(query.where,
                                            schema=frum.schema,
                                            lang=ES52)

    start = 0
    decoders = [None] * (len(query.edges) + len(query.groupby))
    paths = list(reversed(sorted(split_wheres.keys() | split_decoders.keys())))
    for path in paths:
        literal_path = literal_field(path)
        decoder = split_decoders[literal_path]
        where = split_wheres[literal_path]

        for d in decoder:
            decoders[d.edge.dim] = d
            acc = d.append_query(path, acc)
            start += d.num_columns

        if where:
            acc = FilterAggs("_filter", AndOp(where), None).add(acc)
        acc = NestedAggs(path).add(acc)

    acc = NestedAggs('.').add(acc)
    acc = simplify(acc)
    es_query = wrap(acc.to_es(schema))

    es_query.size = 0

    with Timer("ES query time", silent=not DEBUG) as es_duration:
        result = es_post(es, es_query, query.limit)

    try:
        format_time = Timer("formatting", silent=not DEBUG)
        with format_time:
            # result.aggregations.doc_count = coalesce(result.aggregations.doc_count, result.hits.total)  # IT APPEARS THE OLD doc_count IS GONE
            aggs = unwrap(result.aggregations)

            formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[
                query.format]
            if query.edges:
                output = formatter(aggs, acc, query, decoders, select)
            elif query.groupby:
                output = groupby_formatter(aggs, acc, query, decoders, select)
            else:
                output = aggop_formatter(aggs, acc, query, decoders, select)

        output.meta.timing.formatting = format_time.duration
        output.meta.timing.es_search = es_duration.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        if query.format not in format_dispatch:
            Log.error("Format {{format|quote}} not supported yet",
                      format=query.format,
                      cause=e)
        Log.error("Some problem", cause=e)
Example #12
def request(method, url, headers=None, zip=None, retry=None, **kwargs):
    """
    JUST LIKE requests.request() BUT WITH DEFAULT HEADERS AND FIXES
    DEMANDS data IS ONE OF:
    * A JSON-SERIALIZABLE STRUCTURE, OR
    * LIST OF JSON-SERIALIZABLE STRUCTURES, OR
    * None

    Parameters
     * zip - ZIP THE REQUEST BODY, IF BIG ENOUGH
     * json - JSON-SERIALIZABLE STRUCTURE
     * retry - {"times": x, "sleep": y} STRUCTURE

    THE BYTE_STRINGS (b"") ARE NECESSARY TO PREVENT httplib.py FROM **FREAKING OUT**
    IT APPEARS requests AND httplib.py SIMPLY CONCATENATE STRINGS BLINDLY, WHICH
    INCLUDES url AND headers
    """
    global _warning_sent
    global request_count

    if not _warning_sent and not default_headers:
        Log.warning(text_type(
            "The pyLibrary.env.http module was meant to add extra " +
            "default headers to all requests, specifically the 'Referer' " +
            "header with a URL to the project. Use the `pyLibrary.debug.constants.set()` " +
            "function to set `pyLibrary.env.http.default_headers`"
        ))
    _warning_sent = True

    if is_list(url):
        # TRY MANY URLS
        failures = []
        for remaining, u in jx.countdown(url):
            try:
                response = request(method, u, retry=retry, **kwargs)
                if mo_math.round(response.status_code, decimal=-2) not in [400, 500]:
                    return response
                if not remaining:
                    return response
            except Exception as e:
                e = Except.wrap(e)
                failures.append(e)
        Log.error(u"Tried {{num}} urls", num=len(url), cause=failures)

    if 'session' in kwargs:
        session = kwargs['session']
        del kwargs['session']
        sess = Null
    else:
        sess = session = sessions.Session()

    with closing(sess):
        if PY2 and is_text(url):
            # httplib.py WILL **FREAK OUT** IF IT SEES ANY UNICODE
            url = url.encode('ascii')

        try:
            set_default(kwargs, {"zip":zip, "retry": retry}, DEFAULTS)
            _to_ascii_dict(kwargs)

            # HEADERS
            headers = kwargs['headers'] = unwrap(set_default(headers, session.headers, default_headers))
            _to_ascii_dict(headers)
            del kwargs['headers']

            # RETRY
            retry = wrap(kwargs['retry'])
            if isinstance(retry, Number):
                retry = set_default({"times":retry}, DEFAULTS['retry'])
            if isinstance(retry.sleep, Duration):
                retry.sleep = retry.sleep.seconds
            del kwargs['retry']

            # JSON
            if 'json' in kwargs:
                kwargs['data'] = value2json(kwargs['json']).encode('utf8')
                del kwargs['json']

            # ZIP
            set_default(headers, {'Accept-Encoding': 'compress, gzip'})

            if kwargs['zip'] and len(coalesce(kwargs.get('data'))) > 1000:
                compressed = convert.bytes2zip(kwargs['data'])
                headers['content-encoding'] = 'gzip'
                kwargs['data'] = compressed
            del kwargs['zip']
        except Exception as e:
            Log.error(u"Request setup failure on {{url}}", url=url, cause=e)

        errors = []
        for r in range(retry.times):
            if r:
                Till(seconds=retry.sleep).wait()

            try:
                DEBUG and Log.note(u"http {{method|upper}} to {{url}}", method=method, url=text_type(url))
                request_count += 1
                return session.request(method=method, headers=headers, url=str(url), **kwargs)
            except Exception as e:
                e = Except.wrap(e)
                if retry['http'] and str(url).startswith("https://") and "EOF occurred in violation of protocol" in e:
                    url = URL("http://" + str(url)[8:])
                    Log.note("Changed {{url}} to http due to SSL EOF violation.", url=str(url))
                errors.append(e)

        if " Read timed out." in errors[0]:
            Log.error(u"Tried {{times}} times: Timeout failure (timeout was {{timeout}}", timeout=kwargs['timeout'], times=retry.times, cause=errors[0])
        else:
            Log.error(u"Tried {{times}} times: Request failure of {{url}}", url=url, times=retry.times, cause=errors[0])
Example #13
0
def agg_field(acc, new_select, query_path, schema):
    for s in (s for _, many in new_select.items() for s in many):
        canonical_name = s.name
        if s.aggregate in ("value_count", "count"):
            columns = schema.values(s.value.var, exclude_type=(OBJECT, NESTED))
        else:
            columns = schema.values(s.value.var)

        if s.aggregate == "count":
            canonical_names = []
            for column in columns:
                es_name = column.es_column + "_count"
                if column.jx_type == EXISTS:
                    if column.nested_path[0] == query_path:
                        canonical_names.append("doc_count")
                        acc.add(
                            NestedAggs(column.nested_path[0]).add(
                                CountAggs(s)))
                else:
                    canonical_names.append("value")
                    acc.add(
                        NestedAggs(column.nested_path[0]).add(
                            ExprAggs(
                                es_name,
                                {"value_count": {
                                    "field": column.es_column
                                }}, s)))
            if len(canonical_names) == 1:
                s.pull = jx_expression_to_function(canonical_names[0])
            else:
                s.pull = jx_expression_to_function({"add": canonical_names})
        elif s.aggregate == "median":
            columns = [c for c in columns if c.jx_type in NUMBER_TYPES]
            if len(columns) != 1:
                Log.error(
                    "Do not know how to perform median on columns with more than one type (script probably)"
                )
            # ES USES DIFFERENT METHOD FOR PERCENTILES
            key = canonical_name + " percentile"
            acc.add(
                ExprAggs(
                    key, {
                        "percentiles": {
                            "field": first(columns).es_column,
                            "percents": [50]
                        }
                    }, s))
            s.pull = jx_expression_to_function("values.50\\.0")
        elif s.aggregate in ("and", "or"):
            columns = [c for c in columns if c.jx_type is BOOLEAN]
            op = aggregates[s.aggregate]
            if not columns:
                s.pull = jx_expression_to_function(NULL)
            else:
                for c in columns:
                    acc.add(
                        NestedAggs(c.nested_path[0]).add(
                            ExprAggs(canonical_name,
                                     {op: {
                                         "field": c.es_column
                                     }}, s)))
                # get_name = concat_field(canonical_name, "value")
                s.pull = jx_expression_to_function({
                    "case": [{
                        "when": {
                            "eq": {
                                "value": 1
                            }
                        },
                        "then": True
                    }, {
                        "when": {
                            "eq": {
                                "value": 0
                            }
                        },
                        "then": False
                    }]
                })
        elif s.aggregate == "percentile":
            columns = [c for c in columns if c.jx_type in NUMBER_TYPES]
            if len(columns) != 1:
                Log.error(
                    "Do not know how to perform percentile on columns with more than one type (script probably)"
                )
            # ES USES DIFFERENT METHOD FOR PERCENTILES
            key = canonical_name + " percentile"
            if is_text(s.percentile) or s.percentile < 0 or 1 < s.percentile:
                Log.error("Expecting percentile to be a float from 0.0 to 1.0")
            percent = mo_math.round(s.percentile * 100, decimal=6)

            acc.add(
                ExprAggs(
                    key, {
                        "percentiles": {
                            "field": first(columns).es_column,
                            "percents": [percent],
                            "tdigest": {
                                "compression": 2
                            }
                        }
                    }, s))
            s.pull = jx_expression_to_function(
                join_field(["values", text(percent)]))
        elif s.aggregate == "cardinality":
            for column in columns:
                path = column.es_column + "_cardinality"
                acc.add(
                    ExprAggs(path,
                             {"cardinality": {
                                 "field": column.es_column
                             }}, s))
            s.pull = jx_expression_to_function("value")
        elif s.aggregate == "stats":
            columns = [c for c in columns if c.jx_type in NUMBER_TYPES]
            if len(columns) != 1:
                Log.error(
                    "Do not know how to perform stats on columns with more than one type (script probably)"
                )
            # REGULAR STATS
            acc.add(
                ExprAggs(
                    canonical_name,
                    {"extended_stats": {
                        "field": first(columns).es_column
                    }}, s))
            s.pull = get_pull_stats()

            # GET MEDIAN TOO!
            select_median = s.copy()
            select_median.pull = jx_expression_to_function(
                {"select": [{
                    "name": "median",
                    "value": "values.50\\.0"
                }]})

            acc.add(
                ExprAggs(
                    canonical_name + "_percentile", {
                        "percentiles": {
                            "field": first(columns).es_column,
                            "percents": [50]
                        }
                    }, select_median))

        elif s.aggregate == "union":
            for column in columns:
                script = {
                    "scripted_metric": {
                        'init_script':
                        'params._agg.terms = new HashSet()',
                        'map_script':
                        'for (v in doc[' + quote(column.es_column) +
                        '].values) params._agg.terms.add(v);',
                        'combine_script':
                        'return params._agg.terms.toArray()',
                        'reduce_script':
                        'HashSet output = new HashSet(); for (a in params._aggs) { if (a!=null) for (v in a) {output.add(v)} } return output.toArray()',
                    }
                }
                stats_name = column.es_column
                acc.add(
                    NestedAggs(column.nested_path[0]).add(
                        ExprAggs(stats_name, script, s)))
            s.pull = jx_expression_to_function("value")
        elif s.aggregate == "count_values":
            # RETURN MAP FROM VALUE TO THE NUMBER OF TIMES FOUND IN THE DOCUMENTS
            # NOT A NESTED DOC, RATHER A MULTIVALUE FIELD
            for column in columns:
                script = {
                    "scripted_metric": {
                        'params': {
                            "_agg": {}
                        },
                        'init_script':
                        'params._agg.terms = new HashMap()',
                        'map_script':
                        'for (v in doc[' + quote(column.es_column) +
                        '].values) params._agg.terms.put(v, Optional.ofNullable(params._agg.terms.get(v)).orElse(0)+1);',
                        'combine_script':
                        'return params._agg.terms',
                        'reduce_script':
                        '''
                        HashMap output = new HashMap(); 
                        for (agg in params._aggs) {
                            if (agg!=null){
                                for (e in agg.entrySet()) {
                                    String key = String.valueOf(e.getKey());
                                    output.put(key, e.getValue() + Optional.ofNullable(output.get(key)).orElse(0));
                                } 
                            }
                        } 
                        return output;
                    '''
                    }
                }
                stats_name = encode_property(column.es_column)
                acc.add(
                    NestedAggs(column.nested_path[0]).add(
                        ExprAggs(stats_name, script, s)))
            s.pull = jx_expression_to_function("value")
        else:
            if not columns:
                s.pull = jx_expression_to_function(NULL)
            else:
                for c in columns:
                    acc.add(
                        NestedAggs(c.nested_path[0]).add(
                            ExprAggs(
                                canonical_name,
                                {"extended_stats": {
                                    "field": c.es_column
                                }}, s)))
                s.pull = jx_expression_to_function(aggregates[s.aggregate])
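To make the percentile branch concrete, this is roughly the aggregation it emits for a hypothetical select requesting the 0.95 percentile of a single numeric column named response_time (sketched by hand from the code above, not captured from a live query):

percent = mo_math.round(0.95 * 100, decimal=6)   # -> 95.0
agg = {
    "response_time percentile": {        # canonical_name + " percentile"
        "percentiles": {
            "field": "response_time",    # hypothetical es_column
            "percents": [percent],
            "tdigest": {"compression": 2},
        }
    }
}
# s.pull then reads the ES response's "values" map, keyed by the percent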
Example #15
0
def test_round(self):
    self.assertAlmostEqual(mo_math.round(3.1415, digits=0), 1)
    self.assertAlmostEqual(mo_math.round(3.1415, digits=4), 3.142)
    self.assertAlmostEqual(mo_math.round(4, digits=0), 10)
    self.assertAlmostEqual(mo_math.round(11, digits=0), 10)
    self.assertAlmostEqual(mo_math.round(3.1415), 3)
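The assertions above imply that digits counts significant figures rather than decimal places, and that digits=0 rounds on a log scale to the nearest power of ten (3.1415 -> 1, while 4 and 11 -> 10). A pure-Python sketch of that behavior, for illustration only; mo_math.round itself may handle edge cases differently:

import math

def sig_round(value, digits):
    # hypothetical helper, not the mo_math implementation
    if value == 0:
        return 0
    exponent = math.log10(abs(value))
    if digits == 0:
        # zero significant digits: snap to the nearest power of ten
        return 10 ** round(exponent)
    # round to `digits` significant figures
    return round(value, digits - 1 - math.floor(exponent))

assert sig_round(3.1415, 4) == 3.142
assert sig_round(3.1415, 0) == 1
assert sig_round(4, 0) == 10
assert sig_round(11, 0) == 10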