示例#1
0
 def __init__(self, edge, query, limit):
     AggsDecoder.__init__(self, edge, query, limit)
     edge.allowNulls = False
     self.fields = edge.domain.dimension.fields
     self.domain = self.edge.domain
     self.domain.limit = mo_math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
     self.parts = list()
示例#2
0
    def __init__(self, edge, query, limit):
        AggsDecoder.__init__(self, edge, query, limit)
        self.domain = edge.domain
        self.domain.limit = mo_math.min(
            coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
        self.parts = list()
        self.key2index = {}
        self.computed_domain = False
        self.script = Painless[self.edge.value].partial_eval().to_es_script(
            self.schema)
        self.pull = pull_functions[self.script.data_type]
        self.missing = self.script.miss.partial_eval()
        self.exists = NotOp(self.missing).partial_eval()

        # WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM
        sort_candidates = [s for s in query.sort if s.value == edge.value]
        if sort_candidates:
            self.es_order = {
                "_term": {
                    1: "asc",
                    -1: "desc"
                }[sort_candidates[0].sort]
            }
        else:
            self.es_order = None
示例#3
0
 def __init__(self, edge, query, limit):
     AggsDecoder.__init__(self, edge, query, limit)
     edge.allowNulls = False
     self.fields = edge.domain.dimension.fields
     self.domain = self.edge.domain
     self.domain.limit = mo_math.min(
         coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
     self.parts = list()
示例#4
0
def temper_limit(proposed_limit, query):
    """
    SUITABLE DEFAULTS AND LIMITS
    """
    from jx_elasticsearch.es52.agg_bulk import is_bulk_agg
    from jx_elasticsearch.es52.set_bulk import is_bulk_set
    if is_bulk_agg(Null, query) or is_bulk_set(Null, query):
        return coalesce(proposed_limit, query.limit)
    else:
        return mo_math.min(
            coalesce(proposed_limit, query.limit, DEFAULT_LIMIT), MAX_LIMIT)
示例#5
0
    def wrap(query, container, namespace):
        """
        NORMALIZE QUERY SO IT CAN STILL BE JSON
        """
        if is_op(query, QueryOp) or query == None:
            return query

        query = wrap(query)
        table = container.get_table(query['from'])
        schema = table.schema
        output = QueryOp(frum=table,
                         format=query.format,
                         limit=mo_math.min(
                             MAX_LIMIT, coalesce(query.limit, DEFAULT_LIMIT)))

        if query.select or isinstance(query.select, (Mapping, list)):
            output.select = _normalize_selects(query.select,
                                               query.frum,
                                               schema=schema)
        else:
            if query.edges or query.groupby:
                output.select = DEFAULT_SELECT
            else:
                output.select = _normalize_selects(".", query.frum)

        if query.groupby and query.edges:
            Log.error(
                "You can not use both the `groupby` and `edges` clauses in the same query!"
            )
        elif query.edges:
            output.edges = _normalize_edges(query.edges,
                                            limit=output.limit,
                                            schema=schema)
            output.groupby = Null
        elif query.groupby:
            output.edges = Null
            output.groupby = _normalize_groupby(query.groupby,
                                                limit=output.limit,
                                                schema=schema)
        else:
            output.edges = Null
            output.groupby = Null

        output.where = _normalize_where({"and": listwrap(query.where)},
                                        schema=schema)
        output.window = [_normalize_window(w) for w in listwrap(query.window)]
        output.having = None
        output.sort = _normalize_sort(query.sort)
        if not mo_math.is_integer(output.limit) or output.limit < 0:
            Log.error("Expecting limit >= 0")

        output.isLean = query.isLean

        return output
示例#6
0
 def __getslice__(self, i, j):
     j = mo_math.min(j, len(self))
     if j - 1 > 2**28:
         Log.error("Slice of {{num}} bytes is too big", num=j - i)
     try:
         self.file.seek(i)
         output = self.file.read(j - i).decode(self.encoding)
         return output
     except Exception as e:
         Log.error(
             "Can not read file slice at {{index}}, with encoding {{encoding}}",
             index=i,
             encoding=self.encoding,
             cause=e)
示例#7
0
 def __getslice__(self, i, j):
     j = mo_math.min(j, len(self))
     if j - 1 > 2 ** 28:
         Log.error("Slice of {{num}} bytes is too big", num=j - i)
     try:
         self.file.seek(i)
         output = self.file.read(j - i).decode(self.encoding)
         return output
     except Exception as e:
         Log.error(
             "Can not read file slice at {{index}}, with encoding {{encoding}}",
             index=i,
             encoding=self.encoding,
             cause=e
         )
示例#8
0
    def wrap(query, container, namespace):
        """
        NORMALIZE QUERY SO IT CAN STILL BE JSON
        """
        if is_op(query, QueryOp) or query == None:
            return query

        query = wrap(query)
        table = container.get_table(query['from'])
        schema = table.schema
        output = QueryOp(
            frum=table,
            format=query.format,
            limit=mo_math.min(MAX_LIMIT, coalesce(query.limit, DEFAULT_LIMIT))
        )

        if query.select or isinstance(query.select, (Mapping, list)):
            output.select = _normalize_selects(query.select, query.frum, schema=schema)
        else:
            if query.edges or query.groupby:
                output.select = DEFAULT_SELECT
            else:
                output.select = _normalize_selects(".", query.frum)

        if query.groupby and query.edges:
            Log.error("You can not use both the `groupby` and `edges` clauses in the same query!")
        elif query.edges:
            output.edges = _normalize_edges(query.edges, limit=output.limit, schema=schema)
            output.groupby = Null
        elif query.groupby:
            output.edges = Null
            output.groupby = _normalize_groupby(query.groupby, limit=output.limit, schema=schema)
        else:
            output.edges = Null
            output.groupby = Null

        output.where = _normalize_where(query.where, schema=schema)
        output.window = [_normalize_window(w) for w in listwrap(query.window)]
        output.having = None
        output.sort = _normalize_sort(query.sort)
        if not mo_math.is_integer(output.limit) or output.limit < 0:
            Log.error("Expecting limit >= 0")

        output.isLean = query.isLean

        return output
示例#9
0
    def __init__(self, edge, query, limit):
        AggsDecoder.__init__(self, edge, query, limit)
        self.domain = edge.domain
        self.domain.limit = mo_math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
        self.parts = list()
        self.key2index = {}
        self.computed_domain = False
        self.script = Painless[self.edge.value].partial_eval().to_es_script(self.schema)
        self.pull = pull_functions[self.script.data_type]
        self.missing = self.script.miss.partial_eval()
        self.exists = NotOp(self.missing).partial_eval()

        # WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM
        sort_candidates = [s for s in query.sort if s.value == edge.value]
        if sort_candidates:
            self.es_order = {"_term": {1: "asc", -1: "desc"}[sort_candidates[0].sort]}
        else:
            self.es_order = None
示例#10
0
    def __init__(self, edge, query, limit):
        AggsDecoder.__init__(self, edge, query, limit)
        if is_op(edge.value, LeavesOp):
            prefix = edge.value.term.var
            flatter = lambda k: literal_field(relative_field(k, prefix))
        else:
            prefix = edge.value.var
            flatter = lambda k: relative_field(k, prefix)

        self.put, self.fields = transpose(*[
            (flatter(untype_path(c.name)), c.es_column)
            for c in query.frum.schema.leaves(prefix)
        ])

        self.domain = self.edge.domain = wrap({"dimension": {"fields": self.fields}})
        self.domain.limit = mo_math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
        self.parts = list()
        self.key2index = {}
        self.computed_domain = False
示例#11
0
    def __init__(self, edge, query, limit):
        AggsDecoder.__init__(self, edge, query, limit)
        if is_op(edge.value, LeavesOp):
            prefix = edge.value.term.var
            flatter = lambda k: literal_field(relative_field(k, prefix))
        else:
            prefix = edge.value.var
            flatter = lambda k: relative_field(k, prefix)

        self.put, self.fields = transpose(
            *[(flatter(untype_path(c.name)), c.es_column)
              for c in query.frum.schema.leaves(prefix)])

        self.domain = self.edge.domain = wrap(
            {"dimension": {
                "fields": self.fields
            }})
        self.domain.limit = mo_math.min(
            coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
        self.parts = list()
        self.key2index = {}
        self.computed_domain = False
示例#12
0
                    on_clause = SQL_AND.join(
                        quote_column(edge_alias, k) + SQL_EQ + sql
                        for k, (t, sql) in zip(domain_names, edge_values))
                    null_on_clause = None
            elif query_edge.domain.type == "range":
                domain_name = "d" + text(edge_index) + "c0"
                domain_names = [
                    domain_name
                ]  # ONLY EVER SEEN ONE DOMAIN VALUE, DOMAIN TUPLES CERTAINLY EXIST
                d = query_edge.domain
                if d.max == None or d.min == None or d.min == d.max:
                    Log.error("Invalid range: {{range|json}}", range=d)
                if len(edge_names) == 1:
                    domain = self._make_range_domain(domain=d,
                                                     column_name=domain_name)
                    limit = mo_math.min(query.limit, query_edge.domain.limit)
                    domain += (SQL_ORDERBY + sql_list(vals) + SQL_LIMIT +
                               text(limit))

                    where = None
                    join_type = SQL_LEFT_JOIN if query_edge.allowNulls else SQL_INNER_JOIN
                    on_clause = SQL_AND.join(
                        quote_column(edge_alias) + SQL_DOT + k + " <= " + v +
                        SQL_AND + v + " < (" + quote_column(edge_alias) +
                        SQL_DOT + k + SQL_PLUS + text(d.interval) + ")"
                        for k, (t, v) in zip(domain_names, edge_values))
                    null_on_clause = None
                elif query_edge.range:
                    query_edge.allowNulls = False
                    domain = self._make_range_domain(domain=d,
                                                     column_name=domain_name)
    def update_spot_requests(self):
        spot_requests = self._get_managed_spot_requests()

        # ADD UP THE CURRENT REQUESTED INSTANCES
        all_instances = UniqueIndex("id", data=self._get_managed_instances())
        self.active = active = wrap([
            r for r in spot_requests if r.status.code in RUNNING_STATUS_CODES
            | PENDING_STATUS_CODES | PROBABLY_NOT_FOR_A_WHILE | MIGHT_HAPPEN
        ])

        for a in active.copy():
            if a.status.code == "request-canceled-and-instance-running" and all_instances[
                    a.instance_id] == None:
                active.remove(a)

        used_budget = 0
        current_spending = 0
        for a in active:
            about = self.price_lookup[a.launch_specification.instance_type,
                                      a.launch_specification.placement]
            discount = coalesce(about.type.discount, 0)
            Log.note(
                "Active Spot Request {{id}}: {{type}} {{instance_id}} in {{zone}} @ {{price|round(decimal=4)}}",
                id=a.id,
                type=a.launch_specification.instance_type,
                zone=a.launch_specification.placement,
                instance_id=a.instance_id,
                price=a.price - discount)
            used_budget += a.price - discount
            current_spending += coalesce(about.current_price,
                                         a.price) - discount

        Log.note(
            "Total Exposure: ${{budget|round(decimal=4)}}/hour (current price: ${{current|round(decimal=4)}}/hour)",
            budget=used_budget,
            current=current_spending)

        remaining_budget = self.settings.budget - used_budget

        current_utility = coalesce(
            SUM(self.price_lookup[
                r.launch_specification.instance_type,
                r.launch_specification.placement].type.utility
                for r in active), 0)
        utility_required = self.instance_manager.required_utility(
            current_utility)
        net_new_utility = utility_required - current_utility

        Log.note(
            "have {{current_utility}} utility running; need {{need_utility}} more utility",
            current_utility=current_utility,
            need_utility=net_new_utility)

        if remaining_budget < 0:
            remaining_budget, net_new_utility = self.save_money(
                remaining_budget, net_new_utility)

        if net_new_utility < 0:
            if self.settings.allowed_overage:
                net_new_utility = mo_math.min(
                    net_new_utility +
                    self.settings.allowed_overage * utility_required, 0)

            net_new_utility = self.remove_instances(net_new_utility)

        if net_new_utility > 0:
            net_new_utility = mo_math.min(net_new_utility,
                                          self.settings.max_new_utility)
            net_new_utility, remaining_budget = self.add_instances(
                net_new_utility, remaining_budget)

        if net_new_utility > 0:
            Log.alert(
                "Can not fund {{num|round(places=2)}} more utility (all utility costs more than ${{expected|round(decimal=2)}}/hour).  Remaining budget is ${{budget|round(decimal=2)}} ",
                num=net_new_utility,
                expected=self.settings.max_utility_price,
                budget=remaining_budget)

        # Give EC2 a chance to notice the new requests before tagging them.
        Till(seconds=3).wait()
        with self.net_new_locker:
            for req in self.net_new_spot_requests:
                req.add_tag("Name", self.settings.ec2.instance.name)

        Log.note("All requests for new utility have been made")
        self.done_making_new_spot_requests.go()
    def add_instances(self, net_new_utility, remaining_budget):
        prices = self.pricing()

        for p in prices:
            if net_new_utility <= 0 or remaining_budget <= 0:
                break

            if p.current_price == None:
                Log.note("{{type}} has no current price",
                         type=p.type.instance_type)
                continue

            if self.settings.utility[p.type.instance_type].blacklist or \
                p.availability_zone in listwrap(self.settings.utility[p.type.instance_type].blacklist_zones):
                Log.note("{{type}} in {{zone}} skipped due to blacklist",
                         type=p.type.instance_type,
                         zone=p.availability_zone)
                continue

            # DO NOT BID HIGHER THAN WHAT WE ARE WILLING TO PAY
            max_acceptable_price = p.type.utility * self.settings.max_utility_price + p.type.discount
            max_bid = mo_math.min(p.higher_price, max_acceptable_price,
                                  remaining_budget)
            min_bid = p.price_80

            if min_bid > max_acceptable_price:
                Log.note(
                    "Price of ${{price}}/hour on {{type}}: Over remaining acceptable price of ${{remaining}}/hour",
                    type=p.type.instance_type,
                    price=min_bid,
                    remaining=max_acceptable_price)
                continue
            elif min_bid > remaining_budget:
                Log.note(
                    "Did not bid ${{bid}}/hour on {{type}}: Over budget of ${{remaining_budget}}/hour",
                    type=p.type.instance_type,
                    bid=min_bid,
                    remaining_budget=remaining_budget)
                continue
            elif min_bid > max_bid:
                Log.error("not expected")

            naive_number_needed = int(
                mo_math.round(float(net_new_utility) / float(p.type.utility),
                              decimal=0))
            limit_total = None
            if self.settings.max_percent_per_type < 1:
                current_count = sum(
                    1 for a in self.active
                    if a.launch_specification.instance_type ==
                    p.type.instance_type and a.launch_specification.placement
                    == p.availability_zone)
                all_count = sum(
                    1 for a in self.active
                    if a.launch_specification.placement == p.availability_zone)
                all_count = max(all_count, naive_number_needed)
                limit_total = int(
                    mo_math.floor(
                        (all_count * self.settings.max_percent_per_type -
                         current_count) /
                        (1 - self.settings.max_percent_per_type)))

            num = mo_math.min(naive_number_needed, limit_total,
                              self.settings.max_requests_per_type)
            if num < 0:
                Log.note(
                    "{{type}} is over {{limit|percent}} of instances, no more requested",
                    limit=self.settings.max_percent_per_type,
                    type=p.type.instance_type)
                continue
            elif num == 1:
                min_bid = mo_math.min(
                    mo_math.max(p.current_price * 1.1, min_bid),
                    max_acceptable_price)
                price_interval = 0
            else:
                price_interval = mo_math.min(min_bid / 10,
                                             (max_bid - min_bid) / (num - 1))

            for i in range(num):
                bid_per_machine = min_bid + (i * price_interval)
                if bid_per_machine < p.current_price:
                    Log.note(
                        "Did not bid ${{bid}}/hour on {{type}}: Under current price of ${{current_price}}/hour",
                        type=p.type.instance_type,
                        bid=bid_per_machine - p.type.discount,
                        current_price=p.current_price)
                    continue
                if bid_per_machine - p.type.discount > remaining_budget:
                    Log.note(
                        "Did not bid ${{bid}}/hour on {{type}}: Over remaining budget of ${{remaining}}/hour",
                        type=p.type.instance_type,
                        bid=bid_per_machine - p.type.discount,
                        remaining=remaining_budget)
                    continue

                last_no_capacity_message = self.no_capacity.get(
                    p.type.instance_type, Null)
                if last_no_capacity_message > Date.now(
                ) - CAPACITY_NOT_AVAILABLE_RETRY:
                    Log.note(
                        "Did not bid on {{type}}: \"No capacity\" last seen at {{last_time|datetime}}",
                        type=p.type.instance_type,
                        last_time=last_no_capacity_message)
                    continue

                try:
                    if self.settings.ec2.request.count == None or self.settings.ec2.request.count != 1:
                        Log.error(
                            "Spot Manager can only request machine one-at-a-time"
                        )

                    new_requests = self._request_spot_instances(
                        price=bid_per_machine,
                        availability_zone_group=p.availability_zone,
                        instance_type=p.type.instance_type,
                        kwargs=copy(self.settings.ec2.request))
                    Log.note(
                        "Request {{num}} instance {{type}} in {{zone}} with utility {{utility}} at ${{price}}/hour",
                        num=len(new_requests),
                        type=p.type.instance_type,
                        zone=p.availability_zone,
                        utility=p.type.utility,
                        price=bid_per_machine)
                    net_new_utility -= p.type.utility * len(new_requests)
                    remaining_budget -= (bid_per_machine -
                                         p.type.discount) * len(new_requests)
                    with self.net_new_locker:
                        for ii in new_requests:
                            self.net_new_spot_requests.add(ii)
                except Exception as e:
                    Log.warning(
                        "Request instance {{type}} failed because {{reason}}",
                        type=p.type.instance_type,
                        reason=e.message,
                        cause=e)

                    if "Max spot instance count exceeded" in e.message:
                        Log.note("No further spot requests will be attempted.")
                        return net_new_utility, remaining_budget

        return net_new_utility, remaining_budget
示例#15
0
文件: main.py 项目: mozilla/cia-tasks
    def process_one(self, start, end, branch, please_stop):
        # ASSUME PREVIOUS WORK IS DONE
        # UPDATE THE DATABASE STATE
        self.done.min = mo_math.min(end, self.done.min)
        self.done.max = mo_math.max(start, self.done.max)
        self.set_state()

        try:
            pushes = make_push_objects(from_date=start.format(),
                                       to_date=end.format(),
                                       branch=branch)
        except MissingDataError:
            return
        except Exception as e:
            raise Log.error("not expected", cause=e)

        Log.note(
            "Found {{num}} pushes on {{branch}} in ({{start}}, {{end}})",
            num=len(pushes),
            start=start,
            end=end,
            branch=branch,
        )

        data = []
        try:
            for push in pushes:
                if please_stop:
                    break

                with Timer("get tasks for push {{push}}", {"push": push.id}):
                    try:
                        schedulers = [
                            label.split("shadow-scheduler-")[1]
                            for label in push.scheduled_task_labels
                            if "shadow-scheduler" in label
                        ]
                    except Exception as e:
                        Log.warning("could not get schedulers", cause=e)
                        schedulers = []

                    scheduler = []
                    for s in schedulers:
                        try:
                            scheduler.append({
                                "name":
                                s,
                                "tasks":
                                jx.sort(push.get_shadow_scheduler_tasks(s)),
                            })
                        except Exception:
                            pass
                try:
                    regressions = push.get_regressions("label").keys()
                except Exception as e:
                    regressions = []
                    Log.warning("could not get regressions for {{push}}",
                                push=push.id,
                                cause=e)

                # RECORD THE PUSH
                data.append({
                    "push": {
                        "id": push.id,
                        "date": push.date,
                        "changesets": push.revs,
                        "backedoutby": push.backedoutby,
                    },
                    "schedulers":
                    scheduler,
                    "regressions": [{
                        "label": name
                    } for name in jx.sort(regressions)],
                    "branch":
                    branch,
                    "etl": {
                        "revision": git.get_revision(),
                        "timestamp": Date.now(),
                    },
                })
        finally:
            # ADD WHATEVER WE HAVE
            with Timer("adding {{num}} records to bigquery",
                       {"num": len(data)}):
                self.destination.extend(data)