Exemplo n.º 1
0
def get_schema_from_list(table_name, frum):
    """
    SCAN THE LIST FOR COLUMN TYPES
    """
    columns = UniqueIndex(keys=("names.\\.",))
    _get_schema_from_list(frum, ".", prefix_path=[], nested_path=ROOT_PATH, columns=columns)
    return Schema(table_name=table_name, columns=columns)
Exemplo n.º 2
0
    def __init__(self, instance_manager, disable_prices=False, kwargs=None):
        self.settings = kwargs
        self.instance_manager = instance_manager
        aws_args = dict(
            region_name=kwargs.aws.region,
            aws_access_key_id=unwrap(kwargs.aws.aws_access_key_id),
            aws_secret_access_key=unwrap(kwargs.aws.aws_secret_access_key)
        )
        self.ec2_conn = boto.ec2.connect_to_region(**aws_args)
        self.vpc_conn = boto.vpc.connect_to_region(**aws_args)
        self.price_locker = Lock()
        self.prices = None
        self.price_lookup = None
        self.done_spot_requests = Signal()
        self.net_new_locker = Lock()
        self.net_new_spot_requests = UniqueIndex(("id",))  # SPOT REQUESTS FOR THIS SESSION
        self.watcher = None
        self.active = None

        self.settings.uptime.bid_percentile = coalesce(self.settings.uptime.bid_percentile, self.settings.bid_percentile)
        self.settings.uptime.history = coalesce(Date(self.settings.uptime.history), DAY)
        self.settings.uptime.duration = coalesce(Duration(self.settings.uptime.duration), Date("5minute"))
        self.settings.max_percent_per_type = coalesce(self.settings.max_percent_per_type, 1)

        if ENABLE_SIDE_EFFECTS and instance_manager and instance_manager.setup_required():
            self._start_life_cycle_watcher()
        if not disable_prices:
            self.pricing()
def main():
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)
        with SingleInstance(flavor_id=settings.args.filename):
            settings.run_interval = Duration(settings.run_interval)
            for u in settings.utility:
                u.discount = coalesce(u.discount, 0)
                # MARKUP drives WITH EXPECTED device MAPPING
                num_ephemeral_volumes = ephemeral_storage[
                    u.instance_type]["num"]
                for i, d in enumerate(d for d in u.drives if not d.device):
                    letter = convert.ascii2char(98 + num_ephemeral_volumes + i)
                    d.device = "/dev/xvd" + letter

            settings.utility = UniqueIndex(["instance_type"],
                                           data=settings.utility)
            instance_manager = new_instance(settings.instance)
            m = SpotManager(instance_manager, kwargs=settings)

            if ENABLE_SIDE_EFFECTS:
                m.update_spot_requests()

            if m.watcher:
                m.watcher.join()
    except Exception as e:
        Log.warning("Problem with spot manager", cause=e)
    finally:
        Log.stop()
        MAIN_THREAD.stop()
Exemplo n.º 4
0
def get_branches(hg, branches, kwargs=None):
    # TRY ES
    try:
        es = elasticsearch.Cluster(kwargs=branches).get_index(kwargs=branches, read_only=False)

        query = {
            "query": {"match_all": {}},
            "size": 10000
        }

        found_branches = es.search(query).hits.hits._source
        # IF IT IS TOO OLD, THEN PULL FROM HG
        oldest = Date(MAX(found_branches.etl.timestamp))
        if oldest == None or Date.now() - oldest > OLD_BRANCH:
            found_branches = _get_branches_from_hg(hg)
            es.extend({"id": b.name + " " + b.locale, "value": b} for b in found_branches)
            es.flush()

        try:
            return UniqueIndex(["name", "locale"], data=found_branches, fail_on_dup=False)
        except Exception as e:
            Log.error("Bad branch in ES index", cause=e)
    except Exception as e:
        if "Can not find index " in e:
            set_default(branches, {"schema": branches_schema})
            es = elasticsearch.Cluster(kwargs=branches).get_or_create_index(kwargs=branches)
            es.add_alias()
            return get_branches(kwargs=kwargs)
        Log.error("problem getting branches", cause=e)
Exemplo n.º 5
0
def get_branches(hg, branches, kwargs=None):
    # TRY ES
    cluster = elasticsearch.Cluster(branches)
    try:
        es = cluster.get_index(kwargs=branches, read_only=False)
        esq = jx_elasticsearch.new_instance(branches)
        found_branches = esq.query({"from": "branches", "format": "list", "limit": 10000}).data

        # IF IT IS TOO OLD, THEN PULL FROM HG
        oldest = Date(MAX(found_branches.etl.timestamp))
        if oldest == None or Date.now() - oldest > OLD_BRANCH:
            found_branches = _get_branches_from_hg(hg)
            es.extend({"id": b.name + " " + b.locale, "value": b} for b in found_branches)
            es.flush()

        try:
            return UniqueIndex(["name", "locale"], data=found_branches, fail_on_dup=False)
        except Exception as e:
            Log.error("Bad branch in ES index", cause=e)
    except Exception as e:
        e = Except.wrap(e)
        if "Can not find index " in e:
            set_default(branches, {"schema": branches_schema})
            es = cluster.get_or_create_index(branches)
            es.add_alias()
            return get_branches(kwargs)
        Log.error("problem getting branches", cause=e)
    def __init__(self, instance_manager, disable_prices=False, kwargs=None):
        self.settings = kwargs
        self.instance_manager = instance_manager
        aws_args = dict(region_name=kwargs.aws.region,
                        aws_access_key_id=unwrap(kwargs.aws.aws_access_key_id),
                        aws_secret_access_key=unwrap(
                            kwargs.aws.aws_secret_access_key))
        self.ec2_conn = boto.ec2.connect_to_region(**aws_args)
        self.vpc_conn = boto.vpc.connect_to_region(**aws_args)
        self.price_locker = Lock()
        self.prices = None
        self.price_lookup = None
        self.no_capacity = {}
        self.no_capacity_file = File(
            kwargs.price_file).parent / "no capacity.json"
        self.done_making_new_spot_requests = Signal()
        self.net_new_locker = Lock()
        self.net_new_spot_requests = UniqueIndex(
            ("id", ))  # SPOT REQUESTS FOR THIS SESSION
        self.watcher = None
        self.active = None

        self.settings.uptime.bid_percentile = coalesce(
            self.settings.uptime.bid_percentile, self.settings.bid_percentile)
        self.settings.uptime.history = coalesce(
            Date(self.settings.uptime.history), DAY)
        self.settings.uptime.duration = coalesce(
            Duration(self.settings.uptime.duration), Date("5minute"))
        self.settings.max_percent_per_type = coalesce(
            self.settings.max_percent_per_type, 1)

        if ENABLE_SIDE_EFFECTS and instance_manager and instance_manager.setup_required(
        ):
            self._start_life_cycle_watcher()
        if not disable_prices:
            self.pricing()
    def _get_managed_instances(self):
        requests = UniqueIndex(["instance_id"],
                               data=self._get_managed_spot_requests().filter(
                                   lambda r: r.instance_id != None))
        reservations = self.ec2_conn.get_all_instances()

        output = []
        for res in reservations:
            for instance in res.instances:
                if instance.tags.get('Name', '').startswith(
                        self.settings.ec2.instance.name
                ) and instance._state.name == "running":
                    instance.request = requests[instance.id]
                    output.append(datawrap(instance))
        return wrap(output)
Exemplo n.º 8
0
def get_schema_from_list(table_name,
                         frum,
                         native_type_to_json_type=python_type_to_json_type):
    """
    SCAN THE LIST FOR COLUMN TYPES
    """
    columns = UniqueIndex(keys=("name", ))
    _get_schema_from_list(
        frum,
        ".",
        parent=".",
        nested_path=ROOT_PATH,
        columns=columns,
        native_type_to_json_type=native_type_to_json_type,
    )
    return Schema(table_name=table_name, columns=list(columns))
Exemplo n.º 9
0
def _get_branches_from_hg(kwarg):
    # GET MAIN PAGE
    response = http.get(kwarg.url)
    doc = BeautifulSoup(response.all_content, "html.parser")

    all_repos = doc("table")[1]
    branches = UniqueIndex(["name", "locale"], fail_on_dup=False)
    for i, r in enumerate(all_repos("tr")):
        dir, name = [v.text.strip() for v in r("td")]

        b = _get_single_branch_from_hg(kwarg, name, dir.lstrip("/"))
        branches.extend(b)

    # branches.add(set_default({"name": "release-mozilla-beta"}, branches["mozilla-beta", DEFAULT_LOCALE]))
    for b in list(branches["mozilla-beta", ]):
        branches.add(set_default({"name": "release-mozilla-beta"}, b))  # THIS IS THE l10n "name"
        b.url = "https://hg.mozilla.org/releases/mozilla-beta"          # THIS IS THE

    for b in list(branches["mozilla-release", ]):
        branches.add(set_default({"name": "release-mozilla-release"}, b))

    for b in list(branches["mozilla-aurora", ]):
        if b.locale == "en-US":
            continue
        branches.add(set_default({"name": "comm-aurora"}, b))
        # b.url = "https://hg.mozilla.org/releases/mozilla-aurora"

    for b in list(branches):
        if b.name.startswith("mozilla-esr"):
            branches.add(set_default({"name": "release-" + b.name}, b))  # THIS IS THE l10n "name"
            b.url = "https://hg.mozilla.org/releases/" + b.name

    #CHECKS
    for b in branches:
        if b.name != b.name.lower():
            Log.error("Expecting lowercase name")
        if not b.locale:
            Log.error("Not expected")
        if not b.url.startswith("http"):
            Log.error("Expecting a valid url")
        if not b.etl.timestamp:
            Log.error("Expecting a timestamp")

    return branches
Exemplo n.º 10
0
class SpotManager(object):
    @override
    def __init__(self, instance_manager, disable_prices=False, kwargs=None):
        self.settings = kwargs
        self.instance_manager = instance_manager
        aws_args = dict(
            region_name=kwargs.aws.region,
            aws_access_key_id=unwrap(kwargs.aws.aws_access_key_id),
            aws_secret_access_key=unwrap(kwargs.aws.aws_secret_access_key)
        )
        self.ec2_conn = boto.ec2.connect_to_region(**aws_args)
        self.vpc_conn = boto.vpc.connect_to_region(**aws_args)
        self.price_locker = Lock()
        self.prices = None
        self.price_lookup = None
        self.done_spot_requests = Signal()
        self.net_new_locker = Lock()
        self.net_new_spot_requests = UniqueIndex(("id",))  # SPOT REQUESTS FOR THIS SESSION
        self.watcher = None
        self.active = None

        self.settings.uptime.bid_percentile = coalesce(self.settings.uptime.bid_percentile, self.settings.bid_percentile)
        self.settings.uptime.history = coalesce(Date(self.settings.uptime.history), DAY)
        self.settings.uptime.duration = coalesce(Duration(self.settings.uptime.duration), Date("5minute"))
        self.settings.max_percent_per_type = coalesce(self.settings.max_percent_per_type, 1)

        if ENABLE_SIDE_EFFECTS and instance_manager and instance_manager.setup_required():
            self._start_life_cycle_watcher()
        if not disable_prices:
            self.pricing()

    def update_spot_requests(self, utility_required):
        spot_requests = self._get_managed_spot_requests()

        # ADD UP THE CURRENT REQUESTED INSTANCES
        all_instances = UniqueIndex("id", data=self._get_managed_instances())
        self.active = active = wrap([r for r in spot_requests if r.status.code in RUNNING_STATUS_CODES | PENDING_STATUS_CODES | PROBABLY_NOT_FOR_A_WHILE | MIGHT_HAPPEN])

        for a in active.copy():
            if a.status.code == "request-canceled-and-instance-running" and all_instances[a.instance_id] == None:
                active.remove(a)

        used_budget = 0
        current_spending = 0
        for a in active:
            about = self.price_lookup[a.launch_specification.instance_type, a.launch_specification.placement]
            discount = coalesce(about.type.discount, 0)
            Log.note(
                "Active Spot Request {{id}}: {{type}} {{instance_id}} in {{zone}} @ {{price|round(decimal=4)}}",
                id=a.id,
                type=a.launch_specification.instance_type,
                zone=a.launch_specification.placement,
                instance_id=a.instance_id,
                price=a.price - discount
            )
            used_budget += a.price - discount
            current_spending += coalesce(about.current_price, a.price) - discount

        Log.note(
            "Total Exposure: ${{budget|round(decimal=4)}}/hour (current price: ${{current|round(decimal=4)}}/hour)",
            budget=used_budget,
            current=current_spending
        )

        remaining_budget = self.settings.budget - used_budget

        current_utility = coalesce(SUM(self.price_lookup[r.launch_specification.instance_type, r.launch_specification.placement].type.utility for r in active), 0)
        net_new_utility = utility_required - current_utility

        Log.note("have {{current_utility}} utility running; need {{need_utility}} more utility", current_utility=current_utility, need_utility=net_new_utility)

        if remaining_budget < 0:
            remaining_budget, net_new_utility = self.save_money(remaining_budget, net_new_utility)

        if net_new_utility < 0:
            if self.settings.allowed_overage:
                net_new_utility = Math.min(net_new_utility + self.settings.allowed_overage * utility_required, 0)

            net_new_utility = self.remove_instances(net_new_utility)

        if net_new_utility > 0:
            net_new_utility = Math.min(net_new_utility, self.settings.max_new_utility)
            net_new_utility, remaining_budget = self.add_instances(net_new_utility, remaining_budget)

        if net_new_utility > 0:
            Log.alert(
                "Can not fund {{num|round(places=2)}} more utility (all utility costs more than ${{expected|round(decimal=2)}}/hour).  Remaining budget is ${{budget|round(decimal=2)}} ",
                num=net_new_utility,
                expected=self.settings.max_utility_price,
                budget=remaining_budget
            )

        # Give EC2 a chance to notice the new requests before tagging them.
        Till(timeout=3).wait()
        with self.net_new_locker:
            for req in self.net_new_spot_requests:
                req.add_tag("Name", self.settings.ec2.instance.name)

        Log.note("All requests for new utility have been made")
        self.done_spot_requests.go()

    def add_instances(self, net_new_utility, remaining_budget):
        prices = self.pricing()

        for p in prices:
            if net_new_utility <= 0 or remaining_budget <= 0:
                break

            if p.current_price == None:
                Log.note("{{type}} has no current price",
                    type=p.type.instance_type
                )
                continue

            if self.settings.utility[p.type.instance_type].blacklist or \
                    p.availability_zone in listwrap(self.settings.utility[p.type.instance_type].blacklist_zones):
                Log.note("{{type}} in {{zone}} skipped due to blacklist", type=p.type.instance_type, zone=p.availability_zone)
                continue

            # DO NOT BID HIGHER THAN WHAT WE ARE WILLING TO PAY
            max_acceptable_price = p.type.utility * self.settings.max_utility_price + p.type.discount
            max_bid = Math.min(p.higher_price, max_acceptable_price, remaining_budget)
            min_bid = p.price_80

            if min_bid > max_acceptable_price:
                Log.note(
                    "Price of ${{price}}/hour on {{type}}: Over remaining acceptable price of ${{remaining}}/hour",
                    type=p.type.instance_type,
                    price=min_bid,
                    remaining=max_acceptable_price
                )
                continue
            elif min_bid > remaining_budget:
                Log.note(
                    "Did not bid ${{bid}}/hour on {{type}}: Over budget of ${{remaining_budget}}/hour",
                    type=p.type.instance_type,
                    bid=min_bid,
                    remaining_budget=remaining_budget
                )
                continue
            elif min_bid > max_bid:
                Log.error("not expected")

            naive_number_needed = int(Math.round(float(net_new_utility) / float(p.type.utility), decimal=0))
            limit_total = None
            if self.settings.max_percent_per_type < 1:
                current_count = sum(1 for a in self.active if a.launch_specification.instance_type == p.type.instance_type and a.launch_specification.placement == p.availability_zone)
                all_count = sum(1 for a in self.active if a.launch_specification.placement == p.availability_zone)
                all_count = max(all_count, naive_number_needed)
                limit_total = int(Math.floor((all_count * self.settings.max_percent_per_type - current_count) / (1 - self.settings.max_percent_per_type)))

            num = Math.min(naive_number_needed, limit_total, self.settings.max_requests_per_type)
            if num < 0:
                Log.note(
                    "{{type}} is over {{limit|percent}} of instances, no more requested",
                    limit=self.settings.max_percent_per_type,
                    type=p.type.instance_type
                )
                continue
            elif num == 1:
                min_bid = Math.min(Math.max(p.current_price * 1.1, min_bid), max_acceptable_price)
                price_interval = 0
            else:
                price_interval = Math.min(min_bid / 10, (max_bid - min_bid) / (num - 1))

            for i in range(num):
                bid_per_machine = min_bid + (i * price_interval)
                if bid_per_machine < p.current_price:
                    Log.note(
                        "Did not bid ${{bid}}/hour on {{type}}: Under current price of ${{current_price}}/hour",
                        type=p.type.instance_type,
                        bid=bid_per_machine - p.type.discount,
                        current_price=p.current_price
                    )
                    continue
                if bid_per_machine - p.type.discount > remaining_budget:
                    Log.note(
                        "Did not bid ${{bid}}/hour on {{type}}: Over remaining budget of ${{remaining}}/hour",
                        type=p.type.instance_type,
                        bid=bid_per_machine - p.type.discount,
                        remaining=remaining_budget
                    )
                    continue

                try:
                    if self.settings.ec2.request.count == None or self.settings.ec2.request.count != 1:
                        Log.error("Spot Manager can only request machine one-at-a-time")

                    new_requests = self._request_spot_instances(
                        price=bid_per_machine,
                        availability_zone_group=p.availability_zone,
                        instance_type=p.type.instance_type,
                        kwargs=copy(self.settings.ec2.request)
                    )
                    Log.note(
                        "Request {{num}} instance {{type}} in {{zone}} with utility {{utility}} at ${{price}}/hour",
                        num=len(new_requests),
                        type=p.type.instance_type,
                        zone=p.availability_zone,
                        utility=p.type.utility,
                        price=bid_per_machine
                    )
                    net_new_utility -= p.type.utility * len(new_requests)
                    remaining_budget -= (bid_per_machine - p.type.discount) * len(new_requests)
                    with self.net_new_locker:
                        for ii in new_requests:
                            self.net_new_spot_requests.add(ii)
                except Exception as e:
                    Log.warning(
                        "Request instance {{type}} failed because {{reason}}",
                        type=p.type.instance_type,
                        reason=e.message,
                        cause=e
                    )

                    if "Max spot instance count exceeded" in e.message:
                        Log.note("No further spot requests will be attempted.")
                        return net_new_utility, remaining_budget

        return net_new_utility, remaining_budget

    def remove_instances(self, net_new_utility):
        instances = self.running_instances()

        # FIND COMBO THAT WILL SHUTDOWN WHAT WE NEED EXACTLY, OR MORE
        remove_list = []
        for acceptable_error in range(0, 8):
            remaining_utility = -net_new_utility
            remove_list = FlatList()
            for s in instances:
                utility = coalesce(s.markup.type.utility, 0)
                if utility <= remaining_utility + acceptable_error:
                    remove_list.append(s)
                    remaining_utility -= utility
            if remaining_utility <= 0:
                net_new_utility = -remaining_utility
                break

        if not remove_list:
            return net_new_utility

        # SEND SHUTDOWN TO EACH INSTANCE
        Log.note("Shutdown {{instances}}", instances=remove_list.id)
        for i in remove_list:
            try:
                self.instance_manager.teardown(i)
            except Exception as e:
                Log.warning("Teardown of {{id}} failed", id=i.id, cause=e)

        remove_spot_requests = remove_list.spot_instance_request_id

        # TERMINATE INSTANCES
        self.ec2_conn.terminate_instances(instance_ids=remove_list.id)

        # TERMINATE SPOT REQUESTS
        self.ec2_conn.cancel_spot_instance_requests(request_ids=remove_spot_requests)

        return net_new_utility

    def running_instances(self):
        # FIND THE BIGGEST, MOST EXPENSIVE REQUESTS
        instances = self._get_managed_instances()
        for r in instances:
            try:
                r.markup = self.price_lookup[r.instance_type, r.placement]
            except Exception as e:
                r.markup = self.price_lookup[r.instance_type, r.placement]
                Log.error("No pricing!!!", e)
        instances = jx.sort(instances, [
            {"value": "markup.type.utility", "sort": -1},
            {"value": "markup.estimated_value", "sort": 1}
        ])
        return instances

    def save_money(self, remaining_budget, net_new_utility):
        remove_spot_requests = wrap([])

        # FIRST CANCEL THE PENDING REQUESTS
        if remaining_budget < 0:
            requests = self._get_managed_spot_requests()
            for r in requests:
                if r.status.code in PENDING_STATUS_CODES | PROBABLY_NOT_FOR_A_WHILE | MIGHT_HAPPEN:
                    remove_spot_requests.append(r.id)
                    net_new_utility += self.settings.utility[r.launch_specification.instance_type].utility
                    remaining_budget += r.price

        instances = jx.sort(self.running_instances(), "markup.estimated_value")

        remove_list = wrap([])
        for s in instances:
            if remaining_budget >= 0:
                break
            remove_list.append(s)
            net_new_utility += coalesce(s.markup.type.utility, 0)
            remaining_budget += coalesce(s.request.bid_price, s.markup.price_80, s.markup.current_price)

        if not remove_list:
            return remaining_budget, net_new_utility

        # SEND SHUTDOWN TO EACH INSTANCE
        Log.warning("Shutdown {{instances}} to save money!", instances=remove_list.id)
        for i in remove_list:
            try:
                self.instance_manager.teardown(i)
            except Exception as e:
                Log.warning("Teardown of {{id}} failed", id=i.id, cause=e)

        remove_spot_requests.extend(remove_list.spot_instance_request_id)

        # TERMINATE INSTANCES
        self.ec2_conn.terminate_instances(instance_ids=remove_list.id)

        # TERMINATE SPOT REQUESTS
        self.ec2_conn.cancel_spot_instance_requests(request_ids=remove_spot_requests)
        return remaining_budget, net_new_utility

    @cache(duration=5 * SECOND)
    def _get_managed_spot_requests(self):
        output = wrap([datawrap(r) for r in self.ec2_conn.get_all_spot_instance_requests() if not r.tags.get("Name") or r.tags.get("Name").startswith(self.settings.ec2.instance.name)])
        return output

    def _get_managed_instances(self):
        requests = UniqueIndex(["instance_id"], data=self._get_managed_spot_requests().filter(lambda r: r.instance_id!=None))
        reservations = self.ec2_conn.get_all_instances()

        output = []
        for res in reservations:
            for instance in res.instances:
                if instance.tags.get('Name', '').startswith(self.settings.ec2.instance.name) and instance._state.name == "running":
                    instance.request = requests[instance.id]
                    output.append(datawrap(instance))
        return wrap(output)

    def _start_life_cycle_watcher(self):
        def life_cycle_watcher(please_stop):
            failed_attempts=Data()

            while not please_stop:
                spot_requests = self._get_managed_spot_requests()
                last_get = Date.now()
                instances = wrap({i.id: i for r in self.ec2_conn.get_all_instances() for i in r.instances})
                # INSTANCES THAT REQUIRE SETUP
                time_to_stop_trying = {}
                please_setup = [
                    (i, r) for i, r in [(instances[r.instance_id], r) for r in spot_requests]
                    if i.id and not i.tags.get("Name") and i._state.name == "running" and Date.now() > Date(i.launch_time) + DELAY_BEFORE_SETUP
                ]
                for i, r in please_setup:
                    try:
                        p = self.settings.utility[i.instance_type]
                        if p == None:
                            try:
                                self.ec2_conn.terminate_instances(instance_ids=[i.id])
                                with self.net_new_locker:
                                    self.net_new_spot_requests.remove(r.id)
                            finally:
                                Log.error("Can not setup unknown {{instance_id}} of type {{type}}", instance_id=i.id, type=i.instance_type)
                        i.markup = p
                        try:
                            self.instance_manager.setup(i, coalesce(p, 0))
                        except Exception as e:
                            e = Except.wrap(e)
                            failed_attempts[r.id] += [e]
                            Log.error(ERROR_ON_CALL_TO_SETUP, e)
                        i.add_tag("Name", self.settings.ec2.instance.name + " (running)")
                        with self.net_new_locker:
                            self.net_new_spot_requests.remove(r.id)
                    except Exception as e:
                        if not time_to_stop_trying.get(i.id):
                            time_to_stop_trying[i.id] = Date.now() + TIME_FROM_RUNNING_TO_LOGIN
                        if Date.now() > time_to_stop_trying[i.id]:
                            # FAIL TO SETUP AFTER x MINUTES, THEN TERMINATE INSTANCE
                            self.ec2_conn.terminate_instances(instance_ids=[i.id])
                            with self.net_new_locker:
                                self.net_new_spot_requests.remove(r.id)
                            Log.warning("Problem with setup of {{instance_id}}.  Time is up.  Instance TERMINATED!", instance_id=i.id, cause=e)
                        elif "Can not setup unknown " in e:
                            Log.warning("Unexpected failure on startup", instance_id=i.id, cause=e)
                        elif ERROR_ON_CALL_TO_SETUP in e:
                            if len(failed_attempts[r.id]) > 2:
                                Log.warning("Problem with setup() of {{instance_id}}", instance_id=i.id, cause=failed_attempts[r.id])
                        else:
                            Log.warning("Unexpected failure on startup", instance_id=i.id, cause=e)

                if Date.now() - last_get > 5 * SECOND:
                    # REFRESH STALE
                    spot_requests = self._get_managed_spot_requests()
                    last_get = Date.now()

                pending = wrap([r for r in spot_requests if r.status.code in PENDING_STATUS_CODES])
                give_up = wrap([r for r in spot_requests if r.status.code in PROBABLY_NOT_FOR_A_WHILE | TERMINATED_STATUS_CODES])
                ignore = wrap([r for r in spot_requests if r.status.code in MIGHT_HAPPEN])  # MIGHT HAPPEN, BUT NO NEED TO WAIT FOR IT

                if self.done_spot_requests:
                    with self.net_new_locker:
                        expired = Date.now() - self.settings.run_interval + 2 * MINUTE
                        for ii in list(self.net_new_spot_requests):
                            if Date(ii.create_time) < expired:
                                ## SOMETIMES REQUESTS NEVER GET INTO THE MAIN LIST OF REQUESTS
                                self.net_new_spot_requests.remove(ii)
                        for g in give_up:
                            self.net_new_spot_requests.remove(g.id)
                        for g in ignore:
                            self.net_new_spot_requests.remove(g.id)
                        pending = UniqueIndex(("id",), data=pending)
                        pending = pending | self.net_new_spot_requests

                    if give_up:
                        self.ec2_conn.cancel_spot_instance_requests(request_ids=give_up.id)
                        Log.note("Cancelled spot requests {{spots}}, {{reasons}}", spots=give_up.id, reasons=give_up.status.code)

                if not pending and not time_to_stop_trying and self.done_spot_requests:
                    Log.note("No more pending spot requests")
                    please_stop.go()
                    break
                elif pending:
                    Log.note("waiting for spot requests: {{pending}}", pending=[p.id for p in pending])

                (Till(seconds=10) | please_stop).wait()

            Log.note("life cycle watcher has stopped")

        # Log.warning("lifecycle watcher is disabled")
        timeout = Till(seconds=self.settings.run_interval.seconds - 60)
        self.watcher = Thread.run("lifecycle watcher", life_cycle_watcher, please_stop=timeout)

    def _get_valid_availability_zones(self):
        subnets = list(self.vpc_conn.get_all_subnets(subnet_ids=self.settings.ec2.request.network_interfaces.subnet_id))
        zones_with_interfaces = [s.availability_zone for s in subnets]

        if self.settings.availability_zone:
            # If they pass a list of zones, constrain it by zones we have an
            # interface for.
            return set(zones_with_interfaces) & set(listwrap(self.settings.availability_zone))
        else:
            # Otherwise, use all available zones.
            return zones_with_interfaces

    @override
    def _request_spot_instances(self, price, availability_zone_group, instance_type, kwargs):
        kwargs.kwargs = None

        # m3 INSTANCES ARE NOT ALLOWED PLACEMENT GROUP
        if instance_type.startswith("m3."):
            kwargs.placement_group = None

        kwargs.network_interfaces = NetworkInterfaceCollection(*(
            NetworkInterfaceSpecification(**i)
            for i in listwrap(kwargs.network_interfaces)
            if self.vpc_conn.get_all_subnets(subnet_ids=i.subnet_id, filters={"availabilityZone": availability_zone_group})
        ))

        if len(kwargs.network_interfaces) == 0:
            Log.error("No network interface specifications found for {{availability_zone}}!", availability_zone=kwargs.availability_zone_group)

        block_device_map = BlockDeviceMapping()

        # GENERIC BLOCK DEVICE MAPPING
        for dev, dev_settings in kwargs.block_device_map.items():
            block_device_map[dev] = BlockDeviceType(
                delete_on_termination=True,
                **dev_settings
            )

        kwargs.block_device_map = block_device_map

        # INCLUDE EPHEMERAL STORAGE IN BlockDeviceMapping
        num_ephemeral_volumes = ephemeral_storage[instance_type]["num"]
        for i in range(num_ephemeral_volumes):
            letter = convert.ascii2char(98 + i)  # START AT "b"
            kwargs.block_device_map["/dev/sd" + letter] = BlockDeviceType(
                ephemeral_name='ephemeral' + unicode(i),
                delete_on_termination=True
            )

        if kwargs.expiration:
            kwargs.valid_until = (Date.now() + Duration(kwargs.expiration)).format(ISO8601)
            kwargs.expiration = None

        # ATTACH NEW EBS VOLUMES
        for i, drive in enumerate(self.settings.utility[instance_type].drives):
            letter = convert.ascii2char(98 + i + num_ephemeral_volumes)
            device = drive.device = coalesce(drive.device, "/dev/sd" + letter)
            d = drive.copy()
            d.path = None  # path AND device PROPERTY IS NOT ALLOWED IN THE BlockDeviceType
            d.device = None
            if d.size:
                kwargs.block_device_map[device] = BlockDeviceType(
                    delete_on_termination=True,
                    **d
                )

        output = list(self.ec2_conn.request_spot_instances(**kwargs))
        return output

    def pricing(self):
        with self.price_locker:
            if self.prices:
                return self.prices

            prices = self._get_spot_prices_from_aws()

            now = Date.now()
            expressions.ALLOW_SCRIPTING = True

            with Timer("processing pricing data"):
                hourly_pricing = jx.run({
                    "from": {
                        # AWS PRICING ONLY SENDS timestamp OF CHANGES, MATCH WITH NEXT INSTANCE
                        "from": prices,
                        "window": [
                            {
                                "name": "expire",
                                "value": {"coalesce": [{"rows": {"timestamp": 1}}, {"date": "eod"}]},
                                "edges": ["availability_zone", "instance_type"],
                                "sort": "timestamp"
                            },
                            {  # MAKE THIS PRICE EFFECTIVE INTO THE PAST, THIS HELPS SPREAD PRICE SPIKES OVER TIME
                                "name": "effective",
                                "value": {"sub": {"timestamp": self.settings.uptime.duration.seconds}}
                            }
                        ]
                    },
                    "edges": [
                        "availability_zone",
                        "instance_type",
                        {
                            "name": "time",
                            "range": {"min": "effective", "max": "expire", "mode": "inclusive"},
                            "allowNulls": False,
                            "domain": {"type": "time", "min": now.floor(HOUR) - self.settings.uptime.history, "max": Date.now().floor(HOUR)+HOUR, "interval": "hour"}
                        }
                    ],
                    "select": [
                        {"value": "price", "aggregate": "max"},
                        {"aggregate": "count"}
                    ],
                    "where": {"gt": {"expire": now.floor(HOUR) - self.settings.uptime.history}},
                    "window": [
                        {
                            "name": "current_price",
                            "value": "rows.last.price",
                            "edges": ["availability_zone", "instance_type"],
                            "sort": "time"
                        }
                    ]
                }).data

                bid80 = jx.run({
                    "from": hourly_pricing,
                    "edges": [
                        {
                            "value": "availability_zone",
                            "allowNulls": False
                        },
                        {
                            "name": "type",
                            "value": "instance_type",
                            "allowNulls": False,
                            "domain": {"type": "set", "key": "instance_type", "partitions": self.settings.utility}
                        }
                    ],
                    "select": [
                        {"name": "price_80", "value": "price", "aggregate": "percentile", "percentile": self.settings.uptime.bid_percentile},
                        {"name": "max_price", "value": "price", "aggregate": "max"},
                        {"aggregate": "count"},
                        {"value": "current_price", "aggregate": "one"},
                        {"name": "all_price", "value": "price", "aggregate": "list"}
                    ],
                    "window": [
                        {"name": "estimated_value", "value": {"div": ["type.utility", "price_80"]}},
                        {"name": "higher_price", "value": lambda row, rownum, rows: find_higher(row.all_price, row.price_80)}  # TODO: SUPPORT {"from":"all_price", "where":{"gt":[".", "price_80"]}, "select":{"aggregate":"min"}}
                    ]
                })

                output = jx.run({
                    "from": bid80,
                    "sort": {"value": "estimated_value", "sort": -1}
                })

                self.prices = wrap(output.data)
                self.price_lookup = UniqueIndex(("type.instance_type", "availability_zone"), data=self.prices)
            return self.prices

    def _get_spot_prices_from_aws(self):
        with Timer("Read pricing file"):
            try:
                content = File(self.settings.price_file).read()
                cache = convert.json2value(content, flexible=False, leaves=False)
            except Exception as e:
                cache = FlatList()

        most_recents = jx.run({
            "from": cache,
            "edges": ["instance_type", "availability_zone"],
            "select": {"value": "timestamp", "aggregate": "max"}
        })

        zones = self._get_valid_availability_zones()
        prices = set(cache)
        with Timer("Get pricing from AWS"):
            for instance_type in self.settings.utility.keys():
                for zone in zones:
                    if cache:
                        most_recent = most_recents[{
                            "instance_type": instance_type,
                            "availability_zone": zone
                        }].timestamp
                        start_at = MAX([Date(most_recent), Date.today() - WEEK])
                    else:
                        start_at = Date.today() - WEEK

                    if DEBUG_PRICING:
                        Log.note("get pricing for {{instance_type}} starting at {{start_at}}",
                            instance_type=instance_type,
                            start_at=start_at
                        )

                    next_token = None
                    while True:
                        resultset = self.ec2_conn.get_spot_price_history(
                            product_description=coalesce(self.settings.product, "Linux/UNIX (Amazon VPC)"),
                            instance_type=instance_type,
                            availability_zone=zone,
                            start_time=start_at.format(ISO8601),
                            next_token=next_token
                        )
                        next_token = resultset.next_token

                        for p in resultset:
                            prices.add(wrap({
                                "availability_zone": p.availability_zone,
                                "instance_type": p.instance_type,
                                "price": p.price,
                                "product_description": p.product_description,
                                "region": p.region.name,
                                "timestamp": Date(p.timestamp).unix
                            }))

                        if not next_token:
                            break

        with Timer("Save prices to file"):
            new_prices = jx.filter(prices, {"gte": {"timestamp": {"date": "today-2day"}}})
            def stream():  # IT'S A LOT OF PRICES, STREAM THEM TO FILE
                prefix = "[\n"
                for p in new_prices:
                    yield prefix
                    yield convert.value2json(p)
                    prefix = ",\n"
                yield "]"
            File(self.settings.price_file).write(stream())

        return prices
Exemplo n.º 11
0
    def _scan_database(self):
        # GET ALL RELATIONS
        raw_relations = self.db.query(
            """
            SELECT
                table_schema,
                table_name,
                referenced_table_schema,
                referenced_table_name,
                referenced_column_name,
                constraint_name,
                column_name,
                ordinal_position
            FROM
                information_schema.key_column_usage
            WHERE
                referenced_column_name IS NOT NULL
            """,
            param=self.settings.database,
        )

        if not raw_relations:
            Log.error("No relations in the database")

        for r in self.settings.add_relations:
            try:
                lhs, rhs = map(strings.trim, r.split("->"))
                lhs = lhs.split(".")
                if len(lhs) == 2:
                    lhs = [self.settings.database.schema] + lhs
                rhs = rhs.split(".")
                if len(rhs) == 2:
                    rhs = [self.settings.database.schema] + rhs
                to_add = Data(
                    ordinal_position=1,  # CAN ONLY HANDLE 1-COLUMN RELATIONS
                    table_schema=lhs[0],
                    table_name=lhs[1],
                    column_name=lhs[2],
                    referenced_table_schema=rhs[0],
                    referenced_table_name=rhs[1],
                    referenced_column_name=rhs[2],
                )

                # CHECK IF EXISTING
                if jx.filter(raw_relations, {"eq": to_add}):
                    Log.note("Relation {{relation}} already exists",
                             relation=r)
                    continue

                to_add.constraint_name = Random.hex(20)
                raw_relations.append(to_add)
            except Exception as e:
                Log.error("Could not parse {{line|quote}}", line=r, cause=e)

        relations = jx.select(
            raw_relations,
            [
                {
                    "name": "constraint.name",
                    "value": "constraint_name"
                },
                {
                    "name": "table.schema",
                    "value": "table_schema"
                },
                {
                    "name": "table.name",
                    "value": "table_name"
                },
                {
                    "name": "column.name",
                    "value": "column_name"
                },
                {
                    "name": "referenced.table.schema",
                    "value": "referenced_table_schema"
                },
                {
                    "name": "referenced.table.name",
                    "value": "referenced_table_name"
                },
                {
                    "name": "referenced.column.name",
                    "value": "referenced_column_name"
                },
                {
                    "name": "ordinal_position",
                    "value": "ordinal_position"
                },
            ],
        )

        # GET ALL TABLES
        raw_tables = self.db.query("""
            SELECT
                t.table_schema,
                t.table_name,
                c.constraint_name,
                c.constraint_type,
                k.column_name,
                k.ordinal_position
            FROM
                information_schema.tables t
            LEFT JOIN
                information_schema.table_constraints c on c.table_name=t.table_name AND c.table_schema=t.table_schema and (constraint_type='UNIQUE' or constraint_type='PRIMARY KEY')
            LEFT JOIN
                information_schema.key_column_usage k on k.constraint_name=c.constraint_name AND k.table_name=t.table_name and k.table_schema=t.table_schema
            ORDER BY
                t.table_schema,
                t.table_name,
                c.constraint_name,
                k.ordinal_position,
                k.column_name
            """)

        # ORGANIZE, AND PICK ONE UNIQUE CONSTRAINT FOR LINKING
        tables = UniqueIndex(keys=["name", "schema"])
        for t, c in jx.groupby(raw_tables, ["table_name", "table_schema"]):
            c = wrap(list(c))
            best_index = Null
            is_referenced = False
            is_primary = False
            for g, w in jx.groupby(c, "constraint_name"):
                if not g.constraint_name:
                    continue
                w = list(w)
                ref = False
                for r in relations:
                    if (r.table.name == t.table_name
                            and r.table.schema == t.table_schema
                            and r.constraint.name == g.constraint_name):
                        ref = True
                is_prime = w[0].constraint_type == "PRIMARY"

                reasons_this_one_is_better = [
                    best_index == None,  # WE DO NOT HAVE A CANDIDATE YET
                    is_prime
                    and not is_primary,  # PRIMARY KEYS ARE GOOD TO HAVE
                    is_primary == is_prime and ref and
                    not is_referenced,  # REFERENCED UNIQUE TUPLES ARE GOOD TOO
                    is_primary == is_prime and ref == is_referenced and len(w)
                    < len(best_index),  # THE SHORTER THE TUPLE, THE BETTER
                ]
                if any(reasons_this_one_is_better):
                    is_primary = is_prime
                    is_referenced = ref
                    best_index = w

            tables.add({
                "name": t.table_name,
                "schema": t.table_schema,
                "id": [b.column_name for b in best_index],
            })

        fact_table = tables[self.settings.fact_table,
                            self.settings.database.schema]
        ids_table = {
            "alias": "t0",
            "name": "__ids__",
            "schema": fact_table.schema,
            "id": fact_table.id,
        }
        relations.extend(
            wrap({
                "constraint": {
                    "name": "__link_ids_to_fact_table__"
                },
                "table": ids_table,
                "column": {
                    "name": c
                },
                "referenced": {
                    "table": fact_table,
                    "column": {
                        "name": c
                    }
                },
                "ordinal_position": i,
            }) for i, c in enumerate(fact_table.id))
        tables.add(ids_table)

        # GET ALL COLUMNS
        raw_columns = self.db.query("""
            SELECT
                column_name,
                table_schema,
                table_name,
                ordinal_position,
                data_type
            FROM
                information_schema.columns
            """)

        reference_only_tables = [
            r.split(".")[0] for r in self.settings.reference_only
            if len(r.split(".")) == 2
        ]
        reference_all_tables = [
            r.split(".")[0] for r in self.settings.reference_only
            if len(r.split(".")) == 1
        ]
        foreign_column_table_schema_triples = {(r.column.name, r.table.name,
                                                r.table.schema)
                                               for r in relations}
        referenced_column_table_schema_triples = {(
            r.referenced.column.name,
            r.referenced.table.name,
            r.referenced.table.schema,
        )
                                                  for r in relations}
        related_column_table_schema_triples = (
            foreign_column_table_schema_triples
            | referenced_column_table_schema_triples)

        columns = UniqueIndex(["column.name", "table.name", "table.schema"])
        for c in raw_columns:
            if c.table_name in reference_only_tables:
                if c.table_name + "." + c.column_name in self.settings.reference_only:
                    include = True
                    reference = True
                    foreign = False
                elif c.column_name in tables[(c.table_name,
                                              c.table_schema)].id:
                    include = self.settings.show_foreign_keys
                    reference = False
                    foreign = False
                else:
                    include = False
                    reference = False
                    foreign = False
            elif c.table_name in reference_all_tables:
                # TABLES USED FOR REFERENCE, NO NESTED DOCUMENTS EXPECTED
                if c.column_name in tables[(c.table_name, c.table_schema)].id:
                    include = self.settings.show_foreign_keys
                    reference = True
                    foreign = False
                elif (
                        c.column_name,
                        c.table_name,
                        c.table_schema,
                ) in foreign_column_table_schema_triples:
                    include = False
                    reference = False
                    foreign = True
                else:
                    include = True
                    reference = False
                    foreign = False
            elif c.column_name in tables[(c.table_name, c.table_schema)].id:
                include = self.settings.show_foreign_keys
                reference = False
                foreign = False
            elif (
                    c.column_name,
                    c.table_name,
                    c.table_schema,
            ) in foreign_column_table_schema_triples:
                include = False
                reference = False
                foreign = True
            elif (
                    c.column_name,
                    c.table_name,
                    c.table_schema,
            ) in referenced_column_table_schema_triples:
                include = self.settings.show_foreign_keys
                reference = False
                foreign = False
            else:
                include = True
                reference = False
                foreign = False

            rel = {
                "column": {
                    "name": c.column_name,
                    "type": c.data_type
                },
                "table": {
                    "name": c.table_name,
                    "schema": c.table_schema
                },
                "ordinal_position": c.ordinal_position,
                "is_id": c.column_name
                in tables[(c.table_name, c.table_schema)].id,
                "include": include,  # TRUE IF THIS COLUMN IS OUTPUTTED
                "reference":
                reference,  # TRUE IF THIS COLUMN REPRESENTS THE ROW
                "foreign":
                foreign,  # TRUE IF THIS COLUMN POINTS TO ANOTHER ROW
            }
            columns.add(rel)

        # ITERATE OVER ALL PATHS
        todo = FlatList()
        output_columns = FlatList()
        nested_path_to_join = {}
        all_nested_paths = [["."]]

        def follow_paths(position, path, nested_path, done_relations,
                         no_nested_docs):
            if position.name in self.settings.exclude:
                return

            if self.path_not_allowed(path):
                return
            if DEBUG:
                Log.note("Trace {{path}}", path=path)
            if position.name != "__ids__":
                # USED TO CONFIRM WE CAN ACCESS THE TABLE (WILL THROW ERROR WHEN IF IT FAILS)
                self.db.query(
                    ConcatSQL(
                        SQL_SELECT,
                        SQL_STAR,
                        SQL_FROM,
                        quote_column(position.schema, position.name),
                        SQL_LIMIT,
                        SQL_ONE,
                    ))

            if position.name in reference_all_tables:
                no_nested_docs = True
            if position.name in reference_only_tables:
                return
            curr_join_list = copy(nested_path_to_join[nested_path[0]])

            ###############################################################################
            # INNER OBJECTS
            ###############################################################################
            referenced_tables = list(
                sort_using_key(
                    jx.groupby(
                        jx.filter(
                            relations,
                            {
                                "eq": {
                                    "table.name": position.name,
                                    "table.schema": position.schema,
                                }
                            },
                        ),
                        "constraint.name",
                    ),
                    key=lambda p: first(p[1]).column.name,
                ))
            for g, constraint_columns in referenced_tables:
                g = unwrap(g)
                constraint_columns = deepcopy(constraint_columns)
                if g["constraint.name"] in done_relations:
                    continue
                if any(cc for cc in constraint_columns
                       if cc.referenced.table.name in self.settings.exclude):
                    continue

                done_relations.add(g["constraint.name"])

                many_to_one_joins = nested_path_to_join[nested_path[0]]
                index = len(many_to_one_joins)

                alias = "t" + text(index)
                for c in constraint_columns:
                    c.referenced.table.alias = alias
                    c.table = position
                many_to_one_joins.append({
                    "join_columns": constraint_columns,
                    "path": path,
                    "nested_path": nested_path,
                })

                # HANDLE THE COMMON *id SUFFIX
                name = []
                for cname, tname in zip(
                        constraint_columns.column.name,
                        constraint_columns.referenced.table.name,
                ):
                    if cname.startswith(tname):
                        name.append(tname)
                    elif cname.endswith("_id"):
                        name.append(cname[:-3])
                    else:
                        name.append(cname)

                relation_string = many_to_one_string(constraint_columns[0])
                step = "/".join(name)
                if len(constraint_columns) == 1:
                    step = self.name_relations.get(relation_string, step)

                referenced_column_path = concat_field(path, step)
                if self.path_not_allowed(referenced_column_path):
                    continue

                if referenced_column_path in reference_only_tables:
                    continue

                col_pointer_name = relative_field(referenced_column_path,
                                                  nested_path[0])
                for col in columns:
                    if (col.table.name
                            == constraint_columns[0].referenced.table.name
                            and col.table.schema
                            == constraint_columns[0].referenced.table.schema):
                        col_full_name = concat_field(
                            col_pointer_name, literal_field(col.column.name))

                        if (col.is_id and len(nested_path) == 1
                                and col.table.name == fact_table.name
                                and col.table.schema == fact_table.schema):
                            # ALWAYS SHOW THE ID OF THE FACT
                            c_index = len(output_columns)
                            output_columns.append({
                                "table_alias":
                                alias,
                                "column_alias":
                                "c" + text(c_index),
                                "column":
                                col,
                                "sort":
                                True,
                                "path":
                                referenced_column_path,
                                "nested_path":
                                nested_path,
                                "put":
                                col_full_name,
                            })
                        elif col.column.name == constraint_columns[
                                0].column.name:
                            c_index = len(output_columns)
                            output_columns.append({
                                "table_alias":
                                alias,
                                "column_alias":
                                "c" + text(c_index),
                                "column":
                                col,
                                "sort":
                                False,
                                "path":
                                referenced_column_path,
                                "nested_path":
                                nested_path,
                                "put":
                                col_full_name
                                if self.settings.show_foreign_keys else None,
                            })
                        elif col.is_id:
                            c_index = len(output_columns)
                            output_columns.append({
                                "table_alias":
                                alias,
                                "column_alias":
                                "c" + text(c_index),
                                "column":
                                col,
                                "sort":
                                False,
                                "path":
                                referenced_column_path,
                                "nested_path":
                                nested_path,
                                "put":
                                col_full_name
                                if self.settings.show_foreign_keys else None,
                            })
                        elif col.reference:
                            c_index = len(output_columns)
                            output_columns.append({
                                "table_alias":
                                alias,
                                "column_alias":
                                "c" + text(c_index),
                                "column":
                                col,
                                "sort":
                                False,
                                "path":
                                referenced_column_path,
                                "nested_path":
                                nested_path,
                                "put":
                                col_pointer_name
                                if not self.settings.show_foreign_keys else
                                col_full_name,  # REFERENCE FIELDS CAN REPLACE THE WHOLE OBJECT BEING REFERENCED
                            })
                        elif col.include:
                            c_index = len(output_columns)
                            output_columns.append({
                                "table_alias":
                                alias,
                                "column_alias":
                                "c" + text(c_index),
                                "column":
                                col,
                                "sort":
                                False,
                                "path":
                                referenced_column_path,
                                "nested_path":
                                nested_path,
                                "put":
                                col_full_name,
                            })

                if position.name in reference_only_tables:
                    continue

                todo.append(
                    Data(
                        position=copy(constraint_columns[0].referenced.table),
                        path=referenced_column_path,
                        nested_path=nested_path,
                        done_relations=copy(done_relations),
                        no_nested_docs=no_nested_docs,
                    ))
            ###############################################################################
            # NESTED OBJECTS
            ###############################################################################
            if not no_nested_docs:
                nesting_tables = list(
                    sort_using_key(
                        jx.groupby(
                            jx.filter(
                                relations,
                                {
                                    "eq": {
                                        "referenced.table.name": position.name,
                                        "referenced.table.schema":
                                        position.schema,
                                    }
                                },
                            ),
                            "constraint.name",
                        ),
                        key=lambda p: [(r.table.name, r.column.name)
                                       for r in [first(p[1])]][0],
                    ))

                for g, constraint_columns in nesting_tables:
                    g = unwrap(g)
                    constraint_columns = deepcopy(constraint_columns)
                    if g["constraint.name"] in done_relations:
                        continue
                    done_relations.add(g["constraint.name"])

                    many_table = set(constraint_columns.table.name)
                    if not (many_table - self.settings.exclude):
                        continue

                    relation_string = one_to_many_string(constraint_columns[0])
                    step = "/".join(many_table)
                    if len(constraint_columns) == 1:
                        step = self.name_relations.get(relation_string, step)

                    referenced_column_path = concat_field(path, step)
                    if self.path_not_allowed(referenced_column_path):
                        continue

                    new_nested_path = [referenced_column_path] + nested_path
                    all_nested_paths.append(new_nested_path)

                    if referenced_column_path in nested_path_to_join:
                        Log.error(
                            "{{path}} already exists, try adding entry to name_relations",
                            path=referenced_column_path,
                        )
                    one_to_many_joins = nested_path_to_join[
                        referenced_column_path] = copy(curr_join_list)
                    index = len(one_to_many_joins)
                    alias = "t" + text(index)
                    for c in constraint_columns:
                        c.table.alias = alias
                        c.referenced.table = position
                    one_to_many_joins.append(
                        set_default(
                            {},
                            g,
                            {
                                "children": True,
                                "join_columns": constraint_columns,
                                "path": path,
                                "nested_path": nested_path,
                            },
                        ))
                    for col in columns:
                        if (col.table.name == constraint_columns[0].table.name
                                and col.table.schema
                                == constraint_columns[0].table.schema):
                            col_full_name = join_field(
                                split_field(referenced_column_path)
                                [len(split_field(new_nested_path[0])):] +
                                [literal_field(col.column.name)])

                            if col.column.name == constraint_columns[
                                    0].column.name:
                                c_index = len(output_columns)
                                output_columns.append({
                                    "table_alias":
                                    alias,
                                    "column_alias":
                                    "c" + text(c_index),
                                    "column":
                                    col,
                                    "sort":
                                    col.is_id,
                                    "path":
                                    referenced_column_path,
                                    "nested_path":
                                    new_nested_path,
                                    "put":
                                    col_full_name if
                                    self.settings.show_foreign_keys else None,
                                })
                            elif col.is_id:
                                c_index = len(output_columns)
                                output_columns.append({
                                    "table_alias":
                                    alias,
                                    "column_alias":
                                    "c" + text(c_index),
                                    "column":
                                    col,
                                    "sort":
                                    col.is_id,
                                    "path":
                                    referenced_column_path,
                                    "nested_path":
                                    new_nested_path,
                                    "put":
                                    col_full_name if
                                    self.settings.show_foreign_keys else None,
                                })
                            else:
                                c_index = len(output_columns)
                                output_columns.append({
                                    "table_alias":
                                    alias,
                                    "column_alias":
                                    "c" + text(c_index),
                                    "column":
                                    col,
                                    "sort":
                                    col.is_id,
                                    "path":
                                    referenced_column_path,
                                    "nested_path":
                                    new_nested_path,
                                    "put":
                                    col_full_name if col.include else None,
                                })

                    todo.append(
                        Data(
                            position=constraint_columns[0].table,
                            path=referenced_column_path,
                            nested_path=new_nested_path,
                            done_relations=copy(done_relations),
                            no_nested_docs=no_nested_docs,
                        ))

        path = "."
        nested_path = [path]
        nested_path_to_join["."] = [{
            "path":
            path,
            "join_columns": [{
                "referenced": {
                    "table": ids_table
                }
            }],
            "nested_path":
            nested_path,
        }]

        todo.append(
            Data(
                position=ids_table,
                path=path,
                nested_path=nested_path,
                done_relations=set(),
                no_nested_docs=False,
            ))

        while todo:
            item = todo.pop(0)
            follow_paths(**item)

        self.all_nested_paths = all_nested_paths
        self.nested_path_to_join = nested_path_to_join
        self.columns = output_columns
Exemplo n.º 12
0
        return self

    def get_schema(self, name):
        if self.name != name:
            Log.error("This container only has table by name of {{name}}",
                      name=name)
        return self.schema

    def get_table(self, name):
        if self is name or self.name == name:
            return self
        Log.error("This container only has table by name of {{name}}",
                  name=name)


def _exec(code):
    try:
        temp = None
        exec("temp = " + code)
        return temp
    except Exception as e:
        Log.error("Could not execute {{code|quote}}", code=code, cause=e)


DUAL = ListContainer(name="dual",
                     data=[{}],
                     schema=Schema(table_name="dual",
                                   columns=UniqueIndex(keys=("name", ))))

export("jx_base.container", ListContainer)
    def add(self, value):
        self.data.append(value)

    def __getitem__(self, item):
        if item < 0 or len(self.data) <= item:
            return Null
        return self.data[item]

    def __iter__(self):
        return (wrap(d) for d in self.data)

    def __len__(self):
        return len(self.data)


def _exec(code):
    try:
        temp = None
        exec "temp = " + code
        return temp
    except Exception as e:
        Log.error("Could not execute {{code|quote}}", code=code, cause=e)


from pyLibrary.queries import Schema, jx

DUAL = ListContainer(name="dual",
                     data=[{}],
                     schema=Schema(table_name="dual",
                                   columns=UniqueIndex(keys=("names.\\.", ))))
Exemplo n.º 14
0
            Log.error("This container only has table by name of {{name}}", name=name)
        return self

    def get_schema(self, name):
        if self.name != name:
            Log.error("This container only has table by name of {{name}}", name=name)
        return self.schema

    def get_table(self, name):
        if self is name or self.name == name:
            return self
        Log.error("This container only has table by name of {{name}}", name=name)


def _exec(code):
    try:
        temp = None
        exec("temp = " + code)
        return temp
    except Exception as e:
        Log.error("Could not execute {{code|quote}}", code=code, cause=e)


from jx_python import jx

DUAL = ListContainer(
    name="dual",
    data=[{}],
    schema=Schema(table_name="dual", columns=UniqueIndex(keys=("name",)))
)
    def update_spot_requests(self):
        spot_requests = self._get_managed_spot_requests()

        # ADD UP THE CURRENT REQUESTED INSTANCES
        all_instances = UniqueIndex("id", data=self._get_managed_instances())
        self.active = active = wrap([
            r for r in spot_requests if r.status.code in RUNNING_STATUS_CODES
            | PENDING_STATUS_CODES | PROBABLY_NOT_FOR_A_WHILE | MIGHT_HAPPEN
        ])

        for a in active.copy():
            if a.status.code == "request-canceled-and-instance-running" and all_instances[
                    a.instance_id] == None:
                active.remove(a)

        used_budget = 0
        current_spending = 0
        for a in active:
            about = self.price_lookup[a.launch_specification.instance_type,
                                      a.launch_specification.placement]
            discount = coalesce(about.type.discount, 0)
            Log.note(
                "Active Spot Request {{id}}: {{type}} {{instance_id}} in {{zone}} @ {{price|round(decimal=4)}}",
                id=a.id,
                type=a.launch_specification.instance_type,
                zone=a.launch_specification.placement,
                instance_id=a.instance_id,
                price=a.price - discount)
            used_budget += a.price - discount
            current_spending += coalesce(about.current_price,
                                         a.price) - discount

        Log.note(
            "Total Exposure: ${{budget|round(decimal=4)}}/hour (current price: ${{current|round(decimal=4)}}/hour)",
            budget=used_budget,
            current=current_spending)

        remaining_budget = self.settings.budget - used_budget

        current_utility = coalesce(
            SUM(self.price_lookup[
                r.launch_specification.instance_type,
                r.launch_specification.placement].type.utility
                for r in active), 0)
        utility_required = self.instance_manager.required_utility(
            current_utility)
        net_new_utility = utility_required - current_utility

        Log.note(
            "have {{current_utility}} utility running; need {{need_utility}} more utility",
            current_utility=current_utility,
            need_utility=net_new_utility)

        if remaining_budget < 0:
            remaining_budget, net_new_utility = self.save_money(
                remaining_budget, net_new_utility)

        if net_new_utility < 0:
            if self.settings.allowed_overage:
                net_new_utility = mo_math.min(
                    net_new_utility +
                    self.settings.allowed_overage * utility_required, 0)

            net_new_utility = self.remove_instances(net_new_utility)

        if net_new_utility > 0:
            net_new_utility = mo_math.min(net_new_utility,
                                          self.settings.max_new_utility)
            net_new_utility, remaining_budget = self.add_instances(
                net_new_utility, remaining_budget)

        if net_new_utility > 0:
            Log.alert(
                "Can not fund {{num|round(places=2)}} more utility (all utility costs more than ${{expected|round(decimal=2)}}/hour).  Remaining budget is ${{budget|round(decimal=2)}} ",
                num=net_new_utility,
                expected=self.settings.max_utility_price,
                budget=remaining_budget)

        # Give EC2 a chance to notice the new requests before tagging them.
        Till(seconds=3).wait()
        with self.net_new_locker:
            for req in self.net_new_spot_requests:
                req.add_tag("Name", self.settings.ec2.instance.name)

        Log.note("All requests for new utility have been made")
        self.done_making_new_spot_requests.go()
    def pricing(self):
        with self.price_locker:
            if self.prices:
                return self.prices

            prices = self._get_spot_prices_from_aws()
            now = Date.now()

            with Timer("processing pricing data"):
                hourly_pricing = jx.run({
                    "from": {
                        # AWS PRICING ONLY SENDS timestamp OF CHANGES, MATCH WITH NEXT INSTANCE
                        "from":
                        prices,
                        "window": [
                            {
                                "name": "expire",
                                "value": {
                                    "coalesce": [{
                                        "rows": {
                                            "timestamp": 1
                                        }
                                    }, {
                                        "date": "eod"
                                    }]
                                },
                                "edges":
                                ["availability_zone", "instance_type"],
                                "sort": "timestamp"
                            },
                            {  # MAKE THIS PRICE EFFECTIVE INTO THE PAST, THIS HELPS SPREAD PRICE SPIKES OVER TIME
                                "name": "effective",
                                "value": {
                                    "sub": {
                                        "timestamp":
                                        self.settings.uptime.duration.seconds
                                    }
                                }
                            }
                        ]
                    },
                    "edges": [
                        "availability_zone", "instance_type", {
                            "name": "time",
                            "range": {
                                "min": "effective",
                                "max": "expire",
                                "mode": "inclusive"
                            },
                            "allowNulls": False,
                            "domain": {
                                "type": "time",
                                "min":
                                now.floor(HOUR) - self.settings.uptime.history,
                                "max": Date.now().floor(HOUR) + HOUR,
                                "interval": "hour"
                            }
                        }
                    ],
                    "select": [{
                        "value": "price",
                        "aggregate": "max"
                    }, {
                        "aggregate": "count"
                    }],
                    "where": {
                        "gt": {
                            "expire":
                            now.floor(HOUR) - self.settings.uptime.history
                        }
                    },
                    "window": [{
                        "name": "current_price",
                        "value": "rows.last.price",
                        "edges": ["availability_zone", "instance_type"],
                        "sort": "time"
                    }]
                }).data

                bid80 = jx.run({
                    "from":
                    ListContainer(name=None, data=hourly_pricing),
                    "edges": [{
                        "value": "availability_zone",
                        "allowNulls": False
                    }, {
                        "name": "type",
                        "value": "instance_type",
                        "allowNulls": False,
                        "domain": {
                            "type": "set",
                            "key": "instance_type",
                            "partitions": self.settings.utility
                        }
                    }],
                    "select": [{
                        "name":
                        "price_80",
                        "value":
                        "price",
                        "aggregate":
                        "percentile",
                        "percentile":
                        self.settings.uptime.bid_percentile
                    }, {
                        "name": "max_price",
                        "value": "price",
                        "aggregate": "max"
                    }, {
                        "aggregate": "count"
                    }, {
                        "value": "current_price",
                        "aggregate": "one"
                    }, {
                        "name": "all_price",
                        "value": "price",
                        "aggregate": "list"
                    }],
                    "window": [
                        {
                            "name": "estimated_value",
                            "value": {
                                "div": ["type.utility", "price_80"]
                            }
                        },
                        {
                            "name":
                            "higher_price",
                            "value":
                            lambda row, rownum, rows: find_higher(
                                row.all_price, row.price_80)
                        }  # TODO: SUPPORT {"from":"all_price", "where":{"gt":[".", "price_80"]}, "select":{"aggregate":"min"}}
                    ]
                })

                output = jx.sort(bid80.values(), {
                    "value": "estimated_value",
                    "sort": -1
                })

                self.prices = wrap(output)
                self.price_lookup = UniqueIndex(
                    ("type.instance_type", "availability_zone"),
                    data=self.prices)
            return self.prices
Exemplo n.º 17
0
def _get_branches_from_hg(kwarg):
    # GET MAIN PAGE
    response = http.get(kwarg.url)
    doc = BeautifulSoup(response.all_content, "html.parser")

    all_repos = doc("table")[1]
    branches = UniqueIndex(["name", "locale"], fail_on_dup=False)
    for i, r in enumerate(all_repos("tr")):
        dir, name = [v.text.strip() for v in r("td")]

        b = _get_single_branch_from_hg(kwarg, name, dir.lstrip("/"))
        branches.extend(b)

    # branches.add(set_default({"name": "release-mozilla-beta"}, branches["mozilla-beta", DEFAULT_LOCALE]))
    for b in list(branches["mozilla-beta", ]):
        branches.add(set_default({"name": "release-mozilla-beta"}, b))  # THIS IS THE l10n "name"
        b.url = "https://hg.mozilla.org/releases/mozilla-beta"          # THIS IS THE

    for b in list(branches["mozilla-release", ]):
        branches.add(set_default({"name": "release-mozilla-release"}, b))

    for b in list(branches["mozilla-aurora", ]):
        if b.locale == "en-US":
            continue
        branches.add(set_default({"name": "comm-aurora"}, b))
        # b.url = "https://hg.mozilla.org/releases/mozilla-aurora"

    for b in list(branches):
        if b.name.startswith("mozilla-esr"):
            branches.add(set_default({"name": "release-" + b.name}, b))  # THIS IS THE l10n "name"
            b.url = "https://hg.mozilla.org/releases/" + b.name

    #CHECKS
    for b in branches:
        if b.name != b.name.lower():
            Log.error("Expecting lowercase name")
        if not b.locale:
            Log.error("Not expected")
        if not b.url.startswith("http"):
            Log.error("Expecting a valid url")
        if not b.etl.timestamp:
            Log.error("Expecting a timestamp")

    return branches
class SpotManager(object):
    @override
    def __init__(self, instance_manager, disable_prices=False, kwargs=None):
        self.settings = kwargs
        self.instance_manager = instance_manager
        aws_args = dict(region_name=kwargs.aws.region,
                        aws_access_key_id=unwrap(kwargs.aws.aws_access_key_id),
                        aws_secret_access_key=unwrap(
                            kwargs.aws.aws_secret_access_key))
        self.ec2_conn = boto.ec2.connect_to_region(**aws_args)
        self.vpc_conn = boto.vpc.connect_to_region(**aws_args)
        self.price_locker = Lock()
        self.prices = None
        self.price_lookup = None
        self.no_capacity = {}
        self.no_capacity_file = File(
            kwargs.price_file).parent / "no capacity.json"
        self.done_making_new_spot_requests = Signal()
        self.net_new_locker = Lock()
        self.net_new_spot_requests = UniqueIndex(
            ("id", ))  # SPOT REQUESTS FOR THIS SESSION
        self.watcher = None
        self.active = None

        self.settings.uptime.bid_percentile = coalesce(
            self.settings.uptime.bid_percentile, self.settings.bid_percentile)
        self.settings.uptime.history = coalesce(
            Date(self.settings.uptime.history), DAY)
        self.settings.uptime.duration = coalesce(
            Duration(self.settings.uptime.duration), Date("5minute"))
        self.settings.max_percent_per_type = coalesce(
            self.settings.max_percent_per_type, 1)

        if ENABLE_SIDE_EFFECTS and instance_manager and instance_manager.setup_required(
        ):
            self._start_life_cycle_watcher()
        if not disable_prices:
            self.pricing()

    def update_spot_requests(self):
        spot_requests = self._get_managed_spot_requests()

        # ADD UP THE CURRENT REQUESTED INSTANCES
        all_instances = UniqueIndex("id", data=self._get_managed_instances())
        self.active = active = wrap([
            r for r in spot_requests if r.status.code in RUNNING_STATUS_CODES
            | PENDING_STATUS_CODES | PROBABLY_NOT_FOR_A_WHILE | MIGHT_HAPPEN
        ])

        for a in active.copy():
            if a.status.code == "request-canceled-and-instance-running" and all_instances[
                    a.instance_id] == None:
                active.remove(a)

        used_budget = 0
        current_spending = 0
        for a in active:
            about = self.price_lookup[a.launch_specification.instance_type,
                                      a.launch_specification.placement]
            discount = coalesce(about.type.discount, 0)
            Log.note(
                "Active Spot Request {{id}}: {{type}} {{instance_id}} in {{zone}} @ {{price|round(decimal=4)}}",
                id=a.id,
                type=a.launch_specification.instance_type,
                zone=a.launch_specification.placement,
                instance_id=a.instance_id,
                price=a.price - discount)
            used_budget += a.price - discount
            current_spending += coalesce(about.current_price,
                                         a.price) - discount

        Log.note(
            "Total Exposure: ${{budget|round(decimal=4)}}/hour (current price: ${{current|round(decimal=4)}}/hour)",
            budget=used_budget,
            current=current_spending)

        remaining_budget = self.settings.budget - used_budget

        current_utility = coalesce(
            SUM(self.price_lookup[
                r.launch_specification.instance_type,
                r.launch_specification.placement].type.utility
                for r in active), 0)
        utility_required = self.instance_manager.required_utility(
            current_utility)
        net_new_utility = utility_required - current_utility

        Log.note(
            "have {{current_utility}} utility running; need {{need_utility}} more utility",
            current_utility=current_utility,
            need_utility=net_new_utility)

        if remaining_budget < 0:
            remaining_budget, net_new_utility = self.save_money(
                remaining_budget, net_new_utility)

        if net_new_utility < 0:
            if self.settings.allowed_overage:
                net_new_utility = mo_math.min(
                    net_new_utility +
                    self.settings.allowed_overage * utility_required, 0)

            net_new_utility = self.remove_instances(net_new_utility)

        if net_new_utility > 0:
            net_new_utility = mo_math.min(net_new_utility,
                                          self.settings.max_new_utility)
            net_new_utility, remaining_budget = self.add_instances(
                net_new_utility, remaining_budget)

        if net_new_utility > 0:
            Log.alert(
                "Can not fund {{num|round(places=2)}} more utility (all utility costs more than ${{expected|round(decimal=2)}}/hour).  Remaining budget is ${{budget|round(decimal=2)}} ",
                num=net_new_utility,
                expected=self.settings.max_utility_price,
                budget=remaining_budget)

        # Give EC2 a chance to notice the new requests before tagging them.
        Till(seconds=3).wait()
        with self.net_new_locker:
            for req in self.net_new_spot_requests:
                req.add_tag("Name", self.settings.ec2.instance.name)

        Log.note("All requests for new utility have been made")
        self.done_making_new_spot_requests.go()

    def add_instances(self, net_new_utility, remaining_budget):
        prices = self.pricing()

        for p in prices:
            if net_new_utility <= 0 or remaining_budget <= 0:
                break

            if p.current_price == None:
                Log.note("{{type}} has no current price",
                         type=p.type.instance_type)
                continue

            if self.settings.utility[p.type.instance_type].blacklist or \
                p.availability_zone in listwrap(self.settings.utility[p.type.instance_type].blacklist_zones):
                Log.note("{{type}} in {{zone}} skipped due to blacklist",
                         type=p.type.instance_type,
                         zone=p.availability_zone)
                continue

            # DO NOT BID HIGHER THAN WHAT WE ARE WILLING TO PAY
            max_acceptable_price = p.type.utility * self.settings.max_utility_price + p.type.discount
            max_bid = mo_math.min(p.higher_price, max_acceptable_price,
                                  remaining_budget)
            min_bid = p.price_80

            if min_bid > max_acceptable_price:
                Log.note(
                    "Price of ${{price}}/hour on {{type}}: Over remaining acceptable price of ${{remaining}}/hour",
                    type=p.type.instance_type,
                    price=min_bid,
                    remaining=max_acceptable_price)
                continue
            elif min_bid > remaining_budget:
                Log.note(
                    "Did not bid ${{bid}}/hour on {{type}}: Over budget of ${{remaining_budget}}/hour",
                    type=p.type.instance_type,
                    bid=min_bid,
                    remaining_budget=remaining_budget)
                continue
            elif min_bid > max_bid:
                Log.error("not expected")

            naive_number_needed = int(
                mo_math.round(float(net_new_utility) / float(p.type.utility),
                              decimal=0))
            limit_total = None
            if self.settings.max_percent_per_type < 1:
                current_count = sum(
                    1 for a in self.active
                    if a.launch_specification.instance_type ==
                    p.type.instance_type and a.launch_specification.placement
                    == p.availability_zone)
                all_count = sum(
                    1 for a in self.active
                    if a.launch_specification.placement == p.availability_zone)
                all_count = max(all_count, naive_number_needed)
                limit_total = int(
                    mo_math.floor(
                        (all_count * self.settings.max_percent_per_type -
                         current_count) /
                        (1 - self.settings.max_percent_per_type)))

            num = mo_math.min(naive_number_needed, limit_total,
                              self.settings.max_requests_per_type)
            if num < 0:
                Log.note(
                    "{{type}} is over {{limit|percent}} of instances, no more requested",
                    limit=self.settings.max_percent_per_type,
                    type=p.type.instance_type)
                continue
            elif num == 1:
                min_bid = mo_math.min(
                    mo_math.max(p.current_price * 1.1, min_bid),
                    max_acceptable_price)
                price_interval = 0
            else:
                price_interval = mo_math.min(min_bid / 10,
                                             (max_bid - min_bid) / (num - 1))

            for i in range(num):
                bid_per_machine = min_bid + (i * price_interval)
                if bid_per_machine < p.current_price:
                    Log.note(
                        "Did not bid ${{bid}}/hour on {{type}}: Under current price of ${{current_price}}/hour",
                        type=p.type.instance_type,
                        bid=bid_per_machine - p.type.discount,
                        current_price=p.current_price)
                    continue
                if bid_per_machine - p.type.discount > remaining_budget:
                    Log.note(
                        "Did not bid ${{bid}}/hour on {{type}}: Over remaining budget of ${{remaining}}/hour",
                        type=p.type.instance_type,
                        bid=bid_per_machine - p.type.discount,
                        remaining=remaining_budget)
                    continue

                last_no_capacity_message = self.no_capacity.get(
                    p.type.instance_type, Null)
                if last_no_capacity_message > Date.now(
                ) - CAPACITY_NOT_AVAILABLE_RETRY:
                    Log.note(
                        "Did not bid on {{type}}: \"No capacity\" last seen at {{last_time|datetime}}",
                        type=p.type.instance_type,
                        last_time=last_no_capacity_message)
                    continue

                try:
                    if self.settings.ec2.request.count == None or self.settings.ec2.request.count != 1:
                        Log.error(
                            "Spot Manager can only request machine one-at-a-time"
                        )

                    new_requests = self._request_spot_instances(
                        price=bid_per_machine,
                        availability_zone_group=p.availability_zone,
                        instance_type=p.type.instance_type,
                        kwargs=copy(self.settings.ec2.request))
                    Log.note(
                        "Request {{num}} instance {{type}} in {{zone}} with utility {{utility}} at ${{price}}/hour",
                        num=len(new_requests),
                        type=p.type.instance_type,
                        zone=p.availability_zone,
                        utility=p.type.utility,
                        price=bid_per_machine)
                    net_new_utility -= p.type.utility * len(new_requests)
                    remaining_budget -= (bid_per_machine -
                                         p.type.discount) * len(new_requests)
                    with self.net_new_locker:
                        for ii in new_requests:
                            self.net_new_spot_requests.add(ii)
                except Exception as e:
                    Log.warning(
                        "Request instance {{type}} failed because {{reason}}",
                        type=p.type.instance_type,
                        reason=e.message,
                        cause=e)

                    if "Max spot instance count exceeded" in e.message:
                        Log.note("No further spot requests will be attempted.")
                        return net_new_utility, remaining_budget

        return net_new_utility, remaining_budget

    def remove_instances(self, net_new_utility):
        instances = self.running_instances()

        # FIND COMBO THAT WILL SHUTDOWN WHAT WE NEED EXACTLY, OR MORE
        remove_list = []
        for acceptable_error in range(0, 8):
            remaining_utility = -net_new_utility
            remove_list = FlatList()
            for s in instances:
                utility = coalesce(s.markup.type.utility, 0)
                if utility <= remaining_utility + acceptable_error:
                    remove_list.append(s)
                    remaining_utility -= utility
            if remaining_utility <= 0:
                net_new_utility = -remaining_utility
                break

        if not remove_list:
            return net_new_utility

        # SEND SHUTDOWN TO EACH INSTANCE
        Log.note("Shutdown {{instances}}", instances=remove_list.id)
        remove_threads = [
            Thread.run("teardown for " + text(i.id),
                       self.instance_manager.teardown, i) for i in remove_list
        ]
        for t in remove_threads:
            try:
                t.join()
            except Exception as e:
                Log.warning("Teardown of {{id}} failed", id=i.id, cause=e)

        remove_spot_requests = remove_list.spot_instance_request_id

        # TERMINATE INSTANCES
        self.ec2_conn.terminate_instances(instance_ids=remove_list.id)

        # TERMINATE SPOT REQUESTS
        self.ec2_conn.cancel_spot_instance_requests(
            request_ids=remove_spot_requests)

        return net_new_utility

    def running_instances(self):
        # FIND THE BIGGEST, MOST EXPENSIVE REQUESTS
        instances = self._get_managed_instances()
        for r in instances:
            try:
                r.markup = self.price_lookup[r.instance_type, r.placement]
            except Exception as e:
                r.markup = self.price_lookup[r.instance_type, r.placement]
                Log.error("No pricing!!!", e)
        instances = jx.sort(instances, [{
            "value": "markup.type.utility",
            "sort": -1
        }, {
            "value": "markup.estimated_value",
            "sort": 1
        }])
        return instances

    def save_money(self, remaining_budget, net_new_utility):
        remove_spot_requests = wrap([])

        # FIRST CANCEL THE PENDING REQUESTS
        if remaining_budget < 0:
            requests = self._get_managed_spot_requests()
            for r in requests:
                if r.status.code in PENDING_STATUS_CODES | PROBABLY_NOT_FOR_A_WHILE | MIGHT_HAPPEN:
                    remove_spot_requests.append(r.id)
                    net_new_utility += self.settings.utility[
                        r.launch_specification.instance_type].utility
                    remaining_budget += r.price

        instances = jx.sort(self.running_instances(), "markup.estimated_value")

        remove_list = wrap([])
        for s in instances:
            if remaining_budget >= 0:
                break
            remove_list.append(s)
            net_new_utility += coalesce(s.markup.type.utility, 0)
            remaining_budget += coalesce(s.request.bid_price,
                                         s.markup.price_80,
                                         s.markup.current_price)

        if not remove_list:
            return remaining_budget, net_new_utility

        # SEND SHUTDOWN TO EACH INSTANCE
        Log.warning("Shutdown {{instances}} to save money!",
                    instances=remove_list.id)
        if ALLOW_SHUTDOWN:
            for g, removals in jx.chunk(remove_list, size=20):
                for i, t in [(i,
                              Thread.run("teardown " + i.id,
                                         self.instance_manager.teardown,
                                         i,
                                         please_stop=False))
                             for i in removals]:
                    try:
                        t.join()
                    except Exception:
                        Log.note("Problem with shutdown of {{id}}", id=i.id)

            remove_spot_requests.extend(remove_list.spot_instance_request_id)

            # TERMINATE INSTANCES
            self.ec2_conn.terminate_instances(instance_ids=remove_list.id)

            # TERMINATE SPOT REQUESTS
            self.ec2_conn.cancel_spot_instance_requests(
                request_ids=remove_spot_requests)
        return remaining_budget, net_new_utility

    @cache(duration=5 * SECOND)
    def _get_managed_spot_requests(self):
        output = wrap([
            datawrap(r)
            for r in self.ec2_conn.get_all_spot_instance_requests()
            if not r.tags.get("Name")
            or r.tags.get("Name").startswith(self.settings.ec2.instance.name)
        ])
        return output

    def _get_managed_instances(self):
        requests = UniqueIndex(["instance_id"],
                               data=self._get_managed_spot_requests().filter(
                                   lambda r: r.instance_id != None))
        reservations = self.ec2_conn.get_all_instances()

        output = []
        for res in reservations:
            for instance in res.instances:
                if instance.tags.get('Name', '').startswith(
                        self.settings.ec2.instance.name
                ) and instance._state.name == "running":
                    instance.request = requests[instance.id]
                    output.append(datawrap(instance))
        return wrap(output)

    def _start_life_cycle_watcher(self):
        failed_locker = Lock()
        failed_attempts = Data()

        def track_setup(
                instance_setup_function,
                request,
                instance,  # THE boto INSTANCE OBJECT FOR THE MACHINE TO SETUP
                utility,  # THE utility OBJECT FOUND IN CONFIG
                please_stop):
            try:
                instance_setup_function(instance, utility, please_stop)
                instance.add_tag(
                    "Name", self.settings.ec2.instance.name + " (running)")
                with self.net_new_locker:
                    self.net_new_spot_requests.remove(request.id)
            except Exception as e:
                e = Except.wrap(e)
                instance.add_tag("Name", "")
                with failed_locker:
                    failed_attempts[request.id] += [e]
                if "Can not setup unknown " in e:
                    Log.warning("Unexpected failure on startup",
                                instance_id=instance.id,
                                cause=e)
                elif ERROR_ON_CALL_TO_SETUP in e:
                    with failed_locker:
                        causes = failed_attempts[request.id]
                    if len(causes) > 2:
                        Log.warning("Problem with setup() of {{instance_id}}",
                                    instance_id=instance.id,
                                    cause=causes)
                else:
                    Log.warning("Unexpected failure on startup",
                                instance_id=instance.id,
                                cause=e)

        def life_cycle_watcher(please_stop):
            bad_requests = Data()
            setup_threads = []
            last_get = Date.now()
            setup_in_progress = set()

            while not please_stop:
                spot_requests = self._get_managed_spot_requests()
                instances = wrap({
                    i.id: i
                    for r in self.ec2_conn.get_all_instances()
                    for i in r.instances
                })
                # INSTANCES THAT REQUIRE SETUP
                time_to_stop_trying = {}
                please_setup = [
                    (i, r) for i, r in [(instances[r.instance_id], r)
                                        for r in spot_requests]
                    if i.id and (not i.tags.get("Name") or i.tags.get(
                        "Name") == self.settings.ec2.instance.name +
                                 " (setup)") and i.id not in setup_in_progress
                    and i._state.name == "running"
                    and Date.now() > Date(i.launch_time) + DELAY_BEFORE_SETUP
                ]

                for i, r in please_setup:
                    if not time_to_stop_trying.get(i.id):
                        time_to_stop_trying[
                            i.id] = Date.now() + TIME_FROM_RUNNING_TO_LOGIN
                    if Date.now() > time_to_stop_trying[i.id]:
                        # FAIL TO SETUP AFTER x MINUTES, THEN TERMINATE INSTANCE
                        self.ec2_conn.terminate_instances(instance_ids=[i.id])
                        with self.net_new_locker:
                            self.net_new_spot_requests.remove(r.id)
                        Log.warning(
                            "Problem with setup of {{instance_id}}.  Time is up.  Instance TERMINATED!",
                            instance_id=i.id)
                        continue

                    try:
                        p = self.settings.utility[i.instance_type]
                        if p == None:
                            try:
                                self.ec2_conn.terminate_instances(
                                    instance_ids=[i.id])
                                with self.net_new_locker:
                                    self.net_new_spot_requests.remove(r.id)
                            finally:
                                Log.error(
                                    "Can not setup unknown {{instance_id}} of type {{type}}",
                                    instance_id=i.id,
                                    type=i.instance_type)

                        i.markup = p
                        i.add_tag("Name",
                                  self.settings.ec2.instance.name + " (setup)")
                        setup_in_progress.add(i.id)
                        t = Thread.run("setup for " + text(i.id), track_setup,
                                       self.instance_manager.setup, r, i, p)
                        if SINGLE_THREAD_SETUP:
                            t.join()
                        setup_threads.append(t)
                    except Exception as e:
                        i.add_tag("Name", "")
                        Log.warning("Unexpected failure on startup",
                                    instance_id=i.id,
                                    cause=e)

                if Date.now() - last_get > 5 * SECOND:
                    # REFRESH STALE
                    spot_requests = self._get_managed_spot_requests()
                    last_get = Date.now()

                pending = wrap([
                    r for r in spot_requests
                    if r.status.code in PENDING_STATUS_CODES
                ])
                give_up = wrap([
                    r for r in spot_requests
                    if (r.status.code in PROBABLY_NOT_FOR_A_WHILE
                        | TERMINATED_STATUS_CODES) and r.id not in bad_requests
                ])
                ignore = wrap([
                    r for r in spot_requests if r.status.code in MIGHT_HAPPEN
                ])  # MIGHT HAPPEN, BUT NO NEED TO WAIT FOR IT

                if self.done_making_new_spot_requests:
                    with self.net_new_locker:
                        expired = Date.now(
                        ) - self.settings.run_interval + 2 * MINUTE
                        for ii in list(self.net_new_spot_requests):
                            if Date(ii.create_time) < expired:
                                # SOMETIMES REQUESTS NEVER GET INTO THE MAIN LIST OF REQUESTS
                                self.net_new_spot_requests.remove(ii)

                        for g in ignore:
                            self.net_new_spot_requests.remove(g.id)
                        pending = UniqueIndex(("id", ), data=pending)
                        pending = pending | self.net_new_spot_requests

                    if give_up:
                        self.ec2_conn.cancel_spot_instance_requests(
                            request_ids=give_up.id)
                        Log.note(
                            "Cancelled spot requests {{spots}}, {{reasons}}",
                            spots=give_up.id,
                            reasons=give_up.status.code)

                        for g in give_up:
                            bad_requests[g.id] += 1
                            if g.id in self.net_new_spot_requests:
                                self.net_new_spot_requests.remove(g.id)
                                if g.status.code == "capacity-not-available":
                                    self.no_capacity[
                                        g.launch_specification.
                                        instance_type] = Date.now()
                                if g.status.code == "bad-parameters":
                                    self.no_capacity[
                                        g.launch_specification.
                                        instance_type] = Date.now()
                                    Log.warning(
                                        "bad parameters while requesting type {{type}}",
                                        type=g.launch_specification.
                                        instance_type)

                if not pending and self.done_making_new_spot_requests:
                    Log.note("No more pending spot requests")
                    break
                elif pending:
                    Log.note("waiting for spot requests: {{pending}}",
                             pending=[p.id for p in pending])

                (Till(seconds=10) | please_stop).wait()

            with Timer("Save no capacity to file"):
                table = [{
                    "instance_type": k,
                    "last_failure": v
                } for k, v in self.no_capacity.items()]
                self.no_capacity_file.write(value2json(table, pretty=True))

            # WAIT FOR SETUP TO COMPLETE
            for t in setup_threads:
                t.join()

            Log.note("life cycle watcher has stopped")

        # Log.warning("lifecycle watcher is disabled")
        timeout = Till(seconds=self.settings.run_interval.seconds - 60)
        self.watcher = Thread.run("lifecycle watcher",
                                  life_cycle_watcher,
                                  please_stop=timeout)

    def _get_valid_availability_zones(self):
        subnets = list(
            self.vpc_conn.get_all_subnets(subnet_ids=self.settings.ec2.request.
                                          network_interfaces.subnet_id))
        zones_with_interfaces = [s.availability_zone for s in subnets]

        if self.settings.availability_zone:
            # If they pass a list of zones, constrain it by zones we have an
            # interface for.
            return set(zones_with_interfaces) & set(
                listwrap(self.settings.availability_zone))
        else:
            # Otherwise, use all available zones.
            return zones_with_interfaces

    @override
    def _request_spot_instances(self, price, availability_zone_group,
                                instance_type, kwargs):
        kwargs.self = None
        kwargs.kwargs = None

        # m3 INSTANCES ARE NOT ALLOWED PLACEMENT GROUP
        if instance_type.startswith("m3."):
            kwargs.placement_group = None

        kwargs.network_interfaces = NetworkInterfaceCollection(
            *(NetworkInterfaceSpecification(**i)
              for i in listwrap(kwargs.network_interfaces)
              if self.vpc_conn.get_all_subnets(
                  subnet_ids=i.subnet_id,
                  filters={"availabilityZone": availability_zone_group})))

        if len(kwargs.network_interfaces) == 0:
            Log.error(
                "No network interface specifications found for {{availability_zone}}!",
                availability_zone=kwargs.availability_zone_group)

        block_device_map = BlockDeviceMapping()

        # GENERIC BLOCK DEVICE MAPPING
        for dev, dev_settings in kwargs.block_device_map.items():
            block_device_map[dev] = BlockDeviceType(delete_on_termination=True,
                                                    **dev_settings)
        kwargs.block_device_map = block_device_map

        # INCLUDE EPHEMERAL STORAGE IN BlockDeviceMapping
        num_ephemeral_volumes = ephemeral_storage[instance_type]["num"]
        for i in range(num_ephemeral_volumes):
            letter = convert.ascii2char(98 + i)  # START AT "b"
            kwargs.block_device_map["/dev/sd" + letter] = BlockDeviceType(
                ephemeral_name='ephemeral' + text(i),
                delete_on_termination=True)

        if kwargs.expiration:
            kwargs.valid_until = (Date.now() +
                                  Duration(kwargs.expiration)).format(ISO8601)
            kwargs.expiration = None

        # ATTACH NEW EBS VOLUMES
        for i, drive in enumerate(self.settings.utility[instance_type].drives):
            letter = convert.ascii2char(98 + i + num_ephemeral_volumes)
            device = drive.device = coalesce(drive.device, "/dev/sd" + letter)
            d = drive.copy()
            d.path = None  # path AND device PROPERTY IS NOT ALLOWED IN THE BlockDeviceType
            d.device = None
            if d.size:
                kwargs.block_device_map[device] = BlockDeviceType(
                    delete_on_termination=True, **d)

        output = list(self.ec2_conn.request_spot_instances(**kwargs))
        return output

    def pricing(self):
        with self.price_locker:
            if self.prices:
                return self.prices

            prices = self._get_spot_prices_from_aws()
            now = Date.now()

            with Timer("processing pricing data"):
                hourly_pricing = jx.run({
                    "from": {
                        # AWS PRICING ONLY SENDS timestamp OF CHANGES, MATCH WITH NEXT INSTANCE
                        "from":
                        prices,
                        "window": [
                            {
                                "name": "expire",
                                "value": {
                                    "coalesce": [{
                                        "rows": {
                                            "timestamp": 1
                                        }
                                    }, {
                                        "date": "eod"
                                    }]
                                },
                                "edges":
                                ["availability_zone", "instance_type"],
                                "sort": "timestamp"
                            },
                            {  # MAKE THIS PRICE EFFECTIVE INTO THE PAST, THIS HELPS SPREAD PRICE SPIKES OVER TIME
                                "name": "effective",
                                "value": {
                                    "sub": {
                                        "timestamp":
                                        self.settings.uptime.duration.seconds
                                    }
                                }
                            }
                        ]
                    },
                    "edges": [
                        "availability_zone", "instance_type", {
                            "name": "time",
                            "range": {
                                "min": "effective",
                                "max": "expire",
                                "mode": "inclusive"
                            },
                            "allowNulls": False,
                            "domain": {
                                "type": "time",
                                "min":
                                now.floor(HOUR) - self.settings.uptime.history,
                                "max": Date.now().floor(HOUR) + HOUR,
                                "interval": "hour"
                            }
                        }
                    ],
                    "select": [{
                        "value": "price",
                        "aggregate": "max"
                    }, {
                        "aggregate": "count"
                    }],
                    "where": {
                        "gt": {
                            "expire":
                            now.floor(HOUR) - self.settings.uptime.history
                        }
                    },
                    "window": [{
                        "name": "current_price",
                        "value": "rows.last.price",
                        "edges": ["availability_zone", "instance_type"],
                        "sort": "time"
                    }]
                }).data

                bid80 = jx.run({
                    "from":
                    ListContainer(name=None, data=hourly_pricing),
                    "edges": [{
                        "value": "availability_zone",
                        "allowNulls": False
                    }, {
                        "name": "type",
                        "value": "instance_type",
                        "allowNulls": False,
                        "domain": {
                            "type": "set",
                            "key": "instance_type",
                            "partitions": self.settings.utility
                        }
                    }],
                    "select": [{
                        "name":
                        "price_80",
                        "value":
                        "price",
                        "aggregate":
                        "percentile",
                        "percentile":
                        self.settings.uptime.bid_percentile
                    }, {
                        "name": "max_price",
                        "value": "price",
                        "aggregate": "max"
                    }, {
                        "aggregate": "count"
                    }, {
                        "value": "current_price",
                        "aggregate": "one"
                    }, {
                        "name": "all_price",
                        "value": "price",
                        "aggregate": "list"
                    }],
                    "window": [
                        {
                            "name": "estimated_value",
                            "value": {
                                "div": ["type.utility", "price_80"]
                            }
                        },
                        {
                            "name":
                            "higher_price",
                            "value":
                            lambda row, rownum, rows: find_higher(
                                row.all_price, row.price_80)
                        }  # TODO: SUPPORT {"from":"all_price", "where":{"gt":[".", "price_80"]}, "select":{"aggregate":"min"}}
                    ]
                })

                output = jx.sort(bid80.values(), {
                    "value": "estimated_value",
                    "sort": -1
                })

                self.prices = wrap(output)
                self.price_lookup = UniqueIndex(
                    ("type.instance_type", "availability_zone"),
                    data=self.prices)
            return self.prices

    def _get_spot_prices_from_aws(self):
        with Timer("Read no capacity file"):
            try:
                # FILE IS LIST OF {instance_type, last_failure} OBJECTS
                content = self.no_capacity_file.read()
                self.no_capacity = dict(
                    (r.instance_type, r.last_failure)
                    for r in convert.json2value(
                        content, flexible=False, leaves=False))
            except Exception as e:
                self.no_capacity = {}

        with Timer("Read pricing file"):
            try:
                content = File(self.settings.price_file).read()
                cache = convert.json2value(content,
                                           flexible=False,
                                           leaves=False)
            except Exception as e:
                cache = FlatList()

        cache = ListContainer(name=None, data=cache)
        most_recents = jx.run({
            "from": cache,
            "edges": ["instance_type", "availability_zone"],
            "select": {
                "value": "timestamp",
                "aggregate": "max"
            }
        })

        zones = self._get_valid_availability_zones()
        prices = set(cache)
        with Timer("Get pricing from AWS"):
            for instance_type in self.settings.utility.keys():
                for zone in zones:
                    if cache:
                        most_recent = most_recents[{
                            "instance_type": instance_type,
                            "availability_zone": zone
                        }].timestamp
                        start_at = MAX(
                            [Date(most_recent),
                             Date.today() - WEEK])
                    else:
                        start_at = Date.today() - WEEK

                    if DEBUG_PRICING:
                        Log.note(
                            "get pricing for {{instance_type}} starting at {{start_at}}",
                            instance_type=instance_type,
                            start_at=start_at)

                    next_token = None
                    while True:
                        resultset = self.ec2_conn.get_spot_price_history(
                            product_description=coalesce(
                                self.settings.product,
                                "Linux/UNIX (Amazon VPC)"),
                            instance_type=instance_type,
                            availability_zone=zone,
                            start_time=start_at.format(ISO8601),
                            next_token=next_token)
                        next_token = resultset.next_token

                        for p in resultset:
                            prices.add(
                                wrap({
                                    "availability_zone": p.availability_zone,
                                    "instance_type": p.instance_type,
                                    "price": p.price,
                                    "product_description":
                                    p.product_description,
                                    "region": p.region.name,
                                    "timestamp": Date(p.timestamp).unix
                                }))

                        if not next_token:
                            break

        with Timer("Save prices to file"):
            new_prices = jx.filter(
                prices, {"gte": {
                    "timestamp": {
                        "date": "today-2day"
                    }
                }})

            def stream():  # IT'S A LOT OF PRICES, STREAM THEM TO FILE
                prefix = "[\n"
                for p in new_prices:
                    yield prefix
                    yield convert.value2json(p)
                    prefix = ",\n"
                yield "]"

            File(self.settings.price_file).write(stream())

        return ListContainer(name="prices", data=prices)
        def life_cycle_watcher(please_stop):
            bad_requests = Data()
            setup_threads = []
            last_get = Date.now()
            setup_in_progress = set()

            while not please_stop:
                spot_requests = self._get_managed_spot_requests()
                instances = wrap({
                    i.id: i
                    for r in self.ec2_conn.get_all_instances()
                    for i in r.instances
                })
                # INSTANCES THAT REQUIRE SETUP
                time_to_stop_trying = {}
                please_setup = [
                    (i, r) for i, r in [(instances[r.instance_id], r)
                                        for r in spot_requests]
                    if i.id and (not i.tags.get("Name") or i.tags.get(
                        "Name") == self.settings.ec2.instance.name +
                                 " (setup)") and i.id not in setup_in_progress
                    and i._state.name == "running"
                    and Date.now() > Date(i.launch_time) + DELAY_BEFORE_SETUP
                ]

                for i, r in please_setup:
                    if not time_to_stop_trying.get(i.id):
                        time_to_stop_trying[
                            i.id] = Date.now() + TIME_FROM_RUNNING_TO_LOGIN
                    if Date.now() > time_to_stop_trying[i.id]:
                        # FAIL TO SETUP AFTER x MINUTES, THEN TERMINATE INSTANCE
                        self.ec2_conn.terminate_instances(instance_ids=[i.id])
                        with self.net_new_locker:
                            self.net_new_spot_requests.remove(r.id)
                        Log.warning(
                            "Problem with setup of {{instance_id}}.  Time is up.  Instance TERMINATED!",
                            instance_id=i.id)
                        continue

                    try:
                        p = self.settings.utility[i.instance_type]
                        if p == None:
                            try:
                                self.ec2_conn.terminate_instances(
                                    instance_ids=[i.id])
                                with self.net_new_locker:
                                    self.net_new_spot_requests.remove(r.id)
                            finally:
                                Log.error(
                                    "Can not setup unknown {{instance_id}} of type {{type}}",
                                    instance_id=i.id,
                                    type=i.instance_type)

                        i.markup = p
                        i.add_tag("Name",
                                  self.settings.ec2.instance.name + " (setup)")
                        setup_in_progress.add(i.id)
                        t = Thread.run("setup for " + text(i.id), track_setup,
                                       self.instance_manager.setup, r, i, p)
                        if SINGLE_THREAD_SETUP:
                            t.join()
                        setup_threads.append(t)
                    except Exception as e:
                        i.add_tag("Name", "")
                        Log.warning("Unexpected failure on startup",
                                    instance_id=i.id,
                                    cause=e)

                if Date.now() - last_get > 5 * SECOND:
                    # REFRESH STALE
                    spot_requests = self._get_managed_spot_requests()
                    last_get = Date.now()

                pending = wrap([
                    r for r in spot_requests
                    if r.status.code in PENDING_STATUS_CODES
                ])
                give_up = wrap([
                    r for r in spot_requests
                    if (r.status.code in PROBABLY_NOT_FOR_A_WHILE
                        | TERMINATED_STATUS_CODES) and r.id not in bad_requests
                ])
                ignore = wrap([
                    r for r in spot_requests if r.status.code in MIGHT_HAPPEN
                ])  # MIGHT HAPPEN, BUT NO NEED TO WAIT FOR IT

                if self.done_making_new_spot_requests:
                    with self.net_new_locker:
                        expired = Date.now(
                        ) - self.settings.run_interval + 2 * MINUTE
                        for ii in list(self.net_new_spot_requests):
                            if Date(ii.create_time) < expired:
                                # SOMETIMES REQUESTS NEVER GET INTO THE MAIN LIST OF REQUESTS
                                self.net_new_spot_requests.remove(ii)

                        for g in ignore:
                            self.net_new_spot_requests.remove(g.id)
                        pending = UniqueIndex(("id", ), data=pending)
                        pending = pending | self.net_new_spot_requests

                    if give_up:
                        self.ec2_conn.cancel_spot_instance_requests(
                            request_ids=give_up.id)
                        Log.note(
                            "Cancelled spot requests {{spots}}, {{reasons}}",
                            spots=give_up.id,
                            reasons=give_up.status.code)

                        for g in give_up:
                            bad_requests[g.id] += 1
                            if g.id in self.net_new_spot_requests:
                                self.net_new_spot_requests.remove(g.id)
                                if g.status.code == "capacity-not-available":
                                    self.no_capacity[
                                        g.launch_specification.
                                        instance_type] = Date.now()
                                if g.status.code == "bad-parameters":
                                    self.no_capacity[
                                        g.launch_specification.
                                        instance_type] = Date.now()
                                    Log.warning(
                                        "bad parameters while requesting type {{type}}",
                                        type=g.launch_specification.
                                        instance_type)

                if not pending and self.done_making_new_spot_requests:
                    Log.note("No more pending spot requests")
                    break
                elif pending:
                    Log.note("waiting for spot requests: {{pending}}",
                             pending=[p.id for p in pending])

                (Till(seconds=10) | please_stop).wait()

            with Timer("Save no capacity to file"):
                table = [{
                    "instance_type": k,
                    "last_failure": v
                } for k, v in self.no_capacity.items()]
                self.no_capacity_file.write(value2json(table, pretty=True))

            # WAIT FOR SETUP TO COMPLETE
            for t in setup_threads:
                t.join()

            Log.note("life cycle watcher has stopped")
Exemplo n.º 20
0
    def _scan_database(self):
        # GET ALL RELATIONS
        raw_relations = self.db.query("""
            SELECT
                table_schema,
                table_name,
                referenced_table_schema,
                referenced_table_name,
                referenced_column_name,
                constraint_name,
                column_name,
                ordinal_position
            FROM
                information_schema.key_column_usage
            WHERE
                referenced_column_name IS NOT NULL
        """,
                                      param=self.settings.database)

        if not raw_relations:
            Log.error("No relations in the database")

        for r in self.settings.add_relations:
            try:
                a, b = map(strings.trim, r.split("->"))
                a = a.split(".")
                b = b.split(".")
                raw_relations.append(
                    Data(table_schema=a[0],
                         table_name=a[1],
                         referenced_table_schema=b[0],
                         referenced_table_name=b[1],
                         referenced_column_name=b[2],
                         constraint_name=Random.hex(20),
                         column_name=a[2],
                         ordinal_position=1))
            except Exception as e:
                Log.error("Could not parse {{line|quote}}", line=r, cause=e)

        relations = jx.select(raw_relations,
                              [{
                                  "name": "constraint.name",
                                  "value": "constraint_name"
                              }, {
                                  "name": "table.schema",
                                  "value": "table_schema"
                              }, {
                                  "name": "table.name",
                                  "value": "table_name"
                              }, {
                                  "name": "column.name",
                                  "value": "column_name"
                              }, {
                                  "name": "referenced.table.schema",
                                  "value": "referenced_table_schema"
                              }, {
                                  "name": "referenced.table.name",
                                  "value": "referenced_table_name"
                              }, {
                                  "name": "referenced.column.name",
                                  "value": "referenced_column_name"
                              }, {
                                  "name": "ordinal_position",
                                  "value": "ordinal_position"
                              }])

        # GET ALL TABLES
        raw_tables = self.db.query("""
            SELECT
                t.table_schema,
                t.table_name,
                c.constraint_name,
                c.constraint_type,
                k.column_name,
                k.ordinal_position
            FROM
                information_schema.tables t
            LEFT JOIN
                information_schema.table_constraints c on c.table_name=t.table_name AND c.table_schema=t.table_schema and (constraint_type='UNIQUE' or constraint_type='PRIMARY KEY')
            LEFT JOIN
                information_schema.key_column_usage k on k.constraint_name=c.constraint_name AND k.table_name=t.table_name and k.table_schema=t.table_schema
            ORDER BY
                t.table_schema,
                t.table_name,
                c.constraint_name,
                k.ordinal_position,
                k.column_name
        """,
                                   param=self.settings.database)

        # ORGANIZE, AND PICK ONE UNIQUE CONSTRAINT FOR LINKING
        tables = UniqueIndex(keys=["name", "schema"])
        for t, c in jx.groupby(raw_tables, ["table_name", "table_schema"]):
            c = wrap(list(c))
            best_index = Null
            is_referenced = False
            is_primary = False
            for g, w in jx.groupby(c, "constraint_name"):
                if not g.constraint_name:
                    continue
                w = list(w)
                ref = False
                for r in relations:
                    if r.table.name == t.table_name and r.table.schema == t.table_schema and r.constraint.name == g.constraint_name:
                        ref = True
                is_prime = w[0].constraint_type == "PRIMARY"

                reasons_this_one_is_better = [
                    best_index == None,  # WE DO NOT HAVE A CANDIDATE YET
                    is_prime
                    and not is_primary,  # PRIMARY KEYS ARE GOOD TO HAVE
                    is_primary == is_prime and ref and
                    not is_referenced,  # REFERENCED UNIQUE TUPLES ARE GOOD TOO
                    is_primary == is_prime and ref == is_referenced and len(w)
                    < len(best_index)  # THE SHORTER THE TUPLE, THE BETTER
                ]
                if any(reasons_this_one_is_better):
                    is_primary = is_prime
                    is_referenced = ref
                    best_index = w

            tables.add({
                "name": t.table_name,
                "schema": t.table_schema,
                "id": [b.column_name for b in best_index]
            })

        fact_table = tables[self.settings.fact_table,
                            self.settings.database.schema]
        ids_table = {
            "alias": "t0",
            "name": "__ids__",
            "schema": fact_table.schema,
            "id": fact_table.id
        }
        relations.extend(
            wrap({
                "constraint": {
                    "name": "__link_ids_to_fact_table__"
                },
                "table": ids_table,
                "column": {
                    "name": c
                },
                "referenced": {
                    "table": fact_table,
                    "column": {
                        "name": c
                    }
                },
                "ordinal_position": i
            }) for i, c in enumerate(fact_table.id))
        tables.add(ids_table)

        # GET ALL COLUMNS
        raw_columns = self.db.query("""
            SELECT
                column_name,
                table_schema,
                table_name,
                ordinal_position,
                data_type
            FROM
                information_schema.columns
        """,
                                    param=self.settings.database)

        reference_only_tables = [
            r.split(".")[0] for r in self.settings.reference_only
            if len(r.split(".")) == 2
        ]
        reference_all_tables = [
            r.split(".")[0] for r in self.settings.reference_only
            if len(r.split(".")) == 1
        ]
        foreign_column_table_schema_triples = {(r.column.name, r.table.name,
                                                r.table.schema)
                                               for r in relations}
        referenced_column_table_schema_triples = {
            (r.referenced.column.name, r.referenced.table.name,
             r.referenced.table.schema)
            for r in relations
        }
        related_column_table_schema_triples = foreign_column_table_schema_triples | referenced_column_table_schema_triples

        columns = UniqueIndex(["column.name", "table.name", "table.schema"])
        for c in raw_columns:
            if c.table_name in reference_only_tables:
                if c.table_name + "." + c.column_name in self.settings.reference_only:
                    include = True
                    reference = True
                    foreign = False
                elif c.column_name in tables[(c.table_name,
                                              c.table_schema)].id:
                    include = self.settings.show_foreign_keys
                    reference = False
                    foreign = False
                else:
                    include = False
                    reference = False
                    foreign = False
            elif c.table_name in reference_all_tables:
                # TABLES USED FOR REFERENCE, NO NESTED DOCUMENTS EXPECTED
                if c.column_name in tables[(c.table_name, c.table_schema)].id:
                    include = self.settings.show_foreign_keys
                    reference = True
                    foreign = False
                elif (c.column_name, c.table_name,
                      c.table_schema) in foreign_column_table_schema_triples:
                    include = False
                    reference = False
                    foreign = True
                else:
                    include = True
                    reference = False
                    foreign = False
            elif c.column_name in tables[(c.table_name, c.table_schema)].id:
                include = self.settings.show_foreign_keys
                reference = False
                foreign = False
            elif (c.column_name, c.table_name,
                  c.table_schema) in foreign_column_table_schema_triples:
                include = False
                reference = False
                foreign = True
            elif (c.column_name, c.table_name,
                  c.table_schema) in referenced_column_table_schema_triples:
                include = self.settings.show_foreign_keys
                reference = False
                foreign = False
            else:
                include = True
                reference = False
                foreign = False

            rel = {
                "column": {
                    "name": c.column_name,
                    "type": c.data_type
                },
                "table": {
                    "name": c.table_name,
                    "schema": c.table_schema
                },
                "ordinal_position": c.ordinal_position,
                "is_id": c.column_name
                in tables[(c.table_name, c.table_schema)].id,
                "include": include,  # TRUE IF THIS COLUMN IS OUTPUTTED
                "reference":
                reference,  # TRUE IF THIS COLUMN REPRESENTS THE ROW
                "foreign": foreign  # TRUE IF THIS COLUMN POINTS TO ANOTHER ROW
            }
            columns.add(rel)

        # ITERATE OVER ALL PATHS
        todo = FlatList()
        output_columns = FlatList()
        nested_path_to_join = {}
        all_nested_paths = [["."]]

        def follow_paths(position, path, nested_path, done_relations,
                         no_nested_docs):
            if position.name in self.settings.exclude:
                return
            if DEBUG:
                Log.note("Trace {{path}}", path=path)
            if position.name != "__ids__":
                # USED TO CONFIRM WE CAN ACCESS THE TABLE (WILL THROW ERROR WHEN IF IT FAILS)
                self.db.query("SELECT * FROM " +
                              quote_column(position.name, position.schema) +
                              " LIMIT 1")

            if position.name in reference_all_tables:
                no_nested_docs = True
            if position.name in reference_only_tables:
                return

            curr_join_list = copy(nested_path_to_join[nested_path[0]])

            # INNER OBJECTS
            referenced_tables = list(
                jx.groupby(
                    jx.filter(
                        relations, {
                            "eq": {
                                "table.name": position.name,
                                "table.schema": position.schema
                            }
                        }), "constraint.name"))
            for g, constraint_columns in referenced_tables:
                g = unwrap(g)
                constraint_columns = deepcopy(constraint_columns)
                if g["constraint.name"] in done_relations:
                    continue
                if any(cc for cc in constraint_columns
                       if cc.referenced.table.name in self.settings.exclude):
                    continue

                done_relations.add(g["constraint.name"])

                many_to_one_joins = nested_path_to_join[nested_path[0]]
                index = len(many_to_one_joins)

                alias = "t" + text_type(index)
                for c in constraint_columns:
                    c.referenced.table.alias = alias
                    c.table = position
                many_to_one_joins.append({
                    "join_columns": constraint_columns,
                    "path": path,
                    "nested_path": nested_path
                })

                # referenced_table_path = join_field(split_field(path) + ["/".join(constraint_columns.referenced.table.name)])
                # HANDLE THE COMMON *id SUFFIX
                name = []
                for a, b in zip(constraint_columns.column.name,
                                constraint_columns.referenced.table.name):
                    if a.startswith(b):
                        name.append(b)
                    elif a.endswith("_id"):
                        name.append(a[:-3])
                    else:
                        name.append(a)
                referenced_column_path = join_field(
                    split_field(path) + ["/".join(name)])
                col_pointer_name = relative_field(referenced_column_path,
                                                  nested_path[0])
                # insert into nested1 VALUES (100, 10, 'aaa', -1);
                # id.about.time.nested1 .ref=10
                # id.about.time.nested1 .ref.name
                for col in columns:
                    if col.table.name == constraint_columns[
                            0].referenced.table.name and col.table.schema == constraint_columns[
                                0].referenced.table.schema:
                        col_full_name = concat_field(
                            col_pointer_name, literal_field(col.column.name))

                        if col.is_id and col.table.name == fact_table.name and col.table.schema == fact_table.schema:
                            # ALWAYS SHOW THE ID OF THE FACT
                            c_index = len(output_columns)
                            output_columns.append({
                                "table_alias":
                                alias,
                                "column_alias":
                                "c" + text_type(c_index),
                                "column":
                                col,
                                "sort":
                                True,
                                "path":
                                referenced_column_path,
                                "nested_path":
                                nested_path,
                                "put":
                                col_full_name
                            })
                        elif col.column.name == constraint_columns[
                                0].column.name:
                            c_index = len(output_columns)
                            output_columns.append({
                                "table_alias":
                                alias,
                                "column_alias":
                                "c" + text_type(c_index),
                                "column":
                                col,
                                "sort":
                                False,
                                "path":
                                referenced_column_path,
                                "nested_path":
                                nested_path,
                                "put":
                                col_full_name
                                if self.settings.show_foreign_keys else None
                            })
                        elif col.is_id:
                            c_index = len(output_columns)
                            output_columns.append({
                                "table_alias":
                                alias,
                                "column_alias":
                                "c" + text_type(c_index),
                                "column":
                                col,
                                "sort":
                                False,
                                "path":
                                referenced_column_path,
                                "nested_path":
                                nested_path,
                                "put":
                                col_full_name
                                if self.settings.show_foreign_keys else None
                            })
                        elif col.reference:
                            c_index = len(output_columns)
                            output_columns.append({
                                "table_alias":
                                alias,
                                "column_alias":
                                "c" + text_type(c_index),
                                "column":
                                col,
                                "sort":
                                False,
                                "path":
                                referenced_column_path,
                                "nested_path":
                                nested_path,
                                "put":
                                col_pointer_name
                                if not self.settings.show_foreign_keys else
                                col_full_name  # REFERENCE FIELDS CAN REPLACE THE WHOLE OBJECT BEING REFERENCED
                            })
                        elif col.include:
                            c_index = len(output_columns)
                            output_columns.append({
                                "table_alias":
                                alias,
                                "column_alias":
                                "c" + text_type(c_index),
                                "column":
                                col,
                                "sort":
                                False,
                                "path":
                                referenced_column_path,
                                "nested_path":
                                nested_path,
                                "put":
                                col_full_name
                            })

                if position.name in reference_only_tables:
                    continue

                todo.append(
                    Data(position=copy(constraint_columns[0].referenced.table),
                         path=referenced_column_path,
                         nested_path=nested_path,
                         done_relations=copy(done_relations),
                         no_nested_docs=no_nested_docs))

            # NESTED OBJECTS
            if not no_nested_docs:
                for g, constraint_columns in jx.groupby(
                        jx.filter(
                            relations, {
                                "eq": {
                                    "referenced.table.name": position.name,
                                    "referenced.table.schema": position.schema
                                }
                            }), "constraint.name"):
                    g = unwrap(g)
                    constraint_columns = deepcopy(constraint_columns)
                    if g["constraint.name"] in done_relations:
                        continue
                    done_relations.add(g["constraint.name"])

                    many_table = set(constraint_columns.table.name)
                    if not (many_table - self.settings.exclude):
                        continue

                    referenced_column_path = join_field(
                        split_field(path) + ["/".join(many_table)])
                    new_nested_path = [referenced_column_path] + nested_path
                    all_nested_paths.append(new_nested_path)

                    # if new_path not in self.settings.include:
                    #     Log.note("Exclude nested path {{path}}", path=new_path)
                    #     continue
                    one_to_many_joins = nested_path_to_join[
                        referenced_column_path] = copy(curr_join_list)
                    index = len(one_to_many_joins)
                    alias = "t" + text_type(index)
                    for c in constraint_columns:
                        c.table.alias = alias
                        c.referenced.table = position
                    one_to_many_joins.append(
                        set_default({}, g, {
                            "children": True,
                            "join_columns": constraint_columns,
                            "path": path,
                            "nested_path": nested_path
                        }))
                    # insert into nested1 VALUES (100, 10, 'aaa', -1); # id.about.time.nested1 .ref=10# id.about.time.nested1 .ref.name
                    for col in columns:
                        if col.table.name == constraint_columns[
                                0].table.name and col.table.schema == constraint_columns[
                                    0].table.schema:
                            col_full_name = join_field(
                                split_field(referenced_column_path)
                                [len(split_field(new_nested_path[0])):] +
                                [literal_field(col.column.name)])

                            if col.column.name == constraint_columns[
                                    0].column.name:
                                c_index = len(output_columns)
                                output_columns.append({
                                    "table_alias":
                                    alias,
                                    "column_alias":
                                    "c" + text_type(c_index),
                                    "column":
                                    col,
                                    "sort":
                                    col.is_id,
                                    "path":
                                    referenced_column_path,
                                    "nested_path":
                                    new_nested_path,
                                    "put":
                                    col_full_name if
                                    self.settings.show_foreign_keys else None
                                })
                            elif col.is_id:
                                c_index = len(output_columns)
                                output_columns.append({
                                    "table_alias":
                                    alias,
                                    "column_alias":
                                    "c" + text_type(c_index),
                                    "column":
                                    col,
                                    "sort":
                                    col.is_id,
                                    "path":
                                    referenced_column_path,
                                    "nested_path":
                                    new_nested_path,
                                    "put":
                                    col_full_name if
                                    self.settings.show_foreign_keys else None
                                })
                            else:
                                c_index = len(output_columns)
                                output_columns.append({
                                    "table_alias":
                                    alias,
                                    "column_alias":
                                    "c" + text_type(c_index),
                                    "column":
                                    col,
                                    "sort":
                                    col.is_id,
                                    "path":
                                    referenced_column_path,
                                    "nested_path":
                                    new_nested_path,
                                    "put":
                                    col_full_name if col.include else None
                                })

                    todo.append(
                        Data(position=constraint_columns[0].table,
                             path=referenced_column_path,
                             nested_path=new_nested_path,
                             done_relations=copy(done_relations),
                             no_nested_docs=no_nested_docs))

        path = "."
        nested_path = [path]
        nested_path_to_join["."] = [{
            "path":
            path,
            "join_columns": [{
                "referenced": {
                    "table": ids_table
                }
            }],
            "nested_path":
            nested_path
        }]

        todo.append(
            Data(position=ids_table,
                 path=path,
                 nested_path=nested_path,
                 done_relations=set(),
                 no_nested_docs=False))

        while todo:
            item = todo.pop(0)
            follow_paths(**item)

        self.all_nested_paths = all_nested_paths
        self.nested_path_to_join = nested_path_to_join
        self.columns = output_columns