Example #1
def target_counts_demand() -> None:
    """
    Handle a mixture of 'target count' style allocation of ncpus and nodes via the
    DemandCalculator.
    """
    dcalc = new_demand_calculator(CONFIG)

    # job requires 10 cores (ncpus)
    dcalc.add_job(
        Job(
            "tc-10",
            {
                "node.nodearray": "htc",
                "ncpus": 1,
                "exclusive": False
            },
            iterations=10,
        ))

    # 10 nodes
    dcalc.add_job(
        Job(
            "tn-10",
            {
                "node.nodearray": "htc",
                "ncpus": 4,
                "exclusive": True
            },
            node_count=10,
        ))

    # 2 x 5 nodes - exclusive, so nodes from tc-10 cannot be reused
    dcalc.add_job(
        Job(
            "tn-2x5",
            {
                "node.nodearray": "htc",
                "ncpus": 2,
                "exclusive": True
            },
            node_count=5,
        ))

    demand_result = dcalc.finish()

    if not DRY_RUN:
        dcalc.bootup()

    print_demand(["name", "job_ids", "nodearray", "ncpus", "*ncpus"],
                 demand_result)

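    # 18 = 10 exclusive nodes for tn-10 + 5 exclusive nodes for tn-2x5 + 3 shared
    # nodes packing tc-10's ten 1-core tasks (assuming 4-core VMs back the htc array).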
    assert len(demand_result.new_nodes) == 18
Example #2
def print_demand(
    config: Dict,
    demand_result: DemandResult,
    output_columns: Optional[List[str]] = None,
    output_format: Optional[str] = None,
    log: bool = False,
) -> DemandResult:
    # use the demand printer to print the demand_result.
    for node in demand_result.matched_nodes + demand_result.unmatched_nodes:
        # ignore @allhosts - every node will eventually end up there.
        node.available["hostgroups"] = [
            x for x in get_node_hostgroups(config, node) if x != "@allhosts"
        ]
        node._resources["hostgroups"] = node.available["hostgroups"]

    if not output_columns:
        output_columns = config.get(
            "output_columns",
            [
                "name",
                "hostname",
                "job_ids",
                "hostgroups",
                "exists",
                "required",
                "managed",
                "slots",
                "*slots",
                "vm_size",
                "memory",
                "vcpu_count",
                "state",
                "placement_group",
                "create_time_remaining",
                "idle_time_remaining",
            ],
        )

    if "all" in output_columns:  # type: ignore
        output_columns = []

    output_format = output_format or "table"

    demandprinter.print_demand(
        output_columns,
        demand_result,
        output_format=output_format,
        log=log,
    )
    return demand_result
def _print_demand(output_format: OutputFormat) -> str:
    stream = io.StringIO()

    node = SchedulerNode("tux", {"ncpus": 2, "mem": Memory.value_of("1.0g")})
    node.available["ncpus"] = 1
    node.assign("11")
    node.assign("12")

    result = DemandResult([], [node], [], [])
    print_demand(
        ["hostname", "job_ids", "ncpus", "*ncpus", "mem"],
        result,
        stream=stream,
        output_format=output_format,
    )
    return stream.getvalue()
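
# A sketch of how the helper above might be exercised (an assumption, not taken
# from the original tests). "table" and "json" are assumed to be valid
# OutputFormat values; only coarse substrings are checked.
def test_print_demand_formats() -> None:
    for fmt in ["table", "json"]:
        out = _print_demand(fmt)
        assert "tux" in out                   # hostname defined above
        assert "11" in out and "12" in out    # job ids assigned above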
Example #4
def common_cluster_test(qsub_commands: List[str],
                        pg_counts: Optional[Dict[str, int]] = None,
                        previous_dcalc: Optional[DemandCalculator] = None,
                        **array_counts: int) -> DemandCalculator:
    pg_counts = pg_counts or {}
    dcalc = common_cluster(qsub_commands, previous_dcalc)
    demand = dcalc.get_demand()
    demandprinter.print_demand(["name", "job_ids", "placement_group"], demand)

    # sanity check that we don't recreate the same node
    partition_single(demand.new_nodes, lambda n: n.name)
    by_array = partition(demand.new_nodes, lambda n: n.nodearray)
    by_pg = partition(demand.new_nodes, lambda n: n.placement_group)
    if set(by_pg.keys()) != set([None]):
        if set(by_pg.keys()) != set(pg_counts.keys()):
            assert False, "\n%s\n%s" % (
                [(x, len(y)) for x, y in by_pg.items()],
                pg_counts,
            )
        assert set(by_pg.keys()) == set(pg_counts.keys())
        assert not (bool(by_pg) ^ bool(pg_counts))

    if pg_counts:
        for pg_name, count in pg_counts.items():
            assert pg_name in by_pg
            assert (
                len(by_pg[pg_name]) == count
            ), "Expected pg {} to have {} nodes. Found {}. Full {}".format(
                pg_name,
                count,
                len(by_pg[pg_name]),
                [(x, len(y)) for x, y in by_pg.items()],
            )

        for pg_name in by_pg:
            assert pg_name in pg_counts

    for nodearray_name, count in array_counts.items():
        assert nodearray_name in by_array
        assert len(by_array[nodearray_name]) == count, [
            n.name for n in by_array[nodearray_name]
        ]

    for nodearray_name in by_array:
        assert nodearray_name in array_counts

    return dcalc
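
# Hypothetical usage (illustration only - the qsub command strings, nodearray
# name and expected count below are assumptions, not from a real test run):
# **array_counts maps nodearray names to the expected number of new nodes, and
# pg_counts maps placement group names to expected node counts.
def example_common_cluster_usage() -> None:
    common_cluster_test(
        ["qsub -t 1-8 sleep.sh", "qsub -t 1-8 sleep.sh"],  # assumed commands
        htc=4,  # e.g. expect 4 new nodes in the "htc" nodearray
    )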
Example #5
    def scale_up() -> DemandCalculator:
        dcalc = new_demand_calculator(CONFIG)

        dcalc.add_job(
            Job("tc-100", {"node.nodearray": "htc", "ncpus": 1}, iterations=50)
        )

        demand_result = dcalc.finish()

        if not DRY_RUN:
            dcalc.bootup()

        print_demand(columns, demand_result)

        dcalc.node_history.conn.close()

        return dcalc
Example #6
    def scale_down(dcalc: typing.Optional[DemandCalculator]) -> None:
        dcalc = dcalc or new_demand_calculator(CONFIG)
        dcalc.add_job(
            Job("tc-50", {
                "node.nodearray": "htc",
                "ncpus": 1
            }, iterations=25))

        demand_result = dcalc.finish()

        if not DRY_RUN:
            dcalc.bootup()

        print_demand(columns, demand_result)

        print("The following nodes can be shutdown: {}".format(",".join(
            [n.name for n in demand_result.unmatched_nodes])))
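
        # Assumed extension (not part of the original example): actually release
        # idle capacity by deleting nodes that have been unmatched for a while,
        # mirroring the find_unmatched_for/delete pattern from Example #8.
        if not DRY_RUN:
            idle_nodes = dcalc.find_unmatched_for(at_least=180)
            if idle_nodes:
                dcalc.delete(idle_nodes)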
Example #7
def print_demand(
    config: Dict,
    demand_result: DemandResult,
    output_columns: Optional[List[str]] = None,
    output_format: Optional[str] = None,
    log: bool = False,
) -> DemandResult:
    # use the demand printer to print the demand_result.
    if not output_columns:
        output_columns = config.get(
            "output_columns",
            [
                "name",
                "hostname",
                "job_ids",
                "*hostgroups",
                "exists",
                "required",
                "managed",
                "slots",
                "*slots",
                "vm_size",
                "memory",
                "vcpu_count",
                "state",
                "placement_group",
                "create_time_remaining",
                "idle_time_remaining",
            ],
        )

    if "all" in output_columns:  # type: ignore
        output_columns = []

    output_format = output_format or "table"

    demandprinter.print_demand(
        output_columns,
        demand_result,
        output_format=output_format,
        log=log,
    )
    return demand_result
Example #8
def auto() -> None:
    CONFIG = json_load("/opt/cycle/scalelib/autoscale.json")

    MIN_CORE_COUNT = 4
    WARM_BUFFER = 2

    # Get hosts / tasks
    celery_d = celery_status()

    dcalc = demandcalculator.new_demand_calculator(
        CONFIG,
        existing_nodes=celery_d.scheduler_nodes,
        node_history=SQLiteNodeHistory())

    dcalc.add_jobs(celery_d.jobs)
    n_jobs = len(celery_d.jobs)
    n_add_jobs = max(n_jobs + WARM_BUFFER, max(n_jobs, MIN_CORE_COUNT))
    if n_add_jobs > 0:
        # RIGHT-SIZE based on Min Count and Buffer
        # It's possible that the padded jobs will float around extending the timer
        # but it seems like they're placed in some kind of normal order that's
        # preserved across autoscale runs
        print("add padding of %d jobs, to existing %d" % (n_add_jobs, n_jobs))
        dcalc.add_jobs(job_buffer(n_add_jobs))

    demand_result = dcalc.finish()
    output_columns = [
        "name", "hostname", "job_ids", "required", "slots", "vm_size",
        "vcpu_count", "state"
    ]

    print_demand(output_columns, demand_result)
    dcalc.bootup()
    delete_result = dcalc.find_unmatched_for(at_least=180)
    if delete_result:
        try:
            dcalc.delete(delete_result)
        except Exception as e:
            _exit_code = 1
            logging.warning(
                "Deletion failed, will retry on next iteration: %s", e)
            logging.exception(str(e))
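
# Sketch of the job_buffer helper referenced above (an assumption - its real
# implementation is not shown in this example): build n single-core padding
# jobs so the calculator keeps a warm buffer of capacity. Assumes the Job and
# typing.List imports used in the other examples; the real helper may also
# target a specific nodearray.
def job_buffer(n: int) -> List[Job]:
    return [Job("buffer-%02d" % i, {"ncpus": 1}, iterations=1) for i in range(n)]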
Example #9
def target_counts_demand() -> None:
    """
    Handle a mixture of 'target count' style allocation of ncpus and nodes via the
    DemandCalculator.
    """
    dcalc = new_demand_calculator(CONFIG)

    # job requires 10 cores (ncpus)
    dcalc.add_job(
        Job(
            name="tc-10",
            constraints={"node.nodearray": "htc", "ncpus": 1, "exclusive": False},
            iterations=10,
        )
    )

    # job requires 10 nodes with 4 cores (ncpus)
    dcalc.add_job(
        Job(
            name="tn-10",
            constraints={"node.nodearray": "htc", "ncpus": 4, "exclusive": True},
            node_count=10,
        )
    )

    # 2 x 5 nodes
    dcalc.add_job(
        Job(
            name="tn-2x5",
            constraints={"node.nodearray": "htc", "ncpus": 2, "exclusive": True},
            node_count=5,
        ),
    )

    demand_result = dcalc.finish()

    if not DRY_RUN:
        dcalc.bootup()

    # note that /ncpus will display available/total. ncpus will display the total, and
    # *ncpus will display available.
    print_demand(["name", "job_ids", "nodearray", "/ncpus"], demand_result)
Example #10
def onprem_burst_demand() -> None:
    onprem001 = SchedulerNode("onprem001",
                              resources={
                                  "onprem": True,
                                  "nodetype": "A",
                                  "ncpus": 16
                              })
    onprem002 = SchedulerNode("onprem002",
                              resources={
                                  "onprem": True,
                                  "nodetype": "A",
                                  "ncpus": 32
                              })

    # onprem002 already has 10 cores occupied
    onprem002.available["ncpus"] -= 10

    dcalc = new_demand_calculator(CONFIG,
                                  existing_nodes=[onprem001, onprem002])
    dcalc.node_mgr.add_default_resource({"node.nodearray": ["htc", "htcspot"]},
                                        "nodetype", "A")
    assert [b for b in dcalc.node_mgr.get_buckets()
            if b.nodearray == "htc"][0].resources["nodetype"] == "A"
    dcalc.node_mgr.add_default_resource({}, "nodetype", "B")

    assert [b for b in dcalc.node_mgr.get_buckets()
            if b.nodearray == "htc"][0].resources["nodetype"] == "A"
    # we want 50 ncpus, but only 38 are available on-premises, so we need to
    # burst 12 more cores.
    dcalc.add_job(Job("tc-100", {"nodetype": "A", "ncpus": 1}, iterations=50))

    demand_result = dcalc.finish()

    if not DRY_RUN:
        dcalc.bootup()

    # note that a column can be given a default by appending ":<value>", e.g.
    # onprem:False, since onprem is only defined on the on-premises nodes and
    # not on the Azure nodes.
    print_demand(["name", "job_ids", "nodetype", "onprem:False", "*ncpus"],
                 demand_result)
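
    # Sketch (assumption, not in the original example): the new Azure nodes
    # should cover at least the 12 cores that do not fit on-premises, assuming
    # each new node defines an "ncpus" resource.
    requested_cores = sum(n.resources["ncpus"] for n in demand_result.new_nodes)
    assert requested_cores >= 12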