def run(args: argparse.Namespace) -> None:
    """Terminate the EC2 instances whose IDs were given on the command line.

    Fetches the instance objects first so the post-terminate report can show
    their details.
    """
    check_required_vars()
    targets = list(boto3.resource("ec2").instances.filter(InstanceIds=args.instances))
    for instance in targets:
        instance.terminate()
    print("Destroyed instances:")
    print_instances(targets, args.output_format)
def configure_parser(parser: argparse.ArgumentParser) -> None:
    """Register the positional arguments for this subcommand.

    Fix: the original statements ended in stray trailing commas, turning each
    `parser.add_argument(...)` call into a discarded one-element tuple
    expression; harmless at runtime but misleading and linter-hostile.
    """
    check_required_vars()
    parser.add_argument("instance", help="The ID of the instance to connect to")
    parser.add_argument("command", nargs="*", help="The command to run via SSH, if any")
def configure_parser(parser: argparse.ArgumentParser) -> None:
    """Register this subcommand's instance ID and git-rev arguments.

    Fix: removed the stray trailing commas that made each
    `parser.add_argument(...)` statement a discarded one-element tuple
    expression.
    """
    check_required_vars()
    parser.add_argument("instance", help="The ID of the instance to connect to")
    parser.add_argument("--rev", help="The git rev to checkout", default="HEAD")
def configure_parser(parser: argparse.ArgumentParser) -> None:
    """Attach the listing subcommand's arguments to *parser*.

    NOTE: the `who` default is computed once, at parser-configuration time,
    via whoami().
    """
    check_required_vars()
    parser.add_argument(
        "who",
        nargs="*",
        default=[whoami()],
        help="Whose instances to show (defaults to yourself)",
    )
    parser.add_argument("--all", action="store_true", help="Show all instances")
    parser.add_argument("--output-format", choices=["table", "csv"], default="table")
def run(args: argparse.Namespace) -> None:
    """Launch a cluster of scratch EC2 instances from machine descriptions.

    Descriptions come either from misc/scratch/<machine>.json (--machine) or
    from stdin as concatenated JSON objects. With --ssh, exactly one machine
    must be described; after launch we ssh into it.

    Raises RuntimeError for malformed --extra-tags, --ssh with multiple
    machines, or a non-positive --max-age-days.
    """
    extra_tags = {}
    if args.extra_tags:
        extra_tags = json.loads(args.extra_tags)
        tags_ok = isinstance(extra_tags, dict) and all(
            isinstance(k, str) and isinstance(v, str) for k, v in extra_tags.items()
        )
        if not tags_ok:
            raise RuntimeError(
                "extra-tags must be a JSON dictionary of strings to strings")

    check_required_vars()
    # Always stamp instances with the launching user so they can be found later.
    extra_tags["LaunchedBy"] = whoami()

    if args.machine:
        with open(ROOT / "misc" / "scratch" / "{}.json".format(args.machine)) as f:
            print("Reading machine configs from {}".format(f.name))
            raw_configs = f.read()
    else:
        print("Reading machine configs from stdin...")
        raw_configs = sys.stdin.read()
    descs = [MachineDesc.parse_obj(obj) for obj in multi_json(raw_configs)]

    if args.ssh and len(descs) != 1:
        raise RuntimeError("Cannot use `--ssh` with {} instances".format(
            len(descs)))
    if args.max_age_days <= 0:
        raise RuntimeError(
            f"max_age_days must be positive, got {args.max_age_days}")
    max_age = datetime.timedelta(days=args.max_age_days)

    instances = launch_cluster(
        descs,
        subnet_id=args.subnet_id,
        key_name=args.key_name,
        security_group_id=args.security_group_id,
        instance_profile=args.instance_profile,
        extra_tags=extra_tags,
        delete_after=datetime.datetime.utcnow() + max_age,
        git_rev=args.git_rev,
        extra_env={},
    )

    print("Launched instances:")
    print_instances(instances, args.output_format)

    if args.ssh:
        print("ssh-ing into: {}".format(instances[0].instance_id))
        mssh(instances[0], "")
def run(args: argparse.Namespace) -> None:
    """Launch a scratch cluster from machine-description JSON read on stdin.

    Unlike the file-driven variant, this builds MachineDesc objects by hand
    from raw dicts and honors --no-instance-profile.
    """
    instance_profile = None if args.no_instance_profile else args.instance_profile

    extra_tags = {}
    if args.extra_tags:
        extra_tags = json.loads(args.extra_tags)
        well_formed = isinstance(extra_tags, dict) and all(
            isinstance(k, str) and isinstance(v, str) for k, v in extra_tags.items()
        )
        if not well_formed:
            raise RuntimeError(
                "extra-tags must be a JSON dictionary of strings to strings"
            )

    check_required_vars()
    # Tag instances with the launching user for later discovery/cleanup.
    extra_tags["LaunchedBy"] = whoami()

    descs = []
    for obj in multi_json(sys.stdin.read()):
        descs.append(
            MachineDesc(
                name=obj["name"],
                launch_script=obj.get("launch_script"),
                instance_type=obj["instance_type"],
                ami=obj["ami"],
                tags=obj.get("tags", dict()),
                size_gb=obj["size_gb"],
                checkout=obj.get("checkout", True),
            )
        )

    nonce = util.nonce(8)
    delete_after = now_plus(MAX_AGE)
    instances = launch_cluster(
        descs,
        nonce,
        args.subnet_id,
        args.key_name,
        args.security_group_id,
        instance_profile,
        extra_tags,
        delete_after,
        args.git_rev,
        extra_env={},
    )

    print("Launched instances:")
    print_instances(instances, args.output_format)
def run(args: argparse.Namespace) -> None:
    """Destroy scratch instances, either by explicit ID or all of the caller's.

    Exits with status 1 on conflicting/missing selection, status 0 if the user
    declines the confirmation prompt.
    """
    check_required_vars()

    # Only consider instances that still exist (any non-terminated state).
    filters: List[FilterTypeDef] = [
        {
            "Name": "instance-state-name",
            "Values": ["pending", "running", "stopping", "stopped"],
        }
    ]
    instance_ids = []

    if args.all_mine:
        if args.instances:
            print(
                "scratch: error: cannot specify --all-mine and instance IDs",
                file=sys.stderr,
            )
            sys.exit(1)
        filters.append({"Name": "tag:LaunchedBy", "Values": [whoami()]})
    elif args.instances:
        instance_ids.extend(args.instances)
    else:
        print(
            "scratch: error: must supply at least one instance ID to destroy",
            file=sys.stderr,
        )
        sys.exit(1)

    instances = list(
        boto3.resource("ec2").instances.filter(
            Filters=filters, InstanceIds=instance_ids
        )
    )

    print("Destroying instances:")
    print_instances(instances, args.output_format)
    if not args.yes and not ui.confirm("Would you like to continue?"):
        sys.exit(0)

    for instance in instances:
        instance.terminate()
    print("Instances destroyed.")
def configure_parser(parser: argparse.ArgumentParser) -> None:
    """Register this subcommand's arguments; it takes none of its own.

    check_required_vars() is still invoked so that missing configuration is
    reported at parser-setup time, matching the sibling subcommands.
    (NOTE(review): exact semantics of check_required_vars are defined
    elsewhere — presumably environment-variable validation; confirm.)
    """
    check_required_vars()
def configure_parser(parser: argparse.ArgumentParser) -> None:
    """Register the single positional instance-ID argument.

    Fix: removed the stray trailing comma that made the
    `parser.add_argument(...)` statement a discarded one-element tuple
    expression.
    """
    check_required_vars()
    parser.add_argument("instance", help="The ID of the instance to connect to")
def start(ns: argparse.Namespace) -> None:
    """Launch a cloud benchmark: one EC2 cluster per (trial, git-rev) pair.

    Packages the local `misc` Python tree as an sdist, ships it to each
    remote machine via a base64-embedded launch script, runs the benchmark
    script there, and uploads results (or failure logs) to S3 under a fresh
    bench ID. Prints the bench ID so `bin/cloudbench check` can poll for
    results.
    """
    check_required_vars()

    revs = ns.revs.split(",")

    # One cluster per (trial index, resolved commit SHA).
    clusters = list(
        itertools.product(range(ns.trials), (git.rev_parse(rev) for rev in revs)))

    bench_script = ns.bench_script
    script_name = bench_script[0]
    script_args = " ".join((shlex.quote(arg) for arg in bench_script[1:]))

    # zip up the `misc` repository, for shipment to the remote machine
    os.chdir("misc/python")
    spawn.runv(["python3", "./setup.py", "sdist"])
    with open("./dist/materialize-0.0.0.tar.gz", "rb") as f:
        pkg_data = f.read()
    os.chdir(os.environ["MZ_ROOT"])

    # When appending metadata, pipe results through an awk program that adds
    # a header on row 1 and timestamp/bench/cluster/rev/s3 columns to every
    # other row; the quoting interleaves shell and awk strings deliberately.
    if ns.append_metadata:
        munge_result = 'awk \'{ if (NR == 1) { print $0 ",Timestamp,BenchId,ClusterId,GitRef,S3Root" } else { print $0 ",\'$(date +%s)",$MZ_CB_BENCH_ID,$MZ_CB_CLUSTER_ID,$MZ_CB_GIT_REV,$MZ_CB_S3_ROOT"\'"}}\''
    else:
        munge_result = "cat"

    # Remote launch script: install the shipped package into a venv, run the
    # benchmark module, upload the CSV (or failure logs) to S3, then shut the
    # machine down.
    mz_launch_script = f"""echo {shlex.quote(base64.b64encode(pkg_data).decode('utf-8'))} | base64 -d > mz.tar.gz
python3 -m venv /tmp/mzenv >&2
. /tmp/mzenv/bin/activate >&2
python3 -m pip install --upgrade pip >&2
pip3 install ./mz.tar.gz[dev] >&2
MZ_ROOT=/home/ubuntu/materialize python3 -u -m {script_name} {script_args}
result=$?
echo $result > ~/bench_exit_code
if [ $result -eq 0 ]; then
    {munge_result} < ~/mzscratch-startup.out | aws s3 cp - s3://{ns.s3_root}/$MZ_CB_BENCH_ID/$MZ_CB_CLUSTER_ID.csv >&2
else
    aws s3 cp - s3://{ns.s3_root}/$MZ_CB_BENCH_ID/$MZ_CB_CLUSTER_ID-FAILURE.out < ~/mzscratch-startup.out >&2
    aws s3 cp - s3://{ns.s3_root}/$MZ_CB_BENCH_ID/$MZ_CB_CLUSTER_ID-FAILURE.err < ~/mzscratch-startup.err
fi
sudo shutdown -h now # save some money
"""

    if ns.profile == "basic":
        descs = [
            scratch.MachineDesc(
                name="materialized",
                launch_script=mz_launch_script,
                instance_type="r5a.4xlarge",
                ami="ami-0b29b6e62f2343b46",
                tags={},
                size_gb=64,
            ),
        ]
    elif ns.profile == "confluent":
        # A second machine running the Confluent stack via mzcompose.
        confluent_launch_script = f"""bin/mzcompose --mz-find load-tests up"""
        descs = [
            scratch.MachineDesc(
                name="materialized",
                launch_script=mz_launch_script,
                instance_type="r5a.4xlarge",
                ami="ami-0b29b6e62f2343b46",
                tags={},
                size_gb=64,
            ),
            scratch.MachineDesc(
                name="confluent",
                launch_script=confluent_launch_script,
                instance_type="r5a.4xlarge",
                ami="ami-0b29b6e62f2343b46",
                tags={},
                size_gb=1000,
                checkout=False,
            ),
        ]
    else:
        raise RuntimeError(f"Profile {ns.profile} is not implemented yet")

    bench_id = util.nonce(8)

    # Write a manifest of expected cluster IDs so `check` knows what to poll.
    manifest_bytes = "".join(f"{i}-{rev}\n" for i, rev in clusters).encode("utf-8")
    boto3.client("s3").put_object(Body=manifest_bytes,
                                  Bucket="mz-cloudbench",
                                  Key=f"{bench_id}/MANIFEST")

    # TODO - Do these in parallel
    launched = []
    for (i, rev) in clusters:
        launched += scratch.launch_cluster(
            descs=descs,
            nonce=f"{bench_id}-{i}-{rev}",
            subnet_id=DEFAULT_SUBNET_ID,
            security_group_id=DEFAULT_SG_ID,
            instance_profile=DEFAULT_INSTPROF_NAME,
            key_name=None,
            extra_tags={
                "bench_id": bench_id,
                "bench_rev": rev,
                "bench_i": str(i),
                "LaunchedBy": scratch.whoami(),
            },
            extra_env={
                "MZ_CB_BENCH_ID": bench_id,
                "MZ_CB_CLUSTER_ID": f"{i}-{rev}",
                "MZ_CB_GIT_REV": rev,
                "MZ_CB_S3_ROOT": ns.s3_root,
            },
            delete_after=scratch.now_plus(timedelta(days=1)),
            git_rev=rev,
        )

    print("Launched instances:")
    print_instances(launched, format="table")  # todo
    print(f"""Launched cloud bench with ID {bench_id}.
To wait for results, run: bin/cloudbench check {bench_id}""")
def check(ns: argparse.Namespace) -> None:
    """Poll S3 for the results of a cloud bench run and emit a merged CSV.

    Reads the bench MANIFEST for the list of cluster IDs, then loops (60s
    between passes) until every cluster has uploaded either a success CSV or
    a pair of FAILURE logs. On any failure, prints the logs and raises.
    Otherwise writes all result rows to stdout with InstanceIndex/Rev/Trial
    columns appended.
    """
    check_required_vars()
    bench_id = ns.bench_id[0]
    manifest = (boto3.client("s3").get_object(
        Bucket=ns.s3_root,
        Key=f"{bench_id}/MANIFEST")["Body"].read().decode("utf-8").strip())
    insts = manifest.split("\n")
    if not insts:
        raise RuntimeError(f"No instances found for bench ID {bench_id}")
    # One slot per cluster; filled with a success or failure record as the
    # corresponding objects appear in S3.
    results: List[Optional[Union[BenchSuccessResult, BenchFailureLogs]]] = [None for _ in insts]
    not_done = list(range(len(results)))
    while not_done:
        for i in not_done:
            maybe_result = try_get_object(f"{bench_id}/{insts[i]}.csv",
                                          ns.s3_root)
            if maybe_result is None:
                # No success CSV yet; a run only counts as failed once BOTH
                # failure logs exist.
                maybe_out = try_get_object(
                    f"{bench_id}/{insts[i]}-FAILURE.out", ns.s3_root)
                maybe_err = try_get_object(
                    f"{bench_id}/{insts[i]}-FAILURE.err", ns.s3_root)
                if (maybe_out is None) or (maybe_err is None):
                    continue
                results[i] = BenchFailureLogs(stdout=maybe_out,
                                              stderr=maybe_err)
            else:
                results[i] = BenchSuccessResult(stdout=maybe_result)

        not_done = [i for i in not_done if not results[i]]
        if not_done:
            print("Benchmark not done; waiting 60 seconds", file=sys.stderr)
            time.sleep(60)

    # The loop only exits when every slot is filled; narrow the type for mypy.
    for r in results:
        assert isinstance(r, BenchSuccessResult) or isinstance(
            r, BenchFailureLogs)
    done_results = cast(List[Union[BenchFailureLogs, BenchSuccessResult]],
                        results)

    failed = [(i, r) for i, r in enumerate(done_results)
              if isinstance(r, BenchFailureLogs)]
    if failed:
        for i, f in failed:
            print(
                f"Run of instance {insts[i]} failed, stdout:\n{f.stdout}stderr:\n{f.stderr}",
                file=sys.stderr,
            )
        raise RuntimeError(f"{len(failed)} runs FAILED!")

    good_results = cast(List[BenchSuccessResult], done_results)
    readers = [
        csv.DictReader(f"{line}\n" for line in r.stdout.split("\n"))
        for r in good_results
    ]
    # Lazy: each inner generator yields row values; must be consumed AFTER
    # fieldnames are touched below (accessing .fieldnames reads the header).
    csv_results = ((d.values() for d in r) for r in readers)
    for r in readers:
        assert isinstance(r.fieldnames, list)
        for fn in r.fieldnames:
            assert isinstance(fn, str)
    # All per-cluster CSVs must share one header to be mergeable.
    headers = set(tuple(cast(List[str], r.fieldnames)) for r in readers)
    if len(headers) > 1:
        raise RuntimeError("Mismatched headers")
    w = csv.writer(sys.stdout)
    w.writerow(
        cast(List[str], readers[0].fieldnames) +
        ["InstanceIndex", "Rev", "Trial"])
    for inst, r in zip(insts, csv_results):
        # Cluster IDs look like "<index>-<rev>"; split to recover both parts.
        components = inst.split("-")
        for i, entry in enumerate(r):
            w.writerow(
                itertools.chain(entry, (components[0], components[1], i)))