def decorated(*args, **kwargs):
    # Inner wrapper of a decorator: `func` and `cases` come from the enclosing scope.
    try:
        CONFIG.assert_required_config_exists(*cases)
        return func(*args, **kwargs)
    except AssertionError as e:
        print(e)
        sys.exit(1)
    except FileNotFoundError as e:
        # Happens when the nimbo config file is not found
        print(e)
        sys.exit(1)

def list_buckets():
    s3 = CONFIG.get_session().client("s3")
    response = s3.list_buckets()

    print("Existing buckets:")
    for bucket in response["Buckets"]:
        print(f'  {bucket["Name"]}')

def create_instance_profile_and_role(dry_run=False):
    iam = CONFIG.get_session().client("iam")
    role_name = "NimboS3AndEC2FullAccess"
    instance_profile_name = "NimboInstanceProfile"
    policy = {
        "Version": "2012-10-17",
        "Statement": {
            "Effect": "Allow",
            "Action": "sts:AssumeRole",
            "Principal": {"Service": "ec2.amazonaws.com"},
        },
    }
    if dry_run:
        return

    iam.create_role(RoleName=role_name, AssumeRolePolicyDocument=json.dumps(policy))
    iam.attach_role_policy(
        PolicyArn="arn:aws:iam::aws:policy/AmazonS3FullAccess", RoleName=role_name
    )
    iam.attach_role_policy(
        PolicyArn="arn:aws:iam::aws:policy/AmazonEC2FullAccess", RoleName=role_name
    )
    iam.create_instance_profile(InstanceProfileName=instance_profile_name, Path="/")
    iam.add_role_to_instance_profile(
        InstanceProfileName=instance_profile_name, RoleName=role_name
    )

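# Illustrative usage sketch (not part of the original source): the IAM bootstrap
# above is typically run once per AWS account. Assumes CONFIG.get_session()
# resolves to credentials that are allowed to create IAM roles and instance profiles.
def example_bootstrap_iam(dry_run=True):
    # With dry_run=True the helper returns before any IAM call is made, which is
    # a cheap way to check that the session/config wiring works.
    create_instance_profile_and_role(dry_run=dry_run)
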
def verify_nimbo_instance_profile(dry_run=False):
    iam = CONFIG.get_session().client("iam")
    if dry_run:
        return

    response = iam.list_instance_profiles()
    instance_profiles = response["InstanceProfiles"]
    instance_profile_names = [p["InstanceProfileName"] for p in instance_profiles]

    if "NimboInstanceProfile" not in instance_profile_names:
        raise Exception(
            textwrap.dedent(
                """\
                Instance profile 'NimboInstanceProfile' not found.

                An instance profile is necessary to give your instance access to EC2
                and S3 resources. You can create an instance profile using
                'nimbo create_instance_profile <role_name>'. If you are a root user,
                you can simply run 'nimbo create_instance_profile_and_role', and nimbo
                will create the necessary role policies and instance profile for you.
                Otherwise, please ask your admin for a role that provides the necessary
                EC2 and S3 read/write access.

                For more details please go to docs.nimbo.sh/instance-profiles."""
            )
        )

def ls_spot_gpu_prices(dry_run=False) -> None:
    if dry_run:
        return

    ec2 = CONFIG.get_session().client("ec2")

    string = AwsUtils._format_price_string(
        "InstanceType", "Price ($/hour)", "GPUs", "CPUs", "Mem (Gb)"
    )
    print()
    nprint(string, style="bold")

    for instance_type in AwsUtils._instance_types():
        response = ec2.describe_spot_price_history(
            InstanceTypes=[instance_type],
            Filters=[{"Name": "product-description", "Values": ["Linux/UNIX"]}],
        )
        price = float(response["SpotPriceHistory"][0]["SpotPrice"])

        num_gpus, gpu_type, mem, cpus = INSTANCE_GPU_MAP[instance_type]
        string = AwsUtils._format_price_string(
            instance_type, round(price, 2), f"{num_gpus} x {gpu_type}", cpus, mem
        )
        print(string)

    print()

def list_spot_gpu_prices(dry_run=False):
    if dry_run:
        return

    instance_types = list(sorted(ec2_instance_types()))
    instance_types = [
        inst
        for inst in instance_types
        if inst[:2] in ["p2", "p3", "p4"] or inst[:3] in ["g4d"]
    ]

    ec2 = CONFIG.get_session().client("ec2")

    string = format_price_string(
        "InstanceType", "Price ($/hour)", "GPUs", "CPUs", "Mem (Gb)"
    )
    print(string)

    for instance_type in instance_types:
        response = ec2.describe_spot_price_history(
            InstanceTypes=[instance_type],
            Filters=[{"Name": "product-description", "Values": ["Linux/UNIX"]}],
        )
        price = float(response["SpotPriceHistory"][0]["SpotPrice"])

        num_gpus, gpu_type, mem, cpus = INSTANCE_GPU_MAP[instance_type]
        string = format_price_string(
            instance_type, round(price, 2), f"{num_gpus} x {gpu_type}", cpus, mem
        )
        print(string)

def list_instance_profiles(dry_run=False):
    iam = CONFIG.get_session().client("iam")
    if dry_run:
        return

    response = iam.list_instance_profiles()
    pprint(response["InstanceProfiles"])

def stop_instance(instance_id, dry_run=False):
    ec2 = CONFIG.get_session().client("ec2")
    try:
        response = ec2.stop_instances(InstanceIds=[instance_id], DryRun=dry_run)
        pprint(response)
    except ClientError as e:
        if "DryRunOperation" not in str(e):
            raise

def _setup_for_case(*cases: RequiredCase) -> None:
    """
    Used within CliRunner().isolated_filesystem() for copying what is needed
    from the test assets folder to the filesystem and for injecting the testing
    configuration needed for this particular case.
    """
    cases = RequiredCase.decompose(*cases)

    if RequiredCase.MINIMAL in cases:
        _copy_assets(AssetType.NIMBO_CONFIG)
    if RequiredCase.INSTANCE in cases:
        _copy_assets(AssetType.INSTANCE_KEYS)
    if RequiredCase.JOB in cases:
        _copy_assets(AssetType.CONDA_ENV)

    CONFIG.reset_required_config()
    CONFIG.inject_required_config(*cases)

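# Illustrative test sketch (an assumption, not part of the original source): how
# _setup_for_case is meant to be used inside CliRunner().isolated_filesystem().
# The `cli` entry point and the "ls-spot-prices" command name are hypothetical
# placeholders for whatever the surrounding test suite actually imports.
def example_test_with_isolated_filesystem():
    from click.testing import CliRunner

    runner = CliRunner()
    with runner.isolated_filesystem():
        # Copies the nimbo config / instance keys / conda env assets as needed and
        # injects the matching test configuration before invoking the CLI.
        _setup_for_case(RequiredCase.JOB)
        result = runner.invoke(cli, ["ls-spot-prices"])
        assert result.exit_code == 0
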
def delete_instance(instance_id, dry_run=False):
    ec2 = CONFIG.get_session().client("ec2")
    try:
        response = ec2.terminate_instances(InstanceIds=[instance_id], DryRun=dry_run)
        status = response["TerminatingInstances"][0]["CurrentState"]["Name"]
        print(f"Instance {instance_id}: {status}")
    except ClientError as e:
        if "DryRunOperation" not in str(e):
            raise

def delete_instance(instance_id: str, dry_run=False) -> None:
    ec2 = CONFIG.get_session().client("ec2")
    try:
        response = ec2.terminate_instances(InstanceIds=[instance_id], DryRun=dry_run)
        status = response["TerminatingInstances"][0]["CurrentState"]["Name"]
        nprint_header(f"Instance [green]{instance_id}[/green]: {status}")
    except botocore.exceptions.ClientError as e:
        if "DryRunOperation" not in str(e):
            raise

def record_event(cmd):
    if not CONFIG.telemetry:
        return

    if not CONFIG.user_id:
        CONFIG.get_session()

    now = datetime.now()
    date_time = now.strftime("%Y-%m-%d-%H-%M-%S")
    data = {
        "user_id": CONFIG.user_id,
        "user_arn": CONFIG.user_arn,
        "cmd": cmd,
        "date": date_time,
    }
    try:
        requests.post(CONFIG.telemetry_url, data=json.dumps(data), timeout=2)
    except BaseException:
        pass

def check_instance_status(instance_id, dry_run=False):
    ec2 = CONFIG.get_session().client("ec2")
    try:
        response = ec2.describe_instances(
            InstanceIds=[instance_id],
            Filters=make_instance_filters(),
            DryRun=dry_run,
        )
        status = response["Reservations"][0]["Instances"][0]["State"]["Name"]
        return status
    except ClientError as e:
        if "DryRunOperation" not in str(e):
            raise

def create_instance_profile(role_name, dry_run=False):
    iam = CONFIG.get_session().client("iam")
    instance_profile_name = "NimboInstanceProfile"
    if dry_run:
        return

    iam.create_instance_profile(InstanceProfileName=instance_profile_name, Path="/")
    iam.add_role_to_instance_profile(
        InstanceProfileName=instance_profile_name, RoleName=role_name
    )

def list_snapshots():
    # Retrieve the list of snapshots created by nimbo
    ec2 = CONFIG.get_session().client("ec2")

    response = ec2.describe_snapshots(
        Filters=[{"Name": "tag:created_by", "Values": ["nimbo"]}],
        MaxResults=100,
    )
    return list(sorted(response["Snapshots"], key=lambda x: x["StartTime"]))

def ec2_instance_types():
    """Yield all available EC2 instance types in region CONFIG.region_name."""
    describe_args = {}
    client = CONFIG.get_session().client("ec2")
    while True:
        describe_result = client.describe_instance_types(**describe_args)
        yield from [i["InstanceType"] for i in describe_result["InstanceTypes"]]
        if "NextToken" not in describe_result:
            break
        describe_args["NextToken"] = describe_result["NextToken"]

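# Consumption sketch (not part of the original source): ec2_instance_types() is a
# generator that transparently follows the describe_instance_types NextToken
# pagination, so callers can filter it lazily without loading every page first.
def example_list_gpu_instance_types():
    gpu_prefixes = ("p2", "p3", "p4", "g4d")  # same families used elsewhere in this module
    return sorted(t for t in ec2_instance_types() if t.startswith(gpu_prefixes))
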
def get_instance_status(instance_id: str, dry_run=False) -> str:
    ec2 = CONFIG.get_session().client("ec2")
    try:
        response = ec2.describe_instances(
            InstanceIds=[instance_id],
            Filters=AwsInstance._make_instance_filters(),
            DryRun=dry_run,
        )
        status = response["Reservations"][0]["Instances"][0]["State"]["Name"]
        return status
    except botocore.exceptions.ClientError as e:
        if "DryRunOperation" not in str(e):
            raise

def check_instance_host(instance_id, dry_run=False):
    ec2 = CONFIG.get_session().client("ec2")
    try:
        response = ec2.describe_instances(
            InstanceIds=[instance_id],
            Filters=make_instance_filters(),
            DryRun=dry_run,
        )
        host = response["Reservations"][0]["Instances"][0]["PublicIpAddress"]
    except ClientError as e:
        if "DryRunOperation" not in str(e):
            raise
        host = "random_host"
    return host

def _get_host_from_instance_id(instance_id: str, dry_run=False) -> str:
    ec2 = CONFIG.get_session().client("ec2")
    try:
        response = ec2.describe_instances(
            InstanceIds=[instance_id],
            Filters=AwsInstance._make_instance_filters(),
            DryRun=dry_run,
        )
        host = response["Reservations"][0]["Instances"][0]["PublicIpAddress"]
    except botocore.exceptions.ClientError as e:
        if "DryRunOperation" not in str(e):
            raise
        host = ""
    return host

def run_commands_on_instance(commands, instance_ids):
    """Runs commands on remote linux instances.

    :param commands: a list of strings, each one a command to execute on the instances
    :param instance_ids: a list of instance_id strings, of the instances on which
        to execute the command
    :return: the response from the send_command function
        (check the boto3 docs for ssm client.send_command())
    """
    ssm = CONFIG.get_session().client("ssm")
    resp = ssm.send_command(
        DocumentName="AWS-RunShellScript",  # One of AWS' preconfigured documents
        Parameters={"commands": commands},
        InstanceIds=instance_ids,
    )
    return resp

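# Usage sketch (not part of the original source): send_command is asynchronous, so
# the returned dict only acknowledges the request; the command id under
# response["Command"]["CommandId"] can be polled later through SSM if needed.
def example_run_nvidia_smi(instance_id):
    response = run_commands_on_instance(["nvidia-smi"], [instance_id])
    return response["Command"]["CommandId"]
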
def create_security_group(group_name, dry_run=False):
    ec2 = CONFIG.get_session().client("ec2")
    response = ec2.describe_vpcs()
    vpc_id = response.get("Vpcs", [{}])[0].get("VpcId", "")

    response = ec2.create_security_group(
        GroupName=group_name,
        Description="Base VPC security group for Nimbo jobs.",
        VpcId=vpc_id,
        DryRun=dry_run,
    )
    security_group_id = response["GroupId"]
    print(
        f"Security Group {group_name} (id={security_group_id}) Created in vpc {vpc_id}."
    )

def show_stopped_instances(dry_run=False):
    ec2 = CONFIG.get_session().client("ec2")
    try:
        response = ec2.describe_instances(
            Filters=[{"Name": "instance-state-name", "Values": ["stopped", "stopping"]}]
            + make_instance_filters(),
            DryRun=dry_run,
        )
        for reservation in response["Reservations"]:
            for inst in reservation["Instances"]:
                print(
                    f"ID: {inst['InstanceId']}\n"
                    f"Launch Time: {inst['LaunchTime']}\n"
                    f"InstanceType: {inst['InstanceType']}\n"
                )
    except ClientError as e:
        if "DryRunOperation" not in str(e):
            raise

def _instance_types() -> Generator[str, None, None]:
    """Yield all relevant EC2 instance types in region CONFIG.region_name."""
    describe_args = {}
    client = CONFIG.get_session().client("ec2")

    def instance_type_generator():
        while True:
            describe_result = client.describe_instance_types(**describe_args)
            yield from (i["InstanceType"] for i in describe_result["InstanceTypes"])
            if "NextToken" not in describe_result:
                break
            describe_args["NextToken"] = describe_result["NextToken"]

    return (
        inst
        for inst in sorted(instance_type_generator())
        if inst.startswith(("p2", "p3", "p4", "g4d"))
    )

def allow_ingress_current_ip(target: str, dry_run=False) -> None:
    ec2 = CONFIG.get_session().client("ec2")

    try:
        response = ec2.describe_security_groups(GroupNames=[target], DryRun=dry_run)
        security_group_id = response["SecurityGroups"][0]["GroupId"]
    except botocore.exceptions.ClientError as e:
        if e.response["Error"]["Code"] == "InvalidGroup.NotFound":
            nprint(
                f"Security group {target} not found. Please use an existing"
                " security group or create a new one in the AWS console.",
                style="error",
            )
            sys.exit(1)
        elif e.response["Error"]["Code"] == "UnauthorizedOperation":
            return
        else:
            raise

    my_public_ip = requests.get("https://checkip.amazonaws.com").text.strip()

    try:
        ec2.authorize_security_group_ingress(
            GroupId=security_group_id,
            IpPermissions=[
                {
                    "IpProtocol": "tcp",
                    "FromPort": 22,
                    "ToPort": 22,
                    "IpRanges": [{"CidrIp": f"{my_public_ip}/16"}],
                }
            ],
        )
    except botocore.exceptions.ClientError as e:
        if e.response["Error"]["Code"] == "InvalidPermission.Duplicate":
            return
        elif e.response["Error"]["Code"] == "UnauthorizedOperation":
            return
        else:
            raise

def upload_file(file_name, bucket, object_name=None):
    """Upload a file to an S3 bucket.

    :param file_name: File to upload
    :param bucket: Bucket to upload to
    :param object_name: S3 object name. If not specified then file_name is used
    :return: True if file was uploaded, else False
    """
    # If S3 object_name was not specified, use file_name
    if object_name is None:
        object_name = file_name

    # Upload the file
    s3 = CONFIG.get_session().client("s3")
    try:
        s3.upload_file(file_name, bucket, object_name)
    except ClientError as e:
        logging.error(e)
        return False
    return True

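# Usage sketch (not part of the original source): the object key defaults to the
# local file name, so both calls below are equivalent. The bucket name is a
# placeholder, not one defined by this module.
def example_upload_results():
    upload_file("results.csv", "my-nimbo-bucket")
    return upload_file("results.csv", "my-nimbo-bucket", object_name="results.csv")
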
def show_active_instances(dry_run=False):
    ec2 = CONFIG.get_session().client("ec2")
    try:
        response = ec2.describe_instances(
            Filters=[{"Name": "instance-state-name", "Values": ["running", "pending"]}]
            + make_instance_filters(),
            DryRun=dry_run,
        )
        for reservation in response["Reservations"]:
            for inst in reservation["Instances"]:
                print(
                    f"Id: {inst['InstanceId']}\n"
                    f"Status: {inst['State']['Name']}\n"
                    f"Launch Time: {inst['LaunchTime']}\n"
                    f"InstanceType: {inst['InstanceType']}\n"
                    f"IP Address: {inst['PublicIpAddress']}\n"
                )
    except ClientError as e:
        if "DryRunOperation" not in str(e):
            raise

def delete_all_instances(dry_run=False):
    ec2 = CONFIG.get_session().client("ec2")
    try:
        response = ec2.describe_instances(
            Filters=[{"Name": "instance-state-name", "Values": ["running"]}]
            + make_instance_filters(),
            DryRun=dry_run,
        )
        for reservation in response["Reservations"]:
            for inst in reservation["Instances"]:
                instance_id = inst["InstanceId"]
                delete_response = ec2.terminate_instances(InstanceIds=[instance_id])
                status = delete_response["TerminatingInstances"][0]["CurrentState"]["Name"]
                print(f"Instance {instance_id}: {status}")
    except ClientError as e:
        if "DryRunOperation" not in str(e):
            raise

def create_bucket(bucket_name, dry_run=False):
    """Create an S3 bucket in the region of the current session.

    :param bucket_name: Bucket to create
    :param dry_run
    :return: True if bucket created, else False
    """
    try:
        session = CONFIG.get_session()
        s3 = session.client("s3")
        location = {"LocationConstraint": session.region_name}
        s3.create_bucket(Bucket=bucket_name, CreateBucketConfiguration=location)
    except ClientError as e:
        if e.response["Error"]["Code"] == "BucketAlreadyOwnedByYou":
            print(f"Bucket {bucket_name} already exists.")
        else:
            logging.error(e)
        return False

    print("Bucket %s created." % bucket_name)
    return True

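# Usage sketch (not part of the original source): create_bucket reports success or
# failure through its return value, so callers can branch on it instead of catching
# ClientError themselves. The bucket name is a placeholder.
def example_ensure_bucket():
    if not create_bucket("my-nimbo-bucket"):
        logging.warning("Bucket was not created (it may already exist).")
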
def mk_bucket(bucket_name: str, dry_run=False) -> None:
    """Create an S3 bucket in the region of the current session.

    :param bucket_name: Bucket to create
    :param dry_run
    """
    try:
        session = CONFIG.get_session()
        s3 = session.client("s3")
        location = {"LocationConstraint": session.region_name}
        s3.create_bucket(Bucket=bucket_name, CreateBucketConfiguration=location)
    except botocore.exceptions.ClientError as e:
        if e.response["Error"]["Code"] == "BucketAlreadyOwnedByYou":
            nprint(f"Bucket {bucket_name} already exists.", style="warning")
        else:
            nprint(e, style="error")
        return

    print("Bucket %s created." % bucket_name)

def allow_inbound_current_ip(group_name, dry_run=False):
    ec2 = CONFIG.get_session().client("ec2")

    # Get the security group id
    response = ec2.describe_security_groups(GroupNames=[group_name], DryRun=dry_run)
    security_group_id = response["SecurityGroups"][0]["GroupId"]

    my_public_ip = requests.get("https://checkip.amazonaws.com").text.strip()

    response = ec2.authorize_security_group_ingress(
        GroupId=security_group_id,
        IpPermissions=[
            {
                "IpProtocol": "tcp",
                "FromPort": 22,
                "ToPort": 22,
                "IpRanges": [{"CidrIp": f"{my_public_ip}/16"}],
            }
        ],
    )
    print("Ingress Successfully Set")
    pprint(response)