示例#1
0
def grow_ebs_for_task(task_fragment, target_size_gb):
    """Grow the EBS volume of the instance whose name contains a fragment.

    Instances are scanned in ascending launch-time order and the first one
    whose name (``u.get_name(instance.tags)``) contains ``task_fragment`` is
    selected. That instance must have exactly one volume, which is resized
    through the EC2 client's ``modify_volume`` call.

    Args:
        task_fragment: substring to look for in instance names.
        target_size_gb: value passed as ``Size`` to ``modify_volume``.

    Raises:
        ValueError: if no instance name contains ``task_fragment``.
        AssertionError: if the instance does not have exactly one volume, or
            ``u.is_good_response`` rejects the ``modify_volume`` response.
    """
    ec2 = u.create_ec2_resource()
    client = u.create_ec2_client()

    # todo: don't crash on missing names
    launch_times = [(u.seconds_from_datetime(i.launch_time), i)
                    for i in ec2.instances.all()]

    for seconds, instance in sorted(launch_times, key=itemgetter(0)):
        task_name = u.get_name(instance.tags)
        if task_fragment in task_name:
            hours_ago = (time.time() - seconds) / 3600
            hours_ago += 8  # adjust for time being in UTC
            print("Found instance %s launched %.1f hours ago" %
                  (task_name, hours_ago))
            break
    else:
        # Without this guard the loop variable would still point at the last
        # instance scanned and an unrelated volume would be resized.
        raise ValueError("No instance with name containing %r" %
                         (task_fragment, ))
    print(instance.id)

    volumes = list(instance.volumes.all())
    assert len(volumes) == 1, "Must have 1 volume"

    print("Growing %s to %s" % (volumes[0].id, target_size_gb))
    response = client.modify_volume(
        VolumeId=volumes[0].id,
        Size=target_size_gb,
    )
    assert u.is_good_response(response)
示例#2
0
def main():
    """Stop or terminate EC2 instances whose name contains ``fragment``.

    Relies on the module-level names ``fragment``, ``args`` and
    ``USER_KEY_NAME``. Instances are filtered by name and by the
    ``skip_tensorboard`` / ``skip_stopped`` / ``limit_to_key`` options;
    terminated instances are always ignored. Unless ``args.yes`` is set the
    user is asked to confirm. With ``args.soft`` the instances are stopped,
    otherwise they are terminated.
    """
    ec2 = u.create_ec2_resource()  # ec2 resource
    ec2_client = u.create_ec2_client()  # ec2 client
    instances = list(ec2.instances.all())  # todo: use filter?
    region = u.get_region()

    instances_to_kill = []
    for i in instances:
        name = u.get_name(i.tags)
        state = i.state['Name']
        if fragment not in name:
            continue
        if args.skip_tensorboard and '.tb.' in name:
            continue
        if args.skip_stopped and state == 'stopped':
            continue
        # key_name is None for instances launched without a key pair (per
        # the boto3 Instance docs); treat that as "does not match".
        if args.limit_to_key and USER_KEY_NAME not in (i.key_name or ''):
            continue
        if state == 'terminated':
            continue
        instances_to_kill.append(i)
        print(u.get_name(i), i.instance_type, i.key_name,
              state if state == 'stopped' else '')

    # print extra info if couldn't find anything to kill
    if not instances_to_kill:
        valid_names = sorted(
            {"%s,%s" % (u.get_name(i), u.get_state(i)) for i in instances})
        from pprint import pprint as pp
        print("Current instances:")
        pp(valid_names)
        print("No running instances found for: Name '%s', key '%s'" %
              (fragment, USER_KEY_NAME))
        if args.skip_tensorboard:
            print("skipping tensorboard")
        return

    action = 'soft terminate' if args.soft else 'terminate'
    if args.yes:
        confirmed = True
    else:
        answer = input("%d instances found, %s in %s? (y/N) " %
                       (len(instances_to_kill), action, region))
        # An empty answer counts as "no".
        confirmed = answer.lower() == "y"

    if not confirmed:
        print("Didn't get y, doing nothing")
        return

    instance_ids = [i.id for i in instances_to_kill]
    if args.delay:
        print(f"Sleeping for {args.delay} seconds")
        time.sleep(args.delay)
    if args.soft:
        response = ec2_client.stop_instances(InstanceIds=instance_ids)
        print("soft terminating, got response: %s" % (response, ))
    else:
        response = ec2_client.terminate_instances(InstanceIds=instance_ids)
        print("terminating, got response: %s" % (response, ))
示例#3
0
def list_spot_requests():
    """Print one line per spot instance request.

    Each line shows the request id, the instance type and key name from the
    request's launch specification, and the request state. Launch
    specification fields that are absent from the response are printed as
    ``None`` instead of raising ``KeyError``.
    """
    client = u.create_ec2_client()
    response = client.describe_spot_instance_requests()
    for request in response['SpotInstanceRequests']:
        # KeyName is only present when the request named a key pair.
        launch_spec = request.get('LaunchSpecification', {})
        print(request['SpotInstanceRequestId'],
              launch_spec.get('InstanceType'), launch_spec.get('KeyName'),
              request['State'])
示例#4
0
def main():
    """SSH into the first instance returned for ``args.fragment``.

    The login name comes from the ``USERNAME`` environment variable
    (default ``ubuntu``). The target is the first element of
    ``u.get_instances(fragment)``. If the first ssh attempt exits with a
    non-zero status, one more attempt is made with the alternate username.
    """
    fragment = args.fragment

    # TODO: prevent CTRL+c/CTRL+d from killing session
    if not args.skip_tmux:
        print("Launching into TMUX session, use CTRL+b d to exit")

    username = os.environ.get("USERNAME", "ubuntu")
    print("Using username '%s'" % (username, ))

    from tzlocal import get_localzone  # $ pip install tzlocal

    filtered_instance_list = u.get_instances(fragment)
    if not filtered_instance_list:
        print("no instance id contains fragment '%s'" % (fragment, ))
        return

    # connect to most recent instance
    # NOTE(review): presumably u.get_instances returns most-recent first;
    # confirm against the helper.
    print(filtered_instance_list)
    instance = filtered_instance_list[0]
    print("Connecting to ", u.get_name(instance), " launched ",
          instance.launch_time.astimezone(get_localzone()))
    keypair_fn = u.get_keypair_fn()
    cmd = make_cmd(keypair_fn, username, instance.public_ip_address)

    print(cmd)
    result = os.system(cmd)
    if result == 0:
        # First attempt worked; do not open a second session on exit.
        return

    # NOTE(review): both replacement values below are the same masked
    # literal; they look redacted (probably the other of 'ubuntu' /
    # 'ec2-user') -- confirm and restore the intended usernames.
    if username == 'ubuntu':
        username = '******'
    elif username == 'ec2-user':
        username = '******'

    print("ssh failed with code %d, trying username %s" % (result, username))
    cmd = make_cmd(keypair_fn, username, instance.public_ip_address)
    os.system(cmd)
示例#5
0
def main():
    """List VPCs, or delete one VPC together with its subresources.

    Usage (from ``sys.argv``):
        <script>                 -> list VPCs
        <script> list            -> list VPCs
        <script> delete <name|id> -> delete the VPC whose name or id matches

    On delete, subnets, internet gateways, security groups and route tables
    of the VPC are removed before the VPC itself. Security group and route
    table failures are printed and skipped; subnet and gateway failures
    abort the run.

    Raises:
        AssertionError: on a wrong argument count for ``delete``, a missing
            ``AWS_DEFAULT_REGION``, or a subnet/gateway response rejected by
            ``u.is_good_response``.
    """
    mode = sys.argv[1] if len(sys.argv) >= 2 else 'list'

    if mode == 'list':
        list_vpcs()
    elif mode == 'delete':
        assert len(sys.argv) == 3
        target = sys.argv[2]

        assert 'AWS_DEFAULT_REGION' in os.environ
        client = u.create_ec2_client()
        ec2 = u.create_ec2_resource()
        response = client.describe_vpcs()
        for vpc_response in response['Vpcs']:
            vpc_name = _get_name(vpc_response.get('Tags', []))
            vpc = ec2.Vpc(vpc_response['VpcId'])
            if vpc_name != target and vpc.id != target:
                continue
            print("Deleting VPC name=%s, id=%s" % (vpc_name, vpc.id))

            # The API calls are made outside the assert expressions so that
            # they still run when asserts are stripped (python -O).
            for subnet in vpc.subnets.all():
                print("Deleting subnet %s" % (subnet.id))
                subnet_response = subnet.delete()
                assert u.is_good_response(subnet_response)

            for gateway in vpc.internet_gateways.all():
                print("Deleting gateway %s" % (gateway.id))
                detach_response = gateway.detach_from_vpc(VpcId=vpc.id)
                assert u.is_good_response(detach_response)
                gateway_response = gateway.delete()
                assert u.is_good_response(gateway_response)

            for security_group in vpc.security_groups.all():
                try:
                    group_response = security_group.delete()
                    assert u.is_good_response(group_response)
                except Exception as e:
                    print("Failed with " + str(e))

            for route_table in vpc.route_tables.all():
                print("Deleting route table %s" % (route_table.id))
                try:
                    table_response = route_table.delete()
                    assert u.is_good_response(table_response)
                except Exception as e:
                    print("Failed with " + str(e))

            if u.is_good_response(client.delete_vpc(VpcId=vpc.id)):
                print("Succeeded deleting VPC ", vpc.id)
示例#6
0
def cancel_spot_requests():
    """Cancel every spot instance request that is not cancelled or closed.

    For each remaining request a line with its id, instance type, key name
    and state is printed before the cancel call. Launch specification
    fields missing from the response are printed as ``None`` so that a
    missing key name cannot stop the remaining requests from being
    cancelled.
    """
    client = u.create_ec2_client()
    response = client.describe_spot_instance_requests()
    for request in response['SpotInstanceRequests']:
        state = request['State']
        if state in ('cancelled', 'closed'):
            continue

        request_id = request['SpotInstanceRequestId']
        # KeyName is only present when the request named a key pair.
        launch_spec = request.get('LaunchSpecification', {})

        print('cancelling', request_id, launch_spec.get('InstanceType'),
              launch_spec.get('KeyName'), state)

        client.cancel_spot_instance_requests(
            SpotInstanceRequestIds=[request_id])
示例#7
0
    )
    print_response(inspect.getframeinfo(inspect.currentframe())[2], route)


def describe_route_tables(ec2_client):
    """Call ``describe_route_tables`` on the client and print the response.

    The label handed to ``print_response`` is this function's own name, read
    from the currently executing frame.
    """
    # https://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.Client.describe_route_tables
    route_tables = ec2_client.describe_route_tables()
    frame_info = inspect.getframeinfo(inspect.currentframe())
    print_response(frame_info.function, route_tables)


# Script entry point: creates a VPC, tags it, then creates a subnet in the
# first availability zone reported for the session.
if __name__ == '__main__':
    # Collects the ids of the resources created below.
    aws = {}
    # Set profile_name here when you need to switch between AWS profiles
    session = boto3.Session(profile_name='my-profile')
    # Create the client and the resource that the rest of the script uses
    client = create_ec2_client(session)
    resource = create_ec2_resource(session)

    # Create the VPC and check it
    aws['vpc_id'] = create_vpc(client)
    add_vpc_name_tag(resource, aws['vpc_id'])
    describe_vpc(client)

    # Create the subnet
    # Look up the availability zones
    zones = describe_availability_zones(client)
    # Use the first availability zone in the response for the subnet
    first_zone = zones['AvailabilityZones'][0]['ZoneName']
    print_response('first availability zone', first_zone)
    subnet = create_vpc_subnet(resource, aws['vpc_id'], first_zone, '192.168.1.0/24')
    aws['public_subnet_id'] = subnet.subnet_id
示例#8
0
    default='all',
    help=("which resources to delete, all/network/keypair/efs"))
parser.add_argument('--force-delete-efs',
                    action='store_true',
                    help="force deleting main EFS")

args = parser.parse_args()

# Names of the resources this script operates on. Everything except the
# keypair takes its name from u.get_resource_name().
EFS_NAME = u.get_resource_name()
VPC_NAME = u.get_resource_name()
SECURITY_GROUP_NAME = u.get_resource_name()
ROUTE_TABLE_NAME = u.get_resource_name()
KEYPAIR_NAME = u.get_keypair_name()

# EC2 client and resource handles, created once at import time.
client = u.create_ec2_client()
ec2 = u.create_ec2_resource()


def response_type(response):
    """Return the status word for a response.

    Gives 'ok' when ``u.is_good_response`` accepts ``response`` and
    'failed' otherwise.
    """
    if u.is_good_response(response):
        return 'ok'
    return 'failed'


def delete_efs():
    efss = u.get_efs_dict()
    efs_id = efss.get(EFS_NAME, '')
    efs_client = u.create_efs_client()
    if efs_id:
        try:
            # delete mount targets first
            print("About to delete %s (%s)" % (efs_id, EFS_NAME))
示例#9
0
def main():
    """Delete the EFS, VPC (with subresources) and keypair for this setup.

    Relies on the module-level names ``DEFAULT_NAME``, ``VPC_NAME`` and
    ``KEYPAIR_NAME``. Exits immediately when ``DEFAULT_NAME`` is 'nexus'.
    Deletion is best-effort: each step prints its outcome, failures are
    logged through ``u.loge`` and the remaining steps still run. Order:

    1. EFS mount targets, then the EFS itself.
    2. VPC subnets, internet gateways, route tables, security groups, then
       the VPC.
    3. The EC2 keypair and the local keypair file.

    Raises:
        KeyError: if ``AWS_DEFAULT_REGION`` is not set in the environment.
    """
    # TODO: also bring down all the instances and wait for them to come down
    region = os.environ['AWS_DEFAULT_REGION']
    if DEFAULT_NAME == 'nexus':
        print("Nexus resources are protected, don't delete them")
        sys.exit()

    print("Deleting %s resources in region %s" % (
        DEFAULT_NAME,
        region,
    ))
    existing_vpcs = u.get_vpc_dict()
    ec2 = u.create_ec2_resource()

    def response_type(response):
        return 'ok' if u.is_good_response(response) else 'failed'

    def attempt(label, action):
        """Write label, call action, then report 'ok'/'failed' on one line."""
        sys.stdout.write(label)
        sys.stdout.flush()
        try:
            sys.stdout.write(response_type(action()) + '\n')
        except Exception as e:
            sys.stdout.write('failed\n')
            u.loge(str(e) + '\n')

    # delete EFS
    efss = u.get_efs_dict()
    efs_id = efss.get(DEFAULT_NAME, '')
    efs_client = u.create_efs_client()
    if efs_id:
        try:
            # delete mount targets first
            print("About to delete %s (%s)" % (efs_id, DEFAULT_NAME))
            response = efs_client.describe_mount_targets(FileSystemId=efs_id)
            assert u.is_good_response(response)
            for mount_response in response['MountTargets']:
                mount_target_id = mount_response['MountTargetId']
                sys.stdout.write('Deleting mount target %s ... ' %
                                 (mount_target_id, ))
                sys.stdout.flush()
                delete_response = efs_client.delete_mount_target(
                    MountTargetId=mount_target_id)
                print(response_type(delete_response))

            sys.stdout.write('Deleting EFS %s (%s)... ' %
                             (efs_id, DEFAULT_NAME))
            sys.stdout.flush()
            u.delete_efs_id(efs_id)

        except Exception as e:
            sys.stdout.write('failed\n')
            u.loge(str(e) + '\n')

    if VPC_NAME in existing_vpcs:
        vpc = ec2.Vpc(existing_vpcs[VPC_NAME].id)
        print("Deleting VPC %s (%s) subresources:" % (VPC_NAME, vpc.id))

        for subnet in vpc.subnets.all():
            attempt("Deleting subnet %s ... " % (subnet.id), subnet.delete)

        for gateway in vpc.internet_gateways.all():
            sys.stdout.write("Deleting gateway %s ... " % (gateway.id))
            # todo: if instances are using VPC, this fails with
            # botocore.exceptions.ClientError: An error occurred (DependencyViolation) when calling the DetachInternetGateway operation: Network vpc-ca4abab3 has some mapped public address(es). Please unmap those public address(es) before detaching the gateway.
            # Guarded like the other steps so that this failure does not
            # abort the rest of the cleanup.
            try:
                detached = u.is_good_response(
                    gateway.detach_from_vpc(VpcId=vpc.id))
                sys.stdout.write(
                    'detached ... ' if detached else ' detach_failed ')
                deleted = u.is_good_response(gateway.delete())
                sys.stdout.write('deleted ' if deleted else ' delete_failed ')
                sys.stdout.write('\n')
            except Exception as e:
                sys.stdout.write('failed\n')
                u.loge(str(e) + '\n')

        def route_table_desc(route_table):
            return "%s (%s)" % (route_table.id, u.get_name(route_table.tags))

        for route_table in vpc.route_tables.all():
            attempt(
                "Deleting route table %s ... " %
                (route_table_desc(route_table)), route_table.delete)

        def security_group_desc(security_group):
            return "%s (%s, %s)" % (security_group.id,
                                    u.get_name(security_group.tags),
                                    security_group.group_name)

        # TODO: this tries to remove default security group, maybe not remove it?
        for security_group in vpc.security_groups.all():
            attempt(
                'Deleting security group %s ... ' %
                (security_group_desc(security_group)), security_group.delete)

        # Guarded as well: if a subresource could not be removed, deleting
        # the VPC fails and the keypair cleanup below must still run.
        attempt("Deleting VPC %s ... " % (vpc.id), vpc.delete)

    # delete keypair
    keypairs = u.get_keypair_dict()
    keypair = keypairs.get(DEFAULT_NAME, '')
    if keypair:
        attempt(
            "Deleting keypair %s (%s) ... " %
            (keypair.key_name, DEFAULT_NAME), keypair.delete)

    keypair_fn = u.get_keypair_fn(KEYPAIR_NAME)
    if os.path.exists(keypair_fn):
        print("Deleting local keypair file %s" % (keypair_fn, ))
        # os.remove instead of a shell "rm -f": no shell parsing of the
        # path, so spaces or metacharacters in it are handled correctly.
        try:
            os.remove(keypair_fn)
        except OSError as e:
            u.loge(str(e) + '\n')