예제 #1
0
def keypair_setup():
  """Return the EC2 keypair named KEYPAIR_NAME, creating it if necessary.

  If the keypair is already listed by ``u.get_keypair_dict()`` it is reused,
  after checking that the local private key file exists and is non-empty.
  Otherwise a new keypair is created and its key material is written to the
  local key file, which is then made owner-read-only (mode 0400).

  Returns:
    The keypair object (the existing dictionary entry, or the result of
    ``ec2.create_key_pair``). Note: the keypair object is returned, not the
    contents of the private key file.

  Raises:
    AssertionError: if the keypair exists but the local key file is missing
      or empty, or if a new keypair is needed but a local key file is
      already present.
  """
  existing_keypairs = u.get_keypair_dict()
  keypair = existing_keypairs.get(KEYPAIR_NAME, None)
  keypair_fn = u.get_keypair_fn(KEYPAIR_NAME)
  if keypair:
    print("Reusing keypair "+KEYPAIR_NAME)
    # check that local pem file exists and is readable
    assert os.path.exists(keypair_fn)
    with open(keypair_fn) as key_file:
      keypair_contents = key_file.read()
    assert len(keypair_contents)>0
    # todo: check that fingerprint matches keypair.key_fingerprint
    return keypair

  print("Creating keypair "+KEYPAIR_NAME)
  ec2 = u.create_ec2_resource()
  # Check for a leftover local key file *before* creating the remote keypair,
  # so a failure here does not leave behind a keypair whose private key was
  # never saved.
  assert not os.path.exists(keypair_fn), "previous, keypair exists, delete it with 'sudo rm %s'"%(keypair_fn)
  keypair = ec2.create_key_pair(KeyName=KEYPAIR_NAME)

  with open(keypair_fn, 'w') as key_file:
    key_file.write(keypair.key_material)
  # owner read-only, same mode the previous `chmod 400` shell call applied
  os.chmod(keypair_fn, 0o400)
  return keypair
예제 #2
0
def keypair_setup():
    """Return the EC2 keypair named KEYPAIR_NAME, creating it if necessary.

    The directory ``u.PRIVATE_KEY_LOCATION`` is created first (``mkdir -p``).
    If the keypair is already listed by ``u.get_keypair_dict()`` it is reused,
    after checking that the local .pem file exists and is non-empty.
    Otherwise a new keypair is created and its key material is written to the
    local .pem file, which is then made owner-read-only (mode 0400).

    Returns:
        The keypair object (existing or newly created). Note: the keypair
        object is returned, not the contents of the private key file.

    Raises:
        AssertionError: if the keypair exists but the .pem file is missing or
            empty, or if a new keypair is needed but a .pem file is already
            present.
    """
    # Kept as a shell call on purpose: the shell expands anything like `~` in
    # the configured location, which os.makedirs would not.
    os.system('mkdir -p ' + u.PRIVATE_KEY_LOCATION)

    keypair = u.get_keypair_dict().get(KEYPAIR_NAME, None)
    keypair_fn = u.get_keypair_fn()
    if keypair:
        print("Reusing keypair " + KEYPAIR_NAME)
        # check that local pem file exists and is readable
        assert os.path.exists(
            keypair_fn
        ), "Keypair %s exists, but corresponding .pem file %s is not found, delete keypair %s through console and run again to recreate keypair/.pem together" % (
            KEYPAIR_NAME, keypair_fn, KEYPAIR_NAME)
        with open(keypair_fn) as key_file:
            keypair_contents = key_file.read()
        assert len(keypair_contents) > 0
        # todo: check that fingerprint matches keypair.key_fingerprint
    else:
        print("Creating keypair " + KEYPAIR_NAME)
        ec2 = u.create_ec2_resource()
        # Checked before create_key_pair so a stale local file never causes a
        # remote keypair to be created without its private key being saved.
        assert not os.path.exists(
            keypair_fn
        ), "previous keypair exists, delete it with 'sudo rm %s' and also delete corresponding keypair through console" % (
            keypair_fn)
        keypair = ec2.create_key_pair(KeyName=KEYPAIR_NAME)

        with open(keypair_fn, 'w') as key_file:
            key_file.write(keypair.key_material)
        # owner read-only, same mode the previous `chmod 400` shell call applied
        os.chmod(keypair_fn, 0o400)

    return keypair
예제 #3
0
파일: connect.py 프로젝트: sgugger/cluster
def main():
  """Ssh into the most recently launched running instance matching a fragment.

  ``args.fragment`` is matched as a substring against each running
  instance's name, public IP, instance id and private IP. The matching
  instance with the latest launch time is selected and an ssh command built
  by ``make_cmd`` is executed. If that command exits with a non-zero status,
  one more attempt is made with the alternate login name.

  Reads the region from the AWS_DEFAULT_REGION environment variable
  (KeyError if unset) and the login name from USERNAME (default "ubuntu").

  Raises:
    AssertionError: if the selected instance was launched with a key other
      than ``u.get_keypair_name()``.
  """
  fragment = args.fragment

  # TODO: prevent CTRL+c/CTRL+d from killing session
  if not args.skip_tmux:
    print("Launching into TMUX session, use CTRL+b d to exit")

  region = os.environ['AWS_DEFAULT_REGION']
  ec2 = boto3.resource('ec2', region_name=region)

  username = os.environ.get("USERNAME", "ubuntu")
  print("Using username '%s'"%(username,))

  instance_list = []
  for instance in ec2.instances.all():
    if instance.state['Name'] != 'running':
      continue

    name = u.get_name(instance.tags)
    # An instance may have no public/private IP (None); substitute '' so the
    # substring test cannot raise TypeError.
    searchable = (name,
                  instance.public_ip_address or '',
                  instance.id,
                  instance.private_ip_address or '')
    if any(fragment in field for field in searchable):
      instance_list.append((toseconds(instance.launch_time), instance))

  from tzlocal import get_localzone # $ pip install tzlocal

  print("Using region ", region)
  if not instance_list:
    print("no instance id contains fragment '%s'"%(fragment,))
    return

  # newest instance = last element after an ascending sort on launch time
  _, instance = sorted(instance_list, key=itemgetter(0))[-1]
  localtime = instance.launch_time.astimezone(get_localzone())
  assert instance.key_name == u.get_keypair_name(), "Got key %s, expected %s"%(instance.key_name, u.get_keypair_name())
  keypair_fn = u.get_keypair_fn(instance.key_name)

  print("Found to %s in %s launched at %s with key %s" % (u.get_name(instance.tags), region, localtime, instance.key_name))

  cmd = make_cmd(keypair_fn, username, instance.public_ip_address)
  print(cmd)
  result = os.system(cmd)
  if result == 0:
    # first attempt worked; do not open a second session
    return

  # First attempt failed: retry once with the other common login name.
  # NOTE(review): these two literals were masked as '******' in the reviewed
  # copy; 'ec2-user'/'ubuntu' restored by inference -- confirm.
  if username == 'ubuntu':
    username = 'ec2-user'
  elif username == 'ec2-user':
    username = 'ubuntu'

  print("ssh failed with code %d, trying username %s"%(result, username))
  cmd = make_cmd(keypair_fn, username, instance.public_ip_address)
  os.system(cmd)
예제 #4
0
    def __init__(self,
                 instance,
                 job,
                 task_id,
                 install_script=None,
                 user_data='',
                 linux_type=None,
                 skip_efs_mount=False):
        """Set up task bookkeeping and (re)create its local scratch directory.

        Args:
            instance: instance object this task runs on (stored as-is).
            job: owning job; ``job.name`` and ``job._run.name`` are used to
                build the task name and scratch path.
            task_id: identifier of this task within the job.
            install_script: optional install script, stored as-is.
            user_data: optional user-data script; when non-empty it must
                start with '#!/bin/bash'.
            linux_type: 'ubuntu' or 'amazon'; selects the ssh login name.
            skip_efs_mount: stored as-is on the task.

        Raises:
            AssertionError: if ``user_data`` is non-empty without the bash
                shebang, or ``linux_type`` is not 'ubuntu' or 'amazon'.
        """
        self.initialize_called = False
        self.instance = instance
        self.job = job
        self.id = task_id

        if user_data:
            assert user_data.startswith('#!/bin/bash')
        self.install_script = install_script
        self.user_data = user_data
        self.linux_type = linux_type
        self._run_counter = 0
        self.cached_ip = None
        self.cached_public_ip = None
        self.skip_efs_mount = skip_efs_mount

        self.name = u.format_task_name(task_id, job.name)
        # TODO, make below actually mean stuff (also, run_command_available)
        self.initialized = False

        # scratch is client-local space for temporary files
        # TODO: this job.name already contains name of run, the directory
        # below uses run name twice
        self.scratch = "{}/{}.{}.{}.{}/scratch".format(TASKDIR_PREFIX,
                                                       job._run.name, job.name,
                                                       self.id,
                                                       0)  # u.now_micros())
        self.remote_scratch = '/tmp/tmux'
        #    self.log("Creating local scratch dir %s", self.scratch)
        # start from an empty scratch directory on every construction
        self._ossystem('rm -Rf ' + self.scratch)  # TODO: don't delete this?
        self._ossystem('mkdir -p ' + self.scratch)
        #    os.chdir(self.scratch)

        # todo: create taskdir
        self.connect_instructions = "waiting for initialize()"
        self.keypair_fn = u.get_keypair_fn()

        # username to use to ssh into instances
        # ec2-user or ubuntu
        # NOTE(review): both literals were masked as '******' in the reviewed
        # copy; values restored from the comment above -- confirm.
        if linux_type == 'ubuntu':
            self.username = 'ubuntu'
        elif linux_type == 'amazon':
            self.username = 'ec2-user'
        else:
            # Explicit raise (not `assert False`) so the check survives
            # `python -O`; the offending value is now interpolated into the
            # message.
            raise AssertionError(
                "Unknown linux type '%s', expected 'ubuntu' or 'amazon'." %
                (linux_type, ))

        self.taskdir = '/home/' + self.username
예제 #5
0
def main():
    """Ssh into the first instance returned by ``u.get_instances(fragment)``.

    ``args.fragment`` selects the instance. An ssh command built by
    ``make_cmd`` is executed with the login name from the USERNAME
    environment variable (default "ubuntu"). If that command exits with a
    non-zero status, one more attempt is made with the alternate login name.
    Prints a message and returns when no instance matches.
    """
    fragment = args.fragment

    # TODO: prevent CTRL+c/CTRL+d from killing session
    if not args.skip_tmux:
        print("Launching into TMUX session, use CTRL+b d to exit")

    username = os.environ.get("USERNAME", "ubuntu")
    print("Using username '%s'" % (username, ))

    from tzlocal import get_localzone  # $ pip install tzlocal

    filtered_instance_list = u.get_instances(fragment)
    if not filtered_instance_list:
        print("no instance id contains fragment '%s'" % (fragment, ))
        return

    # connect to most recent instance
    # (assumes u.get_instances returns newest first -- TODO confirm)
    print(filtered_instance_list)
    instance = filtered_instance_list[0]
    print("Connecting to ", u.get_name(instance), " launched ",
          instance.launch_time.astimezone(get_localzone()))
    keypair_fn = u.get_keypair_fn()
    cmd = make_cmd(keypair_fn, username, instance.public_ip_address)

    print(cmd)
    result = os.system(cmd)
    if result == 0:
        # first attempt worked; do not open a second session
        return

    # First attempt failed: retry once with the other common login name.
    # NOTE(review): these two literals were masked as '******' in the
    # reviewed copy; 'ec2-user'/'ubuntu' restored by inference -- confirm.
    if username == 'ubuntu':
        username = 'ec2-user'
    elif username == 'ec2-user':
        username = 'ubuntu'

    print("ssh failed with code %d, trying username %s" %
          (result, username))
    cmd = make_cmd(keypair_fn, username, instance.public_ip_address)
    os.system(cmd)
예제 #6
0
def delete_keypair():
    """Delete the keypair named KEYPAIR_NAME and its local key file.

    Remote deletion is best-effort: any exception is reported as 'failed' on
    stdout and logged through ``u.loge``. The local file returned by
    ``u.get_keypair_fn()`` is then removed if it exists.
    """
    remote_keypair = u.get_keypair_dict().get(KEYPAIR_NAME, '')
    if remote_keypair:
        try:
            sys.stdout.write("Deleting keypair %s (%s) ... " %
                             (remote_keypair.key_name, KEYPAIR_NAME))
            outcome = response_type(remote_keypair.delete())
            sys.stdout.write(outcome + '\n')
        except Exception as err:
            sys.stdout.write('failed\n')
            u.loge(str(err) + '\n')

    pem_path = u.get_keypair_fn()
    if not os.path.exists(pem_path):
        return

    print("Deleting local keypair file %s" % (pem_path, ))
    os.system('rm -f ' + pem_path)
예제 #7
0
    def __init__(self,
                 instance,
                 job,
                 task_id,
                 install_script=None,
                 linux_type=None,
                 user_data='',
                 skip_efs_mount=False):
        """Set up task bookkeeping and (re)create its local scratch directory.

        Args:
            instance: instance object this task runs on (stored as-is).
            job: owning job; ``job.name`` and ``job._run.name`` are used to
                build the scratch path.
            task_id: identifier of this task within the job.
            install_script: optional install script, stored as-is.
            linux_type: 'ubuntu' or 'amazon'; selects the ssh login name.
            user_data: accepted but not used by this constructor.
            skip_efs_mount: stored as-is on the task.

        Raises:
            AssertionError: if ``linux_type`` is not 'ubuntu' or 'amazon'.
        """
        self.initialize_called = False
        self.instance = instance
        self.job = job
        self.id = task_id
        self.install_script = install_script
        self._run_counter = 0
        self.cached_ip = None
        self.cached_public_ip = None
        self.skip_efs_mount = skip_efs_mount

        self.initialized = False

        # scratch is client-local space for temporary files
        self.scratch = "{}/{}.{}.{}.{}/scratch".format(TASKDIR_PREFIX,
                                                       job._run.name, job.name,
                                                       self.id,
                                                       0)  # u.now_micros())
        self.remote_scratch = '/tmp/tmux'
        #    self.log("Creating local scratch dir %s", self.scratch)
        # start from an empty scratch directory on every construction
        self._ossystem('rm -Rf ' + self.scratch)  # TODO: don't delete this?
        self._ossystem('mkdir -p ' + self.scratch)
        #    os.chdir(self.scratch)

        # todo: create taskdir
        self.connect_instructions = "waiting for initialize()"
        self.keypair_fn = u.get_keypair_fn(u.get_keypair_name())

        # username to use to ssh into instances
        # ec2-user or ubuntu
        # NOTE(review): both literals were masked as '******' in the reviewed
        # copy; values restored from the comment above -- confirm.
        if linux_type == 'ubuntu':
            self.username = 'ubuntu'
        elif linux_type == 'amazon':
            self.username = 'ec2-user'
        else:
            # Explicit raise (not `assert False`) so the check survives
            # `python -O`; the offending value is now interpolated into the
            # message.
            raise AssertionError(
                "Unknown linux type '%s', expected 'ubuntu' or 'amazon'." %
                (linux_type, ))

        self.taskdir = '/home/' + self.username
예제 #8
0
def main():
    """Remove initialization markers from running instances matching a fragment.

    The optional first command-line argument is matched as a substring
    against each running instance's name, public IP, instance id and private
    IP; with no argument the empty fragment matches every running instance.
    For each match, an ssh session is opened as the user named by the
    EC2_USER environment variable (default "ubuntu") and the files
    /tmp/is_initialized, /tmp/nv_setup_complete and *.sh in the login
    directory are removed.
    """
    fragment = ''
    if len(sys.argv) > 1:
        fragment = sys.argv[1]

    region = u.get_region()
    ec2 = boto3.resource('ec2', region_name=region)

    username = os.environ.get("EC2_USER", "ubuntu")
    print("Using username '%s'" % (username, ))

    for instance in ec2.instances.all():
        if instance.state['Name'] != 'running':
            continue

        name = u.get_name(instance.tags)
        # An instance may have no public/private IP (None); substitute '' so
        # the substring test cannot raise TypeError.
        searchable = (name,
                      instance.public_ip_address or '',
                      instance.id,
                      instance.private_ip_address or '')
        if not any(fragment in field for field in searchable):
            continue

        print("Uninitializing %s %s %s" %
              (name, instance.public_ip_address,
               instance.private_ip_address))

        key_file = u.get_keypair_fn(instance.key_name)
        ssh_client = u.SshClient(hostname=instance.public_ip_address,
                                 ssh_key=key_file,
                                 username=username)
        # `|| echo` keeps a missing marker file from being treated as an error
        ssh_client.run('rm /tmp/is_initialized || echo "failed 1"')
        ssh_client.run('rm /tmp/nv_setup_complete || echo "failed 2"')
        ssh_client.run('rm *.sh')  # remove install scripts
예제 #9
0
파일: sync.py 프로젝트: yaroslavvb/cluster
    def command(self, instance, pem_location=''):
        """Build, print and return the rsync argument list for ``instance``.

        The list syncs ``self.source`` to ``self.dest`` on the instance's
        public IP over ssh with host-key checking disabled, applying
        ``self.excludes``, ``self.modify_window`` and ``self.copy_links``.
        ``pem_location`` is accepted but not used here.
        """
        # one "--exclude <pattern>" pair per configured pattern
        exclude_args = [
            token for pattern in self.excludes
            for token in ('--exclude', pattern)
        ]

        # todo, rename no_strict_checking to ssh_command

        pem_path = u.get_keypair_fn()
        login = u.get_username(instance)
        host = instance.public_ip_address

        remote_shell = "ssh -i %s -o StrictHostKeyChecking=no" % (pem_path, )

        rsync_argv = ['rsync', '-arvce', remote_shell]
        rsync_argv.extend(exclude_args)
        if self.modify_window:
            rsync_argv.extend(['--update', '--modify-window=600'])
        if self.copy_links:
            rsync_argv.append('-L')
        rsync_argv.extend(
            ['-rv', self.source, login + "@" + host + ':' + self.dest])

        print("Running ")
        print(rsync_argv)
        return rsync_argv
예제 #10
0
def main():
    """Delete the AWS resources associated with DEFAULT_NAME.

    In order: the EFS entry named DEFAULT_NAME (mount targets first), then
    the VPC named VPC_NAME together with its subnets, internet gateways,
    route tables and security groups, then the keypair and the local key
    file. Each sub-resource deletion is best-effort: a failure is reported
    as 'failed' on stdout and logged through ``u.loge`` and the teardown
    continues. The final ``vpc.delete()`` is not guarded and may raise.

    Exits immediately (``sys.exit()``) when DEFAULT_NAME is 'nexus'.
    Reads the region from the AWS_DEFAULT_REGION environment variable
    (KeyError if unset).
    """
    # TODO: also bring down all the instances and wait for them to come down
    region = os.environ['AWS_DEFAULT_REGION']
    if DEFAULT_NAME == 'nexus':
        print("Nexus resources are protected, don't delete them")
        sys.exit()

    print("Deleting %s resources in region %s" % (
        DEFAULT_NAME,
        region,
    ))
    existing_vpcs = u.get_vpc_dict()
    ec2 = u.create_ec2_resource()

    def response_type(response):
        return 'ok' if u.is_good_response(response) else 'failed'

    def report_failure(error):
        # shared "best-effort" failure reporting for every deletion step
        sys.stdout.write('failed\n')
        u.loge(str(error) + '\n')

    # delete EFS
    efss = u.get_efs_dict()
    efs_id = efss.get(DEFAULT_NAME, '')
    efs_client = u.create_efs_client()
    if efs_id:
        try:
            # delete mount targets first
            print("About to delete %s (%s)" % (efs_id, DEFAULT_NAME))
            targets_response = efs_client.describe_mount_targets(
                FileSystemId=efs_id)
            assert u.is_good_response(targets_response)
            for mount_response in targets_response['MountTargets']:
                mount_target_id = mount_response['MountTargetId']
                sys.stdout.write('Deleting mount target %s ... ' %
                                 (mount_target_id, ))
                sys.stdout.flush()
                delete_response = efs_client.delete_mount_target(
                    MountTargetId=mount_target_id)
                print(response_type(delete_response))

            sys.stdout.write('Deleting EFS %s (%s)... ' %
                             (efs_id, DEFAULT_NAME))
            sys.stdout.flush()
            u.delete_efs_id(efs_id)

        except Exception as e:
            report_failure(e)

    if VPC_NAME in existing_vpcs:
        vpc = ec2.Vpc(existing_vpcs[VPC_NAME].id)
        print("Deleting VPC %s (%s) subresources:" % (VPC_NAME, vpc.id))

        for subnet in vpc.subnets.all():
            try:
                sys.stdout.write("Deleting subnet %s ... " % (subnet.id))
                sys.stdout.write(response_type(subnet.delete()) + '\n')
            except Exception as e:
                report_failure(e)

        for gateway in vpc.internet_gateways.all():
            sys.stdout.write("Deleting gateway %s ... " % (gateway.id))
            # todo: if instances are using VPC, this fails with
            # botocore.exceptions.ClientError: An error occurred (DependencyViolation) when calling the DetachInternetGateway operation: Network vpc-ca4abab3 has some mapped public address(es). Please unmap those public address(es) before detaching the gateway.
            # Guarded like the other sub-resource loops so that the failure
            # described above no longer aborts the rest of the teardown.
            try:
                sys.stdout.write('detached ... ' if u.is_good_response(
                    gateway.detach_from_vpc(VpcId=vpc.id)
                ) else ' detach_failed ')
                sys.stdout.write('deleted ' if u.is_good_response(
                    gateway.delete()) else ' delete_failed ')
                sys.stdout.write('\n')
            except Exception as e:
                report_failure(e)

        def describe_route_table(route_table):
            return "%s (%s)" % (route_table.id, u.get_name(route_table.tags))

        for route_table in vpc.route_tables.all():
            sys.stdout.write("Deleting route table %s ... " %
                             (describe_route_table(route_table)))
            try:
                sys.stdout.write(response_type(route_table.delete()) + '\n')
            except Exception as e:
                report_failure(e)

        def describe_security_group(security_group):
            return "%s (%s, %s)" % (security_group.id,
                                    u.get_name(security_group.tags),
                                    security_group.group_name)

        # TODO: this tries to remove default security group, maybe not remove it?
        for security_group in vpc.security_groups.all():
            sys.stdout.write('Deleting security group %s ... ' %
                             (describe_security_group(security_group)))
            try:
                sys.stdout.write(response_type(security_group.delete()) + '\n')
            except Exception as e:
                report_failure(e)

        sys.stdout.write("Deleting VPC %s ... " % (vpc.id))
        sys.stdout.write(response_type(vpc.delete()) + '\n')

    # delete keypair
    # NOTE(review): the remote keypair is looked up under DEFAULT_NAME while
    # the local file below is resolved from KEYPAIR_NAME -- confirm both
    # refer to the same keypair.
    keypairs = u.get_keypair_dict()
    keypair = keypairs.get(DEFAULT_NAME, '')
    if keypair:
        try:
            sys.stdout.write("Deleting keypair %s (%s) ... " %
                             (keypair.key_name, DEFAULT_NAME))
            sys.stdout.write(response_type(keypair.delete()) + '\n')
        except Exception as e:
            report_failure(e)

    keypair_fn = u.get_keypair_fn(KEYPAIR_NAME)
    if os.path.exists(keypair_fn):
        print("Deleting local keypair file %s" % (keypair_fn, ))
        os.system('rm -f ' + keypair_fn)