Example #1
def create_image(args):
    AWSACCID = _getawsaccid()
    conn = boto.ec2.connect_to_region(args.region,
                                      aws_access_key_id=AWSAKEY,
                                      aws_secret_access_key=AWSSKEY)
    if args.snapshotid == "" or args.snapshotid is None:
        print 'You have to pass the snapshot ID used to create the image with --snapshotid="snapid"'
        raise SystemExit(1)
    else:
        namei = raw_input("Enter name of image: ")
        descr = raw_input("Enter a description for image: ")
        print "Creating image from snapshot %s ..." % args.snapshotid
        ebs = EBSBlockDeviceType()
        ebs.snapshot_id = args.snapshotid
        block_map = BlockDeviceMapping()
        block_map['/dev/sda1'] = ebs
        try:
            if args.region == "eu-west-1":
                ret = conn.register_image(name=namei, description=descr, architecture='x86_64',
                                          kernel_id='aki-71665e05', root_device_name='/dev/sda1',
                                          block_device_map=block_map)
            else:
                ret = conn.register_image(name=namei, description=descr, architecture='x86_64',
                                          kernel_id='aki-b6aa75df', root_device_name='/dev/sda1',
                                          block_device_map=block_map)
            print "Image creation successful"
        except EC2ResponseError:
            print "Image creation error"
Example #2
    def create_node(self, name, distribution, metadata={}):
        size = self._default_size
        disk_size = 8

        with start_action(
            action_type=u"flocker:provision:aws:create_node",
            name=name,
            distribution=distribution,
            image_size=size,
            disk_size=disk_size,
            metadata=metadata,
        ):

            metadata = metadata.copy()
            metadata["Name"] = name

            disk1 = EBSBlockDeviceType()
            disk1.size = disk_size
            disk1.delete_on_termination = True
            diskmap = BlockDeviceMapping()
            diskmap["/dev/sda1"] = disk1

            images = self._connection.get_all_images(filters={"name": IMAGE_NAMES[distribution]})
            # Retry several times, no sleep between retries is needed.
            instance = poll_until(
                lambda: self._get_node(images[0].id, size, diskmap, metadata), repeat(0, 10), lambda x: None
            )
            return AWSNode(name=name, _provisioner=self, _instance=instance, distribution=distribution)
Example #3
    def _register_image(self, snapshot_id):
        conn = self.platform.new_ec2_conn()
    
        instance_id = self.platform.get_instance_id()
        instance = conn.get_all_instances([instance_id])[0].instances[0]

        block_device_map = BlockDeviceMapping(conn)

        root_vol = EBSBlockDeviceType(snapshot_id=snapshot_id)
        root_vol.delete_on_termination = True
        # Adding ephemeral devices
        for eph, device in EPH_STORAGE_MAPPING[linux.os['arch']].items():
            bdt = EBSBlockDeviceType(conn)
            bdt.ephemeral_name = eph
            block_device_map[device] = bdt

        root_partition = instance.root_device_name[:-1]
        if root_partition in self.platform.get_block_device_mapping().values():
            block_device_map[root_partition] = root_vol
        else:
            block_device_map[instance.root_device_name] = root_vol

        return conn.register_image(
            name=self.image_name,
            root_device_name=instance.root_device_name,
            block_device_map=block_device_map,
            kernel_id=instance.kernel,
            virtualization_type=instance.virtualization_type,
            ramdisk_id=self.platform.get_ramdisk_id(),
            architecture=instance.architecture)
Example #4
    def register_snap(self, snap_id, arch, name, aki=None, desc=None, ari=None,
                      pub=True, disk=False):
        """
        Register an EBS volume snapshot as an AMI. Returns the AMI ID. An arch,
        snapshot ID, and name for the AMI must be provided. Optionally
        a description, AKI ID, ARI ID and billing code may be specified too.
        disk is whether or not we are registering a disk image.
        """
        self.logger.info('Registering snap: %s' % (snap_id))
        snap = self.conn.get_all_snapshots([snap_id])[0]
        #Makes block device map
        ebs = EBSBlockDeviceType()
        ebs.snapshot_id = snap_id
        block_map = BlockDeviceMapping()

        if aki is None:
            raise Fedora_EC2Error('Need to specify an AKI')
        if disk:
            disk = '/dev/sda=%s' % snap_id
            root = '/dev/sda'
        else:
            disk = '/dev/sda1=%s' % snap_id
            root = '/dev/sda1'
        block_map[root] = ebs

        ami_id = self.conn.register_image(name=name, description=desc,
              image_location = '', architecture=arch, kernel_id=aki,
              ramdisk_id=ari,root_device_name=root, block_device_map=block_map)

        if not ami_id.startswith('ami-'):
            self._log_error('Could not register an AMI')
        self.logger.info('Registered an AMI: %s' % ami_id)
        return ami_id
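
For reference, a hypothetical call to the method above might look like this (the instance name and all IDs are illustrative placeholders, not values from the source):

# ami_id = helper.register_snap('snap-12345678', 'x86_64', 'fedora-ebs-ami',
#                               aki='aki-12345678', desc='Fedora EBS AMI')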
Example #5
    def create_node(self, name, distribution, metadata={}):
        size = self._default_size
        disk_size = 10

        with start_action(
                action_type=u"flocker:provision:aws:create_node",
                name=name,
                distribution=distribution,
                image_size=size,
                disk_size=disk_size,
                metadata=metadata,
        ):

            metadata = metadata.copy()
            metadata['Name'] = name

            disk1 = EBSBlockDeviceType()
            disk1.size = disk_size
            disk1.delete_on_termination = True
            diskmap = BlockDeviceMapping()
            diskmap['/dev/sda1'] = disk1

            images = self._connection.get_all_images(
                filters={'name': IMAGE_NAMES[distribution]}, )
            # Retry several times, no sleep between retries is needed.
            instance = poll_until(
                lambda: self._get_node(images[0].id, size, diskmap, metadata),
                repeat(0, 10), lambda x: None)
            return AWSNode(
                name=name,
                _provisioner=self,
                _instance=instance,
                distribution=distribution,
            )
Example #6
def create_instance():
    """Support function to create a new AWS instance."""
    from boto.ec2.blockdevicemapping import EBSBlockDeviceType, BlockDeviceMapping

    kwargs = dict(
        instance_type=conf.type,
        key_name=conf.key_pair,
        placement=conf.zone,
    )

    if conf.disk_size:
        # We want a larger EBS root volume, so override /dev/sda1.
        dev_root = EBSBlockDeviceType()
        dev_root.size = conf.disk_size
    
        # Create the mapping.
        dev_mapping = BlockDeviceMapping()
        dev_mapping['/dev/sda1'] = dev_root 

        kwargs['block_device_map'] = dev_mapping

    reservation = env.aws.run_instances(
        conf.ami,
        **kwargs)

    instance = env.aws.instance = reservation.instances[0]
    wait_for_status(instance, "Creating server", "running")
    env.aws.create_tags([instance.id], {'Name': env.server.name})

    print " Done. \nInstance built:", instance.public_dns_name

    return instance
Example #7
 def launch_instance(self):
     if not self.verify_settings():
         return
     is_instance_store = self.conn.get_all_images(self.config['ec2_ami_id'], filters={'root-device-type': 'instance-store'})
     if is_instance_store:
         block_map = None
     else:
         block_map = BlockDeviceMapping()
         root_device = self.config['ec2_root_device']
         block_map[root_device] = EBSBlockDeviceType()
         if self.config['ec2_size']:
             block_map[root_device].size = self.config['ec2_size']
         block_map[root_device].delete_on_termination = True
     reservation = self.conn.run_instances(
         self.config['ec2_ami_id'],
         key_name=self.config['ec2_key_name'],
         security_groups=self.config['ec2_security_groups'] or [self.config['ec2_security_group']],
         instance_type=self.config['ec2_instance_type'],
         placement=self.config['ec2_zone'],
         placement_group=self.config['ec2_placement_group'],
         monitoring_enabled=self.config['ec2_monitoring_enabled'],
         block_device_map=block_map,
         user_data=self.user_data)
     self.instance = reservation.instances[0]
     secs = RUN_INSTANCE_TIMEOUT
     rest_interval = 5
     while secs and not self.instance.state == 'running':
         time.sleep(rest_interval)
         secs = secs - rest_interval
         try:
             self.instance.update()
         except boto.exception.EC2ResponseError:
             pass
     if secs <= 0:
         errmsg = "run instance {0} failed after {1} seconds".format(
             self.instance.id, RUN_INSTANCE_TIMEOUT)
         LOG.error(errmsg)
     else:
         if self.config['hostname']:
             self.assign_name_tag()
         msg1 = "Started Instance: {0}\n".format(self.instance.id)
         LOG.info(msg1)
         print msg1
         p = int(self.config['ssh_port'])
         port = "-p {0} ".format(p) if p and not p == 22 else ''
         ## change user to 'root' for all non-Ubuntu systems
         user = self.config['sudouser'] if self.config['sudouser'] and self.config['ssh_import'] else 'ubuntu'
         #XXX - TODO: replace public dns with fqdn, where appropriate
         msg2 = "To access: ssh {0}{1}@{2}\n".format(
             '-p {0} '.format(port) if port else '',
             user,
             self.instance.public_dns_name)
         msg3 = "To terminate: shaker-terminate {0}".format(
                    self.instance.id)
         LOG.info(msg2)
         LOG.info(msg3)
         print msg2
         print msg3
Example #8
def create_image(conn):

    reservation = None
    if launch_type == 'on-demand':
        reservation = launch_and_wait(conn, base_instance_type, 1, base_image)

    instance = get_instances_from_reservation(reservation)[0]
    instance_ip = instance.ip_address
    instance_id = instance.id

    log_file = setup_instance(instance_ip)

    boot_disk = EBSBlockDeviceType()
    boot_disk.size = 50
    bdm = BlockDeviceMapping()
    bdm['/dev/sda1'] = boot_disk

    global node_image

    try:
        images = conn.get_all_images(owners=['self'])
        for image in images:
            image.deregister()
        f = open('cloud_configs/' + cloud + '/' + cloud + '_node_image.py',
                 'w')
        f.write("node_image = 'DEREGISTERED!'")
        f.close()
    except Exception:
        # Best effort: any previously registered images may already be gone.
        pass

    node_image = conn.create_image(instance_id,
                                   'AWS-pwa-node-image',
                                   block_device_mapping=bdm)

    image = conn.get_all_images(image_ids=[node_image])[0]

    f = open(log_file, 'a+')

    while image.state == 'pending':
        sleep(15)
        f.write("Image upload state: " + image.state + '\n')
        image.update()
    f.write("Image upload state: " + image.state + '\n')

    if image.state == 'failed':
        sys.exit("AMI CREATION FAILED!")

    f.write('\n' * 2)
    f.write('#' * 30 + '\n')
    f.write('#' * 30 + '\n\n')
    f.write("node_image = '" + str(node_image) + "'\n\n")
    f.write('#' * 30 + '\n')
    f.write('#' * 30 + '\n')
    f.close()

    f = open('cloud_configs/' + cloud + '/' + cloud + '_node_image.py', 'w')
    f.write("node_image = '" + str(node_image) + "'")
    f.close()
Example #9
def launch_instance(skip_updates=False):
    '''
    Launch an Oracle database instance.
    '''
    # Assume the keypair name is based on our env.key_filename.
    instance_key_name = os.path.basename(env.key_filename).replace('.pem', '')
    
    # Check that we have a security group configured already.
    security_group_list = ec2_connection.get_all_security_groups()
    security_group_found = False
    for security_group in security_group_list:
        if security_group.name == security_group_name:
            security_group_found = True
            break
    
    # If we didn't find it, create it.
    if not security_group_found:
        create_security_group()    
    
    # We want a larger EBS root volume, so override /dev/sda1.
    # Create an EBS device with 40GB allocated.
    dev_root = EBSBlockDeviceType()
    dev_root.size = 40
    
    # Create the mapping.
    dev_mapping = BlockDeviceMapping()
    dev_mapping['/dev/sda1'] = dev_root 
    
    reservation = ec2_connection.run_instances(ami_id, 
                       instance_type=instance_type, key_name=instance_key_name, 
                       security_groups=[security_group_name], 
                       block_device_map = dev_mapping)
    
    # This is hacky but (mostly) works.
    instance = reservation.instances[0]
    print(green("Launching instance on reservation {}.".format(instance, reservation)))
    
    '''
    Wait for instance state to change;
    if it doesn't change to running, then fail.
    '''    
    print(yellow('Waiting for instance to start...'))
    set_tags = False
    while instance.state == u'pending':
        # Try to set tags.
        if not set_tags:
            try:
                ec2_connection.create_tags([instance.id], {"Name": instance_name})
                set_tags = True
                print(green("Instance {} tagged.".format(instance)))
            except EC2ResponseError, e:
                print(red("Tagging failed; sleeping, updating instance, and trying again."))
        
        # Check up on its status every so often
        time.sleep(10)
        instance.update()
Example #10
 def launch_instance(self):
     if not self.verify_settings():
         return
     block_map = BlockDeviceMapping()
     root_device = self.config['ec2_root_device']
     block_map[root_device] = EBSBlockDeviceType()
     if self.config['ec2_size']:
         block_map[root_device].size = self.config['ec2_size']
     block_map[root_device].delete_on_termination = True
     reservation = self.conn.run_instances(
         self.config['ec2_ami_id'],
         key_name=self.config['ec2_key_name'],
         security_groups=self.config['ec2_security_groups']
         or [self.config['ec2_security_group']],
         instance_type=self.config['ec2_instance_type'],
         placement_group=self.config['ec2_placement_group'],
         monitoring_enabled=self.config['ec2_monitoring_enabled'],
         block_device_map=block_map,
         user_data=self.user_data)
     self.instance = reservation.instances[0]
     secs = RUN_INSTANCE_TIMEOUT
     rest_interval = 5
     while secs and not self.instance.state == 'running':
         time.sleep(rest_interval)
         secs = secs - rest_interval
         try:
             self.instance.update()
         except boto.exception.EC2ResponseError:
             pass
     if secs <= 0:
         errmsg = "run instance {0} failed after {1} seconds".format(
             self.instance.id, RUN_INSTANCE_TIMEOUT)
         LOG.error(errmsg)
     else:
         if self.config['hostname']:
             self.assign_name_tag()
         msg1 = "Started Instance: {0}\n".format(self.instance.id)
         LOG.info(msg1)
         print msg1
         p = int(self.config['ssh_port'])
         port = "-p {0} ".format(p) if p and not p == 22 else ''
         ## change user to 'root' for all non-Ubuntu systems
         user = self.config['sudouser'] if self.config[
             'sudouser'] and self.config['ssh_import'] else 'ubuntu'
         #XXX - TODO: replace public dns with fqdn, where appropriate
         msg2 = "To access: ssh {0}{1}@{2}\n".format(
             '-p {0} '.format(port) if port else '', user,
             self.instance.public_dns_name)
         msg3 = "To terminate: shaker-terminate {0}".format(
             self.instance.id)
         LOG.info(msg2)
         LOG.info(msg3)
         print msg2
         print msg3
Example #11
 def launch_instance(self):
     if not self.verify_settings():
         return
     block_map = BlockDeviceMapping()
     root_device = self.config["ec2_root_device"]
     block_map[root_device] = EBSBlockDeviceType()
     if self.config["ec2_size"]:
         block_map[root_device].size = self.config["ec2_size"]
     block_map[root_device].delete_on_termination = True
     for num, device_location in enumerate(self.config["ec2_ephemeral_devices"]):
         device = BlockDeviceType()
         device.ephemeral_name = "ephemeral%d" % num
         block_map[device_location] = device
     reservation = self.conn.run_instances(
         self.config["ec2_ami_id"],
         key_name=self.config["ec2_key_name"],
         security_groups=self.config["ec2_security_groups"] or [self.config["ec2_security_group"]],
         instance_type=self.config["ec2_instance_type"],
         placement=self.config["ec2_zone"],
         monitoring_enabled=self.config["ec2_monitoring_enabled"],
         block_device_map=block_map,
         user_data=self.user_data,
     )
     self.instance = reservation.instances[0]
     secs = RUN_INSTANCE_TIMEOUT
     rest_interval = 5
     while secs and not self.instance.state == "running":
         time.sleep(rest_interval)
         secs = secs - rest_interval
         try:
             self.instance.update()
         except boto.exception.EC2ResponseError:
             pass
     if secs <= 0:
         errmsg = "run instance %s failed after %d seconds" % (self.instance.id, RUN_INSTANCE_TIMEOUT)
         LOG.error(errmsg)
     else:
         if self.config["hostname"]:
             self.assign_name_tag()
         msg1 = "Started Instance: {0}\n".format(self.instance.id)
         LOG.info(msg1)
         print msg1
         p = int(self.config["ssh_port"])
         port = "-p {0} ".format(p) if p and not p == 22 else ""
         ## change user to 'root' for all non-Ubuntu systems
         user = self.config["sudouser"] if self.config["sudouser"] and self.config["ssh_import"] else "ubuntu"
         # XXX - TODO: replace public dns with fqdn, where appropriate
         msg2 = "To access: ssh {0}{1}@{2}\n" "To terminate: shaker-terminate {3}".format(
             port, user, self.instance.public_dns_name, self.instance.id
         )
         LOG.info(msg2)
         print msg2
Example #12
    def test_launch_config(self):
        # This unit test is based on #753 and #1343
        self.set_http_response(status_code=200)
        dev_sdf = EBSBlockDeviceType(snapshot_id='snap-12345')
        dev_sdg = EBSBlockDeviceType(snapshot_id='snap-12346')

        bdm = BlockDeviceMapping()
        bdm['/dev/sdf'] = dev_sdf
        bdm['/dev/sdg'] = dev_sdg

        lc = launchconfig.LaunchConfiguration(
            connection=self.service_connection,
            name='launch_config',
            image_id='123456',
            instance_type='m1.large',
            user_data='#!/bin/bash',
            security_groups=['group1', 'group2'],
            spot_price='price',
            block_device_mappings=[bdm],
            associate_public_ip_address=True,
            volume_type='atype',
            delete_on_termination=False,
            iops=3000)

        response = self.service_connection.create_launch_configuration(lc)

        self.assert_request_parameters(
            {
                'Action': 'CreateLaunchConfiguration',
                'BlockDeviceMappings.member.1.DeviceName': '/dev/sdf',
                'BlockDeviceMappings.member.1.Ebs.DeleteOnTermination':
                'false',
                'BlockDeviceMappings.member.1.Ebs.SnapshotId': 'snap-12345',
                'BlockDeviceMappings.member.2.DeviceName': '/dev/sdg',
                'BlockDeviceMappings.member.2.Ebs.DeleteOnTermination':
                'false',
                'BlockDeviceMappings.member.2.Ebs.SnapshotId': 'snap-12346',
                'EbsOptimized': 'false',
                'LaunchConfigurationName': 'launch_config',
                'ImageId': '123456',
                'UserData': base64.b64encode('#!/bin/bash').decode('utf-8'),
                'InstanceMonitoring.Enabled': 'false',
                'InstanceType': 'm1.large',
                'SecurityGroups.member.1': 'group1',
                'SecurityGroups.member.2': 'group2',
                'SpotPrice': 'price',
                'AssociatePublicIpAddress': 'true',
                'VolumeType': 'atype',
                'DeleteOnTermination': 'false',
                'Iops': 3000,
            },
            ignore_params_values=['Version'])
Example #13
def run_encryptor_instance(aws_svc, encryptor_image_id, snapshot, root_size,
                           guest_image_id, sg_id, update_ami=False):
    bdm = BlockDeviceMapping()
    guest_unencrypted_root = EBSBlockDeviceType(
        volume_type='gp2',
        snapshot_id=snapshot,
        delete_on_termination=True)
    # Use gp2 for fast burst I/O copying root drive
    bdm['/dev/sda4'] = guest_unencrypted_root
    if not update_ami:
        log.info('Launching encryptor instance with snapshot %s', snapshot)
        # They are creating an encrypted AMI instead of updating it
        # Use gp2 for fast burst I/O copying root drive
        guest_encrypted_root = EBSBlockDeviceType(
            volume_type='gp2',
            delete_on_termination=True)
        guest_encrypted_root.size = 2 * root_size + 1
        bdm['/dev/sda5'] = guest_encrypted_root
    else:
        log.info('Launching encryptor instance for updating %s',
                 guest_image_id)
        guest_encrypted_root = EBSBlockDeviceType(
            volume_type='gp2',
            snapshot_id=snapshot,
            delete_on_termination=True)

        guest_encrypted_root.size = root_size
        bdm['/dev/sda5'] = guest_encrypted_root

    instance = aws_svc.run_instance(encryptor_image_id,
                                    security_group_ids=[sg_id],
                                    block_device_map=bdm)
    aws_svc.create_tags(
        instance.id,
        name=NAME_ENCRYPTOR,
        description=DESCRIPTION_ENCRYPTOR % {'image_id': guest_image_id}
    )
    instance = _wait_for_instance(aws_svc, instance.id)
    log.info('Launched encryptor instance %s', instance.id)
    # Tag volumes.
    bdm = instance.block_device_mapping
    if not update_ami:
        aws_svc.create_tags(
            bdm['/dev/sda5'].volume_id, name=NAME_ENCRYPTED_ROOT_VOLUME)
    aws_svc.create_tags(
        bdm['/dev/sda2'].volume_id, name=NAME_METAVISOR_ROOT_VOLUME)
    aws_svc.create_tags(
        bdm['/dev/sda1'].volume_id, name=NAME_METAVISOR_GRUB_VOLUME)
    aws_svc.create_tags(
        bdm['/dev/sda3'].volume_id, name=NAME_METAVISOR_LOG_VOLUME)
    return instance
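
For orientation, the device layout the example above works with, as implied by its block device mapping and volume-tagging code (a reading of this snippet, not an official layout):

# /dev/sda1  metavisor grub volume
# /dev/sda2  metavisor root volume
# /dev/sda3  metavisor log volume
# /dev/sda4  unencrypted guest root, restored from the guest snapshot
# /dev/sda5  encrypted guest root, freshly created or updated in place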
Example #14
 def launch_instance(self):
     if not self.verify_settings():
         return
     is_instance_store = self.conn.get_all_images(self.config['ec2_ami_id'], filters={'root-device-type': 'instance-store'})
     if is_instance_store:
         block_map = None
     else:
         block_map = BlockDeviceMapping()
         root_device = self.config['ec2_root_device']
         block_map[root_device] = EBSBlockDeviceType()
         if self.config['ec2_size']:
             block_map[root_device].size = self.config['ec2_size']
         block_map[root_device].delete_on_termination = True
     opts = {
         'key_name': self.config['ec2_key_name'],
         'security_groups': self.config['ec2_security_groups'] or [self.config['ec2_security_group']],
         'instance_type': self.config['ec2_instance_type'],
         'placement': self.config['ec2_zone'],
         'placement_group': self.config['ec2_placement_group'],
         'monitoring_enabled': self.config['ec2_monitoring_enabled'],
         'block_device_map': block_map,
         'user_data': self.user_data
     }
     if self.config.get('ec2_subnet_id',False):
         # when providing subnet_id, must use security_group_ids and not
         # named security_groups or API call will fail.
         opts.pop('security_groups',None)
         opts['security_group_ids'] = self.config['ec2_security_group_ids'] or [self.config['ec2_security_group_id']]
         if not opts['security_group_ids']:
             raise AssertionError('Must specify ec2_security_group_id or ec2_security_group_ids with subnet_id')
         opts['subnet_id'] = self.config['ec2_subnet_id']
     reservation = self.conn.run_instances(self.config['ec2_ami_id'], **opts)
     self.instance = reservation.instances[0]
     secs = RUN_INSTANCE_TIMEOUT
     rest_interval = 5
     while secs and not self.instance.state == 'running':
         time.sleep(rest_interval)
         secs = secs - rest_interval
         try:
             self.instance.update()
         except boto.exception.EC2ResponseError:
             pass
     if secs <= 0:
         errmsg = "run instance {0} failed after {1} seconds".format(
             self.instance.id, RUN_INSTANCE_TIMEOUT)
         LOG.error(errmsg)
     else:
         if self.config['hostname']:
             self.assign_name_tag()
Example #16
def startInstance(ec2connection, hardwareProfile, ARCH, RHEL, AMI, SSHKEYNAME):
    conn_region = ec2connection
    map = BlockDeviceMapping()
    t = EBSBlockDeviceType()
    t.size = "15"
    # map = {'DeviceName':'/dev/sda','VolumeSize':'15'}
    map["/dev/sda1"] = t

    # blockDeviceMap = []
    # blockDeviceMap.append( {'DeviceName':'/dev/sda', 'Ebs':{'VolumeSize' : '100'} })

    if ARCH == "i386" and RHEL == "6.1":
        reservation = conn_region.run_instances(
            AMI, instance_type=hardwareProfile, key_name=SSHKEYNAME, block_device_map=map
        )
    elif ARCH == "x86_64" and RHEL == "6.1":
        reservation = conn_region.run_instances(
            AMI, instance_type=hardwareProfile, key_name=SSHKEYNAME, block_device_map=map
        )
    elif ARCH == "i386":
        reservation = conn_region.run_instances(
            AMI, instance_type=hardwareProfile, key_name=SSHKEYNAME, block_device_map=map
        )
    elif ARCH == "x86_64":
        reservation = conn_region.run_instances(
            AMI, instance_type=hardwareProfile, key_name=SSHKEYNAME, block_device_map=map
        )
    else:
        print "arch type is neither i386 or x86_64.. will exit"
        exit(1)

    myinstance = reservation.instances[0]

    time.sleep(5)
    while myinstance.update() != "running":
        time.sleep(5)
        print myinstance.update()

    instanceDetails = myinstance.__dict__
    pprint(instanceDetails)
    # region = instanceDetails['placement']
    # print 'region =' + region
    publicDNS = instanceDetails["public_dns_name"]
    print "public hostname = " + publicDNS
    # check for console output here to make sure ssh is up
    return publicDNS
Example #17
def create_server():
    """
    Creates EC2 Instance and saves it state in a local json file
    """
    # looks for an existing 'data.json' file, so that we don't start
    # additional ec2 instances when we don't need them.
    #
    if is_there_state():
        return True
    else:
        conn = connect_to_ec2()

        print(_green("Started..."))
        print(_yellow("...Creating EC2 instance..."))

        # we need a larger boot device to store our cached images
        dev_sda1 = EBSBlockDeviceType()
        dev_sda1.size = 120
        bdm = BlockDeviceMapping()
        bdm['/dev/sda1'] = dev_sda1

        # get an ec2 ami image object with our chosen ami
        image = conn.get_all_images(env.ec2_ami)[0]
        # start a new instance
        reservation = image.run(1,
                                1,
                                key_name=env.ec2_key_pair,
                                security_groups=env.ec2_security,
                                block_device_map=bdm,
                                instance_type=env.ec2_instancetype)

        # and get our instance_id
        instance = reservation.instances[0]
        # add a tag to our instance
        conn.create_tags([instance.id], {"Name": env.ec2_instance_name})
        #  and loop and wait until ssh is available
        while instance.state == u'pending':
            yellow("Instance state: %s" % instance.state)
            sleep(10)
            instance.update()
        wait_for_ssh(instance.public_dns_name)

        green("Instance state: %s" % instance.state)
        green("Public dns: %s" % instance.public_dns_name)
        # finally save the details of our new instance into the local state file
        save_state_locally(instance.id)
Example #18
def get_block_device(instance_type, ebs_vol_size):
    block_map = BlockDeviceMapping()

    if ebs_vol_size > 0:
        device = EBSBlockDeviceType()
        device.size = ebs_vol_size
        device.delete_on_termination = True
        block_map["/dev/sdv"] = device

    for i in range(get_num_disks(instance_type)):
        dev = BlockDeviceType()
        dev.ephemeral_name = 'ephemeral%d' % i
        # The first ephemeral drive is /dev/sdb.
        name = '/dev/sd' + string.ascii_letters[i + 1]
        block_map[name] = dev

    return block_map
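
As a sketch of the result: for a hypothetical instance type reporting two ephemeral disks, get_block_device(instance_type, 10) would produce a mapping equivalent to:

# {'/dev/sdv': EBS volume, 10 GB, delete_on_termination=True,
#  '/dev/sdb': ephemeral0,
#  '/dev/sdc': ephemeral1}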
Example #19
    def register_ebs_ami(self, snapshot_id, arch = 'x86_64', default_ephem_map = True,
                         img_name = None, img_desc = None):
        # register against snapshot
        try:
            aki=PVGRUB_AKIS[self.region.name][arch]
        except KeyError:
            raise Exception("Unable to determine pvgrub hd00 AKI for region (%s) arch (%s)" % (self.region.name, arch))

        if not img_name:
            rand_id = random.randrange(2**32)
            # These names need to be unique, hence the pseudo-uuid
            img_name='EBSHelper AMI - %s - uuid-%x' % (snapshot_id, rand_id)
        if not img_desc:
            img_desc='Created directly from volume snapshot %s' % (snapshot_id)

        self.log.debug("Registering snapshot (%s) as new EBS AMI" % (snapshot_id))
        ebs = EBSBlockDeviceType()
        ebs.snapshot_id = snapshot_id
        ebs.delete_on_termination = True
        block_map = BlockDeviceMapping()
        block_map['/dev/sda'] = ebs
        # The ephemeral mappings are automatic with S3 images
        # For EBS images we need to make them explicit
        # These settings are required to make the same fstab work on both S3 and EBS images
        if default_ephem_map:
            e0 = EBSBlockDeviceType()
            e0.ephemeral_name = 'ephemeral0'
            e1 = EBSBlockDeviceType()
            e1.ephemeral_name = 'ephemeral1'
            block_map['/dev/sdb'] = e0
            block_map['/dev/sdc'] = e1
        result = self.conn.register_image(name=img_name, description=img_desc,
                           architecture=arch,  kernel_id=aki,
                           root_device_name='/dev/sda', block_device_map=block_map)
        return str(result)
Example #21
def diag(aws_svc=None,
         region='us-west-2',
         instance_id=None,
         snapshot_id=None,
         vpc_id=None,
         subnet_id=None,
         security_group_ids=None,
         diag_instance_type='m3.medium',
         ssh_keypair=None):
    if instance_id:
        snapshot_id = snapshot_log_volume(aws_svc, instance_id).id
        log.info("Waiting for 30 seconds for snapshot to be available")
        time.sleep(30)

    diag_image = DIAG_IMAGES_BY_REGION[region]

    log.info("Launching diag instance")

    if not security_group_ids:
        vpc_id = None
        if subnet_id:
            subnet = aws_svc.get_subnet(subnet_id)
            vpc_id = subnet.vpc_id
        temp_sg_id = create_diag_security_group(aws_svc, vpc_id=vpc_id).id
        security_group_ids = [temp_sg_id]

    log_volume = EBSBlockDeviceType(delete_on_termination=True,
                                    snapshot_id=snapshot_id)
    bdm = BlockDeviceMapping()

    # Choose /dev/sda3 since it is the first free mountpoint
    bdm['/dev/sda3'] = log_volume

    diag_instance = aws_svc.run_instance(diag_image,
                                         instance_type=diag_instance_type,
                                         ebs_optimized=False,
                                         subnet_id=subnet_id,
                                         security_group_ids=security_group_ids,
                                         block_device_map=bdm)

    aws_svc.create_tags(diag_instance.id,
                        name=NAME_DIAG_INSTANCE % {'snapshot_id': snapshot_id},
                        description=DESCRIPTION_DIAG_INSTANCE %
                        {'snapshot_id': snapshot_id})

    wait_for_instance(aws_svc, diag_instance.id)

    diag_instance = aws_svc.get_instance(diag_instance.id)
    print "Diag instance id: %s" % diag_instance.id
    if diag_instance.ip_address:
        print "IP address: %s" % diag_instance.ip_address
    if diag_instance.private_ip_address:
        print "Private IP address: %s" % diag_instance.private_ip_address
    print "User: root"
    print "SSH Keypair: %s" % ssh_keypair
    print "Log volume mountpoint: /dev/xbd2a for PV, /dev/xbd2e for HVM"
Example #22
    def startInstance(self, ami, ec2_keyName, sec_group, hwp):
        map = BlockDeviceMapping()
        t = EBSBlockDeviceType()
        t.size = '15'
        #map = {'DeviceName':'/dev/sda','VolumeSize':'15'}
        map['/dev/sda1'] = t
        reservation = self.connection.run_instances(ami,
             instance_type=hwp, key_name=ec2_keyName,
             security_groups=sec_group, block_device_map=map)

        myinstance = reservation.instances[0]

        time.sleep(5)
        while myinstance.update() != 'running':
            time.sleep(5)
            print myinstance.update()

        #pprint(instanceDetails)
        return myinstance
Example #23
 def launch_instance(self):
     if not self.verify_settings():
         return
     is_instance_store = self.conn.get_all_images(
         self.config['ec2_ami_id'],
         filters={'root-device-type': 'instance-store'})
     if is_instance_store:
         block_map = None
     else:
         block_map = BlockDeviceMapping()
         root_device = self.config['ec2_root_device']
         block_map[root_device] = EBSBlockDeviceType()
         if self.config['ec2_size']:
             block_map[root_device].size = self.config['ec2_size']
         block_map[root_device].delete_on_termination = True
     reservation = self.conn.run_instances(
         self.config['ec2_ami_id'],
         key_name=self.config['ec2_key_name'],
         security_groups=self.config['ec2_security_groups']
         or [self.config['ec2_security_group']],
         instance_type=self.config['ec2_instance_type'],
         placement=self.config['ec2_zone'],
         placement_group=self.config['ec2_placement_group'],
         monitoring_enabled=self.config['ec2_monitoring_enabled'],
         block_device_map=block_map,
         user_data=self.user_data)
     self.instance = reservation.instances[0]
     secs = RUN_INSTANCE_TIMEOUT
     rest_interval = 5
     while secs and not self.instance.state == 'running':
         time.sleep(rest_interval)
         secs = secs - rest_interval
         try:
             self.instance.update()
         except boto.exception.EC2ResponseError:
             pass
     if secs <= 0:
         errmsg = "run instance {0} failed after {1} seconds".format(
             self.instance.id, RUN_INSTANCE_TIMEOUT)
         LOG.error(errmsg)
     else:
         if self.config['hostname']:
             self.assign_name_tag()
Example #24
    def test_launch_config(self):
        # This unit test is based on #753 and #1343
        self.set_http_response(status_code=200)
        dev_sdf = EBSBlockDeviceType(snapshot_id='snap-12345')
        dev_sdg = EBSBlockDeviceType(snapshot_id='snap-12346')

        bdm = BlockDeviceMapping()
        bdm['/dev/sdf'] = dev_sdf
        bdm['/dev/sdg'] = dev_sdg

        lc = launchconfig.LaunchConfiguration(
            connection=self.service_connection,
            name='launch_config',
            image_id='123456',
            instance_type='m1.large',
            security_groups=['group1', 'group2'],
            spot_price='price',
            block_device_mappings=[bdm])

        response = self.service_connection.create_launch_configuration(lc)

        self.assert_request_parameters(
            {
                'Action': 'CreateLaunchConfiguration',
                'BlockDeviceMappings.member.1.DeviceName': '/dev/sdf',
                'BlockDeviceMappings.member.1.Ebs.DeleteOnTermination':
                'false',
                'BlockDeviceMappings.member.1.Ebs.SnapshotId': 'snap-12345',
                'BlockDeviceMappings.member.2.DeviceName': '/dev/sdg',
                'BlockDeviceMappings.member.2.Ebs.DeleteOnTermination':
                'false',
                'BlockDeviceMappings.member.2.Ebs.SnapshotId': 'snap-12346',
                'EbsOptimized': 'false',
                'LaunchConfigurationName': 'launch_config',
                'ImageId': '123456',
                'InstanceMonitoring.Enabled': 'false',
                'InstanceType': 'm1.large',
                'SecurityGroups.member.1': 'group1',
                'SecurityGroups.member.2': 'group2',
                'SpotPrice': 'price',
            },
            ignore_params_values=['Version'])
Example #25
 def parse_block_device_args(self, block_device_maps_args):
     block_device_map = BlockDeviceMapping()
     for block_device_map_arg in block_device_maps_args:
         parts = block_device_map_arg.split('=')
         if len(parts) > 1:
             device_name = parts[0]
             block_dev_type = EBSBlockDeviceType()
             value_parts = parts[1].split(':')
             if value_parts[0].startswith('snap'):
                 block_dev_type.snapshot_id = value_parts[0]
             elif value_parts[0].startswith('ephemeral'):
                 block_dev_type.ephemeral_name = value_parts[0]
             if len(value_parts) > 1:
                 block_dev_type.size = int(value_parts[1])
             if len(value_parts) > 2:
                 if value_parts[2] == 'true':
                     block_dev_type.delete_on_termination = True
             block_device_map[device_name] = block_dev_type
     return block_device_map
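
The argument strings this parser accepts have the form device=value[:size[:delete]]; a few illustrative inputs (all values hypothetical):

# '/dev/sdb=snap-12345678:20:true'  -> 20 GB volume from a snapshot,
#                                      deleted on termination
# '/dev/sdc=ephemeral0'             -> first instance-store (ephemeral) volume
# '/dev/sdd=snap-87654321'          -> volume from a snapshot, default size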
Example #26
def startInstance(ec2connection, hardwareProfile):
    conn_region = ec2connection
    map = BlockDeviceMapping() 
    t = EBSBlockDeviceType()
    t.size = '15'
    #map = {'DeviceName':'/dev/sda','VolumeSize':'15'}
    map['/dev/sda1'] = t  

    #blockDeviceMap = []
    #blockDeviceMap.append( {'DeviceName':'/dev/sda', 'Ebs':{'VolumeSize' : '100'} })

    if ARCH == 'i386' and RHEL == '6.1':
        reservation = conn_region.run_instances(AMI, instance_type=hardwareProfile, key_name=SSHKEYNAME, block_device_map=map )
    elif ARCH == 'x86_64' and RHEL == '6.1':
        reservation = conn_region.run_instances(AMI, instance_type=hardwareProfile, key_name=SSHKEYNAME, block_device_map=map )
    elif ARCH == 'i386':
        reservation = conn_region.run_instances(AMI, instance_type=hardwareProfile, key_name=SSHKEYNAME, block_device_map=map )
    elif ARCH == 'x86_64':
        reservation = conn_region.run_instances(AMI, instance_type=hardwareProfile, key_name=SSHKEYNAME, block_device_map=map)
    else:
        print "arch type is neither i386 or x86_64.. will exit"
        exit(1)
        
    myinstance = reservation.instances[0]
    
    time.sleep(5)
    while myinstance.update() != 'running':
        time.sleep(5)
        print myinstance.update()
        
    instanceDetails = myinstance.__dict__
    pprint(instanceDetails)
    #region = instanceDetails['placement']
    #print 'region =' + region
    publicDNS = instanceDetails['public_dns_name']
    print 'public hostname = ' + publicDNS
   
    
    # check for console output here to make sure ssh is up
    return publicDNS
Example #28
 def launch_instance(self):
     if not self.verify_settings():
         return
     is_instance_store = self.conn.get_all_images(self.config['ec2_ami_id'], filters={'root-device-type': 'instance-store'})
     if is_instance_store:
         block_map = None
     else:
         block_map = BlockDeviceMapping()
         root_device = self.config['ec2_root_device']
         block_map[root_device] = EBSBlockDeviceType()
         if self.config['ec2_size']:
             block_map[root_device].size = self.config['ec2_size']
         block_map[root_device].delete_on_termination = True
     reservation = self.conn.run_instances(
         self.config['ec2_ami_id'],
         key_name=self.config['ec2_key_name'],
         security_groups=self.config['ec2_security_groups'] or [self.config['ec2_security_group']],
         instance_type=self.config['ec2_instance_type'],
         placement=self.config['ec2_zone'],
         placement_group=self.config['ec2_placement_group'],
         monitoring_enabled=self.config['ec2_monitoring_enabled'],
         block_device_map=block_map,
         user_data=self.user_data)
     self.instance = reservation.instances[0]
     secs = RUN_INSTANCE_TIMEOUT
     rest_interval = 5
     while secs and not self.instance.state == 'running':
         time.sleep(rest_interval)
         secs = secs - rest_interval
         try:
             self.instance.update()
         except boto.exception.EC2ResponseError:
             pass
     if secs <= 0:
         errmsg = "run instance {0} failed after {1} seconds".format(
             self.instance.id, RUN_INSTANCE_TIMEOUT)
         LOG.error(errmsg)
     else:
         if self.config['hostname']:
             self.assign_name_tag()
Example #29
def create_image(args):
    AWSACCID = _getawsaccid()
    conn = boto.ec2.connect_to_region(args.region,
                                      aws_access_key_id=AWSAKEY,
                                      aws_secret_access_key=AWSSKEY)
    if args.snapshotid == "" or args.snapshotid is None:
        print 'You have to pass the snapshot ID used to create the image with --snapshotid="snapid"'
        raise SystemExit(1)
    else:
        namei = raw_input("Enter name of image: ")
        descr = raw_input("Enter a description for image: ")
        vtype = raw_input("Enter a virtualization type for image:[hvm|paravirtual] ")
        print "Creating image from snapshot %s ..." % args.snapshotid
        ebs = EBSBlockDeviceType()
        ebs.snapshot_id = args.snapshotid
        block_map = BlockDeviceMapping()
        block_map['/dev/sda1'] = ebs
        print vtype

        try:
            if args.region == "eu-west-1":
                if vtype == "hvm":
                    # HVM images are registered without a kernel (AKI) ID.
                    ret = conn.register_image(name=namei, description=descr, architecture='x86_64',
                                              root_device_name='/dev/sda1', block_device_map=block_map,
                                              virtualization_type='hvm')
                else:
                    ret = conn.register_image(name=namei, description=descr, architecture='x86_64',
                                              kernel_id='aki-71665e05', root_device_name='/dev/sda1',
                                              block_device_map=block_map, virtualization_type='paravirtual')
            else:
                if vtype == "hvm":
                    ret = conn.register_image(name=namei, description=descr, architecture='x86_64',
                                              kernel_id='aki-b6aa75df', root_device_name='/dev/sda1',
                                              block_device_map=block_map, virtualization_type='hvm')
                else:
                    ret = conn.register_image(name=namei, description=descr, architecture='x86_64',
                                              kernel_id='aki-b6aa75df', root_device_name='/dev/sda1',
                                              block_device_map=block_map, virtualization_type='paravirtual')
            print "Image creation successful"
        except EC2ResponseError:
            print "Image creation error"
Example #30
def create_image(name=IMAGE_NAME, description=IMAGE_DESCRIPTION):
    """
    Create an EBS AMI from the build volume.
    
    :type name: string
    :param name: The name of the AMI to use.
    
    :type description: string
    :param description: The description of the AMI.
    
    :rtype: class:`boto.ec2.Image` or ``None``
    :return: The image produced.
    """
    instance, volume, device_name = get_volume()
    snapshot = create_snapshot(IMAGE_NAME)
    image = None
    if snapshot is None:
        print red('Cannot create image with no snapshot')
    else:
        # Create block device mapping
        ebs = EBSBlockDeviceType(snapshot_id=snapshot.id,
                                 delete_on_termination=True)
        ephemeral0 = BlockDeviceType(ephemeral_name='ephemeral0')
        swap = BlockDeviceType(ephemeral_name='ephemeral1')
        block_map = BlockDeviceMapping()
        block_map['/dev/sda1'] = ebs
        block_map['/dev/sda2'] = ephemeral0
        block_map['/dev/sda3'] = swap

        image_id = instance.connection.register_image(
            name,
            description,
            architecture=instance.architecture,
            kernel_id=get_kernel(),
            root_device_name='/dev/sda1',
            block_device_map=block_map)

        print green('Image id is %s' % image_id)
        time.sleep(5)
        image = instance.connection.get_all_images((image_id, ))[0]
        add_name(image, name)
    return image
Example #31
def build_block_device_map(source_image, target_snapshot_id,
                           source_volume_size):
    """Create a block device map which is used for the copied AMI.
    The created block device map contains a root volumes with 8GB of storage
    on general purpose SSD (gp2).
    """

    root_device_name = source_image.root_device_name

    del_root_volume = source_image.block_device_mapping[
        root_device_name].delete_on_termination

    block_device_map = BlockDeviceMapping()
    block_device_map[root_device_name] = EBSBlockDeviceType(
        snapshot_id=target_snapshot_id,
        size=source_volume_size,
        volume_type='gp2',
        delete_on_termination=del_root_volume)

    return block_device_map
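
A hypothetical usage of the helper above, registering a copied AMI in the target region (all IDs and names are illustrative placeholders):

# source_image = conn.get_all_images(image_ids=['ami-12345678'])[0]
# bdm = build_block_device_map(source_image, 'snap-87654321', 8)
# conn.register_image(name='copied-ami',
#                     architecture=source_image.architecture,
#                     root_device_name=source_image.root_device_name,
#                     virtualization_type=source_image.virtualization_type,
#                     block_device_map=bdm)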
Example #32
    def register_ebs_ami(self, snapshot_id, arch="x86_64", default_ephem_map=True, img_name=None, img_desc=None):
        # register against snapshot
        try:
            aki = PVGRUB_AKIS[self.region.name][arch]
        except KeyError:
            raise Exception("Unable to find pvgrub hd00 AKI for %s, arch (%s)" % (self.region.name, arch))
        if not img_name:
            rand_id = random.randrange(2 ** 32)
            # These names need to be unique, hence the pseudo-uuid
            img_name = "EBSHelper AMI - %s - uuid-%x" % (snapshot_id, rand_id)
        if not img_desc:
            img_desc = "Created directly from volume snapshot %s" % snapshot_id

        self.log.debug("Registering %s as new EBS AMI" % snapshot_id)
        self.create_sgroup("ec2helper-vnc-ssh-%x" % random.randrange(2 ** 32), allow_vnc=True)
        ebs = EBSBlockDeviceType()
        ebs.snapshot_id = snapshot_id
        ebs.delete_on_termination = True
        block_map = BlockDeviceMapping()
        block_map["/dev/sda"] = ebs
        # The ephemeral mappings are automatic with S3 images
        # For EBS images we need to make them explicit
        # These settings are required to make the same fstab work on both S3
        # and EBS images
        if default_ephem_map:
            e0 = EBSBlockDeviceType()
            e0.ephemeral_name = "ephemeral0"
            e1 = EBSBlockDeviceType()
            e1.ephemeral_name = "ephemeral1"
            block_map["/dev/sdb"] = e0
            block_map["/dev/sdc"] = e1
        result = self.conn.register_image(
            name=img_name,
            description=img_desc,
            architecture=arch,
            kernel_id=aki,
            root_device_name="/dev/sda",
            block_device_map=block_map,
        )
        sleep(10)
        new_amis = self.conn.get_all_images([result])
        new_amis[0].add_tag("Name", resource_tag)

        return str(result)
Example #33
def build(hosts, cred, dry, inventory='hosts'):
    hret = {}
    old_state = {}
    con = None
    for h in hosts:
        logger.info("    Run action on host [%s]" % (h))
        hret[h] = {}
        hv = {}
        hv = vmbuilder.utils.load_host_vars(h, inventory=inventory)
        hvars = hv['VM_PROVIDER']
        if con is None:
            con = _connect(hvars['region'], cred)
        reservations = con.get_all_reservations(filters={"tag:Name": h})
        old_state[h] = "absent"
        for reservation in reservations:
            instance = reservation.instances[0]
            if instance.state != 'terminated':
                hret[h]['instance'] = instance
                old_state[h] = "present"
                logger.info("      Server [%s] is already present" % (h))

        if old_state[h] == 'present':
            continue

        bdm = None
        if 'disk_size' in hvars:
            try:
                dev_sda1 = EBSBlockDeviceType()
                dev_sda1.size = hvars['disk_size']
                dev_sda1.delete_on_termination = True
                bdm = BlockDeviceMapping()
                bdm['/dev/sda1'] = dev_sda1
            except Exception as e:
                logger.error("Error building block device for server: %s" % (e))
                exit(1)

        try:
            reservation = con.run_instances(
                hvars['ami'],
                key_name=hvars['key'],
                instance_type=hvars['vmtype'],
                security_group_ids=[hvars['security']],
                subnet_id=hvars['subnet'],
                block_device_map=bdm,
                dry_run=dry
            )
            hret[h]['instance'] = reservation.instances[0]
        except Exception as e:
            logger.error("Error building server: %s" % (e))
            exit(1)

    for h in hosts:
        hv = vmbuilder.utils.load_host_vars(h, inventory=inventory)
        hvars = hv['VM_PROVIDER']
        instance = hret[h]['instance']
        status = instance.update()
        if old_state[h] == 'absent':
            logger.info("        Waiting for [%s] to be launched..." % (h))
            while status == 'pending':
                time.sleep(5)
                status = instance.update()

        if old_state[h] == 'present':
            logger.info("        State is running with IP [%s]" % (instance.private_ip_address))
        elif status == 'running':
            logger.info("        State changed to running with IP [%s]" % (instance.private_ip_address))
        else:
            logger.error("        Status of [%s] is [%s]" % (h, status))

        instance.add_tag("Name", "%s" % (h))
        for cur_tag in hvars['tags']:
            instance.add_tag(cur_tag, hvars['tags'][cur_tag])

        if 'extra_disks' in hvars and old_state[h] == 'absent':
            try:
                for cur_disk in hvars['extra_disks']:
                    cur_vol = con.create_volume(cur_disk['size'], instance.placement)
                    status = cur_vol.status
                    while status != 'available':
                        logger.info("          Waiting for volume [%s] to be launched..." % (cur_vol))
                        time.sleep(10)
                        status = cur_vol.update()
                    con.attach_volume(cur_vol.id, instance.id, '/dev/' + cur_disk['device'])
            except Exception as e:
                logger.error("Error Attaching new disks: %s" % (e))
                exit(1)

        instance_volumes = con.get_all_volumes(filters={'attachment.instance-id': instance.id})
        for counter, cur_vol in enumerate(instance_volumes):
            cur_vol.add_tag("Name", "%s_disk%d" % (h.split('.')[0], counter))

        hret[h]['private_ip_address'] = instance.private_ip_address
        # If requested, associate a new elastic IP with the host and create a security group to whitelist external IPs
        if 'assosiate_eip' in hvars and hvars['assosiate_eip'] is True:
            if instance.ip_address is None:
                eip = con.allocate_address()
                con.associate_address(instance.id, eip.public_ip)
                logger.info("          Adding public IP [%s]" % (eip.public_ip))
                hret[h]['public_ip_address'] = eip.public_ip
            if 'whitelisted_ips' in hvars:
                logger.info("          Whitelisting IPs [%s]" % (hvars['whitelisted_ips']))
                ips = hvars['whitelisted_ips'].split(',')
                project = hvars['tags']['Project']
                security = hvars['security']
                _create_security_group(con, instance, project, ips, security)
    return hret
Example #34
def launch_cluster(conn, opts, cluster_name):
    template_vars = {
        'cluster_name':cluster_name,
        'master_security_group': cluster_name + "-master",
        'slave_security_group': cluster_name + "-slaves",
        'discovery_security_group': cluster_name + "-discovery"
    }

    if opts.copy_aws_credentials:
        if opts.deploy_aws_key_id:
            template_vars['aws_key']=opts.deploy_aws_key_id
        else:
            template_vars['aws_key']=opts.aws_access_key_id

        if opts.deploy_aws_key_secret:
            template_vars['aws_secret']=opts.deploy_aws_key_secret
        else:
            template_vars['aws_secret']=opts.aws_secret_access_key

    if opts.identity_file is None:
        print("ERROR: Must provide an identity file (-i) for ssh connections.", file=stderr)
        sys.exit(1)

    if opts.key_pair is None:
        print("ERROR: Must provide a key pair name (-k) to use on instances.", file=stderr)
        sys.exit(1)

    print("Setting up security groups...")
    master_group = get_or_make_group(conn, template_vars['master_security_group'], opts.vpc_id)
    slave_group = get_or_make_group(conn, template_vars['slave_security_group'], opts.vpc_id)
    discovery_group = get_or_make_group(conn, template_vars['discovery_security_group'], opts.vpc_id)
    authorized_address = opts.authorized_address

    if master_group.rules == []:  # Group was just now created
        if opts.vpc_id is None:
            master_group.authorize(src_group=master_group)
            master_group.authorize(src_group=slave_group)
            master_group.authorize(src_group=discovery_group)
        else:
            master_group.authorize(ip_protocol='icmp', from_port=-1, to_port=-1,
                                   src_group=discovery_group)
            master_group.authorize(ip_protocol='tcp', from_port=0, to_port=65535,
                                   src_group=discovery_group)
            master_group.authorize(ip_protocol='udp', from_port=0, to_port=65535,
                                   src_group=discovery_group)
        master_group.authorize('tcp', 22, 22, authorized_address)

    if slave_group.rules == []:  # Group was just now created
        if opts.vpc_id is None:
            slave_group.authorize(src_group=master_group)
            slave_group.authorize(src_group=slave_group)
            slave_group.authorize(src_group=discovery_group)
        else:
            slave_group.authorize(ip_protocol='icmp', from_port=-1, to_port=-1,
                                  src_group=discovery_group)
            slave_group.authorize(ip_protocol='tcp', from_port=0, to_port=65535,
                                  src_group=discovery_group)
            slave_group.authorize(ip_protocol='udp', from_port=0, to_port=65535,
                                  src_group=discovery_group)
        slave_group.authorize('tcp', 22, 22, authorized_address)

    if discovery_group.rules == []:  # Group was just now created
        if opts.vpc_id is None:
            discovery_group.authorize(src_group=master_group)
            discovery_group.authorize(src_group=slave_group)
            discovery_group.authorize(src_group=discovery_group)
        else:
            discovery_group.authorize(ip_protocol='icmp', from_port=-1, to_port=-1,
                                      src_group=discovery_group)
            discovery_group.authorize(ip_protocol='tcp', from_port=0, to_port=65535,
                                      src_group=discovery_group)
            discovery_group.authorize(ip_protocol='udp', from_port=0, to_port=65535,
                                      src_group=discovery_group)

    # Check if instances are already running in our groups
    existing_masters, existing_slaves = get_existing_cluster(conn, opts, cluster_name,
                                                             die_on_error=False)
    if existing_slaves or (existing_masters and not opts.use_existing_master):
        print("ERROR: There are already instances running in group %s or %s" %
              (master_group.name, slave_group.name), file=stderr)
        sys.exit(1)

    # Figure out Spark AMI
    if opts.ami is None:
        opts.ami = get_ami(opts)

    # we use group ids to work around https://github.com/boto/boto/issues/350
    additional_group_ids = []
    if opts.additional_security_group:
        all_groups = conn.get_all_security_groups()
        additional_group_ids = []
        for group in opts.additional_security_group.split(','):
            additional_group_ids += [sg.id for sg in all_groups if group in (sg.name, sg.id)]

    template_vars['security_groups'] = template_vars['discovery_security_group']

    print("Launching instances...")

    try:
        image = conn.get_all_images(image_ids=[opts.ami])[0]
    except:
        print("Could not find AMI " + opts.ami, file=stderr)
        sys.exit(1)

    # Create block device mapping so that we can add EBS volumes if asked to.
    # The first drive is attached as /dev/sds, 2nd as /dev/sdt, ... /dev/sdz
    block_map = BlockDeviceMapping()
    if opts.ebs_vol_size > 0:
        ebs_devices = []
        for i in range(opts.ebs_vol_num):
            device = EBSBlockDeviceType()
            device_id = "/dev/sd" + chr(ord('s') + i)
            device.size = opts.ebs_vol_size
            device.volume_type = opts.ebs_vol_type
            device.delete_on_termination = True
            block_map[device_id] = device
            # append() adds the whole device path; += would splice in
            # the string character by character.
            ebs_devices.append(device_id)
        template_vars['ebs_devices'] = ' '.join(ebs_devices)

    # AWS ignores the AMI-specified block device mapping for M3 (see SPARK-3342).
    if opts.instance_type.startswith('m3.'):
        local_devices = []
        for i in range(get_num_disks(opts.instance_type)):
            dev = BlockDeviceType()
            dev.ephemeral_name = 'ephemeral%d' % i
            # The first ephemeral drive is /dev/sdb.
            name = '/dev/sd' + string.ascii_letters[i + 1]
            block_map[name] = dev
            local_devices.append(name)
        template_vars['local_devices'] = ' '.join(local_devices)

    master_user_data_content = get_user_data(opts.master_user_data, template_vars)
    slave_user_data_content = get_user_data(opts.slave_user_data, template_vars)

    # Launch slaves
    if opts.spot_price is not None:
        # Launch spot instances with the requested price
        print("Requesting %d slaves as spot instances with price $%.3f" %
              (opts.slaves, opts.spot_price))
        zones = get_zones(conn, opts)
        num_zones = len(zones)
        i = 0
        my_req_ids = []
        for zone in zones:
            num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
            slave_reqs = conn.request_spot_instances(
                price=opts.spot_price,
                image_id=opts.ami,
                launch_group="launch-group-%s" % cluster_name,
                placement=zone,
                count=num_slaves_this_zone,
                key_name=opts.key_pair,
                security_group_ids=[slave_group.id, discovery_group.id] + additional_group_ids,
                instance_type=opts.instance_type,
                block_device_map=block_map,
                subnet_id=opts.subnet_id,
                placement_group=opts.placement_group,
                user_data=slave_user_data_content,
                instance_profile_name=opts.instance_profile_name)
            my_req_ids += [req.id for req in slave_reqs]
            i += 1

        print("Waiting for spot instances to be granted...")
        try:
            while True:
                time.sleep(10)
                reqs = conn.get_all_spot_instance_requests()
                id_to_req = {}
                for r in reqs:
                    id_to_req[r.id] = r
                active_instance_ids = []
                for i in my_req_ids:
                    if i in id_to_req and id_to_req[i].state == "active":
                        active_instance_ids.append(id_to_req[i].instance_id)
                if len(active_instance_ids) == opts.slaves:
                    print("All %d slaves granted" % opts.slaves)
                    reservations = conn.get_all_reservations(active_instance_ids)
                    slave_nodes = []
                    for r in reservations:
                        slave_nodes += r.instances
                    break
                else:
                    print("%d of %d slaves granted, waiting longer" % (
                        len(active_instance_ids), opts.slaves))
        except:
            print("Canceling spot instance requests")
            conn.cancel_spot_instance_requests(my_req_ids)
            # Log a warning if any of these requests actually launched instances:
            (master_nodes, slave_nodes) = get_existing_cluster(
                conn, opts, cluster_name, die_on_error=False)
            running = len(master_nodes) + len(slave_nodes)
            if running:
                print(("WARNING: %d instances are still running" % running), file=stderr)
            sys.exit(0)
    else:
        # Launch non-spot instances
        zones = get_zones(conn, opts)
        num_zones = len(zones)
        i = 0
        slave_nodes = []
        for zone in zones:
            num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
            if num_slaves_this_zone > 0:
                slave_res = image.run(
                    key_name=opts.key_pair,
                    security_group_ids=[slave_group.id, discovery_group.id] + additional_group_ids,
                    instance_type=opts.instance_type,
                    placement=zone,
                    min_count=num_slaves_this_zone,
                    max_count=num_slaves_this_zone,
                    block_device_map=block_map,
                    subnet_id=opts.subnet_id,
                    placement_group=opts.placement_group,
                    user_data=slave_user_data_content,
                    instance_initiated_shutdown_behavior=opts.instance_initiated_shutdown_behavior,
                    instance_profile_name=opts.instance_profile_name)
                slave_nodes += slave_res.instances
                print("Launched {s} slave{plural_s} in {z}, regid = {r}".format(
                      s=num_slaves_this_zone,
                      plural_s=('' if num_slaves_this_zone == 1 else 's'),
                      z=zone,
                      r=slave_res.id))
            i += 1

    # Launch or resume masters
    if existing_masters:
        print("Starting master...")
        for inst in existing_masters:
            if inst.state not in ["shutting-down", "terminated"]:
                inst.start()
        master_nodes = existing_masters
    else:
        master_type = opts.master_instance_type
        if master_type == "":
            master_type = opts.instance_type
        if opts.zone == 'all':
            opts.zone = random.choice(conn.get_all_zones()).name

        if opts.spot_price is not None:
            # Launch spot instance with the requested price
            print("Requesting master as spot instance with price $%.3f" % (opts.spot_price))
            master_reqs = conn.request_spot_instances(
                    price=opts.spot_price,
                    image_id=opts.ami,
                    key_name=opts.key_pair,
                    launch_group="master-group-%s" % cluster_name,
                    security_group_ids=[master_group.id, discovery_group.id] + additional_group_ids,
                    instance_type=master_type,
                    placement=opts.zone,
                    count=1,
                    block_device_map=block_map,
                    subnet_id=opts.subnet_id,
                    placement_group=opts.placement_group,
                    user_data=master_user_data_content,
                    instance_profile_name=opts.instance_profile_name)
            master_req_id = master_reqs[0].id

            print("Waiting for spot instances to be granted...")
            try:
                while True:
                    time.sleep(10)
                    reqs = conn.get_all_spot_instance_requests()
                    id_to_req = {}
                    for r in reqs:
                        id_to_req[r.id] = r
                    master_instance_ids = []
                    if master_req_id in id_to_req and id_to_req[master_req_id].state == "active":
                        master_instance_ids.append(id_to_req[master_req_id].instance_id)
                        print("Master granted")
                        reservations = conn.get_all_reservations(master_instance_ids)
                        master_nodes = []
                        for r in reservations:
                            master_nodes += r.instances
                        break
                    else:
                        print("Master not granted yet, waiting longer")
            except:
                print("Canceling spot instance request for master")
                conn.cancel_spot_instance_requests([master_req_id])
                sys.exit(0)
        else:
            master_res = image.run(
                key_name=opts.key_pair,
                security_group_ids=[master_group.id, discovery_group.id] + additional_group_ids,
                instance_type=master_type,
                placement=opts.zone,
                min_count=1,
                max_count=1,
                block_device_map=block_map,
                subnet_id=opts.subnet_id,
                placement_group=opts.placement_group,
                user_data=master_user_data_content,
                instance_initiated_shutdown_behavior=opts.instance_initiated_shutdown_behavior,
                instance_profile_name=opts.instance_profile_name)

            master_nodes = master_res.instances
            print("Launched master in %s, regid = %s" % (zone, master_res.id))

    # This wait time corresponds to SPARK-4983
    print("Waiting for AWS to propagate instance metadata...")
    time.sleep(15)

    # Give the instances descriptive names and set additional tags
    additional_tags = {}
    if opts.additional_tags.strip():
        additional_tags = dict(
            map(str.strip, tag.split(':', 1)) for tag in opts.additional_tags.split(',')
        )

    for master in master_nodes:
        master.add_tags(
            dict(additional_tags, Name='{cn}-master-{iid}'.format(cn=cluster_name, iid=master.id))
        )

    for slave in slave_nodes:
        slave.add_tags(
            dict(additional_tags, Name='{cn}-slave-{iid}'.format(cn=cluster_name, iid=slave.id))
        )

    # Return all the instances
    return (master_nodes, slave_nodes)
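
The EBS section of launch_cluster above builds one BlockDeviceMapping entry per requested volume, starting at /dev/sds. Pulled out on its own, the pattern looks like this (a sketch with hypothetical arguments, not the script's exact code):

from boto.ec2.blockdevicemapping import BlockDeviceMapping, EBSBlockDeviceType

def make_ebs_block_map(vol_num, vol_size_gb, vol_type):
    # First volume at /dev/sds, second at /dev/sdt, ..., up to /dev/sdz.
    block_map = BlockDeviceMapping()
    for i in range(vol_num):
        device = EBSBlockDeviceType()
        device.size = vol_size_gb
        device.volume_type = vol_type          # e.g. 'standard' or 'gp2'
        device.delete_on_termination = True
        block_map['/dev/sd' + chr(ord('s') + i)] = device
    return block_map
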
Example #35
def launch_cluster(conn, opts, cluster_name):

  conn = AWSConnection(conn, VPCConnection(region=conn.region))

  print "Setting up VPC..."
  vpc = get_or_make_vpc(conn, cluster_name, 'mesos-vpc')
  print "Using vpc: %s" % (vpc.id)


  print "Setting up subnet..."
  subnet = get_or_make_subnet(conn, vpc.id, opts.zone, cluster_name, 'mesos-subnet')
  print "Using subnet: %s" % (subnet.id)

  # Add internet gateway to VPC.
  print "Creating internet gateway"
  ig = get_or_make_ig(conn, vpc.id, cluster_name, 'mesos-vpc')
  print "Using internet gateway: %s" % (ig.id)
  
  # Add route to route table
  rt = get_or_make_rt(conn, vpc.id, cluster_name, 'mesos-rt')
  conn.vpc.create_route(rt.id, '0.0.0.0/0', gateway_id=ig.id)

  print "Setting up security groups..."
  master_group = get_or_make_group(conn, cluster_name, vpc.id, "mesos-masters")
  slave_group = get_or_make_group(conn, cluster_name, vpc.id, "mesos-slaves")
  zoo_group = get_or_make_group(conn, cluster_name, vpc.id, "mesos-zoo")

  if master_group.rules == []: # Group was just now created
    master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
    master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
    master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0')
    master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0')
    master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0')
    master_group.authorize('tcp', 38090, 38090, '0.0.0.0/0')
  if slave_group.rules == []: # Group was just now created
    slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
    slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
    slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
    slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
    slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
    slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')
  if zoo_group.rules == []: # Group was just now created
    zoo_group.authorize('tcp', 22, 22, '0.0.0.0/0')
    zoo_group.authorize('tcp', 2181, 2181, '0.0.0.0/0')
    zoo_group.authorize('tcp', 2888, 2888, '0.0.0.0/0')
    zoo_group.authorize('tcp', 3888, 3888, '0.0.0.0/0')

  # Check if instances are already running in our groups
  print "Checking for running cluster..."
  reservations = conn.ec2.get_all_instances()
  for res in reservations:
    group_names = [g.name for g in res.groups]
    if master_group.name in group_names or slave_group.name in group_names or zoo_group.name in group_names:
      active = [i for i in res.instances if is_active(i)]
      if len(active) > 0:
        print >> stderr, ("ERROR: There are already instances running in " +
            "group %s, %s or %s" % (master_group.name, slave_group.name, zoo_group.name))
        sys.exit(1)
  
  print "Launching instances..."
  if opts.ami == "latest":
    # Figure out the latest AMI from our static URL
    try:
      opts.ami = urllib2.urlopen(LATEST_AMI_URL).read().strip()
    except:
      print >> stderr, "Could not read " + LATEST_AMI_URL

  try:
    image = conn.ec2.get_all_images(image_ids=[opts.ami])[0]
  except:
    print >> stderr, "Could not find AMI " + opts.ami
    sys.exit(1)

  # Create block device mapping so that we can add an EBS volume if asked to
  block_map = BlockDeviceMapping()
  if opts.ebs_vol_size > 0:
    device = EBSBlockDeviceType()
    device.size = opts.ebs_vol_size
    device.delete_on_termination = True
    block_map["/dev/sdv"] = device

  # Launch slaves
  if opts.spot_price is not None:
    # Launch spot instances with the requested price
    print ("Requesting %d slaves as spot instances with price $%.3f" %
           (opts.slaves, opts.spot_price))
    slave_reqs = conn.ec2.request_spot_instances(
        price = opts.spot_price,
        image_id = opts.ami,
        launch_group = "launch-group-%s" % cluster_name,
        placement = opts.zone,
        count = opts.slaves,
        key_name = opts.key_pair,
        security_groups = [slave_group],
        instance_type = opts.instance_type,
        block_device_map = block_map)
    my_req_ids = [req.id for req in slave_reqs]
    print "Waiting for spot instances to be granted..."
    while True:
      time.sleep(10)
      reqs = conn.ec2.get_all_spot_instance_requests()
      id_to_req = {}
      for r in reqs:
        id_to_req[r.id] = r
      active = 0
      instance_ids = []
      for i in my_req_ids:
        if id_to_req[i].state == "active":
          active += 1
          instance_ids.append(id_to_req[i].instance_id)
      if active == opts.slaves:
        print "All %d slaves granted" % opts.slaves
        reservations = conn.ec2.get_all_instances(instance_ids)
        slave_nodes = []
        for r in reservations:
          slave_nodes += r.instances
        break
      else:
        print "%d of %d slaves granted, waiting longer" % (active, opts.slaves)
  else:
    # Launch non-spot instances
    slave_res = conn.ec2.run_instances(opts.ami,
                          key_name = opts.key_pair,
                          subnet_id = subnet.id,
                          security_group_ids = [slave_group.id],
                          instance_type = opts.instance_type,
                          placement = opts.zone,
                          min_count = opts.slaves,
                          max_count = opts.slaves,
                          block_device_map = block_map)
    slave_nodes = slave_res.instances
    print "Launched slaves, regid = " + slave_res.id

  # Launch masters
  master_type = opts.master_instance_type
  if master_type == "":
    master_type = opts.instance_type
  interface = boto.ec2.networkinterface.NetworkInterfaceSpecification(subnet_id=subnet.id,
                                                                    groups=[master_group.id],
                                                                    associate_public_ip_address=True)
  interfaces = boto.ec2.networkinterface.NetworkInterfaceCollection(interface)
  master_res = conn.ec2.run_instances(opts.ami,
                          key_name = opts.key_pair,
                          instance_type = master_type,
                          placement = opts.zone,
                          network_interfaces = interfaces,
                          min_count = opts.ft,
                          max_count = opts.ft,
                          block_device_map = block_map)
  master_nodes = master_res.instances
  print "Launched master, regid = " + master_res.id

  # Launch ZooKeeper nodes if required
  if opts.ft > 1:
    zoo_res = conn.ec2.run_instances(opts.ami,
                        key_name = opts.key_pair,
                        subnet_id = subnet.id,
                        security_group_ids = [zoo_group.id],
                        instance_type = opts.instance_type,
                        placement = opts.zone,
                        min_count = 3,
                        max_count = 3,
                        block_device_map = block_map)
    zoo_nodes = zoo_res.instances
    print "Launched zoo, regid = " + zoo_res.id
  else:
    zoo_nodes = []

  # Return all the instances
  return (master_nodes, slave_nodes, zoo_nodes)
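
Note how the master in Example #35 gets its public IP: through an explicit NetworkInterfaceSpecification rather than top-level subnet_id/security_group_ids arguments (EC2 rejects mixing the two). The pattern in isolation, with placeholder IDs:

import boto.ec2.networkinterface

def public_ip_interfaces(subnet_id, group_id):
    # One interface that requests a public IP at launch. When this is
    # passed as network_interfaces=..., subnet_id and security groups
    # must not also be passed to run_instances directly.
    spec = boto.ec2.networkinterface.NetworkInterfaceSpecification(
        subnet_id=subnet_id,
        groups=[group_id],
        associate_public_ip_address=True)
    return boto.ec2.networkinterface.NetworkInterfaceCollection(spec)
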
Example #36
def make_ebs_based_image(ami_name, docker_image_name, fstype='ext3',
        mount_point=None, desc='', arch='x86_64', kernel=None, disk_size=10240):
    '''disk_size is passed straight to conn.create_volume, which expects GiB'''

    availability_zone = boto.utils.get_instance_metadata()[
        'placement']['availability-zone']
    instance_id = boto.utils.get_instance_metadata()['instance_id']

    # needs a ec2 connection here
    conn = boto.ec2.connection.EC2Connection()
    vol = conn.create_volume(disk_size, availability_zone)

    devpath = random.choice(
        [devp for devp in map(lambda x: '/dev/sd%s' % x, string.ascii_lowercase)
            if not os.path.exists(devp)]
        )

    vol.attach(instance_id, devpath)
    run('/sbin/mkfs -t %s %s' % (fstype, devpath))
    if mount_point is None:
        mount_point = tempfile.mkdtemp('ebs-based-mount-point')

    # copy files
    cid = dockerc.create_container(
        image=docker_image_name,
        command='/bin/bash',
        tty=True,
        volumes=['/dev']
        )

    export_fileobj = dockerc.export(cid)
    run('/bin/mount -t %s %s %s' % (fstype, devpath, mount_point))
    try:
        with tempfile.TemporaryFile() as fp:
            # Spool the exported container filesystem into a seekable
            # temporary file, then rewind and unpack it onto the volume.
            data = export_fileobj.read(2048)
            while data:
                fp.write(data)
                data = export_fileobj.read(2048)
            fp.seek(0)

            tar = tarfile.open(fileobj=fp)
            os.chdir(mount_point)
            tar.extractall()
            tar.close()
            os.chdir(os.pardir)
    finally:
        run('/bin/umount %s' % mount_point)

    vol.detach()
    snapshot = vol.create_snapshot('initial snapshot for ebs')
    ebs = EBSBlockDeviceType()
    ebs.snapshot_id = snapshot.id
    block_map = BlockDeviceMapping()
    block_map['/dev/sda1'] = ebs
    ami = conn.register_image(
        ami_name,
        description=desc,
        architecture=arch,
        kernel_id=kernel,
        root_device_name='/dev/sda1',
        block_device_map=block_map
        )

    log.info('ebs-based ami: %s' % ami)  # register_image returns the new image id
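
One caveat: create_snapshot returns while the snapshot is still 'pending', and registering an AMI against an incomplete snapshot can fail. The example does not wait; a small polling helper (an addition, not part of the original) would look like:

import time

def wait_for_snapshot(snapshot, poll_seconds=10):
    # Snapshot.update() refreshes the object; status moves from
    # 'pending' to 'completed'.
    snapshot.update()
    while snapshot.status != 'completed':
        time.sleep(poll_seconds)
        snapshot.update()
    return snapshot
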
Example #37
def node_install(cn=def_cn,inst_type_idx=def_inst_type,idn=0,
        avz=def_default_avz,rt=def_default_requesttype,
        group_name='oggmssh',
        ssh_port=22,
        cidr='0.0.0.0/0'):
    """
    Request and prepare single instance
    """
    # FSO---connect
    cloud = boto.ec2.connect_to_region(avz[:-1],profile_name=ec2Profile)
    aminfo = cloud.get_image(def_ami[avz[:-1]])

    # FSO---check if node with same name already exists
    if node_exists(cn + '_node' + str(idn)):
        print("Node already exists")
        sys.exit()

    # Check if ssh keypair exists
    key_name = get_keypair_name()
    check_keypair(cloud, key_name)

    # FSO---create a bigger root device
    dev_sda1 = EBSBlockDeviceType()
    dev_sda1.size = rootfs_size_gb
    dev_sda1.delete_on_termination = True
    bdm = BlockDeviceMapping()
    bdm['/dev/sda1'] = dev_sda1

    dev_sdf_vol = get_user_persist_ebs(cloud, avz)

    # Check to see if specified security group already exists.
    # If we get an InvalidGroup.NotFound error back from EC2,
    # it means that it doesn't exist and we need to create it.
    try:
        group = cloud.get_all_security_groups(groupnames=[group_name])[0]
    except cloud.ResponseError as e:
        if e.code == 'InvalidGroup.NotFound':
            print('Creating Security Group: %s' % group_name)
            # Create a security group to control access to instance via SSH.
            group = cloud.create_security_group(group_name, 'A group that allows SSH access')
        else:
            raise

    # Add a rule to the security group to authorize SSH traffic
    # on the specified port.
    try:
        group.authorize('tcp', ssh_port, ssh_port, cidr)
    except cloud.ResponseError as e:
        if e.code == 'InvalidPermission.Duplicate':
            print('Security Group: %s already authorized' % group_name)
        else:
            raise

    log_with_ts("request node "+str(idn))
    print('Reserving instance for node', aminfo.id, instance_infos[inst_type_idx]['type'], aminfo.name, aminfo.region)

    if rt == 'spot':
        print("placing node in ",avz)
        requests = cloud.request_spot_instances(def_price,
                      def_ami[avz[:-1]],
                      count=1,
                      type='one-time',
                      security_groups=[group_name],
                      key_name=key_name,
                      placement=avz,
                      instance_type=instance_infos[inst_type_idx]['type'],
                      block_device_map=bdm)
        req_ids = [request.id for request in requests]
        instance_ids = wait_for_fulfillment(cloud,req_ids)
        instances = cloud.get_only_instances(instance_ids=instance_ids)
        node = instances[0]
        log_with_ts("fullfilled spot node "+str(idn))
    else:
        print("placing node in ",avz)
        reservation = cloud.run_instances(image_id=def_ami[avz[:-1]],
                key_name=key_name,
                placement=avz,
                security_groups=[group_name],
                instance_type=instance_infos[inst_type_idx]['type'],
                block_device_map=bdm)
        node = reservation.instances[0]
        log_with_ts("fullfilled ondemand node "+str(idn))

    time.sleep(2)
    while node.update() != 'running':
        print('waiting for', cn, 'node', idn, 'to boot...')
        time.sleep(5)

    log_with_ts("booted node "+str(idn))

    if dev_sdf_vol is not None:
        cloud.attach_volume(dev_sdf_vol.id, node.id, "/dev/sdf")

    node.add_tag('Name', cn+'_node'+str(idn))
    node.add_tag('type', cn+'node')
    node.add_tag('node-owner', user_identifier)

    # FSO---set delete on termination flag to true for ebs block device
    node.modify_attribute('blockDeviceMapping', { '/dev/sda1' : True })

    # FSO--- test socket connect to ssh service
    ssh_test(node)
    log_with_ts("reachable node "+str(idn))

    update_key_filename(node.region.name)

    # Mount potential user volume
    if dev_sdf_vol is not None:
        use_user_volume(node.dns_name)

    log_with_ts("finished node "+str(idn))
Example #38
def node_install(cn=def_cn,
                 inst_type_idx=def_inst_type,
                 idn=0,
                 avz=def_default_avz,
                 rt=def_default_requesttype,
                 group_name='oggmssh',
                 ssh_port=22,
                 cidr='0.0.0.0/0'):
    """
    Request and prepare single instance
    """
    # FSO---connect
    cloud = boto.ec2.connect_to_region(avz[:-1], profile_name=ec2Profile)
    aminfo = cloud.get_image(def_ami[avz[:-1]])
    vpcconn = VPCConnection(region=cloud.region)

    try:
        vpc_id, subnet_id = def_subnet[avz]
        vpc = vpcconn.get_all_vpcs(vpc_ids=[vpc_id])[0]
    except:
        vpc_id = None
        subnet_id = None
        vpc = None

    # FSO---check if node with same name already exists
    if node_exists(cn + '_node' + str(idn)):
        print("Node already exists")
        sys.exit()

    # Check if ssh keypair exists
    key_name = get_keypair_name(avz[:-1])
    check_keypair(cloud, key_name)

    # FSO---create a bigger root device
    dev_sda1 = EBSBlockDeviceType()
    dev_sda1.size = rootfs_size_gb
    dev_sda1.delete_on_termination = True
    bdm = BlockDeviceMapping()
    bdm['/dev/sda1'] = dev_sda1

    dev_sdf_vol = get_user_persist_ebs(cloud, avz)

    # Check to see if specified security group already exists.
    # If we get an InvalidGroup.NotFound error back from EC2,
    # it means that it doesn't exist and we need to create it.
    try:
        group = cloud.get_all_security_groups(groupnames=[group_name])[0]
    except cloud.ResponseError as e:
        if e.code == 'InvalidGroup.NotFound':
            print('Creating Security Group: %s' % group_name)
            # Create a security group to control access to instance via SSH.
            group = cloud.create_security_group(
                group_name, 'A group that allows SSH access')
        else:
            raise

    # Authorize all Intra-VPC traffic
    if vpc is not None:
        try:
            group.authorize('-1', -1, -1, vpc.cidr_block)
        except cloud.ResponseError as e:
            if e.code != 'InvalidPermission.Duplicate':
                raise

    # Add a rule to the security group to authorize SSH traffic
    # on the specified port.
    try:
        group.authorize('tcp', ssh_port, ssh_port, cidr)
    except cloud.ResponseError as e:
        if e.code == 'InvalidPermission.Duplicate':
            print('Security Group: %s already authorized' % group_name)
        else:
            raise

    log_with_ts("request node " + str(idn))
    print('Reserving instance for node', aminfo.id,
          instance_infos[inst_type_idx]['type'], aminfo.name, aminfo.region)

    if rt == 'spot':
        print("placing node in ", avz)
        requests = cloud.request_spot_instances(
            def_price,
            def_ami[avz[:-1]],
            count=1,
            type='one-time',
            security_group_ids=[group.id],
            key_name=key_name,
            placement=avz,
            subnet_id=subnet_id,
            ebs_optimized=True,
            instance_type=instance_infos[inst_type_idx]['type'],
            block_device_map=bdm)
        req_ids = [request.id for request in requests]
        instance_ids = wait_for_fulfillment(cloud, req_ids)
        instances = cloud.get_only_instances(instance_ids=instance_ids)
        node = instances[0]
        log_with_ts("fullfilled spot node " + str(idn))
    else:
        print("placing node in ", avz)
        reservation = cloud.run_instances(
            image_id=def_ami[avz[:-1]],
            key_name=key_name,
            placement=avz,
            subnet_id=subnet_id,
            security_group_ids=[group.id],
            ebs_optimized=True,
            instance_type=instance_infos[inst_type_idx]['type'],
            block_device_map=bdm)
        node = reservation.instances[0]
        log_with_ts("fullfilled ondemand node " + str(idn))

    time.sleep(2)
    while node.update() != 'running':
        print('waiting for', cn, 'node', idn, 'to boot...')
        time.sleep(5)

    log_with_ts("booted node " + str(idn))

    if dev_sdf_vol is not None:
        cloud.attach_volume(dev_sdf_vol.id, node.id, "/dev/sdf")

    node.add_tag('Name', cn + '_node' + str(idn))
    node.add_tag('type', cn + 'node')
    node.add_tag('node-owner', user_identifier)

    # FSO---set delete on termination flag to true for ebs block device
    node.modify_attribute('blockDeviceMapping', {'/dev/sda1': True})

    # FSO--- test socket connect to ssh service
    ssh_test(node)
    log_with_ts("reachable node " + str(idn))

    update_key_filename(node.region.name)

    # Mount potential user volume
    if dev_sdf_vol is not None:
        use_user_volume(node.dns_name)

    log_with_ts("finished node " + str(idn))
Example #39
def launch_cluster(conn, opts, cluster_name):
  print "Setting up security groups..."
  
  master_group = get_or_make_group(conn, "shark-exp-master")
  slave_group = get_or_make_group(conn, "shark-exp-slaves")
  zoo_group = get_or_make_group(conn, "ampcamp-zoo")
  if master_group.rules == []: # Group was just now created
    master_group.authorize(src_group=master_group)
    master_group.authorize(src_group=slave_group)
    master_group.authorize(src_group=zoo_group)
    master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
    master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
    if opts.cluster_type == "mesos":
      master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0')
      master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0')
      master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0')
      master_group.authorize('tcp', 38090, 38090, '0.0.0.0/0')
      # hbase
      master_group.authorize('tcp', 60010, 60010, '0.0.0.0/0')
      master_group.authorize('tcp', 60050, 60050, '0.0.0.0/0')
  if slave_group.rules == []: # Group was just now created
    slave_group.authorize(src_group=master_group)
    slave_group.authorize(src_group=slave_group)
    slave_group.authorize(src_group=zoo_group)
    slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
    slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
    if opts.cluster_type == "mesos":
      slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
      slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
      slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
      slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')
      # hbase
      slave_group.authorize('tcp', 60050, 60050, '0.0.0.0/0')
  if zoo_group.rules == []: # Group was just now created
    zoo_group.authorize(src_group=master_group)
    zoo_group.authorize(src_group=slave_group)
    zoo_group.authorize(src_group=zoo_group)
    zoo_group.authorize('tcp', 22, 22, '0.0.0.0/0')
    zoo_group.authorize('tcp', 2181, 2181, '0.0.0.0/0')
    zoo_group.authorize('tcp', 2888, 2888, '0.0.0.0/0')
    zoo_group.authorize('tcp', 3888, 3888, '0.0.0.0/0')

  # Check if instances are already running in our groups
  print "Checking for running cluster..."
  reservations = conn.get_all_instances()
  for res in reservations:
    for instance in res.instances:
      if 'tags' in instance.__dict__ and 'cluster' in instance.tags:
        if instance.tags['cluster'] == cluster_name and is_active(instance):
          print >> stderr, ("ERROR: Instances %s is already running in cluster %s"
                            % (instance.id, cluster_name))
          sys.exit(1)

  if opts.ami in ["latest", "standalone"]:
    opts.ami = get_ami(opts.ami)

  print "Launching instances..."

  try:
    image = conn.get_all_images(image_ids=[opts.ami])[0]
  except:
    print >> stderr, "Could not find AMI " + opts.ami
    sys.exit(1)

  # Create block device mapping so that we can add an EBS volume if asked to
  block_map = BlockDeviceMapping()
  if opts.ebs_vol_size > 0:
    device = EBSBlockDeviceType()
    device.size = opts.ebs_vol_size
    device.delete_on_termination = True
    block_map["/dev/sdv"] = device

  # Launch slaves
  if opts.spot_price is not None:
    # Launch spot instances with the requested price
    print ("Requesting %d slaves as spot instances with price $%.3f" %
           (opts.slaves, opts.spot_price))
    slave_reqs = conn.request_spot_instances(
        price = opts.spot_price,
        image_id = opts.ami,
        launch_group = "launch-group-%s" % cluster_name,
        placement = opts.zone,
        count = opts.slaves,
        key_name = opts.key_pair,
        security_groups = [slave_group],
        instance_type = opts.instance_type,
        block_device_map = block_map)
    my_req_ids = [req.id for req in slave_reqs]
    print "Waiting for spot instances to be granted..."
    while True:
      time.sleep(10)
      reqs = conn.get_all_spot_instance_requests()
      id_to_req = {}
      for r in reqs:
        id_to_req[r.id] = r
      active = 0
      instance_ids = []
      for i in my_req_ids:
        if id_to_req[i].state == "active":
          active += 1
          instance_ids.append(id_to_req[i].instance_id)
      if active == opts.slaves:
        print "All %d slaves granted" % opts.slaves
        reservations = conn.get_all_instances(instance_ids)
        slave_nodes = []
        for r in reservations:
          slave_nodes += r.instances
        break
      else:
        print "%d of %d slaves granted, waiting longer" % (active, opts.slaves)
  else:
    # Launch non-spot instances
    slave_res = image.run(key_name = opts.key_pair,
                          security_groups = [slave_group],
                          instance_type = opts.instance_type,
                          placement = opts.zone,
                          min_count = opts.slaves,
                          max_count = opts.slaves,
                          block_device_map = block_map)
    slave_nodes = slave_res.instances
    print "Launched slaves, regid = " + slave_res.id

  # Launch masters
  master_type = opts.master_instance_type
  if master_type == "":
    master_type = opts.instance_type
  master_res = image.run(key_name = opts.key_pair,
                         security_groups = [master_group],
                         instance_type = master_type,
                         placement = opts.zone,
                         min_count = 1,
                         max_count = 1,
                         block_device_map = block_map)
  master_nodes = master_res.instances
  print "Launched master, regid = " + master_res.id

  # Create the right tags
  tags = {}
  tags['cluster'] = cluster_name

  tags['type'] = 'slave'
  for node in slave_nodes:
    conn.create_tags([node.id], tags)
  
  tags['type'] = 'master'
  for node in master_nodes:
    conn.create_tags([node.id], tags)

  zoo_nodes = []

  # Return all the instances
  return (master_nodes, slave_nodes, zoo_nodes)
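
is_active is assumed by this example; in the spark-ec2 scripts this family of code comes from, it simply filters out instances that are on their way down:

def is_active(instance):
    # An instance counts as active in any state short of termination.
    return instance.state in ('pending', 'running', 'stopping', 'stopped')
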
Example #40
def launch_cluster(conn, opts, cluster_name):

    #Remove known hosts to avoid "Offending key for IP ..." errors.
    known_hosts = os.environ['HOME'] + "/.ssh/known_hosts"
    if os.path.isfile(known_hosts):
        os.remove(known_hosts)
    if opts.key_pair is None:
        opts.key_pair = keypair()
        if opts.key_pair is None:
            print >> stderr, "ERROR: Must provide a key pair name (-k) to use on instances."
            sys.exit(1)

    if opts.profile is None:
        opts.profile = profile()
        if opts.profile is None:
            print >> stderr, "ERROR: No profile found in current host. It be provided with -p option."
            sys.exit(1)

    public_key = pub_key()
    user_data = Template("""#!/bin/bash
  set -e -x
  echo '$public_key' >> ~root/.ssh/authorized_keys
  echo '$public_key' >> ~ec2-user/.ssh/authorized_keys""").substitute(
        public_key=public_key)

    print "Setting up security groups..."
    master_group = get_or_make_group(conn, cluster_name + "-master")
    slave_group = get_or_make_group(conn, cluster_name + "-slaves")
    sparknotebook_group = get_or_make_group(conn, "SparkNotebookApplication")
    if master_group.rules == []:  # Group was just now created
        master_group.authorize(src_group=master_group)
        master_group.authorize(src_group=slave_group)
        master_group.authorize(src_group=sparknotebook_group)
        master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        master_group.authorize('tcp', 18080, 18080, '0.0.0.0/0')
        master_group.authorize('tcp', 19999, 19999, '0.0.0.0/0')
        master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0')
        master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0')
        master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0')
        master_group.authorize('tcp', 4040, 4045, '0.0.0.0/0')
        master_group.authorize('tcp', 7077, 7077, '0.0.0.0/0')
        if opts.ganglia:
            master_group.authorize('tcp', 5080, 5080, '0.0.0.0/0')
    if slave_group.rules == []:  # Group was just now created
        slave_group.authorize(src_group=master_group)
        slave_group.authorize(src_group=slave_group)
        slave_group.authorize(src_group=sparknotebook_group)
        slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
        slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
        slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
        slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')

    if not any(r for r in sparknotebook_group.rules
               for g in r.grants if master_group.id == g.group_id):
        sparknotebook_group.authorize(ip_protocol="tcp",
                                      from_port="1",
                                      to_port="65535",
                                      src_group=master_group)
        sparknotebook_group.authorize(ip_protocol="icmp",
                                      from_port="-1",
                                      to_port="-1",
                                      src_group=master_group)

    if not any(r for r in sparknotebook_group.rules
               for g in r.grants if slave_group.id == g.group_id):
        sparknotebook_group.authorize(ip_protocol="tcp",
                                      from_port="1",
                                      to_port="65535",
                                      src_group=slave_group)
        sparknotebook_group.authorize(ip_protocol="icmp",
                                      from_port="-1",
                                      to_port="-1",
                                      src_group=slave_group)

    # Check if instances are already running in our groups
    existing_masters, existing_slaves = get_existing_cluster(
        conn, opts, cluster_name, die_on_error=False)
    if existing_slaves or (existing_masters and not opts.use_existing_master):
        print >> stderr, ("ERROR: There are already instances running in " +
                          "group %s or %s" %
                          (master_group.name, slave_group.name))
        sys.exit(1)

    # Figure out Spark AMI
    if opts.ami is None:
        opts.ami = get_spark_ami(opts)
    print "Launching instances..."

    try:
        image = conn.get_all_images(image_ids=[opts.ami])[0]
    except:
        print >> stderr, "Could not find AMI " + opts.ami
        sys.exit(1)

    # Create block device mapping so that we can add an EBS volume if asked to
    block_map = BlockDeviceMapping()
    if opts.ebs_vol_size > 0:
        device = EBSBlockDeviceType()
        device.size = opts.ebs_vol_size
        device.delete_on_termination = True
        block_map["/dev/sdv"] = device

    # Launch slaves
    if opts.spot_price is not None:
        zones = get_zones(conn, opts)

        num_zones = len(zones)
        i = 0
        my_req_ids = []

        for zone in zones:
            best_price = find_best_price(conn, opts.instance_type, zone,
                                         opts.spot_price)
            # Launch spot instances with the requested price
            print >> stderr, (
                "Requesting %d slaves as spot instances with price $%.3f/hour each (total $%.3f/hour)"
                % (opts.slaves, best_price, opts.slaves * best_price))

            num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
            interface = boto.ec2.networkinterface.NetworkInterfaceSpecification(
                subnet_id=subnetId(),
                groups=[slave_group.id],
                associate_public_ip_address=True)
            interfaces = boto.ec2.networkinterface.NetworkInterfaceCollection(
                interface)

            slave_reqs = conn.request_spot_instances(
                price=best_price,
                image_id=opts.ami,
                launch_group="launch-group-%s" % cluster_name,
                placement=zone,
                count=num_slaves_this_zone,
                key_name=opts.key_pair,
                instance_type=opts.instance_type,
                block_device_map=block_map,
                user_data=user_data,
                instance_profile_arn=opts.profile,
                network_interfaces=interfaces)
            my_req_ids += [req.id for req in slave_reqs]
            i += 1

        print >> stderr, "Waiting for spot instances to be granted"
        try:
            while True:
                time.sleep(10)
                reqs = conn.get_all_spot_instance_requests()
                id_to_req = {}
                for r in reqs:
                    id_to_req[r.id] = r
                active_instance_ids = []
                for i in my_req_ids:
                    if i in id_to_req and id_to_req[i].state == "active":
                        active_instance_ids.append(id_to_req[i].instance_id)
                if len(active_instance_ids) == opts.slaves:
                    print >> stderr, "All %d slaves granted" % opts.slaves
                    reservations = conn.get_all_instances(active_instance_ids)
                    slave_nodes = []
                    for r in reservations:
                        slave_nodes += r.instances
                    break
                else:
                    # print >> stderr, ".",
                    print "%d of %d slaves granted, waiting longer" % (
                        len(active_instance_ids), opts.slaves)
        except:
            print >> stderr, "Canceling spot instance requests"
            conn.cancel_spot_instance_requests(my_req_ids)
            # Log a warning if any of these requests actually launched instances:
            (master_nodes,
             slave_nodes) = get_existing_cluster(conn,
                                                 opts,
                                                 cluster_name,
                                                 die_on_error=False)
            running = len(master_nodes) + len(slave_nodes)
            if running:
                print >> stderr, ("WARNING: %d instances are still running" %
                                  running)
            sys.exit(0)
    else:
        # Launch non-spot instances
        zones = get_zones(conn, opts)
        num_zones = len(zones)
        i = 0
        slave_nodes = []
        for zone in zones:
            num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
            if num_slaves_this_zone > 0:
                slave_res = image.run(key_name=opts.key_pair,
                                      security_group_ids=[slave_group.id],
                                      instance_type=opts.instance_type,
                                      subnet_id=subnetId(),
                                      placement=zone,
                                      min_count=num_slaves_this_zone,
                                      max_count=num_slaves_this_zone,
                                      block_device_map=block_map,
                                      user_data=user_data,
                                      instance_profile_arn=opts.profile)
                slave_nodes += slave_res.instances
                print >> stderr, "Launched %d slaves in %s, regid = %s" % (
                    num_slaves_this_zone, zone, slave_res.id)
            i += 1

    # Launch or resume masters
    if existing_masters:
        print "Starting master..."
        for inst in existing_masters:
            if inst.state not in ["shutting-down", "terminated"]:
                inst.start()
        master_nodes = existing_masters
    else:
        master_type = opts.master_instance_type
        if master_type == "":
            master_type = opts.instance_type
        if opts.zone == 'all':
            opts.zone = random.choice(conn.get_all_zones()).name
        if opts.spot_price is not None:
            best_price = find_best_price(conn, master_type, opts.zone,
                                         opts.spot_price)
            # Launch spot instances with the requested price
            print >> stderr, (
                "Requesting master as spot instances with price $%.3f/hour" %
                (best_price))

            interface = boto.ec2.networkinterface.NetworkInterfaceSpecification(
                subnet_id=subnetId(),
                groups=[master_group.id],
                associate_public_ip_address=True)
            interfaces = boto.ec2.networkinterface.NetworkInterfaceCollection(
                interface)

            master_reqs = conn.request_spot_instances(
                price=best_price,
                image_id=opts.ami,
                launch_group="launch-group-%s" % cluster_name,
                placement=opts.zone,
                count=1,
                key_name=opts.key_pair,
                instance_type=master_type,
                block_device_map=block_map,
                user_data=user_data,
                instance_profile_arn=opts.profile,
                network_interfaces=interfaces)
            my_req_ids = [r.id for r in master_reqs]
            print >> stderr, "Waiting for spot instance to be granted"
            try:
                while True:
                    time.sleep(10)
                    reqs = conn.get_all_spot_instance_requests(
                        request_ids=my_req_ids)
                    id_to_req = {}
                    for r in reqs:
                        id_to_req[r.id] = r
                    active_instance_ids = []
                    for i in my_req_ids:
                        if i in id_to_req and id_to_req[i].state == "active":
                            active_instance_ids.append(
                                id_to_req[i].instance_id)
                    if len(active_instance_ids) == 1:
                        print >> stderr, "Master granted"
                        reservations = conn.get_all_instances(
                            active_instance_ids)
                        master_nodes = []
                        for r in reservations:
                            master_nodes += r.instances
                        break
                    else:
                        # print >> stderr, ".",
                        print "%d of %d masters granted, waiting longer" % (
                            len(active_instance_ids), 1)
            except:
                print >> stderr, "Canceling spot instance requests"
                conn.cancel_spot_instance_requests(my_req_ids)
                # Log a warning if any of these requests actually launched instances:
                (master_nodes,
                 slave_nodes) = get_existing_cluster(conn,
                                                     opts,
                                                     cluster_name,
                                                     die_on_error=False)
                running = len(master_nodes) + len(slave_nodes)
                if running:
                    print >> stderr, (
                        "WARNING: %d instances are still running" % running)
                sys.exit(0)
        else:
            master_res = image.run(key_name=opts.key_pair,
                                   security_group_ids=[master_group.id],
                                   instance_type=master_type,
                                   subnet_id=subnetId(),
                                   placement=opts.zone,
                                   min_count=1,
                                   max_count=1,
                                   block_device_map=block_map,
                                   user_data=user_data,
                                   instance_profile_arn=opts.profile)
            master_nodes = master_res.instances
            print >> stderr, "Launched master in %s, regid = %s" % (
                zone, master_res.id)
    # Return all the instances
    return (master_nodes, slave_nodes)
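
find_best_price is another helper defined outside this excerpt. A hedged sketch of one way to implement it, deriving a bid from recent spot-price history and capping it at the user's maximum (the one-hour window and 10% margin are assumptions):

import datetime

def find_best_price(conn, instance_type, zone, max_price, margin=1.1):
    # Bid slightly above the highest spot price seen in the last hour
    # for this instance type and zone, but never above max_price.
    start = (datetime.datetime.utcnow() -
             datetime.timedelta(hours=1)).isoformat()
    history = conn.get_spot_price_history(
        start_time=start,
        instance_type=instance_type,
        availability_zone=zone,
        product_description='Linux/UNIX')
    if not history:
        return max_price
    recent_high = max(h.price for h in history)
    return min(recent_high * margin, max_price)
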
Example #41
def launch_cluster(conn, opts, cluster_name):
    if opts.identity_file is None:
        print >> stderr, "ERROR: Must provide an identity file (-i) for ssh connections."
        sys.exit(1)
    if opts.key_pair is None:
        print >> stderr, "ERROR: Must provide a key pair name (-k) to use on instances."
        sys.exit(1)

    user_data_content = None
    if opts.user_data:
        with open(opts.user_data) as user_data_file:
            user_data_content = user_data_file.read()

    print "Setting up security groups..."
    master_group = get_or_make_group(conn, cluster_name + "-master")
    slave_group = get_or_make_group(conn, cluster_name + "-slaves")
    authorized_address = opts.authorized_address
    if master_group.rules == []:  # Group was just now created
        master_group.authorize(src_group=master_group)
        master_group.authorize(src_group=slave_group)
        master_group.authorize('tcp', 22, 22, authorized_address)
        master_group.authorize('tcp', 8080, 8081, authorized_address)
        master_group.authorize('tcp', 18080, 18080, authorized_address)
        master_group.authorize('tcp', 19999, 19999, authorized_address)
        master_group.authorize('tcp', 50030, 50030, authorized_address)
        master_group.authorize('tcp', 50070, 50070, authorized_address)
        master_group.authorize('tcp', 60070, 60070, authorized_address)
        master_group.authorize('tcp', 4040, 4045, authorized_address)
        if opts.ganglia:
            master_group.authorize('tcp', 5080, 5080, authorized_address)
    if slave_group.rules == []:  # Group was just now created
        slave_group.authorize(src_group=master_group)
        slave_group.authorize(src_group=slave_group)
        slave_group.authorize('tcp', 22, 22, authorized_address)
        slave_group.authorize('tcp', 8080, 8081, authorized_address)
        slave_group.authorize('tcp', 50060, 50060, authorized_address)
        slave_group.authorize('tcp', 50075, 50075, authorized_address)
        slave_group.authorize('tcp', 60060, 60060, authorized_address)
        slave_group.authorize('tcp', 60075, 60075, authorized_address)

    # Check if instances are already running in our groups
    existing_masters, existing_slaves = get_existing_cluster(conn, opts, cluster_name,
                                                             die_on_error=False)
    if existing_slaves or (existing_masters and not opts.use_existing_master):
        print >> stderr, ("ERROR: There are already instances running in " +
                          "group %s or %s" % (master_group.name, slave_group.name))
        sys.exit(1)

    # Figure out Spark AMI
    if opts.ami is None:
        opts.ami = get_spark_ami(opts)

    additional_groups = []
    if opts.additional_security_group:
        additional_groups = [sg
                             for sg in conn.get_all_security_groups()
                             if opts.additional_security_group in (sg.name, sg.id)]
    print "Launching instances..."

    try:
        image = conn.get_all_images(image_ids=[opts.ami])[0]
    except:
        print >> stderr, "Could not find AMI " + opts.ami
        sys.exit(1)

    # Create block device mapping so that we can add EBS volumes if asked to.
    # The first drive is attached as /dev/sds, 2nd as /dev/sdt, ... /dev/sdz
    block_map = BlockDeviceMapping()
    if opts.ebs_vol_size > 0:
        for i in range(opts.ebs_vol_num):
            device = EBSBlockDeviceType()
            device.size = opts.ebs_vol_size
            device.volume_type = opts.ebs_vol_type
            device.delete_on_termination = True
            block_map["/dev/sd" + chr(ord('s') + i)] = device

    # AWS ignores the AMI-specified block device mapping for M3 (see SPARK-3342).
    if opts.instance_type.startswith('m3.'):
        for i in range(get_num_disks(opts.instance_type)):
            dev = BlockDeviceType()
            dev.ephemeral_name = 'ephemeral%d' % i
            # The first ephemeral drive is /dev/sdb.
            name = '/dev/sd' + string.letters[i + 1]
            block_map[name] = dev

    # Launch slaves
    if opts.spot_price is not None:
        # Launch spot instances with the requested price
        print ("Requesting %d slaves as spot instances with price $%.3f" %
               (opts.slaves, opts.spot_price))
        zones = get_zones(conn, opts)
        num_zones = len(zones)
        i = 0
        my_req_ids = []
        for zone in zones:
            num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
            slave_reqs = conn.request_spot_instances(
                price=opts.spot_price,
                image_id=opts.ami,
                launch_group="launch-group-%s" % cluster_name,
                placement=zone,
                count=num_slaves_this_zone,
                key_name=opts.key_pair,
                security_groups=[slave_group] + additional_groups,
                instance_type=opts.instance_type,
                block_device_map=block_map,
                user_data=user_data_content)
            my_req_ids += [req.id for req in slave_reqs]
            i += 1

        print "Waiting for spot instances to be granted..."
        try:
            while True:
                time.sleep(10)
                reqs = conn.get_all_spot_instance_requests()
                id_to_req = {}
                for r in reqs:
                    id_to_req[r.id] = r
                active_instance_ids = []
                for i in my_req_ids:
                    if i in id_to_req and id_to_req[i].state == "active":
                        active_instance_ids.append(id_to_req[i].instance_id)
                if len(active_instance_ids) == opts.slaves:
                    print "All %d slaves granted" % opts.slaves
                    reservations = conn.get_all_instances(active_instance_ids)
                    slave_nodes = []
                    for r in reservations:
                        slave_nodes += r.instances
                    break
                else:
                    print "%d of %d slaves granted, waiting longer" % (
                        len(active_instance_ids), opts.slaves)
        except:
            print "Canceling spot instance requests"
            conn.cancel_spot_instance_requests(my_req_ids)
            # Log a warning if any of these requests actually launched instances:
            (master_nodes, slave_nodes) = get_existing_cluster(
                conn, opts, cluster_name, die_on_error=False)
            running = len(master_nodes) + len(slave_nodes)
            if running:
                print >> stderr, ("WARNING: %d instances are still running" % running)
            sys.exit(0)
    else:
        # Launch non-spot instances
        zones = get_zones(conn, opts)
        num_zones = len(zones)
        i = 0
        slave_nodes = []
        for zone in zones:
            num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
            if num_slaves_this_zone > 0:
                slave_res = image.run(key_name=opts.key_pair,
                                      security_groups=[slave_group] + additional_groups,
                                      instance_type=opts.instance_type,
                                      placement=zone,
                                      min_count=num_slaves_this_zone,
                                      max_count=num_slaves_this_zone,
                                      block_device_map=block_map,
                                      user_data=user_data_content)
                slave_nodes += slave_res.instances
                print "Launched %d slaves in %s, regid = %s" % (num_slaves_this_zone,
                                                                zone, slave_res.id)
            i += 1

    # Launch or resume masters
    if existing_masters:
        print "Starting master..."
        for inst in existing_masters:
            if inst.state not in ["shutting-down", "terminated"]:
                inst.start()
        master_nodes = existing_masters
    else:
        master_type = opts.master_instance_type
        if master_type == "":
            master_type = opts.instance_type
        if opts.zone == 'all':
            opts.zone = random.choice(conn.get_all_zones()).name
        master_res = image.run(key_name=opts.key_pair,
                               security_groups=[master_group] + additional_groups,
                               instance_type=master_type,
                               placement=opts.zone,
                               min_count=1,
                               max_count=1,
                               block_device_map=block_map,
                               user_data=user_data_content)
        master_nodes = master_res.instances
        print "Launched master in %s, regid = %s" % (zone, master_res.id)

    # Give the instances descriptive names
    for master in master_nodes:
        master.add_tag(
            key='Name',
            value='{cn}-master-{iid}'.format(cn=cluster_name, iid=master.id))
    for slave in slave_nodes:
        slave.add_tag(
            key='Name',
            value='{cn}-slave-{iid}'.format(cn=cluster_name, iid=slave.id))

    # Return all the instances
    return (master_nodes, slave_nodes)
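The block-device logic shared by these launch_cluster variants can be read in isolation. Below is a minimal standalone sketch of the same boto 2.x pattern; the helper name and its parameters are illustrative, not part of the script above:

from boto.ec2.blockdevicemapping import (BlockDeviceMapping,
                                         BlockDeviceType,
                                         EBSBlockDeviceType)

def build_block_map(ebs_vol_num, ebs_vol_size, ebs_vol_type, num_ephemeral):
    block_map = BlockDeviceMapping()
    # EBS volumes are attached starting at /dev/sds, as in the code above.
    for i in range(ebs_vol_num):
        dev = EBSBlockDeviceType()
        dev.size = ebs_vol_size
        dev.volume_type = ebs_vol_type
        dev.delete_on_termination = True
        block_map['/dev/sd' + chr(ord('s') + i)] = dev
    # Instance-store (ephemeral) drives start at /dev/sdb.
    for i in range(num_ephemeral):
        eph = BlockDeviceType()
        eph.ephemeral_name = 'ephemeral%d' % i
        block_map['/dev/sd' + chr(ord('b') + i)] = eph
    return block_map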
Example #42
0
def launch_cluster(conn, opts, cluster_name):
    if opts.identity_file is None:
        print("ERROR: Must provide an identity file (-i) for ssh connections.", file=stderr)
        sys.exit(1)

    if opts.key_pair is None:
        print("ERROR: Must provide a key pair name (-k) to use on instances.", file=stderr)
        sys.exit(1)

    user_data_content = None
    if opts.user_data:
        with open(opts.user_data) as user_data_file:
            user_data_content = user_data_file.read()

    print("Setting up security groups...")
    master_group = get_or_make_group(conn, cluster_name + "-master", opts.vpc_id)
    slave_group = get_or_make_group(conn, cluster_name + "-slaves", opts.vpc_id)
    authorized_address = opts.authorized_address
    if master_group.rules == []:  # Group was just now created
        if opts.vpc_id is None:
            master_group.authorize(src_group=master_group)
            master_group.authorize(src_group=slave_group)
        else:
            master_group.authorize(ip_protocol='icmp', from_port=-1, to_port=-1,
                                   src_group=master_group)
            master_group.authorize(ip_protocol='tcp', from_port=0, to_port=65535,
                                   src_group=master_group)
            master_group.authorize(ip_protocol='udp', from_port=0, to_port=65535,
                                   src_group=master_group)
            master_group.authorize(ip_protocol='icmp', from_port=-1, to_port=-1,
                                   src_group=slave_group)
            master_group.authorize(ip_protocol='tcp', from_port=0, to_port=65535,
                                   src_group=slave_group)
            master_group.authorize(ip_protocol='udp', from_port=0, to_port=65535,
                                   src_group=slave_group)
        master_group.authorize('tcp', 22, 22, authorized_address)
        master_group.authorize('tcp', 8080, 8081, authorized_address)
        master_group.authorize('tcp', 18080, 18080, authorized_address)
        master_group.authorize('tcp', 19999, 19999, authorized_address)
        master_group.authorize('tcp', 50030, 50030, authorized_address)
        master_group.authorize('tcp', 50070, 50070, authorized_address)
        master_group.authorize('tcp', 60070, 60070, authorized_address)
        master_group.authorize('tcp', 4040, 4045, authorized_address)
        # Rstudio (GUI for R) needs port 8787 for web access
        master_group.authorize('tcp', 8787, 8787, authorized_address)
        # HDFS NFS gateway requires 111,2049,4242 for tcp & udp
        master_group.authorize('tcp', 111, 111, authorized_address)
        master_group.authorize('udp', 111, 111, authorized_address)
        master_group.authorize('tcp', 2049, 2049, authorized_address)
        master_group.authorize('udp', 2049, 2049, authorized_address)
        master_group.authorize('tcp', 4242, 4242, authorized_address)
        master_group.authorize('udp', 4242, 4242, authorized_address)
        # RM in YARN mode uses 8088
        master_group.authorize('tcp', 8088, 8088, authorized_address)
        if opts.ganglia:
            master_group.authorize('tcp', 5080, 5080, authorized_address)
    if slave_group.rules == []:  # Group was just now created
        if opts.vpc_id is None:
            slave_group.authorize(src_group=master_group)
            slave_group.authorize(src_group=slave_group)
        else:
            slave_group.authorize(ip_protocol='icmp', from_port=-1, to_port=-1,
                                  src_group=master_group)
            slave_group.authorize(ip_protocol='tcp', from_port=0, to_port=65535,
                                  src_group=master_group)
            slave_group.authorize(ip_protocol='udp', from_port=0, to_port=65535,
                                  src_group=master_group)
            slave_group.authorize(ip_protocol='icmp', from_port=-1, to_port=-1,
                                  src_group=slave_group)
            slave_group.authorize(ip_protocol='tcp', from_port=0, to_port=65535,
                                  src_group=slave_group)
            slave_group.authorize(ip_protocol='udp', from_port=0, to_port=65535,
                                  src_group=slave_group)
        slave_group.authorize('tcp', 22, 22, authorized_address)
        slave_group.authorize('tcp', 8080, 8081, authorized_address)
        slave_group.authorize('tcp', 50060, 50060, authorized_address)
        slave_group.authorize('tcp', 50075, 50075, authorized_address)
        slave_group.authorize('tcp', 60060, 60060, authorized_address)
        slave_group.authorize('tcp', 60075, 60075, authorized_address)
        # Kylix: open ports 50050-50060 between the master and slave groups
        slave_group.authorize(ip_protocol='tcp', from_port=50050, to_port=50060,
                              src_group=slave_group)
        slave_group.authorize(ip_protocol='udp', from_port=50050, to_port=50060,
                              src_group=slave_group)
        slave_group.authorize(ip_protocol='tcp', from_port=50050, to_port=50060,
                              src_group=master_group)
        slave_group.authorize(ip_protocol='udp', from_port=50050, to_port=50060,
                              src_group=master_group)
        master_group.authorize(ip_protocol='tcp', from_port=50050, to_port=50060,
                               src_group=slave_group)
        master_group.authorize(ip_protocol='udp', from_port=50050, to_port=50060,
                               src_group=slave_group)

    # Check if instances are already running in our groups
    existing_masters, existing_slaves = get_existing_cluster(conn, opts, cluster_name,
                                                             die_on_error=False)
    if existing_slaves or (existing_masters and not opts.use_existing_master):
        print("ERROR: There are already instances running in group %s or %s" %
              (master_group.name, slave_group.name), file=stderr)
        sys.exit(1)

    # we use group ids to work around https://github.com/boto/boto/issues/350
    additional_group_ids = []
    if opts.additional_security_group:
        additional_group_ids = [sg.id
                                for sg in conn.get_all_security_groups()
                                if opts.additional_security_group in (sg.name, sg.id)]
    print("Launching instances...")

    try:
        image = conn.get_all_images(image_ids=[opts.ami])[0]
    except:
        print("Could not find AMI " + opts.ami, file=stderr)
        sys.exit(1)

    # Create block device mapping so that we can add EBS volumes if asked to.
    # The first drive is attached as /dev/sds, 2nd as /dev/sdt, ... /dev/sdz
    block_map = BlockDeviceMapping()
    if opts.ebs_vol_size > 0:
        for i in range(opts.ebs_vol_num):
            device = EBSBlockDeviceType()
            device.size = opts.ebs_vol_size
            device.volume_type = opts.ebs_vol_type
            device.delete_on_termination = True
            block_map["/dev/sd" + chr(ord('s') + i)] = device

    # AWS ignores the AMI-specified block device mapping for M3 (see SPARK-3342).
    if opts.instance_type.startswith('m3.'):
        for i in range(get_num_disks(opts.instance_type)):
            dev = BlockDeviceType()
            dev.ephemeral_name = 'ephemeral%d' % i
            # The first ephemeral drive is /dev/sdb.
            name = '/dev/sd' + string.letters[i + 1]
            block_map[name] = dev

    # Launch slaves
    if opts.spot_price is not None:
        # Launch spot instances with the requested price
        print("Requesting %d slaves as spot instances with price $%.3f" %
              (opts.slaves, opts.spot_price))
        zones = get_zones(conn, opts)
        num_zones = len(zones)
        i = 0
        my_req_ids = []
        for zone in zones:
            num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
            slave_reqs = conn.request_spot_instances(
                price=opts.spot_price,
                image_id=opts.ami,
                launch_group="launch-group-%s" % cluster_name,
                placement=zone,
                count=num_slaves_this_zone,
                key_name=opts.key_pair,
                security_group_ids=[slave_group.id] + additional_group_ids,
                instance_type=opts.instance_type,
                block_device_map=block_map,
                subnet_id=opts.subnet_id,
                placement_group=opts.placement_group,
                user_data=user_data_content,
                instance_profile_name=opts.instance_profile_name)
            my_req_ids += [req.id for req in slave_reqs]
            i += 1

        print("Waiting for spot instances to be granted...")
        try:
            while True:
                time.sleep(10)
                reqs = conn.get_all_spot_instance_requests()
                id_to_req = {}
                for r in reqs:
                    id_to_req[r.id] = r
                active_instance_ids = []
                for i in my_req_ids:
                    if i in id_to_req and id_to_req[i].state == "active":
                        active_instance_ids.append(id_to_req[i].instance_id)
                if len(active_instance_ids) == opts.slaves:
                    print("All %d slaves granted" % opts.slaves)
                    reservations = conn.get_all_reservations(active_instance_ids)
                    slave_nodes = []
                    for r in reservations:
                        slave_nodes += r.instances
                    break
                else:
                    print("%d of %d slaves granted, waiting longer" % (
                        len(active_instance_ids), opts.slaves))
        except:
            print("Canceling spot instance requests")
            conn.cancel_spot_instance_requests(my_req_ids)
            # Log a warning if any of these requests actually launched instances:
            (master_nodes, slave_nodes) = get_existing_cluster(
                conn, opts, cluster_name, die_on_error=False)
            running = len(master_nodes) + len(slave_nodes)
            if running:
                print(("WARNING: %d instances are still running" % running), file=stderr)
            sys.exit(0)
    else:
        # Launch non-spot instances
        zones = get_zones(conn, opts)
        num_zones = len(zones)
        i = 0
        slave_nodes = []
        for zone in zones:
            num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
            if num_slaves_this_zone > 0:
                slave_res = image.run(
                    key_name=opts.key_pair,
                    security_group_ids=[slave_group.id] + additional_group_ids,
                    instance_type=opts.instance_type,
                    placement=zone,
                    min_count=num_slaves_this_zone,
                    max_count=num_slaves_this_zone,
                    block_device_map=block_map,
                    subnet_id=opts.subnet_id,
                    placement_group=opts.placement_group,
                    user_data=user_data_content,
                    instance_initiated_shutdown_behavior=opts.instance_initiated_shutdown_behavior,
                    instance_profile_name=opts.instance_profile_name)
                slave_nodes += slave_res.instances
                print("Launched {s} slave{plural_s} in {z}, regid = {r}".format(
                      s=num_slaves_this_zone,
                      plural_s=('' if num_slaves_this_zone == 1 else 's'),
                      z=zone,
                      r=slave_res.id))
            i += 1

    # Launch or resume masters
    if existing_masters:
        print("Starting master...")
        for inst in existing_masters:
            if inst.state not in ["shutting-down", "terminated"]:
                inst.start()
        master_nodes = existing_masters
    else:
        master_type = opts.master_instance_type
        if master_type == "":
            master_type = opts.instance_type
        if opts.zone == 'all':
            opts.zone = random.choice(conn.get_all_zones()).name
        master_res = image.run(
            key_name=opts.key_pair,
            security_group_ids=[master_group.id] + additional_group_ids,
            instance_type=master_type,
            placement=opts.zone,
            min_count=1,
            max_count=1,
            block_device_map=block_map,
            subnet_id=opts.subnet_id,
            placement_group=opts.placement_group,
            user_data=user_data_content,
            instance_initiated_shutdown_behavior=opts.instance_initiated_shutdown_behavior,
            instance_profile_name=opts.instance_profile_name)

        master_nodes = master_res.instances
        print("Launched master in %s, regid = %s" % (zone, master_res.id))

    # This wait time corresponds to SPARK-4983
    print("Waiting for AWS to propagate instance metadata...")
    time.sleep(15)

    # Give the instances descriptive names and set additional tags
    additional_tags = {}
    if opts.additional_tags.strip():
        additional_tags = dict(
            map(str.strip, tag.split(':', 1)) for tag in opts.additional_tags.split(',')
        )

    for master in master_nodes:
        master.add_tags(
            dict(additional_tags, Name='{cn}-master-{iid}'.format(cn=cluster_name, iid=master.id))
        )

    for slave in slave_nodes:
        slave.add_tags(
            dict(additional_tags, Name='{cn}-slave-{iid}'.format(cn=cluster_name, iid=slave.id))
        )

    # Return all the instances
    return (master_nodes, slave_nodes)
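get_or_make_group, called at the top of every variant, is not shown in these excerpts. A sketch of the usual boto 2.x implementation, consistent with how the variant above calls it (the vpc_id handling is inferred from the call site, not taken from the source):

def get_or_make_group(conn, name, vpc_id=None):
    # Return the EC2 security group with the given name, creating it
    # in the given VPC (or EC2-Classic) if it does not exist yet.
    groups = [g for g in conn.get_all_security_groups() if g.name == name]
    if groups:
        return groups[0]
    print("Creating security group " + name)
    return conn.create_security_group(name, "Spark EC2 group", vpc_id)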
Example #43
0
def launch_cluster(conn, opts, cluster_name):
    if opts.identity_file is None:
        print >> stderr, "ERROR: Must provide an identity file (-i) for ssh connections."
        sys.exit(1)
    if opts.key_pair is None:
        print >> stderr, "ERROR: Must provide a key pair name (-k) to use on instances."
        sys.exit(1)

    user_data_content = None
    if opts.user_data:
        with open(opts.user_data) as user_data_file:
            user_data_content = user_data_file.read()

    print "Setting up security groups..."
    master_group = get_or_make_group(conn, cluster_name + "-master")
    slave_group = get_or_make_group(conn, cluster_name + "-slaves")
    if master_group.rules == []:  # Group was just now created
        master_group.authorize(src_group=master_group)
        master_group.authorize(src_group=slave_group)
        master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        master_group.authorize('tcp', 18080, 18080, '0.0.0.0/0')
        master_group.authorize('tcp', 19999, 19999, '0.0.0.0/0')
        master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0')
        master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0')
        master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0')
        master_group.authorize('tcp', 4040, 4045, '0.0.0.0/0')
        if opts.ganglia:
            master_group.authorize('tcp', 5080, 5080, '0.0.0.0/0')
    if slave_group.rules == []:  # Group was just now created
        slave_group.authorize(src_group=master_group)
        slave_group.authorize(src_group=slave_group)
        slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
        slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
        slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
        slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')

    # Check if instances are already running in our groups
    existing_masters, existing_slaves = get_existing_cluster(
        conn, opts, cluster_name, die_on_error=False)
    if existing_slaves or (existing_masters and not opts.use_existing_master):
        print >> stderr, ("ERROR: There are already instances running in " +
                          "group %s or %s" %
                          (master_group.name, slave_group.name))
        sys.exit(1)

    # Figure out Spark AMI
    if opts.ami is None:
        opts.ami = get_spark_ami(opts)
    print "Launching instances..."

    try:
        image = conn.get_all_images(image_ids=[opts.ami])[0]
    except:
        print >> stderr, "Could not find AMI " + opts.ami
        sys.exit(1)

    # Create block device mapping so that we can add an EBS volume if asked to
    block_map = BlockDeviceMapping()
    if opts.ebs_vol_size > 0:
        device = EBSBlockDeviceType()
        device.size = opts.ebs_vol_size
        device.delete_on_termination = True
        block_map["/dev/sdv"] = device

    # AWS ignores the AMI-specified block device mapping for M3 (see SPARK-3342).
    if opts.instance_type.startswith('m3.'):
        for i in range(get_num_disks(opts.instance_type)):
            dev = BlockDeviceType()
            dev.ephemeral_name = 'ephemeral%d' % i
            # The first ephemeral drive is /dev/sdb.
            name = '/dev/sd' + string.letters[i + 1]
            block_map[name] = dev

    # Launch slaves
    if opts.spot_price is not None:
        # Launch spot instances with the requested price
        print("Requesting %d slaves as spot instances with price $%.3f" %
              (opts.slaves, opts.spot_price))
        zones = get_zones(conn, opts)
        num_zones = len(zones)
        i = 0
        my_req_ids = []
        for zone in zones:
            num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
            slave_reqs = conn.request_spot_instances(
                price=opts.spot_price,
                image_id=opts.ami,
                launch_group="launch-group-%s" % cluster_name,
                placement=zone,
                count=num_slaves_this_zone,
                key_name=opts.key_pair,
                security_groups=[slave_group],
                instance_type=opts.instance_type,
                block_device_map=block_map,
                user_data=user_data_content)
            my_req_ids += [req.id for req in slave_reqs]
            i += 1

        print "Waiting for spot instances to be granted..."
        try:
            while True:
                time.sleep(10)
                reqs = conn.get_all_spot_instance_requests()
                id_to_req = {}
                for r in reqs:
                    id_to_req[r.id] = r
                active_instance_ids = []
                for i in my_req_ids:
                    if i in id_to_req and id_to_req[i].state == "active":
                        active_instance_ids.append(id_to_req[i].instance_id)
                if len(active_instance_ids) == opts.slaves:
                    print "All %d slaves granted" % opts.slaves
                    reservations = conn.get_all_instances(active_instance_ids)
                    slave_nodes = []
                    for r in reservations:
                        slave_nodes += r.instances
                    break
                else:
                    print "%d of %d slaves granted, waiting longer" % (
                        len(active_instance_ids), opts.slaves)
        except:
            print "Canceling spot instance requests"
            conn.cancel_spot_instance_requests(my_req_ids)
            # Log a warning if any of these requests actually launched instances:
            (master_nodes,
             slave_nodes) = get_existing_cluster(conn,
                                                 opts,
                                                 cluster_name,
                                                 die_on_error=False)
            running = len(master_nodes) + len(slave_nodes)
            if running:
                print >> stderr, ("WARNING: %d instances are still running" %
                                  running)
            sys.exit(0)
    else:
        # Launch non-spot instances
        zones = get_zones(conn, opts)
        num_zones = len(zones)
        i = 0
        slave_nodes = []
        for zone in zones:
            num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
            if num_slaves_this_zone > 0:
                slave_res = image.run(key_name=opts.key_pair,
                                      security_groups=[slave_group],
                                      instance_type=opts.instance_type,
                                      placement=zone,
                                      min_count=num_slaves_this_zone,
                                      max_count=num_slaves_this_zone,
                                      block_device_map=block_map,
                                      user_data=user_data_content)
                slave_nodes += slave_res.instances
                print "Launched %d slaves in %s, regid = %s" % (
                    num_slaves_this_zone, zone, slave_res.id)
            i += 1

    # Launch or resume masters
    if existing_masters:
        print "Starting master..."
        for inst in existing_masters:
            if inst.state not in ["shutting-down", "terminated"]:
                inst.start()
        master_nodes = existing_masters
    else:
        master_type = opts.master_instance_type
        if master_type == "":
            master_type = opts.instance_type
        if opts.zone == 'all':
            opts.zone = random.choice(conn.get_all_zones()).name
        master_res = image.run(key_name=opts.key_pair,
                               security_groups=[master_group],
                               instance_type=master_type,
                               placement=opts.zone,
                               min_count=1,
                               max_count=1,
                               block_device_map=block_map,
                               user_data=user_data_content)
        master_nodes = master_res.instances
        print "Launched master in %s, regid = %s" % (zone, master_res.id)

    # Give the instances descriptive names
    for master in master_nodes:
        master.add_tag(key='Name',
                       value='{cn}-master-{iid}'.format(cn=cluster_name,
                                                        iid=master.id))
    for slave in slave_nodes:
        slave.add_tag(key='Name',
                      value='{cn}-slave-{iid}'.format(cn=cluster_name,
                                                      iid=slave.id))

    # Return all the instances
    return (master_nodes, slave_nodes)
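Every variant spreads the requested slaves across availability zones with get_partition, which is also not shown. A sketch consistent with how the loops above consume it (Python 2 integer division; the remainder goes to the earlier zones):

def get_partition(total, num_partitions, current_partition):
    # Even split, with one extra instance in each of the first
    # (total % num_partitions) zones.
    num = total / num_partitions
    if (total % num_partitions) - current_partition > 0:
        num += 1
    return num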
Example #44
0
def launch_cluster(conn, opts, cluster_name):
    if opts.identity_file is None:
        print >> stderr, "ERROR: Must provide an identity file (-i) for ssh connections."
        sys.exit(1)
    if opts.key_pair is None:
        print >> stderr, "ERROR: Must provide a key pair name (-k) to use on instances."
        sys.exit(1)

    user_data_content = None
    if opts.user_data:
        with open(opts.user_data) as user_data_file:
            user_data_content = user_data_file.read()

    print "Setting up security groups..."
    if opts.security_group_prefix is None:
        master_group = get_or_make_group(conn, cluster_name + "-master")
        slave_group = get_or_make_group(conn, cluster_name + "-slaves")
    else:
        master_group = get_or_make_group(conn, opts.security_group_prefix + "-master")
        slave_group = get_or_make_group(conn, opts.security_group_prefix + "-slaves")
    if master_group.rules == []:  # Group was just now created
        master_group.authorize(src_group=master_group)
        master_group.authorize(src_group=slave_group)
        master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        master_group.authorize('tcp', 18080, 18080, '0.0.0.0/0')
        master_group.authorize('tcp', 19999, 19999, '0.0.0.0/0')
        master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0')
        master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0')
        master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0')
        master_group.authorize('tcp', 4040, 4045, '0.0.0.0/0')
        if opts.ganglia:
            master_group.authorize('tcp', 5080, 5080, '0.0.0.0/0')
    if slave_group.rules == []:  # Group was just now created
        slave_group.authorize(src_group=master_group)
        slave_group.authorize(src_group=slave_group)
        slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
        slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
        slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
        slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')

    # Check if instances are already running with the cluster name
    existing_masters, existing_slaves = get_existing_cluster(conn, opts, cluster_name,
                                                             die_on_error=False)
    if existing_slaves or (existing_masters and not opts.use_existing_master):
        print >> stderr, ("ERROR: There are already instances for name: %s " % cluster_name)
        sys.exit(1)

    # Figure out Spark AMI
    if opts.ami is None:
        opts.ami = get_spark_ami(opts)
    print "Launching instances..."

    try:
        image = conn.get_all_images(image_ids=[opts.ami])[0]
    except:
        print >> stderr, "Could not find AMI " + opts.ami
        sys.exit(1)

    # Create block device mapping so that we can add an EBS volume if asked to
    block_map = BlockDeviceMapping()
    if opts.ebs_vol_size > 0:
        device = EBSBlockDeviceType()
        device.size = opts.ebs_vol_size
        device.delete_on_termination = True
        block_map["/dev/sdv"] = device

    # Launch slaves
    if opts.spot_price is not None:
        # Launch spot instances with the requested price
        print ("Requesting %d slaves as spot instances with price $%.3f" %
               (opts.slaves, opts.spot_price))
        zones = get_zones(conn, opts)
        num_zones = len(zones)
        i = 0
        my_req_ids = []
        for zone in zones:
            num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
            slave_reqs = conn.request_spot_instances(
                price=opts.spot_price,
                image_id=opts.ami,
                launch_group="launch-group-%s" % cluster_name,
                placement=zone,
                count=num_slaves_this_zone,
                key_name=opts.key_pair,
                security_groups=[slave_group],
                instance_type=opts.instance_type,
                block_device_map=block_map,
                user_data=user_data_content)
            my_req_ids += [req.id for req in slave_reqs]
            i += 1

        print "Waiting for spot instances to be granted..."
        try:
            while True:
                time.sleep(10)
                reqs = conn.get_all_spot_instance_requests()
                id_to_req = {}
                for r in reqs:
                    id_to_req[r.id] = r
                active_instance_ids = []
                outstanding_request_ids = []
                for i in my_req_ids:
                    if i in id_to_req:
                        if id_to_req[i].state == "active":
                            active_instance_ids.append(id_to_req[i].instance_id)
                        else:
                            outstanding_request_ids.append(i)
                if len(active_instance_ids) == opts.slaves:
                    print "All %d slaves granted" % opts.slaves
                    reservations = conn.get_all_instances(active_instance_ids)
                    slave_nodes = []
                    for r in reservations:
                        slave_nodes += r.instances
                    break
                else:
                    print "%d of %d slaves granted, waiting longer for request ids including %s" % (
                        len(active_instance_ids), opts.slaves, outstanding_request_ids[0:10])
        except:
            print "Canceling spot instance requests"
            conn.cancel_spot_instance_requests(my_req_ids)
            # Log a warning if any of these requests actually launched instances:
            (master_nodes, slave_nodes) = get_existing_cluster(
                conn, opts, cluster_name, die_on_error=False)
            running = len(master_nodes) + len(slave_nodes)
            if running:
                print >> stderr, ("WARNING: %d instances are still running" % running)
            sys.exit(0)
    else:
        # Launch non-spot instances
        zones = get_zones(conn, opts)
        num_zones = len(zones)
        i = 0
        slave_nodes = []
        for zone in zones:
            num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
            if num_slaves_this_zone > 0:
                slave_res = image.run(key_name=opts.key_pair,
                                      security_groups=[slave_group],
                                      instance_type=opts.instance_type,
                                      placement=zone,
                                      min_count=num_slaves_this_zone,
                                      max_count=num_slaves_this_zone,
                                      block_device_map=block_map,
                                      user_data=user_data_content)
                slave_nodes += slave_res.instances
                print "Launched %d slaves in %s, regid = %s" % (num_slaves_this_zone,
                                                                zone, slave_res.id)
            i += 1

    # Launch or resume masters
    if existing_masters:
        print "Starting master..."
        for inst in existing_masters:
            if inst.state not in ["shutting-down", "terminated"]:
                inst.start()
        master_nodes = existing_masters
    else:
        master_type = opts.master_instance_type
        if master_type == "":
            master_type = opts.instance_type
        if opts.zone == 'all':
            opts.zone = random.choice(conn.get_all_zones()).name
        master_res = image.run(key_name=opts.key_pair,
                               security_groups=[master_group],
                               instance_type=master_type,
                               placement=opts.zone,
                               min_count=1,
                               max_count=1,
                               block_device_map=block_map)
        master_nodes = master_res.instances
        print "Launched master in %s, regid = %s" % (zone, master_res.id)

    # Give the instances descriptive names
    # TODO: Add retry logic for tagging with name since it's used to identify a cluster.
    for master in master_nodes:
        name = '{cn}-master-{iid}'.format(cn=cluster_name, iid=master.id)
        for i in range(0, 5):
            try:
                master.add_tag(key='Name', value=name)
                break  # tag applied; stop retrying
            except:
                print "Failed attempt %i of 5 to tag %s" % ((i + 1), name)
                if i == 4:  # range(0, 5) never reaches 5
                    raise Exception("Error - failed max attempts to add name tag")
                time.sleep(5)

    for slave in slave_nodes:
        name = '{cn}-slave-{iid}'.format(cn=cluster_name, iid=slave.id)
        for i in range(0, 5):
            try:
                slave.add_tag(key='Name', value=name)
                break  # tag applied; stop retrying
            except:
                print "Failed attempt %i of 5 to tag %s" % ((i + 1), name)
                if i == 4:  # range(0, 5) never reaches 5
                    raise Exception("Error - failed max attempts to add name tag")
                time.sleep(5)

    # Return all the instances
    return (master_nodes, slave_nodes)
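The tag-retry loops above (and in the next example) follow a generic retry shape; factored out, it might look like the helper below. The name and defaults are illustrative:

import time

def retry(fn, attempts=5, delay=5):
    # Call fn(), retrying on any exception with a fixed delay;
    # re-raise the last exception if every attempt fails.
    for attempt in range(attempts):
        try:
            return fn()
        except Exception:
            if attempt == attempts - 1:
                raise
            time.sleep(delay)

With it, each tagging loop reduces to retry(lambda: master.add_tag(key='Name', value=name)).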
Example #45
0
def launch_cluster(conn, opts, cluster_name):
    if opts.identity_file is None:
        print >> stderr, "ERROR: Must provide an identity file (-i) for ssh connections."
        sys.exit(1)
    if opts.key_pair is None:
        print >> stderr, "ERROR: Must provide a key pair name (-k) to use on instances."
        sys.exit(1)

    user_data_content = None
    if opts.user_data:
        with open(opts.user_data) as user_data_file:
            user_data_content = user_data_file.read()

    print "Setting up security groups..."
    if opts.security_group_prefix is None:
        master_group = get_or_make_group(conn, cluster_name + "-master")
        slave_group = get_or_make_group(conn, cluster_name + "-slaves")
    else:
        master_group = get_or_make_group(
            conn, opts.security_group_prefix + "-master")
        slave_group = get_or_make_group(conn,
                                        opts.security_group_prefix + "-slaves")
    authorized_address = opts.authorized_address
    if master_group.rules == []:  # Group was just now created
        master_group.authorize(src_group=master_group)
        master_group.authorize(src_group=slave_group)
        master_group.authorize('tcp', 22, 22, authorized_address)
        master_group.authorize('tcp', 8080, 8081, authorized_address)
        master_group.authorize('tcp', 18080, 18080, authorized_address)
        master_group.authorize('tcp', 19999, 19999, authorized_address)
        master_group.authorize('tcp', 50030, 50030, authorized_address)
        master_group.authorize('tcp', 50070, 50070, authorized_address)
        master_group.authorize('tcp', 60070, 60070, authorized_address)
        master_group.authorize('tcp', 4040, 4045, authorized_address)
        if opts.ganglia:
            master_group.authorize('tcp', 5080, 5080, authorized_address)
    if slave_group.rules == []:  # Group was just now created
        slave_group.authorize(src_group=master_group)
        slave_group.authorize(src_group=slave_group)
        slave_group.authorize('tcp', 22, 22, authorized_address)
        slave_group.authorize('tcp', 8080, 8081, authorized_address)
        slave_group.authorize('tcp', 50060, 50060, authorized_address)
        slave_group.authorize('tcp', 50075, 50075, authorized_address)
        slave_group.authorize('tcp', 60060, 60060, authorized_address)
        slave_group.authorize('tcp', 60075, 60075, authorized_address)

    # Check if instances are already running with the cluster name
    existing_masters, existing_slaves = get_existing_cluster(
        conn, opts, cluster_name, die_on_error=False)
    if existing_slaves or (existing_masters and not opts.use_existing_master):
        print >> stderr, ("ERROR: There are already instances for name: %s " %
                          cluster_name)
        sys.exit(1)

    # Figure out Spark AMI
    if opts.ami is None:
        opts.ami = get_spark_ami(opts)

    additional_groups = []
    if opts.additional_security_group:
        additional_groups = [
            sg for sg in conn.get_all_security_groups()
            if opts.additional_security_group in (sg.name, sg.id)
        ]
    print "Launching instances..."

    try:
        image = conn.get_all_images(image_ids=[opts.ami])[0]
    except:
        print >> stderr, "Could not find AMI " + opts.ami
        sys.exit(1)

    # Create block device mapping so that we can add an EBS volume if asked to
    block_map = BlockDeviceMapping()
    if opts.ebs_vol_size > 0:
        device = EBSBlockDeviceType()
        device.size = opts.ebs_vol_size
        device.delete_on_termination = True
        block_map["/dev/sdv"] = device

    # Launch slaves
    if opts.spot_price is not None:
        # Launch spot instances with the requested price
        print("Requesting %d slaves as spot instances with price $%.3f" %
              (opts.slaves, opts.spot_price))
        zones = get_zones(conn, opts)
        num_zones = len(zones)
        i = 0
        my_req_ids = []
        for zone in zones:
            num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
            slave_reqs = conn.request_spot_instances(
                price=opts.spot_price,
                image_id=opts.ami,
                launch_group="launch-group-%s" % cluster_name,
                placement=zone,
                count=num_slaves_this_zone,
                key_name=opts.key_pair,
                security_groups=[slave_group] + additional_groups,
                instance_type=opts.instance_type,
                block_device_map=block_map,
                user_data=user_data_content)
            my_req_ids += [req.id for req in slave_reqs]
            i += 1

        print "Waiting for spot instances to be granted..."
        try:
            while True:
                time.sleep(10)
                reqs = conn.get_all_spot_instance_requests()
                id_to_req = {}
                for r in reqs:
                    id_to_req[r.id] = r
                active_instance_ids = []
                outstanding_request_ids = []
                for i in my_req_ids:
                    if i in id_to_req:
                        if id_to_req[i].state == "active":
                            active_instance_ids.append(
                                id_to_req[i].instance_id)
                        else:
                            outstanding_request_ids.append(i)
                if len(active_instance_ids) == opts.slaves:
                    print "All %d slaves granted" % opts.slaves
                    reservations = conn.get_all_instances(active_instance_ids)
                    slave_nodes = []
                    for r in reservations:
                        slave_nodes += r.instances
                    break
                else:
                    print "%d of %d slaves granted, waiting longer for request ids including %s" % (
                        len(active_instance_ids), opts.slaves,
                        outstanding_request_ids[0:10])
        except:
            print "Canceling spot instance requests"
            conn.cancel_spot_instance_requests(my_req_ids)
            # Log a warning if any of these requests actually launched instances:
            (master_nodes,
             slave_nodes) = get_existing_cluster(conn,
                                                 opts,
                                                 cluster_name,
                                                 die_on_error=False)
            running = len(master_nodes) + len(slave_nodes)
            if running:
                print >> stderr, ("WARNING: %d instances are still running" %
                                  running)
            sys.exit(0)
    else:
        # Launch non-spot instances
        zones = get_zones(conn, opts)
        num_zones = len(zones)
        i = 0
        slave_nodes = []
        for zone in zones:
            num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
            if num_slaves_this_zone > 0:
                slave_res = image.run(key_name=opts.key_pair,
                                      security_groups=[slave_group] +
                                      additional_groups,
                                      instance_type=opts.instance_type,
                                      placement=zone,
                                      min_count=num_slaves_this_zone,
                                      max_count=num_slaves_this_zone,
                                      block_device_map=block_map,
                                      user_data=user_data_content)
                slave_nodes += slave_res.instances
                print "Launched %d slaves in %s, regid = %s" % (
                    num_slaves_this_zone, zone, slave_res.id)
            i += 1

    # Launch or resume masters
    if existing_masters:
        print "Starting master..."
        for inst in existing_masters:
            if inst.state not in ["shutting-down", "terminated"]:
                inst.start()
        master_nodes = existing_masters
    else:
        master_type = opts.master_instance_type
        if master_type == "":
            master_type = opts.instance_type
        if opts.zone == 'all':
            opts.zone = random.choice(conn.get_all_zones()).name
        master_res = image.run(key_name=opts.key_pair,
                               security_groups=[master_group] +
                               additional_groups,
                               instance_type=master_type,
                               placement=opts.zone,
                               min_count=1,
                               max_count=1,
                               block_device_map=block_map)
        master_nodes = master_res.instances
        print "Launched master in %s, regid = %s" % (zone, master_res.id)

    # Give the instances descriptive names
    # TODO: Add retry logic for tagging with name since it's used to identify a cluster.
    for master in master_nodes:
        name = '{cn}-master-{iid}'.format(cn=cluster_name, iid=master.id)
        for i in range(0, 5):
            try:
                master.add_tag(key='Name', value=name)
                break  # tag applied; stop retrying
            except:
                print "Failed attempt %i of 5 to tag %s" % ((i + 1), name)
                if i == 4:  # range(0, 5) never reaches 5
                    raise Exception("Error - failed max attempts to add name tag")
                time.sleep(5)

    for slave in slave_nodes:
        name = '{cn}-slave-{iid}'.format(cn=cluster_name, iid=slave.id)
        for i in range(0, 5):
            try:
                slave.add_tag(key='Name', value=name)
                break  # tag applied; stop retrying
            except:
                print "Failed attempt %i of 5 to tag %s" % ((i + 1), name)
                if i == 4:  # range(0, 5) never reaches 5
                    raise Exception("Error - failed max attempts to add name tag")
                time.sleep(5)

    # Return all the instances
    return (master_nodes, slave_nodes)
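The spot-request polling loop is nearly identical in every variant. A condensed sketch of the same boto 2.x pattern; the function name and poll interval are illustrative, and unlike the originals it fetches only its own requests:

import time

def wait_for_spot_requests(conn, request_ids, poll_interval=10):
    # Block until every spot request is active, then return the
    # instance ids the requests were fulfilled with.
    while True:
        time.sleep(poll_interval)
        reqs = conn.get_all_spot_instance_requests(request_ids=request_ids)
        active = [r.instance_id for r in reqs if r.state == 'active']
        if len(active) == len(request_ids):
            return active
        print('%d of %d requests active, waiting...'
              % (len(active), len(request_ids)))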
Example #46
0
    def create_nodes(self, reactor, names, distribution, metadata={}):
        """
        Create nodes with the given names.

        :param reactor: The reactor.
        :param names: The names of the nodes.
        :type names: list of str
        :param str distribution: The name of the distribution to
            install on the nodes.
        :param dict metadata: Metadata to associate with the nodes.

        :return: A list of ``Deferred``s, each firing with an ``INode``
            when the corresponding node is created. The list has the
            same order as ``names``.
        """
        size = self._default_size
        disk_size = 8

        action = start_action(
            action_type=u"flocker:provision:aws:create_nodes",
            instance_count=len(names),
            distribution=distribution,
            image_size=size,
            disk_size=disk_size,
            metadata=metadata,
        )
        with action.context():
            disk1 = EBSBlockDeviceType()
            disk1.size = disk_size
            disk1.delete_on_termination = True
            diskmap = BlockDeviceMapping()
            diskmap['/dev/sda1'] = disk1

            images = self._connection.get_all_images(
                filters={'name': IMAGE_NAMES[distribution]}, )

            instances = self._run_nodes(count=len(names),
                                        image_id=images[0].id,
                                        size=size,
                                        diskmap=diskmap)

            def make_node(ignored, name, instance):
                return AWSNode(
                    name=name,
                    _provisioner=self,
                    _instance=instance,
                    distribution=distribution,
                )

            results = []
            for name, instance in izip_longest(names, instances):
                if instance is None:
                    results.append(fail(Exception("Could not run instance")))
                else:
                    node_metadata = metadata.copy()
                    node_metadata['Name'] = name
                    d = self._async_get_node(reactor, instance, node_metadata)
                    d = DeferredContext(d)
                    d.addCallback(make_node, name, instance)
                    results.append(d.result)
            action_completion = DeferredContext(DeferredList(results))
            action_completion.addActionFinish()
            # Individual results and errors should be consumed by the caller,
            # so we can leave action_completion alone now.
            return results
Example #47
0
def launch_cluster(conn, opts, cluster_name):
  print "Setting up security groups..."
  master_group = get_or_make_group(conn, cluster_name + "-master")
  slave_group = get_or_make_group(conn, cluster_name + "-slaves")
  zoo_group = get_or_make_group(conn, cluster_name + "-zoo")
  if master_group.rules == []: # Group was just now created
    master_group.authorize(src_group=master_group)
    master_group.authorize(src_group=slave_group)
    master_group.authorize(src_group=zoo_group)
    master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
    master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
    master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0')
    master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0')
    master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0')
    master_group.authorize('tcp', 38090, 38090, '0.0.0.0/0')
  if slave_group.rules == []: # Group was just now created
    slave_group.authorize(src_group=master_group)
    slave_group.authorize(src_group=slave_group)
    slave_group.authorize(src_group=zoo_group)
    slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
    slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
    slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
    slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
    slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
    slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')
  if zoo_group.rules == []: # Group was just now created
    zoo_group.authorize(src_group=master_group)
    zoo_group.authorize(src_group=slave_group)
    zoo_group.authorize(src_group=zoo_group)
    zoo_group.authorize('tcp', 22, 22, '0.0.0.0/0')
    zoo_group.authorize('tcp', 2181, 2181, '0.0.0.0/0')
    zoo_group.authorize('tcp', 2888, 2888, '0.0.0.0/0')
    zoo_group.authorize('tcp', 3888, 3888, '0.0.0.0/0')

  # Check if instances are already running in our groups
  print "Checking for running cluster..."
  reservations = conn.get_all_instances()
  for res in reservations:
    group_names = [g.name for g in res.groups]
    if master_group.name in group_names or slave_group.name in group_names or zoo_group.name in group_names:
      active = [i for i in res.instances if is_active(i)]
      if len(active) > 0:
        print >> stderr, ("ERROR: There are already instances running in " +
            "group %s, %s or %s" % (master_group.name, slave_group.name, zoo_group.name))
        sys.exit(1)
  print "Launching instances..."
  try:
    image = conn.get_all_images(image_ids=[opts.ami])[0]
  except:
    print >> stderr, "Could not find AMI " + opts.ami
    sys.exit(1)

  # Create block device mapping so that we can add an EBS volume if asked to
  block_map = BlockDeviceMapping()
  if opts.ebs_vol_size > 0:
    device = EBSBlockDeviceType()
    device.size = opts.ebs_vol_size
    device.delete_on_termination = True
    block_map["/dev/sdv"] = device

  # Launch slaves
  if opts.spot_price is not None:
    # Launch spot instances with the requested price
    print ("Requesting %d slaves as spot instances with price $%.3f" %
           (opts.slaves, opts.spot_price))
    slave_reqs = conn.request_spot_instances(
        price = opts.spot_price,
        image_id = opts.ami,
        launch_group = "launch-group-%s" % cluster_name,
        placement = opts.zone,
        count = opts.slaves,
        key_name = opts.key_pair,
        security_groups = [slave_group],
        instance_type = opts.instance_type,
        block_device_map = block_map)
    my_req_ids = [req.id for req in slave_reqs]
    print "Waiting for spot instances to be granted..."
    while True:
      time.sleep(10)
      reqs = conn.get_all_spot_instance_requests()
      id_to_req = {}
      for r in reqs:
        id_to_req[r.id] = r
      active = 0
      instance_ids = []
      for i in my_req_ids:
        if i in id_to_req and id_to_req[i].state == "active":
          active += 1
          instance_ids.append(id_to_req[i].instance_id)
      if active == opts.slaves:
        print "All %d slaves granted" % opts.slaves
        reservations = conn.get_all_instances(instance_ids)
        slave_nodes = []
        for r in reservations:
          slave_nodes += r.instances
        break
      else:
        print "%d of %d slaves granted, waiting longer" % (active, opts.slaves)
  else:
    # Launch non-spot instances
    slave_res = image.run(key_name = opts.key_pair,
                          security_groups = [slave_group],
                          instance_type = opts.instance_type,
                          placement = opts.zone,
                          min_count = opts.slaves,
                          max_count = opts.slaves,
                          block_device_map = block_map)
    slave_nodes = slave_res.instances
    print "Launched slaves, regid = " + slave_res.id

  # Launch masters
  master_type = opts.master_instance_type
  if master_type == "":
    master_type = opts.instance_type
  master_res = image.run(key_name = opts.key_pair,
                         security_groups = [master_group],
                         instance_type = master_type,
                         placement = opts.zone,
                         min_count = opts.ft,
                         max_count = opts.ft,
                         block_device_map = block_map)
  master_nodes = master_res.instances
  print "Launched master, regid = " + master_res.id

  # Launch ZooKeeper nodes if required
  if opts.ft > 1:
    zoo_res = image.run(key_name = opts.key_pair,
                        security_groups = [zoo_group],
                        instance_type = opts.instance_type,
                        placement = opts.zone,
                        min_count = 3,
                        max_count = 3,
                        block_device_map = block_map)
    zoo_nodes = zoo_res.instances
    print "Launched zoo, regid = " + zoo_res.id
  else:
    zoo_nodes = []

  # Return all the instances
  return (master_nodes, slave_nodes, zoo_nodes)
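is_active, used in the running-cluster check above, is not shown in the excerpt. It typically treats every non-terminated state as active; a sketch (an assumption, since the helper's body is not in the source):

def is_active(instance):
    # Anything not shutting down or terminated counts as a live
    # cluster member.
    return instance.state in ('pending', 'running', 'stopping', 'stopped')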
Example #48
0
def launch_cluster(conn, opts, cluster_name):
    print "Setting up security groups..."
    master_group = get_or_make_group(conn, cluster_name + "-master")
    slave_group = get_or_make_group(conn, cluster_name + "-slaves")
    zoo_group = get_or_make_group(conn, cluster_name + "-zoo")
    # master_group = get_or_make_group(conn, cluster_name)
    # slave_group = get_or_make_group(conn, cluster_name)
    # zoo_group = get_or_make_group(conn, cluster_name)

    if master_group.rules == []:  # Group was just now created
        master_group.authorize(src_group=master_group)
        master_group.authorize(src_group=slave_group)
        master_group.authorize(src_group=zoo_group)
        master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0')
        master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0')
        master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0')
        master_group.authorize('tcp', 38090, 38090, '0.0.0.0/0')
    if slave_group.rules == []:  # Group was just now created
        slave_group.authorize(src_group=master_group)
        slave_group.authorize(src_group=slave_group)
        slave_group.authorize(src_group=zoo_group)
        slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
        slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
        slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
        slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')
    if zoo_group.rules == []:  # Group was just now created
        zoo_group.authorize(src_group=master_group)
        zoo_group.authorize(src_group=slave_group)
        zoo_group.authorize(src_group=zoo_group)
        zoo_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        zoo_group.authorize('tcp', 2181, 2181, '0.0.0.0/0')
        zoo_group.authorize('tcp', 2888, 2888, '0.0.0.0/0')
        zoo_group.authorize('tcp', 3888, 3888, '0.0.0.0/0')

    # Check if instances are already running in our groups
    print "Checking for running cluster..."
    reservations = conn.get_all_instances()
    for res in reservations:
        group_names = [g.name for g in res.groups]
        if master_group.name in group_names or slave_group.name in group_names or zoo_group.name in group_names:
            active = [i for i in res.instances if is_active(i)]
            if len(active) > 0:
                print >> stderr, (
                    "ERROR: There are already instances running in " +
                    "group %s, %s or %s" %
                    (master_group.name, slave_group.name, zoo_group.name))
                sys.exit(1)

    if opts.ami == "std":
        try:
            opts.ami = urllib2.urlopen(STD_AMI_URL).read().strip()
            print "GraphLab AMI for Standard Instances: " + opts.ami
        except:
            print >> stderr, "Could not read " + STD_AMI_URL
    elif opts.ami == "hpc":
        try:
            opts.ami = urllib2.urlopen(HVM_AMI_URL).read().strip()
            print "GraphLab AMI for HPC Instances: " + opts.ami
        except:
            print >> stderr, "Could not read " + HVM_AMI_URL

    print "Launching instances..."
    try:
        image = conn.get_all_images(image_ids=[opts.ami])[0]
    except:
        print >> stderr, "Could not find AMI " + opts.ami
        sys.exit(1)

    # Create block device mapping so that we can add an EBS volume if asked to
    block_map = BlockDeviceMapping()
    if opts.ebs_vol_size > 0:
        device = EBSBlockDeviceType()
        device.size = opts.ebs_vol_size
        device.delete_on_termination = True
        block_map["/dev/sdv"] = device

    # Launch slaves
    if opts.spot_price is not None:
        # Launch spot instances with the requested price
        print("Requesting %d slaves as spot instances with price $%.3f" %
              (opts.slaves, opts.spot_price))
        slave_reqs = conn.request_spot_instances(
            price=opts.spot_price,
            image_id=opts.ami,
            launch_group="launch-group-%s" % cluster_name,
            placement=opts.zone,
            count=opts.slaves,
            key_name=opts.key_pair,
            security_groups=[slave_group],
            instance_type=opts.instance_type,
            block_device_map=block_map)
        my_req_ids = [req.id for req in slave_reqs]
        print "Waiting for spot instances to be granted..."
        while True:
            time.sleep(10)
            reqs = conn.get_all_spot_instance_requests()
            id_to_req = {}
            for r in reqs:
                id_to_req[r.id] = r
            active = 0
            instance_ids = []
            for i in my_req_ids:
                if i in id_to_req and id_to_req[i].state == "active":
                    active += 1
                    instance_ids.append(id_to_req[i].instance_id)
            if active == opts.slaves:
                print "All %d slaves granted" % opts.slaves
                reservations = conn.get_all_instances(instance_ids)
                slave_nodes = []
                for r in reservations:
                    slave_nodes += r.instances
                break
            else:
                print "%d of %d slaves granted, waiting longer" % (active,
                                                                   opts.slaves)
    else:
        # Launch non-spot instances
        slave_res = image.run(key_name=opts.key_pair,
                              security_groups=[slave_group],
                              instance_type=opts.instance_type,
                              placement=opts.zone,
                              min_count=opts.slaves,
                              max_count=opts.slaves,
                              block_device_map=block_map)
        slave_nodes = slave_res.instances
        print "Launched slaves, regid = " + slave_res.id

    # Launch masters
    master_type = opts.master_instance_type
    if master_type == "":
        master_type = opts.instance_type
    master_res = image.run(key_name=opts.key_pair,
                           security_groups=[master_group],
                           instance_type=master_type,
                           placement=opts.zone,
                           min_count=1,
                           max_count=1,
                           block_device_map=block_map)
    master_nodes = master_res.instances
    print "Launched master, regid = " + master_res.id

    zoo_nodes = []

    # Return all the instances
    return (master_nodes, slave_nodes, zoo_nodes)
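
The spot-polling loop in this example never times out and, unlike the later variants, does not cancel outstanding requests if it is interrupted. A hedged sketch of the same wait extracted into a helper with a deadline (the function name and timings are mine, not from the snippet):

import time

def wait_for_spot_requests(conn, request_ids, expected, timeout=1200, poll=10):
    # Poll until `expected` requests are active; cancel everything and
    # fail if the deadline passes first. Returns granted instance ids.
    deadline = time.time() + timeout
    while time.time() < deadline:
        time.sleep(poll)
        reqs = conn.get_all_spot_instance_requests(request_ids=request_ids)
        active = [r.instance_id for r in reqs if r.state == "active"]
        if len(active) >= expected:
            return active
    conn.cancel_spot_instance_requests(request_ids)
    raise RuntimeError("spot requests not granted within %d seconds" % timeout)
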
Example #49
def launch_cluster(conn, opts, num_nodes, cluster_name):
    if opts.identity_file is None:
        print("ERROR: Must provide an identity file (-i) for ssh connections.", file=stderr)
        sys.exit(1)

    if opts.key_pair is None:
        print("ERROR: Must provide a key pair name (-k) to use on instances.", file=stderr)
        sys.exit(1)

    print("Setting up security groups...")

    slave_group = get_or_make_group(conn, cluster_name + "-slaves", opts.vpc_id)
    authorized_address = opts.authorized_address
    if slave_group.rules == []:  # Group was just now created
        if opts.vpc_id is None:
            slave_group.authorize(src_group=slave_group)
        else:
            slave_group.authorize(ip_protocol='icmp', from_port=-1, to_port=-1,
                                  src_group=slave_group)
            slave_group.authorize(ip_protocol='tcp', from_port=0, to_port=65535,
                                  src_group=slave_group)
            slave_group.authorize(ip_protocol='udp', from_port=0, to_port=65535,
                                  src_group=slave_group)
        slave_group.authorize('tcp', 22, 22, authorized_address)

    # Check if instances are already running in our groups
    existing_slaves = get_existing_cluster(conn, opts, cluster_name, die_on_error=False)
    if existing_slaves:
        print("ERROR: There are already instances running in group %s" %
              slave_group.name, file=stderr)
        sys.exit(1)

    if opts.ami is None:
        print("ERROR: AMI is not set, exit")
        sys.exit(1)

    # we use group ids to work around https://github.com/boto/boto/issues/350
    additional_group_ids = []
    if opts.additional_security_group:
        additional_group_ids = [sg.id
                                for sg in conn.get_all_security_groups()
                                if opts.additional_security_group in (sg.name, sg.id)]
    print("Launching instances...")

    try:
        image = conn.get_all_images(image_ids=[opts.ami])[0]
    except:
        print("Could not find AMI " + opts.ami, file=stderr)
        sys.exit(1)

    # Create block device mapping so that we can add EBS volumes if asked to.
    # The first drive is attached as /dev/sds, 2nd as /dev/sdt, ... /dev/sdz
    block_map = BlockDeviceMapping()
    if opts.ebs_vol_size > 0:
        for i in range(opts.ebs_vol_num):
            device = EBSBlockDeviceType()
            device.size = opts.ebs_vol_size
            device.volume_type = opts.ebs_vol_type
            device.delete_on_termination = True
            block_map["/dev/sd" + chr(ord('s') + i)] = device

    # Launch slaves
    if opts.spot_price is not None:
        # Launch spot instances with the requested price
        print("Requesting %d slaves as spot instances with price $%.3f" %
              (num_nodes, opts.spot_price))
        zones = get_zones(conn, opts)
        num_zones = len(zones)
        i = 0
        my_req_ids = []
        for zone in zones:
            num_slaves_this_zone = get_partition(num_nodes, num_zones, i)
            slave_reqs = conn.request_spot_instances(
                price=opts.spot_price,
                image_id=opts.ami,
                launch_group="launch-group-%s" % cluster_name,
                placement=zone,
                count=num_slaves_this_zone,
                key_name=opts.key_pair,
                security_group_ids=[slave_group.id] + additional_group_ids,
                instance_type=opts.instance_type,
                block_device_map=block_map,
                subnet_id=opts.subnet_id,
                placement_group=opts.placement_group,
                instance_profile_name=opts.instance_profile_name)
            my_req_ids += [req.id for req in slave_reqs]
            i += 1

        print("Waiting for spot instances to be granted...")
        try:
            while True:
                time.sleep(10)
                reqs = conn.get_all_spot_instance_requests()
                id_to_req = {}
                for r in reqs:
                    id_to_req[r.id] = r
                active_instance_ids = []
                for i in my_req_ids:
                    if i in id_to_req and id_to_req[i].state == "active":
                        active_instance_ids.append(id_to_req[i].instance_id)
                if len(active_instance_ids) == num_nodes:
                    print("All %d spot instances granted" % (num_nodes + 1))
                    reservations = conn.get_all_reservations(active_instance_ids)
                    slave_nodes = []
                    for r in reservations:
                        slave_nodes += r.instances
                    break
                else:
                    print("%d of %d slave spot instances granted, waiting longer" % (
                            len(active_instance_ids), num_nodes))
        except:
            print("Canceling spot instance requests")
            conn.cancel_spot_instance_requests(my_req_ids)
            # Log a warning if any of these requests actually launched instances:
            slave_nodes = get_existing_cluster(conn, opts, cluster_name, die_on_error=False)
            running = len(slave_nodes)
            if running:
                print(("WARNING: %d instances are still running" % running), file=stderr)
            sys.exit(0)
    else:
        print ("WARNING: --spot-price was not set; consider launch slaves as spot instances to save money")
        # Launch non-spot instances
        zones = get_zones(conn, opts)
        num_zones = len(zones)
        i = 0
        slave_nodes = []
        for zone in zones:
            num_slaves_this_zone = get_partition(num_nodes, num_zones, i)
            if num_slaves_this_zone > 0:
                slave_res = image.run(
                    key_name=opts.key_pair,
                    security_group_ids=[slave_group.id] + additional_group_ids,
                    instance_type=opts.instance_type,
                    placement=zone,
                    min_count=num_slaves_this_zone,
                    max_count=num_slaves_this_zone,
                    block_device_map=block_map,
                    subnet_id=opts.subnet_id,
                    placement_group=opts.placement_group,
                    instance_initiated_shutdown_behavior=opts.instance_initiated_shutdown_behavior,
                    instance_profile_name=opts.instance_profile_name)
                slave_nodes += slave_res.instances
                print("Launched {s} slave{plural_s} in {z}, regid = {r}".format(
                      s=num_slaves_this_zone,
                      plural_s=('' if num_slaves_this_zone == 1 else 's'),
                      z=zone,
                      r=slave_res.id))
            i += 1


    print("Waiting for AWS to propagate instance metadata...")
    time.sleep(15)

    # Give the instances descriptive names and set additional tags
    additional_tags = {}
    if opts.additional_tags.strip():
        additional_tags = dict(
            map(str.strip, tag.split(':', 1)) for tag in opts.additional_tags.split(',')
        )

    for slave in slave_nodes:
        slave.add_tags(
            dict(additional_tags, Name='{cn}-slave-{iid}'.format(cn=cluster_name, iid=slave.id))
        )

    # Return all the instances
    return slave_nodes
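
The block-device loop above attaches extra EBS volumes at /dev/sds, /dev/sdt and so on, but nothing stops ebs_vol_num from running past /dev/sdz. A small extraction of the same mapping with an explicit bound (the function name is mine):

from boto.ec2.blockdevicemapping import BlockDeviceMapping, EBSBlockDeviceType

def build_ebs_map(num_volumes, size_gb, vol_type='standard'):
    # Same naming scheme as the loop above: /dev/sds .. /dev/sdz.
    if not 0 < num_volumes <= 8:
        raise ValueError("only /dev/sds..sdz are available (8 volumes max)")
    block_map = BlockDeviceMapping()
    for i in range(num_volumes):
        dev = EBSBlockDeviceType()
        dev.size = size_gb
        dev.volume_type = vol_type
        dev.delete_on_termination = True
        block_map["/dev/sd" + chr(ord('s') + i)] = dev
    return block_map
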
Example #50
def update_ami(aws_svc, encrypted_ami, updater_ami, encrypted_ami_name,
               subnet_id=None, security_group_ids=None,
               enc_svc_class=encryptor_service.EncryptorService,
               guest_instance_type='m3.medium',
               updater_instance_type='m3.medium',
               instance_config=None,
               status_port=encryptor_service.ENCRYPTOR_STATUS_PORT):
    encrypted_guest = None
    updater = None
    mv_root_id = None
    temp_sg_id = None
    if instance_config is None:
        instance_config = InstanceConfig()

    try:
        guest_image = aws_svc.get_image(encrypted_ami)

        # Step 1. Launch encrypted guest AMI
        # Use 'updater' mode to avoid chain loading the guest
        # automatically. We just want this AMI/instance up as the
        # base to create a new AMI and preserve license
        # information embedded in the guest AMI
        log.info("Launching encrypted guest/updater")

        instance_config.brkt_config['solo_mode'] = 'updater'
        instance_config.brkt_config['status_port'] = status_port

        encrypted_guest = aws_svc.run_instance(
            encrypted_ami,
            instance_type=guest_instance_type,
            ebs_optimized=False,
            subnet_id=subnet_id,
            user_data=json.dumps(instance_config.brkt_config))
        aws_svc.create_tags(
            encrypted_guest.id,
            name=NAME_GUEST_CREATOR,
            description=DESCRIPTION_GUEST_CREATOR % {'image_id': encrypted_ami}
        )
        # Run updater in same zone as guest so we can swap volumes

        user_data = instance_config.make_userdata()
        compressed_user_data = gzip_user_data(user_data)

        # If the user didn't specify a security group, create a temporary
        # security group that allows brkt-cli to get status from the updater.
        run_instance = aws_svc.run_instance
        if not security_group_ids:
            vpc_id = None
            if subnet_id:
                subnet = aws_svc.get_subnet(subnet_id)
                vpc_id = subnet.vpc_id
            temp_sg_id = create_encryptor_security_group(
                aws_svc, vpc_id=vpc_id, status_port=status_port).id
            security_group_ids = [temp_sg_id]

            # Wrap with a retry, to handle eventual consistency issues with
            # the newly-created group.
            run_instance = aws_svc.retry(
                aws_svc.run_instance,
                error_code_regexp=r'InvalidGroup\.NotFound'
            )

        updater = run_instance(
            updater_ami,
            instance_type=updater_instance_type,
            user_data=compressed_user_data,
            ebs_optimized=False,
            subnet_id=subnet_id,
            placement=encrypted_guest.placement,
            security_group_ids=security_group_ids)
        aws_svc.create_tags(
            updater.id,
            name=NAME_METAVISOR_UPDATER,
            description=DESCRIPTION_METAVISOR_UPDATER,
        )
        wait_for_instance(aws_svc, encrypted_guest.id, state="running")
        log.info("Launched guest: %s Updater: %s" %
             (encrypted_guest.id, updater.id)
        )

        # Step 2. Wait for the updater to finish and stop the instances
        aws_svc.stop_instance(encrypted_guest.id)

        updater = wait_for_instance(aws_svc, updater.id, state="running")
        host_ips = []
        if updater.ip_address:
            host_ips.append(updater.ip_address)
        if updater.private_ip_address:
            host_ips.append(updater.private_ip_address)
            log.info('Adding %s to NO_PROXY environment variable',
                     updater.private_ip_address)
            if os.environ.get('NO_PROXY'):
                os.environ['NO_PROXY'] += "," + \
                    updater.private_ip_address
            else:
                os.environ['NO_PROXY'] = updater.private_ip_address

        enc_svc = enc_svc_class(host_ips, port=status_port)
        log.info('Waiting for updater service on %s (port %s on %s)',
                 updater.id, enc_svc.port, ', '.join(host_ips))
        wait_for_encryptor_up(enc_svc, Deadline(600))
        try:
            wait_for_encryption(enc_svc)
        except Exception as e:
            # Stop the updater instance, to make the console log available.
            encrypt_ami.stop_and_wait(aws_svc, updater.id)

            log_exception_console(aws_svc, e, updater.id)
            raise

        aws_svc.stop_instance(updater.id)
        encrypted_guest = wait_for_instance(
            aws_svc, encrypted_guest.id, state="stopped")
        updater = wait_for_instance(aws_svc, updater.id, state="stopped")

        guest_bdm = encrypted_guest.block_device_mapping
        updater_bdm = updater.block_device_mapping

        # Step 3. Detach old BSD drive(s) and delete from encrypted guest
        if guest_image.virtualization_type == 'paravirtual':
            d_list = ['/dev/sda1', '/dev/sda2', '/dev/sda3']
        else:
            d_list = [encrypted_guest.root_device_name]
        for d in d_list:
            log.info("Detaching old metavisor disk: %s from %s" %
                (guest_bdm[d].volume_id, encrypted_guest.id))
            aws_svc.detach_volume(guest_bdm[d].volume_id,
                    instance_id=encrypted_guest.id,
                    force=True
            )
            aws_svc.delete_volume(guest_bdm[d].volume_id)

        # Step 4. Snapshot MV volume(s)
        log.info("Creating snapshots")
        if guest_image.virtualization_type == 'paravirtual':
            description = DESCRIPTION_SNAPSHOT % {'image_id': updater.id}
            snap_root = aws_svc.create_snapshot(
                updater_bdm['/dev/sda2'].volume_id,
                name=NAME_METAVISOR_ROOT_SNAPSHOT,
                description=description
            )
            snap_log = aws_svc.create_snapshot(
                updater_bdm['/dev/sda3'].volume_id,
                name=NAME_METAVISOR_LOG_SNAPSHOT,
                description=description
            )
            wait_for_snapshots(aws_svc, snap_root.id, snap_log.id)
            dev_root = EBSBlockDeviceType(volume_type='gp2',
                        snapshot_id=snap_root.id,
                        delete_on_termination=True)
            dev_log = EBSBlockDeviceType(volume_type='gp2',
                        snapshot_id=snap_log.id,
                        delete_on_termination=True)
            guest_bdm['/dev/sda2'] = dev_root
            guest_bdm['/dev/sda3'] = dev_log
            # Use updater as base instance for create_image
            boot_snap_name = NAME_METAVISOR_GRUB_SNAPSHOT
            root_device_name = updater.root_device_name
            guest_root = '/dev/sda5'
            d_list.append(guest_root)
        else:
            # Use guest_instance as base instance for create_image
            boot_snap_name = NAME_METAVISOR_ROOT_SNAPSHOT
            root_device_name = guest_image.root_device_name
            guest_root = '/dev/sdf'
            d_list.append(guest_root)

        # Preserve volume type for any additional attached volumes
        for d in guest_bdm.keys():
            if d not in d_list:
                log.debug("Preserving volume type for disk %s", d)
                vol_id = guest_bdm[d].volume_id
                vol = aws_svc.get_volume(vol_id)
                guest_bdm[d].volume_type = vol.type

        # Step 5. Move new MV boot disk to base instance
        log.info("Detach boot volume from %s" % (updater.id,))
        mv_root_id = updater_bdm['/dev/sda1'].volume_id
        aws_svc.detach_volume(mv_root_id,
            instance_id=updater.id,
            force=True
        )

        # Step 6. Attach new boot disk to guest instance
        log.info("Attaching new metavisor boot disk: %s to %s" %
            (mv_root_id, encrypted_guest.id)
        )
        aws_svc.attach_volume(mv_root_id, encrypted_guest.id, root_device_name)
        encrypted_guest = encrypt_ami.wait_for_volume_attached(
            aws_svc, encrypted_guest.id, root_device_name)
        guest_bdm[root_device_name] = \
            encrypted_guest.block_device_mapping[root_device_name]
        guest_bdm[root_device_name].delete_on_termination = True
        guest_bdm[root_device_name].volume_type = 'gp2'
        guest_root_vol_id = guest_bdm[guest_root].volume_id
        guest_root_vol = aws_svc.get_volume(guest_root_vol_id)
        guest_bdm[guest_root].volume_type = guest_root_vol.type

        # Step 7. Create new AMI. Preserve billing/license info
        log.info("Creating new AMI")
        ami = aws_svc.create_image(
            encrypted_guest.id,
            encrypted_ami_name,
            description=guest_image.description,
            no_reboot=True,
            block_device_mapping=guest_bdm
        )
        wait_for_image(aws_svc, ami)
        image = aws_svc.get_image(ami, retry=True)
        aws_svc.create_tags(
            image.block_device_mapping[root_device_name].snapshot_id,
            name=boot_snap_name,
        )
        aws_svc.create_tags(
            image.block_device_mapping[guest_root].snapshot_id,
            name=NAME_ENCRYPTED_ROOT_SNAPSHOT,
        )
        aws_svc.create_tags(ami)
        return ami
    finally:
        instance_ids = set()
        volume_ids = set()
        sg_ids = set()

        if encrypted_guest:
            instance_ids.add(encrypted_guest.id)
        if updater:
            instance_ids.add(updater.id)
        if mv_root_id:
            volume_ids.add(mv_root_id)
        if temp_sg_id:
            sg_ids.add(temp_sg_id)

        clean_up(aws_svc,
                 instance_ids=instance_ids,
                 volume_ids=volume_ids,
                 security_group_ids=sg_ids)
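
The updater is launched with gzip-compressed user-data because EC2 caps user-data at 16 KB and the instance_config JSON can grow past that; cloud-init style consumers decompress gzip payloads transparently. gzip_user_data itself is not shown in the snippet; a plausible stand-in, assuming it simply gzips the serialized payload:

import gzip
import io

def gzip_user_data(user_data):
    # Assumed behaviour: a byte-for-byte gzip of the userdata payload
    # (a str in Python 2), keeping it under the 16 KB user-data cap.
    buf = io.BytesIO()
    f = gzip.GzipFile(fileobj=buf, mode='wb')
    try:
        f.write(user_data)
    finally:
        f.close()
    return buf.getvalue()
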
Example #51
def launch_cluster(conn, opts, cluster_name):
    print "Setting up security groups..."
    master_group = get_or_make_group(conn, cluster_name + "-master")
    slave_group = get_or_make_group(conn, cluster_name + "-slaves")
    if master_group.rules == []:  # Group was just now created
        master_group.authorize(src_group=master_group)
        master_group.authorize(src_group=slave_group)
        master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0')
        master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0')
        master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0')
        master_group.authorize('tcp', 4040, 4045, '0.0.0.0/0')
        if opts.ganglia:
            master_group.authorize('tcp', 5080, 5080, '0.0.0.0/0')
    if slave_group.rules == []:  # Group was just now created
        slave_group.authorize(src_group=master_group)
        slave_group.authorize(src_group=slave_group)
        slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
        slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
        slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
        slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')

    # Check if instances are already running in our groups
    existing_masters, existing_slaves = get_existing_cluster(
        conn, opts, cluster_name, die_on_error=False)
    if existing_slaves or (existing_masters and not opts.use_existing_master):
        print >> stderr, ("ERROR: There are already instances running in " +
                          "group %s or %s" %
                          (master_group.name, slave_group.name))
        sys.exit(1)

    # Figure out Spark AMI
    if opts.ami is None:
        opts.ami = get_spark_ami(opts)
    print "Launching instances..."

    try:
        image = conn.get_all_images(image_ids=[opts.ami])[0]
    except:
        print >> stderr, "Could not find AMI " + opts.ami
        sys.exit(1)

    # Create block device mapping so that we can add an EBS volume if asked to
    block_map = BlockDeviceMapping()
    if opts.ebs_vol_size > 0:
        device = EBSBlockDeviceType()
        device.size = opts.ebs_vol_size
        device.delete_on_termination = True
        block_map["/dev/sdv"] = device

    # Launch slaves
    if opts.spot_price is not None:
        # Launch spot instances with the requested price
        print("Requesting %d slaves as spot instances with price $%.3f" %
              (opts.slaves, opts.spot_price))
        zones = get_zones(conn, opts)
        num_zones = len(zones)
        i = 0
        my_req_ids = []
        for zone in zones:
            num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
            slave_reqs = conn.request_spot_instances(
                price=opts.spot_price,
                image_id=opts.ami,
                launch_group="launch-group-%s" % cluster_name,
                placement=zone,
                count=num_slaves_this_zone,
                key_name=opts.key_pair,
                security_groups=[slave_group],
                instance_type=opts.instance_type,
                block_device_map=block_map)
            my_req_ids += [req.id for req in slave_reqs]
            i += 1

        print "Waiting for spot instances to be granted..."
        try:
            while True:
                time.sleep(10)
                reqs = conn.get_all_spot_instance_requests()
                id_to_req = {}
                for r in reqs:
                    id_to_req[r.id] = r
                active_instance_ids = []
                for i in my_req_ids:
                    if i in id_to_req and id_to_req[i].state == "active":
                        active_instance_ids.append(id_to_req[i].instance_id)
                if len(active_instance_ids) == opts.slaves:
                    print "All %d slaves granted" % opts.slaves
                    reservations = conn.get_all_instances(active_instance_ids)
                    slave_nodes = []
                    for r in reservations:
                        slave_nodes += r.instances
                    break
                else:
                    print "%d of %d slaves granted, waiting longer" % (
                        len(active_instance_ids), opts.slaves)
        except:
            print "Canceling spot instance requests"
            conn.cancel_spot_instance_requests(my_req_ids)
            # Log a warning if any of these requests actually launched instances:
            (master_nodes,
             slave_nodes) = get_existing_cluster(conn,
                                                 opts,
                                                 cluster_name,
                                                 die_on_error=False)
            running = len(master_nodes) + len(slave_nodes)
            if running:
                print >> stderr, ("WARNING: %d instances are still running" %
                                  running)
            sys.exit(0)
    else:
        # Launch non-spot instances
        zones = get_zones(conn, opts)
        num_zones = len(zones)
        i = 0
        slave_nodes = []
        for zone in zones:
            num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
            if num_slaves_this_zone > 0:
                slave_res = image.run(key_name=opts.key_pair,
                                      security_groups=[slave_group],
                                      instance_type=opts.instance_type,
                                      placement=zone,
                                      instance_profile_name="spark-node",
                                      min_count=num_slaves_this_zone,
                                      max_count=num_slaves_this_zone,
                                      block_device_map=block_map)
                slave_nodes += slave_res.instances
                print "Launched %d slaves in %s, regid = %s" % (
                    num_slaves_this_zone, zone, slave_res.id)
            i += 1

    # Launch or resume masters
    if existing_masters:
        print "Starting master..."
        for inst in existing_masters:
            if inst.state not in ["shutting-down", "terminated"]:
                inst.start()
        master_nodes = existing_masters
    else:
        master_type = opts.master_instance_type
        if master_type == "":
            master_type = opts.instance_type
        if opts.zone == 'all':
            opts.zone = random.choice(conn.get_all_zones()).name
        master_res = image.run(key_name=opts.key_pair,
                               security_groups=[master_group],
                               instance_type=master_type,
                               placement=opts.zone,
                               min_count=1,
                               max_count=1,
                               instance_profile_name="spark-node",
                               block_device_map=block_map)
        master_nodes = master_res.instances
        print "Launched master in %s, regid = %s" % (zone, master_res.id)

    # Return all the instances
    return (master_nodes, slave_nodes)
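
With --use-existing-master, stopped masters are resumed via inst.start() instead of being relaunched, and the loop only skips instances that are already on their way out. boto's start() returns immediately, so a caller usually polls afterwards; a minimal poller for the restarted instance might look like this (the helper name and timings are mine):

import time

def wait_for_state(instance, target="running", poll=5, timeout=300):
    # instance.update() refreshes the object and returns the current
    # EC2 state string.
    deadline = time.time() + timeout
    while time.time() < deadline:
        if instance.update() == target:
            return instance
        time.sleep(poll)
    raise RuntimeError("instance %s never reached state %s" % (instance.id, target))
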
Example #52
    def create_node(self,
                    name,
                    distribution,
                    size=None,
                    disk_size=8,
                    metadata={}):
        if size is None:
            size = self._default_size

        with start_action(
                action_type=u"flocker:provision:aws:create_node",
                name=name,
                distribution=distribution,
                image_size=size,
                disk_size=disk_size,
                metadata=metadata,
        ):

            metadata = metadata.copy()
            metadata['Name'] = name

            disk1 = EBSBlockDeviceType()
            disk1.size = disk_size
            disk1.delete_on_termination = True
            diskmap = BlockDeviceMapping()
            diskmap['/dev/sda1'] = disk1

            images = self._connection.get_all_images(
                filters={'name': IMAGE_NAMES[distribution]})

            with start_action(
                    action_type=u"flocker:provision:aws:create_node:run_instances",
            ) as context:
                reservation = self._connection.run_instances(
                    images[0].id,
                    key_name=self._keyname,
                    instance_type=size,
                    security_groups=self._security_groups,
                    block_device_map=diskmap,
                    placement=self._zone,
                    # On some operating systems, a tty is required for sudo.
                    # Since AWS systems have a non-root user as the login,
                    # disable this, so we can use sudo with conch.
                    user_data=dedent("""\
                        #!/bin/sh
                        sed -i '/Defaults *requiretty/d' /etc/sudoers
                        """),
                )

                instance = reservation.instances[0]
                context.add_success_fields(instance_id=instance.id)

            self._connection.create_tags([instance.id], metadata)

            # Display state as instance starts up, to keep user informed that
            # things are happening.
            _wait_until_running(instance)

            return AWSNode(
                name=name,
                _provisioner=self,
                _instance=instance,
                distribution=distribution,
            )
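
Mapping an EBSBlockDeviceType that sets only a size onto /dev/sda1, as create_node does here, overrides the root-volume size an EBS-backed AMI would otherwise provision, so disk_size effectively grows the boot disk. The pattern in isolation (the function name is mine):

from boto.ec2.blockdevicemapping import BlockDeviceMapping, EBSBlockDeviceType

def root_volume_map(size_gb, device='/dev/sda1'):
    # EC2 still builds the volume from the AMI's snapshot, but
    # provisions it at size_gb instead of the snapshot's size.
    root = EBSBlockDeviceType()
    root.size = size_gb
    root.delete_on_termination = True
    diskmap = BlockDeviceMapping()
    diskmap[device] = root
    return diskmap
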
Example #54
def create_cluster(conn, args):
    if args.identity_file is None:
        print("ERROR: Must provide an identity file (-i) for ssh connections.",
              file=stderr)
        sys.exit(1)

    if args.key_pair is None:
        print("ERROR: Must provide a key pair name (-k) to use on instances.",
              file=stderr)
        sys.exit(1)

    # make or get the security group.
    security_group = get_or_make_group(conn, args.name, args.vpc_id)

    # set the inbound permission rules
    if len(security_group.rules) == 0:
        if args.vpc_id is None:
            security_group.authorize(src_group=security_group)
        else:
            security_group.authorize('tcp', 22, 22,
                                     args.authorized_address)
            security_group.authorize('tcp', 8888, 8888,
                                     args.authorized_address)
            security_group.authorize('tcp', 7000, 7000,
                                     args.authorized_address)
            security_group.authorize('tcp', 7001, 7001,
                                     args.authorized_address)
            security_group.authorize('tcp', 7199, 7199,
                                     args.authorized_address)
            security_group.authorize('tcp', 9042, 9042,
                                     args.authorized_address)
            security_group.authorize('tcp', 9160, 9160,
                                     args.authorized_address)
    else:
        print("Security group already exists, skipping creation.")

    instances = cluster_nodes(conn, args.name)
    if any(instances):
        additional_tags = {}
        for i in instances:
            i.add_tags(
                dict(additional_tags,
                     Name="{cn}-node-{iid}".format(cn=args.name, iid=i.id)))
        return instances
    else:
        print(
            "Launching {m} instances for cluster...".format(m=args.node_count))

        try:
            image = conn.get_all_images(image_ids=[args.ami])[0]

            block_map = BlockDeviceMapping()
            if args.ebs_vol_size > 0:
                if args.instance_type.startswith('m3.'):
                    for i in range(get_num_disks(args.instance_type)):
                        device = BlockDeviceType()
                        device.ephemeral_name = "ephemeral%d" % i
                        name = "/dev/sd" + string.ascii_letters[i + 1]
                        block_map[name] = device

                else:
                    device = EBSBlockDeviceType()
                    device.size = args.ebs_vol_size
                    device.volume_type = args.ebs_vol_type
                    device.delete_on_termination = True
                    key = "/dev/sd" + chr(ord('s') + 1)
                    block_map[key] = device

            nodes = image.run(key_name=args.key_pair,
                              security_group_ids=[security_group.id],
                              instance_type="",
                              placement=args.zone,
                              min_count=args.node_count,
                              max_count=args.node_count,
                              block_device_map=block_map,
                              subnet_id=None,
                              placement_group=None,
                              user_data=None,
                              instance_initiated_shutdown_behavior="stop",
                              instance_profile_name=None)

            print("Waiting for AWS to propagate instance metadata...")
            time.sleep(15)

            additional_tags = {}
            for node in nodes.instances:
                node.add_tags(
                    dict(additional_tags,
                         Name="{cn}-node-{iid}".format(cn=args.name,
                                                       iid=node.id)))

            return nodes.instances

        except Exception as e:
            print("Caught exception: ", e)
            print("ERROR: Could not find AMI " + args.ami, file=stderr)
            sys.exit(1)
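
get_num_disks is not defined in this snippet; for the m3 family the instance-store disk counts are fixed by AWS, so a plausible stand-in is a small lookup table (the real helper presumably covers far more instance types):

# Hypothetical stand-in for get_num_disks(); the values are AWS's
# documented instance-store disk counts for the m3 family.
M3_EPHEMERAL_DISKS = {
    'm3.medium': 1,
    'm3.large': 1,
    'm3.xlarge': 2,
    'm3.2xlarge': 2,
}

def get_num_disks(instance_type):
    return M3_EPHEMERAL_DISKS.get(instance_type, 1)
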
Example #55
def _run_encryptor_instance(
        aws_svc, encryptor_image_id, snapshot, root_size, guest_image_id,
        security_group_ids=None, subnet_id=None, zone=None,
        instance_config=None,
        status_port=encryptor_service.ENCRYPTOR_STATUS_PORT):
    bdm = BlockDeviceMapping()

    if instance_config is None:
        instance_config = InstanceConfig()

    image = aws_svc.get_image(encryptor_image_id)
    virtualization_type = image.virtualization_type

    # Use gp2 for fast burst I/O copying root drive
    guest_unencrypted_root = EBSBlockDeviceType(
        volume_type='gp2',
        snapshot_id=snapshot,
        delete_on_termination=True)
    log.info('Launching encryptor instance with snapshot %s', snapshot)
    # This path creates a new encrypted AMI rather than updating one;
    # gp2 again gives burst I/O while copying the root drive.
    guest_encrypted_root = EBSBlockDeviceType(
        volume_type='gp2',
        delete_on_termination=True)
    guest_encrypted_root.size = 2 * root_size + 1

    if virtualization_type == 'paravirtual':
        bdm['/dev/sda4'] = guest_unencrypted_root
        bdm['/dev/sda5'] = guest_encrypted_root
    else:
        # Use 'sd' names even though AWS maps these to 'xvd'.
        # The AWS GUI only exposes 'sd' names and won't allow the user
        # to attach to an existing 'sd' name in use, but it would allow
        # conflicts if we used 'xvd' names here.
        bdm['/dev/sdf'] = guest_unencrypted_root
        bdm['/dev/sdg'] = guest_encrypted_root

    # If security groups were not specified, create a temporary security
    # group that allows us to poll the metavisor for encryption progress.
    temp_sg_id = None
    instance = None

    try:
        run_instance = aws_svc.run_instance

        if not security_group_ids:
            vpc_id = None
            if subnet_id:
                subnet = aws_svc.get_subnet(subnet_id)
                vpc_id = subnet.vpc_id
            temp_sg_id = create_encryptor_security_group(
                aws_svc, vpc_id=vpc_id, status_port=status_port).id
            security_group_ids = [temp_sg_id]

            # Wrap with a retry, to handle eventual consistency issues with
            # the newly-created group.
            run_instance = aws_svc.retry(
                aws_svc.run_instance,
                error_code_regexp=r'InvalidGroup\.NotFound'
            )

        user_data = instance_config.make_userdata()
        compressed_user_data = gzip_user_data(user_data)

        instance = run_instance(
            encryptor_image_id,
            security_group_ids=security_group_ids,
            user_data=compressed_user_data,
            placement=zone,
            block_device_map=bdm,
            subnet_id=subnet_id
        )
        aws_svc.create_tags(
            instance.id,
            name=NAME_ENCRYPTOR,
            description=DESCRIPTION_ENCRYPTOR % {'image_id': guest_image_id}
        )
        log.info('Launching encryptor instance %s', instance.id)
        instance = wait_for_instance(aws_svc, instance.id)

        # Tag volumes.
        bdm = instance.block_device_mapping
        if virtualization_type == 'paravirtual':
            aws_svc.create_tags(
                bdm['/dev/sda5'].volume_id, name=NAME_ENCRYPTED_ROOT_VOLUME)
            aws_svc.create_tags(
                bdm['/dev/sda2'].volume_id, name=NAME_METAVISOR_ROOT_VOLUME)
            aws_svc.create_tags(
                bdm['/dev/sda1'].volume_id, name=NAME_METAVISOR_GRUB_VOLUME)
            aws_svc.create_tags(
                bdm['/dev/sda3'].volume_id, name=NAME_METAVISOR_LOG_VOLUME)
        else:
            aws_svc.create_tags(
                bdm['/dev/sda1'].volume_id, name=NAME_METAVISOR_ROOT_VOLUME)
            aws_svc.create_tags(
                bdm['/dev/sdg'].volume_id, name=NAME_ENCRYPTED_ROOT_VOLUME)
    except:
        cleanup_instance_ids = []
        cleanup_sg_ids = []
        if instance:
            cleanup_instance_ids = [instance.id]
        if temp_sg_id:
            cleanup_sg_ids = [temp_sg_id]
        clean_up(
            aws_svc,
            instance_ids=cleanup_instance_ids,
            security_group_ids=cleanup_sg_ids
        )
        raise

    return instance, temp_sg_id
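
aws_svc.retry wraps run_instance so that InvalidGroup.NotFound, which EC2 can return while a just-created security group propagates, is retried instead of failing the launch. A generic version of that wrapper, with my own names and timings, might look like:

import re
import time

from boto.exception import EC2ResponseError

def retry_on_error_code(fn, error_code_regexp, attempts=10, delay=5):
    # Retry fn while EC2 reports a matching error code, e.g. the
    # eventual-consistency window right after create_security_group().
    pattern = re.compile(error_code_regexp)
    def wrapped(*args, **kwargs):
        for attempt in range(attempts):
            try:
                return fn(*args, **kwargs)
            except EC2ResponseError as e:
                if attempt == attempts - 1 or not pattern.match(e.error_code or ''):
                    raise
                time.sleep(delay)
    return wrapped
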
Example #56
def launch_cluster(conn, opts, cluster_name):
    print "Setting up security groups..."
    master_group = get_or_make_group(conn, cluster_name + "-master")
    slave_group = get_or_make_group(conn, cluster_name + "-slaves")
    zoo_group = get_or_make_group(conn, cluster_name + "-zoo")
    if master_group.rules == []:  # Group was just now created
        master_group.authorize(src_group=master_group)
        master_group.authorize(src_group=slave_group)
        master_group.authorize(src_group=zoo_group)
        master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0')
        master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0')
        master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0')
        master_group.authorize('tcp', 38090, 38090, '0.0.0.0/0')
    if slave_group.rules == []:  # Group was just now created
        slave_group.authorize(src_group=master_group)
        slave_group.authorize(src_group=slave_group)
        slave_group.authorize(src_group=zoo_group)
        slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
        slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
        slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
        slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')
    if zoo_group.rules == []:  # Group was just now created
        zoo_group.authorize(src_group=master_group)
        zoo_group.authorize(src_group=slave_group)
        zoo_group.authorize(src_group=zoo_group)
        zoo_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        zoo_group.authorize('tcp', 2181, 2181, '0.0.0.0/0')
        zoo_group.authorize('tcp', 2888, 2888, '0.0.0.0/0')
        zoo_group.authorize('tcp', 3888, 3888, '0.0.0.0/0')

    # Check if instances are already running in our groups
    print "Checking for running cluster..."
    reservations = conn.get_all_instances()
    for res in reservations:
        group_names = [g.name for g in res.groups]
        if master_group.name in group_names or slave_group.name in group_names or zoo_group.name in group_names:
            active = [i for i in res.instances if is_active(i)]
            if len(active) > 0:
                print >> stderr, (
                    "ERROR: There are already instances running in " +
                    "group %s, %s or %s" %
                    (master_group.name, slave_group.name, zoo_group.name))
                sys.exit(1)
    print "Launching instances..."
    try:
        image = conn.get_all_images(image_ids=[opts.ami])[0]
    except:
        print >> stderr, "Could not find AMI " + opts.ami
        sys.exit(1)

    # Create block device mapping so that we can add an EBS volume if asked to
    block_map = BlockDeviceMapping()
    if opts.ebs_vol_size > 0:
        device = EBSBlockDeviceType()
        device.size = opts.ebs_vol_size
        device.delete_on_termination = True
        block_map["/dev/sdv"] = device

    # Launch slaves
    slave_res = image.run(key_name=opts.key_pair,
                          security_groups=[slave_group],
                          instance_type=opts.instance_type,
                          placement=opts.zone,
                          min_count=opts.slaves,
                          max_count=opts.slaves,
                          block_device_map=block_map)
    slave_nodes = slave_res.instances
    print "Launched slaves, regid = " + slave_res.id

    # Launch masters
    master_type = opts.master_instance_type
    if master_type == "":
        master_type = opts.instance_type
    master_res = image.run(key_name=opts.key_pair,
                           security_groups=[master_group],
                           instance_type=master_type,
                           placement=opts.zone,
                           min_count=opts.ft,
                           max_count=opts.ft,
                           block_device_map=block_map)
    master_nodes = master_res.instances
    print "Launched master, regid = " + master_res.id

    # Launch ZooKeeper nodes if required
    if opts.ft > 1:
        zoo_res = image.run(key_name=opts.key_pair,
                            security_groups=[zoo_group],
                            instance_type=opts.instance_type,
                            placement=opts.zone,
                            min_count=3,
                            max_count=3,
                            block_device_map=block_map)
        zoo_nodes = zoo_res.instances
        print "Launched zoo, regid = " + zoo_res.id
    else:
        zoo_nodes = []

    # Return all the instances
    return (master_nodes, slave_nodes, zoo_nodes)
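
is_active is referenced but not defined in these snippets; in spark-ec2-style scripts it typically counts every instance that has not begun shutting down, since stopped instances still occupy the security groups. A plausible definition:

def is_active(instance):
    # Anything not on its way out still blocks a fresh launch.
    return instance.state in ('pending', 'running', 'stopping', 'stopped')
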
Example #57
def launch_cluster(conn, opts, cluster_name):
  if opts.identity_file is None:
    print >> stderr, "ERROR: Must provide an identity file (-i) for ssh connections."
    sys.exit(1)
  if opts.key_pair is None:
    print >> stderr, "ERROR: Must provide a key pair name (-k) to use on instances."
    sys.exit(1)
  #print "Setting up security groups..."
  #master_group = get_or_make_group(conn, cluster_name + "-master")
  #slave_group = get_or_make_group(conn, cluster_name + "-slaves")
  #if master_group.rules == []: # Group was just now created
  #  master_group.authorize(src_group=master_group)
  #  master_group.authorize(src_group=slave_group)
  #  master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
  #  master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
  #  master_group.authorize('tcp', 19999, 19999, '0.0.0.0/0')
  #  master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0')
  #  master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0')
  #  master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0')
  #  master_group.authorize('tcp', 4040, 4045, '0.0.0.0/0')
  #  if opts.ganglia:
  #    master_group.authorize('tcp', 5080, 5080, '0.0.0.0/0')
  #if slave_group.rules == []: # Group was just now created
  #  slave_group.authorize(src_group=master_group)
  #  slave_group.authorize(src_group=slave_group)
  #  slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
  #  slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
  #  slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
  #  slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
  #  slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
  #  slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')

  # Check if instances are already running in our groups
  existing_masters, existing_slaves = get_existing_cluster(conn, opts, cluster_name,
                                                           die_on_error=False)
  #if existing_slaves or (existing_masters and not opts.use_existing_master):
  #  print >> stderr, ("ERROR: There are already instances running in " +
  #      "group %s or %s" % (master_group.name, slave_group.name))
  #  sys.exit(1)

  # Figure out Spark AMI
  if opts.ami is None:
    opts.ami = get_spark_ami(opts)
  print "Launching instances..."

  try:
    image = conn.get_all_images(image_ids=[opts.ami])[0]
  except:
    print >> stderr, "Could not find AMI " + opts.ami
    sys.exit(1)

  # Create block device mapping so that we can add an EBS volume if asked to
  block_map = BlockDeviceMapping()
  if opts.ebs_vol_size > 0:
    device = EBSBlockDeviceType()
    device.size = opts.ebs_vol_size
    device.delete_on_termination = True
    block_map["/dev/sdv"] = device

  # Launch slaves
  if opts.spot_price is not None:
    # Launch spot instances with the requested price
    print ("Requesting %d slaves as spot instances with price $%.3f" %
           (opts.slaves, opts.spot_price))
    zones = get_zones(conn, opts)
    num_zones = len(zones)
    i = 0
    my_req_ids = []
    for zone in zones:
      num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
      slave_reqs = conn.request_spot_instances(
          price = opts.spot_price,
          image_id = opts.ami,
          launch_group = "launch-group-%s" % cluster_name,
          placement = zone,
          count = num_slaves_this_zone,
          key_name = opts.key_pair,
          #security_groups = [slave_group],
          instance_type = opts.instance_type,
          block_device_map = block_map)
      my_req_ids += [req.id for req in slave_reqs]
      i += 1

    print "Waiting for spot instances to be granted..."
    try:
      while True:
        time.sleep(10)
        reqs = conn.get_all_spot_instance_requests()
        id_to_req = {}
        for r in reqs:
          id_to_req[r.id] = r
        active_instance_ids = []
        for i in my_req_ids:
          if i in id_to_req and id_to_req[i].state == "active":
            active_instance_ids.append(id_to_req[i].instance_id)
        if len(active_instance_ids) == opts.slaves:
          print "All %d slaves granted" % opts.slaves
          reservations = conn.get_all_instances(active_instance_ids)
          slave_nodes = []
          for r in reservations:
            slave_nodes += r.instances
          break
        else:
          print "%d of %d slaves granted, waiting longer" % (
            len(active_instance_ids), opts.slaves)
    except:
      print "Canceling spot instance requests"
      conn.cancel_spot_instance_requests(my_req_ids)
      # Log a warning if any of these requests actually launched instances:
      (master_nodes, slave_nodes) = get_existing_cluster(
          conn, opts, cluster_name, die_on_error=False)
      running = len(master_nodes) + len(slave_nodes)
      if running:
        print >> stderr, ("WARNING: %d instances are still running" % running)
      sys.exit(0)
  else:
    # Launch non-spot instances
    zones = get_zones(conn, opts)
    num_zones = len(zones)
    i = 0
    slave_nodes = []
    for zone in zones:
      num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
      if num_slaves_this_zone > 0:
        slave_res = image.run(key_name = opts.key_pair,
                              security_group_ids = ["sg-87956be2","sg-1ac33f7f", "sg-1ec33f7b"],
                              subnet_id = "subnet-4182b007",
                              instance_type = opts.instance_type,
                              placement = zone,
                              min_count = num_slaves_this_zone,
                              max_count = num_slaves_this_zone,
                              block_device_map = block_map)
        slave_nodes += slave_res.instances
        print "Launched %d slaves in %s, regid = %s" % (num_slaves_this_zone,
                                                        zone, slave_res.id)
      i += 1

  # Launch or resume masters
  if existing_masters:
    print "Starting master..."
    for inst in existing_masters:
      if inst.state not in ["shutting-down", "terminated"]:
        inst.start()
    master_nodes = existing_masters
  else:
    master_type = opts.master_instance_type
    if master_type == "":
      master_type = opts.instance_type
    if opts.zone == 'all':
      opts.zone = random.choice(conn.get_all_zones()).name
    master_res = image.run(key_name = opts.key_pair,
                           security_group_ids = ["sg-bd956bd8","sg-1ac33f7f", "sg-1ec33f7b"],
                           subnet_id = "subnet-4182b007",
                           instance_type = master_type,
                           placement = opts.zone,
                           min_count = 1,
                           max_count = 1,
                           block_device_map = block_map)
    master_nodes = master_res.instances
    print "Launched master in %s, regid = %s" % (zone, master_res.id)

  # Return all the instances
  return (master_nodes, slave_nodes)
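
get_partition spreads opts.slaves across the available zones; the usual definition hands the remainder out one instance at a time to the lowest-numbered zones. A hedged sketch with a worked example:

def get_partition(total, num_partitions, current_partition):
    # Even split, with the first (total % num_partitions) partitions
    # taking one extra instance each.
    num_this_partition = total // num_partitions
    if (total % num_partitions) - current_partition > 0:
        num_this_partition += 1
    return num_this_partition

# 10 slaves over 3 zones:
# [get_partition(10, 3, i) for i in range(3)] == [4, 3, 3]
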
Example #58
def launch_cluster(conn, opts, cluster_name):
    if opts.identity_file is None:
        print >> stderr, "ERROR: Must provide an identity file (-i) for ssh connections."
        sys.exit(1)
    if opts.key_pair is None:
        print >> stderr, "ERROR: Must provide a key pair name (-k) to use on instances."
        sys.exit(1)
    print "Setting up security groups..."
    master_group = get_or_make_group(conn, cluster_name + "-master")
    master_group.owner_id = os.getenv('EC2_USER_ID')
    slave_group = get_or_make_group(conn, cluster_name + "-slaves")
    slave_group.owner_id = os.getenv('EC2_USER_ID')
    zoo_group = get_or_make_group(conn, cluster_name + "-zoo")
    zoo_group.owner_id = os.getenv('EC2_USER_ID')

    if master_group.rules == []:  # Group was just now created
        master_group.authorize(src_group=master_group)
        master_group.authorize(src_group=slave_group)
        master_group.authorize(src_group=zoo_group)
        master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        master_group.authorize('tcp', 5050, 5051, '0.0.0.0/0')
        master_group.authorize('tcp', 19999, 19999, '0.0.0.0/0')
        master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0')
        master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0')
        master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0')
        master_group.authorize('tcp', 38090, 38090, '0.0.0.0/0')
        master_group.authorize('tcp', 4040, 4045, '0.0.0.0/0')
        if opts.ganglia:
            master_group.authorize('tcp', 5080, 5080, '0.0.0.0/0')
    if slave_group.rules == []:  # Group was just now created
        slave_group.authorize(src_group=master_group)
        slave_group.authorize(src_group=slave_group)
        slave_group.authorize(src_group=zoo_group)
        slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
        slave_group.authorize('tcp', 5050, 5051, '0.0.0.0/0')
        slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
        slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
        slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
        slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')

    if zoo_group.rules == []:  # Group was just now created
        zoo_group.authorize(src_group=master_group)
        zoo_group.authorize(src_group=slave_group)
        zoo_group.authorize(src_group=zoo_group)
        zoo_group.authorize('tcp', 22, 22, '0.0.0.0/0')
        zoo_group.authorize('tcp', 2181, 2181, '0.0.0.0/0')
        zoo_group.authorize('tcp', 2888, 2888, '0.0.0.0/0')
        zoo_group.authorize('tcp', 3888, 3888, '0.0.0.0/0')

    # Check if instances are already running in our groups
    existing_masters, existing_slaves, existing_zoos = get_existing_cluster(
        conn, opts, cluster_name, die_on_error=False)
    if existing_slaves or (existing_masters and not opts.use_existing_master):
        print >> stderr, (
            "ERROR: There are already instances running in " +
            "group %s or %s" %
            (master_group.name, slave_group.name, zoo_group.name))
        sys.exit(1)

    # Figure out Spark AMI
    if opts.ami is None:
        opts.ami = get_ami(opts)
    print "Launching instances..."

    try:
        image = conn.get_all_images(image_ids=[opts.ami])[0]
    except Exception:
        print >> stderr, "Could not find AMI " + opts.ami
        sys.exit(1)

    # Create block device mapping so that we can add an EBS volume if asked to
    logging.debug("Calling boto BlockDeviceMapping()...")
    block_map = BlockDeviceMapping()
    logging.debug(" Printing block_map..")
    #print block_map
    if opts.ebs_vol_size > 0:
        logging.debug("Calling boto EBSBlockDeviceType()...")
        device = EBSBlockDeviceType()
        #print "device: ", device
        device.size = opts.ebs_vol_size
        device.delete_on_termination = True
        device.ephemeral_name = "ephemeral0"
        #block_map["/dev/sdv"] = device
        #block_map["/dev/sdv"] = device
        block_map["/dev/vdb"] = device

    if opts.user_data_file is not None:
        user_data_file = open(opts.user_data_file)
        try:
            opts.user_data = user_data_file.read()
        finally:
            user_data_file.close()
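    # The raw file contents are handed to image.run(user_data=...) below;
    # boto base64-encodes them when building the EC2 API request.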

    # Launch slave instances
    zones = get_zones(conn, opts)
    num_zones = len(zones)
    i = 0
    slave_nodes = []
    for zone in zones:
        num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
        if num_slaves_this_zone > 0:
            slave_res = image.run(key_name=opts.key_pair,
                                  security_groups=[slave_group],
                                  instance_type=opts.instance_type,
                                  placement=zone,
                                  min_count=num_slaves_this_zone,
                                  max_count=num_slaves_this_zone,
                                  block_device_map=block_map,
                                  user_data=opts.user_data)
            slave_nodes += slave_res.instances
            print "Launched %d slaves in %s, regid = %s" % (
                num_slaves_this_zone, zone, slave_res.id)
        i += 1

    # Launch or resume masters
    if existing_masters:
        print "Starting master..."
        for inst in existing_masters:
            if inst.state not in ["shutting-down", "terminated"]:
                inst.start()
        master_nodes = existing_masters
    else:
        master_type = opts.master_instance_type
        if master_type == "":
            master_type = opts.instance_type
        if opts.zone == 'all':
            opts.zone = random.choice(conn.get_all_zones()).name
        master_res = image.run(key_name=opts.key_pair,
                               security_groups=[master_group],
                               instance_type=master_type,
                               placement=opts.zone,
                               min_count=1,
                               max_count=1,
                               block_device_map=block_map,
                               user_data=opts.user_data)
        master_nodes = master_res.instances
        print "Launched master in %s, regid = %s" % (zone, master_res.id)

    # Launch ZooKeeper nodes if required
    if int(opts.ft) > 1:
        print "Running " + opts.ft + " zookeepers"
        zoo_res = image.run(key_name=opts.key_pair,
                            security_groups=[zoo_group],
                            instance_type=opts.instance_type,
                            placement=opts.zone,
                            min_count=int(opts.ft),
                            max_count=int(opts.ft),
                            block_device_map=block_map,
                            user_data=opts.user_data)
        zoo_nodes = zoo_res.instances
        print "Launched zoo, regid = " + zoo_res.id
    else:
        zoo_nodes = []

    # Return all the instances
    return (master_nodes, slave_nodes, zoo_nodes)
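
The instances returned by these launch_cluster variants are generally still
in the 'pending' state when the function returns. A minimal caller-side wait
loop might look like this sketch (the helper name is illustrative, not part
of the scripts above):

import time

def wait_for_running(instances, interval=10):
    # Poll each boto Instance until it leaves the 'pending' state.
    for inst in instances:
        inst.update()
        while inst.state == 'pending':
            time.sleep(interval)
            inst.update()
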
Example #59
def launch_cluster(conn, opts, cluster_name):
  print "Setting up security groups..."
  master_group = get_or_make_group(conn, cluster_name + "-master")
  slave_group = get_or_make_group(conn, cluster_name + "-slaves")
  zoo_group = get_or_make_group(conn, cluster_name + "-zoo")
  if master_group.rules == []: # Group was just now created
    master_group.authorize(src_group=master_group)
    master_group.authorize(src_group=slave_group)
    master_group.authorize(src_group=zoo_group)
    master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
    master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
    master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0')
    master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0')
    master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0')
    if opts.cluster_type == "mesos":
      master_group.authorize('tcp', 38090, 38090, '0.0.0.0/0')
    if opts.ganglia:
      master_group.authorize('tcp', 5080, 5080, '0.0.0.0/0')
  if slave_group.rules == []: # Group was just now created
    slave_group.authorize(src_group=master_group)
    slave_group.authorize(src_group=slave_group)
    slave_group.authorize(src_group=zoo_group)
    slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
    slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
    slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
    slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
    slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0')
    slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0')
  if zoo_group.rules == []: # Group was just now created
    zoo_group.authorize(src_group=master_group)
    zoo_group.authorize(src_group=slave_group)
    zoo_group.authorize(src_group=zoo_group)
    zoo_group.authorize('tcp', 22, 22, '0.0.0.0/0')
    zoo_group.authorize('tcp', 2181, 2181, '0.0.0.0/0')
    zoo_group.authorize('tcp', 2888, 2888, '0.0.0.0/0')
    zoo_group.authorize('tcp', 3888, 3888, '0.0.0.0/0')

  # Check if instances are already running in our groups
  active_nodes = get_existing_cluster(conn, opts, cluster_name,
                                      die_on_error=False)
  if any(active_nodes):
    print >> stderr, ("ERROR: There are already instances running in " +
        "group %s, %s or %s" % (master_group.name, slave_group.name, zoo_group.name))
    sys.exit(1)

  # Figure out the latest AMI from our static URL
  if opts.ami == "latest":
    try:
      opts.ami = urllib2.urlopen(LATEST_AMI_URL).read().strip()
      print "Latest Spark AMI: " + opts.ami
    except Exception:
      print >> stderr, "Could not read " + LATEST_AMI_URL
      sys.exit(1)

  print "Launching instances..."

  try:
    image = conn.get_all_images(image_ids=[opts.ami])[0]
  except Exception:
    print >> stderr, "Could not find AMI " + opts.ami
    sys.exit(1)

  # Create block device mapping so that we can add an EBS volume if asked to
  block_map = BlockDeviceMapping()
  if opts.ebs_vol_size > 0:
    device = EBSBlockDeviceType()
    device.size = opts.ebs_vol_size
    device.delete_on_termination = True
    block_map["/dev/sdv"] = device

  # Launch slaves
  if opts.spot_price is not None:
    # Launch spot instances with the requested price
    print ("Requesting %d slaves as spot instances with price $%.3f" %
           (opts.slaves, opts.spot_price))
    zones = get_zones(conn, opts)
    num_zones = len(zones)
    i = 0
    my_req_ids = []
    for zone in zones:
      num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
      slave_reqs = conn.request_spot_instances(
          price = opts.spot_price,
          image_id = opts.ami,
          launch_group = "launch-group-%s" % cluster_name,
          placement = zone,
          count = num_slaves_this_zone,
          key_name = opts.key_pair,
          security_groups = [slave_group],
          instance_type = opts.instance_type,
          block_device_map = block_map)
      my_req_ids += [req.id for req in slave_reqs]
      i += 1
    
    print "Waiting for spot instances to be granted..."
    try:
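      # Poll every outstanding spot request; once each of ours is "active"
      # it carries an instance_id we can resolve to a running Instance.
      # The bare `except:` below also catches Ctrl-C, so pending requests
      # get cancelled rather than leaking.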
      while True:
        time.sleep(10)
        reqs = conn.get_all_spot_instance_requests()
        id_to_req = {}
        for r in reqs:
          id_to_req[r.id] = r
        active_instance_ids = []
        for i in my_req_ids:
          if i in id_to_req and id_to_req[i].state == "active":
            active_instance_ids.append(id_to_req[i].instance_id)
        if len(active_instance_ids) == opts.slaves:
          print "All %d slaves granted" % opts.slaves
          reservations = conn.get_all_instances(active_instance_ids)
          slave_nodes = []
          for r in reservations:
            slave_nodes += r.instances
          break
        else:
          print "%d of %d slaves granted, waiting longer" % (
            len(active_instance_ids), opts.slaves)
    except:
      print "Canceling spot instance requests"
      conn.cancel_spot_instance_requests(my_req_ids)
      # Log a warning if any of these requests actually launched instances:
      (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster(
          conn, opts, cluster_name, die_on_error=False)
      running = len(master_nodes) + len(slave_nodes) + len(zoo_nodes)
      if running:
        print >> stderr, ("WARNING: %d instances are still running" % running)
      sys.exit(0)
  else:
    # Launch non-spot instances
    zones = get_zones(conn, opts)
    num_zones = len(zones)
    i = 0
    slave_nodes = []
    for zone in zones:
      num_slaves_this_zone = get_partition(opts.slaves, num_zones, i)
      if num_slaves_this_zone > 0:
        slave_res = image.run(key_name = opts.key_pair,
                              security_groups = [slave_group],
                              instance_type = opts.instance_type,
                              placement = zone,
                              min_count = num_slaves_this_zone,
                              max_count = num_slaves_this_zone,
                              block_device_map = block_map)
        slave_nodes += slave_res.instances
        print "Launched %d slaves in %s, regid = %s" % (num_slaves_this_zone,
                                                        zone, slave_res.id)
      i += 1

  # Launch masters
  master_type = opts.master_instance_type
  if master_type == "":
    master_type = opts.instance_type
  if opts.zone == 'all':
    opts.zone = random.choice(conn.get_all_zones()).name
  master_res = image.run(key_name = opts.key_pair,
                         security_groups = [master_group],
                         instance_type = master_type,
                         placement = opts.zone,
                         min_count = 1,
                         max_count = 1,
                         block_device_map = block_map)
  master_nodes = master_res.instances
  print "Launched master in %s, regid = %s" % (zone, master_res.id)

  zoo_nodes = []

  # Return all the instances
  return (master_nodes, slave_nodes, zoo_nodes)
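
Both launch_cluster variants above lean on a get_partition helper to spread
the requested slave count across availability zones. A sketch of what it
likely looks like (mirroring the spark-ec2 helper of the same name; Python 2
integer division intended):

def get_partition(total, num_partitions, current_partition):
    # Divide `total` evenly across `num_partitions`, giving the first
    # (total % num_partitions) partitions one extra item each.
    num_slaves_this_zone = total / num_partitions
    if (total % num_partitions) - current_partition > 0:
        num_slaves_this_zone += 1
    return num_slaves_this_zone
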
Example #60
def _create_server_ec2(connection,
                       region,
                       disk_name,
                       disk_size,
                       ami,
                       key_pair,
                       instance_type,
                       tags=None,
                       security_groups=None,
                       delete_on_termination=True,
                       log=False,
                       wait_for_ssh_available=True):
    """
    Creates EC2 Instance
    """

    if log:
        log_green("Started...")
        log_yellow("...Creating EC2 instance...")

    ebs_volume = EBSBlockDeviceType()
    ebs_volume.size = disk_size
    bdm = BlockDeviceMapping()
    bdm[disk_name] = ebs_volume

    # get an EC2 AMI image object for our chosen AMI
    image = connection.get_all_images(image_ids=[ami])[0]
    # start a new instance
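    # (the two positional arguments to image.run are min_count and max_count)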
    reservation = image.run(1,
                            1,
                            key_name=key_pair,
                            security_groups=security_groups,
                            block_device_map=bdm,
                            instance_type=instance_type)

    # and get our instance_id
    instance = reservation.instances[0]

    # poll until the instance leaves the 'pending' state
    while instance.state == u'pending':
        if log:
            log_yellow("Instance state: %s" % instance.state)
        sleep(10)
        instance.update()
    if log:
        log_green("Instance state: %s" % instance.state)
    if wait_for_ssh_available:
        wait_for_ssh(instance.public_dns_name)

    # update the EBS volumes to be deleted on instance termination
    if delete_on_termination:
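        # boto encodes the flag as "<device>=1" (deleteOnTermination=true);
        # "<device>=0" would clear it again.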
        for dev in instance.block_device_mapping:
            instance.modify_attribute('BlockDeviceMapping',
                                      ["%s=%d" % (dev, 1)])

    # add a tag to our instance
    if tags:
        connection.create_tags([instance.id], tags)

    if log:
        log_green("Public dns: %s" % instance.public_dns_name)

    # returns our new instance
    return instance
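
A minimal invocation sketch, assuming an existing boto EC2 connection; the
AMI id, key pair, and security group below are placeholders, and
wait_for_ssh_available is disabled to avoid the project-specific ssh helper:

import boto.ec2

conn = boto.ec2.connect_to_region('us-east-1')
server = _create_server_ec2(conn,
                            region='us-east-1',
                            disk_name='/dev/sda1',
                            disk_size=8,
                            ami='ami-xxxxxxxx',       # placeholder AMI id
                            key_pair='my-keypair',    # placeholder key pair
                            instance_type='m3.medium',
                            tags={'Name': 'test-node'},
                            security_groups=['default'],
                            wait_for_ssh_available=False)
print "Instance %s is %s" % (server.id, server.state)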