Exemplo n.º 1
0
def scale_instances(tasks_per_instance, group_name, total_groups):
    """Grow an autoscale group according to the number of queued tasks.

    The number of instances to add is the queued task count divided by
    ``tasks_per_instance`` (rounded up), then divided by ``total_groups``
    and truncated to an integer.  The resulting capacity never exceeds the
    group's ``max_size``.

    Nothing is changed when the group is already at ``max_size`` or when
    the queues report no tasks.

    :param tasks_per_instance: tasks one instance is expected to handle
        (converted with ``float``)
    :param group_name: name of the autoscale group to resize
    :param total_groups: divisor applied to the rounded-up instance count
    """
    autoscale = AutoScaleConnection()
    asg = autoscale.get_all_groups(names=[group_name])[0]

    if asg.desired_capacity == asg.max_size:
        logger.info('Maximum number of instances reached')
        return

    pending_tasks = get_sqs_tasks_count()
    if not pending_tasks:
        logger.info('No tasks left in queues')
        return
    logger.info('Num of tasks in queues %s', pending_tasks)

    per_instance = float(tasks_per_instance)
    instances_needed = ceil(pending_tasks / per_instance)
    extra_instances = int(instances_needed / total_groups)

    # consider max allowed instances
    target_capacity = asg.desired_capacity + extra_instances
    if target_capacity > asg.max_size:
        target_capacity = asg.max_size

    logger.info('Updating group from %s to %s instances',
                asg.desired_capacity, target_capacity)
    asg.set_capacity(target_capacity)
    asg.desired_capacity = target_capacity
    asg.update()
    logger.info('Done\n')
def main():
    """Print the autoscale groups visible to the configured AWS account.

    The INI file given with ``-c/--config`` is parsed with ``parse_config``
    and its ``[AWS]`` section must provide the ``access`` and ``secret``
    options used to open the connection.
    """
    parser = optparse.OptionParser()
    parser.add_option( "-c", "--config", dest="config_file", help="AutoScale config INI", metavar="FILE" )
    (options, args) = parser.parse_args()
    logging.info( "Using config file [%s]" % options.config_file )

    config = parse_config( options.config_file )

    aws_access = config.get("AWS", 'access')
    aws_secret = config.get("AWS", 'secret')

    # Only the access key id is logged; the secret key must never end up in
    # log output.
    logging.debug( "Connecting to AWS with access [%s]" % aws_access )
    aws_connection = AutoScaleConnection( aws_access, aws_secret )

    print("AutoScalingGroups:")
    print(aws_connection.get_all_groups().__dict__)
Exemplo n.º 3
0
def create_autoscaling_group():
    """Create a timestamped launch configuration and a matching group.

    Credentials are read from the ``AWS_ACCESS_KEY_ID`` and
    ``AWS_SECRET_ACCESS_KEY`` environment variables and the image id from
    the module-level ``img``.  The launch configuration and the autoscale
    group share one name, ``live_launch_config`` followed by the current
    local time.
    """
    access_key = os.environ['AWS_ACCESS_KEY_ID']
    secret_key = os.environ['AWS_SECRET_ACCESS_KEY']
    conn = AutoScaleConnection(access_key, secret_key)
    # A second, region-bound connection is opened here as well; nothing
    # below uses it.
    boto.ec2.autoscale.connect_to_region('us-east-1')
    print(conn.get_all_groups())

    now = datetime.datetime.fromtimestamp(time.time())
    config_name = 'live_launch_config' + now.strftime('%Y-%m-%d_%H.%M.%S')

    launch_settings = dict(
        name=config_name,
        image_id=img,
        key_name='SDSEastKey',
        security_groups=['sg-a7afb1c2'],
        user_data="#!/bin/sh /home/ec2-user/sds/deployment_scripts/initialize_server.py",
    )
    lc = LaunchConfiguration(**launch_settings)
    conn.create_launch_configuration(lc)

    group_settings = dict(
        group_name=config_name,
        load_balancers=['SDSLiveLoadBalancer'],
        availability_zones=['us-east-1a'],
        launch_config=lc,
        min_size=2,
        max_size=2,
        connection=conn,
    )
    conn.create_auto_scaling_group(AutoScalingGroup(**group_settings))
Exemplo n.º 4
0
    def test_basic(self):
        """Lightly exercise the read-only autoscale calls.

        NB: as it says on the tin these are really basic tests that only
        (lightly) exercise read-only behaviour - and that's only if you
        have any autoscale groups to introspect. It's useful, however, to
        catch simple errors.
        """
        print('--- running %s tests ---' % self.__class__.__name__)
        c = AutoScaleConnection()

        self.assertTrue(repr(c).startswith('AutoScaleConnection'))

        # assertIsInstance is used throughout: assertTrue(type(x), T) treats
        # T as the failure message and therefore can never fail.
        groups = c.get_all_groups()
        for group in groups:
            self.assertIsInstance(group, AutoScalingGroup)

            # get activities
            for activity in group.get_activities():
                self.assertIsInstance(activity, Activity)

        # Each listing call paired with the type its items must have, in
        # the order the calls are issued.
        listings = [
            (c.get_all_launch_configurations, LaunchConfiguration),
            (c.get_all_policies, ScalingPolicy),
            (c.get_all_scheduled_actions, ScheduledUpdateGroupAction),
            (c.get_all_autoscaling_instances, Instance),
            (c.get_all_scaling_process_types, ProcessType),
            (c.get_all_adjustment_types, AdjustmentType),
        ]
        for fetch, expected_type in listings:
            for item in fetch():
                self.assertIsInstance(item, expected_type)

        # get metrics collection types (a single object, not a list)
        types = c.get_all_metric_collection_types()
        self.assertIsInstance(types, MetricCollectionTypes)

        print('--- tests completed ---')
Exemplo n.º 5
0
def get_all_group_instances_and_conn():
    """Return the EC2 instances of the SCCluster groups and the connection.

    The autoscale connection is also stored in the module-level
    ``autoscale_conn``.  The process exits via ``sys.exit()`` when none of
    the groups has any instance.

    :returns: ``(instances, conn)`` where ``instances`` is the result of
        ``ec2.get_only_instances`` for the groups' instance ids and ``conn``
        is the ``AutoScaleConnection``
    """
    global autoscale_conn
    conn = AutoScaleConnection()
    autoscale_conn = conn
    ec2 = boto.ec2.connect_to_region('us-east-1')
    groups = conn.get_all_groups(
        names=['SCCluster1', 'SCCluster2', 'SCCluster3',
               'SCCluster4'])  # TODO: update this list
    # The members of a group live in ``group.instances``; the group object
    # itself is not a collection of instances.
    instance_ids = [instance.instance_id
                    for group in groups
                    for instance in (group.instances or [])]
    if not instance_ids:
        sys.exit()
    instances = ec2.get_only_instances(instance_ids)
    return instances, conn
Exemplo n.º 6
0
def autoscale_group_hosts(group_name):
    """Look up the instances of every autoscale group matching a prefix.

    :param group_name: prefix that the autoscale group names must start with
    :returns: tuple ``(private_ips, first_instance_id, first_group_tag)``
        where ``private_ips`` is a list with one entry per instance and the
        other two values come from the first instance found
    :raises IndexError: when no matching group has any instance
    """
    import boto.ec2
    from boto.ec2.autoscale import AutoScaleConnection
    ec2 = boto.connect_ec2()
    conn = AutoScaleConnection()
    groups = conn.get_all_groups(names=[])
    groups = [ group for group in groups if group.name.startswith(group_name) ]

    instances = []
    for group in groups:
        print(group.name)
        # Query each group with its own ids only: reusing one growing id
        # list would fetch (and append) earlier groups' instances again.
        group_instance_ids = [i.instance_id for i in group.instances]
        if not group_instance_ids:
            # An empty id list is no filter at all and would return every
            # instance of the account.
            continue
        instances.extend(ec2.get_only_instances(group_instance_ids))

    if not instances:
        raise IndexError(
            "no instances found in autoscale groups starting with %r" % (group_name,))

    first = instances[0]
    return [i.private_ip_address for i in instances], first.id, first.tags.get("aws:autoscaling:groupName")
Exemplo n.º 7
0
def get_all_group_instances_and_conn(groups_names=None):
    """Pick one autoscale group at random and return its EC2 instances.

    The autoscale connection is also stored in the module-level
    ``autoscale_conn``.  When the selected group has no instances the logs
    are uploaded with ``upload_logs_to_s3`` and the process exits.

    :param groups_names: group names to choose from; when omitted the list
        is read from ``get_autoscale_groups()['groups']`` at call time
        rather than once when the function is defined
    :returns: ``(instances, conn)``
    """
    global autoscale_conn
    if groups_names is None:
        groups_names = get_autoscale_groups()['groups']
    conn = AutoScaleConnection()
    autoscale_conn = conn
    ec2 = boto.ec2.connect_to_region('us-east-1')
    selected_group_name = random.choice(groups_names)
    logger.info('Selected autoscale group: %s' % selected_group_name)
    group = conn.get_all_groups(names=[selected_group_name])[0]
    if not group.instances:
        logger.info("No working instances in selected group %s" %
                    selected_group_name)
        upload_logs_to_s3()
        sys.exit()
    instance_ids = [i.instance_id for i in group.instances]
    instances = ec2.get_only_instances(instance_ids)
    return instances, conn
Exemplo n.º 8
0
def launch_auto_scaling(stage = 'development'):
	"""Create the autoscale groups configured for ``stage``.

	For every entry under ``config[stage]['autoscale']`` that does not
	already exist as an autoscale group, a launch configuration and a group
	are created; a CPU trigger is added when both ``min-cpu`` and
	``max-cpu`` are configured.

	:param stage: key of the provider configuration section to read
	"""
	config = get_provider_dict()
	from boto.ec2.autoscale import AutoScaleConnection, AutoScalingGroup, LaunchConfiguration, Trigger
	conn = AutoScaleConnection(fabric.api.env.conf['AWS_ACCESS_KEY_ID'], fabric.api.env.conf['AWS_SECRET_ACCESS_KEY'], host='%s.autoscaling.amazonaws.com' % config['location'][:-1])

	autoscale = config.get(stage, {}).get('autoscale', {})
	# Iterating a mapping directly yields only its keys, which cannot be
	# unpacked into (name, values); a sequence of pairs is used as given.
	group_specs = autoscale.items() if hasattr(autoscale, 'items') else autoscale
	# Existing names are fetched once and extended as groups are created.
	existing_names = set(group.name for group in conn.get_all_groups())

	for name, values in group_specs:
		if name in existing_names:
			# NOTE(review): confirm fabric.colors provides `orange` in the Fabric version in use.
			fabric.api.warn(fabric.colors.orange('Autoscale group %s already exists' % name))
			continue
		lc = LaunchConfiguration(name = '%s-launch-config' % name, image_id = values['image'],  key_name = config['key'])
		conn.create_launch_configuration(lc)
		ag = AutoScalingGroup(group_name = name, load_balancers = values.get('load-balancers'), availability_zones = [config['location']], launch_config = lc, min_size = values['min-size'], max_size = values['max-size'])
		conn.create_auto_scaling_group(ag)
		existing_names.add(name)
		if 'min-cpu' in values and 'max-cpu' in values:
			tr = Trigger(name = '%s-trigger' % name, autoscale_group = ag, measure_name = 'CPUUtilization', statistic = 'Average', unit = 'Percent', dimensions = [('AutoScalingGroupName', ag.name)],
						 period = 60, lower_threshold = values['min-cpu'], lower_breach_scale_increment = '-1', upper_threshold = values['max-cpu'], upper_breach_scale_increment = '2', breach_duration = 60)
			conn.create_trigger(tr)
Exemplo n.º 9
0
def _is_up_to_date():
    """
    Tell whether this instance runs the AMI of the worker pool's current
    launch configuration.

    The expected AMI is taken from the launch configuration of the
    "LSDA Worker Pool" autoscale group; the running AMI is read from the
    instance metadata URL.
    """

    # Expected AMI: the one named by the pool's launch configuration.
    autoscale = AutoScaleConnection()
    worker_pool = autoscale.get_all_groups(["LSDA Worker Pool"])[0]
    launch_configs = autoscale.get_all_launch_configurations(
        names=[worker_pool.launch_config_name])
    expected_ami = launch_configs[0].image_id

    # Running AMI: whatever the metadata endpoint reports.
    metadata_url = "http://169.254.169.254/latest/meta-data/ami-id"
    running_ami = urllib.urlopen(metadata_url).read()

    return expected_ami == running_ami
Exemplo n.º 10
0
def autoscale_group_hosts(group_name):
    """Look up the instances of every autoscale group matching a prefix.

    :param group_name: prefix that the autoscale group names must start with
    :returns: tuple ``(private_ips, first_instance_id, first_group_tag)``
        where ``private_ips`` is a set and the other two values come from
        the first instance found
    :raises IndexError: when no matching group has any instance
    """
    import boto.ec2
    from boto.ec2.autoscale import AutoScaleConnection
    ec2 = boto.connect_ec2()
    conn = AutoScaleConnection()
    groups = conn.get_all_groups(names=[])
    groups = [group for group in groups if group.name.startswith(group_name)]

    instances = []
    for group in groups:
        print("group name: %s" % group.name)
        # Query each group with its own ids only: reusing one growing id
        # list would fetch (and append) earlier groups' instances again.
        group_instance_ids = [i.instance_id for i in group.instances]
        if not group_instance_ids:
            # An empty id list is no filter at all and would return every
            # instance of the account.
            continue
        instances.extend(ec2.get_only_instances(group_instance_ids))

    if not instances:
        raise IndexError(
            "no instances found in autoscale groups starting with %r" % (group_name,))

    first = instances[0]
    return set([
        i.private_ip_address for i in instances
    ]), first.id, first.tags.get("aws:autoscaling:groupName")
Exemplo n.º 11
0
def find_unused_launch_configs():
    conn = AutoScaleConnection()
    autoscale_groups = conn.get_all_groups(max_records=100)
    launch_configs = conn.get_all_launch_configurations(max_records=100)
    launch_config_names = {lc.name for lc in launch_configs}
    used_launch_config_names = {asg.launch_config_name for asg in autoscale_groups}
    unused_launch_config_names = launch_config_names - used_launch_config_names

    print "Autoscale Groups and Current Launch Configs:"
    print "{:<40}{:<40}".format("ASG", "LC")
    for asg in autoscale_groups:
        #print "asg:", asg.name, "-> lc:", asg.launch_config_name
        print "{:<40}{:<40}".format(asg.name, asg.launch_config_name)

    print "\nUnused Launch Configs: (launch configs without a autoscale group)"
    unused_launch_config_names = list(sorted(unused_launch_config_names))
    for unused_launch_config in unused_launch_config_names:
        print "\t", unused_launch_config
    return unused_launch_config_names
Exemplo n.º 12
0
def delete_autoscaling():
    """Tear down the autoscale group and its launch configuration.

    The group named ``AUTOSCALING_GROUP_NAME`` has its instances shut down,
    then, after ``LONG_SLEEP_PERIOD`` seconds, the group is force-deleted
    and the launch configuration of the same name is removed.  The
    connection is closed even when one of the steps fails.
    """
    con = AutoScaleConnection(aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
                              aws_access_key_id=AWS_ACCESS_KEY,
                              region=RegionInfo(name=REGION,
                                               endpoint='autoscaling.%s.amazonaws.com' % REGION))
    try:
        print("Deleting autoscaling group..")
        group = con.get_all_groups(names=[AUTOSCALING_GROUP_NAME])[0]
        print("shutting down instances")
        group.shutdown_instances()
        time.sleep(LONG_SLEEP_PERIOD)
        print("Deleting autoscaling group itself")
        con.delete_auto_scaling_group(AUTOSCALING_GROUP_NAME, force_delete=True)
        print("Deleting launch configuration")
        con.delete_launch_configuration(AUTOSCALING_GROUP_NAME)
    finally:
        # Release the connection on the failure paths too.
        con.close()
Exemplo n.º 13
0
def launch_auto_scaling(stage='development'):
    """Create the autoscale groups configured for ``stage``.

    For every entry under ``config[stage]['autoscale']`` that does not
    already exist as an autoscale group, a launch configuration and a group
    are created; a CPU trigger is added when both ``min-cpu`` and
    ``max-cpu`` are configured.

    :param stage: key of the provider configuration section to read
    """
    config = get_provider_dict()
    from boto.ec2.autoscale import AutoScaleConnection, AutoScalingGroup, LaunchConfiguration, Trigger
    conn = AutoScaleConnection(fabric.api.env.conf['AWS_ACCESS_KEY_ID'],
                               fabric.api.env.conf['AWS_SECRET_ACCESS_KEY'],
                               host='%s.autoscaling.amazonaws.com' %
                               config['location'][:-1])

    autoscale = config.get(stage, {}).get('autoscale', {})
    # Iterating a mapping directly yields only its keys, which cannot be
    # unpacked into (name, values); a sequence of pairs is used as given.
    group_specs = autoscale.items() if hasattr(autoscale, 'items') else autoscale
    # Existing names are fetched once and extended as groups are created.
    existing_names = set(group.name for group in conn.get_all_groups())

    for name, values in group_specs:
        if name in existing_names:
            # NOTE(review): confirm fabric.colors provides `orange` in the
            # Fabric version in use.
            fabric.api.warn(
                fabric.colors.orange('Autoscale group %s already exists' %
                                     name))
            continue
        lc = LaunchConfiguration(name='%s-launch-config' % name,
                                 image_id=values['image'],
                                 key_name=config['key'])
        conn.create_launch_configuration(lc)
        ag = AutoScalingGroup(group_name=name,
                              load_balancers=values.get('load-balancers'),
                              availability_zones=[config['location']],
                              launch_config=lc,
                              min_size=values['min-size'],
                              max_size=values['max-size'])
        conn.create_auto_scaling_group(ag)
        existing_names.add(name)
        if 'min-cpu' in values and 'max-cpu' in values:
            tr = Trigger(name='%s-trigger' % name,
                         autoscale_group=ag,
                         measure_name='CPUUtilization',
                         statistic='Average',
                         unit='Percent',
                         dimensions=[('AutoScalingGroupName', ag.name)],
                         period=60,
                         lower_threshold=values['min-cpu'],
                         lower_breach_scale_increment='-1',
                         upper_threshold=values['max-cpu'],
                         upper_breach_scale_increment='2',
                         breach_duration=60)
            conn.create_trigger(tr)
Exemplo n.º 14
0
class EbsHelper(object):
    """
    Class for helping with ebs

    Bundles the connections and calls used to create, deploy to, inspect
    and wait on an application's environments.
    """

    def __init__(self, aws, wait_time_secs, app_name=None,):
        """
        Creates the EbsHelper

        :param aws: settings object; ``region``, ``access_key``,
            ``secret_key``, ``security_token``, ``bucket`` and
            ``bucket_path`` are read from it by this class
        :param wait_time_secs: number of seconds wait_for_environments
            polls before giving up
        :param app_name: application name the other methods operate on
        """
        self.aws = aws
        self.ebs = connect_to_region(aws.region, aws_access_key_id=aws.access_key,
                                     aws_secret_access_key=aws.secret_key,
                                     security_token=aws.security_token)
        # NOTE(review): no region is passed here, unlike the other two
        # connections - confirm this connection targets aws.region.
        self.autoscale = AutoScaleConnection(aws_access_key_id=aws.access_key,
                                             aws_secret_access_key=aws.secret_key,
                                             security_token=aws.security_token)
        # us-east-1 uses the global S3 endpoint, other regions a regional one
        s3_host = 's3.amazonaws.com' if aws.region == 'us-east-1' else 's3-' + aws.region + '.amazonaws.com'
        self.s3 = S3Connection(
            aws_access_key_id=aws.access_key,
            aws_secret_access_key=aws.secret_key,
            security_token=aws.security_token,
            host=s3_host)
        self.app_name = app_name
        self.wait_time_secs = wait_time_secs

    def swap_environment_cnames(self, from_env_name, to_env_name):
        """
        Swaps cnames for an environment
        """
        self.ebs.swap_environment_cnames(source_environment_name=from_env_name,
                                         destination_environment_name=to_env_name)

    def upload_archive(self, filename, key, auto_create_bucket=True):
        """
        Uploads an application archive version to s3

        :param filename: path of the local archive to upload
        :param key: key name, appended to ``aws.bucket_path``
        :param auto_create_bucket: when true (the default) the bucket is
            created if looking it up fails with S3ResponseError; when false
            that error is re-raised instead
        :raises Exception: when the existing bucket is in another region
        """
        try:
            bucket = self.s3.get_bucket(self.aws.bucket)
            # us-east-1 buckets report an empty location, every other
            # region is expected to report its own name
            expected_location = '' if self.aws.region == 'us-east-1' else self.aws.region
            if bucket.get_location() != expected_location:
                raise Exception("Existing bucket doesn't match region")
        except S3ResponseError:
            if not auto_create_bucket:
                raise
            bucket = self.s3.create_bucket(self.aws.bucket, location=self.aws.region)

        def report_upload_progress(sent, total):
            # either value may be missing; report it as 0
            sent = sent or 0
            total = total or 0
            # guard the denominator so an unknown/zero total cannot raise
            percent = int(float(sent) / float(max(1, total)) * 100)
            out("Uploaded " + str(sent) + " bytes of " + str(total) \
                + " (" + str(percent) + "%)")

        # upload the new version
        k = Key(bucket)
        k.key = self.aws.bucket_path + key
        k.set_metadata('time', str(time()))
        k.set_contents_from_filename(filename, cb=report_upload_progress, num_cb=10)

    def list_available_solution_stacks(self):
        """
        Returns a list of available solution stacks
        """
        stacks = self.ebs.list_available_solution_stacks()
        return stacks['ListAvailableSolutionStacksResponse']['ListAvailableSolutionStacksResult']['SolutionStacks']

    def create_application(self, description=None):
        """
        Creates the application named by the helper's app_name
        """
        out("Creating application " + str(self.app_name))
        self.ebs.create_application(self.app_name, description=description)

    def delete_application(self):
        """
        Deletes the application named by the helper's app_name, asking for
        its environments to be terminated by force
        """
        out("Deleting application " + str(self.app_name))
        self.ebs.delete_application(self.app_name, terminate_env_by_force=True)

    def application_exists(self):
        """
        Returns whether or not the given app_name exists
        """
        response = self.ebs.describe_applications(application_names=[self.app_name])
        return len(response['DescribeApplicationsResponse']['DescribeApplicationsResult']['Applications']) > 0

    def create_environment(self, env_name, version_label=None,
                           solution_stack_name=None, cname_prefix=None, description=None,
                           option_settings=None, tier_name='WebServer', tier_type='Standard', tier_version='1.1'):
        """
        Creates a new environment
        """
        out("Creating environment: " + str(env_name) + ", tier_name:" + str(tier_name) + ", tier_type:" + str(tier_type))
        self.ebs.create_environment(self.app_name, env_name,
                                    version_label=version_label,
                                    solution_stack_name=solution_stack_name,
                                    cname_prefix=cname_prefix,
                                    description=description,
                                    option_settings=option_settings,
                                    tier_type=tier_type,
                                    tier_name=tier_name,
                                    tier_version=tier_version)

    def environment_exists(self, env_name, include_deleted=False):
        """
        Returns whether or not the given environment exists; an environment
        whose first returned record has status 'Terminated' does not count
        """
        response = self.ebs.describe_environments(application_name=self.app_name, environment_names=[env_name],
                                                  include_deleted=include_deleted)
        environments = response['DescribeEnvironmentsResponse']['DescribeEnvironmentsResult']['Environments']
        return len(environments) > 0 and environments[0]['Status'] != 'Terminated'

    def environment_resources(self, env_name):
        """
        Returns the description for the given environment's resources
        """
        resp = self.ebs.describe_environment_resources(environment_name=env_name)
        return resp['DescribeEnvironmentResourcesResponse']['DescribeEnvironmentResourcesResult']['EnvironmentResources']

    def get_env_sizing_metrics(self, env_name):
        """
        Returns (min_size, max_size, desired_capacity) of the environment's
        autoscaling group, or (None, None, None) when no group is found
        """
        asg = self.get_asg(env_name)
        if asg:
            return asg.min_size, asg.max_size, asg.desired_capacity
        else:
            return None, None, None

    def get_asg(self, env_name):
        """
        Returns the environment's autoscaling group, or None when the
        lookup by name returns nothing
        """
        asg_name = self.get_asg_name(env_name)
        asgs = self.autoscale.get_all_groups(names=[asg_name])
        asg = None
        if asgs:
            asg = asgs[0]
        return asg

    def get_asg_name(self, env_name):
        """
        Returns the name of the first autoscaling group listed in the
        environment's resources
        """
        resources = self.environment_resources(env_name)
        name = resources["AutoScalingGroups"][0]["Name"]
        return name

    def set_env_sizing_metrics(self, env_name, min_size, max_size):
        """
        Updates the environment's MinSize and MaxSize autoscaling options
        """
        self.update_environment(env_name, option_settings=[
            ("aws:autoscaling:asg", "MinSize", min_size), ("aws:autoscaling:asg", "MaxSize", max_size)])

    def environment_data(self, env_name):
        """
        Returns the description for the given environment
        """
        response = self.ebs.describe_environments(application_name=self.app_name, environment_names=[env_name],
                                                  include_deleted=False)
        return response['DescribeEnvironmentsResponse']['DescribeEnvironmentsResult']['Environments'][0]

    def rebuild_environment(self, env_name):
        """
        Rebuilds an environment
        """
        out("Rebuilding " + str(env_name))
        self.ebs.rebuild_environment(environment_name=env_name)

    def get_environments(self):
        """
        Returns the environments
        """
        response = self.ebs.describe_environments(application_name=self.app_name, include_deleted=False)
        return response['DescribeEnvironmentsResponse']['DescribeEnvironmentsResult']['Environments']

    def delete_environment(self, environment_name):
        """
        Deletes an environment
        """
        self.ebs.terminate_environment(environment_name=environment_name, terminate_resources=True)

    def update_environment(self, environment_name, description=None, option_settings=None, tier_type=None, tier_name=None,
                           tier_version='1.0'):
        """
        Updates an environment

        The option settings are validated first and every validation
        message is logged; the update is sent regardless of the messages'
        severity.

        :param option_settings: list of option settings; an omitted value
            is treated as an empty list
        """
        if option_settings is None:
            option_settings = []
        out("Updating environment: " + str(environment_name))
        messages = self.ebs.validate_configuration_settings(self.app_name, option_settings,
                                                            environment_name=environment_name)
        messages = messages['ValidateConfigurationSettingsResponse']['ValidateConfigurationSettingsResult']['Messages']
        for message in messages:
            out("[" + message['Severity'] + "] " + str(environment_name) + " - '" \
                + message['Namespace'] + ":" + message['OptionName'] + "': " + message['Message'])
        self.ebs.update_environment(
            environment_name=environment_name,
            description=description,
            option_settings=option_settings,
            tier_type=tier_type,
            tier_name=tier_name,
            tier_version=tier_version)

    def get_previous_environment_for_subdomain(self, env_subdomain):
        """
        Returns an environment name for the given cname

        The first label of each non-terminated environment's CNAME is
        compared case-insensitively with ``env_subdomain``; the name of the
        first match is returned, or None when nothing matches.
        """

        def sanitize_subdomain(subdomain):
            return subdomain.lower()

        env_subdomain = sanitize_subdomain(env_subdomain)

        def match_cname(cname):
            subdomain = sanitize_subdomain(cname.split(".")[0])
            return subdomain == env_subdomain

        def match_candidate(env):
            return env['Status'] != 'Terminated' \
                    and env.get('CNAME') \
                    and match_cname(env['CNAME'])

        envs = self.get_environments()
        candidates = [env for env in envs if match_candidate(env)]

        match = None
        if candidates:
            match = candidates[0]["EnvironmentName"]

        return match

    def deploy_version(self, environment_name, version_label):
        """
        Deploys a version to an environment
        """
        out("Deploying " + str(version_label) + " to " + str(environment_name))
        self.ebs.update_environment(environment_name=environment_name, version_label=version_label)

    def get_versions(self):
        """
        Returns the versions available
        """
        response = self.ebs.describe_application_versions(application_name=self.app_name)
        return response['DescribeApplicationVersionsResponse']['DescribeApplicationVersionsResult']['ApplicationVersions']

    def create_application_version(self, version_label, key):
        """
        Creates an application version
        """
        out("Creating application version " + str(version_label) + " for " + str(key))
        self.ebs.create_application_version(self.app_name, version_label,
                                            s3_bucket=self.aws.bucket, s3_key=self.aws.bucket_path+key)

    def delete_unused_versions(self, versions_to_keep=10):
        """
        Deletes unused versions

        The newest ``versions_to_keep`` versions (by DateCreated) are always
        kept; older ones are deleted unless an environment is running them.
        """

        # get versions in use
        environments = self.ebs.describe_environments(application_name=self.app_name, include_deleted=False)
        environments = environments['DescribeEnvironmentsResponse']['DescribeEnvironmentsResult']['Environments']
        versions_in_use = set(env['VersionLabel'] for env in environments)

        # get all versions, newest first
        versions = self.ebs.describe_application_versions(application_name=self.app_name)
        versions = versions['DescribeApplicationVersionsResponse']['DescribeApplicationVersionsResult'][
            'ApplicationVersions']
        versions = sorted(versions, key=lambda version: version['DateCreated'], reverse=True)

        # delete the older versions that are not in use
        for version in versions[versions_to_keep:]:
            if version['VersionLabel'] in versions_in_use:
                out("Not deleting " + version["VersionLabel"] + " because it is in use")
            else:
                out("Deleting unused version: " + version["VersionLabel"])
                self.ebs.delete_application_version(application_name=self.app_name,
                                                    version_label=version['VersionLabel'])
                sleep(2)

    def describe_events(self, environment_name, next_token=None, start_time=None):
        """
        Describes events from the given environment

        :returns: tuple ``(events, next_token)`` taken from the response
        """

        events = self.ebs.describe_events(
            application_name=self.app_name,
            environment_name=environment_name,
            next_token=next_token,
            start_time=start_time)

        result = events['DescribeEventsResponse']['DescribeEventsResult']
        return (result['Events'], result['NextToken'])

    def _raise_if_wait_expired(self, started, environment_names, health):
        """
        Raises when environments are still pending after wait_time_secs
        """
        if not environment_names:
            # everything already reached the requested state
            return
        elapsed = time() - started
        if elapsed > self.wait_time_secs:
            message = "Wait time for environment(s) {environments} to be {health} expired".format(
                environments=" and ".join(environment_names), health=(health or "Green")
            )
            raise Exception(message)

    def wait_for_environments(self, environment_names, health=None, status=None, version_label=None,
                              include_deleted=True, use_events=True):
        """
        Waits for the environment(s) to match every given criterion

        :param environment_names: one name or a list/tuple of names
        :param health: required Health value, or None for any
        :param status: required Status value, or None for any
        :param version_label: required VersionLabel, or None for any
        :param include_deleted: passed through to describe_environments
        :param use_events: accepted for compatibility; not read here
        :raises Exception: when an environment is Ready and Red for more
            than MAX_RED_SAMPLES polls, when no environment is returned, or
            when wait_time_secs elapses with environments still pending
        """

        # turn into a list
        if not isinstance(environment_names, (list, tuple)):
            environment_names = [environment_names]
        environment_names = environment_names[:]

        # print some stuff
        s = "Waiting for environment(s) " + (", ".join(environment_names)) + " to"
        if health is not None:
            s += " have health " + health
        else:
            s += " have any health"
        if version_label is not None:
            s += " and have version " + version_label
        if status is not None:
            s += " and have status " + status
        out(s)

        started = time()
        seen_events = list()
        # Red samples seen per environment. Kept outside the polling loop:
        # every poll returns fresh environment dicts, so a counter stored
        # on them would start from zero each time.
        red_counts = {}

        for env_name in environment_names:
            (events, _) = self.describe_events(env_name, start_time=utcnow_isoformat())
            for event in events:
                seen_events.append(event)

        delay = 10

        while True:
            # bail if they're all good
            if len(environment_names) == 0:
                break

            # wait
            sleep(delay)

            # get the envs
            try:
                environments = self.ebs.describe_environments(
                    application_name=self.app_name,
                    environment_names=environment_names,
                    include_deleted=include_deleted)
            except BotoServerError as e:
                if not e.error_code == 'Throttling':
                    raise
                delay = min(60, int(delay * 1.5))
                out("Throttling: setting delay to " + str(delay) + " seconds")
                # sustained throttling must not outlive the wait time
                self._raise_if_wait_expired(started, environment_names, health)
                continue

            environments = environments['DescribeEnvironmentsResponse']['DescribeEnvironmentsResult']['Environments']
            if len(environments) <= 0:
                raise Exception("Couldn't find any environments")

            # loop through and wait
            for env in environments:
                env_name = env['EnvironmentName']

                # the message
                msg = "Environment " + env_name + " is " + str(env['Health'])
                if version_label is not None:
                    msg = msg + " and has version " + str(env['VersionLabel'])
                if status is not None:
                    msg = msg + " and has status " + str(env['Status'])

                # what we're doing
                good_to_go = True
                if health is not None:
                    good_to_go = good_to_go and str(env['Health']) == health
                if status is not None:
                    good_to_go = good_to_go and str(env['Status']) == status
                if version_label is not None:
                    good_to_go = good_to_go and str(env['VersionLabel']) == version_label

                # allow a certain number of Red samples before failing
                if env['Status'] == 'Ready' and env['Health'] == 'Red':
                    red_counts[env_name] = red_counts.get(env_name, 0) + 1
                    if red_counts[env_name] > MAX_RED_SAMPLES:
                        out('Deploy failed')
                        raise Exception('Ready and red')

                # log it
                if good_to_go:
                    out(msg + " ... done")
                    # several records may carry the same name; only remove
                    # the name once
                    if env_name in environment_names:
                        environment_names.remove(env_name)
                else:
                    out(msg + " ... waiting")

                # log events
                try:
                    (events, _) = self.describe_events(env_name, start_time=utcnow_isoformat())
                except BotoServerError as e:
                    if not e.error_code == 'Throttling':
                        raise
                    delay = min(60, int(delay * 1.5))
                    out("Throttling: setting delay to " + str(delay) + " seconds")
                    break

                for event in events:
                    if event not in seen_events:
                        out("["+event['Severity']+"] "+event['Message'])
                        seen_events.append(event)

            # check the time
            self._raise_if_wait_expired(started, environment_names, health)
Exemplo n.º 15
0
class WatchData:
    """CPU-load snapshot of one Auto Scaling group plus the action taken on it.

    Instances are pickled to ``datafile`` between runs (``store`` /
    ``from_file``).  The AWS handles created by ``connect`` are transient and
    are stripped from the pickled state by ``__getstate__``.
    """

    datafile = "/tmp/watchdata.p"  # pickle file shared by store()/from_file()
    dry = False  # when True, decisions are logged but no AWS call changes anything
    low_limit = 70  # check_avg_low: shrink when the projected load is below this
    high_limit = 90  # check_avg_high: grow when the average load is above this
    high_urgent = 95  # check_too_high: per-instance load above this is an emergency
    stats_period = 120  # period argument passed to CloudWatch
    history_size = 0  # number of samples kept by store(); 0 disables history

    # Attributes that must never be pickled (live connections / API objects).
    _TRANSIENT = ('ec2', 'cw', 'autoscale', 'group', 'instances_info')

    def __init__(self):
        self.name = ''
        self.instances = 0
        self.new_desired = 0
        self.desired = 0
        self.instances_info = None
        self.previous_instances = 0
        self.action = ""
        self.action_ts = 0
        self.changed_ts = 0
        self.total_load = 0
        self.avg_load = 0
        self.max_load = 0
        self.up_ts = 0
        self.down_ts = 0
        self.max_loaded = None
        self.loads = {}
        self.measures = {}
        self.emergency = False
        self.history = None

    def __getstate__(self):
        """Return the picklable state, without the transient AWS objects.

        Missing keys are ignored so an instance that never called
        ``connect`` can still be stored.
        """
        state = self.__dict__.copy()
        for key in self._TRANSIENT:
            state.pop(key, None)
        return state

    def connect(self, groupname):
        """Open the AWS connections and load the Auto Scaling group ``groupname``.

        Raises IndexError if no group with that name is returned.
        """
        self.ec2 = boto.connect_ec2()
        self.cw = CloudWatchConnection()
        self.autoscale = AutoScaleConnection()
        self.group = self.autoscale.get_all_groups(names=[groupname])[0]
        self.instances = len(self.group.instances)
        self.desired = self.group.desired_capacity
        self.name = groupname

    def get_instances_info(self):
        """Fetch the EC2 instance objects of the group into ``instances_info``."""
        ids = [member.instance_id for member in self.group.instances]
        self.instances_info = self.ec2.get_only_instances(instance_ids=ids)

    def get_CPU_loads(self):
        """Read every instance's load and update total/avg/max bookkeeping."""
        for member in self.group.instances:
            load = self.get_instance_CPU_load(member.instance_id)
            if load is None:
                # No datapoints for this instance; leave it out of the totals.
                continue
            self.total_load += load
            self.loads[member.instance_id] = load
            if load > self.max_load:
                self.max_load = load
                self.max_loaded = member.instance_id

        # An empty group has no average; avoid dividing by zero.
        if self.instances:
            self.avg_load = self.total_load / self.instances

    def get_instance_CPU_load(self, instance):
        """Return the most recent average CPUUtilization of ``instance``.

        Queries the last 300 seconds and records the number of datapoints in
        ``self.measures[instance]``.  Returns None when CloudWatch has no data.
        """
        # CloudWatch timestamps are UTC; a naive local time would shift the
        # query window on any host whose clock is not set to UTC.
        end = datetime.datetime.utcnow()
        start = end - datetime.timedelta(seconds=300)

        datapoints = self.cw.get_metric_statistics(
            self.stats_period, start, end, "CPUUtilization", "AWS/EC2",
            ["Average"], {"InstanceId": instance})
        if not datapoints:
            return None

        self.measures[instance] = len(datapoints)
        newest = max(datapoints, key=lambda point: point['Timestamp'])
        return newest['Average']

    @classmethod
    def from_file(cls):
        """Load the previously stored state, or a fresh instance on any failure.

        NOTE: ``pickle.load`` executes arbitrary code from the file; ``datafile``
        must only be writable by trusted users.
        """
        try:
            with open(cls.datafile, "rb") as handle:
                return pickle.load(handle)
        except Exception:
            # Best effort: a missing, truncated or incompatible file simply
            # means we start without history.
            return cls()

    def store(self, annotation=False):
        """Pickle this object to ``datafile``; optionally publish an annotation.

        When ``history_size`` is positive a ``[timestamp, instance count,
        total load]`` sample is appended and the history is trimmed first.
        """
        if self.history_size > 0:
            if not self.history:
                self.history = []
            self.history.append([
                int(time.time()),
                len(self.group.instances),
                int(round(self.total_load)),
            ])
            self.history = self.history[-self.history_size:]

        with open(self.datafile, "wb") as handle:
            pickle.dump(self, handle)

        if annotation:
            import utils
            text = json.dumps(self.__getstate__(), skipkeys=True)
            utils.store_annotation("ec2_watch", text)

    def check_too_low(self):
        """Kill the first instance whose load is abnormally low.

        An instance qualifies when it has more than one datapoint, the group
        has more than one instance, and its load is below both 20% of the
        average and 4.  Returns True when an instance was killed, otherwise
        the current ``emergency`` flag.
        """
        for instance, load in self.loads.items():
            if (load is not None and self.measures[instance] > 1
                    and self.instances > 1
                    and load < self.avg_load * 0.2 and load < 4):
                self.emergency = True
                self.check_avg_low()  # Check if the desired instances can be decreased
                self.action = "EMERGENCY LOW (%s %5.2f%%) " % (instance, load)
                self.kill_instance(instance)
                return True
        return self.emergency

    def check_too_high(self):
        """React to the first instance whose load exceeds ``high_urgent``.

        An outlier (load above 1.5x the average in a multi-instance group) is
        killed; otherwise the group is grown by one.  Returns True when an
        action was taken, otherwise the current ``emergency`` flag.
        """
        for instance, load in self.loads.items():
            if (load is not None and self.measures[instance] > 1
                    and load > self.high_urgent):
                self.emergency = True
                self.action = "EMERGENCY HIGH (%s %5.2f%%) " % (instance, load)
                if self.instances > 1 and load > self.avg_load * 1.5:
                    self.action += " killing bad instance"
                    self.kill_instance(instance)
                else:
                    self.action += " increasing instances to %d" % (self.instances + 1,)
                    self.set_desired(self.instances + 1)
                return True

        return self.emergency

    def check_avg_high(self):
        """Grow the group by one when the average load is above the threshold.

        Returns True when the group was grown, False otherwise.
        """
        threshold = self.high_limit
        if self.instances == 1:
            threshold = threshold * 0.9  # Increase faster if there is just one instance

        if self.avg_load > threshold:
            self.action = "WARN, high load: %d -> %d " % (self.instances, self.instances + 1)
            self.set_desired(self.instances + 1)
            return True
        return False

    def check_avg_low(self):
        """Shrink the group by one if the remaining instances could carry the load.

        Returns False when the group is already at its minimum size; the
        return value is otherwise None and carries no meaning.
        """
        if self.instances <= self.group.min_size:
            return False

        if self.total_load / (self.instances - 1) < self.low_limit:
            self.action = "low load: %d -> %d " % (self.instances, self.instances - 1)
            self.set_desired(self.instances - 1)

    def kill_instance(self, id):
        """Log and (unless ``dry``) terminate the EC2 instance ``id``."""
        if self.action:
            print(self.action)
        print("Kill instance %s" % (id,))
        syslog.syslog(syslog.LOG_INFO, "ec2_watch kill_instance: %s instances: %d (%s)" % (id, self.instances, self.action))
        if self.dry:
            return
        self.ec2.terminate_instances(instance_ids=[id])
        self.action_ts = time.time()

    def set_desired(self, desired):
        """Log and (unless ``dry``) set the group's capacity to ``desired``.

        The capacity is only changed when ``desired`` is not below the
        group's minimum size; the timestamp and ``new_desired`` are recorded
        either way.
        """
        if self.action:
            print(self.action)
        print("Setting instances from %d to %d" % (self.instances, desired))
        syslog.syslog(syslog.LOG_INFO, "ec2_watch set_desired: %d -> %d (%s)" % (self.instances, desired, self.action))
        if self.dry:
            return
        if desired >= self.group.min_size:
            self.group.set_capacity(desired)
        self.action_ts = time.time()
        self.new_desired = desired
Exemplo n.º 16
0
class MSBManager:
    """Thin helper around boto's EC2, ELB, Auto Scaling and CloudWatch APIs.

    Every ``create_*`` method is idempotent by name: an existing resource
    with the requested name is returned instead of creating a new one.
    """

    def __init__(self, aws_access_key, aws_secret_key):
        self.ec2_conn = EC2Connection(aws_access_key, aws_secret_key)
        self.elb_conn = ELBConnection(aws_access_key, aws_secret_key)
        self.auto_scale_conn = AutoScaleConnection(aws_access_key, aws_secret_key)
        self.cloud_watch_conn = CloudWatchConnection(aws_access_key, aws_secret_key)
        self.default_cooldown = 60

    def _running_instances_named(self, name):
        """Yield each running instance whose ``Name`` tag equals ``name``."""
        for reservation in self.ec2_conn.get_all_instances():
            for instance in reservation.instances:
                if ("Name" in instance.tags and instance.tags["Name"] == name
                        and instance.state == "running"):
                    yield instance

    def get_security_group(self, name):
        """Return the security group called ``name``, or None."""
        matches = [g for g in self.ec2_conn.get_all_security_groups() if g.name == name]
        return matches[0] if matches else None

    def create_security_group(self, name, description):
        """Return the security group ``name``, creating it when missing.

        NOTE: the group is opened to all protocols from 0.0.0.0/0.
        """
        sg = self.get_security_group(name)
        if sg is None:
            sg = self.ec2_conn.create_security_group(name, description)

        try:
            sg.authorize(ip_protocol="-1", from_port=None, to_port=None, cidr_ip="0.0.0.0/0", dry_run=False)
        except EC2ResponseError:
            # Best effort: presumably the rule already exists on a reused group.
            pass
        return sg

    def remove_security_group(self, name):
        """Delete the security group called ``name``."""
        self.ec2_conn.delete_security_group(name=name)

    def create_instance(self, image, instance_type, key_name, zone, security_groups, tags):
        """Return a running instance tagged ``tags["Name"]``, launching one if needed.

        A newly launched instance is polled until it reports ``running`` and
        is then tagged with ``tags``.  Raises KeyError when ``tags`` has no
        ``"Name"`` entry.
        """
        existing = next(self._running_instances_named(tags["Name"]), None)
        if existing is not None:
            return existing

        reservation = self.ec2_conn.run_instances(
            image,
            instance_type=instance_type,
            key_name=key_name,
            placement=zone,
            security_groups=security_groups,
            monitoring_enabled=True,
        )
        instance = reservation.instances[0]
        # NOTE(review): loops forever if the instance never reaches "running".
        while instance.update() != "running":
            time.sleep(5)
        time.sleep(10)
        self.ec2_conn.create_tags([instance.id], tags)
        return instance

    def request_spot_instance(self, bid, image, instance_type, key_name, zone, security_groups, tags):
        """Request one spot instance, wait until it is fulfilled, then tag it.

        Polls the spot request once a minute until it carries an instance id.
        """
        req = self.ec2_conn.request_spot_instances(
            price=bid,
            instance_type=instance_type,
            image_id=image,
            placement=zone,
            key_name=key_name,
            security_groups=security_groups,
        )
        job_sir_id = req[0].id

        while True:
            instance_id = None
            for sir in self.ec2_conn.get_all_spot_instance_requests():
                if sir.id == job_sir_id:
                    instance_id = sir.instance_id
                    break
            if instance_id:
                # Fulfilled: do not report "not ready" or wait another minute.
                break
            print("Job {} not ready".format(job_sir_id))
            time.sleep(60)

        self.ec2_conn.create_tags([instance_id], tags)

    def remove_instance(self, instance_id):
        """Terminate a single instance."""
        self.remove_instances([instance_id])

    def remove_instances(self, instance_ids):
        """Terminate every instance in ``instance_ids``."""
        self.ec2_conn.terminate_instances(instance_ids)

    def remove_instance_by_tag_name(self, name):
        """Terminate all running instances whose ``Name`` tag equals ``name``."""
        instance_ids = [inst.id for inst in self._running_instances_named(name)]
        if instance_ids:
            self.remove_instances(instance_ids)

    def create_elb(self, name, zone, project_tag_value, security_group_id, instance_ids=None):
        """Return the load balancer ``name``, creating and configuring it if missing.

        A new balancer forwards HTTP 80 -> 80, health-checks ``/heartbeat``,
        gets ``security_group_id`` applied, registers ``instance_ids`` (when
        given) and is tagged ``15619project=project_tag_value``.
        """
        existing = [lb for lb in self.elb_conn.get_all_load_balancers() if lb.name == name]
        if existing:
            return existing[0]

        hc = HealthCheck(
            timeout=50, interval=60, healthy_threshold=2, unhealthy_threshold=8, target="HTTP:80/heartbeat"
        )
        lb = self.elb_conn.create_load_balancer(name, [zone], [(80, 80, "http")])

        self.elb_conn.apply_security_groups_to_lb(name, [security_group_id])
        lb.configure_health_check(hc)
        if instance_ids:
            lb.register_instances(instance_ids)

        params = {
            "LoadBalancerNames.member.1": lb.name,
            "Tags.member.1.Key": "15619project",
            "Tags.member.1.Value": project_tag_value,
        }
        lb.connection.get_status("AddTags", params, verb="POST")
        return lb

    def remove_elb(self, name):
        """Delete the load balancer called ``name``."""
        self.elb_conn.delete_load_balancer(name)

    def create_launch_configuration(self, name, image, key_name, security_groups, instance_type):
        """Return the launch configuration ``name``, creating it when missing.

        ``security_groups`` may be a single group or a list/tuple of groups.
        """
        existing = [lc for lc in self.auto_scale_conn.get_all_launch_configurations() if lc.name == name]
        if existing:
            return existing[0]

        # A lone group is wrapped; a sequence is passed through rather than nested.
        if isinstance(security_groups, (list, tuple)):
            groups = list(security_groups)
        else:
            groups = [security_groups]

        lc = LaunchConfiguration(
            name=name,
            image_id=image,
            key_name=key_name,
            security_groups=groups,
            instance_type=instance_type,
        )
        self.auto_scale_conn.create_launch_configuration(lc)
        return lc

    def remove_launch_configuration(self, name):
        """Delete the launch configuration called ``name``."""
        self.auto_scale_conn.delete_launch_configuration(name)

    def create_autoscaling_group(self, name, lb_name, zone, tags, instance_ids=None, launch_config=None):
        """Return the Auto Scaling group ``name``, creating it when missing.

        A new group is fixed at 4 instances, uses ELB health checks, and gets
        ``scale_up`` / ``scale_down`` policies driven by CPU alarms (above 85%
        and below 60%).

        ``launch_config`` is the launch configuration (e.g. the result of
        ``create_launch_configuration``) used for a new group; it is not
        needed when the group already exists.

        Raises ValueError when the group must be created and no
        ``launch_config`` was supplied.
        """
        existing = [g for g in self.auto_scale_conn.get_all_groups() if g.name == name]
        if existing:
            return existing[0]

        if launch_config is None:
            raise ValueError(
                "launch_config is required to create autoscaling group %r" % (name,))

        as_group = AutoScalingGroup(
            group_name=name,
            load_balancers=[lb_name],
            availability_zones=[zone],
            launch_config=launch_config,
            min_size=4,
            max_size=4,
            health_check_type="ELB",
            health_check_period=120,
            connection=self.auto_scale_conn,
            default_cooldown=self.default_cooldown,
            desired_capacity=4,
            tags=tags,
        )

        self.auto_scale_conn.create_auto_scaling_group(as_group)
        if instance_ids:
            self.auto_scale_conn.attach_instances(name, instance_ids)

        scale_up_policy = ScalingPolicy(
            name="scale_up",
            adjustment_type="ChangeInCapacity",
            as_name=name,
            scaling_adjustment=1,
            cooldown=self.default_cooldown,
        )
        scale_down_policy = ScalingPolicy(
            name="scale_down",
            adjustment_type="ChangeInCapacity",
            as_name=name,
            scaling_adjustment=-1,
            cooldown=self.default_cooldown,
        )

        self.auto_scale_conn.create_scaling_policy(scale_up_policy)
        self.auto_scale_conn.create_scaling_policy(scale_down_policy)

        # Re-read the policies: the alarms need the ARNs assigned on creation.
        scale_up_policy = self.auto_scale_conn.get_all_policies(as_group=name, policy_names=["scale_up"])[0]
        scale_down_policy = self.auto_scale_conn.get_all_policies(as_group=name, policy_names=["scale_down"])[0]

        alarm_dimensions = {"AutoScalingGroupName": name}
        scale_up_alarm = MetricAlarm(
            name="scale_up_on_cpu",
            namespace="AWS/EC2",
            metric="CPUUtilization",
            statistic="Average",
            comparison=">",
            threshold=85,
            period=60,
            evaluation_periods=1,
            alarm_actions=[scale_up_policy.policy_arn],
            dimensions=alarm_dimensions,
        )
        self.cloud_watch_conn.create_alarm(scale_up_alarm)
        scale_down_alarm = MetricAlarm(
            name="scale_down_on_cpu",
            namespace="AWS/EC2",
            metric="CPUUtilization",
            statistic="Average",
            comparison="<",
            threshold=60,
            period=60,
            evaluation_periods=1,
            alarm_actions=[scale_down_policy.policy_arn],
            dimensions=alarm_dimensions,
        )
        self.cloud_watch_conn.create_alarm(scale_down_alarm)

        return as_group

    def update_autoscaling_group_max_size(self, as_group, max_size):
        """Set ``as_group.max_size`` and push the change to AWS."""
        as_group.max_size = max_size
        as_group.update()

    def update_autoscaling_group_min_size(self, as_group, min_size):
        """Set ``as_group.min_size`` and push the change to AWS."""
        as_group.min_size = min_size
        as_group.update()

    def remove_autoscaling_group(self, name):
        """Delete the Auto Scaling group called ``name``."""
        self.auto_scale_conn.delete_auto_scaling_group(name)
Exemplo n.º 17
0
class BotoScaleInterface(ScaleInterface):
    """``ScaleInterface`` implementation backed by a boto ``AutoScaleConnection``.

    When ``saveclcdata`` is True, the results of the ``get_all_*`` calls are
    also dumped as JSON under ``mockdata/``.
    """

    conn = None
    saveclcdata = False

    def __init__(self, clc_host, access_id, secret_key, token):
        """Connect to the Auto Scaling endpoint on ``clc_host``.

        Hosts ending in ``amazonaws.com`` are addressed on port 443 at ``/``
        with ``ec2`` replaced by ``autoscaling`` in the host name; any other
        host uses port 8773 and ``/services/AutoScaling``.
        """
        path = '/services/AutoScaling'
        port = 8773
        if clc_host.endswith('amazonaws.com'):
            clc_host = clc_host.replace('ec2', 'autoscaling', 1)
            path = '/'
            port = 443
        reg = RegionInfo(name='eucalyptus', endpoint=clc_host)
        self.conn = AutoScaleConnection(access_id, secret_key, region=reg,
                                        port=port, path=path,
                                        is_secure=True, security_token=token, debug=0)
        # NOTE: certificate validation is deliberately disabled here; the
        # connection is encrypted but the peer is not authenticated.
        self.conn.https_validate_certificates = False
        self.conn.http_connection_kwargs['timeout'] = 30

    def __save_json__(self, obj, name):
        """Write ``obj`` as indented JSON to the file ``name``."""
        with open(name, 'w') as f:
            json.dump(obj, f, cls=BotoJsonScaleEncoder, indent=2)

    ##
    # autoscaling methods
    ##
    def create_auto_scaling_group(self, as_group):
        """Create ``as_group`` and return the service response."""
        return self.conn.create_auto_scaling_group(as_group)

    def delete_auto_scaling_group(self, name, force_delete=False):
        """Delete the group ``name``, passing ``force_delete`` through."""
        return self.conn.delete_auto_scaling_group(name, force_delete)

    def get_all_groups(self, names=None, max_records=None, next_token=None):
        """Return the Auto Scaling groups, optionally saving them as mock data."""
        obj = self.conn.get_all_groups(names, max_records, next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/AS_Groups.json")
        return obj

    def get_all_autoscaling_instances(self, instance_ids=None, max_records=None, next_token=None):
        """Return the Auto Scaling instances, optionally saving them as mock data."""
        obj = self.conn.get_all_autoscaling_instances(instance_ids, max_records, next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/AS_Instances.json")
        return obj

    def set_desired_capacity(self, group_name, desired_capacity, honor_cooldown=False):
        """Set the desired capacity of ``group_name``.

        ``honor_cooldown`` is accepted but not supported, so it is ignored.
        Raises IndexError when the group is not returned by the service.
        """
        group = self.conn.get_all_groups([group_name])[0]
        return group.set_capacity(desired_capacity)

    def set_instance_health(self, instance_id, health_status, should_respect_grace_period=True):
        """Set the health status of ``instance_id``."""
        return self.conn.set_instance_health(instance_id, health_status,
                                             should_respect_grace_period)

    def terminate_instance(self, instance_id, decrement_capacity=True):
        """Terminate ``instance_id``, passing ``decrement_capacity`` through."""
        return self.conn.terminate_instance(instance_id, decrement_capacity)

    def update_autoscaling_group(self, as_group):
        """Bind ``as_group`` to this connection and push its current settings."""
        as_group.connection = self.conn
        return as_group.update()

    def create_launch_configuration(self, launch_config):
        """Create ``launch_config`` and return the service response."""
        return self.conn.create_launch_configuration(launch_config)

    def delete_launch_configuration(self, launch_config_name):
        """Delete the launch configuration ``launch_config_name``."""
        return self.conn.delete_launch_configuration(launch_config_name)

    def get_all_launch_configurations(self, config_names=None, max_records=None, next_token=None):
        """Return the launch configurations, optionally saving them as mock data.

        The arguments default to None like those of the other ``get_all_*``
        methods of this class.
        """
        obj = self.conn.get_all_launch_configurations(names=config_names, max_records=max_records, next_token=next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/AS_LaunchConfigs.json")
        return obj
Exemplo n.º 18
0
class aws:
    '''Helper that names and builds a load balancer, launch configuration,
    Auto Scaling group and trigger for one environment/type combination.

    Every resource name starts with PREFIX + DOMAIN + '-' + ENV + '-' + TYPE
    and ends with '-lb', '-group', '-trigger' or '-launch_config'.

    Command-line examples from the script this class belongs to:
    Create a load balancer group for web servers in the dev environment:
        python control-lb-and-groups.py --createlb --env dev --aws SC --type web
    Add an instance to the load balancer group:
        python control-lb-and-groups.py --addtolb=true --env dev --aws SC --type web --instance=i-999999
    Create a launch config and an Autoscale Group with a min of 2 and a max of 5 instances:
        python control-lb-and-groups.py  --createlc --ami ami-fa6b8393 --size c1.medium --env dev --aws SC --type web --createag --min 2 --max 5
    '''

    def __init__(self,PREFIX='tfound-',ENV='dev',AMI='',TYPE='',SIZE='',
                 DOMAIN='tfound',SSHKEY='myprivatekey',AWSKEY='',AWSSECRET='',AVAIL_ZONES=["us-east-1a","us-east-1b","us-east-1c","us-east-1d"],
                 MIN=1,MAX=1):
        '''Store the settings, open the AWS connections and build the launch
        configuration and Auto Scaling group objects (nothing is created on
        AWS until the create* methods are called).

        MIN and MAX are the size bounds of the Auto Scaling group.
        '''
        self.PREFIX = PREFIX + DOMAIN + '-' + ENV + '-' + TYPE
        self.ENV = ENV
        self.AMI = AMI
        self.TYPE = TYPE
        self.DOMAIN = DOMAIN
        self.SIZE = SIZE
        self.MIN = MIN
        self.MAX = MAX
        self.SSHKEY = SSHKEY
        self.AWSKEY = AWSKEY
        self.AWSSECRET = AWSSECRET
        # Copy so instances never share (or mutate) the default list.
        self.AVAIL_ZONES = list(AVAIL_ZONES)
        self.LBNAME = self.PREFIX + '-lb'
        self.AGNAME = self.PREFIX + '-group'
        self.TRNAME = self.PREFIX + '-trigger'
        self.LCNAME = self.PREFIX + '-launch_config'
        self.asconn = AutoScaleConnection(self.AWSKEY, self.AWSSECRET)
        self.elbconn = ELBConnection(aws_access_key_id=AWSKEY, aws_secret_access_key=AWSSECRET)
        self.lc = self._buildLaunchConfig()
        self.ag = self._buildAutoscaleGroup()

    def _buildLaunchConfig(self):
        '''Build (locally) the launch configuration for this environment.'''
        user_data = 'LAUNCHTAGS="' + self.ENV + ' ' + self.TYPE + ' ' + self.DOMAIN + '";'
        return LaunchConfiguration(name=self.LCNAME,
                                   image_id=self.AMI,
                                   key_name=self.SSHKEY,
                                   security_groups=[self.ENV + '.' + self.TYPE],
                                   user_data=user_data,
                                   instance_type=self.SIZE)

    def _buildAutoscaleGroup(self):
        '''Build (locally) the Auto Scaling group bound to the load balancer.'''
        return AutoScalingGroup(group_name=self.AGNAME,
                                load_balancers=[self.LBNAME],
                                availability_zones=self.AVAIL_ZONES,
                                launch_config=self.lc,
                                min_size=self.MIN,
                                max_size=self.MAX)

    def getGroups(self):
        '''Return all existing Auto Scaling groups.'''
        return self.asconn.get_all_groups()

    def getActivities(self,AUTOSCALE_GROUP=None):
        '''Return the scaling activities of AUTOSCALE_GROUP.'''
        return self.asconn.get_all_activities(AUTOSCALE_GROUP)

    def createLaunchConfig(self):
        '''Create the launch configuration that defines the startup params.'''
        return self.asconn.create_launch_configuration(self.lc)

    def createAutoscaleGroup(self):
        '''Create the Auto Scaling group associated with the launch
        configuration and return the service response.
        '''
        return self.asconn.create_auto_scaling_group(self.ag)

    def createTrigger(self,AUTOSCALE_GROUP=None):
        '''Create a CPU trigger on AUTOSCALE_GROUP (a group object, required).

        The trigger adds one instance when average CPU utilization is above
        30 and removes one when it is below 10, measured over 120 second
        periods with a breach duration of 360.
        '''
        tr = Trigger(name=self.TRNAME,
                     autoscale_group=AUTOSCALE_GROUP,
                     measure_name='CPUUtilization', statistic='Average',
                     unit='Percent',
                     dimensions=[('AutoScalingGroupName', AUTOSCALE_GROUP.name),
                                 ('Namespace','AWS/EC2')],
                     period=120, lower_threshold=10,
                     lower_breach_scale_increment='-1',
                     upper_threshold=30,
                     upper_breach_scale_increment='1',
                     breach_duration=360)
        return self.asconn.create_trigger(tr)

    def createHealthCheck(self):
        '''Return the TCP:8080 health check used for the load balancer.'''
        return HealthCheck('instance_health',
                           interval=20,
                           target='TCP:8080',
                           timeout='5')

    def createLoadbalancer(self):
        '''Create the load balancer (TCP 8080 -> 8080) and return its DNS name.

        The balancer is placed in self.ZONE when a caller has set that
        attribute, otherwise in all of self.AVAIL_ZONES.
        '''
        hc = self.createHealthCheck()
        # __init__ never defines ZONE; fall back to the configured zone list
        # instead of failing with AttributeError.
        zone = getattr(self, 'ZONE', None)
        zones = [zone] if zone else self.AVAIL_ZONES
        lb = self.elbconn.create_load_balancer(self.LBNAME,
                                               zones,
                                               [(8080,8080, 'tcp')])

        lb.configure_health_check(hc)
        return lb.dns_name

    def addToLoadbalancer(self,INSTANCE=None):
        '''Register INSTANCE with the load balancer.

        Writes an error to stderr and raises SystemExit(1) when INSTANCE is
        None.
        '''
        if INSTANCE is None:
            sys.stderr.write("Please provide instance id to add. not acceptble: %s\n" % (INSTANCE,))
            raise SystemExit(1)
        self.elbconn.register_instances(self.LBNAME, INSTANCE)
        print("Added instance %s to %s\n" % (INSTANCE, self.LBNAME))

    def getLoadbalancers(self):
        '''Return all load balancers visible to the ELB connection.'''
        return self.elbconn.get_all_load_balancers()

    def startInstances(self,TYPE='',NUM='',SIZE=''):
        '''Placeholder; does nothing.'''
        return
# Tail of a teardown script: the configuration values and the conn_as /
# conn_ec2 connections used below are defined earlier, outside this excerpt.

# NOTE(review): this first connection is replaced by the credentialed one on
# the next statement, so it looks redundant - confirm before removing.
conn_cw = boto.ec2.cloudwatch.connect_to_region(regionName)
# CloudWatch connection for regionName using the explicit key pair.
conn_cw = boto.ec2.cloudwatch.connect_to_region(
        regionName,
        aws_access_key_id = AWS_ACCESS_KEY,
        aws_secret_access_key = AWS_SECRET_KEY
    )
######################### end configuration ################################




# balancers = elb.get_all_load_balancers()
# print balancers[0]

# Look up the configured autoscaling group by name and print its EC2 instances.
group = conn_as.get_all_groups(names=[autoscaling_group['name']])[0]
instanceids = [i.instance_id for i in group.instances]
instances = conn_ec2.get_only_instances(instanceids)
print instances



# Shut down all the instances of an autoscaling group.
# NOTE(review): this takes the FIRST group returned by the service, not the
# group looked up by name above - confirm it is the intended target, because
# the group is deleted further down.
ag = conn_as.get_all_groups()[0]
print "shutdown the instances in the autoscaling group"
ag.shutdown_instances()
# Pause 20 seconds before deleting - presumably to let the shutdown take effect.
sleep(20)

# Delete the autoscaling group itself.
print "delete the autoscaling group"
ag.delete()
Exemplo n.º 20
0
class WatchData:
    """CPU-load snapshot of one Auto Scaling group plus the action taken on it.

    Instances are pickled to ``datafile`` between runs (``store`` /
    ``from_file``).  The AWS handles created by ``connect`` are transient and
    are stripped from the pickled state by ``__getstate__``.
    """

    datafile = "/var/tmp/watchdata.p"  # pickle file shared by store()/from_file()
    dry = False  # when True, decisions are logged but no AWS call changes anything
    low_limit = 72  # check_avg_low: shrink when the projected load is below this
    high_limit = 90  # check_avg_high: grow when the average load is above this
    high_urgent = 95  # check_too_high: per-instance load above this is an emergency
    stats_period = 60  # period argument passed to CloudWatch
    history_size = 0  # number of samples kept by store(); 0 disables history

    # Attributes that must never be pickled (live connections / API objects).
    _TRANSIENT = ('ec2', 'cw', 'autoscale', 'group', 'instances_info')

    def __init__(self):
        self.name = ''
        self.instances = 0
        self.new_desired = 0
        self.desired = 0
        self.instances_info = None
        self.previous_instances = 0
        self.action = ""
        self.action_ts = 0
        self.changed_ts = 0
        self.total_load = 0
        self.avg_load = 0
        self.max_load = 0
        self.up_ts = 0
        self.down_ts = 0
        self.max_loaded = None
        self.loads = {}
        self.measures = {}
        self.emergency = False
        self.history = None
        self.trend = 0
        self.exponential_average = 0
        self.ts = 0

    def __getstate__(self):
        """Return the picklable state, without the transient AWS objects.

        Missing keys are ignored so an instance that never called
        ``connect`` can still be stored.
        """
        state = self.__dict__.copy()
        for key in self._TRANSIENT:
            state.pop(key, None)
        return state

    def connect(self, groupname):
        """Open the AWS connections and load the Auto Scaling group ``groupname``.

        Raises IndexError if no group with that name is returned.
        """
        self.ec2 = boto.connect_ec2()
        self.cw = CloudWatchConnection()
        self.autoscale = AutoScaleConnection()
        self.group = self.autoscale.get_all_groups(names=[groupname])[0]
        self.instances = len(self.group.instances)
        self.desired = self.group.desired_capacity
        self.name = groupname
        self.ts = int(time.time())

    def get_instances_info(self):
        """Fetch the EC2 instance objects of the group into ``instances_info``."""
        ids = [member.instance_id for member in self.group.instances]
        self.instances_info = self.ec2.get_only_instances(instance_ids=ids)

    def get_CPU_loads(self):
        """Read every instance's load and update total/avg/max bookkeeping.

        The average is taken over the instances that reported a load.
        """
        measured = 0
        for member in self.group.instances:
            load = self.get_instance_CPU_load(member.instance_id)
            if load is None:
                # No datapoints for this instance; leave it out of the totals.
                continue
            measured += 1
            self.total_load += load
            self.loads[member.instance_id] = load
            if load > self.max_load:
                self.max_load = load
                self.max_loaded = member.instance_id

        if measured > 0:
            self.avg_load = self.total_load / measured

    def get_instance_CPU_load(self, instance):
        """Return a smoothed recent CPUUtilization of ``instance``.

        Queries the last three ``stats_period`` intervals and records the
        number of datapoints in ``self.measures[instance]``.  With two or
        more datapoints the result is ``0.4 * previous + 0.6 * latest``; with
        one it is that datapoint; with none it is None.
        """
        # CloudWatch timestamps are UTC; a naive local time would shift the
        # query window on any host whose clock is not set to UTC.
        end = datetime.datetime.utcnow()
        start = end - datetime.timedelta(seconds=int(self.stats_period * 3))

        datapoints = self.cw.get_metric_statistics(self.stats_period, start, end,
                                                   "CPUUtilization", "AWS/EC2",
                                                   ["Average"],
                                                   {"InstanceId": instance})
        if not datapoints:
            return None

        self.measures[instance] = len(datapoints)
        ordered = sorted(datapoints, key=lambda point: point['Timestamp'])
        recent = [point['Average'] for point in ordered[-2:]]
        if len(recent) == 1:
            return recent[0]
        return 0.4 * recent[0] + 0.6 * recent[1]

    @classmethod
    def from_file(cls):
        """Load the previously stored state, or a fresh instance on any failure.

        NOTE: ``pickle.load`` executes arbitrary code from the file; ``datafile``
        must only be writable by trusted users.
        """
        try:
            with open(cls.datafile, "rb") as handle:
                return pickle.load(handle)
        except Exception:
            # Best effort: a missing, truncated or incompatible file simply
            # means we start without history.
            return cls()

    def store(self, annotation=False):
        """Pickle this object to ``datafile``; optionally publish an annotation.

        When ``history_size`` is positive a ``[timestamp, instance count,
        total load, average load]`` sample is appended and the history is
        trimmed first.
        """
        if self.history_size > 0:
            if not self.history:
                self.history = []
            self.history.append([
                int(time.time()),
                len(self.group.instances),
                int(round(self.total_load)),
                int(round(self.avg_load)),
            ])
            self.history = self.history[-self.history_size:]

        with open(self.datafile, "wb") as handle:
            pickle.dump(self, handle)

        if annotation:
            import utils
            text = json.dumps(self.__getstate__(), skipkeys=True)
            utils.store_annotation("ec2_watch", text)

    def check_too_low(self):
        """Kill the first instance whose load is abnormally low.

        An instance qualifies when it has more than one datapoint, the group
        has more than one instance, and its load is below both 20% of the
        average and 4.  Returns True when an instance was killed, otherwise
        the current ``emergency`` flag.
        """
        for instance, load in self.loads.items():
            if (load is not None and self.measures[instance] > 1
                    and self.instances > 1
                    and load < self.avg_load * 0.2 and load < 4):
                self.emergency = True
                self.check_avg_low()  # Check if the desired instances can be decreased
                self.action = "EMERGENCY LOW (%s %5.2f%%) " % (instance, load)
                self.kill_instance(instance)
                return True
        return self.emergency

    def check_too_high(self):
        """React to the first instance whose load exceeds ``high_urgent``.

        An outlier (load above 1.5x the average in a multi-instance group) is
        killed; otherwise the group is grown by one.  Returns True when an
        action was taken, otherwise the current ``emergency`` flag.
        """
        for instance, load in self.loads.items():
            if (load is not None and self.measures[instance] > 1
                    and load > self.high_urgent):
                self.emergency = True
                self.action = "EMERGENCY HIGH (%s %5.2f%%) " % (instance, load)
                if self.instances > 1 and load > self.avg_load * 1.5:
                    self.action += " killing bad instance"
                    self.kill_instance(instance)
                else:
                    self.action += " increasing instances to %d" % (
                        self.instances + 1, )
                    self.set_desired(self.instances + 1)
                return True

        return self.emergency

    def check_avg_high(self):
        """Grow the group by one when the average load is above the threshold.

        Returns True when the group was grown, False otherwise.
        """
        threshold = self.high_limit
        if self.instances == 1:
            threshold = threshold * 0.9  # Increase faster if there is just one instance

        if self.avg_load > threshold:
            self.action = "WARN, high load: %d -> %d " % (self.instances,
                                                          self.instances + 1)
            self.set_desired(self.instances + 1)
            return True
        return False

    def check_avg_low(self):
        """Shrink the group by one if the remaining instances could carry the load.

        Returns False when the group is already at its minimum size; the
        return value is otherwise None and carries no meaning.
        """
        if self.instances <= self.group.min_size:
            return False

        if self.total_load / (self.instances - 1) < self.low_limit:
            self.action = "low load: %d -> %d " % (self.instances,
                                                   self.instances - 1)
            self.set_desired(self.instances - 1)

    def kill_instance(self, id):
        """Log and (unless ``dry``) terminate the EC2 instance ``id``."""
        if self.action:
            print(self.action)
        # Single pre-formatted argument: prints the same text on Python 2 and 3.
        print("Kill instance %s" % (id,))
        syslog.syslog(
            syslog.LOG_INFO, "ec2_watch kill_instance: %s instances: %d (%s)" %
            (id, self.instances, self.action))
        if self.dry:
            return
        self.ec2.terminate_instances(instance_ids=[id])
        self.action_ts = time.time()

    def set_desired(self, desired):
        """Log and (unless ``dry``) set the group's capacity to ``desired``.

        The capacity is only changed when ``desired`` is not below the
        group's minimum size; the timestamp and ``new_desired`` are recorded
        either way.
        """
        if self.action:
            print(self.action)
        print("Setting instances from %d to %d" % (self.instances, desired))
        syslog.syslog(
            syslog.LOG_INFO, "ec2_watch set_desired: %d -> %d (%s)" %
            (self.instances, desired, self.action))
        if self.dry:
            return
        if desired >= self.group.min_size:
            self.group.set_capacity(desired)
        self.action_ts = time.time()
        self.new_desired = desired
Exemplo n.º 21
0
# Report the current state of the freshly built AMI.
image = conn.get_image(ami_id)
print("Image is now " + image.state)

# Step 2: connect to the autoscaling service of the configured region and
# register a launch configuration that points at the new AMI.
print("\nStep 2 : Creating scaling configuration")
aws_region_as = RegionInfo(name=region_name, endpoint=region_autoscale_endpoint)
conn_as = AutoScaleConnection(
    aws_access_key_id,
    aws_secret_access_key,
    region=aws_region_as,
)

lc = LaunchConfiguration(
    name=build_name,
    image_id=ami_id,
    key_name=key_name,
    security_groups=security_group,
    instance_type=instance_type,
)
conn_as.create_launch_configuration(lc)
print("Autoscaling configuration ready : " + build_name)

# Step 3: switch the scaling group to the new configuration and scale it down
# to zero so every existing instance is terminated.
print("\nStep 3 : Updating scaling group")
print("Updating Scaling group with new conf & terminating all the existing instances in the scaling group")
as_group = conn_as.get_all_groups(names=[scaling_group_name])[0]
as_group.launch_config_name = build_name
as_group.desired_capacity = 0
as_group.min_size = 0
as_group.update()

# After a one minute pause, restore the configured capacity so replacement
# instances are launched from the new configuration.
print("Waiting for 60 secs before launching new systems")
time.sleep(60)
as_group.desired_capacity = as_desired_capacity
as_group.min_size = as_min_size
as_group.update()
print("Systems are being launched, Updation process complete.\n\nLife is that easy , Have fun scaling :) :)")
Exemplo n.º 22
0
class BotoScaleInterface(ScaleInterface):
    """ScaleInterface implementation that forwards to a boto AutoScaleConnection.

    The ``get_all_*`` queries can additionally dump their results as JSON
    files under ``mockdata/`` when ``saveclcdata`` is set.
    """
    conn = None
    # When True, results of the get_all_* queries are also written to
    # mockdata/*.json through __save_json__.
    saveclcdata = False

    def __init__(self, clc_host, access_id, secret_key, token):
        """Open the auto scaling connection.

        Hosts ending in 'amazonaws.com' are contacted on port 443 at path
        '/' after the first 'ec2' in the host name is replaced with
        'autoscaling'; every other host is contacted on port 8773 at
        '/services/AutoScaling'.

        Args:
            clc_host: host name of the cloud endpoint.
            access_id: access key id passed to boto.
            secret_key: secret key passed to boto.
            token: security token passed to boto.
        """
        #boto.set_stream_logger('foo')
        path = '/services/AutoScaling'
        port = 8773
        if clc_host.endswith('amazonaws.com'):
            clc_host = clc_host.replace('ec2', 'autoscaling', 1)
            path = '/'
            port = 443
        reg = RegionInfo(name='eucalyptus', endpoint=clc_host)
        conn_kwargs = dict(region=reg,
                           port=port,
                           path=path,
                           is_secure=True,
                           security_token=token,
                           debug=0)
        # validate_certs is only passed to boto 2.6 and later. The version is
        # compared numerically: as strings, '2.10' would sort below '2.6'.
        if self._boto_version() >= (2, 6):
            conn_kwargs['validate_certs'] = False
        self.conn = AutoScaleConnection(access_id, secret_key, **conn_kwargs)
        self.conn.http_connection_kwargs['timeout'] = 30

    @staticmethod
    def _boto_version():
        """Return boto's (major, minor) version as a tuple of ints."""
        import re  # local import keeps this class self-contained
        match = re.match(r'(\d+)\.(\d+)', boto.__version__)
        if match is None:
            # Unparseable version string: treat it as a modern boto.
            return (2, 6)
        return (int(match.group(1)), int(match.group(2)))

    def __save_json__(self, obj, name):
        """Write ``obj`` to the file ``name`` as indented JSON."""
        # The context manager closes the file even if serialization fails.
        with open(name, 'w') as f:
            json.dump(obj, f, cls=BotoJsonScaleEncoder, indent=2)

    ##
    # autoscaling methods
    ##
    def create_auto_scaling_group(self, as_group):
        """Forward to the connection's create_auto_scaling_group."""
        return self.conn.create_auto_scaling_group(as_group)

    def delete_auto_scaling_group(self, name, force_delete=False):
        """Forward to the connection's delete_auto_scaling_group."""
        return self.conn.delete_auto_scaling_group(name, force_delete)

    def get_all_groups(self, names=None, max_records=None, next_token=None):
        """Return the connection's auto scaling groups, optionally saving them."""
        obj = self.conn.get_all_groups(names, max_records, next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/AS_Groups.json")
        return obj

    def get_all_autoscaling_instances(self,
                                      instance_ids=None,
                                      max_records=None,
                                      next_token=None):
        """Return the connection's auto scaling instances, optionally saving them."""
        obj = self.conn.get_all_autoscaling_instances(instance_ids,
                                                      max_records, next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/AS_Instances.json")
        return obj

    def set_desired_capacity(self,
                             group_name,
                             desired_capacity,
                             honor_cooldown=False):
        """Forward to the connection's set_desired_capacity."""
        return self.conn.set_desired_capacity(group_name, desired_capacity,
                                              honor_cooldown)

    def set_instance_health(self,
                            instance_id,
                            health_status,
                            should_respect_grace_period=True):
        """Forward to the connection's set_instance_health."""
        return self.conn.set_instance_health(instance_id, health_status,
                                             should_respect_grace_period)

    def terminate_instance(self, instance_id, decrement_capacity=True):
        """Forward to the connection's terminate_instance."""
        return self.conn.terminate_instance(instance_id, decrement_capacity)

    def update_autoscaling_group(self, as_group):
        """Bind ``as_group`` to this connection and call its update()."""
        as_group.connection = self.conn
        return as_group.update()

    def create_launch_configuration(self, launch_config):
        """Forward to the connection's create_launch_configuration."""
        return self.conn.create_launch_configuration(launch_config)

    def delete_launch_configuration(self, launch_config_name):
        """Forward to the connection's delete_launch_configuration."""
        return self.conn.delete_launch_configuration(launch_config_name)

    def get_all_launch_configurations(self, config_names=None,
                                      max_records=None, next_token=None):
        """Return the connection's launch configurations, optionally saving them."""
        obj = self.conn.get_all_launch_configurations(names=config_names,
                                                      max_records=max_records,
                                                      next_token=next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/AS_LaunchConfigs.json")
        return obj
class BotoScaleInterface(ScaleInterface):
    """ScaleInterface implementation that forwards to a boto AutoScaleConnection.

    The ``get_all_*`` queries can additionally dump their results as JSON
    files under ``mockdata/`` when ``saveclcdata`` is set.
    """
    conn = None
    # When True, results of the get_all_* queries are also written to
    # mockdata/*.json through __save_json__.
    saveclcdata = False

    def __init__(self, clc_host, access_id, secret_key, token):
        """Store the credentials and connect to ``clc_host``."""
        self.access_id = access_id
        self.secret_key = secret_key
        self.token = token
        self.set_endpoint(clc_host)

    def set_endpoint(self, endpoint):
        """(Re)create ``self.conn`` for the given endpoint host.

        Hosts ending in 'amazonaws.com' are contacted on port 443 at path
        '/' after the first 'ec2' in the host name is replaced with
        'autoscaling'; every other host is contacted on port 8773 at
        '/services/AutoScaling' with the auth region set to 'Eucalyptus'.
        """
        #boto.set_stream_logger('scale')
        # The 'ec2' -> 'autoscaling' rewrite below leaves the suffix intact,
        # so this check is valid both before and after it.
        is_aws = endpoint.endswith('amazonaws.com')
        if is_aws:
            endpoint = endpoint.replace('ec2', 'autoscaling', 1)
            path = '/'
            reg = RegionInfo(endpoint=endpoint)
            port = 443
        else:
            path = '/services/AutoScaling'
            reg = RegionInfo(name='eucalyptus', endpoint=endpoint)
            port = 8773
        self.conn = AutoScaleConnection(self.access_id, self.secret_key, region=reg,
                                  port=port, path=path,
                                  is_secure=True, security_token=self.token, debug=0)
        self.conn.APIVersion = '2011-01-01'
        if not is_aws:
            self.conn.auth_region_name = 'Eucalyptus'
        self.conn.https_validate_certificates = False
        self.conn.http_connection_kwargs['timeout'] = 30

    def __save_json__(self, obj, name):
        """Write ``obj`` to the file ``name`` as indented JSON."""
        # The context manager closes the file even if serialization fails.
        with open(name, 'w') as f:
            json.dump(obj, f, cls=BotoJsonScaleEncoder, indent=2)

    ##
    # autoscaling methods
    ##
    def create_auto_scaling_group(self, as_group):
        """Forward to the connection's create_auto_scaling_group."""
        return self.conn.create_auto_scaling_group(as_group)

    def delete_auto_scaling_group(self, name, force_delete=False):
        """Forward to the connection's delete_auto_scaling_group."""
        return self.conn.delete_auto_scaling_group(name, force_delete)

    def get_all_groups(self, names=None, max_records=None, next_token=None):
        """Return the connection's auto scaling groups, optionally saving them."""
        obj = self.conn.get_all_groups(names, max_records, next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/AS_Groups.json")
        return obj

    def get_all_autoscaling_instances(self, instance_ids=None, max_records=None, next_token=None):
        """Return the connection's auto scaling instances, optionally saving them."""
        obj = self.conn.get_all_autoscaling_instances(instance_ids, max_records, next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/AS_Instances.json")
        return obj

    def set_desired_capacity(self, group_name, desired_capacity, honor_cooldown=False):
        """Set the capacity of the group named ``group_name``.

        The group is looked up first and its set_capacity() is used, so
        ``honor_cooldown`` is accepted but ignored. Raises IndexError when
        the lookup returns no group.
        """
        group = self.conn.get_all_groups([group_name])[0]
        # notice, honor_cooldown not supported.
        return group.set_capacity(desired_capacity)

    def set_instance_health(self, instance_id, health_status, should_respect_grace_period=True):
        """Forward to the connection's set_instance_health."""
        return self.conn.set_instance_health(instance_id, health_status,
                                             should_respect_grace_period)

    def terminate_instance(self, instance_id, decrement_capacity=True):
        """Forward to the connection's terminate_instance."""
        return self.conn.terminate_instance(instance_id, decrement_capacity)

    def update_autoscaling_group(self, as_group):
        """Bind ``as_group`` to this connection and call its update()."""
        as_group.connection = self.conn
        return as_group.update()

    def create_launch_configuration(self, launch_config):
        """Forward to the connection's create_launch_configuration."""
        return self.conn.create_launch_configuration(launch_config)

    def delete_launch_configuration(self, launch_config_name):
        """Forward to the connection's delete_launch_configuration."""
        return self.conn.delete_launch_configuration(launch_config_name)

    def get_all_launch_configurations(self, config_names=None, max_records=None, next_token=None):
        """Return the connection's launch configurations, optionally saving them."""
        obj = self.conn.get_all_launch_configurations(names=config_names, max_records=max_records,
                                                      next_token=next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/AS_LaunchConfigs.json")
        return obj

    # policy related
    def delete_policy(self, policy_name, autoscale_group=None):
        """Forward to the connection's delete_policy."""
        return self.conn.delete_policy(policy_name, autoscale_group)

    def get_all_policies(self, as_group=None, policy_names=None, max_records=None, next_token=None):
        """Return the connection's scaling policies, optionally saving them."""
        obj = self.conn.get_all_policies(as_group, policy_names, max_records, next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/AS_Policies.json")
        return obj

    def execute_policy(self, policy_name, as_group=None, honor_cooldown=None):
        """Forward to the connection's execute_policy."""
        return self.conn.execute_policy(policy_name, as_group, honor_cooldown)

    def create_scaling_policy(self, scaling_policy):
        """Forward to the connection's create_scaling_policy."""
        return self.conn.create_scaling_policy(scaling_policy)

    def get_all_adjustment_types(self):
        """Forward to the connection's get_all_adjustment_types."""
        return self.conn.get_all_adjustment_types()

    # tag related
    def delete_tags(self, tags):
        """Forward to the connection's delete_tags."""
        return self.conn.delete_tags(tags)

    def get_all_tags(self, filters=None, max_records=None, next_token=None):
        """Return the connection's tags, optionally saving them."""
        obj = self.conn.get_all_tags(filters, max_records, next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/AS_Tags.json")
        return obj

    def create_or_update_tags(self, tags):
        """Forward to the connection's create_or_update_tags."""
        return self.conn.create_or_update_tags(tags)
Exemplo n.º 24
0
def main():
    """Bake a new worker AMI and roll it out to the 'LSDA Worker Pool' ASG.

    Steps, in order:
      1. Pick a base AMI: the image of the ASG's current launch
         configuration, or a hard-coded clean Ubuntu 12.04 image when
         --clean is given.
      2. Boot a temporary instance from it, wait until SSH works, and run
         the worker.sh setup script on it (pausing first when --inspect
         is given).
      3. Create a new AMI from that instance and wait while its state is
         "pending". The temporary instance is terminated afterwards, even
         when a step fails.
      4. Create a launch configuration for the new AMI and assign it to
         the ASG.
      5. Delete launch configurations that use another image and
         deregister old images.
      6. Unless --no-restart is given, stop every instance currently in
         the ASG.
    """
    parser = argparse.ArgumentParser(
        description="triggers a full LSDA rollout")

    parser.add_argument("--inspect", action="store_true",
                        help="pause before baking AMI", default=False)
    parser.add_argument("--clean", action="store_true",
                        help="reset from clean Ubuntu 12.04 image",
                        default=False)
    parser.add_argument("--no-restart", action="store_true",
                        dest="no_restart",
                        help="don't restart all nodes in ASG",
                        default=False)

    options = parser.parse_args()

    logging.info("Starting rollout.")

    conn_ec2 = boto.ec2.connect_to_region("us-east-1")
    conn_ec2_as = AutoScaleConnection()

    # The ASG is updated (and optionally restarted) further down on every
    # path, so it has to be looked up even when --clean skips the search for
    # the current image.
    group = conn_ec2_as.get_all_groups(['LSDA Worker Pool'])[0]

    if not options.clean:
        logging.info("Searching for existing images...")

        launch_config = conn_ec2_as.get_all_launch_configurations(
            names=[group.launch_config_name])[0]

        ami_id = launch_config.image_id
        logging.info("Using existing image {0}".format(ami_id))

    else:
        ami_id = 'ami-59a4a230'  # Clean Ubuntu 12.04.
        logging.info("Using base image {0}".format(ami_id))

    reservation = conn_ec2.run_instances(
        image_id=ami_id,
        key_name='jeremy-aws-key',
        instance_type='t1.micro',
        security_groups=['Worker Nodes'],
    )

    # Host key checking is disabled for every SSH call below.
    ssh_base = ["ssh", "-o", "UserKnownHostsFile=/dev/null",
                "-o", "StrictHostKeyChecking=no"]

    try:
        instance = reservation.instances[0]
        logging.info("Waiting for instance {} to start...".format(instance.id))

        # Poll until the instance reports an IP address.
        instance.update()
        while instance.ip_address is None:
            logging.info("Not ready. Retrying in 10 seconds...")
            time.sleep(10)
            instance.update()

        target = "ubuntu@{}".format(instance.ip_address)

        # Poll until a trivial remote command succeeds over SSH.
        while subprocess.call(ssh_base + [target, "uname -r"]) != 0:
            logging.info("Not ready for SSH. Retrying in 10 seconds...")
            time.sleep(10)

        logging.info("Instance has started; running setup script.")
        logging.info("(IP address is {})".format(instance.ip_address))

        subprocess.check_call(ssh_base + [
            target,
            "sudo stop lsda; sleep 20; sudo rm worker.sh;"
            "wget https://raw.github.com/fatlotus/lsda-infrastructure/"
            "master/servers/worker.sh; sudo bash worker.sh"])

        if options.inspect:
            logging.info("Connect to ubuntu@{} to inspect the image."
                         .format(instance.ip_address))
            logging.info("When you're done, press CTRL-C.")

            # Block until the operator interrupts with CTRL-C.
            try:
                while True:
                    time.sleep(3600)
            except KeyboardInterrupt:
                pass

        logging.info("Creating AMI from existing image.")
        new_image = instance.create_image(
            name=('Latest-{:%Y-%m-%d--%H-%M-%S}'.
                  format(datetime.datetime.now())),
            description="(automatically generated)"
        )

        time.sleep(10)

        image_object = conn_ec2.get_image(new_image)

        while image_object.state == "pending":
            logging.info("State is still pending. Retrying in 10 seconds.")
            time.sleep(10)
            image_object.update()

    finally:
        # Always clean up the temporary instance(s), even on failure.
        logging.warning("Stopping all nodes.")
        for node in reservation.instances:
            node.terminate()

    logging.info("Creating new LaunchConfiguration.")

    mapping = BlockDeviceMapping()
    mapping["/dev/sdb"] = BlockDeviceType(ephemeral_name="ephemeral0")
    mapping["/dev/sdc"] = BlockDeviceType(ephemeral_name="ephemeral1")

    new_launch_config = LaunchConfiguration(
        conn_ec2_as,
        name=('Latest-{:%Y-%m-%d--%H-%M-%S}'.
              format(datetime.datetime.now())),
        image_id=new_image,
        security_groups=['sg-f9a08492'],
        instance_type='c3.large',
        block_device_mappings=[mapping],
        instance_profile_name=("arn:aws:iam::470084502640:instance-profile"
                               "/dal-access"),
        spot_price=0.02,
    )
    conn_ec2_as.create_launch_configuration(new_launch_config)

    logging.info("Setting launch configuration in existing ASG.")
    group.launch_config_name = new_launch_config.name
    group.update()

    logging.info("Cleaning up old launch configurations.")
    # NOTE(review): this walks every launch configuration the connection
    # returns, not only those of this ASG -- confirm that is intended.
    for config in conn_ec2_as.get_all_launch_configurations():
        if config.image_id != new_launch_config.image_id:
            conn_ec2_as.delete_launch_configuration(config.name)

    logging.info("Cleaning up old images.")
    # NOTE(review): images baked above are named 'Latest-<timestamp>', which
    # this name filter does not appear to match -- confirm the intent.
    for image in conn_ec2.get_all_images(filters={"name": ["LatestImage"]}):
        if image.id != new_image:
            conn_ec2.deregister_image(image.id, True)

    logging.info("Rollout complete. New image is {}.".format(new_image))

    if not options.no_restart:
        logging.info("Triggering reload of all nodes in ASG.")
        for member in group.instances:
            member_reservations = conn_ec2.get_all_instances(
                [member.instance_id])
            for member_reservation in member_reservations:
                member_reservation.stop_all()
Exemplo n.º 25
0
def add_ingress_rule(dry_run, go_agent_security_group, go_agent_security_group_owner, go_agent_security_group_name):
    """
    For each ASG (app) in each VPC, add a rule to each SG associated with the ASG's launch configuration
    that allows SSH ingress from the GoCD agents' SG.

    BEFORE RUNNING THIS SCRIPT!:
    - Use the assume_role bash script to assume the role in the proper account/VPC (edx, edge, mckinsey, etc.)
        - If you don't know what this is, ask someone in DevOps.
    - THEN run this script.

    Args:
        dry_run: passed through to the authorize call as its dry_run flag.
        go_agent_security_group: id of the GoCD agents' security group.
        go_agent_security_group_owner: owner id of that security group.
        go_agent_security_group_name: name of that security group.

    Raises:
        KeyError: if an ASG references a launch configuration that was not found.
        boto.exception.EC2ResponseError: for authorize failures other than
            the dry-run (status 412) and duplicate-rule responses.
    """
    asg_conn = AutoScaleConnection()
    ec2_conn = boto.ec2.connect_to_region('us-east-1')
    asgs = []
    launch_configs = {}
    security_groups = {}

    logging.debug('All ASGs:')
    for group in asg_conn.get_all_groups():
        logging.debug('    {}'.format(group))
        asgs.append(group)

    logging.debug('All launch configurations:')
    for launch_config in asg_conn.get_all_launch_configurations():
        logging.debug('    {}'.format(launch_config))
        launch_configs[launch_config.name] = launch_config

    logging.debug('All security groups:')
    for sec_group in ec2_conn.get_all_security_groups():
        logging.debug('    {}'.format(sec_group))
        security_groups[sec_group.id] = sec_group

    # Validate that each ASG has a launch configuration.
    for group in asgs:
        try:
            logging.info("Launch configuration for ASG '{}' is '{}'.".format(
                group.name, launch_configs[group.launch_config_name]
            ))
        except KeyError:
            logging.error("Launch configuration '{}' for ASG '{}' was not found!".format(
                group.launch_config_name, group.name
            ))
            raise

    # Construct a fake security group for the prod-tools-goagent-sg security group in the edx-tools account.
    # This group will be used to grant the go-agents ingress into the ASG's VPCs.
    # It is kept in its own local so the id parameter is not rebound to a
    # different type.
    go_agent_sg = boto.ec2.securitygroup.SecurityGroup(
        name=go_agent_security_group_name,
        owner_id=go_agent_security_group_owner,
        id=go_agent_security_group
    )

    # For each launch config, check for the security group. Can support multiple security groups
    # but the edX DevOps convention is to use a single security group.
    for group in asgs:
        launch_config = launch_configs[group.launch_config_name]
        if len(launch_config.security_groups) > 1:
            err_msg = "Launch config '{}' for ASG '{}' has more than one security group!: {}".format(
                launch_config.name, group.name, launch_config.security_groups
            )
            logging.warning(err_msg)
            continue
        if not launch_config.security_groups:
            logging.warning("Launch config '{}' for ASG '{}' has no security group!".format(
                launch_config.name, group.name
            ))
            continue
        sg_name = launch_config.security_groups[0]
        try:
            # Find the security group.
            sec_group = security_groups[sg_name]
        except KeyError:
            logging.error("Security group '{}' for ASG '{}' was not found!.".format(sg_name, group.name))
            # Skip this ASG: falling through would reuse the security group
            # left over from an earlier iteration and modify the wrong one.
            continue
        logging.info('BEFORE: Rules for security group {}:'.format(sec_group.name))
        logging.info(sec_group.rules)
        try:
            # Add the ingress rule to the security group.
            yes_no = raw_input("Apply the change to this security group? [Yes]")
            # Empty input accepts the advertised default; the answer is
            # matched case-insensitively so that "Yes" itself is accepted.
            if yes_no.strip().lower() in ("", "y", "yes"):
                sec_group.authorize(
                    ip_protocol='tcp',
                    from_port=22,
                    to_port=22,
                    src_group=go_agent_sg,
                    dry_run=dry_run
                )
        except boto.exception.EC2ResponseError as exc:
            if exc.status == 412:
                # If the dry_run flag is set, then each rule addition will raise this exception.
                # Log it and carry on.
                logging.info('Dry run is True but rule addition would have succeeded for security group {}.'.format(
                    sg_name
                ))
            elif exc.code == "InvalidPermission.Duplicate":
                logging.info("Rule already exists for {}.".format(sg_name))
            else:
                raise
        logging.info('AFTER: Rules for security group {}:'.format(sg_name))
        logging.info(sec_group.rules)
Exemplo n.º 26
0
class EbsHelper(object):
    """
    Class for helping with ebs
    """
    def __init__(
        self,
        aws,
        wait_time_secs,
        app_name=None,
    ):
        """
        Builds the helper and opens its three service connections

        ``aws`` supplies region, access_key, secret_key and security_token.
        The S3 host is 's3.amazonaws.com' for us-east-1 and
        's3-<region>.amazonaws.com' for every other region.
        """
        credentials = dict(aws_access_key_id=aws.access_key,
                           aws_secret_access_key=aws.secret_key,
                           security_token=aws.security_token)

        self.aws = aws
        self.ebs = connect_to_region(aws.region, **credentials)
        # NOTE(review): no region is passed here, unlike the other two
        # connections -- confirm the default region is what is wanted.
        self.autoscale = AutoScaleConnection(**credentials)

        if aws.region == 'us-east-1':
            s3_host = 's3.amazonaws.com'
        else:
            s3_host = 's3-' + aws.region + '.amazonaws.com'
        self.s3 = S3Connection(host=s3_host, **credentials)

        self.app_name = app_name
        self.wait_time_secs = wait_time_secs

    def swap_environment_cnames(self, from_env_name, to_env_name):
        """
        Swaps cnames for an environment
        """
        self.ebs.swap_environment_cnames(
            source_environment_name=from_env_name,
            destination_environment_name=to_env_name)

    def upload_archive(self, filename, key, auto_create_bucket=True):
        """
        Uploads an application archive version to s3

        The configured bucket must be located in the configured region
        (us-east-1 buckets are expected to report an empty location).
        When looking up the bucket fails with an S3ResponseError the bucket
        is created, unless auto_create_bucket is False, in which case the
        error is re-raised.

        Raises an Exception when the existing bucket is in another region.
        """
        try:
            bucket = self.s3.get_bucket(self.aws.bucket)
            if self.aws.region == 'us-east-1':
                expected_location = ''
            else:
                expected_location = self.aws.region
            if bucket.get_location() != expected_location:
                raise Exception("Existing bucket doesn't match region")
        except S3ResponseError:
            if not auto_create_bucket:
                raise
            bucket = self.s3.create_bucket(self.aws.bucket,
                                           location=self.aws.region)

        def __report_upload_progress(sent, total):
            sent = sent or 0
            total = total or 0
            # Guard the division: total can be 0 or missing.
            if total:
                percent = int(float(sent) / float(total) * 100)
            else:
                percent = 0
            out("Uploaded " + str(sent) + " bytes of " + str(total) \
                + " (" + str(percent) + "%)")

        # upload the new version
        k = Key(bucket)
        k.key = self.aws.bucket_path + key
        k.set_metadata('time', str(time()))
        k.set_contents_from_filename(filename,
                                     cb=__report_upload_progress,
                                     num_cb=10)

    def list_available_solution_stacks(self):
        """
        Returns a list of available solution stacks
        """
        stacks = self.ebs.list_available_solution_stacks()
        return stacks['ListAvailableSolutionStacksResponse'][
            'ListAvailableSolutionStacksResult']['SolutionStacks']

    def create_application(self, description=None):
        """
        Creates the application named by the helper's app_name,
        announcing it first
        """
        app = self.app_name
        out("Creating application " + str(app))
        self.ebs.create_application(app, description=description)

    def delete_application(self):
        """
        Deletes the application named by the helper's app_name,
        asking the service to terminate its environments by force
        """
        app = self.app_name
        out("Deleting application " + str(app))
        self.ebs.delete_application(app, terminate_env_by_force=True)

    def application_exists(self):
        """
        Returns whether or not the given app_name exists
        """
        response = self.ebs.describe_applications(
            application_names=[self.app_name])
        return len(response['DescribeApplicationsResponse']
                   ['DescribeApplicationsResult']['Applications']) > 0

    def create_environment(self,
                           env_name,
                           version_label=None,
                           solution_stack_name=None,
                           cname_prefix=None,
                           description=None,
                           option_settings=None,
                           tier_name='WebServer',
                           tier_type='Standard',
                           tier_version='1.1'):
        """
        Announces and creates a new environment in the helper's application

        Every argument is forwarded unchanged to the service call.
        """
        announcement = ("Creating environment: " + str(env_name) +
                        ", tier_name:" + str(tier_name) +
                        ", tier_type:" + str(tier_type))
        out(announcement)
        environment_args = dict(
            version_label=version_label,
            solution_stack_name=solution_stack_name,
            cname_prefix=cname_prefix,
            description=description,
            option_settings=option_settings,
            tier_type=tier_type,
            tier_name=tier_name,
            tier_version=tier_version,
        )
        self.ebs.create_environment(self.app_name, env_name,
                                    **environment_args)

    def environment_exists(self, env_name, include_deleted=False):
        """
        Returns whether or not the given environment exists
        """
        response = self.ebs.describe_environments(
            application_name=self.app_name,
            environment_names=[env_name],
            include_deleted=include_deleted)
        return len(response['DescribeEnvironmentsResponse']['DescribeEnvironmentsResult']['Environments']) > 0 \
               and response['DescribeEnvironmentsResponse']['DescribeEnvironmentsResult']['Environments'][0][
                       'Status'] != 'Terminated'

    def environment_resources(self, env_name):
        """
        Returns the description for the given environment's resources
        """
        resp = self.ebs.describe_environment_resources(
            environment_name=env_name)
        return resp['DescribeEnvironmentResourcesResponse'][
            'DescribeEnvironmentResourcesResult']['EnvironmentResources']

    def get_env_sizing_metrics(self, env_name):
        asg = self.get_asg(env_name)
        if asg:
            return asg.min_size, asg.max_size, asg.desired_capacity
        else:
            return None, None, None

    def get_asg(self, env_name):
        asg_name = self.get_asg_name(env_name)
        asgs = self.autoscale.get_all_groups(names=[asg_name])
        asg = None
        if asgs:
            asg = asgs[0]
        return asg

    def get_asg_name(self, env_name):
        resources = self.environment_resources(env_name)
        name = resources["AutoScalingGroups"][0]["Name"]
        return name

    def set_env_sizing_metrics(self, env_name, min_size, max_size):
        self.update_environment(env_name,
                                option_settings=[("aws:autoscaling:asg",
                                                  "MinSize", min_size),
                                                 ("aws:autoscaling:asg",
                                                  "MaxSize", max_size)])

    def environment_data(self, env_name):
        """
        Returns the description for the given environment
        """
        response = self.ebs.describe_environments(
            application_name=self.app_name,
            environment_names=[env_name],
            include_deleted=False)
        return response['DescribeEnvironmentsResponse'][
            'DescribeEnvironmentsResult']['Environments'][0]

    def rebuild_environment(self, env_name):
        """
        Announces and requests a rebuild of the given environment
        """
        announcement = "Rebuilding " + str(env_name)
        out(announcement)
        self.ebs.rebuild_environment(environment_name=env_name)

    def get_environments(self):
        """
        Returns the environments
        """
        response = self.ebs.describe_environments(
            application_name=self.app_name, include_deleted=False)
        return response['DescribeEnvironmentsResponse'][
            'DescribeEnvironmentsResult']['Environments']

    def delete_environment(self, environment_name):
        """
        Deletes an environment
        """
        self.ebs.terminate_environment(environment_name=environment_name,
                                       terminate_resources=True)

    def update_environment(self,
                           environment_name,
                           description=None,
                           option_settings=None,
                           tier_type=None,
                           tier_name=None,
                           tier_version='1.0'):
        """
        Validates the option settings, reports the validation messages and
        updates the environment

        Validation messages of every severity are only reported through
        out(); the update request is sent regardless of them.
        option_settings defaults to an empty list.
        """
        # A fresh list per call avoids sharing one mutable default object.
        if option_settings is None:
            option_settings = []
        out("Updating environment: " + str(environment_name))
        validation = self.ebs.validate_configuration_settings(
            self.app_name, option_settings, environment_name=environment_name)
        messages = validation['ValidateConfigurationSettingsResponse'][
            'ValidateConfigurationSettingsResult']['Messages']
        for message in messages:
            out("[" + message['Severity'] + "] " + str(environment_name) + " - '" \
                + message['Namespace'] + ":" + message['OptionName'] + "': " + message['Message'])
        self.ebs.update_environment(environment_name=environment_name,
                                    description=description,
                                    option_settings=option_settings,
                                    tier_type=tier_type,
                                    tier_name=tier_name,
                                    tier_version=tier_version)

    def get_previous_environment_for_subdomain(self, env_subdomain):
        """
        Returns an environment name for the given cname
        """
        def sanitize_subdomain(subdomain):
            return subdomain.lower()

        env_subdomain = sanitize_subdomain(env_subdomain)

        def match_cname(cname):
            subdomain = sanitize_subdomain(cname.split(".")[0])
            return subdomain == env_subdomain

        def match_candidate(env):
            return env['Status'] != 'Terminated' \
                    and env.get('CNAME') \
                    and match_cname(env['CNAME'])

        envs = self.get_environments()
        candidates = [env for env in envs if match_candidate(env)]

        match = None
        if candidates:
            match = candidates[0]["EnvironmentName"]

        return match

    def deploy_version(self, environment_name, version_label):
        """
        Announces the deployment and updates the environment to the
        given version label
        """
        announcement = ("Deploying " + str(version_label) +
                        " to " + str(environment_name))
        out(announcement)
        self.ebs.update_environment(version_label=version_label,
                                    environment_name=environment_name)

    def get_versions(self):
        """
        Returns the versions available
        """
        response = self.ebs.describe_application_versions(
            application_name=self.app_name)
        return response['DescribeApplicationVersionsResponse'][
            'DescribeApplicationVersionsResult']['ApplicationVersions']

    def create_application_version(self, version_label, key):
        """
        Announces and registers an application version whose source is the
        configured bucket at bucket_path + key
        """
        out("Creating application version " + str(version_label) + " for " +
            str(key))
        source = {
            's3_bucket': self.aws.bucket,
            's3_key': self.aws.bucket_path + key,
        }
        self.ebs.create_application_version(self.app_name, version_label,
                                            **source)

    def delete_unused_versions(self, versions_to_keep=10):
        """
        Deletes old application versions that no environment is running.

        The ``versions_to_keep`` most recently created versions (ordered by
        ``DateCreated``) are never touched. Of the remaining, older versions,
        each one is deleted unless a non-deleted environment of this
        application currently reports it as its ``VersionLabel``.
        """

        # get versions in use
        environments = self.ebs.describe_environments(
            application_name=self.app_name, include_deleted=False)
        environments = environments['DescribeEnvironmentsResponse'][
            'DescribeEnvironmentsResult']['Environments']
        versions_in_use = set(env['VersionLabel'] for env in environments)

        # get all versions, newest first
        versions = self.ebs.describe_application_versions(
            application_name=self.app_name)
        versions = versions['DescribeApplicationVersionsResponse'][
            'DescribeApplicationVersionsResult']['ApplicationVersions']
        # key= gives the same ordering as the former cmp= callback and also
        # works on Python 3, where sorted() no longer accepts cmp
        versions = sorted(
            versions,
            key=lambda version: version['DateCreated'],
            reverse=True)

        # delete versions that are beyond the keep window and not in use
        for version in versions[versions_to_keep:]:
            label = version['VersionLabel']
            if label in versions_in_use:
                out("Not deleting " + label + " because it is in use")
                continue

            out("Deleting unused version: " + label)
            self.ebs.delete_application_version(
                application_name=self.app_name,
                version_label=label)
            # pause between delete calls (presumably to stay under API rate
            # limits -- TODO confirm)
            sleep(2)

    def describe_events(self,
                        environment_name,
                        next_token=None,
                        start_time=None):
        """
        Fetches events for the given environment of this application.

        Returns a ``(events, next_token)`` tuple taken from the response.
        """
        response = self.ebs.describe_events(
            application_name=self.app_name,
            environment_name=environment_name,
            next_token=next_token,
            start_time=start_time)

        result = response['DescribeEventsResponse']['DescribeEventsResult']
        return (result['Events'], result['NextToken'])

    def wait_for_environments(self,
                              environment_names,
                              health=None,
                              status=None,
                              version_label=None,
                              include_deleted=True,
                              use_events=True):
        """
        Waits for the given environment(s) to reach the requested health,
        status and/or version label, logging new environment events
        while polling.

        ``environment_names`` may be a single name or a list/tuple of names.
        Each of ``health``, ``status`` and ``version_label`` is only checked
        when it is not None. ``include_deleted`` is forwarded to
        ``describe_environments``. ``use_events`` is accepted but not
        consulted by this method.

        Raises Exception when no environments are found, when an
        environment is sampled as Ready and Red more than MAX_RED_SAMPLES
        times, or when ``self.wait_time_secs`` is exceeded. BotoServerError
        is re-raised unless its error code is 'Throttling', in which case
        the polling delay is increased instead.
        """

        # always work on a private list: names are removed from it as the
        # environments become ready, and a tuple argument has no remove()
        if isinstance(environment_names, (list, tuple)):
            environment_names = list(environment_names)
        else:
            environment_names = [environment_names]

        # print some stuff
        s = "Waiting for environment(s) " + (
            ", ".join(environment_names)) + " to"
        if health is not None:
            s += " have health " + health
        else:
            s += " have any health"
        if version_label is not None:
            s += " and have version " + version_label
        if status is not None:
            s += " and have status " + status
        out(s)

        started = time()
        seen_events = list()
        # Red samples per environment name. This lives outside the polling
        # loop because describe_environments hands back fresh dicts on every
        # poll, so a counter stored on the env dict would restart each time.
        red_counts = {}

        # remember the events that are already there so they are not logged
        for env_name in environment_names:
            (events,
             next_token) = self.describe_events(env_name,
                                                start_time=utcnow_isoformat())
            seen_events.extend(events)

        delay = 10

        while True:
            # bail if they're all good
            if len(environment_names) == 0:
                break

            # wait
            sleep(delay)

            # get the envs, backing off when the API throttles us
            try:
                environments = self.ebs.describe_environments(
                    application_name=self.app_name,
                    environment_names=environment_names,
                    include_deleted=include_deleted)
            except BotoServerError as e:
                if not e.error_code == 'Throttling':
                    raise
                delay = min(60, int(delay * 1.5))
                out("Throttling: setting delay to " + str(delay) + " seconds")
                continue

            environments = environments['DescribeEnvironmentsResponse'][
                'DescribeEnvironmentsResult']['Environments']
            if len(environments) <= 0:
                raise Exception("Couldn't find any environments")

            # loop through and wait
            for env in environments:
                env_name = env['EnvironmentName']

                # the message
                msg = "Environment " + env_name + " is " + str(env['Health'])
                if version_label is not None:
                    msg = msg + " and has version " + str(env['VersionLabel'])
                if status is not None:
                    msg = msg + " and has status " + str(env['Status'])

                # what we're doing
                good_to_go = True
                if health is not None:
                    good_to_go = good_to_go and str(env['Health']) == health
                if status is not None:
                    good_to_go = good_to_go and str(env['Status']) == status
                if version_label is not None:
                    good_to_go = good_to_go and str(
                        env['VersionLabel']) == version_label

                # allow a certain number of Red samples before failing
                if env['Status'] == 'Ready' and env['Health'] == 'Red':
                    red_counts[env_name] = red_counts.get(env_name, 0) + 1
                    if red_counts[env_name] > MAX_RED_SAMPLES:
                        out('Deploy failed')
                        raise Exception('Ready and red')

                # log it
                if good_to_go:
                    out(msg + " ... done")
                    # the same name can come back more than once (e.g. a
                    # deleted environment next to a live one), so only
                    # remove it while it is still pending
                    if env_name in environment_names:
                        environment_names.remove(env_name)
                else:
                    out(msg + " ... waiting")

                # log events
                try:
                    (events, next_token) = self.describe_events(
                        env_name, start_time=utcnow_isoformat())
                except BotoServerError as e:
                    if not e.error_code == 'Throttling':
                        raise
                    delay = min(60, int(delay * 1.5))
                    out("Throttling: setting delay to " + str(delay) +
                        " seconds")
                    # skip the remaining environments for this poll
                    break

                for event in events:
                    if event not in seen_events:
                        out("[" + event['Severity'] + "] " + event['Message'])
                        seen_events.append(event)

            # check the time
            elapsed = time() - started
            if elapsed > self.wait_time_secs:
                message = "Wait time for environment(s) {environments} to be {health} expired".format(
                    environments=" and ".join(environment_names),
                    health=(health or "Green"))
                raise Exception(message)
Exemplo n.º 27
0
class Cloud(object):
    """Manages the instances of one cloud through a boto autoscale group.

    On construction the object opens an EC2 connection and an autoscale
    connection, then loads (or creates) the launch configuration and the
    autoscale group named in ``cloud_config``. ``refresh`` re-reads the
    group and rebuilds ``all_instances`` from the instances that belong to
    the group and are in one of VALID_RUN_STATES.
    """

    def __init__(self, cloud_config):
        self.config = cloud_config
        self.all_instances = []
        self.failed_launch = False
        self.failed_count = 0
        self.failed_last_valid_count = 0
        self._conn = None
        self._as_conn = None
        self._lc = None
        self._asg = None
        self._last_asg_launch_attempt = None
        self.maxed = False
        self._last_launch_attempt = datetime.datetime.utcnow()
        self._initialize()

    def _create_connection(self):
        """Open the EC2 connection and point it at the configured host."""
        LOG.debug("Creating connection for %s" % self.config.name)
        self._conn = boto.connect_ec2(self.config.access_id,
                                      self.config.secret_key,
                                      validate_certs=False)
        self._conn.host = self.config.cloud_uri
        self._conn.port = self.config.cloud_port

    def _create_autoscale_connection(self):
        """Open the autoscale connection against the configured endpoint."""
        LOG.debug("Creating autoscale connection for %s" % self.config.name)
        region = RegionInfo(name=self.config.cloud_type,
                            endpoint=self.config.as_uri)
        self._as_conn = AutoScaleConnection(
            aws_access_key_id=self.config.access_id,
            aws_secret_access_key=self.config.secret_key,
            is_secure=True,
            port=self.config.as_port,
            region=region,
            validate_certs=False)

    def _create_or_set_launch_configuration(self):
        """Load the launch configuration by name, creating it if missing."""
        name = self.config.lc_name
        if not self._lc:
            LOG.debug("Attempting to load launch configuration: %s" % (name))
            lc = self._as_conn.get_all_launch_configurations(names=[name])
            if len(lc) == 1:
                LOG.debug("Launch configuration %s found." % (name))
                self._lc = lc[0]
        if not self._lc:
            #TODO(pdmars): key and security groups are hardcoded for now, gross
            user_data = None
            if self.config.user_data_file is not None:
                with open(self.config.user_data_file) as f:
                    user_data = f.read()
            LOG.debug("Creating launch configuration %s" % name)
            LOG.debug("\tname: %s" % name)
            LOG.debug("\timage_id: %s" % self.config.image_id)
            LOG.debug("\tinstance_type: %s" % self.config.instance_type)
            LOG.debug("\tuser_data: %s" % user_data)
            self._lc = LaunchConfiguration(
                name=name,
                image_id=self.config.image_id,
                key_name="phantomkey",
                security_groups=['default'],
                instance_type=self.config.instance_type,
                user_data=user_data)
            self._as_conn.create_launch_configuration(self._lc)

    def _create_or_set_autoscale_group(self):
        """Load the autoscale group by name, creating it if missing.

        A newly created group starts with min_size and max_size of 0 and
        carries three tags (policy name, ordered clouds, minimum VMs).
        """
        name = self.config.asg_name
        if not self._asg:
            LOG.debug("Attempting to load autoscale group: %s" % name)
            asg = self._as_conn.get_all_groups(names=[name])
            LOG.debug("Autoscale group: %s" % asg)
            if len(asg) == 1:
                LOG.debug("Autoscale group %s found." % name)
                self._asg = asg[0]
        if not self._asg:
            # TODO(pdmars): more hard coded grossness, for now
            # the cloud name is taken from the text after "@" in lc_name
            try:
                cloud_guess = self.config.lc_name.split("@")[1].strip()
            except Exception:
                LOG.warn("Unable to guess cloud for auto scale tags")
                LOG.warn("Setting cloud to hotel")
                cloud_guess = "hotel"
            policy_name_key = "PHANTOM_DEFINITION"
            policy_name = "error_overflow_n_preserving"
            ordered_clouds_key = "clouds"
            n_preserve_key = "minimum_vms"
            ordered_clouds = cloud_guess + ":-1"
            n_preserve = 0
            policy_tag = Tag(connection=self._as_conn, key=policy_name_key,
                             value=policy_name, resource_id=name)
            clouds_tag = Tag(connection=self._as_conn, key=ordered_clouds_key,
                             value=ordered_clouds, resource_id=name)
            npreserve_tag = Tag(connection=self._as_conn, key=n_preserve_key,
                                value=n_preserve, resource_id=name)
            tags = [policy_tag, clouds_tag, npreserve_tag]
            zones = [self.config.az]
            LOG.debug("Creating autoscale group %s" % name)
            LOG.debug("\tname: %s" % name)
            LOG.debug("\tavailability_zones: %s" % zones)
            LOG.debug("\tlaunch_config: %s" % self._lc)
            self._asg = AutoScalingGroup(group_name=name,
                                         availability_zones=zones,
                                         min_size=0,
                                         max_size=0,
                                         launch_config=self._lc,
                                         tags=tags)
            self._as_conn.create_auto_scaling_group(self._asg)

    def _initialize(self):
        """Create both connections, then the launch config and the group."""
        LOG.debug("Initializing %s" % self.config.name)
        self._create_connection()
        self._create_autoscale_connection()
        self._create_or_set_launch_configuration()
        self._create_or_set_autoscale_group()
        LOG.debug("Initialization complete for %s" % self.config.name)

    def get_valid_instances(self):
        """Return the instances collected by the last refresh."""
        return self.all_instances

    def _refresh_instances(self):
        """Rebuild ``all_instances`` and update the ``maxed`` flag.

        Only instances that belong to the autoscale group and whose state
        is in VALID_RUN_STATES are kept.
        """
        LOG.debug("%s: getting instance information" % self.config.name)
        self.all_instances = []
        # a set makes the per-instance membership test cheap; "or []" guards
        # against a group whose instances attribute is None
        as_instance_ids = set(
            i.instance_id for i in (self._asg.instances or []))
        for reservation in self._conn.get_all_instances():
            for instance in reservation.instances:
                if (instance.id in as_instance_ids and
                        instance.state in VALID_RUN_STATES):
                    self.all_instances.append(instance)
        num_instances = len(self.all_instances)
        LOG.debug("%s: updated %d instances" % (self.config.name,
                                                num_instances))
        if num_instances >= self.config.max_instances:
            LOG.warn("%s reached the max (%s) instances: %s" % (
                self.config.name, self.config.max_instances,
                num_instances))
            self.maxed = True
        else:
            self.maxed = False

    def _refresh_asg(self):
        """Re-read the autoscale group; keep the old one if lookup fails."""
        LOG.debug("%s: refreshing autoscale group" % self.config.name)
        asg_name = self.config.asg_name
        asgs = self._as_conn.get_all_groups(names=[asg_name])
        if len(asgs) == 1:
            self._asg = asgs[0]
            LOG.debug("\trefreshed autoscale group: %s" % asg_name)
        else:
            LOG.warn("\tunable to refresh autoscale group: %s" % asg_name)

    def refresh(self, cluster):
        """Refresh the group and then the instance list.

        ``cluster`` is accepted but not used by this implementation.
        """
        self._refresh_asg()
        self._refresh_instances()

    def get_total_num_valid_cores(self):
        """Return valid instance count times ``config.instance_cores``.

        Logs a debug note when that total differs from the core count
        implied by the group's desired capacity.
        """
        LOG.debug("%s: getting number of valid cores" % self.config.name)
        num_valid_instances = len(self.get_valid_instances())
        total_valid_cores = num_valid_instances * self.config.instance_cores
        num_desired_instances = self._asg.desired_capacity
        num_desired_cores = num_desired_instances * self.config.instance_cores
        # compare against the computed total, not a constant zero
        if num_desired_cores != total_valid_cores:
            LOG.debug("\tmismatching core counts")
            LOG.debug("\tnum_desired_cores: %d" % (num_desired_cores))
            LOG.debug("\ttotal_valid_cores: %d" % (total_valid_cores))
        return total_valid_cores

    def get_instance_by_id(self, id):
        """Return the known instance with the given id, or None."""
        LOG.debug("Searching for instance %s" % id)
        for instance in self.all_instances:
            if instance.id == id:
                LOG.debug("Found instance %s" % id)
                return instance
        return None

    def get_instance_ids_for_public_dns_names(self, public_dns_names):
        """Return ids of known instances whose DNS name is in the given set."""
        return [instance.id for instance in self.all_instances
                if instance.public_dns_name in public_dns_names]

    def get_public_dns_names_close_to_charge(self):
        """Return DNS names of instances nearing their next charge period.

        An instance qualifies when the seconds left in its current
        ``config.charge_time_secs`` period are fewer than three times the
        configured loop sleep.
        """
        instances_close_to_charge = []
        sleep_secs = self.config.get_loop_sleep_secs()
        cur_utc_time = datetime.datetime.utcnow()
        valid_instances = self.get_valid_instances()
        time_fmt = "%Y-%m-%dT%H:%M:%S.%fZ"
        for instance in valid_instances:
            launch_time = datetime.datetime.strptime(instance.launch_time,
                                                     time_fmt)
            time_diff = cur_utc_time - launch_time
            # Ignores microseconds
            time_diff_secs = time_diff.seconds + time_diff.days * 24 * 3600
            cur_charge_secs = time_diff_secs % self.config.charge_time_secs
            secs_to_charge = self.config.charge_time_secs - cur_charge_secs
            LOG.debug("%s:%s: charge: %d; current: %d; to charge: %d" % (
                instance.id, instance.public_dns_name,
                self.config.charge_time_secs,
                cur_charge_secs, secs_to_charge))
            if secs_to_charge < (3 * sleep_secs):
                instances_close_to_charge.append(instance.public_dns_name)
        return instances_close_to_charge

    def delete_instances(self, instance_ids=None):
        """Terminate the given instances through the autoscale connection.

        Does nothing when ``instance_ids`` is empty or None. Before
        terminating, the desired capacity is lowered to the number of known
        instances if it is currently higher and at least one is known.
        """
        if not instance_ids:
            return
        LOG.debug("Deleting instances: %s" % instance_ids)
        # TODO(pdmars): this has the potential to kill instances running jobs
        # maybe I should err on the side of having extra instances if the
        # capacity is higher than the cloud can currently support
        num_instances = len(self.all_instances)
        if ((self._asg.desired_capacity > num_instances) and
                (num_instances > 0)):
            LOG.warn("Desired capacity is greater than num_instances running")
            LOG.warn("Adjusting desired capacity to match")
            self.set_capacity(num_instances)
        for instance_id in instance_ids:
            self._as_conn.terminate_instance(instance_id)
            # TODO(pdmars): due to a bug in phantom, maybe this will help
            # 2013/04/05: this might not be relevant anymore
            time.sleep(.1)

    def launch_autoscale_instances(self, num_instances=1):
        """Raise desired capacity by ``num_instances``, capped at the max."""
        new_capacity = self._asg.desired_capacity + int(num_instances)
        if new_capacity > self.config.max_instances:
            new_capacity = self.config.max_instances
            LOG.warn("%s can launch %s total instances" % (self.config.name,
                                                           new_capacity))
        self._last_launch_attempt = datetime.datetime.utcnow()
        LOG.debug("Setting cloud capacity for %s to %s" % (self.config.name,
                                                           new_capacity))
        self.set_capacity(new_capacity)

    def set_capacity(self, new_capacity):
        """Set the autoscale group's desired capacity."""
        self._asg.set_capacity(new_capacity)
Exemplo n.º 28
0
class BotoScaleInterface(ScaleInterface):
    """ScaleInterface implementation that delegates to boto's AutoScale API.

    Most methods forward straight to the underlying AutoScaleConnection.
    When ``saveclcdata`` is true, the results of the listing calls are also
    dumped as JSON under ``mockdata/``.
    """
    conn = None
    saveclcdata = False

    def __init__(self, clc_host, access_id, secret_key, token):
        """Connect to the AutoScaling service behind ``clc_host``.

        Hosts ending in 'amazonaws.com' use port 443, path '/' and no
        explicit region; any other host uses port 8773, the
        '/services/AutoScaling' path and a region built from ``clc_host``.
        """
        #boto.set_stream_logger('foo')
        is_aws = clc_host.endswith('amazonaws.com')
        if is_aws:
            # NOTE(review): the rewritten host is not passed to the
            # connection below; with region=None boto picks its own
            # endpoint -- confirm this is intended
            clc_host = clc_host.replace('ec2', 'autoscaling', 1)
            path = '/'
            reg = None
            port = 443
        else:
            path = '/services/AutoScaling'
            reg = RegionInfo(name='eucalyptus', endpoint=clc_host)
            port = 8773
        self.conn = AutoScaleConnection(access_id,
                                        secret_key,
                                        region=reg,
                                        port=port,
                                        path=path,
                                        is_secure=True,
                                        security_token=token,
                                        debug=0)
        self.conn.APIVersion = '2011-01-01'
        if not is_aws:
            self.conn.auth_region_name = 'Eucalyptus'
        self.conn.https_validate_certificates = False
        self.conn.http_connection_kwargs['timeout'] = 30

    def __save_json__(self, obj, name):
        """Write ``obj`` as indented JSON to the file ``name``."""
        # the with-block closes the file even when serialization fails
        with open(name, 'w') as f:
            json.dump(obj, f, cls=BotoJsonScaleEncoder, indent=2)

    ##
    # autoscaling methods
    ##
    def create_auto_scaling_group(self, as_group):
        return self.conn.create_auto_scaling_group(as_group)

    def delete_auto_scaling_group(self, name, force_delete=False):
        return self.conn.delete_auto_scaling_group(name, force_delete)

    def get_all_groups(self, names=None, max_records=None, next_token=None):
        obj = self.conn.get_all_groups(names, max_records, next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/AS_Groups.json")
        return obj

    def get_all_autoscaling_instances(self,
                                      instance_ids=None,
                                      max_records=None,
                                      next_token=None):
        obj = self.conn.get_all_autoscaling_instances(instance_ids,
                                                      max_records, next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/AS_Instances.json")
        return obj

    def set_desired_capacity(self,
                             group_name,
                             desired_capacity,
                             honor_cooldown=False):
        """Set the desired capacity of the named group.

        The group is looked up first; ``honor_cooldown`` is ignored.
        """
        group = self.conn.get_all_groups([group_name])[0]
        # notice, honor_cooldown not supported.
        return group.set_capacity(desired_capacity)

    def set_instance_health(self,
                            instance_id,
                            health_status,
                            should_respect_grace_period=True):
        return self.conn.set_instance_health(instance_id, health_status,
                                             should_respect_grace_period)

    def terminate_instance(self, instance_id, decrement_capacity=True):
        return self.conn.terminate_instance(instance_id, decrement_capacity)

    def update_autoscaling_group(self, as_group):
        # bind the group to this connection before asking it to update
        as_group.connection = self.conn
        return as_group.update()

    def create_launch_configuration(self, launch_config):
        return self.conn.create_launch_configuration(launch_config)

    def delete_launch_configuration(self, launch_config_name):
        return self.conn.delete_launch_configuration(launch_config_name)

    def get_all_launch_configurations(self,
                                      config_names=None,
                                      max_records=None,
                                      next_token=None):
        obj = self.conn.get_all_launch_configurations(names=config_names,
                                                      max_records=max_records,
                                                      next_token=next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/AS_LaunchConfigs.json")
        return obj

    # policy related
    def delete_policy(self, policy_name, autoscale_group=None):
        return self.conn.delete_policy(policy_name, autoscale_group)

    def get_all_policies(self,
                         as_group=None,
                         policy_names=None,
                         max_records=None,
                         next_token=None):
        return self.conn.get_all_policies(as_group, policy_names, max_records,
                                          next_token)

    def execute_policy(self, policy_name, as_group=None, honor_cooldown=None):
        return self.conn.execute_policy(policy_name, as_group, honor_cooldown)

    def create_scaling_policy(self, scaling_policy):
        return self.conn.create_scaling_policy(scaling_policy)

    def get_all_adjustment_types(self):
        return self.conn.get_all_adjustment_types()

    # tag related
    def delete_tags(self, tags):
        return self.conn.delete_tags(tags)

    def get_all_tags(self, filters=None, max_records=None, next_token=None):
        return self.conn.get_all_tags(filters, max_records, next_token)

    def create_or_update_tags(self, tags):
        return self.conn.create_or_update_tags(tags)
Exemplo n.º 29
0
    def test_basic(self):
        """Exercise the AutoScale API end to end against a live account.

        The first half only reads (groups, activities, launch configs,
        policies, scheduled actions, instances, process/adjustment/metric
        types). The second half creates a launch configuration, a group and
        a tag, then tears them down again and waits for the tag to vanish.
        """
        # NB: as it says on the tin these are really basic tests that only
        # (lightly) exercise read-only behaviour - and that's only if you
        # have any autoscale groups to introspect. It's useful, however, to
        # catch simple errors

        print('--- running %s tests ---' % self.__class__.__name__)
        c = AutoScaleConnection()

        self.assertTrue(repr(c).startswith('AutoScaleConnection'))

        groups = c.get_all_groups()
        for group in groups:
            self.assertIsInstance(group, AutoScalingGroup)

            # get activities
            activities = group.get_activities()

            for activity in activities:
                self.assertIsInstance(activity, Activity)

        # get launch configs
        configs = c.get_all_launch_configurations()
        for config in configs:
            self.assertIsInstance(config, LaunchConfiguration)

        # get policies
        policies = c.get_all_policies()
        for policy in policies:
            self.assertIsInstance(policy, ScalingPolicy)

        # get scheduled actions
        actions = c.get_all_scheduled_actions()
        for action in actions:
            self.assertIsInstance(action, ScheduledUpdateGroupAction)

        # get instances
        instances = c.get_all_autoscaling_instances()
        for instance in instances:
            self.assertIsInstance(instance, Instance)

        # get all scaling process types
        ptypes = c.get_all_scaling_process_types()
        for ptype in ptypes:
            # assertTrue(ptype, ProcessType) would only check truthiness and
            # treat the class as the failure message
            self.assertIsInstance(ptype, ProcessType)

        # get adjustment types
        adjustments = c.get_all_adjustment_types()
        for adjustment in adjustments:
            self.assertIsInstance(adjustment, AdjustmentType)

        # get metrics collection types
        types = c.get_all_metric_collection_types()
        self.assertIsInstance(types, MetricCollectionTypes)

        # create the simplest possible AutoScale group
        # first create the launch configuration
        time_string = '%d' % int(time.time())
        lc_name = 'lc-%s' % time_string
        lc = LaunchConfiguration(name=lc_name, image_id='ami-2272864b',
                                 instance_type='t1.micro')
        c.create_launch_configuration(lc)
        found = False
        for candidate in c.get_all_launch_configurations():
            if candidate.name == lc_name:
                # continue with the copy returned by the service
                lc = candidate
                found = True
                break
        self.assertTrue(found)

        # now create autoscaling group
        group_name = 'group-%s' % time_string
        group = AutoScalingGroup(name=group_name, launch_config=lc,
                                 availability_zones=['us-east-1a'],
                                 min_size=1, max_size=1)
        c.create_auto_scaling_group(group)
        found = False
        for candidate in c.get_all_groups():
            if candidate.name == group_name:
                group = candidate
                found = True
                break
        self.assertTrue(found)

        # now create a tag
        tag = Tag(key='foo', value='bar', resource_id=group_name,
                  propagate_at_launch=True)
        c.create_or_update_tags([tag])

        found = False
        for candidate in c.get_all_tags():
            if candidate.resource_id == group_name and candidate.key == 'foo':
                tag = candidate
                found = True
                break
        self.assertTrue(found)

        c.delete_tags([tag])

        # shutdown instances and wait for them to disappear
        group.shutdown_instances()
        instances = True
        while instances:
            time.sleep(5)
            for candidate in c.get_all_groups():
                if candidate.name == group_name:
                    # keep ``group`` bound to OUR group; iterating with
                    # ``group`` itself would leave it on whichever group
                    # happens to be listed last, and that one would then
                    # be deleted below
                    group = candidate
                    if not candidate.instances:
                        instances = False
                    break

        group.delete()
        lc.delete()

        # wait until the tag is no longer listed
        found = True
        while found:
            found = False
            time.sleep(5)
            tags = c.get_all_tags()
            for tag in tags:
                if tag.resource_id == group_name and tag.key == 'foo':
                    found = True

        self.assertFalse(found)

        print('--- tests completed ---')
Exemplo n.º 30
0
def get_asg_connection():
    """Return a new AutoScaleConnection after one ``get_all_groups`` call."""
    connection = AutoScaleConnection()
    # The listing result is discarded; the request is issued before the
    # connection is handed back (presumably so credential or connectivity
    # problems surface here -- TODO confirm).
    connection.get_all_groups(max_records=1)
    return connection
Exemplo n.º 31
0
def create_autoscaling(ami_id, sns_arn):
    """
    Creates the autoscaling group for proxy instances
    Inspired by boto autoscaling tutorial.

    Steps, in order: launch configuration, autoscaling group, a ``Name``
    tag propagated to launched machines, ``scale_up`` / ``scale_down``
    policies (+1 / -1 instance), and two CloudWatch CPU alarms that trigger
    those policies. Only the scale-up alarm also notifies ``sns_arn``.

    ``ami_id`` is the image used by the launch configuration. All other
    settings come from module-level constants.
    """
    con = AutoScaleConnection(
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        aws_access_key_id=AWS_ACCESS_KEY,
        region=RegionInfo(name=REGION,
                          endpoint='autoscaling.%s.amazonaws.com' % REGION))

    # Single-argument print(...) emits the same text on Python 2 and also
    # parses on Python 3, unlike the print statement.
    print("Creating autoscaling configuration..")
    config = LaunchConfiguration(name=AUTOSCALING_GROUP_NAME,
                                 image_id=ami_id,
                                 key_name=KEY_NAME,
                                 security_groups=[EC2_SECURITY_GROUP_NAME],
                                 instance_type=INSTANCE_TYPE)

    con.create_launch_configuration(config)

    print("Create autoscaling group...")
    ag = AutoScalingGroup(name=AUTOSCALING_GROUP_NAME,
                          launch_config=config,
                          availability_zones=["{0}a".format(REGION)],
                          load_balancers=[ELB_NAME],
                          min_size=AUTOSCALING_MIN_INSTANCES,
                          max_size=AUTOSCALING_MAX_INSTANCES,
                          group_name=AUTOSCALING_GROUP_NAME)
    con.create_auto_scaling_group(ag)

    # fetch the autoscale group after it is created (unused but may be necessary)
    _ = con.get_all_groups(names=[AUTOSCALING_GROUP_NAME])[0]

    # Create tag name for autoscaling-created machines
    as_tag = Tag(key='Name',
                 value=AUTOSCALING_GROUP_NAME,
                 propagate_at_launch=True,
                 resource_id=AUTOSCALING_GROUP_NAME)
    con.create_or_update_tags([as_tag])

    print("Creating autoscaling policy...")
    scaleup_policy = ScalingPolicy(name='scale_up',
                                   adjustment_type='ChangeInCapacity',
                                   as_name=AUTOSCALING_GROUP_NAME,
                                   scaling_adjustment=1,
                                   cooldown=AUTOSCALING_COOLDOWN_PERIOD)

    scaledown_policy = ScalingPolicy(name='scale_down',
                                     adjustment_type='ChangeInCapacity',
                                     as_name=AUTOSCALING_GROUP_NAME,
                                     scaling_adjustment=-1,
                                     cooldown=AUTOSCALING_COOLDOWN_PERIOD)

    con.create_scaling_policy(scaleup_policy)
    con.create_scaling_policy(scaledown_policy)

    # Get freshened policy objects; policy_arn is read from these below
    scaleup_policy = con.get_all_policies(
        as_group=AUTOSCALING_GROUP_NAME, policy_names=['scale_up'])[0]
    scaledown_policy = con.get_all_policies(
        as_group=AUTOSCALING_GROUP_NAME, policy_names=['scale_down'])[0]

    print("Creating cloudwatch alarms")
    cloudwatch_con = CloudWatchConnection(
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        aws_access_key_id=AWS_ACCESS_KEY,
        region=RegionInfo(name=REGION,
                          endpoint='monitoring.%s.amazonaws.com' % REGION))

    alarm_dimensions = {"AutoScalingGroupName": AUTOSCALING_GROUP_NAME}
    scaleup_alarm = MetricAlarm(
        name='scale_up_on_cpu',
        namespace='AWS/EC2',
        metric='CPUUtilization',
        statistic='Average',
        comparison='>',
        threshold=AUTOSCALING_CPU_MAX_THRESHOLD,
        period='60',
        evaluation_periods=1,
        alarm_actions=[scaleup_policy.policy_arn, sns_arn],
        dimensions=alarm_dimensions)

    # Don't send SNS on scaledown policy
    scaledown_alarm = MetricAlarm(
        name='scale_down_on_cpu',
        namespace='AWS/EC2',
        metric='CPUUtilization',
        statistic='Average',
        comparison='<',
        threshold=AUTOSCALING_CPU_MIN_THRESHOLD,
        period='60',
        evaluation_periods=1,
        alarm_actions=[scaledown_policy.policy_arn],
        dimensions=alarm_dimensions)
    cloudwatch_con.create_alarm(scaleup_alarm)
    cloudwatch_con.create_alarm(scaledown_alarm)
Exemplo n.º 32
0
def add_ingress_rule(dry_run, go_agent_security_group, go_agent_security_group_owner, go_agent_security_group_name):
    """
    For each ASG (app) in each VPC, add a rule to each SG associated with the ASG's launch configuration
    that allows SSH ingress from the GoCD agents' SG.

    BEFORE RUNNING THIS SCRIPT!:
    - Use the assume_role bash script to assume the role in the proper account/VPC (edx, edge, mckinsey, etc.)
        - If you don't know what this is, ask someone in DevOps.
    - THEN run this script.

    The operator is prompted once per security group before the rule is added. ASGs whose
    launch configuration does not reference exactly one known security group are logged and skipped.

    :param dry_run: passed straight through to ``SecurityGroup.authorize(dry_run=...)``.
    :param go_agent_security_group: ID of the GoCD agents' security group (used as ``id``).
    :param go_agent_security_group_owner: owner/account ID of that security group (used as ``owner_id``).
    :param go_agent_security_group_name: name of that security group (used as ``name``).
    :raises KeyError: if an ASG references a launch configuration that was not returned by AWS.
    :raises boto.exception.EC2ResponseError: for any authorize failure other than a dry-run
        response (HTTP 412) or an already-existing rule.
    """
    asg_conn = AutoScaleConnection()
    ec2_conn = boto.ec2.connect_to_region('us-east-1')
    asgs = []
    launch_configs = {}
    security_groups = {}

    logging.debug('All ASGs:')
    for group in asg_conn.get_all_groups():
        logging.debug('    {}'.format(group))
        asgs.append(group)

    logging.debug('All launch configurations:')
    for launch_config in asg_conn.get_all_launch_configurations():
        logging.debug('    {}'.format(launch_config))
        launch_configs[launch_config.name] = launch_config

    logging.debug('All security groups:')
    for sec_group in ec2_conn.get_all_security_groups():
        logging.debug('    {}'.format(sec_group))
        security_groups[sec_group.id] = sec_group

    # Validate that each ASG has a launch configuration.
    for group in asgs:
        try:
            logging.info("Launch configuration for ASG '{}' is '{}'.".format(
                group.name, launch_configs[group.launch_config_name]
            ))
        except KeyError:
            logging.error("Launch configuration '{}' for ASG '{}' was not found!".format(
                group.launch_config_name, group.name
            ))
            raise

    # Construct a fake security group for the prod-tools-goagent-sg security group in the edx-tools account.
    # This group will be used to grant the go-agents ingress into the ASG's VPCs.
    # Kept in its own local so the ID passed in by the caller is not shadowed by an object.
    go_agent_sg = boto.ec2.securitygroup.SecurityGroup(
        name=go_agent_security_group_name,
        owner_id=go_agent_security_group_owner,
        id=go_agent_security_group
    )

    # For each launch config, check for the security group. Can support multiple security groups
    # but the edX DevOps convention is to use a single security group.
    for group in asgs:
        launch_config = launch_configs[group.launch_config_name]
        lc_security_groups = launch_config.security_groups
        if len(lc_security_groups) > 1:
            err_msg = "Launch config '{}' for ASG '{}' has more than one security group!: {}".format(
                launch_config.name, group.name, lc_security_groups
            )
            logging.warning(err_msg)
            continue
        if not lc_security_groups:
            # Nothing to add a rule to; indexing [0] below would raise IndexError.
            logging.warning("Launch config '{}' for ASG '{}' has no security group!".format(
                launch_config.name, group.name
            ))
            continue

        sg_name = lc_security_groups[0]
        try:
            # Find the security group.
            sec_group = security_groups[sg_name]
        except KeyError:
            logging.error("Security group '{}' for ASG '{}' was not found!.".format(sg_name, group.name))
            # Skip this ASG: falling through would reuse the security group left over from a
            # previous iteration (or hit an unbound name) and modify the wrong group.
            continue

        logging.info('BEFORE: Rules for security group {}:'.format(sec_group.name))
        logging.info(sec_group.rules)

        # Empty input accepts the default; compare case-insensitively so the advertised
        # "Yes" answer is honoured as well as "y"/"yes".
        yes_no = six.moves.input("Apply the change to this security group? [Yes]")
        if yes_no.strip().lower() in ("", "y", "yes"):
            try:
                # Add the ingress rule to the security group.
                sec_group.authorize(
                    ip_protocol='tcp',
                    from_port=22,
                    to_port=22,
                    src_group=go_agent_sg,
                    dry_run=dry_run
                )
            except boto.exception.EC2ResponseError as exc:
                if exc.status == 412:
                    # If the dry_run flag is set, then each rule addition will raise this exception.
                    # Log it and carry on.
                    logging.info(
                        'Dry run is True but rule addition would have succeeded for security group {}.'.format(
                            sg_name
                        )
                    )
                elif exc.code == "InvalidPermission.Duplicate":
                    logging.info("Rule already exists for {}.".format(sg_name))
                else:
                    raise

        logging.info('AFTER: Rules for security group {}:'.format(sg_name))
        logging.info(sec_group.rules)
Exemplo n.º 33
0
class IcsAS(object):

    """
    Convenience wrapper around an AutoScale connection (ICS library)
    """

    def __init__(self, region, **kwargs):
        region_info = get_region(region)
        self.conn = AutoScaleConnection(region=region_info, **kwargs)

    def to_list(self, input):
        """
        Normalise an argument to list form

        ``None`` and lists are handed back untouched, a string is wrapped
        in a one-element list, anything else is rejected.

        :type input: None, list or string
        :param input: the value to normalise

        :rtype: list or None
        :return: the value itself, or a single-element list holding it
        """
        if input is None or isinstance(input, list):
            return input
        if isinstance(input, basestring):
            return [input]
        raise IcsASException("Need the type '%s' but '%s' found"
                             % ('list', type(input)))

    def get_group_name_from_instance(self, instance_id):
        """
        Look up which ASG an instance belongs to

        :type instance_id: string
        :param instance_id: EC2 instance id (or list of ids); only the
            first record returned by the connection is considered

        :rtype: string
        :return: the ASG name, or None when no record came back
        """
        id_list = self.to_list(instance_id)
        found = self.conn.get_all_autoscaling_instances(instance_ids=id_list)
        if not found:
            return None
        return found[0].group_name

    def get_instances_from_group_name(self, name):
        """
        Collect the instances of the named ASG(s)

        :type name: string
        :param name: the ASG name (or list of names)

        :rtype: list
        :return: the instances of every matching group, concatenated
        """
        matching_groups = self.conn.get_all_groups(names=self.to_list(name))
        return [member
                for asg in matching_groups
                for member in asg.instances]

    def get_group_from_name(self, name):
        """
        Fetch the ASG(s) matching a name

        :type name: string
        :param name: the ASG name (or list of names)

        :rtype: list
        :return: whatever the connection returns for those names
        """
        wanted = self.to_list(name)
        return self.conn.get_all_groups(names=wanted)

    def get_launch_config_from_name(self, name):
        """
        Fetch the Launch Configuration(s) matching a name

        :type name: string
        :param name: the Launch Configuration name (or list of names)

        :rtype: list
        :return: whatever the connection returns for those names
        """
        wanted = self.to_list(name)
        return self.conn.get_all_launch_configurations(names=wanted)

    def create_launch_config(self, launch_config):
        """
        Register a new Launch Configuration

        :type launch_config: class
        :param launch_config: boto launch_config object

        :rtype: string
        :return: AWS request Id
        """
        request_id = self.conn.create_launch_configuration(launch_config)
        return request_id

    def delete_launch_config_from_name(self, name):
        """
        Remove a Launch Configuration by name (the name is logged first)

        :type name: string
        :param name: the name of launch configuration

        :rtype: string
        :return: AWS request Id
        """
        log.info("delete the launch configuration:")
        log.info(">> %s" % name)
        request_id = self.conn.delete_launch_configuration(name)
        return request_id

    def update_launch_config(self, name, launch_config):
        """
        Swap the Launch Configuration used by an ASG

        The new configuration is created, the group is pointed at it and
        updated, and once the switch has been verified the previous
        configuration is deleted.

        :type name: string
        :param name: the name of Auto-Scaling Group

        :type launch_config: class
        :param launch_config: boto launch_config object

        :rtype: string
        :return: AWS request Id of the deletion of the old configuration

        :raises IcsASException: if the group does not exist, the new
            configuration cannot be found afterwards, or the group still
            reports a different configuration
        """
        matches = self.get_group_from_name(name)
        if not matches:
            raise IcsASException("no such Auto-Scaling Group '%s' found"
                                 % name)
        target = matches[0]

        self.create_launch_config(launch_config)
        old_lc_name = target.launch_config_name
        new_lc_name = launch_config.name
        # Written through __dict__ exactly as before, then pushed to AWS.
        target.__dict__["launch_config_name"] = new_lc_name
        target.update()

        # Verify the new configuration exists ...
        if not self.get_launch_config_from_name(new_lc_name):
            raise IcsASException("no such new launch config '%s'"
                                 % new_lc_name)

        # ... and that a freshly fetched group really points at it.
        refreshed = self.get_group_from_name(name)[0]
        if refreshed.launch_config_name != new_lc_name:
            raise IcsASException(
                "failed to update launch config for ASG '%s'" % name)

        return self.delete_launch_config_from_name(old_lc_name)

    def suspend_scaling_group(self, name, scaling_processes=None):
        """
        Suspend Auto Scaling processes of an ASG

        :type name: string
        :param name: the ASG name; any non-string value makes the call
            return None without contacting AWS

        :type scaling_processes: string or list
        :param scaling_processes: scaling process names

         * Launch
         * Terminate
         * HealthCheck
         * ReplaceUnhealthy
         * AZRebalance
         * AlarmNotification
         * ScheduledActions
         * AddToLoadBalancer
        """
        if not isinstance(name, basestring):
            return None
        target = self.get_group_from_name(self.to_list(name))[0]
        processes = self.to_list(scaling_processes)
        return target.suspend_processes(processes)

    def resume_scaling_group(self, name, scaling_processes=None):
        """
        Resume Auto Scaling processes of an ASG

        :type name: string
        :param name: the ASG name; any non-string value makes the call
            return None without contacting AWS

        :type scaling_processes: string or list
        :param scaling_processes: scaling process names

         * Launch
         * Terminate
         * HealthCheck
         * ReplaceUnhealthy
         * AZRebalance
         * AlarmNotification
         * ScheduledActions
         * AddToLoadBalancer
        """
        if not isinstance(name, basestring):
            return None
        target = self.get_group_from_name(self.to_list(name))[0]
        processes = self.to_list(scaling_processes)
        return target.resume_processes(processes)

    def terminate_group_instance(self, instance_id, decrement_capacity=True):
        """
        Terminate one instance through the AutoScale connection,
        optionally shrinking the desired group size as well.

        :type instance_id: str
        :param instance_id: The ID of the instance to be terminated.

        :type decrement_capacity: bool
        :param decrement_capacity: Whether to decrement the size of the
            autoscaling group or not.

        :return: the result of the connection's ``terminate_instance``
        """
        outcome = self.conn.terminate_instance(
            instance_id=instance_id,
            decrement_capacity=decrement_capacity)
        return outcome

    def update_instance_health(self, instance_id, health_status,
                               grace_period=False):
        """
        Explicitly set the health status of an instance (returns nothing).

        :type instance_id: str
        :param instance_id: The identifier of the EC2 instance

        :type health_status: str
        :param health_status: The health status of the instance.

        * Healthy: the instance is healthy and should remain in service.
        * Unhealthy: the instance is unhealthy. \
            Auto Scaling should terminate and replace it.

        :type grace_period: bool
        :param grace_period: If True, this call should respect
            the grace period associated with the group.
        """
        self.conn.set_instance_health(
            instance_id, health_status,
            should_respect_grace_period=grace_period)
Exemplo n.º 34
0
def setup(CONF):
  """
  Create or update the auto-scaling resources described by ``CONF``.

  Steps, in order: create a launch configuration, create the auto scaling
  group (or update it when a group of that name already exists), apply the
  group tags, then create one scaling policy plus one CloudWatch alarm per
  trigger, wiring each alarm to its policy's ARN.

  The connection, launch configuration and group are stored in the
  module-level ``out`` dict under ``'conn'``, ``'lc'`` and ``'asg'``.

  NOTE: ``CONF`` is modified in place (names are ``%``-formatted with
  ``{'name': CONF['NAME']}`` and derived keys are filled in).

  :param CONF: mapping with the keys ``NAME``, ``LC``, ``ASG``, ``ASG_TAGS``,
      ``REGION`` and ``TRIGGERS`` (each trigger holds a ``policy`` and an
      ``alarm`` mapping of constructor keyword arguments).
  """
  global out

  lookup_tbl = {
    'name': CONF['NAME'],
  }

  conn = AutoScaleConnection()

  out['conn'] = conn

  # Launch Configurations
  LC = CONF['LC']
  LC['name'] = LC['name'] % lookup_tbl

  lc = LaunchConfiguration(**LC)
  conn.create_launch_configuration(lc)
  out['lc'] = lc

  # Auto Scaling Group
  ASG = CONF['ASG']
  ASG['group_name'] = ASG['group_name'] % lookup_tbl
  ASG['launch_config'] = lc

  groups = conn.get_all_groups(names=[ASG['group_name']])
  if groups:
    # update: copy over only the settings the existing group object exposes
    asg = groups[0]
    for k in ASG:
      if hasattr(asg, k):
        setattr(asg, k, ASG[k])
    asg.launch_config_name = LC['name']
    asg.update()
  else:
    # create
    asg = AutoScalingGroup(**ASG)
    conn.create_auto_scaling_group(asg)
  # Record the group on both paths so callers can rely on out['asg'].
  out['asg'] = asg

  # ASG Tags
  ASG_TAGS = CONF['ASG_TAGS']
  for i in ASG_TAGS:
    if 'propagate_at_launch' not in i:
      i['propagate_at_launch'] = True
    i['key'] = i['key'] % lookup_tbl
    i['value'] = i['value'] % lookup_tbl

  # dict(mapping, key=value) builds a copy with resource_id forced to the
  # group name, without relying on list-returning dict.items().
  tags = [
      Tag(**dict(x, resource_id=ASG['group_name'])) for x in ASG_TAGS
  ]
  conn.create_or_update_tags(tags)

  # Triggers (Scaling Policy / Cloudwatch Alarm)
  conn_cw = connect_to_region(CONF['REGION'])

  TRIGGERS = CONF['TRIGGERS']
  for T in TRIGGERS:
    policy_conf = T['policy']
    alarm_conf = T['alarm']

    policy_conf['name'] = policy_conf['name'] % lookup_tbl
    policy_conf['as_name'] = ASG['group_name']
    alarm_conf['dimensions'] = {'AutoScalingGroupName': ASG['group_name']}

    # The alarm falls back to the policy's name when it has none of its own.
    if 'name' in alarm_conf:
      alarm_conf['name'] = alarm_conf['name'] % lookup_tbl
    else:
      alarm_conf['name'] = policy_conf['name']

    # Policies are safely overwritten, so not checked for existence
    conn.create_scaling_policy(ScalingPolicy(**policy_conf))
    # Re-fetch the policy to obtain its ARN for the alarm action.
    policy = conn.get_all_policies(as_group=ASG['group_name'], policy_names=[policy_conf['name']])[0]

    alarm_conf['alarm_actions'] = [policy.policy_arn]
    # NOTE(review): create_alarm is documented as create-or-update, so no
    # prior existence lookup is made - confirm against the boto version in use.
    conn_cw.create_alarm(MetricAlarm(**alarm_conf))
Exemplo n.º 35
0
    all_healthy = False
    for g in group_activities:
        print g 
        if int(g.progress) == 100:
            all_healthy = True
        else:
            all_healthy = False
            break
    return all_healthy

#make connections
autoscaling_conn = AutoScaleConnection(os.environ['AWS_ACCESS_KEY_ID'], os.environ['AWS_SECRET_ACCESS_KEY'])
ec2_conn = boto.ec2.connect_to_region('us-east-1') 

#get the autoscaling groups
a_groups = autoscaling_conn.get_all_groups()
for group in a_groups:
    instances = get_instances_from_group(group)

time.sleep(30)    
#wait until all groups have all healthy instances
while(True):
    all_healthy = False
    #go through all the groups
    for group in a_groups:
        #check if the group is ready
        group.update()
        #if the group is live
        if check_group_health(group.get_activities()):             
            print "Group Passes"
            print group