def stop_decommissioned_node(address, terminate=False):
    """Stop (or terminate) the EC2 instance backing a decommissioned node.

    Asks the node itself for its instance ID via the EC2 metadata
    service, places it into maintenance mode, then issues a stop or
    terminate request against EC2 us-east-1 and logs whether the
    instance appeared in the response.

    :param address: hostname/address of the node being decommissioned.
    :param terminate: when True, terminate the instance instead of
        merely stopping it.
    """
    log.debug('Stopping or terminating the node at {address}'.format(
        address=address))
    log.debug('Retrieving the instance ID')
    # Only the instance itself can reach the link-local metadata service,
    # so the curl runs remotely on the node.
    command = 'curl http://169.254.169.254/latest/meta-data/instance-id'
    instance_id = run_command(address, command)
    # NOTE(review): assumes run_command returns the bare instance ID with
    # no trailing newline — confirm against run_command's implementation.
    log.debug('The instance ID is {id_}'.format(id_=instance_id))
    set_maintenance_mode(instance_id)
    log.debug('Establishing a connection to AWS EC2 us-east-1')
    conn = boto.ec2.connect_to_region('us-east-1')
    if terminate:
        log.debug('Terminating {instance}'.format(instance=instance_id))
        response = conn.terminate_instances(instance_ids=[instance_id])
    else:
        # Fixed: the original message contained a stray backtick
        # ('Stopping `{instance}').
        log.debug('Stopping {instance}'.format(instance=instance_id))
        response = conn.stop_instances(instance_ids=[instance_id])
    log.debug('Received the response {response}'.format(response=response))
    # Both EC2 calls return the list of instances the request affected.
    affected_ids = [instance.id for instance in response]
    if instance_id in affected_ids:
        action = 'terminated' if terminate else 'stopped'
        log.debug('Successfully {action} {instance}'.format(
            action=action, instance=instance_id))
    else:
        action = 'terminate' if terminate else 'stop'
        log.debug('Failed to {action} {instance}'.format(
            action=action, instance=instance_id))
def replace_server(environment=None, group=None, subnet_id=None, instance_type=None, availability_zone=None, replica_set_index=None, data_volume_size=None, data_volume_iops=None, mongodb_package_version=None, member=None, replace=False, node_type='data', reroute=False, replica_set_template=None, terminate=False, prompt_before_replace=True): if member is None: log.critical('No existing member defined.') sys.exit(1) replica_set = ReplicaSet(member) if replica_set.primary[:2] == 'ip': log.warn('The replica set\'s primary address is private') log.debug('The replica set\'s primary is {primary}'.format( primary=replica_set.primary)) log.info('To continue, the replica set must be failed over') log.debug('Connecting to AWS EC2 us-east-1') conn = boto.ec2.connect_to_region('us-east-1') log.debug('Connected to AWS EC2 us-east-1') components = replica_set.primary.split('-') old_primary = replica_set.primary private_ip = '.'.join([components[1], components[2], components[3], components[4]]) log.debug('Using the private IP address {ip} for the primary'.format( ip=private_ip)) log.debug('Filtering AWS instances by private IP address') reservations = conn.get_all_instances( filters={'private-ip-address': private_ip}) instance = reservations[0].instances[0] log.debug('Found instance {id_}'.format(id_=instance.id)) public_address = None if 'Name' in instance.tags: public_address = instance.tags['Name'] log.debug('The tag Name exists on the instance') log.debug('Building public address from the instance\s name') if environment == 'test': public_address += '.thorhudl.com' elif environment == 'stage': public_address += '.app.staghudl.com' elif environment == 'prod': public_address += '.app.hudl.com' else: log.debug('The tag Name could not be found on the instance') public_address = instance.private_dns_name log.debug('Proceeding using {address} to contact the primary'.format( address=public_address)) log.debug('Instructing the primary to step down') run_mongo_command(public_address, 
'rs.stepDown()') log.debug('Sleeping for 120 seconds while an election takes place') time.sleep(120) log.debug('Determining the new primary') replica_set.determine_primary(member) log.debug('(Temporarily) removing the old primary from ' 'the replica set') replica_set.remove_member(old_primary) log.debug('Sleeping for 120 seconds') time.sleep(120) log.debug('Adding the old primary back into the replica ' 'set with the new address') replica_set.add_member(public_address) replica_set_name = replica_set.status['set'] log.info('Using the replica set name {name}'.format(name=replica_set_name)) if node_type == 'arbiter': log.info('The node being added is an arbiter') log.info('Launching the new node') node = launch_server(environment, group, subnet_id, instance_type, availability_zone, replica_set_index, data_volume_size, data_volume_iops, mongodb_package_version, node_type, replica_set_template=replica_set_name) log.info('Retreiving the replica set\'s current arbiter') arbiter = replica_set.arbiter if arbiter is not None: log.info('The current arbiter is {arbiter}'.format( arbiter=arbiter)) log.info('Removing the old arbiter from the replica set') replica_set.remove_member(arbiter, clean=True) else: log.info('The replica set does not have an arbiter') log.info('Adding the new arbiter to the replica set') replica_set.add_member(node.hostname, arbiter=True, accessible=node.instance.private_dns_name) if replace: log.info('Terminating the previous arbiter') stop_decommissioned_node(member, terminate=terminate) return log.info('The node being added is a {type_} node'.format(type_=node_type)) log.info('Launching the new node') node = launch_server(environment, group, subnet_id, instance_type, availability_zone, replica_set_index, data_volume_size, data_volume_iops, mongodb_package_version, node_type, replica_set_template=replica_set_name) log.info('Placing the new node in maintenance mode') set_maintenance_mode(node.instance.id) log.info('Adding the new node to the replica 
set') if node_type == 'datawarehousing': replica_set.add_member(node.hostname, hidden=True, accessible=node.instance.private_dns_name) else: replica_set.add_member(node.hostname, accessible=node.instance.private_dns_name) log.info('Retreiving the replica set\'s arbiter') arbiter = replica_set.arbiter if arbiter is not None: log.debug('The arbiter is {arbiter}'.format(arbiter=arbiter)) log.info('(Temporarily) removing the arbiter from the replica set') replica_set.remove_member(arbiter, clean=True) else: log.debug('There is no arbiter') log.info('Waiting for the node to finish syncing') wait_for_sync(node) log.info('Removing the node from maintenance mode') unset_maintenance_mode(node.instance.id) if arbiter is not None: log.info('Adding the arbiter back into the replica set') replica_set.add_member(arbiter, arbiter=True) if replace: log.info('Preparing to remove the previous node') if prompt_before_replace: print '\a' _ = raw_input('Press enter to continue') if replica_set.primary == member: log.warn('The previous node is the primary') log.warn('The replica set will need to fail over to continue') log.info('Failing over the replica set') replica_set.failover() log.info('Removing the previous node from the replica set') replica_set.remove_member(member) log.info('Terminating the previous node') stop_decommissioned_node(member, terminate=terminate) if node_type == 'data' and reroute: log.info('Redirecting previous DNS entry') log.debug('Establishing a connect to AWS Route53 us-east-1') conn = boto.route53.connect_to_region('us-east-1') log.debug('Retrieving the zone app.staghudl.com.') zone = conn.get_zone('app.staghudl.com.') if environment == 'prod': log.debug('Retrieving the zone app.hudl.com.') zone = conn.get_zone('app.hudl.com.') if environment == 'test': log.debug('Retrieving the zone thorhudl.com.') zone = conn.get_zone('thorhudl.com.') if zone.get_cname(member+'.') is None: log.debug('An existing DNS record does not exist') else: log.debug('Updating the DNS 
CNAME record') zone.update_cname(member+'.', node.instance.private_dns_name)
def compact_mongodb_server(host, version, prompt_before_failover=True): log.debug('Retrieving replica set for host {host}'.format(host=host)) replica_set = ReplicaSet(host) log.debug('Validating the syncingTo property on nodes') while not validate_sync_to(replica_set): log.debug('Enforcing the syncingTo property on nodes') enforce_sync_to(replica_set) log.debug('Validation of syncingTo property on nodes complete') secondaries = [node for node in replica_set.status['members'] if node['stateStr'] == 'SECONDARY'] log.info('Compacting {nodes}'.format( nodes=[s['name'] for s in secondaries])) for secondary in secondaries: address = secondary['name'].split(':')[0] log.debug('Retrieving compact.js on {host}'.format(host=address)) fetch_script(address, version) log.debug('Setting maintenance mode for {host}'.format(host=address)) set_maintenance_mode(id_for_host(address)) log.info('Compacting {host}'.format(host=address)) compact(address) log.info('Waiting for {host} to recover'.format(host=address)) while recovering(replica_set, secondary['name']): log.warning('{host} is still recovering.'.format(host=address)) log.debug('Sleeping for 30 seconds.') time.sleep(30) log.debug('Unsetting maintenance mode for {host}'.format( host=address)) unset_maintenance_mode(id_for_host(address)) log.debug('Retrieving current primary') secondaries = [node for node in replica_set.status['members'] if node['stateStr'] == 'PRIMARY'] log.debug('Preparing to compact primary {host}'.format( host=secondaries[0]['name'])) log.info('Preparing to fail over replica set') if prompt_before_failover: print '\a' _ = raw_input('Press return to continue') log.debug('Instructing the replica set to fail over') replica_set.failover() log.debug('Validating the syncingTo property on nodes') while not validate_sync_to(replica_set): log.debug('Enforcing the syncingTo property on nodes') enforce_sync_to(replica_set) log.debug('Validation of syncingTo property on nodes complete') for secondary in secondaries: 
address = secondary['name'].split(':')[0] log.debug('Retrieving compact.js on {host}'.format(host=address)) fetch_script(address, version) log.debug('Setting maintenance mode for {host}'.format(host=address)) set_maintenance_mode(id_for_host(address)) log.info('Compacting {host}'.format(host=address)) compact(address) log.info('Waiting for {host} to recover'.format(host=address)) while recovering(replica_set, secondary['name']): log.warning('{host} is still recovering.'.format(host=address)) log.debug('Sleeping for 30 seconds.') time.sleep(30) log.debug('Unsetting maintenance mode for {host}'.format( host=address)) unset_maintenance_mode(id_for_host(address))