def get_database(name, env):
    """Return the first non-quarantined Database named *name* visible from *env*.

    Dev environments match on the exact environment name; any other
    environment searches across all configured production environments.
    Raises IndexError when no matching database exists.
    """
    dev_envs = Configuration.get_by_name_as_list('dev_envs')
    if env in dev_envs:
        candidates = Database.objects.filter(
            name=name, environment__name=env)
    else:
        prod_envs = Configuration.get_by_name_as_list('prod_envs')
        candidates = Database.objects.filter(
            name=name, environment__name__in=prod_envs)
    # Querysets are lazy, so filtering quarantine here is identical to
    # excluding it on each branch.
    return candidates.exclude(is_in_quarantine=True)[0]
def database_name_evironment_constraint(database_name, environment_name):
    """Return True when creating *database_name* in *environment_name* would
    clash with an existing database in a production environment.

    Dev environments are never constrained; an empty name match returns False.
    """
    from logical.models import Database
    from system.models import Configuration

    existing = Database.objects.filter(name=database_name)
    if not existing:
        return False

    if environment_name in Configuration.get_by_name_as_list('dev_envs'):
        return False

    prod_envs = Configuration.get_by_name_as_list('prod_envs')
    for database in existing:
        if database.environment.name in prod_envs:
            return True
    return False
def database_name_evironment_constraint(database_name, environment_name):
    """True when a database called *database_name* already lives in some
    production environment and *environment_name* is not a dev environment.
    """
    from logical.models import Database
    from system.models import Configuration

    same_name_dbs = Database.objects.filter(name=database_name)
    if not same_name_dbs:
        return False

    if environment_name in Configuration.get_by_name_as_list('dev_envs'):
        return False

    production_names = Configuration.get_by_name_as_list('prod_envs')
    return any(db.environment.name in production_names
               for db in same_name_dbs)
def revoke_detail(request, id):
    """Admin view: revoke a WAITING region-migration detail.

    Refuses to act (redirecting back with an error message) when the
    celery workers cannot all be reached, since revocation depends on
    the workers being alive.
    """
    import celery
    from system.models import Configuration

    celery_inspect = celery.current_app.control.inspect()
    celery_workers = Configuration.get_by_name_as_list('celery_workers',)
    try:
        workers = celery_inspect.ping().keys()
    except Exception as e:
        # ping() returning None (no workers) raises AttributeError here.
        LOG.warn("All celery workers are down! {} :(".format(e))
        messages.add_message(request, messages.ERROR,
                             "Migration can't be revoked because all celery workers are down!",)
        return HttpResponseRedirect(request.META.get('HTTP_REFERER'))
    # Compare as sets: ping() returns worker names in arbitrary order, so a
    # plain list comparison against the configured list could report healthy
    # workers as down.
    if workers and set(workers) != set(celery_workers):
        LOG.warn("At least one celery worker is down! :(")
        messages.add_message(request, messages.ERROR,
                             "Migration can't be revoked because at least one celery worker is down!",)
        return HttpResponseRedirect(request.META.get('HTTP_REFERER'))
    detail = models.DatabaseRegionMigrationDetail.objects.get(id=id)
    if detail.status == detail.WAITING:
        if detail.revoke_maintenance(request):
            messages.add_message(request, messages.SUCCESS,
                                 "Migration revoked!",)
        else:
            messages.add_message(request, messages.ERROR,
                                 "Migration has already started!",)
    else:
        messages.add_message(request, messages.ERROR,
                             "Migration can't be revoked!",)
    return HttpResponseRedirect(reverse(
        'admin:region_migration_databaseregionmigrationdetail_changelist'))
def revoke_detail(request, id):
    """Admin view: revoke a WAITING flipperfox-migration detail.

    Refuses to act (redirecting back with an error message) when the
    celery workers cannot all be reached, since revocation depends on
    the workers being alive.
    """
    import celery
    from system.models import Configuration

    celery_inspect = celery.current_app.control.inspect()
    celery_workers = Configuration.get_by_name_as_list('celery_workers',)
    try:
        workers = celery_inspect.ping().keys()
    except Exception as e:
        # ping() returning None (no workers) raises AttributeError here.
        LOG.warn("All celery workers are down! {} :(".format(e))
        messages.add_message(request, messages.ERROR,
                             "Migration can't be revoked because all celery workers are down!",)
        return HttpResponseRedirect(request.META.get('HTTP_REFERER'))
    # Compare as sets: ping() returns worker names in arbitrary order, so a
    # plain list comparison against the configured list could report healthy
    # workers as down.
    if workers and set(workers) != set(celery_workers):
        LOG.warn("At least one celery worker is down! :(")
        messages.add_message(request, messages.ERROR,
                             "Migration can't be revoked because at least one celery worker is down!",)
        return HttpResponseRedirect(request.META.get('HTTP_REFERER'))
    detail = models.DatabaseFlipperFoxMigrationDetail.objects.get(id=id)
    if detail.status == detail.WAITING:
        if detail.revoke_maintenance(request):
            messages.add_message(request, messages.SUCCESS,
                                 "Migration revoked!",)
        else:
            messages.add_message(request, messages.ERROR,
                                 "Migration has already started!",)
    else:
        messages.add_message(request, messages.ERROR,
                             "Migration can't be revoked!",)
    return HttpResponseRedirect(reverse(
        'admin:flipperfox_migration_databaseflipperfoxmigrationdetail_changelist'))
def create_acl_for(self, vpc, env, pool_name):
    """Grant ACL access from the configured application networks to *vpc*.

    Fires the ACL creation job immediately (execute_job=True).
    """
    source_networks = Configuration.get_by_name_as_list('application_networks')
    acl_request = AddACLAccess(
        env,
        source_networks,
        [vpc],
        description="ACl created when pool {} was created".format(pool_name))
    acl_request.create_acl(execute_job=True)
def databaseinfra_ending(context=None):
    """Send the "DatabaseInfra is almost full" notification email.

    :param context: optional dict of extra template variables; a 'domain'
        key is added before rendering.
    """
    # A mutable {} default would be shared across calls and silently
    # accumulate keys, because this function mutates it below.
    if context is None:
        context = {}
    LOG.info("Notifying DatabaseInfra ending with context %s" % context)
    subject = _("[DBAAS] DatabaseInfra is almost full")
    template = "infra_notification"
    addr_from = Configuration.get_by_name("email_addr_from")
    addr_to = Configuration.get_by_name_as_list("new_user_notify_email")
    context['domain'] = get_domain()
    send_mail_template(subject, template, addr_from, addr_to,
                       fail_silently=False, attachments=None, context=context)
def databaseinfra_ending(context=None):
    """Send the "DatabaseInfra is almost full" notification email.

    :param context: optional dict of extra template variables; a 'domain'
        key is added before rendering.
    """
    # A mutable {} default would be shared across calls and silently
    # accumulate keys, because this function mutates it below.
    if context is None:
        context = {}
    LOG.info("Notifying DatabaseInfra ending with context %s" % context)
    subject = _("[DBAAS] DatabaseInfra is almost full")
    template = "infra_notification"
    addr_from = Configuration.get_by_name("email_addr_from")
    addr_to = Configuration.get_by_name_as_list("new_user_notify_email")
    context['domain'] = get_domain()
    send_mail_template(subject, template, addr_from, addr_to,
                       fail_silently=False, attachments=None, context=context)
def revoke_maintenance(request, id):
    """Admin view guard: bail out with an error when no celery worker responds.

    Pings the celery workers before allowing a maintenance revocation; on
    failure redirects back to the referring page with an error message.
    """
    import celery
    from system.models import Configuration

    celery_inspect = celery.current_app.control.inspect()
    celery_workers = Configuration.get_by_name_as_list('celery_workers',)
    try:
        # ping() returning None (no workers alive) raises AttributeError
        # on .keys(), which is handled below.
        workers = celery_inspect.ping().keys()
    except Exception as e:  # was Py2-only "except Exception, e" syntax
        LOG.warn("All celery workers are down! {} :(".format(e))
        messages.add_message(request, messages.ERROR,
                             "Maintenance can't be revoked because all celery workers are down!",)
        return HttpResponseRedirect(request.META.get('HTTP_REFERER'))
def notify_new_user_creation(user=None):
    """Email the notification list that a new user account was created.

    Logs a warning and returns quietly when *user* is missing or the
    email addresses are not configured.
    """
    addr_from = Configuration.get_by_name("email_addr_from")
    addr_to = Configuration.get_by_name_as_list("new_user_notify_email")
    # Guard before touching user attributes: the original built the subject
    # from user.username on the first line, so the documented user=None
    # default raised AttributeError instead of reaching the warning below.
    if not (user and addr_from and addr_to):
        LOG.warning("could not send email for new user creation")
        return
    subject = _("[DBAAS] a new user has just been created: %s" % user.username)
    template = "new_user_notification"
    context = {}
    context['user'] = user
    domain = get_domain()
    context['url'] = domain + reverse('admin:account_team_changelist')
    LOG.debug("user: %s | addr_from: %s | addr_to: %s" %
              (user, addr_from, addr_to))
    send_mail_template(subject, template, addr_from, addr_to,
                       fail_silently=False, attachments=None, context=context)
def revoke_maintenance(request, id):
    """Admin view guard: bail out with an error when no celery worker responds.

    Pings the celery workers before allowing a maintenance revocation; on
    failure redirects back to the referring page with an error message.
    """
    import celery
    from system.models import Configuration

    celery_inspect = celery.current_app.control.inspect()
    celery_workers = Configuration.get_by_name_as_list('celery_workers',)
    try:
        # ping() returning None (no workers alive) raises AttributeError
        # on .keys(), which is handled below.
        workers = celery_inspect.ping().keys()
    except Exception as e:  # was Py2-only "except Exception, e" syntax
        LOG.warn("All celery workers are down! {} :(".format(e))
        messages.add_message(request, messages.ERROR,
                             "Maintenance can't be revoked because all celery workers are down!",)
        return HttpResponseRedirect(request.META.get('HTTP_REFERER'))
def databaseinfra_ending(plan, environment, used, capacity, percent):
    """Send the "DatabaseInfra is almost full" notification email with
    usage details for one plan/environment pair.
    """
    LOG.info("Notifying DatabaseInfra ending")
    subject = _("[DBAAS] DatabaseInfra is almost full")
    template = "infra_notification"
    addr_from = Configuration.get_by_name("email_addr_from")
    addr_to = Configuration.get_by_name_as_list("new_user_notify_email")
    context = {
        'domain': get_domain(),
        'plan': plan,
        'environment': environment,
        'used': used,
        'capacity': capacity,
        'percent': percent,
    }
    send_mail_template(subject, template, addr_from, addr_to,
                       fail_silently=False, attachments=None, context=context)
def notify_new_user_creation(user=None):
    """Email the notification list that a new user account was created.

    Logs a warning and returns quietly when *user* is missing or the
    email addresses are not configured.
    """
    addr_from = Configuration.get_by_name("email_addr_from")
    addr_to = Configuration.get_by_name_as_list("new_user_notify_email")
    # Guard before touching user attributes: the original built the subject
    # from user.username on the first line, so the documented user=None
    # default raised AttributeError instead of reaching the warning below.
    if not (user and addr_from and addr_to):
        LOG.warning("could not send email for new user creation")
        return
    subject = _("[DBAAS] a new user has just been created: %s" % user.username)
    template = "new_user_notification"
    context = {}
    context['user'] = user
    domain = get_domain()
    context['url'] = domain + reverse('admin:account_team_changelist')
    LOG.debug("user: %s | addr_from: %s | addr_to: %s" %
              (user, addr_from, addr_to))
    send_mail_template(
        subject, template, addr_from, addr_to,
        fail_silently=False, attachments=None, context=context
    )
def make_databases_backup(self):
    """Celery task body: back up every persistent DatabaseInfra, env by env.

    Production environments are processed before dev ones (per the
    'prod_envs'/'dev_envs' configuration); within an env, backups run in
    groups with a 5-minute pause between groups. Progress and errors are
    streamed into a TaskHistory record.
    """
    LOG.info("Making databases backups")
    worker_name = get_worker_name()
    task_history = TaskHistory.register(
        request=self.request, worker_name=worker_name, user=None
    )
    task_history.relevance = TaskHistory.RELEVANCE_ERROR
    waiting_msg = "\nWaiting 5 minutes to start the next backup group"
    # Optimistic overall status; downgraded to WARNING/ERROR as backups run.
    status = TaskHistory.STATUS_SUCCESS
    environments = Environment.objects.all()
    prod_envs = Configuration.get_by_name_as_list('prod_envs')
    dev_envs = Configuration.get_by_name_as_list('dev_envs')
    # Prod first, then dev; fall back to every known env when unconfigured.
    env_names_order = prod_envs + dev_envs
    if not env_names_order:
        env_names_order = [env.name for env in environments]
    infras = DatabaseInfra.objects.filter(plan__has_persistence=True)
    for env_name in env_names_order:
        try:
            env = environments.get(name=env_name)
        except Environment.DoesNotExist:
            # Configured name with no matching Environment row: skip it.
            continue
        msg = '\nStarting Backup for env {}'.format(env.name)
        task_history.update_details(persist=True, details=msg)
        databaseinfras_by_env = infras.filter(environment=env)
        error = {}
        backup_number = 0
        # Split the full infra count into 12 groups; pause between groups.
        backups_per_group = len(infras) / 12
        for infra in databaseinfras_by_env:
            if not infra.databases.first():
                # Infra without databases: nothing to back up.
                continue
            if backups_per_group > 0:
                if backup_number < backups_per_group:
                    backup_number += 1
                else:
                    backup_number = 0
                    task_history.update_details(waiting_msg, True)
                    sleep(300)
            group = BackupGroup()
            group.save()
            for instance in infra.instances.filter(read_only=False):
                try:
                    driver = instance.databaseinfra.get_driver()
                    is_eligible = driver.check_instance_is_eligible_for_backup(
                        instance
                    )
                    if not is_eligible:
                        LOG.info(
                            'Instance {} is not eligible for backup'.format(
                                instance
                            )
                        )
                        continue
                except Exception as e:
                    status = TaskHistory.STATUS_ERROR
                    msg = "Backup for %s was unsuccessful. Error: %s" % (
                        str(instance), str(e))
                    LOG.error(msg)
                    # NOTE(review): no `continue` here — the backup below is
                    # still attempted after an eligibility-check failure.
                    # Looks intentional-ish but worth confirming.
                time_now = str(strftime("%m/%d/%Y %H:%M:%S"))
                start_msg = "\n{} - Starting backup for {} ...".format(
                    time_now, instance
                )
                task_history.update_details(persist=True, details=start_msg)
                try:
                    snapshot = make_instance_snapshot_backup(
                        instance=instance, error=error, group=group
                    )
                    if snapshot and snapshot.was_successful:
                        msg = "Backup for %s was successful" % (str(instance))
                        LOG.info(msg)
                    elif snapshot and snapshot.has_warning:
                        status = TaskHistory.STATUS_WARNING
                        msg = "Backup for %s has warning" % (str(instance))
                        LOG.info(msg)
                    else:
                        status = TaskHistory.STATUS_ERROR
                        msg = "Backup for %s was unsuccessful. Error: %s" % (
                            str(instance), error['errormsg'])
                        LOG.error(msg)
                    LOG.info(msg)
                except Exception as e:
                    status = TaskHistory.STATUS_ERROR
                    msg = "Backup for %s was unsuccessful. Error: %s" % (
                        str(instance), str(e))
                    LOG.error(msg)
                time_now = str(strftime("%m/%d/%Y %H:%M:%S"))
                msg = "\n{} - {}".format(time_now, msg)
                task_history.update_details(persist=True, details=msg)
    task_history.update_status_for(status, details="\nBackup finished")
    return
def make_instance_snapshot_backup(instance, error, group,
                                  provider_class=VolumeProviderSnapshot,
                                  target_volume=None, current_hour=None):
    """Take a volume snapshot backup of one instance.

    Locks the instance (WARNING status when the lock cannot be taken),
    takes the snapshot via *provider_class*, measures its size over SSH,
    optionally copies it to the database's configured backup_path, and
    registers the result with dbmonitor.

    :param error: dict mutated in place — 'errormsg' is set on failure.
    :param current_hour: compared (as str) against the
        'make_database_backup_hour' configuration list to avoid a second
        WARNING snapshot in the same day.
    :returns: the Snapshot object (with error status set on failure).
    """
    LOG.info("Make instance backup for {}".format(instance))
    provider = provider_class(instance)
    infra = instance.databaseinfra
    database = infra.databases.first()
    snapshot = Snapshot.create(
        instance, group, target_volume or provider.volume,
        environment=provider.environment
    )
    snapshot_final_status = Snapshot.SUCCESS
    locked = None
    driver = infra.get_driver()
    client = None
    try:
        client = driver.get_client(instance)
        locked = lock_instance(driver, instance, client)
        if not locked:
            # Could not freeze writes: snapshot proceeds but is flagged.
            snapshot_final_status = Snapshot.WARNING
        if 'MySQL' in type(driver).__name__:
            mysql_binlog_save(client, instance)
        current_time = datetime.now()
        # Any WARNING snapshot already taken today for this instance?
        has_snapshot = Snapshot.objects.filter(
            status=Snapshot.WARNING, instance=instance,
            end_at__year=current_time.year,
            end_at__month=current_time.month,
            end_at__day=current_time.day
        )
        backup_hour_list = Configuration.get_by_name_as_list(
            'make_database_backup_hour'
        )
        if (snapshot_final_status == Snapshot.WARNING and has_snapshot):
            if str(current_hour) in backup_hour_list:
                raise Exception(
                    "Backup with WARNING already created today."
                )
        else:
            response = provider.take_snapshot()
            snapshot.done(response)
            snapshot.save()
    except Exception as e:
        errormsg = "Error creating snapshot: {}".format(e)
        error['errormsg'] = errormsg
        set_backup_error(infra, snapshot, errormsg)
        return snapshot
    finally:
        # Always release the write lock, even on failure.
        unlock_instance(driver, instance, client)
    if not snapshot.size:
        # Provider gave no size: measure the snapshot directory over SSH.
        command = "du -sb /data/.snapshot/%s | awk '{print $1}'" % (
            snapshot.snapshot_name
        )
        try:
            output = instance.hostname.ssh.run_script(command)
            size = int(output['stdout'][0])
            snapshot.size = size
        except Exception as e:
            snapshot.size = 0
            LOG.error("Error exec remote command {}".format(e))
    backup_path = database.backup_path
    if backup_path:
        # Copy the snapshot to an external path, purging yesterday's copies.
        now = datetime.now()
        target_path = "{}/{}/{}/{}/{}".format(
            backup_path,
            now.strftime("%Y_%m_%d"),
            instance.hostname.hostname.split('.')[0],
            now.strftime("%Y%m%d%H%M%S"),
            infra.name
        )
        snapshot_path = "/data/.snapshot/{}/data/".format(
            snapshot.snapshot_name
        )
        command = """
if [ -d "{backup_path}" ]
then
    rm -rf {backup_path}/20[0-9][0-9]_[0-1][0-9]_[0-3][0-9] &
    mkdir -p {target_path}
    cp -r {snapshot_path} {target_path} &
fi
""".format(backup_path=backup_path, target_path=target_path,
           snapshot_path=snapshot_path)
        try:
            instance.hostname.ssh.run_script(command)
        except Exception as e:
            LOG.error("Error exec remote command {}".format(e))
    snapshot.status = snapshot_final_status
    snapshot.end_at = datetime.now()
    snapshot.save()
    register_backup_dbmonitor(infra, snapshot)
    return snapshot
def make_databases_backup(self):
    """Celery task body: back up persistent DatabaseInfras scheduled for
    this hour, plus any whose earlier backup_hour today produced nothing.

    Production environments are processed before dev ones (per the
    'prod_envs'/'dev_envs' configuration); within an env, backups run in
    groups with a configurable pause between groups. Progress and errors
    are streamed into a TaskHistory record.
    """
    LOG.info("Making databases backups")
    worker_name = get_worker_name()
    task_history = TaskHistory.register(request=self.request,
                                        worker_name=worker_name, user=None)
    task_history.relevance = TaskHistory.RELEVANCE_ERROR
    backup_group_interval = Configuration.get_by_name_as_int(
        'backup_group_interval', default=1)
    waiting_msg = "\nWaiting {} minute(s) to start the next backup group".format(
        backup_group_interval)
    # Optimistic overall status; downgraded to WARNING/ERROR as backups run.
    status = TaskHistory.STATUS_SUCCESS
    environments = Environment.objects.all()
    prod_envs = Configuration.get_by_name_as_list('prod_envs')
    dev_envs = Configuration.get_by_name_as_list('dev_envs')
    # Prod first, then dev; fall back to every known env when unconfigured.
    env_names_order = prod_envs + dev_envs
    if not env_names_order:
        env_names_order = [env.name for env in environments]
    current_time = datetime.now()
    current_hour = current_time.hour
    # Get all infras with a backup today until the current hour
    # (status=2 presumably means a successful backup_instance — confirm
    # against the Snapshot status constants).
    infras_with_backup_today = DatabaseInfra.objects.filter(
        instances__backup_instance__status=2,
        backup_hour__lt=current_hour,
        plan__has_persistence=True,
        instances__backup_instance__end_at__year=current_time.year,
        instances__backup_instance__end_at__month=current_time.month,
        instances__backup_instance__end_at__day=current_time.day).distinct()
    # Get all infras with pending backups based on infras_with_backup_today
    infras_pending_backup = DatabaseInfra.objects.filter(
        backup_hour__lt=current_hour,
        plan__has_persistence=True,
    ).exclude(pk__in=[infra.pk for infra in infras_with_backup_today])
    # Get all infras to backup on the current hour
    infras_current_hour = DatabaseInfra.objects.filter(
        plan__has_persistence=True,
        backup_hour=current_time.hour)
    # Merging pending and current infras to backup list
    infras = infras_current_hour | infras_pending_backup
    for env_name in env_names_order:
        try:
            env = environments.get(name=env_name)
        except Environment.DoesNotExist:
            # Configured name with no matching Environment row: skip it.
            continue
        msg = '\nStarting Backup for env {}'.format(env.name)
        task_history.update_details(persist=True, details=msg)
        databaseinfras_by_env = infras.filter(environment=env)
        error = {}
        backup_number = 0
        # Split the full infra count into 12 groups; pause between groups.
        backups_per_group = len(infras) / 12
        for infra in databaseinfras_by_env:
            if not infra.databases.first():
                # Infra without databases: nothing to back up.
                continue
            if backups_per_group > 0:
                if backup_number < backups_per_group:
                    backup_number += 1
                else:
                    backup_number = 0
                    task_history.update_details(waiting_msg, True)
                    sleep(backup_group_interval * 60)
            group = BackupGroup()
            group.save()
            for instance in infra.instances.filter(read_only=False):
                try:
                    driver = instance.databaseinfra.get_driver()
                    is_eligible = driver.check_instance_is_eligible_for_backup(
                        instance)
                    if not is_eligible:
                        LOG.info(
                            'Instance {} is not eligible for backup'.format(
                                instance))
                        continue
                except Exception as e:
                    status = TaskHistory.STATUS_ERROR
                    msg = "Backup for %s was unsuccessful. Error: %s" % (
                        str(instance), str(e))
                    LOG.error(msg)
                    # NOTE(review): no `continue` here — the backup below is
                    # still attempted after an eligibility-check failure.
                time_now = str(strftime("%m/%d/%Y %H:%M:%S"))
                start_msg = "\n{} - Starting backup for {} ...".format(
                    time_now, instance)
                task_history.update_details(persist=True, details=start_msg)
                try:
                    snapshot = make_instance_snapshot_backup(instance=instance,
                                                             error=error,
                                                             group=group)
                    if snapshot and snapshot.was_successful:
                        msg = "Backup for %s was successful" % (str(instance))
                        LOG.info(msg)
                    elif snapshot and snapshot.has_warning:
                        status = TaskHistory.STATUS_WARNING
                        msg = "Backup for %s has warning" % (str(instance))
                        LOG.info(msg)
                    else:
                        status = TaskHistory.STATUS_ERROR
                        msg = "Backup for %s was unsuccessful. Error: %s" % (
                            str(instance), error['errormsg'])
                        LOG.error(msg)
                    LOG.info(msg)
                except Exception as e:
                    status = TaskHistory.STATUS_ERROR
                    msg = "Backup for %s was unsuccessful. Error: %s" % (
                        str(instance), str(e))
                    LOG.error(msg)
                time_now = str(strftime("%m/%d/%Y %H:%M:%S"))
                msg = "\n{} - {}".format(time_now, msg)
                task_history.update_details(persist=True, details=msg)
    task_history.update_status_for(status, details="\nBackup finished")
    return
def make_databases_backup(self):
    """Celery task body: back up every persistent DatabaseInfra, env by env.

    Production environments are processed before dev ones (per the
    'prod_envs'/'dev_envs' configuration); within an env, backups run in
    groups with a 5-minute pause between groups. Progress and errors are
    streamed into a TaskHistory record.
    """
    LOG.info("Making databases backups")
    worker_name = get_worker_name()
    task_history = TaskHistory.register(request=self.request,
                                        worker_name=worker_name, user=None)
    task_history.relevance = TaskHistory.RELEVANCE_ERROR
    waiting_msg = "\nWaiting 5 minutes to start the next backup group"
    # Optimistic overall status; downgraded to WARNING/ERROR as backups run.
    status = TaskHistory.STATUS_SUCCESS
    environments = Environment.objects.all()
    prod_envs = Configuration.get_by_name_as_list('prod_envs')
    dev_envs = Configuration.get_by_name_as_list('dev_envs')
    # Prod first, then dev; fall back to every known env when unconfigured.
    env_names_order = prod_envs + dev_envs
    if not env_names_order:
        env_names_order = [env.name for env in environments]
    infras = DatabaseInfra.objects.filter(plan__has_persistence=True)
    for env_name in env_names_order:
        try:
            env = environments.get(name=env_name)
        except Environment.DoesNotExist:
            # Configured name with no matching Environment row: skip it.
            continue
        msg = '\nStarting Backup for env {}'.format(env.name)
        task_history.update_details(persist=True, details=msg)
        databaseinfras_by_env = infras.filter(environment=env)
        error = {}
        backup_number = 0
        # Split the full infra count into 12 groups; pause between groups.
        backups_per_group = len(infras) / 12
        for infra in databaseinfras_by_env:
            if not infra.databases.first():
                # Infra without databases: nothing to back up.
                continue
            if backups_per_group > 0:
                if backup_number < backups_per_group:
                    backup_number += 1
                else:
                    backup_number = 0
                    task_history.update_details(waiting_msg, True)
                    sleep(300)
            group = BackupGroup()
            group.save()
            for instance in infra.instances.filter(read_only=False):
                try:
                    driver = instance.databaseinfra.get_driver()
                    is_eligible = driver.check_instance_is_eligible_for_backup(
                        instance)
                    if not is_eligible:
                        LOG.info(
                            'Instance {} is not eligible for backup'.format(
                                instance))
                        continue
                except Exception as e:
                    status = TaskHistory.STATUS_ERROR
                    msg = "Backup for %s was unsuccessful. Error: %s" % (
                        str(instance), str(e))
                    LOG.error(msg)
                    # NOTE(review): no `continue` here — the backup below is
                    # still attempted after an eligibility-check failure.
                time_now = str(strftime("%m/%d/%Y %H:%M:%S"))
                start_msg = "\n{} - Starting backup for {} ...".format(
                    time_now, instance)
                task_history.update_details(persist=True, details=start_msg)
                try:
                    snapshot = make_instance_snapshot_backup(instance=instance,
                                                             error=error,
                                                             group=group)
                    if snapshot and snapshot.was_successful:
                        msg = "Backup for %s was successful" % (str(instance))
                        LOG.info(msg)
                    elif snapshot and snapshot.has_warning:
                        status = TaskHistory.STATUS_WARNING
                        msg = "Backup for %s has warning" % (str(instance))
                        LOG.info(msg)
                    else:
                        status = TaskHistory.STATUS_ERROR
                        msg = "Backup for %s was unsuccessful. Error: %s" % (
                            str(instance), error['errormsg'])
                        LOG.error(msg)
                    LOG.info(msg)
                except Exception as e:
                    status = TaskHistory.STATUS_ERROR
                    msg = "Backup for %s was unsuccessful. Error: %s" % (
                        str(instance), str(e))
                    LOG.error(msg)
                time_now = str(strftime("%m/%d/%Y %H:%M:%S"))
                msg = "\n{} - {}".format(time_now, msg)
                task_history.update_details(persist=True, details=msg)
    task_history.update_status_for(status, details="\nBackup finished")
    return