def lambda_handler(event, context):
    # Look up the per-account configuration; bail out early if this account
    # has no entry in project_config.py.
    account_id = get_account(context)
    if account_id not in ACCOUNT_CONFIGS:
        logger.error("Couldn't find configuration for {} in project_config.py.".format(account_id))
        return

    account_config = ACCOUNT_CONFIGS[account_id]
    exclude_from_backup = account_config['exclude_from_backup']

    logger.info("Describing tables in the account.")
    table_descriptions = get_table_descriptions(exclude_from_backup)

    # Dispatch to the action the incoming event maps to, passing along
    # everything the handlers need.
    action = detect_action(event)
    action(**{
        'table_descriptions': table_descriptions,
        'pipeline_util': DataPipelineUtil(),
        'dynamodb_booster': DynamoDbBooster(table_descriptions,
                                            account_config['backup_bucket'],
                                            INITIAL_READ_THROUGHPUT_PERCENT),
        'account': account_id,
        'log_bucket': account_config['log_bucket'],
        'sns_endpoint': get_sns_endpoint(context),
        'backup_bucket': account_config['backup_bucket'],
        'emr_subnet': account_config['emr_subnet'],
        'region': _extract_from_arn(context.invoked_function_arn, 3)
    })
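The handler leans on a few small helpers that are not reproduced in this listing. Below is a minimal sketch of how _extract_from_arn and get_account could be implemented, assuming the standard Lambda function ARN layout (arn:aws:lambda:region:account-id:function:name); the real helpers may differ:

def _extract_from_arn(arn, index):
    # ARN components are colon-separated; with the layout above,
    # index 3 is the region and index 4 the account id.
    return arn.split(':')[index]


def get_account(context):
    # Derive the account id from the ARN of the currently running function.
    return _extract_from_arn(context.invoked_function_arn, 4)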
def __init__(self, table_descriptions, backup_bucket, read_throughput_percent):
    self.table_descriptions = table_descriptions
    self.backup_bucket = backup_bucket
    self.read_throughput_percent = read_throughput_percent
    self.dynamo_db_util = DynamoDBUtil()
    self.config_util = ConfigUtil()
    self.data_pipeline_util = DataPipelineUtil()
    self.application_auto_scaling_util = ApplicationAutoScalingUtil()
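The rest of DynamoDbBooster is not shown here, but the constructor already hints at its job: temporarily raise read capacity so the export finishes faster, then restore the original settings afterwards. As a rough illustration of the kind of boto3 call that involves, here is a hypothetical scale_read_capacity helper; the name and the multiplier semantics are assumptions, not part of the project:

import boto3


def scale_read_capacity(table_name, multiplier):
    # Hypothetical helper: adjust a provisioned table's read capacity by a
    # multiplier while leaving writes untouched. Tables managed by
    # Application Auto Scaling also need their scaling policies handled,
    # which is why the real class wraps ApplicationAutoScalingUtil as well.
    dynamodb = boto3.client('dynamodb')
    throughput = dynamodb.describe_table(
        TableName=table_name)['Table']['ProvisionedThroughput']
    dynamodb.update_table(
        TableName=table_name,
        ProvisionedThroughput={
            'ReadCapacityUnits': max(1, int(throughput['ReadCapacityUnits'] * multiplier)),
            'WriteCapacityUnits': throughput['WriteCapacityUnits']
        })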
def __init__(self):
    self.s3_util = S3Util()
    self.data_pipeline_util = DataPipelineUtil()
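The utility classes instantiated here are thin wrappers around boto3. For example, S3Util only needs the handful of JSON helpers that ConfigUtil calls below; a minimal sketch, assuming no extras such as retries or server-side encryption:

import json

import boto3


class S3Util(object):
    def __init__(self):
        self.s3 = boto3.client('s3')

    def put_json(self, bucket, key, data):
        # default=str keeps datetimes from describe_table serialisable.
        self.s3.put_object(Bucket=bucket, Key=key,
                           Body=json.dumps(data, default=str))

    def get_json(self, bucket, key):
        response = self.s3.get_object(Bucket=bucket, Key=key)
        return json.loads(response['Body'].read())

    def list_objects(self, bucket, prefix):
        return self.s3.list_objects_v2(Bucket=bucket, Prefix=prefix)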
class ConfigUtil(object):
    def __init__(self):
        self.s3_util = S3Util()
        self.data_pipeline_util = DataPipelineUtil()

    def save_configuration(self, pipeline_definitions, backup_bucket,
                           table_descriptions, scaling_policies, scalable_targets):
        # Persist everything needed for a later restore as a single JSON
        # metadata object in the backup bucket.
        self.s3_util.put_json(
            backup_bucket,
            self._get_metadata_file_name(),
            {
                "Tables": table_descriptions,
                "Pipelines": pipeline_definitions,
                "ScalingPolicies": scaling_policies,
                "ScalableTargets": scalable_targets
            })

    def load_configuration(self, backup_bucket):
        # Return the most recently written metadata object, or None if the
        # bucket holds no configuration yet.
        contents = self.s3_util.list_objects(backup_bucket, COMMON_PREFIX).get(
            "Contents", [])
        contents = sorted(contents, key=lambda x: x['LastModified'], reverse=True)
        if contents:
            return self.s3_util.get_json(backup_bucket, contents[0].get('Key'))
        return None

    def _get_metadata_file_name(self):
        return '{}-{}'.format(COMMON_PREFIX, get_date_suffix())

    def list_backed_up_tables(self, pipelines, backup_bucket):
        finished_pipelines = self.list_finished_pipelines(backup_bucket, pipelines)
        backed_up_tables = []
        for pipeline in pipelines:
            if pipeline['pipeline_id'] in finished_pipelines:
                backed_up_tables += pipeline['backed_up_tables']
        return backed_up_tables

    def list_finished_pipelines(self, backup_bucket=None, backup_pipelines=None):
        if not backup_pipelines:
            last_configuration = self.load_configuration(backup_bucket)
            if last_configuration:
                backup_pipelines = last_configuration['Pipelines']
        if not backup_pipelines:
            logger.error("Couldn't find any backed up tables. Has your backup run?")
            return []

        # Use a list (not a lazy map object) so the membership checks inside
        # the loop below work on every iteration.
        backup_pipeline_names = [x['pipeline_id'] for x in backup_pipelines]
        pipelines = self.data_pipeline_util.describe_pipelines()
        finished_pipelines = []
        for pipeline in pipelines:
            fields = pipeline["fields"]
            pipeline_id = pipeline["pipelineId"]
            logger.info("Checking pipeline {}".format(pipeline_id))
            if pipeline_id not in backup_pipeline_names:
                continue
            for field in fields:
                if field["key"] != "@pipelineState":
                    continue
                if field["stringValue"] in DONE_STATES:
                    logger.info("Pipeline {} state is in DONE_STATES.".format(pipeline_id))
                    logger.debug(str(pipeline))
                    finished_pipelines.append(pipeline_id)
        return finished_pipelines
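list_finished_pipelines expects describe_pipelines to return the raw pipeline descriptions, i.e. objects carrying a pipelineId and a fields list that includes @pipelineState. Here is a sketch of how DataPipelineUtil could produce them by paging through the Data Pipeline API; the real implementation is not shown here and may differ:

import boto3


class DataPipelineUtil(object):
    def __init__(self):
        self.client = boto3.client('datapipeline')

    def describe_pipelines(self):
        # Collect all pipeline ids, paging until hasMoreResults is false.
        pipeline_ids = []
        kwargs = {}
        while True:
            page = self.client.list_pipelines(**kwargs)
            pipeline_ids += [p['id'] for p in page['pipelineIdList']]
            if not page.get('hasMoreResults'):
                break
            kwargs['marker'] = page['marker']

        # describe_pipelines accepts at most 25 ids per call.
        descriptions = []
        for i in range(0, len(pipeline_ids), 25):
            batch = pipeline_ids[i:i + 25]
            response = self.client.describe_pipelines(pipelineIds=batch)
            descriptions += response['pipelineDescriptionList']
        return descriptions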