def lambda_handler(event, context):

    # Get ignore file list
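    # (read from an SSM parameter, one ignore entry per line)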
    ignore_list = []
    try:
        logger.info('Try to get ignore list from ssm parameter')
        ignore_list = ssm.get_parameter(
            Name=ssm_parameter_ignore_list)['Parameter']['Value'].splitlines()
        logger.info(f'Get ignore list: {str(ignore_list)}')
    except Exception:
        logger.info('No ignore list in ssm parameter')

    # Check SQS is empty or not
    if check_sqs_empty(sqs, sqs_queue):
        logger.info(
            'Job sqs queue is empty, now process comparing s3 bucket...')
        for bucket_para in load_bucket_para:
            src_bucket = bucket_para['src_bucket']
            src_prefix = bucket_para['src_prefix']
            des_bucket = bucket_para['des_bucket']
            des_prefix = bucket_para['des_prefix']

            # Get List on S3
            logger.info('Get source bucket')
            src_file_list = get_src_file_list(s3_src_client, src_bucket,
                                              src_prefix,
                                              JobsenderCompareVersionId)
            logger.info('Get destination bucket')
            des_file_list = get_des_file_list(s3_des_client, des_bucket,
                                              des_prefix, table,
                                              JobsenderCompareVersionId)
            # Generate job list
            job_list, ignore_records = delta_job_list(
                src_file_list, des_file_list, src_bucket, src_prefix,
                des_bucket, des_prefix, ignore_list, JobsenderCompareVersionId)
            # Upload jobs to sqs
            if len(job_list) != 0:
                job_upload_sqs_ddb(sqs, sqs_queue, job_list)
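                # S3 multipart uploads allow at most 10,000 parts per object,
                # so the part (chunk) size must be at least Size/10000, padded.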
                max_object = max(job_list, key=itemgetter('Size'))
                MaxChunkSize = int(max_object['Size'] / 10000) + 1024
                if max_object['Size'] >= 50 * 1024 * 1024 * 1024:
                    logger.warning(
                        f'Max object in job_list is {str(max_object)}. Remember to check instance memory >= '
                        f'MaxChunkSize({MaxChunkSize}) x MaxThread x MaxParallelFile'
                    )
            else:
                logger.info(
                    'Source list is all in Destination, no job to send.')
    else:
        logger.error(
            'Job sqs queue is not empty or fail to get_queue_attributes. Stop process.'
        )
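
# A minimal sketch of how the check_sqs_empty() helper used above might work
# (an assumption for illustration, not the project's actual implementation;
# it assumes sqs_queue holds the queue URL and reuses the module-level logger):
# the queue only counts as empty when both the visible and the in-flight
# message counts reported by SQS are zero.
def check_sqs_empty(sqs, sqs_queue):
    try:
        attr = sqs.get_queue_attributes(
            QueueUrl=sqs_queue,
            AttributeNames=['ApproximateNumberOfMessages',
                            'ApproximateNumberOfMessagesNotVisible'])['Attributes']
        return (int(attr['ApproximateNumberOfMessages']) == 0
                and int(attr['ApproximateNumberOfMessagesNotVisible']) == 0)
    except Exception as e:
        logger.error(f'Fail to get_queue_attributes: {str(e)}')
        return False  # treat the failure as "not empty" so processing stops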

Example #2

            # Just backup for debug
            logger.info('Writing job list to local file backup...')
            t = time.localtime()
            start_time = f'{t.tm_year}-{t.tm_mon}-{t.tm_mday}-{t.tm_hour}-{t.tm_min}-{t.tm_sec}'
            log_path = os.path.split(
                os.path.abspath(__file__))[0] + '/s3_migration_log'
            if job_list:
                local_backup_list = f'{log_path}/job-list-{src_bucket}-{start_time}.json'
                with open(local_backup_list, 'w') as f:
                    json.dump(job_list, f)
                logger.info(
                    f'Finish writing: {os.path.abspath(local_backup_list)}')
            if ignore_records:
                local_ignore_records = f'{log_path}/ignore-records-{src_bucket}-{start_time}.json'
                with open(local_ignore_records, 'w') as f:
                    json.dump(ignore_records, f)
                logger.info(
                    f'Finish writing: {os.path.abspath(local_ignore_records)}')

            # Upload jobs to sqs
            if len(job_list) != 0:
                job_upload_sqs_ddb(sqs, sqs_queue, table, job_list)
            else:
                logger.info(
                    'Source list is all in Destination, no job to send.')
    else:
        logger.error(
            'Job sqs queue is not empty or fail to get_queue_attributes. Stop process.'
        )
    print('Completed and logged to file:', os.path.abspath(log_file_name))
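
# A hypothetical sketch of the SQS half of job_upload_sqs_ddb() (an assumption
# for illustration only, not the actual implementation; the DynamoDB
# bookkeeping implied by the table argument is omitted, and the module-level
# json/logger from the examples above are assumed): jobs are sent in batches
# of 10, the maximum entry count that sqs.send_message_batch() accepts.
def job_upload_sqs_ddb(sqs, sqs_queue, table, job_list):
    for start in range(0, len(job_list), 10):
        batch = job_list[start:start + 10]
        sqs.send_message_batch(
            QueueUrl=sqs_queue,
            Entries=[{'Id': str(index), 'MessageBody': json.dumps(job)}
                     for index, job in enumerate(batch)])
    logger.info(f'Uploaded {len(job_list)} jobs to sqs queue')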
Example #3
                JobsenderCompareVersionId=JobsenderCompareVersionId)
            # Generate job list
            job_list, ignore_records = delta_job_list(
                src_file_list=src_file_list,
                des_file_list=des_file_list,
                src_bucket=src_bucket,
                src_prefix=src_prefix,
                des_bucket=des_bucket,
                des_prefix=des_prefix,
                ignore_list=ignore_list,
                JobsenderCompareVersionId=JobsenderCompareVersionId)

            # Upload jobs to sqs
            if len(job_list) != 0:
                job_upload_sqs_ddb(sqs=sqs,
                                   sqs_queue=sqs_queue,
                                   job_list=job_list)
                max_object = max(job_list, key=itemgetter('Size'))
                MaxChunkSize = int(max_object['Size'] / 10000) + 1024
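                # S3 multipart parts must be at least 5 MiB, so enforce that floor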
                if MaxChunkSize < 5 * 1024 * 1024:
                    MaxChunkSize = 5 * 1024 * 1024
                logger.warning(
                    f'Max object size in job_list: {max_object["Size"]}.\n Require instance memory'
                    f' > MaxChunkSize x MaxThread x MaxParallelFile, i.e. '
                    f'{MaxChunkSize} x {MaxThread} x {MaxParallelFile} = '
                    f'{MaxChunkSize*MaxThread*MaxParallelFile}.\n If memory is lower, the instance may crash!'
                )
            else:
                logger.info(
                    'Source list is all in Destination, no job to send.')
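
# A worked example of the memory estimate in the warning above. The 500 GiB
# object size and the thread / parallel-file counts are illustrative
# assumptions, not values taken from this code.
size_of_largest_object = 500 * 1024 ** 3                      # 500 GiB
chunk = max(int(size_of_largest_object / 10000) + 1024,       # >= Size/10000
            5 * 1024 * 1024)                                  # >= 5 MiB floor
max_thread, max_parallel_file = 5, 5                          # assumed settings
need = chunk * max_thread * max_parallel_file
print(f'Buffer memory needed: about {need / 1024 ** 2:.0f} MiB')  # ~1280 MiB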