def lambda_handler(event, context):
    # Get ignore file list
    ignore_list = []
    try:
        logger.info('Try to get ignore list from ssm parameter')
        ignore_list = ssm.get_parameter(
            Name=ssm_parameter_ignore_list)['Parameter']['Value'].splitlines()
        logger.info(f'Get ignore list: {str(ignore_list)}')
    except Exception:
        logger.info('No ignore list in ssm parameter')

    # Check SQS is empty or not
    if check_sqs_empty(sqs, sqs_queue):
        logger.info('Job sqs queue is empty, now process comparing s3 bucket...')
        for bucket_para in load_bucket_para:
            src_bucket = bucket_para['src_bucket']
            src_prefix = bucket_para['src_prefix']
            des_bucket = bucket_para['des_bucket']
            des_prefix = bucket_para['des_prefix']

            # Get List on S3
            logger.info('Get source bucket')
            src_file_list = get_src_file_list(s3_src_client, src_bucket, src_prefix,
                                              JobsenderCompareVersionId)
            logger.info('Get destination bucket')
            des_file_list = get_des_file_list(s3_des_client, des_bucket, des_prefix,
                                              table, JobsenderCompareVersionId)

            # Generate job list
            job_list, ignore_records = delta_job_list(src_file_list, des_file_list,
                                                      src_bucket, src_prefix,
                                                      des_bucket, des_prefix,
                                                      ignore_list, JobsenderCompareVersionId)

            # Upload jobs to sqs
            if len(job_list) != 0:
                job_upload_sqs_ddb(sqs, sqs_queue, job_list)
                max_object = max(job_list, key=itemgetter('Size'))
                MaxChunkSize = int(max_object['Size'] / 10000) + 1024
                if max_object['Size'] >= 50 * 1024 * 1024 * 1024:
                    logger.warning(
                        f'Max object in job_list is {str(max_object)}. Remember to check instance memory >= '
                        f'MaxChunksize({MaxChunkSize}) x MaxThread x MaxParallelFile')
            else:
                logger.info('Source list are all in Destination, no job to send.')
    else:
        logger.error('Job sqs queue is not empty or fail to get_queue_attributes. Stop process.')
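
# check_sqs_empty() above is defined elsewhere in this project. A minimal sketch of the
# idea, assuming the queue is identified by its URL and using boto3's get_queue_attributes;
# the "_sketch" name and the exact attributes checked are assumptions for illustration,
# not the project's actual implementation:
def check_sqs_empty_sketch(sqs, sqs_queue_url):
    """Return True only when the queue reports no visible, in-flight or delayed messages."""
    try:
        attributes = sqs.get_queue_attributes(
            QueueUrl=sqs_queue_url,
            AttributeNames=['ApproximateNumberOfMessages',
                            'ApproximateNumberOfMessagesNotVisible',
                            'ApproximateNumberOfMessagesDelayed'])['Attributes']
        return all(int(count) == 0 for count in attributes.values())
    except Exception as e:
        logger.error(f'Fail to get_queue_attributes: {str(e)}')
        return False  # Treat an API failure as "not empty" so the comparison run stops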
        # Just backup for debug
        logger.info('Writing job list to local file backup...')
        t = time.localtime()
        start_time = f'{t.tm_year}-{t.tm_mon}-{t.tm_mday}-{t.tm_hour}-{t.tm_min}-{t.tm_sec}'
        log_path = os.path.split(os.path.abspath(__file__))[0] + '/s3_migration_log'
        if job_list:
            local_backup_list = f'{log_path}/job-list-{src_bucket}-{start_time}.json'
            with open(local_backup_list, 'w') as f:
                json.dump(job_list, f)
            logger.info(f'Finish writing: {os.path.abspath(local_backup_list)}')
        if ignore_records:
            local_ignore_records = f'{log_path}/ignore-records-{src_bucket}-{start_time}.json'
            with open(local_ignore_records, 'w') as f:
                json.dump(ignore_records, f)
            logger.info(f'Finish writing: {os.path.abspath(local_ignore_records)}')

        # Upload jobs to sqs
        if len(job_list) != 0:
            job_upload_sqs_ddb(sqs, sqs_queue, table, job_list)
        else:
            logger.info('Source list are all in Destination, no job to send.')
else:
    logger.error('Job sqs queue is not empty or fail to get_queue_attributes. Stop process.')
print('Completed and logged to file:', os.path.abspath(log_file_name))
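
# job_upload_sqs_ddb() above is defined elsewhere in this project. A minimal sketch of the
# SQS half of the idea, assuming each job is a JSON-serialisable dict and using boto3's
# send_message_batch (10 messages per call); the "_sketch" name and the batching details
# are assumptions, and the DynamoDB bookkeeping implied by the real function's name and
# its `table` argument is omitted here:
def job_upload_sqs_sketch(sqs, sqs_queue_url, job_list):
    """Send every job in job_list to SQS, 10 entries per send_message_batch call."""
    batch = []
    for index, job in enumerate(job_list):
        batch.append({'Id': str(index), 'MessageBody': json.dumps(job)})
        # Flush a full batch, or whatever remains at the end of the list
        if len(batch) == 10 or index == len(job_list) - 1:
            sqs.send_message_batch(QueueUrl=sqs_queue_url, Entries=batch)
            batch = []
    logger.info(f'Complete uploading {len(job_list)} jobs to sqs queue')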
                                          JobsenderCompareVersionId=JobsenderCompareVersionId)
        # Generate job list
        job_list, ignore_records = delta_job_list(src_file_list=src_file_list,
                                                  des_file_list=des_file_list,
                                                  src_bucket=src_bucket,
                                                  src_prefix=src_prefix,
                                                  des_bucket=des_bucket,
                                                  des_prefix=des_prefix,
                                                  ignore_list=ignore_list,
                                                  JobsenderCompareVersionId=JobsenderCompareVersionId)

        # Upload jobs to sqs
        if len(job_list) != 0:
            job_upload_sqs_ddb(sqs=sqs, sqs_queue=sqs_queue, job_list=job_list)
            max_object = max(job_list, key=itemgetter('Size'))
            MaxChunkSize = int(max_object['Size'] / 10000) + 1024
            if MaxChunkSize < 5 * 1024 * 1024:
                MaxChunkSize = 5 * 1024 * 1024
            logger.warning(
                f'Max object size in job_list: {max_object["Size"]}.\n Require instance memory'
                f' > MaxChunksize x MaxThread x MaxParallelFile, i.e. '
                f'{MaxChunkSize} x {MaxThread} x {MaxParallelFile} = '
                f'{MaxChunkSize * MaxThread * MaxParallelFile}.\n If less memory, instance may crash!')
        else:
            logger.info('Source list are all in Destination, no job to send.')
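
# Why int(Size / 10000) + 1024 with a 5 MiB floor: S3 multipart uploads allow at most
# 10,000 parts per object, and every part except the last must be at least 5 MiB, so the
# chunk size has to satisfy both limits. A small illustrative helper (the "_sketch" name
# is an assumption, not part of the original code):
def calc_min_chunk_size_sketch(object_size):
    """Smallest chunk size that fits object_size into at most 10,000 parts of >= 5 MiB."""
    chunk_size = int(object_size / 10000) + 1024  # +1024 headroom so 10,000 parts always cover the object
    return max(chunk_size, 5 * 1024 * 1024)

# Example: a 100 GiB object needs chunks of at least ~10.2 MiB, while anything below
# roughly 48.8 GiB can simply use the 5 MiB floor.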