Example #1
def count_job_duplicates(options: argparse.Namespace) -> None:
    jobs = bert_utils.scan_jobs(options.module_name)
    jobs = bert_utils.map_jobs(jobs, options.module_name)
    client = bert_datasource.RedisConnection.ParseURL(
        bert_constants.REDIS_URL).client
    for job_name, conf in jobs.items():
        job = conf['job']
        work_queue, done_queue, ologger = bert_utils.comm_binders(job)
        step = 200
        offset = 0
        total = client.llen(work_queue._table_name)
        entry_hashes = []
        while len(entry_hashes) < total:
            # Page through the Redis list in step-sized chunks
            # (LRANGE's end index is inclusive).
            next_items = client.lrange(work_queue._table_name, offset,
                                       offset + step - 1)
            if not next_items:
                break
            entry_hashes.extend(
                hashlib.sha256(entry).hexdigest() for entry in next_items)
            offset += step

        unique_hash_count = len(set(entry_hashes))
        logger.info(
            f'Duplicate Entries for Job[{job_name}]: {len(entry_hashes) - unique_hash_count}; total: {len(entry_hashes)}'
        )
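
For context, a minimal sketch of how a function such as count_job_duplicates might be wired into a command-line entry point. The capture_options helper and its flag are hypothetical, not part of bert:

import argparse

def capture_options() -> argparse.Namespace:
    # Hypothetical parser; bert's actual CLI defines its own options.
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--module-name', default='bert')
    return parser.parse_args()

if __name__ == '__main__':
    count_job_duplicates(capture_options())
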
Example #2
def start_service(options: argparse.Namespace) -> None:
    if options.flush_db:
        bert_utils.flush_db()

    jobs = bert_utils.scan_jobs(options.module_name)
    jobs = bert_utils.map_jobs(jobs, options.module_name)

    signal.signal(signal.SIGINT, handle_signal)
    from bert.runner import manager
    manager.run_jobs(options, jobs)
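
Examples #2 and #6 register a handle_signal callback that is not shown here. A minimal sketch of such a handler, assuming it only needs to abort cleanly on SIGINT (bert's real handler may do more cleanup):

import sys
import types
import typing

def handle_signal(signum: int, frame: typing.Optional[types.FrameType]) -> None:
    # Hypothetical handler: exit when Ctrl-C interrupts the run loop.
    sys.exit(signum)
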
Example #3
def scan_job_spaces(options: argparse.Namespace) -> None:
    jobs = bert_utils.scan_jobs(options.module_name)
    jobs = bert_utils.map_jobs(jobs, options.module_name)
    client = bert_datasource.RedisConnection.ParseURL(
        bert_constants.REDIS_URL).client
    for job_name, conf in jobs.items():
        job = conf['job']
        work_queue, done_queue, ologger = bert_utils.comm_binders(job)
        total = client.llen(work_queue._table_name)
        logger.info(f'Work Total for Job[{job_name}]: {total}')
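
Note that Redis LLEN is O(1), so this per-job total check stays cheap even for large work queues; Example #1 pages through the actual entries with LRANGE only when it needs their contents.
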
Example #4
def print_job_spaces(options: argparse.Namespace) -> None:
    jobs = bert_utils.scan_jobs(options.module_name)
    jobs = bert_utils.map_jobs(jobs, options.module_name)
    for job_name, conf in jobs.items():
        job = conf['job']
        work_queue, done_queue, ologger = bert_utils.comm_binders(job)
        logging.info(
            f'Work Table Space[{work_queue._table_name}] for Job[{job_name}]')
        logging.info(
            f'Done Table Space[{done_queue._table_name}] for Job[{job_name}]')
Example #5
def start_service(options: argparse.Namespace) -> None:
    if options.flush_db:
        bert_utils.flush_db()

    setup(options)
    jobs = bert_utils.scan_jobs(options)
    if options.web_service:
        start_webservice(options)

    elif options.web_service_daemon:
        start_daemon(options)

    else:
        raise NotImplementedError('Unable to start service')
Example #6
def test_cognito_event(options: argparse.Namespace) -> None:
    triggers = ','.join([member.value for member in CognitoTrigger])
    if options.cognito_trigger is None:
        raise Exception(f'Cognito Trigger type required: {triggers}')

    if options.flush_db:
        bert_utils.flush_db()

    jobs = bert_utils.scan_jobs(options.module_name)
    jobs = bert_utils.map_jobs(jobs, options.module_name)

    signal.signal(signal.SIGINT, handle_signal)
    from bert.runner import manager
    manager.run_jobs(options, jobs)
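
Example #6 joins the values of a CognitoTrigger enum defined elsewhere in bert. A hypothetical sketch of its shape, with member values modeled on AWS Cognito trigger sources rather than bert's actual definition:

import enum

class CognitoTrigger(enum.Enum):
    # Hypothetical members; the real enum in bert may differ.
    PRE_SIGN_UP = 'PreSignUp_SignUp'
    POST_CONFIRMATION = 'PostConfirmation_ConfirmSignUp'
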
Example #7
def deploy_service(options: argparse.Namespace) -> None:
    jobs: typing.Dict[str,
                      typing.Any] = bert_utils.scan_jobs(options.module_name)
    jobs: typing.Dict[str, typing.Any] = bert_utils.map_jobs(
        jobs, options.module_name)

    if options.service == Service.AWSLambda:
        if options.invoke:
            import boto3
            job_name: str = next(iter(jobs.keys()))
            invoke_args: typing.List[typing.Dict[str, typing.Any]] = \
                jobs[job_name]['aws-deploy']['invoke-args']
            client = boto3.client('lambda')
            if len(invoke_args) < 1:
                logger.info(f'Invoking Job[{job_name}]')
                client.invoke(FunctionName=job_name, InvocationType='Event')

            else:
                logger.info(
                    f'Invoking Job[{job_name}] with {len(invoke_args)} payloads.'
                )
                if options.invoke_async:
                    for args in invoke_args:
                        payload: bytes = json.dumps({
                            'bert-inputs': [args]
                        }).encode(bert_constants.ENCODING)
                        client.invoke(FunctionName=job_name,
                                      InvocationType='Event',
                                      Payload=payload)
                else:
                    payload: bytes = json.dumps({
                        'bert-inputs': invoke_args
                    }).encode(bert_constants.ENCODING)
                    client.invoke(FunctionName=job_name,
                                  InvocationType='Event',
                                  Payload=payload)

            import sys
            sys.exit(0)

        if options.run_monitor:
            from bert.deploy import reporting
            logger.info('Running monitor function locally')
            reporting.monitor_function_progress(options.module_name)
            import sys
            sys.exit(0)

        bert_deploy_utils.validate_inputs(jobs)
        bert_deploy_utils.build_project(jobs)
        bert_deploy_utils.build_lambda_handlers(jobs)
        bert_deploy_utils.build_archives(jobs)
        bert_deploy_utils.create_roles(jobs)
        bert_deploy_utils.scan_dynamodb_tables(jobs)
        bert_deploy_utils.scan_cognito_integrations(jobs)
        bert_deploy_utils.destroy_cognito_integrations(jobs)
        bert_deploy_utils.destroy_lambda_to_table_bindings(jobs)
        bert_deploy_utils.destroy_lambda_concurrency(jobs)
        bert_deploy_utils.destroy_sns_topic_lambdas(jobs)
        if options.rebuild_api_gateway:
            bert_deploy_utils.destroy_api_endpoints(jobs)

        bert_deploy_utils.destroy_lambdas(jobs)
        if options.flush:
            bert_deploy_utils.destroy_dynamodb_tables(jobs)

        bert_deploy_utils.create_lambdas(jobs)
        bert_deploy_utils.create_lambda_concurrency(jobs)
        bert_deploy_utils.create_dynamodb_tables(jobs)
        bert_deploy_utils.create_reporting_dynamodb_table()
        bert_deploy_utils.bind_lambdas_to_tables(jobs)
        bert_deploy_utils.bind_events_for_bottle_functions(jobs)
        bert_deploy_utils.bind_events_for_init_function(jobs)
        bert_deploy_utils.create_api_endpoints(jobs)
        bert_deploy_utils.create_cognito_integrations(jobs)
        bert_deploy_utils.destroy_monitor()
        # bert_deploy_utils.deploy_monitor(options.module_name)

    else:
        raise NotImplementedError(options.service)
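
The options.invoke branch above reads jobs[job_name]['aws-deploy']['invoke-args'] and wraps entries in a {'bert-inputs': [...]} payload. A hypothetical job mapping illustrating the shape that access pattern implies (job name and argument keys are made up):

example_jobs = {
    'example-job': {
        'aws-deploy': {
            # With options.invoke_async, each dict becomes its own Lambda
            # payload; otherwise the whole list ships in a single payload.
            'invoke-args': [
                {'record-id': 'a1'},
                {'record-id': 'b2'},
            ],
        },
    },
}
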
Example #8
def monitor_function_progress(module_name: str = None) -> None:
    import boto3
    from bert import (utils as bert_utils,
                      constants as bert_constants)

    if module_name is None:
        module_name = os.environ.get('BERT_MODULE_NAME', None)
        if module_name is None:
            raise NotImplementedError
        logger.info(f'Using Bert Module Name[{module_name}]')

    dynamodb_client = boto3.client('dynamodb')
    lambda_client = boto3.client('lambda')
    time_offset: int = 15
    logger.info('Running Monitor function')
    jobs: typing.Dict[str, typing.Any] = bert_utils.scan_jobs(module_name)
    jobs: typing.Dict[str, typing.Any] = bert_utils.map_jobs(jobs, module_name)
    logger.info(f"Jobs Found[{','.join(jobs.keys())}]")
    for job_name, conf in jobs.items():
        logger.info(f'Processing Job[{job_name}]')
        scan_filter = {
            'job_name': {
                'AttributeValueList': [{
                    'S': job_name
                }],
                'ComparisonOperator': 'EQ'
            }
        }
        # Clean stale entries out of the reporting table. The Expected clause
        # below makes delete_item raise if the item was already removed.
        procd_jobs: typing.List[typing.Dict[str, typing.Any]] = []
        for page in dynamodb_client.get_paginator('scan').paginate(
                ConsistentRead=True, TableName=TABLE_NAME,
                ScanFilter=scan_filter):
            for item in page['Items']:
                created: datetime = datetime.strptime(
                    item['created']['S'], bert_constants.REPORTING_TIME_FORMAT)
                if created < datetime.utcnow() - timedelta(
                        minutes=time_offset):
                    dynamodb_client.delete_item(
                        TableName=TABLE_NAME,
                        Key={'identity': {
                            'S': item['identity']['S']
                        }},
                        Expected={
                            'identity': {
                                'Exists': True,
                                'Value': {
                                    'S': item['identity']['S']
                                }
                            }
                        })
                else:
                    procd_jobs.append({'created': created, 'item': item})

        logger.info(
            f"Proc'd jobs found[{len(procd_jobs)}] for Job[{job_name}]")
        monitor_items = dynamodb_client.scan(ConsistentRead=True,
                                             TableName=TABLE_NAME,
                                             ScanFilter=scan_filter,
                                             Limit=1)['Items']
        logger.info(
            f"Monitor items found[{len(monitor_items)}] for Job[{job_name}]")
        if len(monitor_items) == 0:
            try:
                work_items = dynamodb_client.scan(
                    ConsistentRead=True,
                    TableName=conf['spaces']['work-key'],
                    Limit=1)['Items']
            except dynamodb_client.exceptions.ResourceNotFoundException:
                work_items = []

            logger.info(
                f"Work items found[{len(work_items)}] for Job[{job_name}]")
            if len(work_items) > 0:
                try:
                    lambda_client.get_function(FunctionName=job_name)
                except lambda_client.exceptions.ResourceNotFoundException:
                    logger.info(f"Job[{job_name}] Lambda doesn't exist")
                else:
                    logger.info(f"Restarting Job[{job_name}]")

                    if conf['spaces'][
                            'pipeline-type'] == bert_constants.PipelineType.CONCURRENT:
                        # Invoke once per missing proc'd item to refill
                        # the concurrent pipeline.
                        for idx in range(conf['spaces']['min_proced_items'] -
                                         len(procd_jobs)):
                            lambda_client.invoke(FunctionName=job_name,
                                                 InvocationType='Event',
                                                 Payload=b'{}')

                    elif conf['spaces'][
                            'pipeline-type'] == bert_constants.PipelineType.BOTTLE:
                        if len(procd_jobs) == 0:
                            lambda_client.invoke(FunctionName=job_name,
                                                 InvocationType='Event',
                                                 Payload=b'{}')

                    else:
                        raise NotImplementedError
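
In the CONCURRENT branch above, the restart loop issues min_proced_items - len(procd_jobs) invocations; for example, with min_proced_items set to 4 and one surviving proc'd entry, the Lambda is invoked three times to refill the pipeline. The BOTTLE branch instead restarts the function only when no proc'd entries remain at all.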