Example #1
def run_manual_code(study_id):
    """
    Create an AWS Batch job for the Study specified
    :param study_id: Primary key of a Study
    """
    # We assume the cluster is configured in only one region.
    pipeline_region = get_current_region()

    # Get the object ID of the study, used in the pipeline
    query = Study.objects.filter(pk=study_id)
    if not query.exists():
        return abort(404)
    object_id = query.get().object_id

    error_sentry = make_error_sentry("data",
                                     tags={"pipeline_frequency": "manually"})
    # Get new data access credentials for the manual user, submit a manual job,
    # and display a message. Report all errors to Sentry, including
    # DataPipelineNotConfigured errors.
    with error_sentry:
        ssm_client = get_boto_client('ssm', pipeline_region)
        refresh_data_access_credentials('manually',
                                        ssm_client=ssm_client,
                                        webserver=True)
        batch_client = get_boto_client('batch', pipeline_region)
        create_one_job('manually', object_id, batch_client, webserver=True)
        flash('Data pipeline code successfully initiated!', 'success')

    if error_sentry.errors:
        flash('An unknown error occurred when trying to run this task.',
              category='danger')
        print(error_sentry)

    return redirect('/data-pipeline/{:s}'.format(study_id))
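Most of these examples share the same error-handling pattern: make_error_sentry returns a context manager that swallows exceptions raised in its body and collects them on an errors attribute, so the view can flash a failure message instead of raising. The real implementation lives in libs.sentry; the class below is only a minimal stand-in for that interface, inferred from how the call sites use it.

class IllustrativeErrorSentry(object):
    # Stand-in for make_error_sentry("data", ...); the real version also
    # reports captured exceptions to Sentry, which is omitted here.
    def __init__(self):
        self.errors = []

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if exc_value is not None:
            self.errors.append(exc_value)
            return True  # suppress the exception, as the views above expect
        return False

error_sentry = IllustrativeErrorSentry()
with error_sentry:
    raise RuntimeError('pipeline misconfigured')
if error_sentry.errors:
    print(error_sentry.errors[0])  # pipeline misconfigured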
Example #2
def run_manual_code(study_id):
    """
    Create an AWS Batch job for the Study specified
    :param study_id: Primary key of a Study
    """

    # Get the object ID of the study, used in the pipeline
    object_id = Study.objects.get(pk=study_id).object_id
    error_sentry = make_error_sentry("data",
                                     tags={"pipeline_frequency": "manually"})

    with error_sentry:
        # Get new data access credentials for the manual user
        aws_object_names = get_aws_object_names()
        refresh_data_access_credentials('manually', aws_object_names)

        # Submit a manual job
        create_one_job('manually', object_id)

        # The success message gets displayed to the user upon redirect
        flash('Data pipeline code successfully initiated!', 'success')

    if error_sentry.errors:
        flash('An unknown error occurred when trying to run this task.',
              'danger')

    return redirect('/data-pipeline/{:s}'.format(study_id))
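The flash, redirect, and abort calls indicate that these functions are Flask view handlers. A minimal sketch of how such a view might be registered follows; the blueprint name, route path, and HTTP method are assumptions for illustration, not taken from the examples.

from flask import Blueprint

data_pipeline_api = Blueprint('data_pipeline_api', __name__)  # hypothetical blueprint

@data_pipeline_api.route('/data-pipeline/<string:study_id>/run', methods=['POST'])
def run_manual_code_view(study_id):
    # Delegate to one of the run_manual_code implementations above.
    return run_manual_code(study_id)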
Example #3
def run_manual_code(study_id):
    """
    Create an AWS Batch job for the Study specified
    :param study_id: ObjectId of a Study
    """

    # Get new data access credentials for the manual user
    aws_object_names = get_aws_object_names()
    refresh_data_access_credentials('manually', aws_object_names)

    # Submit a manual job
    create_one_job('manually', study_id)

    # The success message gets displayed to the user upon redirect
    flash('Data pipeline code successfully initiated!', 'success')

    return redirect('/data-pipeline/{:s}'.format(study_id))
Example #4
def run_manual_code(study_id):
    """
    Create an AWS Batch job for the Study specified
    :param study_id: Primary key of a Study
    """

    username = session["admin_username"]

    destination_email_addresses_string = ''
    if 'destination_email_addresses' in request.values:
        destination_email_addresses_string = request.values[
            'destination_email_addresses']
        destination_email_addresses = [
            d.strip() for d in filter(
                None, re.split("[, \?:;]+",
                               destination_email_addresses_string))
        ]
        for email_address in destination_email_addresses:
            if not validate_email(email_address):
                flash(
                    'Email address {0} in ({1}) does not appear to be a valid email address.'
                    .format(email_address, destination_email_addresses_string),
                    category='danger')
                return redirect('/data-pipeline/{:s}'.format(study_id))
        destination_email_addresses_string = ','.join(
            destination_email_addresses)

    participants_string = ''
    if 'participants' in request.values:
        participants_string = ','.join(request.form.getlist('participants'))

    data_start_time = ''
    if 'time_start' in request.values:
        data_start_time = request.values['time_start']

    data_end_time = ''
    if 'time_end' in request.values:
        data_end_time = request.values['time_end']

    # Get the object ID of the study, used in the pipeline
    query = Study.objects.filter(pk=study_id)
    if not query.exists():
        flash('Could not find study corresponding to study id {0}'.format(
            study_id),
              category='danger')
        return redirect('/data-pipeline/{:s}'.format(study_id))
    object_id = query.get().object_id

    pipeline_region = os.getenv("pipeline_region", None)
    if not pipeline_region:
        pipeline_region = 'us-east-1'
        flash('Pipeline region not configured, choosing default ({})'.format(
            pipeline_region),
              category='info')

    error_sentry = make_error_sentry("data",
                                     tags={"pipeline_frequency": "manually"})
    # Get new data access credentials for the manual user, submit a manual job,
    # and display a message. Report all errors to Sentry, including
    # DataPipelineNotConfigured errors.
    with error_sentry:
        ssm_client = get_boto_client('ssm', pipeline_region)
        refresh_data_access_credentials('manually', ssm_client=ssm_client)
        batch_client = get_boto_client('batch', pipeline_region)
        create_one_job('manually', object_id, username,
                       destination_email_addresses_string, data_start_time,
                       data_end_time, participants_string, batch_client)

        if data_start_time and data_end_time:
            flash(
                'Data pipeline successfully initiated on data collected between {0} and {1}! Email(s) will be sent to {2} on completion.'
                .format(data_start_time, data_end_time,
                        destination_email_addresses_string), 'success')
        elif data_start_time:
            flash(
                'Data pipeline successfully initiated on data collected after {0}! Email(s) will be sent to {1} on completion.'
                .format(data_start_time,
                        destination_email_addresses_string), 'success')
        elif data_end_time:
            flash(
                'Data pipeline successfully initiated on data collected before {0}! Email(s) will be sent to {1} on completion.'
                .format(data_end_time,
                        destination_email_addresses_string), 'success')
        else:
            flash(
                'Data pipeline successfully initiated! Email(s) will be sent to {0} on completion.'
                .format(destination_email_addresses_string), 'success')

    if error_sentry.errors:
        flash('An error occurred when trying to execute the pipeline: {0}'.
              format(error_sentry),
              category='danger')
        print(error_sentry)

    return redirect('/data-pipeline/{:s}'.format(study_id))
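Example #4 parses the recipient list by splitting the raw input on commas, spaces, question marks, colons, and semicolons, dropping empty fragments, and stripping whitespace. A standalone check of that splitting logic:

import re

raw = 'alice@example.com, bob@example.com; carol@example.com'
addresses = [d.strip() for d in filter(None, re.split("[, \?:;]+", raw))]
print(addresses)  # ['alice@example.com', 'bob@example.com', 'carol@example.com']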
Example #5
from datetime import timedelta

from django.utils import timezone

from database.data_access_models import ChunkRegistry
from database.study_models import Study
from libs.sentry import make_error_sentry
from pipeline.boto_helpers import get_boto_client
from pipeline.configuration_getters import get_current_region
from pipeline.index import create_one_job, refresh_data_access_credentials

pipeline_region = get_current_region()
ssm_client = get_boto_client('ssm', pipeline_region)
error_sentry = make_error_sentry("data",
                                 tags={"pipeline_frequency": "manually"})
batch_client = get_boto_client('batch', pipeline_region)
yesterday = timezone.now() - timedelta(days=1)

refresh_data_access_credentials('manually',
                                ssm_client=ssm_client,
                                webserver=False)

################################################################################################
# if you are running this on an ubuntu machine you have to sudo apt-get -y install cloud-utils #
################################################################################################

for study in Study.objects.all():
    with error_sentry:
        # Only run the pipeline for participants whose data was uploaded since
        # yesterday; report all errors to Sentry.
        for patient_id in ChunkRegistry.get_updated_users_for_study(
                study, yesterday):
            create_one_job('manually',
                           study,
                           patient_id,
                           batch_client,
                           # webserver=False matches the refresh call above
                           webserver=False)
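Because Example #5 is a standalone script rather than a web view, the Django ORM imports at its top only work inside a configured Django environment. One common way to bootstrap that before running the script (the settings module name here is an assumption, not from the source):

import os

import django

os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')  # assumed settings path
django.setup()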