Example #1
def run_service(request, service_id):
    """Run service given by name.
    Use id of service to retrieve it from a database.

    args:
        service_id(str): id of service to run.
    """
    if request.method != "POST":
        return redirect(manage)

    results_messages = []
    error_messages = []

    # Get service information from service id.
    service = services.objects.get(service_id=service_id)

    # Pass in app name from apps.py.
    new_job = add_job(ServicesConfig.name)

    # create() both instantiates and saves the row, so a separate save() is redundant.
    service_jobs.objects.create(job_id=new_job, service_id=service)

    results_messages = ["Added job successfully."]

    return redirect(job_status)
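
run_service is a plain Django view, so it needs a URL pattern that captures service_id. A minimal sketch of that wiring, assuming the view lives in swamplr_services.views (the module path, route, and name are assumptions, not taken from the example):

# urls.py -- hypothetical wiring; adjust the import path to the real app layout.
from django.conf.urls import url
from swamplr_services import views

urlpatterns = [
    # POST to run/<service_id>/ triggers run_service;
    # any other method is redirected back to the manage view.
    url(r"^run/(?P<service_id>[0-9]+)/$", views.run_service, name="run_service"),
]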
Example #2
def add_pathauto_job(request, source_job_id):
    """Queue a pathauto job linked to an existing ingest job."""
    new_job = add_job(SwamplrIngestConfig.name, job_type_label="pathauto")
    ingest_job = ingest_jobs.objects.get(job_id=source_job_id)

    pathauto_jobs.objects.create(
        job_id=new_job,
        source_job=ingest_job,
    )
    return redirect(job_status)
Example #3
def add_delete_job(request, source_job_id):
    """Find all newly created objects associated with a given job."""
    new_job = add_job(SwamplrIngestConfig.name, job_type_label="delete")
    ingest_job = ingest_jobs.objects.get(job_id=source_job_id)

    delete_jobs.objects.create(
        job_id=new_job,
        source_job=ingest_job,
    )
    return redirect(job_status)
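
Examples #2 and #3 are identical except for the job_type_label and the model that records the link. A sketch of a shared helper (the name add_follow_up_job is hypothetical):

def add_follow_up_job(request, source_job_id, label, link_model):
    """Queue a follow-up job of the given label, linked to an ingest job."""
    new_job = add_job(SwamplrIngestConfig.name, job_type_label=label)
    ingest_job = ingest_jobs.objects.get(job_id=source_job_id)
    # link_model is e.g. pathauto_jobs or delete_jobs.
    link_model.objects.create(job_id=new_job, source_job=ingest_job)
    return redirect(job_status)

Example #2 would then reduce to return add_follow_up_job(request, source_job_id, "pathauto", pathauto_jobs).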
Example #4
def pre_process():
    """Determine if there are any services that are scheduled to be run now.
        Also, check if there are any jobs to archive based on the auto_archive time field.
    """
    # Find services that are due to run, based on frequency and
    # last_started, excluding services that already have an active job.
    pending_jobs = services.objects.raw("""
        SELECT DISTINCT s.* FROM swamplr_services_services s
        LEFT JOIN swamplr_services_service_jobs sj ON (s.service_id = sj.service_id_id)
        LEFT JOIN swamplr_jobs_jobs j ON (sj.job_id_id = j.job_id)
        LEFT JOIN swamplr_jobs_status t ON (t.status_id = j.status_id_id AND t.running='y')
        WHERE s.frequency IS NOT NULL
        AND (s.last_started IS NULL
             OR (NOW() >= DATE_ADD(s.last_started, INTERVAL s.frequency MINUTE)
                 AND t.status_id IS NULL))
        AND s.archived != 'y'
    """)

    # Add new jobs for all services found
    logging.info(
        "swamplr_services: Found {0} services that are scheduled to run.".
        format(len(list(pending_jobs))))
    for job in pending_jobs:
        logging.info(
            "swamplr_services: Creating new job for service_id {0}".format(
                job.service_id))
        # The raw query already returned the services row, so no
        # separate lookup by service_id is needed.

        # Pass in app name from apps.py.
        new_job = add_job(ServicesConfig.name)

        # create() saves the row; no extra save() call is needed.
        service_jobs.objects.create(job_id=new_job, service_id=job)

    pending_archive = service_jobs.objects.raw("""
        SELECT DISTINCT sj.* FROM swamplr_services_services s
        LEFT JOIN swamplr_services_service_jobs sj ON (s.service_id = sj.service_id_id)
        LEFT JOIN swamplr_jobs_jobs j ON (j.job_id = sj.job_id_id)
        LEFT JOIN swamplr_jobs_status t ON (t.status_id = j.status_id_id AND t.success='y')
        WHERE s.auto_archive IS NOT NULL AND s.auto_archive > 0
        AND j.archived != 'y'
        AND DATE_ADD(j.completed, INTERVAL s.auto_archive MINUTE) <= NOW()
        AND t.success='y'
    """)

    # Archive each job
    logging.info(
        "swamplr_services: Found {0} service jobs that can be archived.".
        format(len(list(pending_archive))))
    for job in pending_archive:
        logging.info("swamplr_service: Archiving {0}".format(job.job_id_id))
        jobs.objects.filter(job_id=job.job_id_id).update(archived='y')
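
pre_process takes no request, which suggests it is driven by a scheduler rather than a view. A minimal sketch of a cron-friendly management command, assuming pre_process is importable from swamplr_services.views (the command name and import path are assumptions):

# management/commands/run_pending_services.py -- hypothetical command name.
from django.core.management.base import BaseCommand

from swamplr_services.views import pre_process  # assumed import path

class Command(BaseCommand):
    help = "Start scheduled service jobs and archive completed ones."

    def handle(self, *args, **options):
        # Delegate to pre_process, which both queues due services
        # and archives jobs past their auto_archive window.
        pre_process()

A crontab entry such as */5 * * * * python manage.py run_pending_services would then poll every five minutes.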
Example #5
def delete(request, ns):
    """Add delete job to queue."""
    logging.info("Adding delete job for namespace: {0}".format(ns))

    new_job = add_job(SwamplrNamespacesConfig.name)
    ns_operation = namespace_operations.objects.get(operation_name="Delete")

    namespace_jobs.objects.create(
        job_id=new_job,
        namespace=ns,
        operation_id=ns_operation
    )

    return redirect(job_status)
Example #6
def reindex(request, ns):
    """Add reindex job to queue for the given namespace, or for all items."""
    if ns == "all":
        logging.info("Adding reindex job for all items.")
        ns = "[all items]"
    else:
        logging.info("Adding reindex job for namespace: {0}".format(ns))
    new_job = add_job(SwamplrNamespacesConfig.name)
    ns_operation = namespace_operations.objects.get(operation_name="Reindex")

    namespace_jobs.objects.create(
        job_id=new_job,
        namespace=ns,
        operation_id=ns_operation
    )

    return redirect(job_status)
Example #7
def add_id_job(ns, id_type):
    """Add job to queue."""
    logging.info("Adding {0} job for namespace: {1}".format(id_type, ns))

    new_job = add_job(SwamplrNamespacesConfig.name)
    if id_type.lower() == "doi":
        ns_operation = namespace_operations.objects.get(operation_name="Mint DOI")
    elif id_type.lower() == "ark":
        ns_operation = namespace_operations.objects.get(operation_name="Mint ARK")
    elif id_type.lower() == "pathauto":
        ns_operation = namespace_operations.objects.get(operation_name="Pathauto")
    else:
        # Without this guard, ns_operation would be unbound for an
        # unrecognized id_type and the create() below would raise NameError.
        raise ValueError("Unsupported id_type: {0}".format(id_type))

    namespace_jobs.objects.create(
        job_id=new_job,
        namespace=ns,
        operation_id=ns_operation
    )
    return redirect(job_status)
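
The if/elif/else chain above maps id_type strings to operation names; the same dispatch can be written as a lookup table, which makes the unsupported-type error fall out naturally. A sketch (the helper name get_id_operation is hypothetical; the operation names are taken from the example):

ID_TYPE_OPERATIONS = {
    "doi": "Mint DOI",
    "ark": "Mint ARK",
    "pathauto": "Pathauto",
}

def get_id_operation(id_type):
    """Resolve an id_type string to its namespace_operations row."""
    try:
        name = ID_TYPE_OPERATIONS[id_type.lower()]
    except KeyError:
        raise ValueError("Unsupported id_type: {0}".format(id_type))
    return namespace_operations.objects.get(operation_name=name)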
Example #8
def add_derivatives_job(request, item_type):
    """Create a new job for the derivative form that was submitted"""

    response = {
        "error_messages": [],
        "result_messages": [],
        "base_dir": settings.DATA_PATHS,
    }

    #derive_options = get_derivative_options("source." + item_type.lower())

    form = DerivativesForm(request.POST)
    form.set_fields(item_type)

    if form.is_valid():
        clean = form.cleaned_data

        object_derivatives = clean["derive_types"]
        replace = "y" if clean["replace_on_duplicate"] else ""
        subset = int(clean["subset_value"]) if clean["subset_value"] else 0

        new_job = add_job(SwamplrDerivativesConfig.name)
        derivative_job = derivative_jobs.objects.create(
            job_id=new_job,
            source_dir=clean["path_list_selected"],
            replace_on_duplicate=replace,
            subset=subset,
            source_file_extension=item_type,
            brightness=clean["brightness_value"],
            contrast=clean["contrast_value"])

        for d in object_derivatives:
            job_derivatives.objects.create(
                derive_id=derivative_job,
                derive_type=d,
                parameters=""  # TODO -- populate
            )

    else:
        message = ["Unable to add job: Missing or invalid form data."]
        return run_derivatives(request, item_type, message=message)

    return redirect(job_status)
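
Both this view and Example #9 call a set_fields hook to attach form fields at runtime, since the available choices depend on the item type or collection. A skeleton of that idiom, reusing the get_derivative_options helper referenced in the commented-out line above (the choice format is an assumption; the real form defines more fields):

from django import forms

class DerivativesForm(forms.Form):
    """Skeleton only; illustrates the dynamic-field idiom."""

    def set_fields(self, item_type):
        # Attach choices at runtime instead of declaring them statically,
        # because they vary by item type.
        options = get_derivative_options("source." + item_type.lower())
        self.fields["derive_types"] = forms.MultipleChoiceField(
            choices=[(o, o) for o in options])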
Example #9
def add_ingest_job(request, collection_name):
    """Queue an ingest job from the submitted ingest form."""

    response = {
        "error_messages": [],
        "result_messages": [],
        "base_dir": settings.DATA_PATHS,
    }

    collection_data = get_ingest_data(collection_name)

    form = IngestForm(request.POST)
    form.set_fields(collection_name)

    if form.is_valid():

        clean = form.cleaned_data

        object_datastreams = get_all_datastreams(collection_data, clean)
        metadata_datastreams = get_all_metadata(collection_data, clean)
        all_datastreams = object_datastreams + metadata_datastreams

        # If at least one datastream for an object type (otype) was selected,
        # make sure a DC datastream is added for it as well.
        counts = {}
        for ds, otype, dtype in all_datastreams:
            if otype in counts:
                counts[otype]["cnt"] += 1
            else:
                # First datastream seen for this object type.
                counts[otype] = {"cnt": 1}
            if ds == "DC":
                counts[otype]["has_dc"] = True

        for obj_type, obj_data in counts.items():
            if "has_dc" not in obj_data:
                all_datastreams.append(("DC", obj_type, "metadata"))

        process_new = "y" if "process_new" in clean["process"] else ""
        process_existing = "y" if "process_existing" in clean["process"] else ""
        replace_on_duplicate = "y" if clean["replace_on_duplicate"] else ""
        subset = int(clean["subset_value"]) if clean["subset_value"] else 0

        new_job = add_job(SwamplrIngestConfig.name, job_type_label="ingest")
        ingest_job = ingest_jobs.objects.create(
            job_id=new_job,
            source_dir=clean["path_list_selected"],
            replace_on_duplicate=replace_on_duplicate,
            collection_name=collection_name,
            namespace=clean["pid_namespace"],
            process_new=process_new,
            process_existing=process_existing,
            subset=subset,
        )
        ds_options = datastreams.objects.all()
        existing_ds = [ds.datastream_label for ds in ds_options]
        added = []
        for ds, otype, dtype in all_datastreams:
            if ds not in existing_ds and ds not in added:
                is_object = "y" if dtype == "datastreams" else ""
                new_ds = datastreams.objects.create(
                    datastream_label=ds,
                    is_object=is_object,
                )
                added.append(ds)

            job_datastreams.objects.create(
                ingest_id=ingest_job,
                object_type=otype,
                datastream_id=datastreams.objects.get(datastream_label=ds),
            )

    else:

        message = ["Unable to add job: Missing or invalid form data."]
        return run_ingest(request, collection_name, message=message)

    return redirect(job_status)
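
The counts bookkeeping above only needs to know which object types already include a DC datastream; an equivalent formulation with sets, shown as a sketch:

all_types = {otype for _, otype, _ in all_datastreams}
types_with_dc = {otype for ds, otype, _ in all_datastreams if ds == "DC"}
for obj_type in all_types - types_with_dc:
    # Every object type gets a DC metadata datastream if one wasn't selected.
    all_datastreams.append(("DC", obj_type, "metadata"))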