Example #1
def celery_nlp_new_collection(request):
    """
    Create a new collection (get documents via Solr requests) and start NLP.

    @param request:
    @return:
    """
    if request.method == 'POST':
        try:
            logger.debug('Received POST: %s', request.POST)
            package_id = request.POST['package_id']
            content_type = request.POST['content_type']
            # additional_and = request.POST['additional_and']
            # additional_and_not = request.POST['additional_and_not']
            tar_path = request.POST['tar_path']

            # build the query
            # solr_query = build_query(package_id, content_type, additional_and, additional_and_not)
            solr_query = build_query(package_id, content_type)
            # print solr_query
            logger.debug('Solr query: %s', solr_query)

            ner_model = request.POST['ner_model']
            # category_model = request.POST['category_model']

            datamining_main = DMMainTask()
            taskid = str(uuid.uuid4())
            details = {
                'solr_query': solr_query,
                'ner_model': ner_model,
                # 'category_model': category_model,
                'tar_path': tar_path
            }
            logger.debug('Task details: %s', details)
            # use kwargs; these are visible in Celery Flower
            t_context = DefaultTaskContext('', '', 'workers.tasks.DMMainTask',
                                           None, '', None)
            datamining_main.apply_async((t_context, ),
                                        kwargs=details,
                                        queue='default',
                                        task_id=taskid)

        except Exception as e:
            logger.error(e)
            # display error message to user
            template = loader.get_template('datamining/feedback.html')
            context = RequestContext(
                request, {'status': 'An error occurred: %s' % e})
            return HttpResponse(template.render(context))

        template = loader.get_template('datamining/feedback.html')
        context = RequestContext(
            request, {'status': 'NLP processing has been initiated.'})
        return HttpResponse(template.render(context))
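build_query itself is not shown in this example; below is a minimal sketch of what it might look like, assuming the Solr schema exposes 'package' and 'content_type' fields (the field names and query shape are assumptions, not taken from the project):

def build_query(package_id, content_type):
    # Hypothetical sketch: combine the two mandatory filters with AND.
    # A real implementation would also escape Solr special characters.
    return 'package:"%s" AND content_type:"%s"' % (package_id, content_type)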
Example #2
def initialize_dip(dip_creation_process_name):
    """Create DIP and InformationPackage records, then run the AIPtoDIPReset task."""
    dip = DIP.objects.create(name=dip_creation_process_name)
    uuid = getUniqueID()  # note: shadows the 'uuid' module within this function
    wf = WorkflowModules.objects.get(identifier=AIPtoDIPReset.__name__)
    work_dir = os.path.join(config_path_work, uuid)
    InformationPackage.objects.create(path=work_dir,
                                      uuid=uuid,
                                      statusprocess=0,
                                      packagename=dip_creation_process_name,
                                      last_task=wf)
    task_context = DefaultTaskContext(
        uuid, work_dir, 'AIPtoDIPReset', None,
        {'packagename': dip_creation_process_name}, None)
    AIPtoDIPReset().apply((task_context, ), queue='default')
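Note that this example uses apply rather than apply_async: Celery's apply executes the task synchronously in the calling process, so initialize_dip blocks until AIPtoDIPReset has finished. The non-blocking variant would be:

# Dispatch to a Celery worker on the 'default' queue instead of running in-process:
AIPtoDIPReset().apply_async((task_context, ), queue='default')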
Example #3
def celery_nlp_existing_collection(request):
    """
    Use an existing document collection to perform NLP on.

    @param request:
    @return:
    """
    if request.method == 'POST':
        try:
            logger.debug('Received POST: %s', request.POST)

            tar_path = request.POST['tar_path']
            ner_model = request.POST['ner_model']
            # category_model = request.POST['category_model']

            datamining_main = DMMainTask()
            taskid = str(uuid.uuid4())
            details = {
                'ner_model': ner_model,
                # 'category_model': category_model,
                'tar_path': tar_path
            }
            logger.debug('Task details: %s', details)
            # use kwargs; these are visible in Celery Flower
            t_context = DefaultTaskContext('', '', 'workers.tasks.DMMainTask',
                                           None, '', None)
            datamining_main.apply_async((t_context, ),
                                        kwargs=details,
                                        queue='default',
                                        task_id=taskid)

        except Exception as e:
            logger.error(e)
            # display error message to user
            template = loader.get_template('datamining/feedback.html')
            context = RequestContext(
                request, {'status': 'An error occurred: %s' % e})
            return HttpResponse(template.render(context))

        template = loader.get_template('datamining/feedback.html')
        context = RequestContext(
            request, {'status': 'NLP processing has been initiated.'})
        return HttpResponse(template.render(context))
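The 'use kwargs' comment refers to Celery Flower, which displays a task's keyword arguments in its monitoring UI. A minimal sketch of how DMMainTask might receive them (the class body and run signature are illustrative assumptions; only the kwarg names come from the views above):

from celery import Task

class DMMainTask(Task):  # hypothetical sketch, not the project's actual implementation
    def run(self, task_context, tar_path=None, ner_model=None, solr_query=None, **kwargs):
        # Keyword arguments passed via apply_async(kwargs=details) arrive here
        # and are shown per task in the Flower dashboard.
        pass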
Example #4
def execute_chain(request):
    """
    Execute the selected tasks using the selected information package as input. Task modules are registered in
    WorkflowModules; the identifier of a workflow module corresponds to the task's class name. The selected tasks
    are combined with Celery's 'chain' and executed via 'apply_async'.

    @type request: django.core.handlers.wsgi.WSGIRequest
    @param request: Request

    @rtype: django.http.JsonResponse
    @return: JSON response (task execution metadata)
    """
    data = {"success": False, "errmsg": "Unknown error"}
    try:
        selected_ip = request.POST['selected_ip']
        selected_actions = request.POST['selected_actions']
        if not (selected_ip and selected_actions):
            return JsonResponse({
                "success": False,
                "errmsg": "Missing input parameter!"
            })

        actions = selected_actions.split("+")
        logging.debug(actions)

        data = {"success": True, "id": "jobid", "msg": selected_actions}
        # Get module description of the task to be executed from the database
        ip = InformationPackage.objects.get(pk=selected_ip)

        try:
            action_classes = []
            task_chain = []
            for act in actions:
                wfm = WorkflowModules.objects.get(pk=act)
                taskClass = getattr(tasks, wfm.identifier)
                print "Executing task %s" % taskClass.name
                action_classes.append(taskClass)
                task_chain.append(taskClass().s())

            # The first task in the chain receives the initial task context;
            # each subsequent task receives its predecessor's return value.
            task_chain[0] = action_classes[0]().s(
                DefaultTaskContext(ip.uuid, ip.path, "", "", {}))

            job = chain(task_chain).apply_async()

            data = {"success": True, "id": job.id}

        except Exception as err:
            tb = traceback.format_exc()
            logging.error(str(tb))
            return JsonResponse({
                "success": False,
                "errmsg": "Workflow module not found: %s" % err
            })

    except Exception as err:
        tb = traceback.format_exc()
        logging.error(str(tb))
        data = {"success": False, "errmsg": str(err), "errdetail": str(tb)}
    return JsonResponse(data)
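For reference, here is a minimal standalone sketch of the chaining pattern used above, with two illustrative Celery tasks that pass a context dict along (the task names and bodies are assumptions, not project code):

from celery import chain, shared_task

@shared_task
def step_one(context):
    context['step_one'] = 'done'   # each task enriches the context...
    return context                 # ...and must return it for the next task

@shared_task
def step_two(context):
    context['step_two'] = 'done'
    return context

# Only the first signature carries the initial argument; step_two
# automatically receives step_one's return value.
job = chain(step_one.s({'uuid': 'abc'}), step_two.s()).apply_async()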
Example #5
def apply_task(request):
    """
    Execute the selected task using the selected information package as input. Task modules are registered in
    WorkflowModules; the identifier of a workflow module corresponds to the task's class name. The task is executed
    using Celery's 'apply_async' method.
    @type request: django.core.handlers.wsgi.WSGIRequest
    @param request: Request
    @rtype: django.http.JsonResponse
    @return: JSON response (task execution metadata)
    """
    data = {"success": False, "errmsg": "Unknown error"}
    try:
        selected_ip = request.POST['selected_ip']
        selected_action = request.POST['selected_action']
        if not (selected_ip and selected_action):
            return JsonResponse({
                "success": False,
                "errmsg": "Missing input parameter!"
            })
        # Get module description of the task to be executed from the database
        wfm = WorkflowModules.objects.get(pk=selected_action)
        logging.debug(selected_action)
        # Get the selected information package from the database
        ip = InformationPackage.objects.get(pk=selected_ip)
        if request.is_ajax():
            try:
                # Get task class from module identifier
                taskClass = getattr(tasks, wfm.identifier)
                print "Executing task %s" % taskClass.name
                # additional input parameters for the task can be passed through using the 'additional_params' dictionary.
                # IMPORTANT: if you want to use any of these parameters in the finalize() function, the task MUST return:
                # return task_context.additional_input

                # deserialize the ip.additional_data JSON string from the db
                # (note: 'import json' would normally live at module level)
                import json
                additional_data = json.loads(ip.additional_data)

                additional_data['packagename'] = ip.packagename
                if ip.identifier != "":
                    additional_data['identifier'] = ip.identifier

                if wfm.identifier == AIPStore.__name__ or wfm.identifier == AIPIndexing.__name__:
                    additional_data['storage_dest'] = config_path_storage
                    if ip.storage_loc:
                        additional_data['identifier'] = ip.identifier
                    logging.debug("Storage destination %s",
                                  additional_data['storage_dest'])

                if wfm.identifier in [
                        DIPAcquireAIPs.__name__,
                        DIPAcquireDependentAIPs.__name__,
                        DIPExtractAIPs.__name__
                ]:
                    dip = DIP.objects.get(name=ip.packagename)
                    selected_aips = {}
                    for aip in dip.aips.all():
                        selected_aips[aip.identifier] = aip.source
                    additional_data['selected_aips'] = selected_aips
                    additional_data['storage_dest'] = config_path_storage

                # on reset, the identifier is removed from context and from the information package record
                if wfm.identifier == SIPtoAIPReset.__name__ or wfm.identifier == AIPtoDIPReset.__name__:
                    additional_data['identifier'] = ''
                    ip.identifier = ''

                if wfm.identifier == AIPPackageMetsCreation.__name__:
                    additional_data['parent_id'] = ip.parent_identifier

                if wfm.identifier == AIPStore.__name__:
                    additional_data['parent_id'] = ip.parent_identifier
                    # the UUID tells us in which folder the parent AIP's METS file is located - only in the dev
                    # version of course; this probably doesn't work in distributed storage
                    if len(additional_data['parent_id']) > 0:
                        additional_data[
                            'parent_path'] = InformationPackage.objects.get(
                                identifier=ip.parent_identifier).path
                    else:
                        additional_data['parent_path'] = ''

                if wfm.identifier == LilyHDFSUpload.__name__:
                    additional_data['storage_loc'] = ip.storage_loc

                # Execute task
                task_context = DefaultTaskContext(ip.uuid, ip.path,
                                                  taskClass.name, None,
                                                  additional_data, None)
                job = taskClass().apply_async((task_context, ),
                                              queue='default')
                data = {"success": True, "id": job.id}

                # persist changes to information package object
                ip.save()
            except AttributeError as err:
                errdetail = """The workflow module '%s' does not exist.
It might be necessary to run 'python ./workers/scantasks.py' to register new or renamed tasks.""" % wfm.identifier
                data = {
                    "success": False,
                    "errmsg":
                    "Workflow module '%s' does not exist" % wfm.identifier,
                    "errdetail": errdetail
                }
        else:
            data = {"success": False, "errmsg": "not ajax"}
    except Exception as err:
        tb = traceback.format_exc()
        logging.error(str(tb))
        data = {"success": False, "errmsg": str(err), "errdetail": str(tb)}
    return JsonResponse(data)
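The 'IMPORTANT' comment above implies a contract between tasks and finalize(): a task that wants its extra parameters to remain visible downstream must return the additional_input dict. A minimal sketch of a conforming task (the class name and run signature are illustrative assumptions, not project code):

from celery import Task

class MyWorkflowTask(Task):  # hypothetical task, not part of the project
    def run(self, task_context, *args, **kwargs):
        # ... perform the actual work using task_context.additional_input ...
        # returning additional_input keeps the parameters available to finalize()
        return task_context.additional_input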