Example No. 1
def testprojection(result, toolkit):
    # submit test_job into queue
    member = api.user.get_current()
    params = {
        'result': {
            'results_dir': tempfile.mkdtemp(),
            'outputs': {
                'files': {
                    'proj_*.tif': {
                        'title': 'Future Projection',
                        'genre': 'DataGenreFP',
                        'mimetype': 'image/geotiff',
                    }
                }
            }
        },
    }
    context = {
        'context': '/'.join(result.getPhysicalPath()),
        'user': {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname'),
        },
        'experiment': {
            'title': result.__parent__.title,
            'url': result.__parent__.absolute_url()
        }
    }
    # TODO: create chain with import task?
    # ...  check what happens if task fails (e.g. remove 'experiment' in context)
    after_commit_task(testjob, params, context)
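All of these snippets hand the job to after_commit_task instead of queueing it directly, so nothing is submitted if the Plone transaction aborts. Below is a minimal sketch of that pattern, assuming the helper is built on the transaction package's after-commit hooks; the real org.bccvl.tasks.plone implementation may differ:

import transaction

def after_commit_task(task, *args):
    # hypothetical sketch: submit the celery task only once the
    # Zope/Plone transaction has committed successfully
    def hook(success, task, *task_args):
        if success:
            task.apply_async(args=task_args)
    transaction.get().addAfterCommitHook(hook, args=(task,) + args)

This keeps the queue consistent with the database: workers only ever see jobs whose content objects actually exist.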
Example No. 2
def execute(result, func):
    """
    This function takes an experiment and executes.

    It usesenvirnoment variables WORKER_DIR or HOME as root folder to execute
    experiments.

    After the execution finishes the output files will be attached to the
    experiment.

    :param experiment: The experiment holding the configuration and receiving
                       the results
    :type experiment: org.bccvl.site.content.IExperiment


    """
    params = get_project_params(result)
    script = generate_project_script()
    # plone context for this job
    member = api.user.get_current()
    context = {
        "context": "/".join(result.getPhysicalPath()),
        "user": {
            "id": member.getUserName(),
            "email": member.getProperty("email"),
            "fullname": member.getProperty("fullname"),
        },
        "experiment": {"title": result.__parent__.title, "url": result.__parent__.absolute_url()},
    }
    # add result info
    params["result"] = {"results_dir": "scp://[email protected]" + tempfile.mkdtemp(), "outputs": OUTPUTS}
    params["worker"]["script"] = {"name": "projection.R", "script": script}
    # set debug flag
    params["worker"]["zipworkenv"] = api.env.debug_mode()
    after_commit_task(r_task, params, context)
Example No. 3
def testtraits(result, toolkit):
    # submit test_job into queue
    member = api.user.get_current()
    params = {
        'result': {
            'results_dir': tempfile.mkdtemp(),
            'outputs': {
                'files': {
                    "*.RData": {
                        "title": "R Species Traits Model object",
                        "genre": "DataGenreSTModel",
                        "mimetype": "application/x-r-data"
                    }
                }
            }
        },
    }
    context = {
        'context': '/'.join(result.getPhysicalPath()),
        'user': {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname'),
        },
        'experiment': {
            'title': result.__parent__.title,
            'url': result.__parent__.absolute_url()
        }
    }
    # TODO: create chain with import task?
    # ...  check what happens if task fails (e.g. remove 'experiment' in context)
    after_commit_task(testjob, params, context)
Example No. 4
def execute(result, toolkit):
    """
    This function takes an experiment and executes.

    It usesenvirnoment variables WORKER_DIR or HOME as root folder to execute
    experiments.

    After the execution finishes the output files will be attached to the
    experiment.

    :param experiment: The experiment holding the configuration and receiving
                       the results
    :type experiment: org.bccvl.site.content.IExperiment


    """
    try:
        OUTPUTS = json.loads(toolkit.output)
    except (ValueError, TypeError) as e:
        LOG.fatal("couldn't load OUTPUT from toolkit %s: %s",
                  toolkit.getId(), e)
        OUTPUTS = {}
    params = get_traits_params(result)
    script = generate_traits_script(toolkit.script)
    # plone context for this job
    member = api.user.get_current()
    context = {
        'context': '/'.join(result.getPhysicalPath()),
        'user': {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname')
        },
        'experiment': {
            'title': result.__parent__.title,
            'url': result.__parent__.absolute_url()
        }
    }

    # TODO: quick fix Decimal json encoding through celery (where is my custom
    # json encoder gone?)
    for key, item in params.items():
        if isinstance(item, Decimal):
            params[key] = float(item)

    # add result infos
    params['result'] = {
        'results_dir': get_results_dir(result, result.REQUEST),
        'outputs': OUTPUTS
    }
    params['worker']['script'] = {
        'name': '{}.R'.format(toolkit.getId()),
        'script': script
    }
    # set debug flag
    params['worker']['zipworkenv'] = api.env.debug_mode()
    after_commit_task(r_task, params, context)
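The Decimal-to-float loop above is the quick fix the TODO refers to; the more general alternative is a custom JSON encoder registered with the serializer. A hypothetical sketch (the encoder class is not part of the BCCVL codebase):

import json
from decimal import Decimal

class DecimalEncoder(json.JSONEncoder):
    # hypothetical: serialise Decimal values as floats so params
    # survive JSON encoding on the way through celery
    def default(self, obj):
        if isinstance(obj, Decimal):
            return float(obj)
        return super(DecimalEncoder, self).default(obj)

# usage: json.dumps(params, cls=DecimalEncoder)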
Example No. 5
def execute(result, toolkit, priority=HIGH_PRIORITY):
    """
    This function takes an experiment and executes.

    It usesenvirnoment variables WORKER_DIR or HOME as root folder to execute
    experiments.

    After the execution finishes the output files will be attached to the
    experiment.

    :param experiment: The experiment holding the configuration and receiving
                       the results
    :type experiment: org.bccvl.site.content.IExperiment


    """
    try:
        OUTPUTS = json.loads(toolkit.output)
    except (ValueError, TypeError) as e:
        LOG.fatal("couldn't load OUTPUT from toolkit %s: %s", toolkit.getId(),
                  e)
        OUTPUTS = {}
    params = get_traits_params(result)
    script = generate_traits_script(toolkit.script)
    # plone context for this job
    member = api.user.get_current()
    context = {
        'context': '/'.join(result.getPhysicalPath()),
        'user': {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname')
        },
        'experiment': {
            'title': result.__parent__.title,
            'url': result.__parent__.absolute_url()
        }
    }

    # TODO: quick fix Decimal json encoding through celery (where is my custom
    # json encoder gone?)
    for key, item in params.items():
        if isinstance(item, Decimal):
            params[key] = float(item)

    # add result infos
    params['result'] = {
        'results_dir': get_results_dir(result, result.REQUEST),
        'outputs': OUTPUTS
    }
    params['worker']['script'] = {
        'name': '{}.R'.format(toolkit.getId()),
        'script': script
    }
    # set debug flag
    params['worker']['zipworkenv'] = api.env.debug_mode()
    after_commit_task(r_task, priority, params, context)
Example No. 6
def execute_sdm(result, toolkit, priority=HIGH_PRIORITY):
    """
    This function takes an experiment and executes.

    It usesenvirnoment variables WORKER_DIR or HOME as root folder to execute
    experiments.

    After the execution finishes the output files will be attached to the
    experiment.

    :param experiment: The experiment holding the configuration and receiving
                       the results
    :type experiment: org.bccvl.site.content.IExperiment


    """
    try:
        OUTPUTS = json.loads(toolkit.output)
    except (ValueError, TypeError) as e:
        LOG.fatal("couldn't load OUTPUT from toolkit %s: %s", toolkit.getId(),
                  e)
        OUTPUTS = {}
    params = get_toolkit_params(result)
    script = generate_sdm_script(toolkit.script)
    # generate plone context info
    member = api.user.get_current()
    context = {
        'context': '/'.join(result.getPhysicalPath()),
        'user': {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname')
        },
        'experiment': {
            'title': result.__parent__.title,
            'url': result.__parent__.absolute_url()
        }
    }
    # complete job info
    params['result'] = {
        # FIXME: not optimal to access request this way
        #        rather pass in as parameter
        'results_dir': get_results_dir(result, result.REQUEST),
        'outputs': OUTPUTS
    }
    params['worker']['script'] = {
        'name': '{}.R'.format(toolkit.getId()),
        'script': script
    }
    # set debug flag
    params['worker']['zipworkenv'] = api.env.debug_mode()
    # send job to queue

    # TODO: define job chain here (and in other methods as well)
    after_commit_task(r_task, priority, params, context)
Example No. 7
def execute_sdm(result, toolkit):
    """
    This function takes an experiment and executes.

    It usesenvirnoment variables WORKER_DIR or HOME as root folder to execute
    experiments.

    After the execution finishes the output files will be attached to the
    experiment.

    :param experiment: The experiment holding the configuration and receiving
                       the results
    :type experiment: org.bccvl.site.content.IExperiment


    """
    try:
        OUTPUTS = json.loads(toolkit.output)
    except (ValueError, TypeError) as e:
        LOG.fatal("couldn't load OUTPUT from toolkit %s: %s",
                  toolkit.getId(), e)
        OUTPUTS = {}
    params = get_toolkit_params(result)
    script = generate_sdm_script(toolkit.script)
    # generate plone context info
    member = api.user.get_current()
    context = {
        'context': '/'.join(result.getPhysicalPath()),
        'user': {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname')
        },
        'experiment': {
            'title': result.__parent__.title,
            'url': result.__parent__.absolute_url()
        }
    }
    # complete job info
    params['result'] = {
        # FIXME: not optimal to access request this way
        #        rather pass in as parameter
        'results_dir': get_results_dir(result, result.REQUEST),
        'outputs': OUTPUTS
    }
    params['worker']['script'] = {
        'name': '{}.R'.format(toolkit.getId()),
        'script': script
    }
    # set debug flag
    params['worker']['zipworkenv'] = api.env.debug_mode()
    # send job to queue

    # TODO: define job chain here (and in other methods as well)
    after_commit_task(r_task, params, context)
Example No. 8
def execute(result, toolkit, priority=HIGH_PRIORITY):
    """
    This function takes an experiment and executes.

    It usesenvirnoment variables WORKER_DIR or HOME as root folder to execute
    experiments.

    After the execution finishes the output files will be attached to the
    experiment.

    :param experiment: The experiment holding the configuration and receiving
                       the results
    :type experiment: org.bccvl.site.content.IExperiment


    """
    # FIXME: ensemble is not yet a content based toolkit
    # try:
    #     OUTPUTS = json.loads(toolkit.output)
    # except (ValueError, TypeError) as e:
    #     LOG.fatal("couldn't load OUTPUT from toolkit %s: %s",
    #               toolkit.getId(), e)
    #     OUTPUTS = {}
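    # NOTE: while the block above is commented out, OUTPUTS is assumed to
    # come from module scope; otherwise the reference below raises NameError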
    params = get_ensemble_params(result)
    script = generate_ensemble_script()
    member = api.user.get_current()
    context = {
        'context': '/'.join(result.getPhysicalPath()),
        'user': {'id': member.getUserName(),
                 'email': member.getProperty('email'),
                 'fullname': member.getProperty('fullname')
                 },
        'experiment': {'title': result.__parent__.title,
                       'url': result.__parent__.absolute_url()
                       }
    }
    params['result'] = {
        'results_dir': get_results_dir(result, result.REQUEST),
        'outputs': OUTPUTS
    }
    params['worker']['script'] = {
        'name': 'ensemble.R',
        'script': script,
    }
    # set debug flag
    params['worker']['zipworkenv'] = api.env.debug_mode()
    # send job to queue
    after_commit_task(r_task, priority, params, context)
Example No. 9
def execute(result, toolkit):
    """
    This function takes an experiment and executes.

    It usesenvirnoment variables WORKER_DIR or HOME as root folder to execute
    experiments.

    After the execution finishes the output files will be attached to the
    experiment.

    :param experiment: The experiment holding the configuration and receiving
                       the results
    :type experiment: org.bccvl.site.content.IExperiment


    """
    # FIXME: ensemble is not yet a content based toolkit
    # try:
    #     OUTPUTS = json.loads(toolkit.output)
    # except (ValueError, TypeError) as e:
    #     LOG.fatal("couldn't load OUTPUT from toolkit %s: %s",
    #               toolkit.getId(), e)
    #     OUTPUTS = {}
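    # NOTE: while the block above is commented out, OUTPUTS is assumed to
    # come from module scope; otherwise the reference below raises NameError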
    params = get_ensemble_params(result)
    script = generate_ensemble_script()
    member = api.user.get_current()
    context = {
        'context': '/'.join(result.getPhysicalPath()),
        'user': {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname')
        },
        'experiment': {
            'title': result.__parent__.title,
            'url': result.__parent__.absolute_url()
        }
    }
    params['result'] = {'results_dir': tempfile.mkdtemp(), 'outputs': OUTPUTS}
    params['worker']['script'] = {
        'name': 'ensemble.R',
        'script': script,
    }
    # set debug flag
    params['worker']['zipworkenv'] = api.env.debug_mode()
    # send job to queue
    after_commit_task(r_task, params, context)
Example No. 10
def execute(result, func, priority=HIGH_PRIORITY):
    """
    This function takes an experiment and executes.

    It usesenvirnoment variables WORKER_DIR or HOME as root folder to execute
    experiments.

    After the execution finishes the output files will be attached to the
    experiment.

    :param experiment: The experiment holding the configuration and receiving
                       the results
    :type experiment: org.bccvl.site.content.IExperiment


    """
    params = get_project_params(result)
    script = generate_project_script()
    # plone context for this job
    member = api.user.get_current()
    context = {
        'context': '/'.join(result.getPhysicalPath()),
        'user': {'id': member.getUserName(),
                 'email': member.getProperty('email'),
                 'fullname': member.getProperty('fullname')
                 },
        'experiment': {'title': result.__parent__.title,
                       'url': result.__parent__.absolute_url()
                       }
    }
    # add result info
    params['result'] = {
        'results_dir': get_results_dir(result, result.REQUEST),
        'outputs': get_output(result.job_params['function'])
    }
    params['worker']['script'] = {
        'name': 'projection.R',
        'script': script
    }
    # set debug flag
    params['worker']['zipworkenv'] = api.env.debug_mode()
    after_commit_task(r_task, priority, params, context)
Example No. 11
def execute(result, toolkit):
    """
    This function takes an experiment and executes.

    It usesenvirnoment variables WORKER_DIR or HOME as root folder to execute
    experiments.

    After the execution finishes the output files will be attached to the
    experiment.

    :param experiment: The experiment holding the configuration and receiving
                       the results
    :type experiment: org.bccvl.site.content.IExperiment


    """
    # FIXME: ensemble is not yet a content based toolkit
    # try:
    #     OUTPUTS = json.loads(toolkit.output)
    # except (ValueError, TypeError) as e:
    #     LOG.fatal("couldn't load OUTPUT from toolkit %s: %s",
    #               toolkit.getId(), e)
    #     OUTPUTS = {}
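    # NOTE: while the block above is commented out, OUTPUTS is assumed to
    # come from module scope; otherwise the reference below raises NameError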
    params = get_ensemble_params(result)
    script = generate_ensemble_script()
    member = api.user.get_current()
    context = {
        "context": "/".join(result.getPhysicalPath()),
        "user": {
            "id": member.getUserName(),
            "email": member.getProperty("email"),
            "fullname": member.getProperty("fullname"),
        },
        "experiment": {"title": result.__parent__.title, "url": result.__parent__.absolute_url()},
    }
    params["result"] = {"results_dir": "scp://[email protected]" + tempfile.mkdtemp(), "outputs": OUTPUTS}
    params["worker"]["script"] = {"name": "ensemble.R", "script": script}
    # set debug flag
    params["worker"]["zipworkenv"] = api.env.debug_mode()
    # send job to queue
    after_commit_task(r_task, params, context)
Example No. 12
    def start_job(self):
        if self.is_active():
            return 'error', u'Current Job is still running'
        # The dataset object already exists and should have all required metadata
        md = IBCCVLMetadata(self.context)
        # TODO: this assumes we have an lsid in the metadata
        #       should check for it
        lsid = md['species']['taxonID']

        # we need site-path, context-path and lsid for this job
        #site_path = '/'.join(api.portal.get().getPhysicalPath())
        context_path = '/'.join(self.context.getPhysicalPath())
        member = api.user.get_current()
        # a folder for the datamover to place files in
        tmpdir = tempfile.mkdtemp()

        # ala_import will be submitted after commit, so we won't get a
        # result here
        ala_import_task = ala_import(
            lsid, tmpdir, {
                'context': context_path,
                'user': {
                    'id': member.getUserName(),
                    'email': member.getProperty('email'),
                    'fullname': member.getProperty('fullname')
                }
            })
        # TODO: add title, and url for dataset? (like with experiments?)
        # update provenance
        self._createProvenance(self.context)
        after_commit_task(ala_import_task)

        # FIXME: we don't have a backend task id here as it will be started
        #        after commit, when we shouldn't write anything to the db
        #        maybe add another callback to set task_id?
        self.new_job('TODO: generate id', 'generate taskname: ala_import')
        self.set_progress('PENDING', u'ALA import pending')

        return 'info', u'Job submitted {0} - {1}'.format(
            self.context.title, self.state)
Example No. 13
    def export_result(self, serviceid):
        # self.context should be a result
        if not hasattr(self.context, 'job_params'):
            raise NotFound(self.context, self.context.title, self.request)
        # TODO: validate serviceid

        # start export job
        context_path = '/'.join(self.context.getPhysicalPath())
        member = api.user.get_current()

        zipurl = self.context.absolute_url() + '/resultdownload'

        from org.bccvl.tasks.result_export import result_export
        from org.bccvl.tasks.plone import after_commit_task
        export_task = result_export(
            zipurl,
            serviceid, {'context': context_path,
                        'user': {
                            'id': member.getUserName(),
                            'email': member.getProperty('email'),
                            'fullname': member.getProperty('fullname')
                            }})
        # queue job submission
        after_commit_task(export_task)

        # self.new_job('TODO: generate id', 'generate taskname: export_result')
        # self.set_progress('PENDING', u'Result export pending')

        status = 'info'
        message = u'Export request for "{}" successfully submitted! Please check the service and any associated email accounts to confirm the data\'s availability'.format(self.context.title)

        IStatusMessage(self.request).add(message, type=status)
        nexturl = self.request.get('HTTP-REFERER')
        if not nexturl:
            # this method should only be called on a result folder
            # we should be able to safely redirect back to the parent experiment
            nexturl = self.context.__parent__.absolute_url()
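        # 307 tells the client to repeat the request at nexturl with the
        # same method preserved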
        self.request.response.redirect(nexturl, 307)
        return (status, message)
Example No. 14
def testsdm(result, toolkit):
    # submit test_job into queue
    member = api.user.get_current()
    params = {
        'result': {
            'results_dir': tempfile.mkdtemp(),
            'outputs': {
                'files': {
                    '*.RData': {
                        'title': 'Test SDM Model',
                        'genre': 'DataGenreSDMModel',
                        'mimetype': 'application/x-r-data'
                    },
                    "proj_*.tif": {
                        "title": "Projection to current",
                        "genre": "DataGenreCP",
                        "mimetype": "image/geotiff"
                    },
                }
            }
        },
    }
    context = {
        'context': '/'.join(result.getPhysicalPath()),
        'user': {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname'),
        },
        'experiment': {
            'title': result.__parent__.title,
            'url': result.__parent__.absolute_url()
        }
    }
    # TODO: create chain with import task?
    # ...  check what happens if task fails (e.g. remove 'experiment' in context)
    after_commit_task(testjob, params, context)
Example No. 15
    def start_job(self):
        if self.is_active():
            return 'error', u'Current Job is still running'
        # The dataset object already exists and should have all required metadata
        md = IBCCVLMetadata(self.context)
        # TODO: this assumes we have an lsid in the metadata
        #       should check for it
        lsid = md['species']['taxonID']

        # we need site-path, context-path and lsid for this job
        #site_path = '/'.join(api.portal.get().getPhysicalPath())
        context_path = '/'.join(self.context.getPhysicalPath())
        member = api.user.get_current()
        # a folder for the datamover to place files in
        tmpdir = tempfile.mkdtemp()

        # ala_import will be submitted after commit, so we won't get a
        # result here
        ala_import_task = ala_import(
            lsid, tmpdir, {'context': context_path,
                           'user': {
                               'id': member.getUserName(),
                               'email': member.getProperty('email'),
                               'fullname': member.getProperty('fullname')
                           }})
        # TODO: add title, and url for dataset? (like with experiments?)
        # update provenance
        self._createProvenance(self.context)
        after_commit_task(ala_import_task)

        # FIXME: we don't have a backend task id here as it will be started
        #        after commit, when we shouldn't write anything to the db
        #        maybe add another callback to set task_id?
        self.new_job('TODO: generate id', 'generate taskname: ala_import')
        self.set_progress('PENDING', u'ALA import pending')

        return 'info', u'Job submitted {0} - {1}'.format(self.context.title, self.state)
Example No. 16
    def export_result(self, serviceid):
        # self.context should be a result
        if not hasattr(self.context, 'job_params'):
            raise NotFound(self.context, self.context.title, self.request)
        # TODO: validate serviceid

        # start export job
        context_path = '/'.join(self.context.getPhysicalPath())
        member = api.user.get_current()

        # collect list of files to export:
        urllist = []
        for content in self.context.values():
            if content.portal_type not in ('org.bccvl.content.dataset', 'org.bccvl.content.remotedataset'):
                # skip non datasets
                continue
            dlinfo = IDownloadInfo(content)
            urllist.append(dlinfo['url'])
        # add mets.xml
        urllist.append('{}/mets.xml'.format(self.context.absolute_url()))
        # add prov.ttl
        urllist.append('{}/prov.ttl'.format(self.context.absolute_url()))

        from org.bccvl.tasks.celery import app
        from org.bccvl.tasks.plone import after_commit_task
        # FIXME: Do mapping from serviceid to service type? based on interface
        #        background task will need serviceid and type, but it may resolve
        #        servicetype via API with serviceid
        export_task = app.signature(
            "org.bccvl.tasks.export_services.export_result",
            kwargs={
                'siteurl': api.portal.get().absolute_url(),
                'fileurls': urllist,
                'serviceid': serviceid,
                'context': {
                    'context': context_path,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            },
            immutable=True)

        # queue job submission
        after_commit_task(export_task)

        # self.new_job('TODO: generate id', 'generate taskname: export_result')
        # self.set_progress('PENDING', u'Result export pending')

        status = 'info'
        message = u'Export request for "{}" successfully submitted! Please check the service and any associated email accounts to confirm the data\'s availability'.format(
            self.context.title)

        IStatusMessage(self.request).add(message, type=status)
        nexturl = self.request.get('HTTP-REFERER')
        if not nexturl:
            # this method should only be called on a result folder
            # we should be able to safely redirect back to the parent
            # experiment
            nexturl = self.context.__parent__.absolute_url()
        self.request.response.redirect(nexturl, 307)
        return (status, message)
Example No. 17
    def export_result(self, serviceid):
        # self.context should be a result
        if not hasattr(self.context, 'job_params'):
            raise NotFound(self.context, self.context.title, self.request)
        # TODO: validate serviceid

        # start export job
        context_path = '/'.join(self.context.getPhysicalPath())
        member = api.user.get_current()

        # collect list of files to export:
        urllist = []
        for content in self.context.values():
            if content.portal_type not in ('org.bccvl.content.dataset',
                                           'org.bccvl.content.remotedataset'):
                # skip non datasets
                continue
            dlinfo = IDownloadInfo(content)
            urllist.append(dlinfo['url'])
        # add mets.xml
        urllist.append('{}/mets.xml'.format(self.context.absolute_url()))
        # add prov.ttl
        urllist.append('{}/prov.ttl'.format(self.context.absolute_url()))
        # add experiment metadata
        urllist.append('{}/expmetadata.txt'.format(
            self.context.absolute_url()))

        from org.bccvl.tasks.celery import app
        from org.bccvl.tasks.plone import after_commit_task
        # FIXME: Do mapping from serviceid to service type? based on interface
        #        background task will need serviceid and type, but it may resolve
        #        servicetype via API with serviceid
        export_task = app.signature(
            "org.bccvl.tasks.export_services.export_result",
            kwargs={
                'siteurl': api.portal.get().absolute_url(),
                'fileurls': urllist,
                'serviceid': serviceid,
                'context': {
                    'context': context_path,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            },
            immutable=True)

        # queue job submission
        after_commit_task(export_task)

        # self.new_job('TODO: generate id', 'generate taskname: export_result')
        # self.set_progress('PENDING', u'Result export pending')

        status = 'info'
        message = u'Export request for "{}" successfully submitted! Please check the service and any associated email accounts to confirm the data\'s availability'.format(
            self.context.title)

        IStatusMessage(self.request).add(message, type=status)
        nexturl = self.request.get('HTTP-REFERER')
        if not nexturl:
            # this method should only be called on a result folder
            # we should be able to safely redirect back to the parent
            # experiment
            nexturl = self.context.__parent__.absolute_url()
        self.request.response.redirect(nexturl, 307)
        return (status, message)
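Note the immutable=True flag on the signatures above: when signatures are chained, an immutable signature ignores the parent task's return value instead of receiving it as an extra argument. A small illustration with hypothetical task names:

from celery import chain

# hypothetical tasks; each immutable signature keeps exactly the kwargs
# given here and discards the result of the preceding step
step1 = app.signature('tasks.step_one', kwargs={'x': 1}, immutable=True)
step2 = app.signature('tasks.step_two', kwargs={'y': 2}, immutable=True)
chain(step1, step2).apply_async()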
Example No. 18
    def update_metadata(self):
        uuid = self.request.form.get('uuid', None)
        try:
            if uuid:
                brain = uuidToCatalogBrain(uuid)
                if brain is None:
                    raise Exception("Brain not found")

                obj = brain.getObject()
            else:
                obj = self.context

            # get username
            member = ploneapi.user.get_current()
            if member.getId():
                user = {
                    'id': member.getUserName(),
                    'email': member.getProperty('email'),
                    'fullname': member.getProperty('fullname')
                }
            else:
                raise Exception("Invalid user")

            # build download url
            # 1. get context (site) relative path
            obj_url = obj.absolute_url()

            if obj.portal_type == 'org.bccvl.content.dataset':
                filename = obj.file.filename
                obj_url = '{}/@@download/file/{}'.format(obj_url, filename)
            elif obj.portal_type == 'org.bccvl.content.remotedataset':
                filename = os.path.basename(obj.remoteUrl)
                obj_url = '{}/@@download/{}'.format(obj_url, filename)
            elif obj.portal_type == 'org.bccvl.content.multispeciesdataset':
                filename = obj.file.filename
                obj_url = '{}/@@download/file/{}'.format(obj_url, filename)
            else:
                raise Exception("Wrong content type")

            from org.bccvl.tasks.celery import app
            update_task = app.signature(
                "org.bccvl.tasks.datamover.tasks.update_metadata",
                kwargs={
                    'url': obj_url,
                    'filename': filename,
                    'contenttype': obj.format,
                    'context': {
                        'context': '/'.join(obj.getPhysicalPath()),
                        'user': user,
                    }
                },
                immutable=True)

            from org.bccvl.tasks.plone import after_commit_task
            after_commit_task(update_task)
            # track background job state
            jt = IJobTracker(obj)
            job = jt.new_job('TODO: generate id',
                             'generate taskname: update_metadata',
                             function=obj.dataSource,
                             type=obj.portal_type)
            jt.set_progress('PENDING', 'Metadata update pending')
            return job.id
        except Exception as e:
            LOG.error('Caught exception %s', e)
        raise NotFound(self, 'update_metadata', self.request)
Example No. 19
    def demosdm(self):
        lsid = self.request.form.get('lsid')
        # Run SDM on a species given by lsid (from ALA), followed by a Climate
        # Change projection.
        if self.request.get('REQUEST_METHOD', 'GET').upper() != 'POST':
            raise BadRequest('Request must be POST')
        # Swift params
        swiftsettings = getUtility(IRegistry).forInterface(ISwiftSettings)

        # get parameters
        if not lsid:
            raise BadRequest('Required parameter lsid missing')
        # we have an lsid ... we can't really verify it, but at least
        # some data is here
        # find rest of parameters
        # FIXME: hardcoded path to environmental datasets

        # Get the future climate for climate change projection
        portal = ploneapi.portal.get()
        dspath = '/'.join([defaults.DATASETS_FOLDER_ID,
                           defaults.DATASETS_CLIMATE_FOLDER_ID,
                           'australia', 'australia_1km',
                           'RCP85_ukmo-hadgem1_2085.zip'])
        ds = portal.restrictedTraverse(dspath)
        dsuuid = IUUID(ds)
        dlinfo = IDownloadInfo(ds)
        dsmd = IBCCVLMetadata(ds)
        futureclimatelist = []
        for layer in ('B05', 'B06', 'B13', 'B14'):
            futureclimatelist.append({
                'uuid': dsuuid,
                'filename': dlinfo['filename'],
                'downloadurl': dlinfo['url'],
                'layer': layer,
                'type': dsmd['layers'][layer]['datatype'],
                'zippath': dsmd['layers'][layer]['filename']
            })
        # Climate change projection name
        cc_projection_name = os.path.splitext(dlinfo['filename'])[0]

        # Get the current climate for SDM
        dspath = '/'.join([defaults.DATASETS_FOLDER_ID,
                           defaults.DATASETS_CLIMATE_FOLDER_ID,
                           'australia', 'australia_1km',
                           'current.76to05.zip'])
        ds = portal.restrictedTraverse(dspath)
        dsuuid = IUUID(ds)
        dlinfo = IDownloadInfo(ds)
        dsmd = IBCCVLMetadata(ds)
        envlist = []
        for layer in ('B05', 'B06', 'B13', 'B14'):
            envlist.append({
                'uuid': dsuuid,
                'filename': dlinfo['filename'],
                'downloadurl': dlinfo['url'],
                'layer': layer,
                'type': dsmd['layers'][layer]['datatype'],
                'zippath': dsmd['layers'][layer]['filename']
            })

        # FIXME: we don't use an IJobTracker here for now
        # get toolkit
        func = portal[defaults.TOOLKITS_FOLDER_ID]['demosdm']
        # build job_params:
        job_params = {
            'resolution': IBCCVLMetadata(ds)['resolution'],
            'function': func.getId(),
            'species_occurrence_dataset': {
                'uuid': 'ala_occurrence_dataset',
                'species': u'demoSDM',
                'downloadurl': 'ala://ala?lsid={}'.format(lsid),
            },
            'environmental_datasets': envlist,
            'future_climate_datasets': futureclimatelist,
            'cc_projection_name': cc_projection_name
        }
        # add toolkit parameters: (all default values)
        # get toolkit schema
        schema = loadString(func.schema).schema
        for name, field in getFields(schema).items():
            if field.default is not None:
                job_params[name] = field.default
        # add other default parameters
        job_params.update({
            'rescale_all_models': False,
            'selected_models': 'all',
            'modeling_id': 'bccvl',
        })
        # generate script to run
        script = u'\n'.join([
            resource_string('org.bccvl.compute', 'rscripts/bccvl.R'),
            resource_string('org.bccvl.compute', 'rscripts/eval.R'),
            func.script])
        # where to store results.
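        # quote_plus makes the lsid safe for use in a URL path,
        # e.g. 'urn:lsid:x' becomes 'urn%3Alsid%3Ax'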
        result = {
            'results_dir': 'swift+{}/wordpress/{}/'.format(swiftsettings.storage_url, urllib.quote_plus(lsid)),
            'outputs': json.loads(func.output)
        }
        # worker hints:
        worker = {
            'script': {
                'name': '{}.R'.format(func.getId()),
                'script': script
            },
            'files': (
                'species_occurrence_dataset',
                'environmental_datasets',
                'future_climate_datasets'
            )
        }
        # put everything together
        jobdesc = {
            'env': {},
            'params': job_params,
            'worker': worker,
            'result': result,
        }

        # create job
        jobtool = getUtility(IJobUtility)
        job = jobtool.new_job(
            lsid=lsid,
            toolkit=IUUID(func),
            function=func.getId(),
            type='demosdm'
        )
        # create job context object
        member = ploneapi.user.get_current()
        context = {
            # we use the site object as context
            'context': '/'.join(portal.getPhysicalPath()),
            'jobid': job.id,
            'user': {
                'id': member.getUserName(),
                'email': member.getProperty('email'),
                'fullname': member.getProperty('fullname')
            },
        }

        # all set to go build task chain now
        from org.bccvl.tasks.compute import demo_task
        from org.bccvl.tasks.plone import after_commit_task, HIGH_PRIORITY
        after_commit_task(demo_task, HIGH_PRIORITY, jobdesc, context)
        # let's hope everything works, return result

        # We don't create an experiment object, so we don't count stats here
        # let's do it manually
        getUtility(IStatsUtility).count_experiment(
            user=member.getId(),
            portal_type='demosdm',
        )

        return {
            'state': os.path.join(result['results_dir'], 'state.json'),
            'result': os.path.join(result['results_dir'], 'proj_metadata.json'),
            'jobid': job.id
        }
Example No. 20
    def add(self, object):
        # FIXME: this is a workaround, which is fine for small uploaded files.
        #        large uploads should go through another process anyway
        # TODO: re-implementing this method is the only way to know
        #       the full path of the object. We need the path to apply
        #       the transmogrifier chain.
        # fti = getUtility(IDexterityFTI, name=self.portal_type)
        container = aq_inner(self.context)
        try:
            # traverse to subfolder if possible
            container = container.restrictedTraverse('/'.join(self.subpath))
        except Exception as e:
            LOG.warn('Could not traverse to %s/%s',
                     '/'.join(container.getPhysicalPath()), '/'.join(self.subpath))
        new_object = addContentToContainer(container, object)
        # set data genre:
        if self.datagenre:
            IBCCVLMetadata(new_object)['genre'] = self.datagenre
        if self.categories:
            IBCCVLMetadata(new_object)['categories'] = self.categories
            # rdf commit should happen in the transmogrifier step later on
        # if fti.immediate_view:
        #     self.immediate_view = "%s/%s/%s" % (container.absolute_url(), new_object.id, fti.immediate_view,)
        # else:
        #     self.immediate_view = "%s/%s" % (container.absolute_url(), new_object.id)
        # start background import process (just a metadata update)

        # run transmogrify md extraction here
        context_path = '/'.join(new_object.getPhysicalPath())
        member = api.user.get_current()
        # species extract task
        if IMultiSpeciesDataset.providedBy(new_object):
            # kick off csv split import tasks
            import_task = app.signature(
                "org.bccvl.tasks.datamover.tasks.import_multi_species_csv",
                kwargs={
                    'url': '{}/@@download/file/{}'.format(new_object.absolute_url(), new_object.file.filename),
                    'results_dir': get_results_dir(container, self.request),
                    'import_context': {
                        'context': '/'.join(container.getPhysicalPath()),
                        'user': {
                            'id': member.getUserName(),
                            'email': member.getProperty('email'),
                            'fullname': member.getProperty('fullname')
                        }
                    },
                    'context': {
                        'context': context_path,
                        'user': {
                            'id': member.getUserName(),
                            'email': member.getProperty('email'),
                            'fullname': member.getProperty('fullname')
                        }
                    }
                },
                immutable=True)
            after_commit_task(import_task)
            # create job tracking object
            jt = IJobTracker(new_object)
            job = jt.new_job('TODO: generate id',
                             'generate taskname: import_multi_species_csv')
            job.type = new_object.portal_type
            jt.set_progress('PENDING', u'Multi species import pending')
        else:
            if hasattr(self, '_upload'):
                file = self._upload['file']
                new_object.format = file.contentType
                uid = IUUID(new_object)
                swiftsettings = getUtility(
                    IRegistry).forInterface(ISwiftSettings)
                import os.path
                swift_url = '{storage_url}/{container}/{path}/{name}'.format(
                    storage_url=swiftsettings.storage_url,
                    container=swiftsettings.result_container,
                    path=uid,
                    name=os.path.basename(file.filename))
                new_object.remoteUrl = swift_url
            else:
                file = new_object.file
                new_object.format = file.contentType

            dlinfo = IDownloadInfo(new_object)

            # single species upload
            update_task = app.signature(
                "org.bccvl.tasks.datamover.tasks.update_metadata",
                kwargs={
                    'url': dlinfo['url'],
                    'filename': dlinfo['filename'],
                    'contenttype': dlinfo['contenttype'],
                    'context': {
                        'context': context_path,
                        'user': {
                            'id': member.getUserName(),
                            'email': member.getProperty('email'),
                            'fullname': member.getProperty('fullname')
                        }
                    }
                },
                immutable=True)
            # create upload task in case we upload to external store
            if hasattr(self, '_upload'):
                # There is an upload ... we have to make sure the uploaded data ends up in external storage
                # 3. put temp file aside
                tmpdir = tempfile.mkdtemp(prefix='bccvl_upload')
                tmpfile = os.path.join(tmpdir, os.path.basename(file.filename))
                blobf = file.open()
                try:
                    # try rename
                    os.rename(blobf.name, tmpfile)
                except OSError:
                    # try copy
                    shutil.copy(blobf.name, tmpfile)
                # 4. update task chain
                src_url = 'scp://{uid}@{ip}:{port}{file}'.format(
                    uid=pwd.getpwuid(os.getuid()).pw_name,
                    ip=get_hostname(self.request),
                    port=os.environ.get('SSH_PORT', 22),
                    file=tmpfile)
                dest_url = 'swift+{}'.format(new_object.remoteUrl)
                move_task = app.signature(
                    'org.bccvl.tasks.datamover.tasks.move',
                    kwargs={
                        'move_args': [(src_url, dest_url)],
                        'context': {
                            'context': context_path,
                            'user': {
                                'id': member.getUserName(),
                                'email': member.getProperty('email'),
                                'fullname': member.getProperty('fullname')
                            }
                        }
                    },
                    immutable=True)
                cleanup_task = app.signature(
                    'org.bccvl.tasks.plone.import_cleanup',
                    kwargs={
                        'path': os.path.dirname(tmpfile),
                        'context': {
                            'context': context_path,
                            'user': {
                                'id': member.getUserName(),
                                'email': member.getProperty('email'),
                                'fullname': member.getProperty('fullname')
                            }
                        }
                    },
                    immutable=True)

                update_task = move_task | update_task | cleanup_task
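                # '|' chains the three immutable signatures into one celery
                # canvas: move to swift, then update metadata, then clean up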

                # need some more workflow states here to support e.g. zip file upload (multiple rasters),
                #      give user a chance to better define metadata
                # make sure update_metadata does not change user edited metadata
                #      -> layer, unit, projection, whatever

                # FIXME: clean up tmp upload directory as well

                # queue job submission
            after_commit_task(update_task)
            # create job tracking object
            jt = IJobTracker(new_object)
            job = jt.new_job('TODO: generate id',
                             'generate taskname: update_metadata')
            job.type = new_object.portal_type
            jt.set_progress('PENDING', u'Metadata update pending')

        # We have to reindex after updating the object
        new_object.reindexObject()
Example No. 21
    def update_metadata(self):
        uuid = self.request.form.get("uuid", None)
        try:
            if uuid:
                brain = uuidToCatalogBrain(uuid)
                if brain is None:
                    raise Exception("Brain not found")

                obj = brain.getObject()
            else:
                obj = self.context

            # get username
            member = ploneapi.user.get_current()
            if member.getId():
                user = {
                    "id": member.getUserName(),
                    "email": member.getProperty("email"),
                    "fullname": member.getProperty("fullname"),
                }
            else:
                raise Exception("Invalid user")

            # build download url
            # 1. get context (site) relative path
            obj_url = obj.absolute_url()

            if obj.portal_type == "org.bccvl.content.dataset":
                filename = obj.file.filename
                obj_url = "{}/@@download/file/{}".format(obj_url, filename)
            elif obj.portal_type == "org.bccvl.content.remotedataset":
                filename = os.path.basename(obj.remoteUrl)
                obj_url = "{}/@@download/{}".format(obj_url, filename)
            elif obj.portal_type == "org.bccvl.content.multispeciesdataset":
                filename = obj.file.filename
                obj_url = "{}/@@download/file/{}".format(obj_url, filename)
            else:
                raise Exception("Wrong content type")

            from org.bccvl.tasks.celery import app

            update_task = app.signature(
                "org.bccvl.tasks.datamover.tasks.update_metadata",
                kwargs={
                    "url": obj_url,
                    "filename": filename,
                    "contenttype": obj.format,
                    "context": {"context": "/".join(obj.getPhysicalPath()), "user": user},
                },
                immutable=True,
            )

            from org.bccvl.tasks.plone import after_commit_task

            after_commit_task(update_task)
            # track background job state
            jt = IJobTracker(obj)
            job = jt.new_job("TODO: generate id", "generate taskname: update_metadata")
            job.type = obj.portal_type
            jt.set_progress("PENDING", "Metadata update pending")
            return job.id
        except Exception as e:
            LOG.error("Caught exception %s", e)
        raise NotFound(self, "update_metadata", self.request)
Example No. 22
    def add(self, object):
        # FIXME: this is a workaround, which is fine for small uploaded files.
        #        large uploads should go through another process anyway
        # TODO: re-implementing this method is the only way to know
        #       the full path of the object. We need the path to apply
        #       the transmogrifier chain.
        # fti = getUtility(IDexterityFTI, name=self.portal_type)
        container = aq_inner(self.context)
        try:
            # traverse to subfolder if possible
            container = container.restrictedTraverse('/'.join(self.subpath))
        except Exception as e:
            LOG.warn('Could not traverse to %s/%s',
                     '/'.join(container.getPhysicalPath()), '/'.join(self.subpath))
        new_object = addContentToContainer(container, object)
        # set data genre:
        if self.datagenre:
            IBCCVLMetadata(new_object)['genre'] = self.datagenre
        if self.categories:
            IBCCVLMetadata(new_object)['categories'] = self.categories
        
        new_object.subject = []
        if self.domain:
            new_object.subject = [self.domain]
        if self.timeperiod:
            new_object.subject += self.timeperiod

            # rdf commit should happen in the transmogrifier step later on
        # if fti.immediate_view:
        #     self.immediate_view = "%s/%s/%s" % (container.absolute_url(), new_object.id, fti.immediate_view,)
        # else:
        #     self.immediate_view = "%s/%s" % (container.absolute_url(), new_object.id)
        # start background import process (just a metadata update)

        # run transmogrify md extraction here
        context_path = '/'.join(new_object.getPhysicalPath())
        member = api.user.get_current()
        # species extract task
        if IMultiSpeciesDataset.providedBy(new_object):
            # kick off csv split import tasks
            import_task = app.signature(
                "org.bccvl.tasks.datamover.tasks.import_multi_species_csv",
                kwargs={
                    'url': '{}/@@download/file/{}'.format(new_object.absolute_url(), new_object.file.filename),
                    'results_dir': get_results_dir(new_object, self.request, childSpecies=True),
                    'import_context': {
                        'context': '/'.join(container.getPhysicalPath()),
                        'user': {
                            'id': member.getUserName(),
                            'email': member.getProperty('email'),
                            'fullname': member.getProperty('fullname')
                        }
                    },
                    'context': {
                        'context': context_path,
                        'genre': self.datagenre,
                        'dataSource': new_object.dataSource,
                        'user': {
                            'id': member.getUserName(),
                            'email': member.getProperty('email'),
                            'fullname': member.getProperty('fullname')
                        }
                    }
                },
                immutable=True)
            after_commit_task(import_task)
            # create job tracking object
            jt = IJobTracker(new_object)
            jt.new_job('TODO: generate id',
                       'generate taskname: import_multi_species_csv',
                       function=new_object.dataSource,
                       type=new_object.portal_type)
            jt.set_progress('PENDING', u'Multi species import pending')
        else:
            if hasattr(self, '_upload'):
                file = self._upload['file']
                new_object.format = file.contentType
                uid = IUUID(new_object)
                swiftsettings = getUtility(
                    IRegistry).forInterface(ISwiftSettings)
                import os.path
                swift_url = '{storage_url}/{container}/{path}/{name}'.format(
                    storage_url=swiftsettings.storage_url,
                    container=swiftsettings.result_container,
                    path=uid,
                    name=os.path.basename(file.filename))
                new_object.remoteUrl = swift_url
            else:
                file = new_object.file
                new_object.format = file.contentType

            dlinfo = IDownloadInfo(new_object)

            # single species upload
            update_task = app.signature(
                "org.bccvl.tasks.datamover.tasks.update_metadata",
                kwargs={
                    'url': dlinfo['url'],
                    'filename': dlinfo['filename'],
                    'contenttype': dlinfo['contenttype'],
                    'context': {
                        'context': context_path,
                        'user': {
                            'id': member.getUserName(),
                            'email': member.getProperty('email'),
                            'fullname': member.getProperty('fullname')
                        }
                    }
                },
                immutable=True)
            # create upload task in case we upload to external store
            if hasattr(self, '_upload'):
                # FIXME: we can't use ssh here; we don't know which
                #        container we are in, and sshing from here would
                #        be bad anyway.
                # There is an upload; we have to make sure the uploaded
                # data ends up in external storage.
                # 3. put temp file aside
                tmpdir = tempfile.mkdtemp(prefix='bccvl_upload')
                tmpfile = os.path.join(tmpdir, os.path.basename(file.filename))
                blobf = file.open()
                try:
                    # try rename
                    os.rename(blobf.name, tmpfile)
                except OSError:
                    # try copy
                    shutil.copy(blobf.name, tmpfile)

                # TODO: we push the uploaded file directly to swift here,
                #       but this really should be a background process.
                #       Best solution: the user uploads to a temporary
                #       upload service (the file never ends up here); we
                #       keep a remote url and tell the datamover to pull
                #       the file from there and move it to its final
                #       destination (or something like this).
                #       Another good option: let the user upload directly
                #       to swift (what about large file uploads?) and
                #       take care of clean up if necessary.

                # 4. move file to swift
                # TODO: do we have enough information to upload to swift?
                #       need a temp url?
                swiftopts = app.conf.get('bccvl', {}).get('swift', {})
                src_url = build_source('file://{}'.format(tmpfile))
                dest_url = build_destination('swift+{}'.format(new_object.remoteUrl),
                    settings={'swift': {
                        'os_auth_url': swiftopts.get('os_auth_url'),
                        'os_username': swiftopts.get('os_username'),
                        'os_password': swiftopts.get('os_password'),
                        'os_tenant_name': swiftopts.get('os_tenant_name'),
                        'os_storage_url': swiftopts.get('os_storage_url')
                    }}
                )

                try:
                    movelib.move(src_url, dest_url)
                except Exception:
                    # no error handling yet; re-raise and let the finally
                    # clause clean up the temp location
                    raise
                finally:
                    # clean up temp location
                    path = os.path.dirname(tmpfile)
                    shutil.rmtree(path)

            # queue job submission
            after_commit_task(update_task)
            # create job tracking object
            jt = IJobTracker(new_object)
            jt.new_job('TODO: generate id',
                       'generate taskname: update_metadata',
                       function=new_object.dataSource,
                       type=new_object.portal_type)
            jt.set_progress('PENDING', u'Metadata update pending')

        # We have to reindex after updating the object
        new_object.reindexObject()
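
A note on the pattern above: after_commit_task defers sending the Celery signature until the Zope transaction has committed, so a rolled-back upload never enqueues a worker job. Below is a minimal sketch of such a helper, assuming the standard transaction package; the helper name and wiring are illustrative, not the actual org.bccvl.tasks.plone code.

import transaction

def queue_after_commit(signature):
    # illustrative stand-in for after_commit_task: register a hook that
    # sends the immutable Celery signature only on successful commit
    def hook(success, sig=signature):
        # 'success' is the boolean the transaction machinery passes in
        if success:
            sig.apply_async()
    transaction.get().addAfterCommitHook(hook)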
Exemplo n.º 26
0
    def update_metadata(self):
        uuid = self.request.form.get('uuid', None)
        try:
            if uuid:
                brain = uuidToCatalogBrain(uuid)
                if brain is None:
                    raise Exception("Brain not found")

                obj = brain.getObject()
            else:
                obj = self.context

            # get username
            member = ploneapi.user.get_current()
            if member.getId():
                user = {
                    'id': member.getUserName(),
                    'email': member.getProperty('email'),
                    'fullname': member.getProperty('fullname')
                }
            else:
                raise Exception("Invalid user")

            # build download url
            # 1. get context (site) relative path
            obj_url = obj.absolute_url()

            if obj.portal_type == 'org.bccvl.content.dataset':
                filename = obj.file.filename
                obj_url = '{}/@@download/file/{}'.format(obj_url, filename)
            elif obj.portal_type == 'org.bccvl.content.remotedataset':
                filename = os.path.basename(obj.remoteUrl)
                obj_url = '{}/@@download/{}'.format(obj_url, filename)
            elif obj.portal_type == 'org.bccvl.content.multispeciesdataset':
                filename = obj.file.filename
                obj_url = '{}/@@download/file/{}'.format(obj_url, filename)
            else:
                raise Exception("Wrong content type")

            from org.bccvl.tasks.celery import app
            update_task = app.signature(
                "org.bccvl.tasks.datamover.tasks.update_metadata",
                kwargs={
                    'url': obj_url,
                    'filename': filename,
                    'contenttype': obj.format,
                    'context': {
                        'context': '/'.join(obj.getPhysicalPath()),
                        'user': user,
                    }
                },
                immutable=True)

            from org.bccvl.tasks.plone import after_commit_task
            after_commit_task(update_task)
            # track background job state
            jt = IJobTracker(obj)
            job = jt.new_job('TODO: generate id',
                             'generate taskname: update_metadata',
                             function=obj.dataSource,
                             type=obj.portal_type)
            jt.set_progress('PENDING', 'Metadata update pending')
            return job.id
        except Exception as e:
            LOG.error('Caught exception %s', e)
        raise NotFound(self, 'update_metadata', self.request)
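
The portal_type dispatch above is the whole contract for building the download URL. A hypothetical helper that factors it out; the portal_type ids and @@download routes are taken from the snippet, the function itself is only a sketch:

import os.path

def download_info_for(obj):
    # file-backed datasets download via @@download/file/<filename>,
    # remote datasets via @@download/<basename of remoteUrl>
    base = obj.absolute_url()
    if obj.portal_type in ('org.bccvl.content.dataset',
                           'org.bccvl.content.multispeciesdataset'):
        filename = obj.file.filename
        return filename, '{}/@@download/file/{}'.format(base, filename)
    if obj.portal_type == 'org.bccvl.content.remotedataset':
        filename = os.path.basename(obj.remoteUrl)
        return filename, '{}/@@download/{}'.format(base, filename)
    raise ValueError('unsupported portal_type: %s' % obj.portal_type)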
Exemplo n.º 27
0
    def add(self, object):
        # FIXME: this is a workaround, which is fine for small uploaded files.
        #        large uploads should go through another process anyway
        # TODO: re implementing this method is the only way to know
        #       the full path of the object. We need the path to apply
        #       the transmogrifier chain.
        # fti = getUtility(IDexterityFTI, name=self.portal_type)
        container = aq_inner(self.context)
        try:
            # traverse to subfolder if possible
            container = container.restrictedTraverse('/'.join(self.subpath))
        except Exception:
            LOG.warn('Could not traverse to %s/%s',
                     '/'.join(container.getPhysicalPath()),
                     '/'.join(self.subpath))
        new_object = addContentToContainer(container, object)
        # set data genre:
        if self.datagenre:
            IBCCVLMetadata(new_object)['genre'] = self.datagenre
        if self.categories:
            IBCCVLMetadata(new_object)['categories'] = self.categories

        new_object.subject = []
        if self.domain:
            new_object.subject = [self.domain]
        if self.timeperiod:
            new_object.subject += self.timeperiod

        # rdf commit should happen in the transmogrifier step later on
        # if fti.immediate_view:
        #     self.immediate_view = "%s/%s/%s" % (container.absolute_url(), new_object.id, fti.immediate_view,)
        # else:
        #     self.immediate_view = "%s/%s" % (container.absolute_url(), new_object.id)
        # start background import process (just a metadata update)

        # run transmogrify md extraction here
        context_path = '/'.join(new_object.getPhysicalPath())
        member = api.user.get_current()
        # species extract task
        if IMultiSpeciesDataset.providedBy(new_object):
            # kick off csv split import tasks
            import_task = app.signature(
                "org.bccvl.tasks.datamover.tasks.import_multi_species_csv",
                kwargs={
                    'url':
                    '{}/@@download/file/{}'.format(new_object.absolute_url(),
                                                   new_object.file.filename),
                    'results_dir':
                    get_results_dir(new_object,
                                    self.request,
                                    childSpecies=True),
                    'import_context': {
                        'context': '/'.join(container.getPhysicalPath()),
                        'user': {
                            'id': member.getUserName(),
                            'email': member.getProperty('email'),
                            'fullname': member.getProperty('fullname')
                        }
                    },
                    'context': {
                        'context': context_path,
                        'genre': self.datagenre,
                        'dataSource': new_object.dataSource,
                        'user': {
                            'id': member.getUserName(),
                            'email': member.getProperty('email'),
                            'fullname': member.getProperty('fullname')
                        }
                    }
                },
                immutable=True)
            after_commit_task(import_task)
            # create job tracking object
            jt = IJobTracker(new_object)
            jt.new_job('TODO: generate id',
                       'generate taskname: import_multi_species_csv',
                       function=new_object.dataSource,
                       type=new_object.portal_type)
            jt.set_progress('PENDING', u'Multi species import pending')
        else:
            if hasattr(self, '_upload'):
                file = self._upload['file']
                new_object.format = file.contentType
                uid = IUUID(new_object)
                swiftsettings = getUtility(IRegistry).forInterface(
                    ISwiftSettings)
                import os.path
                swift_url = '{storage_url}/{container}/{path}/{name}'.format(
                    storage_url=swiftsettings.storage_url,
                    container=swiftsettings.result_container,
                    path=uid,
                    name=os.path.basename(file.filename))
                new_object.remoteUrl = swift_url
            else:
                file = new_object.file
                new_object.format = file.contentType

            dlinfo = IDownloadInfo(new_object)

            # single species upload
            update_task = app.signature(
                "org.bccvl.tasks.datamover.tasks.update_metadata",
                kwargs={
                    'url': dlinfo['url'],
                    'filename': dlinfo['filename'],
                    'contenttype': dlinfo['contenttype'],
                    'context': {
                        'context': context_path,
                        'user': {
                            'id': member.getUserName(),
                            'email': member.getProperty('email'),
                            'fullname': member.getProperty('fullname')
                        }
                    }
                },
                immutable=True)
            # create upload task in case we upload to external store
            if hasattr(self, '_upload'):
                # FIXME: we can't use ssh here; we don't know which
                #        container we are in, and sshing from here would
                #        be bad anyway.
                # There is an upload; we have to make sure the uploaded
                # data ends up in external storage.
                # 3. put temp file aside
                tmpdir = tempfile.mkdtemp(prefix='bccvl_upload')
                tmpfile = os.path.join(tmpdir, os.path.basename(file.filename))
                blobf = file.open()
                try:
                    # try rename
                    os.rename(blobf.name, tmpfile)
                except OSError:
                    # try copy
                    shutil.copy(blobf.name, tmpfile)

                # TODO: we push the uploaded file directly to swift here,
                #       but this really should be a background process.
                #       Best solution: the user uploads to a temporary
                #       upload service (the file never ends up here); we
                #       keep a remote url and tell the datamover to pull
                #       the file from there and move it to its final
                #       destination (or something like this).
                #       Another good option: let the user upload directly
                #       to swift (what about large file uploads?) and
                #       take care of clean up if necessary.

                # 4. move file to swift
                # TODO: do we have enough information to upload to swift?
                #       need a temp url?
                swiftopts = app.conf.get('bccvl', {}).get('swift', {})
                src_url = build_source('file://{}'.format(tmpfile))
                dest_url = build_destination(
                    'swift+{}'.format(new_object.remoteUrl),
                    settings={
                        'swift': {
                            'os_auth_url':
                            swiftopts.get('os_auth_url'),
                            'os_username':
                            swiftopts.get('os_username'),
                            'os_password':
                            swiftopts.get('os_password'),
                            'os_project_name':
                            swiftopts.get('os_project_name'),
                            'os_storage_url':
                            swiftopts.get('os_storage_url'),
                            'os_user_domain_name':
                            swiftopts.get('os_user_domain_name'),
                            'os_project_domain_name':
                            swiftopts.get('os_project_domain_name'),
                            'auth_version':
                            swiftopts.get('auth_version')
                        }
                    })

                try:
                    movelib.move(src_url, dest_url)
                except Exception:
                    # no error handling yet; re-raise and let the finally
                    # clause clean up the temp location
                    raise
                finally:
                    # clean up temp location
                    path = os.path.dirname(tmpfile)
                    shutil.rmtree(path)

            # queue job submission
            after_commit_task(update_task)
            # create job tracking object
            jt = IJobTracker(new_object)
            jt.new_job('TODO: generate id',
                       'generate taskname: update_metadata',
                       function=new_object.dataSource,
                       type=new_object.portal_type)
            jt.set_progress('PENDING', u'Metadata update pending')

        # We have to reindex after updating the object
        new_object.reindexObject()
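
Taken in isolation, the upload branch's file-to-swift move boils down to the following. The build_source/build_destination/movelib.move calls mirror the snippet above, while the import paths, URLs and credential values are assumptions for illustration:

from org.bccvl import movelib
from org.bccvl.movelib.utils import build_source, build_destination

# local temp file staged by the upload handler (placeholder path)
src = build_source('file:///tmp/bccvl_upload/occurrences.csv')
# final object location in swift (placeholder endpoint and credentials)
dest = build_destination(
    'swift+https://swift.example.com/v1/AUTH_abc/results/occurrences.csv',
    settings={'swift': {
        'os_auth_url': 'https://keystone.example.com/v3',
        'os_username': 'user',
        'os_password': 'secret',
        'os_project_name': 'project',
        'os_storage_url': 'https://swift.example.com/v1/AUTH_abc',
    }})
movelib.move(src, dest)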
Exemplo n.º 28
0
    def demosdm(self):
        lsid = self.request.form.get('lsid')
        # Run SDM on a species given by lsid (from ALA), followed by a Climate
        # Change projection.
        if self.request.get('REQUEST_METHOD', 'GET').upper() != 'POST':
            raise BadRequest('Request must be POST')
        # Swift params
        swiftsettings = getUtility(IRegistry).forInterface(ISwiftSettings)

        # get parameters
        if not lsid:
            raise BadRequest('Required parameter lsid missing')
        # we have an lsid; we can't really verify it, but at least some
        # data is here
        # find rest of parameters
        # FIXME: hardcoded path to environmental datasets

        # Get the future climate for climate change projection
        portal = ploneapi.portal.get()
        dspath = '/'.join([defaults.DATASETS_FOLDER_ID,
                           defaults.DATASETS_CLIMATE_FOLDER_ID,
                           'australia', 'australia_1km',
                           'RCP85_ukmo-hadgem1_2085.zip'])
        ds = portal.restrictedTraverse(dspath)
        dsuuid = IUUID(ds)
        dlinfo = IDownloadInfo(ds)
        dsmd = IBCCVLMetadata(ds)
        futureclimatelist = []
        for layer in ('B05', 'B06', 'B13', 'B14'):
            futureclimatelist.append({
                'uuid': dsuuid,
                'filename': dlinfo['filename'],
                'downloadurl': dlinfo['url'],
                'layer': layer,
                'type': dsmd['layers'][layer]['datatype'],
                'zippath': dsmd['layers'][layer]['filename']
            })
        # Climate change projection name
        cc_projection_name = os.path.splitext(dlinfo['filename'])[0]

        # Get the current climate for SDM
        dspath = '/'.join([defaults.DATASETS_FOLDER_ID,
                           defaults.DATASETS_CLIMATE_FOLDER_ID,
                           'australia', 'australia_1km',
                           'current.76to05.zip'])
        ds = portal.restrictedTraverse(dspath)
        dsuuid = IUUID(ds)
        dlinfo = IDownloadInfo(ds)
        dsmd = IBCCVLMetadata(ds)
        envlist = []
        for layer in ('B05', 'B06', 'B13', 'B14'):
            envlist.append({
                'uuid': dsuuid,
                'filename': dlinfo['filename'],
                'downloadurl': dlinfo['url'],
                'layer': layer,
                'type': dsmd['layers'][layer]['datatype'],
                'zippath': dsmd['layers'][layer]['filename']
            })

        # FIXME: we don't use an IJobTracker here for now
        # get toolkit
        func = portal[defaults.TOOLKITS_FOLDER_ID]['demosdm']
        # build job_params:
        job_params = {
            'resolution': IBCCVLMetadata(ds)['resolution'],
            'function': func.getId(),
            'species_occurrence_dataset': {
                'uuid': 'ala_occurrence_dataset',
                'species': u'demoSDM',
                'downloadurl': 'ala://ala?lsid={}'.format(lsid),
            },
            'environmental_datasets': envlist,
            'future_climate_datasets': futureclimatelist,
            'cc_projection_name': cc_projection_name
        }
        # add toolkit parameters: (all default values)
        # get toolkit schema
        schema = loadString(func.schema).schema
        for name, field in getFields(schema).items():
            if field.default is not None:
                job_params[name] = field.default
        # add other default parameters
        job_params.update({
            'rescale_all_models': False,
            'selected_models': 'all',
            'modeling_id': 'bccvl',
        })
        # generate script to run
        script = u'\n'.join([
            resource_string('org.bccvl.compute', 'rscripts/bccvl.R'),
            resource_string('org.bccvl.compute', 'rscripts/eval.R'),
            func.script])
        # where to store results.
        result = {
            'results_dir': 'swift+{}/wordpress/{}/'.format(swiftsettings.storage_url, urllib.quote_plus(lsid)),
            'outputs': json.loads(func.output)
        }
        # worker hints:
        worker = {
            'script': {
                'name': '{}.R'.format(func.getId()),
                'script': script
            },
            'files': (
                'species_occurrence_dataset',
                'environmental_datasets',
                'future_climate_datasets'
            )
        }
        # put everything together
        jobdesc = {
            'env': {},
            'params': job_params,
            'worker': worker,
            'result': result,
        }

        # create job
        jobtool = getUtility(IJobUtility)
        job = jobtool.new_job(
            lsid=lsid,
            toolkit=IUUID(func),
            function=func.getId(),
            type='demosdm'
        )
        # create job context object
        member = ploneapi.user.get_current()
        context = {
            # we use the site object as context
            'context': '/'.join(portal.getPhysicalPath()),
            'jobid': job.id,
            'user': {
                'id': member.getUserName(),
                'email': member.getProperty('email'),
                'fullname': member.getProperty('fullname')
            },
        }

        # all set to go build task chain now
        from org.bccvl.tasks.compute import demo_task
        from org.bccvl.tasks.plone import after_commit_task, HIGH_PRIORITY
        after_commit_task(demo_task, HIGH_PRIORITY, jobdesc, context)
        # let's hope everything works; the result is returned below

        # We don't create an experiment object, so we don't count stats here
        # let's do it manually
        getUtility(IStatsUtility).count_experiment(
            user=member.getId(),
            portal_type='demosdm',
        )

        return {
            'state': os.path.join(result['results_dir'], 'state.json'),
            'result': os.path.join(result['results_dir'], 'proj_metadata.json'),
            'jobid': job.id
        }
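
demosdm returns swift URLs rather than a tracked Plone job, so the caller is expected to poll state.json itself until the background job finishes. A hypothetical Python 2 client sketch; the terminal state names inside state.json are an assumption:

import json
import time
import urllib2

def wait_for_demosdm(state_url, interval=10):
    # poll state.json until the background job reaches a final state
    while True:
        state = json.load(urllib2.urlopen(state_url))
        if state.get('state') in ('COMPLETED', 'FAILED'):
            return state
        time.sleep(interval)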