Example #1
    def __createExpmetadata(self, job_params):
        # To do: add other R package versions dynamically
        # Get experiment title
        self.md['Model specifications'] = {
            'Title': self.context.title,
            'Date/time run': str(self.context.creation_date),
            'Description': self.context.description or ''
        }

        # iterate over all input datasets and add them as entities
        self.md['Input datasets:'] = {}
        for key in ('species_occurrence_dataset', 'species_absence_dataset',
                    'traits_dataset'):
            spmd = {}
            if key not in job_params:
                continue
            dsbrain = uuidToCatalogBrain(job_params[key])
            if not dsbrain:
                continue
            ds = dsbrain.getObject()
            mdata = IBCCVLMetadata(ds)
            if mdata and mdata.get('rows', None):
                spmd = {'Title': "{} ({})".format(ds.title, mdata.get('rows'))}
            else:
                spmd = {'Title': ds.title}
            info = IDownloadInfo(ds)
            spmd['Download URL'] = info['url']

            coll = ds
            while not (ISiteRoot.providedBy(coll)
                       or ICollection.providedBy(coll)):
                coll = coll.__parent__
            spmd['Description'] = ds.description or coll.description or ''
            # default to None so a collection without the attribute doesn't raise
            attribution = ds.attribution or getattr(coll, 'attribution', None) or ''
            if isinstance(attribution, list):
                attribution = '\n'.join([att.raw for att in attribution])
            spmd['Attribution'] = attribution
            self.md['Input datasets:'][key] = spmd

        key = 'traits_dataset_params'
        if key in job_params:
            self.md['Input datasets:'][key] = job_params.get(key, {})

        # pseudo-absence metadata.
        key = u"pseudo_absence_dataset"
        pa_file = self.context.get('pseudo_absences.csv')
        pa_url = ""
        pa_title = ""
        if pa_file:
            pa_title = pa_file.title
            pa_url = pa_file.absolute_url()
            pa_url = '{}/@@download/{}'.format(pa_url,
                                               os.path.basename(pa_url))

            pamd = {
                'Title': pa_title,
                'Download URL': pa_url,
                'Pseudo-absence Strategy': job_params.get('pa_strategy', ''),
                'Pseudo-absence Ratio': str(job_params.get('pa_ratio', ''))
            }
            if job_params.get('pa_strategy', '') == 'disc':
                pamd['Minimum distance'] = str(
                    job_params.get('pa_disk_min', ''))
                pamd['Maximum distance'] = str(
                    job_params.get('pa_disk_max', ''))
            if job_params.get('pa_strategy', '') == 'sre':
                pamd['Quantile'] = str(job_params.get('pa_sre_quant', ''))
            self.md['Input datasets:'][key] = pamd

        for key in ['environmental_datasets', 'future_climate_datasets']:
            if key not in job_params:
                continue
            env_list = []
            layer_vocab = getUtility(IVocabularyFactory,
                                     'layer_source')(self.context)
            for uuid, layers in job_params[key].items():
                ds = uuidToObject(uuid)
                coll = ds
                while not (ISiteRoot.providedBy(coll)
                           or ICollection.providedBy(coll)):
                    coll = coll.__parent__
                description = ds.description or coll.description
                attribution = (ds.attribution
                               or getattr(coll, 'attribution', None) or '')
                if isinstance(attribution, list):
                    attribution = '\n'.join([att.raw for att in attribution])

                layer_titles = [
                    layer_vocab.getLayerTitle(layer) for layer in layers
                ]
                env_list.append({
                    'Title': ds.title,
                    'Layers': u'\n'.join(layer_titles),
                    'Description': description,
                    'Attribution': attribution
                })
            self.md['Input datasets:'][key] = env_list

        key = "datasets"
        if key in job_params:
            dataset_list = []
            for uid in job_params[key]:
                dsbrain = uuidToCatalogBrain(uid)
                if dsbrain:
                    ds = dsbrain.getObject()
                    # get the source experiment
                    source_exp = ds.__parent__
                    while not IExperiment.providedBy(source_exp):
                        source_exp = source_exp.__parent__
                    dataset_list.append({
                        'Source experiment': source_exp.title,
                        'Title': ds.title,
                        'Description': ds.description,
                        'Download URL': '{}/@@download/file/{}'.format(
                            ds.absolute_url(),
                            os.path.basename(ds.absolute_url())),
                        'Algorithm': ds.__parent__.job_params.get('function', ''),
                        'Species': IBCCVLMetadata(ds).get(
                            'species', {}).get('scientificName', ''),
                        'Resolution': IBCCVLMetadata(ds).get('resolution', '')
                    })
            self.md['Input datasets:'][key] = dataset_list

        key = 'species_distribution_models'
        if key in job_params:
            dsbrain = uuidToCatalogBrain(job_params[key])
            if dsbrain:
                ds = dsbrain.getObject()
                # get the source experiment
                source_exp = ds.__parent__
                while not IExperiment.providedBy(source_exp):
                    source_exp = source_exp.__parent__

                # get the threshold
                # guard against a missing threshold entry for this dataset
                threshold = self.context.species_distribution_models.get(
                    source_exp.UID(), {}).get(ds.UID()) or {}
                self.md['Input datasets:'][key] = {
                    'Source experiment': source_exp.title,
                    'Title': ds.title,
                    'Description': ds.description,
                    'Download URL': '{}/@@download/file/{}'.format(
                        ds.absolute_url(),
                        os.path.basename(ds.absolute_url())),
                    'Algorithm': ds.__parent__.job_params.get('function', ''),
                    'Species': IBCCVLMetadata(ds).get(
                        'species', {}).get('scientificName', ''),
                    'Threshold': "{}({})".format(
                        threshold.get('label', ''),
                        str(threshold.get('value', '')))
                }

        key = 'projections'
        if key in job_params:
            for pds in job_params[key]:
                threshold = pds.get('threshold', {})
                dsbrain = uuidToCatalogBrain(pds.get('dataset'))
                if dsbrain:
                    ds = dsbrain.getObject()
                    # get the source experiment
                    source_exp = ds.__parent__
                    while not IExperiment.providedBy(source_exp):
                        source_exp = source_exp.__parent__
                    self.md['Input datasets:'][key] = {
                        'Source experiment': source_exp.title,
                        'Title': ds.title,
                        'Description': ds.description,
                        'Download URL': '{}/@@download/file/{}'.format(
                            ds.absolute_url(),
                            os.path.basename(ds.absolute_url())),
                        'Algorithm': ds.__parent__.job_params.get('function', ''),
                        'Species': IBCCVLMetadata(ds).get(
                            'species', {}).get('scientificName', ''),
                        'Threshold': "{}({})".format(
                            threshold.get('label', ''),
                            str(threshold.get('value', ''))),
                        'Biodiverse Cell size (m)': str(
                            job_params.get('cluster_size', ''))
                    }

        # Projection experiment does not have algorithm as input
        if not IProjectionExperiment.providedBy(self.context.__parent__):
            for key in ['function', 'algorithm']:
                if key in job_params:
                    self.md['Algorithm settings:'] = {
                        'Algorithm Name': job_params[key],
                        'Configuration options': self.__algoConfigOption(
                            job_params[key], job_params)
                    }

        # Construct the text
        mdtext = StringIO.StringIO()
        for heading in [
                'BCCVL model outputs guide', 'System specifications',
                'Model specifications', 'Input datasets:',
                'Algorithm settings:', 'Model outputs:'
        ]:
            mdtext.write(self.__getMetadataText(heading, self.md))
        return mdtext.getvalue()
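
The helper self.__getMetadataText, which renders each heading of self.md into plain text, is not part of this example. Below is a minimal stand-alone sketch of such a renderer, assuming each section is a dict or a list of dicts; the real BCCVL implementation may format things differently.

def _get_metadata_text(heading, md):
    # Hypothetical stand-in for self.__getMetadataText(): renders one section
    # of the metadata dict as indented "key: value" text.
    section = md.get(heading, {})
    lines = [heading]
    items = section.items() if isinstance(section, dict) else enumerate(section)
    for key, value in items:
        if isinstance(value, dict):
            lines.append(u'  {}:'.format(key))
            for k, v in value.items():
                lines.append(u'    {}: {}'.format(k, v))
        else:
            lines.append(u'  {}: {}'.format(key, value))
    lines.append(u'')
    return u'\n'.join(lines)
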
Example #2
    def export_to_ala(self):
        uuid = self.request.form.get('uuid', None)
        try:
            if uuid:
                brain = uuidToCatalogBrain(uuid)
                if brain is None:
                    raise Exception("Brain not found")

                obj = brain.getObject()
            else:
                obj = self.context

            # get username
            member = ploneapi.user.get_current()
            if member.getId():
                user = {
                    'id': member.getUserName(),
                    'email': member.getProperty('email'),
                    'fullname': member.getProperty('fullname')
                }
            else:
                raise Exception("Invalid user")

            # verify dataset
            if obj.portal_type not in (
                    'org.bccvl.content.dataset',
                    'org.bccvl.content.remotedataset',
                    'org.bccvl.content.multispeciesdataset'):
                raise Exception("Invalid UUID (content type)")
            md = IBCCVLMetadata(obj)
            if md.get('genre') not in ('DataGenreSpeciesOccurrence',
                                       'DataGenreSpeciesCollection',
                                       'DataGenreTraits'):
                raise Exception("Invalid UUID (data type)")
            # get download url
            dlinfo = IDownloadInfo(obj)

            # download file
            from org.bccvl import movelib
            from org.bccvl.movelib.utils import build_source, build_destination
            import tempfile
            destdir = tempfile.mkdtemp(prefix='export_to_ala')
            try:
                from org.bccvl.tasks.celery import app
                settings = app.conf.get('bccvl', {})
                dest = os.path.join(destdir, os.path.basename(dlinfo['url']))
                movelib.move(build_source(dlinfo['url'], user['id'], settings),
                             build_destination('file://{}'.format(dest)))

                csvfile = None

                if dlinfo['contenttype'] == 'application/zip':
                    # look at 'layers' to find the file within the zip
                    arc = md['layers'].keys()[0]

                    import zipfile
                    zf = zipfile.ZipFile(dest, 'r')
                    csvfile = zf.open(arc, 'r')
                else:
                    csvfile = open(dest, 'rb')

                import requests
                # "Accept:application/json" "Origin:http://example.com"
                res = requests.post(settings['ala']['sandboxurl'],
                                    files={'file': csvfile},
                                    headers={
                                        'apikey': settings['ala']['apikey'],
                                        'Accept': 'application/json'
                                    })
                if res.status_code != 200:
                    self.record_error(res.reason, res.status_code)
                    raise Exception('Upload failed')
                retval = res.json()
                # TODO: do error checking
                #  keys: sandboxUrl, fileName, message, error: Bool, fileId
                return retval
            finally:
                import shutil
                shutil.rmtree(destdir)

        except Exception as e:
            self.record_error(str(e), 500)
            raise
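
The record_error helper used above belongs to the surrounding view class and is not included in this example. A minimal sketch of what such a method might do, assuming a module-level logger and the standard Zope response API (assumptions, not the actual BCCVL implementation):

import logging

LOG = logging.getLogger(__name__)  # assumed logger, as used in Example #6

def record_error(self, reason, status_code):
    # Hypothetical sketch of the record_error() method: log the failure and
    # reflect it in the HTTP response status. The real method may differ.
    LOG.error('export_to_ala failed: %s (%s)', reason, status_code)
    self.request.response.setStatus(status_code)
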
Example #3
    def demosdm(self):
        lsid = self.request.form.get('lsid')
        # Run SDM on a species given by lsid (from ALA), followed by a Climate
        # Change projection.
        if self.request.get('REQUEST_METHOD', 'GET').upper() != 'POST':
            raise BadRequest('Request must be POST')
        # Swift params
        swiftsettings = getUtility(IRegistry).forInterface(ISwiftSettings)

        # get parameters
        if not lsid:
            raise BadRequest('Required parameter lsid missing')
        # we have an lsid,.... we can't really verify but at least some
        #                      data is here
        # find rest of parameters
        # FIXME: hardcoded path to environmental datasets

        # Get the future climate for climate change projection
        portal = ploneapi.portal.get()
        dspath = '/'.join([defaults.DATASETS_FOLDER_ID,
                           defaults.DATASETS_CLIMATE_FOLDER_ID,
                           'australia', 'australia_1km',
                           'RCP85_ukmo-hadgem1_2085.zip'])
        ds = portal.restrictedTraverse(dspath)
        dsuuid = IUUID(ds)
        dlinfo = IDownloadInfo(ds)
        dsmd = IBCCVLMetadata(ds)
        futureclimatelist = []
        for layer in ('B05', 'B06', 'B13', 'B14'):
            futureclimatelist.append({
                'uuid': dsuuid,
                'filename': dlinfo['filename'],
                'downloadurl': dlinfo['url'],
                'layer': layer,
                'type': dsmd['layers'][layer]['datatype'],
                'zippath': dsmd['layers'][layer]['filename']
            })
        # Climate change projection name
        cc_projection_name = os.path.splitext(dlinfo['filename'])[0]

        # Get the current climate for SDM
        dspath = '/'.join([defaults.DATASETS_FOLDER_ID,
                           defaults.DATASETS_CLIMATE_FOLDER_ID,
                           'australia', 'australia_1km',
                           'current.76to05.zip'])
        ds = portal.restrictedTraverse(dspath)
        dsuuid = IUUID(ds)
        dlinfo = IDownloadInfo(ds)
        dsmd = IBCCVLMetadata(ds)
        envlist = []
        for layer in ('B05', 'B06', 'B13', 'B14'):
            envlist.append({
                'uuid': dsuuid,
                'filename': dlinfo['filename'],
                'downloadurl': dlinfo['url'],
                'layer': layer,
                'type': dsmd['layers'][layer]['datatype'],
                'zippath': dsmd['layers'][layer]['filename']
            })

        # FIXME: we don't use a IJobTracker here for now
        # get toolkit and
        func = portal[defaults.TOOLKITS_FOLDER_ID]['demosdm']
        # build job_params:
        job_params = {
            'resolution': IBCCVLMetadata(ds)['resolution'],
            'function': func.getId(),
            'species_occurrence_dataset': {
                'uuid': 'ala_occurrence_dataset',
                'species': u'demoSDM',
                'downloadurl': 'ala://ala?lsid={}'.format(lsid),
            },
            'environmental_datasets': envlist,
            'future_climate_datasets': futureclimatelist,
            'cc_projection_name': cc_projection_name
        }
        # add toolkit parameters: (all default values)
        # get toolkit schema
        schema = loadString(func.schema).schema
        for name, field in getFields(schema).items():
            if field.default is not None:
                job_params[name] = field.default
        # add other default parameters
        job_params.update({
            'rescale_all_models': False,
            'selected_models': 'all',
            'modeling_id': 'bccvl',
        })
        # generate script to run
        script = u'\n'.join([
            resource_string('org.bccvl.compute', 'rscripts/bccvl.R'),
            resource_string('org.bccvl.compute', 'rscripts/eval.R'),
            func.script])
        # where to store results.
        result = {
            'results_dir': 'swift+{}/wordpress/{}/'.format(
                swiftsettings.storage_url, urllib.quote_plus(lsid)),
            'outputs': json.loads(func.output)
        }
        # worker hints:
        worker = {
            'script': {
                'name': '{}.R'.format(func.getId()),
                'script': script
            },
            'files': (
                'species_occurrence_dataset',
                'environmental_datasets',
                'future_climate_datasets'
            )
        }
        # put everything together
        jobdesc = {
            'env': {},
            'params': job_params,
            'worker': worker,
            'result': result,
        }

        # create job
        jobtool = getUtility(IJobUtility)
        job = jobtool.new_job(
            lsid=lsid,
            toolkit=IUUID(func),
            function=func.getId(),
            type='demosdm'
        )
        # create job context object
        member = ploneapi.user.get_current()
        context = {
            # we use the site object as context
            'context': '/'.join(portal.getPhysicalPath()),
            'jobid': job.id,
            'user': {
                'id': member.getUserName(),
                'email': member.getProperty('email'),
                'fullname': member.getProperty('fullname')
            },
        }

        # all set to go build task chain now
        from org.bccvl.tasks.compute import demo_task
        from org.bccvl.tasks.plone import after_commit_task, HIGH_PRIORITY
        after_commit_task(demo_task, HIGH_PRIORITY, jobdesc, context)
        # let's hope everything works, return result

        # We don't create an experiment object, so we don't count stats here
        # let's do it manually
        getUtility(IStatsUtility).count_experiment(
            user=member.getId(),
            portal_type='demosdm',
        )

        return {
            'state': os.path.join(result['results_dir'], 'state.json'),
            'result': os.path.join(result['results_dir'], 'proj_metadata.json'),
            'jobid': job.id
        }
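
demosdm() builds the same per-layer dictionary twice, once for the future-climate dataset and once for the current-climate dataset. A small helper along the following lines (the name is illustrative) would remove the duplication:

def build_layer_list(ds, layers):
    # Sketch of a helper factoring out the per-layer dict construction used
    # twice in demosdm() above; behaviour matches the inline loops.
    dsuuid = IUUID(ds)
    dlinfo = IDownloadInfo(ds)
    dsmd = IBCCVLMetadata(ds)
    return [{
        'uuid': dsuuid,
        'filename': dlinfo['filename'],
        'downloadurl': dlinfo['url'],
        'layer': layer,
        'type': dsmd['layers'][layer]['datatype'],
        'zippath': dsmd['layers'][layer]['filename'],
    } for layer in layers]

With it, futureclimatelist and envlist become build_layer_list(ds, ('B05', 'B06', 'B13', 'B14')) for the respective datasets.
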
Example #4
    def export_result(self, serviceid):
        # self.context should be a result
        if not hasattr(self.context, 'job_params'):
            raise NotFound(self.context, self.context.title, self.request)
        # TODO: validate serviceid

        # start export job
        context_path = '/'.join(self.context.getPhysicalPath())
        member = api.user.get_current()

        # collect list of files to export:
        urllist = []
        for content in self.context.values():
            if content.portal_type not in ('org.bccvl.content.dataset',
                                           'org.bccvl.content.remotedataset'):
                # skip non datasets
                continue
            dlinfo = IDownloadInfo(content)
            urllist.append(dlinfo['url'])
        # add mets.xml
        urllist.append('{}/mets.xml'.format(self.context.absolute_url()))
        # add prov.ttl
        urllist.append('{}/prov.ttl'.format(self.context.absolute_url()))
        # add experiment metadata
        urllist.append('{}/expmetadata.txt'.format(
            self.context.absolute_url()))

        from org.bccvl.tasks.celery import app
        from org.bccvl.tasks.plone import after_commit_task
        # FIXME: Do mapping from serviceid to service type? based on interface
        #        background task will need serviceid and type, but it may resolve
        #        servicetype via API with serviceid
        export_task = app.signature(
            "org.bccvl.tasks.export_services.export_result",
            kwargs={
                'siteurl': api.portal.get().absolute_url(),
                'fileurls': urllist,
                'serviceid': serviceid,
                'context': {
                    'context': context_path,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            },
            immutable=True)

        # queue job submission
        after_commit_task(export_task)

        # self.new_job('TODO: generate id', 'generate taskname: export_result')
        # self.set_progress('PENDING', u'Result export pending')

        status = 'info'
        message = u'Export request for "{}" successfully submitted! Please check the service and any associated email accounts to confirm the data\'s availability'.format(
            self.context.title)

        IStatusMessage(self.request).add(message, type=status)
        nexturl = self.request.get('HTTP_REFERER')
        if not nexturl:
            # this method should only be called on a result folder
            # we should be able to safely redirect back to the parent
            # experiment
            nexturl = self.context.__parent__.absolute_url()
        self.request.response.redirect(nexturl, 307)
        return (status, message)
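
The TODO about validating serviceid could be handled with a simple whitelist check before the export task is built. The service ids below are purely illustrative placeholders, not the services actually configured in BCCVL:

# Hypothetical validation addressing the 'TODO: validate serviceid' above.
SUPPORTED_EXPORT_SERVICES = ('example-service-a', 'example-service-b')  # placeholders

if serviceid not in SUPPORTED_EXPORT_SERVICES:
    raise NotFound(self.context, serviceid, self.request)
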
Example #5
def CatalogBrainDownloadInfo(brain):
    context = brain.getObject()
    return IDownloadInfo(context)
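
CatalogBrainDownloadInfo is a thin adapter factory: registered for catalog brains, it lets you write IDownloadInfo(brain) directly instead of waking up the object yourself. A minimal registration sketch, assuming IDownloadInfo and ICatalogBrain are importable from the locations shown (adjust to the real package layout):

from zope.component import provideAdapter
from Products.ZCatalog.interfaces import ICatalogBrain      # assumed location
from org.bccvl.site.interfaces import IDownloadInfo         # assumed location

provideAdapter(CatalogBrainDownloadInfo,
               adapts=(ICatalogBrain,),
               provides=IDownloadInfo)

# After registration a brain can be adapted directly:
# info = IDownloadInfo(uuidToCatalogBrain(some_uuid))
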
Example #6
    def add(self, object):
        # FIXME: this is a workaround, which is fine for small uploaded files.
        #        large uploads should go through another process anyway
        # TODO: re implementing this method is the only way to know
        #       the full path of the object. We need the path to apply
        #       the transmogrifier chain.
        # fti = getUtility(IDexterityFTI, name=self.portal_type)
        container = aq_inner(self.context)
        try:
            # traverse to subfolder if possible
            container = container.restrictedTraverse('/'.join(self.subpath))
        except Exception as e:
            LOG.warn('Could not traverse to %s/%s: %s',
                     '/'.join(container.getPhysicalPath()),
                     '/'.join(self.subpath), e)
        new_object = addContentToContainer(container, object)
        # set data genre:
        if self.datagenre:
            IBCCVLMetadata(new_object)['genre'] = self.datagenre
        if self.categories:
            IBCCVLMetadata(new_object)['categories'] = self.categories

        new_object.subject = []
        if self.domain:
            new_object.subject = [self.domain]
        if self.timeperiod:
            new_object.subject += self.timeperiod

        # rdf commit should happen in the transmogrifier step later on
        # if fti.immediate_view:
        #     self.immediate_view = "%s/%s/%s" % (container.absolute_url(), new_object.id, fti.immediate_view,)
        # else:
        #     self.immediate_view = "%s/%s" % (container.absolute_url(), new_object.id)
        # start background import process (just a metadata update)

        # run transmogrify md extraction here
        context_path = '/'.join(new_object.getPhysicalPath())
        member = api.user.get_current()
        # species extract task
        if IMultiSpeciesDataset.providedBy(new_object):
            # kick off csv split import tasks
            import_task = app.signature(
                "org.bccvl.tasks.datamover.tasks.import_multi_species_csv",
                kwargs={
                    'url': '{}/@@download/file/{}'.format(
                        new_object.absolute_url(), new_object.file.filename),
                    'results_dir': get_results_dir(
                        new_object, self.request, childSpecies=True),
                    'import_context': {
                        'context': '/'.join(container.getPhysicalPath()),
                        'user': {
                            'id': member.getUserName(),
                            'email': member.getProperty('email'),
                            'fullname': member.getProperty('fullname')
                        }
                    },
                    'context': {
                        'context': context_path,
                        'genre': self.datagenre,
                        'dataSource': new_object.dataSource,
                        'user': {
                            'id': member.getUserName(),
                            'email': member.getProperty('email'),
                            'fullname': member.getProperty('fullname')
                        }
                    }
                },
                immutable=True)
            after_commit_task(import_task)
            # create job tracking object
            jt = IJobTracker(new_object)
            jt.new_job('TODO: generate id',
                       'generate taskname: import_multi_species_csv',
                       function=new_object.dataSource,
                       type=new_object.portal_type)
            jt.set_progress('PENDING', u'Multi species import pending')
        else:
            if hasattr(self, '_upload'):
                file = self._upload['file']
                new_object.format = file.contentType
                uid = IUUID(new_object)
                swiftsettings = getUtility(IRegistry).forInterface(
                    ISwiftSettings)
                import os.path
                swift_url = '{storage_url}/{container}/{path}/{name}'.format(
                    storage_url=swiftsettings.storage_url,
                    container=swiftsettings.result_container,
                    path=uid,
                    name=os.path.basename(file.filename))
                new_object.remoteUrl = swift_url
            else:
                file = new_object.file
                new_object.format = file.contentType

            dlinfo = IDownloadInfo(new_object)

            # single species upload
            update_task = app.signature(
                "org.bccvl.tasks.datamover.tasks.update_metadata",
                kwargs={
                    'url': dlinfo['url'],
                    'filename': dlinfo['filename'],
                    'contenttype': dlinfo['contenttype'],
                    'context': {
                        'context': context_path,
                        'user': {
                            'id': member.getUserName(),
                            'email': member.getProperty('email'),
                            'fullname': member.getProperty('fullname')
                        }
                    }
                },
                immutable=True)
            # create upload task in case we upload to external store
            if hasattr(self, '_upload'):
                # FIXME: we can't use ssh here.... we don't know which container we are in... and
                #        sshing here is bad as well....
                # There is an upload ... we have to make sure the uploaded data ends up in external storage
                # 3. put temp file aside
                tmpdir = tempfile.mkdtemp(prefix='bccvl_upload')
                tmpfile = os.path.join(tmpdir, os.path.basename(file.filename))
                blobf = file.open()
                try:
                    # try rename
                    os.rename(blobf.name, tmpfile)
                except OSError:
                    # try copy
                    shutil.copy(blobf.name, tmpfile)

                # TODO: we push the uploaded file directly to swift here..
                #       this really should be a background process
                #       best solution: ...
                #           user uploads to some temporary upload service (file never ends up here)
                #           we have a remote url here, and tell the datamover to pull it from there
                #           and move it to final destination. (or something like this)
                #       other good way: ...
                #           let user upload directly to swift (what about large file uploads?)
                #           and take care of clean up if necessary

                # 4. move file to swift
                # TODO: do we have enough information to upload to swift?
                #       need a temp url?
                swiftopts = app.conf.get('bccvl', {}).get('swift', {})
                src_url = build_source('file://{}'.format(tmpfile))
                dest_url = build_destination(
                    'swift+{}'.format(new_object.remoteUrl),
                    settings={
                        'swift': {
                            'os_auth_url': swiftopts.get('os_auth_url'),
                            'os_username': swiftopts.get('os_username'),
                            'os_password': swiftopts.get('os_password'),
                            'os_tenant_name': swiftopts.get('os_tenant_name'),
                            'os_storage_url': swiftopts.get('os_storage_url')
                        }
                    })

                try:
                    movelib.move(src_url, dest_url)
                except Exception as e:
                    # do error handling here
                    raise
                finally:
                    # clean up temp location
                    path = os.path.dirname(tmpfile)
                    shutil.rmtree(path)

            # queue job submission
            after_commit_task(update_task)
            # create job tracking object
            jt = IJobTracker(new_object)
            jt.new_job('TODO: generate id',
                       'generate taskname: update_metadata',
                       function=new_object.dataSource,
                       type=new_object.portal_type)
            jt.set_progress('PENDING', u'Metadata update pending')

        # We have to reindex after updating the object
        new_object.reindexObject()
    def get_download_info(self, item=None):
        if item is None:
            item = self.context
        return IDownloadInfo(item)
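
get_download_info simply adapts either the given item or the view's context. A short usage sketch, using only keys that appear in the examples above ('url', 'filename', 'contenttype'):

# Usage sketch (within the same view class):
info = self.get_download_info()        # defaults to self.context
download_url = info['url']
filename = info['filename']
content_type = info['contenttype']
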