def execute(result, toolkit):
    """
    This function takes an experiment and executes it.

    It uses the environment variables WORKER_DIR or HOME as the root folder
    to execute experiments. After the execution finishes, the output files
    will be attached to the experiment.

    :param experiment: The experiment holding the configuration and
                       receiving the results
    :type experiment: org.bccvl.site.content.IExperiment
    """
    try:
        OUTPUTS = json.loads(toolkit.output)
    except (ValueError, TypeError) as e:
        LOG.fatal("couldn't load OUTPUT from toolkit %s: %s",
                  toolkit.getId(), e)
        OUTPUTS = {}
    params = get_traits_params(result)
    script = generate_traits_script(toolkit.script)
    # plone context for this job
    member = api.user.get_current()
    context = {
        'context': '/'.join(result.getPhysicalPath()),
        'user': {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname')
        },
        'experiment': {
            'title': result.__parent__.title,
            'url': result.__parent__.absolute_url()
        }
    }
    # TODO: quick fix Decimal json encoding through celery
    #       (where is my custom json encoder gone?)
    for key, item in params.items():
        if isinstance(item, Decimal):
            params[key] = float(item)
    # add result infos
    params['result'] = {
        'results_dir': get_results_dir(result, result.REQUEST),
        'outputs': OUTPUTS
    }
    params['worker']['script'] = {
        'name': '{}.R'.format(toolkit.getId()),
        'script': script
    }
    # set debug flag
    params['worker']['zipworkenv'] = api.env.debug_mode()
    after_commit_task(r_task, params, context)
def execute(result, toolkit, priority=HIGH_PRIORITY):
    """
    This function takes an experiment and executes it.

    It uses the environment variables WORKER_DIR or HOME as the root folder
    to execute experiments. After the execution finishes, the output files
    will be attached to the experiment.

    :param experiment: The experiment holding the configuration and
                       receiving the results
    :type experiment: org.bccvl.site.content.IExperiment
    """
    try:
        OUTPUTS = json.loads(toolkit.output)
    except (ValueError, TypeError) as e:
        LOG.fatal("couldn't load OUTPUT from toolkit %s: %s",
                  toolkit.getId(), e)
        OUTPUTS = {}
    params = get_traits_params(result)
    script = generate_traits_script(toolkit.script)
    # plone context for this job
    member = api.user.get_current()
    context = {
        'context': '/'.join(result.getPhysicalPath()),
        'user': {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname')
        },
        'experiment': {
            'title': result.__parent__.title,
            'url': result.__parent__.absolute_url()
        }
    }
    # TODO: quick fix Decimal json encoding through celery
    #       (where is my custom json encoder gone?)
    for key, item in params.items():
        if isinstance(item, Decimal):
            params[key] = float(item)
    # add result infos
    params['result'] = {
        'results_dir': get_results_dir(result, result.REQUEST),
        'outputs': OUTPUTS
    }
    params['worker']['script'] = {
        'name': '{}.R'.format(toolkit.getId()),
        'script': script
    }
    # set debug flag
    params['worker']['zipworkenv'] = api.env.debug_mode()
    after_commit_task(r_task, priority, params, context)
def execute_sdm(result, toolkit):
    """
    This function takes an experiment and executes it.

    It uses the environment variables WORKER_DIR or HOME as the root folder
    to execute experiments. After the execution finishes, the output files
    will be attached to the experiment.

    :param experiment: The experiment holding the configuration and
                       receiving the results
    :type experiment: org.bccvl.site.content.IExperiment
    """
    try:
        OUTPUTS = json.loads(toolkit.output)
    except (ValueError, TypeError) as e:
        LOG.fatal("couldn't load OUTPUT from toolkit %s: %s",
                  toolkit.getId(), e)
        OUTPUTS = {}
    params = get_toolkit_params(result)
    script = generate_sdm_script(toolkit.script)
    ###### generate plone context infos
    member = api.user.get_current()
    context = {
        'context': '/'.join(result.getPhysicalPath()),
        'user': {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname')
        },
        'experiment': {
            'title': result.__parent__.title,
            'url': result.__parent__.absolute_url()
        }
    }
    ##### complete job infos
    params['result'] = {
        # FIXME: not optimal to access request this way;
        #        rather pass it in as a parameter
        'results_dir': get_results_dir(result, result.REQUEST),
        'outputs': OUTPUTS
    }
    params['worker']['script'] = {
        'name': '{}.R'.format(toolkit.getId()),
        'script': script
    }
    # set debug flag
    params['worker']['zipworkenv'] = api.env.debug_mode()
    ### send job to queue
    # TODO: define job chain here (and in other methods as well)
    after_commit_task(r_task, params, context)
def execute_sdm(result, toolkit, priority=HIGH_PRIORITY):
    """
    This function takes an experiment and executes it.

    It uses the environment variables WORKER_DIR or HOME as the root folder
    to execute experiments. After the execution finishes, the output files
    will be attached to the experiment.

    :param experiment: The experiment holding the configuration and
                       receiving the results
    :type experiment: org.bccvl.site.content.IExperiment
    """
    try:
        OUTPUTS = json.loads(toolkit.output)
    except (ValueError, TypeError) as e:
        LOG.fatal("couldn't load OUTPUT from toolkit %s: %s",
                  toolkit.getId(), e)
        OUTPUTS = {}
    params = get_toolkit_params(result)
    script = generate_sdm_script(toolkit.script)
    ###### generate plone context infos
    member = api.user.get_current()
    context = {
        'context': '/'.join(result.getPhysicalPath()),
        'user': {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname')
        },
        'experiment': {
            'title': result.__parent__.title,
            'url': result.__parent__.absolute_url()
        }
    }
    ##### complete job infos
    params['result'] = {
        # FIXME: not optimal to access request this way;
        #        rather pass it in as a parameter
        'results_dir': get_results_dir(result, result.REQUEST),
        'outputs': OUTPUTS
    }
    params['worker']['script'] = {
        'name': '{}.R'.format(toolkit.getId()),
        'script': script
    }
    # set debug flag
    params['worker']['zipworkenv'] = api.env.debug_mode()
    ### send job to queue
    # TODO: define job chain here (and in other methods as well)
    after_commit_task(r_task, priority, params, context)
def execute(result, toolkit, priority=HIGH_PRIORITY):
    """
    This function takes an experiment and executes it.

    It uses the environment variables WORKER_DIR or HOME as the root folder
    to execute experiments. After the execution finishes, the output files
    will be attached to the experiment.

    :param experiment: The experiment holding the configuration and
                       receiving the results
    :type experiment: org.bccvl.site.content.IExperiment
    """
    # FIXME: ensemble is not yet a content based toolkit
    # try:
    #     OUTPUTS = json.loads(toolkit.output)
    # except (ValueError, TypeError) as e:
    #     LOG.fatal("couldn't load OUTPUT from toolkit %s: %s",
    #               toolkit.getId(), e)
    #     OUTPUTS = {}
    params = get_ensemble_params(result)
    script = generate_ensemble_script()
    member = api.user.get_current()
    context = {
        'context': '/'.join(result.getPhysicalPath()),
        'user': {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname')
        },
        'experiment': {
            'title': result.__parent__.title,
            'url': result.__parent__.absolute_url()
        }
    }
    # NOTE: while the block above is commented out, OUTPUTS is not defined
    #       in this function and has to come from module scope.
    params['result'] = {
        'results_dir': get_results_dir(result, result.REQUEST),
        'outputs': OUTPUTS
    }
    params['worker']['script'] = {
        'name': 'ensemble.R',
        'script': script,
    }
    # set debug flag
    params['worker']['zipworkenv'] = api.env.debug_mode()
    ### send job to queue
    after_commit_task(r_task, priority, params, context)
def execute(result, func, priority=HIGH_PRIORITY):
    """
    This function takes an experiment and executes it.

    It uses the environment variables WORKER_DIR or HOME as the root folder
    to execute experiments. After the execution finishes, the output files
    will be attached to the experiment.

    :param experiment: The experiment holding the configuration and
                       receiving the results
    :type experiment: org.bccvl.site.content.IExperiment
    """
    params = get_project_params(result)
    script = generate_project_script()
    ### plone context for this job
    member = api.user.get_current()
    context = {
        'context': '/'.join(result.getPhysicalPath()),
        'user': {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname')
        },
        'experiment': {
            'title': result.__parent__.title,
            'url': result.__parent__.absolute_url()
        }
    }
    ### add result infos
    params['result'] = {
        'results_dir': get_results_dir(result, result.REQUEST),
        'outputs': get_output(result.job_params['function'])
    }
    params['worker']['script'] = {
        'name': 'projection.R',
        'script': script
    }
    # set debug flag
    params['worker']['zipworkenv'] = api.env.debug_mode()
    after_commit_task(r_task, priority, params, context)
def add(self, object):
    # FIXME: this is a workaround, which is fine for small uploaded files.
    #        large uploads should go through another process anyway
    # TODO: re-implementing this method is the only way to know
    #       the full path of the object. We need the path to apply
    #       the transmogrifier chain.
    # fti = getUtility(IDexterityFTI, name=self.portal_type)
    container = aq_inner(self.context)
    try:
        # traverse to subfolder if possible
        container = container.restrictedTraverse('/'.join(self.subpath))
    except Exception as e:
        LOG.warn('Could not traverse to %s/%s',
                 '/'.join(container.getPhysicalPath()),
                 '/'.join(self.subpath))
    new_object = addContentToContainer(container, object)
    # set data genre:
    if self.datagenre:
        IBCCVLMetadata(new_object)['genre'] = self.datagenre
    if self.categories:
        IBCCVLMetadata(new_object)['categories'] = self.categories
    new_object.subject = []
    if self.domain:
        new_object.subject = [self.domain]
    if self.timeperiod:
        new_object.subject += self.timeperiod
    # rdf commit should happen in a transmogrifier step later on
    # if fti.immediate_view:
    #     self.immediate_view = "%s/%s/%s" % (container.absolute_url(), new_object.id, fti.immediate_view,)
    # else:
    #     self.immediate_view = "%s/%s" % (container.absolute_url(), new_object.id)
    # start background import process (just a metadata update);
    # run transmogrify md extraction here
    context_path = '/'.join(new_object.getPhysicalPath())
    member = api.user.get_current()
    # species extract task
    if IMultiSpeciesDataset.providedBy(new_object):
        # kick off csv split import tasks
        import_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.import_multi_species_csv",
            kwargs={
                'url': '{}/@@download/file/{}'.format(
                    new_object.absolute_url(), new_object.file.filename),
                'results_dir': get_results_dir(new_object, self.request,
                                               childSpecies=True),
                'import_context': {
                    'context': '/'.join(container.getPhysicalPath()),
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                },
                'context': {
                    'context': context_path,
                    'genre': self.datagenre,
                    'dataSource': new_object.dataSource,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            },
            immutable=True)
        after_commit_task(import_task)
        # create job tracking object
        jt = IJobTracker(new_object)
        jt.new_job('TODO: generate id',
                   'generate taskname: import_multi_species_csv',
                   function=new_object.dataSource,
                   type=new_object.portal_type)
        jt.set_progress('PENDING', u'Multi species import pending')
    else:
        if hasattr(self, '_upload'):
            file = self._upload['file']
            new_object.format = file.contentType
            uid = IUUID(new_object)
            swiftsettings = getUtility(IRegistry).forInterface(
                ISwiftSettings)
            import os.path
            swift_url = '{storage_url}/{container}/{path}/{name}'.format(
                storage_url=swiftsettings.storage_url,
                container=swiftsettings.result_container,
                path=uid,
                name=os.path.basename(file.filename))
            new_object.remoteUrl = swift_url
        else:
            file = new_object.file
            new_object.format = file.contentType
        dlinfo = IDownloadInfo(new_object)
        # single species upload
        update_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.update_metadata",
            kwargs={
                'url': dlinfo['url'],
                'filename': dlinfo['filename'],
                'contenttype': dlinfo['contenttype'],
                'context': {
                    'context': context_path,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            },
            immutable=True)
        # create upload task in case we upload to external store
        if hasattr(self, '_upload'):
            # FIXME: we can't use ssh here.... we don't know which container
            #        we are in... and sshing here is bad as well....
            # There is an upload ... we have to make sure the uploaded data
            # ends up in external storage
            # 3. put temp file aside
            tmpdir = tempfile.mkdtemp(prefix='bccvl_upload')
            tmpfile = os.path.join(tmpdir, os.path.basename(file.filename))
            blobf = file.open()
            try:
                # try rename
                os.rename(blobf.name, tmpfile)
            except OSError:
                # try copy
                shutil.copy(blobf.name, tmpfile)
            # TODO: we push the uploaded file directly to swift here;
            #       this really should be a background process.
            # best solution: user uploads to some temporary upload service
            #       (file never ends up here); we have a remote url here and
            #       tell the datamover to pull it from there and move it to
            #       the final destination (or something like this)
            # other good way: let user upload directly to swift (what about
            #       large file uploads?) and take care of clean up if
            #       necessary
            # 4. move file to swift
            # TODO: do we have enough information to upload to swift?
            #       need a temp url?
            swiftopts = app.conf.get('bccvl', {}).get('swift', {})
            src_url = build_source('file://{}'.format(tmpfile))
            dest_url = build_destination(
                'swift+{}'.format(new_object.remoteUrl),
                settings={
                    'swift': {
                        'os_auth_url': swiftopts.get('os_auth_url'),
                        'os_username': swiftopts.get('os_username'),
                        'os_password': swiftopts.get('os_password'),
                        'os_project_name': swiftopts.get('os_project_name'),
                        'os_storage_url': swiftopts.get('os_storage_url'),
                        'os_user_domain_name': swiftopts.get('os_user_domain_name'),
                        'os_project_domain_name': swiftopts.get('os_project_domain_name'),
                        'auth_version': swiftopts.get('auth_version')
                    }
                })
            try:
                movelib.move(src_url, dest_url)
            except Exception as e:
                # do error handling here
                raise
            finally:
                # clean up temp location
                path = os.path.dirname(tmpfile)
                shutil.rmtree(path)
        # queue job submission
        after_commit_task(update_task)
        # create job tracking object
        jt = IJobTracker(new_object)
        jt.new_job('TODO: generate id',
                   'generate taskname: update_metadata',
                   function=new_object.dataSource,
                   type=new_object.portal_type)
        jt.set_progress('PENDING', u'Metadata update pending')
    # We have to reindex after updating the object
    new_object.reindexObject()
def add(self, object):
    # FIXME: this is a workaround, which is fine for small uploaded files.
    #        large uploads should go through another process anyway
    # TODO: re-implementing this method is the only way to know
    #       the full path of the object. We need the path to apply
    #       the transmogrifier chain.
    # fti = getUtility(IDexterityFTI, name=self.portal_type)
    container = aq_inner(self.context)
    try:
        # traverse to subfolder if possible
        container = container.restrictedTraverse('/'.join(self.subpath))
    except Exception as e:
        LOG.warn('Could not traverse to %s/%s',
                 '/'.join(container.getPhysicalPath()),
                 '/'.join(self.subpath))
    new_object = addContentToContainer(container, object)
    # set data genre:
    if self.datagenre:
        IBCCVLMetadata(new_object)['genre'] = self.datagenre
    if self.categories:
        IBCCVLMetadata(new_object)['categories'] = self.categories
    # rdf commit should happen in a transmogrifier step later on
    # if fti.immediate_view:
    #     self.immediate_view = "%s/%s/%s" % (container.absolute_url(), new_object.id, fti.immediate_view,)
    # else:
    #     self.immediate_view = "%s/%s" % (container.absolute_url(), new_object.id)
    # start background import process (just a metadata update);
    # run transmogrify md extraction here
    context_path = '/'.join(new_object.getPhysicalPath())
    member = api.user.get_current()
    # species extract task
    if IMultiSpeciesDataset.providedBy(new_object):
        # kick off csv split import tasks
        import_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.import_multi_species_csv",
            kwargs={
                'url': '{}/@@download/file/{}'.format(
                    new_object.absolute_url(), new_object.file.filename),
                'results_dir': get_results_dir(container, self.request),
                'import_context': {
                    'context': '/'.join(container.getPhysicalPath()),
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                },
                'context': {
                    'context': context_path,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            },
            immutable=True)
        after_commit_task(import_task)
        # create job tracking object
        jt = IJobTracker(new_object)
        job = jt.new_job('TODO: generate id',
                         'generate taskname: import_multi_species_csv')
        job.type = new_object.portal_type
        jt.set_progress('PENDING', u'Multi species import pending')
    else:
        if hasattr(self, '_upload'):
            file = self._upload['file']
            new_object.format = file.contentType
            uid = IUUID(new_object)
            swiftsettings = getUtility(
                IRegistry).forInterface(ISwiftSettings)
            import os.path
            swift_url = '{storage_url}/{container}/{path}/{name}'.format(
                storage_url=swiftsettings.storage_url,
                container=swiftsettings.result_container,
                path=uid,
                name=os.path.basename(file.filename))
            new_object.remoteUrl = swift_url
        else:
            file = new_object.file
            new_object.format = file.contentType
        dlinfo = IDownloadInfo(new_object)
        # single species upload
        update_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.update_metadata",
            kwargs={
                'url': dlinfo['url'],
                'filename': dlinfo['filename'],
                'contenttype': dlinfo['contenttype'],
                'context': {
                    'context': context_path,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            },
            immutable=True)
        # create upload task in case we upload to external store
        if hasattr(self, '_upload'):
            # There is an upload ... we have to make sure the uploaded data
            # ends up in external storage
            # 3. put temp file aside
            tmpdir = tempfile.mkdtemp(prefix='bccvl_upload')
            tmpfile = os.path.join(tmpdir, os.path.basename(file.filename))
            blobf = file.open()
            try:
                # try rename
                os.rename(blobf.name, tmpfile)
            except OSError:
                # try copy
                shutil.copy(blobf.name, tmpfile)
            # 4. update task chain
            src_url = 'scp://{uid}@{ip}:{port}{file}'.format(
                uid=pwd.getpwuid(os.getuid()).pw_name,
                ip=get_hostname(self.request),
                port=os.environ.get('SSH_PORT', 22),
                file=tmpfile)
            dest_url = 'swift+{}'.format(new_object.remoteUrl)
            move_task = app.signature(
                'org.bccvl.tasks.datamover.tasks.move',
                kwargs={
                    'move_args': [(src_url, dest_url)],
                    'context': {
                        'context': context_path,
                        'user': {
                            'id': member.getUserName(),
                            'email': member.getProperty('email'),
                            'fullname': member.getProperty('fullname')
                        }
                    }
                },
                immutable=True)
            cleanup_task = app.signature(
                'org.bccvl.tasks.plone.import_cleanup',
                kwargs={
                    'path': os.path.dirname(tmpfile),
                    'context': {
                        'context': context_path,
                        'user': {
                            'id': member.getUserName(),
                            'email': member.getProperty('email'),
                            'fullname': member.getProperty('fullname')
                        }
                    }
                },
                immutable=True)
            update_task = move_task | update_task | cleanup_task
            # need some more workflow states here to support e.g. zip file
            # upload (multiple rasters) and give the user a chance to better
            # define metadata; make sure update_metadata does not change
            # user edited metadata -> layer, unit, projection, whatever
            # FIXME: clean up tmp upload directory as well
        # queue job submission
        after_commit_task(update_task)
        # create job tracking object
        jt = IJobTracker(new_object)
        job = jt.new_job('TODO: generate id',
                         'generate taskname: update_metadata')
        job.type = new_object.portal_type
        jt.set_progress('PENDING', u'Metadata update pending')
    # We have to reindex after updating the object
    new_object.reindexObject()
def add(self, object):
    # FIXME: this is a workaround, which is fine for small uploaded files.
    #        large uploads should go through another process anyway
    # TODO: re-implementing this method is the only way to know
    #       the full path of the object. We need the path to apply
    #       the transmogrifier chain.
    # fti = getUtility(IDexterityFTI, name=self.portal_type)
    container = aq_inner(self.context)
    try:
        # traverse to subfolder if possible
        container = container.restrictedTraverse('/'.join(self.subpath))
    except Exception as e:
        LOG.warn('Could not traverse to %s/%s',
                 '/'.join(container.getPhysicalPath()),
                 '/'.join(self.subpath))
    new_object = addContentToContainer(container, object)
    # set data genre:
    if self.datagenre:
        IBCCVLMetadata(new_object)['genre'] = self.datagenre
    if self.categories:
        IBCCVLMetadata(new_object)['categories'] = self.categories
    new_object.subject = []
    if self.domain:
        new_object.subject = [self.domain]
    if self.timeperiod:
        new_object.subject += self.timeperiod
    # rdf commit should happen in a transmogrifier step later on
    # if fti.immediate_view:
    #     self.immediate_view = "%s/%s/%s" % (container.absolute_url(), new_object.id, fti.immediate_view,)
    # else:
    #     self.immediate_view = "%s/%s" % (container.absolute_url(), new_object.id)
    # start background import process (just a metadata update);
    # run transmogrify md extraction here
    context_path = '/'.join(new_object.getPhysicalPath())
    member = api.user.get_current()
    # species extract task
    if IMultiSpeciesDataset.providedBy(new_object):
        # kick off csv split import tasks
        import_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.import_multi_species_csv",
            kwargs={
                'url': '{}/@@download/file/{}'.format(
                    new_object.absolute_url(), new_object.file.filename),
                'results_dir': get_results_dir(new_object, self.request,
                                               childSpecies=True),
                'import_context': {
                    'context': '/'.join(container.getPhysicalPath()),
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                },
                'context': {
                    'context': context_path,
                    'genre': self.datagenre,
                    'dataSource': new_object.dataSource,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            },
            immutable=True)
        after_commit_task(import_task)
        # create job tracking object
        jt = IJobTracker(new_object)
        jt.new_job('TODO: generate id',
                   'generate taskname: import_multi_species_csv',
                   function=new_object.dataSource,
                   type=new_object.portal_type)
        jt.set_progress('PENDING', u'Multi species import pending')
    else:
        if hasattr(self, '_upload'):
            file = self._upload['file']
            new_object.format = file.contentType
            uid = IUUID(new_object)
            swiftsettings = getUtility(
                IRegistry).forInterface(ISwiftSettings)
            import os.path
            swift_url = '{storage_url}/{container}/{path}/{name}'.format(
                storage_url=swiftsettings.storage_url,
                container=swiftsettings.result_container,
                path=uid,
                name=os.path.basename(file.filename))
            new_object.remoteUrl = swift_url
        else:
            file = new_object.file
            new_object.format = file.contentType
        dlinfo = IDownloadInfo(new_object)
        # single species upload
        update_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.update_metadata",
            kwargs={
                'url': dlinfo['url'],
                'filename': dlinfo['filename'],
                'contenttype': dlinfo['contenttype'],
                'context': {
                    'context': context_path,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            },
            immutable=True)
        # create upload task in case we upload to external store
        if hasattr(self, '_upload'):
            # FIXME: we can't use ssh here.... we don't know which container
            #        we are in... and sshing here is bad as well....
            # There is an upload ... we have to make sure the uploaded data
            # ends up in external storage
            # 3. put temp file aside
            tmpdir = tempfile.mkdtemp(prefix='bccvl_upload')
            tmpfile = os.path.join(tmpdir, os.path.basename(file.filename))
            blobf = file.open()
            try:
                # try rename
                os.rename(blobf.name, tmpfile)
            except OSError:
                # try copy
                shutil.copy(blobf.name, tmpfile)
            # TODO: we push the uploaded file directly to swift here;
            #       this really should be a background process.
            # best solution: user uploads to some temporary upload service
            #       (file never ends up here); we have a remote url here and
            #       tell the datamover to pull it from there and move it to
            #       the final destination (or something like this)
            # other good way: let user upload directly to swift (what about
            #       large file uploads?) and take care of clean up if
            #       necessary
            # 4. move file to swift
            # TODO: do we have enough information to upload to swift?
            #       need a temp url?
            swiftopts = app.conf.get('bccvl', {}).get('swift', {})
            src_url = build_source('file://{}'.format(tmpfile))
            dest_url = build_destination(
                'swift+{}'.format(new_object.remoteUrl),
                settings={
                    'swift': {
                        'os_auth_url': swiftopts.get('os_auth_url'),
                        'os_username': swiftopts.get('os_username'),
                        'os_password': swiftopts.get('os_password'),
                        'os_tenant_name': swiftopts.get('os_tenant_name'),
                        'os_storage_url': swiftopts.get('os_storage_url')
                    }
                })
            try:
                movelib.move(src_url, dest_url)
            except Exception as e:
                # do error handling here
                raise
            finally:
                # clean up temp location
                path = os.path.dirname(tmpfile)
                shutil.rmtree(path)
        # queue job submission
        after_commit_task(update_task)
        # create job tracking object
        jt = IJobTracker(new_object)
        jt.new_job('TODO: generate id',
                   'generate taskname: update_metadata',
                   function=new_object.dataSource,
                   type=new_object.portal_type)
        jt.set_progress('PENDING', u'Metadata update pending')
    # We have to reindex after updating the object
    new_object.reindexObject()