def get_results_dir(result, request):
    swiftsettings = getUtility(IRegistry).forInterface(ISwiftSettings)

    # Swift only if it is a remote dataset. Blob and multi-species datasets
    # are stored locally. Any other dataset type is stored in swift if possible.
    do_swift = IRemoteDataset.providedBy(result) or \
        (not IMultiSpeciesDataset.providedBy(result) and
         not IBlobDataset.providedBy(result) and
         swiftsettings.storage_url)
    if do_swift:
        if swiftsettings.storage_url:
            results_dir = 'swift+{storage_url}/{container}/{path}/'.format(
                storage_url=swiftsettings.storage_url,
                container=swiftsettings.result_container,
                path=IUUID(result)
            )
        else:
            raise Exception("Remote dataset requires swift url to be set")
    else:
        # if swift is not set up we use local storage
        results_dir = 'scp://{uid}@{ip}:{port}{path}/'.format(
            uid=pwd.getpwuid(os.getuid()).pw_name,
            # FIXME: hostname from request is not good enough...
            #        need to get ip or host from the plone_worker that does
            #        the actual import
            #        store in registry?
            #        (is ok for testing)
            # ip=get_public_ip(),
            ip=get_hostname(request),
            port=os.environ.get('SSH_PORT', 22),
            path=tempfile.mkdtemp(prefix='result_import_')
        )
    return results_dir

def get_results_dir(result, request, childSpecies=False):
    swiftsettings = getUtility(IRegistry).forInterface(ISwiftSettings)

    # Swift only if it is a remote dataset. Blob and multi-species datasets
    # are stored locally. Any other dataset type (including the child species
    # of a multi-species dataset) is stored in swift if possible.
    do_swift = IRemoteDataset.providedBy(result) or \
        ((childSpecies or not IMultiSpeciesDataset.providedBy(result)) and
         not IBlobDataset.providedBy(result) and
         swiftsettings.storage_url)
    if do_swift:
        if swiftsettings.storage_url:
            results_dir = 'swift+{storage_url}/{container}/{path}/'.format(
                storage_url=swiftsettings.storage_url,
                container=swiftsettings.result_container,
                path=IUUID(result))
        else:
            raise Exception("Remote dataset requires swift url to be set")
    else:
        # if swift is not set up we use local storage
        results_dir = 'scp://{uid}@{ip}:{port}{path}/'.format(
            uid=pwd.getpwuid(os.getuid()).pw_name,
            # FIXME: hostname from request is not good enough...
            #        need to get ip or host from the plone_worker that does
            #        the actual import
            #        store in registry?
            #        (is ok for testing)
            # ip=get_public_ip(),
            ip=get_hostname(request),
            port=os.environ.get('SSH_PORT', 22),
            path=tempfile.mkdtemp(prefix='result_import_'))
    return results_dir

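# For orientation, a minimal sketch of how a caller might tell the two kinds of
# results_dir apart. split_results_dir is a hypothetical helper for illustration
# only (it is not part of this codebase); it simply reflects the two URL formats
# built above: 'swift+<storage_url>/<result_container>/<uuid>/' and
# 'scp://<user>@<host>:<port><tmp-dir>/'.
try:
    from urllib.parse import urlsplit       # Python 3
except ImportError:
    from urlparse import urlsplit            # Python 2 (Plone 4.x era)


def split_results_dir(results_dir):
    # Hypothetical: break a results_dir produced by get_results_dir into its
    # transport and location parts.
    if results_dir.startswith('swift+'):
        # everything after the 'swift+' prefix is the object store URL
        return 'swift', results_dir[len('swift+'):]
    parts = urlsplit(results_dir)
    return 'scp', {'user': parts.username, 'host': parts.hostname,
                   'port': parts.port, 'path': parts.path}

# e.g. split_results_dir('scp://plone@worker:22/tmp/result_import_ab12/')
# -> ('scp', {'user': 'plone', 'host': 'worker', 'port': 22,
#             'path': '/tmp/result_import_ab12/'})
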
def getGenreSchemata(self):
    schemata = []
    md = IBCCVLMetadata(self.context)
    genre = md.get('genre')
    if genre in self.genre_interface_map:
        schemata.append(self.genre_interface_map[genre])
    if IBlobDataset.providedBy(self.context):
        schemata.append(IBlobDataset)
    if IRemoteDataset.providedBy(self.context):
        schemata.append(IRemoteDataset)
    return schemata

def __call__(self, **kw):
    jt = IJobTracker(self.context)
    # TODO: if state is empty check if there is a downloadable file
    #       Yes: COMPLETED
    #       No: FAILED
    state = jt.state
    if not state:
        if IBlobDataset.providedBy(self.context):
            # we have no state, may happen for imported datasets,
            # let's check if we have a file
            if self.context.file is not None:
                state = 'COMPLETED'
            else:
                state = 'FAILED'
        elif IRemoteDataset.providedBy(self.context):
            if self.context.remoteUrl:
                state = 'COMPLETED'
            else:
                state = 'FAILED'
    return state

def _download_results(self, context, zfile):
    # FIXME: This is a rather lengthy process and should probably be turned
    #        into a background task... (maybe as part of a datamanager service?)
    # 1. find all IBlobDataset / IRemoteDataset / IDataset objects within context
    pc = getToolByName(context, 'portal_catalog')
    brains = pc.searchResults(path='/'.join(context.getPhysicalPath()),
                              object_provides=[
                                  IBlobDataset.__identifier__,
                                  IRemoteDataset.__identifier__
                              ])
    metadata = {}
    # the file/folder name for the zip
    zfilename = context.title
    # iterate over files and add to zip
    for brain in brains:
        content = brain.getObject()
        if IBlobDataset.providedBy(content):
            # If data is stored locally:
            arcname = '/'.join((zfilename, 'data', content.file.filename))
            # ob.file should be a NamedFile ... need to get fs name for that
            blobfile = content.file.openDetached()
            zfile.write(blobfile.name, arcname)
            blobfile.close()
        elif IRemoteDataset.providedBy(content):
            # TODO: duplicate code from
            remoteUrl = getattr(content, 'remoteUrl', None)
            if remoteUrl is None:
                raise NotFound(self, 'remoteUrl', self.request)
            # get arcname from remoteUrl
            arcname = '/'.join(
                (zfilename, 'data', os.path.basename(remoteUrl)))
            # FIXME: should check the dataset downloadable flag here, but the
            #        assumption is that this function can only be called on an
            #        experiment result folder....
            # TODO: duplicate code in browser/dataset.py:RemoteDatasetDownload.__call__
            # TODO: may not work in general... it always uses swift as remote url
            tool = getUtility(ISwiftUtility)
            try:
                url = tool.generate_temp_url(url=remoteUrl)
            except Exception:
                url = remoteUrl
            # url is now the location from which we can fetch the file
            temp_file, _ = urlretrieve(url)
            zfile.write(temp_file, arcname)
            os.remove(temp_file)
        else:
            # unknown type of Dataset; just skip it
            # TODO: Log warning or debug?
            continue
        metadata[arcname] = getdsmetadata(content)
    # all files are in ....
    # TODO: add experiment result metadata

    # put metadata into zip
    # provenance data stored on result container
    provdata = IProvenanceData(context)
    if provdata.data is not None:
        zfile.writestr('/'.join((zfilename, 'prov.ttl')),
                       provdata.data.encode('utf-8'))
    # add experiment metadata
    expmetadata = IExperimentMetadata(context)
    if expmetadata.data is not None:
        zfile.writestr('/'.join((zfilename, 'expmetadata.txt')),
                       expmetadata.data.encode('utf-8'))
    # add mets.xml
    metsview = getMultiAdapter((context, self.request), name="mets.xml")
    zfile.writestr('/'.join((zfilename, 'mets.xml')),
                   metsview.render().encode('utf-8'))
    # add experiment parameters
    params = IExperimentParameter(context)
    if params.data is not None:
        zfile.writestr('/'.join((zfilename, 'params.json')),
                       params.data.encode('utf-8'))

def _download_results(self, context, zfile):
    # FIXME: This is a rather lengthy process and should probably be turned
    #        into a background task... (maybe as part of a datamanager service?)
    # 1. find all IBlobDataset / IRemoteDataset / IDataset objects within context
    pc = getToolByName(context, 'portal_catalog')
    brains = pc.searchResults(path='/'.join(context.getPhysicalPath()),
                              object_provides=[IBlobDataset.__identifier__,
                                               IRemoteDataset.__identifier__])
    metadata = {}
    # the file/folder name for the zip
    zfilename = context.title
    # iterate over files and add to zip
    for brain in brains:
        content = brain.getObject()
        if IBlobDataset.providedBy(content):
            # If data is stored locally:
            arcname = '/'.join((zfilename, 'data', content.file.filename))
            # ob.file should be a NamedFile ... need to get fs name for that
            blobfile = content.file.openDetached()
            zfile.write(blobfile.name, arcname)
            blobfile.close()
        elif IRemoteDataset.providedBy(content):
            # TODO: duplicate code from
            remoteUrl = getattr(content, 'remoteUrl', None)
            if remoteUrl is None:
                raise NotFound(self, 'remoteUrl', self.request)
            # get arcname from remoteUrl
            arcname = '/'.join((zfilename, 'data', os.path.basename(remoteUrl)))
            # FIXME: should check the dataset downloadable flag here, but the
            #        assumption is that this function can only be called on an
            #        experiment result folder....
            # TODO: duplicate code in browser/dataset.py:RemoteDatasetDownload.__call__
            # TODO: may not work in general... it always uses swift as remote url
            tool = getUtility(ISwiftUtility)
            try:
                url = tool.generate_temp_url(url=remoteUrl)
            except Exception:
                url = remoteUrl
            # url is now the location from which we can fetch the file
            temp_file, _ = urlretrieve(url)
            zfile.write(temp_file, arcname)
            os.remove(temp_file)
        else:
            # unknown type of Dataset; just skip it
            # TODO: Log warning or debug?
            continue
        metadata[arcname] = getdsmetadata(content)
    # all files are in ....
    # TODO: add experiment result metadata

    # put metadata into zip
    # provenance data stored on result container
    provdata = IProvenanceData(context)
    if provdata.data is not None:
        zfile.writestr('/'.join((zfilename, 'prov.ttl')),
                       provdata.data.encode('utf-8'))
    # add experiment metadata
    expmetadata = IExperimentMetadata(context)
    if expmetadata.data is not None:
        zfile.writestr('/'.join((zfilename, 'expmetadata.txt')),
                       expmetadata.data.encode('utf-8'))
    # add mets.xml
    metsview = getMultiAdapter((context, self.request), name="mets.xml")
    zfile.writestr('/'.join((zfilename, 'mets.xml')),
                   metsview.render().encode('utf-8'))
    # add experiment parameters
    params = IExperimentParameter(context)
    if params.data is not None:
        zfile.writestr('/'.join((zfilename, 'params.json')),
                       params.data.encode('utf-8'))

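# generate_temp_url above is provided by ISwiftUtility; its implementation is
# not shown in this excerpt. For reference, a minimal sketch of the standard
# OpenStack Swift TempURL signing scheme it presumably relies on. The function
# name, key handling and ttl below are assumptions for illustration, not the
# project's code.
import hmac
import time
from hashlib import sha1


def make_temp_url(host, path, key, method='GET', ttl=600):
    # Sign "<method>\n<expires>\n<path>" with the account/container temp-url
    # key (classic HMAC-SHA1 variant); path is e.g.
    # '/v1/AUTH_<tenant>/<container>/<object>'.
    expires = int(time.time()) + ttl
    body = '{0}\n{1}\n{2}'.format(method, expires, path)
    sig = hmac.new(key.encode('utf-8'), body.encode('utf-8'), sha1).hexdigest()
    return '{0}{1}?temp_url_sig={2}&temp_url_expires={3}'.format(
        host, path, sig, expires)
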
def __call__(self):
    # FIXME: This is a rather lengthy process and should probably be turned
    #        into a background task... (maybe as part of a datamanager service?)
    # 1. find all IBlobDataset / IRemoteDataset / IDataset objects within context
    pc = getToolByName(self.context, 'portal_catalog')
    brains = pc.searchResults(path='/'.join(self.context.getPhysicalPath()),
                              object_provides=[IBlobDataset.__identifier__,
                                               IRemoteDataset.__identifier__])
    fname = None
    try:
        # create tmp file
        fd, fname = tempfile.mkstemp()
        fo = os.fdopen(fd, 'wb')
        zfile = zipfile.ZipFile(fo, 'w')

        metadata = {}
        # the file/folder name for the zip
        zfilename = self.context.title
        # iterate over files and add to zip
        for brain in brains:
            content = brain.getObject()
            if IBlobDataset.providedBy(content):
                # If data is stored locally:
                arcname = '/'.join((zfilename, 'data', content.file.filename))
                # ob.file should be a NamedFile ... need to get fs name for that
                blobfile = content.file.openDetached()
                zfile.write(blobfile.name, arcname)
                blobfile.close()
            elif IRemoteDataset.providedBy(content):
                # TODO: duplicate code from
                remoteUrl = getattr(content, 'remoteUrl', None)
                if remoteUrl is None:
                    raise NotFound(self, 'remoteUrl', self.request)
                # get arcname from remoteUrl
                arcname = '/'.join((zfilename, 'data',
                                    os.path.basename(remoteUrl)))
                # FIXME: should check the dataset downloadable flag here, but
                #        the assumption is that this function can only be
                #        called on an experiment result folder....
                # TODO: duplicate code in browser/dataset.py:RemoteDatasetDownload.__call__
                # TODO: may not work in general... it always uses swift as remote url
                tool = getUtility(ISwiftUtility)
                try:
                    url = tool.generate_temp_url(url=remoteUrl)
                except Exception:
                    url = remoteUrl
                # url is now the location from which we can fetch the file
                temp_file, _ = urlretrieve(url)
                zfile.write(temp_file, arcname)
                os.remove(temp_file)
            else:
                # unknown type of Dataset; just skip it
                # TODO: Log warning or debug?
                continue
            metadata[arcname] = getdsmetadata(content)
        # all files are in ....
        # TODO: add experiment result metadata

        # put metadata into zip
        # provenance data stored on result container
        provdata = IProvenanceData(self.context)
        if provdata.data is not None:
            zfile.writestr('/'.join((zfilename, 'prov.ttl')),
                           provdata.data)
        # add mets.xml
        metsview = getMultiAdapter((self.context, self.request),
                                   name="mets.xml")
        zfile.writestr('/'.join((zfilename, 'mets.xml')),
                       metsview.render())
        # finish zip file
        zfile.close()
        fo.close()
        # create response
        self.request.response.setHeader('Content-Type', 'application/zip')
        self.request.response.setHeader(
            'Content-Disposition',
            'attachment; filename="{}.zip"'.format(zfilename))
        self.request.response.setHeader(
            'Content-Length', '{}'.format(os.path.getsize(fname)))
        return tmpfile_stream_iterator(fname)
    except Exception:
        # something went wrong ... clean up and re-raise
        if fname and os.path.exists(fname):
            os.remove(fname)
        raise

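# tmpfile_stream_iterator is imported elsewhere in the module and is not shown
# in this excerpt. A rough sketch of what such a helper could look like, under
# the assumption that it streams the temp file in chunks and removes it once
# exhausted (the real implementation also has to satisfy Zope's
# stream-iterator interface, which is omitted here):
import os


def tmpfile_stream_iterator_sketch(fname, chunk_size=65536):
    # Hypothetical stand-in: yield the zip file in chunks for the response,
    # then clean up the temporary file.
    try:
        with open(fname, 'rb') as f:
            while True:
                chunk = f.read(chunk_size)
                if not chunk:
                    break
                yield chunk
    finally:
        if os.path.exists(fname):
            os.remove(fname)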