def getByHash(self, algo, hash):
    self._validateAlgo(algo)

    model = FileModel()
    user = self.getCurrentUser()
    cursor = model.find({algo: hash})

    return [file for file in cursor if model.hasAccess(file, user, AccessType.READ)]
def _checkUploadSize(self, upload):
    """
    Check if an upload will fit within a quota restriction.

    :param upload: an upload document.
    :returns: None if the upload is allowed, otherwise a dictionary of
        information about the quota restriction.
    """
    origSize = 0
    if 'fileId' in upload:
        file = File().load(id=upload['fileId'], force=True)
        origSize = int(file.get('size', 0))
        model, resource = self._getBaseResource('file', file)
    else:
        model, resource = self._getBaseResource(upload['parentType'], upload['parentId'])
    if resource is None:
        return None
    fileSizeQuota = self._getFileSizeQuota(model, resource)
    if not fileSizeQuota:
        return None
    newSize = resource['size'] + upload['size'] - origSize
    # Always allow replacement with a smaller object
    if newSize <= fileSizeQuota or upload['size'] < origSize:
        return None
    left = fileSizeQuota - resource['size']
    if left < 0:
        left = 0
    return {
        'fileSizeQuota': fileSizeQuota,
        'sizeNeeded': upload['size'] - origSize,
        'quotaLeft': left,
        'quotaUsed': resource['size']
    }
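# Usage sketch (not from the source): how the quota check above would
# typically be consumed by an upload hook.  The names `quotaMixin` and
# `upload` are assumptions, and the ValidationException import path varies
# by Girder version (girder.exceptions in Girder 3).
from girder.models.model_base import ValidationException


def checkQuotaOrRaise(quotaMixin, upload):
    # _checkUploadSize returns None when the upload fits within the quota.
    denial = quotaMixin._checkUploadSize(upload)
    if denial is not None:
        raise ValidationException(
            'Upload would exceed the quota: need %d bytes, %d remaining.'
            % (denial['sizeNeeded'], denial['quotaLeft']))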
def _computeHash(file, progress=noProgress):
    """
    Computes all supported checksums on a given file. Downloads the file
    data and stream-computes all required hashes on it, saving the results
    in the file document.

    In the case of assetstore impls that already compute the sha512, and
    when sha512 is the only supported algorithm, we will not download the
    file to the server.
    """
    toCompute = SUPPORTED_ALGORITHMS - set(file)
    toCompute = {alg: getattr(hashlib, alg)() for alg in toCompute}

    if not toCompute:
        return

    fileModel = FileModel()
    with fileModel.open(file) as fh:
        while True:
            chunk = fh.read(_CHUNK_LEN)
            if not chunk:
                break
            for digest in six.viewvalues(toCompute):
                digest.update(chunk)
            progress.update(increment=len(chunk))

    digests = {alg: digest.hexdigest() for alg, digest in six.viewitems(toCompute)}
    fileModel.update({'_id': file['_id']}, update={
        '$set': digests
    }, multi=False)

    return digests
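# Driver sketch: backfill digests for every file that is missing one.
# Assumes the same module-level FileModel used by _computeHash above;
# purely illustrative, not part of the source.
def computeMissingHashes():
    for fileDoc in FileModel().find():
        # _computeHash is a no-op (returns None) when nothing is missing.
        _computeHash(fileDoc)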
def _getFirstFileByHash(self, algo, hash, user=None):
    """
    Return the first file that the user has access to given its hash and its
    associated hashsum algorithm name.

    :param algo: Algorithm the given hash is encoded with.
    :param hash: Hash of the file to find.
    :param user: User to test access against. Default (none) is the current
        user.
    :return: A file document.
    """
    self._validateAlgo(algo)

    query = {algo: hash}
    fileModel = FileModel()
    cursor = fileModel.find(query)

    if not user:
        user = self.getCurrentUser()

    for file in cursor:
        if fileModel.hasAccess(file, user, AccessType.READ):
            return file

    return None
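# Sketch of resolving a client-supplied digest to a readable file.  The
# 'sha512' algorithm name and the 404 handling are assumptions chosen for
# illustration, not part of the source.
from girder.api.rest import RestException


def resolveHashOr404(resource, hexDigest):
    file = resource._getFirstFileByHash('sha512', hexDigest)
    if file is None:
        raise RestException('File not found.', code=404)
    return file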
def testFilePath(self):
    """
    Test that all files report a FUSE path, and that this results in the
    same file as the non-fuse path.
    """
    from girder.plugins import fuse as girder_fuse

    files = list(File().find())
    for file in files:
        adapter = File().getAssetstoreAdapter(file)
        filesystempath = adapter.fullPath(file)
        filepath = girder_fuse.getFilePath(file)
        fusepath = girder_fuse.getFuseFilePath(file)
        self.assertTrue(os.path.exists(filesystempath))
        self.assertTrue(os.path.exists(filepath))
        self.assertTrue(os.path.exists(fusepath))
        self.assertEqual(filesystempath, filepath)
        self.assertNotEqual(filesystempath, fusepath)
        self.assertEqual(fusepath[:len(self.mainMountPath)], self.mainMountPath)
        with open(filepath) as file1:
            with open(fusepath) as file2:
                self.assertEqual(file1.read(), file2.read())
        subpath = fusepath[len(self.mainMountPath):].lstrip('/')
        if self.knownPaths.get(subpath):
            with open(fusepath) as file1:
                self.assertEqual(file1.read().strip(), self.knownPaths[subpath])
@classmethod
def adjustDBUri(cls, uri, *args, **kwargs):
    """
    Adjust a uri to match the form sqlite requires. This can convert a
    Girder resource path to an appropriate physical file reference.

    :param uri: the uri to adjust.
    :returns: the adjusted uri
    """
    uri = super(SqliteSAConnector, cls).adjustDBUri(uri, *args, **kwargs)
    if '://' in uri:
        uri = uri.split('://', 1)[0] + ':////' + uri.split('://', 1)[1].lstrip('/')
    # If we have a Girder resource path, convert it.  If this looks like a
    # file but doesn't exist, check if it is a resource path.  If this is
    # not a resource path to a file that we can read directly, treat this
    # the same as a missing file.
    if ':///' in uri and not os.path.exists(uri.split(':///', 1)[1]):
        resourcepath = path_util.lookUpPath(
            uri.split(':///', 1)[1], test=True, filter=False, force=True)
        if resourcepath and resourcepath['model'] == 'file':
            file = resourcepath['document']
            adapter = File().getAssetstoreAdapter(file)
            if hasattr(adapter, 'fullPath'):
                filepath = adapter.fullPath(file)
                if os.path.exists(filepath):
                    uri = uri.split(':///', 1)[0] + ':///' + filepath
                    log.debug('Using Girder file for SQLite database')
    return uri
def removeThumbnails(event):
    """
    When a resource containing thumbnails is about to be deleted, we delete
    all of the thumbnails that are attached to it.
    """
    thumbs = event.info.get('_thumbnails', ())
    fileModel = File()

    for fileId in thumbs:
        file = fileModel.load(fileId, force=True)
        if file:
            fileModel.remove(file)
def girderInputSpec(resource, resourceType='file', name=None, token=None,
                    dataType='string', dataFormat='text', fetchParent=False):
    """
    Downstream plugins that are building Girder worker jobs that use Girder
    IO should use this to generate the input specs more easily.

    :param resource: The resource document to be downloaded at runtime.
    :type resource: dict
    :param resourceType: The resource type to download for the input. Should
        be "folder", "item", or "file".
    :type resourceType: str
    :param name: The name of the resource to download. If not passed, uses
        the "name" field of the resource document.
    :type name: str or None
    :param token: The Girder token document or raw token string to use to
        authenticate when downloading. Pass `None` for anonymous downloads.
    :type token: dict, str, or None
    :param dataType: The worker `type` field.
    :type dataType: str
    :param dataFormat: The worker `format` field.
    :type dataFormat: str
    :param fetchParent: Whether to fetch the whole parent resource of the
        specified resource as a side effect.
    :type fetchParent: bool
    """
    if isinstance(token, dict):
        token = token['_id']

    result = {
        'mode': 'girder',
        'api_url': getWorkerApiUrl(),
        'token': token,
        'id': str(resource['_id']),
        'name': name or resource['name'],
        'resource_type': resourceType,
        'type': dataType,
        'format': dataFormat,
        'fetch_parent': fetchParent
    }
    if resourceType == 'file' and not fetchParent and Setting().get(PluginSettings.DIRECT_PATH):
        # If we are adding a file and it exists on the local filesystem, include
        # that location.  This can permit the user of the specification to
        # access the file directly instead of downloading the file.
        adapter = File().getAssetstoreAdapter(resource)
        if callable(getattr(adapter, 'fullPath', None)):
            result['direct_path'] = adapter.fullPath(resource)
    return result
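# Illustrative use of girderInputSpec when assembling a worker job's IO
# spec.  The jobSpec layout follows the girder_worker convention; `fileDoc`
# and `tokenDoc` are assumed to have been loaded elsewhere.
jobSpec = {
    'inputs': {
        'in_file': girderInputSpec(fileDoc, resourceType='file', token=tokenDoc)
    },
    'outputs': {}
}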
def testAssetstoreOpen(self):
    for userAssetstore in (False, True):
        townItem, townFile, assetstore1 = self._createTownItem(
            userAssetstore=userAssetstore)
        adapter = File().getAssetstoreAdapter(townFile)
        handle = adapter.open(townFile)
        data = handle.read(200)
        self.assertEqual(data[:2], b'{"')
        data += handle.read(200)
        self.assertNotEqual(data[:200], data[200:])
        handle.seek(100)
        data100 = handle.read(200)
        self.assertEqual(data100, data[100:300])
        handle.seek(-100, os.SEEK_END)
        dataend = handle.read(200)
        self.assertEqual(len(dataend), 100)
        self.assertNotEqual(dataend, data[:100])
def __init__(self):
    super(HdfsAssetstoreResource, self).__init__()
    self.resourceName = 'hdfs_assetstore'
    self.route('PUT', (':id', 'import'), self.importData)

    # Save these models to avoid many lookups
    self.folderModel = Folder()
    self.itemModel = Item()
    self.fileModel = File()
def findInvalidFiles(self, progress=progress.noProgress, filters=None,
                     checkSize=True, **kwargs):
    """
    Goes through every file in this assetstore and finds those whose
    underlying data is missing or invalid. This is a generator function --
    for each invalid file found, a dictionary is yielded to the caller that
    contains the file, its absolute path on disk, and a reason for invalid,
    e.g. "missing" or "size".

    :param progress: Pass a progress context to record progress.
    :type progress: :py:class:`girder.utility.progress.ProgressContext`
    :param filters: Additional query dictionary to restrict the search for
        files. There is no need to set the ``assetstoreId`` in the filters,
        since that is done automatically.
    :type filters: dict or None
    :param checkSize: Whether to make sure the size of the underlying data
        matches the size of the file.
    :type checkSize: bool
    """
    filters = filters or {}
    q = dict({
        'assetstoreId': self.assetstore['_id']
    }, **filters)

    cursor = File().find(q)
    progress.update(total=cursor.count(), current=0)

    for file in cursor:
        progress.update(increment=1, message=file['name'])
        path = self.fullPath(file)

        if not os.path.isfile(path):
            yield {
                'reason': 'missing',
                'file': file,
                'path': path
            }
        elif checkSize and os.path.getsize(path) != file['size']:
            yield {
                'reason': 'size',
                'file': file,
                'path': path
            }
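# Consumption sketch for the generator above: stream invalid-file reports
# as the scan progresses.  `adapter` is assumed to be a filesystem
# assetstore adapter instance obtained elsewhere.
for report in adapter.findInvalidFiles(checkSize=True):
    print('%s: %s (%s)' % (report['reason'], report['file']['name'], report['path']))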
def __init__(self):
    super().__init__()
    self.resourceName = 'label'

    self.coll_m = Collection()
    self.file_m = File()
    self.folder_m = Folder()
    self.item_m = Item()
    self.upload_m = Upload()
    self.asset_m = Assetstore()

    self.setupRoutes()
def deleteFile(self, file): """ We want to queue up files to be deleted asynchronously since it requires an external HTTP request per file in order to delete them, and we don't want to wait on that. Files that were imported as pre-existing data will not actually be deleted from S3, only their references in Girder will be deleted. """ if file['size'] > 0 and 'relpath' in file: q = { 'relpath': file['relpath'], 'assetstoreId': self.assetstore['_id'] } matching = File().find(q, limit=2, fields=[]) if matching.count(True) == 1: events.daemon.trigger(info={ 'client': self.client, 'bucket': self.assetstore['bucket'], 'key': file['s3Key'] }, callback=_deleteFileImpl)
class _FileHandle(paramiko.SFTPHandle):
    def __init__(self, file):
        """
        Create a file-like object representing a file blob stored in Girder.

        :param file: The file object being opened.
        :type file: dict
        """
        super(_FileHandle, self).__init__()

        self.file = file
        self._handle = File().open(file)

    def read(self, offset, length):
        if length > MAX_BUF_LEN:
            raise IOError(
                'Requested chunk length (%d) is larger than the maximum allowed.' % length)

        if offset != self._handle.tell() and offset < self.file['size']:
            self._handle.seek(offset)

        return self._handle.read(length)

    def stat(self):
        return _stat(self.file, 'file')

    def close(self):
        self._handle.close()
        return paramiko.SFTP_OK
def deleteFile(self, file): """ Delete all of the chunks in the collection that correspond to the given file. """ q = { 'chunkUuid': file['chunkUuid'], 'assetstoreId': self.assetstore['_id'] } matching = File().find(q, limit=2, projection=[]) if matching.count(True) == 1: # If we can't reach the database, we return anyway. A system check # will be necessary to remove the abandoned file. Since we already # can handle that case, tell Mongo to use a 0 write concern -- we # don't need to know that the chunks have been deleted, and this # can be faster. try: self.chunkColl.with_options( write_concern=pymongo.WriteConcern(w=0)).delete_many( {'uuid': file['chunkUuid']}) except pymongo.errors.AutoReconnect: pass
def deleteFile(self, file): """ Deletes the file from disk if it is the only File in this assetstore with the given sha512. Imported files are not actually deleted. """ from girder.models.file import File if file.get('imported') or 'path' not in file: return q = { 'sha512': file['sha512'], 'assetstoreId': self.assetstore['_id'] } path = os.path.join(self.assetstore['root'], file['path']) if os.path.isfile(path): with filelock.FileLock(path + '.deleteLock'): matching = File().find(q, limit=2, fields=[]) matchingUpload = Upload().findOne(q) if matching.count(True) == 1 and matchingUpload is None: try: os.unlink(path) except Exception: logger.exception('Failed to delete file %s' % path)
def testFilesystemAssetstoreFindInvalidFiles(self):
    # Create several files in the assetstore, some of which point to real
    # files on disk and some that don't
    folder = next(Folder().childFolders(
        parent=self.admin, parentType='user', force=True, limit=1))
    item = Item().createItem('test', self.admin, folder)
    path = os.path.join(
        ROOT_DIR, 'tests', 'cases', 'py_client', 'testdata', 'hello.txt')
    real = File().createFile(
        name='hello.txt', creator=self.admin, item=item,
        assetstore=self.assetstore, size=os.path.getsize(path))
    real['imported'] = True
    real['path'] = path
    File().save(real)

    fake = File().createFile(
        name='fake', creator=self.admin, item=item, size=1,
        assetstore=self.assetstore)
    fake['path'] = 'nonexistent/path/to/file'
    fake['sha512'] = '...'
    fake = File().save(fake)

    fakeImport = File().createFile(
        name='fakeImport', creator=self.admin, item=item, size=1,
        assetstore=self.assetstore)
    fakeImport['imported'] = True
    fakeImport['path'] = '/nonexistent/path/to/file'
    fakeImport = File().save(fakeImport)

    adapter = assetstore_utilities.getAssetstoreAdapter(self.assetstore)
    self.assertTrue(inspect.isgeneratorfunction(adapter.findInvalidFiles))

    with ProgressContext(True, user=self.admin, title='test') as p:
        invalidFiles = list(adapter.findInvalidFiles(progress=p, filters={
            'imported': True
        }))
        self.assertEqual(len(invalidFiles), 1)
        self.assertEqual(invalidFiles[0]['reason'], 'missing')
        self.assertEqual(invalidFiles[0]['file']['_id'], fakeImport['_id'])
        self.assertEqual(p.progress['data']['current'], 2)
        self.assertEqual(p.progress['data']['total'], 2)

        invalidFiles = list(adapter.findInvalidFiles(progress=p))
        self.assertEqual(len(invalidFiles), 2)
        for invalidFile in invalidFiles:
            self.assertEqual(invalidFile['reason'], 'missing')
            self.assertIn(invalidFile['file']['_id'], (fakeImport['_id'], fake['_id']))
        self.assertEqual(p.progress['data']['current'], 3)
        self.assertEqual(p.progress['data']['total'], 3)
def _testEmptyUpload(self, name):
    """
    Uploads an empty file to the server.
    """
    resp = self.request(
        path='/file', method='POST', user=self.user, params={
            'parentType': 'folder',
            'parentId': self.privateFolder['_id'],
            'name': name,
            'size': 0
        })
    self.assertStatusOk(resp)

    file = resp.json

    self.assertHasKeys(file, ['itemId'])
    self.assertEqual(file['size'], 0)
    self.assertEqual(file['name'], name)
    self.assertEqual(file['assetstoreId'], str(self.assetstore['_id']))

    return File().load(file['_id'], force=True)
def _countCachedImages(self, spec, associatedImages=False):
    if spec is not None:
        try:
            spec = json.loads(spec)
            if not isinstance(spec, list):
                raise ValueError()
        except ValueError:
            raise RestException('The spec parameter must be a JSON list.')
        spec = [json.dumps(entry, sort_keys=True, separators=(',', ':'))
                for entry in spec]
    else:
        spec = [None]
    count = 0
    for entry in spec:
        query = {'isLargeImageThumbnail': True, 'attachedToType': 'item'}
        if entry is not None:
            query['thumbnailKey'] = entry
        elif associatedImages:
            query['thumbnailKey'] = {'$regex': '"imageKey":'}
        count += File().find(query).count()
    return count
def ingestDataset(self, params):
    params = self._decodeParams(params)
    self.requireParams(['zipFileId', 'name', 'owner'], params)

    user = self.getCurrentUser()
    User().requireCreateDataset(user)

    zipFileId = params.get('zipFileId')
    if not zipFileId:
        raise ValidationException('No file was uploaded.', 'zipFileId')
    zipFile = File().load(zipFileId, user=user, level=AccessType.WRITE, exc=False)
    if not zipFile:
        raise ValidationException('Invalid upload file ID.', 'zipFileId')
    if not self._checkFileFormat(zipFile, ZIP_FORMATS):
        raise ValidationException('File must be in .zip format.', 'zipFileId')

    name = params['name'].strip()
    owner = params['owner'].strip()
    if not owner:
        raise ValidationException('Owner must be specified.', 'owner')
    description = params.get('description', '').strip()

    # Enforce valid licensing metadata only at API level
    licenseValue = params.get('license', '').strip()
    if licenseValue not in {'CC-0', 'CC-BY', 'CC-BY-NC'}:
        raise ValidationException('Unknown license type.', 'license')
    signature = params.get('signature', '').strip()
    if not signature:
        raise ValidationException('Signature must be specified.', 'signature')
    anonymous = self.boolParam('anonymous', params, False)
    attribution = params.get('attribution', '').strip()
    if not anonymous and not attribution:
        raise ValidationException(
            'Attribution must be specified when not contributing anonymously.',
            'attribution')

    # TODO: make this return only the dataset fields
    return Dataset().ingestDataset(
        zipFile=zipFile, user=user, name=name, owner=owner,
        description=description, license=licenseValue, signature=signature,
        anonymous=anonymous, attribution=attribution, sendMail=True)
def _uploadFile(self, path, name=None, private=False):
    """
    Upload the specified path to the admin user's public or private folder
    and return the resulting file and item.

    :param path: path to upload.
    :param name: optional name for the file.
    :param private: True to upload to the private folder, False for public.
        'user' for the user's private folder.
    :returns: a tuple of (file, item) for the created file and its item.
    """
    if not name:
        name = os.path.basename(path)
    with open(path, 'rb') as file:
        data = file.read()
    if private == 'user':
        folderId = self.userPrivateFolder['_id']
    elif private:
        folderId = self.privateFolder['_id']
    else:
        folderId = self.publicFolder['_id']
    resp = self.request(
        path='/file', method='POST', user=self.admin, params={
            'parentType': 'folder',
            'parentId': folderId,
            'name': name,
            'size': len(data)
        })
    self.assertStatusOk(resp)
    uploadId = resp.json['_id']
    fields = [('offset', 0), ('uploadId', uploadId)]
    files = [('chunk', name, data)]
    resp = self.multipartRequest(
        path='/file/chunk', fields=fields, files=files, user=self.admin)
    self.assertStatusOk(resp)
    self.assertIn('itemId', resp.json)
    file = File().load(resp.json['_id'], user=self.admin, exc=True)
    item = Item().load(file['itemId'], user=self.admin, exc=True)
    return file, item
def downloadArchiveFile(self, file, path, offset, endByte, contentDisposition,
                        extraParameters):
    """
    Requires read permission on the folder that contains the file's item.
    """
    with ArchiveFileHandle(File(), file, path) as fileobj:
        info = fileobj.info()
        rangeHeader = cherrypy.lib.httputil.get_ranges(
            cherrypy.request.headers.get('Range'), info['size'])
        # The HTTP Range header takes precedence over query params
        if rangeHeader and len(rangeHeader):
            # Currently we only support a single range.
            offset, endByte = rangeHeader[0]
        if offset:
            fileobj.seek(offset)
        else:
            offset = 0
        if endByte is None or endByte > info['size']:
            endByte = info['size']
        setResponseHeader('Accept-Ranges', 'bytes')
        setResponseHeader('Content-Type', 'application/octet-stream')
        setContentDisposition(os.path.basename(path), contentDisposition or 'attachment')
        setResponseHeader('Content-Length', max(endByte - offset, 0))
        if (offset or endByte < file['size']) and file['size']:
            setResponseHeader(
                'Content-Range', 'bytes %d-%d/%d' % (offset, endByte - 1, file['size']))

        def downloadGenerator():
            pos = offset
            while pos < endByte:
                data = fileobj.read(min(65536, endByte - pos))
                yield data
                pos += len(data)
                if pos >= endByte:
                    break

        return downloadGenerator
def createTiles(self, item, params):
    largeImageFileId = params.get('fileId')
    if largeImageFileId is None:
        files = list(Item().childFiles(item=item, limit=2))
        if len(files) == 1:
            largeImageFileId = str(files[0]['_id'])
    if not largeImageFileId:
        raise RestException('Missing "fileId" parameter.')
    largeImageFile = File().load(largeImageFileId, force=True, exc=True)
    user = self.getCurrentUser()
    token = self.getCurrentToken()
    try:
        return self.imageItemModel.createImageItem(
            item, largeImageFile, user, token,
            notify=self.boolParam('notify', params, default=True))
    except TileGeneralException as e:
        raise RestException(e.args[0])
def find_file(p_folder, name, user, assetstore, create=False):
    """
    Find a file by name under a parent folder, optionally creating it if it
    is not found.

    :param p_folder: parent folder
    :param name: name of the file
    :param user: user trying to access this file
    :param assetstore: assetstore where this file exists
    :param create: boolean, whether or not to create the file if not found
    :return: file doc or None
    """
    item = list(Item().find({'folderId': p_folder['_id'], 'name': name}).limit(1))
    if not item:
        # Check if we are allowed to create; otherwise return nothing
        if create:
            file = create_new_file(p_folder, name, user, assetstore)
        else:
            return None
    else:
        item = item[0]
        file = list(File().find({'itemId': item['_id']}).limit(1))[0]
    return file
def create_new_file(p_folder, name, user, assetstore):
    """
    Create a new file.

    :param p_folder: parent folder
    :param name: name of the file you want to create
    :param user: user trying to create this file
    :param assetstore: assetstore where this file is going to be created
    :return: file doc
    """
    item = Item().createItem(name,
                             creator=user,
                             folder=p_folder,
                             description='label file',
                             reuseExisting=False)
    file = File().createFile(size=0,
                             item=item,
                             name=name,
                             creator=user,
                             assetstore=assetstore,
                             mimeType="application/json")
    return file
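# Fetch-or-create sketch combining the two helpers above; the folder, user,
# and assetstore documents are assumed to have been loaded by the caller,
# and 'labels.json' is an illustrative file name.
labelFile = find_file(folder, 'labels.json', user, assetstore, create=True)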
def handler(path, girder_item, girder_file):
    if ".obj" not in girder_file['name'] and \
       ".OBJ" not in girder_file['name']:
        raise CannotHandleError(girder_file['name'] + ' is not an OBJ file')
    try:
        jsonItem = Item().findOne({
            'name': girder_file['name'].replace("obj", "json"),
            'folderId': girder_item['folderId']
        })
        jsonFile = [i for i in Item().childFiles(jsonItem, limit=1)][0]
        jsonContent = json.loads(''.join(
            list(File().download(jsonFile, headers=False)())))
        projectionParams = jsonContent['scenes'][0]['coordinate_system']['parameters']
        ellps = projectionParams[0].upper()
        m = re.compile('(utm) zone ([0-9]+)N', re.IGNORECASE).match(projectionParams[1])
        proj = m.group(1).lower()
        zone = m.group(2)
        sourceSrs = Proj(proj=proj, zone=zone, ellps=ellps).srs
        offset = [
            projectionParams[2],
            projectionParams[3],
            projectionParams[4]
        ]
        bounds = getOBJBounds(path, offset)
        geoJsonBounds = from_bounds_to_geojson(bounds, sourceSrs)
        geometa = {
            'crs': sourceSrs,
            'nativeBounds': bounds,
            'bounds': geoJsonBounds,
            'type_': 'vector',
            'driver': 'OBJ'
        }
        schema = OBJSchema()
        return schema.load(geometa)
    except Exception:
        raise CannotHandleError('Failed to add geometa to OBJ file ' + girder_file['name'])
def _file_to_cjson(self, file, file_format):
    readers = {
        'cjson': oc.CjsonReader
    }

    if file_format not in readers:
        raise Exception('Unknown file format %s' % file_format)
    reader = readers[file_format]

    with File().open(file) as f:
        calc_data = f.read().decode()

    # SpooledTemporaryFile doesn't implement next(),
    # workaround in case any reader needs it
    tempfile.SpooledTemporaryFile.__next__ = lambda self: self.__iter__().__next__()
    with tempfile.SpooledTemporaryFile(mode='w+', max_size=10 * 1024 * 1024) as tf:
        tf.write(calc_data)
        tf.seek(0)
        cjson = reader(tf).read()

    return cjson
def testFilePathNoLocalPath(self):
    """
    Test that if an assetstore adapter doesn't respond to getLocalFilePath,
    we always get the fuse path.
    """
    from girder.plugins import fuse as girder_fuse
    from girder.utility.filesystem_assetstore_adapter import FilesystemAssetstoreAdapter

    def getLocalFilePath(self, file):
        return super(FilesystemAssetstoreAdapter, self).getLocalFilePath(file)

    file = File().findOne()
    origGetLocalFilePath = FilesystemAssetstoreAdapter.getLocalFilePath
    FilesystemAssetstoreAdapter.getLocalFilePath = getLocalFilePath
    filepath = girder_fuse.getFilePath(file)
    fusepath = girder_fuse.getFuseFilePath(file)
    FilesystemAssetstoreAdapter.getLocalFilePath = origGetLocalFilePath

    self.assertTrue(os.path.exists(filepath))
    self.assertTrue(os.path.exists(fusepath))
    self.assertEqual(filepath, fusepath)
def testUserImportedFolders(self):
    user = User().findOne({'login': '******'})
    self.assertDictContains(
        {
            'firstName': 'User',
            'lastName': 'Two',
            'admin': False
        }, user, 'defaultFolders user')

    folder = Folder().findOne({'parentId': user['_id']})
    self.assertDictContains({'name': 'folder'}, folder, 'imported folder')

    item = Item().findOne({'folderId': folder['_id']})
    self.assertDictContains({'name': 'file.txt'}, item, 'imported item')

    file = File().findOne({'itemId': item['_id']})
    self.assertDictContains(
        {
            'name': 'file.txt',
            'mimeType': 'text/plain',
            'size': 5
        }, file, 'imported file')
def _getLargeImagePath(self):
    # If self.mayHaveAdjacentFiles is True, we try to use the girder
    # mount where companion files appear next to each other.
    try:
        largeImageFileId = self.item['largeImage']['fileId']
        if not hasattr(self, 'mayHaveAdjacentFiles'):
            # The item has adjacent files if there are any files that
            # are not the large image file or an original file it
            # was derived from.  This is always the case if there are 3
            # or more files.
            fileIds = [
                str(file['_id'])
                for file in Item().childFiles(self.item, limit=3)
            ]
            knownIds = [str(largeImageFileId)]
            if 'originalId' in self.item['largeImage']:
                knownIds.append(str(self.item['largeImage']['originalId']))
            self.mayHaveAdjacentFiles = (
                len(fileIds) >= 3 or
                fileIds[0] not in knownIds or
                fileIds[-1] not in knownIds)
        largeImageFile = File().load(largeImageFileId, force=True)
        if (any(ext in KnownExtensionsWithAdjacentFiles for ext in largeImageFile['exts']) or
                largeImageFile.get('mimeType') in KnownMimeTypesWithAdjacentFiles):
            self.mayHaveAdjacentFiles = True
        largeImagePath = None
        if self.mayHaveAdjacentFiles and hasattr(File(), 'getGirderMountFilePath'):
            try:
                largeImagePath = File().getGirderMountFilePath(largeImageFile)
            except FilePathException:
                pass
        if not largeImagePath:
            try:
                largeImagePath = File().getLocalFilePath(largeImageFile)
            except AttributeError as e:
                raise TileSourceException(
                    'No local file path for this file: %s' % e.args[0])
        return largeImagePath
    except (TileSourceAssetstoreException, FilePathException):
        raise
    except (KeyError, ValidationException, TileSourceException) as e:
        raise TileSourceException('No large image file in this item: %s' % e.args[0])
def test_direct_docker_run(mocker, server, adminToken, file):
    from girder.models.file import File

    docker_run_mock = mocker.patch(
        'slicer_cli_web.girder_worker_plugin.direct_docker_run._docker_run')
    docker_run_mock.return_value = []
    gc_mock = MockedGirderClient()

    path = File().getLocalFilePath(file)

    run(image='test', container_args=[DirectGirderFileIdToVolume(
        file['_id'], filename=basename(path), direct_file_path=None, gc=gc_mock)])

    docker_run_mock.assert_called_once()
    kwargs = docker_run_mock.call_args[1]
    # image
    assert kwargs['image'] == 'test'
    # container args
    assert len(kwargs['container_args']) == 1
    assert kwargs['container_args'][0].endswith(basename(path))
    # volumes
    assert len(kwargs['volumes']) == 1

    target_path = '%s/%s' % (TEMP_VOLUME_DIRECT_MOUNT_PREFIX, basename(path))

    docker_run_mock.reset_mock()
    run(image='test', container_args=[DirectGirderFileIdToVolume(
        file['_id'], direct_file_path=path, gc=gc_mock)])

    docker_run_mock.assert_called_once()
    kwargs = docker_run_mock.call_args[1]
    # image
    assert kwargs['image'] == 'test'
    # container args
    assert kwargs['container_args'] == [target_path]
    # volumes
    assert len(kwargs['volumes']) == 2
def boundaryThumbnail(self, segmentation, image=None, width=256):
    if not image:
        image = Image().load(segmentation['imageId'], force=True, exc=True)

    # An alternative approach to the below method is to use OpenCV to create a morphological
    # gradient image of the mask (using a square or cross structuring element, a circle is
    # slower), which produces a mask of the outline.  However, while this part is quite fast,
    # using NumPy to set the pixels of the rendered image is not quite as fast as Pillow's line
    # drawing and encoding the image as JPEG is also faster with Pillow than NumPy.
    mask = self.maskData(segmentation)
    if mask is None:
        return None
    contour = OpenCVSegmentationHelper.maskToContour(mask, paddedInput=False)

    originalFilePath = File().getLocalFilePath(Image().originalFile(image))
    with PIL_Image.open(originalFilePath) as pilImageData:
        pilDraw = PIL_ImageDraw.Draw(pilImageData)
        pilDraw.line(
            list(map(tuple, contour)),
            fill=(0, 255, 0),  # TODO: make color an option
            width=int(pilImageData.size[0] / 300.0)
        )

        # Saving using native PIL is much faster than converting to a NumPy array to save with
        # ScikitSegmentationHelper
        if width is not None:
            height = width * pilImageData.size[1] // pilImageData.size[0]
            pilImageData = pilImageData.resize(
                size=(width, height),
                # A PIL_Image.LANCZOS downsampling filter takes ~350ms to resize a 7k image to
                # 700, whereas default downsampling filter (nearest neighbor) is <1ms with
                # minimal noticeable difference in quality.
            )
        imageStream = io.BytesIO()
        pilImageData.save(imageStream, format='jpeg')
        return imageStream
def make_girder_file():
    files = []

    def _make_girder_file(assetstore, user, name, contents=b''):
        folder = Folder().find({
            'parentId': user['_id'],
            'name': 'Public'
        })[0]
        file = Upload().uploadFromFile(
            six.BytesIO(contents), size=len(contents), name=name,
            parentType='folder', parent=folder, user=user,
            assetstore=assetstore)
        if not contents:
            file = Upload().finalizeUpload(file, assetstore)
        files.append(file)

        return file

    yield _make_girder_file

    for file in files:
        File().remove(file)
def testAdmin(self):
    admin = User().findOne({'login': '******'})
    self.assertDictContains(
        {
            'firstName': 'First',
            'lastName': 'Last',
            'email': '*****@*****.**',
            'admin': True
        }, admin, 'Admin user')

    folder = Folder().findOne({'parentId': admin['_id']})
    self.assertDictContains({'name': 'folder'}, folder, 'imported folder')

    item = Item().findOne({'folderId': folder['_id']})
    self.assertDictContains({'name': 'file.txt'}, item, 'imported item')

    file = File().findOne({'itemId': item['_id']})
    self.assertDictContains(
        {
            'name': 'file.txt',
            'mimeType': 'text/plain',
            'size': 5
        }, file, 'imported file')
def createTiles(self, item, params):
    if 'concurrent' in params:
        params['_concurrency'] = params.pop('concurrent')
    largeImageFileId = params.get('fileId')
    if largeImageFileId is None:
        files = list(Item().childFiles(item=item, limit=2))
        if len(files) == 1:
            largeImageFileId = str(files[0]['_id'])
    if not largeImageFileId:
        raise RestException('Missing "fileId" parameter.')
    largeImageFile = File().load(largeImageFileId, force=True, exc=True)
    user = self.getCurrentUser()
    token = self.getCurrentToken()
    notify = self.boolParam('notify', params, default=True)
    params.pop('notify', None)
    try:
        return self.imageItemModel.createImageItem(
            item, largeImageFile, user, token,
            createJob='always' if self.boolParam('force', params, default=False) else True,
            notify=notify,
            **params)
    except TileGeneralException as e:
        raise RestException(e.args[0])
def addZipBatch(self, dataset, params):
    params = self._decodeParams(params)
    self.requireParams(['zipFileId', 'signature'], params)

    user = self.getCurrentUser()
    User().requireCreateDataset(user)

    zipFileId = params['zipFileId']
    if not zipFileId:
        raise ValidationException('No file was uploaded.', 'zipFileId')
    zipFile = File().load(zipFileId, user=user, level=AccessType.WRITE, exc=False)
    if not zipFile:
        raise ValidationException('Invalid upload file ID.', 'zipFileId')
    if not self._checkFileFormat(zipFile, ZIP_FORMATS):
        raise ValidationException('File must be in .zip format.', 'zipFileId')

    signature = params['signature'].strip()
    if not signature:
        raise ValidationException('Signature must be specified.', 'signature')

    # TODO: make this return something
    Dataset().addZipBatch(
        dataset=dataset, zipFile=zipFile, signature=signature, user=user,
        sendMail=True)
def mask(self, segmentation, params):
    contentDisp = params.get('contentDisposition', None)
    if contentDisp is not None and contentDisp not in {'inline', 'attachment'}:
        raise ValidationException(
            'Unallowed contentDisposition type "%s".' % contentDisp,
            'contentDisposition')

    # TODO: convert this to make Segmentation use an AccessControlMixin
    Image().load(segmentation['imageId'], level=AccessType.READ,
                 user=self.getCurrentUser(), exc=True)

    maskFile = Segmentation().maskFile(segmentation)
    if maskFile is None:
        raise RestException(
            'This segmentation failed, and thus has no mask.', code=410)

    return File().download(maskFile, headers=True, contentDisposition=contentDisp)
def _handleUpload(event):
    upload, file = event.info['upload'], event.info['file']

    try:
        reference = json.loads(upload.get('reference'))
    except (TypeError, ValueError):
        return

    if isinstance(reference, dict) and 'interactive_thumbnail' in reference:
        item = Item().load(file['itemId'], force=True, exc=True)
        file['interactive_thumbnails_uid'] = file['name']
        file['attachedToId'] = item['_id']
        file['attachedToType'] = 'item'
        file['itemId'] = None
        File().save(file)

        if not item.get('hasInteractiveThumbnail'):
            Item().update({'_id': item['_id']}, {'$set': {
                'hasInteractiveThumbnail': True
            }}, multi=False)
def _postUpload(event):
    """
    Called when a file is uploaded. We check the parent item to see if it is
    expecting a large image upload, and if so we register this file as the
    result image.
    """
    fileObj = event.info['file']
    # There may not be an itemId (on thumbnails, for instance)
    if not fileObj.get('itemId'):
        return

    item = Item().load(fileObj['itemId'], force=True, exc=True)

    if item.get('largeImage', {}).get('expected') and (
            fileObj['name'].endswith('.tiff') or
            fileObj.get('mimeType') == 'image/tiff'):
        if fileObj.get('mimeType') != 'image/tiff':
            fileObj['mimeType'] = 'image/tiff'
            File().save(fileObj)
        del item['largeImage']['expected']
        item['largeImage']['fileId'] = fileObj['_id']
        item['largeImage']['sourceName'] = 'tiff'
        Item().save(item)
def attachThumbnail(file, thumbnail, attachToType, attachToId, width, height):
    """
    Add the required information to the thumbnail file and the resource it
    is being attached to, and save the documents.

    :param file: The file from which the thumbnail was derived.
    :type file: dict
    :param thumbnail: The newly generated thumbnail file document.
    :type thumbnail: dict
    :param attachToType: The type to which the thumbnail is being attached.
    :type attachToType: str
    :param attachToId: The ID of the document to attach the thumbnail to.
    :type attachToId: str or ObjectId
    :param width: Thumbnail width.
    :type width: int
    :param height: Thumbnail height.
    :type height: int
    :returns: The updated thumbnail file document.
    """
    parentModel = ModelImporter.model(attachToType)
    parent = parentModel.load(attachToId, force=True)
    parent['_thumbnails'] = parent.get('_thumbnails', [])
    parent['_thumbnails'].append(thumbnail['_id'])
    parentModel.save(parent)

    thumbnail['attachedToType'] = attachToType
    thumbnail['attachedToId'] = parent['_id']
    thumbnail['isThumbnail'] = True
    thumbnail['derivedFrom'] = {
        'type': 'file',
        'id': file['_id'],
        'process': 'thumbnail',
        'width': width,
        'height': height
    }

    return File().save(thumbnail)
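# Hypothetical wiring for attachThumbnail: attach a freshly generated
# thumbnail file to its item.  `srcFile`, `thumbFile`, and `item` are
# assumed to come from an earlier resize step; none are defined in the
# source above.
thumbFile = attachThumbnail(
    srcFile, thumbFile, attachToType='item', attachToId=item['_id'],
    width=128, height=128)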
def create_calc(self, params):
    body = getBodyJson()
    if 'cjson' not in body and ('fileId' not in body or 'format' not in body):
        raise RestException('Either cjson or fileId is required.')

    user = getCurrentUser()

    cjson = body.get('cjson')
    props = body.get('properties', {})
    molecule_id = body.get('moleculeId', None)
    public = body.get('public', False)
    notebooks = body.get('notebooks', [])
    image = body.get('image')
    input_parameters = body.get('input', {}).get('parameters')
    file_id = None
    file_format = body.get('format', 'cjson')

    if 'fileId' in body:
        file = File().load(body['fileId'], user=getCurrentUser())
        file_id = file['_id']
        cjson = self._file_to_cjson(file, file_format)

    if molecule_id is None:
        mol = create_molecule(json.dumps(cjson), 'cjson', user, public)
        molecule_id = mol['_id']

    calc = CalculationModel().create_cjson(
        user, cjson, props, molecule_id, image=image,
        input_parameters=input_parameters, file_id=file_id,
        notebooks=notebooks, public=public)

    cherrypy.response.status = 201
    cherrypy.response.headers['Location'] \
        = '/calculations/%s' % (str(calc['_id']))

    return CalculationModel().filter(calc, user)
def testImportToItemAndFile(self):
    from girder.plugins.database_assetstore.base import DB_ASSETSTORE_ID

    userstore = Assetstore().load(DB_ASSETSTORE_ID)
    resp = self.request(path='/assetstore', method='POST', user=self.admin,
                        params=self.dbParams)
    self.assertStatusOk(resp)
    assetstore1 = resp.json
    for store in (assetstore1, userstore):
        adapter = assetstore_utilities.getAssetstoreAdapter(store)
        adapter.importData(self.item1, 'item', {
            'tables': ['towns'],
            'uri': self.dbParams['dburi'],
        }, progress.noProgress, self.admin)
        self.assertEqual(Item().childFiles(item=self.item1).count(), 1)
        file = Item().childFiles(item=self.item1)[0]
        adapter.importData(file, 'file', {
            'tables': ['towns'],
            'limit': 5,
            'uri': self.dbParams['dburi'],
        }, progress.noProgress, self.admin)
        self.assertEqual(Item().childFiles(item=self.item1).count(), 1)
        for file in Item().childFiles(item=self.item1):
            File().remove(file)
def setUp(self):
    import datetime
    from girder.api import rest
    from girder.plugins import worker
    from girder.plugins.worker import utils as worker_utils
    from girder.models.file import File
    from girder.models.folder import Folder
    from girder.models.item import Item
    from girder.models.user import User

    base.TestCase.setUp(self)

    self.admin = User().createUser('admin', 'passwd', 'admin', 'admin',
                                   '*****@*****.**')
    self.folder = Folder().createFolder(self.admin, 'folder', parentType='user')
    self.item = Item().createItem('item', self.admin, self.folder)
    self.file = File().createFile(self.admin, self.item, 'file', 7, self.assetstore)

    # Mock several functions so we can fake creating jobs
    def getCurrentToken():
        return {
            '_id': str(self.admin['_id']),
            'expires': datetime.datetime.utcnow() + datetime.timedelta(hours=1)
        }

    def getWorkerApiUrl():
        return '/api/v1'

    self._origRestGetCurrentToken = rest.getCurrentToken
    self._origRestGetApiUrl = rest.getApiUrl
    self._origWorkerGetWorkerApiUrl = worker.getWorkerApiUrl
    rest.getCurrentToken = getCurrentToken
    rest.getApiUrl = lambda x: '/api/v1'
    rest.setCurrentUser(self.admin)
    worker.getWorkerApiUrl = worker_utils.getWorkerApiUrl = getWorkerApiUrl
def update(self, projection, projection_updates, user=None, public=None):
    query = {
        '_id': projection['_id']
    }
    projection_updates = self.validate(projection_updates)
    updates = {}
    mutable_props = ['voltage', 'convergenceSemiAngle', 'probeSize',
                     'detectorInnerAngle', 'detectorOuterAngle',
                     'depthOfFocus', 'pixelSize', 'tiltRange',
                     'electronDose', 'nProjections']

    for prop in projection_updates:
        if prop in mutable_props:
            updates.setdefault('$set', {})[prop] = projection_updates[prop]

    if 'emdFileId' in projection_updates:
        updates.setdefault('$set', {})['emdFileId'] = ObjectId(projection_updates['emdFileId'])

    if public is not None:
        updates.setdefault('$set', {})['public'] = public

    if updates:
        file_id = projection['emdFileId']
        super(Projection, self).update(query, update=updates, multi=False)
        if 'emdFileId' in projection_updates and \
                projection_updates['emdFileId'] != file_id:
            f = File().load(file_id, force=True)
            if f is not None:
                item = Item().load(f['itemId'], force=True)
                Item().remove(item)

        return self.load(projection['_id'], user=user, level=AccessType.READ)

    return projection
def validate(self, doc):
    try:
        assert set(six.viewkeys(doc)) >= {
            'imageId', 'creatorId', 'created', 'maskId', 'reviews', 'meta'}
        assert set(six.viewkeys(doc)) <= {
            '_id', 'imageId', 'creatorId', 'created', 'maskId', 'reviews', 'meta'}

        assert isinstance(doc['imageId'], ObjectId)
        assert Image().find({'_id': doc['imageId']}).count()

        assert isinstance(doc['creatorId'], ObjectId)
        assert User().find({'_id': doc['creatorId']}).count()

        assert isinstance(doc['created'], datetime.datetime)

        if doc['maskId']:
            assert isinstance(doc['maskId'], ObjectId)
            assert File().find({'_id': doc['maskId']}).count()

        assert isinstance(doc['reviews'], list)
        for review in doc['reviews']:
            assert set(six.viewkeys(review)) == {
                'userId', 'skill', 'time', 'approved'}
            assert isinstance(review['userId'], ObjectId)
            assert User().find({'_id': review['userId']}).count()
            assert review['skill'] in {self.Skill.NOVICE, self.Skill.EXPERT}
            assert isinstance(review['time'], datetime.datetime)
            assert isinstance(review['approved'], bool)

        assert isinstance(doc['meta'], dict)
    except (AssertionError, KeyError):
        # TODO: message
        raise ValidationException('')

    return doc
def testDownloadLogging(server, admin, fsAssetstore, freshLog):
    folder = Folder().find({'parentId': admin['_id'], 'name': 'Public'})[0]
    file = Upload().uploadFromFile(
        io.BytesIO(b'hello'), size=5, name='test', parentType='folder',
        parent=folder, user=admin, assetstore=fsAssetstore)
    Record().collection.remove({})  # Clear existing records

    File().download(file, headers=False, offset=2, endByte=4)

    records = Record().find()
    assert records.count() == 1
    record = records[0]
    assert record['ip'] == '127.0.0.1'
    assert record['type'] == 'file.download'
    assert record['details']['fileId'] == file['_id']
    assert record['details']['startByte'] == 2
    assert record['details']['endByte'] == 4
    assert isinstance(record['when'], datetime.datetime)
class File(Resource):
    """
    API Endpoint for files. Includes utilities for uploading and downloading
    them.
    """

    def __init__(self):
        super(File, self).__init__()
        self._model = FileModel()
        self.resourceName = 'file'
        self.route('DELETE', (':id',), self.deleteFile)
        self.route('DELETE', ('upload', ':id'), self.cancelUpload)
        self.route('GET', ('offset',), self.requestOffset)
        self.route('GET', (':id',), self.getFile)
        self.route('GET', (':id', 'download'), self.download)
        self.route('GET', (':id', 'download', ':name'), self.downloadWithName)
        self.route('POST', (), self.initUpload)
        self.route('POST', ('chunk',), self.readChunk)
        self.route('POST', ('completion',), self.finalizeUpload)
        self.route('POST', (':id', 'copy'), self.copy)
        self.route('PUT', (':id',), self.updateFile)
        self.route('PUT', (':id', 'contents'), self.updateFileContents)
        self.route('PUT', (':id', 'move'), self.moveFileToAssetstore)

    @access.public(scope=TokenScope.DATA_READ)
    @filtermodel(model=FileModel)
    @autoDescribeRoute(
        Description('Get a file\'s information.')
        .modelParam('id', model=FileModel, level=AccessType.READ)
        .errorResponse()
        .errorResponse('Read access was denied on the file.', 403)
    )
    def getFile(self, file):
        return file

    @access.user(scope=TokenScope.DATA_WRITE)
    @autoDescribeRoute(
        Description('Start a new upload or create an empty or link file.')
        .notes('Use POST /file/chunk to send the contents of the file. '
               'The data for the first chunk of the file can be included with '
               'this query by sending it as the body of the request using an '
               'appropriate content-type and with the other parameters as '
               'part of the query string. If the entire file is uploaded via '
               'this call, the resulting file is returned.')
        .responseClass('Upload')
        .param('parentType', 'Type being uploaded into.', enum=['folder', 'item'])
        .param('parentId', 'The ID of the parent.')
        .param('name', 'Name of the file being created.')
        .param('size', 'Size in bytes of the file.', dataType='integer', required=False)
        .param('mimeType', 'The MIME type of the file.', required=False)
        .param('linkUrl', 'If this is a link file, pass its URL instead '
               'of size and mimeType using this parameter.', required=False)
        .param('reference', 'If included, this information is passed to the '
               'data.process event when the upload is complete.', required=False)
        .param('assetstoreId', 'Direct the upload to a specific assetstore (admin-only).',
               required=False)
        .errorResponse()
        .errorResponse('Write access was denied on the parent folder.', 403)
        .errorResponse('Failed to create upload.', 500)
    )
    def initUpload(self, parentType, parentId, name, size, mimeType, linkUrl,
                   reference, assetstoreId):
        """
        Before any bytes of the actual file are sent, a request should be made
        to initialize the upload. This creates the temporary record of the
        forthcoming upload that will be passed in chunks to the readChunk
        method. If you pass a "linkUrl" parameter, it will make a link file
        in the designated parent.
        """
        user = self.getCurrentUser()
        parent = self.model(parentType).load(
            id=parentId, user=user, level=AccessType.WRITE, exc=True)

        if linkUrl is not None:
            return self._model.filter(
                self._model.createLinkFile(
                    url=linkUrl, parent=parent, name=name, parentType=parentType,
                    creator=user, size=size, mimeType=mimeType), user)
        else:
            self.requireParams({'size': size})
            assetstore = None
            if assetstoreId:
                self.requireAdmin(
                    user, message='You must be an admin to select a destination assetstore.')
                assetstore = Assetstore().load(assetstoreId)

            chunk = None
            if size > 0 and cherrypy.request.headers.get('Content-Length'):
                ct = cherrypy.request.body.content_type.value
                if (ct not in cherrypy.request.body.processors and
                        ct.split('/', 1)[0] not in cherrypy.request.body.processors):
                    chunk = RequestBodyStream(cherrypy.request.body)
            if chunk is not None and chunk.getSize() <= 0:
                chunk = None

            try:
                # TODO: This can be made more efficient by adding
                #    save=chunk is None
                # to the createUpload call parameters.  However, since this is
                # a breaking change, that should be deferred until a major
                # version upgrade.
                upload = Upload().createUpload(
                    user=user, name=name, parentType=parentType, parent=parent,
                    size=size, mimeType=mimeType, reference=reference,
                    assetstore=assetstore)
            except OSError as exc:
                if exc.errno == errno.EACCES:
                    raise GirderException(
                        'Failed to create upload.',
                        'girder.api.v1.file.create-upload-failed')
                raise
            if upload['size'] > 0:
                if chunk:
                    return Upload().handleChunk(upload, chunk, filter=True, user=user)
                return upload
            else:
                return self._model.filter(Upload().finalizeUpload(upload), user)

    @access.user(scope=TokenScope.DATA_WRITE)
    @autoDescribeRoute(
        Description('Finalize an upload explicitly if necessary.')
        .notes('This is only required in certain non-standard upload '
               'behaviors. Clients should know which behavior models require '
               'the finalize step to be called in their behavior handlers.')
        .modelParam('uploadId', paramType='formData', model=Upload)
        .errorResponse(('ID was invalid.',
                        'The upload does not require finalization.',
                        'Not enough bytes have been uploaded.'))
        .errorResponse('You are not the user who initiated the upload.', 403)
    )
    def finalizeUpload(self, upload):
        user = self.getCurrentUser()

        if upload['userId'] != user['_id']:
            raise AccessException('You did not initiate this upload.')

        # If we don't have as much data as we were told would be uploaded and
        # the upload hasn't specified it has an alternate behavior, refuse to
        # complete the upload.
        if upload['received'] != upload['size'] and 'behavior' not in upload:
            raise RestException(
                'Server has only received %s bytes, but the file should be %s bytes.' %
                (upload['received'], upload['size']))

        file = Upload().finalizeUpload(upload)
        extraKeys = file.get('additionalFinalizeKeys', ())
        return self._model.filter(file, user, additionalKeys=extraKeys)

    @access.user(scope=TokenScope.DATA_WRITE)
    @autoDescribeRoute(
        Description('Request required offset before resuming an upload.')
        .modelParam('uploadId', paramType='formData', model=Upload)
        .errorResponse("The ID was invalid, or the offset did not match the server's record.")
    )
    def requestOffset(self, upload):
        """
        This should be called when resuming an interrupted upload. It will
        report the offset into the upload that should be used to resume.

        :param uploadId: The _id of the temp upload record being resumed.
        :returns: The offset in bytes that the client should use.
        """
        offset = Upload().requestOffset(upload)

        if isinstance(offset, six.integer_types):
            upload['received'] = offset
            Upload().save(upload)
            return {'offset': offset}
        else:
            return offset

    @access.user(scope=TokenScope.DATA_WRITE)
    @autoDescribeRoute(
        Description('Upload a chunk of a file.')
        .notes('The data for the chunk should be sent as the body of the '
               'request using an appropriate content-type and with the other '
               'parameters as part of the query string. Alternately, the '
               'data can be sent as a file in the "chunk" field in multipart '
               'form data. Multipart uploads are much less efficient and '
               'their use is deprecated.')
        .modelParam('uploadId', paramType='formData', model=Upload)
        .param('offset', 'Offset of the chunk in the file.', dataType='integer',
               paramType='formData')
        .errorResponse(('ID was invalid.',
                        'Received too many bytes.',
                        'Chunk is smaller than the minimum size.'))
        .errorResponse('You are not the user who initiated the upload.', 403)
        .errorResponse('Failed to store upload.', 500)
    )
    def readChunk(self, upload, offset, params):
        """
        After the temporary upload record has been created (see initUpload),
        the bytes themselves should be passed up in ordered chunks. The user
        must remain logged in when passing each chunk, to authenticate that
        the writer of the chunk is the same as the person who initiated the
        upload.

        The passed offset is a verification mechanism for ensuring the server
        and client agree on the number of bytes sent/received.

        This method accepts both the legacy multipart content encoding, as
        well as passing offset and uploadId as query parameters and passing
        the chunk as the body, which is the recommended method.

        .. deprecated :: 2.2.0
        """
        if 'chunk' in params:
            chunk = params['chunk']
            if isinstance(chunk, cherrypy._cpreqbody.Part):
                # Seek is the only obvious way to get the length of the part
                chunk.file.seek(0, os.SEEK_END)
                size = chunk.file.tell()
                chunk.file.seek(0, os.SEEK_SET)
                chunk = RequestBodyStream(chunk.file, size=size)
        else:
            chunk = RequestBodyStream(cherrypy.request.body)

        user = self.getCurrentUser()

        if upload['userId'] != user['_id']:
            raise AccessException('You did not initiate this upload.')
        if upload['received'] != offset:
            raise RestException(
                'Server has received %s bytes, but client sent offset %s.' % (
                    upload['received'], offset))
        try:
            return Upload().handleChunk(upload, chunk, filter=True, user=user)
        except IOError as exc:
            if exc.errno == errno.EACCES:
                raise Exception('Failed to store upload.')
            raise

    @access.cookie
    @access.public(scope=TokenScope.DATA_READ)
    @autoDescribeRoute(
        Description('Download a file.')
        .notes('This endpoint also accepts the HTTP "Range" header for partial '
               'file downloads.')
        .modelParam('id', model=FileModel, level=AccessType.READ)
        .param('offset', 'Start downloading at this offset in bytes within '
               'the file.', dataType='integer', required=False, default=0)
        .param('endByte', 'If you only wish to download part of the file, '
               'pass this as the index of the last byte to download. Unlike '
               'the HTTP Range header, the endByte parameter is non-inclusive, '
               'so you should set it to the index of the byte one past the '
               'final byte you wish to receive.', dataType='integer',
               required=False)
        .param('contentDisposition', 'Specify the Content-Disposition response '
               'header disposition-type value.', required=False,
               enum=['inline', 'attachment'], default='attachment')
        .param('extraParameters', 'Arbitrary data to send along with the download request.',
               required=False)
        .errorResponse('ID was invalid.')
        .errorResponse('Read access was denied on the parent folder.', 403)
    )
    def download(self, file, offset, endByte, contentDisposition, extraParameters):
        """
        Defers to the underlying assetstore adapter to stream a file out.
        Requires read permission on the folder that contains the file's item.
        """
        rangeHeader = cherrypy.lib.httputil.get_ranges(
            cherrypy.request.headers.get('Range'), file.get('size', 0))

        # The HTTP Range header takes precedence over query params
        if rangeHeader and len(rangeHeader):
            # Currently we only support a single range.
            offset, endByte = rangeHeader[0]

        return self._model.download(
            file, offset, endByte=endByte, contentDisposition=contentDisposition,
            extraParameters=extraParameters)

    @access.cookie
    @access.public(scope=TokenScope.DATA_READ)
    @describeRoute(
        Description('Download a file.')
        .param('id', 'The ID of the file.', paramType='path')
        .param('name', 'The name of the file. This is ignored.', paramType='path')
        .param('offset', 'Start downloading at this offset in bytes within '
               'the file.', dataType='integer', required=False)
        .notes('The name parameter doesn\'t alter the download. Some '
               'download clients save files based on the last part of a path, '
               'and specifying the name satisfies those clients.')
        .errorResponse('ID was invalid.')
        .errorResponse('Read access was denied on the parent folder.', 403)
    )
    def downloadWithName(self, id, name, params):
        return self.download(id=id, params=params)

    @access.user(scope=TokenScope.DATA_WRITE)
    @autoDescribeRoute(
        Description('Delete a file by ID.')
        .modelParam('id', model=FileModel, level=AccessType.WRITE)
        .errorResponse('ID was invalid.')
        .errorResponse('Write access was denied on the parent folder.', 403)
    )
    def deleteFile(self, file):
        self._model.remove(file)

    @access.user(scope=TokenScope.DATA_WRITE)
    @autoDescribeRoute(
        Description('Cancel a partially completed upload.')
        .modelParam('id', model=Upload)
        .errorResponse('ID was invalid.')
        .errorResponse('You lack permission to cancel this upload.', 403)
    )
    def cancelUpload(self, upload):
        user = self.getCurrentUser()

        if upload['userId'] != user['_id'] and not user['admin']:
            raise AccessException('You did not initiate this upload.')

        Upload().cancelUpload(upload)
        return {'message': 'Upload canceled.'}

    @access.user(scope=TokenScope.DATA_WRITE)
    @filtermodel(model=FileModel)
    @autoDescribeRoute(
        Description('Change file metadata such as name or MIME type.')
        .modelParam('id', model=FileModel, level=AccessType.WRITE)
        .param('name', 'The name to set on the file.', required=False, strip=True)
        .param('mimeType', 'The MIME type of the file.', required=False, strip=True)
        .errorResponse('ID was invalid.')
        .errorResponse('Write access was denied on the parent folder.', 403)
    )
    def updateFile(self, file, name, mimeType):
        if name is not None:
            file['name'] = name
        if mimeType is not None:
            file['mimeType'] = mimeType

        return self._model.updateFile(file)

    @access.user(scope=TokenScope.DATA_WRITE)
    @autoDescribeRoute(
        Description('Change the contents of an existing file.')
        .modelParam('id', model=FileModel, level=AccessType.WRITE)
        .param('size', 'Size in bytes of the new file.', dataType='integer')
        .param('reference', 'If included, this information is passed to the '
               'data.process event when the upload is complete.', required=False)
        .param('assetstoreId', 'Direct the upload to a specific assetstore (admin-only).',
               required=False)
        .notes('After calling this, send the chunks just like you would with a '
               'normal file upload.')
    )
    def updateFileContents(self, file, size, reference, assetstoreId):
        user = self.getCurrentUser()

        assetstore = None
        if assetstoreId:
            self.requireAdmin(
                user, message='You must be an admin to select a destination assetstore.')
            assetstore = Assetstore().load(assetstoreId)

        # Create a new upload record into the existing file
        upload = Upload().createUploadToFile(
            file=file, user=user, size=size, reference=reference,
            assetstore=assetstore)

        if upload['size'] > 0:
            return upload
        else:
            return self._model.filter(Upload().finalizeUpload(upload), user)

    @access.admin(scope=TokenScope.DATA_WRITE)
    @filtermodel(model=FileModel)
    @autoDescribeRoute(
        Description('Move a file to a different assetstore.')
        .modelParam('id', model=FileModel, level=AccessType.WRITE)
        .modelParam('assetstoreId', 'The destination assetstore.', paramType='formData',
                    model=Assetstore)
        .param('progress', 'Controls whether progress notifications will be sent.',
               dataType='boolean', default=False, required=False)
    )
    def moveFileToAssetstore(self, file, assetstore, progress):
        user = self.getCurrentUser()
        title = 'Moving file "%s" to assetstore "%s"' % (file['name'], assetstore['name'])

        with ProgressContext(progress, user=user, title=title, total=file['size']) as ctx:
            return Upload().moveFileToAssetstore(
                file=file, user=user, assetstore=assetstore, progress=ctx)

    @access.user(scope=TokenScope.DATA_WRITE)
    @filtermodel(model=FileModel)
    @autoDescribeRoute(
        Description('Copy a file.')
        .modelParam('id', model=FileModel, level=AccessType.READ)
        .modelParam('itemId', description='The ID of the item to copy the file to.',
                    level=AccessType.WRITE, paramType='formData', model=Item)
    )
    def copy(self, file, item):
        return self._model.copyFile(file, self.getCurrentUser(), item=item)
def importData(self, parent, parentType, params, progress, user, **kwargs):
    """
    Import a list of tables, each to a file within a distinct item.  Each
    table specification in the list is an object which must have a 'table'
    key.  It may optionally have other connection information such as
    'database' and 'schema'.  If there is a 'name' key, the name is used
    for the item and file.  If there is a 'database' key, a subfolder is
    created within the specified parent with that name.  If a user or
    collection is specified for the top level and no database key is
    specified, the default database name (from the assetstore) is used.
    If the specific item and file already exist and are from the same
    assetstore, they are updated.  If the specific item already exists and
    is not from the same assetstore (or not marked that it was imported),
    an error is given.

    :param parent: The parent object to import into.  Must be a folder,
        user, collection, item, or file.
    :param parentType: The model type of the parent object.
    :param params: Additional parameters required for the import process:
        tables: a list of tables to add.  If there is already an item with
            an exact table name, it is updated.
        sort: default sort parameter.  Used in plain downloads.
        fields: default fields parameter.  Used in plain downloads.
        filters: default filters parameter.  Used in plain downloads.
        group: default group parameter.  Used in plain downloads.
        format: default format parameter.  Used in plain downloads.
        replace: if False, don't replace an existing file/item with the
            name, but always create new entries.  A parentType of file
            will always replace the existing data of a file.
    :type params: dict
    :param progress: Object on which to record progress if possible.
    :type progress: :py:class:`girder.utility.progress.ProgressContext`
    :param user: The Girder user performing the import.
    :type user: dict or None
    :return: a list of objects, each of which has an item and file entry
        with the items and files that were imported.
    """
    uri = (self.assetstore['database'].get('uri')
           if self.assetstore['database'].get('uri') else params['uri'])
    defaultDatabase = dbs.databaseFromUri(uri)
    response = []
    createdFolder = createdItem = createdFile = False
    for table in params['tables']:
        if isinstance(table, six.string_types):
            dbinfo = {'table': table}
        else:
            dbinfo = table.copy()
        if not self.assetstore['database'].get('uri'):
            dbinfo['uri'] = uri
        name = dbinfo.pop('name', dbinfo['table'])
        progress.update(message='Importing %s' % name)

        # Find or create a folder if needed
        if 'database' not in dbinfo and parentType == 'folder':
            folder = parent
        elif parentType not in ('file', 'item'):
            folderName = dbinfo.get('database', defaultDatabase)
            folder = Folder().findOne({
                'parentId': parent['_id'],
                'name': folderName,
                'parentCollection': parentType
            })
            if folder is None:
                folder = Folder().createFolder(
                    parent, folderName, parentType=parentType,
                    creator=user)
                createdFolder = True

        if parentType == 'file':
            # for files, we'll create a provisional file below, then
            # delete the original assetstore entry and modify the
            # existing file entry with the updated values before saving.
            item = Item().load(parent['itemId'], force=True)
        elif parentType == 'item':
            item = parent
        else:
            # Create an item if needed
            item = Item().findOne({
                'folderId': folder['_id'],
                'name': name
            })
            if item is None or params.get('replace') is False:
                item = Item().createItem(
                    name=name, creator=user, folder=folder)
                createdItem = True

        # Create a file if needed
        file = File().findOne({
            'name': name,
            'itemId': item['_id']
        })
        if (file is None or params.get('replace') is False or
                parentType == 'file'):
            file = File().createFile(
                creator=user, item=item, name=name, size=0,
                assetstore=self.assetstore,
                mimeType=dbFormatList.get(preferredFormat(params.get(
                    'format'))),
                saveFile=False)
            createdFile = True

        if file.get(DB_INFO_KEY) and not file[DB_INFO_KEY].get('imported'):
            raise GirderException(
                'A file for table %s is present but cannot be updated '
                'because it wasn\'t imported.' % name)
        try:
            file = self._importDataFile(
                file, parent, parentType, dbinfo, params)
        except GirderException as exc:
            self._importDataCleanup(
                file if createdFile else None,
                item if createdItem else None,
                folder if createdFolder else None)
            raise exc
        response.append({'item': item, 'file': file})
    return response
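# A sketch of a params dict accepted by importData() above, matching the
# documented keys; the table, database, and field names are illustrative
# placeholders, not values from this codebase.
exampleImportParams = {
    'tables': [
        'towns',                               # bare table name
        {'table': 'roads', 'database': 'gis',  # full specification with an
         'name': 'roads_2020'},                # explicit item/file name
    ],
    'sort': 'town',      # defaults applied to plain downloads
    'format': 'list',
    'replace': True,     # update matching items/files instead of duplicating
}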
def testAssetstoreDownload(self):
    from girder.plugins.database_assetstore import assetstore
    from girder.plugins.database_assetstore import query
    for userAssetstore in (False, True):
        townItem, townFile, assetstore1 = self._createTownItem({
            'format': 'list',
            'fields': 'town,pop2010',
            'limit': '10'
        }, userAssetstore)

        resp = self.request(path='/item/%s/download' % str(townItem['_id']))
        self.assertStatusOk(resp)
        data = resp.json
        self.assertEqual(data['datacount'], 10)
        self.assertEqual(data['fields'], ['town', 'pop2010'])

        # Test extraParameters for format
        params = {
            'extraParameters': urllib.parse.urlencode({
                'format': 'csv',
                'limit': 5
            }),
            'contentDisposition': 'inline'
        }
        resp = self.request(
            path='/item/%s/download' % str(townItem['_id']),
            params=params, isJson=False)
        self.assertStatusOk(resp)
        data = self.getBody(resp)
        self.assertEqual(len(data.split('\r\n')), 7)
        self.assertEqual(data.split('\r\n', 1)[0], 'town,pop2010')

        # Test range requests
        resp = self.request(
            path='/item/%s/download' % str(townItem['_id']),
            params=params, isJson=False,
            additionalHeaders=[('Range', 'bytes=10-19')])
        self.assertStatus(resp, 206)
        self.assertEqual(self.getBody(resp), data[10:20])
        resp = self.request(
            path='/item/%s/download' % str(townItem['_id']),
            params=params, isJson=False,
            additionalHeaders=[('Range', 'bytes=50-')])
        self.assertStatus(resp, 206)
        self.assertEqual(self.getBody(resp), data[50:])
        resp = self.request(
            path='/item/%s/download' % str(townItem['_id']),
            params=params, isJson=False,
            additionalHeaders=[('Range', 'bytes=5000-')])
        self.assertStatus(resp, 206)
        self.assertEqual(self.getBody(resp), '')

        # Test more complex extraParameters
        extra = {
            'format': 'list',
            'fields': json.dumps(['town', 'pop2000', 'pop2010']),
            'sort': json.dumps([['pop2000', -1]]),
            'filters': json.dumps([{
                'field': 'pop2000', 'operator': '<', 'value': 100000}]),
            'limit': 5
        }
        params = {'extraParameters': urllib.parse.urlencode(extra)}
        resp = self.request(
            path='/item/%s/download' % str(townItem['_id']), params=params)
        self.assertStatusOk(resp)
        data = resp.json
        self.assertEqual(data['datacount'], 5)
        self.assertEqual(data['fields'], ['town', 'pop2000', 'pop2010'])
        self.assertLess(int(data['data'][0][1]), 100000)
        self.assertLess(int(data['data'][1][1]), int(data['data'][0][1]))

        # Test with JSON extraParameters
        params = {'extraParameters': json.dumps(extra)}
        resp = self.request(
            path='/item/%s/download' % str(townItem['_id']), params=params)
        self.assertStatusOk(resp)
        data = resp.json
        self.assertEqual(data['datacount'], 5)
        self.assertEqual(data['fields'], ['town', 'pop2000', 'pop2010'])
        self.assertLess(int(data['data'][0][1]), 100000)
        self.assertLess(int(data['data'][1][1]), int(data['data'][0][1]))

        # Test a direct call
        townFile = list(Item().childFiles(item=townItem))[0]
        adapter = File().getAssetstoreAdapter(townFile)
        params = {
            'format': 'list',
            'fields': ['town', 'pop2000', 'pop2010'],
            'sort': [['pop2000', -1]],
            'filters': [{
                'field': 'pop2000', 'operator': '<', 'value': 100000}],
            'limit': 5
        }
        func = adapter.downloadFile(
            townFile, headers=False, extraParameters=params)
        data = b''.join([part for part in func()])
        data = json.loads(data.decode('utf8'))
        self.assertEqual(data['datacount'], 5)
        self.assertEqual(data['fields'], ['town', 'pop2000', 'pop2010'])
        self.assertLess(int(data['data'][0][1]), 100000)
        self.assertLess(int(data['data'][1][1]), int(data['data'][0][1]))

        # Test a direct query with group
        params = {
            'format': 'rawlist',
            'sort': [
                [{'func': 'count', 'param': {'field': 'town'}}, -1],
                [{'func': 'max', 'param': {'field': 'town'}}, 1]
            ],
            'fields': [
                {'func': 'max', 'param': {'field': 'town'}},
                'pop2010',
                {'func': 'count', 'param': {'field': 'town'}}
            ],
            'group': 'pop2010,popch80_90',
            'limit': 5,
        }
        data = query.queryDatabase(
            townFile['_id'], assetstore.getDbInfoForFile(townFile), params)
        data = list(data[0]())
        self.assertEqual(len(data), 5)
        self.assertEqual(data[0][0], 'ABINGTON')
        self.assertEqual(data[4][0], 'AGAWAM')

        # Test with bad extraParameters
        with six.assertRaisesRegex(self, Exception,
                                   'JSON-encoded dictionary, or a url'):
            adapter.downloadFile(townFile, headers=False, extraParameters=6)

        # Test with 0 and none limits
        params = {
            'format': 'list',
            'fields': 'town,pop2000',
            'sort': 'pop2000',
            'filters': json.dumps([{
                'field': 'pop2000', 'operator': '>', 'value': 25000}]),
        }
        params['limit'] = 0
        func = adapter.downloadFile(
            townFile, headers=False, extraParameters=params)
        jsondata = b''.join([part for part in func()])
        data = json.loads(jsondata.decode('utf8'))
        self.assertEqual(data['datacount'], 0)
        self.assertEqual(data['fields'], ['town', 'pop2000'])
        # It shouldn't matter if we ask for this via json, query, or object
        func = adapter.downloadFile(
            townFile, headers=False,
            extraParameters=urllib.parse.urlencode(params))
        self.assertEqual(b''.join([part for part in func()]), jsondata)
        func = adapter.downloadFile(
            townFile, headers=False, extraParameters=json.dumps(params))
        self.assertEqual(b''.join([part for part in func()]), jsondata)
        params['limit'] = 'none'
        func = adapter.downloadFile(
            townFile, headers=False, extraParameters=params)
        jsondata = b''.join([part for part in func()])
        data = json.loads(jsondata.decode('utf8'))
        self.assertEqual(data['datacount'], 71)
        self.assertEqual(data['fields'], ['town', 'pop2000'])
        # It shouldn't matter if we ask for this via json, query, or object
        func = adapter.downloadFile(
            townFile, headers=False,
            extraParameters=urllib.parse.urlencode(params))
        self.assertEqual(b''.join([part for part in func()]), jsondata)
        func = adapter.downloadFile(
            townFile, headers=False, extraParameters=json.dumps(params))
        self.assertEqual(b''.join([part for part in func()]), jsondata)
        # None can also be used as unlimited
        params['limit'] = None
        func = adapter.downloadFile(
            townFile, headers=False, extraParameters=params)
        self.assertEqual(b''.join([part for part in func()]), jsondata)
        func = adapter.downloadFile(
            townFile, headers=False,
            extraParameters=urllib.parse.urlencode(params))
        self.assertEqual(b''.join([part for part in func()]), jsondata)
        func = adapter.downloadFile(
            townFile, headers=False, extraParameters=json.dumps(params))
        self.assertEqual(b''.join([part for part in func()]), jsondata)
        # filters can also be an object or tuple
        params['filters'] = json.loads(params['filters'])
        func = adapter.downloadFile(
            townFile, headers=False, extraParameters=params)
        self.assertEqual(b''.join([part for part in func()]), jsondata)
        params['filters'] = tuple(params['filters'])
        func = adapter.downloadFile(
            townFile, headers=False, extraParameters=params)
        self.assertEqual(b''.join([part for part in func()]), jsondata)
        # Test with group
        params['sort'] = [
            [{'func': 'count', 'param': {'field': 'town'}}, -1],
            [{'func': 'max', 'param': {'field': 'town'}}, 1]]
        params['fields'] = [
            {'func': 'max', 'param': {'field': 'town'}},
            'pop2010',
            {'func': 'count', 'param': {'field': 'town'}}]
        params['group'] = 'pop2010'
        params['limit'] = 5
        del params['filters']
        func = adapter.downloadFile(
            townFile, headers=False, extraParameters=params)
        jsondata = b''.join([part for part in func()])
        data = json.loads(jsondata.decode('utf8'))
        self.assertEqual(data['datacount'], 5)
        self.assertEqual(data['data'][0][0], 'DEDHAM')
        self.assertEqual(data['data'][0][2], 2)
        self.assertEqual(data['data'][4][0], 'ACTON')
        self.assertEqual(data['data'][4][2], 1)
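# The grouped query exercised in the test above is compact but dense; this
# plain-Python approximation of what it computes may help. The real work is
# done by the assetstore's query layer; the sample rows of (town, pop2010)
# tuples below are made up for illustration.
from itertools import groupby

rows = [('ACTON', 21924), ('DEDHAM', 24729), ('WESTWOOD', 24729)]
groups = []
for pop, grp in groupby(sorted(rows, key=lambda r: r[1]), key=lambda r: r[1]):
    towns = [r[0] for r in grp]
    # fields: max(town), pop2010, count(town)
    groups.append([max(towns), pop, len(towns)])
# sort: count(town) descending, then max(town) ascending; limit 5
groups.sort(key=lambda g: (-g[2], g[0]))
print(groups[:5])  # [['WESTWOOD', 24729, 2], ['ACTON', 21924, 1]]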
class LabelResource(Resource):
    def __init__(self):
        super().__init__()

        self.resourceName = 'label'
        self.coll_m = Collection()
        self.file_m = File()
        self.folder_m = Folder()
        self.item_m = Item()
        self.upload_m = Upload()
        self.asset_m = Assetstore()

        self.setupRoutes()

    def setupRoutes(self):
        self.route('GET', (), handler=self.getLabelList)
        self.route('GET', (':label_id',), self.getLabel)
        self.route('GET', ('meta',), self.getLabelMeta)
        self.route('GET', ('create',), self.createLabelFile)
        self.route('GET', ('by_name',), self.getLabelByName)
        self.route('POST', (), self.postLabel)

    def createNewFile(self, folder, file_name):
        item = self.item_m.createItem(file_name,
                                      creator=self.getCurrentUser(),
                                      folder=folder,
                                      description='label file',
                                      reuseExisting=False)
        file = self.file_m.createFile(size=0, item=item, name=file_name,
                                      creator=self.getCurrentUser(),
                                      assetstore=self.asset_m.getCurrent(),
                                      mimeType="application/json")
        return file

    def copy(self, srcFile, destFile):
        upload = self.upload_m.createUploadToFile(destFile,
                                                  self.getCurrentUser(),
                                                  srcFile['size'])
        # Stream the source file's bytes; the chunk size must match the
        # source size (destFile was just created with size 0).
        self.upload_m.handleChunk(upload=upload,
                                  chunk=RequestBodyStream(
                                      self.file_m.open(srcFile),
                                      size=srcFile['size']),
                                  user=self.getCurrentUser())
        return upload

    @access.public
    @autoDescribeRoute(
        Description('Get label list'))
    @rest.rawResponse
    def getLabelList(self):
        printOk('getLabelsList() was called!')
        try:
            collection = list(self.coll_m.list(user=self.getCurrentUser(),
                                               offset=0, limit=1))[0]
            files = self.coll_m.fileList(collection,
                                         user=self.getCurrentUser(),
                                         data=False, includeMetadata=True,
                                         mimeFilter=['application/json'])
            files = list(files)
            cherrypy.response.headers["Content-Type"] = "application/json"
            return dumps(files)
        except Exception:
            printFail(traceback.format_exc())

    @staticmethod
    def getOwnerId(folder):
        aclList = Folder().getFullAccessList(folder)
        for acl in aclList['users']:
            if acl['level'] == AccessType.ADMIN:
                return str(acl['id'])
        return None

    def getConfigFolder(self, label_folder_id):
        label_folder = Folder().load(label_folder_id,
                                     user=self.getCurrentUser(),
                                     level=AccessType.READ)
        ownerId = self.getOwnerId(label_folder)
        config_folder = self.folder_m.load(label_folder['meta'][ownerId],
                                           level=AccessType.READ,
                                           user=self.getCurrentUser())
        return config_folder

    def findConfig(self, folder_id):
        folder = self.getConfigFolder(folder_id)
        printOk2("Config folder {}".format(folder))
        files = self.folder_m.fileList(folder, self.getCurrentUser(),
                                       data=False)
        for file_path, file in files:
            printOk(file)
            if file['name'] == "config.json":
                return file

    def __findFile(self, folder, file_name):
        item = list(self.item_m.find({'folderId': folder['_id'],
                                      'name': file_name}).limit(1))
        if not item:
            return None
        item = item[0]
        file = list(self.file_m.find({'itemId': item['_id']}).limit(1))
        if not file:
            return None
        return file[0]

    @access.public
    @autoDescribeRoute(
        Description('Create a new label file if it doesn\'t exist')
        .param('file_name', 'label file name')
        .param('folder_id', 'the parent folder id'))
    @rest.rawResponse
    def createLabelFile(self, file_name, folder_id):
        try:
            folder = self.folder_m.load(folder_id,
                                        user=self.getCurrentUser(),
                                        level=AccessType.WRITE)
            file = self.__findFile(folder, file_name)
            if not file:
                file = self.createNewFile(folder, file_name)
                config_file = self.findConfig(folder_id)
                if not config_file:
                    printFail("No config file found")
                    return errorMessage("No config file found")
                else:
                    res = self.copy(config_file, file)
                    return dumps({"label_id": res['fileId']})
            return dumps({"label_id": file['_id']})
        except Exception:
            printFail(traceback.format_exc())
            cherrypy.response.status = 500

    @access.public
    @autoDescribeRoute(
        Description('Get labels by file_name')
        .param('file_name', 'label file name')
        .param('folder_id', 'the parent folder id'))
    @rest.rawResponse
    def getLabelByName(self, file_name, folder_id):
        try:
            folder = self.folder_m.load(folder_id,
                                        user=self.getCurrentUser(),
                                        level=AccessType.READ)
            file = self.__findFile(folder, file_name)
            cherrypy.response.headers["Content-Type"] = "application/json"
            if file:
                return self.file_m.download(file)
            else:
                return dumps({})
        except Exception:
            printFail(traceback.format_exc())
            cherrypy.response.status = 500

    @access.public
    @autoDescribeRoute(
        Description('Get label by id')
        .param('label_id', 'label file id'))
    @rest.rawResponse
    def getLabel(self, label_id):
        try:
            file = self.file_m.load(label_id, level=AccessType.READ,
                                    user=self.getCurrentUser())
            printOk2(file)
            cherrypy.response.headers["Content-Type"] = "application/json"
            return self.file_m.download(file)
        except Exception:
            # Unknown slug
            printFail(traceback.format_exc())
            cherrypy.response.status = 404

    @access.public
    @autoDescribeRoute(
        Description('Get label metadata by id')
        .param('label_id', 'label file id'))
    def getLabelMeta(self, label_id):
        try:
            file = self.file_m.load(label_id, level=AccessType.READ,
                                    user=self.getCurrentUser())
            cherrypy.response.headers["Content-Type"] = "application/json"
            return dumps(file)
        except Exception:
            # Unknown slug
            printFail(traceback.format_exc())
            cherrypy.response.status = 404

    @access.public
    @autoDescribeRoute(
        Description('Post label by id')
        .param('label_id', 'label file id'))
    @rest.rawResponse
    def postLabel(self, label_id, params):
        try:
            file = self.file_m.load(label_id, level=AccessType.WRITE,
                                    user=self.getCurrentUser())
            cherrypy.response.headers["Content-Type"] = "application/json"
            params['labels'] = json.loads(params['labels'])
            data = json.dumps(params, indent=2, sort_keys=True)
            upload = writeData(self.getCurrentUser(), file, data)
            printOk2(file)
            printOk(upload)
            return dumps(upload)
        except Exception:
            # Unknown slug
            printFail(traceback.format_exc())
            cherrypy.response.status = 404

    @access.public
    @autoDescribeRoute(
        Description('Convert strokes to an outline')
        .param('label_id', 'label file id'))
    @rest.rawResponse
    def strokeToOutline(self, strokes):
        # Unimplemented stub; not registered in setupRoutes().
        pass
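# A sketch of client calls against the label routes above; it assumes a
# local Girder server and the `requests` library, and every ID below is a
# placeholder rather than a value from this codebase.
import json
import requests

api = 'http://localhost:8080/api/v1'

# Create (or fetch) a label file in a folder; a new file is seeded from the
# owner's config.json via the copy() helper.
resp = requests.get('%s/label/create' % api, params={
    'file_name': 'scan01.json', 'folder_id': '<folder id>'})
labelId = resp.json()['label_id']

# Download the label contents, then post an updated set of labels back.
labels = requests.get('%s/label/%s' % (api, labelId)).json()
requests.post('%s/label' % api, params={
    'label_id': labelId,
    'labels': json.dumps(labels.get('labels', [])),
})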
def createThumbnail(width, height, crop, fileId, attachToType, attachToId):
    """
    Creates the thumbnail. Validation and access control must be done prior
    to the invocation of this method.
    """
    fileModel = File()
    file = fileModel.load(fileId, force=True)
    streamFn = functools.partial(fileModel.download, file, headers=False)

    event = events.trigger('thumbnails.create', info={
        'file': file,
        'width': width,
        'height': height,
        'crop': crop,
        'attachToType': attachToType,
        'attachToId': attachToId,
        'streamFn': streamFn
    })

    if len(event.responses):
        resp = event.responses[-1]
        newFile = resp['file']

        if event.defaultPrevented:
            if resp.get('attach', True):
                newFile = attachThumbnail(
                    file, newFile, attachToType, attachToId, width, height)
            return newFile
        else:
            file = newFile
            streamFn = functools.partial(
                fileModel.download, file, headers=False)

    if 'assetstoreId' not in file:
        # TODO we could thumbnail link files if we really wanted.
        raise Exception('File %s has no assetstore.' % fileId)

    stream = streamFn()
    data = b''.join(stream())
    image = _getImage(file['mimeType'], file['exts'], data)

    if not width:
        width = int(height * image.size[0] / image.size[1])
    elif not height:
        height = int(width * image.size[1] / image.size[0])
    elif crop:
        x1 = y1 = 0
        x2, y2 = image.size
        wr = float(image.size[0]) / width
        hr = float(image.size[1]) / height

        if hr > wr:
            y1 = int(y2 / 2 - height * wr / 2)
            y2 = int(y2 / 2 + height * wr / 2)
        else:
            x1 = int(x2 / 2 - width * hr / 2)
            x2 = int(x2 / 2 + width * hr / 2)

        image = image.crop((x1, y1, x2, y2))

    image.thumbnail((width, height), Image.ANTIALIAS)

    out = six.BytesIO()
    image.convert('RGB').save(out, 'JPEG', quality=85)
    size = out.tell()
    out.seek(0)

    thumbnail = Upload().uploadFromFile(
        out, size=size, name='_thumb.jpg', parentType=attachToType,
        parent={'_id': ObjectId(attachToId)}, user=None,
        mimeType='image/jpeg', attachParent=True)

    return attachThumbnail(
        file, thumbnail, attachToType, attachToId, width, height)
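# Worked example of the center-crop arithmetic in createThumbnail() above,
# using made-up sizes: cropping a 1000x500 source for a 200x200 thumbnail.
src_w, src_h = 1000, 500
width, height = 200, 200
wr = float(src_w) / width    # 5.0, horizontal shrink factor
hr = float(src_h) / height   # 2.5, vertical shrink factor
# hr < wr, so the source is relatively too wide: keep the full height and
# crop the width symmetrically around the center (the else branch above).
x1 = int(src_w / 2 - width * hr / 2)   # 500 - 250 = 250
x2 = int(src_w / 2 + width * hr / 2)   # 500 + 250 = 750
print((x1, 0, x2, src_h))  # (250, 0, 750, 500): a 500x500 center crop
                           # that then scales cleanly to 200x200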
class HdfsAssetstoreResource(Resource):
    def __init__(self):
        super(HdfsAssetstoreResource, self).__init__()
        self.resourceName = 'hdfs_assetstore'
        self.route('PUT', (':id', 'import'), self.importData)

        # Save to avoid many lookups
        self.folderModel = Folder()
        self.itemModel = Item()
        self.fileModel = File()

    def _importFile(self, parent, name, user, assetstore, node):
        item = self.itemModel.findOne({
            'folderId': parent['_id'],
            'name': name
        })
        if item is None:
            item = self.itemModel.createItem(
                name=name, creator=user, folder=parent)

        file = self.fileModel.findOne({
            'name': name,
            'itemId': item['_id']
        })
        if file is None:
            file = self.fileModel.createFile(
                creator=user, item=item, name=name, size=node['length'],
                assetstore=assetstore, mimeType=None, saveFile=False)

        file['hdfs'] = {
            'imported': True,
            'path': node['path']
        }
        self.fileModel.save(file)

    def _importData(self, parentType, parent, assetstore, client, path, ctx,
                    user):
        for node in client.ls([path]):
            ctx.update(message='Importing ' + node['path'])
            name = posixpath.basename(node['path'])

            if node['file_type'] == 'd':
                folder = self.folderModel.findOne({
                    'parentId': parent['_id'],
                    'name': name,
                    'parentCollection': parentType
                })
                if folder is None:
                    folder = self.folderModel.createFolder(
                        parent, name, parentType=parentType, creator=user)
                self._importData('folder', folder, assetstore, client,
                                 node['path'], ctx, user)
            elif node['file_type'] == 'f' and parentType == 'folder':
                self._importFile(parent, name, user, assetstore, node)

    @access.admin
    @loadmodel(model='assetstore')
    @describeRoute(
        Description('Import a data hierarchy from an HDFS instance.')
        .notes('Only site administrators may use this endpoint.')
        .param('id', 'The ID of the assetstore representing the HDFS '
               'instance.', paramType='path')
        .param('parentId', 'The ID of the parent object in the Girder data '
               'hierarchy under which to import the files.')
        .param('parentType', 'The type of the parent object to import into.',
               enum=('folder', 'user', 'collection'), required=False)
        .param('path', 'Root of the directory structure (relative to the '
               'root of the HDFS) to import.')
        .param('progress', 'Whether to record progress on this operation '
               '(default=False)', required=False, dataType='boolean')
        .errorResponse()
        .errorResponse('You are not an administrator.', 403)
    )
    def importData(self, assetstore, params):
        self.requireParams(('parentId', 'path'), params)

        user = self.getCurrentUser()

        parentType = params.get('parentType', 'folder')
        if parentType not in ('user', 'collection', 'folder'):
            raise RestException('Invalid parentType.')

        parent = self.model(parentType).load(params['parentId'], force=True,
                                             exc=True)

        progress = self.boolParam('progress', params, default=False)
        client = HdfsClient(host=assetstore['hdfs']['host'],
                            port=assetstore['hdfs']['port'],
                            use_trash=False)
        path = params['path']

        with ProgressContext(progress, user=user,
                             title='Importing data from HDFS') as ctx:
            try:
                self._importData(parentType, parent, assetstore, client,
                                 path, ctx, user)
            except FileNotFoundException:
                raise RestException('File not found: %s.' % path)
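# A sketch of triggering the HDFS import endpoint above from a client; it
# assumes a local Girder server, the `requests` library, and an admin token.
# The assetstore ID, folder ID, and path are placeholders.
import requests

api = 'http://localhost:8080/api/v1'
resp = requests.put(
    '%s/hdfs_assetstore/%s/import' % (api, '<assetstore id>'),
    params={
        'parentId': '<folder id>',
        'parentType': 'folder',
        'path': '/data/incoming',  # relative to the HDFS root
        'progress': True,
    },
    headers={'Girder-Token': '<admin token>'})
resp.raise_for_status()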