def setUp(self): self.client = client_context.client self.client.drop_database('perftest') bucket = GridFSBucket(self.client.perftest) gridfs_path = os.path.join( TEST_PATH, os.path.join('parallel', 'gridfs_multi')) self.files = [os.path.join( gridfs_path, s) for s in os.listdir(gridfs_path)] for fname in self.files: with open(fname, 'rb') as gfile: bucket.upload_from_stream(fname, gfile)
def put(self): client_uuid = self.request.matchdict['uuid'] images = self.request.db['images'].find({"client_key": client_uuid}) fs = GridFSBucket(self.request.db) file_key = None for image in images: file_key = image['file_key'] file_data = self.request.body if not file_data: return Response("FILE DATA MUST BE SPECIFIED!") fs.delete(file_key) file_id = fs.upload_from_stream(str(file_key), file_data) self.request.db['images'].insert( { 'client_key': client_uuid, 'file_key': file_id, 'description': self.request.POST.get('description') } ) return Response("UPDATED!")
def save(self): #Save to the database super(Tracks, self).save() #if genre doesn't exist add it try: genre_object = Genres.objects.get(genre_name=self.track_genre) except: genre_object = Genres.objects.create(genre_name=self.track_genre) genre_object.save() #Upload music to Gridfs using pymongo commands print("Uploading to GridFS") db = MongoClient().MusicDB fs = GridFSBucket(db) music_file = open(self.actual_track.path, 'rb') #open the file in binary mode fs.upload_from_stream(self.track_name, music_file.read()) #upload to gridfs
class TestGridFsUpload(PerformanceTest, unittest.TestCase): data_size = 52428800 def setUp(self): self.client = client_context.client self.client.drop_database('perftest') gridfs_path = os.path.join( TEST_PATH, os.path.join('single_and_multi_document', 'gridfs_large.bin')) with open(gridfs_path, 'rb') as data: self.document = data.read() self.bucket = GridFSBucket(self.client.perftest) def tearDown(self): super(TestGridFsUpload, self).tearDown() self.client.drop_database('perftest') def before(self): self.bucket.upload_from_stream('init', b'x') def do_task(self): self.bucket.upload_from_stream('gridfstest', self.document)
def temporary_fs(self, snapshot=None, **kwargs): with temporary_mongodb() as conn_str: if snapshot: client = MongoClient(conn_str) try: database = client.get_database('admin') files_coll = database['test.files'] gridfs_bucket = GridFSBucket(database, 'test') for filename, payload in six.iteritems(snapshot): content = payload[0] meta_dict = payload[1] if len(payload) > 1 else None gridfs_bucket.upload_from_stream( filename, BytesIO(content)) if meta_dict: files_coll.update_one( {'filename': filename}, {'$set': { 'metadata': meta_dict }}) finally: client.close() with MongoFS(conn_str, 'admin', 'test', **kwargs) as fs: yield fs
def collection_post(self): fs = GridFSBucket(self.request.db) client_key = uuid.uuid4().hex file_data = self.request.body if not file_data: return Response("FILE DATA MUST BE SPECIFIED!") file_id = fs.upload_from_stream(str(client_key), file_data) self.request.db['images'].insert( { 'client_key': client_key, 'file_key': file_id, 'description': self.request.POST.get('description'), 'date': datetime.datetime.now() } ) return {'file_key': client_key}
def insert_gridfs_file(filename): bucket = GridFSBucket(proc_client.perftest) with open(filename, 'rb') as gfile: bucket.upload_from_stream(filename, gfile)
class FsMongo(FsBase): def __init__(self, logger_name='fs', lock=False): super().__init__(logger_name, lock) self.path = None self.client = None self.fs = None def __update_local_fs(self, from_path=None): dir_cursor = self.fs.find({"metadata.type": "dir"}, no_cursor_timeout=True) for directory in dir_cursor: if from_path and not directory.filename.startswith(from_path): continue os.makedirs(self.path + directory.filename, exist_ok=True) file_cursor = self.fs.find({"metadata.type": { "$in": ["file", "sym"] }}, no_cursor_timeout=True) for writing_file in file_cursor: if from_path and not writing_file.filename.startswith(from_path): continue file_path = self.path + writing_file.filename if writing_file.metadata["type"] == "sym": with BytesIO() as b: self.fs.download_to_stream(writing_file._id, b) b.seek(0) link = b.read().decode("utf-8") try: os.remove(file_path) except OSError as e: if e.errno != errno.ENOENT: # This is probably permission denied or worse raise os.symlink(link, file_path) else: with open(file_path, 'wb+') as file_stream: self.fs.download_to_stream(writing_file._id, file_stream) if "permissions" in writing_file.metadata: os.chmod(file_path, writing_file.metadata["permissions"]) def get_params(self): return {"fs": "mongo", "path": self.path} def fs_connect(self, config): try: if "logger_name" in config: self.logger = logging.getLogger(config["logger_name"]) if "path" in config: self.path = config["path"] else: raise FsException("Missing parameter \"path\"") if not self.path.endswith("/"): self.path += "/" if not os.path.exists(self.path): raise FsException( "Invalid configuration param at '[storage]': path '{}' does not exist" .format(config["path"])) elif not os.access(self.path, os.W_OK): raise FsException( "Invalid configuration param at '[storage]': path '{}' is not writable" .format(config["path"])) if all(key in config.keys() for key in ["uri", "collection"]): self.client = MongoClient(config["uri"]) self.fs = GridFSBucket(self.client[config["collection"]]) elif all(key in config.keys() for key in ["host", "port", "collection"]): self.client = MongoClient(config["host"], config["port"]) self.fs = GridFSBucket(self.client[config["collection"]]) else: if "collection" not in config.keys(): raise FsException("Missing parameter \"collection\"") else: raise FsException( "Missing parameters: \"uri\" or \"host\" + \"port\"") except FsException: raise except Exception as e: # TODO refine raise FsException(str(e)) def fs_disconnect(self): pass # TODO def mkdir(self, folder): """ Creates a folder or parent object location :param folder: :return: None or raises an exception """ try: self.fs.upload_from_stream(folder, BytesIO(), metadata={"type": "dir"}) except errors.FileExists: # make it idempotent pass except Exception as e: raise FsException(str(e), http_code=HTTPStatus.INTERNAL_SERVER_ERROR) def dir_rename(self, src, dst): """ Rename one directory name. If dst exist, it replaces (deletes) existing directory :param src: source directory :param dst: destination directory :return: None or raises and exception """ try: dst_cursor = self.fs.find( {"filename": { "$regex": "^{}(/|$)".format(dst) }}, no_cursor_timeout=True) for dst_file in dst_cursor: self.fs.delete(dst_file._id) src_cursor = self.fs.find( {"filename": { "$regex": "^{}(/|$)".format(src) }}, no_cursor_timeout=True) for src_file in src_cursor: self.fs.rename(src_file._id, src_file.filename.replace(src, dst, 1)) except Exception as e: raise FsException(str(e), http_code=HTTPStatus.INTERNAL_SERVER_ERROR) def file_exists(self, storage, mode=None): """ Indicates if "storage" file exist :param storage: can be a str or a str list :param mode: can be 'file' exist as a regular file; 'dir' exists as a directory or; 'None' just exists :return: True, False """ f = storage if isinstance(storage, str) else "/".join(storage) cursor = self.fs.find({"filename": f}) for requested_file in cursor: exception_file = next(cursor, None) if exception_file: raise FsException("Multiple files found", http_code=HTTPStatus.INTERNAL_SERVER_ERROR) if requested_file.metadata["type"] == mode: return True if requested_file.metadata["type"] == "sym" and mode == "file": return True return False def file_size(self, storage): """ return file size :param storage: can be a str or a str list :return: file size """ f = storage if isinstance(storage, str) else "/".join(storage) cursor = self.fs.find({"filename": f}) for requested_file in cursor: exception_file = next(cursor, None) if exception_file: raise FsException("Multiple files found", http_code=HTTPStatus.INTERNAL_SERVER_ERROR) return requested_file.length def file_extract(self, tar_object, path): """ extract a tar file :param tar_object: object of type tar :param path: can be a str or a str list, or a tar object where to extract the tar_object :return: None """ f = path if isinstance(path, str) else "/".join(path) for member in tar_object.getmembers(): if member.isfile(): stream = tar_object.extractfile(member) elif member.issym(): stream = BytesIO(member.linkname.encode("utf-8")) else: stream = BytesIO() if member.isfile(): file_type = "file" elif member.issym(): file_type = "sym" else: file_type = "dir" metadata = {"type": file_type, "permissions": member.mode} self.fs.upload_from_stream(f + "/" + member.name, stream, metadata=metadata) stream.close() def file_open(self, storage, mode): """ Open a file :param storage: can be a str or list of str :param mode: file mode :return: file object """ try: f = storage if isinstance(storage, str) else "/".join(storage) if "b" in mode: return GridByteStream(f, self.fs, mode) else: return GridStringStream(f, self.fs, mode) except errors.NoFile: raise FsException("File {} does not exist".format(f), http_code=HTTPStatus.NOT_FOUND) except IOError: raise FsException("File {} cannot be opened".format(f), http_code=HTTPStatus.BAD_REQUEST) def dir_ls(self, storage): """ return folder content :param storage: can be a str or list of str :return: folder content """ try: f = storage if isinstance(storage, str) else "/".join(storage) files = [] dir_cursor = self.fs.find({"filename": f}) for requested_dir in dir_cursor: exception_dir = next(dir_cursor, None) if exception_dir: raise FsException( "Multiple directories found", http_code=HTTPStatus.INTERNAL_SERVER_ERROR) if requested_dir.metadata["type"] != "dir": raise FsException("File {} does not exist".format(f), http_code=HTTPStatus.NOT_FOUND) files_cursor = self.fs.find( {"filename": { "$regex": "^{}/([^/])*".format(f) }}) for children_file in files_cursor: files += [children_file.filename.replace(f + '/', '', 1)] return files except IOError: raise FsException("File {} cannot be opened".format(f), http_code=HTTPStatus.BAD_REQUEST) def file_delete(self, storage, ignore_non_exist=False): """ Delete storage content recursively :param storage: can be a str or list of str :param ignore_non_exist: not raise exception if storage does not exist :return: None """ try: f = storage if isinstance(storage, str) else "/".join(storage) file_cursor = self.fs.find({"filename": f}) found = False for requested_file in file_cursor: found = True exception_file = next(file_cursor, None) if exception_file: raise FsException( "Multiple files found", http_code=HTTPStatus.INTERNAL_SERVER_ERROR) if requested_file.metadata["type"] == "dir": dir_cursor = self.fs.find( {"filename": { "$regex": "^{}".format(f) }}) for tmp in dir_cursor: self.fs.delete(tmp._id) else: self.fs.delete(requested_file._id) if not found and not ignore_non_exist: raise FsException("File {} does not exist".format(storage), http_code=HTTPStatus.NOT_FOUND) except IOError as e: raise FsException("File {} cannot be deleted: {}".format(f, e), http_code=HTTPStatus.INTERNAL_SERVER_ERROR) def sync(self, from_path=None): """ Sync from FSMongo to local storage :param from_path: if supplied, only copy content from this path, not all :return: None """ self.__update_local_fs(from_path=from_path)
class FsMongo(FsBase): def __init__(self, logger_name='fs', lock=False): super().__init__(logger_name, lock) self.path = None self.client = None self.fs = None def __update_local_fs(self, from_path=None): dir_cursor = self.fs.find({"metadata.type": "dir"}, no_cursor_timeout=True) for directory in dir_cursor: if from_path and not directory.filename.startswith(from_path): continue os.makedirs(self.path + directory.filename, exist_ok=True) file_cursor = self.fs.find({"metadata.type": { "$in": ["file", "sym"] }}, no_cursor_timeout=True) for writing_file in file_cursor: if from_path and not writing_file.filename.startswith(from_path): continue file_path = self.path + writing_file.filename if writing_file.metadata["type"] == "sym": with BytesIO() as b: self.fs.download_to_stream(writing_file._id, b) b.seek(0) link = b.read().decode("utf-8") try: os.remove(file_path) except OSError as e: if e.errno != errno.ENOENT: # This is probably permission denied or worse raise os.symlink(link, file_path) else: with open(file_path, 'wb+') as file_stream: self.fs.download_to_stream(writing_file._id, file_stream) if "permissions" in writing_file.metadata: os.chmod(file_path, writing_file.metadata["permissions"]) def get_params(self): return {"fs": "mongo", "path": self.path} def fs_connect(self, config): try: if "logger_name" in config: self.logger = logging.getLogger(config["logger_name"]) if "path" in config: self.path = config["path"] else: raise FsException("Missing parameter \"path\"") if not self.path.endswith("/"): self.path += "/" if not os.path.exists(self.path): raise FsException( "Invalid configuration param at '[storage]': path '{}' does not exist" .format(config["path"])) elif not os.access(self.path, os.W_OK): raise FsException( "Invalid configuration param at '[storage]': path '{}' is not writable" .format(config["path"])) if all(key in config.keys() for key in ["uri", "collection"]): self.client = MongoClient(config["uri"]) self.fs = GridFSBucket(self.client[config["collection"]]) elif all(key in config.keys() for key in ["host", "port", "collection"]): self.client = MongoClient(config["host"], config["port"]) self.fs = GridFSBucket(self.client[config["collection"]]) else: if "collection" not in config.keys(): raise FsException("Missing parameter \"collection\"") else: raise FsException( "Missing parameters: \"uri\" or \"host\" + \"port\"") except FsException: raise except Exception as e: # TODO refine raise FsException(str(e)) def fs_disconnect(self): pass # TODO def mkdir(self, folder): """ Creates a folder or parent object location :param folder: :return: None or raises an exception """ try: self.fs.upload_from_stream(folder, BytesIO(), metadata={"type": "dir"}) except errors.FileExists: # make it idempotent pass except Exception as e: raise FsException(str(e), http_code=HTTPStatus.INTERNAL_SERVER_ERROR) def dir_rename(self, src, dst): """ Rename one directory name. If dst exist, it replaces (deletes) existing directory :param src: source directory :param dst: destination directory :return: None or raises and exception """ try: dst_cursor = self.fs.find( {"filename": { "$regex": "^{}(/|$)".format(dst) }}, no_cursor_timeout=True) for dst_file in dst_cursor: self.fs.delete(dst_file._id) src_cursor = self.fs.find( {"filename": { "$regex": "^{}(/|$)".format(src) }}, no_cursor_timeout=True) for src_file in src_cursor: self.fs.rename(src_file._id, src_file.filename.replace(src, dst, 1)) except Exception as e: raise FsException(str(e), http_code=HTTPStatus.INTERNAL_SERVER_ERROR) def file_exists(self, storage, mode=None): """ Indicates if "storage" file exist :param storage: can be a str or a str list :param mode: can be 'file' exist as a regular file; 'dir' exists as a directory or; 'None' just exists :return: True, False """ f = storage if isinstance(storage, str) else "/".join(storage) cursor = self.fs.find({"filename": f}) for requested_file in cursor: exception_file = next(cursor, None) if exception_file: raise FsException("Multiple files found", http_code=HTTPStatus.INTERNAL_SERVER_ERROR) # if no special mode is required just check it does exists if not mode: return True if requested_file.metadata["type"] == mode: return True if requested_file.metadata["type"] == "sym" and mode == "file": return True return False def file_size(self, storage): """ return file size :param storage: can be a str or a str list :return: file size """ f = storage if isinstance(storage, str) else "/".join(storage) cursor = self.fs.find({"filename": f}) for requested_file in cursor: exception_file = next(cursor, None) if exception_file: raise FsException("Multiple files found", http_code=HTTPStatus.INTERNAL_SERVER_ERROR) return requested_file.length def file_extract(self, tar_object, path): """ extract a tar file :param tar_object: object of type tar :param path: can be a str or a str list, or a tar object where to extract the tar_object :return: None """ f = path if isinstance(path, str) else "/".join(path) for member in tar_object.getmembers(): if member.isfile(): stream = tar_object.extractfile(member) elif member.issym(): stream = BytesIO(member.linkname.encode("utf-8")) else: stream = BytesIO() if member.isfile(): file_type = "file" elif member.issym(): file_type = "sym" else: file_type = "dir" metadata = {"type": file_type, "permissions": member.mode} self.fs.upload_from_stream(f + "/" + member.name, stream, metadata=metadata) stream.close() def file_open(self, storage, mode): """ Open a file :param storage: can be a str or list of str :param mode: file mode :return: file object """ try: f = storage if isinstance(storage, str) else "/".join(storage) if "b" in mode: return GridByteStream(f, self.fs, mode) else: return GridStringStream(f, self.fs, mode) except errors.NoFile: raise FsException("File {} does not exist".format(f), http_code=HTTPStatus.NOT_FOUND) except IOError: raise FsException("File {} cannot be opened".format(f), http_code=HTTPStatus.BAD_REQUEST) def dir_ls(self, storage): """ return folder content :param storage: can be a str or list of str :return: folder content """ try: f = storage if isinstance(storage, str) else "/".join(storage) files = [] dir_cursor = self.fs.find({"filename": f}) for requested_dir in dir_cursor: exception_dir = next(dir_cursor, None) if exception_dir: raise FsException( "Multiple directories found", http_code=HTTPStatus.INTERNAL_SERVER_ERROR) if requested_dir.metadata["type"] != "dir": raise FsException("File {} does not exist".format(f), http_code=HTTPStatus.NOT_FOUND) files_cursor = self.fs.find( {"filename": { "$regex": "^{}/([^/])*".format(f) }}) for children_file in files_cursor: files += [children_file.filename.replace(f + '/', '', 1)] return files except IOError: raise FsException("File {} cannot be opened".format(f), http_code=HTTPStatus.BAD_REQUEST) def file_delete(self, storage, ignore_non_exist=False): """ Delete storage content recursively :param storage: can be a str or list of str :param ignore_non_exist: not raise exception if storage does not exist :return: None """ try: f = storage if isinstance(storage, str) else "/".join(storage) file_cursor = self.fs.find({"filename": f}) found = False for requested_file in file_cursor: found = True exception_file = next(file_cursor, None) if exception_file: raise FsException( "Multiple files found", http_code=HTTPStatus.INTERNAL_SERVER_ERROR) if requested_file.metadata["type"] == "dir": dir_cursor = self.fs.find( {"filename": { "$regex": "^{}".format(f) }}) for tmp in dir_cursor: self.fs.delete(tmp._id) else: self.fs.delete(requested_file._id) if not found and not ignore_non_exist: raise FsException("File {} does not exist".format(storage), http_code=HTTPStatus.NOT_FOUND) except IOError as e: raise FsException("File {} cannot be deleted: {}".format(f, e), http_code=HTTPStatus.INTERNAL_SERVER_ERROR) def sync(self, from_path=None): """ Sync from FSMongo to local storage :param from_path: if supplied, only copy content from this path, not all :return: None """ if from_path: if os.path.isabs(from_path): from_path = os.path.relpath(from_path, self.path) self.__update_local_fs(from_path=from_path) def _update_mongo_fs(self, from_path): os_path = self.path + from_path # Obtain list of files and dirs in filesystem members = [] for root, dirs, files in os.walk(os_path): for folder in dirs: member = { "filename": os.path.join(root, folder), "type": "dir" } members.append(member) for file in files: filename = os.path.join(root, file) if os.path.islink(filename): file_type = "sym" else: file_type = "file" member = { "filename": os.path.join(root, file), "type": file_type } members.append(member) # Obtain files in mongo dict remote_files = self._get_mongo_files(from_path) # Upload members if they do not exists or have been modified # We will do this for performance (avoid updating unmodified files) and to avoid # updating a file with an older one in case there are two sources for synchronization # in high availability scenarios for member in members: # obtain permission mask = int(oct(os.stat(member["filename"]).st_mode)[-3:], 8) # convert to relative path rel_filename = os.path.relpath(member["filename"], self.path) last_modified_date = datetime.datetime.fromtimestamp( os.path.getmtime(member["filename"])) remote_file = remote_files.get(rel_filename) upload_date = remote_file[ 0].uploadDate if remote_file else datetime.datetime.min # remove processed files from dict remote_files.pop(rel_filename, None) if last_modified_date >= upload_date: stream = None fh = None try: file_type = member["type"] if file_type == "dir": stream = BytesIO() elif file_type == "sym": stream = BytesIO( os.readlink(member["filename"]).encode("utf-8")) else: fh = open(member["filename"], "rb") stream = BytesIO(fh.read()) metadata = {"type": file_type, "permissions": mask} self.fs.upload_from_stream(rel_filename, stream, metadata=metadata) # delete old files if remote_file: for file in remote_file: self.fs.delete(file._id) finally: if fh: fh.close() if stream: stream.close() # delete files that are not any more in local fs for remote_file in remote_files.values(): for file in remote_file: self.fs.delete(file._id) def _get_mongo_files(self, from_path=None): file_dict = {} file_cursor = self.fs.find(no_cursor_timeout=True, sort=[('uploadDate', -1)]) for file in file_cursor: if from_path and not file.filename.startswith(from_path): continue if file.filename in file_dict: file_dict[file.filename].append(file) else: file_dict[file.filename] = [file] return file_dict def reverse_sync(self, from_path: str): """ Sync from local storage to FSMongo :param from_path: base directory to upload content to mongo fs :return: None """ if os.path.isabs(from_path): from_path = os.path.relpath(from_path, self.path) self._update_mongo_fs(from_path=from_path)