def __init__(self):
    """
    Record the backup and working directory names and make sure the
    working directory exists, creating it through the Filesystem helper
    when it is missing.
    """
    self.sourceDir = 'imagesbackup'
    self.destDir = 'imagesdir'
    self.filesystem = Filesystem()

    fs = self.filesystem
    target = self.destDir
    if not fs.checkDirExists(target):
        fs.makeDir(target)

    # Fail loudly if the directory is still missing after the attempt.
    assert fs.checkDirExists(target), target + ' does not exist'
def get_filesystem(self):
    """
    Build a filesystem object for this session. No request is used.

    :return: Filesystem object linked to this session.
    """
    # The Filesystem gets its own copy of the REST interface.
    interface_copy = self.rest_interface.get_copy()
    return Filesystem(interface_copy)
def __init__(self, mountpoint, bootstrap, host_port):
    """
    Join the network through ``bootstrap`` and mount the filesystem.

    Steps, in order: create the local storage directory, open the
    listening socket, send a one-time 'join' message to the bootstrap
    node, start the listener in a daemon thread, register the shutdown
    hook and finally hand the filesystem over to FUSE.

    :param mountpoint: where FUSE mounts the filesystem.
    :param bootstrap: bootstrap node, passed to the join message, to the
        Filesystem constructor and to the shutdown hook.
    :param host_port: port for the listening socket, also announced in
        the join message.
    """
    # NOTE(review): the path is interpolated into a shell command
    # unquoted -- confirm LOCALSTORAGE_DIR never contains spaces or
    # shell metacharacters.
    mkdir_cmd = "mkdir -p %s" % syscalls.LOCALSTORAGE_DIR
    os.system(mkdir_cmd)

    sock = listening_socket(host_port)
    join_reply = one_time_message(bootstrap, method='join', port=host_port)
    filesystem = Filesystem(bootstrap)

    # Daemon thread, so a pending listener never keeps the process alive.
    listener = Thread(target=listen, args=(sock, ))
    listener.daemon = True
    listener.start()

    atexit.register(shutdown, bootstrap)

    # foreground=True presumably blocks here until unmount -- TODO confirm.
    mount = FUSE(filesystem, mountpoint, foreground=True)
def __init__(self, logger, gen, use_dfs=False, dfs_conf=None):
    """
    Build a WorkQueue instance

    @param logger a logger object
    @param gen a generator
    @param use_dfs boolean indicating whether to use a DFS or not
    @param dfs_conf a dictionary containing the necessary parameters for
                    the DFS Master initialization.
    """
    self.logger = logger
    self.generator = gen
    self.dead_queue = []
    self.last_tag = 0
    self.use_dfs = use_dfs

    # Nothing else to prepare when the DFS is switched off
    if not use_dfs:
        return

    if not DFS_AVAILABLE:
        raise Exception(
            "You need to install fsdfs in order to use the distributed "
            "mode. Otherwise just toggle it off from the configuration "
            "file"
        )

    self.datadir = dfs_conf['datadir']
    self.fs = Filesystem(dfs_conf)
def main():
    """
    Walk through the basic Filesystem operations, printing each step.

    Creates nested directories, writes one file to two locations, deletes
    both copies and finally removes /usr recursively.

    Every message is printed with the single-argument ``print(...)`` form,
    which produces the same output on Python 2 and Python 3.
    """
    # The imports stay inside the function so that they happen after the
    # message announcing them, exactly as the step-by-step output implies.
    print('Import Filesystem and _File')
    from filesystem import Filesystem
    from myfile import _File

    print('Creating filesystem')
    fs = Filesystem()

    print('Creating directory /usr')
    fs.mkdir('/usr')

    print('Creating directory /usr/local/bin')
    fs.mkdir('/usr/local/bin')

    print('Creating file')
    _file = _File()

    print('Writing to file')
    _file.write(0, 'blah blah blah')

    print('Saving file to /usr/local/bin/file')
    fs.write(_file, '/usr/local/bin/file')

    print('Saving file to /file')
    fs.write(_file, '/file')

    print('Deleting /usr/local/bin/file')
    fs.delete('/usr/local/bin/file')

    print('Deleting /file')
    fs.delete('/file')

    print('Deleting /usr recursively')
    fs.delete_recursive('/usr')

    print('All done')
def __init__(self, fconf, master_id, worker_id):
    """
    Configure the worker and serve the master until it asks to quit.

    The constructor loads the JSON configuration, optionally starts a DFS
    client rooted in a per-master/per-worker directory, instantiates the
    mapper and the reducer and then enters the message loop. It only
    returns once a MSG_QUIT message has been received.

    @param fconf path of the JSON configuration file
    @param master_id the master ID (converted with int())
    @param worker_id the worker ID (converted with int())
    @raise Exception if 'dfs-enabled' is set but DFS_AVAILABLE is false
    """
    super(Worker, self).__init__("Worker")

    finished = False
    self.comm = MPI.COMM_WORLD.Get_parent()

    # Here we also need to handle the configuration file somehow.
    # The with-block closes the file handle as soon as it is parsed.
    with open(fconf) as conf_file:
        self.conf = conf = json.load(conf_file)

    self.use_dfs = use_dfs = conf['dfs-enabled']
    self.datadir = self.conf['datadir']
    self.input_prefix = self.conf['input-prefix']
    self.output_prefix = self.conf['output-prefix']

    if use_dfs and not DFS_AVAILABLE:
        raise Exception("You need to install fsdfs in order to use the" \
                        " distributed mode. Otherwise just toggle it " \
                        "off from the configuration file")
    elif use_dfs:
        dconf = conf['dfs-conf']

        # Every worker gets its own port: start port + worker id
        host = '%s:%d' % (conf['dfs-host'],
                          conf['dfs-startport'] + int(worker_id))

        self.datadir = os.path.join(
            self.datadir,
            'master-{:06d}'.format(int(master_id)),
            'worker-{:06d}'.format(int(worker_id))
        )

        self.info("Creating directory structure in %s" % self.datadir)

        # Always start from an empty private directory
        if os.path.exists(self.datadir):
            shutil.rmtree(self.datadir)

        os.makedirs(self.datadir)
        os.makedirs(os.path.join(self.datadir, self.input_prefix))
        os.makedirs(os.path.join(self.datadir, self.output_prefix))

        dconf['host'] = host
        dconf['datadir'] = self.datadir

        self.info('Starting DFS client on %s' % host)
        self.fs = Filesystem(dconf)
        self.fs.start()

    # Here we need somehow to override the default scheme in case of DFS
    conf['datadir'] = self.datadir

    self.master_id = int(master_id)
    self.worker_id = int(worker_id)

    self.mapper = self.extract_cls(conf['map-module'], 'Mapper')(conf)
    self.reducer = self.extract_cls(conf['reduce-module'], 'Reducer')(conf)

    # We provide a VFS abstraction
    self.mapper.setup(self)
    self.reducer.setup(self)

    while not finished:
        # Tell rank 0 we are idle, then wait for the next command
        self.comm.send(Message(MSG_AVAILABLE, 0, None), dest=0)
        msg = self.comm.recv()

        if msg.command == MSG_COMPUTE_MAP:
            info, result = self.mapper.execute(msg.result)

            msg = Message(MSG_FINISHED_MAP, msg.tag, result)
            msg.info = info

            self.info("Map performance: %.2f" % \
                      (info[0] / (1024 ** 2 * info[1])))

            self.comm.send(msg, dest=0)

        elif msg.command == MSG_COMPUTE_REDUCE:
            info, result = self.reducer.execute(msg.result)

            msg = Message(MSG_FINISHED_REDUCE, msg.tag, result)
            msg.info = info

            self.info("Reduce performance: %.2f" % \
                      (info[0] / (1024 ** 2 * info[1])))

            self.comm.send(msg, dest=0)

        elif msg.command == MSG_SLEEP:
            time.sleep(msg.result)

        elif msg.command == MSG_QUIT:
            finished = True

    if self.use_dfs:
        self.info("Stopping DFS client")
        self.fs.stop()
        self.info("Stopped")
class Worker(Logger):
    """
    A worker process: it receives map and reduce commands from its parent
    communicator and exposes file push/pull helpers to the mapper and the
    reducer it hosts.
    """

    def __init__(self, fconf, master_id, worker_id):
        """
        Configure the worker and serve the master until it asks to quit.

        The constructor loads the JSON configuration, optionally starts a
        DFS client rooted in a per-master/per-worker directory,
        instantiates the mapper and the reducer and then enters the
        message loop. It only returns once a MSG_QUIT message has been
        received.

        @param fconf path of the JSON configuration file
        @param master_id the master ID (converted with int())
        @param worker_id the worker ID (converted with int())
        @raise Exception if 'dfs-enabled' is set but DFS_AVAILABLE is false
        """
        super(Worker, self).__init__("Worker")

        finished = False
        self.comm = MPI.COMM_WORLD.Get_parent()

        # Here we also need to handle the configuration file somehow.
        # The with-block closes the file handle as soon as it is parsed.
        with open(fconf) as conf_file:
            self.conf = conf = json.load(conf_file)

        self.use_dfs = use_dfs = conf['dfs-enabled']
        self.datadir = self.conf['datadir']
        self.input_prefix = self.conf['input-prefix']
        self.output_prefix = self.conf['output-prefix']

        if use_dfs and not DFS_AVAILABLE:
            raise Exception("You need to install fsdfs in order to use the" \
                            " distributed mode. Otherwise just toggle it " \
                            "off from the configuration file")
        elif use_dfs:
            dconf = conf['dfs-conf']

            # Every worker gets its own port: start port + worker id
            host = '%s:%d' % (conf['dfs-host'],
                              conf['dfs-startport'] + int(worker_id))

            self.datadir = os.path.join(
                self.datadir,
                'master-{:06d}'.format(int(master_id)),
                'worker-{:06d}'.format(int(worker_id))
            )

            self.info("Creating directory structure in %s" % self.datadir)

            # Always start from an empty private directory
            if os.path.exists(self.datadir):
                shutil.rmtree(self.datadir)

            os.makedirs(self.datadir)
            os.makedirs(os.path.join(self.datadir, self.input_prefix))
            os.makedirs(os.path.join(self.datadir, self.output_prefix))

            dconf['host'] = host
            dconf['datadir'] = self.datadir

            self.info('Starting DFS client on %s' % host)
            self.fs = Filesystem(dconf)
            self.fs.start()

        # Here we need somehow to override the default scheme in case of DFS
        conf['datadir'] = self.datadir

        self.master_id = int(master_id)
        self.worker_id = int(worker_id)

        self.mapper = self.extract_cls(conf['map-module'], 'Mapper')(conf)
        self.reducer = self.extract_cls(conf['reduce-module'], 'Reducer')(conf)

        # We provide a VFS abstraction
        self.mapper.setup(self)
        self.reducer.setup(self)

        while not finished:
            # Tell rank 0 we are idle, then wait for the next command
            self.comm.send(Message(MSG_AVAILABLE, 0, None), dest=0)
            msg = self.comm.recv()

            if msg.command == MSG_COMPUTE_MAP:
                info, result = self.mapper.execute(msg.result)

                msg = Message(MSG_FINISHED_MAP, msg.tag, result)
                msg.info = info

                self.info("Map performance: %.2f" % \
                          (info[0] / (1024 ** 2 * info[1])))

                self.comm.send(msg, dest=0)

            elif msg.command == MSG_COMPUTE_REDUCE:
                info, result = self.reducer.execute(msg.result)

                msg = Message(MSG_FINISHED_REDUCE, msg.tag, result)
                msg.info = info

                self.info("Reduce performance: %.2f" % \
                          (info[0] / (1024 ** 2 * info[1])))

                self.comm.send(msg, dest=0)

            elif msg.command == MSG_SLEEP:
                time.sleep(msg.result)

            elif msg.command == MSG_QUIT:
                finished = True

        if self.use_dfs:
            self.info("Stopping DFS client")
            self.fs.stop()
            self.info("Stopped")

    def extract_cls(self, mname, fname):
        """
        Load a module and return one of its attributes
        @param mname the name of the module, handed to load_module()
        @param fname the name of the attribute to look up in the module
        @return the attribute called fname of the loaded module
        """
        module = load_module(mname)
        return getattr(module, fname)

    def _download_with_retry(self, fname):
        """
        Download a file from the DFS, retrying every 2 seconds until the
        download call returns a true value.
        @param fname the DFS name of the file to download
        """
        downloaded = False

        while not downloaded:
            try:
                downloaded = self.fs.downloadFile(fname)
            except Exception:
                # Only real errors are retried: KeyboardInterrupt and
                # SystemExit must still be able to stop the worker.
                downloaded = False

            if not downloaded:
                # Back off before retrying instead of spinning
                self.info("Failed to download %s. Retrying in 2 sec" % fname)
                sleep(2)

    def pull_remote_files(self, reduce_idx, file_ids):
        """
        Pull a set of files from the global DFS
        @param reduce_idx the reducer ID
        @param file_ids an iterable object containing integers (they will be
                        casted to int())
        """
        if not self.use_dfs:
            return

        for fileid in file_ids:
            fname = "output-r{:06d}-p{:018d}".format(reduce_idx, int(fileid))
            fname = os.path.join(self.output_prefix, fname)
            full_path = os.path.join(self.datadir, fname)

            self.info("Checking %s" % full_path)

            if os.path.exists(full_path):
                self.info("Skipping %s. It is already present" % fname)
                continue

            self.info("Worker worker_id=%d is downloading file '%s'" % \
                      (self.worker_id, fname))

            self._download_with_retry(fname)

    def pull_remote_file(self, inp):
        """
        Pull a file from the global DFS
        @param inp a tuple in the form (file name, file id)
        @return a tuple (path of the file below the data directory, file id)
        """
        filename, fileid = inp

        if self.use_dfs:
            self.info("Worker worker_id=%d is downloading file '%s'" % \
                      (self.worker_id, filename))

            self._download_with_retry(filename)

        return (os.path.join(self.datadir, filename), fileid)

    def push_local_file(self, fname, push=False):
        """
        Push a local file into the global DFS
        @param fname the file to import
        @param push if True the file will be pushed on the master.
        """
        if not self.use_dfs:
            return

        fname = os.path.join(self.output_prefix, fname)
        self.info("Pushing file '%s' into global DFS" % fname)

        self.fs.importFile(os.path.join(self.datadir, fname), fname)

        if push:
            ret = self.fs.pushFile(fname)
            self.info("Pushing returned %s" % str(ret))
# Initialize the instance by calling Filesystem.__init__ on it directly
# (Filesystem is presumably the base class); no other state is set up here.
def __init__(self): Filesystem.__init__(self)
class WorkQueue(object):
    """
    The object is able to merge a generator and a queue and transparently
    expose a simple interface for retrieving objects. The generator is
    prioritized with respect to the queue, which is used as a backup in some
    sense.
    """

    def __init__(self, logger, gen, use_dfs=False, dfs_conf=None):
        """
        Initialize a WorkQueue instance
        @param logger a logger object
        @param gen a generator
        @param use_dfs boolean indicating whether to use a DFS or not
        @param dfs_conf a dictionary containing the necessary parameters for
                        the DFS Master initialization.
        @raise Exception if use_dfs is set but DFS_AVAILABLE is false
        """
        self.logger = logger
        self.generator = gen
        self.dead_queue = []
        self.last_tag = 0
        self.use_dfs = use_dfs

        if use_dfs and not DFS_AVAILABLE:
            raise Exception("You need to install fsdfs in order to use the" \
                            " distributed mode. Otherwise just toggle it " \
                            "off from the configuration file")
        elif use_dfs:
            self.datadir = dfs_conf['datadir']
            self.fs = Filesystem(dfs_conf)

    def push(self, item):
        """
        Push an item in the dead_queue. Please beware that it is not possible
        to push None objects
        @param item the item you want to push
        @raise ValueError if item is None
        """
        if item is None:
            raise ValueError("Cannot push None in the queue")

        self.dead_queue.append(item)

    def next(self):
        """
        Extract the next value from the WorkQueue
        @return an object or None if the retrieve is not possible
        """
        # The tag advances on every call, even when nothing is returned
        self.last_tag += 1

        try:
            # Here value is a tuple (path to file, file id).
            # The builtin next() works with both Python 2 and Python 3
            # iterators, unlike calling the .next() method directly.
            fname, fid = next(self.generator)
        except StopIteration:
            # Generator exhausted: fall back on the dead queue (FIFO order)
            if self.dead_queue:
                value = self.dead_queue.pop(0)
                return WorkerStatus(TYPE_MAP, self.last_tag, value)

            return None

        if self.use_dfs:
            self.logger.info("Publishing file '%s' from '%s'" % \
                             (fname, self.datadir))
            self.fs.importFile(os.path.join(self.datadir, fname), fname)

        return WorkerStatus(TYPE_MAP, self.last_tag, (fname, fid))

    def pop(self):
        """
        An alias for the next method
        @return an object
        """
        return self.next()
class Reset:
    """
    Helper that restores the working image directory ('imagesdir') from
    the backup directory ('imagesbackup') and can wipe it again. All disk
    access goes through a Filesystem object.
    """

    def __init__(self):
        """
        Record the directory names and make sure the working directory
        exists, creating it when it is missing.
        """
        self.sourceDir = 'imagesbackup'
        self.destDir = 'imagesdir'
        self.filesystem = Filesystem()

        fs = self.filesystem
        target = self.destDir
        if not fs.checkDirExists(target):
            fs.makeDir(target)

        # Fail loudly if the directory is still missing after the attempt.
        assert fs.checkDirExists(target), target + ' does not exist'

    def fill(self):
        """
        Copy every JPEG listed in the backup directory into the working
        directory. The listing is kept in ``self.sourceImages``.
        """
        fs = self.filesystem
        origin = self.sourceDir
        assert fs.checkDirExists(origin), origin + ' does not exist'

        images = fs.listJpegs(origin)
        self.sourceImages = images
        for name in images:
            fs.copy([origin, name], [self.destDir, name])

    def empty(self, path='', removeDir=True):
        """
        Remove the JPEGs listed in a directory and, optionally, the
        directory itself. Does nothing when the directory does not exist.
        The listing is kept in ``self.destImages``.

        path      -- directory to empty; '' selects the working directory
        removeDir -- when true, the directory is removed after its images
        """
        fs = self.filesystem
        chosen = self.destDir if path == '' else path
        target = fs.joinPath(chosen)

        if fs.checkDirExists(target):
            images = fs.listJpegs(target)
            self.destImages = images
            for name in images:
                fs.removeFile([target, name])

            if removeDir:
                fs.removeDir(target)