def __get_entries_to_update(self, requested_entry_id):
    # Get more entries than just what was requested, while we're at it.

    try:
        parent_ids = drive_proxy('get_parents_containing_id',
                                 child_id=requested_entry_id)
    except:
        self.__log.exception("Could not retrieve parents for child with ID "
                             "[%s]." % (requested_entry_id))
        raise

    self.__log.debug("Found (%d) parents." % (len(parent_ids)))

    affected_entries = [requested_entry_id]
    considered_entries = {}
    max_readahead_entries = Conf.get('max_readahead_entries')

    for parent_id in parent_ids:
        self.__log.debug("Retrieving children for parent with ID [%s]." %
                         (parent_id))

        try:
            child_ids = drive_proxy('get_children_under_parent_id',
                                    parent_id=parent_id)
        except:
            self.__log.exception("Could not retrieve children for parent "
                                 "with ID [%s]." % (parent_id))
            raise

        self.__log.debug("(%d) children found under parent with ID [%s]." %
                         (len(child_ids), parent_id))

        for child_id in child_ids:
            if child_id == requested_entry_id:
                continue

            # We've already looked into this entry.

            if child_id in considered_entries:
                continue

            considered_entries[child_id] = True

            # Is it already cached?

            if self.cache.exists(child_id):
                continue

            affected_entries.append(child_id)

            if len(affected_entries) >= max_readahead_entries:
                break

    return affected_entries
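# A minimal, self-contained sketch of the readahead aggregation performed by
# __get_entries_to_update() above: collect sibling IDs around the requested
# entry, skipping duplicates and already-cached entries, and stop at a fixed
# cap. "is_cached" is a hypothetical stand-in for the self.cache.exists()
# test, and "siblings_by_parent" for the per-parent child lists.

def collect_readahead_ids(requested_id, siblings_by_parent, is_cached,
                          max_entries):
    """Return the requested ID plus up to (max_entries - 1) sibling IDs."""

    affected = [requested_id]
    considered = set()

    for sibling_ids in siblings_by_parent:
        for sibling_id in sibling_ids:
            if sibling_id == requested_id or sibling_id in considered:
                continue

            considered.add(sibling_id)

            if is_cached(sibling_id):
                continue

            affected.append(sibling_id)

            if len(affected) >= max_entries:
                return affected

    return affected

# Example:
#   collect_readahead_ids('a', [['a', 'b', 'c']], lambda _: False, 2)
#   -> ['a', 'b']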
def rmdir(self, filepath):
    """Remove a directory."""

    path_relations = PathRelations.get_instance()

    try:
        entry_clause = path_relations.get_clause_from_path(filepath)
    except GdNotFoundError:
        _logger.exception("Could not process [%s] (rmdir)." % (filepath))
        raise FuseOSError(ENOENT)
    except:
        _logger.exception("Could not get clause from file-path [%s] "
                          "(rmdir)." % (filepath))
        raise FuseOSError(EIO)

    if not entry_clause:
        _logger.error("Path [%s] does not exist for rmdir()." % (filepath))
        raise FuseOSError(ENOENT)

    entry_id = entry_clause[CLAUSE_ID]
    normalized_entry = entry_clause[CLAUSE_ENTRY]

    # Check if not a directory.

    if not normalized_entry.is_directory:
        _logger.error("Can not rmdir() non-directory [%s] with ID [%s].",
                      filepath, entry_id)
        raise FuseOSError(ENOTDIR)

    # Ensure the folder is empty.

    try:
        found = drive_proxy('get_children_under_parent_id',
                            parent_id=entry_id,
                            max_results=1)
    except:
        _logger.exception("Could not determine if directory [%s] to be "
                          "removed has children." % (entry_id))
        raise FuseOSError(EIO)

    if found:
        raise FuseOSError(ENOTEMPTY)

    try:
        drive_proxy('remove_entry', normalized_entry=normalized_entry)
    except NameError:
        raise FuseOSError(ENOENT)
    except:
        _logger.exception("Could not remove directory [%s] with ID [%s]." %
                          (filepath, entry_id))
        raise FuseOSError(EIO)
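# A minimal sketch of the emptiness probe used by rmdir() above: ask the
# backend for at most one child and map a non-empty result to ENOTEMPTY.
# "list_children" is a hypothetical callable standing in for the
# drive_proxy('get_children_under_parent_id', ...) call.

from errno import ENOTEMPTY

from fuse import FuseOSError  # fusepy


def ensure_directory_empty(list_children, entry_id):
    """Raise ENOTEMPTY if the directory has at least one child."""

    # Fetching a single child is enough to decide emptiness, and avoids
    # paging through a potentially large folder.
    if list_children(parent_id=entry_id, max_results=1):
        raise FuseOSError(ENOTEMPTY)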
def __do_update_for_missing_entry(self, requested_entry_id):
    # Get the entries to update.

    affected_entries = self.__get_entries_to_update(requested_entry_id)

    # Read the entries, now.

    # self.__log.debug("(%d) primary and secondary entry/entries will be "
    #                  "updated." % (len(affected_entries)))

    # TODO: We have to determine when this is called, and either remove it
    #       (if it's not), or find another way to not have to load them
    #       individually.

    retrieved = drive_proxy('get_entries', entry_ids=affected_entries)

    # Update the cache.

    path_relations = PathRelations.get_instance()

    for entry_id, entry in retrieved.iteritems():
        path_relations.register_entry(entry)

    # self.__log.debug("(%d) entries were loaded.", len(retrieved))

    return retrieved
def deposit_file(self, mime_type):
    """Write the file to a temporary path, and present a stub (JSON) to the
    user. This is the only way of getting files that don't have a
    well-defined filesize without providing a type, ahead of time.
    """

    temp_path = Conf.get('file_download_temp_path')
    file_path = ("%s/displaced/%s.%s" %
                 (temp_path, self.__normalized_entry.title,
                  mime_type.replace('/', '+')))

    try:
        result = drive_proxy('download_to_local',
                             output_file_path=file_path,
                             normalized_entry=self.__normalized_entry,
                             mime_type=mime_type)

        (length, cache_fault) = result
    except:
        self.__log.exception("Could not localize displaced file with "
                             "entry having ID [%s]." %
                             (self.__normalized_entry.id))
        raise

    self.__log.debug("Displaced entry [%s] deposited to [%s] with length "
                     "(%d)." % (self.__normalized_entry, file_path, length))

    try:
        return self.get_stub(mime_type, length, file_path)
    except:
        self.__log.exception("Could not build stub for [%s]." %
                             (self.__normalized_entry))
        raise
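# A minimal sketch of the kind of JSON stub that get_stub() above could
# return. The exact schema is not shown in this code; the field names here
# are illustrative assumptions only.

import json


def build_stub(mime_type, length, file_path):
    """Describe a locally deposited, size-less ("displaced") file."""

    return json.dumps({
        'Stub': True,              # Marks the content as a pointer.
        'MimeType': mime_type,     # Export format that was materialized.
        'Length': length,          # Size of the local copy, in bytes.
        'FilePath': file_path,     # Where the real data was deposited.
    })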
def rename(self, filepath_old, filepath_new):
    # Make sure the old filepath exists.

    (entry, path, filename_old) = get_entry_or_raise(filepath_old)

    # At this point, decorations, the is-hidden prefix, etc.. haven't been
    # stripped.

    (path, filename_new_raw) = split(filepath_new)

    # Make sure the new filepath doesn't exist.

    try:
        get_entry_or_raise(filepath_new, True)
    except GdNotFoundError:
        pass

    try:
        entry = drive_proxy('rename',
                            normalized_entry=entry,
                            new_filename=filename_new_raw)
    except:
        _logger.exception("Could not update entry [%s] for rename." %
                          (entry))
        raise FuseOSError(EIO)

    # Update our knowledge of the entry.

    path_relations = PathRelations.get_instance()

    try:
        path_relations.register_entry(entry)
    except:
        _logger.exception("Could not register renamed entry: %s" % (entry))
        raise FuseOSError(EIO)
def utimens(self, raw_path, times=None):
    """Set the file times."""

    if times is not None:
        (atime, mtime) = times
    else:
        now = time()
        (atime, mtime) = (now, now)

    (entry, path, filename) = self.__get_entry_or_raise(raw_path)

    mtime_phrase = get_flat_normal_fs_time_from_epoch(mtime)
    atime_phrase = get_flat_normal_fs_time_from_epoch(atime)

    self.__log.debug("Updating entry [%s] with m-time [%s] and a-time "
                     "[%s]." % (entry, mtime_phrase, atime_phrase))

    try:
        entry = drive_proxy('update_entry',
                            normalized_entry=entry,
                            modified_datetime=mtime_phrase,
                            accessed_datetime=atime_phrase)
    except:
        self.__log.exception("Could not update entry [%s] for times." %
                             (entry))
        raise FuseOSError(EIO)

    self.__log.debug("Entry [%s] mtime is now [%s] and atime is now "
                     "[%s]." % (entry, entry.modified_date,
                                entry.atime_byme_date))

    return 0
def __load_all_children(self, parent_id):
    # self.__log.debug("Loading children under parent with ID [%s].",
    #                  parent_id)

    with PathRelations.rlock:
        children = drive_proxy('list_files', parent_id=parent_id)

        child_ids = []

        if children:
            # self.__log.debug("(%d) children returned and will be "
            #                  "registered.", len(children))

            for child in children:
                self.register_entry(child)

            # self.__log.debug("Looking up parent with ID [%s] for all-"
            #                  "children update.", parent_id)

            parent_clause = self.__get_entry_clause_by_id(parent_id)

            parent_clause[4] = True

            # self.__log.debug("All children have been loaded.")

    return children
def truncate(self, filepath, length, fh=None):
    if fh is not None:
        try:
            opened_file = OpenedManager.get_instance().get_by_fh(fh)
        except:
            _logger.exception("Could not retrieve OpenedFile for handle "
                              "with ID (%d) (truncate)." % (fh))
            raise FuseOSError(EIO)

        opened_file.reset_state()

        entry_id = opened_file.entry_id
        cache = EntryCache.get_instance().cache

        try:
            entry = cache.get(entry_id)
        except:
            _logger.exception("Could not fetch normalized entry with "
                              "ID [%s] for truncate with FH." % (entry_id))
            raise
    else:
        (entry, path, filename) = get_entry_or_raise(filepath)

    try:
        entry = drive_proxy('truncate', normalized_entry=entry)
    except:
        _logger.exception("Could not truncate entry [%s]." % (entry))
        raise FuseOSError(EIO)
def utimens(self, raw_path, times=None):
    """Set the file times."""

    if times is not None:
        (atime, mtime) = times
    else:
        now = time()
        (atime, mtime) = (now, now)

    (entry, path, filename) = self.__get_entry_or_raise(raw_path)

    tz_get = lambda dt: datetime.fromtimestamp(dt, tzlocal()).\
                            astimezone(tzutc())

    mtime_phrase = build_rfc3339_phrase(tz_get(mtime))
    atime_phrase = build_rfc3339_phrase(tz_get(atime))

    self.__log.debug("Updating entry [%s] with m-time [%s] and a-time "
                     "[%s]." % (entry, mtime_phrase, atime_phrase))

    try:
        entry = drive_proxy('update_entry',
                            normalized_entry=entry,
                            modified_datetime=mtime_phrase,
                            accessed_datetime=atime_phrase)
    except:
        self.__log.exception("Could not update entry [%s] for times." %
                             (entry))
        raise FuseOSError(EIO)

    self.__log.debug("Entry [%s] mtime is now [%s] and atime is now "
                     "[%s]." % (entry, entry.modified_date,
                                entry.atime_byme_date))

    return 0
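# A minimal sketch of the epoch-to-RFC-3339 conversion assumed by
# build_rfc3339_phrase() above. The real helper lives elsewhere in this
# project; this stand-in only shows the intended shape of the output, with
# millisecond precision assumed to match the Drive API's timestamps.

from datetime import datetime

from dateutil.tz import tzlocal, tzutc


def epoch_to_rfc3339(epoch):
    """Render an epoch timestamp as an RFC 3339 phrase in UTC."""

    dt = datetime.fromtimestamp(epoch, tzlocal()).astimezone(tzutc())

    # Truncate microseconds to milliseconds.
    return dt.strftime('%Y-%m-%dT%H:%M:%S.') + \
           ('%03dZ' % (dt.microsecond // 1000))

# Example: epoch_to_rfc3339(0) -> '1970-01-01T00:00:00.000Z'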
def utimens(self, raw_path, times=None):
    """Set the file times."""

    if times is not None:
        (atime, mtime) = times
    else:
        now = time()
        (atime, mtime) = (now, now)

    (entry, path, filename) = get_entry_or_raise(raw_path)

    mtime_phrase = get_flat_normal_fs_time_from_epoch(mtime)
    atime_phrase = get_flat_normal_fs_time_from_epoch(atime)

    try:
        entry = drive_proxy('update_entry',
                            normalized_entry=entry,
                            modified_datetime=mtime_phrase,
                            accessed_datetime=atime_phrase)
    except:
        _logger.exception("Could not update entry [%s] for times." %
                          (entry))
        raise FuseOSError(EIO)

    return 0
def __create(self, filepath, mode=None):
    """Create a new file. We don't implement "mode" (permissions) because
    the model doesn't agree with GD.
    """

    # TODO: Fail if it already exists.

    try:
        result = split_path(filepath, path_resolver)
        (parent_clause, path, filename, mime_type, is_hidden) = result
    except GdNotFoundError:
        _logger.exception("Could not process [%s] (i-create)." % (filepath))
        raise FuseOSError(ENOENT)
    except:
        _logger.exception("Could not split path [%s] (i-create)." %
                          (filepath))
        raise FuseOSError(EIO)

    distilled_filepath = build_filepath(path, filename)

    # Try to guess at a mime-type, if not otherwise given.

    if mime_type is None:
        (mimetype_guess, _) = guess_type(filename, True)

        if mimetype_guess is not None:
            mime_type = mimetype_guess
        else:
            mime_type = Conf.get('default_mimetype')

    try:
        entry = drive_proxy('create_file',
                            filename=filename,
                            data_filepath='/dev/null',
                            parents=[parent_clause[3]],
                            mime_type=mime_type,
                            is_hidden=is_hidden)
    except:
        _logger.exception("Could not create empty file [%s] under parent "
                          "with ID [%s]." % (filename, parent_clause[3]))
        raise FuseOSError(EIO)

    path_relations = PathRelations.get_instance()

    try:
        path_relations.register_entry(entry)
    except:
        _logger.exception("Could not register created file in cache.")
        raise FuseOSError(EIO)

    _logger.info("Inner-create of [%s] completed." % (distilled_filepath))

    return (entry, path, filename, mime_type)
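# A minimal sketch of the mime-type fallback above, using the standard
# library's mimetypes module directly. 'application/octet-stream' stands in
# for whatever Conf.get('default_mimetype') returns in this project.

from mimetypes import guess_type


def resolve_mime_type(filename, default='application/octet-stream'):
    """Guess a mime-type from the filename, falling back to a default."""

    # guess_type() returns (type, encoding); strict=True limits the search
    # to officially registered types.
    (mimetype_guess, _) = guess_type(filename, True)

    return mimetype_guess if mimetype_guess is not None else default

# Example: resolve_mime_type('notes.txt') -> 'text/plain'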
def process_updates(self):
    """Process any changes to our files. Return True if everything is up to
    date or False if we need to be run again.
    """

    start_at_id = (self.at_change_id + 1)

    try:
        result = drive_proxy('list_changes', start_change_id=start_at_id)
    except:
        self.__log.exception("Could not retrieve updates. Skipped.")
        return True

    (largest_change_id, next_page_token, changes) = result

    self.__log.debug("The latest reported change-ID is (%d) and we're "
                     "currently at change-ID (%d)." %
                     (largest_change_id, self.at_change_id))

    if largest_change_id == self.at_change_id:
        self.__log.debug("No entries have changed.")
        return True

    self.__log.info("(%d) changes will now be applied." % (len(changes)))

    for change_id, change_tuple in changes.iteritems():
        # Apply the changes. We expect to be running them from oldest to
        # newest.

        self.__log.info("========== Change with ID (%d) will now be "
                        "applied. ==========" % (change_id))

        try:
            self.__apply_change(change_id, change_tuple)
        except:
            self.__log.exception("There was a problem while processing "
                                 "change with ID (%d). No more changes "
                                 "will be applied." % (change_id))
            return False

        self.at_change_id = change_id

    return (next_page_token is None)
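# A minimal, self-contained sketch of the cursor-advance pattern used by
# process_updates() above: apply ordered changes, advance the cursor only
# after each success, and report whether the caller needs another pass.
# "apply_one" is a hypothetical stand-in for __apply_change().

def apply_changes(at_change_id, changes, apply_one):
    """Apply (change_id, change) pairs in ascending order.

    Returns (new_cursor, done): done is False if a failure stopped the run
    early, so the caller can retry from the new cursor.
    """

    for change_id, change in sorted(changes.items()):
        try:
            apply_one(change_id, change)
        except Exception:
            # Stop here; everything at or before the cursor is committed.
            return (at_change_id, False)

        at_change_id = change_id

    return (at_change_id, True)

# Example:
#   applied = []
#   apply_changes(0, {2: 'b', 1: 'a'}, lambda i, c: applied.append(c))
#   -> (2, True), with applied == ['a', 'b']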
def mkdir(self, filepath, mode):
    """Create the given directory."""

    # TODO: Implement the "mode".

    try:
        result = split_path(filepath, path_resolver)
        (parent_clause, path, filename, mime_type, is_hidden) = result
    except GdNotFoundError:
        self.__log.exception("Could not process [%s] (mkdir)." % (filepath))
        raise FuseOSError(ENOENT)
    except:
        self.__log.exception("Could not split path [%s] (mkdir)." %
                             (filepath))
        raise FuseOSError(EIO)

    parent_id = parent_clause[CLAUSE_ID]

    self.__log.debug("Creating directory [%s] under parent [%s] with ID "
                     "[%s]." % (filename, path, parent_id))

    try:
        entry = drive_proxy('create_directory',
                            filename=filename,
                            parents=[parent_id],
                            is_hidden=is_hidden)
    except:
        self.__log.exception("Could not create directory with name [%s] "
                             "and parent with ID [%s]." %
                             (filename, parent_clause[0].id))
        raise FuseOSError(EIO)

    self.__log.info("Directory [%s] created as ID [%s] under parent with "
                    "ID [%s]." % (filepath, entry.id, parent_id))

    #parent_clause[4] = False

    path_relations = PathRelations.get_instance()

    try:
        path_relations.register_entry(entry)
    except:
        self.__log.exception("Could not register new directory in cache.")
        raise FuseOSError(EIO)
def __do_update_for_missing_entry(self, requested_entry_id):
    # Get the entries to update.

    try:
        affected_entries = self.__get_entries_to_update(requested_entry_id)
    except:
        self.__log.exception("Could not aggregate requested and readahead "
                             "entries to refresh.")
        raise

    # Read the entries, now.

    self.__log.info("(%d) primary and secondary entry/entries will be "
                    "updated." % (len(affected_entries)))

    # TODO: We have to determine when this is called, and either remove it
    #       (if it's not), or find another way to not have to load them
    #       individually.

    try:
        retrieved = drive_proxy('get_entries', entry_ids=affected_entries)
    except:
        self.__log.exception("Could not retrieve the (%d) entries." %
                             (len(affected_entries)))
        raise

    # Update the cache.

    path_relations = PathRelations.get_instance()

    for entry_id, entry in retrieved.iteritems():
        try:
            path_relations.register_entry(entry)
        except:
            self.__log.exception("Could not register entry with ID [%s] "
                                 "with path-relations cache." % (entry_id))
            raise

    self.__log.debug("(%d) entries were loaded." % (len(retrieved)))

    return retrieved
def __load_all_children(self, parent_id):
    self.__log.info("Loading children under parent with ID [%s]." %
                    (parent_id))

    with PathRelations.rlock:
        try:
            children = drive_proxy('list_files', parent_id=parent_id)
        except:
            self.__log.exception("Could not retrieve children for parent "
                                 "with ID [%s]." % (parent_id))
            raise

        child_ids = []

        if children:
            self.__log.debug("(%d) children returned and will be "
                             "registered." % (len(children)))

            for child in children:
                try:
                    self.register_entry(child)
                except:
                    self.__log.exception("Could not register retrieved-"
                                         "entry for child with ID [%s] in "
                                         "path-cache." % (child.id))
                    raise

            self.__log.debug("Looking up parent with ID [%s] for all-"
                             "children update." % (parent_id))

            try:
                parent_clause = self.__get_entry_clause_by_id(parent_id)
            except:
                self.__log.exception("Could not retrieve clause for "
                                     "parent-entry [%s] in load-all-"
                                     "children function." % (parent_id))
                raise

            parent_clause[4] = True

            self.__log.debug("All children have been loaded.")

    return children
def truncate(self, filepath, length, fh=None):
    self.__log.debug("Truncating file-path [%s] with FH [%s]." %
                     (filepath, fh))

    if fh is not None:
        self.__log.debug("Doing truncate by FH (%d)." % (fh))

        try:
            opened_file = OpenedManager.get_instance().get_by_fh(fh)
        except:
            self.__log.exception("Could not retrieve OpenedFile for handle "
                                 "with ID (%d) (truncate)." % (fh))
            raise FuseOSError(EIO)

        self.__log.debug("Truncating and clearing FH: %s" % (opened_file))

        opened_file.reset_state()

        entry_id = opened_file.entry_id
        cache = EntryCache.get_instance().cache

        try:
            entry = cache.get(entry_id)
        except:
            self.__log.exception("Could not fetch normalized entry with "
                                 "ID [%s] for truncate with FH." %
                                 (entry_id))
            raise
    else:
        (entry, path, filename) = self.__get_entry_or_raise(filepath)

    self.__log.debug("Sending truncate request for [%s]." % (entry))

    try:
        entry = drive_proxy('truncate', normalized_entry=entry)
    except:
        self.__log.exception("Could not truncate entry [%s]." % (entry))
        raise FuseOSError(EIO)
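# A minimal sketch of the handle-vs-path dispatch used by truncate() above:
# prefer the already-opened file when the kernel supplies a handle, and fall
# back to a path lookup otherwise. "lookup_by_fh" and "lookup_by_path" are
# hypothetical stand-ins for the OpenedManager and path-resolution calls.

def resolve_for_truncate(lookup_by_fh, lookup_by_path, filepath, fh=None):
    """Return the entry to truncate, preferring the open file-handle."""

    if fh is not None:
        opened_file = lookup_by_fh(fh)

        # Drop any buffered, uncommitted writes before truncating.
        opened_file.reset_state()

        return opened_file.entry

    return lookup_by_path(filepath)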
def get_data(self):
    try:
        return drive_proxy('get_about_info')
    except:
        self.__log.exception("get_about_info() call failed.")
        raise
def __load_base_from_remote(self):
    """Download the data for the entry that we represent. This is probably
    a file, but could also be a stub for -any- entry.
    """

    try:
        entry = self.__get_entry_or_raise()
    except:
        self.__log.exception("Could not get entry with ID [%s] for "
                             "write-flush." % (self.__entry_id))
        raise

    self.__log.debug("Ensuring local availability of [%s]." % (entry))

    temp_file_path = get_temp_filepath(entry, self.mime_type)

    self.__log.debug("__load_base_from_remote about to download.")

    with self.__class__.__download_lock:
        # Get the current version of the write-cache file, or note that we
        # don't have it.

        self.__log.info("Attempting local cache update of file [%s] for "
                        "entry [%s] and mime-type [%s]." %
                        (temp_file_path, entry, self.mime_type))

        if entry.requires_mimetype:
            length = DisplacedFile.file_size

            try:
                d = DisplacedFile(entry)
                stub_data = d.deposit_file(self.mime_type)

                with open(temp_file_path, 'w') as f:
                    f.write(stub_data)
            except:
                self.__log.exception("Could not deposit to file [%s] from "
                                     "entry [%s]." % (temp_file_path,
                                                      entry))
                raise

            # TODO: Accommodate the cache for displaced-files.
            cache_fault = True
        else:
            self.__log.info("Executing the download.")

            try:
                # TODO(dustin): We're not inheriting an existing file (same
                #               mtime, same size).
                result = drive_proxy('download_to_local',
                                     output_file_path=temp_file_path,
                                     normalized_entry=entry,
                                     mime_type=self.mime_type)

                (length, cache_fault) = result
            except ExportFormatError:
                self.__log.exception("There was an export-format error.")
                raise FuseOSError(ENOENT)
            except:
                self.__log.exception("Could not localize file with entry "
                                     "[%s]." % (entry))
                raise

        self.__log.info("Download complete. cache_fault= [%s] "
                        "__is_loaded= [%s]" %
                        (cache_fault, self.__is_loaded))

        # We've either not loaded it, yet, or it has changed.
        if cache_fault or not self.__is_loaded:
            with self.__class__.__update_lock:
                self.__log.info("Checking queued items for fault.")

                if cache_fault:
                    if self.__is_dirty:
                        self.__log.error("Entry [%s] has been changed. "
                                         "Forcing buffer updates, and "
                                         "clearing uncommitted updates." %
                                         (entry))
                    else:
                        self.__log.debug("Entry [%s] has changed. "
                                         "Updating buffers." % (entry))

                self.__log.debug("Loading buffers.")

                with open(temp_file_path, 'rb') as f:
                    # Read the locally cached file in.

                    try:
                        # TODO(dustin): This is the source of:
                        #               1) An enormous slowdown where we
                        #                  first have to write the data, and
                        #                  then have to read it back.
                        #               2) An enormous resource burden.
                        data = f.read()

                        read_blocksize = \
                            Conf.get('default_buffer_read_blocksize')

                        self.__buffer = BufferSegments(data, read_blocksize)
                    except:
                        self.__log.exception("Could not read current "
                                             "cached file into buffer.")
                        raise

                self.__is_dirty = False
                self.__is_loaded = True

    self.__log.debug("__load_base_from_remote complete.")

    return cache_fault
def unlink(self, file_path):
    """Remove a file."""

    # TODO: Change to simply move to "trash". Have a FUSE option to elect
    #       this behavior.

    path_relations = PathRelations.get_instance()

    self.__log.debug("Removing file [%s]." % (file_path))

    try:
        entry_clause = path_relations.get_clause_from_path(file_path)
    except GdNotFoundError:
        self.__log.exception("Could not process [%s] (unlink)." %
                             (file_path))
        raise FuseOSError(ENOENT)
    except:
        self.__log.exception("Could not get clause from file-path [%s] "
                             "(unlink)." % (file_path))
        raise FuseOSError(EIO)

    if not entry_clause:
        self.__log.error("Path [%s] does not exist for unlink()." %
                         (file_path))
        raise FuseOSError(ENOENT)

    entry_id = entry_clause[CLAUSE_ID]
    normalized_entry = entry_clause[CLAUSE_ENTRY]

    # Check if a directory.

    self.__log.debug("Ensuring it is a file (not a directory).")

    if normalized_entry.is_directory:
        self.__log.error("Can not unlink() directory [%s] with ID [%s]. "
                         "Must be file.", file_path, entry_id)
        raise FuseOSError(errno.EISDIR)

    self.__log.debug("Doing remove of file [%s] with ID [%s]." %
                     (file_path, entry_id))

    # Remove online. Complements local removal (if not found locally, a
    # follow-up request checks online).

    try:
        drive_proxy('remove_entry', normalized_entry=normalized_entry)
    except NameError:
        raise FuseOSError(ENOENT)
    except:
        self.__log.exception("Could not remove file [%s] with ID [%s]." %
                             (file_path, entry_id))
        raise FuseOSError(EIO)

    # Remove from cache. Will no longer be able to be found, locally.

    self.__log.debug("Removing all trace of entry [%s] from cache "
                     "(unlink)." % (normalized_entry))

    try:
        PathRelations.get_instance().remove_entry_all(entry_id)
    except:
        self.__log.exception("There was a problem removing entry [%s] "
                             "from the caches." % (normalized_entry))
        raise

    # Remove from among opened-files.

    self.__log.debug("Removing all opened-files for [%s]." % (file_path))

    try:
        opened_file = OpenedManager.get_instance().\
                          remove_by_filepath(file_path)
    except:
        self.__log.exception("There was an error while removing all "
                             "opened-file instances for file [%s] "
                             "(remove)." % (file_path))
        raise FuseOSError(EIO)

    self.__log.debug("File removal complete.")
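# A minimal sketch of the teardown order unlink() above relies on: remove
# the entry remotely first, then purge the caches, then invalidate open
# handles, so a failure at any step leaves the earlier steps already
# consistent with the server. The three callables are hypothetical
# stand-ins for the drive_proxy, PathRelations, and OpenedManager calls.

from errno import EIO, ENOENT

from fuse import FuseOSError  # fusepy


def unlink_pipeline(remove_remote, purge_cache, close_handles, file_path):
    """Run the unlink steps in server-first order."""

    try:
        remove_remote(file_path)
    except NameError:
        # The proxy signals an unknown entry this way.
        raise FuseOSError(ENOENT)
    except Exception:
        raise FuseOSError(EIO)

    purge_cache(file_path)     # Entry can no longer be found locally.
    close_handles(file_path)   # Invalidate any opened-file state.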
def flush(self):
    """The OS wants to effect any changes made to the file."""

    self.__log.debug("Retrieving entry for write-flush.")

    entry = self.__get_entry_or_raise()
    cache_fault = self.__load_base_from_remote()

    with self.__class__.__update_lock:
        if self.__is_dirty is False:
            self.__log.debug("Flush will be skipped because there are no "
                             "changes.")
            # TODO: Raise an exception?
            return

        # Write back out to the temporary file.

        self.__log.debug("Writing buffer to temporary file.")

        # TODO: Make sure to uncache the temp data if self.temp_file_path
        #       is not None.

        mime_type = self.mime_type

        # If we've already opened a work file, use it. Else, use a
        # temporary file that we'll close at the end of the method.

        if self.__is_loaded:
            is_temp = False

            temp_file_path = get_temp_filepath(entry, mime_type)

            with open(temp_file_path, 'w') as f:
                for block in self.__buffer.read():
                    f.write(block)

            write_filepath = temp_file_path
        else:
            is_temp = True

            with NamedTemporaryFile(delete=False) as f:
                write_filepath = f.name

                for block in self.__buffer.read():
                    f.write(block)

        # Push to GD.

        self.__log.debug("Pushing (%d) bytes for entry with ID [%s] to GD "
                         "from file-path [%s]." %
                         (self.__buffer.length, entry.id, write_filepath))

        # print("Sending updates.")

        # TODO: Update mtime?

        try:
            entry = drive_proxy('update_entry',
                                normalized_entry=entry,
                                filename=entry.title,
                                data_filepath=write_filepath,
                                mime_type=mime_type,
                                parents=entry.parents,
                                is_hidden=self.__is_hidden)
        except:
            self.__log.exception("Could not update entry having ID [%s] "
                                 "for flush." % (entry.id))
            raise

        if is_temp:
            unlink(write_filepath)
        else:
            # Update the write-cache file to the official mtime. We won't
            # redownload it on the next flush if it wasn't changed,
            # elsewhere.

            self.__log.debug("Updating local write-cache file to official "
                             "mtime [%s]." % (entry.modified_date_epoch))

            try:
                utime(write_filepath, (entry.modified_date_epoch,
                                       entry.modified_date_epoch))
            except:
                self.__log.exception("Could not update mtime [%s] of "
                                     "write-cache for entry with ID [%s], "
                                     "post-flush." %
                                     (entry.modified_date_epoch, entry.id))
                raise

        # Immediately update our current cached entry.

        self.__log.debug("Update successful. Updating local cache.")

        path_relations = PathRelations.get_instance()

        try:
            path_relations.register_entry(entry)
        except:
            self.__log.exception("Could not register updated file in "
                                 "cache.")
            raise

        self.__is_dirty = False

    self.__log.info("Update complete on entry with ID [%s]." % (entry.id))
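# A minimal sketch of the mtime-stamping trick used in flush() above: after
# a successful upload, stamp the local write-cache copy with the server's
# modification time so a later comparison can skip a redownload. Standard
# library only; the path and epoch are supplied by the caller.

import os


def stamp_cache_file(cache_path, remote_mtime_epoch):
    """Make the cached copy's atime/mtime match the remote entry."""

    os.utime(cache_path, (remote_mtime_epoch, remote_mtime_epoch))


def is_cache_current(cache_path, remote_mtime_epoch):
    """True if the cached copy carries the remote modification time."""

    return os.path.getmtime(cache_path) == remote_mtime_epoch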
def find_path_components_goandget(self, path):
    """Do the same thing that find_path_components() does, except that when
    we don't have record of a path-component, try to go and find it among
    the children of the previous path component, and then try again.
    """

    with PathRelations.rlock:
        previous_results = []
        i = 0

        while True:
            self.__log.info("Attempting to find path-components (go and "
                            "get) for path [%s]. CYCLE= (%d)" % (path, i))

            # See how many components can be found in our current cache.

            try:
                result = self.__find_path_components(path)
            except:
                self.__log.exception("There was a problem doing an "
                                     "iteration of find_path_components() "
                                     "on [%s]." % (path))
                raise

            self.__log.debug("Path resolution cycle (%d) results: %s" %
                             (i, result))

            # If we could resolve the entire path, return success.

            self.__log.debug("Found within current cache? %s" %
                             (result[2]))

            if result[2] is True:
                return result

            # If we could not resolve the entire path, and we're no more
            # successful than a prior attempt, we'll just have to return a
            # partial.

            num_results = len(result[0])

            if num_results in previous_results:
                self.__log.debug("We couldn't improve our results. This "
                                 "path most likely does not exist.")
                return result

            previous_results.append(num_results)

            self.__log.debug("(%d) path-components were found, but not "
                             "all." % (num_results))

            # Else, we've encountered a component/depth of the path that
            # we don't currently know about.

            # TODO: This is going to be the general area that we'd have to
            #       adjust to support multiple, identical entries. This
            #       currently only considers the first result. We should
            #       rewrite this to be recursive in order to make it easier
            #       to keep track of a list of results.

            # The parent is the last one found, or the root if none.

            parent_id = result[0][num_results - 1] \
                            if num_results \
                            else AccountInfo.get_instance().root_id

            # The child will be the first part that was not found.

            child_name = result[1][num_results]

            self.__log.debug("Trying to reconcile child named [%s] under "
                             "folder with entry-ID [%s]." %
                             (child_name, parent_id))

            try:
                children = drive_proxy('list_files',
                                       parent_id=parent_id,
                                       query_is_string=child_name)
            except:
                self.__log.exception("Could not retrieve children for "
                                     "parent with ID [%s]." % (parent_id))
                raise

            for child in children:
                try:
                    self.register_entry(child)
                except:
                    self.__log.exception("Could not register child entry "
                                         "for entry with ID [%s] in path-"
                                         "cache." % (child.id))
                    raise

            filenames_phrase = ', '.join([candidate.id
                                          for candidate in children])

            self.__log.debug("(%d) candidate children were found: %s" %
                             (len(children), filenames_phrase))

            i += 1
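# A minimal, self-contained sketch of the termination rule used above: keep
# retrying a partial path resolution, but give up as soon as a cycle fails
# to resolve more components than some earlier cycle did. "resolve_once"
# and "fetch_missing" are hypothetical stand-ins for
# __find_path_components() and the list_files round-trip.

def resolve_with_retries(resolve_once, fetch_missing):
    """resolve_once() -> (found_ids, parts, complete); fetch_missing(n)
    loads the component at depth n into the cache.
    """

    previous_counts = []

    while True:
        (found_ids, parts, complete) = resolve_once()

        if complete:
            return (found_ids, parts, True)

        num_found = len(found_ids)

        # No improvement over a previous cycle: the path doesn't exist.
        if num_found in previous_counts:
            return (found_ids, parts, False)

        previous_counts.append(num_found)

        # Pull the first unresolved component and try again.
        fetch_missing(num_found)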
def get_data(self, key):
    try:
        return drive_proxy('get_about_info')
    except:
        self.__log.exception("get_about_info() call failed.")
        raise
def find_path_components_goandget(self, path):
    """Do the same thing that find_path_components() does, except that when
    we don't have record of a path-component, try to go and find it among
    the children of the previous path component, and then try again.
    """

    with PathRelations.rlock:
        previous_results = []
        i = 0

        while True:
            # self.__log.debug("Attempting to find path-components (go and "
            #                  "get) for path [%s]. CYCLE= (%d)", path, i)

            # See how many components can be found in our current cache.

            result = self.__find_path_components(path)

            # self.__log.debug("Path resolution cycle (%d) results: %s" %
            #                  (i, result))

            # If we could resolve the entire path, return success.

            # self.__log.debug("Found within current cache? %s" %
            #                  (result[2]))

            if result[2] is True:
                return result

            # If we could not resolve the entire path, and we're no more
            # successful than a prior attempt, we'll just have to return a
            # partial.

            num_results = len(result[0])

            if num_results in previous_results:
                # self.__log.debug("We couldn't improve our results. This "
                #                  "path most likely does not exist.")

                return result

            previous_results.append(num_results)

            # self.__log.debug("(%d) path-components were found, but not "
            #                  "all." % (num_results))

            # Else, we've encountered a component/depth of the path that
            # we don't currently know about.

            # TODO: This is going to be the general area that we'd have to
            #       adjust to support multiple, identical entries. This
            #       currently only considers the first result. We should
            #       rewrite this to be recursive in order to make it easier
            #       to keep track of a list of results.

            # The parent is the last one found, or the root if none.

            parent_id = result[0][num_results - 1] \
                            if num_results \
                            else AccountInfo.get_instance().root_id

            # The child will be the first part that was not found.

            child_name = result[1][num_results]

            # self.__log.debug("Trying to reconcile child named [%s] under "
            #                  "folder with entry-ID [%s]." %
            #                  (child_name, parent_id))

            children = drive_proxy('list_files',
                                   parent_id=parent_id,
                                   query_is_string=child_name)

            for child in children:
                self.register_entry(child)

            filenames_phrase = ', '.join([candidate.id
                                          for candidate in children])

            # self.__log.debug("(%d) candidate children were found: %s",
            #                  len(children), filenames_phrase)

            i += 1
def __load_base_from_remote(self):
    """Download the data for the entry that we represent. This is probably
    a file, but could also be a stub for -any- entry.
    """

    try:
        entry = self.__get_entry_or_raise()
    except:
        self.__log.exception("Could not get entry with ID [%s] for "
                             "write-flush." % (self.__entry_id))
        raise

    self.__log.debug("Ensuring local availability of [%s]." % (entry))

    temp_file_path = get_temp_filepath(entry, self.mime_type)

    self.__log.debug("__load_base_from_remote about to download.")

    with self.__class__.__download_lock:
        # Get the current version of the write-cache file, or note that we
        # don't have it.

        self.__log.info("Attempting local cache update of file [%s] for "
                        "entry [%s] and mime-type [%s]." %
                        (temp_file_path, entry, self.mime_type))

        if entry.requires_mimetype:
            length = DisplacedFile.file_size

            try:
                d = DisplacedFile(entry)
                stub_data = d.deposit_file(self.mime_type)

                with open(temp_file_path, 'w') as f:
                    f.write(stub_data)
            except:
                self.__log.exception("Could not deposit to file [%s] from "
                                     "entry [%s]." % (temp_file_path,
                                                      entry))
                raise

            # TODO: Accommodate the cache for displaced-files.
            cache_fault = True
        else:
            self.__log.info("Executing the download.")

            try:
                # TODO(dustin): We're not inheriting an existing file (same
                #               mtime, same size).
                result = drive_proxy('download_to_local',
                                     output_file_path=temp_file_path,
                                     normalized_entry=entry,
                                     mime_type=self.mime_type)

                (length, cache_fault) = result
            except ExportFormatError:
                self.__log.exception("There was an export-format error.")
                raise FuseOSError(ENOENT)
            except:
                self.__log.exception("Could not localize file with entry "
                                     "[%s]." % (entry))
                raise

        self.__log.info("Download complete. cache_fault= [%s] "
                        "__is_loaded= [%s]" %
                        (cache_fault, self.__is_loaded))

        # We've either not loaded it, yet, or it has changed.
        if cache_fault or not self.__is_loaded:
            with self.__class__.__update_lock:
                self.__log.info("Checking queued items for fault.")

                if cache_fault:
                    if self.__is_dirty:
                        self.__log.error("Entry [%s] has been changed. "
                                         "Forcing buffer updates, and "
                                         "clearing uncommitted updates." %
                                         (entry))
                    else:
                        self.__log.debug("Entry [%s] has changed. "
                                         "Updating buffers." % (entry))

                self.__log.debug("Loading buffers.")

                with open(temp_file_path, 'rb') as f:
                    # Read the locally cached file in.

                    try:
                        # TODO(dustin): Our accounting is broken when it
                        #               comes to loading and/or update-
                        #               tracking. If we have a guarantee
                        #               that writes only appear in sequence
                        #               and in increasing order, we can
                        #               dump BufferSegments.
                        # TODO(dustin): This is the source of:
                        #               1) An enormous slowdown where we
                        #                  first have to write the data, and
                        #                  then have to read it back.
                        #               2) An enormous resource burden.
                        data = f.read()

                        read_blocksize = \
                            Conf.get('default_buffer_read_blocksize')

                        self.__buffer = BufferSegments(data, read_blocksize)
                    except:
                        self.__log.exception("Could not read current "
                                             "cached file into buffer.")
                        raise

                self.__is_dirty = False
                self.__is_loaded = True

    self.__log.debug("__load_base_from_remote complete.")

    return cache_fault
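# A minimal, self-contained sketch of the segmented buffer that
# BufferSegments (used above) is assumed to provide: the payload is split
# into fixed-size blocks so reads can be streamed block by block. The real
# class supports more (e.g. in-place updates); this shows only the storage
# and read sides.

class SimpleBufferSegments(object):
    def __init__(self, data, block_size):
        self.length = len(data)
        self.__blocks = [data[i:i + block_size]
                         for i in range(0, len(data), block_size)]

    def read(self):
        """Yield the buffered data, one block at a time."""

        for block in self.__blocks:
            yield block

# Example:
#   segments = SimpleBufferSegments('abcdefg', 3)
#   ''.join(segments.read()) -> 'abcdefg'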