def __init__(self):
    """Snapshot the newest change-ID and prepare the (not-yet-started)
    monitor-thread state.
    """
    # Record the most recent change-ID so later polls only fetch deltas.
    self.at_change_id = AccountInfo.get_instance().largest_change_id

    # Lazy %-args: the message is only rendered when DEBUG is enabled.
    _logger.debug("Latest change-ID at startup is (%d).", self.at_change_id)

    # Monitor thread and its shutdown signal (thread is started elsewhere).
    self.__t = None
    self.__t_quit_ev = threading.Event()
def find_path_components_goandget(self, path):
    """Do the same thing that find_path_components() does, except that
    when we don't have record of a path-component, try to go and find it
    among the children of the previous path component, and then try again.
    """
    gd = get_gdrive()

    with PathRelations.rlock:
        previous_results = []
        i = 0
        while True:
            # See how many components can be found in our current cache.
            result = self.__find_path_components(path)

            # If we could resolve the entire path, return success.
            if result[2]:
                return result

            # If we could not resolve the entire path, and we're no more
            # successful than a prior attempt, we'll just have to return a
            # partial.
            num_results = len(result[0])
            if num_results in previous_results:
                return result

            previous_results.append(num_results)

            # Else, we've encountered a component/depth of the path that we
            # don't currently know about.

            # TODO: This is going to be the general area that we'd have to
            # adjust to support multiple, identical entries. This currently
            # only considers the first result. We should rewrite this to be
            # recursive in order to make it easier to keep track of a list
            # of results.

            # The parent is the last one found, or the root if none.
            parent_id = result[0][num_results - 1] \
                            if num_results \
                            else AccountInfo.get_instance().root_id

            # The child will be the first part that was not found.
            child_name = result[1][num_results]

            # Pull the missing child from the backend and register it so the
            # next __find_path_components() pass can see it.
            children = gd.list_files(
                        parent_id=parent_id,
                        query_is_string=child_name)

            for child in children:
                self.register_entry(child)

            i += 1
def __init__(self):
    """Snapshot the newest change-ID at startup so later polls only fetch
    deltas.

    Raises whatever AccountInfo raises if the change-ID can't be read.
    """
    self.__log = logging.getLogger().getChild('ChangeMan')

    try:
        self.at_change_id = AccountInfo.get_instance().largest_change_id
    except Exception:
        # Narrowed from a bare "except:" so KeyboardInterrupt/SystemExit
        # aren't intercepted; still logged and re-raised.
        self.__log.exception("Could not get largest change-ID.")
        raise

    # Lazy %-args: rendered only if INFO is enabled.
    self.__log.info("Latest change-ID at startup is (%d).",
                    self.at_change_id)
def statfs(self, filepath):
    """Return filesystem status info (for df).

    The given file-path seems to always be '/'.

    REF: http://www.ibm.com/developerworks/linux/library/l-fuse/
    REF: http://stackoverflow.com/questions/4965355/converting-statvfs-to-percentage-free-correctly
    """
    block_size = 512

    try:
        account_info = AccountInfo.get_instance()

        # Integer division: statvfs block counts must be ints (true
        # division would yield floats under Python 3).
        total = account_info.quota_bytes_total // block_size
        used = account_info.quota_bytes_used // block_size
        free = total - used
    except Exception:
        self.__log.exception("Could not get account-info.")
        raise FuseOSError(EIO)

    return {
        # Optimal transfer block size.
        'f_bsize': block_size,

        # Total data blocks in file system.
        'f_blocks': total,

        # Fragment size.
        'f_frsize': block_size,

        # Free blocks in filesystem.
        'f_bfree': free,

        # Free blocks avail to non-superuser.
        'f_bavail': free,

        # Total file nodes in filesystem.
        # 'f_files': 0,

        # Free file nodes in filesystem.
        # 'f_ffree': 0,

        # Free inodes for unprivileged users.
        # 'f_favail': 0
    }
def __find_path_components(self, path):
    """Given a path, return a list of all Google Drive entries that
    comprise each component, or as many as can be found. As we've
    ensured that all sibling filenames are unique, there can not be
    multiple matches.

    Returns a 3-tuple of (entry-IDs, path-parts, fully-resolved-flag).
    """
    self.__log.debug("Searching for path components of [%s]. Now "
                     "resolving entry_clause.", path)

    # Normalize: strip a leading and a trailing slash so "" means root.
    if path[0] == '/':
        path = path[1:]

    if len(path) and path[-1] == '/':
        path = path[:-1]

    # Serve previously fully-resolved paths straight from the cache.
    if path in self.path_cache:
        return self.path_cache[path]

    with PathRelations.rlock:
        self.__log.debug("Locating entry information for path [%s].", path)

        try:
            root_id = AccountInfo.get_instance().root_id
        except Exception:
            self.__log.exception("Could not get root-ID.")
            raise

        # Ensure that the root node is loaded.
        try:
            self.__get_entry_clause_by_id(root_id)
        except Exception:
            self.__log.exception("Could not ensure root-node with entry-ID "
                                 "[%s].", root_id)
            raise

        path_parts = path.split('/')

        entry_ptr = root_id
        parent_id = None
        i = 0
        num_parts = len(path_parts)
        results = []
        while i < num_parts:
            child_filename_to_search_fs = get_utility(). \
                translate_filename_charset(path_parts[i])

            self.__log.debug("Checking for part (%d) [%s] under parent "
                             "with ID [%s].",
                             i, child_filename_to_search_fs, entry_ptr)

            try:
                current_clause = self.entry_ll[entry_ptr]
            except Exception:
                # TODO: If entry with ID entry_ptr is not registered, update
                #       children of parent parent_id. Throttle how often this
                #       happens.
                self.__log.exception("Could not find current subdirectory. "
                                     "ENTRY_ID= [%s]", entry_ptr)
                raise

            # Search this entry's children for the next filename further down
            # in the path among this entry's children. Any duplicates should've
            # already beeen handled as entries were stored.
            children = current_clause[2]

            # If they just wanted the "" path (root), return the root-ID.
            if path == "":
                found = [root_id]
            else:
                # child_tuple is (filename, clause); clause[3] is the entry-ID.
                found = [child_tuple[1][3]
                         for child_tuple
                         in children
                         if child_tuple[0] == child_filename_to_search_fs]

            if found:
                self.__log.debug("Found matching child with ID [%s].",
                                 found[0])
                results.append(found[0])
            else:
                self.__log.debug("Did not find matching child.")

                # Partial resolution: report how far we got.
                return (results, path_parts, False)

            # Have we traveled far enough into the linked list?
            if (i + 1) >= num_parts:
                self.__log.debug("Path has been completely resolved: %s",
                                 ', '.join(results))

                # Fully resolved: memoize in both directions.
                self.path_cache[path] = (results, path_parts, True)
                final_entry_id = results[-1]
                self.path_cache_byid[final_entry_id] = path

                return self.path_cache[path]

            parent_id = entry_ptr
            entry_ptr = found[0]
            i += 1
def find_path_components_goandget(self, path):
    """Do the same thing that find_path_components() does, except that
    when we don't have record of a path-component, try to go and find it
    among the children of the previous path component, and then try again.
    """
    with PathRelations.rlock:
        previous_results = []
        i = 0
        while True:
            self.__log.info("Attempting to find path-components (go and "
                            "get) for path [%s]. CYCLE= (%d)", path, i)

            # See how many components can be found in our current cache.
            try:
                result = self.__find_path_components(path)
            except Exception:
                self.__log.exception("There was a problem doing an "
                                     "iteration of find_path_components() "
                                     "on [%s].", path)
                raise

            self.__log.debug("Path resolution cycle (%d) results: %s",
                             i, result)

            # If we could resolve the entire path, return success.
            self.__log.debug("Found within current cache? %s", result[2])

            if result[2]:
                return result

            # If we could not resolve the entire path, and we're no more
            # successful than a prior attempt, we'll just have to return a
            # partial.
            num_results = len(result[0])
            if num_results in previous_results:
                self.__log.debug("We couldn't improve our results. This "
                                 "path most likely does not exist.")
                return result

            previous_results.append(num_results)

            self.__log.debug("(%d) path-components were found, but not "
                             "all.", num_results)

            # Else, we've encountered a component/depth of the path that we
            # don't currently know about.

            # TODO: This is going to be the general area that we'd have to
            # adjust to support multiple, identical entries. This currently
            # only considers the first result. We should rewrite this to be
            # recursive in order to make it easier to keep track of a list
            # of results.

            # The parent is the last one found, or the root if none.
            parent_id = result[0][num_results - 1] \
                            if num_results \
                            else AccountInfo.get_instance().root_id

            # The child will be the first part that was not found.
            child_name = result[1][num_results]

            self.__log.debug("Trying to reconcile child named [%s] under "
                             "folder with entry-ID [%s].",
                             child_name, parent_id)

            try:
                children = drive_proxy('list_files', parent_id=parent_id,
                                       query_is_string=child_name)
            except Exception:
                self.__log.exception("Could not retrieve children for "
                                     "parent with ID [%s].", parent_id)
                raise

            # Register whatever was found so the next cycle can resolve
            # one level deeper.
            for child in children:
                try:
                    self.register_entry(child)
                except Exception:
                    self.__log.exception("Could not register child entry "
                                         "for entry with ID [%s] in path-"
                                         "cache.", child.id)
                    raise

            filenames_phrase = ', '.join([candidate.id
                                          for candidate in children])
            self.__log.debug("(%d) candidate children were found: %s",
                             len(children), filenames_phrase)

            i += 1
class EntryCache(CacheClientBase):
    """Manages our knowledge of file entries."""

    __log = None

    # NOTE(review): this executes at class-definition (import) time -- an
    # import side effect. Confirm the account is guaranteed to be available
    # that early before relying on it.
    about = AccountInfo.get_instance()

    def __init__(self):
        self.__log = logging.getLogger().getChild('EntryCache')
        CacheClientBase.__init__(self)

    def __get_entries_to_update(self, requested_entry_id):
        """Return the requested entry-ID plus up to max_readahead_entries
        uncached sibling-IDs (children of the same parents), for read-ahead.
        """
        # Get more entries than just what was requested, while we're at it.
        parent_ids = drive_proxy('get_parents_containing_id',
                                 child_id=requested_entry_id)

        affected_entries = [requested_entry_id]

        # IDs already examined, so shared children aren't re-checked.
        considered_entries = set()

        max_readahead_entries = Conf.get('max_readahead_entries')

        for parent_id in parent_ids:
            child_ids = drive_proxy('get_children_under_parent_id',
                                    parent_id=parent_id)

            for child_id in child_ids:
                if child_id == requested_entry_id:
                    continue

                # We've already looked into this entry.
                if child_id in considered_entries:
                    continue

                considered_entries.add(child_id)

                # Is it already cached?
                if self.cache.exists(child_id):
                    continue

                affected_entries.append(child_id)

                if len(affected_entries) >= max_readahead_entries:
                    break

            # BUGFIX: the inner break only exits the children loop; without
            # this outer check, subsequent parents could push the batch past
            # the read-ahead cap.
            if len(affected_entries) >= max_readahead_entries:
                break

        return affected_entries

    def __do_update_for_missing_entry(self, requested_entry_id):
        """Fetch the missing entry (plus read-ahead batch) and register each
        retrieved entry with the path-relations cache.
        """
        # Get the entries to update.
        affected_entries = self.__get_entries_to_update(requested_entry_id)

        # Read the entries, now.

        # TODO: We have to determine when this is called, and either remove it
        #       (if it's not), or find another way to not have to load them
        #       individually.

        retrieved = drive_proxy('get_entries', entry_ids=affected_entries)

        # Update the cache. (.values(): the key was unused, and this works
        # under both Python 2 and 3, unlike the original iteritems().)
        path_relations = PathRelations.get_instance()

        for entry in retrieved.values():
            path_relations.register_entry(entry)

        return retrieved

    def fault_handler(self, resource_name, requested_entry_id):
        """A requested entry wasn't stored."""
        retrieved = self.__do_update_for_missing_entry(requested_entry_id)

        # Return the requested entry.
        return retrieved[requested_entry_id]

    def cleanup_pretrigger(self, resource_name, entry_id, force):
        """The core entry cache has a clean-up process that will remove old
        entries. This is called just before any record is removed.
        """
        # Now that the local cache-item has been removed, remove the same from
        # the PathRelations cache.
        path_relations = PathRelations.get_instance()

        if path_relations.is_cached(entry_id):
            path_relations.remove_entry_recursive(entry_id)

    def get_max_cache_age_seconds(self):
        """Maximum age (seconds) before a cached entry is expired."""
        return Conf.get('cache_entries_max_age')
def __find_path_components(self, path):
    """Resolve each component of *path* to a Google Drive entry-ID, as far
    as the current cache allows. Sibling filenames are unique, so each
    component has at most one match.

    Returns (entry-IDs, path-parts, fully-resolved-flag).
    """
    # Normalize: drop a leading and a trailing slash ("" then means root).
    if path[0] == '/':
        path = path[1:]

    if len(path) and path[-1] == '/':
        path = path[:-1]

    # Fully-resolved paths are memoized.
    if path in self.path_cache:
        return self.path_cache[path]

    with PathRelations.rlock:
        root_id = AccountInfo.get_instance().root_id

        # Make sure the root node itself is registered before traversing.
        self.__get_entry_clause_by_id(root_id)

        path_parts = path.split('/')
        num_parts = len(path_parts)

        node_id = root_id
        resolved_ids = []

        for depth in range(num_parts):
            part_fs = utility. \
                        translate_filename_charset(path_parts[depth])

            # clause[2] is the list of (filename, child-clause) tuples.
            child_clauses = self.entry_ll[node_id][2]

            # The "" path (root) resolves trivially to the root-ID.
            if path == "":
                matches = [root_id]
            else:
                # child-clause[3] holds the child's entry-ID.
                matches = [c[1][3]
                           for c in child_clauses
                           if c[0] == part_fs]

            if not matches:
                # Partial resolution: report how far we got.
                return (resolved_ids, path_parts, False)

            resolved_ids.append(matches[0])

            if depth + 1 >= num_parts:
                # Fully resolved: memoize in both directions and return.
                self.path_cache[path] = (resolved_ids, path_parts, True)
                self.path_cache_byid[resolved_ids[-1]] = path

                return self.path_cache[path]

            # Descend to the matched child for the next component.
            node_id = matches[0]
def __init__(self):
    """Snapshot the newest change-ID so later polls only fetch deltas."""
    self.at_change_id = AccountInfo.get_instance().largest_change_id

    # Lazy %-args: the message is only rendered when DEBUG is enabled.
    _logger.debug("Latest change-ID at startup is (%d).", self.at_change_id)
def __find_path_components(self, path):
    """Given a path, return a list of all Google Drive entries that
    comprise each component, or as many as can be found. As we've
    ensured that all sibling filenames are unique, there can not be
    multiple matches.

    Returns a 3-tuple (entry-IDs, path-parts, fully-resolved-flag).
    """

    # Normalize: strip a leading and a trailing slash, so "" means root.
    if path[0] == '/':
        path = path[1:]

    if len(path) and path[-1] == '/':
        path = path[:-1]

    # Serve previously fully-resolved paths straight from the cache.
    if path in self.path_cache:
        return self.path_cache[path]

    with PathRelations.rlock:
        root_id = AccountInfo.get_instance().root_id

        # Ensure that the root node is loaded.
        self.__get_entry_clause_by_id(root_id)

        path_parts = path.split('/')

        entry_ptr = root_id
        parent_id = None
        i = 0
        num_parts = len(path_parts)
        results = [ ]
        while i < num_parts:
            # Translate the component into filesystem-charset form before
            # comparing against stored child names.
            child_filename_to_search_fs = utility. \
                translate_filename_charset(path_parts[i])

            # entry_ll maps entry-ID -> clause; raises KeyError if the node
            # at entry_ptr was never registered.
            current_clause = self.entry_ll[entry_ptr]

            # Search this entry's children for the next filename further down
            # in the path among this entry's children. Any duplicates should've
            # already beeen handled as entries were stored. We name the variable
            # just to emphasize that no ambiguity -as well as- no error will
            # occur in the traversal process.
            first_matching_child_clause = None
            children = current_clause[2]

            # If they just wanted the "" path (root), return the root-ID.
            if path == "":
                found = [ root_id ]
            else:
                # child_tuple is (filename, clause); clause[3] is the entry-ID.
                found = [ child_tuple[1][3]
                          for child_tuple
                          in children
                          if child_tuple[0] == child_filename_to_search_fs ]

            if found:
                results.append(found[0])
            else:
                # Partial resolution: report how far we got.
                return (results, path_parts, False)

            # Have we traveled far enough into the linked list?
            if (i + 1) >= num_parts:
                # Fully resolved: memoize in both directions.
                self.path_cache[path] = (results, path_parts, True)
                final_entry_id = results[-1]
                self.path_cache_byid[final_entry_id] = path

                return self.path_cache[path]

            parent_id = entry_ptr
            entry_ptr = found[0]
            i += 1
def find_path_components_goandget(self, path):
    """Do the same thing that find_path_components() does, except that
    when we don't have record of a path-component, try to go and find it
    among the children of the previous path component, and then try again.
    """
    gd = get_gdrive()

    with PathRelations.rlock:
        previous_results = []
        i = 0
        while True:
            # See how many components can be found in our current cache.
            result = self.__find_path_components(path)

            # If we could resolve the entire path, return success.
            if result[2]:
                return result

            # If we could not resolve the entire path, and we're no more
            # successful than a prior attempt, we'll just have to return a
            # partial.
            num_results = len(result[0])
            if num_results in previous_results:
                return result

            previous_results.append(num_results)

            # Else, we've encountered a component/depth of the path that we
            # don't currently know about.

            # TODO: This is going to be the general area that we'd have to
            # adjust to support multiple, identical entries. This currently
            # only considers the first result. We should rewrite this to be
            # recursive in order to make it easier to keep track of a list
            # of results.

            # The parent is the last one found, or the root if none.
            parent_id = result[0][num_results - 1] \
                            if num_results \
                            else AccountInfo.get_instance().root_id

            # The child will be the first part that was not found.
            child_name = result[1][num_results]

            # Pull the missing child from the backend and register it so the
            # next __find_path_components() pass can see it.
            children = gd.list_files(
                        parent_id=parent_id,
                        query_is_string=child_name)

            for child in children:
                self.register_entry(child)

            i += 1
def find_path_components_goandget(self, path):
    """Do the same thing that find_path_components() does, except that
    when we don't have record of a path-component, try to go and find it
    among the children of the previous path component, and then try again.
    """
    with PathRelations.rlock:
        previous_results = []
        i = 0
        while True:
            self.__log.info("Attempting to find path-components (go and "
                            "get) for path [%s]. CYCLE= (%d)", path, i)

            # See how many components can be found in our current cache.
            try:
                result = self.__find_path_components(path)
            except Exception:
                self.__log.exception("There was a problem doing an "
                                     "iteration of find_path_components() "
                                     "on [%s].", path)
                raise

            self.__log.debug("Path resolution cycle (%d) results: %s",
                             i, result)

            # If we could resolve the entire path, return success.
            self.__log.debug("Found within current cache? %s", result[2])

            if result[2]:
                return result

            # If we could not resolve the entire path, and we're no more
            # successful than a prior attempt, we'll just have to return a
            # partial.
            num_results = len(result[0])
            if num_results in previous_results:
                self.__log.debug("We couldn't improve our results. This "
                                 "path most likely does not exist.")
                return result

            previous_results.append(num_results)

            self.__log.debug("(%d) path-components were found, but not "
                             "all.", num_results)

            # Else, we've encountered a component/depth of the path that we
            # don't currently know about.

            # TODO: This is going to be the general area that we'd have to
            # adjust to support multiple, identical entries. This currently
            # only considers the first result. We should rewrite this to be
            # recursive in order to make it easier to keep track of a list
            # of results.

            # The parent is the last one found, or the root if none.
            parent_id = result[0][num_results - 1] \
                            if num_results \
                            else AccountInfo.get_instance().root_id

            # The child will be the first part that was not found.
            child_name = result[1][num_results]

            self.__log.debug("Trying to reconcile child named [%s] under "
                             "folder with entry-ID [%s].",
                             child_name, parent_id)

            try:
                children = drive_proxy('list_files', parent_id=parent_id,
                                       query_is_string=child_name)
            except Exception:
                self.__log.exception("Could not retrieve children for "
                                     "parent with ID [%s].", parent_id)
                raise

            # Register whatever was found so the next cycle can resolve
            # one level deeper.
            for child in children:
                try:
                    self.register_entry(child)
                except Exception:
                    self.__log.exception("Could not register child entry "
                                         "for entry with ID [%s] in path-"
                                         "cache.", child.id)
                    raise

            filenames_phrase = ', '.join([candidate.id
                                          for candidate in children])
            self.__log.debug("(%d) candidate children were found: %s",
                             len(children), filenames_phrase)

            i += 1