def __build_stat_from_entry(self, entry):
    (uid, gid, pid) = fuse_get_context()

    if entry.is_directory:
        effective_permission = int(Conf.get("default_perm_folder"), 8)
    elif entry.editable:
        effective_permission = int(Conf.get("default_perm_file_editable"), 8)
    else:
        effective_permission = int(Conf.get("default_perm_file_noneditable"), 8)

    stat_result = {
        "st_mtime": entry.modified_date_epoch,  # modified time.
        "st_ctime": entry.modified_date_epoch,  # changed time.
        "st_atime": time(),
        "st_uid": uid,
        "st_gid": gid,
    }

    if entry.is_directory:
        # Per http://sourceforge.net/apps/mediawiki/fuse/index.php?title=SimpleFilesystemHowto,
        # default size should be 4K.
        # TODO(dustin): Should we just make this (0), since that's what it is?
        stat_result["st_size"] = 1024 * 4
        stat_result["st_mode"] = stat.S_IFDIR | effective_permission
        stat_result["st_nlink"] = 2
    else:
        stat_result["st_size"] = DisplacedFile.file_size \
                                    if entry.requires_mimetype \
                                    else entry.file_size

        stat_result["st_mode"] = stat.S_IFREG | effective_permission
        stat_result["st_nlink"] = 1

    stat_result["st_blocks"] = \
        int(math.ceil(float(stat_result["st_size"]) / 512.0))

    return stat_result
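# A minimal, standalone sketch (not GDFS code) of how the stat fields above
# behave for a regular file: st_blocks is the size rounded up to 512-byte
# units, which is what tools such as du(1) count. The size and permission
# values below are purely illustrative.
import math
import stat

sample_size = 3000                        # hypothetical entry.file_size
effective_permission = 0o444              # hypothetical default_perm_file_noneditable
st_mode = stat.S_IFREG | effective_permission
st_blocks = int(math.ceil(float(sample_size) / 512.0))
assert st_blocks == 6                     # 3000 bytes occupy six 512-byte blocks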
def __init__(self):
    cache_filepath = Conf.get('auth_cache_filepath')
    api_credentials = Conf.get('api_credentials')

    self.cache_filepath = cache_filepath
    self.credentials = None

    with NamedTemporaryFile() as f:
        json.dump(api_credentials, f)
        f.flush()

        self.flow = flow_from_clientsecrets(
                        f.name,
                        scope=self.__get_scopes(),
                        redirect_uri=OOB_CALLBACK_URN)
def __init__(self):
    self.__log = logging.getLogger().getChild('OauthAuth')

    cache_filepath = Conf.get('auth_cache_filepath')
    api_credentials = Conf.get('api_credentials')

    self.cache_filepath = cache_filepath

    with NamedTemporaryFile() as f:
        json.dump(api_credentials, f)
        f.flush()

        self.flow = flow_from_clientsecrets(
                        f.name,
                        scope=self.__get_scopes(),
                        redirect_uri=OOB_CALLBACK_URN)
def get_client(self):
    if self.__client is None:
        authed_http = self.get_authed_http()

        # Build a client from the passed discovery document path
        discoveryUrl = Conf.get('google_discovery_service_url')

        # TODO: We should cache this, since we have, so often, had a problem
        #       retrieving it. If there's no other way, grab it directly, and
        #       then pass via a file:// URI.
        try:
            client = build(_CONF_SERVICE_NAME,
                           _CONF_SERVICE_VERSION,
                           http=authed_http,
                           discoveryServiceUrl=discoveryUrl)
        except HttpError as e:
            # We've seen situations where the discovery URL's server is down,
            # with an alternate one to be used.
            #
            # An error here shouldn't leave GDFS in an unstable state (the
            # current command should just fail). Hopefully, the failure is
            # momentary, and the next command succeeds.

            _logger.exception("There was an HTTP response-code of (%d) while "
                              "building the client with discovery URL [%s].",
                              e.resp.status, discoveryUrl)
            raise

        self.__client = client

    return self.__client
def deposit_file(self, mime_type):
    """Write the file to a temporary path, and present a stub (JSON) to the
    user. This is the only way of getting files that don't have a
    well-defined filesize without providing a type, ahead of time.
    """

    temp_path = Conf.get('file_download_temp_path')
    file_path = ("%s/displaced/%s.%s" %
                 (temp_path, self.__normalized_entry.title,
                  mime_type.replace('/', '+')))

    try:
        result = drive_proxy('download_to_local',
                             output_file_path=file_path,
                             normalized_entry=self.__normalized_entry,
                             mime_type=mime_type)
        (length, cache_fault) = result
    except:
        self.__log.exception("Could not localize displaced file with "
                             "entry having ID [%s]." %
                             (self.__normalized_entry.id))
        raise

    self.__log.debug("Displaced entry [%s] deposited to [%s] with length "
                     "(%d)." % (self.__normalized_entry, file_path, length))

    try:
        return self.get_stub(mime_type, length, file_path)
    except:
        self.__log.exception("Could not build stub for [%s]." %
                             (self.__normalized_entry))
        raise
def __check_changes(self):
    _logger.info("Change-processing thread running.")

    interval_s = Conf.get('change_check_frequency_s')
    cm = get_change_manager()

    while self.__t_quit_ev.is_set() is False and \
          gdrivefs.state.GLOBAL_EXIT_EVENT.is_set() is False:
        _logger.debug("Checking for changes.")

        try:
            is_done = cm.process_updates()
        except:
            _logger.exception("Squelching an exception that occurred "
                              "while reading/processing changes.")

            # Force another check, soon.
            is_done = False

        # If there are still more changes, take them as quickly as
        # possible.
        if is_done is True:
            _logger.debug("No more changes. Waiting.")
            time.sleep(interval_s)
        else:
            _logger.debug("There are more changes to be applied. Cycling "
                          "immediately.")

    _logger.info("Change-processing thread terminating.")
def get_temp_filepath(normalized_entry, mime_type):
    temp_filename = ("%s.%s" %
                     (normalized_entry.id, mime_type.replace('/', '+'))).\
                    encode('ascii')

    temp_path = Conf.get('file_download_temp_path')
    return ("%s/local/%s" % (temp_path, temp_filename))
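# A hypothetical, standalone illustration of the path shape the helper above
# produces: "<temp>/local/<entry-id>.<mime-type>" with any '/' in the mime-type
# replaced by '+'. The entry ID and temp path below are made up.
entry_id = '0B9dI3mBgyyZaWFJ'
mime_type = 'application/pdf'
temp_path = '/tmp/gdrivefs'

temp_filename = "%s.%s" % (entry_id, mime_type.replace('/', '+'))
print("%s/local/%s" % (temp_path, temp_filename))
# -> /tmp/gdrivefs/local/0B9dI3mBgyyZaWFJ.application+pdf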
def is_visible(self):
    if [flag
        for flag, value
        in self.labels.items()
        if flag in Conf.get('hidden_flags_list_local') and value]:
        return False
    else:
        return True
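# A standalone sketch of the visibility test above. The label names here
# ('trashed', 'hidden') are only assumed values for hidden_flags_list_local;
# the entry counts as visible when none of the configured flags is truthy.
hidden_flags_list_local = ['trashed', 'hidden']   # assumed configuration
labels = {'trashed': True, 'hidden': False, 'starred': True}

is_visible = not [flag for flag, value in labels.items()
                  if flag in hidden_flags_list_local and value]
print(is_visible)   # False, because 'trashed' is set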
def __get_entries_to_update(self, requested_entry_id): # Get more entries than just what was requested, while we're at it. try: parent_ids = drive_proxy('get_parents_containing_id', child_id=requested_entry_id) except: self.__log.exception( "Could not retrieve parents for child with ID " "[%s]." % (requested_entry_id)) raise self.__log.debug("Found (%d) parents." % (len(parent_ids))) affected_entries = [requested_entry_id] considered_entries = {} max_readahead_entries = Conf.get('max_readahead_entries') for parent_id in parent_ids: self.__log.debug("Retrieving children for parent with ID [%s]." % (parent_id)) try: child_ids = drive_proxy('get_children_under_parent_id', parent_id=parent_id) except: self.__log.exception( "Could not retrieve children for parent with" " ID [%s]." % (requested_entry_id)) raise self.__log.debug("(%d) children found under parent with ID [%s]." % (len(child_ids), parent_id)) for child_id in child_ids: if child_id == requested_entry_id: continue # We've already looked into this entry. try: considered_entries[child_id] continue except: pass considered_entries[child_id] = True # Is it already cached? if self.cache.exists(child_id): continue affected_entries.append(child_id) if len(affected_entries) >= max_readahead_entries: break return affected_entries
def __create(self, filepath, mode=None): """Create a new file. We don't implement "mode" (permissions) because the model doesn't agree with GD. """ # TODO: Fail if it already exists. try: result = split_path(filepath, path_resolver) (parent_clause, path, filename, mime_type, is_hidden) = result except GdNotFoundError: _logger.exception("Could not process [%s] (i-create).", filepath) raise FuseOSError(ENOENT) except: _logger.exception("Could not split path [%s] (i-create).", filepath) raise FuseOSError(EIO) distilled_filepath = build_filepath(path, filename) # Try to guess at a mime-type, if not otherwise given. if mime_type is None: (mimetype_guess, _) = guess_type(filename, True) if mimetype_guess is not None: mime_type = mimetype_guess else: mime_type = Conf.get('default_mimetype') gd = get_gdrive() try: entry = gd.create_file( filename, [parent_clause[3]], mime_type, is_hidden=is_hidden) except: _logger.exception("Could not create empty file [%s] under " "parent with ID [%s].", filename, parent_clause[3]) raise FuseOSError(EIO) path_relations = PathRelations.get_instance() try: path_relations.register_entry(entry) except: _logger.exception("Could not register created file in cache.") raise FuseOSError(EIO) _logger.info("Inner-create of [%s] completed.", distilled_filepath) return (entry, path, filename, mime_type)
def __emit_log(self):
    for source_name, source_data in self.data.iteritems():
        pairs = [("%s= [%s]" % (k, v))
                 for k, v
                 in source_data.iteritems()]

        logging.info("RPT EMIT(%s): %s" % (source_name, ', '.join(pairs)))

    report_emit_interval_s = Conf.get('report_emit_frequency_s')
    emit_timer = Timer(report_emit_interval_s, self.__emit_log)

    Timers.get_instance().register_timer('emit', emit_timer)
def create_directory(self, filename, parents, **kwargs):
    mimetype_directory = Conf.get('directory_mimetype')
    return self.__insert_entry(
            False,
            filename,
            parents,
            mimetype_directory,
            **kwargs)
def __build_stat_from_entry(self, entry): (uid, gid, pid) = fuse_get_context() block_size_b = gdrivefs.config.fs.CALCULATION_BLOCK_SIZE if entry.is_directory: effective_permission = \ int(Conf.get('default_perm_folder'), 8) elif entry.editable: effective_permission = \ int(Conf.get('default_perm_file_editable'), 8) else: effective_permission = \ int(Conf.get('default_perm_file_noneditable'), 8) stat_result = { "st_mtime": entry.modified_date_epoch, # modified time. "st_ctime": entry.modified_date_epoch, # changed time. "st_atime": time(), "st_uid": uid, "st_gid": gid, } if entry.is_directory: # Per http://sourceforge.net/apps/mediawiki/fuse/index.php?title=SimpleFilesystemHowto, # default size should be 4K. # TODO(dustin): Should we just make this (0), since that's what it is? stat_result["st_size"] = 1024 * 4 stat_result["st_mode"] = (stat.S_IFDIR | effective_permission) stat_result["st_nlink"] = 2 else: if entry.requires_mimetype: stat_result["st_size"] = DisplacedFile.file_size else: stat_result["st_size"] = entry.file_size stat_result["st_mode"] = (stat.S_IFREG | effective_permission) stat_result["st_nlink"] = 1 stat_result["st_blocks"] = \ int(math.ceil(float(stat_result["st_size"]) / block_size_b)) return stat_result
def _sched_check_changes():
    logging.debug("Doing scheduled check for changes.")

    get_change_manager().process_updates()

    # Schedule next invocation.
    t = Timer(Conf.get('change_check_frequency_s'), _sched_check_changes)
    Timers.get_instance().register_timer('change', t)
def getattr(self, raw_path, fh=None): """Return a stat() structure.""" # TODO: Implement handle. (entry, path, filename) = self.__get_entry_or_raise(raw_path) (uid, gid, pid) = fuse_get_context() self.__log.debug("Context: UID= (%d) GID= (%d) PID= (%d)" % (uid, gid, pid)) if entry.is_directory: effective_permission = int(Conf.get('default_perm_folder'), 8) elif entry.editable: effective_permission = int(Conf.get('default_perm_file_editable'), 8) else: effective_permission = int( Conf.get('default_perm_file_noneditable'), 8) stat_result = { "st_mtime": entry.modified_date_epoch, # modified time. "st_ctime": entry.modified_date_epoch, # changed time. "st_atime": time(), "st_uid": uid, "st_gid": gid } if entry.is_directory: # Per http://sourceforge.net/apps/mediawiki/fuse/index.php?title=SimpleFilesystemHowto, # default size should be 4K. stat_result["st_size"] = 1024 * 4 stat_result["st_mode"] = (stat.S_IFDIR | effective_permission) stat_result["st_nlink"] = 2 else: stat_result["st_size"] = DisplacedFile.file_size \ if entry.requires_mimetype \ else entry.file_size stat_result["st_mode"] = (stat.S_IFREG | effective_permission) stat_result["st_nlink"] = 1 return stat_result
def __post_status(self):
    """Send the current status to our reporting tool."""

    num_values = self.registry.count(self.resource_name)

    self.report.set_values(self.report_source_name, 'count', num_values)

    status_post_interval_s = Conf.get('cache_status_post_frequency_s')
    status_timer = Timer(status_post_interval_s, self.__post_status)

    Timers.get_instance().register_timer('status', status_timer)
def rename(self, normalized_entry, new_filename): # TODO: It doesn't seem as if the created file is being registered. # Even though we're supposed to provide an extension, we can get away # without having one. We don't want to impose this when acting like a # normal FS. # If no data and no mime-type was given, default it. if mime_type == None: mime_type = Conf.get('file_default_mime_type') self.__log.debug("No mime-type was presented for file create/update. " "Defaulting to [%s]." % (mime_type)) return self.__insert_entry(filename=filename, data_filepath=data_filepath, mime_type=mime_type, **kwargs)
def __load_mappings(self):
    # Allow someone to override our default mappings of the GD types.

    gd_to_normal_mapping_filepath = \
        Conf.get('gd_to_normal_mapping_filepath')

    try:
        with open(gd_to_normal_mapping_filepath, 'r') as f:
            self.gd_to_normal_mime_mappings.extend(json.load(f))
    except:
        logging.info("No mime-mapping was found.")

    # Allow someone to set file-extensions for mime-types, and not rely on
    # Python's educated guesses.

    extension_mapping_filepath = Conf.get('extension_mapping_filepath')

    try:
        with open(extension_mapping_filepath, 'r') as f:
            self.default_extensions.extend(json.load(f))
    except:
        logging.info("No extension-mapping was found.")
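# An illustrative guess (not confirmed by GDFS documentation) at how the two
# override files above could be produced. Both are read with json.load() and
# extend()ed onto existing lists, so each file is assumed to hold a JSON array;
# the element shapes, mime-types, and extension below are made up.
import json

with open('/tmp/gd_to_normal_mapping.json', 'w') as f:
    json.dump([["application/vnd.google-apps.document",
                "application/vnd.oasis.opendocument.text"]], f)

with open('/tmp/extension_mapping.json', 'w') as f:
    json.dump([["application/vnd.oasis.opendocument.text", "odt"]], f)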
def getattr(self, raw_path, fh=None): """Return a stat() structure.""" # TODO: Implement handle. (entry, path, filename) = self.__get_entry_or_raise(raw_path) (uid, gid, pid) = fuse_get_context() self.__log.debug("Context: UID= (%d) GID= (%d) PID= (%d)" % (uid, gid, pid)) if entry.is_directory: effective_permission = int(Conf.get('default_perm_folder'), 8) elif entry.editable: effective_permission = int(Conf.get('default_perm_file_editable'), 8) else: effective_permission = int(Conf.get('default_perm_file_noneditable'), 8) stat_result = { "st_mtime": entry.modified_date_epoch, # modified time. "st_ctime": entry.modified_date_epoch, # changed time. "st_atime": time(), "st_uid": uid, "st_gid": gid } if entry.is_directory: # Per http://sourceforge.net/apps/mediawiki/fuse/index.php?title=SimpleFilesystemHowto, # default size should be 4K. # TODO(dustin): Should we just make this (0), since that's what it is? stat_result["st_size"] = 1024 * 4 stat_result["st_mode"] = (stat.S_IFDIR | effective_permission) stat_result["st_nlink"] = 2 else: stat_result["st_size"] = DisplacedFile.file_size \ if entry.requires_mimetype \ else entry.file_size stat_result["st_mode"] = (stat.S_IFREG | effective_permission) stat_result["st_nlink"] = 1 return stat_result
def __cleanup_check(self): """Scan the current cache and determine items old-enough to be removed. """ self.__log.debug("Doing clean-up for cache resource with name [%s]." % (self.resource_name)) try: cache_dict = self.registry.list_raw(self.resource_name) except: self.__log.exception("Could not do clean-up check with resource-" "name [%s]." % (self.resource_name)) raise total_keys = [ (key, value_tuple[1]) for key, value_tuple \ in cache_dict.iteritems() ] cleanup_keys = [ key for key, value_tuple \ in cache_dict.iteritems() \ if (datetime.now() - value_tuple[1]).seconds > \ self.max_age ] self.__log.info("Found (%d) entries to clean-up from entry-cache." % (len(cleanup_keys))) if cleanup_keys: for key in cleanup_keys: self.__log.debug("Cache entry [%s] under resource-name [%s] " "will be cleaned-up." % (key, self.resource_name)) if self.exists(key, no_fault_check=True) == False: self.__log.debug("Entry with ID [%s] has already been " "cleaned-up." % (key)) else: try: self.remove(key) except: self.__log.exception("Cache entry [%s] under resource-" "name [%s] could not be cleaned-" "up." % (key, self.resource_name)) raise self.__log.debug("Scheduled clean-up complete.") cleanup_interval_s = Conf.get('cache_cleanup_check_frequency_s') cleanup_timer = Timer(cleanup_interval_s, self.__cleanup_check) Timers.get_instance().register_timer('cleanup', cleanup_timer)
def list_files(self, query_contains_string=None, query_is_string=None, parent_id=None): self.__log.info("Listing all files.") try: client = self.get_client() except: self.__log.exception("There was an error while acquiring the Google " "Drive client (list_files).") raise query_components = [ ] if parent_id: query_components.append("'%s' in parents" % (parent_id)) if query_is_string: query_components.append("title='%s'" % (query_is_string.replace("'", "\\'"))) elif query_contains_string: query_components.append("title contains '%s'" % (query_contains_string.replace("'", "\\'"))) # Make sure that we don't get any entries that we would have to ignore. hidden_flags = Conf.get('hidden_flags_list_remote') if hidden_flags: for hidden_flag in hidden_flags: query_components.append("%s = false" % (hidden_flag)) query = ' and '.join(query_components) if query_components else None try: result = client.files().list(q=query).execute() except: self.__log.exception("Could not get the list of files.") raise entries = [] for entry_raw in result[u'items']: try: entry = NormalEntry('list_files', entry_raw) except: self.__log.exception("Could not normalize raw-data for entry with" " ID [%s]." % (entry_raw[u'id'])) raise entries.append(entry) return entries
def _sched_check_changes():
    logging.debug("Doing scheduled check for changes.")

    try:
        get_change_manager().process_updates()

        logging.debug("Updates have been processed. Rescheduling.")

        # Schedule next invocation.
        t = Timer(Conf.get('change_check_frequency_s'), _sched_check_changes)
        Timers.get_instance().register_timer('change', t)
    except:
        _logger.exception("Exception while managing changes.")
        raise
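# A standalone sketch of the self-rescheduling pattern used above: each run
# arms a fresh threading.Timer for the next run, so the check keeps firing at
# the configured interval until the timer is cancelled. The 10-second interval
# is only an example value for change_check_frequency_s.
import threading

CHECK_INTERVAL_S = 10

def _check_changes():
    print("checking for changes")               # stand-in for process_updates()

    t = threading.Timer(CHECK_INTERVAL_S, _check_changes)
    t.daemon = True
    t.start()                                   # stand-in for register_timer()

_check_changes()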
def get_client(self):
    if self.client != None:
        return self.client

    try:
        authed_http = self.get_authed_http()
    except:
        self.__log.exception("Could not get authed Http instance.")
        raise

    self.__log.info("Building authorized client from Http. TYPE= [%s]" %
                    (type(authed_http)))

    # Build a client from the passed discovery document path
    discoveryUrl = Conf.get('google_discovery_service_url')

    # TODO: We should cache this, since we have, so often, had a problem
    #       retrieving it. If there's no other way, grab it directly, and then
    #       pass via a file:// URI.
    try:
        client = build(self.conf_service_name,
                       self.conf_service_version,
                       http=authed_http,
                       discoveryServiceUrl=discoveryUrl)
    except HttpError as e:
        # We've seen situations where the discovery URL's server is down,
        # with an alternate one to be used.
        #
        # An error here shouldn't leave GDFS in an unstable state (the
        # current command should just fail). Hopefully, the failure is
        # momentary, and the next command succeeds.

        logging.exception("There was an HTTP response-code of (%d) while "
                          "building the client with discovery URL [%s]." %
                          (e.resp.status, discoveryUrl))
        raise
    except:
        raise

    self.client = client
    return self.client
def create_file(self, filename, data_filepath, parents, mime_type=None,
                **kwargs):
    # TODO: It doesn't seem as if the created file is being registered.
    # Even though we're supposed to provide an extension, we can get away
    # without having one. We don't want to impose this when acting like a
    # normal FS.

    # If no data and no mime-type was given, default it.
    if mime_type == None:
        mime_type = Conf.get('file_default_mime_type')
        self.__log.debug("No mime-type was presented for file "
                         "create/update. Defaulting to [%s]." % (mime_type))

    return self.__insert_entry(filename,
                               mime_type,
                               parents,
                               data_filepath,
                               **kwargs)
def __cleanup(self): """Scan the current cache and determine items old-enough to be removed. """ cleanup_interval_s = Conf.get('cache_cleanup_check_frequency_s') _logger.info("Cache-cleanup thread running: %s", self) while self.__t_quit_ev.is_set() is False and \ gdrivefs.state.GLOBAL_EXIT_EVENT.is_set() is False: _logger.debug("Doing clean-up for cache resource with name [%s]." % (self.resource_name)) cache_dict = self.registry.list_raw(self.resource_name) total_keys = [ (key, value_tuple[1]) for key, value_tuple \ in list(cache_dict.items()) ] cleanup_keys = [ key for key, value_tuple \ in list(cache_dict.items()) \ if (datetime.datetime.now() - value_tuple[1]).seconds > \ self.max_age ] _logger.debug("Found (%d) entries to clean-up from entry-cache." % (len(cleanup_keys))) if cleanup_keys: for key in cleanup_keys: _logger.debug("Cache entry [%s] under resource-name [%s] " "will be cleaned-up." % (key, self.resource_name)) if self.exists(key, no_fault_check=True) == False: _logger.debug("Entry with ID [%s] has already been " "cleaned-up." % (key)) else: self.remove(key) else: _logger.debug("No cache-cleanup required.") time.sleep(cleanup_interval_s) _logger.info("Cache-cleanup thread terminating: %s", self)
def __post_status(self):
    """Send the current status to our reporting tool."""

    try:
        num_values = self.registry.count(self.resource_name)
    except:
        self.__log.exception("Could not get count of values for resource "
                             "with name [%s]." % (self.resource_name))
        raise

    try:
        self.report.set_values(self.report_source_name, 'count', num_values)
    except:
        self.__log.exception("Cache could not post status for resource "
                             "with name [%s]." % (self.resource_name))
        raise

    status_post_interval_s = Conf.get('cache_status_post_frequency_s')
    status_timer = Timer(status_post_interval_s, self.__post_status)

    Timers.get_instance().register_timer('status', status_timer)
def __get_entries_to_update(self, requested_entry_id):
    # Get more entries than just what was requested, while we're at it.

    parent_ids = self.__gd.get_parents_containing_id(requested_entry_id)

    affected_entries = [requested_entry_id]
    considered_entries = {}
    max_readahead_entries = Conf.get('max_readahead_entries')

    for parent_id in parent_ids:
        child_ids = self.__gd.get_children_under_parent_id(parent_id)

        for child_id in child_ids:
            if child_id == requested_entry_id:
                continue

            # We've already looked into this entry.

            try:
                considered_entries[child_id]
                continue
            except:
                pass

            considered_entries[child_id] = True

            # Is it already cached?

            if self.cache.exists(child_id):
                continue

            affected_entries.append(child_id)

            if len(affected_entries) >= max_readahead_entries:
                break

    return affected_entries
def list_files(self, query_contains_string=None, query_is_string=None, parent_id=None): self.__log.info( "Listing all files. CONTAINS=[%s] IS=[%s] " "PARENT_ID=[%s]" % (query_contains_string if query_contains_string is not None else '<none>', query_is_string if query_is_string is not None else '<none>', parent_id if parent_id is not None else '<none>')) try: client = self.get_client() except: self.__log.exception("There was an error while acquiring the " "Google Drive client (list_files).") raise query_components = [] if parent_id: query_components.append("'%s' in parents" % (parent_id)) if query_is_string: query_components.append( "title='%s'" % (escape_filename_for_query(query_is_string))) elif query_contains_string: query_components.append( "title contains '%s'" % (escape_filename_for_query(query_contains_string))) # Make sure that we don't get any entries that we would have to ignore. hidden_flags = Conf.get('hidden_flags_list_remote') if hidden_flags: for hidden_flag in hidden_flags: query_components.append("%s = false" % (hidden_flag)) query = ' and '.join(query_components) if query_components else None page_token = None page_num = 0 entries = [] while 1: self.__log.debug("Doing request for listing of files with page-" "token [%s] and page-number (%d): %s" % (page_token, page_num, query)) try: result = client.files().list(q=query, pageToken=page_token).\ execute() except: self.__log.exception("Could not get the list of files.") raise self.__log.debug("(%d) entries were presented for page-number " "(%d)." % (len(result[u'items']), page_num)) for entry_raw in result[u'items']: try: entry = NormalEntry('list_files', entry_raw) except: self.__log.exception( "Could not normalize raw-data for entry " "with ID [%s]." % (entry_raw[u'id'])) raise entries.append(entry) if u'nextPageToken' not in result: self.__log.debug("No more pages in file listing.") break self.__log.debug("Next page-token in file-listing is [%s]." % (result[u'nextPageToken'])) page_token = result[u'nextPageToken'] page_num += 1 return entries
def set_auth_cache_filepath(auth_storage_filepath):
    Conf.set('auth_cache_filepath', auth_storage_filepath)
def mount(auth_storage_filepath, mountpoint, debug=None, nothreads=None, option_string=None): fuse_opts = { } if option_string: for opt_parts in [opt.split('=', 1) \ for opt \ in option_string.split(',') ]: k = opt_parts[0] # We need to present a bool type for on/off flags. Since all we # have are strings, we'll convert anything with a 'True' or 'False' # to a bool, or anything with just a key to True. if len(opt_parts) == 2: v = opt_parts[1] v_lower = v.lower() if v_lower == 'true': v = True elif v_lower == 'false': v = False else: v = True # We have a list of provided options. See which match against our # application options. logging.info("Setting option [%s] to [%s]." % (k, v)) try: Conf.set(k, v) except (KeyError) as e: logging.debug("Forwarding option [%s] with value [%s] to " "FUSE." % (k, v)) fuse_opts[k] = v except: logging.exception("Could not set option [%s]. It is probably " "invalid." % (k)) raise logging.debug("PERMS: F=%s E=%s NE=%s" % (Conf.get('default_perm_folder'), Conf.get('default_perm_file_editable'), Conf.get('default_perm_file_noneditable'))) # Assume that any option that wasn't an application option is a FUSE # option. The Python-FUSE interface that we're using is beautiful/elegant, # but there's no help support. The user is just going to have to know the # options. set_auth_cache_filepath(auth_storage_filepath) # How we'll appear in diskfree, mtab, etc.. name = ("gdfs(%s)" % (auth_storage_filepath)) # Don't start any of the scheduled tasks, such as change checking, cache # cleaning, etc. It will minimize outside influence of the logs and state # to make it easier to debug. # atexit.register(Timers.get_instance().cancel_all) if debug: Timers.get_instance().set_autostart_default(False) fuse = FUSE(GDriveFS(), mountpoint, debug=debug, foreground=debug, nothreads=nothreads, fsname=name, **fuse_opts)
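# A hypothetical invocation of mount() showing how option_string is split by
# the code above: keys that Conf recognizes (e.g. default_perm_folder) become
# application settings, unknown keys (e.g. big_writes, allow_other) raise
# KeyError and fall through to FUSE, and bare keys are treated as boolean True.
# The paths and option values here are examples only.
mount(auth_storage_filepath='/var/cache/gdfs/creds',
      mountpoint='/mnt/gdrivefs',
      debug=False,
      nothreads=False,
      option_string='big_writes,default_perm_folder=0777,allow_other')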
def __create(self, filepath, mode=None):
    """Create a new file. We don't implement "mode" (permissions) because
    the model doesn't agree with GD.
    """

    # TODO: Fail if it already exists.

    self.__log.debug("Splitting file-path [%s] for inner create." %
                     (filepath))

    try:
        result = split_path(filepath, path_resolver)
        (parent_clause, path, filename, mime_type, is_hidden) = result
    except GdNotFoundError:
        self.__log.exception("Could not process [%s] (i-create)." %
                             (filepath))
        raise FuseOSError(ENOENT)
    except:
        self.__log.exception("Could not split path [%s] (i-create)." %
                             (filepath))
        raise FuseOSError(EIO)

    distilled_filepath = build_filepath(path, filename)

    self.__log.debug("Acquiring file-handle.")

    # Try to guess at a mime-type, if not otherwise given.
    if mime_type is None:
        (mimetype_guess, _) = guess_type(filename, True)

        if mimetype_guess is not None:
            mime_type = mimetype_guess
        else:
            mime_type = Conf.get('default_mimetype')

    self.__log.debug("Creating empty file [%s] under parent with ID "
                     "[%s]." % (filename, parent_clause[3]))

    try:
        entry = drive_proxy('create_file',
                            filename=filename,
                            data_filepath='/dev/null',
                            parents=[parent_clause[3]],
                            mime_type=mime_type,
                            is_hidden=is_hidden)
    except:
        self.__log.exception("Could not create empty file [%s] under "
                             "parent with ID [%s]." %
                             (filename, parent_clause[3]))
        raise FuseOSError(EIO)

    self.__log.debug("Registering created file in cache.")

    path_relations = PathRelations.get_instance()

    try:
        path_relations.register_entry(entry)
    except:
        self.__log.exception("Could not register created file in cache.")
        raise FuseOSError(EIO)

    self.__log.info("Inner-create of [%s] completed." %
                    (distilled_filepath))

    return (entry, path, filename, mime_type)
import logging
import json

from os import makedirs
from os.path import isdir

from gdrivefs.gdtool.drive import drive_proxy
from gdrivefs.gdtool.normal_entry import NormalEntry
from gdrivefs.conf import Conf

temp_path = ("%s/displaced" % (Conf.get('file_download_temp_path')))

if isdir(temp_path) is False:
    makedirs(temp_path)


class DisplacedFile(object):
    __log = None
    normalized_entry = None

    file_size = 1000

    def __init__(self, normalized_entry):
        self.__log = logging.getLogger().getChild('DisFile')

        if normalized_entry.__class__ != NormalEntry:
            raise Exception("_DisplacedFile can not wrap a non-NormalEntry "
                            "object.")

        self.__normalized_entry = normalized_entry

    def deposit_file(self, mime_type):
        """Write the file to a temporary path, and present a stub (JSON) to
        the user. This is the only way of getting files that don't have a
        well-defined filesize without providing a type, ahead of time.
        """
#!/usr/bin/env python2.7

import sys
sys.path.insert(0, '..')

import datetime
import time

import dateutil.tz

from gdrivefs.conf import Conf

Conf.set('auth_cache_filepath', '/var/cache/creds/gdfs')

import gdrivefs.gdfs.gdfuse
import gdrivefs.gdtool.drive
import gdrivefs.time_support

auth = gdrivefs.gdtool.drive.GdriveAuth()
client = auth.get_client()

def get_phrase(epoch):
    dt = datetime.datetime.utcfromtimestamp(epoch)
    return datetime.datetime.strftime(dt, gdrivefs.time_support.DTF_DATETIMET)

print("Before:\n")

(entry, path, filename) = gdrivefs.gdfs.gdfuse.get_entry_or_raise(
                            '/20140426-171136')

print(entry.modified_date)
print(entry.modified_date.utctimetuple())
print(entry.modified_date_epoch)
def create_directory(self, filename, parents, **kwargs):
    mimetype_directory = Conf.get('directory_mimetype')
    return self.__insert_entry(filename,
                               mimetype_directory,
                               parents,
                               **kwargs)
def get_max_cache_age_seconds(self):
    return Conf.get('cache_entries_max_age')
def download_to_local(self, output_file_path, normalized_entry, mime_type, allow_cache=True): """Download the given file. If we've cached a previous download and the mtime hasn't changed, re-use. The third item returned reflects whether the data has changed since any prior attempts. """ self.__log.info("Downloading entry with ID [%s] and mime-type [%s]." % (normalized_entry.id, mime_type)) if mime_type != normalized_entry.mime_type and \ mime_type not in normalized_entry.download_links: message = ("Entry with ID [%s] can not be exported to type [%s]. " "The available types are: %s" % (normalized_entry.id, mime_type, ', '.join( normalized_entry.download_links.keys()))) self.__log.warning(message) raise ExportFormatError(message) temp_path = Conf.get('file_download_temp_path') if not isdir(temp_path): try: makedirs(temp_path) except: self.__log.exception("Could not create temporary download " "path [%s]." % (temp_path)) raise gd_mtime_epoch = mktime(normalized_entry.modified_date.timetuple()) self.__log.info("File will be downloaded to [%s]." % (output_file_path)) use_cache = False if allow_cache and isfile(output_file_path): # Determine if a local copy already exists that we can use. try: stat_info = stat(output_file_path) except: self.__log.exception("Could not retrieve stat() information " "for temp download file [%s]." % (output_file_path)) raise if gd_mtime_epoch == stat_info.st_mtime: use_cache = True if use_cache: # Use the cache. It's fine. self.__log.info("File retrieved from the previously downloaded, " "still-current file.") return (stat_info.st_size, False) # Go and get the file. try: # TODO(dustin): This might establish a new connection. Not cool. authed_http = self.get_authed_http() except: self.__log.exception("Could not get authed Http instance for " "download.") raise url = normalized_entry.download_links[mime_type] self.__log.debug("Downloading file from [%s]." % (url)) try: # TODO(dustin): Right now, we're downloading the complete body of data into memory, and then saving. data_tuple = authed_http.request(url) except: self.__log.exception("Could not download entry with ID [%s], type " "[%s], and URL [%s]." % (normalized_entry.id, mime_type, url)) raise (response_headers, data) = data_tuple # Throw a log-item if we see any "Range" response-headers. If GD ever # starts supporting "Range" headers, we'll be able to write smarter # download mechanics (resume, etc..). r = re.compile('Range') range_found = [("%s: %s" % (k, v)) for k, v in response_headers.iteritems() if r.match(k)] if range_found: self.__log.info("GD has returned Range-related headers: %s" % (", ".join(found))) self.__log.info("Downloaded file is (%d) bytes. Writing to [%s]." % (len(data), output_file_path)) try: with open(output_file_path, 'wb') as f: f.write(data) except: self.__log.exception("Could not cached downloaded file. Skipped.") else: self.__log.info("File written to cache successfully.") try: utime(output_file_path, (time(), gd_mtime_epoch)) except: self.__log.exception("Could not set time on [%s]." % (output_file_path)) raise return (len(data), True)
def download_to_local(self, normalized_entry, mime_type, force_output_filename=None, allow_cache=True): """Download the given file. If we've cached a previous download and the mtime hasn't changed, re-use. """ self.__log.info("Downloading entry with ID [%s] and mime-type [%s]." % (normalized_entry.id, mime_type)) if mime_type != normalized_entry.mime_type and \ mime_type not in normalized_entry.download_links: message = ("Entry with ID [%s] can not be exported to type [%s]. The available types are: %s" % (normalized_entry.id, mime_type, ', '.join(normalized_entry.download_links.keys()))) self.__log.warning(message) raise ExportFormatError(message) temp_path = Conf.get('file_download_temp_path') if not os.path.isdir(temp_path): try: os.makedirs(temp_path) except: self.__log.exception("Could not create temporary download path " "[%s]." % (temp_path)) raise # Produce a file-path of a temporary file that we can store the data # to. More often than not, we'll be called when the OS wants to read # the file, and we'll need the data at hand in order to page through # it. if force_output_filename: temp_filename = force_output_filename else: temp_filename = ("%s.%s" % (normalized_entry.id, mime_type)). \ encode('ascii') temp_filename = re.sub('[^0-9a-zA-Z_\.]+', '', temp_filename) temp_filepath = ("%s/%s" % (temp_path, temp_filename)) gd_mtime_epoch = mktime(normalized_entry.modified_date.timetuple()) self.__log.info("File will be downloaded to [%s]." % (temp_filepath)) use_cache = False if allow_cache and os.path.isfile(temp_filepath): # Determine if a local copy already exists that we can use. try: stat = os.stat(temp_filepath) except: self.__log.exception("Could not retrieve stat() information for " "temp download file [%s]." % (temp_filepath)) raise if gd_mtime_epoch == stat.st_mtime: use_cache = True if use_cache: # Use the cache. It's fine. self.__log.info("File retrieved from the previously downloaded, still-current file.") return (temp_filepath, stat.st_size) # Go and get the file. try: authed_http = self.get_authed_http() except: self.__log.exception("Could not get authed Http instance for download.") raise url = normalized_entry.download_links[mime_type] self.__log.debug("Downloading file from [%s]." % (url)) try: data_tuple = authed_http.request(url) except: self.__log.exception("Could not download entry with ID [%s], type " "[%s], and URL [%s]." % (normalized_entry.id, mime_type, url)) raise (response_headers, data) = data_tuple # Throw a log-item if we see any "Range" response-headers. If GD ever # starts supporting "Range" headers, we'll be able to write smarter # download mechanics (resume, etc..). r = re.compile('Range') range_found = [("%s: %s" % (k, v)) for k, v in response_headers.iteritems() if r.match(k)] if range_found: logger.info("GD has returned Range-related headers: %s" % (", ".join(found))) self.__log.info("Downloaded file is (%d) bytes. Writing to [%s]." % (len(data), temp_filepath)) try: with open(temp_filepath, 'wb') as f: f.write(data) except: self.__log.exception("Could not cached downloaded file. Skipped.") else: self.__log.info("File written to cache successfully.") try: os.utime(temp_filepath, (time(), gd_mtime_epoch)) except: self.__log.exception("Could not set time on [%s]." % (temp_filepath)) raise return (temp_filepath, len(data))
class NormalEntry(object): __default_general_mime_type = Conf.get('default_mimetype') __directory_mimetype = Conf.get('directory_mimetype') __properties_extra = [ 'is_directory', 'is_visible', 'parents', 'download_types', 'modified_date', 'modified_date_epoch', 'mtime_byme_date', 'mtime_byme_date_epoch', 'atime_byme_date', 'atime_byme_date_epoch', ] def __init__(self, gd_resource_type, raw_data): self.__info = {} self.__parents = [] self.__raw_data = raw_data self.__cache_data = None self.__cache_mimetypes = None self.__cache_dict = {} # Return True if reading from this file should return info and deposit # the data elsewhere. This is predominantly determined by whether we # can get a file-size up-front, or we have to decide on a specific # mime-type in order to do so. try: requires_mimetype = u'fileSize' not in self.__raw_data and \ raw_data[u'mimeType'] != self.__directory_mimetype self.__info['requires_mimetype'] = \ requires_mimetype self.__info['title'] = \ raw_data[u'title'] self.__info['mime_type'] = \ raw_data[u'mimeType'] self.__info['labels'] = \ raw_data[u'labels'] self.__info['id'] = \ raw_data[u'id'] self.__info['last_modifying_user_name'] = \ raw_data[u'lastModifyingUserName'] self.__info['writers_can_share'] = \ raw_data[u'writersCanShare'] self.__info['owner_names'] = \ raw_data[u'ownerNames'] self.__info['editable'] = \ raw_data[u'editable'] self.__info['user_permission'] = \ raw_data[u'userPermission'] except KeyError: _logger.exception( "Could not normalize with missing key.\nRAW:\n" "%s", pprint.pformat(raw_data)) raise self.__info['link'] = \ raw_data.get(u'embedLink') self.__info['file_size'] = \ int(raw_data.get(u'fileSize', 0)) self.__info['file_extension'] = \ raw_data.get(u'fileExtension') self.__info['md5_checksum'] = \ raw_data.get(u'md5Checksum') self.__info['image_media_metadata'] = \ raw_data.get(u'imageMediaMetadata') self.__info['download_links'] = \ raw_data.get(u'exportLinks', {}) try: self.__info['download_links'][self.__info['mime_type']] = \ raw_data[u'downloadUrl'] except KeyError: pass self.__update_display_name() for parent in raw_data[u'parents']: self.__parents.append(parent[u'id']) def __getattr__(self, key): return self.__info[key] def __str__(self): return ( "<NORMAL ID= [%s] MIME= [%s] NAME= [%s] URIS= (%d)>" % (self.id, self.mime_type, self.title, len(self.download_links))) def __repr__(self): return str(self) def __update_display_name(self): # This is encoded for displaying locally. self.__info['title_fs'] = utility.translate_filename_charset( self.__info['title']) def temp_rename(self, new_filename): """Set the name to something else, here, while we, most likely, wait for the change at the server to propogate. """ self.__info['title'] = new_filename self.__update_display_name() def normalize_download_mimetype(self, specific_mimetype=None): """If a mimetype is given, return it if there is a download-URL available for it, or fail. Else, determine if a copy can downloaded with the default mime-type (application/octet-stream, or something similar), or return the only mime-type in the event that there's only one download format. """ if self.__cache_mimetypes is None: self.__cache_mimetypes = [[], None] if specific_mimetype is not None: if specific_mimetype not in self.__cache_mimetypes[0]: _logger.debug( "Normalizing mime-type [%s] for download. " "Options: %s", specific_mimetype, self.download_types) if specific_mimetype not in self.download_links: raise ExportFormatError( "Mime-type [%s] is not available for " "download. 
Options: %s" % (self.download_types)) self.__cache_mimetypes[0].append(specific_mimetype) return specific_mimetype if self.__cache_mimetypes[1] is None: # Try to derive a mimetype from the filename, and see if it matches # against available export types. (mimetype_candidate, _) = guess_type(self.title_fs, True) if mimetype_candidate is not None and \ mimetype_candidate in self.download_links: mime_type = mimetype_candidate elif NormalEntry.__default_general_mime_type in self.download_links: mime_type = NormalEntry.__default_general_mime_type # If there's only one download link, resort to using it (perhaps it was # an uploaded file, assigned only one type). elif len(self.download_links) == 1: mime_type = self.download_links.keys()[0] else: raise ExportFormatError("A correct mime-type needs to be " "specified. Options: %s" % (self.download_types)) self.__cache_mimetypes[1] = mime_type return self.__cache_mimetypes[1] def __convert(self, data): if isinstance(data, dict): list_ = [("K(%s)=V(%s)" % (self.__convert(key), self.__convert(value))) \ for key, value \ in data.iteritems()] final = '; '.join(list_) return final elif isinstance(data, list): final = ', '.join([('LI(%s)' % (self.__convert(element))) \ for element \ in data]) return final elif isinstance(data, unicode): return utility.translate_filename_charset(data) elif isinstance(data, Number): return str(data) elif isinstance(data, datetime): return get_flat_normal_fs_time_from_dt(data) else: return data def get_data(self): original = dict([(key.encode('ASCII'), value) for key, value in self.__raw_data.iteritems()]) distilled = self.__info extra = dict([(key, getattr(self, key)) for key in self.__properties_extra]) data_dict = { 'original': original, #'distilled': distilled, 'extra': extra } return data_dict @property def xattr_data(self): if self.__cache_data is None: data_dict = self.get_data() attrs = {} for a_type, a_dict in data_dict.iteritems(): # self.__log.debug("Setting [%s]." % (a_type)) for key, value in a_dict.iteritems(): fqkey = ('user.%s.%s' % (a_type, key)) attrs[fqkey] = self.__convert(value) self.__cache_data = attrs return self.__cache_data @property def is_directory(self): """Return True if we represent a directory.""" return (self.__info['mime_type'] == self.__directory_mimetype) @property def is_visible(self): if [ flag for flag, value in self.labels.items() if flag in Conf.get('hidden_flags_list_local') and value ]: return False else: return True @property def parents(self): return self.__parents @property def download_types(self): return self.download_links.keys() @property def modified_date(self): if 'modified_date' not in self.__cache_dict: self.__cache_dict['modified_date'] = \ dateutil.parser.parse(self.__raw_data[u'modifiedDate']) return self.__cache_dict['modified_date'] @property def modified_date_epoch(self): # mktime() only works in terms of the local timezone, so compensate # (this works with DST, too). 
return mktime(self.modified_date.timetuple()) - time.timezone @property def mtime_byme_date(self): if 'modified_byme_date' not in self.__cache_dict: self.__cache_dict['modified_byme_date'] = \ dateutil.parser.parse(self.__raw_data[u'modifiedByMeDate']) return self.__cache_dict['modified_byme_date'] @property def mtime_byme_date_epoch(self): return mktime(self.mtime_byme_date.timetuple()) - time.timezone @property def atime_byme_date(self): if 'viewed_byme_date' not in self.__cache_dict: self.__cache_dict['viewed_byme_date'] = \ dateutil.parser.parse(self.__raw_data[u'lastViewedByMeDate']) \ if u'lastViewedByMeDate' in self.__raw_data \ else None return self.__cache_dict['viewed_byme_date'] @property def atime_byme_date_epoch(self): return mktime(self.atime_byme_date.timetuple()) - time.timezone \ if self.atime_byme_date \ else None
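# A small, standalone illustration of the epoch conversion used in the
# properties above: time.mktime() interprets a struct_time as local time, so
# the code subtracts time.timezone to shift the result back toward a UTC epoch.
# calendar.timegm() computes a UTC epoch directly and is printed only for
# comparison; the two values may differ depending on the local DST rules in
# effect. The date below is an arbitrary example.
import datetime
import time
from calendar import timegm

dt = datetime.datetime(2014, 4, 26, 17, 11, 36)      # hypothetical UTC mtime

print(time.mktime(dt.timetuple()) - time.timezone)   # conversion as done above
print(timegm(dt.timetuple()))                        # direct UTC conversion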
def __load_base_from_remote(self): """Download the data for the entry that we represent. This is probably a file, but could also be a stub for -any- entry. """ try: entry = self.__get_entry_or_raise() except: self.__log.exception("Could not get entry with ID [%s] for " "write-flush." % (self.__entry_id)) raise self.__log.debug("Ensuring local availability of [%s]." % (entry)) temp_file_path = get_temp_filepath(entry, self.mime_type) self.__log.debug("__load_base_from_remote about to download.") with self.__class__.__download_lock: # Get the current version of the write-cache file, or note that we # don't have it. self.__log.info("Attempting local cache update of file [%s] for " "entry [%s] and mime-type [%s]." % (temp_file_path, entry, self.mime_type)) if entry.requires_mimetype: length = DisplacedFile.file_size try: d = DisplacedFile(entry) stub_data = d.deposit_file(self.mime_type) with file(temp_file_path, 'w') as f: f.write(stub_data) except: self.__log.exception("Could not deposit to file [%s] from " "entry [%s]." % (temp_file_path, entry)) raise # TODO: Accommodate the cache for displaced-files. cache_fault = True else: self.__log.info("Executing the download.") try: # TODO(dustin): We're not inheriting an existing file (same mtime, same size). result = drive_proxy('download_to_local', output_file_path=temp_file_path, normalized_entry=entry, mime_type=self.mime_type) (length, cache_fault) = result except ExportFormatError: self.__log.exception("There was an export-format error.") raise FuseOSError(ENOENT) except: self.__log.exception("Could not localize file with entry " "[%s]." % (entry)) raise self.__log.info("Download complete. cache_fault= [%s] " "__is_loaded= [%s]" % (cache_fault, self.__is_loaded)) # We've either not loaded it, yet, or it has changed. if cache_fault or not self.__is_loaded: with self.__class__.__update_lock: self.__log.info("Checking queued items for fault.") if cache_fault: if self.__is_dirty: self.__log.error("Entry [%s] has been changed. " "Forcing buffer updates, and " "clearing uncommitted updates." % (entry)) else: self.__log.debug("Entry [%s] has changed. " "Updating buffers." % (entry)) self.__log.debug("Loading buffers.") with open(temp_file_path, 'rb') as f: # Read the locally cached file in. try: # TODO(dustin): Our accounting is broken when it comes to loading and/or update-tracking. If we have a guarantee thawrites only appear in sequence and in increasing order, we can dump BufferSegments. # TODO(dustin): This is the source of: # 1) An enormous slowdown where we first have to write the data, and then have to read it back. # 2) An enormous resource burden. data = f.read() read_blocksize = Conf.get('default_buffer_read_blocksize') self.__buffer = BufferSegments(data, read_blocksize) except: self.__log.exception("Could not read current cached " "file into buffer.") raise self.__is_dirty = False self.__is_loaded = True self.__log.debug("__load_base_from_remote complete.") return cache_fault