Example #1
    def __build_stat_from_entry(self, entry):
        (uid, gid, pid) = fuse_get_context()

        if entry.is_directory:
            effective_permission = int(Conf.get("default_perm_folder"), 8)
        elif entry.editable:
            effective_permission = int(Conf.get("default_perm_file_editable"), 8)
        else:
            effective_permission = int(Conf.get("default_perm_file_noneditable"), 8)

        stat_result = {
            "st_mtime": entry.modified_date_epoch,  # modified time.
            "st_ctime": entry.modified_date_epoch,  # changed time.
            "st_atime": time(),
            "st_uid": uid,
            "st_gid": gid,
        }

        if entry.is_directory:
            # Per http://sourceforge.net/apps/mediawiki/fuse/index.php?title=SimpleFilesystemHowto,
            # default size should be 4K.
            # TODO(dustin): Should we just make this (0), since that's what it is?
            stat_result["st_size"] = 1024 * 4
            stat_result["st_mode"] = stat.S_IFDIR | effective_permission
            stat_result["st_nlink"] = 2
        else:
            stat_result["st_size"] = DisplacedFile.file_size if entry.requires_mimetype else entry.file_size

            stat_result["st_mode"] = stat.S_IFREG | effective_permission
            stat_result["st_nlink"] = 1

        stat_result["st_blocks"] = int(math.ceil(float(stat_result["st_size"]) / 512.0))

        return stat_result
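
For orientation, a dict like the one built above is exactly what fusepy's Operations.getattr is expected to return. A minimal, self-contained sketch (assuming the fusepy package; the class and values here are illustrative, not from this project):

import stat
from time import time

from fuse import Operations

class _StatSketch(Operations):
    """Illustrative only: a read-only root directory with the same st_*
    fields that __build_stat_from_entry() assembles above."""

    def getattr(self, path, fh=None):
        now = time()
        return {
            "st_mode": stat.S_IFDIR | 0o755,   # directory + permissions
            "st_nlink": 2,                     # '.' and the name itself
            "st_size": 1024 * 4,               # conventional 4K for directories
            "st_mtime": now,
            "st_ctime": now,
            "st_atime": now,
        }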
Example #2
    def __init__(self):
        cache_filepath  = Conf.get('auth_cache_filepath')
        api_credentials = Conf.get('api_credentials')

        self.cache_filepath = cache_filepath
        self.credentials = None

        with NamedTemporaryFile() as f:
            json.dump(api_credentials, f)
            f.flush()

            self.flow = flow_from_clientsecrets(f.name, 
                                                scope=self.__get_scopes(), 
                                                redirect_uri=OOB_CALLBACK_URN)
Example #3
    def __init__(self):
        cache_filepath = Conf.get('auth_cache_filepath')
        api_credentials = Conf.get('api_credentials')

        self.cache_filepath = cache_filepath
        self.credentials = None

        with NamedTemporaryFile() as f:
            json.dump(api_credentials, f)
            f.flush()

            self.flow = flow_from_clientsecrets(f.name,
                                                scope=self.__get_scopes(),
                                                redirect_uri=OOB_CALLBACK_URN)
Example #4
    def __init__(self):
        self.__log = logging.getLogger().getChild('OauthAuth')

        cache_filepath  = Conf.get('auth_cache_filepath')
        api_credentials = Conf.get('api_credentials')

        self.cache_filepath = cache_filepath

        with NamedTemporaryFile() as f:
            json.dump(api_credentials, f)
            f.flush()

            self.flow = flow_from_clientsecrets(f.name, 
                                                scope=self.__get_scopes(), 
                                                redirect_uri=OOB_CALLBACK_URN)
Example #5
    def __init__(self):
        self.__log = logging.getLogger().getChild('OauthAuth')

        cache_filepath = Conf.get('auth_cache_filepath')
        api_credentials = Conf.get('api_credentials')

        self.cache_filepath = cache_filepath

        with NamedTemporaryFile() as f:
            json.dump(api_credentials, f)
            f.flush()

            self.flow = flow_from_clientsecrets(f.name,
                                                scope=self.__get_scopes(),
                                                redirect_uri=OOB_CALLBACK_URN)
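
One portability note on this pattern: under Python 3, NamedTemporaryFile opens in binary mode by default, so json.dump(api_credentials, f) would raise a TypeError. A sketch of the same idea that works on both versions (the _build_flow helper name is hypothetical):

import json

from tempfile import NamedTemporaryFile
from oauth2client.client import flow_from_clientsecrets

def _build_flow(api_credentials, scopes, redirect_uri):
    # Explicit text mode: json.dump() writes str, which the default
    # binary mode would reject on Python 3.
    with NamedTemporaryFile(mode='w') as f:
        json.dump(api_credentials, f)
        f.flush()

        return flow_from_clientsecrets(f.name,
                                       scope=scopes,
                                       redirect_uri=redirect_uri)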
Example #6
    def get_client(self):
        if self.__client is None:
            authed_http = self.get_authed_http()
        
            # Build a client from the passed discovery document path
            
            discoveryUrl = Conf.get('google_discovery_service_url')
# TODO: We should cache this, since we have so often had a problem
#       retrieving it. If there's no other way, grab it directly, and then pass
#       via a file:// URI.
        
            try:
                client = build(_CONF_SERVICE_NAME, 
                               _CONF_SERVICE_VERSION, 
                               http=authed_http, 
                               discoveryServiceUrl=discoveryUrl)
            except HttpError as e:
                # We've seen situations where the discovery URL's server is down,
                # with an alternate one to be used.
                #
                # An error here shouldn't leave GDFS in an unstable state (the 
                # current command should just fail). Hopefully, the failure is
                # momentary, and the next command succeeds.

                _logger.exception("There was an HTTP response-code of (%d) while "
                                  "building the client with discovery URL [%s].",
                                  e.resp.status, discoveryUrl)
                raise

            self.__client = client

        return self.__client
Example #7
    def get_client(self):
        if self.__client is None:
            authed_http = self.get_authed_http()

            # Build a client from the passed discovery document path

            discoveryUrl = Conf.get('google_discovery_service_url')
            # TODO: We should cache this, since we have so often had a problem
            #       retrieving it. If there's no other way, grab it directly, and then pass
            #       via a file:// URI.

            try:
                client = build(_CONF_SERVICE_NAME,
                               _CONF_SERVICE_VERSION,
                               http=authed_http,
                               discoveryServiceUrl=discoveryUrl)
            except HttpError as e:
                # We've seen situations where the discovery URL's server is down,
                # with an alternate one to be used.
                #
                # An error here shouldn't leave GDFS in an unstable state (the
                # current command should just fail). Hopefully, the failure is
                # momentary, and the next command succeeds.

                _logger.exception(
                    "There was an HTTP response-code of (%d) while "
                    "building the client with discovery URL [%s].",
                    e.resp.status, discoveryUrl)
                raise

            self.__client = client

        return self.__client
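
As for the TODO above: google-api-python-client also exposes build_from_document(), which accepts the discovery document directly instead of a URL. A sketch of the caching it suggests (the cache path and helper name are hypothetical):

import os

from googleapiclient.discovery import build_from_document

_DISCOVERY_CACHE_FILEPATH = '/tmp/gdfs.discovery.json'  # hypothetical location

def _build_client_cached(authed_http, discovery_url):
    if os.path.exists(_DISCOVERY_CACHE_FILEPATH):
        with open(_DISCOVERY_CACHE_FILEPATH) as f:
            doc = f.read()
    else:
        # httplib2-style request: returns a (response, body) pair.
        (_, body) = authed_http.request(discovery_url)
        doc = body.decode('utf-8') if isinstance(body, bytes) else body

        with open(_DISCOVERY_CACHE_FILEPATH, 'w') as f:
            f.write(doc)

    return build_from_document(doc, http=authed_http)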
Example #8
    def deposit_file(self, mime_type):
        """Write the file to a temporary path, and present a stub (JSON) to the 
        user. This is the only way of getting files that don't have a 
        well-defined filesize without providing a type, ahead of time.
        """

        temp_path = Conf.get('file_download_temp_path')
        file_path = ("%s/displaced/%s.%s" %
                     (temp_path, self.__normalized_entry.title,
                      mime_type.replace('/', '+')))

        try:
            result = drive_proxy('download_to_local',
                                 output_file_path=file_path,
                                 normalized_entry=self.__normalized_entry,
                                 mime_type=mime_type)
            (length, cache_fault) = result
        except:
            self.__log.exception("Could not localize displaced file with "
                                 "entry having ID [%s]." %
                                 (self.__normalized_entry.id))
            raise

        self.__log.debug("Displaced entry [%s] deposited to [%s] with length "
                         "(%d)." %
                         (self.__normalized_entry, file_path, length))

        try:
            return self.get_stub(mime_type, length, file_path)
        except:
            self.__log.exception("Could not build stub for [%s]." %
                                 (self.__normalized_entry))
            raise
Example #9
    def __check_changes(self):
        _logger.info("Change-processing thread running.")

        interval_s = Conf.get('change_check_frequency_s')
        cm = get_change_manager()

        while self.__t_quit_ev.is_set() is False and \
                gdrivefs.state.GLOBAL_EXIT_EVENT.is_set() is False:
            _logger.debug("Checking for changes.")

            try:
                is_done = cm.process_updates()
            except:
                _logger.exception("Squelching an exception that occurred "
                                  "while reading/processing changes.")

                # Force another check, soon.
                is_done = False

            # If there are still more changes, take them as quickly as
            # possible.
            if is_done is True:
                _logger.debug("No more changes. Waiting.")
                time.sleep(interval_s)
            else:
                _logger.debug("There are more changes to be applied. Cycling "
                              "immediately.")

        _logger.info("Change-processing thread terminating.")
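
One refinement worth noting for a loop like this: time.sleep(interval_s) can delay shutdown by up to a full interval, while waiting on the quit event itself returns as soon as it is set. The loop tail under that change, sketched:

            if is_done is True:
                _logger.debug("No more changes. Waiting.")

                # Event.wait() returns as soon as the quit event is set, so
                # shutdown isn't held up for the rest of the interval.
                self.__t_quit_ev.wait(interval_s)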
Example #10
def get_temp_filepath(normalized_entry, mime_type):
    temp_filename = ("%s.%s" %
                     (normalized_entry.id, mime_type.replace('/', '+'))).\
                    encode('ascii')

    temp_path = Conf.get('file_download_temp_path')
    return ("%s/local/%s" % (temp_path, temp_filename))
Example #11
    def __check_changes(self):
        _logger.info("Change-processing thread running.")

        interval_s = Conf.get('change_check_frequency_s')
        cm = get_change_manager()

        while self.__t_quit_ev.is_set() is False and \
                gdrivefs.state.GLOBAL_EXIT_EVENT.is_set() is False:
            _logger.debug("Checking for changes.")

            try:
                is_done = cm.process_updates()
            except:
                _logger.exception("Squelching an exception that occurred "
                                  "while reading/processing changes.")

                # Force another check, soon.
                is_done = False

            # If there are still more changes, take them as quickly as 
            # possible.
            if is_done is True:
                _logger.debug("No more changes. Waiting.")
                time.sleep(interval_s)
            else:
                _logger.debug("There are more changes to be applied. Cycling "
                              "immediately.")

        _logger.info("Change-processing thread terminating.")
Example #12
    def deposit_file(self, mime_type):
        """Write the file to a temporary path, and present a stub (JSON) to the 
        user. This is the only way of getting files that don't have a 
        well-defined filesize without providing a type, ahead of time.
        """

        temp_path = Conf.get('file_download_temp_path')
        file_path = ("%s/displaced/%s.%s" % (temp_path, 
                                             self.__normalized_entry.title, 
                                             mime_type.replace('/', '+')))

        try:
            result = drive_proxy('download_to_local', 
                                 output_file_path=file_path, 
                                 normalized_entry=self.__normalized_entry,
                                 mime_type=mime_type)
            (length, cache_fault) = result
        except:
            self.__log.exception("Could not localize displaced file with "
                                 "entry having ID [%s]." % 
                                 (self.__normalized_entry.id))
            raise

        self.__log.debug("Displaced entry [%s] deposited to [%s] with length "
                         "(%d)." % 
                         (self.__normalized_entry, file_path, length)) 

        try:
            return self.get_stub(mime_type, length, file_path)
        except:
            self.__log.exception("Could not build stub for [%s]." % 
                                 (self.__normalized_entry))
            raise
Example #13
def get_temp_filepath(normalized_entry, mime_type):
    temp_filename = ("%s.%s" % 
                     (normalized_entry.id, mime_type.replace('/', '+'))).\
                    encode('ascii')

    temp_path = Conf.get('file_download_temp_path')
    return ("%s/local/%s" % (temp_path, temp_filename))
Example #14
    def is_visible(self):
        if [flag
                for flag, value
                in self.labels.items()
                if flag in Conf.get('hidden_flags_list_local') and value]:
            return False
        else:
            return True
Example #15
    def is_visible(self):
        if [
                flag for flag, value in self.labels.items()
                if flag in Conf.get('hidden_flags_list_local') and value
        ]:
            return False
        else:
            return True
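
The same test reads a little more directly with any(); an equivalent sketch:

    def is_visible(self):
        hidden_flags = Conf.get('hidden_flags_list_local')
        return not any(value
                       for flag, value in self.labels.items()
                       if flag in hidden_flags)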
Example #16
    def __get_entries_to_update(self, requested_entry_id):
        # Get more entries than just what was requested, while we're at it.

        try:
            parent_ids = drive_proxy('get_parents_containing_id',
                                     child_id=requested_entry_id)
        except:
            self.__log.exception(
                "Could not retrieve parents for child with ID "
                "[%s]." % (requested_entry_id))
            raise

        self.__log.debug("Found (%d) parents." % (len(parent_ids)))

        affected_entries = [requested_entry_id]
        considered_entries = {}
        max_readahead_entries = Conf.get('max_readahead_entries')
        for parent_id in parent_ids:
            self.__log.debug("Retrieving children for parent with ID [%s]." %
                             (parent_id))

            try:
                child_ids = drive_proxy('get_children_under_parent_id',
                                        parent_id=parent_id)
            except:
                self.__log.exception(
                    "Could not retrieve children for parent with"
                    " ID [%s]." % (requested_entry_id))
                raise

            self.__log.debug("(%d) children found under parent with ID [%s]." %
                             (len(child_ids), parent_id))

            for child_id in child_ids:
                if child_id == requested_entry_id:
                    continue

                # We've already looked into this entry.

                try:
                    considered_entries[child_id]
                    continue
                except:
                    pass

                considered_entries[child_id] = True

                # Is it already cached?

                if self.cache.exists(child_id):
                    continue

                affected_entries.append(child_id)

                if len(affected_entries) >= max_readahead_entries:
                    break

        return affected_entries
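
A side note on the bookkeeping above: the try/except probe into considered_entries is just a membership test, and a set states that directly. The equivalent inner-loop logic, sketched:

        considered_entries = set()

        for child_id in child_ids:
            if child_id == requested_entry_id:
                continue

            # We've already looked into this entry.
            if child_id in considered_entries:
                continue

            considered_entries.add(child_id)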
Example #17
    def __create(self, filepath, mode=None):
        """Create a new file.
                
        We don't implement "mode" (permissions) because the model doesn't agree 
        with GD.
        """

# TODO: Fail if it already exists.

        try:
            result = split_path(filepath, path_resolver)
            (parent_clause, path, filename, mime_type, is_hidden) = result
        except GdNotFoundError:
            _logger.exception("Could not process [%s] (i-create).", filepath)
            raise FuseOSError(ENOENT)
        except:
            _logger.exception("Could not split path [%s] (i-create).",
                              filepath)
            raise FuseOSError(EIO)

        distilled_filepath = build_filepath(path, filename)

        # Try to guess at a mime-type, if not otherwise given.
        if mime_type is None:
            (mimetype_guess, _) = guess_type(filename, True)
            
            if mimetype_guess is not None:
                mime_type = mimetype_guess
            else:
                mime_type = Conf.get('default_mimetype')

        gd = get_gdrive()

        try:
            entry = gd.create_file(
                        filename, 
                        [parent_clause[3]], 
                        mime_type,
                        is_hidden=is_hidden)
        except:
            _logger.exception("Could not create empty file [%s] under "
                              "parent with ID [%s].",
                              filename, parent_clause[3])

            raise FuseOSError(EIO)

        path_relations = PathRelations.get_instance()

        try:
            path_relations.register_entry(entry)
        except:
            _logger.exception("Could not register created file in cache.")
            raise FuseOSError(EIO)

        _logger.info("Inner-create of [%s] completed.", distilled_filepath)

        return (entry, path, filename, mime_type)
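
For reference, mimetypes.guess_type() returns a (type, encoding) pair derived from the filename's extension, which is why the code above falls back to default_mimetype when the guess comes back empty:

>>> from mimetypes import guess_type
>>> guess_type('notes.txt', True)
('text/plain', None)
>>> guess_type('archive.unknown', True)
(None, None)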
Example #18
    def __emit_log(self):
        for source_name, source_data in self.data.iteritems():
            pairs = [("%s= [%s]" % (k, v)) for k, v in source_data.iteritems()]
            logging.info("RPT EMIT(%s): %s" % (source_name, ', '.join(pairs)))

        report_emit_interval_s = Conf.get('report_emit_frequency_s')
        emit_timer = Timer(report_emit_interval_s, self.__emit_log)

        Timers.get_instance().register_timer('emit', emit_timer)
Example #19
    def create_directory(self, filename, parents, **kwargs):

        mimetype_directory = Conf.get('directory_mimetype')
        return self.__insert_entry(
                False,
                filename, 
                parents,
                mimetype_directory, 
                **kwargs)
Example #20
    def __get_entries_to_update(self, requested_entry_id):
        # Get more entries than just what was requested, while we're at it.

        try:
            parent_ids = drive_proxy('get_parents_containing_id', 
                                     child_id=requested_entry_id)
        except:
            self.__log.exception("Could not retrieve parents for child with ID "
                              "[%s]." % (requested_entry_id))
            raise

        self.__log.debug("Found (%d) parents." % (len(parent_ids)))

        affected_entries = [ requested_entry_id ]
        considered_entries = { }
        max_readahead_entries = Conf.get('max_readahead_entries')
        for parent_id in parent_ids:
            self.__log.debug("Retrieving children for parent with ID [%s]." % 
                          (parent_id))

            try:
                child_ids = drive_proxy('get_children_under_parent_id', 
                                        parent_id=parent_id)
            except:
                self.__log.exception("Could not retrieve children for parent with"
                                  " ID [%s]." % (requested_entry_id))
                raise

            self.__log.debug("(%d) children found under parent with ID [%s]." % 
                          (len(child_ids), parent_id))

            for child_id in child_ids:
                if child_id == requested_entry_id:
                    continue

                # We've already looked into this entry.

                try:
                    considered_entries[child_id]
                    continue
                except:
                    pass

                considered_entries[child_id] = True

                # Is it already cached?

                if self.cache.exists(child_id):
                    continue

                affected_entries.append(child_id)

                if len(affected_entries) >= max_readahead_entries:
                    break

        return affected_entries
Example #21
    def __build_stat_from_entry(self, entry):
        (uid, gid, pid) = fuse_get_context()

        block_size_b = gdrivefs.config.fs.CALCULATION_BLOCK_SIZE

        if entry.is_directory:
            effective_permission = \
                int(Conf.get('default_perm_folder'), 8)
        elif entry.editable:
            effective_permission = \
                int(Conf.get('default_perm_file_editable'), 8)
        else:
            effective_permission = \
                int(Conf.get('default_perm_file_noneditable'), 8)

        stat_result = {
            "st_mtime": entry.modified_date_epoch, # modified time.
            "st_ctime": entry.modified_date_epoch, # changed time.
            "st_atime": time(),
            "st_uid":   uid,
            "st_gid":   gid,
        }
        
        if entry.is_directory:
            # Per http://sourceforge.net/apps/mediawiki/fuse/index.php?title=SimpleFilesystemHowto, 
            # default size should be 4K.
# TODO(dustin): Should we just make this (0), since that's what it is?
            stat_result["st_size"] = 1024 * 4
            stat_result["st_mode"] = (stat.S_IFDIR | effective_permission)
            stat_result["st_nlink"] = 2
        else:
            if entry.requires_mimetype:
                stat_result["st_size"] = DisplacedFile.file_size
            else:
                stat_result["st_size"] = entry.file_size

            stat_result["st_mode"] = (stat.S_IFREG | effective_permission)
            stat_result["st_nlink"] = 1

        stat_result["st_blocks"] = \
            int(math.ceil(float(stat_result["st_size"]) / block_size_b))
  
        return stat_result
Example #22
def _sched_check_changes():

    logging.debug("Doing scheduled check for changes.")

    get_change_manager().process_updates()

    # Schedule next invocation.
    t = Timer(Conf.get('change_check_frequency_s'), _sched_check_changes)

    Timers.get_instance().register_timer('change', t)
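
Since threading.Timer fires exactly once, each scheduled callback has to re-arm itself, which is what the re-registration above does (register_timer presumably also starts the timer). The bare pattern, as a standalone sketch:

import threading

def _tick(interval_s):
    # ... periodic work goes here ...

    # Re-arm: a Timer is single-shot, so schedule the next invocation.
    t = threading.Timer(interval_s, _tick, args=(interval_s,))
    t.daemon = True   # don't keep the interpreter alive on exit
    t.start()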
Example #23
def _sched_check_changes():
    
    logging.debug("Doing scheduled check for changes.")

    get_change_manager().process_updates()

    # Schedule next invocation.
    t = Timer(Conf.get('change_check_frequency_s'), _sched_check_changes)

    Timers.get_instance().register_timer('change', t)
Example #24
    def __create(self, filepath, mode=None):
        """Create a new file.
                
        We don't implement "mode" (permissions) because the model doesn't agree 
        with GD.
        """

        # TODO: Fail if it already exists.

        try:
            result = split_path(filepath, path_resolver)
            (parent_clause, path, filename, mime_type, is_hidden) = result
        except GdNotFoundError:
            _logger.exception("Could not process [%s] (i-create).", filepath)
            raise FuseOSError(ENOENT)
        except:
            _logger.exception("Could not split path [%s] (i-create).",
                              filepath)
            raise FuseOSError(EIO)

        distilled_filepath = build_filepath(path, filename)

        # Try to guess at a mime-type, if not otherwise given.
        if mime_type is None:
            (mimetype_guess, _) = guess_type(filename, True)

            if mimetype_guess is not None:
                mime_type = mimetype_guess
            else:
                mime_type = Conf.get('default_mimetype')

        gd = get_gdrive()

        try:
            entry = gd.create_file(filename, [parent_clause[3]],
                                   mime_type,
                                   is_hidden=is_hidden)
        except:
            _logger.exception(
                "Could not create empty file [%s] under "
                "parent with ID [%s].", filename, parent_clause[3])

            raise FuseOSError(EIO)

        path_relations = PathRelations.get_instance()

        try:
            path_relations.register_entry(entry)
        except:
            _logger.exception("Could not register created file in cache.")
            raise FuseOSError(EIO)

        _logger.info("Inner-create of [%s] completed.", distilled_filepath)

        return (entry, path, filename, mime_type)
Example #25
    def getattr(self, raw_path, fh=None):
        """Return a stat() structure."""
        # TODO: Implement handle.

        (entry, path, filename) = self.__get_entry_or_raise(raw_path)
        (uid, gid, pid) = fuse_get_context()

        self.__log.debug("Context: UID= (%d) GID= (%d) PID= (%d)" %
                         (uid, gid, pid))

        if entry.is_directory:
            effective_permission = int(Conf.get('default_perm_folder'), 8)
        elif entry.editable:
            effective_permission = int(Conf.get('default_perm_file_editable'),
                                       8)
        else:
            effective_permission = int(
                Conf.get('default_perm_file_noneditable'), 8)

        stat_result = {
            "st_mtime": entry.modified_date_epoch,  # modified time.
            "st_ctime": entry.modified_date_epoch,  # changed time.
            "st_atime": time(),
            "st_uid": uid,
            "st_gid": gid
        }

        if entry.is_directory:
            # Per http://sourceforge.net/apps/mediawiki/fuse/index.php?title=SimpleFilesystemHowto,
            # default size should be 4K.
            stat_result["st_size"] = 1024 * 4
            stat_result["st_mode"] = (stat.S_IFDIR | effective_permission)
            stat_result["st_nlink"] = 2
        else:
            stat_result["st_size"] = DisplacedFile.file_size \
                                        if entry.requires_mimetype \
                                        else entry.file_size

            stat_result["st_mode"] = (stat.S_IFREG | effective_permission)
            stat_result["st_nlink"] = 1

        return stat_result
Example #26
    def __emit_log(self):
        for source_name, source_data in self.data.iteritems():
            pairs = [ ("%s= [%s]" % (k, v)) 
                        for k, v 
                        in source_data.iteritems() ]
            logging.info("RPT EMIT(%s): %s" % (source_name, ', '.join(pairs)))

        report_emit_interval_s = Conf.get('report_emit_frequency_s')
        emit_timer = Timer(report_emit_interval_s, self.__emit_log)

        Timers.get_instance().register_timer('emit', emit_timer)
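
dict.iteritems() pins this code to Python 2; under Python 3 the same method would read:

    def __emit_log(self):
        for source_name, source_data in self.data.items():
            pairs = ["%s= [%s]" % (k, v) for k, v in source_data.items()]
            logging.info("RPT EMIT(%s): %s" % (source_name, ', '.join(pairs)))

        report_emit_interval_s = Conf.get('report_emit_frequency_s')
        emit_timer = Timer(report_emit_interval_s, self.__emit_log)

        Timers.get_instance().register_timer('emit', emit_timer)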
Example #27
    def __post_status(self):
        """Send the current status to our reporting tool."""

        num_values = self.registry.count(self.resource_name)

        self.report.set_values(self.report_source_name, 'count', 
                               num_values)

        status_post_interval_s = Conf.get('cache_status_post_frequency_s')
        status_timer = Timer(status_post_interval_s, self.__post_status)

        Timers.get_instance().register_timer('status', status_timer)
Example #28
    def create_file(self, filename, data_filepath, mime_type=None, **kwargs):
# TODO: It doesn't seem as if the created file is being registered.
        # Even though we're supposed to provide an extension, we can get away 
        # without having one. We don't want to impose this when acting like a 
        # normal FS.

        # If no data and no mime-type was given, default it.
        if mime_type is None:
            mime_type = Conf.get('file_default_mime_type')
            self.__log.debug("No mime-type was presented for file "
                             "create/update. Defaulting to [%s]." % (mime_type))

        return self.__insert_entry(filename=filename,
                                   data_filepath=data_filepath,
                                   mime_type=mime_type,
                                   **kwargs)
Example #29
    def __load_mappings(self):
        # Allow someone to override our default mappings of the GD types.

        gd_to_normal_mapping_filepath = \
            Conf.get('gd_to_normal_mapping_filepath')

        try:
            with open(gd_to_normal_mapping_filepath, 'r') as f:
                self.gd_to_normal_mime_mappings.extend(json.load(f))
        except:
            logging.info("No mime-mapping was found.")

        # Allow someone to set file-extensions for mime-types, and not rely on 
        # Python's educated guesses.

        extension_mapping_filepath = Conf.get('extension_mapping_filepath')

        try:
            with open(extension_mapping_filepath, 'r') as f:
                self.default_extensions.extend(json.load(f))
        except:
            logging.info("No extension-mapping was found.")
Example #30
    def getattr(self, raw_path, fh=None):
        """Return a stat() structure."""
# TODO: Implement handle.

        (entry, path, filename) = self.__get_entry_or_raise(raw_path)
        (uid, gid, pid) = fuse_get_context()

        self.__log.debug("Context: UID= (%d) GID= (%d) PID= (%d)" % (uid, gid, 
                                                                     pid))

        if entry.is_directory:
            effective_permission = int(Conf.get('default_perm_folder'), 8)
        elif entry.editable:
            effective_permission = int(Conf.get('default_perm_file_editable'), 8)
        else:
            effective_permission = int(Conf.get('default_perm_file_noneditable'), 8)

        stat_result = { "st_mtime": entry.modified_date_epoch, # modified time.
                        "st_ctime": entry.modified_date_epoch, # changed time.
                        "st_atime": time(),
                        "st_uid":   uid,
                        "st_gid":   gid }
        
        if entry.is_directory:
            # Per http://sourceforge.net/apps/mediawiki/fuse/index.php?title=SimpleFilesystemHowto, 
            # default size should be 4K.
# TODO(dustin): Should we just make this (0), since that's what it is?
            stat_result["st_size"] = 1024 * 4
            stat_result["st_mode"] = (stat.S_IFDIR | effective_permission)
            stat_result["st_nlink"] = 2
        else:
            stat_result["st_size"] = DisplacedFile.file_size \
                                        if entry.requires_mimetype \
                                        else entry.file_size

            stat_result["st_mode"] = (stat.S_IFREG | effective_permission)
            stat_result["st_nlink"] = 1

        return stat_result
Example #31
    def __cleanup_check(self):
        """Scan the current cache and determine items old-enough to be 
        removed.
        """

        self.__log.debug("Doing clean-up for cache resource with name [%s]." %
                         (self.resource_name))

        try:
            cache_dict = self.registry.list_raw(self.resource_name)
        except:
            self.__log.exception("Could not do clean-up check with resource-"
                                 "name [%s]." % (self.resource_name))
            raise

        total_keys = [ (key, value_tuple[1]) for key, value_tuple \
                            in cache_dict.iteritems() ]

        cleanup_keys = [ key for key, value_tuple \
                            in cache_dict.iteritems() \
                            if (datetime.now() - value_tuple[1]).seconds > \
                                    self.max_age ]

        self.__log.info("Found (%d) entries to clean-up from entry-cache." %
                        (len(cleanup_keys)))

        if cleanup_keys:
            for key in cleanup_keys:
                self.__log.debug("Cache entry [%s] under resource-name [%s] "
                                 "will be cleaned-up." %
                                 (key, self.resource_name))

                if not self.exists(key, no_fault_check=True):
                    self.__log.debug("Entry with ID [%s] has already been "
                                     "cleaned-up." % (key))
                else:
                    try:
                        self.remove(key)
                    except:
                        self.__log.exception("Cache entry [%s] under resource-"
                                             "name [%s] could not be cleaned-"
                                             "up." % (key, self.resource_name))
                        raise

            self.__log.debug("Scheduled clean-up complete.")

        cleanup_interval_s = Conf.get('cache_cleanup_check_frequency_s')
        cleanup_timer = Timer(cleanup_interval_s, self.__cleanup_check)

        Timers.get_instance().register_timer('cleanup', cleanup_timer)
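
One caution about the age test above: timedelta.seconds holds only the sub-day remainder, so an entry more than 24 hours old can look young again. total_seconds() avoids the wraparound; the comparison, sketched in the same style:

        cleanup_keys = [ key for key, value_tuple \
                            in cache_dict.iteritems() \
                            if (datetime.now() - value_tuple[1]).total_seconds() > \
                                    self.max_age ]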
Example #32
    def list_files(self, query_contains_string=None, query_is_string=None, parent_id=None):
        
        self.__log.info("Listing all files.")

        try:
            client = self.get_client()
        except:
            self.__log.exception("There was an error while acquiring the Google "
                              "Drive client (list_files).")
            raise

        query_components = [ ]

        if parent_id:
            query_components.append("'%s' in parents" % (parent_id))

        if query_is_string:
            query_components.append("title='%s'" % 
                                    (query_is_string.replace("'", "\\'")))
        elif query_contains_string:
            query_components.append("title contains '%s'" % 
                     (query_contains_string.replace("'", "\\'")))

        # Make sure that we don't get any entries that we would have to ignore.

        hidden_flags = Conf.get('hidden_flags_list_remote')
        if hidden_flags:
            for hidden_flag in hidden_flags:
                query_components.append("%s = false" % (hidden_flag))

        query = ' and '.join(query_components) if query_components else None

        try:
            result = client.files().list(q=query).execute()
        except:
            self.__log.exception("Could not get the list of files.")
            raise

        entries = []
        for entry_raw in result[u'items']:
            try:
                entry = NormalEntry('list_files', entry_raw)
            except:
                self.__log.exception("Could not normalize raw-data for entry with"
                                  " ID [%s]." % (entry_raw[u'id']))
                raise

            entries.append(entry)

        return entries
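
Note that this variant reads only the first page of results; files().list() is paginated, and Example #44 below carries the full loop. The essential shape of it:

        page_token = None
        while True:
            result = client.files().list(q=query, pageToken=page_token).execute()

            # ... consume result[u'items'] here ...

            page_token = result.get(u'nextPageToken')
            if page_token is None:
                break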
Example #33
def _sched_check_changes():
    logging.debug("Doing scheduled check for changes.")

    try:
        get_change_manager().process_updates()
        logging.debug("Updates have been processed. Rescheduling.")

        # Schedule next invocation.
        t = Timer(Conf.get('change_check_frequency_s'), _sched_check_changes)

        Timers.get_instance().register_timer('change', t)
    except:
        _logger.exception("Exception while managing changes.")
        raise
Example #34
    def __cleanup_check(self):
        """Scan the current cache and determine items old-enough to be 
        removed.
        """

        self.__log.debug("Doing clean-up for cache resource with name [%s]." % 
                      (self.resource_name))

        try:
            cache_dict = self.registry.list_raw(self.resource_name)
        except:
            self.__log.exception("Could not do clean-up check with resource-"
                                 "name [%s]." % (self.resource_name))
            raise

        total_keys = [ (key, value_tuple[1]) for key, value_tuple \
                            in cache_dict.iteritems() ]

        cleanup_keys = [ key for key, value_tuple \
                            in cache_dict.iteritems() \
                            if (datetime.now() - value_tuple[1]).seconds > \
                                    self.max_age ]

        self.__log.info("Found (%d) entries to clean-up from entry-cache." % 
                        (len(cleanup_keys)))

        if cleanup_keys:
            for key in cleanup_keys:
                self.__log.debug("Cache entry [%s] under resource-name [%s] "
                                 "will be cleaned-up." % (key, 
                                                          self.resource_name))

                if not self.exists(key, no_fault_check=True):
                    self.__log.debug("Entry with ID [%s] has already been "
                                     "cleaned-up." % (key))
                else:
                    try:
                        self.remove(key)
                    except:
                        self.__log.exception("Cache entry [%s] under resource-"
                                             "name [%s] could not be cleaned-"
                                             "up." % (key, self.resource_name))
                        raise

            self.__log.debug("Scheduled clean-up complete.")

        cleanup_interval_s = Conf.get('cache_cleanup_check_frequency_s')
        cleanup_timer = Timer(cleanup_interval_s, self.__cleanup_check)

        Timers.get_instance().register_timer('cleanup', cleanup_timer)
Example #35
def _sched_check_changes():
    logging.debug("Doing scheduled check for changes.")

    try:
        get_change_manager().process_updates()
        logging.debug("Updates have been processed. Rescheduling.")

        # Schedule next invocation.
        t = Timer(Conf.get('change_check_frequency_s'), _sched_check_changes)

        Timers.get_instance().register_timer('change', t)
    except:
        _logger.exception("Exception while managing changes.")
        raise
Example #36
    def get_client(self):

        if self.client is not None:
            return self.client

        try:
            authed_http = self.get_authed_http()
        except:
            self.__log.exception("Could not get authed Http instance.")
            raise

        self.__log.info("Building authorized client from Http.  TYPE= [%s]" % 
                        (type(authed_http)))
    
        # Build a client from the passed discovery document path
        
        discoveryUrl = Conf.get('google_discovery_service_url')
# TODO: We should cache this, since we have so often had a problem
#       retrieving it. If there's no other way, grab it directly, and then pass
#       via a file:// URI.
        
        try:
            client = build(self.conf_service_name, 
                           self.conf_service_version, 
                           http=authed_http, 
                           discoveryServiceUrl=discoveryUrl)
        except HttpError as e:
            # We've seen situations where the discovery URL's server is down,
            # with an alternate one to be used.
            #
            # An error here shouldn't leave GDFS in an unstable state (the 
            # current command should just fail). Hopefully, the failure is
            # momentary, and the next command succeeds.

            logging.exception("There was an HTTP response-code of (%d) while "
                              "building the client with discovery URL [%s]." % 
                              (e.resp.status, discoveryUrl))
            raise

        self.client = client
        return self.client
Example #37
    def get_client(self):

        if self.client is not None:
            return self.client

        try:
            authed_http = self.get_authed_http()
        except:
            self.__log.exception("Could not get authed Http instance.")
            raise

        self.__log.info("Building authorized client from Http.  TYPE= [%s]" %
                        (type(authed_http)))

        # Build a client from the passed discovery document path

        discoveryUrl = Conf.get('google_discovery_service_url')
        # TODO: We should cache this, since we have so often had a problem
        #       retrieving it. If there's no other way, grab it directly, and then pass
        #       via a file:// URI.

        try:
            client = build(self.conf_service_name,
                           self.conf_service_version,
                           http=authed_http,
                           discoveryServiceUrl=discoveryUrl)
        except HttpError as e:
            # We've seen situations where the discovery URL's server is down,
            # with an alternate one to be used.
            #
            # An error here shouldn't leave GDFS in an unstable state (the
            # current command should just fail). Hopefully, the failure is
            # momentary, and the next command succeeds.

            logging.exception("There was an HTTP response-code of (%d) while "
                              "building the client with discovery URL [%s]." %
                              (e.resp.status, discoveryUrl))
            raise

        self.client = client
        return self.client
Example #38
    def create_file(self, filename, data_filepath, parents, mime_type=None, 
                    **kwargs):
# TODO: It doesn't seem as if the created file is being registered.
        # Even though we're supposed to provide an extension, we can get away 
        # without having one. We don't want to impose this when acting like a 
        # normal FS.

        # If no data and no mime-type was given, default it.
        if mime_type is None:
            mime_type = Conf.get('file_default_mime_type')
            self.__log.debug("No mime-type was presented for file "
                             "create/update. Defaulting to [%s]." % 
                             (mime_type))

        return self.__insert_entry(filename,
                                   mime_type,
                                   parents,
                                   data_filepath,
                                   **kwargs)
Example #39
    def __cleanup(self):
        """Scan the current cache and determine items old-enough to be 
        removed.
        """

        cleanup_interval_s = Conf.get('cache_cleanup_check_frequency_s')

        _logger.info("Cache-cleanup thread running: %s", self)

        while self.__t_quit_ev.is_set() is False and \
                  gdrivefs.state.GLOBAL_EXIT_EVENT.is_set() is False:
            _logger.debug("Doing clean-up for cache resource with name [%s]." % 
                          (self.resource_name))

            cache_dict = self.registry.list_raw(self.resource_name)

            total_keys = [ (key, value_tuple[1]) for key, value_tuple \
                                in list(cache_dict.items()) ]

            cleanup_keys = [ key for key, value_tuple \
                                in list(cache_dict.items()) \
                                if (datetime.datetime.now() - value_tuple[1]).seconds > \
                                        self.max_age ]

            _logger.debug("Found (%d) entries to clean-up from entry-cache." % 
                          (len(cleanup_keys)))

            if cleanup_keys:
                for key in cleanup_keys:
                    _logger.debug("Cache entry [%s] under resource-name [%s] "
                                  "will be cleaned-up." % 
                                  (key, self.resource_name))

                    if not self.exists(key, no_fault_check=True):
                        _logger.debug("Entry with ID [%s] has already been "
                                      "cleaned-up." % (key))
                    else:
                        self.remove(key)
            else:
                _logger.debug("No cache-cleanup required.")
                time.sleep(cleanup_interval_s)

        _logger.info("Cache-cleanup thread terminating: %s", self)
Example #40
    def __post_status(self):
        """Send the current status to our reporting tool."""

        try:
            num_values = self.registry.count(self.resource_name)
        except:
            self.__log.exception("Could not get count of values for resource "
                                 "with name [%s]." % (self.resource_name))
            raise

        try:
            self.report.set_values(self.report_source_name, 'count',
                                   num_values)
        except:
            self.__log.exception("Cache could not post status for resource "
                                 "with name [%s]." % (self.resource_name))
            raise

        status_post_interval_s = Conf.get('cache_status_post_frequency_s')
        status_timer = Timer(status_post_interval_s, self.__post_status)

        Timers.get_instance().register_timer('status', status_timer)
Example #41
    def __post_status(self):
        """Send the current status to our reporting tool."""

        try:
            num_values = self.registry.count(self.resource_name)
        except:
            self.__log.exception("Could not get count of values for resource "
                                 "with name [%s]." % (self.resource_name))
            raise

        try:
            self.report.set_values(self.report_source_name, 'count', 
                                   num_values)
        except:
            self.__log.exception("Cache could not post status for resource "
                                 "with name [%s]." % (self.resource_name))
            raise

        status_post_interval_s = Conf.get('cache_status_post_frequency_s')
        status_timer = Timer(status_post_interval_s, self.__post_status)

        Timers.get_instance().register_timer('status', status_timer)
Example #42
    def __get_entries_to_update(self, requested_entry_id):
        # Get more entries than just what was requested, while we're at it.

        parent_ids = self.__gd.get_parents_containing_id(requested_entry_id)

        affected_entries = [requested_entry_id]
        considered_entries = {}
        max_readahead_entries = Conf.get('max_readahead_entries')
        for parent_id in parent_ids:
            child_ids = self.__gd.get_children_under_parent_id(parent_id)

            for child_id in child_ids:
                if child_id == requested_entry_id:
                    continue

                # We've already looked into this entry.

                try:
                    considered_entries[child_id]
                    continue
                except:
                    pass

                considered_entries[child_id] = True

                # Is it already cached?

                if self.cache.exists(child_id):
                    continue

                affected_entries.append(child_id)

                if len(affected_entries) >= max_readahead_entries:
                    break

        return affected_entries
Example #43
    def __get_entries_to_update(self, requested_entry_id):
        # Get more entries than just what was requested, while we're at it.

        parent_ids = self.__gd.get_parents_containing_id(requested_entry_id)

        affected_entries = [requested_entry_id]
        considered_entries = {}
        max_readahead_entries = Conf.get('max_readahead_entries')
        for parent_id in parent_ids:
            child_ids = self.__gd.get_children_under_parent_id(parent_id)

            for child_id in child_ids:
                if child_id == requested_entry_id:
                    continue

                # We've already looked into this entry.

                try:
                    considered_entries[child_id]
                    continue
                except:
                    pass

                considered_entries[child_id] = True

                # Is it already cached?

                if self.cache.exists(child_id):
                    continue

                affected_entries.append(child_id)

                if len(affected_entries) >= max_readahead_entries:
                    break

        return affected_entries
Example #44
    def list_files(self,
                   query_contains_string=None,
                   query_is_string=None,
                   parent_id=None):

        self.__log.info(
            "Listing all files. CONTAINS=[%s] IS=[%s] "
            "PARENT_ID=[%s]" %
            (query_contains_string if query_contains_string is not None else
             '<none>', query_is_string if query_is_string is not None else
             '<none>', parent_id if parent_id is not None else '<none>'))

        try:
            client = self.get_client()
        except:
            self.__log.exception("There was an error while acquiring the "
                                 "Google Drive client (list_files).")
            raise

        query_components = []

        if parent_id:
            query_components.append("'%s' in parents" % (parent_id))

        if query_is_string:
            query_components.append(
                "title='%s'" % (escape_filename_for_query(query_is_string)))
        elif query_contains_string:
            query_components.append(
                "title contains '%s'" %
                (escape_filename_for_query(query_contains_string)))

        # Make sure that we don't get any entries that we would have to ignore.

        hidden_flags = Conf.get('hidden_flags_list_remote')
        if hidden_flags:
            for hidden_flag in hidden_flags:
                query_components.append("%s = false" % (hidden_flag))

        query = ' and '.join(query_components) if query_components else None

        page_token = None
        page_num = 0
        entries = []
        while True:
            self.__log.debug("Doing request for listing of files with page-"
                             "token [%s] and page-number (%d): %s" %
                             (page_token, page_num, query))

            try:
                result = client.files().list(q=query, pageToken=page_token).\
                            execute()
            except:
                self.__log.exception("Could not get the list of files.")
                raise

            self.__log.debug("(%d) entries were presented for page-number "
                             "(%d)." % (len(result[u'items']), page_num))

            for entry_raw in result[u'items']:
                try:
                    entry = NormalEntry('list_files', entry_raw)
                except:
                    self.__log.exception(
                        "Could not normalize raw-data for entry "
                        "with ID [%s]." % (entry_raw[u'id']))
                    raise

                entries.append(entry)

            if u'nextPageToken' not in result:
                self.__log.debug("No more pages in file listing.")
                break

            self.__log.debug("Next page-token in file-listing is [%s]." %
                             (result[u'nextPageToken']))
            page_token = result[u'nextPageToken']
            page_num += 1

        return entries
Example #45
def set_auth_cache_filepath(auth_storage_filepath):
    Conf.set('auth_cache_filepath', auth_storage_filepath)
Example #46
def mount(auth_storage_filepath, mountpoint, debug=None, nothreads=None, 
          option_string=None):

    fuse_opts = { }
    if option_string:
        for opt_parts in [opt.split('=', 1) \
                          for opt \
                          in option_string.split(',') ]:
            k = opt_parts[0]

            # We need to present a bool type for on/off flags. Since all we
            # have are strings, we'll convert anything with a 'True' or 'False'
            # to a bool, or anything with just a key to True.
            if len(opt_parts) == 2:
                v = opt_parts[1]
                v_lower = v.lower()

                if v_lower == 'true':
                    v = True
                elif v_lower == 'false':
                    v = False
            else:
                v = True

            # We have a list of provided options. See which match against our 
            # application options.

            logging.info("Setting option [%s] to [%s]." % (k, v))

            try:
                Conf.set(k, v)
            except (KeyError) as e:
                logging.debug("Forwarding option [%s] with value [%s] to "
                              "FUSE." % (k, v))

                fuse_opts[k] = v
            except:
                logging.exception("Could not set option [%s]. It is probably "
                                  "invalid." % (k))
                raise

    logging.debug("PERMS: F=%s E=%s NE=%s" % 
                  (Conf.get('default_perm_folder'), 
                   Conf.get('default_perm_file_editable'), 
                   Conf.get('default_perm_file_noneditable')))

    # Assume that any option that wasn't an application option is a FUSE 
    # option. The Python-FUSE interface that we're using is beautiful/elegant,
    # but there's no help support. The user is just going to have to know the
    # options.

    set_auth_cache_filepath(auth_storage_filepath)

    # How we'll appear in diskfree, mtab, etc.
    name = ("gdfs(%s)" % (auth_storage_filepath))

    # Don't start any of the scheduled tasks, such as change checking, cache
    # cleaning, etc. It will minimize outside influence of the logs and state
    # to make it easier to debug.

#    atexit.register(Timers.get_instance().cancel_all)
    if debug:
        Timers.get_instance().set_autostart_default(False)

    fuse = FUSE(GDriveFS(), mountpoint, debug=debug, foreground=debug, 
                nothreads=nothreads, fsname=name, **fuse_opts)
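
For illustration, a call exercising the option parsing above; the mountpoint and option values here are hypothetical (any key that Conf.set() rejects with KeyError, such as big_writes, is forwarded to FUSE):

mount('/var/cache/creds/gdfs',      # auth_storage_filepath
      '/mnt/gdrive',                # mountpoint (hypothetical)
      debug=False,
      nothreads=False,
      option_string='big_writes,default_perm_folder=555')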
Example #47
    def __create(self, filepath, mode=None):
        """Create a new file.
                
        We don't implement "mode" (permissions) because the model doesn't agree 
        with GD.
        """
# TODO: Fail if it already exists.

        self.__log.debug("Splitting file-path [%s] for inner create." % 
                         (filepath))

        try:
            result = split_path(filepath, path_resolver)
            (parent_clause, path, filename, mime_type, is_hidden) = result
        except GdNotFoundError:
            self.__log.exception("Could not process [%s] (i-create).")
            raise FuseOSError(ENOENT)
        except:
            self.__log.exception("Could not split path [%s] (i-create)." % 
                              (filepath))
            raise FuseOSError(EIO)

        distilled_filepath = build_filepath(path, filename)

        self.__log.debug("Acquiring file-handle.")

        # Try to guess at a mime-type, if not otherwise given.
        if mime_type is None:
            (mimetype_guess, _) = guess_type(filename, True)
            
            if mimetype_guess is not None:
                mime_type = mimetype_guess
            else:
                mime_type = Conf.get('default_mimetype')

        self.__log.debug("Creating empty file [%s] under parent with ID "
                         "[%s]." % (filename, parent_clause[3]))

        try:
            entry = drive_proxy('create_file', filename=filename, 
                                data_filepath='/dev/null', 
                                parents=[parent_clause[3]], 
                                mime_type=mime_type,
                                is_hidden=is_hidden)
        except:
            self.__log.exception("Could not create empty file [%s] under "
                                 "parent with ID [%s]." % (filename, 
                                                           parent_clause[3]))
            raise FuseOSError(EIO)

        self.__log.debug("Registering created file in cache.")

        path_relations = PathRelations.get_instance()

        try:
            path_relations.register_entry(entry)
        except:
            self.__log.exception("Could not register created file in cache.")
            raise FuseOSError(EIO)

        self.__log.info("Inner-create of [%s] completed." % 
                        (distilled_filepath))

        return (entry, path, filename, mime_type)
Example #48
import logging
import json

from os import makedirs
from os.path import isdir

from gdrivefs.gdtool.drive import drive_proxy
from gdrivefs.gdtool.normal_entry import NormalEntry
from gdrivefs.conf import Conf

temp_path = ("%s/displaced" % (Conf.get('file_download_temp_path')))
if isdir(temp_path) is False:
    makedirs(temp_path)


class DisplacedFile(object):
    __log = None
    normalized_entry = None
    file_size = 1000

    def __init__(self, normalized_entry):
        self.__log = logging.getLogger().getChild('DisFile')

        if normalized_entry.__class__ != NormalEntry:
            raise Exception("_DisplacedFile can not wrap a non-NormalEntry "
                            "object.")

        self.__normalized_entry = normalized_entry

    def deposit_file(self, mime_type):
        """Write the file to a temporary path, and present a stub (JSON) to the 
Example #49
#!/usr/bin/env python2.7

import sys
sys.path.insert(0, '..')

import datetime
import time
import dateutil.tz

from gdrivefs.conf import Conf
Conf.set('auth_cache_filepath', '/var/cache/creds/gdfs')

import gdrivefs.gdfs.gdfuse
import gdrivefs.gdtool.drive
import gdrivefs.time_support

auth = gdrivefs.gdtool.drive.GdriveAuth()
client = auth.get_client()

def get_phrase(epoch):
    dt = datetime.datetime.utcfromtimestamp(epoch)
    return datetime.datetime.strftime(dt, gdrivefs.time_support.DTF_DATETIMET)

print("Before:\n")

(entry, path, filename) = gdrivefs.gdfs.gdfuse.get_entry_or_raise(
                            '/20140426-171136')

print(entry.modified_date)
print(entry.modified_date.utctimetuple())
print(entry.modified_date_epoch)
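With the parameter bug in get_phrase() fixed above, the helper can be exercised directly against the epoch value just printed; a minimal sketch:

print(get_phrase(entry.modified_date_epoch))  # flat DTF_DATETIMET rendering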
Exemplo n.º 50
0
    def create_directory(self, filename, parents, **kwargs):

        mimetype_directory = Conf.get('directory_mimetype')
        return self.__insert_entry(filename, mimetype_directory, parents,
                                   **kwargs)
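A hedged caller sketch, using the same drive_proxy dispatch that appears elsewhere in this document; the folder name and parent ID are hypothetical:

entry = drive_proxy('create_directory',
                    filename='new-folder',          # hypothetical name
                    parents=['0Bexampleparentid'])  # hypothetical ID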
Exemplo n.º 51
0
    def get_max_cache_age_seconds(self):
        return Conf.get('cache_entries_max_age')
Exemplo n.º 52
0
def set_auth_cache_filepath(auth_storage_filepath):
    Conf.set('auth_cache_filepath', auth_storage_filepath)
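Usage is a one-liner, equivalent to the Conf.set() call in the timestamp script above:

set_auth_cache_filepath('/var/cache/creds/gdfs')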
Exemplo n.º 53
0
    def download_to_local(self,
                          output_file_path,
                          normalized_entry,
                          mime_type,
                          allow_cache=True):
        """Download the given file. If we've cached a previous download and the 
        mtime hasn't changed, re-use. The second item returned reflects whether
        the data has changed since any prior attempts.
        """

        self.__log.info("Downloading entry with ID [%s] and mime-type [%s]." %
                        (normalized_entry.id, mime_type))

        if mime_type != normalized_entry.mime_type and \
                mime_type not in normalized_entry.download_links:
            message = ("Entry with ID [%s] can not be exported to type [%s]. "
                       "The available types are: %s" %
                       (normalized_entry.id, mime_type, ', '.join(
                           normalized_entry.download_links.keys())))

            self.__log.warning(message)
            raise ExportFormatError(message)

        temp_path = Conf.get('file_download_temp_path')

        if not isdir(temp_path):
            try:
                makedirs(temp_path)
            except:
                self.__log.exception("Could not create temporary download "
                                     "path [%s]." % (temp_path))
                raise

        gd_mtime_epoch = mktime(normalized_entry.modified_date.timetuple())

        self.__log.info("File will be downloaded to [%s]." %
                        (output_file_path))

        use_cache = False
        if allow_cache and isfile(output_file_path):
            # Determine if a local copy already exists that we can use.
            try:
                stat_info = stat(output_file_path)
            except:
                self.__log.exception("Could not retrieve stat() information "
                                     "for temp download file [%s]." %
                                     (output_file_path))
                raise

            if gd_mtime_epoch == stat_info.st_mtime:
                use_cache = True

        if use_cache:
            # Use the cache. It's fine.

            self.__log.info("File retrieved from the previously downloaded, "
                            "still-current file.")
            return (stat_info.st_size, False)

        # Go and get the file.

        try:
            # TODO(dustin): This might establish a new connection. Not cool.
            authed_http = self.get_authed_http()
        except:
            self.__log.exception("Could not get authed Http instance for "
                                 "download.")
            raise

        url = normalized_entry.download_links[mime_type]

        self.__log.debug("Downloading file from [%s]." % (url))

        try:
            # TODO(dustin): Right now, we're downloading the complete body of data into memory, and then saving.
            data_tuple = authed_http.request(url)
        except:
            self.__log.exception("Could not download entry with ID [%s], type "
                                 "[%s], and URL [%s]." %
                                 (normalized_entry.id, mime_type, url))
            raise

        (response_headers, data) = data_tuple

        # Throw a log-item if we see any "Range" response-headers. If GD ever
        # starts supporting "Range" headers, we'll be able to write smarter
        # download mechanics (resume, etc..).

        r = re.compile('Range')
        range_found = [("%s: %s" % (k, v))
                       for k, v in response_headers.iteritems() if r.match(k)]
        if range_found:
            self.__log.info("GD has returned Range-related headers: %s" %
                            (", ".join(found)))

        self.__log.info("Downloaded file is (%d) bytes. Writing to [%s]." %
                        (len(data), output_file_path))

        try:
            with open(output_file_path, 'wb') as f:
                f.write(data)
        except:
            self.__log.exception("Could not cached downloaded file. Skipped.")

        else:
            self.__log.info("File written to cache successfully.")

        try:
            utime(output_file_path, (time(), gd_mtime_epoch))
        except:
            self.__log.exception("Could not set time on [%s]." %
                                 (output_file_path))
            raise

        return (len(data), True)
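The cache decision above reduces to a single comparison: reuse the local copy only when its st_mtime matches the server mtime that utime() stamped onto it after the last successful download. A standalone sketch of that check, with illustrative names:

import os
from os.path import isfile

def can_reuse_download(local_path, gd_mtime_epoch):
    # Reuse only an existing local copy whose mtime still matches the
    # epoch stamped onto it after the previous download.
    return isfile(local_path) and \
           os.stat(local_path).st_mtime == gd_mtime_epoch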
Exemplo n.º 54
0
    def download_to_local(self, normalized_entry, mime_type, force_output_filename=None, allow_cache=True):
        """Download the given file. If we've cached a previous download and the 
        mtime hasn't changed, re-use.
        """

        self.__log.info("Downloading entry with ID [%s] and mime-type [%s]." % 
                     (normalized_entry.id, mime_type))

        if mime_type != normalized_entry.mime_type and \
                mime_type not in normalized_entry.download_links:
            message = ("Entry with ID [%s] can not be exported to type [%s]. The available types are: %s" % 
                       (normalized_entry.id, mime_type, ', '.join(normalized_entry.download_links.keys())))

            self.__log.warning(message)
            raise ExportFormatError(message)

        temp_path = Conf.get('file_download_temp_path')

        if not os.path.isdir(temp_path):
            try:
                os.makedirs(temp_path)
            except:
                self.__log.exception("Could not create temporary download path "
                                  "[%s]." % (temp_path))
                raise

        # Produce a file-path of a temporary file that we can store the data 
        # to. More often than not, we'll be called when the OS wants to read 
        # the file, and we'll need the data at hand in order to page through 
        # it.

        if force_output_filename:
            temp_filename = force_output_filename
        else:
            temp_filename = ("%s.%s" % (normalized_entry.id, mime_type)). \
                                encode('ascii')
            temp_filename = re.sub(r'[^0-9a-zA-Z_\.]+', '', temp_filename)

        temp_filepath = ("%s/%s" % (temp_path, temp_filename))

        gd_mtime_epoch = mktime(normalized_entry.modified_date.timetuple())

        self.__log.info("File will be downloaded to [%s]." % (temp_filepath))

        use_cache = False
        if allow_cache and os.path.isfile(temp_filepath):
            # Determine if a local copy already exists that we can use.
            try:
                stat = os.stat(temp_filepath)
            except:
                self.__log.exception("Could not retrieve stat() information for "
                                  "temp download file [%s]." % (temp_filepath))
                raise

            if gd_mtime_epoch == stat.st_mtime:
                use_cache = True

        if use_cache:
            # Use the cache. It's fine.

            self.__log.info("File retrieved from the previously downloaded, still-current file.")
            return (temp_filepath, stat.st_size)

        # Go and get the file.

        try:
            authed_http = self.get_authed_http()
        except:
            self.__log.exception("Could not get authed Http instance for download.")
            raise

        url = normalized_entry.download_links[mime_type]

        self.__log.debug("Downloading file from [%s]." % (url))

        try:
            data_tuple = authed_http.request(url)
        except:
            self.__log.exception("Could not download entry with ID [%s], type "
                              "[%s], and URL [%s]." % (normalized_entry.id, mime_type, url))
            raise

        (response_headers, data) = data_tuple

        # Throw a log-item if we see any "Range" response-headers. If GD ever
        # starts supporting "Range" headers, we'll be able to write smarter 
        # download mechanics (resume, etc..).

        r = re.compile('Range')
        range_found = [("%s: %s" % (k, v)) for k, v in response_headers.iteritems() if r.match(k)]
        if range_found:
            logger.info("GD has returned Range-related headers: %s" % (", ".join(found)))

        self.__log.info("Downloaded file is (%d) bytes. Writing to [%s]." % (len(data), temp_filepath))

        try:
            with open(temp_filepath, 'wb') as f:
                f.write(data)
        except:
            self.__log.exception("Could not cached downloaded file. Skipped.")

        else:
            self.__log.info("File written to cache successfully.")

        try:
            os.utime(temp_filepath, (time(), gd_mtime_epoch))
        except:
            self.__log.exception("Could not set time on [%s]." % (temp_filepath))
            raise

        return (temp_filepath, len(data))
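The temp-filename construction above is worth isolating: the entry-ID and mime-type are joined, then stripped to a conservative character set so the result is safe as a filename. A sketch:

import re

def safe_cache_filename(entry_id, mime_type):
    # Drop anything outside [0-9a-zA-Z_.]; the slash in the mime-type
    # disappears, the separating dot survives.
    raw = ('%s.%s' % (entry_id, mime_type))
    return re.sub(r'[^0-9a-zA-Z_\.]+', '', raw)

safe_cache_filename('abc123', 'text/plain')  # 'abc123.textplain'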
Exemplo n.º 55
0
class NormalEntry(object):
    __default_general_mime_type = Conf.get('default_mimetype')
    __directory_mimetype = Conf.get('directory_mimetype')

    __properties_extra = [
        'is_directory',
        'is_visible',
        'parents',
        'download_types',
        'modified_date',
        'modified_date_epoch',
        'mtime_byme_date',
        'mtime_byme_date_epoch',
        'atime_byme_date',
        'atime_byme_date_epoch',
    ]

    def __init__(self, gd_resource_type, raw_data):
        self.__info = {}
        self.__parents = []
        self.__raw_data = raw_data
        self.__cache_data = None
        self.__cache_mimetypes = None
        self.__cache_dict = {}

        # Return True if reading from this file should return info and deposit
        # the data elsewhere. This is predominantly determined by whether we
        # can get a file-size up-front, or we have to decide on a specific
        # mime-type in order to do so.

        try:
            requires_mimetype = u'fileSize' not in self.__raw_data and \
                                raw_data[u'mimeType'] != self.__directory_mimetype

            self.__info['requires_mimetype'] = \
                requires_mimetype

            self.__info['title'] = \
                raw_data[u'title']

            self.__info['mime_type'] = \
                raw_data[u'mimeType']

            self.__info['labels'] = \
                raw_data[u'labels']

            self.__info['id'] = \
                raw_data[u'id']

            self.__info['last_modifying_user_name'] = \
                raw_data[u'lastModifyingUserName']

            self.__info['writers_can_share'] = \
                raw_data[u'writersCanShare']

            self.__info['owner_names'] = \
                raw_data[u'ownerNames']

            self.__info['editable'] = \
                raw_data[u'editable']

            self.__info['user_permission'] = \
                raw_data[u'userPermission']
        except KeyError:
            _logger.exception(
                "Could not normalize with missing key.\nRAW:\n"
                "%s", pprint.pformat(raw_data))
            raise

        self.__info['link'] = \
            raw_data.get(u'embedLink')

        self.__info['file_size'] = \
            int(raw_data.get(u'fileSize', 0))

        self.__info['file_extension'] = \
            raw_data.get(u'fileExtension')

        self.__info['md5_checksum'] = \
            raw_data.get(u'md5Checksum')

        self.__info['image_media_metadata'] = \
            raw_data.get(u'imageMediaMetadata')

        self.__info['download_links'] = \
            raw_data.get(u'exportLinks', {})

        try:
            self.__info['download_links'][self.__info['mime_type']] = \
                raw_data[u'downloadUrl']
        except KeyError:
            pass

        self.__update_display_name()

        for parent in raw_data[u'parents']:
            self.__parents.append(parent[u'id'])

    def __getattr__(self, key):
        return self.__info[key]

    def __str__(self):
        return (
            "<NORMAL ID= [%s] MIME= [%s] NAME= [%s] URIS= (%d)>" %
            (self.id, self.mime_type, self.title, len(self.download_links)))

    def __repr__(self):
        return str(self)

    def __update_display_name(self):
        # This is encoded for displaying locally.
        self.__info['title_fs'] = utility.translate_filename_charset(
            self.__info['title'])

    def temp_rename(self, new_filename):
        """Set the name to something else, here, while we, most likely, wait 
        for the change at the server to propagate.
        """

        self.__info['title'] = new_filename
        self.__update_display_name()

    def normalize_download_mimetype(self, specific_mimetype=None):
        """If a mimetype is given, return it if there is a download-URL 
        available for it, or fail. Else, determine if a copy can be downloaded
        with the default mime-type (application/octet-stream, or something 
        similar), or return the only mime-type in the event that there's only 
        one download format.
        """

        if self.__cache_mimetypes is None:
            self.__cache_mimetypes = [[], None]

        if specific_mimetype is not None:
            if specific_mimetype not in self.__cache_mimetypes[0]:
                _logger.debug(
                    "Normalizing mime-type [%s] for download.  "
                    "Options: %s", specific_mimetype, self.download_types)

                if specific_mimetype not in self.download_links:
                    raise ExportFormatError(
                        "Mime-type [%s] is not available for "
                        "download. Options: %s" %
                        (specific_mimetype, self.download_types))

                self.__cache_mimetypes[0].append(specific_mimetype)

            return specific_mimetype

        if self.__cache_mimetypes[1] is None:
            # Try to derive a mimetype from the filename, and see if it matches
            # against available export types.
            (mimetype_candidate, _) = guess_type(self.title_fs, True)
            if mimetype_candidate is not None and \
               mimetype_candidate in self.download_links:
                mime_type = mimetype_candidate

            elif NormalEntry.__default_general_mime_type in self.download_links:
                mime_type = NormalEntry.__default_general_mime_type

            # If there's only one download link, resort to using it (perhaps it was
            # an uploaded file, assigned only one type).
            elif len(self.download_links) == 1:
                mime_type = self.download_links.keys()[0]

            else:
                raise ExportFormatError("A correct mime-type needs to be "
                                        "specified. Options: %s" %
                                        (self.download_types))

            self.__cache_mimetypes[1] = mime_type

        return self.__cache_mimetypes[1]

    def __convert(self, data):
        if isinstance(data, dict):
            list_ = [("K(%s)=V(%s)" % (self.__convert(key),
                                  self.__convert(value))) \
                     for key, value \
                     in data.iteritems()]

            final = '; '.join(list_)
            return final
        elif isinstance(data, list):
            final = ', '.join([('LI(%s)' % (self.__convert(element))) \
                               for element \
                               in data])
            return final
        elif isinstance(data, unicode):
            return utility.translate_filename_charset(data)
        elif isinstance(data, Number):
            return str(data)
        elif isinstance(data, datetime):
            return get_flat_normal_fs_time_from_dt(data)
        else:
            return data

    def get_data(self):
        original = dict([(key.encode('ASCII'), value)
                         for key, value in self.__raw_data.iteritems()])

        distilled = self.__info

        extra = dict([(key, getattr(self, key))
                      for key in self.__properties_extra])

        data_dict = {
            'original': original,
            #'distilled': distilled,
            'extra': extra
        }

        return data_dict

    @property
    def xattr_data(self):
        if self.__cache_data is None:
            data_dict = self.get_data()

            attrs = {}
            for a_type, a_dict in data_dict.iteritems():
                # self.__log.debug("Setting [%s]." % (a_type))
                for key, value in a_dict.iteritems():
                    fqkey = ('user.%s.%s' % (a_type, key))
                    attrs[fqkey] = self.__convert(value)

            self.__cache_data = attrs

        return self.__cache_data

    @property
    def is_directory(self):
        """Return True if we represent a directory."""
        return (self.__info['mime_type'] == self.__directory_mimetype)

    @property
    def is_visible(self):
        if [
                flag for flag, value in self.labels.items()
                if flag in Conf.get('hidden_flags_list_local') and value
        ]:
            return False
        else:
            return True

    @property
    def parents(self):
        return self.__parents

    @property
    def download_types(self):
        return self.download_links.keys()

    @property
    def modified_date(self):
        if 'modified_date' not in self.__cache_dict:
            self.__cache_dict['modified_date'] = \
                dateutil.parser.parse(self.__raw_data[u'modifiedDate'])

        return self.__cache_dict['modified_date']

    @property
    def modified_date_epoch(self):
        # mktime() only works in terms of the local timezone, so compensate
        # (this works with DST, too).
        return mktime(self.modified_date.timetuple()) - time.timezone

    @property
    def mtime_byme_date(self):
        if 'modified_byme_date' not in self.__cache_dict:
            self.__cache_dict['modified_byme_date'] = \
                dateutil.parser.parse(self.__raw_data[u'modifiedByMeDate'])

        return self.__cache_dict['modified_byme_date']

    @property
    def mtime_byme_date_epoch(self):
        return mktime(self.mtime_byme_date.timetuple()) - time.timezone

    @property
    def atime_byme_date(self):
        if 'viewed_byme_date' not in self.__cache_dict:
            self.__cache_dict['viewed_byme_date'] = \
                dateutil.parser.parse(self.__raw_data[u'lastViewedByMeDate']) \
                if u'lastViewedByMeDate' in self.__raw_data \
                else None

        return self.__cache_dict['viewed_byme_date']

    @property
    def atime_byme_date_epoch(self):
        return mktime(self.atime_byme_date.timetuple()) - time.timezone \
                if self.atime_byme_date \
                else None
Exemplo n.º 56
0
    def get_max_cache_age_seconds(self):
        return Conf.get('cache_entries_max_age')
Exemplo n.º 57
0
    def __load_base_from_remote(self):
        """Download the data for the entry that we represent. This is probably 
        a file, but could also be a stub for -any- entry.
        """

        try:
            entry = self.__get_entry_or_raise()
        except:
            self.__log.exception("Could not get entry with ID [%s] for "
                                 "write-flush." % (self.__entry_id))
            raise

        self.__log.debug("Ensuring local availability of [%s]." % (entry))

        temp_file_path = get_temp_filepath(entry, self.mime_type)

        self.__log.debug("__load_base_from_remote about to download.")

        with self.__class__.__download_lock:
            # Get the current version of the write-cache file, or note that we 
            # don't have it.

            self.__log.info("Attempting local cache update of file [%s] for "
                            "entry [%s] and mime-type [%s]." % 
                            (temp_file_path, entry, self.mime_type))

            if entry.requires_mimetype:
                length = DisplacedFile.file_size

                try:
                    d = DisplacedFile(entry)
                    stub_data = d.deposit_file(self.mime_type)

                    with open(temp_file_path, 'w') as f:
                        f.write(stub_data)
                except:
                    self.__log.exception("Could not deposit to file [%s] from "
                                         "entry [%s]." % (temp_file_path, 
                                                          entry))
                    raise

# TODO: Accommodate the cache for displaced-files.
                cache_fault = True

            else:
                self.__log.info("Executing the download.")
                
                try:
# TODO(dustin): We're not inheriting an existing file (same mtime, same size).
                    result = drive_proxy('download_to_local', 
                                         output_file_path=temp_file_path,
                                         normalized_entry=entry,
                                         mime_type=self.mime_type)

                    (length, cache_fault) = result
                except ExportFormatError:
                    self.__log.exception("There was an export-format error.")
                    raise FuseOSError(ENOENT)
                except:
                    self.__log.exception("Could not localize file with entry "
                                         "[%s]." % (entry))
                    raise

            self.__log.info("Download complete.  cache_fault= [%s] "
                            "__is_loaded= [%s]" % 
                            (cache_fault, self.__is_loaded))

            # We've either not loaded it, yet, or it has changed.
            if cache_fault or not self.__is_loaded:
                with self.__class__.__update_lock:
                    self.__log.info("Checking queued items for fault.")

                    if cache_fault:
                        if self.__is_dirty:
                            self.__log.error("Entry [%s] has been changed. "
                                             "Forcing buffer updates, and "
                                             "clearing uncommitted updates." % 
                                             (entry))
                        else:
                            self.__log.debug("Entry [%s] has changed. "
                                             "Updating buffers." % (entry))

                    self.__log.debug("Loading buffers.")

                    with open(temp_file_path, 'rb') as f:
                        # Read the locally cached file in.

                        try:
# TODO(dustin): Our accounting is broken when it comes to loading and/or update-tracking. If we have a guarantee that writes only appear in sequence and in increasing order, we can dump BufferSegments.

# TODO(dustin): This is the source of:
# 1) An enormous slowdown where we first have to write the data, and then have to read it back.
# 2) An enormous resource burden.
                            data = f.read()

                            read_blocksize = Conf.get('default_buffer_read_blocksize')
                            self.__buffer = BufferSegments(data, read_blocksize)
                        except:
                            self.__log.exception("Could not read current cached "
                                                 "file into buffer.")
                            raise

                        self.__is_dirty = False

                    self.__is_loaded = True

        self.__log.debug("__load_base_from_remote complete.")
        return cache_fault