Пример #1
0
 def _exception_occurred(self, server, e, action='talking',
                         sock=None, fp=None):
     if isinstance(e, socket.timeout):
         logging.error(_("Timeout %(action)s to memcached: %(server)s"),
                       {'action': action, 'server': server})
     else:
         logging.exception(_("Error %(action)s to memcached: %(server)s"),
                           {'action': action, 'server': server})
     try:
         if fp:
             fp.close()
             del fp
     except Exception:
         pass
     try:
         if sock:
             sock.close()
             del sock
     except Exception:
         pass
     now = time.time()
     self._errors[server].append(time.time())
     if len(self._errors[server]) > ERROR_LIMIT_COUNT:
         self._errors[server] = [err for err in self._errors[server]
                                 if err > now - ERROR_LIMIT_TIME]
         if len(self._errors[server]) > ERROR_LIMIT_COUNT:
             self._error_limited[server] = now + ERROR_LIMIT_DURATION
             logging.error(_('Error limiting server %s'), server)
Пример #2
0
 def format_source_code(self, nfl):
     nfls = re.split('[:()]', nfl)
     file_path = nfls[0]
     try:
         lineno = int(nfls[1])
     except (TypeError, ValueError, IndexError):
         lineno = 0
     # for security reason, this need to be fixed.
     if not file_path.endswith('.py'):
         return _('The file type are forbidden to access!')
     try:
         data = []
         i = 0
         with open(file_path) as f:
             lines = f.readlines()
             max_width = str(len(str(len(lines))))
             fmt = '<span id="L%d" rel="#L%d">%' + max_width\
                 + 'd|<code>%s</code></span>'
             for line in lines:
                 l = cgi.escape(line, quote=None)
                 i = i + 1
                 if i == lineno:
                     fmt2 = '<span id="L%d" style="background-color: \
                         rgb(127,255,127)">%' + max_width +\
                         'd|<code>%s</code></span>'
                     data.append(fmt2 % (i, i, l))
                 else:
                     data.append(fmt % (i, i, i, l))
         data = ''.join(data)
     except Exception:
         return _('Can not access the file %s.') % file_path
     return '<pre>%s</pre>' % data
Пример #3
0
def http_connect(ipaddr, port, device, partition, method, path,
                 headers=None, query_string=None, ssl=False):
    """
    Helper function to create an HTTPConnection object. If ssl is set True,
    HTTPSConnection will be used. However, if ssl=False, BufferedHTTPConnection
    will be used, which is buffered for backend Swift services.

    :param ipaddr: IPv4 address to connect to
    :param port: port to connect to
    :param device: device of the node to query
    :param partition: partition on the device
    :param method: HTTP method to request ('GET', 'PUT', 'POST', etc.)
    :param path: request path
    :param headers: dictionary of headers
    :param query_string: request query string
    :param ssl: set True if SSL should be used (default: False)
    :returns: HTTPConnection object
    """
    if isinstance(path, six.text_type):
        try:
            path = path.encode("utf-8")
        except UnicodeError as e:
            logging.exception(_('Error encoding to UTF-8: %s'), str(e))
    if isinstance(device, six.text_type):
        try:
            device = device.encode("utf-8")
        except UnicodeError as e:
            logging.exception(_('Error encoding to UTF-8: %s'), str(e))
    path = quote('/' + device + '/' + str(partition) + path)
    return http_connect_raw(
        ipaddr, port, method, path, headers, query_string, ssl)
Пример #4
0
    def status(self, pids=None, **kwargs):
        """Display status of server

        :param: pids, if not supplied pids will be populated automatically
        :param: number, if supplied will only lookup the nth server

        :returns: 1 if server is not running, 0 otherwise
        """
        if pids is None:
            pids = self.get_running_pids(**kwargs)
        if not pids:
            number = kwargs.get('number', 0)
            if number:
                kwargs['quiet'] = True
                conf_files = self.conf_files(**kwargs)
                if conf_files:
                    print(_("%s #%d not running (%s)") % (self.server, number,
                                                          conf_files[0]))
            else:
                print(_("No %s running") % self.server)
            return 1
        for pid, pid_file in pids.items():
            conf_file = self.get_conf_file_name(pid_file)
            print(_("%s running (%s - %s)") % (self.server, pid, conf_file))
        return 0
Пример #5
0
    def stop(self, **kwargs):
        """stops a server
        """
        server_pids = {}
        for server in self.servers:
            signaled_pids = server.stop(**kwargs)
            if not signaled_pids:
                print(_('No %s running') % server)
            else:
                server_pids[server] = signaled_pids

        # all signaled_pids, i.e. list(itertools.chain(*server_pids.values()))
        signaled_pids = [p for server, pids in server_pids.items()
                         for p in pids]
        # keep track of the pids yeiled back as killed for all servers
        killed_pids = set()
        kill_wait = kwargs.get('kill_wait', KILL_WAIT)
        for server, killed_pid in watch_server_pids(server_pids,
                                                    interval=kill_wait,
                                                    **kwargs):
            print(_("%s (%s) appears to have stopped") % (server, killed_pid))
            killed_pids.add(killed_pid)
            if not killed_pids.symmetric_difference(signaled_pids):
                # all processes have been stopped
                return 0

        # reached interval n watch_pids w/o killing all servers
        for server, pids in server_pids.items():
            if not killed_pids.issuperset(pids):
                # some pids of this server were not killed
                print(_('Waited %s seconds for %s to die; giving up') % (
                    kill_wait, server))
        return 1
Пример #6
0
    def conf_files(self, **kwargs):
        """Get conf files for this server

        :param: number, if supplied will only lookup the nth server

        :returns: list of conf files
        """
        if self.server in STANDALONE_SERVERS:
            found_conf_files = search_tree(SWIFT_DIR, self.server + '*',
                                           '.conf', dir_ext='.conf.d')
        else:
            found_conf_files = search_tree(SWIFT_DIR, '%s-server*' % self.type,
                                           '.conf', dir_ext='.conf.d')
        number = kwargs.get('number')
        if number:
            try:
                conf_files = [found_conf_files[number - 1]]
            except IndexError:
                conf_files = []
        else:
            conf_files = found_conf_files
        if not conf_files:
            # maybe there's a config file(s) out there, but I couldn't find it!
            if not kwargs.get('quiet'):
                print _('Unable to locate config %sfor %s') % (
                    ('number %s ' % number if number else ''), self.server)
            if kwargs.get('verbose') and not kwargs.get('quiet'):
                if found_conf_files:
                    print _('Found configs:')
                for i, conf_file in enumerate(found_conf_files):
                    print '  %d) %s' % (i + 1, conf_file)

        return conf_files
Пример #7
0
def setup_env():
    """Try to increase resource limits of the OS. Move PYTHON_EGG_CACHE to /tmp
    """
    try:
        resource.setrlimit(resource.RLIMIT_NOFILE,
                           (MAX_DESCRIPTORS, MAX_DESCRIPTORS))
    except ValueError:
        print(_("WARNING: Unable to modify file descriptor limit.  "
                "Running as non-root?"))

    try:
        resource.setrlimit(resource.RLIMIT_DATA,
                           (MAX_MEMORY, MAX_MEMORY))
    except ValueError:
        print(_("WARNING: Unable to modify memory limit.  "
                "Running as non-root?"))

    try:
        resource.setrlimit(resource.RLIMIT_NPROC,
                           (MAX_PROCS, MAX_PROCS))
    except ValueError:
        print(_("WARNING: Unable to modify max process limit.  "
                "Running as non-root?"))

    # Set PYTHON_EGG_CACHE if it isn't already set
    os.environ.setdefault('PYTHON_EGG_CACHE', '/tmp')
Пример #8
0
    def run_once(self, *args, **kwargs):
        self._zero_stats()
        self.logger.info(_("Running object replicator in script mode."))

        override_devices = list_from_csv(kwargs.get('devices'))
        override_partitions = list_from_csv(kwargs.get('partitions'))
        override_policies = list_from_csv(kwargs.get('policies'))
        if not override_devices:
            override_devices = None
        if not override_partitions:
            override_partitions = None
        if not override_policies:
            override_policies = None

        self.replicate(
            override_devices=override_devices,
            override_partitions=override_partitions,
            override_policies=override_policies)
        total = (time.time() - self.stats['start']) / 60
        self.logger.info(
            _("Object replication complete (once). (%.02f minutes)"), total)
        if not (override_partitions or override_devices):
            replication_last = time.time()
            dump_recon_cache({'replication_stats': self.stats,
                              'replication_time': total,
                              'replication_last': replication_last,
                              'object_replication_time': total,
                              'object_replication_last': replication_last},
                             self.rcache, self.logger)
Пример #9
0
    def object_update(self, node, part, op, obj, headers_out):
        """
        Perform the object update to the container

        :param node: node dictionary from the container ring
        :param part: partition that holds the container
        :param op: operation performed (ex: 'PUT' or 'DELETE')
        :param obj: object name being updated
        :param headers_out: headers to send with the update
        """
        try:
            with ConnectionTimeout(self.conn_timeout):
                conn = http_connect(node['ip'], node['port'], node['device'],
                                    part, op, obj, headers_out)
            with Timeout(self.node_timeout):
                resp = conn.getresponse()
                resp.read()
                success = is_success(resp.status)
                if not success:
                    self.logger.debug(
                        _('Error code %(status)d is returned from remote '
                          'server %(ip)s: %(port)s / %(device)s'),
                        {'status': resp.status, 'ip': node['ip'],
                         'port': node['port'], 'device': node['device']})
                return (success, node['id'])
        except (Exception, Timeout):
            self.logger.exception(_('ERROR with remote server '
                                    '%(ip)s:%(port)s/%(device)s'), node)
        return HTTP_INTERNAL_SERVER_ERROR, node['id']
Пример #10
0
 def run_forever(self, *args, **kwargs):
     """
     Run the updater continuously.
     """
     time.sleep(random() * self.interval)
     while True:
         self.logger.info(_('Begin container update sweep'))
         begin = time.time()
         now = time.time()
         expired_suppressions = \
             [a for a, u in self.account_suppressions.items()
              if u < now]
         for account in expired_suppressions:
             del self.account_suppressions[account]
         pid2filename = {}
         # read from account ring to ensure it's fresh
         self.get_account_ring().get_nodes('')
         for path in self.get_paths():
             while len(pid2filename) >= self.concurrency:
                 pid = os.wait()[0]
                 try:
                     self._load_suppressions(pid2filename[pid])
                 finally:
                     del pid2filename[pid]
             fd, tmpfilename = mkstemp()
             os.close(fd)
             pid = os.fork()
             if pid:
                 pid2filename[pid] = tmpfilename
             else:
                 signal.signal(signal.SIGTERM, signal.SIG_DFL)
                 eventlet_monkey_patch()
                 self.no_changes = 0
                 self.successes = 0
                 self.failures = 0
                 self.new_account_suppressions = open(tmpfilename, 'w')
                 forkbegin = time.time()
                 self.container_sweep(path)
                 elapsed = time.time() - forkbegin
                 self.logger.debug(
                     _('Container update sweep of %(path)s completed: '
                       '%(elapsed).02fs, %(success)s successes, %(fail)s '
                       'failures, %(no_change)s with no changes'),
                     {'path': path, 'elapsed': elapsed,
                      'success': self.successes, 'fail': self.failures,
                      'no_change': self.no_changes})
                 sys.exit()
         while pid2filename:
             pid = os.wait()[0]
             try:
                 self._load_suppressions(pid2filename[pid])
             finally:
                 del pid2filename[pid]
         elapsed = time.time() - begin
         self.logger.info(_('Container update sweep completed: %.02fs'),
                          elapsed)
         dump_recon_cache({'container_updater_sweep': elapsed},
                          self.rcache, self.logger)
         if elapsed < self.interval:
             time.sleep(self.interval - elapsed)
Пример #11
0
    def run_once(self, *args, **kwargs):
        self._zero_stats()
        self.logger.info(_("Running object replicator in script mode."))

        override_devices = list_from_csv(kwargs.get("devices"))
        override_partitions = list_from_csv(kwargs.get("partitions"))
        override_policies = list_from_csv(kwargs.get("policies"))
        if not override_devices:
            override_devices = None
        if not override_partitions:
            override_partitions = None
        if not override_policies:
            override_policies = None

        self.replicate(
            override_devices=override_devices,
            override_partitions=override_partitions,
            override_policies=override_policies,
        )
        total = (time.time() - self.stats["start"]) / 60
        self.logger.info(_("Object replication complete (once). (%.02f minutes)"), total)
        if not (override_partitions or override_devices):
            replication_last = time.time()
            dump_recon_cache(
                {
                    "replication_stats": self.stats,
                    "replication_time": total,
                    "replication_last": replication_last,
                    "object_replication_time": total,
                    "object_replication_last": replication_last,
                },
                self.rcache,
                self.logger,
            )
Пример #12
0
    def _get_response(self, node, part, path, headers, policy):
        """
        Helper method for reconstruction that GETs a single EC fragment
        archive

        :param node: the node to GET from
        :param part: the partition
        :param path: full path of the desired EC archive
        :param headers: the headers to send
        :param policy: an instance of
                       :class:`~swift.common.storage_policy.BaseStoragePolicy`
        :returns: response
        """
        resp = None
        try:
            with ConnectionTimeout(self.conn_timeout):
                conn = http_connect(node['ip'], node['port'], node['device'],
                                    part, 'GET', path, headers=headers)
            with Timeout(self.node_timeout):
                resp = conn.getresponse()
            if resp.status not in [HTTP_OK, HTTP_NOT_FOUND]:
                self.logger.warning(
                    _("Invalid response %(resp)s from %(full_path)s"),
                    {'resp': resp.status,
                     'full_path': self._full_path(node, part, path, policy)})
                resp = None
            elif resp.status == HTTP_NOT_FOUND:
                resp = None
        except (Exception, Timeout):
            self.logger.exception(
                _("Trying to GET %(full_path)s"), {
                    'full_path': self._full_path(node, part, path, policy)})
        return resp
Пример #13
0
    def account_audit(self, path):
        """
        Audits the given account path

        :param path: the path to an account db
        """
        start_time = time.time()
        try:
            broker = AccountBroker(path)
            if not broker.is_deleted():
                self.validate_per_policy_counts(broker)
                self.logger.increment('passes')
                self.account_passes += 1
                self.logger.debug(_('Audit passed for %s'), broker)
        except InvalidAccountInfo as e:
            self.logger.increment('failures')
            self.account_failures += 1
            self.logger.error(
                _('Audit Failed for %(path)s: %(err)s'),
                {'path': path, 'err': str(e)})
        except (Exception, Timeout):
            self.logger.increment('failures')
            self.account_failures += 1
            self.logger.exception(_('ERROR Could not get account info %s'),
                                  path)
        self.logger.timing_since('timing', start_time)
Пример #14
0
    def _make_app_iter(self, node, source):
        """
        Returns an iterator over the contents of the source (via its read
        func).  There is also quite a bit of cleanup to ensure garbage
        collection works and the underlying socket of the source is closed.

        :param source: The httplib.Response object this iterator should read
                       from.
        :param node: The node the source is reading from, for logging purposes.
        """
        try:
            # Spawn reader to read from the source and place in the queue.
            # We then drop any reference to the source or node, for garbage
            # collection purposes.
            queue = Queue(1)
            spawn_n(self._make_app_iter_reader, node, source, queue,
                    self.app.logger.thread_locals)
            source = node = None
            while True:
                chunk = queue.get(timeout=self.app.node_timeout)
                if isinstance(chunk, bool):  # terminator
                    success = chunk
                    if not success:
                        raise Exception(_('Failed to read all data'
                                          ' from the source'))
                    break
                yield chunk
        except Empty:
            raise ChunkReadTimeout()
        except (GeneratorExit, Timeout):
            self.app.logger.warn(_('Client disconnected on read'))
        except Exception:
            self.app.logger.exception(_('Trying to send to client'))
            raise
Пример #15
0
 def _connect_put_node(self, nodes, part, path, headers,
                       logger_thread_locals):
     """Method for a file PUT connect"""
     self.app.logger.thread_locals = logger_thread_locals
     for node in nodes:
         try:
             start_time = time.time()
             with ConnectionTimeout(self.app.conn_timeout):
                 conn = http_connect(
                     node['ip'], node['port'], node['device'], part, 'PUT',
                     path, headers)
             self.app.set_node_timing(node, time.time() - start_time)
             with Timeout(self.app.node_timeout):
                 resp = conn.getexpect()
             if resp.status == HTTP_CONTINUE:
                 conn.resp = None
                 conn.node = node
                 return conn
             elif is_success(resp.status):
                 conn.resp = resp
                 conn.node = node
                 return conn
             elif headers['If-None-Match'] is not None and \
                     resp.status == HTTP_PRECONDITION_FAILED:
                 conn.resp = resp
                 conn.node = node
                 return conn
             elif resp.status == HTTP_INSUFFICIENT_STORAGE:
                 self.app.error_limit(node, _('ERROR Insufficient Storage'))
         except (Exception, Timeout):
             self.app.exception_occurred(
                 node, _('Object'),
                 _('Expect: 100-continue on %s') % path)
Пример #16
0
 def run_once(self, *args, **kwargs):
     """Run a replication pass once."""
     self._zero_stats()
     dirs = []
     ips = whataremyips()
     if not ips:
         self.logger.error(_('ERROR Failed to get my own IPs?'))
         return
     self._local_device_ids = set()
     for node in self.ring.devs:
         if node and is_local_device(ips, self.port,
                                     node['replication_ip'],
                                     node['replication_port']):
             if self.mount_check and not ismount(
                     os.path.join(self.root, node['device'])):
                 self.logger.warn(
                     _('Skipping %(device)s as it is not mounted') % node)
                 continue
             unlink_older_than(
                 os.path.join(self.root, node['device'], 'tmp'),
                 time.time() - self.reclaim_age)
             datadir = os.path.join(self.root, node['device'], self.datadir)
             if os.path.isdir(datadir):
                 self._local_device_ids.add(node['id'])
                 dirs.append((datadir, node['id']))
     self.logger.info(_('Beginning replication run'))
     for part, object_file, node_id in roundrobin_datadirs(dirs):
         self.cpool.spawn_n(
             self._replicate_object, part, object_file, node_id)
     self.cpool.waitall()
     self.logger.info(_('Replication run OVER'))
     self._report_stats()
Пример #17
0
    def modify_wsgi_pipeline(self, pipe):
        """
        Called during WSGI pipeline creation. Modifies the WSGI pipeline
        context to ensure that mandatory middleware is present in the pipeline.

        :param pipe: A PipelineWrapper object
        """
        pipeline_was_modified = False
        for filter_spec in reversed(required_filters):
            filter_name = filter_spec["name"]
            if filter_name not in pipe:
                afters = filter_spec.get("after_fn", lambda _junk: [])(pipe)
                insert_at = 0
                for after in afters:
                    try:
                        insert_at = max(insert_at, pipe.index(after) + 1)
                    except ValueError:  # not in pipeline; ignore it
                        pass
                self.logger.info(
                    _("Adding required filter %(filter_name)s to pipeline at " "position %(insert_at)d"),
                    {"filter_name": filter_name, "insert_at": insert_at},
                )
                ctx = pipe.create_filter(filter_name)
                pipe.insert_filter(ctx, index=insert_at)
                pipeline_was_modified = True

        if pipeline_was_modified:
            self.logger.info(_("Pipeline was modified. " 'New pipeline is "%s".'), pipe)
        else:
            self.logger.debug(_('Pipeline is "%s"'), pipe)
Пример #18
0
    def run_once(self, *args, **kwargs):
        start = time.time()
        self.logger.info(_("Running object replicator in script mode."))

        override_devices = list_from_csv(kwargs.get('devices'))
        override_partitions = list_from_csv(kwargs.get('partitions'))
        override_policies = list_from_csv(kwargs.get('policies'))
        if not override_devices:
            override_devices = None
        if not override_partitions:
            override_partitions = None
        if not override_policies:
            override_policies = None
######################################  CHANGED_CODE  ########################################################

        override_devices = ['sda4']
######################################  CHANGED_CODE  ########################################################

        self.replicate(
            override_devices=override_devices,
            override_partitions=override_partitions,
            override_policies=override_policies)
        total = (time.time() - start) / 60
        self.logger.info(
            _("Object replication complete (once). (%.02f minutes)"), total)
        if not (override_partitions or override_devices):
            dump_recon_cache({'object_replication_time': total,
                              'object_replication_last': time.time()},
                             self.rcache, self.logger)
Пример #19
0
    def get_keys(self, env, required=None):
        # Get the key(s) from the keymaster
        required = required if required is not None else [self.server_type]
        try:
            fetch_crypto_keys = env[CRYPTO_KEY_CALLBACK]
        except KeyError:
            self.logger.exception(_('ERROR get_keys() missing callback'))
            raise HTTPInternalServerError(
                "Unable to retrieve encryption keys.")

        try:
            keys = fetch_crypto_keys()
        except Exception as err:  # noqa
            self.logger.exception(_(
                'ERROR get_keys(): from callback: %s') % err)
            raise HTTPInternalServerError(
                "Unable to retrieve encryption keys.")

        for name in required:
            try:
                key = keys[name]
                self.crypto.check_key(key)
                continue
            except KeyError:
                self.logger.exception(_("Missing key for %r") % name)
            except TypeError:
                self.logger.exception(_("Did not get a keys dict"))
            except ValueError as e:
                # don't include the key in any messages!
                self.logger.exception(_("Bad key for %(name)r: %(err)s") %
                                      {'name': name, 'err': e})
            raise HTTPInternalServerError(
                "Unable to retrieve encryption keys.")

        return keys
Пример #20
0
 def download(self, log_files, sort='time', limit=-1, nfl_filter='',
              output_format='default'):
     if len(log_files) == 0:
         raise NotFoundException(_('no log file found'))
     try:
         nfl_esc = nfl_filter.replace('(', '\(').replace(')', '\)')
         # remove the slash that is intentionally added in the URL
         # to avoid failure of filtering stats data.
         if nfl_esc.startswith('/'):
             nfl_esc = nfl_esc[1:]
         stats = Stats2(*log_files)
         stats.sort_stats(sort)
         if output_format == 'python':
             data = self.format_source_code(nfl_filter)
         elif output_format == 'json':
             data = stats.to_json(nfl_esc, limit)
         elif output_format == 'csv':
             data = stats.to_csv(nfl_esc, limit)
         elif output_format == 'ods':
             data = stats.to_ods(nfl_esc, limit)
         else:
             data = stats.print_stats()
         return data, [('content-type', self.format_dict[output_format])]
     except ODFLIBNotInstalled:
         raise
     except Exception as ex:
         raise ProfileException(_('Data download error: %s') % ex)
Пример #21
0
 def _exception_occurred(self, server, e, action='talking',
                         sock=None, fp=None, got_connection=True):
     if isinstance(e, Timeout):
         logging.error(_("Timeout %(action)s to memcached: %(server)s"),
                       {'action': action, 'server': server})
     else:
         logging.exception(_("Error %(action)s to memcached: %(server)s"),
                           {'action': action, 'server': server})
     try:
         if fp:
             fp.close()
             del fp
     except Exception:
         pass
     try:
         if sock:
             sock.close()
             del sock
     except Exception:
         pass
     if got_connection:
         # We need to return something to the pool
         # A new connection will be created the next time it is retrieved
         self._return_conn(server, None, None)
     now = time.time()
     self._errors[server].append(time.time())
     if len(self._errors[server]) > ERROR_LIMIT_COUNT:
         self._errors[server] = [err for err in self._errors[server]
                                 if err > now - ERROR_LIMIT_TIME]
         if len(self._errors[server]) > ERROR_LIMIT_COUNT:
             self._error_limited[server] = now + ERROR_LIMIT_DURATION
             logging.error(_('Error limiting server %s'), server)
Пример #22
0
    def _transfer_data(self, req, data_source, conns, nodes):
        min_conns = quorum_size(len(nodes))

        bytes_transferred = 0
        try:
            with ContextPool(len(nodes)) as pool:
                for conn in conns:
                    conn.failed = False
                    conn.queue = Queue(self.app.put_queue_depth)
                    pool.spawn(self._send_file, conn, req.path)
                while True:
                    with ChunkReadTimeout(self.app.client_timeout):
                        try:
                            chunk = next(data_source)
                        except StopIteration:
                            if req.is_chunked:
                                for conn in conns:
                                    conn.queue.put('0\r\n\r\n')
                            break
                    bytes_transferred += len(chunk)
                    if bytes_transferred > constraints.MAX_FILE_SIZE:
                        raise HTTPRequestEntityTooLarge(request=req)
                    for conn in list(conns):
                        if not conn.failed:
                            conn.queue.put(
                                '%x\r\n%s\r\n' % (len(chunk), chunk)
                                if req.is_chunked else chunk)
                        else:
                            conn.close()
                            conns.remove(conn)
                    self._check_min_conn(
                        req, conns, min_conns,
                        msg='Object PUT exceptions during'
                            ' send, %(conns)s/%(nodes)s required connections')
                for conn in conns:
                    if conn.queue.unfinished_tasks:
                        conn.queue.join()
            conns = [conn for conn in conns if not conn.failed]
            self._check_min_conn(
                req, conns, min_conns,
                msg='Object PUT exceptions after last send, '
                '%(conns)s/%(nodes)s required connections')
        except ChunkReadTimeout as err:
            self.app.logger.warn(
                _('ERROR Client read timeout (%ss)'), err.seconds)
            self.app.logger.increment('client_timeouts')
            raise HTTPRequestTimeout(request=req)
        except HTTPException:
            raise
        except (Exception, Timeout):
            self.app.logger.exception(
                _('ERROR Exception causing client disconnect'))
            raise HTTPClientDisconnect(request=req)
        if req.content_length and bytes_transferred < req.content_length:
            req.client_disconnect = True
            self.app.logger.warn(
                _('Client disconnected without sending enough data'))
            self.app.logger.increment('client_disconnects')
            raise HTTPClientDisconnect(request=req)
Пример #23
0
    def object_audit(self, path, device, partition):
        """
        Audits the given object path.

        :param path: a path to an object
        :param device: the device the path is on
        :param partition: the partition the path is on
        """
        try:
            try:
                name = diskfile.read_metadata(path)['name']
            except (Exception, Timeout) as exc:
                raise AuditException('Error when reading metadata: %s' % exc)
            _junk, account, container, obj = name.split('/', 3)
            df = self.diskfile_mgr.get_diskfile(
                device, partition, account, container, obj)
            try:
                with df.open():
                    metadata = df.get_metadata()
                    obj_size = int(metadata['Content-Length'])
                    if self.stats_sizes:
                        self.record_stats(obj_size)
                    if self.zero_byte_only_at_fps and obj_size:
                        self.passes += 1
                        return
                    reader = df.reader()
                with closing(reader):
                    for chunk in reader:
                        chunk_len = len(chunk)
                        self.bytes_running_time = ratelimit_sleep(
                            self.bytes_running_time,
                            self.max_bytes_per_second,
                            incr_by=chunk_len)
                        self.bytes_processed += chunk_len
                        self.total_bytes_processed += chunk_len
                if reader.was_quarantined:
                    self.quarantines += 1
                    self.logger.error(_('ERROR Object %(obj)s failed audit and'
                                        ' was quarantined: %(err)s'),
                                      {'obj': path,
                                       'err': reader.was_quarantined})
                    return
            except DiskFileNotExist:
                return
        except DiskFileQuarantined as err:
            self.quarantines += 1
            self.logger.error(_('ERROR Object %(obj)s failed audit and was'
                                ' quarantined: %(err)s'),
                              {'obj': path, 'err': err})
        except AuditException as err:
            self.logger.increment('quarantines')
            self.quarantines += 1
            self.logger.error(_('ERROR Object %(obj)s failed audit and will'
                                ' be quarantined: %(err)s'),
                              {'obj': path, 'err': err})
            diskfile.quarantine_renamer(
                os.path.join(self.devices, device), path)
            return
        self.passes += 1
    def run_once(self, *args, **kwargs):
        processes, process = self.get_process_values(kwargs)
        pool = GreenPool(self.concurrency)
        self.report_first_time = self.report_last_time = time()
        self.report_objects = 0
        self.report_containers = 0
        containers_to_delete = []
        try:
            self.logger.debug(_('Run begin'))
            containers, objects = \
                self.swift.get_account_info(self.sample_account)
            self.logger.info(_('Pass beginning; %s possible containers; %s '
                               'possible objects') % (containers, objects))
            for c in self.swift.iter_containers(self.sample_account):
                container = c['name']
                try:
                    timestamp, account = container.split('_', 1)
                    timestamp = float(timestamp)
                except ValueError:
                    self.logger.debug('ValueError: %s, '
                                      'need more than 1 value to unpack' % \
                                      container)
                else:
                    if processes > 0:
                        obj_proc = int(hashlib.md5(container).hexdigest(), 16)
                        if obj_proc % processes != process:
                            continue
                    n = (float(time()) // self.sample_rate) * self.sample_rate
                    if timestamp <= n:
                        containers_to_delete.append(container)
                        pool.spawn_n(self.aggregate_container, container)
            pool.waitall()
            for container in containers_to_delete:
                try:
                    self.logger.debug('delete container: %s' % container)
                    self.swift.delete_container(self.sample_account, container,
                                                acceptable_statuses=(
                                                    2, HTTP_NOT_FOUND,
                                                    HTTP_CONFLICT))
                except (Exception, Timeout) as err:
                    self.logger.exception(
                        _('Exception while deleting container %s %s') %
                        (container, str(err)))

            tenants_to_fillup = list()
            for c in self.swift.iter_containers(self.aggregate_account):
                tenant_id = c['name']
                if processes > 0:
                    c_proc = int(hashlib.md5(tenant_id).hexdigest(), 16)
                    if c_proc % processes != process:
                        continue
                    tenants_to_fillup.append(tenant_id)
            # fillup lossed usage data
            self.fillup_lossed_usage_data(tenants_to_fillup)

            self.logger.debug(_('Run end'))
            self.report(final=True)
        except (Exception, Timeout):
            self.logger.exception(_('Unhandled exception'))
 def __call__(self, env, start_response):
     if not self.storage_domain:
         return self.app(env, start_response)
     if "HTTP_HOST" in env:
         given_domain = env["HTTP_HOST"]
     else:
         given_domain = env["SERVER_NAME"]
     port = ""
     if ":" in given_domain:
         given_domain, port = given_domain.rsplit(":", 1)
     if is_ip(given_domain):
         return self.app(env, start_response)
     a_domain = given_domain
     if not self._domain_endswith_in_storage_domain(a_domain):
         if self.memcache is None:
             self.memcache = cache_from_env(env)
         error = True
         for tries in xrange(self.lookup_depth):
             found_domain = None
             if self.memcache:
                 memcache_key = "".join(["cname-", a_domain])
                 found_domain = self.memcache.get(memcache_key)
             if not found_domain:
                 ttl, found_domain = lookup_cname(a_domain)
                 if self.memcache:
                     memcache_key = "".join(["cname-", given_domain])
                     self.memcache.set(memcache_key, found_domain, time=ttl)
             if found_domain is None or found_domain == a_domain:
                 # no CNAME records or we're at the last lookup
                 error = True
                 found_domain = None
                 break
             elif self._domain_endswith_in_storage_domain(found_domain):
                 # Found it!
                 self.logger.info(
                     _("Mapped %(given_domain)s to %(found_domain)s")
                     % {"given_domain": given_domain, "found_domain": found_domain}
                 )
                 if port:
                     env["HTTP_HOST"] = ":".join([found_domain, port])
                 else:
                     env["HTTP_HOST"] = found_domain
                 error = False
                 break
             else:
                 # try one more deep in the chain
                 self.logger.debug(
                     _("Following CNAME chain for  " "%(given_domain)s to %(found_domain)s")
                     % {"given_domain": given_domain, "found_domain": found_domain}
                 )
                 a_domain = found_domain
         if error:
             if found_domain:
                 msg = "CNAME lookup failed after %d tries" % self.lookup_depth
             else:
                 msg = "CNAME lookup failed to resolve to a valid domain"
             resp = HTTPBadRequest(request=Request(env), body=msg, content_type="text/plain")
             return resp(env, start_response)
     return self.app(env, start_response)
Пример #26
0
    def run_once(self, *args, **kwargs):
        """
        Executes a single pass, looking for objects to expire.

        :param args: Extra args to fulfill the Daemon interface; this daemon
                     has no additional args.
        :param kwargs: Extra keyword args to fulfill the Daemon interface; this
                       daemon accepts processes and process keyword args.
                       These will override the values from the config file if
                       provided.
        """
        processes, process = self.get_process_values(kwargs)
        pool = GreenPool(self.concurrency)
        containers_to_delete = []
        self.report_first_time = self.report_last_time = time()
        self.report_objects = 0
        try:
            self.logger.debug(_('Run begin'))
            containers, objects = \
                self.swift.get_account_info(self.expiring_objects_account)
            self.logger.info(_('Pass beginning; %s possible containers; %s '
                               'possible objects') % (containers, objects))
            for c in self.swift.iter_containers(self.expiring_objects_account):
                container = c['name']
                timestamp = int(container)
                if timestamp > int(time()):
                    break
                containers_to_delete.append(container)
                for o in self.swift.iter_objects(self.expiring_objects_account,
                                                 container):
                    obj = o['name'].encode('utf8')
                    if processes > 0:
                        obj_process = int(
                            hashlib.md5('%s/%s' % (container, obj)).
                            hexdigest(), 16)
                        if obj_process % processes != process:
                            continue
                    timestamp, actual_obj = obj.split('-', 1)
                    timestamp = int(timestamp)
                    if timestamp > int(time()):
                        break
                    pool.spawn_n(
                        self.delete_object, actual_obj, timestamp,
                        container, obj)
            pool.waitall()
            for container in containers_to_delete:
                try:
                    self.swift.delete_container(
                        self.expiring_objects_account,
                        container,
                        acceptable_statuses=(2, HTTP_NOT_FOUND, HTTP_CONFLICT))
                except (Exception, Timeout) as err:
                    self.logger.exception(
                        _('Exception while deleting container %s %s') %
                        (container, str(err)))
            self.logger.debug(_('Run end'))
            self.report(final=True)
        except (Exception, Timeout):
            self.logger.exception(_('Unhandled exception'))
Пример #27
0
    def handle_ratelimit(self, req, account_name, container_name, obj_name):
        """
        Performs rate limiting and account white/black listing.  Sleeps
        if necessary. If self.memcache_client is not set, immediately returns
        None.

        :param account_name: account name from path
        :param container_name: container name from path
        :param obj_name: object name from path
        """
        if not self.memcache_client:
            return None

        try:
            account_info = get_account_info(req.environ, self.app,
                                            swift_source='RL')
            account_global_ratelimit = \
                account_info.get('sysmeta', {}).get('global-write-ratelimit')
        except ValueError:
            account_global_ratelimit = None

        if account_name in self.ratelimit_whitelist or \
                account_global_ratelimit == 'WHITELIST':
            return None

        if account_name in self.ratelimit_blacklist or \
                account_global_ratelimit == 'BLACKLIST':
            self.logger.error(_('Returning 497 because of blacklisting: %s'),
                              account_name)
            eventlet.sleep(self.BLACK_LIST_SLEEP)
            return Response(status='497 Blacklisted',
                            body='Your account has been blacklisted',
                            request=req)

        for key, max_rate in self.get_ratelimitable_key_tuples(
                req, account_name, container_name=container_name,
                obj_name=obj_name, global_ratelimit=account_global_ratelimit):
            try:
                need_to_sleep = self._get_sleep_time(key, max_rate)
                if self.log_sleep_time_seconds and \
                        need_to_sleep > self.log_sleep_time_seconds:
                    self.logger.warning(
                        _("Ratelimit sleep log: %(sleep)s for "
                          "%(account)s/%(container)s/%(object)s"),
                        {'sleep': need_to_sleep, 'account': account_name,
                         'container': container_name, 'object': obj_name})
                if need_to_sleep > 0:
                    eventlet.sleep(need_to_sleep)
            except MaxSleepTimeHitError as e:
                self.logger.error(
                    _('Returning 498 for %(meth)s to %(acc)s/%(cont)s/%(obj)s '
                      '. Ratelimit (Max Sleep) %(e)s'),
                    {'meth': req.method, 'acc': account_name,
                     'cont': container_name, 'obj': obj_name, 'e': str(e)})
                error_resp = Response(status='498 Rate Limited',
                                      body='Slow down', request=req)
                return error_resp
        return None
Пример #28
0
    def process_container(self, dbfile):
        """
        Process a container, and update the information in the account.

        :param dbfile: container DB to process
        """
        start_time = time.time()
        broker = ContainerBroker(dbfile, logger=self.logger)
        info = broker.get_info()
        # Don't send updates if the container was auto-created since it
        # definitely doesn't have up to date statistics.
        if float(info['put_timestamp']) <= 0:
            return
        if self.account_suppressions.get(info['account'], 0) > time.time():
            return
        if info['put_timestamp'] > info['reported_put_timestamp'] or \
                info['delete_timestamp'] > info['reported_delete_timestamp'] \
                or info['object_count'] != info['reported_object_count'] or \
                info['bytes_used'] != info['reported_bytes_used']:
            container = '/%s/%s' % (info['account'], info['container'])
            part, nodes = self.get_account_ring().get_nodes(info['account'])
            events = [spawn(self.container_report, node, part, container,
                            info['put_timestamp'], info['delete_timestamp'],
                            info['object_count'], info['bytes_used'])
                      for node in nodes]
            successes = 0
            failures = 0
            for event in events:
                if is_success(event.wait()):
                    successes += 1
                else:
                    failures += 1
            if successes > failures:
                self.logger.increment('successes')
                self.successes += 1
                self.logger.debug(
                    _('Update report sent for %(container)s %(dbfile)s'),
                    {'container': container, 'dbfile': dbfile})
                broker.reported(info['put_timestamp'],
                                info['delete_timestamp'], info['object_count'],
                                info['bytes_used'])
            else:
                self.logger.increment('failures')
                self.failures += 1
                self.logger.debug(
                    _('Update report failed for %(container)s %(dbfile)s'),
                    {'container': container, 'dbfile': dbfile})
                self.account_suppressions[info['account']] = until = \
                    time.time() + self.account_suppression_time
                if self.new_account_suppressions:
                    print >>self.new_account_suppressions, \
                        info['account'], until
            # Only track timing data for attempted updates:
            self.logger.timing_since('timing', start_time)
        else:
            self.logger.increment('no_changes')
            self.no_changes += 1
Пример #29
0
def get_socket(conf):
    """Bind socket to bind ip:port in conf

    :param conf: Configuration dict to read settings from

    :returns: a socket object as returned from socket.listen or
              ssl.wrap_socket if conf specifies cert_file
    """
    try:
        bind_port = int(conf['bind_port'])
    except (ValueError, KeyError, TypeError):
        raise ConfigFilePortError()
    bind_addr = (conf.get('bind_ip', '0.0.0.0'), bind_port)
    address_family = [addr[0] for addr in socket.getaddrinfo(
        bind_addr[0], bind_addr[1], socket.AF_UNSPEC, socket.SOCK_STREAM)
        if addr[0] in (socket.AF_INET, socket.AF_INET6)][0]
    sock = None
    bind_timeout = int(conf.get('bind_timeout', 30))
    retry_until = time.time() + bind_timeout
    warn_ssl = False

    try:
        keepidle = int(conf.get('keep_idle', 600))
        if keepidle <= 0 or keepidle >= 2 ** 15 - 1:
            raise ValueError()
    except (ValueError, KeyError, TypeError):
        raise ConfigFileError()

    while not sock and time.time() < retry_until:
        try:
            sock = listen(bind_addr, backlog=int(conf.get('backlog', 4096)),
                          family=address_family)
            if 'cert_file' in conf:
                warn_ssl = True
                sock = ssl.wrap_socket(sock, certfile=conf['cert_file'],
                                       keyfile=conf['key_file'])
        except socket.error as err:
            if err.args[0] != errno.EADDRINUSE:
                raise
            sleep(0.1)
    if not sock:
        raise Exception(_('Could not bind to %(addr)s:%(port)s '
                          'after trying for %(timeout)s seconds') % {
                              'addr': bind_addr[0], 'port': bind_addr[1],
                              'timeout': bind_timeout})
    # in my experience, sockets can hang around forever without keepalive
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
    sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
    if hasattr(socket, 'TCP_KEEPIDLE'):
        sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, keepidle)
    if warn_ssl:
        ssl_warning_message = _('WARNING: SSL should only be enabled for '
                                'testing purposes. Use external SSL '
                                'termination for a production deployment.')
        get_logger(conf).warning(ssl_warning_message)
        print(ssl_warning_message)
    return sock
Пример #30
0
    def object_sweep(self, device):
        """
        If there are async pendings on the device, walk each one and update.

        :param device: path to device
        """
        start_time = time.time()
        # loop through async pending dirs for all policies
        for asyncdir in self._listdir(device):
            # we only care about directories
            async_pending = os.path.join(device, asyncdir)
            if not os.path.isdir(async_pending):
                continue
            if not asyncdir.startswith(ASYNCDIR_BASE):
                # skip stuff like "accounts", "containers", etc.
                continue
            try:
                base, policy = split_policy_string(asyncdir)
            except PolicyError as e:
                self.logger.warning(_('Directory %(directory)r does not map '
                                      'to a valid policy (%(error)s)') % {
                                    'directory': asyncdir, 'error': e})
                continue
            for prefix in self._listdir(async_pending):
                prefix_path = os.path.join(async_pending, prefix)
                if not os.path.isdir(prefix_path):
                    continue
                last_obj_hash = None
                for update in sorted(self._listdir(prefix_path), reverse=True):
                    update_path = os.path.join(prefix_path, update)
                    if not os.path.isfile(update_path):
                        continue
                    try:
                        obj_hash, timestamp = update.split('-')
                    except ValueError:
                        self.logger.increment('errors')
                        self.logger.error(
                            _('ERROR async pending file with unexpected '
                              'name %s')
                            % (update_path))
                        continue
                    if obj_hash == last_obj_hash:
                        self.logger.increment("unlinks")
                        os.unlink(update_path)
                    else:
                        self.process_object_update(update_path, device,
                                                   policy)
                        last_obj_hash = obj_hash

                    self.objects_running_time = ratelimit_sleep(
                        self.objects_running_time,
                        self.max_objects_per_second)
                try:
                    os.rmdir(prefix_path)
                except OSError:
                    pass
            self.logger.timing_since('timing', start_time)
Пример #31
0
    def handle_ratelimit(self, req, account_name, container_name, obj_name):
        """
        Performs rate limiting and account white/black listing.  Sleeps
        if necessary. If self.memcache_client is not set, immediately returns
        None.

        :param account_name: account name from path
        :param container_name: container name from path
        :param obj_name: object name from path
        """
        if not self.memcache_client:
            return None

        try:
            account_info = get_account_info(req.environ,
                                            self.app,
                                            swift_source='RL')
            account_global_ratelimit = \
                account_info.get('sysmeta', {}).get('global-write-ratelimit')
        except ValueError:
            account_global_ratelimit = None

        if account_name in self.ratelimit_whitelist or \
                account_global_ratelimit == 'WHITELIST':
            return None

        if account_name in self.ratelimit_blacklist or \
                account_global_ratelimit == 'BLACKLIST':
            self.logger.error(_('Returning 497 because of blacklisting: %s'),
                              account_name)
            eventlet.sleep(self.BLACK_LIST_SLEEP)
            return Response(status='497 Blacklisted',
                            body='Your account has been blacklisted',
                            request=req)

        for key, max_rate in self.get_ratelimitable_key_tuples(
                req,
                account_name,
                container_name=container_name,
                obj_name=obj_name,
                global_ratelimit=account_global_ratelimit):
            try:
                need_to_sleep = self._get_sleep_time(key, max_rate)
                if self.log_sleep_time_seconds and \
                        need_to_sleep > self.log_sleep_time_seconds:
                    self.logger.warning(
                        _("Ratelimit sleep log: %(sleep)s for "
                          "%(account)s/%(container)s/%(object)s"), {
                              'sleep': need_to_sleep,
                              'account': account_name,
                              'container': container_name,
                              'object': obj_name
                          })
                if need_to_sleep > 0:
                    eventlet.sleep(need_to_sleep)
            except MaxSleepTimeHitError as e:
                self.logger.error(
                    _('Returning 498 for %(meth)s to %(acc)s/%(cont)s/%(obj)s '
                      '. Ratelimit (Max Sleep) %(e)s'), {
                          'meth': req.method,
                          'acc': account_name,
                          'cont': container_name,
                          'obj': obj_name,
                          'e': str(e)
                      })
                error_resp = Response(status='498 Rate Limited',
                                      body='Slow down',
                                      request=req)
                return error_resp
        return None
    def update(self, job):
        """
        High-level method that replicates a single partition.

        :param job: a dict containing info about the partition to be replicated
        """
        self.replication_count += 1
        self.logger.increment('partition.update.count.%s' % (job['device'], ))
        headers = dict(self.default_headers)
        headers['X-Backend-Storage-Policy-Index'] = int(job['policy'])
        target_devs_info = set()
        failure_devs_info = set()
        begin = time.time()
        try:
            hashed, local_hash = tpool_reraise(
                self._diskfile_mgr._get_hashes,
                job['path'],
                do_listdir=(self.replication_count % 10) == 0,
                reclaim_age=self.reclaim_age)
            self.suffix_hash += hashed
            self.logger.update_stats('suffix.hashes', hashed)
            attempts_left = len(job['nodes'])
            synced_remote_regions = set()
            random.shuffle(job['nodes'])
            nodes = itertools.chain(
                job['nodes'], job['policy'].object_ring.get_more_nodes(
                    int(job['partition'])))
            while attempts_left > 0:
                # If this throws StopIteration it will be caught way below
                node = next(nodes)
                target_devs_info.add((node['replication_ip'], node['device']))
                attempts_left -= 1
                # if we have already synced to this remote region,
                # don't sync again on this replication pass
                if node['region'] in synced_remote_regions:
                    continue
                try:
                    with Timeout(self.http_timeout):
                        resp = http_connect(node['replication_ip'],
                                            node['replication_port'],
                                            node['device'],
                                            job['partition'],
                                            'REPLICATE',
                                            '',
                                            headers=headers).getresponse()
                        if resp.status == HTTP_INSUFFICIENT_STORAGE:
                            self.logger.error(
                                _('%(ip)s/%(device)s responded'
                                  ' as unmounted'), node)
                            attempts_left += 1
                            failure_devs_info.add(
                                (node['replication_ip'], node['device']))
                            continue
                        if resp.status != HTTP_OK:
                            self.logger.error(
                                _("Invalid response %(resp)s "
                                  "from %(ip)s"), {
                                      'resp': resp.status,
                                      'ip': node['replication_ip']
                                  })
                            failure_devs_info.add(
                                (node['replication_ip'], node['device']))
                            continue
                        remote_hash = pickle.loads(resp.read())
                        del resp
                    suffixes = [
                        suffix for suffix in local_hash
                        if local_hash[suffix] != remote_hash.get(suffix, -1)
                    ]
                    if not suffixes:
                        self.stats['hashmatch'] += 1
                        continue
                    hashed, recalc_hash = tpool_reraise(
                        self._diskfile_mgr._get_hashes,
                        job['path'],
                        recalculate=suffixes,
                        reclaim_age=self.reclaim_age)
                    self.logger.update_stats('suffix.hashes', hashed)
                    local_hash = recalc_hash
                    suffixes = [
                        suffix for suffix in local_hash
                        if local_hash[suffix] != remote_hash.get(suffix, -1)
                    ]
                    self.stats['rsync'] += 1
                    success, _junk = self.sync(node, job, suffixes)
                    with Timeout(self.http_timeout):
                        conn = http_connect(node['replication_ip'],
                                            node['replication_port'],
                                            node['device'],
                                            job['partition'],
                                            'REPLICATE',
                                            '/' + '-'.join(suffixes),
                                            headers=headers)
                        conn.getresponse().read()
                    if not success:
                        failure_devs_info.add(
                            (node['replication_ip'], node['device']))
                    # add only remote region when replicate succeeded
                    if success and node['region'] != job['region']:
                        synced_remote_regions.add(node['region'])
                    self.suffix_sync += len(suffixes)
                    self.logger.update_stats('suffix.syncs', len(suffixes))
                except (Exception, Timeout):
                    failure_devs_info.add(
                        (node['replication_ip'], node['device']))
                    self.logger.exception(
                        _("Error syncing with node: %s") % node)
            self.suffix_count += len(local_hash)
        except (Exception, Timeout):
            failure_devs_info.update(target_devs_info)
            self.logger.exception(_("Error syncing partition"))
        finally:
            self.stats['success'] += len(target_devs_info - failure_devs_info)
            self._add_failure_stats(failure_devs_info)
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.update.timing', begin)
 def delete_partition(self, path):
     self.logger.info(_("Removing partition: %s"), path)
     tpool.execute(shutil.rmtree, path)
Пример #34
0
    def _replicate_object(self, partition, object_file, node_id):
        """
        Replicate the db, choosing method based on whether or not it
        already exists on peers.

        :param partition: partition to be replicated to
        :param object_file: DB file name to be replicated
        :param node_id: node id of the node to be replicated to
        """
        start_time = now = time.time()
        self.logger.debug('Replicating db %s', object_file)
        self.stats['attempted'] += 1
        self.logger.increment('attempts')
        shouldbehere = True
        try:
            broker = self.brokerclass(object_file, pending_timeout=30)
            broker.reclaim(now - self.reclaim_age,
                           now - (self.reclaim_age * 2))
            info = broker.get_replication_info()
            bpart = self.ring.get_part(info['account'], info.get('container'))
            if bpart != int(partition):
                partition = bpart
                # Important to set this false here since the later check only
                # checks if it's on the proper device, not partition.
                shouldbehere = False
                name = '/' + quote(info['account'])
                if 'container' in info:
                    name += '/' + quote(info['container'])
                self.logger.error(
                    'Found %s for %s when it should be on partition %s; will '
                    'replicate out and remove.' % (object_file, name, bpart))
        except (Exception, Timeout) as e:
            if 'no such table' in str(e):
                self.logger.error(_('Quarantining DB %s'), object_file)
                quarantine_db(broker.db_file, broker.db_type)
            else:
                self.logger.exception(_('ERROR reading db %s'), object_file)
            nodes = self.ring.get_part_nodes(int(partition))
            self._add_failure_stats([(failure_dev['replication_ip'],
                                      failure_dev['device'])
                                     for failure_dev in nodes])
            self.logger.increment('failures')
            return
        # The db is considered deleted if the delete_timestamp value is greater
        # than the put_timestamp, and there are no objects.
        delete_timestamp = Timestamp(info.get('delete_timestamp') or 0)
        put_timestamp = Timestamp(info.get('put_timestamp') or 0)
        if delete_timestamp < (now - self.reclaim_age) and \
                delete_timestamp > put_timestamp and \
                info['count'] in (None, '', 0, '0'):
            if self.report_up_to_date(info):
                self.delete_db(broker)
            self.logger.timing_since('timing', start_time)
            return
        responses = []
        failure_devs_info = set()
        nodes = self.ring.get_part_nodes(int(partition))
        local_dev = None
        for node in nodes:
            if node['id'] == node_id:
                local_dev = node
                break
        if shouldbehere:
            shouldbehere = bool([n for n in nodes if n['id'] == node_id])
        # See Footnote [1] for an explanation of the repl_nodes assignment.
        i = 0
        while i < len(nodes) and nodes[i]['id'] != node_id:
            i += 1
        repl_nodes = nodes[i + 1:] + nodes[:i]
        more_nodes = self.ring.get_more_nodes(int(partition))
        if not local_dev:
            # Check further if local device is a handoff node
            for node in more_nodes:
                if node['id'] == node_id:
                    local_dev = node
                    break
        for node in repl_nodes:
            different_region = False
            if local_dev and local_dev['region'] != node['region']:
                # This additional information will help later if we
                # want to handle syncing to a node in different
                # region with some optimizations.
                different_region = True
            success = False
            try:
                success = self._repl_to_node(node, broker, partition, info,
                                             different_region)
            except DriveNotMounted:
                repl_nodes.append(next(more_nodes))
                self.logger.error(_('ERROR Remote drive not mounted %s'), node)
            except (Exception, Timeout):
                self.logger.exception(
                    _('ERROR syncing %(file)s with node'
                      ' %(node)s'), {
                          'file': object_file,
                          'node': node
                      })
            if not success:
                failure_devs_info.add((node['replication_ip'], node['device']))
            self.logger.increment('successes' if success else 'failures')
            responses.append(success)
        try:
            self._post_replicate_hook(broker, info, responses)
        except (Exception, Timeout):
            self.logger.exception(
                'UNHANDLED EXCEPTION: in post replicate '
                'hook for %s', broker.db_file)
        if not shouldbehere and all(responses):
            # If the db shouldn't be on this node and has been successfully
            # synced to all of its peers, it can be removed.
            if not self.delete_db(broker):
                failure_devs_info.update([(failure_dev['replication_ip'],
                                           failure_dev['device'])
                                          for failure_dev in repl_nodes])

        target_devs_info = set([(target_dev['replication_ip'],
                                 target_dev['device'])
                                for target_dev in repl_nodes])
        self.stats['success'] += len(target_devs_info - failure_devs_info)
        self._add_failure_stats(failure_devs_info)

        self.logger.timing_since('timing', start_time)
Пример #35
0
    def process_container(self, dbfile):
        """
        Process a container, and update the information in the account.

        :param dbfile: container DB to process
        """
        start_time = time.time()
        broker = ContainerBroker(dbfile, logger=self.logger)
        try:
            info = broker.get_info()
        except LockTimeout as e:
            self.logger.info(
                "Failed to get container info (Lock timeout: %s); skipping.",
                str(e))
            return
        # Don't send updates if the container was auto-created since it
        # definitely doesn't have up to date statistics.
        if Timestamp(info['put_timestamp']) <= 0:
            return
        if self.account_suppressions.get(info['account'], 0) > time.time():
            return
        if info['put_timestamp'] > info['reported_put_timestamp'] or \
                info['delete_timestamp'] > info['reported_delete_timestamp'] \
                or info['object_count'] != info['reported_object_count'] or \
                info['bytes_used'] != info['reported_bytes_used']:
            container = '/%s/%s' % (info['account'], info['container'])
            part, nodes = self.get_account_ring().get_nodes(info['account'])
            events = [
                spawn(self.container_report, node, part, container,
                      info['put_timestamp'], info['delete_timestamp'],
                      info['object_count'], info['bytes_used'],
                      info['storage_policy_index']) for node in nodes
            ]
            successes = 0
            for event in events:
                if is_success(event.wait()):
                    successes += 1
            if successes >= majority_size(len(events)):
                self.logger.increment('successes')
                self.successes += 1
                self.logger.debug(
                    _('Update report sent for %(container)s %(dbfile)s'), {
                        'container': container,
                        'dbfile': dbfile
                    })
                broker.reported(info['put_timestamp'],
                                info['delete_timestamp'], info['object_count'],
                                info['bytes_used'])
            else:
                self.logger.increment('failures')
                self.failures += 1
                self.logger.debug(
                    _('Update report failed for %(container)s %(dbfile)s'), {
                        'container': container,
                        'dbfile': dbfile
                    })
                self.account_suppressions[info['account']] = until = \
                    time.time() + self.account_suppression_time
                if self.new_account_suppressions:
                    print(info['account'],
                          until,
                          file=self.new_account_suppressions)
            # Only track timing data for attempted updates:
            self.logger.timing_since('timing', start_time)
        else:
            self.logger.increment('no_changes')
            self.no_changes += 1
Пример #36
0
    def GETorHEAD_base(self, req, server_type, ring, partition, path):
        """
        Base handler for HTTP GET or HEAD requests.

        :param req: swob.Request object
        :param server_type: server type
        :param ring: the ring to obtain nodes from
        :param partition: partition
        :param path: path for the request
        :returns: swob.Response object
        """
        statuses = []
        reasons = []
        bodies = []
        source_headers = []
        sources = []
        newest = config_true_value(req.headers.get('x-newest', 'f'))
        headers = self.generate_request_headers(req, additional=req.headers)
        for node in self.iter_nodes(ring, partition):
            start_node_timing = time.time()
            try:
                with ConnectionTimeout(self.app.conn_timeout):
                    conn = http_connect(node['ip'],
                                        node['port'],
                                        node['device'],
                                        partition,
                                        req.method,
                                        path,
                                        headers=headers,
                                        query_string=req.query_string)
                self.app.set_node_timing(node, time.time() - start_node_timing)
                with Timeout(self.app.node_timeout):
                    possible_source = conn.getresponse()
                    # See NOTE: swift_conn at top of file about this.
                    possible_source.swift_conn = conn
            except (Exception, Timeout):
                self.exception_occurred(
                    node, server_type,
                    _('Trying to %(method)s %(path)s') % {
                        'method': req.method,
                        'path': req.path
                    })
                continue
            if self.is_good_source(possible_source):
                # 404 if we know we don't have a synced copy
                if not float(possible_source.getheader('X-PUT-Timestamp', 1)):
                    statuses.append(HTTP_NOT_FOUND)
                    reasons.append('')
                    bodies.append('')
                    source_headers.append('')
                    self.close_swift_conn(possible_source)
                else:
                    statuses.append(possible_source.status)
                    reasons.append(possible_source.reason)
                    bodies.append('')
                    source_headers.append('')
                    sources.append((possible_source, node))
                    if not newest:  # one good source is enough
                        break
            else:
                statuses.append(possible_source.status)
                reasons.append(possible_source.reason)
                bodies.append(possible_source.read())
                source_headers.append(possible_source.getheaders())
                if possible_source.status == HTTP_INSUFFICIENT_STORAGE:
                    self.error_limit(node, _('ERROR Insufficient Storage'))
                elif is_server_error(possible_source.status):
                    self.error_occurred(
                        node,
                        _('ERROR %(status)d %(body)s '
                          'From %(type)s Server') % {
                              'status': possible_source.status,
                              'body': bodies[-1][:1024],
                              'type': server_type
                          })
        res = None
        if sources:
            sources.sort(key=lambda s: source_key(s[0]))
            source, node = sources.pop()
            for src, _junk in sources:
                self.close_swift_conn(src)
            res = Response(request=req)
            if req.method == 'GET' and \
                    source.status in (HTTP_OK, HTTP_PARTIAL_CONTENT):
                res.app_iter = self._make_app_iter(node, source)
                # See NOTE: swift_conn at top of file about this.
                res.swift_conn = source.swift_conn
            res.status = source.status
            update_headers(res, source.getheaders())
            if not res.environ:
                res.environ = {}
            res.environ['swift_x_timestamp'] = \
                source.getheader('x-timestamp')
            res.accept_ranges = 'bytes'
            res.content_length = source.getheader('Content-Length')
            if source.getheader('Content-Type'):
                res.charset = None
                res.content_type = source.getheader('Content-Type')
        if not res:
            res = self.best_response(req,
                                     statuses,
                                     reasons,
                                     bodies,
                                     '%s %s' % (server_type, req.method),
                                     headers=source_headers)
        try:
            (account, container) = split_path(req.path_info, 1, 2)
            _set_info_cache(self.app, req.environ, account, container, res)
        except ValueError:
            pass
        try:
            (account, container, obj) = split_path(req.path_info, 3, 3, True)
            _set_object_info_cache(self.app, req.environ, account, container,
                                   obj, res)
        except ValueError:
            pass
        return res
Пример #37
0
    def __init__(self, conf, container_ring=None, logger=None):
        #: The dict of configuration values from the [container-sync] section
        #: of the container-server.conf.
        self.conf = conf
        #: Logger to use for container-sync log lines.
        self.logger = logger or get_logger(conf, log_route='container-sync')
        #: Path to the local device mount points.
        self.devices = conf.get('devices', '/srv/node')
        #: Indicates whether mount points should be verified as actual mount
        #: points (normally true, false for tests and SAIO).
        self.mount_check = config_true_value(conf.get('mount_check', 'true'))
        #: Minimum time between full scans. This is to keep the daemon from
        #: running wild on near empty systems.
        self.interval = int(conf.get('interval', 300))
        #: Maximum amount of time to spend syncing a container before moving on
        #: to the next one. If a conatiner sync hasn't finished in this time,
        #: it'll just be resumed next scan.
        self.container_time = int(conf.get('container_time', 60))
        #: ContainerSyncCluster instance for validating sync-to values.
        self.realms_conf = ContainerSyncRealms(
            os.path.join(conf.get('swift_dir', '/etc/swift'),
                         'container-sync-realms.conf'), self.logger)
        #: The list of hosts we're allowed to send syncs to. This can be
        #: overridden by data in self.realms_conf
        self.allowed_sync_hosts = [
            h.strip()
            for h in conf.get('allowed_sync_hosts', '127.0.0.1').split(',')
            if h.strip()
        ]
        self.http_proxies = [
            a.strip() for a in conf.get('sync_proxy', '').split(',')
            if a.strip()
        ]
        #: ContainerSyncStore instance for iterating over synced containers
        self.sync_store = ContainerSyncStore(self.devices, self.logger,
                                             self.mount_check)
        #: Number of containers with sync turned on that were successfully
        #: synced.
        self.container_syncs = 0
        #: Number of successful DELETEs triggered.
        self.container_deletes = 0
        #: Number of successful PUTs triggered.
        self.container_puts = 0
        #: Number of containers whose sync has been turned off, but
        #: are not yet cleared from the sync store.
        self.container_skips = 0
        #: Number of containers that had a failure of some type.
        self.container_failures = 0
        #: Time of last stats report.
        self.reported = time()
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        #: swift.common.ring.Ring for locating containers.
        self.container_ring = container_ring or Ring(self.swift_dir,
                                                     ring_name='container')
        bind_ip = conf.get('bind_ip', '0.0.0.0')
        self._myips = whataremyips(bind_ip)
        self._myport = int(conf.get('bind_port', 6001))
        swift.common.db.DB_PREALLOCATION = \
            config_true_value(conf.get('db_preallocation', 'f'))
        self.conn_timeout = float(conf.get('conn_timeout', 5))
        request_tries = int(conf.get('request_tries') or 3)

        internal_client_conf_path = conf.get('internal_client_conf_path')
        if not internal_client_conf_path:
            self.logger.warning(
                _('Configuration option internal_client_conf_path not '
                  'defined. Using default configuration, See '
                  'internal-client.conf-sample for options'))
            internal_client_conf = ConfigString(ic_conf_body)
        else:
            internal_client_conf = internal_client_conf_path
        try:
            self.swift = InternalClient(internal_client_conf,
                                        'Swift Container Sync', request_tries)
        except IOError as err:
            if err.errno != errno.ENOENT:
                raise
            raise SystemExit(
                _('Unable to load internal client from config: %r (%s)') %
                (internal_client_conf_path, err))
    def replicate(self,
                  override_devices=None,
                  override_partitions=None,
                  override_policies=None):
        """Run a replication pass"""
        self.start = time.time()
        self.suffix_count = 0
        self.suffix_sync = 0
        self.suffix_hash = 0
        self.replication_count = 0
        self.last_replication_count = -1
        self.partition_times = []
        self.my_replication_ips = self._get_my_replication_ips()
        self.all_devs_info = set()
        self.handoffs_remaining = 0

        stats = eventlet.spawn(self.heartbeat)
        lockup_detector = eventlet.spawn(self.detect_lockups)
        eventlet.sleep()  # Give spawns a cycle

        current_nodes = None
        try:
            self.run_pool = GreenPool(size=self.concurrency)
            jobs = self.collect_jobs(override_devices=override_devices,
                                     override_partitions=override_partitions,
                                     override_policies=override_policies)
            for job in jobs:
                current_nodes = job['nodes']
                if override_devices and job['device'] not in override_devices:
                    continue
                if override_partitions and \
                        job['partition'] not in override_partitions:
                    continue
                dev_path = join(self.devices_dir, job['device'])
                if self.mount_check and not ismount(dev_path):
                    self._add_failure_stats([(failure_dev['replication_ip'],
                                              failure_dev['device'])
                                             for failure_dev in job['nodes']])
                    self.logger.warning(_('%s is not mounted'), job['device'])
                    continue
                if self.handoffs_first and not job['delete']:
                    # in handoffs first mode, we won't process primary
                    # partitions until rebalance was successful!
                    if self.handoffs_remaining:
                        self.logger.warning(
                            _("Handoffs first mode still has handoffs "
                              "remaining.  Aborting current "
                              "replication pass."))
                        break
                if not self.check_ring(job['policy'].object_ring):
                    self.logger.info(
                        _("Ring change detected. Aborting "
                          "current replication pass."))
                    return
                try:
                    if isfile(job['path']):
                        # Clean up any (probably zero-byte) files where a
                        # partition should be.
                        self.logger.warning(
                            'Removing partition directory '
                            'which was a file: %s', job['path'])
                        os.remove(job['path'])
                        continue
                except OSError:
                    continue
                if job['delete']:
                    self.run_pool.spawn(self.update_deleted, job)
                else:
                    self.run_pool.spawn(self.update, job)
            current_nodes = None
            with Timeout(self.lockup_timeout):
                self.run_pool.waitall()
        except (Exception, Timeout):
            if current_nodes:
                self._add_failure_stats([(failure_dev['replication_ip'],
                                          failure_dev['device'])
                                         for failure_dev in current_nodes])
            else:
                self._add_failure_stats(self.all_devs_info)
            self.logger.exception(_("Exception in top-level replication loop"))
            self.kill_coros()
        finally:
            stats.kill()
            lockup_detector.kill()
            self.stats_line()
            self.stats['attempted'] = self.replication_count
Пример #39
0
    def _replicate_object(self, partition, object_file, node_id):
        """
        Replicate the db, choosing method based on whether or not it
        already exists on peers.

        :param partition: partition to be replicated to
        :param object_file: DB file name to be replicated
        :param node_id: node id of the node to be replicated to
        :returns: a tuple (success, responses). ``success`` is a boolean that
            is True if the method completed successfully, False otherwise.
            ``responses`` is a list of booleans each of which indicates the
            success or not of replicating to a peer node if replication has
            been attempted. ``success`` is False if any of ``responses`` is
            False; when ``responses`` is empty, ``success`` may be either True
            or False.
        """
        start_time = now = time.time()
        self.logger.debug('Replicating db %s', object_file)
        self.stats['attempted'] += 1
        self.logger.increment('attempts')
        shouldbehere = True
        responses = []
        try:
            broker = self.brokerclass(object_file, pending_timeout=30)
            broker.reclaim(now - self.reclaim_age,
                           now - (self.reclaim_age * 2))
            info = broker.get_replication_info()
            bpart = self.ring.get_part(info['account'], info.get('container'))
            if bpart != int(partition):
                partition = bpart
                # Important to set this false here since the later check only
                # checks if it's on the proper device, not partition.
                shouldbehere = False
                name = '/' + quote(info['account'])
                if 'container' in info:
                    name += '/' + quote(info['container'])
                self.logger.error(
                    'Found %s for %s when it should be on partition %s; will '
                    'replicate out and remove.' % (object_file, name, bpart))
        except (Exception, Timeout) as e:
            if 'no such table' in str(e):
                self.logger.error(_('Quarantining DB %s'), object_file)
                quarantine_db(broker.db_file, broker.db_type)
            else:
                self.logger.exception(_('ERROR reading db %s'), object_file)
            nodes = self.ring.get_part_nodes(int(partition))
            self._add_failure_stats([(failure_dev['replication_ip'],
                                      failure_dev['device'])
                                     for failure_dev in nodes])
            self.logger.increment('failures')
            return False, responses
        if broker.is_reclaimable(now, self.reclaim_age):
            if self.report_up_to_date(info):
                self.delete_db(broker)
            self.logger.timing_since('timing', start_time)
            return True, responses
        failure_devs_info = set()
        nodes = self.ring.get_part_nodes(int(partition))
        local_dev = None
        for node in nodes:
            if node['id'] == node_id:
                local_dev = node
                break
        if shouldbehere:
            shouldbehere = bool([n for n in nodes if n['id'] == node_id])
        # See Footnote [1] for an explanation of the repl_nodes assignment.
        if len(nodes) > 1:
            i = 0
            while i < len(nodes) and nodes[i]['id'] != node_id:
                i += 1
            repl_nodes = nodes[i + 1:] + nodes[:i]
        else:  # Special case if using only a single replica
            repl_nodes = nodes
        more_nodes = self.ring.get_more_nodes(int(partition))
        if not local_dev:
            # Check further if local device is a handoff node
            for node in self.ring.get_more_nodes(int(partition)):
                if node['id'] == node_id:
                    local_dev = node
                    break
        for node in repl_nodes:
            different_region = False
            if local_dev and local_dev['region'] != node['region']:
                # This additional information will help later if we
                # want to handle syncing to a node in different
                # region with some optimizations.
                different_region = True
            success = False
            try:
                success = self._repl_to_node(node, broker, partition, info,
                                             different_region)
            except DriveNotMounted:
                try:
                    repl_nodes.append(next(more_nodes))
                except StopIteration:
                    self.logger.error(
                        _('ERROR There are not enough handoff nodes to reach '
                          'replica count for partition %s'), partition)
                self.logger.error(_('ERROR Remote drive not mounted %s'), node)
            except (Exception, Timeout):
                self.logger.exception(
                    _('ERROR syncing %(file)s with node'
                      ' %(node)s'), {
                          'file': object_file,
                          'node': node
                      })
            if not success:
                failure_devs_info.add((node['replication_ip'], node['device']))
            self.logger.increment('successes' if success else 'failures')
            responses.append(success)
        try:
            self._post_replicate_hook(broker, info, responses)
        except (Exception, Timeout):
            self.logger.exception(
                'UNHANDLED EXCEPTION: in post replicate '
                'hook for %s', broker.db_file)
        if not shouldbehere:
            if not self.cleanup_post_replicate(broker, info, responses):
                failure_devs_info.update([(failure_dev['replication_ip'],
                                           failure_dev['device'])
                                          for failure_dev in repl_nodes])
        target_devs_info = set([(target_dev['replication_ip'],
                                 target_dev['device'])
                                for target_dev in repl_nodes])
        self.stats['success'] += len(target_devs_info - failure_devs_info)
        self._add_failure_stats(failure_devs_info)

        self.logger.timing_since('timing', start_time)
        if shouldbehere:
            responses.append(True)
        return all(responses), responses
Пример #40
0
    def process_object_update(self, update_path, device, policy):
        """
        Process the object information to be updated and update.

        :param update_path: path to pickled object update file
        :param device: path to device
        :param policy: storage policy of object update
        """
        try:
            update = pickle.load(open(update_path, 'rb'))
        except Exception:
            self.logger.exception(_('ERROR Pickle problem, quarantining %s'),
                                  update_path)
            self.stats.quarantines += 1
            self.logger.increment('quarantines')
            target_path = os.path.join(device, 'quarantined', 'objects',
                                       os.path.basename(update_path))
            renamer(update_path, target_path, fsync=False)
            return

        def do_update():
            successes = update.get('successes', [])
            headers_out = HeaderKeyDict(update['headers'].copy())
            headers_out['user-agent'] = 'object-updater %s' % os.getpid()
            headers_out.setdefault('X-Backend-Storage-Policy-Index',
                                   str(int(policy)))
            headers_out.setdefault('X-Backend-Accept-Redirect', 'true')
            headers_out.setdefault('X-Backend-Accept-Quoted-Location', 'true')
            container_path = update.get('container_path')
            if container_path:
                acct, cont = split_path('/' + container_path, minsegs=2)
            else:
                acct, cont = update['account'], update['container']
            part, nodes = self.get_container_ring().get_nodes(acct, cont)
            obj = '/%s/%s/%s' % (acct, cont, update['obj'])
            events = [
                spawn(self.object_update, node, part, update['op'], obj,
                      headers_out) for node in nodes
                if node['id'] not in successes
            ]
            success = True
            new_successes = rewrite_pickle = False
            redirect = None
            redirects = set()
            for event in events:
                event_success, node_id, redirect = event.wait()
                if event_success is True:
                    successes.append(node_id)
                    new_successes = True
                else:
                    success = False
                if redirect:
                    redirects.add(redirect)

            if success:
                self.stats.successes += 1
                self.logger.increment('successes')
                self.logger.debug('Update sent for %(obj)s %(path)s', {
                    'obj': obj,
                    'path': update_path
                })
                self.stats.unlinks += 1
                self.logger.increment('unlinks')
                os.unlink(update_path)
                try:
                    # If this was the last async_pending in the directory,
                    # then this will succeed. Otherwise, it'll fail, and
                    # that's okay.
                    os.rmdir(os.path.dirname(update_path))
                except OSError:
                    pass
            elif redirects:
                # erase any previous successes
                update.pop('successes', None)
                redirect = max(redirects, key=lambda x: x[-1])[0]
                redirect_history = update.setdefault('redirect_history', [])
                if redirect in redirect_history:
                    # force next update to be sent to root, reset history
                    update['container_path'] = None
                    update['redirect_history'] = []
                else:
                    update['container_path'] = redirect
                    redirect_history.append(redirect)
                self.stats.redirects += 1
                self.logger.increment("redirects")
                self.logger.debug(
                    'Update redirected for %(obj)s %(path)s to %(shard)s', {
                        'obj': obj,
                        'path': update_path,
                        'shard': update['container_path']
                    })
                rewrite_pickle = True
            else:
                self.stats.failures += 1
                self.logger.increment('failures')
                self.logger.debug('Update failed for %(obj)s %(path)s', {
                    'obj': obj,
                    'path': update_path
                })
                if new_successes:
                    update['successes'] = successes
                    rewrite_pickle = True

            return rewrite_pickle, redirect

        rewrite_pickle, redirect = do_update()
        if redirect:
            # make one immediate retry to the redirect location
            rewrite_pickle, redirect = do_update()
        if rewrite_pickle:
            write_pickle(update, update_path,
                         os.path.join(device, get_tmp_dir(policy)))
Пример #41
0
    def _iter_async_pendings(self, device):
        """
        Locate and yield all the async pendings on the device. Multiple updates
        for the same object will come out in reverse-chronological order
        (i.e. newest first) so that callers can skip stale async_pendings.

        Tries to clean up empty directories as it goes.
        """
        # loop through async pending dirs for all policies
        for asyncdir in self._listdir(device):
            # we only care about directories
            async_pending = os.path.join(device, asyncdir)
            if not asyncdir.startswith(ASYNCDIR_BASE):
                # skip stuff like "accounts", "containers", etc.
                continue
            if not os.path.isdir(async_pending):
                continue
            try:
                base, policy = split_policy_string(asyncdir)
            except PolicyError as e:
                # This isn't an error, but a misconfiguration. Logging a
                # warning should be sufficient.
                self.logger.warning(
                    _('Directory %(directory)r does not map '
                      'to a valid policy (%(error)s)') % {
                          'directory': asyncdir,
                          'error': e
                      })
                continue
            for prefix in self._listdir(async_pending):
                prefix_path = os.path.join(async_pending, prefix)
                if not os.path.isdir(prefix_path):
                    continue
                last_obj_hash = None
                for update in sorted(self._listdir(prefix_path), reverse=True):
                    update_path = os.path.join(prefix_path, update)
                    if not os.path.isfile(update_path):
                        continue
                    try:
                        obj_hash, timestamp = update.split('-')
                    except ValueError:
                        self.stats.errors += 1
                        self.logger.increment('errors')
                        self.logger.error(
                            _('ERROR async pending file with unexpected '
                              'name %s') % (update_path))
                        continue
                    # Async pendings are stored on disk like this:
                    #
                    # <device>/async_pending/<suffix>/<obj_hash>-<timestamp>
                    #
                    # If there are multiple updates for a given object,
                    # they'll look like this:
                    #
                    # <device>/async_pending/<obj_suffix>/<obj_hash>-<timestamp1>
                    # <device>/async_pending/<obj_suffix>/<obj_hash>-<timestamp2>
                    # <device>/async_pending/<obj_suffix>/<obj_hash>-<timestamp3>
                    #
                    # Async updates also have the property that newer
                    # updates contain all the information in older updates.
                    # Since we sorted the directory listing in reverse
                    # order, we'll see timestamp3 first, yield it, and then
                    # unlink timestamp2 and timestamp1 since we know they
                    # are obsolete.
                    #
                    # This way, our caller only gets useful async_pendings.
                    if obj_hash == last_obj_hash:
                        self.stats.unlinks += 1
                        self.logger.increment('unlinks')
                        os.unlink(update_path)
                    else:
                        last_obj_hash = obj_hash
                        yield {
                            'device': device,
                            'policy': policy,
                            'path': update_path,
                            'obj_hash': obj_hash,
                            'timestamp': timestamp
                        }
Пример #42
0
    def object_audit(self, location):
        """
        Audits the given object location.

        :param location: an audit location
                         (from diskfile.object_audit_location_generator)
        """
        def raise_dfq(msg):
            raise DiskFileQuarantined(msg)

        diskfile_mgr = self.diskfile_router[location.policy]
        # this method doesn't normally raise errors, even if the audit
        # location does not exist; if this raises an unexpected error it
        # will get logged in failsafe
        df = diskfile_mgr.get_diskfile_from_audit_location(location)
        reader = None
        try:
            with df.open():
                metadata = df.get_metadata()
                obj_size = int(metadata['Content-Length'])
                if self.stats_sizes:
                    self.record_stats(obj_size)
                if obj_size and not self.zero_byte_only_at_fps:
                    reader = df.reader(_quarantine_hook=raise_dfq)
            if reader:
                with closing(reader):
                    for chunk in reader:
                        chunk_len = len(chunk)
                        self.bytes_running_time = ratelimit_sleep(
                            self.bytes_running_time,
                            self.max_bytes_per_second,
                            incr_by=chunk_len)
                        self.bytes_processed += chunk_len
                        self.total_bytes_processed += chunk_len
        except DiskFileQuarantined as err:
            self.quarantines += 1
            self.logger.error(
                _('ERROR Object %(obj)s failed audit and was'
                  ' quarantined: %(err)s'), {
                      'obj': location,
                      'err': err
                  })
        except DiskFileExpired:
            pass  # ignore expired objects
        except DiskFileDeleted:
            # If there is a reclaimable tombstone, we'll invalidate the hash
            # to trigger the replicator to rehash/cleanup this suffix
            ts = df._ondisk_info['ts_info']['timestamp']
            if (not self.zero_byte_only_at_fps
                    and (time.time() - float(ts)) > df.manager.reclaim_age):
                df.manager.invalidate_hash(dirname(df._datadir))
        except DiskFileNotExist:
            pass

        self.passes += 1
        # _ondisk_info attr is initialized to None and filled in by open
        ondisk_info_dict = df._ondisk_info or {}
        if 'unexpected' in ondisk_info_dict:
            is_rsync_tempfile = lambda fpath: RE_RSYNC_TEMPFILE.match(
                basename(fpath))
            rsync_tempfile_paths = filter(is_rsync_tempfile,
                                          ondisk_info_dict['unexpected'])
            mtime = time.time() - self.rsync_tempfile_timeout
            unlink_paths_older_than(rsync_tempfile_paths, mtime)
Пример #43
0
    def container_sync_row(self, row, sync_to, user_key, broker, info, realm,
                           realm_key):
        """
        Sends the update the row indicates to the sync_to container.

        :param row: The updated row in the local database triggering the sync
                    update.
        :param sync_to: The URL to the remote container.
        :param user_key: The X-Container-Sync-Key to use when sending requests
                         to the other container.
        :param broker: The local container database broker.
        :param info: The get_info result from the local container database
                     broker.
        :param realm: The realm from self.realms_conf, if there is one.
            If None, fallback to using the older allowed_sync_hosts
            way of syncing.
        :param realm_key: The realm key from self.realms_conf, if there
            is one. If None, fallback to using the older
            allowed_sync_hosts way of syncing.
        :returns: True on success
        """
        try:
            start_time = time()
            if row['deleted']:
                try:
                    headers = {'x-timestamp': row['created_at']}
                    if realm and realm_key:
                        nonce = uuid.uuid4().hex
                        path = urlparse(sync_to).path + '/' + quote(
                            row['name'])
                        sig = self.realms_conf.get_sig('DELETE', path,
                                                       headers['x-timestamp'],
                                                       nonce, realm_key,
                                                       user_key)
                        headers['x-container-sync-auth'] = '%s %s %s' % (
                            realm, nonce, sig)
                    else:
                        headers['x-container-sync-key'] = user_key
                    delete_object(sync_to,
                                  name=row['name'],
                                  headers=headers,
                                  proxy=self.select_http_proxy(),
                                  logger=self.logger,
                                  timeout=self.conn_timeout)
                except ClientException as err:
                    if err.http_status != HTTP_NOT_FOUND:
                        raise
                self.container_deletes += 1
                self.logger.increment('deletes')
                self.logger.timing_since('deletes.timing', start_time)
            else:
                part, nodes = \
                    self.get_object_ring(info['storage_policy_index']). \
                    get_nodes(info['account'], info['container'],
                              row['name'])
                shuffle(nodes)
                exc = None
                looking_for_timestamp = Timestamp(row['created_at'])
                timestamp = -1
                headers = body = None
                # look up for the newest one
                headers_out = {
                    'X-Newest':
                    True,
                    'X-Backend-Storage-Policy-Index':
                    str(info['storage_policy_index'])
                }
                try:
                    source_obj_status, source_obj_info, source_obj_iter = \
                        self.swift.get_object(info['account'],
                                              info['container'], row['name'],
                                              headers=headers_out,
                                              acceptable_statuses=(2, 4))

                except (Exception, UnexpectedResponse, Timeout) as err:
                    source_obj_info = {}
                    source_obj_iter = None
                    exc = err
                timestamp = Timestamp(source_obj_info.get('x-timestamp', 0))
                headers = source_obj_info
                body = source_obj_iter
                if timestamp < looking_for_timestamp:
                    if exc:
                        raise exc
                    raise Exception(
                        _('Unknown exception trying to GET: '
                          '%(account)r %(container)r %(object)r'), {
                              'account': info['account'],
                              'container': info['container'],
                              'object': row['name']
                          })
                for key in ('date', 'last-modified'):
                    if key in headers:
                        del headers[key]
                if 'etag' in headers:
                    headers['etag'] = headers['etag'].strip('"')
                if 'content-type' in headers:
                    headers['content-type'] = clean_content_type(
                        headers['content-type'])
                headers['x-timestamp'] = row['created_at']
                if realm and realm_key:
                    nonce = uuid.uuid4().hex
                    path = urlparse(sync_to).path + '/' + quote(row['name'])
                    sig = self.realms_conf.get_sig('PUT', path,
                                                   headers['x-timestamp'],
                                                   nonce, realm_key, user_key)
                    headers['x-container-sync-auth'] = '%s %s %s' % (
                        realm, nonce, sig)
                else:
                    headers['x-container-sync-key'] = user_key
                put_object(sync_to,
                           name=row['name'],
                           headers=headers,
                           contents=FileLikeIter(body),
                           proxy=self.select_http_proxy(),
                           logger=self.logger,
                           timeout=self.conn_timeout)
                self.container_puts += 1
                self.logger.increment('puts')
                self.logger.timing_since('puts.timing', start_time)
        except ClientException as err:
            if err.http_status == HTTP_UNAUTHORIZED:
                self.logger.info(
                    _('Unauth %(sync_from)r => %(sync_to)r'), {
                        'sync_from':
                        '%s/%s' %
                        (quote(info['account']), quote(info['container'])),
                        'sync_to':
                        sync_to
                    })
            elif err.http_status == HTTP_NOT_FOUND:
                self.logger.info(
                    _('Not found %(sync_from)r => %(sync_to)r \
                      - object %(obj_name)r'), {
                        'sync_from':
                        '%s/%s' %
                        (quote(info['account']), quote(info['container'])),
                        'sync_to':
                        sync_to,
                        'obj_name':
                        row['name']
                    })
            else:
                self.logger.exception(_('ERROR Syncing %(db_file)s %(row)s'), {
                    'db_file': str(broker),
                    'row': row
                })
            self.container_failures += 1
            self.logger.increment('failures')
            return False
        except (Exception, Timeout) as err:
            self.logger.exception(_('ERROR Syncing %(db_file)s %(row)s'), {
                'db_file': str(broker),
                'row': row
            })
            self.container_failures += 1
            self.logger.increment('failures')
            return False
        return True
Пример #44
0
        for var in 'LC_ALL', 'SWIFT_LOCALEDIR', 'LANGUAGE':
            self.orig_env[var] = os.environ.get(var)
        os.environ['LC_ALL'] = 'eo'
        os.environ['SWIFT_LOCALEDIR'] = os.path.dirname(__file__)
        os.environ['LANGUAGE'] = ''
        self.orig_stop = threading._DummyThread._Thread__stop
        # See http://stackoverflow.com/questions/13193278/\
        #     understand-python-threading-bug
        threading._DummyThread._Thread__stop = lambda x: 42

    def tearDown(self):
        for var, val in self.orig_env.items():
            if val is not None:
                os.environ[var] = val
            else:
                del os.environ[var]
        threading._DummyThread._Thread__stop = self.orig_stop

    def test_translations(self):
        path = ':'.join(sys.path)
        translated_message = check_output(['python', __file__, path])
        self.assertEqual(translated_message, 'prova mesaĝo\n')


if __name__ == "__main__":
    os.environ['LC_ALL'] = 'eo'
    os.environ['SWIFT_LOCALEDIR'] = os.path.dirname(__file__)
    sys.path = sys.argv[1].split(':')
    from swift import gettext_ as _
    print(_('test message'))
Пример #45
0
    def container_sync(self, path):
        """
        Checks the given path for a container database, determines if syncing
        is turned on for that database and, if so, sends any updates to the
        other container.

        :param path: the path to a container db
        """
        broker = None
        try:
            broker = ContainerBroker(path)
            # The path we pass to the ContainerBroker is a real path of
            # a container DB. If we get here, however, it means that this
            # path is linked from the sync_containers dir. In rare cases
            # of race or processes failures the link can be stale and
            # the get_info below will raise a DB doesn't exist exception
            # In this case we remove the stale link and raise an error
            # since in most cases the db should be there.
            try:
                info = broker.get_info()
            except DatabaseConnectionError as db_err:
                if str(db_err).endswith("DB doesn't exist"):
                    self.sync_store.remove_synced_container(broker)
                raise

            x, nodes = self.container_ring.get_nodes(info['account'],
                                                     info['container'])
            for ordinal, node in enumerate(nodes):
                if is_local_device(self._myips, self._myport, node['ip'],
                                   node['port']):
                    break
            else:
                return
            if not broker.is_deleted():
                sync_to = None
                user_key = None
                sync_point1 = info['x_container_sync_point1']
                sync_point2 = info['x_container_sync_point2']
                for key, (value, timestamp) in broker.metadata.items():
                    if key.lower() == 'x-container-sync-to':
                        sync_to = value
                    elif key.lower() == 'x-container-sync-key':
                        user_key = value
                if not sync_to or not user_key:
                    self.container_skips += 1
                    self.logger.increment('skips')
                    return
                err, sync_to, realm, realm_key = validate_sync_to(
                    sync_to, self.allowed_sync_hosts, self.realms_conf)
                if err:
                    self.logger.info(
                        _('ERROR %(db_file)s: %(validate_sync_to_err)s'), {
                            'db_file': str(broker),
                            'validate_sync_to_err': err
                        })
                    self.container_failures += 1
                    self.logger.increment('failures')
                    return
                stop_at = time() + self.container_time
                next_sync_point = None
                while time() < stop_at and sync_point2 < sync_point1:
                    rows = broker.get_items_since(sync_point2, 1)
                    if not rows:
                        break
                    row = rows[0]
                    if row['ROWID'] > sync_point1:
                        break
                    key = hash_path(info['account'],
                                    info['container'],
                                    row['name'],
                                    raw_digest=True)
                    # This node will only initially sync out one third of the
                    # objects (if 3 replicas, 1/4 if 4, etc.) and will skip
                    # problematic rows as needed in case of faults.
                    # This section will attempt to sync previously skipped
                    # rows in case the previous attempts by any of the nodes
                    # didn't succeed.
                    if not self.container_sync_row(row, sync_to, user_key,
                                                   broker, info, realm,
                                                   realm_key):
                        if not next_sync_point:
                            next_sync_point = sync_point2
                    sync_point2 = row['ROWID']
                    broker.set_x_container_sync_points(None, sync_point2)
                if next_sync_point:
                    broker.set_x_container_sync_points(None, next_sync_point)
                while time() < stop_at:
                    rows = broker.get_items_since(sync_point1, 1)
                    if not rows:
                        break
                    row = rows[0]
                    key = hash_path(info['account'],
                                    info['container'],
                                    row['name'],
                                    raw_digest=True)
                    # This node will only initially sync out one third of the
                    # objects (if 3 replicas, 1/4 if 4, etc.). It'll come back
                    # around to the section above and attempt to sync
                    # previously skipped rows in case the other nodes didn't
                    # succeed or in case it failed to do so the first time.
                    if unpack_from('>I', key)[0] % \
                            len(nodes) == ordinal:
                        self.container_sync_row(row, sync_to, user_key, broker,
                                                info, realm, realm_key)
                    sync_point1 = row['ROWID']
                    broker.set_x_container_sync_points(sync_point1, None)
                self.container_syncs += 1
                self.logger.increment('syncs')
        except (Exception, Timeout):
            self.container_failures += 1
            self.logger.increment('failures')
            self.logger.exception(_('ERROR Syncing %s'),
                                  broker if broker else path)
Пример #46
0
 def audit_all_objects(self, mode='once'):
     self.logger.info(
         _('Begin object audit "%s" mode (%s)') % (mode, self.auditor_type))
     begin = reported = time.time()
     self.total_bytes_processed = 0
     self.total_files_processed = 0
     total_quarantines = 0
     total_errors = 0
     time_auditing = 0
     all_locs = audit_location_generator(
         self.devices,
         diskfile.DATADIR,
         '.data',
         mount_check=self.diskfile_mgr.mount_check,
         logger=self.logger)
     for path, device, partition in all_locs:
         loop_time = time.time()
         self.failsafe_object_audit(path, device, partition)
         self.logger.timing_since('timing', loop_time)
         self.files_running_time = ratelimit_sleep(
             self.files_running_time, self.max_files_per_second)
         self.total_files_processed += 1
         now = time.time()
         if now - reported >= self.log_time:
             self.logger.info(
                 _('Object audit (%(type)s). '
                   'Since %(start_time)s: Locally: %(passes)d passed, '
                   '%(quars)d quarantined, %(errors)d errors '
                   'files/sec: %(frate).2f , bytes/sec: %(brate).2f, '
                   'Total time: %(total).2f, Auditing time: %(audit).2f, '
                   'Rate: %(audit_rate).2f') % {
                       'type': self.auditor_type,
                       'start_time': time.ctime(reported),
                       'passes': self.passes,
                       'quars': self.quarantines,
                       'errors': self.errors,
                       'frate': self.passes / (now - reported),
                       'brate': self.bytes_processed / (now - reported),
                       'total': (now - begin),
                       'audit': time_auditing,
                       'audit_rate': time_auditing / (now - begin)
                   })
             dump_recon_cache(
                 {
                     'object_auditor_stats_%s' % self.auditor_type: {
                         'errors': self.errors,
                         'passes': self.passes,
                         'quarantined': self.quarantines,
                         'bytes_processed': self.bytes_processed,
                         'start_time': reported,
                         'audit_time': time_auditing
                     }
                 }, self.rcache, self.logger)
             reported = now
             total_quarantines += self.quarantines
             total_errors += self.errors
             self.passes = 0
             self.quarantines = 0
             self.errors = 0
             self.bytes_processed = 0
         time_auditing += (now - loop_time)
     # Avoid divide by zero during very short runs
     elapsed = (time.time() - begin) or 0.000001
     self.logger.info(
         _('Object audit (%(type)s) "%(mode)s" mode '
           'completed: %(elapsed).02fs. Total quarantined: %(quars)d, '
           'Total errors: %(errors)d, Total files/sec: %(frate).2f , '
           'Total bytes/sec: %(brate).2f, Auditing time: %(audit).2f, '
           'Rate: %(audit_rate).2f') % {
               'type': self.auditor_type,
               'mode': mode,
               'elapsed': elapsed,
               'quars': total_quarantines,
               'errors': total_errors,
               'frate': self.total_files_processed / elapsed,
               'brate': self.total_bytes_processed / elapsed,
               'audit': time_auditing,
               'audit_rate': time_auditing / elapsed
           })
     if self.stats_sizes:
         self.logger.info(
             _('Object audit stats: %s') % json.dumps(self.stats_buckets))
Пример #47
0
    def stats_line(self):
        """
        Logs various stats for the currently running reconstruction pass.
        """
        if (self.device_count and self.part_count
                and self.reconstruction_device_count):
            elapsed = (time.time() - self.start) or 0.000001
            rate = self.reconstruction_part_count / elapsed
            total_part_count = (self.part_count * self.device_count /
                                self.reconstruction_device_count)
            self.logger.info(
                _("%(reconstructed)d/%(total)d (%(percentage).2f%%)"
                  " partitions of %(device)d/%(dtotal)d "
                  "(%(dpercentage).2f%%) devices"
                  " reconstructed in %(time).2fs "
                  "(%(rate).2f/sec, %(remaining)s remaining)"), {
                      'reconstructed':
                      self.reconstruction_part_count,
                      'total':
                      self.part_count,
                      'percentage':
                      self.reconstruction_part_count * 100.0 / self.part_count,
                      'device':
                      self.reconstruction_device_count,
                      'dtotal':
                      self.device_count,
                      'dpercentage':
                      self.reconstruction_device_count * 100.0 /
                      self.device_count,
                      'time':
                      time.time() - self.start,
                      'rate':
                      rate,
                      'remaining':
                      '%d%s' %
                      compute_eta(self.start, self.reconstruction_part_count,
                                  total_part_count)
                  })

            if self.suffix_count and self.partition_times:
                self.logger.info(
                    _("%(checked)d suffixes checked - "
                      "%(hashed).2f%% hashed, %(synced).2f%% synced"), {
                          'checked': self.suffix_count,
                          'hashed':
                          (self.suffix_hash * 100.0) / self.suffix_count,
                          'synced':
                          (self.suffix_sync * 100.0) / self.suffix_count
                      })
                self.partition_times.sort()
                self.logger.info(
                    _("Partition times: max %(max).4fs, "
                      "min %(min).4fs, med %(med).4fs"), {
                          'max': self.partition_times[-1],
                          'min': self.partition_times[0],
                          'med':
                          self.partition_times[len(self.partition_times) // 2]
                      })
        else:
            self.logger.info(_("Nothing reconstructed for %s seconds."),
                             (time.time() - self.start))
Пример #48
0
    def _replicate_object(self, partition, object_file, node_id):
        """
        Replicate the db, choosing method based on whether or not it
        already exists on peers.

        :param partition: partition to be replicated to
        :param object_file: DB file name to be replicated
        :param node_id: node id of the node to be replicated to
        """
        start_time = now = time.time()
        self.logger.debug(_('Replicating db %s'), object_file)
        self.stats['attempted'] += 1
        self.logger.increment('attempts')
        shouldbehere = True
        try:
            broker = self.brokerclass(object_file, pending_timeout=30)
            broker.reclaim(now - self.reclaim_age,
                           now - (self.reclaim_age * 2))
            info = broker.get_replication_info()
            full_info = broker.get_info()
            bpart = self.ring.get_part(
                full_info['account'], full_info.get('container'))
            if bpart != int(partition):
                partition = bpart
                # Important to set this false here since the later check only
                # checks if it's on the proper device, not partition.
                shouldbehere = False
                name = '/' + quote(full_info['account'])
                if 'container' in full_info:
                    name += '/' + quote(full_info['container'])
                self.logger.error(
                    'Found %s for %s when it should be on partition %s; will '
                    'replicate out and remove.' % (object_file, name, bpart))
        except (Exception, Timeout) as e:
            if 'no such table' in str(e):
                self.logger.error(_('Quarantining DB %s'), object_file)
                quarantine_db(broker.db_file, broker.db_type)
            else:
                self.logger.exception(_('ERROR reading db %s'), object_file)
            self.stats['failure'] += 1
            self.logger.increment('failures')
            return
        # The db is considered deleted if the delete_timestamp value is greater
        # than the put_timestamp, and there are no objects.
        delete_timestamp = 0
        try:
            delete_timestamp = float(info['delete_timestamp'])
        except ValueError:
            pass
        put_timestamp = 0
        try:
            put_timestamp = float(info['put_timestamp'])
        except ValueError:
            pass
        if delete_timestamp < (now - self.reclaim_age) and \
                delete_timestamp > put_timestamp and \
                info['count'] in (None, '', 0, '0'):
            if self.report_up_to_date(full_info):
                self.delete_db(object_file)
            self.logger.timing_since('timing', start_time)
            return
        responses = []
        nodes = self.ring.get_part_nodes(int(partition))
        if shouldbehere:
            shouldbehere = bool([n for n in nodes if n['id'] == node_id])
        # See Footnote [1] for an explanation of the repl_nodes assignment.
        i = 0
        while i < len(nodes) and nodes[i]['id'] != node_id:
            i += 1
        repl_nodes = nodes[i + 1:] + nodes[:i]
        more_nodes = self.ring.get_more_nodes(int(partition))
        for node in repl_nodes:
            success = False
            try:
                success = self._repl_to_node(node, broker, partition, info)
            except DriveNotMounted:
                repl_nodes.append(more_nodes.next())
                self.logger.error(_('ERROR Remote drive not mounted %s'), node)
            except (Exception, Timeout):
                self.logger.exception(_('ERROR syncing %(file)s with node'
                                        ' %(node)s'),
                                      {'file': object_file, 'node': node})
            self.stats['success' if success else 'failure'] += 1
            self.logger.increment('successes' if success else 'failures')
            responses.append(success)
        if not shouldbehere and all(responses):
            # If the db shouldn't be on this node and has been successfully
            # synced to all of its peers, it can be removed.
            self.delete_db(object_file)
        self.logger.timing_since('timing', start_time)
Пример #49
0
 def index_page(self,
                log_files=None,
                sort='time',
                limit=-1,
                fulldirs=0,
                nfl_filter='',
                profile_id='current',
                url='#'):
     headers = [('content-type', 'text/html')]
     if len(log_files) == 0:
         return empty_description, headers
     try:
         stats = Stats2(*log_files)
     except (IOError, ValueError):
         raise DataLoadFailure(
             _('Can not load profile data from %s.') % log_files)
     if not fulldirs:
         stats.strip_dirs()
     stats.sort_stats(sort)
     nfl_filter_esc =\
         nfl_filter.replace('(', '\(').replace(')', '\)')
     amount = [nfl_filter_esc, limit] if nfl_filter_esc else [limit]
     profile_html = self.generate_stats_html(stats, self.app_path,
                                             profile_id, *amount)
     description = "Profiling information is generated by using\
                   '%s' profiler." % self.profile_module
     sort_repl = '<option value="%s">' % sort
     sort_selected = '<option value="%s" selected>' % sort
     sort = sort_tmpl.replace(sort_repl, sort_selected)
     plist = ''.join([
         '<option value="%s">%s</option>' % (p, p)
         for p in self.profile_log.get_all_pids()
     ])
     profile_element = string.Template(profile_tmpl).substitute(
         {'profile_list': plist})
     profile_repl = '<option value="%s">' % profile_id
     profile_selected = '<option value="%s" selected>' % profile_id
     profile_element = profile_element.replace(profile_repl,
                                               profile_selected)
     limit_repl = '<option value="%s">' % limit
     limit_selected = '<option value="%s" selected>' % limit
     limit = limit_tmpl.replace(limit_repl, limit_selected)
     fulldirs_checked = 'checked' if fulldirs else ''
     fulldirs_element = string.Template(fulldirs_tmpl).substitute(
         {'fulldir_checked': fulldirs_checked})
     nfl_filter_element = string.Template(nfl_filter_tmpl).\
         substitute({'nfl_filter': nfl_filter})
     form_elements = string.Template(formelements_tmpl).substitute({
         'description':
         description,
         'action':
         url,
         'profile':
         profile_element,
         'sort':
         sort,
         'limit':
         limit,
         'fulldirs':
         fulldirs_element,
         'nfl_filter':
         nfl_filter_element,
     })
     content = string.Template(index_tmpl).substitute({
         'formelements':
         form_elements,
         'action':
         url,
         'description':
         description,
         'profilehtml':
         profile_html,
     })
     return content, headers
    def build_replication_jobs(self,
                               policy,
                               ips,
                               override_devices=None,
                               override_partitions=None):
        """
        Helper function for collect_jobs to build jobs for replication
        using replication style storage policy
        """
        jobs = []
        self.all_devs_info.update([(dev['replication_ip'], dev['device'])
                                   for dev in policy.object_ring.devs if dev])
        data_dir = get_data_dir(policy)
        found_local = False
        for local_dev in [
                dev for dev in policy.object_ring.devs if
            (dev and is_local_device(ips, self.port, dev['replication_ip'],
                                     dev['replication_port']) and
             (override_devices is None or dev['device'] in override_devices))
        ]:
            found_local = True
            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(policy))
            if self.mount_check and not ismount(dev_path):
                self._add_failure_stats([
                    (failure_dev['replication_ip'], failure_dev['device'])
                    for failure_dev in policy.object_ring.devs if failure_dev
                ])
                self.logger.warning(_('%s is not mounted'),
                                    local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception('ERROR creating %s' % obj_path)
                continue
            for partition in os.listdir(obj_path):
                if (override_partitions is not None
                        and partition not in override_partitions):
                    continue

                part_nodes = None
                try:
                    job_path = join(obj_path, partition)
                    part_nodes = policy.object_ring.get_part_nodes(
                        int(partition))
                    nodes = [
                        node for node in part_nodes
                        if node['id'] != local_dev['id']
                    ]
                    jobs.append(
                        dict(path=job_path,
                             device=local_dev['device'],
                             obj_path=obj_path,
                             nodes=nodes,
                             delete=len(nodes) > len(part_nodes) - 1,
                             policy=policy,
                             partition=partition,
                             region=local_dev['region']))
                except ValueError:
                    if part_nodes:
                        self._add_failure_stats([
                            (failure_dev['replication_ip'],
                             failure_dev['device']) for failure_dev in nodes
                        ])
                    else:
                        self._add_failure_stats([
                            (failure_dev['replication_ip'],
                             failure_dev['device'])
                            for failure_dev in policy.object_ring.devs
                            if failure_dev
                        ])
                    continue
        if not found_local:
            self.logger.error(
                "Can't find itself in policy with index %d with"
                " ips %s and with port %s in ring file, not"
                " replicating", int(policy), ", ".join(ips), self.port)
        return jobs
Пример #51
0
 def run_forever(self, *args, **kwargs):
     """
     Run the updater continuously.
     """
     time.sleep(random() * self.interval)
     while True:
         self.logger.info(_('Begin container update sweep'))
         begin = time.time()
         now = time.time()
         expired_suppressions = \
             [a for a, u in self.account_suppressions.items()
              if u < now]
         for account in expired_suppressions:
             del self.account_suppressions[account]
         pid2filename = {}
         # read from account ring to ensure it's fresh
         self.get_account_ring().get_nodes('')
         for path in self.get_paths():
             while len(pid2filename) >= self.concurrency:
                 pid = os.wait()[0]
                 try:
                     self._load_suppressions(pid2filename[pid])
                 finally:
                     del pid2filename[pid]
             fd, tmpfilename = mkstemp()
             os.close(fd)
             pid = os.fork()
             if pid:
                 pid2filename[pid] = tmpfilename
             else:
                 signal.signal(signal.SIGTERM, signal.SIG_DFL)
                 eventlet_monkey_patch()
                 self.no_changes = 0
                 self.successes = 0
                 self.failures = 0
                 self.new_account_suppressions = open(tmpfilename, 'w')
                 forkbegin = time.time()
                 self.container_sweep(path)
                 elapsed = time.time() - forkbegin
                 self.logger.debug(
                     _('Container update sweep of %(path)s completed: '
                       '%(elapsed).02fs, %(success)s successes, %(fail)s '
                       'failures, %(no_change)s with no changes'), {
                           'path': path,
                           'elapsed': elapsed,
                           'success': self.successes,
                           'fail': self.failures,
                           'no_change': self.no_changes
                       })
                 sys.exit()
         while pid2filename:
             pid = os.wait()[0]
             try:
                 self._load_suppressions(pid2filename[pid])
             finally:
                 del pid2filename[pid]
         elapsed = time.time() - begin
         self.logger.info(_('Container update sweep completed: %.02fs'),
                          elapsed)
         dump_recon_cache({'container_updater_sweep': elapsed}, self.rcache,
                          self.logger)
         if elapsed < self.interval:
             time.sleep(self.interval - elapsed)
Пример #52
0
    def handle_request(self, req):
        """
        Entry point for proxy server.
        Should return a WSGI-style callable (such as swob.Response).

        :param req: swob.Request object
        """
        try:
            self.logger.set_statsd_prefix('proxy-server')
            if req.content_length and req.content_length < 0:
                self.logger.increment('errors')
                return HTTPBadRequest(request=req,
                                      body='Invalid Content-Length')

            try:
                if not check_utf8(req.path_info):
                    self.logger.increment('errors')
                    return HTTPPreconditionFailed(
                        request=req, body='Invalid UTF8 or contains NULL')
            except UnicodeError:
                self.logger.increment('errors')
                return HTTPPreconditionFailed(
                    request=req, body='Invalid UTF8 or contains NULL')

            try:
                controller, path_parts = self.get_controller(req)
                p = req.path_info
                if isinstance(p, six.text_type):
                    p = p.encode('utf-8')
            except APIVersionError:
                self.logger.increment('errors')
                return HTTPBadRequest(request=req)
            except ValueError:
                self.logger.increment('errors')
                return HTTPNotFound(request=req)
            if not controller:
                self.logger.increment('errors')
                return HTTPPreconditionFailed(request=req, body='Bad URL')
            if self.deny_host_headers and \
                    req.host.split(':')[0] in self.deny_host_headers:
                return HTTPForbidden(request=req, body='Invalid host header')

            self.logger.set_statsd_prefix('proxy-server.' +
                                          controller.server_type.lower())
            controller = controller(self, **path_parts)
            if 'swift.trans_id' not in req.environ:
                # if this wasn't set by an earlier middleware, set it now
                trans_id_suffix = self.trans_id_suffix
                trans_id_extra = req.headers.get('x-trans-id-extra')
                if trans_id_extra:
                    trans_id_suffix += '-' + trans_id_extra[:32]
                trans_id = generate_trans_id(trans_id_suffix)
                req.environ['swift.trans_id'] = trans_id
                self.logger.txn_id = trans_id
            req.headers['x-trans-id'] = req.environ['swift.trans_id']
            controller.trans_id = req.environ['swift.trans_id']
            self.logger.client_ip = get_remote_client(req)

            if req.method not in controller.allowed_methods:
                return HTTPMethodNotAllowed(request=req, headers={
                    'Allow': ', '.join(controller.allowed_methods)})
            handler = getattr(controller, req.method)

            old_authorize = None
            if 'swift.authorize' in req.environ:
                # We call authorize before the handler, always. If authorized,
                # we remove the swift.authorize hook so isn't ever called
                # again. If not authorized, we return the denial unless the
                # controller's method indicates it'd like to gather more
                # information and try again later.
                resp = req.environ['swift.authorize'](req)
                if not resp:
                    # No resp means authorized, no delayed recheck required.
                    old_authorize = req.environ['swift.authorize']
                else:
                    # Response indicates denial, but we might delay the denial
                    # and recheck later. If not delayed, return the error now.
                    if not getattr(handler, 'delay_denial', None):
                        return resp
            # Save off original request method (GET, POST, etc.) in case it
            # gets mutated during handling.  This way logging can display the
            # method the client actually sent.
            req.environ.setdefault('swift.orig_req_method', req.method)
            try:
                if old_authorize:
                    req.environ.pop('swift.authorize', None)
                return handler(req)
            finally:
                if old_authorize:
                    req.environ['swift.authorize'] = old_authorize
        except HTTPException as error_response:
            return error_response
        except (Exception, Timeout):
            self.logger.exception(_('ERROR Unhandled exception in request'))
            return HTTPServerError(request=req)
Пример #53
0
 def __call__(self, env, start_response):
     if not self.storage_domain:
         return self.app(env, start_response)
     if 'HTTP_HOST' in env:
         given_domain = env['HTTP_HOST']
     else:
         given_domain = env['SERVER_NAME']
     port = ''
     if ':' in given_domain:
         given_domain, port = given_domain.rsplit(':', 1)
     if is_ip(given_domain):
         return self.app(env, start_response)
     a_domain = given_domain
     if not self._domain_endswith_in_storage_domain(a_domain):
         if self.memcache is None:
             self.memcache = cache_from_env(env)
         error = True
         for tries in xrange(self.lookup_depth):
             found_domain = None
             if self.memcache:
                 memcache_key = ''.join(['cname-', a_domain])
                 found_domain = self.memcache.get(memcache_key)
             if not found_domain:
                 ttl, found_domain = lookup_cname(a_domain)
                 if self.memcache:
                     memcache_key = ''.join(['cname-', given_domain])
                     self.memcache.set(memcache_key, found_domain,
                                       time=ttl)
             if found_domain is None or found_domain == a_domain:
                 # no CNAME records or we're at the last lookup
                 error = True
                 found_domain = None
                 break
             elif self._domain_endswith_in_storage_domain(found_domain):
                 # Found it!
                 self.logger.info(
                     _('Mapped %(given_domain)s to %(found_domain)s') %
                     {'given_domain': given_domain,
                      'found_domain': found_domain})
                 if port:
                     env['HTTP_HOST'] = ':'.join([found_domain, port])
                 else:
                     env['HTTP_HOST'] = found_domain
                 error = False
                 break
             else:
                 # try one more deep in the chain
                 self.logger.debug(
                     _('Following CNAME chain for  '
                       '%(given_domain)s to %(found_domain)s') %
                     {'given_domain': given_domain,
                      'found_domain': found_domain})
                 a_domain = found_domain
         if error:
             if found_domain:
                 msg = 'CNAME lookup failed after %d tries' % \
                     self.lookup_depth
             else:
                 msg = 'CNAME lookup failed to resolve to a valid domain'
             resp = HTTPBadRequest(request=Request(env), body=msg,
                                   content_type='text/plain')
             return resp(env, start_response)
     return self.app(env, start_response)
Пример #54
0
    def account_update(self, req, account, container, broker):
        """
        Update the account server(s) with latest container info.

        :param req: swob.Request object
        :param account: account name
        :param container: container name
        :param broker: container DB broker object
        :returns: if all the account requests return a 404 error code,
                  HTTPNotFound response object,
                  if the account cannot be updated due to a malformed header,
                  an HTTPBadRequest response object,
                  otherwise None.
        """
        account_hosts = [h.strip() for h in
                         req.headers.get('X-Account-Host', '').split(',')]
        account_devices = [d.strip() for d in
                           req.headers.get('X-Account-Device', '').split(',')]
        account_partition = req.headers.get('X-Account-Partition', '')

        if len(account_hosts) != len(account_devices):
            # This shouldn't happen unless there's a bug in the proxy,
            # but if there is, we want to know about it.
            self.logger.error(_('ERROR Account update failed: different  '
                                'numbers of hosts and devices in request: '
                                '"%s" vs "%s"') %
                               (req.headers.get('X-Account-Host', ''),
                                req.headers.get('X-Account-Device', '')))
            return HTTPBadRequest(req=req)

        if account_partition:
            updates = zip(account_hosts, account_devices)
        else:
            updates = []

        account_404s = 0

        for account_host, account_device in updates:
            account_ip, account_port = account_host.rsplit(':', 1)
            new_path = '/' + '/'.join([account, container])
            info = broker.get_info()
            account_headers = HeaderKeyDict({
                'x-put-timestamp': info['put_timestamp'],
                'x-delete-timestamp': info['delete_timestamp'],
                'x-object-count': info['object_count'],
                'x-bytes-used': info['bytes_used'],
                'x-trans-id': req.headers.get('x-trans-id', '-'),
                'X-Backend-Storage-Policy-Index': info['storage_policy_index'],
                'user-agent': 'container-server %s' % os.getpid(),
                'referer': req.as_referer()})
            if req.headers.get('x-account-override-deleted', 'no').lower() == \
                    'yes':
                account_headers['x-account-override-deleted'] = 'yes'
            try:
                with ConnectionTimeout(self.conn_timeout):
                    conn = http_connect(
                        account_ip, account_port, account_device,
                        account_partition, 'PUT', new_path, account_headers)
                with Timeout(self.node_timeout):
                    account_response = conn.getresponse()
                    account_response.read()
                    if account_response.status == HTTP_NOT_FOUND:
                        account_404s += 1
                    elif not is_success(account_response.status):
                        self.logger.error(_(
                            'ERROR Account update failed '
                            'with %(ip)s:%(port)s/%(device)s (will retry '
                            'later): Response %(status)s %(reason)s'),
                            {'ip': account_ip, 'port': account_port,
                             'device': account_device,
                             'status': account_response.status,
                             'reason': account_response.reason})
            except (Exception, Timeout):
                self.logger.exception(_(
                    'ERROR account update failed with '
                    '%(ip)s:%(port)s/%(device)s (will retry later)'),
                    {'ip': account_ip, 'port': account_port,
                     'device': account_device})
        if updates and account_404s == len(updates):
            return HTTPNotFound(req=req)
        else:
            return None
Пример #55
0
    def object_audit(self, path, device, partition):
        """
        Audits the given object path.

        :param path: a path to an object
        :param device: the device the path is on
        :param partition: the partition the path is on
        """
        try:
            try:
                name = diskfile.read_metadata(path)['name']
            except (Exception, Timeout) as exc:
                raise AuditException('Error when reading metadata: %s' % exc)
            _junk, account, container, obj = name.split('/', 3)
            df = self.diskfile_mgr.get_diskfile(device, partition, account,
                                                container, obj)
            try:
                with df.open():
                    metadata = df.get_metadata()
                    obj_size = int(metadata['Content-Length'])
                    if self.stats_sizes:
                        self.record_stats(obj_size)
                    if self.zero_byte_only_at_fps and obj_size:
                        self.passes += 1
                        return
                    reader = df.reader()
                with closing(reader):
                    for chunk in reader:
                        chunk_len = len(chunk)
                        self.bytes_running_time = ratelimit_sleep(
                            self.bytes_running_time,
                            self.max_bytes_per_second,
                            incr_by=chunk_len)
                        self.bytes_processed += chunk_len
                        self.total_bytes_processed += chunk_len
                if reader.was_quarantined:
                    self.quarantines += 1
                    self.logger.error(
                        _('ERROR Object %(obj)s failed audit and'
                          ' was quarantined: %(err)s'), {
                              'obj': path,
                              'err': reader.was_quarantined
                          })
                    return
            except DiskFileNotExist:
                return
        except DiskFileQuarantined as err:
            self.quarantines += 1
            self.logger.error(
                _('ERROR Object %(obj)s failed audit and was'
                  ' quarantined: %(err)s'), {
                      'obj': path,
                      'err': err
                  })
        except AuditException as err:
            self.logger.increment('quarantines')
            self.quarantines += 1
            self.logger.error(
                _('ERROR Object %(obj)s failed audit and will'
                  ' be quarantined: %(err)s'), {
                      'obj': path,
                      'err': err
                  })
            diskfile.quarantine_renamer(os.path.join(self.devices, device),
                                        path)
            return
        self.passes += 1
Пример #56
0
    def collect_parts(self, override_devices=None,
                      override_partitions=None):
        """
        Helper for yielding partitions in the top level reconstructor
        """
        override_devices = override_devices or []
        override_partitions = override_partitions or []
        ips = whataremyips(self.bind_ip)
        for policy in POLICIES:
            if policy.policy_type != EC_POLICY:
                continue
            self._diskfile_mgr = self._df_router[policy]
            self.load_object_ring(policy)
            data_dir = get_data_dir(policy)
            local_devices = list(six.moves.filter(
                lambda dev: dev and is_local_device(
                    ips, self.port,
                    dev['replication_ip'], dev['replication_port']),
                policy.object_ring.devs))

            if override_devices:
                self.device_count = len(override_devices)
            else:
                self.device_count = len(local_devices)

            for local_dev in local_devices:
                if override_devices and (local_dev['device'] not in
                                         override_devices):
                    continue
                self.reconstruction_device_count += 1
                dev_path = self._df_router[policy].get_dev_path(
                    local_dev['device'])
                if not dev_path:
                    self.logger.warn(_('%s is not mounted'),
                                     local_dev['device'])
                    continue
                obj_path = join(dev_path, data_dir)
                tmp_path = join(dev_path, get_tmp_dir(int(policy)))
                unlink_older_than(tmp_path, time.time() -
                                  self.reclaim_age)
                if not os.path.exists(obj_path):
                    try:
                        mkdirs(obj_path)
                    except Exception:
                        self.logger.exception(
                            'Unable to create %s' % obj_path)
                    continue
                try:
                    partitions = os.listdir(obj_path)
                except OSError:
                    self.logger.exception(
                        'Unable to list partitions in %r' % obj_path)
                    continue

                self.part_count += len(partitions)
                for partition in partitions:
                    part_path = join(obj_path, partition)
                    if not (partition.isdigit() and
                            os.path.isdir(part_path)):
                        self.logger.warning(
                            'Unexpected entity in data dir: %r' % part_path)
                        remove_file(part_path)
                        self.reconstruction_part_count += 1
                        continue
                    partition = int(partition)
                    if override_partitions and (partition not in
                                                override_partitions):
                        continue
                    part_info = {
                        'local_dev': local_dev,
                        'policy': policy,
                        'partition': partition,
                        'part_path': part_path,
                    }
                    yield part_info
                    self.reconstruction_part_count += 1
Пример #57
0
    def _get_suffixes_to_sync(self, job, node):
        """
        For SYNC jobs we need to make a remote REPLICATE request to get
        the remote node's current suffix's hashes and then compare to our
        local suffix's hashes to decide which suffixes (if any) are out
        of sync.

        :param: the job dict, with the keys defined in ``_get_part_jobs``
        :param node: the remote node dict
        :returns: a (possibly empty) list of strings, the suffixes to be
                  synced with the remote node.
        """
        # get hashes from the remote node
        remote_suffixes = None
        try:
            with Timeout(self.http_timeout):
                resp = http_connect(
                    node['replication_ip'], node['replication_port'],
                    node['device'], job['partition'], 'REPLICATE',
                    '', headers=self.headers).getresponse()
            if resp.status == HTTP_INSUFFICIENT_STORAGE:
                self.logger.error(
                    _('%s responded as unmounted'),
                    self._full_path(node, job['partition'], '',
                                    job['policy']))
            elif resp.status != HTTP_OK:
                full_path = self._full_path(node, job['partition'], '',
                                            job['policy'])
                self.logger.error(
                    _("Invalid response %(resp)s from %(full_path)s"),
                    {'resp': resp.status, 'full_path': full_path})
            else:
                remote_suffixes = pickle.loads(resp.read())
        except (Exception, Timeout):
            # all exceptions are logged here so that our caller can
            # safely catch our exception and continue to the next node
            # without logging
            self.logger.exception('Unable to get remote suffix hashes '
                                  'from %r' % self._full_path(
                                      node, job['partition'], '',
                                      job['policy']))

        if remote_suffixes is None:
            raise SuffixSyncError('Unable to get remote suffix hashes')

        suffixes = self.get_suffix_delta(job['hashes'],
                                         job['frag_index'],
                                         remote_suffixes,
                                         node['index'])
        # now recalculate local hashes for suffixes that don't
        # match so we're comparing the latest
        local_suff = self._get_hashes(job['policy'], job['path'],
                                      recalculate=suffixes)

        suffixes = self.get_suffix_delta(local_suff,
                                         job['frag_index'],
                                         remote_suffixes,
                                         node['index'])

        self.suffix_count += len(suffixes)
        return suffixes
Пример #58
0
    def run_once(self, *args, **kwargs):
        """Run a replication pass once."""
        override_options = parse_override_options(once=True, **kwargs)

        devices_to_replicate = override_options.devices or Everything()
        partitions_to_replicate = override_options.partitions or Everything()

        self._zero_stats()
        dirs = []
        ips = whataremyips(self.bind_ip)
        if not ips:
            self.logger.error(_('ERROR Failed to get my own IPs?'))
            return

        if self.handoffs_only:
            self.logger.warning(
                'Starting replication pass with handoffs_only enabled. '
                'This mode is not intended for normal '
                'operation; use handoffs_only with care.')

        self._local_device_ids = set()
        found_local = False
        for node in self.ring.devs:
            if node and is_local_device(ips, self.port, node['replication_ip'],
                                        node['replication_port']):
                found_local = True
                try:
                    dev_path = check_drive(self.root, node['device'],
                                           self.mount_check)
                except ValueError as err:
                    self._add_failure_stats([
                        (failure_dev['replication_ip'], failure_dev['device'])
                        for failure_dev in self.ring.devs if failure_dev
                    ])
                    self.logger.warning('Skipping: %s', err)
                    continue
                if node['device'] not in devices_to_replicate:
                    self.logger.debug(
                        'Skipping device %s due to given arguments',
                        node['device'])
                    continue
                unlink_older_than(os.path.join(dev_path, 'tmp'),
                                  time.time() - self.reclaim_age)
                datadir = os.path.join(self.root, node['device'], self.datadir)
                if os.path.isdir(datadir):
                    self._local_device_ids.add(node['id'])
                    part_filt = self._partition_dir_filter(
                        node['id'], partitions_to_replicate)
                    dirs.append((datadir, node['id'], part_filt))
        if not found_local:
            self.logger.error(
                "Can't find itself %s with port %s in ring "
                "file, not replicating", ", ".join(ips), self.port)
        self.logger.info(_('Beginning replication run'))
        for part, object_file, node_id in self.roundrobin_datadirs(dirs):
            self.cpool.spawn_n(self._replicate_object, part, object_file,
                               node_id)
        self.cpool.waitall()
        self.logger.info(_('Replication run OVER'))
        if self.handoffs_only:
            self.logger.warning(
                'Finished replication pass with handoffs_only enabled. '
                'If handoffs_only is no longer required, disable it.')
        self._report_stats()
Пример #59
0
    def audit_all_objects(self, mode='once', device_dirs=None):
        description = ''
        if device_dirs:
            device_dir_str = ','.join(sorted(device_dirs))
            if self.auditor_type == 'ALL':
                description = _(' - parallel, %s') % device_dir_str
            else:
                description = _(' - %s') % device_dir_str
        self.logger.info(
            _('Begin object audit "%(mode)s" mode (%(audi_type)s'
              '%(description)s)') % {
                  'mode': mode,
                  'audi_type': self.auditor_type,
                  'description': description
              })
        begin = reported = time.time()
        self.total_bytes_processed = 0
        self.total_files_processed = 0
        total_quarantines = 0
        total_errors = 0
        time_auditing = 0
        # TODO: we should move audit-location generation to the storage policy,
        # as we may (conceivably) have a different filesystem layout for each.
        # We'd still need to generate the policies to audit from the actual
        # directories found on-disk, and have appropriate error reporting if we
        # find a directory that doesn't correspond to any known policy. This
        # will require a sizable refactor, but currently all diskfile managers
        # can find all diskfile locations regardless of policy -- so for now
        # just use Policy-0's manager.
        all_locs = (
            self.diskfile_router[POLICIES[0]].object_audit_location_generator(
                device_dirs=device_dirs, auditor_type=self.auditor_type))
        for location in all_locs:
            loop_time = time.time()
            self.failsafe_object_audit(location)
            self.logger.timing_since('timing', loop_time)
            self.files_running_time = ratelimit_sleep(
                self.files_running_time, self.max_files_per_second)
            self.total_files_processed += 1
            now = time.time()
            if now - self.last_logged >= self.log_time:
                self.logger.info(
                    _('Object audit (%(type)s). '
                      'Since %(start_time)s: Locally: %(passes)d passed, '
                      '%(quars)d quarantined, %(errors)d errors, '
                      'files/sec: %(frate).2f, bytes/sec: %(brate).2f, '
                      'Total time: %(total).2f, Auditing time: %(audit).2f, '
                      'Rate: %(audit_rate).2f') % {
                          'type': '%s%s' % (self.auditor_type, description),
                          'start_time': time.ctime(reported),
                          'passes': self.passes,
                          'quars': self.quarantines,
                          'errors': self.errors,
                          'frate': self.passes / (now - reported),
                          'brate': self.bytes_processed / (now - reported),
                          'total': (now - begin),
                          'audit': time_auditing,
                          'audit_rate': time_auditing / (now - begin)
                      })
                cache_entry = self.create_recon_nested_dict(
                    'object_auditor_stats_%s' % (self.auditor_type),
                    device_dirs, {
                        'errors': self.errors,
                        'passes': self.passes,
                        'quarantined': self.quarantines,
                        'bytes_processed': self.bytes_processed,
                        'start_time': reported,
                        'audit_time': time_auditing
                    })
                dump_recon_cache(cache_entry, self.rcache, self.logger)
                reported = now
                total_quarantines += self.quarantines
                total_errors += self.errors
                self.passes = 0
                self.quarantines = 0
                self.errors = 0
                self.bytes_processed = 0
                self.last_logged = now
            time_auditing += (now - loop_time)
        # Avoid divide by zero during very short runs
        elapsed = (time.time() - begin) or 0.000001
        self.logger.info(
            _('Object audit (%(type)s) "%(mode)s" mode '
              'completed: %(elapsed).02fs. Total quarantined: %(quars)d, '
              'Total errors: %(errors)d, Total files/sec: %(frate).2f, '
              'Total bytes/sec: %(brate).2f, Auditing time: %(audit).2f, '
              'Rate: %(audit_rate).2f') % {
                  'type': '%s%s' % (self.auditor_type, description),
                  'mode': mode,
                  'elapsed': elapsed,
                  'quars': total_quarantines + self.quarantines,
                  'errors': total_errors + self.errors,
                  'frate': self.total_files_processed / elapsed,
                  'brate': self.total_bytes_processed / elapsed,
                  'audit': time_auditing,
                  'audit_rate': time_auditing / elapsed
              })
        if self.stats_sizes:
            self.logger.info(
                _('Object audit stats: %s') % json.dumps(self.stats_buckets))

        # Unset remaining partitions to not skip them in the next run
        diskfile.clear_auditor_status(self.devices, self.auditor_type)
    def update_deleted(self, job):
        """
        High-level method that replicates a single partition that doesn't
        belong on this node.

        :param job: a dict containing info about the partition to be replicated
        """
        def tpool_get_suffixes(path):
            return [
                suff for suff in os.listdir(path)
                if len(suff) == 3 and isdir(join(path, suff))
            ]

        self.replication_count += 1
        self.logger.increment('partition.delete.count.%s' % (job['device'], ))
        headers = dict(self.default_headers)
        headers['X-Backend-Storage-Policy-Index'] = int(job['policy'])
        failure_devs_info = set()
        begin = time.time()
        handoff_partition_deleted = False
        try:
            responses = []
            suffixes = tpool.execute(tpool_get_suffixes, job['path'])
            synced_remote_regions = {}
            delete_objs = None
            if suffixes:
                for node in job['nodes']:
                    self.stats['rsync'] += 1
                    kwargs = {}
                    if node['region'] in synced_remote_regions and \
                            self.conf.get('sync_method', 'rsync') == 'ssync':
                        kwargs['remote_check_objs'] = \
                            synced_remote_regions[node['region']]
                    # candidates is a dict(hash=>timestamp) of objects
                    # for deletion
                    success, candidates = self.sync(node, job, suffixes,
                                                    **kwargs)
                    if success:
                        with Timeout(self.http_timeout):
                            conn = http_connect(node['replication_ip'],
                                                node['replication_port'],
                                                node['device'],
                                                job['partition'],
                                                'REPLICATE',
                                                '/' + '-'.join(suffixes),
                                                headers=headers)
                            conn.getresponse().read()
                        if node['region'] != job['region']:
                            synced_remote_regions[node['region']] = viewkeys(
                                candidates)
                    else:
                        failure_devs_info.add(
                            (node['replication_ip'], node['device']))
                    responses.append(success)
                for region, cand_objs in synced_remote_regions.items():
                    if delete_objs is None:
                        delete_objs = cand_objs
                    else:
                        delete_objs = delete_objs & cand_objs

            if self.handoff_delete:
                # delete handoff if we have had handoff_delete successes
                delete_handoff = len([resp for resp in responses if resp]) >= \
                    self.handoff_delete
            else:
                # delete handoff if all syncs were successful
                delete_handoff = len(responses) == len(job['nodes']) and \
                    all(responses)
            if delete_handoff:
                self.stats['remove'] += 1
                if (self.conf.get('sync_method', 'rsync') == 'ssync'
                        and delete_objs is not None):
                    self.logger.info(_("Removing %s objects"),
                                     len(delete_objs))
                    _junk, error_paths = self.delete_handoff_objs(
                        job, delete_objs)
                    # if replication works for a hand-off device and it failed,
                    # the remote devices which are target of the replication
                    # from the hand-off device will be marked. Because cleanup
                    # after replication failed means replicator needs to
                    # replicate again with the same info.
                    if error_paths:
                        failure_devs_info.update([
                            (failure_dev['replication_ip'],
                             failure_dev['device'])
                            for failure_dev in job['nodes']
                        ])
                else:
                    self.delete_partition(job['path'])
                    handoff_partition_deleted = True
            elif not suffixes:
                self.delete_partition(job['path'])
                handoff_partition_deleted = True
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing handoff partition"))
        finally:
            target_devs_info = set([(target_dev['replication_ip'],
                                     target_dev['device'])
                                    for target_dev in job['nodes']])
            self.stats['success'] += len(target_devs_info - failure_devs_info)
            self._add_failure_stats(failure_devs_info)
            if not handoff_partition_deleted:
                self.handoffs_remaining += 1
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.delete.timing', begin)