Example #1
    def read_with_timeout(self, size):
        timeout = Timeout(self.timeout)
        try:
            chunk = os.read(self.obj_data, size)
        except Timeout as t:
            # Close only if the timer is ours, but re-raise either way: a
            # foreign Timeout belongs to an enclosing scope, and swallowing
            # it would leave `chunk` unbound at the return below.
            if t is timeout:
                self.close()
            raise
        except Exception:
            self.close()
            raise
        finally:
            timeout.cancel()

        return chunk
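The identity check above ('t is timeout') is the usual eventlet idiom for telling our own timer apart from a Timeout raised by an enclosing scope. A minimal, self-contained sketch of the pattern, assuming only that eventlet is installed (the blocking read is simulated with eventlet.sleep):

import eventlet
from eventlet import Timeout

def read_with_deadline(seconds):
    timeout = Timeout(seconds)  # the constructor arms the timer immediately
    try:
        eventlet.sleep(seconds + 1)  # stand-in for a blocking read
        return b'data'
    except Timeout as t:
        if t is not timeout:
            raise  # an enclosing scope's timer: let it propagate
        return b''  # our own deadline expired
    finally:
        timeout.cancel()  # always disarm so the timer cannot fire later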
Example #2
    def _call(self, method_name, timeout_sec, **kwargs):
        LOG.debug("Calling %s" % method_name)

        timeout = Timeout(timeout_sec)
        try:
            result = rpc.call(self.context, self._get_routing_key(),
                              {'method': method_name, 'args': kwargs})
            LOG.debug("Result is %s" % result)
            return result
        except Timeout as t:
            # Handle the Timeout before the generic handler, and only if it
            # is ours; an enclosing scope's timer must propagate untouched.
            if t is not timeout:
                raise
            raise exception.GuestTimeout()
        except Exception as e:
            LOG.error(e)
            raise exception.GuestError(original_message=str(e))
        finally:
            timeout.cancel()
Example #3
    def poll_response(self, response):
        """
        The API might return a job number in the response in the case of an
        async response: https://github.com/fog/fog/issues/575
        """
        status = response.status
        polled_response = response  # returned as-is if no polling is needed

        timeout = Timeout(CONF[CFG_GROUP].job_timeout)
        try:
            while status == 307:
                time.sleep(1)
                url = response.headers.get('Location')
                LOG.debug("Polling %s" % url)

                polled_response = self.get(url)
                # Read the status of the freshly polled response, not the
                # original one, or this loop would never terminate.
                status = polled_response.status
        except Timeout as t:
            if t is not timeout:
                raise
            raise DynTimeoutError('Timeout reached when polling job.')
        finally:
            timeout.cancel()
        return polled_response
Example #4
def _make_req(node, part, method, path, headers, stype,
              conn_timeout=5, response_timeout=15, send_timeout=15,
              contents=None, content_length=None, chunk_size=65535):
    """
    Make request to backend storage node.

    :param node: a node dict from a ring
    :param part: an integer, the partition number
    :param method: a string, the HTTP method (e.g. 'PUT', 'DELETE', etc)
    :param path: a string, the request path
    :param headers: a dict, header name => value
    :param stype: a string, describing the type of service
        (i.e. 'Account', 'Container', 'Object')
    :param conn_timeout: timeout while waiting for connection; default is 5
        seconds
    :param response_timeout: timeout while waiting for response; default is 15
        seconds
    :param send_timeout: timeout for sending request body; default is 15
        seconds
    :param contents: an iterable or string to read object data from
    :param content_length: value to send as content-length header
    :param chunk_size: if defined, chunk size of data to send
    :returns: an HTTPResponse object
    :raises DirectClientException: if the response status is not 2xx
    :raises eventlet.Timeout: if either conn_timeout or response_timeout is
        exceeded
    """
    if contents is not None:
        if content_length is not None:
            headers['Content-Length'] = str(content_length)
        else:
            for n, v in headers.items():
                if n.lower() == 'content-length':
                    content_length = int(v)
        if not contents:
            headers['Content-Length'] = '0'
        if isinstance(contents, six.string_types):
            contents = [contents]
        if content_length is None:
            headers['Transfer-Encoding'] = 'chunked'

    ip, port = get_ip_port(node, headers)
    headers.setdefault('X-Backend-Allow-Reserved-Names', 'true')
    with Timeout(conn_timeout):
        conn = http_connect(ip, port, node['device'], part,
                            method, path, headers=headers)

    if contents is not None:
        contents_f = FileLikeIter(contents)

        with Timeout(send_timeout):
            if content_length is None:
                chunk = contents_f.read(chunk_size)
                while chunk:
                    conn.send(b'%x\r\n%s\r\n' % (len(chunk), chunk))
                    chunk = contents_f.read(chunk_size)
                conn.send(b'0\r\n\r\n')
            else:
                left = content_length
                while left > 0:
                    size = chunk_size
                    if size > left:
                        size = left
                    chunk = contents_f.read(size)
                    if not chunk:
                        break
                    conn.send(chunk)
                    left -= len(chunk)

    with Timeout(response_timeout):
        resp = conn.getresponse()
        resp.read()
    if not is_success(resp.status):
        raise DirectClientException(stype, method, node, part, path, resp)
    return resp
Example #5
    def account_update(self, req, account, container, broker):
        """
        Update the account server(s) with latest container info.

        :param req: swob.Request object
        :param account: account name
        :param container: container name
        :param broker: container DB broker object
        :returns: if all the account requests return a 404 error code,
                  HTTPNotFound response object,
                  if the account cannot be updated due to a malformed header,
                  an HTTPBadRequest response object,
                  otherwise None.
        """
        account_hosts = [
            h.strip() for h in req.headers.get('X-Account-Host', '').split(',')
        ]
        account_devices = [
            d.strip()
            for d in req.headers.get('X-Account-Device', '').split(',')
        ]
        account_partition = req.headers.get('X-Account-Partition', '')

        if len(account_hosts) != len(account_devices):
            # This shouldn't happen unless there's a bug in the proxy,
            # but if there is, we want to know about it.
            self.logger.error(
                _('ERROR Account update failed: different '
                  'numbers of hosts and devices in request: '
                  '"%(hosts)s" vs "%(devices)s"') % {
                      'hosts': req.headers.get('X-Account-Host', ''),
                      'devices': req.headers.get('X-Account-Device', '')
                  })
            return HTTPBadRequest(req=req)

        if account_partition:
            # zip is lazy on py3, but we need a list, so force evaluation.
            # On py2 it's an extra list copy, but the list is so small
            # (one element per replica in account ring, usually 3) that it
            # doesn't matter.
            updates = list(zip(account_hosts, account_devices))
        else:
            updates = []

        account_404s = 0

        for account_host, account_device in updates:
            account_ip, account_port = account_host.rsplit(':', 1)
            new_path = '/' + '/'.join([account, container])
            info = broker.get_info()
            account_headers = HeaderKeyDict({
                'x-put-timestamp': info['put_timestamp'],
                'x-delete-timestamp': info['delete_timestamp'],
                'x-object-count': info['object_count'],
                'x-bytes-used': info['bytes_used'],
                'x-trans-id': req.headers.get('x-trans-id', '-'),
                'X-Backend-Storage-Policy-Index':
                    info['storage_policy_index'],
                'user-agent': 'container-server %s' % os.getpid(),
                'referer': req.as_referer(),
            })
            if req.headers.get('x-account-override-deleted', 'no').lower() == \
                    'yes':
                account_headers['x-account-override-deleted'] = 'yes'
            try:
                with ConnectionTimeout(self.conn_timeout):
                    conn = http_connect(account_ip, account_port,
                                        account_device, account_partition,
                                        'PUT', new_path, account_headers)
                with Timeout(self.node_timeout):
                    account_response = conn.getresponse()
                    account_response.read()
                    if account_response.status == HTTP_NOT_FOUND:
                        account_404s += 1
                    elif not is_success(account_response.status):
                        self.logger.error(
                            _('ERROR Account update failed '
                              'with %(ip)s:%(port)s/%(device)s (will retry '
                              'later): Response %(status)s %(reason)s'), {
                                  'ip': account_ip,
                                  'port': account_port,
                                  'device': account_device,
                                  'status': account_response.status,
                                  'reason': account_response.reason
                              })
            except (Exception, Timeout):
                self.logger.exception(
                    _('ERROR account update failed with '
                      '%(ip)s:%(port)s/%(device)s (will retry later)'), {
                          'ip': account_ip,
                          'port': account_port,
                          'device': account_device
                      })
        if updates and account_404s == len(updates):
            return HTTPNotFound(req=req)
        else:
            return None
Example #6
 def wait(self, timeout=None):
     with Timeout(timeout):
         return self.ready.wait()
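Used as a context manager, as in the wait() above, Timeout arms itself on entry and cancels itself on exit, so no explicit try/finally is needed; and passing None (as wait(timeout=None) allows) instantiates a timer that is never scheduled, i.e. no deadline at all. A minimal sketch, assuming eventlet is installed:

import eventlet
from eventlet import Timeout

try:
    with Timeout(0.1):  # armed on entry, cancelled on exit
        eventlet.sleep(1)  # stand-in for a blocking wait
except Timeout:
    print('timed out')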
Example #7
    def _get_suffixes_to_sync(self, job, node):
        """
        For SYNC jobs we need to make a remote REPLICATE request to get
        the remote node's current suffix hashes and then compare them to
        our local suffix hashes to decide which suffixes (if any) are out
        of sync.

        :param job: the job dict, with the keys defined in ``_get_part_jobs``
        :param node: the remote node dict
        :returns: a (possibly empty) list of strings, the suffixes to be
                  synced with the remote node.
        """
        # get hashes from the remote node
        remote_suffixes = None
        try:
            with Timeout(self.http_timeout):
                resp = http_connect(node['replication_ip'],
                                    node['replication_port'],
                                    node['device'],
                                    job['partition'],
                                    'REPLICATE',
                                    '',
                                    headers=self.headers).getresponse()
            if resp.status == HTTP_INSUFFICIENT_STORAGE:
                self.logger.error(
                    _('%s responded as unmounted'),
                    _full_path(node, job['partition'], '', job['policy']))
            elif resp.status != HTTP_OK:
                full_path = _full_path(node, job['partition'], '',
                                       job['policy'])
                self.logger.error(
                    _("Invalid response %(resp)s from %(full_path)s"), {
                        'resp': resp.status,
                        'full_path': full_path
                    })
            else:
                remote_suffixes = pickle.loads(resp.read())
        except (Exception, Timeout):
            # all exceptions are logged here so that our caller can
            # safely catch our exception and continue to the next node
            # without logging
            self.logger.exception(
                'Unable to get remote suffix hashes '
                'from %r' %
                _full_path(node, job['partition'], '', job['policy']))

        if remote_suffixes is None:
            raise SuffixSyncError('Unable to get remote suffix hashes')

        suffixes = self.get_suffix_delta(job['hashes'], job['frag_index'],
                                         remote_suffixes, node['index'])
        # now recalculate local hashes for suffixes that don't
        # match so we're comparing the latest
        local_suff = self._get_hashes(job['local_dev']['device'],
                                      job['partition'],
                                      job['policy'],
                                      recalculate=suffixes)

        suffixes = self.get_suffix_delta(local_suff, job['frag_index'],
                                         remote_suffixes, node['index'])

        self.suffix_count += len(suffixes)
        return suffixes
Example #8
def run_wsgi(conf_path, app_section, *args, **kwargs):
    """
    Runs the server according to some strategy.  The default strategy runs a
    specified number of workers in pre-fork model.  The object-server (only)
    may use a servers-per-port strategy if its config has a servers_per_port
    setting with a value greater than zero.

    :param conf_path: Path to paste.deploy style configuration file/directory
    :param app_section: App name from conf file to load config from
    :returns: 0 if successful, nonzero otherwise
    """
    # Load configuration, set up the logger, and load the request processor
    try:
        (conf, logger, log_name) = \
            _initrp(conf_path, app_section, *args, **kwargs)
    except ConfigFileError as e:
        print(e)
        return 1

    # optional nice/ionice priority scheduling
    utils.modify_priority(conf, logger)

    servers_per_port = int(conf.get('servers_per_port', '0') or 0)

    # NOTE: for now servers_per_port is object-server-only; future work could
    # be done to test and allow it to be used for account and container
    # servers, but that has not been done yet.
    if servers_per_port and app_section == 'object-server':
        strategy = ServersPerPortStrategy(conf,
                                          logger,
                                          servers_per_port=servers_per_port)
    else:
        strategy = WorkersStrategy(conf, logger)

    error_msg = strategy.bind_ports()
    if error_msg:
        logger.error(error_msg)
        print(error_msg)
        return 1

    # Ensure the configuration and application can be loaded before proceeding.
    global_conf = {'log_name': log_name}
    if 'global_conf_callback' in kwargs:
        kwargs['global_conf_callback'](conf, global_conf)
    loadapp(conf_path, global_conf=global_conf)

    # set utils.FALLOCATE_RESERVE if desired
    utils.FALLOCATE_RESERVE, utils.FALLOCATE_IS_PERCENT = \
        utils.config_fallocate_value(conf.get('fallocate_reserve', '1%'))

    # redirect errors to logger and close stdio
    capture_stdio(logger)

    no_fork_sock = strategy.no_fork_sock()
    if no_fork_sock:
        run_server(conf, logger, no_fork_sock, global_conf=global_conf)
        return 0

    def kill_children(*args):
        """Kills the entire process group."""
        logger.error('SIGTERM received')
        signal.signal(signal.SIGTERM, signal.SIG_IGN)
        running[0] = False
        os.killpg(0, signal.SIGTERM)

    def hup(*args):
        """Shuts down the server, but allows running requests to complete"""
        logger.error('SIGHUP received')
        signal.signal(signal.SIGHUP, signal.SIG_IGN)
        running[0] = False

    running = [True]
    signal.signal(signal.SIGTERM, kill_children)
    signal.signal(signal.SIGHUP, hup)

    while running[0]:
        for sock, sock_info in strategy.new_worker_socks():
            pid = os.fork()
            if pid == 0:
                signal.signal(signal.SIGHUP, signal.SIG_DFL)
                signal.signal(signal.SIGTERM, signal.SIG_DFL)
                strategy.post_fork_hook()
                run_server(conf, logger, sock)
                strategy.log_sock_exit(sock, sock_info)
                return 0
            else:
                strategy.register_worker_start(sock, sock_info, pid)

        # The strategy may need to pay attention to something in addition to
        # child process exits (like new ports showing up in a ring).
        #
        # NOTE: a timeout value of None will just instantiate the Timeout
        # object and not actually schedule it, which is equivalent to no
        # timeout for the green_os.wait().
        loop_timeout = strategy.loop_timeout()

        with Timeout(loop_timeout, exception=False):
            try:
                try:
                    pid, status = green_os.wait()
                    if os.WIFEXITED(status) or os.WIFSIGNALED(status):
                        strategy.register_worker_exit(pid)
                except OSError as err:
                    if err.errno not in (errno.EINTR, errno.ECHILD):
                        raise
                    if err.errno == errno.ECHILD:
                        # If there are no children at all (ECHILD), then
                        # there's nothing to actually wait on. We sleep
                        # for a little bit to avoid a tight CPU spin
                        # and still are able to catch any KeyboardInterrupt
                        # events that happen. The value of 0.01 matches the
                        # value in eventlet's waitpid().
                        sleep(0.01)
            except KeyboardInterrupt:
                logger.notice('User quit')
                running[0] = False
                break

    strategy.shutdown_sockets()
    logger.notice('Exited')
    return 0
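The NOTE in the loop above relies on two Timeout behaviors: a seconds value of None instantiates the object without scheduling it, and exception=False makes the context manager swallow its own expiry instead of raising. A small sketch of both, assuming eventlet is installed:

import eventlet
from eventlet import Timeout

# exception=False: on expiry the with-block just ends, nothing propagates.
with Timeout(0.1, exception=False):
    eventlet.sleep(1)  # interrupted after 0.1 seconds
print('continued without an exception')

# seconds=None: the timer is instantiated but never scheduled (no deadline).
with Timeout(None):
    eventlet.sleep(0.05)  # completes normally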
Example #9
     self.app.logger.exception(
         _('ERROR Exception causing client disconnect'))
     return HTTPClientDisconnect(request=req)
 if req.content_length and bytes_transferred < req.content_length:
     req.client_disconnect = True
     self.app.logger.warn(
         _('Client disconnected without sending enough data'))
     self.app.logger.increment('client_disconnects')
     return HTTPClientDisconnect(request=req)
 statuses = []
 reasons = []
 bodies = []
 etags = set()
 for conn in conns:
     try:
         with Timeout(self.app.node_timeout):
             response = conn.getresponse()
             statuses.append(response.status)
             reasons.append(response.reason)
             bodies.append(response.read())
             if response.status >= HTTP_INTERNAL_SERVER_ERROR:
                 self.error_occurred(conn.node,
                     _('ERROR %(status)d %(body)s From Object Server '
                       're: %(path)s') % {'status': response.status,
                     'body': bodies[-1][:1024], 'path': req.path})
             elif is_success(response.status):
                 etags.add(response.getheader('etag').strip('"'))
     except (Exception, Timeout):
         self.exception_occurred(
             conn.node, _('Object'),
             _('Trying to get final status of PUT to %s') % req.path)
Example #10
 def __str__(self):
     return '%s: %s' % (Timeout.__str__(self), self.msg)
Example #11
def direct_get_object(node,
                      part,
                      account,
                      container,
                      obj,
                      conn_timeout=5,
                      response_timeout=15,
                      resp_chunk_size=None,
                      headers=None):
    """
    Get object directly from the object server.

    :param node: node dictionary from the ring
    :param part: partition the container is on
    :param account: account name
    :param container: container name
    :param obj: object name
    :param conn_timeout: timeout in seconds for establishing the connection
    :param response_timeout: timeout in seconds for getting the response
    :param resp_chunk_size: if defined, chunk size of data to read.
    :param headers: dict to be passed into HTTPConnection headers
    :returns: a tuple of (response headers, the object's contents) The response
              headers will be a dict and all header names will be lowercase.
    """
    if headers is None:
        headers = {}  # avoid sharing one mutable default dict across calls
    path = '/%s/%s/%s' % (account, container, obj)
    with Timeout(conn_timeout):
        conn = http_connect(node['ip'],
                            node['port'],
                            node['device'],
                            part,
                            'GET',
                            path,
                            headers=headers)
    with Timeout(response_timeout):
        resp = conn.getresponse()
    if resp.status < 200 or resp.status >= 300:
        resp.read()
        raise ClientException(
            'Object server %s:%s direct GET %s gave status %s' %
            (node['ip'], node['port'],
             repr('/%s/%s%s' % (node['device'], part, path)), resp.status),
            http_host=node['ip'],
            http_port=node['port'],
            http_device=node['device'],
            http_status=resp.status,
            http_reason=resp.reason)
    if resp_chunk_size:

        def _object_body():
            buf = resp.read(resp_chunk_size)
            while buf:
                yield buf
                buf = resp.read(resp_chunk_size)

        object_body = _object_body()
    else:
        object_body = resp.read()
    resp_headers = {}
    for header, value in resp.getheaders():
        resp_headers[header.lower()] = value
    return resp_headers, object_body
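A hedged usage sketch for direct_get_object: the node dict mirrors what a ring entry provides, and all values plus the handle_chunk consumer are placeholders, not part of the original code:

node = {'ip': '127.0.0.1', 'port': 6200, 'device': 'sda1'}  # placeholder node

resp_headers, body = direct_get_object(
    node, 0, 'AUTH_test', 'photos', 'cat.jpg',
    resp_chunk_size=65536)  # with a chunk size, body is a generator

for chunk in body:       # stream the object instead of buffering it whole
    handle_chunk(chunk)  # hypothetical consumer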
Example #12
def direct_put_object(node,
                      part,
                      account,
                      container,
                      name,
                      contents,
                      content_length=None,
                      etag=None,
                      content_type=None,
                      headers=None,
                      conn_timeout=5,
                      response_timeout=15,
                      resp_chunk_size=None):
    """
    Put an object directly to the object server.

    :param node: node dictionary from the ring
    :param part: partition the container is on
    :param account: account name
    :param container: container name
    :param name: object name
    :param contents: a string to read object data from
    :param content_length: value to send as content-length header
    :param etag: etag of contents
    :param content_type: value to send as content-type header
    :param headers: additional headers to include in the request
    :param conn_timeout: timeout in seconds for establishing the connection
    :param response_timeout: timeout in seconds for getting the response
    :param resp_chunk_size: if defined, chunk size of data to send
        (currently unused; see the chunked-puts TODO below)
    :returns: etag from the server response
    """
    # TODO: Add chunked puts
    path = '/%s/%s/%s' % (account, container, name)
    if headers is None:
        headers = {}
    if etag:
        headers['ETag'] = etag.strip('"')
    if content_length is not None:
        headers['Content-Length'] = str(content_length)
    if content_type is not None:
        headers['Content-Type'] = content_type
    else:
        headers['Content-Type'] = 'application/octet-stream'
    if not contents:
        headers['Content-Length'] = '0'
    headers['X-Timestamp'] = normalize_timestamp(time())
    with Timeout(conn_timeout):
        conn = http_connect(node['ip'],
                            node['port'],
                            node['device'],
                            part,
                            'PUT',
                            path,
                            headers=headers)
    conn.send(contents)
    with Timeout(response_timeout):
        resp = conn.getresponse()
        resp.read()
    if resp.status < 200 or resp.status >= 300:
        raise ClientException(
            'Object server %s:%s direct PUT %s gave status %s' %
            (node['ip'], node['port'],
             repr('/%s/%s%s' % (node['device'], part, path)), resp.status),
            http_host=node['ip'],
            http_port=node['port'],
            http_device=node['device'],
            http_status=resp.status,
            http_reason=resp.reason)
    return resp.getheader('etag').strip('"')
Example #13
    def update(self, job):
        """
        High-level method that replicates a single partition.

        :param job: a dict containing info about the partition to be replicated
        """
        self.replication_count += 1
        self.logger.increment('partition.update.count.%s' % (job['device'], ))
        headers = dict(self.default_headers)
        headers['X-Backend-Storage-Policy-Index'] = int(job['policy'])
        target_devs_info = set()
        failure_devs_info = set()
        begin = time.time()
        df_mgr = self._df_router[job['policy']]
        try:
            hashed, local_hash = tpool_reraise(df_mgr._get_hashes,
                                               job['device'],
                                               job['partition'],
                                               job['policy'],
                                               do_listdir=_do_listdir(
                                                   int(job['partition']),
                                                   self.replication_cycle))
            self.suffix_hash += hashed
            self.logger.update_stats('suffix.hashes', hashed)
            attempts_left = len(job['nodes'])
            synced_remote_regions = set()
            random.shuffle(job['nodes'])
            nodes = itertools.chain(
                job['nodes'], job['policy'].object_ring.get_more_nodes(
                    int(job['partition'])))
            while attempts_left > 0:
                # If this throws StopIteration it will be caught way below
                node = next(nodes)
                target_devs_info.add((node['replication_ip'], node['device']))
                attempts_left -= 1
                # if we have already synced to this remote region,
                # don't sync again on this replication pass
                if node['region'] in synced_remote_regions:
                    continue
                try:
                    with Timeout(self.http_timeout):
                        resp = http_connect(node['replication_ip'],
                                            node['replication_port'],
                                            node['device'],
                                            job['partition'],
                                            'REPLICATE',
                                            '',
                                            headers=headers).getresponse()
                        if resp.status == HTTP_INSUFFICIENT_STORAGE:
                            self.logger.error(
                                _('%(replication_ip)s/%(device)s '
                                  'responded as unmounted'), node)
                            attempts_left += 1
                            failure_devs_info.add(
                                (node['replication_ip'], node['device']))
                            continue
                        if resp.status != HTTP_OK:
                            self.logger.error(
                                _("Invalid response %(resp)s "
                                  "from %(ip)s"), {
                                      'resp': resp.status,
                                      'ip': node['replication_ip']
                                  })
                            failure_devs_info.add(
                                (node['replication_ip'], node['device']))
                            continue
                        remote_hash = pickle.loads(resp.read())
                        del resp
                    suffixes = [
                        suffix for suffix in local_hash
                        if local_hash[suffix] != remote_hash.get(suffix, -1)
                    ]
                    if not suffixes:
                        self.stats['hashmatch'] += 1
                        continue
                    hashed, recalc_hash = tpool_reraise(df_mgr._get_hashes,
                                                        job['device'],
                                                        job['partition'],
                                                        job['policy'],
                                                        recalculate=suffixes)
                    self.logger.update_stats('suffix.hashes', hashed)
                    local_hash = recalc_hash
                    suffixes = [
                        suffix for suffix in local_hash
                        if local_hash[suffix] != remote_hash.get(suffix, -1)
                    ]
                    self.stats['rsync'] += 1
                    success, _junk = self.sync(node, job, suffixes)
                    with Timeout(self.http_timeout):
                        conn = http_connect(node['replication_ip'],
                                            node['replication_port'],
                                            node['device'],
                                            job['partition'],
                                            'REPLICATE',
                                            '/' + '-'.join(suffixes),
                                            headers=headers)
                        conn.getresponse().read()
                    if not success:
                        failure_devs_info.add(
                            (node['replication_ip'], node['device']))
                    # add only remote region when replicate succeeded
                    if success and node['region'] != job['region']:
                        synced_remote_regions.add(node['region'])
                    self.suffix_sync += len(suffixes)
                    self.logger.update_stats('suffix.syncs', len(suffixes))
                except (Exception, Timeout):
                    failure_devs_info.add(
                        (node['replication_ip'], node['device']))
                    self.logger.exception(
                        _("Error syncing with node: %s") % node)
            self.suffix_count += len(local_hash)
        except (Exception, Timeout):
            failure_devs_info.update(target_devs_info)
            self._add_failure_stats(failure_devs_info)
            self.logger.exception(_("Error syncing partition"))
        finally:
            self.stats['success'] += len(target_devs_info - failure_devs_info)
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.update.timing', begin)
Example #14
    def replicate(self,
                  override_devices=None,
                  override_partitions=None,
                  override_policies=None):
        """Run a replication pass"""
        self.start = time.time()
        self.suffix_count = 0
        self.suffix_sync = 0
        self.suffix_hash = 0
        self.replication_count = 0
        self.last_replication_count = -1
        self.replication_cycle = (self.replication_cycle + 1) % 10
        self.partition_times = []
        self.my_replication_ips = self._get_my_replication_ips()
        self.all_devs_info = set()
        self.handoffs_remaining = 0

        stats = eventlet.spawn(self.heartbeat)
        lockup_detector = eventlet.spawn(self.detect_lockups)
        eventlet.sleep()  # Give spawns a cycle

        current_nodes = None
        try:
            self.run_pool = GreenPool(size=self.concurrency)
            jobs = self.collect_jobs(override_devices=override_devices,
                                     override_partitions=override_partitions,
                                     override_policies=override_policies)
            for job in jobs:
                current_nodes = job['nodes']
                if override_devices and job['device'] not in override_devices:
                    continue
                if override_partitions and \
                        job['partition'] not in override_partitions:
                    continue
                dev_path = join(self.devices_dir, job['device'])
                if self.mount_check and not ismount(dev_path):
                    self._add_failure_stats([(failure_dev['replication_ip'],
                                              failure_dev['device'])
                                             for failure_dev in job['nodes']])
                    self.logger.warning(_('%s is not mounted'), job['device'])
                    continue
                if self.handoffs_first and not job['delete']:
                    # in handoffs first mode, we won't process primary
                    # partitions until the rebalance has succeeded!
                    if self.handoffs_remaining:
                        self.logger.warning(
                            _("Handoffs first mode still has handoffs "
                              "remaining.  Aborting current "
                              "replication pass."))
                        break
                if not self.check_ring(job['policy'].object_ring):
                    self.logger.info(
                        _("Ring change detected. Aborting "
                          "current replication pass."))
                    return

                try:
                    if isfile(job['path']):
                        # Clean up any (probably zero-byte) files where a
                        # partition should be.
                        self.logger.warning(
                            'Removing partition directory '
                            'which was a file: %s', job['path'])
                        os.remove(job['path'])
                        continue
                except OSError:
                    continue
                if job['delete']:
                    self.run_pool.spawn(self.update_deleted, job)
                else:
                    self.run_pool.spawn(self.update, job)
            current_nodes = None
            with Timeout(self.lockup_timeout):
                self.run_pool.waitall()
        except (Exception, Timeout):
            if current_nodes:
                self._add_failure_stats([(failure_dev['replication_ip'],
                                          failure_dev['device'])
                                         for failure_dev in current_nodes])
            else:
                self._add_failure_stats(self.all_devs_info)
            self.logger.exception(_("Exception in top-level replication loop"))
            self.kill_coros()
        finally:
            stats.kill()
            lockup_detector.kill()
            self.stats_line()
            self.stats['attempted'] = self.replication_count
Example #15
    def update_deleted(self, job):
        """
        High-level method that replicates a single partition that doesn't
        belong on this node.

        :param job: a dict containing info about the partition to be replicated
        """
        def tpool_get_suffixes(path):
            return [
                suff for suff in os.listdir(path)
                if len(suff) == 3 and isdir(join(path, suff))
            ]

        self.replication_count += 1
        self.logger.increment('partition.delete.count.%s' % (job['device'], ))
        headers = dict(self.default_headers)
        headers['X-Backend-Storage-Policy-Index'] = int(job['policy'])
        failure_devs_info = set()
        begin = time.time()
        handoff_partition_deleted = False
        try:
            responses = []
            suffixes = tpool.execute(tpool_get_suffixes, job['path'])
            synced_remote_regions = {}
            delete_objs = None
            if suffixes:
                for node in job['nodes']:
                    self.stats['rsync'] += 1
                    kwargs = {}
                    if node['region'] in synced_remote_regions and \
                            self.conf.get('sync_method', 'rsync') == 'ssync':
                        kwargs['remote_check_objs'] = \
                            synced_remote_regions[node['region']]
                    # candidates is a dict(hash=>timestamp) of objects
                    # for deletion
                    success, candidates = self.sync(node, job, suffixes,
                                                    **kwargs)
                    if success:
                        with Timeout(self.http_timeout):
                            conn = http_connect(node['replication_ip'],
                                                node['replication_port'],
                                                node['device'],
                                                job['partition'],
                                                'REPLICATE',
                                                '/' + '-'.join(suffixes),
                                                headers=headers)
                            conn.getresponse().read()
                        if node['region'] != job['region']:
                            synced_remote_regions[node['region']] = viewkeys(
                                candidates)
                    else:
                        failure_devs_info.add(
                            (node['replication_ip'], node['device']))
                    responses.append(success)
                for cand_objs in synced_remote_regions.values():
                    if delete_objs is None:
                        delete_objs = cand_objs
                    else:
                        delete_objs = delete_objs & cand_objs

            if self.handoff_delete:
                # delete handoff if we have had handoff_delete successes
                delete_handoff = len([resp for resp in responses if resp]) >= \
                    self.handoff_delete
            else:
                # delete handoff if all syncs were successful
                delete_handoff = len(responses) == len(job['nodes']) and \
                    all(responses)
            if delete_handoff:
                self.stats['remove'] += 1
                if (self.conf.get('sync_method', 'rsync') == 'ssync'
                        and delete_objs is not None):
                    self.logger.info(_("Removing %s objects"),
                                     len(delete_objs))
                    _junk, error_paths = self.delete_handoff_objs(
                        job, delete_objs)
                    # If replication from this hand-off device failed for
                    # some paths, mark the remote devices that were the
                    # targets of that replication: a failed cleanup means
                    # the replicator needs to replicate the same data
                    # again on the next pass.
                    if error_paths:
                        failure_devs_info.update([
                            (failure_dev['replication_ip'],
                             failure_dev['device'])
                            for failure_dev in job['nodes']
                        ])
                else:
                    self.delete_partition(job['path'])
                    handoff_partition_deleted = True
            elif not suffixes:
                self.delete_partition(job['path'])
                handoff_partition_deleted = True
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing handoff partition"))
            self._add_failure_stats(failure_devs_info)
        finally:
            target_devs_info = set([(target_dev['replication_ip'],
                                     target_dev['device'])
                                    for target_dev in job['nodes']])
            self.stats['success'] += len(target_devs_info - failure_devs_info)
            if not handoff_partition_deleted:
                self.handoffs_remaining += 1
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.delete.timing', begin)
Example #16
    def update_deleted(self, job):
        """
        High-level method that replicates a single partition that doesn't
        belong on this node.

        :param job: a dict containing info about the partition to be replicated
        """
        def tpool_get_suffixes(path):
            return [
                suff for suff in os.listdir(path)
                if len(suff) == 3 and isdir(join(path, suff))
            ]

        self.replication_count += 1
        self.logger.increment('partition.delete.count.%s' % (job['device'], ))
        self.headers['X-Backend-Storage-Policy-Index'] = int(job['policy'])
        begin = time.time()
        try:
            responses = []
            suffixes = tpool.execute(tpool_get_suffixes, job['path'])
            synced_remote_regions = {}
            delete_objs = None
            if suffixes:
                for node in job['nodes']:
                    kwargs = {}
                    if node['region'] in synced_remote_regions and \
                            self.conf.get('sync_method', 'rsync') == 'ssync':
                        kwargs['remote_check_objs'] = \
                            synced_remote_regions[node['region']]
                    # candidates is a dict(hash=>timestamp) of objects
                    # for deletion
                    success, candidates = self.sync(node, job, suffixes,
                                                    **kwargs)
                    if success:
                        with Timeout(self.http_timeout):
                            conn = http_connect(node['replication_ip'],
                                                node['replication_port'],
                                                node['device'],
                                                job['partition'],
                                                'REPLICATE',
                                                '/' + '-'.join(suffixes),
                                                headers=self.headers)
                            conn.getresponse().read()
                        if node['region'] != job['region']:
                            synced_remote_regions[node['region']] = \
                                candidates.keys()
                    responses.append(success)
                for region, cand_objs in synced_remote_regions.iteritems():
                    if delete_objs is None:
                        delete_objs = set(cand_objs)
                    else:
                        delete_objs = delete_objs.intersection(cand_objs)

            if self.handoff_delete:
                # delete handoff if we have had handoff_delete successes
                delete_handoff = len([resp for resp in responses if resp]) >= \
                    self.handoff_delete
            else:
                # delete handoff if all syncs were successful
                delete_handoff = len(responses) == len(job['nodes']) and \
                    all(responses)
            if delete_handoff:
                if (self.conf.get('sync_method', 'rsync') == 'ssync'
                        and delete_objs is not None):
                    self.logger.info(_("Removing %s objects"),
                                     len(delete_objs))
                    self.delete_handoff_objs(job, delete_objs)
                else:
                    self.delete_partition(job['path'])
            elif not suffixes:
                self.delete_partition(job['path'])
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing handoff partition"))
        finally:
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.delete.timing', begin)
Example #17
    def account_update(self, req, account, container, broker):
        """
        Update the account server(s) with latest container info.

        :param req: swob.Request object
        :param account: account name
        :param container: container name
        :param broker: container DB broker object
        :returns: if all the account requests return a 404 error code,
                  HTTPNotFound response object, otherwise None.
        """
        account_hosts = [
            h.strip() for h in req.headers.get('X-Account-Host', '').split(',')
        ]
        account_devices = [
            d.strip()
            for d in req.headers.get('X-Account-Device', '').split(',')
        ]
        account_partition = req.headers.get('X-Account-Partition', '')

        if len(account_hosts) != len(account_devices):
            # This shouldn't happen unless there's a bug in the proxy,
            # but if there is, we want to know about it.
            self.logger.error(
                _('ERROR Account update failed: different '
                  'numbers of hosts and devices in request: '
                  '"%s" vs "%s"') % (req.headers.get('X-Account-Host', ''),
                                     req.headers.get('X-Account-Device', '')))
            return

        if account_partition:
            updates = zip(account_hosts, account_devices)
        else:
            updates = []

        account_404s = 0

        for account_host, account_device in updates:
            account_ip, account_port = account_host.rsplit(':', 1)
            new_path = '/' + '/'.join([account, container])
            info = broker.get_info()
            account_headers = {
                'x-put-timestamp': info['put_timestamp'],
                'x-delete-timestamp': info['delete_timestamp'],
                'x-object-count': info['object_count'],
                'x-bytes-used': info['bytes_used'],
                'x-trans-id': req.headers.get('x-trans-id', '-')
            }
            if req.headers.get('x-account-override-deleted', 'no').lower() == \
                    'yes':
                account_headers['x-account-override-deleted'] = 'yes'
            try:
                with ConnectionTimeout(self.conn_timeout):
                    conn = http_connect(account_ip, account_port,
                                        account_device, account_partition,
                                        'PUT', new_path, account_headers)
                with Timeout(self.node_timeout):
                    account_response = conn.getresponse()
                    account_response.read()
                    if account_response.status == HTTP_NOT_FOUND:
                        account_404s += 1
                    elif not is_success(account_response.status):
                        self.logger.error(
                            _('ERROR Account update failed '
                              'with %(ip)s:%(port)s/%(device)s (will retry '
                              'later): Response %(status)s %(reason)s'), {
                                  'ip': account_ip,
                                  'port': account_port,
                                  'device': account_device,
                                  'status': account_response.status,
                                  'reason': account_response.reason
                              })
            except (Exception, Timeout):
                self.logger.exception(
                    _('ERROR account update failed with '
                      '%(ip)s:%(port)s/%(device)s (will retry later)'), {
                          'ip': account_ip,
                          'port': account_port,
                          'device': account_device
                      })
        if updates and account_404s == len(updates):
            return HTTPNotFound(req=req)
        else:
            return None
Пример #18
0
 def test_api_call_timeout(self):
     self.call_context.call.side_effect = Timeout()
     self.assertRaises(exception.GuestTimeout, self.api.restart)
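The test works because assigning an exception instance to a mock's side_effect makes every call raise it, so no real RPC happens. A minimal sketch of the same technique with unittest.mock; the names here are illustrative:

from unittest import mock
from eventlet import Timeout

call_context = mock.Mock()
call_context.call.side_effect = Timeout()  # every call now raises Timeout

try:
    call_context.call('restart')
except Timeout:
    print('the mocked call timed out as arranged')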
Example #19
def direct_get_account(node,
                       part,
                       account,
                       marker=None,
                       limit=None,
                       prefix=None,
                       delimiter=None,
                       conn_timeout=5,
                       response_timeout=15):
    """
    Get listings directly from the account server.

    :param node: node dictionary from the ring
    :param part: partition the account is on
    :param account: account name
    :param marker: marker query
    :param limit: query limit
    :param prefix: prefix query
    :param delimiter: delimiter for the query
    :param conn_timeout: timeout in seconds for establishing the connection
    :param response_timeout: timeout in seconds for getting the response
    :returns: a tuple of (response headers, a list of containers) The response
              headers will be a dict and all header names will be lowercase.
    """
    path = '/' + account
    qs = 'format=json'
    if marker:
        qs += '&marker=%s' % quote(marker)
    if limit:
        qs += '&limit=%d' % limit
    if prefix:
        qs += '&prefix=%s' % quote(prefix)
    if delimiter:
        qs += '&delimiter=%s' % quote(delimiter)
    with Timeout(conn_timeout):
        conn = http_connect(node['ip'],
                            node['port'],
                            node['device'],
                            part,
                            'GET',
                            path,
                            query_string=qs)
    with Timeout(response_timeout):
        resp = conn.getresponse()
    if resp.status < 200 or resp.status >= 300:
        resp.read()
        raise ClientException(
            'Account server %s:%s direct GET %s gave status %s' %
            (node['ip'], node['port'],
             repr('/%s/%s%s' % (node['device'], part, path)), resp.status),
            http_host=node['ip'],
            http_port=node['port'],
            http_device=node['device'],
            http_status=resp.status,
            http_reason=resp.reason)
    resp_headers = {}
    for header, value in resp.getheaders():
        resp_headers[header.lower()] = value
    if resp.status == 204:
        resp.read()
        return resp_headers, []
    return resp_headers, json_loads(resp.read())
Example #20
 def getresponse(self):
     if kwargs.get('raise_exc'):
         raise Exception('test')
     if kwargs.get('raise_timeout_exc'):
         raise Timeout()
     return self
Example #21
def direct_put_object(node,
                      part,
                      account,
                      container,
                      name,
                      contents,
                      content_length=None,
                      etag=None,
                      content_type=None,
                      headers=None,
                      conn_timeout=5,
                      response_timeout=15,
                      chunk_size=65535):
    """
    Put an object directly to the object server.

    :param node: node dictionary from the ring
    :param part: partition the container is on
    :param account: account name
    :param container: container name
    :param name: object name
    :param contents: an iterable or string to read object data from
    :param content_length: value to send as content-length header
    :param etag: etag of contents
    :param content_type: value to send as content-type header
    :param headers: additional headers to include in the request
    :param conn_timeout: timeout in seconds for establishing the connection
    :param response_timeout: timeout in seconds for getting the response
    :param chunk_size: if defined, chunk size of data to send.
    :returns: etag from the server response
    :raises ClientException: HTTP PUT request failed
    """

    path = '/%s/%s/%s' % (account, container, name)
    if headers is None:
        headers = {}
    if etag:
        headers['ETag'] = etag.strip('"')
    if content_length is not None:
        headers['Content-Length'] = str(content_length)
    else:
        for n, v in headers.items():
            if n.lower() == 'content-length':
                content_length = int(v)
    if content_type is not None:
        headers['Content-Type'] = content_type
    else:
        headers['Content-Type'] = 'application/octet-stream'
    if not contents:
        headers['Content-Length'] = '0'
    if isinstance(contents, six.string_types):
        contents = [contents]
    # In case the caller wants to insert an object with a specific age
    add_ts = 'X-Timestamp' not in headers

    if content_length is None:
        headers['Transfer-Encoding'] = 'chunked'

    with Timeout(conn_timeout):
        conn = http_connect(node['ip'],
                            node['port'],
                            node['device'],
                            part,
                            'PUT',
                            path,
                            headers=gen_headers(headers, add_ts))

    contents_f = FileLikeIter(contents)

    if content_length is None:
        chunk = contents_f.read(chunk_size)
        while chunk:
            conn.send('%x\r\n%s\r\n' % (len(chunk), chunk))
            chunk = contents_f.read(chunk_size)
        conn.send('0\r\n\r\n')
    else:
        left = content_length
        while left > 0:
            size = chunk_size
            if size > left:
                size = left
            chunk = contents_f.read(size)
            if not chunk:
                break
            conn.send(chunk)
            left -= len(chunk)

    with Timeout(response_timeout):
        resp = conn.getresponse()
        resp.read()
    if not is_success(resp.status):
        raise DirectClientException('Object', 'PUT', node, part, path, resp)
    return resp.getheader('etag').strip('"')
Example #22
 def __init__(self, seconds=None, msg=None):
     Timeout.__init__(self, seconds=seconds)
     self.msg = msg
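Examples #10 and #22 are two methods of the same idea: a Timeout subclass that carries a human-readable message. A self-contained sketch combining them, assuming eventlet is installed (the class name is illustrative):

from eventlet import Timeout

class MessageTimeout(Timeout):
    def __init__(self, seconds=None, msg=None):
        Timeout.__init__(self, seconds=seconds)
        self.msg = msg

    def __str__(self):
        return '%s: %s' % (Timeout.__str__(self), self.msg)

# str(MessageTimeout(10, 'waiting for client'))
# -> e.g. '10 seconds: waiting for client'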
Example #23
    def container_update(self, op, account, container, obj, request,
                         headers_out, objdevice, policy):
        """
        Update the container when objects are updated.

        :param op: operation performed (ex: 'PUT', or 'DELETE')
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param request: the original request object driving the update
        :param headers_out: dictionary of headers to send in the container
                            request(s)
        :param objdevice: device name that the object is in
        :param policy:  the BaseStoragePolicy instance
        """
        headers_in = request.headers
        conthosts = [h.strip() for h in
                     headers_in.get('X-Container-Host', '').split(',')]
        contdevices = [d.strip() for d in
                       headers_in.get('X-Container-Device', '').split(',')]
        contpartition = headers_in.get('X-Container-Partition', '')

        if len(conthosts) != len(contdevices):
            # This shouldn't happen unless there's a bug in the proxy,
            # but if there is, we want to know about it.
            self.logger.error(_('ERROR Container update failed: different '
                                'numbers of hosts and devices in request: '
                                '"%s" vs "%s"') %
                               (headers_in.get('X-Container-Host', ''),
                                headers_in.get('X-Container-Device', '')))
            return

        if contpartition:
            updates = zip(conthosts, contdevices)
        else:
            updates = []

        headers_out['x-trans-id'] = headers_in.get('x-trans-id', '-')
        headers_out['referer'] = request.as_referer()
        headers_out['X-Backend-Storage-Policy-Index'] = int(policy)
        update_greenthreads = []
        for conthost, contdevice in updates:
            gt = spawn(self.async_update, op, account, container, obj,
                       conthost, contpartition, contdevice, headers_out,
                       objdevice, policy,
                       logger_thread_locals=self.logger.thread_locals)
            update_greenthreads.append(gt)
        # Wait a little bit to see if the container updates are successful.
        # If we immediately return after firing off the greenthread above, then
        # we're more likely to confuse the end-user who does a listing right
        # after getting a successful response to the object create. The
        # `container_update_timeout` bounds the length of time we wait so that
        # one slow container server doesn't make the entire request lag.
        try:
            with Timeout(self.container_update_timeout):
                for gt in update_greenthreads:
                    gt.wait()
        except Timeout:
            # updates didn't go through, log it and return
            self.logger.debug(
                'Container update timeout (%.4fs) waiting for %s',
                self.container_update_timeout, updates)
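The comment block above explains the pattern: fire off one greenthread per update, then bound the total wait with a single Timeout so one slow server cannot stall the whole request. A minimal sketch, assuming eventlet is installed:

import eventlet
from eventlet import Timeout

# Three workers; the last one is deliberately slower than the deadline.
threads = [eventlet.spawn(eventlet.sleep, d) for d in (0.01, 0.02, 5)]
try:
    with Timeout(0.1):
        for gt in threads:
            gt.wait()  # block until each greenthread finishes
except Timeout:
    print('gave up waiting; the slow greenthread keeps running')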
Example #24
    def async_update(self, op, account, container, obj, host, partition,
                     contdevice, headers_out, objdevice):
        """
        Sends or saves an async update.

        :param op: operation performed (e.g. 'PUT' or 'DELETE')
        :param account: account name for the object
        :param container: container name for the object
        :param obj: object name
        :param host: host that the container is on
        :param partition: partition that the container is on
        :param contdevice: device name that the container is on
        :param headers_out: dictionary of headers to send in the container
                            request
        :param objdevice: device name that the object is in
        """
        full_path = '/%s/%s/%s' % (account, container, obj)
        if all([host, partition, contdevice]):
            try:
                with ConnectionTimeout(self.conn_timeout):
                    ip, port = host.rsplit(':', 1)
                    conn = http_connect(ip, port, contdevice, partition, op,
                                        full_path, headers_out)
                with Timeout(self.node_timeout):
                    response = conn.getresponse()
                    response.read()
                    if is_success(response.status):
                        return
                    else:
                        self.logger.error(
                            _('ERROR Container update failed '
                              '(saving for async update later): %(status)d '
                              'response from %(ip)s:%(port)s/%(dev)s'), {
                                  'status': response.status,
                                  'ip': ip,
                                  'port': port,
                                  'dev': contdevice
                              })
            except (Exception, Timeout):
                self.logger.exception(
                    _('ERROR container update failed with '
                      '%(ip)s:%(port)s/%(dev)s (saving for async update later)'
                      ), {
                          'ip': ip,
                          'port': port,
                          'dev': contdevice
                      })
        async_dir = os.path.join(self.devices, objdevice, ASYNCDIR)
        ohash = hash_path(account, container, obj)
        self.logger.increment('async_pendings')
        write_pickle(
            {
                'op': op,
                'account': account,
                'container': container,
                'obj': obj,
                'headers': headers_out
            },
            os.path.join(
                async_dir, ohash[-3:],
                ohash + '-' + normalize_timestamp(headers_out['x-timestamp'])),
            os.path.join(self.devices, objdevice, 'tmp'))
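
A minimal sketch of the method's overall shape: try the synchronous update under a timeout, and on any failure fall through to queueing it for later. do_update and save_for_later are hypothetical stand-ins:

    from eventlet import Timeout

    def update_or_defer(do_update, save_for_later, node_timeout=3):
        try:
            with Timeout(node_timeout):
                if do_update():          # returns True on a 2xx response
                    return True
        except (Exception, Timeout):
            pass                         # connection error or timeout
        save_for_later()                 # e.g. write_pickle() to the async dir
        return False
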
Example #25
def run_wsgi(conf_path, app_section, *args, **kwargs):
    """
    Runs the server according to some strategy.  The default strategy runs a
    specified number of workers in a pre-fork model.  The object-server (only)
    may use a servers-per-port strategy if its config has a servers_per_port
    setting with a value greater than zero.

    :param conf_path: Path to paste.deploy style configuration file/directory
    :param app_section: App name from conf file to load config from
    :returns: 0 if successful, nonzero otherwise
    """
    # Load configuration, set up the logger, and load the request processor.
    # conf: dict of configuration settings
    # logger: the log object
    # log_name: the log name, e.g. proxy_logging
    try:
        (conf, logger, log_name) = \
            _initrp(conf_path, app_section, *args, **kwargs)
    except ConfigFileError as e:
        print(e)
        return 1

    # optional nice/ionice priority scheduling
    utils.modify_priority(conf, logger)

    servers_per_port = int(conf.get('servers_per_port', '0') or 0)

    # NOTE: for now servers_per_port is object-server-only; future work could
    # be done to test and allow it to be used for account and container
    # servers, but that has not been done yet.
    if servers_per_port and app_section == 'object-server':
        strategy = ServersPerPortStrategy(
            conf, logger, servers_per_port=servers_per_port)
    else:
        # multiple server processes listen on one port; the default worker
        # count is the number of logical CPUs
        strategy = WorkersStrategy(conf, logger)

    # patch event before loadapp
    # modify existing code at runtime, dynamically replacing standard-library
    # modules with eventlet's green versions
    utils.eventlet_monkey_patch()

    # Ensure the configuration and application can be loaded before proceeding.
    global_conf = {'log_name': log_name}
    if 'global_conf_callback' in kwargs:
        kwargs['global_conf_callback'](conf, global_conf)
    loadapp(conf_path, global_conf=global_conf)

    # set utils.FALLOCATE_RESERVE if desired
    utils.FALLOCATE_RESERVE, utils.FALLOCATE_IS_PERCENT = \
        utils.config_fallocate_value(conf.get('fallocate_reserve', '1%'))

    # Start listening on bind_addr/port
    error_msg = strategy.do_bind_ports()
    if error_msg:
        logger.error(error_msg)
        print(error_msg)
        return 1

    # Redirect errors to logger and close stdio. Do this *after* binding ports;
    # we use this to signal that the service is ready to accept connections.
    capture_stdio(logger)

    no_fork_sock = strategy.no_fork_sock()
    if no_fork_sock:
        run_server(conf, logger, no_fork_sock, global_conf=global_conf)
        return 0

    def stop_with_signal(signum, *args):
        """Set running flag to False and capture the signum"""
        running_context[0] = False
        running_context[1] = signum

    # context to hold boolean running state and stop signum
    running_context = [True, None]
    signal.signal(signal.SIGTERM, stop_with_signal)
    signal.signal(signal.SIGHUP, stop_with_signal)

    while running_context[0]:
        for sock, sock_info in strategy.new_worker_socks():
            pid = os.fork()
            if pid == 0:   # in child process
                signal.signal(signal.SIGHUP, signal.SIG_DFL)
                signal.signal(signal.SIGTERM, signal.SIG_DFL)
                strategy.post_fork_hook()
                run_server(conf, logger, sock)
                strategy.log_sock_exit(sock, sock_info)
                return 0
            else:  # in parent process
                strategy.register_worker_start(sock, sock_info, pid)

        # The strategy may need to pay attention to something in addition to
        # child process exits (like new ports showing up in a ring).
        #
        # NOTE: a timeout value of None will just instantiate the Timeout
        # object and not actually schedule it, which is equivalent to no
        # timeout for the green_os.wait().
        loop_timeout = strategy.loop_timeout()

        with Timeout(loop_timeout, exception=False):
            try:
                try:
                    pid, status = green_os.wait()
                    if os.WIFEXITED(status) or os.WIFSIGNALED(status):
                        strategy.register_worker_exit(pid)
                except OSError as err:
                    if err.errno not in (errno.EINTR, errno.ECHILD):
                        raise
                    if err.errno == errno.ECHILD:
                        # If there are no children at all (ECHILD), then
                        # there's nothing to actually wait on. We sleep
                        # for a little bit to avoid a tight CPU spin
                        # and still are able to catch any KeyboardInterrupt
                        # events that happen. The value of 0.01 matches the
                        # value in eventlet's waitpid().
                        sleep(0.01)
            except KeyboardInterrupt:
                logger.notice('User quit')
                running_context[0] = False
                break

    if running_context[1] is not None:
        try:
            signame = SIGNUM_TO_NAME[running_context[1]]
        except KeyError:
            logger.error('Stopping with unexpected signal %r' %
                         running_context[1])
        else:
            logger.error('%s received', signame)
    if running_context[1] == signal.SIGTERM:
        os.killpg(0, signal.SIGTERM)

    strategy.shutdown_sockets()
    signal.signal(signal.SIGTERM, signal.SIG_IGN)
    logger.notice('Exited (%s)', os.getpid())
    return 0
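
The wait loop above relies on two quirks of eventlet's Timeout worth isolating: exception=False makes the with block exit silently when the timer fires, and a seconds value of None never fires at all. A minimal sketch, with poll() as a hypothetical readiness check:

    from eventlet import Timeout, sleep

    def wait_until(poll, loop_timeout=1.0):
        with Timeout(loop_timeout, exception=False):
            while not poll():
                sleep(0.01)      # yield so other greenthreads can run
            return True
        return False             # reached only when the silent timeout fired
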
Example #26
 def fake_get_hashes(*args, **kwargs):
     self.get_hash_count += 1
     if self.get_hash_count == 3:
         # raise timeout on last call to get hashes
         raise Timeout()
     return 2, {'abc': 'def'}
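
A hedged sketch of how such a counting fake is typically wired in; the module path being patched is hypothetical and depends on where get_hashes is looked up:

    from unittest import mock
    from eventlet import Timeout

    state = {'calls': 0}

    def fake_get_hashes(*args, **kwargs):
        state['calls'] += 1
        if state['calls'] == 3:
            raise Timeout()              # only the third call times out
        return 2, {'abc': 'def'}

    # with mock.patch('swift.obj.replicator.get_hashes', fake_get_hashes):
    #     replicator.update(job)         # hypothetical system under test
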
Example #27
 def __init__(self, seconds=None, msg=None):
     Timeout.__init__(self, seconds=seconds)
     self.msg = msg
Example #28
 def read(self, size):
     raise Timeout()
Example #29
class TestContainerController(unittest.TestCase):
    """ Test swift.container_server.ContainerController """
    def setUp(self):
        """ Set up for testing swift.object_server.ObjectController """
        self.testdir = os.path.join(mkdtemp(),
                                    'tmp_test_object_server_ObjectController')
        mkdirs(self.testdir)
        rmtree(self.testdir)
        mkdirs(os.path.join(self.testdir, 'sda1'))
        mkdirs(os.path.join(self.testdir, 'sda1', 'tmp'))
        self.controller = container_server.ContainerController(
            {'devices': self.testdir, 'mount_check': 'false'})

    def tearDown(self):
        """ Tear down for testing swift.object_server.ObjectController """
        rmtree(os.path.dirname(self.testdir), ignore_errors=1)

    def test_acl_container(self):
        # Ensure no acl by default
        req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT'},
            headers={'X-Timestamp': '0'})
        self.controller.PUT(req)
        req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'HEAD'})
        response = self.controller.HEAD(req)
        self.assert_(response.status.startswith('204'))
        self.assert_('x-container-read' not in response.headers)
        self.assert_('x-container-write' not in response.headers)
        # Ensure POSTing acls works
        req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'POST'},
            headers={'X-Timestamp': '1', 'X-Container-Read': '.r:*',
                     'X-Container-Write': 'account:user'})
        self.controller.POST(req)
        req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'HEAD'})
        response = self.controller.HEAD(req)
        self.assert_(response.status.startswith('204'))
        self.assertEquals(response.headers.get('x-container-read'), '.r:*')
        self.assertEquals(response.headers.get('x-container-write'),
                          'account:user')
        # Ensure we can clear acls on POST
        req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'POST'},
            headers={'X-Timestamp': '3', 'X-Container-Read': '',
                     'X-Container-Write': ''})
        self.controller.POST(req)
        req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'HEAD'})
        response = self.controller.HEAD(req)
        self.assert_(response.status.startswith('204'))
        self.assert_('x-container-read' not in response.headers)
        self.assert_('x-container-write' not in response.headers)
        # Ensure PUTing acls works
        req = Request.blank('/sda1/p/a/c2', environ={'REQUEST_METHOD': 'PUT'},
            headers={'X-Timestamp': '4', 'X-Container-Read': '.r:*',
                     'X-Container-Write': 'account:user'})
        self.controller.PUT(req)
        req = Request.blank('/sda1/p/a/c2', environ={'REQUEST_METHOD': 'HEAD'})
        response = self.controller.HEAD(req)
        self.assert_(response.status.startswith('204'))
        self.assertEquals(response.headers.get('x-container-read'), '.r:*')
        self.assertEquals(response.headers.get('x-container-write'),
                          'account:user')

    def test_HEAD(self):
        req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT',
            'HTTP_X_TIMESTAMP': '0'})
        self.controller.PUT(req)
        response = self.controller.HEAD(req)
        self.assert_(response.status.startswith('204'))
        self.assertEquals(int(response.headers['x-container-bytes-used']), 0)
        self.assertEquals(int(response.headers['x-container-object-count']), 0)
        req2 = Request.blank('/sda1/p/a/c/o', environ=
                {'HTTP_X_TIMESTAMP': '1', 'HTTP_X_SIZE': 42,
                 'HTTP_X_CONTENT_TYPE': 'text/plain', 'HTTP_X_ETAG': 'x'})
        self.controller.PUT(req2)
        response = self.controller.HEAD(req)
        self.assertEquals(int(response.headers['x-container-bytes-used']), 42)
        self.assertEquals(int(response.headers['x-container-object-count']), 1)

    def test_HEAD_not_found(self):
        req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'HEAD'})
        resp = self.controller.HEAD(req)
        self.assertEquals(resp.status_int, 404)

    def test_PUT(self):
        req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT',
            'HTTP_X_TIMESTAMP': '1'})
        resp = self.controller.PUT(req)
        self.assertEquals(resp.status_int, 201)
        req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT',
            'HTTP_X_TIMESTAMP': '2'})
        resp = self.controller.PUT(req)
        self.assertEquals(resp.status_int, 202)

    def test_PUT_obj_not_found(self):
        req = Request.blank('/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'PUT'},
            headers={'X-Timestamp': '1', 'X-Size': '0',
                     'X-Content-Type': 'text/plain', 'X-ETag': 'e'})
        resp = self.controller.PUT(req)
        self.assertEquals(resp.status_int, 404)

    def test_PUT_GET_metadata(self):
        # Set metadata header
        req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT'},
            headers={'X-Timestamp': normalize_timestamp(1),
                     'X-Container-Meta-Test': 'Value'})
        resp = self.controller.PUT(req)
        self.assertEquals(resp.status_int, 201)
        req = Request.blank('/sda1/p/a/c')
        resp = self.controller.GET(req)
        self.assertEquals(resp.status_int, 204)
        self.assertEquals(resp.headers.get('x-container-meta-test'), 'Value')
        # Set another metadata header, ensuring old one doesn't disappear
        req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'POST'},
            headers={'X-Timestamp': normalize_timestamp(1),
                     'X-Container-Meta-Test2': 'Value2'})
        resp = self.controller.POST(req)
        self.assertEquals(resp.status_int, 204)
        req = Request.blank('/sda1/p/a/c')
        resp = self.controller.GET(req)
        self.assertEquals(resp.status_int, 204)
        self.assertEquals(resp.headers.get('x-container-meta-test'), 'Value')
        self.assertEquals(resp.headers.get('x-container-meta-test2'), 'Value2')
        # Update metadata header
        req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT'},
            headers={'X-Timestamp': normalize_timestamp(3),
                     'X-Container-Meta-Test': 'New Value'})
        resp = self.controller.PUT(req)
        self.assertEquals(resp.status_int, 202)
        req = Request.blank('/sda1/p/a/c')
        resp = self.controller.GET(req)
        self.assertEquals(resp.status_int, 204)
        self.assertEquals(resp.headers.get('x-container-meta-test'),
                          'New Value')
        # Send old update to metadata header
        req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT'},
            headers={'X-Timestamp': normalize_timestamp(2),
                     'X-Container-Meta-Test': 'Old Value'})
        resp = self.controller.PUT(req)
        self.assertEquals(resp.status_int, 202)
        req = Request.blank('/sda1/p/a/c')
        resp = self.controller.GET(req)
        self.assertEquals(resp.status_int, 204)
        self.assertEquals(resp.headers.get('x-container-meta-test'),
                          'New Value')
        # Remove metadata header (by setting it to empty)
        req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT'},
            headers={'X-Timestamp': normalize_timestamp(4),
                     'X-Container-Meta-Test': ''})
        resp = self.controller.PUT(req)
        self.assertEquals(resp.status_int, 202)
        req = Request.blank('/sda1/p/a/c')
        resp = self.controller.GET(req)
        self.assertEquals(resp.status_int, 204)
        self.assert_('x-container-meta-test' not in resp.headers)

    def test_POST_HEAD_metadata(self):
        req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT'},
            headers={'X-Timestamp': normalize_timestamp(1)})
        resp = self.controller.PUT(req)
        self.assertEquals(resp.status_int, 201)
        # Set metadata header
        req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'POST'},
            headers={'X-Timestamp': normalize_timestamp(1),
                     'X-Container-Meta-Test': 'Value'})
        resp = self.controller.POST(req)
        self.assertEquals(resp.status_int, 204)
        req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'HEAD'})
        resp = self.controller.HEAD(req)
        self.assertEquals(resp.status_int, 204)
        self.assertEquals(resp.headers.get('x-container-meta-test'), 'Value')
        # Update metadata header
        req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'POST'},
            headers={'X-Timestamp': normalize_timestamp(3),
                     'X-Container-Meta-Test': 'New Value'})
        resp = self.controller.POST(req)
        self.assertEquals(resp.status_int, 204)
        req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'HEAD'})
        resp = self.controller.HEAD(req)
        self.assertEquals(resp.status_int, 204)
        self.assertEquals(resp.headers.get('x-container-meta-test'),
                          'New Value')
        # Send old update to metadata header
        req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'POST'},
            headers={'X-Timestamp': normalize_timestamp(2),
                     'X-Container-Meta-Test': 'Old Value'})
        resp = self.controller.POST(req)
        self.assertEquals(resp.status_int, 204)
        req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'HEAD'})
        resp = self.controller.HEAD(req)
        self.assertEquals(resp.status_int, 204)
        self.assertEquals(resp.headers.get('x-container-meta-test'),
                          'New Value')
        # Remove metadata header (by setting it to empty)
        req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'POST'},
            headers={'X-Timestamp': normalize_timestamp(4),
                     'X-Container-Meta-Test': ''})
        resp = self.controller.POST(req)
        self.assertEquals(resp.status_int, 204)
        req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'HEAD'})
        resp = self.controller.HEAD(req)
        self.assertEquals(resp.status_int, 204)
        self.assert_('x-container-meta-test' not in resp.headers)

    def test_DELETE_obj_not_found(self):
        req = Request.blank('/sda1/p/a/c/o',
                environ={'REQUEST_METHOD': 'DELETE'},
                headers={'X-Timestamp': '1'})
        resp = self.controller.DELETE(req)
        self.assertEquals(resp.status_int, 404)

    def test_PUT_utf8(self):
        snowman = u'\u2603'
        container_name = snowman.encode('utf-8')
        req = Request.blank('/sda1/p/a/%s' % container_name,
            environ={'REQUEST_METHOD': 'PUT', 'HTTP_X_TIMESTAMP': '1'})
        resp = self.controller.PUT(req)
        self.assertEquals(resp.status_int, 201)

    def test_PUT_account_update(self):
        bindsock = listen(('127.0.0.1', 0))
        def accept(return_code, expected_timestamp):
            try:
                with Timeout(3):
                    sock, addr = bindsock.accept()
                    inc = sock.makefile('rb')
                    out = sock.makefile('wb')
                    out.write('HTTP/1.1 %d OK\r\nContent-Length: 0\r\n\r\n' %
                              return_code)
                    out.flush()
                    self.assertEquals(inc.readline(),
                                      'PUT /sda1/123/a/c HTTP/1.1\r\n')
                    headers = {}
                    line = inc.readline()
                    while line and line != '\r\n':
                        headers[line.split(':')[0].lower()] = \
                            line.split(':')[1].strip()
                        line = inc.readline()
                    self.assertEquals(headers['x-put-timestamp'],
                                      expected_timestamp)
            except BaseException as err:
                return err
            return None
        req = Request.blank('/sda1/p/a/c',
            environ={'REQUEST_METHOD': 'PUT'},
            headers={'X-Timestamp': '0000000001.00000',
                     'X-Account-Host': '%s:%s' % bindsock.getsockname(),
                     'X-Account-Partition': '123',
                     'X-Account-Device': 'sda1'})
        event = spawn(accept, 201, '0000000001.00000')
        try:
            with Timeout(3):
                resp = self.controller.PUT(req)
                self.assertEquals(resp.status_int, 201)
        finally:
            err = event.wait()
            if err:
                raise Exception(err)
        req = Request.blank('/sda1/p/a/c',
            environ={'REQUEST_METHOD': 'DELETE'},
            headers={'X-Timestamp': '2'})
        resp = self.controller.DELETE(req)
        self.assertEquals(resp.status_int, 204)
        req = Request.blank('/sda1/p/a/c',
            environ={'REQUEST_METHOD': 'PUT'},
            headers={'X-Timestamp': '0000000003.00000',
                     'X-Account-Host': '%s:%s' % bindsock.getsockname(),
                     'X-Account-Partition': '123',
                     'X-Account-Device': 'sda1'})
        event = spawn(accept, 404, '0000000003.00000')
        try:
            with Timeout(3):
                resp = self.controller.PUT(req)
                self.assertEquals(resp.status_int, 404)
        finally:
            err = event.wait()
            if err:
                raise Exception(err)
        req = Request.blank('/sda1/p/a/c',
            environ={'REQUEST_METHOD': 'PUT'},
            headers={'X-Timestamp': '0000000005.00000',
                     'X-Account-Host': '%s:%s' % bindsock.getsockname(),
                     'X-Account-Partition': '123',
                     'X-Account-Device': 'sda1'})
        event = spawn(accept, 503, '0000000005.00000')
        got_exc = False
        try:
            with Timeout(3):
                resp = self.controller.PUT(req)
        except BaseException as err:
            got_exc = True
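
A minimal, self-contained sketch of the accept-side harness used in this test: bind an ephemeral port, answer a single request from a greenthread, and cap the server side with Timeout(3) so a hang fails quickly instead of stalling the suite. Names are hypothetical:

    from eventlet import Timeout, listen, spawn

    bindsock = listen(('127.0.0.1', 0))

    def accept_once(return_code):
        with Timeout(3):
            sock, _addr = bindsock.accept()
            out = sock.makefile('wb')
            out.write(b'HTTP/1.1 %d OK\r\nContent-Length: 0\r\n\r\n'
                      % return_code)
            out.flush()

    event = spawn(accept_once, 201)
    # ... drive the client request here, then surface any server-side error:
    # event.wait()
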
Example #30
    def update(self, job):
        """
        High-level method that replicates a single partition.

        :param job: a dict containing info about the partition to be replicated
        """
        self.replication_count += 1
        begin = time.time()
        try:
            hashed, local_hash = tpool.execute(
                tpooled_get_hashes,
                job['path'],
                do_listdir=(self.replication_count % 10) == 0,
                reclaim_age=self.reclaim_age)
            # See tpooled_get_hashes "Hack".
            if isinstance(hashed, BaseException):
                raise hashed
            self.suffix_hash += hashed
            attempts_left = self.object_ring.replica_count - 1
            nodes = itertools.chain(
                job['nodes'],
                self.object_ring.get_more_nodes(int(job['partition'])))
            while attempts_left > 0:
                # If this raises StopIteration it will be caught way below
                node = next(nodes)
                attempts_left -= 1
                try:
                    with Timeout(self.http_timeout):
                        resp = http_connect(node['ip'],
                                            node['port'],
                                            node['device'],
                                            job['partition'],
                                            'REPLICATE',
                                            '',
                                            headers={
                                                'Content-Length': '0'
                                            }).getresponse()
                        if resp.status == 507:
                            self.logger.error(
                                _('%(ip)s/%(device)s responded'
                                  ' as unmounted'), node)
                            attempts_left += 1
                            continue
                        if resp.status != 200:
                            self.logger.error(
                                _("Invalid response %(resp)s "
                                  "from %(ip)s"), {
                                      'resp': resp.status,
                                      'ip': node['ip']
                                  })
                            continue
                        remote_hash = pickle.loads(resp.read())
                        del resp
                    suffixes = [
                        suffix for suffix in local_hash
                        if local_hash[suffix] != remote_hash.get(suffix, -1)
                    ]
                    if not suffixes:
                        continue
                    hashed, recalc_hash = tpool.execute(
                        tpooled_get_hashes,
                        job['path'],
                        recalculate=suffixes,
                        reclaim_age=self.reclaim_age)
                    # See tpooled_get_hashes "Hack".
                    if isinstance(hashed, BaseException):
                        raise hashed
                    local_hash = recalc_hash
                    suffixes = [
                        suffix for suffix in local_hash
                        if local_hash[suffix] != remote_hash.get(suffix, -1)
                    ]
                    self.rsync(node, job, suffixes)
                    with Timeout(self.http_timeout):
                        conn = http_connect(node['ip'],
                                            node['port'],
                                            node['device'],
                                            job['partition'],
                                            'REPLICATE',
                                            '/' + '-'.join(suffixes),
                                            headers={'Content-Length': '0'})
                        conn.getresponse().read()
                    self.suffix_sync += len(suffixes)
                except (Exception, Timeout):
                    self.logger.exception(
                        _("Error syncing with node: %s") % node)
            self.suffix_count += len(local_hash)
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing partition"))
        finally:
            self.partition_times.append(time.time() - begin)
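
The suffix-diff step at the heart of the loop above compares plain dicts; a suffix missing on the remote side must count as different, hence the get(..., -1) default. A minimal extraction:

    def suffixes_to_sync(local_hash, remote_hash):
        # A suffix needs syncing when the remote hash is absent or different.
        return [suffix for suffix in local_hash
                if local_hash[suffix] != remote_hash.get(suffix, -1)]

    # suffixes_to_sync({'abc': 1, 'def': 2}, {'abc': 1}) == ['def']
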
Example #31
            db_conns = []
            for i in range(num_locks):
                db_conn = connect(db_files[i])
                db_conn.execute('begin exclusive transaction')
                db_conns.append(db_conn)
            if catch_503:
                exc = None
                try:
                    client.delete_container(self.url, self.token, container)
                except client.ClientException as err:
                    exc = err
                self.assertEquals(exc.http_status, 503)
            else:
                client.delete_container(self.url, self.token, container)

        pool = GreenPool()
        try:
            with Timeout(15):
                pool.spawn(run_test, 1, False)
                pool.spawn(run_test, 2, True)
                pool.spawn(run_test, 3, True)
                pool.waitall()
        except Timeout as err:
            raise Exception(
                "The server did not return a 503 on container db locks, "
                "it just hangs: %s" % err)


if __name__ == '__main__':
    main()
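
A minimal sketch of the GreenPool-under-deadline pattern in Example #31: run the cases concurrently and convert a hang into an explicit failure. run_case is a hypothetical stand-in for the test body:

    from eventlet import GreenPool, Timeout, sleep

    def run_case(n):
        sleep(0.01 * n)                  # stand-in for a real test body

    pool = GreenPool()
    try:
        with Timeout(15):
            for n in (1, 2, 3):
                pool.spawn(run_case, n)
            pool.waitall()
    except Timeout as err:
        raise Exception('test cases hung instead of finishing: %s' % err)
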
Example #32
    def update(self, job):
        """
        High-level method that replicates a single partition.

        :param job: a dict containing info about the partition to be replicated
        """
        self.replication_count += 1
        self.logger.increment('partition.update.count.%s' % (job['device'], ))
        self.headers['X-Backend-Storage-Policy-Index'] = job['policy_idx']
        begin = time.time()
        try:
            hashed, local_hash = tpool_reraise(
                get_hashes,
                job['path'],
                do_listdir=(self.replication_count % 10) == 0,
                reclaim_age=self.reclaim_age)
            self.suffix_hash += hashed
            self.logger.update_stats('suffix.hashes', hashed)
            attempts_left = len(job['nodes'])
            nodes = itertools.chain(
                job['nodes'],
                job['object_ring'].get_more_nodes(int(job['partition'])))
            while attempts_left > 0:
                # If this raises StopIteration it will be caught way below
                node = next(nodes)
                attempts_left -= 1
                try:
                    with Timeout(self.http_timeout):
                        resp = http_connect(
                            node['replication_ip'],
                            node['replication_port'],
                            node['device'],
                            job['partition'],
                            'REPLICATE',
                            '',
                            headers=self.headers).getresponse()
                        if resp.status == HTTP_INSUFFICIENT_STORAGE:
                            self.logger.error(
                                _('%(ip)s/%(device)s responded'
                                  ' as unmounted'), node)
                            attempts_left += 1
                            continue
                        if resp.status != HTTP_OK:
                            self.logger.error(
                                _("Invalid response %(resp)s "
                                  "from %(ip)s"), {
                                      'resp': resp.status,
                                      'ip': node['replication_ip']
                                  })
                            continue
                        remote_hash = pickle.loads(resp.read())
                        del resp
                    suffixes = [
                        suffix for suffix in local_hash
                        if local_hash[suffix] != remote_hash.get(suffix, -1)
                    ]
                    if not suffixes:
                        continue
                    hashed, recalc_hash = tpool_reraise(
                        get_hashes,
                        job['path'],
                        recalculate=suffixes,
                        reclaim_age=self.reclaim_age)
                    self.logger.update_stats('suffix.hashes', hashed)
                    local_hash = recalc_hash
                    suffixes = [
                        suffix for suffix in local_hash
                        if local_hash[suffix] != remote_hash.get(suffix, -1)
                    ]
                    self.sync(node, job, suffixes)
                    with Timeout(self.http_timeout):
                        conn = http_connect(node['replication_ip'],
                                            node['replication_port'],
                                            node['device'],
                                            job['partition'],
                                            'REPLICATE',
                                            '/' + '-'.join(suffixes),
                                            headers=self.headers)
                        conn.getresponse().read()
                    self.suffix_sync += len(suffixes)
                    self.logger.update_stats('suffix.syncs', len(suffixes))
                except (Exception, Timeout):
                    self.logger.exception(
                        _("Error syncing with node: %s") % node)
            self.suffix_count += len(local_hash)
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing partition"))
        finally:
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.update.timing', begin)
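
This variant adds statsd-style accounting around the same flow; the begin/finally pairing generalizes to any timed section. A minimal sketch, assuming a Swift-style logger exposing timing_since():

    import time

    def timed(logger, metric, fn, *args, **kwargs):
        begin = time.time()
        try:
            return fn(*args, **kwargs)
        finally:
            # Records the elapsed time even if fn() raised.
            logger.timing_since(metric, begin)
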
Example #33
 def get_node_timeout(self, timeout):
     """
     Return node timeout. 
     """
     return Timeout(timeout)
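
One caveat worth a sketch: eventlet's Timeout arms its timer at construction, so the value returned above is already counting down. Use it promptly as a context manager, or cancel it in a finally:

    from eventlet import Timeout

    timeout = Timeout(10)        # the timer starts here, not at __enter__
    try:
        pass                     # node I/O goes here
    finally:
        timeout.cancel()         # disarm once the work completes
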
Example #34
 def __str__(self):
     return '%s: %s' % (Timeout.__str__(self), self.msg)
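
Paired with the __init__ from Example #22 (see the sketch there), this produces messages like the one below; the left half comes from eventlet's own Timeout.__str__, whose exact wording can vary by version:

    from eventlet import Timeout

    class MessageTimeout(Timeout):           # as sketched in Example #22
        def __init__(self, seconds=None, msg=None):
            Timeout.__init__(self, seconds=seconds)
            self.msg = msg

        def __str__(self):
            return '%s: %s' % (Timeout.__str__(self), self.msg)

    t = MessageTimeout(10, 'waiting on node')
    print(t)      # e.g. '10 seconds: waiting on node'
    t.cancel()    # the timer armed at construction; disarm it
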
Example #35
     except BaseException as err:
         return err
     return None
 req = Request.blank('/sda1/p/a/c',
     environ={'REQUEST_METHOD': 'PUT'}, headers={'X-Timestamp': '1'})
 resp = self.controller.PUT(req)
 self.assertEquals(resp.status_int, 201)
 req = Request.blank('/sda1/p/a/c',
     environ={'REQUEST_METHOD': 'DELETE'},
     headers={'X-Timestamp': '0000000002.00000',
              'X-Account-Host': '%s:%s' % bindsock.getsockname(),
              'X-Account-Partition': '123',
              'X-Account-Device': 'sda1'})
 event = spawn(accept, 204, '0000000002.00000')
 try:
     with Timeout(3):
         resp = self.controller.DELETE(req)
         self.assertEquals(resp.status_int, 204)
 finally:
     err = event.wait()
     if err:
         raise Exception(err)
 req = Request.blank('/sda1/p/a/c',
     environ={'REQUEST_METHOD': 'PUT', 'HTTP_X_TIMESTAMP': '2'})
 resp = self.controller.PUT(req)
 self.assertEquals(resp.status_int, 201)
 req = Request.blank('/sda1/p/a/c',
     environ={'REQUEST_METHOD': 'DELETE'},
     headers={'X-Timestamp': '0000000003.00000',
              'X-Account-Host': '%s:%s' % bindsock.getsockname(),
              'X-Account-Partition': '123',