Example #1
 def test_is_local_device(self):
     my_ips = ["127.0.0.1", "0000:0000:0000:0000:0000:0000:0000:0001"]
     my_port = 6000
     self.assertTrue(is_local_device(my_ips, my_port, "localhost", my_port))
     self.assertFalse(
         is_local_device(my_ips, my_port, "localhost", my_port + 1))
     self.assertFalse(is_local_device(my_ips, my_port, "127.0.0.2",
                                      my_port))
     # for those that don't have a local port
     self.assertTrue(is_local_device(my_ips, None, my_ips[0], None))
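The assertions above pin down the matching rule: a device counts as local when its address (after resolving a hostname such as "localhost") appears in my_ips and the ports agree, with a None port on either side acting as a wildcard. Below is a minimal, hypothetical sketch consistent with these tests; the real swift.common.utils.is_local_device also covers IPv6 normalization and resolution corner cases that this omits:

    import socket

    def is_local_device_sketch(my_ips, my_port, dev_ip, dev_port):
        # Resolve hostnames such as "localhost" before comparing; a plain
        # IPv4 string passes through gethostbyname unchanged.
        try:
            dev_ip = socket.gethostbyname(dev_ip)
        except (socket.gaierror, TypeError):
            return False
        if dev_ip not in my_ips:
            return False
        # A None port on either side matches any port.
        return my_port is None or dev_port is None or my_port == dev_port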
Example #2
 def run_once(self, *args, **kwargs):
     """Run a replication pass once."""
     self._zero_stats()
     dirs = []
     ips = whataremyips(self.bind_ip)
     if not ips:
         self.logger.error(_('ERROR Failed to get my own IPs?'))
         return
     self._local_device_ids = set()
     for node in self.ring.devs:
         if node and is_local_device(ips, self.port, node['replication_ip'],
                                     node['replication_port']):
             if self.mount_check and not ismount(
                     os.path.join(self.root, node['device'])):
                 self._add_failure_stats([
                     (failure_dev['replication_ip'], failure_dev['device'])
                     for failure_dev in self.ring.devs if failure_dev
                 ])
                 self.logger.warn(
                     _('Skipping %(device)s as it is not mounted') % node)
                 continue
             unlink_older_than(
                 os.path.join(self.root, node['device'], 'tmp'),
                 time.time() - self.reclaim_age)
             datadir = os.path.join(self.root, node['device'], self.datadir)
             if os.path.isdir(datadir):
                 self._local_device_ids.add(node['id'])
                 dirs.append((datadir, node['id']))
     self.logger.info(_('Beginning replication run'))
     for part, object_file, node_id in roundrobin_datadirs(dirs):
         self.cpool.spawn_n(self._replicate_object, part, object_file,
                            node_id)
     self.cpool.waitall()
     self.logger.info(_('Replication run OVER'))
     self._report_stats()
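run_once collects one (datadir, node_id) pair per local device, then lets roundrobin_datadirs interleave the walk across devices so that one large device cannot starve the rest, replicating each yielded database on a green thread from the pool. The interleaving pattern, reduced to a self-contained sketch (round_robin below is illustrative, not Swift's actual roundrobin_datadirs):

    def round_robin(iterables):
        # Yield one item from each source in turn until all are exhausted,
        # so no single source monopolizes the consumer.
        pending = [iter(it) for it in iterables]
        while pending:
            for it in list(pending):
                try:
                    yield next(it)
                except StopIteration:
                    pending.remove(it)

    print(list(round_robin([[1, 2, 3], 'ab'])))  # [1, 'a', 2, 'b', 3]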
Example #3
    def handle_container(self, handler, job):
        part, container_nodes = self.container_ring.get_nodes(
            handler._account.encode('utf-8'),
            handler._container.encode('utf-8'))
        nodes_count = len(container_nodes)

        for index, node in enumerate(container_nodes):
            if not is_local_device(self.myips, None, node['ip'], node['port']):
                continue
            broker = self.get_broker(handler._account, handler._container,
                                     part, node)
            broker_info = broker.get_info()
            last_row = handler.get_last_row(broker_info['id'])
            if not last_row:
                last_row = 0
            try:
                items = broker.get_items_since(last_row, self.items_chunk)
            except DatabaseConnectionError:
                continue

            if not items:
                return (0, 0, None, broker_info['id'])

            self.log(
                'info', 'Processing %d rows since row %d for %s/%s' %
                (len(items), last_row, handler._account, handler._container))
            owned_count, verified_count = self.process_items(
                handler, items, nodes_count, index, job)

            return (owned_count, verified_count, items[-1]['ROWID'],
                    broker_info['id'])
        return (0, 0, None, None)
Example #4
 def run_once(self, *args, **kwargs):
     """Run a replication pass once."""
     self._zero_stats()
     dirs = []
     ips = whataremyips()
     if not ips:
         self.logger.error(_('ERROR Failed to get my own IPs?'))
         return
     self._local_device_ids = set()
     for node in self.ring.devs:
         if node and is_local_device(ips, self.port,
                                     node['replication_ip'],
                                     node['replication_port']):
             if self.mount_check and not ismount(
                     os.path.join(self.root, node['device'])):
                 self.logger.warn(
                     _('Skipping %(device)s as it is not mounted') % node)
                 continue
             unlink_older_than(
                 os.path.join(self.root, node['device'], 'tmp'),
                 time.time() - self.reclaim_age)
             datadir = os.path.join(self.root, node['device'], self.datadir)
             if os.path.isdir(datadir):
                 self._local_device_ids.add(node['id'])
                 dirs.append((datadir, node['id']))
     self.logger.info(_('Beginning replication run'))
     for part, object_file, node_id in roundrobin_datadirs(dirs):
         self.cpool.spawn_n(
             self._replicate_object, part, object_file, node_id)
     self.cpool.waitall()
     self.logger.info(_('Replication run OVER'))
     self._report_stats()
Example #5
 def test_is_local_device(self):
     my_ips = ["127.0.0.1",
               "0000:0000:0000:0000:0000:0000:0000:0001"]
     my_port = 6000
     self.assertTrue(is_local_device(my_ips, my_port,
                                     "localhost",
                                     my_port))
     self.assertFalse(is_local_device(my_ips, my_port,
                                      "localhost",
                                      my_port + 1))
     self.assertFalse(is_local_device(my_ips, my_port,
                                      "127.0.0.2",
                                      my_port))
     # for those that don't have a local port
     self.assertTrue(is_local_device(my_ips, None,
                                     my_ips[0], None))
Example #6
 def collect_parts(self, override_devices=None, override_partitions=None):
     """
     Helper for yielding partitions in the top level reconstructor
     """
     override_devices = override_devices or []
     override_partitions = override_partitions or []
     ips = whataremyips()
     for policy in POLICIES:
         if policy.policy_type != EC_POLICY:
             continue
         self._diskfile_mgr = self._df_router[policy]
         self.load_object_ring(policy)
         data_dir = get_data_dir(policy)
         local_devices = itertools.ifilter(
             lambda dev: dev and is_local_device(ips, self.port, dev[
                 'replication_ip'], dev['replication_port']),
             policy.object_ring.devs)
         for local_dev in local_devices:
             if override_devices and (local_dev['device']
                                      not in override_devices):
                 continue
             dev_path = join(self.devices_dir, local_dev['device'])
             obj_path = join(dev_path, data_dir)
             tmp_path = join(dev_path, get_tmp_dir(int(policy)))
             if self.mount_check and not ismount(dev_path):
                 self.logger.warn(_('%s is not mounted'),
                                  local_dev['device'])
                 continue
             unlink_older_than(tmp_path, time.time() - self.reclaim_age)
             if not os.path.exists(obj_path):
                 try:
                     mkdirs(obj_path)
                 except Exception:
                     self.logger.exception('Unable to create %s' % obj_path)
                 continue
             try:
                 partitions = os.listdir(obj_path)
             except OSError:
                 self.logger.exception('Unable to list partitions in %r' %
                                       obj_path)
                 continue
             for partition in partitions:
                 part_path = join(obj_path, partition)
                 if not (partition.isdigit() and os.path.isdir(part_path)):
                     self.logger.warning(
                         'Unexpected entity in data dir: %r' % part_path)
                     remove_file(part_path)
                     continue
                 partition = int(partition)
                 if override_partitions and (partition
                                             not in override_partitions):
                     continue
                 part_info = {
                     'local_dev': local_dev,
                     'policy': policy,
                     'partition': partition,
                     'part_path': part_path,
                 }
                 yield part_info
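One portability note: itertools.ifilter exists only on Python 2, which is why later revisions of this method (Examples #22 and #23) spell it six.moves.filter. The lambda-plus-filter construction is also equivalent to a list comprehension; a self-contained sketch with stand-ins for policy.object_ring.devs and the is_local_device(...) predicate:

    devs = [{'device': 'sda'}, None, {'device': 'sdb'}]  # hypothetical ring devs

    def is_local(dev):
        return dev['device'] == 'sda'  # stand-in for is_local_device(...)

    # Works unchanged on Python 2 and 3, and skips None placeholder entries
    # just like the "dev and ..." guard above.
    local_devices = [dev for dev in devs if dev and is_local(dev)]
    print(local_devices)  # [{'device': 'sda'}]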
Example #7
    def reap_device(self, device):
        """
        Called once per pass for each device on the server. This will scan the
        accounts directory for the device, looking for partitions this device
        is the primary for, then looking for account databases that are marked
        status=DELETED and still have containers and calling
        :func:`reap_account`. Account databases marked status=DELETED that no
        longer have containers will eventually be permanently removed by the
        reclaim process within the account replicator (see
        :mod:`swift.db_replicator`).

        :param device: The device to look for accounts to be deleted.
        """
        datadir = os.path.join(self.devices, device, DATADIR)
        if not os.path.exists(datadir):
            return
        for partition in os.listdir(datadir):
            partition_path = os.path.join(datadir, partition)
            if not partition.isdigit():
                continue
            nodes = self.get_account_ring().get_part_nodes(int(partition))
            if not os.path.isdir(partition_path):
                continue
            container_shard = None
            for container_shard, node in enumerate(nodes):
                if is_local_device(self.myips, None, node['ip'], None) and \
                        (not self.bind_port or
                         self.bind_port == node['port']) and \
                        (device == node['device']):
                    break
            else:
                continue

            for suffix in os.listdir(partition_path):
                suffix_path = os.path.join(partition_path, suffix)
                if not os.path.isdir(suffix_path):
                    continue
                for hsh in os.listdir(suffix_path):
                    hsh_path = os.path.join(suffix_path, hsh)
                    if not os.path.isdir(hsh_path):
                        continue
                    for fname in sorted(os.listdir(hsh_path), reverse=True):
                        if fname.endswith('.ts'):
                            break
                        elif fname.endswith('.db'):
                            self.start_time = time()
                            broker = \
                                AccountBroker(os.path.join(hsh_path, fname),
                                              logger=self.logger)
                            if broker.is_status_deleted() and \
                                    not broker.empty():
                                self.reap_account(
                                    broker,
                                    partition,
                                    nodes,
                                    container_shard=container_shard)
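The node scan above leans on Python's for ... else: the else branch runs only when the loop finishes without a break, so any partition for which this device is not a primary is skipped with a single continue. The idiom in isolation:

    def primary_index(nodes, device):
        for index, node in enumerate(nodes):
            if node['device'] == device:
                break  # found ourselves among the primaries
        else:
            return None  # loop never broke: not a primary for this partition
        return index

    nodes = [{'device': 'sda'}, {'device': 'sdb'}]
    print(primary_index(nodes, 'sdb'))  # 1
    print(primary_index(nodes, 'sdc'))  # None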
Example #8
    def collect_parts(self, override_devices=None, override_partitions=None):
        """
        Helper for yielding partitions in the top level reconstructor
        """
        override_devices = override_devices or []
        override_partitions = override_partitions or []
        ips = whataremyips(self.bind_ip)
        for policy in POLICIES:
            if policy.policy_type != EC_POLICY:
                continue
            self._diskfile_mgr = self._df_router[policy]
            self.load_object_ring(policy)
            data_dir = get_data_dir(policy)
            local_devices = itertools.ifilter(
                lambda dev: dev and is_local_device(ips, self.port, dev["replication_ip"], dev["replication_port"]),
                policy.object_ring.devs,
            )

            for local_dev in local_devices:
                if override_devices and (local_dev["device"] not in override_devices):
                    continue
                dev_path = self._df_router[policy].get_dev_path(local_dev["device"])
                if not dev_path:
                    self.logger.warn(_("%s is not mounted"), local_dev["device"])
                    continue
                obj_path = join(dev_path, data_dir)
                tmp_path = join(dev_path, get_tmp_dir(int(policy)))
                unlink_older_than(tmp_path, time.time() - self.reclaim_age)
                if not os.path.exists(obj_path):
                    try:
                        mkdirs(obj_path)
                    except Exception:
                        self.logger.exception("Unable to create %s" % obj_path)
                    continue
                try:
                    partitions = os.listdir(obj_path)
                except OSError:
                    self.logger.exception("Unable to list partitions in %r" % obj_path)
                    continue
                for partition in partitions:
                    part_path = join(obj_path, partition)
                    if not (partition.isdigit() and os.path.isdir(part_path)):
                        self.logger.warning("Unexpected entity in data dir: %r" % part_path)
                        remove_file(part_path)
                        continue
                    partition = int(partition)
                    if override_partitions and (partition not in override_partitions):
                        continue
                    part_info = {
                        "local_dev": local_dev,
                        "policy": policy,
                        "partition": partition,
                        "part_path": part_path,
                    }
                    yield part_info
Example #9
    def build_replication_jobs(self,
                               policy,
                               ips,
                               override_devices=None,
                               override_partitions=None):
        """
        Helper function for collect_jobs to build jobs for replication
        using replication style storage policy
        """
        jobs = []
        data_dir = get_data_dir(policy)
        for local_dev in [
                dev for dev in policy.object_ring.devs if
            (dev and is_local_device(ips, self.port, dev['replication_ip'],
                                     dev['replication_port']) and
             (override_devices is None or dev['device'] in override_devices))
        ]:
            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(policy))
            if self.mount_check and not ismount(dev_path):
                self.logger.warn(_('%s is not mounted'), local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception('ERROR creating %s' % obj_path)
                continue
            for partition in os.listdir(obj_path):
                if (override_partitions is not None
                        and partition not in override_partitions):
                    continue

                try:
                    job_path = join(obj_path, partition)
                    part_nodes = policy.object_ring.get_part_nodes(
                        int(partition))
                    nodes = [
                        node for node in part_nodes
                        if node['id'] != local_dev['id']
                    ]
                    jobs.append(
                        dict(path=job_path,
                             device=local_dev['device'],
                             obj_path=obj_path,
                             nodes=nodes,
                             delete=len(nodes) > len(part_nodes) - 1,
                             policy=policy,
                             partition=partition,
                             region=local_dev['region']))
                except ValueError:
                    continue
        return jobs
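The delete flag is subtler than it looks: nodes is part_nodes with the local device removed, so len(nodes) > len(part_nodes) - 1 holds exactly when nothing was removed, i.e. the local device is not a primary for the partition and holds it as a handoff to be deleted once replicated out. Reduced to a sketch:

    def is_handoff(local_dev_id, part_nodes):
        # Peers are every primary except this device; if nothing was
        # filtered out, this device is not a primary here.
        nodes = [n for n in part_nodes if n['id'] != local_dev_id]
        return len(nodes) > len(part_nodes) - 1

    part_nodes = [{'id': 1}, {'id': 2}, {'id': 3}]
    print(is_handoff(2, part_nodes))  # False: a primary, keep the partition
    print(is_handoff(9, part_nodes))  # True: a handoff, delete after pushing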
Example #10
    def run_once(self, *args, **kwargs):
        """Run a replication pass once."""
        self._zero_stats()
        dirs = []
        ips = whataremyips(self.bind_ip)
        if not ips:
            self.logger.error(_('ERROR Failed to get my own IPs?'))
            return

        if self.handoffs_only:
            self.logger.warning(
                'Starting replication pass with handoffs_only enabled. '
                'This mode is not intended for normal '
                'operation; use handoffs_only with care.')

        self._local_device_ids = set()
        found_local = False
        for node in self.ring.devs:
            if node and is_local_device(ips, self.port, node['replication_ip'],
                                        node['replication_port']):
                found_local = True
                if not check_drive(self.root, node['device'],
                                   self.mount_check):
                    self._add_failure_stats([
                        (failure_dev['replication_ip'], failure_dev['device'])
                        for failure_dev in self.ring.devs if failure_dev
                    ])
                    self.logger.warning(
                        _('Skipping %(device)s as it is not mounted') % node)
                    continue
                unlink_older_than(
                    os.path.join(self.root, node['device'], 'tmp'),
                    time.time() - self.reclaim_age)
                datadir = os.path.join(self.root, node['device'], self.datadir)
                if os.path.isdir(datadir):
                    self._local_device_ids.add(node['id'])
                    filt = (self.handoffs_only_filter(node['id'])
                            if self.handoffs_only else None)
                    dirs.append((datadir, node['id'], filt))
        if not found_local:
            self.logger.error(
                "Can't find itself %s with port %s in ring "
                "file, not replicating", ", ".join(ips), self.port)
        self.logger.info(_('Beginning replication run'))
        for part, object_file, node_id in roundrobin_datadirs(dirs):
            self.cpool.spawn_n(self._replicate_object, part, object_file,
                               node_id)
        self.cpool.waitall()
        self.logger.info(_('Replication run OVER'))
        if self.handoffs_only:
            self.logger.warning(
                'Finished replication pass with handoffs_only enabled. '
                'If handoffs_only is no longer required, disable it.')
        self._report_stats()
Example #11
File: reaper.py Project: mahak/swift
    def reap_device(self, device):
        """
        Called once per pass for each device on the server. This will scan the
        accounts directory for the device, looking for partitions this device
        is the primary for, then looking for account databases that are marked
        status=DELETED and still have containers and calling
        :func:`reap_account`. Account databases marked status=DELETED that no
        longer have containers will eventually be permanently removed by the
        reclaim process within the account replicator (see
        :mod:`swift.db_replicator`).

        :param device: The device to look for accounts to be deleted.
        """
        datadir = os.path.join(self.devices, device, DATADIR)
        if not os.path.exists(datadir):
            return
        for partition in os.listdir(datadir):
            partition_path = os.path.join(datadir, partition)
            if not partition.isdigit():
                continue
            nodes = self.get_account_ring().get_part_nodes(int(partition))
            if not os.path.isdir(partition_path):
                continue
            container_shard = None
            for container_shard, node in enumerate(nodes):
                if is_local_device(self.myips, None, node['ip'], None) and \
                        (not self.bind_port or
                         self.bind_port == node['port']) and \
                        (device == node['device']):
                    break
            else:
                continue

            for suffix in os.listdir(partition_path):
                suffix_path = os.path.join(partition_path, suffix)
                if not os.path.isdir(suffix_path):
                    continue
                for hsh in os.listdir(suffix_path):
                    hsh_path = os.path.join(suffix_path, hsh)
                    if not os.path.isdir(hsh_path):
                        continue
                    for fname in sorted(os.listdir(hsh_path), reverse=True):
                        if fname.endswith('.ts'):
                            break
                        elif fname.endswith('.db'):
                            self.start_time = time()
                            broker = \
                                AccountBroker(os.path.join(hsh_path, fname),
                                              logger=self.logger)
                            if broker.is_status_deleted() and \
                                    not broker.empty():
                                self.reap_account(
                                    broker, partition, nodes,
                                    container_shard=container_shard)
Example #12
    def process_repl(self, policy, ips, override_devices=None,
                     override_partitions=None):
        """
        Helper function for collect_jobs to build jobs for replication
        using replication style storage policy
        """
        jobs = []
        obj_ring = self.get_object_ring(policy.idx)
        data_dir = get_data_dir(policy.idx)
        for local_dev in [dev for dev in obj_ring.devs
                          if (dev
                              and is_local_device(ips,
                                                  self.port,
                                                  dev['replication_ip'],
                                                  dev['replication_port'])
                              and (override_devices is None
                                   or dev['device'] in override_devices))]:
            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(int(policy)))
            if self.mount_check and not ismount(dev_path):
                self.logger.warn(_('%s is not mounted'), local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception('ERROR creating %s' % obj_path)
                continue
            for partition in os.listdir(obj_path):
                if (override_partitions is not None
                        and partition not in override_partitions):
                    continue

                try:
                    job_path = join(obj_path, partition)
                    part_nodes = obj_ring.get_part_nodes(int(partition))
                    nodes = [node for node in part_nodes
                             if node['id'] != local_dev['id']]
                    jobs.append(
                        dict(path=job_path,
                             device=local_dev['device'],
                             obj_path=obj_path,
                             nodes=nodes,
                             delete=len(nodes) > len(part_nodes) - 1,
                             policy_idx=policy.idx,
                             partition=partition,
                             object_ring=obj_ring,
                             region=local_dev['region']))
                except ValueError:
                    continue
        return jobs
Example #13
 def run_once(self, *args, **kwargs):
     """Run a replication pass once."""
     self._zero_stats()
     dirs = []
     ips = whataremyips(self.bind_ip)
     if not ips:
         self.logger.error(_('ERROR Failed to get my own IPs?'))
         return
     self._local_device_ids = set()
     found_local = False
     for node in self.ring.devs:
         if node and is_local_device(ips, self.port,
                                     node['replication_ip'],
                                     node['replication_port']):
             found_local = True
             if not check_drive(self.root, node['device'],
                                self.mount_check):
                 self._add_failure_stats(
                     [(failure_dev['replication_ip'],
                       failure_dev['device'])
                      for failure_dev in self.ring.devs if failure_dev])
                 self.logger.warning(
                     _('Skipping %(device)s as it is not mounted') % node)
                 continue
             unlink_older_than(
                 os.path.join(self.root, node['device'], 'tmp'),
                 time.time() - self.reclaim_age)
             datadir = os.path.join(self.root, node['device'], self.datadir)
             if os.path.isdir(datadir):
                 self._local_device_ids.add(node['id'])
                 dirs.append((datadir, node['id']))
     if not found_local:
         self.logger.error("Can't find itself %s with port %s in ring "
                           "file, not replicating",
                           ", ".join(ips), self.port)
     self.logger.info(_('Beginning replication run'))
     for part, object_file, node_id in roundrobin_datadirs(dirs):
         self.cpool.spawn_n(
             self._replicate_object, part, object_file, node_id)
     self.cpool.waitall()
     self.logger.info(_('Replication run OVER'))
     self._report_stats()
Example #14
    def get_local_devices(self):
        """
        Returns a set of all local devices in all replication-type storage
        policies.

        This is the device names, e.g. "sdq" or "d1234" or something, not
        the full ring entries.
        """
        ips = whataremyips(self.bind_ip)
        local_devices = set()
        for policy in POLICIES:
            if policy.policy_type != REPL_POLICY:
                continue
            self.load_object_ring(policy)
            for device in policy.object_ring.devs:
                if device and is_local_device(ips, self.port,
                                              device['replication_ip'],
                                              device['replication_port']):
                    local_devices.add(device['device'])
        return local_devices
Example #15
File: replicator.py Project: mahak/swift
    def get_local_devices(self):
        """
        Returns a set of all local devices in all replication-type storage
        policies.

        This is the device names, e.g. "sdq" or "d1234" or something, not
        the full ring entries.
        """
        ips = whataremyips(self.bind_ip)
        local_devices = set()
        for policy in POLICIES:
            if policy.policy_type != REPL_POLICY:
                continue
            self.load_object_ring(policy)
            for device in policy.object_ring.devs:
                if device and is_local_device(
                        ips, self.port,
                        device['replication_ip'],
                        device['replication_port']):
                    local_devices.add(device['device'])
        return local_devices
Example #16
    def _get_db_info(self, account, container):
        """
        Returns the database path of the container

        :param account: UTF-8 encoded account name
        :param container: UTF-8 encoded container name
        :returns: a tuple of (db path, nodes count, index of replica)
        """

        part, container_nodes = self.container_ring.get_nodes(
            account, container)
        nodes_count = len(container_nodes)
        db_hash = hash_path(account, container)
        db_dir = storage_directory(DATADIR, part, db_hash)

        for index, node in enumerate(container_nodes):
            if not is_local_device(self.myips, None, node['ip'], node['port']):
                continue
            db_path = os.path.join(self.root, node['device'], db_dir,
                                   db_hash + '.db')
            return db_path, nodes_count, index
        return None, None, None
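The path assembled here follows Swift's standard database layout, <root>/<device>/<datadir>/<partition>/<hash suffix>/<hash>/<hash>.db, with storage_directory contributing the partition/suffix/hash portion. A hedged sketch of the composition (the unsalted MD5 below is illustrative only; the real hash_path mixes in the cluster's hash_path_prefix and hash_path_suffix from swift.conf):

    import hashlib
    import os

    def storage_directory_sketch(datadir, partition, name_hash):
        # <datadir>/<partition>/<last three hash chars>/<full hash>
        return os.path.join(datadir, str(partition), name_hash[-3:], name_hash)

    db_hash = hashlib.md5(b'/AUTH_test/photos').hexdigest()
    db_dir = storage_directory_sketch('containers', 12345, db_hash)
    print(os.path.join('/srv/node', 'sdb1', db_dir, db_hash + '.db'))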
Example #17
    def handle_container(self, settings):
        part, container_nodes = self.container_ring.get_nodes(
            settings['account'], settings['container'])
        nodes_count = len(container_nodes)
        handler = self.handler_class(self.status_dir, settings)

        for index, node in enumerate(container_nodes):
            if not is_local_device(self.myips, None, node['ip'], node['port']):
                continue
            broker = self.get_broker(settings['account'],
                                     settings['container'], part, node)
            broker_info = broker.get_info()
            last_row = handler.get_last_row(broker_info['id'])
            if not last_row:
                last_row = 0
            try:
                items = broker.get_items_since(last_row, self.items_chunk)
            except DatabaseConnectionError:
                continue
            if items:
                self.process_items(handler, items, nodes_count, index)
                handler.save_last_row(items[-1]['ROWID'], broker_info['id'])
            return
Example #18
    def build_replication_jobs(self, policy, ips, override_devices=None,
                               override_partitions=None):
        """
        Helper function for collect_jobs to build jobs for replication
        using replication style storage policy
        """
        jobs = []
        df_mgr = self._df_router[policy]
        self.all_devs_info.update(
            [(dev['replication_ip'], dev['device'])
             for dev in policy.object_ring.devs if dev])
        data_dir = get_data_dir(policy)
        found_local = False
        for local_dev in [dev for dev in policy.object_ring.devs
                          if (dev
                              and is_local_device(ips,
                                                  self.port,
                                                  dev['replication_ip'],
                                                  dev['replication_port'])
                              and (override_devices is None
                                   or dev['device'] in override_devices))]:
            found_local = True
            dev_path = check_drive(self.devices_dir, local_dev['device'],
                                   self.mount_check)
            if not dev_path:
                self._add_failure_stats(
                    [(failure_dev['replication_ip'],
                      failure_dev['device'])
                     for failure_dev in policy.object_ring.devs
                     if failure_dev])
                self.logger.warning(
                    _('%s is not mounted'), local_dev['device'])
                continue
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(policy))
            unlink_older_than(tmp_path, time.time() -
                              df_mgr.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception('ERROR creating %s' % obj_path)
                continue
            for partition in os.listdir(obj_path):
                if (override_partitions is not None
                        and partition not in override_partitions):
                    continue

                if (partition.startswith('auditor_status_') and
                        partition.endswith('.json')):
                    # ignore auditor status files
                    continue

                part_nodes = None
                try:
                    job_path = join(obj_path, partition)
                    part_nodes = policy.object_ring.get_part_nodes(
                        int(partition))
                    nodes = [node for node in part_nodes
                             if node['id'] != local_dev['id']]
                    jobs.append(
                        dict(path=job_path,
                             device=local_dev['device'],
                             obj_path=obj_path,
                             nodes=nodes,
                             delete=len(nodes) > len(part_nodes) - 1,
                             policy=policy,
                             partition=partition,
                             region=local_dev['region']))
                except ValueError:
                    if part_nodes:
                        self._add_failure_stats(
                            [(failure_dev['replication_ip'],
                              failure_dev['device'])
                             for failure_dev in nodes])
                    else:
                        self._add_failure_stats(
                            [(failure_dev['replication_ip'],
                              failure_dev['device'])
                             for failure_dev in policy.object_ring.devs
                             if failure_dev])
                    continue
        if not found_local:
            self.logger.error("Can't find itself in policy with index %d with"
                              " ips %s and with port %s in ring file, not"
                              " replicating",
                              int(policy), ", ".join(ips), self.port)
        return jobs
Example #19
    def container_sync(self, path):
        """
        Checks the given path for a container database, determines if syncing
        is turned on for that database and, if so, sends any updates to the
        other container.

        :param path: the path to a container db
        """
        broker = None
        try:
            broker = ContainerBroker(path)
            info = broker.get_info()
            x, nodes = self.container_ring.get_nodes(info['account'],
                                                     info['container'])
            for ordinal, node in enumerate(nodes):
                if is_local_device(self._myips, self._myport, node['ip'],
                                   node['port']):
                    break
            else:
                return
            if not broker.is_deleted():
                sync_to = None
                user_key = None
                sync_point1 = info['x_container_sync_point1']
                sync_point2 = info['x_container_sync_point2']
                for key, (value, timestamp) in broker.metadata.iteritems():
                    if key.lower() == 'x-container-sync-to':
                        sync_to = value
                    elif key.lower() == 'x-container-sync-key':
                        user_key = value
                if not sync_to or not user_key:
                    self.container_skips += 1
                    self.logger.increment('skips')
                    return
                err, sync_to, realm, realm_key = validate_sync_to(
                    sync_to, self.allowed_sync_hosts, self.realms_conf)
                if err:
                    self.logger.info(
                        _('ERROR %(db_file)s: %(validate_sync_to_err)s'), {
                            'db_file': str(broker),
                            'validate_sync_to_err': err
                        })
                    self.container_failures += 1
                    self.logger.increment('failures')
                    return
                stop_at = time() + self.container_time
                next_sync_point = None
                while time() < stop_at and sync_point2 < sync_point1:
                    rows = broker.get_items_since(sync_point2, 1)
                    if not rows:
                        break
                    row = rows[0]
                    if row['ROWID'] > sync_point1:
                        break
                    key = hash_path(info['account'],
                                    info['container'],
                                    row['name'],
                                    raw_digest=True)
                    # This node will only initially sync out one third of the
                    # objects (if 3 replicas, 1/4 if 4, etc.) and will skip
                    # problematic rows as needed in case of faults.
                    # This section will attempt to sync previously skipped
                    # rows in case the previous attempts by any of the nodes
                    # didn't succeed.
                    if not self.container_sync_row(row, sync_to, user_key,
                                                   broker, info, realm,
                                                   realm_key):
                        if not next_sync_point:
                            next_sync_point = sync_point2
                    sync_point2 = row['ROWID']
                    broker.set_x_container_sync_points(None, sync_point2)
                if next_sync_point:
                    broker.set_x_container_sync_points(None, next_sync_point)
                while time() < stop_at:
                    rows = broker.get_items_since(sync_point1, 1)
                    if not rows:
                        break
                    row = rows[0]
                    key = hash_path(info['account'],
                                    info['container'],
                                    row['name'],
                                    raw_digest=True)
                    # This node will only initially sync out one third of the
                    # objects (if 3 replicas, 1/4 if 4, etc.). It'll come back
                    # around to the section above and attempt to sync
                    # previously skipped rows in case the other nodes didn't
                    # succeed or in case it failed to do so the first time.
                    if unpack_from('>I', key)[0] % \
                            len(nodes) == ordinal:
                        self.container_sync_row(row, sync_to, user_key, broker,
                                                info, realm, realm_key)
                    sync_point1 = row['ROWID']
                    broker.set_x_container_sync_points(sync_point1, None)
                self.container_syncs += 1
                self.logger.increment('syncs')
        except (Exception, Timeout) as err:
            self.container_failures += 1
            self.logger.increment('failures')
            self.logger.exception(_('ERROR Syncing %s'),
                                  broker if broker else path)
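The unpack_from('>I', key)[0] % len(nodes) == ordinal test is what divides the initial work: each replica first syncs only the rows whose hashed name maps to its own ordinal, roughly 1/len(nodes) of the rows, and picks up its peers' skipped rows on later passes. A self-contained sketch of the split (a plain MD5 stands in for Swift's salted hash_path with raw_digest=True):

    import hashlib
    from struct import unpack_from

    def owning_ordinal(name, replica_count):
        # First four bytes of the row's hash as a big-endian unsigned int,
        # modulo the replica count, pick the node that syncs the row first.
        key = hashlib.md5(name.encode('utf-8')).digest()
        return unpack_from('>I', key)[0] % replica_count

    rows = ['obj-%d' % i for i in range(9)]
    for ordinal in range(3):
        print(ordinal, [r for r in rows if owning_ordinal(r, 3) == ordinal])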
Example #20
File: sync.py Project: BjoernT/swift
    def container_sync(self, path):
        """
        Checks the given path for a container database, determines if syncing
        is turned on for that database and, if so, sends any updates to the
        other container.

        :param path: the path to a container db
        """
        broker = None
        try:
            broker = ContainerBroker(path)
            # The path we pass to the ContainerBroker is a real path of
            # a container DB. If we get here, however, it means that this
            # path is linked from the sync_containers dir. In rare cases of
            # races or process failures the link can be stale, and the
            # get_info below will raise a "DB doesn't exist" exception. In
            # that case we remove the stale link and re-raise, since in most
            # cases the db should be there.
            try:
                info = broker.get_info()
            except DatabaseConnectionError as db_err:
                if str(db_err).endswith("DB doesn't exist"):
                    self.sync_store.remove_synced_container(broker)
                raise

            x, nodes = self.container_ring.get_nodes(info['account'],
                                                     info['container'])
            for ordinal, node in enumerate(nodes):
                if is_local_device(self._myips, self._myport,
                                   node['ip'], node['port']):
                    break
            else:
                return
            if not broker.is_deleted():
                sync_to = None
                user_key = None
                sync_point1 = info['x_container_sync_point1']
                sync_point2 = info['x_container_sync_point2']
                for key, (value, timestamp) in broker.metadata.items():
                    if key.lower() == 'x-container-sync-to':
                        sync_to = value
                    elif key.lower() == 'x-container-sync-key':
                        user_key = value
                if not sync_to or not user_key:
                    self.container_skips += 1
                    self.logger.increment('skips')
                    return
                err, sync_to, realm, realm_key = validate_sync_to(
                    sync_to, self.allowed_sync_hosts, self.realms_conf)
                if err:
                    self.logger.info(
                        _('ERROR %(db_file)s: %(validate_sync_to_err)s'),
                        {'db_file': str(broker),
                         'validate_sync_to_err': err})
                    self.container_failures += 1
                    self.logger.increment('failures')
                    return
                stop_at = time() + self.container_time
                next_sync_point = None
                while time() < stop_at and sync_point2 < sync_point1:
                    rows = broker.get_items_since(sync_point2, 1)
                    if not rows:
                        break
                    row = rows[0]
                    if row['ROWID'] > sync_point1:
                        break
                    key = hash_path(info['account'], info['container'],
                                    row['name'], raw_digest=True)
                    # This node will only initially sync out one third of the
                    # objects (if 3 replicas, 1/4 if 4, etc.) and will skip
                    # problematic rows as needed in case of faults.
                    # This section will attempt to sync previously skipped
                    # rows in case the previous attempts by any of the nodes
                    # didn't succeed.
                    if not self.container_sync_row(
                            row, sync_to, user_key, broker, info, realm,
                            realm_key):
                        if not next_sync_point:
                            next_sync_point = sync_point2
                    sync_point2 = row['ROWID']
                    broker.set_x_container_sync_points(None, sync_point2)
                if next_sync_point:
                    broker.set_x_container_sync_points(None, next_sync_point)
                while time() < stop_at:
                    rows = broker.get_items_since(sync_point1, 1)
                    if not rows:
                        break
                    row = rows[0]
                    key = hash_path(info['account'], info['container'],
                                    row['name'], raw_digest=True)
                    # This node will only initially sync out one third of the
                    # objects (if 3 replicas, 1/4 if 4, etc.). It'll come back
                    # around to the section above and attempt to sync
                    # previously skipped rows in case the other nodes didn't
                    # succeed or in case it failed to do so the first time.
                    if unpack_from('>I', key)[0] % \
                            len(nodes) == ordinal:
                        self.container_sync_row(
                            row, sync_to, user_key, broker, info, realm,
                            realm_key)
                    sync_point1 = row['ROWID']
                    broker.set_x_container_sync_points(sync_point1, None)
                self.container_syncs += 1
                self.logger.increment('syncs')
        except (Exception, Timeout):
            self.container_failures += 1
            self.logger.increment('failures')
            self.logger.exception(_('ERROR Syncing %s'),
                                  broker if broker else path)
Example #21
File: sync.py Project: irispastal/swift
    def container_sync(self, path):
        """
        Checks the given path for a container database, determines if syncing
        is turned on for that database and, if so, sends any updates to the
        other container.

        :param path: the path to a container db
        """
        broker = None
        try:
            broker = ContainerBroker(path, logger=self.logger)
            # The path we pass to the ContainerBroker is a real path of
            # a container DB. If we get here, however, it means that this
            # path is linked from the sync_containers dir. In rare cases of
            # races or process failures the link can be stale, and the
            # get_info below will raise a "DB doesn't exist" exception. In
            # that case we remove the stale link and re-raise, since in most
            # cases the db should be there.
            try:
                info = broker.get_info()
            except DatabaseConnectionError as db_err:
                if str(db_err).endswith("DB doesn't exist"):
                    self.sync_store.remove_synced_container(broker)
                raise

            x, nodes = self.container_ring.get_nodes(info['account'],
                                                     info['container'])
            for ordinal, node in enumerate(nodes):
                if is_local_device(self._myips, self._myport, node['ip'],
                                   node['port']):
                    break
            else:
                return
            if not broker.is_deleted():
                sync_to = None
                user_key = None
                sync_point1 = info['x_container_sync_point1']
                sync_point2 = info['x_container_sync_point2']
                for key, (value, timestamp) in broker.metadata.items():
                    if key.lower() == 'x-container-sync-to':
                        sync_to = value
                    elif key.lower() == 'x-container-sync-key':
                        user_key = value
                if not sync_to or not user_key:
                    self.container_skips += 1
                    self.logger.increment('skips')
                    return
                err, sync_to, realm, realm_key = validate_sync_to(
                    sync_to, self.allowed_sync_hosts, self.realms_conf)
                if err:
                    self.logger.info(
                        _('ERROR %(db_file)s: %(validate_sync_to_err)s'), {
                            'db_file': str(broker),
                            'validate_sync_to_err': err
                        })
                    self.container_failures += 1
                    self.logger.increment('failures')
                    return
                start_at = time()
                stop_at = start_at + self.container_time
                next_sync_point = None
                sync_stage_time = start_at
                try:
                    while time() < stop_at and sync_point2 < sync_point1:
                        rows = broker.get_items_since(sync_point2, 1)
                        if not rows:
                            break
                        row = rows[0]
                        if row['ROWID'] > sync_point1:
                            break
                        # This node will only initially sync out one third
                        # of the objects (if 3 replicas, 1/4 if 4, etc.)
                        # and will skip problematic rows as needed in case of
                        # faults.
                        # This section will attempt to sync previously skipped
                        # rows in case the previous attempts by any of the
                        # nodes didn't succeed.
                        if not self.container_sync_row(row, sync_to, user_key,
                                                       broker, info, realm,
                                                       realm_key):
                            if not next_sync_point:
                                next_sync_point = sync_point2
                        sync_point2 = row['ROWID']
                        broker.set_x_container_sync_points(None, sync_point2)
                    if next_sync_point:
                        broker.set_x_container_sync_points(
                            None, next_sync_point)
                    else:
                        next_sync_point = sync_point2
                    sync_stage_time = time()
                    while sync_stage_time < stop_at:
                        rows = broker.get_items_since(sync_point1, 1)
                        if not rows:
                            break
                        row = rows[0]
                        key = hash_path(info['account'],
                                        info['container'],
                                        row['name'],
                                        raw_digest=True)
                        # This node will only initially sync out one third of
                        # the objects (if 3 replicas, 1/4 if 4, etc.).
                        # It'll come back around to the section above
                        # and attempt to sync previously skipped rows in case
                        # the other nodes didn't succeed or in case it failed
                        # to do so the first time.
                        if unpack_from('>I', key)[0] % \
                                len(nodes) == ordinal:
                            self.container_sync_row(row, sync_to, user_key,
                                                    broker, info, realm,
                                                    realm_key)
                        sync_point1 = row['ROWID']
                        broker.set_x_container_sync_points(sync_point1, None)
                        sync_stage_time = time()
                    self.container_syncs += 1
                    self.logger.increment('syncs')
                finally:
                    self.container_report(start_at, sync_stage_time,
                                          sync_point1, next_sync_point, info,
                                          broker.get_max_row())
        except (Exception, Timeout):
            self.container_failures += 1
            self.logger.increment('failures')
            self.logger.exception(_('ERROR Syncing %s'),
                                  broker if broker else path)
Example #22
    def collect_parts(self, override_devices=None, override_partitions=None):
        """
        Helper for getting partitions in the top level reconstructor

        In handoffs_only mode primary partitions will not be included in the
        returned (possibly empty) list.
        """
        override_devices = override_devices or []
        override_partitions = override_partitions or []
        ips = whataremyips(self.bind_ip)
        ec_policies = (policy for policy in POLICIES
                       if policy.policy_type == EC_POLICY)

        policy2devices = {}

        for policy in ec_policies:
            self.load_object_ring(policy)
            local_devices = list(
                six.moves.filter(
                    lambda dev: dev and is_local_device(
                        ips, self.port, dev['replication_ip'], dev[
                            'replication_port']), policy.object_ring.devs))

            if override_devices:
                local_devices = list(
                    six.moves.filter(
                        lambda dev_info: dev_info['device'] in
                        override_devices, local_devices))

            policy2devices[policy] = local_devices
            self.device_count += len(local_devices)

        all_parts = []

        for policy, local_devices in policy2devices.items():
            # Skip replication if next_part_power is set. In this case
            # every object is hard-linked twice, but the replicator
            # can't detect them and would create a second copy of the
            # file if not yet existing - and this might double the
            # actual transferred and stored data
            next_part_power = getattr(policy.object_ring, 'next_part_power',
                                      None)
            if next_part_power is not None:
                self.logger.warning(
                    _("next_part_power set in policy '%s'. Skipping"),
                    policy.name)
                continue

            df_mgr = self._df_router[policy]
            for local_dev in local_devices:
                dev_path = df_mgr.get_dev_path(local_dev['device'])
                if not dev_path:
                    self.logger.warning(_('%s is not mounted'),
                                        local_dev['device'])
                    continue
                data_dir = get_data_dir(policy)
                obj_path = join(dev_path, data_dir)
                tmp_path = join(dev_path, get_tmp_dir(int(policy)))
                unlink_older_than(tmp_path, time.time() - df_mgr.reclaim_age)
                if not os.path.exists(obj_path):
                    try:
                        mkdirs(obj_path)
                    except Exception:
                        self.logger.exception('Unable to create %s' % obj_path)
                    continue
                try:
                    partitions = os.listdir(obj_path)
                except OSError:
                    self.logger.exception('Unable to list partitions in %r' %
                                          obj_path)
                    continue

                self.part_count += len(partitions)
                for partition in partitions:
                    part_path = join(obj_path, partition)
                    if partition in ('auditor_status_ALL.json',
                                     'auditor_status_ZBF.json'):
                        continue
                    if not partition.isdigit():
                        self.logger.warning(
                            'Unexpected entity in data dir: %r' % part_path)
                        self.delete_partition(part_path)
                        self.reconstruction_part_count += 1
                        continue
                    partition = int(partition)
                    if override_partitions and (partition
                                                not in override_partitions):
                        continue
                    # N.B. A primary node in handoffs_only mode may skip
                    # syncing misplaced (handoff) fragments in its own
                    # primary partition. That can happen across several
                    # rebalances (e.g. a node holding a handoff fragment
                    # becomes a new primary). Those fragments will be synced
                    # (and reverted) once handoffs_only mode is turned off.
                    if self.handoffs_only and any(local_dev['id'] == n['id']
                                                  for n in policy.object_ring.
                                                  get_part_nodes(partition)):
                        self.logger.debug(
                            'Skipping %s job for %s '
                            'while in handoffs_only mode.', SYNC, part_path)
                        continue
                    part_info = {
                        'local_dev': local_dev,
                        'policy': policy,
                        'partition': partition,
                        'part_path': part_path,
                    }
                    all_parts.append(part_info)
        random.shuffle(all_parts)
        return all_parts
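The handoffs_only skip reduces to one membership test: if the local device appears among the partition's primaries, the partition is presumed to hold data that belongs here and is left alone so the pass can concentrate on reverting handoffs. In isolation (part_nodes stands in for policy.object_ring.get_part_nodes(partition)):

    def is_primary(local_dev_id, part_nodes):
        return any(local_dev_id == n['id'] for n in part_nodes)

    part_nodes = [{'id': 0}, {'id': 3}, {'id': 7}]
    print(is_primary(3, part_nodes))  # True: skip while in handoffs_only mode
    print(is_primary(5, part_nodes))  # False: a handoff, revert it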
Example #23
    def collect_parts(self, override_devices=None, override_partitions=None):
        """
        Helper for getting partitions in the top level reconstructor
        """
        override_devices = override_devices or []
        override_partitions = override_partitions or []
        ips = whataremyips(self.bind_ip)
        ec_policies = (policy for policy in POLICIES
                       if policy.policy_type == EC_POLICY)

        policy2devices = {}

        for policy in ec_policies:
            self.load_object_ring(policy)
            local_devices = list(
                six.moves.filter(
                    lambda dev: dev and is_local_device(
                        ips, self.port,
                        dev['replication_ip'], dev['replication_port']),
                    policy.object_ring.devs))

            if override_devices:
                local_devices = list(
                    six.moves.filter(
                        lambda dev_info: (dev_info['device']
                                          in override_devices),
                        local_devices))

            policy2devices[policy] = local_devices
            self.device_count += len(local_devices)

        all_parts = []

        for policy, local_devices in policy2devices.items():
            df_mgr = self._df_router[policy]
            for local_dev in local_devices:
                dev_path = df_mgr.get_dev_path(local_dev['device'])
                if not dev_path:
                    self.logger.warning(_('%s is not mounted'),
                                        local_dev['device'])
                    continue
                data_dir = get_data_dir(policy)
                obj_path = join(dev_path, data_dir)
                tmp_path = join(dev_path, get_tmp_dir(int(policy)))
                unlink_older_than(tmp_path, time.time() - df_mgr.reclaim_age)
                if not os.path.exists(obj_path):
                    try:
                        mkdirs(obj_path)
                    except Exception:
                        self.logger.exception('Unable to create %s' % obj_path)
                    continue
                try:
                    partitions = os.listdir(obj_path)
                except OSError:
                    self.logger.exception('Unable to list partitions in %r' %
                                          obj_path)
                    continue

                self.part_count += len(partitions)
                for partition in partitions:
                    part_path = join(obj_path, partition)
                    if partition in ('auditor_status_ALL.json',
                                     'auditor_status_ZBF.json'):
                        continue
                    if not partition.isdigit():
                        self.logger.warning(
                            'Unexpected entity in data dir: %r' % part_path)
                        self.delete_partition(part_path)
                        self.reconstruction_part_count += 1
                        continue
                    partition = int(partition)
                    if override_partitions and (partition
                                                not in override_partitions):
                        continue
                    part_info = {
                        'local_dev': local_dev,
                        'policy': policy,
                        'partition': partition,
                        'part_path': part_path,
                    }
                    all_parts.append(part_info)
        random.shuffle(all_parts)
        return all_parts
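
A short usage sketch for the collector above (the reconstructor instance
and the override values are illustrative assumptions, not from the
source). Each part_info dict pairs one local device with one partition
directory:

    # Restrict collection to one device and two partitions, mirroring the
    # override_* parameters of collect_parts above.
    parts = reconstructor.collect_parts(override_devices=['sdb1'],
                                        override_partitions=[1023, 2047])
    for part_info in parts:
        print(part_info['policy'], part_info['partition'],
              part_info['part_path'])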
Example #24
    def collect_parts(self, override_devices=None,
                      override_partitions=None):
        """
        Helper for yielding partitions in the top level reconstructor
        """
        override_devices = override_devices or []
        override_partitions = override_partitions or []
        ips = whataremyips(self.bind_ip)
        for policy in POLICIES:
            if policy.policy_type != EC_POLICY:
                continue
            self._diskfile_mgr = self._df_router[policy]
            self.load_object_ring(policy)
            data_dir = get_data_dir(policy)
            local_devices = list(six.moves.filter(
                lambda dev: dev and is_local_device(
                    ips, self.port,
                    dev['replication_ip'], dev['replication_port']),
                policy.object_ring.devs))

            if override_devices:
                self.device_count = len(override_devices)
            else:
                self.device_count = len(local_devices)

            for local_dev in local_devices:
                if override_devices and (local_dev['device'] not in
                                         override_devices):
                    continue
                self.reconstruction_device_count += 1
                dev_path = self._df_router[policy].get_dev_path(
                    local_dev['device'])
                if not dev_path:
                    self.logger.warning(_('%s is not mounted'),
                                        local_dev['device'])
                    continue
                obj_path = join(dev_path, data_dir)
                tmp_path = join(dev_path, get_tmp_dir(int(policy)))
                unlink_older_than(tmp_path, time.time() -
                                  self.reclaim_age)
                if not os.path.exists(obj_path):
                    try:
                        mkdirs(obj_path)
                    except Exception:
                        self.logger.exception(
                            'Unable to create %s' % obj_path)
                    continue
                try:
                    partitions = os.listdir(obj_path)
                except OSError:
                    self.logger.exception(
                        'Unable to list partitions in %r' % obj_path)
                    continue

                self.part_count += len(partitions)
                for partition in partitions:
                    part_path = join(obj_path, partition)
                    if not (partition.isdigit() and
                            os.path.isdir(part_path)):
                        self.logger.warning(
                            'Unexpected entity in data dir: %r' % part_path)
                        remove_file(part_path)
                        self.reconstruction_part_count += 1
                        continue
                    partition = int(partition)
                    if override_partitions and (partition not in
                                                override_partitions):
                        continue
                    part_info = {
                        'local_dev': local_dev,
                        'policy': policy,
                        'partition': partition,
                        'part_path': part_path,
                    }
                    yield part_info
                    self.reconstruction_part_count += 1
Example #25
    def test_is_local_device(self):
        # localhost shows up in whataremyips() output as "::1" for IPv6
        my_ips = ["127.0.0.1", "::1"]
        my_port = 6000
        self.assertTrue(is_local_device(my_ips, my_port,
                                        "127.0.0.1", my_port))
        self.assertTrue(is_local_device(my_ips, my_port,
                                        "::1", my_port))
        self.assertTrue(is_local_device(
            my_ips, my_port,
            "0000:0000:0000:0000:0000:0000:0000:0001", my_port))
        self.assertTrue(is_local_device(my_ips, my_port,
                                        "localhost", my_port))
        self.assertFalse(is_local_device(my_ips, my_port,
                                         "localhost", my_port + 1))
        self.assertFalse(is_local_device(my_ips, my_port,
                                         "127.0.0.2", my_port))
        # for those that don't have a local port
        self.assertTrue(is_local_device(my_ips, None,
                                        my_ips[0], None))

        # When servers_per_port is active, the "my_port" passed in is None
        # which means "don't include port in the determination of locality
        # because it's not reliable in this deployment scenario"
        self.assertTrue(is_local_device(my_ips, None,
                                        "127.0.0.1", 6666))
        self.assertTrue(is_local_device(my_ips, None,
                                        "::1", 6666))
        self.assertTrue(is_local_device(
            my_ips, None,
            "0000:0000:0000:0000:0000:0000:0000:0001", 6666))
        self.assertTrue(is_local_device(my_ips, None,
                                        "localhost", 6666))
        self.assertFalse(is_local_device(my_ips, None,
                                         "127.0.0.2", my_port))
Example #26
    def build_replication_jobs(self, policy, ips, old_dict, new_dict,
                               moving_map):
        """
        Helper function for collect_jobs to build jobs for replication
        using replication style storage policy

        :param policy: swift policy object
        :param ips: the local server ips
        :param old_dict: dictionary with devices from old ring
        :param new_dict: dictionary with devices from new ring
        :param moving_map: the dictionary that contains all the partitions
            that should be moved, their sources and destinations
        """

        jobs = []
        data_dir = get_data_dir(policy)
        # Py2: Set comes from the deprecated sets module (from sets
        # import Set); on Python 3 the built-in set() would be used.
        devices = Set(map(lambda x: x[1], moving_map.values()))
        partitions = Set(map(lambda x: x[0], moving_map.values()))

        for local_dev in [
                dev for dev in policy.object_ring.devs
                if dev and is_local_device(ips, self.port,
                                           dev['replication_ip'],
                                           dev['replication_port'])]:

            if self.test:
                print local_dev['id']

            if unicode(local_dev['id']) not in devices:
                continue

            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(policy))
            if self.mount_check and not ismount(dev_path):
                self.logger.warn('%s is not mounted' % local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)

            for partition in os.listdir(obj_path):
                partition = unicode(partition)

                if (partition not in partitions):
                    continue

                try:

                    key = "%s_%s" % (local_dev['id'], partition)
                    if key not in moving_map:
                        continue

                    job_path = join(obj_path, partition)

                    _, source_id, dest_id = moving_map[key]

                    if source_id != unicode(local_dev['id']):
                        continue

                    node = {}
                    replication_ip, replication_device = new_dict[dest_id]
                    node['replication_ip'] = replication_ip
                    node['device'] = replication_device

                    remote_path = os.path.join(self.devices_dir,
                                               node['device'],
                                               self.mover_tmp_dir)

                    jobs.append(
                        dict(path=job_path,
                             device=local_dev['device'],
                             obj_path=obj_path,
                             node=node,
                             policy=policy,
                             partition=partition,
                             remote_path=remote_path))

                except ValueError:
                    continue
                except Exception as e:
                    self.logger.exception(
                        "exception in build_replication_jobs: %s" % e)
                    if self.test:
                        print e
        return jobs
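
Inferred from the lookups above (an assumption, not spelled out in the
source): moving_map is keyed by "<device id>_<partition>" and each value
unpacks as (partition, source_id, dest_id), all as text. A hypothetical
entry and the derived sets:

    # Move partition u'1023' from device u'2' to device u'5'.
    moving_map = {u'2_1023': (u'1023', u'2', u'5')}
    devices = Set(v[1] for v in moving_map.values())     # source device ids
    partitions = Set(v[0] for v in moving_map.values())  # partitions to move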
Example #27
    def process_repl(self, policy, ips, override_devices=None,
                     override_partitions=None):
        """
        Helper function for collect_jobs to build jobs for replication
        using replication style storage policy
        """
        jobs = []
        obj_ring = self.get_object_ring(policy.idx)
        data_dir = get_data_dir(policy.idx)
        for local_dev in [dev for dev in obj_ring.devs
                          if (dev
                              and is_local_device(ips,
                                                  self.port,
                                                  dev['replication_ip'],
                                                  dev['replication_port'])
                              and (override_devices is None
                                   or dev['device'] in override_devices))]:
            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(int(policy)))
            if self.mount_check and not ismount(dev_path):
                self.logger.warn(_('%s is not mounted'), local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception('ERROR creating %s' % obj_path)
                continue
            for partition in os.listdir(obj_path):
                if (override_partitions is not None
                        and partition not in override_partitions):
                    continue

                try:
                    job_path = join(obj_path, partition)
                    part_nodes = obj_ring.get_part_nodes(int(partition))

                    # --- changed code: drop replicas whose devices are in
                    # the spun-down list, so they are skipped for replication
                    f = open("/home/hduser/swift/swift/proxy/controllers"
                             "/spindowndevices")
                    downlist = f.read().split("\n")
                    f.close()

                    nodes = [node for node in part_nodes
                             if node['id'] != local_dev['id']
                             and node['device'] not in downlist]
                    print("===Replication nodes===", nodes)
                    # --- end changed code ---

                    jobs.append(
                        dict(path=job_path,
                             device=local_dev['device'],
                             obj_path=obj_path,
                             nodes=nodes,
                             delete=len(nodes) > len(part_nodes) - 1,
                             policy_idx=policy.idx,
                             partition=partition,
                             object_ring=obj_ring,
                             region=local_dev['region']))
                except ValueError:
                    continue
        return jobs
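
The spindowndevices file read above is assumed to hold one device name
per line; splitting on "\n" leaves a trailing empty string when the file
ends with a newline. A slightly safer reading of the same file:

    with open("/home/hduser/swift/swift/proxy/controllers"
              "/spindowndevices") as f:
        downlist = [line.strip() for line in f if line.strip()]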
Example #28
    def run_once(self, *args, **kwargs):
        """Run a replication pass once."""
        override_options = parse_override_options(once=True, **kwargs)

        devices_to_replicate = override_options.devices or Everything()
        partitions_to_replicate = override_options.partitions or Everything()

        self._zero_stats()
        dirs = []
        ips = whataremyips(self.bind_ip)
        if not ips:
            self.logger.error(_('ERROR Failed to get my own IPs?'))
            return

        if self.handoffs_only:
            self.logger.warning(
                'Starting replication pass with handoffs_only enabled. '
                'This mode is not intended for normal '
                'operation; use handoffs_only with care.')

        self._local_device_ids = set()
        found_local = False
        for node in self.ring.devs:
            if node and is_local_device(ips, self.port,
                                        node['replication_ip'],
                                        node['replication_port']):
                found_local = True
                try:
                    dev_path = check_drive(self.root, node['device'],
                                           self.mount_check)
                except ValueError as err:
                    self._add_failure_stats(
                        [(failure_dev['replication_ip'],
                          failure_dev['device'])
                         for failure_dev in self.ring.devs if failure_dev])
                    self.logger.warning('Skipping: %s', err)
                    continue
                if node['device'] not in devices_to_replicate:
                    self.logger.debug(
                        'Skipping device %s due to given arguments',
                        node['device'])
                    continue
                unlink_older_than(
                    os.path.join(dev_path, 'tmp'),
                    time.time() - self.reclaim_age)
                datadir = os.path.join(self.root, node['device'], self.datadir)
                if os.path.isdir(datadir):
                    self._local_device_ids.add(node['id'])
                    part_filt = self._partition_dir_filter(
                        node['id'], partitions_to_replicate)
                    dirs.append((datadir, node['id'], part_filt))
        if not found_local:
            self.logger.error("Can't find itself %s with port %s in ring "
                              "file, not replicating",
                              ", ".join(ips), self.port)
        self.logger.info(_('Beginning replication run'))
        for part, object_file, node_id in self.roundrobin_datadirs(dirs):
            self.cpool.spawn_n(
                self._replicate_object, part, object_file, node_id)
        self.cpool.waitall()
        self.logger.info(_('Replication run OVER'))
        if self.handoffs_only:
            self.logger.warning(
                'Finished replication pass with handoffs_only enabled. '
                'If handoffs_only is no longer required, disable it.')
        self._report_stats()
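
A plausible shape for the _partition_dir_filter helper used above (an
assumption; its implementation is not part of this snippet): a closure
the datadir walker calls per partition directory name, honoring the
"replicate only these partitions" override:

    def _partition_dir_filter(self, device_id, partitions_to_replicate):
        def filt(partition_dir):
            # partitions_to_replicate is either Everything() (contains
            # anything) or the partition numbers parsed from overrides.
            return (partition_dir.isdigit()
                    and int(partition_dir) in partitions_to_replicate)
        return filt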
Example #29
    def build_replication_jobs(self,
                               policy,
                               ips,
                               override_devices=None,
                               override_partitions=None):
        """
        Helper function for collect_jobs to build jobs for replication
        using replication style storage policy
        """
        jobs = []
        self.all_devs_info.update([(dev['replication_ip'], dev['device'])
                                   for dev in policy.object_ring.devs if dev])
        data_dir = get_data_dir(policy)
        found_local = False
        for local_dev in [
                dev for dev in policy.object_ring.devs
                if dev and is_local_device(ips, self.port,
                                           dev['replication_ip'],
                                           dev['replication_port'])
                and (override_devices is None
                     or dev['device'] in override_devices)]:
            found_local = True
            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(policy))
            if self.mount_check and not ismount(dev_path):
                self._add_failure_stats([
                    (failure_dev['replication_ip'], failure_dev['device'])
                    for failure_dev in policy.object_ring.devs if failure_dev
                ])
                self.logger.warning(_('%s is not mounted'),
                                    local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception('ERROR creating %s' % obj_path)
                continue
            for partition in os.listdir(obj_path):
                if (override_partitions is not None
                        and partition not in override_partitions):
                    continue

                if (partition.startswith('auditor_status_')
                        and partition.endswith('.json')):
                    # ignore auditor status files
                    continue

                part_nodes = None
                try:
                    job_path = join(obj_path, partition)
                    part_nodes = policy.object_ring.get_part_nodes(
                        int(partition))
                    nodes = [
                        node for node in part_nodes
                        if node['id'] != local_dev['id']
                    ]
                    jobs.append(
                        dict(path=job_path,
                             device=local_dev['device'],
                             obj_path=obj_path,
                             nodes=nodes,
                             delete=len(nodes) > len(part_nodes) - 1,
                             policy=policy,
                             partition=partition,
                             region=local_dev['region']))
                except ValueError:
                    if part_nodes:
                        self._add_failure_stats([
                            (failure_dev['replication_ip'],
                             failure_dev['device']) for failure_dev in nodes
                        ])
                    else:
                        self._add_failure_stats([
                            (failure_dev['replication_ip'],
                             failure_dev['device'])
                            for failure_dev in policy.object_ring.devs
                            if failure_dev
                        ])
                    continue
        if not found_local:
            self.logger.error(
                "Can't find itself in policy with index %d with"
                " ips %s and with port %s in ring file, not"
                " replicating", int(policy), ", ".join(ips), self.port)
        return jobs
Example #30
    def build_replication_jobs(self, policy, ips, old_dict,
                               new_dict, moving_map):
        """
        Helper function for collect_jobs to build jobs for replication
        using replication style storage policy

        :param policy: swift policy object
        :param ips: the local server ips
        :param old_dict: dictionary with devices from old ring
        :param new_dict: dictionary with devices from new ring
        :param moving_map: the dictionary that contains all the partitions
            that should be moved, their sources and destinations
        """

        jobs = []
        data_dir = get_data_dir(policy)
        # Py2: Set comes from the deprecated sets module (from sets
        # import Set); on Python 3 the built-in set() would be used.
        devices = Set(map(lambda x: x[1], moving_map.values()))
        partitions = Set(map(lambda x: x[0], moving_map.values()))

        for local_dev in [dev for dev in policy.object_ring.devs
                          if (dev
                              and is_local_device(ips,
                                                  self.port,
                                                  dev['replication_ip'],
                                                  dev['replication_port'])
                              )]:

            if self.test:
                print local_dev['id']

            if unicode(local_dev['id']) not in devices:
                continue

            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(policy))
            if self.mount_check and not ismount(dev_path):
                self.logger.warn('%s is not mounted' % local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)

            for partition in os.listdir(obj_path):
                partition = unicode(partition)

                if (partition not in partitions):
                    continue

                try:

                    key = "%s_%s" % (local_dev['id'], partition)
                    if key not in moving_map:
                        continue

                    job_path = join(obj_path, partition)

                    _, source_id, dest_id = moving_map[key]

                    if source_id != unicode(local_dev['id']):
                        continue

                    node = {}
                    replication_ip, replication_device = new_dict[dest_id]
                    node['replication_ip'] = replication_ip
                    node['device'] = replication_device

                    remote_path = os.path.join(self.devices_dir,
                                               node['device'],
                                               self.mover_tmp_dir)

                    jobs.append(
                        dict(path=job_path,
                             device=local_dev['device'],
                             obj_path=obj_path,
                             node=node,
                             policy=policy,
                             partition=partition,
                             remote_path=remote_path))

                except ValueError:
                    continue
                except Exception as e:
                    self.logger.exception(
                        "exception in build_replication_jobs: %s" % e)
                    if self.test:
                        print e
        return jobs
Example #31
    def test_is_local_device(self):
        # localhost shows up in whataremyips() output as "::1" for IPv6
        my_ips = ["127.0.0.1", "::1"]
        my_port = 6200
        self.assertTrue(is_local_device(my_ips, my_port, "127.0.0.1", my_port))
        self.assertTrue(is_local_device(my_ips, my_port, "::1", my_port))
        self.assertTrue(
            is_local_device(my_ips, my_port,
                            "0000:0000:0000:0000:0000:0000:0000:0001",
                            my_port))
        self.assertTrue(is_local_device(my_ips, my_port, "localhost", my_port))
        self.assertFalse(
            is_local_device(my_ips, my_port, "localhost", my_port + 1))
        self.assertFalse(is_local_device(my_ips, my_port, "127.0.0.2",
                                         my_port))
        # for those that don't have a local port
        self.assertTrue(is_local_device(my_ips, None, my_ips[0], None))

        # When servers_per_port is active, the "my_port" passed in is None
        # which means "don't include port in the determination of locality
        # because it's not reliable in this deployment scenario"
        self.assertTrue(is_local_device(my_ips, None, "127.0.0.1", 6666))
        self.assertTrue(is_local_device(my_ips, None, "::1", 6666))
        self.assertTrue(
            is_local_device(my_ips, None,
                            "0000:0000:0000:0000:0000:0000:0000:0001", 6666))
        self.assertTrue(is_local_device(my_ips, None, "localhost", 6666))
        self.assertFalse(is_local_device(my_ips, None, "127.0.0.2", my_port))
Example #32
    def build_replication_jobs(self, policy, ips, override_devices=None, override_partitions=None):
        """
        Helper function for collect_jobs to build jobs for replication
        using replication style storage policy
        """
        jobs = []
        self.all_devs_info.update([(dev["replication_ip"], dev["device"]) for dev in policy.object_ring.devs if dev])
        data_dir = get_data_dir(policy)
        found_local = False
        for local_dev in [
            dev
            for dev in policy.object_ring.devs
            if (
                dev
                and is_local_device(ips, self.port, dev["replication_ip"], dev["replication_port"])
                and (override_devices is None or dev["device"] in override_devices)
            )
        ]:
            found_local = True
            dev_path = join(self.devices_dir, local_dev["device"])
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(policy))
            if self.mount_check and not ismount(dev_path):
                self._add_failure_stats(
                    [
                        (failure_dev["replication_ip"], failure_dev["device"])
                        for failure_dev in policy.object_ring.devs
                        if failure_dev
                    ]
                )
                self.logger.warning(_("%s is not mounted"), local_dev["device"])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception("ERROR creating %s" % obj_path)
                continue
            for partition in os.listdir(obj_path):
                if override_partitions is not None and partition not in override_partitions:
                    continue

                part_nodes = None
                try:
                    job_path = join(obj_path, partition)
                    part_nodes = policy.object_ring.get_part_nodes(int(partition))
                    nodes = [node for node in part_nodes if node["id"] != local_dev["id"]]
                    jobs.append(
                        dict(
                            path=job_path,
                            device=local_dev["device"],
                            obj_path=obj_path,
                            nodes=nodes,
                            delete=len(nodes) > len(part_nodes) - 1,
                            policy=policy,
                            partition=partition,
                            region=local_dev["region"],
                        )
                    )
                except ValueError:
                    if part_nodes:
                        self._add_failure_stats(
                            [(failure_dev["replication_ip"], failure_dev["device"]) for failure_dev in nodes]
                        )
                    else:
                        self._add_failure_stats(
                            [
                                (failure_dev["replication_ip"], failure_dev["device"])
                                for failure_dev in policy.object_ring.devs
                                if failure_dev
                            ]
                        )
                    continue
        if not found_local:
            self.logger.error(
                "Can't find itself %s with port %s in ring file, not replicating",
                ", ".join(ips),
                self.port,
            )
        return jobs
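
Why delete=len(nodes) > len(part_nodes) - 1 marks handoff jobs in the
builders above: when the local device is one of the partition's primaries
it is filtered out of nodes, leaving len(part_nodes) - 1 targets and
delete=False; when the local device holds only a handoff copy, nothing is
filtered, len(nodes) == len(part_nodes), and delete=True (the local copy
can be removed once it has been pushed to the primaries). A tiny check:

    part_nodes = [{'id': 1}, {'id': 2}, {'id': 3}]
    local_id = 7  # not a primary, so this is a handoff partition
    nodes = [n for n in part_nodes if n['id'] != local_id]
    assert len(nodes) > len(part_nodes) - 1  # delete would be True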