Example #1
def get_hashes(partition_dir, recalculate=None, do_listdir=False,
               reclaim_age=ONE_WEEK):
    """
    Get a list of hashes for the suffix dir.  do_listdir causes it to mistrust
    the hash cache for suffix existence at the (unexpectedly high) cost of a
    listdir.  reclaim_age is just passed on to hash_suffix.

    :param partition_dir: absolute path of partition to get hashes for
    :param recalculate: list of suffixes which should be recalculated when got
    :param do_listdir: force existence check for all hashes in the partition
    :param reclaim_age: age at which to remove tombstones

    :returns: tuple of (number of suffix dirs hashed, dictionary of hashes)
    """

    hashed = 0
    hashes_file = join(partition_dir, HASH_FILE)
    modified = False
    force_rewrite = False
    hashes = {}
    mtime = -1

    if recalculate is None:
        recalculate = []

    try:
        with open(hashes_file, 'rb') as fp:
            hashes = pickle.load(fp)
        mtime = getmtime(hashes_file)
    except Exception:
        do_listdir = True
        force_rewrite = True
    if do_listdir:
        for suff in os.listdir(partition_dir):
            if len(suff) == 3:
                hashes.setdefault(suff, None)
        modified = True
    hashes.update((hash_, None) for hash_ in recalculate)
    for suffix, hash_ in hashes.items():
        if not hash_:
            suffix_dir = join(partition_dir, suffix)
            try:
                hashes[suffix] = hash_suffix(suffix_dir, reclaim_age)
                hashed += 1
            except PathNotDir:
                del hashes[suffix]
            except OSError:
                logging.exception(_('Error hashing suffix'))
            modified = True
    if modified:
        with lock_path(partition_dir):
            if force_rewrite or not exists(hashes_file) or \
                    getmtime(hashes_file) == mtime:
                write_pickle(
                    hashes, hashes_file, partition_dir, PICKLE_PROTOCOL)
                return hashed, hashes
        return get_hashes(partition_dir, recalculate, do_listdir,
                          reclaim_age)
    else:
        return hashed, hashes
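A minimal call sketch for the helper above (the partition path and the 'abc' suffix are made up for illustration):

# Hypothetical partition path and suffix, for illustration only.
partition_dir = '/srv/node/sdb1/objects/1234'

# Trust the cached hashes.pkl where possible.
hashed, hashes = get_hashes(partition_dir)

# Force one suffix to be rehashed and re-check which suffixes exist on disk.
hashed, hashes = get_hashes(partition_dir, recalculate=['abc'],
                            do_listdir=True)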
Example #2
def recalculate_hashes(partition_dir, suffixes, reclaim_age=ONE_WEEK):
    """
    Recalculates hashes for the given suffixes in the partition and updates
    them in the partition's hashes file.

    :param partition_dir: directory of the partition in which to recalculate
    :param suffixes: list of suffixes to recalculate
    :param reclaim_age: age in seconds at which tombstones should be removed
    """
    def tpool_listdir(partition_dir):
        return dict(((suff, None) for suff in os.listdir(partition_dir)
                     if len(suff) == 3 and isdir(join(partition_dir, suff))))

    hashes_file = join(partition_dir, HASH_FILE)
    with lock_path(partition_dir):
        try:
            with open(hashes_file, 'rb') as fp:
                hashes = pickle.load(fp)
        except Exception:
            hashes = tpool.execute(tpool_listdir, partition_dir)
        for suffix in suffixes:
            suffix_dir = join(partition_dir, suffix)
            if os.path.exists(suffix_dir):
                hashes[suffix] = hash_suffix(suffix_dir, reclaim_age)
            elif suffix in hashes:
                del hashes[suffix]
        with open(hashes_file + '.tmp', 'wb') as fp:
            pickle.dump(hashes, fp, PICKLE_PROTOCOL)
        renamer(hashes_file + '.tmp', hashes_file)
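A hedged call sketch for recalculate_hashes (the path and suffixes are invented; the three-character suffix names match the len(suff) == 3 filter in tpool_listdir):

# Hypothetical partition path and suffixes, for illustration only.
recalculate_hashes('/srv/node/sdb1/objects/1234', ['abc', '0f3'],
                   reclaim_age=ONE_WEEK)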
Example #3
import time

from swift.common import exceptions as exc
from swift.common import utils as scu


def get_lock(path, name):
    print('%s - %s: try to flock %s\n' % (time.ctime(time.time()), name, path))
    try:
        with scu.lock_path(path):
            print('%s - %s: sleep on lock for 60 sec\n' % (time.ctime(time.time()), name))
            time.sleep(60)
            print('%s - %s: done sleeping, free the lock\n' % (time.ctime(time.time()), name))
    except exc.LockTimeout:
        print('%s - %s: lock timeout' % (time.ctime(time.time()), name))
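get_lock is clearly meant to be run by two callers at once so that the second one hits LockTimeout. A hedged driver sketch using two processes (the lock directory is invented, and lock_path's default timeout is, as far as I recall, about 10 seconds, so the second worker should time out well before the first one's 60-second sleep ends):

import os
from multiprocessing import Process

if __name__ == '__main__':
    # Hypothetical directory that both workers try to lock.
    path = '/tmp/lock-demo'
    os.makedirs(path, exist_ok=True)
    workers = [Process(target=get_lock, args=(path, 'worker-%d' % i))
               for i in range(2)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()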
Example #4
def get_hashes(partition_dir,
               recalculate=[],
               do_listdir=False,
               reclaim_age=ONE_WEEK):
    """
    Get a list of hashes for the suffix dir.  do_listdir causes it to mistrust
    the hash cache for suffix existence at the (unexpectedly high) cost of a
    listdir.  reclaim_age is just passed on to hash_suffix.

    :param partition_dir: absolute path of partition to get hashes for
    :param recalculate: list of suffixes which should be recalculated when got
    :param do_listdir: force existence check for all hashes in the partition
    :param reclaim_age: age at which to remove tombstones

    :returns: tuple of (number of suffix dirs hashed, dictionary of hashes)
    """

    hashed = 0
    hashes_file = join(partition_dir, HASH_FILE)
    with lock_path(partition_dir):
        modified = False
        hashes = {}
        try:
            with open(hashes_file, 'rb') as fp:
                hashes = pickle.load(fp)
        except Exception:
            do_listdir = True
        if do_listdir:
            hashes = dict(
                ((suff, hashes.get(suff, None))
                 for suff in os.listdir(partition_dir)
                 if len(suff) == 3 and isdir(join(partition_dir, suff))))
            modified = True
        for hash_ in recalculate:
            hashes[hash_] = None
        for suffix, hash_ in hashes.items():
            if not hash_:
                suffix_dir = join(partition_dir, suffix)
                if os.path.exists(suffix_dir):
                    try:
                        hashes[suffix] = hash_suffix(suffix_dir, reclaim_age)
                        hashed += 1
                    except OSError:
                        logging.exception(_('Error hashing suffix'))
                        hashes[suffix] = None
                else:
                    del hashes[suffix]
                modified = True
                sleep()
        if modified:
            with open(hashes_file + '.tmp', 'wb') as fp:
                pickle.dump(hashes, fp, PICKLE_PROTOCOL)
            renamer(hashes_file + '.tmp', hashes_file)
        return hashed, hashes
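The temp-file-plus-renamer step at the end is what keeps concurrent readers from ever seeing a half-written hashes.pkl. Below is a standalone sketch of the same write-then-rename pattern using only the standard library (the function name is mine; Swift's own write_pickle and renamer helpers presumably add error handling beyond this):

import os
import pickle
import tempfile


def atomic_write_pickle(obj, dest, tmp_dir, protocol=pickle.HIGHEST_PROTOCOL):
    # Write to a temp file on the same filesystem, flush and fsync it, then
    # rename it over the destination: readers see either the old file or the
    # new one, never a partial write.
    fd, tmp_path = tempfile.mkstemp(dir=tmp_dir, suffix='.tmp')
    try:
        with os.fdopen(fd, 'wb') as fp:
            pickle.dump(obj, fp, protocol)
            fp.flush()
            os.fsync(fp.fileno())
        os.rename(tmp_path, dest)
    except Exception:
        os.unlink(tmp_path)
        raise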
Example #5
def get_hashes(partition_dir, recalculate=[], do_listdir=False,
               reclaim_age=ONE_WEEK):
    """
    Get a list of hashes for the suffix dir.  do_listdir causes it to mistrust
    the hash cache for suffix existence at the (unexpectedly high) cost of a
    listdir.  reclaim_age is just passed on to hash_suffix.

    :param partition_dir: absolute path of partition to get hashes for
    :param recalculate: list of suffixes which should be recalculated when got
    :param do_listdir: force existence check for all hashes in the partition
    :param reclaim_age: age at which to remove tombstones

    :returns: tuple of (number of suffix dirs hashed, dictionary of hashes)
    """

    hashed = 0
    hashes_file = join(partition_dir, HASH_FILE)
    with lock_path(partition_dir):
        modified = False
        hashes = {}
        try:
            with open(hashes_file, 'rb') as fp:
                hashes = pickle.load(fp)
        except Exception:
            do_listdir = True
        if do_listdir:
            hashes = dict(
                ((suff, hashes.get(suff, None))
                 for suff in os.listdir(partition_dir)
                 if len(suff) == 3 and isdir(join(partition_dir, suff))))
            modified = True
        for hash_ in recalculate:
            hashes[hash_] = None
        for suffix, hash_ in hashes.items():
            if not hash_:
                suffix_dir = join(partition_dir, suffix)
                if os.path.exists(suffix_dir):
                    try:
                        hashes[suffix] = hash_suffix(suffix_dir, reclaim_age)
                        hashed += 1
                    except OSError:
                        logging.exception(_('Error hashing suffix'))
                        hashes[suffix] = None
                else:
                    del hashes[suffix]
                modified = True
                sleep()
        if modified:
            write_pickle(hashes, hashes_file, partition_dir, PICKLE_PROTOCOL)
        return hashed, hashes
Example #6
def invalidate_hash(suffix_dir):
    """
    Invalidates the hash for a suffix_dir in the partition's hashes file.

    :param suffix_dir: absolute path to suffix dir whose hash needs
                       invalidating
    """

    suffix = basename(suffix_dir)
    partition_dir = dirname(suffix_dir)
    hashes_file = join(partition_dir, HASH_FILE)
    with lock_path(partition_dir):
        try:
            with open(hashes_file, 'rb') as fp:
                hashes = pickle.load(fp)
            if suffix in hashes and not hashes[suffix]:
                return
        except Exception:
            return
        hashes[suffix] = None
        write_pickle(hashes, hashes_file, partition_dir, PICKLE_PROTOCOL)
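Note that invalidate_hash takes the suffix directory rather than the partition directory, deriving the partition from its dirname. A minimal call sketch (the layout and values below are made up for illustration):

# Hypothetical suffix dir:  /srv/node/<device>/objects/<partition>/<suffix>
suffix_dir = '/srv/node/sdb1/objects/1234/abc'

# After adding or removing files under this suffix, mark its cached hash as
# stale so the next get_hashes() call recomputes it.
invalidate_hash(suffix_dir)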
Example #7
def hook_post_partition(states, step, policy, diskfile_manager,
                        partition_path):
    datadir_path, part = os.path.split(os.path.abspath(partition_path))
    device_path, datadir_name = os.path.split(datadir_path)
    device = os.path.basename(device_path)
    state_tmp_file = os.path.join(device_path,
                                  STATE_TMP_FILE.format(datadir=datadir_name))
    state_file = os.path.join(device_path,
                              STATE_FILE.format(datadir=datadir_name))

    # We started with a partition space like
    #   |0              N|
    #   |ABCDEFGHIJKLMNOP|
    #
    # After relinking, it will be more like
    #   |0                             2N|
    #   |AABBCCDDEEFFGGHHIIJJKKLLMMNNOOPP|
    #
    # We want to hold off on rehashing until after cleanup, since that is the
    # point at which we've finished with filesystem manipulations. But there's
    # a slight complication: we know the upper half has nothing to clean up,
    # so the cleanup phase only looks at
    #   |0                             2N|
    #   |AABBCCDDEEFFGGHH                |
    #
    # To ensure that the upper half gets rehashed, too, do it as part of
    # relinking; as we finish
    #   |0              N|
    #   |        IJKLMNOP|
    # shift to the new partition space and rehash
    #   |0                             2N|
    #   |                IIJJKKLLMMNNOOPP|
    partition = int(part)
    if step == STEP_RELINK and partition >= 2**(states['part_power'] - 1):
        for new_part in (2 * partition, 2 * partition + 1):
            diskfile_manager.get_hashes(device, new_part, [], policy)
    elif step == STEP_CLEANUP:
        hashes = diskfile_manager.get_hashes(device, partition, [], policy)
        # In any reasonably-large cluster, we'd expect all old partitions P
        # to be empty after cleanup (i.e., it's unlikely that there's another
        # partition Q := P//2 that also has data on this device).
        #
        # Try to clean up empty partitions now, so operators can use existing
        # rebalance-complete metrics to monitor relinking progress (provided
        # there are few/no handoffs when relinking starts and little data is
        # written to handoffs during the increase).
        if not hashes:
            with lock_path(partition_path):
                # Same lock used by invalidate_hashes, consolidate_hashes,
                # get_hashes
                try:
                    os.unlink(os.path.join(partition_path, 'hashes.pkl'))
                    os.unlink(os.path.join(partition_path, 'hashes.invalid'))
                    os.unlink(os.path.join(partition_path, '.lock'))
                except OSError:
                    pass
            try:
                os.rmdir(partition_path)
            except OSError:
                # Most likely, some data landed in here or we hit an error
                # above. Let the replicator deal with things; it was worth
                # a shot.
                pass

    # Then mark this part as done, in case the process is interrupted and
    # needs to resume.
    states["state"][part] = True
    with open(state_tmp_file, 'wt') as f:
        json.dump(states, f)
        os.fsync(f.fileno())
    os.rename(state_tmp_file, state_file)
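The 2 * partition and 2 * partition + 1 targets follow from the partition power growing by one bit: every old partition splits into two consecutive new partitions, and only the upper half of the old space needs the extra rehash during relink. A small sketch of that arithmetic (the part power and partition number are invented for illustration):

# Invented part power and partition number, for illustration only.
old_part_power = 10
old_partition = 705

# With one more bit of partition power, an old partition maps onto two
# consecutive new partitions.
new_partitions = (2 * old_partition, 2 * old_partition + 1)
assert new_partitions == (1410, 1411)

# This partition sits in the upper half of the old space, so it gets rehashed
# during the relink step, matching the check in hook_post_partition above.
assert old_partition >= 2 ** (old_part_power - 1)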
Example #8
    def hook_post_partition(self, partition_path):
        datadir_path, partition = os.path.split(
            os.path.abspath(partition_path))
        device_path, datadir_name = os.path.split(datadir_path)
        device = os.path.basename(device_path)
        state_tmp_file = os.path.join(
            device_path, STATE_TMP_FILE.format(datadir=datadir_name))
        state_file = os.path.join(
            device_path, STATE_FILE.format(datadir=datadir_name))

        # We started with a partition space like
        #   |0              N|
        #   |ABCDEFGHIJKLMNOP|
        #
        # After relinking, it will be more like
        #   |0                             2N|
        #   |AABBCCDDEEFFGGHHIIJJKKLLMMNNOOPP|
        #
        # We want to hold off on rehashing until after cleanup, since that is
        # the point at which we've finished with filesystem manipulations. But
        # there's a slight complication: we know the upper half has nothing to
        # clean up, so the cleanup phase only looks at
        #   |0                             2N|
        #   |AABBCCDDEEFFGGHH                |
        #
        # To ensure that the upper half gets rehashed, too, do it as part of
        # relinking; as we finish
        #   |0              N|
        #   |        IJKLMNOP|
        # shift to the new partition space and rehash
        #   |0                             2N|
        #   |                IIJJKKLLMMNNOOPP|
        for dirty_partition in self.linked_into_partitions:
            if self.do_cleanup or \
                    dirty_partition >= 2 ** self.states['part_power']:
                self.diskfile_mgr.get_hashes(
                    device, dirty_partition, [], self.policy)

        if self.do_cleanup:
            hashes = self.diskfile_mgr.get_hashes(
                device, int(partition), [], self.policy)
            # In any reasonably-large cluster, we'd expect all old
            # partitions P to be empty after cleanup (i.e., it's unlikely
            # that there's another partition Q := P//2 that also has data
            # on this device).
            #
            # Try to clean up empty partitions now, so operators can use
            # existing rebalance-complete metrics to monitor relinking
            # progress (provided there are few/no handoffs when relinking
            # starts and little data is written to handoffs during the
            # increase).
            if not hashes:
                with lock_path(partition_path):
                    # Same lock used by invalidate_hashes, consolidate_hashes,
                    # get_hashes
                    try:
                        for f in ('hashes.pkl', 'hashes.invalid', '.lock'):
                            try:
                                os.unlink(os.path.join(partition_path, f))
                            except OSError as e:
                                if e.errno != errno.ENOENT:
                                    raise
                    except OSError:
                        pass
                try:
                    os.rmdir(partition_path)
                except OSError:
                    # Most likely, some data landed in here or we hit an error
                    # above. Let the replicator deal with things; it was worth
                    # a shot.
                    pass

        # If there were no errors, mark this partition as done. This is handy
        # in case the process is interrupted and needs to resume, or there
        # were errors and the relinker needs to run again.
        if self.pre_partition_errors == self.total_errors:
            self.states["state"][partition] = True
            with open(state_tmp_file, 'wt') as f:
                json.dump(self.states, f)
                os.fsync(f.fileno())
            os.rename(state_tmp_file, state_file)
        num_parts_done = sum(
            1 for part in self.states["state"].values()
            if part)
        step = STEP_CLEANUP if self.do_cleanup else STEP_RELINK
        num_total_parts = len(self.states["state"])
        self.logger.info(
            "Step: %s Device: %s Policy: %s Partitions: %d/%d",
            step, device, self.policy.name, num_parts_done, num_total_parts)
        self._update_recon(device)