Exemplo n.º 1
0
def compare_recursive(src_rp, dest_rp,
                      compare_hardlinks=True,
                      exclude_rbdir=True,
                      ignore_tmp_files=False,
                      compare_ownership=False,
                      compare_eas=False,
                      compare_acls=False):
    """Walk src_rp and dest_rp in parallel and compare their entries

    Both paths may be directories.  Only file attributes are compared,
    never the file contents.  If compare_hardlinks is set,
    reset_hardlink_dicts() is called first, so any previous hardlink
    bookkeeping is lost.

    Returns 1 when every collated pair compares equal, and 0 as soon
    as the first differing pair is found.

    """
    summary = "Comparing %s and %s, hardlinks %s, eas %s, acls %s" % (
        src_rp.get_safepath(), dest_rp.get_safepath(),
        compare_hardlinks, compare_eas, compare_acls)
    Log(summary, 3)

    if compare_hardlinks:
        reset_hardlink_dicts()

    src_iter, dest_iter = _get_selection_functions(
        src_rp, dest_rp,
        exclude_rbdir=exclude_rbdir, ignore_tmp_files=ignore_tmp_files)

    # The same comparison switches apply to every pair, so build them once.
    eq_options = {
        "compare_hardlinks": compare_hardlinks,
        "compare_ownership": compare_ownership,
        "compare_eas": compare_eas,
        "compare_acls": compare_acls,
    }
    for rorp_pair in rorpiter.Collate2Iters(src_iter, dest_iter):
        left_rorp, right_rorp = rorp_pair
        if _files_rorp_eq(left_rorp, right_rorp, **eq_options):
            continue
        return 0  # stop at the first mismatch
    return 1
Exemplo n.º 2
0
def ListChangedSince(mirror_rp, inc_rp, restore_to_time):
    """List the changed files under mirror_rp since rest time

    This is a generator of RORPs.  We do this because we want to give
    the remote connection the data in buffered increments, and this is
    done automatically for rorp iterators.  Each yielded rorp carries a
    single-element index holding one text line of the form
    "<change> <path>", where <change> is "new", "deleted" or "changed".
    Entries that compare equal at both times are skipped.

    The rf cache opened through MirrorStruct is closed when the
    generator finishes, is closed early by its consumer, or fails with
    an exception.

    """
    assert mirror_rp.conn is Globals.local_connection, "Run locally only"
    MirrorStruct.set_mirror_and_rest_times(restore_to_time)
    MirrorStruct.initialize_rf_cache(mirror_rp, inc_rp)

    # try/finally so that the cache is released even if the caller stops
    # iterating early (GeneratorExit) or an error is raised while collating.
    try:
        old_iter = MirrorStruct.get_mirror_rorp_iter(
            MirrorStruct._rest_time, 1)
        cur_iter = MirrorStruct.get_mirror_rorp_iter(
            MirrorStruct._mirror_time, 1)
        collated = rorpiter.Collate2Iters(old_iter, cur_iter)
        for old_rorp, cur_rorp in collated:
            if not old_rorp:
                change = "new"
            elif not cur_rorp:
                change = "deleted"
            elif old_rorp == cur_rorp:
                continue
            else:
                change = "changed"
            # A real conditional expression: the old "a and b or c" form
            # would wrongly fall through to cur_rorp whenever
            # str(old_rorp) happened to be empty.
            path_desc = str(old_rorp) if old_rorp else str(cur_rorp)
            yield rpath.RORPath(("%-7s %s" % (change, path_desc), ))
    finally:
        MirrorStruct.close_rf_cache()
Exemplo n.º 3
0
def compare_recursive(src_rp, dest_rp,
                      compare_hardlinks=True,
                      exclude_rbdir=True,
                      ignore_tmp_files=False,
                      compare_ownership=False,
                      compare_eas=False,
                      compare_acls=False):
    """Check whether src_rp and dest_rp hold matching entries

    Either path can be a directory.  The comparison looks at file
    attributes only, not at the data itself.  With compare_hardlinks
    enabled, reset_hardlink_dicts() is called before comparing, which
    discards the existing hardlink dictionaries.

    Returns 1 if all collated pairs are equal, otherwise 0 (returned
    at the first pair that differs).

    """
    log_text = ("Comparing {srp} and {drp}, hardlinks {chl}, "
                "eas {cea}, acls {cacl}").format(
        srp=src_rp, drp=dest_rp, chl=compare_hardlinks,
        cea=compare_eas, cacl=compare_acls)
    Log(log_text, 3)

    if compare_hardlinks:
        reset_hardlink_dicts()

    selections = _get_selection_functions(
        src_rp, dest_rp,
        exclude_rbdir=exclude_rbdir,
        ignore_tmp_files=ignore_tmp_files)
    src_iter, dest_iter = selections

    for src_entry, dest_entry in rorpiter.Collate2Iters(src_iter, dest_iter):
        entries_match = _files_rorp_eq(
            src_entry, dest_entry,
            compare_hardlinks=compare_hardlinks,
            compare_ownership=compare_ownership,
            compare_eas=compare_eas,
            compare_acls=compare_acls)
        if not entries_match:
            return 0
    return 1
Exemplo n.º 4
0
    def attach_files(cls, src_iter, mirror_rp, inc_rp, compare_time):
        """Attach data to all the files that need checking

        Collates src_iter with the repository iterator obtained from
        cls.init_and_get_iter(mirror_rp, inc_rp, compare_time) and
        yields one rorp per entry that may have changed:

        - pairs where the source is not a regular file and both sides
          compare equal are logged as successes and skipped;
        - when both sides are regular files of the same size, the
          repository rorp gets a file object from cls.rf_cache attached
          and its attached filetype set to 'snapshot';
        - if there is no repository rorp for an index, a bare
          rpath.RORPath(index) is yielded to indicate the deleted
          mir_rorp.

        """
        repo_iter = cls.init_and_get_iter(mirror_rp, inc_rp, compare_time)
        base_index = cls.mirror_base.index
        for src_rorp, mir_rorp in rorpiter.Collate2Iters(src_iter, repo_iter):
            # Conditional expression instead of "a and b or c": the latter
            # falls through to mir_rorp.index when src_rorp.index is the
            # empty tuple, and fails if mir_rorp is None in that case.
            index = src_rorp.index if src_rorp else mir_rorp.index
            if src_rorp and mir_rorp:
                if not src_rorp.isreg() and src_rorp == mir_rorp:
                    _log_success(src_rorp, mir_rorp)
                    continue  # They must be equal, nothing else to check
                if (src_rorp.isreg() and mir_rorp.isreg()
                        and src_rorp.getsize() == mir_rorp.getsize()):
                    # Same-size regular files: attach the repository data
                    # so the consumer can compare actual content.
                    fp = cls.rf_cache.get_fp(base_index + index, mir_rorp)
                    mir_rorp.setfile(fp)
                    mir_rorp.set_attached_filetype('snapshot')

            if mir_rorp:
                yield mir_rorp
            else:
                yield rpath.RORPath(index)  # indicate deleted mir_rorp
Exemplo n.º 5
0
 def compare_meta(cls, repo_iter):
     """Quick metadata-only comparison, yielding reports

     Collates the iterator from cls.get_select() with repo_iter.  For
     each pair, a truthy result of cls._get_basic_report() is yielded;
     otherwise the pair is passed to cls._log_success().
     """
     pairs = rorpiter.Collate2Iters(cls.get_select(), repo_iter)
     for source_rorp, repo_rorp in pairs:
         basic_report = cls._get_basic_report(source_rorp, repo_rorp)
         if not basic_report:
             cls._log_success(source_rorp, repo_rorp)
             continue
         yield basic_report
Exemplo n.º 6
0
 def _get_diffiter(self, new_iter, old_iter):
     """
     Yield the metadata differences going from new_iter to old_iter

     For an index present only in new_iter, an empty RORPath with that
     index is yielded.  For an index missing from new_iter, or whose
     data differs, the old rorp is yielded.  Identical entries yield
     nothing.
     """
     for newer, older in rorpiter.Collate2Iters(new_iter, old_iter):
         if older:
             # compare the raw data dictionaries exactly; == on rorps
             # themselves cannot be used here
             if not newer or newer.data != older.data:
                 yield older
         else:
             yield rpath.RORPath(newer.index)
Exemplo n.º 7
0
    def set_rorp_cache(cls, baserp, source_iter, for_increment):
        """
        Set up cls.CCPP, the cache of destination rorps

        source_iter is collated with the destination selection obtained
        from cls._get_dest_select(baserp, for_increment).  Pass a true
        for_increment when mirroring and incrementing, a false one when
        only mirroring.
        """
        collated = rorpiter.Collate2Iters(
            source_iter, cls._get_dest_select(baserp, for_increment))
        cache_size = Globals.pipeline_max_length * 4
        cls.CCPP = CacheCollatedPostProcess(collated, cache_size, baserp)
Exemplo n.º 8
0
    def get_diffs(cls, target_iter):
        """Return diffs for the given rorp iterator of target files

        target_iter carries attribute listings only, no file data, so
        every diff produced here will be a snapshot.

        The mirror rorps (with cls.mirror_base.index passed to
        cls.subtract_indices) are collated with target_iter, and the
        result is handed to cls._get_diffs_from_collated().

        """
        base_index = cls.mirror_base.index
        mirror_rorps = cls.get_mirror_rorp_iter()
        relative_mirror_iter = cls.subtract_indices(base_index, mirror_rorps)
        return cls._get_diffs_from_collated(
            rorpiter.Collate2Iters(relative_mirror_iter, target_iter))
Exemplo n.º 9
0
def _iterate_meta_rfs(mirror_rp, inc_rp):
    """Yield RegressFile objects enriched with their metadata rorp

    The raw regress files are collated with the metadata iterator.
    Each yielded object has had set_metadata_rorp() called with the
    matching metadata entry, i.e. the metadata attributes of the
    mirror file at regress_time.  A metadata entry for which
    longname.update_rf() returns nothing is reported with a warning
    and skipped.

    """
    pairs = rorpiter.Collate2Iters(
        _iterate_raw_rfs(mirror_rp, inc_rp), _yield_metadata())
    for candidate_rf, metadata_rorp in pairs:
        regress_file = longname.update_rf(
            candidate_rf, metadata_rorp, mirror_rp, RegressFile)
        if regress_file:
            regress_file.set_metadata_rorp(metadata_rorp)
            yield regress_file
        else:
            log.Log("Warning, metadata file has entry for path {pa}, "
                    "but there are no associated files.".format(
                        pa=metadata_rorp), log.WARNING)
Exemplo n.º 10
0
    def compare_hash(cls, repo_iter):
        """Compare rorps and also the sha1 sums, return report iter

        Works on the pairs collated from cls.get_select() and repo_iter.
        The nested hashes_changed() is passed to cls._get_basic_report()
        as an extra check.  Truthy reports are yielded; pairs without a
        report go to cls._log_success().
        """

        def hashes_changed(src_rp, mir_rorp):
            """Return 1 if the data hashes differ, 0 if same or unknown"""
            recorded_sha1 = map_hardlinks.get_hash(mir_rorp)
            if not recorded_sha1:
                # No digest recorded: nothing to compare against, so this
                # is warned about and treated as "not changed".
                log.Log("Metadata file has no digest for mirror file {mf}, "
                        "unable to compare.".format(mf=mir_rorp), log.WARNING)
                return 0
            same_size = src_rp.getsize() == mir_rorp.getsize()
            # The digest is only computed when the sizes already agree.
            if same_size and hash.compute_sha1(src_rp) == recorded_sha1:
                return 0
            return 1

        pairs = rorpiter.Collate2Iters(cls.get_select(), repo_iter)
        for source_rp, repo_rorp in pairs:
            report = cls._get_basic_report(source_rp, repo_rorp,
                                           hashes_changed)
            if not report:
                cls._log_success(source_rp, repo_rorp)
                continue
            yield report
Exemplo n.º 11
0
    def yield_sub_rfs(self):
        """Yield the RestoreFiles below this RestoreFile (a directory)

        Nothing is yielded unless the mirror path or the increment
        path is a directory.  Mirror entries and increment complexes
        are collated; whichever side is missing for an index is
        filled in from self.inc_rp / self.mirror_rp before a new
        instance of this class is created for the pair.
        """
        if not (self.mirror_rp.isdir() or self.inc_rp.isdir()):
            return

        mirror_iter = (self._yield_mirrorrps(self.mirror_rp)
                       if self.mirror_rp.isdir() else iter([]))
        inc_pair_iter = (self.yield_inc_complexes(self.inc_rp)
                         if self.inc_rp.isdir() else iter([]))

        for sub_mirror, inc_pair in rorpiter.Collate2Iters(
                mirror_iter, inc_pair_iter):
            if inc_pair:
                sub_inc, sub_inc_list = inc_pair
            else:
                # No increments for this entry: derive the increment path
                # from the mirror index and use an empty increment list.
                sub_inc = self.inc_rp.new_index(sub_mirror.index)
                sub_inc_list = []
            if not sub_mirror:
                sub_mirror = self.mirror_rp.new_index_empty(sub_inc.index)
            yield self.__class__(sub_mirror, sub_inc, sub_inc_list)
Exemplo n.º 12
0
def CompareRecursive(src_rp, dest_rp, compare_hardlinks=1,
                     equality_func=None, exclude_rbdir=1,
                     ignore_tmp_files=None, compare_ownership=0,
                     compare_eas=0, compare_acls=0):
    """Compare src_rp and dest_rp, which can be directories

    This only compares file attributes, not the actual data.  This
    will overwrite the hardlink dictionaries if compare_hardlinks is
    specified.

    Parameters:
    - compare_hardlinks: also require matching hardlink structure.
    - equality_func: accepted for backward compatibility only; it is
      NOT used, the built-in comparison below is always applied.
    - exclude_rbdir: leave the rdiff-backup-data directory out.
    - ignore_tmp_files: leave rdiff-backup temporary files out.
    - compare_ownership: passed through to equal_verbose().
    - compare_eas / compare_acls: also compare extended attributes /
      access control lists.

    Returns 1 if all collated entries compare equal, 0 at the first
    difference found.

    """
    tmp_file_regexp = ".*rdiff-backup.tmp.[^/]+$"

    def get_selection_functions():
        """Return generators of files in source, dest"""
        src_rp.setdata()
        dest_rp.setdata()
        src_select = selection.Select(src_rp)
        dest_select = selection.Select(dest_rp)

        if ignore_tmp_files:
            # Ignoring temp files can be useful when we want to check the
            # correctness of a backup which aborted in the middle.  In
            # these cases it is OK to have tmp files lying around.
            src_select.add_selection_func(
                src_select.regexp_get_sf(tmp_file_regexp, 0))
            dest_select.add_selection_func(
                dest_select.regexp_get_sf(tmp_file_regexp, 0))

        if exclude_rbdir:  # Exclude rdiff-backup-data directory
            src_select.parse_rbdir_exclude()
            dest_select.parse_rbdir_exclude()

        return src_select.set_iter(), dest_select.set_iter()

    def preprocess(src_rorp, dest_rorp):
        """Initially process src and dest_rorp"""
        if compare_hardlinks and src_rorp:
            Hardlink.add_rorp(src_rorp, dest_rorp)

    def postprocess(src_rorp, dest_rorp):
        """After comparison, process src_rorp and dest_rorp"""
        if compare_hardlinks and src_rorp:
            Hardlink.del_rorp(src_rorp)

    # Deliberately not named "equality_func": that name would shadow the
    # (ignored) parameter of the same name and hide the fact that the
    # caller's value is never consulted.
    def rorps_equal(src_rorp, dest_rorp):
        """Combined eq func returns true iff two files compare same"""
        if not src_rorp:
            Log("Source rorp missing: " + str(dest_rorp), 3)
            return 0
        if not dest_rorp:
            Log("Dest rorp missing: " + str(src_rorp), 3)
            return 0
        if not src_rorp.equal_verbose(dest_rorp,
                                      compare_ownership=compare_ownership):
            return 0
        if compare_hardlinks and not Hardlink.rorp_eq(src_rorp, dest_rorp):
            Log("Hardlink compare failure", 3)
            Log("%s: %s" % (src_rorp.index,
                            Hardlink.get_inode_key(src_rorp)), 3)
            Log("%s: %s" % (dest_rorp.index,
                            Hardlink.get_inode_key(dest_rorp)), 3)
            return 0
        if compare_eas and not eas_acls.ea_compare_rps(src_rorp, dest_rorp):
            Log("Different EAs in files %s and %s" %
                (src_rorp.get_indexpath(), dest_rorp.get_indexpath()), 3)
            return 0
        if compare_acls and not eas_acls.acl_compare_rps(src_rorp, dest_rorp):
            Log("Different ACLs in files %s and %s" %
                (src_rorp.get_indexpath(), dest_rorp.get_indexpath()), 3)
            return 0
        return 1

    Log("Comparing %s and %s, hardlinks %s, eas %s, acls %s" %
        (src_rp.path, dest_rp.path, compare_hardlinks,
         compare_eas, compare_acls), 3)
    if compare_hardlinks:
        reset_hardlink_dicts()
    src_iter, dest_iter = get_selection_functions()
    for src_rorp, dest_rorp in rorpiter.Collate2Iters(src_iter, dest_iter):
        preprocess(src_rorp, dest_rorp)
        if not rorps_equal(src_rorp, dest_rorp):
            # Note: postprocess is skipped for the failing pair.
            return 0
        postprocess(src_rorp, dest_rorp)
    return 1