Example #1
def diff_file_stats(file_stats, history_entries, groupids, slog):
    with slog.time("group history by path") as rt:
        history_by_gpath = group_history_by_gpath(history_entries)
        rt.set_result({"path count": len(history_by_gpath)})

    with slog.time("compare to latest history"):
        for (rpath, size, mtime) in file_stats:
            groupid = groupids.from_root(rpath.root)
            if groupid is None:
                slog.ignored_rpath_without_groupid(rpath)
            else:
                gpath = (groupid, rpath.rel)
                history = history_by_gpath.pop(gpath, None)
                if history is None:
                    yield FileDiff.created(gpath, rpath, size, mtime, None)
                else:
                    latest = history.latest
                    if (latest.size != size
                            or not mtimes_eq(latest.mtime, mtime)):
                        yield FileDiff.changed(gpath, rpath, size, mtime, None)
                    else:
                        pass # unchanged

    with slog.time("find missing paths"):
        for missing_gpath, missing_history in history_by_gpath.items():
            if not missing_history.latest.deleted:
                (groupid, path) = missing_gpath
                root = groupids.to_root(groupid)
                if root is None:
                    slog.ignored_gpath_without_root(missing_gpath)
                else:
                    yield FileDiff.deleted(
                        missing_gpath, RootedPath(root, path),
                        DELETED_SIZE, DELETED_MTIME, "")
Example #2
def scan_and_update_history(fs, fs_root, root_mark, path_filter, hash_type,
                            history_store, peerid, groupids, clock, slog):
    with slog.time("read history") as rt:
        history_entries = history_store.read_entries(peerid)
        rt.set_result({"history entries": len(history_entries)})

    with slog.time("scan files") as rt:
        file_stats = list(fs.list_stats(
            fs_root, root_mark, names_to_ignore=path_filter.names_to_ignore))
        rt.set_result({"file stats": len(file_stats)})

    with slog.time("diff file stats") as rt:
        fdiffs = diff_file_stats(file_stats, history_entries, groupids, slog)
        ignored_fdiffs, fdiffs = partition(fdiffs,
            lambda fdiff: path_filter.ignore_path(fdiff.rpath.full))
        slog.ignored_rpaths(fdiff.rpath for fdiff in ignored_fdiffs)
        rt.set_result({"file diffs": len(fdiffs)})

    with slog.time("hash files") as rt:
        hashed_fdiffs = list(hash_file_diffs(fs, fdiffs, hash_type, slog))
        rt.set_result({"hashed file diffs": len(hashed_fdiffs)})

    # We rescan the files to make sure they are stable.  We might
    # decide to do this before hashing if there are lots of big
    # unstable files.  But in practice files will usually be stable.
    with slog.time("rescan files") as rt:
        rescan_stats = list(fs.stats(
            (fdiff.rpath for fdiff in hashed_fdiffs)))
        rt.set_result({"rescanned file stats": len(rescan_stats)})

    with slog.time("check change stability") as rt:
        # Index the rescan results, not the original file_stats, so
        # the stability check actually compares the two scans.
        rescan_stats_by_rpath = {rpath: (size, mtime)
                                 for rpath, size, mtime in rescan_stats}

        def is_stable(fdiff):
            (rescan_size, rescan_mtime) = rescan_stats_by_rpath.get(
                fdiff.rpath, (DELETED_SIZE, DELETED_MTIME))
            return (fdiff.size == rescan_size and
                    mtimes_eq(fdiff.mtime, rescan_mtime))

        stable_fdiffs, unstable_fdiffs = partition(hashed_fdiffs, is_stable)
        rt.set_result({"stable file diffs": len(stable_fdiffs),
                       "unstable file diffs": len(unstable_fdiffs)})

    with slog.time("insert new history entries"):
        new_entries = list(new_history_entries_from_file_diffs(
            stable_fdiffs, peerid, clock))
        if new_entries:
            history_store.add_entries(new_entries)

    # Technically, we don't have to do this, but it's nice to log this
    # after every scan.
    with slog.time("reread history") as rt:
        history_entries = history_store.read_entries(peerid)
        history_by_gpath = group_history_by_gpath(history_entries)
        total_size = sum(history.latest.size for history in
                         history_by_gpath.values())
        rt.set_result({"path count": len(history_by_gpath),
                       "total size": total_size})

    return history_entries
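
Example #2 also calls two small helpers that aren't defined here. Judging from the call sites, partition splits an iterable by a predicate into (matching, non-matching) lists, and mtimes_eq compares mtimes with some tolerance for coarse filesystem timestamp resolution. Plausible sketches under those assumptions (the 2-second tolerance is a guess, e.g. FAT-style granularity, not the original value):

def partition(items, predicate):
    # Assumed helper: split items into (matching, non_matching)
    # lists, matching the unpacking order at the call sites above.
    matching, non_matching = [], []
    for item in items:
        (matching if predicate(item) else non_matching).append(item)
    return matching, non_matching

MTIME_TOLERANCE = 2  # seconds; assumed filesystem timestamp granularity

def mtimes_eq(mtime1, mtime2):
    # Assumed helper: treat two mtimes as equal if they differ by
    # at most the filesystem's timestamp resolution.
    return abs(mtime1 - mtime2) <= MTIME_TOLERANCE
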