Exemplo n.º 1
0
    def schema(self):
        """Yield the schema of the configured database as lines of text.

        Opens the database named by ``self.settings.database`` and yields, in
        order: a ``-- [Tables]`` header, one comment line listing the table
        names, the ``sql`` column stored in ``sqlite_master`` for every table,
        a ``-- [Databases]`` header and one comment line for each row returned
        by ``PRAGMA database_list``.

        Nothing is yielded when the database cannot be opened.
        """
        # Setup logging
        log.setLevel(self.settings.verbosity)

        # Display configuration
        log.debug("%s" % self.settings)

        # Setup database
        log.debug("* setup database's...")
        db = HashDatabase2(self.settings.database)
        if not db.open():
            return

        # Query the table list once and reuse it for both the summary line and
        # the per-table output, so the two always describe the same snapshot.
        tables = list(
            db.connection.execute(
                "SELECT name,sql FROM sqlite_master WHERE (type='table') AND (substr(name,1,7) <> 'sqlite_') ORDER BY name"
            )
        )

        yield "-- [Tables]"
        yield "-- " + ", ".join(row["name"] for row in tables)
        for row in tables:
            yield row["sql"]

        yield "-- [Databases]"
        for row in db.connection.execute("PRAGMA database_list"):
            # Columns 1 and 2 of each database_list row are printed as "a: b".
            yield "-- %s: %s" % (row[1], row[2])
Exemplo n.º 2
0
    def path_select_duplicates(self, path, hash, size):
        """Yield rows with the same ``hash`` and ``size`` but a different path.

        ``path`` is first passed through ``hashdb_sql.normalize``.  Every
        (select, parameters) pair produced by ``self.build_selects`` for the
        sources ``None``, ``self.locals`` and ``self.remotes`` is executed on
        ``self.connection`` and each resulting row is yielded as a
        ``HashDatabase2.RowDetailed``.
        """
        normalized = hashdb_sql.normalize(path)

        duplicate_filters = [
            'hash = :hash',
            'size = :size',
            'path <> :path',
        ]
        selects = self.build_selects(
            sources=[None, self.locals, self.remotes],
            columns=HashDatabase2.RowDetailed._fields,
            filters=duplicate_filters,
        )

        for query, params in selects:
            # Bind the named placeholders referenced by the filters above.
            params['path'] = normalized
            params['size'] = size
            params['hash'] = hash

            log.debug('-- select')
            log.debug(query)
            log.debug('-- paramaters')
            log.debug(params)

            cursor = self.connection.execute(query, params)
            for record in cursor:
                yield HashDatabase2.RowDetailed(**record)
Exemplo n.º 3
0
    def walk(self):
        '''Walks the directory tree specified by targets, yielding all accessible regular files as Walker.Target objects'''

        # compile all the information required to walk the targets
        targets = set(self._targets)
        fskip_fstype = self.build_fskip_globs(self._skip_fstypes)
        fskip_path = self.build_fskip_globs(self._skip_paths)
        fskip_name = self.build_fskip_globs(self._skip_names)
        fskip_dirname = self.build_fskip_globs(self._skip_dirnames)
        fskip_filename = self.build_fskip_globs(self._skip_filenames)
        fskip_access = None
        skip_binds = self._skip_binds
        skip_mounts = self._skip_mounts
        skip_symlinks = self._skip_symlinks
        is_linuxy = False
        mounts = MountEntries()

        if platform.system() != 'Windows':
            is_linuxy = True

            def fskip_access(target):
                access = stat.S_IMODE(target.stat.st_mode)
                if access & (stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH) == 0:
                    # usr, grp, oth : no access
                    return True
                elif target.stat.st_uid == euid:
                    # usr
                    if access & stat.S_IRUSR == 0:
                        return True
                elif access & (stat.S_IRGRP | stat.S_IROTH) == 0:
                    # grp, oth : no access
                    return True
                elif target.stat.st_gid in groups:
                    # grp
                    if access & stat.S_IRGRP == 0:
                        return True
                elif access & (stat.S_IROTH) == 0:
                    # oth : no access
                    return True

                # Note that this may return some erronious negatives.
                # The objective is to try to avoid access errors, not prevent them entirely
                # Note that we could also use os.access, but that would cause additional
                # os.stat calls, and we want speed
                return False

            fskip_access.func_globals['uid'] = os.getuid()
            fskip_access.func_globals['euid'] = os.geteuid()
            fskip_access.func_globals['gid'] = os.getgid()
            fskip_access.func_globals['egid'] = os.getegid()
            fskip_access.func_globals['groups'] = os.getgroups()

            if os.geteuid() == 0:
                fskip_access = None  # No need to skip things if we are root

        try:
            todo = deque()
            dirs = deque()

            if self.walk_depth:
                fappend = dirs.appendleft

                def fdone():
                    todo.extendleft(dirs)
                    dirs.clear()
            else:
                fappend = todo.append

                def fdone():
                    pass

            for target in targets:
                log.verbose(PREFIX_ROOT + '%s (root)' % target.user)

                if stat.S_ISREG(target.stat.st_mode):
                    yield target
                if stat.S_ISDIR(target.stat.st_mode):
                    todo.clear()
                    todo.append((target, [(target.stat.st_ino,
                                           target.stat.st_dev)]))
                    while True:
                        try:
                            target, nodes = todo.popleft()
                        except IndexError, _:
                            break  # Reached the last element in the list

                        try:
                            filelist = os.listdir(target.true)
                            filelist.sort()
                        except OSError, ex:
                            log.warning('warning: Unable to list target %r: %s'
                                        % (target.user, ex))
                            continue

                        for name in filelist:
                            child = Walker.Target(
                                os.path.join(target.true, name),
                                os.path.join(target.user, name), None)

                            # skip name?
                            if fskip_name and fskip_name(name):
                                if log.is_debug:
                                    log.debug(PREFIX_SKIP +
                                              '%s (skip_name)' % child.user)
                                continue

                            # skip path?
                            if fskip_path and fskip_path(child.user):
                                if log.is_debug:
                                    log.debug(PREFIX_SKIP +
                                              '%s (skip_path)' % child.user)
                                continue

                            # stat
                            try:
                                child = child._replace(
                                    stat=os.lstat(child.true))
                            except OSError, ex:
                                log.warning('warning: Unable to lstat %r: %s' %
                                            (child.user, ex))
                                if log.is_debug:
                                    log.debug(PREFIX_SKIP +
                                              '%r (failed lstat)' % child.user)
                                continue

                            # recursive loop?
                            if (child.stat.st_ino, child.stat.st_dev) in nodes:
                                log.debug(
                                    PREFIX_SKIP +
                                    '%r (loop chain detected)' % child.user)
                                continue

                            # check access
                            if fskip_access and fskip_access(child):
                                log.debug(PREFIX_SKIP +
                                          '%r (no access)' % child.user)
                                continue

                            # resolve symlinks
                            if stat.S_ISLNK(child.stat.st_mode):
                                if skip_symlinks:
                                    log.debug(
                                        PREFIX_SKIP +
                                        '%r (skip_symlinks)' % child.user)
                                    continue

                                log.debug(PREFIX_SYM +
                                          '%s (sym link)' % child.user)

                                try:
                                    child = child._replace(
                                        true=mounts.truepath(child.true))
                                    child = child._replace(
                                        stat=os.lstat(child.true))
                                except OSError, ex:
                                    log.warning(
                                        'warning: Unable to read symlink target %r: %s'
                                        % (child.user, ex))
                                    if log.is_debug:
                                        log.debug(
                                            PREFIX_SKIP +
                                            '%r (failed to read symlink target)'
                                            % child.user)
                                    continue

                                # recursive loop?
                                if (child.stat.st_ino,
                                        child.stat.st_dev) in nodes:
                                    log.debug(PREFIX_SKIP +
                                              '%r (loop chain detected)' %
                                              child.user)
                                    continue

                                # check access
                                if fskip_access != None and fskip_access(
                                        child):
                                    log.debug(PREFIX_SKIP +
                                              '%r (no access)' % child.user)
                                    continue

                                # Need to recalculate child.true/..
                                parent_stat = None
                            else:
                                parent_stat = target.stat

                            # regular file?
                            if stat.S_ISREG(child.stat.st_mode):
                                # skip filename?
                                if fskip_filename and fskip_filename(name):
                                    if log.is_debug:
                                        log.debug(
                                            PREFIX_SKIP +
                                            '%r (skip_filename)' % child.user)
                                    continue

                                if log.is_verbose:
                                    log.verbose(
                                        PREFIX_REG +
                                        '%s (regular file)' % child.user)

                                yield child
                                continue

                            # directory?
                            if stat.S_ISDIR(child.stat.st_mode):
                                # skip dirname?
                                if fskip_dirname and fskip_dirname(name):
                                    if log.is_debug:
                                        log.debug(
                                            PREFIX_SKIP +
                                            '%s (skip_dirname)' % child.user)
                                    continue

                                # is bind? ToDo: Should this check be in a loop for bind chains?
                                if mounts.is_bind(child.true):
                                    # skip binds?
                                    if skip_binds:
                                        if log.is_debug:
                                            log.debug(
                                                PREFIX_SKIP +
                                                '%s (skip_binds)' % child.user)
                                        continue

                                    log.debug(PREFIX_BIND +
                                              '%s (bind mount)' % child.user)

                                    try:
                                        child = child._replace(
                                            true=mounts.truepath(child.true))
                                        child = child._replace(
                                            stat=os.lstat(child.true))
                                    except OSError, ex:
                                        log.warning(
                                            'warning: Unable to read bind target %r: %s'
                                            % (child.user, ex))
                                        if log.is_debug:
                                            log.debug(
                                                PREFIX_SKIP +
                                                '%r (failed to read bind target)'
                                                % (child.user, ex))
                                        continue

                                    # recursive loop?
                                    if (child.stat.st_ino,
                                            child.stat.st_dev) in nodes:
                                        log.debug(PREFIX_SKIP +
                                                  '%r (loop chain detected)' %
                                                  child.user)
                                        continue

                                    # check access
                                    if fskip_access != None and fskip_access(
                                            child):
                                        log.debug(
                                            PREFIX_SKIP +
                                            '%r (no access)' % child.user)
                                        continue

                                    parent_stat = None

                                # get parent stat
                                if is_linuxy and parent_stat == None:
                                    try:
                                        parent_stat = os.lstat(
                                            os.path.join(child.true, '..'))
                                    except OSError, ex:
                                        log.warning(
                                            'warning: Unable to read parent %r: %s'
                                            % (os.path.join(child.user, '..'),
                                               ex))

                                # is mount?
                                keep = None
                                if  (is_linuxy and (parent_stat != None))\
                                and ((parent_stat.st_dev != child.stat.st_dev)\
                                or   (parent_stat.st_ino == child.stat.st_ino)):
                                    # skip mounts?
                                    if skip_mounts:
                                        log.debug(
                                            PREFIX_SKIP +
                                            '%r (skip_mounts)' % child.user)
                                        continue

                                    # skip fstype?
                                    if fskip_fstype:
                                        # find fstype, updating mount points if required
                                        fstype = mounts.get_fstype(child.true)
                                        ##if fstype == None:
                                        ##    mounts = MountEntries()
                                        ##    fstype = mounts.get_fstype(child.true)
                                        if fstype == None:
                                            log.warning(
                                                'warning: Unable to resolve mount fstype %r'
                                                % child.user)
                                            log.debug(
                                                PREFIX_SKIP +
                                                '%s (failed to resolve mount fstype)'
                                                % child.user)
                                            continue
                                        mounts = mounts

                                        if fskip_fstype(fstype.type):
                                            if log.is_debug:
                                                log.debug(PREFIX_SKIP +
                                                          '%s (skip_fstype)' %
                                                          child.user)
                                            continue

                                # directory
                                if log.is_verbose:
                                    log.verbose(PREFIX_DIR +
                                                '%s (directory)' % child.user)

                                # put directory in the todo list
                                fappend((child, nodes + [(child.stat.st_ino,
                                                          child.stat.st_dev)]))
                                continue
Exemplo n.º 4
0
                                                % child.user)
                                            continue
                                        mounts = mounts

                                        if fskip_fstype(fstype.type):
                                            if log.is_debug:
                                                log.debug(PREFIX_SKIP +
                                                          '%s (skip_fstype)' %
                                                          child.user)
                                            continue

                                # directory
                                if log.is_verbose:
                                    log.verbose(PREFIX_DIR +
                                                '%s (directory)' % child.user)

                                # put directory in the todo list
                                fappend((child, nodes + [(child.stat.st_ino,
                                                          child.stat.st_dev)]))
                                continue

                            if log.is_debug:
                                log.debug(PREFIX_SKIP +
                                          '%s (unknown st_mode %r)' %
                                          (child.user, child.stat))

                        fdone()

        except Exception, ex:
            log.exception('Unexpected exception!')
Exemplo n.º 5
0
#!/usr/bin/python

from hashdb_output import log

try:
    from hashdb_mntent import mntent, setmntent, getmntent_r, endmntent
    import ctypes
except Exception:
    # The mntent stubs are optional.  Catch Exception rather than using a bare
    # ``except`` so that KeyboardInterrupt / SystemExit still propagate.
    log.debug('failed to load mntent stubs')

    def setmntent(filename):
        """Fallback used when the mntent stubs failed to load: always returns None."""
        return None

import os
from collections import namedtuple

MountEntry = namedtuple('MountEntry', 'fsname dir type opts freq passno')

def enum_mntent(filename):
    stream = setmntent(filename)
    if not stream:
        return []

    try:
        buf    = ctypes.create_string_buffer(4095)
        mount  = mntent()
        results = []
        while getmntent_r(stream, mount, ctypes.byref(buf), ctypes.sizeof(buf) - 1):
            results.append(MountEntry(
                mount.mnt_fsname,
                mount.mnt_dir,
                mount.mnt_type,
                set(mount.mnt_opts.split(',')),
Exemplo n.º 6
0
    def match(self):
        # Record matches (hash,size) pairs
        matches_done = set()

        # Iterate over targets
        try:
            for target, hash, db in self.hash(targets=self.settings.match_targets):
                matches = []

                # Find row data
                target_data = db.path_get_prime(target.true)
                if not target_data:
                    log.debug(PREFIX_SKIP + "%r (unable to find data)" % target.user)
                    continue

                # Already reported?
                if (target_data.hash, target_data.size) in matches_done:
                    log.debug(PREFIX_SKIP + "%r (already reported match)" % target.user)
                    continue
                matches_done.add((target_data.hash, target_data.size))

                # Search for duplicates
                for match_data in db.path_select_duplicates(
                    path=target_data.path, hash=target_data.hash, size=target_data.size
                ):
                    if not match_data.is_remote:
                        # local
                        if self.settings.match_verify:
                            try:
                                match_stat = os.lstat(match_data.path)
                            except OSError, ex:
                                log.debug(PREFIX_SKIP + "%r (unable to lstat match): %s" % (match_data.path, ex))
                                continue
                            except IOError, ex:
                                log.debug(PREFIX_SKIP + "%r (unable to lstat match): %s" % (match_data.path, ex))
                                continue

                            if not stat.S_ISREG(match_stat.st_mode):
                                log.debug(PREFIX_SKIP + "%r (not a regular file)" % match_data.path)
                                continue  # No longer a regular file

                            # Verify/update hash
                            match_hash = db.path_hash(match_data.path, match_stat)
                            if match_hash == None:
                                match_hash = build_hash(Walker.Target(match_data.path, match_data.path, match_stat))
                                if match_hash != None:
                                    db.path_setstat(match_data.path, match_stat, match_hash)
                            if match_hash == None:
                                log.debug(PREFIX_SKIP + "%r (unable to determine hash)" % match_data.path)
                                continue

                            # update data
                            match_data = match_data._replace(
                                hash=match_hash, size=match_stat.st_size, time=match_stat.st_mtime, mark=db.mark
                            )
                            if (match_data.hash != target_data.hash) or (match_data.size != target_data.size):
                                log.debug(PREFIX_SKIP + "%r (files no longer match)" % match_data.path)
                                continue  # skip if its no longer identical

                        log.verbose("comp %s" % match_data.path)
Exemplo n.º 7
0
    def hash(self, targets=None):
        """Walk the targets and yield a hash for every file the walker returns.

        Args:
            targets: targets to walk; when falsy, ``self.settings.hash_targets``
                is used instead.

        Yields:
            ``(target, target_hash, db)`` tuples for every walked target whose
            hash was either returned by ``db.path_hash`` or freshly computed by
            ``build_hash`` (in which case it is stored with ``db.path_setstat``).
            Targets for which no hash could be obtained are not yielded.

        Nothing is yielded when the database cannot be opened.  Changes are
        committed periodically while walking and once more when the generator
        finishes or is closed.
        """
        # Setup logging
        log.setLevel(self.settings.verbosity)

        # Display configuration
        log.debug("%s" % self.settings)

        # Setup database
        log.debug("* setup database's...")
        db = HashDatabase2(self.settings.database)
        db.extend_locals(self.settings.databases_locals)
        if not db.open():
            return

        # Setup the walker
        log.debug("* setup walker...")
        walker = Walker()
        walker.walk_depth = self.settings.walk_depth
        walker.extend_targets(targets or self.settings.hash_targets)
        walker.extend_skip_fstypes(self.settings.skip_fstypes)
        walker.extend_skip_paths(self.settings.skip_paths)
        walker.extend_skip_names(self.settings.skip_names)
        walker.extend_skip_dirnames(self.settings.skip_dirnames)
        walker.extend_skip_filenames(self.settings.skip_filenames)
        walker.skip_mounts = self.settings.skip_mounts
        walker.skip_binds = self.settings.skip_binds
        walker.skip_symlinks = self.settings.skip_symlinks

        log.debug("* walk...")

        try:
            start_time = time.time()
            start_changes = db.connection.total_changes
            for target in walker.walk():
                target_hash = db.path_hash(target.true, target.stat)
                if target_hash is None:
                    # No hash returned by the database: compute and store it.
                    target_hash = build_hash(target)
                    if target_hash is not None:
                        db.path_setstat(target.true, target.stat, target_hash)
                if target_hash is not None:
                    yield (target, target_hash, db)

                # Only commit every so often since we are limited by disk speed
                now = time.time()
                if ((now - start_time) >= THRESHHOLD_TIMEDELTA) or (
                    (db.connection.total_changes - start_changes) > THRESHHOLD_CHANGEDELTA
                ):
                    log.debug("* committing changes...")
                    db.connection.commit()
                    # Restart the throttle window only after an actual commit.
                    # Resetting it on every iteration would keep the change
                    # delta from ever accumulating past the threshold.
                    start_time = time.time()
                    start_changes = db.connection.total_changes
        finally:
            log.debug("* committing changes...")
            db.connection.commit()
Exemplo n.º 8
0
    def view(self):
        """Yield the rows of ``combinedtab`` that fall under the configured targets.

        Each entry of ``self.setting_targets`` is resolved with
        ``MountEntries.truepath``.  Unless one of the resolved targets is a
        root-like path (``/``, ``\\``, ``//`` or ``\\\\``), the query is
        restricted to rows whose path equals a target or starts with that
        target followed by ``/``.  Rows are ordered by path when
        ``self.setting_walk_depth`` is truthy, otherwise by
        ``count_components(path)`` and then path; ``mark DESC`` breaks ties.

        Yields:
            ``(HashRowData, db)`` tuples, one per selected row.  Nothing is
            yielded when the database cannot be opened.
        """
        # Logging and configuration dump
        log.setLevel(self.setting_verbosity)
        display_settings(self.settings, log.debug)

        # Database
        log.debug("* setup database's...")
        db = HashDatabase(self.setting_database)
        db.add_combines(self.setting_combine)
        if not db.open():
            return

        # Mount table, needed to resolve true paths
        mounts = MountEntries()
        targets = [mounts.truepath(t) for t in self.setting_targets]

        # One filter clause plus two named parameters per target
        qfilters = []
        qargmap = {}
        root_paths = ("/", "\\", "//", "\\\\")
        if not any(root in targets for root in root_paths):
            for index, entry in enumerate(targets):
                resolved = mounts.truepath(entry)
                key = "t%02d" % index
                clause = (
                    r"""(path = :%(name)s) OR (substr(path, 1, :%(name)s_len + 1) = :%(name)s || '/')"""
                    % {"name": key}
                )
                qfilters.append(clause)
                qargmap[key] = resolved
                qargmap[key + "_len"] = len(resolved)

        if qfilters:
            qfilter = r"""WHERE """ + r""" OR """.join(qfilters)
        else:
            qfilter = r""""""

        if self.setting_walk_depth:
            qorder = r"""
            ORDER BY
                path,
                mark DESC
        """
        else:
            qorder = r"""
            ORDER BY
                count_components(path),
                path,
                mark DESC
        """

        qselect = r"""
            SELECT
                *
            FROM
                combinedtab
        """
        query = "".join([qselect, qfilter, qorder])

        # Hand out HashRowData copies rather than the underlying database rows
        for row in db.connection.execute(query, qargmap):
            data = HashRowData(
                path=row["path"],
                hash=row["hash"],
                mark=row["mark"],
                time=row["time"],
                size=row["size"],
            )
            yield (data, db)