# NOTE(review): this module-level `def __init__` duplicates AppHashDB.__init__
# (defined below) token-for-token.  A bare __init__ at module scope is never
# invoked as a constructor, so this looks like an accidental paste/merge
# artifact -- confirm against version history and remove if so.
def __init__(self, settings=None, cmdline=False):
    # Build the effective configuration: start from an empty ConfigSettings,
    # layer the caller-supplied overrides on top, then let parse_config fold
    # in command-line arguments (when cmdline is truthy).
    object.__init__(self)
    self._settings = ConfigSettings()
    if settings:
        self._settings.update(settings)
    self._settings = parse_config(self._settings, cmdline)
class AppHashDB(object):
    """Command-line application wrapper around the hash database.

    Dispatches on ``settings.cmd`` to one of the sub-commands visible here:
    ``hash`` (walk targets and record file hashes), ``match`` (report
    duplicate files), ``view`` (list database rows), ``query`` and
    ``schema`` (dump the SQLite schema).  Each ``run_*`` method formats and
    logs the rows produced by its corresponding generator method.

    NOTE(review): written for Python 2 (``except OSError, ex:`` syntax).
    """

    # Class-level default settings (empty in this chunk; presumably merged
    # elsewhere -- confirm).
    _defaults = {}

    def __init__(self, settings=None, cmdline=False):
        # Build the effective configuration: defaults, then caller-supplied
        # overrides, then parse_config (command line when cmdline is truthy).
        object.__init__(self)
        self._settings = ConfigSettings()
        if settings:
            self._settings.update(settings)
        self._settings = parse_config(self._settings, cmdline)

    @property
    def settings(self):
        # Read-only view of the parsed configuration.
        return self._settings

    def run(self):
        """Dispatch to the sub-command named by ``settings.cmd``.

        NOTE(review): an unrecognized cmd falls through and returns None
        silently -- consider logging an error for unknown commands.
        NOTE(review): ``run_query`` is referenced but not visible in this
        chunk -- confirm it exists.
        """
        if self.settings.cmd == "hash":
            return self.run_hash()
        elif self.settings.cmd == "match":
            return self.run_match()
        elif self.settings.cmd == "view":
            return self.run_view()
        elif self.settings.cmd == "query":
            return self.run_query()
        elif self.settings.cmd == "schema":
            return self.run_schema()

    def run_schema(self):
        # Print each schema line produced by schema() at default log level.
        for x in self.schema():
            log.default(x)

    def schema(self):
        """Yield the database schema as '--'-commented SQL text lines."""
        # Setup logging
        log.setLevel(self.settings.verbosity)
        # Display configuration
        log.debug("%s" % self.settings)
        # Setup database
        log.debug("* setup database's...")
        db = HashDatabase2(self.settings.database)
        if not db.open():
            # Could not open the database: yield nothing.
            return
        yield "-- [Tables]"
        # Header line: comma-separated names of all user tables (sqlite_*
        # internal tables excluded by the substr() filter).
        # NOTE(review): this SELECT is duplicated verbatim below -- consider
        # hoisting the SQL string into a single constant.
        yield "-- " + ", ".join(
            row["name"]
            for row in db.connection.execute(
                "SELECT name,sql FROM sqlite_master WHERE (type='table') AND (substr(name,1,7) <> 'sqlite_') ORDER BY name"
            )
        )
        # Then the CREATE TABLE statement for each user table, as stored in
        # sqlite_master.sql.
        for row in db.connection.execute(
            "SELECT name,sql FROM sqlite_master WHERE (type='table') AND (substr(name,1,7) <> 'sqlite_') ORDER BY name"
        ):
            yield row["sql"]
        yield "-- [Databases]"
        # PRAGMA database_list rows are (seq, name, file).
        for row in db.connection.execute("PRAGMA database_list"):
            yield "-- %s: %s" % (row[1], row[2])

    def run_match(self):
        # Print each target followed by its sorted duplicate matches; remote
        # matches are prefixed with their source.
        for target, target_data, matches, db in self.match():
            log.default("* %s" % target.user)
            log.default(" %s" % target_data.path)
            matches.sort()
            for match in matches:
                if not match.is_remote:
                    log.default(" %s" % match.path)
                else:
                    log.default(" %s:%s" % (match.source, match.path))

    def run_view(self):
        """Print one formatted line (hash, mark, time, size, path) per row."""
        try:
            # Locale-preferred date/time format, e.g. '%a %d %b %Y %T %Z'.
            timeformat = locale.nl_langinfo(locale.D_T_FMT)
        # NOTE(review): bare except also swallows KeyboardInterrupt/SystemExit;
        # should be `except (AttributeError, ValueError):` or at least
        # `except Exception:` -- nl_langinfo is missing on some platforms.
        except:
            timeformat = "%Y-%m-%d %H:%M:%S"
        # Column width for sizes: width of the largest grouped 32-bit value.
        numpadd = len(locale.format("%d", 2 ** 31, True))
        for row, db in self.view():
            # NOTE(review): mark==0 renders "*" -- presumably flags rows NOT
            # carrying the current mark (stale entries); confirm semantics.
            m = "*" if row.mark == 0 else " "
            h = row.hash
            s = locale.format("%d", row.size, True)
            t = time.strftime(timeformat, time.localtime(row.time))
            p = row.path
            log.default(
                # hash:mark time size path
                "%s%s %s %*s %s" % (h, m, t, numpadd, s, p)
            )

    def view(self):
        """Yield ``(HashRowData, db)`` pairs for rows under the target paths.

        NOTE(review): this method reads ``self.setting_verbosity``,
        ``self.setting_database``, ``self.setting_combine``,
        ``self.setting_targets`` and ``self.setting_walk_depth`` while every
        other method here uses ``self.settings.<name>``, and it constructs
        ``HashDatabase`` where the others use ``HashDatabase2`` and calls
        ``display_settings`` where the others log ``self.settings`` directly.
        Either these ``setting_*`` aliases exist elsewhere or this method is
        out of date -- verify before touching.
        """
        # Setup logging
        log.setLevel(self.setting_verbosity)
        # Display configuration
        display_settings(self.settings, log.debug)
        # Setup database
        log.debug("* setup database's...")
        db = HashDatabase(self.setting_database)
        db.add_combines(self.setting_combine)
        if not db.open():
            return
        # Read mounts (for truepath)
        mounts = MountEntries()
        # Build a query string, filtering on targets
        targets = [mounts.truepath(t) for t in self.setting_targets]
        qfilters = []
        qargmap = {}
        # Only build per-target WHERE filters when no target is a filesystem
        # root ("/", "\\", "//", "\\\\") -- a root target means "everything",
        # so no filter is needed.
        if ("/" not in targets) and ("\\" not in targets) and ("//" not in targets) and ("\\\\" not in targets):
            for i, target in enumerate(targets):
                target = mounts.truepath(target)
                # Match the target path exactly OR any path under it
                # (prefix + '/'); parameters are named t00, t00_len, t01, ...
                qfilters.append(
                    r"""(path = :%(name)s) OR (substr(path, 1, :%(name)s_len + 1) = :%(name)s || '/')"""
                    % {"name": "t%02d" % i}
                )
                qargmap.update({"t%02d" % i: target, "t%02d_len" % i: len(target)})
        qfilter = (r"""WHERE """ + r""" OR """.join(qfilters)) if len(qfilters) != 0 else r""""""
        # Depth-first walks order by raw path; breadth-first orders by path
        # component count first (count_components is presumably a SQL function
        # registered on the connection -- confirm).
        qorder = (
            r""" ORDER BY path, mark DESC """
            if self.setting_walk_depth
            else r""" ORDER BY count_components(path), path, mark DESC """
        )
        query = r""" SELECT * FROM combinedtab """ + qfilter + qorder
        # yield all results as a HashRowData blob (don't expose the underlying row)
        for row in db.connection.execute(query, qargmap):
            yield (
                HashRowData(path=row["path"], hash=row["hash"], mark=row["mark"], time=row["time"], size=row["size"]),
                db,
            )

    def run_hash(self):
        # Print "<hash> <path>" for every file hashed.
        # NOTE(review): loop variable `hash` shadows both the builtin and
        # this class's hash() method (harmless here, but rename on rewrite).
        for target, hash, db in self.hash():
            log.default("%s %s" % (hash, target.user))

    def hash(self, targets=None):
        """Walk targets, hashing each file; yield ``(target, hash, db)``.

        Known hashes are served from the database via ``db.path_hash``;
        misses are computed with ``build_hash`` and written back.  Commits
        are batched by time/change thresholds since disk speed dominates.

        :param targets: optional iterable of paths; defaults to
            ``settings.hash_targets``.
        """
        # Setup logging
        log.setLevel(self.settings.verbosity)
        # Display configuration
        log.debug("%s" % self.settings)
        # Setup database
        log.debug("* setup database's...")
        db = HashDatabase2(self.settings.database)
        db.extend_locals(self.settings.databases_locals)
        if not db.open():
            return
        # Setup the walker
        log.debug("* setup walker...")
        walker = Walker()
        walker.walk_depth = self.settings.walk_depth
        walker.extend_targets(targets or self.settings.hash_targets)
        walker.extend_skip_fstypes(self.settings.skip_fstypes)
        walker.extend_skip_paths(self.settings.skip_paths)
        walker.extend_skip_names(self.settings.skip_names)
        walker.extend_skip_dirnames(self.settings.skip_dirnames)
        walker.extend_skip_filenames(self.settings.skip_filenames)
        walker.skip_mounts = self.settings.skip_mounts
        walker.skip_binds = self.settings.skip_binds
        walker.skip_symlinks = self.settings.skip_symlinks
        log.debug("* walk...")
        try:
            start_time = time.time()
            start_changes = db.connection.total_changes
            for target in walker.walk():
                # Cache lookup first; on miss, compute and store.
                # NOTE(review): `== None` / `!= None` should be `is None` /
                # `is not None` on any rewrite.
                target_hash = db.path_hash(target.true, target.stat)
                if target_hash == None:
                    target_hash = build_hash(target)
                    if target_hash != None:
                        db.path_setstat(target.true, target.stat, target_hash)
                if target_hash != None:
                    yield (target, target_hash, db)
                # Only commit every so often since we are limited by disk speed
                now = time.time()
                if ((now - start_time) >= THRESHHOLD_TIMEDELTA) or (
                    (db.connection.total_changes - start_changes) > THRESHHOLD_CHANGEDELTA
                ):
                    log.debug("* committing changes...")
                    db.connection.commit()
                    start_time = time.time()
                    start_changes = db.connection.total_changes
        finally:
            # Always flush pending writes, even on error/generator close.
            log.debug("* committing changes...")
            db.connection.commit()

    def match(self):
        """Find duplicates of each hashed target.

        Drives :meth:`hash` over ``settings.match_targets`` and, for each
        target, looks up other rows with the same (hash, size).  When
        ``settings.match_verify`` is set, each local candidate is re-stat'ed
        and re-hashed before being accepted.

        NOTE(review): the remote-match ``else:`` branch and the rest of this
        method continue beyond this chunk -- review is incomplete here.
        """
        # Record matches (hash,size) pairs
        matches_done = set()
        # Iterate over targets
        try:
            # NOTE(review): `hash` again shadows the method/builtin.
            for target, hash, db in self.hash(targets=self.settings.match_targets):
                matches = []
                # Find row data
                target_data = db.path_get_prime(target.true)
                if not target_data:
                    log.debug(PREFIX_SKIP + "%r (unable to find data)" % target.user)
                    continue
                # Already reported?
                if (target_data.hash, target_data.size) in matches_done:
                    log.debug(PREFIX_SKIP + "%r (already reported match)" % target.user)
                    continue
                matches_done.add((target_data.hash, target_data.size))
                # Search for duplicates
                for match_data in db.path_select_duplicates(
                    path=target_data.path, hash=target_data.hash, size=target_data.size
                ):
                    if not match_data.is_remote:
                        # local
                        if self.settings.match_verify:
                            # Re-stat the candidate; vanished/unreadable files
                            # are skipped, not fatal.
                            try:
                                match_stat = os.lstat(match_data.path)
                            except OSError, ex:
                                log.debug(PREFIX_SKIP + "%r (unable to lstat match): %s" % (match_data.path, ex))
                                continue
                            except IOError, ex:
                                log.debug(PREFIX_SKIP + "%r (unable to lstat match): %s" % (match_data.path, ex))
                                continue
                            if not stat.S_ISREG(match_stat.st_mode):
                                log.debug(PREFIX_SKIP + "%r (not a regular file)" % match_data.path)
                                continue  # No longer a regular file
                            # Verify/update hash
                            match_hash = db.path_hash(match_data.path, match_stat)
                            if match_hash == None:
                                match_hash = build_hash(Walker.Target(match_data.path, match_data.path, match_stat))
                                if match_hash != None:
                                    db.path_setstat(match_data.path, match_stat, match_hash)
                            if match_hash == None:
                                log.debug(PREFIX_SKIP + "%r (unable to determine hash)" % match_data.path)
                                continue
                            # update data
                            match_data = match_data._replace(
                                hash=match_hash, size=match_stat.st_size, time=match_stat.st_mtime, mark=db.mark
                            )
                            if (match_data.hash != target_data.hash) or (match_data.size != target_data.size):
                                log.debug(PREFIX_SKIP + "%r (files no longer match)" % match_data.path)
                                continue  # skip if its no longer identical
                        log.verbose("comp %s" % match_data.path)
                    else:
                        # (remote-match branch continues beyond this chunk;
                        # remainder of match() not visible here)