def build_hash(target, width=None):
    '''Return the hex MD5 digest of the file at ``target.true``.

    target -- object exposing ``true`` (path that is opened), ``user``
              (path used in warnings) and ``stat`` (needs ``st_size``).
    width  -- progress bar width; when None and a bar is shown it is
              obtained from hashdb_progress.find_terminal_width().

    A progress bar is written to stdout only when log.is_default is set,
    stdout is a tty and the file is larger than 2**20 bytes.

    Returns the digest string, or None (after logging a warning) when the
    file cannot be opened or read.
    '''
    # doesn't just speed up zero byte files. Unending files are also
    # reported as 0 bytes long
    if target.stat.st_size == 0:
        return 'd41d8cd98f00b204e9800998ecf8427e'  # MD5 of empty input

    hasher = hashlib.md5()
    size = target.stat.st_size
    display = log.is_default and isatty(sys.stdout) and (size > 2**20)
    if display and width is None:
        width = hashdb_progress.find_terminal_width()

    def show_progress(done):
        # The trailing '\r' puts the cursor back at column 0 so the next
        # update overwrites this one.
        sys.stdout.write(
            hashdb_progress.build_progress(done, size, width) + '\r')

    total = 0
    try:
        if display:
            sys.stdout.write('\x1B[?25l\r')  # hide cursor
            show_progress(total)
        try:
            with open(target.true, 'rb') as f:
                while True:
                    data = f.read(2**17)
                    if not data:
                        break
                    hasher.update(data)
                    if display:
                        total += len(data)
                        show_progress(total)
            if display:
                show_progress(size)
        finally:
            # Restore the cursor even when reading fails; otherwise the
            # terminal is left with a hidden cursor.
            if display:
                sys.stdout.write('\x1B[?25h\n')  # display cursor
        return hasher.hexdigest()
    except (OSError, IOError) as ex:
        # open()/read() raise IOError on Python 2 and OSError on Python 3,
        # so both must be handled.
        log.warning('warning: Unable to hash file %r: %s' % (target.user, ex))
        return None
def parse_config_files(filenames=None, settings=None, override=False):
    '''Parse a set of config files, following the files they reference.

    When ``settings`` is given, ``settings.configs`` is used as the file
    list (or appended to ``filenames`` when that is also given).  Each file
    is parsed at most once, keyed by its (device, inode) pair, into a
    private ConfigSettings object, and every file listed in that object's
    ``configs`` is queued for parsing as well.

    Files that cannot be opened or read produce a warning, except for
    entries of the default list (``filenames == CONFIG_CONFIGS``), which
    are skipped silently.

    NOTE(review): in the code visible here the accumulated settings are
    neither returned nor merged into ``settings``, and ``override`` and
    ``found`` are unused -- confirm against the rest of the file.
    '''
    if settings is not None:
        if filenames is None:
            filenames = settings.configs
        else:
            filenames = filenames + settings.configs

    # Only the built-in default list is allowed to fail silently.
    from_defaults = bool(filenames == CONFIG_CONFIGS)
    todo = [(path, from_defaults) for path in filenames]

    done = set()
    found = False
    subsettings = ConfigSettings()

    while todo:
        filename, is_default = todo.pop()
        try:
            with open(filename, 'rt') as fp:
                info = os.fstat(fp.fileno())
                identity = (info.st_dev, info.st_ino)
                if identity not in done:
                    done.add(identity)
                    parse_config_file(fp, subsettings, False)
                    # Files named by the configs parsed so far are always
                    # reported when they fail.
                    todo.extend([(path, False) for path in subsettings.configs])
        except (OSError, IOError) as ex:
            if not is_default:
                log.warning('warning: unable to open/process config file (%s): %s' % (filename, ex))
def add_target(self, root):
    '''Register ``root`` as a walk target.

    The path is resolved with MountEntries().truepath() and lstat'ed; the
    resulting Walker.Target(true path, user path, stat) is appended to
    ``self._targets``.  If the lstat fails, a warning is logged and nothing
    is added.
    '''
    try:
        resolved = MountEntries().truepath(root)
        entry = Walker.Target(resolved, root, os.lstat(resolved))
    except OSError as ex:
        log.warning('warning: unable to stat %r: %s' % (root, ex))
    else:
        self._targets.append(entry)
def parse_config_updatedb(filename=None, settings=None, override=False):
    '''Merge the prune settings of an updatedb-style config into ``settings``.

    filename -- path of the updatedb config; when None it is taken from
                ``settings.updatedb`` (if ``settings`` is given).
    settings -- object to update through ``settings.set(target, value,
                override)``; a new ConfigSettings is created when None.
    override -- passed through unchanged to ``settings.set``.

    Only lines of the form ``NAME = "value"`` are considered; anything
    after the closing quote is ignored and lines that do not match are
    skipped silently.  Unknown names and unreadable files are reported
    through log.warning.

    Always returns the settings object (the one passed in, or the newly
    created one), including when there is no file to read.
    '''
    if filename is None and settings is not None:
        filename = settings.updatedb
    # Create the container before the "nothing to read" exit so the caller
    # always gets a settings object back, never None.
    if settings is None:
        settings = ConfigSettings()
    if filename is None:
        return settings

    re_namevalue = re.compile(
        r'''
        ^                       # begining of line
        \s*                     # whitespace
        (?P<n>[a-zA-Z_]+)       # name
        \s*                     # whitespace
        =                       # =
        \s*                     # whitespace
        "(?P<v>.*?)"            # quoted value
        .*                      # whitespace/garbage/ignored
        $                       # end of string
        ''', re.VERBOSE
    )

    # updatedb name -> (settings key, value parser)
    parse_mappings = {
        'prunefs'           : ('skip_fstypes', parse_text__filenames),
        'prunenames'        : ('skip_names'  , parse_text__filenames),
        'prunepaths'        : ('skip_paths'  , parse_text__filenames),
        'prune_bind_mounts' : ('skip_binds'  , parse_text__boolean),
    }

    try:
        with open(filename, 'rt') as f:
            for line in f:
                m = re_namevalue.match(line)
                if m is None:
                    continue
                name = m.group('n').lower()
                value = m.group('v')
                if name not in parse_mappings:
                    log.warning('warning: unknown setting (%s) in updatedb config (%s)' % (name, filename))
                else:
                    target, fparse = parse_mappings[name]
                    settings.set(target, fparse(value), override)
    except (IOError, OSError) as ex:
        # Best effort: a missing or unreadable updatedb config is reported
        # once and the settings gathered so far are still returned.
        log.warning('warning: unable to open updatedb config file (%s): %s' % (filename, ex))
    return settings
def walk(self): '''Walks the directory tree specified by targets, yielding all accessible regular files as Walker.Target objects''' # compile all the information required to walk the targets targets = set(self._targets) fskip_fstype = self.build_fskip_globs(self._skip_fstypes) fskip_path = self.build_fskip_globs(self._skip_paths) fskip_name = self.build_fskip_globs(self._skip_names) fskip_dirname = self.build_fskip_globs(self._skip_dirnames) fskip_filename = self.build_fskip_globs(self._skip_filenames) fskip_access = None skip_binds = self._skip_binds skip_mounts = self._skip_mounts skip_symlinks = self._skip_symlinks is_linuxy = False mounts = MountEntries() if platform.system() != 'Windows': is_linuxy = True def fskip_access(target): access = stat.S_IMODE(target.stat.st_mode) if access & (stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH) == 0: # usr, grp, oth : no access return True elif target.stat.st_uid == euid: # usr if access & stat.S_IRUSR == 0: return True elif access & (stat.S_IRGRP | stat.S_IROTH) == 0: # grp, oth : no access return True elif target.stat.st_gid in groups: # grp if access & stat.S_IRGRP == 0: return True elif access & (stat.S_IROTH) == 0: # oth : no access return True # Note that this may return some erronious negatives. 
# The objective is to try to avoid access errors, not prevent them entirely # Note that we could also use os.access, but that would cause additional # os.stat calls, and we want speed return False fskip_access.func_globals['uid'] = os.getuid() fskip_access.func_globals['euid'] = os.geteuid() fskip_access.func_globals['gid'] = os.getgid() fskip_access.func_globals['egid'] = os.getegid() fskip_access.func_globals['groups'] = os.getgroups() if os.geteuid() == 0: fskip_access = None # No need to skip things if we are root try: todo = deque() dirs = deque() if self.walk_depth: fappend = dirs.appendleft def fdone(): todo.extendleft(dirs) dirs.clear() else: fappend = todo.append def fdone(): pass for target in targets: log.verbose(PREFIX_ROOT + '%s (root)' % target.user) if stat.S_ISREG(target.stat.st_mode): yield target if stat.S_ISDIR(target.stat.st_mode): todo.clear() todo.append((target, [(target.stat.st_ino, target.stat.st_dev)])) while True: try: target, nodes = todo.popleft() except IndexError, _: break # Reached the last element in the list try: filelist = os.listdir(target.true) filelist.sort() except OSError, ex: log.warning('warning: Unable to list target %r: %s' % (target.user, ex)) continue for name in filelist: child = Walker.Target( os.path.join(target.true, name), os.path.join(target.user, name), None) # skip name? if fskip_name and fskip_name(name): if log.is_debug: log.debug(PREFIX_SKIP + '%s (skip_name)' % child.user) continue # skip path? if fskip_path and fskip_path(child.user): if log.is_debug: log.debug(PREFIX_SKIP + '%s (skip_path)' % child.user) continue # stat try: child = child._replace( stat=os.lstat(child.true)) except OSError, ex: log.warning('warning: Unable to lstat %r: %s' % (child.user, ex)) if log.is_debug: log.debug(PREFIX_SKIP + '%r (failed lstat)' % child.user) continue # recursive loop? 
if (child.stat.st_ino, child.stat.st_dev) in nodes: log.debug( PREFIX_SKIP + '%r (loop chain detected)' % child.user) continue # check access if fskip_access and fskip_access(child): log.debug(PREFIX_SKIP + '%r (no access)' % child.user) continue # resolve symlinks if stat.S_ISLNK(child.stat.st_mode): if skip_symlinks: log.debug( PREFIX_SKIP + '%r (skip_symlinks)' % child.user) continue log.debug(PREFIX_SYM + '%s (sym link)' % child.user) try: child = child._replace( true=mounts.truepath(child.true)) child = child._replace( stat=os.lstat(child.true)) except OSError, ex: log.warning( 'warning: Unable to read symlink target %r: %s' % (child.user, ex)) if log.is_debug: log.debug( PREFIX_SKIP + '%r (failed to read symlink target)' % child.user) continue # recursive loop? if (child.stat.st_ino, child.stat.st_dev) in nodes: log.debug(PREFIX_SKIP + '%r (loop chain detected)' % child.user) continue # check access if fskip_access != None and fskip_access( child): log.debug(PREFIX_SKIP + '%r (no access)' % child.user) continue # Need to recalculate child.true/.. parent_stat = None else: parent_stat = target.stat # regular file? if stat.S_ISREG(child.stat.st_mode): # skip filename? if fskip_filename and fskip_filename(name): if log.is_debug: log.debug( PREFIX_SKIP + '%r (skip_filename)' % child.user) continue if log.is_verbose: log.verbose( PREFIX_REG + '%s (regular file)' % child.user) yield child continue # directory? if stat.S_ISDIR(child.stat.st_mode): # skip dirname? if fskip_dirname and fskip_dirname(name): if log.is_debug: log.debug( PREFIX_SKIP + '%s (skip_dirname)' % child.user) continue # is bind? ToDo: Should this check be in a loop for bind chains? if mounts.is_bind(child.true): # skip binds? 
if skip_binds: if log.is_debug: log.debug( PREFIX_SKIP + '%s (skip_binds)' % child.user) continue log.debug(PREFIX_BIND + '%s (bind mount)' % child.user) try: child = child._replace( true=mounts.truepath(child.true)) child = child._replace( stat=os.lstat(child.true)) except OSError, ex: log.warning( 'warning: Unable to read bind target %r: %s' % (child.user, ex)) if log.is_debug: log.debug( PREFIX_SKIP + '%r (failed to read bind target)' % (child.user, ex)) continue # recursive loop? if (child.stat.st_ino, child.stat.st_dev) in nodes: log.debug(PREFIX_SKIP + '%r (loop chain detected)' % child.user) continue # check access if fskip_access != None and fskip_access( child): log.debug( PREFIX_SKIP + '%r (no access)' % child.user) continue parent_stat = None # get parent stat if is_linuxy and parent_stat == None: try: parent_stat = os.lstat( os.path.join(child.true, '..')) except OSError, ex: log.warning( 'warning: Unable to read parent %r: %s' % (os.path.join(child.user, '..'), ex)) # is mount? keep = None if (is_linuxy and (parent_stat != None))\ and ((parent_stat.st_dev != child.stat.st_dev)\ or (parent_stat.st_ino == child.stat.st_ino)): # skip mounts? if skip_mounts: log.debug( PREFIX_SKIP + '%r (skip_mounts)' % child.user) continue # skip fstype? if fskip_fstype: # find fstype, updating mount points if required fstype = mounts.get_fstype(child.true) ##if fstype == None: ## mounts = MountEntries() ## fstype = mounts.get_fstype(child.true) if fstype == None: log.warning( 'warning: Unable to resolve mount fstype %r' % child.user) log.debug( PREFIX_SKIP + '%s (failed to resolve mount fstype)' % child.user) continue mounts = mounts if fskip_fstype(fstype.type): if log.is_debug: log.debug(PREFIX_SKIP + '%s (skip_fstype)' % child.user) continue # directory if log.is_verbose: log.verbose(PREFIX_DIR + '%s (directory)' % child.user) # put directory in the todo list fappend((child, nodes + [(child.stat.st_ino, child.stat.st_dev)])) continue
while True: data = f.read(2**17) if not data: break hasher.update(data) if display: total += len(data) print hashdb_progress.build_progress(total, target.stat.st_size, width) + '\r', if display: print hashdb_progress.build_progress(target.stat.st_size, target.stat.st_size, width) + '\r', print '\x1B[?25h' # display cursor return hasher.hexdigest() except OSError, ex: log.warning('warning: Unable to hash file %r: %s' % (target.user, ex)) except IOError, ex: log.warning('warning: Unable to hash file %r: %s' % (target.user, ex)) if __name__ == '__main__': import hashdb_walk w = hashdb_walk.Walker() w.add_target('test/Burn.Notice.S04E18.Last.Stand.HDTV.XviD-FQM.[VTV].avi') hash = build_hash(w._targets[0]) print '\x1B[0K' + hash + ' %s' % w._targets[0].user # clear line and display has
def parse_config_file(fp, settings=None, override=True):
    '''Parse ``name = value`` lines from the open file ``fp`` into ``settings``.

    fp       -- iterable of text lines (normally an open file); its
                ``name`` attribute, when present, is used in warnings.
    settings -- mapping-like settings object supporting ``is_default``,
                ``get`` and item assignment; a new ConfigSettings is
                created when None.
    override -- when true, a value from this file is stored even if the
                setting is no longer at its default.

    Blank lines and lines starting with ``;``, ``#`` or ``//`` are ignored.
    Other lines that are not ``name = value`` produce an "invalid line"
    warning with a 1-based line number.  Unknown names and values rejected
    by their parser are reported and skipped.

    Returns the settings object.
    '''
    re_namevalue = re.compile(
        r'''
        ^                       # begining of line
        \s*                     # whitespace
        (?P<n>[a-zA-Z_]+)       # name
        \s*                     # whitespace
        =                       # =
        \s*                     # whitespace
        (?P<v>.*?)              # value
        \s*                     # whitespace
        $                       # end of string
        ''', re.VERBOSE
    )
    re_skip = re.compile(
        r'''
        ^
        (?:
            \s*
          |
            (?: ; | \# | // ) .*
        )
        $
        ''', re.VERBOSE
    )

    # name: (target, parser, default (for first argument of fcombine), fcombine)
    parse_mappings = {
        'verbosity'       : ('verbosity'        , parse_text__verbosity, None, lambda x,y: y),
        'config'          : ('configs'          , parse_text__filename,  [],   lambda x,y: x + [y]),
        # NOTE(review): the parsed boolean is ignored here, so any
        # use_updatedb value selects CONFIG_UPDATEDB -- confirm intended.
        'use_updatedb'    : ('updatedb'         , parse_text__boolean,   None, lambda x,y: CONFIG_UPDATEDB),
        'updatedb'        : ('updatedb'         , parse_text__filename,  None, lambda x,y: y),
        'walk_depth'      : ('walk_depth'       , parse_text__boolean,   None, lambda x,y: y),
        'database'        : ('database'         , parse_text__filename,  None, lambda x,y: y),
        'databases_local' : ('databases_locals' , parse_text__filename,  [],   lambda x,y: x + [y]),
        'databases_remote': ('databases_remotes', parse_text__filename,  [],   lambda x,y: x + [y]),
        'databases_join'  : ('databases_joins'  , parse_text__filenames, [],   lambda x,y: x + [CombineDB(*y)]), # Todo: make this a proper parse with decent error reporting
        'skip_mounts'     : ('skip_mounts'      , parse_text__boolean,   None, lambda x,y: y),
        'skip_binds'      : ('skip_binds'       , parse_text__boolean,   None, lambda x,y: y),
        'skip_symlinks'   : ('skip_symlinks'    , parse_text__boolean,   None, lambda x,y: y),
        'skip_fstype'     : ('skip_fstypes'     , parse_text__filename,  [],   lambda x,y: x + [y]),
        'skip_path'       : ('skip_paths'       , parse_text__filename,  [],   lambda x,y: x + [y]),
        'skip_name'       : ('skip_names'       , parse_text__filename,  [],   lambda x,y: x + [y]),
        'skip_dirname'    : ('skip_dirnames'    , parse_text__filename,  [],   lambda x,y: x + [y]),
        'skip_filename'   : ('skip_filenames'   , parse_text__filename,  [],   lambda x,y: x + [y]),
        'skip_fstypes'    : ('skip_fstypes'     , parse_text__filenames, [],   lambda x,y: x + y),
        'skip_paths'      : ('skip_paths'       , parse_text__filenames, [],   lambda x,y: x + y),
        'skip_names'      : ('skip_names'       , parse_text__filenames, [],   lambda x,y: x + y),
        'skip_dirnames'   : ('skip_dirnames'    , parse_text__filenames, [],   lambda x,y: x + y),
        'skip_filenames'  : ('skip_filenames'   , parse_text__filenames, [],   lambda x,y: x + y),
##      'hash_definitive' : ('hash_definitive'  , parse_text__boolean,   None, lambda x,y: y),
##      'hash_force'      : ('hash_force'       , parse_text__boolean,   None, lambda x,y: y),
##      'hash_target'     : ('hash_targets'     , parse_text__filename,  [],   lambda x,y: x + [y]),
##      'hash_targets'    : ('hash_targets'     , parse_text__filenames, [],   lambda x,y: x + y),
##
##      'match_verify'    : ('match_verify'     , parse_text__boolean,   None, lambda x,y: y),
##      'match_target'    : ('match_targets'    , parse_text__filename,  [],   lambda x,y: x + [y]),
##      'match_targets'   : ('match_targets'    , parse_text__filenames, [],   lambda x,y: x + y),
##
##      'view_local'      : ('view_locals'      , parse_text__filename,  [],   lambda x,y: x + [y]),
##      'view_locals'     : ('view_locals'      , parse_text__filenames, [],   lambda x,y: x + y),
##      'view_remote'     : ('view_remotes'     , parse_text__filename,  [],   lambda x,y: x + [y]),
##      'view_remotes'    : ('view_remotes'     , parse_text__filenames, [],   lambda x,y: x + y),
##      'view_database'   : ('view_databases'   , parse_text__filename,  [],   lambda x,y: x + [y]),
##      'view_databases'  : ('view_databases'   , parse_text__filenames, [],   lambda x,y: x + y),
##
##      'query_sql'       : ('query_sql'        , parse_text__verbatim,  None, lambda x,y: y),
##
##      'cmd'             : ('cmd'              , parse_text__choice(['hash','match','view','query','cmd']), None, lambda x,y: y),
    }

    if settings is None:
        settings = ConfigSettings()

    # Only the file object is passed in, so take the name used in warnings
    # from it; fall back to a placeholder for nameless streams.
    filename = getattr(fp, 'name', '<unknown>')

    for lineno, line in enumerate(fp, 1):
        match = re_namevalue.match(line)
        if not match:
            # Blank and comment lines are skipped silently; anything else
            # that is not "name = value" is reported.
            if not re_skip.match(line):
                log.warning('warning: invalid line in config file (lineno: %d)' % lineno)
            continue

        name = match.group('n').lower()
        value = match.group('v')

        if name not in parse_mappings:
            log.warning('warning: unknown setting (%s) in config file (%s)' % (name, filename))
            continue

        target, fparse, default, fcombine = parse_mappings[name]
        try:
            value = fparse(value)
        except Exception:
            # The parsers are defined elsewhere and may raise anything; a
            # rejected value is reported and must not be stored unparsed.
            log.warning('warning: invalid setting (%s) for (%s) in config file (%s)' % (value, name, filename))
            continue

        is_default = settings.is_default(target)
        if override or is_default:
            # Combine with the current value only while the setting is
            # still at its default; an overriding value starts again from
            # the mapping's own default.
            base = settings.get(target, default) if is_default else default
            settings[target] = fcombine(base, value)

    return settings