def test_hashdelete_ok_str(self):
    """
    If hashdelete gets a string argument, it should work
    """
    try:
        paths = self.paths + " %s/hashnot" % self.hdir
        h = hpss.HSI(verbose=("verbose" in testhelp.testargs()))

        # make sure the hashables all have a checksum stored
        x = h.hashlist(self.plist)
        for path in self.plist:
            if util.rgxin(r"\(?none\)? %s" % path, x):
                h.hashcreate(path)

        # run hashdelete on the string
        result = h.hashdelete(paths)
        h.quit()

        # verify the results
        self.expected_in("hashdelete", result)
        for path in self.paths.split():
            exp = r"hash deleted: \(?md5\)? %s" % path
            self.expected_in(exp, result)
        exp = r"hash deleted: \(?md5\)? %s/hashnot" % self.hdir
        self.assertFalse(util.rgxin(exp, result),
                         "'%s' not expected in %s" %
                         (exp, util.line_quote(result)))
    except hpss.HSIerror as e:
        if MSG.hpss_unavailable in str(e):
            pytest.skip(str(e))
def test_hashlist_ok_str(self):
    """
    If hashlist gets a string argument, it should work
    """
    try:
        paths = self.paths + " %s/hashnot" % self.hdir
        h = hpss.HSI(verbose=("verbose" in testhelp.testargs()))

        # make sure the hashables all have a checksum stored
        x = h.hashlist(self.plist)
        for path in self.plist:
            if util.rgxin(r"\(?none\)? %s" % path, x):
                h.hashcreate(path)

        # run the test payload
        result = h.hashlist(paths)
        h.quit()

        # verify the results
        self.expected_in("hashlist", result)
        for path in self.paths.split():
            exp = r"\(?md5\)? %s" % path
            self.expected_in(exp, result)
        exp = r"\(?none\)? %s/hashnot" % self.hdir
        self.expected_in(exp, result)
    except hpss.HSIerror as e:
        if MSG.hpss_unavailable in str(e):
            pytest.skip(str(e))
def test_hashdelete_ok_list(self):
    """
    If hashdelete gets a list argument, it should work
    """
    try:
        plist = self.plist + [self.hdir + "/hashnot"]
        h = hpss.HSI(verbose=("verbose" in testhelp.testargs()))

        # make sure the hashables all have a checksum stored
        x = h.hashlist(self.plist)
        for path in self.plist:
            if util.rgxin(r"\(?none\)? %s" % path, x):
                h.hashcreate(path)

        # run hashdelete on the list
        result = h.hashdelete(plist)
        h.quit()

        # verify the results
        self.expected_in("hashdelete", result)
        for path in self.plist:
            self.expected_in("hash deleted: md5 %s" % path, result)
        # the path that never had a hash should not show up in the output;
        # exp is a regexp, so match it with util.rgxin rather than 'in'
        exp = r"\(?none\)? %s/hashnot" % self.hdir
        self.assertFalse(util.rgxin(exp, result),
                         "'%s' not expected in %s" %
                         (exp, util.line_quote(result)))
    except hpss.HSIerror as e:
        if MSG.hpss_unavailable in str(e):
            pytest.skip(str(e))
def test_rgxin(self):
    """
    Routine rgxin(needle, haystack) is analogous to the Python expression
    "needle in haystack" with needle being a regexp.
    """
    self.dbgfunc()
    rgx = r"a\(?b\)?c"
    rgx2 = "(dog|fox|over)"
    fstring = "The quick brown fox jumps over the lazy dog"
    tstring1 = "Now we know our abc's"
    tstring2 = "With parens: a(b)c"
    self.assertTrue(U.rgxin(rgx, tstring1),
                    "'%s' should match '%s'" % (rgx, tstring1))
    self.assertTrue(U.rgxin(rgx, tstring2),
                    "'%s' should match '%s'" % (rgx, tstring2))
    self.assertFalse(U.rgxin(rgx, fstring),
                     "'%s' should NOT match '%s'" % (rgx, fstring))
    self.expected('abc', U.rgxin(rgx, tstring1))
    self.expected('a(b)c', U.rgxin(rgx, tstring2))
    self.expected('fox', U.rgxin(rgx2, fstring))
def main(cfg):
    """
    Main entry point for the cv plugin
    """
    # Get stuff we need -- the logger object, dataroot, etc.
    CrawlConfig.log("firing up")
    plugdir = cfg.get('crawler', 'plugin-dir')
    dataroot = util.csv_list(cfg.get(plugin_name, 'dataroot'))
    odds = cfg.getfloat(plugin_name, 'odds')
    n_ops = int(cfg.get(plugin_name, 'operations'))

    # Initialize our statistics
    (t_checksums, t_matches, t_failures) = get_stats()
    (checksums, matches, failures) = (0, 0, 0)

    # Fetch the list of HPSS objects that we're looking at from the
    # database
    try:
        clist = Checkable.Checkable.get_list(prob=odds, rootlist=dataroot)
    except CrawlDBI.DBIerror as e:
        if any([util.rgxin(msg, str(e))
                for msg in ["no such table: checkables",
                            "Table '.*' doesn't exist"]]):
            CrawlConfig.log("calling ex_nihilo")
            Checkable.Checkable.ex_nihilo(dataroot=dataroot)
            clist = Checkable.Checkable.get_list(prob=odds)
        else:
            raise
    except StandardError as e:
        if 'Please call .ex_nihilo()' in str(e):
            CrawlConfig.log("calling ex_nihilo")
            Checkable.Checkable.ex_nihilo(dataroot=dataroot)
            clist = Checkable.Checkable.get_list(prob=odds)
        else:
            raise

    # We're going to process n_ops things in the HPSS namespace
    for op in range(n_ops):
        # if the list from the database is empty, there's nothing to do
        if 0 < len(clist):
            # but it's not, so grab the first item and check it
            item = clist.pop(0)
            CrawlConfig.log("[%d] checking %s" % (item.rowid, item))
            ilist = item.check()

            # Expected outcomes that check can return:
            #   list of Checkables: read dir or checksummed files (may be empty)
            #   Alert:              checksum verify failed
            #   'access denied':    unaccessible directory
            #   'matched':          a checksum was verified
            #   'checksummed':      file was checksummed
            #   'skipped':          file was skipped
            #   'unavailable':      HPSS is temporarily unavailable
            #   StandardError:      invalid Checkable type (not 'f' or 'd')
            #
            if type(ilist) == str:
                if ilist == "access denied":
                    CrawlConfig.log("dir %s not accessible" % item.path)
                    # clist.remove(item)
                elif ilist == "matched":
                    matches += 1
                    CrawlConfig.log("%s checksums matched" % item.path)
                elif ilist == "checksummed":
                    # checksums += 1
                    CrawlConfig.log("%s checksummed" % item.path)
                elif ilist == "skipped":
                    CrawlConfig.log("%s skipped" % item.path)
                elif ilist == "unavailable":
                    CrawlConfig.log("HPSS is not available")
                    break
                else:
                    CrawlConfig.log("unexpected string returned " +
                                    "from Checkable: '%s'" % ilist)
            elif type(ilist) == list:
                CrawlConfig.log("in %s, found:" % item)
                for n in ilist:
                    CrawlConfig.log(">>> %s" % str(n))
                    if 'f' == n.type and n.checksum != 0:
                        CrawlConfig.log(".. previously checksummed")
                        # checksums += 1
            elif isinstance(ilist, Checkable.Checkable):
                CrawlConfig.log("Checkable returned - file checksummed" +
                                " - %s, %s" % (ilist.path, ilist.checksum))
                # checksums += 1
            elif isinstance(ilist, Alert.Alert):
                CrawlConfig.log("Alert generated: '%s'" % ilist.msg())
                failures += 1
            else:
                CrawlConfig.log("unexpected return val from " +
                                "Checkable.check: %s: %r" %
                                (type(ilist), ilist))

    # Report the statistics in the log
    # ** For checksums, we report the current total minus the previous
    # ** For matches and failures, we counted them up during the iteration
    # ** See the description of get_stats for why we don't store total
    #    checksums
    p_checksums = t_checksums
    t_matches += matches
    t_failures += failures
    cv_lib.update_stats((t_matches, t_failures))

    (t_checksums, t_matches, t_failures) = get_stats()
    CrawlConfig.log("files checksummed: %d; " % (t_checksums - p_checksums) +
                    "checksums matched: %d; " % matches +
                    "failures: %d" % failures)
    CrawlConfig.log("totals checksummed: %d; " % t_checksums +
                    "matches: %d; " % t_matches +
                    "failures: %d" % t_failures)

    # Report the dimension data in the log
    d = Dimension.Dimension(name='cos')
    t = Dimension.Dimension(name='cart')
    CrawlConfig.log(d.report())
    CrawlConfig.log(t.report())

    return failures