def age_report(table, age, count, result, f, path=False):
    """
    Generate the report based on the data passed in
    """
    if count:
        f.write("%s records older than %s: %d\n" %
                (table, dhms(age), result[0]['1']))
    elif table == 'migr':
        f.write("Migration Records Older Than %s\n" % dhms(age))
        f.write("%-67s %-18s %s\n" % ("BFID", "Created", "MigrFails"))
        for row in result:
            f.write("%s %s %9d\n" %
                    (CrawlDBI.DBIdb2.hexstr(row['BFID']),
                     util.ymdhms(row['RECORD_CREATE_TIME']),
                     row['MIGRATION_FAILURE_COUNT']))
            if path:
                # look up the bitfile path in a local rather than
                # overwriting the *path* flag, so one empty lookup does
                # not disable path reporting for the remaining rows
                bfp = tcc_lib.get_bitfile_path(row['BFID'])
                f.write(" %s\n" % bfp)
    elif table == 'purge':
        f.write("Purge Records Older Than %s\n" % dhms(age))
        f.write("%-67s %-18s\n" % ("BFID", "Created"))
        for row in result:
            f.write("%s %s\n" %
                    (CrawlDBI.DBIdb2.hexstr(row['BFID']),
                     util.ymdhms(row['RECORD_CREATE_TIME'])))
            if path:
                bfp = tcc_lib.get_bitfile_path(row['BFID'])
                f.write(" %s\n" % bfp)

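# A hedged usage sketch (hypothetical demo helper, not part of the
# tool): exercise age_report in count mode. The fake result row mimics
# the row['1'] count(*) access above; all values are illustrative.
def _demo_age_report():
    import sys
    fake_result = [{'1': 5}]     # count(*) row keyed '1', as DB2 returns it
    # expected shape: "migr records older than <dhms(age)>: 5"
    age_report('migr', 30 * 24 * 3600, True, fake_result, sys.stdout)
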
def get_html_mpra_report(db, last_rpt_time):
    """
    Format the MPRA report in HTML
    """
    if not db.table_exists(table="mpra"):
        return ""

    rval = ("<h2>%s</h2>\n" % mpra_sublib.report_title())
    body = ''
    hfmt = " %-5s %-20s %-20s %-20s %8s\n"
    bfmt = " %-5s %-20s %-20s %-20s %8d\n"
    mdelta = 0
    pdelta = 0
    rows = mpra_sublib.recent_records(last_rpt_time, db=db)
    for r in rows:
        if r[0] == 'migr':
            start = "beginning of time" if r[2] == 0 else util.ymdhms(r[2])
            end = util.ymdhms(r[3])
            mdelta += int(r[4])
        else:
            start = '...'
            end = '...'
            # accumulate the purge delta so the 'Purge' column below
            # reflects recent purge hits rather than always showing 0
            pdelta += int(r[4])
        body += bfmt % (r[0], util.ymdhms(r[1]), start, end, r[4])

    if 0 < len(body):
        body = (hfmt % ('Type', 'Scan Time', 'Start', 'End', 'Records') +
                body)
    else:
        body = " No records found to report"

    rows = db.select(table="mpra",
                     fields=["type", "sum(hits)"],
                     groupby="type")
    total = {}
    for r in rows:
        total[r[0]] = int(r[1])

    body += "\n\n %s Migration Purge\n" % (" " * 20)
    body += (" Since %-18s %10d %10d\n" %
             (util.ymdhms(last_rpt_time), mdelta, pdelta))
    # use .get() so a type with no rows yet does not raise KeyError
    body += (" Total %10d %10d\n" %
             (total.get('migr', 0), total.get('purge', 0)))

    rval += "<pre>\n" + body + "\n</pre>\n<br>\n"
    return rval

def cvv_ttype_missing(argv):
    """ttype_missing - Report records missing ttype information

    usage: cv ttype_missing [-d]
    """
    p = optparse.OptionParser()
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    p.add_option('-c', '--config',
                 action='store', default='', dest='config',
                 help='configuration to use')
    try:
        (o, a) = p.parse_args(argv)
    except SystemExit:
        return

    if o.debug:
        pdb.set_trace()

    CrawlConfig.get_config(o.config)
    rec_l = cv_lib.ttype_missing()
    for rec in rec_l:
        print("%-40s %-10s %s %s" % (rec[1],
                                     rec[4],
                                     rec[5],
                                     U.ymdhms(int(rec[7]))))

def history_show_raw():
    """
    Display a list of records from table history in chronological order
    """
    fmt = "%-20s %-10s %7s"
    rows = crawl_lib.retrieve_history()
    print(fmt % ("Run Time", "Plugin", "Errors"))
    for row in rows:
        print(fmt % (U.ymdhms(row[1]), row[0], str(row[2])))

def get_tcc_report(db, last_rpt_time):
    """
    Generate the TCC portion of the report
    """
    rval = "\n" + ("-" * 79) + "\n"
    rval += "Tape Copy Checker:\n\n"
    if db.table_exists(table='tcc_data'):
        checks = correct = error = 0
        rows = db.select(table="tcc_data",
                         fields=['check_time',
                                 'low_nsobj_id',
                                 'high_nsobj_id',
                                 'correct',
                                 'error',
                                 ],
                         where="? < check_time",
                         data=(last_rpt_time,))
        for (t, l, h, c, e) in rows:
            # each row covers the inclusive object id range [l, h]
            checks += (h - l + 1)
            correct += c
            error += e

        rows = db.select(table="tcc_data",
                         fields=["distinct(low_nsobj_id)",
                                 ])
        t_check = len(rows)

        rows = db.select(table="tcc_data",
                         fields=["distinct(low_nsobj_id)", "correct"],
                         where="correct = 1")
        t_correct = len(rows)
        c_obj_id_l = [x[0] for x in rows]

        # an object only counts as an error if it never checked correct
        t_error = 0
        erows = db.select(table="tcc_data",
                          fields=["distinct(low_nsobj_id)", "correct"],
                          where="correct <> 1")
        for r in erows:
            if r[0] not in c_obj_id_l:
                t_error += 1

        rval += " %s Checked Correct Errors\n" % (" " * 29)
        rval += (" Since %-18s: %6d %6d %6d\n" %
                 (util.ymdhms(last_rpt_time), checks, correct, error))
        rval += (" Total: %s %6d %6d %6d\n" %
                 (" " * 21, t_check, t_correct, t_error))
    else:
        rval += (" No Tape Copy Checker results to report")
    return rval

def load_recheck_list(cls, how_many):
    """
    Look to see whether any of the already checksummed items in the
    database have a last check time over the threshold for rechecking.
    If so, we'll shove some of them to the front of the list based on
    the configuration.
    """
    cfg = CrawlConfig.add_config()
    r_fraction = float(cfg.get_d('cv', 'recheck_fraction', '0.0'))
    r_age = cfg.get_time('cv', 'recheck_age', 365 * 24 * 3600)
    threshold = int(time.time() - r_age)
    CrawlConfig.log("threshold = %s (%d)", U.ymdhms(threshold), threshold)
    if r_fraction == 0.0:
        return []

    limit = round(r_fraction * how_many)

    db = CrawlDBI.DBI(dbtype='crawler')
    kw = {'table': 'checkables',
          'fields': ['rowid',
                     'path',
                     'type',
                     'cos',
                     'cart',
                     'ttypes',
                     'checksum',
                     'last_check',
                     'fails',
                     'reported'],
          'where': 'checksum <> 0 and last_check < %d' % threshold,
          'orderby': 'last_check',
          'limit': limit}
    rows = db.select(**kw)
    db.close()

    rval = []
    for row in rows:
        tmp = list(row)
        new = Checkable(rowid=tmp.pop(0),
                        path=tmp.pop(0),
                        type=tmp.pop(0),
                        cos=tmp.pop(0),
                        cart=tmp.pop(0),
                        ttypes=tmp.pop(0),
                        checksum=tmp.pop(0),
                        last_check=tmp.pop(0),
                        fails=tmp.pop(0),
                        reported=tmp.pop(0),
                        in_db=True,
                        dirty=False)
        rval.append(new)
    return rval

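# A hedged sketch of the [cv] configuration options this method reads
# (option names come from the cfg calls above; the values and the time
# syntax accepted for recheck_age are illustrative assumptions):
#
#     [cv]
#     recheck_fraction = 0.2
#     recheck_age = 7776000
#
# With how_many=10 and recheck_fraction=0.2, round(0.2 * 10) = 2 of the
# oldest already-checksummed items would jump to the front of the list.
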
def cvv_show_next(argv):
    """show_next - Report the Checkables in the order they will be checked

    usage: cvtool show_next
    """
    p = optparse.OptionParser()
    p.add_option('-c', '--config',
                 action='store', default='', dest='config',
                 help='alternate configuration')
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    p.add_option('-i', '--id',
                 action='store', default='', dest='id',
                 help='id of entry to be checked')
    p.add_option('-l', '--limit',
                 action='store', default=-1, dest='limit', type=int,
                 help='max records to get')
    p.add_option('-p', '--path',
                 action='store', default='', dest='path',
                 help='name of path to be checked')
    p.add_option('-v', '--verbose',
                 action='store_true', default=False, dest='verbose',
                 help='more information')
    try:
        (o, a) = p.parse_args(argv)
    except SystemExit:
        return

    if o.debug:
        pdb.set_trace()

    if o.config:
        cfg = CrawlConfig.add_config(close=True, filename=o.config)
    else:
        cfg = CrawlConfig.add_config()

    if o.limit < 0:
        limit = int(cfg.get_d('cv', 'operations', '10'))
    else:
        limit = o.limit

    clist = Checkable.Checkable.get_list(limit)
    for c in clist:
        if c.last_check == 0:
            print("%18d %s %s" % (c.last_check, c.type, c.path))
        else:
            print("%s %s %s" % (U.ymdhms(c.last_check), c.type, c.path))

def mprf_times(args):
    """times - list (unique) record create times in table BFMIGRREC

    usage: mpra unique_times
    """
    p = optparse.OptionParser()
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    p.add_option('-l', '--limit',
                 action='store', default='', dest='limit',
                 help='how many records to fetch')
    p.add_option('-b', '--before',
                 action='store', default='', dest='before',
                 help='fetch records from before the date/time')
    p.add_option('-a', '--after',
                 action='store', default='', dest='after',
                 help='fetch records from after the date/time')
    p.add_option('-u', '--unique',
                 action='store_true', default=False, dest='unique',
                 help='count unique timestamps')
    (o, a) = p.parse_args(args)

    if o.debug:
        pdb.set_trace()

    if o.unique:
        fields = ['unique(record_create_time)']
    else:
        fields = ['record_create_time']

    dbargs = {'table': 'bfmigrrec',
              'fields': fields,
              'orderby': 'record_create_time'}

    rows = mpra_lib.lookup_migr_recs(**dbargs)
    last = ''
    count = 0
    for row in rows:
        ymd = util.ymdhms(row['RECORD_CREATE_TIME'])[0:10]
        if ymd != last:
            if last != '':
                # report the finished day before starting the new one
                print("%s (%d)" % (last, count))
            last = ymd
            count = 1
        else:
            count += 1
    if 0 < count:
        print("%s (%d)" % (last, count))

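# Illustrative output shape (dates and counts are hypothetical): one
# line per calendar day, with the number of BFMIGRREC create times seen
# on that day:
#
#     2014-03-01 (118)
#     2014-03-02 (97)
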
def highest_nsobject_id():
    """
    Cache and return the largest NSOBJECT id in the DB2 database. The
    variables highest_nsobject_id._max_obj_id and
    highest_nsobject_id._when are local to this function but do not lose
    their values between invocations.
    """
    if (not hasattr(highest_nsobject_id, '_max_obj_id') or
            not hasattr(highest_nsobject_id, '_when') or
            60 < time.time() - highest_nsobject_id._when):
        highest_nsobject_id._max_obj_id = max_nsobj_id()
        highest_nsobject_id._when = time.time()
        CrawlConfig.log("max object id = %d at %s" %
                        (highest_nsobject_id._max_obj_id,
                         util.ymdhms(highest_nsobject_id._when)))
    rval = highest_nsobject_id._max_obj_id
    return rval

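# A minimal, generic sketch of the caching idiom above: state lives in
# attributes on the function object itself, so it persists between calls
# without globals. expensive_lookup is a hypothetical stand-in for
# max_nsobj_id().
import time

def expensive_lookup():
    return 42                       # pretend this hits the database

def cached_value():
    # refresh at most once every 60 seconds, mirroring the logic above
    if (not hasattr(cached_value, '_value') or
            60 < time.time() - cached_value._when):
        cached_value._value = expensive_lookup()
        cached_value._when = time.time()
    return cached_value._value
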
def get_html_tcc_report(db, last_rpt_time):
    """
    Format the TCC report in HTML
    """
    rval = ""
    if not db.table_exists(table='tcc_data'):
        return rval

    rval = ("<h2>%s</h2>\n" % tcc_sublib.report_title())
    checks = correct = error = 0
    rows = tcc_sublib.recent_records(last_rpt_time, db=db)
    for (t, l, h, c, e) in rows:
        checks += (h - l + 1)
        correct += c
        error += e

    rows = tcc_sublib.distinct_objects(db=db)
    t_check = len(rows)

    rows = tcc_sublib.distinct_objects(db=db, where="correct = 1")
    t_correct = len(rows)
    c_obj_id_l = [x[0] for x in rows]

    t_error = 0
    erows = tcc_sublib.distinct_objects(db=db, where="correct <> 1")
    for r in erows:
        if r[0] not in c_obj_id_l:
            t_error += 1

    rval += "<pre>\n"
    rval += " %s Checked Correct Errors\n" % (" " * 29)
    rval += (" Since %-18s: %6d %6d %6d\n" %
             (util.ymdhms(last_rpt_time), checks, correct, error))
    rval += (" Total: %s %6d %6d %6d\n" %
             (" " * 21, t_check, t_correct, t_error))
    rval += "</pre>\n"
    return rval

def xplocks(output=None, mark=False):
    """
    Look for expired purge locks in bfpurgerec.
    """
    cfg = CrawlConfig.get_config()
    now = time.time()
    hits = 0

    opened = True
    if output is None:
        f = open(cfg.get('mpra', 'report_file'), 'a')
    elif type(output) == str:
        f = open(output, 'a')
    elif type(output) == file:
        f = output
        opened = False
    else:
        raise StandardError("output type must be 'str' or 'file'")

    dbs = CrawlDBI.DBI(dbtype='hpss', dbname='sub')
    lock_min = cfg.getint('mpra', 'lock_duration')

    rows = dbs.select(table='bfpurgerec',
                      fields=['bfid', 'record_lock_time'],
                      where='record_lock_time <> 0')
    if 0 < len(rows):
        f.write("Expired Purge Locks\n")
        for r in rows:
            if (lock_min * 60) < (now - r['RECORD_LOCK_TIME']):
                hits += 1
                f.write(" %s %s\n" %
                        (CrawlDBI.DBIdb2.hexstr(r['BFID']),
                         util.ymdhms(r['RECORD_LOCK_TIME'])))

    if mark:
        mpra_record_recent('purge', 0, 0, hits)

    if opened:
        f.close()

    return hits

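# A hedged usage sketch (hypothetical demo helper; the report path is
# illustrative): scan for expired purge locks, append the findings to an
# explicit file, and record the scan via mark=True.
def _demo_xplocks():
    n_hits = xplocks(output='/tmp/xplocks.rpt', mark=True)
    print("%d expired purge lock(s) found" % n_hits)
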
def get_mpra_report(db=None, last_rpt_time=0):
    """
    Generate the MPRA portion of the report
    """
    close = False
    if db is None:
        db = CrawlDBI.DBI(dbtype="crawler")
        close = True

    rval = "\n" + ("-" * 79) + "\n"
    rval += "Migration/Purge Record Checks\n\n"
    hfmt = " %-5s %-20s %-20s %-20s %8s\n"
    bfmt = " %-5s %-20s %-20s %-20s %8d\n"
    body = ''
    mdelta = 0
    pdelta = 0
    if db.table_exists(table='mpra'):
        rows = db.select(table="mpra",
                         fields=['type',
                                 'scan_time',
                                 'start_time',
                                 'end_time',
                                 'hits',
                                 ],
                         where="? < scan_time",
                         data=(last_rpt_time,),
                         orderby="type")
        for r in rows:
            if r[0] == 'migr':
                start = ("beginning of time" if r[2] == 0
                         else util.ymdhms(r[2]))
                end = util.ymdhms(r[3])
                mdelta += int(r[4])
            else:
                start = '...'
                end = '...'
                # accumulate the purge delta so the 'Purge' column below
                # reflects recent purge hits rather than always showing 0
                pdelta += int(r[4])
            body += bfmt % (r[0], util.ymdhms(r[1]), start, end, r[4])

        if 0 < len(body):
            body = (hfmt % ('Type', 'Scan Time', 'Start', 'End', 'Records') +
                    body)
        else:
            body = " No records found to report"

        rows = db.select(table="mpra",
                         fields=["type", "sum(hits)"],
                         groupby="type")
        total = {}
        for r in rows:
            total[r[0]] = int(r[1])

        body += "\n\n %s Migration Purge\n" % (" " * 20)
        body += (" Since %-18s %10d %10d\n" %
                 (util.ymdhms(last_rpt_time), mdelta, pdelta))
        # use .get() so a type with no rows yet does not raise KeyError
        body += (" Total %10d %10d\n" %
                 (total.get('migr', 0), total.get('purge', 0)))
    else:
        body = " No MPRA result to report at this time."

    rval += body + "\n"
    if close:
        db.close()
    return rval

def mprf_migr_recs(args):
    """migr_recs - list the records in table BFMIGRREC

    usage: mpra migr_recs [-l/--limit N]
                          [-b/--before DATE-TIME]
                          [-a/--after DATE-TIME]

    with -l N, only report the first N records
    with -b DATE-TIME, only report the records with create times before
    DATE-TIME.
    with -a DATE-TIME, only report the records with create times after
    DATE-TIME.
    """
    p = optparse.OptionParser()
    p.add_option('-c', '--count',
                 action='store_true', default=False, dest='count',
                 help='report record counts rather than records')
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    p.add_option('-l', '--limit',
                 action='store', default='', dest='limit',
                 help='how many records to fetch')
    p.add_option('-b', '--before',
                 action='store', default='', dest='before',
                 help='fetch records from before the date/time')
    p.add_option('-a', '--after',
                 action='store', default='', dest='after',
                 help='fetch records from after the date/time')
    (o, a) = p.parse_args(args)

    if o.debug:
        pdb.set_trace()

    cfg = CrawlConfig.get_config()
    dbargs = {'table': 'bfmigrrec'}

    if o.limit == '' and o.before == '' and o.after == '':
        dbargs['limit'] = 30
    elif o.limit == '' and o.before == '' and o.after != '':
        dbargs['where'] = '? < record_create_time'
        dbargs['data'] = (util.epoch(o.after),)
    elif o.limit == '' and o.before != '' and o.after == '':
        dbargs['where'] = 'record_create_time < ?'
        dbargs['data'] = (util.epoch(o.before),)
    elif o.limit == '' and o.before != '' and o.after != '':
        dbargs['where'] = '? < record_create_time and record_create_time < ?'
        dbargs['data'] = (util.epoch(o.after), util.epoch(o.before))
    elif o.limit != '' and o.before == '' and o.after == '':
        dbargs['limit'] = int(o.limit)
    elif o.limit != '' and o.before == '' and o.after != '':
        dbargs['limit'] = int(o.limit)
        dbargs['where'] = '? < record_create_time'
        dbargs['data'] = (util.epoch(o.after),)
    elif o.limit != '' and o.before != '' and o.after == '':
        dbargs['limit'] = int(o.limit)
        dbargs['where'] = 'record_create_time < ?'
        dbargs['data'] = (util.epoch(o.before),)
    elif o.limit != '' and o.before != '' and o.after != '':
        dbargs['limit'] = int(o.limit)
        dbargs['where'] = '? < record_create_time and record_create_time < ?'
        dbargs['data'] = (util.epoch(o.after), util.epoch(o.before))

    if o.count:
        dbargs['fields'] = ['count(*)']
    else:
        dbargs['fields'] = ['bfid',
                            'record_create_time',
                            'migration_failure_count']
        dbargs['orderby'] = 'record_create_time'

    rows = mpra_lib.lookup_migr_recs(**dbargs)
    for row in rows:
        if o.count:
            print("Records found: %d" % row['1'])
        else:
            print("%s %s %d" %
                  (CrawlDBI.DBIdb2.hexstr(row['BFID']),
                   util.ymdhms(row['RECORD_CREATE_TIME']),
                   row['MIGRATION_FAILURE_COUNT']))

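# A hedged, behavior-equivalent sketch of the eight-way branch above:
# build the where clause and bind data incrementally instead of
# enumerating every --limit/--before/--after combination. build_dbargs
# is a hypothetical helper; it assumes the same util.epoch and dbargs
# conventions used in mprf_migr_recs.
def build_dbargs(o):
    dbargs = {'table': 'bfmigrrec'}
    conds = []
    data = []
    if o.after != '':
        conds.append('? < record_create_time')
        data.append(util.epoch(o.after))
    if o.before != '':
        conds.append('record_create_time < ?')
        data.append(util.epoch(o.before))
    if conds:
        dbargs['where'] = ' and '.join(conds)
        dbargs['data'] = tuple(data)
    if o.limit != '':
        dbargs['limit'] = int(o.limit)
    elif not conds:
        # no filters at all: keep the default cap of 30 records
        dbargs['limit'] = 30
    return dbargs
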