示例#1
0
    def test_alert_email_mtcaller(self):
        """
        Generate an e-mail alert and verify that it was sent (this is where we
        use 'monkey patching'). For this case, caller is ''.
        """
        self.dbgfunc()
        # fakesmtp is the monkey patch: outgoing mail lands in this list
        fakesmtp.inbox = []
        logfile = self.tmpdir('alert_email.log')
        targets = "[email protected], [email protected], [email protected]"
        payload = 'this is an e-mail alert'
        sender = 'hpssic@' + util.hostname(long=True)

        # build a minimal in-memory config with the e-mail targets
        cfg = CrawlConfig.CrawlConfig()
        cfg.add_section('crawler')
        cfg.add_section('alerts')
        cfg.set('crawler', 'logpath', logfile)
        cfg.set('alerts', 'email', targets)
        # reset the logger so messages go to this test's logfile
        CrawlConfig.log(logpath=logfile, close=True)

        # constructing the Alert triggers delivery; the message should be
        # sitting in the fake inbox immediately afterward
        x = Alert.Alert(caller='', msg=payload, cfg=cfg)
        m = fakesmtp.inbox[0]
        self.expected(targets, ', '.join(m.to_address))
        self.expected(m.from_address, sender)
        self.expected_in('sent mail to', util.contents(logfile))
        self.expected_in(payload, m.fullmessage)
示例#2
0
 def test_alert_shell_nospec(self):
     """
     Generate a shell alert and verify that it ran. With no '%s' in the
     shell alert string, no message should be offered for formatting.

     Writes a tiny shell script, makes it executable, configures an
     Alert to run it, then checks the log and the script's output file.
     """
     self.dbgfunc()
     logfile = self.tmpdir('alert_shell.log')
     outfile = self.tmpdir('alert_shell.out')
     runfile = self.tmpdir('runme')
     # use a context manager so the script file is closed (and flushed)
     # even if a write fails
     with open(runfile, 'w') as f:
         f.write("#!/bin/bash\n")
         f.write("echo \"ALERT: $*\" > %s\n" % outfile)
     # make the script executable (rwxrwxr-x)
     os.chmod(runfile,
              stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR |
              stat.S_IRGRP | stat.S_IWGRP | stat.S_IXGRP |
              stat.S_IROTH | stat.S_IXOTH)
     cfg = CrawlConfig.CrawlConfig()
     cfg.add_section('crawler')
     cfg.add_section('AlertTest')
     cfg.add_section('alert_section')
     cfg.set('crawler', 'logpath', logfile)
     cfg.set('AlertTest', 'alerts', 'alert_section')
     cfg.set('alert_section', 'shell', runfile)
     # reset the logger so messages go to this test's logfile
     CrawlConfig.log(logpath=logfile, close=True)
     # constructing the Alert runs the shell command
     x = Alert.Alert(caller='AlertTest', msg='this is a test message',
                     cfg=cfg)
     expected = "ran: '%s'" % runfile
     self.expected_in(expected, util.contents(logfile))
     self.assertPathPresent(outfile)
示例#3
0
 def test_alert_shell_nospec(self):
     """
     Generate a shell alert and verify that it ran. With no '%s' in the
     shell alert string, no message should be offered for formatting.
     """
     self.dbgfunc()
     logfile = self.tmpdir('alert_shell.log')
     outfile = self.tmpdir('alert_shell.out')
     runfile = self.tmpdir('runme')
     # write a tiny script that records its arguments in outfile
     f = open(runfile, 'w')
     f.write("#!/bin/bash\n")
     f.write("echo \"ALERT: $*\" > %s\n" % outfile)
     f.close()
     # make the script executable (rwxrwxr-x)
     os.chmod(
         runfile, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR | stat.S_IRGRP
         | stat.S_IWGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)
     cfg = CrawlConfig.CrawlConfig()
     cfg.add_section('crawler')
     cfg.add_section('AlertTest')
     cfg.add_section('alert_section')
     cfg.set('crawler', 'logpath', logfile)
     cfg.set('AlertTest', 'alerts', 'alert_section')
     cfg.set('alert_section', 'shell', runfile)
     # reset the logger so messages go to this test's logfile
     CrawlConfig.log(logpath=logfile, close=True)
     # constructing the Alert runs the shell command
     x = Alert.Alert(caller='AlertTest', msg='this is a test message',
                     cfg=cfg)
     expected = "ran: '%s'" % runfile
     self.expected_in(expected, util.contents(logfile))
     self.assertPathPresent(outfile)
示例#4
0
    def test_alert_email_mtcaller(self):
        """
        Generate an e-mail alert and verify that it was sent (this is where we
        use 'monkey patching'). For this case, caller is ''.
        """
        self.dbgfunc()
        # fakesmtp is the monkey patch: outgoing mail lands in this list
        fakesmtp.inbox = []
        logfile = self.tmpdir('alert_email.log')
        targets = "[email protected], [email protected], [email protected]"
        payload = 'this is an e-mail alert'
        sender = 'hpssic@' + util.hostname(long=True)

        # build a minimal in-memory config with the e-mail targets
        cfg = CrawlConfig.CrawlConfig()
        cfg.add_section('crawler')
        cfg.add_section('alerts')
        cfg.set('crawler', 'logpath', logfile)
        cfg.set('alerts', 'email', targets)
        # reset the logger so messages go to this test's logfile
        CrawlConfig.log(logpath=logfile, close=True)

        # constructing the Alert triggers delivery
        x = Alert.Alert(caller='', msg=payload,
                        cfg=cfg)
        m = fakesmtp.inbox[0]
        self.expected(targets, ', '.join(m.to_address))
        self.expected(m.from_address, sender)
        self.expected_in('sent mail to', util.contents(logfile))
        self.expected_in(payload, m.fullmessage)
示例#5
0
def main(cfg):
    """
    Plugin example

    Log a message drawn from the [example] section of the configuration,
    falling back to a default when the 'message' option is absent.
    """
    try:
        msg = cfg.get('example', 'message')
    except ConfigParser.NoOptionError:
        # no 'message' option in the [example] section -- use a default
        msg = 'No message in configuration'

    CrawlConfig.log('EXAMPLE: This is plugin EXAMPLE saying "%s"' % msg)
示例#6
0
def main(cfg):
    """
    Migration Purge Record Ager (mpra) reads the database tables BFMIGRREC and
    BFPURGEREC and reports migration and purge records that are older than the
    age specified in the configuration.

    Returns the count of aged migration records plus expired purge locks.
    """
    if cfg is None:
        cfg = CrawlConfig.get_config()
    age = cfg.get_time('mpra', 'age')

    # records older than this moment qualify for reporting
    end = time.time() - age

    start = mpra_lib.mpra_fetch_recent("migr")
    #
    # If the configured age has been moved back in time, so that end is before
    # start, we need to reset and start scanning from the beginning of time.
    #
    if end < start:
        start = 0
    CrawlConfig.log("migr recs after %d (%s) before %d (%s)" %
                    (start, util.ymdhms(start), end, util.ymdhms(end)))
    result = mpra_lib.age("migr", start=start, end=end, mark=True)
    CrawlConfig.log("found %d migration records in the range" % result)
    rval = result

    # NOTE(review): this 'start' value is never used below -- the purge
    # side only looks for expired purge locks; confirm whether it was
    # meant to feed a range query like the migration side
    start = mpra_lib.mpra_fetch_recent("purge")
    CrawlConfig.log("Looking for expired purge locks")
    result = mpra_lib.xplocks(mark=True)
    CrawlConfig.log("found %d expired purge locks" % result)
    rval += result

    return rval
示例#7
0
 def test_html_report(self):
     """
     Try running 'html report > filename' and verify that 1) no traceback
     occurs and 2) something is actually written to the output file.
     """
     self.dbgfunc()
     cfpath = self.tmpdir("crawl.cfg")
     cfg = CrawlConfig.add_config()
     # close the config file promptly instead of leaking the handle
     with open(cfpath, 'w') as f:
         cfg.crawl_write(f)
     cmd = "html report --config %s" % cfpath
     CrawlConfig.log(cmd, close=True)
     result = pexpect.run(cmd)
     # skip rather than fail when HPSS itself is down
     if "HPSS Unavailable" in result:
         pytest.skip("HPSS Unavailable")
     self.validate_report(result)
示例#8
0
 def test_html_report(self):
     """
     Try running 'html report > filename' and verify that 1) no traceback
     occurs and 2) something is actually written to the output file.
     """
     self.dbgfunc()
     cfpath = self.tmpdir("crawl.cfg")
     cfg = CrawlConfig.add_config()
     # NOTE(review): the handle returned by open() is never explicitly
     # closed here
     cfg.crawl_write(open(cfpath, 'w'))
     cmd = "html report --config %s" % cfpath
     CrawlConfig.log(cmd, close=True)
     result = pexpect.run(cmd)
     # skip rather than fail when HPSS itself is down
     if "HPSS Unavailable" in result:
         pytest.skip("HPSS Unavailable")
     self.validate_report(result)
示例#9
0
def main(cfg):
    """
    Generate an html-formatted report and store it at the designated location

    The fresh report is written to '<output_path>.new' and then rotated
    into place; any existing report is preserved as '<output_path>.old'.
    """
    CrawlConfig.log("html_plugin starting")
    target = cfg.get('html', 'output_path')
    report_text = html_lib.get_html_report(cfg=cfg)

    new_path = target + '.new'
    old_path = target + '.old'

    # write the new report to the side, then rotate
    with open(new_path, 'w') as out:
        out.write(report_text)

    if os.path.exists(target):
        os.rename(target, old_path)
    os.rename(new_path, target)
    CrawlConfig.log("html_plugin finished")
示例#10
0
def test_maybe_update_hsi_cant(muh_prep, tmpdir):
    """
    If we don't have write permission on the target, then even if we should
    update, we can't. In this case, should log a message.
    """
    pytest.dbgfunc()
    lp = tmpdir.join('crawl.test.log')
    # the test reads attributes (bin, hsihome, file) off the test function
    # object itself -- presumably attached by the muh_prep fixture; TODO
    # confirm
    rf = test_maybe_update_hsi_cant
    path = ":".join([rf.bin.strpath, rf.hsihome])
    with U.tmpenv('PATH', path):
        # point the logger at this test's logfile before the call
        CrawlConfig.log(logpath=lp.strpath, close=True)
        hpss.maybe_update_hsi()
    # the wrapped file should not have been modified
    c = rf.file.read()
    assert 'not changed' in c
    assert os.path.exists(lp.strpath)
    c = lp.read()
    assert MSG.hsi_wrap_ood in c
    # release the logfile
    CrawlConfig.log(close=True)
示例#11
0
def main(cfg):
    """
    HSI demo

    Spawn an interactive hsi session, run 'ls', quit, and append the
    session transcript to hsi.out.
    """
    CrawlConfig.log("hsi-demo: sending output to hsi.out")
    hsi_prompt = "]:"

    S = pexpect.spawn("/opt/public/bin/hsi")
    # use a context manager so the transcript file is closed even if an
    # expect times out (the original left it open on that path)
    with open("hsi.out", 'a') as logf:
        S.logfile = logf
        S.expect(hsi_prompt)
        S.sendline("ls")

        S.expect(hsi_prompt)
        S.sendline("quit")

        # wait for the session to end before closing the transcript
        S.expect(pexpect.EOF)
    S.close()
示例#12
0
 def test_alert_log(self):
     """
     Generate a log alert and verify that the message was written to the
     correct log file.
     """
     self.dbgfunc()
     logfile = self.tmpdir('alert_log.log')
     # config: AlertTest's alerts come from [alert_section], whose 'log'
     # action writes the message verbatim ('%s')
     cfg = CrawlConfig.CrawlConfig()
     cfg.add_section('crawler')
     cfg.add_section('AlertTest')
     cfg.add_section('alert_section')
     cfg.set('crawler', 'logpath', logfile)
     cfg.set('AlertTest', 'alerts', 'alert_section')
     cfg.set('alert_section', 'log', "%s")
     # reset the logger so messages go to this test's logfile
     CrawlConfig.log(logpath=logfile, close=True)
     x = Alert.Alert(caller='AlertTest', msg='this is a test message',
                     cfg=cfg)
     self.expected_in('this is a test message', util.contents(logfile))
示例#13
0
 def test_alert_log(self):
     """
     Generate a log alert and verify that the message was written to the
     correct log file.
     """
     self.dbgfunc()
     logfile = self.tmpdir('alert_log.log')
     # config: AlertTest's alerts come from [alert_section], whose 'log'
     # action writes the message verbatim ('%s')
     cfg = CrawlConfig.CrawlConfig()
     cfg.add_section('crawler')
     cfg.add_section('AlertTest')
     cfg.add_section('alert_section')
     cfg.set('crawler', 'logpath', logfile)
     cfg.set('AlertTest', 'alerts', 'alert_section')
     cfg.set('alert_section', 'log', "%s")
     # reset the logger so messages go to this test's logfile
     CrawlConfig.log(logpath=logfile, close=True)
     x = Alert.Alert(caller='AlertTest',
                     msg='this is a test message',
                     cfg=cfg)
     self.expected_in('this is a test message', util.contents(logfile))
示例#14
0
 def test_alert_use_other(self):
     """
     A use directive sends us to another config section where we generate a
     log alert and verify that the message was written to the correct log
     file.
     """
     self.dbgfunc()
     logfile = self.tmpdir('alert_use.log')
     cfg = CrawlConfig.CrawlConfig()
     cfg.add_section('crawler')
     cfg.add_section('AlertTest')
     cfg.add_section('alert_section')
     cfg.add_section('other_section')
     cfg.set('crawler', 'logpath', logfile)
     cfg.set('AlertTest', 'alerts', 'alert_section')
     # alert_section delegates to other_section via 'use'; other_section
     # logs the message verbatim ('%s')
     cfg.set('alert_section', 'use', "other_section")
     cfg.set('other_section', 'log', "%s")
     # reset the logger so messages go to this test's logfile
     CrawlConfig.log(logpath=logfile, close=True)
     payload = 'this is a test message from %s' % util.my_name()
     x = Alert.Alert(caller='AlertTest', msg=payload, cfg=cfg)
     self.expected_in(payload, util.contents(logfile))
示例#15
0
 def test_dlog(self):
     """
     Test method dlog on daemon object

     Create a daemon with a logger, call dlog, and confirm the message
     landed in the log file.
     """
     logpath = self.tmpdir('daemon.dlog.log')
     lgr = CrawlConfig.log(logpath=logpath)
     dmn = daemon.Daemon(self.tmpdir("daemon_pid"), logger=lgr)
     msg = "testing the dlog method of %s" % dmn
     dmn.dlog(msg)
     self.assertTrue(msg in util.contents(logpath),
                     "Expected '%s' in '%s'" %
                     (msg, util.line_quote(util.contents(logpath))))
示例#16
0
 def test_alert_use_other(self):
     """
     A use directive sends us to another config section where we generate a
     log alert and verify that the message was written to the correct log
     file.
     """
     self.dbgfunc()
     logfile = self.tmpdir('alert_use.log')
     cfg = CrawlConfig.CrawlConfig()
     cfg.add_section('crawler')
     cfg.add_section('AlertTest')
     cfg.add_section('alert_section')
     cfg.add_section('other_section')
     cfg.set('crawler', 'logpath', logfile)
     cfg.set('AlertTest', 'alerts', 'alert_section')
     # alert_section delegates to other_section via 'use'; other_section
     # logs the message verbatim ('%s')
     cfg.set('alert_section', 'use', "other_section")
     cfg.set('other_section', 'log', "%s")
     # reset the logger so messages go to this test's logfile
     CrawlConfig.log(logpath=logfile, close=True)
     payload = 'this is a test message from %s' % util.my_name()
     x = Alert.Alert(caller='AlertTest', msg=payload,
                     cfg=cfg)
     self.expected_in(payload, util.contents(logfile))
示例#17
0
    def test_alert_email_defcfg(self):
        """
        Generate an e-mail alert using the default config and verify that it
        was sent (this is where we use 'monkey patching').
        """
        self.dbgfunc()
        # fakesmtp is the monkey patch: outgoing mail lands in this list
        fakesmtp.inbox = []
        CrawlConfig.add_config(close=True)
        # with U.tmpenv('CRAWL_CONF', 'hpssic_test.cfg'):
        # clear CRAWL_CONF so the default configuration gets picked up
        with U.tmpenv('CRAWL_CONF', None):
            logfile = self.tmpdir('alert_email.log')
            targets = "[email protected], [email protected]"
            payload = 'this is an e-mail alert'
            sender = 'hpssic@' + util.hostname(long=True)
            CrawlConfig.log(logpath=logfile, close=True)

            # no cfg argument: Alert falls back to the default config
            x = Alert.Alert(caller='cv', msg=payload)
            m = fakesmtp.inbox[0]
            self.expected(', '.join(m.to_address), targets)
            self.expected(m.from_address, sender)
            self.expected_in('sent mail to', util.contents(logfile))
            self.expected_in(payload, m.fullmessage)
示例#18
0
 def test_dlog(self):
     """
     Test method dlog on daemon object
     """
     lfname = self.tmpdir('daemon.dlog.log')
     lf = CrawlConfig.log(logpath=lfname)
     a = daemon.Daemon(self.tmpdir("daemon_pid"), logger=lf)
     logmsg = "testing the dlog method of %s" % a
     a.dlog(logmsg)
     # the message should appear in the daemon's log file
     self.assertTrue(logmsg in util.contents(lfname),
                     "Expected '%s' in '%s'" %
                     (logmsg,
                      util.line_quote(util.contents(lfname))))
示例#19
0
    def test_alert_email_defcfg(self):
        """
        Generate an e-mail alert using the default config and verify that it
        was sent (this is where we use 'monkey patching').
        """
        self.dbgfunc()
        # fakesmtp is the monkey patch: outgoing mail lands in this list
        fakesmtp.inbox = []
        CrawlConfig.add_config(close=True)
        # with U.tmpenv('CRAWL_CONF', 'hpssic_test.cfg'):
        # clear CRAWL_CONF so the default configuration gets picked up
        with U.tmpenv('CRAWL_CONF', None):
            logfile = self.tmpdir('alert_email.log')
            targets = "[email protected], [email protected]"
            payload = 'this is an e-mail alert'
            sender = 'hpssic@' + util.hostname(long=True)
            CrawlConfig.log(logpath=logfile, close=True)

            # no cfg argument: Alert falls back to the default config
            x = Alert.Alert(caller='cv', msg=payload)
            m = fakesmtp.inbox[0]
            self.expected(', '.join(m.to_address), targets)
            self.expected(m.from_address, sender)
            self.expected_in('sent mail to', util.contents(logfile))
            self.expected_in(payload, m.fullmessage)
示例#20
0
def main(cfg):
    """
    This plugin will generate a report and send it to the designated e-mail
    address(es).

    Returns 0 on success, 1 on failure (the failure is logged, not raised,
    so the crawler keeps running).
    """
    try:
        if cfg is None:
            cfg = CrawlConfig.get_config()

        # subject = configured prefix + local timestamp
        stamp = time.strftime("%Y.%m%d %H:%M:%S", time.localtime())
        subject = "%s %s" % (cfg.get('rpt', 'subject'), stamp)

        CrawlMail.send(sender=cfg.get('rpt', 'sender'),
                       to='rpt.recipients',
                       subj=subject,
                       msg=rpt_lib.get_report())
    except Exception as e:
        CrawlConfig.log("Failure in rpt_lib: '%s'" % str(e))
        return 1

    return 0
示例#21
0
def main(cfg):
    """
    This plugin will generate a report and send it to the designated e-mail
    address(es).

    Returns 0 on success, 1 on failure (the failure is logged).
    """
    rval = 0
    try:
        if cfg is None:
            cfg = CrawlConfig.get_config()

        # subject is the configured prefix plus a local timestamp
        subject = "%s %s" % (cfg.get(
            'rpt',
            'subject'), time.strftime("%Y.%m%d %H:%M:%S", time.localtime()))

        CrawlMail.send(sender=cfg.get('rpt', 'sender'),
                       to='rpt.recipients',
                       subj=subject,
                       msg=rpt_lib.get_report())
    except Exception as e:
        # log the failure rather than crashing the crawler
        rval = 1
        CrawlConfig.log("Failure in rpt_lib: '%s'" % str(e))

    return rval
示例#22
0
def main(cfg):
    """
    Tape Copy Checker retrieves the necessary information from the DB2 database
    to find files where the number of copies stored may not match the number
    called for by the COS.

    Returns the number of errors detected in this batch (None if priority
    files were handled instead).
    """
    # retrieve configuration items as needed
    how_many = int(cfg.get_d(tcc_lib.sectname(), 'operations', 10))
    CrawlConfig.log("tape-copy-checker: firing up for %d items" % how_many)

    # retrieve COS info
    cosinfo = tcc_lib.get_cos_info()

    # check for priority file(s); if any were processed, we're done
    pri_glob = cfg.get_d(tcc_lib.sectname(), 'priority', '')
    if pri_glob != '':
        if 0 < tcc_priority(pri_glob, cosinfo):
            return

    # get the nsobject_id of the next bitfile to process from mysql
    next_nsobj_id = tcc_lib.get_next_nsobj_id(cfg)
    CrawlConfig.log("next nsobject id = %d" % next_nsobj_id)

    # fetch the next N bitfiles from DB2
    CrawlConfig.log("looking for nsobject ids between %d and %d" %
                    (next_nsobj_id, next_nsobj_id + how_many - 1))
    try:
        bfl = tcc_lib.get_bitfile_set(int(next_nsobj_id), how_many)
    except U.HpssicError:
        # no bitfile set available -- treat it as an empty batch
        bfl = []

    CrawlConfig.log("got %d bitfiles" % len(bfl))

    errcount = 0
    if len(bfl) == 0:
        # nothing in this range: record each id as checked
        for oid in range(next_nsobj_id, next_nsobj_id + how_many):
            tcc_lib.record_checked_ids(cfg, oid, oid, 1, 0)
            if cfg.getboolean(tcc_lib.sectname(), 'verbose'):
                CrawlConfig.log("Object %d is not complete" % oid)
                # NOTE(review): errcount is only bumped when 'verbose' is
                # set, so the return value depends on a logging option --
                # confirm this is intended
                errcount += 1
    else:
        # for each bitfile, if it does not have the right number of copies,
        # report it
        for bf in bfl:
            correct = 1
            error = 0
            if bf['SC_COUNT'] != cosinfo[bf['BFATTR_COS_ID']]:
                tcc_lib.tcc_report(bf, cosinfo)
                correct = 0
                error = 1
                CrawlConfig.log("%s %s %d != %d" %
                                (bf['OBJECT_ID'], tcc_lib.hexstr(bf['BFID']),
                                 bf['SC_COUNT'], cosinfo[bf['BFATTR_COS_ID']]))
            elif cfg.getboolean(tcc_lib.sectname(), 'verbose'):
                CrawlConfig.log("%s %s %d == %d" %
                                (bf['OBJECT_ID'], tcc_lib.hexstr(bf['BFID']),
                                 bf['SC_COUNT'], cosinfo[bf['BFATTR_COS_ID']]))

            last_obj_id = int(bf['OBJECT_ID'])
            tcc_lib.record_checked_ids(cfg, last_obj_id, last_obj_id, correct,
                                       error)
            errcount += error

        CrawlConfig.log("last nsobject in range: %d" % last_obj_id)

    return errcount
示例#23
0
def main(cfg):
    """
    Main entry point for the cv plugin

    Pull a batch of Checkable items from the database, check up to
    'operations' of them, then log and store checksum statistics.
    Returns the number of checksum failures seen this pass.
    """
    # Get stuff we need -- the logger object, dataroot, etc.
    CrawlConfig.log("firing up")
    # NOTE(review): plugdir is not used anywhere below
    plugdir = cfg.get('crawler', 'plugin-dir')
    dataroot = util.csv_list(cfg.get(plugin_name, 'dataroot'))
    odds = cfg.getfloat(plugin_name, 'odds')
    n_ops = int(cfg.get(plugin_name, 'operations'))

    # Initialize our statistics
    (t_checksums, t_matches, t_failures) = get_stats()
    (checksums, matches, failures) = (0, 0, 0)

    # Fetch the list of HPSS objects that we're looking at from the
    # database
    try:
        clist = Checkable.Checkable.get_list(prob=odds, rootlist=dataroot)
    except CrawlDBI.DBIerror as e:
        # a missing 'checkables' table means the db is empty -- seed it
        if any([util.rgxin(msg, str(e))
                for msg in ["no such table: checkables",
                            "Table '.*' doesn't exist"]]):
            CrawlConfig.log("calling ex_nihilo")
            Checkable.Checkable.ex_nihilo(dataroot=dataroot)
            clist = Checkable.Checkable.get_list(prob=odds)
        else:
            raise
    # StandardError exists in Python 2 only
    except StandardError as e:
        if 'Please call .ex_nihilo()' in str(e):
            CrawlConfig.log("calling ex_nihilo")
            Checkable.Checkable.ex_nihilo(dataroot=dataroot)
            clist = Checkable.Checkable.get_list(prob=odds)
        else:
            raise

    # We're going to process n_ops things in the HPSS namespace
    for op in range(n_ops):
        # if the list from the database is empty, there's nothing to do
        if 0 < len(clist):
            # but it's not, so grab the first item and check it
            item = clist.pop(0)
            CrawlConfig.log("[%d] checking %s" % (item.rowid, item))
            ilist = item.check()

            # Expected outcomes that check can return:
            #  list of Checkables: read dir or checksummed files (may be empty)
            #  Alert:              checksum verify failed
            #  'access denied':    unaccessible directory
            #  'matched':          a checksum was verified
            #  'checksummed':      file was checksummed
            #  'skipped':          file was skipped
            #  'unavailable':      HPSS is temporarily unavailable
            #  StandardError:      invalid Checkable type (not 'f' or 'd')
            #
            if type(ilist) == str:
                if ilist == "access denied":
                    CrawlConfig.log("dir %s not accessible" % item.path)
                    # clist.remove(item)
                elif ilist == "matched":
                    matches += 1
                    CrawlConfig.log("%s checksums matched" % item.path)
                elif ilist == "checksummed":
                    # checksums += 1
                    CrawlConfig.log("%s checksummed" % item.path)
                elif ilist == "skipped":
                    CrawlConfig.log("%s skipped" % item.path)
                elif ilist == "unavailable":
                    # no point continuing this pass if HPSS is down
                    CrawlConfig.log("HPSS is not available")
                    break
                else:
                    CrawlConfig.log("unexpected string returned " +
                                    "from Checkable: '%s'" % ilist)
            elif type(ilist) == list:
                CrawlConfig.log("in %s, found:" % item)
                for n in ilist:
                    CrawlConfig.log(">>> %s" % str(n))
                    if 'f' == n.type and n.checksum != 0:
                        CrawlConfig.log(".. previously checksummed")
                        # checksums += 1
            elif isinstance(ilist, Checkable.Checkable):
                CrawlConfig.log("Checkable returned - file checksummed" +
                                " - %s, %s" % (ilist.path, ilist.checksum))
                # checksums += 1
            elif isinstance(ilist, Alert.Alert):
                CrawlConfig.log("Alert generated: '%s'" %
                                ilist.msg())
                failures += 1
            else:
                CrawlConfig.log("unexpected return val from " +
                                "Checkable.check: %s: %r" %
                                (type(ilist), ilist))

    # Report the statistics in the log
    # ** For checksums, we report the current total minus the previous
    # ** For matches and failures, we counted them up during the iteration
    # ** See the description of get_stats for why we don't store total
    #    checksums
    p_checksums = t_checksums
    t_matches += matches
    t_failures += failures
    cv_lib.update_stats((t_matches, t_failures))

    (t_checksums, t_matches, t_failures) = get_stats()
    CrawlConfig.log("files checksummed: %d; " % (t_checksums - p_checksums) +
                    "checksums matched: %d; " % matches +
                    "failures: %d" % failures)
    CrawlConfig.log("totals checksummed: %d; " % t_checksums +
                    "matches: %d; " % t_matches +
                    "failures: %d" % t_failures)

    # Report the dimension data in the log
    d = Dimension.Dimension(name='cos')
    t = Dimension.Dimension(name='cart')
    CrawlConfig.log(d.report())
    CrawlConfig.log(t.report())
    return failures
示例#24
0
def main(cfg):
    """
    Main entry point for the cv plugin

    Pull a batch of Checkable items from the database, check up to
    'operations' of them, then log and store checksum statistics.
    Returns the number of checksum failures seen this pass.
    """
    # Get stuff we need -- the logger object, dataroot, etc.
    CrawlConfig.log("firing up")
    # NOTE(review): plugdir is not used anywhere below
    plugdir = cfg.get('crawler', 'plugin-dir')
    dataroot = util.csv_list(cfg.get(plugin_name, 'dataroot'))
    odds = cfg.getfloat(plugin_name, 'odds')
    n_ops = int(cfg.get(plugin_name, 'operations'))

    # Initialize our statistics
    (t_checksums, t_matches, t_failures) = get_stats()
    (checksums, matches, failures) = (0, 0, 0)

    # Fetch the list of HPSS objects that we're looking at from the
    # database
    try:
        clist = Checkable.Checkable.get_list(prob=odds, rootlist=dataroot)
    except CrawlDBI.DBIerror as e:
        # a missing 'checkables' table means the db is empty -- seed it
        if any([
                util.rgxin(msg, str(e)) for msg in
            ["no such table: checkables", "Table '.*' doesn't exist"]
        ]):
            CrawlConfig.log("calling ex_nihilo")
            Checkable.Checkable.ex_nihilo(dataroot=dataroot)
            clist = Checkable.Checkable.get_list(prob=odds)
        else:
            raise
    # StandardError exists in Python 2 only
    except StandardError as e:
        if 'Please call .ex_nihilo()' in str(e):
            CrawlConfig.log("calling ex_nihilo")
            Checkable.Checkable.ex_nihilo(dataroot=dataroot)
            clist = Checkable.Checkable.get_list(prob=odds)
        else:
            raise

    # We're going to process n_ops things in the HPSS namespace
    for op in range(n_ops):
        # if the list from the database is empty, there's nothing to do
        if 0 < len(clist):
            # but it's not, so grab the first item and check it
            item = clist.pop(0)
            CrawlConfig.log("[%d] checking %s" % (item.rowid, item))
            ilist = item.check()

            # Expected outcomes that check can return:
            #  list of Checkables: read dir or checksummed files (may be empty)
            #  Alert:              checksum verify failed
            #  'access denied':    unaccessible directory
            #  'matched':          a checksum was verified
            #  'checksummed':      file was checksummed
            #  'skipped':          file was skipped
            #  'unavailable':      HPSS is temporarily unavailable
            #  StandardError:      invalid Checkable type (not 'f' or 'd')
            #
            if type(ilist) == str:
                if ilist == "access denied":
                    CrawlConfig.log("dir %s not accessible" % item.path)
                    # clist.remove(item)
                elif ilist == "matched":
                    matches += 1
                    CrawlConfig.log("%s checksums matched" % item.path)
                elif ilist == "checksummed":
                    # checksums += 1
                    CrawlConfig.log("%s checksummed" % item.path)
                elif ilist == "skipped":
                    CrawlConfig.log("%s skipped" % item.path)
                elif ilist == "unavailable":
                    # no point continuing this pass if HPSS is down
                    CrawlConfig.log("HPSS is not available")
                    break
                else:
                    CrawlConfig.log("unexpected string returned " +
                                    "from Checkable: '%s'" % ilist)
            elif type(ilist) == list:
                CrawlConfig.log("in %s, found:" % item)
                for n in ilist:
                    CrawlConfig.log(">>> %s" % str(n))
                    if 'f' == n.type and n.checksum != 0:
                        CrawlConfig.log(".. previously checksummed")
                        # checksums += 1
            elif isinstance(ilist, Checkable.Checkable):
                CrawlConfig.log("Checkable returned - file checksummed" +
                                " - %s, %s" % (ilist.path, ilist.checksum))
                # checksums += 1
            elif isinstance(ilist, Alert.Alert):
                CrawlConfig.log("Alert generated: '%s'" % ilist.msg())
                failures += 1
            else:
                CrawlConfig.log("unexpected return val from " +
                                "Checkable.check: %s: %r" %
                                (type(ilist), ilist))

    # Report the statistics in the log
    # ** For checksums, we report the current total minus the previous
    # ** For matches and failures, we counted them up during the iteration
    # ** See the description of get_stats for why we don't store total
    #    checksums
    p_checksums = t_checksums
    t_matches += matches
    t_failures += failures
    cv_lib.update_stats((t_matches, t_failures))

    (t_checksums, t_matches, t_failures) = get_stats()
    CrawlConfig.log("files checksummed: %d; " % (t_checksums - p_checksums) +
                    "checksums matched: %d; " % matches +
                    "failures: %d" % failures)
    CrawlConfig.log("totals checksummed: %d; " % t_checksums +
                    "matches: %d; " % t_matches + "failures: %d" % t_failures)

    # Report the dimension data in the log
    d = Dimension.Dimension(name='cos')
    t = Dimension.Dimension(name='cart')
    CrawlConfig.log(d.report())
    CrawlConfig.log(t.report())
    return failures
示例#25
0
def main(cfg):
    """
    Tape Copy Checker retrieves the necessary information from the DB2 database
    to find files where the number of copies stored may not match the number
    called for by the COS.

    Returns the number of errors detected in this batch (None if priority
    files were handled instead).
    """
    # retrieve configuration items as needed
    how_many = int(cfg.get_d(tcc_lib.sectname(), 'operations', 10))
    CrawlConfig.log("tape-copy-checker: firing up for %d items" % how_many)

    # retrieve COS info
    cosinfo = tcc_lib.get_cos_info()

    # check for priority file(s); if any were processed, we're done
    pri_glob = cfg.get_d(tcc_lib.sectname(), 'priority', '')
    if pri_glob != '':
        if 0 < tcc_priority(pri_glob, cosinfo):
            return

    # get the nsobject_id of the next bitfile to process from mysql
    next_nsobj_id = tcc_lib.get_next_nsobj_id(cfg)
    CrawlConfig.log("next nsobject id = %d" % next_nsobj_id)

    # fetch the next N bitfiles from DB2
    CrawlConfig.log("looking for nsobject ids between %d and %d"
                    % (next_nsobj_id, next_nsobj_id+how_many-1))
    try:
        bfl = tcc_lib.get_bitfile_set(int(next_nsobj_id),
                                      how_many)
    except U.HpssicError as e:
        # treat a failed fetch as an empty batch
        # NOTE(review): 'e' is unused and the 'pass' is redundant
        bfl = []
        pass

    CrawlConfig.log("got %d bitfiles" % len(bfl))

    errcount = 0
    if len(bfl) == 0:
        # nothing in this range: record each id as checked
        for oid in range(next_nsobj_id, next_nsobj_id+how_many):
            tcc_lib.record_checked_ids(cfg, oid, oid, 1, 0)
            if cfg.getboolean(tcc_lib.sectname(), 'verbose'):
                CrawlConfig.log("Object %d is not complete" % oid)
                # NOTE(review): errcount is only bumped when 'verbose' is
                # set, so the return value depends on a logging option --
                # confirm this is intended
                errcount += 1
    else:
        # for each bitfile, if it does not have the right number of copies,
        # report it
        for bf in bfl:
            correct = 1
            error = 0
            if bf['SC_COUNT'] != cosinfo[bf['BFATTR_COS_ID']]:
                tcc_lib.tcc_report(bf, cosinfo)
                correct = 0
                error = 1
                CrawlConfig.log("%s %s %d != %d" %
                                (bf['OBJECT_ID'],
                                 tcc_lib.hexstr(bf['BFID']),
                                 bf['SC_COUNT'],
                                 cosinfo[bf['BFATTR_COS_ID']]))
            elif cfg.getboolean(tcc_lib.sectname(), 'verbose'):
                CrawlConfig.log("%s %s %d == %d" %
                                (bf['OBJECT_ID'],
                                 tcc_lib.hexstr(bf['BFID']),
                                 bf['SC_COUNT'],
                                 cosinfo[bf['BFATTR_COS_ID']]))

            last_obj_id = int(bf['OBJECT_ID'])
            tcc_lib.record_checked_ids(cfg,
                                       last_obj_id,
                                       last_obj_id,
                                       correct,
                                       error)
            errcount += error

        CrawlConfig.log("last nsobject in range: %d" % last_obj_id)

    return errcount