def test_init(self):
    """
    Construct an Alert with dispatch=False and check that the message and
    the caller were stored on it and that it exposes a 'dispatch' member
    """
    self.dbgfunc()
    message = 'this is the message'
    alert = Alert.Alert(message, caller=util.my_name(), dispatch=False)
    self.expected(message, alert.msg)
    # my_name() is evaluated again here, in this same function, so it
    # should yield the same caller name that was handed to the constructor
    self.expected(util.my_name(), alert.caller)
    self.expected_in('dispatch', dir(alert))
def test_init(self):
    """
    Construct an Alert with dispatch=False and check that the message and
    the caller were stored on it and that it exposes a 'dispatch' member
    """
    self.dbgfunc()
    message = 'this is the message'
    alert = Alert.Alert(message, caller=util.my_name(), dispatch=False)
    self.expected(message, alert.msg)
    # my_name() is evaluated again here, in this same function, so it
    # should yield the same caller name that was handed to the constructor
    self.expected(util.my_name(), alert.caller)
    self.expected_in('dispatch', dir(alert))
def test_touch_newpath_atime(self):
    """
    Touch a path that does not exist yet, supplying an atime but no mtime
    """
    self.dbgfunc()
    # the temp file is named after this test; my_name() has to be called
    # from this function to pick up the right name
    target = self.tmpdir(util.my_name())
    amtime_offsets = (-75, None)
    self.touch_payload(target, offs=amtime_offsets, new=True)
def test_touch_oldpath_default(self):
    """
    Touch a path that already exists without passing any atime/mtime values
    """
    self.dbgfunc()
    # the temp file is named after this test; my_name() has to be called
    # from this function to pick up the right name
    target = self.tmpdir(util.my_name())
    amtime_offsets = ()
    self.touch_payload(target, offs=amtime_offsets)
def test_touch_oldpath_mtime(self):
    """
    Touch a path that already exists, supplying an mtime but no atime
    """
    self.dbgfunc()
    # the temp file is named after this test; my_name() has to be called
    # from this function to pick up the right name
    target = self.tmpdir(util.my_name())
    amtime_offsets = (None, -32)
    self.touch_payload(target, offs=amtime_offsets)
def test_touch_oldpath_both(self):
    """
    Touch a path that already exists, supplying both an atime and an mtime
    """
    self.dbgfunc()
    # the temp file is named after this test; my_name() has to be called
    # from this function to pick up the right name
    target = self.tmpdir(util.my_name())
    amtime_offsets = (-175, -3423)
    self.touch_payload(target, offs=amtime_offsets)
def test_touch_oldpath_both(self):
    """
    Touch a path that already exists, supplying both an atime and an mtime
    """
    self.dbgfunc()
    # the temp file is named after this test; my_name() has to be called
    # from this function to pick up the right name
    target = self.tmpdir(util.my_name())
    amtime_offsets = (-175, -3423)
    self.touch_payload(target, offs=amtime_offsets)
def test_touch_oldpath_default(self):
    """
    Touch a path that already exists without passing any atime/mtime values
    """
    self.dbgfunc()
    # the temp file is named after this test; my_name() has to be called
    # from this function to pick up the right name
    target = self.tmpdir(util.my_name())
    amtime_offsets = ()
    self.touch_payload(target, offs=amtime_offsets)
def test_touch_oldpath_mtime(self):
    """
    Touch a path that already exists, supplying an mtime but no atime
    """
    self.dbgfunc()
    # the temp file is named after this test; my_name() has to be called
    # from this function to pick up the right name
    target = self.tmpdir(util.my_name())
    amtime_offsets = (None, -32)
    self.touch_payload(target, offs=amtime_offsets)
def test_load_new(self):
    """
    With the database and checkables table in place, build a Dimension
    whose name is not in the table. load() on it is expected to be a
    no-op: the object should not be stored to the database and its
    contents should stay as they were.
    """
    self.dbgfunc()

    # start over with a fresh database: drop the old file, install the
    # test configuration, then run Checkable.ex_nihilo() (presumably this
    # sets up the empty table -- confirm against Checkable)
    U.conditional_rm(self.dbname())
    CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
    Checkable.Checkable.ex_nihilo()
    # constructed for whatever side effects the constructor has; the
    # object itself is not referenced again
    ignore = Dimension(name='foobar')

    # the object under test, whose name has no data in the table
    subject = Dimension(name='notindb')

    # keep an independent deep copy (not just another reference to the
    # same object) to compare against after load()
    snapshot = copy.deepcopy(subject)

    # expected to change nothing
    subject.load()

    # every field should still match the snapshot
    for field in ('name', 'sampsize', 'p_sum', 's_sum'):
        self.expected(getattr(snapshot, field), getattr(subject, field))
def test_touch_newpath_atime(self):
    """
    Touch a path that does not exist yet, supplying an atime but no mtime
    """
    self.dbgfunc()
    # the temp file is named after this test; my_name() has to be called
    # from this function to pick up the right name
    target = self.tmpdir(util.my_name())
    amtime_offsets = (-75, None)
    self.touch_payload(target, offs=amtime_offsets, new=True)
def test_sum_total(self):
    """
    sum_total() should add up the 'count' values of p_sum when called with
    no arguments, of s_sum when called with which='s', and of whichever
    dictionary is handed in through dict=.
    """
    CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
    Checkable.Checkable.ex_nihilo()
    dim = Dimension(name='sum_total')

    # counts in p_sum add up to 4, counts in s_sum add up to 5
    dim.p_sum = {
        '6001': {'count': 2, 'pct': 50.0},
        '5081': {'count': 2, 'pct': 50.0},
    }
    dim.s_sum = {
        '6001': {'count': 2, 'pct': 40.0},
        '5081': {'count': 3, 'pct': 60.0},
    }

    p_expected = 4
    s_expected = 5
    self.expected(p_expected, dim.sum_total())
    self.expected(p_expected, dim.sum_total(dict=dim.p_sum))
    self.expected(s_expected, dim.sum_total(which='s'))
    self.expected(s_expected, dim.sum_total(dict=dim.s_sum))
def test_load_new(self):
    """
    With the database and checkables table in place, build a Dimension
    whose name is not in the table. load() on it is expected to be a
    no-op: the object should not be stored to the database and its
    contents should stay as they were.
    """
    self.dbgfunc()

    # start over with a fresh database: drop the old file, install the
    # test configuration, then run Checkable.ex_nihilo() (presumably this
    # sets up the empty table -- confirm against Checkable)
    U.conditional_rm(self.dbname())
    CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
    Checkable.Checkable.ex_nihilo()
    # constructed for whatever side effects the constructor has; the
    # object itself is not referenced again
    ignore = Dimension(name='foobar')

    # the object under test, whose name has no data in the table
    subject = Dimension(name='notindb')

    # keep an independent deep copy (not just another reference to the
    # same object) to compare against after load()
    snapshot = copy.deepcopy(subject)

    # expected to change nothing
    subject.load()

    # every field should still match the snapshot
    for field in ('name', 'sampsize', 'p_sum', 's_sum'):
        self.expected(getattr(snapshot, field), getattr(subject, field))
def test_ctor_attrs(self):
    """
    A freshly constructed Dimension should expose these members:
     - name (string)
     - sampsize (small float value, e.g., 0.005)
     - p_sum (empty dict)
     - s_sum (empty dict)
     - methods
        > sum_total
        > load
    Only the presence of each member is checked here.
    """
    CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
    Checkable.Checkable.ex_nihilo()
    dim = Dimension(name='cos', sampsize=0.005)
    required = ('name', 'sampsize', 'p_sum', 's_sum', 'sum_total', 'load')
    for member in required:
        present = hasattr(dim, member)
        complaint = ("Object %s does not have expected attribute %s" %
                     (dim, member))
        self.assertTrue(present, complaint)
def test_ctor_attrs(self):
    """
    A freshly constructed Dimension should expose these members:
     - name (string)
     - sampsize (small float value, e.g., 0.005)
     - p_sum (empty dict)
     - s_sum (empty dict)
     - methods
        > sum_total
        > load
    Only the presence of each member is checked here.
    """
    CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
    Checkable.Checkable.ex_nihilo()
    dim = Dimension(name='cos', sampsize=0.005)
    required = ('name', 'sampsize', 'p_sum', 's_sum', 'sum_total', 'load')
    for member in required:
        present = hasattr(dim, member)
        complaint = ("Object %s does not have expected attribute %s" %
                     (dim, member))
        self.assertTrue(present, complaint)
def test_my_name(self):
    """
    util.my_name() should report the name of the function that calls it.
    """
    self.dbgfunc()
    # called directly from this method, so this method's name is expected
    self.expected('test_my_name', util.my_name())
def test_my_name(self):
    """
    util.my_name() should report the name of the function that calls it.
    """
    self.dbgfunc()
    # called directly from this method, so this method's name is expected
    self.expected('test_my_name', util.my_name())
def test_repr(self):
    """
    __repr__ should hand back the constructor expression
    Dimension(name='foo') for an object built from that expression.
    """
    CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
    ctor_text = "Dimension(name='foo')"
    # eval() is applied to the fixed literal above, never to outside input
    dim = eval(ctor_text)
    self.expected(ctor_text, dim.__repr__())
def test_repr(self):
    """
    __repr__ should hand back the constructor expression
    Dimension(name='foo') for an object built from that expression.
    """
    CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
    ctor_text = "Dimension(name='foo')"
    # eval() is applied to the fixed literal above, never to outside input
    dim = eval(ctor_text)
    self.expected(ctor_text, dim.__repr__())
def test_load_already(self):
    """
    With the database and a checkables table in place and records in the
    table, calling load() on a Dimension should load the information from
    the table into the object. However, it should only count records where
    last_check <> 0.

    NOTE: the assertions below only confirm that each cos value appears as
    a key of p_sum and s_sum; the counts (and therefore the last_check
    filter) are not verified here.
    """
    self.dbgfunc()
    U.conditional_rm(self.dbname())
    CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
    Checkable.Checkable.ex_nihilo()
    chk = Checkable.Checkable
    # three cos values, three rows each; exactly one row per cos value has
    # a non-zero last_check
    testdata = [
        chk(rowid=1, path="/abc/001", type='f', cos='6001',
            checksum=0, last_check=0),
        chk(rowid=2, path="/abc/002", type='f', cos='6002',
            checksum=0, last_check=5),
        chk(rowid=3, path="/abc/003", type='f', cos='6003',
            checksum=1, last_check=0),
        chk(rowid=4, path="/abc/004", type='f', cos='6001',
            checksum=1, last_check=17),
        chk(rowid=5, path="/abc/005", type='f', cos='6002',
            checksum=0, last_check=0),
        chk(rowid=6, path="/abc/006", type='f', cos='6003',
            checksum=0, last_check=8),
        chk(rowid=7, path="/abc/007", type='f', cos='6001',
            checksum=0, last_check=0),
        chk(rowid=8, path="/abc/008", type='f', cos='6002',
            checksum=0, last_check=19),
        chk(rowid=9, path="/abc/009", type='f', cos='6003',
            checksum=0, last_check=0),
    ]

    # insert some test data into the table
    for t in testdata:
        t.persist()

    # get a default Dimension with the same name as the data in the table
    q = Dimension(name='cos')

    # this should load the data from the table into the object
    q.load()

    # verify the loaded data in the object
    self.expected('cos', q.name)
    # the failure message is built from the value being checked so it can
    # never name the wrong key (the '6002' check used to report '6001')
    for summary_name in ('p_sum', 's_sum'):
        keys = getattr(q, summary_name).keys()
        for cos in ('6001', '6002', '6003'):
            self.assertTrue(cos in keys,
                            "Expected '%s' in %s.keys()" %
                            (cos, summary_name))
def test_touch_newpath_default(self):
    """
    Touch a path that does not exist yet without passing any atime/mtime
    values.

    This test code assumes that file system operations truncate atime and
    mtime rather than rounding them.
    """
    self.dbgfunc()
    # the temp file is named after this test; my_name() has to be called
    # from this function to pick up the right name
    target = self.tmpdir(util.my_name())
    amtime_offsets = ()
    self.touch_payload(target, offs=amtime_offsets, new=True)
def test_touch_newpath_default(self):
    """
    Touch a path that does not exist yet without passing any atime/mtime
    values.

    This test code assumes that file system operations truncate atime and
    mtime rather than rounding them.
    """
    self.dbgfunc()
    # the temp file is named after this test; my_name() has to be called
    # from this function to pick up the right name
    target = self.tmpdir(util.my_name())
    amtime_offsets = ()
    self.touch_payload(target, offs=amtime_offsets, new=True)
def test_ctor_defaults(self):
    """
    A Dimension built with nothing but a name should carry that name, a
    sampsize of 0.01, and empty p_sum and s_sum.
    """
    CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
    dim = Dimension(name='cos')
    # (expected value, attribute) pairs, checked in this order
    defaults = [
        ('cos', 'name'),
        (0.01, 'sampsize'),
        ({}, 'p_sum'),
        ({}, 's_sum'),
    ]
    for wanted, attr_name in defaults:
        self.expected(wanted, getattr(dim, attr_name))
def test_ctor_defaults(self):
    """
    A Dimension built with nothing but a name should carry that name, a
    sampsize of 0.01, and empty p_sum and s_sum.
    """
    CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
    dim = Dimension(name='cos')
    # (expected value, attribute) pairs, checked in this order
    defaults = [
        ('cos', 'name'),
        (0.01, 'sampsize'),
        ({}, 'p_sum'),
        ({}, 's_sum'),
    ]
    for wanted, attr_name in defaults:
        self.expected(wanted, getattr(dim, attr_name))
def test_ctor_bad_attr(self):
    """
    Attempting to create a Dimension with attrs that are not in the
    settable list should get an exception.

    Two unsupported keyword arguments are tried; each is expected to raise
    StandardError with a message naming the offending attribute.
    """
    dimname = 'bad_attr'
    CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
    # assertRaisesMsg invokes Dimension itself with the keyword arguments
    # that follow the callable
    self.assertRaisesMsg(StandardError,
                         "Attribute 'catl' is not valid",
                         Dimension,
                         name=dimname,
                         catl=[1, 2, 3])
    self.assertRaisesMsg(StandardError,
                         "Attribute 'aardvark' is not valid",
                         Dimension,
                         name=dimname,
                         aardvark='Fanny Brice')
def test_date_start(self):
    """
    Given a file containing several log records (with some irrelevant
    introductory material), return the timestamp on the first one.

    util.date_start() is expected to return only the date portion
    ("2014.0412") of the first timestamped line.
    """
    self.dbgfunc()
    tdata = ["This line should be ignored\n",
             "2014.0412 12:25:50 This is the timestamp to return\n",
             "2014.0430 19:30:00 This should not be returned\n"]
    tfilename = self.tmpdir("%s.data" % (util.my_name()))
    # 'with' guarantees the file is closed (and flushed) before it is
    # read back, even if the write raises
    with open(tfilename, 'w') as f:
        f.writelines(tdata)
    self.expected("2014.0412", util.date_start(tfilename))
def test_ctor_reset_atime_cfg_true(self):
    """
    If reset_atime is specified in the config as True, it should be True
    """
    cf_name = self.tmpdir(util.my_name() + ".cfg")

    # write out a config file from self.cfg_d unchanged (this test relies
    # on that fixture carrying a true reset_atime value -- confirm against
    # where cfg_d is defined)
    self.write_cfg_file(cf_name, self.cfg_d)

    # make the test config the default
    CrawlConfig.get_config(cfname=cf_name, reset=True)
    try:
        # get an hpss.HSI object and check its reset_atime attribute
        h = hpss.HSI(connect=False)
        self.expected(True, h.reset_atime)
    finally:
        # runs even when the check above fails; presumably this drops the
        # test config so later tests do not inherit it -- confirm against
        # CrawlConfig.get_config
        CrawlConfig.get_config(reset=True, soft=True)
def test_date_start(self):
    """
    Given a file containing several log records (with some irrelevant
    introductory material), return the timestamp on the first one.

    util.date_start() is expected to return only the date portion
    ("2014.0412") of the first timestamped line.
    """
    self.dbgfunc()
    tdata = ["This line should be ignored\n",
             "2014.0412 12:25:50 This is the timestamp to return\n",
             "2014.0430 19:30:00 This should not be returned\n"]
    tfilename = self.tmpdir("%s.data" % (util.my_name()))
    # 'with' guarantees the file is closed (and flushed) before it is
    # read back, even if the write raises
    with open(tfilename, 'w') as f:
        f.writelines(tdata)
    self.expected("2014.0412", util.date_start(tfilename))
def test_sum_total(self):
    """
    sum_total() should add up the 'count' values of p_sum when called with
    no arguments, of s_sum when called with which='s', and of whichever
    dictionary is handed in through dict=.
    """
    CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
    Checkable.Checkable.ex_nihilo()
    dim = Dimension(name='sum_total')

    # counts in p_sum add up to 4, counts in s_sum add up to 5
    dim.p_sum = {
        '6001': {'count': 2, 'pct': 50.0},
        '5081': {'count': 2, 'pct': 50.0},
    }
    dim.s_sum = {
        '6001': {'count': 2, 'pct': 40.0},
        '5081': {'count': 3, 'pct': 60.0},
    }

    p_expected = 4
    s_expected = 5
    self.expected(p_expected, dim.sum_total())
    self.expected(p_expected, dim.sum_total(dict=dim.p_sum))
    self.expected(s_expected, dim.sum_total(which='s'))
    self.expected(s_expected, dim.sum_total(dict=dim.s_sum))
def test_ctor_bad_attr(self):
    """
    Attempting to create a Dimension with attrs that are not in the
    settable list should get an exception.

    Two unsupported keyword arguments are tried; each is expected to raise
    StandardError with a message naming the offending attribute.
    """
    dimname = 'bad_attr'
    CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
    # assertRaisesMsg invokes Dimension itself with the keyword arguments
    # that follow the callable
    self.assertRaisesMsg(StandardError,
                         "Attribute 'catl' is not valid",
                         Dimension,
                         name=dimname,
                         catl=[1, 2, 3])
    self.assertRaisesMsg(StandardError,
                         "Attribute 'aardvark' is not valid",
                         Dimension,
                         name=dimname,
                         aardvark='Fanny Brice')
def test_rrfile_short(self):
    """
    Test the reverse read file class on a small file: 64 bytes made of 16
    each of 'a', 'b', 'c' and 'd'. The first revread() is expected to
    return all 64 bytes.
    """
    self.dbgfunc()
    tdfile = self.tmpdir(util.my_name())
    clist = [chr(ord('a') + x) for x in range(0, 4)]
    with open(tdfile, 'w') as f:
        for c in clist:
            f.write(c * 16)

    rf = util.RRfile.open(tdfile, 'r')
    try:
        buf = rf.revread()
        self.expected(64, len(buf))
        # a run of each letter must show up somewhere in the buffer
        for exp in ["aaa", "bbb", "ccc", "ddd"]:
            self.expected_in(exp, buf)
    finally:
        # close the reader even when one of the checks above fails
        rf.close()
def test_alert_use_same(self):
    """
    Raise a log alert through a config whose alert section has a 'use'
    option naming that same section, then check that the message landed
    in the expected log file.
    """
    self.dbgfunc()
    logfile = self.tmpdir('alert_use.log')

    # build the configuration: sections first, then their options
    cfg = CrawlConfig.CrawlConfig()
    for section in ('crawler', 'AlertTest', 'alert_section'):
        cfg.add_section(section)
    settings = (
        ('crawler', 'logpath', logfile),
        ('AlertTest', 'alerts', 'alert_section'),
        ('alert_section', 'log', "%s"),
        # the section refers to itself here
        ('alert_section', 'use', 'alert_section'),
    )
    for section, option, value in settings:
        cfg.set(section, option, value)

    CrawlConfig.log(logpath=logfile, close=True)
    payload = 'this is a test message from %s' % util.my_name()
    # constructing the Alert is what is expected to write the log entry
    x = Alert.Alert(caller='AlertTest', msg=payload, cfg=cfg)
    self.expected_in(payload, util.contents(logfile))
def test_rrfile_short(self):
    """
    Test the reverse read file class on a small file: 64 bytes made of 16
    each of 'a', 'b', 'c' and 'd'. The first revread() is expected to
    return all 64 bytes.
    """
    self.dbgfunc()
    tdfile = self.tmpdir(util.my_name())
    clist = [chr(ord('a') + x) for x in range(0, 4)]
    with open(tdfile, 'w') as f:
        for c in clist:
            f.write(c * 16)

    rf = util.RRfile.open(tdfile, 'r')
    try:
        buf = rf.revread()
        self.expected(64, len(buf))
        # a run of each letter must show up somewhere in the buffer
        for exp in ["aaa", "bbb", "ccc", "ddd"]:
            self.expected_in(exp, buf)
    finally:
        # close the reader even when one of the checks above fails
        rf.close()
def test_ctor_reset_atime_call_true(self):
    """
    If reset_atime is specified in the call as True, it should be True,
    even if it's specified as False in the config
    """
    cf_name = self.tmpdir(util.my_name() + ".cfg")

    # write out a config file with reset_atime set to 'no', working on a
    # deep copy so the shared self.cfg_d is not modified
    cfg = copy.deepcopy(self.cfg_d)
    cfg['cv']['reset_atime'] = 'no'
    self.write_cfg_file(cf_name, cfg)

    # make the test config the default
    CrawlConfig.get_config(cfname=cf_name, reset=True)
    try:
        # get an hpss.HSI object, overriding the config through the
        # reset_atime argument, and check its reset_atime attribute
        h = hpss.HSI(connect=False, reset_atime=True)
        self.expected(True, h.reset_atime)
    finally:
        # runs even when the check above fails; presumably this drops the
        # test config so later tests do not inherit it -- confirm against
        # CrawlConfig.get_config
        CrawlConfig.get_config(reset=True, soft=True)
def test_date_end(self):
    """
    Given a file containing several log records, return the timestamp on
    the last one.

    util.date_end() is expected to return only the date portion
    ("2014.0501") of the last timestamped line, skipping the untimestamped
    lines that follow it.
    """
    self.dbgfunc()
    tdata = ["This line should be ignored\n",
             "2014.0412 12:25:50 This is not the timestamp to return\n",
             "2014.0430 19:30:00 This should not be returned\n",
             "2014.0501 19:30:00 Return this one\n",
             "We want plenty of data here at the end of the file\n",
             "with no timestamp so we'll be forced to read\n",
             "backward a time or two, not just find the timestamp\n",
             "on the first read so we exercise revread.\n"]
    tfilename = self.tmpdir("%s.data" % util.my_name())
    # 'with' guarantees the file is closed (and flushed) before it is
    # read back, even if the write raises
    with open(tfilename, 'w') as f:
        f.writelines(tdata)
    self.expected("2014.0501", util.date_end(tfilename))
def test_alert_use_same(self):
    """
    Raise a log alert through a config whose alert section has a 'use'
    option naming that same section, then check that the message landed
    in the expected log file.
    """
    self.dbgfunc()
    logfile = self.tmpdir('alert_use.log')

    # build the configuration: sections first, then their options
    cfg = CrawlConfig.CrawlConfig()
    for section in ('crawler', 'AlertTest', 'alert_section'):
        cfg.add_section(section)
    settings = (
        ('crawler', 'logpath', logfile),
        ('AlertTest', 'alerts', 'alert_section'),
        ('alert_section', 'log', "%s"),
        # the section refers to itself here
        ('alert_section', 'use', 'alert_section'),
    )
    for section, option, value in settings:
        cfg.set(section, option, value)

    CrawlConfig.log(logpath=logfile, close=True)
    payload = 'this is a test message from %s' % util.my_name()
    # constructing the Alert is what is expected to write the log entry
    x = Alert.Alert(caller='AlertTest', msg=payload, cfg=cfg)
    self.expected_in(payload, util.contents(logfile))
def test_ctor_reset_atime_default(self):
    """
    If reset_atime is not specified in the config or argument list, it
    should default to False
    """
    cf_name = self.tmpdir(util.my_name() + ".cfg")

    # write out a config file with no reset_atime spec, working on a deep
    # copy so the shared self.cfg_d is not modified
    cd = copy.deepcopy(self.cfg_d)
    del cd['cv']['reset_atime']
    self.write_cfg_file(cf_name, cd)

    # make the test config the default
    CrawlConfig.get_config(cfname=cf_name, reset=True)
    try:
        # get an hpss.HSI object and check its reset_atime attribute
        h = hpss.HSI(connect=False)
        self.expected(False, h.reset_atime)
    finally:
        # runs even when the check above fails; presumably this drops the
        # test config so later tests do not inherit it -- confirm against
        # CrawlConfig.get_config
        CrawlConfig.get_config(reset=True, soft=True)
def test_date_end(self):
    """
    Given a file containing several log records, return the timestamp on
    the last one.

    util.date_end() is expected to return only the date portion
    ("2014.0501") of the last timestamped line, skipping the untimestamped
    lines that follow it.
    """
    self.dbgfunc()
    tdata = ["This line should be ignored\n",
             "2014.0412 12:25:50 This is not the timestamp to return\n",
             "2014.0430 19:30:00 This should not be returned\n",
             "2014.0501 19:30:00 Return this one\n",
             "We want plenty of data here at the end of the file\n",
             "with no timestamp so we'll be forced to read\n",
             "backward a time or two, not just find the timestamp\n",
             "on the first read so we exercise revread.\n"]
    tfilename = self.tmpdir("%s.data" % util.my_name())
    # 'with' guarantees the file is closed (and flushed) before it is
    # read back, even if the write raises
    with open(tfilename, 'w') as f:
        f.writelines(tdata)
    self.expected("2014.0501", util.date_end(tfilename))
def test_rrfile_long(self):
    """
    Test the reverse read file class on a file that takes several reads:
    1024 bytes made of 64 each of the letters 'a' through 'p'. revread()
    is called until it returns an empty string, and the first and last
    characters of every chunk are checked.
    """
    self.dbgfunc()
    tdfile = self.tmpdir(util.my_name())
    clist = [chr(ord('a') + x) for x in range(0, 16)]
    with open(tdfile, 'w') as f:
        for c in clist:
            f.write(c * 64)

    rf = util.RRfile.open(tdfile, 'r')
    try:
        buf = rf.revread()
        while buf != '':
            # each chunk should start with the second-to-last letter still
            # in clist and end with the last one; the last letter is then
            # dropped before the next (earlier) chunk is read
            ref = clist[-2:]
            del clist[-1]
            self.expected(ref[0], buf[0])
            self.expected(ref[-1], buf[-1])
            buf = rf.revread()
    finally:
        # close the reader even when one of the checks above fails
        rf.close()
def test_rrfile_long(self):
    """
    Test the reverse read file class on a file that takes several reads:
    1024 bytes made of 64 each of the letters 'a' through 'p'. revread()
    is called until it returns an empty string, and the first and last
    characters of every chunk are checked.
    """
    self.dbgfunc()
    tdfile = self.tmpdir(util.my_name())
    clist = [chr(ord('a') + x) for x in range(0, 16)]
    with open(tdfile, 'w') as f:
        for c in clist:
            f.write(c * 64)

    rf = util.RRfile.open(tdfile, 'r')
    try:
        buf = rf.revread()
        while buf != '':
            # each chunk should start with the second-to-last letter still
            # in clist and end with the last one; the last letter is then
            # dropped before the next (earlier) chunk is read
            ref = clist[-2:]
            del clist[-1]
            self.expected(ref[0], buf[0])
            self.expected(ref[-1], buf[-1])
            buf = rf.revread()
    finally:
        # close the reader even when one of the checks above fails
        rf.close()
def test_hashalg_default(self):
    """
    When the config carries no hash_algorithm, 'md5' should be used
    """
    # my_name() has to be called from this method to yield this test's name
    test_name = util.my_name()
    self.check_hash_algorithm(test_name, '(none)', 'md5')
def test_load_already(self):
    """
    With the database and a checkables table in place and records in the
    table, calling load() on a Dimension should load the information from
    the table into the object. However, it should only count records where
    last_check <> 0.

    NOTE: the assertions below only confirm that each cos value appears as
    a key of p_sum and s_sum; the counts (and therefore the last_check
    filter) are not verified here.
    """
    self.dbgfunc()
    U.conditional_rm(self.dbname())
    CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
    Checkable.Checkable.ex_nihilo()
    chk = Checkable.Checkable
    # three cos values, three rows each; exactly one row per cos value has
    # a non-zero last_check
    testdata = [
        chk(rowid=1, path="/abc/001", type='f', cos='6001',
            checksum=0, last_check=0),
        chk(rowid=2, path="/abc/002", type='f', cos='6002',
            checksum=0, last_check=5),
        chk(rowid=3, path="/abc/003", type='f', cos='6003',
            checksum=1, last_check=0),
        chk(rowid=4, path="/abc/004", type='f', cos='6001',
            checksum=1, last_check=17),
        chk(rowid=5, path="/abc/005", type='f', cos='6002',
            checksum=0, last_check=0),
        chk(rowid=6, path="/abc/006", type='f', cos='6003',
            checksum=0, last_check=8),
        chk(rowid=7, path="/abc/007", type='f', cos='6001',
            checksum=0, last_check=0),
        chk(rowid=8, path="/abc/008", type='f', cos='6002',
            checksum=0, last_check=19),
        chk(rowid=9, path="/abc/009", type='f', cos='6003',
            checksum=0, last_check=0),
    ]

    # insert some test data into the table
    for t in testdata:
        t.persist()

    # get a default Dimension with the same name as the data in the table
    q = Dimension(name='cos')

    # this should load the data from the table into the object
    q.load()

    # verify the loaded data in the object
    self.expected('cos', q.name)
    # the failure message is built from the value being checked so it can
    # never name the wrong key (the '6002' check used to report '6001')
    for summary_name in ('p_sum', 's_sum'):
        keys = getattr(q, summary_name).keys()
        for cos in ('6001', '6002', '6003'):
            self.assertTrue(cos in keys,
                            "Expected '%s' in %s.keys()" %
                            (cos, summary_name))
def test_hashalg_sha384(self):
    """
    With hash_algorithm = sha384 in config
    """
    # my_name() has to be called from this method to yield this test's name
    self.check_hash_algorithm(util.my_name(), 'sha384')
def test_hashalg_sha512(self):
    """
    With hash_algorithm = sha512 in config
    """
    self.dbgfunc()
    # my_name() has to be called from this method to yield this test's name
    self.check_hash_algorithm(util.my_name(), 'sha512')
def test_hashalg_adler32(self):
    """
    With hash_algorithm = adler32 in config
    """
    # my_name() has to be called from this method to yield this test's name
    self.check_hash_algorithm(util.my_name(), 'adler32')
def test_hashalg_md5(self):
    """
    When the config sets hash_algorithm to md5
    """
    # my_name() has to be called from this method to yield this test's name
    test_name = util.my_name()
    self.check_hash_algorithm(test_name, 'md5')