def test_thesis(self):
    """Run every thesis test configuration against every journal.

    For each test name T001..T006, the entries of ``self.journals`` are
    visited in sorted key order; a fresh ``MaschineLSH`` is built for each
    (test name, journal) pair and processed. All runs write to the same
    working directory, which is obtained once up front.
    """
    output_dir = get_working_dir()
    test_names = ('T001', 'T002', 'T003', 'T004', 'T005', 'T006')
    for test_name in test_names:
        for domain_key in sorted(self.journals):
            current_journal = self.journals[domain_key]
            runner = MaschineLSH(
                config,
                current_journal,
                outdir=output_dir,
                testname=test_name,
            )
            runner.process()
def test_journal_001(self):
    """Check one known journal entry and the cross derived from it.

    The expected values (entry path, timestamp, ids) are read from the
    config section named by ``self.test_domain``.
    """
    domain = self.test_domain
    entry_path = config.get(domain, 'check_path1')
    entry_ts = config.getint(domain, 'check_ts11')
    expected_wlid = config.get(domain, 'check_uid11')

    output_dir = get_working_dir()
    runner = MaschineLSH(config, self.journal, outdir=output_dir)

    # The journal under test must belong to the configured test domain.
    self.assertEqual(domain, self.journal.domain)

    # Look the entry up by (path, timestamp) and verify its id.
    entry = runner.journal.get_entry(entry_path, entry_ts)
    self.assertEqual(expected_wlid, entry['warc_entry'].wlid)

    # Build the cross for that entry and verify id, bit tuple and current
    # entry against the expected values.
    cross = runner.get_cross(entry)
    expected_cross_id = config.get(domain, 'check_cross11')
    self.assertEqual(expected_cross_id, cross.get_unique_id())
    self.assertEqual((1, 1, 1, 1, 1), cross.to_bits())
    self.assertEqual(expected_wlid, cross.cur['warc_entry'].wlid)

    # The instance is not used further; only its construction is exercised.
    diff_method = DBCEMethodLSH(config, cross)
def test_process_001(self):
    """Process ``self.journal`` under the T001, T002 and T006 test names.

    A fresh ``MaschineLSH`` is built per test name; all runs write to the
    same working directory. The test passes when no run raises.

    Earlier versions also read ``check_path1``, ``check_ts11`` and
    ``check_uid11`` from the config into locals that were never used; those
    lookups were dropped so the test no longer depends on config keys it
    does not need.
    """
    workdir = get_working_dir()
    for testname in ('T001', 'T002', 'T006'):
        maschine = MaschineLSH(
            config,
            self.journal,
            outdir=workdir,
            testname=testname,
        )
        maschine.process()
parser.error('missing arguments') elif args.report_dir: try: erc = load_eval_results(config, args.report_dir) except: import sys import traceback atype, value, tb = sys.exc_info() traceback.print_exc() pdb.post_mortem(tb) elif args.mode == 'idx': journals = idx_to_journals(config, args.idx_sources) if args.lsh_only and not args.manual_annotate: workdir = get_working_dir() for domain in sorted(journals): journal = journals[domain] try: maschine = MaschineLSH(config, journal, outdir=workdir) maschine.process() except: import sys import traceback atype, value, tb = sys.exc_info() traceback.print_exc() pdb.post_mortem(tb) elif args.manual_annotate: if len(args.domain) != 1: parser.error('manual annotation needs exactly one domain')