def test_local_end_to_end(self):
    """Run one local Worker and a Master end to end, in two passes.

    Pass 1: the ``data/test/*.txt`` fixtures are copied into
    ``Config.input_path``, the worker processes its input, and a master
    built with ``['.']`` synchs all workers, tallies and reports.

    Pass 2: the same worker is additionally given a URI through
    ``injest`` and the process/synch/tally/output sequence is repeated.

    Each report is compared against a fixed expected string.
    """
    # Stage the fixtures. The exit status is checked so that a failed copy
    # is reported as such instead of surfacing later as a count mismatch.
    copy_status = os.system("cp data/test/*.txt %s" % Config.input_path)
    self.assertEqual(
        copy_status, 0,
        'could not copy data/test/*.txt into Config.input_path')

    # --- pass 1: fixtures only -------------------------------------------
    worker = Worker()
    worker.process_input()

    master = Master(['.'])
    master.synch_all_workers()
    master.tally()
    output1 = master.output()

    # NOTE(review): the expected strings below are reproduced exactly as
    # they appear in this file (single-space separators, trailing space);
    # confirm they match the formatting produced by Master.output().
    expected_output1 = '''the: 56510 and: 37915 to: 27984 of: 27884 a: 22899 i: 22159 in: 17366 it: 15182 that: 14578 was: 13184 '''
    self.assertEqual(output1, expected_output1,
                     'End to End output1 not as expected')

    # --- pass 2: fixtures plus one extra document ------------------------
    # NOTE(review): presumably Worker.injest fetches this URI, which would
    # make the test depend on network access - confirm.
    uri = 'http://www.constitution.org/usdeclar.txt'
    worker.injest(uri)
    worker.process_input()

    master.synch_all_workers()
    master.tally()
    output2 = master.output()

    expected_output2 = '''the: 56588 and: 37972 to: 28049 of: 27964 a: 22914 i: 22159 in: 17387 it: 15188 that: 14591 was: 13184 '''
    self.assertEqual(output2, expected_output2,
                     'End to End output2 not as expected')
def test_output(self):
    """Check the text a Master reports for the pledge fixture.

    Copies ``testdata/output/pledge.json`` to ``Config.import_path``,
    builds a Master from ``Config.export_path`` and
    ``"data/test_output.json"``, tallies, and compares ``output()`` with a
    fixed expected string.
    """
    # NOTE(review): the expected text is reproduced exactly as it appears
    # in this file (single-space separators, trailing space); confirm it
    # matches the formatting produced by Master.output().
    expected = '''the: 3 and: 2 of: 2 for: 2 to: 2 all: 1 pledge: 1 allegiance: 1 america: 1 one: 1 '''

    # Stage the fixture; as before, the command's exit status is not
    # inspected.
    copy_command = "cp testdata/output/pledge.json %s" % Config.import_path
    os.system(copy_command)

    tally_master = Master(Config.export_path, "data/test_output.json")
    tally_master.tally()

    self.assertEqual(tally_master.output(), expected,
                     'Pledge output not as expected')
def test_remote_workers(self):
    """Run the word count against each configured remote worker in turn.

    The whitespace-separated entries of the ``TEST_REMOTE_WORKERS``
    environment variable are used both to build the Master and, one at a
    time, to build a RemoteWorker.  On each pass that worker is cleaned,
    given two fixture files through ``remote_injest`` and told to process
    its input; the master then synchs all workers, tallies, and its
    ``output(10)`` is compared with a fixed expected string.
    """
    # Use .get() rather than environ[...]: with the variable unset, a
    # plain lookup raises KeyError before the assertion below can report
    # what has to be configured.
    remote_workers = environ.get('TEST_REMOTE_WORKERS', '')
    workers = remote_workers.split()
    self.assertTrue(
        workers,
        "must configure TEST_REMOTE_WORKERS environmenet variable to ssh_path for one or more remote workers"
    )

    master = Master(workers)

    # NOTE(review): the expected text is reproduced exactly as it appears
    # in this file (single-space separators, trailing space); confirm it
    # matches the formatting produced by Master.output().
    expected_output = '''the: 12464 and: 9022 i: 7697 to: 6919 of: 6508 a: 4466 in: 3756 that: 3537 he: 3194 my: 3040 '''

    for worker in workers:
        remote_worker = RemoteWorker(worker)
        remote_worker.clean()

        file_uris = [
            "testdata/remote_workers/dracula.txt",
            "testdata/remote_workers/frankenstein.txt"
        ]
        remote_worker.remote_injest(file_uris)
        remote_worker.process_input()

        # each pass through, only one will have the two files
        master.synch_all_workers()
        master.tally()
        output = master.output(10)
        self.assertEqual(output, expected_output,
                         "worker %s output wrong" % remote_worker.ssh_path)

        # Re-run the fixture set-up before the next worker is tried
        # (presumably to reset shared state - confirm against setUp).
        self.setUp()