def test_remote_workers(self):
    """Run the ingest/process/tally cycle once per configured remote worker.

    Worker ssh paths are read, whitespace-separated, from the
    ``TEST_REMOTE_WORKERS`` environment variable.  For every worker the
    test cleans it, ingests the two text fixtures, processes them, then
    has the master synch all workers, tally, and compares
    ``master.output(10)`` with the expected word counts.
    """
    # Use .get() so an unset variable falls through to the assertion below
    # (which carries the setup hint) instead of dying on a bare KeyError.
    remote_workers = environ.get('TEST_REMOTE_WORKERS', '')
    workers = remote_workers.split()
    self.assertTrue(
        workers,
        "must configure TEST_REMOTE_WORKERS environmenet variable to ssh_path for one or more remote workers"
    )

    master = Master(workers)
    # NOTE(review): this file reached review with its line breaks collapsed;
    # if the literal originally held one "word: count" pair per line, restore
    # the newlines here -- confirm against what master.output() emits.
    expected_output = '''the: 12464 and: 9022 i: 7697 to: 6919 of: 6508 a: 4466 in: 3756 that: 3537 he: 3194 my: 3040 '''

    for worker in workers:
        remote_worker = RemoteWorker(worker)
        remote_worker.clean()
        file_uris = [
            "testdata/remote_workers/dracula.txt",
            "testdata/remote_workers/frankenstein.txt",
        ]
        remote_worker.remote_injest(file_uris)
        remote_worker.process_input()

        # each pass through, only one will have the two files
        master.synch_all_workers()
        master.tally()
        output = master.output(10)
        self.assertEqual(output, expected_output,
                         "worker %s output wrong" % remote_worker.ssh_path)

        # NOTE(review): presumably resets fixture state before the next
        # worker's pass; setUp is defined outside this view -- confirm.
        self.setUp()
def test_synch(self):
    """Check that synch() leaves import_path and export_path equally populated.

    Copies the JSON fixtures from ``testdata/synch`` into
    ``Config.export_path``, runs ``RemoteWorker(".").synch()``, and then
    compares the number of entries ``ls`` reports for the export and
    import paths.
    """
    status = os.system("cp testdata/synch/*.json %s" % Config.export_path)
    # A failed copy must fail the test here: otherwise the count comparison
    # below could pass without synch() having handled any fixture at all.
    self.assertEqual(
        status, 0,
        "could not copy testdata/synch/*.json fixtures into the export path")

    remote_worker = RemoteWorker(".")
    remote_worker.synch()

    self.assertEqual(len(ls(Config.export_path)),
                     len(ls(Config.import_path)))