예제 #1
0
파일: test.py 프로젝트: bwtaylor/wordfreq
    def test_local_end_to_end(self):

        os.system("cp data/test/*.txt %s" % Config.input_path)
        worker = Worker()
        worker.process_input()
        master = Master(['.'])
        master.synch_all_workers()
        master.tally()
        output1 = master.output()
        expected_output1 = '''the: 56510
and: 37915
to: 27984
of: 27884
a: 22899
i: 22159
in: 17366
it: 15182
that: 14578
was: 13184
'''
        self.assertEqual(output1, expected_output1,
                         'End to End output1 not as expected')

        uri = 'http://www.constitution.org/usdeclar.txt'
        worker.injest(uri)
        worker.process_input()
        master.synch_all_workers()
        master.tally()
        output2 = master.output()
        expected_output2 = '''the: 56588
and: 37972
to: 28049
of: 27964
a: 22914
i: 22159
in: 17387
it: 15188
that: 14591
was: 13184
'''
        self.assertEqual(output2, expected_output2,
                         'End to End output2 not as expected')
예제 #2
0
파일: test.py 프로젝트: bwtaylor/wordfreq
    def test_remote_workers(self):

        remote_workers = environ['TEST_REMOTE_WORKERS']
        workers = remote_workers.split()
        self.assertTrue(
            len(workers) > 0,
            "must configure TEST_REMOTE_WORKERS environmenet variable to ssh_path for one or more remote workers"
        )

        master = Master(workers)

        expected_output = '''the: 12464
and: 9022
i: 7697
to: 6919
of: 6508
a: 4466
in: 3756
that: 3537
he: 3194
my: 3040
'''

        for worker in workers:
            remote_worker = RemoteWorker(worker)
            remote_worker.clean()
            file_uris = [
                "testdata/remote_workers/dracula.txt",
                "testdata/remote_workers/frankenstein.txt"
            ]
            remote_worker.remote_injest(file_uris)
            remote_worker.process_input()
            master.synch_all_workers(
            )  #each pass through, only one will have the two files
            master.tally()
            output = master.output(10)
            self.assertEqual(output, expected_output,
                             "worker %s output wrong" % remote_worker.ssh_path)
            self.setUp()