示例#1
0
    def test_group_processing(self):
        # Runs one word_count signature per fixture text as a group, merges
        # the per-text results with reduce_word_count and checks that
        # top_ten of the merged counts equals test_data/combined.json.
        FILES = ['test_data/a_modest_proposal.txt',
                 'test_data/metamorphosis.txt',
                 'test_data/leviathan.txt']

        def read_text(path):
            # Close every fixture deterministically instead of leaving the
            # handle to the garbage collector.
            with open(path, 'r') as handle:
                return handle.read()

        texts = [read_text(name) for name in FILES]
        with open('test_data/combined.json', 'r') as expected_file:
            top10 = json.load(expected_file)

        result = group(word_count.s(text) for text in texts).apply_async()
        self.assertEqual(top_ten(reduce_word_count(result.get())), top10)
示例#2
0
    def test_group_processing(self):
        # Dispatches a group of word_count signatures (one per fixture text),
        # reduces the collected results and compares their top ten with the
        # expected value stored in test_data/combined.json.
        FILES = [
            'test_data/a_modest_proposal.txt', 'test_data/metamorphosis.txt',
            'test_data/leviathan.txt'
        ]
        texts = []
        for name in FILES:
            # "with" guarantees the fixture is closed once it has been read.
            with open(name, 'r') as fixture:
                texts.append(fixture.read())
        with open('test_data/combined.json', 'r') as expected_file:
            top10 = json.load(expected_file)

        result = group(word_count.s(text) for text in texts).apply_async()
        self.assertEqual(top_ten(reduce_word_count(result.get())), top10)
示例#3
0
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

from __future__ import absolute_import

from celery import group
import sys
from worker import word_count
from collector import reduce_word_count, top_ten

# Script body: with no command-line arguments print the usage text; otherwise
# read every named file, run one word_count signature per text as a group and
# print top_ten of the reduced result.
if len(sys.argv) == 1:
    print(
        "Simple distributed file indexer: counts top 10 words in files in\n"
        "provided directories.\n"
        "Usage: python scheduler.py FILE1 FILE2 ...")
else:
    texts = []
    for name in sys.argv[1:]:
        # Close each input file as soon as its contents have been read.
        with open(name, 'r') as source:
            texts.append(source.read())
    result = group(word_count.s(text) for text in texts).apply_async()
    # Parenthesised single-argument print is valid both as a Python 2
    # statement and as a Python 3 function call, matching the usage branch.
    print(top_ten(reduce_word_count(result.get())))
示例#4
0
 def test_two_dicts(self):
     # Two counters holding the same word are expected to reduce to a single
     # counter whose count for that word is 3.
     counters = [Counter({'word': 1}), Counter({'word': 2})]
     merged = reduce_word_count(counters)
     expected = Counter({'word': 3})
     self.assertEqual(merged, expected)
示例#5
0
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

from __future__ import absolute_import

from celery import group
import sys
from worker import word_count
from collector import reduce_word_count, top_ten


# Script body: print usage when called without arguments; otherwise count
# words across the files named on the command line and print the top ten.
if len(sys.argv) == 1:
    print(
        "Simple distributed file indexer: counts top 10 words in files in\n"
        "provided directories.\n"
        "Usage: python scheduler.py FILE1 FILE2 ..."
    )
else:
    texts = []
    for name in sys.argv[1:]:
        # The context manager closes each file right after it is read.
        with open(name, "r") as source:
            texts.append(source.read())
    result = group(word_count.s(text) for text in texts).apply_async()
    # Call-style print parses under Python 3 and prints the same value under
    # Python 2, unlike the bare print statement.
    print(top_ten(reduce_word_count(result.get())))
示例#6
0
 def test_single_dict(self):
     # A list holding one counter is expected to reduce to an equal counter.
     merged = reduce_word_count([Counter({'word': 1})])
     expected = Counter({'word': 1})
     self.assertEqual(merged, expected)
示例#7
0
 def test_empty(self):
     # Reducing an empty list is expected to yield an empty Counter.
     merged = reduce_word_count([])
     self.assertEqual(merged, Counter())
 def test_two_dicts(self):
     # Counts of 1 and 2 for the same word are expected to reduce to 3.
     first = Counter({'word': 1})
     second = Counter({'word': 2})
     merged = reduce_word_count([first, second])
     self.assertEqual(merged, Counter({'word': 3}))
 def test_single_dict(self):
     # Reducing a one-element list is expected to give back the same counts.
     only = Counter({'word': 1})
     merged = reduce_word_count([only])
     self.assertEqual(merged, Counter({'word': 1}))
示例#10
0
 def test_empty(self):
     # With no counters to merge, the expected result is an empty Counter.
     no_counters = []
     expected = Counter()
     self.assertEqual(reduce_word_count(no_counters), expected)