def get_random_corpus():
    """Pick a random corpus file and derive a "Marriage" title from it.

    A category is chosen at random from ``pc.get_categories()``, then a file
    is chosen at random from ``pc.get_files(category)`` and loaded with
    ``pc.get_file``.  (``pc`` and ``choice`` come from the enclosing module;
    presumably ``pycorpora`` and ``random.choice`` -- confirm against the
    file's imports.)

    Returns:
        A ``(title, corpus)`` tuple.  ``title`` is built from the file's
        ``'description'`` entry; ``corpus`` is ``list(value)`` of the last
        non-description entry, or an empty list when the file has none.

    Side effects:
        Rebinds the module-level ``title`` whenever the file has a
        ``'description'`` entry.

    NOTE(review): when the file has no ``'description'`` entry, ``title`` is
    left untouched, so the value from an earlier call is returned (or
    ``NameError`` is raised if it was never set).
    """
    # Kept global on purpose: other code in the module may read ``title``.
    global title

    category = choice(pc.get_categories())
    data = pc.get_file(category, choice(pc.get_files(category)))

    # Bound up front so a description-only file cannot hit an
    # UnboundLocalError at the return statement.
    corpus = []
    for key, value in data.items():
        if key == 'description':
            if 'List' in value or 'list' in value:
                # Swap both capitalisations of "list" for "Marriage".
                renamed = value.replace('list', 'Marriage')
                renamed = renamed.replace('List', 'Marriage')
                title = '##' + renamed
            else:
                title = '##The Marriage of ' + value
        else:
            # Every non-description entry overwrites the previous one, so
            # the last such entry in iteration order wins.
            corpus = list(value)
    return title, corpus
def test_get_categories(self):
    """Check the top-level and nested results of ``pycorpora.get_categories``.

    The top-level listing must contain ``'pycorpora_test'``, and the listing
    for ``"pycorpora_test"`` must equal ``['subdir']``.
    """
    import pycorpora

    # The top-level listing is checked first; the nested lookup only runs
    # once that assertion has passed.
    self.assertIn('pycorpora_test', pycorpora.get_categories())
    self.assertEqual(pycorpora.get_categories("pycorpora_test"), ['subdir'])