''' Created on Apr 24, 2013 @author: inesmeya ''' import unittest from wiki_knows import wiki_knowledge from io_test_utils import getOutputFile, getInputFile from model.stemmers import PorterStemmer from model.semantic_interpreter import SemanticComparer from model.logger import getTestLogger _log = getTestLogger("wiki_exec") """ Articles -------- Politics, Technology, Computer, Nature, Art, Busyness, Internet, Biology, Physics, Law, Economics, History, Education, War, Love, Emotion, Medicine, Research, Sociology, Wealth, Science Code to download articles ------------------------- articles_titles = '''Politics, Technology, Computer, Nature, Art, Busyness, Internet, Biology, Physics, Law, Economics, History, Education, War, Love, Emotion, Medicine, Research, Sociology,
import unittest import wiki_knows.wiki_knowledge as wn import io_test_utils as io_tu from model.stemmers import StopWordsStemmer from model.semantic_interpreter import SemanticComparer import test.test_utils as test_utils from io_test_utils import getOutputFile, getInputFile import os from model.logger import getTestLogger _log = getTestLogger("Test") class Test(unittest.TestCase): def setUp(self): pass def tearDown(self): pass def test__parse_dump(self): wiki_dump_path = io_tu.getInputFile(io_tu.FilesList.test__parse_tools) wiki_parsed_dump_path = io_tu.getOutputFile( io_tu.FilesList.test__parse_tools) wn.parse_dump(wiki_dump_path, wiki_parsed_dump_path) def test_number_of_concepts(self): """ db builder reads parsed xml properly"""
''' Created on Apr 24, 2013 @author: inesmeya ''' import unittest from wiki_knows import wiki_knowledge from io_test_utils import getOutputFile, getInputFile from model.stemmers import PorterStemmer from model.semantic_interpreter import SemanticComparer from model.logger import getTestLogger _log = getTestLogger("wiki_exec") """ Articles -------- Politics, Technology, Computer, Nature, Art, Busyness, Internet, Biology, Physics, Law, Economics, History, Education, War, Love, Emotion, Medicine, Research, Sociology, Wealth, Science Code to download articles ------------------------- articles_titles = '''Politics, Technology, Computer, Nature, Art, Busyness, Internet, Biology, Physics, Law, Economics, History, Education, War, Love, Emotion, Medicine, Research, Sociology, Wealth, Science'''
import xml.etree.ElementTree as etree
import gzip
import WikiExtractor
from parsers.wikitext_processor import WikiTextProcessor
from model.wiki_doc import WikiDocument
from model.wiki_doc import doc_from_xml, WdNames
from model.logger import getTestLogger

_log = getTestLogger(__name__)

#==================================================================================================

# XML namespace that qualifies every element name in this module's wikipedia tags.
WIKIPEDIA_NAMESPACE = 'http://www.mediawiki.org/xml/export-0.8/'


def make_wikipedia_tag(tag):
    '''Qualify a bare tag name with the wikipedia XML namespace.

    @param tag: string. Local tag name as it appears in the wikipedia xml
                dump file, such as 'page'
    @return: the namespace-qualified name, i.e. "{WIKIPEDIA_NAMESPACE}tag"
    '''
    # Mapping-style %-formatting: the literal braces wrap the namespace,
    # and %(...)s applies the same str() conversion as a positional %s.
    parts = {'namespace': WIKIPEDIA_NAMESPACE, 'local': tag}
    return "{%(namespace)s}%(local)s" % parts


# namespace-qualified name of the wikipedia <page> element
PAGE_TAG = make_wikipedia_tag('page')

# tag name used for parsed documents (not namespace-qualified)
DOC_TAG = 'doc'
import unittest
import wiki_knows.wiki_knowledge as wn
import io_test_utils as io_tu
from model.stemmers import StopWordsStemmer
from model.semantic_interpreter import SemanticComparer
import test.test_utils as test_utils
from io_test_utils import getOutputFile, getInputFile
import os
from model.logger import getTestLogger

_log = getTestLogger("Test")


class Test(unittest.TestCase):
    """Test case exercising wiki_knowledge.parse_dump."""

    def setUp(self):
        """No per-test fixture is prepared."""

    def tearDown(self):
        """No per-test cleanup is performed."""

    def test__parse_dump(self):
        # Smoke test: there is no assertion, so it passes as long as
        # parse_dump returns without raising.
        # The same FilesList entry names both the input dump and the
        # output location of the parsed result.
        fixture_key = io_tu.FilesList.test__parse_tools
        source_path = io_tu.getInputFile(fixture_key)
        target_path = io_tu.getOutputFile(fixture_key)
        wn.parse_dump(source_path, target_path)