class TestMySQLExtractor(object):
    """Check MySQLExtractor's page-history and talk-page extraction.

    The extractor's data hooks (``_pages``, ``_history``, ``_talk`` and
    ``_attachments``) are replaced with in-memory stubs for each test, and
    the JSON files the extractor writes under ``options.dump_dir`` are
    read back and compared against the stub data.
    """

    # Names of the MySQLExtractor attributes that setUp replaces with stubs
    # and tearDown puts back.
    _PATCHED_HOOKS = ('_pages', '_history', '_talk', '_attachments')

    def setUp(self):
        """Install stub data hooks on MySQLExtractor and build an extractor."""
        setup_basic_test()
        self.options = mock.Mock()
        self.options.dump_dir = '/tmp/w2m_test'

        # monkey-patch MySQLExtractor for test
        def pages(self):
            yield {'page_id': 1, 'title': 'Test title'}
            yield {'page_id': 2, 'title': 'Main_Page'}
            yield {'page_id': 3, 'title': 'Test'}

        def history(self, page_id):
            data = {
                1: [
                    {'timestamp': 1, 'text': "Test", 'username': '******'},
                    {'timestamp': 2, 'text': "Test Text",
                     'username': '******'},
                ],
                2: [
                    {'timestamp': 1, 'text': "Main_Page",
                     'username': '******'},
                    {'timestamp': 2, 'text': "Main_Page text",
                     'username': '******'},
                ],
                3: [
                    {'timestamp': 1, 'text': "Some test text",
                     'username': ''},
                    {'timestamp': 2, 'text': "", 'username': ''},
                ],
            }
            for rev in data[page_id]:
                yield rev

        def talk(self, page_title):
            return {
                'text': 'Talk for page %s.' % page_title,
                'timestamp': 1,
                'username': '******',
            }

        def attachments(self, *args, **kwargs):
            # make 'empty' iterator: the unreachable yield turns this
            # function into a generator that produces nothing.
            if False:
                yield

        stubs = {
            '_pages': pages,
            '_history': history,
            '_talk': talk,
            '_attachments': attachments,
        }

        # Remember what the class itself defined before patching so that
        # tearDown can undo the patch.  Reading the class __dict__ (rather
        # than getattr) keeps staticmethod/classmethod wrappers intact and
        # distinguishes "defined here" from "inherited".
        own_attrs = MySQLExtractor.__dict__
        self._saved_hooks = dict(
            (name, own_attrs[name])
            for name in self._PATCHED_HOOKS
            if name in own_attrs
        )

        for name in self._PATCHED_HOOKS:
            setattr(MySQLExtractor, name, stubs[name])

        self.extractor = MySQLExtractor(self.options)

    def tearDown(self):
        """Undo the class-level monkey-patching done in setUp.

        Without this the stubs would stay installed on MySQLExtractor for
        every test that runs later in the same process.
        """
        saved = getattr(self, '_saved_hooks', None)
        if saved is None:
            # setUp did not get as far as patching; nothing to undo.
            return
        for name in self._PATCHED_HOOKS:
            if name in saved:
                setattr(MySQLExtractor, name, saved[name])
            elif name in MySQLExtractor.__dict__:
                # The attribute did not exist on the class itself before
                # the patch, so remove the stub instead of restoring.
                delattr(MySQLExtractor, name)
        self._saved_hooks = None

    def _read_json(self, relative_path):
        """Load a JSON file located under the configured dump directory.

        :param relative_path: path relative to ``self.options.dump_dir``,
            using ``/`` separators.
        :returns: the decoded JSON document.
        """
        path = '%s/%s' % (self.options.dump_dir, relative_path)
        with open(path, 'r') as f:
            return json.load(f)

    def test_extract_pages(self):
        """Test that pages and edit history extracted properly"""
        self.extractor.extract_pages()

        # (page_id, history file number, expected JSON content).
        # NOTE(review): in the stub data the file number coincides with
        # both the revision's timestamp and its 1-based position; which of
        # the two the extractor uses for the file name is not visible here.
        expected_revisions = [
            (1, 1, {'timestamp': 1, 'text': 'Test', 'page_id': 1,
                    'title': 'Test title', 'username': '******'}),
            (1, 2, {'timestamp': 2, 'text': 'Test Text', 'page_id': 1,
                    'title': 'Test title', 'username': '******'}),
            (2, 1, {'timestamp': 1, 'text': 'Main_Page', 'page_id': 2,
                    'title': 'Main_Page', 'username': '******'}),
            (2, 2, {'timestamp': 2, 'text': 'Main_Page text', 'page_id': 2,
                    'title': 'Main_Page', 'username': '******'}),
            (3, 1, {'timestamp': 1, 'text': 'Some test text', 'page_id': 3,
                    'title': 'Test', 'username': ''}),
            (3, 2, {'timestamp': 2, 'text': '', 'page_id': 3,
                    'title': 'Test', 'username': ''}),
        ]

        for page_id, number, res_page in expected_revisions:
            page = self._read_json(
                'pages/%d/history/%d.json' % (page_id, number))
            assert page == res_page, (
                'page %d, history file %d: %r != %r'
                % (page_id, number, page, res_page))

    def test_extract_talk(self):
        """Test that talk pages extracted properly."""
        pages = [
            {'page_id': 1, 'title': 'Test 1'},
            {'page_id': 2, 'title': 'Test 2'},
            {'page_id': 3, 'title': 'Test 3'},
        ]

        # Extract every talk page first, then verify, so the checks see the
        # state left behind after all extractions have run.
        for page in pages:
            self.extractor.extract_talk(page)

        for page in pages:
            discussion = self._read_json(
                'pages/%d/discussion.json' % page['page_id'])
            expected = {
                'text': 'Talk for page %s.' % page['title'],
                'timestamp': 1,
                'username': '******',
            }
            assert discussion == expected, (
                'talk page %d: %r != %r'
                % (page['page_id'], discussion, expected))