示例#1
0
    def handle_options(self):
        """Validate ``self.options`` and create the extractor when needed.

        Checks performed, in order:

        * ``dump_dir`` must be set.
        * If neither ``extract`` nor ``load`` is set, both are switched on.
        * Loading requires both ``project`` and ``nbhd``.
        * Extracting requires ``source == 'mysql'`` (the only source that
          is implemented) and a non-empty ``attachments_dir``.

        On success with extraction enabled, ``self.extractor`` is set to a
        ``MySQLExtractor`` built from the options.  Every failed check is
        reported through ``allura_base.log.error`` and then terminates the
        process with ``exit(2)``.
        """
        if not self.options.dump_dir:
            allura_base.log.error('You must specify directory for dump files')
            exit(2)

        if not self.options.extract and not self.options.load:
            # No action was requested explicitly, so perform both.
            self.options.extract = True
            self.options.load = True

        if self.options.load and (not self.options.project
                                  or not self.options.nbhd):
            allura_base.log.error('You must specify neighborhood and project '
                                  'to load data')
            exit(2)

        if self.options.extract:
            if self.options.source == 'mysql':
                self.extractor = MySQLExtractor(self.options)
            elif self.options.source in ('sqlite', 'postgres', 'sql-dump'):
                # Recognised source names that have no extractor yet.
                # The two literals are joined by the parser, so the first one
                # must end with a space to keep the sentences apart.
                allura_base.log.error('This source not implemented yet. '
                                      'Only mysql for now')
                exit(2)
            else:
                allura_base.log.error('You must specify valid data source')
                exit(2)

            if not self.options.attachments_dir:
                allura_base.log.error('You must specify path to directory '
                                      'with mediawiki attachments dump.')
                exit(2)
示例#2
0
    def setUp(self):
        """Create a ``MySQLExtractor`` whose data accessors are stubbed out.

        The ``_pages``, ``_history``, ``_talk`` and ``_attachments``
        attributes of ``MySQLExtractor`` are replaced on the class itself
        with functions that serve fixed in-memory data.
        """
        setup_basic_test()

        def fake_pages(self):
            # Three fixed pages, yielded one at a time as fresh dicts.
            page_rows = (
                (1, 'Test title'),
                (2, 'Main_Page'),
                (3, 'Test'),
            )
            for page_id, title in page_rows:
                yield {'page_id': page_id, 'title': title}

        def fake_history(self, page_id):
            def revision(timestamp, text, username):
                return {'timestamp': timestamp,
                        'text': text,
                        'username': username}

            # Two revisions per page, rebuilt on every call so callers get
            # their own dict objects.
            revisions_by_page = {
                1: [
                    revision(1, "Test", '******'),
                    revision(2, "Test Text", '******'),
                ],
                2: [
                    revision(1, "Main_Page", '******'),
                    revision(2, "Main_Page text", '******'),
                ],
                3: [
                    revision(1, "Some test text", ''),
                    revision(2, "", ''),
                ],
            }
            for entry in revisions_by_page[page_id]:
                yield entry

        def fake_talk(self, page_title):
            talk_text = 'Talk for page %s.' % page_title
            return {'text': talk_text, 'timestamp': 1, 'username': '******'}

        def fake_attachments(self, *args, **kwargs):
            # The unreachable ``yield`` turns this into a generator function
            # that finishes immediately, i.e. an empty iterator.
            return
            yield

        # Monkey-patch MySQLExtractor for the test.
        stubs = (
            ('_pages', fake_pages),
            ('_history', fake_history),
            ('_talk', fake_talk),
            ('_attachments', fake_attachments),
        )
        for attr_name, stub in stubs:
            setattr(MySQLExtractor, attr_name, stub)

        options = mock.Mock()
        options.dump_dir = '/tmp/w2m_test'
        self.options = options
        self.extractor = MySQLExtractor(self.options)
示例#3
0
class TestMySQLExtractor(object):
    """Run ``MySQLExtractor`` against canned wiki data.

    ``setUp`` swaps the extractor's data accessors for in-memory stubs; the
    tests then call the extraction methods and compare the JSON files found
    under ``/tmp/w2m_test`` with the expected content.
    """

    def setUp(self):
        """Create a ``MySQLExtractor`` whose data accessors are stubbed out.

        The ``_pages``, ``_history``, ``_talk`` and ``_attachments``
        attributes of ``MySQLExtractor`` are replaced on the class itself
        with functions that serve fixed in-memory data.
        """
        setup_basic_test()

        def fake_pages(self):
            # Three fixed pages, yielded one at a time as fresh dicts.
            page_rows = (
                (1, 'Test title'),
                (2, 'Main_Page'),
                (3, 'Test'),
            )
            for page_id, title in page_rows:
                yield {'page_id': page_id, 'title': title}

        def fake_history(self, page_id):
            def revision(timestamp, text, username):
                return {'timestamp': timestamp,
                        'text': text,
                        'username': username}

            # Two revisions per page, rebuilt on every call so callers get
            # their own dict objects.
            revisions_by_page = {
                1: [
                    revision(1, "Test", '******'),
                    revision(2, "Test Text", '******'),
                ],
                2: [
                    revision(1, "Main_Page", '******'),
                    revision(2, "Main_Page text", '******'),
                ],
                3: [
                    revision(1, "Some test text", ''),
                    revision(2, "", ''),
                ],
            }
            for entry in revisions_by_page[page_id]:
                yield entry

        def fake_talk(self, page_title):
            talk_text = 'Talk for page %s.' % page_title
            return {'text': talk_text, 'timestamp': 1, 'username': '******'}

        def fake_attachments(self, *args, **kwargs):
            # The unreachable ``yield`` turns this into a generator function
            # that finishes immediately, i.e. an empty iterator.
            return
            yield

        # Monkey-patch MySQLExtractor for the test.
        stubs = (
            ('_pages', fake_pages),
            ('_history', fake_history),
            ('_talk', fake_talk),
            ('_attachments', fake_attachments),
        )
        for attr_name, stub in stubs:
            setattr(MySQLExtractor, attr_name, stub)

        options = mock.Mock()
        options.dump_dir = '/tmp/w2m_test'
        self.options = options
        self.extractor = MySQLExtractor(self.options)

    def test_extract_pages(self):
        """Every revision of every stub page ends up in its own JSON file."""
        self.extractor.extract_pages()

        # (file to read, content it must hold) for each page revision.
        expected_revisions = [
            # rev 1 of page 1
            ('/tmp/w2m_test/pages/1/history/1.json', {
                'timestamp': 1,
                'text': 'Test',
                'page_id': 1,
                'title': 'Test title',
                'username': '******',
            }),
            # rev 2 of page 1
            ('/tmp/w2m_test/pages/1/history/2.json', {
                'timestamp': 2,
                'text': 'Test Text',
                'page_id': 1,
                'title': 'Test title',
                'username': '******',
            }),
            # rev 1 of page 2
            ('/tmp/w2m_test/pages/2/history/1.json', {
                'timestamp': 1,
                'text': 'Main_Page',
                'page_id': 2,
                'title': 'Main_Page',
                'username': '******',
            }),
            # rev 2 of page 2
            ('/tmp/w2m_test/pages/2/history/2.json', {
                'timestamp': 2,
                'text': 'Main_Page text',
                'page_id': 2,
                'title': 'Main_Page',
                'username': '******',
            }),
            # rev 1 of page 3
            ('/tmp/w2m_test/pages/3/history/1.json', {
                'timestamp': 1,
                'text': 'Some test text',
                'page_id': 3,
                'title': 'Test',
                'username': '',
            }),
            # rev 2 of page 3
            ('/tmp/w2m_test/pages/3/history/2.json', {
                'timestamp': 2,
                'text': '',
                'page_id': 3,
                'title': 'Test',
                'username': '',
            }),
        ]
        for json_path, expected in expected_revisions:
            with open(json_path, 'r') as dumped:
                actual = json.load(dumped)
            assert actual == expected

    def test_extract_talk(self):
        """The talk page of each given page is dumped as discussion.json."""
        source_pages = [
            {'page_id': 1, 'title': 'Test 1'},
            {'page_id': 2, 'title': 'Test 2'},
            {'page_id': 3, 'title': 'Test 3'},
        ]
        for source_page in source_pages:
            self.extractor.extract_talk(source_page)

        # (file to read, content it must hold) for each talk page.
        expected_talks = [
            ('/tmp/w2m_test/pages/1/discussion.json', {
                'text': 'Talk for page Test 1.',
                'username': '******',
                'timestamp': 1,
            }),
            ('/tmp/w2m_test/pages/2/discussion.json', {
                'text': 'Talk for page Test 2.',
                'timestamp': 1,
                'username': '******',
            }),
            ('/tmp/w2m_test/pages/3/discussion.json', {
                'text': 'Talk for page Test 3.',
                'timestamp': 1,
                'username': '******',
            }),
        ]
        for json_path, expected in expected_talks:
            with open(json_path, 'r') as dumped:
                actual = json.load(dumped)
            assert actual == expected