Пример #1
0
class UtilsJournalMigratedPageTestCase(BaseTestCase):
    """Checks link handling of a journal MigratedPage (acron='acron')."""

    def setUp(self):
        """Create the PageMigration and the empty journal page under test."""
        self.migration = PageMigration(
            'http://www.scielo.br', REVISTAS_PATH, IMG_REVISTAS_PATH, HTDOCS)
        self.page = MigratedPage(self.migration, '', acron='acron', lang='es')

    def test_content(self):
        """Verify hrefs after assigning content and again after fix_urls().

        After assignment the test expects '/revistas/acron/acron.jpg' to be
        present while the other four href values are still quoted verbatim.
        After fix_urls() it expects the scielo.br link to appear as
        '/jxabc.png'.
        """
        self.page.content = """<html><body><a href="acron.jpg"/>
            <a href="www.scielo.br/jxabc.png"/>
            <a href="*****@*****.**"/>
            <a href="www.site.org"/>
            <a href="xyz.txt"/>
            </body></html>"""

        fragments_after_assignment = (
            '/revistas/acron/acron.jpg',
            '"*****@*****.**"',
            '"www.site.org"',
            '"xyz.txt"',
            '"www.scielo.br/jxabc.png"',
        )
        for fragment in fragments_after_assignment:
            self.assertIn(fragment, self.page.content)

        self.page.fix_urls()
        self.assertIn('"/jxabc.png"', self.page.content)
Пример #2
0
def migrate_page_content(content, language, acron=None, page_name=None):
    """
    Migrate the content of any HTML page and return the new content.

    Parameters:
        content: HTML content of the page
        language: language of the page text
        acron: journal acronym, when the page belongs to a journal
        page_name: page title, when the page is not a journal page

    Returns:
        The content of the migrated page, or None when ``content`` is empty.

    Raises:
        IOError: ``content`` was given but neither ``acron`` nor
            ``page_name`` was.
    """
    # Nothing to migrate: fall out with None, as an empty page has no links.
    if not content:
        return None
    if not (acron or page_name):
        raise IOError('migrate_page_content() requer acron ou page_name')

    config = current_app.config
    pages_dir = config['JOURNAL_PAGES_SOURCE_PATH']
    images_dir = config['JOURNAL_IMAGES_SOURCE_PATH']
    old_website = config['JOURNAL_PAGES_ORIGINAL_WEBSITE']

    migrated = MigratedPage(
        PageMigration(old_website, pages_dir, images_dir),
        content,
        acron=acron,
        page_name=page_name,
        lang=language,
    )
    migrated.migrate_urls(
        migrate_page_create_file, migrate_page_create_image)
    return migrated.content
Пример #3
0
class UtilsMigratedABMVZJournalPageTestCase(BaseTestCase):
    """Tests file migration for the 'abmvz' journal page via a download."""

    def create_item(self, source, dest, check_if_exists=False):
        """Stub creator callback: ignores its arguments and returns ''."""
        return ''

    def setUp(self):
        """Create the PageMigration (test fixture paths) and the empty page."""
        original_website = 'http://www.scielo.br'
        self.revistas_path = TESTS_REVISTAS_PATH
        self.img_revistas_path = TESTS_IMG_REVISTAS_PATH
        self.static_files_path = None
        self.migration = PageMigration(original_website, self.revistas_path,
                                       self.img_revistas_path,
                                       self.static_files_path)
        self.page = MigratedPage(self.migration, '', acron='abmvz', lang='es')

    @patch('requests.get')
    @patch.object(wutils, 'migrate_page_create_file')
    def test_create_files_from_downloaded_files(
        self,
        mocked_create_file_function,
        mocked_requests_get,
    ):
        """A PDF link with spaces/accents is renamed and re-linked to /media.

        ``requests.get`` is mocked to answer 200 with a small body, and the
        create-file callback is mocked to return the final media URL.
        """
        mocked_response = Mock()
        mocked_response.status_code = 200
        mocked_response.content = b'content'
        mocked_requests_get.return_value = mocked_response

        pdf_file_path = (
            'PASSO A PASSO – SISTEMA DE SUBMISSÃO DE ARTIGOS POR INTERMÉDIO DO SCHOLARONE.pdf'
        )
        self.page.content = '<a href="{}"/>'.format(pdf_file_path)
        self.assertIn('/revistas/abmvz/{}'.format(pdf_file_path),
                      self.page.content)

        files = list(self.page.files)
        result = self.migration.get_possible_locations(files[0]['href'])
        self.assertIn('{}/abmvz/{}'.format(TESTS_REVISTAS_PATH, pdf_file_path),
                      result)

        mocked_create_file_function.side_effect = [
            '/media/files/abmvz_passo-a-passo-sistema-de-submissao-de-artigos-por-intermedio-do-scholarone.pdf',
        ]
        _file_info = self.page.get_file_info(files[0]['href'])
        file_info = (
            '/tmp/tmpcjnmoyos/PASSO A PASSO – SISTEMA DE SUBMISSÃO DE ARTIGOS POR INTERMÉDIO DO SCHOLARONE.pdf',
            'abmvz_passo-a-passo-sistema-de-submissao-de-artigos-por-intermedio-do-scholarone.pdf',
            True)
        # Only the destination name and the flag are compared; the first
        # element is skipped (presumably a per-run temp path -- confirm).
        self.assertEqual(file_info[1], _file_info[1])
        self.assertEqual(file_info[2], _file_info[2])

        self.page.create_files(mocked_create_file_function)
        results = [item['href'] for item in self.page.files]
        expected_items = [
            '/media/files/abmvz_passo-a-passo-sistema-de-submissao-de-artigos-por-intermedio-do-scholarone.pdf',
        ]
        # Whole-list comparison: an element-wise zip() loop would pass
        # silently if ``results`` came back shorter than expected.
        self.assertEqual(results, expected_items)
Пример #4
0
 def setUp(self):
     """Create the PageMigration and the empty 'acron' journal page."""
     self.migration = PageMigration(
         'http://www.scielo.br',
         REVISTAS_PATH,
         IMG_REVISTAS_PATH,
         HTDOCS,
     )
     self.page = MigratedPage(self.migration, '', acron='acron', lang='es')
Пример #5
0
 def setUp(self):
     """Create the PageMigration (test fixture paths) and the 'aa' page."""
     # The paths are kept on the instance; no static-files directory is used.
     self.revistas_path = TESTS_REVISTAS_PATH
     self.img_revistas_path = TESTS_IMG_REVISTAS_PATH
     self.static_files_path = None
     self.migration = PageMigration(
         'http://www.scielo.br',
         self.revistas_path,
         self.img_revistas_path,
         self.static_files_path,
     )
     self.page = MigratedPage(self.migration, '', acron='aa', lang='es')
Пример #6
0
class UtilsMigratedJournalPageTestCase(BaseTestCase):
    """Tests file migration for the 'aa' journal page using local fixtures."""

    def setUp(self):
        """Create the PageMigration (test fixture paths) and the empty page."""
        original_website = 'http://www.scielo.br'
        self.revistas_path = TESTS_REVISTAS_PATH
        self.img_revistas_path = TESTS_IMG_REVISTAS_PATH
        self.static_files_path = None
        self.migration = PageMigration(original_website, self.revistas_path,
                                       self.img_revistas_path,
                                       self.static_files_path)
        self.page = MigratedPage(self.migration, '', acron='aa', lang='es')

    @patch.object(wutils, 'migrate_page_create_file')
    def test_create_files(self, mocked_create_file_function):
        """A PDF link with spaces/accents is renamed and re-linked to /media.

        The create-file callback is mocked to return the final media URL.
        """
        pdf_file_path = 'PASSO A PASSO – SISTEMA DE SUBMISSÃO DE ARTIGOS POR INTERMÉDIO DO SCHOLARONE.pdf'
        self.page.content = '<a href="{}"/>'.format(pdf_file_path)
        self.assertIn('/revistas/aa/{}'.format(pdf_file_path),
                      self.page.content)

        for a in self.page.files:
            result = self.migration.get_possible_locations(a['href'])
            self.assertIn(
                '{}/aa/{}'.format(TESTS_REVISTAS_PATH, pdf_file_path), result)

        mocked_create_file_function.side_effect = [
            '/media/files/aa_passo-a-passo-sistema-de-submissao-de-artigos-por-intermedio-do-scholarone.pdf',
        ]
        _file_info = self.page.get_file_info(list(self.page.files)[0]['href'])

        # Expected (source location, destination name, flag) triple.
        file_info = (
            'opac/tests/fixtures/pages/revistas/aa/PASSO A PASSO – SISTEMA DE SUBMISSÃO DE ARTIGOS POR INTERMÉDIO DO SCHOLARONE.pdf',
            'aa_passo-a-passo-sistema-de-submissao-de-artigos-por-intermedio-do-scholarone.pdf',
            False)
        self.assertEqual(file_info, _file_info)
        self.page.create_files(mocked_create_file_function)
        results = [item['href'] for item in self.page.files]
        expected_items = [
            '/media/files/aa_passo-a-passo-sistema-de-submissao-de-artigos-por-intermedio-do-scholarone.pdf',
        ]
        # Whole-list comparison: an element-wise zip() loop would pass
        # silently if ``results`` came back shorter than expected.
        self.assertEqual(results, expected_items)
Пример #7
0
class UtilsMigratedPageTestCase(BaseTestCase):
    """Tests MigratedPage for a named page ('criterio') in Spanish.

    Lists of URLs are compared as a whole with ``assertEqual``; this both
    checks every element and catches a length mismatch, which an
    element-wise ``zip()`` loop would silently ignore.
    """

    def setUp(self):
        """Create the PageMigration and the empty page under test."""
        original_website = 'http://www.scielo.br'
        revistas_path = REVISTAS_PATH
        img_revistas_path = IMG_REVISTAS_PATH
        static_files_path = HTDOCS
        self.migration = PageMigration(original_website, revistas_path,
                                       img_revistas_path, static_files_path)
        self.page = MigratedPage(self.migration,
                                 '',
                                 acron='abc',
                                 page_name='criterio',
                                 lang='es')

    def test_content(self):
        """Reading content back yields only what was inside <body>."""
        self.page.content = '<html><body>x</body></html>'
        self.assertEqual(self.page.content, 'x')

    def test_find_old_website_uri_items(self):
        """Four of the five <img> sources are reported as old-website URIs.

        The 'http://scielo.br/img/revistas' source (no 'www.') is the one
        expected to be left out.
        """
        self.page.content = '''<img src="http://www.scielo.br"/>
                            <img src="http://www.scielo.br/abc"/>
                            <img src="/img/revistas/abc.jpg"/>
                            <img src="http://www.scielo.br/abc/iaboutj.htm"/>
                            <img src="http://scielo.br/img/revistas"/>'''

        result = list(self.page.find_old_website_uri_items('img', 'src'))
        self.assertEqual(result[0]['src'], 'http://www.scielo.br')
        self.assertEqual(result[1]['src'], 'http://www.scielo.br/abc')
        self.assertEqual(result[2]['src'], '/img/revistas/abc.jpg')
        self.assertEqual(result[3]['src'],
                         'http://www.scielo.br/abc/iaboutj.htm')
        self.assertEqual(len(result), 4)

    def test_fix_urls(self):
        """fix_urls() strips the old website prefix from <img> sources."""
        self.page.content = '''
            <img src="/img/revistas/img1.jpg"/>
            <img src="http://www.scielo.br/abc/img2.jpg"/>
            <img src="/revistas/img3.jpg"/>
            <img src="http://www.scielo.org/local/Image/scielo20_pt.png"/>'''

        self.page.fix_urls()

        results = [img['src'] for img in self.page.images]
        expected_items = [
            '/img/revistas/img1.jpg', '/abc/img2.jpg', '/revistas/img3.jpg',
            'http://www.scielo.org/local/Image/scielo20_pt.png'
        ]
        self.assertEqual(results, expected_items)

    def test_fix_urls_2(self):
        """A '/journal/abmvz/' link is not reported among the page files."""
        self.page.content = u'''
            <a href="/journal/abmvz/"/>
        '''
        self.page.fix_urls()

        results = [item['href'] for item in self.page.files]
        expected_items = []
        self.assertEqual(results, expected_items)

    def test_fix_urls_files(self):
        """fix_urls() strips the old website prefix from <a> hrefs."""
        self.page.content = '''
            <a href="/img/revistas/img1.jpg"/>
            <a href="http://www.scielo.br/abc/img2.jpg"/>
            <a href="/revistas/img3.jpg"/>
            <a href="http://www.scielo.org/local/Image/scielo20_pt.png"/>'''
        self.page.fix_urls()
        results = [item['href'] for item in self.page.files]
        expected_items = [
            '/img/revistas/img1.jpg', '/abc/img2.jpg', '/revistas/img3.jpg',
            'http://www.scielo.org/local/Image/scielo20_pt.png'
        ]
        self.assertEqual(results, expected_items)

    def test_get_prefixed_slug_name(self):
        """The slug name is prefixed with page name and language."""
        expected = 'criterio_es_criterio-brasil.jpg'
        ret = self.page.get_prefixed_slug_name('/abc/abc/Critério_Brasil.jpg')
        self.assertEqual(ret, expected)

    @patch.object(os.path, 'isfile', return_value=True)
    @patch.object(page_migration, 'confirm_file_location', return_value=True)
    def test_get_file_info_img1(self, mocked_confirm_file_location,
                                mocked_isfile):
        """An '/img/revistas/' URL resolves under IMG_REVISTAS_PATH."""
        self.page.prefixes = ['criterios', 'es']
        result = self.page.get_file_info('/img/revistas/img1.jpg')
        img_location = os.path.join(IMG_REVISTAS_PATH, 'img1.jpg')
        img_dest_name = 'criterios_es_img1.jpg'
        self.assertEqual(result, (img_location, img_dest_name, False))

    @patch.object(os.path, 'isfile', return_value=True)
    @patch.object(page_migration, 'confirm_file_location', return_value=True)
    def test_get_file_info_img2(self, mocked_confirm_file_location,
                                mocked_isfile):
        """A URL outside the journal folders resolves under HTDOCS."""
        self.page.prefixes = ['criterios', 'es']
        result = self.page.get_file_info('/abc/img2.jpg')
        img_location = os.path.join(HTDOCS, 'abc/img2.jpg')
        img_dest_name = 'criterios_es_img2.jpg'
        self.assertEqual(result, (img_location, img_dest_name, False))

    @patch.object(os.path, 'isfile', return_value=True)
    @patch.object(page_migration, 'confirm_file_location', return_value=True)
    @patch.object(wutils, 'migrate_page_create_image')
    def test_create_images_from_local_files(self, mocked_create_image_function,
                                            mocked_confirm_file_location,
                                            mocked_isfile):
        """Locally confirmed images are re-linked to the created media URLs.

        The scielo.org image is expected to keep its original URL.
        """
        self.page.content = '''
            <img src="/img/revistas/img1.jpg"/>
            <img src="http://www.scielo.br/abc/img2.jpg"/>
            <img src="/revistas/img3.jpg"/>
            <img src="http://www.scielo.org/local/Image/scielo20_pt.png"/>'''

        mocked_confirm_file_location.side_effect = [True, True, True]
        mocked_create_image_function.side_effect = [
            '/media/criterios_es_img1.jpg',
            '/media/criterios_es_img2.jpg',
            '/media/criterios_es_img3.jpg',
        ]
        self.page.fix_urls()
        self.page.create_images(mocked_create_image_function)

        results = [img['src'] for img in self.page.images]
        expected_items = [
            '/media/criterios_es_img1.jpg', '/media/criterios_es_img2.jpg',
            '/media/criterios_es_img3.jpg',
            'http://www.scielo.org/local/Image/scielo20_pt.png'
        ]
        self.assertEqual(results, expected_items)

    @patch.object(os.path, 'isfile', return_value=True)
    @patch.object(page_migration, 'confirm_file_location')
    @patch.object(wutils, 'migrate_page_create_file')
    def test_create_files_from_local_file(self, mocked_create_file_function,
                                          mocked_confirm_file_location,
                                          mocked_isfile):
        """A single locally confirmed file is re-linked to its media URL."""
        self.page.content = '''
            <a href="/img/revistas/img1.jpg"/>
        '''
        mocked_confirm_file_location.side_effect = [
            True,
        ]
        mocked_create_file_function.side_effect = [
            '/media/criterios_es_img1.jpg',
        ]

        files = list(self.page.files)
        self.assertEqual(files[0]['href'], '/img/revistas/img1.jpg')
        file_locations = self.page.migration.get_possible_locations(
            '/img/revistas/img1.jpg')
        self.assertEqual(file_locations,
                         ['{}/{}'.format(IMG_REVISTAS_PATH, 'img1.jpg')])

        self.page.create_files(mocked_create_file_function)

        results = [item['href'] for item in self.page.files]
        expected_items = [
            '/media/criterios_es_img1.jpg',
        ]
        self.assertEqual(results, expected_items)

    @patch.object(os.path, 'isfile', return_value=True)
    @patch.object(page_migration, 'confirm_file_location', return_value=True)
    @patch.object(wutils, 'migrate_page_create_file')
    def test_create_files_from_local_files(self, mocked_create_file_function,
                                           mocked_confirm_file_location,
                                           mocked_isfile):
        """Locally confirmed files are re-linked to the created media URLs.

        The scielo.org link is expected to keep its original URL.
        """
        self.page.content = '''
            <a href="/img/revistas/img1.jpg"/>
            <a href="http://www.scielo.br/abc/img2.jpg"/>
            <a href="/revistas/img3.jpg"/>
            <a href="http://www.scielo.org/local/Image/scielo20_pt.png"/>'''
        mocked_create_file_function.side_effect = [
            '/media/criterios_es_img1.jpg',
            '/media/criterios_es_img2.jpg',
            '/media/criterios_es_img3.jpg',
        ]
        self.page.fix_urls()
        self.page.create_files(mocked_create_file_function)

        results = [item['href'] for item in self.page.files]
        expected_items = [
            '/media/criterios_es_img1.jpg', '/media/criterios_es_img2.jpg',
            '/media/criterios_es_img3.jpg',
            'http://www.scielo.org/local/Image/scielo20_pt.png'
        ]
        self.assertEqual(results, expected_items)

    @patch('webapp.utils.page_migration.downloaded_file')
    @patch('webapp.utils.page_migration.confirm_file_location')
    @patch.object(wutils, 'migrate_page_create_image')
    def test_create_images_from_downloaded_files(self, mocked_create_item,
                                                 mocked_confirm_file_location,
                                                 mocked_downloaded_file):
        """Images are re-linked when obtained through downloaded_file.

        confirm_file_location is mocked to alternate False, True for each
        of the three images, and downloaded_file supplies a /tmp path for
        each of them.
        """
        self.page.content = '''
            <img src="/img/revistas/img1.jpg"/>
            <img src="http://www.scielo.br/abc/img2.jpg"/>
            <img src="/revistas/img3.jpg"/>
            <img src="http://www.scielo.org/local/Image/scielo20_pt.png"/>'''
        mocked_create_item.side_effect = [
            '/media/criterios_es_img1.jpg',
            '/media/criterios_es_img2.jpg',
            '/media/criterios_es_img3.jpg',
        ]
        mocked_downloaded_file.side_effect = [
            '/tmp/img1.jpg',
            '/tmp/img2.jpg',
            '/tmp/img3.jpg',
        ]
        mocked_confirm_file_location.side_effect = [
            False, True, False, True, False, True
        ]
        self.page.fix_urls()
        self.page.create_images(mocked_create_item)

        results = [img['src'] for img in self.page.images]
        expected_items = [
            '/media/criterios_es_img1.jpg', '/media/criterios_es_img2.jpg',
            '/media/criterios_es_img3.jpg',
            'http://www.scielo.org/local/Image/scielo20_pt.png'
        ]
        self.assertEqual(results, expected_items)

    @patch.object(page_migration, 'downloaded_file')
    @patch.object(page_migration, 'confirm_file_location')
    @patch.object(wutils, 'migrate_page_create_file')
    def test_create_files_from_downloaded_files(self,
                                                mocked_create_file_function,
                                                mocked_confirm_file_location,
                                                mocked_downloaded_file):
        """Files are re-linked when obtained through downloaded_file.

        confirm_file_location is mocked to alternate False, True for each
        of the three files, and downloaded_file supplies a /tmp path for
        each of them.
        """
        self.page.content = '''
            <a href="/img/revistas/img1.jpg"/>
            <a href="http://www.scielo.br/abc/img2.jpg"/>
            <a href="/revistas/img3.jpg"/>
            <a href="http://www.scielo.org/local/Image/scielo20_pt.png"/>'''
        mocked_create_file_function.side_effect = [
            '/media/criterios_es_img1.jpg',
            '/media/criterios_es_img2.jpg',
            '/media/criterios_es_img3.jpg',
        ]
        mocked_downloaded_file.side_effect = [
            '/tmp/img1.jpg',
            '/tmp/img2.jpg',
            '/tmp/img3.jpg',
        ]
        mocked_confirm_file_location.side_effect = [
            False, True, False, True, False, True
        ]
        self.page.fix_urls()
        self.page.create_files(mocked_create_file_function)

        results = [item['href'] for item in self.page.files]
        expected_items = [
            '/media/criterios_es_img1.jpg', '/media/criterios_es_img2.jpg',
            '/media/criterios_es_img3.jpg',
            'http://www.scielo.org/local/Image/scielo20_pt.png'
        ]
        self.assertEqual(results, expected_items)

    @patch.object(page_migration, 'downloaded_file', side_effect=None)
    @patch.object(page_migration,
                  'confirm_file_location',
                  side_effect=[False, False])
    @patch.object(wutils, 'migrate_page_create_file')
    @patch.object(page_migration, 'logging')
    def test_create_files_failure(self, mock_logger,
                                  mocked_create_file_function,
                                  mocked_confirm_file_location,
                                  mocked_downloaded_file):
        """When the file location is never confirmed, a notice is logged."""
        self.page.content = '''<a href="/img/revistas/img1.jpg"/>'''
        self.page.fix_urls()
        self.page.create_files(mocked_create_file_function)
        mock_logger.info.assert_called_with(
            "CONFERIR: /img/revistas/img1.jpg não encontrado")