class UtilsJournalMigratedPageTestCase(BaseTestCase):
    """Checks link handling of a MigratedPage built for the journal 'acron'."""

    def setUp(self):
        """Build the PageMigration and an empty Spanish page for 'acron'."""
        self.migration = PageMigration(
            'http://www.scielo.br',
            REVISTAS_PATH,
            IMG_REVISTAS_PATH,
            HTDOCS,
        )
        self.page = MigratedPage(
            self.migration, '', acron='acron', lang='es')

    def test_content(self):
        """Assigning content and then calling fix_urls() yields the expected hrefs."""
        self.page.content = (
            '<html><body><a href="acron.jpg"/> '
            '<a href="www.scielo.br/jxabc.png"/> '
            '<a href="*****@*****.**"/> '
            '<a href="www.site.org"/> '
            '<a href="xyz.txt"/> '
            '</body></html>'
        )

        # Fragments expected right after the content is assigned.
        fragments_after_assignment = (
            '/revistas/acron/acron.jpg',
            '"*****@*****.**"',
            '"www.site.org"',
            '"xyz.txt"',
            '"www.scielo.br/jxabc.png"',
        )
        for fragment in fragments_after_assignment:
            # Re-read the attribute on every check, as each assertion
            # inspects the page's current content.
            self.assertIn(fragment, self.page.content)

        self.page.fix_urls()
        self.assertIn('"/jxabc.png"', self.page.content)
def migrate_page_content(content, language, acron=None, page_name=None):
    """Migrate the content of any HTML page and return the new content.

    Args:
        content: HTML content of the page.
        language: language of the page text.
        acron: journal acronym, when the page belongs to a journal.
        page_name: page title, when the page is not a journal page.

    Returns:
        The content of the migrated page, or ``None`` when ``content``
        is empty.

    Raises:
        IOError: when ``content`` is given but neither ``acron`` nor
            ``page_name`` is.
    """
    if not content:
        return None
    if not (acron or page_name):
        raise IOError('migrate_page_content() requer acron ou page_name')

    settings = current_app.config
    pages_dir, images_dir, website = (
        settings[key]
        for key in (
            'JOURNAL_PAGES_SOURCE_PATH',
            'JOURNAL_IMAGES_SOURCE_PATH',
            'JOURNAL_PAGES_ORIGINAL_WEBSITE',
        )
    )

    migrated = MigratedPage(
        PageMigration(website, pages_dir, images_dir),
        content,
        acron=acron,
        page_name=page_name,
        lang=language,
    )
    migrated.migrate_urls(
        migrate_page_create_file, migrate_page_create_image)
    return migrated.content
class UtilsMigratedABMVZJournalPageTestCase(BaseTestCase):
    """Checks file migration of a MigratedPage built for the journal 'abmvz'."""

    def create_item(self, source, dest, check_if_exists=False):
        """Stand-in for a create-file/create-image callback; returns ''."""
        return ''

    def setUp(self):
        """Build the PageMigration (no static files path) and an empty page."""
        original_website = 'http://www.scielo.br'
        self.revistas_path = TESTS_REVISTAS_PATH
        self.img_revistas_path = TESTS_IMG_REVISTAS_PATH
        self.static_files_path = None
        self.migration = PageMigration(
            original_website,
            self.revistas_path,
            self.img_revistas_path,
            self.static_files_path,
        )
        self.page = MigratedPage(
            self.migration, '', acron='abmvz', lang='es')

    @patch('requests.get')
    @patch.object(wutils, 'migrate_page_create_file')
    def test_create_files_from_downloaded_files(
        self,
        mocked_create_file_function,
        mocked_requests_get,
    ):
        """A PDF link with spaces and accents is renamed and re-linked.

        ``requests.get`` is patched to answer 200 with a small payload, so
        no network access happens during the test.
        """
        mocked_response = Mock()
        mocked_response.status_code = 200
        mocked_response.content = b'content'
        mocked_requests_get.return_value = mocked_response

        pdf_file_path = (
            'PASSO A PASSO – SISTEMA DE SUBMISSÃO DE ARTIGOS POR INTERMÉDIO DO SCHOLARONE.pdf'
        )
        self.page.content = '<a href="{}"/>'.format(pdf_file_path)
        self.assertIn(
            '/revistas/abmvz/{}'.format(pdf_file_path), self.page.content)

        files = list(self.page.files)
        result = self.migration.get_possible_locations(files[0]['href'])
        self.assertIn(
            '{}/abmvz/{}'.format(TESTS_REVISTAS_PATH, pdf_file_path), result)

        mocked_create_file_function.side_effect = [
            '/media/files/abmvz_passo-a-passo-sistema-de-submissao-de-artigos-por-intermedio-do-scholarone.pdf',
        ]
        _file_info = self.page.get_file_info(files[0]['href'])
        file_info = (
            '/tmp/tmpcjnmoyos/PASSO A PASSO – SISTEMA DE SUBMISSÃO DE ARTIGOS POR INTERMÉDIO DO SCHOLARONE.pdf',
            'abmvz_passo-a-passo-sistema-de-submissao-de-artigos-por-intermedio-do-scholarone.pdf',
            True,
        )
        # Only the destination name and the trailing flag are compared; the
        # first element is a temporary path that differs between runs.
        # NOTE(review): the flag presumably means "downloaded" - confirm.
        self.assertEqual(file_info[1], _file_info[1])
        self.assertEqual(file_info[2], _file_info[2])

        self.page.create_files(mocked_create_file_function)
        results = [item['href'] for item in self.page.files]
        expected_items = [
            '/media/files/abmvz_passo-a-passo-sistema-de-submissao-de-artigos-por-intermedio-do-scholarone.pdf',
        ]
        # Compare the whole lists: a zip() loop stops at the shorter input
        # and would pass vacuously if no link were left on the page.
        self.assertEqual(results, expected_items)
def setUp(self):
    """Build the PageMigration and an empty Spanish page for 'acron'."""
    self.migration = PageMigration(
        'http://www.scielo.br',
        REVISTAS_PATH,
        IMG_REVISTAS_PATH,
        HTDOCS,
    )
    self.page = MigratedPage(
        self.migration, '', acron='acron', lang='es')
def setUp(self):
    """Build the PageMigration (no static files path) and an empty page for 'aa'."""
    # The three paths are kept on the instance, exactly as before.
    self.revistas_path = TESTS_REVISTAS_PATH
    self.img_revistas_path = TESTS_IMG_REVISTAS_PATH
    self.static_files_path = None

    self.migration = PageMigration(
        'http://www.scielo.br',
        self.revistas_path,
        self.img_revistas_path,
        self.static_files_path,
    )
    self.page = MigratedPage(self.migration, '', acron='aa', lang='es')
class UtilsMigratedJournalPageTestCase(BaseTestCase):
    """Checks file migration of a MigratedPage built for the journal 'aa'."""

    def setUp(self):
        """Build the PageMigration (no static files path) and an empty page."""
        original_website = 'http://www.scielo.br'
        self.revistas_path = TESTS_REVISTAS_PATH
        self.img_revistas_path = TESTS_IMG_REVISTAS_PATH
        self.static_files_path = None
        self.migration = PageMigration(
            original_website,
            self.revistas_path,
            self.img_revistas_path,
            self.static_files_path,
        )
        self.page = MigratedPage(self.migration, '', acron='aa', lang='es')

    @patch.object(wutils, 'migrate_page_create_file')
    def test_create_files(self, mocked_create_file_function):
        """A PDF link with spaces and accents is renamed and re-linked."""
        pdf_file_path = 'PASSO A PASSO – SISTEMA DE SUBMISSÃO DE ARTIGOS POR INTERMÉDIO DO SCHOLARONE.pdf'
        self.page.content = '<a href="{}"/>'.format(pdf_file_path)
        self.assertIn(
            '/revistas/aa/{}'.format(pdf_file_path), self.page.content)

        for a in self.page.files:
            result = self.migration.get_possible_locations(a['href'])
            self.assertIn(
                '{}/aa/{}'.format(TESTS_REVISTAS_PATH, pdf_file_path),
                result)

        mocked_create_file_function.side_effect = [
            '/media/files/aa_passo-a-passo-sistema-de-submissao-de-artigos-por-intermedio-do-scholarone.pdf',
        ]
        _file_info = self.page.get_file_info(
            list(self.page.files)[0]['href'])
        file_info = (
            'opac/tests/fixtures/pages/revistas/aa/PASSO A PASSO – SISTEMA DE SUBMISSÃO DE ARTIGOS POR INTERMÉDIO DO SCHOLARONE.pdf',
            'aa_passo-a-passo-sistema-de-submissao-de-artigos-por-intermedio-do-scholarone.pdf',
            False,
        )
        self.assertEqual(file_info, _file_info)

        self.page.create_files(mocked_create_file_function)
        results = [item['href'] for item in self.page.files]
        expected_items = [
            '/media/files/aa_passo-a-passo-sistema-de-submissao-de-artigos-por-intermedio-do-scholarone.pdf',
        ]
        # Compare the whole lists: a zip() loop stops at the shorter input
        # and would pass vacuously if no link were left on the page.
        self.assertEqual(results, expected_items)
class UtilsMigratedPageTestCase(BaseTestCase):
    """Checks URL fixing and file/image creation on a non-journal MigratedPage."""

    # HTML fixtures shared by several tests (leading space is intentional:
    # it is part of the original fixture text).
    _IMG_TAGS = (
        ' <img src="/img/revistas/img1.jpg"/>'
        ' <img src="http://www.scielo.br/abc/img2.jpg"/>'
        ' <img src="/revistas/img3.jpg"/>'
        ' <img src="http://www.scielo.org/local/Image/scielo20_pt.png"/>'
    )
    _LINK_TAGS = (
        ' <a href="/img/revistas/img1.jpg"/>'
        ' <a href="http://www.scielo.br/abc/img2.jpg"/>'
        ' <a href="/revistas/img3.jpg"/>'
        ' <a href="http://www.scielo.org/local/Image/scielo20_pt.png"/>'
    )
    # URL that the tests expect to be left untouched.
    _EXTERNAL_URL = 'http://www.scielo.org/local/Image/scielo20_pt.png'
    # URLs expected after fix_urls() for the three migratable items.
    _FIXED_URLS = (
        '/img/revistas/img1.jpg',
        '/abc/img2.jpg',
        '/revistas/img3.jpg',
    )
    # Values handed back by the mocked create-file/create-image callbacks.
    _MEDIA_URLS = (
        '/media/criterios_es_img1.jpg',
        '/media/criterios_es_img2.jpg',
        '/media/criterios_es_img3.jpg',
    )

    def _assert_same_items(self, results, expected_items):
        """Compare item by item first, then the lists as a whole."""
        for result, expected in zip(results, expected_items):
            self.assertEqual(result, expected)
        self.assertEqual(results, expected_items)

    def setUp(self):
        """Build the PageMigration and an empty page named 'criterio'."""
        self.migration = PageMigration(
            'http://www.scielo.br',
            REVISTAS_PATH,
            IMG_REVISTAS_PATH,
            HTDOCS,
        )
        self.page = MigratedPage(
            self.migration, '', acron='abc', page_name='criterio',
            lang='es')

    def test_content(self):
        """The html/body wrapper is expected to be dropped from content."""
        self.page.content = '<html><body>x</body></html>'
        self.assertEqual(self.page.content, 'x')

    def test_find_old_website_uri_items(self):
        """Four of the five <img> tags are expected to be reported."""
        self.page.content = (
            '<img src="http://www.scielo.br"/>'
            ' <img src="http://www.scielo.br/abc"/>'
            ' <img src="/img/revistas/abc.jpg"/>'
            ' <img src="http://www.scielo.br/abc/iaboutj.htm"/>'
            ' <img src="http://scielo.br/img/revistas"/>'
        )
        found = list(self.page.find_old_website_uri_items('img', 'src'))
        self.assertEqual(found[0]['src'], 'http://www.scielo.br')
        self.assertEqual(found[1]['src'], 'http://www.scielo.br/abc')
        self.assertEqual(found[2]['src'], '/img/revistas/abc.jpg')
        self.assertEqual(
            found[3]['src'], 'http://www.scielo.br/abc/iaboutj.htm')
        self.assertEqual(len(found), 4)

    def test_fix_urls(self):
        """fix_urls() on <img> tags yields the expected src values."""
        self.page.content = self._IMG_TAGS
        self.page.fix_urls()
        sources = [img['src'] for img in self.page.images]
        self._assert_same_items(
            sources, list(self._FIXED_URLS) + [self._EXTERNAL_URL])

    def test_fix_urls_2(self):
        """A link to a journal home is expected not to be listed as a file."""
        self.page.content = u''' <a href="/journal/abmvz/"/> '''
        self.page.fix_urls()
        hrefs = [item['href'] for item in self.page.files]
        self._assert_same_items(hrefs, [])

    def test_fix_urls_files(self):
        """fix_urls() on <a> tags yields the expected href values."""
        self.page.content = self._LINK_TAGS
        self.page.fix_urls()
        hrefs = [item['href'] for item in self.page.files]
        self._assert_same_items(
            hrefs, list(self._FIXED_URLS) + [self._EXTERNAL_URL])

    def test_get_prefixed_slug_name(self):
        """The slug is expected to carry the page name and language prefix."""
        slug = self.page.get_prefixed_slug_name(
            '/abc/abc/Critério_Brasil.jpg')
        self.assertEqual(slug, 'criterio_es_criterio-brasil.jpg')

    @patch.object(os.path, 'isfile', return_value=True)
    @patch.object(page_migration, 'confirm_file_location', return_value=True)
    def test_get_file_info_img1(self, mocked_confirm_file_location,
                                mocked_isfile):
        """File info of an /img/revistas item points into IMG_REVISTAS_PATH."""
        self.page.prefixes = ['criterios', 'es']
        info = self.page.get_file_info('/img/revistas/img1.jpg')
        expected_info = (
            os.path.join(IMG_REVISTAS_PATH, 'img1.jpg'),
            'criterios_es_img1.jpg',
            False,
        )
        self.assertEqual(info, expected_info)

    @patch.object(os.path, 'isfile', return_value=True)
    @patch.object(page_migration, 'confirm_file_location', return_value=True)
    def test_get_file_info_img2(self, mocked_confirm_file_location,
                                mocked_isfile):
        """File info of another site-relative item points into HTDOCS."""
        self.page.prefixes = ['criterios', 'es']
        info = self.page.get_file_info('/abc/img2.jpg')
        expected_info = (
            os.path.join(HTDOCS, 'abc/img2.jpg'),
            'criterios_es_img2.jpg',
            False,
        )
        self.assertEqual(info, expected_info)

    @patch.object(os.path, 'isfile', return_value=True)
    @patch.object(page_migration, 'confirm_file_location', return_value=True)
    @patch.object(wutils, 'migrate_page_create_image')
    def test_create_images_from_local_files(self,
                                            mocked_create_image_function,
                                            mocked_confirm_file_location,
                                            mocked_isfile):
        """Images confirmed at their location get the callback's URL."""
        self.page.content = self._IMG_TAGS
        mocked_confirm_file_location.side_effect = [True, True, True]
        mocked_create_image_function.side_effect = list(self._MEDIA_URLS)

        self.page.fix_urls()
        self.page.create_images(mocked_create_image_function)

        sources = [img['src'] for img in self.page.images]
        self._assert_same_items(
            sources, list(self._MEDIA_URLS) + [self._EXTERNAL_URL])

    @patch.object(os.path, 'isfile', return_value=True)
    @patch.object(page_migration, 'confirm_file_location')
    @patch.object(wutils, 'migrate_page_create_file')
    def test_create_files_from_local_file(self, mocked_create_file_function,
                                          mocked_confirm_file_location,
                                          mocked_isfile):
        """A single confirmed link is replaced by the callback's URL."""
        self.page.content = ''' <a href="/img/revistas/img1.jpg"/> '''
        mocked_confirm_file_location.side_effect = [True]
        mocked_create_file_function.side_effect = [
            '/media/criterios_es_img1.jpg',
        ]

        links = list(self.page.files)
        self.assertEqual(links[0]['href'], '/img/revistas/img1.jpg')

        locations = self.page.migration.get_possible_locations(
            '/img/revistas/img1.jpg')
        self.assertEqual(
            locations, ['{}/{}'.format(IMG_REVISTAS_PATH, 'img1.jpg')])

        self.page.create_files(mocked_create_file_function)
        hrefs = [item['href'] for item in self.page.files]
        self._assert_same_items(hrefs, ['/media/criterios_es_img1.jpg'])

    @patch.object(os.path, 'isfile', return_value=True)
    @patch.object(page_migration, 'confirm_file_location', return_value=True)
    @patch.object(wutils, 'migrate_page_create_file')
    def test_create_files_from_local_files(self, mocked_create_file_function,
                                           mocked_confirm_file_location,
                                           mocked_isfile):
        """Links confirmed at their location get the callback's URL."""
        self.page.content = self._LINK_TAGS
        mocked_create_file_function.side_effect = list(self._MEDIA_URLS)

        self.page.fix_urls()
        self.page.create_files(mocked_create_file_function)

        hrefs = [item['href'] for item in self.page.files]
        wanted = list(self._MEDIA_URLS) + [self._EXTERNAL_URL]
        # Whole-list comparison comes first in this test.
        self.assertEqual(hrefs, wanted)
        for href, expected in zip(hrefs, wanted):
            self.assertEqual(href, expected)

    @patch('webapp.utils.page_migration.downloaded_file')
    @patch('webapp.utils.page_migration.confirm_file_location')
    @patch.object(wutils, 'migrate_page_create_image')
    def test_create_images_from_downloaded_files(self, mocked_create_item,
                                                 mocked_confirm_file_location,
                                                 mocked_downloaded_file):
        """Images whose first location check fails still get migrated."""
        self.page.content = self._IMG_TAGS
        mocked_create_item.side_effect = list(self._MEDIA_URLS)
        mocked_downloaded_file.side_effect = [
            '/tmp/img1.jpg',
            '/tmp/img2.jpg',
            '/tmp/img3.jpg',
        ]
        # Per image: the first confirmation fails, the second succeeds.
        mocked_confirm_file_location.side_effect = [
            False, True, False, True, False, True
        ]

        self.page.fix_urls()
        self.page.create_images(mocked_create_item)

        sources = [img['src'] for img in self.page.images]
        self._assert_same_items(
            sources, list(self._MEDIA_URLS) + [self._EXTERNAL_URL])

    @patch.object(page_migration, 'downloaded_file')
    @patch.object(page_migration, 'confirm_file_location')
    @patch.object(wutils, 'migrate_page_create_file')
    def test_create_files_from_downloaded_files(self,
                                                mocked_create_file_function,
                                                mocked_confirm_file_location,
                                                mocked_downloaded_file):
        """Links whose first location check fails still get migrated."""
        self.page.content = self._LINK_TAGS
        mocked_create_file_function.side_effect = list(self._MEDIA_URLS)
        mocked_downloaded_file.side_effect = [
            '/tmp/img1.jpg',
            '/tmp/img2.jpg',
            '/tmp/img3.jpg',
        ]
        # Per link: the first confirmation fails, the second succeeds.
        mocked_confirm_file_location.side_effect = [
            False, True, False, True, False, True
        ]

        self.page.fix_urls()
        self.page.create_files(mocked_create_file_function)

        hrefs = [item['href'] for item in self.page.files]
        self._assert_same_items(
            hrefs, list(self._MEDIA_URLS) + [self._EXTERNAL_URL])

    @patch.object(page_migration, 'downloaded_file', side_effect=None)
    @patch.object(page_migration, 'confirm_file_location',
                  side_effect=[False, False])
    @patch.object(wutils, 'migrate_page_create_file')
    @patch.object(page_migration, 'logging')
    def test_create_files_failure(self, mock_logger,
                                  mocked_create_file_function,
                                  mocked_confirm_file_location,
                                  mocked_downloaded_file):
        """When no location is confirmed, a 'CONFERIR' message is logged."""
        self.page.content = '''<a href="/img/revistas/img1.jpg"/>'''
        self.page.fix_urls()
        self.page.create_files(mocked_create_file_function)
        mock_logger.info.assert_called_with(
            "CONFERIR: /img/revistas/img1.jpg não encontrado")