Exemplo n.º 1
0
def article_html_generator(file_xml_path: str, dest_path: str) -> None:
    """Render an XML article to one HTML file per language.

    The XML at ``file_xml_path`` is passed to ``XML`` and then to
    ``HTMLGenerator.parse``. Every ``(lang, tree)`` pair yielded by the
    generator is serialized as HTML and written under ``dest_path`` as
    ``<stem>.<lang>.html``, where ``<stem>`` is the XML file name without
    its last extension.

    Args:
        file_xml_path: Path of the XML file to convert.
        dest_path: Directory that receives the generated HTML files.
    """
    logger.debug("file: %s", file_xml_path)

    parsed_xml = XML(file_xml_path, no_network=False)
    html_generator = HTMLGenerator.parse(
        parsed_xml,
        valid_only=False,
        css="https://new.scielo.br/static/css/scielo-article.css",
        print_css="https://new.scielo.br/static/css/scielo-bundle-print.css",
        js="https://new.scielo.br/static/js/scielo-article-min.js",
    )

    # The output stem does not depend on the language, so compute it once.
    # ``rpartition`` drops only the last extension; when the name has no dot
    # at all the whole name is used instead of failing on tuple unpacking.
    xml_fname = os.path.basename(file_xml_path)
    stem, dot, _ = xml_fname.rpartition(".")
    if not dot:
        stem = xml_fname

    for lang, trans_result in html_generator:
        out_fname = ".".join([stem, lang, "html"])
        new_file_html_path = os.path.join(dest_path, out_fname)

        files.write_file(
            new_file_html_path,
            etree.tostring(
                trans_result,
                doctype=u"<!DOCTYPE html>",
                pretty_print=True,
                encoding="utf-8",
                method="html",
            ).decode("utf-8"),
        )
Exemplo n.º 2
0
 def html_languages(self):
     """
     Extract all the HTML languages in XML content.

     Returns a list of ``{'type': 'html', 'lang': <lang>}`` dicts, one per
     language yielded by the HTML generator, or ``None`` when
     ``HTMLGenerator.parse`` raises ``ValueError`` (the error is logged).
     """
     try:
         generator = HTMLGenerator.parse(self._content, valid_only=False)
     except ValueError as e:
         # Python 3 exceptions have no ``message`` attribute; format the
         # exception itself so this handler cannot fail with AttributeError.
         logger.error('Error getting htmlgenerator: {}.'.format(e))
         return None

     return [
         {'type': 'html', 'lang': lang}
         for lang, __ in generator
     ]
Exemplo n.º 3
0
def render_html_from_xml(article, lang):
    """Render the article's XML as HTML for ``lang`` and pick out its body.

    The XML is fetched from the normalized ``article.xml`` URL, parsed, and
    transformed through ``HTMLGenerator``.

    Returns a 2-tuple: the result of searching the rendered HTML for the
    ``div`` whose id is ``standalonearticle``, and ``generator.languages``.
    """
    xml_bytes = fetch_data(normalize_ssm_url(article.xml))
    xml_tree = etree.parse(BytesIO(xml_bytes))
    html_generator = HTMLGenerator.parse(xml_tree, valid_only=False)

    # Serialize the generated tree and load it into a soup object.
    html_markup = etree.tostring(
        html_generator.generate(lang),
        encoding="UTF-8",
        method="html",
    )
    soup = BeautifulSoup(html_markup, 'html.parser')

    # Slice the HTML down to the div with id "standalonearticle".
    article_div = soup.find('div', {'id': 'standalonearticle'})
    return article_div, html_generator.languages
Exemplo n.º 4
0
    def register_htmls(self):
        """
        Register HTML contents from XML for all the text languages.

        Each language yielded by the HTML generator is serialized and passed
        to ``self._register_ssm_asset``. A language whose serialization fails
        is logged and skipped; the remaining languages are still processed.

        Returns a list of ``{'type': 'html', 'lang': <lang>, 'url': <url>}``
        dicts for the registered HTMLs, or ``None`` when
        ``HTMLGenerator.parse`` raises ``ValueError``.
        """
        try:
            generator = HTMLGenerator.parse(
                self._content,
                valid_only=False,
                css=config.OPAC_PROC_ARTICLE_CSS_URL,
                print_css=config.OPAC_PROC_ARTICLE_PRINT_CSS_URL,
                js=config.OPAC_PROC_ARTICLE_JS_URL)
        except ValueError as e:
            # Python 3 exceptions have no ``message`` attribute; format the
            # exception itself so this handler cannot fail with
            # AttributeError.
            logger.error('Error getting htmlgenerator: {}.'.format(e))
            return None

        registered_htmls = []
        for lang, trans_result in generator:
            try:
                html = etree.tostring(trans_result,
                                      pretty_print=True,
                                      encoding='utf-8',
                                      method='html',
                                      doctype="<!DOCTYPE html>")
                html_as_bytes = BytesIO(html)
            except Exception as e:
                # Best effort per language: report the cause and move on
                # instead of dropping the error detail.
                logger.error(
                    'Error converting etree {} to string: {}.'.format(lang, e))
                continue

            metadata = self.get_metadata()
            metadata.update({
                'bucket_name': self.bucket_name,
                'type': 'html',
                'version': 'xml'
            })
            __, html_url = self._register_ssm_asset(
                html_as_bytes, self._get_file_name('html', lang), 'html',
                metadata)
            registered_htmls.append({
                'type': 'html',
                'lang': lang,
                'url': html_url
            })

        return registered_htmls
Exemplo n.º 5
0
def render_html_converted_view(request):
    """Render a converted XML file as HTML for the requested language.

    Reads ``file_xml`` and ``language`` from ``request.matchdict``, builds
    the XML path under the configured ``CONVERSION_PATH``, and returns a
    ``Response`` whose body is the generated HTML document.
    """
    conversion_root = config.get("CONVERSION_PATH")
    # NOTE(review): ``file_xml`` comes from the request and is joined into a
    # filesystem path unchecked -- confirm the route pattern cannot match
    # values that escape CONVERSION_PATH.
    xml_location = os.path.join(conversion_root, request.matchdict["file_xml"])

    generator = HTMLGenerator.parse(
        XML(xml_location, no_network=False),
        valid_only=False,
        css="/static/css/scielo-article.css",
        print_css="/static/css/scielo-bundle-print.css",
        js="/static/js/scielo-article-min.js",
    )

    html_tree = generator.generate(request.matchdict["language"])
    serialized = etree.tostring(
        html_tree,
        doctype=u"<!DOCTYPE html>",
        pretty_print=True,
        encoding="utf-8",
        method="html",
    )
    return Response(serialized.decode("utf-8"))