Python html_to_text 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: xmodule.annotator_mixin

메소드/함수: html_to_text

hotexamples.com에서의 예제들: 10

Python html_to_text - 10개의 예제가 발견되었습니다. 다음은 오픈소스 프로젝트에서 추출한 Python xmodule.annotator_mixin.html_to_text의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제의 품질 향상에 도움이 됩니다.

예제 #1

파일 보기

def escape_html_characters(content):
    """
    Remove HTML characters that shouldn't be indexed using ElasticSearch indexer
    This method is complementary to html_to_text method found in xmodule/annotator_mixin.py

    Args:
        content (str): variable to escape html characters from

    Returns:
        content (str): content ready to be indexed by ElasticSearch

    """
    # Collapse every run of whitespace, HTML-encoded non-breaking spaces and
    # "//" sequences into a single space. After this step the text contains
    # no newlines, so "." in the patterns below can span a whole section.
    text = re.sub(r"(\s|&nbsp;|//)+", " ", html_to_text(content))

    # Removing HTML CDATA. The quantifier is non-greedy so that each section
    # is removed on its own; a greedy ".*" would also delete any real text
    # sitting between the first "<![CDATA[" and the last "]]>".
    text = re.sub(r"<!\[CDATA\[.*?\]\]>", "", text)

    # Removing HTML comments, non-greedy for the same reason as above.
    return re.sub(r"<!--.*?-->", "", text)

예제 #2

파일 보기

def strip_html_content_to_text(html_content):
    """
    Gets only the textual part for html content - useful for building text to be searched

    The content is passed through html_to_text, then whitespace is normalised
    and any remaining CDATA sections and HTML comments are removed.

    Args:
        html_content (str): HTML markup to reduce to searchable text

    Returns:
        str: the cleaned-up text
    """
    # Collapse runs of whitespace, HTML-encoded non-breaking spaces and "//"
    # into one space. No newlines survive this step, so "." below can span an
    # entire CDATA section or comment.
    text_content = re.sub(r"(\s|&nbsp;|//)+", " ", html_to_text(html_content))
    # Removing HTML CDATA (non-greedy: with a greedy ".*" the text between two
    # separate CDATA sections would be deleted as well)
    text_content = re.sub(r"<!\[CDATA\[.*?\]\]>", "", text_content)
    # Removing HTML comments (non-greedy for the same reason)
    text_content = re.sub(r"<!--.*?-->", "", text_content)

    return text_content

예제 #3

파일 보기

파일: imageannotation_module.py 프로젝트: uncletomiwa/edx-platform

    def __init__(self, *args, **kwargs):
        """
        Initialise the module from its XML definition.

        Parses ``self.data`` as XML, stores the result of
        ``_extract_instructions`` and the ``html_to_text`` rendering of the
        serialised ``<json>`` child element, and records the e-mail address of
        the real user when the runtime exposes ``get_real_user``.
        """
        super(ImageAnnotationModule, self).__init__(*args, **kwargs)

        root = etree.fromstring(self.data)
        self.instructions = self._extract_instructions(root)

        # Serialise the <json> child back to markup, then reduce it to text.
        json_markup = etree.tostring(root.find('json'), encoding='unicode')
        self.openseadragonjson = html_to_text(json_markup)

        # Default to an empty string; only overwritten when the runtime
        # provides a way to look up the real user.
        self.user = ""
        if self.runtime.get_real_user is not None:
            real_user = self.runtime.get_real_user(self.runtime.anonymous_student_id)
            self.user = real_user.email

예제 #4

파일 보기

파일: courseware_index.py 프로젝트: marcore/edx-platform

def strip_html_content_to_text(html_content):
    """
    Gets only the textual part for html content - useful for building text to be searched

    Runs the content through html_to_text, normalises whitespace, and strips
    any CDATA sections and HTML comments that remain.

    Args:
        html_content (str): HTML markup to reduce to searchable text

    Returns:
        str: the cleaned-up text
    """
    # Whitespace runs, HTML-encoded non-breaking spaces and "//" all become a
    # single space. Newlines are gone afterwards, which is why the patterns
    # below do not need re.DOTALL.
    text_content = re.sub(r"(\s|&nbsp;|//)+", " ", html_to_text(html_content))
    # Removing HTML CDATA. Non-greedy, so text lying between two separate
    # CDATA sections is preserved instead of being swallowed by one match.
    text_content = re.sub(r"<!\[CDATA\[.*?\]\]>", "", text_content)
    # Removing HTML comments. Non-greedy for the same reason.
    text_content = re.sub(r"<!--.*?-->", "", text_content)

    return text_content

예제 #5

파일 보기

파일: html_module.py 프로젝트: sigberto/edx-platform

 def index_dictionary(self):
     """
     Return the parent's index dictionary extended with this block's text.

     The block's data is converted with html_to_text, whitespace-normalised,
     and stripped of CDATA sections and HTML comments. The result is stored
     under ``content["html_content"]`` together with ``display_name``, and
     ``content_type`` is set to ``"Text"``.
     """
     xblock_body = super(HtmlDescriptor, self).index_dictionary()
     # Collapse whitespace runs, HTML-encoded non-breaking spaces and "//"
     # into a single space (no newlines remain after this step).
     html_content = re.sub(r"(\s|&nbsp;|//)+", " ", html_to_text(self.data))
     # Removing HTML CDATA. Non-greedy so that text between two separate
     # CDATA sections is kept rather than removed along with them.
     html_content = re.sub(r"<!\[CDATA\[.*?\]\]>", "", html_content)
     # Removing HTML comments. Non-greedy for the same reason.
     html_content = re.sub(r"<!--.*?-->", "", html_content)
     html_body = {"html_content": html_content, "display_name": self.display_name}
     # Merge into existing "content" supplied by the parent, if any.
     if "content" in xblock_body:
         xblock_body["content"].update(html_body)
     else:
         xblock_body["content"] = html_body
     xblock_body["content_type"] = "Text"
     return xblock_body

예제 #6

파일 보기

파일: imageannotation_module.py 프로젝트: CDOT-EDX/edx-platform

    def __init__(self, *args, **kwargs):
        """
        Initialise the module from its XML definition.

        Parses ``self.data`` as XML, stores the result of
        ``_extract_instructions`` and the ``html_to_text`` rendering of the
        serialised ``<json>`` child element, flags whether the current user
        role is 'instructor' or 'staff', and records the real user's e-mail
        address (or a translated placeholder if the lookup fails).
        """
        super(ImageAnnotationModule, self).__init__(*args, **kwargs)

        root = etree.fromstring(self.data)
        self.instructions = self._extract_instructions(root)

        # Serialise the <json> child back to markup, then reduce it to text.
        json_markup = etree.tostring(root.find('json'), encoding='unicode')
        self.openseadragonjson = html_to_text(json_markup)

        # Defaults first, so both attributes exist whatever happens below.
        self.user_email = ""
        self.is_course_staff = False

        role = self.runtime.get_user_role()
        if role in ['instructor', 'staff']:
            self.is_course_staff = True

        if self.runtime.get_real_user is not None:
            # Best effort: any failure while resolving the user or reading
            # the e-mail falls back to a placeholder message.
            try:
                real_user = self.runtime.get_real_user(self.runtime.anonymous_student_id)
                self.user_email = real_user.email
            except Exception:  # pylint: disable=broad-except
                self.user_email = _("No email address found.")

예제 #7

파일 보기

파일: imageannotation_module.py 프로젝트: devs1991/test_edx_docmode

    def __init__(self, *args, **kwargs):
        """
        Set up the annotation module from the XML held in ``self.data``.

        Stores the extracted instructions, the text form of the ``<json>``
        child element, whether the user role is 'instructor' or 'staff', and
        the real user's e-mail address (a translated placeholder is used when
        the lookup raises).
        """
        super(ImageAnnotationModule, self).__init__(*args, **kwargs)

        parsed = etree.fromstring(self.data)
        self.instructions = self._extract_instructions(parsed)

        # The <json> element is serialised to a unicode string before being
        # handed to html_to_text.
        serialized_json = etree.tostring(parsed.find('json'), encoding='unicode')
        self.openseadragonjson = html_to_text(serialized_json)

        # Start from safe defaults.
        self.user_email = ""
        self.is_course_staff = False

        user_role = self.runtime.get_user_role()
        if user_role in ['instructor', 'staff']:
            self.is_course_staff = True

        if self.runtime.get_real_user is not None:
            # Any error during the lookup results in the placeholder text.
            try:
                account = self.runtime.get_real_user(self.runtime.anonymous_student_id)
                self.user_email = account.email
            except Exception:  # pylint: disable=broad-except
                self.user_email = _("No email address found.")

예제 #8

파일 보기

파일: html_module.py 프로젝트: echines/edx-platform

 def index_dictionary(self):
     """
     Return the parent's index dictionary extended with this block's text.

     ``self.data`` is converted with html_to_text, whitespace-normalised, and
     stripped of CDATA sections and HTML comments. The cleaned text and the
     display name are merged into the ``"content"`` entry, and
     ``"content_type"`` is set to ``"Text"``.
     """
     xblock_body = super(HtmlDescriptor, self).index_dictionary()
     # Runs of whitespace, HTML-encoded non-breaking spaces and "//" become a
     # single space; no newlines are left afterwards.
     html_content = re.sub(r"(\s|&nbsp;|//)+", " ", html_to_text(self.data))
     # Removing HTML CDATA. The match is non-greedy: a greedy ".*" would
     # delete everything from the first "<![CDATA[" to the last "]]>",
     # including ordinary text between two sections.
     html_content = re.sub(r"<!\[CDATA\[.*?\]\]>", "", html_content)
     # Removing HTML comments. Non-greedy for the same reason.
     html_content = re.sub(r"<!--.*?-->", "", html_content)
     html_body = {
         "html_content": html_content,
         "display_name": self.display_name,
     }
     # Merge into existing "content" supplied by the parent, if any.
     if "content" in xblock_body:
         xblock_body["content"].update(html_body)
     else:
         xblock_body["content"] = html_body
     xblock_body["content_type"] = "Text"
     return xblock_body

예제 #9

파일 보기

파일: test_annotator_mixin.py 프로젝트: tiwariricha/edx-platform

 def test_html_to_text(self):
     # Converting the sample HTML fixture must yield exactly this plain text.
     assert "Testing here and not bolded here" == html_to_text(self.sample_html)

예제 #10

파일 보기

 def test_html_to_text(self):
     # The sample HTML fixture should be reduced to this exact plain text.
     actual = html_to_text(self.sample_html)
     self.assertEqual("Testing here and not bolded here", actual)