Пример #1
0
def save_page(url, title, intro):
    """Append a wiki page's URL, title and intro (as plain text) to '<name>.txt'.

    The file name is the URL path segment after '/wiki/'.
    Returns the derived name on success, or the string "Error" on failure.
    """
    try:
        w = '/wiki/'
        name = url[url.find(w) + len(w):]
        # 'with' guarantees the handle is closed even if a write raises,
        # unlike the manual open()/close() pair this replaces.
        with open(name + '.txt', 'a', encoding='utf-8') as file:
            file.write(url + "\n")
            file.write(clean(h2t(title)) + ": " + "\n")
            file.write(clean(h2t(intro)))
    except Exception as e:
        # Best-effort: report the problem and signal failure to the caller
        # instead of propagating.
        print(str(e))
        return "Error"
    return name
Пример #2
0
def get_card(soup, final, kwargs):
    """Extract a Google info card from the result soup, if one is present."""
    # Common info card: a description span inside the card container.
    card = soup.find("div", class_="g mnr-c g-blk")
    if card is not None:
        desc = card.find("span", class_="hgKElc")
        if desc is not None:
            final.append(s(None, "Google Info Card:", h2t(str(desc))))
            return
Пример #3
0
    def parse_reference(self, text, full_chap, title, emb_color):
        """Turn a passage's HTML into a list of embeds, 4000 chars per page."""
        # Strip cross-reference superscripts entirely.
        for sup in text.find_all("sup", {"class": "crossreference"}):
            sup.decompose()

        # Downgrade h3/h4 headers to bold so the markdown keeps them inline.
        for header in text.find_all(["h3", "h4"]):
            header.name = "b"

        plain = h2t(str(text))
        chunks = list(pagify(plain, page_length=4000))
        total = len(chunks)

        embeds = []
        for number, chunk in enumerate(chunks, 1):
            embed = discord.Embed(title=title,
                                  description=chunk,
                                  colour=emb_color)
            embed.url = full_chap
            embed.set_footer(
                text=f"Powered by Biblegateway.com | Page {number}/{total}")
            embeds.append(embed)
        return embeds
Пример #4
0
 def parseContent(self):
     """Return the text extracted from self.html, or None when it is unset."""
     if self.html is None:
         return None
     # Decode the raw bytes before handing off to the html->text converter.
     data = h2t(self.html.decode('UTF-8'))
     self.logger.info("Content Parsed")
     return data
Пример #5
0
    def parser_text(self, text, soup=None, cards: bool = True):
        """My bad logic for scraping"""
        if not soup:
            soup = BeautifulSoup(text, features="html.parser")

        final = []
        kwargs = {"stats": h2t(str(soup.find("div", id="result-stats")))}

        if cards:
            get_card(soup, final, kwargs)

        for res in soup.findAll("div", class_="g"):
            if name := res.find("div", class_="yuRUbf"):
                url = name.a["href"]
                if title := name.find("h3", "LC20lb DKV0Md"):
                    title = title.text
                else:
                    title = url
Пример #6
0
def html2text(s):
    """Convert HTML to plain text, dropping Evernote en-media tags first."""
    return h2t(re.sub('</*en-media[^>]*?>', '', s))
Пример #7
0
    return query


def get_card(soup, final, kwargs):
    """Getting cards if present, here started the pain"""
    # common card
    if card := soup.find("div", class_="g mnr-c g-blk"):
        if desc := card.find("span", class_="hgKElc"):
            final.append(s(None, "Google Info Card:", h2t(str(desc))))
            return
    # another webpull card: what is the language JetBrains made? TODO fix this, depends on too many classes as of now
    if card := soup.find("div", class_="kp-blk c2xzTb"):
        if head := card.find("div", class_="Z0LcW XcVN5d AZCkJd"):
            if desc := card.find("div", class_="iKJnec"):
                final.append(s(None, f"Answer: {head.text}", h2t(str(desc))))
                return

    # calculator card
    if card := soup.find("div", class_="tyYmIf"):
        if question := card.find("span", class_="vUGUtc"):
            if answer := card.find("span", class_="qv3Wpe"):
                tmp = h2t(str(question)).strip("\n")
                final.append(
                    s(None, "Google Calculator:",
                      f"**{tmp}** {h2t(str(answer))}"))
                return

    # sidepage card
    if card := soup.find("div", class_="liYKde g VjDLd"):
        if thumbnail := card.find("g-img", attrs={"data-lpage": True}):
Пример #8
0
#! python3

from html2text import html2text as h2t
import pyperclip

# Read the clipboard, convert its HTML to plain text, and put it back.
clipboard_html = pyperclip.paste()
pyperclip.copy(h2t(clipboard_html))
Пример #9
0
def html2text(value):
    """Strip HTML markup from *value*, returning plain text."""
    plain = h2t(value)
    return plain
Пример #10
0
def html2text(s):
    """Convert HTML to plain text after removing Evernote en-media tags."""
    media_tag = re.compile('</*en-media[^>]*?>')
    stripped = media_tag.sub('', s)
    return h2t(stripped)
Пример #11
0
def html2text(text):
    """Convert HTML to text and drop leftover markdown markers."""
    plain = h2t(text)
    # Same removal order as sequential .replace() calls: '#', '**', '__'.
    for marker in ('#', '**', '__'):
        plain = plain.replace(marker, '')
    return plain

    
Пример #12
0
 def raw_text(self, html_field):
     """Make practically raw text out of an HTML field."""
     converted = h2t(html_field)
     # Collapse whitespace runs and the '*'/'#' markers h2t leaves behind.
     return sub('[\t\n\r*#]+', ' ', converted)
Пример #13
0
        if cards:
            get_card(soup, final, kwargs)

        for res in soup.findAll("div", class_="g"):
            if name := res.find("div", class_="yuRUbf"):
                url = name.a["href"]
                if title := name.find("h3", "LC20lb DKV0Md"):
                    title = title.text
                else:
                    title = url
            else:
                url = None
                title = None
            if desc := res.find("div", class_="IsZvec"):
                if remove := desc.find("span", class_="f"):
                    remove.decompose()
                if final_desc := desc.find_all("span"):
                    desc = h2t(str(final_desc[-1]))[:500]
                else:
                    desc = "Nothing found"
            else:
                desc = "Not found"
            if title:
                final.append(s(url, title, desc.replace("\n", " ")))
        return final, kwargs

    def parser_image(self, html):
        """Return image links found in *html*; the first two matches are
        Google's static logo images and are skipped."""
        links = self.link_regex.findall(html)
        return links[2:], {}