def parse_title(text, proxy=None):
    """Extract the book subject from *text* and return it UTF-8 encoded."""
    match = r_subject.search(text)
    # Fall back to the site-wide sentinel when the pattern is absent.
    subject = match.group(1) if match else 'unknow'
    return tools.to_utf8(subject)
def parse_page(title, text, proxy=None):
    """Parse a chapter page: refine *title* from the page markup and fetch
    the chapter body from the content URL embedded in *text*.

    Returns the title followed by two CRLF pairs and the formatted body;
    the body is empty when no content URL is found.
    """
    r = r_title.search(text)
    if r:
        # Page title looks like "book/chapter"; keep the chapter part.
        title = r.group(1).strip().split('/')[1]
        title = tools.to_utf8(title)
    r = r_content.search(text)
    if r:
        url = r.group(1)
        text = tools.get_url(url, proxy).strip()
        # The chapter body is served as a JS snippet:
        #     document.write('...');
        # strip that wrapper before formatting the HTML.
        b = "document.write('"
        e = "');"
        if text.startswith(b) and text.endswith(e):
            text = text[len(b):-len(e)]
        text = tools.format_html_text(text)
    else:
        text = ''
    return title + '\r\n' * 2 + text
def parse_title(text, proxy=None):
    """Return the book subject found in *text*, re-encoded to UTF-8
    using the page's declared encoding."""
    encoding = tools.get_encoding(r_meta, text)
    found = r_subject.search(text)
    if found is None:
        subject = 'unknow'
    else:
        subject = found.group(1)
    return tools.to_utf8(subject, encoding)
def parse_title(text, proxy=None):
    """Return the UTF-8 book subject with the site's fixed suffix removed."""
    m = r_subject.search(text)
    if m:
        # The site appends 27 characters of boilerplate to the subject.
        subject = m.group(1)[:-27]
    else:
        subject = 'unknow'
    return tools.to_utf8(subject)
def parse_title(text, proxy=None):
    """Extract the book subject from *text* and return it UTF-8 encoded."""
    hit = r_subject.search(text)
    subject = hit.group(1) if hit else 'unknow'
    return tools.to_utf8(subject)
def parse_index(text, proxy=None):
    """Yield (url, utf8_title) pairs for every chapter link in the index page."""
    encoding = tools.get_encoding(r_meta, text)
    # NOTE(review): the book id is no longer used since the URL-prefixing line
    # was commented out, but the lookup is kept because it deliberately raises
    # (IndexError) when the page carries no book id at all — TODO confirm.
    bookid = r_bookid.findall(text)[0]
    for (url, title) in r_index.findall(text):
        title = title.replace(' ', ' ').strip()
        yield url, tools.to_utf8(title, encoding)
def parse_index(text, proxy=None):
    """Yield (url, utf8_title) pairs for every chapter link in the index page."""
    for (url, title) in r_index.findall(text):
        title = title.replace(' ', ' ')
        yield url, tools.to_utf8(title)
def parse_page(title, text, proxy=None):
    """Parse a chapter page and return the title plus its formatted body,
    both re-encoded to UTF-8 using the page's declared encoding."""
    encoding = tools.get_encoding(r_meta, text)
    found = r_title.search(text)
    if found:
        title = tools.to_utf8(found.group(1).strip(), encoding)
    found = r_content.search(text)
    if found:
        body = tools.format_html_text(found.group(1), encoding)
    else:
        body = ''
    return title + '\r\n' * 2 + body
def parse_page(title, text, proxy=None):
    """Parse a chapter page; return the title, a blank line, and the body
    (UTF-8, decoded per the page's declared encoding)."""
    encoding = tools.get_encoding(r_meta, text)
    m = r_title.search(text)
    if m:
        title = tools.to_utf8(m.group(1).strip(), encoding)
    m = r_content.search(text)
    body = tools.format_html_text(m.group(1), encoding) if m else ''
    return title + '\r\n' * 2 + body
def parse_index(text, proxy=None):
    """Yield (url, utf8_title) chapter links built from an index page.

    Raises Exception when the book id cannot be located in *text*.
    """
    r = r_bookid.search(text)
    if not r:
        # Call-form raise: valid in both Python 2 and 3
        # (was the Python-2-only `raise Exception, "..."` syntax).
        raise Exception("can't find the book id")
    # Renamed from `id`, which shadowed the builtin.
    book_id = r.group(1)
    for (c_id, title) in r_index.findall(text):
        url = "readchapter.asp?bu_id=" + c_id + "&bl_id=" + book_id
        title = title.replace(' ', ' ')
        yield url, tools.to_utf8(title)
def parse_page(title, text, proxy=None):
    """Parse a chapter page and return the title plus the formatted body,
    separated by two CRLF pairs; body is empty when no content is found."""
    r = r_title.search(text)
    if r:
        title = r.group(1).strip()
        title = tools.to_utf8(title)
    r = r_content.search(text)
    if r:
        text = tools.format_html_text(r.group(1))
    else:
        text = ''
    return title + '\r\n' * 2 + text
def parse_page(title, text, proxy=None):
    """Parse a chapter page and return the title plus the formatted body,
    separated by two CRLF pairs; body is empty when no content is found."""
    r = r_title.search(text)
    if r:
        # strip() added for consistency with the sibling parse_page variants,
        # which all trim the extracted title.
        title = r.group(1).strip()
        title = tools.to_utf8(title)
    r = r_content.search(text)
    if r:
        text = tools.format_html_text(r.group(1))
    else:
        text = ''
    return title + '\r\n' * 2 + text