Example #1
def javbooks(avlist):
    # Look up each video id on javbooks.com, open the torrent link through
    # the OS shell ("start" is Windows-specific), and record handled ids.
    s = requests.session()
    j = 0  # count of ids that failed to resolve
    f = open('url.txt', 'w')  # opened but only written by the commented-out line below
    for i in avlist:
        a = conn.execute("select * from name where id=?", (i, ))
        if a.fetchone() is None:  # process only ids not yet recorded
            url = 'http://javbooks.com/serch_censored/' + i + '/serialall_1.htm'
            r = s.get(url)
            get_bt_url = bf(r.text)
            try:
                bt_url = get_bt_url.find('div', {
                    'class': 'Po_topic_title'
                }).find('a')['href']
                r = s.get(bt_url)
                result = bf(r.text)
                #f.write(result.find('div',{'class':'dht_dl_title_content'}).find('a')['href']+str('\n'))
                conn.execute("insert into name (id) values(?)", (i, ))
                os.system("start " +
                          result.find('div', {
                              'class': 'dht_dl_title_content'
                          }).find('a')['href'])
                print "------------------------"
                sleep(5)  # pause between requests
            except Exception:  # layout change or no result for this id
                j = j + 1
                print i
    print j
    f.close()
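The snippets in this file reference module-level names (requests, conn, bf, sleep, os, re) that the extract does not show. A minimal sketch of the assumed setup; the database filename and schema are guesses, only the names come from the code:

# Assumed module-level setup; the filename and schema are hypothetical.
import os
import re
import sqlite3
import requests
from time import sleep
from bs4 import BeautifulSoup as bf  # Examples 2 and 5 use the older BeautifulSoup 3 import

conn = sqlite3.connect('seen.db')  # hypothetical database file
conn.execute("create table if not exists name (id text)")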
Example #2
def print_post(blog, text):
    #debug = blog.logger.debug
    def unescape(text):
        chars = {"&quot;": "\""}
        for c, r in chars.iteritems():
            text = text.replace(c, r)
        return text
    from BeautifulSoup import BeautifulSoup as bf
    from re import findall
    from os import popen3
    if findall(r"% [^\n]*mdown", text):
        # The text must contain a line with an mdown instruction bearing
        # that name.
        # A special format declares the language of the code that follows:
        # every code block must be preceded by such a declaration, otherwise
        # the tags would descend into chaos. And we do not want that.
        code_infos = findall(r"% \[code\] lang *[:=] *(\w+) *", text)
        f_in, f_out, f_err = popen3("mdown -f xhtml -B tdql -b xml")
        f_in.write(text.encode("utf-8", "ignore"))
        f_in.close()
        out, err = f_out.read(), f_err.read()
        f_out.close()
        f_err.close()
        if err:
            blog.logger.warning(u"Mdown : %s" % err)
        soup = bf(unescape(out))
        for elem, lang in zip(soup.findAll("code"), code_infos):
            elem.parent.replaceWith(elem)
            elem["lang"] = lang
        return unicode(soup.prettify(), "utf-8")
    else:
        return text
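For illustration, a hypothetical post in the annotation format those regexes expect: one "% ... mdown" line enables processing, and one "% [code] lang" line per code block declares its language, in document order. This assumes the external mdown binary is on PATH and that blog is an object with a .logger attribute, as the signature suggests:

# Hypothetical input for print_post; "render:" is an arbitrary label,
# the regex only requires "% " and "mdown" on the same line.
post = u"""% render: mdown
A paragraph of prose.

% [code] lang: python
    def hello():
        print "hi"
"""
html = print_post(blog, post)  # the resulting <code> tag gets lang="python"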
Example #3
def get_avlist():
    avlist = []
    # cid shapes seen in the hrefs, captured as (label)00(number):
    pat = r'[\w\W]+cid=([a-z]+)00([0-9]+)'  # e.g. cid=abc00123
    pat1 = r'[\w\W]+cid=[0-9]+([a-z]+)00([0-9]+)'  # leading digits before the label
    pat2 = r'[\w\W]+cid=[a-z]_[0-9]+([a-zA-Z]+)00([0-9]+)'  # 'x_NNN' prefix
    pat3 = r'[\w\W]+cid=([\w+])/'  # fallback, used only to log the odd cid out
    s = requests.session()
    r = s.get('http://www.dmm.co.jp/digital/videoa/-/list/=/sort=ranking/')
    alllist = bf(r.text)
    result = alllist.find('ul', {'id': 'list'}).findAll('li')
    for i in result:
        cid = i.find('p', {'class': 'tmb'}).find('a')['href']
        # Try each pattern in turn; the first match wins.
        m = (re.match(pat, cid) or re.match(pat1, cid)
             or re.match(pat2, cid))
        if m is not None:
            video_id = m.group(1) + '-' + m.group(2)
        else:
            print re.match(pat3, cid).group(1)
            #print 'error'
            continue  # nothing extracted for this entry; skip it
        avlist.append(video_id)
    return avlist
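For reference, a quick check of how the three cid shapes map to video ids; the sample hrefs are made up to match each pattern:

import re

checks = [
    (r'[\w\W]+cid=([a-z]+)00([0-9]+)', '/-/detail/=/cid=abc00123/'),
    (r'[\w\W]+cid=[0-9]+([a-z]+)00([0-9]+)', '/-/detail/=/cid=1abc00123/'),
    (r'[\w\W]+cid=[a-z]_[0-9]+([a-zA-Z]+)00([0-9]+)',
     '/-/detail/=/cid=h_123abc00456/'),
]
for pat, href in checks:
    m = re.match(pat, href)
    print m.group(1) + '-' + m.group(2)  # abc-123, abc-123, abc-456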
Example #4
def sukebei(avlist):
    # Print the first trusted torrent link on sukebei for each video id.
    s = requests.session()
    j = 0  # count of ids with no trusted result
    for i in avlist:
        url = 'https://sukebei.nyaa.se/?page=search&cats=8_0&filter=0&term=' + i
        r = s.get(url)
        get_bt_url = bf(r.text)
        try:
            print get_bt_url.find('tr', {
                'class': 'tlistrow trusted'
            }).find('td', {
                'class': 'tlistname'
            }).find('a')['href']
        except (AttributeError, TypeError):  # no trusted row on the page
            j = j + 1
            print i
    print j
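A plausible driver tying the snippets together, assuming the module-level setup sketched after Example #1:

# Hypothetical glue code: scrape the ranking page, then query sukebei.
if __name__ == '__main__':
    avlist = get_avlist()  # ids like 'abc-123' from the dmm ranking page
    sukebei(avlist)        # print one trusted link per id, count the misses
    # javbooks(avlist)     # alternative: open links and record ids (Example #1)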
Example #5
def print_post(blog, text):
    from pygments import highlight
    from pygments.lexers import guess_lexer, get_lexer_by_name
    from pygments.formatters import HtmlFormatter
    from pygments.util import ClassNotFound
    from BeautifulSoup import BeautifulSoup as bf
    def debug(text):
        import codecs
        with codecs.open("debug.log", "a", "utf-8") as f:
            f.write(u"%s\n" % text)  # str() would fail on non-ASCII text
    def unescape(text):
        return text.replace("&amp;", "&")
                    #replace("&lt;", "<").    \
                    #replace("&gt;", ">").    \
                    #replace("&quot;", "\""). \
                    #replace("&#39;", "'")

    debug = blog.logger.debug
    soup = bf(text)
    for elem in soup.findAll("code"):
        try:
            lang = elem["lang"]
        except KeyError:
            lang = "text"
        new_elem = elem.findChild("pre") or elem  # unwrap a nested <pre> if present
        level = 0
        #while not (isinstance(new_elem, unicode) or level > 3):
        #    level += 1
        #    new_elem = new_elem.next
        content = unicode(new_elem.renderContents(), "utf-8")
        try:
            lexer = get_lexer_by_name(lang)
        except ClassNotFound:
            try:
                lexer = guess_lexer(content.lstrip())
            except ClassNotFound:
                lexer = get_lexer_by_name("text")
        formatter = HtmlFormatter(linenos="inline")
        elem.next.extract()
        new_content = highlight(content, lexer, formatter)
        elem.replaceWith(unescape(new_content))
    return unicode(soup.prettify(), "utf-8")
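The markup Pygments returns carries CSS classes but no inline styles, so the rendered page needs a matching stylesheet. A minimal sketch using Pygments' stock API; the 'default' style name and output path are assumptions:

from pygments.formatters import HtmlFormatter

# Emit the CSS rules for HtmlFormatter's class names; '.highlight' is the
# wrapper class Pygments puts around each highlighted block.
css = HtmlFormatter(style='default').get_style_defs('.highlight')
with open('pygments.css', 'w') as f:  # hypothetical stylesheet path
    f.write(css)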