Exemplo n.º 1
0
def main():
    """Run the RssSpider steps for the '91ri.xml' feed.

    Constructs an ``RssSpider`` from ``myrss``, '91ri.xml' and '91ri.db',
    then calls ``get_list``, ``get_content`` and ``save_rss_file`` in that
    order.
    """
    # Two capture groups: the anchor's href and its title attribute.
    list_pattern = r'<div class="right-col">\s+<h1><a href="(.*?)" data-no-turbolink="true" target="_blank" title="(.*?)">'
    # Markers handed to get_content; presumably they delimit the article
    # body inside each fetched page -- confirm against RssSpider.
    content_start = '<article class="single-post">'
    content_end = '</article>'

    spider = RssSpider(myrss, '91ri.xml', '91ri.db')
    spider.get_list(list_pattern, flag=re.S)
    spider.get_content(content_start, content_end)
    spider.save_rss_file()
Exemplo n.º 2
0
def main():
    """Run the RssSpider steps for the '8She.xml' feed.

    Constructs an ``RssSpider`` from ``myrss``, '8She.xml' and '8She.db',
    then calls ``get_list``, ``get_content`` and ``save_rss_file`` in that
    order.
    """
    # Two capture groups: the anchor's href and the anchor text.
    list_pattern = r'<header>.*?<i></i></a><h2><a href="(.*?)" title=".*?">(.*?)</a></h2></header>'
    # Markers handed to get_content; presumably they delimit the article
    # body inside each fetched page -- confirm against RssSpider.
    content_start = '<article class="article-content">'
    content_end = '</article>'

    spider = RssSpider(myrss, '8She.xml', '8She.db')
    spider.get_list(list_pattern, flag=re.S)
    spider.get_content(content_start, content_end)
    spider.save_rss_file()
Exemplo n.º 3
0
def main():
    """Run the RssSpider steps for the 'elm-world.xml' feed.

    Builds a list of replacement rules, constructs an ``RssSpider`` from
    ``myrss`` and 'elm-world.xml' with ``charset='CP932'``, passes the rules
    to ``get_list`` together with the link pattern, then calls
    ``get_content`` and ``save_rss_file``.

    Each rule is a dict with the keys 'old', 'new', 'reg' and 'flags'.
    Presumably 'reg' selects regex vs. literal replacement and 'flags'
    carries the ``re`` flags for regex rules -- confirm against
    RssSpider.get_list.
    """
    replaces = [
        # Replace the "New!" badge image with an empty string.
        {
            'old':
            '<img src="extremesex/memder/ng/logo31.gif" width="47" height="25" border="0" alt="New!">',
            'new': '',
            'reg': False,
            'flags': 0,
        },
        # Move a 'P' that follows a closing anchor to just before it.
        {
            'old': '</a>P<br>',
            'new': 'P</a><br>',
            'reg': False,
            'flags': 0,
        },
        # Raw string on purpose: '\s' in a plain literal is an invalid
        # escape sequence and triggers a warning on current Python versions.
        # The resulting pattern text is unchanged.
        {
            'old': r'<BR>\s+</A>',
            'new': '</a><br>',
            'reg': True,
            'flags': re.S | re.I,
        },
        # Replace one specific empty anchor with an empty string.
        {
            'old':
            '<A target="_blank" href="extremesex/memder/photo/messi1/index.html"></a><br>',
            'new': '',
            'reg': False,
            'flags': 0,
        },
    ]

    spider = RssSpider(myrss, 'elm-world.xml', charset='CP932')
    # Two capture groups: the anchor's href and the anchor text.
    spider.get_list(r'<a.*?href="(.*?)".*?>(.*?)</a>',
                    replaces=replaces,
                    flag=re.S | re.I)
    # Empty start/end markers; NOTE(review): presumably this means "use the
    # whole page" -- confirm against RssSpider.get_content.
    spider.get_content('', '')
    spider.save_rss_file()
Exemplo n.º 4
0
def main():
    """Run the RssSpider steps for the 'elm-world.xml' feed.

    Defines four replacement rules, constructs an ``RssSpider`` from
    ``myrss`` and 'elm-world.xml' with ``charset='CP932'``, passes the rules
    to ``get_list`` together with the link pattern, then calls
    ``get_content`` and ``save_rss_file``.

    Each rule is a dict with the keys 'old', 'new', 'reg' and 'flags'.
    Presumably 'reg' selects regex vs. literal replacement and 'flags'
    carries the ``re`` flags for regex rules -- confirm against
    RssSpider.get_list.
    """
    # Replace the "New!" badge image with an empty string.
    drop_new_badge = {
        'old': '<img src="extremesex/memder/ng/logo31.gif" width="47" height="25" border="0" alt="New!">',
        'new': '',
        'reg': False,
        'flags': 0,
    }
    # Move a 'P' that follows a closing anchor to just before it.
    move_p_into_anchor = {
        'old': '</a>P<br>',
        'new': 'P</a><br>',
        'reg': False,
        'flags': 0,
    }
    # Raw string on purpose: '\s' in a plain literal is an invalid escape
    # sequence and triggers a warning on current Python versions. The
    # resulting pattern text is unchanged.
    swap_br_and_close = {
        'old': r'<BR>\s+</A>',
        'new': '</a><br>',
        'reg': True,
        'flags': re.S | re.I,
    }
    # Replace one specific empty anchor with an empty string.
    drop_empty_anchor = {
        'old': '<A target="_blank" href="extremesex/memder/photo/messi1/index.html"></a><br>',
        'new': '',
        'reg': False,
        'flags': 0,
    }
    # The rules are listed in the same order as they were defined above.
    replaces = [
        drop_new_badge,
        move_p_into_anchor,
        swap_br_and_close,
        drop_empty_anchor,
    ]

    spider = RssSpider(myrss, 'elm-world.xml', charset='CP932')
    # Two capture groups: the anchor's href and the anchor text.
    spider.get_list(
        r'<a.*?href="(.*?)".*?>(.*?)</a>',
        replaces=replaces,
        flag=re.S | re.I,
    )
    # Empty start/end markers; NOTE(review): presumably this means "use the
    # whole page" -- confirm against RssSpider.get_content.
    spider.get_content('', '')
    spider.save_rss_file()
Exemplo n.º 5
0
def main():
    """Run the RssSpider steps for the '91ri.xml' feed.

    Creates an ``RssSpider`` with ``myrss``, '91ri.xml' and '91ri.db' and
    invokes ``get_list``, ``get_content`` and ``save_rss_file`` in sequence.
    """
    feed = RssSpider(myrss, '91ri.xml', '91ri.db')

    # Link pattern with two groups: href value and title attribute value.
    link_regex = r'<div class="right-col">\s+<h1><a href="(.*?)" data-no-turbolink="true" target="_blank" title="(.*?)">'
    feed.get_list(link_regex, flag=re.S)

    # Opening/closing markers passed to get_content; presumably they bound
    # the article markup to extract -- confirm against RssSpider.
    article_open, article_close = '<article class="single-post">', '</article>'
    feed.get_content(article_open, article_close)

    feed.save_rss_file()
Exemplo n.º 6
0
def main():
    """Run the RssSpider steps for the '8She.xml' feed.

    Creates an ``RssSpider`` with ``myrss``, '8She.xml' and '8She.db' and
    invokes ``get_list``, ``get_content`` and ``save_rss_file`` in sequence.
    """
    feed = RssSpider(myrss, '8She.xml', '8She.db')

    # Link pattern with two groups: href value and anchor text.
    link_regex = r'<header>.*?<i></i></a><h2><a href="(.*?)" title=".*?">(.*?)</a></h2></header>'
    feed.get_list(link_regex, flag=re.S)

    # Opening/closing markers passed to get_content; presumably they bound
    # the article markup to extract -- confirm against RssSpider.
    article_open, article_close = '<article class="article-content">', '</article>'
    feed.get_content(article_open, article_close)

    feed.save_rss_file()