Пример #1
0
    def visit(self, link, source=None):
        """Download a visited page, echo its plain text and append it to ``outfile``.

        Only links whose URL representation contains "/p" are processed;
        for every other link the method returns without doing anything.

        Args:
            link: visited link object; only its ``url`` attribute is read.
            source: accepted but unused -- the page is always fetched again
                through ``URL(link.url).download()``.
                NOTE(review): if the caller already supplies the page HTML
                here, the second download could be avoided -- confirm.
        """
        # The substring test runs against repr() of the URL (as it always
        # has), so the check itself never raises on a non-string ``url``.
        if "/p" not in repr(link.url):
            return
        html = URL(link.url).download()
        text = plaintext(html, keep={})
        # Encode once and reuse the result for the console echo and the file.
        encoded = text.encode('utf-8')
        print(encoded)
        outfile.write(encoded)

    def fail(self, link):
        """Print ``failed:`` followed by the repr of ``link.url``.

        NOTE(review): presumably the crawler's failure hook -- confirm
        against the base class, which is not visible here.
        """
        # Single-argument print(...) yields the same output line on
        # Python 2 and Python 3.
        print("failed: %s" % repr(link.url))
Пример #2
0
        def fail(self, link):
            """Print ``failed:`` followed by the repr of ``link.url``."""
            # Single-argument print(...) parses on both Python 2 and 3 and
            # emits the same "failed: <repr>" line as the old print statement.
            print("failed: %s" % repr(link.url))


# Fetch pages "<base>p-1.html" .. "<base>p-9.html" for every base URL in
# ``trynow``, echo each page's plain text and append it to ``outfile``.
# The pages are requested directly by name rather than through a crawler.
try:
    for base_url in trynow:
        for page_number in range(1, 10):
            try:
                page_url = str(base_url) + "p-" + str(page_number) + ".html"
                html = URL(page_url).download()
                text = plaintext(html, keep={})
                # Encode once; the same result goes to the console and the file.
                encoded = text.encode('utf-8')
                print(encoded)
                outfile.write(encoded)
            except Exception:
                # Best effort: a page that cannot be fetched or converted is
                # reported and skipped.  Catching Exception (not a bare
                # ``except``) lets Ctrl-C and SystemExit stop the run.
                print("failed: %r (page %d)" % (base_url, page_number))
finally:
    # Close the output file even when the run is interrupted.
    outfile.close()