def visit(self, link, source=None):
    """Download a visited page and record its plain text.

    Presumably invoked by the crawler for each link it reaches (the
    enclosing class is not visible here -- confirm against the class
    definition).

    Only links whose ``repr(link.url)`` contains "/p" are processed; any
    other link is ignored. For a matching link the URL is downloaded,
    passed through ``plaintext(..., keep={})``, encoded as UTF-8, echoed
    to stdout and written to the module-level ``outfile``.

    ``source`` is accepted but not used.
    """
    # Guard clause: skip everything that does not look like a page link.
    if "/p" not in repr(link.url):
        return

    raw_page = URL(link.url).download()
    page_text = plaintext(raw_page, keep={})
    encoded_text = page_text.encode('utf-8')

    # A single-argument print(...) emits the same output as the Python 2
    # print statement this script otherwise uses.
    print(encoded_text)
    outfile.write(encoded_text)


def fail(self, link):
    """Report a link that could not be processed by printing its URL."""
    print(' '.join(('failed:', repr(link.url))))
def fail(self, link):
    """Report a link that could not be processed by printing its URL."""
    print(' '.join(('failed:', repr(link.url))))


import sys  # local to this script section; used only for error reporting

# Pages are fetched directly by URL pattern ("<base>p-<n>.html" for
# n = 1..9) instead of going through the crawler class. Each page is
# reduced to plain text, echoed to stdout and appended to ``outfile``.
try:
    for base_url in trynow:
        for page_number in range(1, 10):
            page_url = str(base_url) + "p-" + str(page_number) + ".html"
            try:
                page_text = plaintext(URL(page_url).download(), keep={})
                encoded_text = page_text.encode('utf-8')
                print(encoded_text)
                outfile.write(encoded_text)
            except Exception as exc:
                # Best effort: a missing or broken page must not stop the
                # run. Unlike a bare ``except:``, this still lets Ctrl-C
                # and SystemExit end the script.
                print("")  # keep the blank stdout line emitted on failure
                sys.stderr.write('failed: %r (%r)\n' % (page_url, exc))
finally:
    # Close the output file even if the run is interrupted, so text
    # written so far is flushed to disk.
    outfile.close()