def export(self):
    """Write one RSS document per crawl pattern into ``config.RSSDIR``.

    For every pattern row returned by ``CrawlDB.getPatterns()`` this:

    1. fetches at most 10 pages for that pattern id,
    2. renders each page through the ``self.rssitem`` template,
    3. wraps the joined items in the ``self.rssframe`` template, and
    4. writes the UTF-8 encoded result to ``<RSSDIR>let<pid>.xml``.

    The file name is built by plain string formatting, so
    ``config.RSSDIR`` is used as a raw prefix (no path separator is
    inserted here).

    Returns:
        None. The method only has side effects (log output and files).

    Raises:
        IOError/OSError: propagated from ``open``/``write`` when an
            output file cannot be created or written.
    """
    logger.debug("Begin RSS Export:")
    db = CrawlDB()
    rep = Pattern()
    for pat in db.getPatterns():
        pid = pat["pid"]
        pattern = pat["pattern"]
        description = pat["name"]

        # "%d" forces pid to be formatted as an integer, so the query
        # fragment cannot carry arbitrary text from the pattern row.
        # NOTE(review): title/url/content are interpolated into the RSS
        # templates as-is; whether they need XML escaping depends on the
        # rssitem template (e.g. CDATA), which is not visible here.
        items = [
            self.rssitem % (
                page["url"],
                page["title"],
                "",
                pattern,
                "",
                page["url"],
                # Pattern.sub presumably rewrites the page body before
                # export -- confirm against the Pattern class.
                rep.sub(page["content"]),
            )
            for page in db.getPages("where pid=%d limit 10" % pid)
        ]
        itemout = "\n".join(items)
        output = self.rssframe % (
            pattern,
            "http://hjbbs.com/bbs",
            description,
            "Learning English Tool",
            itemout,
        )
        logger.debug("LET %d:\n%s\n" % (pid, output))

        # write out; the context manager closes the handle even when
        # encoding or writing fails, which the manual close() did not.
        with open("%slet%d.xml" % (config.RSSDIR, pid), "w") as fp:
            fp.write(output.encode('utf8'))
    logger.debug("End RSS Export.")
if crawl.loginHjbbs(): for link in crawl.parseTitles(): page = crawl.crawlPage(link) fp.write(link + "\r\n") fp.write(page["title"]) fp.write(page["content"]) print link else: print "login failed" elif o in ("-m", "--mail"): from mail import SendMail db = CrawlDB() mail = SendMail() # search db pages = db.getPages() if pages: for page in pages: if mail.sendMail(page["title"], page["content"]): db.setUrl(page["url"]) else: print "no mail is sent" mail.close() else: assert False, "unhandled option" else: # from time import strftime, gmtime # timefmt = strftime("%y-%m-%d", gmtime()) # print "%s run crawl.crawlPages()" % (timefmt) crawl.crawlPages()