def updateContent(self): appParser = AppParser('<html></html>') apps = self.src_mq.queryAll("select id,content from tb_yjs_project where id=134") for row in apps: cont = appParser.replaceP(row['content']) self.src_mq.queryAll("update tb_yjs_project set content=%s ,update_date=update_date where id = %d" % (cont, int(row['id']))) print "update row ",len(apps)
def parserApp(self, html, app_url): try: appParser = AppParser(html) appInfo = AppInfo() appInfo.yjs_url = app_url res = appParser.getInfo(appInfo) if res == -1 or res == -2: #res <= 0: return None ## -1 and -2 return appInfo except Exception, data: print Exception, ":", data return None
def start(self, type, start, max_page): total_count = 0 for i in range(int(start), int(max_page) + 1): src = "http://www.yingjiesheng.com/commend-%s-%s.html/" % (type, i) print "src=", src if src.find("fulltime"): recruit_type = "1" else: recruit_type = "2" appUrls = AppParser.getAppUrls(src) ## repeat to get app urls if len(appUrls) == 0: if type == 'fulltime': src = "http://www.yingjiesheng.com/commend_job/fulltime_%s.html" % (i) else: src = "http://www.yingjiesheng.com/commend_job/parttime_%s.html" % (i) appUrls = AppParser.getAppUrls(src) ## parser app urls for app_url in appUrls: print ">>>>>>>>>>>>>>>" + app_url total_count = total_count + 1 # how to save to db if self.addAppUrl(app_url, recruit_type) > 0: html = self.download(app_url) if html != None and html != '': appInfo = self.parserApp(html, app_url) if appInfo:#todo and len(appInfo.content) > 0: appInfo.recruit_type = recruit_type self.addAppProject(appInfo) else: self.updateAppUrl(app_url, 1) time.sleep(0.1) print "total_count=", total_count