示例#1
0
 def updateContent(self):
     """Re-process the stored ``content`` of project row 134.

     Selects ``id`` and ``content`` from ``tb_yjs_project`` for the
     hard-coded id 134, runs each row's content through
     ``AppParser.replaceP`` and writes the result back with an UPDATE sent
     through ``self.src_mq.queryAll``.  Prints the number of selected rows
     when done.

     NOTE(review): the new content is interpolated into the SQL text as-is,
     with no quoting or escaping visible here -- confirm that ``replaceP``
     returns a SQL-safe literal, or switch to a parameterized call if
     ``src_mq`` offers one.
     """
     # The parser is only used for replaceP(); it is built over an empty page.
     parser = AppParser('<html></html>')
     rows = self.src_mq.queryAll("select id,content from tb_yjs_project where id=134")

     # update_date=update_date assigns the column to itself in the statement.
     update_sql = "update tb_yjs_project set content=%s ,update_date=update_date where id = %d"
     for record in rows:
         new_content = parser.replaceP(record['content'])
         row_id = int(record['id'])
         self.src_mq.queryAll(update_sql % (new_content, row_id))

     # Single-argument print: same text as `print "update row ", n`.
     print("%s %s" % ("update row ", len(rows)))
示例#2
0
 def parserApp(self, html, app_url):
     """Parse one downloaded page into an ``AppInfo`` record.

     Builds an ``AppParser`` over ``html``, creates an ``AppInfo`` whose
     ``yjs_url`` is ``app_url`` and passes it to ``AppParser.getInfo`` to be
     filled in.

     Returns the ``AppInfo``, or ``None`` when ``getInfo`` returns -1 or -2,
     or when any exception is raised along the way (the exception is
     printed and swallowed, never re-raised).
     """
     try:
         page_parser = AppParser(html)
         info = AppInfo()
         info.yjs_url = app_url
         status = page_parser.getInfo(info)
         # Only -1 and -2 are treated as failures; any other result keeps the record.
         return None if status in (-1, -2) else info
     except Exception as err:
         # Prints the Exception class itself (not type(err)), then the message.
         print("%s %s %s" % (Exception, ":", err))
         return None
示例#3
0
    def start(self, type, start, max_page):
        """Walk the recommendation list pages and process every posting URL found.

        For each page number from ``int(start)`` to ``int(max_page)``
        inclusive, the list-page URL is built and ``AppParser.getAppUrls`` is
        asked for the posting URLs on it; if that yields nothing, the lookup
        is retried once with the ``commend_job`` URL form.  Every posting URL
        is registered through ``self.addAppUrl``.  Only when that call returns
        a positive value is the page downloaded and parsed: a parsed record is
        tagged with the recruit type and passed to ``self.addAppProject``,
        while a failed parse calls ``self.updateAppUrl(app_url, 1)``.  An
        empty or missing download is skipped silently.

        Sleeps 0.1 s after each list page and finally prints how many posting
        URLs were seen.

        Args:
            type: List category inserted into the URL.  A value containing
                ``"fulltime"`` gives recruit type ``"1"``, anything else
                ``"2"``.  (The name shadows the builtin; it is kept for
                caller compatibility.)
            start: First page number; anything ``int()`` accepts.
            max_page: Last page number, inclusive; anything ``int()`` accepts.
        """
        total_count = 0
        for page in range(int(start), int(max_page) + 1):
            src = "http://www.yingjiesheng.com/commend-%s-%s.html/" % (type, page)
            print("%s %s" % ("src=", src))

            # str.find() returns -1 (truthy) on a miss and never 0 here, so
            # the old `if src.find("fulltime")` always chose "1".  A
            # membership test gives the intended full-time/part-time split.
            recruit_type = "1" if "fulltime" in src else "2"

            appUrls = AppParser.getAppUrls(src)
            if not appUrls:
                # Nothing found: retry once with the alternate list URL form.
                if type == 'fulltime':
                    src = "http://www.yingjiesheng.com/commend_job/fulltime_%s.html" % (page)
                else:
                    src = "http://www.yingjiesheng.com/commend_job/parttime_%s.html" % (page)
                appUrls = AppParser.getAppUrls(src)

            for app_url in appUrls:
                print(">>>>>>>>>>>>>>>" + app_url)
                total_count += 1

                # Only continue when addAppUrl reports a positive result.
                if not (self.addAppUrl(app_url, recruit_type) > 0):
                    continue

                html = self.download(app_url)
                if html is None or html == '':
                    continue

                appInfo = self.parserApp(html, app_url)
                if appInfo:
                    appInfo.recruit_type = recruit_type
                    self.addAppProject(appInfo)
                else:
                    # Parsing failed; presumably this marks the URL as such -- TODO confirm.
                    self.updateAppUrl(app_url, 1)

            # Short pause between list pages.
            time.sleep(0.1)
        print("%s %s" % ("total_count=", total_count))