Пример #1
0
 def parseZXMeta(self, text, id):
     """Parse a ZhuanXian detail page into a dict of field values.

     The method looks for the element with id ``product_details`` and reads
     the first text node of each ``<td>`` inside it as a ``label:value``
     pair.  Labels are translated to result keys through the mapping
     returned by ``KYZhuanXian.get_k_v_dic()``.

     Args:
         text: HTML markup of the detail page.
         id: Identifier of the page on the source web site; stored in the
             result under ``"webSiteId"``.

     Returns:
         dict: ``{"webSiteId": id}`` plus one entry per recognised label.
         When the details table is absent only ``"webSiteId"`` is present.
     """
     kv_dic = KYZhuanXian.get_k_v_dic()
     result = {"webSiteId": id}
     soup = BeautifulSoup(text)
     table = soup.find(attrs={"id": "product_details"})
     if table is None:
         # No details table on the page: nothing more to extract.
         return result
     for cell in table.findAll("td"):
         texts = cell.fetchText(text=True)
         if not texts:
             # Empty cell, no text node to read.
             continue
         entry = texts[0].strip()
         # Cells whose text starts with "下载" ("download") carry no data.
         if entry.startswith(u"下载"):
             continue
         # Split on the FIRST colon only so values that themselves contain
         # ":" (times, URLs, ...) are kept whole.
         label, sep, value = entry.partition(":")
         if not sep:
             # Not a "label:value" cell; skip it instead of aborting the
             # whole parse with an IndexError.
             continue
         field = kv_dic.get(label)
         if field is None:
             # Label without a mapping: do not pollute the result with a
             # None key.
             continue
         # Later cells overwrite earlier ones that map to the same field.
         result[field] = value
     return result
Пример #2
0
        url = "http://wl.kywmall.com/wl_pages/wl_find_line_details.aspx?id=%s" % id
        try:
            KYZhuanXian.objects.get(webSiteId=id)
            logger.info("Zhuanxian %s already exsists" % id)
            return
        except:
            pass
        text = self.httpClient.geturlcon(url)

        result = {}
        try:
            result = self.parseZXMeta(text, id)
        except Exception, e:
            logger.error(e)
        if (result.get("telephoneNumber")) and (result.get("postedTo")):
            source = KYZhuanXian()
            try:
                source.save_from_dic(result)
            except Exception, e:
                logger.info(traceback.format_exc())
            else:
                logger.info("saved")
        else:
            logger.error(url + " is a null page")

    def getAndSaveHYFromId(self, id):
        url = "http://wl.kywmall.com/wl_pages/wl_find_product_details.aspx?id=%s" % id

        try:
            KYHuoYuan.objects.get(webSiteId=id)
            logger.info("HuoYuan %s already exsists" % id)