def parseZXMeta(self, text, id):
    """Extract the labelled fields of a ZhuanXian detail page into a dict.

    The page is searched for the element whose ``id`` attribute is
    ``product_details``. The first text node of each ``<td>`` inside it is
    read as ``label:value``, and the label is translated to a result key
    through the mapping returned by ``KYZhuanXian.get_k_v_dic()``.

    Args:
        text: HTML source of the detail page.
        id: Identifier of the page on the web site; stored in the result
            under ``"webSiteId"``.

    Returns:
        A dict that always contains ``"webSiteId"`` plus one entry per
        recognised label. When the details table is absent, the dict holds
        ``"webSiteId"`` only.
    """
    kv_dic = KYZhuanXian.get_k_v_dic()
    result = {"webSiteId": id}

    table = BeautifulSoup(text).find(attrs={"id": "product_details"})
    if table is None:
        # No details table: hand back the bare result instead of failing
        # with an AttributeError on None.
        return result

    for cell in table.findAll("td"):
        texts = cell.findAll(text=True)
        if not texts:
            # An empty cell must not abort the whole page.
            continue
        entry = texts[0].strip()
        if entry.startswith(u"下载"):
            continue
        # Split on the first colon only, so a value that itself contains a
        # colon (e.g. a time such as 12:30) is kept whole.
        label, sep, value = entry.partition(":")
        if not sep:
            # Not a "label:value" cell.
            continue
        field = kv_dic.get(label)
        if field is None:
            # Unknown label: skip it rather than store it under key None.
            continue
        result[field] = value
    return result
url = "http://wl.kywmall.com/wl_pages/wl_find_line_details.aspx?id=%s" % id try: KYZhuanXian.objects.get(webSiteId=id) logger.info("Zhuanxian %s already exsists" % id) return except: pass text = self.httpClient.geturlcon(url) result = {} try: result = self.parseZXMeta(text, id) except Exception, e: logger.error(e) if (result.get("telephoneNumber")) and (result.get("postedTo")): source = KYZhuanXian() try: source.save_from_dic(result) except Exception, e: logger.info(traceback.format_exc()) else: logger.info("saved") else: logger.error(url + " is a null page") def getAndSaveHYFromId(self, id): url = "http://wl.kywmall.com/wl_pages/wl_find_product_details.aspx?id=%s" % id try: KYHuoYuan.objects.get(webSiteId=id) logger.info("HuoYuan %s already exsists" % id)