示例#1
0
    def pharseHYMeta(self, text, id):
        """Extract the label/value cells of a detail page into a dict.

        The ``<td>`` cells of the first two elements carrying the CSS class
        ``mt10`` are scanned in document order. A cell whose stripped text
        ends with ``":"`` is taken as a label; the next non-label cell is
        taken as that label's value. Labels are translated to result keys
        through ``WTHuoYuan.get_k_v_dic()``.

        :param text: HTML source of the page.
        :param id: page identifier, stored under ``"webSiteId"``.
        :returns: dict with ``"webSiteId"`` plus one entry per label that
            has a mapping in ``WTHuoYuan.get_k_v_dic()``.
        :raises IndexError: if fewer than two ``mt10`` elements are found.
        """
        label_to_field = WTHuoYuan.get_k_v_dic()
        result = {"webSiteId": id}

        soup = BeautifulSoup(text)
        tables = soup.findAll(attrs={"class": "mt10"})
        cells = tables[0].findAll("td")
        cells.extend(tables[1].findAll("td"))

        label = None
        for cell in cells:
            cell_text = "".join(cell.fetchText(True)).strip()
            if cell_text.endswith(":"):
                # A label cell: remember it until its value cell shows up.
                label = cell_text.replace(":", "")
                continue

            # A value cell always consumes the pending label, even when the
            # value is empty or the label turns out to be unusable.
            if label:
                field = label_to_field.get(label)
                # Labels without a mapping are dropped instead of being
                # stored under the key None.
                if field is not None:
                    result[field] = cell_text
            label = None
        return result
示例#2
0
    def getAndSaveHYFromId(self, id):
        """Download, parse and store the ``/203/<id>.html`` page.

        The page is skipped when ``WTHuoYuan.objects.get(webSiteId=id)``
        succeeds. Otherwise it is fetched through ``self.httpClient`` and
        parsed with ``pharseHYMeta``; the result is saved only when it has
        both a ``startPlace`` and a ``destPlace`` value, else the URL is
        logged as a null page. Parse and save errors are logged with their
        traceback and not re-raised.

        :param id: identifier interpolated into the page URL and used as
            ``webSiteId``.
        :returns: None
        """
        url = "http://www.chinawutong.com/203/%s.html" % id

        try:
            WTHuoYuan.objects.get(webSiteId=id)
        except Exception:
            # Any lookup failure is treated as "not stored yet". Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            pass
        else:
            logger.info("%s already exsists" % id)
            return

        text = self.httpClient.geturlcon(url)

        result = {}
        try:
            result = self.pharseHYMeta(text, id)
        except Exception:
            # Keep going with the empty result; it is reported as a null
            # page below.
            logger.info(traceback.format_exc())

        if not (result.get("startPlace") and result.get("destPlace")):
            logger.info(url + " is a null page")
            return

        huoyuan = WTHuoYuan()
        try:
            huoyuan.save_from_dic(result)
        except Exception:
            logger.error(traceback.format_exc())
        else:
            logger.info("saved %s" % (id))


    def getAndSaveZhuanXianFromId(self, id):
        url = "http://www.chinawutong.com/201/%s.html" % id
        result = {}
        try:
            WTZhuanXian.objects.get(webSiteId=id)