Пример #1
0
    def parseHYDetailsMeta(self, text, id):
        """Parse a cargo-source ("HuoYuan") detail page into a flat dict.

        The summary block, the remark block and the labelled ``<li>`` rows
        are scraped from the HTML in *text*. Each row label is translated to
        a result field name through ``GateHuoYuanSource.get_k_v_dic()``.

        :param text: HTML of the detail page.
        :param id: site identifier of the record; stored as ``webSiteId``
            and passed on to ``self.parseHYContact``.
        :returns: dict holding ``sourceName``, ``sourceAddress``, ``remark``
            (``None`` when the remark block has no text after its first
            text node), ``webSiteId``, ``source``, ``dest`` and one entry
            per row whose label is present in the mapping.
        :raises AttributeError, IndexError: when the page lacks the expected
            elements; nothing is caught here, the caller decides.
        """
        # Label -> field mapping, as recorded in the original author's note
        # (confirm against GateHuoYuanSource.get_k_v_dic()):
        #     u"货源名称": "sourceName",
        #     u"货源地址": "sourceAddress",
        #     u"货物名称": "startPlace",
        #     u"货物类型": "goodsType",
        #     u"重量": "weight",
        #     u"数量": "number",
        #     u"外形": "appearance",
        #     u"体积": "volume",
        #     u"所需车型": "vehicleDependent",
        #     u"运送方式": "shippingMethod",
        #     u"运费": "freight",
        #     u"包装方式": "packingManer",
        #     u"注意事项": "attention",
        #     u"货物情况": "cargoCondition",
        #     u"最早起运": "earliestShipment",
        #     u"最晚起运": "latestShipment",
        #     u"是否长期货源": "isLongTerm",
        #     u"出发地": "departure",
        #     u"目的地": "destPlace",
        #     u"发布时间": "postTime",
        #     u"webSiteId": "webSiteId",
        #     u"出发地联系人": "sourceContact",
        #     u"目的地联系人": "destContact",
        #     u"补充说明": "remark",
        key_value_dic = GateHuoYuanSource.get_k_v_dic()
        soup = BeautifulSoup(text)

        # Summary block: first <li> carries the name, second the address.
        summary_items = soup.find(
            attrs={"class": "detail_top_content_1"}).findAll("li")
        raw_name = summary_items[0].find("span").fetchText(text=True)[0]
        # split() + join removes every whitespace character, tabs included.
        source_name = "".join(raw_name.split())
        source_address = summary_items[1].fetchText(text=True)[1].strip()

        # Remark block: the first text node is dropped, the rest is the body.
        remark_texts = soup.find(
            attrs={"class": "detail_left_content_1_4"}
        ).find("li").fetchText(text=True)
        remark = "".join(remark_texts[1:]) if len(remark_texts) > 1 else None

        result = {
            "remark": remark,
            "sourceName": source_name,
            "sourceAddress": source_address,
            "webSiteId": id,
        }

        # The labelled rows are spread over the first two detail blocks.
        detail_blocks = soup.findAll(
            attrs={"class": "detail_left_content_1_1"})
        rows = list(detail_blocks[0].findAll("li"))
        rows.extend(detail_blocks[1].findAll("li"))

        for row in rows:
            texts = row.fetchText(text=True)
            label = texts[0].replace(":", "")
            field = key_value_dic.get(label)
            if field is None:
                # Unknown label: skip it instead of storing it under the
                # key None, where unrelated rows would overwrite each other.
                continue
            # The stored value is the row's full text, label included.
            result[field] = "".join(texts)

        contact = self.parseHYContact(id)
        result["source"] = contact[0]
        result["dest"] = contact[1]

        return result
Пример #2
0
        try:
            GateHuoYuanSource.objects.get(webSiteId=id)
            logger.info("HuoYuan %s already exsists" % id)
            return
        except:
            pass

        text = self.httpClient.geturlcon(url)
        result = {}
        try:
            result = self.parseHYDetailsMeta(text, id)
        except Exception, e:
            traceback.print_exc()
            logger.error(e)
        if True:
            source = GateHuoYuanSource()
            try:
                for k, v in result.iteritems():
                    logger.info("gateee " + str(k) + " " + str(v))
                source.save_from_a_source(result)
            except Exception, e:
                traceback.print_exc()
                logger.error(e)
            else:
                logger.info("%s saved" % (id))
        else:
            logger.error(url + " is a null page")

    def getAndSaveCYFromId(self, id):
        url = "http://56gate.com/html/cy/cydetail_%s.html" % id
        try: