Exemplo n.º 1
0
    def parse(self, response):
        """Fan out to the remaining result pages and yield one item per product.

        The response body is decoded with ``json.loads`` twice, i.e. it is
        handled as a JSON string that itself contains a JSON document.
        Follow-up page requests are only issued while handling page "1".
        """
        payload = json.loads(json.loads(response.text))
        status = payload["status"]
        if status["pageIndex"] == "1":
            # "pagenum" is divided by the page size (30) to obtain the number
            # of pages - presumably it is a total record count; confirm
            # against the API. Never request more than 100 pages.
            last_page = min(int(math.ceil(int(status["pagenum"]) / 30)), 100)
            for page in range(2, last_page + 1):
                material = response.meta["material"]
                page_url = "http://res.ggang.cn/SteelList/GetProductList/?productType=全部&productName=全部&material=%s&standard=全部&productId=0&mills=全部&home=&maxPrice=&maxThick=&maxlength=&maxwidth=&minPrice=&minThick=&minlength=&minwidth=&pageIndex=%d&productCity=&key=&IsPriceSort=0&pageSize=30&pageType=1" % (material, page)
                yield scrapy.Request(
                    method="get",
                    url=page_url,
                    meta={"pageIndex": page, "material": material},
                    headers=self.headers,
                    callback=self.parse,
                )

        # (item field, key in the product record), applied in this order.
        # 'posttime' is not populated for this source.
        field_keys = (
            ("name", "name"),
            ("material", "material"),
            ("weight", "piece"),
            ("type", "norms"),
            ("factory", "plant"),
            ("price", "price"),
            ("storage", "wareHouse"),
            ("city", "place"),
            ("company", "source"),
        )
        for entry in payload["msg"]:
            item = GangItem()
            item["url"] = response.url
            item["status"] = entry["ID"]
            item["grabtime"] = time.strftime("%Y-%m-%d %X", time.localtime())
            for field, key in field_keys:
                item[field] = entry[key]
            yield item
Exemplo n.º 2
0
    def parse(self, response):
        """Request the next search page if there is one, then yield the items.

        The response body is a JSON document with a ``has_more`` flag and a
        ``data`` list of product records. ``response.meta`` must carry the
        current ``page`` and the ``keywords`` of the search.
        """
        payload = json.loads(response.text)

        if payload["has_more"] == 1:
            # The same dict is used as the form body and as the request meta,
            # so the next callback can read "page" and "keywords" back.
            next_form = {
                "page": str(int(response.meta["page"]) + 1),
                "keywords": response.meta["keywords"],
            }
            yield scrapy.FormRequest(
                method="post",
                url="https://www.ouyeel.com/jk-mobile/search/main-search/",
                meta=next_form,
                formdata=next_form,
                headers=self.headers,
                dont_filter=True,
            )

        # (item field, key in the product record), applied in this order.
        field_keys = (
            ("name", "product_name"),
            ("material", "shop_sign"),
            ("weight", "weight"),
            ("type", "spec"),
            ("factory", "manufacturer"),
            ("price", "price"),
            ("storage", "warehouse_name"),
            ("city", "store_city_name"),
            ("company", "provider_name"),
            ("posttime", "active_date"),
        )
        for entry in payload["data"]:
            item = GangItem()
            item["url"] = response.url
            item["status"] = str(entry["id"])
            item["grabtime"] = time.strftime("%Y-%m-%d %X", time.localtime())
            for field, key in field_keys:
                item[field] = entry[key]
            yield item
Exemplo n.º 3
0
    def parse(self, response):
        """Fan out to the remaining result pages and yield one item per product.

        The response body is a JSON document whose ``data`` object holds
        ``totalPages`` and an ``items`` list. Follow-up requests are only
        issued while handling the response whose meta ``page`` is "1".
        """
        payload = json.loads(response.text)
        if response.meta["page"] == "1":
            total_pages = payload["data"]["totalPages"]
            for page in range(2, total_pages + 1):
                # The same dict is used as the form body and as the request meta.
                form = {
                    "page": str(page),
                    "shopSign": response.meta["shopSign"],
                    "pageSize": "20",
                }
                yield scrapy.FormRequest(
                    method="post",
                    url="http://www.opsteel.cn/search/getResourcesList/",
                    meta=form,
                    formdata=form,
                    headers=self.headers,
                    callback=self.parse,
                    dont_filter=True,
                )

        # (item field, key in the product record), applied in this order.
        # 'city' is not populated for this source.
        field_keys = (
            ("status", "id"),
            ("name", "product_name"),
            ("material", "shop_sign"),
            ("weight", "weight"),
            ("type", "spec"),
            ("factory", "producing_name"),
            ("price", "price"),
            ("storage", "storage_place_name"),
            ("company", "provider_name"),
            ("posttime", "upload_time"),
            ("quantity", "pieces"),
        )
        for entry in payload["data"]["items"]:
            item = GangItem()
            item["url"] = response.url
            # "status" is read first, then the timestamp, then the rest.
            first_field, first_key = field_keys[0]
            item[first_field] = entry[first_key]
            item["grabtime"] = time.strftime("%Y-%m-%d %X", time.localtime())
            for field, key in field_keys[1:]:
                item[field] = entry[key]
            yield item
Exemplo n.º 4
0
    def parse(self, response):
        """Request the next listing page if present, then yield one item per row.

        The response is an HTML fragment. A node with class
        ``page-nex page-control`` signals that another page exists; the
        follow-up POST carries the incremented ``current`` page and the
        ``ph`` value taken from ``response.meta``.

        Every row field is extracted tolerantly: a node that is missing
        from a row yields ``None`` for that field instead of raising and
        aborting the remaining rows. 'company' and 'posttime' are not
        populated for this source.
        """
        # Named ``next_control`` rather than ``next`` to avoid shadowing the builtin.
        next_control = response.xpath("//*[@class='page-nex page-control']")
        if next_control:
            url = "http://www.csesteel.com/online/resource/hall/goodList?ajaxCmd=goodContent&securityToken="
            # The same dict is used as the form body and as the request meta,
            # so the next callback can read "current" and "ph" back.
            data = {
                "current": str(int(response.meta["current"]) + 1),
                "ph": response.meta["ph"],
            }
            # No callback is set; Scrapy falls back to the spider's parse().
            yield scrapy.FormRequest(method="post",
                                     url=url,
                                     meta=data,
                                     formdata=data,
                                     headers=self.headers,
                                     dont_filter=True)

        products = response.xpath(
            "//*[@class='main-content']/div[@class='list']")
        for product in products:
            # Strip whitespace and the currency sign only when a price node
            # exists; calling .strip() on None would raise AttributeError.
            price = product.xpath("ul/li/div[9]/span/text()").extract_first()
            if price is not None:
                price = price.strip().replace("¥", "")

            item = GangItem()
            item['url'] = response.url
            # First run of digits in the product link; re_first() returns
            # None when there is no match, where indexing [0] would raise.
            item['status'] = product.xpath(
                "ul/li/div[1]/a/@href").re_first(r"\d+")
            item['grabtime'] = time.strftime('%Y-%m-%d %X', time.localtime())
            item['name'] = product.xpath(
                "ul/li/div[1]/a/text()").extract_first()
            item['material'] = product.xpath(
                "ul/li/div[2]/text()").extract_first()
            item['weight'] = product.xpath(
                "ul/li/div[8]/text()").extract_first()
            item['type'] = product.xpath("ul/li/div[3]/text()").extract_first()
            item['factory'] = product.xpath(
                "ul/li/div[4]/text()").extract_first()
            item['price'] = price
            item['storage'] = product.xpath(
                "ul/li/div[5]/div/div[2]/text()").extract_first()
            item['city'] = product.xpath(
                "ul/li/div[5]/span/text()").extract_first()

            yield item
Exemplo n.º 5
0
    def parse_product(self, response):
        """Yield one GangItem per record in the JSON ``recordList`` array.

        'company' and 'posttime' are not populated for this source.
        """
        # (item field, key in the product record), applied in this order.
        field_keys = (
            ("name", "categoryName"),
            ("material", "materialName"),
            ("weight", "qty"),
            ("quantity", "num"),
            ("type", "specName"),
            ("factory", "factoryName"),
            ("price", "price"),
            ("storage", "warehouseName"),
            ("city", "areaName"),
        )
        payload = json.loads(response.text)
        for record in payload["recordList"]:
            item = GangItem()
            item["url"] = response.url
            item["status"] = record["summaryCode"]
            item["grabtime"] = time.strftime("%Y-%m-%d %X", time.localtime())
            for field, key in field_keys:
                item[field] = record[key]
            yield item
Exemplo n.º 6
0
    def parse_product(self, response):
        """Yield one GangItem per record in the JSON ``PaperList`` array.

        'company' and 'posttime' are not part of the records; they are
        taken from ``response.meta``, where the requesting callback put them.
        """
        # (item field, key in the product record), applied in this order.
        field_keys = (
            ("name", "Pm"),
            ("material", "Cz"),
            ("weight", "Ton"),
            ("type", "Gg"),
            ("factory", "ProductArea"),
            ("price", "Price"),
            ("storage", "StockArea"),
            ("city", "DeliveryArea"),
        )
        payload = json.loads(response.text)
        for record in payload["PaperList"]:
            item = GangItem()
            item["url"] = response.url
            item["status"] = str(record["Id"])
            item["grabtime"] = time.strftime("%Y-%m-%d %X", time.localtime())
            for field, key in field_keys:
                item[field] = record[key]
            for meta_field in ("company", "posttime"):
                item[meta_field] = response.meta[meta_field]
            yield item
Exemplo n.º 7
0
    def parse_products(self, response):
        """Yield one GangItem per record in the JSON ``data.list`` array.

        'posttime' is not part of the records; it is read from the meta of
        the request that produced this response.
        """
        # (item field, key in the product record), applied in this order.
        field_keys = (
            ("name", "categoryName"),
            ("material", "material"),
            ("weight", "pieceWeight"),
            ("type", "specification"),
            ("factory", "factory"),
            ("price", "mallPrice"),
            ("storage", "warehouse"),
            ("city", "city"),
            ("company", "orgTitleName"),
        )
        payload = json.loads(response.text)
        for record in payload["data"]["list"]:
            item = GangItem()
            item["url"] = response.url
            item["status"] = record["id"]
            item["grabtime"] = time.strftime("%Y-%m-%d %X", time.localtime())
            for field, key in field_keys:
                item[field] = record[key]
            item["posttime"] = response.request.meta["posttime"]
            yield item
Exemplo n.º 8
0
    def parse(self, response):
        """Yield items for entries without a company, requests for the others.

        The response body is a JSON document with a ``data.list`` array.
        For each entry:

        * if ``companyName`` is empty, the entry itself is turned into a
          GangItem with the fixed company name "找钢网";
        * otherwise a request handled by ``parse_products`` is issued,
          carrying the entry's position in the list, its ``productIds``
          and its ``lastUpdateTime`` in the request meta.
        """
        result = json.loads(response.text)
        # enumerate() gives each entry's real position; list.index() rescans
        # the list on every iteration and returns the first *equal* entry,
        # which is the wrong position when two entries compare equal.
        for position, product in enumerate(result["data"]["list"]):
            posttime = product["lastUpdateTime"]

            if product["companyName"]:
                # The meta dict (and "productIds") is only needed for the
                # follow-up request, so it is built only on this branch.
                data_dict = {
                    "dataVersion": 9,
                    "pageIndex": 1,
                    "sonDataIndex": position,
                    "spotIds": product["productIds"],
                    "posttime": posttime,
                }
                yield scrapy.Request(
                    url="http://mall.zhaogang.com/api/goods/findThirdGoodsItems",
                    meta=data_dict,
                    callback=self.parse_products,
                    dont_filter=True)
                continue

            item = GangItem()
            item['url'] = response.url
            item['status'] = product["id"]
            item['grabtime'] = time.strftime('%Y-%m-%d %X', time.localtime())
            item['name'] = product["categoryName"]
            item['material'] = product["materialName"]
            item['weight'] = product["pieceWeight"]
            item['type'] = product["specificationName"]
            item['factory'] = product["factoryName"]
            item['price'] = product["mallPrice"]
            item['storage'] = product["warehouseName"]
            item['city'] = product["city"]
            item['company'] = "找钢网"
            item['posttime'] = posttime

            yield item