Example #1
0
class Model_Scraper_Product_Offer1(Model_Scraper_Standard):
    """Scrape the offer-listing pages of a product (by ASIN) and feed the
    raw HTML to the 'Product_Offer' processor for parsing."""

    def __init__(self, region):
        # Initialise shared base state, consistent with the other scraper
        # subclasses (e.g. Model_Scraper_Keywords).
        super(Model_Scraper_Product_Offer1, self).__init__(region)
        self.region = region
        self.processor = Service_Functions().getProcessor(
            'Product_Offer', region)

    def process(self, asin):
        """Download the first offer page for *asin*.

        Returns the page content, or None when nothing was downloaded.
        """
        self.processOffer = Model_Scraper_Standard(self.region)
        content = self.processOffer.processOffer(self.region, asin)
        if content:
            return content
        return None

    def scrape(self, asin):
        """Scrape every offer-listing page for *asin*.

        Parses page 1, then walks the remaining pages reported by the
        processor's page count, parsing each in turn.
        """
        content = self.process(asin)
        if not content:
            # BUG FIX: the original called getPageCount(content) even when
            # the download failed, passing None to the processor.
            return
        print(content)
        data = self.processor.process(content.encode('utf-8'))
        if data:
            print(data)
        pageCount = self.processor.getPageCount(content)
        if pageCount > 1:
            for i in range(2, int(pageCount) + 1):
                # Offers are paginated 10 per page via startIndex.
                index = str((i - 1) * 10)
                # BUG FIX: the original hard-coded "com" here; use the
                # configured region so non-US marketplaces work.
                pageUrl = ("http://www.amazon." + self.region
                           + "/gp/offer-listing/" + asin
                           + "/ref=olpOffersSuppressed?ie=UTF8&f_new=true"
                           + "&overridePriceSuppression=1&startIndex=" + index)
                # NOTE(review): processPageOffer is not defined in this
                # class — presumably inherited; confirm against the base.
                pageContent = self.processPageOffer(pageUrl)
                if pageContent:
                    print(pageContent)
                    pageResult = self.processor.process(
                        pageContent.encode('utf-8'))
                    if pageResult:
                        print(pageResult)
Example #2
0
class Model_Scraper_Keywords(Model_Scraper_Standard):
    """Scrape Amazon keyword-search result pages (up to 5) and parse them
    with the 'Keywords' processor."""

    def __init__(self, region):
        super(Model_Scraper_Keywords, self).__init__(region)
        self.region = region
        self.processor = Service_Functions().getProcessor('Keywords', region)

    def scraper(self, keywords):
        """Fetch and parse search results for *keywords*.

        Returns a list of per-page parse results on success, None when
        the download returned None, False on any failure (fetch exception
        or parse error), or implicitly None when page 1 parsed to nothing.
        """
        self.process = Model_Scraper_Standard(self.region)
        url = ("https://www.amazon." + self.region
               + "/gp/search?keywords=" + keywords + "&page=1")
        print(url)
        try:
            content = self.process.processkeywords(url)
        except Exception as err:
            # BUG FIX: previously `content` was left unbound here and the
            # NameError below was swallowed by a bare except. Report the
            # failure explicitly instead.
            print(err)
            return False
        try:
            if content:
                data = []
                result = self.processor.process(content.encode('utf-8'), 1)
                if result:
                    data.append(result)
                    # Cap the crawl at 5 result pages.
                    pagecount = int(self.processor.getPageCount(content))
                    if pagecount > 5:
                        pagecount = 5
                    if pagecount > 1:
                        for i in range(2, pagecount + 1):
                            pageurl = ("https://www.amazon." + self.region
                                       + "/gp/search?keywords=" + keywords
                                       + "&page=" + str(i))
                            print(pageurl)
                            pagecontent = self.process.processkeywords(pageurl)
                            if pagecontent:
                                pageresult = self.processor.process(
                                    pagecontent.encode('utf-8'), i)
                                data.append(pageresult)
                    return data
            elif content is None:
                return None
            else:
                return False
        except Exception:
            # Any parse failure is reported to the caller as False.
            return False
Example #3
0
class Model_Scraper_Seller_Product(Model_Scraper_Standard):
    """Scrape a seller's product listing pages (by merchant id) and parse
    them with the 'Seller_Product' processor."""

    def __init__(self, region):
        self.region = region
        self.processor = Service_Functions().getProcessor(
            'Seller_Product', region)

    def scrape(self, merchantId):
        """Scrape all result pages for *merchantId* (capped at 50 pages).

        Returns a list of per-page parse results on success, the
        SCRAPED_NO_DATA status when page 1 parsed to nothing, the FAILED
        status when the first download returned nothing, or False for an
        empty merchant id.
        """
        if not merchantId:
            return False
        url = "https://www.amazon." + self.region + "/s?merchant=" + merchantId
        print(url)
        content = Model_Scraper_Standard(self.region).processSellerProduct(url)
        if not content:
            return Model_Static_DownloadQueue_Status().FAILED
        result = self.processor.process(content)
        if not result:
            return Model_Static_DownloadQueue_Status().SCRAPED_NO_DATA
        data = [result]
        # BUG FIX: removed the leftover debug override `pagecount = 1`
        # ("测试"/test) which silently disabled pagination and made the
        # 50-page cap unreachable. The cap itself is kept.
        pagecount = int(self.processor.getPageCount(content))
        if pagecount > 50:
            pagecount = 50
        for i in range(2, pagecount + 1):
            pageurl = ("https://www.amazon." + self.region + "/s?merchant="
                       + merchantId + "&page=" + str(i))
            print(pageurl)
            pageContent = Model_Scraper_Standard(
                self.region).processSellerProduct(pageurl)
            if not pageContent:
                continue
            pageResult = self.processor.process(pageContent)
            if pageResult:
                data.append(pageResult)
        return data