示例#1
0
    def setUpClass(cls):
        '''Set up saved pages for all urls in TEST_URLS and load them so they
        are easily accessible for tests.

        The most important thing happening here is the population of cls.rows:
        - at the start, each row contains only the item and image urls from
          the csv file
        - since each test case loads a file from disk, the page content is
          pre-loaded here: the objects the test will use (e.g. etsy listing
          object and etsy seller object) are appended after the second element
        - finally, the appropriate scraper object is appended to the row so
          tests can retrieve it as the last element instead of calling
          constructors themselves

        Note: the only requirement here is that the writer of the test knows
        which row to use for each test.
        '''
        cls.cur_dir = os.getcwd()
        reader = sopen(TEST_URLS)
        # Keep the base factory in its own name: the original code rebound
        # `scraper` to the domain-specific scraper inside the loop, so every
        # iteration after the first called get_scraper() on the previous
        # row's sub-scraper instead of on the base Scraper.
        factory = Scraper()
        chdir(DOWNLOAD_DIR)

        cls.rows = []
        for row in reader:
            row = row.split(",", 1)
            domain = get_domain(row[0])
            scraper = factory.get_scraper(domain)
            # Re-download the page when forced (REWRITE) or when the cache
            # directory is missing.
            if REWRITE or not exists(DOWNLOAD_DIR):
                scraper.download(row[0])
            row.extend(scraper.load(row[0]))
            row.append(scraper)
            cls.rows.append(row)

        cls._test_is_set = True
示例#2
0
 def test_amazon_scraper(self):
     '''Test get_item_info for AmazonScraper.

     Fetches a fixed Amazon product page and checks the parsed price,
     currency, likes, quantity and discount against the values recorded
     in the saved page.
     '''
     scraper = Scraper()
     scraper = scraper.get_scraper('www.amazon.com')
     # Call wrapped across lines (PEP 8): the url literals are long and
     # must stay intact, so they go one per line.
     item = scraper.get_item_info(
         'http://www.amazon.com/gp/product/B002P8T0L0/ref=s9_simh_gw_p23_d0_g23_i1?pf_rd_m=ATVPDKIKX0DER&pf_rd_s=center-2&pf_rd_r=0WQ1VFHRSY7ZTB93FGYG&pf_rd_t=101&pf_rd_p=470938631&pf_rd_i=507846',
         'http://ecx.images-amazon.com/images/I/31hak2cSIOL.jpg')
     self.assertEqual(item.price, 75.99)
     self.assertEqual(item.currency_code, '$')
     self.assertEqual(item.user_interaction.likes, 42)
     self.assertEqual(item.quantity.new, 5)
     self.assertEqual(item.details.discount.value, 43.96)
示例#3
0
 def test_amazon_scraper(self):
     '''Test get_item_info for AmazonScraper.

     Checks the parsed fields of a fixed Amazon product page against the
     values recorded in the saved page.
     '''
     item_url = 'http://www.amazon.com/gp/product/B002P8T0L0/ref=s9_simh_gw_p23_d0_g23_i1?pf_rd_m=ATVPDKIKX0DER&pf_rd_s=center-2&pf_rd_r=0WQ1VFHRSY7ZTB93FGYG&pf_rd_t=101&pf_rd_p=470938631&pf_rd_i=507846'
     image_url = 'http://ecx.images-amazon.com/images/I/31hak2cSIOL.jpg'
     amazon = Scraper().get_scraper('www.amazon.com')
     item = amazon.get_item_info(item_url, image_url)
     self.assertEqual(item.price, 75.99)
     self.assertEqual(item.currency_code, '$')
     self.assertEqual(item.user_interaction.likes, 42)
     self.assertEqual(item.quantity.new, 5)
     self.assertEqual(item.details.discount.value, 43.96)
示例#4
0
def _pinscraperow(row, row_num):
    '''Scrape a single csv row of (item url, image url).

    Downloads the image into a directory named after the url-quoted item url,
    then asks the scraper registry for a scraper matching the url's domain and
    dumps the scraped info (or a not-found marker) into that directory.

    Args:
        row: sequence whose first element is the item url and second is the
            image url (both may carry surrounding whitespace).
        row_num: row index; currently unused but kept for interface
            compatibility with callers.

    Returns:
        True when a scraper existed for the domain (whether or not the item
        was found); otherwise the domain string, so the caller can report
        unsupported sites.
    '''
    url = row[0].strip()
    img_url = row[1].strip()
    dir_name = urllib.parse.quote_plus(url)
    mkdir(dir_name)
    download_image(img_url, dir_name)
    domain = get_domain(url)
    scraper = Scraper().get_scraper(domain)
    # Guard clause: no scraper registered for this domain — hand the domain
    # back so the caller can report it.
    if not scraper:
        return domain
    print("Getting information from {0}... ".format(domain))
    content = scraper.get_item_info(url, img_url)
    if content:
        json_dump_to_file('{0}/info.json'.format(dir_name), content)
    else:
        write_to_file('{0}/not_found.txt'.format(dir_name), 'w',
                      'The url at {0} was not found'.format(url))
    return True
示例#5
0
    def setUpClass(cls):
        '''Set up saved pages for all urls in TEST_URLS and load them so they
        are easily accessible for tests.

        The most important thing happening here is the population of cls.rows:
        - at the start, each row contains only the item and image urls from
          the csv file
        - since each test case loads a file from disk, the page content is
          pre-loaded here: the objects the test will use (e.g. etsy listing
          object and etsy seller object) are appended after the second element
        - finally, the appropriate scraper object is appended to the row so
          tests can retrieve it as the last element instead of calling
          constructors themselves

        Note: the only requirement here is that the writer of the test knows
        which row to use for each test.
        '''
        cls.cur_dir = os.getcwd()
        reader = sopen(TEST_URLS)
        # Keep the base factory in its own name: the original code rebound
        # `scraper` to the domain-specific scraper inside the loop, so every
        # iteration after the first called get_scraper() on the previous
        # row's sub-scraper instead of on the base Scraper.
        factory = Scraper()
        chdir(DOWNLOAD_DIR)

        cls.rows = []
        for row in reader:
            row = row.split(",", 1)
            domain = get_domain(row[0])
            scraper = factory.get_scraper(domain)
            # Re-download the page when forced (REWRITE) or when the cache
            # directory is missing.
            if REWRITE or not exists(DOWNLOAD_DIR):
                scraper.download(row[0])
            row.extend(scraper.load(row[0]))
            row.append(scraper)
            cls.rows.append(row)

        cls._test_is_set = True