示例#1
0
    def setUpClass(cls):
        '''Set up saved pages for all urls in TEST_URLS and load them so they
        are easily accessible for tests.

        The most important thing happening here is the population of cls.rows:
        - at the start, each row contains only the item and image urls from
          the csv file
        - since each test case loads a file from disk, the page content is
          pre-loaded here: the objects the test will use (e.g. etsy listing
          object and etsy seller object) are appended after the second element
        - finally, the appropriate scraper object is appended to the row so
          tests can retrieve it as the last element instead of calling
          constructors themselves

        Note: the only requirement here is that the writer of the test knows
        which row to use for each test.
        '''
        cls.cur_dir = os.getcwd()
        reader = sopen(TEST_URLS)
        # Keep the base factory in its own name: the original code rebound
        # `scraper` to the domain-specific scraper inside the loop, so every
        # iteration after the first called get_scraper() on the previous
        # row's sub-scraper instead of on the base Scraper.
        factory = Scraper()
        chdir(DOWNLOAD_DIR)

        cls.rows = []
        for row in reader:
            row = row.split(",", 1)
            domain = get_domain(row[0])
            scraper = factory.get_scraper(domain)
            # Re-download the page when forced (REWRITE) or when the cache
            # directory is missing.
            if REWRITE or not exists(DOWNLOAD_DIR):
                scraper.download(row[0])
            row.extend(scraper.load(row[0]))
            row.append(scraper)
            cls.rows.append(row)

        cls._test_is_set = True
示例#2
0
 def test_amazon_scraper(self):
     '''Test get_item_info for AmazonScraper.

     Fetches a fixed Amazon product page and checks the parsed price,
     currency, likes, quantity and discount against the values recorded
     in the saved page.
     '''
     scraper = Scraper()
     scraper = scraper.get_scraper('www.amazon.com')
     # Call wrapped across lines (PEP 8): the url literals are long and
     # must stay intact, so they go one per line.
     item = scraper.get_item_info(
         'http://www.amazon.com/gp/product/B002P8T0L0/ref=s9_simh_gw_p23_d0_g23_i1?pf_rd_m=ATVPDKIKX0DER&pf_rd_s=center-2&pf_rd_r=0WQ1VFHRSY7ZTB93FGYG&pf_rd_t=101&pf_rd_p=470938631&pf_rd_i=507846',
         'http://ecx.images-amazon.com/images/I/31hak2cSIOL.jpg')
     self.assertEqual(item.price, 75.99)
     self.assertEqual(item.currency_code, '$')
     self.assertEqual(item.user_interaction.likes, 42)
     self.assertEqual(item.quantity.new, 5)
     self.assertEqual(item.details.discount.value, 43.96)
示例#3
0
 def test_amazon_scraper(self):
     '''Test get_item_info for AmazonScraper.

     Checks the parsed fields of a fixed Amazon product page against the
     values recorded in the saved page.
     '''
     item_url = 'http://www.amazon.com/gp/product/B002P8T0L0/ref=s9_simh_gw_p23_d0_g23_i1?pf_rd_m=ATVPDKIKX0DER&pf_rd_s=center-2&pf_rd_r=0WQ1VFHRSY7ZTB93FGYG&pf_rd_t=101&pf_rd_p=470938631&pf_rd_i=507846'
     image_url = 'http://ecx.images-amazon.com/images/I/31hak2cSIOL.jpg'
     amazon = Scraper().get_scraper('www.amazon.com')
     item = amazon.get_item_info(item_url, image_url)
     self.assertEqual(item.price, 75.99)
     self.assertEqual(item.currency_code, '$')
     self.assertEqual(item.user_interaction.likes, 42)
     self.assertEqual(item.quantity.new, 5)
     self.assertEqual(item.details.discount.value, 43.96)
示例#4
0
def _pinscraperow(row, row_num):
    '''Scrape a single csv row of (item url, image url).

    Downloads the image into a directory named after the url-quoted item url,
    then asks the scraper registry for a scraper matching the url's domain and
    dumps the scraped info (or a not-found marker) into that directory.

    Args:
        row: sequence whose first element is the item url and second is the
            image url (both may carry surrounding whitespace).
        row_num: row index; currently unused but kept for interface
            compatibility with callers.

    Returns:
        True when a scraper existed for the domain (whether or not the item
        was found); otherwise the domain string, so the caller can report
        unsupported sites.
    '''
    url = row[0].strip()
    img_url = row[1].strip()
    dir_name = urllib.parse.quote_plus(url)
    mkdir(dir_name)
    download_image(img_url, dir_name)
    domain = get_domain(url)
    scraper = Scraper().get_scraper(domain)
    # Guard clause: no scraper registered for this domain — hand the domain
    # back so the caller can report it.
    if not scraper:
        return domain
    print("Getting information from {0}... ".format(domain))
    content = scraper.get_item_info(url, img_url)
    if content:
        json_dump_to_file('{0}/info.json'.format(dir_name), content)
    else:
        write_to_file('{0}/not_found.txt'.format(dir_name), 'w',
                      'The url at {0} was not found'.format(url))
    return True
示例#5
0
    def setUpClass(cls):
        '''Set up saved pages for all urls in TEST_URLS and load them so they
        are easily accessible for tests.

        The most important thing happening here is the population of cls.rows:
        - at the start, each row contains only the item and image urls from
          the csv file
        - since each test case loads a file from disk, the page content is
          pre-loaded here: the objects the test will use (e.g. etsy listing
          object and etsy seller object) are appended after the second element
        - finally, the appropriate scraper object is appended to the row so
          tests can retrieve it as the last element instead of calling
          constructors themselves

        Note: the only requirement here is that the writer of the test knows
        which row to use for each test.
        '''
        cls.cur_dir = os.getcwd()
        reader = sopen(TEST_URLS)
        # Keep the base factory in its own name: the original code rebound
        # `scraper` to the domain-specific scraper inside the loop, so every
        # iteration after the first called get_scraper() on the previous
        # row's sub-scraper instead of on the base Scraper.
        factory = Scraper()
        chdir(DOWNLOAD_DIR)

        cls.rows = []
        for row in reader:
            row = row.split(",", 1)
            domain = get_domain(row[0])
            scraper = factory.get_scraper(domain)
            # Re-download the page when forced (REWRITE) or when the cache
            # directory is missing.
            if REWRITE or not exists(DOWNLOAD_DIR):
                scraper.download(row[0])
            row.extend(scraper.load(row[0]))
            row.append(scraper)
            cls.rows.append(row)

        cls._test_is_set = True