Example No. 1
    def setUp(self):
        # Load the saved page fixtures; get_file_handle is assumed to
        # return a (path, contents) tuple for each stored JSON page.
        self.pages = {
            'lenarguile': get_file_handle('le-narguile.com.json'),
            'royaledeco': get_file_handle('royaledeco.com.json'),
            '10k00nt': get_file_handle('10k00nt.com.json')
        }

        # Build (url, body) pairs from the raw page text; only the
        # encoded contents (value[1]) matter here, so the URL slot is
        # left as None.
        data = [(None, value[1].encode()) for value in self.pages.values()]
        roots, scrape_data = init_data(data)
        self.s = Scraper(scrape_data)
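
The snippet indexes the values stored in self.pages, so get_file_handle has to return something tuple-like whose second element is the page text. A minimal sketch of such a helper, with the fixture directory purely an assumption:

import os


def get_file_handle(name, fixture_dir='tests/fixtures'):
    # Hypothetical helper: returns a (path, contents) tuple so that
    # value[1] in setUp() above is the raw page text. The real helper
    # and fixture location may differ.
    path = os.path.join(fixture_dir, name)
    with open(path, encoding='utf-8') as fh:
        return path, fh.read()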
Example No. 2
    def test_age(self):
        # With no age flag set at all, the constructor should raise.
        with self.assertRaises(ValueError):
            Scraper("any", "any", used=False, new=False, nearlyNew=False)
        used = Scraper("any", "any", used=True, new=False, nearlyNew=False)
        self.assertIn('onesearchad=Used', used.url)
        self.assertNotIn('onesearchad=New', used.url)
        self.assertNotIn('onesearchad=Nearly%20New', used.url)

        new = Scraper("any", "any", used=False, new=True, nearlyNew=False)
        self.assertIn('onesearchad=New', new.url)
        self.assertNotIn('onesearchad=Nearly%20New', new.url)
        self.assertNotIn('onesearchad=Used', new.url)

        nearlyNew = Scraper("any",
                            "any",
                            used=False,
                            new=False,
                            nearlyNew=True)
        self.assertIn('onesearchad=Nearly%20New', nearlyNew.url)
        self.assertNotIn('onesearchad=New', nearlyNew.url)
        self.assertNotIn('onesearchad=Used', nearlyNew.url)
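
The assertions pin down how the constructor is expected to translate the three age flags into onesearchad query parameters. A minimal sketch of that validation and URL building, where the class body, base URL, and parameter order are all assumptions:

from urllib.parse import quote


class Scraper:
    BASE_URL = 'https://example.com/search'  # hypothetical base URL

    def __init__(self, make, model, used=False, new=False, nearlyNew=False):
        # Reject a search with no age category selected at all.
        if not (used or new or nearlyNew):
            raise ValueError('at least one age flag must be True')
        params = []
        if used:
            params.append('onesearchad=' + quote('Used'))
        if new:
            params.append('onesearchad=' + quote('New'))
        if nearlyNew:
            params.append('onesearchad=' + quote('Nearly New'))
        self.url = self.BASE_URL + '?' + '&'.join(params)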
Example No. 3
import math
import os
import queue
import sys

# config, WireProtocol, chunks and Scraper are assumed to be provided
# by the surrounding project.


def main():
    scraper = Scraper(db_path='./products.db',
                      run_every=config.scraper['run_every'])

    products = scraper.get_products_list()
    if not products:
        return False

    # Split the product list so that each worker process handles at
    # most one group.
    n_workers = config.scraper['workers']
    n_groups = math.ceil(len(products) / n_workers)

    groups = list(chunks(products, n_groups))
    workers = queue.Queue()

    for i in range(n_workers):
        if i >= len(groups):  # fewer groups than workers: stop forking
            break

        # Each worker reports back to the parent over a pipe wrapped
        # in a WireProtocol.
        wp = WireProtocol(*os.pipe())
        pid = os.fork()

        if pid == 0:
            # Child process: scrape one group of products, then exit.
            scraper.get_prices(wp, groups[i])
            sys.exit(0)

        workers.put((pid, wp))

    # Parent process: collect each worker's results, then reap the
    # child to avoid leaving zombies behind.
    while not workers.empty():
        pid, wp = workers.get()
        scraper.save_prices(wp)

        try:
            os.waitpid(pid, 0)
        except OSError:
            pass

    sys.stdout.flush()
    sys.stderr.flush()

    return True
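
The chunks helper and the WireProtocol class are not part of the snippet. A standard chunking recipe consistent with this usage, and a minimal pipe wrapper, both offered only as assumptions about the real code:

def chunks(items, n):
    # Standard recipe: yield successive slices of at most n items.
    for i in range(0, len(items), n):
        yield items[i:i + n]


class WireProtocol:
    # Hypothetical wrapper around the (read_fd, write_fd) pair
    # returned by os.pipe(); the real class presumably adds framing
    # and serialization for get_prices()/save_prices() to use.
    def __init__(self, read_fd, write_fd):
        self.read_fd = read_fd
        self.write_fd = write_fd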
Example No. 4
def scrape():
    """Scrape puzzles from the site."""
    scraper = Scraper()
    scraper.scrape()
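
Assuming scrape() is the module's entry point, it would typically be run behind a main guard:

if __name__ == '__main__':
    scrape()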