Example #1
import logging
from time import sleep

# settings, scrapers, and mailer are project-local modules assumed by this example


def main():
    logging.info('Initializing Scrapers')
    initialized_scrapers = []
    for name, ad in settings.ads.items():
        try:
            scraper_class = scrapers.get_scraper(ad["url"])
        except ValueError as e:
            # The URL is not compatible with any scraper; skip this ad
            logging.warning(e)
            continue

        scraper = scraper_class(ad["url"], **ad["filters"])
        initialized_scrapers.append(scraper)

    logging.info('Starting Loop')
    while True:
        ads = []
        for scraper in initialized_scrapers:
            ads += scraper.scrape()

        if mailer.mail_ads(ads):  # Only dump seen ad IDs if mail was actually sent
            for scraper in initialized_scrapers:
                scraper.dump_ids()

        logging.info('Hibernating for {} seconds'.format(
            settings.SLEEP_SECONDS))

        sleep(settings.SLEEP_SECONDS)
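
Every example on this page dispatches through some form of get_scraper. As a point of reference, here is a minimal sketch of the URL-based variant Example #1 relies on, assuming a hypothetical SCRAPERS registry mapping domain suffixes to scraper classes; the real module's lookup logic is not shown in these snippets:

from urllib.parse import urlparse

# Hypothetical registry: domain suffix -> scraper class
SCRAPERS = {}


def get_scraper(url):
    # Return the scraper class whose registered domain matches the URL;
    # raise ValueError otherwise, which is the case Example #1 catches and logs.
    domain = urlparse(url).netloc
    for registered_domain, scraper_class in SCRAPERS.items():
        if domain.endswith(registered_domain):
            return scraper_class
    raise ValueError("No scraper compatible with url: {}".format(url))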
Example #2
def fetch_resource(resource_type):
    # Args, DbManager, and get_scraper are project-local helpers assumed here
    args = Args(resource_type)
    db_manager = DbManager(args)
    scraper = get_scraper(args)
    if args.refetch:
        # Drop the cached copy so the resource is scraped again below
        db_manager.delete_resource(args.db_key)
    if not db_manager.resource_exists(args.db_key):
        # Cache miss: scrape the resource and store it
        resource_data = scraper.get_resource(args.query_params)
        if scraper.driver:
            # Shut down the scraper's browser driver, if it used one
            scraper.driver.quit()
        db_manager.save_resource(args.db_key, resource_data)
    return db_manager.fetch_resource(args.db_key)
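
Example #2 is a cache-aside pattern: delete on refetch, scrape and save on a miss, and always read back from the database so callers get one consistent code path. A hypothetical call, assuming "user_profile" is a resource type that Args understands:

# Served from the DB cache when present; scraped and cached otherwise
profile = fetch_resource("user_profile")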
Example #3
    def get(self, provider):
        scraper_cls = get_scraper(provider)
        if not scraper_cls:
            self.set_status(404)
            self.write({
                "error": "Unkown provider",
            })
            return

        scraper = scraper_cls()
        results = yield scraper.run()
        self.write({
            "results": [r.serialize() for r in results],
        })
Example #4
    def get(self, provider):
        scraper_cls = get_scraper(provider)
        if not scraper_cls:
            self.set_status(400)
            self.write({
                "error": "Unkown provider",
            })
            return

        scraper = scraper_cls()
        results = yield scraper.run()

        self.write({
            "results": results,
        })
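
Examples #3 and #4 are method bodies that only work inside a coroutine-aware handler; the bare yield needs Tornado's coroutine machinery around it. A minimal sketch of the surrounding class, assuming Tornado's decorated-coroutine style (ScraperHandler and the route are hypothetical names):

from tornado import gen, web


class ScraperHandler(web.RequestHandler):

    @gen.coroutine  # lets the yielded scraper.run() future resolve
    def get(self, provider):
        scraper_cls = get_scraper(provider)
        if not scraper_cls:
            self.set_status(404)
            self.write({"error": "Unknown provider"})
            return

        scraper = scraper_cls()
        results = yield scraper.run()  # run() must return a Future or coroutine
        self.write({"results": [r.serialize() for r in results]})


# Hypothetical routing; the provider name is captured from the URL
app = web.Application([(r"/scrape/(\w+)", ScraperHandler)])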
Example #5
import requests
from bs4 import BeautifulSoup

# get_scraper is a project-local helper assumed by this example


def check_availability(item, logger=None):
    stock = None
    price = None

    r = requests.get(item["url"], headers={'User-Agent': 'Mozilla/5.0'})
    if r.status_code != 200:
        if logger:
            logger.warning("Got {} status code in {}".format(
                r.status_code, item["url"]))

        return stock, price

    webpage = r.text
    soup = BeautifulSoup(webpage, 'html.parser')

    # Pick the store-specific parser and extract stock and price from the page
    scraper = get_scraper(item["store"])
    stock, price = scraper(soup)

    return stock, price
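
A hypothetical call, assuming an item dict with the two keys the function reads ("url" and "store") and a standard library logger:

import logging

item = {
    "url": "https://example.com/product/123",  # hypothetical product page
    "store": "example_store",                  # a key get_scraper understands
}
stock, price = check_availability(item, logger=logging.getLogger(__name__))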