Пример #1
0
    def spider_closed(self, spider, reason):
        """
        Persist crawl statistics when the spider closes.

        Stores start time, finish time, items scraped and pages crawled
        into the database for this scraper, then flags the spider as no
        longer running.

        :param spider: the Spider instance that just finished.
        :param reason: Scrapy close reason string (e.g. 'finished') —
            recorded nowhere here, kept for the signal signature.
        """
        # Use the public StatsCollector API instead of poking the
        # private _stats dict; get_value() also lets us default missing
        # counters to 0 (a crawl with no scraped items or no GET
        # requests simply has no such key).
        start_time = self.stats.get_value('start_time')
        finish_time = self.stats.get_value('finish_time')
        items_scraped_count = self.stats.get_value('item_scraped_count', 0)
        pages_crawled_count = self.stats.get_value(
            'downloader/request_method_count/GET', 0)
        spider_name = spider.name

        # add the scrapy stats to DB
        stats = ScrapyStats(scrapername=spider_name,
                            start_time=start_time,
                            finish_time=finish_time,
                            items_scraped=items_scraped_count,
                            pages_crawled=pages_crawled_count,
                            servername=SERVER_NAME)
        add_stats(stats)

        # update column to db when it stops running
        spider_ = get_spider(spider_name)
        spider_.is_running = 0
        # print as a function call so the module also parses under Python 3
        print(spider_)
        db.session.commit()
Пример #2
0
def start_crawler(spider_name):
    """Load the spider class registered under *spider_name* and start its crawl."""
    record = get_spider(spider_name)
    # dotted import path of the form 'spiders.<Cls>.<Cls>'
    dotted_path = '.'.join(['spiders', record.spidercls, record.spidercls])
    spider_cls = load_object(dotted_path)
    settings = get_spider_settings()
    crawler = create_crawler_object(spider_cls(), settings)
    crawler.start()
Пример #3
0
def start_crawler(spider_name):
    """Instantiate and run the crawler for the spider named *spider_name*."""
    entry = get_spider(spider_name)
    cls_name = entry.spidercls
    # module and class share the same name: 'spiders.<name>.<name>'
    location = 'spiders.{0}.{0}'.format(cls_name)
    spider_class = load_object(location)
    settings = get_spider_settings()
    crawler = create_crawler_object(spider_class(), settings)
    crawler.start()
Пример #4
0
def update_spider_post():
    """
    Update an existing spider's database record from the submitted form.

    Reads ``spider-name``, ``spider-desc`` and ``spider-cls`` from the
    POST form, writes them onto the matching spider row, commits, and
    redirects back to the spider list.  (The previous docstring claimed
    this uploaded a file to spiders/ — it only touches the database.)
    """
    spider_name = request.form['spider-name']
    spider_desc = request.form['spider-desc']
    spider_cls = request.form['spider-cls']

    # look up the existing record and overwrite its editable fields
    spider = get_spider(spider_name)
    # rename is a no-op: the form's name is also the lookup key
    spider.name = spider_name
    spider.description = spider_desc
    spider.spidercls = spider_cls
    db.session.commit()

    return redirect(url_for('spiders_bp.list_spiders'))
Пример #5
0
def update_spider_post():
    """Apply the submitted form values to the matching spider row, then redirect."""
    form = request.form
    # read every field up front so a missing key fails before any DB work
    name = form['spider-name']
    description = form['spider-desc']
    cls_path = form['spider-cls']

    # add spider to db
    row = get_spider(name)
    row.name = name
    row.description = description
    row.spidercls = cls_path
    db.session.commit()

    return redirect(url_for('spiders_bp.list_spiders'))
Пример #6
0
def update_spider_form(spider_name):
    """Render the add-spider template pre-filled for editing *spider_name*."""
    return render_template('add-spider.html', update=True,
                           spider=get_spider(spider_name))
Пример #7
0
def update_spider_form(spider_name):
    """Show the spider edit form, pre-populated with the stored record."""
    record = get_spider(spider_name)
    context = {'update': True, 'spider': record}
    return render_template('add-spider.html', **context)
Пример #8
0
 def spider_opened(self, spider):
     """Flag the spider's database row as running when the crawl starts."""
     record = get_spider(spider.name)
     record.is_running = 1
     db.session.commit()