def __init__(self, *args, **kwargs):
        super(MarketSpider, self).__init__(*args, **kwargs)
        self._baseclass = MarketSpider

        self.configure_request_sharing()
        db.init(dbsettings)

        if not hasattr(self, 'request_queue_chunk'):
            self.request_queue_chunk = 100

        if 'dao' in kwargs:
            self.dao = kwargs['dao']
        else:
            self.dao = self.make_dao()

        self.set_timezone()

        try:
            self.market = Market.get(spider=self.name)
        except Market.DoesNotExist:
            raise Exception(
                "No market entry exists in the database for spider %s" %
                self.name)

        # Preload the user cache only once per spider class, even when several
        # spider instances share the same process.
        if not getattr(self._baseclass, '_cache_preloaded', False):
            self.dao.cache.reload(User, User.market == self.market)
            self._baseclass._cache_preloaded = True

        self.register_new_scrape()
        self.start_statistics()

        self.manual_input = None
        self.request_after_manual_input = None
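MarketSpider is meant to be subclassed; a minimal sketch of a concrete spider, where the class name, spider name, and URL are all hypothetical (the name must match a Market row, since Market.get(spider=self.name) raises otherwise):

class ExampleMarketSpider(MarketSpider):
    name = 'example_market'  # hypothetical; must exist in the markets table
    start_urls = ['http://market.example/index']  # hypothetical URL

    def parse(self, response):
        # Real spiders extract listings here; this sketch only logs the URL.
        self.logger.info('Fetched %s', response.url)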
Example #2
	def __init__(self, *args, **kwargs):
		super(ForumSpider, self).__init__(*args, **kwargs)
		self._baseclass = ForumSpider

		self.configure_request_sharing()

		db.init(dbsettings)
		if 'dao' in kwargs:
			self.dao = kwargs['dao']
		else:
			self.dao = self.make_dao()

		self.set_timezone()

		try:
			self.forum = Forum.get(spider=self.name)
		except Forum.DoesNotExist:
			raise Exception("No forum entry exists in the database for spider " + self.name)

		# Preload the user and thread caches only once per spider class.
		if not getattr(self._baseclass, '_cache_preloaded', False):
			self.dao.cache.reload(User, User.forum == self.forum)
			self.dao.cache.reload(Thread, Thread.forum == self.forum)
			self._baseclass._cache_preloaded = True

		self.register_new_scrape()
		self.start_statistics()

		self.manual_input = None
		self.request_after_manual_input = None
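Both constructors record their own class in self._baseclass so that the _cache_preloaded flag lives on MarketSpider or ForumSpider itself, and the expensive cache reload runs once per process rather than once per instance. A toy illustration of such a class-level flag (names hypothetical):

class Base:
    pass

for _ in (Base(), Base()):
    if not getattr(Base, '_cache_preloaded', False):
        print('preloading cache once')  # printed for the first instance only
        Base._cache_preloaded = True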
Example #3
	def setUp(self):
		configure_logging(install_root_handler=False)
		logging.basicConfig(
		    format='%(levelname)s: %(message)s',
		    level=logging.INFO
		)
		self.spider = MockedSpider()

		self.dao = DatabaseDAO('markets')
		db.init(dbsetting)
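For context, configure_logging(install_root_handler=False) is Scrapy's documented way to skip installing its default root log handler so that a plain logging.basicConfig call takes over. The pattern works standalone:

import logging
from scrapy.utils.log import configure_logging

# Keep Scrapy from installing its own root handler, then route all log
# records through the standard-library configuration instead.
configure_logging(install_root_handler=False)
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)
logging.info('log output now uses the basicConfig format')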
Example #4
	def setUp(self):
		db.init(dbsetting)

def assert_good_spider_type(settings, spider_name):
	# Reconstructed enclosing function (an assumption; only the type check
	# below appears in the original snippet). The spider class is resolved
	# by name through Scrapy's SpiderLoader.
	spcls = SpiderLoader.from_settings(settings).load(spider_name)
	if not issubclass(spcls, ForumSpider):
		raise Exception('Spider %s is not a Forum Spider. Please use the right script for your spider.' % spider_name)

if __name__ == '__main__':

	parser = argparse.ArgumentParser()

	parser.add_argument('--spider', required=True, help='The spider name to launch')
	parser.add_argument('--instances', default=1, type=int, help='Number of instances of the spider to launch')
	parser.add_argument('--login', nargs='*', help='List of logins the spider may use. Each item is the name of a key in the spider settings file.')
	parser.add_argument('--mode', choices=['crawl', 'replay'], default='crawl', help='Select the crawl mode. With "crawl", download all pages from the target website. With "replay", reuse the downloaded responses from the HTTP cache.')

	args = parser.parse_args()
	settings = get_project_settings()
	assert_good_spider_type(settings, args.spider)
	db.init(dbsettings)

	settings.set('login', args.login)	# List of allowed logins to use
	settings.set('MODE', args.mode)		# replay: read responses from the filesystem cache

	crawlerprocess = CrawlerProcess(settings)
	dbprocess = start_dbprocess()	# Create a Process entry in the database. Passing it to every spider records that they were launched together.

	spider_attributes = {
		'process': dbprocess,
		'dao': ForumSpider.make_dao()	# The DAO is shared between spiders.
	}

	for i in range(args.instances):
		crawlerprocess.crawl(args.spider, **spider_attributes)

	crawlerprocess.start()	# Start the reactor and block until every crawl finishes.
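Assuming the launcher above is saved as launch_forum_spider.py (the filename is hypothetical), it would be invoked along these lines:

python launch_forum_spider.py --spider some_forum --instances 2 --mode replay

which schedules two instances of the same spider in one CrawlerProcess, sharing a single DAO and a single Process entry.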
Example #5

	def __init__(self, *args, **kwargs):
		super(ChangerateSpider, self).__init__(*args, **kwargs)
		db.init(dbsettings)
		self.download_delay = 60 * 60	# At most one request per hour.
		self.max_concurrent_requests = 1
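Scrapy honours a download_delay attribute set on the spider instance, overriding the project-wide DOWNLOAD_DELAY setting, so this spider issues at most one request per hour. The same throttling can be sketched through Scrapy's custom_settings (the plain scrapy.Spider base class below is an assumption; the snippet does not show ChangerateSpider's real base):

from scrapy import Spider

class ThrottledSpider(Spider):
    # Hypothetical equivalent of the constructor above: per-spider overrides
    # of the project settings.
    name = 'changerate_throttled'
    custom_settings = {
        'DOWNLOAD_DELAY': 60 * 60,   # one request per hour
        'CONCURRENT_REQUESTS': 1,    # no parallel requests
    }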