def __init__(self, base_url, conf_urls=None, verbosity=1, output_dir=None, ascend=True, **kwargs):
    """Initialise crawl state, the client, and the active plugins.

    Args:
        base_url: URL seeded as the first entry of ``not_crawled``.
        conf_urls: Stored as ``self.conf_urls``. When omitted, each
            instance gets its own fresh empty dict.
        verbosity: Stored as ``self.verbosity``.
        output_dir: Optional path of an existing directory. When truthy,
            its real path is stored as ``self.output_dir``; otherwise
            ``self.output_dir`` is ``None``.
        ascend: Stored as ``self.ascend``.
        **kwargs: Only ``auth`` is read. When truthy it is stored as
            ``self.auth`` and passed to ``self._login``.

    Raises:
        AssertionError: If ``output_dir`` is truthy but is not an
            existing directory.
    """
    self.base_url = base_url
    # A literal ``{}`` default would be created once and shared by every
    # instance constructed without ``conf_urls``; build a new dict instead.
    self.conf_urls = {} if conf_urls is None else conf_urls
    self.verbosity = verbosity
    self.ascend = ascend
    self.auth = kwargs.get('auth')

    if output_dir:
        # Raised explicitly (rather than via ``assert``) so the check is
        # still enforced under ``python -O``; the exception type is kept
        # for callers that already handle AssertionError.
        if not os.path.isdir(output_dir):
            raise AssertionError(
                "output_dir is not an existing directory: %r" % (output_dir,))
        self.output_dir = os.path.realpath(output_dir)
        LOG.info("Output will be saved to %s", self.output_dir)
    else:
        self.output_dir = None

    # These two are what keep track of what to crawl and what has been.
    self.not_crawled = [(0, 'START', self.base_url)]
    self.crawled = {}

    self.c = Client(REMOTE_ADDR='127.0.0.1')

    # Login, and remember the user which was logged in.
    # NOTE(review): ``self.user`` is only set when ``auth`` is supplied --
    # confirm no other method reads it unconditionally.
    if self.auth:
        self._login(self.auth)
        self.user = self.c.session['_auth_user_id']

    # Instantiate every Plugin subclass that is not explicitly disabled via
    # a falsy ``active`` attribute.
    # TODO: Check if plugin supports writing CSV (or to a file in general?)
    self.plugins = [
        plug()
        for plug in Plugin.__subclasses__()
        if getattr(plug, 'active', True)
    ]
def __init__(self, base_url, conf_urls=None, verbosity=1, output_dir=None, ascend=True, **kwargs):
    """Initialise crawl state, the client, and the active plugins.

    Args:
        base_url: URL seeded as the first entry of ``not_crawled``.
        conf_urls: Stored as ``self.conf_urls``. When omitted, each
            instance gets its own fresh empty dict.
        verbosity: Stored as ``self.verbosity``.
        output_dir: Optional path of an existing directory. When truthy,
            its real path is stored as ``self.output_dir``; otherwise
            ``self.output_dir`` is ``None``.
        ascend: Stored as ``self.ascend``.
        **kwargs: Only ``auth`` is read. When truthy it must be a mapping;
            its items are logged (each value passed through
            ``cleanse_setting``) and then expanded as keyword arguments
            to ``self.c.login``.

    Raises:
        AssertionError: If ``output_dir`` is truthy but is not an
            existing directory.
    """
    self.base_url = base_url
    # A literal ``{}`` default would be created once and shared by every
    # instance constructed without ``conf_urls``; build a new dict instead.
    self.conf_urls = {} if conf_urls is None else conf_urls
    self.verbosity = verbosity
    self.ascend = ascend
    auth = kwargs.get('auth')

    if output_dir:
        # Raised explicitly (rather than via ``assert``) so the check is
        # still enforced under ``python -O``; the exception type is kept
        # for callers that already handle AssertionError.
        if not os.path.isdir(output_dir):
            raise AssertionError(
                "output_dir is not an existing directory: %r" % (output_dir,))
        self.output_dir = os.path.realpath(output_dir)
        LOG.info("Output will be saved to %s", self.output_dir)
    else:
        self.output_dir = None

    # These two are what keep track of what to crawl and what has been.
    self.not_crawled = [(0, 'START', self.base_url)]
    self.crawled = {}

    self.c = Client(REMOTE_ADDR='127.0.0.1')

    if auth:
        # Each value goes through cleanse_setting (keyed by the upper-cased
        # name) before being written to the log.
        printable_auth = ', '.join(
            '%s: %s' % (key, cleanse_setting(key.upper(), value))
            for key, value in auth.items())
        LOG.info('Log in with %s', printable_auth)
        # NOTE(review): the result of login() is not checked here --
        # confirm whether a failed login should be reported.
        self.c.login(**auth)

    # Instantiate every Plugin subclass that is not explicitly disabled via
    # a falsy ``active`` attribute.
    # TODO: Check if plugin supports writing CSV (or to a file in general?)
    self.plugins = [
        plug()
        for plug in Plugin.__subclasses__()
        if getattr(plug, 'active', True)
    ]