def __init__(self, name, spec, item_schemas, all_extractors, settings=None,
             **kw):
    """Initialise the spider from its specification.

    Registers the start-URL generators, builds the generic form helper,
    runs the base-class initialiser and then configures JS handling,
    plugins, start URLs/requests, init requests and allowed domains from
    a deep copy of ``spec``.

    Args:
        name: Spider name, forwarded to the base-class initialiser.
        spec: Spider specification mapping. It is deep-copied first and
            every later step here works on the copy.
        item_schemas: Forwarded to ``_configure_plugins``.
        all_extractors: Forwarded to ``_configure_plugins``.
        settings: Forwarded to ``UrlGenerator``, ``_configure_js`` and
            ``_configure_plugins``.
        **kw: Extra spider arguments; forwarded to ``UrlGenerator``,
            ``GenericForm``, the base-class initialiser and
            ``_add_spider_args_to_spec``.
    """
    # One generator per start-URL type; entries are created in the same
    # order as the keys are listed.
    generators = {}
    generators['start_urls'] = IdentityGenerator()
    generators['generated_urls'] = UrlGenerator(settings, kw)
    generators['url'] = IdentityGenerator()
    generators['feed'] = FeedGenerator(self.parse)
    generators['generated'] = FragmentGenerator()
    self.start_url_generators = generators

    self.generic_form = GenericForm(**kw)
    super(IblSpider, self).__init__(name, **kw)

    # Everything below operates on a private copy of the spec.
    working_spec = deepcopy(spec)
    self._add_spider_args_to_spec(working_spec, kw)
    self._configure_js(working_spec, settings)
    self.plugins = self._configure_plugins(
        settings, working_spec, item_schemas, all_extractors)

    self.login_requests = []
    self.form_requests = []

    self._start_urls = self._create_start_urls(working_spec)
    self._start_requests = self._create_start_requests(working_spec)
    self._create_init_requests(working_spec)
    self._add_allowed_domains(working_spec)
    self.page_actions = working_spec.get('page_actions', [])
def setUp(self):
    """Create the table of start-URL generators used by the tests.

    The table maps each start-URL type name to a freshly constructed
    generator instance and is stored on ``self.generators``.
    """
    # Keyword arguments are evaluated left to right, so the instances are
    # created, and the keys inserted, in the order written here.
    self.generators = dict(
        start_urls=IdentityGenerator(),
        generated_urls=UrlGenerator(),
        url=IdentityGenerator(),
        generated=FragmentGenerator(),
    )
def __init__(self, name, spec, item_schemas, all_extractors, settings=None,
             **kw):
    """Initialise the spider from its specification.

    Registers the start-URL generators, builds the generic form helper,
    runs the base-class initialiser, records the configured actions and
    then configures JS handling, plugins, start URLs/requests, init
    requests and allowed domains from a deep copy of ``spec``.

    Args:
        name: Spider name, forwarded to the base-class initialiser.
        spec: Spider specification mapping. It is deep-copied first and
            every later step here works on the copy.
        item_schemas: Forwarded to ``_configure_plugins``.
        all_extractors: Forwarded to ``_configure_plugins``.
        settings: Forwarded to ``UrlGenerator``, ``_configure_js`` and
            ``_configure_plugins``.
        **kw: Extra spider arguments; forwarded to ``UrlGenerator``,
            ``GenericForm``, the base-class initialiser and
            ``_add_spider_args_to_spec``.
    """
    # One generator per supported start-URL type.
    self.start_url_generators = {
        'start_urls': IdentityGenerator(),
        'generated_urls': UrlGenerator(settings, kw),
        'url': IdentityGenerator(),
        'feed': FeedGenerator(self.parse),
        'generated': FragmentGenerator(),
    }
    self.generic_form = GenericForm(**kw)
    super(IblSpider, self).__init__(name, **kw)

    # Work on a private copy of the spec from here on.
    spec = deepcopy(spec)
    self._add_spider_args_to_spec(spec, kw)

    # NOTE: configured actions are only recorded here. They do not force
    # 'js_enabled' on or add their URLs to 'js_enable_patterns'; the code
    # that used to do so was disabled, so JS handling is driven solely by
    # what the spec itself says.
    self.actions = spec.get('actions', [])

    self._configure_js(spec, settings)
    self.plugins = self._configure_plugins(
        settings, spec, item_schemas, all_extractors)
    self.login_requests, self.form_requests = [], []
    self._start_urls = self._create_start_urls(spec)
    self._start_requests = self._create_start_requests(spec)
    self._create_init_requests(spec)
    self._add_allowed_domains(spec)
    self.page_actions = spec.get('page_actions', [])
def test_start_urls(self):
    """Check that ``IdentityGenerator`` returns its input unchanged.

    Calls a fresh ``IdentityGenerator`` with ``self.github_start_urls``
    and asserts that the result equals ``self.github_start_urls``.
    """
    expected = self.github_start_urls
    generator = IdentityGenerator()
    produced = generator(self.github_start_urls)
    self.assertEqual(expected, produced)