def next_request(self):
    """Pop one queued item and build a ``Request`` from it.

    ``self.queue.pop`` is called with ``self.idle_before_close`` as its
    only argument. A falsy result means there is nothing to schedule.
    Otherwise the item (expected to be dict-like) is converted as follows:

    * ``item['url']`` becomes the request URL; if ``Request`` rejects it
      with ``ValueError`` the URL is retried with ``http://`` prepended.
    * ``item['callback']`` / ``item['errback']``, when present and not
      ``None``, are looked up by name on ``self.spider`` and attached to
      the request. A name that cannot be resolved is logged and skipped.
    * ``item['meta']`` is stored under ``req.meta['field_css']``; its
      ``'item'`` and ``'field_css'`` entries, when present, are copied to
      the same keys of ``req.meta``.
    * ``item['useragent']``, when present and not ``None``, is sent as the
      ``User-Agent`` header.

    Returns:
        The populated ``Request``, or ``None`` when the queue returned a
        falsy value.

    Raises:
        KeyError: if the item lacks a ``'url'`` or ``'meta'`` key.
        ValueError: if ``Request`` still rejects the URL after the
            ``http://`` prefix has been added.
    """
    # Function-scope import so the block does not depend on the file's
    # import section.
    import logging

    logger = logging.getLogger(__name__)

    item = self.queue.pop(self.idle_before_close)
    if not item:
        return None

    url = item['url']
    try:
        req = Request(url)
    except ValueError:
        # Request needs an absolute URL. Retrying the very same string
        # could only fail again, so assume a missing scheme.
        # TODO: better URL validation.
        req = Request('http://' + url)

    # The queue carries handler *names*; resolve them on the spider.
    for hook in ('callback', 'errback'):
        handler_name = item.get(hook)
        if handler_name is None:
            continue
        try:
            setattr(req, hook, getattr(self.spider, handler_name))
        except AttributeError:
            # Best effort: the request is still returned, just without
            # this handler attached.
            logger.warning(
                "Could not attach %s %r from spider %r; leaving it unset",
                hook, handler_name, self.spider,
            )

    # Defaults not in schema. These are written to the popped item only;
    # nothing below copies them onto the request.
    item.setdefault('curdepth', 0)
    item.setdefault('retry_times', 0)

    meta = item['meta']
    # The whole meta payload is the fallback value for 'field_css'; a
    # dedicated 'field_css' entry inside it takes precedence.
    req.meta['field_css'] = meta
    if 'item' in meta:
        req.meta['item'] = meta['item']
    if 'field_css' in meta:
        req.meta['field_css'] = meta['field_css']

    # Extra check to add items to request.
    useragent = item.get('useragent')
    if useragent is not None:
        req.headers['User-Agent'] = useragent

    return req