class PhantomJSMiddleware(object):
    """Middleware that fetches requests flagged with ``meta['phantom']`` via PhantomJS.

    Presumably installed as a Scrapy downloader middleware (it implements
    ``process_request`` and answers with ``HtmlResponse``) -- confirm against
    the project settings.
    """

    # Skeleton document that ``process_request`` treats as a failed render.
    _EMPTY_DOCUMENT = '<html><head></head><body></body></html>'

    def __init__(self):
        # Nothing in this class reads ``proxyFactory``; it is kept as an
        # attribute in case other code accesses it.
        self.proxyFactory = HttpProxyFactory.getHttpProxyFactory()
        self.phantomJSService = PhantomJSService()

    # Overrides process_request.
    def process_request(self, request, spider):
        """Fetch ``request.url`` through PhantomJS when the request asks for it.

        Args:
            request: Object exposing ``url`` and a dict-like ``meta``. Only
                requests whose ``meta`` contains the key ``'phantom'`` are
                handled. If ``meta`` also contains ``'proxy'``, that value
                (with the first ``"http://"`` removed) is passed on to
                PhantomJS.
            spider: Unused.

        Returns:
            ``None`` when ``'phantom'`` is not in ``request.meta``. Otherwise
            an ``HtmlResponse``: status 200 carrying the fetched content, or
            status 503 with an empty body when PhantomJS returned nothing, only
            whitespace, or the empty skeleton document.
        """
        # ``in`` replaces ``dict.has_key``, which no longer exists on Python 3.
        if 'phantom' not in request.meta:
            return None

        logging.info('[PID:%s] PhantomJS Requesting: %s', os.getpid(), request.url)

        if 'proxy' in request.meta:
            # requestWithProxy is given the proxy without its "http://" part.
            proxy = request.meta['proxy'].replace("http://", "", 1)
            content = self.phantomJSService.requestWithProxy(request.url, proxy)
        else:
            content = self.phantomJSService.requestByURL(request.url)

        fetch_failed = (
            content is None
            or content.strip() == ""
            or content == self._EMPTY_DOCUMENT
        )
        if fetch_failed:
            logging.debug("[PID:%s] PhantomJS Request failed!", os.getpid())
            return HtmlResponse(request.url, encoding='utf-8', status=503, body='')

        logging.debug("[PID:%s]PhantomJS Request success!", os.getpid())
        return HtmlResponse(request.url, encoding='utf-8', status=200, body=content)
def __init__(self):
    """Attach a proxy factory and a PhantomJS service to ``self``.

    ``proxyFactory`` is whatever ``HttpProxyFactory.getHttpProxyFactory()``
    returns; ``phantomJSService`` is a newly constructed ``PhantomJSService``.

    NOTE(review): this repeats the body of ``PhantomJSMiddleware.__init__``
    and looks like a leftover duplicate -- confirm whether it is still needed.
    """
    self.proxyFactory = HttpProxyFactory.getHttpProxyFactory()
    service = PhantomJSService()
    self.phantomJSService = service