def __init__(self, scraper, pool=None):
    """Build the three Twisted agents (direct, http-proxy, https-proxy) this scraper uses.

    Each agent is optionally wrapped (inside-out) with redirect handling,
    gzip decoding, and cookie handling, depending on configuration.
    """
    self.scraper = scraper
    self._pool = pool
    self._agents = {}  # map proxy -> an agent
    # Redirect-following is configurable; default to 3 hops when unset.
    redirectLimit = scraper.config.get('max_redirects')
    if redirectLimit is None:
        redirectLimit = 3
    # create an agent for direct requests
    self._direct_agent = Agent(reactor, pool=self._pool, connectTimeout=scraper.config.get('timeout') or 30)
    if redirectLimit > 0:
        self._direct_agent = BrowserLikeRedirectAgent(self._direct_agent, redirectLimit=redirectLimit)
    self._direct_agent = ContentDecoderAgent(self._direct_agent, [('gzip', GzipDecoder)])
    # Share the cookie jar owned by the scraper's other HTTP client so both
    # clients see the same session cookies.
    self.cj = self.scraper.client.opener.cj
    if self.cj is not None:
        self._direct_agent = CookieAgent(self._direct_agent, self.cj)
    # create an agent for http-proxy requests
    # no endpoint yet, use __ instead of _ to backup the instance
    self.__http_proxy_agent = ProxyAgent(None, pool=self._pool)
    if redirectLimit > 0:
        self._http_proxy_agent = BrowserLikeRedirectAgent(self.__http_proxy_agent, redirectLimit=redirectLimit)
        self._http_proxy_agent = ContentDecoderAgent(self._http_proxy_agent, [('gzip', GzipDecoder)])
    else:
        self._http_proxy_agent = ContentDecoderAgent(self.__http_proxy_agent, [('gzip', GzipDecoder)])
    if self.cj is not None:
        self._http_proxy_agent = CookieAgent(self._http_proxy_agent, self.cj)
    # create an agent for https-proxy requests (CONNECT tunneling)
    # no endpoint yet, use __ instead of _ to backup the instance
    self.__https_proxy_agent = TunnelingAgent(reactor=reactor, proxy=None, contextFactory=ScrapexClientContextFactory(), connectTimeout=30, pool=self._pool)  # no proxy yet
    if redirectLimit > 0:
        self._https_proxy_agent = BrowserLikeRedirectAgent(self.__https_proxy_agent, redirectLimit=redirectLimit)
        self._https_proxy_agent = ContentDecoderAgent(self._https_proxy_agent, [('gzip', GzipDecoder)])
    else:
        self._https_proxy_agent = ContentDecoderAgent(self.__https_proxy_agent, [('gzip', GzipDecoder)])
    if self.cj is not None:
        self._https_proxy_agent = CookieAgent(self._https_proxy_agent, self.cj)
def startRequest(self, request, url, feed_config=None, selector_defer=None, sanitize=False):
    """Serve `url` from the local page cache if possible; otherwise fetch it.

    A downloader instance is created either way; cached responses are handed
    to it (or to `selector_defer`) directly, uncached ones are fetched with a
    redirect-following Twisted agent.
    """
    downloader = self.downloadercls(
        self.feed, self.debug, self.snapshot_dir, self.stat_tool, self.memon,
        request=request, url=url, feed_config=feed_config,
        selector_defer=selector_defer, sanitize=sanitize,
        max_size=self.max_size,
    )

    cached = self.tryLocalPage(url)
    if cached:
        # Local copy available — deliver it without touching the network.
        if selector_defer:
            reactor.callLater(0, selector_defer.callback, cached)
        else:
            downloader.writeResponse(request, cached, feed_config)
        return

    base_agent = Agent(
        reactor,
        contextFactory=ScrapyClientContextFactory(),  # skip certificate verification
        connectTimeout=10,
    )
    agent = BrowserLikeRedirectAgent(base_agent, redirectLimit=5)

    request_headers = twisted_headers({
        'Accept': ['text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'],
        'Accept-Encoding': ['gzip, deflate, sdch'],
        'User-Agent': [self.user_agent]
    })
    d = agent.request('GET', url, request_headers, None)
    print('Request <GET %s> started' % (url,))
    d.addCallback(downloader.downloadStarted)
    d.addErrback(downloader.downloadError)
def auth():
    """Request an OAuth client-credentials token from Predix UAA.

    Returns a Deferred that fires with the access token string, or errbacks
    (IOError) if UAA responds with a non-200 status.
    """
    token_endpoint = "{0}/oauth/token".format(PREDIX_CONFIG.uaa_url)

    client = BrowserLikeRedirectAgent(Agent(reactor))
    headers = Headers({"Content-Type": ["application/x-www-form-urlencoded"]})
    form = WebFormProducer({
        "grant_type": "client_credentials",
        "client_id": PREDIX_CONFIG.uaa_client_id,
        "client_secret": PREDIX_CONFIG.uaa_client_secret,
        "response_type": "token"
    })

    def handle_response(response):
        # Anything but 200 means the credentials/config are wrong.
        if response.code != 200:
            raise IOError(
                "Predix UAA token request failed. Check Predix config values.")
        return readBody(response)

    def handle_body(body):
        # Pull the bearer token out of the JSON payload.
        data = deserialize_json(body)
        return data["access_token"]

    d = client.request("POST", token_endpoint, headers, form)
    d.addCallback(handle_response)
    d.addCallback(handle_body)
    return d
def __init__(self, hs):
    SimpleHttpClient.__init__(self, hs)
    # Replace the base class's agent: route connections through the spider
    # endpoint factory, follow redirects like a browser, and transparently
    # decode gzip bodies.
    endpoint_agent = Agent.usingEndpointFactory(reactor, SpiderEndpointFactory(hs))
    redirecting_agent = BrowserLikeRedirectAgent(endpoint_agent)
    self.agent = ContentDecoderAgent(redirecting_agent, [(b'gzip', GzipDecoder)])
def __init__(self, session, windowTitle=_("TwitchTV")):
    """Set up the Twitch stream grid screen: key bindings, grid geometry
    from the skin, the grid list widget, picture loading and the HTTP agent
    used to fetch preview images."""
    Screen.__init__(self, session, windowTitle=windowTitle)
    self.skinName = "TwitchStreamGrid"
    # Map remote-control keys to handlers (-1 = high action-map priority).
    self["actions"] = ActionMap(["OkCancelActions", "ColorActions"], {
        "ok": self._onOk,
        "cancel": self.close,
        "red": self._onRed,
        "green": self._onGreen,
        "yellow": self._onYellow,
        "blue": self._onBlue,
    }, -1)
    self["key_red"] = StaticText()
    self["key_green"] = StaticText()
    self["key_blue"] = StaticText()
    self["key_yellow"] = StaticText()
    self._setupButtons()
    # Grid geometry comes from the skin's component sizes, with fallbacks.
    sizes = componentSizes[TwitchStreamGrid.SKIN_COMPONENT_KEY]
    self._itemWidth = sizes.get(ComponentSizes.ITEM_WIDTH, 280)
    self._itemHeight = sizes.get(ComponentSizes.ITEM_HEIGHT, 162)
    self._bannerHeight = sizes.get(TwitchStreamGrid.SKIN_COMPONENT_HEADER_HEIGHT, 30)
    self._footerHeight = sizes.get(TwitchStreamGrid.SKIN_COMPONENT_FOOTER_HEIGHT, 60)
    self._itemPadding = sizes.get(TwitchStreamGrid.SKIN_COMPONENT_ITEM_PADDING, 5)
    # one-off calculations
    pad = self._itemPadding * 2
    self._contentWidth = self._itemWidth - pad
    self._contentHeight = self._itemHeight - pad
    self._footerOffset = self._itemHeight - self._itemPadding - self._footerHeight
    self._items = []
    self._list = MenuList(self._items, mode=eListbox.layoutGrid, content=eListboxPythonMultiContent, itemWidth=self._itemWidth, itemHeight=self._itemHeight)
    self["list"] = self._list
    tlf = TemplatedListFonts()
    self._list.l.setFont(0, gFont(tlf.face(tlf.MEDIUM), tlf.size(tlf.MEDIUM)))
    self._list.l.setFont(1, gFont(tlf.face(tlf.SMALLER), tlf.size(tlf.SMALL)))
    self._list.l.setBuildFunc(self._buildFunc, True)
    self.twitch = Twitch()
    self.twitchMiddleware = TwitchMiddleware.instance
    # Asynchronous picture decoding for the grid thumbnails.
    self._picload = ePicLoad()
    self._picload.setPara((self._itemWidth, self._itemHeight, self._itemWidth, self._itemHeight, False, 0, '#000000'))
    self._picload_conn = self._picload.PictureData.connect(self._onDefaultPixmapReady)
    # Pooled, SNI-aware, redirect-following HTTP agent for image downloads.
    agent = Agent(reactor, contextFactory=TLSSNIContextFactory(), pool=HTTPConnectionPool(reactor))
    self._agent = BrowserLikeRedirectAgent(agent)
    self._cachingDeferred = None
    self._loadDefaultPixmap()
    self._pixmapCache = {}
    self._currentEntry = 0
    self._endEntry = 0
    self.onLayoutFinish.append(self._onLayoutFinish)
    self.onClose.append(self.__onClose)
def _create_agent(self, req):
    """Create (or reuse) the right agent for a specific request.

    Direct requests reuse the pre-built ``self._direct_agent``; proxied
    requests get a per-proxy base agent that is created once, cached in
    ``self._agents``, and then wrapped with redirect / gzip / cookie
    handling per the scraper configuration.
    """
    uri = URI.fromBytes(req.url)
    proxy = req.get('proxy')
    if req.get('use_proxy') is False:
        proxy = None

    if not proxy:
        # use single agent when no proxies used.
        # BUGFIX: _direct_agent was already wrapped with redirect, gzip and
        # cookie agents in __init__; the old code fell through and wrapped
        # it AGAIN on every call, stacking a second BrowserLikeRedirectAgent
        # and a second CookieAgent on top. Return it as-is instead.
        return self._direct_agent

    if uri.scheme == 'https':
        # https goes through the proxy via a CONNECT tunnel
        agent_key = 'httpsproxy-%s-%s' % (proxy.host, proxy.port)
        agent = self._agents.get(agent_key)
        if not agent:
            agent = TunnelingAgent(reactor=reactor, proxy=proxy, contextFactory=ScrapexClientContextFactory(), connectTimeout=30, pool=self._pool)
            self._agents[agent_key] = agent
    else:
        # plain http through the proxy
        agent_key = 'httpproxy-%s-%s' % (proxy.host, proxy.port)
        agent = self._agents.get(agent_key)
        if not agent:
            endpoint = TCP4ClientEndpoint(reactor, host=proxy.host, port=proxy.port, timeout=req.get('timeout'))
            agent = ProxyAgent(endpoint, pool=self._pool)
            self._agents[agent_key] = agent
        # Proxy credentials travel as a request header for plain http
        # (the tunneling agent receives the proxy object itself).
        if proxy.auth_header:
            req.get('headers')['Proxy-Authorization'] = proxy.auth_header

    # Wrap the cached base agent per configuration (same chain as __init__).
    redirectLimit = self.scraper.config.get('max_redirects')
    if redirectLimit is None:
        redirectLimit = 3
    if redirectLimit > 0:
        agent = BrowserLikeRedirectAgent(agent, redirectLimit=redirectLimit)
    agent = ContentDecoderAgent(agent, [('gzip', GzipDecoder)])
    if self.cj is not None:
        agent = CookieAgent(agent, self.cj)
    return agent
def url_get_data_async(url, callback=None, data=None, headers=None, timeout=60):
    """Fetch `url` asynchronously and return the Deferred.

    If `callback` is given it is invoked with the body on success and with
    None on failure. `data` (request bodies) is not supported yet.
    """
    assert data is None, "sorry data is currently not supported"

    def on_failure(failure):
        failure.printTraceback()
        callback(None)

    def on_body(body):
        callback(body)

    # Twisted wants each header value wrapped in a list.
    raw_headers = None
    if headers is not None:
        raw_headers = {name: [value] for name, value in headers.items()}

    agent = BrowserLikeRedirectAgent(Agent(reactor, connectTimeout=timeout))
    d = agent.request('GET', url, Headers(raw_headers))
    d.addCallback(readBody)
    if callback is not None:
        d.addCallbacks(on_body, on_failure)
    return d
def _request(self, method, url, headers, success, unconditional=None):
    """Sends an HTTP request.

    Args:
        method (str): 'SUBSCRIBE' or 'UNSUBSCRIBE'.
        url (str): The full endpoint to which the request is being sent.
        headers (dict): A dict of headers, each key and each value being
            of type `str`.
        success (function): A function to be called if the request succeeds.
            The function will be called with a dict of response headers as
            its only parameter.
        unconditional (function): An optional function to be called after
            the request is complete, regardless of its success. Takes
            no parameters.
    """
    agent = BrowserLikeRedirectAgent(Agent(reactor))
    if headers:
        # Normalise keys/values to the bytes form Twisted expects.
        # BUGFIX: iterate over a snapshot of the keys — the old code
        # deleted and re-inserted entries while iterating headers.keys(),
        # which raises "RuntimeError: dictionary changed size during
        # iteration" on Python 3.
        for k in list(headers.keys()):
            header = headers.pop(k)
            if isinstance(header, (list, )):
                header = header[0]
            if not isinstance(header, (bytes, bytearray)):
                header = header.encode("latin-1")
            k = k.encode("latin-1")
            headers[k] = [header]
    args = (method.encode("latin-1"), url.encode("latin-1"), Headers(headers))
    d = agent.request(*args)  # pylint: disable=invalid-name

    def on_success(response):  # pylint: disable=missing-docstring
        response_headers = {}
        for header in response.headers.getAllRawHeaders():
            decoded_key = header[0].decode("utf8").lower()
            decoded_header = header[1][0].decode("utf8")
            response_headers[decoded_key] = decoded_header
        success(response_headers)
        return self

    d.addCallback(on_success)
    if unconditional:
        d.addBoth(unconditional)
    return d
def push(message):
    """Publish `message` to the SAP MMS service (fire-and-forget).

    Builds a sync-mode MMS payload and POSTs it with a bearer token.
    Success/failure is only reported via print().
    """
    sap_data_endpoint = "{0}/{1}".format(SAP_CONFIG.mms_endpoint, SAP_CONFIG.device_id)
    print("SAP MMS data endpoint: {0}".format(sap_data_endpoint))

    client = BrowserLikeRedirectAgent(Agent(reactor))
    headers = Headers({
        "Authorization": ["Bearer {0}".format(SAP_CONFIG.oauth_token)],
        "Content-Type": ["application/json;charset=utf-8"]
    })
    body = {
        "mode": "sync",
        "messageType": SAP_CONFIG.message_type_id,
        "messages": [{
            "sensor": "IntelHowToSample",
            "value": serialize_json(message),
            "timestamp": int(time())
        }]
    }
    print("SAP MMS payload: {0}".format(serialize_json(body)))

    def handle_response(response):
        if response.code == 200:
            print("SAP MMS publish succeeded.")
        else:
            print("SAP Error response: {0}".format(response.code))

    # BUGFIX: handle_response was defined but never attached to the
    # deferred, so the response status was never inspected. Attach it
    # before the generic success/error prints.
    d = client.request("POST", sap_data_endpoint, headers, JsonProducer(body))
    d.addCallback(handle_response)
    d.addCallback(lambda x: print("Published to SAP MMS service."))
    d.addErrback(lambda x: print("SAP publish error:", x))
def request(
    self,
    method,
    url,
    *,
    params=None,
    headers=None,
    data=None,
    files=None,
    json=_NOTHING,
    auth=None,
    cookies=None,
    allow_redirects=True,
    browser_like_redirects=False,
    unbuffered=False,
    reactor=None,
    timeout=None,
    _stacklevel=2,
):
    """
    See :func:`treq.request()`.
    """
    method = method.encode('ascii').upper()

    # Accept DecodedURL/EncodedURL/str/bytes and normalise to an EncodedURL.
    if isinstance(url, DecodedURL):
        parsed_url = url.encoded_url
    elif isinstance(url, EncodedURL):
        parsed_url = url
    elif isinstance(url, str):
        # We use hyperlink in lazy mode so that users can pass arbitrary
        # bytes in the path and querystring.
        parsed_url = EncodedURL.from_text(url)
    else:
        parsed_url = EncodedURL.from_text(url.decode('ascii'))

    # Join parameters provided in the URL
    # and the ones passed as argument.
    if params:
        parsed_url = parsed_url.replace(
            query=parsed_url.query + tuple(_coerced_query_params(params)))

    url = parsed_url.to_uri().to_text().encode('ascii')

    headers = self._request_headers(headers, _stacklevel + 1)

    # The body helper picks the producer and (possibly) a Content-Type
    # based on which of data/files/json was supplied.
    bodyProducer, contentType = self._request_body(data, files, json, stacklevel=_stacklevel + 1)
    if contentType is not None:
        headers.setRawHeaders(b'Content-Type', [contentType])

    if not isinstance(cookies, CookieJar):
        cookies = cookiejar_from_dict(cookies)

    cookies = merge_cookies(self._cookiejar, cookies)

    # Wrap the agent inside-out: cookies -> redirects -> gzip -> auth.
    wrapped_agent = CookieAgent(self._agent, cookies)

    if allow_redirects:
        if browser_like_redirects:
            wrapped_agent = BrowserLikeRedirectAgent(wrapped_agent)
        else:
            wrapped_agent = RedirectAgent(wrapped_agent)

    wrapped_agent = ContentDecoderAgent(wrapped_agent, [(b'gzip', GzipDecoder)])

    if auth:
        wrapped_agent = add_auth(wrapped_agent, auth)

    d = wrapped_agent.request(method, url, headers=headers, bodyProducer=bodyProducer)

    if reactor is None:
        from twisted.internet import reactor
    if timeout:
        # Cancel the request if it doesn't complete within `timeout` seconds.
        delayedCall = reactor.callLater(timeout, d.cancel)

        def gotResult(result):
            if delayedCall.active():
                delayedCall.cancel()
            return result

        d.addBoth(gotResult)

    if not unbuffered:
        d.addCallback(_BufferedResponse)

    return d.addCallback(_Response, cookies)
def request(self, method, url, **kwargs):
    """
    See :func:`treq.request()`.
    """
    method = method.encode('ascii').upper()

    # Normalise text/bytes URL input to a hyperlink URL.
    if isinstance(url, unicode):
        parsed_url = URL.from_text(url)
    else:
        parsed_url = URL.from_text(url.decode('ascii'))

    # Join parameters provided in the URL
    # and the ones passed as argument.
    params = kwargs.get('params')
    if params:
        parsed_url = parsed_url.replace(
            query=parsed_url.query + tuple(_coerced_query_params(params)))

    url = parsed_url.to_uri().to_text().encode('ascii')

    # Convert headers dictionary to
    # twisted raw headers format.
    headers = kwargs.get('headers')
    if headers:
        if isinstance(headers, dict):
            h = Headers({})
            for k, v in headers.items():
                if isinstance(v, (bytes, unicode)):
                    h.addRawHeader(k, v)
                elif isinstance(v, list):
                    h.setRawHeaders(k, v)
            headers = h
    else:
        headers = Headers({})

    # Here we choose a right producer
    # based on the parameters passed in.
    bodyProducer = None
    data = kwargs.get('data')
    files = kwargs.get('files')
    # since json=None needs to be serialized as 'null', we need to
    # explicitly check kwargs for this key
    has_json = 'json' in kwargs

    if files:
        # If the files keyword is present we will issue a
        # multipart/form-data request as it suits better for cases
        # with files and/or large objects.
        files = list(_convert_files(files))
        boundary = str(uuid.uuid4()).encode('ascii')
        headers.setRawHeaders(
            b'content-type', [b'multipart/form-data; boundary=' + boundary])
        if data:
            data = _convert_params(data)
        else:
            data = []

        bodyProducer = multipart.MultiPartProducer(data + files, boundary=boundary)
    elif data:
        # Otherwise stick to x-www-form-urlencoded format
        # as it's generally faster for smaller requests.
        if isinstance(data, (dict, list, tuple)):
            headers.setRawHeaders(b'content-type', [b'application/x-www-form-urlencoded'])
            data = urlencode(data, doseq=True)
        bodyProducer = self._data_to_body_producer(data)
    elif has_json:
        # If data is sent as json, set Content-Type as 'application/json'
        headers.setRawHeaders(b'content-type', [b'application/json; charset=UTF-8'])
        content = kwargs['json']
        json = json_dumps(content, separators=(u',', u':')).encode('utf-8')
        bodyProducer = self._data_to_body_producer(json)

    # Merge request cookies into the session jar.
    cookies = kwargs.get('cookies', {})
    if not isinstance(cookies, CookieJar):
        cookies = cookiejar_from_dict(cookies)
    cookies = merge_cookies(self._cookiejar, cookies)

    # Wrap the agent inside-out: cookies -> redirects -> gzip -> auth.
    wrapped_agent = CookieAgent(self._agent, cookies)

    if kwargs.get('allow_redirects', True):
        if kwargs.get('browser_like_redirects', False):
            wrapped_agent = BrowserLikeRedirectAgent(wrapped_agent)
        else:
            wrapped_agent = RedirectAgent(wrapped_agent)

    wrapped_agent = ContentDecoderAgent(wrapped_agent, [(b'gzip', GzipDecoder)])

    auth = kwargs.get('auth')
    if auth:
        wrapped_agent = add_auth(wrapped_agent, auth)

    d = wrapped_agent.request(method, url, headers=headers, bodyProducer=bodyProducer)

    timeout = kwargs.get('timeout')
    if timeout:
        # Cancel the request if it doesn't complete within `timeout` seconds.
        delayedCall = default_reactor(kwargs.get('reactor')).callLater(
            timeout, d.cancel)

        def gotResult(result):
            if delayedCall.active():
                delayedCall.cancel()
            return result

        d.addBoth(gotResult)

    if not kwargs.get('unbuffered', False):
        d.addCallback(_BufferedResponse)

    return d.addCallback(_Response, cookies)
    # NOTE(review): tail of a function whose definition starts before this
    # chunk — records the fetch time for `url` in the store, then returns 0.
    r.set(url, int(time.time()))
    return 0

GC_PERIOD_SECONDS = 3 * 60 * 60  # 3 hours

def periodical_garbage_collect():
    """Run gc.collect() at most once per GC_PERIOD_SECONDS, tracking the
    last run time on the function object itself."""
    tm = int(time.time())
    if tm - periodical_garbage_collect.time >= GC_PERIOD_SECONDS:
        print('GC: the number of unreachable objects: %s' % gc.collect())
        periodical_garbage_collect.time = tm

# Initialise the last-run timestamp used by periodical_garbage_collect().
periodical_garbage_collect.time = int(time.time())

# Shared redirect-following agent (max 5 hops, 10s connect timeout).
agent = BrowserLikeRedirectAgent(
    Agent(reactor,
          contextFactory=ScrapyClientContextFactory(),  # skip certificate verification
          connectTimeout=10),
    redirectLimit=5
)

def html2json(el):
    """Recursively convert an lxml element tree into nested
    [tag, {"tag-id": ...}, children] lists."""
    return [
        el.tag,
        {"tag-id": el.attrib["tag-id"]},
        [html2json(e) for e in el.getchildren() if isinstance(e, etree.ElementBase)]
    ]

def setBaseAndRemoveScriptsAndMore(response, url):
    # NOTE(review): definition continues beyond this chunk — only the first
    # two statements are visible here.
    response.selector.remove_namespaces()
    tree = response.selector.root.getroottree()
def request(self, method, url, **kwargs):
    """
    See :func:`treq.request()`.
    """
    method = method.encode('ascii').upper()

    stacklevel = kwargs.pop('_stacklevel', 2)

    # Accept DecodedURL/EncodedURL/text/bytes and normalise to a DecodedURL.
    if isinstance(url, DecodedURL):
        parsed_url = url
    elif isinstance(url, EncodedURL):
        parsed_url = DecodedURL(url)
    elif isinstance(url, six.text_type):
        parsed_url = DecodedURL.from_text(url)
    else:
        parsed_url = DecodedURL.from_text(url.decode('ascii'))

    # Join parameters provided in the URL
    # and the ones passed as argument.
    params = kwargs.pop('params', None)
    if params:
        parsed_url = parsed_url.replace(
            query=parsed_url.query + tuple(_coerced_query_params(params)))

    url = parsed_url.to_uri().to_text().encode('ascii')

    # Convert headers dictionary to
    # twisted raw headers format.
    headers = kwargs.pop('headers', None)
    if headers:
        if isinstance(headers, dict):
            h = Headers({})
            for k, v in headers.items():
                if isinstance(v, (bytes, six.text_type)):
                    h.addRawHeader(k, v)
                elif isinstance(v, list):
                    h.setRawHeaders(k, v)
            headers = h
    else:
        headers = Headers({})

    # The body helper picks the producer and (possibly) a Content-Type
    # based on which of data/files/json was supplied.
    bodyProducer, contentType = self._request_body(
        data=kwargs.pop('data', None),
        files=kwargs.pop('files', None),
        json=kwargs.pop('json', _NOTHING),
        stacklevel=stacklevel,
    )
    if contentType is not None:
        headers.setRawHeaders(b'Content-Type', [contentType])

    # Merge request cookies into the session jar.
    cookies = kwargs.pop('cookies', {})
    if not isinstance(cookies, CookieJar):
        cookies = cookiejar_from_dict(cookies)
    cookies = merge_cookies(self._cookiejar, cookies)

    # Wrap the agent inside-out: cookies -> redirects -> gzip -> auth.
    wrapped_agent = CookieAgent(self._agent, cookies)

    browser_like_redirects = kwargs.pop('browser_like_redirects', False)
    if kwargs.pop('allow_redirects', True):
        if browser_like_redirects:
            wrapped_agent = BrowserLikeRedirectAgent(wrapped_agent)
        else:
            wrapped_agent = RedirectAgent(wrapped_agent)

    wrapped_agent = ContentDecoderAgent(wrapped_agent, [(b'gzip', GzipDecoder)])

    auth = kwargs.pop('auth', None)
    if auth:
        wrapped_agent = add_auth(wrapped_agent, auth)

    d = wrapped_agent.request(method, url, headers=headers, bodyProducer=bodyProducer)

    reactor = kwargs.pop('reactor', None)
    if reactor is None:
        from twisted.internet import reactor
    timeout = kwargs.pop('timeout', None)
    if timeout:
        # Cancel the request if it doesn't complete within `timeout` seconds.
        delayedCall = reactor.callLater(timeout, d.cancel)

        def gotResult(result):
            if delayedCall.active():
                delayedCall.cancel()
            return result

        d.addBoth(gotResult)

    if not kwargs.pop('unbuffered', False):
        d.addCallback(_BufferedResponse)

    # Anything left in kwargs was not consumed above — warn the caller.
    if kwargs:
        warnings.warn(
            ("Got unexpected keyword argument: {}."
             " treq will ignore this argument,"
             " but will raise TypeError in the next treq release.").format(
                ", ".join(repr(k) for k in kwargs)),
            DeprecationWarning,
            stacklevel=stacklevel,
        )

    return d.addCallback(_Response, cookies)
def __init__(self):
    """Create the redirect-following HTTP agent this instance uses."""
    base_agent = Agent(reactor)
    self._agent = BrowserLikeRedirectAgent(base_agent)
def request(self, method, url, **kwargs):
    """Issue an HTTP request through the session's wrapped agent and return
    a Deferred firing with the response (treq-style keyword arguments)."""
    method = method.encode('ascii').upper()

    # Join parameters provided in the URL
    # and the ones passed as argument.
    params = kwargs.get('params')
    if params:
        url = _combine_query_params(url, params)

    if isinstance(url, unicode):
        url = URL.fromText(url).asURI().asText().encode('ascii')

    # Convert headers dictionary to
    # twisted raw headers format.
    headers = kwargs.get('headers')
    if headers:
        if isinstance(headers, dict):
            h = Headers({})
            for k, v in headers.items():
                if isinstance(k, unicode):
                    k = k.encode('ascii')
                if isinstance(v, bytes):
                    h.addRawHeader(k, v)
                elif isinstance(v, unicode):
                    h.addRawHeader(k, v.encode('ascii'))
                elif isinstance(v, list):
                    # Encode each list element individually.
                    cleanHeaders = []
                    for item in v:
                        if isinstance(item, unicode):
                            cleanHeaders.append(item.encode('ascii'))
                        else:
                            cleanHeaders.append(item)
                    h.setRawHeaders(k, cleanHeaders)
                else:
                    h.setRawHeaders(k, v)
            headers = h
    else:
        headers = Headers({})

    # Here we choose a right producer
    # based on the parameters passed in.
    bodyProducer = None
    data = kwargs.get('data')
    files = kwargs.get('files')
    if files:
        # If the files keyword is present we will issue a
        # multipart/form-data request as it suits better for cases
        # with files and/or large objects.
        files = list(_convert_files(files))
        boundary = str(uuid.uuid4()).encode('ascii')
        headers.setRawHeaders(
            b'content-type', [b'multipart/form-data; boundary=' + boundary])
        if data:
            data = _convert_params(data)
        else:
            data = []

        bodyProducer = multipart.MultiPartProducer(data + files, boundary=boundary)
    elif data:
        # Otherwise stick to x-www-form-urlencoded format
        # as it's generally faster for smaller requests.
        if isinstance(data, (dict, list, tuple)):
            headers.setRawHeaders(b'content-type', [b'application/x-www-form-urlencoded'])
            data = urlencode(data, doseq=True)
        bodyProducer = self._data_to_body_producer(data)

    # Merge request cookies into the session jar.
    cookies = kwargs.get('cookies', {})
    if not isinstance(cookies, CookieJar):
        cookies = cookiejar_from_dict(cookies)
    cookies = merge_cookies(self._cookiejar, cookies)

    # Wrap the agent inside-out: cookies -> redirects -> gzip -> auth.
    wrapped_agent = CookieAgent(self._agent, cookies)

    if kwargs.get('allow_redirects', True):
        if kwargs.get('browser_like_redirects', False):
            wrapped_agent = BrowserLikeRedirectAgent(wrapped_agent)
        else:
            wrapped_agent = RedirectAgent(wrapped_agent)

    wrapped_agent = ContentDecoderAgent(wrapped_agent, [(b'gzip', GzipDecoder)])

    auth = kwargs.get('auth')
    if auth:
        wrapped_agent = add_auth(wrapped_agent, auth)

    d = wrapped_agent.request(method, url, headers=headers, bodyProducer=bodyProducer)

    timeout = kwargs.get('timeout')
    if timeout:
        # Cancel the request if it doesn't complete within `timeout` seconds.
        delayedCall = default_reactor(kwargs.get('reactor')).callLater(
            timeout, d.cancel)

        def gotResult(result):
            if delayedCall.active():
                delayedCall.cancel()
            return result

        d.addBoth(gotResult)

    if not kwargs.get('unbuffered', False):
        d.addCallback(_BufferedResponse)

    return d.addCallback(_Response, cookies)