示例#1
0
def pastie(data, prefix="", postfix="", user="******", lang="text", private="true", password=None):
    """Submit ``data`` to paste.thezomg.com via HTTP POST.

    Returns a Deferred that fires once the JSON response body has been
    consumed by a DeferredPrinter built with ``prefix``/``postfix``.
    """
    form_fields = {
        'paste_user': user,
        'paste_data': data,
        'paste_lang': lang,
        'api_submit': 'true',
        'mode': 'json',
        'paste_private': private,
    }
    if password:
        form_fields['paste_password'] = password

    request_headers = Headers({
        'User-agent': ['Mozilla/5.0'],
        'Content-type': ['application/x-www-form-urlencoded'],
    })
    encoded_body = urllib.urlencode(form_fields)
    agent = RedirectAgent(Agent(reactor))

    request_d = agent.request('POST', "http://paste.thezomg.com/",
                              headers=request_headers,
                              bodyProducer=StringProducer(encoded_body))

    def on_response(response):
        # Fires once DeferredPrinter has consumed the whole body.
        done = Deferred()
        response.deliverBody(DeferredPrinter(done, prefix, postfix))
        return done

    request_d.addCallback(on_response)
    return request_d
示例#2
0
    def connect_to_tracker(self):
        """Send an HTTP scrape request to the tracker.

        Builds a scrape URL from the tracker address and announce page,
        fires a GET via a redirect-following Agent, and returns
        ``self.result_deferred``, which fires when the whole scrape chain
        completes.  If the URL cannot be byte-encoded, an already-failed
        Deferred is returned instead.
        """
        # create the HTTP GET message
        # Note: some trackers have strange URLs, e.g.,
        #       http://moviezone.ws/announce.php?passkey=8ae51c4b47d3e7d0774a720fa511cc2a
        #       which has some sort of 'key' as parameter, so we need to use the add_url_params
        #       utility function to handle such cases.

        # NOTE(review): the format string "http://%s:%s%s" has no "/" before
        # the announce page -- this assumes self._announce_page begins with
        # "/"; a sibling variant of this method uses "http://%s:%s/%s".
        # Confirm against the callers.
        url = add_url_params("http://%s:%s%s" %
                             (self._tracker_address[0], self._tracker_address[1],
                              self._announce_page.replace(u'announce', u'scrape')),
                             {"info_hash": self._infohash_list})

        # no more requests can be appended to this session
        self._is_initiated = True
        self._last_contact = int(time.time())

        agent = RedirectAgent(Agent(reactor, connectTimeout=self.timeout, pool=self._connection_pool))
        try:
            # register_task ties the request to this session's task manager so
            # it can be cancelled on shutdown.
            self.request = self.register_task("request", agent.request('GET', bytes(url)))
            self.request.addCallback(self.on_response)
            self.request.addErrback(self.on_error)
            self._logger.debug(u"%s HTTP SCRAPE message sent: %s", self, url)

            self.start_timeout()

            # Return deferred that will evaluate when the whole chain is done.
            self.result_deferred = self.register_task("result", Deferred(canceller=self._on_cancel))

        except UnicodeEncodeError as e:
            # bytes(url) fails for non-ASCII tracker URLs; surface that as an
            # already-failed Deferred rather than raising synchronously.
            self.result_deferred = defer.fail(e)

        return self.result_deferred
示例#3
0
def getIssue(baseurl, num, template, conf):
    """Fetch issue ``num`` as JSON from ``baseurl``.

    Returns a Deferred that fires once the response body has been consumed
    by a DeferredPrinter built with ``baseurl``, ``num`` and ``template``.
    The session cookie is taken from ``conf['cookie']``.
    """
    if baseurl.endswith("/"):
        baseurl = baseurl[:-1]

    request_headers = Headers({
        'User-agent': ['Mozilla/5.0'],
        "Cookie": [conf['cookie']],
    })
    target = "%s/%s.json" % (baseurl, num)

    agent = RedirectAgent(Agent(reactor))
    request_d = agent.request('GET', target, headers=request_headers)

    def on_response(response):
        # Fires once DeferredPrinter has consumed the whole body.
        done = Deferred()
        response.deliverBody(DeferredPrinter(done, baseurl, num, template))
        return done

    request_d.addCallback(on_response)
    return request_d
示例#4
0
 def normal_crawl(self, request, reactor):
     """Issue a redirect-following GET for the URI parsed from ``request``.

     Returns the request Deferred from the agent (10 second connect
     timeout); headers come from ``self.headers``.
     """
     target = self.parse_uri(request)
     redirect_agent = RedirectAgent(Agent(reactor, connectTimeout=10))
     return redirect_agent.request(b'GET', target.encode('ascii'),
                                   Headers(self.headers), None)
示例#5
0
    def connect_to_tracker(self):
        """Send an HTTP scrape request to the tracker.

        Builds a scrape URL from the tracker address and announce page,
        fires a GET via a redirect-following Agent, and returns
        ``self.result_deferred``, which fires when the whole scrape chain
        completes.
        """
        # create the HTTP GET message
        # Note: some trackers have strange URLs, e.g.,
        #       http://moviezone.ws/announce.php?passkey=8ae51c4b47d3e7d0774a720fa511cc2a
        #       which has some sort of 'key' as parameter, so we need to use the add_url_params
        #       utility function to handle such cases.

        url = add_url_params("http://%s:%s/%s" %
                             (self._tracker_address[0], self._tracker_address[1],
                              self._announce_page.replace(u'announce', u'scrape')),
                             {"info_hash": self._infohash_list})

        agent = RedirectAgent(Agent(reactor, connectTimeout=self.timeout, pool=self._connection_pool))
        # register_task ties the request to this session's task manager so it
        # can be cancelled on shutdown.
        # NOTE(review): bytes(url) may raise UnicodeEncodeError for
        # non-ASCII URLs; unlike the sibling variant, this version does not
        # catch it -- confirm whether that is intentional.
        self.request = self.register_task("request", agent.request('GET', bytes(url)))
        self.request.addCallback(self.on_response)
        self.request.addErrback(self.on_error)

        self._logger.debug(u"%s HTTP SCRAPE message sent: %s", self, url)

        # no more requests can be appended to this session
        self._is_initiated = True
        self._last_contact = int(time.time())

        # Return deferred that will evaluate when the whole chain is done.
        self.result_deferred = self.register_task("result", Deferred(canceller=self._on_cancel))
        return self.result_deferred
 def __call__(self, url):
     """Fetch ``url`` and yield the links parsed from its HTML body.

     Runs as an inlineCallbacks-style generator: the final value is handed
     back via defer.returnValue.
     """
     self.log.debug('calling: GET -> %s', url)
     http_agent = RedirectAgent(Agent(reactor))
     body_done = defer.Deferred()
     response = yield http_agent.request('GET', url)
     # HTMLResponseProtocol fires body_done once the body is consumed.
     response.deliverBody(HTMLResponseProtocol(body_done))
     parsed_links = yield body_done
     defer.returnValue(parsed_links)
 def __call__(self, url):
     """GET ``url`` and return (via defer.returnValue) the parsed links.

     Intended to be driven by inlineCallbacks; each ``yield`` waits on a
     Deferred.
     """
     self.log.debug('calling: GET -> %s', url)
     agent_with_redirects = RedirectAgent(Agent(reactor))
     finished = defer.Deferred()
     response = yield agent_with_redirects.request('GET', url)
     # The protocol collects the body and fires ``finished`` with the links.
     response.deliverBody(HTMLResponseProtocol(finished))
     links = yield finished
     defer.returnValue(links)
示例#8
0
    def _webhook_request(self,
                         method,
                         uri,
                         headers,
                         verify_ssl=False,
                         bodyProducer=None):
        """Send the webhook request and return a Deferred firing with the
        response body.

        :param method: HTTP method, as bytes (e.g. b"POST").
        :param uri: request URI, as bytes.
        :param headers: Headers instance to send with the request.
        :param verify_ssl: whether to verify the server's TLS certificate.
        :param bodyProducer: optional producer for the request body.
        :raises PowerActionError: when the (possibly retried) response has a
            status code of 400 or above.
        """

        agent = RedirectAgent(
            Agent(
                reactor,
                contextFactory=WebClientContextFactory(verify=verify_ssl),
            ))
        d = agent.request(
            method,
            uri,
            headers=headers,
            bodyProducer=bodyProducer,
        )

        def render_response(response):
            """Render the HTTPS response received."""
            def eb_catch_partial(failure):
                # Twisted is raising PartialDownloadError because the responses
                # do not contain a Content-Length header. Since every response
                # holds the whole body we just take the result.
                failure.trap(PartialDownloadError)
                if int(failure.value.status) == HTTPStatus.OK:
                    return failure.value.response
                else:
                    return failure

            # Error out if the response has a status code of 400 or above.
            if response.code >= int(HTTPStatus.BAD_REQUEST):
                # If there was no trailing slash, retry with a trailing slash
                # because of varying requirements of BMC manufacturers.
                # ``uri`` is bytes, so compare a one-byte *slice*: on
                # Python 3 ``uri[-1]`` is an int and would never equal b"/",
                # making the old ``uri[-1] != b"/"`` test always true.
                if response.code == HTTPStatus.NOT_FOUND and uri[-1:] != b"/":
                    # NOTE(review): reusing ``bodyProducer`` for the retry
                    # assumes the producer can be consumed twice -- confirm.
                    retry_d = agent.request(
                        method,
                        uri + b"/",
                        headers=headers,
                        bodyProducer=bodyProducer,
                    )
                    # Process the retried response instead of the original
                    # 404 (previously the retry Deferred was discarded and
                    # the 404 body was read).  The retried URI ends with
                    # b"/", so this cannot recurse a second time.
                    retry_d.addCallback(render_response)
                    return retry_d
                else:
                    raise PowerActionError(
                        "Request failed with response status code: "
                        "%s." % response.code)

            d = readBody(response)
            d.addErrback(eb_catch_partial)
            return d

        d.addCallback(render_response)
        return d
示例#9
0
文件: api.py 项目: pythonmobile/treq
def request(
    method, url, headers=None, body=None, allow_redirects=True, agent=_agent
):
    """Issue an HTTP request through ``agent``.

    Wraps a truthy ``body`` in a _StringProducer, optionally wraps the agent
    in a RedirectAgent, and returns a Deferred firing with a Response built
    from the raw response and ``method``.
    """
    # A falsy body (None, "") is passed through unchanged.
    producer = _StringProducer(body) if body else body
    chosen_agent = RedirectAgent(agent) if allow_redirects else agent

    d = chosen_agent.request(method, url, Headers(headers), producer)
    d.addCallback(Response, method)
    return d
示例#10
0
 def __call__(self, url):
     self.log.debug('calling: GET -> %s', url)
     agent = RedirectAgent(Agent(reactor))
     code = None
     try:
         # State can be either OK
         response_obj = yield agent.request('GET', url)
         code = response_obj.code
         self.log.debug('response code: %s', code)
     except Exception, err:
         # Or state can fail due to any reason: timeouts, dns errors, etc
         # I catch all errors
         self.log.error('call error: -> %s, reson: %s', url, err)
示例#11
0
 def __call__(self, url):
     self.log.debug('calling: GET -> %s', url)
     agent = RedirectAgent(Agent(reactor))
     code = None
     try:
         # State can be either OK
         response_obj = yield agent.request('GET', url)
         code = response_obj.code
         self.log.debug('response code: %s', code)
     except Exception, err:
         # Or state can fail due to any reason: timeouts, dns errors, etc
         # I catch all errors
         self.log.error('call error: -> %s, reson: %s', url, err)
示例#12
0
 def __ssl_request__(query_url):
     """GET ``query_url`` from the Onionoo API over SSL.

     The parsed result is posted back via onionooPaste; ``self``, ``err``,
     ``msg`` and ``channel`` are captured from the enclosing scope.
     """
     onionoo_headers = Headers(self.createOnionooHeaders())
     ssl_context = OnionooClientContextFactory()
     ssl_agent = RedirectAgent(Agent(reactor, ssl_context))
     request_d = ssl_agent.request('GET', str(query_url),
                                   onionoo_headers, None)
     request_d.addCallback(self.onionooCallback)
     request_d.addErrback(err)
     request_d.addCallback(self.onionooPaste, msg, channel)
     request_d.addErrback(err)
     return request_d
示例#13
0
文件: scanner.py 项目: freddyb/clickr
    def do_http(self, url):
	"""GET ``url``, print the response metadata to stdout and hand the
	body to a BeginningPrinter (Python 2 / Twisted)."""
	print "Visiting", url

	#url = "http://127.0.0.1:8000/?" + url.encode("base64").replace("\n","") + '/'
	agent = RedirectAgent(Agent(reactor))
	d = agent.request('GET', url, 
	    Headers({'User-Agent': ['Clickr']}),
	    None)
	def cbResponse(response):
	    from pprint import pformat
	    from twisted.internet.defer import Deferred	    
	    print 'Response version:', response.version
	    print 'Response code:', response.code
	    print 'Response phrase:', response.phrase
	    print 'Response headers:'
	    print pformat(list(response.headers.getAllRawHeaders()))	    
	    print 'Response received'
	    finished = Deferred()
	    response.deliverBody(BeginningPrinter(finished))
	    return finished
	# NOTE(review): cbResponse is defined but never attached to ``d`` in
	# the visible snippet -- presumably ``d.addCallback(cbResponse)``
	# follows in the original source; confirm.
示例#14
0
    def on_title(self, bot, user, details):
        """TITLE [url] - If provided, prints the title of url. If not, prints the title of the last mentioned url."""
        if not len(details['splitmsg']):
            # No argument given: fall back to the last URL seen for this
            # bot/channel pair.
            if not bot in self.bots:
                return "No URL has been said recently."
            
            if not details["channel"].lower() in self.bots[bot]:
                return "No URL has been said recently in this channel."
            
            url = self.bots[bot][details['channel'].lower()]
        else:
            url = details['splitmsg'][0]
            
            match = titlepattern.match(str(url))
            
            # Retry with an http:// prefix before rejecting the argument.
            if not match:
                url = "http://" + url
                match = titlepattern.match(url)
            
            if not match:
                return "Oops, try a valid url!"

        try:
            agent = RedirectAgent(Agent(reactor))
            d = agent.request('GET', str(url))
            
            def cbRequest(response):
                # Fires once DeferredPrinter has consumed the response body.
                finished = Deferred()
                response.deliverBody(DeferredPrinter(finished, url))
                return finished
            
            d.addCallback(cbRequest)
            return d
        
        # NOTE(review): agent.request returns a Deferred and does not raise
        # urllib2 errors synchronously, so these handlers likely never fire;
        # errors would instead surface on the Deferred. Confirm intent.
        except urllib2.HTTPError as e:
            return "Looks like that page has an error on it! (%s: %i)" % (url, e.code)
        except urllib2.URLError, e:
            return "There was an error retrieving the page's data. (%s: %s)" % (url, e)
示例#15
0
class WellKnownResolver(object):
    """Handles well-known lookups for matrix servers.
    """
    def __init__(self, reactor, agent, well_known_cache=None):
        self._reactor = reactor
        self._clock = Clock(reactor)

        # Fall back to the module-level shared cache when none is supplied.
        if well_known_cache is None:
            well_known_cache = _well_known_cache

        self._well_known_cache = well_known_cache
        # Redirects are followed when fetching /.well-known/matrix/server.
        self._well_known_agent = RedirectAgent(agent)

    @defer.inlineCallbacks
    def get_well_known(self, server_name):
        """Attempt to fetch and parse a .well-known file for the given server

        Args:
            server_name (bytes): name of the server, from the requested url

        Returns:
            Deferred[WellKnownLookupResult]: The result of the lookup
        """
        try:
            result = self._well_known_cache[server_name]
        except KeyError:
            # TODO: should we linearise so that we don't end up doing two .well-known
            # requests for the same server in parallel?
            with Measure(self._clock, "get_well_known"):
                result, cache_period = yield self._do_get_well_known(
                    server_name)

            # A cache period of 0 (or less) means "do not cache".
            if cache_period > 0:
                self._well_known_cache.set(server_name, result, cache_period)

        return WellKnownLookupResult(delegated_server=result)

    @defer.inlineCallbacks
    def _do_get_well_known(self, server_name):
        """Actually fetch and parse a .well-known, without checking the cache

        Args:
            server_name (bytes): name of the server, from the requested url

        Returns:
            Deferred[Tuple[bytes|None|object],int]:
                result, cache period, where result is one of:
                 - the new server name from the .well-known (as a `bytes`)
                 - None if there was no .well-known file.
                 - INVALID_WELL_KNOWN if the .well-known was invalid
        """
        uri = b"https://%s/.well-known/matrix/server" % (server_name, )
        uri_str = uri.decode("ascii")
        logger.info("Fetching %s", uri_str)
        try:
            response = yield make_deferred_yieldable(
                self._well_known_agent.request(b"GET", uri))
            body = yield make_deferred_yieldable(readBody(response))
            if response.code != 200:
                raise Exception("Non-200 response %s" % (response.code, ))

            parsed_body = json.loads(body.decode("utf-8"))
            logger.info("Response from .well-known: %s", parsed_body)
            if not isinstance(parsed_body, dict):
                raise Exception("not a dict")
            if "m.server" not in parsed_body:
                raise Exception("Missing key 'm.server'")
        except Exception as e:
            # Any failure (connection, non-200, bad JSON, missing key) is
            # treated as "no valid .well-known" and cached for a while.
            logger.info("Error fetching %s: %s", uri_str, e)

            # add some randomness to the TTL to avoid a stampeding herd every hour
            # after startup
            cache_period = WELL_KNOWN_INVALID_CACHE_PERIOD
            cache_period += random.uniform(
                0, WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER)
            return (None, cache_period)

        result = parsed_body["m.server"].encode("ascii")

        # Honour the server's cache headers, clamped to sane bounds.
        cache_period = _cache_period_from_headers(
            response.headers, time_now=self._reactor.seconds)
        if cache_period is None:
            cache_period = WELL_KNOWN_DEFAULT_CACHE_PERIOD
            # add some randomness to the TTL to avoid a stampeding herd every 24 hours
            # after startup
            cache_period += random.uniform(
                0, WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER)
        else:
            cache_period = min(cache_period, WELL_KNOWN_MAX_CACHE_PERIOD)
            cache_period = max(cache_period, WELL_KNOWN_MIN_CACHE_PERIOD)

        return (result, cache_period)
示例#16
0
class HTTPTest(NetTestCase):
    """
    A utility class for dealing with HTTP based testing. It provides methods to
    be overriden for dealing with HTTP based testing.
    The main functions to look at are processResponseBody and
    processResponseHeader that are invoked once the headers have been received
    and once the request body has been received.
    """
    name = "HTTP Test"
    version = 0.1

    # Pick a random User-Agent header for every request when True.
    randomizeUA = True
    # Wrap the agent in a RedirectAgent so 3XX responses are followed.
    followRedirects = False

    def setUp(self):
        """Build the HTTP agent and reset per-test request/response state."""
        log.debug("Setting up HTTPTest")
        try:
            import OpenSSL
        except ImportError:
            # Only an import failure is expected here; anything else should
            # propagate.  (Also fixed the missing space in the message.)
            log.err("Warning! pyOpenSSL is not installed. https websites will "
                    "not work")

        self.agent = Agent(reactor)

        if self.followRedirects:
            try:
                from twisted.web.client import RedirectAgent
                self.agent = RedirectAgent(self.agent)
            except ImportError:
                # Fixed missing spaces between the concatenated fragments.
                log.err("Warning! You are running an old version of twisted "
                        "(<= 10.1). I will not be able to follow redirects. "
                        "This may make the testing less precise.")
                self.report['errors'].append("Could not import RedirectAgent")

        self.request = {}
        self.response = {}
        self.processInputs()
        log.debug("Finished test setup")

    def processInputs(self):
        """Hook for subclasses to process test inputs before requests run."""
        pass

    def _processResponseBody(self, data, body_processor):
        """Record the body in the report and dispatch it to
        ``body_processor`` (if given) or ``self.processResponseBody``."""
        log.debug("Processing response body")
        self.response['body'] = data
        self.report['response'] = self.response

        if body_processor:
            body_processor(data)
        else:
            self.processResponseBody(data)

    def processResponseBody(self, data):
        """
        This should handle all the response body smushing for getting it ready
        to be passed onto the control.

        @param data: The content of the body returned.
        """
        pass

    def processResponseHeaders(self, headers):
        """
        This should take care of dealing with the returned HTTP headers.

        @param headers: The content of the returned headers.
        """
        pass

    def processRedirect(self, location):
        """
        Handle a redirection via a 3XX HTTP status code.

        @param location: the url that is being redirected to.
        """
        pass

    def doRequest(self, url, method="GET",
                  headers=None, body=None, headers_processor=None,
                  body_processor=None):
        """
        Perform an HTTP request with the specified method.

        url: the full url path of the request

        method: the HTTP Method to be used

        headers: the request headers to be sent as a dict

        body: the request body

        headers_processor: a function to be used for processing the HTTP header
                          responses (defaults to self.processResponseHeaders).
                          This function takes as argument the HTTP headers as a
                          dict.

        body_processor: a function to be used for processing the HTTP response
                        body (defaults to self.processResponseBody).
                        This function takes the response body as an argument.

        """
        log.debug("Performing request %s %s %s" % (url, method, headers))

        d = self.build_request(url, method, headers, body)

        def errback(data):
            log.err("Error in test %s" % data)
            self.report["error"] = data

        def finished(data):
            return

        d.addErrback(errback)
        d.addCallback(self._cbResponse, headers_processor, body_processor)
        d.addCallback(finished)
        return d

    def build_request(self, url, method="GET", headers=None, body=None):
        """Record the request in the report and fire it via ``self.agent``,
        returning the request Deferred."""
        self.request['method'] = method
        self.request['url'] = url
        self.request['headers'] = headers if headers else {}
        self.request['body'] = body

        if self.randomizeUA:
            self.randomize_useragent()

        self.report['request'] = self.request
        self.report['url'] = url

        # If we have a request body payload, set the request body to such
        # content
        if body:
            body_producer = StringProducer(self.request['body'])
        else:
            body_producer = None

        headers = Headers(self.request['headers'])

        req = self.agent.request(self.request['method'], self.request['url'],
                                  headers, body_producer)
        return req

    def _cbResponse(self, response, headers_processor, body_processor):
        """Record response metadata, dispatch headers, and return a Deferred
        that fires once the body has been delivered and processed."""
        log.debug("Got response %s" % response)
        if not response:
            self.report['response'] = None
            log.err("We got an empty response")
            return

        self.response['headers'] = list(response.headers.getAllRawHeaders())
        self.response['code'] = response.code
        self.response['length'] = response.length
        # Bug fix: this previously stored response.length under 'version'.
        self.response['version'] = response.version

        if str(self.response['code']).startswith('3'):
            # NOTE(review): assumes a Location header is always present on
            # 3XX responses; getRawHeaders may return None otherwise.
            self.processRedirect(response.headers.getRawHeaders('Location')[0])

        if headers_processor:
            headers_processor(self.response['headers'])
        else:
            self.processResponseHeaders(self.response['headers'])

        finished = defer.Deferred()
        response.deliverBody(BodyReceiver(finished))
        finished.addCallback(self._processResponseBody, body_processor)

        return finished

    def randomize_useragent(self):
        """Replace the User-Agent request header with a random pick from
        ``userAgents``."""
        user_agent = random.choice(userAgents)
        self.request['headers']['User-Agent'] = [user_agent]
示例#17
0
class WellKnownResolver:
    """Handles well-known lookups for matrix servers."""
    def __init__(
        self,
        reactor: IReactorTime,
        agent: IAgent,
        user_agent: bytes,
        well_known_cache: Optional[TTLCache] = None,
        had_well_known_cache: Optional[TTLCache] = None,
    ):
        self._reactor = reactor
        self._clock = Clock(reactor)

        # Fall back to the module-level shared caches when none are supplied.
        if well_known_cache is None:
            well_known_cache = _well_known_cache

        if had_well_known_cache is None:
            had_well_known_cache = _had_valid_well_known_cache

        self._well_known_cache = well_known_cache
        self._had_valid_well_known_cache = had_well_known_cache
        # Redirects are followed when fetching /.well-known/matrix/server.
        self._well_known_agent = RedirectAgent(agent)
        self.user_agent = user_agent

    async def get_well_known(self,
                             server_name: bytes) -> WellKnownLookupResult:
        """Attempt to fetch and parse a .well-known file for the given server

        Args:
            server_name: name of the server, from the requested url

        Returns:
            The result of the lookup
        """
        try:
            prev_result, expiry, ttl = self._well_known_cache.get_with_expiry(
                server_name)

            # Serve from cache unless the entry is inside its "grace period"
            # window near expiry (in which case we re-fetch proactively).
            now = self._clock.time()
            if now < expiry - WELL_KNOWN_GRACE_PERIOD_FACTOR * ttl:
                return WellKnownLookupResult(delegated_server=prev_result)
        except KeyError:
            prev_result = None

        # TODO: should we linearise so that we don't end up doing two .well-known
        # requests for the same server in parallel?
        try:
            with Measure(self._clock, "get_well_known"):
                result, cache_period = await self._fetch_well_known(
                    server_name)  # type: Optional[bytes], float

        except _FetchWellKnownFailure as e:
            if prev_result and e.temporary:
                # This is a temporary failure and we have a still valid cached
                # result, so lets return that. Hopefully the next time we ask
                # the remote will be back up again.
                return WellKnownLookupResult(delegated_server=prev_result)

            result = None

            if self._had_valid_well_known_cache.get(server_name, False):
                # We have recently seen a valid well-known record for this
                # server, so we cache the lack of well-known for a shorter time.
                cache_period = WELL_KNOWN_DOWN_CACHE_PERIOD
            else:
                cache_period = WELL_KNOWN_INVALID_CACHE_PERIOD

            # add some randomness to the TTL to avoid a stampeding herd
            cache_period *= random.uniform(
                1 - WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER,
                1 + WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER,
            )

        # A cache period of 0 (or less) means "do not cache".
        if cache_period > 0:
            self._well_known_cache.set(server_name, result, cache_period)

        return WellKnownLookupResult(delegated_server=result)

    async def _fetch_well_known(self,
                                server_name: bytes) -> Tuple[bytes, float]:
        """Actually fetch and parse a .well-known, without checking the cache

        Args:
            server_name: name of the server, from the requested url

        Raises:
            _FetchWellKnownFailure if we fail to lookup a result

        Returns:
            The lookup result and cache period.
        """

        had_valid_well_known = self._had_valid_well_known_cache.get(
            server_name, False)

        # We do this in two steps to differentiate between possibly transient
        # errors (e.g. can't connect to host, 503 response) and more permanent
        # errors (such as getting a 404 response).
        response, body = await self._make_well_known_request(
            server_name, retry=had_valid_well_known)

        try:
            if response.code != 200:
                raise Exception("Non-200 response %s" % (response.code, ))

            parsed_body = json_decoder.decode(body.decode("utf-8"))
            logger.info("Response from .well-known: %s", parsed_body)

            result = parsed_body["m.server"].encode("ascii")
        except defer.CancelledError:
            # Bail if we've been cancelled
            raise
        except Exception as e:
            # Parse failures are permanent (non-temporary) failures.
            logger.info("Error parsing well-known for %s: %s", server_name, e)
            raise _FetchWellKnownFailure(temporary=False)

        # Honour the server's cache headers, clamped to sane bounds.
        cache_period = _cache_period_from_headers(
            response.headers, time_now=self._reactor.seconds)
        if cache_period is None:
            cache_period = WELL_KNOWN_DEFAULT_CACHE_PERIOD
            # add some randomness to the TTL to avoid a stampeding herd every 24 hours
            # after startup
            cache_period *= random.uniform(
                1 - WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER,
                1 + WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER,
            )
        else:
            cache_period = min(cache_period, WELL_KNOWN_MAX_CACHE_PERIOD)
            cache_period = max(cache_period, WELL_KNOWN_MIN_CACHE_PERIOD)

        # We got a success, mark as such in the cache
        self._had_valid_well_known_cache.set(
            server_name,
            bool(result),
            cache_period + WELL_KNOWN_REMEMBER_DOMAIN_HAD_VALID,
        )

        return result, cache_period

    async def _make_well_known_request(self, server_name: bytes,
                                       retry: bool) -> Tuple[IResponse, bytes]:
        """Make the well known request.

        This will retry the request if requested and it fails (with unable
        to connect or receives a 5xx error).

        Args:
            server_name: name of the server, from the requested url
            retry: Whether to retry the request if it fails.

        Raises:
            _FetchWellKnownFailure if we fail to lookup a result

        Returns:
            Returns the response object and body. Response may be a non-200 response.
        """
        uri = b"https://%s/.well-known/matrix/server" % (server_name, )
        uri_str = uri.decode("ascii")

        headers = {
            b"User-Agent": [self.user_agent],
        }

        i = 0
        while True:
            i += 1

            logger.info("Fetching %s", uri_str)
            try:
                response = await make_deferred_yieldable(
                    self._well_known_agent.request(b"GET",
                                                   uri,
                                                   headers=Headers(headers)))
                # Stream the body with a size cap to guard against huge files.
                body_stream = BytesIO()
                await make_deferred_yieldable(
                    read_body_with_max_size(response, body_stream,
                                            WELL_KNOWN_MAX_SIZE))
                body = body_stream.getvalue()

                # 5xx responses are treated as retryable failures.
                if 500 <= response.code < 600:
                    raise Exception("Non-200 response %s" % (response.code, ))

                return response, body
            except defer.CancelledError:
                # Bail if we've been cancelled
                raise
            except BodyExceededMaxSize:
                # If the well-known file was too large, do not keep attempting
                # to download it, but consider it a temporary error.
                logger.warning(
                    "Requested .well-known file for %s is too large > %r bytes",
                    server_name.decode("ascii"),
                    WELL_KNOWN_MAX_SIZE,
                )
                raise _FetchWellKnownFailure(temporary=True)
            except Exception as e:
                # Give up once retries are exhausted (or not requested).
                if not retry or i >= WELL_KNOWN_RETRY_ATTEMPTS:
                    logger.info("Error fetching %s: %s", uri_str, e)
                    raise _FetchWellKnownFailure(temporary=True)

                logger.info("Error fetching %s: %s. Retrying", uri_str, e)

            # Sleep briefly in the hopes that they come back up
            await self._clock.sleep(0.5)
示例#18
0
文件: client.py 项目: DASPRiD/DASBiT
    def request(self, method, url, **kwargs):
        """Issue an HTTP request and return a Deferred firing with a wrapped
        response.

        Supported keyword arguments (all optional):
          params          -- dict of query parameters merged into the URL
          headers         -- dict or twisted Headers instance
          data            -- form data (dict/list/tuple, or a raw string)
          files           -- files for a multipart/form-data upload
          auth            -- credentials handed to add_auth
          allow_redirects -- follow 3XX responses (default True)
          timeout         -- seconds before the request Deferred is cancelled
          unbuffered      -- skip response buffering when True
          reactor         -- reactor used for the timeout timer
        """
        method = method.upper()

        # Join parameters provided in the URL
        # and the ones passed as argument.
        params = kwargs.get('params')
        if params:
            url = _combine_query_params(url, params)

        # Convert headers dictionary to
        # twisted raw headers format.
        headers = kwargs.get('headers')
        if headers:
            if isinstance(headers, dict):
                h = Headers({})
                for k, v in headers.iteritems():
                    # A bare string becomes a single raw header value; a
                    # sequence replaces all values for that header.
                    if isinstance(v, str):
                        h.addRawHeader(k, v)
                    else:
                        h.setRawHeaders(k, v)

                headers = h
        else:
            headers = Headers({})

        # Here we choose a right producer
        # based on the parameters passed in.
        bodyProducer = None
        data = kwargs.get('data')
        files = kwargs.get('files')
        if files:
            # If the files keyword is present we will issue a
            # multipart/form-data request as it suits better for cases
            # with files and/or large objects.
            files = list(_convert_files(files))
            boundary = uuid.uuid4()
            headers.setRawHeaders(
                'content-type', [
                    'multipart/form-data; boundary=%s' % (boundary,)])
            if data:
                data = _convert_params(data)
            else:
                data = []

            bodyProducer = multipart.MultiPartProducer(
                data + files, boundary=boundary)
        elif data:
            # Otherwise stick to x-www-form-urlencoded format
            # as it's generally faster for smaller requests.
            if isinstance(data, (dict, list, tuple)):
                headers.setRawHeaders(
                    'content-type', ['application/x-www-form-urlencoded'])
                data = urlencode(data, doseq=True)
            bodyProducer = IBodyProducer(data)

        wrapped_agent = self._agent

        if kwargs.get('allow_redirects', True):
            wrapped_agent = RedirectAgent(wrapped_agent)

        # Transparently decompress gzip-encoded responses.
        wrapped_agent = ContentDecoderAgent(wrapped_agent,
                                            [('gzip', GzipDecoder)])

        auth = kwargs.get('auth')
        if auth:
            wrapped_agent = add_auth(wrapped_agent, auth)

        d = wrapped_agent.request(
            method, url, headers=headers,
            bodyProducer=bodyProducer)

        timeout = kwargs.get('timeout')
        if timeout:
            # Cancel the request Deferred if it has not fired in time; the
            # timer itself is cancelled as soon as a result arrives.
            delayedCall = default_reactor(kwargs.get('reactor')).callLater(
                timeout, d.cancel)

            def gotResult(result):
                if delayedCall.active():
                    delayedCall.cancel()
                return result

            d.addBoth(gotResult)

        if not kwargs.get('unbuffered', False):
            d.addCallback(_BufferedResponse)

        return d.addCallback(_Response)
示例#19
0
文件: http.py 项目: duy/ooni-probe
class HTTPTest(OONITest):
    """
    A utility class for dealing with HTTP based testing. It provides methods
    to be overriden for dealing with HTTP based testing.
    The main functions to look at are processResponseBody and
    processResponseHeaders that are invoked once the headers have been
    received and once the request body has been received.
    """
    # Set a random User-Agent header on every request when True.
    randomize_ua = True
    # Wrap the agent in a RedirectAgent when True so 3XX are followed.
    follow_redirects = False

    def initialize(self):
        """Create the twisted Agent used to issue all requests."""
        # Consistency fix: use the twisted-style ``log`` helper like the
        # rest of this class instead of the stdlib ``logging`` module.
        log.debug("HTTPTest.initialize")

        from twisted.web.client import Agent

        self.agent = Agent(self.reactor)
        if self.follow_redirects:
            from twisted.web.client import RedirectAgent
            self.agent = RedirectAgent(self.agent)

        self.request = {}
        self.response = {}

    def _processResponseBody(self, data):
        """Store the received body, then hand it to the overridable hook."""
        log.debug("HTTPTest._processResponseBody")

        self.response['body'] = data
        #self.result['response'] = self.response
        self.processResponseBody(data)

    def processResponseBody(self, data):
        """
        This should handle all the response body smushing for getting it
        ready to be passed onto the control.

        @param data: The content of the body returned.
        """
        log.debug("HTTPTest.processResponseBody")

    def processResponseHeaders(self, headers):
        """
        This should take care of dealing with the returned HTTP headers.

        @param headers: The content of the returned headers.
        """
        log.debug("HTTPTest.processResponseHeaders")

    def processRedirect(self, location):
        """
        Handle a redirection via a 3XX HTTP status code.

        @param location: the url that is being redirected to.
        """
        log.debug("HTTPTest.processRedirect")

    def experiment(self, args):
        """Run one measurement: request the url and process the response."""
        log.msg("HTTPTest.experiment")
        # Prefer the url supplied by the asset args; fall back to the CLI one.
        url = self.local_options['url'] if 'url' not in args else args['url']

        d = self.build_request(url)

        def finished(data):
            return data

        d.addCallback(self._cbResponse)
        d.addCallback(finished)
        return d

    def _cbResponse(self, response):
        """
        Record response metadata, dispatch the redirect/header hooks and
        deliver the body to ``_processResponseBody``.

        @param response: the twisted web response object.
        @return: a Deferred that fires once the body has been processed.
        """
        log.debug("HTTPTest._cbResponse")

        self.response['headers'] = list(response.headers.getAllRawHeaders())
        self.response['code'] = response.code
        self.response['length'] = response.length
        # Bug fix: this previously stored ``response.length`` a second time.
        self.response['version'] = response.version

        if str(self.response['code']).startswith('3'):
            self.processRedirect(response.headers.getRawHeaders('Location')[0])
        self.processResponseHeaders(self.response['headers'])
        #self.result['response'] = self.response

        finished = defer.Deferred()
        response.deliverBody(BodyReceiver(finished))
        finished.addCallback(self._processResponseBody)
        # Bug fix: return the Deferred so the caller's chain waits for the
        # body instead of firing with None before the body has arrived.
        return finished

    def randomize_useragent(self):
        """Pick a random User-Agent and store it in the request headers."""
        log.debug("HTTPTest.randomize_useragent")

        user_agent = random.choice(useragents)
        self.request['headers']['User-Agent'] = [user_agent]

    def build_request(self, url, method="GET", headers=None, body=None):
        """
        Record the request parameters and issue the request via the agent.

        @param url: the url to request.
        @param method: the HTTP method, 'GET' by default.
        @param headers: optional dict of request headers.
        @param body: optional request body.
        @return: the Deferred returned by Agent.request.
        """
        log.debug("HTTPTest.build_request")

        self.request['method'] = method
        self.request['url'] = url
        self.request['headers'] = headers if headers else {}
        self.request['body'] = body
        if self.randomize_ua:
            self.randomize_useragent()

        #self.result['request'] = self.request
        self.result['url'] = url
        # NOTE(review): ``body`` is handed to Agent.request as-is; twisted
        # expects an IBodyProducer there, so non-None bodies need wrapping
        # by the caller -- confirm how subclasses use this.
        return self.agent.request(self.request['method'], self.request['url'],
                                  Headers(self.request['headers']),
                                  self.request['body'])

    def load_assets(self):
        """Return the asset map used to drive the experiment inputs."""
        log.debug("HTTPTest.load_assets")

        if self.local_options:
            return {'url': Asset(self.local_options['asset'])}
        else:
            return {}
    def connectionLost(self, reason):
        """
        Called when the body transport closes: decode the buffered body as
        JSON and fire the stored deferred with the parsed result.

        @param reason: failure describing why the connection closed.
        """
        print('Finished receiving body:', self._bytes_received,
              reason.getErrorMessage())
        #for r in reason.value:

        result = self._bodybuf.getvalue()
        r = json.loads(result)
        # (translated) After callback(data) is invoked, a list [True, data]
        # is passed down the deferred chain, which delivers the fetched body
        # to the next function.
        self.finished.callback(r)


# Crawl the listing page(s) through a redirect-following agent and stop the
# reactor once every request in the batch has completed.
url = 'https://sh.lianjia.com/ershoufang/pg1'
contextFactory = WebClientContextFactory()

agent = RedirectAgent(Agent(reactor, contextFactory))
pending = []
start_time = time.time()
for page_index in range(1):
    page_url = url + str(page_index)
    print(page_url)
    request_d = agent.request(b"GET", page_url.encode("utf-8"))
    request_d.addCallback(cbRequest, page_url)
    request_d.addCallback(lambda body: print(body))
    pending.append(request_d)

all_done = defer.DeferredList(pending)
all_done.addBoth(end_crawl, start_time)
reactor.run()
示例#21
0
文件: rest.py 项目: bverdu/onDemand
    def request(self, method='GET',
                path='',
                headers=None,
                body=None):
        """
        Issue a REST request against the configured host (or the host we
        were last redirected to) and stream the response to the event
        handler.

        @param method: HTTP method, 'GET' by default.
        @param path: resource path appended to the base url.
        @param headers: optional dict mapping header names to value lists.
        @param body: optional JSON-serializable request body.
        @return: a Deferred firing once the response handler is attached.
        """
        # Bug fix: the headers default used to be a mutable dict literal that
        # was mutated below, leaking Content-Type into every later call.
        if headers is None:
            headers = {'User-Agent': ['onDemand/1.0 (Rest_Client)'],
                       'Accept': ['application/json']}
        else:
            # Copy so the caller's dict is never mutated.
            headers = dict(headers)

        data = None
        if self.loc:
            host = '/'.join((self.loc, path))
        else:
            host = '/'.join((self.host, path))
        if self.token:
            host += '?auth=' + self.token
        if body:
            headers.update({'Content-Type': ['application/json']})
            data = FileBodyProducer(StringIO(json.dumps(body)))
        agent = RedirectAgent(Agent(reactor, pool=self.pool))
        d = agent.request(method, host, Headers(headers), data)

        def cbFail(fail):
            # Errback: handle the redirect / fallback cases we know about.
            if hasattr(fail.value, 'response'):
                if hasattr(fail.value.response, 'code'):
                    if fail.value.response.code == 307:
                        # Temporary redirect: remember the new host and
                        # replay the same request against the new location.
                        loc = fail.value.response.headers.getRawHeaders(
                            'location')
                        new = urlparse(loc[0])
                        newhost = '://'.join((new.scheme, new.netloc))
                        if newhost == self.host:
                            self.loc = None
                        else:
                            self.loc = newhost
                        self.log.debug('redirect: %s' % self.loc)
                        data = FileBodyProducer(StringIO(json.dumps(body)))
                        d = agent.request(
                            method, loc[0], Headers(headers), data)
                        d.addCallbacks(cbRequest, cbFail)
                        return d
                    elif fail.value.response.code == 404 and self.loc:
                        # The redirected host no longer serves the resource:
                        # drop it and retry once against the canonical host.
                        # Bug fix: this used to pass the fully-expanded url as
                        # ``path`` and a Headers object where request()
                        # expects a plain dict.
                        self.loc = None
                        d = self.request(method, path, headers, body)
                        d.addCallbacks(cbRequest, cbFail)
                        return d
                else:
                    print(dir(fail.value))
                    # ``message`` only exists on py2 exceptions; fall back
                    # to the exception itself.
                    print(getattr(fail.value, 'message', fail.value))
                    print(fail.value.args)

            self.log.error('unhandled failure: %s -- %s' % (
                getattr(fail.value, 'message', ''), fail.value))

        def cbRequest(response):
            # Stream the response body to the registered event handler.
            finished = Deferred()
            response.deliverBody(RestHandle(finished, self.event_handler))
            return finished
        d.addCallbacks(cbRequest, cbFail)
        return d
示例#22
0
class TwAgentHelper:
	"""
	Helper around a twisted (Redirect)Agent that can route requests through
	an authenticated HTTP proxy, resolve redirected URLs and fetch pages.
	"""

	def __init__(self, proxy_host="scorpion.premiumize.me", use_proxy=False, p_user='', p_pass=''):
		"""Build the agent, optionally wrapping a ProxyAgent with Basic auth."""
		print "Twisted Agent in use", __TW_VER__
		# can not follow rel. url redirects (location header)
		self.headers = Headers(agent_headers)
		# Only use the proxy when requested AND endpoint support is available.
		self.useProxy = use_proxy and twEndpoints
		if self.useProxy:
			self.endpoint = TCP4ClientEndpoint(reactor, proxy_host, 80)
			self.agent = RedirectAgent(ProxyAgent(self.endpoint))
			# Proxy credentials travel as a Basic Proxy-Authorization header.
			auth = base64.b64encode("%s:%s" % (p_user, p_pass))
			self.headers.addRawHeader('Proxy-Authorization', 'Basic ' + auth.strip())
		else:
			self.agent = RedirectAgent(Agent(reactor))
			
	def getRedirectedUrl(self, callback, cb_err, url, *args, **kwargs):
		"""Issue a HEAD request and pass the final (redirected) url to callback."""
		print "getRedirectedUrl: ", url
		self._rd_callback = callback
		self.url = url
		self.data = ""

		self.agent.request('HEAD', url, headers=self.headers).addCallback(self.__getResponse, *args, **kwargs).addErrback(cb_err)

	def __getResponse(self, response, *args, **kwargs):
		"""Extract the Location header (fixing relative urls) and invoke the callback."""
		print "__getResponse:"
		print "Status code: ", response.phrase
		for header, value in response.headers.getAllRawHeaders():
			print header, value

		r = response.headers.getRawHeaders("location")
		if r:
			r_url = r[0]
			p = self._parse(r_url)

			# A location without an http(s) scheme is relative; rebuild it
			# from the originally requested url.
			if b'http' not in p[0]:
				print "Rel. URL correction"
				scheme, host, port, path = self._parse(self.url)
				# NOTE(review): this drops the base path and any non-default
				# port, so relative redirects below a sub-path may resolve
				# incorrectly -- confirm against the targeted hosts.
				r_url = b'%s://%s/%s' % (scheme, host, r_url)
		else:
			# No Location header: the original url is already final.
			r_url = self.url
		print "Location: ", r_url

		self._rd_callback(r_url, *args, **kwargs)

	def getWebPage(self, callback, cb_err, url, follow_redir, *args, **kwargs):
		"""Fetch url (optionally resolving redirects first) and deliver the body."""
		print "getWebPage: ", url
		self._wp_callback = callback
		self._errback = cb_err
		self.data = ""
		if follow_redir:
			# Resolve the final location first, then fetch that url.
			self.getRedirectedUrl(self.__getWebPageDef, cb_err, url, *args, **kwargs)
		else:
			self.__getWebPageDef(url, *args, **kwargs)

	def __getWebPageDef(self, url, *args, **kwargs):
		"""GET the url, collect the body, then fire the user callbacks."""
		d = self.agent.request('GET', url, headers=self.headers)
		d.addCallback(self.__getResource)
		d.addCallbacks(self._wp_callback, self._errback, callbackArgs=args, callbackKeywords=kwargs)
		
	"""
	def __getWebPageDef(self, url, *args, **kwargs):
		#getPage(url, followRedirect=True, agent=self.headers, headers={'Content-Type':'application/x-www-form-urlencoded'}).addCallback(self._wp_callback, *args, **kwargs).addErrback(self._errback)
		getPage(url, followRedirect=True, headers={'Content-Type':'application/x-www-form-urlencoded'}).addCallback(self._wp_callback, *args, **kwargs).addErrback(self._errback)
	"""
	
	def __getResource(self, response):
		"""Collect the response body into a Deferred via GetResource."""
		print "__getResource:"
		finished = Deferred()
		response.deliverBody(GetResource(finished))
		return finished

	@staticmethod
	def _parse(url, defaultPort=None):
		"""
		Split url into (scheme, host, port, path).

		@param url: the url to split.
		@param defaultPort: port used when the url carries none; derived
			from the scheme (443 for https, else 80) when omitted.
		"""
		url = url.strip()
		parsed = http.urlparse(url)
		scheme = parsed[0]
		path = urlunparse(('', '') + parsed[2:])

		if defaultPort is None:
			if scheme == 'https':
				defaultPort = 443
			else:
				defaultPort = 80

		host, port = parsed[1], defaultPort
		# Split an explicit :port off the host part; keep the default on
		# a malformed (non-integer) port.
		if ':' in host:
			host, port = host.split(':')
			try:
				port = int(port)
			except ValueError:
				port = defaultPort

		if path == '':
			path = '/'

		return scheme, host, port, path
示例#23
0
    def request(self, method, url, **kwargs):
        """
        Issue an HTTP request through the wrapped agent.

        Supported keyword arguments: ``params``, ``headers``, ``data``,
        ``files``, ``allow_redirects``, ``auth``, ``timeout``, ``reactor``
        and ``unbuffered``.

        @return: a Deferred firing with the wrapped response.
        """
        method = method.upper()

        # Merge query parameters passed via ``params`` into the url.
        query_params = kwargs.get('params')
        if query_params:
            url = _combine_query_params(url, query_params)

        # Normalise the ``headers`` argument into twisted's Headers object.
        raw_headers = kwargs.get('headers')
        if isinstance(raw_headers, dict):
            headers = Headers({})
            for name, value in raw_headers.iteritems():
                if isinstance(value, str):
                    headers.addRawHeader(name, value)
                else:
                    headers.setRawHeaders(name, value)
        elif raw_headers:
            headers = raw_headers
        else:
            headers = Headers({})

        # Pick a body producer matching the payload we were given.
        bodyProducer = None
        data = kwargs.get('data')
        files = kwargs.get('files')
        if files:
            # multipart/form-data suits uploads and large payloads best.
            file_parts = list(_convert_files(files))
            boundary = uuid.uuid4()
            headers.setRawHeaders(
                'content-type',
                ['multipart/form-data; boundary=%s' % (boundary,)])
            form_fields = _convert_params(data) if data else []
            bodyProducer = multipart.MultiPartProducer(
                form_fields + file_parts, boundary=boundary)
        elif data:
            # Smaller payloads go out x-www-form-urlencoded, which is
            # generally faster for small requests.
            if isinstance(data, (dict, list, tuple)):
                headers.setRawHeaders(
                    'content-type', ['application/x-www-form-urlencoded'])
                data = urlencode(data, doseq=True)
            bodyProducer = IBodyProducer(data)

        # Stack the agent wrappers: redirects, gzip decoding, then auth.
        agent = self._agent
        if kwargs.get('allow_redirects', True):
            agent = RedirectAgent(agent)
        agent = ContentDecoderAgent(agent, [('gzip', GzipDecoder)])
        auth = kwargs.get('auth')
        if auth:
            agent = add_auth(agent, auth)

        d = agent.request(
            method, url, headers=headers, bodyProducer=bodyProducer)

        # Optional client-side timeout: cancel the request when it fires,
        # and disarm the timer as soon as any result arrives.
        timeout = kwargs.get('timeout')
        if timeout:
            cancel_call = default_reactor(kwargs.get('reactor')).callLater(
                timeout, d.cancel)

            def _disarm_timeout(result):
                if cancel_call.active():
                    cancel_call.cancel()
                return result

            d.addBoth(_disarm_timeout)

        if not kwargs.get('unbuffered', False):
            d.addCallback(_BufferedResponse)

        return d.addCallback(_Response)
示例#24
0
    def request(
            self,
            method='GET',
            path='',
            headers=None,
            body=None):
        """
        Issue a REST request against the configured host (or the host we
        were last redirected to) and stream the response to the event
        handler.

        @param method: HTTP method, 'GET' by default.
        @param path: resource path appended to the base url.
        @param headers: optional dict mapping header names to value lists.
        @param body: optional JSON-serializable request body.
        @return: a Deferred firing once the response handler is attached.
        """
        # Bug fix: the headers default used to be a mutable dict literal that
        # was mutated below, leaking Content-Type into every later call.
        if headers is None:
            headers = {
                'User-Agent': ['onDemand/1.0 (Rest_Client)'],
                'Accept': ['application/json']
            }
        else:
            # Copy so the caller's dict is never mutated.
            headers = dict(headers)

        data = None
        if self.loc:
            host = '/'.join((self.loc, path))
        else:
            host = '/'.join((self.host, path))
        if self.token:
            host += '?auth=' + self.token
        if body:
            headers.update({'Content-Type': ['application/json']})
            data = FileBodyProducer(StringIO(json.dumps(body)))
        agent = RedirectAgent(Agent(reactor, pool=self.pool))
        d = agent.request(method, host, Headers(headers), data)

        def cbFail(fail):
            # Errback: handle the redirect / fallback cases we know about.
            if hasattr(fail.value, 'response'):
                if hasattr(fail.value.response, 'code'):
                    if fail.value.response.code == 307:
                        # Temporary redirect: remember the new host and
                        # replay the same request against the new location.
                        loc = fail.value.response.headers.getRawHeaders(
                            'location')
                        new = urlparse(loc[0])
                        newhost = '://'.join((new.scheme, new.netloc))
                        if newhost == self.host:
                            self.loc = None
                        else:
                            self.loc = newhost
                        self.log.debug('redirect: %s' % self.loc)
                        data = FileBodyProducer(StringIO(json.dumps(body)))
                        d = agent.request(method, loc[0], Headers(headers),
                                          data)
                        d.addCallbacks(cbRequest, cbFail)
                        return d
                    elif fail.value.response.code == 404 and self.loc:
                        # The redirected host no longer serves the resource:
                        # drop it and retry once against the canonical host.
                        # Bug fix: this used to pass the fully-expanded url
                        # as ``path`` and a Headers object where request()
                        # expects a plain dict.
                        self.loc = None
                        d = self.request(method, path, headers, body)
                        d.addCallbacks(cbRequest, cbFail)
                        return d
                else:
                    print(dir(fail.value))
                    # ``message`` only exists on py2 exceptions; fall back
                    # to the exception itself.
                    print(getattr(fail.value, 'message', fail.value))
                    print(fail.value.args)

            self.log.error('unhandled failure: %s -- %s' %
                           (getattr(fail.value, 'message', ''), fail.value))

        def cbRequest(response):
            # Stream the response body to the registered event handler.
            finished = Deferred()
            response.deliverBody(RestHandle(finished, self.event_handler))
            return finished

        d.addCallbacks(cbRequest, cbFail)
        return d
示例#25
0
    def redfish_request(self, method, uri, headers=None, bodyProducer=None):
        """Send the redfish request and return the response.

        @param method: HTTP method as bytes, e.g. b'GET'.
        @param uri: request URI as bytes.
        @param headers: optional request Headers.
        @param bodyProducer: optional body producer for the request body.
        @return: a Deferred firing with (decoded_json, response_headers).
        """
        agent = RedirectAgent(
            Agent(reactor, contextFactory=WebClientContextFactory())
        )
        d = agent.request(
            method, uri, headers=headers, bodyProducer=bodyProducer
        )

        def render_response(response, retried=False):
            """Render the HTTPS response received."""

            def eb_catch_partial(failure):
                # Twisted is raising PartialDownloadError because the responses
                # do not contain a Content-Length header. Since every response
                # holds the whole body we just take the result.
                failure.trap(PartialDownloadError)
                if int(failure.value.status) == HTTPStatus.OK:
                    return failure.value.response
                else:
                    return failure

            def cb_json_decode(data):
                data = data.decode("utf-8")
                # Only decode non-empty response bodies.
                if data:
                    # occasionally invalid json is returned. provide a clear
                    # error in that case
                    try:
                        return json.loads(data)
                    except ValueError as error:
                        raise PowerActionError(
                            "Redfish request failed from a JSON parse error:"
                            " %s." % error
                        )

            def cb_attach_headers(data, headers):
                # Pair the decoded body with the response headers.
                return data, headers

            # Error out if the response has a status code of 400 or above.
            if response.code >= int(HTTPStatus.BAD_REQUEST):
                # if there was no trailing slash, retry with a trailing slash
                # because of varying requirements of BMC manufacturers
                if (
                    not retried
                    and response.code == HTTPStatus.NOT_FOUND
                    and uri.decode("utf-8")[-1] != "/"
                ):
                    # Bug fix: the retry Deferred used to be created and then
                    # immediately discarded (overwritten by readBody below);
                    # chain it through this renderer and return it instead.
                    # ``retried`` guards against retrying more than once.
                    # NOTE(review): bodyProducer may already have been
                    # consumed by the first attempt -- confirm callers pass
                    # None or a reusable producer.
                    retry_d = agent.request(
                        method,
                        uri + "/".encode("utf-8"),
                        headers=headers,
                        bodyProducer=bodyProducer,
                    )
                    retry_d.addCallback(render_response, retried=True)
                    return retry_d
                raise PowerActionError(
                    "Redfish request failed with response status code:"
                    " %s." % response.code
                )

            d = readBody(response)
            d.addErrback(eb_catch_partial)
            d.addCallback(cb_json_decode)
            d.addCallback(cb_attach_headers, headers=response.headers)
            return d

        d.addCallback(render_response)
        return d
示例#26
0
    return finished


# Fetch one page through an authenticated tunneling proxy and time the run.
user_name = base64.b64encode('spider:123456'.encode('utf-8')).strip()
encode_user = b'Basic '+user_name
header = {'Proxy-Authorization': [encode_user]}
proxy_config = ('47.105.165.81',5527,encode_user)

url = "http://go2mars.top/solo/articles/2018/12/18/1545131102995.html"


# Bug fix: time.clock() was removed in Python 3.8; perf_counter() is the
# documented replacement for interval timing.
print(time.perf_counter())
contextFactory = ScrapyClientContextFactory()

agent = RedirectAgent(TunnelingAgent(reactor,proxy_config,contextFactory,10,None,None))


host = '47.105.165.81'
port = 5527
# endpoint = TCP4ClientEndpoint(reactor, host, port)
# agent_http = ProxyAgent(endpoint)
# d = agent_http.request(b'GET',b'http://go2mars.top/solo/articles/2018/12/18/1545131102995.html',Headers({'Proxy-Authorization': [encode_user]}),None)
d = agent.request(b'GET',b'http://go2mars.top/solo/articles/2018/12/18/1545131102995.html')
d.addCallback(cbRequest)
d.addErrback(lambda _:print(_))
d.addCallback(lambda _:print(time.perf_counter()))
d.addBoth(lambda _:reactor.stop())
reactor.run()