Пример #1
0
    def makeRequest(self, request):
        """Connect to the remote HTTP server and send request.

        Creates a fresh connection component (self.connectionclass) and a
        response-mode HTTPParser, links them to this component's private
        boxes, activates both as children and then pushes the request out
        over the "_tcpoutbox" outbox.

        Arguments:
        - request -- object whose requestobject mapping provides "uri-server",
                     "request" and optionally "uri-port"
        """
        requestobject = request.requestobject

        # Drop any references to a previous connection/parser pair
        self.tcpclient = None
        self.httpparser = None

        port = intval(requestobject.get("uri-port", ""))
        if port is None:
            # no usable port supplied - fall back to port 80
            port = 80

        self.tcpclient = self.connectionclass(requestobject["uri-server"],
                                              port)
        self.httpparser = HTTPParser(mode="response")

        # this component <-> TCP client
        self.link((self, "_tcpoutbox"), (self.tcpclient, "inbox"))
        self.link((self, "_tcpsignal"), (self.tcpclient, "control"))
        self.link((self.tcpclient, "signal"), (self, "_tcpcontrol"))

        # incoming TCP data -> HTTPParser directly
        self.link((self.tcpclient, "outbox"), (self.httpparser, "inbox"))

        # this component <-> HTTP parser
        self.link((self, "_parsersignal"), (self.httpparser, "control"))
        self.link((self.httpparser, "outbox"), (self, "_parserinbox"))
        self.link((self.httpparser, "signal"), (self, "_parsercontrol"))

        self.addChildren(self.tcpclient, self.httpparser)
        self.tcpclient.activate()
        self.httpparser.activate()
        self.response = ""

        payload = requestobject["request"]
        if isinstance(payload, str):
            # a single string is sent as one message
            self.send(payload, "_tcpoutbox")
        else:
            # otherwise send each part as its own message, in order
            for part in payload:
                self.send(part, "_tcpoutbox")
Пример #2
0
    def makeRequest(self, request):
        """Connect to the remote HTTP server and send request.

        Creates a fresh connection component (self.connectionclass) and a
        response-mode HTTPParser, links them to this component's private
        boxes, activates both as children and then pushes the request out
        over the "_tcpoutbox" outbox.

        Arguments:
        - request -- object whose requestobject mapping provides "uri-server",
                     "request" and optionally "uri-port"
        """
        requestobject = request.requestobject

        # Drop any references to a previous connection/parser pair
        self.tcpclient = None
        self.httpparser = None

        port = intval(requestobject.get("uri-port", ""))
        if port is None:
            # no usable port supplied - fall back to port 80
            port = 80

        self.tcpclient = self.connectionclass(requestobject["uri-server"],
                                              port)
        self.httpparser = HTTPParser(mode="response")

        # this component <-> TCP client
        self.link((self, "_tcpoutbox"), (self.tcpclient, "inbox"))
        self.link((self, "_tcpsignal"), (self.tcpclient, "control"))
        self.link((self.tcpclient, "signal"), (self, "_tcpcontrol"))

        # incoming TCP data -> HTTPParser directly
        self.link((self.tcpclient, "outbox"), (self.httpparser, "inbox"))

        # this component <-> HTTP parser
        self.link((self, "_parsersignal"), (self.httpparser, "control"))
        self.link((self.httpparser, "outbox"), (self, "_parserinbox"))
        self.link((self.httpparser, "signal"), (self, "_parsercontrol"))

        self.addChildren(self.tcpclient, self.httpparser)
        self.tcpclient.activate()
        self.httpparser.activate()
        self.response = ""

        payload = requestobject["request"]
        if isinstance(payload, str):
            # a single string is sent as one message
            self.send(payload, "_tcpoutbox")
        else:
            # otherwise send each part as its own message, in order
            for part in payload:
                self.send(part, "_tcpoutbox")
Пример #3
0
def HTTPServer(createRequestHandler, **argd):
    """\
    HTTPServer() -> new HTTPServer component capable of handling a single connection

    Builds a Graphline containing an HTTPParser, an HTTPRequestHandler and an
    HTTPShutdownLogicHandling component, wired so that data flows
    inbox -> parser -> handler -> outbox and all shutdown signalling passes
    through the shutdown-logic component.

    Arguments:
       -- createRequestHandler - a function required by HTTPRequestHandler that
                                 creates the appropriate request-handler component
                                 for each request, see HTTPResourceGlue
       -- **argd - keyword arguments handed on unchanged to HTTPParser
    """
    # Components are created in the order parser, handler, shutdown logic
    parser = HTTPParser(**argd)  # Since this is where the data goes first!
    handler = HTTPRequestHandler(createRequestHandler)
    corelogic = HTTPShutdownLogicHandling()

    # Data Handling
    datalinks = {
        ("self", "inbox"): ("PARSER", "inbox"),
        ("PARSER", "outbox"): ("HANDLER", "inbox"),
        ("HANDLER", "outbox"): ("self", "outbox"),
    }

    # Signalling Handling
    signallinks = {
        ("self", "control"): ("CORELOGIC", "control"),
        ("CORELOGIC", "Psignal"): ("PARSER", "control"),
        ("CORELOGIC", "Hsignal"): ("HANDLER", "control"),
        ("CORELOGIC", "signal"): ("self", "signal"),
        ("PARSER", "signal"): ("CORELOGIC", "Pcontrol"),
        ("HANDLER", "signal"): ("CORELOGIC", "Hcontrol"),
    }

    wiring = {}
    wiring.update(datalinks)
    wiring.update(signallinks)

    return Graphline(PARSER=parser,
                     HANDLER=handler,
                     CORELOGIC=corelogic,
                     linkages=wiring)
Пример #4
0
class SingleShotHTTPClient(component):
    """\
    SingleShotHTTPClient() -> component that can download a file using HTTP by URL

    The component requests starturl, forwards the parsed response messages to
    its "outbox" and follows 301/302/303/307 redirects. A queued request whose
    redirect count has reached 3 is abandoned. When finished it sends
    producerFinished on "signal".

    Arguments:
    - starturl     -- the URL of the file to download
    - [postbody]   -- data to POST to that URL - if set to None becomes an empty body in a POST (or PUT) request
    - [connectionclass] -- specify a class other than TCPClient to connect with
    - [extraheaders] -- mapping of extra header names to values added to every request
    - [method]     -- the HTTP method for the request (defaults to GET normally, or POST if postbody != "")
    """

    # NOTE: despite the "UNUSED" label, mainBody() reads shutdown messages
    # from the "control" inbox.
    Inboxes = {
        "inbox": "UNUSED",
        "control": "UNUSED",
        "_parserinbox": "Data from HTTP parser",
        "_parsercontrol": "Signals from HTTP parser",
        "_tcpcontrol": "Signals from TCP client",
    }

    # NOTE: despite the "UNUSED" label, main() sends producerFinished on the
    # "signal" outbox when it exits.
    Outboxes = {
        "outbox": "Requested file",
        "debug": "Output to aid debugging",
        "_parsersignal": "Signals for HTTP parser",
        "_tcpoutbox": "Send over TCP connection",
        "_tcpsignal": "Signals shutdown of TCP connection",
        "signal": "UNUSED"
    }

    def __init__(self,
                 starturl,
                 postbody="",
                 connectionclass=TCPClient,
                 extraheaders=None,
                 method=None):
        """Store the request parameters; no connection is made until main() runs."""
        super(SingleShotHTTPClient, self).__init__()
        self.tcpclient = None
        self.httpparser = None
        self.requestqueue = []
        self.starturl = starturl
        self.connectionclass = connectionclass
        self.method = method

        self.postbody = postbody
        # Use a fresh dict per instance when no extra headers are supplied
        if extraheaders is not None:
            self.extraheaders = extraheaders
        else:
            self.extraheaders = {}

    def formRequest(self, url):
        """Craft a HTTP request for the supplied url.

        Returns the mapping produced by splitUri(url) with an added "request"
        entry: a list whose first item is the request line and headers joined
        into one string, followed by the POST body when one is set.
        """
        splituri = splitUri(url)

        host = splituri["uri-server"]
        if "uri-port" in splituri:
            host += ":" + splituri["uri-port"]

        headerlines = []
        method = self.method
        if self.postbody == "":
            # No body at all: plain request, GET unless a method was given
            if not method:
                method = 'GET'
            headerlines.append(method + " " + splituri["raw-uri"] +
                               " HTTP/1.1\r\n")
        else:
            # A body (possibly None, meaning empty): POST unless a method was given
            if not method:
                method = 'POST'
            headerlines.append(method + " " + splituri["raw-uri"] +
                               " HTTP/1.1\r\n")
            if self.postbody is not None:
                headerlines.append("Content-Length: " +
                                   str(len(self.postbody)) + "\r\n")
            else:
                headerlines.append("Content-Length: 0\r\n")

        headerlines.append("Host: " + host + "\r\n")
        headerlines.append("User-agent: Kamaelia HTTP Client 0.3 (RJL)\r\n")
        # keep-alive is a work around for lack of shutdown notification in TCPClient
        headerlines.append("Connection: Keep-Alive\r\n")
        for header in self.extraheaders:
            headerlines.append(
                "%s: %s\r\n" % (header, self.extraheaders[header]))

        # blank line terminates the header section
        headerlines.append("\r\n")

        # might improve performance by sending more together
        splituri["request"] = ["".join(headerlines)]

        if self.postbody not in [None, ""]:
            splituri["request"].append(self.postbody)

        return splituri

    def makeRequest(self, request):
        """Connect to the remote HTTP server and send request.

        Creates a new connection component and a response-mode HTTPParser,
        links them to this component's private boxes, activates them as
        children and sends the request parts to "_tcpoutbox".
        """
        requestobject = request.requestobject

        self.tcpclient = None
        self.httpparser = None

        port = intval(requestobject.get("uri-port", ""))
        if port is None:
            # no usable port supplied - fall back to port 80
            port = 80

        self.tcpclient = self.connectionclass(requestobject["uri-server"],
                                              port)
        self.httpparser = HTTPParser(mode="response")

        self.link((self, "_tcpoutbox"), (self.tcpclient, "inbox"))
        self.link((self, "_tcpsignal"), (self.tcpclient, "control"))
        self.link((self.tcpclient, "signal"), (self, "_tcpcontrol"))

        # incoming TCP data -> HTTPParser directly
        self.link((self.tcpclient, "outbox"), (self.httpparser, "inbox"))

        self.link((self, "_parsersignal"), (self.httpparser, "control"))
        self.link((self.httpparser, "outbox"), (self, "_parserinbox"))
        self.link((self.httpparser, "signal"), (self, "_parsercontrol"))

        self.addChildren(self.tcpclient, self.httpparser)
        self.tcpclient.activate()
        self.httpparser.activate()
        self.response = ""

        payload = requestobject["request"]
        if isinstance(payload, str):
            self.send(payload, "_tcpoutbox")
        else:
            for part in payload:
                self.send(part, "_tcpoutbox")

    def shutdownKids(self):
        """Close TCP connection and HTTP parser"""
        if self.tcpclient is not None and self.httpparser is not None:
            self.send(producerFinished(), "_tcpsignal")
            self.send(shutdown(), "_parsersignal")
            self.removeChild(self.tcpclient)
            self.removeChild(self.httpparser)
            # clearing tcpclient tells mainBody() that no download is in progress
            self.tcpclient = None
            self.httpparser = None

    def handleRedirect(self, header):
        """Check for a redirect response and queue the fetching the page it points to if it is such a response.
        Returns true if it was a redirect page and false otherwise."""

        if header["responsecode"] not in ["301", "302", "303", "307"]:
            return False

        # location header gives the redirect URL
        newurl = header["headers"].get("location", "")
        if newurl == "":
            # redirect status without a target: reported as "not a redirect"
            return False

        self.send(ParsedHTTPRedirect(redirectto=newurl), "outbox")
        redirectedrequest = HTTPRequest(
            self.formRequest(newurl),
            self.currentrequest.redirectcount + 1)
        self.requestqueue.append(redirectedrequest)
        return True

    def main(self):
        """Main loop."""
        self.requestqueue.append(
            HTTPRequest(self.formRequest(self.starturl), 0))
        while self.mainBody():
            yield 1
        self.send(producerFinished(self), "signal")
        yield 1
        return

    def mainBody(self):
        """Called repeatedly by main loop. Checks inboxes and processes messages received.
        Start the fetching of the new page if the current one is a redirect and has been
        completely fetched.

        Returns 1 to keep running and 0 when the component should stop."""

        self.send("SingleShotHTTPClient.mainBody()", "debug")
        while self.dataReady("_parserinbox"):
            msg = self.recv("_parserinbox")
            if isinstance(msg, ParsedHTTPHeader):
                self.send(
                    "SingleShotHTTPClient received a ParsedHTTPHeader on _parserinbox",
                    "debug")
                # if the page is not a redirect page, only a 200 header is
                # passed on; any other status is treated as not found
                if not self.handleRedirect(msg.header):
                    if msg.header["responsecode"] == "200":
                        self.send(msg, "outbox")

            elif isinstance(msg, ParsedHTTPBodyChunk):
                self.send(
                    "SingleShotHTTPClient received a ParsedHTTPBodyChunk on _parserinbox",
                    "debug")
                # if not redirecting then send the response on
                if not self.requestqueue:
                    self.send(msg, "outbox")

            elif isinstance(msg, ParsedHTTPEnd):
                self.send(
                    "SingleShotHTTPClient received a ParsedHTTPEnd on _parserinbox",
                    "debug")
                # if not redirecting then send the response on
                if not self.requestqueue:
                    self.send(msg, "outbox")
                self.shutdownKids()
                return 1

        while self.dataReady("_parsercontrol"):
            # parser signals are drained and otherwise ignored
            self.recv("_parsercontrol")
            self.send(
                "SingleShotHTTPClient received something on _parsercontrol",
                "debug")

        while self.dataReady("_tcpcontrol"):
            # pass TCP client signals on to the parser
            msg = self.recv("_tcpcontrol")
            self.send(msg, "_parsersignal")

        while self.dataReady("control"):
            msg = self.recv("control")
            if isinstance(msg, shutdown):
                self.shutdownKids()
                return 0

        # if we're not currently downloading a page
        if self.tcpclient is None:
            # then either we've finished or we should download the next URL (if we've been redirected)
            if not self.requestqueue:
                return 0
            self.currentrequest = self.requestqueue.pop(0)
            if self.currentrequest.redirectcount == 3:
                # 3 redirects is excessive, give up, we're probably in a loop anyway
                return 0
            self.makeRequest(self.currentrequest)

        self.pause()
        return 1
Пример #5
0
class SingleShotHTTPClient(component):
    """\
    SingleShotHTTPClient() -> component that can download a file using HTTP by URL

    The component requests starturl, forwards the parsed response messages to
    its "outbox" and follows 301/302/303/307 redirects. A queued request whose
    redirect count has reached 3 is abandoned. When finished it sends
    producerFinished on "signal".

    Arguments:
    - starturl     -- the URL of the file to download
    - [postbody]   -- data to POST to that URL - if set to None becomes an empty body in a POST (or PUT) request
    - [connectionclass] -- specify a class other than TCPClient to connect with
    - [extraheaders] -- mapping of extra header names to values added to every request
    - [method]     -- the HTTP method for the request (defaults to GET normally, or POST if postbody != "")
    """

    # NOTE: despite the "UNUSED" label, mainBody() reads shutdown messages
    # from the "control" inbox.
    Inboxes = {
        "inbox": "UNUSED",
        "control": "UNUSED",
        "_parserinbox": "Data from HTTP parser",
        "_parsercontrol": "Signals from HTTP parser",
        "_tcpcontrol": "Signals from TCP client",
    }

    # NOTE: despite the "UNUSED" label, main() sends producerFinished on the
    # "signal" outbox when it exits.
    Outboxes = {
        "outbox": "Requested file",
        "debug": "Output to aid debugging",
        "_parsersignal": "Signals for HTTP parser",
        "_tcpoutbox": "Send over TCP connection",
        "_tcpsignal": "Signals shutdown of TCP connection",
        "signal": "UNUSED"
    }

    def __init__(self,
                 starturl,
                 postbody="",
                 connectionclass=TCPClient,
                 extraheaders=None,
                 method=None):
        """Store the request parameters; no connection is made until main() runs."""
        super(SingleShotHTTPClient, self).__init__()
        self.tcpclient = None
        self.httpparser = None
        self.requestqueue = []
        self.starturl = starturl
        self.connectionclass = connectionclass
        self.method = method

        self.postbody = postbody
        # Use a fresh dict per instance when no extra headers are supplied
        if extraheaders is not None:
            self.extraheaders = extraheaders
        else:
            self.extraheaders = {}

    def formRequest(self, url):
        """Craft a HTTP request for the supplied url.

        Returns the mapping produced by splitUri(url) with an added "request"
        entry: a list whose first item is the request line and headers joined
        into one string, followed by the POST body when one is set.
        """
        splituri = splitUri(url)

        host = splituri["uri-server"]
        if "uri-port" in splituri:
            host += ":" + splituri["uri-port"]

        headerlines = []
        method = self.method
        if self.postbody == "":
            # No body at all: plain request, GET unless a method was given
            if not method:
                method = 'GET'
            headerlines.append(method + " " + splituri["raw-uri"] +
                               " HTTP/1.1\r\n")
        else:
            # A body (possibly None, meaning empty): POST unless a method was given
            if not method:
                method = 'POST'
            headerlines.append(method + " " + splituri["raw-uri"] +
                               " HTTP/1.1\r\n")
            if self.postbody is not None:
                headerlines.append("Content-Length: " +
                                   str(len(self.postbody)) + "\r\n")
            else:
                headerlines.append("Content-Length: 0\r\n")

        headerlines.append("Host: " + host + "\r\n")
        headerlines.append("User-agent: Kamaelia HTTP Client 0.3 (RJL)\r\n")
        # keep-alive is a work around for lack of shutdown notification in TCPClient
        headerlines.append("Connection: Keep-Alive\r\n")
        for header in self.extraheaders:
            headerlines.append(
                "%s: %s\r\n" % (header, self.extraheaders[header]))

        # blank line terminates the header section
        headerlines.append("\r\n")

        # might improve performance by sending more together
        splituri["request"] = ["".join(headerlines)]

        if self.postbody not in [None, ""]:
            splituri["request"].append(self.postbody)

        return splituri

    def makeRequest(self, request):
        """Connect to the remote HTTP server and send request.

        Creates a new connection component and a response-mode HTTPParser,
        links them to this component's private boxes, activates them as
        children and sends the request parts to "_tcpoutbox".
        """
        requestobject = request.requestobject

        self.tcpclient = None
        self.httpparser = None

        port = intval(requestobject.get("uri-port", ""))
        if port is None:
            # no usable port supplied - fall back to port 80
            port = 80

        self.tcpclient = self.connectionclass(requestobject["uri-server"],
                                              port)
        self.httpparser = HTTPParser(mode="response")

        self.link((self, "_tcpoutbox"), (self.tcpclient, "inbox"))
        self.link((self, "_tcpsignal"), (self.tcpclient, "control"))
        self.link((self.tcpclient, "signal"), (self, "_tcpcontrol"))

        # incoming TCP data -> HTTPParser directly
        self.link((self.tcpclient, "outbox"), (self.httpparser, "inbox"))

        self.link((self, "_parsersignal"), (self.httpparser, "control"))
        self.link((self.httpparser, "outbox"), (self, "_parserinbox"))
        self.link((self.httpparser, "signal"), (self, "_parsercontrol"))

        self.addChildren(self.tcpclient, self.httpparser)
        self.tcpclient.activate()
        self.httpparser.activate()
        self.response = ""

        payload = requestobject["request"]
        if isinstance(payload, str):
            self.send(payload, "_tcpoutbox")
        else:
            for part in payload:
                self.send(part, "_tcpoutbox")

    def shutdownKids(self):
        """Close TCP connection and HTTP parser"""
        if self.tcpclient is not None and self.httpparser is not None:
            self.send(producerFinished(), "_tcpsignal")
            self.send(shutdown(), "_parsersignal")
            self.removeChild(self.tcpclient)
            self.removeChild(self.httpparser)
            # clearing tcpclient tells mainBody() that no download is in progress
            self.tcpclient = None
            self.httpparser = None

    def handleRedirect(self, header):
        """Check for a redirect response and queue the fetching the page it points to if it is such a response.
        Returns true if it was a redirect page and false otherwise."""

        if header["responsecode"] not in ["301", "302", "303", "307"]:
            return False

        # location header gives the redirect URL
        newurl = header["headers"].get("location", "")
        if newurl == "":
            # redirect status without a target: reported as "not a redirect"
            return False

        self.send(ParsedHTTPRedirect(redirectto=newurl), "outbox")
        redirectedrequest = HTTPRequest(
            self.formRequest(newurl),
            self.currentrequest.redirectcount + 1)
        self.requestqueue.append(redirectedrequest)
        return True

    def main(self):
        """Main loop."""
        self.requestqueue.append(
            HTTPRequest(self.formRequest(self.starturl), 0))
        while self.mainBody():
            yield 1
        self.send(producerFinished(self), "signal")
        yield 1
        return

    def mainBody(self):
        """Called repeatedly by main loop. Checks inboxes and processes messages received.
        Start the fetching of the new page if the current one is a redirect and has been
        completely fetched.

        Returns 1 to keep running and 0 when the component should stop."""

        self.send("SingleShotHTTPClient.mainBody()", "debug")
        while self.dataReady("_parserinbox"):
            msg = self.recv("_parserinbox")
            if isinstance(msg, ParsedHTTPHeader):
                self.send(
                    "SingleShotHTTPClient received a ParsedHTTPHeader on _parserinbox",
                    "debug")
                # if the page is not a redirect page, only a 200 header is
                # passed on; any other status is treated as not found
                if not self.handleRedirect(msg.header):
                    if msg.header["responsecode"] == "200":
                        self.send(msg, "outbox")

            elif isinstance(msg, ParsedHTTPBodyChunk):
                self.send(
                    "SingleShotHTTPClient received a ParsedHTTPBodyChunk on _parserinbox",
                    "debug")
                # if not redirecting then send the response on
                if not self.requestqueue:
                    self.send(msg, "outbox")

            elif isinstance(msg, ParsedHTTPEnd):
                self.send(
                    "SingleShotHTTPClient received a ParsedHTTPEnd on _parserinbox",
                    "debug")
                # if not redirecting then send the response on
                if not self.requestqueue:
                    self.send(msg, "outbox")
                self.shutdownKids()
                return 1

        while self.dataReady("_parsercontrol"):
            # parser signals are drained and otherwise ignored
            self.recv("_parsercontrol")
            self.send(
                "SingleShotHTTPClient received something on _parsercontrol",
                "debug")

        while self.dataReady("_tcpcontrol"):
            # pass TCP client signals on to the parser
            msg = self.recv("_tcpcontrol")
            self.send(msg, "_parsersignal")

        while self.dataReady("control"):
            msg = self.recv("control")
            if isinstance(msg, shutdown):
                self.shutdownKids()
                return 0

        # if we're not currently downloading a page
        if self.tcpclient is None:
            # then either we've finished or we should download the next URL (if we've been redirected)
            if not self.requestqueue:
                return 0
            self.currentrequest = self.requestqueue.pop(0)
            if self.currentrequest.redirectcount == 3:
                # 3 redirects is excessive, give up, we're probably in a loop anyway
                return 0
            self.makeRequest(self.currentrequest)

        self.pause()
        return 1