def makeRequest(self, request):
    """Connect to the remote HTTP server and send request.

    A new connection component (built with ``self.connectionclass``) and a
    new ``HTTPParser`` in "response" mode are created, linked to this
    component's private boxes, registered as children and activated.  The
    request data is then sent to the "_tcpoutbox" outbox.

    Arguments:

    - request -- object whose ``requestobject`` mapping provides
      "uri-server", optionally "uri-port", and "request" (either a single
      string, or an iterable of parts that are sent in order)
    """
    # Forget any children left over from an earlier request.
    self.tcpclient = None
    self.httpparser = None

    requestobject = request.requestobject

    # intval() is defined elsewhere; a None result is treated here as
    # "no usable port" and replaced by the default HTTP port.
    port = intval(requestobject.get("uri-port", ""))
    if port is None:
        port = 80

    self.tcpclient = self.connectionclass(requestobject["uri-server"], port)
    self.httpparser = HTTPParser(mode="response")

    # Our outboxes drive the TCP client; its signals come back to us.
    self.link((self, "_tcpoutbox"), (self.tcpclient, "inbox"))
    self.link((self, "_tcpsignal"), (self.tcpclient, "control"))
    self.link((self.tcpclient, "signal"), (self, "_tcpcontrol"))

    # incoming TCP data -> HTTPParser directly
    self.link((self.tcpclient, "outbox"), (self.httpparser, "inbox"))

    # Parser control/result boxes are wired to our private boxes.
    self.link((self, "_parsersignal"), (self.httpparser, "control"))
    self.link((self.httpparser, "outbox"), (self, "_parserinbox"))
    self.link((self.httpparser, "signal"), (self, "_parsercontrol"))

    self.addChildren(self.tcpclient, self.httpparser)
    self.tcpclient.activate()
    self.httpparser.activate()
    self.response = ""

    payload = requestobject["request"]
    if isinstance(payload, str):
        # A single string is sent as one message.
        self.send(payload, "_tcpoutbox")
    else:
        # Otherwise each part is sent as its own message, in order.
        for part in payload:
            self.send(part, "_tcpoutbox")
def HTTPServer(createRequestHandler, **argd):
    """\
    HTTPServer() -> new HTTPServer component capable of handling a single connection

    Builds a Graphline of three components: an HTTPParser (which receives
    the incoming data first), an HTTPRequestHandler and an
    HTTPShutdownLogicHandling component that sits between the Graphline's
    control/signal boxes and those of the other two.

    Arguments:

    - createRequestHandler -- a function required by HTTPRequestHandler that
      creates the appropriate request-handler component for each request,
      see HTTPResourceGlue
    - **argd -- keyword arguments passed straight through to HTTPParser
    """
    # Since the parser is where the data goes first, it is created first.
    parser = HTTPParser(**argd)
    handler = HTTPRequestHandler(createRequestHandler)
    corelogic = HTTPShutdownLogicHandling()

    # Data Handling
    data_links = [
        (("self", "inbox"), ("PARSER", "inbox")),
        (("PARSER", "outbox"), ("HANDLER", "inbox")),
        (("HANDLER", "outbox"), ("self", "outbox")),
    ]

    # Signalling Handling
    signal_links = [
        (("self", "control"), ("CORELOGIC", "control")),
        (("CORELOGIC", "Psignal"), ("PARSER", "control")),
        (("CORELOGIC", "Hsignal"), ("HANDLER", "control")),
        (("CORELOGIC", "signal"), ("self", "signal")),
        (("PARSER", "signal"), ("CORELOGIC", "Pcontrol")),
        (("HANDLER", "signal"), ("CORELOGIC", "Hcontrol")),
    ]

    wiring = dict(data_links + signal_links)

    return Graphline(
        PARSER=parser,
        HANDLER=handler,
        CORELOGIC=corelogic,
        linkages=wiring,
    )
class SingleShotHTTPClient(component):
    """\
    SingleShotHTTPClient() -> component that can download a file using HTTP by URL

    Arguments:

    - starturl          -- the URL of the file to download
    - [postbody]        -- data to POST to that URL - if set to None becomes
                           an empty body in a POST (or PUT) request
    - [connectionclass] -- specify a class other than TCPClient to connect with
    - [extraheaders]    -- mapping of extra header names to values that are
                           added to every request this component forms
    - [method]          -- the HTTP method for the request (defaults to GET
                           normally, or POST if postbody != "")
    """

    # NOTE: the box descriptions are kept verbatim.  Despite the "UNUSED"
    # labels, mainBody() reads "control" for shutdown messages and main()
    # writes producerFinished to "signal".
    Inboxes = {
        "inbox": "UNUSED",
        "control": "UNUSED",
        "_parserinbox": "Data from HTTP parser",
        "_parsercontrol": "Signals from HTTP parser",
        "_tcpcontrol": "Signals from TCP client",
    }

    Outboxes = {
        "outbox": "Requested file",
        "debug": "Output to aid debugging",
        "_parsersignal": "Signals for HTTP parser",
        "_tcpoutbox": "Send over TCP connection",
        "_tcpsignal": "Signals shutdown of TCP connection",
        "signal": "UNUSED"
    }

    def __init__(self,
                 starturl,
                 postbody="",
                 connectionclass=TCPClient,
                 extraheaders=None,
                 method=None):
        """Store the request parameters; no connection is made here."""
        super(SingleShotHTTPClient, self).__init__()
        self.tcpclient = None
        self.httpparser = None
        self.requestqueue = []
        self.starturl = starturl
        self.connectionclass = connectionclass
        self.method = method
        self.postbody = postbody

        # A fresh dict per instance avoids sharing a mutable default.
        if extraheaders is not None:
            self.extraheaders = extraheaders
        else:
            self.extraheaders = {}

    def formRequest(self, url):
        """Craft a HTTP request for the supplied url.

        Returns the mapping produced by splitUri(url) with a "request" entry
        added.  That entry is a list: its first element is the request line
        and all headers joined into one string; when postbody is neither
        None nor "", the body follows as a second element.
        """
        splituri = splitUri(url)

        host = splituri["uri-server"]
        # "in" works on both Python 2 and 3, unlike dict.has_key().
        if "uri-port" in splituri:
            host += ":" + splituri["uri-port"]

        lines = []
        method = self.method
        bodyless = self.postbody == ""

        # Without an explicit method: GET when there is no body, else POST.
        if not method:
            if bodyless:
                method = 'GET'
            else:
                method = 'POST'
        lines.append(method + " " + splituri["raw-uri"] + " HTTP/1.1\r\n")

        if not bodyless:
            # postbody None means "send a body-carrying request with an
            # empty body".
            if self.postbody is not None:
                lines.append(
                    "Content-Length: " + str(len(self.postbody)) + "\r\n")
            else:
                lines.append("Content-Length: 0\r\n")

        lines.append("Host: " + host + "\r\n")
        lines.append("User-agent: Kamaelia HTTP Client 0.3 (RJL)\r\n")
        # keep-alive is a work around for lack of shutdown notification in
        # TCPClient
        lines.append("Connection: Keep-Alive\r\n")
        for header in self.extraheaders:
            lines.append("%s: %s\r\n" % (header, self.extraheaders[header]))
        lines.append("\r\n")

        # might improve performance by sending more together
        splituri["request"] = ["".join(lines)]

        if self.postbody not in [None, ""]:
            splituri["request"].append(self.postbody)

        return splituri

    def makeRequest(self, request):
        """Connect to the remote HTTP server and send request.

        Creates a connection component and an HTTPParser in "response" mode,
        links them to this component's private boxes, activates them and
        sends the request data to "_tcpoutbox".
        """
        self.tcpclient = None
        self.httpparser = None

        requestobject = request.requestobject

        # intval() is defined elsewhere; a None result is replaced by the
        # default HTTP port.
        port = intval(requestobject.get("uri-port", ""))
        if port is None:
            port = 80

        self.tcpclient = self.connectionclass(
            requestobject["uri-server"], port)
        self.httpparser = HTTPParser(mode="response")

        self.link((self, "_tcpoutbox"), (self.tcpclient, "inbox"))
        self.link((self, "_tcpsignal"), (self.tcpclient, "control"))
        self.link((self.tcpclient, "signal"), (self, "_tcpcontrol"))

        # incoming TCP data -> HTTPParser directly
        self.link((self.tcpclient, "outbox"), (self.httpparser, "inbox"))

        self.link((self, "_parsersignal"), (self.httpparser, "control"))
        self.link((self.httpparser, "outbox"), (self, "_parserinbox"))
        self.link((self.httpparser, "signal"), (self, "_parsercontrol"))

        self.addChildren(self.tcpclient, self.httpparser)
        self.tcpclient.activate()
        self.httpparser.activate()
        self.response = ""

        payload = requestobject["request"]
        if isinstance(payload, str):
            self.send(payload, "_tcpoutbox")
        else:
            for part in payload:
                self.send(part, "_tcpoutbox")

    def shutdownKids(self):
        """Close TCP connection and HTTP parser.

        Does nothing unless both children currently exist.
        """
        if self.tcpclient is not None and self.httpparser is not None:
            self.send(producerFinished(), "_tcpsignal")
            self.send(shutdown(), "_parsersignal")
            self.removeChild(self.tcpclient)
            self.removeChild(self.httpparser)
            self.tcpclient = None
            self.httpparser = None

    def handleRedirect(self, header):
        """Check for a redirect response and queue the fetching of the page
        it points to if it is such a response.

        Returns True if it was a redirect with a non-empty location header
        and False otherwise.
        """
        if header["responsecode"] not in ["301", "302", "303", "307"]:
            return False

        # location header gives the redirect URL
        newurl = header["headers"].get("location", "")
        if newurl == "":
            # do something equivalent to what we'd do for 404
            return False

        self.send(ParsedHTTPRedirect(redirectto=newurl), "outbox")
        redirectedrequest = HTTPRequest(
            self.formRequest(newurl),
            self.currentrequest.redirectcount + 1)
        self.requestqueue.append(redirectedrequest)
        return True

    def main(self):
        """Main loop.

        Queues the request for starturl, runs mainBody() until it returns a
        false value, then sends producerFinished to "signal".
        """
        self.requestqueue.append(
            HTTPRequest(self.formRequest(self.starturl), 0))
        while self.mainBody():
            yield 1
        self.send(producerFinished(self), "signal")
        yield 1
        return

    def mainBody(self):
        """Called repeatedly by main loop. Checks inboxes and processes
        messages received.

        Start the fetching of the new page if the current one is a redirect
        and has been completely fetched.  Returns 1 to keep running and 0
        to stop.
        """
        self.send("SingleShotHTTPClient.mainBody()", "debug")

        while self.dataReady("_parserinbox"):
            msg = self.recv("_parserinbox")
            if isinstance(msg, ParsedHTTPHeader):
                self.send(
                    "SingleShotHTTPClient received a ParsedHTTPHeader on _parserinbox",
                    "debug")
                # if the page is a redirect page
                if not self.handleRedirect(msg.header):
                    if msg.header["responsecode"] == "200":
                        # if not redirecting then send the response on
                        self.send(msg, "outbox")
                    # any other response code: treat as not found

            elif isinstance(msg, ParsedHTTPBodyChunk):
                self.send(
                    "SingleShotHTTPClient received a ParsedHTTPBodyChunk on _parserinbox",
                    "debug")
                # if not redirecting then send the response on
                if not self.requestqueue:
                    self.send(msg, "outbox")

            elif isinstance(msg, ParsedHTTPEnd):
                self.send(
                    "SingleShotHTTPClient received a ParsedHTTPEnd on _parserinbox",
                    "debug")
                # if not redirecting then send the response on
                if not self.requestqueue:
                    self.send(msg, "outbox")
                self.shutdownKids()
                return 1

        while self.dataReady("_parsercontrol"):
            # The message itself is not needed; just drain the box.
            self.recv("_parsercontrol")
            self.send(
                "SingleShotHTTPClient received something on _parsercontrol",
                "debug")

        while self.dataReady("_tcpcontrol"):
            # Pass TCP client signals on to the parser.
            msg = self.recv("_tcpcontrol")
            self.send(msg, "_parsersignal")

        while self.dataReady("control"):
            msg = self.recv("control")
            if isinstance(msg, shutdown):
                self.shutdownKids()
                return 0

        # if we're not currently downloading a page
        if self.tcpclient is None:
            # then either we've finished or we should download the next URL
            # (if we've been redirected)
            if not self.requestqueue:
                return 0
            self.currentrequest = self.requestqueue.pop(0)
            if self.currentrequest.redirectcount == 3:
                # 3 redirects is excessive, give up, we're probably in a
                # loop anyway
                return 0
            self.makeRequest(self.currentrequest)

        self.pause()
        return 1
class SingleShotHTTPClient(component):
    """\
    SingleShotHTTPClient() -> component that can download a file using HTTP by URL

    Arguments:

    - starturl          -- the URL of the file to download
    - [postbody]        -- data to POST to that URL - if set to None becomes
                           an empty body in a POST (or PUT) request
    - [connectionclass] -- specify a class other than TCPClient to connect with
    - [extraheaders]    -- mapping of extra header names to values that are
                           added to every request this component forms
    - [method]          -- the HTTP method for the request (defaults to GET
                           normally, or POST if postbody != "")
    """

    # NOTE: the box descriptions are kept verbatim.  Despite the "UNUSED"
    # labels, mainBody() reads "control" for shutdown messages and main()
    # writes producerFinished to "signal".
    Inboxes = {
        "inbox": "UNUSED",
        "control": "UNUSED",
        "_parserinbox": "Data from HTTP parser",
        "_parsercontrol": "Signals from HTTP parser",
        "_tcpcontrol": "Signals from TCP client",
    }

    Outboxes = {
        "outbox": "Requested file",
        "debug": "Output to aid debugging",
        "_parsersignal": "Signals for HTTP parser",
        "_tcpoutbox": "Send over TCP connection",
        "_tcpsignal": "Signals shutdown of TCP connection",
        "signal": "UNUSED"
    }

    def __init__(self,
                 starturl,
                 postbody="",
                 connectionclass=TCPClient,
                 extraheaders=None,
                 method=None):
        """Store the request parameters; no connection is made here."""
        super(SingleShotHTTPClient, self).__init__()
        self.tcpclient = None
        self.httpparser = None
        self.requestqueue = []
        self.starturl = starturl
        self.connectionclass = connectionclass
        self.method = method
        self.postbody = postbody

        # A fresh dict per instance avoids sharing a mutable default.
        if extraheaders is not None:
            self.extraheaders = extraheaders
        else:
            self.extraheaders = {}

    def formRequest(self, url):
        """Craft a HTTP request for the supplied url.

        Returns the mapping produced by splitUri(url) with a "request" entry
        added.  That entry is a list: its first element is the request line
        and all headers joined into one string; when postbody is neither
        None nor "", the body follows as a second element.
        """
        splituri = splitUri(url)

        host = splituri["uri-server"]
        if "uri-port" in splituri:
            host += ":" + splituri["uri-port"]

        lines = []
        method = self.method
        bodyless = self.postbody == ""

        # Without an explicit method: GET when there is no body, else POST.
        if not method:
            if bodyless:
                method = 'GET'
            else:
                method = 'POST'
        lines.append(method + " " + splituri["raw-uri"] + " HTTP/1.1\r\n")

        if not bodyless:
            # postbody None means "send a body-carrying request with an
            # empty body".
            if self.postbody is not None:
                lines.append(
                    "Content-Length: " + str(len(self.postbody)) + "\r\n")
            else:
                lines.append("Content-Length: 0\r\n")

        lines.append("Host: " + host + "\r\n")
        lines.append("User-agent: Kamaelia HTTP Client 0.3 (RJL)\r\n")
        # keep-alive is a work around for lack of shutdown notification in
        # TCPClient
        lines.append("Connection: Keep-Alive\r\n")
        for header in self.extraheaders:
            lines.append("%s: %s\r\n" % (header, self.extraheaders[header]))
        lines.append("\r\n")

        # might improve performance by sending more together.
        # str.join is used because the string-module join() function does
        # not exist on Python 3.
        splituri["request"] = ["".join(lines)]

        if self.postbody not in [None, ""]:
            splituri["request"].append(self.postbody)

        return splituri

    def makeRequest(self, request):
        """Connect to the remote HTTP server and send request.

        Creates a connection component and an HTTPParser in "response" mode,
        links them to this component's private boxes, activates them and
        sends the request data to "_tcpoutbox".
        """
        self.tcpclient = None
        self.httpparser = None

        requestobject = request.requestobject

        # intval() is defined elsewhere; a None result is replaced by the
        # default HTTP port.
        port = intval(requestobject.get("uri-port", ""))
        if port is None:
            port = 80

        self.tcpclient = self.connectionclass(
            requestobject["uri-server"], port)
        self.httpparser = HTTPParser(mode="response")

        self.link((self, "_tcpoutbox"), (self.tcpclient, "inbox"))
        self.link((self, "_tcpsignal"), (self.tcpclient, "control"))
        self.link((self.tcpclient, "signal"), (self, "_tcpcontrol"))

        # incoming TCP data -> HTTPParser directly
        self.link((self.tcpclient, "outbox"), (self.httpparser, "inbox"))

        self.link((self, "_parsersignal"), (self.httpparser, "control"))
        self.link((self.httpparser, "outbox"), (self, "_parserinbox"))
        self.link((self.httpparser, "signal"), (self, "_parsercontrol"))

        self.addChildren(self.tcpclient, self.httpparser)
        self.tcpclient.activate()
        self.httpparser.activate()
        self.response = ""

        payload = requestobject["request"]
        if isinstance(payload, str):
            self.send(payload, "_tcpoutbox")
        else:
            for part in payload:
                self.send(part, "_tcpoutbox")

    def shutdownKids(self):
        """Close TCP connection and HTTP parser.

        Does nothing unless both children currently exist.
        """
        if self.tcpclient is not None and self.httpparser is not None:
            self.send(producerFinished(), "_tcpsignal")
            self.send(shutdown(), "_parsersignal")
            self.removeChild(self.tcpclient)
            self.removeChild(self.httpparser)
            self.tcpclient = None
            self.httpparser = None

    def handleRedirect(self, header):
        """Check for a redirect response and queue the fetching of the page
        it points to if it is such a response.

        Returns True if it was a redirect with a non-empty location header
        and False otherwise.
        """
        if header["responsecode"] not in ["301", "302", "303", "307"]:
            return False

        # location header gives the redirect URL
        newurl = header["headers"].get("location", "")
        if newurl == "":
            # do something equivalent to what we'd do for 404
            return False

        self.send(ParsedHTTPRedirect(redirectto=newurl), "outbox")
        redirectedrequest = HTTPRequest(
            self.formRequest(newurl),
            self.currentrequest.redirectcount + 1)
        self.requestqueue.append(redirectedrequest)
        return True

    def main(self):
        """Main loop.

        Queues the request for starturl, runs mainBody() until it returns a
        false value, then sends producerFinished to "signal".
        """
        self.requestqueue.append(
            HTTPRequest(self.formRequest(self.starturl), 0))
        while self.mainBody():
            yield 1
        self.send(producerFinished(self), "signal")
        yield 1
        return

    def mainBody(self):
        """Called repeatedly by main loop. Checks inboxes and processes
        messages received.

        Start the fetching of the new page if the current one is a redirect
        and has been completely fetched.  Returns 1 to keep running and 0
        to stop.
        """
        self.send("SingleShotHTTPClient.mainBody()", "debug")

        while self.dataReady("_parserinbox"):
            msg = self.recv("_parserinbox")
            if isinstance(msg, ParsedHTTPHeader):
                self.send(
                    "SingleShotHTTPClient received a ParsedHTTPHeader on _parserinbox",
                    "debug")
                # if the page is a redirect page
                if not self.handleRedirect(msg.header):
                    if msg.header["responsecode"] == "200":
                        # if not redirecting then send the response on
                        self.send(msg, "outbox")
                    # any other response code: treat as not found

            elif isinstance(msg, ParsedHTTPBodyChunk):
                self.send(
                    "SingleShotHTTPClient received a ParsedHTTPBodyChunk on _parserinbox",
                    "debug")
                # if not redirecting then send the response on
                if not self.requestqueue:
                    self.send(msg, "outbox")

            elif isinstance(msg, ParsedHTTPEnd):
                self.send(
                    "SingleShotHTTPClient received a ParsedHTTPEnd on _parserinbox",
                    "debug")
                # if not redirecting then send the response on
                if not self.requestqueue:
                    self.send(msg, "outbox")
                self.shutdownKids()
                return 1

        while self.dataReady("_parsercontrol"):
            # The message itself is not needed; just drain the box.
            self.recv("_parsercontrol")
            self.send(
                "SingleShotHTTPClient received something on _parsercontrol",
                "debug")

        while self.dataReady("_tcpcontrol"):
            # Pass TCP client signals on to the parser.
            msg = self.recv("_tcpcontrol")
            self.send(msg, "_parsersignal")

        while self.dataReady("control"):
            msg = self.recv("control")
            if isinstance(msg, shutdown):
                self.shutdownKids()
                return 0

        # if we're not currently downloading a page
        if self.tcpclient is None:
            # then either we've finished or we should download the next URL
            # (if we've been redirected)
            if not self.requestqueue:
                return 0
            self.currentrequest = self.requestqueue.pop(0)
            if self.currentrequest.redirectcount == 3:
                # 3 redirects is excessive, give up, we're probably in a
                # loop anyway
                return 0
            self.makeRequest(self.currentrequest)

        self.pause()
        return 1