async def handle_url(self, request: HttpParser) -> HttpResponse:
    """Serve a static file below ``self.root_dir`` for a GET or HEAD request.

    A request path ending in ``/`` is mapped to ``index.html`` in that
    directory.  A single ``Range: bytes=<start>-<end>`` header is honoured
    (this is deliberately not a complete RFC 7233 implementation).

    Args:
        request: Parsed request; its method, path and headers are read.

    Returns:
        An ``HttpResponse`` with status

        * 405 for any method other than GET/HEAD,
        * 400 when a ``Range`` header does not look like ``bytes=N-M``,
        * 416 when the requested range selects no byte of the file,
        * 404 / 403 when the file is missing / cannot be opened,
        * 206 (with ``Content-Range``) for a satisfiable range, else 200.

        HEAD responses carry the same status and headers as GET but no body.
    """
    method = request.get_method().upper()
    if method not in ('GET', 'HEAD'):
        return HttpResponse(405)

    path = request.get_path()
    if path.endswith('/'):
        path += 'index.html'
    # For an absolute request path, relpath against '/' collapses any '..'
    # segments and drops the leading slash before joining onto root_dir.
    relative = os.path.relpath(url2pathname(path), '/')
    filename = os.path.join(self.root_dir, relative)

    byte_range = None
    request_headers = request.get_headers()
    if 'Range' in request_headers:
        # Not RFC 7233 compliant: only one explicit start-end pair is accepted
        range_match = re.match(r'bytes=(\d+)-(\d+)', request_headers['Range'])
        if not range_match:
            return HttpResponse(400, 'Invalid Range header')
        start, end = map(int, range_match.groups())
        # Python range is exclusive, HTTP Range is inclusive
        byte_range = range(start, end + 1)

    data = None
    try:
        async with aiofiles.open(filename, 'rb') as f:
            # Used instead of os.stat to ensure the file can be accessed
            await f.seek(0, os.SEEK_END)
            length = await f.tell()

            if byte_range is not None:
                byte_range = range(byte_range.start,
                                   min(length, byte_range.stop))
                if not byte_range:
                    # Start beyond EOF, or end before start: nothing to send.
                    response = HttpResponse(416)
                    response.headers['Content-Range'] = 'bytes */%d' % length
                    return response

            if method == 'GET':
                if byte_range is not None:
                    await f.seek(byte_range.start)
                    data = await f.read(len(byte_range))
                    # Trust what was actually read in case of a short read.
                    byte_range = range(byte_range.start,
                                       byte_range.start + len(data))
                else:
                    await f.seek(0)
                    data = await f.read()
    except FileNotFoundError:
        return HttpResponse(404, 'This is not the file you are looking for')
    except PermissionError:
        return HttpResponse(403)

    status = 200 if byte_range is None else 206
    if data is None:
        response = HttpResponse(status)
    else:
        response = HttpResponse(status, data)

    # Content-Length describes the bytes of this response, i.e. the selected
    # range for partial content, not the size of the whole file.
    if byte_range is None:
        response.headers['Content-Length'] = length
    else:
        response.headers['Content-Length'] = len(byte_range)
        response.headers['Content-Range'] = 'bytes %d-%d/%d' % (
            byte_range.start, byte_range.stop - 1, length)

    _, extension = os.path.splitext(filename)
    extension = extension[1:].lower()
    if extension in self.mime_types:
        response.headers['Content-Type'] = self.mime_types[extension]
    response.headers['Last-Modified'] = formatdate(
        os.stat(filename).st_mtime, False, True)
    return response
def main():
    """Fetch ``http://gunicorn.org/`` over a raw socket and print the result.

    The response is fed to an ``HttpParser`` chunk by chunk.  Once the
    header block is complete the headers, the ``content-length`` value
    (``None`` when the server did not send one) and the parser's method are
    printed once.  Body fragments are collected and printed, joined, after
    the message is complete or the server closes the connection.
    """
    p = HttpParser()
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    body = []
    header_done = False
    try:
        s.connect(('gunicorn.org', 80))
        # sendall: a plain send() may transmit only part of the request.
        s.sendall(b("GET / HTTP/1.1\r\nHost: gunicorn.org\r\n\r\n"))

        while True:
            data = s.recv(1024)
            if not data:
                break

            recved = len(data)
            nparsed = p.execute(data, recved)
            assert nparsed == recved

            if p.is_headers_complete() and not header_done:
                headers = p.get_headers()
                print(headers)
                # Chunked responses carry no content-length; do not crash.
                print(headers.get('content-length'))
                print(p.get_method())
                header_done = True

            if p.is_partial_body():
                body.append(p.recv_body())

            if p.is_message_complete():
                break

        print(b("").join(body))
    finally:
        s.close()
def run(self):
    """Relay one HTTP request from ``self.client`` to its ``Host`` on port 80.

    Reads a single chunk (at most ``self.max`` bytes) from the client and
    parses it.  When the header block is complete, the raw request is
    forwarded to the host named in the ``Host`` header and the reply is
    streamed back to the client until the remote side closes.

    Both sockets are closed when the method finishes, whether it succeeds,
    returns early or raises, so an aborted relay does not leak descriptors.
    """
    relay_socket = None
    try:
        http_request = self.client.recv(self.max)
        if not http_request:
            return
        print('Got something from ' + str(self.address) + '...')

        parser = HttpParser()
        request_length = len(http_request)
        nparsed = parser.execute(http_request, request_length)
        assert nparsed == request_length
        if not parser.is_headers_complete():
            return

        headers = parser.get_headers()
        print(headers)
        destination_host = headers['Host']
        print(destination_host)

        relay_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        relay_socket.connect((destination_host, 80))
        relay_socket.sendall(http_request)
        print('Forwarding data to destination host...')

        while True:
            http_response = relay_socket.recv(self.max)
            if not http_response:
                break
            print('Received data back. Forwarding to the client...')
            self.client.sendall(http_response)
    finally:
        self.client.close()
        if relay_socket is not None:
            relay_socket.close()
class Response_Parser():
    """Thin wrapper that parses one raw HTTP response with ``HttpParser``."""

    def __init__(self):
        self.parser = HttpParser()
        self.len_response = 0
        self.len_body = 0
        self.body = None

    def parse(self, raw_response):
        """Parse ``raw_response`` and remember its size, body and body size."""
        total = len(bytearray(raw_response))
        self.len_response = total
        self.parser.execute(raw_response, total)
        payload = self.parser.recv_body()
        self.body = payload
        self.len_body = len(bytearray(payload))

    def get_all_keys(self):
        """Get All the key in request headers."""
        headers = self.parser.get_headers()
        return headers.keys()

    def get_keys(self, *args):
        """Map each requested header name to its value, or ``'-'`` if absent."""
        return {
            name: self.parser.get_headers().get(name, '-')
            for name in args
        }

    def get_reponse(self, *args):
        """Build an ``HTTP_Response`` from the status, chosen headers and sizes."""
        selected = self.get_keys(*args)
        return HTTP_Response(self.parser.get_status_code(), selected,
                             self.len_response, self.len_body)

    def get_body(self):
        """Return the body captured by the last ``parse`` call."""
        return self.body
def main():
    """Download the gunicorn.org front page over a plain socket and print it.

    Headers and the ``content-length`` value are printed once, as soon as
    the header block has been parsed; the collected body is printed last.
    """
    parser = HttpParser()
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    chunks = []
    headers_shown = False

    try:
        sock.connect(("gunicorn.org", 80))
        sock.send("GET / HTTP/1.1\r\nHost: gunicorn.org\r\n\r\n")

        while True:
            data = sock.recv(1024)
            if not data:
                # Server closed the connection.
                break

            size = len(data)
            consumed = parser.execute(data, size)
            assert consumed == size

            if parser.is_headers_complete() and not headers_shown:
                print(parser.get_headers())
                print(parser.get_headers()["content-length"])
                headers_shown = True

            if parser.is_partial_body():
                chunks.append(parser.recv_body())

            if parser.is_message_complete():
                break

        print("".join(chunks))
    finally:
        sock.close()
class Request_Parser():
    """Thin wrapper that parses one raw HTTP request with ``HttpParser``."""

    def __init__(self):
        self.parser = HttpParser()
        self.len_request = 0
        self.len_body = 0
        # Body captured by parse(); None until parse() has been called.
        self.body = None

    def parse(self, raw_requset):
        """Parse ``raw_requset`` and remember its size, body and body size.

        ``recv_body()`` hands the buffered body over and empties the parser's
        buffer, so the body is stored here; asking the parser again later
        would only yield an empty value.
        """
        self.len_request = len(bytearray(raw_requset))
        self.parser.execute(raw_requset, self.len_request)
        self.body = self.parser.recv_body()
        self.len_body = len(bytearray(self.body))

    def get_all_keys(self):
        """Get All the key in request headers."""
        return self.parser.get_headers().keys()

    def get_keys(self, *args):
        """Map each requested header name to its value, or ``'-'`` if absent."""
        header_keys = {}
        for key in args:
            header_keys[key] = self.parser.get_headers().get(key, '-')
        return header_keys

    def get_request(self, *args):
        """Build an ``HTTP_Requset`` from the chosen headers and the sizes."""
        values = self.get_keys(*args)
        obj = HTTP_Requset(values, self.len_request, self.len_body)
        return obj

    def get_body(self):
        """Return the request body captured by ``parse``.

        Before ``parse`` has run, fall back to whatever the parser currently
        holds, which is what this method always did.
        """
        if self.body is None:
            return self.parser.recv_body()
        return self.body
def run(self):
    """Forward a single client request to its ``Host`` and relay the reply.

    One chunk of at most ``self.max`` bytes is read from ``self.client``.
    If it parses to a complete header block, the raw bytes are sent to
    port 80 of the host named in the ``Host`` header, and everything the
    remote side returns is copied back to the client until it closes.

    The client socket and, if it was opened, the relay socket are always
    closed on the way out, including on early return or on an exception.
    """
    relay_socket = None
    try:
        http_request = self.client.recv(self.max)
        if not http_request:
            return
        print('Got something from ' + str(self.address) + '...')

        parser = HttpParser()
        request_length = len(http_request)
        nparsed = parser.execute(http_request, request_length)
        assert nparsed == request_length
        if not parser.is_headers_complete():
            return

        headers = parser.get_headers()
        print(headers)
        destination_host = headers['Host']
        print(destination_host)

        relay_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        relay_socket.connect((destination_host, 80))
        relay_socket.sendall(http_request)
        print('Forwarding data to destination host...')

        while True:
            http_response = relay_socket.recv(self.max)
            if not http_response:
                break
            print('Received data back. Forwarding to the client...')
            self.client.sendall(http_response)
    finally:
        self.client.close()
        if relay_socket is not None:
            relay_socket.close()
def start(self):
    """Accept connections forever and hand each parsed request to ``koi``.

    For every accepted connection the request is read in 1024-byte chunks
    and parsed incrementally.  When the message is complete (or the peer
    closes), ``koi`` is called with the application, the connection, the
    request method, the headers, a text buffer holding the body and the
    ``content-length`` header value (``0`` when absent).
    """
    signal.signal(signal.SIGTERM, SIG_DFL)
    print(f"Worker booted with pid: {os.getpid()}")

    while True:
        body = []
        conn, addr = self.socket.accept()
        http_parser = HttpParser()
        with conn:
            while True:
                data = conn.recv(1024)
                if not data:
                    break
                recved = len(data)
                nparsed = http_parser.execute(data, recved)
                assert nparsed == recved

                if http_parser.is_headers_complete():
                    print(http_parser.get_headers())

                if http_parser.is_partial_body():
                    body.append(http_parser.recv_body())

                if http_parser.is_message_complete():
                    break

            # The parser returns body chunks as bytes on Python 3, so they
            # must be joined as bytes and decoded once; "".join() on bytes
            # raises TypeError for any request that carries a body.
            raw_body = b"".join(body)
            buffered_body = io.StringIO(
                raw_body.decode("utf-8", errors="replace"))
            koi(self.app,
                conn,
                request_method=http_parser.get_method(),
                headers=http_parser.get_headers(),
                body=buffered_body,
                content_length=http_parser.get_headers().get(
                    'content-length', 0))
def handle(connection, address, pid, queue_obj):
    """Read one JSON command over HTTP from ``connection`` and act on it.

    The request body must be a JSON object.  Its ``data`` field selects the
    action:

    * ``"ping"``  - reply 200 with the start time and the queue size,
    * ``"stop"``  - reply 200 and send ``SIGUSR1`` to ``pid``,
    * a value containing both ``trackers`` and ``hashes`` - enqueue
      ``[data, callback, private_key]`` on ``queue_obj``; reply 200, or 429
      when the queue is full,
    * anything else, or an unparsable body - reply 400.

    A failure while reading or parsing the HTTP request yields a 500.

    Args:
        connection: Connected client socket.
        address: Peer address, used for logging and in the queued item.
        pid: Process id that receives ``SIGUSR1`` on ``stop``.
        queue_obj: Queue that receives job requests.
    """
    import logging
    import json
    from queue import Full
    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger("process-%r" % (address,))
    content = []
    parser = HttpParser()

    try:
        logger.debug("Connected %r at %r", connection, address)
        while True:
            resp = connection.recv(psize)
            if not resp:
                # Peer closed early; without this the loop would spin forever.
                break

            recved = len(resp)
            parsed = parser.execute(resp, recved)
            assert parsed == recved

            if parser.is_partial_body():
                content.append(parser.recv_body())

            if parser.is_message_complete():
                break
    except Exception:
        logger.exception("Problem handling request: %s", sys.exc_info()[1])
        send_and_close(connection, 500)
        return

    data = url = key = None
    try:
        # Join the raw chunks before decoding: a multi-byte character may be
        # split across two recv() calls.
        parsed_json = json.loads(b"".join(content).decode("utf-8"))
        data = parsed_json.get('data')
        url = parsed_json.get('callback')
        key = parsed_json.get('private_key')
    except Exception:
        logger.exception("Problem decoding JSON: %s", sys.exc_info()[1])

    if data is None:
        send_and_close(connection, 400, {"message": "JSON Parse Error"})
        return
    if data == 'ping':
        send_and_close(connection, 200, {"started": started, "queue": queue_obj.qsize()})
        return
    if data == 'stop':
        send_and_close(connection, 200, {"message": "Shutting down"})
        os.kill(pid, signal.SIGUSR1)
        return

    try:
        is_job = 'trackers' in data and 'hashes' in data
    except TypeError:
        # ``data`` is not a container (e.g. a number).
        is_job = False
    if not is_job:
        # Always answer, so the client is not left waiting on an open socket.
        send_and_close(connection, 400, {"message": "Unknown request"})
        return

    try:
        queue_obj.put({"data": [data, url, key], "address": address}, False)
        send_and_close(connection, 200, {"message": ("in queue [%r]" % (address,))})
    except Full:
        send_and_close(connection, 429, {"message": "Server queue is full. Try another one."})
def get_appropriate_response(self):
    """Build the response object for the raw request held in ``self.content``.

    Returns:
        * ``HttpResponseBadRequest`` when the header block is incomplete,
        * ``HttpResponseNotImplemented`` for unsupported methods,
        * ``HttpResponseForbidden`` when the requested path leaves the
          host's document root or the file cannot be read,
        * ``HttpResponseNotFound`` when the file does not exist,
        * a 304 ``HttpResponse`` when ``If-None-Match`` equals the file etag,
        * a HEAD, partial-content or plain ``HttpResponse`` otherwise,
        * ``HttpResponseServerError`` for any other I/O error.
    """
    import errno
    import os

    try:
        # try to use the fast C parser
        from http_parser.parser import HttpParser
    except ImportError:
        # fall back to the Python parser
        from http_parser.pyparser import HttpParser

    # The length handed to the parser must be that of the encoded bytes;
    # it differs from len(self.content) once non-ASCII text is present.
    raw = self.content.encode('utf-8')
    p = HttpParser()
    p.execute(raw, len(raw))

    if not p.is_headers_complete():
        return HttpResponseBadRequest(content_f=BAD_REQUEST_HTML)

    # check method
    if p.get_method() not in SUPPORTED_METHODS:
        return HttpResponseNotImplemented(content_f=NOT_IMPLEMENTED_HTML)

    headers = p.get_headers()
    try:
        base_filepath = settings.HOSTS[headers['Host'].split(':')[0]]
    except KeyError:
        base_filepath = settings.HOSTS['default']

    req_file = self.content.split(' ')[1]
    if req_file == '/':
        req_file = '/index.html'
    full_path = base_filepath + req_file

    # The path comes straight from the client: refuse anything that, once
    # normalised, is no longer inside the document root ("/../../etc/...").
    root = os.path.abspath(base_filepath + '/')
    target = os.path.abspath(full_path)
    if target != root and not target.startswith(root.rstrip(os.sep) + os.sep):
        return HttpResponseForbidden(content_f=FORBIDDEN_HTML)

    try:
        # Probe that the file exists and is readable, then release it.
        with open(full_path):
            pass

        # check if modified
        if 'If-None-Match' in headers and headers[
                'If-None-Match'] == etag_for_file(full_path):
            return HttpResponse(status=304, content_f=full_path)

        if p.get_method() == 'HEAD':
            return HttpResponse(content_f=full_path, method='HEAD')
        if 'Range' in headers:
            return HttpResponsePartialContent(
                content_f=full_path, h_range=headers['Range'])
        return HttpResponse(content_f=full_path)
    except IOError as err:
        if err.errno == errno.EACCES:
            return HttpResponseForbidden(content_f=FORBIDDEN_HTML)
        elif err.errno == errno.ENOENT:
            return HttpResponseNotFound(content_f=NOT_FOUND_HTML)

    return HttpResponseServerError(content_f=SERVER_ERROR_HTML)
def process(indir, outdir):
    """Split every capture file in ``indir`` into HTTP messages and save them.

    Each file is read as bytes and parsed message by message.  A message
    with a non-empty path is treated as a request ("send"); anything else
    is treated as the response ("recv") to the most recent request, and its
    text is appended to that request's text before saving.

    Args:
        indir: Directory whose entries are parsed.
        outdir: Destination passed through to ``save_http_packet``.
    """
    findstr = os.path.join(indir, '*')
    for fn in glob.glob(findstr):
        print(fn)
        with open(fn, 'rb') as f:
            http_bin = f.read()

        # Defaults for a capture that starts with a response: previously the
        # "recv" branch hit unbound locals in that case.
        full_http = ''
        http_hostname = 'unknown'

        n = 0
        while n < len(http_bin):
            http = HttpParser()
            remaining = http_bin[n:]
            nparsed = http.execute(remaining, len(remaining))
            if not http.is_message_complete():
                break

            if http.get_path() != '':
                # send
                tokens = remaining.split()
                http_method = tokens[0]  # http.get_method() -- seems bugged
                http_path = tokens[1]

                http_request = parse_http_packet(http.get_headers(), http.recv_body())
                http_hostname = 'unknown'
                if 'Host' in http.get_headers():
                    http_hostname = http.get_headers()['Host']
                print(http_hostname)

                # NOTE(review): requests advance one byte less than the parser
                # consumed; kept as in the original - confirm the intent.
                nparsed -= 1

                full_http = http_method + ' ' + http_path + '\n'
                full_http += http_request + '\n'
                save_http_packet(outdir, os.path.basename(fn), http_hostname,
                                 http_path, 'send', full_http)
            else:
                # recv
                http_status = http.get_status_code()
                http_reply = parse_http_packet(http.get_headers(), http.recv_body())

                full_http += str(http_status) + '\n'
                full_http += http_reply
                save_http_packet(outdir, os.path.basename(fn), http_hostname,
                                 '', 'recv', full_http)

            if nparsed <= 0:
                # No progress is possible; avoid looping forever.
                break
            n += nparsed
def recv_http_response(self, conn):
    """Read an HTTP response from ``conn`` and return ``(status, headers)``.

    Reading stops once the headers are complete and announce no body, or
    once the whole message has been parsed.  Any failure, including the
    peer closing before that point, is re-raised as ``GeneralProxyError``.
    """
    parser = HttpParser(kind=1)
    status_code, headers = None, None

    try:
        while True:
            chunk = conn.recv(1024)
            parser.execute(chunk, len(chunk))

            if parser.is_headers_complete():
                headers = parser.get_headers()
                status_code = parser.get_status_code()
                announced = headers.get('content-length')
                has_body = bool(announced) and int(announced) != 0
                if not has_body:
                    break

            if parser.is_message_complete():
                break

            if not chunk:
                # Connection closed before the response was complete.
                raise EOFError('Incomplete Message')
    except Exception as e:
        message = 'HTTP Proxy communication error ({})'.format(e)
        raise GeneralProxyError(message)

    return status_code, headers
def iter_warc_records(warc_file, domain_whitelist=None, only_homepages=None):
    """ Selective iterator over records in a WARC file

    Yields ``(url, headers, body)`` for HTTP response records whose
    content type mentions ``text/html``.  When ``domain_whitelist`` is
    given, only URLs of those domains pass; otherwise ``only_homepages``
    restricts the output to URLs with path ``/`` and no query string.
    """
    wanted_type = 'application/http; msgtype=response'

    for record in warc_file:
        if not record.url or record['Content-Type'] != wanted_type:
            continue

        url = URL(record.url, check_encoding=True)

        if domain_whitelist is not None:
            if url.domain not in domain_whitelist:
                continue
        elif only_homepages:
            is_homepage = url.parsed.path == "/" and url.parsed.query == ""
            if not is_homepage:
                continue

        payload = record.payload.read()
        parser = HttpParser()
        parser.execute(payload, len(payload))

        headers = parser.get_headers()
        if 'text/html' in headers.get("content-type", ""):
            yield url, headers, parser.recv_body()
def parse_request(http_request, protocol, host, port):
    """
    Parse HTTP request form Burp Suite to dict
    TODO cookie parse

    Args:
        http_request: Raw request text.
        protocol: URL scheme used to rebuild the URL (e.g. ``"http"``).
        host: Host part used to rebuild the URL.
        port: Accepted for interface compatibility; not used here.

    Returns:
        ``(method, url, header, params, data, jsondata)`` where ``header``
        is a plain dict without ``Content-Length``, ``params`` is the parsed
        query string, ``data`` the form-decoded body of a POST (else ``{}``)
        and ``jsondata`` the decoded JSON body of a POST whose
        ``Content-Type`` is exactly ``application/json`` (else ``{}``).
    """
    httpParser = HttpParser()
    httpParser.execute(http_request, len(http_request))

    header = dict(httpParser.get_headers())
    # Requests without a body carry no Content-Length; do not fail on them.
    header.pop("Content-Length", None)

    body = httpParser.recv_body()
    method = httpParser.get_method()
    url = protocol + "://" + host + httpParser.get_path()
    query = httpParser.get_query_string()
    params = dict(urlparse.parse_qsl(query))

    is_post = method == "POST"
    data = dict(urlparse.parse_qsl(body)) if is_post else {}

    jsondata = {}
    if is_post and header.get("Content-Type") == "application/json":
        try:
            jsondata = json.loads(body)
        except (ValueError, TypeError) as e:
            # str(): concatenating the exception object itself raises TypeError
            print("[!] " + str(e))
            jsondata = {}

    return method, url, header, params, data, jsondata
def iter_items(self, partition):
    """ Yields objects in the source's native format

    For every qualifying HTML response record of the WARC stream found at
    ``partition["path"]`` this yields
    ``(url, headers, "html", index_level, body)``.
    """
    wanted_type = 'application/http; msgtype=response'

    for record in self.open_warc_stream(partition["path"]):
        if not record.url or record['Content-Type'] != wanted_type:
            continue

        url = URL(record.url, check_encoding=True)

        do_parse, index_level = self.qualify_url(url)
        if not do_parse:
            continue

        payload = record.payload.read()
        parser = HttpParser()
        parser.execute(payload, len(payload))

        headers = parser.get_headers()
        if 'text/html' in headers.get("content-type", ""):
            yield url, headers, "html", index_level, parser.recv_body()
def handle_batch_client(sock):
    """Serve HTTP requests arriving on ``sock`` until it goes idle or closes.

    Data is buffered until a blank line ends a header block.  The headers
    are parsed, up to ``Content-Length`` bytes already buffered are cut off
    as the pre-read part of the body, and ``handle_request`` is called with
    the socket, the parser and that pre-read data.  The loop ends when
    ``handle_request`` returns a false value, when the peer closes, or after
    five minutes without incoming data.

    The socket is closed on every exit path, including failed assertions
    and exceptions raised by ``handle_request``.
    """
    recvbuf = ""
    try:
        while True:
            rds, _, _ = select.select([sock], [], [], 60 * 5)
            if not rds:
                break

            data = sock.recv(1024)
            if not data:
                break
            recvbuf += data

            pos = recvbuf.find("\r\n\r\n")
            if pos == -1:
                continue

            header_end = pos + 4
            parser = HttpParser()
            nparsed = parser.execute(recvbuf, header_end)
            if nparsed != header_end:
                logging.debug("pos:%d, nparsed:%d, recvbuf:%r", pos, nparsed, recvbuf)
            assert nparsed == header_end
            assert parser.is_headers_complete()

            headers = parser.get_headers()
            # "in" instead of has_key(): same lookup, valid on Python 3 too.
            content_length = int(headers["Content-Length"]) if "Content-Length" in headers else 0
            logging.debug("content length:%d", content_length)

            recvbuf = recvbuf[header_end:]
            preread = recvbuf[:content_length]
            recvbuf = recvbuf[content_length:]

            keepalived = handle_request(sock, parser, preread)
            if not keepalived:
                break
    finally:
        logging.debug("close client")
        sock.close()
def make_request(sock, server_name):
    """
    Send a minimal ``GET /`` for ``server_name`` over the open socket and
    return the HTTP headers of the server's reply as a dictionary.

    ``None`` is returned when the connection closes before a complete
    header block has been received.
    """
    parser = HttpParser()

    request_lines = [
        'GET / HTTP/1.0\r\n',
        'User-Agent: pySSLScan\r\n',
        'Host: %s\r\n\r\n' % (server_name,),
    ]
    sock.write(''.join(request_lines).encode('ascii'))

    while True:
        chunk = sock.recv(1024)
        if not chunk:
            return None

        size = len(chunk)
        consumed = parser.execute(chunk, size)
        assert consumed == size

        if parser.is_headers_complete():
            return parser.get_headers()
def findhue():
    """Discover a Hue bridge on the local network with an SSDP M-SEARCH.

    A multicast search is sent to 239.255.255.250:1900 and replies are
    read until one carries a ``hue-bridgeid`` header.  Replies from other
    devices and datagrams that do not parse are skipped, so a non-Hue
    device answering first no longer ends the search.

    Returns:
        ``(addr, headers)`` of the first matching reply, or ``None`` when
        no further reply arrives within the 5 second socket timeout.
    """
    msg = \
        'M-SEARCH * HTTP/1.1\r\n' \
        'HOST:239.255.255.250:1900\r\n' \
        'ST:upnp:rootdevice\r\n' \
        'MX:2\r\n' \
        'MAN:"ssdp:discover"\r\n' \
        '\r\n'

    # Set up UDP socket
    s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP)
    try:
        s.settimeout(5)
        s.sendto(msg.encode('utf-8'), ('239.255.255.250', 1900))

        while True:
            data, addr = s.recvfrom(65507)
            p = HttpParser()
            recved = len(data)
            if p.execute(data, recved) != recved:
                # Malformed datagram from some other device: ignore it.
                continue
            if p.is_headers_complete():
                headers = p.get_headers()
                if 'hue-bridgeid' in headers:
                    return addr, headers
    except timeout:
        pass
    finally:
        s.close()
    return None
class Request:
    """Parsed view of an HTTP request: method, path, headers, query, body."""

    parser = None
    _body = None

    def __init__(self, data):
        self.parser = HttpParser()
        self.parser.execute(data, len(data))

        parser = self.parser
        self.method = parser.get_method()
        self.path = parser.get_path()
        self.headers = parser.get_headers()

        decoded_query = unquote(parser.get_query_string())
        self.querystring = parse_qs(decoded_query, keep_blank_values=True)
        if self.querystring:
            # Keep the raw query string attached to the path.
            self.path += "?{}".format(parser.get_query_string())

    def add_data(self, data):
        """Feed further request bytes to the underlying parser."""
        self.parser.execute(data, len(data))

    @property
    def body(self):
        """Decoded request body, computed on first access and then cached."""
        if self._body is not None:
            return self._body
        self._body = decode_from_bytes(self.parser.recv_body())
        return self._body

    def __str__(self):
        parts = (self.method, self.path, self.headers)
        return "{} - {} - {}".format(*parts)
def run(self):
    """Listen for SSDP announcements and record newly seen set-top boxes.

    While ``self.running`` is true, datagrams are read from
    ``self.listener`` and parsed as HTTP.  An ``ssdp:alive`` notification
    for the ``X-CTC_RemotePairing`` service is turned into an ``STB`` and,
    unless one with the same UUID is already known, appended to
    ``self.stbs`` under ``self.mutex``.

    Datagrams lacking the expected headers are skipped.  Any other error is
    logged instead of being silently discarded, and the mutex is released
    even when the guarded section raises.
    """
    while self.running:
        data, addr = self.listener.recvfrom(4096)
        http_pareser = HttpParser()
        http_pareser.execute(data, len(data))
        headers = http_pareser.get_headers()

        try:
            if not (headers['NTS'] == 'ssdp:alive' and headers[
                    'NT'] == 'urn:zenterio-net:service:X-CTC_RemotePairing:1'):
                continue
            stb = STB(uuid=headers['USN'][5:41],
                      location=headers['LOCATION'],
                      nt=headers['NT'])
        except KeyError:
            # Not the kind of announcement this listener cares about.
            continue
        except Exception:
            log.exception("Could not build STB from announcement")
            continue

        self.mutex.acquire(1)
        try:
            known = any(x.uuid == stb.uuid for x in self.stbs)
            if not known:
                self.stbs.append(stb)
                log.info('-------------------------------------------')
                log.info("New STB detected!")
                log.info("UUID: " + stb.uuid)
                log.info("Location: " + stb.location)
                log.info("NT: " + stb.nt)
        except Exception:
            # Keep the listener thread alive, as before, but leave a trace.
            log.exception("Could not register STB")
        finally:
            self.mutex.release()
def __init__(self, raw):
    """Parse ``raw.response`` and expose its headers, body and status code.

    ``raw`` itself is kept on the instance; ``_json`` starts out unset.
    """
    self.raw = raw
    self._json = None

    parser = HttpParser()
    parser.execute(raw.response, len(raw.response))

    self.headers = parser.get_headers()
    # Body chunks are taken from the parser's internal buffer.
    self.body = "".join(parser._body)
    self.code = parser.get_status_code()
def get_appropriate_response(self):
    """Build the response object for the raw request held in ``self.content``.

    Returns:
        * ``HttpResponseBadRequest`` when the header block is incomplete,
        * ``HttpResponseNotImplemented`` for unsupported methods,
        * ``HttpResponseForbidden`` when the requested path leaves the
          host's document root or the file cannot be read,
        * ``HttpResponseNotFound`` when the file does not exist,
        * a HEAD, partial-content or plain ``HttpResponse`` otherwise,
        * ``HttpResponseServerError`` for any other I/O error.
    """
    import errno
    import os

    try:
        # try to use the fast C parser
        from http_parser.parser import HttpParser
    except ImportError:
        # fall back to the Python parser
        from http_parser.pyparser import HttpParser

    # The length handed to the parser must be that of the encoded bytes;
    # it differs from len(self.content) once non-ASCII text is present.
    raw = self.content.encode('utf-8')
    p = HttpParser()
    p.execute(raw, len(raw))

    if not p.is_headers_complete():
        return HttpResponseBadRequest(content_f=BAD_REQUEST_HTML)

    # check method
    if p.get_method() not in SUPPORTED_METHODS:
        return HttpResponseNotImplemented(content_f=NOT_IMPLEMENTED_HTML)

    headers = p.get_headers()
    try:
        base_filepath = settings.HOSTS[headers['Host'].split(':')[0]]
    except KeyError:
        base_filepath = settings.HOSTS['default']

    req_file = self.content.split(' ')[1]
    if req_file == '/':
        req_file = '/index.html'
    full_path = base_filepath + req_file

    # The path comes straight from the client: refuse anything that, once
    # normalised, is no longer inside the document root ("/../../etc/...").
    root = os.path.abspath(base_filepath + '/')
    target = os.path.abspath(full_path)
    if target != root and not target.startswith(root.rstrip(os.sep) + os.sep):
        return HttpResponseForbidden(content_f=FORBIDDEN_HTML)

    try:
        # Probe that the file exists and is readable, then release it.
        with open(full_path):
            pass

        if p.get_method() == 'HEAD':
            return HttpResponse(content_f=full_path, method='HEAD')
        if 'Range' in headers:
            return HttpResponsePartialContent(content_f=full_path,
                                              h_range=headers['Range'])
        return HttpResponse(content_f=full_path)
    except IOError as err:
        if err.errno == errno.EACCES:
            return HttpResponseForbidden(content_f=FORBIDDEN_HTML)
        elif err.errno == errno.ENOENT:
            return HttpResponseNotFound(content_f=NOT_FOUND_HTML)

    return HttpResponseServerError(content_f=SERVER_ERROR_HTML)
def request(self, method, url, headers=None, body=None, timeout=None):
    '''Issues a `method` request to `path` on the connected server.  Sends along
    `headers`, and body.

    Very low level--you must set "host" yourself, for example.  It
    will set Content-Length, however.

    `headers` defaults to no headers (a fresh mapping per call rather than
    a shared mutable default).  `timeout` is the total number of seconds to
    wait for the response and defaults to 60.

    Returns a `Response` carrying the joined body, the status code and the
    headers reported by the parser.
    '''
    if headers is None:
        headers = {}

    url_info = urlparse(url)
    fake_wsgi = dict(
        (cgi_name(n), v) for n, v in headers.iteritems())
    fake_wsgi.update({
        'HTTP_METHOD' : method,
        'SCRIPT_NAME' : '',
        'PATH_INFO' : url_info[2],
        'QUERY_STRING' : url_info[4],
        'wsgi.version' : (1,0),
        'wsgi.url_scheme' : 'http', # XXX incomplete
        'wsgi.input' : cStringIO.StringIO(body or ''),
        'wsgi.errors' : FileLikeErrorLogger(hlog),
        'wsgi.multithread' : False,
        'wsgi.multiprocess' : False,
        'wsgi.run_once' : False,
    })
    req = Request(fake_wsgi)

    timeout_handler = TimeoutHandler(timeout or 60)

    send('%s %s HTTP/1.1\r\n%s' % (req.method, req.url, str(req.headers)))

    if body:
        send(body)

    h = HttpParser()
    body = []
    data = None
    while True:
        if data:
            used = h.execute(data, len(data))
            if h.is_headers_complete():
                body.append(h.recv_body())
            if h.is_message_complete():
                data = data[used:]
                break
        ev, val = first(receive_any=True, sleep=timeout_handler.remaining())
        if ev == 'sleep':
            timeout_handler.timeout()
        data = val

    resp = Response(
        response=''.join(body),
        status=h.get_status_code(),
        headers=h.get_headers(),
    )

    return resp
def main():
    """Download one file over a raw socket and store it as ``mal.exe``.

    The response is parsed incrementally.  Headers and the
    ``content-length`` value (``None`` when absent) are printed once; every
    body chunk is printed as it arrives and collected, and the joined body
    is written to ``mal.exe`` in the current directory.
    """
    p = HttpParser()
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    body = []
    header_done = False
    try:
        s.connect(('install2.optimum-installer.com', 80))
        # sendall: a plain send() may transmit only part of the request.
        s.sendall(b("GET /o/PDFCreator/Express_Installer.exe.exe HTTP/1.1\r\nHost: gunicorn.org\r\n\r\n"))

        while True:
            data = s.recv(1024)
            if not data:
                break

            recved = len(data)
            nparsed = p.execute(data, recved)
            assert nparsed == recved

            if p.is_headers_complete() and not header_done:
                headers = p.get_headers()
                print(headers)
                print(headers.get('content-length'))
                header_done = True

            if p.is_partial_body():
                # recv_body() empties the parser's buffer, so fetch the chunk
                # once and use it for both storing and printing.
                chunk = p.recv_body()
                body.append(chunk)
                print(chunk)
                print("BDy++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")

            if p.is_message_complete():
                break

        body = b("").join(body)

        print("Writing file\n")
        with open("mal.exe", "wb") as data_write:
            data_write.write(body)

        print("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")

    finally:
        s.close()
def findhue():  #Auto-find bridges on network & get list
    """Pick a Hue bridge from the discovery service and probe it via SSDP.

    The bridge list comes from ``https://discovery.meethue.com/``.  The
    bridge is selected by ``commandlineargs.bridgeid`` when given, by asking
    the user when several bridges exist, or automatically when there is
    exactly one.  An M-SEARCH is then sent to that bridge's address and
    replies are read until one carries a ``hue-bridgeid`` header; other
    replies and unparsable datagrams are skipped.

    Returns:
        ``(addr, headers)`` of the matching reply, or ``None`` when no
        further reply arrives within the 12 second socket timeout.

    Exits the process when the requested bridge id is unknown or when the
    discovery service returns no bridge at all.
    """
    r = requests.get("https://discovery.meethue.com/")
    bridgelist = json.loads(r.text)

    if commandlineargs.bridgeid is not None:
        found = False
        for idx, b in enumerate(bridgelist):
            if b["id"] == commandlineargs.bridgeid:
                bridge = idx
                found = True
                break
        if not found:
            sys.exit("bridge {} was not found".format(
                commandlineargs.bridgeid))
    elif len(bridgelist) > 1:
        print("Multiple bridges found. Select one of the bridges below (",
              list(bridgelist), ")")
        bridge = int(input())
    elif not bridgelist:
        # Previously an IndexError on bridgelist[0].
        sys.exit("no bridges were found")
    else:
        bridge = 0  #Default to the only bridge if only one is found

    hueip = bridgelist[bridge][
        'internalipaddress']  #Logic currently assumes 1 bridge on the network
    print("I will use the bridge at ", hueip)

    msg = \
        'M-SEARCH * HTTP/1.1\r\n' \
        'HOST:' + hueip +':1900\r\n' \
        'ST:upnp:rootdevice\r\n' \
        'MX:2\r\n' \
        'MAN:"ssdp:discover"\r\n' \
        '\r\n'

    s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP)
    try:
        s.settimeout(12)
        s.sendto(msg.encode('utf-8'), (hueip, 1900))

        while True:
            data, addr = s.recvfrom(65507)
            p = HttpParser()
            recved = len(data)
            if p.execute(data, recved) != recved:
                # Malformed datagram: ignore it and keep listening.
                continue
            if p.is_headers_complete():
                headers = p.get_headers()
                if 'hue-bridgeid' in headers:
                    return addr, headers
    except timeout:
        verbose('Timed out, better luck next time')
    finally:
        s.close()
    return None
def __init__(self, raw):
    """Parse ``raw.request`` and expose the pieces of the request.

    Sets ``headers``, ``body``, ``url``, ``path``, ``method``,
    ``arguments`` (the raw query string) and ``slug`` (the non-empty
    segments of the path).
    """
    self.raw = raw

    parser = HttpParser()
    parser.execute(raw.request, len(raw.request))

    self.headers = parser.get_headers()
    # Body chunks are taken from the parser's internal buffer.
    self.body = b"".join(parser._body)
    self.url = parser.get_url()
    self.path = parser.get_path()
    self.method = parser.get_method()
    self.arguments = parser.get_query_string()

    segments = self.path.split('/')
    self.slug = [segment for segment in segments if segment != '']
def proxy(data):
    """
    Routing hook invoked by tproxy with the bytes received so far.

    tproxy calls this repeatedly for one connection as more data arrives,
    until something other than None is returned.  The return value usually
    names the backend to proxy to, but it may also tell tproxy to close the
    connection with an error message.
    """
    log = logging.getLogger("proxy")

    parser = HttpParser()
    received = len(data)
    consumed = parser.execute(data, received)
    if consumed != received:
        return {'close': 'HTTP/1.0 400 Bad Request\r\n\r\nParse error'}

    if not parser.is_headers_complete():
        if received > MAX_HEADER_LENGTH:
            return {'close': 'HTTP/1.0 400 Bad Request\r\n'
                             '\r\nHeaders are too large'}
        # Not enough data yet; tproxy will call again.
        return None

    # A "hostname:port" value is routed on the hostname alone.
    route_host = parser.get_headers().get('HOST', None)
    if route_host:
        match = _HOST_PORT_REGEXP.match(route_host)
        if match:
            route_host = match.group(1)

    try:
        log.debug("Routing %r" % (parser.get_url(),))
        return _ROUTER.route(
            route_host,
            parser.get_method(),
            parser.get_path(),
            parser.get_query_string())
    except Exception:
        log.error("error routing %r, %s" % (
            parser.get_url(), traceback.format_exc(),))
        gevent.sleep(ERROR_DELAY)
        return {'close': 'HTTP/1.0 502 Gateway Error\r\n'
                         '\r\nError routing request'}
def proxy(data):
    """
    Tell tproxy what to do with a connection, given its data so far.

    The function is called again and again for the same connection while
    data keeps arriving; returning None means "ask me again later".  Any
    other return value either names the destination to proxy to or asks
    tproxy to hang up with an error message.
    """
    log = logging.getLogger("proxy")
    size = len(data)
    parser = HttpParser()

    if parser.execute(data, size) != size:
        return {'close': 'HTTP/1.0 400 Bad Request\r\n\r\nParse error'}

    headers_done = parser.is_headers_complete()
    if not headers_done and size > MAX_HEADER_LENGTH:
        return {
            'close':
            'HTTP/1.0 400 Bad Request\r\n'
            '\r\nHeaders are too large'
        }
    if not headers_done:
        return None

    headers = parser.get_headers()

    # Strip an optional ":port" suffix so that routing uses the bare name.
    route_host = headers.get('HOST', None)
    host_match = _HOST_PORT_REGEXP.match(route_host) if route_host else None
    if host_match:
        route_host = host_match.group(1)

    try:
        log.debug("Routing %r" % (parser.get_url(), ))
        destination = _ROUTER.route(route_host, parser.get_method(),
                                    parser.get_path(),
                                    parser.get_query_string())
    except Exception:
        log.error("error routing %r, %s" % (
            parser.get_url(),
            traceback.format_exc(),
        ))
        gevent.sleep(ERROR_DELAY)
        return {
            'close':
            'HTTP/1.0 502 Gateway Error\r\n'
            '\r\nError routing request'
        }
    return destination
def parse_request(self, message):
    """Parse ``message`` and set ``method``, ``path``, ``headers``, ``status``.

    Status rules:

    * 200 for GET,
    * 501 for any other method (TODO: maybe support HEAD later),
    * 400 when no method could be parsed,
    * 501 when the parser reports no path at all.

    A path ending in ``/`` (including the bare ``/``) gets ``index.html``
    appended.
    """
    try:
        from http_parser.parser import HttpParser
    except ImportError:
        from http_parser.pyparser import HttpParser

    p = HttpParser()
    p.execute(message, len(message))

    method = p.get_method()
    path = p.get_path()
    self.method = method
    self.path = path
    self.headers = p.get_headers()

    if not method:
        self.status = 400
    elif method == 'GET':
        self.status = 200
    else:
        self.status = 501

    # Test for a missing path first: calling .endswith on None would raise
    # before the original None check was ever reached.
    if path is None:
        self.status = 501
    elif path.endswith('/'):
        self.path = path + 'index.html'
def request(self):
    """Read one HTTP request from ``self.socket`` and prepare the upstream.

    The request is read in ``ProxyHandler.BUFF_LEN`` chunks until the
    parser reports a complete message or the client closes the connection.
    The ``Host`` header is then split by ``_analyse_host_and_port`` and a
    TCP socket for the remote side is created.

    NOTE(review): the visible body ends after creating the remote socket;
    nothing is connected or returned here - confirm against the full file.
    """
    request_buff = ""
    request_parser = HttpParser()
    while True:
        r_data = self.socket.recv(ProxyHandler.BUFF_LEN)
        if not r_data:
            # Client went away; without this check the loop never ends.
            break
        request_buff += r_data
        r_size = len(r_data)
        request_parser.execute(r_data, r_size)
        if request_parser.is_message_complete():
            break

    host = request_parser.get_headers().get('Host')
    url, port = self._analyse_host_and_port(host)
    remote_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
def handleData(self,fd):
    """Answer the request buffered for ``fd`` once its headers are complete.

    Nothing happens until the buffer contains a blank line.  Unknown
    methods get a 400, known but unsupported ones a 501.  GET and HEAD are
    resolved below the default host directory; for a valid GET the file
    (or the requested byte range of it) is sent after the response head.
    """
    self.debug("Entering handleData")
    if '\r\n\r\n' not in self.con_cache[fd]:
        self.debug("Partial message - Exiting handleData")
        return

    parser = HttpParser()
    nparsed = parser.execute(self.con_cache[fd],len(self.con_cache[fd]))

    method = parser.get_method()
    path = parser.get_path()
    headers = parser.get_headers()
    debugStr = "\nMethod: %s\nPath: %s\nHeaders: %s\n" % (method,path,headers)
    #self.debug(debugStr)

    rangeRequest = headers['Range'] if 'Range' in headers else None
    if rangeRequest is not None:
        self.debug("Range: %s" % (rangeRequest))

    isValid = False
    if method not in ['GET','HEAD','PUT','DELETE','POST']:
        response = self.makeError('400','Bad Request')
    elif method not in ('GET', 'HEAD'):
        response = self.makeError('501','Not Implemented')
    else:
        if path == '/':
            path = '/index.html'
        path = self.hosts['default'] + path
        (isValid,response) = self.makeResponse(path,rangeRequest)

    self.clients[fd].send(response)

    self.debug("PATH:%s"%(path))

    # Only a valid GET is followed by file content.
    if isValid and method != "HEAD":
        if not rangeRequest:
            self.sendAll(path,fd)
        else:
            (start,end) = self.getByteRange(rangeRequest)
            self.send(path,fd,start,end)

    self.debug("Exiting handleData")
def makeRequest(self, host, url="/", port=80, method='GET', headers=None, postdata=None):
    """Send one raw HTTP request on ``self.s`` and print a result summary.

    Waits on the event ``self.e`` first, then connects to ``host:port``,
    sends the request built by ``rawHttpReq`` and reads the response until
    it is complete or the server closes.  The printed summary holds the
    status code, the number of bytes received, the headers, the list of
    body chunks and the request text.
    """
    assert self.e is not None
    evSet = self.e.wait()  # noqa: F841
    # log.debug("Generating raw http request")
    self.s.connect((host, port))

    if headers is None:
        headers = {
            "Accept": "*/*",
            "User-Agent": self.useragent
        }

    req = self.rawHttpReq(host, url, method, headers, postdata)
    self.s.sendall(req.encode())

    response_headers = []
    chunks = []
    parser = HttpParser()
    received_total = 0

    while True:
        data = self.s.recv(2048)
        if not data:
            break

        size = len(data)
        received_total += size
        consumed = parser.execute(data, size)
        assert consumed == size

        if parser.is_headers_complete():
            response_headers = parser.get_headers()
            # log.debug(p.get_headers())
        if parser.is_partial_body():
            chunks.append(parser.recv_body())
        if parser.is_message_complete():
            break

    self.s.close()

    res = {
        'status': parser.get_status_code(),
        'length': received_total,
        'headers': response_headers,
        'body': chunks,
        'request': req,
    }
    print(res)
def receive(self):
    """Read one HTTP response from ``self.s`` and return it as a ``Response``.

    Data is read in ``BUFSIZE`` chunks and parsed incrementally until the
    message is complete.  If the peer closes the connection first, reading
    stops and whatever has been parsed so far is returned; previously an
    empty read made the loop spin forever.
    """
    h = HttpParser()
    body = []

    while True:
        data = self.s.recv(BUFSIZE)
        if not data:
            # Connection closed by the peer.
            break

        h.execute(data, len(data))
        if h.is_headers_complete():
            body.append(h.recv_body())
        if h.is_message_complete():
            break

    return Response(response=''.join(body),
                    status=h.get_status_code(),
                    headers=h.get_headers(),
                    )
def test_constructor(self):
    '''
    Instance attributes autosubstitution.

    The method and headers given to the ``HttpCompiler`` constructor must
    show up in the raw request it builds.
    '''
    headers = {
        'Host': 'httpbin.org',
        'Connection': 'close',
    }
    hc = HttpCompiler(method='PATCH', headers=headers)
    qs = '/path/to/check'
    req = hc.build_raw(qs)

    p = HttpParser()
    p.execute(req, len(req))
    result_hdrs = p.get_headers()

    # assertTrue(value, 'PATCH') treats 'PATCH' as the failure message and
    # passes for any non-empty method; compare the values instead.
    self.assertEqual(p.get_method(), 'PATCH')
    for name, value in headers.items():
        self.assertEqual(result_hdrs[name], value)
def test_constructor(self):
    '''
    Instance attributes autosubstitution.

    Builds a raw request from an ``HttpCompiler`` configured through its
    constructor and checks that the parsed result carries the same method
    and headers.
    '''
    headers = {
        'Host': 'httpbin.org',
        'Connection': 'close',
    }
    hc = HttpCompiler(method='PATCH', headers=headers)
    qs = '/path/to/check'
    req = hc.build_raw(qs)

    p = HttpParser()
    p.execute(req, len(req))
    result_hdrs = p.get_headers()

    # The original assertTrue(method, 'PATCH') could never fail for a
    # non-empty method because the second argument is only the message.
    self.assertEqual(p.get_method(), 'PATCH')
    for name, value in headers.items():
        self.assertEqual(result_hdrs[name], value)
def parseData(self, data, fd):
    """Parse one request and send the matching response to client ``fd``.

    GET is delegated to ``handleGet``, DELETE is answered with 501 and any
    other method with 400.  The response head is always sent; sending the
    body is attempted on a best-effort basis.
    """
    parser = HttpParser()
    nparsed = parser.execute(data,len(data))
    resp = Response()

    if self.debugging:
        print("%s %s %s" % (parser.get_method(), parser.get_path(),
                            parser.get_headers()))

    method = parser.get_method()
    if method == 'GET':
        resp = self.handleGet(parser, resp)
    elif method == 'DELETE':
        resp.setCode(501)
    else:
        resp.setCode(400)

    client = self.clients[fd]
    client.send(str(resp))
    try:
        self.clients[fd].send(resp.body)
    except:
        # Best effort only: a missing body or a send failure is ignored.
        pass
def do_request(conn):
    """Read one HTTP request from ``conn``, log it, and send the canned reply.

    Headers and body chunks are written to the debug log while the request
    is parsed; ``EXAMPLE_RESPONSE`` is sent once reading has finished.
    """
    chunks = []
    parser = HttpParser()

    while True:
        data = conn.recv(1024)
        size = len(data)
        consumed = parser.execute(data, size)
        assert consumed == size

        if not data:
            # Peer closed the connection.
            break
        if parser.is_headers_complete():
            logger.debug(parser.get_headers())
        if parser.is_partial_body():
            logger.debug("is partial body")
            chunks.append(parser.recv_body())
        if parser.is_message_complete():
            break

    logger.debug(chunks)
    conn.sendall(EXAMPLE_RESPONSE)
def parse_request(self, text):
    """Parse raw request ``text`` into headers, body and request line parts.

    Sets ``request_headers`` (``{}`` when the header block is incomplete),
    ``request_body``, and from the first line ``request_method``, ``path``
    and ``request_version``.

    Raises:
        ValueError: if the first line does not consist of exactly three
            whitespace-separated fields.
    """
    p = HttpParser()
    p.execute(text, len(text))

    if p.is_headers_complete():
        self.request_headers = p.get_headers()
    else:
        self.request_headers = {}

    # recv_body() empties the parser's buffer, so read it exactly once;
    # printing it first used to leave request_body empty.
    body = p.recv_body()
    print(body)
    self.request_body = body

    request_line = text.splitlines()[0]
    request_line = request_line.rstrip('\r\n')
    # Break down the request line into components
    (self.request_method,  # GET
     self.path,            # /hello
     self.request_version  # HTTP/1.1
     ) = request_line.split()
def parse_html(self):
    """Extract status code and headers from the verbose output in ``self.result``.

    Lines whose first two characters contain one of the verbose-output
    markers are dropped; the rest is parsed as an HTTP message.  The
    resolved IP is taken from a line containing ``Trying``.

    Returns:
        ``(status_code, header, body)`` where ``header`` is a ``<br/>``
        joined list of ``key:value`` pairs and ``body`` comes from
        ``self.content["result"]``; ``(None, None, message)`` on any error.
    """
    try:
        resolve_ip = ''
        kept_lines = []
        fitler_list = ['*', '> ', '< ', '{']

        for item in self.result.split("\n"):
            if 'Trying' in item:
                stripped = item.replace('*', "")
                stripped = stripped.replace("Trying", "")
                resolve_ip = stripped.replace("...", "").strip()
                log.logger.info('resolve_ip: %s ' % (resolve_ip))

            is_noise = any(marker in item[:2] for marker in fitler_list)
            if not is_noise:
                kept_lines.append(item.encode('utf-8'))

        parsing_string = b("\r\n").join(kept_lines)

        parser = HttpParser()
        parser.execute(parsing_string, len(parsing_string))
        status_code = str(parser.get_status_code())
        header_obj = parser.get_headers()
        #body = str(p.recv_body())

        header_list = []
        if resolve_ip:
            header_list.append('%s:%s' % ("resolve ip", resolve_ip.strip()))
        header_list.extend(
            '%s:%s' % (key, value) for key, value in header_obj.items())

        header = ("<br/>").join(header_list)
        body = self.content["result"]

        log.logger.info('resolve_ip :%s ' % (resolve_ip))
        log.logger.info('status_code :%s ' % (status_code))
        log.logger.info('header :%s ' % (header))
        log.logger.info('body :%s ' % (body))

        return status_code, header, body
    except Exception as e:
        log.logger.info('Exception: %s ' % (str(e)))
        return None, None, str(e)
class HttpRequest(object):
    """A parsed HTTP request plus the CGI-style environment derived from it.

    The raw request text is fed to an ``HttpParser`` once, at construction
    time; the accessor methods simply expose what the parser extracted.
    """

    # Cache for get_cgi_config(); built on first use.
    __cgi_config = None

    def __init__(self, request_text, server_config):
        """Parse ``request_text`` immediately.

        request_text: the raw request as received from the client.
        server_config: mapping that provides at least ``server_name`` and
            ``server_port`` (read by get_cgi_config()).
        """
        self.__parser = HttpParser()
        self.__parser.execute(request_text, len(request_text))
        self.__server_config = server_config

    def get_body(self):
        """Return the body buffered by the parser, or None if there is none."""
        if self.__parser.is_partial_body():
            return self.__parser.recv_body()
        return None

    def get_headers(self):
        """Return the headers mapping produced by the parser."""
        return self.__parser.get_headers()

    def get_request_method(self):
        """Return the request method as reported by the parser."""
        return self.__parser.get_method()

    def get_request_path(self):
        """Return the request path as reported by the parser."""
        return self.__parser.get_path()

    def get_cgi_config(self):
        """Return the CGI environment dict for this request (built once).

        Contains SERVER_NAME, SERVER_PORT, SERVER_PROTOCOL, REQUEST_METHOD,
        PATH_INFO and one ``HTTP_<header>`` entry per request header. The
        header name is used as received (no upper-casing, no '-' to '_').
        """
        if self.__cgi_config is None:
            config = {}
            # WSGI required variable (not populated yet):
            # config['wsgi.input'] = io.StringIO(self.get_body())
            # CGI
            config['SERVER_NAME'] = self.__server_config['server_name']
            config['SERVER_PORT'] = self.__server_config['server_port']
            # This used to be written with ':' instead of '=', which Python
            # parses as a bare annotation, so the key was never stored.
            config['SERVER_PROTOCOL'] = 'HTTP/1.1'
            config['REQUEST_METHOD'] = self.get_request_method()
            config['PATH_INFO'] = self.get_request_path()
            for header, value in self.get_headers().items():
                config[f'HTTP_{header}'] = value
            self.__cgi_config = config
        return self.__cgi_config
def process_item(self, item):
    """Record the distinct header set of ``item`` that matches ``CORS_RE``.

    The ``#text`` entry of ``item.response`` is decoded with
    ``base64decode``, reduced to ASCII (other characters are dropped) and
    parsed as an HTTP message. Every header whose name matches
    ``self.CORS_RE`` is rendered as ``name: value``; the sorted,
    newline-joined rendering is added to ``self.unique_cors``. Items with no
    response text or no matching header are skipped.
    """
    encoded = item.response.get('#text')
    if encoded is None:
        return

    raw = base64decode(encoded).encode('ascii', errors='ignore')

    parser = HttpParser()
    parser.execute(raw, len(raw))

    rendered = [
        '%s: %s' % (name, value)
        for name, value in parser.get_headers().iteritems()
        if self.CORS_RE.search(name)
    ]
    if rendered:
        rendered.sort()
        self.unique_cors.add('\n'.join(rendered))
def handle(self):
    """Relay one HTTP request to its origin server and send back the reply.

    The client request is read from ``self.socket`` (and accumulated in
    ``self.buff``) until the parser reports a complete message. The value of
    the ``Host`` header is used as the upstream host name, always on port
    80. The buffered request is forwarded, the upstream response is read
    until complete (or until the upstream closes), and then written back to
    the client in one piece.

    If the client disconnects before a full request arrived, nothing is
    forwarded.
    """
    p = HttpParser()
    while True:
        data = self.socket.recv(ProxyHandler.BUFF_LEN)
        self.buff += data
        p.execute(data, len(data))
        # An empty read means the client hung up; without this check the
        # loop would spin forever on a half-sent request.
        if p.is_message_complete() or not data:
            break
    if not p.is_message_complete():
        return

    remote_url = p.get_headers().get('Host')
    remote_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        remote_sock.connect((remote_url, 80))
        # sendall() keeps writing until the whole request is out; send()
        # may stop after a partial write.
        remote_sock.sendall(self.buff)

        p2 = HttpParser()
        chunks = []
        while True:
            data = remote_sock.recv(ProxyHandler.BUFF_LEN)
            chunks.append(data)
            p2.execute(data, len(data))
            # Stop on a complete response or when the upstream closes.
            if p2.is_message_complete() or not data:
                break
        self.socket.sendall(b"".join(chunks))
    finally:
        # Always release the upstream connection, including on errors.
        remote_sock.close()
class TitleFetcher:
    '''Fetch a URL over a tornado IOStream and report what was found.

    The response is parsed incrementally with HttpParser. 301/302 responses
    are followed (at most `max_follows` times); for other responses the body
    is fed to the first content finder whose `match_type` accepts the media
    type. The callback is invoked as `callback(result, fetcher)` where
    `result` is whatever the finder produced, the media type when no finder
    matched, or one of the values passed to `run_callback` (Timeout,
    TooManyRedirection, ConnectionClosed, a stream error).
    '''
    status_code = 0
    followed_times = 0  # 301, 302
    finder = None
    addr = None
    stream = None
    max_follows = 10
    timeout = 15
    _finished = False
    _cookie = None
    _connected = False
    # stream that was active when the last redirect was followed
    _redirected_stream = None
    _content_finders = (TitleFinder, PNGFinder, JPEGFinder, GIFFinder)
    _url_finders = ()

    def __init__(self, url, callback,
                 timeout=None, max_follows=None, io_loop=None,
                 content_finders=None, url_finders=None
                 ):
        '''
        url: the (full) url to fetch
        callback: called with title or MediaType or an instance of SingletonFactory
                  (and this fetcher as second argument)
        timeout: total time including redirection before giving up
        max_follows: max redirections
        io_loop: loop to use; defaults to the current/global tornado IOLoop
        content_finders: replaces the class-level `_content_finders`
        url_finders: replaces the class-level `_url_finders`

        The fetch starts immediately.
        '''
        self._callback = callback
        if max_follows is not None:
            self.max_follows = max_follows
        if timeout is not None:
            self.timeout = timeout

        # IOLoop.current is preferred when this tornado version provides it
        if hasattr(tornado.ioloop, 'current'):
            default_io_loop = tornado.ioloop.IOLoop.current
        else:
            default_io_loop = tornado.ioloop.IOLoop.instance
        self.io_loop = io_loop or default_io_loop()

        if content_finders is not None:
            self._content_finders = content_finders
        if url_finders is not None:
            self._url_finders = url_finders

        # one deadline covers the whole fetch, redirects included
        self.start_time = self.io_loop.time()
        self._timeout = self.io_loop.add_timeout(
            self.timeout + self.start_time,
            self.on_timeout,
        )
        self.origurl = url
        self.url_visited = []
        self.new_url(url)

    def on_timeout(self):
        '''deadline reached: finish with Timeout'''
        self.run_callback(Timeout)

    def parse_url(self, url):
        '''parse `url`, set self.host and return address and stream class'''
        self.url = u = urlsplit(url)
        self.host = u.netloc
        if u.scheme == 'http':
            addr = u.hostname, u.port or 80
            stream = tornado.iostream.IOStream
        elif u.scheme == 'https':
            addr = u.hostname, u.port or 443
            stream = tornado.iostream.SSLIOStream
        else:
            raise ValueError('bad url: %r' % url)
        return addr, stream

    def new_connection(self, addr, StreamClass):
        '''set self.addr, self.stream and connect to host'''
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.addr = addr
        self.stream = StreamClass(s)
        logger.debug('%s: connecting to %s...', self.origurl, addr)
        # before_connected reports failures that happen before send_request runs
        self.stream.set_close_callback(self.before_connected)
        self.stream.connect(addr, self.send_request)

    def new_url(self, url):
        '''start fetching `url`: try url finders first, then (re)connect or reuse'''
        self.url_visited.append(url)
        self.fullurl = url

        # a matching url finder takes over completely; no request is sent here
        for finder in self._url_finders:
            f = finder.match_url(url, self)
            if f:
                self.finder = f
                f()
                return

        addr, StreamClass = self.parse_url(url)
        if addr != self.addr:
            if self.stream:
                self.stream.close()
            self.new_connection(addr, StreamClass)
        else:
            logger.debug('%s: try to reuse existing connection to %s', self.origurl, self.addr)
            try:
                # the read callbacks registered earlier are still in place
                self.send_request(nocallback=True)
            except tornado.iostream.StreamClosedError:
                logger.debug('%s: server at %s doesn\'t like keep-alive, will reconnect.', self.origurl, self.addr)
                # The close callback should have already run
                self.stream.close()
                self.new_connection(addr, StreamClass)

    def run_callback(self, arg):
        '''finish: cancel the timeout, close the stream and call the callback'''
        self.io_loop.remove_timeout(self._timeout)
        self._finished = True

        if self.stream:
            self.stream.close()
        self._callback(arg, self)

    def send_request(self, nocallback=False):
        '''write the GET request and reset the parser; register read
        callbacks unless `nocallback` is true'''
        self._connected = True
        # the first two lines are %-templates filled with path and host below
        req = ('GET %s HTTP/1.1',
               'Host: %s',
               # t.co will return 200 and use js/meta to redirect using the following :-(
               # 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0',
               'User-Agent: %s' % UserAgent,
               'Accept: text/html,application/xhtml+xml;q=0.9,*/*;q=0.7',
               'Accept-Language: zh-cn,zh;q=0.7,en;q=0.3',
               'Accept-Charset: utf-8,gb18030;q=0.7,*;q=0.7',
               'Accept-Encoding: gzip, deflate',
               'Connection: keep-alive',
               )
        path = self.url.path or '/'
        if self.url.query:
            path += '?' + self.url.query
        req = '\r\n'.join(req) % (
            path, self._prepare_host(self.host),
        )
        if self._cookie:
            req += '\r\n' + self._cookie
        req += '\r\n\r\n'
        self.stream.write(req.encode())
        # a fresh parser per request, so a redirect starts from a clean state
        self.headers_done = False
        self.parser = HttpParser(decompress=True)
        if not nocallback:
            self.stream.read_until_close(
                # self.addr will have been changed when close callback is run
                partial(self.on_data, close=True, addr=self.addr),
                streaming_callback=self.on_data,
            )

    def _prepare_host(self, host):
        '''return `host` IDNA-encoded label by label, as an ASCII str'''
        host = encodings.idna.nameprep(host)
        return b'.'.join(encodings.idna.ToASCII(x) for x in host.split('.')).decode('ascii')

    def on_data(self, data, close=False, addr=None):
        '''stream callback: parse `data`; `close` is true for the final call
        made when the connection to `addr` has closed'''
        if close:
            logger.debug('%s: connection to %s closed.', self.origurl, addr)

        if (close and self._redirected_stream is self.stream) or self._finished:
            # The connection is closing, and we are being redirected or we're done.
            self._redirected_stream = None
            return

        recved = len(data)
        logger.debug('%s: received data: %d bytes', self.origurl, recved)

        p = self.parser
        nparsed = p.execute(data, recved)
        if close:
            # feed EOF
            p.execute(b'', 0)

        if not self.headers_done and p.is_headers_complete():
            if not self.on_headers_done():
                return

        if p.is_partial_body():
            chunk = p.recv_body()
            if self.finder is None:
                # redirected but has body received
                return
            t = self.feed_finder(chunk)
            if t is not None:
                self.run_callback(t)
                return

        if p.is_message_complete():
            if self.finder is None:
                # redirected but has body received
                return
            t = self.feed_finder(None)
            # if title not found, t is None
            self.run_callback(t)
        elif close:
            self.run_callback(self.stream.error or ConnectionClosed)

    def before_connected(self):
        '''check if something wrong before connected'''
        if not self._connected and not self._finished:
            self.run_callback(self.stream.error)

    def process_cookie(self):
        '''build the Cookie request line for the next request from Set-Cookie'''
        setcookie = self.headers.get('Set-Cookie', None)
        if not setcookie:
            return

        # keep the last whitespace-separated token before each '; expires'
        cookies = [c.rsplit(None, 1)[-1] for c in setcookie.split('; expires')[:-1]]
        self._cookie = 'Cookie: ' + '; '.join(cookies)

    def on_headers_done(self):
        '''returns True if should proceed, None if should stop for current chunk'''
        self.headers_done = True
        self.headers = self.parser.get_headers()

        self.status_code = self.parser.get_status_code()
        if self.status_code in (301, 302):
            self.process_cookie()  # or we may be redirecting to a loop
            logger.debug('%s: redirect to %s', self.origurl, self.headers['Location'])
            self.followed_times += 1
            if self.followed_times > self.max_follows:
                self.run_callback(TooManyRedirection)
            else:
                newurl = urljoin(self.fullurl, self.headers['Location'])
                # remembered so on_data can ignore the close of this stream
                self._redirected_stream = self.stream
                self.new_url(newurl)
            return

        try:
            l = int(self.headers.get('Content-Length', None))
        except (ValueError, TypeError):
            # header missing or not a number
            l = None

        ctype = self.headers.get('Content-Type', 'text/html')
        mt = defaultMediaType._replace(type=ctype, size=l)
        for finder in self._content_finders:
            f = finder.match_type(mt)
            if f:
                self.finder = f
                break
        else:
            # no finder wants this media type: report the type itself
            self.run_callback(mt)
            return

        return True

    def feed_finder(self, chunk):
        '''feed data to TitleFinder, return the title if found'''
        t = self.finder(chunk)
        if t is not None:
            return t
class TitleFetcher:
    '''Fetch a URL over a tornado IOStream and report what was found.

    The response is parsed incrementally with HttpParser. 301/302 responses
    are followed (at most `max_follows` times); for other responses the body
    is fed to the first content finder whose `match_type` accepts the media
    type. The callback is invoked as `callback(result, fetcher)` where
    `result` is whatever the finder produced, the media type when no finder
    matched, or one of the values passed to `run_callback` (Timeout,
    TooManyRedirection, ConnectionClosed, a stream error).
    '''
    status_code = 0
    followed_times = 0  # 301, 302
    finder = None
    addr = None
    stream = None
    max_follows = 10
    timeout = 15
    _finished = False
    _cookie = None
    _connected = False
    # stream that was active when the last redirect was followed
    _redirected_stream = None
    _content_finders = (TitleFinder, PNGFinder, JPEGFinder, GIFFinder)
    _url_finders = ()

    def __init__(
            self,
            url,
            callback,
            timeout=None,
            max_follows=None,
            io_loop=None,
            content_finders=None,
            url_finders=None,
            referrer=None,
            run_at_init=True,
    ):
        '''
        url: the (full) url to fetch
        callback: called with title or MediaType or an instance of SingletonFactory
                  (and this fetcher as second argument)
        timeout: total time including redirection before giving up
        max_follows: max redirections
        io_loop: loop to use; defaults to the current/global tornado IOLoop
        content_finders: replaces the class-level `_content_finders`
        url_finders: replaces the class-level `_url_finders`
        referrer: if given, sent as the Referer request header
        run_at_init: start fetching right away; otherwise call run() later

        may raise:
        <UnicodeError: label empty or too long> in host preparation
        '''
        self._callback = callback
        self.referrer = referrer
        if max_follows is not None:
            self.max_follows = max_follows
        if timeout is not None:
            self.timeout = timeout

        # IOLoop.current is preferred when this tornado version provides it
        if hasattr(tornado.ioloop, 'current'):
            default_io_loop = tornado.ioloop.IOLoop.current
        else:
            default_io_loop = tornado.ioloop.IOLoop.instance
        self.io_loop = io_loop or default_io_loop()

        if content_finders is not None:
            self._content_finders = content_finders
        if url_finders is not None:
            self._url_finders = url_finders

        self.origurl = url
        self.url_visited = []
        if run_at_init:
            self.run()

    def run(self):
        '''start the fetch; may only be called once per instance'''
        if self.url_visited:
            raise Exception("can't run again")
        else:
            # one deadline covers the whole fetch, redirects included
            self.start_time = self.io_loop.time()
            self._timeout = self.io_loop.add_timeout(
                self.timeout + self.start_time,
                self.on_timeout,
            )
            try:
                self.new_url(self.origurl)
            except:
                # do not leave the timeout armed when starting failed
                self.io_loop.remove_timeout(self._timeout)
                raise

    def on_timeout(self):
        '''deadline reached: finish with Timeout'''
        logger.debug('%s: request timed out', self.origurl)
        self.run_callback(Timeout)

    def parse_url(self, url):
        '''parse `url`, set self.host and return address and stream class'''
        self.url = u = urlsplit(url)
        self.host = u.netloc
        if u.scheme == 'http':
            addr = u.hostname, u.port or 80
            stream = tornado.iostream.IOStream
        elif u.scheme == 'https':
            addr = u.hostname, u.port or 443
            stream = tornado.iostream.SSLIOStream
        else:
            raise ValueError('bad url: %r' % url)
        return addr, stream

    def new_connection(self, addr, StreamClass):
        '''set self.addr, self.stream and connect to host'''
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.addr = addr
        self.stream = StreamClass(s)
        logger.debug('%s: connecting to %s...', self.origurl, addr)
        # before_connected reports failures that happen before send_request runs
        self.stream.set_close_callback(self.before_connected)
        self.stream.connect(addr, self.send_request)

    def new_url(self, url):
        '''start fetching `url`: try url finders first, then (re)connect or reuse'''
        self.url_visited.append(url)
        self.fullurl = url

        # a matching url finder takes over completely; no request is sent here
        for finder in self._url_finders:
            f = finder.match_url(url, self)
            if f:
                self.finder = f
                f()
                return

        addr, StreamClass = self.parse_url(url)
        if addr != self.addr:
            if self.stream:
                self.stream.close()
            self.new_connection(addr, StreamClass)
        else:
            logger.debug('%s: try to reuse existing connection to %s',
                         self.origurl, self.addr)
            try:
                # the read callbacks registered earlier are still in place
                self.send_request(nocallback=True)
            except tornado.iostream.StreamClosedError:
                logger.debug(
                    '%s: server at %s doesn\'t like keep-alive, will reconnect.',
                    self.origurl, self.addr)
                # The close callback should have already run
                self.stream.close()
                self.new_connection(addr, StreamClass)

    def run_callback(self, arg):
        '''finish: cancel the timeout, close the stream and call the callback'''
        self.io_loop.remove_timeout(self._timeout)
        self._finished = True

        if self.stream:
            self.stream.close()
        self._callback(arg, self)

    def send_request(self, nocallback=False):
        '''write the GET request and reset the parser; register read
        callbacks unless `nocallback` is true'''
        self._connected = True
        # the first two lines are %-templates filled with path and host below
        req = [
            'GET %s HTTP/1.1',
            'Host: %s',
            # t.co will return 200 and use js/meta to redirect using the following :-(
            # 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0',
            'User-Agent: %s' % UserAgent,
            'Accept: text/html,application/xhtml+xml;q=0.9,*/*;q=0.7',
            'Accept-Language: zh-cn,zh;q=0.7,en;q=0.3',
            'Accept-Charset: utf-8,gb18030;q=0.7,*;q=0.7',
            'Accept-Encoding: gzip, deflate',
            'Connection: keep-alive',
        ]
        if self.referrer is not None:
            # '%' is doubled because the joined lines are used as a %-template
            req.append('Referer: ' + self.referrer.replace('%', '%%'))
        path = self.url.path or '/'
        if self.url.query:
            path += '?' + self.url.query
        req = '\r\n'.join(req) % (
            path,
            self._prepare_host(self.host),
        )
        if self._cookie:
            req += '\r\n' + self._cookie
        req += '\r\n\r\n'
        self.stream.write(req.encode())
        # a fresh parser per request, so a redirect starts from a clean state
        self.headers_done = False
        self.parser = HttpParser(decompress=True)
        if not nocallback:
            self.stream.read_until_close(
                # self.addr and self.stream may have been changed when close callback is run
                partial(self.on_data, close=True, addr=self.addr,
                        stream=self.stream),
                streaming_callback=self.on_data,
            )

    def _prepare_host(self, host):
        '''return `host` IDNA-encoded label by label, as an ASCII str;
        empty labels are passed through as empty'''
        host = encodings.idna.nameprep(host)
        return b'.'.join(
            encodings.idna.ToASCII(x) if x else b''
            for x in host.split('.')).decode('ascii')

    def on_data(self, data, close=False, addr=None, stream=None):
        '''stream callback: parse `data`; `close` is true for the final call
        made when the connection to `addr` (on `stream`) has closed'''
        if close:
            logger.debug('%s: connection to %s closed.', self.origurl, addr)
            if self.stream.error:
                self.run_callback(self.stream.error)
                return

        if (close and stream
                and self._redirected_stream is stream) or self._finished:
            # The connection is closing, and we are being redirected or we're done.
            self._redirected_stream = None
            return

        recved = len(data)
        logger.debug('%s: received data: %d bytes', self.origurl, recved)

        p = self.parser
        nparsed = p.execute(data, recved)
        if close:
            # feed EOF
            p.execute(b'', 0)

        if not self.headers_done and p.is_headers_complete():
            if not self.on_headers_done():
                return

        if p.is_partial_body():
            chunk = p.recv_body()
            if self.finder is None:
                # redirected but has body received
                return
            t = self.feed_finder(chunk)
            if t is not None:
                self.run_callback(t)
                return

        if p.is_message_complete():
            if self.finder is None:
                # redirected but has body received
                return
            t = self.feed_finder(None)
            # if title not found, t is None
            self.run_callback(t)
        elif close:
            self.run_callback(self.stream.error or ConnectionClosed)

    def before_connected(self):
        '''check if something wrong before connected'''
        if not self._connected and not self._finished:
            self.run_callback(self.stream.error)

    def process_cookie(self):
        '''build the Cookie request line for the next request from Set-Cookie'''
        setcookie = self.headers.get('Set-Cookie', None)
        if not setcookie:
            return

        # keep the last whitespace-separated token before each '; expires'
        cookies = [
            c.rsplit(None, 1)[-1] for c in setcookie.split('; expires')[:-1]
        ]
        self._cookie = 'Cookie: ' + '; '.join(cookies)

    def on_headers_done(self):
        '''returns True if should proceed, None if should stop for current chunk'''
        self.headers_done = True
        self.headers = self.parser.get_headers()

        self.status_code = self.parser.get_status_code()
        if self.status_code in (301, 302):
            self.process_cookie()  # or we may be redirecting to a loop
            logger.debug('%s: redirect to %s', self.origurl,
                         self.headers['Location'])
            self.followed_times += 1
            if self.followed_times > self.max_follows:
                self.run_callback(TooManyRedirection)
            else:
                newurl = urljoin(self.fullurl, self.headers['Location'])
                # remembered so on_data can ignore the close of this stream
                self._redirected_stream = self.stream
                self.new_url(newurl)
            return

        try:
            l = int(self.headers.get('Content-Length', None))
        except (ValueError, TypeError):
            # header missing or not a number
            l = None

        ctype = self.headers.get('Content-Type', 'text/html')
        mt = defaultMediaType._replace(type=ctype, size=l)
        for finder in self._content_finders:
            f = finder.match_type(mt)
            if f:
                self.finder = f
                break
        else:
            # no finder wants this media type: report the type itself
            self.run_callback(mt)
            return

        return True

    def feed_finder(self, chunk):
        '''feed data to finder, return the title if found'''
        t = self.finder(chunk)
        if t is not None:
            return t
class HttpStream(object):
    """ An HTTP parser providing higher-level access to a readable,
    sequential io.RawIOBase object. You can use implementations of
    http_parser.reader (IterReader, StringReader, SocketReader) or
    create your own.

    The object is its own iterator: each step reads one buffer from the
    underlying stream, feeds it to the parser and yields the raw bytes.
    The accessor methods pull as much data as they need before answering.
    """

    def __init__(self, stream, kind=HTTP_BOTH, decompress=False):
        """ constructor of HttpStream.

        :attr stream: an io.RawIOBase object
        :attr kind: Int,  could be 0 to parseonly requests,
        1 to parse only responses or 2 if we want to let
        the parser detect the type.
        :attr decompress: passed through to HttpParser.
        """
        self.parser = HttpParser(kind=kind, decompress=decompress)
        self.stream = stream

    def _check_headers_complete(self):
        """ read from the stream until the headers are fully parsed.

        Raises NoMoreData if the stream ends first.
        """
        if self.parser.is_headers_complete():
            return

        while True:
            try:
                next(self)
            except StopIteration:
                if self.parser.is_headers_complete():
                    return
                raise NoMoreData("Can't parse headers")

            if self.parser.is_headers_complete():
                return

    def _wait_status_line(self, cond):
        """ read from the stream until ``cond()`` is truthy or the headers
        are complete.

        Raises BadStatusLine (carrying the bytes read so far) if the stream
        ends before either happens.
        """
        if self.parser.is_headers_complete():
            return True

        data = []
        if not cond():
            while True:
                try:
                    d = next(self)
                    data.append(d)
                except StopIteration:
                    if self.parser.is_headers_complete():
                        return True
                    raise BadStatusLine(b"".join(data))
                if cond():
                    return True
        return True

    def _wait_on_url(self):
        return self._wait_status_line(self.parser.get_url)

    def _wait_on_status(self):
        return self._wait_status_line(self.parser.get_status_code)

    def url(self):
        """ get full url of the request """
        self._wait_on_url()
        return self.parser.get_url()

    def path(self):
        """ get path of the request (url without query string and
        fragment) """
        self._wait_on_url()
        return self.parser.get_path()

    def query_string(self):
        """ get query string of the url """
        self._wait_on_url()
        return self.parser.get_query_string()

    def fragment(self):
        """ get fragment of the url """
        self._wait_on_url()
        return self.parser.get_fragment()

    def version(self):
        """ get the HTTP version as reported by the parser """
        self._wait_on_status()
        return self.parser.get_version()

    def status_code(self):
        """ get status code of a response as integer """
        self._wait_on_status()
        return self.parser.get_status_code()

    def status(self):
        """ return complete status with reason """
        status_code = self.status_code()
        reason = status_reasons.get(int(status_code), 'unknown')
        return "%s %s" % (status_code, reason)

    def method(self):
        """ get HTTP method as string"""
        self._wait_on_status()
        return self.parser.get_method()

    def headers(self):
        """ get request/response headers, headers are returned in a
        OrderedDict that allows you to get value using insensitive
        keys."""
        self._check_headers_complete()
        return self.parser.get_headers()

    def should_keep_alive(self):
        """ return True if the connection should be kept alive
        """
        self._check_headers_complete()
        return self.parser.should_keep_alive()

    def is_chunked(self):
        """ return True if Transfer-Encoding header value is chunked"""
        self._check_headers_complete()
        return self.parser.is_chunked()

    def wsgi_environ(self, initial=None):
        """ get WSGI environ based on the current request.

        :attr initial: dict, initial values to fill in environ. Keys the
        parser also produces are overridden by the parsed values. When
        omitted or empty, the parser's environ is returned as is.
        """
        self._check_headers_complete()
        environ = self.parser.get_wsgi_environ()
        if not initial:
            return environ

        # `initial` used to be accepted but silently ignored. Build a new
        # dict so the caller's mapping is never mutated.
        merged = dict(initial)
        merged.update(environ or {})
        return merged

    def body_file(self, buffering=None, binary=True, encoding=None,
                  errors=None, newline=None):
        """ return the body as a buffered stream object. If binary is
        true an io.BufferedReader will be returned, else an
        io.TextIOWrapper.
        """
        self._check_headers_complete()

        if buffering is None:
            buffering = -1
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE

        raw = HttpBodyReader(self)
        buf = BufferedReader(raw, buffering)
        if binary:
            return buf
        text = TextIOWrapper(buf, encoding, errors, newline)
        return text

    def body_string(self, binary=True, encoding=None, errors=None,
                    newline=None):
        """ return body as string """
        # `errors` must be forwarded too, otherwise the caller's decoding
        # error policy is silently replaced by the default one.
        return self.body_file(binary=binary, encoding=encoding,
                              errors=errors, newline=newline).read()

    def __iter__(self):
        return self

    def __next__(self):
        """ read one buffer from the stream, parse it and return the bytes.

        Raises StopIteration when the message is complete or the stream
        returns no data, NoMoreData when the stream has nothing available
        (readinto() returned None) and ParserError when the parser did not
        consume the whole buffer of an unfinished message.
        """
        if self.parser.is_message_complete():
            raise StopIteration

        # fetch data
        b = bytearray(DEFAULT_BUFFER_SIZE)
        recved = self.stream.readinto(b)
        if recved is None:
            raise NoMoreData("no more data")

        # drop the unused tail of the buffer
        del b[recved:]
        to_parse = bytes(b)
        # parse data
        nparsed = self.parser.execute(to_parse, recved)
        if nparsed != recved and not self.parser.is_message_complete():
            raise ParserError("nparsed != recved (%s != %s) [%s]" % (
                nparsed, recved, bytes_to_str(to_parse)))

        if recved == 0:
            raise StopIteration

        return to_parse

    # Python 2 iterator protocol
    next = __next__
class ConnectionHandler:
    """Handle one proxied client connection and record the HTTP traffic.

    All work happens in the constructor: the request line is read from the
    client, a connection to the target is opened, data is relayed in both
    directions, and the relayed bytes are run through an HttpParser so the
    headers and body can be handed to ``update_traffic``.
    """

    def __init__(self, connection, address, timeout):
        """Serve ``connection`` until the relay loop ends, then close sockets.

        connection: the accepted client socket.
        address: not used by this class.
        timeout: idle budget; see _read_write() for how it is applied.
        """
        self.body_file = ""
        self.p = HttpParser()
        self.body = []
        self.request_url = ""
        self.response_header = []
        self.header_done = False
        self.url = ""
        self.controller = []
        self.controller_ip = []
        self.client = connection
        self.client_buffer = ''
        self.timeout = timeout
        self.method, self.path, self.protocol = self.get_base_header()
        if self.method == 'CONNECT':
            self.method_CONNECT()
        elif self.method in ('OPTIONS', 'GET', 'HEAD', 'POST', 'PUT',
                             'DELETE', 'TRACE'):
            self.method_others()
        self.client.close()
        # NOTE(review): self.target is only assigned inside _connect_target's
        # try block, so it is missing here when the connect failed or when
        # the method matched neither branch above.
        self.target.close()
        #clear
        #print self.controller , self.controller_ip

    def get_base_header(self):
        """Read up to the first newline and return the request line fields.

        The consumed line is removed from ``self.client_buffer``; whatever
        was received after it stays buffered. The result is the
        whitespace-split request line (the caller unpacks three fields).
        """
        while 1:
            self.client_buffer += self.client.recv(BUFLEN)
            end = self.client_buffer.find('\n')
            if end != -1:
                break
        # We don't want those google.com urls.
        if not "127.0.0.1" in self.client_buffer[:end]:
            #Insert Url into database here
            self.url = '%s' % self.client_buffer[:end]
        data = (self.client_buffer[:end + 1]).split()
        self.client_buffer = self.client_buffer[end + 1:]
        #print data
        return data

    def method_CONNECT(self):
        """Open a tunnel to ``self.path`` and relay bytes both ways."""
        self._connect_target(self.path)
        self.client.send(HTTPVER + ' 200 Connection established\n' +
                         'Proxy-agent: %s\n\n' % VERSION)
        self.client_buffer = ''
        self._read_write()

    def method_others(self):
        """Forward a plain HTTP request to its host and relay the exchange."""
        # drop the first 7 characters (presumably the 'http://' prefix)
        self.path = self.path[7:]
        i = self.path.find('/')
        host = self.path[:i]
        path = self.path[i:]
        self._connect_target(host)
        # re-issue the request line with the host-relative path, followed by
        # everything the client already sent after it
        self.target.send('%s %s %s\n' % (self.method, path, self.protocol) +
                         self.client_buffer)
        self.client_buffer = ''
        self._read_write()

    def _connect_target(self, host):
        """Connect ``self.target`` to ``host`` (``name`` or ``name:port``).

        The port defaults to 80. ``self.request_url`` is set to a
        ``host | address | url`` summary string.
        """
        i = host.find(':')
        if i != -1:
            port = int(host[i + 1:])
            host = host[:i]
            #print host
        else:
            port = 80
        try:
            (soc_family, _, _, _, address) = socket.getaddrinfo(host, port)[0]
            print "Adress is ", address
            self.target = socket.socket(soc_family)
            self.target.connect(address)
        except Exception as e:
            print "Error Connecting to:" + str(host) + ":" + str(port)
            self.request_url = "Error Connecting to:" + str(host) + ":" + str(
                port)
            # insert to db here
        # Concat data to string
        # NOTE(review): `address` is unbound here when getaddrinfo itself
        # failed, and this assignment overwrites the error text set above.
        self.request_url = str(host) + " | " + str(address) + " | " + str(
            self.url)
        #debug
        #print self.request_url

    def _read_write(self):
        """Relay data between client and target until idle or on error.

        Each select() round waits up to 3 seconds. ``count`` counts rounds
        since data was last forwarded; the loop stops when it equals
        ``self.timeout / 3`` or when select() reports a socket error.
        Relayed data is also fed to ``self.p``; once a message is complete
        its headers and body type are passed to ``update_traffic``.
        """
        # NOTE(review): the exit test below is an equality check, so it only
        # fires if timeout / 3 is a value `count` actually reaches.
        time_out_max = self.timeout / 3
        socs = [self.client, self.target]
        count = 0
        while 1:
            count += 1
            (recv, _, error) = select.select(socs, [], socs, 3)
            if error:
                break
            if recv:
                for in_ in recv:
                    try:
                        #print " Receving Data "
                        data = in_.recv(84333)
                    except Exception as e:
                        # NOTE(review): on failure `data` keeps its previous
                        # value (or is unbound on the first pass).
                        print e
                        pass
                    # forward to the opposite side of the relay
                    if in_ is self.client:
                        out = self.target
                    else:
                        out = self.client
                    try:

                        if data:
                            #column 25
                            # Dig here to analyse the traffic
                            #print data
                            try:
                                # Let's parse the data using the http_parser module
                                recved = len(data)
                                #
                                #print "We received so far "+ str(recved)
                                nparsed = self.p.execute(data, recved)
                                assert nparsed == recved
                                # Check
                                if self.p.is_headers_complete(
                                ) and not self.header_done:

                                    # Header is an ordered dictionary
                                    header_s = self.p.get_headers()

                                    # Let's beautify it and print it.
                                    for header, value in header_s.items():

                                        #Print Response
                                        # Connection : close format
                                        res_header = header + ": " + value
                                        self.response_header.append(res_header)

                                    self.header_done = True
                                    #Put header to Database.

                                # Check if the body is partial; if so, append the body
                                if self.p.is_partial_body():

                                    self.body.append(self.p.recv_body())
                                    #print "appending body" +self.p.recv_body()
                                    # Append the body data received to a list
                                    #print self.body

                                # If the parsing of the current request is completed
                                if self.p.is_message_complete():

                                    try:

                                        try:

                                            content_length = self.p.get_headers(
                                            )['content-length']

                                        except Exception as e:
                                            print "Exception in Body retrive-sub" + str(
                                                e)
                                            content_length = 0
                                            pass

                                        self.body_file = "".join(self.body)
                                        # only the first 1024 bytes are used
                                        # for type detection
                                        body_file_type = ms.buffer(
                                            self.body_file[:1024])

                                        #print self.request_url
                                        #print self.response_header
                                        #print body_file_type
                                        # NOTE(review): `urlid` is not defined
                                        # in this class; presumably a
                                        # module-level name -- confirm.
                                        print urlid
                                        update_traffic(urlid, self.request_url,
                                                       self.response_header,
                                                       body_file_type)

                                    except Exception as e:
                                        print "Exception in Body retrive" + str(
                                            e)
                                        content_length = 0
                                        pass

                            except Exception as e:
                                # parsing is best-effort; relaying continues
                                print e
                                pass

                            # if the file type in the traffic is jar, class, pdf, flash, executable:
                            # save those files

                            out.send(data)
                            # activity seen: restart the idle counter
                            count = 0

                    except Exception as e:
                        print e
                        pass
            if count == time_out_max:
                break
class CometaClient(object):
    """Connect a device to the Cometa infrastructure"""

    # Meaning of the codes stored in self.error.
    errors = {0: 'ok', 1: 'timeout', 2: 'network error', 3: 'protocol error',
              4: 'authorization error', 5: 'wrong parameters',
              9: 'internal error'}

    def __init__(self, server, port, application_id):
        """
        The Cometa instance constructor.

        server: the Cometa server FQDN
        port: the Cometa server port
        application_id: the Cometa application ID
        """
        self.error = 9
        self.debug = False

        self._server = server
        self._port = port
        self._app_id = application_id
        self._message_cb = None

        self._device_id = ""
        self._platform = ""
        self._hparser = None
        self._sock = None  # created in attach()
        self._heartbeat_rate = 60
        self._trecv = None
        self._thbeat = None
        # serializes writes to the socket
        self._hb_lock = threading.Lock()
        self._reconnecting = False
        return

    def attach(self, device_id, device_info):
        """
        Attach the specified device to a Cometa registered application.
        Authentication is done using only the application_id (one-way authentication).

        device_id: the device unique identifier
        device_info: a description of the platform or the device (used only as a comment)

        Returns the body of the server's attach reply. ``self.error`` is set
        to 0 on success and to 5 when the reply does not start with
        ``"msg":"200 OK"``. Returns None with ``self.error`` set to 2 when
        any exception occurs, and None with ``self.error`` left unchanged
        when the server closes the connection before sending headers.

        On a first successful attach the heartbeat and receive threads are
        started; during a reconnection they are left alone.
        """
        self._device_id = device_id
        self._platform = device_info
        self._hparser = HttpParser()
        self._sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            self._sock.connect((self._server, self._port))
            sendBuf = "POST /v1/applications/%s/devices/%s HTTP/1.1\r\nHost: api.cometa.io\r\nContent-Length:%d\r\n\r\n%s" % (self._app_id, device_id, len(device_info), device_info)
            # sendall() keeps writing until the whole request is out; the
            # socket is still blocking at this point.
            self._sock.sendall(sendBuf)
            recvBuf = ""
            while True:
                data = self._sock.recv(1024)
                if not data:
                    break

                dataLen = len(data)
                nparsed = self._hparser.execute(data, dataLen)
                assert nparsed == dataLen

                if self._hparser.is_headers_complete():
                    if self.debug:
                        print("connection for device %s complete" % (device_id))
                        print(self._hparser.get_headers())
                    # reading the attach complete message from the server
                    # i.e. {"msg":"200 OK","heartbeat":60,"timestamp":1441382935}
                    recvBuf = self._hparser.recv_body()
                    # TODO: check for error in connecting, i.e. 403 already connected

                    if len(recvBuf) < 16 or recvBuf[1:15] != '"msg":"200 OK"':
                        self.error = 5
                        return recvBuf

                    # reset error
                    self.error = 0

                    # set the socket non blocking
                    self._sock.setblocking(0)

                    # do not (re)start the threads during a reconnection
                    if self._reconnecting:
                        self._reconnecting = False
                        return recvBuf

                    # start the heartbeat thread
                    self._thbeat = threading.Thread(target=self._heartbeat)
                    self._thbeat.daemon = True
                    self._thbeat.start()

                    # start the receive thread
                    self._trecv = threading.Thread(target=self._receive)
                    self._trecv.daemon = True  # force to exit on SIGINT
                    self._trecv.start()

                    return recvBuf
        except Exception:
            # Exception rather than a bare except, so that KeyboardInterrupt
            # and SystemExit are not reported as a network error.
            self.error = 2
            return

    def send_data(self, msg):
        """
        Send a data event message upstream to the Cometa server.
        If a Webhook is specified for the Application in the Cometa configuration file /etc/cometa.conf on the server,
        the message is relayed to the Webhook. Also, the Cometa server propagates the message to all open devices Websockets.

        Returns 0 on success and -1 when the device is reconnecting or the
        socket write failed.
        """
        # one chunk: hex length, then a BEL marker byte followed by the message
        sendBuf = "%x\r\n%c%s\r\n" % (len(msg) + 1, '\07', msg)
        if self._reconnecting:
            if self.debug:
                print("Error in Cometa.send_data(): device is reconnecting.")
            return -1
        try:
            # `with` releases the lock even when send() raises. The explicit
            # acquire()/release() pair left it held forever after a failed
            # write, blocking every later sender.
            with self._hb_lock:
                self._sock.send(sendBuf)
        except Exception:
            if self.debug:
                print("Error in Cometa.send_data(): socket write failed.")
            return -1
        return 0
def request(self, method, url, headers=None, body=None, timeout=None):
    '''Send a `method` request for `url` over the current connection.

    `headers` and `body` are sent along as given. This is a low-level call:
    the caller supplies "host" and any other header it needs. Only
    Content-Length is filled in automatically, and only when a body is
    present and the caller did not set it.

    Blocks until a complete response has been parsed and returns it as a
    `Response`. `timeout` defaults to 60 when falsy.
    '''
    if not headers:
        headers = {}
    url_info = urlparse(url)

    # Describe the outgoing request as a WSGI environ so that Request can
    # render the header block for us.
    fake_wsgi = {}
    for name, value in headers.iteritems():
        fake_wsgi[cgi_name(name)] = str(value).strip()

    if body and 'CONTENT_LENGTH' not in fake_wsgi:
        # Caller sent a body without sizing it: do it on their behalf.
        fake_wsgi['CONTENT_LENGTH'] = str(len(body))

    fake_wsgi['REQUEST_METHOD'] = method
    fake_wsgi['SCRIPT_NAME'] = ''
    fake_wsgi['PATH_INFO'] = url_info[2]
    fake_wsgi['QUERY_STRING'] = url_info[4]
    fake_wsgi['wsgi.version'] = (1, 0)
    fake_wsgi['wsgi.url_scheme'] = 'http'  # XXX incomplete
    fake_wsgi['wsgi.input'] = cStringIO.StringIO(body or '')
    fake_wsgi['wsgi.errors'] = FileLikeErrorLogger(hlog)
    fake_wsgi['wsgi.multithread'] = False
    fake_wsgi['wsgi.multiprocess'] = False
    fake_wsgi['wsgi.run_once'] = False
    req = Request(fake_wsgi)

    timeout_handler = TimeoutHandler(timeout or 60)

    target = str(req.path)
    if req.query_string:
        target = target + '?' + str(req.query_string)

    send('%s %s HTTP/1.1\r\n%s' % (req.method, target, str(req.headers)))
    if body:
        send(body)

    parser = HttpParser()
    received = []
    pending = None
    while True:
        if pending:
            parser.execute(pending, len(pending))
            if parser.is_headers_complete():
                received.append(parser.recv_body())
            if parser.is_message_complete():
                break
        # Wait for more bytes, or for the remaining time budget to run out.
        ev, val = first(receive_any=True, sleep=timeout_handler.remaining())
        if ev == 'sleep':
            timeout_handler.timeout()
        pending = val

    return Response(
        response=''.join(received),
        status=parser.get_status_code(),
        headers=parser.get_headers(),
    )
def request(self, method, url, headers=None, body=None, timeout=None):
    """Send a `method` request for `url` over the current connection.

    `headers` and `body` are sent along as given. This is a low-level call:
    the caller supplies "host" and any other header it needs. Only
    Content-Length is filled in automatically, and only when a body is
    present and the caller did not set it.

    Blocks until a complete response has been parsed and returns it as a
    `Response`. `timeout` defaults to 60 when falsy.
    """
    if not headers:
        headers = {}
    url_info = urlparse(url)

    # Describe the outgoing request as a WSGI environ so that Request can
    # render the header block for us.
    fake_wsgi = {}
    for name, value in headers.iteritems():
        fake_wsgi[cgi_name(name)] = str(value).strip()

    if body and "CONTENT_LENGTH" not in fake_wsgi:
        # Caller sent a body without sizing it: do it on their behalf.
        fake_wsgi["CONTENT_LENGTH"] = str(len(body))

    fake_wsgi["REQUEST_METHOD"] = method
    fake_wsgi["SCRIPT_NAME"] = ""
    fake_wsgi["PATH_INFO"] = url_info[2]
    fake_wsgi["QUERY_STRING"] = url_info[4]
    fake_wsgi["wsgi.version"] = (1, 0)
    fake_wsgi["wsgi.url_scheme"] = "http"  # XXX incomplete
    fake_wsgi["wsgi.input"] = cStringIO.StringIO(body or "")
    fake_wsgi["wsgi.errors"] = FileLikeErrorLogger(hlog)
    fake_wsgi["wsgi.multithread"] = False
    fake_wsgi["wsgi.multiprocess"] = False
    fake_wsgi["wsgi.run_once"] = False
    req = Request(fake_wsgi)

    timeout_handler = TimeoutHandler(timeout or 60)

    target = str(req.path)
    if req.query_string:
        target = target + "?" + str(req.query_string)

    send("%s %s HTTP/1.1\r\n%s" % (req.method, target, str(req.headers)))
    if body:
        send(body)

    parser = HttpParser()
    received = []
    pending = None
    while True:
        if pending:
            parser.execute(pending, len(pending))
            if parser.is_headers_complete():
                received.append(parser.recv_body())
            if parser.is_message_complete():
                break
        # Wait for more bytes, or for the remaining time budget to run out.
        ev, val = first(receive_any=True, sleep=timeout_handler.remaining())
        if ev == "sleep":
            timeout_handler.timeout()
        pending = val

    return Response(
        response="".join(received),
        status=parser.get_status_code(),
        headers=parser.get_headers(),
    )
class HTTPProtocol(FlowControlMixin, asyncio.Protocol):
    """asyncio protocol that parses one HTTP request and hands it to a callback.

    Incoming bytes are fed to an ``HttpParser``.  As soon as the request
    headers are complete a task running :meth:`dispatch` is started with a
    dict describing the request; body bytes reported by the parser are
    forwarded to the supplied stream reader.

    :param stream_reader: reader that receives the request body bytes.
    :param callback: called by :meth:`dispatch` as
        ``callback(request, request_body)``; must produce
        ``(status, headers, body)``.
    :param loop: event loop passed to the base class and used for the task.
    """

    def __init__(self, stream_reader, callback, loop=None):
        super().__init__(loop=loop)
        self._stream_reader = stream_reader
        self._stream_writer = None
        self._callback = callback
        self._task = None
        self._server = None

    def connection_made(self, transport):
        """Create the parser and writer for a freshly opened connection."""
        self._parser = HttpParser()
        self._stream_reader.set_transport(transport)
        self._stream_writer = asyncio.StreamWriter(
            transport,
            self,
            self._stream_reader,
            self._loop,
        )
        # Grab the name of our socket if we have it
        self._server = transport.get_extra_info("sockname")

    def connection_lost(self, exc):
        """Propagate end-of-stream (or the error) to the stream reader."""
        if exc is None:
            self._stream_reader.feed_eof()
        else:
            self._stream_reader.set_exception(exc)
        super().connection_lost(exc)

    def data_received(self, data):
        """Parse incoming bytes, start dispatch once, and forward body data."""
        # Parse our incoming data with our HTTP parser
        self._parser.execute(data, len(data))

        # If we have not already handled the headers and we've gotten all of
        # them, then invoke the callback with the headers in them.
        if self._task is None and self._parser.is_headers_complete():
            coro = self.dispatch(
                {
                    "server": self._server,
                    "protocol": b"HTTP/" + b".".join(
                        str(x).encode("ascii")
                        for x in self._parser.get_version()
                    ),
                    "method": self._parser.get_method().encode("latin1"),
                    "path": self._parser.get_path().encode("latin1"),
                    "query": self._parser.get_query_string().encode("latin1"),
                    "headers": self._parser.get_headers(),
                },
                self._stream_reader,
                self._stream_writer,
            )
            self._task = asyncio.Task(coro, loop=self._loop)

        # Determine if we have any data in the body buffer and if so feed it
        # to our StreamReader
        if self._parser.is_partial_body():
            self._stream_reader.feed_data(self._parser.recv_body())

        # Determine if we've completed the end of the HTTP request, if we have
        # then we should close our stream reader because there is nothing more
        # to read.
        if self._parser.is_message_complete():
            self._stream_reader.feed_eof()

    def eof_received(self):
        """Forward a client-side EOF to the stream reader."""
        # We've gotten an EOF from the client, so we'll propagate this to our
        # StreamReader
        self._stream_reader.feed_eof()

    @asyncio.coroutine
    def dispatch(self, request, request_body, response):
        """Run the callback and write its result to ``response``.

        :param request: dict describing the parsed request line and headers.
        :param request_body: stream reader carrying the request body.
        :param response: stream writer for the reply; it is always closed
            when this coroutine finishes, whether it succeeds or raises.

        Any exception from the callback, from a body coroutine or from
        writing still propagates to the task after the writer is closed.
        """
        try:
            # Get the status, headers, and body from the callback. The body
            # must be iterable, and each item can either be a bytes object,
            # or an asyncio coroutine, in which case we'll ``yield from`` on
            # it to wait for its value.
            status, resp_headers, body = yield from self._callback(
                request,
                request_body,
            )

            # Write out the status line to the client for this request
            # TODO: We probably don't want to hard code HTTP/1.1 here
            response.write(b"HTTP/1.1 " + status + b"\r\n")

            # Write out the headers, taking special care to ensure that any
            # mandatory headers are added.
            # TODO: We need to handle some required headers
            for key, values in resp_headers.items():
                # In order to handle headers which need to have multiple
                # values like Set-Cookie, we allow the value of the header to
                # be an iterable instead of a bytes object, in which case
                # we'll write multiple header lines for this header.
                if isinstance(values, (bytes, bytearray)):
                    values = [values]

                for value in values:
                    response.write(key + b": " + value + b"\r\n")

            # Before we get to the body, we need to write a blank line to
            # separate the headers and the response body
            response.write(b"\r\n")

            for chunk in body:
                # If the chunk is a coroutine, then we want to wait for the
                # result before we write it.
                if asyncio.iscoroutine(chunk):
                    chunk = yield from chunk

                # Write our chunk out to the connected client
                response.write(chunk)
        finally:
            # Close the connection whether or not everything was written;
            # without this a failing callback would leave the connection
            # open indefinitely.
            response.close()
# NOTE(review): the statements down to ``jsfile.close()`` read ``date``,
# ``url``, ``path`` and ``script`` without defining them in this fragment;
# they look like the tail of a function whose header is not visible here.
# Confirm the intended indentation against the complete file.

# Provenance comment written at the top of the saved script; the date part
# is omitted when ``date`` is falsy.
header = "// Retrieved %sfrom %s\n" % (date + ", " if date else "", url)
path = args.prefix + "/" + path
try:
    os.makedirs(os.path.dirname(path))
except OSError:
    # Any OSError (for instance the directory already existing) is ignored.
    pass
jsfile = open(path, "w")
jsfile.write(header)
jsfile.write(script)
jsfile.close()

# Walk every WARC file given on the command line and feed the body of each
# "response" record to the extractor.
extractor = ScriptExtractor()
for filename in args.files:
    print >>sys.stderr, "Extracting JS files from %s..." % filename
    for record in warc.open(filename):
        if record.type == "response":
            # A fresh parser per record splits the stored HTTP payload into
            # headers and body.
            http_parser = HttpParser()
            http_parser.execute(record.payload, record.header.content_length)
            header = http_parser.get_headers()
            extractor.init(record.url, header.get("Date", None))
            try:
                extractor.feed(http_parser.recv_body().decode("latin_1"))
            except Exception:
                # Ordinary errors: let the extractor clean up, print the
                # traceback and carry on with the next record.
                extractor.handle_error()
                print_exc()
            except:
                # Anything not derived from Exception: clean up, then
                # re-raise so the script stops.
                extractor.handle_error()
                raise
class ConnectionHandler:
    """Handle one proxied client connection and record the traffic it carries.

    The constructor does all the work: it reads the request line from the
    client, connects to the requested target, relays data in both directions
    and, while relaying, feeds every chunk through an ``HttpParser`` so the
    headers and body can be scanned and stored.

    NOTE(review): ``BUFLEN``, ``HTTPVER``, ``VERSION``, ``urlid``, ``ms``,
    ``yara_match``, ``insert_html``, ``update_traffic_link`` and
    ``binary_found`` are not defined in this class; presumably they are
    module-level names -- confirm.
    """

    def __init__(self, connection, address, timeout):
        """Serve ``connection`` until the relay loop ends, then close sockets.

        ``address`` is accepted but not used in this method.  ``timeout`` is
        read by ``_read_write`` to decide how long the relay may stay idle.
        """
        self.body_file =""
        self.p = HttpParser()
        self.body = []
        self.request_url = ""
        self.response_header = []
        self.header_done = False
        self.url =""
        self.controller = []
        self.controller_ip = []
        self.client = connection
        self.client_buffer = ''
        self.timeout = timeout
        self.method, self.path, self.protocol = self.get_base_header()
        if self.method=='CONNECT':
            self.method_CONNECT()
        elif self.method in ('OPTIONS', 'GET', 'HEAD', 'POST', 'PUT',
                             'DELETE', 'TRACE'):
            self.method_others()
        self.client.close()
        # NOTE(review): ``self.target`` is only assigned inside
        # ``_connect_target`` when the socket is created; for an unlisted
        # method or a failed lookup this line looks like it would raise
        # AttributeError -- confirm.
        self.target.close()
        #clear
        #print self.controller , self.controller_ip

    def get_base_header(self):
        """Read up to the first newline and return the request line split on whitespace.

        Whatever follows the first newline stays in ``self.client_buffer``.
        """
        while 1:
            self.client_buffer += self.client.recv(BUFLEN)
            end = self.client_buffer.find('\n')
            if end!=-1:
                break
        # Request lines mentioning 127.0.0.1 are not remembered in self.url.
        if not "127.0.0.1" in self.client_buffer[:end]:
            # Insert URL into database here
            self.url = '%s'%self.client_buffer[:end]
        data = (self.client_buffer[:end+1]).split()
        self.client_buffer = self.client_buffer[end+1:]
        #print data
        return data

    def method_CONNECT(self):
        """Connect to the target, acknowledge the tunnel to the client, then relay."""
        self._connect_target(self.path)
        self.client.send(HTTPVER+' 200 Connection established\n'+
                         'Proxy-agent: %s\n\n'%VERSION)
        self.client_buffer = ''
        self._read_write()

    def method_others(self):
        """Forward a non-CONNECT request to the target host, then relay."""
        # Drops the first 7 characters of the path -- presumably the
        # "http://" prefix of an absolute URL; confirm.
        self.path = self.path[7:]
        i = self.path.find('/')
        host = self.path[:i]
        path = self.path[i:]
        self._connect_target(host)
        # Re-send the request line with the host-relative path, followed by
        # whatever was already buffered after the request line.
        self.target.send('%s %s %s\n'%(self.method, path, self.protocol)+
                         self.client_buffer)
        self.client_buffer = ''
        self._read_write()

    def _connect_target(self, host):
        """Open ``self.target`` to ``host`` (``name`` or ``name:port``, default port 80).

        On any failure the error is printed and reported through
        ``update_traffic_link``; no exception is raised from here.
        ``self.request_url`` is set in both cases.
        """
        i = host.find(':')
        if i!=-1:
            port = int(host[i+1:])
            host = host[:i]
            #print host
        else:
            port = 80
        try:
            (soc_family, _, _, _, address) = socket.getaddrinfo(host, port)[0]
            self.target = socket.socket(soc_family)
            self.target.connect(address)
        except Exception as e:
            # Fall back to the host name so the summary line below can still
            # be built.
            address =host
            print "Error Connecting to:"+str(address)
            connect_ip = "Error Connecting to:"+str(address)
            update_traffic_link(urlid,connect_ip,"Unable to Connect","Nil","")
            # insert to db here
        # Concatenate host, resolved address and request line into one string
        self.request_url = str(host)+" | "+str(address)+" | "+str(self.url)
        #debug
        #print self.request_url

    def _read_write(self):
        """Relay data between client and target while inspecting it.

        Each loop iteration waits up to 3 seconds in ``select``.  ``count``
        is reset to 0 after every successful send, and the loop ends when it
        equals ``self.timeout/3`` or when ``select`` reports a socket error.

        NOTE(review): data from both directions is fed to the same parser
        instance ``self.p`` -- confirm that this is intended.
        NOTE(review): the exit test uses ``==``; if ``self.timeout/3`` is
        not a whole number the idle exit would never fire -- confirm.
        """
        time_out_max = self.timeout/3
        socs = [self.client, self.target]
        count = 0
        while 1:
            count += 1
            (recv, _, error) = select.select(socs, [], socs, 3)
            if error:
                break
            if recv:
                for in_ in recv:
                    try:
                        #print " Receving Data "
                        data = in_.recv(10000)
                    except Exception as e:
                        # NOTE(review): after a failed recv ``data`` keeps
                        # its previous value (or is unbound on the first
                        # pass) -- confirm.
                        print e
                        pass
                    # Whatever arrived on one side is sent to the other.
                    if in_ is self.client:
                        out = self.target
                    else:
                        out = self.client
                    try:
                        if data:
                            # Traffic analysis happens here
                            #print data
                            try:
                                # Parse the data with the http_parser module
                                recved = len(data)
                                #print "We received so far "+ str(recved)
                                nparsed = self.p.execute(data, recved)
                                assert nparsed == recved
                                # Record the headers once, as soon as they
                                # are complete
                                if self.p.is_headers_complete() and not self.header_done:
                                    # The parser returns the headers as a
                                    # mapping
                                    header_s = self.p.get_headers()
                                    # Store each one as "Name: value",
                                    # e.g. "Connection: close"
                                    for header, value in header_s.items():
                                        res_header = header+": "+value
                                        self.response_header.append(res_header)
                                    self.header_done = True
                                    # Put header to database.
                                # If body data is available, append it to
                                # the list of body chunks
                                if self.p.is_partial_body():
                                    self.body.append(self.p.recv_body())
                                    #print "appending body" +self.p.recv_body()
                                    #print self.body
                                # The current message has been parsed
                                # completely
                                if self.p.is_message_complete():
                                    try:
                                        try:
                                            content_length = self.p.get_headers()['content-length']
                                        except Exception as e:
                                            print "Exception in Body retrive-sub"+str(e)
                                            content_length = 0
                                            pass
                                        self.body_file = "".join(self.body)
                                        # Type description of the first
                                        # 1024 bytes; ``ms`` is presumably a
                                        # libmagic handle -- confirm.
                                        body_file_type = ms.buffer(self.body_file[:1024])
                                        signature_scan = ""
                                        html_source =""
                                        html_body=""
                                        html_body = self.body_file
                                        if "gzip" in body_file_type:
                                            try:
                                                print " Decoding GZIp html\n"
                                                # 16+MAX_WBITS makes zlib
                                                # expect a gzip header
                                                html_body = zlib.decompress(html_body, 16+zlib.MAX_WBITS)
                                                #print "source"+str(html_body)
                                            except Exception as e:
                                                print "Error gzip decoding:"+str(e)
                                        print urlid
                                        signature_scan_body = yara_match(html_body)
                                        signature_scan_request = yara_match(self.request_url)
                                        signature_scan_response =""
                                        self_response = ""
                                        try:
                                            # The headers are a list; join
                                            # them into one string for the
                                            # substring checks
                                            print self.response_header
                                            self_response = ''.join(self.response_header)
                                            if "Content-Disposition:" in self_response and "attachment;" in self_response:
                                                signature_scan_response = "Forced-file-download"
                                                print " Signatured matched in response"
                                        except Exception as e:
                                            print e,"Error in header_match"
                                        signature_scan = str(signature_scan_body) +""+str(signature_scan_request)+""+signature_scan_response
                                        #print self.request_url
                                        #print self.response_header
                                        #print body_file_type
                                        # NOTE(review): a combined length
                                        # above 6 is treated as "something
                                        # matched"; the threshold presumably
                                        # depends on how ``yara_match``
                                        # renders an empty result -- confirm.
                                        if len(signature_scan) > 6:
                                            try:
                                                print " Signatured found and Updating\n"
                                                body_file_type = "Signature_Matched: "+signature_scan+" ing "+body_file_type
                                                insert_html(urlid,html_body,signature_scan)
                                                html_source = html_body
                                            except Exception as e:
                                                print "Error in Traffic Signature"+str(e)
                                        print " Trffic Updated\n"
                                        update_traffic_link(urlid,self.request_url,self.response_header,body_file_type,html_source)
                                        if "executable" in body_file_type:
                                            print "\nExecutable found\n"
                                            binary_found(urlid)
                                    except Exception as e:
                                        print "Exception in Body retrive"+str(e)
                                        content_length = 0
                                        pass
                            except Exception as e:
                                # Analysis errors are printed and must not
                                # stop the relay.
                                print e
                                pass
                            # If the file type in the traffic is jar, class,
                            # pdf, flash or executable, save those files
                            out.send(data)
                            count = 0
                    except Exception as e:
                        print e
                        pass
            if count == time_out_max:
                break
# NOTE(review): fragment of a manual parser check.  ``p``, ``nparsed``,
# ``recved`` and the first ``rsp_2`` are defined before this fragment, and the
# nesting of the ``print`` lines was reconstructed from collapsed
# whitespace -- confirm against the complete file.

# The parser must have consumed everything that was just fed to it.
assert nparsed == recved
if p.is_message_complete():
    print "message complete"
else:
    print "message incomplete"
    print p.recv_body()

# Feed the second part of the response and check the parser state again.
recved = len(rsp_2)
nparsed = p.execute(rsp_2, recved)
assert nparsed == recved
if p.is_message_complete():
    print "message complete"
    print p.recv_body()
    print p.get_headers()
else:
    print "message incomplete"
    print p.recv_body()
print "--------------------"

# Load the two halves of the next test response.
rsp_1 = ""
with open("../testing/test2_response_part1.http", "r") as f:
    rsp_1 = f.readlines()
    rsp_1 = "".join(rsp_1)
    # The last two characters of part 1 are dropped.
    rsp_1 = rsp_1[:-2]
rsp_2 = ""
with open("../testing/test2_response_part2.http", "r") as f:
    rsp_2 = f.readlines()
    rsp_2 = "".join(rsp_2)
def run(self):
    """Stream ``self.extension`` from the server forever and dispatch triggers.

    Each pass of the outer loop opens a TLS connection (authentication is
    chosen by ``self.authmethod``: ``"basic"`` or ``"cert"``), sends a GET
    request, reads the response headers and then reads the body as it
    arrives.  The body is split on newlines; every complete line is passed
    to ``_get_trigger_payload_from_line`` and, unless that returns ``0``,
    dispatched with ``self.TRIGGER_REF``.  When the read loop ends the
    socket is closed and a new connection is made.

    Raises:
        socket.error: if the connection cannot be established.
        KeyError: if ``self.authmethod`` is neither ``"basic"`` nor
            ``"cert"`` (a missing certificate path in ``self.config`` is
            reported with the same message).
    """
    self._log.info('Watch %s for new data.' % self.extension)
    while True:
        try:
            self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            if self.authmethod == "basic":
                self.client = ssl.wrap_socket(self.sock)
            elif self.authmethod == "cert":
                self.client = ssl.wrap_socket(
                    self.sock,
                    keyfile=self.config['client_cert_key_path'],
                    certfile=self.config['client_cert_path'])
            else:
                raise KeyError('No authentication mechanisms defined')
            self._log.debug('Connecting to %s %i' % (self.host, self.port))
            # self.client.settimeout(10)
            self.client.connect((self.host, self.port))
        except socket.error as exc:
            self._log.exception('unable to connect to %s: %s' %
                                (self.host, exc))
            raise
        except KeyError:
            raise KeyError('No authentication mechanisms defined')

        if self.authhead is not None:
            self.client.send("GET %s HTTP/1.1\r\nHost: %s\r\n%s\r\n\r\n" %
                             (self.extension, self.host, self.authhead))
        else:
            self.client.send("GET %s HTTP/1.1\r\nHost: %s\r\n\r\n" %
                             (self.extension, self.host))

        readers = [self.client]
        writers = []
        out_of_band = []
        # Tail of the body that has not been terminated by a newline yet.
        pending = b''
        parser = HttpParser()

        # Phase 1: read until the response headers are complete.
        self._log.debug("+")
        while not parser.is_headers_complete():
            self._log.debug(".")
            try:
                chunk = self.client.recv(io.DEFAULT_BUFFER_SIZE)
            except socket.error as exc:
                err = exc.args[0]
                self._log.debug('a recv err (%s): %s' % (err, exc))
                break
            if not chunk:
                # No exception is active here, so log a plain error rather
                # than a traceback.
                self._log.error('a No response from %s' % self.extension)
                break
            self._log.debug('a chunk %s' % chunk)
            nreceived = len(chunk)
            nparsed = parser.execute(chunk, nreceived)
            if nparsed != nreceived:
                self._log.error('a nparsed %i != nreceived %i' %
                                (nparsed, nreceived))
                break
        self._log.debug('parser headers complete %s' % parser.get_headers())

        # Phase 2: read the body as it arrives and dispatch complete lines.
        while True:
            self._log.debug("-")
            try:
                readable, _, _ = select.select(readers, writers, out_of_band)
            except select.error as exc:
                # ``readable`` was not assigned by this call; leave the read
                # loop instead of using an unbound or stale value.
                self._log.debug("b select error: %s" % exc)
                break
            if not readable:
                self._log.debug('b not readable')
                break
            try:
                chunk = self.client.recv(io.DEFAULT_BUFFER_SIZE)
            except socket.error as exc:
                err = exc.args[0]
                self._log.debug('b recv err (%s): %s' % (err, exc))
                break
            if not chunk:
                self._log.debug('b not chunk')
                self.client.close()  # pylint: disable=no-member
                break
            nreceived = len(chunk)
            self._log.debug('b chunk %s' % chunk)
            self._log.debug("repr: %s" % repr(chunk))
            # A chunk that starts with the terminating zero-length chunk of
            # a chunked transfer marks the end of the stream.
            if re.match(r'0\r\n\r\n', chunk, re.M):
                self._log.debug('b end end end')
                break
            nparsed = parser.execute(chunk, nreceived)
            if nparsed != nreceived:
                self._log.error('b nparsed %i != nreceived %i' %
                                (nparsed, nreceived))
                break
            data = pending + parser.recv_body()
            msg = "DATA: %s" % data
            self._log.debug(msg)
            lines = data.split(b'\n')
            # The last element is an incomplete line (or empty); keep it
            # for the next chunk.
            pending = lines.pop(-1)
            for line in lines:
                trigger_payload = self._get_trigger_payload_from_line(line)
                if trigger_payload == 0:
                    pass
                else:
                    self._log.info('Triggering Dispatch Now')
                    self._sensor_service.dispatch(trigger=self.TRIGGER_REF,
                                                  payload=trigger_payload)
        self._log.debug('main loop done')
        self.client.close()  # pylint: disable=no-member
class HTTPSession(base_object.BaseObject):
    """One HTTP request/response exchange.

    Builds the request text from the method, request object and version
    given at construction time, and feeds response data through an
    ``HttpParser``, passing body data on to a writer set with
    ``set_writer``.
    """

    # Class-level defaults; instances overwrite the ones they use.
    _http_header = ""
    _method = ""
    _version = ""
    _req_obj = ""
    _user_agent = "User-Agent: COS-598C-Project-Client\r\n"
    _accept = "Accept: */*\r\n"
    _accept_enc = "Accept-Encoding: *\r\n"
    _accept_charset = "Accept-Charset: *\r\n"
    _host = ""
    _writer = ""
    _closeable = False
    _http_parser = ""
    _nr_bytes = 0

    def __init__(self, method, req_obj, version):
        """Remember the request-line parts and create a fresh parser."""
        self._method, self._req_obj, self._version = method, req_obj, version
        self._http_parser = HttpParser()

    def _build_first_line(self):
        """Return the request line: method, object and version, CRLF-terminated."""
        request_line = " ".join((self._method, self._req_obj, self._version))
        return request_line + "\r\n"

    def set_host(self, host):
        """Store the ``Host`` header line for ``host``."""
        self._host = "".join(("Host: ", host, "\r\n"))

    def set_writer(self, writer):
        """Set the object whose ``write`` receives response body data."""
        self._writer = writer

    def write_response(self, data):
        """Feed ``data`` to the parser and return the total bytes seen so far.

        Body data reported by the parser is written to the writer, and the
        session is marked closeable once the message is complete.
        """
        size = len(data)
        parser = self._http_parser
        consumed = parser.execute(data, size)
        assert consumed == size
        self._nr_bytes += size
        if parser.is_partial_body():
            self._writer.write(str(parser.recv_body()))
        if parser.is_message_complete():
            self._closeable = True
        return self._nr_bytes

    def get_response_headers(self):
        """Return the response headers, or ``None`` until they are complete."""
        if not self._http_parser.is_headers_complete():
            return None
        return self._http_parser.get_headers()

    def closeable(self):
        """Tell whether a complete response message has been parsed."""
        return self._closeable

    def set_port(self, port):
        """Accept a port and do nothing with it."""
        return None

    def get_request(self):
        """Assemble, store and return the full request header text."""
        pieces = (
            self._build_first_line(),
            self._host,
            self._user_agent,
            self._accept,
            self._accept_enc,
            self._accept_charset,
            "\r\n",
        )
        self._http_header = "".join(pieces)
        return self._http_header
class ConnectionHandler:
    """Handle one proxied client connection and record the traffic it carries.

    The constructor does all the work: it reads the request line from the
    client, connects to the requested target, relays data in both directions
    and, while relaying, feeds every chunk through an ``HttpParser`` so the
    headers and body type can be stored with ``update_traffic``.

    NOTE(review): ``BUFLEN``, ``HTTPVER``, ``VERSION``, ``urlid``, ``ms`` and
    ``update_traffic`` are not defined in this class; presumably they are
    module-level names -- confirm.
    """

    def __init__(self, connection, address, timeout):
        """Serve ``connection`` until the relay loop ends, then close sockets.

        ``address`` is accepted but not used in this method.  ``timeout`` is
        read by ``_read_write`` to decide how long the relay may stay idle.
        """
        self.body_file =""
        self.p = HttpParser()
        self.body = []
        self.request_url = ""
        self.response_header = []
        self.header_done = False
        self.url =""
        self.controller = []
        self.controller_ip = []
        self.client = connection
        self.client_buffer = ''
        self.timeout = timeout
        self.method, self.path, self.protocol = self.get_base_header()
        if self.method=='CONNECT':
            self.method_CONNECT()
        elif self.method in ('OPTIONS', 'GET', 'HEAD', 'POST', 'PUT',
                             'DELETE', 'TRACE'):
            self.method_others()
        self.client.close()
        # NOTE(review): ``self.target`` is only assigned inside
        # ``_connect_target`` when the socket is created; for an unlisted
        # method or a failed lookup this line looks like it would raise
        # AttributeError -- confirm.
        self.target.close()
        #clear
        #print self.controller , self.controller_ip

    def get_base_header(self):
        """Read up to the first newline and return the request line split on whitespace.

        Whatever follows the first newline stays in ``self.client_buffer``.
        """
        while 1:
            self.client_buffer += self.client.recv(BUFLEN)
            end = self.client_buffer.find('\n')
            if end!=-1:
                break
        # Request lines mentioning 127.0.0.1 are not remembered in self.url.
        if not "127.0.0.1" in self.client_buffer[:end]:
            # Insert URL into database here
            self.url = '%s'%self.client_buffer[:end]
        data = (self.client_buffer[:end+1]).split()
        self.client_buffer = self.client_buffer[end+1:]
        #print data
        return data

    def method_CONNECT(self):
        """Connect to the target, acknowledge the tunnel to the client, then relay."""
        self._connect_target(self.path)
        self.client.send(HTTPVER+' 200 Connection established\n'+
                         'Proxy-agent: %s\n\n'%VERSION)
        self.client_buffer = ''
        self._read_write()

    def method_others(self):
        """Forward a non-CONNECT request to the target host, then relay."""
        # Drops the first 7 characters of the path -- presumably the
        # "http://" prefix of an absolute URL; confirm.
        self.path = self.path[7:]
        i = self.path.find('/')
        host = self.path[:i]
        path = self.path[i:]
        self._connect_target(host)
        # Re-send the request line with the host-relative path, followed by
        # whatever was already buffered after the request line.
        self.target.send('%s %s %s\n'%(self.method, path, self.protocol)+
                         self.client_buffer)
        self.client_buffer = ''
        self._read_write()

    def _connect_target(self, host):
        """Open ``self.target`` to ``host`` (``name`` or ``name:port``, default port 80).

        Connection errors are printed rather than raised.
        """
        i = host.find(':')
        if i!=-1:
            port = int(host[i+1:])
            host = host[:i]
            #print host
        else:
            port = 80
        try:
            (soc_family, _, _, _, address) = socket.getaddrinfo(host, port)[0]
            print "Adress is ",address
            self.target = socket.socket(soc_family)
            self.target.connect(address)
        except Exception as e:
            print "Error Connecting to:"+str(host)+":"+str(port)
            self.request_url = "Error Connecting to:"+str(host)+":"+str(port)
            # insert to db here
        # Concatenate host, resolved address and request line into one string
        # NOTE(review): if ``getaddrinfo`` itself failed, ``address`` was
        # never assigned and this line looks like it would raise NameError;
        # otherwise it overwrites the error text stored just above --
        # confirm the intended behaviour.
        self.request_url = str(host)+" | "+str(address)+" | "+str(self.url)
        #debug
        #print self.request_url

    def _read_write(self):
        """Relay data between client and target while inspecting it.

        Each loop iteration waits up to 3 seconds in ``select``.  ``count``
        is reset to 0 after every successful send, and the loop ends when it
        equals ``self.timeout/3`` or when ``select`` reports a socket error.

        NOTE(review): data from both directions is fed to the same parser
        instance ``self.p`` -- confirm that this is intended.
        NOTE(review): the exit test uses ``==``; if ``self.timeout/3`` is
        not a whole number the idle exit would never fire -- confirm.
        """
        time_out_max = self.timeout/3
        socs = [self.client, self.target]
        count = 0
        while 1:
            count += 1
            (recv, _, error) = select.select(socs, [], socs, 3)
            if error:
                break
            if recv:
                for in_ in recv:
                    try:
                        #print " Receving Data "
                        data = in_.recv(84333)
                    except Exception as e:
                        # NOTE(review): after a failed recv ``data`` keeps
                        # its previous value (or is unbound on the first
                        # pass) -- confirm.
                        print e
                        pass
                    # Whatever arrived on one side is sent to the other.
                    if in_ is self.client:
                        out = self.target
                    else:
                        out = self.client
                    try:
                        if data:
                            # Traffic analysis happens here
                            #print data
                            try:
                                # Parse the data with the http_parser module
                                recved = len(data)
                                #print "We received so far "+ str(recved)
                                nparsed = self.p.execute(data, recved)
                                assert nparsed == recved
                                # Record the headers once, as soon as they
                                # are complete
                                if self.p.is_headers_complete() and not self.header_done:
                                    # The parser returns the headers as a
                                    # mapping
                                    header_s = self.p.get_headers()
                                    # Store each one as "Name: value",
                                    # e.g. "Connection: close"
                                    for header, value in header_s.items():
                                        res_header = header+": "+value
                                        self.response_header.append(res_header)
                                    self.header_done = True
                                    # Put header to database.
                                # If body data is available, append it to
                                # the list of body chunks
                                if self.p.is_partial_body():
                                    self.body.append(self.p.recv_body())
                                    #print "appending body" +self.p.recv_body()
                                    #print self.body
                                # The current message has been parsed
                                # completely
                                if self.p.is_message_complete():
                                    try:
                                        try:
                                            content_length = self.p.get_headers()['content-length']
                                        except Exception as e:
                                            print "Exception in Body retrive-sub"+str(e)
                                            content_length = 0
                                            pass
                                        self.body_file = "".join(self.body)
                                        # Type description of the first
                                        # 1024 bytes; ``ms`` is presumably a
                                        # libmagic handle -- confirm.
                                        body_file_type = ms.buffer(self.body_file[:1024])
                                        #print self.request_url
                                        #print self.response_header
                                        #print body_file_type
                                        print urlid
                                        update_traffic(urlid,self.request_url,self.response_header,body_file_type)
                                    except Exception as e:
                                        print "Exception in Body retrive"+str(e)
                                        content_length = 0
                                        pass
                            except Exception as e:
                                # Analysis errors are printed and must not
                                # stop the relay.
                                print e
                                pass
                            # If the file type in the traffic is jar, class,
                            # pdf, flash or executable, save those files
                            out.send(data)
                            count = 0
                    except Exception as e:
                        print e
                        pass
            if count == time_out_max:
                break