def __decode_data(resp: HTTPResponse) -> str:
    """Read the response body and decode it according to Content-Encoding.

    Supports gzip- and Brotli-compressed bodies; anything else is assumed
    to be plain UTF-8 text.

    :param resp: the HTTP response whose body should be read and decoded
    :return: the body decoded as a UTF-8 string
    """
    # Renamed from `type`, which shadowed the builtin of the same name.
    encoding = resp.getheader('Content-Encoding')
    if encoding == 'gzip':
        from io import BytesIO
        import gzip
        buff = BytesIO(resp.read())
        f = gzip.GzipFile(fileobj=buff)
        return f.read().decode('utf-8')
    elif encoding == 'br':
        import brotli  # third-party; imported lazily, only for Brotli bodies
        f = brotli.decompress(resp.read())
        return f.decode('utf-8')
    else:
        # The returned data is bytes; decode it as UTF-8.
        return resp.read().decode('utf-8')
def success(self, data: HTTPResponse) -> dict:
    """Summarise a successful HTTP response as a plain dict.

    :param data: the response to summarise
    :return: dict with status code, reason phrase, header list and raw body
    """
    summary = {
        'code': data.status,
        'reason': data.reason,
        'headers': data.getheaders(),
        'data': data.read(),
    }
    return summary
def _chunked_download(
    filename: str, response: HTTPResponse, size: int, progress_signal: pyqtSignal
) -> None:
    """
    Downloads the zip file and emits progress.

    :param filename: path of the file to write the download into
    :param response: the HTTP response to download the file from
    :param size: the size of the download, in bytes; used for progress
    :param progress_signal: the PyQt signal with which to emit download progress
    """
    if not size:
        # No size known: fall back to an estimated download size (~44 MB).
        size = 10 ** 6 * 44
    with open(filename, "wb") as f:
        bytes_downloaded = 0
        block_size = 1024 * 10
        while True:
            buffer = response.read(block_size)
            if not buffer:
                break
            f.write(buffer)
            # Count the bytes actually read: the final chunk is usually
            # shorter than block_size, so adding block_size every iteration
            # overstates progress (and can push it past 100%).
            bytes_downloaded += len(buffer)
            progress_signal.emit(bytes_downloaded / size * 100)
def from_raw(raw):
    """Parse the final HTTP response contained in *raw* into a Response.

    Raw is the output of curl, which is a full HTTP response.
    But in the case of a redirect, it is multiple concatenated responses.
    We want the final response, so we keep constructing new responses from
    this stream until we have reached the end.

    :param raw: raw response bytes as captured from curl
    :return: a Response built from the last response in the stream
    """
    while True:
        sock = FakeSocket(raw)
        response = HTTPResponse(sock)
        response.begin()
        # Bytes consumed so far (the parsed headers) plus the declared body
        # length give the total size of this response within the stream.
        # NOTE(review): response.length is None for chunked bodies — confirm
        # the captured responses always carry Content-Length.
        response_len = sock.file.tell() + response.length
        raw_len = len(raw)
        if raw_len == response_len:
            break
        # This response is an intermediate (redirect); skip past it and
        # parse the next one.
        raw = raw[response_len:]
    raw_body = response.read()
    return Response(
        response.status,
        body=RawResponseBody(raw_body),
        seqno=int_or_none(response.getheader(CCF_TX_SEQNO_HEADER)),
        view=int_or_none(response.getheader(CCF_TX_VIEW_HEADER)),
        global_commit=int_or_none(
            response.getheader(CCF_GLOBAL_COMMIT_HEADER)),
        headers=response.headers,
    )
def processHttpResponse(http_response_text, key_enrich, value_enrich):
    """Re-serialise an HTTP response with enriched headers.

    Parses the raw bytes, rewrites the status line, passes the headers
    through modify_http_header() and appends the original body.  Returns
    an empty bytearray when the input does not look like an HTTP response.
    """
    result = bytearray(b'')
    # Only parse payloads long enough to carry a status line and starting
    # with the literal b'HTTP' prefix.
    parsed = None
    if len(http_response_text) > 4 and http_response_text[:4] == b'HTTP':
        parsed = HTTPResponse(HTTPResponseSocket(http_response_text))
        parsed.begin()
    if parsed is not None:
        version = "HTTP/1.1" if parsed.version == 11 else "HTTP/1.0"
        status_line = version + ' ' + str(parsed.status) + ' ' + str(parsed.reason) + '\r\n'
        result.extend(status_line.encode('utf-8'))
        result.extend(
            modify_http_header(parsed.getheaders(), key_enrich, value_enrich))
        result.extend(parsed.read())
    return result
def _extract_variables(self, data: bytes) -> Mapping[str, bytes]:
    """
    Subclasses must implement this method, from the response, it parses it
    and returns a dictionary with variables and their respective values.
    All variables set in self.required_vars must be set in this method,
    or parse() will raise a FuzzowskiRuntimeError

    Args:
        data: The response bytes

    Returns:
        A dictionary with all required variables (and optionally others)
    """
    response_vars = {}
    try:
        source = FakeSocket(data)
        response = HTTPResponse(source)
        response.begin()
        # Content-Length may be absent; read the whole body in that case
        # instead of crashing on int(None).
        content_length = response.getheader('Content-Length')
        if content_length is not None:
            body = response.read(int(content_length))
        else:
            body = response.read()
        json_body = json.loads(body)
        for var in self.required_vars + self.optional_vars:
            if var in json_body:
                response_vars[var] = bytes(json_body[var], encoding='utf-8')
    except json.decoder.JSONDecodeError:
        # Non-JSON bodies simply yield no variables; any missing required
        # variable is reported by parse() afterwards.
        pass
    # The original `except Exception: raise` was a no-op re-raise and has
    # been removed; all other exceptions still propagate unchanged.
    return response_vars
def from_raw(raw):
    """Parse raw HTTP response bytes into a Response.

    :param raw: the complete raw response bytes
    :return: a Response with the parsed body in `result` (on 200) or
        `error` (otherwise)
    :raises ValueError: for an unhandled content type
    """
    sock = FakeSocket(raw)
    response = HTTPResponse(sock)
    response.begin()
    # read() expects a byte count, not the raw bytes themselves; len(raw)
    # is a safe upper bound that always covers the whole body.
    raw_body = response.read(len(raw))
    ok = response.status == 200
    content_type = response.headers.get("content-type")
    if content_type == "application/json":
        parsed_body = json.loads(raw_body)
    elif content_type == "text/plain":
        parsed_body = raw_body.decode()
    elif content_type is None:
        parsed_body = None
    else:
        raise ValueError(f"Unhandled content type: {content_type}")
    return Response(
        status=response.status,
        result=parsed_body if ok else None,
        error=None if ok else parsed_body,
        commit=int_or_none(response.getheader(CCF_COMMIT_HEADER)),
        term=int_or_none(response.getheader(CCF_TERM_HEADER)),
        global_commit=int_or_none(
            response.getheader(CCF_GLOBAL_COMMIT_HEADER)),
    )
def from_raw(raw):
    """Parse raw HTTP response bytes into a Response.

    :param raw: the complete raw response bytes
    :return: a Response carrying the parsed body, tx identifiers and headers
    :raises ValueError: for an unhandled content type
    """
    sock = FakeSocket(raw)
    response = HTTPResponse(sock)
    response.begin()
    # read() expects a byte count, not the raw bytes themselves; len(raw)
    # is a safe upper bound that always covers the whole body.
    raw_body = response.read(len(raw))
    content_type = response.headers.get("content-type")
    if content_type == "application/json":
        parsed_body = json.loads(raw_body)
    elif content_type == "text/plain":
        parsed_body = raw_body.decode()
    elif content_type is None:
        parsed_body = None
    else:
        raise ValueError(f"Unhandled content type: {content_type}")
    return Response(
        response.status,
        body=parsed_body,
        seqno=int_or_none(response.getheader(CCF_TX_SEQNO_HEADER)),
        view=int_or_none(response.getheader(CCF_TX_VIEW_HEADER)),
        global_commit=int_or_none(
            response.getheader(CCF_GLOBAL_COMMIT_HEADER)),
        headers=response.headers,
    )
def __init__(self, request, proxy_socket):
    """Parse the upstream HTTP response arriving on *proxy_socket* and
    populate this transfer object with its status line, headers and
    decoded body.  Closes both the parsed response and the socket.

    :param request: the request object this response belongs to
    :param proxy_socket: socket connected to the upstream server
    """
    HttpTransfer.__init__(self)
    self.request = request
    h = HTTPResponse(proxy_socket)
    h.begin()
    # HTTPResponse joins all chunks together, so the complete body is
    # obtained directly; therefore Transfer-Encoding must not be kept.
    # Content-Length is dropped as well, presumably because the stored
    # body may differ in size after decoding — TODO confirm.
    del h.msg['Transfer-Encoding']
    del h.msg['Content-Length']
    self.response_version = self.version_dict[h.version]
    self.status = h.status
    self.reason = h.reason
    self.set_headers(h.msg)
    self.decoding = None
    self.language = self.system = self.webserver = None
    try:
        data = h.read()
        body_data = self._decode_content_body(
            data, self.get_header('Content-Encoding'))
    except http.client.IncompleteRead:
        # Truncated body: fall back to an empty payload.
        body_data = b''
    except zlib.error:
        # Corrupt compressed body.
        body_data = b''
    except _socket.timeout:
        # Upstream stopped sending in time.
        body_data = b''
    self.set_body_data(body_data)
    self._text()  # Attempt to decode the body as text
    h.close()
    proxy_socket.close()
def get(addr, url, cert_checksum, user_agent=None, type=None):
    """Perform a certificate-pinned HTTPS GET and return (response, body).

    :param addr: host to connect to; also used for SNI and the Host header
        (NOTE: concatenated with bytes below, so callers pass bytes — confirm)
    :param url: request path, as bytes
    :param cert_checksum: expected SHA-256 hex digest of the server's DER cert
    :param user_agent: optional User-Agent header value, as bytes
    :param type: when "json", require an application/json content type
    :return: (HTTPResponse, body bytes), or (None, None) when the
        connection could not be established
    :raises Exception: on certificate-checksum or content-type mismatch
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.settimeout(1)
    wrapped_socket = ssl.create_default_context().wrap_socket(
        sock, server_hostname=addr)
    try:
        wrapped_socket.connect((addr, 443))
    except OSError:
        # Narrowed from a bare `except:`.  Also initialise `body` — the
        # original left it unbound here, so the final return raised
        # UnboundLocalError on connection failure — and release the socket.
        wrapped_socket.close()
        response = None
        body = None
    else:
        der_cert_bin = wrapped_socket.getpeercert(True)
        if sha256(der_cert_bin).hexdigest() != cert_checksum:
            raise Exception("Incorrect certificate checksum")
        request_header = b"GET " + url + b" HTTP/1.0\r\nHost: " + addr
        if user_agent:
            request_header += b"\r\nUser-Agent: " + user_agent
        request_header += b"\r\n\r\n"
        wrapped_socket.send(request_header)
        response = HTTPResponse(wrapped_socket)
        response.begin()
        if type == "json":
            if response.getheader(
                    "Content-Type") != "application/json; charset=utf-8":
                raise Exception(
                    "Content-Type isn't application/json; charset=utf-8")
        body = response.read()
        wrapped_socket.close()
    return response, body
def processHttpResponse(http_response_text, key_enrich, value_enrich):
    """Rebuild an HTTP response with enriched headers.

    When the payload starts with b'HTTP', it is parsed, the status line is
    re-emitted, headers flow through modify_http_header() and the body is
    appended verbatim.  Otherwise an empty bytearray is returned.
    """
    rebuilt = bytearray(b'')
    prefix = http_response_text[:4] if len(http_response_text) > 4 else b''
    parsed = None
    if prefix == b'HTTP':
        parsed = HTTPResponse(HTTPResponseSocket(http_response_text))
        parsed.begin()
    if parsed is not None:
        version = "HTTP/1.1" if parsed.version == 11 else "HTTP/1.0"
        status_line = version + ' ' + str(parsed.status) + ' ' + str(parsed.reason) + '\r\n'
        rebuilt.extend(status_line.encode('utf-8'))
        rebuilt.extend(
            modify_http_header(parsed.getheaders(), key_enrich, value_enrich))
        rebuilt.extend(parsed.read())
    return rebuilt
def test_chunked_w_trailer() -> str:
    """Send a chunked POST with trailer headers and validate the CGI echo.

    Returns an empty string on success, otherwise a description of the
    first failed expectation.
    """
    conn = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    conn.connect((config.SERVER_ADDR, config.SERVER_PORT))
    # Request line + headers, one 5-byte chunk, then the terminating
    # 0-chunk carrying the trailer headers.
    for part in (
        "POST /post/print.cgi HTTP/1.1\r\nHost: localhost\r\nTransfer-encoding: chunked\r\n\r\n",
        "5\r\ntest\n\r\n",
        "0\r\naccept-language: fr\r\ntest-header: blabla\r\n\r\n",
    ):
        conn.send(part.encode())
    # read and parse http response
    reply = HTTPResponse(conn)
    reply.begin()
    if reply.status != 226:
        return "Bad status code: {}, expected: {}".format(
            str(reply.status), "226")
    if reply.headers["Content-Type"] != "CGI/MINE":
        return "Bad content-type: {}, expected: {}".format(
            reply.headers["Content-Type"], "CGI/MINE")
    body = reply.read().decode("UTF-8")
    # Trailer headers must have been exposed to the CGI as HTTP_* vars.
    if (body.find("HTTP_TEST_HEADER=blabla") == -1
            or body.find("HTTP_ACCEPT_LANGUAGE=fr") == -1):
        return "Missing headers from request"
    if body.find("test") == -1:
        return "Missing content in request"
    return ""
def from_string(data):
    """Parse http response to status code, headers and body

    Based on:
    https://stackoverflow.com/questions/24728088/python-parse-http-response-string
    """
    class FakeSocket:
        """Fake socket to simulate received http response content"""

        def __init__(self, response_str):
            self._file = BytesIO(response_str.encode('utf-8'))

        def makefile(self, *args, **kwargs):
            """Fake file that provides string content"""
            # pylint: disable=unused-argument
            return self._file

    source = FakeSocket(data)
    response = HTTPResponse(source)
    response.begin()
    # Read up to the UTF-8 *byte* length: for non-ASCII input the character
    # count len(data) is smaller than the encoded byte count and would
    # truncate the body.
    return ODataHttpResponse(
        response.getheaders(),
        response.status,
        response.read(len(data.encode('utf-8'))))
def _decode_json_response(r: HTTPResponse) -> Any:
    """ Decode JSON HTTP response

    :param r: the HTTP response to decode
    :return: the parsed JSON value, or None when the Content-Type is not
        application/json
    """
    if r.headers.get_content_type() != 'application/json':
        return None
    # The charset is a *parameter* of the Content-Type header, not a header
    # of its own, so getheader('charset') always missed it and the fallback
    # was used unconditionally.  get_content_charset parses it correctly.
    charset = r.headers.get_content_charset('utf-8')
    return json.loads(r.read().decode(charset))
def do_COMMAND(self):
    """Relay one browser request to the destination host and stream the
    (hop-by-hop-filtered) response back to the browser, then close the
    remote connection.
    """
    log.info(f"{self.command} {self.path}")
    try:
        # Connect to destination
        self._connect_to_host()
    except Exception as e:
        self.send_error(500, str(e))
        return
    # The request that got sent to the website: self
    # Browser <--> [Proxy <--> Website]
    content_length = int(self.headers.get("Content-Length", 0))
    request_body = self.rfile.read(content_length)
    # Build request which will be sent to the client
    # [Browser <--> Proxy] <--> Website
    client_request = (
        # Add "GET / HTTP/1.1..." to the request"
        b" ".join([
            to_bytes(self.command),
            to_bytes(self.path),
            to_bytes(self.request_version),
        ])
        + CRLF
        # Add Headers to the request (Host:..., User-Agent:...)
        + self.headers.as_bytes()
        + CRLF
        + request_body)
    # Send it down the pipe!
    self.server.send(to_bytes(client_request))
    # Parse response
    h = HTTPResponse(self.server.conn)
    h.begin()
    # Get rid of hop-by-hop headers
    # (kept as `orig_response` first so print_info sees the parsed object)
    orig_response = h
    self.filter_headers(h.msg)
    # Time to relay the message across
    # read response body
    response_body = h.read()
    res = (
        # HTTP/1.1 OK
        b" ".join([
            to_bytes(self.request_version),
            to_bytes(h.status),
            to_bytes(h.reason),
        ])
        # Content-Type, Content-Length, Server...
        + CRLF
        + h.msg.as_bytes()
        + CRLF
        + response_body)
    # Let's close off the remote end
    h.close()
    self.server.close()
    # Relay the message
    self.client.send(res)
    self.print_info(self, request_body, orig_response, response_body)
async def get_encoded_content(response: HTTPResponse) -> str:
    """This function is used to decode gzip and deflate responses.
    It also parses unencoded/plain text responses."""
    encoding = response.headers.get("Content-Encoding")
    if encoding is None:
        encoding = "identity"
    # Reject unknown encodings before touching the body.
    if encoding not in ("identity", "gzip", "deflate"):
        raise InvalidContentEncoding(
            f"Expected 'identity', 'gzip' or 'deflate', but got: {encoding}"
        )
    payload = response.read()
    if encoding == "gzip":
        decoded = await decode_from_gzip(payload)
    elif encoding == "deflate":
        decoded = await decode_from_deflate(payload)
    else:
        decoded = payload
    return decoded.decode()
def _http_response_asserts(response: HTTPResponse, fuzz_data_logger):
    """Log failures for server errors and malformed JSON bodies.

    Flags any status >= 500 and, for application/json responses, any body
    that does not parse as JSON.
    """
    if response.status >= 500:
        fuzz_data_logger.log_fail("Status code higher or equal than 500!")
    content_type = response.getheader("Content-Type")
    if content_type != "application/json":
        return
    try:
        json.loads(response.read())
    except ValueError:
        fuzz_data_logger.log_fail(
            "application/json body is not valid JSON structure")
def gather_download_links(page_url):
    """Fetch *page_url* and return its HTML as a UTF-8 string.

    :param page_url: URL of the page to fetch
    :return: the decoded page HTML, or '' when the page cannot be
        retrieved or decoded
    """
    try:
        # Call read() on the response object directly instead of the
        # unbound HTTPResponse.read(response) form, and release the
        # connection via the context manager.
        with request.urlopen(page_url) as response:
            html_bytes = response.read()
        return html_bytes.decode("utf-8")
    except (OSError, UnicodeDecodeError, ValueError):
        # Narrowed from a bare `except:`; covers URLError/HTTPError
        # (OSError subclasses), bad URLs (ValueError) and decode failures.
        print("error finding page")
        return ''
def handle_response(response: client.HTTPResponse) -> list:
    """Extract the result URLs from a JSON API response.

    :param response: HTTP response whose body is a JSON object that may
        contain a 'results' list of objects with a 'url' field
    :return: list of URL strings (empty when no results are present)
    """
    payload = json.loads(response.read().decode())
    # A missing or null 'results' key means there is nothing to collect.
    entries = payload.get('results')
    if entries is None:
        return []
    return [entry['url'] for entry in entries]
def parse_response(self, response_text):
    """
    Given a raw HTTP response, create a httplib.HTTResponse out of it.
    Returns a (body, status) tuple.
    """
    parsed = HTTPResponse(FakeSocket(response_text))
    parsed.begin()
    body = parsed.read()
    return (body, parsed.status)
def read_response(self, sock):
    """Read one HTTP response from *sock* and wrap it in a FakeResponse.

    The body is fully drained and the parsed response is closed before
    the wrapper is returned.
    """
    parsed = HTTPResponse(sock)
    parsed.begin()
    body = parsed.read()
    code = parsed.status
    why = parsed.reason
    header_map = dict(parsed.getheaders())
    parsed.close()
    return FakeResponse(code, why, header_map, body)
def should_drop_response(response):
    """Decide whether a raw HTTP response should be filtered out.

    :param response: raw response bytes to inspect
    :return: True when the Content-Type matches TYPES_TO_FILTER or the
        body looks like source code; False otherwise
    """
    parsed = HTTPResponse(FakeSocket(response))
    parsed.begin()
    content_type = parsed.getheader('Content-Type')
    # getheader() returns None when the header is missing; the original
    # `forbidden_type in content_type` then raised TypeError.
    if content_type is not None:
        for forbidden_type in TYPES_TO_FILTER:
            if forbidden_type in content_type:
                return True
    data = parsed.read()
    if is_source_code(data):
        return True
    return False
def from_http_client_response(obj: HTTPResponse) -> Response:
    """Convert an http.client.HTTPResponse into a project Response.

    The body is decoded to text (UTF-8 for bytes), headers become a dict,
    and the result is validated before being returned.
    """
    raw = obj.read()
    if isinstance(raw, str):
        body = raw
    elif isinstance(raw, bytes):
        body = raw.decode("utf8")
    else:
        body = None
    res = ResponseBuilder.from_dict(
        dict(
            statusCode=obj.getcode(),
            headers=dict(obj.getheaders()),
            body=body,
        ))
    ResponseBuilder.validate(res)
    return res
def parse_http_response(raw_bytes):
    """Pretty hacky way to parse an HTTP response using the python
    standard library http utilities. Probably should be replaced at
    some point.
    """
    parsed = HTTPResponse(_FakeSocket(raw_bytes))
    parsed.begin()
    # len(raw_bytes) is an upper bound on the body size, so the entire
    # body is consumed in a single read (reads slightly too many bytes).
    body = parsed.read(len(raw_bytes))
    return Response(
        status_code=parsed.status,
        headers=dict(parsed.getheaders()),
        body=body,
    )
def parse(resp: HTTPResponse):
    """Extract joke entries (author name, avatar URL, text) from the
    response HTML and feed each one to item_pipeline().

    :param resp: HTTP response containing the listing page HTML
    """
    try:
        # html = resp.read().decode()
        html = resp.read()
        # NOTE(review): detect() result is unused on this path — the bytes
        # are handed to etree.HTML() as-is; confirm that was intended.
        charset = detect(html)
    except:
        # If the page encoding is not utf-8 or ISO8859-1 (e.g. gbk, gb2312),
        # the response header carries it:
        #   Content-Type: text/html; charset=UTF-8
        # NOTE(review): resp.read() below re-reads an already-consumed
        # stream and will likely return b'' — verify this fallback works.
        content_type = resp.headers.get('Content-Type')
        mime_type, charset = tuple(content_type.split(';'))
        print(mime_type, charset)
        if charset:
            html = resp.read().decode(encoding=charset)
        else:
            html = resp.read().decode(encoding='gbk')
    et = etree.HTML(html)
    # Select all joke entries via xpath;
    # class = "article block untagged mb15 typs_long"
    article_divs = et.xpath(
        '//div[starts-with(@class, "article")]')  # by default selects 48 Elements (div)
    for article_div in article_divs:
        # Collect the author's information
        author = article_div.xpath('./div[1]//img')  # Element->img
        if author:
            author_name = author[0].xpath('./@alt')[0]
            author_src = 'https:' + author[0].xpath('./@src')[0]
            # Collect the joke's text content
            text = article_div.xpath('./a[1]//span/text()')  # span content may contain <br> tags
            text = ''.join(text)
            item = {
                'author_name': author_name,
                'author_photo': author_src,
                'text': text
            }
            item_pipeline(**item)
def test_resolve_uri_to_response_ok_working_for_html(fake_socket): """Test that response_ok returns the file from resolve_uri as the body.""" from server import response_ok, resolve_uri if sys.version_info.major == 3: from http.client import HTTPResponse else: from httplib import HTTPResponse response_str = response_ok(*resolve_uri('/a_web_page.html')) source = fake_socket(response_str) response = HTTPResponse(source) response.begin() assert response.read(len(response_str)) == b"""<!DOCTYPE html>
def __init__(self, resp: HTTPResponse):
    """Wrap an API response, raising a typed exception for non-success
    status codes.

    :param resp: the HTTP response; assumed to expose a `.code` attribute
        (urllib-style). NOTE(review): http.client.HTTPResponse exposes
        `.status`, not `.code` — confirm callers pass a urllib response.
    :raises ArukasAPINotAuthorizedException: on 401
    :raises ArukasAPINotFoundException: on 404
    :raises ArukasAPIParamError: on 422
    :raises ArukasAPIRemoteException: on any other non-200 status
    """
    self.code = resp.code
    # Body is assumed to be JSON; parsed eagerly so exception handlers
    # below can inspect it via `self`.
    self.res = json.loads(resp.read())
    if self.code == 200:
        pass
    elif self.code == 401:
        raise ArukasAPINotAuthorizedException(self)
    elif self.code == 404:
        raise ArukasAPINotFoundException(self)
    elif self.code == 422:
        raise ArukasAPIParamError(self)
    else:
        raise ArukasAPIRemoteException(self)
def test_ok_response_body_is_there(fake_socket):
    """Test that request has a body."""
    from server import response_ok
    if sys.version_info.major == 3:
        from http.client import HTTPResponse
    else:
        from httplib import HTTPResponse
    raw = response_ok(b'htmlhtml', 'text/plain')
    # Parse the generated response through the stdlib parser and check
    # the body survives the round trip.
    parsed = HTTPResponse(fake_socket(raw))
    parsed.begin()
    assert parsed.read(len(raw)) == b'htmlhtml'
def process_file(filename):
    """Scan *filename* for concatenated HTTP responses, parse each one and
    hand non-empty payloads to process_payload(), updating the module-level
    counters as it goes.

    :param filename: path of the capture file to inspect
    """
    global file_cntr, resp_file_cntr, inspected_cntr, valid_cntr
    with open(filename, 'rb') as file:
        file_cntr += 1  # increment examined file counter
        data = file.read(100000000)  # 100MB
        # data = file.read()
        # there may be more than one response in one stream so bytes are splitten in
        # place of (for example) HTTP/1.1 200 which should indicate response start
        # NOTE(review): the `.` in b'HTTP/\d.\d \d{3}' is an unescaped
        # regex wildcard — confirm a literal dot was intended.
        splitlist = re.split(b'(HTTP/\d.\d \d{3})', data)
        splitlistlen = len(splitlist)  # length of split list
        # print(splitlist)
        # length have to be odd:
        #  first item should be empty (bytes before first header)
        #  second item is header (HTTP/1.1 200)
        #  third item is rest of response
        #  same for next responses
        if not (splitlistlen % 2):
            sys.stderr.write('Splitlist is even in {}\n'.format(filename))
            exit(1)
        if splitlist[0] == data:
            # there is no HTTP request found at all
            return
        if splitlist[0] != b'':
            # there are some data before first response
            # sys.stderr.write('Data before first request in {}\n'.format(filename))
            pass
        resp_file_cntr += 1
        # for every response header
        for i in range(1, len(splitlist), 2):
            inspected_cntr += 1  # increment counter
            # join header with rest of the response, create fake socket from
            # response bytes and pass it into HTTPResponse constructor
            resp = HTTPResponse(FakeSocket(splitlist[i] + splitlist[i + 1]))
            payload = bytes()
            try:
                resp.begin()  # HTTPResponse object have to be initialized
                payload = resp.read()
            except:
                # Best-effort: unparseable responses are skipped, not fatal.
                continue
            ##################################################
            # now the response is parsed and ready
            valid_cntr += 1
            # count_headers(resp.getheaders())
            # content_type(resp.getheaders())
            if payload != b'':
                process_payload(resp.getheaders(), payload)
def myProxy(self):
    """Forward the current browser request to the upstream host (reusing
    the remote socket when the Host is unchanged) and relay the response
    back to the browser.

    NOTE(review): this method mixes Python 2 and Python 3 idioms —
    `response.msg.headers` only exists on Py2 httplib messages, and
    `"".join(hlist) + b'\\r\\n'` concatenates str with bytes, which raises
    TypeError on Py3.  Confirm the target interpreter before changing.
    """
    if self.remote is None or self.lastHost != self.headers["Host"]:
        # (Re)connect when there is no cached socket or the Host changed.
        self.remote = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.remote.settimeout(RemoteTimeout)
        if ':' in self.headers["Host"]:
            port = int(self.headers["Host"].split(':')[1])
        else:
            port = 80
        self.remote.connect((self.headers["Host"].split(':')[0], port))
    if not self.headers['Host'].startswith('http'):self.headers['Host'] = 'http://' + self.headers['Host']
    if 'Cookie' in self.headers:
        # Persist the session cookie for later reuse.
        self.sessionid = self.headers['Cookie']
        open('.session', 'w').write(self.headers['Cookie'])
        # print sessionid
    else:
        sessionid = ''
    if self.requestline.startswith('HEAD /'):
        # Adjust 'HEAD /xxx' situation
        self.requestline = self.requestline.replace('HEAD ', 'HEAD ' + self.headers['Host'])
    self.remote.sendall(self.requestline.encode('ascii') + b"\r\n")
    headerstr = str(self.headers).replace("\r\n", "\n").replace("\n", "\r\n")
    self.remote.sendall(headerstr.encode('ascii', 'ignore') + b"\r\n")
    # Send Post data
    if self.command == 'POST':
        postdata = self.rfile.read(int(self.headers['Content-Length']))
        self.remote.sendall(postdata)
        # if 'cardselect/savedeckcard' in self.requestline:
        #     open('.carddeck', 'w').write(postdata)
    response = HTTPResponse(self.remote, method = self.command)
    response.begin()
    # Reply to the browser
    status = "HTTP/1.1 " + str(response.status) + " " + response.reason
    self.wfile.write(status.encode('ascii') + b'\r\n')
    hlist = []
    for line in response.msg.headers:
        # Fixed multiple values of a same name; Transfer-Encoding is
        # dropped because the body is relayed already de-chunked.
        if 'TRANSFER-ENCODING' not in line.upper():
            hlist.append(line)
    self.wfile.write("".join(hlist) + b'\r\n')
    if self.command == "CONNECT" and response.status == 200:
        return self.transfer(self.remote, self.connection)
    elif self.command == "HEAD":
        pass
    else:
        r = ''
        while True:
            response_data = response.read(BufferSize)
            if not response_data:
                break
            self.wfile.write(response_data)
def from_raw(raw):
    """Parse raw HTTP response bytes into a Response with a raw body.

    :param raw: the complete raw response bytes
    :return: a Response wrapping the undecoded body, tx identifiers and headers
    """
    sock = FakeSocket(raw)
    response = HTTPResponse(sock)
    response.begin()
    # read() expects a byte count, not the raw bytes themselves; len(raw)
    # is a safe upper bound that always covers the whole body.
    raw_body = response.read(len(raw))
    return Response(
        response.status,
        body=RawResponseBody(raw_body),
        seqno=int_or_none(response.getheader(CCF_TX_SEQNO_HEADER)),
        view=int_or_none(response.getheader(CCF_TX_VIEW_HEADER)),
        global_commit=int_or_none(response.getheader(CCF_GLOBAL_COMMIT_HEADER)),
        headers=response.headers,
    )
def get_content(response: HTTPResponse) -> str:
    """Get content from HTTP response. Handles gzipped content.

    :param HTTPResponse response: HTTP response
    :returns: HTTP response content
    :rtype: str
    """
    payload = response.read()
    # Transparently inflate gzip-encoded bodies before decoding to text.
    if response.getheader('Content-encoding') == 'gzip':
        payload = gzip.decompress(payload)
    return payload.decode()
def from_raw(raw):
    """Parse raw HTTP response bytes into a Response.

    :param raw: the complete raw response bytes
    :return: a Response with JSON-decoded `result` on 200, otherwise the
        decoded body in `error`
    """
    sock = FakeSocket(raw)
    response = HTTPResponse(sock)
    response.begin()
    # read() expects a byte count, not the raw bytes themselves; len(raw)
    # is a safe upper bound that always covers the whole body.
    raw_body = response.read(len(raw))
    ok = response.status == 200
    return Response(
        status=response.status,
        result=json.loads(raw_body) if ok else None,
        error=None if ok else raw_body.decode(),
        commit=int_or_none(response.getheader(CCF_COMMIT_HEADER)),
        term=int_or_none(response.getheader(CCF_TERM_HEADER)),
        global_commit=int_or_none(response.getheader(CCF_GLOBAL_COMMIT_HEADER)),
    )
def http(self, verb, endpoint, **params):
    """Issue an HTTP request over the snapd UNIX socket and decode the
    JSON reply.

    :param verb: HTTP method name (case-insensitive)
    :param endpoint: request path; query parameters come from **params
    :return: the parsed JSON response, or False when the snapd socket
        path does not exist
    """
    if not Path(self.snapd).exists():
        return False
    uri = f"{endpoint}?{urlencode(params)}" if params else endpoint
    host = socket.gethostname()
    request_bytes = (
        f"{verb.upper()} {uri} HTTP/1.1\r\nHost: {host}\r\n\r\n".encode())
    with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as s:
        s.connect(self.snapd)
        s.sendall(request_bytes)
        reply = HTTPResponse(s)
        reply.begin()
        return json.loads(reply.read())
def __init__(self, request, proxy_socket):
    """Parse the upstream HTTP response from *proxy_socket* and populate
    this transfer object with its status line, headers and decoded body.
    Closes both the parsed response and the socket.

    :param request: the request this response answers
    :param proxy_socket: socket connected to the upstream server
    """
    HttpTransfer.__init__(self)
    self.request = request
    h = HTTPResponse(proxy_socket)
    h.begin()
    # HTTPResponse joins all chunks together, so the complete body is
    # obtained directly; therefore Transfer-Encoding must not be kept.
    # Content-Length is dropped too, presumably because the stored body
    # may differ in size after decoding — TODO confirm.
    del h.msg['Transfer-Encoding']
    del h.msg['Content-Length']
    self.response_version = self.version_dict[h.version]
    self.status = h.status
    self.reason = h.reason
    self.set_headers(h.msg)
    body_data = self._decode_content_body(h.read(), self.get_header('Content-Encoding'))
    self.set_body_data(body_data)
    self._text()  # Attempt to decode the body as text
    h.close()
    proxy_socket.close()
def read(self, *args):
    """Read from the response, tolerating truncated bodies.

    Falls back to the partial payload when the server closes the
    connection before the full body has arrived.
    """
    try:
        return HTTPResponse.read(self, *args)
    except IncompleteRead as exc:
        return exc.partial