def cache_check(self):
    """Look up a previously cached response for this request, then continue
    with normal request processing."""
    app = self.application
    # Build a cache handler for the current request using app-wide cookie settings.
    self.cache_handler = CacheHandler(
        app.cache_dir,
        self.request,
        app.cookie_regex,
        app.cookie_blacklist,
    )
    # A cache hit (or None on a miss) is stored for process_request() to consult.
    self.cached_response = self.cache_handler.load()
    self.process_request()
def __init__(self, cache_file_name, root, client):
    """Initialize a folder node representing the root of the tree.

    :param cache_file_name: path of the file backing the CacheHandler
    :param root: reference to the root node of this tree
    :param client: API client used for remote operations
    """
    # Collaborators first.
    self.cache = CacheHandler(cache_file_name)
    self.root = root
    self.client = client
    # Default metadata for the root folder node.
    self.path = ''
    self.id = "0"  # "0" appears to denote the root folder id — confirm against the API
    self.type = self.BOX_FOLDER
    self.modified_at = None
    self.size = 0
def main():
    """Geocode coordinate pairs from the input dataset into feature columns.

    Streams the input dataset in chunks; for each chunk either geocodes row
    by row (cache-aware) or accumulates rows into batches, then writes the
    augmented chunk to the output dataset.
    """
    config = get_config()
    geocode_function = get_geocode_function(config)
    writer = None  # created lazily once the first chunk fixes the output schema
    try:
        # Cache may be disabled via config; CacheHandler covers both cases.
        with CacheHandler(config['cache_location'], enabled=config['cache_enabled'],
                          size_limit=config['cache_size'],
                          eviction_policy=config['cache_eviction']) as cache:
            for current_df in config['input_ds'].iter_dataframes(chunksize=max(10000, config['batch_size'])):
                columns = current_df.columns.tolist()
                # Feature columns not yet present in this chunk.
                columns_to_append = [f['column'] for f in config['features'] if not f['column'] in columns]
                if columns_to_append:
                    # Insert the new columns right after the lat/lng columns.
                    index = max(columns.index(config['lat_column']),
                                columns.index(config['lng_column']))
                    current_df = current_df.reindex(
                        columns=columns[:index + 1] + columns_to_append + columns[index + 1:],
                        copy=False)
                if not config['batch_enabled']:
                    # Row-by-row geocoding; perform_geocode yields one value per feature.
                    results = zip(*current_df.apply(perform_geocode, axis=1,
                                                    args=(config, geocode_function, cache)))
                    for feature, result in zip(config['features'], results):
                        current_df[feature['column']] = result
                else:
                    batch = []
                    for i, row in current_df.iterrows():
                        # Flush the batch once it reaches the configured size.
                        if len(batch) == config['batch_size']:
                            perform_geocode_batch(current_df, config, geocode_function, cache, batch)
                            batch = []
                        lat = row[config['lat_column']]
                        lng = row[config['lng_column']]
                        try:
                            if any([is_empty(row[f['column']]) for f in config['features']]):
                                # At least one feature missing: consult the cache
                                # (raises KeyError on a miss).
                                res = cache[(lat, lng)]
                            else:
                                # All features already present: reuse the row's values.
                                res = {}
                                for f in config['features']:
                                    res[f['name']] = row[f['column']]
                            for feature in config['features']:
                                current_df.loc[i, feature['column']] = res[feature['name']]
                        except KeyError as e:
                            # Cache miss (or a feature key absent from the cached
                            # entry): defer this row to the next batch call.
                            batch.append((i, (lat, lng)))
                    if len(batch) > 0:
                        # Flush whatever remains at the end of the chunk.
                        perform_geocode_batch(current_df, config, geocode_function, cache, batch)
                # First loop, we write the schema before creating the dataset writer
                if writer is None:
                    config['output_ds'].write_schema_from_dataframe(current_df)
                    writer = config['output_ds'].get_writer()
                writer.write_dataframe(current_df)
    finally:
        if writer is not None:
            writer.close()
def on_close(self):
    """Persist the websocket exchange when the socket closes.

    The handshake request-response pair is dumped to the cache, with the
    buffered websocket frames serving as the response body.
    """
    upstream = self.upstream_connection
    # Synthesize a tornado HTTPResponse for the handshake; the cache handler
    # requires a response object to dump.
    self.handshake_response = tornado.httpclient.HTTPResponse(
        self.handshake_request,
        upstream.code,
        headers=upstream.headers,
        request_time=0,
    )
    # Reuse the regular request/response dumping procedure.
    app = self.application
    self.cache_handler = CacheHandler(
        app.cache_dir, self.handshake_request, app.cookie_regex, app.cookie_blacklist)
    self.cached_response = self.cache_handler.load()
    self.cache_handler.dump(self.handshake_response)
def main():
    """Geocode addresses from the input dataset into latitude/longitude columns.

    Streams the input dataset in chunks, geocodes each chunk (one-by-one or in
    batches depending on configuration, with a cache in front of the geocoding
    service), and writes the augmented chunks to the output dataset.
    """
    config = get_config()
    geocode_function = get_geocode_function(config)
    # BUGFIX: removed `input_df = config['input_ds'].get_dataframe()` — it
    # loaded the ENTIRE dataset into memory and was never used, while the loop
    # below already streams the data chunk by chunk.
    writer = None  # created lazily once the first chunk fixes the output schema
    try:
        # Creating a fake or real cache depending on user's choice
        with CacheHandler(config['cache_location'],
                          enabled=config['cache_enabled'],
                          size_limit=config['cache_size'],
                          eviction_policy=config['cache_eviction']) as cache:
            for current_df in config['input_ds'].iter_dataframes(chunksize=max(10000, config['batch_size'])):
                columns = current_df.columns.tolist()
                # Add the output columns right after the address column if missing.
                columns_to_append = [config[c] for c in ['latitude', 'longitude']
                                     if config[c] not in columns]
                if columns_to_append:
                    index = columns.index(config['address_column'])
                    current_df = current_df.reindex(
                        columns=columns[:index + 1] + columns_to_append + columns[index + 1:],
                        copy=False)
                if not config['batch_enabled']:
                    # Normal, 1-by-1 geocoding when batch is not enabled/available.
                    current_df[config['latitude']], current_df[config['longitude']] = \
                        zip(*current_df.apply(perform_geocode, axis=1,
                                              args=(config, geocode_function, cache)))
                else:
                    # Batch creation and geocoding otherwise.
                    batch = []
                    for i, row in current_df.iterrows():
                        # Flush the batch once it reaches the configured size.
                        if len(batch) == config['batch_size']:
                            perform_geocode_batch(current_df, config, geocode_function, cache, batch)
                            batch = []
                        address = row[config['address_column']]
                        try:
                            if any(is_empty(row[config[c]]) for c in ['latitude', 'longitude']):
                                # Missing coordinates: try the cache (raises
                                # KeyError on a miss).
                                res = cache[address]
                            else:
                                # Row already geocoded: keep the existing values.
                                res = [row[config[c]] for c in ['latitude', 'longitude']]
                            current_df.loc[i, config['latitude']] = res[0]
                            current_df.loc[i, config['longitude']] = res[1]
                        except KeyError:
                            # Cache miss: defer this row to the next batch call.
                            batch.append((i, address))
                    if batch:
                        # Flush whatever remains at the end of the chunk.
                        perform_geocode_batch(current_df, config, geocode_function, cache, batch)
                # First loop, we write the schema before creating the dataset writer
                if writer is None:
                    config['output_ds'].write_schema_from_dataframe(current_df)
                    writer = config['output_ds'].get_writer()
                writer.write_dataframe(current_df)
    finally:
        if writer is not None:
            writer.close()
def cache_check(self):
    """Consult the response cache for the current request, then hand control
    to process_request()."""
    handler = CacheHandler(
        self.application.cache_dir,
        self.request,
        self.application.cookie_regex,
        self.application.cookie_blacklist)
    self.cache_handler = handler
    # load() yields the cached response on a hit, None otherwise.
    self.cached_response = handler.load()
    self.process_request()
def on_close(self):
    """Called when the websocket closes: save the handshake request-response
    pair, using the buffered websocket frames as the response body."""
    # The cache handler requires a response object for the handshake.
    handshake_response = tornado.httpclient.HTTPResponse(
        self.handshake_request,
        self.upstream_connection.code,
        headers=self.upstream_connection.headers,
        request_time=0)
    self.handshake_response = handshake_response
    # Standard procedure for dumping a tornado request-response.
    cache_handler = CacheHandler(
        self.application.cache_dir,
        self.handshake_request,
        self.application.cookie_regex,
        self.application.cookie_blacklist)
    self.cache_handler = cache_handler
    self.cached_response = cache_handler.load()
    cache_handler.dump(handshake_response)
def get(self):
    """Handle every request except CONNECT.

    Resolves the absolute URL, serves a cached response when one exists,
    otherwise forwards the request upstream (with optional per-host HTTP auth
    and an outbound proxy), retries on timeout-like codes, finishes the
    response, and finally caches it.
    """
    # The flow starts here
    self.request.response_buffer = ''
    # The requests that come through ssl streams are relative requests, so
    # transparent proxying is required. The following snippet decides the url
    # that should be passed to the async client.
    if self.request.uri.startswith(self.request.protocol, 0):
        # Normal Proxy Request: the uri is already absolute.
        self.request.url = self.request.uri
    else:
        # Transparent Proxy Request: rebuild protocol://host + relative uri.
        self.request.url = self.request.protocol + "://" + self.request.host + self.request.uri
    # This block checks for an already cached response and, if present, returns it.
    self.cache_handler = CacheHandler(
        self.application.cache_dir,
        self.request,
        self.application.cookie_regex,
        self.application.cookie_blacklist
    )
    # calculate_hash is awaited for its effect on the cache handler; the
    # returned hash itself is not needed here (the previous unused
    # `request_hash` binding was dropped).
    yield tornado.gen.Task(self.cache_handler.calculate_hash)
    self.cached_response = self.cache_handler.load()
    if self.cached_response:
        if self.cached_response.body:
            self.write(self.cached_response.body)
        self.finish_response(self.cached_response)
    else:
        # Request header cleaning: strip restricted headers before forwarding.
        for header in restricted_request_headers:
            try:
                del self.request.headers[header]
            except KeyError:  # narrowed from a bare except: header simply absent
                continue
        # HTTP auth, if configured for this host.
        http_auth_username = None
        http_auth_password = None
        http_auth_mode = None
        if self.application.http_auth:
            host = self.request.host
            # If no explicit port was given, append the scheme's default port so
            # the lookup against http_auth_hosts matches.
            if ':' not in host:
                default_ports = {'http': '80', 'https': '443'}
                try:
                    host = host + ':' + default_ports[self.request.protocol]
                except KeyError:
                    # Unknown scheme: leave the host as-is.
                    pass
            # Check if auth is provided for that host.
            try:
                index = self.application.http_auth_hosts.index(host)
                http_auth_username = self.application.http_auth_usernames[index]
                http_auth_password = self.application.http_auth_passwords[index]
                http_auth_mode = self.application.http_auth_modes[index]
            except ValueError:
                # No credentials configured for this host.
                pass
        # pycurl is needed for the curl client.
        async_client = tornado.curl_httpclient.CurlAsyncHTTPClient()
        # httprequest object is created and then passed to the async client.
        request = tornado.httpclient.HTTPRequest(
            url=self.request.url,
            method=self.request.method,
            body=self.request.body if self.request.body else None,
            headers=self.request.headers,
            auth_username=http_auth_username,
            auth_password=http_auth_password,
            auth_mode=http_auth_mode,
            follow_redirects=False,
            use_gzip=True,
            streaming_callback=self.handle_data_chunk,
            header_callback=None,
            proxy_host=self.application.outbound_ip,
            proxy_port=self.application.outbound_port,
            proxy_username=self.application.outbound_username,
            proxy_password=self.application.outbound_password,
            allow_nonstandard_methods=True,
            # socks outbound proxies need a curl-level callback
            prepare_curl_callback=prepare_curl_callback
            if self.application.outbound_proxy_type == "socks" else None,
            validate_cert=False)
        # BUGFIX: `response` was previously left unbound when fetch raised,
        # causing a NameError in the retry loop below.
        response = None
        try:
            response = yield tornado.gen.Task(async_client.fetch, request)
        except Exception:
            pass
        if response is None:
            # Upstream fetch failed outright; close the client connection
            # instead of crashing with a NameError.
            self.finish()
        else:
            # Request retries: up to 3 attempts on timeout/connection errors.
            for _ in range(3):
                if response.code not in (408, 599):
                    break
                self.request.response_buffer = ''
                response = yield tornado.gen.Task(async_client.fetch, request)
            self.finish_response(response)
            # Cache the response after finishing the response, so caching time
            # is not included in response time.
            self.cache_handler.dump(response)
class ProxyHandler(tornado.web.RequestHandler):
    """
    This RequestHandler processes all the requests that the application received
    """
    SUPPORTED_METHODS = [
        'GET', 'POST', 'CONNECT', 'HEAD', 'PUT', 'DELETE', 'OPTIONS', 'TRACE'
    ]

    def __new__(cls, application, request, **kwargs):
        """Route websocket upgrade requests to CustomWebSocketHandler instead
        of creating a regular ProxyHandler instance."""
        # http://stackoverflow.com/questions/3209233/how-to-replace-an-instance-in-init-with-a-different-object
        # Based on upgrade header, websocket request handler must be used
        try:
            if request.headers['Upgrade'].lower() == 'websocket':
                return CustomWebSocketHandler(application, request, **kwargs)
        except KeyError:
            # No Upgrade header: a plain HTTP request, handled by this class.
            pass
        return tornado.web.RequestHandler.__new__(cls, application, request, **kwargs)

    def set_status(self, status_code, reason=None):
        """
        Sets the status code for our response.
        Overriding is done so as to handle unknown response codes gracefully.
        """
        self._status_code = status_code
        if reason is not None:
            self._reason = tornado.escape.native_str(reason)
        else:
            try:
                self._reason = tornado.httputil.responses[status_code]
            except KeyError:
                # Unknown status code: fall back to a generic reason string.
                self._reason = tornado.escape.native_str("Server Not Found")

    def calculate_delay(self, response):
        """Update the per-host throttling delay from recent response times."""
        self.application.throttle_variables["hosts"][
            self.request.host]["request_times"].append(response.request_time)
        # NOTE(review): this measures the length of the per-host dict (which
        # always has 2 keys: "request_times" and "delay"), not the length of
        # the request_times list — the pruning below likely never triggers.
        # Confirm whether len(...["request_times"]) was intended.
        if len(self.application.throttle_variables["hosts"][
                self.request.host]) > 20:
            self.application.throttle_variables["hosts"][
                self.request.host]["request_times"].pop(0)
        response_times = self.application.throttle_variables["hosts"][
            self.request.host]["request_times"]
        # Average of the older half vs. the newer half of the samples.
        # NOTE(review): int(len/2) is 0 when fewer than 2 samples exist —
        # ZeroDivisionError; confirm callers guarantee at least 2 samples.
        last_ten = sum(
            response_times[:int(len(response_times) / 2)]) / int(
            len(response_times) / 2)
        second_last_ten = sum(
            response_times[int(len(response_times) / 2):]) / (
            len(response_times) - int(len(response_times) / 2))
        if round(last_ten - second_last_ten, 3) > self.application.throttle_variables["threshold"]:
            # Responses are slowing down: delay future requests by the difference.
            self.application.throttle_variables["hosts"][
                self.request.host]["delay"] = round(
                last_ten - second_last_ten, 3)
        else:
            self.application.throttle_variables["hosts"][
                self.request.host]["delay"] = 0

    # This function is a callback after the async client gets the full response
    # This method will be improvised with more headers from original responses
    def handle_response(self, response):
        """Retry timeout-like responses up to 3 times; otherwise write out."""
        if self.application.throttle_variables:
            self.calculate_delay(response)
        if response.code in [408, 599]:
            # 408 = request timeout, 599 = tornado connection/timeout error.
            try:
                old_count = self.request.retries
                self.request.retries = old_count + 1
            except AttributeError:
                # First failure for this request.
                self.request.retries = 1
            finally:
                if self.request.retries < 3:
                    # Reset buffered data and re-issue the request.
                    self.request.response_buffer = ''
                    self.clear()
                    self.process_request()
                else:
                    self.write_response(response)
        else:
            self.write_response(response)

    # This function writes a new response & caches it
    def write_response(self, response):
        """Copy status and headers to the client, cache, then finish."""
        self.set_status(response.code)
        del self._headers['Server']
        for header, value in list(response.headers.items()):
            if header == "Set-Cookie":
                # Set-Cookie may appear multiple times; append, don't overwrite.
                self.add_header(header, value)
            else:
                if header not in restricted_response_headers:
                    self.set_header(header, value)
        if self.request.response_buffer:
            # Body was already streamed via handle_data_chunk; just cache it.
            self.cache_handler.dump(response)
        self.finish()

    # This function handles a dummy response object which is created from cache
    def write_cached_response(self, response):
        """Replay a cached response (status, headers, body) to the client."""
        self.set_status(response.code)
        for header, value in response.headers.items():
            if header == "Set-Cookie":
                self.add_header(header, value)
            else:
                if header not in restricted_response_headers:
                    self.set_header(header, value)
        self.write(response.body)
        self.finish()

    # This function is a callback when a small chunk is received
    def handle_data_chunk(self, data):
        """Stream a chunk to the client and keep a copy for caching."""
        if data:
            self.write(data)
            self.request.response_buffer += data

    # This function creates and makes the request to upstream server
    def process_request(self):
        """Serve from cache if possible; otherwise forward upstream."""
        if self.cached_response:
            self.write_cached_response(self.cached_response)
        else:
            # pycurl is needed for curl client
            async_client = tornado.curl_httpclient.CurlAsyncHTTPClient()
            # httprequest object is created and then passed to async client with a callback
            request = tornado.httpclient.HTTPRequest(
                url=self.request.url,
                method=self.request.method,
                body=self.request.body,
                headers=self.request.headers,
                follow_redirects=False,
                use_gzip=True,
                streaming_callback=self.handle_data_chunk,
                header_callback=None,
                proxy_host=self.application.outbound_ip,
                proxy_port=self.application.outbound_port,
                proxy_username=self.application.outbound_username,
                proxy_password=self.application.outbound_password,
                allow_nonstandard_methods=True,
                validate_cert=False)
            try:
                async_client.fetch(request, callback=self.handle_response)
            except Exception:
                # Best-effort: a failed fetch simply produces no response.
                pass

    def cache_check(self):
        # This block here checks for already cached response and if present returns one
        self.cache_handler = CacheHandler(self.application.cache_dir,
                                          self.request,
                                          self.application.cookie_regex,
                                          self.application.cookie_blacklist)
        self.cached_response = self.cache_handler.load()
        self.process_request()

    @tornado.web.asynchronous
    def get(self):
        """
        * This function handles all requests except the connect request.
        * Once ssl stream is formed between browser and proxy, the requests are
          then processed by this function
        """
        # The flow starts here
        self.request.response_buffer = ''
        # Request header cleaning
        for header in restricted_request_headers:
            try:
                del self.request.headers[header]
            except:  # NOTE(review): bare except — likely only KeyError is expected
                continue
        # The requests that come through ssl streams are relative requests, so
        # transparent proxying is required. The following snippet decides the
        # url that should be passed to the async client.
        if self.request.uri.startswith(self.request.protocol, 0):
            # Normal Proxy Request
            self.request.url = self.request.uri
        else:
            # Transparent Proxy Request
            self.request.url = self.request.protocol + "://" + self.request.host + self.request.uri
        if self.application.throttle_variables:
            try:
                throttle_delay = self.application.throttle_variables["hosts"][
                    self.request.host]["delay"]
            except KeyError:
                # First request for this host: initialize its throttle entry.
                self.application.throttle_variables["hosts"][
                    self.request.host] = {
                    "request_times": [],
                    "delay": 0
                }
                throttle_delay = 0
            finally:
                if throttle_delay == 0:
                    self.cache_check()
                else:
                    # Defer the request until the throttle delay has elapsed.
                    tornado.ioloop.IOLoop.instance().add_timeout(
                        datetime.timedelta(seconds=throttle_delay),
                        self.cache_check)
        else:
            self.cache_check()

    # The following methods can all be handled through the get() implementation
    @tornado.web.asynchronous
    def post(self):
        return self.get()

    @tornado.web.asynchronous
    def head(self):
        return self.get()

    @tornado.web.asynchronous
    def put(self):
        return self.get()

    @tornado.web.asynchronous
    def delete(self):
        return self.get()

    @tornado.web.asynchronous
    def options(self):
        return self.get()

    @tornado.web.asynchronous
    def trace(self):
        return self.get()

    @tornado.web.asynchronous
    def connect(self):
        """
        This function gets called when a connect request is received.
        * The host and port are obtained from the request uri
        * A socket is created, wrapped in ssl and then added to SSLIOStream
        * This stream is used to connect to speak to the remote host on given port
        * If the server speaks ssl on that port, callback start_tunnel is called
        * An OK response is written back to client
        * The client side socket is wrapped in ssl
        * If the wrapping is successful, a new SSLIOStream is made using that socket
        * The stream is added back to the server for monitoring
        """
        host, port = self.request.uri.split(':')

        def start_tunnel():
            # Upstream speaks ssl: acknowledge the tunnel, then wrap the
            # client-side socket with a generated certificate for this host.
            try:
                self.request.connection.stream.write(
                    b"HTTP/1.1 200 Connection established\r\n\r\n")
                wrap_socket(self.request.connection.stream.socket,
                            host,
                            self.application.ca_cert,
                            self.application.ca_key,
                            self.application.certs_folder,
                            success=ssl_success)
            except tornado.iostream.StreamClosedError:
                pass

        def ssl_success(client_socket):
            # Hand the now-encrypted client stream back to the server loop.
            client = tornado.iostream.SSLIOStream(client_socket)
            server.handle_stream(client, self.application.inbound_ip)

        # Tiny Hack to satisfy proxychains CONNECT request to HTTP port.
        # HTTPS fail check has to be improvised
        #def ssl_fail():
        #    self.request.connection.stream.write(b"HTTP/1.1 200 Connection established\r\n\r\n")
        #    server.handle_stream(self.request.connection.stream, self.application.inbound_ip)

        # Hacking to be done here, so as to check for ssl using proxy and auth
        try:
            s = ssl.wrap_socket(
                socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0))
            upstream = tornado.iostream.SSLIOStream(s)
            #start_tunnel()
            #upstream.set_close_callback(ssl_fail)
            upstream.connect((host, int(port)), start_tunnel)
        except Exception:
            self.finish()
class ProxyHandler(tornado.web.RequestHandler):
    """
    This RequestHandler processes all the requests that the application received
    """
    SUPPORTED_METHODS = ['GET', 'POST', 'CONNECT', 'HEAD', 'PUT', 'DELETE', 'OPTIONS', 'TRACE']

    def __new__(cls, application, request, **kwargs):
        """Route websocket upgrade requests to CustomWebSocketHandler."""
        # http://stackoverflow.com/questions/3209233/how-to-replace-an-instance-in-init-with-a-different-object
        # Based on upgrade header, websocket request handler must be used
        try:
            if request.headers['Upgrade'].lower() == 'websocket':
                return CustomWebSocketHandler(application, request, **kwargs)
        except KeyError:
            # No Upgrade header: a plain HTTP request, handled by this class.
            pass
        return tornado.web.RequestHandler.__new__(cls, application, request, **kwargs)

    def set_status(self, status_code, reason=None):
        """Sets the status code for our response.
        Overriding is done so as to handle unknown response codes gracefully.
        """
        self._status_code = status_code
        if reason is not None:
            self._reason = tornado.escape.native_str(reason)
        else:
            try:
                self._reason = tornado.httputil.responses[status_code]
            except KeyError:
                # Unknown status code: fall back to a generic reason string.
                self._reason = tornado.escape.native_str("Server Not Found")

    def calculate_delay(self, response):
        """Update the per-host throttling delay from recent response times."""
        self.application.throttle_variables["hosts"][self.request.host]["request_times"].append(response.request_time)
        # NOTE(review): this measures the per-host dict's length (always 2
        # keys), not the request_times list — the pruning below likely never
        # runs; confirm whether len(...["request_times"]) was intended.
        if len(self.application.throttle_variables["hosts"][self.request.host]) > 20:
            self.application.throttle_variables["hosts"][self.request.host]["request_times"].pop(0)
        response_times = self.application.throttle_variables["hosts"][self.request.host]["request_times"]
        # Average of the older half vs. the newer half of the samples.
        # NOTE(review): int(len/2) is 0 with fewer than 2 samples —
        # ZeroDivisionError; confirm at least 2 samples are guaranteed.
        last_ten = sum(response_times[:int(len(response_times)/2)])/int(len(response_times)/2)
        second_last_ten = sum(response_times[int(len(response_times)/2):])/(len(response_times)-int(len(response_times)/2))
        if round(last_ten - second_last_ten, 3) > self.application.throttle_variables["threshold"]:
            # Responses are slowing down: delay future requests by the difference.
            self.application.throttle_variables["hosts"][self.request.host]["delay"] = round(last_ten - second_last_ten, 3)
        else:
            self.application.throttle_variables["hosts"][self.request.host]["delay"] = 0

    # This function is a callback after the async client gets the full response
    # This method will be improvised with more headers from original responses
    def handle_response(self, response):
        """Retry timeout-like responses up to 3 times; otherwise write out."""
        if self.application.throttle_variables:
            self.calculate_delay(response)
        if response.code in [408, 599]:
            # 408 = request timeout, 599 = tornado connection/timeout error.
            try:
                old_count = self.request.retries
                self.request.retries = old_count + 1
            except AttributeError:
                # First failure for this request.
                self.request.retries = 1
            finally:
                if self.request.retries < 3:
                    # Reset buffered data and re-issue the request.
                    self.request.response_buffer = ''
                    self.clear()
                    self.process_request()
                else:
                    self.write_response(response)
        else:
            self.write_response(response)

    # This function writes a new response & caches it
    def write_response(self, response):
        """Copy status and headers to the client, cache, then finish."""
        self.set_status(response.code)
        del self._headers['Server']
        for header, value in list(response.headers.items()):
            if header == "Set-Cookie":
                # Set-Cookie may repeat; append instead of overwriting.
                self.add_header(header, value)
            else:
                if header not in restricted_response_headers:
                    self.set_header(header, value)
        if self.request.response_buffer:
            # Body already streamed via handle_data_chunk; just cache it.
            self.cache_handler.dump(response)
        self.finish()

    # This function handles a dummy response object which is created from cache
    def write_cached_response(self, response):
        """Replay a cached response (status, headers, body) to the client."""
        self.set_status(response.code)
        for header, value in response.headers.items():
            if header == "Set-Cookie":
                self.add_header(header, value)
            else:
                if header not in restricted_response_headers:
                    self.set_header(header, value)
        self.write(response.body)
        self.finish()

    # This function is a callback when a small chunk is received
    def handle_data_chunk(self, data):
        """Stream a chunk to the client and keep a copy for caching."""
        if data:
            self.write(data)
            self.request.response_buffer += data

    # This function creates and makes the request to upstream server
    def process_request(self):
        """Serve from cache if possible; otherwise forward upstream."""
        if self.cached_response:
            self.write_cached_response(self.cached_response)
        else:
            # pycurl is needed for curl client
            async_client = tornado.curl_httpclient.CurlAsyncHTTPClient()
            # httprequest object is created and then passed to async client with a callback
            request = tornado.httpclient.HTTPRequest(
                url=self.request.url,
                method=self.request.method,
                body=self.request.body,
                headers=self.request.headers,
                follow_redirects=False,
                use_gzip=True,
                streaming_callback=self.handle_data_chunk,
                header_callback=None,
                proxy_host=self.application.outbound_ip,
                proxy_port=self.application.outbound_port,
                proxy_username=self.application.outbound_username,
                proxy_password=self.application.outbound_password,
                allow_nonstandard_methods=True,
                validate_cert=False)
            try:
                async_client.fetch(request, callback=self.handle_response)
            except Exception:
                # Best-effort: a failed fetch simply produces no response.
                pass

    def cache_check(self):
        # This block here checks for already cached response and if present returns one
        self.cache_handler = CacheHandler(
            self.application.cache_dir,
            self.request,
            self.application.cookie_regex,
            self.application.cookie_blacklist
        )
        self.cached_response = self.cache_handler.load()
        self.process_request()

    @tornado.web.asynchronous
    def get(self):
        """
        * This function handles all requests except the connect request.
        * Once ssl stream is formed between browser and proxy, the requests are
          then processed by this function
        """
        # The flow starts here
        self.request.response_buffer = ''
        # Request header cleaning
        for header in restricted_request_headers:
            try:
                del self.request.headers[header]
            except:  # NOTE(review): bare except — likely only KeyError is expected
                continue
        # The requests that come through ssl streams are relative requests, so
        # transparent proxying is required. The following snippet decides the
        # url that should be passed to the async client.
        if self.request.uri.startswith(self.request.protocol, 0):
            # Normal Proxy Request
            self.request.url = self.request.uri
        else:
            # Transparent Proxy Request
            self.request.url = self.request.protocol + "://" + self.request.host + self.request.uri
        if self.application.throttle_variables:
            try:
                throttle_delay = self.application.throttle_variables["hosts"][self.request.host]["delay"]
            except KeyError:
                # First request for this host: initialize its throttle entry.
                self.application.throttle_variables["hosts"][self.request.host] = {"request_times": [], "delay": 0}
                throttle_delay = 0
            finally:
                if throttle_delay == 0:
                    self.cache_check()
                else:
                    # Defer the request until the throttle delay has elapsed.
                    tornado.ioloop.IOLoop.instance().add_timeout(datetime.timedelta(seconds=throttle_delay), self.cache_check)
        else:
            self.cache_check()

    # The following methods can all be handled through the get() implementation
    @tornado.web.asynchronous
    def post(self):
        return self.get()

    @tornado.web.asynchronous
    def head(self):
        return self.get()

    @tornado.web.asynchronous
    def put(self):
        return self.get()

    @tornado.web.asynchronous
    def delete(self):
        return self.get()

    @tornado.web.asynchronous
    def options(self):
        return self.get()

    @tornado.web.asynchronous
    def trace(self):
        return self.get()

    @tornado.web.asynchronous
    def connect(self):
        """
        This function gets called when a connect request is received.
        * The host and port are obtained from the request uri
        * A socket is created, wrapped in ssl and then added to SSLIOStream
        * This stream is used to connect to speak to the remote host on given port
        * If the server speaks ssl on that port, callback start_tunnel is called
        * An OK response is written back to client
        * The client side socket is wrapped in ssl
        * If the wrapping is successful, a new SSLIOStream is made using that socket
        * The stream is added back to the server for monitoring
        """
        host, port = self.request.uri.split(':')

        def start_tunnel():
            # Upstream speaks ssl: acknowledge the tunnel, then wrap the
            # client-side socket with a generated certificate for this host.
            try:
                self.request.connection.stream.write(b"HTTP/1.1 200 Connection established\r\n\r\n")
                wrap_socket(
                    self.request.connection.stream.socket,
                    host,
                    self.application.ca_cert,
                    self.application.ca_key,
                    self.application.certs_folder,
                    success=ssl_success
                )
            except tornado.iostream.StreamClosedError:
                pass

        def ssl_success(client_socket):
            # Hand the now-encrypted client stream back to the server loop.
            client = tornado.iostream.SSLIOStream(client_socket)
            server.handle_stream(client, self.application.inbound_ip)

        # Tiny Hack to satisfy proxychains CONNECT request to HTTP port.
        # HTTPS fail check has to be improvised
        #def ssl_fail():
        #    self.request.connection.stream.write(b"HTTP/1.1 200 Connection established\r\n\r\n")
        #    server.handle_stream(self.request.connection.stream, self.application.inbound_ip)

        # Hacking to be done here, so as to check for ssl using proxy and auth
        try:
            s = ssl.wrap_socket(socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0))
            upstream = tornado.iostream.SSLIOStream(s)
            #start_tunnel()
            #upstream.set_close_callback(ssl_fail)
            upstream.connect((host, int(port)), start_tunnel)
        except Exception:
            self.finish()
for feature in config['features']: df.loc[i, feature['column']] = res[feature['name']] except Exception as e: logging.error("Failed to geocode %s (%s)" % (loc, e)) if __name__ == '__main__': config = get_config() geocode_function = get_geocode_function(config) input_df = config['input_ds'].get_dataframe() writer = None try: with CacheHandler(config['cache_location'], enabled=config['cache_enabled'], \ size_limit=config['cache_size'], eviction_policy=config['cache_eviction']) as cache: for current_df in config['input_ds'].iter_dataframes(chunksize=max(10000, config['batch_size'])): columns = current_df.columns.tolist() columns_to_append = [f['column'] for f in config['features'] if not f['column'] in columns] if columns_to_append: index = max(columns.index(config['lat_column']), columns.index(config['lng_column'])) current_df = current_df.reindex(columns = columns[:index + 1] + columns_to_append + columns[index + 1:], copy=False) if not config['batch_enabled']: results = zip(*current_df.apply(perform_geocode, axis=1, args=(config, geocode_function, cache))) for feature, result in zip(config['features'], results): current_df[feature['column']] = result else:
from throttle import Throttle throttle = Throttle() except ImportError: throttle = None app = Flask(__name__) app.config.from_object('config.ConfigProduction') cache = Cache(app, config={'CACHE_TYPE': 'simple'}) sentry = Sentry(app) # db.init_app(app) # JL HACK ~ disable mysql # Optional Redis cache, for caching Google spreadsheet campaign overrides cache_handler = CacheHandler(app.config['REDIS_URL']) # FFTF Leaderboard handler. Only used if FFTF Leadboard params are passed in leaderboard = FFTFLeaderboard(app.debug, app.config['FFTF_LB_ASYNC_POOL_SIZE'], app.config['FFTF_CALL_LOG_API_KEY']) call_methods = ['GET', 'POST'] data = PoliticalData(cache_handler, app.debug) print "Call Congress is starting up!" def make_cache_key(*args, **kwargs): path = request.path args = str(hash(frozenset(request.args.items())))
class ProxyHandler(tornado.web.RequestHandler):
    """
    This RequestHandler processes all the requests that the application receives
    """
    SUPPORTED_METHODS = ['GET', 'POST', 'CONNECT', 'HEAD', 'PUT', 'DELETE', 'OPTIONS']

    def set_status(self, status_code, reason=None):
        """Sets the status code for our response.
        Overriding is done so as to handle unknown response codes gracefully.
        """
        self._status_code = status_code
        if reason is not None:
            self._reason = tornado.escape.native_str(reason)
        else:
            try:
                self._reason = tornado.httputil.responses[status_code]
            except KeyError:
                # Unknown status code: fall back to a generic reason string.
                self._reason = tornado.escape.native_str("Server Not Found")

    @tornado.web.asynchronous
    def get(self):
        """
        * This function handles all requests except the connect request.
        * Once ssl stream is formed between browser and proxy, the requests are
          then processed by this function
        """
        # Data for handling headers through a streaming callback
        self.request.response_buffer = ''
        restricted_headers = ['Content-Length',
                              'Content-Encoding',
                              'Etag',
                              'Transfer-Encoding',
                              'Connection',
                              'Vary',
                              'Accept-Ranges',
                              'Pragma']

        # This function is a callback after the async client gets the full response
        # This method will be improvised with more headers from original responses
        def handle_response(response):
            self.set_status(response.code)
            for header, value in list(response.headers.items()):
                if header == "Set-Cookie":
                    # Set-Cookie may repeat; append instead of overwriting.
                    self.add_header(header, value)
                else:
                    if header not in restricted_headers:
                        self.set_header(header, value)
            if self.request.response_buffer:
                # Body already streamed chunk by chunk; cache the response.
                self.cache_handler.dump(response)
            else:
                # Nothing was streamed: write the full body now.
                self.write(response.body)
            self.finish()

        # This function is a callback when a small chunk is received
        def handle_data_chunk(data):
            if data:
                self.write(data)
                self.request.response_buffer += data

        # More headers are to be removed
        for header in ('Connection', 'Pragma', 'Cache-Control'):
            try:
                del self.request.headers[header]
            except:  # NOTE(review): bare except — likely only KeyError is expected
                continue
        # The requests that come through ssl streams are relative requests, so
        # transparent proxying is required. The following snippet decides the
        # url that should be passed to the async client.
        if self.request.host in self.request.uri.split('/'):
            # Normal Proxy Request
            self.request.url = self.request.uri
        else:
            # Transparent Proxy Request
            self.request.url = self.request.protocol + "://" + self.request.host + self.request.uri
        # This block here checks for already cached response and if present returns one
        self.cache_handler = CacheHandler(self.application.cache_dir, self.request)
        cached_response = self.cache_handler.load()
        # NOTE(review): the cached response is loaded but never served — the
        # short-circuit below is commented out, so every request goes upstream.
        #if cached_response:
            #handle_response(cached_response)
        # httprequest object is created and then passed to async client with a callback
        # pycurl is needed for curl client
        async_client = tornado.curl_httpclient.CurlAsyncHTTPClient()
        request = tornado.httpclient.HTTPRequest(
            url=self.request.url,
            method=self.request.method,
            body=self.request.body,
            headers=self.request.headers,
            follow_redirects=False,
            use_gzip=True,
            streaming_callback=handle_data_chunk,
            header_callback=None,
            proxy_host=self.application.outbound_ip,
            proxy_port=self.application.outbound_port,
            allow_nonstandard_methods=True,
            validate_cert=False)
        try:
            async_client.fetch(request, callback=handle_response)
        except Exception:
            # Best-effort: a failed fetch simply produces no response.
            pass

    # The following methods can all be handled through the get() implementation
    @tornado.web.asynchronous
    def post(self):
        return self.get()

    @tornado.web.asynchronous
    def head(self):
        return self.get()

    @tornado.web.asynchronous
    def put(self):
        return self.get()

    @tornado.web.asynchronous
    def delete(self):
        return self.get()

    @tornado.web.asynchronous
    def options(self):
        return self.get()

    @tornado.web.asynchronous
    def connect(self):
        """
        This function gets called when a connect request is received.
        * The host and port are obtained from the request uri
        * A socket is created, wrapped in ssl and then added to SSLIOStream
        * This stream is used to connect to speak to the remote host on given port
        * If the server speaks ssl on that port, callback start_tunnel is called
        * An OK response is written back to client
        * The client side socket is wrapped in ssl
        * If the wrapping is successful, a new SSLIOStream is made using that socket
        * The stream is added back to the server for monitoring
        """
        host, port = self.request.uri.split(':')

        def start_tunnel():
            # Upstream speaks ssl: acknowledge the tunnel and wrap the
            # client-side socket for this host.
            try:
                self.request.connection.stream.write(b"HTTP/1.1 200 OK CONNECTION ESTABLISHED\r\n\r\n")
                wrap_socket(self.request.connection.stream.socket, host, success=ssl_success)
            except tornado.iostream.StreamClosedError:
                pass

        def ssl_success(client_socket):
            # Hand the now-encrypted client stream back to the server loop.
            client = tornado.iostream.SSLIOStream(client_socket)
            server.handle_stream(client, self.application.inbound_ip)  # lint:ok

        try:
            s = ssl.wrap_socket(socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0))
            upstream = tornado.iostream.SSLIOStream(s)
            upstream.connect((host, int(port)), start_tunnel)
        except Exception:
            self.write(b"Server Not Found")
            self.finish()
class CustomWebSocketHandler(tornado.websocket.WebSocketHandler):
    """Proxy handler for websocket traffic.

    * This class is used for handling websocket traffic.
    * An object of this class replaces the main request handler for a
      request carrying the header "Upgrade: websocket".
    * wss:// - the CONNECT request is handled by the main handler.
    """

    def upstream_connect(self, io_loop=None, callback=None):
        """Open a websocket connection to the upstream server.

        Implemented as a custom alternative to
        tornado.websocket.websocket_connect so the outbound proxy settings
        and the client's own (non-restricted) headers can be forwarded.

        Returns the upstream connection's connect future; when *callback*
        is given it is scheduled on *io_loop* once that future resolves.
        """
        # io_loop is needed, how else will it work with tornado :P
        if io_loop is None:
            io_loop = tornado.ioloop.IOLoop.current()
        # During secure communication, we get relative URI, so make them absolute
        if self.request.uri.startswith(self.request.protocol, 0):
            # Normal Proxy Request: the URI is already absolute
            self.request.url = self.request.uri
        else:
            # Transparent Proxy Request: rebuild the absolute URL from parts
            self.request.url = self.request.protocol + "://" + self.request.host + self.request.uri
        # WebSocketClientConnection expects ws:// & wss:// schemes, so the
        # first "http" is rewritten (http->ws, https->wss)
        self.request.url = self.request.url.replace("http", "ws", 1)
        # Forward the client's headers (cookies and stuff), dropping the
        # ones listed in restricted_request_headers.
        # NOTE(review): iteritems() is Python 2 only — confirm the target
        # interpreter, other snippets in this project use items().
        request_headers = tornado.httputil.HTTPHeaders()
        for name, value in self.request.headers.iteritems():
            if name not in restricted_request_headers:
                request_headers.add(name, value)
        # Build a custom request
        request = tornado.httpclient.HTTPRequest(
            url=self.request.url,
            headers=request_headers,
            proxy_host=self.application.outbound_ip,
            proxy_port=self.application.outbound_port,
            proxy_username=self.application.outbound_username,
            proxy_password=self.application.outbound_password)
        self.upstream_connection = CustomWebSocketClientConnection(
            io_loop, request)
        if callback is not None:
            io_loop.add_future(self.upstream_connection.connect_future, callback)
        return self.upstream_connection.connect_future  # This returns a future

    def _execute(self, transforms, *args, **kwargs):
        """Override of WebSocketHandler._execute.

        First connects to the upstream URL; only when that succeeds does
        the regular websocket handshake with the client proceed (inside
        the start_tunnel callback).
        """
        def start_tunnel(future):
            """ A callback which is called when connection to url is successful """
            self.upstream = future.result()  # We need upstream to write further messages
            self.handshake_request = self.upstream_connection.request  # HTTPRequest needed for caching :P
            self.handshake_request.response_buffer = ""  # Needed for websocket data & compliance with cache_handler stuff
            self.handshake_request.version = "HTTP/1.1"  # Tiny hack to protect caching (But according to websocket standards)
            self.handshake_request.body = self.handshake_request.body or ""  # I dont know why a None is coming :P
            tornado.websocket.WebSocketHandler._execute(
                self, transforms, *args, **kwargs)  # The regular procedures are to be done
        # We try to connect to provided URL & then we proceed with connection on client side.
        self.upstream = self.upstream_connect(callback=start_tunnel)

    def store_upstream_data(self, message):
        """ Save websocket data sent from client to server, i.e add it to HTTPRequest.response_buffer with direction (>>) """
        try:  # Cannot write binary content as a string, so catch it
            self.handshake_request.response_buffer += (">>> %s\r\n" % (message))
        except TypeError:
            self.handshake_request.response_buffer += (">>> May be binary\r\n")

    def store_downstream_data(self, message):
        """ Save websocket data sent from server to client, i.e add it to HTTPRequest.response_buffer with direction (<<) """
        try:  # Cannot write binary content as a string, so catch it
            self.handshake_request.response_buffer += ("<<< %s\r\n" % (message))
        except TypeError:
            self.handshake_request.response_buffer += ("<<< May be binary\r\n")

    def on_message(self, message):
        """ Everytime a message is received from client side, this instance method is called """
        self.upstream.write_message(
            message)  # The obtained message is written to upstream
        self.store_upstream_data(message)
        # The following check ensures that if a callback is added for reading message from upstream, another one is not added
        if not self.upstream.read_future:
            self.upstream.read_message(
                callback=self.on_response
            )  # A callback is added to read the data when upstream responds

    def on_response(self, message):
        """A callback when a message is received from upstream.

        *** Here *message* is a future, not the payload itself.
        """
        # The following check ensures that if a callback is added for reading message from upstream, another one is not added
        if not self.upstream.read_future:
            self.upstream.read_message(callback=self.on_response)
        if self.ws_connection:  # Check if connection still exists
            if message.result(
            ):  # Check if it is not NULL ( Indirect checking of upstream connection )
                self.write_message(
                    message.result())  # Write obtained message to client
                self.store_downstream_data(message.result())
            else:
                self.close()

    def on_close(self):
        """ Called when websocket is closed. So handshake request-response pair along with websocket data as response body is saved """
        # Required for cache_handler: synthesize an HTTPResponse for the
        # handshake so the exchange can be dumped like a normal request
        self.handshake_response = tornado.httpclient.HTTPResponse(
            self.handshake_request,
            self.upstream_connection.code,
            headers=self.upstream_connection.headers,
            request_time=0)
        # Procedure for dumping a tornado request-response
        self.cache_handler = CacheHandler(self.application.cache_dir,
                                          self.handshake_request,
                                          self.application.cookie_regex,
                                          self.application.cookie_blacklist)
        self.cached_response = self.cache_handler.load()
        self.cache_handler.dump(self.handshake_response)
import json from flask import Flask, request, jsonify from flask_cors import CORS from cache_handler import CacheHandler from slugify import slugify from db_controller import DbController app = Flask(__name__) db = CacheHandler() app.config["DEBUG"] = True CORS(app) @app.route("/test") def test(): return "test" @app.route("/search") def get_data(): keys = db.get_keys() source = slugify(request.args.get("source", ""), separator="-") destination = slugify(request.args.get("destination", ""), separator="-") price = int(request.args.get("price", "9999999999999")) if source: keys = filter(lambda key: key.find(source) == 9, keys) if destination: keys = filter(lambda key: key.find(destination) > 9, keys)
def get(self):
    """Handle all requests except the connect request.

    Once ssl stream is formed between browser and proxy, the requests are
    then processed by this function. Serves from cache when possible,
    otherwise fetches through the async curl client, with retry and
    botnet-mode (proxy switching) support.
    """
    # The flow starts here
    self.request.local_timestamp = datetime.datetime.now()
    self.request.response_buffer = ''
    # The requests that come through ssl streams are relative requests, so transparent proxying is required. The
    # following snippet decides the url that should be passed to the async client
    if self.request.uri.startswith(self.request.protocol, 0):
        # Normal Proxy Request.
        self.request.url = self.request.uri
    else:
        # Transparent Proxy Request.
        self.request.url = self.request.protocol + "://" + self.request.host
        if self.request.uri != '/':  # Add uri only if needed.
            self.request.url += self.request.uri
    # This block here checks for already cached response and if present returns one
    self.cache_handler = CacheHandler(
        self.application.cache_dir,
        self.request,
        self.application.cookie_regex,
        self.application.cookie_blacklist)
    # NOTE(review): request_hash is not referenced below — presumably
    # calculate_hash mutates the cache handler's state so load() works;
    # confirm against CacheHandler.
    request_hash = yield tornado.gen.Task(self.cache_handler.calculate_hash)
    self.cached_response = self.cache_handler.load()
    if self.cached_response:
        if self.cached_response.body:
            self.write(self.cached_response.body)
        self.finish_response(self.cached_response)
    else:
        # Request header cleaning: drop hop-by-hop/unsafe headers.
        # NOTE(review): bare except — only KeyError is expected here.
        for header in ProxyHandler.restricted_request_headers:
            try:
                del self.request.headers[header]
            except:
                continue
        # HTTP auth if exists
        http_auth_username = None
        http_auth_password = None
        http_auth_mode = None
        if self.application.http_auth:
            host = self.request.host
            # If default ports are not provided, they are added, because
            # http_auth_hosts entries are stored as host:port
            if ':' not in self.request.host:
                default_ports = {'http': '80', 'https': '443'}
                if self.request.protocol in default_ports:
                    host = self.request.host + ':' + default_ports[self.request.protocol]
            # Check if auth is provided for that host
            try:
                index = self.application.http_auth_hosts.index(host)
                http_auth_username = self.application.http_auth_usernames[index]
                http_auth_password = self.application.http_auth_passwords[index]
                http_auth_mode = self.application.http_auth_modes[index]
            except ValueError:
                pass
        # pycurl is needed for curl client
        async_client = tornado.curl_httpclient.CurlAsyncHTTPClient()
        # httprequest object is created and then passed to async client with a callback
        success_response = False  # is used to check the response in the botnet mode
        while not success_response:
            # Proxy Switching (botnet_mode) code: each pass may pick a
            # different outbound proxy from the pool
            if self.application.proxy_manager:
                proxy = self.application.proxy_manager.get_next_available_proxy()
                self.application.outbound_ip = proxy["proxy"][0]
                self.application.outbound_port = int(proxy["proxy"][1])
            # httprequest object is created and then passed to async client with a callback
            callback = None
            if self.application.outbound_proxy_type == 'socks':
                callback = prepare_curl_callback  # socks callback function.
            body = self.request.body or None
            request = tornado.httpclient.HTTPRequest(
                url=self.request.url,
                method=self.request.method,
                body=body,
                headers=self.request.headers,
                auth_username=http_auth_username,
                auth_password=http_auth_password,
                auth_mode=http_auth_mode,
                follow_redirects=False,
                use_gzip=True,
                streaming_callback=self.handle_data_chunk,
                header_callback=None,
                proxy_host=self.application.outbound_ip,
                proxy_port=self.application.outbound_port,
                proxy_username=self.application.outbound_username,
                proxy_password=self.application.outbound_password,
                allow_nonstandard_methods=True,
                prepare_curl_callback=callback,
                validate_cert=False)
            try:
                response = yield tornado.gen.Task(async_client.fetch, request)
            except Exception:
                response = None
                pass
            # Request retries: up to 3 more attempts on failure/timeout
            # (408 Request Timeout, 599 tornado client error)
            for i in range(0, 3):
                if (response is None) or response.code in [408, 599]:
                    self.request.response_buffer = ''
                    response = yield tornado.gen.Task(async_client.fetch, request)
                else:
                    success_response = True
                    break
            # Botnet mode code (proxy switching).
            # Checking the status of the proxy (asynchronous).
            if self.application.proxy_manager and not success_response:
                proxy_check_req = tornado.httpclient.HTTPRequest(
                    url=self.application.proxy_manager.testing_url,  # testing url is google.com.
                    use_gzip=True,
                    proxy_host=self.application.outbound_ip,
                    proxy_port=self.application.outbound_port,
                    proxy_username=self.application.outbound_username,
                    proxy_password=self.application.outbound_password,
                    prepare_curl_callback=callback,  # socks callback function.
                    validate_cert=False)
                # NOTE(review): if this fetch raises, proxy_check_resp is
                # unbound on the next line (UnboundLocalError) — confirm.
                try:
                    proxy_check_resp = yield tornado.gen.Task(async_client.fetch, proxy_check_req)
                except Exception:
                    pass
                if proxy_check_resp.code != 200:
                    # Dead proxy: drop it from the pool and loop again
                    self.application.proxy_manager.remove_proxy(proxy["index"])
                else:
                    success_response = True
            else:
                success_response = True
        self.finish_response(response)
        # Cache the response after finishing the response, so caching time is not included in response time
        self.cache_handler.dump(response)
def get(self):
    """Handle all requests except the connect request.

    Once the ssl stream is formed between browser and proxy, the requests
    are then processed by this function: the target URL is derived from
    the (possibly relative) request URI, the cache is consulted, and the
    request is relayed through the async curl client, streaming response
    chunks back to the client as they arrive.
    """
    # Data for handling headers through a streaming callback
    self.request.response_buffer = ''
    # Hop-by-hop / content-negotiation headers that must not be copied
    # verbatim from the upstream response onto our response.
    restricted_headers = ['Content-Length',
                          'Content-Encoding',
                          'Etag',
                          'Transfer-Encoding',
                          'Connection',
                          'Vary',
                          'Accept-Ranges',
                          'Pragma']

    # This function is a callback after the async client gets the full response
    # This method will be improvised with more headers from original responses
    def handle_response(response):
        self.set_status(response.code)
        for header, value in list(response.headers.items()):
            if header == "Set-Cookie":
                # Multiple Set-Cookie headers are legal; add, don't replace
                self.add_header(header, value)
            else:
                if header not in restricted_headers:
                    self.set_header(header, value)
        if self.request.response_buffer:
            # Body was already streamed out chunk by chunk; just cache it
            self.cache_handler.dump(response)
        else:
            self.write(response.body)
        self.finish()

    # This function is a callback when a small chunk is recieved
    def handle_data_chunk(data):
        if data:
            self.write(data)
            self.request.response_buffer += data

    # More headers are to be removed before relaying the request upstream.
    # Fix: deleting an absent header raises KeyError; the previous bare
    # `except:` also swallowed SystemExit/KeyboardInterrupt.
    for header in ('Connection', 'Pragma', 'Cache-Control'):
        try:
            del self.request.headers[header]
        except KeyError:
            continue

    # The requests that come through ssl streams are relative requests, so transparent
    # proxying is required. The following snippet decides the url that should be passed
    # to the async client
    if self.request.host in self.request.uri.split('/'):
        # Normal Proxy Request: URI is already absolute
        self.request.url = self.request.uri
    else:
        # Transparent Proxy Request: rebuild the absolute URL
        self.request.url = self.request.protocol + "://" + self.request.host + self.request.uri

    # This block here checks for already cached response and if present returns one.
    # NOTE(review): the cached short-circuit is still disabled below.
    self.cache_handler = CacheHandler(self.application.cache_dir, self.request)
    cached_response = self.cache_handler.load()
    #if cached_response:
        #handle_response(cached_response)

    # httprequest object is created and then passed to async client with a callback
    # pycurl is needed for curl client
    async_client = tornado.curl_httpclient.CurlAsyncHTTPClient()
    request = tornado.httpclient.HTTPRequest(
        url=self.request.url,
        method=self.request.method,
        body=self.request.body,
        headers=self.request.headers,
        follow_redirects=False,
        use_gzip=True,
        streaming_callback=handle_data_chunk,
        header_callback=None,
        proxy_host=self.application.outbound_ip,
        proxy_port=self.application.outbound_port,
        allow_nonstandard_methods=True,
        validate_cert=False)
    try:
        async_client.fetch(request, callback=handle_response)
    except Exception:
        # Best-effort: fetch errors surface through the callback path
        pass
class CustomWebSocketHandler(tornado.websocket.WebSocketHandler):
    """Proxy handler for websocket traffic.

    * This class is used for handling websocket traffic.
    * An object of this class replaces the main request handler for a
      request carrying the header "Upgrade: websocket".
    * wss:// - the CONNECT request is handled by the main handler.
    """

    def upstream_connect(self, io_loop=None, callback=None):
        """Open a websocket connection to the upstream server.

        Implemented as a custom alternative to
        tornado.websocket.websocket_connect so the outbound proxy
        settings and the client's non-restricted headers are forwarded.
        Returns the connect future of the upstream connection.
        """
        # io_loop is needed, how else will it work with tornado :P
        if io_loop is None:
            io_loop = tornado.ioloop.IOLoop.current()
        # During secure communication, we get relative URI, so make them absolute
        if self.request.uri.startswith(self.request.protocol,0):
            # Normal Proxy Request
            self.request.url = self.request.uri
        else:
            # Transparent Proxy Request
            self.request.url = self.request.protocol + "://" + self.request.host + self.request.uri
        # WebSocketClientConnection expects ws:// & wss:// (http->ws, https->wss)
        self.request.url = self.request.url.replace("http", "ws", 1)
        # Have to add cookies and stuff, minus restricted headers.
        # NOTE(review): iteritems() is Python 2 only — confirm interpreter.
        request_headers = tornado.httputil.HTTPHeaders()
        for name, value in self.request.headers.iteritems():
            if name not in restricted_request_headers:
                request_headers.add(name, value)
        # Build a custom request
        request = tornado.httpclient.HTTPRequest(
            url=self.request.url,
            headers=request_headers,
            proxy_host=self.application.outbound_ip,
            proxy_port=self.application.outbound_port,
            proxy_username=self.application.outbound_username,
            proxy_password=self.application.outbound_password
        )
        self.upstream_connection = CustomWebSocketClientConnection(io_loop, request)
        if callback is not None:
            io_loop.add_future(self.upstream_connection.connect_future, callback)
        return self.upstream_connection.connect_future  # This returns a future

    def _execute(self, transforms, *args, **kwargs):
        """Override of WebSocketHandler._execute: connect upstream first,
        then (on success) run the normal client-side handshake."""
        def start_tunnel(future):
            """ A callback which is called when connection to url is successful """
            self.upstream = future.result()  # We need upstream to write further messages
            self.handshake_request = self.upstream_connection.request  # HTTPRequest needed for caching :P
            self.handshake_request.response_buffer = ""  # Needed for websocket data & compliance with cache_handler stuff
            self.handshake_request.version = "HTTP/1.1"  # Tiny hack to protect caching (But according to websocket standards)
            self.handshake_request.body = self.handshake_request.body or ""  # I dont know why a None is coming :P
            tornado.websocket.WebSocketHandler._execute(self, transforms, *args, **kwargs)  # The regular procedures are to be done
        # We try to connect to provided URL & then we proceed with connection on client side.
        self.upstream = self.upstream_connect(callback=start_tunnel)

    def store_upstream_data(self, message):
        """ Save websocket data sent from client to server, i.e add it to HTTPRequest.response_buffer with direction (>>) """
        try:  # Cannot write binary content as a string, so catch it
            self.handshake_request.response_buffer += (">>> %s\r\n"%(message))
        except TypeError:
            self.handshake_request.response_buffer += (">>> May be binary\r\n")

    def store_downstream_data(self, message):
        """ Save websocket data sent from server to client, i.e add it to HTTPRequest.response_buffer with direction (<<) """
        try:  # Cannot write binary content as a string, so catch it
            self.handshake_request.response_buffer += ("<<< %s\r\n"%(message))
        except TypeError:
            self.handshake_request.response_buffer += ("<<< May be binary\r\n")

    def on_message(self, message):
        """ Everytime a message is received from client side, this instance method is called """
        self.upstream.write_message(message)  # The obtained message is written to upstream
        self.store_upstream_data(message)
        # The following check ensures that if a callback is added for reading message from upstream, another one is not added
        if not self.upstream.read_future:
            self.upstream.read_message(callback=self.on_response)  # A callback is added to read the data when upstream responds

    def on_response(self, message):
        """A callback when a message is received from upstream.

        *** Here *message* is a future, not the payload itself.
        """
        # The following check ensures that if a callback is added for reading message from upstream, another one is not added
        if not self.upstream.read_future:
            self.upstream.read_message(callback=self.on_response)
        if self.ws_connection:  # Check if connection still exists
            if message.result():  # Check if it is not NULL ( Indirect checking of upstream connection )
                self.write_message(message.result())  # Write obtained message to client
                self.store_downstream_data(message.result())
            else:
                self.close()

    def on_close(self):
        """ Called when websocket is closed. So handshake request-response pair along with websocket data as response body is saved """
        # Required for cache_handler: synthesize a response object for the handshake
        self.handshake_response = tornado.httpclient.HTTPResponse(
            self.handshake_request,
            self.upstream_connection.code,
            headers=self.upstream_connection.headers,
            request_time=0
        )
        # Procedure for dumping a tornado request-response
        self.cache_handler = CacheHandler(
            self.application.cache_dir,
            self.handshake_request,
            self.application.cookie_regex,
            self.application.cookie_blacklist
        )
        self.cached_response = self.cache_handler.load()
        self.cache_handler.dump(self.handshake_response)
def get(self):
    """Handle all requests except the connect request.

    Once ssl stream is formed between browser and proxy, the requests are
    then processed by this function. A cached response is replayed when
    available; otherwise the request is relayed via the async curl client.
    """
    self.request.response_buffer = ''  # Data for handling headers through a streaming callback
    # Need to work around for something
    # Hop-by-hop / content-negotiation headers not copied from upstream.
    restricted_response_headers = ['Content-Length',
                                   'Content-Encoding',
                                   'Etag',
                                   'Transfer-Encoding',
                                   'Connection',
                                   'Vary',
                                   'Accept-Ranges',
                                   'Pragma']

    # This function is a callback after the async client gets the full response
    # This method will be improvised with more headers from original responses
    def handle_response(response):
        self.set_status(response.code)
        # NOTE(review): unconditional delete — raises KeyError if the
        # "Server" header was already removed; confirm set_default_headers
        # interplay.
        del self._headers['Server']
        for header, value in list(response.headers.items()):
            if header == "Set-Cookie":
                self.add_header(header, value)  # Set-Cookie may repeat
            else:
                if header not in restricted_response_headers:
                    self.set_header(header, value)
        if self.request.response_buffer:
            # Body already streamed out chunk by chunk; only cache here
            self.cache_handler.dump(response)
        self.finish()

    # Replays a previously cached response (status, headers, body) to the client
    def handle_cached_response(response):
        self.set_status(response.code)
        for header, value in list(response.headers.items()):
            if header == "Set-Cookie":
                self.add_header(header, value)
            else:
                if header not in restricted_response_headers:
                    self.set_header(header, value)
        self.write(response.body)
        self.finish()

    # This function is a callback when a small chunk is received
    def handle_data_chunk(data):
        if data:
            self.write(data)
            self.request.response_buffer += data

    # More headers are to be removed before relaying upstream.
    # NOTE(review): bare except — only KeyError is expected here.
    for header in ('Connection', 'Pragma', 'Cache-Control', 'If-Modified-Since'):
        try:
            del self.request.headers[header]
        except:
            continue

    # The requests that come through ssl streams are relative requests, so transparent
    # proxying is required. The following snippet decides the url that should be passed
    # to the async client
    if self.request.host in self.request.uri.split('/'):
        # Normal Proxy Request
        self.request.url = self.request.uri
    else:
        # Transparent Proxy Request
        self.request.url = self.request.protocol + "://" + self.request.host + self.request.uri

    # This block here checks for already cached response and if present returns one
    self.cache_handler = CacheHandler(
        self.application.cache_dir,
        self.request,
        self.application.cookie_regex,
        self.application.cookie_blacklist
    )
    cached_response = self.cache_handler.load()

    if cached_response:
        handle_cached_response(cached_response)
    else:
        # httprequest object is created and then passed to async client with a callback
        # pycurl is needed for curl client
        async_client = tornado.curl_httpclient.CurlAsyncHTTPClient()
        request = tornado.httpclient.HTTPRequest(
            url=self.request.url,
            method=self.request.method,
            body=self.request.body,
            headers=self.request.headers,
            follow_redirects=False,
            use_gzip=True,
            streaming_callback=handle_data_chunk,
            header_callback=None,
            proxy_host=self.application.outbound_ip,
            proxy_port=self.application.outbound_port,
            proxy_username=self.application.outbound_username,
            proxy_password=self.application.outbound_password,
            allow_nonstandard_methods=True,
            validate_cert=False)
        try:
            async_client.fetch(request, callback=handle_response)
        except Exception:
            pass
class ProxyHandler(tornado.web.RequestHandler):
    """
    This RequestHandler processes all the requests that the application received
    """
    SUPPORTED_METHODS = ['GET', 'POST', 'CONNECT', 'HEAD', 'PUT', 'DELETE', 'OPTIONS', 'TRACE']

    def __new__(cls, application, request, **kwargs):
        # http://stackoverflow.com/questions/3209233/how-to-replace-an-instance-in-init-with-a-different-object
        # Based on upgrade header, websocket request handler must be used
        try:
            if request.headers['Upgrade'].lower() == 'websocket':
                return CustomWebSocketHandler(application, request, **kwargs)
        except KeyError:
            # No Upgrade header: fall through to the regular handler
            pass
        return tornado.web.RequestHandler.__new__(cls, application, request, **kwargs)

    def set_default_headers(self):
        # This is automatically called by Tornado :P
        # XD Using this to remove "Server" header set by tornado
        del self._headers["Server"]

    def set_status(self, status_code, reason=None):
        """Sets the status code for our response. Overriding is done so as to
        handle unknown response codes gracefully (tornado would raise on a
        code missing from tornado.httputil.responses)."""
        self._status_code = status_code
        if reason is not None:
            self._reason = tornado.escape.native_str(reason)
        else:
            try:
                self._reason = tornado.httputil.responses[status_code]
            except KeyError:
                self._reason = tornado.escape.native_str("Server Not Found")

    # This function writes a new response & caches it
    def finish_response(self, response):
        self.set_status(response.code)
        for header, value in list(response.headers.items()):
            if header == "Set-Cookie":
                # Set-Cookie may legally appear multiple times
                self.add_header(header, value)
            else:
                if header not in restricted_response_headers:
                    self.set_header(header, value)
        self.finish()

    # This function is a callback when a small chunk is received
    def handle_data_chunk(self, data):
        if data:
            self.write(data)
            self.request.response_buffer += data

    @tornado.web.asynchronous
    @tornado.gen.coroutine
    def get(self):
        """Handle all requests except the connect request.

        Once ssl stream is formed between browser and proxy, the requests
        are then processed by this function.
        """
        # The flow starts here
        self.request.response_buffer = ''
        # The requests that come through ssl streams are relative requests, so transparent
        # proxying is required. The following snippet decides the url that should be passed
        # to the async client
        if self.request.uri.startswith(self.request.protocol,0):
            # Normal Proxy Request
            self.request.url = self.request.uri
        else:
            # Transparent Proxy Request
            self.request.url = self.request.protocol + "://" + self.request.host + self.request.uri
        # This block here checks for already cached response and if present returns one
        self.cache_handler = CacheHandler(
            self.application.cache_dir,
            self.request,
            self.application.cookie_regex,
            self.application.cookie_blacklist
        )
        # NOTE(review): request_hash is unused below — presumably
        # calculate_hash has side effects needed by load(); confirm.
        request_hash = yield tornado.gen.Task(self.cache_handler.calculate_hash)
        self.cached_response = self.cache_handler.load()
        if self.cached_response:
            if self.cached_response.body:
                self.write(self.cached_response.body)
            self.finish_response(self.cached_response)
        else:
            # Request header cleaning.
            # NOTE(review): bare except — only KeyError is expected here.
            for header in restricted_request_headers:
                try:
                    del self.request.headers[header]
                except:
                    continue
            # HTTP auth if exists
            http_auth_username = None
            http_auth_password = None
            http_auth_mode = None
            if self.application.http_auth:
                # HTTP AUTH settings
                host = self.request.host
                # If default ports are not provided, they are added
                # (http_auth_hosts entries are stored as host:port)
                try:
                    test = self.request.host.index(':')
                except ValueError:
                    default_ports = {'http':'80', 'https':'443'}
                    try:
                        host = self.request.host + ':' + default_ports[self.request.protocol]
                    except KeyError:
                        pass
                # Check if auth is provided for that host
                try:
                    index = self.application.http_auth_hosts.index(host)
                    http_auth_username = self.application.http_auth_usernames[index]
                    http_auth_password = self.application.http_auth_passwords[index]
                    http_auth_mode = self.application.http_auth_modes[index]
                except ValueError:
                    pass
            # pycurl is needed for curl client
            async_client = tornado.curl_httpclient.CurlAsyncHTTPClient()
            # httprequest object is created and then passed to async client with a callback
            request = tornado.httpclient.HTTPRequest(
                url=self.request.url,
                method=self.request.method,
                body=self.request.body if self.request.body else None,
                headers=self.request.headers,
                auth_username=http_auth_username,
                auth_password=http_auth_password,
                auth_mode=http_auth_mode,
                follow_redirects=False,
                use_gzip=True,
                streaming_callback=self.handle_data_chunk,
                header_callback=None,
                proxy_host=self.application.outbound_ip,
                proxy_port=self.application.outbound_port,
                proxy_username=self.application.outbound_username,
                proxy_password=self.application.outbound_password,
                allow_nonstandard_methods=True,
                prepare_curl_callback=prepare_curl_callback if self.application.outbound_proxy_type == "socks"\
                                         else None, # socks callback function
                validate_cert=False)
            # NOTE(review): if this fetch raises, `response` is unbound and
            # the retry loop below raises UnboundLocalError — confirm/fix.
            try:
                response = yield tornado.gen.Task(async_client.fetch, request)
            except Exception:
                pass
            # Request retries on 408 (Request Timeout) / 599 (client error)
            for i in range(0,3):
                if response.code in [408, 599]:
                    self.request.response_buffer = ''
                    response = yield tornado.gen.Task(async_client.fetch, request)
                else:
                    break
            self.finish_response(response)
            # Cache the response after finishing the response, so caching time is not included in response time
            self.cache_handler.dump(response)

    # The following 5 methods can be handled through the above implementation
    @tornado.web.asynchronous
    def post(self):
        return self.get()

    @tornado.web.asynchronous
    def head(self):
        return self.get()

    @tornado.web.asynchronous
    def put(self):
        return self.get()

    @tornado.web.asynchronous
    def delete(self):
        return self.get()

    @tornado.web.asynchronous
    def options(self):
        return self.get()

    @tornado.web.asynchronous
    def trace(self):
        return self.get()

    @tornado.web.asynchronous
    def connect(self):
        """Handle a CONNECT request (received for https:// URLs).

        * The host and port are obtained from the request uri
        * A socket is created, wrapped in ssl and then added to SSLIOStream
        * This stream is used to connect to speak to the remote host on given port
        * If the server speaks ssl on that port, callback start_tunnel is called
        * An OK response is written back to client
        * The client side socket is wrapped in ssl
        * If the wrapping is successful, a new SSLIOStream is made using that socket
        * The stream is added back to the server for monitoring
        """
        host, port = self.request.uri.split(':')

        def start_tunnel():
            # Upstream speaks ssl: acknowledge the tunnel to the client,
            # then MITM-wrap the client socket with a generated cert
            try:
                self.request.connection.stream.write(b"HTTP/1.1 200 Connection established\r\n\r\n")
                wrap_socket(
                    self.request.connection.stream.socket,
                    host,
                    self.application.ca_cert,
                    self.application.ca_key,
                    self.application.certs_folder,
                    success=ssl_success
                )
            except tornado.iostream.StreamClosedError:
                pass

        def ssl_success(client_socket):
            # Hand the now-ssl-wrapped client socket back to the server loop
            client = tornado.iostream.SSLIOStream(client_socket)
            server.handle_stream(client, self.application.inbound_ip)

        # Tiny Hack to satisfy proxychains CONNECT request to HTTP port.
        # HTTPS fail check has to be improvised
        #def ssl_fail():
        #    self.request.connection.stream.write(b"HTTP/1.1 200 Connection established\r\n\r\n")
        #    server.handle_stream(self.request.connection.stream, self.application.inbound_ip)

        # Hacking to be done here, so as to check for ssl using proxy and auth
        try:
            s = ssl.wrap_socket(socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0))
            upstream = tornado.iostream.SSLIOStream(s)
            #start_tunnel()
            #upstream.set_close_callback(ssl_fail)
            upstream.connect((host, int(port)), start_tunnel)
        except Exception:
            self.finish()
from requests_html import HTMLSession from journey import Journey from cache_handler import CacheHandler from website_parser import WebsiteParser import fire session = HTMLSession() cache_handler = CacheHandler() class Connections: session = HTMLSession() data = { "post-type": "shop", "currentstepnumber": "1", "search-from": "Split", "search-to": "Zagreb", "search-datetime": "21.10.2018.", "ticket-type": "oneway", } url = "https://www.arriva.com.hr/hr-hr/odabir-polaska" def cache(self, output): cache_handler.add_journeys( self.data["search-from"], self.data["search-to"], self.data["search-datetime"], output,
class ProxyHandler(tornado.web.RequestHandler):
    """
    This RequestHandler processes all the requests that the application
    received. GET carries the real proxying logic; every other verb is
    delegated to it, and CONNECT sets up an SSL tunnel for HTTPS MiTM.
    """
    SUPPORTED_METHODS = [
        'GET', 'POST', 'CONNECT', 'HEAD', 'PUT', 'DELETE', 'OPTIONS', 'TRACE'
    ]

    def __new__(cls, application, request, **kwargs):
        # http://stackoverflow.com/questions/3209233/how-to-replace-an-instance-in-init-with-a-different-object
        # Based on upgrade header, websocket request handler must be used:
        # returning a different object from __new__ swaps the handler class
        # for this one request only.
        try:
            if request.headers['Upgrade'].lower() == 'websocket':
                return CustomWebSocketHandler(application, request, **kwargs)
        except KeyError:
            # No Upgrade header -> plain HTTP(S), use this handler.
            pass
        return tornado.web.RequestHandler.__new__(cls, application, request,
                                                  **kwargs)

    def set_default_headers(self):
        # Automatically called by Tornado before headers are flushed.
        # Used here only to remove the "Server" header Tornado sets, so the
        # proxy does not advertise itself.
        del self._headers["Server"]

    def set_status(self, status_code, reason=None):
        """
        Sets the status code for our response.

        Overriding is done so as to handle unknown (non-standard) upstream
        response codes gracefully instead of raising, as Tornado's default
        implementation would.
        """
        self._status_code = status_code
        if reason is not None:
            self._reason = tornado.escape.native_str(reason)
        else:
            try:
                self._reason = tornado.httputil.responses[status_code]
            except KeyError:
                # Unknown status code: fall back to a generic reason phrase.
                self._reason = tornado.escape.native_str("Server Not Found")

    # Copies status and headers from the upstream response onto our response
    # and finishes it. Body bytes were already streamed via handle_data_chunk.
    def finish_response(self, response):
        self.set_status(response.code)
        for header, value in response.headers.get_all():
            if header == "Set-Cookie":
                # Set-Cookie may appear multiple times; add, don't overwrite.
                self.add_header(header, value)
            else:
                # restricted_response_headers is a module-level list of
                # hop-by-hop/encoding headers that must not be forwarded.
                if header not in restricted_response_headers:
                    self.set_header(header, value)
        self.finish()

    # Streaming callback: invoked for each chunk of the upstream body.
    # Forwards the chunk to the client and accumulates it for caching.
    def handle_data_chunk(self, data):
        if data:
            self.write(data)
            self.request.response_buffer += data

    @tornado.web.asynchronous
    @tornado.gen.coroutine
    def get(self):
        """
        * This function handles all requests except the connect request.
        * Once the ssl stream is formed between browser and proxy, the
          requests are then processed by this function.
        """
        # The flow starts here
        self.request.local_timestamp = datetime.datetime.now()
        self.request.response_buffer = ''

        # The requests that come through ssl streams are relative requests,
        # so transparent proxying is required. The following snippet decides
        # the url that should be passed to the async client.
        if self.request.uri.startswith(self.request.protocol, 0):
            # Normal Proxy Request
            self.request.url = self.request.uri
        else:
            # Transparent Proxy Request
            self.request.url = self.request.protocol + "://" + self.request.host
            if self.request.uri != '/':  # Add uri only if needed
                self.request.url += self.request.uri

        # This block here checks for already cached response and if present
        # returns one.
        self.cache_handler = CacheHandler(self.application.cache_dir,
                                          self.request,
                                          self.application.cookie_regex,
                                          self.application.cookie_blacklist)
        # NOTE(review): request_hash is never read afterwards — presumably
        # calculate_hash stores the hash on the handler as a side effect for
        # load()/dump() to use; confirm against CacheHandler.
        request_hash = yield tornado.gen.Task(
            self.cache_handler.calculate_hash)
        self.cached_response = self.cache_handler.load()

        if self.cached_response:
            if self.cached_response.body:
                self.write(self.cached_response.body)
            self.finish_response(self.cached_response)
        else:
            # Request header cleaning: strip hop-by-hop/caching headers
            # (restricted_request_headers is defined at module level).
            for header in restricted_request_headers:
                try:
                    del self.request.headers[header]
                except:
                    continue

            # HTTP auth if exists
            http_auth_username = None
            http_auth_password = None
            http_auth_mode = None
            if self.application.http_auth:
                # HTTP AUTH settings
                host = self.request.host
                # If default ports are not provided, they are added so the
                # host matches entries in http_auth_hosts.
                try:
                    test = self.request.host.index(':')
                except ValueError:
                    default_ports = {'http': '80', 'https': '443'}
                    try:
                        host = self.request.host + ':' + default_ports[
                            self.request.protocol]
                    except KeyError:
                        pass
                # Check if auth is provided for that host
                try:
                    index = self.application.http_auth_hosts.index(host)
                    http_auth_username = self.application.http_auth_usernames[
                        index]
                    http_auth_password = self.application.http_auth_passwords[
                        index]
                    http_auth_mode = self.application.http_auth_modes[index]
                except ValueError:
                    # No credentials configured for this host.
                    pass

            # pycurl is needed for curl client
            async_client = tornado.curl_httpclient.CurlAsyncHTTPClient()

            # success_response is used to check the response in the botnet
            # mode: the while loop retries with a different outbound proxy
            # until one yields a usable response.
            success_response = False
            while not success_response:
                # Proxy Switching (botnet_mode) code
                if self.application.proxy_manager:
                    proxy = self.application.proxy_manager.get_next_available_proxy(
                    )
                    self.application.outbound_ip = proxy["proxy"][0]
                    self.application.outbound_port = int(proxy["proxy"][1])

                # httprequest object is created and then passed to async
                # client with a callback.
                request = tornado.httpclient.HTTPRequest(
                    url=self.request.url,
                    method=self.request.method,
                    body=self.request.body if self.request.body else None,
                    headers=self.request.headers,
                    auth_username=http_auth_username,
                    auth_password=http_auth_password,
                    auth_mode=http_auth_mode,
                    follow_redirects=False,
                    use_gzip=True,
                    streaming_callback=self.handle_data_chunk,
                    header_callback=None,
                    proxy_host=self.application.outbound_ip,
                    proxy_port=self.application.outbound_port,
                    proxy_username=self.application.outbound_username,
                    proxy_password=self.application.outbound_password,
                    allow_nonstandard_methods=True,
                    # socks proxies need a curl-level setup callback
                    prepare_curl_callback=prepare_curl_callback
                    if self.application.outbound_proxy_type == "socks"
                    else None,
                    validate_cert=False)
                try:
                    response = yield tornado.gen.Task(async_client.fetch,
                                                      request)
                except Exception:
                    response = None
                    pass

                # Request retries: 408 (timeout) and 599 (connection error)
                # are retried up to 3 times with a cleared buffer.
                for i in range(0, 3):
                    if (response is None) or response.code in [408, 599]:
                        self.request.response_buffer = ''
                        response = yield tornado.gen.Task(
                            async_client.fetch, request)
                    else:
                        success_response = True
                        break

                # botnet mode code (proxy switching):
                # checking the status of the proxy (asynchronous)
                if self.application.proxy_manager and not success_response:
                    proxy_check_req = tornado.httpclient.HTTPRequest(
                        # testing url is google.com
                        url=self.application.proxy_manager.testing_url,
                        use_gzip=True,
                        proxy_host=self.application.outbound_ip,
                        proxy_port=self.application.outbound_port,
                        proxy_username=self.application.outbound_username,
                        proxy_password=self.application.outbound_password,
                        prepare_curl_callback=prepare_curl_callback
                        if self.application.outbound_proxy_type == "socks"
                        else None,  # socks callback function
                        validate_cert=False)
                    try:
                        proxy_check_resp = yield tornado.gen.Task(
                            async_client.fetch, proxy_check_req)
                    except Exception:
                        # NOTE(review): if fetch raises here,
                        # proxy_check_resp is unbound and the check below
                        # raises NameError — confirm whether fetch can
                        # actually raise with a curl client.
                        pass
                    if proxy_check_resp.code != 200:
                        # Dead proxy: drop it from rotation and loop again.
                        self.application.proxy_manager.remove_proxy(
                            proxy["index"])
                    else:
                        success_response = True
                else:
                    success_response = True

            self.finish_response(response)
            # Cache the response after finishing the response, so caching
            # time is not included in response time.
            self.cache_handler.dump(response)

    # The following methods can all be handled through the GET implementation
    @tornado.web.asynchronous
    def post(self):
        return self.get()

    @tornado.web.asynchronous
    def head(self):
        return self.get()

    @tornado.web.asynchronous
    def put(self):
        return self.get()

    @tornado.web.asynchronous
    def delete(self):
        return self.get()

    @tornado.web.asynchronous
    def options(self):
        return self.get()

    @tornado.web.asynchronous
    def trace(self):
        return self.get()

    @tornado.web.asynchronous
    def connect(self):
        """
        This function gets called when a connect request is received.
        * The host and port are obtained from the request uri
        * A socket is created, wrapped in ssl and then added to SSLIOStream
        * This stream is used to connect to speak to the remote host on given port
        * If the server speaks ssl on that port, callback start_tunnel is called
        * An OK response is written back to client
        * The client side socket is wrapped in ssl
        * If the wrapping is successful, a new SSLIOStream is made using that socket
        * The stream is added back to the server for monitoring
        """
        host, port = self.request.uri.split(':')

        def start_tunnel():
            # Upstream spoke SSL: acknowledge the CONNECT to the browser and
            # re-wrap the client socket with our own (MiTM) certificate.
            try:
                self.request.connection.stream.write(
                    b"HTTP/1.1 200 Connection established\r\n\r\n")
                wrap_socket(self.request.connection.stream.socket, host,
                            self.application.ca_cert,
                            self.application.ca_key,
                            self.application.ca_key_pass,
                            self.application.certs_folder,
                            success=ssl_success)
            except tornado.iostream.StreamClosedError:
                pass

        def ssl_success(client_socket):
            # Hand the now-SSL client stream back to the server loop so the
            # decrypted requests flow through get() above.
            client = tornado.iostream.SSLIOStream(client_socket)
            server.handle_stream(client, self.application.inbound_ip)

        # Tiny Hack to satisfy proxychains CONNECT request to HTTP port.
        # HTTPS fail check has to be improvised.
        def ssl_fail():
            try:
                self.request.connection.stream.write(
                    b"HTTP/1.1 200 Connection established\r\n\r\n")
            except tornado.iostream.StreamClosedError:
                pass
            server.handle_stream(self.request.connection.stream,
                                 self.application.inbound_ip)

        # Hacking to be done here, so as to check for ssl using proxy and auth
        try:
            s = ssl.wrap_socket(
                socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0))
            upstream = tornado.iostream.SSLIOStream(s)
            # If the upstream close fires before the SSL handshake completes,
            # fall back to the plain-HTTP tunnel hack above.
            upstream.set_close_callback(ssl_fail)
            upstream.connect((host, int(port)), start_tunnel)
        except Exception:
            self.finish()
class BoxItem():
    """
    Represents one item (file or folder) on Box.com, addressed by path.

    Wraps the Box SDK client with a path -> (id, type) cache so repeated
    lookups avoid walking the folder tree. Instances are mutated in place by
    get_by_path()/create_path() and most methods return self for chaining.
    """

    BOX_FOLDER = "folder"
    BOX_FILE = "file"
    BOX_ERR_NOT_FOUND = 404
    BOX_ERR_CONFLICT = 409
    BOX_ERR_RESERVED = 'name_temporarily_reserved'
    BOX_ERR_DUPLICATE = 'item_name_in_use'

    def __init__(self, cache_file_name, root, client):
        """
        :param cache_file_name: backing file for the path->id cache
        :param root: root folder name the plugin operates under
        :param client: authenticated Box SDK client
        """
        self.path = ''
        self.id = "0"  # Box uses "0" as the id of the root folder
        self.type = self.BOX_FOLDER
        self.modified_at = None
        self.size = 0
        self.cache = CacheHandler(cache_file_name)
        self.root = root
        self.client = client

    def get_by_path(self, path, create_if_not_exist=False, force_no_cache=False):
        """
        Point this item at `path`, resolving ids via the cache first and
        falling back to walking the folder tree from the root.

        :param create_if_not_exist: create missing intermediate folders
        :param force_no_cache: bypass the cache lookup
        :returns: self (with id=None if the path does not exist and
                  create_if_not_exist is False)
        """
        rel_path = get_rel_path(path)
        if rel_path == '':
            self.set_root()
            return self

        # Fast path: cached id. Verify it is still valid with get_details;
        # on any API error, reset the cache and fall through to a full walk.
        item_id, item_type = self.cache.query(rel_path, force_no_cache)
        if item_id is not None:
            try:
                item = self.get_details(item_id, item_type)
                self.path = rel_path
                self.id = item_id
                self.type = item_type
                self.size = (item.size if self.is_file() else 0)
                return self
            except Exception as error:
                logger.info("Exception:{}".format(error))
                self.cache.reset()

        # Slow path: iterate the path elements from root id "0", caching
        # every id discovered along the way.
        item_id = '0'
        item_type = self.BOX_FOLDER
        elts = rel_path.split('/')
        current_path = ''
        for elt in elts:
            current_path = os.path.join(current_path, elt)
            items_iter = self.client.folder(folder_id=item_id).get_items(
                fields=['modified_at', 'name', 'type', 'size'])
            found = False
            for item in items_iter:
                if item.name == elt:
                    self.path = rel_path
                    self.id = item.id
                    item_id = item.id
                    self.type = item.type
                    self.modified_at = self.format_date(item.modified_at)
                    self.size = item.size
                    self.cache.add(current_path, item.id, item.type)
                    found = True
                    break
            if not found:
                if create_if_not_exist:
                    new_folder = self.create_subfolder(elt)
                    item_id = new_folder.id
                    self.cache.add(current_path, new_folder.id,
                                   self.BOX_FOLDER)
                else:
                    self.set_none()
        return self

    def get_details(self, id, type):
        """Fetch item metadata from Box; returns None for unknown types."""
        if type == self.BOX_FOLDER:
            return self.client.folder(id).get(
                fields=['modified_at', 'name', 'type', 'size'])
        elif type == self.BOX_FILE:
            return self.client.file(id).get(
                fields=['modified_at', 'name', 'type', 'size'])

    def set_root(self):
        """Point this item at the Box root folder (id "0")."""
        self.id = "0"
        self.type = self.BOX_FOLDER
        self.size = 0

    def set_none(self):
        """Mark this item as non-existent."""
        self.id = None
        self.type = None
        self.modified_at = None
        self.size = None

    def create_subfolder(self, name):
        """
        Create a subfolder of the current folder, handling the Box.com
        conflict errors that arise when several writers race on the same
        name. Loops until an id is obtained.
        """
        new_id = None
        while new_id is None:
            try:
                new_folder = self.client.folder(self.id).create_subfolder(name)
                new_id = self.fix_any_duplicate(name, new_folder['id'])
            except BoxAPIException as err:
                if err.status == self.BOX_ERR_CONFLICT:
                    if err.code == self.BOX_ERR_RESERVED:
                        # Item name is reserved but there is no ID yet, so we
                        # have to loop until we get a BOX_ERR_DUPLICATE.
                        time.sleep(1)
                    elif err.code == self.BOX_ERR_DUPLICATE:
                        # Someone else created it first: reuse their id.
                        new_id = err.context_info['conflicts'][0]['id']
                    else:
                        raise Exception(
                            'Unimplemented Box.com conflict error while creating subfolder'
                        )
                else:
                    raise Exception(
                        'Unimplemented Box.com error while creating subfolder')
        self.id = new_id
        self.type = self.BOX_FOLDER
        self.size = 0
        return self

    def get_last_modified(self, item=None):
        """
        Epoch-millisecond mtime of `item` (a dict-like with "modified_at"),
        or of this item when `item` is None. Returns None when unknown.
        """
        if item is None:
            return self.modified_at
        elif "modified_at" in item:
            return self.format_date(item["modified_at"])
        else:
            return None

    def format_date(self, date):
        """
        Convert a Box timestamp string to epoch milliseconds (or None).

        NOTE(review): the format "%Y-%m-%dT%H:%M:%S-%f:00" abuses %f to
        swallow the UTC-offset hours, so only negative offsets parse and the
        offset itself is ignored — confirm whether Box always returns a
        fixed offset for this account.
        """
        if date is not None:
            utc_time = datetime.strptime(date, "%Y-%m-%dT%H:%M:%S-%f:00")
            epoch_time = (utc_time - datetime(1970, 1, 1)).total_seconds()
            return int(epoch_time) * 1000
        else:
            return None

    def not_exists(self):
        """True when the item was resolved to nothing (see set_none)."""
        # Fixed: identity comparison instead of `== None`.
        return self.id is None

    def exists(self):
        """True when the item has a Box id."""
        return self.id is not None

    def is_folder(self):
        return self.type == self.BOX_FOLDER

    def is_file(self):
        return self.type == self.BOX_FILE

    def get_stat(self):
        """Stat-like dict (path/size/isDirectory[/lastModified])."""
        ret = {
            'path': get_normalized_path(self.path),
            'size': self.size if self.is_file() else 0,
            'isDirectory': self.is_folder()
        }
        if self.modified_at is not None:
            ret["lastModified"] = self.modified_at
        return ret

    def get_children(self, internal_path):
        """
        List direct children of the current folder as browse dicts, caching
        each child's id as a side effect.
        """
        # NOTE(review): these two values are computed but never used — kept
        # for behavioral parity; candidates for removal.
        full_path = get_full_path(self.root, self.path)
        intra_path = self.path.replace('/' + self.root, '')
        children = []
        for sub in self.client.folder(self.id).get_items(
                fields=['modified_at', 'name', 'type', 'size']):
            sub_path = get_normalized_path(
                os.path.join(internal_path, sub.name))
            ret = {
                'fullPath': sub_path,
                'exists': True,
                'directory': sub.type == self.BOX_FOLDER,
                'size': sub.size,
                'lastModified': self.get_last_modified(sub)
            }
            children.append(ret)
            # NOTE(review): caches by bare child name, not the child's full
            # relative path — verify this matches what cache.query expects.
            self.cache.add(get_rel_path(sub.name), sub.id, sub.type)
        return children

    def get_id(self):
        return self.id

    def get_as_browse(self):
        """Browse dict describing this item itself."""
        return {
            'fullPath': get_normalized_path(self.path),
            'exists': self.exists(),
            'directory': self.is_folder(),
            'size': self.size,
            'lastModified': self.get_last_modified()
        }

    def get_stream(self, byte_range=None):
        """Download this file's content (optionally a byte range) as BytesIO."""
        if byte_range:
            ws = self.client.file(self.id).content(byte_range=byte_range)
        else:
            ws = self.client.file(self.id).content()
        return BytesIO(ws)

    def write_stream(self, stream):
        """
        Upload `stream` into the current folder under this item's file name;
        the whole stream is buffered in memory before upload.
        """
        file_name = self.path.split('/')[-1]
        sio = BytesIO()
        shutil.copyfileobj(stream, sio)
        sio.seek(0)
        ret = self.client.folder(self.id).upload_stream(sio,
                                                        file_name=file_name)
        self.id = ret.id
        self.cache.add(self.path, ret.id, ret.type)
        return self

    def create_path(self, path, force_no_cache=False):
        """Ensure the parent folders of `path` exist; point self at `path`."""
        target_path = '/'.join(path.split('/')[:-1])
        ret = self.get_by_path(target_path,
                               create_if_not_exist=True,
                               force_no_cache=force_no_cache)
        ret.path = path
        return ret

    def delete(self):
        """
        Delete this item. Returns 1 for a file, the number of removed
        entries for a folder, None if the item is neither.
        """
        if self.is_file():
            try:
                self.client.file(self.id).delete()
            except BoxAPIException as err:
                if err.status == self.BOX_ERR_NOT_FOUND:
                    # Probably deleted by competing process
                    pass
                else:
                    raise Exception("Error while deleting box.com item")
            self.cache.remove(self.id)
            return 1
        if self.is_folder():
            return self.recursive_delete()

    def recursive_delete(self, id=None):
        """
        Recursively delete the contents of folder `id` (default: self),
        best-effort; returns a count of removed entries.

        NOTE(review): subfolders are only purged from the cache — no folder
        delete API call is issued here; confirm the Box-side folder removal
        happens elsewhere.
        """
        counter = 0
        if id is None:
            id = self.id
        try:
            for child in self.client.folder(id).get_items():
                if child.type == self.BOX_FOLDER:
                    counter = counter + self.recursive_delete(id=child.id)
                    try:
                        self.cache.remove(child.id)
                        counter = counter + 1
                    except Exception as error:
                        logger.info("Exception:{}".format(error))
                elif child.type == self.BOX_FILE:
                    try:
                        self.client.file(child.id).delete()
                        self.cache.remove(child.id)
                        counter = counter + 1
                    except Exception as error:
                        # File already deleted
                        logger.info("Exception:{}".format(error))
        except Exception as error:
            logger.info("Folder already deleted")
        self.cache.remove(self.id)
        return counter

    def fix_any_duplicate(self, name, new_id):
        """
        Resolve duplicate folders created by racing plugin instances:
        keep the canonical one and delete our extra copy if necessary.
        """
        if self.is_duplicated(name, new_id):
            self.cache.reset()
            time.sleep(1)  # waiting for dust to settle on box.com side
            id_default_folder = self.id_default_folder(name)
            if id_default_folder != new_id:
                try:
                    self.client.folder(new_id).delete()
                except Exception as error:
                    logger.info("Folder already deleted:{}".format(error))
                return id_default_folder
        return new_id

    def is_duplicated(self, name, new_id):
        """True when `name` exists more than once and `new_id` is one of them."""
        instances = 0
        my_child = False
        try:
            for child in self.client.folder(self.id).get_items():
                if child.name == name:
                    instances = instances + 1
                    if child.id == new_id:
                        my_child = True
        except BoxAPIException as err:
            raise Exception(
                'Error while accessing box.com item:{0}'.format(err))
        return (instances > 1) and my_child

    def id_default_folder(self, name):
        """
        Probe which folder id Box considers canonical for `name` by
        attempting a create and reading the duplicate-conflict payload.
        """
        try:
            probe_folder = self.client.folder(self.id).create_subfolder(name)
            return probe_folder.id
        except BoxAPIException as err:
            if err.status == self.BOX_ERR_CONFLICT:
                if err.code == self.BOX_ERR_DUPLICATE:
                    return err.context_info['conflicts'][0]['id']
                else:
                    raise Exception(
                        'Unimplemented Box.com conflict error while creating subfolder'
                    )
            else:
                raise Exception(
                    'Unimplemented Box.com error while creating subfolder: {0}'
                    .format(err))
        return None

    def check_path_format(self, path):
        """Raise Exception when `path` violates Box.com naming rules."""
        special_names = [".", ".."]
        if not all(c in string.printable for c in path):
            raise Exception('The path contains non-printable char(s)')
        for element in path.split('/'):
            if len(element) > 255:
                raise Exception(
                    'An element of the path is longer than the allowed 255 characters'
                )
            if element in special_names:
                raise Exception(
                    'Special name "{0}" is not allowed in a box.com path'.
                    format(element))
            if element.endswith(' '):
                raise Exception(
                    'An element of the path contains a trailing space')

    def close(self):
        """Flush the path cache to disk."""
        self.cache.write_onto_disk()
def get(self):
    """
    * This function handles all requests except the connect request.
    * Once the ssl stream is formed between browser and proxy, the requests
      are then processed by this function.
    """
    # The flow starts here
    self.request.local_timestamp = datetime.datetime.now()
    self.request.response_buffer = ''

    # The requests that come through ssl streams are relative requests, so
    # transparent proxying is required. The following snippet decides the
    # url that should be passed to the async client.
    if self.request.uri.startswith(self.request.protocol, 0):
        # Normal Proxy Request
        self.request.url = self.request.uri
    else:
        # Transparent Proxy Request
        self.request.url = self.request.protocol + "://" + self.request.host
        if self.request.uri != '/':  # Add uri only if needed
            self.request.url += self.request.uri

    # This block here checks for already cached response and if present
    # returns one.
    self.cache_handler = CacheHandler(self.application.cache_dir,
                                      self.request,
                                      self.application.cookie_regex,
                                      self.application.cookie_blacklist)
    # NOTE(review): request_hash is never read afterwards — presumably
    # calculate_hash stores the hash on the handler for load()/dump();
    # confirm against CacheHandler.
    request_hash = yield tornado.gen.Task(
        self.cache_handler.calculate_hash)
    self.cached_response = self.cache_handler.load()

    if self.cached_response:
        if self.cached_response.body:
            self.write(self.cached_response.body)
        self.finish_response(self.cached_response)
    else:
        # Request header cleaning: strip hop-by-hop/caching headers
        # (restricted_request_headers is defined at module level).
        for header in restricted_request_headers:
            try:
                del self.request.headers[header]
            except:
                continue

        # HTTP auth if exists
        http_auth_username = None
        http_auth_password = None
        http_auth_mode = None
        if self.application.http_auth:
            # HTTP AUTH settings
            host = self.request.host
            # If default ports are not provided, they are added so the host
            # matches entries in http_auth_hosts.
            try:
                test = self.request.host.index(':')
            except ValueError:
                default_ports = {'http': '80', 'https': '443'}
                try:
                    host = self.request.host + ':' + default_ports[
                        self.request.protocol]
                except KeyError:
                    pass
            # Check if auth is provided for that host
            try:
                index = self.application.http_auth_hosts.index(host)
                http_auth_username = self.application.http_auth_usernames[
                    index]
                http_auth_password = self.application.http_auth_passwords[
                    index]
                http_auth_mode = self.application.http_auth_modes[index]
            except ValueError:
                # No credentials configured for this host.
                pass

        # pycurl is needed for curl client
        async_client = tornado.curl_httpclient.CurlAsyncHTTPClient()

        # success_response is used to check the response in the botnet mode:
        # the while loop retries with a different outbound proxy until one
        # yields a usable response.
        success_response = False
        while not success_response:
            # Proxy Switching (botnet_mode) code
            if self.application.proxy_manager:
                proxy = self.application.proxy_manager.get_next_available_proxy(
                )
                self.application.outbound_ip = proxy["proxy"][0]
                self.application.outbound_port = int(proxy["proxy"][1])

            # httprequest object is created and then passed to async client
            # with a callback.
            request = tornado.httpclient.HTTPRequest(
                url=self.request.url,
                method=self.request.method,
                body=self.request.body if self.request.body else None,
                headers=self.request.headers,
                auth_username=http_auth_username,
                auth_password=http_auth_password,
                auth_mode=http_auth_mode,
                follow_redirects=False,
                use_gzip=True,
                streaming_callback=self.handle_data_chunk,
                header_callback=None,
                proxy_host=self.application.outbound_ip,
                proxy_port=self.application.outbound_port,
                proxy_username=self.application.outbound_username,
                proxy_password=self.application.outbound_password,
                allow_nonstandard_methods=True,
                # socks proxies need a curl-level setup callback
                prepare_curl_callback=prepare_curl_callback
                if self.application.outbound_proxy_type == "socks"
                else None,
                validate_cert=False)
            try:
                response = yield tornado.gen.Task(async_client.fetch,
                                                  request)
            except Exception:
                response = None
                pass

            # Request retries: 408 (timeout) and 599 (connection error) are
            # retried up to 3 times with a cleared buffer.
            for i in range(0, 3):
                if (response is None) or response.code in [408, 599]:
                    self.request.response_buffer = ''
                    response = yield tornado.gen.Task(
                        async_client.fetch, request)
                else:
                    success_response = True
                    break

            # botnet mode code (proxy switching):
            # checking the status of the proxy (asynchronous)
            if self.application.proxy_manager and not success_response:
                proxy_check_req = tornado.httpclient.HTTPRequest(
                    # testing url is google.com
                    url=self.application.proxy_manager.testing_url,
                    use_gzip=True,
                    proxy_host=self.application.outbound_ip,
                    proxy_port=self.application.outbound_port,
                    proxy_username=self.application.outbound_username,
                    proxy_password=self.application.outbound_password,
                    prepare_curl_callback=prepare_curl_callback
                    if self.application.outbound_proxy_type == "socks"
                    else None,  # socks callback function
                    validate_cert=False)
                try:
                    proxy_check_resp = yield tornado.gen.Task(
                        async_client.fetch, proxy_check_req)
                except Exception:
                    # NOTE(review): if fetch raises here, proxy_check_resp
                    # is unbound and the check below raises NameError.
                    pass
                if proxy_check_resp.code != 200:
                    # Dead proxy: drop it from rotation and loop again.
                    self.application.proxy_manager.remove_proxy(
                        proxy["index"])
                else:
                    success_response = True
            else:
                success_response = True

        self.finish_response(response)
        # Cache the response after finishing the response, so caching time
        # is not included in response time.
        self.cache_handler.dump(response)
class ProxyHandler(tornado.web.RequestHandler):
    """
    This RequestHandler processes all the requests that the application
    received. GET does the proxying via callbacks; other verbs delegate to
    it, and CONNECT sets up an SSL tunnel for HTTPS interception.
    """
    SUPPORTED_METHODS = [
        'GET', 'POST', 'CONNECT', 'HEAD', 'PUT', 'DELETE', 'OPTIONS'
    ]

    def set_status(self, status_code, reason=None):
        """Sets the status code for our response.

        Overriding is done so as to handle unknown (non-standard) upstream
        response codes gracefully instead of raising, as Tornado's default
        implementation would.
        """
        self._status_code = status_code
        if reason is not None:
            self._reason = tornado.escape.native_str(reason)
        else:
            try:
                self._reason = tornado.httputil.responses[status_code]
            except KeyError:
                # Unknown status code: fall back to a generic reason phrase.
                self._reason = tornado.escape.native_str("Server Not Found")

    @tornado.web.asynchronous
    def get(self):
        """
        * This function handles all requests except the connect request.
        * Once the ssl stream is formed between browser and proxy, the
          requests are then processed by this function.
        """
        self.request.response_buffer = ''

        # Hop-by-hop/encoding headers that must not be copied from the
        # upstream response onto ours.
        restricted_response_headers = [
            'Content-Length', 'Content-Encoding', 'Etag',
            'Transfer-Encoding', 'Connection', 'Vary', 'Accept-Ranges',
            'Pragma'
        ]

        # Callback invoked after the async client gets the full response:
        # copies status/headers, caches if a body was streamed, finishes.
        def handle_response(response):
            self.set_status(response.code)
            # Remove the "Server" header Tornado sets by default.
            del self._headers['Server']
            for header, value in list(response.headers.items()):
                if header == "Set-Cookie":
                    # Set-Cookie may repeat; add, don't overwrite.
                    self.add_header(header, value)
                else:
                    if header not in restricted_response_headers:
                        self.set_header(header, value)
            if self.request.response_buffer:
                self.cache_handler.dump(response)
            self.finish()

        # Serves a previously cached response (body included) to the client.
        def handle_cached_response(response):
            self.set_status(response.code)
            for header, value in list(response.headers.items()):
                if header == "Set-Cookie":
                    self.add_header(header, value)
                else:
                    if header not in restricted_response_headers:
                        self.set_header(header, value)
            self.write(response.body)
            self.finish()

        # Streaming callback: invoked for each chunk of the upstream body;
        # forwards it to the client and accumulates it for caching.
        def handle_data_chunk(data):
            if data:
                self.write(data)
                self.request.response_buffer += data

        # Strip caching/connection headers from the outgoing request.
        for header in ('Connection', 'Pragma', 'Cache-Control',
                       'If-Modified-Since'):
            try:
                del self.request.headers[header]
            except:
                continue

        # The requests that come through ssl streams are relative requests,
        # so transparent proxying is required. The following snippet decides
        # the url that should be passed to the async client.
        if self.request.host in self.request.uri.split('/'):
            # Normal Proxy Request
            self.request.url = self.request.uri
        else:
            # Transparent Proxy Request
            self.request.url = self.request.protocol + "://" + \
                self.request.host + self.request.uri

        # This block here checks for already cached response and if present
        # returns one.
        self.cache_handler = CacheHandler(self.application.cache_dir,
                                          self.request,
                                          self.application.cookie_regex,
                                          self.application.cookie_blacklist)
        cached_response = self.cache_handler.load()

        if cached_response:
            handle_cached_response(cached_response)
        else:
            # httprequest object is created and then passed to the async
            # client with a callback. pycurl is needed for curl client.
            async_client = tornado.curl_httpclient.CurlAsyncHTTPClient()
            request = tornado.httpclient.HTTPRequest(
                url=self.request.url,
                method=self.request.method,
                body=self.request.body,
                headers=self.request.headers,
                follow_redirects=False,
                use_gzip=True,
                streaming_callback=handle_data_chunk,
                header_callback=None,
                proxy_host=self.application.outbound_ip,
                proxy_port=self.application.outbound_port,
                proxy_username=self.application.outbound_username,
                proxy_password=self.application.outbound_password,
                allow_nonstandard_methods=True,
                validate_cert=False)
            try:
                async_client.fetch(request, callback=handle_response)
            except Exception:
                # Best-effort: fetch errors are silently dropped here.
                pass

    # The following 5 methods can be handled through the above implementation
    @tornado.web.asynchronous
    def post(self):
        return self.get()

    @tornado.web.asynchronous
    def head(self):
        return self.get()

    @tornado.web.asynchronous
    def put(self):
        return self.get()

    @tornado.web.asynchronous
    def delete(self):
        return self.get()

    @tornado.web.asynchronous
    def options(self):
        return self.get()

    @tornado.web.asynchronous
    def connect(self):
        """
        This function gets called when a connect request is received.
        * The host and port are obtained from the request uri
        * A socket is created, wrapped in ssl and then added to SSLIOStream
        * This stream is used to connect to speak to the remote host on given port
        * If the server speaks ssl on that port, callback start_tunnel is called
        * An OK response is written back to client
        * The client side socket is wrapped in ssl
        * If the wrapping is successful, a new SSLIOStream is made using that socket
        * The stream is added back to the server for monitoring
        """
        host, port = self.request.uri.split(':')

        def start_tunnel():
            # Upstream spoke SSL: acknowledge the CONNECT to the browser and
            # re-wrap the client socket with our own (MiTM) certificate.
            try:
                self.request.connection.stream.write(
                    b"HTTP/1.1 200 Connection established\r\n\r\n")
                wrap_socket(
                    self.request.connection.stream.socket,
                    host,
                    self.application.ca_cert,
                    self.application.ca_key,
                    self.application.certs_folder,
                    success=ssl_success)
            except tornado.iostream.StreamClosedError:
                pass

        def ssl_success(client_socket):
            # Hand the now-SSL client stream back to the server loop so the
            # decrypted requests flow through get() above.
            client = tornado.iostream.SSLIOStream(client_socket)
            server.handle_stream(client, self.application.inbound_ip)

        # Tiny Hack to satisfy proxychains CONNECT request to HTTP port.
        # HTTPS fail check has to be improvised
        #def ssl_fail():
        #    self.request.connection.stream.write(b"HTTP/1.1 200 Connection established\r\n\r\n")
        #    server.handle_stream(self.request.connection.stream, self.application.inbound_ip)

        # Hacking to be done here, so as to check for ssl using proxy and auth
        try:
            s = ssl.wrap_socket(
                socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0))
            upstream = tornado.iostream.SSLIOStream(s)
            #start_tunnel()
            #upstream.set_close_callback(ssl_fail)
            upstream.connect((host, int(port)), start_tunnel)
        except Exception:
            self.finish()