class CacheHandler(object):
    """Load responses from, and dump transactions to, the on-disk cache.

    This class is used by the request handler to either load or dump to
    cache. Main things that are done here:

    * The request_hash is generated here
    * The file locks are managed here
    * .rd files are created here
    """

    # Request headers whose values are appended to the hashed material.
    # "Sec-Websocket-Key" is the websocket caching technique.
    _HASHED_HEADERS = ("Sec-Websocket-Key",)

    def __init__(self, cache_dir, request, cookie_regex, blacklist):
        """Store the collaborators; no hashing or file I/O happens here.

        :param cache_dir: Root cache directory.
        :param request: HTTP request object.
        :param cookie_regex: Regex used to filter cookies for hashing.
        :param blacklist: Boolean. If true, text matching ``cookie_regex``
            is removed from the Cookie header before hashing; otherwise
            only the matches are hashed.
        """
        self.request = request
        self.cache_dir = cache_dir
        self.cookie_regex = cookie_regex
        self.blacklist = blacklist

    @staticmethod
    def _as_bytes(value):
        """Return ``value`` as bytes, UTF-8 encoding it when it is text."""
        return value if isinstance(value, bytes) else value.encode('utf-8')

    def _cookie_string(self):
        """Return the cookie text that takes part in the request hash.

        The original request is left untampered; only the hashed material
        is filtered. An absent Cookie header yields an empty string.
        """
        try:
            cookie_header = self.request.headers['Cookie']
        except KeyError:
            return ''
        if self.blacklist:
            remaining = re.sub(self.cookie_regex, '', cookie_header).strip()
            return remaining.replace(' ', '')
        matches = re.findall(self.cookie_regex, cookie_header)
        return ''.join(
            item.strip() for match in matches for item in match if item)

    def calculate_hash(self, callback=None):
        """Compute ``self.request_hash`` and ``self.file_path``.

        The hash is the MD5 hex digest of method + url + version + body +
        filtered cookies + the headers listed in ``_HASHED_HEADERS``.

        :param callback: Optional callable invoked with the request hash.
        """
        parts = [
            self.request.method,
            self.request.url,
            self.request.version,
            self.request.body,
            self._cookie_string(),
        ]
        for header_name in self._HASHED_HEADERS:
            try:
                parts.append(self.request.headers[header_name])
            except KeyError:
                pass
        # md5 only accepts bytes: encode each piece separately so a text
        # URL and a bytes body can be combined, and non-ASCII text hashes
        # instead of raising.
        md5_hash = hashlib.md5()
        md5_hash.update(b''.join(self._as_bytes(part) for part in parts))
        self.request_hash = md5_hash.hexdigest()
        # This is the path to file inside url folder. This can be used for
        # updating a html file
        self.file_path = os.path.join(self.cache_dir, 'url', self.request_hash)
        if callback:
            callback(self.request_hash)

    def create_response_object(self):
        """Return the cached response built by ``response_from_cache``."""
        return response_from_cache(self.request_hash, self.cache_dir)

    def _entry_path(self, folder):
        """Return the path of this transaction's file inside ``folder``."""
        return os.path.join(self.cache_dir, folder, self.request_hash)

    @staticmethod
    def _format_headers(headers):
        """Serialise a header mapping as ``Name: value`` CRLF lines."""
        return ''.join(
            "%s: %s\r\n" % (name, value) for name, value in headers.items())

    @staticmethod
    def _write_text(path, data):
        """Write ``data`` to ``path``, always closing the handle."""
        with open(path, 'w') as handle:
            handle.write(data)

    @staticmethod
    def _write_body(path, data):
        """Write a response body, retrying in binary mode if text fails."""
        try:
            with open(path, 'w') as handle:
                handle.write(data)
        except (TypeError, UnicodeError):
            with open(path, 'wb') as handle:
                handle.write(data)

    def dump(self, response):
        """Dump the request and response data to the cache.

        The transaction is saved across the folders url, req-headers,
        req-body, resp-code, resp-headers, resp-body and resp-time. An
        empty ``<hash>.rd`` file is created afterwards (used by the
        transaction logger). The file lock taken by :meth:`load` is
        released even when a write fails, so a failed dump cannot leave
        the entry locked.

        :param response: HTTPResponse-like object providing ``code``,
            ``headers`` and ``request_time``.
        """
        request = self.request
        try:
            self._write_text(
                self.file_path,
                "%s %s %s" % (request.method, request.url, request.version))
            self._write_text(
                self._entry_path('req-headers'),
                self._format_headers(request.headers))
            self._write_text(self._entry_path('req-body'), request.body)
            self._write_text(
                self._entry_path('resp-code'), str(response.code))
            self._write_text(
                self._entry_path('resp-headers'),
                self._format_headers(response.headers))
            self._write_body(
                self._entry_path('resp-body'), request.response_buffer)
            self._write_text(
                self._entry_path('resp-time'),
                "%s" % (str(response.request_time)))
            # This approach can be used as an alternative for object sharing
            # This creates a file with hash as name and .rd as extension
            open(self.file_path + '.rd', 'w').close()
        finally:
            self.file_lock.release()

    def load(self):
        """Return the cached response, or None when it is not cached.

        This is called for every request. If the file is not found in
        cache, a file lock is created and acquired for it and None is
        returned; the lock stays held until :meth:`dump` releases it.
        """
        if not hasattr(self, 'file_path'):
            self.calculate_hash()
        if os.path.isfile(self.file_path):
            return self.create_response_object()
        self.file_lock = FileLock(self.file_path)
        self.file_lock.acquire()
        # For handling race conditions
        if os.path.isfile(self.file_path):
            self.file_lock.release()
            return self.create_response_object()
        return None
class CacheHandler(object):
    """Load responses from, and dump transactions to, the on-disk cache.

    This class is used by the request handler to either load or dump to
    cache. Main things that are done here:

    * The request_hash is generated here
    * The file locks are managed here
    * .rd files are created here
    """

    # Request headers whose values are appended to the hashed material, in
    # this order. "User-Agent" supports proxying of ua-tester;
    # "Sec-Websocket-Key" is the websocket caching technique.
    _HASHED_HEADERS = ("User-Agent", "Sec-Websocket-Key")

    def __init__(self, cache_dir, request, cookie_regex, blacklist):
        """Store the collaborators; no hashing or file I/O happens here.

        :param cache_dir: Root cache directory.
        :param request: HTTP request object.
        :param cookie_regex: Regex used to filter cookies for hashing.
        :param blacklist: Boolean. If true, text matching ``cookie_regex``
            is removed from the Cookie header before hashing; otherwise
            only the matches are hashed.
        """
        self.request = request
        self.cache_dir = cache_dir
        self.cookie_regex = cookie_regex
        self.blacklist = blacklist

    @staticmethod
    def _as_bytes(value):
        """Return ``value`` as bytes, UTF-8 encoding it when it is text."""
        return value if isinstance(value, bytes) else value.encode('utf-8')

    def _cookie_string(self):
        """Return the cookie text that takes part in the request hash.

        The original request is left untampered; only the hashed material
        is filtered. An absent Cookie header yields an empty string.
        """
        try:
            cookie_header = self.request.headers['Cookie']
        except KeyError:
            return ''
        if self.blacklist:
            remaining = re.sub(self.cookie_regex, '', cookie_header).strip()
            return remaining.replace(' ', '')
        matches = re.findall(self.cookie_regex, cookie_header)
        return ''.join(
            item.strip() for match in matches for item in match if item)

    def calculate_hash(self, callback=None):
        """Compute ``self.request_hash`` and ``self.file_path``.

        The hash is the MD5 hex digest of method + url + version + body +
        filtered cookies + the headers listed in ``_HASHED_HEADERS``.

        :param callback: Optional callable invoked with the request hash.
        """
        parts = [
            self.request.method,
            self.request.url,
            self.request.version,
            self.request.body,
            self._cookie_string(),
        ]
        for header_name in self._HASHED_HEADERS:
            try:
                parts.append(self.request.headers[header_name])
            except KeyError:
                pass
        # md5 only accepts bytes: encode each piece separately so a text
        # URL and a bytes body can be combined, and non-ASCII text hashes
        # instead of raising.
        md5_hash = hashlib.md5()
        md5_hash.update(b''.join(self._as_bytes(part) for part in parts))
        self.request_hash = md5_hash.hexdigest()
        # This is the path to file inside url folder. This can be used for
        # updating a html file
        self.file_path = os.path.join(self.cache_dir, 'url', self.request_hash)
        if callback:
            callback(self.request_hash)

    def create_response_object(self):
        """Return the cached response built by ``response_from_cache``."""
        return response_from_cache(self.request_hash, self.cache_dir)

    def _entry_path(self, folder):
        """Return the path of this transaction's file inside ``folder``."""
        return os.path.join(self.cache_dir, folder, self.request_hash)

    @staticmethod
    def _format_headers(headers):
        """Serialise a header mapping as ``Name: value`` CRLF lines."""
        return ''.join(
            "%s: %s\r\n" % (name, value) for name, value in headers.items())

    @staticmethod
    def _write_text(path, data):
        """Write ``data`` to ``path``, always closing the handle."""
        with open(path, 'w') as handle:
            handle.write(data)

    @staticmethod
    def _write_body(path, data):
        """Write a response body, retrying in binary mode if text fails."""
        try:
            with open(path, 'w') as handle:
                handle.write(data)
        except (TypeError, UnicodeError):
            with open(path, 'wb') as handle:
                handle.write(data)

    def dump(self, response):
        """Dump the request and response data to the cache.

        The transaction is saved across the folders url, req-headers,
        req-body, resp-code, resp-headers, resp-body and resp-time. An
        empty ``<hash>.rd`` file is created afterwards (used by the
        transaction logger). The file lock taken by :meth:`load` is
        released even when a write fails, so a failed dump cannot leave
        the entry locked.

        :param response: HTTPResponse-like object providing ``code``,
            ``headers`` and ``request_time``.
        """
        request = self.request
        try:
            self._write_text(
                self.file_path,
                "%s %s %s" % (request.method, request.url, request.version))
            self._write_text(
                self._entry_path('req-headers'),
                self._format_headers(request.headers))
            self._write_text(self._entry_path('req-body'), request.body)
            self._write_text(
                self._entry_path('resp-code'), str(response.code))
            self._write_text(
                self._entry_path('resp-headers'),
                self._format_headers(response.headers))
            self._write_body(
                self._entry_path('resp-body'), request.response_buffer)
            self._write_text(
                self._entry_path('resp-time'),
                "%s" % (str(response.request_time)))
            # This approach can be used as an alternative for object sharing
            # This creates a file with hash as name and .rd as extension
            open(self.file_path + '.rd', 'w').close()
        finally:
            self.file_lock.release()

    def load(self):
        """Return the cached response, or None when it is not cached.

        This is called for every request. If the file is not found in
        cache, a file lock is created and acquired for it and None is
        returned; the lock stays held until :meth:`dump` releases it.
        """
        if not hasattr(self, 'file_path'):
            self.calculate_hash()
        if os.path.isfile(self.file_path):
            return self.create_response_object()
        self.file_lock = FileLock(self.file_path)
        self.file_lock.acquire()
        # For handling race conditions
        if os.path.isfile(self.file_path):
            self.file_lock.release()
            return self.create_response_object()
        return None
class CacheHandler(object):
    """Load responses from, and dump transactions to, the on-disk cache.

    This class is used by the request handler to either load or dump to
    cache. Main things that are done here:

    * The request_hash is generated here
    * The file locks are managed here
    * .rd files are created here
    """

    def __init__(self, cache_dir, request, cookie_regex, blacklist):
        """Store the request and compute its hash and cache file path.

        The hash is the MD5 hex digest of method + full URL + version +
        body + filtered cookies. The original request is left untampered.

        :param cache_dir: Root cache directory.
        :param request: HTTP request object.
        :param cookie_regex: Regex used to filter cookies for hashing.
        :param blacklist: Boolean. If true, text matching ``cookie_regex``
            is removed from the Cookie header before hashing; otherwise
            only the matches are hashed.
        """
        self.request = request
        self.cache_dir = cache_dir
        parts = [
            request.method,
            request.full_url(),
            request.version,
            request.body,
            self._cookie_string(request, cookie_regex, blacklist),
        ]
        # md5 only accepts bytes: encode each piece separately so a text
        # URL and a bytes body can be combined, and non-ASCII text hashes
        # instead of raising.
        md5_hash = hashlib.md5()
        md5_hash.update(b''.join(self._as_bytes(part) for part in parts))
        self.request_hash = md5_hash.hexdigest()
        # This is the path to file inside url folder. This can be used for
        # updating a html file
        self.file_path = os.path.join(self.cache_dir, 'url', self.request_hash)

    @staticmethod
    def _as_bytes(value):
        """Return ``value`` as bytes, UTF-8 encoding it when it is text."""
        return value if isinstance(value, bytes) else value.encode('utf-8')

    @staticmethod
    def _cookie_string(request, cookie_regex, blacklist):
        """Return the cookie text that takes part in the request hash.

        An absent Cookie header yields an empty string.
        """
        try:
            cookie_header = request.headers['Cookie']
        except KeyError:
            return ''
        if blacklist:
            remaining = re.sub(cookie_regex, '', cookie_header).strip()
            return remaining.replace(' ', '')
        matches = re.findall(cookie_regex, cookie_header)
        return ''.join(
            item.strip() for match in matches for item in match if item)

    def _entry_path(self, folder):
        """Return the path of this transaction's file inside ``folder``."""
        return os.path.join(self.cache_dir, folder, self.request_hash)

    def create_response_object(self):
        """Rebuild a response object from the cached files.

        Reads resp-code, resp-headers and resp-body for this request hash
        and returns a ``DummyResponse`` built from them. Header lines
        without a colon are skipped, and whitespace around each value is
        stripped so the separator space written by :meth:`dump` is not
        returned as part of the value.
        """
        with open(self._entry_path('resp-code'), 'r') as handle:
            response_code = int(handle.read())
        response_headers = {}
        with open(self._entry_path('resp-headers'), 'r') as handle:
            for line in handle:
                name, separator, value = line.partition(":")
                if not separator:
                    continue
                response_headers[name] = value.strip()
        with open(self._entry_path('resp-body'), 'r') as handle:
            response_body = handle.read()
        # Temp object is created as an alternative to use lists (or)
        # dictionaries for passing values
        return DummyResponse(response_code, response_headers, response_body)

    @staticmethod
    def _format_headers(headers):
        """Serialise a header mapping as ``Name: value`` CRLF lines."""
        return ''.join(
            "%s: %s\r\n" % (name, value) for name, value in headers.items())

    @staticmethod
    def _write_text(path, data):
        """Write ``data`` to ``path``, always closing the handle."""
        with open(path, 'w') as handle:
            handle.write(data)

    @staticmethod
    def _write_body(path, data):
        """Write a response body, retrying in binary mode if text fails."""
        try:
            with open(path, 'w') as handle:
                handle.write(data)
        except (TypeError, UnicodeError):
            with open(path, 'wb') as handle:
                handle.write(data)

    def dump(self, response):
        """Dump the request and response data to the cache.

        The transaction is saved across 6 folders - url, req-headers,
        req-body, resp-code, resp-headers, resp-body. An empty
        ``<hash>.rd`` file is created afterwards (used by the transaction
        logger). The file lock taken by :meth:`load` is released even when
        a write fails, so a failed dump cannot leave the entry locked.

        :param response: HTTPResponse-like object providing ``code`` and
            ``headers``.
        """
        request = self.request
        try:
            self._write_text(
                self.file_path,
                "%s %s %s\r\n" % (request.method, request.url, request.version))
            self._write_text(
                self._entry_path('req-headers'),
                self._format_headers(request.headers))
            self._write_text(self._entry_path('req-body'), request.body)
            self._write_text(
                self._entry_path('resp-code'), str(response.code))
            self._write_text(
                self._entry_path('resp-headers'),
                self._format_headers(response.headers))
            self._write_body(
                self._entry_path('resp-body'), request.response_buffer)
            # This approach can be used as an alternative for object sharing
            open(self.file_path + '.rd', 'w').close()
        finally:
            self.file_lock.release()

    def load(self):
        """Return the cached response, or None when it is not cached.

        This is called for every request. If the file is not found in
        cache, a file lock is created and acquired for it and None is
        returned; the lock stays held until :meth:`dump` releases it.
        """
        if os.path.isfile(self.file_path):
            return self.create_response_object()
        self.file_lock = FileLock(self.file_path)
        self.file_lock.acquire()
        # For handling race conditions
        if os.path.isfile(self.file_path):
            self.file_lock.release()
            return self.create_response_object()
        return None
class CacheHandler(object):
    """Load responses from, and dump transactions to, the on-disk cache.

    This class is used by the request handler to either load or dump to
    cache. Main things that are done here:

    * The request_hash is generated here
    * The file locks are managed here
    * .rd files are created here
    """

    # Request headers whose values are appended to the hashed material, in
    # this order. "User-Agent" supports proxying of ua-tester;
    # "Sec-Websocket-Key" is the websocket caching technique.
    _HASHED_HEADERS = ("User-Agent", "Sec-Websocket-Key")

    def __init__(self, cache_dir, request, cookie_regex, blacklist):
        """Store the collaborators; no hashing or file I/O happens here.

        :param cache_dir: Root cache directory.
        :param request: HTTP request object.
        :param cookie_regex: Regex used to filter cookies for hashing.
        :param blacklist: Boolean. If true, text matching ``cookie_regex``
            is removed from the Cookie header before hashing; otherwise
            only the matches are hashed.
        """
        self.request = request
        self.cache_dir = cache_dir
        self.cookie_regex = cookie_regex
        self.blacklist = blacklist

    @staticmethod
    def _as_bytes(value):
        """Return ``value`` as bytes, UTF-8 encoding it when it is text."""
        return value if isinstance(value, bytes) else value.encode('utf-8')

    def _cookie_string(self):
        """Return the cookie text that takes part in the request hash.

        The original request is left untampered; only the hashed material
        is filtered. An absent Cookie header yields an empty string.
        """
        try:
            cookie_header = self.request.headers['Cookie']
        except KeyError:
            return ''
        if self.blacklist:
            remaining = re.sub(self.cookie_regex, '', cookie_header).strip()
            return remaining.replace(' ', '')
        matches = re.findall(self.cookie_regex, cookie_header)
        return ''.join(
            item.strip() for match in matches for item in match if item)

    def calculate_hash(self, callback=None):
        """Compute ``self.request_hash`` and ``self.file_path``.

        The hash is the MD5 hex digest of method + url + version + body +
        filtered cookies + the headers listed in ``_HASHED_HEADERS``.

        :param callback: Optional callable invoked with the request hash.
        """
        parts = [
            self.request.method,
            self.request.url,
            self.request.version,
            self.request.body,
            self._cookie_string(),
        ]
        for header_name in self._HASHED_HEADERS:
            try:
                parts.append(self.request.headers[header_name])
            except KeyError:
                pass
        # md5 only accepts bytes, and dump() treats the body as bytes:
        # encode each piece separately so text and bytes can be combined.
        md5_hash = hashlib.md5()
        md5_hash.update(b''.join(self._as_bytes(part) for part in parts))
        self.request_hash = md5_hash.hexdigest()
        # Path of the cache file for this request; it sits directly inside
        # cache_dir and is named after the hash.
        self.file_path = os.path.join(self.cache_dir, self.request_hash)
        if callback:
            callback(self.request_hash)

    def create_response_object(self):
        """Return the cached response built by ``response_from_cache``."""
        return response_from_cache(
            os.path.join(self.cache_dir, self.request_hash))

    def dump(self, response):
        """Dump the request and response data as one JSON cache file.

        A response body that is not valid UTF-8 is stored base64-encoded
        with ``binary_response`` set to true. An empty ``<hash>.rd`` file
        is created afterwards (used by the transaction logger). The file
        lock taken by :meth:`load` is released even when serialisation or
        writing fails, so a failed dump cannot leave the entry locked.

        :param response: HTTPResponse-like object providing ``code``,
            ``headers`` (with ``get_list``) and ``request_time``.
        """
        try:
            raw_body = self.request.response_buffer
            try:
                response_body = raw_body.decode("utf-8")
                binary_response = False
            except UnicodeDecodeError:
                # b64encode yields bytes; JSON needs text.
                response_body = base64.b64encode(raw_body).decode("ascii")
                binary_response = True
            cache_dict = {
                'request_method': self.request.method,
                'request_url': self.request.url,
                'request_version': self.request.version,
                'request_headers': dict(self.request.headers),
                'request_body': self.request.body.decode('utf-8'),
                'request_time': response.request_time,
                'request_local_timestamp':
                    self.request.local_timestamp.isoformat(),
                'response_code': response.code,
                'response_headers': dict(response.headers),
                'response_body': response_body,
                'response_cookies': response.headers.get_list("Set-Cookie"),
                'binary_response': binary_response,
            }
            with open(self.file_path, 'w') as outfile:
                json.dump(cache_dict, outfile)
            # This approach can be used as an alternative for object sharing
            # This creates a file with hash as name and .rd as extension
            open('%s.rd' % self.file_path, 'w').close()
        finally:
            self.file_lock.release()

    def load(self):
        """Return the cached response, or None when it is not cached.

        This is called for every request. If the file is not found in
        cache, a file lock is created and acquired for it and None is
        returned; the lock stays held until :meth:`dump` releases it.
        """
        if not hasattr(self, 'file_path'):
            self.calculate_hash()
        if os.path.isfile(self.file_path):
            return self.create_response_object()
        self.file_lock = FileLock(self.file_path)
        self.file_lock.acquire()
        # For handling race conditions
        if os.path.isfile(self.file_path):
            self.file_lock.release()
            return self.create_response_object()
        return None
class CacheHandler(object):
    """Load responses from, and dump transactions to, the on-disk cache.

    This class is used by the request handler to either load or dump to
    cache. Main things that are done here:

    * The request_hash is generated here
    * The file locks are managed here
    * .rd files are created here
    """

    # Request headers whose values are appended to the hashed material, in
    # this order. "User-Agent" supports proxying of ua-tester;
    # "Sec-Websocket-Key" is the websocket caching technique.
    _HASHED_HEADERS = ("User-Agent", "Sec-Websocket-Key")

    def __init__(self, cache_dir, request, cookie_regex, blacklist):
        """Store the collaborators; no hashing or file I/O happens here.

        :param cache_dir: Root cache directory.
        :param request: HTTP request object.
        :param cookie_regex: Regex used to filter cookies for hashing.
        :param blacklist: Boolean. If true, text matching ``cookie_regex``
            is removed from the Cookie header before hashing; otherwise
            only the matches are hashed.
        """
        self.request = request
        self.cache_dir = cache_dir
        self.cookie_regex = cookie_regex
        self.blacklist = blacklist

    @staticmethod
    def _as_bytes(value):
        """Return ``value`` as bytes, UTF-8 encoding it when it is text."""
        return value if isinstance(value, bytes) else value.encode('utf-8')

    def _cookie_string(self):
        """Return the cookie text that takes part in the request hash.

        The original request is left untampered; only the hashed material
        is filtered. An absent Cookie header yields an empty string.
        """
        try:
            cookie_header = self.request.headers['Cookie']
        except KeyError:
            return ''
        if self.blacklist:
            remaining = re.sub(self.cookie_regex, '', cookie_header).strip()
            return remaining.replace(' ', '')
        matches = re.findall(self.cookie_regex, cookie_header)
        return ''.join(
            item.strip() for match in matches for item in match if item)

    def calculate_hash(self, callback=None):
        """Compute ``self.request_hash`` and ``self.file_path``.

        The hash is the MD5 hex digest of method + url + version + body +
        filtered cookies + the headers listed in ``_HASHED_HEADERS``.

        :param callback: Optional callable invoked with the request hash.
        """
        parts = [
            self.request.method,
            self.request.url,
            self.request.version,
            self.request.body,
            self._cookie_string(),
        ]
        for header_name in self._HASHED_HEADERS:
            try:
                parts.append(self.request.headers[header_name])
            except KeyError:
                pass
        # md5 only accepts bytes: encode each piece separately so a text
        # URL and a bytes body can be combined, and non-ASCII text hashes
        # instead of raising.
        md5_hash = hashlib.md5()
        md5_hash.update(b''.join(self._as_bytes(part) for part in parts))
        self.request_hash = md5_hash.hexdigest()
        # Path of the cache file for this request; it sits directly inside
        # cache_dir and is named after the hash.
        self.file_path = os.path.join(self.cache_dir, self.request_hash)
        if callback:
            callback(self.request_hash)

    def create_response_object(self):
        """Return the cached response built by ``response_from_cache``."""
        return response_from_cache(
            os.path.join(self.cache_dir, self.request_hash))

    def dump(self, response):
        """Dump the request and response data as one JSON cache file.

        A response body that is not valid UTF-8 is stored base64-encoded
        with ``binary_response`` set to true. An empty ``<hash>.rd`` file
        is created afterwards (used by the transaction logger). The file
        lock taken by :meth:`load` is released even when serialisation or
        writing fails, so a failed dump cannot leave the entry locked.

        :param response: HTTPResponse-like object providing ``code``,
            ``headers`` (with ``get_list``) and ``request_time``.
        """
        try:
            raw_body = self.request.response_buffer
            try:
                # .decode works on Python 2 str and Python 3 bytes alike,
                # unlike the unicode() builtin.
                response_body = raw_body.decode("utf-8")
                binary_response = False
            except UnicodeDecodeError:
                # b64encode yields bytes; JSON needs text.
                response_body = base64.b64encode(raw_body).decode("ascii")
                binary_response = True
            cache_dict = {
                'request_method': self.request.method,
                'request_url': self.request.url,
                'request_version': self.request.version,
                'request_headers': dict(self.request.headers),
                'request_body': self.request.body,
                'request_time': response.request_time,
                'request_local_timestamp':
                    self.request.local_timestamp.isoformat(),
                'response_code': response.code,
                'response_headers': dict(response.headers),
                'response_body': response_body,
                'response_cookies': response.headers.get_list("Set-Cookie"),
                'binary_response': binary_response,
            }
            with open(self.file_path, 'w') as outfile:
                json.dump(cache_dict, outfile)
            # This approach can be used as an alternative for object sharing
            # This creates a file with hash as name and .rd as extension
            open(self.file_path + '.rd', 'w').close()
        finally:
            self.file_lock.release()

    def load(self):
        """Return the cached response, or None when it is not cached.

        This is called for every request. If the file is not found in
        cache, a file lock is created and acquired for it and None is
        returned; the lock stays held until :meth:`dump` releases it.
        """
        if not hasattr(self, 'file_path'):
            self.calculate_hash()
        if os.path.isfile(self.file_path):
            return self.create_response_object()
        self.file_lock = FileLock(self.file_path)
        self.file_lock.acquire()
        # For handling race conditions
        if os.path.isfile(self.file_path):
            self.file_lock.release()
            return self.create_response_object()
        return None