def test_detector_prop_update_extension(self):
    # Edits made to ``detector.extensions`` must be honoured by later calls,
    # and a corrupted ``extensions`` structure must raise SnapSearchError.
    from SnapSearch import Detector, error

    detector = Detector(check_file_extensions=True)
    mp3_request = self.ADSBOT_GOOG_MP3

    # with the extensions as loaded, the detector gives a false result
    self.assertFalse(detector(mp3_request))

    # once "mp3" is added to the generic extensions the result is truthy
    generic_extensions = detector.extensions['generic']
    generic_extensions.append("mp3")
    self.assertTrue(detector(mp3_request))

    # try to damage the structure of ``extensions``
    detector.extensions['generic'] = None
    self.assertRaises(error.SnapSearchError, detector, mp3_request)
def test_detector_prop_update_robots(self):
    # Edits made to ``detector.robots`` must be honoured by later calls,
    # and a corrupted ``robots`` structure must raise SnapSearchError.
    from SnapSearch import Detector, error

    detector = Detector()
    bot_request = self.ADSBOT_GOOG_GET

    # with the robots as loaded, the detector gives a truthy result
    self.assertTrue(detector(bot_request))

    # after adding the robot to the ``ignore`` list the result is false
    ignored_robots = detector.robots['ignore']
    ignored_robots.append("Adsbot-Google")
    self.assertFalse(detector(bot_request))

    # try to damage the structure of ``robots``
    detector.robots['ignore'] = None
    self.assertRaises(error.SnapSearchError, detector, bot_request)
def test_detector_call_return_escape_frag_vars(self):
    # The detector's result for ``ESCAPE_FRAG_VARS`` must equal the URL below.
    from SnapSearch import Detector

    expected_url = ("http://localhost/snapsearch/path1"
                    "?key1=value1#!/path2?key2=value2")
    detector = Detector()
    self.assertEqual(detector(self.ESCAPE_FRAG_VARS), expected_url)
def test_detector_call_bad_request_non_http(self):
    # A request whose ``wsgi.url_scheme`` is "non-http" should *not* be
    # intercepted.
    from SnapSearch import Detector

    non_http_request = {
        'SERVER_NAME': "localhost",
        'SERVER_PORT': "80",
        'wsgi.url_scheme': "non-http",
    }
    intercepted = Detector()(non_http_request)
    self.assertFalse(intercepted)
def setUpClass(cls):
    # Build the fixtures shared by the tests: API credentials, a client,
    # a default detector, two decoded sample requests and a site URL.
    from SnapSearch import Client, Detector

    credentials = _config.get_api_credentials()
    cls.api_email, cls.api_key = credentials

    extra_args = {'test': 1}
    cls.client = Client(cls.api_email, cls.api_key, extra_args)
    cls.detector = Detector()

    # sample requests are stored as JSON text in ``_config``
    cls.ADSBOT_GOOG_GET = json.loads(_config.DATA_ADSBOT_GOOG_GET)
    cls.FIREFOX_REQUEST = json.loads(_config.DATA_FIREFOX_REQUEST)

    cls.NORMAL_SITE_URL = "http://snapsearch.io/"
def test_detector_init_external_robots_json(self):
    # A Detector built from an external `robots.json` must expose the robots
    # from that file; a missing file must raise IOError.
    from SnapSearch import Detector

    # initialize with external `robots.json`
    detector = Detector(robots_json=self.EXTERNAL_ROBOTS_JSON)
    self.assertTrue(detector.robots)
    # assertIn reports both the member and the container on failure
    self.assertIn("Testbot", detector.robots['match'])

    # non-existent json file
    self.assertRaises(
        IOError, Detector, robots_json=self.NON_EXISTENT_JSON)
def test_detector_init(self):
    # A Detector created with default arguments must carry non-empty
    # ``robots`` and ``extensions`` data.
    from SnapSearch import Detector

    # initialize with default arguments
    detector = Detector()

    # make sure the default `robots.json` is loaded
    self.assertTrue(hasattr(detector, 'robots'))
    self.assertTrue(detector.robots)
    # assertIn reports both the member and the container on failure
    self.assertIn("Bingbot", detector.robots['match'])

    # make sure the default `extensions.json` is loaded
    self.assertTrue(hasattr(detector, 'extensions'))
    self.assertTrue(detector.extensions)
    self.assertIn("html", detector.extensions['generic'])
def test_detector_init_external_extensions_json(self):
    # A Detector built from an external `extensions.json` must expose the
    # extensions from that file; inconsistent arguments and a missing file
    # must raise.
    from SnapSearch import Detector

    # initialize with external `extensions.json`
    detector = Detector(
        check_file_extensions=True,
        extensions_json=self.EXTERNAL_EXTENSIONS_JSON)
    self.assertTrue(detector.extensions)
    # assertIn reports both the member and the container on failure
    self.assertIn("test", detector.extensions['generic'])

    # specified `extensions.json` but `check_file_extensions` is False
    self.assertRaises(
        AssertionError, Detector,
        check_file_extensions=False,
        extensions_json=self.EXTERNAL_EXTENSIONS_JSON)

    # non-existent json file
    self.assertRaises(
        IOError, Detector,
        check_file_extensions=True,
        extensions_json=self.NON_EXISTENT_JSON)
def setUpClass(cls):
    # Build the fixtures shared by the tests: API credentials, a client,
    # a default detector, two decoded sample requests, a site URL and a
    # CGI-style environ dict for that site.
    from SnapSearch import Client, Detector

    credentials = _config.get_api_credentials()
    cls.api_email, cls.api_key = credentials

    extra_args = {'test': 1}
    cls.client = Client(cls.api_email, cls.api_key, extra_args)
    cls.detector = Detector()

    # sample requests are stored as JSON text in ``_config``
    cls.ADSBOT_GOOG_GET = json.loads(_config.DATA_ADSBOT_GOOG_GET)
    cls.FIREFOX_REQUEST = json.loads(_config.DATA_FIREFOX_REQUEST)

    cls.NORMAL_SITE_URL = "http://snapsearch.io/"

    site_environ = {
        'HTTP_USER_AGENT': "AdsBot-Google",
        'SERVER_NAME': "snapsearch.io",
        'SERVER_PORT': "80",
        'SCRIPT_NAME': "/",
        'PATH_INFO': "",
        'REQUEST_METHOD': "GET",
        'SERVER_PROTOCOL': "HTTP/1.1",
        'QUERY_STRING': "",
        'GATEWAY_INTERFACE': "CGI/1.1",
        'HTTPS': "off",
    }
    cls.NORMAL_SITE_ENVIRON = site_environ
def test_detector_call_snapsearch_bot(self):
    # The ``SNAPSEARCH_GET`` request should *not* be intercepted.
    from SnapSearch import Detector

    intercepted = Detector()(self.SNAPSEARCH_GET)
    self.assertFalse(intercepted)
def test_detector_call_search_engine_bot(self):
    # The ``ADSBOT_GOOG_GET`` request should be intercepted.
    from SnapSearch import Detector

    intercepted = Detector()(self.ADSBOT_GOOG_GET)
    self.assertTrue(intercepted)
def test_detector_call_normal_browser_safari(self):
    # The ``SAFARI_REQUEST`` request should *not* be intercepted.
    from SnapSearch import Detector

    intercepted = Detector()(self.SAFARI_REQUEST)
    self.assertFalse(intercepted)
def test_detector_call_search_engine_bot_matched_route(self):
    # With ``matched_routes`` configured as below, the ``MSNBOT_MATCHED``
    # request should be intercepted.
    from SnapSearch import Detector

    # raw strings: ``\/`` is not a valid escape in a normal string literal
    # and triggers a warning on modern Python; the pattern text is unchanged
    routes = [r"^\/other", r"^\/matched"]
    detector = Detector(matched_routes=routes)
    request = self.MSNBOT_MATCHED
    self.assertTrue(detector(request))  # should be intercepted
def test_detector_call_search_engine_bot_ignored_route(self):
    # With ``ignored_routes`` configured as below, the ``GOOGBOT_IGNORED``
    # request should *not* be intercepted.
    from SnapSearch import Detector

    # raw strings: ``\/`` is not a valid escape in a normal string literal
    # and triggers a warning on modern Python; the pattern text is unchanged
    routes = [r"^\/other", r"^\/ignored"]
    detector = Detector(ignored_routes=routes)
    request = self.GOOGBOT_IGNORED
    self.assertFalse(detector(request))  # should *not* be intercepted
def test_detector_call_search_engine_bot_non_matched_route(self):
    # With ``matched_routes`` configured as below, the ``MSNBOT_MATCHED``
    # request should *not* be intercepted.
    from SnapSearch import Detector

    # raw strings: ``\/`` is not a valid escape in a normal string literal
    # and triggers a warning on modern Python; the pattern text is unchanged
    routes = [r"^\/x", r"^\/non_matched_route"]
    detector = Detector(matched_routes=routes)
    request = self.MSNBOT_MATCHED
    self.assertFalse(detector(request))  # should *not* be intercepted
def test_detector_call_return_escape_frag_null(self):
    # The detector's result for ``ESCAPE_FRAG_NULL`` must equal the URL below.
    from SnapSearch import Detector

    expected_url = "http://localhost/snapsearch"
    detector = Detector()
    self.assertEqual(detector(self.ESCAPE_FRAG_NULL), expected_url)
def test_detector_call_check_file_ext_non_existent(self):
    # With file-extension checking enabled, the ``ADSBOT_GOOG_HTML`` request
    # should be intercepted.
    from SnapSearch import Detector

    checking_detector = Detector(check_file_extensions=True)
    intercepted = checking_detector(self.ADSBOT_GOOG_HTML)
    self.assertTrue(intercepted)
def test_detector_call_bad_request_no_method(self):
    # A request carrying only server name and port (no request method)
    # should *not* be intercepted.
    from SnapSearch import Detector

    bare_request = {
        'SERVER_NAME': "localhost",
        'SERVER_PORT': "80",
    }
    intercepted = Detector()(bare_request)
    self.assertFalse(intercepted)
def test_detector_call_search_engine_bot_post(self):
    # The ``ADSBOT_GOOG_POST`` request should *not* be intercepted.
    from SnapSearch import Detector

    intercepted = Detector()(self.ADSBOT_GOOG_POST)
    self.assertFalse(intercepted)
def hello_world():
    """Write a minimal CGI "Hello World!" response to standard output.

    The response consists of ``Status``, ``Content-Type`` and
    ``Content-Length`` headers, a blank line, the message, and a trailing
    CRLF.

    Returns:
        int: always ``0``, so the result can be passed to ``sys.exit``.
    """
    msg = b"Hello World!"

    # Flush anything already queued on the text layer so it cannot end up
    # after the bytes written directly to the underlying binary stream.
    sys.stdout.flush()
    # Python 3 text streams reject ``bytes``; use the binary buffer when the
    # stream has one.  Streams without ``buffer`` (e.g. Python 2's stdout)
    # are written to directly.
    out = getattr(sys.stdout, "buffer", sys.stdout)

    # ``bytes(len(msg))`` would be twelve NUL bytes on Python 3, so encode
    # the decimal text of the length instead.
    content_length = str(len(msg)).encode("ascii")

    out.write(b"Status: 200 OK\r\n")
    out.write(b"Content-Type: text/html; charset=utf-8\r\n")
    out.write(b"Content-Length: ")
    out.write(content_length)
    out.write(b"\r\n\r\n")
    out.write(msg)
    out.write(b"\r\n")
    out.flush()
    return 0


if __name__ == '__main__':
    # load SnapSearch API credentials ("email:key"; both empty when the
    # environment variable is not set)
    import os
    credentials = os.environ.get('SNAPSEARCH_API_CREDENTIALS', ":")
    api_email, _, api_key = credentials.partition(":")

    # initialize the interceptor
    from SnapSearch import Client, Detector, Interceptor
    interceptor = Interceptor(Client(api_email, api_key), Detector())

    # deploy the interceptor
    from SnapSearch.cgi import InterceptorController
    InterceptorController(interceptor).start()

    # start servicing
    sys.exit(hello_world())
def test_detector_call_check_file_ext_ineligible(self):
    # With file-extension checking enabled, the ``ADSBOT_GOOG_MP3`` request
    # should *not* be intercepted.
    from SnapSearch import Detector

    checking_detector = Detector(check_file_extensions=True)
    intercepted = checking_detector(self.ADSBOT_GOOG_MP3)
    self.assertFalse(intercepted)