Exemplo n.º 1
0
 def test_detector_prop_update_extension(self):
     # In-place edits to ``Detector.extensions`` must be honoured by the next
     # call, and a corrupted ``extensions`` structure must raise an error.
     from SnapSearch import Detector, error
     checker = Detector(check_file_extensions=True)
     mp3_request = self.ADSBOT_GOOG_MP3
     # before the update the detector gives a falsy answer for this request
     self.assertFalse(checker(mp3_request))
     # register "mp3" under the 'generic' extensions and ask again
     checker.extensions['generic'].append("mp3")
     self.assertTrue(checker(mp3_request))
     # try to damage the structure of ``extensions``
     checker.extensions['generic'] = None
     with self.assertRaises(error.SnapSearchError):
         checker(mp3_request)
Exemplo n.º 2
0
 def test_detector_prop_update_robots(self):
     # In-place edits to ``Detector.robots`` must be honoured by the next
     # call, and a corrupted ``robots`` structure must raise an error.
     from SnapSearch import Detector, error
     checker = Detector()
     bot_request = self.ADSBOT_GOOG_GET
     # with the default robots data the request gets a truthy answer
     self.assertTrue(checker(bot_request))
     # add the robot to the 'ignore' list; the answer must flip to falsy
     checker.robots['ignore'].append("Adsbot-Google")
     self.assertFalse(checker(bot_request))
     # try to damage the structure of ``robots``
     checker.robots['ignore'] = None
     with self.assertRaises(error.SnapSearchError):
         checker(bot_request)
Exemplo n.º 3
0
 def test_detector_call_return_escape_frag_vars(self):
     # For the ``ESCAPE_FRAG_VARS`` request the detector must return the
     # exact URL below (query string followed by a ``#!`` fragment).
     from SnapSearch import Detector
     expected_url = ("http://localhost/snapsearch/path1"
                     "?key1=value1#!/path2?key2=value2")
     returned_url = Detector()(self.ESCAPE_FRAG_VARS)
     self.assertEqual(returned_url, expected_url)
Exemplo n.º 4
0
 def test_detector_call_bad_request_non_http(self):
     # An environ whose ``wsgi.url_scheme`` is "non-http" should *not* be
     # intercepted.
     from SnapSearch import Detector
     checker = Detector()
     environ = {
         'SERVER_NAME': "localhost",
         'SERVER_PORT': "80",
         'wsgi.url_scheme': "non-http",
     }
     outcome = checker(environ)
     self.assertFalse(outcome)
 @classmethod
 def setUpClass(cls):
     """Prepare the class-level fixtures shared by the tests.

     Stores on ``cls``: the API credentials returned by
     ``_config.get_api_credentials()``, a ``Client`` built from them with
     the ``{'test': 1}`` request parameters, a default ``Detector``, two
     request fixtures parsed from JSON text held by ``_config``, and the
     URL of a normal site.

     unittest invokes this hook as ``cls.setUpClass()``, so it has to be a
     classmethod; without the decorator the call fails with ``TypeError``.
     """
     from SnapSearch import Client, Detector
     cls.api_email, cls.api_key = _config.get_api_credentials()
     cls.client = Client(cls.api_email, cls.api_key, {'test': 1})
     cls.detector = Detector()
     # request fixtures are kept as JSON text in ``_config``
     cls.ADSBOT_GOOG_GET = json.loads(_config.DATA_ADSBOT_GOOG_GET)
     cls.FIREFOX_REQUEST = json.loads(_config.DATA_FIREFOX_REQUEST)
     cls.NORMAL_SITE_URL = "http://snapsearch.io/"
Exemplo n.º 6
0
 def test_detector_init_external_robots_json(self):
     # Exercise the ``robots_json`` constructor argument: a valid external
     # file must be loaded, a missing file must raise ``IOError``.
     from SnapSearch import Detector
     custom = Detector(robots_json=self.EXTERNAL_ROBOTS_JSON)
     self.assertTrue(custom.robots)
     self.assertIn("Testbot", custom.robots['match'])
     # non-existent json file
     with self.assertRaises(IOError):
         Detector(robots_json=self.NON_EXISTENT_JSON)
Exemplo n.º 7
0
 def test_detector_init(self):
     # A ``Detector`` built with default arguments must expose non-empty
     # ``robots`` and ``extensions`` attributes containing a known entry,
     # i.e. the default `robots.json` and `extensions.json` were loaded.
     from SnapSearch import Detector
     default_detector = Detector()
     # (attribute name, section inside it, entry expected in that section)
     expectations = (
         ('robots', 'match', "Bingbot"),
         ('extensions', 'generic', "html"),
     )
     for attribute, section, entry in expectations:
         self.assertTrue(hasattr(default_detector, attribute))
         loaded = getattr(default_detector, attribute)
         self.assertTrue(loaded)
         self.assertIn(entry, loaded[section])
Exemplo n.º 8
0
 def test_detector_init_external_extensions_json(self):
     # Exercise the ``extensions_json`` constructor argument: a valid
     # external file is loaded, and the two misuse cases below raise.
     from SnapSearch import Detector
     custom = Detector(
         check_file_extensions=True,
         extensions_json=self.EXTERNAL_EXTENSIONS_JSON)
     self.assertTrue(custom.extensions)
     self.assertIn("test", custom.extensions['generic'])
     # specified `extensions.json` but `check_file_extensions` is False
     with self.assertRaises(AssertionError):
         Detector(check_file_extensions=False,
                  extensions_json=self.EXTERNAL_EXTENSIONS_JSON)
     # non-existent json file
     with self.assertRaises(IOError):
         Detector(check_file_extensions=True,
                  extensions_json=self.NON_EXISTENT_JSON)
 @classmethod
 def setUpClass(cls):
     """Prepare the class-level fixtures shared by the tests.

     Stores on ``cls``: the API credentials returned by
     ``_config.get_api_credentials()``, a ``Client`` built from them with
     the ``{'test': 1}`` request parameters, a default ``Detector``, two
     request fixtures parsed from JSON text held by ``_config``, the URL of
     a normal site and a hand-written CGI-style environ for that site.

     unittest invokes this hook as ``cls.setUpClass()``, so it has to be a
     classmethod; without the decorator the call fails with ``TypeError``.
     """
     from SnapSearch import Client, Detector
     cls.api_email, cls.api_key = _config.get_api_credentials()
     cls.client = Client(cls.api_email, cls.api_key, {'test': 1})
     cls.detector = Detector()
     # request fixtures are kept as JSON text in ``_config``
     cls.ADSBOT_GOOG_GET = json.loads(_config.DATA_ADSBOT_GOOG_GET)
     cls.FIREFOX_REQUEST = json.loads(_config.DATA_FIREFOX_REQUEST)
     cls.NORMAL_SITE_URL = "http://snapsearch.io/"
     # CGI-style environ of a plain-HTTP GET sent with the "AdsBot-Google"
     # user agent to snapsearch.io
     cls.NORMAL_SITE_ENVIRON = {
         'HTTP_USER_AGENT': "AdsBot-Google",
         'SERVER_NAME': "snapsearch.io",
         'SERVER_PORT': "80",
         'SCRIPT_NAME': "/",
         'PATH_INFO': "",
         'REQUEST_METHOD': "GET",
         'SERVER_PROTOCOL': "HTTP/1.1",
         'QUERY_STRING': "",
         'GATEWAY_INTERFACE': "CGI/1.1",
         'HTTPS': "off",
     }
Exemplo n.º 10
0
 def test_detector_call_snapsearch_bot(self):
     # The ``SNAPSEARCH_GET`` request should *not* be intercepted.
     from SnapSearch import Detector
     outcome = Detector()(self.SNAPSEARCH_GET)
     self.assertFalse(outcome)
Exemplo n.º 11
0
 def test_detector_call_search_engine_bot(self):
     # The ``ADSBOT_GOOG_GET`` request should be intercepted.
     from SnapSearch import Detector
     outcome = Detector()(self.ADSBOT_GOOG_GET)
     self.assertTrue(outcome)
Exemplo n.º 12
0
 def test_detector_call_normal_browser_safari(self):
     # The ``SAFARI_REQUEST`` request should *not* be intercepted.
     from SnapSearch import Detector
     outcome = Detector()(self.SAFARI_REQUEST)
     self.assertFalse(outcome)
Exemplo n.º 13
0
 def test_detector_call_search_engine_bot_matched_route(self):
     # With ``matched_routes`` configured, the ``MSNBOT_MATCHED`` request
     # should be intercepted.
     from SnapSearch import Detector
     # Raw strings: ``\/`` is not a valid escape in a normal literal and
     # triggers a warning on recent Pythons; the pattern text is unchanged.
     detector = Detector(matched_routes=[r"^\/other", r"^\/matched", ])
     request = self.MSNBOT_MATCHED
     self.assertTrue(detector(request))  # should be intercepted
Exemplo n.º 14
0
 def test_detector_call_search_engine_bot_ignored_route(self):
     # With ``ignored_routes`` configured, the ``GOOGBOT_IGNORED`` request
     # should *not* be intercepted.
     from SnapSearch import Detector
     # Raw strings: ``\/`` is not a valid escape in a normal literal and
     # triggers a warning on recent Pythons; the pattern text is unchanged.
     detector = Detector(ignored_routes=[r"^\/other", r"^\/ignored", ])
     request = self.GOOGBOT_IGNORED
     self.assertFalse(detector(request))  # should *not* be intercepted
Exemplo n.º 15
0
 def test_detector_call_search_engine_bot_non_matched_route(self):
     # With ``matched_routes`` set to the patterns below, the
     # ``MSNBOT_MATCHED`` request should *not* be intercepted.
     from SnapSearch import Detector
     # Raw strings: ``\/`` is not a valid escape in a normal literal and
     # triggers a warning on recent Pythons; the pattern text is unchanged.
     detector = Detector(
         matched_routes=[r"^\/x", r"^\/non_matched_route", ])
     request = self.MSNBOT_MATCHED
     self.assertFalse(detector(request))  # should *not* be intercepted
Exemplo n.º 16
0
 def test_detector_call_return_escape_frag_null(self):
     # For the ``ESCAPE_FRAG_NULL`` request the detector must return the
     # exact URL below.
     from SnapSearch import Detector
     expected_url = "http://localhost/snapsearch"
     returned_url = Detector()(self.ESCAPE_FRAG_NULL)
     self.assertEqual(returned_url, expected_url)
Exemplo n.º 17
0
 def test_detector_call_check_file_ext_non_existent(self):
     # With extension checking enabled, the ``ADSBOT_GOOG_HTML`` request
     # should be intercepted.
     from SnapSearch import Detector
     checker = Detector(check_file_extensions=True)
     outcome = checker(self.ADSBOT_GOOG_HTML)
     self.assertTrue(outcome)
Exemplo n.º 18
0
 def test_detector_call_bad_request_no_method(self):
     # An environ holding only the server name and port (no request method)
     # should *not* be intercepted.
     from SnapSearch import Detector
     checker = Detector()
     environ = {
         'SERVER_NAME': "localhost",
         'SERVER_PORT': "80",
     }
     outcome = checker(environ)
     self.assertFalse(outcome)
Exemplo n.º 19
0
 def test_detector_call_search_engine_bot_post(self):
     # The ``ADSBOT_GOOG_POST`` request should *not* be intercepted.
     from SnapSearch import Detector
     outcome = Detector()(self.ADSBOT_GOOG_POST)
     self.assertFalse(outcome)
Exemplo n.º 20
0
def hello_world():
    """Write a minimal CGI "Hello World!" response to standard output.

    The status line, the headers and the body are emitted as bytes.  On
    Python 3 they go to the binary layer ``sys.stdout.buffer`` because the
    text-mode ``sys.stdout`` rejects ``bytes``; when no ``buffer`` attribute
    exists (e.g. Python 2) ``sys.stdout`` itself is used.

    Returns:
        int: always ``0``, which the caller passes to ``sys.exit``.
    """
    msg = b"Hello World!"
    # push out anything already queued on the text layer so that it cannot
    # end up after the bytes written directly to the binary layer
    sys.stdout.flush()
    out = getattr(sys.stdout, "buffer", sys.stdout)
    out.write(b"Status: 200 OK\r\n")
    out.write(b"Content-Type: text/html; charset=utf-8\r\n")
    out.write(b"Content-Length: ")
    # ``bytes(n)`` on Python 3 is ``n`` NUL bytes, not the decimal digits,
    # so format the number as text first and encode it
    out.write(str(len(msg)).encode("ascii"))
    out.write(b"\r\n\r\n")
    out.write(msg)
    # NOTE(review): this trailing CRLF is not counted in Content-Length;
    # kept as in the original output -- confirm whether it is intended.
    out.write(b"\r\n")
    out.flush()
    return 0


if __name__ == '__main__':

    # SnapSearch API credentials come from the environment in the form
    # "<email>:<key>"; the ":" default yields an empty email and empty key
    import os
    raw_credentials = os.environ.get('SNAPSEARCH_API_CREDENTIALS', ":")
    api_email, _separator, api_key = raw_credentials.partition(":")

    # build the interceptor from an API client and a default detector
    from SnapSearch import Client, Detector, Interceptor
    api_client = Client(api_email, api_key)
    interceptor = Interceptor(api_client, Detector())

    # hand the interceptor to the CGI controller and start it
    from SnapSearch.cgi import InterceptorController
    controller = InterceptorController(interceptor)
    controller.start()

    # serve the page; the return value becomes the process exit status
    exit_status = hello_world()
    sys.exit(exit_status)
Exemplo n.º 21
0
 def test_detector_call_check_file_ext_ineligible(self):
     # With extension checking enabled, the ``ADSBOT_GOOG_MP3`` request
     # should *not* be intercepted.
     from SnapSearch import Detector
     checker = Detector(check_file_extensions=True)
     outcome = checker(self.ADSBOT_GOOG_MP3)
     self.assertFalse(outcome)