def setUp(self):
    """Build a KafkaMonitor wired to localsettings.py for online tests.

    Stats collection is disabled and the plugin list is overridden so only
    the handlers under test are loaded.  Fails the test quickly if Kafka
    cannot be reached within 10 seconds instead of hanging.
    """
    self.kafka_monitor = KafkaMonitor("localsettings.py")
    new_settings = self.kafka_monitor.wrapper.load("localsettings.py")
    # Point the monitor at a dedicated test topic with a short consumer
    # timeout so the test run does not block on an idle topic
    new_settings['KAFKA_INCOMING_TOPIC'] = "demo.incoming_test"
    new_settings['KAFKA_CONSUMER_TIMEOUT'] = 5000
    new_settings['STATS_TOTAL'] = False
    new_settings['STATS_PLUGINS'] = False
    new_settings['PLUGINS'] = {
        # None appears to disable the stock handlers; CustomHandler is
        # registered at rank 100 -- TODO confirm None-handling in
        # _load_plugins
        'plugins.scraper_handler.ScraperHandler': None,
        'plugins.action_handler.ActionHandler': None,
        'tests.online.CustomHandler': 100,
    }
    # setup() re-reads settings through the wrapper, so patch the loader to
    # return our modified copy instead of re-reading disk
    self.kafka_monitor.wrapper.load = MagicMock(return_value=new_settings)
    self.kafka_monitor.setup()

    # Guard the Kafka connection attempt with a timeout; timer() returns
    # True on success, and MethodTimer.timeout's fallback value (False)
    # signals that the broker was unreachable
    @MethodTimer.timeout(10, False)
    def timer():
        self.kafka_monitor._setup_kafka()
        return True
    retval = timer()
    if not retval:
        self.fail("Unable to connect to Kafka")

    self.kafka_monitor._load_plugins()
    self.kafka_monitor._setup_stats()
    # Direct redis connection the tests use to verify plugin side effects
    self.redis_conn = redis.Redis(
        host=self.kafka_monitor.settings['REDIS_HOST'],
        port=self.kafka_monitor.settings['REDIS_PORT'],
        db=self.kafka_monitor.settings['REDIS_DB'])
def setUp(self):
    """Build a KafkaMonitor wired to localsettings.py for online tests.

    Stats collection is disabled and the plugin list is overridden so only
    the handlers under test are loaded; the monitor's logger is mocked so
    the test run stays quiet.
    """
    self.kafka_monitor = KafkaMonitor("localsettings.py")
    new_settings = self.kafka_monitor.wrapper.load("localsettings.py")
    # Point the monitor at a dedicated test topic and disable stats
    new_settings['KAFKA_INCOMING_TOPIC'] = "demo.incoming_test"
    new_settings['STATS_TOTAL'] = False
    new_settings['STATS_PLUGINS'] = False
    new_settings['PLUGINS'] = {
        'plugins.scraper_handler.ScraperHandler': None,
        'plugins.action_handler.ActionHandler': None,
        'tests.tests_online.CustomHandler': 100,
    }
    # setup() re-reads settings through the wrapper, so patch the loader to
    # return our modified copy instead of re-reading disk
    self.kafka_monitor.wrapper.load = MagicMock(return_value=new_settings)
    self.kafka_monitor.setup()
    self.kafka_monitor._setup_kafka()
    self.kafka_monitor._load_plugins()
    self.kafka_monitor._setup_stats()
    # Silence log output during the tests
    self.kafka_monitor.logger = MagicMock()
    # NOTE(review): removed leftover debug `print` statements that dumped
    # KAFKA_HOSTS / REDIS_HOST / REDIS_PORT on every test run -- they were
    # fixture noise, not assertions.
    # Direct redis connection the tests use to verify plugin side effects
    self.redis_conn = redis.Redis(
        host=self.kafka_monitor.settings['REDIS_HOST'],
        port=self.kafka_monitor.settings['REDIS_PORT'])
def setUp(self):
    """Prepare an online KafkaMonitor fixture backed by localsettings.py."""
    self.kafka_monitor = KafkaMonitor("localsettings.py")
    # Load the real settings once, then overlay the test-specific values.
    patched = self.kafka_monitor.wrapper.load("localsettings.py")
    patched.update({
        'KAFKA_INCOMING_TOPIC': "demo.incoming_test",
        'STATS_TOTAL': False,
        'STATS_PLUGINS': False,
        'PLUGINS': {
            'plugins.scraper_handler.ScraperHandler': None,
            'plugins.action_handler.ActionHandler': None,
            'tests.tests_online.CustomHandler': 100,
        },
    })
    # setup() re-loads settings, so make the loader hand back our overlay.
    self.kafka_monitor.wrapper.load = MagicMock(return_value=patched)
    self.kafka_monitor.setup()
    self.kafka_monitor._setup_kafka()
    self.kafka_monitor._load_plugins()
    self.kafka_monitor._setup_stats()
    # Silence log output during the tests.
    self.kafka_monitor.logger = MagicMock()
    self.redis_conn = redis.Redis(
        host=self.kafka_monitor.settings['REDIS_HOST'],
        port=self.kafka_monitor.settings['REDIS_PORT'])
class TestKafkaMonitor(TestCase):
    """Online (integration) tests: feed a request through Kafka and verify
    the custom handler's side effect lands in redis.

    NOTE(review): test_run asserts on the message test_feed produced, so
    these tests appear to depend on alphabetical execution order
    (test_feed before test_run) -- confirm this is intentional.
    """

    def setUp(self):
        # Monitor against localsettings.py with stats disabled and only the
        # handlers under test loaded
        self.kafka_monitor = KafkaMonitor("localsettings.py")
        new_settings = self.kafka_monitor.wrapper.load("localsettings.py")
        new_settings['KAFKA_INCOMING_TOPIC'] = "demo.incoming_test"
        new_settings['STATS_TOTAL'] = False
        new_settings['STATS_PLUGINS'] = False
        new_settings['PLUGINS'] = {
            'plugins.scraper_handler.ScraperHandler': None,
            'plugins.action_handler.ActionHandler': None,
            'tests.tests_online.CustomHandler': 100,
        }
        # setup() re-reads settings through the wrapper; patch the loader so
        # it returns our modified copy
        self.kafka_monitor.wrapper.load = MagicMock(return_value=new_settings)
        self.kafka_monitor.setup()
        self.kafka_monitor._setup_kafka()
        self.kafka_monitor._load_plugins()
        self.kafka_monitor._setup_stats()
        # Silence log output during the tests
        self.kafka_monitor.logger = MagicMock()
        # Direct redis connection used to verify the handler's side effect
        self.redis_conn = redis.Redis(
            host=self.kafka_monitor.settings['REDIS_HOST'],
            port=self.kafka_monitor.settings['REDIS_PORT'])

    def test_feed(self):
        """Push a well-formed action request into the incoming topic."""
        json_req = "{\"uuid\":\"mytestid\"," \
            "\"appid\":\"testapp\",\"action\":\"info\",\"spiderid\":\"link\"}"
        parsed = json.loads(json_req)
        self.kafka_monitor.feed(parsed)

    def test_run(self):
        """Consume the fed message and check the handler wrote to redis."""
        self.kafka_monitor._process_messages()
        # cluster:test / mytestid are presumably written by
        # tests.tests_online.CustomHandler -- TODO confirm against that module
        self.assertTrue(self.redis_conn.exists("cluster:test"))
        value = self.redis_conn.get("cluster:test")
        self.assertEqual(value, "mytestid")

    def tearDown(self):
        # Remove the key the handler created so reruns start clean
        self.redis_conn.delete("cluster:test")
def setUp(self):
    """Create an offline KafkaMonitor with loaded settings and a mocked logger."""
    monitor = KafkaMonitor("settings.py", True)
    # Attach the loaded settings directly instead of running full setup().
    monitor.settings = monitor.wrapper.load("settings.py")
    # Keep test output quiet.
    monitor.logger = MagicMock()
    self.kafka_monitor = monitor
class TestKafkaMonitor(TestCase):
    """Offline unit tests for KafkaMonitor.

    Kafka, redis and the logger are replaced with mocks; the tests exercise
    plugin loading, stats-collector setup and message dispatch directly.
    """

    def setUp(self):
        # Second constructor argument is forwarded to KafkaMonitor --
        # presumably a unit-test/debug flag; TODO confirm against
        # KafkaMonitor.__init__
        self.kafka_monitor = KafkaMonitor("settings.py", True)
        self.kafka_monitor.settings = self.kafka_monitor.wrapper.load("settings.py")
        # silence log output during the tests
        self.kafka_monitor.logger = MagicMock()

    def test_load_plugins(self):
        """Plugins load keyed by rank and honor override/removal/errors."""
        # test loading default plugins
        # NOTE(review): comparing .keys() to a list assumes deterministic
        # key ordering (Python 2 dict / OrderedDict) -- confirm
        # plugins_dict's type
        assert_keys = [100, 200, 300]
        self.kafka_monitor._load_plugins()
        self.assertEqual(self.kafka_monitor.plugins_dict.keys(), assert_keys)

        # test removing a plugin from settings
        assert_keys = [200, 300]
        self.kafka_monitor.settings['PLUGINS'] \
            ['plugins.scraper_handler.ScraperHandler'] = None
        self.kafka_monitor._load_plugins()
        self.assertEqual(self.kafka_monitor.plugins_dict.keys(), assert_keys)
        # restore the scraper handler for subsequent assertions
        self.kafka_monitor.settings['PLUGINS'] \
            ['plugins.scraper_handler.ScraperHandler'] = 100

        # fail if the class is not found
        self.kafka_monitor.settings['PLUGINS'] \
            ['plugins.crazy_class.CrazyHandler'] = 300
        self.assertRaises(ImportError, self.kafka_monitor._load_plugins)
        del self.kafka_monitor.settings['PLUGINS'] \
            ['plugins.crazy_class.CrazyHandler']

        # Throw error if schema could not be found
        # NOTE(review): the trailing comma stores the tuple (300,) rather
        # than 300 -- looks unintended; verify _load_plugins tolerates it
        self.kafka_monitor.settings['PLUGINS'] \
            ['tests.tests_offline.ExampleHandler'] = 300,
        self.assertRaises(IOError, self.kafka_monitor._load_plugins)
        del self.kafka_monitor.settings['PLUGINS'] \
            ['tests.tests_offline.ExampleHandler']

    def test_load_stats_total(self):
        """Total/fail stats collectors are created per configured window."""
        # test no rolling stats, only total
        self.kafka_monitor.stats_dict = {}
        self.kafka_monitor.settings['STATS_TIMES'] = []
        self.kafka_monitor._setup_stats_total(MagicMock())
        self.assertEquals(self.kafka_monitor.stats_dict['total'].keys(),
                          ['lifetime'])
        self.assertEquals(self.kafka_monitor.stats_dict['fail'].keys(),
                          ['lifetime'])

        # test good/bad rolling stats: SECONDS_DUMB is not a real window
        # and should be skipped
        self.kafka_monitor.stats_dict = {}
        self.kafka_monitor.settings['STATS_TIMES'] = [
            'SECONDS_15_MINUTE',
            'SECONDS_1_HOUR',
            'SECONDS_DUMB',
        ]
        good = [
            'lifetime',  # for totals, not DUMB
            900,
            3600,
        ]
        self.kafka_monitor._setup_stats_total(MagicMock())
        self.assertEquals(
            sorted(self.kafka_monitor.stats_dict['total'].keys()),
            sorted(good))
        self.assertEquals(
            sorted(self.kafka_monitor.stats_dict['fail'].keys()),
            sorted(good))

        # verify each collector's redis key; time_key 0 marks the lifetime
        # collector, every other key embeds its window in seconds
        k1 = 'stats:kafka-monitor:total'
        k2 = 'stats:kafka-monitor:fail'
        for time_key in self.kafka_monitor.stats_dict['total']:
            if time_key == 0:
                self.assertEquals(
                    self.kafka_monitor.stats_dict['total'][0].key,
                    '{k}:lifetime'.format(k=k1)
                )
            else:
                self.assertEquals(
                    self.kafka_monitor.stats_dict['total'][time_key].key,
                    '{k}:{t}'.format(k=k1, t=time_key)
                )
        for time_key in self.kafka_monitor.stats_dict['fail']:
            if time_key == 0:
                self.assertEquals(
                    self.kafka_monitor.stats_dict['fail'][0].key,
                    '{k}:lifetime'.format(k=k2)
                )
            else:
                self.assertEquals(
                    self.kafka_monitor.stats_dict['fail'][time_key].key,
                    '{k}:{t}'.format(k=k2, t=time_key)
                )

    def test_load_stats_plugins(self):
        """Per-plugin stats collectors mirror the loaded plugin classes."""
        # lets assume we are loading the default plugins
        self.kafka_monitor._load_plugins()

        # test no rolling stats
        self.kafka_monitor.stats_dict = {}
        self.kafka_monitor.settings['STATS_TIMES'] = []
        self.kafka_monitor._setup_stats_plugins(MagicMock())
        defaults = [
            'ScraperHandler',
            'ActionHandler',
            'StatsHandler'
        ]
        self.assertEquals(
            sorted(self.kafka_monitor.stats_dict['plugins'].keys()),
            sorted(defaults))
        for key in self.kafka_monitor.plugins_dict:
            plugin_name = self.kafka_monitor.plugins_dict[key]['instance'].__class__.__name__
            self.assertEquals(
                self.kafka_monitor.stats_dict['plugins'][plugin_name].keys(),
                ['lifetime'])

        # test good/bad rolling stats (SECONDS_DUMB is invalid and skipped)
        self.kafka_monitor.stats_dict = {}
        self.kafka_monitor.settings['STATS_TIMES'] = [
            'SECONDS_15_MINUTE',
            'SECONDS_1_HOUR',
            'SECONDS_DUMB',
        ]
        good = [
            'lifetime',  # for totals, not DUMB
            900,
            3600,
        ]
        self.kafka_monitor._setup_stats_plugins(MagicMock())
        self.assertEquals(
            sorted(self.kafka_monitor.stats_dict['plugins'].keys()),
            sorted(defaults))
        for key in self.kafka_monitor.plugins_dict:
            plugin_name = self.kafka_monitor.plugins_dict[key]['instance'].__class__.__name__
            self.assertEquals(
                sorted(self.kafka_monitor.stats_dict['plugins'][plugin_name].keys()),
                sorted(good))

        # verify each plugin collector's redis key naming
        for plugin_key in self.kafka_monitor.stats_dict['plugins']:
            k1 = 'stats:kafka-monitor:{p}'.format(p=plugin_key)
            for time_key in self.kafka_monitor.stats_dict['plugins'][plugin_key]:
                if time_key == 0:
                    self.assertEquals(
                        self.kafka_monitor.stats_dict['plugins'][plugin_key][0].key,
                        '{k}:lifetime'.format(k=k1)
                    )
                else:
                    self.assertEquals(
                        self.kafka_monitor.stats_dict['plugins'][plugin_key][time_key].key,
                        '{k}:{t}'.format(k=k1, t=time_key)
                    )

    def test_process_messages(self):
        """_process_messages tolerates Kafka/json errors and dispatches to
        the highest-matching plugin handler."""
        self.kafka_monitor.consumer = MagicMock()
        self.kafka_monitor.stats_dict = {}

        # handle kafka offset errors
        self.kafka_monitor.consumer.get_messages = MagicMock(
            side_effect=OffsetOutOfRangeError("1"))
        try:
            self.kafka_monitor._process_messages()
        except OffsetOutOfRangeError:
            self.fail("_process_messages did not handle Kafka Offset Error")

        # handle bad json errors
        message_string = "{\"sdasdf sd}"

        # fake class so we can use dot notation
        class a:
            pass
        m = a()
        m.message = a()
        m.message.value = message_string
        messages = [m]
        self.kafka_monitor.consumer.get_messages = MagicMock(
            return_value=messages)
        try:
            self.kafka_monitor._process_messages()
        except OffsetOutOfRangeError:
            self.fail("_process_messages did not handle bad json")

        # set up to process messages: replace each plugin's handle() with a
        # mock that raises a distinctive AssertionError so we can tell which
        # handler was invoked
        self.kafka_monitor.consumer.get_messages = MagicMock(
            return_value=messages)
        self.kafka_monitor._load_plugins()
        self.kafka_monitor.validator = self.kafka_monitor.extend_with_default(Draft4Validator)
        self.kafka_monitor.plugins_dict.items()[0][1]['instance'].handle = MagicMock(side_effect=AssertionError("scrape"))
        self.kafka_monitor.plugins_dict.items()[1][1]['instance'].handle = MagicMock(side_effect=AssertionError("action"))

        # test that handler function is called for the scraper
        message_string = "{\"url\":\"www.stuff.com\",\"crawlid\":\"1234\"," \
            "\"appid\":\"testapp\"}"
        m.message.value = message_string
        messages = [m]
        # NOTE(review): self.fail() also raises AssertionError, so a missed
        # dispatch is caught below and reported as a message mismatch rather
        # than the "not called" failure -- consider a custom exception type
        try:
            self.kafka_monitor._process_messages()
            self.fail("Scrape not called")
        except AssertionError as e:
            self.assertEquals("scrape", e.message)

        # test that handler function is called for the actions
        message_string = "{\"uuid\":\"blah\",\"crawlid\":\"1234\"," \
            "\"appid\":\"testapp\",\"action\":\"info\",\"spiderid\":\"link\"}"
        m.message.value = message_string
        messages = [m]
        try:
            self.kafka_monitor._process_messages()
            self.fail("Action not called")
        except AssertionError as e:
            self.assertEquals("action", e.message)
def main():
    """Entry point: build a KafkaMonitor from the CLI args and run it.

    NOTE(review): sys.argv[1:] (a list) is passed where other call sites
    pass a settings filename string -- confirm KafkaMonitor accepts a list.
    """
    KafkaMonitor(sys.argv[1:]).run()
def setUp(self):
    """Offline fixture: KafkaMonitor with real settings and a quiet logger."""
    km = KafkaMonitor("settings.py", True)
    # Load settings directly rather than going through full setup().
    km.settings = km.wrapper.load("settings.py")
    # Suppress log output for the duration of each test.
    km.logger = MagicMock()
    self.kafka_monitor = km
class TestKafkaMonitor(TestCase):
    """Offline unit tests for KafkaMonitor with mocked dependencies.

    Covers plugin loading, stats-collector setup, and message dispatch;
    no live Kafka or redis is required.
    """

    def setUp(self):
        # Second constructor argument is forwarded to KafkaMonitor --
        # presumably a unit-test/debug flag; TODO confirm against
        # KafkaMonitor.__init__
        self.kafka_monitor = KafkaMonitor("settings.py", True)
        self.kafka_monitor.settings = self.kafka_monitor.wrapper.load(
            "settings.py")
        # silence log output during the tests
        self.kafka_monitor.logger = MagicMock()

    def test_load_plugins(self):
        """Plugins load keyed by rank and honor override/removal/errors."""
        # test loading default plugins
        # NOTE(review): comparing .keys() to a list assumes deterministic
        # key ordering (Python 2 dict / OrderedDict) -- confirm
        # plugins_dict's type
        assert_keys = [100, 200, 300]
        self.kafka_monitor._load_plugins()
        self.assertEqual(self.kafka_monitor.plugins_dict.keys(), assert_keys)

        # test removing a plugin from settings
        assert_keys = [200, 300]
        self.kafka_monitor.settings['PLUGINS'] \
            ['plugins.scraper_handler.ScraperHandler'] = None
        self.kafka_monitor._load_plugins()
        self.assertEqual(self.kafka_monitor.plugins_dict.keys(), assert_keys)
        # restore the scraper handler for subsequent assertions
        self.kafka_monitor.settings['PLUGINS'] \
            ['plugins.scraper_handler.ScraperHandler'] = 100

        # fail if the class is not found
        self.kafka_monitor.settings['PLUGINS'] \
            ['plugins.crazy_class.CrazyHandler'] = 300
        self.assertRaises(ImportError, self.kafka_monitor._load_plugins)
        del self.kafka_monitor.settings['PLUGINS'] \
            ['plugins.crazy_class.CrazyHandler']

        # Throw error if schema could not be found
        # NOTE(review): the trailing comma stores the tuple (300,) rather
        # than 300 -- looks unintended; verify _load_plugins tolerates it
        self.kafka_monitor.settings['PLUGINS'] \
            ['tests.tests_offline.ExampleHandler'] = 300,
        self.assertRaises(IOError, self.kafka_monitor._load_plugins)
        del self.kafka_monitor.settings['PLUGINS'] \
            ['tests.tests_offline.ExampleHandler']

    def test_load_stats_total(self):
        """Total/fail stats collectors are created per configured window."""
        # test no rolling stats, only total
        self.kafka_monitor.stats_dict = {}
        self.kafka_monitor.settings['STATS_TIMES'] = []
        self.kafka_monitor._setup_stats_total(MagicMock())
        self.assertEquals(self.kafka_monitor.stats_dict['total'].keys(),
                          ['lifetime'])
        self.assertEquals(self.kafka_monitor.stats_dict['fail'].keys(),
                          ['lifetime'])

        # test good/bad rolling stats: SECONDS_DUMB is not a real window
        # and should be skipped
        self.kafka_monitor.stats_dict = {}
        self.kafka_monitor.settings['STATS_TIMES'] = [
            'SECONDS_15_MINUTE',
            'SECONDS_1_HOUR',
            'SECONDS_DUMB',
        ]
        good = [
            'lifetime',  # for totals, not DUMB
            900,
            3600,
        ]
        self.kafka_monitor._setup_stats_total(MagicMock())
        self.assertEquals(
            sorted(self.kafka_monitor.stats_dict['total'].keys()),
            sorted(good))
        self.assertEquals(sorted(self.kafka_monitor.stats_dict['fail'].keys()),
                          sorted(good))

        # verify each collector's redis key; time_key 0 marks the lifetime
        # collector, every other key embeds its window in seconds
        k1 = 'stats:kafka-monitor:total'
        k2 = 'stats:kafka-monitor:fail'
        for time_key in self.kafka_monitor.stats_dict['total']:
            if time_key == 0:
                self.assertEquals(
                    self.kafka_monitor.stats_dict['total'][0].key,
                    '{k}:lifetime'.format(k=k1))
            else:
                self.assertEquals(
                    self.kafka_monitor.stats_dict['total'][time_key].key,
                    '{k}:{t}'.format(k=k1, t=time_key))
        for time_key in self.kafka_monitor.stats_dict['fail']:
            if time_key == 0:
                self.assertEquals(self.kafka_monitor.stats_dict['fail'][0].key,
                                  '{k}:lifetime'.format(k=k2))
            else:
                self.assertEquals(
                    self.kafka_monitor.stats_dict['fail'][time_key].key,
                    '{k}:{t}'.format(k=k2, t=time_key))

    def test_load_stats_plugins(self):
        """Per-plugin stats collectors mirror the loaded plugin classes."""
        # lets assume we are loading the default plugins
        self.kafka_monitor._load_plugins()

        # test no rolling stats
        self.kafka_monitor.stats_dict = {}
        self.kafka_monitor.settings['STATS_TIMES'] = []
        self.kafka_monitor._setup_stats_plugins(MagicMock())
        defaults = ['ScraperHandler', 'ActionHandler', 'StatsHandler']
        self.assertEquals(
            sorted(self.kafka_monitor.stats_dict['plugins'].keys()),
            sorted(defaults))
        for key in self.kafka_monitor.plugins_dict:
            plugin_name = self.kafka_monitor.plugins_dict[key][
                'instance'].__class__.__name__
            self.assertEquals(
                self.kafka_monitor.stats_dict['plugins'][plugin_name].keys(),
                ['lifetime'])

        # test good/bad rolling stats (SECONDS_DUMB is invalid and skipped)
        self.kafka_monitor.stats_dict = {}
        self.kafka_monitor.settings['STATS_TIMES'] = [
            'SECONDS_15_MINUTE',
            'SECONDS_1_HOUR',
            'SECONDS_DUMB',
        ]
        good = [
            'lifetime',  # for totals, not DUMB
            900,
            3600,
        ]
        self.kafka_monitor._setup_stats_plugins(MagicMock())
        self.assertEquals(
            sorted(self.kafka_monitor.stats_dict['plugins'].keys()),
            sorted(defaults))
        for key in self.kafka_monitor.plugins_dict:
            plugin_name = self.kafka_monitor.plugins_dict[key][
                'instance'].__class__.__name__
            self.assertEquals(
                sorted(self.kafka_monitor.stats_dict['plugins']
                       [plugin_name].keys()),
                sorted(good))

        # verify each plugin collector's redis key naming
        for plugin_key in self.kafka_monitor.stats_dict['plugins']:
            k1 = 'stats:kafka-monitor:{p}'.format(p=plugin_key)
            for time_key in self.kafka_monitor.stats_dict['plugins'][
                    plugin_key]:
                if time_key == 0:
                    self.assertEquals(
                        self.kafka_monitor.stats_dict['plugins'][plugin_key]
                        [0].key,
                        '{k}:lifetime'.format(k=k1))
                else:
                    self.assertEquals(
                        self.kafka_monitor.stats_dict['plugins'][plugin_key]
                        [time_key].key,
                        '{k}:{t}'.format(k=k1, t=time_key))

    def test_process_messages(self):
        """_process_messages tolerates Kafka/json errors and dispatches to
        the matching plugin handler."""
        self.kafka_monitor.consumer = MagicMock()
        self.kafka_monitor.stats_dict = {}

        # handle kafka offset errors
        self.kafka_monitor.consumer.get_messages = MagicMock(
            side_effect=OffsetOutOfRangeError("1"))
        try:
            self.kafka_monitor._process_messages()
        except OffsetOutOfRangeError:
            self.fail("_process_messages did not handle Kafka Offset Error")

        # handle bad json errors
        message_string = "{\"sdasdf sd}"

        # fake class so we can use dot notation
        class a:
            pass
        m = a()
        m.message = a()
        m.message.value = message_string
        messages = [m]
        self.kafka_monitor.consumer.get_messages = MagicMock(
            return_value=messages)
        try:
            self.kafka_monitor._process_messages()
        except OffsetOutOfRangeError:
            self.fail("_process_messages did not handle bad json")

        # set up to process messages: replace each plugin's handle() with a
        # mock raising a distinctive AssertionError so we can tell which
        # handler was invoked
        self.kafka_monitor.consumer.get_messages = MagicMock(
            return_value=messages)
        self.kafka_monitor._load_plugins()
        self.kafka_monitor.validator = self.kafka_monitor.extend_with_default(
            Draft4Validator)
        self.kafka_monitor.plugins_dict.items(
            )[0][1]['instance'].handle = MagicMock(
            side_effect=AssertionError("scrape"))
        self.kafka_monitor.plugins_dict.items(
            )[1][1]['instance'].handle = MagicMock(
            side_effect=AssertionError("action"))

        # test that handler function is called for the scraper
        message_string = "{\"url\":\"www.stuff.com\",\"crawlid\":\"1234\"," \
            "\"appid\":\"testapp\"}"
        m.message.value = message_string
        messages = [m]
        # NOTE(review): self.fail() also raises AssertionError, so a missed
        # dispatch surfaces as a message mismatch rather than the intended
        # "not called" failure -- consider a custom exception type
        try:
            self.kafka_monitor._process_messages()
            self.fail("Scrape not called")
        except AssertionError as e:
            self.assertEquals("scrape", e.message)

        # test that handler function is called for the actions
        message_string = "{\"uuid\":\"blah\",\"crawlid\":\"1234\"," \
            "\"appid\":\"testapp\",\"action\":\"info\",\"spiderid\":\"link\"}"
        m.message.value = message_string
        messages = [m]
        try:
            self.kafka_monitor._process_messages()
            self.fail("Action not called")
        except AssertionError as e:
            self.assertEquals("action", e.message)
class TestKafkaMonitor(TestCase):
    """Online (integration) tests: feed a request through a live Kafka
    broker and verify the custom handler's side effect lands in redis.

    NOTE(review): test_run asserts on a message produced via feed(), so
    these tests appear to rely on alphabetical execution order
    (test_feed before test_run) -- confirm this is intentional.
    """

    def setUp(self):
        # Monitor against localsettings.py with stats disabled and only the
        # handlers under test loaded
        self.kafka_monitor = KafkaMonitor("localsettings.py")
        new_settings = self.kafka_monitor.wrapper.load("localsettings.py")
        # dedicated test topic and a short consumer timeout so the run does
        # not block on an idle topic
        new_settings['KAFKA_INCOMING_TOPIC'] = "demo.incoming_test"
        new_settings['KAFKA_CONSUMER_TIMEOUT'] = 5000
        new_settings['STATS_TOTAL'] = False
        new_settings['STATS_PLUGINS'] = False
        new_settings['PLUGINS'] = {
            'plugins.scraper_handler.ScraperHandler': None,
            'plugins.action_handler.ActionHandler': None,
            'tests.online.CustomHandler': 100,
        }
        # setup() re-reads settings through the wrapper; patch the loader so
        # it returns our modified copy
        self.kafka_monitor.wrapper.load = MagicMock(return_value=new_settings)
        self.kafka_monitor.setup()

        # Guard the Kafka connection attempt with a timeout; the decorator's
        # fallback value (False) signals the broker was unreachable
        @MethodTimer.timeout(10, False)
        def timer():
            self.kafka_monitor._setup_kafka()
            return True
        retval = timer()
        if not retval:
            self.fail("Unable to connect to Kafka")

        self.kafka_monitor._load_plugins()
        self.kafka_monitor._setup_stats()
        # Direct redis connection used to verify the handler's side effect
        self.redis_conn = redis.Redis(
            host=self.kafka_monitor.settings['REDIS_HOST'],
            port=self.kafka_monitor.settings['REDIS_PORT'],
            db=self.kafka_monitor.settings['REDIS_DB'])

    def test_feed(self):
        """Push a well-formed action request into the incoming topic."""
        json_req = "{\"uuid\":\"mytestid\"," \
            "\"appid\":\"testapp\",\"action\":\"info\",\"spiderid\":\"link\"}"
        parsed = json.loads(json_req)
        # ensure the group id is present so we pick up the 1st message
        self.kafka_monitor._process_messages()
        self.kafka_monitor.feed(parsed)

    def test_run(self):
        """Consume the fed message and check the handler wrote to redis."""
        self.kafka_monitor._process_messages()
        # cluster:test / mytestid are presumably written by
        # tests.online.CustomHandler -- TODO confirm against that module
        self.assertTrue(self.redis_conn.exists("cluster:test"))
        value = self.redis_conn.get("cluster:test")
        self.assertEqual(value, "mytestid")

    def tearDown(self):
        # Remove the key the handler created and release Kafka resources
        self.redis_conn.delete("cluster:test")
        self.kafka_monitor.close()
#!/usr/bin/python from kafka.client import KafkaClient from kafka.producer import SimpleProducer import json import importlib from kafka_monitor import KafkaMonitor __KafkaMonitor = KafkaMonitor(settings_name='localsettings.py') __KafkaMonitor.setup() def _feed(settings_file, json_item): settings = importlib.import_module(settings_file[:-3]) kafka_conn = KafkaClient(settings.KAFKA_HOSTS) topic = settings.KAFKA_INCOMING_TOPIC producer = SimpleProducer(kafka_conn) print "=> feeding JSON request into {0}...".format(topic) print json.dumps(json_item, indent=4) kafka_conn.ensure_topic_exists(topic) producer.send_messages(topic, json.dumps(json_item)) print "=> done feeding request." def feed(settings_file, json_req): print('json_req==', json_req) try: parsed = json.loads(json_req) except ValueError: