def ingest_to_db(model, session, **kwargs):
    # Lazily set up a Cassandra session if the caller did not provide one.
    if not session:
        from cqlengine import connection
        LOG.info("No C* session found.. creating one.")
        connection.setup([config.get('cassandra', 'db_host')],
                         config.get('cassandra', 'keyspace'))
    model.create(**kwargs)
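# Usage sketch (illustrative, not from the original source): assuming a
# hypothetical cqlengine model MFCStatsModel whose columns match the kwargs,
# a row can be ingested with or without an existing session:
#
#   session = None  # forces ingest_to_db to set up its own connection
#   ingest_to_db(MFCStatsModel, session,
#                mfcid='a1b2c3', type='glbl', name='bytes', value={'ram': 100})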
def request_stats_mfc_cb(client, device, sample_id, q_key):
    req_uri = '/admin/agentd_comm'
    req_body = """<mfc-request><header><type>GET</type></header>
    <data>stats mfc-cluster mfc</data></mfc-request>"""
    req_headers = {
        "Content-Type": "application/x-www-form-urlencoded",
        "Content-Length": str(len(req_body)),
        "Accept": "*/*",
        "Connection": "Keep-Alive",
    }
    cleanup_pattern = re.compile(r"\n\s*")
    # Default response, used when agentd does not respond in time.
    resp = """<?xml version="1.0"?><mfc-response><header><status><code>504</code>
    <message>No Response</message></status></header></mfc-response>"""
    try:
        LOG.info("Sending stat request to agentd %s %s %s " % device)
        agentd_resp = client.post(req_uri, body=req_body, headers=req_headers)
        resp = agentd_resp.read()
        resp = cleanup_pattern.sub("", resp)
    except Exception:
        LOG.error("Stat request timed out for device: %s %s %s " % device)
    finally:
        r.rpush(q_key, [device, sample_id, resp])
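# Example (illustrative, not from the original source): cleanup_pattern strips
# each newline together with the indentation that follows it, so a
# pretty-printed agentd response collapses to a single-line XML string before
# being queued:
#
#   >>> re.compile(r"\n\s*").sub("", "<mfc-response>\n  <header>\n  </header>\n</mfc-response>")
#   '<mfc-response><header></header></mfc-response>'
#
# The [device, sample_id, resp] list pushed to q_key is later eval()'d by
# parse_counters.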
def request_config_mfc_cb(client, device, q_key):
    req_uri = '/admin/agentd_comm'
    req_body = """<mfc-request><header><type>GET</type></header>
    <data>running-config mfc-cluster mfc</data></mfc-request>"""
    req_headers = {
        "Content-Type": "application/x-www-form-urlencoded",
        "Content-Length": str(len(req_body)),
        "Accept": "*/*",
        "Connection": "Keep-Alive",
    }
    # Default response, used when agentd does not respond in time.
    resp = """<?xml version="1.0"?><mfc-response><header><status><code>504</code>
    <message>No Response</message></status></header></mfc-response>"""
    try:
        LOG.info("Sending config sync request to agentd device: %s %s %s " % device)
        agentd_resp = client.post(req_uri, body=req_body, headers=req_headers)
        resp = agentd_resp.read()
    except Exception:
        LOG.error("Config sync request timed out for device: %s %s %s " % device)
    finally:
        r.rpush(q_key, [device, resp])
def parse_counters(data=None):
    if data is None:
        data = r.blpop(config.get('constants', 'REDIS_XML_QUEUE_KEY'))
    LOG.debug(data)
    # The queue payload is a stringified [device, sample_id, xml] list pushed
    # by request_stats_mfc_cb; eval() restores it.
    data = eval(data[1])
    p_obj = Parser.parse_mfc_counters(data[0], data[1], data[2])
    r.rpush(config.get('constants', 'REDIS_PARSER_QUEUE_KEY'),
            Serialize.to_json(p_obj))
    return p_obj
def run_store_ingestion():
    LOG.info("Starting Storage task")
    store_mfc_task = store_mfc_stats.apply_async(
        args=[], queue='store', routing_key='store.mfc_stats')
    store_cluster_task = store_cluster_stats.apply_async(
        args=[], queue='store', routing_key='store.cluster_stats')
    store_mfc_conf_task = store_mfc_config.apply_async(
        args=[], queue='store', routing_key='store.mfc_conf')
    LOG.info("Store MFC stats task runner with task ID: " + store_mfc_task.task_id)
    LOG.info("Store Cluster stats task runner with task ID: " + store_cluster_task.task_id)
    LOG.info("Store MFC Conf task runner with task ID: " + store_mfc_conf_task.task_id)
def connect_mysql():
    import MySQLdb
    import _mysql_exceptions
    try:
        mysql_db = MySQLdb.connect(
            host=config.get('mysql', 'db_host'),
            port=int(config.get('mysql', 'db_port')),
            user=config.get('mysql', 'db_user'),
            passwd=config.get('mysql', 'db_password'),
            db=config.get('mysql', 'db_name'),
            # Config values are read as strings; connect_timeout must be an int.
            connect_timeout=int(config.get('constants', 'MYSQL_CONNECT_TIMEOUT')),
            charset='utf8',
            use_unicode=True)
        return mysql_db
    except _mysql_exceptions.OperationalError as e:
        LOG.error(e)
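# Usage sketch (assumed, not from the original source): the connection is
# consumed by get_device_list() below, roughly as:
#
#   db = connect_mysql()
#   cur = db.cursor()
#   cur.execute(config.get('constants', 'MFC_DEV_LIST_QUERY'))
#   rows = cur.fetchmany(500)   # fetched in batches of 500 rows
#   db.close()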
def run_collector():
    LOG.info("Starting collector..")
    gevent.signal(signal.SIGQUIT, gevent.kill)
    redis_flush_keys()
    get_device_list()
    create_cluster_tables()
    create_daily_tables()
    g_thread_pool = []
    try:
        tasks = [run_scheduler, run_store_ingestion, run_process_counters,
                 run_request_parser, run_request_fetch]
        g_thread_pool = [gevent.spawn(task) for task in tasks]
        gevent.joinall(g_thread_pool)
    except KeyboardInterrupt:
        print("Keyboard interrupt..")
    finally:
        gevent.killall(g_thread_pool, exception=gevent.GreenletExit)
def parse_config_and_sync(data=None, sync_list=True):
    if sync_list:
        sync_list = List(key=config.get('constants', 'REDIS_SYNC_DEV_LIST_KEY'),
                         redis=r)
    else:
        sync_list = List(key=config.get('constants', 'REDIS_NEW_SYNC_DEV_LIST_KEY'),
                         redis=r)
    unsync_list = List(key=config.get('constants', 'REDIS_UNSYNC_DEV_LIST_KEY'),
                       redis=r)

    if data is None:
        data = r.blpop(config.get('constants', 'REDIS_CONFIG_XML_QUEUE_KEY'))
    data = eval(data[1])
    device = data[0]
    xml = data[1]
    p_obj = Parser.parse_mfc_config(device, xml)
    if p_obj.header.status_code == 0:
        """Update the global DS:
        extend the sync_dev_list with the device tuple and store the UUID in a
        global hashmap, to be retrieved later using the IP as key.
        """
        try:
            mfc_uuid.update({device[2] + '_uuid': p_obj.data.config.host_id})
            mfc_uuid.update({device[2] + '_hostname': p_obj.data.config.hostname})
            # Update the sync list only if the data attributes could be retrieved.
            sync_list.extend((device,))
        except AttributeError:
            LOG.error("Something wrong with the Config data from MFC: " + device[2])
            LOG.error("Restart agentd or make sure the config data is valid.")
            unsync_list.extend((device,))
        finally:
            r.rpush(config.get('constants', 'REDIS_CONFIG_STORE_QUEUE_KEY'),
                    Serialize.to_json(p_obj))
    else:
        LOG.error("Unable to get config from MFC: " + device[2])
        LOG.error("Status Code: %s Message: %s" % (p_obj.header.status_code,
                                                   p_obj.header.status_msg))
        LOG.error("Check MFC state and make sure agentd is working fine.")
        unsync_list.extend((device,))
    return p_obj
def get_device_list(get_dev_from_file=False):
    from analyticsengine import dbmanager
    if get_dev_from_file:
        with open(config.get('constants', 'CONF_BASE_PATH') +
                  config.get('constants', 'IP_LIST_FILENAME'), 'r') as fp:
            dev_list = fp.readlines()
            mfa_dev_list.extend(dev_list)
    else:
        LOG.info("Querying MySQL to get the list of devices..")
        mysql_db = dbmanager.connect_mysql()
        mysql_cur = mysql_db.cursor()
        mysql_cur.execute(config.get('constants', 'MFC_DEV_LIST_QUERY'))
        rows = mysql_cur.fetchmany(500)
        while len(rows) > 0:
            mfa_dev_list.extend(rows)
            for device in rows:
                LOG.info("Found Device: %s %s %s" % device)
            rows = mysql_cur.fetchmany(500)
        mysql_db.close()
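# Note (inferred from usage elsewhere in this module): each device row fetched
# from MySQL is a 3-tuple, unpacked in the stats path as (device_id, name, ip),
# e.g.:
#
#   device = ('f9e0...', 'mfc-01', '10.0.0.21')
#   LOG.info("Found Device: %s %s %s" % device)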
def create_daily_tables(date_str=None):
    db_connection = dbmanager.connect_cassandra()
    daily_tables = dict()
    if date_str is None:
        date_str = date.today().strftime('%m%d%Y')

    """This table stores counters from different MFCs per day.

    Indexed with mfcid (a UUID) as the row key;
    composite key (mfcid, type, name, ts).
    """
    daily_tables['mfc_stats'] = """
    CREATE TABLE %s%s (mfcid varchar, hostname varchar, ip varchar, type varchar,
    name varchar, ts timestamp, value map<text, bigint>,
    PRIMARY KEY (mfcid, type, name, ts))
    WITH CLUSTERING ORDER BY (type ASC, name ASC, ts DESC)
    """ % (MFC_STATS_TABLE_NAME, date_str)
    daily_tables['mfc_summary'] = """
    CREATE TABLE %s%s (mfcid varchar, hostname varchar, ip varchar, ts timestamp,
    sample_id varchar, value map<text, text>,
    PRIMARY KEY (mfcid))
    """ % (MFC_SUMMARY_TABLE_NAME, date_str)
    daily_tables['cluster_stats'] = """
    CREATE TABLE %s%s (name varchar, ts timestamp, value map<text, bigint>,
    sample_id varchar,
    PRIMARY KEY (name, ts)) WITH CLUSTERING ORDER BY (ts DESC)
    """ % (CLUSTER_STATS_TABLE_NAME, date_str)
    daily_tables['cluster_summary'] = """
    CREATE TABLE %s%s (name varchar, ts timestamp, value map<text, bigint>,
    sample_id varchar,
    PRIMARY KEY (name))
    """ % (CLUSTER_SUMMARY_TABLE_NAME, date_str)
    daily_tables['cluster_sample_map'] = """
    CREATE TABLE %s%s (sample_id varchar, ts timestamp, ip_list list<text>,
    PRIMARY KEY (sample_id))
    """ % (CLUSTER_SAMPLE_MAP_TABLE_NAME, date_str)

    for t_name, create_t in daily_tables.items():
        try:
            LOG.info("Creating Table: %s" % t_name)
            db_connection.execute(create_t)
        except AlreadyExists:
            LOG.info("Table already exists for %s" % t_name)
    db_connection.shutdown()
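# Example (illustrative): with date_str defaulting to today's '%m%d%Y' stamp,
# the DDL above produces per-day tables whose names are the table-name constant
# with the date appended, e.g. mfc_stats06152015 assuming MFC_STATS_TABLE_NAME
# is the string 'mfc_stats' (the actual constants are defined elsewhere in
# this package).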
def create_cluster_tables():
    db_connection = dbmanager.connect_cassandra()
    main_tables = dict()

    """These tables store values for the lifetime of the cluster.

    Indexed with mfcid as the row key.
    """
    main_tables['mfc_config'] = """
    CREATE TABLE %s (mfcid varchar, hostname varchar, ip varchar, type varchar,
    ts timestamp, value map<text, text>,
    PRIMARY KEY (mfcid, ts, type))
    """ % MFC_CONFIG_TABLE_NAME

    for t_name, create_t in main_tables.items():
        try:
            LOG.info("Creating Table: %s" % t_name)
            db_connection.execute(create_t)
        except AlreadyExists:
            LOG.info("Table already exists for %s" % t_name)
    db_connection.shutdown()
def run_request_fetch():
    LOG.info("Starting request fetch task")
    req_interval = int(config.get('collector', 'MFC_REQUEST_FREQUENCY'))
    """Split the fetch job into a chain of two tasks.

    First request the config for all the MFCs and prepare the list of MFCs
    that can be accessed. A hashmap of IP to UUID is created, then the sync'd
    MFCs are passed on to fetch the stats.
    """
    if len(mfa_dev_list) > 0:
        collector_task = chain(request_cluster_config.s(list(mfa_dev_list)),
                               request_cluster_stats.s(interval=req_interval))
        collector_task.apply_async()
    else:
        LOG.error("Devices list not found. Check file or MFA DB")
        sys.exit(0)
def schedule_events_task():
    import schedule
    from datetime import date, timedelta
    from analyticsengine.dbmanager.mfc import create_daily_tables

    unsync_dev_key = config.get('constants', 'REDIS_UNSYNC_DEV_LIST_KEY')
    unsync_flag = False
    unsync_job = None

    """Job to create daily DB tables.

    Calculate the next day's date and pass it on to create all the tables
    for the next day.
    """
    def create_daily_cf_job():
        tomorrow = date.today() + timedelta(days=1)
        tomorrow_strf = tomorrow.strftime('%m%d%Y')
        LOG.info("Creating tables for date: " + tomorrow_strf)
        create_daily_tables(tomorrow_strf)

    """Schedule daily DB table creation.

    Creates the DB tables for the next day at 23:30 every day.
    """
    schedule.every().day.at("23:30").do(create_daily_cf_job)

    """Job to recheck un-synced devices.

    If unsync_dev_list exists, a config request is sent to see if the device
    can be moved to the sync list. Devices are popped from the unsync list as
    they are prepared for recheck; when a device is sent for a config (sync)
    check, it is re-added to the unsync list if it is still unable to sync.
    """
    def recheck_unsync_devices():
        unsync_list = List(key=config.get('constants', 'REDIS_UNSYNC_DEV_LIST_KEY'),
                           redis=r)
        recheck_devices = []
        while len(unsync_list) > 0:
            recheck_devices.append(unsync_list.pop())
        LOG.info("Processing unsync device list")
        recheck_task = chain(request_cluster_config.s(recheck_devices),
                             update_unsync_list.s())
        recheck_task.apply_async()

    while True:
        schedule.run_pending()
        gevent.sleep(1)
        if r.exists(unsync_dev_key):
            if not unsync_flag:
                LOG.info("Unsync device list found. Will schedule a job to recheck the status")
                # Keep the Job handle: schedule.cancel_job() expects the Job
                # returned by do(), not the bare function.
                unsync_job = schedule.every(
                    int(config.get('collector', 'RECHECK_UNSYNC_FREQUENCY'))
                ).minutes.do(recheck_unsync_devices)
                unsync_flag = True
            else:
                LOG.debug("Recheck Unsync devices is already scheduled and is in progress.")
        else:
            if unsync_flag:
                LOG.info("No Unsync devices found. Removing unsync device recheck from scheduler")
                schedule.cancel_job(unsync_job)
                unsync_flag = False
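# Minimal sketch of the schedule-library pattern used above:
# schedule.every(...).do(fn) returns a Job, and schedule.cancel_job(job) must
# be handed that Job object; passing the bare function leaves the job
# registered (the original code did exactly that).
#
#   job = schedule.every(5).minutes.do(recheck_unsync_devices)
#   ...
#   schedule.cancel_job(job)  # cancel_job(recheck_unsync_devices) would be a no-op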
def request_cluster_config(dev_list, unsync_list=False):
    req_uri = '/admin/agentd_comm'
    conf_q = config.get('constants', 'REDIS_CONFIG_XML_QUEUE_KEY')
    mfc_count = len(dev_list)
    g_pool = gevent.pool.Pool(size=mfc_count)
    sync_flag = True
    if unsync_list:
        sync_flag = False

    LOG.debug("Creating Config request clients")
    conf_clients = []
    for device in dev_list:
        url = URL('http://' + device[2] + ':8080' + req_uri)
        conf_clients.append(HTTPClient.from_url(url, concurrency=1,
                                                headers_type=dict))

    LOG.debug("Starting to request Config from MFC")
    for i in xrange(mfc_count):
        g_pool.spawn(request_config_mfc_cb, conf_clients[i], dev_list[i], conf_q)
    g_pool.join()
    LOG.debug("Finished collecting Config from MFC")
    for i in xrange(mfc_count):
        conf_clients[i].close()

    """Parse and store the config.

    mfc_uuid is a global hashmap (redis Dict) with IP as key and UUID as value.
    parse_config_and_sync updates sync_dev_list and mfc_uuid for each XML response.
    """
    LOG.debug("Parsing config request output and building the UUID hash.")
    q_len = r.llen(conf_q)
    g_pool = gevent.pool.Pool(size=q_len)
    for _ in xrange(q_len):
        data = r.blpop(conf_q)
        g_pool.spawn(parse_config_and_sync, data, sync_flag)
    g_pool.join()

    """Return the list of MFCs that were able to communicate."""
    sync_list = List(key=config.get('constants', 'REDIS_SYNC_DEV_LIST_KEY'),
                     redis=r)
    return list(sync_list)
def process_mfc_counters(counters=None, data=None):
    decimal.getcontext().prec = 6
    if counters is None:
        data = r.blpop(config.get('constants', 'REDIS_PARSER_QUEUE_KEY'))
        counters = json.loads(data[1])

    if counters['data'] is None:
        LOG.critical("Device: %s, %s IP: %s" % (counters['device_id'],
                                                counters['name'], counters['ip']))
        LOG.critical("MFC response doesn't have any counter data. Skipping sample: %s"
                     % counters['sample_id'])
    else:
        gl_bytes = Counter(counters['data']['glbl']['bytes'])

        # MFC cache-hit ratio (CHR).
        tot_bytes = sum(gl_bytes.values())
        tot_cache = (counters['data']['glbl']['bytes']['ram'] +
                     counters['data']['glbl']['bytes']['disk'])
        # Handle the zero condition: the cumulative sum could be 0.
        if tot_bytes == 0:
            counters['data']['chr'] = 0
        else:
            counters['data']['chr'] = float((decimal.Decimal(tot_cache) /
                                             decimal.Decimal(tot_bytes)) * 100)

        # Calculate current throughput.
        mfcs_cur_thrpt = Dict(key=config.get('constants', 'REDIS_MFC_CUR_THRPT_KEY'),
                              redis=r)
        try:
            counters['data']['cur_thrpt'] = gl_bytes - mfcs_cur_thrpt[counters['device_id']]
            counters['data']['cur_thrpt']['total'] = sum(counters['data']['cur_thrpt'].values())
            counters['data']['cur_thrpt']['cache'] = (counters['data']['cur_thrpt']['ram'] +
                                                      counters['data']['cur_thrpt']['disk'])
            mfcs_cur_thrpt[counters['device_id']] = gl_bytes
        except KeyError:
            LOG.debug("current throughput hashmap - initial update for " +
                      str(counters['device_id']))
            counters['data']['cur_thrpt'] = mfcs_cur_thrpt[counters['device_id']] = gl_bytes
            counters['data']['cur_thrpt']['total'] = counters['data']['cur_thrpt']['cache'] = 0
        r.rpush(config.get('constants', 'REDIS_MFC_STORE_QUEUE_KEY'),
                json.dumps(counters))
    return counters
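# Worked example (illustrative numbers; the real counter keys beyond 'ram' and
# 'disk' depend on the agentd schema): with cumulative glbl byte counters
#
#   {'ram': 600, 'disk': 300, 'origin': 100}
#
# tot_bytes = 1000 and tot_cache = 600 + 300 = 900, so
# chr = (900 / 1000) * 100 = 90.0. cur_thrpt is the Counter difference between
# this sample's cumulative bytes and the previous sample's, i.e. the bytes
# served since the last poll.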
def run_request_parser():
    LOG.info("Starting request parser task")
    parse_cluster_task = parse_cluster_stats.apply_async(args=[], queue='tasks',
                                                         routing_key='tasks')
    LOG.info("Parse task runner with task ID: " + parse_cluster_task.task_id)
def redis_flush_keys():
    for name, key in r_keys.items():
        if r.exists(key):
            LOG.info("Deleting existing redis key: %s" % key)
            r.delete(key)
        self.data = data
        self.set_header("Content-Type", "application/x-www-form-urlencoded")
        self.set_header("Content-Length", "%d" % len(data))
        self.set_header("Accept", "*/*")
        self.url = 'http://' + self.ip + ':' + self.port + self.uri
        self.request = Request(self.url, self.data, self.headers)

    def send_request(self, data=None):
        self.build_req(data=data)
        if self.request is not None:
            try:
                LOG.info("sending request to MFC agentd - " + self.ip)
                req_open = urlopen(self.request)
            except HTTPError as e:
                LOG.critical("Error code: %s Error Message: %s" % (e.code, e.msg))
                raise e
            except URLError as e:
                if hasattr(e, 'code'):
                    LOG.critical('ERROR code: %s' % e.code)
                elif hasattr(e, 'reason'):
                    LOG.critical('URL ERROR: ' + str(e.reason))
                else:
                    LOG.debug("No HTTP errors.." + str(e))
                raise e
            else:
                LOG.debug("reading MFC agentd response - " + self.ip)
                self.response = req_open.read()
                return self.response
def run_scheduler():
    LOG.info("Starting Scheduler task")
    sched_task = schedule_events_task.apply_async(args=[], queue='tasks',
                                                  routing_key='tasks')
    LOG.info("Scheduler task with task ID: " + sched_task.task_id)
def request_cluster_stats(sync_mfcs, interval=20):
    req_uri = '/admin/agentd_comm'
    xml_q = config.get('constants', 'REDIS_XML_QUEUE_KEY')
    new_dev_list_key = config.get('constants', 'REDIS_NEW_FOUND_DEV_LIST_KEY')
    new_sync_dev_list_key = config.get('constants', 'REDIS_NEW_SYNC_DEV_LIST_KEY')
    sync_mfcs_key = config.get('constants', 'REDIS_SYNC_DEV_LIST_KEY')
    signal.signal(signal.SIGQUIT, gevent.kill)
    stat_clients = []

    """Request stats from the synced MFCs; the IP list comes from mfc_uuid."""
    sync_mfcs_count = len(sync_mfcs)
    LOG.info("Synced MFCs: ")
    for device_id, name, ip in sync_mfcs:
        LOG.info("%s %s %s" % (device_id, name, ip))

    def create_stat_clients():
        LOG.info("Creating Stats request clients")
        for device_id, name, ip in sync_mfcs:
            url = URL('http://' + ip + ':8080' + req_uri)
            stat_clients.append(HTTPClient.from_url(url, concurrency=1,
                                                    headers_type=dict))

    def close_stat_clients():
        for c in xrange(sync_mfcs_count):
            stat_clients[c].close()

    create_stat_clients()
    g_req_pool = gevent.pool.Pool(size=sync_mfcs_count)
    LOG.info("Starting to request stats from MFC")
    while True:
        # A time-based check (tick(t1) >= interval) was removed here: polling
        # the clock hogs CPU cycles, so gevent.sleep(interval) is used instead.
        sample_id = str(uuid.uuid1())
        for i in xrange(sync_mfcs_count):
            g_req_pool.spawn(request_stats_mfc_cb, stat_clients[i],
                             sync_mfcs[i], sample_id, xml_q)
        g_req_pool.join(timeout=interval)
        gevent.sleep(interval)

        if r.exists(new_sync_dev_list_key):
            LOG.info("New MFCs added to the Sync list - updating stat request clients")
            close_stat_clients()
            stat_clients = []
            LOG.info("Newly Synced MFCs: ")
            new_sync_mfcs = list(List(key=new_sync_dev_list_key, redis=r))
            for device_id, name, ip in new_sync_mfcs:
                LOG.info("%s %s %s" % (device_id, name, ip))
            r.delete(new_dev_list_key)

            # Get the current synced list and extend it with the newly synced list.
            sync_mfcs = List(key=sync_mfcs_key, redis=r)
            sync_mfcs.extend(new_sync_mfcs)
            sync_mfcs = list(sync_mfcs)
            sync_mfcs_count = len(sync_mfcs)
            create_stat_clients()
    close_stat_clients()
def terminate_task(task):
    LOG.info("Exiting the task: ")
    task.revoke(terminate=True)
def run_process_counters():
    LOG.info("Starting process counter task")
    process_cluster_task = process_cluster_stats.apply_async(args=[], queue='tasks',
                                                             routing_key='tasks')
    LOG.info("Process task runner with task ID: " + process_cluster_task.task_id)
def connect_cassandra():
    error = False
    cluster = Cluster([config.get('cassandra', 'db_host')],
                      # Config values are read as strings; port must be an int.
                      port=int(config.get('cassandra', 'db_port')),
                      protocol_version=3,
                      idle_heartbeat_interval=120)
    try:
        LOG.info("Connecting to Cassandra..")
        return cluster.connect(config.get('cassandra', 'keyspace'))
    except NoHostAvailable:
        error = True
        LOG.info("ERROR: Check Cassandra connection settings in conf")
    except InvalidRequest:
        LOG.info("ERROR: Could not find existing Cassandra keyspace. Will create a new one")
        try:
            db_connection = cluster.connect()
            CREATE_KEYSPACE = """
            CREATE KEYSPACE %s
            WITH replication = {'class': '%s', 'replication_factor': %s}
            """ % (config.get('cassandra', 'keyspace'),
                   config.get('cassandra', 'replication_strategy'),
                   config.get('cassandra', 'replication_factor'))
            db_connection.execute(CREATE_KEYSPACE)
            db_connection.set_keyspace(config.get('cassandra', 'keyspace'))
            LOG.info("Created and set session to new keyspace: %s"
                     % config.get('cassandra', 'keyspace'))
            return db_connection
        except SyntaxException:
            error = True
            LOG.info("ERROR: couldn't create new keyspace. Check keyspace settings in conf. Exiting now.")
            raise
        except Exception:
            error = True
            LOG.info("ERROR: something wrong with Cassandra connection")
    finally:
        if error:
            LOG.info("Exiting..")
            sys.exit(0)
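# Usage sketch (assumed): callers such as create_daily_tables() treat the
# return value as a cassandra-driver Session and are responsible for shutting
# it down when done:
#
#   session = connect_cassandra()
#   session.execute("SELECT * FROM mfc_config LIMIT 1")
#   session.shutdown()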
def process_cluster_stats():
    def multi_dict_counter(level):
        # Nested defaultdict that bottoms out in a Counter.
        if level < 1:
            return Counter()
        return defaultdict(lambda: multi_dict_counter(level - 1))

    """Create a 2D dictionary to hold the cluster-wide counters.

    Counters from across MFCs are aggregated based on the sample ID:
    cluster[<Sample ID>][<Counter Name>] = Counter(<dict of counter values>)
    cluster['cumulative'][<Counter Name>] keeps track of the cumulative of the
    last sample; the delta is calculated against it.
    """
    cluster = multi_dict_counter(2)  # two-level dictionary of Counters
    tick = lambda x: time.time() - x
    item_cnt = 0
    cur_sample = None
    #mfc_hash = Dict(key=config.get('constants', 'REDIS_MFC_UUID_HASH_KEY'), redis=r)
    sync_list = List(key=config.get('constants', 'REDIS_SYNC_DEV_LIST_KEY'), redis=r)
    # Config values are read as strings; the timeout is compared to a float.
    cluster_sample_timeout = float(config.get('constants', 'CLUSTER_SAMPLE_TIMEOUT'))
    store_q = config.get('constants', 'REDIS_CLUSTER_STORE_QUEUE_KEY')
    req_interval = int(config.get('collector', 'MFC_REQUEST_FREQUENCY'))
    sample_q = []

    while True:
        data = r.blpop(config.get('constants', 'REDIS_PARSER_QUEUE_KEY'))
        counters = json.loads(data[1])
        # Check that data exists for the parsed response; the agentd response can be empty.
        if counters['data'] is not None:
            """Process each MFC counter."""
            process_mfc_counters.apply_async(args=[counters], queue='process',
                                             routing_key='process.stat')

            """Process cluster-wide cumulative data for the same sample ID."""
            item_cnt += 1
            # Requests
            cluster[counters['sample_id']]['requests'].update(counters['data']['glbl']['requests'])
            # Cumulative bytes
            cluster[counters['sample_id']]['bytes'].update(counters['data']['glbl']['bytes'])
            # Timestamp
            cluster[counters['sample_id']]['timestamp'] = counters['data']['timestamp']
            try:
                cluster[counters['sample_id']]['ip_list'].append(counters['ip'])  # preserve the IP
            except AttributeError:
                cluster[counters['sample_id']]['ip_list'] = list()
                cluster[counters['sample_id']]['ip_list'].append(counters['ip'])

            if cur_sample is not None and cur_sample != counters['sample_id']:
                # A new sample has arrived.
                if item_cnt > len(sync_list) or tick(init_sample_ts) >= cluster_sample_timeout:
                    # 1st case: records from all the sync'd MFCs were received;
                    # store and remove the sample from the cluster DS.
                    # 2nd case: some data is still left to be received but the
                    # sample hit the timeout.
                    # Calculate the cumulative delta.
                    cluster[cur_sample]['cur_thrpt'] = cluster[cur_sample]['bytes'] - \
                        cluster['cumulative']['bytes']
                    cluster[cur_sample]['cur_thrpt']['total'] = sum(cluster[cur_sample]['cur_thrpt'].values())
                    cluster[cur_sample]['cur_thrpt']['cache'] = cluster[cur_sample]['cur_thrpt']['ram'] + \
                        cluster[cur_sample]['cur_thrpt']['disk']
                    # Preserve the cumulative for the next sample set.
                    cluster['cumulative']['bytes'] = cluster[cur_sample]['bytes']
                    # Push the data out to be stored.
                    r.rpush(store_q, (cur_sample, dict(cluster[cur_sample])))
                    del cluster[cur_sample]
                    item_cnt = 1
                    cur_sample = sample_q.pop(0) if len(sample_q) > 0 else counters['sample_id']
                    init_sample_ts = time.time()
                else:
                    LOG.info("Got new sample ID: %s. Need to wait for the current sample (%s) "
                             "to arrive before it is pushed out"
                             % (counters['sample_id'], cur_sample))
                    LOG.info("Adding sample ID to the waiting list.")
                    if counters['sample_id'] not in sample_q:
                        sample_q.append(counters['sample_id'])
            if cur_sample is None:
                cur_sample = counters['sample_id']
                init_sample_ts = time.time()
        else:
            LOG.critical("Device: %s, %s IP: %s" % (counters['device_id'],
                                                    counters['name'], counters['ip']))
            LOG.critical("MFC response doesn't have any counter data. Skipping sample: %s"
                         % counters['sample_id'])
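# Shape sketch (illustrative) of the cluster DS built above, for one sample:
#
#   cluster['<sample-uuid>'] = {
#       'requests':  Counter({...}),        # summed across MFCs
#       'bytes':     Counter({'ram': ..., 'disk': ...}),
#       'timestamp': <ts of the last merged response>,
#       'ip_list':   ['10.0.0.21', ...],    # MFCs that contributed
#       'cur_thrpt': Counter({...}),        # delta vs cluster['cumulative']['bytes']
#   }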
def start_parsing(self):
    hostid_pattern = re.compile(r"Host ID: ([\w\d-]+)")
    self.config.host_id = hostid_pattern.search(self.config.dump).group(1)
    LOG.debug("Host ID: " + self.config.host_id)

    hostname_pattern = re.compile(r"Hostname: ([\w\d-]+)")
    hostname = hostname_pattern.search(self.config.dump)
    if hostname:
        hostname = hostname.group(1)
        self.config.hostname = hostname
        LOG.debug("Hostname: " + hostname)

    version_pattern = re.compile(r"Version: ([-\w\d .]+)")
    self.config.version = version_pattern.search(self.config.dump).group(1)
    LOG.debug("Version: " + self.config.version)

    lic_pattern = re.compile(r"license install ([-\w\d \".]+)", re.M)
    lic_match = lic_pattern.findall(self.config.dump)
    for lic in lic_match:
        self.config.licenses.append(lic)
        LOG.debug("License: " + lic)

    if_pattern = re.compile(r"[no ]* interface ([\w\d]+) ([-\w\d \".]+)", re.M)
    if_match = if_pattern.findall(self.config.dump)
    self.config.ifcfg = if_match
    LOG.debug("Interface Config: ")
    LOG.debug(self.config.ifcfg)

    nw_pattern = re.compile(r"[no ]* network ([\w]*) ([-\w\d \".]*)", re.M)
    nw_match = nw_pattern.findall(self.config.dump)
    self.config.network = nw_match
    LOG.debug("Network: ")
    LOG.debug(self.config.network)

    ns_pattern = re.compile(r"namespace ([\w]+) ([-\w\d \"./]+)", re.M)
    ns_match = ns_pattern.findall(self.config.dump)
    for (ns, conf) in ns_match:
        if ns not in self.config.namespaces:
            self.config.namespaces[ns] = []
            LOG.debug("Namespace: " + ns)
        # Append unconditionally so the first config line of each namespace is
        # not dropped (previously it was appended only on the else branch).
        self.config.namespaces[ns].append(conf)
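# Example (illustrative config dump fragment): given a dump containing
#
#   Host ID: 1a2b3c-4d5e
#   Hostname: mfc-01
#   Version: 2.0.7
#
# hostid_pattern captures '1a2b3c-4d5e', hostname_pattern captures 'mfc-01'
# and version_pattern captures '2.0.7'. The namespace loop now records every
# "namespace <name> <config>" line, including the first one per namespace.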