def pillow_datadog_metrics():
    """Push per-pillow change-feed lag and offset gauges to datadog."""

    def _is_couch(meta):
        # text is couch, json is kafka
        return meta['seq_format'] == 'text'

    for pillow in get_all_pillows_json():
        feed_type = 'couch' if _is_couch(pillow) else 'kafka'
        tags = [
            'pillow_name:{}'.format(pillow['name']),
            'feed_type:{}'.format(feed_type),
        ]
        datadog_gauge('commcare.change_feed.seconds_since_last_update',
                      pillow['seconds_since_last'], tags=tags)

        for topic_name, offset in pillow['offsets'].items():
            if _is_couch(pillow):
                # a couch pillow carries a single integer sequence and exactly
                # one offset entry; anything else is malformed
                well_formed = (isinstance(pillow['seq'], int)
                               and len(pillow['offsets']) == 1)
                if not well_formed:
                    _assert(False, "Unexpected couch pillow format {}".format(
                        pillow['name']))
                    continue
                tags_with_topic = tags + ['topic:{}'.format(topic_name)]
                processed_offset = pillow['seq']
            else:
                # a kafka pillow carries one sequence per topic,partition pair
                well_formed = (isinstance(pillow['seq'], dict)
                               and len(pillow['offsets']) == len(pillow['seq']))
                if not well_formed:
                    _assert(False, "Unexpected kafka pillow format {}".format(
                        pillow['name']))
                    continue
                if not pillow['seq']:
                    # this pillow has never been initialized.
                    # (custom pillows on most environments)
                    continue
                topic, partition = topic_name.split(',')
                tags_with_topic = tags + ['topic:{}-{}'.format(topic, partition)]
                processed_offset = pillow['seq'][topic_name]

            if processed_offset == 0:
                # assume if nothing has been processed that this pillow is not
                # supposed to be running
                continue

            datadog_gauge('commcare.change_feed.current_offsets',
                          offset, tags=tags_with_topic)
            datadog_gauge('commcare.change_feed.processed_offsets',
                          processed_offset, tags=tags_with_topic)
            datadog_gauge('commcare.change_feed.need_processing',
                          offset - processed_offset, tags=tags_with_topic)
def pillow_datadog_metrics():
    """Report change-feed offset and lag gauges for every active pillow."""

    def _is_couch(meta):
        # text is couch, json is kafka
        return meta['seq_format'] == 'text'

    pillow_meta = get_all_pillows_json()
    active_pillows = getattr(settings, 'ACTIVE_PILLOW_NAMES', None)
    if active_pillows:
        # restrict to the pillows this environment actually runs
        pillow_meta = [p for p in pillow_meta if p['name'] in active_pillows]

    for pillow in pillow_meta:
        feed_type = 'couch' if _is_couch(pillow) else 'kafka'
        tags = [
            'pillow_name:{}'.format(pillow['name']),
            'feed_type:{}'.format(feed_type),
        ]
        datadog_gauge(
            'commcare.change_feed.seconds_since_last_update',
            pillow['seconds_since_last'],
            tags=tags,
        )

        for topic_name, offset in pillow['offsets'].items():
            if _is_couch(pillow):
                # couch pillows: one integer sequence, exactly one offset
                if not isinstance(pillow['seq'], int) or len(pillow['offsets']) != 1:
                    _assert(False, "Unexpected couch pillow format {}".format(pillow['name']))
                    continue
                tags_with_topic = tags + ['topic:{}'.format(topic_name)]
                processed_offset = pillow['seq']
            else:
                if not pillow['seq']:
                    # this pillow has never been initialized.
                    # (custom pillows on most environments)
                    continue
                # kafka pillows: one sequence entry per topic,partition pair
                if not isinstance(pillow['seq'], dict) or len(pillow['offsets']) != len(pillow['seq']):
                    _assert(False, "Unexpected kafka pillow format {}".format(pillow['name']))
                    continue
                topic, partition = topic_name.split(',')
                tags_with_topic = tags + ['topic:{}-{}'.format(topic, partition)]
                processed_offset = pillow['seq'][topic_name]

            if processed_offset == 0:
                # assume if nothing has been processed that this pillow is not
                # supposed to be running
                continue

            datadog_gauge(
                'commcare.change_feed.current_offsets',
                offset,
                tags=tags_with_topic,
            )
            datadog_gauge(
                'commcare.change_feed.processed_offsets',
                processed_offset,
                tags=tags_with_topic,
            )
            datadog_gauge(
                'commcare.change_feed.need_processing',
                offset - processed_offset,
                tags=tags_with_topic,
            )
def pillow_datadog_metrics():
    """Emit change-feed gauges (lag, offsets, backlog) for active pillows."""

    def _is_couch(meta):
        # text is couch, json is kafka
        return meta['seq_format'] == 'text'

    pillow_meta = get_all_pillows_json()
    active_pillows = getattr(settings, 'ACTIVE_PILLOW_NAMES', None)
    if active_pillows:
        # only report on pillows configured to run in this environment
        pillow_meta = [
            meta for meta in pillow_meta
            if meta['name'] in active_pillows
        ]

    for pillow in pillow_meta:
        feed_type = 'couch' if _is_couch(pillow) else 'kafka'
        # The host and group tags are added here to ensure they remain constant
        # regardless of which celery worker the task get's executed on.
        # Without this the sum of the metrics get's inflated.
        tags = [
            'pillow_name:{}'.format(pillow['name']),
            'feed_type:{}'.format(feed_type),
            'host:celery',
            'group:celery',
        ]
        datadog_gauge('commcare.change_feed.seconds_since_last_update',
                      pillow['seconds_since_last'], tags=tags)

        for topic_name, offset in pillow['offsets'].items():
            if _is_couch(pillow):
                tags_with_topic = tags + ['topic:{}'.format(topic_name)]
                processed_offset = pillow['seq']
            else:
                if not pillow['seq']:
                    # this pillow has never been initialized.
                    # (custom pillows on most environments)
                    continue
                topic, partition = topic_name.split(',')
                tags_with_topic = tags + ['topic:{}-{}'.format(topic, partition)]
                processed_offset = pillow['seq'][topic_name]

            if processed_offset == 0:
                # assume if nothing has been processed that this pillow is not
                # supposed to be running
                continue

            datadog_gauge('commcare.change_feed.current_offsets',
                          offset, tags=tags_with_topic)
            datadog_gauge('commcare.change_feed.processed_offsets',
                          processed_offset, tags=tags_with_topic)
            datadog_gauge('commcare.change_feed.need_processing',
                          offset - processed_offset, tags=tags_with_topic)
def pillow_datadog_metrics():
    """Record change-feed gauges (lag, offsets, backlog) for each pillow."""

    def _is_couch(meta):
        # text is couch, json is kafka
        return meta['seq_format'] == 'text'

    for pillow in get_all_pillows_json():
        feed_type = 'couch' if _is_couch(pillow) else 'kafka'
        # The host and group tags are added here to ensure they remain constant
        # regardless of which celery worker the task get's executed on.
        # Without this the sum of the metrics get's inflated.
        tags = {
            'pillow_name': pillow['name'],
            'feed_type': feed_type,
            'host': 'celery',
            'group': 'celery',
        }
        metrics_gauge(
            'commcare.change_feed.seconds_since_last_update',
            pillow['seconds_since_last'],
            tags=tags,
            multiprocess_mode=MPM_MIN,
        )

        for topic_name, offset in pillow['offsets'].items():
            if _is_couch(pillow):
                tags_with_topic = dict(tags, topic=topic_name)
                processed_offset = pillow['seq']
            else:
                if not pillow['seq']:
                    # this pillow has never been initialized.
                    # (custom pillows on most environments)
                    continue
                topic, partition = topic_name.split(',')
                tags_with_topic = dict(
                    tags, topic='{}-{}'.format(topic, partition))
                processed_offset = pillow['seq'][topic_name]

            if processed_offset == 0:
                # assume if nothing has been processed that this pillow is not
                # supposed to be running
                continue

            metrics_gauge(
                'commcare.change_feed.current_offsets',
                offset,
                tags=tags_with_topic,
                multiprocess_mode=MPM_MAX,
            )
            metrics_gauge(
                'commcare.change_feed.processed_offsets',
                processed_offset,
                tags=tags_with_topic,
                multiprocess_mode=MPM_MAX,
            )
            metrics_gauge(
                'commcare.change_feed.need_processing',
                offset - processed_offset,
                tags=tags_with_topic,
                multiprocess_mode=MPM_MAX,
            )
def system_ajax(request):
    """Utility ajax functions for polling couch and celerymon.

    Dispatches on the ``api`` GET parameter:

    * ``_active_tasks`` — CouchDB indexer tasks, grouped by design doc on
      bigcouch clusters.
    * ``_stats`` / ``_logs`` — stubs.
    * ``pillowtop`` — pillow metadata, sorted by name.
    * ``stale_pillows`` — ES index freshness checks.
    * ``flower_poll`` — recent celery tasks fetched from flower (only when
      ``CELERY_FLOWER_URL`` is configured).

    Returns a JSON response; errors are reported as ``{'error': ...}`` with
    HTTP status 500. Falls through to an empty JSON object.
    """
    # renamed from `type`, which shadowed the builtin
    api = request.GET.get('api', None)
    task_limit = getattr(settings, 'CELERYMON_TASK_LIMIT', 12)
    celery_monitoring = getattr(settings, 'CELERY_FLOWER_URL', None)
    db = XFormInstance.get_db()

    if api == "_active_tasks":
        try:
            tasks = [x for x in db.server.active_tasks() if x['type'] == "indexer"]
        except HTTPError as e:
            # Fix: Django's JsonResponse forwards kwargs to HttpResponse,
            # which takes `status`, not `status_code` — the original
            # `status_code=500` would raise TypeError on this path.
            if e.response.status_code == 403:
                return JsonResponse(
                    {'error': "Unable to access CouchDB Tasks (unauthorized)."},
                    status=500)
            else:
                return JsonResponse(
                    {'error': "Unable to access CouchDB Tasks."},
                    status=500)
        if not is_bigcouch():
            return JsonResponse(tasks, safe=False)

        # group tasks by design doc (defaultdict(list) instead of the old
        # defaultdict(dict) + .get('tasks', []) dance, which also rebound
        # the loop variable `tasks` inside its own loop)
        tasks_by_design_doc = defaultdict(list)
        for task in tasks:
            tasks_by_design_doc[task['design_document']].append(task)

        design_docs = []
        for design_doc, doc_tasks in tasks_by_design_doc.items():
            total_changes = sum(task['total_changes'] for task in doc_tasks)
            for task in doc_tasks:
                # guard against ZeroDivisionError when total_changes == 0
                task['progress_contribution'] = (
                    task['changes_done'] * 100 // total_changes
                    if total_changes else 0
                )
            design_docs.append({
                'design_document': design_doc[len('_design/'):],
                'tasks': doc_tasks,
            })
        return JsonResponse(design_docs, safe=False)
    elif api == "_stats":
        return JsonResponse({})
    elif api == "_logs":
        pass
    elif api == 'pillowtop':
        pillow_meta = get_all_pillows_json()
        return JsonResponse(
            sorted(pillow_meta, key=lambda m: m['name'].lower()), safe=False)
    elif api == 'stale_pillows':
        es_index_status = [
            escheck.check_case_es_index(interval=3),
            escheck.check_xform_es_index(interval=3),
            escheck.check_reportcase_es_index(interval=3),
            escheck.check_reportxform_es_index(interval=3)
        ]
        return JsonResponse(es_index_status, safe=False)

    if celery_monitoring:
        if api == "flower_poll":
            recent_tasks = []
            try:
                all_tasks = requests.get(
                    celery_monitoring + '/api/tasks',
                    params={'limit': task_limit},
                    timeout=3,
                ).json()
            except Exception as ex:
                # boundary: report any flower failure as a JSON error
                return JsonResponse(
                    {'error': "Error with getting from celery_flower: %s" % ex},
                    status=500)
            for task_id, traw in all_tasks.items():
                # it's an array of arrays - looping through [<id>, {task_info_dict}]
                if 'name' in traw and traw['name']:
                    # shorten dotted task paths to the last two components
                    traw['name'] = '.'.join(traw['name'].split('.')[-2:])
                else:
                    traw['name'] = None
                recent_tasks.append(traw)
            recent_tasks = sorted(
                recent_tasks, key=lambda x: x['succeeded'], reverse=True)
            return HttpResponse(
                json.dumps(recent_tasks), content_type='application/json')
    return HttpResponse('{}', content_type='application/json')
def system_ajax(request):
    """ Utility ajax functions for polling couch and celerymon """
    # NOTE(review): legacy Python 2 code (`except Exception, ex` below);
    # `type` shadows the builtin and is kept as-is here.
    type = request.GET.get('api', None)
    task_limit = getattr(settings, 'CELERYMON_TASK_LIMIT', 12)
    celery_monitoring = getattr(settings, 'CELERY_FLOWER_URL', None)
    db = XFormInstance.get_db()
    if type == "_active_tasks":
        try:
            # keep only CouchDB view-indexer tasks
            tasks = filter(lambda x: x['type'] == "indexer", db.server.active_tasks())
        except Unauthorized:
            return json_response({'error': "Unable to access CouchDB Tasks (unauthorized)."}, status_code=500)
        if not is_bigcouch():
            return json_response(tasks)
        else:
            # group tasks by design doc
            task_map = defaultdict(dict)
            for task in tasks:
                # WARNING: `tasks` (the iterable being looped over) is rebound
                # here to the per-design-doc list; works in CPython because the
                # for-loop holds its own iterator, but easy to break.
                meta = task_map[task['design_document']]
                tasks = meta.get('tasks', [])
                tasks.append(task)
                meta['tasks'] = tasks
            design_docs = []
            for dd, meta in task_map.items():
                # strip the '_design/' prefix from the doc id
                meta['design_document'] = dd[len('_design/'):]
                total_changes = sum(task['total_changes'] for task in meta['tasks'])
                # percentage of the design doc's total work done by each task
                # NOTE(review): raises ZeroDivisionError if total_changes == 0
                for task in meta['tasks']:
                    task['progress_contribution'] = task['changes_done'] * 100 / total_changes
                design_docs.append(meta)
            return json_response(design_docs)
    elif type == "_stats":
        # stub: no stats implemented
        return json_response({})
    elif type == "_logs":
        # stub: falls through to the celery section / implicit None
        pass
    elif type == 'pillowtop':
        # merge supervisor process state into each pillow's metadata
        pillow_meta = get_all_pillows_json()
        supervisor_status = all_pillows_supervisor_status([meta['name'] for meta in pillow_meta])
        for meta in pillow_meta:
            meta.update(supervisor_status[meta['name']])
        return json_response(sorted(pillow_meta, key=lambda m: m['name']))
    elif type == 'stale_pillows':
        # freshness checks for the elasticsearch indices (3s interval each)
        es_index_status = [
            check_case_es_index(interval=3),
            check_xform_es_index(interval=3),
            check_reportcase_es_index(interval=3),
            check_reportxform_es_index(interval=3)
        ]
        return json_response(es_index_status)

    if celery_monitoring:
        # presumably a restkit Resource pointed at flower — TODO confirm
        cresource = Resource(celery_monitoring, timeout=3)
        if type == "flower_poll":
            ret = []
            try:
                t = cresource.get("api/tasks", params_dict={'limit': task_limit}).body_string()
                all_tasks = json.loads(t)
            except Exception, ex:
                # best-effort boundary: surface any flower failure as JSON
                return json_response({'error': "Error with getting from celery_flower: %s" % ex},
                                     status_code=500)
            for task_id, traw in all_tasks.items():
                # it's an array of arrays - looping through [<id>, {task_info_dict}]
                if 'name' in traw and traw['name']:
                    # shorten dotted task path to its last two components
                    traw['name'] = '.'.join(traw['name'].split('.')[-2:])
                else:
                    traw['name'] = None
                ret.append(traw)
            ret = sorted(ret, key=lambda x: x['succeeded'], reverse=True)
            return HttpResponse(json.dumps(ret), content_type='application/json')