def page_context(self):
    environment = settings.SERVER_ENVIRONMENT
    context = get_hqadmin_base_context(self.request)
    context['couch_update'] = self.request.GET.get('couch_update', 5000)
    context['celery_update'] = self.request.GET.get('celery_update', 10000)
    context['db_update'] = self.request.GET.get('db_update', 30000)
    context['celery_flower_url'] = getattr(settings, 'CELERY_FLOWER_URL', None)
    context['is_bigcouch'] = is_bigcouch()
    context['rabbitmq_url'] = get_rabbitmq_management_url()
    context['hide_filters'] = True
    context['current_system'] = socket.gethostname()
    context['deploy_history'] = HqDeploy.get_latest(environment, limit=5)

    context['user_is_support'] = hasattr(self.request, 'user') and SUPPORT.enabled(self.request.user.username)

    context['redis'] = service_checks.check_redis()
    context['rabbitmq'] = service_checks.check_rabbitmq()
    context['celery_stats'] = get_celery_stats()
    context['heartbeat'] = service_checks.check_heartbeat()

    context['elastic'] = escheck.check_es_cluster_health()

    return context
def page_context(self):
    environment = settings.SERVER_ENVIRONMENT
    context = get_hqadmin_base_context(self.request)
    context['couch_update'] = self.request.GET.get('couch_update', 5000)
    context['celery_update'] = self.request.GET.get('celery_update', 10000)
    context['db_update'] = self.request.GET.get('db_update', 30000)
    context['celery_flower_url'] = getattr(settings, 'CELERY_FLOWER_URL', None)
    context['is_bigcouch'] = is_bigcouch()
    context['rabbitmq_url'] = get_rabbitmq_management_url()
    context['hide_filters'] = True
    context['current_system'] = socket.gethostname()
    context['deploy_history'] = HqDeploy.objects.filter(environment=environment)[:5]

    context['user_is_support'] = hasattr(self.request, 'user') and SUPPORT.enabled(self.request.user.username)

    context['redis'] = service_checks.check_redis()
    context['rabbitmq'] = service_checks.check_rabbitmq(settings.CELERY_BROKER_URL)
    context['celery_stats'] = get_celery_stats()
    context['heartbeat'] = service_checks.check_heartbeat()

    context['cluster_health'] = escheck.check_es_cluster_health()

    return context
def system_ajax(request):
    """
    Utility ajax functions for polling couch and celerymon
    """
    type = request.GET.get('api', None)
    task_limit = getattr(settings, 'CELERYMON_TASK_LIMIT', 12)
    celery_monitoring = getattr(settings, 'CELERY_FLOWER_URL', None)
    db = XFormInstance.get_db()
    if type == "_active_tasks":
        tasks = [] if is_bigcouch() else filter(
            lambda x: x['type'] == "indexer", db.server.active_tasks())
        # for reference structure is:
        # tasks = [{'type': 'indexer', 'pid': 'foo', 'database': 'mock',
        #           'design_document': 'mockymock', 'progress': 0,
        #           'started_on': 1349906040.723517, 'updated_on': 1349905800.679458,
        #           'total_changes': 1023},
        #          {'type': 'indexer', 'pid': 'foo', 'database': 'mock',
        #           'design_document': 'mockymock', 'progress': 70,
        #           'started_on': 1349906040.723517, 'updated_on': 1349905800.679458,
        #           'total_changes': 1023}]
        return json_response(tasks)
    elif type == "_stats":
        return json_response({})
    elif type == "_logs":
        pass
    elif type == 'pillowtop':
        return json_response(get_all_pillows_json())
    elif type == 'stale_pillows':
        es_index_status = [
            check_case_es_index(interval=3),
            check_xform_es_index(interval=3),
            check_reportcase_es_index(interval=3),
            check_reportxform_es_index(interval=3)
        ]
        return json_response(es_index_status)

    if celery_monitoring:
        cresource = Resource(celery_monitoring, timeout=3)
        if type == "flower_poll":
            ret = []
            try:
                t = cresource.get("api/tasks", params_dict={'limit': task_limit}).body_string()
                all_tasks = json.loads(t)
            except Exception, ex:
                all_tasks = {}
                logging.error("Error with getting from celery_flower: %s" % ex)
            for task_id, traw in all_tasks.items():
                # it's an array of arrays - looping through [<id>, {task_info_dict}]
                if 'name' in traw and traw['name']:
                    traw['name'] = '.'.join(traw['name'].split('.')[-2:])
                else:
                    traw['name'] = None
                ret.append(traw)
            ret = sorted(ret, key=lambda x: x['succeeded'], reverse=True)
            return HttpResponse(json.dumps(ret), mimetype='application/json')
def reminders_in_error(request, domain):
    handler_map = {}
    if request.method == "POST":
        form = RemindersInErrorForm(request.POST)
        if form.is_valid():
            kwargs = {}
            if is_bigcouch():
                # Force a write to all nodes before returning
                kwargs["w"] = bigcouch_quorum_count()
            current_timestamp = datetime.utcnow()
            for reminder_id in form.cleaned_data.get("selected_reminders"):
                reminder = CaseReminder.get(reminder_id)
                if reminder.domain != domain:
                    continue
                if reminder.handler_id in handler_map:
                    handler = handler_map[reminder.handler_id]
                else:
                    handler = reminder.handler
                    handler_map[reminder.handler_id] = handler
                reminder.error = False
                reminder.error_msg = None
                handler.set_next_fire(reminder, current_timestamp)
                reminder.save(**kwargs)

    timezone = report_utils.get_timezone(request.couch_user.user_id, domain)
    reminders = []
    for reminder in CaseReminder.view("reminders/reminders_in_error",
                                      startkey=[domain],
                                      endkey=[domain, {}],
                                      include_docs=True).all():
        if reminder.handler_id in handler_map:
            handler = handler_map[reminder.handler_id]
        else:
            handler = reminder.handler
            handler_map[reminder.handler_id] = handler
        recipient = reminder.recipient
        case = reminder.case
        reminders.append({
            "reminder_id": reminder._id,
            "handler_id": reminder.handler_id,
            "handler_name": handler.nickname,
            "case_id": case.get_id if case is not None else None,
            "case_name": case.name if case is not None else None,
            "next_fire": tz_utils.adjust_datetime_to_timezone(
                reminder.next_fire, pytz.utc.zone, timezone.zone).strftime("%Y-%m-%d %H:%M:%S"),
            "error_msg": reminder.error_msg,
            "recipient_name": get_recipient_name(recipient),
        })
    context = {
        "domain": domain,
        "reminders": reminders,
        "timezone": timezone,
        "timezone_now": datetime.now(tz=timezone),
    }
    return render(request, "reminders/partial/reminders_in_error.html", context)
def system_ajax(request):
    """
    Utility ajax functions for polling couch and celerymon
    """
    type = request.GET.get("api", None)
    task_limit = getattr(settings, "CELERYMON_TASK_LIMIT", 5)
    celerymon_url = getattr(settings, "CELERYMON_URL", "")
    db = XFormInstance.get_db()
    ret = {}
    if type == "_active_tasks":
        tasks = [] if is_bigcouch() else filter(lambda x: x["type"] == "indexer", db.server.active_tasks())
        # for reference structure is:
        # tasks = [{'type': 'indexer', 'pid': 'foo', 'database': 'mock',
        #           'design_document': 'mockymock', 'progress': 0,
        #           'started_on': 1349906040.723517, 'updated_on': 1349905800.679458,
        #           'total_changes': 1023},
        #          {'type': 'indexer', 'pid': 'foo', 'database': 'mock',
        #           'design_document': 'mockymock', 'progress': 70,
        #           'started_on': 1349906040.723517, 'updated_on': 1349905800.679458,
        #           'total_changes': 1023}]
        return HttpResponse(json.dumps(tasks), mimetype="application/json")
    elif type == "_stats":
        return HttpResponse(json.dumps({}), mimetype="application/json")
    elif type == "_logs":
        pass

    if celerymon_url != "":
        cresource = Resource(celerymon_url, timeout=3)
        if type == "celerymon_poll":
            # inefficient way to just get everything in one fell swoop
            # first, get all task types:
            ret = []
            try:
                t = cresource.get("api/task/name/").body_string()
                task_names = json.loads(t)
            except Exception, ex:
                task_names = []
                t = {}
                logging.error("Error with getting celerymon: %s" % ex)

            for tname in task_names:
                taskinfo_raw = json.loads(
                    cresource.get("api/task/name/%s" % (tname),
                                  params_dict={"limit": task_limit}).body_string())
                for traw in taskinfo_raw:
                    # it's an array of arrays - looping through [<id>, {task_info_dict}]
                    tinfo = traw[1]
                    tinfo["name"] = ".".join(tinfo["name"].split(".")[-2:])
                    ret.append(tinfo)
            ret = sorted(ret, key=lambda x: x["succeeded"], reverse=True)
            return HttpResponse(json.dumps(ret), mimetype="application/json")
def get_search_params(self):
    # the difference is:
    # on couch lucene: /[db]/_fti/_design/[ddoc]/[search view]
    # on cloudant: /[db]/_design/[ddoc]/_search/[search view]
    # this magic combination of args makes it work for each one in couchdbkit
    if is_bigcouch():
        ddoc, view = self._search_view.split("/")
        return {
            'view_name': '%s/_search/%s' % (ddoc, view),
            'handler': "_design",
        }
    else:
        return {
            'view_name': self._search_view,
            'handler': "_fti/_design",
        }
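# Illustration added for clarity (not part of the original class): assuming the search
# view is named "case/search", the two branches above produce roughly these couchdbkit
# arguments:
#
#   BigCouch/Cloudant: {'view_name': 'case/_search/search', 'handler': '_design'}
#   Couch-Lucene:      {'view_name': 'case/search',         'handler': '_fti/_design'}
#
# which map onto the URL shapes described in the comment:
#   /[db]/_design/case/_search/search   vs.   /[db]/_fti/_design/case/search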
def system_info(request):
    def human_bytes(bytes):
        # source: https://github.com/bartTC/django-memcache-status
        bytes = float(bytes)
        if bytes >= 1073741824:
            gigabytes = bytes / 1073741824
            size = "%.2fGB" % gigabytes
        elif bytes >= 1048576:
            megabytes = bytes / 1048576
            size = "%.2fMB" % megabytes
        elif bytes >= 1024:
            kilobytes = bytes / 1024
            size = "%.2fKB" % kilobytes
        else:
            size = "%.2fB" % bytes
        return size

    context = get_hqadmin_base_context(request)
    context["couch_update"] = request.GET.get("couch_update", 5000)
    context["celery_update"] = request.GET.get("celery_update", 10000)
    context["hide_filters"] = True
    if hasattr(os, "uname"):
        context["current_system"] = os.uname()[1]

    # from dimagi.utils import gitinfo
    # context['current_ref'] = gitinfo.get_project_info()
    # removing until the async library is updated
    context["current_ref"] = {}

    if settings.COUCH_USERNAME == "" and settings.COUCH_PASSWORD == "":
        couchlog_resource = Resource("http://%s/" % (settings.COUCH_SERVER_ROOT))
    else:
        couchlog_resource = Resource("http://%s:%s@%s/" % (
            settings.COUCH_USERNAME, settings.COUCH_PASSWORD, settings.COUCH_SERVER_ROOT))
    try:
        # todo, fix on bigcouch/cloudant
        context["couch_log"] = ("Will be back online shortly" if is_bigcouch()
                                else couchlog_resource.get("_log", params_dict={"bytes": 2000}).body_string())
    except Exception, ex:
        context["couch_log"] = "unable to open couch log: %s" % ex
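# Added illustration: human_bytes() in the view above picks the largest binary unit whose
# threshold the value meets, always formatted to two decimal places.
#
#   human_bytes(512)              -> '512.00B'
#   human_bytes(2048)             -> '2.00KB'
#   human_bytes(5 * 1048576)      -> '5.00MB'
#   human_bytes(3 * 1073741824)   -> '3.00GB'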
def system_info(request):
    environment = settings.SERVER_ENVIRONMENT

    context = get_hqadmin_base_context(request)
    context['couch_update'] = request.GET.get('couch_update', 5000)
    context['celery_update'] = request.GET.get('celery_update', 10000)
    context['db_update'] = request.GET.get('db_update', 30000)
    context['celery_flower_url'] = getattr(settings, 'CELERY_FLOWER_URL', None)
    context['is_bigcouch'] = is_bigcouch()
    context['rabbitmq_url'] = get_rabbitmq_management_url()
    context['hide_filters'] = True
    context['current_system'] = socket.gethostname()
    context['deploy_history'] = HqDeploy.get_latest(environment, limit=5)

    context.update(check_redis())
    context.update(check_rabbitmq())
    context.update(check_celery_health())
    context.update(check_es_cluster_health())

    return render(request, "hqadmin/system_info.html", context)
def system_ajax(request):
    """
    Utility ajax functions for polling couch and celerymon
    """
    type = request.GET.get('api', None)
    task_limit = getattr(settings, 'CELERYMON_TASK_LIMIT', 12)
    celery_monitoring = getattr(settings, 'CELERY_FLOWER_URL', None)
    db = XFormInstance.get_db()
    if type == "_active_tasks":
        try:
            tasks = filter(lambda x: x['type'] == "indexer", db.server.active_tasks())
        except Unauthorized:
            return json_response({'error': "Unable to access CouchDB Tasks (unauthorized)."}, status_code=500)

        if not is_bigcouch():
            return json_response(tasks)
        else:
            # group tasks by design doc
            task_map = defaultdict(dict)
            for task in tasks:
                meta = task_map[task['design_document']]
                tasks = meta.get('tasks', [])
                tasks.append(task)
                meta['tasks'] = tasks

            design_docs = []
            for dd, meta in task_map.items():
                meta['design_document'] = dd[len('_design/'):]
                total_changes = sum(task['total_changes'] for task in meta['tasks'])
                for task in meta['tasks']:
                    task['progress_contribution'] = task['changes_done'] * 100 / total_changes
                design_docs.append(meta)
            return json_response(design_docs)
    elif type == "_stats":
        return json_response({})
    elif type == "_logs":
        pass
    elif type == 'pillowtop':
        pillow_meta = get_all_pillows_json()
        supervisor_status = all_pillows_supervisor_status([meta['name'] for meta in pillow_meta])
        for meta in pillow_meta:
            meta.update(supervisor_status[meta['name']])
        return json_response(sorted(pillow_meta, key=lambda m: m['name']))
    elif type == 'stale_pillows':
        es_index_status = [
            check_case_es_index(interval=3),
            check_xform_es_index(interval=3),
            check_reportcase_es_index(interval=3),
            check_reportxform_es_index(interval=3)
        ]
        return json_response(es_index_status)

    if celery_monitoring:
        cresource = Resource(celery_monitoring, timeout=3)
        if type == "flower_poll":
            ret = []
            try:
                t = cresource.get("api/tasks", params_dict={'limit': task_limit}).body_string()
                all_tasks = json.loads(t)
            except Exception, ex:
                return json_response({'error': "Error with getting from celery_flower: %s" % ex}, status_code=500)

            for task_id, traw in all_tasks.items():
                # it's an array of arrays - looping through [<id>, {task_info_dict}]
                if 'name' in traw and traw['name']:
                    traw['name'] = '.'.join(traw['name'].split('.')[-2:])
                else:
                    traw['name'] = None
                ret.append(traw)
            ret = sorted(ret, key=lambda x: x['succeeded'], reverse=True)
            return HttpResponse(json.dumps(ret), content_type='application/json')
def save(self, *args, **kwargs):
    if is_bigcouch() and "w" not in kwargs:
        # Force a write to all nodes before returning
        kwargs["w"] = bigcouch_quorum_count()
    return super(XFormsSession, self).save(*args, **kwargs)
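# Hedged sketch (not from the original source): the quorum-write pattern above can be
# factored into a reusable mixin for any couchdbkit Document subclass that needs it.
# `is_bigcouch` and `bigcouch_quorum_count` are assumed to be the same helpers used in
# the snippets above; on BigCouch the `w` kwarg becomes the write-quorum parameter of
# the save request.
class QuorumSaveMixin(object):

    def save(self, *args, **kwargs):
        if is_bigcouch() and "w" not in kwargs:
            # wait for a quorum of nodes to acknowledge the write before returning
            kwargs["w"] = bigcouch_quorum_count()
        return super(QuorumSaveMixin, self).save(*args, **kwargs)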
def do_import(spreadsheet, config, domain, task=None, chunksize=CASEBLOCK_CHUNKSIZE):
    if not spreadsheet:
        return {"errors": "EXPIRED"}
    if spreadsheet.has_errors:
        return {"errors": "HAS_ERRORS"}

    row_count = spreadsheet.get_num_rows()
    columns = spreadsheet.get_header_columns()
    match_count = created_count = too_many_matches = num_chunks = 0
    errors = importer_util.ImportErrorDetail()
    prime_offset = 1  # used to prevent back-to-back priming

    user = CouchUser.get_by_user_id(config.couch_user_id, domain)
    username = user.username
    user_id = user._id

    # keep a cache of id lookup successes to help performance
    id_cache = {}
    name_cache = {}

    caseblocks = []
    ids_seen = set()

    def _submit_caseblocks(caseblocks):
        if caseblocks:
            submit_case_blocks(
                [ElementTree.tostring(cb.as_xml()) for cb in caseblocks],
                domain,
                username,
                user_id,
            )

    for i in range(row_count):
        if task:
            DownloadBase.set_progress(task, i, row_count)

        # skip first row if it is a header field
        if i == 0 and config.named_columns:
            continue

        if not is_bigcouch():
            priming_progress = match_count + created_count + prime_offset
            if priming_progress % PRIME_VIEW_FREQUENCY == 0:
                prime_views(POOL_SIZE)
                # increment so we can't possibly prime on next iteration
                prime_offset += 1

        row = spreadsheet.get_row(i)
        search_id = importer_util.parse_search_id(config, columns, row)
        if config.search_field == "external_id" and not search_id:
            # do not allow blank external id since we save this
            errors.add(ImportErrors.BlankExternalId, i + 1)
            continue

        try:
            fields_to_update = importer_util.populate_updated_fields(
                config, columns, row, spreadsheet.workbook.datemode
            )
            if not any(fields_to_update.values()):
                # if the row was blank, just skip it, no errors
                continue
        except importer_util.InvalidDateException:
            errors.add(ImportErrors.InvalidDate, i + 1)
            continue

        external_id = fields_to_update.pop("external_id", None)
        parent_id = fields_to_update.pop("parent_id", None)
        parent_external_id = fields_to_update.pop("parent_external_id", None)
        parent_type = fields_to_update.pop("parent_type", config.case_type)
        parent_ref = fields_to_update.pop("parent_ref", "parent")
        to_close = fields_to_update.pop("close", False)

        if any([lookup_id and lookup_id in ids_seen
                for lookup_id in [search_id, parent_id, parent_external_id]]):
            # clear out the queue to make sure we've processed any potential
            # cases we want to look up
            # note: these three lines are repeated a few places, and could be converted
            # to a function that makes use of closures (and globals) to do the same thing,
            # but that seems sketchier than just being a little RY
            _submit_caseblocks(caseblocks)
            num_chunks += 1
            caseblocks = []
            ids_seen = set()  # also clear ids_seen, since all the cases will now be in the database

        case, error = importer_util.lookup_case(config.search_field, search_id, domain, config.case_type)

        if case:
            if case.type != config.case_type:
                continue
        elif error == LookupErrors.NotFound:
            if not config.create_new_cases:
                continue
        elif error == LookupErrors.MultipleResults:
            too_many_matches += 1
            continue

        uploaded_owner_name = fields_to_update.pop("owner_name", None)
        uploaded_owner_id = fields_to_update.pop("owner_id", None)
        if uploaded_owner_name:
            # If an owner name was provided, replace the provided
            # uploaded_owner_id with the id of the provided group or owner
            try:
                uploaded_owner_id = importer_util.get_id_from_name(uploaded_owner_name, domain, name_cache)
            except SQLLocation.MultipleObjectsReturned:
                errors.add(ImportErrors.DuplicateLocationName, i + 1)
                continue

            if not uploaded_owner_id:
                errors.add(ImportErrors.InvalidOwnerName, i + 1)
                continue

        if uploaded_owner_id:
            # If an owner_id mapping exists, verify it is a valid user
            # or case sharing group
            if importer_util.is_valid_id(uploaded_owner_id, domain, id_cache):
                owner_id = uploaded_owner_id
                id_cache[uploaded_owner_id] = True
            else:
                errors.add(ImportErrors.InvalidOwnerId, i + 1)
                id_cache[uploaded_owner_id] = False
                continue
        else:
            # if they didn't supply an owner_id mapping, default to current
            # user
            owner_id = user_id

        extras = {}
        if parent_id:
            try:
                parent_case = CommCareCase.get(parent_id)
                if parent_case.domain == domain:
                    extras["index"] = {parent_ref: (parent_case.type, parent_id)}
            except ResourceNotFound:
                errors.add(ImportErrors.InvalidParentId, i + 1)
                continue
        elif parent_external_id:
            parent_case, error = importer_util.lookup_case("external_id", parent_external_id, domain, parent_type)
            if parent_case:
                extras["index"] = {parent_ref: (parent_type, parent_case._id)}

        if not case:
            id = uuid.uuid4().hex

            if config.search_field == "external_id":
                extras["external_id"] = search_id

            try:
                caseblock = CaseBlock(
                    create=True,
                    case_id=id,
                    owner_id=owner_id,
                    user_id=user_id,
                    case_type=config.case_type,
                    update=fields_to_update,
                    **extras
                )
                caseblocks.append(caseblock)
                created_count += 1
                if external_id:
                    ids_seen.add(external_id)
            except CaseBlockError:
                errors.add(ImportErrors.CaseGeneration, i + 1)
        else:
            if external_id:
                extras["external_id"] = external_id
            if uploaded_owner_id:
                extras["owner_id"] = owner_id
            if to_close == "yes":
                extras["close"] = True

            try:
                caseblock = CaseBlock(
                    create=False,
                    case_id=case._id,
                    update=fields_to_update,
                    **extras
                )
                caseblocks.append(caseblock)
                match_count += 1
            except CaseBlockError:
                errors.add(ImportErrors.CaseGeneration, i + 1)

        # check if we've reached a reasonable chunksize
        # and if so submit
        if len(caseblocks) >= chunksize:
            _submit_caseblocks(caseblocks)
            num_chunks += 1
            caseblocks = []

    # final purge of anything left in the queue
    _submit_caseblocks(caseblocks)
    num_chunks += 1
    return {
        "created_count": created_count,
        "match_count": match_count,
        "too_many_matches": too_many_matches,
        "errors": errors.as_dict(),
        "num_chunks": num_chunks,
    }
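# Hedged illustration (not part of do_import): building and serializing a single case
# block with the same CaseBlock arguments used in the create branch above. The owner id,
# case type, and update values here are invented for the example.
example_block = CaseBlock(
    create=True,
    case_id=uuid.uuid4().hex,
    owner_id='example-owner-id',
    user_id='example-user-id',
    case_type='example-case-type',
    update={'prop_a': 'value_a', 'prop_b': 'value_b'},
)
example_xml = ElementTree.tostring(example_block.as_xml())
# example_xml is one of the strings handed to submit_case_blocks(), one per case in the chunk.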
def do_import(spreadsheet, config, domain, task=None, chunksize=CASEBLOCK_CHUNKSIZE):
    if not spreadsheet:
        return {'error': 'EXPIRED'}
    if spreadsheet.has_errors:
        return {'error': 'HAS_ERRORS'}

    row_count = spreadsheet.get_num_rows()
    columns = spreadsheet.get_header_columns()
    match_count = created_count = too_many_matches = errors = num_chunks = 0
    blank_external_ids = []
    invalid_dates = []
    owner_id_errors = []
    prime_offset = 1  # used to prevent back-to-back priming

    user = CouchUser.get_by_user_id(config.couch_user_id, domain)
    username = user.username
    user_id = user._id

    # keep a cache of id lookup successes to help performance
    id_cache = {}

    caseblocks = []
    ids_seen = set()

    def _submit_caseblocks(caseblocks):
        if caseblocks:
            submit_case_blocks(
                [ElementTree.tostring(cb.as_xml(format_datetime=json_format_datetime))
                 for cb in caseblocks],
                domain,
                username,
                user_id,
            )

    for i in range(row_count):
        if task:
            DownloadBase.set_progress(task, i, row_count)

        # skip first row if it is a header field
        if i == 0 and config.named_columns:
            continue

        if not is_bigcouch():
            priming_progress = match_count + created_count + prime_offset
            if priming_progress % PRIME_VIEW_FREQUENCY == 0:
                prime_views(POOL_SIZE)
                # increment so we can't possibly prime on next iteration
                prime_offset += 1

        row = spreadsheet.get_row(i)
        search_id = importer_util.parse_search_id(config, columns, row)
        if config.search_field == 'external_id' and not search_id:
            # do not allow blank external id since we save this
            blank_external_ids.append(i + 1)
            continue

        try:
            fields_to_update = importer_util.populate_updated_fields(config, columns, row)
            if not any(fields_to_update.values()):
                # if the row was blank, just skip it, no errors
                continue
        except importer_util.InvalidDateException:
            invalid_dates.append(i + 1)
            continue

        external_id = fields_to_update.pop('external_id', None)
        parent_id = fields_to_update.pop('parent_id', None)
        parent_external_id = fields_to_update.pop('parent_external_id', None)
        parent_type = fields_to_update.pop('parent_type', config.case_type)
        parent_ref = fields_to_update.pop('parent_ref', 'parent')
        to_close = fields_to_update.pop('close', False)

        if any([lookup_id and lookup_id in ids_seen
                for lookup_id in [search_id, parent_id, parent_external_id]]):
            # clear out the queue to make sure we've processed any potential
            # cases we want to look up
            # note: these three lines are repeated a few places, and could be converted
            # to a function that makes use of closures (and globals) to do the same thing,
            # but that seems sketchier than just being a little RY
            _submit_caseblocks(caseblocks)
            num_chunks += 1
            caseblocks = []
            ids_seen = set()  # also clear ids_seen, since all the cases will now be in the database

        case, error = importer_util.lookup_case(
            config.search_field,
            search_id,
            domain,
            config.case_type
        )

        if case:
            if case.type != config.case_type:
                continue
        elif error == LookupErrors.NotFound:
            if not config.create_new_cases:
                continue
        elif error == LookupErrors.MultipleResults:
            too_many_matches += 1
            continue

        uploaded_owner_id = fields_to_update.pop('owner_id', None)
        if uploaded_owner_id:
            # If an owner_id mapping exists, verify it is a valid user
            # or case sharing group
            if importer_util.is_valid_id(uploaded_owner_id, domain, id_cache):
                owner_id = uploaded_owner_id
                id_cache[uploaded_owner_id] = True
            else:
                owner_id_errors.append(i + 1)
                id_cache[uploaded_owner_id] = False
                continue
        else:
            # if they didn't supply an owner_id mapping, default to current
            # user
            owner_id = user_id

        extras = {}
        if parent_id:
            try:
                parent_case = CommCareCase.get(parent_id)
                if parent_case.domain == domain:
                    extras['index'] = {parent_ref: (parent_case.type, parent_id)}
            except ResourceNotFound:
                continue
        elif parent_external_id:
            parent_case, error = importer_util.lookup_case(
                'external_id',
                parent_external_id,
                domain,
                parent_type
            )
            if parent_case:
                extras['index'] = {parent_ref: (parent_type, parent_case._id)}

        if not case:
            id = uuid.uuid4().hex

            if config.search_field == 'external_id':
                extras['external_id'] = search_id

            try:
                caseblock = CaseBlock(
                    create=True,
                    case_id=id,
                    version=V2,
                    owner_id=owner_id,
                    user_id=user_id,
                    case_type=config.case_type,
                    update=fields_to_update,
                    **extras
                )
                caseblocks.append(caseblock)
                created_count += 1
                if external_id:
                    ids_seen.add(external_id)
            except CaseBlockError:
                errors += 1
        else:
            if external_id:
                extras['external_id'] = external_id
            if uploaded_owner_id:
                extras['owner_id'] = owner_id
            if to_close == 'yes':
                extras['close'] = True

            try:
                caseblock = CaseBlock(
                    create=False,
                    case_id=case._id,
                    version=V2,
                    update=fields_to_update,
                    **extras
                )
                caseblocks.append(caseblock)
                match_count += 1
            except CaseBlockError:
                errors += 1

        # check if we've reached a reasonable chunksize
        # and if so submit
        if len(caseblocks) >= chunksize:
            _submit_caseblocks(caseblocks)
            num_chunks += 1
            caseblocks = []

    # final purge of anything left in the queue
    _submit_caseblocks(caseblocks)
    num_chunks += 1
    return {
        'created_count': created_count,
        'match_count': match_count,
        'too_many_matches': too_many_matches,
        'blank_externals': blank_external_ids,
        'invalid_dates': invalid_dates,
        'owner_id_errors': owner_id_errors,
        'errors': errors,
        'num_chunks': num_chunks,
    }
def system_ajax(request):
    """
    Utility ajax functions for polling couch and celerymon
    """
    type = request.GET.get("api", None)
    task_limit = getattr(settings, "CELERYMON_TASK_LIMIT", 12)
    celery_monitoring = getattr(settings, "CELERY_FLOWER_URL", None)
    db = XFormInstance.get_db()
    if type == "_active_tasks":
        try:
            tasks = filter(lambda x: x["type"] == "indexer", db.server.active_tasks())
        except Unauthorized:
            return json_response({"error": "Unable to access CouchDB Tasks (unauthorized)."}, status_code=500)

        if not is_bigcouch():
            return json_response(tasks)
        else:
            # group tasks by design doc
            task_map = defaultdict(dict)
            for task in tasks:
                meta = task_map[task["design_document"]]
                tasks = meta.get("tasks", [])
                tasks.append(task)
                meta["tasks"] = tasks

            design_docs = []
            for dd, meta in task_map.items():
                meta["design_document"] = dd[len("_design/"):]
                total_changes = sum(task["total_changes"] for task in meta["tasks"])
                for task in meta["tasks"]:
                    task["progress_contribution"] = task["changes_done"] * 100 / total_changes
                design_docs.append(meta)
            return json_response(design_docs)
    elif type == "_stats":
        return json_response({})
    elif type == "_logs":
        pass
    elif type == "pillowtop":
        return json_response(get_all_pillows_json())
    elif type == "stale_pillows":
        es_index_status = [
            check_case_es_index(interval=3),
            check_xform_es_index(interval=3),
            check_reportcase_es_index(interval=3),
            check_reportxform_es_index(interval=3),
        ]
        return json_response(es_index_status)

    if celery_monitoring:
        cresource = Resource(celery_monitoring, timeout=3)
        if type == "flower_poll":
            ret = []
            try:
                t = cresource.get("api/tasks", params_dict={"limit": task_limit}).body_string()
                all_tasks = json.loads(t)
            except Exception, ex:
                return json_response({"error": "Error with getting from celery_flower: %s" % ex}, status_code=500)

            for task_id, traw in all_tasks.items():
                # it's an array of arrays - looping through [<id>, {task_info_dict}]
                if "name" in traw and traw["name"]:
                    traw["name"] = ".".join(traw["name"].split(".")[-2:])
                else:
                    traw["name"] = None
                ret.append(traw)
            ret = sorted(ret, key=lambda x: x["succeeded"], reverse=True)
            return HttpResponse(json.dumps(ret), mimetype="application/json")
def do_import(spreadsheet, config, domain, task=None, chunksize=CASEBLOCK_CHUNKSIZE):
    row_count = spreadsheet.get_num_rows()
    columns = spreadsheet.get_header_columns()
    match_count = created_count = too_many_matches = num_chunks = 0
    errors = importer_util.ImportErrorDetail()
    prime_offset = 1  # used to prevent back-to-back priming

    user = CouchUser.get_by_user_id(config.couch_user_id, domain)
    username = user.username
    user_id = user._id

    # keep a cache of id lookup successes to help performance
    id_cache = {}
    name_cache = {}

    caseblocks = []
    ids_seen = set()

    def _submit_caseblocks(domain, case_type, caseblocks):
        err = False
        if caseblocks:
            try:
                form, cases = submit_case_blocks(
                    [cb.as_string() for cb in caseblocks],
                    domain,
                    username,
                    user_id,
                )
                if form.is_error:
                    errors.add(error=ImportErrors.ImportErrorMessage, row_number=form.problem)
            except Exception:
                err = True
                errors.add(error=ImportErrors.ImportErrorMessage, row_number=caseblocks[0]._id)
            else:
                properties = set().union(*map(
                    lambda c: set(c.dynamic_case_properties().keys()), cases))
                add_inferred_export_properties.delay(
                    'CaseImporter',
                    domain,
                    case_type,
                    properties,
                )
        return err

    for i in range(row_count):
        if task:
            DownloadBase.set_progress(task, i, row_count)

        # skip first row if it is a header field
        if i == 0 and config.named_columns:
            continue

        if not is_bigcouch():
            priming_progress = match_count + created_count + prime_offset
            if priming_progress % PRIME_VIEW_FREQUENCY == 0:
                prime_views(POOL_SIZE)
                # increment so we can't possibly prime on next iteration
                prime_offset += 1

        row = spreadsheet.get_row(i)
        search_id = importer_util.parse_search_id(config, columns, row)
        if config.search_field == 'external_id' and not search_id:
            # do not allow blank external id since we save this
            errors.add(ImportErrors.BlankExternalId, i + 1)
            continue

        try:
            fields_to_update = importer_util.populate_updated_fields(
                config, columns, row, spreadsheet.workbook.datemode)
            if not any(fields_to_update.values()):
                # if the row was blank, just skip it, no errors
                continue
        except importer_util.InvalidDateException as e:
            errors.add(ImportErrors.InvalidDate, i + 1, e.column)
            continue
        except importer_util.InvalidIntegerException as e:
            errors.add(ImportErrors.InvalidInteger, i + 1, e.column)
            continue

        external_id = fields_to_update.pop('external_id', None)
        parent_id = fields_to_update.pop('parent_id', None)
        parent_external_id = fields_to_update.pop('parent_external_id', None)
        parent_type = fields_to_update.pop('parent_type', config.case_type)
        parent_ref = fields_to_update.pop('parent_ref', 'parent')
        to_close = fields_to_update.pop('close', False)

        if any([lookup_id and lookup_id in ids_seen
                for lookup_id in [search_id, parent_id, parent_external_id]]):
            # clear out the queue to make sure we've processed any potential
            # cases we want to look up
            # note: these three lines are repeated a few places, and could be converted
            # to a function that makes use of closures (and globals) to do the same thing,
            # but that seems sketchier than just being a little RY
            _submit_caseblocks(domain, config.case_type, caseblocks)
            num_chunks += 1
            caseblocks = []
            ids_seen = set()  # also clear ids_seen, since all the cases will now be in the database

        case, error = importer_util.lookup_case(config.search_field, search_id, domain, config.case_type)

        if case:
            if case.type != config.case_type:
                continue
        elif error == LookupErrors.NotFound:
            if not config.create_new_cases:
                continue
def system_ajax(request):
    """
    Utility ajax functions for polling couch and celerymon
    """
    type = request.GET.get('api', None)
    task_limit = getattr(settings, 'CELERYMON_TASK_LIMIT', 12)
    celery_monitoring = getattr(settings, 'CELERY_FLOWER_URL', None)
    db = XFormInstance.get_db()
    if type == "_active_tasks":
        try:
            tasks = [x for x in db.server.active_tasks() if x['type'] == "indexer"]
        except HTTPError as e:
            if e.response.status_code == 403:
                return JsonResponse({'error': "Unable to access CouchDB Tasks (unauthorized)."}, status=500)
            else:
                return JsonResponse({'error': "Unable to access CouchDB Tasks."}, status=500)

        if not is_bigcouch():
            return JsonResponse(tasks, safe=False)
        else:
            # group tasks by design doc
            task_map = defaultdict(dict)
            for task in tasks:
                meta = task_map[task['design_document']]
                tasks = meta.get('tasks', [])
                tasks.append(task)
                meta['tasks'] = tasks

            design_docs = []
            for dd, meta in task_map.items():
                meta['design_document'] = dd[len('_design/'):]
                total_changes = sum(task['total_changes'] for task in meta['tasks'])
                for task in meta['tasks']:
                    task['progress_contribution'] = task['changes_done'] * 100 // total_changes
                design_docs.append(meta)
            return JsonResponse(design_docs, safe=False)
    elif type == "_stats":
        return JsonResponse({})
    elif type == "_logs":
        pass
    elif type == 'pillowtop':
        pillow_meta = get_all_pillows_json()
        return JsonResponse(sorted(pillow_meta, key=lambda m: m['name'].lower()), safe=False)
    elif type == 'stale_pillows':
        es_index_status = [
            escheck.check_case_es_index(interval=3),
            escheck.check_xform_es_index(interval=3),
            escheck.check_reportcase_es_index(interval=3),
            escheck.check_reportxform_es_index(interval=3)
        ]
        return JsonResponse(es_index_status, safe=False)

    if celery_monitoring:
        if type == "flower_poll":
            ret = []
            try:
                all_tasks = requests.get(
                    celery_monitoring + '/api/tasks',
                    params={'limit': task_limit},
                    timeout=3,
                ).json()
            except Exception as ex:
                return JsonResponse({'error': "Error with getting from celery_flower: %s" % ex}, status=500)

            for task_id, traw in all_tasks.items():
                # it's an array of arrays - looping through [<id>, {task_info_dict}]
                if 'name' in traw and traw['name']:
                    traw['name'] = '.'.join(traw['name'].split('.')[-2:])
                else:
                    traw['name'] = None
                ret.append(traw)
            ret = sorted(ret, key=lambda x: x['succeeded'], reverse=True)
            return HttpResponse(json.dumps(ret), content_type='application/json')
    return HttpResponse('{}', content_type='application/json')
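# Worked example (added; values invented) of the bigcouch grouping branch above: with two
# indexer tasks on the same design doc, each reporting total_changes=1000, the grouped
# payload becomes
#
#   tasks = [
#       {'design_document': '_design/cases', 'total_changes': 1000, 'changes_done': 250, ...},
#       {'design_document': '_design/cases', 'total_changes': 1000, 'changes_done': 750, ...},
#   ]
#   ->
#   [{'design_document': 'cases',
#     'tasks': [...the same two tasks, with 'progress_contribution' of
#               250 * 100 // 2000 == 12 and 750 * 100 // 2000 == 37...]}]
#
# i.e. each task's contribution is its changes_done as a share of the combined total_changes.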
def do_import(spreadsheet, config, domain, task=None, chunksize=CASEBLOCK_CHUNKSIZE):
    columns = spreadsheet.get_header_columns()
    match_count = created_count = too_many_matches = num_chunks = 0
    errors = importer_util.ImportErrorDetail()
    prime_offset = 1  # used to prevent back-to-back priming

    user = CouchUser.get_by_user_id(config.couch_user_id, domain)
    username = user.username
    user_id = user._id

    # keep a cache of id lookup successes to help performance
    id_cache = {}
    name_cache = {}

    caseblocks = []
    ids_seen = set()

    def _submit_caseblocks(domain, case_type, caseblocks):
        err = False
        if caseblocks:
            try:
                form, cases = submit_case_blocks(
                    [cb.as_string() for cb in caseblocks],
                    domain,
                    username,
                    user_id,
                )
                if form.is_error:
                    errors.add(
                        error=ImportErrors.ImportErrorMessage,
                        row_number=form.problem
                    )
            except Exception:
                err = True
                errors.add(
                    error=ImportErrors.ImportErrorMessage,
                    row_number=caseblocks[0].case_id
                )
            else:
                properties = set().union(*map(lambda c: set(c.dynamic_case_properties().keys()), cases))
                if case_type and len(properties):
                    add_inferred_export_properties.delay(
                        'CaseImporter',
                        domain,
                        case_type,
                        properties,
                    )
                else:
                    _soft_assert = soft_assert(notify_admins=True)
                    _soft_assert(
                        len(properties) == 0,
                        'error adding inferred export properties in domain '
                        '({}): {}'.format(domain, ", ".join(properties))
                    )
        return err

    row_count = spreadsheet.max_row
    for i, row in enumerate(spreadsheet.iter_rows()):
        if task:
            set_task_progress(task, i, row_count)

        # skip first row (header row)
        if i == 0:
            continue

        if not is_bigcouch():
            priming_progress = match_count + created_count + prime_offset
            if priming_progress % PRIME_VIEW_FREQUENCY == 0:
                prime_views(POOL_SIZE)
                # increment so we can't possibly prime on next iteration
                prime_offset += 1

        search_id = importer_util.parse_search_id(config, columns, row)

        fields_to_update = importer_util.populate_updated_fields(config, columns, row)
        if not any(fields_to_update.values()):
            # if the row was blank, just skip it, no errors
            continue

        if config.search_field == 'external_id' and not search_id:
            # do not allow blank external id since we save this
            errors.add(ImportErrors.BlankExternalId, i + 1)
            continue

        external_id = fields_to_update.pop('external_id', None)
        parent_id = fields_to_update.pop('parent_id', None)
        parent_external_id = fields_to_update.pop('parent_external_id', None)
        parent_type = fields_to_update.pop('parent_type', config.case_type)
        parent_ref = fields_to_update.pop('parent_ref', 'parent')
        to_close = fields_to_update.pop('close', False)

        if any([lookup_id and lookup_id in ids_seen
                for lookup_id in [search_id, parent_id, parent_external_id]]):
            # clear out the queue to make sure we've processed any potential
            # cases we want to look up
            # note: these three lines are repeated a few places, and could be converted
            # to a function that makes use of closures (and globals) to do the same thing,
            # but that seems sketchier than just being a little RY
            _submit_caseblocks(domain, config.case_type, caseblocks)
            num_chunks += 1
            caseblocks = []
            ids_seen = set()  # also clear ids_seen, since all the cases will now be in the database

        case, error = importer_util.lookup_case(
            config.search_field,
            search_id,
            domain,
            config.case_type
        )

        if case:
            if case.type != config.case_type:
                continue
        elif error == LookupErrors.NotFound:
            if not config.create_new_cases:
                continue
        elif error == LookupErrors.MultipleResults:
            too_many_matches += 1
            continue

        uploaded_owner_name = fields_to_update.pop('owner_name', None)
        uploaded_owner_id = fields_to_update.pop('owner_id', None)

        if uploaded_owner_name:
            # If an owner name was provided, replace the provided
            # uploaded_owner_id with the id of the provided group or owner
            try:
                uploaded_owner_id = importer_util.get_id_from_name(uploaded_owner_name, domain, name_cache)
            except SQLLocation.MultipleObjectsReturned:
                errors.add(ImportErrors.DuplicateLocationName, i + 1)
                continue

            if not uploaded_owner_id:
                errors.add(ImportErrors.InvalidOwnerName, i + 1, 'owner_name')
                continue

        if uploaded_owner_id:
            # If an owner_id mapping exists, verify it is a valid user
            # or case sharing group
            if importer_util.is_valid_id(uploaded_owner_id, domain, id_cache):
                owner_id = uploaded_owner_id
                id_cache[uploaded_owner_id] = True
            else:
                errors.add(ImportErrors.InvalidOwnerId, i + 1, 'owner_id')
                id_cache[uploaded_owner_id] = False
                continue
        else:
            # if they didn't supply an owner_id mapping, default to current
            # user
            owner_id = user_id

        extras = {}
        if parent_id:
            try:
                parent_case = CaseAccessors(domain).get_case(parent_id)
                if parent_case.domain == domain:
                    extras['index'] = {parent_ref: (parent_case.type, parent_id)}
            except ResourceNotFound:
                errors.add(ImportErrors.InvalidParentId, i + 1, 'parent_id')
                continue
        elif parent_external_id:
            parent_case, error = importer_util.lookup_case(
                'external_id',
                parent_external_id,
                domain,
                parent_type
            )
            if parent_case:
                extras['index'] = {parent_ref: (parent_type, parent_case.case_id)}

        case_name = fields_to_update.pop('name', None)

        if not case:
            id = uuid.uuid4().hex

            if config.search_field == 'external_id':
                extras['external_id'] = search_id

            try:
                caseblock = CaseBlock(
                    create=True,
                    case_id=id,
                    owner_id=owner_id,
                    user_id=user_id,
                    case_type=config.case_type,
                    case_name=case_name or '',
                    update=fields_to_update,
                    **extras
                )
                caseblocks.append(caseblock)
                created_count += 1
                if external_id:
                    ids_seen.add(external_id)
            except CaseBlockError:
                errors.add(ImportErrors.CaseGeneration, i + 1)
        else:
            if external_id:
                extras['external_id'] = external_id
            if uploaded_owner_id:
                extras['owner_id'] = owner_id
            if to_close == 'yes':
                extras['close'] = True
            if case_name is not None:
                extras['case_name'] = case_name

            try:
                caseblock = CaseBlock(
                    create=False,
                    case_id=case.case_id,
                    update=fields_to_update,
                    **extras
                )
                caseblocks.append(caseblock)
                match_count += 1
            except CaseBlockError:
                errors.add(ImportErrors.CaseGeneration, i + 1)

        # check if we've reached a reasonable chunksize
        # and if so submit
        if len(caseblocks) >= chunksize:
            _submit_caseblocks(domain, config.case_type, caseblocks)
            num_chunks += 1
            caseblocks = []

    # final purge of anything left in the queue
    if _submit_caseblocks(domain, config.case_type, caseblocks):
        match_count -= 1
    num_chunks += 1

    return {
        'created_count': created_count,
        'match_count': match_count,
        'too_many_matches': too_many_matches,
        'errors': errors.as_dict(),
        'num_chunks': num_chunks,
    }