Example #1
    def page_context(self):
        environment = settings.SERVER_ENVIRONMENT

        context = get_hqadmin_base_context(self.request)
        context['couch_update'] = self.request.GET.get('couch_update', 5000)
        context['celery_update'] = self.request.GET.get('celery_update', 10000)
        context['db_update'] = self.request.GET.get('db_update', 30000)
        context['celery_flower_url'] = getattr(settings, 'CELERY_FLOWER_URL', None)

        context['is_bigcouch'] = is_bigcouch()
        context['rabbitmq_url'] = get_rabbitmq_management_url()
        context['hide_filters'] = True
        context['current_system'] = socket.gethostname()
        context['deploy_history'] = HqDeploy.get_latest(environment, limit=5)

        context['user_is_support'] = hasattr(self.request, 'user') and SUPPORT.enabled(self.request.user.username)

        context['redis'] = service_checks.check_redis()
        context['rabbitmq'] = service_checks.check_rabbitmq()
        context['celery_stats'] = get_celery_stats()
        context['heartbeat'] = service_checks.check_heartbeat()

        context['elastic'] = escheck.check_es_cluster_health()

        return context
Example #2
    def page_context(self):
        environment = settings.SERVER_ENVIRONMENT

        context = get_hqadmin_base_context(self.request)
        context['couch_update'] = self.request.GET.get('couch_update', 5000)
        context['celery_update'] = self.request.GET.get('celery_update', 10000)
        context['db_update'] = self.request.GET.get('db_update', 30000)
        context['celery_flower_url'] = getattr(settings, 'CELERY_FLOWER_URL', None)

        context['is_bigcouch'] = is_bigcouch()
        context['rabbitmq_url'] = get_rabbitmq_management_url()
        context['hide_filters'] = True
        context['current_system'] = socket.gethostname()
        context['deploy_history'] = HqDeploy.objects.filter(environment=environment)[:5]

        context['user_is_support'] = hasattr(self.request, 'user') and SUPPORT.enabled(self.request.user.username)

        context['redis'] = service_checks.check_redis()
        context['rabbitmq'] = service_checks.check_rabbitmq(settings.CELERY_BROKER_URL)
        context['celery_stats'] = get_celery_stats()
        context['heartbeat'] = service_checks.check_heartbeat()

        context['cluster_health'] = escheck.check_es_cluster_health()

        return context
Example #3
def system_ajax(request):
    """
    Utility ajax functions for polling couch and celerymon
    """
    type = request.GET.get('api', None)
    task_limit = getattr(settings, 'CELERYMON_TASK_LIMIT', 12)
    celery_monitoring = getattr(settings, 'CELERY_FLOWER_URL', None)
    db = XFormInstance.get_db()
    if type == "_active_tasks":
        tasks = [] if is_bigcouch() else filter(
            lambda x: x['type'] == "indexer", db.server.active_tasks())
        #for reference structure is:
        #        tasks = [{'type': 'indexer', 'pid': 'foo', 'database': 'mock',
        #            'design_document': 'mockymock', 'progress': 0,
        #            'started_on': 1349906040.723517, 'updated_on': 1349905800.679458,
        #            'total_changes': 1023},
        #            {'type': 'indexer', 'pid': 'foo', 'database': 'mock',
        #            'design_document': 'mockymock', 'progress': 70,
        #            'started_on': 1349906040.723517, 'updated_on': 1349905800.679458,
        #            'total_changes': 1023}]
        return json_response(tasks)
    elif type == "_stats":
        return json_response({})
    elif type == "_logs":
        pass
    elif type == 'pillowtop':
        return json_response(get_all_pillows_json())
    elif type == 'stale_pillows':
        es_index_status = [
            check_case_es_index(interval=3),
            check_xform_es_index(interval=3),
            check_reportcase_es_index(interval=3),
            check_reportxform_es_index(interval=3)
        ]
        return json_response(es_index_status)

    if celery_monitoring:
        cresource = Resource(celery_monitoring, timeout=3)
        if type == "flower_poll":
            ret = []
            try:
                t = cresource.get("api/tasks",
                                  params_dict={
                                      'limit': task_limit
                                  }).body_string()
                all_tasks = json.loads(t)
            except Exception as ex:
                all_tasks = {}
                logging.error("Error with getting from celery_flower: %s" % ex)

            for task_id, traw in all_tasks.items():
                # all_tasks maps task_id -> task_info_dict
                if 'name' in traw and traw['name']:
                    traw['name'] = '.'.join(traw['name'].split('.')[-2:])
                else:
                    traw['name'] = None
                ret.append(traw)
            ret = sorted(ret, key=lambda x: x['succeeded'], reverse=True)
            return HttpResponse(json.dumps(ret), mimetype='application/json')
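For reference, Flower's /api/tasks endpoint returns a JSON mapping of task id to task-info dict. A minimal sketch of the same poll using the requests library instead of restkit (function name hypothetical; URL layout and response shape taken from the code above):

import requests

def poll_flower_tasks(flower_url, limit=12):
    # Flower's /api/tasks returns {task_id: task_info_dict, ...}
    resp = requests.get(flower_url + '/api/tasks', params={'limit': limit}, timeout=3)
    resp.raise_for_status()
    tasks = []
    for _task_id, info in resp.json().items():
        # shorten dotted task names to their last two segments, as above
        info['name'] = '.'.join(info['name'].split('.')[-2:]) if info.get('name') else None
        tasks.append(info)
    return sorted(tasks, key=lambda t: t['succeeded'], reverse=True)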
Example #4
def reminders_in_error(request, domain):
    handler_map = {}
    if request.method == "POST":
        form = RemindersInErrorForm(request.POST)
        if form.is_valid():
            kwargs = {}
            if is_bigcouch():
                # Force a write to all nodes before returning
                kwargs["w"] = bigcouch_quorum_count()
            current_timestamp = datetime.utcnow()
            for reminder_id in form.cleaned_data.get("selected_reminders"):
                reminder = CaseReminder.get(reminder_id)
                if reminder.domain != domain:
                    continue
                if reminder.handler_id in handler_map:
                    handler = handler_map[reminder.handler_id]
                else:
                    handler = reminder.handler
                    handler_map[reminder.handler_id] = handler
                reminder.error = False
                reminder.error_msg = None
                handler.set_next_fire(reminder, current_timestamp)
                reminder.save(**kwargs)

    timezone = report_utils.get_timezone(request.couch_user.user_id, domain)
    reminders = []
    for reminder in CaseReminder.view("reminders/reminders_in_error",
                                      startkey=[domain],
                                      endkey=[domain, {}],
                                      include_docs=True).all():
        if reminder.handler_id in handler_map:
            handler = handler_map[reminder.handler_id]
        else:
            handler = reminder.handler
            handler_map[reminder.handler_id] = handler
        recipient = reminder.recipient
        case = reminder.case
        reminders.append({
            "reminder_id": reminder._id,
            "handler_id": reminder.handler_id,
            "handler_name": handler.nickname,
            "case_id": case.get_id if case is not None else None,
            "case_name": case.name if case is not None else None,
            "next_fire": tz_utils.adjust_datetime_to_timezone(
                reminder.next_fire, pytz.utc.zone, timezone.zone
            ).strftime("%Y-%m-%d %H:%M:%S"),
            "error_msg": reminder.error_msg,
            "recipient_name": get_recipient_name(recipient),
        })
Example #5
def system_ajax(request):
    """
    Utility ajax functions for polling couch and celerymon
    """
    type = request.GET.get('api', None)
    task_limit = getattr(settings, 'CELERYMON_TASK_LIMIT', 12)
    celery_monitoring = getattr(settings, 'CELERY_FLOWER_URL', None)
    db = XFormInstance.get_db()
    if type == "_active_tasks":
        tasks = [] if is_bigcouch() else filter(lambda x: x['type'] == "indexer", db.server.active_tasks())
        #for reference structure is:
        #        tasks = [{'type': 'indexer', 'pid': 'foo', 'database': 'mock',
        #            'design_document': 'mockymock', 'progress': 0,
        #            'started_on': 1349906040.723517, 'updated_on': 1349905800.679458,
        #            'total_changes': 1023},
        #            {'type': 'indexer', 'pid': 'foo', 'database': 'mock',
        #            'design_document': 'mockymock', 'progress': 70,
        #            'started_on': 1349906040.723517, 'updated_on': 1349905800.679458,
        #            'total_changes': 1023}]
        return json_response(tasks)
    elif type == "_stats":
        return json_response({})
    elif type == "_logs":
        pass
    elif type == 'pillowtop':
        return json_response(get_all_pillows_json())
    elif type == 'stale_pillows':
        es_index_status = [
            check_case_es_index(interval=3),
            check_xform_es_index(interval=3),
            check_reportcase_es_index(interval=3),
            check_reportxform_es_index(interval=3)
        ]
        return json_response(es_index_status)

    if celery_monitoring:
        cresource = Resource(celery_monitoring, timeout=3)
        if type == "flower_poll":
            ret = []
            try:
                t = cresource.get("api/tasks", params_dict={'limit': task_limit}).body_string()
                all_tasks = json.loads(t)
            except Exception as ex:
                all_tasks = {}
                logging.error("Error with getting from celery_flower: %s" % ex)

            for task_id, traw in all_tasks.items():
                # all_tasks maps task_id -> task_info_dict
                if 'name' in traw and traw['name']:
                    traw['name'] = '.'.join(traw['name'].split('.')[-2:])
                else:
                    traw['name'] = None
                ret.append(traw)
            ret = sorted(ret, key=lambda x: x['succeeded'], reverse=True)
            return HttpResponse(json.dumps(ret), mimetype='application/json')
Example #6
def reminders_in_error(request, domain):
    handler_map = {}
    if request.method == "POST":
        form = RemindersInErrorForm(request.POST)
        if form.is_valid():
            kwargs = {}
            if is_bigcouch():
                # Force a write to all nodes before returning
                kwargs["w"] = bigcouch_quorum_count()
            current_timestamp = datetime.utcnow()
            for reminder_id in form.cleaned_data.get("selected_reminders"):
                reminder = CaseReminder.get(reminder_id)
                if reminder.domain != domain:
                    continue
                if reminder.handler_id in handler_map:
                    handler = handler_map[reminder.handler_id]
                else:
                    handler = reminder.handler
                    handler_map[reminder.handler_id] = handler
                reminder.error = False
                reminder.error_msg = None
                handler.set_next_fire(reminder, current_timestamp)
                reminder.save(**kwargs)
    
    timezone = report_utils.get_timezone(request.couch_user.user_id, domain)
    reminders = []
    for reminder in CaseReminder.view("reminders/reminders_in_error", startkey=[domain], endkey=[domain, {}], include_docs=True).all():
        if reminder.handler_id in handler_map:
            handler = handler_map[reminder.handler_id]
        else:
            handler = reminder.handler
            handler_map[reminder.handler_id] = handler
        recipient = reminder.recipient
        case = reminder.case
        reminders.append({
            "reminder_id" : reminder._id,
            "handler_id" : reminder.handler_id,
            "handler_name" : handler.nickname,
            "case_id" : case.get_id if case is not None else None,
            "case_name" : case.name if case is not None else None,
            "next_fire" : tz_utils.adjust_datetime_to_timezone(reminder.next_fire, pytz.utc.zone, timezone.zone).strftime("%Y-%m-%d %H:%M:%S"),
            "error_msg" : reminder.error_msg,
            "recipient_name" : get_recipient_name(recipient),
        })
    context = {
        "domain" : domain,
        "reminders" : reminders,
        "timezone" : timezone,
        "timezone_now" : datetime.now(tz=timezone),
    }
    return render(request, "reminders/partial/reminders_in_error.html", context)
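The tz_utils.adjust_datetime_to_timezone call above converts between named zones; a minimal pytz-only sketch of the same UTC-to-local conversion, assuming reminder.next_fire is a naive UTC datetime (function name hypothetical):

import pytz

def utc_to_local(naive_utc_dt, tz):
    # attach UTC to the naive datetime, then convert to the target zone
    return pytz.utc.localize(naive_utc_dt).astimezone(tz)

# e.g. utc_to_local(reminder.next_fire, timezone).strftime("%Y-%m-%d %H:%M:%S")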
Example #7
def system_ajax(request):
    """
    Utility ajax functions for polling couch and celerymon
    """
    type = request.GET.get("api", None)
    task_limit = getattr(settings, "CELERYMON_TASK_LIMIT", 5)
    celerymon_url = getattr(settings, "CELERYMON_URL", "")
    db = XFormInstance.get_db()
    ret = {}
    if type == "_active_tasks":
        tasks = [] if is_bigcouch() else filter(lambda x: x["type"] == "indexer", db.server.active_tasks())
        # for reference structure is:
        #        tasks = [{'type': 'indexer', 'pid': 'foo', 'database': 'mock',
        #            'design_document': 'mockymock', 'progress': 0,
        #            'started_on': 1349906040.723517, 'updated_on': 1349905800.679458,
        #            'total_changes': 1023},
        #            {'type': 'indexer', 'pid': 'foo', 'database': 'mock',
        #            'design_document': 'mockymock', 'progress': 70,
        #            'started_on': 1349906040.723517, 'updated_on': 1349905800.679458,
        #            'total_changes': 1023}]
        return HttpResponse(json.dumps(tasks), mimetype="application/json")
    elif type == "_stats":
        return HttpResponse(json.dumps({}), mimetype="application/json")
    elif type == "_logs":
        pass

    if celerymon_url != "":
        cresource = Resource(celerymon_url, timeout=3)
        if type == "celerymon_poll":
            # inefficient way to just get everything in one fell swoop
            # first, get all task types:
            ret = []
            try:
                t = cresource.get("api/task/name/").body_string()
                task_names = json.loads(t)
            except Exception as ex:
                task_names = []
                t = {}
                logging.error("Error with getting celerymon: %s" % ex)

            for tname in task_names:
                taskinfo_raw = json.loads(
                    cresource.get("api/task/name/%s" % (tname), params_dict={"limit": task_limit}).body_string()
                )
                for traw in taskinfo_raw:
                    # it's an array of arrays - looping through [<id>, {task_info_dict}]
                    tinfo = traw[1]
                    tinfo["name"] = ".".join(tinfo["name"].split(".")[-2:])
                    ret.append(tinfo)
            ret = sorted(ret, key=lambda x: x["succeeded"], reverse=True)
            return HttpResponse(json.dumps(ret), mimetype="application/json")
Example #8
def system_ajax(request):
    """
    Utility ajax functions for polling couch and celerymon
    """
    type = request.GET.get('api', None)
    task_limit = getattr(settings, 'CELERYMON_TASK_LIMIT', 5)
    celerymon_url = getattr(settings, 'CELERYMON_URL', '')
    db = XFormInstance.get_db()
    ret = {}
    if type == "_active_tasks":
        tasks = [] if is_bigcouch() else filter(lambda x: x['type'] == "indexer", db.server.active_tasks())
        #for reference structure is:
        #        tasks = [{'type': 'indexer', 'pid': 'foo', 'database': 'mock',
        #            'design_document': 'mockymock', 'progress': 0,
        #            'started_on': 1349906040.723517, 'updated_on': 1349905800.679458,
        #            'total_changes': 1023},
        #            {'type': 'indexer', 'pid': 'foo', 'database': 'mock',
        #            'design_document': 'mockymock', 'progress': 70,
        #            'started_on': 1349906040.723517, 'updated_on': 1349905800.679458,
        #            'total_changes': 1023}]
        return HttpResponse(json.dumps(tasks), mimetype='application/json')
    elif type == "_stats":
        return HttpResponse(json.dumps({}), mimetype='application/json')
    elif type == "_logs":
        pass

    if celerymon_url != '':
        cresource = Resource(celerymon_url, timeout=3)
        if type == "celerymon_poll":
            #inefficient way to just get everything in one fell swoop
            #first, get all task types:
            ret = []
            try:
                t = cresource.get("api/task/name/").body_string()
                task_names = json.loads(t)
            except Exception as ex:
                task_names = []
                t = {}
                logging.error("Error with getting celerymon: %s" % ex)

            for tname in task_names:
                taskinfo_raw = json.loads(cresource.get('api/task/name/%s' % (tname), params_dict={'limit': task_limit}).body_string())
                for traw in taskinfo_raw:
                    # it's an array of arrays - looping through [<id>, {task_info_dict}]
                    tinfo = traw[1]
                    tinfo['name'] = '.'.join(tinfo['name'].split('.')[-2:])
                    ret.append(tinfo)
            ret = sorted(ret, key=lambda x: x['succeeded'], reverse=True)
            return HttpResponse(json.dumps(ret), mimetype='application/json')
Example #9
 def get_search_params(self):
     # the difference is:
     # on couch lucene: /[db]/_fti/_design/[ddoc]/[search view]
     # on cloudant: /[db]/_design/[ddoc]/_search/[search view]
     # this magic combination of args makes it work for each one in couchdbkit
     if is_bigcouch():
         ddoc, view = self._search_view.split("/")
         return {
             'view_name': '%s/_search/%s' % (ddoc, view),
             'handler': "_design",
         }
     else:
         return {
             'view_name': self._search_view,
             'handler': "_fti/_design",
         }
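To make the routing comment concrete: for a search view named "my_ddoc/my_search", the two branches produce the request paths below (a sketch; it assumes the handler segment is joined between the database name and the view name):

def search_path(db_name, handler, view_name):
    # /[db]/[handler]/[view_name]
    return '/%s/%s/%s' % (db_name, handler, view_name)

# bigcouch/cloudant branch:
#   search_path('mydb', '_design', 'my_ddoc/_search/my_search')
#   -> '/mydb/_design/my_ddoc/_search/my_search'
# couch-lucene branch:
#   search_path('mydb', '_fti/_design', 'my_ddoc/my_search')
#   -> '/mydb/_fti/_design/my_ddoc/my_search'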
Example #10
def system_info(request):
    def human_bytes(bytes):
        # source: https://github.com/bartTC/django-memcache-status
        bytes = float(bytes)
        if bytes >= 1073741824:
            gigabytes = bytes / 1073741824
            size = "%.2fGB" % gigabytes
        elif bytes >= 1048576:
            megabytes = bytes / 1048576
            size = "%.2fMB" % megabytes
        elif bytes >= 1024:
            kilobytes = bytes / 1024
            size = "%.2fKB" % kilobytes
        else:
            size = "%.2fB" % bytes
        return size

    context = get_hqadmin_base_context(request)

    context["couch_update"] = request.GET.get("couch_update", 5000)
    context["celery_update"] = request.GET.get("celery_update", 10000)

    context["hide_filters"] = True
    if hasattr(os, "uname"):
        context["current_system"] = os.uname()[1]

    # from dimagi.utils import gitinfo
    # context['current_ref'] = gitinfo.get_project_info()
    # removing until the async library is updated
    context["current_ref"] = {}
    if settings.COUCH_USERNAME == "" and settings.COUCH_PASSWORD == "":
        couchlog_resource = Resource("http://%s/" % (settings.COUCH_SERVER_ROOT))
    else:
        couchlog_resource = Resource(
            "http://%s:%s@%s/" % (settings.COUCH_USERNAME, settings.COUCH_PASSWORD, settings.COUCH_SERVER_ROOT)
        )
    try:
        # todo, fix on bigcouch/cloudant
        context["couch_log"] = (
            "Will be back online shortly"
            if is_bigcouch()
            else couchlog_resource.get("_log", params_dict={"bytes": 2000}).body_string()
        )
    except Exception as ex:
        context["couch_log"] = "unable to open couch log: %s" % ex
Example #11
def system_info(request):

    def human_bytes(bytes):
        #source: https://github.com/bartTC/django-memcache-status
        bytes = float(bytes)
        if bytes >= 1073741824:
            gigabytes = bytes / 1073741824
            size = '%.2fGB' % gigabytes
        elif bytes >= 1048576:
            megabytes = bytes / 1048576
            size = '%.2fMB' % megabytes
        elif bytes >= 1024:
            kilobytes = bytes / 1024
            size = '%.2fKB' % kilobytes
        else:
            size = '%.2fB' % bytes
        return size

    context = get_hqadmin_base_context(request)

    context['couch_update'] = request.GET.get('couch_update', 5000)
    context['celery_update'] = request.GET.get('celery_update', 10000)

    context['hide_filters'] = True
    if hasattr(os, 'uname'):
        context['current_system'] = os.uname()[1]

    #from dimagi.utils import gitinfo
    #context['current_ref'] = gitinfo.get_project_info()
    #removing until the async library is updated
    context['current_ref'] = {}
    if settings.COUCH_USERNAME == '' and settings.COUCH_PASSWORD == '':
        couchlog_resource = Resource("http://%s/" % (settings.COUCH_SERVER_ROOT))
    else:
        couchlog_resource = Resource("http://%s:%s@%s/" % (settings.COUCH_USERNAME, settings.COUCH_PASSWORD, settings.COUCH_SERVER_ROOT))
    try:
        #todo, fix on bigcouch/cloudant
        context['couch_log'] = "Will be back online shortly" if is_bigcouch() \
            else couchlog_resource.get('_log', params_dict={'bytes': 2000}).body_string()
    except Exception as ex:
        context['couch_log'] = "unable to open couch log: %s" % ex
Example #12
def system_info(request):
    environment = settings.SERVER_ENVIRONMENT

    context = get_hqadmin_base_context(request)
    context['couch_update'] = request.GET.get('couch_update', 5000)
    context['celery_update'] = request.GET.get('celery_update', 10000)
    context['db_update'] = request.GET.get('db_update', 30000)
    context['celery_flower_url'] = getattr(settings, 'CELERY_FLOWER_URL', None)

    context['is_bigcouch'] = is_bigcouch()
    context['rabbitmq_url'] = get_rabbitmq_management_url()
    context['hide_filters'] = True
    context['current_system'] = socket.gethostname()
    context['deploy_history'] = HqDeploy.get_latest(environment, limit=5)

    context.update(check_redis())
    context.update(check_rabbitmq())
    context.update(check_celery_health())
    context.update(check_es_cluster_health())

    return render(request, "hqadmin/system_info.html", context)
Example #13
def system_ajax(request):
    """
    Utility ajax functions for polling couch and celerymon
    """
    type = request.GET.get('api', None)
    task_limit = getattr(settings, 'CELERYMON_TASK_LIMIT', 12)
    celery_monitoring = getattr(settings, 'CELERY_FLOWER_URL', None)
    db = XFormInstance.get_db()
    if type == "_active_tasks":
        try:
            tasks = filter(lambda x: x['type'] == "indexer", db.server.active_tasks())
        except Unauthorized:
            return json_response({'error': "Unable to access CouchDB Tasks (unauthorized)."}, status_code=500)

        if not is_bigcouch():
            return json_response(tasks)
        else:
            # group tasks by design doc
            task_map = defaultdict(dict)
            for task in tasks:
                meta = task_map[task['design_document']]
                tasks = meta.get('tasks', [])
                tasks.append(task)
                meta['tasks'] = tasks

            design_docs = []
            for dd, meta in task_map.items():
                meta['design_document'] = dd[len('_design/'):]
                total_changes = sum(task['total_changes'] for task in meta['tasks'])
                for task in meta['tasks']:
                    task['progress_contribution'] = task['changes_done'] * 100 / total_changes

                design_docs.append(meta)
            return json_response(design_docs)
    elif type == "_stats":
        return json_response({})
    elif type == "_logs":
        pass
    elif type == 'pillowtop':
        pillow_meta = get_all_pillows_json()
        supervisor_status = all_pillows_supervisor_status([meta['name'] for meta in pillow_meta])
        for meta in pillow_meta:
            meta.update(supervisor_status[meta['name']])
        return json_response(sorted(pillow_meta, key=lambda m: m['name']))
    elif type == 'stale_pillows':
        es_index_status = [
            check_case_es_index(interval=3),
            check_xform_es_index(interval=3),
            check_reportcase_es_index(interval=3),
            check_reportxform_es_index(interval=3)
        ]
        return json_response(es_index_status)

    if celery_monitoring:
        cresource = Resource(celery_monitoring, timeout=3)
        if type == "flower_poll":
            ret = []
            try:
                t = cresource.get("api/tasks", params_dict={'limit': task_limit}).body_string()
                all_tasks = json.loads(t)
            except Exception as ex:
                return json_response({'error': "Error with getting from celery_flower: %s" % ex}, status_code=500)

            for task_id, traw in all_tasks.items():
                # all_tasks maps task_id -> task_info_dict
                if 'name' in traw and traw['name']:
                    traw['name'] = '.'.join(traw['name'].split('.')[-2:])
                else:
                    traw['name'] = None
                ret.append(traw)
            ret = sorted(ret, key=lambda x: x['succeeded'], reverse=True)
            return HttpResponse(json.dumps(ret), content_type='application/json')
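In the bigcouch branch above, the grouping maintains each design doc's task list by hand; the same step can be written with defaultdict(list). A behavior-preserving sketch of just the grouping (function name hypothetical):

from collections import defaultdict

def group_tasks_by_design_doc(tasks):
    # bucket couch indexer tasks under their design document
    by_ddoc = defaultdict(list)
    for task in tasks:
        by_ddoc[task['design_document']].append(task)
    return [
        {'design_document': dd[len('_design/'):], 'tasks': grouped}
        for dd, grouped in by_ddoc.items()
    ]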
Example #14
 def save(self, *args, **kwargs):
     if is_bigcouch() and "w" not in kwargs:
         # Force a write to all nodes before returning
         kwargs["w"] = bigcouch_quorum_count()
     return super(XFormsSession, self).save(*args, **kwargs)
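For context, "w" is CouchDB's write-quorum parameter: the save does not return until that many nodes have acknowledged the write. A hypothetical sketch of how such a helper could derive the count from the cluster size (the real bigcouch_quorum_count may simply read a configured value):

def quorum_count(num_nodes):
    # hypothetical: a majority quorum over num_nodes copies of the document
    return num_nodes // 2 + 1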
Example #15
def do_import(spreadsheet, config, domain, task=None, chunksize=CASEBLOCK_CHUNKSIZE):
    if not spreadsheet:
        return {"errors": "EXPIRED"}
    if spreadsheet.has_errors:
        return {"errors": "HAS_ERRORS"}

    row_count = spreadsheet.get_num_rows()
    columns = spreadsheet.get_header_columns()
    match_count = created_count = too_many_matches = num_chunks = 0
    errors = importer_util.ImportErrorDetail()
    prime_offset = 1  # used to prevent back-to-back priming

    user = CouchUser.get_by_user_id(config.couch_user_id, domain)
    username = user.username
    user_id = user._id

    # keep a cache of id lookup successes to help performance
    id_cache = {}
    name_cache = {}
    caseblocks = []
    ids_seen = set()

    def _submit_caseblocks(caseblocks):
        if caseblocks:
            submit_case_blocks([ElementTree.tostring(cb.as_xml()) for cb in caseblocks], domain, username, user_id)

    for i in range(row_count):
        if task:
            DownloadBase.set_progress(task, i, row_count)

        # skip first row if it is a header field
        if i == 0 and config.named_columns:
            continue

        if not is_bigcouch():
            priming_progress = match_count + created_count + prime_offset
            if priming_progress % PRIME_VIEW_FREQUENCY == 0:
                prime_views(POOL_SIZE)
                # increment so we can't possibly prime on next iteration
                prime_offset += 1

        row = spreadsheet.get_row(i)
        search_id = importer_util.parse_search_id(config, columns, row)
        if config.search_field == "external_id" and not search_id:
            # do not allow blank external id since we save this
            errors.add(ImportErrors.BlankExternalId, i + 1)
            continue

        try:
            fields_to_update = importer_util.populate_updated_fields(
                config, columns, row, spreadsheet.workbook.datemode
            )
            if not any(fields_to_update.values()):
                # if the row was blank, just skip it, no errors
                continue
        except importer_util.InvalidDateException:
            errors.add(ImportErrors.InvalidDate, i + 1)
            continue

        external_id = fields_to_update.pop("external_id", None)
        parent_id = fields_to_update.pop("parent_id", None)
        parent_external_id = fields_to_update.pop("parent_external_id", None)
        parent_type = fields_to_update.pop("parent_type", config.case_type)
        parent_ref = fields_to_update.pop("parent_ref", "parent")
        to_close = fields_to_update.pop("close", False)

        if any([lookup_id and lookup_id in ids_seen for lookup_id in [search_id, parent_id, parent_external_id]]):
            # clear out the queue to make sure we've processed any potential
            # cases we want to look up
            # note: these three lines are repeated a few places, and could be converted
            # to a function that makes use of closures (and globals) to do the same thing,
            # but that seems sketchier than just being a little RY
            _submit_caseblocks(caseblocks)
            num_chunks += 1
            caseblocks = []
            ids_seen = set()  # also clear ids_seen, since all the cases will now be in the database

        case, error = importer_util.lookup_case(config.search_field, search_id, domain, config.case_type)

        if case:
            if case.type != config.case_type:
                continue
        elif error == LookupErrors.NotFound:
            if not config.create_new_cases:
                continue
        elif error == LookupErrors.MultipleResults:
            too_many_matches += 1
            continue

        uploaded_owner_name = fields_to_update.pop("owner_name", None)
        uploaded_owner_id = fields_to_update.pop("owner_id", None)

        if uploaded_owner_name:
            # If an owner name was provided, replace the provided
            # uploaded_owner_id with the id of the provided group or owner
            try:
                uploaded_owner_id = importer_util.get_id_from_name(uploaded_owner_name, domain, name_cache)
            except SQLLocation.MultipleObjectsReturned:
                errors.add(ImportErrors.DuplicateLocationName, i + 1)
                continue

            if not uploaded_owner_id:
                errors.add(ImportErrors.InvalidOwnerName, i + 1)
                continue
        if uploaded_owner_id:
            # If an owner_id mapping exists, verify it is a valid user
            # or case sharing group
            if importer_util.is_valid_id(uploaded_owner_id, domain, id_cache):
                owner_id = uploaded_owner_id
                id_cache[uploaded_owner_id] = True
            else:
                errors.add(ImportErrors.InvalidOwnerId, i + 1)
                id_cache[uploaded_owner_id] = False
                continue
        else:
            # if they didn't supply an owner_id mapping, default to current
            # user
            owner_id = user_id

        extras = {}
        if parent_id:
            try:
                parent_case = CommCareCase.get(parent_id)

                if parent_case.domain == domain:
                    extras["index"] = {parent_ref: (parent_case.type, parent_id)}
            except ResourceNotFound:
                errors.add(ImportErrors.InvalidParentId, i + 1)
                continue
        elif parent_external_id:
            parent_case, error = importer_util.lookup_case("external_id", parent_external_id, domain, parent_type)
            if parent_case:
                extras["index"] = {parent_ref: (parent_type, parent_case._id)}

        if not case:
            id = uuid.uuid4().hex

            if config.search_field == "external_id":
                extras["external_id"] = search_id

            try:
                caseblock = CaseBlock(
                    create=True,
                    case_id=id,
                    owner_id=owner_id,
                    user_id=user_id,
                    case_type=config.case_type,
                    update=fields_to_update,
                    **extras
                )
                caseblocks.append(caseblock)
                created_count += 1
                if external_id:
                    ids_seen.add(external_id)
            except CaseBlockError:
                errors.add(ImportErrors.CaseGeneration, i + 1)
        else:
            if external_id:
                extras["external_id"] = external_id
            if uploaded_owner_id:
                extras["owner_id"] = owner_id
            if to_close == "yes":
                extras["close"] = True

            try:
                caseblock = CaseBlock(create=False, case_id=case._id, update=fields_to_update, **extras)
                caseblocks.append(caseblock)
                match_count += 1
            except CaseBlockError:
                errors.add(ImportErrors.CaseGeneration, i + 1)

        # check if we've reached a reasonable chunksize
        # and if so submit
        if len(caseblocks) >= chunksize:
            _submit_caseblocks(caseblocks)
            num_chunks += 1
            caseblocks = []

    # final purge of anything left in the queue
    _submit_caseblocks(caseblocks)
    num_chunks += 1
    return {
        "created_count": created_count,
        "match_count": match_count,
        "too_many_matches": too_many_matches,
        "errors": errors.as_dict(),
        "num_chunks": num_chunks,
    }
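The chunking in do_import is the usual accumulate-and-flush pattern: buffer caseblocks, submit once the buffer reaches chunksize, and flush whatever remains at the end. A self-contained sketch of the pattern (names hypothetical):

def submit_in_chunks(items, submit, chunksize):
    # hypothetical helper: flush the buffer every `chunksize` items
    buf, num_chunks = [], 0
    for item in items:
        buf.append(item)
        if len(buf) >= chunksize:
            submit(buf)
            num_chunks += 1
            buf = []
    if buf:
        # final flush of anything left in the queue
        submit(buf)
        num_chunks += 1
    return num_chunks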
Example #16
def do_import(spreadsheet,
              config,
              domain,
              task=None,
              chunksize=CASEBLOCK_CHUNKSIZE):
    if not spreadsheet:
        return {'error': 'EXPIRED'}
    if spreadsheet.has_errors:
        return {'error': 'HAS_ERRORS'}

    row_count = spreadsheet.get_num_rows()
    columns = spreadsheet.get_header_columns()
    match_count = created_count = too_many_matches = errors = num_chunks = 0
    blank_external_ids = []
    invalid_dates = []
    owner_id_errors = []
    prime_offset = 1  # used to prevent back-to-back priming

    user = CouchUser.get_by_user_id(config.couch_user_id, domain)
    username = user.username
    user_id = user._id

    # keep a cache of id lookup successes to help performance
    id_cache = {}
    caseblocks = []
    ids_seen = set()

    def _submit_caseblocks(caseblocks):
        if caseblocks:
            submit_case_blocks(
                [
                    ElementTree.tostring(
                        cb.as_xml(format_datetime=json_format_datetime))
                    for cb in caseblocks
                ],
                domain,
                username,
                user_id,
            )

    for i in range(row_count):
        if task:
            DownloadBase.set_progress(task, i, row_count)

        # skip first row if it is a header field
        if i == 0 and config.named_columns:
            continue

        if not is_bigcouch():
            priming_progress = match_count + created_count + prime_offset
            if priming_progress % PRIME_VIEW_FREQUENCY == 0:
                prime_views(POOL_SIZE)
                # increment so we can't possibly prime on next iteration
                prime_offset += 1

        row = spreadsheet.get_row(i)
        search_id = importer_util.parse_search_id(config, columns, row)
        if config.search_field == 'external_id' and not search_id:
            # do not allow blank external id since we save this
            blank_external_ids.append(i + 1)
            continue

        try:
            fields_to_update = importer_util.populate_updated_fields(
                config, columns, row)
            if not any(fields_to_update.values()):
                # if the row was blank, just skip it, no errors
                continue
        except importer_util.InvalidDateException:
            invalid_dates.append(i + 1)
            continue

        external_id = fields_to_update.pop('external_id', None)
        parent_id = fields_to_update.pop('parent_id', None)
        parent_external_id = fields_to_update.pop('parent_external_id', None)
        parent_type = fields_to_update.pop('parent_type', config.case_type)
        parent_ref = fields_to_update.pop('parent_ref', 'parent')
        to_close = fields_to_update.pop('close', False)

        if any([
                lookup_id and lookup_id in ids_seen
                for lookup_id in [search_id, parent_id, parent_external_id]
        ]):
            # clear out the queue to make sure we've processed any potential
            # cases we want to look up
            # note: these three lines are repeated a few places, and could be converted
            # to a function that makes use of closures (and globals) to do the same thing,
            # but that seems sketchier than just being a little RY
            _submit_caseblocks(caseblocks)
            num_chunks += 1
            caseblocks = []
            ids_seen = set()  # also clear ids_seen, since all the cases will now be in the database

        case, error = importer_util.lookup_case(config.search_field, search_id,
                                                domain, config.case_type)

        if case:
            if case.type != config.case_type:
                continue
        elif error == LookupErrors.NotFound:
            if not config.create_new_cases:
                continue
Example #17
def do_import(spreadsheet, config, domain, task=None, chunksize=CASEBLOCK_CHUNKSIZE):
    if not spreadsheet:
        return {'error': 'EXPIRED'}
    if spreadsheet.has_errors:
        return {'error': 'HAS_ERRORS'}

    row_count = spreadsheet.get_num_rows()
    columns = spreadsheet.get_header_columns()
    match_count = created_count = too_many_matches = errors = num_chunks = 0
    blank_external_ids = []
    invalid_dates = []
    owner_id_errors = []
    prime_offset = 1  # used to prevent back-to-back priming

    user = CouchUser.get_by_user_id(config.couch_user_id, domain)
    username = user.username
    user_id = user._id

    # keep a cache of id lookup successes to help performance
    id_cache = {}
    caseblocks = []
    ids_seen = set()

    def _submit_caseblocks(caseblocks):
        if caseblocks:
            submit_case_blocks(
                [ElementTree.tostring(cb.as_xml(format_datetime=json_format_datetime)) for cb in caseblocks],
                domain,
                username,
                user_id,
            )

    for i in range(row_count):
        if task:
            DownloadBase.set_progress(task, i, row_count)

        # skip first row if it is a header field
        if i == 0 and config.named_columns:
            continue

        if not is_bigcouch():
            priming_progress = match_count + created_count + prime_offset
            if priming_progress % PRIME_VIEW_FREQUENCY == 0:
                prime_views(POOL_SIZE)
                # increment so we can't possibly prime on next iteration
                prime_offset += 1

        row = spreadsheet.get_row(i)
        search_id = importer_util.parse_search_id(config, columns, row)
        if config.search_field == 'external_id' and not search_id:
            # do not allow blank external id since we save this
            blank_external_ids.append(i + 1)
            continue

        try:
            fields_to_update = importer_util.populate_updated_fields(
                config,
                columns,
                row
            )
            if not any(fields_to_update.values()):
                # if the row was blank, just skip it, no errors
                continue
        except importer_util.InvalidDateException:
            invalid_dates.append(i + 1)
            continue

        external_id = fields_to_update.pop('external_id', None)
        parent_id = fields_to_update.pop('parent_id', None)
        parent_external_id = fields_to_update.pop('parent_external_id', None)
        parent_type = fields_to_update.pop('parent_type', config.case_type)
        parent_ref = fields_to_update.pop('parent_ref', 'parent')
        to_close = fields_to_update.pop('close', False)

        if any([lookup_id and lookup_id in ids_seen for lookup_id in [search_id, parent_id, parent_external_id]]):
            # clear out the queue to make sure we've processed any potential
            # cases we want to look up
            # note: these three lines are repeated a few places, and could be converted
            # to a function that makes use of closures (and globals) to do the same thing,
            # but that seems sketchier than just being a little RY
            _submit_caseblocks(caseblocks)
            num_chunks += 1
            caseblocks = []
            ids_seen = set()  # also clear ids_seen, since all the cases will now be in the database

        case, error = importer_util.lookup_case(
            config.search_field,
            search_id,
            domain,
            config.case_type
        )

        if case:
            if case.type != config.case_type:
                continue
        elif error == LookupErrors.NotFound:
            if not config.create_new_cases:
                continue
        elif error == LookupErrors.MultipleResults:
            too_many_matches += 1
            continue

        uploaded_owner_id = fields_to_update.pop('owner_id', None)
        if uploaded_owner_id:
            # If an owner_id mapping exists, verify it is a valid user
            # or case sharing group
            if importer_util.is_valid_id(uploaded_owner_id, domain, id_cache):
                owner_id = uploaded_owner_id
                id_cache[uploaded_owner_id] = True
            else:
                owner_id_errors.append(i + 1)
                id_cache[uploaded_owner_id] = False
                continue
        else:
            # if they didn't supply an owner_id mapping, default to current
            # user
            owner_id = user_id

        extras = {}
        if parent_id:
            try:
                parent_case = CommCareCase.get(parent_id)

                if parent_case.domain == domain:
                    extras['index'] = {
                        parent_ref: (parent_case.type, parent_id)
                    }
            except ResourceNotFound:
                continue
        elif parent_external_id:
            parent_case, error = importer_util.lookup_case(
                'external_id',
                parent_external_id,
                domain,
                parent_type
            )
            if parent_case:
                extras['index'] = {
                    parent_ref: (parent_type, parent_case._id)
                }

        if not case:
            id = uuid.uuid4().hex

            if config.search_field == 'external_id':
                extras['external_id'] = search_id

            try:
                caseblock = CaseBlock(
                    create=True,
                    case_id=id,
                    version=V2,
                    owner_id=owner_id,
                    user_id=user_id,
                    case_type=config.case_type,
                    update=fields_to_update,
                    **extras
                )
                caseblocks.append(caseblock)
                created_count += 1
                if external_id:
                    ids_seen.add(external_id)
            except CaseBlockError:
                errors += 1
        else:
            if external_id:
                extras['external_id'] = external_id
            if uploaded_owner_id:
                extras['owner_id'] = owner_id
            if to_close == 'yes':
                extras['close'] = True

            try:
                caseblock = CaseBlock(
                    create=False,
                    case_id=case._id,
                    version=V2,
                    update=fields_to_update,
                    **extras
                )
                caseblocks.append(caseblock)
                match_count += 1
            except CaseBlockError:
                errors += 1

        # check if we've reached a reasonable chunksize
        # and if so submit
        if len(caseblocks) >= chunksize:
            _submit_caseblocks(caseblocks)
            num_chunks += 1
            caseblocks = []


    # final purge of anything left in the queue
    _submit_caseblocks(caseblocks)
    num_chunks += 1
    return {
        'created_count': created_count,
        'match_count': match_count,
        'too_many_matches': too_many_matches,
        'blank_externals': blank_external_ids,
        'invalid_dates': invalid_dates,
        'owner_id_errors': owner_id_errors,
        'errors': errors,
        'num_chunks': num_chunks,
    }
Example #18
def system_ajax(request):
    """
    Utility ajax functions for polling couch and celerymon
    """
    type = request.GET.get("api", None)
    task_limit = getattr(settings, "CELERYMON_TASK_LIMIT", 12)
    celery_monitoring = getattr(settings, "CELERY_FLOWER_URL", None)
    db = XFormInstance.get_db()
    if type == "_active_tasks":
        try:
            tasks = filter(lambda x: x["type"] == "indexer", db.server.active_tasks())
        except Unauthorized:
            return json_response({"error": "Unable to access CouchDB Tasks (unauthorized)."}, status_code=500)

        if not is_bigcouch():
            return json_response(tasks)
        else:
            # group tasks by design doc
            task_map = defaultdict(dict)
            for task in tasks:
                meta = task_map[task["design_document"]]
                tasks = meta.get("tasks", [])
                tasks.append(task)
                meta["tasks"] = tasks

            design_docs = []
            for dd, meta in task_map.items():
                meta["design_document"] = dd[len("_design/") :]
                total_changes = sum(task["total_changes"] for task in meta["tasks"])
                for task in meta["tasks"]:
                    task["progress_contribution"] = task["changes_done"] * 100 / total_changes

                design_docs.append(meta)
            return json_response(design_docs)
    elif type == "_stats":
        return json_response({})
    elif type == "_logs":
        pass
    elif type == "pillowtop":
        return json_response(get_all_pillows_json())
    elif type == "stale_pillows":
        es_index_status = [
            check_case_es_index(interval=3),
            check_xform_es_index(interval=3),
            check_reportcase_es_index(interval=3),
            check_reportxform_es_index(interval=3),
        ]
        return json_response(es_index_status)

    if celery_monitoring:
        cresource = Resource(celery_monitoring, timeout=3)
        if type == "flower_poll":
            ret = []
            try:
                t = cresource.get("api/tasks", params_dict={"limit": task_limit}).body_string()
                all_tasks = json.loads(t)
            except Exception as ex:
                return json_response({"error": "Error with getting from celery_flower: %s" % ex}, status_code=500)

            for task_id, traw in all_tasks.items():
                # all_tasks maps task_id -> task_info_dict
                if "name" in traw and traw["name"]:
                    traw["name"] = ".".join(traw["name"].split(".")[-2:])
                else:
                    traw["name"] = None
                ret.append(traw)
            ret = sorted(ret, key=lambda x: x["succeeded"], reverse=True)
            return HttpResponse(json.dumps(ret), mimetype="application/json")
Example #19
def do_import(spreadsheet,
              config,
              domain,
              task=None,
              chunksize=CASEBLOCK_CHUNKSIZE):
    row_count = spreadsheet.get_num_rows()
    columns = spreadsheet.get_header_columns()
    match_count = created_count = too_many_matches = num_chunks = 0
    errors = importer_util.ImportErrorDetail()
    prime_offset = 1  # used to prevent back-to-back priming

    user = CouchUser.get_by_user_id(config.couch_user_id, domain)
    username = user.username
    user_id = user._id

    # keep a cache of id lookup successes to help performance
    id_cache = {}
    name_cache = {}
    caseblocks = []
    ids_seen = set()

    def _submit_caseblocks(domain, case_type, caseblocks):
        err = False
        if caseblocks:
            try:
                form, cases = submit_case_blocks(
                    [cb.as_string() for cb in caseblocks],
                    domain,
                    username,
                    user_id,
                )

                if form.is_error:
                    errors.add(error=ImportErrors.ImportErrorMessage,
                               row_number=form.problem)
            except Exception:
                err = True
                errors.add(error=ImportErrors.ImportErrorMessage,
                           row_number=caseblocks[0]._id)
            else:
                properties = set().union(*map(
                    lambda c: set(c.dynamic_case_properties().keys()), cases))
                add_inferred_export_properties.delay(
                    'CaseImporter',
                    domain,
                    case_type,
                    properties,
                )
        return err

    for i in range(row_count):
        if task:
            DownloadBase.set_progress(task, i, row_count)

        # skip first row if it is a header field
        if i == 0 and config.named_columns:
            continue

        if not is_bigcouch():
            priming_progress = match_count + created_count + prime_offset
            if priming_progress % PRIME_VIEW_FREQUENCY == 0:
                prime_views(POOL_SIZE)
                # increment so we can't possibly prime on next iteration
                prime_offset += 1

        row = spreadsheet.get_row(i)
        search_id = importer_util.parse_search_id(config, columns, row)
        if config.search_field == 'external_id' and not search_id:
            # do not allow blank external id since we save this
            errors.add(ImportErrors.BlankExternalId, i + 1)
            continue

        try:
            fields_to_update = importer_util.populate_updated_fields(
                config, columns, row, spreadsheet.workbook.datemode)
            if not any(fields_to_update.values()):
                # if the row was blank, just skip it, no errors
                continue
        except importer_util.InvalidDateException as e:
            errors.add(ImportErrors.InvalidDate, i + 1, e.column)
            continue
        except importer_util.InvalidIntegerException as e:
            errors.add(ImportErrors.InvalidInteger, i + 1, e.column)
            continue

        external_id = fields_to_update.pop('external_id', None)
        parent_id = fields_to_update.pop('parent_id', None)
        parent_external_id = fields_to_update.pop('parent_external_id', None)
        parent_type = fields_to_update.pop('parent_type', config.case_type)
        parent_ref = fields_to_update.pop('parent_ref', 'parent')
        to_close = fields_to_update.pop('close', False)

        if any([
                lookup_id and lookup_id in ids_seen
                for lookup_id in [search_id, parent_id, parent_external_id]
        ]):
            # clear out the queue to make sure we've processed any potential
            # cases we want to look up
            # note: these three lines are repeated a few places, and could be converted
            # to a function that makes use of closures (and globals) to do the same thing,
            # but that seems sketchier than just being a little RY
            _submit_caseblocks(domain, config.case_type, caseblocks)
            num_chunks += 1
            caseblocks = []
            ids_seen = set()  # also clear ids_seen, since all the cases will now be in the database

        case, error = importer_util.lookup_case(config.search_field, search_id,
                                                domain, config.case_type)

        if case:
            if case.type != config.case_type:
                continue
        elif error == LookupErrors.NotFound:
            if not config.create_new_cases:
                continue
Example #20
def system_ajax(request):
    """
    Utility ajax functions for polling couch and celerymon
    """
    type = request.GET.get('api', None)
    task_limit = getattr(settings, 'CELERYMON_TASK_LIMIT', 12)
    celery_monitoring = getattr(settings, 'CELERY_FLOWER_URL', None)
    db = XFormInstance.get_db()
    if type == "_active_tasks":
        try:
            tasks = [x for x in db.server.active_tasks() if x['type'] == "indexer"]
        except HTTPError as e:
            if e.response.status_code == 403:
                return JsonResponse({'error': "Unable to access CouchDB Tasks (unauthorized)."}, status_code=500)
            else:
                return JsonResponse({'error': "Unable to access CouchDB Tasks."}, status_code=500)

        if not is_bigcouch():
            return JsonResponse(tasks, safe=False)
        else:
            # group tasks by design doc
            task_map = defaultdict(dict)
            for task in tasks:
                meta = task_map[task['design_document']]
                tasks = meta.get('tasks', [])
                tasks.append(task)
                meta['tasks'] = tasks

            design_docs = []
            for dd, meta in task_map.items():
                meta['design_document'] = dd[len('_design/'):]
                total_changes = sum(task['total_changes'] for task in meta['tasks'])
                for task in meta['tasks']:
                    task['progress_contribution'] = task['changes_done'] * 100 // total_changes

                design_docs.append(meta)
            return JsonResponse(design_docs, safe=False)
    elif type == "_stats":
        return JsonResponse({})
    elif type == "_logs":
        pass
    elif type == 'pillowtop':
        pillow_meta = get_all_pillows_json()
        return JsonResponse(sorted(pillow_meta, key=lambda m: m['name'].lower()), safe=False)
    elif type == 'stale_pillows':
        es_index_status = [
            escheck.check_case_es_index(interval=3),
            escheck.check_xform_es_index(interval=3),
            escheck.check_reportcase_es_index(interval=3),
            escheck.check_reportxform_es_index(interval=3)
        ]
        return JsonResponse(es_index_status, safe=False)

    if celery_monitoring:
        if type == "flower_poll":
            ret = []
            try:
                all_tasks = requests.get(
                    celery_monitoring + '/api/tasks',
                    params={'limit': task_limit},
                    timeout=3,
                ).json()
            except Exception as ex:
                return JsonResponse({'error': "Error with getting from celery_flower: %s" % ex}, status_code=500)

            for task_id, traw in all_tasks.items():
                # it's an array of arrays - looping through [<id>, {task_info_dict}]
                if 'name' in traw and traw['name']:
                    traw['name'] = '.'.join(traw['name'].split('.')[-2:])
                else:
                    traw['name'] = None
                ret.append(traw)
            ret = sorted(ret, key=lambda x: x['succeeded'], reverse=True)
            return HttpResponse(json.dumps(ret), content_type='application/json')
    return HttpResponse('{}', content_type='application/json')
Example #21
def do_import(spreadsheet, config, domain, task=None, chunksize=CASEBLOCK_CHUNKSIZE):
    columns = spreadsheet.get_header_columns()
    match_count = created_count = too_many_matches = num_chunks = 0
    errors = importer_util.ImportErrorDetail()
    prime_offset = 1  # used to prevent back-to-back priming

    user = CouchUser.get_by_user_id(config.couch_user_id, domain)
    username = user.username
    user_id = user._id

    # keep a cache of id lookup successes to help performance
    id_cache = {}
    name_cache = {}
    caseblocks = []
    ids_seen = set()

    def _submit_caseblocks(domain, case_type, caseblocks):
        err = False
        if caseblocks:
            try:
                form, cases = submit_case_blocks(
                    [cb.as_string() for cb in caseblocks],
                    domain,
                    username,
                    user_id,
                )

                if form.is_error:
                    errors.add(
                        error=ImportErrors.ImportErrorMessage,
                        row_number=form.problem
                    )
            except Exception:
                err = True
                errors.add(
                    error=ImportErrors.ImportErrorMessage,
                    row_number=caseblocks[0].case_id
                )
            else:
                properties = {
                    prop
                    for case in cases
                    for prop in case.dynamic_case_properties()
                }
                if case_type and properties:
                    add_inferred_export_properties.delay(
                        'CaseImporter',
                        domain,
                        case_type,
                        properties,
                    )
                else:
                    _soft_assert = soft_assert(notify_admins=True)
                    _soft_assert(
                        len(properties) == 0,
                        'error adding inferred export properties in domain '
                        '({}): {}'.format(domain, ", ".join(properties))
                    )
        return err

    row_count = spreadsheet.max_row
    for i, row in enumerate(spreadsheet.iter_rows()):
        if task:
            set_task_progress(task, i, row_count)

        # skip first row (header row)
        if i == 0:
            continue

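        # periodically prime couch views, presumably so the case lookups below
        # hit freshly built indexes (skipped on bigcouch)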
        if not is_bigcouch():
            priming_progress = match_count + created_count + prime_offset
            if priming_progress % PRIME_VIEW_FREQUENCY == 0:
                prime_views(POOL_SIZE)
                # increment so we can't possibly prime on next iteration
                prime_offset += 1

        search_id = importer_util.parse_search_id(config, columns, row)

        fields_to_update = importer_util.populate_updated_fields(config, columns, row)
        if not any(fields_to_update.values()):
            # if the row was blank, just skip it, no errors
            continue

        if config.search_field == 'external_id' and not search_id:
            # do not allow blank external id since we save this
            errors.add(ImportErrors.BlankExternalId, i + 1)
            continue

        external_id = fields_to_update.pop('external_id', None)
        parent_id = fields_to_update.pop('parent_id', None)
        parent_external_id = fields_to_update.pop('parent_external_id', None)
        parent_type = fields_to_update.pop('parent_type', config.case_type)
        parent_ref = fields_to_update.pop('parent_ref', 'parent')
        to_close = fields_to_update.pop('close', False)
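        # everything still left in fields_to_update becomes a dynamic case
        # property; the reserved columns popped above are handled explicitly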

        if any(lookup_id and lookup_id in ids_seen
               for lookup_id in [search_id, parent_id, parent_external_id]):
            # clear out the queue to make sure we've processed any potential
            # cases we want to look up
            # note: these three lines are repeated a few places, and could be converted
            # to a function that makes use of closures (and globals) to do the same thing,
            # but that seems sketchier than just being a little RY
            _submit_caseblocks(domain, config.case_type, caseblocks)
            num_chunks += 1
            caseblocks = []
            ids_seen = set()  # also clear ids_seen, since all the cases will now be in the database

        case, error = importer_util.lookup_case(
            config.search_field,
            search_id,
            domain,
            config.case_type
        )

        if case:
            if case.type != config.case_type:
                continue
        elif error == LookupErrors.NotFound:
            if not config.create_new_cases:
                continue
        elif error == LookupErrors.MultipleResults:
            too_many_matches += 1
            continue
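        # from here `case` is either an existing case to update, or None, in
        # which case a brand new case is created further down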

        uploaded_owner_name = fields_to_update.pop('owner_name', None)
        uploaded_owner_id = fields_to_update.pop('owner_id', None)

        if uploaded_owner_name:
            # If an owner name was provided, replace the provided
            # uploaded_owner_id with the id of the provided group or owner
            try:
                uploaded_owner_id = importer_util.get_id_from_name(uploaded_owner_name, domain, name_cache)
            except SQLLocation.MultipleObjectsReturned:
                errors.add(ImportErrors.DuplicateLocationName, i + 1)
                continue

            if not uploaded_owner_id:
                errors.add(ImportErrors.InvalidOwnerName, i + 1, 'owner_name')
                continue
        if uploaded_owner_id:
            # If an owner_id mapping exists, verify it is a valid user
            # or case sharing group
            if importer_util.is_valid_id(uploaded_owner_id, domain, id_cache):
                owner_id = uploaded_owner_id
                id_cache[uploaded_owner_id] = True
            else:
                errors.add(ImportErrors.InvalidOwnerId, i + 1, 'owner_id')
                id_cache[uploaded_owner_id] = False
                continue
        else:
            # if they didn't supply an owner_id mapping, default to current
            # user
            owner_id = user_id

        extras = {}
        if parent_id:
            try:
                parent_case = CaseAccessors(domain).get_case(parent_id)

                if parent_case.domain == domain:
                    extras['index'] = {
                        parent_ref: (parent_case.type, parent_id)
                    }
            except ResourceNotFound:
                errors.add(ImportErrors.InvalidParentId, i + 1, 'parent_id')
                continue
        elif parent_external_id:
            parent_case, error = importer_util.lookup_case(
                'external_id',
                parent_external_id,
                domain,
                parent_type
            )
            if parent_case:
                extras['index'] = {
                    parent_ref: (parent_type, parent_case.case_id)
                }
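        # `extras['index']` maps a reference id (parent_ref, default 'parent')
        # to (case_type, case_id); CaseBlock turns it into a case index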

        case_name = fields_to_update.pop('name', None)
        if not case:
            # named case_id rather than `id` to avoid shadowing the builtin
            case_id = uuid.uuid4().hex

            if config.search_field == 'external_id':
                extras['external_id'] = search_id

            try:
                caseblock = CaseBlock(
                    create=True,
                    case_id=case_id,
                    owner_id=owner_id,
                    user_id=user_id,
                    case_type=config.case_type,
                    case_name=case_name or '',
                    update=fields_to_update,
                    **extras
                )
                caseblocks.append(caseblock)
                created_count += 1
                if external_id:
                    ids_seen.add(external_id)
            except CaseBlockError:
                errors.add(ImportErrors.CaseGeneration, i + 1)
        else:
            if external_id:
                extras['external_id'] = external_id
            if uploaded_owner_id:
                extras['owner_id'] = owner_id
            if to_close == 'yes':
                extras['close'] = True
            if case_name is not None:
                extras['case_name'] = case_name

            try:
                caseblock = CaseBlock(
                    create=False,
                    case_id=case.case_id,
                    update=fields_to_update,
                    **extras
                )
                caseblocks.append(caseblock)
                match_count += 1
            except CaseBlockError:
                errors.add(ImportErrors.CaseGeneration, i + 1)

        # check if we've reached a reasonable chunksize
        # and if so submit
        if len(caseblocks) >= chunksize:
            _submit_caseblocks(domain, config.case_type, caseblocks)
            num_chunks += 1
            caseblocks = []

    # final purge of anything left in the queue
    if _submit_caseblocks(domain, config.case_type, caseblocks):
        match_count -= 1
    num_chunks += 1
    return {
        'created_count': created_count,
        'match_count': match_count,
        'too_many_matches': too_many_matches,
        'errors': errors.as_dict(),
        'num_chunks': num_chunks,
    }