Example #1
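    # Management command handler: queues every case of the given type for
    # asynchronous indicator processing by building the datasource tables and
    # then feeding a FakeChange per case id into AsyncIndicator.update_indicators.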
    def handle(self, domain, case_type, data_source_ids, **options):
        configs = []
        for data_source_id in data_source_ids:
            config, _ = get_datasource_config(data_source_id, domain)
            assert config.asynchronous
            assert config.referenced_doc_type == CASE_DOC_TYPE
            configs.append(config)

        fake_change_doc = {'doc_type': CASE_DOC_TYPE, 'domain': domain}

        doc_store = get_document_store(domain, CASE_DOC_TYPE)
        case_accessor = doc_store.case_accessors

        case_ids = case_accessor.get_case_ids_in_domain(type=case_type)
        num_case_ids = len(case_ids)
        print("inserting %d docs" % num_case_ids)

        for config in configs:
            adapter = get_indicator_adapter(config, can_handle_laboratory=True)
            adapter.build_table()
            # normally called after rebuilding finishes
            adapter.after_table_build()

        config_ids = [config._id for config in configs]
        for i, case_id in enumerate(case_ids):
            change = FakeChange(case_id, fake_change_doc)
            AsyncIndicator.update_indicators(change, config_ids)
            if i % 1000 == 0:
                print("inserted %d / %d docs" % (i, num_case_ids))
Example #2
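# Iterates document ids in chunks of ID_CHUNK_SIZE, recording resume points
# before building each chunk of indicators, then marks the build finished,
# retrying the save if a ResourceConflict signals a concurrent edit.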
def _iteratively_build_table(config, last_id=None, resume_helper=None):
    resume_helper = resume_helper or DataSourceResumeHelper(config)
    indicator_config_id = config._id

    relevant_ids = []
    document_store = get_document_store(config.domain, config.referenced_doc_type)
    for relevant_id in document_store.iter_document_ids(last_id):
        relevant_ids.append(relevant_id)
        if len(relevant_ids) >= ID_CHUNK_SIZE:
            resume_helper.set_ids_to_resume_from(relevant_ids)
            _build_indicators(config, document_store, relevant_ids, resume_helper)
            relevant_ids = []

    if relevant_ids:
        resume_helper.set_ids_to_resume_from(relevant_ids)
        _build_indicators(config, document_store, relevant_ids, resume_helper)

    if not id_is_static(indicator_config_id):
        resume_helper.clear_ids()
        config.meta.build.finished = True
        try:
            config.save()
        except ResourceConflict:
            current_config = DataSourceConfiguration.get(config._id)
            # check that a new build has not yet started
            if config.meta.build.initiated == current_config.meta.build.initiated:
                current_config.meta.build.finished = True
                current_config.save()
Example #3
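# Processes a batch of queued AsyncIndicator rows under per-document locks:
# saves indicators for each document, re-queues related docs that still need a
# rebuild, deletes successfully processed rows, records failures, and reports
# counts and timing to Datadog.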
def save_document(doc_ids):
    lock_keys = []
    for doc_id in doc_ids:
        lock_keys.append(get_async_indicator_modify_lock_key(doc_id))

    indicator_config_ids = None
    timer = TimingContext()
    with CriticalSection(lock_keys):
        indicators = AsyncIndicator.objects.filter(doc_id__in=doc_ids)
        if not indicators:
            return

        first_indicator = indicators[0]
        processed_indicators = []
        failed_indicators = []

        for i in indicators:
            assert i.domain == first_indicator.domain
            assert i.doc_type == first_indicator.doc_type

        indicator_by_doc_id = {i.doc_id: i for i in indicators}
        doc_store = get_document_store(first_indicator.domain,
                                       first_indicator.doc_type)
        indicator_config_ids = first_indicator.indicator_config_ids
        related_docs_to_rebuild = set()

        with timer:
            for doc in doc_store.iter_documents(indicator_by_doc_id.keys()):
                indicator = indicator_by_doc_id[doc['_id']]
                successfully_processed, to_remove, rebuild_related_docs = _save_document_helper(
                    indicator, doc)
                if rebuild_related_docs:
                    related_docs_to_rebuild = related_docs_to_rebuild.union(
                        icds_get_related_docs_ids(doc['_id']))
                if successfully_processed:
                    processed_indicators.append(indicator.pk)
                else:
                    failed_indicators.append((indicator, to_remove))

        num_processed = len(processed_indicators)
        num_failed = len(failed_indicators)
        AsyncIndicator.objects.filter(pk__in=processed_indicators).delete()
        with transaction.atomic():
            for indicator, to_remove in failed_indicators:
                indicator.update_failure(to_remove)
                indicator.save()

    # remove any related docs that were just rebuilt
    related_docs_to_rebuild = related_docs_to_rebuild - set(doc_ids)
    # queue the docs that aren't already queued
    _queue_indicators(
        AsyncIndicator.objects.filter(doc_id__in=related_docs_to_rebuild,
                                      date_queued=None))

    datadog_counter('commcare.async_indicator.processed_success',
                    num_processed)
    datadog_counter('commcare.async_indicator.processed_fail', num_failed)
    datadog_histogram('commcare.async_indicator.processing_time',
                      timer.duration,
                      tags=[u'config_ids:{}'.format(indicator_config_ids)])
Example #4
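# Same chunked build loop as Example #2, but also runs the adapter's
# after_table_build() hook once the rebuild completes.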
def _iteratively_build_table(config, last_id=None, resume_helper=None):
    resume_helper = resume_helper or DataSourceResumeHelper(config)
    indicator_config_id = config._id

    relevant_ids = []
    document_store = get_document_store(config.domain,
                                        config.referenced_doc_type)
    for relevant_id in document_store.iter_document_ids(last_id):
        relevant_ids.append(relevant_id)
        if len(relevant_ids) >= ID_CHUNK_SIZE:
            resume_helper.set_ids_to_resume_from(relevant_ids)
            _build_indicators(config, document_store, relevant_ids,
                              resume_helper)
            relevant_ids = []

    if relevant_ids:
        resume_helper.set_ids_to_resume_from(relevant_ids)
        _build_indicators(config, document_store, relevant_ids, resume_helper)

    if not id_is_static(indicator_config_id):
        resume_helper.clear_ids()
        config.meta.build.finished = True
        try:
            config.save()
        except ResourceConflict:
            current_config = DataSourceConfiguration.get(config._id)
            # check that a new build has not yet started
            if config.meta.build.initiated == current_config.meta.build.initiated:
                current_config.meta.build.finished = True
                current_config.save()
        adapter = get_indicator_adapter(config,
                                        raise_errors=True,
                                        can_handle_laboratory=True)
        adapter.after_table_build()
Example #5
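# Fetches a related document and guards against cross-domain lookups by
# returning None when the document's domain differs from the root doc's.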
def _get_document(related_doc_type, doc_id, context):
    document_store = get_document_store(context.root_doc['domain'], related_doc_type)
    try:
        doc = document_store.get_document(doc_id)
    except DocumentNotFoundError:
        return None
    if context.root_doc['domain'] != doc.get('domain'):
        return None
    return doc
Example #6
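# Builds the table one case type / xmlns filter at a time so the rebuild can
# resume per filter, honors an optional document limit, and distinguishes
# in-place rebuilds from full ones when marking the build finished.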
def _iteratively_build_table(config,
                             resume_helper=None,
                             in_place=False,
                             limit=-1):
    resume_helper = resume_helper or DataSourceResumeHelper(config)
    indicator_config_id = config._id
    case_type_or_xmlns_list = config.get_case_type_or_xmlns_filter()
    completed_ct_xmlns = resume_helper.get_completed_case_type_or_xmlns()
    if completed_ct_xmlns:
        case_type_or_xmlns_list = [
            case_type_or_xmlns
            for case_type_or_xmlns in case_type_or_xmlns_list
            if case_type_or_xmlns not in completed_ct_xmlns
        ]

    for case_type_or_xmlns in case_type_or_xmlns_list:
        relevant_ids = []
        document_store = get_document_store(
            config.domain,
            config.referenced_doc_type,
            case_type_or_xmlns=case_type_or_xmlns)

        for i, relevant_id in enumerate(document_store.iter_document_ids()):
            if i >= limit > -1:
                break
            relevant_ids.append(relevant_id)
            if len(relevant_ids) >= ID_CHUNK_SIZE:
                _build_indicators(config, document_store, relevant_ids)
                relevant_ids = []

        if relevant_ids:
            _build_indicators(config, document_store, relevant_ids)

        resume_helper.add_completed_case_type_or_xmlns(case_type_or_xmlns)

    resume_helper.clear_resume_info()
    if not id_is_static(indicator_config_id):
        if in_place:
            config.meta.build.finished_in_place = True
        else:
            config.meta.build.finished = True
        try:
            config.save()
        except ResourceConflict:
            current_config = DataSourceConfiguration.get(config._id)
            # check that a new build has not yet started
            if in_place:
                if config.meta.build.initiated_in_place == current_config.meta.build.initiated_in_place:
                    current_config.meta.build.finished_in_place = True
            else:
                if config.meta.build.initiated == current_config.meta.build.initiated:
                    current_config.meta.build.finished = True
            current_config.save()
        adapter = get_indicator_adapter(config,
                                        raise_errors=True,
                                        can_handle_laboratory=True)
        adapter.after_table_build()
Example #7
    def handle(self, domain, data_source_id, doc_id, **options):
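        # Profiles config.get_all_values(doc) for a single document with
        # cProfile and prints the stats sorted by the requested key.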
        config, _ = get_datasource_config(data_source_id, domain)
        doc_type = config.referenced_doc_type
        doc_store = get_document_store(domain, doc_type)
        doc = doc_store.get_document(doc_id)
        sort_by = options['sort']
        local_variables = {'config': config, 'doc': doc}

        cProfile.runctx('config.get_all_values(doc)', {}, local_variables,
                        'ucr_stats.log')
        print_profile_stats('ucr_stats.log', sort_by)
Example #8
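# Related-document expression: fetches the referenced doc, refuses cross-domain
# lookups, and evaluates the wrapped expression in a fresh EvaluationContext.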
def get_value(self, doc_id, context):
    try:
        assert context.root_doc['domain']
        document_store = get_document_store(context.root_doc['domain'], self.related_doc_type)
        doc = document_store.get_document(doc_id)
        # ensure no cross-domain lookups of different documents
        if context.root_doc['domain'] != doc.get('domain'):
            return None
        # explicitly use a new evaluation context since this is a new document
        return self._value_expression(doc, EvaluationContext(doc, 0))
    except DocumentNotFoundError:
        return None
Example #9
def get_value(self, doc_id, context):
    try:
        assert context.root_doc['domain']
        document_store = get_document_store(context.root_doc['domain'],
                                            self.related_doc_type)
        doc = document_store.get_document(doc_id)
        # ensure no cross-domain lookups of different documents
        if context.root_doc['domain'] != doc.get('domain'):
            return None
        # explicitly use a new evaluation context since this is a new document
        return self._value_expression(doc, EvaluationContext(doc, 0))
    except DocumentNotFoundError:
        return None
Example #10
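# Resumes a partially finished rebuild: re-processes the stored resume ids,
# then continues iterating from the last processed document id.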
def resume_building_indicators(indicator_config_id, initiated_by=None):
    config = _get_config_by_id(indicator_config_id)
    success = _('Your UCR table {} has finished rebuilding').format(config.table_id)
    failure = _('There was an error rebuilding Your UCR table {}.').format(config.table_id)
    send = toggles.SEND_UCR_REBUILD_INFO.enabled(initiated_by)
    with notify_someone(initiated_by, success_message=success, error_message=failure, send=send):
        resume_helper = DataSourceResumeHelper(config)

        relevant_ids = resume_helper.get_ids_to_resume_from()
        if len(relevant_ids) > 0:
            _build_indicators(config, get_document_store(config.domain, config.referenced_doc_type), relevant_ids,
                              resume_helper)
            last_id = relevant_ids[-1]
            iteratively_build_table(config, last_id, resume_helper)
Example #11
def resume_building_indicators(indicator_config_id, initiated_by=None):
    config = _get_config_by_id(indicator_config_id)
    success = _('Your UCR table {} has finished rebuilding').format(config.table_id)
    failure = _('There was an error rebuilding Your UCR table {}.').format(config.table_id)
    send = toggles.SEND_UCR_REBUILD_INFO.enabled(initiated_by)
    with notify_someone(initiated_by, success_message=success, error_message=failure, send=send):
        resume_helper = DataSourceResumeHelper(config)

        relevant_ids = resume_helper.get_ids_to_resume_from()
        if len(relevant_ids) > 0:
            _build_indicators(config, get_document_store(config.domain, config.referenced_doc_type), relevant_ids,
                              resume_helper)
            last_id = relevant_ids[-1]
            _iteratively_build_table(config, last_id, resume_helper)
Example #12
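# A save_document variant that saves each doc through every configured
# indicator adapter; failed indicators are cleared for re-queueing by resetting
# date_queued and incrementing unsuccessful_attempts.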
def save_document(doc_ids):
    lock_keys = []
    for doc_id in doc_ids:
        lock_keys.append(get_async_indicator_modify_lock_key(doc_id))

    with CriticalSection(lock_keys):
        indicators = AsyncIndicator.objects.filter(doc_id__in=doc_ids)
        if not indicators:
            return

        first_indicator = indicators[0]
        processed_indicators = []
        failed_indicators = []

        for i in indicators:
            assert i.domain == first_indicator.domain
            assert i.doc_type == first_indicator.doc_type

        indicator_by_doc_id = {i.doc_id: i for i in indicators}
        doc_store = get_document_store(first_indicator.domain,
                                       first_indicator.doc_type)
        for doc in doc_store.iter_documents(doc_ids):
            indicator = indicator_by_doc_id[doc['_id']]

            eval_context = EvaluationContext(doc)
            for config_id in indicator.indicator_config_ids:
                adapter = None
                try:
                    config = _get_config(config_id)
                    adapter = get_indicator_adapter(config,
                                                    can_handle_laboratory=True)
                    adapter.save(doc, eval_context)
                    eval_context.reset_iteration()
                except (ESError, RequestError, ConnectionTimeout):
                    # couch or es had an issue so don't log it and go on to the next doc
                    failed_indicators.append(indicator.pk)
                    break
                except Exception as e:
                    # getting the config could fail before the adapter is set
                    if adapter:
                        adapter.handle_exception(doc, e)
                    failed_indicators.append(indicator.pk)
                    break
                else:
                    processed_indicators.append(indicator.pk)

        AsyncIndicator.objects.filter(pk__in=processed_indicators).delete()
        AsyncIndicator.objects.filter(pk__in=failed_indicators).update(
            date_queued=None,
            unsuccessful_attempts=F('unsuccessful_attempts') + 1)
Example #13
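# A save_document variant that delegates per-document work to
# _save_document_helper, records failures via update_failure(), and reports the
# batch processing time to Datadog.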
def save_document(doc_ids):
    lock_keys = []
    for doc_id in doc_ids:
        lock_keys.append(get_async_indicator_modify_lock_key(doc_id))

    indicator_config_ids = None
    timer = TimingContext()
    with CriticalSection(lock_keys):
        indicators = AsyncIndicator.objects.filter(doc_id__in=doc_ids)
        if not indicators:
            return

        first_indicator = indicators[0]
        processed_indicators = []
        failed_indicators = []

        for i in indicators:
            assert i.domain == first_indicator.domain
            assert i.doc_type == first_indicator.doc_type

        indicator_by_doc_id = {i.doc_id: i for i in indicators}
        doc_store = get_document_store(first_indicator.domain, first_indicator.doc_type)
        indicator_config_ids = first_indicator.indicator_config_ids

        with timer:
            for doc in doc_store.iter_documents(doc_ids):
                indicator = indicator_by_doc_id[doc['_id']]
                successfully_processed, to_remove = _save_document_helper(indicator, doc)
                if successfully_processed:
                    processed_indicators.append(indicator.pk)
                else:
                    failed_indicators.append((indicator, to_remove))

        AsyncIndicator.objects.filter(pk__in=processed_indicators).delete()
        with transaction.atomic():
            for indicator, to_remove in failed_indicators:
                indicator.update_failure(to_remove)
                indicator.save()

    datadog_histogram(
        'commcare.async_indicator.processing_time', timer.duration,
        tags=[
            u'config_ids:{}'.format(indicator_config_ids)
        ]
    )
Example #14
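    # Profiles config.get_all_values(doc) with cProfile and prints several
    # pstats breakdowns: top functions, expression specs, socket recvs,
    # document fetches, Postgres queries, and ES queries.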
    def handle(self, domain, data_source_id, doc_id, **options):
        config, _ = get_datasource_config(data_source_id, domain)
        doc_type = config.referenced_doc_type
        doc_store = get_document_store(domain, doc_type)
        doc = doc_store.get_document(doc_id)

        local_variables = {'config': config, 'doc': doc}

        cProfile.runctx('config.get_all_values(doc)', {}, local_variables,
                        'ucr_stats.log')
        p = pstats.Stats('ucr_stats.log')
        p.sort_stats('time')

        print("Top 10 functions by time\n")
        p.print_stats(10)

        print("Specs timing\n")
        p.print_stats('userreports.*specs.*\(__call__\)')

        print("Socket recvs\n")
        p.print_stats('recv')

        print("Doc retrievals\n")
        p.print_stats('document_store.*\(get_document\)')

        print("Postgres queries\n")
        p.print_stats('execute.*psycopg')

        print("ES queries\n")
        p.print_stats('es_query.py.*\(run\)')

        print("""
        Note: Due to overhead in profiling, these times are much larger than the real times.

        Next Steps:
           1) choose one of the previous calls to investigate
           2) use print_callees or print_callers to follow the calls
              * usage https://docs.python.org/2/library/profile.html#pstats.Stats.print_stats
           3) check out branch je/time-ucr to get logs for processing time of each column
              (you'll likely need to rebase it on latest master)
        """)
Example #15
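    # Profiles simulated indicator saves for the most recently created
    # AsyncIndicators in a domain, preloading their documents and datasource
    # configs first.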
    def handle(self, domain, count, **options):
        sort_by = options['sort']
        indicators = AsyncIndicator.objects.filter(
            domain=domain).order_by('-date_created')[:count]
        print('processing {} indicators'.format(len(indicators)))

        # build up data source configs and docs
        configs = {}
        docs = {}
        for indicator in indicators:
            docs[indicator.doc_id] = get_document_store(
                domain, indicator.doc_type).get_document(indicator.doc_id)
            for config_id in indicator.indicator_config_ids:
                configs[config_id] = _get_config(config_id)

        local_variables = {
            '_simulate_indicator_saves': _simulate_indicator_saves,
            'indicators': indicators,
            'docs': docs,
            'configs': configs,
        }
        cProfile.runctx('_simulate_indicator_saves(indicators, docs, configs)',
                        {}, local_variables, 'async_ucr_stats.log')
        print_profile_stats('async_ucr_stats.log', sort_by)
Example #16
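# Debug view that evaluates a UCR expression (posted as JSON) against a form or
# case document and returns the result, with dedicated error responses for
# missing documents and bad expression specs.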
def evaluate_expression(request, domain):
    doc_type = request.POST['doc_type']
    doc_id = request.POST['doc_id']
    try:
        usable_type = {
            'form': 'XFormInstance',
            'case': 'CommCareCase',
        }.get(doc_type, 'Unknown')
        document_store = get_document_store(domain, usable_type)
        doc = document_store.get_document(doc_id)
        expression_text = request.POST['expression']
        expression_json = json.loads(expression_text)
        parsed_expression = ExpressionFactory.from_spec(expression_json)
        result = parsed_expression(doc, EvaluationContext(doc))
        return json_response({
            "result": result,
        })
    except DocumentNotFoundError:
        return json_response(
            {
                "error":
                _("{} with id {} not found in domain {}.").format(
                    doc_type, doc_id, domain)
            },
            status_code=404,
        )
    except BadSpecError as e:
        return json_response(
            {"error": _("Problem with expression: {}.").format(e)},
            status_code=400,
        )
    except Exception as e:
        return json_response(
            {"error": unicode(e)},
            status_code=500,
        )
Example #17
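    # Recomputes each row of a datasource from its source document and writes
    # any mismatches (value differences or stale inserted_at timestamps) to a
    # CSV report.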
    def handle(self, domain, data_source_id, *args, **kwargs):
        config, _ = get_datasource_config(data_source_id, domain)
        adapter = get_indicator_adapter(config)
        q = adapter.get_query_object()
        document_store = get_document_store(domain, config.referenced_doc_type)
        bad_rows = []
        for row in with_progress_bar(q, length=q.count()):
            doc_id = row.doc_id
            doc = document_store.get_document(doc_id)

            current_rows = config.get_all_values(doc)
            if len(current_rows) > 1:
                raise ValueError(
                    "this command doesn't work for datasources returning multiple rows per doc"
                )

            try:
                current_row = current_rows[0]
            except IndexError:
                continue

            # don't compare the 'inserted_at' columns
            current_row = [
                val for val in current_row
                if val.column.database_column_name != 'inserted_at'
            ]

            for val in current_row:
                try:
                    inserted_value = getattr(row, val.column.database_column_name)
                    if (inserted_value != val.value
                            or row.inserted_at.replace(tzinfo=pytz.utc) <
                            parse_datetime(doc['server_modified_on'])):
                        bad_rows.append({
                            'doc_id': row.doc_id,
                            'column_name': val.column.database_column_name,
                            'inserted_at': row.inserted_at.isoformat(),
                            'server_modified_on': doc['server_modified_on'],
                            'stored_value': getattr(row, val.column.database_column_name),
                            'desired_value': val.value,
                            'message': ('column mismatch'
                                        if inserted_value != val.value
                                        else 'modified date early'),
                        })
                except AttributeError:
                    bad_rows.append({
                        'doc_id': row.doc_id,
                        'column_name': val.column.database_column_name,
                        'inserted_at': 'missing',
                        'server_modified_on': doc['server_modified_on'],
                        'stored_value': 'missing',
                        'desired_value': val.value,
                        'message': 'doc missing',
                    })

        filename = 'datasource_mismatches_{}_{}.csv'.format(
            data_source_id[-8:],
            datetime.utcnow().strftime("%Y-%m-%d-%H-%M-%S"))
        with open(filename, 'w') as f:
            headers = [
                'doc_id', 'column_name', 'inserted_at', 'server_modified_on',
                'stored_value', 'desired_value', 'message'
            ]
            writer = csv.DictWriter(f, headers)
            writer.writeheader()
            writer.writerows(bad_rows)

        print("Found {} mismatches. Check {} for more details".format(
            len(bad_rows), filename))