def handle(self):
    indicator_config_id = 'static-ccs_record_cases'
    case_type_of_xmlns = 'ccs_record'
    config = _get_config_by_id(indicator_config_id)
    document_store = get_document_store_for_doc_type(
        config.domain, config.referenced_doc_type,
        case_type_or_xmlns=case_type_of_xmlns
    )
    current_month_start = datetime.date.today().replace(day=1)
    last_month_start = (current_month_start - datetime.timedelta(days=1)).replace(day=1)
    current_month_start = current_month_start.strftime('%Y-%m-%d')
    last_month_start = last_month_start.strftime('%Y-%m-%d')

    current_month_doc_ids = CcsRecordMonthly.objects.filter(
        pnc_complete=1, month=current_month_start).values('case_id')
    docs_last_month = CcsRecordMonthly.objects.filter(
        pnc_complete=1, month=last_month_start).values('case_id')
    # QuerySets don't support `+`; materialize both before concatenating
    doc_ids = list(current_month_doc_ids) + list(docs_last_month)
    doc_ids = {doc_id['case_id'] for doc_id in doc_ids}

    relevant_ids = []
    next_event = time.time() + 10
    for doc_id in doc_ids:
        relevant_ids.append(doc_id)
        if len(relevant_ids) >= ID_CHUNK_SIZE:
            _build_indicators(config, document_store, relevant_ids)
            relevant_ids = []
        if time.time() > next_event:
            # after the set comprehension each doc_id is a plain case id string
            print("processed till case %s" % doc_id)
            next_event = time.time() + 10

    if relevant_ids:
        _build_indicators(config, document_store, relevant_ids)
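# The batching idiom above (accumulate ids, flush every ID_CHUNK_SIZE, then flush
# the remainder) recurs throughout this section. A minimal, self-contained sketch
# of the same pattern; CHUNK_SIZE and build_in_chunks are stand-ins, not project code.
from typing import Callable, Iterable, List

CHUNK_SIZE = 1000  # stand-in for ID_CHUNK_SIZE


def build_in_chunks(doc_ids: Iterable[str], build: Callable[[List[str]], None]) -> None:
    """Flush accumulated ids to `build` every CHUNK_SIZE, plus a final partial chunk."""
    batch: List[str] = []
    for doc_id in doc_ids:
        batch.append(doc_id)
        if len(batch) >= CHUNK_SIZE:
            build(batch)
            batch = []
    if batch:  # remainder smaller than one full chunk
        build(batch)


# usage: build_in_chunks(ids, lambda batch: _build_indicators(config, store, batch))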
def handle(self, domain, data_source_id, *args, **kwargs):
    config, _ = get_datasource_config(data_source_id, domain)
    adapter = get_indicator_adapter(config)
    q = adapter.get_query_object()
    document_store = get_document_store_for_doc_type(domain, config.referenced_doc_type)
    bad_rows = []
    for row in with_progress_bar(q, length=q.count()):
        doc_id = row.doc_id
        doc = document_store.get_document(doc_id)

        current_rows = config.get_all_values(doc)
        if len(current_rows) > 1:
            raise ValueError("this command doesn't work for datasources returning multiple rows per doc")

        try:
            current_row = current_rows[0]
        except IndexError:  # indexing an empty list raises IndexError, not KeyError
            continue

        # don't compare the 'inserted_at' columns
        current_row = [val for val in current_row if val.column.database_column_name != 'inserted_at']

        for val in current_row:
            try:
                inserted_value = getattr(row, val.column.database_column_name)
                if (inserted_value != val.value
                        or row.inserted_at.replace(tzinfo=pytz.utc) < parse_datetime(doc['server_modified_on'])):
                    bad_rows.append({
                        'doc_id': row.doc_id,
                        'column_name': val.column.database_column_name,
                        'inserted_at': row.inserted_at.isoformat(),
                        'server_modified_on': doc['server_modified_on'],
                        'stored_value': getattr(row, val.column.database_column_name),
                        'desired_value': val.value,
                        'message': ('column mismatch' if inserted_value != val.value
                                    else "modified date early"),
                    })
            except AttributeError:
                bad_rows.append({
                    'doc_id': row.doc_id,
                    'column_name': val.column.database_column_name,
                    'inserted_at': 'missing',
                    'server_modified_on': doc['server_modified_on'],
                    'stored_value': 'missing',
                    'desired_value': val.value,
                    'message': 'doc missing',
                })

    filename = 'datasource_mismatches_{}_{}.csv'.format(
        data_source_id[-8:],
        datetime.utcnow().strftime("%Y-%m-%d-%H-%M-%S")
    )
    with open(filename, 'w', encoding='utf-8') as f:
        headers = ['doc_id', 'column_name', 'inserted_at', 'server_modified_on',
                   'stored_value', 'desired_value', 'message']
        writer = csv.DictWriter(f, headers)
        writer.writeheader()
        writer.writerows(bad_rows)

    print("Found {} mismatches. Check {} for more details".format(len(bad_rows), filename))
def handle(self, domain, count, **options):
    sort_by = options['sort']
    indicators = AsyncIndicator.objects.filter(
        domain=domain).order_by('-date_created')[:count]
    print('processing {} indicators'.format(len(indicators)))

    # build up data source configs and docs
    configs = {}
    docs = {}
    for indicator in indicators:
        doc_store = get_document_store_for_doc_type(
            domain, indicator.doc_type, load_source="profile_async_indicators")
        docs[indicator.doc_id] = doc_store.get_document(indicator.doc_id)
        for config_id in indicator.indicator_config_ids:
            configs[config_id] = _get_config(config_id)

    local_variables = {
        '_simulate_indicator_saves': _simulate_indicator_saves,
        'indicators': indicators,
        'docs': docs,
        'configs': configs,
    }
    cProfile.runctx(
        '_simulate_indicator_saves(indicators, docs, configs)',
        {}, local_variables, 'async_ucr_stats.log')
    print_profile_stats('async_ucr_stats.log', sort_by)
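# `print_profile_stats` is not shown in this section. A minimal sketch of what such
# a helper typically does with the file written by cProfile.runctx, using only the
# standard library; the signature and the `limit` parameter are assumptions here,
# and `sort_by` is assumed to be a valid pstats sort key such as 'cumulative'.
import pstats


def print_profile_stats(filename, sort_by='cumulative', limit=50):
    # load the stats file produced by cProfile and print the top entries
    stats = pstats.Stats(filename)
    stats.strip_dirs().sort_stats(sort_by).print_stats(limit)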
def _get_document(related_doc_type, doc_id, context):
    document_store = get_document_store_for_doc_type(context.root_doc['domain'], related_doc_type)
    try:
        doc = document_store.get_document(doc_id)
    except DocumentNotFoundError:
        return None
    if context.root_doc['domain'] != doc.get('domain'):
        return None
    return doc
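# The domain check above means a related document is only returned when it lives in
# the same domain as the root document. A self-contained sketch of that guard with a
# stubbed store and context; every name below is a stand-in, not the real API.
from types import SimpleNamespace


class _StubStore:
    def __init__(self, docs):
        self._docs = docs

    def get_document(self, doc_id):
        return self._docs[doc_id]


def _get_document_demo(store, doc_id, context):
    doc = store.get_document(doc_id)
    if context.root_doc['domain'] != doc.get('domain'):
        return None  # refuse cross-domain lookups
    return doc


store = _StubStore({'c1': {'_id': 'c1', 'domain': 'other-domain'}})
ctx = SimpleNamespace(root_doc={'domain': 'my-domain'})
assert _get_document_demo(store, 'c1', ctx) is None  # cross-domain doc is hidden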
def _iteratively_build_table(config, resume_helper=None, in_place=False, limit=-1):
    resume_helper = resume_helper or DataSourceResumeHelper(config)
    indicator_config_id = config._id
    case_type_or_xmlns_list = config.get_case_type_or_xmlns_filter()
    completed_ct_xmlns = resume_helper.get_completed_case_type_or_xmlns()
    if completed_ct_xmlns:
        case_type_or_xmlns_list = [
            case_type_or_xmlns
            for case_type_or_xmlns in case_type_or_xmlns_list
            if case_type_or_xmlns not in completed_ct_xmlns
        ]

    for case_type_or_xmlns in case_type_or_xmlns_list:
        relevant_ids = []
        document_store = get_document_store_for_doc_type(
            config.domain, config.referenced_doc_type,
            case_type_or_xmlns=case_type_or_xmlns)

        for i, relevant_id in enumerate(document_store.iter_document_ids()):
            # chained comparison: stop early only when a non-negative limit is set
            if i >= limit > -1:
                break
            relevant_ids.append(relevant_id)
            if len(relevant_ids) >= ID_CHUNK_SIZE:
                _build_indicators(config, document_store, relevant_ids)
                relevant_ids = []

        if relevant_ids:
            _build_indicators(config, document_store, relevant_ids)

        resume_helper.add_completed_case_type_or_xmlns(case_type_or_xmlns)

    resume_helper.clear_resume_info()
    if not id_is_static(indicator_config_id):
        if in_place:
            config.meta.build.finished_in_place = True
        else:
            config.meta.build.finished = True
        try:
            config.save()
        except ResourceConflict:
            current_config = DataSourceConfiguration.get(config._id)
            # check that a new build has not yet started
            if in_place:
                if config.meta.build.initiated_in_place == current_config.meta.build.initiated_in_place:
                    current_config.meta.build.finished_in_place = True
            else:
                if config.meta.build.initiated == current_config.meta.build.initiated:
                    current_config.meta.build.finished = True
            current_config.save()

    adapter = get_indicator_adapter(config, raise_errors=True, can_handle_laboratory=True)
    adapter.after_table_build()
def get_ucr_config_and_document_store(self, indicator_config_id, case_type_of_xmlns):
    config = _get_config_by_id(indicator_config_id)
    document_store = get_document_store_for_doc_type(
        config.domain,
        config.referenced_doc_type,
        case_type_or_xmlns=case_type_of_xmlns,
        load_source="build_ccs_record_ucr",
    )
    return config, document_store
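# A hedged usage sketch, assuming this method lives on the ccs_record management
# command shown earlier in this section; the ids and case ids below are examples only.
# config, document_store = self.get_ucr_config_and_document_store(
#     'static-ccs_record_cases', 'ccs_record')
# for doc in document_store.iter_documents(['case-id-1', 'case-id-2']):
#     rows = config.get_all_values(doc)  # the rows this data source would emit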
def handle(self, domain, data_source_id, doc_id, **options):
    config, _ = get_datasource_config(data_source_id, domain)
    doc_type = config.referenced_doc_type
    doc_store = get_document_store_for_doc_type(domain, doc_type)
    doc = doc_store.get_document(doc_id)

    sort_by = options['sort']
    local_variables = {'config': config, 'doc': doc}
    cProfile.runctx('config.get_all_values(doc)', {}, local_variables, 'ucr_stats.log')
    print_profile_stats('ucr_stats.log', sort_by)
def _iteratively_build_table(config, resume_helper=None, in_place=False, limit=-1):
    resume_helper = resume_helper or DataSourceResumeHelper(config)
    indicator_config_id = config._id
    case_type_or_xmlns_list = config.get_case_type_or_xmlns_filter()
    domains = config.data_domains
    loop_iterations = list(itertools.product(domains, case_type_or_xmlns_list))
    completed_iterations = resume_helper.get_completed_iterations()
    if completed_iterations:
        loop_iterations = list(set(loop_iterations) - set(completed_iterations))

    for domain, case_type_or_xmlns in loop_iterations:
        relevant_ids = []
        document_store = get_document_store_for_doc_type(
            domain,
            config.referenced_doc_type,
            case_type_or_xmlns=case_type_or_xmlns,
            load_source="build_indicators",
        )
        for i, relevant_id in enumerate(document_store.iter_document_ids()):
            if i >= limit > -1:
                break
            relevant_ids.append(relevant_id)
            if len(relevant_ids) >= ID_CHUNK_SIZE:
                _build_indicators(config, document_store, relevant_ids)
                relevant_ids = []

        if relevant_ids:
            _build_indicators(config, document_store, relevant_ids)

        resume_helper.add_completed_iteration(domain, case_type_or_xmlns)

    resume_helper.clear_resume_info()
    if not id_is_static(indicator_config_id):
        if in_place:
            config.meta.build.finished_in_place = True
        else:
            config.meta.build.finished = True
        try:
            config.save()
        except ResourceConflict:
            current_config = get_ucr_datasource_config_by_id(config._id)
            # check that a new build has not yet started
            if in_place:
                if config.meta.build.initiated_in_place == current_config.meta.build.initiated_in_place:
                    current_config.meta.build.finished_in_place = True
            else:
                if config.meta.build.initiated == current_config.meta.build.initiated:
                    current_config.meta.build.finished = True
            current_config.save()
def _iteratively_build_table(config, resume_helper=None, in_place=False, limit=-1):
    resume_helper = resume_helper or DataSourceResumeHelper(config)
    indicator_config_id = config._id
    case_type_or_xmlns_list = config.get_case_type_or_xmlns_filter()
    completed_ct_xmlns = resume_helper.get_completed_case_type_or_xmlns()
    if completed_ct_xmlns:
        case_type_or_xmlns_list = [
            case_type_or_xmlns
            for case_type_or_xmlns in case_type_or_xmlns_list
            if case_type_or_xmlns not in completed_ct_xmlns
        ]

    for case_type_or_xmlns in case_type_or_xmlns_list:
        relevant_ids = []
        document_store = get_document_store_for_doc_type(
            config.domain,
            config.referenced_doc_type,
            case_type_or_xmlns=case_type_or_xmlns,
            load_source="build_indicators",
        )
        for i, relevant_id in enumerate(document_store.iter_document_ids()):
            if i >= limit > -1:
                break
            relevant_ids.append(relevant_id)
            if len(relevant_ids) >= ID_CHUNK_SIZE:
                _build_indicators(config, document_store, relevant_ids)
                relevant_ids = []

        if relevant_ids:
            _build_indicators(config, document_store, relevant_ids)

        resume_helper.add_completed_case_type_or_xmlns(case_type_or_xmlns)

    resume_helper.clear_resume_info()
    if not id_is_static(indicator_config_id):
        if in_place:
            config.meta.build.finished_in_place = True
        else:
            config.meta.build.finished = True
        try:
            config.save()
        except ResourceConflict:
            current_config = DataSourceConfiguration.get(config._id)
            # check that a new build has not yet started
            if in_place:
                if config.meta.build.initiated_in_place == current_config.meta.build.initiated_in_place:
                    current_config.meta.build.finished_in_place = True
            else:
                if config.meta.build.initiated == current_config.meta.build.initiated:
                    current_config.meta.build.finished = True
            current_config.save()
def handle(self):
    indicator_config_id = 'static-ccs_record_cases'
    case_type_of_xmlns = 'ccs_record'
    config = _get_config_by_id(indicator_config_id)
    document_store = get_document_store_for_doc_type(
        config.domain, config.referenced_doc_type,
        case_type_or_xmlns=case_type_of_xmlns,
        load_source="build_ccs_record_ucr",
    )
    current_month_start = datetime.date.today().replace(day=1)
    last_month_start = (current_month_start - datetime.timedelta(days=1)).replace(day=1)
    current_month_start = current_month_start.strftime('%Y-%m-%d')
    last_month_start = last_month_start.strftime('%Y-%m-%d')

    current_month_doc_ids = CcsRecordMonthly.objects.filter(
        pnc_complete=1, month=current_month_start).values('case_id')
    docs_last_month = CcsRecordMonthly.objects.filter(
        pnc_complete=1, month=last_month_start).values('case_id')
    # QuerySets don't support `+`; materialize both before concatenating
    doc_ids = list(current_month_doc_ids) + list(docs_last_month)
    doc_ids = {doc_id['case_id'] for doc_id in doc_ids}

    relevant_ids = []
    next_event = time.time() + 10
    for doc_id in doc_ids:
        relevant_ids.append(doc_id)
        if len(relevant_ids) >= ID_CHUNK_SIZE:
            _build_indicators(config, document_store, relevant_ids)
            relevant_ids = []
        if time.time() > next_event:
            # after the set comprehension each doc_id is a plain case id string
            print("processed till case %s" % doc_id)
            next_event = time.time() + 10

    if relevant_ids:
        _build_indicators(config, document_store, relevant_ids)
def handle(self, domain, count, **options):
    sort_by = options['sort']
    indicators = AsyncIndicator.objects.filter(domain=domain).order_by('-date_created')[:count]
    print('processing {} indicators'.format(len(indicators)))

    # build up data source configs and docs
    configs = {}
    docs = {}
    for indicator in indicators:
        doc_store = get_document_store_for_doc_type(domain, indicator.doc_type)
        docs[indicator.doc_id] = doc_store.get_document(indicator.doc_id)
        for config_id in indicator.indicator_config_ids:
            configs[config_id] = _get_config(config_id)

    local_variables = {
        '_simulate_indicator_saves': _simulate_indicator_saves,
        'indicators': indicators,
        'docs': docs,
        'configs': configs,
    }
    cProfile.runctx(
        '_simulate_indicator_saves(indicators, docs, configs)',
        {}, local_variables, 'async_ucr_stats.log')
    print_profile_stats('async_ucr_stats.log', sort_by)
def handle(self, domain, data_source_id, *args, **kwargs):
    config, _ = get_datasource_config(data_source_id, domain)
    adapter = get_indicator_adapter(config, load_source='find_datasource_mismatches')
    q = adapter.get_query_object()
    document_store = get_document_store_for_doc_type(
        domain, config.referenced_doc_type, load_source="find_datasource_mismatches")
    bad_rows = []
    for row in with_progress_bar(q, length=q.count()):
        adapter.track_load()
        doc_id = row.doc_id
        doc = document_store.get_document(doc_id)

        current_rows = config.get_all_values(doc)
        if len(current_rows) > 1:
            raise ValueError("this command doesn't work for datasources returning multiple rows per doc")

        try:
            current_row = current_rows[0]
        except IndexError:  # indexing an empty list raises IndexError, not KeyError
            continue

        # don't compare the 'inserted_at' columns
        current_row = [val for val in current_row if val.column.database_column_name != 'inserted_at']

        for val in current_row:
            try:
                inserted_value = getattr(row, val.column.database_column_name)
                if (inserted_value != val.value
                        or row.inserted_at.replace(tzinfo=pytz.utc) < parse_datetime(doc['server_modified_on'])):
                    bad_rows.append({
                        'doc_id': row.doc_id,
                        'column_name': val.column.database_column_name,
                        'inserted_at': row.inserted_at.isoformat(),
                        'server_modified_on': doc['server_modified_on'],
                        'stored_value': getattr(row, val.column.database_column_name),
                        'desired_value': val.value,
                        'message': ('column mismatch' if inserted_value != val.value
                                    else "modified date early"),
                    })
            except AttributeError:
                bad_rows.append({
                    'doc_id': row.doc_id,
                    'column_name': val.column.database_column_name,
                    'inserted_at': 'missing',
                    'server_modified_on': doc['server_modified_on'],
                    'stored_value': 'missing',
                    'desired_value': val.value,
                    'message': 'doc missing',
                })

    filename = 'datasource_mismatches_{}_{}.csv'.format(
        data_source_id[-8:],
        datetime.utcnow().strftime("%Y-%m-%d-%H-%M-%S")
    )
    with open(filename, 'w', encoding='utf-8') as f:
        headers = ['doc_id', 'column_name', 'inserted_at', 'server_modified_on',
                   'stored_value', 'desired_value', 'message']
        writer = csv.DictWriter(f, headers)
        writer.writeheader()
        writer.writerows(bad_rows)

    print("Found {} mismatches. Check {} for more details".format(len(bad_rows), filename))
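# The staleness check above compares a naive UTC `inserted_at` against the document's
# `server_modified_on` string; `replace(tzinfo=pytz.utc)` is what makes the two
# datetimes comparable. A stdlib-only illustration of the same idea (dates made up):
from datetime import datetime, timezone

# naive timestamp as stored in the table, known to be UTC
inserted_at = datetime(2019, 1, 1, 12, 0, 0)
# ISO 8601 string as found on the document
server_modified_on = '2019-01-01T12:30:00+00:00'

stale = inserted_at.replace(tzinfo=timezone.utc) < datetime.fromisoformat(server_modified_on)
assert stale  # the doc changed after the row was written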
def test_doc_store(self):
    doc_store = get_document_store_for_doc_type(self.domain, self.doc_type)
    self.assertSetEqual(set(self.all_doc_ids_domain), set(doc_store.iter_document_ids()))
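# Taken together, the call sites in this section exercise three document-store
# methods: get_document, iter_document_ids, and iter_documents (with
# DocumentNotFoundError raised on a miss). A sketch of that implied interface as a
# typing Protocol; this is inferred from usage here, not the project's actual class.
from typing import Any, Dict, Iterator, List, Protocol


class DocumentStore(Protocol):
    """Interface implied by the call sites above."""

    def get_document(self, doc_id: str) -> Dict[str, Any]:
        """Return a single document; raise DocumentNotFoundError on a miss."""
        ...

    def iter_document_ids(self) -> Iterator[str]:
        """Yield every matching document id."""
        ...

    def iter_documents(self, doc_ids: List[str]) -> Iterator[Dict[str, Any]]:
        """Yield the documents for the given ids."""
        ...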
def save_document(doc_ids):
    lock_keys = []
    for doc_id in doc_ids:
        lock_keys.append(get_async_indicator_modify_lock_key(doc_id))

    indicator_config_ids = None
    timer = TimingContext()
    with CriticalSection(lock_keys):
        indicators = AsyncIndicator.objects.filter(doc_id__in=doc_ids)
        if not indicators:
            return

        first_indicator = indicators[0]
        processed_indicators = []
        failed_indicators = []

        for i in indicators:
            assert i.domain == first_indicator.domain
            assert i.doc_type == first_indicator.doc_type

        indicator_by_doc_id = {i.doc_id: i for i in indicators}
        doc_store = get_document_store_for_doc_type(first_indicator.domain, first_indicator.doc_type)
        indicator_config_ids = first_indicator.indicator_config_ids
        related_docs_to_rebuild = set()

        with timer:
            for doc in doc_store.iter_documents(list(indicator_by_doc_id.keys())):
                indicator = indicator_by_doc_id[doc['_id']]
                successfully_processed, to_remove, rebuild_related_docs = _save_document_helper(indicator, doc)
                if rebuild_related_docs:
                    related_docs_to_rebuild = related_docs_to_rebuild.union(icds_get_related_docs_ids(doc['_id']))
                if successfully_processed:
                    processed_indicators.append(indicator.pk)
                else:
                    failed_indicators.append((indicator, to_remove))

        num_processed = len(processed_indicators)
        num_failed = len(failed_indicators)
        AsyncIndicator.objects.filter(pk__in=processed_indicators).delete()
        with transaction.atomic():
            for indicator, to_remove in failed_indicators:
                indicator.update_failure(to_remove)
                indicator.save()

    # remove any related docs that were just rebuilt
    related_docs_to_rebuild = related_docs_to_rebuild - set(doc_ids)
    # queue the docs that aren't already queued
    _queue_indicators(AsyncIndicator.objects.filter(doc_id__in=related_docs_to_rebuild, date_queued=None))

    datadog_counter('commcare.async_indicator.processed_success', num_processed)
    datadog_counter('commcare.async_indicator.processed_fail', num_failed)
    datadog_histogram(
        'commcare.async_indicator.processing_time', timer.duration,
        tags=['config_ids:{}'.format(indicator_config_ids)])
def _build_async_indicators(indicator_doc_ids):
    def handle_exception(exception, config_id, doc, adapter):
        metric = None
        if isinstance(exception, (ProtocolError, ReadTimeout)):
            metric = 'commcare.async_indicator.riak_error'
        elif isinstance(exception, (ESError, ConnectionTimeout)):
            # a database had an issue so log it and go on to the next document
            metric = 'commcare.async_indicator.es_error'
        elif isinstance(exception, (DatabaseError, InternalError)):
            # a database had an issue so log it and go on to the next document
            metric = 'commcare.async_indicator.psql_error'
        else:
            # getting the config could fail before the adapter is set
            if adapter:
                adapter.handle_exception(doc, exception)
        if metric:
            datadog_counter(metric, 1, tags={'config_id': config_id, 'doc_id': doc['_id']})

    def doc_ids_from_rows(rows):
        formatted_rows = [
            {column.column.database_column_name.decode('utf-8'): column.value for column in row}
            for row in rows
        ]
        return set(row['doc_id'] for row in formatted_rows)

    # tracks processed/deleted configs to be removed from each indicator
    configs_to_remove_by_indicator_id = defaultdict(list)

    def _mark_config_to_remove(config_id, indicator_ids):
        for _id in indicator_ids:
            configs_to_remove_by_indicator_id[_id].append(config_id)

    timer = TimingContext()
    lock_keys = [
        get_async_indicator_modify_lock_key(indicator_id)
        for indicator_id in indicator_doc_ids
    ]
    with CriticalSection(lock_keys):
        all_indicators = AsyncIndicator.objects.filter(doc_id__in=indicator_doc_ids)
        if not all_indicators:
            return

        doc_store = get_document_store_for_doc_type(all_indicators[0].domain, all_indicators[0].doc_type)
        failed_indicators = set()
        rows_to_save_by_adapter = defaultdict(list)
        indicator_by_doc_id = {i.doc_id: i for i in all_indicators}
        config_ids = set()

        with timer:
            for doc in doc_store.iter_documents(list(indicator_by_doc_id.keys())):
                indicator = indicator_by_doc_id[doc['_id']]
                eval_context = EvaluationContext(doc)
                for config_id in indicator.indicator_config_ids:
                    config_ids.add(config_id)
                    try:
                        config = _get_config_by_id(config_id)
                    except (ResourceNotFound, StaticDataSourceConfigurationNotFoundError):
                        celery_task_logger.info("{} no longer exists, skipping".format(config_id))
                        # remove because the config no longer exists
                        _mark_config_to_remove(config_id, [indicator.pk])
                        continue
                    except ESError:
                        celery_task_logger.info("ES errored when trying to retrieve config")
                        failed_indicators.add(indicator)
                        continue
                    adapter = None
                    try:
                        adapter = get_indicator_adapter(config)
                        rows_to_save_by_adapter[adapter].extend(adapter.get_all_values(doc, eval_context))
                        eval_context.reset_iteration()
                    except Exception as e:
                        failed_indicators.add(indicator)
                        handle_exception(e, config_id, doc, adapter)

            for adapter, rows in six.iteritems(rows_to_save_by_adapter):
                doc_ids = doc_ids_from_rows(rows)
                indicators = [indicator_by_doc_id[doc_id] for doc_id in doc_ids]
                try:
                    adapter.save_rows(rows)
                except Exception as e:
                    # set.union returns a new set; update mutates in place
                    failed_indicators.update(indicators)
                    message = six.text_type(e)
                    notify_exception(None, "Exception bulk saving async indicators:{}".format(message))
                else:
                    # remove because it's successfully processed; use the adapter's own
                    # config id rather than the loop variable left over from above
                    _mark_config_to_remove(adapter.config._id, [i.pk for i in indicators])

        # delete fully processed indicators
        processed_indicators = set(all_indicators) - failed_indicators
        AsyncIndicator.objects.filter(pk__in=[i.pk for i in processed_indicators]).delete()

        # update failure for failed indicators
        with transaction.atomic():
            for indicator in failed_indicators:
                indicator.update_failure(configs_to_remove_by_indicator_id.get(indicator.pk, []))
                indicator.save()

    datadog_counter('commcare.async_indicator.processed_success', len(processed_indicators))
    datadog_counter('commcare.async_indicator.processed_fail', len(failed_indicators))
    datadog_histogram(
        'commcare.async_indicator.processing_time', timer.duration / len(indicator_doc_ids),
        tags=[
            'config_ids:{}'.format(config_ids),
        ])
def build_async_indicators(indicator_doc_ids):
    # written to be used with _queue_indicators, indicator_doc_ids must
    # be a chunk of 100
    memoizers = {'configs': {}, 'adapters': {}}
    assert len(indicator_doc_ids) <= ASYNC_INDICATOR_CHUNK_SIZE

    def handle_exception(exception, config_id, doc, adapter):
        metric = None
        if isinstance(exception, (ProtocolError, ReadTimeout)):
            metric = 'commcare.async_indicator.riak_error'
        elif isinstance(exception, (ESError, ConnectionTimeout)):
            # a database had an issue so log it and go on to the next document
            metric = 'commcare.async_indicator.es_error'
        elif isinstance(exception, (DatabaseError, InternalError)):
            # a database had an issue so log it and go on to the next document
            metric = 'commcare.async_indicator.psql_error'
        else:
            # getting the config could fail before the adapter is set
            if adapter:
                adapter.handle_exception(doc, exception)
        if metric:
            metrics_counter(metric, tags={'config_id': config_id})

    def doc_ids_from_rows(rows):
        formatted_rows = [
            {column.column.database_column_name.decode('utf-8'): column.value for column in row}
            for row in rows
        ]
        return set(row['doc_id'] for row in formatted_rows)

    def _get_config(config_id):
        config_by_id = memoizers['configs']
        if config_id in config_by_id:
            return config_by_id[config_id]
        else:
            config = _get_config_by_id(config_id)
            config_by_id[config_id] = config
            return config

    def _get_adapter(config):
        adapter_by_config = memoizers['adapters']
        if config._id in adapter_by_config:
            return adapter_by_config[config._id]
        else:
            adapter = get_indicator_adapter(config, load_source='build_async_indicators')
            adapter_by_config[config._id] = adapter
            return adapter

    def _metrics_timer(step, config_id=None):
        tags = {
            'action': step,
        }
        if config_id and settings.ENTERPRISE_MODE:
            tags['config_id'] = config_id
        else:
            # Prometheus requires consistent tags even if not available
            tags['config_id'] = None
        return metrics_histogram_timer(
            'commcare.async_indicator.timing',
            timing_buckets=(.03, .1, .3, 1, 3, 10),
            tags=tags
        )

    # tracks processed/deleted configs to be removed from each indicator
    configs_to_remove_by_indicator_id = defaultdict(list)

    def _mark_config_to_remove(config_id, indicator_ids):
        for _id in indicator_ids:
            configs_to_remove_by_indicator_id[_id].append(config_id)

    timer = TimingContext()
    lock_keys = [
        get_async_indicator_modify_lock_key(indicator_doc_id)
        for indicator_doc_id in indicator_doc_ids
    ]
    with CriticalSection(lock_keys):
        all_indicators = AsyncIndicator.objects.filter(
            doc_id__in=indicator_doc_ids
        )
        if not all_indicators:
            return

        doc_store = get_document_store_for_doc_type(
            all_indicators[0].domain,
            all_indicators[0].doc_type,
            load_source="build_async_indicators",
        )
        failed_indicators = set()
        rows_to_save_by_adapter = defaultdict(list)
        docs_to_delete_by_adapter = defaultdict(list)
        # there will always be one AsyncIndicator per doc id
        indicator_by_doc_id = {i.doc_id: i for i in all_indicators}
        config_ids = set()

        with timer:
            for doc in doc_store.iter_documents(list(indicator_by_doc_id.keys())):
                indicator = indicator_by_doc_id[doc['_id']]
                eval_context = EvaluationContext(doc)
                for config_id in indicator.indicator_config_ids:
                    with _metrics_timer('transform', config_id):
                        config_ids.add(config_id)
                        try:
                            config = _get_config(config_id)
                        except (ResourceNotFound, StaticDataSourceConfigurationNotFoundError):
                            celery_task_logger.info("{} no longer exists, skipping".format(config_id))
                            # remove because the config no longer exists
                            _mark_config_to_remove(config_id, [indicator.pk])
                            continue
                        except ESError:
                            celery_task_logger.info("ES errored when trying to retrieve config")
                            failed_indicators.add(indicator)
                            continue
                        adapter = None
                        try:
                            adapter = _get_adapter(config)
                            rows_to_save = adapter.get_all_values(doc, eval_context)
                            if rows_to_save:
                                rows_to_save_by_adapter[adapter].extend(rows_to_save)
                            else:
                                docs_to_delete_by_adapter[adapter].append(doc)
                            eval_context.reset_iteration()
                        except Exception as e:
                            failed_indicators.add(indicator)
                            handle_exception(e, config_id, doc, adapter)

            with _metrics_timer('single_batch_update'):
                for adapter, rows in rows_to_save_by_adapter.items():
                    doc_ids = doc_ids_from_rows(rows)
                    indicators = [indicator_by_doc_id[doc_id] for doc_id in doc_ids]
                    try:
                        with _metrics_timer('update', adapter.config._id):
                            adapter.save_rows(rows, use_shard_col=True)
                    except Exception as e:
                        # set.union returns a new set; update mutates in place
                        failed_indicators.update(indicators)
                        message = str(e)
                        notify_exception(None, "Exception bulk saving async indicators:{}".format(message))
                    else:
                        # remove because it's successfully processed; use the adapter's own
                        # config id rather than the loop variable left over from above
                        _mark_config_to_remove(
                            adapter.config._id,
                            [i.pk for i in indicators]
                        )

            with _metrics_timer('single_batch_delete'):
                for adapter, docs in docs_to_delete_by_adapter.items():
                    with _metrics_timer('delete', adapter.config._id):
                        adapter.bulk_delete(docs)

        # delete fully processed indicators
        processed_indicators = set(all_indicators) - failed_indicators
        AsyncIndicator.objects.filter(pk__in=[i.pk for i in processed_indicators]).delete()

        # update failure for failed indicators
        with transaction.atomic():
            for indicator in failed_indicators:
                indicator.update_failure(
                    configs_to_remove_by_indicator_id.get(indicator.pk, [])
                )
                indicator.save()

    metrics_counter('commcare.async_indicator.processed_success', len(processed_indicators))
    metrics_counter('commcare.async_indicator.processed_fail', len(failed_indicators))
    metrics_counter(
        'commcare.async_indicator.processing_time', timer.duration,
        tags={'config_ids': config_ids}
    )
    metrics_counter(
        'commcare.async_indicator.processed_total', len(indicator_doc_ids),
        tags={'config_ids': config_ids}
    )
def _build_async_indicators(indicator_doc_ids):
    def handle_exception(exception, config_id, doc, adapter):
        metric = None
        if isinstance(exception, (ProtocolError, ReadTimeout)):
            metric = 'commcare.async_indicator.riak_error'
        elif isinstance(exception, (ESError, ConnectionTimeout)):
            # a database had an issue so log it and go on to the next document
            metric = 'commcare.async_indicator.es_error'
        elif isinstance(exception, (DatabaseError, InternalError)):
            # a database had an issue so log it and go on to the next document
            metric = 'commcare.async_indicator.psql_error'
        else:
            # getting the config could fail before the adapter is set
            if adapter:
                adapter.handle_exception(doc, exception)
        if metric:
            datadog_counter(metric, 1, tags={'config_id': config_id, 'doc_id': doc['_id']})

    def doc_ids_from_rows(rows):
        formatted_rows = [
            {column.column.database_column_name.decode('utf-8'): column.value for column in row}
            for row in rows
        ]
        return set(row['doc_id'] for row in formatted_rows)

    # tracks processed/deleted configs to be removed from each indicator
    configs_to_remove_by_indicator_id = defaultdict(list)

    def _mark_config_to_remove(config_id, indicator_ids):
        for _id in indicator_ids:
            configs_to_remove_by_indicator_id[_id].append(config_id)

    timer = TimingContext()
    lock_keys = [
        get_async_indicator_modify_lock_key(indicator_id)
        for indicator_id in indicator_doc_ids
    ]
    with CriticalSection(lock_keys):
        all_indicators = AsyncIndicator.objects.filter(
            doc_id__in=indicator_doc_ids
        )
        if not all_indicators:
            return

        doc_store = get_document_store_for_doc_type(
            all_indicators[0].domain,
            all_indicators[0].doc_type,
            load_source="build_async_indicators",
        )
        failed_indicators = set()
        rows_to_save_by_adapter = defaultdict(list)
        indicator_by_doc_id = {i.doc_id: i for i in all_indicators}
        config_ids = set()

        with timer:
            for doc in doc_store.iter_documents(list(indicator_by_doc_id.keys())):
                indicator = indicator_by_doc_id[doc['_id']]
                eval_context = EvaluationContext(doc)
                for config_id in indicator.indicator_config_ids:
                    config_ids.add(config_id)
                    try:
                        config = _get_config_by_id(config_id)
                    except (ResourceNotFound, StaticDataSourceConfigurationNotFoundError):
                        celery_task_logger.info("{} no longer exists, skipping".format(config_id))
                        # remove because the config no longer exists
                        _mark_config_to_remove(config_id, [indicator.pk])
                        continue
                    except ESError:
                        celery_task_logger.info("ES errored when trying to retrieve config")
                        failed_indicators.add(indicator)
                        continue
                    adapter = None
                    try:
                        adapter = get_indicator_adapter(config, load_source='build_async_indicators')
                        rows_to_save_by_adapter[adapter].extend(adapter.get_all_values(doc, eval_context))
                        eval_context.reset_iteration()
                    except Exception as e:
                        failed_indicators.add(indicator)
                        handle_exception(e, config_id, doc, adapter)

            for adapter, rows in six.iteritems(rows_to_save_by_adapter):
                doc_ids = doc_ids_from_rows(rows)
                indicators = [indicator_by_doc_id[doc_id] for doc_id in doc_ids]
                try:
                    adapter.save_rows(rows)
                except Exception as e:
                    # set.union returns a new set; update mutates in place
                    failed_indicators.update(indicators)
                    message = six.text_type(e)
                    notify_exception(None, "Exception bulk saving async indicators:{}".format(message))
                else:
                    # remove because it's successfully processed; use the adapter's own
                    # config id rather than the loop variable left over from above
                    _mark_config_to_remove(
                        adapter.config._id,
                        [i.pk for i in indicators]
                    )

        # delete fully processed indicators
        processed_indicators = set(all_indicators) - failed_indicators
        AsyncIndicator.objects.filter(pk__in=[i.pk for i in processed_indicators]).delete()

        # update failure for failed indicators
        with transaction.atomic():
            for indicator in failed_indicators:
                indicator.update_failure(
                    configs_to_remove_by_indicator_id.get(indicator.pk, [])
                )
                indicator.save()

    datadog_counter('commcare.async_indicator.processed_success', len(processed_indicators))
    datadog_counter('commcare.async_indicator.processed_fail', len(failed_indicators))
    datadog_histogram(
        'commcare.async_indicator.processing_time', timer.duration / len(indicator_doc_ids),
        tags=[
            'config_ids:{}'.format(config_ids),
        ]
    )
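# For illustration, a self-contained sketch of what doc_ids_from_rows does, with fake
# row objects built from SimpleNamespace. The row/column structure (a .column with a
# bytes database_column_name, plus a .value) is inferred from the code above; the real
# objects come from adapter.get_all_values.
from types import SimpleNamespace


def doc_ids_from_rows(rows):
    formatted_rows = [
        {column.column.database_column_name.decode('utf-8'): column.value for column in row}
        for row in rows
    ]
    return set(row['doc_id'] for row in formatted_rows)


def _col(name, value):
    # column names are stored as bytes, hence the .decode('utf-8') above
    return SimpleNamespace(
        column=SimpleNamespace(database_column_name=name.encode('utf-8')),
        value=value,
    )


rows = [
    [_col('doc_id', 'case-1'), _col('count', 3)],
    [_col('doc_id', 'case-2'), _col('count', 5)],
]
assert doc_ids_from_rows(rows) == {'case-1', 'case-2'}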