def test_column_uniqueness_when_truncated(self):
    problem_spec = {
        "display_name": "practicing_lessons",
        "property_name": "long_column",
        "choices": [
            "duplicate_choice_1",
            "duplicate_choice_2",
        ],
        "select_style": "multiple",
        "column_id": "a_very_long_base_selection_column_name_with_limited_room",
        "type": "choice_list",
    }
    data_source_config = DataSourceConfiguration(
        domain='test',
        display_name='foo',
        referenced_doc_type='CommCareCase',
        table_id=uuid.uuid4().hex,
        configured_filter={},
        configured_indicators=[problem_spec],
    )
    adapter = IndicatorSqlAdapter(data_source_config)
    adapter.rebuild_table()
    # ensure we can save data to the table.
    adapter.save({
        '_id': uuid.uuid4().hex,
        'domain': 'test',
        'doc_type': 'CommCareCase',
        'long_column': 'duplicate_choice_1',
    })
    # and query it back
    q = adapter.get_query_object()
    self.assertEqual(1, q.count())
def test_table_population(self):
    adapter = IndicatorSqlAdapter(self.config)
    # Delete and create table
    adapter.rebuild_table()

    # Create a doc
    now = datetime.datetime.now()
    one_hour = datetime.timedelta(hours=1)
    logs = [
        {"start_time": now, "end_time": now + one_hour, "person": "al"},
        {"start_time": now + one_hour, "end_time": now + (one_hour * 2), "person": "chris"},
        {"start_time": now + (one_hour * 2), "end_time": now + (one_hour * 3), "person": "katie"},
    ]
    doc = _test_doc(form={"time_logs": logs})

    # Save this document into the table
    adapter.save(doc)

    # Get rows from the table
    rows = adapter.get_query_object()
    retrieved_logs = [
        {"start_time": r.start_time, "end_time": r.end_time, "person": r.person}
        for r in rows
    ]

    # Check those rows against the expected result
    self.assertItemsEqual(
        retrieved_logs,
        logs,
        "The repeat data saved in the data source table did not match the expected data!"
    )
def rebuild_indicators(indicator_config_id):
    is_static = indicator_config_id.startswith(CustomDataSourceConfiguration._datasource_id_prefix)
    if is_static:
        config = CustomDataSourceConfiguration.by_id(indicator_config_id)
    else:
        config = DataSourceConfiguration.get(indicator_config_id)
        # Save the start time now in case anything goes wrong. This way we'll be
        # able to see if the rebuild started a long time ago without finishing.
        config.meta.build.initiated = datetime.datetime.utcnow()
        config.save()

    adapter = IndicatorSqlAdapter(config)
    adapter.rebuild_table()

    couchdb = _get_db(config.referenced_doc_type)
    relevant_ids = get_doc_ids(config.domain, config.referenced_doc_type, database=couchdb)
    for doc in iter_docs(couchdb, relevant_ids, chunksize=500):
        try:
            # save is a noop if the filter doesn't match
            adapter.save(doc)
        except DataError as e:
            logging.exception('problem saving document {} to table. {}'.format(doc['_id'], e))

    if not is_static:
        config.meta.build.finished = True
        config.save()
def rebuild_indicators(indicator_config_id):
    config = _get_config_by_id(indicator_config_id)
    adapter = IndicatorSqlAdapter(config)
    couchdb = _get_db(config.referenced_doc_type)
    redis_client = get_redis_client().client.get_client()
    redis_key = _get_redis_key_for_config(config)

    if not is_static(indicator_config_id):
        # Save the start time now in case anything goes wrong. This way we'll be
        # able to see if the rebuild started a long time ago without finishing.
        config.meta.build.initiated = datetime.datetime.utcnow()
        config.meta.build.finished = False
        config.save()
        # recompute the key: it is derived from the config's revision, which changed on save
        redis_key = _get_redis_key_for_config(config)

    adapter.rebuild_table()
    relevant_ids_chunk = []
    for relevant_id in iterate_doc_ids_in_domain_by_type(
            config.domain,
            config.referenced_doc_type,
            chunk_size=CHUNK_SIZE,
            database=couchdb):
        relevant_ids_chunk.append(relevant_id)
        if len(relevant_ids_chunk) >= CHUNK_SIZE:
            redis_client.sadd(redis_key, *relevant_ids_chunk)
            _build_indicators(indicator_config_id, relevant_ids_chunk)
            relevant_ids_chunk = []
    if relevant_ids_chunk:
        redis_client.sadd(redis_key, *relevant_ids_chunk)
        _build_indicators(indicator_config_id, relevant_ids_chunk)
def rebuild_indicators(indicator_config_id):
    config = _get_config_by_id(indicator_config_id)
    adapter = IndicatorSqlAdapter(config)
    couchdb = _get_db(config.referenced_doc_type)
    redis_client = get_redis_client().client.get_client()
    redis_key = _get_redis_key_for_config(config)

    if not is_static(indicator_config_id):
        # Save the start time now in case anything goes wrong. This way we'll be
        # able to see if the rebuild started a long time ago without finishing.
        config.meta.build.initiated = datetime.datetime.utcnow()
        config.meta.build.finished = False
        config.save()
        # recompute the key: it is derived from the config's revision, which changed on save
        redis_key = _get_redis_key_for_config(config)

    adapter.rebuild_table()
    relevant_ids = get_doc_ids_in_domain_by_type(
        config.domain,
        config.referenced_doc_type,
        database=couchdb,
    )
    for docs in chunked(relevant_ids, 1000):
        redis_client.sadd(redis_key, *docs)

    _build_indicators(indicator_config_id, relevant_ids)
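# A minimal sketch of what _get_redis_key_for_config might return, inferred from
# the older version further below that builds the key inline as
# 'ucr_queue-<config id>:<rev>' (with rev == 'static' for static data sources).
# Hypothetical: the real helper may differ.
def _get_redis_key_for_config(config):
    rev = 'static' if is_static(config._id) else config._rev
    return 'ucr_queue-{}:{}'.format(config._id, rev)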
def rebuild_aggregate_ucr(request, domain, table_id):
    table_definition = get_object_or_404(
        AggregateTableDefinition, domain=domain, table_id=table_id
    )
    aggregate_table_adapter = IndicatorSqlAdapter(table_definition)
    aggregate_table_adapter.rebuild_table()
    populate_aggregate_table_data_task.delay(table_definition.id)
    messages.success(request, 'Table rebuild successfully started.')
    return HttpResponseRedirect(reverse(AggregateUCRView.urlname, args=[domain, table_id]))
def test_weekly_aggregation(self):
    # generate our table
    aggregate_table_adapter = IndicatorSqlAdapter(self.weekly_aggregate_table_definition)
    aggregate_table_adapter.rebuild_table()
    populate_aggregate_table_data(aggregate_table_adapter)
    self._check_weekly_results()

    # confirm it's also idempotent
    populate_aggregate_table_data(aggregate_table_adapter)
    self._check_weekly_results()
def rebuild_indicators(indicator_config_id):
    config = _get_config_by_id(indicator_config_id)
    adapter = IndicatorSqlAdapter(config)

    if not is_static(indicator_config_id):
        # Save the start time now in case anything goes wrong. This way we'll be
        # able to see if the rebuild started a long time ago without finishing.
        config.meta.build.initiated = datetime.datetime.utcnow()
        config.meta.build.finished = False
        config.save()

    adapter.rebuild_table()
    _iteratively_build_table(config)
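# Hypothetical sketch of _iteratively_build_table, assuming it factors out the
# chunked id iteration from the earlier inline version above; the actual helper
# may differ.
def _iteratively_build_table(config):
    couchdb = _get_db(config.referenced_doc_type)
    redis_client = get_redis_client().client.get_client()
    redis_key = _get_redis_key_for_config(config)

    relevant_ids_chunk = []
    for relevant_id in iterate_doc_ids_in_domain_by_type(
            config.domain,
            config.referenced_doc_type,
            chunk_size=CHUNK_SIZE,
            database=couchdb):
        relevant_ids_chunk.append(relevant_id)
        if len(relevant_ids_chunk) >= CHUNK_SIZE:
            # queue the chunk in redis so an interrupted rebuild can be resumed
            redis_client.sadd(redis_key, *relevant_ids_chunk)
            _build_indicators(config._id, relevant_ids_chunk)
            relevant_ids_chunk = []
    if relevant_ids_chunk:
        redis_client.sadd(redis_key, *relevant_ids_chunk)
        _build_indicators(config._id, relevant_ids_chunk)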
def rebuild_indicators(indicator_config_id):
    config = DataSourceConfiguration.get(indicator_config_id)
    adapter = IndicatorSqlAdapter(get_engine(), config)
    adapter.rebuild_table()

    couchdb = _get_db(config.referenced_doc_type)
    relevant_ids = get_doc_ids(config.domain, config.referenced_doc_type, database=couchdb)
    for doc in iter_docs(couchdb, relevant_ids, chunksize=500):
        if config.filter.filter(doc):
            try:
                adapter.save(doc)
            except DataError as e:
                logging.exception('problem saving document {} to table. {}'.format(doc['_id'], e))
def rebuild_indicators(indicator_config_id, initiated_by=None):
    config = _get_config_by_id(indicator_config_id)
    success = _('Your UCR table {} has finished rebuilding').format(config.table_id)
    failure = _('There was an error rebuilding your UCR table {}.').format(config.table_id)
    send = toggles.SEND_UCR_REBUILD_INFO.enabled(initiated_by)
    with notify_someone(initiated_by, success_message=success, error_message=failure, send=send):
        adapter = IndicatorSqlAdapter(config)
        if not id_is_static(indicator_config_id):
            # Save the start time now in case anything goes wrong. This way we'll be
            # able to see if the rebuild started a long time ago without finishing.
            config.meta.build.initiated = datetime.datetime.utcnow()
            config.meta.build.finished = False
            config.save()

        adapter.rebuild_table()
        _iteratively_build_table(config)
class IndicatorPillowTest(TestCase):

    def setUp(self):
        folder = os.path.join(os.path.dirname(__file__), 'data', 'configs')
        sample_file = os.path.join(folder, 'sample_indicator_config.json')
        self.pillow = ConfigurableIndicatorPillow()
        self.engine = self.pillow.get_sql_engine()
        with open(sample_file) as f:
            structure = json.loads(f.read())
        self.config = DataSourceConfiguration.wrap(structure)
        self.pillow.bootstrap(configs=[self.config])
        self.adapter = IndicatorSqlAdapter(self.engine, self.config)
        self.adapter.rebuild_table()

    def tearDown(self):
        self.adapter.drop_table()
        self.engine.dispose()

    def testFilter(self):
        # note: this is a silly test now that python_filter always returns true
        not_matching = [
            dict(doc_type="NotCommCareCase", domain='user-reports', type='ticket'),
            dict(doc_type="CommCareCase", domain='not-user-reports', type='ticket'),
            dict(doc_type="CommCareCase", domain='user-reports', type='not-ticket'),
        ]
        for document in not_matching:
            self.assertTrue(self.pillow.python_filter(document))

        self.assertTrue(self.pillow.python_filter(
            dict(doc_type="CommCareCase", domain='user-reports', type='ticket')
        ))

    def testChangeTransport(self):
        # indicators
        sample_doc, expected_indicators = get_sample_doc_and_indicators()
        self.pillow.change_transport(sample_doc)
        with self.engine.begin() as connection:
            rows = connection.execute(sqlalchemy.select([self.adapter.get_table()]))
            self.assertEqual(1, rows.rowcount)
            row = rows.fetchone()
            for k, v in row.items():
                self.assertEqual(expected_indicators[k], v)
def rebuild_indicators(indicator_config_id):
    is_static = indicator_config_id.startswith(CustomDataSourceConfiguration._datasource_id_prefix)
    if is_static:
        config = CustomDataSourceConfiguration.by_id(indicator_config_id)
    else:
        config = DataSourceConfiguration.get(indicator_config_id)

    adapter = IndicatorSqlAdapter(get_engine(), config)
    adapter.rebuild_table()

    couchdb = _get_db(config.referenced_doc_type)
    relevant_ids = get_doc_ids(config.domain, config.referenced_doc_type, database=couchdb)
    for doc in iter_docs(couchdb, relevant_ids, chunksize=500):
        try:
            # save is a noop if the filter doesn't match
            adapter.save(doc)
        except DataError as e:
            logging.exception('problem saving document {} to table. {}'.format(doc['_id'], e))
    adapter.engine.dispose()
def test_table_population(self):
    engine = get_engine()
    adapter = IndicatorSqlAdapter(engine, self.config)
    # Delete and create table
    adapter.rebuild_table()

    # Create a doc
    now = datetime.datetime.now()
    one_hour = datetime.timedelta(hours=1)
    logs = [
        {"start_time": now, "end_time": now + one_hour, "person": "al"},
        {"start_time": now + one_hour, "end_time": now + (one_hour * 2), "person": "chris"},
        {"start_time": now + (one_hour * 2), "end_time": now + (one_hour * 3), "person": "katie"},
    ]
    doc = _test_doc(form={'time_logs': logs})

    # Save this document into the table
    adapter.save(doc)

    # Get rows from the table
    with engine.connect() as connection:
        rows = connection.execute(adapter.get_table().select())
        # rows come back as tuples; positions 3-5 hold start_time, end_time, person
        retrieved_logs = [
            {
                'start_time': r[3],
                'end_time': r[4],
                'person': r[5],
            }
            for r in rows
        ]

    # Clean up
    engine.dispose()

    # Check those rows against the expected result
    self.assertItemsEqual(
        retrieved_logs,
        logs,
        "The repeat data saved in the data source table did not match the expected data!"
    )
def rebuild_indicators(indicator_config_id):
    is_static = indicator_config_id.startswith(StaticDataSourceConfiguration._datasource_id_prefix)
    if is_static:
        config = StaticDataSourceConfiguration.by_id(indicator_config_id)
        rev = 'static'
    else:
        config = DataSourceConfiguration.get(indicator_config_id)
        rev = config._rev
        # Save the start time now in case anything goes wrong. This way we'll be
        # able to see if the rebuild started a long time ago without finishing.
        config.meta.build.initiated = datetime.datetime.utcnow()
        config.save()

    adapter = IndicatorSqlAdapter(config)
    couchdb = _get_db(config.referenced_doc_type)
    client = get_redis_client().client.get_client()
    redis_key = 'ucr_queue-{}:{}'.format(indicator_config_id, rev)

    # resume from the queued ids if a previous rebuild was interrupted,
    # otherwise rebuild the table and queue all relevant ids
    relevant_ids = client.smembers(redis_key)
    if not relevant_ids:
        adapter.rebuild_table()
        relevant_ids = get_doc_ids_in_domain_by_type(config.domain, config.referenced_doc_type,
                                                     database=couchdb)
        if relevant_ids:
            client.sadd(redis_key, *relevant_ids)

    for doc in iter_docs(couchdb, relevant_ids, chunksize=500):
        try:
            # save is a noop if the filter doesn't match
            adapter.save(doc)
            client.srem(redis_key, doc.get('_id'))
        except DataError as e:
            logging.exception('problem saving document {} to table. {}'.format(doc['_id'], e))

    if not is_static:
        client.delete(redis_key)
        config.meta.build.finished = True
        config.save()
class IndicatorPillowTest(TestCase):

    def setUp(self):
        self.config = get_sample_data_source()
        self.pillow = ConfigurableIndicatorPillow()
        self.engine = self.pillow.get_sql_engine()
        self.pillow.bootstrap(configs=[self.config])
        self.adapter = IndicatorSqlAdapter(self.engine, self.config)
        self.adapter.rebuild_table()

    def tearDown(self):
        self.adapter.drop_table()
        self.engine.dispose()

    def test_filter(self):
        # note: this is a silly test now that python_filter always returns true
        not_matching = [
            dict(doc_type="NotCommCareCase", domain='user-reports', type='ticket'),
            dict(doc_type="CommCareCase", domain='not-user-reports', type='ticket'),
            dict(doc_type="CommCareCase", domain='user-reports', type='not-ticket'),
        ]
        for document in not_matching:
            self.assertTrue(self.pillow.python_filter(document))

        self.assertTrue(self.pillow.python_filter(
            dict(doc_type="CommCareCase", domain='user-reports', type='ticket')
        ))

    def test_change_transport(self):
        sample_doc, _ = get_sample_doc_and_indicators()
        self.pillow.change_transport(sample_doc)
        self._check_sample_doc_state()

    def test_rebuild_indicators(self):
        self.config.save()
        sample_doc, _ = get_sample_doc_and_indicators()
        CommCareCase.get_db().save_doc(sample_doc)
        rebuild_indicators(self.config._id)
        self._check_sample_doc_state()

    def test_bad_integer_datatype(self):
        self.config.save()
        bad_ints = ['a', '', None]
        for bad_value in bad_ints:
            self.pillow.change_transport({
                '_id': uuid.uuid4().hex,
                'doc_type': 'CommCareCase',
                'domain': 'user-reports',
                'type': 'ticket',
                'priority': bad_value,
            })
        # make sure we saved rows to the table for everything
        with self.engine.begin() as connection:
            rows = connection.execute(sqlalchemy.select([self.adapter.get_table()]))
            self.assertEqual(len(bad_ints), rows.rowcount)

    def _check_sample_doc_state(self):
        _, expected_indicators = get_sample_doc_and_indicators()
        with self.engine.begin() as connection:
            rows = connection.execute(sqlalchemy.select([self.adapter.get_table()]))
            self.assertEqual(1, rows.rowcount)
            row = rows.fetchone()
            for k, v in row.items():
                if isinstance(expected_indicators[k], decimal.Decimal):
                    self.assertAlmostEqual(expected_indicators[k], v)
                else:
                    self.assertEqual(expected_indicators[k], v)