def get_object_list(self, request):
    '''
    Run the full upload pipeline for one document, then return its row.

    Step 1: file -> source submissions (ComplexDocTransform, falling back
            to DateDocTransform when the complex transform fails).
    Step 2: source submissions -> datapoints (MasterRefresh).
    Step 3: datapoints -> aggregates (AggRefresh, once per campaign that
            received data from this document) -- only when the complex
            transform succeeded.

    Requires a ``document_id`` GET parameter; raises DatapointsException
    when it is missing, or when both transforms fail.
    '''
    try:
        doc_id = request.GET['document_id']
    except KeyError:
        raise DatapointsException(
            message='Document_id is a required API param')

    ran_complex_doc_transform = False
    try:
        dt = ComplexDocTransform(request.user.id, doc_id)
        dt.main()
        ran_complex_doc_transform = True
    except Exception:
        # Best-effort fallback: only if the simpler transform also fails
        # do we surface an error to the API caller.
        try:
            dt = DateDocTransform(request.user.id, doc_id)
            dt.process_file()
        except Exception as err:
            raise DatapointsException(message=err.message)

    mr = MasterRefresh(request.user.id, doc_id)
    mr.main()

    if ran_complex_doc_transform:
        # set(qs) is enough -- the intermediate list() was redundant.
        doc_campaign_ids = set(DataPoint.objects
            .filter(source_submission__document_id=doc_id)
            .values_list('campaign_id', flat=True))
        for c_id in doc_campaign_ids:
            ar = AggRefresh(c_id)
            # try/except block hack because tests fail otherwise
            try:
                with transaction.atomic():
                    ar.main()
            except TransactionManagementError:
                pass

    return Document.objects.filter(id=doc_id).values()
def get_object_list(self, request):
    '''
    Three-step ingest for an uploaded document: transform the file into
    source submissions, refresh datapoints from those submissions, then
    aggregate per campaign.  Returns the document row as values().
    '''
    if 'document_id' not in request.GET:
        raise DatapointsException(message='Document_id is a required API param')
    doc_id = request.GET['document_id']

    complex_transform_succeeded = False
    try:
        transform = ComplexDocTransform(request.user.id, doc_id)
        transform.main()
        complex_transform_succeeded = True
    except Exception as err:
        try:
            fallback_transform = DateDocTransform(request.user.id, doc_id)
            ssids = fallback_transform.process_file()
        except Exception as err:
            raise DatapointsException(message=err.message)

    master_refresh = MasterRefresh(request.user.id, doc_id)
    master_refresh.main()

    if complex_transform_succeeded:
        campaign_id_qs = DataPoint.objects \
            .filter(source_submission__document_id=doc_id) \
            .values_list('campaign_id', flat=True)
        for campaign_id in set(list(campaign_id_qs)):
            agg = AggRefresh(campaign_id)
            # try/except block hack because tests fail otherwise
            try:
                with transaction.atomic():
                    agg.main()
            except TransactionManagementError as e:
                pass

    return Document.objects.filter(id=doc_id).values()
def test_campaign_create(self):
    '''
    Attach two datapoints (via one source submission) to a campaign and
    verify that Campaign.get_datapoints() finds both of them.
    '''
    ###### ADD DATA TO CAMPAIGN #####
    self.set_up()

    ss = SourceSubmission.objects.create(
        document_id=self.doc.id,
        submission_json='',
        row_number=0,
        data_date=self.d)

    dp_0 = DataPoint.objects.create(
        location_id=self.tpl.id,
        indicator_id=self.ind_0.id,
        value=2,
        data_date=self.d,
        source_submission_id=ss.id,
        cache_job_id=-1)

    dp_1 = DataPoint.objects.create(
        location_id=self.tpl.id,
        indicator_id=self.ind_1.id,
        value=3,
        data_date=self.d,
        source_submission_id=ss.id,
        cache_job_id=-1)

    agr = AggRefresh(campaign_id=self.c.id)

    dp_ids = self.c.get_datapoints()
    self.assertEqual(len(dp_ids), 2)
    # BUG FIX: was ``self.assertTrue(isinstance, (self.c, Campaign))``,
    # which asserted the truthiness of the ``isinstance`` builtin itself
    # (always true) and never checked anything about ``self.c``.
    self.assertTrue(isinstance(self.c, Campaign))
def process_source_sheet(source_sheet_df, sheet_name):
    '''
    Persist one workbook sheet as a CSV-backed Document, then run the
    ingest chain: DocTransform -> MasterRefresh -> AggRefresh.
    '''
    system_user_id = -1

    # Write the dataframe to MEDIA_ROOT as "<sheet_name>.csv".
    csv_name = sheet_name + '.csv'
    source_sheet_df.to_csv(settings.MEDIA_ROOT + csv_name)

    document = Document.objects.create(
        doc_title=csv_name,
        guid='test',
        docfile=csv_name)
    create_doc_details(document.id)

    # document -> source_submissions
    doc_transform = DocTransform(system_user_id, document.id)
    doc_transform.main()

    # source_submissions -> datapoints
    master_refresh = MasterRefresh(system_user_id, document.id)
    master_refresh.main()

    # datapoints -> computed datapoints
    agg_refresh = AggRefresh()
def get_object_list(self, request):
    '''
    Kick off an aggregation refresh.

    If a ``campaign_id`` GET param is supplied, refresh that campaign and
    return its row; otherwise let AggRefresh find one datapoint that needs
    processing and derive the campaign itself, returning all offices.

    To Do -- Make a method on the Datapoint model called
    get_campaign_for_datapoint so that this logic can be easily extended.
    This needs cleanup.

    cache_job_id = -1 --> NEEDS PROCESSING
    cache_job_id = -2 --> NEEDS CAMPAIGN ASSOCIATED
    '''
    # BUG FIX: the try block previously wrapped the AggRefresh call and
    # the queryset as well, so a KeyError raised anywhere inside them was
    # silently treated as "no campaign param" and took the fallback path.
    # The try now covers only the GET lookup.
    try:
        campaign_id = request.GET['campaign_id']
    except KeyError:
        ar = AggRefresh()
        return Office.objects.all().values()

    ar = AggRefresh(campaign_id)
    return Campaign.objects.filter(id=campaign_id).values()
def get_object_list(self, request):
    '''
    Refresh datapoints (MasterRefresh) and per-campaign aggregates
    (AggRefresh) for one document, then return that document's row.
    Requires a ``document_id`` GET parameter.
    '''
    if 'document_id' not in request.GET:
        raise DatapointsException(
            message='Document_id is a required API param')
    doc_id = request.GET['document_id']

    refresher = MasterRefresh(request.user.id, doc_id)
    refresher.main()

    campaign_id_qs = DataPoint.objects \
        .filter(source_submission__document_id=doc_id) \
        .values_list('campaign_id', flat=True)
    for campaign_id in set(list(campaign_id_qs)):
        agg = AggRefresh(campaign_id)

    return Document.objects.filter(id=doc_id).values()
def test_raw_data_to_computed(self):
    '''
    This just makes sure that any data in the datapoint table gets into
    the calculated DataPoint table.  That is, if i insert a value for
    missed children in Borno, the same exact data should be in the
    datapoint_with_computed table no matter what.
    '''
    self.set_up()
    self.create_raw_datapoints()

    # CLEANUP: the original also computed location_ids / dp_values /
    # sum_dp_value (and an agg_location_id) that were never asserted on --
    # read-only dead queries, removed here.
    indicator_id, data_date, raw_location_id = 22, '2016-01-01', 12910

    ar = AggRefresh(self.campaign_id)

    ############################################################
    ## ensure that raw data gets into datapoint_with_computed ##
    ############################################################
    raw_value = DataPoint.objects.get(
        data_date=data_date,
        indicator_id=indicator_id,
        location_id=raw_location_id).value

    raw_value_in_agg = DataPointComputed.objects.get(
        campaign_id=self.campaign_id,
        indicator_id=indicator_id,
        location_id=raw_location_id).value

    self.assertEqual(raw_value, raw_value_in_agg)
def run_agg_refresh(apps, schema_editor):
    '''
    Data-migration helper: run an AggRefresh for every campaign that has
    at least one DataPoint.
    '''
    # FIX: the queryset assignment was duplicated back to back; one is enough.
    campaigns = Campaign.objects.all()
    for campaign in campaigns:
        if DataPoint.objects.filter(campaign_id=campaign.id).exists():
            agg = AggRefresh(campaign.id)
def do(self):
    '''
    Scheduled-job entry point: run an aggregation refresh.
    '''
    # CLEANUP: the instance was bound to an unused local (``cr``).
    # AggRefresh with no args appears to pick its own campaign and do its
    # work on construction (see the other no-arg call sites) -- TODO confirm.
    AggRefresh()
def _recursive_sum(self):
    '''
    Consider the case in which we have "number of missed children" which
    is the sum of "missed children due to absence", "missed children due
    to refusal", and "missed children due to child absence."

    Now consider that "missed children due to refusal" is also generated
    from the sum of "refusal due to religious reasons", "refusal due to
    too many rounds", "refusal due to - unhappy with team" (see more
    here: http://rhizome.work/manage_system/manage/indicator/264).

    There are two levels here and this test aims to cover this use case.

    NOTE(review): the leading underscore means the unittest runner will
    not discover this method as a test -- presumably disabled on purpose;
    verify before relying on its assertions.
    '''
    self.set_up()
    data_date, location_id = '2016-01-01', 12910

    # Parent indicator: the top of the two-level sum hierarchy.
    parent_indicator = Indicator.objects.create(
        name='Number of Avoidable Deaths',
        short_name='Number of Avoidable Deaths',
        data_format='int')
    CampaignToIndicator.objects.create(indicator_id=parent_indicator.id,
        campaign_id=self.campaign_id)

    # First-level child of the parent; itself summed from three sub-subs.
    sub_indicator_1 = Indicator.objects.create(
        name='Number of Deaths due to Conflict',
        short_name='Number of Deaths due to Conflict',
        data_format='int')
    CampaignToIndicator.objects.create(indicator_id=sub_indicator_1.id,
        campaign_id=self.campaign_id)

    sub_sub_indicator_1 = Indicator.objects.create(
        name='Number Conflict Deaths - Children',
        short_name='Conflict Deaths - Children',
        data_format='int')
    CampaignToIndicator.objects.create(indicator_id=sub_sub_indicator_1.id,
        campaign_id=self.campaign_id)

    sub_sub_indicator_2 = Indicator.objects.create(
        name='Number of Adult Civilian Deaths',
        short_name='Number of Adult Civilian Deaths',
        data_format='int')
    CampaignToIndicator.objects.create(indicator_id=sub_sub_indicator_2.id,
        campaign_id=self.campaign_id)

    sub_sub_indicator_3 = Indicator.objects.create(
        name='Number of Conflict Deaths - Militants',
        short_name='Conflict Deaths - Militants',
        data_format='int')
    CampaignToIndicator.objects.create(indicator_id=sub_sub_indicator_3.id,
        campaign_id=self.campaign_id)

    # Second first-level child; summed from two sub-subs of its own.
    sub_indicator_2 = Indicator.objects.create(
        name='Number of Deaths due to Malaria',
        short_name='Number of Deaths due to Malaria',
        data_format='int')
    CampaignToIndicator.objects.create(indicator_id=sub_indicator_2.id,
        campaign_id=self.campaign_id)

    sub_indicator_2_sub_1 = Indicator.objects.create(
        name='Number of Deaths due to Malaria -- Child had No Net',
        short_name='Number of Deaths due to Malaria -- no net',
        data_format='int')
    CampaignToIndicator.objects.create(indicator_id=sub_indicator_2_sub_1.id,
        campaign_id=self.campaign_id)

    sub_indicator_2_sub_2 = Indicator.objects.create(
        name='Number of Deaths due to Malaria -- Child had No Medicine',
        short_name='Number of Deaths due to Malaria -- no Medicie',
        data_format='int')
    CampaignToIndicator.objects.create(indicator_id=sub_indicator_2_sub_2.id,
        campaign_id=self.campaign_id)

    # Third first-level child; a leaf with no children of its own.
    sub_indicator_3 = Indicator.objects.create(
        name='Number of Deaths due to Hunger',
        short_name='Number of Deaths due to Hunger',
        data_format='int')
    CampaignToIndicator.objects.create(indicator_id=sub_indicator_3.id,
        campaign_id=self.campaign_id)

    ## FOR SUM OF PARTS CALUCLATIONS ##
    # Level 1: parent = sub_1 + sub_2 + sub_3.
    indicator_calc_1 = CalculatedIndicatorComponent.objects.create(
        indicator_id=parent_indicator.id,
        indicator_component_id=sub_indicator_1.id,
        calculation='PART_TO_BE_SUMMED')
    indicator_calc_2 = CalculatedIndicatorComponent.objects.create(
        indicator_id=parent_indicator.id,
        indicator_component_id=sub_indicator_2.id,
        calculation='PART_TO_BE_SUMMED')
    indicator_calc_3 = CalculatedIndicatorComponent.objects.create(
        indicator_id=parent_indicator.id,
        indicator_component_id=sub_indicator_3.id,
        calculation='PART_TO_BE_SUMMED')

    ## 2nd layer of indicator calculation ##
    # Level 2: sub_1 = sub_sub_1 + sub_sub_2 + sub_sub_3.
    sub_indicator_calc_1 = CalculatedIndicatorComponent.objects.create(
        indicator_id=sub_indicator_1.id,
        indicator_component_id=sub_sub_indicator_1.id,
        calculation='PART_TO_BE_SUMMED')
    sub_indicator_calc_2 = CalculatedIndicatorComponent.objects.create(
        indicator_id=sub_indicator_1.id,
        indicator_component_id=sub_sub_indicator_2.id,
        calculation='PART_TO_BE_SUMMED')
    sub_indicator_calc_3 = CalculatedIndicatorComponent.objects.create(
        indicator_id=sub_indicator_1.id,
        indicator_component_id=sub_sub_indicator_3.id,
        calculation='PART_TO_BE_SUMMED')

    ## 2nd layer of indicator calculation ##
    # Level 2: sub_2 = sub_2_sub_1 + sub_2_sub_2.
    # NOTE(review): these assignments reuse (shadow) the
    # sub_indicator_calc_1 / _2 names bound just above; the earlier rows
    # still exist in the DB, only the local references are lost.
    sub_indicator_calc_1 = CalculatedIndicatorComponent.objects.create(
        indicator_id=sub_indicator_2.id,
        indicator_component_id=sub_indicator_2_sub_1.id,
        calculation='PART_TO_BE_SUMMED')
    sub_indicator_calc_2 = CalculatedIndicatorComponent.objects.create(
        indicator_id=sub_indicator_2.id,
        indicator_component_id=sub_indicator_2_sub_2.id,
        calculation='PART_TO_BE_SUMMED')

    ## create all the datapoints ##
    # Raw values: note sub_indicator_1 gets NO raw value (only its
    # children do), while sub_indicator_2 has BOTH a raw value and
    # children -- the second assertion below checks the raw value wins.
    values_to_insert = {
        sub_indicator_2.id: 22,
        sub_indicator_3.id: 33,
        sub_sub_indicator_1.id: 33,
        sub_sub_indicator_2.id: 44,
        sub_sub_indicator_3.id: 55,
        sub_indicator_2_sub_1.id: 66,
        sub_indicator_2_sub_2.id: 77,
    }
    # NOTE(review): iteritems() is Python 2 only.
    for k, v in values_to_insert.iteritems():
        self.create_datapoint(location_id, data_date, k, v)

    ar = AggRefresh(self.campaign_id)

    # NOTE(review): this target is the sum over ALL raw values, including
    # both sub_indicator_2 (22) and its children (66, 77) -- which seems
    # inconsistent with the override assertion below; confirm the intended
    # semantics before re-enabling this test.
    parent_indicator_target_value = sum(values_to_insert.values())
    # NOTE(review): passes the Indicator instance (not .id) to the
    # indicator_id kwarg; Django coerces model instances to their pk in
    # lookups, so this behaves like indicator_id=parent_indicator.id.
    parent_indicator_1_actual_value = DataPointComputed.objects.get(
        location_id=location_id,
        data_date=data_date,
        indicator_id=parent_indicator,
    ).value

    self.assertEqual(parent_indicator_1_actual_value,
        parent_indicator_target_value)

    ## test that a parent overrides the sum of its children when there
    ## are multiple levels of indicator calcuations ##
    sub_2_target_val = values_to_insert[sub_indicator_2.id]
    sub_2_actual_val = DataPointComputed.objects.get(
        location_id=location_id,
        data_date=data_date,
        indicator_id=sub_indicator_2.id,
    ).value

    self.assertEqual(sub_2_target_val, sub_2_actual_val)
def test_part_of_difference(self):
    '''
    see here: rhizome.work/manage_system/manage/indicator/187

    We use this calculation to perform the following calculation:

    WHOLE_OF_DIFFERENCE(x) - PART_OF_DIFFERENCE(y)
    -----------------------------------------
             WHOLE_OF_DIFFERENCE(x)
    '''
    self.set_up()
    data_date, location_id, agg_location_id = '2016-01-01', 12910, 12907
    # NOTE(review): x < y, so the expected result (x - y) / x is negative
    # even though the parent indicator's data_format is 'pct'.
    x, y = 303.00, 808.00

    ## create the parent and sub indicators ##
    # 'Refsual' typo is in the fixture data on purpose-or-not; it is part
    # of the stored indicator name, so leave it as-is.
    parent_indicator = Indicator.objects.create(
        name='Refsual Conversion',
        short_name='Refsual Conversion',
        data_format='pct')
    CampaignToIndicator.objects.create(indicator_id=parent_indicator.id,
        campaign_id=self.campaign_id)

    sub_indicator_part = Indicator.objects.create(
        name='Refusals After Revisit',
        short_name='Refusals After Revisit',
        data_format='int')
    CampaignToIndicator.objects.create(indicator_id=sub_indicator_part.id,
        campaign_id=self.campaign_id)

    sub_indicator_denom = Indicator.objects.create(
        name='Refusals Before Revisit',
        short_name='Refusals Before Revisit',
        data_format='int')
    CampaignToIndicator.objects.create(indicator_id=sub_indicator_denom.id,
        campaign_id=self.campaign_id)

    ## FOR SUM OF PARTS CALUCLATIONS ##
    # Wire the two components into the parent: the 'part' is subtracted
    # from the 'whole', and the result is divided by the 'whole'.
    indicator_calc_1 = CalculatedIndicatorComponent.objects.create(
        indicator_id=parent_indicator.id,
        indicator_component_id=sub_indicator_part.id,
        calculation='PART_OF_DIFFERENCE')
    indicator_calc_3 = CalculatedIndicatorComponent.objects.create(
        indicator_id=parent_indicator.id,
        indicator_component_id=sub_indicator_denom.id,
        calculation='WHOLE_OF_DIFFERENCE')

    ## create the datapoints ##
    dp_1 = DataPoint.objects.create(
        indicator_id=sub_indicator_denom.id,
        data_date=data_date,
        location_id=location_id,
        value=x,
        source_submission_id=1,
        cache_job_id=-1,
    )
    dp_2 = DataPoint.objects.create(
        indicator_id=sub_indicator_part.id,
        data_date=data_date,
        location_id=location_id,
        value=y,
        source_submission_id=1,
        cache_job_id=-1,
    )

    cr = AggRefresh(self.campaign_id)

    calc_value = DataPointComputed.objects.get(
        indicator_id=parent_indicator.id,
        campaign_id=self.campaign_id,
        location_id=location_id).value

    ## test SUM calculation
    # Rounded to 4 places to sidestep float representation noise.
    target_value = (x - y) / x
    self.assertEqual(round(calc_value, 4), round(target_value, 4))
def test_sum_and_pct(self):
    '''
    The system uses the "PART_TO_BE_SUMMED" edge type in order to create
    indicators such that the sum of:
      - Number Missed
      - Missed due to other reasons(24)
      - Child Absent(251)
      - Not in Plan (267)
      - Not Visited (268)
      - Non Compliance(264)
    gives us: All Missed Children (21)

    as well as: pct missed children due to refusal (166)

    Here we create new metadata so we can test this functionality for an
    abstracted use case and test that:

    1. We can SUM indicators
    2. We can use the result of #1 as the denominator for a percentage
       calculation.
    '''
    self.set_up()
    data_date, location_id, agg_location_id = '2016-01-01', 12910, 12907
    val_1, val_2, val_3 = 303, 808, 909

    ## create the parent and sub indicators ##
    # parent = sub_1 + sub_2 + sub_3; pct = sub_3 / parent.
    parent_indicator = Indicator.objects.create(
        name='Number of Avoidable Deaths',
        short_name='Number of Avoidable Deaths',
        data_format='int')
    CampaignToIndicator.objects.create(indicator_id=parent_indicator.id,
        campaign_id=self.campaign_id)

    sub_indicator_1 = Indicator.objects.create(
        name='Number of Deaths due to Conflict',
        short_name='Number of Deaths due to Conflict',
        data_format='int')
    CampaignToIndicator.objects.create(indicator_id=sub_indicator_1.id,
        campaign_id=self.campaign_id)

    sub_indicator_2 = Indicator.objects.create(
        name='Number of Deaths due to Malaria',
        short_name='Number of Deaths due to Malaria',
        data_format='int')
    CampaignToIndicator.objects.create(indicator_id=sub_indicator_2.id,
        campaign_id=self.campaign_id)

    sub_indicator_3 = Indicator.objects.create(
        name='Number of Deaths due to Hunger',
        short_name='Number of Deaths due to Hunger',
        data_format='int')
    CampaignToIndicator.objects.create(indicator_id=sub_indicator_3.id,
        campaign_id=self.campaign_id)

    pct_indicator = Indicator.objects.create(
        name='pct of Deaths due to Hunger',
        short_name='pct of Deaths due to Hunger',
        data_format='pct')
    CampaignToIndicator.objects.create(indicator_id=pct_indicator.id,
        campaign_id=self.campaign_id)

    ## FOR SUM OF PARTS CALUCLATIONS ##
    indicator_calc_1 = CalculatedIndicatorComponent.objects.create(
        indicator_id=parent_indicator.id,
        indicator_component_id=sub_indicator_1.id,
        calculation='PART_TO_BE_SUMMED')
    indicator_calc_2 = CalculatedIndicatorComponent.objects.create(
        indicator_id=parent_indicator.id,
        indicator_component_id=sub_indicator_2.id,
        calculation='PART_TO_BE_SUMMED')
    indicator_calc_3 = CalculatedIndicatorComponent.objects.create(
        indicator_id=parent_indicator.id,
        indicator_component_id=sub_indicator_3.id,
        calculation='PART_TO_BE_SUMMED')

    ## FOR PART OVER WHOLE CALCULATIONS ##
    # The denominator is itself the computed SUM indicator above, which
    # is exactly the chained behavior this test exercises.
    indicator_calc_numerator = CalculatedIndicatorComponent.objects.create(
        indicator_id=pct_indicator.id,
        indicator_component_id=sub_indicator_3.id,
        calculation='NUMERATOR')
    indicator_calc_denominator = CalculatedIndicatorComponent.objects.create(
        indicator_id=pct_indicator.id,
        indicator_component_id=parent_indicator.id,
        calculation='DENOMINATOR')

    ## create the datapoints ##
    dp_1 = DataPoint.objects.create(
        indicator_id=sub_indicator_1.id,
        data_date=data_date,
        location_id=location_id,
        value=val_1,
        source_submission_id=1,
        cache_job_id=-1,
    )
    dp_2 = DataPoint.objects.create(
        indicator_id=sub_indicator_2.id,
        data_date=data_date,
        location_id=location_id,
        value=val_2,
        source_submission_id=1,
        cache_job_id=-1,
    )
    dp_3 = DataPoint.objects.create(
        indicator_id=sub_indicator_3.id,
        data_date=data_date,
        location_id=location_id,
        value=val_3,
        source_submission_id=1,
        cache_job_id=-1,
    )

    cr = AggRefresh(self.campaign_id)

    calc_value_sum = DataPointComputed.objects.get(
        indicator_id=parent_indicator.id,
        campaign_id=self.campaign_id,
        location_id=location_id).value

    calc_value_pct = DataPointComputed.objects.get(
        indicator_id=pct_indicator.id,
        campaign_id=self.campaign_id,
        location_id=location_id).value

    ## test SUM calculation
    sum_target_value = val_1 + val_2 + val_3
    self.assertEqual(calc_value_sum, sum_target_value)

    ## test part over whole calction
    # float() guards against Python 2 integer division.
    pct_target_value = val_3 / float(sum_target_value)
    self.assertEqual(calc_value_pct, pct_target_value)
def test_location_aggregation(self):
    '''
    Using the calc_data.csv, create a test_df and target_df.  Ensure that
    the aggregation and calculation are working properly by ingesting the
    stored data, running the cache, and checking that the calculated data
    for the aggregate location (parent location, in this case Nigeria) is
    as expected.

    In addition to the datapoints in the test file, i insert a null value
    to ensure that any null won't corrupt the calculation.

    python manage.py test rhizome.tests.test_agg.AggRefreshTestCase.test_location_aggregation --settings=rhizome.settings.test
    '''
    self.set_up()
    self.create_raw_datapoints()

    indicator_id, data_date, raw_location_id,\
        agg_location_id, null_location_id, NaN_location_id = \
        22, '2016-01-01', 12910, 12907, 12928, 12913

    # All direct children of the aggregate location -- the values that
    # should roll up into it.
    location_ids = Location.objects.filter(parent_location_id=\
        agg_location_id).values_list('id', flat=True)

    # Poison one child with NULL and one with NaN; neither should corrupt
    # the aggregate.  NOTE(review): the data_date filters are commented
    # out, so these updates hit ALL dates for these locations.
    DataPoint.objects.filter(
        indicator_id=indicator_id,
        # data_date = data_date,
        location_id=null_location_id).update(value=None)

    DataPoint.objects.filter(
        indicator_id=indicator_id,
        # data_date = data_date,
        location_id=NaN_location_id).update(value='NaN')

    dps = DataPoint.objects.filter(\
        indicator_id=indicator_id,
        # data_date = data_date,
        location_id__in=location_ids,
        value__isnull=False
    ).values_list('id', 'value')

    # Expected aggregate: sum of the children, excluding NULLs (filtered
    # above) and NaNs (filtered here).
    sum_dp_value = sum([y for x, y in dps if not isnan(y)])

    agg_r = AggRefresh(self.campaign_id)

    #################################################
    ## ensure that raw data gets into AggDataPoint ##
    #################################################
    raw_value = DataPoint.objects.get(
        # data_date = data_date,
        indicator_id=indicator_id,
        location_id=raw_location_id)\
        .value

    ind_obj = Indicator.objects.get(id=indicator_id)

    raw_value_in_agg = AggDataPoint.objects.get(
        # data_date = data_date,
        indicator_id=indicator_id,
        location_id=raw_location_id)\
        .value

    self.assertEqual(raw_value, raw_value_in_agg)

    #############################################
    ## ensure that the aggregated data gets in ##
    #############################################
    agg_value = AggDataPoint.objects.get(
        indicator_id=indicator_id,
        campaign_id=self.campaign_id,
        location_id=agg_location_id).value

    self.assertEqual(agg_value, sum_dp_value)

    ######################################################
    ## ensure that any raw data will override aggregate ##
    ######################################################
    # A raw datapoint stored directly at the parent location must win
    # over the sum of its children.
    override_value = 909090
    agg_override_dp = self.create_datapoint(agg_location_id, data_date,\
        indicator_id, override_value)

    ar = AggRefresh(self.campaign_id)

    override_value_in_agg = AggDataPoint.objects.get(\
        campaign_id=self.campaign_id,\
        indicator_id=indicator_id,\
        location_id=agg_location_id).value

    self.assertEqual(override_value, override_value_in_agg)

    ###########################################
    ## ensure that percentages do not aggregate
    ###########################################
    # Summing percentages across child locations is meaningless, so no
    # AggDataPoint row should exist at the parent for a 'pct' indicator.
    pct_ind = Indicator.objects.create(
        name='pct missed',
        short_name='pct_missed',
        description='missed pct',
        data_format='pct',
        source_name='my brain',
    )

    dp_1 = DataPoint.objects.create(
        indicator_id=pct_ind.id,
        location_id=location_ids[0],
        data_date=data_date,
        value=.2,
        source_submission_id=1)
    dp_2 = DataPoint.objects.create(
        indicator_id=pct_ind.id,
        location_id=location_ids[1],
        data_date=data_date,
        value=.6,
        source_submission_id=1)

    ar = AggRefresh(self.campaign_id)

    try:
        agg_dp_qs = AggDataPoint.objects.get(
            location_id=agg_location_id,
            indicator_id=pct_ind,
            campaign_id=self.campaign_id,
        )
        error_ocurred = False
    except AggDataPoint.DoesNotExist:
        # Expected: no aggregate row for percentage indicators.
        error_ocurred = True

    self.assertTrue(error_ocurred)