def test_promote_properties(self):
    """Promoting a PropertyState is idempotent within a cycle and creates one view per cycle."""
    tasks.save_raw_data(self.import_file.id)
    Column.create_mappings(self.fake_mappings, self.org, self.user)
    tasks.map_data(self.import_file.pk)

    # second cycle used to verify one PropertyView is created per (state, cycle)
    cycle2, _ = Cycle.objects.get_or_create(
        name=u'Hack Cycle 2016',
        organization=self.org,
        start=datetime.datetime(2016, 1, 1),
        end=datetime.datetime(2016, 12, 31),
    )

    # make sure that the new data was loaded correctly
    ps = PropertyState.objects.filter(address_line_1='1181 Douglas Street')[0]
    self.assertEqual(ps.site_eui, 439.9)
    self.assertEqual(ps.extra_data['CoStar Property ID'], '1575599')

    # Promote the PropertyState to a PropertyView
    pv1 = ps.promote(self.cycle)
    pv2 = ps.promote(self.cycle)  # should just return the same object

    self.assertEqual(pv1, pv2)

    # promote the same state for a new cycle, same data
    pv3 = ps.promote(cycle2)
    self.assertNotEqual(pv3, pv1)

    # one view per cycle: two cycles -> two views total
    props = PropertyView.objects.all()
    self.assertEqual(len(props), 2)
def test_get_column_mappings(self):
    """We produce appropriate data structure for mapping"""
    raw_data = [
        {
            "from_field": "raw_data_0",
            "to_field": "destination_0",
            "to_table_name": "PropertyState"
        },
        {
            "from_field": "raw_data_1",
            "to_field": "destination_1",
            "to_table_name": "PropertyState"
        },
        {
            "from_field": "raw_data_2",
            "to_field": "destination_0",
            "to_table_name": "TaxLotState"
        },
    ]

    Column.create_mappings(raw_data, self.fake_org, self.fake_user)

    # expected structure: raw column name -> (destination table, destination field)
    expected = {
        u'raw_data_0': (u'PropertyState', u'destination_0'),
        u'raw_data_1': (u'PropertyState', u'destination_1'),
        u'raw_data_2': (u'TaxLotState', u'destination_0'),
    }

    test_mapping, no_concat = ColumnMapping.get_column_mappings(self.fake_org)
    self.assertDictEqual(test_mapping, expected)
    # no concatenated-column mappings were defined
    self.assertEqual(no_concat, [])
def test_map_data(self): """Save mappings based on user specifications.""" # Create new import file to test import_record = ImportRecord.objects.create( owner=self.user, last_modified_by=self.user, super_organization=self.org ) import_file = ImportFile.objects.create( import_record=import_record, source_type=ASSESSED_RAW, ) import_file.raw_save_done = True import_file.save() fake_raw_bs = PropertyState.objects.create( organization=self.org, import_file=import_file, extra_data=self.fake_row, source_type=ASSESSED_RAW, data_state=DATA_STATE_IMPORT, ) # tasks._save_raw_data(import_file.pk, 'fake_cache_key', 1) self.fake_mappings = FAKE_MAPPINGS['fake_row'] Column.create_mappings(self.fake_mappings, self.org, self.user) tasks.map_data(import_file.pk) mapped_bs = list(PropertyState.objects.filter( import_file=import_file, source_type=ASSESSED_BS, )) self.assertEqual(len(mapped_bs), 1) test_bs = mapped_bs[0] self.assertNotEqual(test_bs.pk, fake_raw_bs.pk) self.assertEqual(test_bs.property_name, self.fake_row['Name']) self.assertEqual(test_bs.address_line_1, self.fake_row['Address Line 1']) self.assertEqual( test_bs.year_built, parser.parse(self.fake_row['Year Built']).year ) # Make sure that we saved the extra_data column mappings data_columns = Column.objects.filter( organization=self.org, is_extra_data=True ) # There's only one piece of data that didn't cleanly map. # Note that as of 09/15/2016 - extra data still needs to be defined in the mappings, it # will no longer magically appear in the extra_data field if the user did not specify to # map it! self.assertListEqual( sorted([d.column_name for d in data_columns]), ['Double Tester'] )
def test_import_file(self):
    """Promoting one state removes it from the unmatched set."""
    tasks.save_raw_data(self.import_file.id)
    Column.create_mappings(self.fake_mappings, self.org, self.user)
    tasks.map_data(self.import_file.pk)

    ps = PropertyState.objects.filter(pm_property_id='2264').first()
    ps.promote(self.cycle)

    # 13 unmatched property states remain because one of the 14 was promoted above
    # NOTE(review): an earlier comment here said 11, which contradicted the assertion
    ps = self.import_file.find_unmatched_property_states()
    self.assertEqual(len(ps), 13)
def test_multiple_id_matches(self):
    """query_property_matches filters canonical states by pm_property_id and/or custom_id_1."""
    tasks._save_raw_data(self.import_file.pk, 'fake_cache_key', 1)
    Column.create_mappings(self.fake_mappings, self.org, self.user, self.import_file.pk)
    tasks.map_data(self.import_file.pk)

    # verify that there are no properties listed as canonical
    property_states = tasks.list_canonical_property_states(self.org)
    self.assertEqual(len(property_states), 0)

    # promote two properties
    ps = PropertyState.objects.filter(custom_id_1='13')
    ps_test = ps.first()
    ps_test_2 = ps.last()
    for p in ps:
        p.promote(self.cycle)
        # from seed.utils.generic import pp
        # pp(p)

    property_states = tasks.list_canonical_property_states(self.org)
    self.assertEqual(len(property_states), 2)

    # no arguments passed should return no results
    matches = tasks.query_property_matches(property_states, None, None)
    self.assertEqual(len(matches), 0)
    # should return 2 properties
    matches = tasks.query_property_matches(property_states, None, '13')
    self.assertEqual(len(matches), 2)
    self.assertEqual(matches[0], ps_test)
    self.assertEqual(matches[1], ps_test_2)
    # should return only the second property
    matches = tasks.query_property_matches(property_states, '2342', None)
    self.assertEqual(len(matches), 1)
    self.assertEqual(matches[0], ps_test_2)
    # should return both properties, the first one should be the pm match, i.e. the first prop
    matches = tasks.query_property_matches(property_states, '481516', '13')
    self.assertEqual(len(matches), 2)
    self.assertEqual(matches[0], ps_test)
    self.assertEqual(matches[1], ps_test_2)
    # if passing in the second pm then it will not be the first
    matches = tasks.query_property_matches(property_states, '2342', '13')
    self.assertEqual(len(matches), 2)
    self.assertEqual(matches[1], ps_test_2)
    # pass the pm id into the custom id. it should still return the correct buildings.
    # not sure that this is the right behavior, but this is what it does, so just testing.
    matches = tasks.query_property_matches(property_states, None, '2342')
    self.assertEqual(len(matches), 1)
    self.assertEqual(matches[0], ps_test_2)
    matches = tasks.query_property_matches(property_states, '13', None)
    self.assertEqual(len(matches), 2)
    self.assertEqual(matches[0], ps_test)
    self.assertEqual(matches[1], ps_test_2)
def setUp(self):
    """Load the example property spreadsheet, save the raw rows, and map them."""
    source_filename = getattr(self, 'filename', 'example-data-properties.xlsx')

    self.fake_mappings = FAKE_MAPPINGS['portfolio']
    self.fake_extra_data = FAKE_EXTRA_DATA
    self.fake_row = FAKE_ROW

    # set_up returns the standard fixture tuple for this source type
    self.user, self.org, self.import_file, self.import_record, self.cycle = \
        self.set_up(ASSESSED_RAW)

    self.import_file = self.load_import_file_file(source_filename, self.import_file)
    tasks._save_raw_data(self.import_file.pk, 'fake_cache_key', 1)
    Column.create_mappings(self.fake_mappings, self.org, self.user)
    tasks.map_data(self.import_file.pk)
def test_match_buildings(self):
    """ case A (one property <-> one tax lot) """
    tasks._save_raw_data(self.import_file.pk, 'fake_cache_key', 1)
    Column.create_mappings(self.fake_mappings, self.org, self.user, self.import_file.pk)
    tasks.map_data(self.import_file.pk)

    # Check to make sure all the properties imported
    ps = PropertyState.objects.filter(
        data_state=DATA_STATE_MAPPING,
        organization=self.org,
        import_file=self.import_file,
    )
    self.assertEqual(len(ps), 14)

    # Check to make sure the taxlots were imported
    ts = TaxLotState.objects.filter(
        data_state=DATA_STATE_MAPPING,
        organization=self.org,
        import_file=self.import_file,
    )
    self.assertEqual(len(ts), 18)

    # Check a single case of the taxlotstate
    ts = TaxLotState.objects.filter(jurisdiction_tax_lot_id='1552813').first()
    self.assertEqual(ts.jurisdiction_tax_lot_id, '1552813')
    self.assertEqual(ts.address_line_1, None)
    self.assertEqual(ts.extra_data["data_008"], 1)

    # Check a single case of the propertystate
    ps = PropertyState.objects.filter(pm_property_id='2264')
    self.assertEqual(len(ps), 1)
    ps = ps.first()
    self.assertEqual(ps.pm_property_id, '2264')
    self.assertEqual(ps.address_line_1, '50 Willow Ave SE')
    # data_007 belongs to the property row, data_008 only to the tax lot row
    self.assertEqual('data_007' in ps.extra_data.keys(), True)
    self.assertEqual('data_008' in ps.extra_data.keys(), False)
    self.assertEqual(ps.extra_data["data_007"], 'a')

    # verify that the lot_number has the tax_lot information. For this case it is one-to-one
    self.assertEqual(ps.lot_number, ts.jurisdiction_tax_lot_id)

    tasks.match_buildings(self.import_file.id)

    # matching merges duplicate lots down to 10 canonical TaxLot records
    self.assertEqual(TaxLot.objects.count(), 10)

    qry = PropertyView.objects.filter(state__custom_id_1='7')
    self.assertEqual(qry.count(), 1)
    state = qry.first().state

    self.assertEqual(state.address_line_1, "12 Ninth Street")
    self.assertEqual(state.property_name, "Grange Hall")
def test_match_buildings(self):
    """ case A (one property <-> one tax lot) """
    tasks._save_raw_data(self.import_file.pk, 'fake_cache_key', 1)
    Column.create_mappings(self.fake_mappings, self.org, self.user)
    tasks.map_data(self.import_file.pk)

    # Check to make sure all the properties imported
    ps = PropertyState.objects.filter(
        data_state=DATA_STATE_MAPPING,
        organization=self.org,
        import_file=self.import_file,
    )
    self.assertEqual(len(ps), 14)

    # Check to make sure the taxlots were imported
    ts = TaxLotState.objects.filter(
        data_state=DATA_STATE_MAPPING,
        organization=self.org,
        import_file=self.import_file,
    )
    # self.assertEqual(len(ts), 10)  # 10 unique taxlots after duplicates and delimeters

    # Check a single case of the taxlotstate
    ts = TaxLotState.objects.filter(jurisdiction_tax_lot_id='1552813').first()
    self.assertEqual(ts.jurisdiction_tax_lot_id, '1552813')
    self.assertEqual(ts.address_line_1, None)
    self.assertEqual(ts.extra_data["data_008"], 1)

    # Check a single case of the propertystate
    ps = PropertyState.objects.filter(pm_property_id='2264')
    self.assertEqual(len(ps), 1)
    ps = ps.first()
    self.assertEqual(ps.pm_property_id, '2264')
    self.assertEqual(ps.address_line_1, '50 Willow Ave SE')
    # data_007 belongs to the property row, data_008 only to the tax lot row
    self.assertEqual('data_007' in ps.extra_data.keys(), True)
    self.assertEqual('data_008' in ps.extra_data.keys(), False)
    self.assertEqual(ps.extra_data["data_007"], 'a')

    # verify that the lot_number has the tax_lot information. For this case it is one-to-one
    self.assertEqual(ps.lot_number, ts.jurisdiction_tax_lot_id)

    tasks.match_buildings(self.import_file.id, self.user.id)

    # matching merges duplicate lots down to 10 canonical TaxLot records
    self.assertEqual(TaxLot.objects.count(), 10)

    qry = PropertyView.objects.filter(state__custom_id_1='7')
    self.assertEqual(qry.count(), 1)
    state = qry.first().state

    self.assertEqual(state.address_line_1, "12 Ninth Street")
    self.assertEqual(state.property_name, "Grange Hall")
def test_multiple_id_matches(self):
    """query_property_matches filters canonical states by pm_property_id and/or custom_id_1."""
    tasks._save_raw_data(self.import_file.pk, 'fake_cache_key', 1)
    Column.create_mappings(self.fake_mappings, self.org, self.user)
    tasks.map_data(self.import_file.pk)

    # verify that there are no properties listed as canonical
    property_states = tasks.list_canonical_property_states(self.org)
    self.assertEqual(len(property_states), 0)

    # promote two properties
    ps = PropertyState.objects.filter(custom_id_1='13')
    ps_test = ps.first()
    ps_test_2 = ps.last()
    for p in ps:
        p.promote(self.cycle)
        # from seed.utils.generic import pp
        # pp(p)

    property_states = tasks.list_canonical_property_states(self.org)
    self.assertEqual(len(property_states), 2)

    # no arguments passed should return no results
    matches = tasks.query_property_matches(property_states, None, None)
    self.assertEqual(len(matches), 0)
    # should return 2 properties
    matches = tasks.query_property_matches(property_states, None, '13')
    self.assertEqual(len(matches), 2)
    self.assertEqual(matches[0], ps_test)
    self.assertEqual(matches[1], ps_test_2)
    # should return only the second property
    matches = tasks.query_property_matches(property_states, '2342', None)
    self.assertEqual(len(matches), 1)
    self.assertEqual(matches[0], ps_test_2)
    # should return both properties, the first one should be the pm match, i.e. the first prop
    matches = tasks.query_property_matches(property_states, '481516', '13')
    self.assertEqual(len(matches), 2)
    self.assertEqual(matches[0], ps_test)
    self.assertEqual(matches[1], ps_test_2)
    # if passing in the second pm then it will not be the first
    matches = tasks.query_property_matches(property_states, '2342', '13')
    self.assertEqual(len(matches), 2)
    self.assertEqual(matches[1], ps_test_2)
    # pass the pm id into the custom id. it should still return the correct buildings.
    # not sure that this is the right behavior, but this is what it does, so just testing.
    matches = tasks.query_property_matches(property_states, None, '2342')
    self.assertEqual(len(matches), 1)
    self.assertEqual(matches[0], ps_test_2)
    matches = tasks.query_property_matches(property_states, '13', None)
    self.assertEqual(len(matches), 2)
    self.assertEqual(matches[0], ps_test)
    self.assertEqual(matches[1], ps_test_2)
def test_mapping(self): """Test objects in database can be converted to mapped fields""" # for mapping, you have to create an import file, even it is just one record. This is # more of an ID to track imports state = self.property_state_factory.get_property_state_as_extra_data( import_file_id=self.import_file.id, source_type=ASSESSED_RAW, data_state=DATA_STATE_IMPORT, random_extra=42) # set import_file save done to true self.import_file.raw_save_done = True self.import_file.save() # Create mappings from the new states # TODO #239: Convert this to a single helper method to suggest and save suggested_mappings = mapper.build_column_mapping( list(state.extra_data.keys()), Column.retrieve_all_by_tuple(self.org), previous_mapping=get_column_mapping, map_args=[self.org], thresh=80) # Convert mapping suggests to the format needed for saving mappings = [] for raw_column, suggestion in suggested_mappings.items(): # Single suggestion looks like:'lot_number': ['PropertyState', 'lot_number', 100] mapping = { "from_field": raw_column, "from_units": None, "to_table_name": suggestion[0], "to_field": suggestion[1], "to_field_display_name": suggestion[1], } mappings.append(mapping) # Now save the mappings # print(mappings) Column.create_mappings(mappings, self.org, self.user, self.import_file.id) # END TODO tasks.map_data(self.import_file.id) props = self.import_file.find_unmatched_property_states() self.assertEqual(len(props), 1) self.assertEqual(state.extra_data['year_built'], props.first().year_built) self.assertEqual(state.extra_data['random_extra'], props.first().extra_data['random_extra'])
def test_mapping_no_properties(self):
    """Redirecting every PropertyState mapping to TaxLotState puts the data on tax lots."""
    # Work on a deep copy: the original mutated self.fake_mappings (and
    # therefore the shared FAKE_MAPPINGS fixture) in place, leaking the
    # change into other tests. Sibling tests use copy.deepcopy for this.
    new_mappings = copy.deepcopy(self.fake_mappings)
    for m in new_mappings:
        if m["to_table_name"] == 'PropertyState':
            m["to_table_name"] = 'TaxLotState'

    tasks._save_raw_data(self.import_file.pk, 'fake_cache_key', 1)
    Column.create_mappings(new_mappings, self.org, self.user)
    tasks.map_data(self.import_file.pk)

    # the raw import rows still exist as PropertyState records
    ps = PropertyState.objects.all()
    self.assertEqual(len(ps), 14)

    # make sure that the new data was loaded correctly onto the tax lot
    ts = TaxLotState.objects.filter(address_line_1='50 Willow Ave SE').first()
    self.assertEqual(ts.extra_data['site_eui'], 125)
def test_postal_code_property(self):
    """Property postal codes are normalized (zero-padded, ZIP+4 kept) during mapping."""
    new_mappings = copy.deepcopy(self.fake_mappings['portfolio'])

    tasks.save_raw_data(self.import_file.pk)
    Column.create_mappings(new_mappings, self.org, self.user, self.import_file.pk)
    tasks.map_data(self.import_file.pk)

    # get mapped property postal_code
    ps = PropertyState.objects.filter(address_line_1='11 Ninth Street')[0]
    self.assertEqual(ps.postal_code, '00340')

    ps = PropertyState.objects.filter(address_line_1='20 Tenth Street')[0]
    self.assertEqual(ps.postal_code, '00000')

    # ZIP+4 form is preserved
    ps = PropertyState.objects.filter(address_line_1='93029 Wellington Blvd')[0]
    self.assertEqual(ps.postal_code, '00001-0002')
def setUp(self):
    """Attach the example spreadsheet to a new import file, save raw data, and map it."""
    filename = getattr(self, 'filename', 'example-data-properties.xlsx')
    import_file_source_type = ASSESSED_RAW
    self.fake_mappings = FAKE_MAPPINGS['portfolio']
    self.fake_extra_data = FAKE_EXTRA_DATA
    self.fake_row = FAKE_ROW

    selfvars = self.set_up(import_file_source_type)
    self.user, self.org, self.import_file, self.import_record, self.cycle = selfvars

    filepath = osp.join(osp.dirname(__file__), 'data', filename)
    # use a context manager so the file handle is closed (the original
    # leaked the handle from a bare open(...).read())
    with open(filepath, 'rb') as f:
        self.import_file.file = SimpleUploadedFile(name=filename, content=f.read())
    self.import_file.save()

    tasks._save_raw_data(self.import_file.pk, 'fake_cache_key', 1)
    Column.create_mappings(self.fake_mappings, self.org, self.user, self.import_file.pk)
    tasks.map_data(self.import_file.pk)
def test_mapping_tax_lots_only(self):
    """Redirecting every PropertyState mapping to TaxLotState puts the data on tax lots."""
    # update the mappings so every property column lands in the TaxLotState table;
    # deep copy so the shared fixture is not mutated for other tests
    new_mappings = copy.deepcopy(self.fake_mappings)
    for m in new_mappings:
        if m["to_table_name"] == 'PropertyState':
            m["to_table_name"] = 'TaxLotState'

    tasks._save_raw_data(self.import_file.pk, 'fake_cache_key', 1)
    Column.create_mappings(new_mappings, self.org, self.user, self.import_file.pk)
    tasks.map_data(self.import_file.pk)

    # the raw import rows still exist as PropertyState records
    # NOTE(review): an earlier comment claimed "no taxlot objects were created",
    # which does not match this assertion on PropertyState
    ps = PropertyState.objects.all()
    self.assertEqual(len(ps), 14)

    # make sure that the new data was loaded correctly onto the tax lot
    ts = TaxLotState.objects.filter(address_line_1='50 Willow Ave SE').first()
    self.assertEqual(ts.extra_data['site_eui'], 125)
def test_map_all_models_xml(self):
    """Mapping a BuildingSync XML creates the view, scenarios, and scenario meters."""
    # -- Setup
    with patch.object(ImportFile, 'cache_first_rows', return_value=None):
        progress_info = tasks.save_raw_data(self.import_file.pk)
    self.assertEqual('success', progress_info['status'], json.dumps(progress_info))
    self.assertEqual(
        PropertyState.objects.filter(import_file=self.import_file).count(),
        1)

    # make the column mappings
    self.fake_mappings = default_buildingsync_profile_mappings()
    Column.create_mappings(self.fake_mappings, self.org, self.user, self.import_file.pk)

    # map the data
    progress_info = tasks.map_data(self.import_file.pk)
    self.assertEqual('success', progress_info['status'])
    # verify there were no errors with the files
    self.assertEqual({}, progress_info.get('file_info', {}))

    ps = PropertyState.objects.filter(address_line_1='123 MAIN BLVD',
                                      import_file=self.import_file)
    self.assertEqual(len(ps), 1)

    # -- Act
    tasks.map_additional_models(self.import_file.pk)

    # -- Assert
    ps = PropertyState.objects.filter(address_line_1='123 MAIN BLVD',
                                      import_file=self.import_file)
    self.assertEqual(ps.count(), 1)

    # verify the property view, scenario and meter data were created
    pv = PropertyView.objects.filter(state=ps[0])
    self.assertEqual(pv.count(), 1)
    scenario = Scenario.objects.filter(property_state=ps[0])
    self.assertEqual(scenario.count(), 3)

    # for bsync, meters are linked to scenarios only (not properties)
    meters = Meter.objects.filter(scenario__in=scenario)
    self.assertEqual(meters.count(), 6)
def setUp(self):
    """Attach the covered-buildings sample CSV, save raw data, and map it."""
    super(TestEquivalenceWithFile, self).setUp()
    filename = getattr(self, 'filename', 'covered-buildings-sample.csv')
    import_file_source_type = ASSESSED_RAW
    self.fake_mappings = FAKE_MAPPINGS['covered_building']

    selfvars = self.set_up(import_file_source_type)
    self.user, self.org, self.import_file, self.import_record, self.cycle = selfvars

    filepath = osp.join(osp.dirname(__file__), '..', '..', '..', 'tests', 'data', filename)
    # use a context manager so the file handle is closed (the original
    # leaked the handle from a bare open(...).read())
    with open(filepath, 'rb') as f:
        self.import_file.file = SimpleUploadedFile(name=filename, content=f.read())
    self.import_file.save()

    tasks.save_raw_data(self.import_file.pk)
    Column.create_mappings(self.fake_mappings, self.org, self.user, self.import_file.id)
    tasks.map_data(self.import_file.pk)
def setUp(self):
    """Import, map, and match two property files, then log the API client in."""
    data_importer_data_dir = os.path.join(os.path.dirname(__file__), '..', 'data_importer',
                                          'tests', 'data')
    filename = getattr(self, 'filename', 'example-data-properties.xlsx')
    self.fake_mappings = copy.copy(FAKE_MAPPINGS['portfolio'])

    selfvars = self.set_up(ASSESSED_RAW)
    self.user, self.org, self.import_file, self.import_record, self.cycle = selfvars

    filepath = os.path.join(data_importer_data_dir, filename)
    # context manager closes the handle (the original leaked it from a bare open().read())
    with open(filepath, 'rb') as f:
        self.import_file.file = SimpleUploadedFile(name=filename, content=f.read())
    self.import_file.save()

    tasks.save_raw_data(self.import_file.pk)
    Column.create_mappings(self.fake_mappings, self.org, self.user, self.import_file.id)
    tasks.map_data(self.import_file.pk)
    tasks.match_buildings(self.import_file.id)

    # import second file that is currently the same, but should be slightly different
    # NOTE(review): this getattr looks up 'filename', so a subclass that overrides
    # `filename` would import the same file twice — confirm whether 'filename_2'
    # was intended.
    filename_2 = getattr(self, 'filename', 'example-data-properties-small-changes.xlsx')
    _, self.import_file_2 = self.create_import_file(self.user, self.org, self.cycle)

    filepath = os.path.join(data_importer_data_dir, filename_2)
    with open(filepath, 'rb') as f:
        self.import_file_2.file = SimpleUploadedFile(name=filename_2, content=f.read())
    self.import_file_2.save()

    tasks.save_raw_data(self.import_file_2.pk)
    Column.create_mappings(self.fake_mappings, self.org, self.user, self.import_file_2.id)
    tasks.map_data(self.import_file_2.pk)
    tasks.match_buildings(self.import_file_2.id)

    # for api tests
    user_details = {
        'username': '******',
        'password': '******',
    }
    self.client.login(**user_details)
def test_match_buildings(self):
    """ case B (many property <-> one tax lot) """
    tasks._save_raw_data(self.import_file.pk, 'fake_cache_key', 1)
    Column.create_mappings(self.fake_mappings, self.org, self.user, self.import_file.pk)
    # Set remap to True because for some reason this file id has been imported before.
    tasks.map_data(self.import_file.pk, True)

    # Check to make sure all the properties imported
    ps = PropertyState.objects.filter(
        data_state=DATA_STATE_MAPPING,
        organization=self.org,
        import_file=self.import_file,
    )
    self.assertEqual(len(ps), 14)

    # Check to make sure the tax lots were imported
    ts = TaxLotState.objects.filter(
        data_state=DATA_STATE_MAPPING,
        organization=self.org,
        import_file=self.import_file,
    )
    self.assertEqual(len(ts), 18)

    # verify that the lot_number has the tax_lot information. For this case it is one-to-many
    p_test = PropertyState.objects.filter(
        pm_property_id='5233255',
        organization=self.org,
        data_state=DATA_STATE_MAPPING,
        import_file=self.import_file,
    ).first()
    self.assertEqual(p_test.lot_number, "333/66555;333/66125;333/66148")

    tasks.match_buildings(self.import_file.id)

    # make sure the tax lot view exists once and is linked to three properties
    tlv = TaxLotView.objects.filter(
        state__jurisdiction_tax_lot_id='11160509', cycle=self.cycle)
    self.assertEqual(len(tlv), 1)
    tlv = tlv[0]
    properties = tlv.property_states()
    self.assertEqual(len(properties), 3)
def test_mapping(self):
    """Mapped properties carry delimited, normalized tax lot ids in lot_number."""
    tasks._save_raw_data(self.import_file.pk, 'fake_cache_key', 1)
    Column.create_mappings(self.fake_mappings, self.org, self.user, self.import_file.pk)
    tasks.map_data(self.import_file.pk)

    # There are a total of 18 tax lot ids in the import file
    ts = TaxLotState.objects.all()
    self.assertEqual(len(ts), 18)

    # make sure that the new data was loaded correctly and that the lot_number was set
    # appropriately
    ps = PropertyState.objects.filter(address_line_1='2700 Welstone Ave NE')[0]
    self.assertEqual(ps.site_eui, 1202)
    self.assertEqual(ps.lot_number, '11160509')

    ps = PropertyState.objects.filter(address_line_1='521 Elm Street')[0]
    self.assertEqual(ps.site_eui, 1358)
    # The lot_number should also have the normalized code run, then re-delimited
    self.assertEqual(ps.lot_number, '333/66555;333/66125;333/66148')
def test_mapping_no_taxlot(self):
    """Pointing every column at PropertyState creates no TaxLotState rows."""
    # update the mappings to not include any taxlot tables in the data
    # note that save_data reads in from the propertystate table, so that will always
    # have entries in the db (for now).
    # Work on a deep copy: the original mutated self.fake_mappings (and the
    # shared FAKE_MAPPINGS fixture) in place, leaking into other tests.
    new_mappings = copy.deepcopy(self.fake_mappings)
    for m in new_mappings:
        if m["to_table_name"] == 'TaxLotState':
            m["to_table_name"] = 'PropertyState'

    tasks._save_raw_data(self.import_file.pk, 'fake_cache_key', 1)
    Column.create_mappings(new_mappings, self.org, self.user)
    tasks.map_data(self.import_file.pk)

    # make sure that no taxlot objects were created
    ts = TaxLotState.objects.all()
    self.assertEqual(len(ts), 0)

    # make sure that the new data was loaded correctly; the tax lot id landed
    # in extra_data instead of a TaxLotState
    ps = PropertyState.objects.filter(address_line_1='2700 Welstone Ave NE')[0]
    self.assertEqual(ps.site_eui, 1202)
    self.assertEqual(ps.extra_data['jurisdiction_tax_lot_id'], '11160509')
def test_postal_code_taxlot(self):
    """Tax lot postal codes are normalized (zero-padded, ZIP+4 kept) during mapping."""
    new_mappings = copy.deepcopy(self.fake_mappings['taxlot'])

    tasks.save_raw_data(self.import_file.pk)
    Column.create_mappings(new_mappings, self.org, self.user, self.import_file.pk)
    tasks.map_data(self.import_file.pk)

    # get mapped taxlot postal_code; report a test failure (not a raw
    # TypeError, as the original did) when the fixture row is missing
    ts = TaxLotState.objects.filter(address_line_1='35 Tenth Street').first()
    self.assertIsNotNone(ts, "Invalid Taxlot Address!")
    self.assertEqual(ts.postal_code, '00333')

    ts = TaxLotState.objects.filter(address_line_1='93030 Wellington Blvd').first()
    self.assertIsNotNone(ts, "Invalid Taxlot Address!")
    self.assertEqual(ts.postal_code, '00000-0000')
def test_mapping(self):
    """Mapped properties carry delimited, normalized tax lot ids in lot_number."""
    tasks._save_raw_data(self.import_file.pk, 'fake_cache_key', 1)
    Column.create_mappings(self.fake_mappings, self.org, self.user)
    tasks.map_data(self.import_file.pk)

    # There are a total of 18 tax lot ids in the import file
    ts = TaxLotState.objects.all()
    self.assertEqual(len(ts), 18)

    # make sure that the new data was loaded correctly and that the lot_number was set
    # appropriately
    ps = PropertyState.objects.filter(address_line_1='2700 Welstone Ave NE')[0]
    self.assertEqual(ps.site_eui, 1202)
    self.assertEqual(ps.lot_number, '11160509')

    ps = PropertyState.objects.filter(address_line_1='521 Elm Street')[0]
    self.assertEqual(ps.site_eui, 1358)
    # The lot_number should also have the normalized code run, then re-delimited
    self.assertEqual(ps.lot_number, '33366555;33366125;33366148')
def test_map_all_models_xml(self):
    """Geocode/match of a BuildingSync XML surfaces per-file warnings for bad measure names."""
    # -- Setup
    with patch.object(ImportFile, 'cache_first_rows', return_value=None):
        progress_info = tasks.save_raw_data(self.import_file.pk)
    self.assertEqual('success', progress_info['status'], json.dumps(progress_info))
    self.assertEqual(
        PropertyState.objects.filter(import_file=self.import_file).count(),
        1)

    # make the column mappings
    self.fake_mappings = default_buildingsync_profile_mappings()
    Column.create_mappings(self.fake_mappings, self.org, self.user, self.import_file.pk)

    # map the data
    progress_info = tasks.map_data(self.import_file.pk)
    self.assertEqual('success', progress_info['status'])
    # verify there were no errors with the files
    self.assertEqual({}, progress_info.get('file_info', {}))

    ps = PropertyState.objects.filter(address_line_1='123 Main St',
                                      import_file=self.import_file)
    self.assertEqual(ps.count(), 1)

    # -- Act
    progress_info = tasks.geocode_and_match_buildings_task(self.import_file.pk)

    # -- Assert
    ps = PropertyState.objects.filter(address_line_1='123 Main St',
                                      import_file=self.import_file)
    self.assertEqual(ps.count(), 1)

    # !! we should have warnings for our file because of the bad measure names !!
    self.assertNotEqual({}, progress_info.get('file_info', {}))
    self.assertIn(self.import_file.uploaded_filename,
                  list(progress_info['file_info'].keys()))
    self.assertNotEqual(
        [],
        progress_info['file_info'][self.import_file.uploaded_filename].get('warnings', []))
def test_single_id_matches(self):
    """A single promoted state is matched by its pm_property_id only."""
    tasks._save_raw_data(self.import_file.pk, 'fake_cache_key', 1)
    Column.create_mappings(self.fake_mappings, self.org, self.user)
    tasks.map_data(self.import_file.pk)

    # verify that there are no properties listed as canonical
    property_states = tasks.list_canonical_property_states(self.org)
    self.assertEqual(len(property_states), 0)

    # promote a property
    ps = PropertyState.objects.filter(pm_property_id='2264').first()
    ps.promote(self.cycle)

    property_states = tasks.list_canonical_property_states(self.org)
    self.assertEqual(len(property_states), 1)

    # no filter arguments -> no matches
    matches = tasks.query_property_matches(property_states, None, None)
    self.assertEqual(len(matches), 0)
    matches = tasks.query_property_matches(property_states, '2264', None)
    self.assertEqual(len(matches), 1)
    self.assertEqual(matches[0], ps)
def test_mapping_properties_only(self):
    """Pointing every column at PropertyState creates no TaxLotState rows."""
    # update the mappings to not include any taxlot tables in the data
    # note that save_data reads in from the propertystate table, so that will always
    # have entries in the db (for now).
    new_mappings = copy.deepcopy(self.fake_mappings)
    for m in new_mappings:
        if m["to_table_name"] == 'TaxLotState':
            m["to_table_name"] = 'PropertyState'

    tasks._save_raw_data(self.import_file.pk, 'fake_cache_key', 1)
    Column.create_mappings(new_mappings, self.org, self.user, self.import_file.pk)
    tasks.map_data(self.import_file.pk)

    # make sure that no taxlot objects were created
    ts = TaxLotState.objects.all()
    self.assertEqual(len(ts), 0)

    # make sure that the new data was loaded correctly; the tax lot id landed
    # in extra_data instead of a TaxLotState
    ps = PropertyState.objects.filter(address_line_1='2700 Welstone Ave NE')[0]
    self.assertEqual(ps.site_eui, 1202)
    self.assertEqual(ps.extra_data['jurisdiction_tax_lot_id'], '11160509')
def test_map_data_zip(self):
    """Mapping a zip of BuildingSync files maps both contained properties."""
    # -- Setup
    with patch.object(ImportFile, 'cache_first_rows', return_value=None):
        progress_info = tasks.save_raw_data(self.import_file.pk)
    self.assertEqual('success', progress_info['status'], json.dumps(progress_info))
    self.assertEqual(
        PropertyState.objects.filter(import_file=self.import_file).count(),
        2)

    # make the column mappings
    self.fake_mappings = default_buildingsync_profile_mappings()
    Column.create_mappings(self.fake_mappings, self.org, self.user, self.import_file.pk)

    # -- Act
    progress_info = tasks.map_data(self.import_file.pk)

    # -- Assert
    self.assertEqual('success', progress_info['status'])
    ps = PropertyState.objects.filter(address_line_1='123 Main St',
                                      import_file=self.import_file)
    self.assertEqual(len(ps), 2)
def test_map_all_models_zip(self):
    """Mapping a zip of BuildingSync files creates building files and scenarios for both."""
    # -- Setup
    with patch.object(ImportFile, 'cache_first_rows', return_value=None):
        progress_info = tasks.save_raw_data(self.import_file.pk)
    self.assertEqual('success', progress_info['status'], json.dumps(progress_info))
    self.assertEqual(
        PropertyState.objects.filter(import_file=self.import_file).count(),
        2)

    # make the column mappings
    self.fake_mappings = default_buildingsync_profile_mappings()
    Column.create_mappings(self.fake_mappings, self.org, self.user, self.import_file.pk)

    # map the data
    progress_info = tasks.map_data(self.import_file.pk)
    self.assertEqual('success', progress_info['status'])

    ps = PropertyState.objects.filter(address_line_1='123 Main St',
                                      import_file=self.import_file)
    self.assertEqual(ps.count(), 2)

    # -- Act
    tasks.map_additional_models(self.import_file.pk)

    # -- Assert
    ps = PropertyState.objects.filter(address_line_1='123 Main St',
                                      import_file=self.import_file)
    self.assertEqual(ps.count(), 2)

    # verify there are 2 building files
    bfs = BuildingFile.objects.all()
    self.assertEqual(bfs.count(), 2)

    # check that scenarios were created
    scenarios = Scenario.objects.all()
    self.assertEqual(scenarios.count(), 31)
def save_column_mappings(self, file_id, mappings):
    """Create the column mappings and, on success, cache them on the ImportFile.

    Returns {'status': 'success'} or {'status': 'error'}.
    """
    target_file = ImportFile.objects.get(pk=file_id)
    created = Column.create_mappings(mappings, self.org, self.user)

    # keep only the fields the cached representation cares about
    cached_mappings = [
        {key: m[key] for key in ('from_field', 'to_field', 'to_table_name')}
        for m in mappings
    ]

    if not created:
        return {'status': 'error'}
    target_file.save_cached_mapped_columns(cached_mappings)
    return {'status': 'success'}
def save_column_mappings(self, request, pk=None):
    """
    Saves the mappings between the raw headers of an ImportFile and the
    destination fields in the `to_table_name` model which should be either
    PropertyState or TaxLotState

    Valid source_type values are found in ``seed.models.SEED_DATA_SOURCES``

    Payload::

        {
            "import_file_id": ID of the ImportFile record,
            "mappings": [
                {
                    'from_field': 'eui',  # raw field in import file
                    'to_field': 'energy_use_intensity',
                    'to_table_name': 'PropertyState',
                },
                {
                    'from_field': 'gfa',
                    'to_field': 'gross_floor_area',
                    'to_table_name': 'PropertyState',
                }
            ]
        }

    Returns::

        {'status': 'success'}
    """
    import_file = ImportFile.objects.get(pk=pk)
    organization = import_file.import_record.super_organization
    mappings = request.data.get('mappings', [])

    saved = Column.create_mappings(mappings, organization, request.user)

    # keep only the fields the cached representation cares about
    column_mappings = []
    for mapping in mappings:
        column_mappings.append({
            'from_field': mapping['from_field'],
            'to_field': mapping['to_field'],
            'to_table_name': mapping['to_table_name'],
        })

    if not saved:
        return JsonResponse({'status': 'error'})
    import_file.save_cached_mapped_columns(column_mappings)
    return JsonResponse({'status': 'success'})
def column_mappings(self, request, pk=None):
    """
    Saves the mappings between the raw headers of an ImportFile and the
    destination fields in the `to_table_name` model which should be either
    PropertyState or TaxLotState

    Valid source_type values are found in ``seed.models.SEED_DATA_SOURCES``
    """
    import_file_id = request.query_params.get('import_file_id')
    if import_file_id is None:
        return JsonResponse(
            {
                'status': 'error',
                'message': 'Query param `import_file_id` is required'
            }, status=status.HTTP_400_BAD_REQUEST)
    try:
        # existence check only; the raw id is what create_mappings needs
        _ = ImportFile.objects.get(pk=import_file_id)
        organization = Organization.objects.get(pk=pk)
    except ImportFile.DoesNotExist:
        return JsonResponse(
            {
                'status': 'error',
                'message': 'No import file found'
            }, status=status.HTTP_404_NOT_FOUND)
    except Organization.DoesNotExist:
        return JsonResponse(
            {
                'status': 'error',
                'message': 'No organization found'
            }, status=status.HTTP_404_NOT_FOUND)

    result = Column.create_mappings(
        request.data.get('mappings', []),
        organization,
        request.user,
        import_file_id
    )

    if result:
        return JsonResponse({'status': 'success'})
    else:
        return JsonResponse({'status': 'error'})
def test_check(self):
    """Map a raw file into PropertyState records and verify the two
    expected data-quality failures are reported."""
    # Import the file and run mapping
    # Year Ending,Energy Score,Total GHG Emissions (MtCO2e),Weather Normalized Site EUI (kBtu/ft2),
    # National Median Site EUI (kBtu/ft2),Source EUI (kBtu/ft2),Weather Normalized Source EUI (kBtu/ft2),
    # National Median Source EUI (kBtu/ft2),Parking - Gross Floor Area (ft2),Organization
    # Release Date
    # Raw CSV headers -> PropertyState columns. Headers listed in the
    # comment above but absent here are intentionally left unmapped.
    fake_mappings = [{
        "from_field": u'Property Id',
        "to_table_name": u'PropertyState',
        "to_field": u'pm_property_id',
    }, {
        "from_field": u'Property Name',
        "to_table_name": u'PropertyState',
        "to_field": u'property_name',
    }, {
        "from_field": u'Address 1',
        "to_table_name": u'PropertyState',
        "to_field": u'address_line_1',
    }, {
        "from_field": u'Address 2',
        "to_table_name": u'PropertyState',
        "to_field": u'address_line_2',
    }, {
        "from_field": u'City',
        "to_table_name": u'PropertyState',
        "to_field": u'city',
    }, {
        "from_field": u'State/Province',
        "to_table_name": u'PropertyState',
        "to_field": u'state_province',
    }, {
        "from_field": u'Postal Code',
        "to_table_name": u'PropertyState',
        "to_field": u'postal_code',
    }, {
        "from_field": u'Year Built',
        "to_table_name": u'PropertyState',
        "to_field": u'year_built',
    }, {
        "from_field": u'Property Floor Area (Buildings and Parking) (ft2)',
        "to_table_name": u'PropertyState',
        "to_field": u'gross_floor_area',
    }, {
        "from_field": u'Site EUI (kBtu/ft2)',
        "to_table_name": u'PropertyState',
        "to_field": u'site_eui',
    }, {
        "from_field": u'Generation Date',
        "to_table_name": u'PropertyState',
        "to_field": u'generation_date',
    }]

    tasks.save_raw_data(self.import_file.id)
    Column.create_mappings(fake_mappings, self.org, self.user, self.import_file.id)
    tasks.map_data(self.import_file.id)

    # Only the successfully-mapped (ASSESSED) states are checked.
    qs = PropertyState.objects.filter(
        import_file=self.import_file,
        source_type=ASSESSED_BS,
    ).iterator()

    d = DataQualityCheck.retrieve(self.org)
    d.check_data('PropertyState', qs)
    _log.debug(d.results)

    # Exactly two records are expected to fail the default rules.
    self.assertEqual(len(d.results), 2)

    # First failing record: missing custom_id_1 and pm_property_id.
    result = d.retrieve_result_by_address('120243 E True Lane')
    res = [{
        'severity': 'error',
        'value': None,
        'field': u'custom_id_1',
        'table_name': u'PropertyState',
        'message': 'Custom ID 1 (Property) is null',
        'detailed_message': 'Custom ID 1 (Property) is null',
        'formatted_field': 'Custom ID 1 (Property)'
    }, {
        'severity': 'error',
        'value': u'',
        'field': u'pm_property_id',
        'table_name': u'PropertyState',
        'message': 'PM Property ID is null',
        'detailed_message': 'PM Property ID is null',
        'formatted_field': 'PM Property ID'
    }]
    self.assertEqual(res, result['data_quality_results'])

    # Second failing record: missing custom_id_1 plus an out-of-range
    # (warning-severity) Site EUI.
    result = d.retrieve_result_by_address('95373 E Peach Avenue')
    res = [
        {
            'severity': 'error',
            'value': None,
            'field': u'custom_id_1',
            'table_name': u'PropertyState',
            'message': 'Custom ID 1 (Property) is null',
            'detailed_message': 'Custom ID 1 (Property) is null',
            'formatted_field': 'Custom ID 1 (Property)'
        },
        {
            'field': u'site_eui',
            'formatted_field': u'Site EUI',
            'value': '0.1',
            'table_name': u'PropertyState',
            'message': u'Site EUI out of range',
            'detailed_message': u'Site EUI [0.1] < 10.0',
            'severity': u'warning'
        },
    ]
    self.assertEqual(res, result['data_quality_results'])
def test_demo_v2(self):
    """Import tax lot and property files containing footprint geometry and
    verify valid/invalid footprint handling, the data-quality messages for
    invalid footprints, and that re-importing does not duplicate rules."""
    tasks.save_raw_data(self.import_file_tax_lot.pk)
    Column.create_mappings(self.fake_taxlot_mappings, self.org, self.user,
                           self.import_file_tax_lot.id)
    Column.create_mappings(self.fake_portfolio_mappings, self.org, self.user,
                           self.import_file_property.id)
    tasks.map_data(self.import_file_tax_lot.pk)

    # Check to make sure the taxlots were imported
    ts = TaxLotState.objects.filter(
        data_state=DATA_STATE_MAPPING,
        organization=self.org,
        import_file=self.import_file_tax_lot,
    )
    self.assertEqual(len(ts), 3)

    # Check taxlot_footprints
    # Valid footprint: parsed into a Polygon, no "(Invalid Footprint)"
    # extra_data entry is created.
    tax_lot_1 = TaxLotState.objects.get(address_line_1='050 Willow Ave SE')
    self.assertTrue(isinstance(tax_lot_1.taxlot_footprint, Polygon))
    self.assertEqual(
        tax_lot_1.extra_data.get('Tax Lot Coordinates (Invalid Footprint)'),
        None)

    # For invalid footprints,
    # check that extra_data field added with ' (Invalid Footprint)' appended to original column title
    tax_lot_2 = TaxLotState.objects.get(
        address_line_1='2655 Welstone Ave NE')
    invalid_taxlot_footprint_string = '(( -121.927490629756 37.3966545740305, -121.927428469962 37.3965654556064 ))'
    self.assertEqual(tax_lot_2.taxlot_footprint, None)
    self.assertEqual(
        tax_lot_2.extra_data['Tax Lot Coordinates (Invalid Footprint)'],
        invalid_taxlot_footprint_string)

    # Empty footprint value is also treated as invalid.
    tax_lot_3 = TaxLotState.objects.get(
        address_line_1='94000 Wellington Blvd')
    self.assertEqual(tax_lot_3.taxlot_footprint, None)
    self.assertEqual(
        tax_lot_3.extra_data['Tax Lot Coordinates (Invalid Footprint)'], '')

    # Import the property data
    tasks.save_raw_data(self.import_file_property.pk)
    tasks.map_data(self.import_file_property.pk)

    ps = PropertyState.objects.filter(
        data_state=DATA_STATE_MAPPING,
        organization=self.org,
        import_file=self.import_file_property,
    )
    self.assertEqual(len(ps), 3)

    # Check property_footprints (same valid/invalid pattern as tax lots).
    property_1 = PropertyState.objects.get(
        address_line_1='50 Willow Ave SE')
    self.assertTrue(isinstance(property_1.property_footprint, Polygon))
    self.assertEqual(
        property_1.extra_data.get('Property Coordinates (Invalid Footprint)'),
        None)

    # For invalid footprints,
    # check that extra_data field added with ' (Invalid Footprint)' appended to original column title
    property_2 = PropertyState.objects.get(
        address_line_1='2700 Welstone Ave NE')
    invalid_property_footprint_string = '(( 1 0, 0 1 ))'
    self.assertEqual(property_2.property_footprint, None)
    self.assertEqual(
        property_2.extra_data['Property Coordinates (Invalid Footprint)'],
        invalid_property_footprint_string)

    # Non-string (numeric) footprint value is preserved as-is in extra_data.
    property_3 = PropertyState.objects.get(
        address_line_1='11 Ninth Street')
    self.assertEqual(property_3.property_footprint, None)
    self.assertEqual(
        property_3.extra_data['Property Coordinates (Invalid Footprint)'],
        123)

    # Make sure that new DQ rules have been added and apply to the states with (Invalid Footprints)
    tdq = DataQualityCheck.retrieve(self.org.id)
    tdq.check_data('TaxLotState', [tax_lot_1, tax_lot_2, tax_lot_3])
    initial_tdq_rules_count = tdq.rules.count()
    # The valid footprint produces no data-quality result.
    self.assertEqual(tdq.results.get(tax_lot_1.id, None), None)
    # Long invalid geometry strings are truncated in the message.
    self.assertEqual(
        tdq.results[
            tax_lot_2.id]['data_quality_results'][0]['detailed_message'],
        "'(( -121.927490629756 37.3...' is not a valid geometry")
    self.assertEqual(
        tdq.results[tax_lot_3.id]['data_quality_results'][0]
        ['detailed_message'], "'' is not a valid geometry")

    pdq = DataQualityCheck.retrieve(self.org.id)
    pdq.check_data('PropertyState', [property_1, property_2, property_3])
    self.assertEqual(pdq.results.get(property_1.id, None), None)
    self.assertEqual(
        pdq.results[property_2.id]['data_quality_results'][0]
        ['detailed_message'],
        "'{}' is not a valid geometry".format(
            invalid_property_footprint_string))
    self.assertEqual(
        pdq.results[property_3.id]['data_quality_results'][0]
        ['detailed_message'], "'123' is not a valid geometry")

    # Run new import, and check that duplicate rules are not created
    new_import_file_tax_lot = ImportFile.objects.create(
        import_record=self.import_record_tax_lot, cycle=self.cycle)
    tax_lot_filename = getattr(
        self, 'filename', 'example-data-taxlots-1-invalid-footprint.xlsx')
    filepath = osp.join(osp.dirname(__file__), '..', 'data',
                        tax_lot_filename)
    new_import_file_tax_lot.file = SimpleUploadedFile(
        name=tax_lot_filename, content=open(filepath, 'rb').read())
    new_import_file_tax_lot.save()
    tasks.save_raw_data(new_import_file_tax_lot.pk)

    updated_tdq = DataQualityCheck.retrieve(self.org.id)
    # Rule count is unchanged after the second import.
    self.assertEqual(updated_tdq.rules.count(), initial_tdq_rules_count)
def test_mapping_takes_into_account_selected_units(self):
    """Verify that mapping converts values using the user-selected
    ``from_units`` (kWh/m2/yr and m2) into the canonical DB units."""
    # Just as in the previous test, build extra_data PropertyState
    raw_state = self.property_state_factory.get_property_state_as_extra_data(
        import_file_id=self.import_file.id,
        source_type=ASSESSED_RAW,
        data_state=DATA_STATE_IMPORT,
    )
    # Replace the site_eui and gross_floor_area key-value that gets
    # autogenerated by get_property_state_as_extra_data
    del raw_state.extra_data['site_eui']
    raw_state.extra_data['Site EUI'] = 100
    del raw_state.extra_data['gross_floor_area']
    raw_state.extra_data['Gross Floor Area'] = 100
    raw_state.save()

    self.import_file.raw_save_done = True
    self.import_file.save()

    # Build mappings - with unit-aware destinations and non-default unit choices
    suggested_mappings = mapper.build_column_mapping(
        list(raw_state.extra_data.keys()),
        Column.retrieve_all_by_tuple(self.org),
        previous_mapping=get_column_mapping,
        map_args=[self.org],
        thresh=80)

    mappings = []
    for raw_column, suggestion in suggested_mappings.items():
        if raw_column == 'Site EUI':
            # Non-default source units: kWh/m**2/year instead of kBtu/ft**2/year.
            mappings.append({
                "from_field": raw_column,
                "from_units": 'kWh/m**2/year',
                "to_table_name": 'PropertyState',
                "to_field": 'site_eui',
                "to_field_display_name": 'Site EUI',
            })
        elif raw_column == 'Gross Floor Area':
            # Non-default source units: m**2 instead of ft**2.
            mappings.append({
                "from_field": raw_column,
                "from_units": 'm**2',
                "to_table_name": 'PropertyState',
                "to_field": 'gross_floor_area',
                "to_field_display_name": 'Gross Floor Area',
            })
        else:
            # Everything else uses the suggested (table, field) as-is.
            other_mapping = {
                "from_field": raw_column,
                "from_units": None,
                "to_table_name": suggestion[0],
                "to_field": suggestion[1],
                "to_field_display_name": suggestion[1],
            }
            mappings.append(other_mapping)

    # Perform mapping, creating the initial PropertyState records.
    Column.create_mappings(mappings, self.org, self.user, self.import_file.id)
    tasks.map_data(self.import_file.id)

    # Verify that the values have been converted appropriately
    state = self.import_file.find_unmatched_property_states().get()
    self.assertAlmostEqual(state.site_eui,
                           (100 * ureg('kWh/m**2/year')).to('kBtu/ft**2/year'))
    self.assertAlmostEqual(state.gross_floor_area,
                           (100 * ureg('m**2')).to('ft**2'))
def test_remapping_with_and_without_unit_aware_columns_doesnt_lose_data(
        self):
    """
    During import, when the initial -State objects are created from the extra_data values,
    ColumnMapping objects are used to take the extra_data dictionary values and create the
    -State objects, setting the DB-level values as necessary - e.g. taking a raw
    "Site EUI (kBtu/ft2)" value and inserting it into the DB field "site_eui".

    Previously, remapping could cause extra Column objects to be created, and subsequently,
    this created extra ColumnMapping objects. These extra ColumnMapping objects could cause
    raw values to be inserted into the wrong DB field on -State creation.
    """
    # Just as in the previous test, build extra_data PropertyState
    state = self.property_state_factory.get_property_state_as_extra_data(
        import_file_id=self.import_file.id,
        source_type=ASSESSED_RAW,
        data_state=DATA_STATE_IMPORT,
        random_extra=42,
    )
    # Replace the site_eui key-value that gets autogenerated by get_property_state_as_extra_data
    del state.extra_data['site_eui']
    state.extra_data['Site EUI (kBtu/ft2)'] = 123
    state.save()

    self.import_file.raw_save_done = True
    self.import_file.save()

    # Build 2 sets of mappings - with and without a unit-aware destination site_eui data
    suggested_mappings = mapper.build_column_mapping(
        list(state.extra_data.keys()),
        Column.retrieve_all_by_tuple(self.org),
        previous_mapping=get_column_mapping,
        map_args=[self.org],
        thresh=80)

    ed_site_eui_mappings = []
    unit_aware_site_eui_mappings = []
    for raw_column, suggestion in suggested_mappings.items():
        if raw_column == 'Site EUI (kBtu/ft2)':
            # Make this an extra_data field (without from_units)
            ed_site_eui_mappings.append({
                "from_field": raw_column,
                "from_units": None,
                "to_table_name": 'PropertyState',
                "to_field": raw_column,
                "to_field_display_name": raw_column,
            })
            # ...and the unit-aware alternative that maps into the DB field.
            unit_aware_site_eui_mappings.append({
                "from_field": raw_column,
                "from_units": 'kBtu/ft**2/year',
                "to_table_name": 'PropertyState',
                "to_field": 'site_eui',
                "to_field_display_name": 'Site EUI',
            })
        else:
            # All other columns are mapped identically in both sets.
            other_mapping = {
                "from_field": raw_column,
                "from_units": None,
                "to_table_name": suggestion[0],
                "to_field": suggestion[1],
                "to_field_display_name": suggestion[1],
            }
            ed_site_eui_mappings.append(other_mapping)
            unit_aware_site_eui_mappings.append(other_mapping)

    # Map and remap the file multiple times with different mappings each time.

    # Round 1 - Map site_eui data into Extra Data
    Column.create_mappings(ed_site_eui_mappings, self.org, self.user,
                           self.import_file.id)
    tasks.map_data(self.import_file.id)

    # There should only be one raw 'Site EUI (kBtu/ft2)' Column object
    self.assertEqual(
        1,
        self.org.column_set.filter(column_name='Site EUI (kBtu/ft2)',
                                   table_name='').count())

    # The one propertystate should have site eui info in extra_data
    prop = self.import_file.find_unmatched_property_states().get()
    self.assertIsNone(prop.site_eui)
    self.assertIsNotNone(prop.extra_data.get('Site EUI (kBtu/ft2)'))

    # Round 2 - Map site_eui data into the PropertyState attribute "site_eui"
    Column.create_mappings(unit_aware_site_eui_mappings, self.org, self.user,
                           self.import_file.id)
    tasks.map_data(self.import_file.id, remap=True)

    # Remapping must not have created a duplicate raw Column.
    self.assertEqual(
        1,
        self.org.column_set.filter(column_name='Site EUI (kBtu/ft2)',
                                   table_name='').count())

    # The one propertystate should have site eui info in site_eui
    prop = self.import_file.find_unmatched_property_states().get()
    self.assertIsNotNone(prop.site_eui)
    self.assertIsNone(prop.extra_data.get('Site EUI (kBtu/ft2)'))

    # Round 3 - Map site_eui data into Extra Data
    Column.create_mappings(ed_site_eui_mappings, self.org, self.user,
                           self.import_file.id)
    tasks.map_data(self.import_file.id, remap=True)

    self.assertEqual(
        1,
        self.org.column_set.filter(column_name='Site EUI (kBtu/ft2)',
                                   table_name='').count())

    # The one propertystate should have site eui info in extra_data
    prop = self.import_file.find_unmatched_property_states().get()
    self.assertIsNone(prop.site_eui)
    self.assertIsNotNone(prop.extra_data.get('Site EUI (kBtu/ft2)'))
def import_exported_data(self, filename):
    """
    Import test files from Stephen for many-to-many testing. This imports
    and maps the data accordingly. Presently these files are missing a
    couple of attributes to make them valid:

        1) the master campus record to define the pm_property_id
        2) the joins between propertystate and taxlotstate

    :param filename: name of the exported CSV fixture in this test's
        ``data`` directory; its ``extra_data`` column holds a JSON dict.
    """
    # Do a bunch of work to flatten out this temp file that has extra_data
    # as a string representation of a dict
    data = []
    new_keys = set()

    f = osp.join(osp.dirname(__file__), 'data', filename)
    with open(f, 'rb') as csvfile:
        reader = csv.DictReader(csvfile)
        keys = reader.fieldnames
        for row in reader:
            ed = json.loads(row.pop('extra_data'))
            for k, v in ed.iteritems():  # NOTE: Python 2 dict API
                new_keys.add(k)
                row[k] = v
            data.append(row)

    # remove the extra_data column and add in the new columns.
    # BUGFIX: sort the new keys so the flattened file's column order is
    # deterministic across runs (set iteration order is not guaranteed).
    keys.remove('extra_data')
    keys.extend(sorted(new_keys))

    # Compute the derived paths once instead of rebuilding them three times.
    base_name = osp.splitext(osp.basename(filename))[0]
    data_dir = osp.join(osp.dirname(__file__), 'data')

    # save the new flattened file
    f_new = osp.join(data_dir, 'tmp_{}_flat.csv'.format(base_name))
    with open(f_new, 'w') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=keys)
        writer.writeheader()
        for d in data:
            writer.writerow(d)

    # save the keys; this does not appear to be used anywhere
    f_keys = osp.join(data_dir, 'tmp_{}_keys.csv'.format(base_name))
    with open(f_keys, 'w') as outfile:
        outfile.writelines([str(key) + '\n' for key in keys])

    # Continue saving the raw data. The handle is intentionally left open:
    # Django's File wrapper reads from it during save_raw_data.
    self.import_file.file = File(open(f_new))
    self.import_file.save()
    save_raw_data(self.import_file.id)

    # the mapping is just the 'keys' repeated since the file
    # was created as a database dump; 'id' is the DB primary key and is
    # not a mappable column.
    mapping = [
        {"from_field": k, "to_table_name": "PropertyState", "to_field": k}
        for k in keys if k != 'id'
    ]
    Column.create_mappings(mapping, self.org, self.user, self.import_file.pk)

    # call the mapping function from the tasks file
    map_data(self.import_file.id)
def import_exported_data(self, filename):
    """
    Import test files from Stephen for many-to-many testing. This imports
    and maps the data accordingly. Presently these files are missing a
    couple of attributes to make them valid:

        1) the master campus record to define the pm_property_id
        2) the joins between propertystate and taxlotstate

    :param filename: name of the exported CSV fixture in this test's
        ``data`` directory; its ``extra_data`` column holds a JSON dict.
    """
    # Do a bunch of work to flatten out this temp file that has extra_data
    # as a string representation of a dict
    data = []
    keys = None
    new_keys = set()

    f = os.path.join(os.path.dirname(__file__), 'data', filename)
    with open(f, 'rb') as csvfile:
        reader = csv.DictReader(csvfile)
        keys = reader.fieldnames
        for row in reader:
            ed = json.loads(row.pop('extra_data'))
            for k, v in ed.iteritems():  # NOTE: Python 2 dict API
                new_keys.add(k)
                row[k] = v
            data.append(row)

    # remove the extra_data column and add in the new columns.
    # BUGFIX: sort the new keys so the flattened file's column order is
    # deterministic across runs (set iteration order is not guaranteed).
    keys.remove('extra_data')
    keys.extend(sorted(new_keys))

    # Compute the derived paths once instead of rebuilding them three times.
    base_name = os.path.splitext(os.path.basename(filename))[0]
    data_dir = os.path.join(os.path.dirname(__file__), 'data')

    # save the new flattened file
    f_new = os.path.join(data_dir, 'tmp_{}_flat.csv'.format(base_name))
    with open(f_new, 'w') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=keys)
        writer.writeheader()
        for d in data:
            writer.writerow(d)

    # save the keys; this doesn't appear to be used anywhere
    f_keys = os.path.join(data_dir, 'tmp_{}_keys.csv'.format(base_name))
    with open(f_keys, 'w') as outfile:
        outfile.writelines([str(key) + '\n' for key in keys])

    # Continue saving the raw data. The handle is intentionally left open:
    # Django's File wrapper reads from it during save_raw_data.
    self.import_file.file = File(open(f_new))
    self.import_file.save()
    save_raw_data(self.import_file.id)

    # the mapping is just the 'keys' repeated since the file
    # was created as a database dump; 'id' is the DB primary key and is
    # not a mappable column.
    mapping = [
        {"from_field": k, "to_table_name": "PropertyState", "to_field": k}
        for k in keys if k != 'id'
    ]
    Column.create_mappings(mapping, self.org, self.user)

    # call the mapping function from the tasks file
    map_data(self.import_file.id)
def test_check(self):
    """Build a custom rule set (with status labels), import and match a
    file, then verify the data-quality results and the labels applied to
    PropertyViews/TaxLotViews."""
    # data quality check
    d = DataQualityCheck.retrieve(self.org)
    d.remove_all_rules()
    # Default numeric range rule on a DB field.
    d.add_rule({
        'table_name': 'PropertyState',
        'field': 'gross_floor_area',
        'data_type': TYPE_NUMBER,
        'rule_type': RULE_TYPE_DEFAULT,
        'min': 100,
        'max': 7000000,
        'severity': SEVERITY_ERROR,
        'units': 'square feet',
    })
    # Date range rule; min/max are YYYYMMDD integers.
    d.add_rule({
        'table_name': 'PropertyState',
        'field': 'recent_sale_date',
        'data_type': TYPE_DATE,
        'rule_type': RULE_TYPE_DEFAULT,
        'min': 18890101,
        'max': 20201231,
        'severity': SEVERITY_ERROR,
    })

    # create some status labels for testing
    sl_data = {
        'name': 'year - old or future',
        'super_organization': self.org
    }
    sl_year, _ = StatusLabel.objects.get_or_create(**sl_data)
    new_rule = {
        'table_name': 'PropertyState',
        'field': 'year_built',
        'data_type': TYPE_YEAR,
        'rule_type': RULE_TYPE_DEFAULT,
        'min': 1700,
        'max': 2019,
        'severity': SEVERITY_ERROR,
        'status_label': sl_year,
    }
    d.add_rule(new_rule)

    # NOTE(review): the variable names sl_string/sl_float look swapped
    # relative to the label *names* below ('... float error' is held by
    # sl_string and vice versa). The label-to-rule wiring and the later
    # assertions are internally consistent, so behavior is unaffected.
    sl_data = {
        'name': 'extra data ps float error',
        'super_organization': self.org
    }
    sl_string, _ = StatusLabel.objects.get_or_create(**sl_data)
    new_rule = {
        'table_name': 'PropertyState',
        'field': 'extra_data_ps_alpha',
        'data_type': TYPE_STRING,
        'rule_type': RULE_TYPE_CUSTOM,
        'text_match': 'alpha',
        'severity': SEVERITY_ERROR,
        'units': 'square feet',
        'status_label': sl_string,
    }
    d.add_rule(new_rule)

    sl_data = {
        'name': 'extra data ps string error',
        'super_organization': self.org
    }
    sl_float, _ = StatusLabel.objects.get_or_create(**sl_data)
    new_rule = {
        'table_name': 'PropertyState',
        'field': 'extra_data_ps_float',
        'data_type': TYPE_NUMBER,
        'rule_type': RULE_TYPE_CUSTOM,
        'min': 9999,
        'max': 10001,
        'severity': SEVERITY_ERROR,
        'status_label': sl_float,
    }
    d.add_rule(new_rule)

    sl_data = {
        'name': 'jurisdiction id does not match',
        'super_organization': self.org
    }
    sl_jurid, _ = StatusLabel.objects.get_or_create(**sl_data)
    new_rule = {
        'table_name': 'TaxLotState',
        'field': 'jurisdiction_tax_lot_id',
        'data_type': TYPE_STRING,
        'rule_type': RULE_TYPE_CUSTOM,
        'text_match': '1235',
        'severity': SEVERITY_ERROR,
        'status_label': sl_jurid,
    }
    d.add_rule(new_rule)

    # Three string-match rules against extra_data fields with spaces in
    # their names, each with its own label.
    sl_data = {'name': 'No meters present', 'super_organization': self.org}
    sl_ok_1, _ = StatusLabel.objects.get_or_create(**sl_data)
    new_rule = {
        'table_name': 'PropertyState',
        'field': 'Meters Present',
        'data_type': TYPE_STRING,
        'rule_type': RULE_TYPE_CUSTOM,
        'text_match': 'OK',
        'severity': SEVERITY_ERROR,
        'status_label': sl_ok_1,
    }
    d.add_rule(new_rule)

    sl_data = {
        'name': 'No 12 Consectutive Months',
        'super_organization': self.org
    }
    sl_ok_2, _ = StatusLabel.objects.get_or_create(**sl_data)
    new_rule = {
        'table_name': 'PropertyState',
        'field': '12 Consectutive Months',
        'data_type': TYPE_STRING,
        'rule_type': RULE_TYPE_CUSTOM,
        'text_match': 'OK',
        'severity': SEVERITY_ERROR,
        'status_label': sl_ok_2,
    }
    d.add_rule(new_rule)

    sl_data = {'name': 'No Monthly Data', 'super_organization': self.org}
    sl_ok_3, _ = StatusLabel.objects.get_or_create(**sl_data)
    new_rule = {
        'table_name': 'PropertyState',
        'field': 'Monthly Data',
        'data_type': TYPE_STRING,
        'rule_type': RULE_TYPE_CUSTOM,
        'text_match': 'OK',
        'severity': SEVERITY_ERROR,
        'status_label': sl_ok_3,
    }
    d.add_rule(new_rule)

    # import data
    tasks.save_raw_data(self.import_file.id)
    Column.create_mappings(self.fake_mappings, self.org, self.user,
                           self.import_file.pk)
    tasks.map_data(self.import_file.id)
    tasks.match_buildings(self.import_file.id)

    qs = PropertyState.objects.filter(
        import_file=self.import_file,
        source_type=ASSESSED_BS,
    ).iterator()

    d.reset_results()
    d.check_data('PropertyState', qs)

    result = d.retrieve_result_by_address('4 Myrtle Parkway')
    res = [{
        "severity": "error",
        "value": "27.0",
        "field": "extra_data_ps_float",
        "table_name": "PropertyState",
        "message": "Extra Data Ps Float out of range",
        "detailed_message": "Extra Data Ps Float [27.0] < 9999.0",
        "formatted_field": "Extra Data Ps Float"
    }, {
        "severity": "error",
        "value": "5.0",
        "field": "gross_floor_area",
        "table_name": "PropertyState",
        "message": "Gross Floor Area out of range",
        "detailed_message": "Gross Floor Area [5.0] < 100.0",
        "formatted_field": "Gross Floor Area"
    }]
    self.assertListEqual(result['data_quality_results'], res)

    result = d.retrieve_result_by_address('94 Oxford Hill')
    res = [{
        "severity": "error",
        "value": "20000.0",
        "field": "extra_data_ps_float",
        "table_name": "PropertyState",
        "message": "Extra Data Ps Float out of range",
        "detailed_message": "Extra Data Ps Float [20000.0] > 10001.0",
        "formatted_field": "Extra Data Ps Float"
    }, {
        "severity": "error",
        "value": "1888-01-01 08:00:00",
        "field": "recent_sale_date",
        "table_name": "PropertyState",
        "message": "Recent Sale Date out of range",
        "detailed_message": "Recent Sale Date [1888-01-01 08:00:00] < 1889-01-01 00:00:00",
        "formatted_field": "Recent Sale Date"
    }]
    self.assertListEqual(result['data_quality_results'], res)

    result = d.retrieve_result_by_address("3 Portage Alley")
    res = [{
        'severity': u'error',
        'value': 'beta',
        'field': u'extra_data_ps_alpha',
        'table_name': u'PropertyState',
        'message': u'Extra Data Ps Alpha does not match expected value',
        'detailed_message': u'Extra Data Ps Alpha [beta] does not contain "alpha"',
        'formatted_field': u'Extra Data Ps Alpha'
    }]
    self.assertListEqual(result['data_quality_results'], res)

    # make sure that the label has been applied
    props = PropertyView.objects.filter(
        property__labels=sl_year).select_related('state')
    addresses = sorted([p.state.address_line_1 for p in props])
    expected = sorted([u'84807 Buell Trail', u'1 International Road'])
    self.assertListEqual(expected, addresses)

    props = PropertyView.objects.filter(
        property__labels=sl_float).select_related('state')
    addresses = sorted([p.state.address_line_1 for p in props])
    expected = sorted([u'4 Myrtle Parkway', u'94 Oxford Hill'])
    self.assertListEqual(expected, addresses)

    props = PropertyView.objects.filter(
        property__labels=sl_string).select_related('state')
    addresses = [p.state.address_line_1 for p in props]
    expected = [u'3 Portage Alley']
    self.assertListEqual(expected, addresses)

    # Check tax lots
    qs = TaxLotState.objects.filter(
        import_file=self.import_file,
    ).iterator()

    d.reset_results()
    d.check_data('TaxLotState', qs)

    result = d.retrieve_result_by_tax_lot_id("1234")
    res = [{
        "severity": "error",
        "value": "1234",
        "field": "jurisdiction_tax_lot_id",
        "table_name": "TaxLotState",
        "message": "Jurisdiction Tax Lot ID does not match expected value",
        "detailed_message": "Jurisdiction Tax Lot ID [1234] does not contain \"1235\"",
        "formatted_field": "Jurisdiction Tax Lot ID"
    }]
    self.assertListEqual(result['data_quality_results'], res)

    # verify labels
    taxlots = TaxLotView.objects.filter(
        taxlot__labels=sl_jurid).select_related('state')
    ids = [t.state.jurisdiction_tax_lot_id for t in taxlots]
    expected = '1234'
    self.assertEqual(expected, ids[0])

    # Check multiple strings
    # NOTE(review): `addresses` is computed here but never asserted —
    # the sl_ok_1 label check appears to be incomplete.
    props = PropertyView.objects.filter(
        property__labels=sl_ok_1).select_related('state')
    addresses = [p.state.address_line_1 for p in props]
def test_cleanse(self):
    """Map a fixture file one-to-one onto PropertyState fields and verify
    the cleansing pass reports exactly 34 errors."""
    # Import the file and run mapping

    # This is silly, the mappings are backwards from what you would expect.
    # The key is the BS field, and the value is the value in the CSV
    # NOTE(review): 'conditioned_floor_area' and 'generation_date' each
    # appear twice in this list; left as-is since the 34-error assertion
    # below was recorded against this exact mapping set.
    fake_mappings = [
        {
            "from_field": u'block_number',
            "to_table_name": u'PropertyState',
            "to_field": u'block_number',
        }, {
            "from_field": u'error_type',
            "to_table_name": u'PropertyState',
            "to_field": u'error_type',
        }, {
            "from_field": u'building_count',
            "to_table_name": u'PropertyState',
            "to_field": u'building_count',
        }, {
            "from_field": u'conditioned_floor_area',
            "to_table_name": u'PropertyState',
            "to_field": u'conditioned_floor_area',
        }, {
            "from_field": u'energy_score',
            "to_table_name": u'PropertyState',
            "to_field": u'energy_score',
        }, {
            "from_field": u'gross_floor_area',
            "to_table_name": u'PropertyState',
            "to_field": u'gross_floor_area',
        }, {
            "from_field": u'lot_number',
            "to_table_name": u'PropertyState',
            "to_field": u'lot_number',
        }, {
            "from_field": u'occupied_floor_area',
            "to_table_name": u'PropertyState',
            "to_field": u'occupied_floor_area',
        }, {
            "from_field": u'conditioned_floor_area',
            "to_table_name": u'PropertyState',
            "to_field": u'conditioned_floor_area',
        }, {
            "from_field": u'postal_code',
            "to_table_name": u'PropertyState',
            "to_field": u'postal_code',
        }, {
            "from_field": u'site_eui',
            "to_table_name": u'PropertyState',
            "to_field": u'site_eui',
        }, {
            "from_field": u'site_eui_weather_normalized',
            "to_table_name": u'PropertyState',
            "to_field": u'site_eui_weather_normalized',
        }, {
            "from_field": u'source_eui',
            "to_table_name": u'PropertyState',
            "to_field": u'source_eui',
        }, {
            "from_field": u'source_eui_weather_normalized',
            "to_table_name": u'PropertyState',
            "to_field": u'source_eui_weather_normalized',
        }, {
            "from_field": u'address_line_1',
            "to_table_name": u'PropertyState',
            "to_field": u'address_line_1',
        }, {
            "from_field": u'address_line_2',
            "to_table_name": u'PropertyState',
            "to_field": u'address_line_2',
        }, {
            "from_field": u'building_certification',
            "to_table_name": u'PropertyState',
            "to_field": u'building_certification',
        }, {
            "from_field": u'city',
            "to_table_name": u'PropertyState',
            "to_field": u'city',
        }, {
            "from_field": u'custom_id_1',
            "to_table_name": u'PropertyState',
            "to_field": u'custom_id_1',
        }, {
            "from_field": u'district',
            "to_table_name": u'PropertyState',
            "to_field": u'district',
        }, {
            "from_field": u'energy_alerts',
            "to_table_name": u'PropertyState',
            "to_field": u'energy_alerts',
        }, {
            "from_field": u'owner_address',
            "to_table_name": u'PropertyState',
            "to_field": u'owner_address',
        }, {
            "from_field": u'owner_city_state',
            "to_table_name": u'PropertyState',
            "to_field": u'owner_city_state',
        }, {
            "from_field": u'owner_email',
            "to_table_name": u'PropertyState',
            "to_field": u'owner_email',
        }, {
            "from_field": u'owner_postal_code',
            "to_table_name": u'PropertyState',
            "to_field": u'owner_postal_code',
        }, {
            "from_field": u'owner_telephone',
            "to_table_name": u'PropertyState',
            "to_field": u'owner_telephone',
        }, {
            "from_field": u'pm_property_id',
            "to_table_name": u'PropertyState',
            "to_field": u'pm_property_id',
        }, {
            "from_field": u'property_name',
            "to_table_name": u'PropertyState',
            "to_field": u'property_name',
        }, {
            "from_field": u'property_notes',
            "to_table_name": u'PropertyState',
            "to_field": u'property_notes',
        }, {
            "from_field": u'space_alerts',
            "to_table_name": u'PropertyState',
            "to_field": u'space_alerts',
        }, {
            "from_field": u'state_province',
            "to_table_name": u'PropertyState',
            "to_field": u'state_province',
        }, {
            "from_field": u'tax_lot_id',
            "to_table_name": u'PropertyState',
            "to_field": u'tax_lot_id',
        }, {
            "from_field": u'use_description',
            "to_table_name": u'PropertyState',
            "to_field": u'use_description',
        }, {
            "from_field": u'generation_date',
            "to_table_name": u'PropertyState',
            "to_field": u'generation_date',
        }, {
            "from_field": u'recent_sale_date',
            "to_table_name": u'PropertyState',
            "to_field": u'recent_sale_date',
        }, {
            "from_field": u'generation_date',
            "to_table_name": u'PropertyState',
            "to_field": u'generation_date',
        }, {
            "from_field": u'release_date',
            "to_table_name": u'PropertyState',
            "to_field": u'release_date',
        }, {
            "from_field": u'year_built',
            "to_table_name": u'PropertyState',
            "to_field": u'year_built',
        }, {
            "from_field": u'year_ending',
            "to_table_name": u'PropertyState',
            "to_field": u'year_ending',
        }
    ]

    tasks.save_raw_data(self.import_file.id)
    Column.create_mappings(fake_mappings, self.org, self.user)
    tasks.map_data(self.import_file.id)

    qs = PropertyState.objects.filter(
        import_file=self.import_file,
        source_type=ASSESSED_BS,
    ).iterator()

    c = Cleansing(self.org)
    c.cleanse('property', qs)

    # _log.debug(c.results)
    # This only checks to make sure the 34 errors have occurred.
    self.assertEqual(len(c.results), 34)
def test_demo_v2(self):
    """Import tax lot then property files, run matching after each, and
    verify the resulting state/view counts and one matched property."""
    tasks.save_raw_data(self.import_file_tax_lot.pk)
    Column.create_mappings(self.fake_taxlot_mappings, self.org, self.user)
    Column.create_mappings(self.fake_portfolio_mappings, self.org, self.user)
    tasks.map_data(self.import_file_tax_lot.pk)

    # Check to make sure the taxlots were imported
    ts = TaxLotState.objects.filter(
        data_state=DATA_STATE_MAPPING,
        organization=self.org,
        import_file=self.import_file_tax_lot,
    )

    ps = PropertyState.objects.filter(
        data_state=DATA_STATE_MAPPING,
        organization=self.org,
        import_file=self.import_file_property,
    )

    # Only the tax lot file has been mapped at this point.
    self.assertEqual(len(ps), 0)
    self.assertEqual(len(ts), 9)

    tasks.match_buildings(self.import_file_tax_lot.id)

    # Check a single case of the taxlotstate
    self.assertEqual(TaxLotState.objects.filter(address_line_1='2655 Welstone Ave NE').count(), 1)
    self.assertEqual(
        TaxLotView.objects.filter(state__address_line_1='2655 Welstone Ave NE').count(), 1
    )
    self.assertEqual(TaxLotView.objects.count(), 9)

    # Import the property data
    tasks.save_raw_data(self.import_file_property.pk)
    tasks.map_data(self.import_file_property.pk)

    ts = TaxLotState.objects.filter(
        # data_state=DATA_STATE_MAPPING,  # Look at all taxlotstates
        organization=self.org,
        import_file=self.import_file_tax_lot,
    )

    ps = PropertyState.objects.filter(
        data_state=DATA_STATE_MAPPING,
        organization=self.org,
        import_file=self.import_file_property,
    )

    self.assertEqual(len(ts), 9)
    self.assertEqual(len(ps), 14)

    tasks.match_buildings(self.import_file_property.id)

    ps = PropertyState.objects.filter(
        data_state=DATA_STATE_MAPPING,
        organization=self.org,
        import_file=self.import_file_property,
    )

    # there should not be any properties left in the mapping state
    self.assertEqual(len(ps), 0)

    # psv = PropertyView.objects.filter(state__organization=self.org)
    # self.assertEqual(len(psv), 12)

    # tlv = TaxLotView.objects.filter(state__organization=self.org)
    # self.assertEqual(len(tlv), 9)

    # Spot-check one matched property by its PM property id.
    self.assertEqual(PropertyView.objects.filter(state__organization=self.org,
                                                 state__pm_property_id='2264').count(), 1)
    pv = PropertyView.objects.filter(state__organization=self.org,
                                     state__pm_property_id='2264').first()
    self.assertEqual(pv.state.property_name, 'University Inn')
    self.assertEqual(pv.state.address_line_1, '50 Willow Ave SE')
def test_demo_v2(self):
    """Demo-v2 flow driven through the private _save_raw_data task API."""
    tasks._save_raw_data(self.import_file_tax_lot.pk, 'fake_cache_key', 1)
    Column.create_mappings(self.fake_taxlot_mappings, self.org, self.user)
    Column.create_mappings(self.fake_portfolio_mappings, self.org, self.user)
    tasks.map_data(self.import_file_tax_lot.pk)

    # After mapping the tax lot file there are 9 tax lot states and no
    # property states in the mapping data_state.
    mapped_tax_lots = TaxLotState.objects.filter(
        data_state=DATA_STATE_MAPPING,
        organization=self.org,
        import_file=self.import_file_tax_lot,
    )
    mapped_properties = PropertyState.objects.filter(
        data_state=DATA_STATE_MAPPING,
        organization=self.org,
        import_file=self.import_file_property,
    )
    self.assertEqual(len(mapped_properties), 0)
    self.assertEqual(len(mapped_tax_lots), 9)

    tasks.match_buildings(self.import_file_tax_lot.id, self.user.id)

    # Spot-check one tax lot state and its promoted view.
    willow = '050 Willow Ave SE'
    self.assertEqual(
        TaxLotState.objects.filter(address_line_1=willow).count(), 1
    )
    self.assertEqual(
        TaxLotView.objects.filter(state__address_line_1=willow).count(), 1
    )
    self.assertEqual(TaxLotView.objects.count(), 9)

    # Bring in the property file.
    tasks._save_raw_data(self.import_file_property.pk, 'fake_cache_key', 1)
    tasks.map_data(self.import_file_property.pk)

    # All 9 tax lot states (any data_state) plus 14 freshly-mapped properties.
    all_tax_lots = TaxLotState.objects.filter(
        organization=self.org,
        import_file=self.import_file_tax_lot,
    )
    mapped_properties = PropertyState.objects.filter(
        data_state=DATA_STATE_MAPPING,
        organization=self.org,
        import_file=self.import_file_property,
    )
    self.assertEqual(len(all_tax_lots), 9)
    self.assertEqual(len(mapped_properties), 14)

    tasks.match_buildings(self.import_file_property.id, self.user.id)

    # Matching should clear the mapping state for every property.
    mapped_properties = PropertyState.objects.filter(
        data_state=DATA_STATE_MAPPING,
        organization=self.org,
        import_file=self.import_file_property,
    )
    self.assertEqual(len(mapped_properties), 0)

    # Spot-check a single promoted property view.
    matched = PropertyView.objects.filter(
        state__organization=self.org, state__pm_property_id='2264'
    )
    self.assertEqual(matched.count(), 1)
    view = matched.first()
    self.assertEqual(view.state.property_name, 'University Inn')
    self.assertEqual(view.state.address_line_1, '50 Willow Ave SE')
def test_check_multiple_text_match(self):
    """Custom text-match rules apply each status label to the matching states."""
    dq = DataQualityCheck.retrieve(self.org)
    dq.remove_all_rules()

    # One custom 'OK' text-match rule per field, each with its own label.
    # (Field names, including the 'Consectutive' misspelling, mirror the
    # fixture data exactly.)
    rule_specs = [
        ('No meters present', 'meters_present'),
        ('No 12 Consectutive Months', '12 Consectutive Months'),
        ('No Monthly Data', 'Monthly Data'),
    ]
    labels = []
    for label_name, field in rule_specs:
        label, _ = StatusLabel.objects.get_or_create(
            name=label_name, super_organization=self.org
        )
        labels.append(label)
        dq.add_rule({
            'table_name': 'PropertyState',
            'field': field,
            'data_type': TYPE_STRING,
            'rule_type': RULE_TYPE_CUSTOM,
            'text_match': 'OK',
            'severity': SEVERITY_ERROR,
            'status_label': label,
        })
    sl_ok_1, sl_ok_2, sl_ok_3 = labels

    # Import, map, and match the data.
    tasks.save_raw_data(self.import_file.id)
    Column.create_mappings(self.fake_mappings, self.org, self.user, self.import_file.pk)
    tasks.map_data(self.import_file.id)
    tasks.match_buildings(self.import_file.id)

    states = PropertyState.objects.filter(
        import_file=self.import_file,
        source_type=ASSESSED_BS,
    ).iterator()
    dq.reset_results()
    dq.check_data('PropertyState', states)

    def labeled_addresses(label):
        # Sorted address_line_1 of every property carrying this label.
        views = PropertyView.objects.filter(
            property__labels=label).select_related('state')
        return sorted([v.state.address_line_1 for v in views])

    self.assertListEqual(
        [
            u'1 International Road',
            u'17246 Esch Drive',
            u'2581 Schiller Parkway',
            u'3 Northport Place',
            u'84807 Buell Trail'
        ],
        labeled_addresses(sl_ok_1),
    )
    self.assertListEqual(
        [
            u'1 International Road',
            u'2581 Schiller Parkway',
            u'49705 Harper Crossing'
        ],
        labeled_addresses(sl_ok_2),
    )
    self.assertListEqual(
        [
            u'1 International Road',
            u'17246 Esch Drive',
            u'84807 Buell Trail',
            u'88263 Scoville Park'
        ],
        labeled_addresses(sl_ok_3),
    )
def test_cleanse(self):
    """Map a Portfolio Manager file and verify the two expected cleansing results."""
    # Column mappings: every source column lands on a PropertyState field.
    # Unmapped source columns (Year Ending, Energy Score, GHG, the EUI
    # variants, Parking GFA, Organization, Release Date) are deliberately
    # left out.
    field_pairs = [
        (u'Property Id', u'pm_property_id'),
        (u'Property Name', u'property_name'),
        (u'Address 1', u'address_line_1'),
        (u'Address 2', u'address_line_2'),
        (u'City', u'city'),
        (u'State/Province', u'state_province'),
        (u'Postal Code', u'postal_code'),
        (u'Year Built', u'year_built'),
        (u'Property Floor Area (Buildings and Parking) (ft2)', u'gross_floor_area'),
        (u'Site EUI (kBtu/ft2)', u'site_eui'),
        (u'Generation Date', u'generation_date'),
    ]
    fake_mappings = [
        {
            "from_field": from_field,
            "to_table_name": u'PropertyState',
            "to_field": to_field,
        }
        for from_field, to_field in field_pairs
    ]

    # Import the file and run mapping.
    tasks.save_raw_data(self.import_file.id)
    Column.create_mappings(fake_mappings, self.org, self.user)
    tasks.map_data(self.import_file.id)

    states = PropertyState.objects.filter(
        import_file=self.import_file,
        source_type=PORTFOLIO_BS,
    ).iterator()

    cleansing = Cleansing(self.org)
    cleansing.cleanse('property', states)
    _log.debug(cleansing.results)
    self.assertEqual(len(cleansing.results), 2)

    def result_for_address(address):
        # Exactly one cleansing result is expected per address.
        matches = [v for v in cleansing.results.values()
                   if v['address_line_1'] == address]
        if len(matches) == 1:
            return matches[0]
        raise RuntimeError('Non unity results')

    result = result_for_address('120243 E True Lane')
    self.assertEqual(
        [{
            'field': u'pm_property_id',
            'formatted_field': u'PM Property ID',
            'value': u'',
            'message': u'PM Property ID is missing',
            'detailed_message': u'PM Property ID is missing',
            'severity': u'error'
        }],
        result['cleansing_results'],
    )

    result = result_for_address('95373 E Peach Avenue')
    self.assertEqual(
        [{
            'field': u'site_eui',
            'formatted_field': u'Site EUI',
            'value': 0.1,
            'message': u'Site EUI out of range',
            'detailed_message': u'Site EUI [0.1] < 10.0',
            'severity': u'warning'
        }],
        result['cleansing_results'],
    )