def test_hash(self):
    """Exercise ``tasks.hash_state_object`` on property and tax lot states.

    Asserts that a state built with an organization hashes the same as one
    built without, and that states differing in ``address_line_1`` and/or
    ``extra_data`` all hash differently.
    """
    compute_hash = tasks.hash_state_object

    # Passing an organization must not change the resulting hash.
    for state_cls in (PropertyState, TaxLotState):
        self.assertEqual(
            compute_hash(state_cls()),
            compute_hash(state_cls(organization=self.org)),
        )

    # Five states with different address / extra_data combinations.
    candidates = [
        PropertyState(address_line_1='123 fake st', extra_data={"a": "100"}),
        PropertyState(address_line_1='123 fake st', extra_data={"a": "200"}),
        PropertyState(extra_data={"a": "200"}),
        PropertyState(extra_data={"a": "100"}),
        PropertyState(address_line_1='123 fake st'),
    ]
    unique_hashes = {compute_hash(candidate) for candidate in candidates}
    self.assertEqual(len(unique_hashes), 5)
def test_hash(self):
    """Exercise ``tasks.hash_state_object`` on property and tax lot states.

    Asserts that a state built with an organization hashes the same as one
    built without, that states differing in ``address_line_1`` and/or
    ``extra_data`` all hash differently, and that the hash of a state with
    a large ``extra_data`` payload still has length 32.
    """
    compute_hash = tasks.hash_state_object

    # Passing an organization must not change the resulting hash.
    for state_cls in (PropertyState, TaxLotState):
        self.assertEqual(
            compute_hash(state_cls()),
            compute_hash(state_cls(organization=self.org)),
        )

    # Five states with different address / extra_data combinations.
    candidates = [
        PropertyState(address_line_1='123 fake st', extra_data={"a": "100"}),
        PropertyState(address_line_1='123 fake st', extra_data={"a": "200"}),
        PropertyState(extra_data={"a": "200"}),
        PropertyState(extra_data={"a": "100"}),
        PropertyState(address_line_1='123 fake st'),
    ]
    unique_hashes = {compute_hash(candidate) for candidate in candidates}
    self.assertEqual(len(unique_hashes), 5)

    # The hash length must stay at 32 no matter how much extra_data there is.
    bulky_extra_data = {
        "entry_%s" % n: "Value as string %s" % n for n in range(1000)
    }
    bulky_state = PropertyState(
        address_line_1='123 fake st', extra_data=bulky_extra_data
    )
    self.assertEqual(len(compute_hash(bulky_state)), 32)
def recalculate_hash_objects(apps, schema_editor):
    """Recompute and store ``hash_object`` on every property and tax lot state.

    All ``PropertyState`` rows are processed first, then all ``TaxLotState``
    rows; each model is handled inside its own ``transaction.atomic()``
    block. A progress line is printed every 1000 rows.

    :param apps: registry used to look up the ``seed`` models via ``get_model``
    :param schema_editor: not used by this function
    """
    property_model = apps.get_model('seed', 'PropertyState')
    taxlot_model = apps.get_model('seed', 'TaxLotState')

    # Both totals are fetched before any row is touched; they are only used
    # for the progress output below.
    work_items = (
        (property_model, property_model.objects.count()),
        (taxlot_model, taxlot_model.objects.count()),
    )

    for model, total in work_items:
        with transaction.atomic():
            for position, state in enumerate(model.objects.all().iterator()):
                if position % 1000 == 0:
                    print("... %s / %s ..." % (position, total))
                state.hash_object = hash_state_object(state)
                state.save()
def save(self, *args, **kwargs):
    """Refresh the derived fields, then delegate to the parent ``save``.

    ``normalized_address`` is rebuilt from ``address_line_1`` (``None`` when
    there is no address) and ``hash_object`` is recomputed from the current
    state before the record is written.

    :param args: positional arguments forwarded to the parent ``save``
    :param kwargs: keyword arguments forwarded to the parent ``save``
    :return: whatever the parent ``save`` returns
    """
    raw_address = self.address_line_1
    self.normalized_address = (
        None if raw_address is None else normalize_address_str(raw_address)
    )

    # Store a hash of the object so it can be looked up quickly later.
    # NOTE(review): imported locally, presumably to avoid a circular
    # import with seed.data_importer.tasks -- confirm before moving it.
    from seed.data_importer.tasks import hash_state_object
    self.hash_object = hash_state_object(self)

    return super(TaxLotState, self).save(*args, **kwargs)
def forwards(apps, schema_editor):
    """Recompute and store ``hash_object`` on every property and tax lot state.

    All ``PropertyState`` rows are processed first, then all ``TaxLotState``
    rows. A progress line is printed every 1000 rows.

    :param apps: registry used to look up the ``seed`` models via ``get_model``
    :param schema_editor: not used by this function
    """
    def rehash_all(model, total):
        # Stream the rows, recomputing and saving the hash of each one.
        for position, state in enumerate(model.objects.all().iterator()):
            if position % 1000 == 0:
                print("... %s / %s ..." % (position, total))
            state.hash_object = hash_state_object(state)
            state.save()

    property_model = apps.get_model("seed", "PropertyState")
    taxlot_model = apps.get_model("seed", "TaxLotState")

    # Both totals are fetched before any row is touched; they are only used
    # for the progress output.
    property_total = property_model.objects.all().count()
    taxlot_total = taxlot_model.objects.all().count()

    rehash_all(property_model, property_total)
    rehash_all(taxlot_model, taxlot_total)