Example #1
def _save_raw_data_chunk(chunk, file_pk, prog_key, increment, *args, **kwargs):
    """Save the raw data to the database."""
    import_file = ImportFile.objects.get(pk=file_pk)
    # Save the raw rows so the front end can show column headers and samples.
    source_type = get_source_type(import_file)
    for c in chunk:
        raw_bs = BuildingSnapshot()
        raw_bs.import_file = import_file
        raw_bs.extra_data = c
        raw_bs.source_type = source_type

        # An initial save is required to get a PK, which
        # set_initial_sources() needs below.
        raw_bs.save()
        super_org = import_file.import_record.super_organization
        raw_bs.super_organization = super_org

        set_initial_sources(raw_bs)
        raw_bs.save()

    # Indicate progress
    increment_cache(prog_key, increment)

    return True
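
The chunk worker above is handed its progress key and per-chunk increment by a caller that this listing does not show. A minimal sketch of such a driver, assuming get_prog_key (which appears in Example #8) and a plain list of parsed rows; the name save_raw_data and the chunking details are illustrative only:

def save_raw_data(file_pk, rows, chunk_size=100):
    # Hypothetical driver, not SEED's actual task: give each chunk an equal
    # share of the 0-100 progress range and let the worker report it.
    chunks = [rows[i:i + chunk_size] for i in range(0, len(rows), chunk_size)]
    prog_key = get_prog_key('save_raw_data', file_pk)
    increment = 100.0 / (len(chunks) or 1)
    for chunk in chunks:
        _save_raw_data_chunk(chunk, file_pk, prog_key, increment)
    return prog_key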
Example #2
    def test_increment_cache(self):
        """Sum our progress by increments properly."""
        expected = 25.0
        test_key = make_key('increment_test')
        increment = 25.0
        # A fresh increment initializes the value.
        increment_cache(test_key, increment)
        self.assertEqual(float(get_cache(test_key)['progress']), expected)

        # Increment an existing key
        increment_cache(test_key, increment)
        expected = 50.0
        self.assertEqual(float(get_cache(test_key)['progress']), expected)

        # Without a bounds check these increments would push us well past 100.0.
        for i in range(10):
            increment_cache(test_key, increment)

        expected = 100.0
        self.assertEqual(float(get_cache(test_key)['progress']), expected)
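
The test pins down three behaviors: a fresh increment initializes the key, repeated increments sum, and progress is capped at 100. A minimal sketch of helpers with those semantics, assuming a plain in-process dict rather than SEED's real cache backend; make_key, get_cache and increment_cache are the names used throughout these examples, but the bodies below are illustrative only:

_cache = {}


def make_key(name):
    # Namespacing only; the real key scheme may differ.
    return 'progress:{}'.format(name)


def get_cache(key):
    # Return the cached entry, initializing a missing key to zero progress.
    return _cache.setdefault(key, {'progress': 0.0})


def increment_cache(key, increment):
    # Sum increments and cap the result at 100.
    entry = get_cache(key)
    entry['progress'] = min(float(entry['progress']) + float(increment), 100.0)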
Example #3
def _delete_organization_taxlot_state_chunk(del_ids, prog_key, increment,
                                            org_pk, *args, **kwargs):
    """deletes a list of ``del_ids`` and increments the cache"""
    TaxLotState.objects.filter(organization_id=org_pk, pk__in=del_ids).delete()
    increment_cache(prog_key, increment * 100)
Example #4
def _delete_organization_property_state_chunk(del_ids, prog_key, increment,
                                              org_pk, *args, **kwargs):
    """deletes a list of ``del_ids`` and increments the cache"""
    PropertyState.objects.filter(pk__in=del_ids).delete()
    increment_cache(prog_key, increment * 100)
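
Unlike _save_raw_data_chunk, the delete chunks multiply increment by 100 before reporting it, which suggests their caller passes increment as a fraction of the total work rather than a percentage. A hedged sketch of such a caller; the function name, chunk size, and the organization_id filter are assumptions, not SEED's code:

def delete_organization_property_states(org_pk, prog_key, chunk_size=300):
    # Hypothetical driver: give each chunk an equal fraction of the work; the
    # chunk function scales it to a percentage via increment * 100.
    del_ids = list(
        PropertyState.objects.filter(organization_id=org_pk).values_list('pk', flat=True)
    )
    chunks = [del_ids[i:i + chunk_size] for i in range(0, len(del_ids), chunk_size)]
    increment = 1.0 / (len(chunks) or 1)
    for chunk in chunks:
        _delete_organization_property_state_chunk(chunk, prog_key, increment, org_pk)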
Example #5
def fake_func(import_file_pk):
    increment_cache(key, increment)
Example #6
def map_row_chunk(chunk, file_pk, source_type, prog_key, increment, *args, **kwargs):
    """Does the work of matching a mapping to a source type and saving

    :param chunk: list of dict of str. One row's worth of parse data.
    :param file_pk: int, the PK for an ImportFile obj.
    :param source_type: int, either ASSESSED_RAW or PORTFOLIO_RAW.
    :param prog_key: string, the progress cache key.
    :param increment: double, value by which to increment the progress key.
    :param cleaner: (optional) the cleaner class to pass to
        mapper.map_row (e.g. to turn numbers into floats).
    :param raw_ids: (optional kwarg), the list of ids in chunk order.

    """

    import_file = ImportFile.objects.get(pk=file_pk)
    save_type = PORTFOLIO_BS
    if source_type == ASSESSED_RAW:
        save_type = ASSESSED_BS

    org = Organization.objects.get(
        pk=import_file.import_record.super_organization.pk
    )

    mapping, concats = get_column_mappings(org)
    map_cleaner = _build_cleaner(org)

    # For column mappings that are not DB columns, we need to tell MCM to
    # apply our mapping function to them.
    apply_columns = []

    mappable_columns = get_mappable_columns()
    for item in mapping:
        if mapping[item] not in mappable_columns:
            apply_columns.append(item)

    apply_func = apply_data_func(mappable_columns)

    model = None
    for row in chunk:
        model = mapper.map_row(
            row,
            mapping,
            BuildingSnapshot,
            cleaner=map_cleaner,
            concat=concats,
            apply_columns=apply_columns,
            apply_func=apply_func,
            *args,
            **kwargs
        )

        model.import_file = import_file
        model.source_type = save_type
        model.clean()
        model.super_organization = import_file.import_record.super_organization
        model.save()
    if model:
        # Make sure that we've saved all of the extra_data column names
        save_column_names(model, mapping=mapping)

    increment_cache(prog_key, increment)
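
Columns whose mappings are not real model fields are routed through apply_func above. An illustrative sketch of that idea, not necessarily what SEED's apply_data_func returns; the (model, key, value) signature is an assumption:

def make_apply_func(mappable_columns):
    # Values MCM cannot map onto a model field land in extra_data instead.
    def apply_func(model, key, value):
        if key in mappable_columns:
            setattr(model, key, value)
        else:
            model.extra_data[key] = value
        return model
    return apply_func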
Example #7
def _delete_organization_buildings_chunk(del_ids, prog_key, increment,
                                         org_pk, *args, **kwargs):
    """deletes a list of ``del_ids`` and increments the cache"""
    qs = BuildingSnapshot.objects.filter(super_organization=org_pk)
    qs.filter(pk__in=del_ids).delete()
    increment_cache(prog_key, increment * 100)
Example #8
def _match_buildings(file_pk, user_pk):
    """ngram search against all of the canonical_building snapshots for org."""
    #     assert True
    min_threshold = settings.MATCH_MIN_THRESHOLD
    import_file = ImportFile.objects.get(pk=file_pk)
    prog_key = get_prog_key('match_buildings', file_pk)
    org = Organization.objects.filter(users=import_file.import_record.owner)[0]
    test = ''
    unmatched_buildings = find_unmatched_buildings(import_file)

    duplicates = []

    newly_matched_building_pks = []

    # Filter out matches based on ID. If a record duplicates existing data,
    # add it to the duplicates list and note which record it duplicates.
    for unmatched in unmatched_buildings:
        try:
            match = handle_id_matches(unmatched, import_file, user_pk)
        except DuplicateDataError as e:
            duplicates.append(unmatched.pk)
            unmatched.duplicate_id = e.id
            unmatched.save()
            continue
        if match:
            newly_matched_building_pks.extend([match.pk, unmatched.pk])

    # Remove any buildings we just did exact ID matches with.
    unmatched_buildings = unmatched_buildings.exclude(
        pk__in=newly_matched_building_pks
    ).values_list(*BS_VALUES_LIST)

    # If we don't find any unmatched buildings, there's nothing left to do.
    if not unmatched_buildings:
        _finish_matching(import_file, prog_key)
        return

    # Exclude the duplicates we just flagged.
    unmatched_buildings = unmatched_buildings.exclude(
        pk__in=duplicates
    ).values_list(*BS_VALUES_LIST)
    if not unmatched_buildings:
        _finish_matching(import_file, prog_key)
        return

    # Normalize addresses so we can match on the address_1 field. This is not
    # ideal: two locations can share address_1 but differ by city. (Matching
    # previously stringified all non-PK values and used ngrams for comps.)
    unmatched_normalized_addresses = [
        _normalize_address_str(unmatched[4]) for unmatched in unmatched_buildings
    ]

    canonical_buildings = find_canonical_building_values(org)
    if not canonical_buildings:
        # There are no canonical_buildings for this organization, all unmatched
        # buildings will then become canonicalized.
        hydrated_unmatched_buildings = BuildingSnapshot.objects.filter(
            pk__in=[item[0] for item in unmatched_buildings]
        )
        num_unmatched = len(unmatched_normalized_addresses) or 1
        increment = 1.0 / num_unmatched * 100
        for (i, unmatched) in enumerate(hydrated_unmatched_buildings):
            initialize_canonical_building(unmatched, user_pk)
            if i % 100 == 0:
                increment_cache(prog_key, increment * 100)

        _finish_matching(import_file, prog_key)
        return

    # Map each normalized canonical address back to its PK so we can recover
    # the matched building.
    can_rev_idx = {
        _normalize_address_str(value[4]): value[0] for value in canonical_buildings
    }
    # As above, canonical buildings are matched on their normalized address_1
    # rather than on an ngram.NGram index over their stringified values.
    canonical_buildings_addresses = [
        _normalize_address_str(values[4]) for values in canonical_buildings
    ]

    # For progress tracking.
    num_unmatched = len(unmatched_normalized_addresses) or 1
    increment = (1.0 / num_unmatched) * 100

    # Reset mapping completion before matching starts.
    import_file.mapping_completion = 0
    import_file.save()
    # Exact matching on the normalized address.
    for i, un_m_address in enumerate(unmatched_normalized_addresses):
        results = _find_matches(un_m_address, canonical_buildings_addresses)
        if results:
            handle_results(
                results, i, can_rev_idx, unmatched_buildings, user_pk
            )
        else:
            hydrated_building = BuildingSnapshot.objects.get(
                pk=unmatched_buildings[i][0]
            )
            initialize_canonical_building(hydrated_building, user_pk)

        if i % 100 == 0:
            increment_cache(prog_key, increment * 100)
            import_file.mapping_completion += int(increment * 100)
            import_file.save()

    _finish_matching(import_file, prog_key)

    return {'status': 'success'}
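
_finish_matching is called on every exit path above but is not shown in this listing. A minimal sketch that relies only on the capping behavior verified in Example #2; illustrative only, and the real implementation may also update the ImportFile:

def _finish_matching(import_file, prog_key):
    # Push the progress key to its ceiling; increment_cache caps the value
    # at 100, so over-reporting is harmless.
    increment_cache(prog_key, 100)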
Example #9
def _delete_organization_taxlot_state_chunk(del_ids, prog_key, increment,
                                            org_pk, *args, **kwargs):
    """Delete the ``del_ids`` records and increment the progress cache."""
    qs = TaxLotState.objects.filter(organization_id=org_pk)
    qs.filter(pk__in=del_ids).delete()
    increment_cache(prog_key, increment * 100)