def conflate(cdau_address, cat_address, cod_mun_cat):
    """Conflate CDAU over Cadastre addresses datasets.

    Each CDAU record returned by ``cdau_address.search`` (portal types
    'PORTAL' and 'ACCESORIO' for the municipality) is matched against the
    features of ``cat_address``:

    * When Cadastre features share its reference, the nearest one whose
      squared distance to the CDAU point is below 100 receives the CDAU
      attributes and position.
    * When no Cadastre feature shares the reference and the spatial index
      reports nothing inside the ``cdau_thr`` bounding box around the point,
      a new feature is queued for insertion.
    * In any other case the CDAU record is ignored.

    Queued changes are written through ``cat_address.writer``; the counters
    and dates in ``report`` and ``cat_address.source_date`` are updated only
    when something was replaced or added.

    Args:
        cdau_address: CDAU source exposing ``search(expression)`` and
            ``source_date``.
        cat_address: Cadastre address layer, modified in place.
        cod_mun_cat: municipality code, converted with ``cod_mun_cat2ine``
            to build the CDAU query and passed on to ``get_cat_address``.
    """
    ine_code = cod_mun_cat2ine(cod_mun_cat)
    query = "ine_mun='{}' and (tipo_portal_pk='{}' or tipo_portal_pk='{}')"
    expression = query.format(ine_code, 'PORTAL', 'ACCESORIO')
    by_reference = defaultdict(list)
    spatial_index = cat_address.get_index()
    new_features = []
    changed_attrs = {}
    changed_geoms = {}

    # Group Cadastre features by localId with its fourth dot-separated
    # component (position 3) left out.
    for cat_feat in cat_address.getFeatures():
        pieces = cat_feat['localId'].split('.')
        key_ref = '.'.join(p for pos, p in enumerate(pieces) if pos != 3)
        by_reference[key_ref].append(cat_feat)

    parsed = 0
    for parsed, ad in enumerate(cdau_address.search(expression), 1):
        attr = get_cat_address(ad, cod_mun_cat)
        ref = attr['localId']
        pt = layer.Point(float(ad['x']), float(ad['y']))
        same_ref = by_reference[ref]

        if not same_ref:
            # Reference unknown to Cadastre: add the address only when no
            # Cadastre address lies within the search box.
            search_box = layer.Point(pt).boundingBox(cdau_thr)
            if len(spatial_index.intersects(search_box)) == 0:
                created = QgsFeature(cat_address.fields())
                for key, value in attr.items():
                    created[key] = value
                created.setGeometry(layer.Geometry.fromPointXY(pt))
                new_features.append(created)
            continue

        # Reference known: pick the closest feature under the squared
        # distance limit.
        nearest = None
        best_sq_dist = 100
        for cat_feat in same_ref:
            sq_dist = cat_feat.geometry().asPoint().sqrDist(pt)
            if sq_dist < best_sq_dist:
                best_sq_dist, nearest = sq_dist, cat_feat
        if nearest is None:
            continue
        changed_geoms[nearest.id()] = layer.Geometry.fromPointXY(pt)
        for key, value in attr.items():
            nearest[key] = value
        changed_attrs[nearest.id()] = layer.get_attributes(nearest)

    log.info(_("Parsed %d addresses from '%s'"), parsed, 'CDAU')
    report.inp_address_cdau = parsed

    replaced = len(changed_attrs)
    if replaced:
        cat_address.writer.changeAttributeValues(changed_attrs)
        cat_address.writer.changeGeometryValues(changed_geoms)
        log.info(_("Replaced %d addresses from '%s'"), replaced, 'CDAU')
        report.rep_address_cdau = replaced
        cat_address.source_date = cdau_address.source_date
        report.address_date = cdau_address.source_date

    added = len(new_features)
    if added:
        cat_address.writer.addFeatures(new_features)
        log.info(_("Added %d addresses from '%s'"), added, 'CDAU')
        report.add_address_cdau = added
        report.inp_address += added
        report.inp_address_entrance += added
        cat_address.source_date = cdau_address.source_date
        report.address_date = cdau_address.source_date
def _test_duplicates_mem(self):
    """Find near-duplicate vertices of ``self.obj`` using an in-memory map.

    All features are first loaded into a dict keyed by feature id; each
    vertex is then compared with the spatial-index candidates inside its
    bounding box, and neighbours closer than the threshold are recorded.
    Prints how many points have at least one such neighbour recorded.
    """
    threshold = 0.012
    sq_threshold = threshold**2
    found = defaultdict(list)
    spatial_index = self.obj.get_index()
    features = {}
    for feat in self.obj.getFeatures():
        features[feat.id()] = feat

    for vertex in self.obj.getFeatures():
        here = layer.Point(vertex.geometry().asPoint())
        candidates = spatial_index.intersects(here.boundingBox(threshold))
        # The vertex always matches its own box; drop it from the candidates.
        candidates.remove(vertex.id())
        for fid in candidates:
            other = features[fid].geometry().asPoint()
            if here.sqrDist(other) < sq_threshold:
                found[here].append(other)

    print("duplicados %d" % len(found))
def _test_duplicates_shp1(self):
    """Find near-duplicate vertices of ``self.obj_shp`` without caching.

    Instead of keeping every feature in memory, the candidates returned by
    the spatial index are fetched again from the layer with a feature
    request filtered by id. Original author's note: 3x slower than
    test_duplicates_mem, 3x less memory.
    """
    threshold = 0.012
    sq_threshold = threshold**2
    found = defaultdict(list)
    spatial_index = self.obj_shp.get_index()
    fid_request = QgsFeatureRequest()

    for vertex in self.obj_shp.getFeatures():
        here = layer.Point(vertex.geometry().asPoint())
        candidates = spatial_index.intersects(here.boundingBox(threshold))
        # The vertex always matches its own box; drop it from the candidates.
        candidates.remove(vertex.id())
        if not candidates:
            continue
        fid_request.setFilterFids(candidates)
        for neighbour in self.obj_shp.getFeatures(fid_request):
            other = neighbour.geometry().asPoint()
            if here.sqrDist(other) < sq_threshold:
                found[here].append(other)

    print("duplicados %d" % len(found))