def export_buildings():
    """Create or update a Building record for every entry in ``buildings``.

    ``buildings`` is defined outside this block; here it is iterated as a
    mapping of building id -> dict with the keys "address", "owner", "year",
    "building_type" and "zipcode".  For each entry the matching Building
    (looked up by address) is fetched, or a new one is created, its fields
    are filled in, and all records are written with a single
    ``db.put_multi`` call at the end.
    """
    entries = buildings.items()
    log("processing %s buildings"%(len(entries),), 1)
    pending = []
    for count, (building_id, info) in enumerate(entries, 1):
        address = info["address"]
        record = Building.query(Building.address == address).get()
        owner = Owner.query(Owner.name == info["owner"]).get()
        year = info["year"]
        kind = info["building_type"]
        if not record:
            log("Can't find building '%s' -- creating new entry"%(address,), 2)
            record = Building(address=address)

        # Only overwrite a field when the incoming data actually has a value.
        if owner:
            record.owner = owner.key
        if year:
            record.year = year
        if kind:
            record.building_type = kind
        if building_id:
            record.building_id = building_id

        # An existing zipcode is kept; otherwise ask _zip for one.
        # NOTE(review): _zip is defined elsewhere -- presumably it normalizes
        # or derives a zipcode from the raw value and the address; confirm.
        if not record.zipcode:
            zipcode = _zip(info["zipcode"], address)
            if zipcode:
                record.zipcode = zipcode

        # Look up coordinates only when either one is missing.
        if not (record.latitude and record.longitude):
            record.latitude, record.longitude = address2latlng(record.address)

        pending.append(record)
        if count % 100 == 0:
            log("processed %s buildings"%(count,), 2)
    log("saving buildings", 1)
    db.put_multi(pending)
def full_scan():
    """Scan BlockLot_with_LatLon.csv, tally rows per address, save buildings.

    For every CSV row an address is assembled from columns 21, 19 and 18.
    The first time an address is seen, the matching Building is fetched (or
    created) and updated from column 7 (building type), column 11 (latitude)
    and column 10 (longitude), then queued in the module-level ``blds`` list.
    Every row increments the per-address counter in the module-level
    ``buildings`` mapping; the address with the highest count is logged as
    the "winner".  The counts are written to logs/json/bcounts and the
    queued buildings are saved with ``db.put_multi``.

    The log opened by ``set_log`` is closed even if the scan raises.

    NOTE(review): export_buildings reads ``buildings`` values as dicts,
    whereas this function stores integer counts in it -- confirm the two are
    never run against the same state.
    """
    set_log(os.path.join("logs", "txt", "buildings.txt"))
    try:
        log("Scanning BlockLot_with_LatLon.csv", important=True)
        rows = getcsv(os.path.join("scrapers", "data", "BlockLot_with_LatLon.csv"))
        winner = None
        for row in rows:
            addr = ("%s %s %s"%(row[21], row[19], row[18])).strip()
            if not addr:
                continue
            if addr not in buildings:
                buildings[addr] = 0
                building = Building.query(Building.address == addr).get()
                if not building:
                    log("Can't find '%s' -- creating new entry"%(addr,), 2)
                    building = Building(address=addr)
                # TODO: zipcode, year, building_id, owner
                btype = row[7].strip()
                lat = row[11].strip()
                lng = row[10].strip()
                if btype:
                    building.building_type = btype
                if lat:
                    building.latitude = float(lat)
                if lng:
                    building.longitude = float(lng)
                blds.append(building)
            buildings[addr] += 1
            if not winner or buildings[addr] > buildings[winner]:
                winner = addr
        # winner is still None when no row produced an address; looking it up
        # in buildings would raise KeyError, so report a count of 0 instead.
        top_count = buildings[winner] if winner is not None else 0
        log("winner: %s (%s). scanned lines: %s"%(winner, top_count, len(rows)), 1)
        log("writing bcounts", 1)
        writejson(buildings, os.path.join("logs", "json", "bcounts"))
        log("saving %s buildings to db"%(len(blds),), 1)
        db.put_multi(blds)
        log("goodbye")
    finally:
        # Always release the log, including when a row fails to parse.
        close_log()