def full_scan():
    """Flag every address listed in rent_control.geojson as rent-controlled.

    Reads the "features" list from scrapers/data/rent_control.geojson and
    keeps the "properties" of each feature whose "address" is non-empty.
    For every distinct address the matching Building is fetched (a new one
    is created when the query returns nothing), ``rent_control`` is set to
    True, and the whole batch is written with one ``db.put_multi`` call.
    Progress is reported through the log helpers, which are pointed at
    logs/txt/rent_control.txt for the duration of the scan.
    """
    set_log(os.path.join("logs", "txt", "rent_control.txt"))
    log("Scanning rent_control.geojson", important=True)
    geojson_path = os.path.join("scrapers", "data", "rent_control.geojson")
    features = read(geojson_path, isjson=True)["features"]

    # Features with a blank address cannot be matched to a Building.
    usable = []
    for feature in features:
        if feature["properties"]["address"]:
            usable.append(feature["properties"])
    log("using %s of %s rows (omitting blank entries)"%(len(usable), len(features)), 1)

    seen = set()
    pending = []
    for props in usable:
        address = props["address"]
        if address in seen:
            # Each address is queried and queued only once.
            continue
        record = Building.query(Building.address == address).get()
        if not record:
            log("Can't find '%s' -- creating new entry"%(address,), 2)
            record = Building(address=address)
        record.rent_control = True
        pending.append(record)
        seen.add(address)

    log("saving %s rent-control buildings to db"%(len(pending),), 1)
    db.put_multi(pending)
    log("goodbye")
    close_log()
def export_buildings():
    """Copy the entries of the ``buildings`` mapping into Building records.

    ``buildings`` is resolved from outside this function and is iterated as
    ``building_id -> data``, where data provides the keys "address",
    "owner", "year", "building_type" and "zipcode".  For each entry the
    Building with the same address is fetched (or created), truthy values
    overwrite owner / year / building_type / building_id, and zipcode and
    coordinates are filled in only when the record does not have them yet.
    Everything is saved at the end with one ``db.put_multi`` call.
    """
    entries = buildings.items()
    log("processing %s buildings"%(len(entries),), 1)
    pending = []
    for count, (b_id, bdata) in enumerate(entries, 1):
        address = bdata["address"]
        record = Building.query(Building.address == address).get()
        landlord = Owner.query(Owner.name == bdata["owner"]).get()
        year_built = bdata["year"]
        kind = bdata["building_type"]
        if not record:
            log("Can't find building '%s' -- creating new entry"%(address,), 2)
            record = Building(address=address)

        # Only truthy source values replace what is already stored.
        if landlord:
            record.owner = landlord.key
        if year_built:
            record.year = year_built
        if kind:
            record.building_type = kind
        if b_id:
            record.building_id = b_id

        # Zipcode and coordinates are only looked up when missing.
        if not record.zipcode:
            resolved_zip = _zip(bdata["zipcode"], address)
            if resolved_zip:
                record.zipcode = resolved_zip
        if not (record.latitude and record.longitude):
            lat, lng = address2latlng(record.address)
            record.latitude = lat
            record.longitude = lng

        pending.append(record)
        if count % 100 == 0:
            log("processed %s buildings"%(count,), 2)

    log("saving buildings", 1)
    db.put_multi(pending)
def export_parcels():
    """Create Parcel records for ``parcels`` entries that are not stored yet.

    ``parcels`` is resolved from outside this function and is iterated as
    ``parcel_id -> data``, where data provides the keys "building",
    "dwelling_units", "from_addr" and "to_addr".  Parcels whose id already
    matches a stored Parcel are left alone; the rest are built, linked to
    the Building with the matching ``building_id`` when one is found, and
    saved together with one ``db.put_multi`` call.
    """
    entries = parcels.items()
    log("processing %s parcels"%(len(entries),), 1)
    fresh = []
    for count, (p_id, pdata) in enumerate(entries, 1):
        already_stored = Parcel.query(Parcel.parcel_id == p_id).get()
        if not already_stored:
            log("Can't find parcel '%s' -- creating new entry"%(p_id,), 2)
            owner_building = Building.query(
                Building.building_id == pdata["building"]).get()
            # No matching building (or a falsy key) leaves the link empty.
            building_key = None
            if owner_building:
                building_key = owner_building.key or None
            fresh.append(Parcel(
                parcel_id=p_id,
                dwelling_units=pdata["dwelling_units"],
                from_addr=pdata["from_addr"],
                to_addr=pdata["to_addr"],
                building=building_key
            ))
        # The progress counter covers every entry, new or not.
        if count % 100 == 0:
            log("processed %s parcels"%(count,), 2)

    log("saving parcels", 1)
    db.put_multi(fresh)
def full_scan():
    """Scan BlockLot_with_LatLon.csv and upsert one Building per address.

    Each row's address is assembled from columns 21, 19 and 18; rows whose
    address comes out blank are skipped.  The first time an address is seen
    its Building is fetched (or created), updated with the non-empty
    building type (column 7), latitude (column 11) and longitude
    (column 10), and queued in ``blds``.  ``buildings`` counts how many rows
    mention each address; the most frequent one is reported as the
    "winner".  The counts are written to logs/json/bcounts and the queued
    buildings are saved with one ``db.put_multi`` call.

    ``buildings`` and ``blds`` are not defined here; they are resolved from
    outside this function and mutated in place.
    """
    set_log(os.path.join("logs", "txt", "buildings.txt"))
    log("Scanning BlockLot_with_LatLon.csv", important=True)
    # Named ``rows`` rather than ``csv`` so the stdlib module name is not shadowed.
    rows = getcsv(os.path.join("scrapers", "data", "BlockLot_with_LatLon.csv"))
    winner = None
    for row in rows:
        addr = ("%s %s %s"%(row[21], row[19], row[18])).strip()
        if not addr:
            continue
        if addr not in buildings:
            # First sighting of this address: build/update its record once.
            buildings[addr] = 0
            building = Building.query(Building.address == addr).get()
            if not building:
                log("Can't find '%s' -- creating new entry"%(addr,), 2)
                building = Building(address=addr)
            # TODO: zipcode, year, building_id, owner
            btype = row[7].strip()
            lat = row[11].strip()
            lng = row[10].strip()
            if btype:
                building.building_type = btype
            if lat:
                building.latitude = float(lat)
            if lng:
                building.longitude = float(lng)
            blds.append(building)
        buildings[addr] += 1
        if not winner or buildings[addr] > buildings[winner]:
            winner = addr
    # With no usable rows ``winner`` is still None; looking it up directly
    # would raise KeyError and abort before anything is written or saved.
    winner_count = buildings.get(winner, 0)
    log("winner: %s (%s). scanned lines: %s"%(winner, winner_count, len(rows)), 1)
    log("writing bcounts", 1)
    writejson(buildings, os.path.join("logs", "json", "bcounts"))
    log("saving %s buildings to db"%(len(blds),), 1)
    db.put_multi(blds)
    log("goodbye")
    close_log()
def scanPage(page):
    """Parse one page of fire-report text and record its incidents.

    The page text is consumed front to back: each helper returns the
    columns it extracted plus the remaining text, so the call order
    matters.  When ``STEST`` is truthy the parsed columns are returned as a
    dict and nothing else happens.  Otherwise, for every parsed zip entry,
    the per-year totals in ``obj`` are updated (overall and per zip), the
    Building at that address is fetched or created and saved, and a Fire
    is appended to ``fires``.

    ``pnum``, ``STEST``, ``obj`` and ``fires`` are not defined here; they
    are resolved from outside this function.
    """
    def _blank_counts():
        # Fresh zeroed counters for one aggregation bucket.
        return { "fires": 0, "units": 0, "people": 0 }

    page = fixPage(page)
    dates, page = getDates(page)
    years = [stamp.year for stamp in dates]
    # throw away times
    _, page = page.split("\n\n", 1)
    zips, addrs, page = getAddrs(page)
    batts, alarms, page = getBattsAlarms(page)
    units, people, page = getDisplaced(page, len(zips))
    if pnum == 13: # wow...
        # On this page the parsed columns are shifted by one, and the people
        # counts are re-read from the first remaining chunk (minus its
        # first line).
        alarms, units = units, people
        first_chunk = page.split("\n\n")[0]
        people = [int(line) for line in first_chunk.split("\n")[1:]]
    # TODO: correlate more stuff
    # - injuries/deaths
    column_sizes = (len(zips), len(addrs), len(dates), len(batts),
        len(alarms), len(units), len(people))
    log("zips %s addrs %s dates %s batts %s alarms %s units %s people %s"%column_sizes)
    if STEST:
        return {
            "zips": zips,
            "addrs": addrs,
            "dates": dates,
            "batts": batts,
            "alarms": alarms,
            "units": units,
            "people": people
        }
    for n in range(len(zips)):
        year = years[n]
        zipcode = zips[n]
        if year not in obj:
            obj[year] = {"total": _blank_counts()}
        if zipcode not in obj[year]:
            obj[year][zipcode] = _blank_counts()
        # Same counters are bumped for the year-wide total and for the zip.
        for bucket in (obj[year]["total"], obj[year][zipcode]):
            bucket["fires"] += 1
            bucket["units"] += units[n]
            bucket["people"] += people[n]
        # TODO: injuries, fatalities, losses
        address = addrs[n]
        building = Building.query(Building.address == address).get()
        if not building:
            log("no building (%s) -- creating new one!"%(address,), important=True)
            # Fall back to deriving the zip from the address when none was parsed.
            zip_record = getzip(zips[n] or addr2zip(address))
            building = Building(address=address, zipcode=zip_record.key)
            building.latitude, building.longitude = address2latlng(address)
            building.put()
        fires.append(Fire(
            building=building.key,
            date=dates[n],
            battalion=batts[n],
            alarms=alarms[n],
            units=units[n],
            persons=people[n]
        ))