def full_scan():
	"""Mark every address listed in rent_control.geojson as rent-controlled.

	Reads the "features" list from scrapers/data/rent_control.geojson and
	keeps the "properties" dict of each feature whose "address" is non-blank.
	For each distinct address the matching Building is fetched (a new one is
	created when the query finds nothing), its rent_control flag is set, and
	all touched buildings are saved with a single db.put_multi() call.
	Progress is written to logs/txt/rent_control.txt.
	"""
	set_log(os.path.join("logs", "txt", "rent_control.txt"))
	log("Scanning rent_control.geojson", important=True)
	geojson_path = os.path.join("scrapers", "data", "rent_control.geojson")
	features = read(geojson_path, isjson=True)["features"]

	# Keep only the property dicts that carry a usable address.
	usable = []
	for feature in features:
		props = feature["properties"]
		if props["address"]:
			usable.append(props)
	log("using %s of %s rows (omitting blank entries)"%(len(usable), len(features)), 1)

	seen = set()
	flagged = []
	for props in usable:
		address = props["address"]
		if address in seen:
			# Repeated address: its building is already queued for saving.
			continue
		seen.add(address)
		match = Building.query(Building.address == address).get()
		if not match:
			log("Can't find '%s' -- creating new entry"%(address,), 2)
			match = Building(address=address)
		match.rent_control = True
		flagged.append(match)

	log("saving %s rent-control buildings to db"%(len(flagged),), 1)
	db.put_multi(flagged)
	log("goodbye")
	close_log()
Пример #2
0
def export_buildings():
	"""Create or update a Building record for every entry in ``buildings``.

	``buildings`` (defined outside this block) is iterated as
	``building id -> data dict``; each data dict is read for the keys
	"address", "owner", "year", "building_type" and "zipcode". The Building
	is looked up by address and created when missing. Owner, year, type and
	id are only written when the incoming value is truthy; zipcode and
	latitude/longitude are only filled in when the record does not already
	have them. All records are saved together with db.put_multi() at the end.
	"""
	entries = buildings.items()
	log("processing %s buildings"%(len(entries),), 1)
	pending = []
	for count, (b_id, bdata) in enumerate(entries, 1):
		address = bdata["address"]
		record = Building.query(Building.address == address).get()
		owner = Owner.query(Owner.name == bdata["owner"]).get()
		year = bdata["year"]
		kind = bdata["building_type"]
		if not record:
			log("Can't find building '%s' -- creating new entry"%(address,), 2)
			record = Building(address=address)
		if owner:
			record.owner = owner.key
		# Only overwrite a stored field when the incoming value is non-empty.
		for field, value in (("year", year), ("building_type", kind), ("building_id", b_id)):
			if value:
				setattr(record, field, value)
		if not record.zipcode:
			zipcode = _zip(bdata["zipcode"], address)
			if zipcode:
				record.zipcode = zipcode
		# Geocode only when at least one coordinate is still missing.
		if not (record.latitude and record.longitude):
			record.latitude, record.longitude = address2latlng(record.address)
		pending.append(record)
		if count % 100 == 0:
			log("processed %s buildings"%(count,), 2)
	log("saving buildings", 1)
	db.put_multi(pending)
Пример #3
0
def export_parcels():
	"""Insert a Parcel record for every entry in ``parcels`` not yet stored.

	``parcels`` (defined outside this block) is iterated as
	``parcel id -> data dict``. A parcel whose id already matches a stored
	Parcel is left untouched. Otherwise a new Parcel is built from the
	"dwelling_units", "from_addr" and "to_addr" values and linked to the
	Building whose building_id equals the "building" value (or to None when
	no such Building is found). New parcels are saved together with
	db.put_multi() at the end.
	"""
	entries = parcels.items()
	log("processing %s parcels"%(len(entries),), 1)
	fresh = []
	for count, (p_id, pdata) in enumerate(entries, 1):
		existing = Parcel.query(Parcel.parcel_id == p_id).get()
		if not existing:
			log("Can't find parcel '%s' -- creating new entry"%(p_id,), 2)
			parent = Building.query(Building.building_id == pdata["building"]).get()
			# A missing building (or a falsy key) leaves the link empty.
			parent_key = None
			if parent:
				parent_key = parent.key or None
			fresh.append(Parcel(
				parcel_id=p_id,
				dwelling_units=pdata["dwelling_units"],
				from_addr=pdata["from_addr"],
				to_addr=pdata["to_addr"],
				building=parent_key
			))
		if count % 100 == 0:
			log("processed %s parcels"%(count,), 2)
	log("saving parcels", 1)
	db.put_multi(fresh)
def full_scan():
	"""Import buildings from BlockLot_with_LatLon.csv and count rows per address.

	Each CSV row's address is assembled from columns 21, 19 and 18; rows with
	a blank address are skipped. The first time an address is seen its
	Building is fetched (or created when the query finds nothing) and updated
	from column 7 (building type), column 11 (latitude) and column 10
	(longitude) when those cells are non-blank. Every row with an address
	increments that address's counter in ``buildings``, and the address with
	the highest count is reported as the "winner". The counts are written to
	logs/json/bcounts and the collected Building records are saved with a
	single db.put_multi() call. Progress is written to logs/txt/buildings.txt.

	NOTE(review): ``buildings`` and ``blds`` are not defined in this block --
	presumably a module-level dict and list; confirm before relying on this.
	"""
	set_log(os.path.join("logs", "txt", "buildings.txt"))
	log("Scanning BlockLot_with_LatLon.csv", important=True)
	# Named ``rows`` rather than ``csv`` so the stdlib module name is not shadowed.
	rows = getcsv(os.path.join("scrapers", "data", "BlockLot_with_LatLon.csv"))
	winner = None
	for row in rows:
		addr = ("%s %s %s"%(row[21], row[19], row[18])).strip()
		if not addr:
			continue
		if addr not in buildings:
			buildings[addr] = 0
			building = Building.query(Building.address == addr).get()
			if not building:
				log("Can't find '%s' -- creating new entry"%(addr,), 2)
				building = Building(address=addr)
			# TODO: zipcode, year, building_id, owner
			btype = row[7].strip()
			lat = row[11].strip()
			lng = row[10].strip()
			if btype:
				building.building_type = btype
			if lat:
				building.latitude = float(lat)
			if lng:
				building.longitude = float(lng)
			blds.append(building)
		buildings[addr] += 1
		if winner is None or buildings[addr] > buildings[winner]:
			winner = addr
	# With no usable rows there is no winner; report a count of 0 instead of
	# failing on buildings[None] before the log can be closed.
	top_count = buildings[winner] if winner is not None else 0
	log("winner: %s (%s). scanned lines: %s"%(winner, top_count, len(rows)), 1)
	log("writing bcounts", 1)
	writejson(buildings, os.path.join("logs", "json", "bcounts"))
	log("saving %s buildings to db"%(len(blds),), 1)
	db.put_multi(blds)
	log("goodbye")
	close_log()
Пример #5
0
def scanPage(page):
	"""Parse one page of fire-report text and record the fires it lists.

	The page text is consumed in stages: getDates, getAddrs, getBattsAlarms
	and getDisplaced each return their parsed column(s) together with the
	remaining text, which is passed on to the next stage.

	When STEST is truthy the parsed columns are returned as a dict and
	nothing is recorded. Otherwise, for every parsed row the per-year totals
	and per-year/per-zip counters in ``obj`` are updated, the row's Building
	is looked up by address (created and saved immediately when missing),
	and a Fire is appended to ``fires``; no value is returned in the portion
	of the function visible here.

	NOTE(review): pnum, STEST, obj and fires are not defined in this block --
	presumably module-level state; confirm against the rest of the file.
	"""
	page = fixPage(page)
	dates, page = getDates(page)
	years = [d.year for d in dates]
	# throw away times: everything up to the first blank line is split off
	# into ``times``, which is never used again in this block
	times, page = page.split("\n\n", 1)
	zips, addrs, page = getAddrs(page)
	batts, alarms, page = getBattsAlarms(page)
	units, people, page = getDisplaced(page, len(zips))
	# Special case when pnum is 13: the parsed columns are shifted by one, so
	# the "units" result is really alarms and the "people" result is really
	# units. People are then read as ints from the first blank-line-delimited
	# chunk of the remaining text, skipping that chunk's first line.
	if pnum == 13: # wow...
		alarms = units
		units = people
		people = [int(l) for l in page.split("\n\n")[0].split("\n")[1:]]
	# TODO: correlate more stuff
	# - injuries/deaths

	# Log the column lengths so mismatched parses are visible in the log.
	log("zips %s addrs %s dates %s batts %s alarms %s units %s people %s"%(len(zips),
		len(addrs), len(dates), len(batts), len(alarms), len(units), len(people)))
	# With STEST set, hand back the raw parsed columns and skip all recording.
	if STEST:
		return {
			"zips": zips,
			"addrs": addrs,
			"dates": dates,
			"batts": batts,
			"alarms": alarms,
			"units": units,
			"people": people
		}

	# One iteration per parsed row; all columns are indexed by the same n.
	for n in range(len(zips)):
		# Lazily create the per-year bucket (with its "total" counters) and
		# the per-zip bucket inside it.
		if years[n] not in obj:
			obj[years[n]] = {"total": {
				"fires": 0,
				"units": 0,
				"people": 0
			}}
		if zips[n] not in obj[years[n]]:
			obj[years[n]][zips[n]] = {
				"fires": 0,
				"units": 0,
				"people": 0
			}
		# Each row counts as one fire, both in the year total and in its zip.
		obj[years[n]]["total"]["fires"] += 1
		obj[years[n]]["total"]["units"] += units[n]
		obj[years[n]]["total"]["people"] += people[n]
		obj[years[n]][zips[n]]["fires"] += 1
		obj[years[n]][zips[n]]["units"] += units[n]
		obj[years[n]][zips[n]]["people"] += people[n]

		# TODO: injuries, fatalities, losses
		building = Building.query(Building.address == addrs[n]).get()
		if not building:
			log("no building (%s) -- creating new one!"%(addrs[n],), important=True)
			# Use the parsed zip when present; otherwise derive one from the
			# address via addr2zip.
			building = Building(address=addrs[n], zipcode=getzip(zips[n] or addr2zip(addrs[n])).key)
			building.latitude, building.longitude = address2latlng(addrs[n])
			# Saved right away, before building.key is read for the Fire below.
			building.put()
		fires.append(Fire(
			building=building.key,
			date=dates[n],
			battalion=batts[n],
			alarms=alarms[n],
			units=units[n],
			persons=people[n]
		))