Example #1
def process_chp_center(chpCenter):
	"""Process a CHP Center.

	"""
	incident_list = []
	psh_pings = []
	dash_re = re.compile(r'\s*-\s*')

	center_id = chpCenter.attrib['ID']

	for chpDispatch in chpCenter:
		dispatch_id = chpDispatch.attrib['ID']

		# For some reason the Dispatch ID is sometimes blank, so skip those dispatches.
		if dispatch_id == "":
			continue

		for chpLog in chpDispatch:
			log_id = chpLog.attrib['ID']

			# There are two different time formats in the CHP feed.  Try the
			# "standard" format first, then fall back to the new SAHB format.
			try:
				log_time = datetime.strptime(chpLog.find('LogTime').text, '"%m/%d/%Y %I:%M:%S %p"')
			except ValueError:
				log_time = datetime.strptime(chpLog.find('LogTime').text, '"%b %d %Y %I:%M%p"')

			# Duct tape to get the timezone right: convert the naive Pacific
			# time to naive UTC by subtracting the local UTC offset.
			pacific_tz = tzinfo.Pacific()
			log_time = log_time - pacific_tz.utcoffset(log_time)

			incident_key_name = "%s.%s.%s" % (center_id, dispatch_id, log_id)
			incident_key = ndb.Key(CHPIncident, incident_key_name)

			incident = incident_key.get()
			if incident is None:
				incident = CHPIncident(key = incident_key,
					CenterID = center_id,
					DispatchID = dispatch_id,
					LogID = log_id,
					LogTime = log_time,
					Area = chpLog.find('Area').text.strip('"'))

			#
			# The following attributes can change between updates, so reset
			# them from the feed data.
			#

			# LogType and LogTypeID
			(logtypeid, dash, logtype) = chpLog.find('LogType').text.strip('"').partition("-")
			if dash == '':
				# If 'dash' is empty the hyphen was missing, so use the whole
				# string as the LogType and skip the LogTypeID (see the
				# stand-alone sketch after this example).
				incident.LogType = deCopIfy(re.sub(dash_re, ' - ', logtypeid.strip()))
			else:
				incident.LogType = deCopIfy(re.sub(dash_re, ' - ', logtype.strip()))
				incident.LogTypeID = logtypeid.strip()

			# Location
			incident.Location = deCopIfy(chpLog.find('Location').text.strip('"'))

			# LocationDesc
			# Make sure the location and the locationDesc aren't dupes
			locationDesc = deCopIfy(chpLog.find('LocationDesc').text.strip('"'))
			if (incident.Location.upper() != locationDesc.upper()):
				incident.LocationDesc = locationDesc

			# geolocation
			latlon_text = chpLog.find('LATLON').text
			if latlon_text is not None:
				latlon = latlon_text.strip('"')
				if latlon != "0:0":
					incident.geolocation = ndb.GeoPt(
						lat = float(latlon.partition(":")[0]) / 1000000,
						lon = float(latlon.partition(":")[2]) / 1000000 * -1
					)

			# LogDetails: collect the detail entries; elements that lack a
			# DetailTime or IncidentDetail child are skipped by the
			# AttributeError catch below.
			LogDetails = {
				'details': []
			}
			logdetails_element = chpLog.find('LogDetails')
			for element in logdetails_element:
				try:
					detail_dict = {
						'DetailTime': element.find('DetailTime').text.strip('"'),
						'IncidentDetail': deCopIfy(element.find('IncidentDetail').text.strip('"^')).capitalize()
					}
					LogDetails[element.tag].append(detail_dict)
				except AttributeError:
					pass
			incident.LogDetails = LogDetails

			# Set up the PSH pings.  Note that we are NOT checking for actual
			# changes in the data; we just assume that the existence of an
			# incident in the CHP feed marks it as "updated", so we ping.
			psh_pings.append('http://www.sactraffic.org/atom?center=%s' % urllib.quote(incident.CenterID))
			psh_pings.append('http://www.sactraffic.org/atom?dispatch=%s' % urllib.quote(incident.DispatchID))
			if incident.Area is not None and incident.Area != "":
				psh_pings.append('http://www.sactraffic.org/atom?area=%s' % urllib.quote(incident.Area))
			if incident.city is not None and incident.city != "":
				psh_pings.append('http://www.sactraffic.org/atom?city=%s' % urllib.quote(incident.city.encode('utf8')))

			# Save this incident
			incident_list.append(incident)

	# Store the incidents in a batch
	ndb.put_multi(incident_list)

	# Ping the PSH hub; use a set so we don't ping duplicates.
	ping_set = set(psh_pings)
	if not debug:
		deferred.defer(pubsubhubbub_publish.publish, 'http://pubsubhubbub.appspot.com', ping_set, _queue="pshPingQueue")
	else:
		logging.info("Skipping PSH pings for %s on the development server. %s" % (incident.CenterID, ping_set))

	if center_id == 'SAHB':
		# Limit reverse geocoding to the SAHB center to keep the number of
		# geocoder requests down; we are _Sac_ Traffic, after all.

		# Reverse geocode the incidents if we haven't already
		for incident in incident_list:
			if incident.city is None and incident.geolocation is not None:
				if not debug:
					deferred.defer(reverse_geocode.load_city, incident, _queue="reverseGeocodeQueue")
				else:
					logging.info("Skipping reverse geocode for %s on the development server." % (incident.Area))

	logging.info("Processed %d incidents in %s." % (len(incident_list), chpCenter.attrib['ID']))
Example #2
def process_chp_center(chpCenter):
	"""Process a CHP Center.

	"""
	incident_list = []
	psh_pings = []
	dash_re = re.compile(r'\s*-\s*')

	for chpDispatch in chpCenter:
		# For some reason the Dispatch ID is sometimes blank,
		# so skip those dispatches.
		if chpDispatch.attrib['ID'] == "":
			continue

		for chpLog in chpDispatch:
			# There are two different time formats in the CHP feed.  Try the
			# "standard" format first, then fall back to the new SAHB format.
			try:
				log_time = datetime.strptime(chpLog.find('LogTime').text, '"%m/%d/%Y %I:%M:%S %p"').replace(tzinfo=tzinfo.Pacific())
			except ValueError:
				log_time = datetime.strptime(chpLog.find('LogTime').text, '"%b %d %Y %I:%M%p"').replace(tzinfo=tzinfo.Pacific())

			key_name = "%s.%s.%s.%d" % (chpCenter.attrib['ID'], chpDispatch.attrib['ID'], chpLog.attrib['ID'], time.mktime(log_time.timetuple()))
			incident = CHPIncident.get_by_key_name(key_name)
			if incident is None:
				incident = CHPIncident(key_name = key_name,
					CenterID = chpCenter.attrib['ID'],
					DispatchID = chpDispatch.attrib['ID'],
					LogID = chpLog.attrib['ID'])

			incident.LogTime = log_time

			(logtypeid, dash, logtype) = chpLog.find('LogType').text.strip('"').partition("-")
			if dash == '':
				# If 'dash' is empty the hyphen was missing, so use the whole
				# string as the LogType and skip the LogTypeID.
				incident.LogType = deCopIfy(re.sub(dash_re, ' - ', logtypeid.strip()))
			else:
				incident.LogType = deCopIfy(re.sub(dash_re, ' - ', logtype.strip()))
				incident.LogTypeID = logtypeid.strip()

			incident.Location = deCopIfy(chpLog.find('Location').text.strip('"'))
			incident.Area = chpLog.find('Area').text.strip('"')
			incident.ThomasBrothers = chpLog.find('ThomasBrothers').text.strip('"')

			# Like the LogTime above, there are now two location formats.
			# This time we try the SAHB LATLON first, then fall back to TBXY
			# (a stand-alone sketch of the LATLON parsing follows this example).
			try:
				latlon = chpLog.find('LATLON').text.strip('"')
				if latlon != "0:0":
					incident.geolocation = db.GeoPt(
						lat = float(latlon.partition(":")[0]) / 1000000,
						lon = float(latlon.partition(":")[2]) / 1000000 * -1
					)
			except AttributeError:
				incident.TBXY = chpLog.find('TBXY').text.strip('"')
				incident.geolocation = geoConvertTBXY(incident.CenterID, incident.TBXY)

			# Special handling for the LogDetails
			LogDetails = {
				'details': [],
				'units': []
			}
			logdetails_element = chpLog.find('LogDetails')
			for element in logdetails_element:
				try:
					detail_dict = {
						'DetailTime': element.find('DetailTime').text.strip('"'),
						'IncidentDetail': deCopIfy(element.find('IncidentDetail').text.strip('"^')).capitalize()
					}
					LogDetails[element.tag].append(detail_dict)
				except AttributeError:
					pass

			incident.LogDetails = pickle.dumps(LogDetails)

			# Set up the PSH pings.  Note that we are NOT checking for actual
			# changes in the data; we just assume that the existence of an
			# incident in the CHP feed marks it as "updated", so we ping.
			psh_pings.append('http://www.sactraffic.org/atom?center=%s' % urllib.quote(incident.CenterID))
			psh_pings.append('http://www.sactraffic.org/atom?dispatch=%s' % urllib.quote(incident.DispatchID))
			psh_pings.append('http://www.sactraffic.org/atom?area=%s' % urllib.quote(incident.Area))
			if incident.city is not None and incident.city != "":
				psh_pings.append('http://www.sactraffic.org/atom?city=%s' % urllib.quote(incident.city))

			# Save this incident
			incident_list.append(incident)

	# Store the incidents in a batch
	db.put(incident_list)

	# Ping the PSH hub; use a set so we don't ping duplicates.
	ping_set = set(psh_pings)
	if not debug:
		deferred.defer(pubsubhubbub_publish.publish, 'http://pubsubhubbub.appspot.com', ping_set, _queue="pshPingQueue")
	else:
		logging.info("Skipping PSH pings for %s on the development server. %s" % (incident.CenterID, ping_set))

	# Reverse geocode the incidents if we haven't already
	for incident in incident_list:
		if incident.city is None and incident.geolocation is not None:
			deferred.defer(reverse_geocode.load_city, incident, _queue="reverseGeocodeQueue")

	logging.info("Processed %d incidents in %s." % (len(incident_list), chpCenter.attrib['ID']))