from postcode import query_postcode # print query_postcode(1999) print query_postcode("18 rockleigh way, epping, nsw")
def parse_trafficnsw_twitter_entry(self, raw_data):
    """Convert raw TrafficNSW timeline statuses into event dictionaries.

    Each status is read through two keys:

    - ``text``: the event description. A typical value is
      ``Sydney Traffic EMERGENCY ROAD WORKS - TURRAMURRA Pacific Hwy at
      Kissing Point Rd #sydtraffic #trafficnetwork``. The leading
      ``Sydney Traffic`` marker and the trailing hashtags are discarded;
      the part before `` - `` is the event type and the part after it is
      the event location.
    - ``created_at``: the event time in UTC, formatted like
      ``Tue Apr 09 01:50:04 +0000 2013``.

    Args:
        raw_data: Iterable of status mappings providing ``text`` and
            ``created_at``.

    Returns:
        A list with one dict per status, holding the keys ``time``,
        ``type``, ``suburb``, ``street``, ``postcode`` and ``coordinate``.
        The last four are copied from the result of ``query_postcode``.

    Raises:
        KeyError: If a status lacks ``text`` or ``created_at``.
        ValueError: If ``created_at`` does not match the expected format.
    """
    prefix = 'Sydney Traffic '
    separator = ' - '
    hashtag_marker = ' #'
    time_format = '%a %b %d %H:%M:%S %Y'
    # NOTE(review): a fixed UTC+10 offset is applied, as in the original
    # code. It does not account for daylight saving time -- confirm whether
    # that is acceptable for consumers of event['time'].
    local_offset = datetime.timedelta(hours=10)

    def to_native_str(value):
        # Return the interpreter's native ``str`` type so that slicing,
        # ``replace`` and ``strptime`` below work on Python 2 and Python 3.
        if isinstance(value, str):
            return value
        if isinstance(value, bytes):
            # Only reachable on Python 3 (on Python 2, bytes is str).
            return value.decode("utf-8")
        # Python 2 ``unicode``: keep the historical UTF-8 byte string.
        return value.encode("utf-8")

    events = []
    for status in raw_data:
        description = to_native_str(status['text'])
        str_event_time = to_native_str(status['created_at'])

        # Strip the marker only when it is really there, instead of blindly
        # dropping the first characters of the text.
        if description.startswith(prefix):
            description = description[len(prefix):]

        # Cut the trailing hashtags; when none are present keep the whole
        # text (str.find() returning -1 must not be used as a slice bound).
        hashtag_at = description.find(hashtag_marker)
        if hashtag_at != -1:
            description = description[:hashtag_at]

        # Split once on the full ' - ' separator so that a hyphen inside
        # the event type cannot shift the start of the location. Without a
        # separator the whole text is the type and the location is empty.
        event_type, _, event_location = description.partition(separator)
        event_type = event_type.strip()
        event_location = event_location.strip()

        gcode = query_postcode(event_location)

        # The UTC offset is removed before parsing because '%z' is not
        # reliably supported by strptime on every Python version.
        str_event_time = str_event_time.replace("+0000 ", "")
        utc_time = datetime.datetime.strptime(str_event_time, time_format)

        events.append({
            'time': utc_time + local_offset,
            'type': event_type,
            'suburb': gcode['suburb'],
            'street': gcode['street'],
            'postcode': gcode['postcode'],
            'coordinate': gcode['cord'],
        })
    return events