def remove_rows(table, should_remove):
    """Delete the entities of `table` selected by `should_remove`.

    Every entity returned by ``asw.get_entities(table)`` is handed to
    ``should_remove``; when the result is truthy the entity is deleted via
    ``asw.delete_entity`` using its ``PartitionKey`` and ``RowKey``.

    Returns the number of entities that were deleted.
    """
    removed_count = 0
    for entity in asw.get_entities(table):
        if not should_remove(entity):
            continue
        asw.delete_entity(table, entity['PartitionKey'], entity['RowKey'])
        removed_count += 1
    return removed_count
def generate_hourly_prediction(until_time):
    """Upload hour-by-hour predictions for every location in ``gps``.

    For each location the latest stored pollution row is used as a seed and
    ``get_next_hour_row`` is applied repeatedly until the row's timestamp
    reaches the latest "clweather" timestamp; every generated row is uploaded
    to the "prediction" table.

    `until_time` is accepted but not used by this function.

    Returns the latest "clweather" timestamp, or None if the seed row of some
    location cannot be found (the remaining locations are then skipped).
    Raises AssertionError when the weather window would be empty.
    """
    # Start of the weather window: the oldest "latest upload" across all
    # locations, but never later than the current time.
    window_start = datetime.now()
    for lat, lon in gps.values():
        latest = AQR.to_timestamp(
            *AQR.get_latest_timestamp(str(lat) + str(lon)))
        window_start = min(window_start, latest)

    weather_end = AQR.to_timestamp(*AQR.get_latest_timestamp("clweather"))
    assert window_start < weather_end

    # Index the weather rows by their parsed search timestamp.
    weather = {
        AQR.from_search_timestamp(entry["SearchTimestamp"]): entry
        for entry in AQR.get_weather(window_start, weather_end)
    }
    print("Predicting up to: " + str(weather_end))

    no_match = object()  # sentinel: the seed query returned nothing
    for loc, (lat, lon) in gps.items():
        show_banner(loc)
        latest = AQR.to_timestamp(
            *AQR.get_latest_timestamp(str(lat) + str(lon)))
        print("Last time we have data: " + str(latest))
        if latest >= weather_end:
            # Stored data already reaches the end of the weather window.
            continue

        seed_filter = ("SearchTimestamp eq '" +
                       AQR.to_search_timestamp(latest) +
                       "' and Latitude eq " + str(lat))
        matches = asw.get_entities(AQR.pollution_table, seed_filter)
        seed = next(iter(matches), no_match)
        if seed is no_match:
            print("Empty!" + str(lat) + str(latest))
            return

        current_row = AQR.std_time(latest) + [lat, lon] + [
            seed["PM10"], seed["PM25"], seed["NOx"]
        ]
        while AQR.to_timestamp(*current_row[:3]) < weather_end:
            current_row = get_next_hour_row(current_row, weather)
            # The upload takes the last of the three trailing values in
            # front of the other two.
            leading, trailing = current_row[:-3], current_row[-3:]
            AQR.upload_pollution(leading + [trailing[-1]] + trailing[:-1],
                                 table="prediction",
                                 force=True)
    return weather_end
def get_average():
    """Average, per location and pollutant, the ratio of each reading to its
    daily average.

    Reads the pollution table from ``AVG_HISTORY`` years before the current
    year onwards.  For every location and day, each reading is divided (via
    ``div_with_null``) by that day's average for the same pollutant, and the
    ratios are merged into per-slot ``AverageGenerator`` objects keyed
    0, 60, ..., 1380.

    Returns a dict: (latitude, longitude) -> pollutant -> slot key ->
    result of ``AverageGenerator.get()``.
    """
    first_year = str(date.today().year - AVG_HISTORY)
    rows = asw.get_entities(AQR.pollution_table,
                            filter="Year ge '" + first_year + "'")

    ## readings is nested four levels deep:
    ## location -> (year, day) -> int(row['Minutes']) -> pollutant -> value
    readings = dict()
    for row in rows:
        day_key = (row["Year"], row["Days"])
        loc_key = (row["Latitude"], row["Longitude"])
        by_day = readings.setdefault(loc_key, dict())
        by_minute = by_day.setdefault(day_key, dict())
        by_minute[int(row['Minutes'])] = {p: row[p] for p in AQR.pollutants}

    result = dict()
    for loc_key, by_day in readings.items():
        # One generator per pollutant and per slot (24 slots, keys step 60).
        slots = {
            pollutant: {hour * 60: AverageGenerator() for hour in range(24)}
            for pollutant in AQR.pollutants
        }
        result[loc_key] = slots

        for day in by_day.values():
            # Daily average of each pollutant over all readings of the day.
            totals = {p: AverageGenerator() for p in AQR.pollutants}
            for sample in day.values():
                merge(totals, sample)
            day_means = {p: gen.get() for p, gen in totals.items()}

            # Each reading relative to the day's average of its pollutant.
            relative = {p: dict() for p in AQR.pollutants}
            for minute, sample in day.items():
                for pollutant, value in sample.items():
                    relative[pollutant][minute] = div_with_null(
                        value, day_means[pollutant])

            for pollutant in slots:
                merge(slots[pollutant], relative[pollutant])

    # Replace every generator with its final value, in place.
    for per_pollutant in result.values():
        for per_slot in per_pollutant.values():
            for slot_key in per_slot:
                per_slot[slot_key] = per_slot[slot_key].get()
    return result
def get_day_avgs_weather(weather, time, heading):
    """Average the forecast columns in `heading` over one day.

    Queries ``AQR.forecast_table`` for the entities whose ``Year`` equals
    ``time.year`` and whose ``Days`` equals ``AQR.std_time(time)[1]``, and
    feeds each column named in `heading` into an ``avg.AverageGenerator``.

    `weather` is accepted but not used by this function.

    Returns a dict mapping each column name to the value returned by its
    generator's ``get()``.
    """
    day_filter = ("Year eq '" + str(time.year) + "' and Days eq '" +
                  AQR.std_time(time)[1] + "'")
    entities = asw.get_entities(AQR.forecast_table, filter=day_filter)

    averages = dict()
    for column in heading:
        averages[column] = avg.AverageGenerator()

    for entity in entities:
        for column in heading:
            ## ugly hack: "WindDir" is converted with to_number and written
            ## back into the entity before it is averaged
            if column == "WindDir":
                entity[column] = to_number(entity[column])
            assert column in entity
            averages[column].add(entity[column])

    # Swap every generator for its final value, in place.
    for column in averages:
        generator = averages[column]
        averages[column] = generator.get()
    return averages
def get_day_avgs_pollution(time, heading):
    """Average the pollution columns in `heading` over one day, per location.

    Queries ``AQR.pollution_table`` for the entities whose ``Year`` equals
    ``time.year`` and whose ``Days`` equals ``AQR.std_time(time)[1]``.
    Entities are grouped by their (``Latitude``, ``Longitude``) pair and each
    column named in `heading` is fed into an ``avg.AverageGenerator``.

    Returns a dict: (latitude, longitude) -> column name -> value returned
    by the generator's ``get()``.
    """
    day_filter = ("Year eq '" + str(time.year) + "' and Days eq '" +
                  AQR.std_time(time)[1] + "'")

    per_location = dict()
    for entity in asw.get_entities(AQR.pollution_table, filter=day_filter):
        loc_key = (entity["Latitude"], entity["Longitude"])
        # Generators are created the first time a location is seen.
        if loc_key not in per_location:
            per_location[loc_key] = {
                column: avg.AverageGenerator() for column in heading
            }
        generators = per_location[loc_key]
        for column in heading:
            assert column in entity
            generators[column].add(entity[column])

    # Swap every generator for its final value, in place.
    for generators in per_location.values():
        for column in generators:
            generators[column] = generators[column].get()
    return per_location
import azure_service_wrapper as asw

## Development helper: print selected columns of rows in the "pollution"
## table so that their contents can be checked by eye.
## To look at one specific row instead, filter on its key, e.g.
##   filter="RowKey eq '2017,62,0,52204644'"
COLUMNS = ('SearchTimestamp', 'NOx', 'PM10', 'PM25', 'Latitude')
DAY_FILTER = "Year eq '2017' and Days eq '57'"

for row in asw.get_entities("pollution", filter=DAY_FILTER):
    print([row[column] for column in COLUMNS])
def get_weather(start, end):
    """Fetch the weather entities between `start` and `end`, inclusive.

    Both bounds are converted with ``to_search_timestamp`` and compared
    against the ``SearchTimestamp`` column using ``ge`` / ``le``.

    Returns whatever ``asw.get_entities`` returns for ``weather_table``
    with that filter.
    """
    lower_bound = to_search_timestamp(start)
    upper_bound = to_search_timestamp(end)
    window_filter = ("SearchTimestamp ge '" + lower_bound +
                     "' and SearchTimestamp le '" + upper_bound + "'")
    return asw.get_entities(weather_table, filter=window_filter)