def processAtcData(year, inputRectangleFile, inputAtcDirectory, outputFile, printPrefixString=""):
    """Gather ATC readings for every station rectangle and write them as CSV.

    For each rectangle loaded from ``inputRectangleFile`` the files
    ``<inputAtcDirectory><name>1.csv`` and ``<inputAtcDirectory><name>2.csv``
    (``name`` is the lower-cased rectangle name) are handed to
    ``processAtcFile`` if they exist on disk. The accumulated readings are
    then written to ``outputFile`` with the header ``location,timestamp,atc``.

    Args:
        year: Passed through unchanged to ``processAtcFile``.
        inputRectangleFile: Path handed to ``loadRectangles``.
        inputAtcDirectory: Directory prefix of the ATC files. It is
            concatenated directly with the file name, so it must already end
            with a path separator.
        outputFile: Path of the CSV file to (over)write.
        printPrefixString: Prefix passed to the helpers for their log output.
    """
    # Filled by processAtcFile; read back below as
    # atcData[stationId][timestampKey] -> value.
    atcData = {}

    rectangles = []
    loadRectangles(rectangles, inputRectangleFile, printPrefixString)

    for rectangle in rectangles:
        stationName = str(rectangle.name.lower())
        # Every station may have up to two ATC files; missing ones are
        # silently skipped.
        for suffix in ("1.csv", "2.csv"):
            atcFile = inputAtcDirectory + stationName + suffix
            if os.path.isfile(atcFile):
                processAtcFile(year, atcData, str(rectangle.ID), atcFile, printPrefixString)

    # The context manager guarantees the file is closed even if a write fails.
    with open(outputFile, 'w') as output:
        output.write("location,timestamp,atc\n")
        for stationId, readings in atcData.items():
            for timestampKey, value in readings.items():
                output.write(stationId + "," + timestampKey + "," + str(value) + "\n")
def processAirQualityFiles(pollutant, years, rectangleFile, inputDirectory, printPrefixString=""):
    """Load per-station, per-year air quality files into one dictionary.

    The station list comes from ``rectangleFile``. For every requested year
    and every station the file ``<inputDirectory><station>_<year>.csv`` is
    passed to ``loadFile`` together with that station's bucket.

    Args:
        pollutant: Passed through unchanged to ``loadFile``.
        years: Iterable of years; each is stringified into the file name.
        rectangleFile: Path handed to ``loadRectangles``.
        inputDirectory: Directory prefix, concatenated directly with the file
            name (must already end with a path separator).
        printPrefixString: Prefix for the progress messages.

    Returns:
        dict: Maps ``str(rectangle.ID)`` to the bucket dictionary that was
        handed to ``loadFile`` for that station.
    """
    stationRectangles = []
    loadRectangles(stationRectangles, rectangleFile, printPrefixString)

    stationNames = [rect.name for rect in stationRectangles]
    idByStation = {rect.name: str(rect.ID) for rect in stationRectangles}

    # One empty bucket per station, keyed by the stringified rectangle ID.
    data = {idByStation[name]: {} for name in stationNames}

    for year in years:
        for name in stationNames:
            fileName = inputDirectory + "%s_%s.csv" % (name, year)
            print(printPrefixString + "Load data from " + fileName + "...")
            bucket = data[idByStation[name]]
            loadFile(pollutant, fileName, bucket)

    return data
def getRectangleOSMPolygons(inputPolygonFile, inputRectangleFile, categoryName, generateCount, outputFile, outputStationTraingleGisFile, printPrefixString):
    """Match OSM polygons of one category to the station rectangles and export them.

    The work is delegated to four helpers, in this order: ``loadPolygons``,
    ``loadRectangles``, ``matchingStationPolygons`` and
    ``createTriangleAndSaveFiles``.

    Args:
        inputPolygonFile: Path handed to ``loadPolygons``.
        inputRectangleFile: Path handed to ``loadRectangles``.
        categoryName: Category passed to ``loadPolygons`` and to the export.
        generateCount: Passed through to ``createTriangleAndSaveFiles``.
        outputFile: Main output path for ``createTriangleAndSaveFiles``.
        outputStationTraingleGisFile: Triangle GIS output path for
            ``createTriangleAndSaveFiles``.
        printPrefixString: Prefix passed to the helpers for their log output.
    """
    # Polygons are loaded first, then the rectangles they are matched against.
    categoryPolygons = loadPolygons(inputPolygonFile, categoryName, printPrefixString)

    stationRectangles = []
    loadRectangles(stationRectangles, inputRectangleFile, printPrefixString)

    # Attach the matching polygons to each station rectangle.
    matchingStationPolygons(stationRectangles, categoryPolygons, printPrefixString)

    # Triangulate the matched polygons and write both output files.
    # NOTE(review): the meaning of the literal 100 is defined by
    # createTriangleAndSaveFiles and is not visible here.
    createTriangleAndSaveFiles(
        stationRectangles,
        categoryName,
        generateCount,
        outputFile,
        100,
        outputStationTraingleGisFile,
        printPrefixString,
    )
def processAurnFiles(years, rectangleFile, inputDirectory, printPrefixString=""):
    """Load per-station, per-year AURN files into one dictionary.

    For every requested year and every rectangle from ``rectangleFile`` the
    file ``<inputDirectory><rectangle.ID>_<year>.csv`` is passed to
    ``loadAurnFile`` together with that rectangle's bucket.

    Args:
        years: Iterable of years; each is stringified into the file name.
        rectangleFile: Path handed to ``loadRectangles``.
        inputDirectory: Directory prefix, concatenated directly with the file
            name (must already end with a path separator).
        printPrefixString: Prefix for the progress messages.

    Returns:
        dict: Maps ``rectangle.ID`` (not stringified) to the bucket dictionary
        that was handed to ``loadAurnFile`` for that rectangle.
    """
    stationRectangles = []
    loadRectangles(stationRectangles, rectangleFile, printPrefixString)

    # One empty bucket per rectangle, keyed by the raw rectangle ID.
    data = {rect.ID: {} for rect in stationRectangles}

    for year in years:
        for rect in stationRectangles:
            fileName = inputDirectory + "%s_%s.csv" % (rect.ID, year)
            print(printPrefixString + "Load data from " + fileName + "...")
            loadAurnFile(fileName, data[rect.ID])

    return data
else: dataToPlot.append(0) index = np.arange(len(names)) bar_width = 0.8 fig = plt.figure(None, figsize=(10, 10)) ax = fig.add_subplot(111) ax.bar(index, dataToPlot, bar_width, color='b', edgecolor='none') plt.xlabel("No2 concentration level (ug/m3)") plt.ylabel("#observation") plt.title(title) plt.margins(0.04, 0.04) plt.savefig(fileName) doGraph(overall, "Histogram of No2 distribution in York (2013)", OUTPUT_DIRECTORY + "histogram2_overall.png") rectangles = [] loadRectangles(rectangles, rectangleFile, "\t") for rectangle in rectangles: doGraph(stationHistogram[rectangle.ID], "Histogram of No2 distribution at " + rectangle.name + " (2013)", OUTPUT_DIRECTORY + "histogram_" + rectangle.name.lower() + ".png")
def generateRectangleBuildings(inputBuildingFile, inputRectangleFile, outputGisFile, outputGISTriangleFile, outputFile, detailLevel, printPrefixString=""):
    """Relate buildings to station rectangles and write GIS and coverage files.

    Steps:
      1. Load buildings and build a spatial index over them (``generateIndex``).
      2. Load the station rectangles and attach to each one, as
         ``rectangle.buildings``, the buildings the index returns for the
         rectangle's SW/NE bounding box.
      3. Optionally write the building outlines (``outputGisFile``) and a fan
         triangulation of them (``outputGISTriangleFile``).
      4. Estimate, per rectangle, the fraction covered by buildings by
         sampling a ``detailLevel`` x ``detailLevel`` grid of cell centres and
         write it to ``outputFile``.

    Args:
        inputBuildingFile: Path handed to ``loadBuildings``.
        inputRectangleFile: Path handed to ``loadRectangles``.
        outputGisFile: Path of the building outline file, or ``None`` to skip.
        outputGISTriangleFile: Path of the triangle file, or ``None`` to skip.
            Only when this file is written does each building receive a
            ``triangles`` attribute.
        outputFile: Path of the main CSV output
            (``location,buildings_number,buildings_area``).
        detailLevel: Number of grid cells per axis used for the coverage
            estimate. A value of 0 raises ``ZeroDivisionError``.
        printPrefixString: Prefix for the progress messages.
    """
    # load buildings
    buildings = []
    loadBuildings(inputBuildingFile, buildings, printPrefixString)

    # generate index
    print(printPrefixString + "Generating indicies for buildings...")
    indexedBuildings = generateIndex(buildings)
    print(printPrefixString + "Done...")

    # load rectangle data
    rectangles = []
    loadRectangles(rectangles, inputRectangleFile, printPrefixString)

    print(printPrefixString + "Matching station rectangles with buildings...")
    for rectangle in rectangles:
        print(printPrefixString + "\tStation " + str(rectangle.ID))
        cornerSW = rectangle.cornerSW.toWGS84Coordinate()
        cornerNE = rectangle.cornerNE.toWGS84Coordinate()
        boundingBox = (cornerSW.longitude, cornerSW.latitude,
                       cornerNE.longitude, cornerNE.latitude)
        # The index yields positions into `buildings`.
        rectangle.buildings = [buildings[b] for b in indexedBuildings.intersection(boundingBox)]
    print(printPrefixString + "Done...")

    # Flat list of all matched buildings. A building matched by several
    # rectangles appears once per rectangle.
    rectangleBuildings = []
    for rectangle in rectangles:
        rectangleBuildings.extend(rectangle.buildings)

    if outputGisFile is not None:
        # create gis file
        print(printPrefixString + "Writing out gis data to " + outputGisFile)
        with open(outputGisFile, 'w') as output:
            output.write("osref;polygon\n")
            for building in rectangleBuildings:
                ring = ",".join(
                    str(coordinate.longitude) + " " + str(coordinate.latitude)
                    for coordinate in building.coordinates)
                output.write(building.osref + ";POLYGON((" + ring + "))\n")
        print(printPrefixString + "Done...")

    if outputGISTriangleFile is not None:
        # create triangle gis file
        print(printPrefixString + "Writing out triangle gis data to " + outputGISTriangleFile)
        triangleId = 0
        with open(outputGISTriangleFile, 'w') as output:
            output.write("id;polygon\n")
            for building in rectangleBuildings:
                building.triangles = []
                # Fan triangulation anchored at the first vertex:
                # (0, i + 1, i + 2) for every i.
                for i in range(len(building.coordinates) - 2):
                    v1 = building.coordinates[0].toWGS84Coordinate()
                    v2 = building.coordinates[i + 1].toWGS84Coordinate()
                    v3 = building.coordinates[i + 2].toWGS84Coordinate()
                    building.triangles.append([
                        v1.toMapCoordinate(),
                        v2.toMapCoordinate(),
                        v3.toMapCoordinate()
                    ])
                    triangleId = triangleId + 1
                    # Closed ring: the first vertex is repeated at the end.
                    ring = ",".join(
                        str(v.longitude) + " " + str(v.latitude)
                        for v in (v1, v2, v3, v1))
                    output.write(str(triangleId) + ";POLYGON((" + ring + "))\n")
        print(printPrefixString + "Done...")

    print(printPrefixString + "Writing out the main output file (doing covered area) to " + outputFile + "...")

    # create output file
    with open(outputFile, 'w') as output:
        output.write("location,buildings_number,buildings_area\n")
        for rectangle in rectangles:
            print(printPrefixString + "\tStation: " + str(rectangle.ID))

            # The corners are the same for every grid cell, so convert once.
            nw = rectangle.cornerNW.toMapCoordinate()
            se = rectangle.cornerSE.toMapCoordinate()

            # Count the grid cells whose centre point hits at least one
            # entry of the building index.
            areaCovered = 0
            for x in range(detailLevel):
                p1x = se.x + (nw.x - se.x) * (float(x) / float(detailLevel))
                p2x = se.x + (nw.x - se.x) * (float(x + 1) / float(detailLevel))
                for y in range(detailLevel):
                    p1y = se.y + (nw.y - se.y) * (float(y) / float(detailLevel))
                    p2y = se.y + (nw.y - se.y) * (float(y + 1) / float(detailLevel))
                    centre = MapCoordinate((p1x + p2x) / 2.0, (p1y + p2y) / 2.0)
                    wgs84Coordinate = centre.toWGS84Coordinate()
                    # Degenerate box (min == max) is a point query.
                    hits = list(indexedBuildings.intersection(
                        (wgs84Coordinate.longitude, wgs84Coordinate.latitude,
                         wgs84Coordinate.longitude, wgs84Coordinate.latitude)))
                    if hits:
                        areaCovered = areaCovered + 1

            coverage = float(areaCovered) / (detailLevel * detailLevel)
            output.write(
                str(rectangle.ID) + "," + str(len(rectangle.buildings)) + "," + str(coverage) + "\n")

    print(printPrefixString + "Done...")
def createTimeFile(timestamps, inputRectangleFile, outputFile, printPrefixString="", binned=False, yorkSpecific=True):
    """Write calendar features for every (station, timestamp) pair to a CSV file.

    One row is written per rectangle and timestamp. The columns are:

    * ``binned=False``: ``location,timestamp,hour,day_of_week,month,bank_holiday``
    * ``binned=True``: ``location,timestamp`` followed by one-hot columns
      ``hour0..hour23``, ``day_of_week0..day_of_week6``, ``month0..month11``
      (``month0`` is calendar month 1) and ``bank_holiday``.

    A trailing ``race_day`` column is added to both the header and the rows
    only when ``yorkSpecific`` is true. Previously the rows carried it
    unconditionally, so with ``yorkSpecific=False`` they had one more column
    than the header.

    Args:
        timestamps: Iterable of objects exposing ``hour``, ``month``, ``year``,
            ``day`` and ``key``; the first eight characters of ``key`` are
            matched against the ``YYYYMMDD`` date tables below. It is consumed
            once, before any station is written.
        inputRectangleFile: Path handed to ``loadRectangles``.
        outputFile: Path of the CSV file to (over)write.
        printPrefixString: Prefix for the progress messages.
        binned: Boolean; emit one-hot encoded hour/weekday/month columns.
        yorkSpecific: Boolean; include the ``race_day`` column.
    """
    # Dates (YYYYMMDD) flagged in the bank_holiday column, 2012-2016.
    bankHolidays = {
        "20120102", "20120406", "20120409", "20120507", "20120604", "20120605",
        "20120827", "20121225", "20121226",
        "20130101", "20130329", "20130401", "20130506", "20130527", "20130826",
        "20131225", "20131226",
        "20140101", "20140418", "20140421", "20140505", "20140526", "20140825",
        "20141225", "20141226",
        "20150101", "20150403", "20150406", "20150504", "20150525", "20150831",
        "20151225", "20151228",
        "20160101", "20160325", "20160328", "20160502", "20160530", "20160829",
        "20161226", "20161227",
    }

    # Dates (YYYYMMDD) flagged in the race_day column, 2012-2016.
    raceDays = {
        "20120516", "20120517", "20120518", "20120526", "20120615", "20120616",
        "20120713", "20120714", "20120727", "20120728", "20120822", "20120823",
        "20120824", "20120825", "20120909", "20121012", "20121013",
        "20130515", "20130516", "20130517", "20130523", "20130614", "20130615",
        "20130712", "20130713", "20130726", "20130727", "20130821", "20130822",
        "20130823", "20130824", "20130908", "20131011", "20131012",
        "20140514", "20140515", "20140516", "20140531", "20140613", "20140614",
        "20140711", "20140712", "20140725", "20140726", "20140820", "20140821",
        "20140822", "20140823", "20140907", "20141010", "20141011",
        "20150513", "20150514", "20150515", "20150530", "20150612", "20150613",
        "20150710", "20150711", "20150724", "20150725", "20150819", "20150820",
        "20150821", "20150822", "20150906", "20151009", "20151010",
        "20160511", "20160512", "20160513", "20160521", "20160610", "20160611",
        "20160708", "20160709", "20160722", "20160723", "20160817", "20160818",
        "20160819", "20160820", "20160904", "20161007", "20161008",
    }

    # load rectangle data
    rectangles = []
    loadRectangles(rectangles, inputRectangleFile, printPrefixString)

    print(printPrefixString + "Writing out time related data to " + outputFile)

    header = ["location", "timestamp"]
    if binned:
        header += ["hour" + str(i) for i in range(24)]
        header += ["day_of_week" + str(i) for i in range(7)]
        header += ["month" + str(i) for i in range(12)]
    else:
        header += ["hour", "day_of_week", "month"]
    header.append("bank_holiday")
    if yorkSpecific:
        header.append("race_day")

    # Everything after the location column depends only on the timestamp,
    # so build those row tails once instead of once per station.
    rowTails = []
    for timestamp in timestamps:
        hour = str(timestamp.hour)
        month = str(timestamp.month)
        dayOfWeek = str(datetime(timestamp.year, timestamp.month, timestamp.day).weekday())
        timestampDay = timestamp.key[0:8]

        fields = [timestamp.key]
        if binned:
            fields += ["1" if str(i) == hour else "0" for i in range(24)]
            fields += ["1" if str(i) == dayOfWeek else "0" for i in range(7)]
            # Months are 1-based; column month0 therefore stands for January.
            fields += ["1" if str(i) == month else "0" for i in range(1, 13)]
        else:
            fields += [hour, dayOfWeek, month]
        fields.append("1" if timestampDay in bankHolidays else "0")
        if yorkSpecific:
            # Keep the rows aligned with the header: no race_day column
            # unless it was announced there.
            fields.append("1" if timestampDay in raceDays else "0")
        rowTails.append(",".join(fields) + "\n")

    # The context manager guarantees the file is closed even if a write fails.
    with open(outputFile, 'w') as output:
        output.write(",".join(header) + "\n")
        for rectangle in rectangles:
            location = str(rectangle.ID)
            for rowTail in rowTails:
                output.write(location + "," + rowTail)

    print(printPrefixString + "Done...")
def createTimeFileBinned(inputRectangleFile, outputFile, printPrefixString=""):
    """Write one-hot encoded calendar features for 2013 to a CSV file.

    Timestamps come from ``generateTimestamps(2013)``. One row is written per
    rectangle and timestamp with the columns ``location,timestamp``,
    ``hour0..hour23``, ``day_of_week0..day_of_week6``, ``month0..month11``,
    ``bank_holiday`` and ``race_day``.

    ``month0`` stands for calendar month 1 (January) and ``month11`` for
    month 12 (December). The previous implementation compared the 1-based
    month against 0..11, which shifted every month by one column and left
    December without any flag.

    Args:
        inputRectangleFile: Path handed to ``loadRectangles``.
        outputFile: Path of the CSV file to (over)write.
        printPrefixString: Prefix for the progress messages.
    """
    # Month/day (MMDD) keys of the 2013 bank holidays:
    # 01/01, 29/03, 01/04, 06/05, 27/05, 26/08, 25/12, 26/12.
    bankHolidays = {"0101", "0329", "0401", "0506", "0527", "0826", "1225", "1226"}

    # Month/day (MMDD) keys of the 2013 race days.
    # NOTE(review): createTimeFile lists 20130523 where this table has 0525 -
    # confirm which date is correct.
    raceDays = {
        "0515", "0516", "0517", "0525", "0614", "0615", "0712", "0713", "0726",
        "0727", "0821", "0822", "0823", "0824", "0908", "1011", "1012",
    }

    timestamps = generateTimestamps(2013)

    # load rectangle data
    rectangles = []
    loadRectangles(rectangles, inputRectangleFile, printPrefixString)

    print(printPrefixString + "Writing out time related data to " + outputFile)

    header = ["location", "timestamp"]
    header += ["hour" + str(i) for i in range(24)]
    header += ["day_of_week" + str(i) for i in range(7)]
    header += ["month" + str(i) for i in range(12)]
    header += ["bank_holiday", "race_day"]

    # The context manager guarantees the file is closed even if a write fails.
    with open(outputFile, 'w') as output:
        output.write(",".join(header) + "\n")

        for rectangle in rectangles:
            location = str(rectangle.ID)
            for timestamp in timestamps:
                hour = int(str(timestamp.hour))
                dayOfWeek = int(str(datetime(timestamp.year, timestamp.month, timestamp.day).weekday()))
                month = int(str(timestamp.month))
                monthday = timestamp.key[4:8]

                fields = [location, timestamp.key]
                fields += ["1" if i == hour else "0" for i in range(24)]
                fields += ["1" if i == dayOfWeek else "0" for i in range(7)]
                # Months are 1-based (datetime() above requires 1..12).
                fields += ["1" if i == month else "0" for i in range(1, 13)]
                fields.append("1" if monthday in bankHolidays else "0")
                fields.append("1" if monthday in raceDays else "0")
                output.write(",".join(fields) + "\n")

    print(printPrefixString + "Done...")
def downloadYorkAirqualityData(firstTimestamp, lastTimestamp, rectangleFile, printPrefixString=""):
    """Download NO2 readings for the York stations from airqualityengland.co.uk.

    For every rectangle in ``rectangleFile`` (except the station named
    ``"Fishergate"``, which is skipped) the station's data page is fetched for
    the requested date range and its HTML table rows are scraped.

    Args:
        firstTimestamp: Object whose ``key`` starts with ``YYYYMMDD``; gives
            the start date of the query.
        lastTimestamp: Same, for the end date of the query.
        rectangleFile: Path handed to ``loadRectangles``.
        printPrefixString: Prefix for the progress messages.

    Returns:
        dict: ``aqData[timestampKey][rectangle.ID] -> float`` where
        ``timestampKey`` is ``YYYYMMDDHH``. Rows whose level is not a number
        leave no value behind (the ``timestampKey`` entry itself may still be
        created, empty).

    Raises:
        KeyError: If a rectangle name other than ``"Fishergate"`` has no known
            site id.
    """
    # load rectangles
    rectangles = []
    loadRectangles(rectangles, rectangleFile, printPrefixString)

    startDate = firstTimestamp.key[0:4] + "-" + firstTimestamp.key[4:6] + "-" + firstTimestamp.key[6:8]
    endDate = lastTimestamp.key[0:4] + "-" + lastTimestamp.key[4:6] + "-" + lastTimestamp.key[6:8]

    # Station name -> site id used by the remote service.
    siteIds = {
        "Heworth": "YK13",
        "Bootham": "YK10",
        "Fulford": "YK16",
        "Gillygate": "YK7",
        "Holgate": "YK8",
        "Lawrence": "YK9",
        "Nunnery": "YK15",
    }

    # The query must read "&parameter_id%5B%5D=NO2"; the source had the
    # "&para" prefix mangled into a pilcrow character, which corrupted both
    # the site_id value and the pollutant filter.
    urls = {}
    for stationName, siteId in siteIds.items():
        urls[stationName] = (
            "http://www.airqualityengland.co.uk/site/data.php?site_id=" + siteId
            + "&parameter_id%5B%5D=NO2&f_date_started=" + startDate
            + "&f_date_ended=" + endDate
            + "&la_id=76&action=download")

    aqData = {}

    for rectangle in rectangles:
        station = rectangle.name
        if station == "Fishergate":
            continue

        print(printPrefixString + "Downloading data for " + station + "...")
        url = urls[station]
        print(printPrefixString + "url: " + url)

        # Close the HTTP response as soon as the body has been read.
        with urlopen(url) as connection:
            response = connection.read().decode("utf-8")

        # Skip past the first two "<tr>" tags before scanning the rows.
        r1 = response[response.find("<tr>") + 4:]
        r1 = r1[r1.find("<tr>") + 4:]

        while True:
            if r1.find("<tr>") == -1:
                break

            # Each row is read as: <td>date</td> <td ...>time</td> <td>level</td>
            r1 = r1[r1.find("<tr>") + 4:]
            r1 = r1[r1.find("<td>") + 4:]
            dateString = r1[0:r1.find("</td>")]
            # The time cell carries attributes, so look for "<td" and then ">".
            r1 = r1[r1.find("<td") + 3:]
            r1 = r1[r1.find(">") + 1:]
            timeString = r1[0:r1.find("</td>")]
            r1 = r1[r1.find("<td>") + 4:]
            levelString = r1[0:r1.find("</td>")]

            # Dates look like 16/06/2016 and times like 24:00:00; the hour is
            # shifted down by one for the key.
            hour = int(timeString[0:2]) - 1
            hourPart = "0" + str(hour) if hour < 10 else str(hour)
            timestampKey = dateString[6:10] + dateString[3:5] + dateString[0:2] + hourPart

            if timestampKey not in aqData:
                aqData[timestampKey] = {}

            try:
                aqData[timestampKey][rectangle.ID] = float(levelString)
            except ValueError:
                # Non-numeric level (e.g. missing reading): skip this row.
                continue

    return aqData
# Load the data set the model will be applied to. loadData receives the
# containers to fill (applyData, applyColumns).
# NOTE(review): the meaning of the empty-list second argument is defined by
# loadData and is not visible in this chunk.
applyData = {}
applyColumns = []
loadData(applyDataFile, [], applyData, applyColumns)
print("Done...")

print("Apply the model...")
# Run the random-forest helper over the loaded data with fixed settings.
predictionData = applyRandomForest(applyData, model, {'estimators': 59, 'leaf': 9})
print("Done...")

# generate output
# load rectangles and index them by their stringified ID
rectangles = []
loadRectangles(rectangles, DATAPRE_DIRECTORY + "stations_rectangles.csv")
rectanglesMap = {}
for rectangle in rectangles:
    rectanglesMap[str(rectangle.ID)] = rectangle

print("Generate outputs...")

# One (initially empty) result bucket per timestamp key.
finalData = {}
for timestamp in pastweeksTimestamps:
    finalData[timestamp.key] = {}

# Collect the distinct location ids, one applyData["location"] entry per
# prediction; each value is converted to int and then to str.
locations = set()
for i in range(0, len(predictionData)):
    location = str(int(applyData["location"][i]))
    locations.add(location)
def createRectangleTrafficAnnual(inputTrafficFile, inputRectangleFile, outputFile, outputGISFile, printPrefixString = ""):
    """Relate road traffic data to station rectangles and write the results.

    Steps:
      1. Load the road data and the station rectangles.
      2. Give every road segment map coordinates (``c1``/``c2``) and build a
         spatial index over the segments (``generateIndex``).
      3. For every rectangle, pass the segments the index returns for its
         SW/NE bounding box to ``calculateRelatedRoadData``.
      4. Write all ``rectangle.roadDatas`` entries to ``outputGISFile`` as a
         semicolon separated file with a WKT ``LINESTRING`` column.
      5. Delegate the main output to ``createTrafficRelatedDataAnnual``.

    Args:
        inputTrafficFile: Path handed to ``loadTraffic``.
        inputRectangleFile: Path handed to ``loadRectangles``.
        outputFile: Path passed on to ``createTrafficRelatedDataAnnual``.
        outputGISFile: Path of the GIS file to (over)write.
        printPrefixString: Prefix for the progress messages.
    """
    # load traffic data
    roadDataArray = []
    loadTraffic(roadDataArray, inputTrafficFile, printPrefixString)

    # load rectangle data
    rectangles = []
    loadRectangles(rectangles, inputRectangleFile, printPrefixString)

    print(printPrefixString + "Calculate RoadDatas for each rectangle...")

    # add MapCoordinates for roadDataArray (done before the index is built)
    for roadData in roadDataArray:
        roadData.c1 = WGS84Coordinate(roadData.latitude1, roadData.longitude1).toMapCoordinate()
        roadData.c2 = WGS84Coordinate(roadData.latitude2, roadData.longitude2).toMapCoordinate()

    # generate index
    print(printPrefixString + "Generating indicies for roadDatas...")
    indexedRoadDatas = generateIndex(roadDataArray)
    print(printPrefixString + "Done...")

    for rectangle in rectangles:
        print(printPrefixString + "\tstationId:" + str(rectangle.ID))
        cornerSW = rectangle.cornerSW.toWGS84Coordinate()
        cornerNE = rectangle.cornerNE.toWGS84Coordinate()
        boundingBox = (cornerSW.longitude, cornerSW.latitude,
                       cornerNE.longitude, cornerNE.latitude)
        # The index yields positions into roadDataArray.
        rectangleRoadDataArray = [roadDataArray[r] for r in indexedRoadDatas.intersection(boundingBox)]
        # Presumably this is what populates rectangle.roadDatas, which is
        # read below - confirm in calculateRelatedRoadData.
        calculateRelatedRoadData(rectangle, rectangleRoadDataArray)

    # Two consecutive "Done..." messages are kept as in the original output.
    print(printPrefixString + "Done...")
    print(printPrefixString + "Done...")

    print(printPrefixString + "Collect all the roadData which is part of the rectangles...")
    rectanglesRoadDataArray = []
    for rectangle in rectangles:
        rectanglesRoadDataArray.extend(rectangle.roadDatas)
    print(printPrefixString + "Done...")

    print(printPrefixString + "Write out rectangle roadData GIS information to " + outputGISFile + "...")

    # write out gis File; the context manager closes it even if a write fails
    with open(outputGISFile, 'w') as output:
        # header
        output.write("id;speed_limit;lane_number;one_way;am_car;am_lgv;am_hgv;ip_car;ip_lgv;ip_hgv;pm_car;pm_lgv;pm_hgv;linestring\n")
        for roadData in rectanglesRoadDataArray:
            # Same order as the header columns.
            attributes = [
                roadData.ID, roadData.speedLimit, roadData.laneNumber, roadData.oneWay,
                roadData.amCar, roadData.amLgv, roadData.amHgv,
                roadData.ipCar, roadData.ipLgv, roadData.ipHgv,
                roadData.pmCar, roadData.pmLgv, roadData.pmHgv,
            ]
            # Convert the segment end points back to WGS84 for the WKT output.
            c1 = MapCoordinate(roadData.c1.x, roadData.c1.y).toWGS84Coordinate()
            c2 = MapCoordinate(roadData.c2.x, roadData.c2.y).toWGS84Coordinate()
            linestring = ("LINESTRING (" + str(c1.longitude) + " " + str(c1.latitude) + ", "
                          + str(c2.longitude) + " " + str(c2.latitude) + ")")
            output.write(";".join(str(value) for value in attributes) + ";" + linestring + "\n")

    print(printPrefixString + "Done...")

    createTrafficRelatedDataAnnual(rectangles, outputFile, printPrefixString)