def ProcessIndustry(self, file):
    """Import industrial water-use rows from a CSV file into MongoDB.

    Rows whose ``Status`` column is not ``"2"``, or whose
    ``IndustrialWaterConsumption`` parses to zero, are skipped. Every other
    row is stored in the ``waterUseIndustry`` collection under the id
    ``<Year>_<County>_<Category>`` unless a document with that id already
    exists. Any error is printed together with its traceback and is not
    re-raised.

    Args:
        file: Path of the UTF-8 encoded CSV file to read.
    """
    print("process file %s" % file)
    try:
        collection = self.db["waterUseIndustry"]
        with open(file, "r", encoding="utf8") as f:
            for row in csv.DictReader(f):
                if row["Status"] != "2":
                    continue
                # csv hands back strings, so the value must be parsed before
                # it can meaningfully be compared with 0.
                consumption = util.ToFloat(row["IndustrialWaterConsumption"])
                if consumption == 0:
                    continue
                data = {
                    "_id": row["Year"] + "_" + row["County"] + "_"
                           + row["Category"],
                    "Area": row["Area"],
                    "Category": row["Category"],
                    "County": row["County"],
                    "IndustrialWaterConsumption": consumption,
                    "IndustryArea": util.ToFloat(row["IndustryArea"]),
                    "Status": row["Status"],
                    "Year": util.ToInt(row["Year"]),
                }
                # Insert-if-absent: existing documents are left untouched.
                if collection.find_one({"_id": data["_id"]}) is None:
                    collection.insert_one(data)
    except Exception:
        # Best effort: report the problem and let the caller carry on.
        print(sys.exc_info()[0])
        traceback.print_exc()
def CollectDataFromFolder(self, folder):
    """Bin the elevation samples of every file in *folder* into grid cells.

    Each non-blank line is split on single spaces; the first two fields are
    converted with ``util.TW97ToLatLng`` and the third is read as the
    elevation. For every level in ``range(self.levelNum)`` the sample is
    added to the cell ``"<level>-<gridX>-<gridY>"``, accumulating a sample
    count (``num``) and an elevation sum (``elevSum``). One batch per file is
    handed to ``self.AddGridBatch``.

    Args:
        folder: Directory whose entries are all opened and parsed.
    """
    # The cell scale depends only on the level, so compute it once instead
    # of once per sample.
    levelScales = [
        self.gridPerUnit / math.pow(2, level)
        for level in range(self.levelNum)
    ]
    for count, filename in enumerate(os.listdir(folder)):
        path = folder + "/" + filename
        print(str(count) + " process " + path)
        batch = {}
        with open(path, 'r') as f:
            for line in f:
                # A blank (e.g. trailing) line carries no sample.
                if not line.strip():
                    continue
                fields = line.split(" ")
                lat, lng = util.TW97ToLatLng(util.ToInt(fields[0]),
                                             util.ToInt(fields[1]))
                elev = util.ToFloat(fields[2])
                for level, scale in enumerate(levelScales):
                    gridX = int(lng * scale)
                    gridY = int(lat * scale)
                    key = str(level) + "-" + str(gridX) + "-" + str(gridY)
                    cell = batch.get(key)
                    if cell is None:
                        batch[key] = {
                            "lev": level,
                            "x": gridX,
                            "y": gridY,
                            "num": 1,
                            "elevSum": elev,
                        }
                    else:
                        cell["num"] += 1
                        cell["elevSum"] += elev
        self.AddGridBatch(batch)
def ProcessTyphoon(self, url):
    """Download typhoon track data from *url* and upsert every fix.

    The response is parsed with BeautifulSoup. For each ``fix`` inside the
    ``analysis_data`` of each ``tropicalcyclone`` element, one document is
    upserted into the ``typhoon`` collection. The document id is the
    concatenation of the available name tags followed by ``_`` and the fix
    time string. Nothing is written when the HTTP status is not OK. Any
    error is printed together with its traceback and is not re-raised.

    Args:
        url: Address of the typhoon data feed.
    """
    print("process typhoon url: %s" % url)
    try:
        r = requests.get(url)
        if r.status_code != requests.codes.all_okay:
            return
        soup = BeautifulSoup(r.text, 'html.parser')
        nameTags = ("typhoon_name", "cwb_typhoon_name", "cwb_td_no")
        ops = []
        for tp in soup.find_all("tropicalcyclone"):
            # The name tags are optional and identical for every fix of the
            # same cyclone, so read them once per cyclone.
            names = {}
            for tag in nameTags:
                node = getattr(tp, tag)
                if node is not None:
                    names[tag] = node.string
            for pos in tp.analysis_data.find_all("fix"):
                data = dict(names)
                # Strip the colon inside the UTC offset so %z can parse it.
                dateStr = ''.join(pos.fix_time.string.rsplit(':', 1))
                data["time"] = datetime.datetime.strptime(
                    dateStr, "%Y-%m-%dT%H:%M:%S%z")
                tpID = ""
                for tag in nameTags:
                    if tag in data:
                        tpID += data[tag]
                data["_id"] = tpID + "_" + dateStr
                # The coordinate text is read as "<lng>,<lat>".
                latlng = pos.coordinate.string.split(",")
                data["lat"] = util.ToFloat(latlng[1])
                data["lng"] = util.ToFloat(latlng[0])
                data["max_wind_speed"] = util.ToFloat(
                    pos.max_wind_speed.string)
                data["max_gust_speed"] = util.ToFloat(
                    pos.max_gust_speed.string)
                data["pressure"] = util.ToFloat(pos.pressure.string)
                data["circle_of_15ms"] = util.ToFloat(
                    pos.circle_of_15ms.radius.string)
                data["circle_of_25ms"] = util.ToFloat(
                    pos.circle_of_25ms.radius.string)
                ops.append(
                    pymongo.UpdateOne({"_id": data["_id"]}, {"$set": data},
                                      upsert=True))
        if ops:
            self.db["typhoon"].bulk_write(ops, ordered=False)
    except Exception:
        # Best effort: report the problem and let the caller carry on.
        print(sys.exc_info()[0])
        traceback.print_exc()
def ProcessReservoirSiltation(self, file):
    """Import reservoir siltation rows from a CSV file into MongoDB.

    Each row is stored in the ``reservoirSiltation`` collection under the id
    ``<Year>_<ReservoirName>`` unless a document with that id already
    exists. Capacity and sedimentation columns are converted with
    ``util.ToFloat`` and ``Year`` with ``util.ToInt``. Any error is printed
    together with its traceback and is not re-raised.

    Args:
        file: Path of the UTF-8 encoded CSV file to read.
    """
    print("process file %s" % file)
    try:
        collection = self.db["reservoirSiltation"]
        toFloat = util.ToFloat
        with open(file, "r", encoding="utf8") as f:
            for row in csv.DictReader(f):
                data = {
                    "_id": row["Year"] + "_" + row["ReservoirName"],
                    "Area": row["Area"],
                    "CurruntCapacity": toFloat(row["CurruntCapacity"]),
                    "CurruntEffectiveCapacity": toFloat(
                        row["CurruntEffectiveCapacity"]),
                    "DesignedCapacity": toFloat(row["DesignedCapacity"]),
                    "DesignedEffectiveCapacity": toFloat(
                        row["DesignedEffectiveCapacity"]),
                    "ReservoirName": row["ReservoirName"],
                    "ReservoirSedimentationVolume": toFloat(
                        row["ReservoirSedimentationVolume"]),
                    "TheLastestMeasuredTimeOfReservoirCapacity": toFloat(
                        row["TheLastestMeasuredTimeOfReservoirCapacity"]),
                    "Year": util.ToInt(row["Year"]),
                }
                # Insert-if-absent: existing documents are left untouched.
                if collection.find_one({"_id": data["_id"]}) is None:
                    collection.insert_one(data)
    except Exception:
        # Best effort: report the problem and let the caller carry on.
        print(sys.exc_info()[0])
        traceback.print_exc()
def ProcessLiving(self, file):
    """Import domestic (living) water-use rows from a CSV file into MongoDB.

    Rows whose ``Status`` column is not ``"2"`` are skipped. Every other row
    is stored in the ``waterUseLiving`` collection under the id
    ``<Year>_<County>`` unless a document with that id already exists. Any
    error is printed together with its traceback and is not re-raised.

    Args:
        file: Path of the UTF-8 encoded CSV file to read.
    """
    print("process file %s" % file)
    try:
        collection = self.db["waterUseLiving"]
        toFloat = util.ToFloat
        with open(file, "r", encoding="utf8") as f:
            for row in csv.DictReader(f):
                if row["Status"] != "2":
                    continue
                data = {
                    "_id": row["Year"] + "_" + row["County"],
                    "Area": row["Area"],
                    "County": row["County"],
                    "DistributedWaterQuantityPerPersonPerDay": toFloat(
                        row["DistributedWaterQuantityPerPersonPerDay"]),
                    "DomesticWaterConsumptionPerPersonPerDay": toFloat(
                        row["DomesticWaterConsumptionPerPersonPerDay"]),
                    # The column is read under the spelling
                    # "SelfIntakePapulation" (presumably the header used by
                    # the source CSV - verify) and stored corrected.
                    "SelfIntakePopulation": toFloat(
                        row["SelfIntakePapulation"]),
                    "SelfIntakeWaterConsumption": toFloat(
                        row["SelfIntakeWaterConsumption"]),
                    "SelfIntakeWaterConsumptionPerPersonPerDay": toFloat(
                        row["SelfIntakeWaterConsumptionPerPersonPerDay"]),
                    "Status": row["Status"],
                    "TapWaterConsumption": toFloat(
                        row["TapWaterConsumption"]),
                    "TapWaterPopulation": toFloat(row["TapWaterPopulation"]),
                    "TotalPopulation": toFloat(row["TotalPopulation"]),
                    "WaterSalesPerPersonPerDay": toFloat(
                        row["WaterSalesPerPersonPerDay"]),
                    "Year": util.ToInt(row["Year"]),
                }
                # Insert-if-absent: existing documents are left untouched.
                if collection.find_one({"_id": data["_id"]}) is None:
                    collection.insert_one(data)
    except Exception:
        # Best effort: report the problem and let the caller carry on.
        print(sys.exc_info()[0])
        traceback.print_exc()
def ProcessAgriculture(self, file):
    """Import agricultural water-use rows from a CSV file into MongoDB.

    Rows whose ``Status`` column is not ``"2"`` are skipped. Every other row
    is stored in the ``waterUseAgriculture`` collection under the id
    ``<Year>_<IrrigationAssociation>`` unless a document with that id
    already exists. Any error is printed together with its traceback and is
    not re-raised.

    Args:
        file: Path of the UTF-8 encoded CSV file to read.
    """
    print("process file %s" % file)
    try:
        collection = self.db["waterUseAgriculture"]
        toFloat = util.ToFloat
        with open(file, "r", encoding="utf8") as f:
            for row in csv.DictReader(f):
                if row["Status"] != "2":
                    continue
                data = {
                    "_id": row["Year"] + "_" + row["IrrigationAssociation"],
                    "Area": row["Area"],
                    "FirstPhaseMiscellaneousIrrigationArea": toFloat(
                        row["FirstPhaseMiscellaneousIrrigationArea"]),
                    "FirstPhaseMiscellaneousWaterConsumption": toFloat(
                        row["FirstPhaseMiscellaneousWaterConsumption"]),
                    "FirstPhaseRiceIrrigationArea": toFloat(
                        row["FirstPhaseRiceIrrigationArea"]),
                    "FirstPhaseRiceWaterConsumption": toFloat(
                        row["FirstPhaseRiceWaterConsumption"]),
                    "IrrigationAssociation": row["IrrigationAssociation"],
                    "SecondPhaseMiscellaneousIrrigationArea": toFloat(
                        row["SecondPhaseMiscellaneousIrrigationArea"]),
                    "SecondPhaseMiscellaneousWaterConsumption": toFloat(
                        row["SecondPhaseMiscellaneousWaterConsumption"]),
                    "SecondPhaseRiceIrrigationArea": toFloat(
                        row["SecondPhaseRiceIrrigationArea"]),
                    "SecondPhaseRiceWaterConsumption": toFloat(
                        row["SecondPhaseRiceWaterConsumption"]),
                    "Status": row["Status"],
                    "Year": util.ToInt(row["Year"]),
                }
                # Insert-if-absent: existing documents are left untouched.
                if collection.find_one({"_id": data["_id"]}) is None:
                    collection.insert_one(data)
    except Exception:
        # Best effort: report the problem and let the caller carry on.
        print(sys.exc_info()[0])
        traceback.print_exc()
def on_message(self, client, userdata, msg):
    """Store one water-box sensor message in MongoDB.

    The payload is decoded as UTF-8 and parsed as ``|``-separated
    ``name=value`` fields; fields that do not split into exactly two parts
    on ``=`` are ignored. The timestamp is floored to a 10-minute boundary,
    treated as UTC and converted to the module-level ``taiwan`` time zone.
    The reading replaces (or creates) the document with the same device and
    time in the ``waterbox<YYYYMMDD>`` collection of that converted day. Any
    error is printed together with its traceback and is not re-raised.

    Args:
        client: Unused here.
        userdata: Unused here.
        msg: Received message; its ``topic`` and ``payload`` are read.
    """
    print("recieve topic " + msg.topic)
    try:
        message = msg.payload.decode("utf-8")
        # Parse the "name=value|name=value|..." payload.
        data = {}
        for field in message.split("|"):
            parts = field.split("=")
            if len(parts) == 2:
                data[parts[0]] = parts[1]

        # Build the document to save.
        d = {}
        d["device_id"] = data["device_id"]
        d["lat"] = util.ToFloat(data["gps_lat"])
        d["lng"] = util.ToFloat(data["gps_lon"])
        t = datetime.datetime.strptime(data["date"] + " " + data["time"],
                                       '%Y-%m-%d %H:%M:%S')
        # Floor to the start of the 10-minute slot.
        t = t.replace(minute=(t.minute - t.minute % 10), second=0)
        t = t.replace(tzinfo=pytz.utc).astimezone(taiwan)
        d["time"] = t
        # Sensor readings, with the ranges noted by the original author:
        #   s_t0  water temperature (-20.0~150.0 C)
        #   s_ph  pH (0.00~14.00)
        #   s_ec  conductivity (0~200000 uS/cm)
        #   s_Tb  turbidity (0~10000 NTU)
        #   s_Lv  water level (0.000~20.000 m)
        #   s_DO  dissolved oxygen (0.00~12.00 mg/L)
        #   s_orp oxidation-reduction potential (-2000~2000 mV)
        for sensor in ("s_t0", "s_ph", "s_ec", "s_Tb", "s_Lv", "s_DO",
                       "s_orp"):
            d[sensor] = util.ToFloat(data[sensor])
        print(d)

        key = {"device_id": d["device_id"], "time": d["time"]}
        day = datetime.datetime.strftime(t, "%Y%m%d")
        # Collection.update() was deprecated in PyMongo 3 and removed in 4;
        # replace_one() is the equivalent for a whole-document upsert.
        self.db["waterbox" + day].replace_one(key, d, upsert=True)
    except Exception:
        # Best effort: report the problem and keep the subscriber alive.
        print(sys.exc_info()[0])
        traceback.print_exc()
def ProcessTide(self, url):
    """Download tide observations from *url* and upsert them per day.

    The JSON response is read at ``cwbopendata.dataset.location``. Every
    observation with a non-empty value becomes one upsert keyed by station
    id and the observation time floored to 10 minutes; the stored value is
    the parsed reading multiplied by 0.01. Operations are grouped into
    ``tide<YYYYMMDD>`` collections, each of which gets indexes on
    ``stationID`` and ``time``. Nothing is written when the HTTP status is
    not OK. Any error is printed together with its traceback and is not
    re-raised.

    Args:
        url: Address of the tide data feed.
    """
    print("process tide url: %s" % url)
    try:
        r = requests.get(url)
        if r.status_code != requests.codes.all_okay:
            return
        result = r.json()
        ops = {}
        for tide in result["cwbopendata"]["dataset"]["location"]:
            for obs in tide["time"]:
                # Strip the colon inside the UTC offset so %z can parse it.
                dateStr = ''.join(obs["obsTime"].rsplit(':', 1))
                dateObj = datetime.datetime.strptime(
                    dateStr, "%Y-%m-%dT%H:%M:%S%z")
                value = obs["weatherElement"]["elementValue"]["value"]
                if not value:
                    continue
                t10min = dateObj.replace(
                    minute=(dateObj.minute - dateObj.minute % 10), second=0)
                d = {
                    "stationID": tide["stationId"],
                    "time": t10min,
                    "value": util.ToFloat(value) * 0.01,
                }
                key = {"stationID": d["stationID"], "time": d["time"]}
                # Register the day only once it has a real reading, so that
                # no empty collection is created for it further down.
                ops.setdefault(dateObj.strftime('%Y%m%d'), []).append(
                    pymongo.UpdateOne(key, {"$set": d}, upsert=True))
        for dayStr, dayOps in ops.items():
            collection = self.db["tide" + dayStr]
            collection.create_index("stationID")
            collection.create_index("time")
            collection.bulk_write(dayOps, ordered=False)
    except Exception:
        # Best effort: report the problem and let the caller carry on.
        print(sys.exc_info()[0])
        traceback.print_exc()
def ProcessMonthWaterUse(self, file):
    """Import monthly per-town water-use rows from a CSV file into MongoDB.

    Each row is stored in the ``monthWaterUse`` collection under the id
    ``<Year>_<Month>_<County>_<Town>`` unless a document with that id
    already exists. Any error is printed together with its traceback and is
    not re-raised.

    Args:
        file: Path of the UTF-8 encoded CSV file to read.
    """
    print("process file %s" % file)
    try:
        collection = self.db["monthWaterUse"]
        with open(file, "r", encoding="utf8") as f:
            for row in csv.DictReader(f):
                data = {
                    "_id": row["Year"] + "_" + row["Month"] + "_"
                           + row["County"] + "_" + row["Town"],
                    "County": row["County"],
                    "Month": util.ToInt(row["Month"]),
                    "TheDailyDomesticConsumptionOfWaterPerPerson":
                        util.ToFloat(
                            row["TheDailyDomesticConsumptionOfWaterPerPerson"]),
                    "Town": row["Town"],
                    "Year": util.ToInt(row["Year"]),
                }
                # Insert-if-absent: existing documents are left untouched.
                if collection.find_one({"_id": data["_id"]}) is None:
                    collection.insert_one(data)
    except Exception:
        # Best effort: report the problem and let the caller carry on.
        print(sys.exc_info()[0])
        traceback.print_exc()
def ProcessRain(self, url):
    """Download rain-gauge XML from *url* and store stations and readings.

    For every ``location`` element with a valid (present, non-NaN,
    non-negative) ``NOW`` value:

    * the station is upserted into ``rainStation``;
    * the reading is upserted into ``rain<YYYYMMDD>`` unless a document for
      the same station and time already exists;
    * for each newly stored reading, ``<area>Sum`` / ``<area>Num`` counters
      are incremented in ``rainDailySum`` and ``rain10minSum`` (area from
      ``util.LatToArea``), and the reading plus its coordinates is passed to
      ``self.grid.AddGridBatch`` for the ``rainGrid<YYYYMMDD>`` collection.

    Nothing is written when the HTTP status is not OK. Any error is printed
    together with its traceback and is not re-raised.

    Args:
        url: Address of the rain data feed.
    """
    print("process rain url: %s" % url)
    try:
        r = requests.get(url)
        if r.status_code != requests.codes.all_okay:
            return
        root = ET.fromstring(r.text)
        # Tags look like "{namespace}name"; reuse the root's "{namespace}"
        # prefix for every lookup.
        ns = root.tag[:root.tag.find("}") + 1]
        elementFields = {"HOUR_12": "hour12", "HOUR_24": "hour24",
                         "NOW": "now"}
        parameterFields = {"CITY": "city", "TOWN": "town"}
        stationArr = []
        locHash = {}
        dataArr = []
        for location in root.findall(ns + 'location'):
            dateStr = location.find(ns + "time").find(ns + "obsTime").text
            # Strip the colon inside the UTC offset so %z can parse it.
            dateStr = ''.join(dateStr.rsplit(':', 1))
            dateObj = datetime.datetime.strptime(dateStr,
                                                 "%Y-%m-%dT%H:%M:%S%z")
            # Rain accumulates from 00:10 until 00:00 of the next day and
            # the feed stamps each record with the END of its accumulation
            # interval; subtract 10 minutes to stamp it with the START.
            dateObj = dateObj - datetime.timedelta(minutes=10)
            sID = location.find(ns + "stationId").text
            sName = location.find(ns + "locationName").text
            lat = util.ToFloat(location.find(ns + "lat").text)
            lon = util.ToFloat(location.find(ns + "lon").text)
            data = {"time": dateObj, "stationID": sID}
            station = {"stationID": sID, "name": sName, "lat": lat,
                       "lon": lon}
            locHash[sID] = {"lat": lat, "lon": lon}
            for elem in location.findall(ns + "weatherElement"):
                field = elementFields.get(elem[0].text)
                if field is not None:
                    data[field] = util.ToFloat(elem[1][0].text)
            # Skip locations without a usable current reading. A missing
            # NOW element used to raise KeyError and abort the whole import.
            now = data.get("now")
            if now is None or math.isnan(now) or now < 0:
                continue
            for param in location.findall(ns + "parameter"):
                field = parameterFields.get(param[0].text)
                if field is not None:
                    station[field] = param[1].text
            dataArr.append(data)
            stationArr.append(station)

        opSite = [
            pymongo.UpdateOne({"stationID": s["stationID"]}, {"$set": s},
                              upsert=True) for s in stationArr
        ]
        if opSite:
            self.db["rainStation"].bulk_write(opSite, ordered=False)

        opData = {}
        opDaily = []
        op10min = []
        gridArr = {}
        for d in dataArr:
            dayStr = d["time"].strftime('%Y%m%d')
            dayOps = opData.setdefault(dayStr, [])
            dayGrid = gridArr.setdefault(dayStr, [])
            key = {"stationID": d["stationID"], "time": d["time"]}
            # Only readings not stored yet are written and summed.
            if self.db["rain" + dayStr].find_one(key) is not None:
                continue
            dayOps.append(pymongo.UpdateOne(key, {"$set": d}, upsert=True))

            loc = locHash[d["stationID"]]
            area = util.LatToArea(loc["lat"])
            inc = {area + "Sum": d["now"], area + "Num": 1}
            tday = d["time"].replace(hour=0, minute=0, second=0)
            t10min = d["time"].replace(
                minute=(d["time"].minute - d["time"].minute % 10), second=0)
            opDaily.append(
                pymongo.UpdateOne({"time": tday}, {"$inc": inc},
                                  upsert=True))
            op10min.append(
                pymongo.UpdateOne({"time": t10min}, {"$inc": inc},
                                  upsert=True))

            grid = d.copy()
            grid["lat"] = loc["lat"]
            grid["lon"] = loc["lon"]
            dayGrid.append(grid)

        for dayStr, dayOps in opData.items():
            collection = self.db["rain" + dayStr]
            collection.create_index("stationID")
            collection.create_index("time")
            if dayOps:
                collection.bulk_write(dayOps, ordered=False)
        if opDaily:
            self.db["rainDailySum"].bulk_write(opDaily, ordered=False)
        if op10min:
            self.db["rain10minSum"].bulk_write(op10min, ordered=False)

        for dayStr, dayGrid in gridArr.items():
            self.grid.AddGridBatch("rainGrid" + dayStr, dayGrid, "time",
                                   ["now"], "lat", "lon")
    except Exception:
        # Best effort: report the problem and let the caller carry on.
        print(sys.exc_info()[0])
        traceback.print_exc()
def ProcessWind(self, url):
    """Download weather-station observations from *url* and store them.

    The JSON response is read at ``cwbopendata.location``. Every station is
    upserted into ``windStation`` and every observation (the ``ELEV``,
    ``WDIR``, ``WDSD``, ``TEMP``, ``HUMD`` and ``PRES`` elements) is
    upserted into the ``wind<YYYYMMDD>`` collection of its observation day,
    keyed by station id and time. Each day collection gets indexes on
    ``stationID`` and ``time``. Nothing is written when the HTTP status is
    not OK. Any error is printed together with its traceback and is not
    re-raised.

    Args:
        url: Address of the weather data feed.
    """
    print("process wind url: %s" % url)
    try:
        r = requests.get(url)
        if r.status_code != requests.codes.all_okay:
            return
        result = r.json()
        parameterFields = {"CITY": "city", "TOWN": "town"}
        # Elements that are stored, under their own name, as floats.
        wantedElements = ("ELEV", "WDIR", "WDSD", "TEMP", "HUMD", "PRES")
        stationArr = []
        dataArr = []
        for loc in result["cwbopendata"]["location"]:
            station = {
                "stationID": loc["stationId"],
                "name": loc["locationName"],
                "lat": util.ToFloat(loc["lat"]),
                "lon": util.ToFloat(loc["lon"]),
            }
            for param in loc["parameter"]:
                field = parameterFields.get(param["parameterName"])
                if field is not None:
                    station[field] = param["parameterValue"]
            stationArr.append(station)

            # Remove every colon (time part and UTC offset) so the stamp
            # matches the colon-free format string below.
            stamp = loc["time"]["obsTime"].replace(":", "")
            data = {
                "time": datetime.datetime.strptime(stamp,
                                                   '%Y-%m-%dT%H%M%S%z'),
                "stationID": station["stationID"],
            }
            for elem in loc["weatherElement"]:
                name = elem["elementName"]
                if name in wantedElements:
                    data[name] = util.ToFloat(elem["elementValue"]["value"])
            dataArr.append(data)

        opSite = [
            pymongo.UpdateOne({"stationID": s["stationID"]}, {"$set": s},
                              upsert=True) for s in stationArr
        ]
        if opSite:
            self.db["windStation"].bulk_write(opSite, ordered=False)

        opData = {}
        for d in dataArr:
            key = {"stationID": d["stationID"], "time": d["time"]}
            opData.setdefault(d["time"].strftime('%Y%m%d'), []).append(
                pymongo.UpdateOne(key, {"$set": d}, upsert=True))
        for dayStr, dayOps in opData.items():
            collection = self.db["wind" + dayStr]
            collection.create_index("stationID")
            collection.create_index("time")
            if dayOps:
                collection.bulk_write(dayOps, ordered=False)
    except Exception:
        # Best effort: report the problem and let the caller carry on.
        print(sys.exc_info()[0])
        traceback.print_exc()
def ProcessReservoirUse(self, file):
    """Import yearly reservoir usage rows from a CSV file into MongoDB.

    Each row is stored in the ``reservoirUse`` collection under the id
    ``<Year>_<ReservoirName>`` unless a document with that id already
    exists. Volume and level columns are converted with ``util.ToFloat`` and
    ``Year`` with ``util.ToInt``. Any error is printed together with its
    traceback and is not re-raised.

    Args:
        file: Path of the UTF-8 encoded CSV file to read.
    """
    # Float columns stored between "Area" and "ReservoirName", in the order
    # they are written to the document.
    floatColumns = (
        "BackWaterVolumeOfPowerGeneration",
        "DischargeWaterVolumeOfPowerGeneration",
        "EndYearStoragedWater",
        "EndYearWaterLevel",
        "FlushingVolume",
        "GrossVolumeOfWaterConsumptionForAgricultureWater",
        "GrossVolumeOfWaterConsumptionForAllPurposes",
        "GrossVolumeOfWaterConsumptionForDomesticWater",
        "GrossVolumeOfWaterConsumptionForIndustrialWater",
        "InflowVolume",
        "InitialStorageWater",
        "LeakageVolume",
        "OthersDischargeVolume",
    )
    print("process file %s" % file)
    try:
        collection = self.db["reservoirUse"]
        with open(file, "r", encoding="utf8") as f:
            for row in csv.DictReader(f):
                data = {
                    "_id": row["Year"] + "_" + row["ReservoirName"],
                    "Area": row["Area"],
                }
                for column in floatColumns:
                    data[column] = util.ToFloat(row[column])
                data["ReservoirName"] = row["ReservoirName"]
                data["SedimentationVariation"] = util.ToFloat(
                    row["SedimentationVariation"])
                data["Year"] = util.ToInt(row["Year"])
                # Insert-if-absent: existing documents are left untouched.
                if collection.find_one({"_id": data["_id"]}) is None:
                    collection.insert_one(data)
    except Exception:
        # Best effort: report the problem and let the caller carry on.
        print(sys.exc_info()[0])
        traceback.print_exc()
def ProcessOverview(self, file):
    """Import the yearly water supply/use overview table into MongoDB.

    The CSV is read positionally. A row is skipped when it has fewer than
    14 columns, when its first column does not parse to a number, or when
    its third column is exactly ``"0"``. For every other row the second
    column minus its last character becomes the document id and year.
    Amounts are parsed after removing thousands separators. Rows are stored
    in ``waterUseOverview`` unless a document with that id already exists.
    Any error is printed together with its traceback and is not re-raised.

    Args:
        file: Path of the UTF-8 encoded CSV file to read.
    """
    # (document field, CSV column index), in the order they are stored.
    amountColumns = (
        ("TotalWaterSupply", 2),
        ("WaterSupplyRiver", 4),
        ("WaterSupplyReservoir", 5),
        ("WaterSupplyUnderGround", 6),
        ("TotalWaterUse", 7),
        ("WaterUseAgriculture", 9),
        ("WaterUseLivestock", 10),
        ("WaterUseCultivation", 11),
        ("WaterUseLiving", 12),
        ("WaterUseIndustry", 13),
    )
    print("process file %s" % file)
    try:
        collection = self.db["waterUseOverview"]
        # The file is opened once; the original opened it a second time
        # inside the first "with" for no effect.
        with open(file, "r", encoding="utf8") as f:
            for row in csv.reader(f):
                # Blank or truncated lines cannot hold a full record; skip
                # them instead of aborting the import with an IndexError.
                if len(row) < 14:
                    continue
                # Rows without a numeric serial are not data rows.
                if math.isnan(util.ToFloat(row[0])):
                    continue
                if row[2] == "0":
                    continue
                data = {}
                # Drop the trailing character of the year cell
                # (presumably a unit suffix - verify against the CSV).
                data["_id"] = row[1][0:-1]
                data["Year"] = util.ToInt(data["_id"])
                for field, index in amountColumns:
                    data[field] = util.ToFloat(row[index].replace(",", ""))
                # Insert-if-absent: existing documents are left untouched.
                if collection.find_one({"_id": data["_id"]}) is None:
                    collection.insert_one(data)
    except Exception:
        # Best effort: report the problem and let the caller carry on.
        print(sys.exc_info()[0])
        traceback.print_exc()