def update(self):
    """Refresh ``last_measure`` for every probe in ``self.probe_list``.

    For each probe the aragonaire.es page at ``probe.dataURL`` is
    downloaded, fed line by line to the shared HTML parser, and the
    pollutant values the parser collected are copied onto a new
    ``ProbeMeasure`` that becomes the probe's ``last_measure``.

    ``HTMLParseError`` raised while feeding a line is printed and parsing
    continues with the next line. Any other exception propagates, but the
    HTTP response is always closed first.
    """
    # Parser pollutant key -> ProbeMeasure attribute, in assignment order.
    pollutant_fields = (
        ('CO', 'co'),
        ('NO2', 'no2'),
        ('SO2', 'so2'),
        ('O3', 'o3'),
    )
    parser = self.__html_parser

    for thisProbe in self.probe_list:
        print("parsing aragonaire.es - " + thisProbe.name + "...")

        # ensure the parser is clean to start the parsing process
        parser.reset()

        req = urllib2.Request(thisProbe.dataURL, data=None,
                              headers=HTTP_HEADERS)
        htmlFile = urllib2.urlopen(req)
        try:
            # aragonaire is UTF-8: lines are fed to the parser untranscoded
            for line in htmlFile.readlines():
                line = line.strip()
                try:
                    parser.feed(line)
                except HTMLParseError as ex:
                    # best effort: report the bad markup and keep parsing
                    print("Exception %s" % (ex.msg))
        finally:
            # release the connection even when reading or parsing fails
            htmlFile.close()

        thisMeasure = ProbeMeasure()
        thisMeasure.sample_time = parser.m_sampleTime

        # copy only the pollutants that the parser actually found
        pollutants = parser.m_pollutants
        for key, attr in pollutant_fields:
            if key in pollutants:
                setattr(thisMeasure, attr, pollutants[key])

        # update probe's latest measure reference
        thisProbe.last_measure = thisMeasure
def update(self):
    """Refresh ``last_measure`` for every probe in ``self.probe_list``.

    For each probe the jccm.es page at ``probe.dataURL`` is downloaded,
    every line is transcoded from the response charset to UTF-8 and fed to
    the shared HTML parser, and the pollutant values the parser collected
    are copied onto a new ``ProbeMeasure`` that becomes the probe's
    ``last_measure``.

    ``HTMLParseError`` raised while feeding a line is printed and parsing
    continues with the next line. Any other exception propagates, but the
    HTTP response is always closed first.
    """
    # Parser pollutant key -> ProbeMeasure attribute, in assignment order.
    pollutant_fields = (
        ('CO', 'co'),
        ('NO', 'no'),
        ('NO2', 'no2'),
        ('SO2', 'so2'),
        ('PM2,5', 'pm25'),
        ('PM10', 'pm10'),
        ('O3', 'o3'),
        ('SH2', 'sh2'),
        ('TOL', 'tol'),
        ('BEN', 'ben'),
        ('XIL', 'xyl'),
    )
    parser = self.__html_parser

    for thisProbe in self.probe_list:
        print("parsing jccm.es - " + thisProbe.name + "...")

        # ensure the parser is clean to start the parsing process
        parser.reset()

        req = urllib2.Request(thisProbe.dataURL, data=None,
                              headers=HTTP_HEADERS)
        htmlFile = urllib2.urlopen(req)
        try:
            # Use the charset announced in the Content-Type header. When the
            # header carries none, getparam() returns None and decode(None)
            # would raise TypeError, so fall back to ISO-8859-1, which is
            # what jccm.es is known to serve.
            charset = htmlFile.headers.getparam('charset') or 'ISO-8859-1'

            for line in htmlFile.readlines():
                line = line.strip().decode(charset).encode("utf-8")
                try:
                    parser.feed(line)
                except HTMLParseError as ex:
                    # best effort: report the bad markup and keep parsing
                    print("Exception %s" % (ex.msg))
        finally:
            # release the connection even when reading or parsing fails
            htmlFile.close()

        thisMeasure = ProbeMeasure()
        thisMeasure.sample_time = parser.m_sampleTime

        # copy only the pollutants that the parser actually found
        pollutants = parser.m_pollutants
        for key, attr in pollutant_fields:
            if key in pollutants:
                setattr(thisMeasure, attr, pollutants[key])

        # update probe's latest measure
        thisProbe.last_measure = thisMeasure
def update(self):
    """Refresh ``last_measure`` for every probe in ``self.probe_list``.

    For each probe the airecantabria.com page at ``probe.dataURL`` is
    downloaded and, starting at line 200, fed line by line to the shared
    HTML parser. The pollutant and weather values the parser collected are
    copied onto a new ``ProbeMeasure`` that becomes the probe's
    ``last_measure``.

    ``HTMLParseError`` raised while feeding a line is printed and parsing
    continues with the next line. Any other exception propagates, but the
    HTTP response is always closed first.
    """
    # Lines up to and including this one are never fed to the parser:
    # parsing these URLs goes bananas before this point.
    skipped_lines = 199

    # Parser pollutant key -> ProbeMeasure attribute, in assignment order.
    pollutant_fields = (
        ('CO', 'co'),
        ('NO', 'no'),
        ('NO2', 'no2'),
        ('SO2', 'so2'),
        ('PM10', 'pm10'),
        ('O3', 'o3'),
        ('SH2', 'sh2'),
        ('TOL', 'tol'),
        ('BEN', 'ben'),
        ('XIL', 'xyl'),
    )
    # Parser weather key -> ProbeMeasure attribute, in assignment order.
    weather_fields = (
        ('VV', 'wind_speed'),
        ('DD', 'wind_dir'),
        ('TMP', 'temp'),
        ('HR', 'hum'),
        ('PRB', 'pressure'),
        ('RS', 'solar_rad'),
        ('LL', 'precip'),
    )
    parser = self.__html_parser

    for thisProbe in self.probe_list:
        print("parsing airecantabria.com - " + thisProbe.name + "...")

        # ensure the parser is clean to start the parsing process
        parser.reset()

        req = urllib2.Request(thisProbe.dataURL, data=None,
                              headers=HTTP_HEADERS)
        htmlFile = urllib2.urlopen(req)
        try:
            # charset detection fails in airecantabria.com; the site is
            # UTF-8, so lines are fed to the parser untranscoded
            for lineCount, line in enumerate(htmlFile.readlines(), 1):
                if lineCount <= skipped_lines:
                    continue
                line = line.strip()
                try:
                    parser.feed(line)
                except HTMLParseError as ex:
                    # best effort: report the bad markup and keep parsing
                    print("Exception %s" % (ex.msg))
        finally:
            # release the connection even when reading or parsing fails
            htmlFile.close()

        thisMeasure = ProbeMeasure()
        thisMeasure.sample_time = parser.m_sampleTime

        # copy only the pollutants that the parser actually found
        pollutants = parser.m_pollutants
        for key, attr in pollutant_fields:
            if key in pollutants:
                setattr(thisMeasure, attr, pollutants[key])

        # same for the weather parameters; nothing is copied when the
        # parser collected none
        weatherParams = parser.m_weatherParams
        for key, attr in weather_fields:
            if key in weatherParams:
                setattr(thisMeasure, attr, weatherParams[key])

        # update probe's latest measure reference
        thisProbe.last_measure = thisMeasure
def update(self):
    """Refresh ``last_measure`` for every probe in ``self.probe_list``.

    For each probe the madrid.org page at ``probe.dataURL`` is downloaded,
    every line is transcoded from the response charset to UTF-8 and fed to
    the shared HTML parser. If the station name found by the parser equals
    ``probe.name``, the pollutant and weather values the parser collected
    are copied onto a new ``ProbeMeasure`` that becomes the probe's
    ``last_measure``; otherwise an error is printed and the probe keeps its
    previous measure.

    ``HTMLParseError`` raised while feeding a line is printed and parsing
    continues with the next line. Any other exception propagates, but the
    HTTP response is always closed first.
    """
    # Parser pollutant key -> ProbeMeasure attribute, in assignment order.
    pollutant_fields = (
        ('CO', 'co'),
        ('NO', 'no'),
        ('NO2', 'no2'),
        ('SO2', 'so2'),
        ('PM2,5', 'pm25'),
        ('PM10', 'pm10'),
        ('O3', 'o3'),
        ('TOL', 'tol'),
        ('BEN', 'ben'),
        ('XIL', 'xyl'),
    )
    # Parser weather key (as labelled on the page) -> ProbeMeasure attribute.
    weather_fields = (
        ('Velocidad viento', 'wind_speed'),
        ('Dirección viento', 'wind_dir'),
        ('Temperatura', 'temp'),
        ('Humedad relativa', 'hum'),
        ('Presión', 'pressure'),
        ('Radiación solar', 'solar_rad'),
        ('Precipitación', 'precip'),
    )
    parser = self.__html_parser

    for thisProbe in self.probe_list:
        print("parsing madrid.org - " + thisProbe.name + "...")

        # ensure the parser is clean to start the parsing process
        parser.reset()

        req = urllib2.Request(thisProbe.dataURL, data=None,
                              headers=HTTP_HEADERS)
        htmlFile = urllib2.urlopen(req)
        try:
            # Use the charset announced in the Content-Type header. When the
            # header carries none, getparam() returns None and decode(None)
            # would raise TypeError, so fall back to ISO-8859-1, which is
            # what gestiona.madrid.org is known to serve.
            charset = htmlFile.headers.getparam('charset') or 'ISO-8859-1'

            for line in htmlFile.readlines():
                line = line.strip().decode(charset).encode("utf-8")
                try:
                    parser.feed(line)
                except HTMLParseError as ex:
                    # best effort: report the bad markup and keep parsing
                    print("Exception %s" % (ex.msg))
        finally:
            # release the connection even when reading or parsing fails
            htmlFile.close()

        # Are we parsing the correct station?
        # The parser holds the name as UTF-8 bytes: decode to get unicode
        # from bytes, encode to get bytes from unicode.
        stationName = parser.m_stationName.decode("utf-8")
        if stationName != thisProbe.name:
            print("Error parsing " + thisProbe.name + "\n")
            print(" Station name doesn't match parsed info: " +
                  stationName + "\n")
            continue

        thisMeasure = ProbeMeasure()
        thisMeasure.sample_time = parser.m_sampleTime

        # copy only the pollutants that the parser actually found
        pollutants = parser.m_pollutants
        for key, attr in pollutant_fields:
            if key in pollutants:
                setattr(thisMeasure, attr, pollutants[key])

        # same for the weather parameters
        weatherParams = parser.m_weatherParams
        for key, attr in weather_fields:
            if key in weatherParams:
                setattr(thisMeasure, attr, weatherParams[key])

        # update probe's latest measure reference
        thisProbe.last_measure = thisMeasure