Exemplo n.º 1
0
    def update(self):
        """Refresh ``last_measure`` for every probe in ``self.probe_list``.

        For each probe the page at ``thisProbe.dataURL`` is downloaded and fed
        line by line to the HTML parser.  The sample time and the pollutant
        values the parser collected (CO, NO2, SO2, O3) are then copied into a
        new ``ProbeMeasure`` that replaces ``thisProbe.last_measure``.

        An ``HTMLParseError`` raised while feeding a line is reported on
        stdout and parsing continues with the next line.  Errors raised while
        opening or reading the URL are not caught here and propagate to the
        caller; the HTTP response is closed in every case.
        """
        # parser pollutant key -> ProbeMeasure attribute name
        pollutant_fields = (
            ('CO', 'co'),
            ('NO2', 'no2'),
            ('SO2', 'so2'),
            ('O3', 'o3'),
        )

        for thisProbe in self.probe_list:
            print("parsing aragonaire.es - " + thisProbe.name + "...")
            # ensure the parser is clean to start the parsing process
            self.__html_parser.reset()

            req = urllib2.Request(thisProbe.dataURL, data=None, headers=HTTP_HEADERS)
            htmlFile = urllib2.urlopen(req)
            try:
                # Lines are fed as received, without transcoding (the original
                # author's note says aragonaire is UTF-8).
                for line in htmlFile.readlines():
                    try:
                        self.__html_parser.feed(line.strip())
                    except HTMLParseError as ex:
                        print("Exception %s" % (ex.msg))
            finally:
                # release the connection even if reading/parsing fails
                htmlFile.close()

            thisMeasure = ProbeMeasure()
            thisMeasure.sample_time = self.__html_parser.m_sampleTime

            # copy only the pollutants the parser actually found
            pollutants = self.__html_parser.m_pollutants
            for key, attr in pollutant_fields:
                if key in pollutants:
                    setattr(thisMeasure, attr, pollutants[key])

            # update probe's latest measure reference
            thisProbe.last_measure = thisMeasure
Exemplo n.º 2
0
    def update(self):
        """Refresh ``last_measure`` for every probe in ``self.probe_list``.

        For each probe the page at ``thisProbe.dataURL`` is downloaded, each
        line is transcoded from the response charset to UTF-8 and fed to the
        HTML parser.  The sample time and the pollutant values the parser
        collected are then copied into a new ``ProbeMeasure`` that replaces
        ``thisProbe.last_measure``.

        An ``HTMLParseError`` raised while feeding a line is reported on
        stdout and parsing continues with the next line.  Errors raised while
        opening, reading or decoding the page are not caught here and
        propagate to the caller; the HTTP response is closed in every case.
        """
        # parser pollutant key -> ProbeMeasure attribute name
        pollutant_fields = (
            ('CO', 'co'),
            ('NO', 'no'),
            ('NO2', 'no2'),
            ('SO2', 'so2'),
            ('PM2,5', 'pm25'),
            ('PM10', 'pm10'),
            ('O3', 'o3'),
            ('SH2', 'sh2'),
            ('TOL', 'tol'),
            ('BEN', 'ben'),
            ('XIL', 'xyl'),
        )

        for thisProbe in self.probe_list:
            print("parsing jccm.es - " + thisProbe.name + "...")
            # ensure the parser is clean to start the parsing process
            self.__html_parser.reset()

            req = urllib2.Request(thisProbe.dataURL, data=None, headers=HTTP_HEADERS)
            htmlFile = urllib2.urlopen(req)
            try:
                # getparam() yields None when the Content-Type header carries
                # no charset, and decode(None) would raise TypeError.  Fall
                # back to ISO-8859-1, which is what jccm.es is noted to serve.
                charset = htmlFile.headers.getparam('charset') or 'ISO-8859-1'

                for line in htmlFile.readlines():
                    line = line.strip().decode(charset).encode("utf-8")
                    try:
                        self.__html_parser.feed(line)
                    except HTMLParseError as ex:
                        print("Exception %s" % (ex.msg))
            finally:
                # release the connection even if reading/decoding/parsing fails
                htmlFile.close()

            thisMeasure = ProbeMeasure()
            thisMeasure.sample_time = self.__html_parser.m_sampleTime

            # copy only the pollutants the parser actually found
            pollutants = self.__html_parser.m_pollutants
            for key, attr in pollutant_fields:
                if key in pollutants:
                    setattr(thisMeasure, attr, pollutants[key])

            # update probe's latest measure
            thisProbe.last_measure = thisMeasure
Exemplo n.º 3
0
    def update(self):
        """Refresh ``last_measure`` for every probe in ``self.probe_list``.

        For each probe the page at ``thisProbe.dataURL`` is downloaded and,
        skipping the first 199 lines, fed line by line to the HTML parser.
        The sample time plus the pollutant and weather values the parser
        collected are then copied into a new ``ProbeMeasure`` that replaces
        ``thisProbe.last_measure``.

        An ``HTMLParseError`` raised while feeding a line is reported on
        stdout and parsing continues with the next line.  Errors raised while
        opening or reading the URL are not caught here and propagate to the
        caller; the HTTP response is closed in every case.
        """
        # Number of leading lines that are never handed to the parser: per the
        # original author, parsing these URLs "goes bananas" before line 200.
        skipped_lines = 199

        # parser pollutant key -> ProbeMeasure attribute name
        pollutant_fields = (
            ('CO', 'co'),
            ('NO', 'no'),
            ('NO2', 'no2'),
            ('SO2', 'so2'),
            ('PM10', 'pm10'),
            ('O3', 'o3'),
            ('SH2', 'sh2'),
            ('TOL', 'tol'),
            ('BEN', 'ben'),
            ('XIL', 'xyl'),
        )
        # parser weather key -> ProbeMeasure attribute name
        weather_fields = (
            ('VV', 'wind_speed'),
            ('DD', 'wind_dir'),
            ('TMP', 'temp'),
            ('HR', 'hum'),
            ('PRB', 'pressure'),
            ('RS', 'solar_rad'),
            ('LL', 'precip'),
        )

        for thisProbe in self.probe_list:
            print("parsing airecantabria.com - " + thisProbe.name + "...")
            # ensure the parser is clean to start the parsing process
            self.__html_parser.reset()

            req = urllib2.Request(thisProbe.dataURL, data=None, headers=HTTP_HEADERS)
            htmlFile = urllib2.urlopen(req)
            try:
                # Charset detection fails on airecantabria.com, so lines are
                # fed without transcoding (the site is noted to be UTF-8).
                for line in htmlFile.readlines()[skipped_lines:]:
                    try:
                        self.__html_parser.feed(line.strip())
                    except HTMLParseError as ex:
                        print("Exception %s" % (ex.msg))
            finally:
                # release the connection even if reading/parsing fails
                htmlFile.close()

            thisMeasure = ProbeMeasure()
            thisMeasure.sample_time = self.__html_parser.m_sampleTime

            # copy only the values the parser actually found
            pollutants = self.__html_parser.m_pollutants
            for key, attr in pollutant_fields:
                if key in pollutants:
                    setattr(thisMeasure, attr, pollutants[key])

            weather = self.__html_parser.m_weatherParams
            for key, attr in weather_fields:
                if key in weather:
                    setattr(thisMeasure, attr, weather[key])

            # update probe's latest measure reference
            thisProbe.last_measure = thisMeasure
Exemplo n.º 4
0
    def update(self):
        """Refresh ``last_measure`` for every probe in ``self.probe_list``.

        For each probe the page at ``thisProbe.dataURL`` is downloaded, each
        line is transcoded from the response charset to UTF-8 and fed to the
        HTML parser.  If the station name the parser extracted matches
        ``thisProbe.name``, the sample time plus the pollutant and weather
        values are copied into a new ``ProbeMeasure`` that replaces
        ``thisProbe.last_measure``; otherwise an error is printed and the
        probe's previous measure is left untouched.

        An ``HTMLParseError`` raised while feeding a line is reported on
        stdout and parsing continues with the next line.  Errors raised while
        opening, reading or decoding the page are not caught here and
        propagate to the caller; the HTTP response is closed in every case.
        """
        # parser pollutant key -> ProbeMeasure attribute name
        pollutant_fields = (
            ('CO', 'co'),
            ('NO', 'no'),
            ('NO2', 'no2'),
            ('SO2', 'so2'),
            ('PM2,5', 'pm25'),
            ('PM10', 'pm10'),
            ('O3', 'o3'),
            ('TOL', 'tol'),
            ('BEN', 'ben'),
            ('XIL', 'xyl'),
        )
        # parser weather key (label as parsed from the page) -> attribute name
        weather_fields = (
            ('Velocidad viento', 'wind_speed'),
            ('Dirección viento', 'wind_dir'),
            ('Temperatura', 'temp'),
            ('Humedad relativa', 'hum'),
            ('Presión', 'pressure'),
            ('Radiación solar', 'solar_rad'),
            ('Precipitación', 'precip'),
        )

        for thisProbe in self.probe_list:
            print("parsing madrid.org - " + thisProbe.name + "...")
            # ensure the parser is clean to start the parsing process
            self.__html_parser.reset()

            req = urllib2.Request(thisProbe.dataURL, data=None, headers=HTTP_HEADERS)
            htmlFile = urllib2.urlopen(req)
            try:
                # getparam() yields None when the Content-Type header carries
                # no charset, and decode(None) would raise TypeError.  Fall
                # back to ISO-8859-1, which is what gestiona.madrid.org is
                # noted to serve.
                charset = htmlFile.headers.getparam('charset') or 'ISO-8859-1'

                for line in htmlFile.readlines():
                    line = line.strip().decode(charset).encode("utf-8")
                    try:
                        self.__html_parser.feed(line)
                    except HTMLParseError as ex:
                        print("Exception %s" % (ex.msg))
            finally:
                # release the connection even if reading/decoding/parsing fails
                htmlFile.close()

            # Are we parsing the correct station?
            # WARNING:
            #   To get the unicode from the bytes, you decode. To get the bytes
            #   from unicode, you encode.
            parsed_name = self.__html_parser.m_stationName.decode("utf-8")
            if parsed_name != thisProbe.name:
                print("Error parsing " + thisProbe.name + "\n")
                print("    Station name doesn't match parsed info: " +
                      parsed_name + "\n")
                continue

            thisMeasure = ProbeMeasure()
            thisMeasure.sample_time = self.__html_parser.m_sampleTime

            # copy only the values the parser actually found
            pollutants = self.__html_parser.m_pollutants
            for key, attr in pollutant_fields:
                if key in pollutants:
                    setattr(thisMeasure, attr, pollutants[key])

            weather = self.__html_parser.m_weatherParams
            for key, attr in weather_fields:
                if key in weather:
                    setattr(thisMeasure, attr, weather[key])

            # update probe's latest measure reference
            thisProbe.last_measure = thisMeasure