def query(region, pollutant, year):
    """Fetch the data archive for a region, pollutant and year.

    The function works in two steps: it first requests the ``zipper``
    servlet under ``URL_PREFIX``, then reads the archive location out of
    the first ``<script>`` element of the response and downloads that
    location from the ``download/`` directory under ``URL_PREFIX``.

    Parameters:
        region: region identifier, resolved through ``regions_dict``.
        pollutant: pollutant identifier, resolved through
            ``pollutants_dict``.
        year: year sent to the servlet as ``p_anno``.

    Returns:
        Whatever ``download`` returns for the archive URL.

    Raises:
        IOError: if ``download`` returns ``None`` for either request.
    """
    region_code = regions_dict.get_pk(region)
    region_name = regions_dict.get_name(region)
    pollutant_code = pollutants_dict.get_pk(pollutant)
    pollutant_formula = pollutants_dict.get_formula(pollutant)

    params = urllib.urlencode({
        'p_comp': pollutant_code,
        'p_comp_name': pollutant_formula.upper(),
        'p_reg': region_code,
        'p_reg_name': region_name.upper(),
        'p_anno': year,
    })
    genfile = "%(prefix)s/servlet/zipper?%(query)s" % {
        'prefix': URL_PREFIX,
        'query': params,
    }

    link = download(genfile)
    if link is None:
        raise IOError("Could not fetch '%s'." % genfile)
    try:
        soup = BeautifulSoup(link)
        # The second '"'-separated field of the script text is the first
        # double-quoted string; it holds the archive path, from which the
        # "../download/" prefix is dropped.
        script_text = soup.find('script').contents[0]
        location = script_text.split('"')[1].replace("../download/", "")
    finally:
        # Release the response even when the page cannot be parsed.
        link.close()

    archive_url = "%(prefix)s/download/%(location)s" % {
        'prefix': URL_PREFIX,
        'location': location,
    }
    archive = download(archive_url)
    if archive is None:
        # Report the URL that actually failed, not the servlet URL.
        raise IOError("Could not fetch '%s'." % archive_url)
    return archive
def append(self, *args, **kwargs):
    """Validate and normalize one data row, then store it.

    Only keyword arguments are accepted. Each recognised keyword is
    converted before the row is built:

    * ``region`` and ``station`` -> ``unicode``
    * ``pollutant`` -> ``pollutants_dict.get_pk(value)``
    * ``timestamp`` -> ``time.strptime(value, "%d-%m-%Y %H")``
    * ``quantity`` -> ``float``

    The cleaned values are passed to ``DataRow`` and the resulting row is
    appended to ``self._data``.

    Raises:
        ValueError: if positional arguments are given or a keyword is not
            one of the names listed above.
        OntologyException: if the station is not in ``stations_dict`` or
            the region is not in ``regions_dict``.
    """
    if args:
        raise ValueError("Unexpected unnamed parameter")

    converters = {
        "region": unicode,
        "station": unicode,
        "pollutant": pollutants_dict.get_pk,
        "timestamp": lambda value: time.strptime(value, "%d-%m-%Y %H"),
        "quantity": float,
    }

    cleaned = {}
    for (k, v) in kwargs.items():
        if k not in converters:
            raise ValueError(
                "Unexpected named parameter: '%s'" % k)
        cleaned[k] = converters[k](v)

    # normalized, cleaned up data
    row = DataRow(**cleaned)
    # NOTE(review): the row is stored before the ontology checks below, so
    # a row that fails them is still kept in self._data. This order is
    # preserved because callers may rely on it -- confirm it is intended.
    self._data.append(row)

    station, region = cleaned["station"], cleaned["region"]  # aliases
    if station not in stations_dict:
        raise OntologyException(
            "Station '%s' not found in the ontology." % station)
    if region not in regions_dict:
        # Name the region here; the message used to repeat the station.
        raise OntologyException(
            "Region '%s' not found in the ontology." % region)
def __call__(self, args):
    """Parse long command-line options and record them on the instance.

    ``args`` is parsed with ``getopt.getopt`` using ``self.long_options``
    (no short options). Handled options:

    * ``--year``: sets both ``self.from_year`` and ``self.to_year``.
    * ``--from`` / ``--to``: set ``self.from_year`` / ``self.to_year``.
    * ``--region``: appends the region's primary key to ``self.regions``.
    * ``--pollutant``: appends the pollutant's primary key to
      ``self.pollutants``.
    * ``--verbosity``: 0..3, mapped to ERROR, WARNING, INFO, DEBUG on
      ``logger`` and stored in ``self.verbosity``.
    * ``--keep`` / ``--local``: set the matching flag; they are mutually
      exclusive.
    * ``--help``: prints ``usage`` and exits.

    Raises:
        getopt.GetoptError: for a year outside
            ``DEFAULT_FROM_YEAR``..``DEFAULT_TO_YEAR``, an unsupported
            verbosity level, ``--keep`` combined with ``--local``, or
            anything ``getopt.getopt`` itself rejects.
        ValueError: if a numeric option value cannot be parsed by ``int``.
        SystemExit: after printing the usage text for ``--help``.
    """
    # Verbosity level N selects entry N: (label for the log, logging level).
    verbosity_levels = (
        ("ERROR", logging.ERROR),
        ("WARNING", logging.WARNING),
        ("INFO", logging.INFO),
        ("DEBUG", logging.DEBUG),
    )

    opts, args = getopt.getopt(args, "", self.long_options)
    for o, a in opts:
        if o == "--year":
            year = int(a)
            if year < DEFAULT_FROM_YEAR:
                raise getopt.GetoptError(
                    "No data available before %d" % DEFAULT_FROM_YEAR)
            # --year also sets the ending year, so it must respect the
            # same upper bound that --to enforces.
            if DEFAULT_TO_YEAR < year:
                raise getopt.GetoptError(
                    "No data available after %d" % DEFAULT_TO_YEAR)
            self.from_year = year
            logger.debug("Setting starting year to %d", year)
            self.to_year = year
            logger.debug("Setting ending year to %d", year)
        elif o == "--from":
            from_year = int(a)
            if from_year < DEFAULT_FROM_YEAR:
                raise getopt.GetoptError(
                    "No data available before %d" % DEFAULT_FROM_YEAR)
            self.from_year = from_year
            logger.debug("Setting starting year to %d", from_year)
        elif o == "--to":
            to_year = int(a)
            if DEFAULT_TO_YEAR < to_year:
                raise getopt.GetoptError(
                    "No data available after %d" % DEFAULT_TO_YEAR)
            self.to_year = to_year
            logger.debug("Setting ending year to %d", to_year)
        elif o == "--region":
            region_name = regions_dict.get_name(a)
            region_code = regions_dict.get_pk(a)
            self.regions.append(region_code)
            logger.debug("Adding region '%s'", region_name)
        elif o == '--pollutant':
            pollutant_formula = pollutants_dict.get_formula(a)
            pollutant_name = pollutants_dict.get_name(a)
            pollutant_code = pollutants_dict.get_pk(a)
            self.pollutants.append(pollutant_code)
            logger.debug("Adding pollutant '%s' (%s)",
                         pollutant_formula, pollutant_name)
        elif o == "--verbosity":
            level = int(a)
            # The level is user input: reject it with a real exception
            # (asserts vanish under -O) and before touching any state.
            if not 0 <= level < len(verbosity_levels):
                raise getopt.GetoptError("Unsupported verbosity level")
            level_name, log_level = verbosity_levels[level]
            self.verbosity = level
            logger.setLevel(log_level)
            logger.debug("Setting verbosity level to %s", level_name)
        elif o == "--keep":
            self.keep = True
            if self.local:
                raise getopt.GetoptError(
                    "--local and --keep are not supported together")
        elif o == "--local":
            self.local = True
            if self.keep:
                raise getopt.GetoptError(
                    "--local and --keep are not supported together")
        elif o == "--help":
            # Parenthesised single argument: same output as the Python 2
            # print statement.
            print(usage)
            sys.exit()
        else:
            # getopt accepted an option this method does not handle: a
            # programming error, reported even when asserts are disabled.
            raise AssertionError("unhandled option")