def dl(url, local, logger=OsmoseLog.logger(), min_file_size=10*1024):
    """Open a conditional HTTP request for `url`, destined for `local`.

    When a `<local>.ts` file exists, its content is sent as the
    `If-Modified-Since` header.

    NOTE(review): the visible code ends after the error handling; `unzip`,
    `convert_pbf`, `file_dl`, `answer` and `min_file_size` are computed or
    accepted but not consumed here -- confirm whether the rest of the
    function was cut off.

    Parameters:
        url: address of the remote file.
        local: path of the local target file.
        logger: object whose `log()` method receives progress messages.  The
            default is created once, when the function is defined.
        min_file_size: not used in the visible code.

    Returns:
        False when the server answers 304; otherwise falls off the end of
        the visible code (None).

    Raises:
        urllib2.HTTPError: re-raised for any HTTP error other than 304.
    """
    unzip = False
    convert_pbf = False

    # file names
    file_ts = local+".ts"
    url_ext = os.path.splitext(url)[1]
    local_ext = os.path.splitext(local)[1]
    if (url_ext in [".bz2"]) and (local_ext not in [".bz2"]) :
        file_dl = local + url_ext
        unzip = True
    elif (url_ext in [".pbf"]) and (local_ext not in [".pbf"]) :
        file_dl = local + url_ext
        convert_pbf = True
    else:
        file_dl = local

    request = urllib2.Request(url)

    # make the download conditional
    if os.path.exists(file_ts):
        # Close the timestamp file deterministically instead of relying on
        # garbage collection.
        with open(file_ts) as ts_file:
            request.add_header("If-Modified-Since", ts_file.read())

    # request fails with a 304 error when the file wasn't modified
    try:
        answer = urllib2.urlopen(request)
    except urllib2.HTTPError as exc:  # "as" form is valid on Python 2.6+ and 3
        if exc.getcode() == 304:
            logger.log(u"not newer")
            return False
        else:
            logger.log(u"got error %d" % exc.getcode())
            logger.log(u" URL=%s" % url)
            raise
def __init__(self, config, logger = OsmoseLog.logger()):
    """Set up the analyser and initialise its plugins.

    When `self.config.plugins` is truthy, each entry that is a `str` is
    resolved through `self._load_plugin`; other entries are used as given.
    Otherwise every plugin is obtained from `self._load_all_plugins()`.

    Parameters:
        config: analyser configuration, forwarded to `Analyser.__init__`.
        logger: logger forwarded to `Analyser.__init__`.  The default is
            created once, when the function is defined.
    """
    Analyser.__init__(self, config, logger)

    def resolve(plugin):
        # Names are loaded; anything else is passed through untouched.
        return self._load_plugin(plugin) if isinstance(plugin, str) else plugin

    if not self.config.plugins:
        plugins = self._load_all_plugins()
    else:
        plugins = map(resolve, self.config.plugins)

    self._init_plugins(plugins)
def run(logger=OsmoseLog.logger()):
    """Build the OpenStreetBugs XML report and notify the front-end.

    The latest dump is fetched and converted by a shell pipeline
    (wget | bunzip2 | ./osbsql2osm), copied through `ConverterTo` into the
    report file, and the front-end update URL is then called with the
    report's public URL.

    Parameters:
        logger: object whose `log()` / `sub()` methods receive progress
            messages.  The default is created once, when the function is
            defined.
    """
    xml_loc = "/data/work/osmose/tmp/osb.xml"
    xml_url = "http://osm102.openstreetmap.fr/osmose/osb.xml"
    front_code = "xxx"
    front_id = 62  # not referenced below
    front_url = "http://osmose.openstreetmap.fr/cgi-bin/update.py"
    src_url = "http://openstreetbugs.schokokeks.org/dumps/osbdump_latest.sql.bz2"
    src_cmd = "wget -o /dev/null -O - %s | bunzip2 | ./osbsql2osm" % src_url
    # First element of the popen2 pair is the child's standard output.
    src = popen2.popen2(src_cmd)[0]

    ## streams
    i = OsmSax.OsmSaxReader(src)
    o = ConverterTo(OsmSax.OsmSaxWriter(open(xml_loc, "w"), "UTF-8"))

    ## headers
    o.dst.startDocument()
    o.dst.startElement("analyser", {"timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())})
    o.dst.startElement("class", {"id": "1", "item": "7030"})
    o.dst.Element("classtext", {"lang": "en", "title": "OpenStreetBugs"})
    o.dst.endElement("class")

    ## content
    logger.log("generate xml report")
    i.CopyTo(o)

    ## footers
    o.dst.endElement("analyser")
    o.dst._out.close()

    ## update
    logger.log("update front-end")
    tmp_req = urllib2.Request(front_url)
    tmp_dat = urllib.urlencode([("url", xml_url), ("source", "openstreetbugs-world"), ("code", front_code)])
    fd = urllib2.urlopen(tmp_req, tmp_dat)
    dt = fd.read().decode("utf8").strip()
    # "!=" replaces the "<>" spelling, which Python 3 no longer accepts.
    if dt != "OK":
        sys.stderr.write("error: %s" % (dt.encode("utf8")))
    # NOTE(review): nesting of the next statement was inferred from
    # flattened source; confirm it is meant to run unconditionally.
    logger.sub().log(dt)

    logger.log("done")
def run(conf, analyser, plugin=None, format='osmose'):
    """Run one analyser in memory and return the issues it produced as text.

    Parameters:
        conf: country configuration; its `db_*` and `country` attributes
            are passed to the OsmOsis manager.
        analyser: analyser class, used as a context manager.
        plugin: optional single plugin; wrapped in a list when truthy.
        format: output format handed to `issues_file_from_fromat`.

    Returns:
        The content written to the in-memory issues buffer.
    """
    from optparse import Values

    selected_plugins = [plugin] if plugin else []
    options = Values({
        'verbose': False,
        'plugin': selected_plugins,
        'change': False,
    })

    # Log output goes to an in-memory buffer that is not returned.
    log_buffer = StringIO()
    logger = OsmoseLog.logger(log_buffer, True)

    osmosis_manager = modules.OsmOsisManager.OsmOsisManager(
        conf,
        conf.db_host,
        conf.db_user,
        conf.db_password,
        conf.db_base,
        conf.db_schema or conf.country,
        conf.db_persistent,
        logger,
    )
    analyser_conf = analyser_config(conf, options, osmosis_manager)

    issues_buffer = StringIO()
    analyser_conf.error_file = issues_file_from_fromat(issues_buffer, format)

    with analyser(analyser_conf, logger.sub()) as analyser_obj:
        analyser_obj.analyser()

    return issues_buffer.getvalue()
def __init__(self, config, logger = OsmoseLog.logger()):
    """Initialise the base analyser and clear the resume timestamp.

    Parameters:
        config: analyser configuration, forwarded to `Analyser.__init__`.
        logger: logger forwarded to `Analyser.__init__`.  The default is
            created once, when the function is defined.
    """
    Analyser.__init__(self, config, logger)
    # Starts out as None; presumably set elsewhere when a run is resumed
    # -- verify against the code that reads it.
    self.resume_from_timestamp = None
# Directory holding the analyser modules, next to this file.
analysers_path = os.path.join(os.path.dirname(__file__), "analysers")

# --list-analyser: print the analyser names ("analyser_<name>.py") and exit.
if options.list_analyser:
    for fn in sorted(os.listdir(analysers_path)):
        if fn.startswith("analyser_") and fn.endswith(".py"):
            print(fn[9:-3])
    sys.exit(0)

# --list-country: print the configured country keys and exit.
if options.list_country:
    for k in sorted(config.config.keys()):
        print(k)
    sys.exit(0)

# Both modes log to stdout; only the second logger argument differs.
if options.cron:
    output = sys.stdout
    logger = OsmoseLog.logger(output, False)
else:
    output = sys.stdout
    logger = OsmoseLog.logger(output, True)

if options.change_init and not options.change:
    # The message used to append fn[:-3], but in this code `fn` is only
    # assigned by the listing loop above, which always exits; referencing
    # it here raised NameError instead of reporting the option error.
    logger.log(logger.log_av_b + "--change must be specified " + logger.log_ap)
    sys.exit(1)

if options.version:
    print("osmose backend version: %s" % get_version())
    sys.exit(0)

if not options.country:
    parser.print_help()
def analyser_osmosis_common(self):
    """Write one `stat_users` element per object type.

    For each of "nodes", "ways" and "relations", the `sql_users` query is
    formatted with the type name and run between the opening and closing
    `stat_users` tags.
    """
    for object_type in ("nodes", "ways", "relations"):
        self.outxml.startElement("stat_users", {"type": object_type})
        # The callback returns a mapping holding the bound `stats` method;
        # how `run` uses it is defined outside this code.
        self.run(sql_users.format(object_type),
                 lambda res: {"self": self.stats})
        self.outxml.endElement("stat_users")


def stats(self, res):
    """Write one `user` element for a result row.

    `res` is indexed as: 0 count, 1 user id, 2 user name, 3 minimum
    timestamp, 4 maximum timestamp.  Every value is written as `str`.
    """
    user_attrs = {
        "user_id": str(res[1]),
        "user_name": str(res[2]),
    }
    self.outxml.startElement("user", user_attrs)

    count_attrs = {"value": str(res[0])}
    self.outxml.Element("count", count_attrs)

    timestamp_attrs = {
        "min": str(res[3]),
        "max": str(res[4]),
    }
    self.outxml.Element("timestamp", timestamp_attrs)

    self.outxml.endElement("user")


if __name__ == "__main__":
    # Manual run against a single country extract.
    country = "france_limousin"

    class config:
        db_string = "dbname=osmose"
        dst = country + ".xml"
        db_schema = country

    from modules import OsmoseLog
    log = OsmoseLog.logger()
    Analyser_Osmosis_Stats(config, log).analyser()
def dl(url, local, logger=OsmoseLog.logger(), min_file_size=10 * 1024):
    """Download `url` to `local` unless the server reports it unchanged.

    A ".bz2" URL with a non-".bz2" target is decompressed with bunzip2; a
    ".pbf" URL with a non-".pbf" target is converted with
    `config.bin_osmconvert`.  The `Last-Modified` header of the answer is
    stored in `<local>.ts` and sent back as `If-Modified-Since` on later
    calls, when both the downloaded file and the timestamp file exist.

    Parameters:
        url: address of the remote file.
        local: path of the final local file.
        logger: object whose `log()` method receives progress messages.  The
            default is created once, when the function is defined.
        min_file_size: minimum accepted `content-length`, in bytes.

    Returns:
        True when a new file was fetched; False when the server answered
        304 or the downloaded size did not match `content-length`.

    Raises:
        HTTPError: for any HTTP error other than 304.
        SystemError: when the announced size is below `min_file_size`, or
            when bunzip2 / osmconvert exits with a non-zero status.
    """
    unzip = False
    convert_pbf = False

    # file names
    file_ts = local + ".ts"
    url_ext = os.path.splitext(url)[1]
    local_ext = os.path.splitext(local)[1]
    if (url_ext in [".bz2"]) and (local_ext not in [".bz2"]):
        file_dl = local + url_ext
        unzip = True
    elif (url_ext in [".pbf"]) and (local_ext not in [".pbf"]):
        file_dl = local + url_ext
        convert_pbf = True
    else:
        file_dl = local

    request = Request(url)

    # make the download conditional
    if os.path.exists(file_dl) and os.path.exists(file_ts):
        with open(file_ts) as ts_file:
            request.add_header("If-Modified-Since", ts_file.read())

    # request fails with a 304 error when the file wasn't modified
    try:
        answer = urlopen(request)
    except HTTPError as exc:
        if exc.getcode() == 304:
            logger.log(u"not newer")
            return False
        else:
            logger.log(u"got error %d" % exc.getcode())
            logger.log(u" URL=%s" % url)
            raise

    url_ts = answer.headers.get('Last-Modified')
    file_size = int(answer.headers.get('content-length'))
    if file_size < min_file_size:
        # Reject files smaller than min_file_size (10 KiB by default).
        too_small = "File is not big enough: %d B" % file_size
        logger.log(too_small)
        raise SystemError(too_small)

    # write the file
    with open(file_dl, "wb") as outfile:
        while True:
            data = answer.read(2048)
            if len(data) == 0:
                break
            outfile.write(data)

    # A truncated transfer is discarded rather than processed.
    if file_size != os.path.getsize(file_dl):
        logger.log(
            u"error: Download file (%d) not of the expected size (%d) for %s"
            % (os.path.getsize(file_dl), file_size, url))
        os.remove(file_dl)
        return False

    # uncompress
    if unzip:
        logger.log(u"bunzip2")
        res = getstatusoutput("bunzip2 -f %s" % file_dl)
        if res[0]:
            raise SystemError(res[1])

    # convert pbf to osm
    if convert_pbf:
        logger.log(u"osmconvert")
        res = getstatusoutput("%s %s > %s" % (config.bin_osmconvert, file_dl, local))
        if res[0]:
            raise SystemError(res[1])
        os.remove(file_dl)

    # set timestamp
    with open(file_ts, "w") as ts_file:
        ts_file.write(url_ts)

    return True
def dl(url, local, logger=OsmoseLog.logger(), min_file_size=10*1024):
    """Download `url` to `local` unless the server reports it unchanged.

    A ".bz2" URL with a non-".bz2" target is decompressed with bunzip2; a
    ".pbf" URL with a non-".pbf" target is converted with
    `config.bin_osmconvert`.  The `Last-Modified` header of the answer is
    stored in `<local>.ts` and sent back as `If-Modified-Since` on later
    calls.

    Parameters:
        url: address of the remote file.
        local: path of the final local file.
        logger: object whose `log()` method receives progress messages.  The
            default is created once, when the function is defined.
        min_file_size: minimum accepted `content-length`, in bytes.

    Returns:
        True when a new file was fetched, False when the server answered 304.

    Raises:
        urllib2.HTTPError: for any HTTP error other than 304.
        SystemError: when the announced size is below `min_file_size`, or
            when bunzip2 / osmconvert exits with a non-zero status.
    """
    unzip = False
    convert_pbf = False

    # file names
    file_ts = local+".ts"
    url_ext = os.path.splitext(url)[1]
    local_ext = os.path.splitext(local)[1]
    if (url_ext in [".bz2"]) and (local_ext not in [".bz2"]) :
        file_dl = local + url_ext
        unzip = True
    elif (url_ext in [".pbf"]) and (local_ext not in [".pbf"]) :
        file_dl = local + url_ext
        convert_pbf = True
    else:
        file_dl = local

    request = urllib2.Request(url)

    # make the download conditional
    if os.path.exists(file_ts):
        with open(file_ts) as ts_file:
            request.add_header("If-Modified-Since", ts_file.read())

    # request fails with a 304 error when the file wasn't modified
    try:
        answer = urllib2.urlopen(request)
    except urllib2.HTTPError as exc:
        if exc.getcode() == 304:
            logger.log(u"not newer")
            return False
        else:
            logger.log(u"got error %d" % exc.getcode())
            logger.log(u" URL=%s" % url)
            raise

    url_ts = answer.headers.getheader('Last-Modified')
    file_size = int(answer.headers.getheader('content-length'))
    if file_size < min_file_size:
        # Reject files smaller than min_file_size (10 KiB by default).
        too_small = "File is not big enough: %d B" % file_size
        logger.log(too_small)
        raise SystemError(too_small)

    # write the file
    with open(file_dl, "wb") as outfile:
        while True:
            data = answer.read(2048)
            if len(data) == 0:
                break
            outfile.write(data)

    # uncompress
    if unzip:
        logger.log(u"bunzip2")
        res = getstatusoutput("bunzip2 -f %s"%file_dl)
        if res[0]:
            raise SystemError(res[1])

    # convert pbf to osm
    if convert_pbf:
        logger.log(u"osmconvert")
        res = getstatusoutput("%s %s > %s" % (config.bin_osmconvert, file_dl, local))
        if res[0]:
            raise SystemError(res[1])
        os.remove(file_dl)

    # set timestamp
    with open(file_ts, "w") as ts_file:
        ts_file.write(url_ts)

    return True
# Directory holding the analyser modules, next to this file.
analysers_path = os.path.join(os.path.dirname(__file__), "analysers")

# --list-analyser: print the analyser names ("analyser_<name>.py") and exit.
# print(...) with one argument behaves the same on Python 2 and Python 3.
if options.list_analyser:
    for fn in sorted(os.listdir(analysers_path)):
        if fn.startswith("analyser_") and fn.endswith(".py"):
            print(fn[9:-3])
    sys.exit(0)

# --list-country: print the configured country keys and exit.
if options.list_country:
    for k in sorted(config.config.keys()):
        print(k)
    sys.exit(0)

# Both modes log to stdout; only the second logger argument differs.
if options.cron:
    output = sys.stdout
    logger = OsmoseLog.logger(output, False)
else:
    output = sys.stdout
    logger = OsmoseLog.logger(output, True)

if options.change_init and not options.change:
    # The message used to append fn[:-3], but in this code `fn` is only
    # assigned by the listing loop above, which always exits; referencing
    # it here raised NameError instead of reporting the option error.
    logger.log(logger.log_av_b+"--change must be specified "+logger.log_ap)
    sys.exit(1)

if options.version:
    print("osmose backend version: %s" % get_version())
    sys.exit(0)

#=====================================
# loading of analysers
def dl(url, local, logger=OsmoseLog.logger(), min_file_size=10 * 1024):
    """Download `url` to `local` unless the server reports it unchanged.

    A ".bz2" URL with a non-".bz2" target is decompressed with bunzip2; a
    ".pbf" URL with a non-".pbf" target is converted with
    `config.bin_osmconvert`.  The `Last-Modified` header of the answer is
    stored in `<local>.ts` and sent back as `If-Modified-Since` on later
    calls, when both the downloaded file and the timestamp file exist.

    Parameters:
        url: address of the remote file.
        local: path of the final local file.
        logger: object whose `log()` method receives progress messages.  The
            default is created once, when the function is defined.
        min_file_size: minimum accepted `Content-Length`, in bytes.

    Returns:
        True when a new file was fetched; False when the server answered
        304 or the downloaded size did not match `Content-Length`.

    Raises:
        SystemError: when the announced size is below `min_file_size`.
        subprocess.CalledProcessError: when bunzip2 / osmconvert fails.
        Whatever `answer.raise_for_status()` raises for a non-OK answer.
    """
    unzip = False
    convert_pbf = False

    # file names
    file_ts = local + ".ts"
    url_ext = os.path.splitext(url)[1]
    local_ext = os.path.splitext(local)[1]
    if (url_ext in [".bz2"]) and (local_ext not in [".bz2"]):
        file_dl = local + url_ext
        unzip = True
    elif (url_ext in [".pbf"]) and (local_ext not in [".pbf"]):
        file_dl = local + url_ext
        convert_pbf = True
    else:
        file_dl = local

    headers = {}

    # make the download conditional
    if os.path.exists(file_dl) and os.path.exists(file_ts):
        with open(file_ts) as ts_file:
            headers["If-Modified-Since"] = ts_file.read()

    # Retry on 404, workaround Geofabrik update in progress
    answer = downloader.get(
        url,
        headers=headers,
        session=downloader.requests_retry_session(
            status_forcelist=downloader.DEFAULT_RETRY_ON + (404, )))
    # a 304 status means the file wasn't modified
    if answer.status_code == 304:
        logger.log(u"not newer")
        return False
    if not answer.ok:
        logger.log(u"got error %d" % answer.status_code)
        logger.log(u" URL=%s" % url)
        answer.raise_for_status()

    url_ts = answer.headers.get('Last-Modified')
    file_size = int(answer.headers.get('Content-Length'))
    if file_size < min_file_size:
        # Reject files smaller than min_file_size (10 KiB by default).
        too_small = "File is not big enough: %d B" % file_size
        logger.log(too_small)
        raise SystemError(too_small)

    # write the file
    with open(file_dl, "wb") as outfile:
        for data in answer.iter_content(chunk_size=None):
            outfile.write(data)

    # The size comparison is skipped when a Content-Encoding header is set.
    if not answer.headers.get(
            'Content-Encoding') and file_size != os.path.getsize(file_dl):
        logger.log(
            u"error: Download file (%d) not of the expected size (%d) for %s"
            % (os.path.getsize(file_dl), file_size, url))
        os.remove(file_dl)
        return False

    # uncompress
    if unzip:
        logger.log(u"bunzip2")
        subprocess.check_output(['bunzip2', '-f', file_dl])

    # convert pbf to osm
    if convert_pbf:
        logger.log(u"osmconvert")
        subprocess.check_output("{} {} > {}".format(config.bin_osmconvert,
                                                    file_dl, local),
                                shell=True)
        os.remove(file_dl)

    # set timestamp
    with open(file_ts, "w") as ts_file:
        ts_file.write(url_ts)

    return True
""" class Analyser_Osmosis_Stats(Analyser_Osmosis): def __init__(self, config, logger = None): Analyser_Osmosis.__init__(self, config, logger) def analyser_osmosis_common(self): for t in ("nodes", "ways", "relations"): self.outxml.startElement("stat_users", {"type": t}) self.run(sql_users.format(t), lambda res: {"self": self.stats} ) self.outxml.endElement("stat_users") def stats(self, res): self.outxml.startElement("user", {"user_id":str(res[1]), "user_name":str(res[2])}) self.outxml.Element("count", {"value":str(res[0])}) self.outxml.Element("timestamp", {"min":str(res[3]), "max":str(res[4])}) self.outxml.endElement("user") if __name__=="__main__": country = "france_limousin" class config: db_string = "dbname=osmose" dst = country + ".xml" db_schema = country from modules import OsmoseLog a = OsmoseLog.logger() Analyser_Osmosis_Stats(config, a).analyser()
def dl(url, local, logger=OsmoseLog.logger(), min_file_size=10*1024):
    """Download `url` to `local` unless the server reports it unchanged.

    A ".bz2" URL with a non-".bz2" target is decompressed with bunzip2; a
    ".pbf" URL with a non-".pbf" target is converted with
    `config.bin_osmconvert`.  The `Last-Modified` header of the answer is
    stored in `<local>.ts` and sent back as `If-Modified-Since` on later
    calls, when both the downloaded file and the timestamp file exist.

    Parameters:
        url: address of the remote file.
        local: path of the final local file.
        logger: object whose `log()` method receives progress messages.  The
            default is created once, when the function is defined.
        min_file_size: minimum accepted `Content-Length`, in bytes.

    Returns:
        True when a new file was fetched; False when the server answered
        304 or the downloaded size did not match `Content-Length`.

    Raises:
        SystemError: when the announced size is below `min_file_size`.
        subprocess.CalledProcessError: when bunzip2 / osmconvert fails.
        Whatever `answer.raise_for_status()` raises for a non-OK answer.
    """
    unzip = False
    convert_pbf = False

    # file names
    file_ts = local+".ts"
    url_ext = os.path.splitext(url)[1]
    local_ext = os.path.splitext(local)[1]
    if (url_ext in [".bz2"]) and (local_ext not in [".bz2"]) :
        file_dl = local + url_ext
        unzip = True
    elif (url_ext in [".pbf"]) and (local_ext not in [".pbf"]) :
        file_dl = local + url_ext
        convert_pbf = True
    else:
        file_dl = local

    headers = {}

    # make the download conditional
    if os.path.exists(file_dl) and os.path.exists(file_ts):
        with open(file_ts) as ts_file:
            headers["If-Modified-Since"] = ts_file.read()

    answer = downloader.get(url, headers=headers)
    # a 304 status means the file wasn't modified
    if answer.status_code == 304:
        logger.log(u"not newer")
        return False
    if not answer.ok:
        logger.log(u"got error %d" % answer.status_code)
        logger.log(u" URL=%s" % url)
        answer.raise_for_status()

    url_ts = answer.headers.get('Last-Modified')
    file_size = int(answer.headers.get('Content-Length'))
    if file_size < min_file_size:
        # Reject files smaller than min_file_size (10 KiB by default).
        too_small = "File is not big enough: %d B" % file_size
        logger.log(too_small)
        raise SystemError(too_small)

    # write the file
    with open(file_dl, "wb") as outfile:
        for data in answer.iter_content(chunk_size=None):
            outfile.write(data)

    # The size comparison is skipped when a Content-Encoding header is set.
    if not answer.headers.get('Content-Encoding') and file_size != os.path.getsize(file_dl):
        logger.log(u"error: Download file (%d) not of the expected size (%d) for %s" % (os.path.getsize(file_dl), file_size, url))
        os.remove(file_dl)
        return False

    # uncompress
    if unzip:
        logger.log(u"bunzip2")
        subprocess.check_output(['bunzip2', '-f', file_dl])

    # convert pbf to osm
    if convert_pbf:
        logger.log(u"osmconvert")
        subprocess.check_output("{} {} > {}".format(config.bin_osmconvert, file_dl, local), shell=True)
        os.remove(file_dl)

    # set timestamp
    with open(file_ts, "w") as ts_file:
        ts_file.write(url_ts)

    return True
def main(options):
    """Load the requested analysers and run them for each country.

    Parameters:
        options: parsed command-line options.  The attributes read here are
            `list_analyser`, `list_country`, `cron`, `change_init`,
            `change`, `analyser` and `country`; `diff` is set per country.

    Returns:
        0 after a listing option; 1 when `--change` is missing or none of
        the requested analysers exists; otherwise the bitwise OR of the
        codes returned by `run()`, with 0x80 set for every country whose
        lock could not be acquired.
    """
    analysers_path = os.path.join(os.path.dirname(__file__), "analysers")

    if options.list_analyser:
        for fn in sorted(os.listdir(analysers_path)):
            if fn.startswith("analyser_") and fn.endswith(".py"):
                print(fn[9:-3])
        return 0

    if options.list_country:
        for k in sorted(config.config.keys()):
            print(k)
        return 0

    if options.cron:
        output = sys.stdout
        logger = OsmoseLog.logger(output, False)
    else:
        output = sys.stdout
        logger = OsmoseLog.logger(output, True)

    if options.change_init and not options.change:
        logger.log(logger.log_av_b+"--change must be specified "+logger.log_ap)
        return 1

    #=====================================
    # Load of analysers

    err_code = 0

    logger.log("osmose backend version: %s" % get_version())

    old_path = list(sys.path)
    sys.path.insert(0, analysers_path)
    try:
        logger.log(logger.log_av_green+"loading analyses "+logger.log_ap)
        analysers = {}
        for fn in os.listdir(analysers_path):
            if fn.startswith("analyser_") and fn.endswith(".py"):
                if options.analyser and fn[9:-3] not in options.analyser:
                    continue
                logger.log(" load "+fn[9:-3])
                try:
                    analysers[fn[9:-3]] = importlib.import_module("analysers." + fn[:-3])
                except ImportError as e:
                    logger.log(e)
                    logger.log("Fails to load analysers {0}".format(fn[:-3]))
    finally:
        # Restore the path on every exit, including the early return below,
        # which used to leave analysers_path on sys.path.
        sys.path[:] = old_path

    if options.analyser:
        count = 0
        for k in options.analyser:
            if k not in analysers:
                logger.log(logger.log_av_b+"not found "+k+logger.log_ap)
                count += 1
        # user is passing only non-existent analysers
        if len(options.analyser) == count:
            logger.log(logger.log_av_b+"No valid analysers specified"+logger.log_ap)
            return 1

    #=====================================
    # analyser

    for country in options.country:
        country_conf = config.config[country]

        # acquire lock
        # The lock name is built outside the try so that the handler below
        # can always refer to `lfil`.
        base = '|'.join(map(str, [country_conf.db_base, country_conf.db_host]))
        lfil = "/tmp/analyse-{0}-{1}".format(country, base)
        try:
            lock = lockfile(lfil)
        except Exception:
            # Was a bare "except:", which also swallowed KeyboardInterrupt
            # and SystemExit.
            logger.err("can't lock {0} ({1})".format(country, lfil))
            if options.cron:
                sys.stderr.write("can't lock %s\n" % country)
            # Report the content of the existing lock file.
            with open(lfil) as lock_info:
                lock_lines = lock_info.read().rstrip().split("\n")
            for l in lock_lines:
                logger.log(" "+l)
                if options.cron:
                    sys.stderr.write(" "+l+"\n")
            if options.cron:
                sys.stderr.flush()
            err_code |= 0x80
            continue

        country_conf.init()
        options.diff = not options.change and "diff" in country_conf.download

        # analyse
        err_code |= run(country_conf, logger, analysers, options)

        # free lock
        del lock

    logger.log(logger.log_av_green+u"end of analyses"+logger.log_ap)
    return err_code
def __init__(self, config, logger = OsmoseLog.logger()):
    """Initialise the instance by delegating to `Analyser.__init__`.

    Parameters:
        config: analyser configuration, forwarded unchanged.
        logger: logger, forwarded unchanged.  The default is created once,
            when the function is defined.
    """
    Analyser.__init__(self, config, logger)