def __init__(self, polygon_id, cache_delay=60):
    """Download the WKT text of a polygon and store the parsed result.

    Two requests are sent to polygons.openstreetmap.fr through
    ``downloader.urlread``: ``index.py`` first (its response is not used),
    then ``get_wkt.py``, whose response is handed to ``loads`` and kept in
    ``self.polygon``.

    polygon_id: value placed in the ``id=`` query parameter (via ``str``).
    cache_delay: passed unchanged as second argument of ``downloader.urlread``.
    """
    base_url = u"http://polygons.openstreetmap.fr/"

    # NOTE(review): the index.py answer is discarded; presumably the request
    # only makes the server prepare the polygon -- confirm before removing it.
    downloader.urlread(base_url + "index.py?id=" + str(polygon_id), cache_delay)

    wkt = downloader.urlread(
        base_url + "get_wkt.py?params=0&id=" + str(polygon_id), cache_delay)
    # Keep only what follows the first ";" when the text starts with "SRID=".
    if wkt.startswith("SRID="):
        wkt = wkt.split(";", 1)[1]
    self.polygon = loads(wkt)
def __init__(self, polygon_id, cache_delay=60):
    """Fetch a polygon from polygons.openstreetmap.fr into ``self.polygon``.

    ``index.py`` and then ``get_wkt.py`` are requested, in that order, with
    ``downloader.urlread``; only the second response is kept. A leading
    ``SRID=...;`` part is removed before the text is given to ``loads``.

    polygon_id: value used for the ``id`` query parameter.
    cache_delay: forwarded to ``downloader.urlread``.
    """
    polygon_url = u"http://polygons.openstreetmap.fr/"
    response = None
    # Same request order as before; the last response is the one parsed.
    for script in ("index.py?id=", "get_wkt.py?params=0&id="):
        response = downloader.urlread(polygon_url + script + str(polygon_id), cache_delay)

    if response.startswith("SRID="):
        response = response.split(";", 1)[1]
    self.polygon = loads(response)
def deprecated_list(self):
    """Parse the OSM wiki "Deprecated features" template into a lookup table.

    The raw template text is fetched with ``urlread``, normalised (bold
    markers, ``<br/>``, whitespace, Tag/Key templates and wiki links are
    rewritten), then split on ``{{Deprecated features/item``.

    Returns a dict ``{dkey: {dvalue: suggestion}}``; any of the three may be
    ``None`` when the corresponding parameter is absent from an item.
    """
    wiki_root = 'https://wiki.openstreetmap.org/wiki'

    def tag_as_code(match):
        # Tag template can take one or two params, with trailing | possible
        return '`{}`'.format(match.group(1).replace("||", "=").replace("|", "="))

    def link_as_markdown(match):
        target = match.group(1)
        return '[{}]({}/{})'.format(target, wiki_root, target.replace(" ", "_"))

    text = urlread(wiki_root + '/Template:Deprecated_features?action=raw', 1)

    # Tidy the text up for processing.
    # Eliminate wiki bold formatting
    text = text.replace("'''", "")
    # Remove HTML newlines
    text = re.sub(r'<br\s*/>', ' ', text)
    # Remove excess whitespace (also removes all newlines)
    text = " ".join(text.split())
    # Eliminate any whitespace around pipe characters,
    # which makes reading the template parameters simple
    text = re.sub(r'\s?\|\s?', '|', text)
    # Eliminate templates to prevent unexpected pipe characters
    text = re.sub(r'{{{\s?lang\s?\|?\s?}}}', '', text, flags=re.I)
    text = re.sub(r'{{(?:Tag|Key)\s?\|(.+?)\|?\s?}}', tag_as_code, text, flags=re.I)
    # Resolve interwiki links now
    text = re.sub(r'\[\[(.+?)\]\]', link_as_markdown, text)

    prefixes = ('dkey', 'dvalue', 'suggestion')
    deprecated = {}
    for feature in text.split(r'{{Deprecated features/item')[1:]:
        # Unaccounted for template present in this feature
        if r'{{' in feature:
            continue

        found = dict.fromkeys(prefixes)
        for param in feature.split('|'):
            name, separator, value = param.partition('=')
            if not separator:
                continue
            # The name always starts with the parameter name because whitespace
            # around | was removed earlier; equality is not used because there
            # could be a space before the = character.
            for prefix in prefixes:
                if name.startswith(prefix):
                    found[prefix] = value
                    break

        # Sanity check in case formatting changes or something
        if any((found['dkey'], found['dvalue'], found['suggestion'])):
            deprecated.setdefault(found['dkey'], {})[found['dvalue']] = found['suggestion']

    return deprecated
def deprecated_list(self):
    """Parse the OSM wiki "Deprecated features" template line by line.

    The raw template is fetched with ``urlread`` and split on
    ``{{Deprecated features/item``. For every item, the ``dkey``/``dvalue``
    pair is read from the line starting with ``| dkey =`` and the suggestion
    from the line starting with ``| suggestion =``. Each of the three texts
    is passed through ``self.cleanWiki`` (``None`` is passed when missing).

    Returns a dict ``{cleaned key: {cleaned value: cleaned suggestion}}``.
    """
    data = urlread(u"https://wiki.openstreetmap.org/wiki/Template:Deprecated_features?action=raw&force_cache_20180805", 1)

    dkey = re.compile(r"^\s*\|\s*dkey\s*=")
    dvalue = re.compile(r"\s*dvalue\s*=")
    suggestion = re.compile(r"^\s*\|\s*suggestion\s*=")

    items = []
    for feature in data.split("{{Deprecated features/item")[1:]:
        deprecated_key = None
        deprecated_value = None
        deprecated_suggestion = None
        for line in feature.split("\n"):
            if dkey.match(line):
                cells = line.split("|")
                # Split on the first "=" only, so that a text which itself
                # contains "=" is not cut short.
                deprecated_key = cells[1].split("=", 1)[1].strip()
                if len(cells) > 2 and dvalue.match(cells[2]):
                    deprecated_value = cells[2].split("=", 1)[1].strip()
            if suggestion.match(line):
                # A suggestion such as "highway=footway" keeps its value part.
                deprecated_suggestion = line.split("=", 1)[1].strip()
        items.append((deprecated_key, deprecated_value, deprecated_suggestion))

    deprecated = {}
    for raw_key, raw_value, raw_suggestion in items:
        src_key = self.cleanWiki(raw_key)
        src_val = self.cleanWiki(raw_value)
        dest = self.cleanWiki(raw_suggestion)
        deprecated.setdefault(src_key, {})[src_val] = dest
    return deprecated
def _get_brands(self):
    """Index name-suggestion-index brand entries by ``"<tag>|<name>"`` keys.

    ``nsi.json`` is downloaded with ``urlread``; under its ``nsi`` member,
    every entry whose key starts with ``brands/`` is scanned. A brand is
    skipped when its ``locationSet`` has an ``include`` list containing
    neither ``self.country_code`` nor ``"001"``, or an ``exclude`` list
    containing ``self.country_code``.

    Returns a dict mapping lower-cased keys to the brand object. Keys are
    built from the ``matchTags``, ``matchNames``, ``tags["name"]`` and
    ``displayName`` members of the brand.
    """
    nsi_url_for_brands = "https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/nsi.json"
    results = json.loads(urlread(nsi_url_for_brands, 30))['nsi']

    prefix = 'brands/'
    additional_brands = {}
    for tag, brands in results.items():
        if not tag.startswith(prefix):
            continue
        brand_nsi_name = tag[len(prefix):]

        for brand in brands:
            if "locationSet" in brand:
                location_set = brand["locationSet"]
                if ("include" in location_set
                        and self.country_code not in location_set["include"]
                        and "001" not in location_set["include"]):
                    continue
                if "exclude" in location_set and self.country_code in location_set["exclude"]:
                    continue

            has_name = "name" in brand["tags"]

            # The matchTags keys are built from the brand name; skip them when
            # the brand has no "name" tag instead of raising KeyError.
            if has_name and "matchTags" in brand:
                for additional_tag in brand["matchTags"]:
                    nsi_key = "{}|{}".format(additional_tag, brand["tags"]["name"])
                    additional_brands[nsi_key.lower()] = brand

            if "matchNames" in brand:
                for additional_name in brand["matchNames"]:
                    nsi_key = "{}|{}".format(brand_nsi_name, additional_name)
                    additional_brands[nsi_key.lower()] = brand

            if has_name:
                nsi_key = "{}|{}".format(brand_nsi_name, brand["tags"]["name"])
                additional_brands[nsi_key.lower()] = brand

            nsi_key = "{}|{}".format(brand_nsi_name, brand["displayName"])
            additional_brands[nsi_key.lower()] = brand

    return additional_brands
def _get_brands(self):
    """Index the entries of name-suggestion-index ``brands.json``.

    The file is downloaded with ``urlread`` and its ``brands`` member is
    scanned. Entries excluded by their ``locationSet`` for
    ``self.country_code`` are skipped. The part of the entry id before
    ``~`` is used as the main key; extra keys come from ``matchTags`` and
    ``matchNames``.

    Returns a dict mapping lower-cased ``"<tag>|<name>"`` keys to the brand
    object.
    """
    nsi_url_for_brands = "https://raw.githubusercontent.com/osmlab/name-suggestion-index/master/dist/brands.json"
    results = json.loads(urlread(nsi_url_for_brands, 30))

    def applies_here(brand):
        # True unless the locationSet rules out self.country_code.
        if "locationSet" not in brand:
            return True
        if "include" in brand["locationSet"]:
            included = brand["locationSet"]["include"]
            if self.country_code not in included and "001" not in included:
                return False
        if "exclude" in brand["locationSet"]:
            if self.country_code in brand["locationSet"]["exclude"]:
                return False
        return True

    additional_brands = {}
    for entry_id, brand in results["brands"].items():
        if not applies_here(brand):
            continue

        brand_nsi_name = entry_id.split("~")[0]

        for additional_tag in brand.get("matchTags", ()):
            # Alternative tag combined with the name part of the id.
            nsi_key = "{}|{}".format(additional_tag, brand_nsi_name.split("|")[1])
            additional_brands[nsi_key.lower()] = brand

        for additional_name in brand.get("matchNames", ()):
            # Tag part of the id combined with an alternative name.
            nsi_key = "{}|{}".format(brand_nsi_name.split("|")[0], additional_name)
            additional_brands[nsi_key.lower()] = brand

        additional_brands[brand_nsi_name.lower()] = brand

    return additional_brands
def list_postcode(self):
    """Build per-country postcode formats from Wikipedia's list of postal codes.

    The raw wikitext is fetched with ``urlread`` and split on ``|-``; the
    first and last chunks are ignored. From each remaining chunk, cells 5 to
    7 (after splitting on ``|`` and stripping) are used: country code, area
    format and street format.

    Returns a dict keyed by the first two characters of the country cell.
    Each value is a dict that may hold ``'area'`` and ``'street'`` entries
    produced by ``self.parse_format``; ``'street'`` is ``None`` when a street
    format is given but ``parse_format`` returns a falsy result.
    """
    reline = re.compile("^[-CAN ]+$")
    # remline = re.compile("^[-CAN ]+ *\([-CAN ]+\)$")
    wikitext = urlread(
        u"https://en.wikipedia.org/wiki/List_of_postal_codes?action=raw", 1)

    postcode = {}
    for row in wikitext.split("|-")[1:-1]:
        cells = [cell.strip() for cell in row.split("|")][5:8]
        if len(cells) <= 2:
            continue
        # Skip rows that explicitly have no codes and no street format.
        if cells[1] == "- no codes -" and cells[2] == "":
            continue

        iso = cells[0][0:2]
        format_area, format_street = cells[1], cells[2]

        formats = {}
        postcode[iso] = formats

        if format_area != '':
            regex_area = self.parse_format(reline, format_area)
            if regex_area:
                formats['area'] = regex_area

        if format_street != '':
            regex_street = self.parse_format(reline, format_street)
            if regex_street:
                formats['street'] = regex_street
            else:
                formats['street'] = None

    return postcode
def list_postcode(self):
    """Build one postcode regular expression per country from Wikipedia.

    The raw wikitext of "List of postal codes" is fetched with ``urlread``
    and split on ``|-``; the first and last chunks are ignored. Cells 5 and
    6 of each remaining chunk give the country code and the format. A format
    written ``X (Y)`` is treated as two alternatives. In each alternative
    made only of ``-``, ``C``, ``A``, ``N`` and spaces, ``N`` becomes
    ``[0-9]``, ``A`` becomes ``[A-Z]`` and ``CC`` becomes ``self.Country``.

    Returns a dict ``{country code (2 chars): anchored regex string}``;
    countries without a usable format are absent.
    """
    reline = re.compile("^[-CAN ]+$")
    data = urlread("http://en.wikipedia.org/wiki/List_of_postal_codes?action=raw", 1)

    postcode = {}
    for row in data.split("|-")[1:-1]:
        # A list comprehension (not map(...)[5:8]) so that slicing also works
        # where map() returns an iterator.
        cells = [cell.strip() for cell in row.split("|")][5:8]
        if len(cells) <= 2 or cells[1] == "- no codes -":
            continue

        iso = cells[0][0:2]
        spec = cells[1]

        # endswith() is safe on an empty cell, unlike spec[-1].
        if spec.endswith(')'):
            candidates = [part.strip() for part in spec[:-1].split('(')]
        else:
            candidates = [spec]

        regexs = [
            candidate.replace("N", "[0-9]").replace("A", "[A-Z]").replace("CC", self.Country)
            for candidate in candidates
            if reline.match(candidate)
        ]

        if len(regexs) > 1:
            # Real groups, anchored as a whole: the previous "\(" ... "\)"
            # matched literal parentheses and anchored each side separately.
            postcode[iso] = "^((" + ")|(".join(regexs) + "))$"
        elif len(regexs) == 1:
            postcode[iso] = "^" + regexs[0] + "$"

    return postcode
def deprecated_list(self):
    """Parse the OSM wiki "Deprecated_features" template line by line.

    The raw template is fetched with ``urlread`` and split on
    ``{{Deprecated_features/item``. For every item, ``dkey`` and ``dvalue``
    are read from the line starting with ``| dkey=`` / ``| dkey =`` and the
    suggestion from the line starting with ``| suggestion=`` /
    ``| suggestion =``. Each text is passed through ``self.cleanWiki``
    (``None`` is passed when missing).

    Returns a dict ``{cleaned key: {cleaned value: cleaned suggestion}}``.
    """
    data = urlread("http://wiki.openstreetmap.org/wiki/Template:Deprecated_features?action=raw", 1)

    items = []
    for feature in data.split("{{Deprecated_features/item")[1:]:
        deprecated_key = None
        deprecated_value = None
        deprecated_suggestion = None
        for line in feature.split("\n"):
            if line.startswith(("| dkey=", "| dkey =")):
                cells = line.split("|")
                # Split on the first "=" only, so that a text which itself
                # contains "=" is not cut short.
                deprecated_key = cells[1].split("=", 1)[1]
                if len(cells) > 2:
                    cell = cells[2].strip()
                    if cell.startswith(("dvalue=", "dvalue =")):
                        deprecated_value = cell.split("=", 1)[1]
            if line.startswith(("| suggestion=", "| suggestion =")):
                # A suggestion such as "highway=footway" keeps its value part.
                deprecated_suggestion = line.split("=", 1)[1]
        items.append((deprecated_key, deprecated_value, deprecated_suggestion))

    deprecated = {}
    for raw_key, raw_value, raw_suggestion in items:
        src_key = self.cleanWiki(raw_key)
        src_val = self.cleanWiki(raw_value)
        dest = self.cleanWiki(raw_suggestion)
        deprecated.setdefault(src_key, {})[src_val] = dest
    return deprecated
def black_list(self):
    """Return the Wikidata ids produced by the chain-store SPARQL query.

    The query URL is fetched with ``urlread`` and decoded as JSON. For every
    entry of ``results.bindings``, the text after the last ``/`` of
    ``item.value`` is collected.

    Returns a list of strings, in the order of the bindings.
    """
    wikidata_query_for_chain_store = u"https://query.wikidata.org/sparql?query=SELECT%20DISTINCT%20%3Fitem%20%3FitemLabel%20WHERE%20{%0A%20{%20%3Fitem(wdt%3AP31%2Fwdt%3AP279*)wd%3AQ507619%20}%20UNION%20{%20%3Fitem(wdt%3AP31%2Fwdt%3AP279*)%20wd%3AQ1631129%20}%0A%20SERVICE%20wikibase%3Alabel%20{%20bd%3AserviceParam%20wikibase%3Alanguage%20%22[AUTO_LANGUAGE]%2Cen%22.%20}%0A}&format=json"
    results = json.loads(urlread(wikidata_query_for_chain_store, 30))

    should_be_brand = []
    for binding in results['results']['bindings']:
        item_value = binding['item']['value']
        should_be_brand.append(item_value.split('/')[-1])
    return should_be_brand
def load_poly(poly):
    """Download a polygon file and parse it with ``parse_poly``.

    poly: location passed to ``downloader.urlread``; it is printed first.

    Returns the result of ``parse_poly`` applied to the downloaded text
    split into lines, or ``None`` when an ``IOError`` is raised (the error
    is printed).
    """
    try:
        # print() with a single argument behaves the same on Python 2 and 3,
        # unlike the statement form, which is a syntax error on Python 3.
        print(poly)
        s = downloader.urlread(poly, 1)
        return parse_poly(s.split("\n"))
    except IOError as e:
        print(e)
        return None
def load_poly(poly):
    """Fetch the polygon file at ``poly`` and return it parsed.

    ``poly`` is printed, downloaded with ``downloader.urlread`` and its
    lines are given to ``parse_poly``. If an ``IOError`` occurs during any
    of these steps, it is printed and ``None`` is returned.
    """
    try:
        print(poly)
        content = downloader.urlread(poly, 1)
        lines = content.split('\n')
        return parse_poly(lines)
    except IOError as error:
        print(error)
    return None
def liste_des_arbres_fruitiers(self):
    """Fill ``self.Tree`` from the French Wikipedia list of fruit trees.

    The raw wikitext is fetched with ``urlread`` and split on ``]]``. In
    each chunk, the text after a ``[[`` that contains no ``:`` up to the end
    of the chunk is taken as a link. Every ``|``-separated part of the link
    is normalised with ``self.normalize`` and used as a key of ``self.Tree``,
    with ``{'species:fr': <whole link text>}`` as value.
    """
    # Raw string: "\[" is not a valid escape in a normal string literal.
    reline = re.compile(r"\[\[([^:]*)$")
    data = urlread(u"https://fr.wikipedia.org/wiki/Liste_des_arbres_fruitiers?action=raw", 1)

    for chunk in data.split("]]"):
        for res in reline.findall(chunk):
            for n in res.split('|'):
                self.Tree[self.normalize(n)] = {'species:fr': res}
def liste_des_essences_europennes(self):
    """Fill ``self.Tree`` from the French Wikipedia list of European forest species.

    The raw wikitext is fetched with ``urlread`` and read line by line.
    Lines starting with ``* [[...]]`` followed by at least one more
    ``[[...]]`` link are used: the first link is the French name, the second
    the genus, the third (optional) the species. Every ``|``-separated part
    of the first link is normalised with ``self.normalize`` and used as a
    key of ``self.Tree``.
    """
    # Raw string: "\*", "\[" and "\]" are not valid escapes in a normal
    # string literal.
    reline = re.compile(r"^\* \[\[([^]]*)\]\][^[]*\[\[([^]]*)\]\][^[]*(?:\[\[([^]]*)\]\][^[]*)?(?:\[\[([^]]*)\]\][^[]*)?")
    data = urlread(u"https://fr.wikipedia.org/wiki/Liste_des_essences_forestières_européennes?action=raw", 1)

    for line in data.split("\n"):
        for res in reline.findall(line):
            for n in res[0].split('|'):
                self.Tree[self.normalize(n)] = {
                    'genus': res[1],
                    # NOTE(review): res[2:3] holds only the third group, so the
                    # join is a no-op and the fourth group is never used --
                    # confirm whether res[2:4] was intended.
                    'species': '|'.join(res[2:3]),
                    'species:fr': res[0],
                }
def init(self, logger):
    """Load the TagwatchCleaner rules from the OSM wiki into lookup tables.

    After ``Plugin.init``, the country and language are read from
    ``self.father.config.options`` (when ``self.father`` is set); a list of
    languages is ignored and only the part of the language before ``_`` is
    kept. The wiki page is fetched with ``urlread`` and every table row of
    the form ``|pattern||replacement||class||only_for`` is examined. A row is
    used when ``only_for`` is empty, equals the country or the language, or
    is a prefix of the country.

    For each used row, an error class is registered in ``self.errors`` and
    ``[replacement, class id]`` is stored in one of six tables, chosen by
    whether the key and the value of the pattern are quoted
    (``self.quoted``): ``_update_ks``/``_update_kr`` for key-only patterns,
    ``_update_{ks,kr}_{vs,vr}`` for ``key=value`` patterns.
    """
    Plugin.init(self, logger)

    country = self.father.config.options.get("country") if self.father else None
    language = self.father.config.options.get("language") if self.father else None
    if isinstance(language, list):
        language = None
    elif language:
        language = language.split('_')[0]

    self._update_ks = {}
    self._update_kr = {}
    self._update_ks_vs = defaultdict(dict)
    self._update_kr_vs = defaultdict(dict)
    self._update_ks_vr = defaultdict(dict)
    self._update_kr_vr = defaultdict(dict)

    # Raw string: "\|" is not a valid escape in a normal string literal.
    reline = re.compile(r"^\|([^|]*)\|\|([^|]*)\|\|([^|]*)\|\|([^|]*).*")

    # Rules are retrieved from
    # https://wiki.openstreetmap.org/index.php?title=User:FrViPofm/TagwatchCleaner
    data = urlread(u"https://wiki.openstreetmap.org/index.php?title=User:FrViPofm/TagwatchCleaner&action=raw", 1)
    for line in data.split("\n"):
        for res in reline.findall(line):
            only_for = res[3].strip()
            if not (only_for in (None, '', country, language)
                    or (country and country.startswith(only_for))):
                continue

            r = res[1].strip()
            c0 = res[2].strip()
            tags = ["fix:chair"] if c0 == "" else [c0, "fix:chair"]
            c = stablehash(c0)
            self.errors[c] = self.def_class(
                item = 3030, level = 2, tags = tags,
                title = {'en': c0},
                detail = T_(
'''Simple and frequent errors, the list is available [here](https://wiki.openstreetmap.org/wiki/User:FrViPofm/TagwatchCleaner).'''))

            if u"=" in res[0]:
                # Only the first two "="-separated parts are used.
                parts = res[0].split(u"=")
                k = parts[0].strip()
                v = parts[1].strip()
                if self.quoted(k):
                    k = self.quoted2re(k)
                    if self.quoted(v):
                        self._update_kr_vr[k][self.quoted2re(v)] = [r, c]
                    else:
                        self._update_kr_vs[k][v] = [r, c]
                else:
                    if self.quoted(v):
                        self._update_ks_vr[k][self.quoted2re(v)] = [r, c]
                    else:
                        self._update_ks_vs[k][v] = [r, c]
            else:
                if self.quoted(res[0]):
                    self._update_kr[self.quoted2re(res[0])] = [r, c]
                else:
                    self._update_ks[res[0]] = [r, c]
def init(self, logger):
    """Load the TagwatchCleaner rules from the OSM wiki into lookup tables.

    After ``Plugin.init``, the country and language are read from
    ``self.father.config.options`` (when ``self.father`` is set); a language
    that is not a ``basestring`` is ignored. The wiki page is fetched with
    ``urlread`` and every table row of the form
    ``|pattern||replacement||class||only_for`` is examined. A row is used
    when ``only_for`` is empty or equals the country or the language.

    For each used row, an entry is stored in ``self.errors`` and
    ``[replacement, class id]`` is stored in one of six tables, chosen by
    whether the key and the value of the pattern are quoted
    (``self.quoted``).
    """
    Plugin.init(self, logger)

    country = None
    language = None
    if self.father:
        country = self.father.config.options.get("country")
        language = self.father.config.options.get("language")
    if not isinstance(language, basestring):
        language = None

    self._update_ks = {}
    self._update_kr = {}
    self._update_ks_vs = defaultdict(dict)
    self._update_kr_vs = defaultdict(dict)
    self._update_ks_vr = defaultdict(dict)
    self._update_kr_vr = defaultdict(dict)

    reline = re.compile("^\|([^|]*)\|\|([^|]*)\|\|([^|]*)\|\|([^|]*).*")

    # Rules are retrieved from
    # http://wiki.openstreetmap.org/index.php?title=User:FrViPofm/TagwatchCleaner
    page = urlread("http://wiki.openstreetmap.org/index.php?title=User:FrViPofm/TagwatchCleaner&action=raw", 1)
    for line in page.split("\n"):
        for res in reline.findall(line):
            only_for = res[3].strip()
            if only_for not in (None, '', country, language):
                continue

            r = res[1].strip()
            c0 = res[2].strip()
            if c0 == "":
                tags = ["fix:chair"]
            else:
                tags = [c0, "fix:chair"]
            c = self.stablehash(c0.encode("utf8"))
            self.errors[c] = {
                "item": 3030,
                "level": 2,
                "tag": tags,
                "desc": {"en": c0},
            }

            pattern = res[0]
            if u"=" not in pattern:
                # Key-only pattern: regex table when quoted, plain table otherwise.
                if self.quoted(pattern):
                    self._update_kr[self.quoted2re(pattern)] = [r, c]
                else:
                    self._update_ks[pattern] = [r, c]
                continue

            k = pattern.split(u"=")[0].strip()
            v = pattern.split(u"=")[1].strip()

            key_is_regex = self.quoted(k)
            if key_is_regex:
                k = self.quoted2re(k)
            value_is_regex = self.quoted(v)
            if value_is_regex:
                v = self.quoted2re(v)

            # Pick the table matching the (key kind, value kind) combination.
            if key_is_regex:
                table = self._update_kr_vr if value_is_regex else self._update_kr_vs
            else:
                table = self._update_ks_vr if value_is_regex else self._update_ks_vs
            table[k][v] = [r, c]
def list_postcode(self):
    """Build per-country postcode formats from Wikipedia's list of postal codes.

    The raw wikitext is fetched with ``urlread`` and split on ``|-``; the
    first and last chunks are ignored. From each remaining chunk, cells 5 to
    7 (after splitting on ``|`` and stripping) are used: country code, area
    format and street format. Rows whose area cell is ``- no codes -`` are
    skipped.

    Returns a dict keyed by the first two characters of the country cell.
    Each value is a dict holding the result of ``self.parse_format`` under
    ``'area'`` and/or ``'street'`` for the formats that are not empty.
    """
    reline = re.compile("^[-CAN ]+$")
    data = urlread("http://en.wikipedia.org/wiki/List_of_postal_codes?action=raw", 1)

    postcode = {}
    for row in data.split("|-")[1:-1]:
        # A list comprehension (not map(...)[5:8]) so that slicing also works
        # where map() returns an iterator.
        cells = [cell.strip() for cell in row.split("|")][5:8]
        if len(cells) <= 2 or cells[1] == "- no codes -":
            continue

        iso = cells[0][0:2]
        format_area = cells[1]
        format_street = cells[2]

        postcode[iso] = {}
        if format_area != '':
            postcode[iso]['area'] = self.parse_format(reline, format_area)
        if format_street != '':
            postcode[iso]['street'] = self.parse_format(reline, format_street)

    return postcode
def deprecated_list(self):
    """Parse the table of the OSM wiki "Deprecated features" page.

    The raw page is fetched with ``urlread``; the text before the first
    ``\\n|}\\n`` is split on ``|-`` and the first two chunks are ignored.
    Each remaining row is split on `` || ``: cell 1 lists the old tags
    (separated by ``<br />`` or ``<br/>``) and cell 3 is the replacement.
    Both are passed through ``self.cleanWiki``.

    Returns a dict ``{key: {value: replacement}}``; the inner key is
    ``None`` when the cleaned old tag does not split into exactly two parts
    on ``=``.
    """
    page = urlread("http://wiki.openstreetmap.org/wiki/Deprecated_features?action=raw", 1)
    table_end = page.index('\n|}\n')
    rows = page[:table_end].split("|-")

    # First pass: one (old tag, replacement) pair per old tag of each row.
    pairs = []
    for row in rows[2:]:
        cells = row[2:].split(" || ")
        old_tags = cells[1].replace('<br />', '<br/>').split('<br/>')
        for old_tag in old_tags:
            pairs.append([old_tag, cells[3]])

    # Second pass: clean the texts and file them by key and value.
    deprecated = {}
    for raw_src, raw_dest in pairs:
        src = self.cleanWiki(raw_src)
        dest = self.cleanWiki(raw_dest)
        parts = src.split('=')
        value = parts[1] if len(parts) == 2 else None
        deprecated.setdefault(parts[0], {})[value] = dest
    return deprecated
def _get_frequent_names(self):
    """Return the lower-cased keys of name-suggestion-index ``names_keep.json``.

    The file is downloaded with ``urlread`` and decoded as JSON.

    Returns a set of strings.
    """
    nsi_url_for_names = "https://raw.githubusercontent.com/osmlab/name-suggestion-index/master/dist/names_keep.json"
    names = json.loads(urlread(nsi_url_for_names, 30))
    return {name.lower() for name in names.keys()}
def _download_nsi(self):
    """Download name-suggestion-index ``nsi.json`` and return its ``nsi`` member.

    The file is fetched with ``urlread`` and decoded as JSON.
    """
    nsi_url = "https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/nsi.json"
    document = json.loads(urlread(nsi_url, 30))
    return document['nsi']
def analyse(self, tags, wikipediaTag="wikipedia"):
    """Check the ``wikipedia`` tag and its ``wikipedia:*`` variants.

    tags: dict of tag keys to values.
    wikipediaTag: key to check; the method calls itself with
        ``wikipediaTag + ":" + suffix`` when ``suffix`` is itself a tag key.

    Returns a list of error dicts (``class``, ``subclass`` and optionally
    ``text`` / ``fix``). The list is empty when nothing is reported.

    Classes reported: 30310 (http(s) URL that ``self.wiki_regexp`` does not
    match), 30311 (URL matched by ``self.wiki_regexp``), 30312 (value not
    matched by ``self.lang_regexp``), 30313 (``%`` or ``_`` in the value),
    30314 (language variants without the main tag), 30315 (invalid suffix),
    30316 (variant for the same language as the main tag), 30317 (variant
    equal to the interlanguage link of the main article).
    """
    err = []
    if wikipediaTag in tags:
        primary = tags[wikipediaTag]
        m = self.wiki_regexp.match(primary)
        if primary.startswith(("http://", "https://")) and not m:
            # tag 'wikipedia' starts with 'http://' but it's not a wikipedia url
            return [{"class": 30310, "subclass": 0}]
        elif m:
            # tag 'wikipedia' seems to be an url
            return [{"class": 30311, "subclass": 1,
                     "text": T_(u"Use wikipedia=%s:*", m.group(2)),
                     "fix": {wikipediaTag: "%s:%s" % (m.group(2), self.human_readable(m.group(3)))}}]

        if not self.lang_regexp.match(primary):
            err.append({"class": 30312, "subclass": 2})
        else:
            prefix = primary.split(':', 1)[0]
            tag = wikipediaTag + ':' + prefix
            if tag in tags:
                err.append({"class": 30316, "subclass": 6, "fix": {'-': [tag]}})

        if "%" in primary or "_" in primary:
            err.append({"class": 30313, "subclass": 3,
                        "fix": {wikipediaTag: self.human_readable(primary)}})

    # False: not fetched yet; None: fetch failed; dict: lang -> linked title.
    interwiki = False
    missing_primary = []
    for tag in [t for t in tags if t.startswith(wikipediaTag + ":")]:
        suffix = tag[len(wikipediaTag) + 1:]
        if ":" in suffix:
            suffix = suffix.split(":")[0]

        if wikipediaTag in tags:
            if interwiki is False:
                try:
                    # Split on the first ":" only: article titles may contain
                    # ":" themselves.
                    lang, title = tags[wikipediaTag].split(':', 1)
                    # NOTE(review): urllib.quote exists on Python 2 only.
                    json_str = urlread("http://" + lang + ".wikipedia.org/w/api.php?action=query&prop=langlinks&titles=" + urllib.quote(title.encode('utf-8')) + "&redirects=&lllimit=500&format=json", 30)
                    interwiki = json.loads(json_str)
                    pages = list(interwiki["query"]["pages"].values())
                    interwiki = dict(map(lambda x: [x["lang"], x["*"]], pages[0]["langlinks"]))
                except Exception:
                    # Best effort: without the links, check 30317 is skipped.
                    interwiki = None

            if interwiki and suffix in interwiki and interwiki[suffix] == self.human_readable(tags[tag]):
                err.append({"class": 30317, "subclass": 7, "fix": [
                    {'-': [tag]},
                    {'-': [tag], '~': {wikipediaTag: suffix + ':' + interwiki[suffix]}}
                ]})

        if suffix in tags:
            # wikipedia:xxxx only authorized if tag xxxx exist
            err.extend(self.analyse(tags, wikipediaTag + ":" + suffix))
        elif self.lang_restriction_regexp.match(suffix):
            if wikipediaTag not in tags:
                m = self.wiki_regexp.match(tags[tag])
                if m:
                    value = self.human_readable(m.group(3))
                elif tags[tag].startswith(suffix + ":"):
                    value = tags[tag][len(suffix) + 1:]
                else:
                    value = self.human_readable(tags[tag])
                missing_primary.append({'-': [tag], '+': {wikipediaTag: "%s:%s" % (suffix, value)}})
        else:
            err.append({"class": 30315, "subclass": 5,
                        "text": T_(u"Invalid wikipedia suffix '%s'", suffix)})

    if missing_primary:
        if self.Language:
            # Proposals in self.Language sort first (empty sort key).
            missing_primary = sorted(
                missing_primary,
                key=lambda x: x['+'][wikipediaTag][0:2] if x['+'][wikipediaTag][0:2] != self.Language else '')
        err.append({"class": 30314, "subclass": 4, "fix": missing_primary})

    return err
def analyse(self, tags, wikipediaTag="wikipedia"):
    """Check the ``wikipedia`` tag and its ``wikipedia:*`` variants.

    tags: dict of tag keys to values.
    wikipediaTag: key to check; the method calls itself with
        ``wikipediaTag + ":" + suffix`` when ``suffix`` is itself a tag key.

    Returns a list of error dicts (``class``, ``subclass`` and optionally
    ``text`` / ``fix``). The list is empty when nothing is reported.

    Classes reported: 30310 (http(s) URL that ``self.wiki_regexp`` does not
    match), 30311 (URL matched by ``self.wiki_regexp``), 30312 (value not
    matched by ``self.lang_regexp``), 30313 (``%`` or ``_`` in the value),
    30314 (language variants without the main tag), 30315 (invalid suffix),
    30316 (variant for the same language as the main tag), 30317 (variant
    equal to the interlanguage link of the main article).
    """
    err = []
    if wikipediaTag in tags:
        primary = tags[wikipediaTag]
        m = self.wiki_regexp.match(primary)
        if primary.startswith((u"http://", u"https://")) and not m:
            # tag 'wikipedia' starts with 'http://' but it's not a wikipedia url
            return [{"class": 30310, "subclass": 0}]
        elif m:
            # tag 'wikipedia' seems to be an url
            return [{
                "class": 30311,
                "subclass": 1,
                "text": T_("Use wikipedia={0}:*", m.group(2)),
                "fix": {
                    wikipediaTag: "{0}:{1}".format(m.group(2), self.human_readable(m.group(3)))
                },
            }]

        if not self.lang_regexp.match(primary):
            err.append({"class": 30312, "subclass": 2})
        else:
            prefix = primary.split(':', 1)[0]
            tag = wikipediaTag + ':' + prefix
            if tag in tags:
                err.append({"class": 30316, "subclass": 6, "fix": {'-': [tag]}})

        if "%" in primary or "_" in primary:
            err.append({
                "class": 30313,
                "subclass": 3,
                "fix": {wikipediaTag: self.human_readable(primary)},
            })

    # False: not fetched yet; None: fetch failed; dict: lang -> linked title.
    interwiki = False
    missing_primary = []
    for tag in [t for t in tags if t.startswith(wikipediaTag + ":")]:
        suffix = tag[len(wikipediaTag) + 1:]
        if ":" in suffix:
            suffix = suffix.split(":")[0]

        if self.Country and self.Country.startswith("UA") and suffix == "ru":
            # In Ukraine wikipedia=uk:X + wikipedia:ru=Y are allowed
            continue

        if wikipediaTag in tags:
            if interwiki is False:
                try:
                    # Split on the first ":" only: article titles may contain
                    # ":" themselves.
                    lang, title = tags[wikipediaTag].split(':', 1)
                    # NOTE(review): title is inserted as is; presumably
                    # urlread takes care of URL encoding -- confirm.
                    json_str = urlread(
                        u"https://" + lang +
                        u".wikipedia.org/w/api.php?action=query&prop=langlinks&titles="
                        + title + u"&redirects=&lllimit=500&format=json", 30)
                    interwiki = json.loads(json_str)
                    pages = list(interwiki["query"]["pages"].values())
                    interwiki = dict(map(lambda x: [x["lang"], x["*"]], pages[0]["langlinks"]))
                except Exception:
                    # Best effort: without the links, check 30317 is skipped.
                    interwiki = None

            if interwiki and suffix in interwiki and interwiki[suffix] == self.human_readable(tags[tag]):
                err.append({
                    "class": 30317,
                    "subclass": stablehash64(tag),
                    "fix": [
                        {'-': [tag]},
                        {'-': [tag], '~': {wikipediaTag: suffix + ':' + interwiki[suffix]}},
                    ],
                })

        if suffix in tags:
            # wikipedia:xxxx only authorized if tag xxxx exist
            err.extend(self.analyse(tags, wikipediaTag + ":" + suffix))
        elif self.lang_restriction_regexp.match(suffix):
            if wikipediaTag not in tags:
                m = self.wiki_regexp.match(tags[tag])
                if m:
                    value = self.human_readable(m.group(3))
                elif tags[tag].startswith(suffix + ":"):
                    value = tags[tag][len(suffix) + 1:]
                else:
                    value = self.human_readable(tags[tag])
                missing_primary.append({
                    '-': [tag],
                    '+': {wikipediaTag: "{0}:{1}".format(suffix, value)},
                })
        else:
            err.append({
                "class": 30315,
                "subclass": stablehash64(tag),
                "text": T_("Invalid wikipedia suffix '{0}'", suffix),
            })

    if missing_primary:
        if self.Language:
            own_language = self.Language.split('_')[0]
            # Proposals in the configured language sort first (empty sort key).
            missing_primary = sorted(
                missing_primary,
                key=lambda x: x['+'][wikipediaTag][0:2] if x['+'][wikipediaTag][0:2] != own_language else '')
        err.append({"class": 30314, "subclass": 4, "fix": missing_primary})

    return err