import re

from path import project_path
from q21 import load_uk
from q25 import read_info
from q25 import info_to_dict
from q26 import rm_emph
from q27 import rm_link


def rm_tags(text):
    """Strip HTML-style tags from *text*.

    Literal ``<br />`` markers are dropped first.  After that, every
    self-closing tag (``<tag ... />``) and every ``<tag ...>body</tag>`` pair
    whose body contains no ``<`` is removed together with its body.

    Args:
        text: Markup string to clean.

    Returns:
        The string with the matched tags removed.
    """
    text = re.sub(r"<br />", r"", text)
    # The attribute part is one optional group.  Repeating it with ``*``
    # would accept exactly the same strings, but nesting two quantifiers can
    # backtrack exponentially on a tag that never finds its closing part.
    text = re.sub(r"<([^>\s]+)(?:\s[^>]+)?(>[^<]*</\1>|\s*/>)", r"", text)
    return text


if __name__ == "__main__":
    print("\rSolving Q28 ... ", end="")
    x = info_to_dict(rm_tags(rm_link(rm_emph(read_info(load_uk())))))
    # Explicit UTF-8: the keys written below are non-ASCII, so the result
    # must not depend on the platform's locale encoding.
    with open(project_path / "output/wiki/28.txt", "w", encoding="utf-8") as f:
        f.write("\n".join("{} {}".format(k, v) for k, v in x.items()))
    print("Done.")
import re

from path import project_path
from q21 import load_uk

if __name__ == "__main__":
    print("\rSolving Q24 ... ", end="")
    # Capture the file name up to the first "|" or "]".  A lazy ".+?" that
    # stops only at "|" misses links without options and can run past the
    # closing "]]" into the next pipe on the same line.
    r = re.findall(r"\[\[ファイル:([^|\]\n]+)", load_uk())
    # Explicit UTF-8 so non-ASCII file names are written the same everywhere.
    with open(project_path / "output/wiki/24.txt", "w", encoding="utf-8") as f:
        f.write("\n".join(r))
    print("Done.")
return re.findall(r"{{基礎情報 国\|((?:[^{{}}]*{{[^{{}}]*}})*[^{{}}]*)}}", text)[0] def info_to_dict(info): x = info r0 = r"({{[^}\|]+)\|([^}]+}})" r1 = r"\1<PIPE>\2" while x != re.sub(r0, r1, x): x = re.sub(r0, r1, x) r0 = r"(\[\[[^\]\|]+)\|([^\]]+\]\])" r1 = r"\1<PIPE>\2" while x != re.sub(r0, r1, x): x = re.sub(r0, r1, x) items = [i.replace("<PIPE>", "|") for i in re.split(r"\|", x)] return dict([re.findall(r"(\S+)\s*=\s*(.+)", item)[0] for item in items]) if __name__ == "__main__": print("\rSolving Q25 ... ", end="") r = info_to_dict(read_info(load_uk())) with open(project_path / "output/wiki/25.txt", "w") as f: f.write("\n".join("{} {}".format(k, v) for k, v in r.items())) print("Done.")
import requests

from path import project_path
from q21 import load_uk
from q25 import read_info
from q25 import info_to_dict

if __name__ == "__main__":
    print("\rSolving Q29 ... ", end="")
    url = "https://en.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        "prop": "imageinfo",
        "iiprop": ["url"],
        "format": "json",
        "titles": "File:{}".format(info_to_dict(read_info(load_uk()))["国旗画像"]),
    }
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url=url, params=params, timeout=30)
    # Fail with a clear HTTP error instead of a confusing JSON/key error below.
    response.raise_for_status()
    pages = response.json()["query"]["pages"]
    # Only one title is requested, so take the single returned page rather
    # than indexing by a page id that is specific to one particular file.
    page = next(iter(pages.values()))
    image_url = page["imageinfo"][0]["url"]
    with open(project_path / "output/wiki/29.txt", "w", encoding="utf-8") as f:
        f.write(image_url)
    print("Done.")
import re

from path import project_path
from q21 import load_uk
from q25 import read_info
from q25 import info_to_dict


def rm_emph(text):
    """Remove quote-based emphasis markup from *text*.

    A run of two or more apostrophes, followed by text that contains no
    apostrophe, followed by the same run again, is replaced by the enclosed
    text alone.

    Args:
        text: Markup string to clean.

    Returns:
        The string with the matched emphasis markers removed.
    """
    return re.sub(r"('{2,})([^']+)\1", r"\2", text)


if __name__ == "__main__":
    print("\rSolving Q26 ... ", end="")
    r = info_to_dict(rm_emph(read_info(load_uk())))
    # Explicit UTF-8: the keys are non-ASCII, so the output must not depend
    # on the platform's locale encoding.
    with open(project_path / "output/wiki/26.txt", "w", encoding="utf-8") as f:
        f.write("\n".join("{} {}".format(k, v) for k, v in r.items()))
    print("Done.")
import re

from path import project_path
from q21 import load_uk

if __name__ == "__main__":
    print("\rSolving Q22 ... ", end="")
    # The optional sort key after "|" is limited to characters other than
    # "]", so it cannot swallow a second category link on the same line.
    r = re.findall(r"\[\[Category:([^|\]]+)(?:\|[^\]]*)?\]\]", load_uk())
    # Explicit UTF-8 so non-ASCII category names are written the same
    # regardless of the platform's locale encoding.
    with open(project_path / "output/wiki/22.txt", "w", encoding="utf-8") as f:
        f.write("\n".join(r))
    print("Done.")
import re

from path import project_path
from q21 import load_uk

if __name__ == "__main__":
    print("\rSolving Q23 ... ", end="")
    # Group 1 is the run of "=" that opens (and, via the backreference,
    # closes) the heading; group 2 is the heading text.
    r = re.findall(r"(={2,})\s?([^=]+)\s?\1", load_uk())
    # Explicit UTF-8 so non-ASCII headings are written the same everywhere.
    with open(project_path / "output/wiki/23.txt", "w", encoding="utf-8") as f:
        # The greedy title group also captures whitespace in front of the
        # closing "=" run, so trim the title before writing it.
        f.write("\n".join("{} {}".format(len(s[0]), s[1].strip()) for s in r))
    print("Done.")