def main(): """Main method.""" maintenance = MerimeeTypeMaintenance() merimee_category = Category(site, title=u'Mérimée without type parameter') pages_generator = merimee_category.articles() for page in pages_generator: #page = pages_generator.next() maintenance.process_page(page)
def _getAllCategoryPages(self):
    page = pywikibot.Page(self.site, "Category:Meetup on " + self.date)
    pageText = u"[[Category:Meetup in " + re.sub(r"-\d{2}$", "", self.date) + u"]]"
    self._pageWrite(page, pageText)
    cat = Category(self.site, "Meetup on " + self.date)
    articleList = cat.articlesList()
    return articleList
def redirect_cat(cat: pywikibot.Category, target: pywikibot.Category,
                 summary: str) -> None:
    """
    Redirect a category to another category.

    @param cat: Category to redirect
    @param target: Category redirect target
    @param summary: Edit summary
    """
    tpl = Template('Category redirect')
    tpl.add('1', target.title(with_ns=False))
    cat.text = str(tpl)
    cat.save(summary=summary)
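# --- Hedged usage sketch (not part of the original snippet) ---
# Assumes the Template used above is mwparserfromhell.nodes.Template, that the
# bot account is logged in, and that both category names below are placeholders.
import pywikibot

example_site = pywikibot.Site('en', 'wikipedia')
old_cat = pywikibot.Category(example_site, 'Category:Old example name')
new_cat = pywikibot.Category(example_site, 'Category:New example name')
redirect_cat(old_cat, new_cat, summary='Soft redirect after rename (example)')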
def getArticles(self, category, recurse=False):
    if category.startswith("Category:"):
        category = category.split(":")[1]
    # if not isinstance(category, unicode):
    #     category = unicode(category, "utf-8")
    baseDir = "categoryArticlesCache/"
    if not os.path.exists(baseDir):
        os.makedirs(baseDir)
    fname = baseDir + category
    if recurse:
        fname = baseDir + category
        fname = fname
    if self.printMode:
        try:
            print(fname)
        except:
            print("error printing fname")
    if os.path.isfile(fname):
        lines = []
        try:
            with codecs.open(fname) as f:
                lines = [line.strip() for line in f.readlines()]
        except:
            with codecs.open(fname, encoding='utf-8') as f:
                lines = [line.strip() for line in f.readlines()]
        if lines != []:
            return lines
    site = Site("en")
    cat = ""
    try:
        cat = Category(site, title=category)
    except:
        cat = Category(site, title=category.decode("utf-8"))
    articles = cat.articles(namespaces=0, recurse=recurse)
    res = [article.title() for article in articles]
    # print res
    text = ""
    for cat in res:
        text += cat + "\n"
    try:
        with codecs.open(fname, "a+") as f:
            # print text
            # print type(text)
            f.write(text)
    except:
        with codecs.open(fname, "a+") as f:
            f.write(text.encode('utf-8'))
    return res
def get_lemma_str_from_cat(self, category: str) -> List[str]:
    page = Category(self.wiki, category)
    cat_list = [
        str(lemma).strip("[]")[2:] for lemma in CategorizedPageGenerator(page)
    ]
    return cat_list
def main():
    data = '20110310'
    site = pywikibot.getSite()
    cat = Category(site, 'Kategoria:francuski (indeks)')
    lista = pagegenerators.CategorizedPageGenerator(cat)
    # lista_stron1 = xmlreader.XmlDump('plwiktionary-%s-pages-articles.xml' % data)
    # lista = xmlreader.XmlDump.parse(lista_stron1)
    for a in lista:
        h = Haslo(a.title())
        # h = HasloXML(a.title, a.text)
        if h.type != 4 and ' ' in h.title:
            h.langs()
            for c in h.list_lang:
                c.pola()
                if c.type != 2 and c.lang == 'hiszpański':
                    if ('rzeczownik' in c.znaczenia.tresc) and (
                            'rzeczownika' not in c.znaczenia.tresc):
                        print('\n' + h.title)
                        text = '*[[%s]]\n' % h.title
                        file = open("log/rzeczownik.txt", 'a', encoding="utf-8")
                        file.write(text)
                        file.close()
def _db_get_new_category_pages(
    category: pywikibot.Category,
    start_time: pywikibot.Timestamp,
    end_time: pywikibot.Timestamp,
    namespaces: List[int],
) -> Iterator[Tuple[pywikibot.page.BasePage, datetime]]:
    """Use DB to list category pages. Called by get_new_categoryPages()."""
    if not wmcs:
        raise ConnectionError
    query = (
        "SELECT page_namespace, page_title, cl_timestamp "
        "FROM "
        "  categorylinks "
        "  JOIN page ON page_id = cl_from "
        "WHERE "
        '  cl_to = "{catname}" AND '
        '  cl_type = "page" AND '
        "  cl_timestamp >= {start_timestamp} AND "
        "  cl_timestamp < {end_timestamp} AND "
        "  page_namespace in ({nslist}) "
        "ORDER BY cl_timestamp "
    ).format(
        catname=category.title(underscore=True, with_ns=False),
        start_timestamp=start_time.totimestampformat(),
        end_timestamp=end_time.totimestampformat(),
        nslist=", ".join(str(n) for n in namespaces),
    )
    for ns, title, ts in pywikibot.data.mysql.mysql_query(
            query, dbname=site.dbName()):
        yield (
            pywikibot.Page(site, title=title.decode(encoding="utf-8"), ns=ns),
            ts,
        )
def find_discussion(self, category: pywikibot.Category) -> 'CfdPage':
    """
    Return the relevant discussion.

    @param category: The category being discussed
    """
    if self.section():
        return self
    text = removeDisabledParts(self.text, tags=EXCEPTIONS, site=self.site)
    wikicode = mwparserfromhell.parse(text, skip_style_tags=True)
    for section in wikicode.get_sections(levels=[4]):
        heading = section.filter_headings()[0]
        section_title = str(heading.title).strip()
        discussion = self.__class__(
            self.site, '{}#{}'.format(self.title(), section_title))
        if category.title() == section_title:
            return discussion
        # Split approximately into close, nom, and others.
        parts = str(section).split('(UTC)')
        if len(parts) < 3:
            continue
        # Parse the nom for category links.
        nom = mwparserfromhell.parse(parts[1], skip_style_tags=True)
        for node in nom.ifilter():
            page = self._cat_from_node(node)
            if page and category == page:
                return discussion
    return self
def _api_get_new_category_pages(
    category: pywikibot.Category,
    start_time: pywikibot.Timestamp,
    end_time: pywikibot.Timestamp,
    namespaces: List[int],
) -> Iterator[Tuple[pywikibot.page.BasePage, pywikibot.Timestamp]]:
    """Use API to list category pages. Called by get_new_categoryPages()."""
    for row in pywikibot.data.api.ListGenerator(
        "categorymembers",
        site=site,
        cmtitle=category.title(underscore=True, with_ns=True),
        cmprop="title|type|timestamp",
        cmnamespace="|".join(str(n) for n in namespaces),
        cmtype="page",
        cmsort="timestamp",
        cmstart=start_time.isoformat(),
        cmend=end_time.isoformat(),
    ):
        if row.get("type", "page") != "page":
            continue
        yield (
            pywikibot.Page(site, title=row.get("title", ""), ns=row.get("ns", "")),
            pywikibot.Timestamp.fromISOformat(row.get("timestamp")),
        )
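# --- Hedged usage sketch (not part of the original module) ---
# Assumes the module-level `site` used by _api_get_new_category_pages() is
# already initialised; the category name and namespace list are illustrative.
import pywikibot

start = pywikibot.Timestamp.fromISOformat("2023-01-01T00:00:00Z")
end = pywikibot.Timestamp.fromISOformat("2023-01-08T00:00:00Z")
example_cat = pywikibot.Category(site, "Category:Example tracking category")
for page, added_at in _api_get_new_category_pages(example_cat, start, end, [0]):
    print(added_at, page.title())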
def db_get_usage(cat: pywikibot.Category, depth: int) -> UsageResult:
    query = """
        SELECT page_title, count(*)
        FROM categorylinks
        JOIN page ON cl_from = page_id
        LEFT JOIN globalimagelinks ON page_title = gil_to
        JOIN image ON img_name = page_title
        WHERE
            cl_to IN %(cats)s
            AND img_major_mime = "image"
            AND img_minor_mime != "svg+xml"
        GROUP BY page_title
        ORDER BY count(*) DESC
    """
    conn = toolforge.connect("commonswiki")
    with conn.cursor() as cur:
        total = cur.execute(
            query,
            args={
                "cats": [
                    cat.title(with_ns=False, underscore=True)
                    for cat in list_cats(cat, depth)
                ]
            },
        )
        data = cast(List[Tuple[bytes, int]], cur.fetchall())
    return UsageResult(
        [
            FileUsage(f"File:{str(page, encoding='utf-8')}", count)
            for page, count in data
        ][:200],
        total,
        [],
    )
def getBoundedCat(self, cat, currMinSize):
    print()
    print("getBoundedCat")
    site = Site("en")
    try:
        catObj = Category(site, title=cat)
    except:
        catObj = Category(site, title=cat.decode("utf-8"))
    subCats = self.recursiveCats(catObj)
    articleSet = set()
    for subCat in subCats:
        print("inside subCat", subCat)
        newArts = set(self.getArticles(subCat, recurse=False))
        articleSet.update(newArts)
        print(len(articleSet))
        if len(articleSet) > currMinSize:
            print("break")
            return currMinSize
            # continue
    return len(articleSet)
def main():
    site = pywikibot.getSite()
    cat = Category(site, 'Kategoria:francuski (indeks)')
    lista = pagegenerators.CategorizedPageGenerator(cat, start='tænia')
    for a in lista:
        h = Haslo(a.title())
        if h.typ == 3:
            h.sekcje()
            for c in h.lista_sekcje:
                if 'francuski' in c.jezyk:
                    print('\n' + h.tytul)
                    c.pola()
                    print(c.wymowa.tresc)
def getPagesTitleFromCategorie(site, categories):
    pages = []
    cats = [(
        Category(site, c['title']),
        c['namespace'] if ("namespace" in c) else None,
        c['recurse'] if ("recurse" in c) else 0,
    ) for c in categories]
    # retrieve all pages from the categories
    for (cat, ns, r) in cats:
        pages.append(cat.title())
        log("Retrieve pages from %s" % cat.title())
        # add this category's pages to the sync list
        pages.extend(mapTitle(cat.articles(namespaces=ns, recurse=r)))
    return pages
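# --- Hedged usage sketch (not part of the original script) ---
# The site and category entries below are illustrative; log() and mapTitle()
# are helpers defined elsewhere in the original code.
import pywikibot

example_site = pywikibot.Site('commons', 'commons')
example_categories = [
    {'title': 'Category:Example maps', 'namespace': 6, 'recurse': 1},
    {'title': 'Category:Example documentation'},
]
titles = getPagesTitleFromCategorie(example_site, example_categories)
print(len(titles), 'titles collected')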
def main():
    sekcje = [
        '{{odmiana', '{{etymologia}}', '{{wymowa}}', '{{znaczenia}}',
        '{{przykłady}}', '{{składnia}}', '{{kolokacje}}', '{{pokrewne}}',
        '{{frazeologia}}', '{{uwagi}}', '{{synonimy}}', '{{antonimy}}',
        '{{źródła}}',
    ]
    site = pywikibot.getSite()
    cat = Category(site, 'Kategoria:łaciński (indeks)')
    lista_stron = pagegenerators.CategorizedPageGenerator(cat)
    # lista_stron = pagegenerators.AllpagesPageGenerator(namespace=0, includeredirects=False)
    '''
    lista_stron2 = []
    for p in lista_stron:
        if u'Wikisłownik:' not in p.title and u'Szablon:' not in p.title and u'Kategoria:' not in p.title and u'Wikipedysta:' not in p.title and u'Aneks:' not in p.title and u'Indeks:' not in p.title and u'MediaWiki:' not in p.title and u'Portal:' not in p.title and u'Indeks:' not in p.title and u'#TAM' not in p.text and u'#PATRZ' not in p.text and u'Pomoc:' not in p.title and u'#REDIRECT' not in p.text and u'sentencja łacińska' not in p.text and u'#patrz' not in p.text and u'#tam' not in p.text:
            # if u'{{język francuski}}' in p.text:
            lista_stron2.append(p)

    sekcje_join = '|'.join(map(re.escape, sekcje))
    szukany_tekst = re.compile(u'{{odmiana}}.*\n{{składnia}}')
    '''
    out = ''
    for page in lista_stron:
        text = page.get()
        if '{{przykłady}}' not in text:
            print('*[[' + page.title() + ']]')
            out = out + '*[[' + page.title() + ']]\n'
    filename = "output-sprzątanie.txt"
    file = open(filename, 'w', encoding="utf-8")
    file.write(out)
    file.close()
def load_files(categories, depth):
    """
    Returns a list of unique files in categories

    @param categories: List of Commons category names as strings
    @type categories: list
    @param depth: Category recursion depth
    @type depth: int
    @rtype: list
    """
    files = set()
    for cat in categories:
        cat = Category(commons, cat)
        generator = CategorizedPageGenerator(cat, recurse=depth,
                                             namespaces=Namespace.FILE)
        for page in generator:
            files.add(page.title(withNamespace=False))
    return list(files)
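# --- Hedged usage sketch (not part of the original script) ---
# Assumes the module-level `commons` site used by load_files() is already
# initialised; the category names are placeholders.
example_files = load_files(['Example category A', 'Example category B'], depth=1)
print(len(example_files), 'unique files found')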
def main():
    site = pywikibot.getSite()
    cat = Category(site, 'Kategoria:francuski (indeks)')
    lista = pagegenerators.CategorizedPageGenerator(cat)
    for a in lista:
        h = Haslo(a.title())
        if h.typ == 3:
            h.sekcje()
            for c in h.lista_sekcje:
                if 'francuski' in c.jezyk:
                    print('\n' + h.tytul)
                    c.pola()
                    print(c.przyklady.tresc)
                    if (c.przyklady.tresc == '\n: (1.1)'
                            or c.przyklady.tresc == '\n: (1.1) ') and (
                            'rzeczownik' not in c.znaczenia.tresc) and (
                            '{{forma czasownika|fr}}' not in c.znaczenia.tresc):
                        text = '*[[%s]]\n' % h.tytul
                        file = open("log/ex.txt", 'a', encoding="utf-8")
                        file.write(text)
                        file.close()
def main(): site = pywikibot.getSite() cat = Category(site, 'Kategoria:esperanto (indeks)') lista = pagegenerators.CategorizedPageGenerator(cat, start='anemi') #, start=u'abduktoro' re_etymn = re.compile(r'\{\{etymn\|eo\|(.*?)\}\}') re_etymn_nr = re.compile( r'(\:\s*?\([0-9]\.[0-9]\))\s*?\{\{etymn\|eo\|(.*?)\}\}(.*?)\n') czesciMowy = [ 'rzeczownik', 'czasownik', 'przymiotnik', 'przysłówek', 'spójnik', 'liczebnik', 'zaimek', 'wykrzyknik', 'partykuła' ] #lista = [pywikibot.Page(site, u'aboc')] for word in lista: a = word.title() h = Haslo(a) if h.type == 3: morfem = 0 etymn = 0 pochodne = 0 pokrewne = 0 inneCzesci = 0 skrot = 0 for b in h.listLangs: b.pola() morfologia = '' if (b.type == 1 or b.type == 10) and b.lang == 'esperanto' and b.znaczeniaDetail: b.etymologia.numer() for c in b.znaczeniaDetail: if c[0] == '{{morfem|eo}}' or c[ 0] == '{{morfem|eo|przedrostkowy}}': morfem = 1 if any(e in c[0] for e in czesciMowy): inneCzesci = 1 if 'skrót' in c[0]: skrot = 1 wordCount = len(b.title.split()) if '{{pochodne}}' in b.content: pochodne = 1 if '{{etymn' in b.etymologia.text: etymn = 1 try: b.pokrewne except AttributeError: pass else: pokrewne = 1 #print u'type = %d, morfem = %d, inneCzesci = %d, skrot = %d, pochodne = %d, etymn = %d, pokrewne = %d' % (b.type, morfem, inneCzesci, skrot, pochodne, etymn, pokrewne) if b.type == 1 and not morfem and not pochodne and pokrewne and ( etymn or wordCount > 1 or skrot): if b.etymologia.type == 2: for elem in b.etymologia.list: s_etymn = re.findall(re_etymn, elem[1]) elem[1] = re.sub(re_etymn, '', elem[1]) if s_etymn: morfologia += '\n' + elem[0] for c in s_etymn: morfologia += ' {{morfeo|%s}}' % (c) if elem[1].strip() == '': elem[0] = '' elem[1] = '' else: elem[1] = elem[1].strip(' ') elem[1] = ' ' + elem[1] b.dodatki.text += '\n{{morfologia}}' + morfologia b.saveChanges() elif b.etymologia.type == 1: s_etymn = re.findall(re_etymn, b.etymologia.text) b.etymologia.text = re.sub(re_etymn, '', b.etymologia.text) b.etymologia.text = b.etymologia.text.strip(' ') if b.etymologia.text != '': b.etymologia.text = ' ' + b.etymologia.text for elem in s_etymn: morfologia += ' {{morfeo|%s}}' % elem b.dodatki.text += '\n{{morfologia}}' + morfologia b.saveChanges() elif b.type == 10 and morfem and pochodne and not etymn and not pokrewne and not inneCzesci: b.naglowek.text = b.naglowek.text.replace( '{{esperanto}}', '{{esperanto (morfem)}}') b.saveChanges() else: b.uwagi.text += ' {{zmiany-w-esperanto}}' b.saveChanges() history = word.getVersionHistory() done = 0 for elem in history: if elem[3] == 'reorganizacja esperanto (wydzielenie morfemów do osobnego języka)' and elem[ 2] == 'AlkamidBot': done = 1 if not done: h.push( False, 'reorganizacja esperanto (wydzielenie morfemów do osobnego języka)' )
def main(): test_mode = 0 site = pywikibot.getSite() site_en = pywikibot.getSite('en', 'wiktionary') site_com = pywikibot.getSite('commons', 'commons') cat = Category(site, 'Kategoria:chiński standardowy (indeks)') cat_com = Category(site, 'Chinese kanji stroke order') lista_stron = pagegenerators.CategorizedPageGenerator(cat) lista_com = pagegenerators.CategorizedPageGenerator(cat_com) log_site = pywikibot.Page(site, 'Wikipedysta:AlkamidBot/zch/log') lista = [] istnieje = [] han_char = re.compile('{{Han(_| )char\|(.*?)}') han_ref = re.compile('{{Han(_| )ref\|(.*})') zh_f = re.compile('{{zh-forms\|(.*)}') jap_f = re.compile('{{ja-forms\|(.*)}') kx = re.compile('kx=(.*?)(\||})') dkj = re.compile('\|dkj=(.*?)(\||})') dj = re.compile('\|dj=(.*?)(\||})') hdz = re.compile('\|hdz=(.*?)(\||})') rn = re.compile('rn=([0-9]*?)\|') rad = re.compile('rad=(.)') han_as = re.compile('as=([0-9]*?)\|') sn = re.compile('sn=([0-9]*?)\|') canj = re.compile('canj=([^\|]*)') cr = re.compile('four=(.*?)\|') alt = re.compile('alt=(.*?)\|') asj = re.compile('asj=(.*?)\|') tekst_przed = re.compile('(.*?)=', re.DOTALL) tekst_po = re.compile('.*?(=.*)', re.DOTALL) grafika = re.compile( '(\-bw\.|\-red\.|\-order\.|{{zch\-cienie}}|{{zch\-animacja}}|{{zch\-komiks}})' ) for page in lista_stron: if len(page.title()) == 1: lista.append(page) for a in lista: tekst = '' rn_abort = 0 rad_abort = 0 han_as_abort = 0 sn_abort = 0 canj_abort = 0 cr_abort = 0 try: strona = a.get() except pywikibot.IsRedirectPage: print('[[%s]] - przekierowanie' % a.title()) log = log + '\n*[[%s]] - przekierowanie' % a.title() except pywikibot.Error: print('[[%s]] - błąd' % a.title()) log = log + '\n*[[%s]] - błąd' % a.title() else: tekst_przed_s = re.search(tekst_przed, a.get()) tekst_po_s = re.search(tekst_po, a.get()) log = '' if test_mode == 1: sekcja_znak = 'fdssagrefadf' else: sekcja_znak = '{{znak chiński}}' if sekcja_znak in a.get(): print('[[%s]] - istnieje już sekcja {{znak chiński}}' % a.title()) log = log + '\n*[[%s]] - istnieje już sekcja {{s|znak chiński}}' % a.title( ) istnieje.append(a) else: ang = pywikibot.Page(site_en, a.title()) han_char_s = re.search(han_char, ang.get()) grafika_s = re.search(grafika, a.get()) if grafika_s != None: print('[[%s]] - znaleziono grafikę z CJK stroke order' % a.title()) log = log + '\n*[[%s]] - znaleziono grafikę z CJK stroke order' % a.title( ) if han_char_s != None: szablon_han = han_char_s.group(2) rn_s = re.search(rn, szablon_han) rad_s = re.search(rad, szablon_han) han_as_s = re.search(han_as, szablon_han) sn_s = re.search(sn, szablon_han) canj_s = re.search(canj, szablon_han) cr_s = re.search(cr, szablon_han) alt_s = re.search(alt, szablon_han) asj_s = re.search(asj, szablon_han) if alt_s == None: alter = 0 else: if alt_s.group(1) == '': alter = 0 else: alter = 1 if asj_s == None: alter1 = 0 else: if asj_s.group(1) == '': alter1 = 0 else: alter1 = 1 if alter == 0 and alter1 == 0: #print a.title() if rn_s == None: print('[[%s]] - Nie istnieje argument \'rn\'' % a.title()) log = log + '\n*[[%s]] - Nie istnieje argument \'rn\'' % a.title( ) rn_abort = 1 if rad_s == None: print('[[%s]] - Nie istnieje argument \'rad\'' % a.title()) log = log + '\n*[[%s]] - Nie istnieje argument \'rad\'' % a.title( ) rad_abort = 1 if han_as_s != None: #print han_as_s.group(1) if han_as_s.group(1) == '0' or han_as_s.group( 1) == '00': as_output = '+ 0' else: if han_as_s.group(1)[0] == '0': as_output = '+ %s' % han_as_s.group(1)[1] else: as_output = han_as_s.group(1)[1] #print as_output else: han_as_abort = 1 if sn_s 
== None: sn_abort = 1 if canj_s == None: canj_abort = 1 if cr_s != None: if cr_s.group(1).isspace() or cr_s.group(1) == '': print( '[[%s]] - argument \'four\' na en.wikt jest pusty - dodać ręcznie' % a.title()) log = log + '\n*[[%s]] - argument \'four\' na en.wikt jest pusty - dodać ręcznie' % a.title( ) else: cr_abort = 1 kolejnosc_koncowa_c = '' if pywikibot.ImagePage(site_en, '%s-bw.png' % a.title()).fileIsShared(): kolejnosc_koncowa_c = '{{zch-komiks}}' else: if pywikibot.ImagePage(site_en, '%s-red.png' % a.title()).fileIsShared(): kolejnosc_koncowa_c = '{{zch-cienie}}' else: if pywikibot.ImagePage( site_en, '%s-order.gif' % a.title()).fileIsShared(): kolejnosc_koncowa_c = '{{zch-animacja}}' kolejnosc_koncowa_j = '' if pywikibot.ImagePage(site_en, '%s-jbw.png' % a.title()).fileIsShared(): kolejnosc_koncowa_j = '{{zch-komiks|j}}' else: if pywikibot.ImagePage(site_en, '%s-jred.png' % a.title()).fileIsShared(): kolejnosc_koncowa_j = '{{zch-cienie|j}}' else: if pywikibot.ImagePage( site_en, '%s-jorder.gif' % a.title()).fileIsShared(): kolejnosc_koncowa_j = '{{zch-animacja|j}}' kolejnosc_koncowa_t = '' if pywikibot.ImagePage(site_en, '%s-tbw.png' % a.title()).fileIsShared(): kolejnosc_koncowa_t = '{{zch-komiks|t}}' else: if pywikibot.ImagePage(site_en, '%s-tred.png' % a.title()).fileIsShared(): kolejnosc_koncowa_t = '{{zch-cienie|t}}' else: if pywikibot.ImagePage( site_en, '%s-torder.gif' % a.title()).fileIsShared(): kolejnosc_koncowa_t = '{{zch-animacja|t}}' kolejnosc_koncowa_a = '' if pywikibot.ImagePage(site_en, '%s-abw.png' % a.title()).fileIsShared(): kolejnosc_koncowa_a = '{{zch-komiks|a}}' else: if pywikibot.ImagePage(site_en, '%s-ared.png' % a.title()).fileIsShared(): kolejnosc_koncowa_a = '{{zch-cienie|a}}' else: if pywikibot.ImagePage( site_en, '%s-aorder.gif' % a.title()).fileIsShared(): kolejnosc_koncowa_a = '{{zch-animacja|a}}' tekst = '== {{zh|%s}} ({{znak chiński}}) ==\n{{klucz}}' % a.title( ) if rn_abort or rad_abort or han_as_abort: print( '[[%s]] - w en.wikt nie istnieje któryś z argumentów do {{klucz}} - dodać ręcznie' % a.title()) log = log + '\n*[[%s]] - w en.wikt nie istnieje któryś z argumentów do {{s|klucz}} - dodać ręcznie' % a.title( ) else: tekst = tekst + ' %s %s %s' % ( rn_s.group(1), rad_s.group(1), as_output) tekst = tekst + '\n{{kreski}}' if sn_abort: print( '[[%s]] - w en.wikt nie istnieje argument do {{kreski}} - dodać ręcznie' ) log = log + '\n*[[%s]] - w en.wikt nie istnieje argument do {{s|kreski}} - dodać ręcznie' else: tekst = tekst + ' %s\n' % sn_s.group(1) zh_f_s = re.search(zh_f, ang.get()) ja_f_s = re.search(jap_f, ang.get()) warianty = '{{warianty' warianty_obr = '{{warianty-obrazek' ku = '' xu = '' sou = '' sot = '' ming = '' upr = '' trad = '' shin = '' if zh_f_s != None: zh_f_str = zh_f_s.group(1).replace( "[", "").replace("]", "").replace("{{zh-lookup|", "").replace("}", "") zh_osobno = zh_f_str.split('|') warianty = warianty + ' | {{zch-w|ct|%s}} | {{zch-w|cu|%s}}' % ( zh_osobno[1], zh_osobno[0]) ''' if pywikibot.ImagePage(site_en, u'%s-kaishu.svg' % zh_osobno[0]).fileIsShared(): ku = u' | {{zch-obrazek|ku|%s}}' % zh_osobno[0] else: if pywikibot.ImagePage(site_en, u'%s-kaishu.png' % zh_osobno[0]).fileIsShared(): ku = u' | {{zch-obrazek|ku|%s|p}}' % zh_osobno[0] else: if pywikibot.ImagePage(site_en, u'%s-kaishu.gif' % zh_osobno[0]).fileIsShared(): ku = u' | {{zch-obrazek|ku|%s|g}}' % zh_osobno[0] if pywikibot.ImagePage(site_en, u'%s-xinshu.svg' % zh_osobno[0]).fileIsShared(): xu = u' | {{zch-obrazek|xu|%s}}' % zh_osobno[0] else: if 
pywikibot.ImagePage(site_en, u'%s-xinshu.png' % zh_osobno[0]).fileIsShared(): xu = u' | {{zch-obrazek|xu|%s|p}}' % zh_osobno[0] else: if pywikibot.ImagePage(site_en, u'%s-xinshu.gif' % zh_osobno[0]).fileIsShared(): xu = u' | {{zch-obrazek|xu|%s|g}}' % zh_osobno[0] if pywikibot.ImagePage(site_en, u'%s-songti.svg' % zh_osobno[0]).fileIsShared(): sou = u' | {{zch-obrazek|sou|%s}}' % zh_osobno[0] else: if pywikibot.ImagePage(site_en, u'%s-songti.png' % zh_osobno[0]).fileIsShared(): sou = u' | {{zch-obrazek|sou|%s|p}}' % zh_osobno[0] else: if pywikibot.ImagePage(site_en, u'%s-songti.gif' % zh_osobno[0]).fileIsShared(): sou = u' | {{zch-obrazek|sou|%s|g}}' % zh_osobno[0] if ku != u'' or xu !=u'' or sou !=u'': warianty = warianty + u'{{warianty-obrazek' if ku != u'': warianty = warianty + ku if xu !=u'': warianty = warianty + xu if sou !=u'': warianty = warianty + sou warianty = warianty + u'}}' ''' if ja_f_s != None: ja_f_str = ja_f_s.group(1).replace( "[", "").replace("]", "").replace("{{zh-lookup|", "").replace("}", "") ja_osobno = ja_f_str.split('|') warianty = warianty + ' | {{zch-w|js|%s}} | {{zch-w|ct|%s}} | {{zch-w|cu|%s}}' % ( ja_osobno[0], ja_osobno[2], ja_osobno[1]) trad = ja_osobno[2] upr = ja_osobno[1] shin = ja_osobno[0] '''if pywikibot.ImagePage(site_en, u'%s-kaishu.svg' % ja_osobno[1]).fileIsShared(): ku = u' | {{zch-obrazek|ku|%s}}' % ja_osobno[1] else: if pywikibot.ImagePage(site_en, u'%s-kaishu.png' % ja_osobno[1]).fileIsShared(): ku = u' | {{zch-obrazek|ku|%s|p}}' % ja_osobno[1] else: if pywikibot.ImagePage(site_en, u'%s-kaishu.gif' % ja_osobno[1]).fileIsShared(): ku = u' | {{zch-obrazek|ku|%s|g}}' % ja_osobno[1] if pywikibot.ImagePage(site_en, u'%s-xinshu.svg' % ja_osobno[1]).fileIsShared(): xu = u' | {{zch-obrazek|xu|%s}}' % ja_osobno[1] else: if pywikibot.ImagePage(site_en, u'%s-xinshu.png' % ja_osobno[1]).fileIsShared(): xu = u' | {{zch-obrazek|xu|%s|p}}' % ja_osobno[1] else: if pywikibot.ImagePage(site_en, u'%s-xinshu.gif' % ja_osobno[1]).fileIsShared(): xu = u' | {{zch-obrazek|xu|%s|g}}' % ja_osobno[1] if pywikibot.ImagePage(site_en, u'%s-songti.svg' % ja_osobno[1]).fileIsShared(): sou = u' | {{zch-obrazek|sou|%s}}' % ja_osobno[1] else: if pywikibot.ImagePage(site_en, u'%s-songti.png' % ja_osobno[1]).fileIsShared(): sou = u' | {{zch-obrazek|sou|%s|p}}' % ja_osobno[1] else: if pywikibot.ImagePage(site_en, u'%s-songti.gif' % ja_osobno[1]).fileIsShared(): sou = u' | {{zch-obrazek|sou|%s|g}}' % ja_osobno[1] if ku != u'' or xu !=u'' or sou !=u'': warianty = warianty + u'{{warianty-obrazek' if ku != u'': warianty = warianty + ku if xu !=u'': warianty = warianty + xu if sou !=u'': warianty = warianty + sou warianty = warianty + u'}}''' if pywikibot.ImagePage(site_en, '%s-clerical.svg' % a.title()).fileIsShared(): warianty_obr = warianty_obr + ' | {{zch-obrazek|c|%s}}' % a.title( ) else: if pywikibot.ImagePage( site_en, '%s-clerical.png' % a.title()).fileIsShared(): warianty_obr = warianty_obr + ' | {{zch-obrazek|c|%s|p}}' % a.title( ) else: if pywikibot.ImagePage( site_en, '%s-clerical.gif' % a.title()).fileIsShared(): warianty_obr = warianty_obr + ' | {{zch-obrazek|c|%s|g}}' % a.title( ) if pywikibot.ImagePage(site_en, '%s-xinshu.svg' % a.title()).fileIsShared(): warianty_obr = warianty_obr + ' | {{zch-obrazek|xt|%s}}' % a.title( ) else: if pywikibot.ImagePage(site_en, '%s-xinshu.png' % a.title()).fileIsShared(): warianty_obr = warianty_obr + ' | {{zch-obrazek|xt|%s|p}}' % a.title( ) else: if pywikibot.ImagePage( site_en, '%s-xinshu.gif' % a.title()).fileIsShared(): 
warianty_obr = warianty_obr + ' | {{zch-obrazek|xt|%s|g}}' % a.title( ) if pywikibot.ImagePage(site_en, '%s-still.svg' % a.title()).fileIsShared(): warianty_obr = warianty_obr + ' | {{zch-obrazek|st|%s}}' % a.title( ) else: if pywikibot.ImagePage(site_en, '%s-caoshu.svg' % a.title()).fileIsShared(): warianty_obr = warianty_obr + ' | {{zch-obrazek|ca|%s}}' % a.title( ) else: if pywikibot.ImagePage( site_en, '%s-still.png' % a.title()).fileIsShared(): warianty_obr = warianty_obr + ' | {{zch-obrazek|st|%s|p}}' % a.title( ) else: if pywikibot.ImagePage( site_en, '%s-caoshu.png' % a.title()).fileIsShared(): warianty_obr = warianty_obr + ' | {{zch-obrazek|ca|%s|p}}' % a.title( ) else: if pywikibot.ImagePage( site_en, '%s-still.gif' % a.title()).fileIsShared(): warianty_obr = warianty_obr + ' | {{zch-obrazek|st|%s|g}}' % a.title( ) else: if pywikibot.ImagePage( site_en, '%s-caoshu.gif' % a.title()).fileIsShared(): warianty_obr = warianty_obr + ' | {{zch-obrazek|ca|%s|g}}' % a.title( ) if pywikibot.ImagePage(site_en, '%s-kaishu.svg' % a.title()).fileIsShared(): warianty_obr = warianty_obr + ' | {{zch-obrazek|kt|%s}}' % a.title( ) else: if pywikibot.ImagePage(site_en, '%s-kaishu.png' % a.title()).fileIsShared(): warianty_obr = warianty_obr + ' | {{zch-obrazek|kt|%s|p}}' % a.title( ) else: if pywikibot.ImagePage( site_en, '%s-kaishu.gif' % a.title()).fileIsShared(): warianty_obr = warianty_obr + ' | {{zch-obrazek|kt|%s|g}}' % a.title( ) if pywikibot.ImagePage(site_en, '%s-songti.svg' % a.title()).fileIsShared(): warianty_obr = warianty_obr + ' | {{zch-obrazek|sot|%s}}' % a.title( ) else: if pywikibot.ImagePage(site_en, '%s-songti.png' % a.title()).fileIsShared(): warianty_obr = warianty_obr + ' | {{zch-obrazek|sot|%s|p}}' % a.title( ) else: if pywikibot.ImagePage( site_en, '%s-songti.gif' % a.title()).fileIsShared(): warianty_obr = warianty_obr + ' | {{zch-obrazek|sot|%s|g}}' % a.title( ) '''if sot != u'': ming = ming + sot else: if zh_f_s != None: ming = ming + u' | {{zch-w|ct|%s}}' % zh_osobno[1] if ja_f_s != None: ming = ming + u' | {{zch-w|ct|%s}}' % ja_osobno[2] if sou != u'': ming = ming + sou else: if zh_f_s != None: ming = ming + u' | {{zch-w|cu|%s}}' % zh_osobno[0] if ja_f_s != None: ming = ming + u' | {{zch-w|cu|%s}}' % ja_osobno[1]''' if warianty == '{{warianty': tekst = tekst + '{{warianty|{{zch-w}}' else: tekst = tekst + warianty tekst = tekst + '}}' if warianty_obr != '{{warianty-obrazek': tekst = tekst + ' ' + warianty_obr + '}}' tekst = tekst + '\n{{kolejność}}' if kolejnosc_koncowa_c == '' and kolejnosc_koncowa_j == '' and kolejnosc_koncowa_t == '' and kolejnosc_koncowa_a == '': print( '[[%s]] - na commons nie znaleziono żadnej kolejności pisania' % a.title()) log = log + '\n*[[%s]] - na commons nie znaleziono żadnej kolejności pisania' % a.title( ) else: tekst = tekst + '\n' if kolejnosc_koncowa_c != '': tekst = tekst + '%s ' % kolejnosc_koncowa_c if kolejnosc_koncowa_j != '': tekst = tekst + '%s ' % kolejnosc_koncowa_j if kolejnosc_koncowa_t != '': tekst = tekst + '%s ' % kolejnosc_koncowa_t if kolejnosc_koncowa_a != '': tekst = tekst + '%s ' % kolejnosc_koncowa_a tekst = tekst + '\n{{znaczenia}}\n{{etymologia}}' etym = ' {{warianty-obrazek' if pywikibot.ImagePage(site_en, '%s-oracle.svg' % a.title()).fileIsShared(): etym = etym + ' | {{zch-obrazek|o|%s}}' % a.title() else: if pywikibot.ImagePage(site_en, '%s-oracle.png' % a.title()).fileIsShared(): etym = etym + ' | {{zch-obrazek|o|%s|p}}' % a.title( ) if pywikibot.ImagePage(site_en, '%s-bronze.svg' % a.title()).fileIsShared(): 
etym = etym + ' | {{zch-obrazek|br|%s}}' % a.title( ) else: if pywikibot.ImagePage(site_en, '%s-bronze.png' % a.title()).fileIsShared(): etym = etym + ' | {{zch-obrazek|br|%s|p}}' % a.title( ) if pywikibot.ImagePage(site_en, '%s-bigseal.svg' % a.title()).fileIsShared(): etym = etym + ' | {{zch-obrazek|bs|%s}}' % a.title( ) else: if pywikibot.ImagePage( site_en, '%s-bigseal.png' % a.title()).fileIsShared(): etym = etym + ' | {{zch-obrazek|bs|%s|p}}' % a.title( ) if pywikibot.ImagePage(site_en, '%s-seal.svg' % a.title()).fileIsShared(): etym = etym + ' | {{zch-obrazek|ss|%s}}' % a.title( ) else: if pywikibot.ImagePage(site_en, '%s-seal.png' % a.title()).fileIsShared(): etym = etym + ' | {{zch-obrazek|ss|%s|p}}' % a.title( ) etym = etym + '}}' if etym != ' {{warianty-obrazek}}': tekst = tekst + etym tekst = tekst + '\n{{kody|cjz=' if canj_abort: print( '[[%s]] - w en.wikt nie istnieje argument cjz - dodać ręcznie' ) log = log + '\n*[[%s]] - w en.wikt nie istnieje argument cjz - dodać ręcznie' else: tekst = tekst + '%s' % canj_s.group(1) tekst = tekst + '|cr=' if cr_abort == 1: print( '[[%s]] - w en.wikt nie istnieje argument \'\'four\'\' - dodać ręcznie' % a.title()) log = log + '\n*[[%s]] - w en.wikt nie istnieje argument \'\'four\'\' - dodać ręcznie' % a.title( ) else: tekst = tekst + '%s' % cr_s.group(1) tekst = tekst + '|u=%x}}' % ord(a.title()) han_ref_s = re.search(han_ref, ang.get()) if han_ref_s != None: tekst = tekst + '\n{{słowniki' kx_s = re.search(kx, han_ref_s.group(2)) if kx_s != None: tekst = tekst + '|kx=%s' % kx_s.group(1) dkj_s = re.search(dkj, han_ref_s.group(2)) if dkj_s != None: tekst = tekst + '|dkj=%s' % dkj_s.group(1) dj_s = re.search(dj, han_ref_s.group(2)) if dj_s != None: tekst = tekst + '|dj=%s' % dj_s.group(1) hdz_s = re.search(hdz, han_ref_s.group(2)) if hdz_s != None: tekst = tekst + '|hdz=%s' % hdz_s.group(1) tekst = tekst + '}}' tekst = tekst + '\n{{uwagi}}\n{{źródła}}\n\n' else: print( '[[%s]] - znaleziono alternatywne zapisy, pomijam' % a.title()) log = log + '\n*[[%s]] - znaleziono alternatywne zapisy, pomijam' % a.title( ) final = tekst_przed_s.group(1) + tekst + tekst_po_s.group( 1) if test_mode == 1: print(final + '\n\n') else: a.put(final, comment='bot dodaje sekcję {{znak chiński}}') else: print('[[%s]] - Nie znaleziono szablonu {{Han char}}' % a.title()) log = log + '\n*[[%s]] - Nie znaleziono szablonu {{s|Han char}}, pomijam' % a.title( ) log_site = pywikibot.Page(site, 'Wikipedysta:AlkamidBot/zch/log') log_stary = log_site.get() if test_mode == 1: print(log) else: log = log_stary + log log_site.put(log, comment='%s' % a.title())
from pywikibot.pagegenerators import CategorizedPageGenerator
from pywikibot import Site, Category
import json

site = Site('commons', 'commons')
cat = Category(site, 'Category:Images_from_Nordiska_museet:_2019-06')

pages = list()
for page in CategorizedPageGenerator(cat, recurse=False, namespaces=6):
    item = {}
    item['id'] = page.pageid
    print(page.title())
    item['title'] = str(page.title())
    if ('(2)' in item['title']) or ('(3)' in item['title']):
        continue
    pages.append(item)

with open('../static/pages.json', 'w') as outfile:
    json.dump(pages, outfile, ensure_ascii=False)
def maariv_papers_pages() -> Iterable[pw.Page]:
    maariv_papers = Category(site, maariv_papers_category)
    return pagegenerators.CategorizedPageGenerator(maariv_papers)
def main():
    site = pywikibot.getSite()
    # mode = 1 - updating pages from recent changes; 2 - after adding new
    # languages to the script, all the words in that language have to be checked
    mode = 1
    mylist = set()
    if mode == 1:
        RClimit = readRCLimit('headerIndexing').strip()
        mylist = RecentChanges(RClimit)
        writeRCLimit('headerIndexing')
    if mode == 2:
        newlangs = [
            'arabski', 'perski', 'paszto', 'dari', 'urdu', 'osmańsko-turecki'
        ]
        for elem in newlangs:
            cat = Category(site, 'Kategoria:%s (indeks)' % elem)
            pageSet = set(pagegenerators.CategorizedPageGenerator(cat))
            for page in pageSet:
                mylist.add(page.title())

    replace = {}
    replace['arabski'] = {'إ': 'ا', 'آ': 'ا', 'ا': 'ا', 'أ': 'ا'}
    replace['dari'] = {'إ': 'ا', 'آ': 'ا', 'ا': 'ا', 'أ': 'ا'}
    replace['francuski'] = {
        'À': 'A', 'Â': 'A', 'Ç': 'C', 'É': 'E', 'È': 'E', 'Ë': 'E', 'Ê': 'E',
        'Î': 'I', 'Ï': 'I', 'Ô': 'O', 'Œ': 'OE', 'Ù': 'U', 'Ú': 'U', 'Û': 'U',
        'à': 'a', 'â': 'a', 'ç': 'c', 'é': 'e', 'è': 'e', 'ë': 'e', 'ê': 'e',
        'î': 'i', 'ï': 'i', 'ô': 'o', 'œ': 'oe', 'ù': 'u', 'ú': 'u', 'û': 'u'
    }
    replace['hiszpański'] = {
        'Á': 'A', 'É': 'E', 'Í': 'I', 'Ó': 'O', 'Ú': 'U',
        'á': 'a', 'é': 'e', 'í': 'i', 'ó': 'o', 'ú': 'u'
    }
    # replace['kurdyjski'] = {'É': 'E', 'Í': 'I', 'Ú': 'U', 'Ù': 'U', 'é': 'e', 'í': 'i', 'ú': 'u', 'ù': 'u'}
    replace['nowogrecki'] = {
        'Ά': 'Α', 'Έ': 'Ε', 'Ή': 'Η', 'Ί': 'Ι', 'Ϊ': 'Ι', 'Ό': 'Ο', 'Ύ': 'Υ',
        'Ϋ': 'Υ', 'Ώ': 'Ω', 'ά': 'α', 'έ': 'ε', 'ί': 'ι', 'ϊ': 'ι', 'ΐ': 'ι',
        'ό': 'ο', 'ύ': 'υ', 'ϋ': 'υ', 'ΰ': 'υ', 'ώ': 'ω', 'ή': 'η', 'ς': 'σ'
    }
    replace['osmańsko-turecki'] = {'إ': 'ا', 'آ': 'ا', 'ا': 'ا', 'أ': 'ا'}
    replace['perski'] = {'إ': 'ا', 'آ': 'ا', 'ا': 'ا', 'أ': 'ا'}
    replace['paszto'] = {'إ': 'ا', 'آ': 'ا', 'ا': 'ا', 'أ': 'ا'}
    replace['urdu'] = {'إ': 'ا', 'آ': 'ا', 'ا': 'ا', 'أ': 'ا'}
    # replace['wietnamski'] = {'Ă': 'A', 'Â': 'A', 'Đ': 'D', 'Ê': 'E', 'Ô': 'O', 'Ơ': 'O', 'Ư': 'U', 'ă': 'a', 'â': 'a', 'đ': 'd', 'ê': 'e', 'ô': 'o', 'ơ': 'o', 'ư': 'u'}

    for mytitle in mylist:
        try:
            h = Haslo(mytitle)
        except sectionsNotFound:
            pass
        except WrongHeader:
            pass
        else:
            if h.type == 3:
                change = 0
                for c in h.listLangs:
                    try:
                        c.lang
                    except AttributeError:
                        pass
                    else:
                        if c.lang in replace:
                            first = c.title
                            temp = c.title
                            for rep in replace[c.lang]:
                                temp = temp.replace(rep, replace[c.lang][rep])
                            if first != temp:
                                c.headerArg = temp
                                c.updateHeader()
                                change = 1
                if change:
                    h.push(False, 'modyfikacja nagłówka w celu poprawnego indeksowania haseł (usunięcie znaków diakrytycznych)')
def league_table_files():
    league_table_files_category = Category(site, league_table_files_category_name)
    return pagegenerators.CategorizedPageGenerator(league_table_files_category)
def main():
    site = pywikibot.getSite()
    indeks = pywikibot.Page(site, 'Indeks:Francuski_-_Związki_frazeologiczne')
    cat = Category(site, 'Kategoria:francuski_(indeks)')
    gen1 = pagegenerators.CategorizedPageGenerator(cat)
    ex = pywikibot.Page(site, 'Wikipedysta:AlkamidBot/wykluczone')
    re_obj = re.compile(
        r"''związek frazeologiczny''\n:\s*\(1\.1\) (\[\[[^]]*\]\])(\n|<ref)")
    tekst_dodaj = " zobacz też [[Indeks:Francuski - Związki frazeologiczne]]"
    zw = []
    trad = []
    lista = []
    for page in gen1:
        if ('związek frazeologiczny' in page.get()) \
                and (page.title() not in indeks.get()) \
                and (page.title() not in ex.get()):
            tlum = re_obj.search(page.get())
            if tlum != None:
                print(page, ' dodatek')
                zw.append(page.title())
                trad.append(tlum.group(1))
                print(tlum.group(1))
                if '[[Indeks: Francuski - Związki frazeologiczne]]' not in page.get() \
                        and '[[Indeks:Francuski - Związki frazeologiczne]]' not in page.get() \
                        and '{{źródła}}' in page.get():
                    sekcja_przed = re.search(r"(.*?)\n{{źródła}}", page.get(), re.DOTALL)
                    sekcja_po = re.search(r"({{źródła}}.*)", page.get(), re.DOTALL)
                    dozmiany = sekcja_przed.group(1)
                    dozmiany += tekst_dodaj
                    final = dozmiany + '\n' + sekcja_po.group(1)
                    page.put(final,
                             comment='bot dodaje linka do indeksu związków frazeologicznych')
            else:
                text = ex.get()
                text += '\n* [[' + page.title() + ']]'
                ex.put(text, comment='bot dodaje wyjątek')
                print("zła: ", page)

    orig = indeks.get()
    for a, b in zip(zw, trad):
        lit = a[0]
        litcap = lit.capitalize()
        sekcja = re.search(r"== %s ==\n(.*?)\{\{do góry\}\}" % (litcap), orig, re.DOTALL)
        sekcja_przed = re.search(r"(.*?== %s ==\n)" % litcap, orig, re.DOTALL)
        sekcja_po = re.search(r"== %s ==\n.*?({{do góry}}.*)" % litcap, orig, re.DOTALL)
        lista = sekcja.group(1).split('\n')
        str = "* [[" + a + "]] → " + b
        lista.append(str)
        bez = [x for x in lista if len(x) > 1]
        bez.sort()
        bez1 = "\n".join(bez) + "\n\n"
        orig = sekcja_przed.group(1) + bez1 + sekcja_po.group(1)
    indeks.put(orig, comment='bot aktualizuje indeks', botflag=False)
def main():
    site = pywikibot.Site()
    # fetch Polish pages only
    cat_allpages = Category(site, 'Kategoria:polski (indeks)')
    # dialects are excluded, it would be too difficult to link all of them
    cat_dialects = Category(site, 'Kategoria:Polski_(dialekty_i_gwary)')
    list_allpages = pagegenerators.CategorizedPageGenerator(cat_allpages)
    list_dialects = set(
        pagegenerators.CategorizedPageGenerator(cat_dialects, recurse=True))
    count_all = 0
    intro = (
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"\n'
        '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n'
        '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="pl">\n'
        '<head>\n<meta http-equiv="content-type" content="text/html; charset=UTF-8" />\n'
        '</head><body>')
    intro += (
        'Poniżej znajduje się lista polskich haseł, do których '
        'nie linkuje żadne inne hasło z głównej przestrzeni nazw. W związku '
        'z tym trudno trafić do takiego hasła inaczej niż przez bezpośrednie jego '
        'wyszukanie. Jeśli możesz, dodaj w innym haśle odnośnik do porzuconego '
        'słowa, np. w przykładach lub pokrewnych.')
    with open('{0}public_html/porzucone.html.1'.format(config.path['home']),
              'w', encoding='utf-8') as f:
        f.write(intro)
        for page in list_allpages:
            if page not in list_dialects:
                # only look in the main namespace (virtually all pages are
                # referenced somewhere, e.g. in missing pronunciation lists)
                refs = list(page.getReferences(namespaces=0, total=2))
                try:
                    # the reference search is limited to 2, then the page itself
                    # is removed (on pl.wikt pages often self-link in usage examples)
                    refs.remove(page)
                except ValueError:
                    pass
                if len(refs) == 0:
                    try:
                        f.write('\n<br /><a href="http://pl.wiktionary.org/wiki/{0}">{0}</a>'
                                .format(page.title()))
                    except UnicodeEncodeError:
                        print('Unicode Error: ', page.title())
                        pass
                    count_all += 1
        date_now = datetime.datetime.now() + datetime.timedelta(hours=2)
        f.write(date_now.strftime(
            "\n<br />Ostatnia aktualizacja listy: %Y-%m-%d, %H:%M:%S"))
        f.write('<br />Licznik porzuconych: {0}'.format(count_all))
        f.write('</body></html>')
    move('{0}public_html/porzucone.html.1'.format(config.path['home']),
         '{0}public_html/porzucone.html'.format(config.path['home']))
def main(): global expedListPeople global expedListPeopleOrig global expedListGrats global expedListGratsOrig # wikipedia.verbose = 1 titleOfPageToLoad = u'2009-06-14_49_-122' # The "u" before the title means Unicode, important for special characters # pywikibot.put_throttle.setDelay(5, absolute = True) # wikipedia.get_throttle.setDelay(5, absolute = True) enwiktsite = pywikibot.Site('en', 'geohashing') # loading a defined project's page # os.unlink("graticules.sqlite") # db = GraticuleDatabase.GraticuleDatabase() db = GraticuleDatabase.GraticuleDatabase("graticules.sqlite") all = db.getAllKeys() # catdb = Category.CategoryDatabase() pp_list2 = Category(enwiktsite, u"Category:Expedition_planning").articles() # pp_list2 = get_all_category_pages(enwiktsite, u"Category:Expedition_planning", catdb) #Produce a list of all pages from 3 weekdays ago through when coordinates are available # by looking at the [[Category:Meetup on YYYY-MM-DD]] pages expedListPeople = parseExpedLists(enwiktsite) expedListGrats = parseExpedListsGrats(enwiktsite) # Save off the original pages so we can only update exped lists if they've changed expedListPeopleOrig = copy.deepcopy(expedListPeople) expedListGratsOrig = copy.deepcopy(expedListGrats) all_text = u"" first_date_obj = get_last_day_avail(datetime.date.today() + datetime.timedelta(7)) last_date_obj = first_date_obj cur_dates = [] plan_dates = [] old_date_list = [] try: for i in range(0,3): while (first_date_obj > datetime.date.today()): cur_dates.append(first_date_obj.isoformat()) expedSums = ExpeditionSummaries.ExpeditionSummaries(enwiktsite, first_date_obj.isoformat(), db) expedListPeople = updateExpedLists(expedSums, expedListPeople, first_date_obj.isoformat()) expedListGrats = updateExpedListsGrats(expedSums, expedListGrats, first_date_obj.isoformat()) first_date_obj = first_date_obj - datetime.timedelta(1) cur_dates.append(first_date_obj.isoformat()) expedSums = ExpeditionSummaries.ExpeditionSummaries(enwiktsite, first_date_obj.isoformat(), db) expedListPeople = updateExpedLists(expedSums, expedListPeople, first_date_obj.isoformat()) expedListGrats = updateExpedListsGrats(expedSums, expedListGrats, first_date_obj.isoformat()) first_date_obj = first_date_obj - datetime.timedelta(1) while (first_date_obj.weekday() > 4): cur_dates.append(first_date_obj.isoformat()) expedSums = ExpeditionSummaries.ExpeditionSummaries(enwiktsite, first_date_obj.isoformat(), db) expedListPeople = updateExpedLists(expedSums, expedListPeople, first_date_obj.isoformat()) expedListGrats = updateExpedListsGrats(expedSums, expedListGrats, first_date_obj.isoformat()) first_date_obj = first_date_obj - datetime.timedelta(1) cur_dates.append(first_date_obj.isoformat()) expedSums = ExpeditionSummaries.ExpeditionSummaries(enwiktsite, first_date_obj.isoformat(), db) expedListPeople = updateExpedLists(expedSums, expedListPeople, first_date_obj.isoformat()) expedListGrats = updateExpedListsGrats(expedSums, expedListGrats, first_date_obj.isoformat()) first_date = first_date_obj.isoformat() remove_dates(enwiktsite, cur_dates) #Get a list of old date pages to update old_date_list = get_old_dates(enwiktsite, db) #This looks at the pages in [[Category:Expedition planning]] # and produces the summaries for all the pages for far in the future plan_dates = getExpeditionSummaries(pp_list2, db, None, (last_date_obj+datetime.timedelta(1)).isoformat()) for i in plan_dates.keys(): cur_dates.append(i) if check_banana(enwiktsite) != 0: return 1 updateUserTexts(enwiktsite) updateGratTexts(enwiktsite) except 
Exception as e: pywikibot.output("cur_dates: " + str(cur_dates)) pywikibot.output("plan_dates: " + str(plan_dates)) pywikibot.output("old_dates: " + str(old_date_list)) bug_page = pywikibot.Page(enwiktsite, u"User:AperfectBot/BotBugs") bug_page_text = bug_page.get() bug_page_text = bug_page_text + u"\n== NEW REPORT ==\nDates:\n" + str(cur_dates) + str(plan_dates) + str(old_date_list) + u"\n" pywikibot.output(bug_page_text) page_write(bug_page, bug_page_text, enwiktsite) raise e #Create the [[Template:Expedition_summaries/YYYY-MM-DD]] pages for planning page dates putExpeditionSummaries(plan_dates, enwiktsite) #Build up the text for [[Template:Recent_expeditions]] recent_expedition_page_name = u"Template:Recent_expeditions" recent_exp_page = pywikibot.Page(enwiktsite, recent_expedition_page_name) recent_exp_text = recent_exp_page.get() recent_exp_res = re.findall("=== \[\[(\d{4}-\d{2}-\d{2}).*?\]\] ===\n([^=]*)", recent_exp_text, re.S) recent_exp_hash = {} for i in range(0,len(recent_exp_res)): recent_exp_hash[recent_exp_res[i][0]] = recent_exp_res[i][1] summary_text = u"" summary_text += u"<noinclude>__NOTOC__</noinclude>\n" date_keys = cur_dates date_keys.sort() date_keys.reverse() if (date_keys[0] > last_date_obj.isoformat()): summary_text += u"== Upcoming Events ==\n" for i in date_keys: if (summary_text[len(summary_text)-1] != u"\n"): summary_text += u"\n" if (i == (datetime.date.today() - datetime.timedelta(1)).isoformat()): summary_text += u"== Recent Expeditions ==\n" if (i == last_date_obj.isoformat()): summary_text += u"== Expeditions Being Planned ==\n" if i in recent_exp_hash: summary_text += recent_exp_hash[i] else: summary_text += u"{{Expedition_summaries|" + i + u"}}\n" summary_text += u"<!--Insert manual updates below this line. Manual updates may not contain equal signs-->\n" recent_exp_page = pywikibot.Page(enwiktsite, recent_expedition_page_name) page_write(recent_exp_page, summary_text, enwiktsite)
def get_all_football_games_category_pages():
    games_category = Category(site, football_games_category_name)
    games_category = list(
        pagegenerators.CategorizedPageGenerator(games_category))
    return games_category
def get_all_football_players_category_pages():
    players_category = Category(site, football_players_category_name)
    players_category_pages = list(
        pagegenerators.CategorizedPageGenerator(players_category))
    return players_category_pages
from pywikibot.pagegenerators import CategorizedPageGenerator
from pywikibot import Site, Category
from dataskakare import GoogleTranslate
import mwparserfromhell
import hashlib
import uuid
import json

site = Site('commons', 'commons')
cat = Category(site, 'Category:Media_contributed_by_the_Swedish_Performing_Arts_Agency:_2019-03')
translate = GoogleTranslate(input('google service account file:'))


def thumb_from_title(title):
    safe_title = title.encode('utf-8')
    md5_title = hashlib.md5(safe_title).hexdigest()
    return 'https://upload.wikimedia.org/wikipedia/commons/thumb/{}/{}/{}/500px-{}.jpg'.format(
        md5_title[:1], md5_title[:2], title, title)


final_pages = list()
for page in CategorizedPageGenerator(cat, recurse=False, namespaces=6):
    wikicode = mwparserfromhell.parse(page.text)
    template_to_parse = False
    for template in wikicode.filter_templates():
        if template.name.matches('Musikverket-image'):
            template_to_parse = template
    if not template_to_parse:
        print('failed to find given template')
        continue
def main():
    global test_mode
    test_mode = 0
    global site
    site = pywikibot.getSite()
    global site_en
    site_en = pywikibot.getSite('en', 'wiktionary')
    global site_com
    site_com = pywikibot.getSite('commons', 'commons')
    global log_site
    log_site = pywikibot.Page(site, 'Wikipedysta:AlkamidBot/zch/log')
    cat = Category(site, 'Kategoria:japoński (indeks)')
    cat_en = Category(site_en, 'Category:Han_characters')
    lista_stron_en = pagegenerators.CategorizedPageGenerator(cat_en)
    lista_stron = pagegenerators.CategorizedPageGenerator(cat)
    tekst_przed = re.compile('(.*?)=', re.DOTALL)
    tekst_po = re.compile('.*?(=.*)', re.DOTALL)
    lista = []
    grafika = re.compile(
        '(\-bw\.|\-red\.|\-order\.|{{zch\-cienie}}|{{zch\-animacja}}|{{zch\-komiks}})')
    log = ''
    # for page in lista_stron_en:
    #     if len(page.title()) == 1:
    #         lista.append(page)
    lista.append(pywikibot.Page(site, '九'))
    lista.append(pywikibot.Page(site, '八'))
    for a in lista:
        final = ''
        log = ''
        a_pl = pywikibot.Page(site, a.title())
        try:
            strona = a_pl.get()
        except pywikibot.IsRedirectPage:
            print('[[%s]] - przekierowanie na pl.wikt' % a_pl.title())
            log = log + '\n*[[%s]] - przekierowanie na pl.wikt' % a_pl.title()
        except pywikibot.NoPage:
            result = zch(a_pl)
            if result != 0:
                if test_mode == 1:
                    print(result + '\n\n')
                else:
                    a_pl.put(result, comment='źródło: [[:en:%s]]' % a.title())
        except pywikibot.Error:
            print('[[%s]] - błąd na en.wikt' % a_pl.title())
            log = log + '\n*[[%s]] - błąd na pl.wikt' % a_pl.title()
        else:
            tekst_przed_s = re.search(tekst_przed, a_pl.get())
            tekst_po_s = re.search(tekst_po, a_pl.get())
            '''
            grafika_s = re.search(grafika, a.get())
            if grafika_s != None:
                print u'[[%s]] - znaleziono grafikę z CJK stroke order' % a.title()
                log = log + u'\n*[[%s]] - znaleziono grafikę z CJK stroke order' % a.title()
            '''
            if test_mode == 1:
                sekcja_znak = 'fdss73agrefadf'
            else:
                sekcja_znak = '{{znak chiński}}'
            if sekcja_znak not in a_pl.get():
                result = zch(a_pl)
                if result != 0:
                    final = tekst_przed_s.group(1) + result + '\n\n' + tekst_po_s.group(1)
                    if test_mode == 1:
                        print(final + '\n\n')
                    else:
                        a_pl.put(final, comment='źródło: [[:en:%s]]' % a.title())
        log_write(log_site, log, a_pl.title())
def main():
    list = makeConversionList()
    global site_pl
    site_pl = pywikibot.getSite()
    global site_en
    site_en = pywikibot.getSite('en', 'wiktionary')
    global commons
    commons = pywikibot.getSite('commons', 'commons')
    global test_mode
    test_mode = 0
    global data_en
    data_en = '20120125'
    cat_en = Category(site_en, 'Category:Han_characters')
    lista_stron_en = pagegenerators.CategorizedPageGenerator(cat_en)
    file = open("%s/wikt/moje/log/zch.txt" % environ['HOME'], 'w', encoding="utf-8")
    file.write('')
    file.close()
    # pagesDump1 = xmlreader.XmlDump('/mnt/user-store/dumps/enwiktionary/enwiktionary-%s-pages-articles.xml' % data_en)
    # pagesDump = xmlreader.XmlDump.parse(pagesDump1)
    lista_stron_en = ['㭻']
    for elem in lista_stron_en:
        title = elem.title()
        if len(title) == 1:
            en = retrieveEnPlusCommons(title)
            pl = Haslo(title)
            sekcja = None
            if en and pl.type not in (0, 1, 2):
                try:
                    pl.listLangs
                except AttributeError:
                    log('*[[%s]] - brak listy sekcji!' % en.title)
                else:
                    for sec in pl.listLangs:
                        if sec.lang == 'znak chiński':
                            sec.pola()
                            if sec.type == 4:
                                sekcja = sec
            elif en and pl.type == 1:
                sekcja = Sekcja(title=title, type=4, lang='znak chiński')
            if sekcja:
                push = 0
                if compare(en, sekcja, 'klucz'):
                    push = 1
                if compare(en, sekcja, 'kreski'):
                    push = 1
                if compare(en, sekcja, 'kody'):
                    push = 1
                if compare(en, sekcja, 'warianty'):
                    push = 1
                if compare(en, sekcja, 'kolejnosc'):
                    push = 1
                if compare(en, sekcja, 'etymologia'):
                    push = 1
                if compare(en, sekcja, 'slowniki'):
                    push = 1
                if ordinal(sekcja):
                    push = 1
                tab = SimpTrad(title, list)
                if compare(en, sekcja, 'upr-trad', tab):
                    push = 1
                if push:
                    if pl.type == 1:
                        pl = Haslo(title, new=True)
                    pl.addSection(sekcja)
                    log('*[[%s]] - dodano' % title)
                    pl.push(
                        False,
                        myComment='aktualizacja danych o znaku chińskim; źródła: [[:en:%s]], http://simplify.codeplex.com/, commons' % title,
                        new=True)
    logPage = pywikibot.Page(site_pl, 'Wikipedysta:AlkamidBot/listy/znak chiński')
    logPageText = 'AlkamidBot cyklicznie sprawdza, czy w angielskim Wikisłowniku lub na commons pojawiły się nowe informacje o znakach chińskich (np. warianty pisania, zapisy etymologiczne itp.). Na tej liście zapisuje problemy, jakie napotkał: ("tabela" oznacza dane z http://simplify.codeplex.com/)\n\n'
    file = codecs.open("%s/wikt/moje/log/zch.txt" % environ['HOME'], 'r', 'utf-8')
    logPageText += file.read()
    file.close()
collections = False
with open('src/static/collections.json') as json_file:
    collections = json.load(json_file)

found_collection = False
for c in collections:
    if c['generator_value'] == arg:
        found_collection = c['id']
        break

if not found_collection:
    print('Could not find a collection for the given category')

site = Site('commons', 'commons')
cat = Category(site, 'Category:{}'.format(arg))
gen = CategorizedPageGenerator(cat, recurse=False, namespaces=6)

endpoint = 'https://commons.wikimedia.org/w/api.php?format=json&action=wbgetentities&ids='
final_translations = list()


def chunks(l, n):
    """Yield successive n-sized chunks from l."""
    for i in range(0, len(l), n):
        yield l[i:i + n]


for page in gen:
    media_id = 'M{}'.format(page.pageid)