def main():
    """Main method."""
    maintenance = MerimeeTypeMaintenance()
    merimee_category = Category(site, title=u'Mérimée without type parameter')
    pages_generator = merimee_category.articles()
    for page in pages_generator:
        maintenance.process_page(page)
Example #2
    def _getAllCategoryPages(self):
        page = pywikibot.Page(self.site, "Category:Meetup on " + self.date)
        pageText = u"[[Category:Meetup in " + re.sub(r"-\d{2}$", "",
                                                     self.date) + u"]]"
        self._pageWrite(page, pageText)

        cat = Category(self.site, "Meetup on " + self.date)
        articleList = cat.articlesList()
        return articleList
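For clarity, the re.sub call above only strips a trailing "-DD" day component so the page is filed under a monthly category. A minimal sketch (the "YYYY-MM-DD" date format is an assumption, not stated in the source):

import re

date = "2012-05-16"  # hypothetical value of self.date
print(u"[[Category:Meetup in " + re.sub(r"-\d{2}$", "", date) + u"]]")
# -> [[Category:Meetup in 2012-05]]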
Example #3
def redirect_cat(cat: pywikibot.Category, target: pywikibot.Category,
                 summary: str) -> None:
    """
    Redirect a category to another category.

    @param cat: Category to redirect
    @param target: Category redirect target
    @param summary: Edit summary
    """
    tpl = Template('Category redirect')
    tpl.add('1', target.title(with_ns=False))
    cat.text = str(tpl)
    cat.save(summary=summary)
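A usage sketch for redirect_cat (site and category names are illustrative; assumes mwparserfromhell's Template is imported as in the snippet):

import pywikibot

site = pywikibot.Site('en', 'wikipedia')
redirect_cat(pywikibot.Category(site, 'Category:Old name'),
             pywikibot.Category(site, 'Category:New name'),
             summary='Redirect to the renamed category')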
Example #4
 def getArticles(self, category, recurse=False):
     if category.startswith("Category:"):
         category = category.split(":")[1]
     
     baseDir = "categoryArticlesCache/"
     if not os.path.exists(baseDir):
         os.makedirs(baseDir)
     
     fname = baseDir + category
     if recurse:
         # assumption: give recursive listings their own cache file; the
         # original reused the same file name for both modes
         fname += "_recurse"
     if self.printMode:
         try:
             print(fname)
         except:
             print("error printing fname")
     if os.path.isfile(fname):
         with codecs.open(fname, encoding='utf-8') as f:
             lines = [line.strip() for line in f.readlines()]
         if lines != []:
             return lines
 
     site = Site("en")
     cat = Category(site, title=category)
     articles = cat.articles(namespaces = 0, recurse=recurse)
     res = [article.title() for article in articles]
     text = ""
     for title in res:
         text += title + "\n"
     with codecs.open(fname, "a+", encoding='utf-8') as f:
         f.write(text)
     return res
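A compact Python-3-only sketch of the same cache-then-fetch logic (str.removeprefix needs Python 3.9+; the per-recurse cache name mirrors the fix above):

import os
from pywikibot import Site, Category

def get_articles_cached(category, recurse=False):
    category = category.removeprefix("Category:")
    os.makedirs("categoryArticlesCache", exist_ok=True)
    fname = os.path.join("categoryArticlesCache",
                         category + ("_recurse" if recurse else ""))
    if os.path.isfile(fname):
        with open(fname, encoding="utf-8") as f:
            lines = [line.strip() for line in f if line.strip()]
        if lines:
            return lines
    cat = Category(Site("en"), title=category)
    titles = [a.title() for a in cat.articles(namespaces=0, recurse=recurse)]
    with open(fname, "w", encoding="utf-8") as f:
        f.writelines(t + "\n" for t in titles)
    return titles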
Example #5
 def get_lemma_str_from_cat(self, category: str) -> List[str]:
     page = Category(self.wiki, category)
     cat_list = [
         str(lemma).strip("[]")[2:]
         for lemma in CategorizedPageGenerator(page)
     ]
     return cat_list
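The str(lemma).strip("[]")[2:] expression reverse-engineers the title out of the wiki-link rendering of the Page object; Page.title() returns it directly, so an equivalent sketch (renamed to mark it as mine, not the source's) avoids the string surgery:

def get_lemma_titles_from_cat(self, category: str) -> List[str]:
    # Page.title() already yields the bare title string
    return [lemma.title()
            for lemma in CategorizedPageGenerator(Category(self.wiki, category))]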
Example #6
def main():

    data = '20110310'

    site = pywikibot.getSite()
    cat = Category(site, 'Kategoria:francuski (indeks)')
    lista = pagegenerators.CategorizedPageGenerator(cat)
    #lista_stron1 = xmlreader.XmlDump('plwiktionary-%s-pages-articles.xml' % data)

    #lista = xmlreader.XmlDump.parse(lista_stron1)

    for a in lista:
        h = Haslo(a.title())
        #h = HasloXML(a.title, a.text)
        if h.type != 4 and ' ' in h.title:
            h.langs()
            for c in h.list_lang:
                c.pola()
                if c.type != 2 and c.lang == 'hiszpański':
                    if ('rzeczownik' in c.znaczenia.tresc) and (
                            'rzeczownika' not in c.znaczenia.tresc):
                        print('\n' + h.title)
                        text = '*[[%s]]\n' % h.title
                        with open("log/rzeczownik.txt", 'a', encoding="utf-8") as f:
                            f.write(text)
Example #7
def _db_get_new_category_pages(
    category: pywikibot.Category,
    start_time: pywikibot.Timestamp,
    end_time: pywikibot.Timestamp,
    namespaces: List[int],
) -> Iterator[Tuple[pywikibot.page.BasePage, datetime]]:
    """Use DB to list category pages. Called by get_new_categoryPages()"""
    if not wmcs:
        raise ConnectionError

    query = ("SELECT page_namespace, page_title, cl_timestamp "
             "FROM "
             "    categorylinks "
             "    JOIN page ON page_id = cl_from "
             "WHERE "
             '    cl_to = "{catname}" AND '
             '    cl_type = "page" AND '
             "    cl_timestamp >= {start_timestamp} AND "
             "    cl_timestamp < {end_timestamp} AND "
             "    page_namespace in ({nslist}) "
             "ORDER BY cl_timestamp ").format(
                 catname=category.title(underscore=True, with_ns=False),
                 start_timestamp=start_time.totimestampformat(),
                 end_timestamp=end_time.totimestampformat(),
                 nslist=", ".join(str(n) for n in namespaces),
             )

    for ns, title, ts in pywikibot.data.mysql.mysql_query(
            query, dbname=site.dbName()):
        yield (
            pywikibot.Page(site, title=title.decode(encoding="utf-8"), ns=ns),
            ts,
        )
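Building the query with str.format breaks as soon as a category name contains a quote. A sketch of the same lookup with driver-side parameter binding, reusing the toolforge/pymysql approach from Example #10 below (wiki name and connection details are illustrative):

import toolforge

def db_new_category_pages(catname, start_ts, end_ts, namespaces):
    query = (
        "SELECT page_namespace, page_title, cl_timestamp "
        "FROM categorylinks JOIN page ON page_id = cl_from "
        "WHERE cl_to = %(cat)s AND cl_type = 'page' "
        "    AND cl_timestamp >= %(start)s AND cl_timestamp < %(end)s "
        "    AND page_namespace IN %(ns)s "
        "ORDER BY cl_timestamp"
    )
    conn = toolforge.connect("enwiki")
    with conn.cursor() as cur:
        # pymysql expands the list bound to %(ns)s into (0, 1, ...)
        cur.execute(query, {"cat": catname, "start": start_ts,
                            "end": end_ts, "ns": namespaces})
        return cur.fetchall()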
Example #8
    def find_discussion(self, category: pywikibot.Category) -> 'CfdPage':
        """
        Return the relevant discussion.

        @param category: The category being discussed
        """
        if self.section():
            return self
        text = removeDisabledParts(self.text, tags=EXCEPTIONS, site=self.site)
        wikicode = mwparserfromhell.parse(text, skip_style_tags=True)
        for section in wikicode.get_sections(levels=[4]):
            heading = section.filter_headings()[0]
            section_title = str(heading.title).strip()
            discussion = self.__class__(
                self.site, '{}#{}'.format(self.title(), section_title))
            if category.title() == section_title:
                return discussion
            # Split approximately into close, nom, and others.
            parts = str(section).split('(UTC)')
            if len(parts) < 3:
                continue
            # Parse the nom for category links.
            nom = mwparserfromhell.parse(parts[1], skip_style_tags=True)
            for node in nom.ifilter():
                page = self._cat_from_node(node)
                if page and category == page:
                    return discussion
        return self
Example #9
def _api_get_new_category_pages(
    category: pywikibot.Category,
    start_time: pywikibot.Timestamp,
    end_time: pywikibot.Timestamp,
    namespaces: List[int],
) -> Iterator[Tuple[pywikibot.page.BasePage, pywikibot.Timestamp]]:
    """Use API to list category pages. Called by get_new_categoryPages()"""
    for row in pywikibot.data.api.ListGenerator(
            "categorymembers",
            site=site,
            cmtitle=category.title(underscore=True, with_ns=True),
            cmprop="title|type|timestamp",
            cmnamespace="|".join(str(n) for n in namespaces),
            cmtype="page",
            cmsort="timestamp",
            cmstart=start_time.isoformat(),
            cmend=end_time.isoformat(),
    ):
        if row.get("type", "page") != "page":
            continue

        yield (
            pywikibot.Page(site,
                           title=row.get("title", ""),
                           ns=row.get("ns", "")),
            pywikibot.Timestamp.fromISOformat(row.get("timestamp")),
        )
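Both helpers' docstrings mention a get_new_categoryPages() caller that is not shown; a plausible dispatcher (my reconstruction, not the source's) tries the replica database first and falls back to the API:

def get_new_categoryPages(category, start_time, end_time, namespaces=None):
    namespaces = namespaces or [0]
    try:
        # fast path: Toolforge replica database
        yield from _db_get_new_category_pages(category, start_time,
                                              end_time, namespaces)
    except ConnectionError:
        # no replica connection available: fall back to the MediaWiki API
        yield from _api_get_new_category_pages(category, start_time,
                                               end_time, namespaces)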
Example #10
def db_get_usage(cat: pywikibot.Category, depth: int) -> UsageResult:
    query = """
SELECT page_title, count(*)
FROM categorylinks
JOIN page ON cl_from = page_id
LEFT JOIN globalimagelinks ON page_title = gil_to
JOIN image ON img_name = page_title
WHERE
    cl_to IN %(cats)s
    AND img_major_mime = "image"
    AND img_minor_mime != "svg+xml"
GROUP BY page_title
ORDER BY count(*) DESC
"""
    conn = toolforge.connect("commonswiki")
    with conn.cursor() as cur:
        total = cur.execute(
            query,
            args={
                "cats": [
                    cat.title(with_ns=False, underscore=True)
                    for cat in list_cats(cat, depth)
                ]
            },
        )
        data = cast(List[Tuple[bytes, int]], cur.fetchall())
    return UsageResult(
        [
            FileUsage(f"File:{str(page, encoding='utf-8')}", count)
            for page, count in data
        ][:200],
        total,
        [],
    )
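The cl_to IN %(cats)s placeholder works because pymysql escapes a Python list into a parenthesized value list. Cursor.mogrify shows the SQL that would be sent (a sketch; assumes the same toolforge connection):

conn = toolforge.connect("commonswiki")
with conn.cursor() as cur:
    print(cur.mogrify("SELECT 1 FROM categorylinks WHERE cl_to IN %(cats)s",
                      {"cats": ["Foo", "Bar baz"]}))
    # -> SELECT 1 FROM categorylinks WHERE cl_to IN ('Foo','Bar baz')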
Example #11
    def getBoundedCat(self, cat, currMinSize):
        print()
        print("getBoundedCat")
        site = Site("en")
        catObj = Category(site, title=cat)
        
        subCats = self.recursiveCats(catObj) 
        
        articleSet = set()
        
        for subCat in subCats:
            print("inside subCat", subCat)
            newArts = set(self.getArticles(subCat, recurse=False))
            articleSet.update(newArts)
            print(len(articleSet))
            if len(articleSet) > currMinSize:
                # early exit: this category already exceeds the current
                # minimum, so its exact size no longer matters
                print("break")
                return currMinSize

        return len(articleSet)
Example #12
def main():

    site = pywikibot.getSite()
    cat = Category(site, 'Kategoria:francuski (indeks)')
    lista = pagegenerators.CategorizedPageGenerator(cat, start='tænia')

    for a in lista:
        h = Haslo(a.title())
        if h.typ == 3:
            h.sekcje()
            for c in h.lista_sekcje:
                if 'francuski' in c.jezyk:
                    print('\n' + h.tytul)
                    c.pola()
                    print(c.wymowa.tresc)
Example #13
def getPagesTitleFromCategorie(site, categories):
    pages = []
    cats = [(
        Category(site, c['title']),
        c['namespace'] if ("namespace" in c) else None,
        c['recurse'] if ("recurse" in c) else 0,
    ) for c in categories]
    # retrieve all pages from categories
    for (cat, ns, r) in cats:
        pages.append(cat.title())
        log("Retrieve pages from %s" % cat.title())
        # add pages to sync for this category
        pages.extend(mapTitle(cat.articles(namespaces=ns, recurse=r)))

    return pages
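The shape of the categories argument, inferred from the lookups above, is a list of dicts with a required 'title' and optional 'namespace'/'recurse' keys; an illustrative call (category names are made up):

pages = getPagesTitleFromCategorie(site, [
    {'title': 'Category:Paris'},  # all namespaces, no recursion
    {'title': 'Category:France', 'namespace': 0, 'recurse': 1},
])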
Example #14
def main():

    sekcje = [
        '{{odmiana', '{{etymologia}}', '{{wymowa}}', '{{znaczenia}}',
        '{{przykłady}}', '{{składnia}}', '{{kolokacje}}', '{{pokrewne}}',
        '{{frazeologia}}', '{{uwagi}}', '{{synonimy}}', '{{antonimy}}',
        '{{źródła}}',
    ]

    site = pywikibot.getSite()
    cat = Category(site, 'Kategoria:łaciński (indeks)')
    lista_stron = pagegenerators.CategorizedPageGenerator(cat)
    #lista_stron = pagegenerators.AllpagesPageGenerator(namespace = 0, includeredirects = False)
    '''lista_stron2 = []
    for p in lista_stron:
            if u'Wikisłownik:' not in p.title and u'Szablon:' not in p.title and u'Kategoria:' not in p.title and u'Wikipedysta:' not in p.title and u'Aneks:' not in p.title and u'Indeks:' not in p.title and u'MediaWiki:' not in p.title and u'Portal:' not in p.title and u'Indeks:' not in p.title and u'#TAM' not in p.text and u'#PATRZ' not in p.text and u'Pomoc:' not in p.title and u'#REDIRECT' not in p.text and u'sentencja łacińska' not in p.text and u'#patrz' not in p.text and u'#tam' not in p.text:
                    #if u'{{język francuski}}' in p.text:
                    lista_stron2.append(p)

    sekcje_join ='|'.join(map(re.escape, sekcje))
    szukany_tekst = re.compile(u'{{odmiana}}.*\n{{składnia}}')
    '''
    out = ''

    for page in lista_stron:

        text = page.get()

        if '{{przykłady}}' not in text:
            print('*[[' + page.title() + ']]')
            out = out + '*[[' + page.title() + ']]\n'

    filename = "output-sprzątanie.txt"

    with open(filename, 'w', encoding="utf-8") as f:
        f.write(out)
Example #15
def load_files(categories, depth):
    """
    Returns a list of unique files in categories

    @param categories: List of Commons category names as strings
    @type categories: list
    @param depth: Category recursion depth
    @type depth: int
    @rtype: list
    """
    files = set()
    for cat in categories:
        cat = Category(commons, cat)
        generator = CategorizedPageGenerator(cat,
                                             recurse=depth,
                                             namespaces=Namespace.FILE)
        for page in generator:
            files.add(page.title(with_ns=False))

    return list(files)
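A usage sketch (category name and depth are illustrative; assumes the module-level commons site object the function relies on):

file_titles = load_files(['Images from Wiki Loves Monuments 2019'], depth=1)
print(len(file_titles), 'unique files')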
Example #16
File: ex.py  Project: jagnajoz/wiktionary
def main():

    site = pywikibot.getSite()
    cat = Category(site, 'Kategoria:francuski (indeks)')
    lista = pagegenerators.CategorizedPageGenerator(cat)

    for a in lista:
        h = Haslo(a.title())
        if h.typ == 3:
            h.sekcje()
            for c in h.lista_sekcje:
                if 'francuski' in c.jezyk:
                    print('\n' + h.tytul)
                    c.pola()
                    print(c.przyklady.tresc)
                    if (c.przyklady.tresc == '\n: (1.1)'
                            or c.przyklady.tresc == '\n: (1.1) ') and (
                                'rzeczownik' not in c.znaczenia.tresc) and (
                                    '{{forma czasownika|fr}}'
                                    not in c.znaczenia.tresc):
                        text = '*[[%s]]\n' % h.tytul
                        with open("log/ex.txt", 'a', encoding="utf-8") as f:
                            f.write(text)
Example #17
def main():

    site = pywikibot.getSite()
    cat = Category(site, 'Kategoria:esperanto (indeks)')
    lista = pagegenerators.CategorizedPageGenerator(cat, start='anemi')
    #, start=u'abduktoro'
    re_etymn = re.compile(r'\{\{etymn\|eo\|(.*?)\}\}')
    re_etymn_nr = re.compile(
        r'(\:\s*?\([0-9]\.[0-9]\))\s*?\{\{etymn\|eo\|(.*?)\}\}(.*?)\n')

    czesciMowy = [
        'rzeczownik', 'czasownik', 'przymiotnik', 'przysłówek', 'spójnik',
        'liczebnik', 'zaimek', 'wykrzyknik', 'partykuła'
    ]
    #lista = [pywikibot.Page(site, u'aboc')]
    for word in lista:
        a = word.title()

        h = Haslo(a)
        if h.type == 3:
            morfem = 0
            etymn = 0
            pochodne = 0
            pokrewne = 0
            inneCzesci = 0
            skrot = 0
            for b in h.listLangs:
                b.pola()
                morfologia = ''
                if (b.type == 1 or b.type
                        == 10) and b.lang == 'esperanto' and b.znaczeniaDetail:
                    b.etymologia.numer()
                    for c in b.znaczeniaDetail:
                        if c[0] in ('{{morfem|eo}}',
                                    '{{morfem|eo|przedrostkowy}}'):
                            morfem = 1
                        if any(e in c[0] for e in czesciMowy):
                            inneCzesci = 1
                        if 'skrót' in c[0]:
                            skrot = 1
                    wordCount = len(b.title.split())
                    if '{{pochodne}}' in b.content:
                        pochodne = 1
                    if '{{etymn' in b.etymologia.text:
                        etymn = 1
                    try:
                        b.pokrewne
                    except AttributeError:
                        pass
                    else:
                        pokrewne = 1
                    #print u'type = %d, morfem = %d, inneCzesci = %d, skrot = %d, pochodne = %d, etymn = %d, pokrewne = %d' % (b.type, morfem, inneCzesci, skrot, pochodne, etymn, pokrewne)

                    if b.type == 1 and not morfem and not pochodne and pokrewne and (
                            etymn or wordCount > 1 or skrot):
                        if b.etymologia.type == 2:
                            for elem in b.etymologia.list:
                                s_etymn = re.findall(re_etymn, elem[1])
                                elem[1] = re.sub(re_etymn, '', elem[1])
                                if s_etymn:
                                    morfologia += '\n' + elem[0]
                                    for c in s_etymn:
                                        morfologia += ' {{morfeo|%s}}' % (c)
                                if elem[1].strip() == '':
                                    elem[0] = ''
                                    elem[1] = ''
                                else:
                                    elem[1] = elem[1].strip(' ')
                                    elem[1] = ' ' + elem[1]
                            b.dodatki.text += '\n{{morfologia}}' + morfologia
                            b.saveChanges()
                        elif b.etymologia.type == 1:
                            s_etymn = re.findall(re_etymn, b.etymologia.text)
                            b.etymologia.text = re.sub(re_etymn, '',
                                                       b.etymologia.text)
                            b.etymologia.text = b.etymologia.text.strip(' ')
                            if b.etymologia.text != '':
                                b.etymologia.text = ' ' + b.etymologia.text
                            for elem in s_etymn:
                                morfologia += ' {{morfeo|%s}}' % elem
                            b.dodatki.text += '\n{{morfologia}}' + morfologia
                            b.saveChanges()
                    elif b.type == 10 and morfem and pochodne and not etymn and not pokrewne and not inneCzesci:
                        b.naglowek.text = b.naglowek.text.replace(
                            '{{esperanto}}', '{{esperanto (morfem)}}')
                        b.saveChanges()
                    else:
                        b.uwagi.text += ' {{zmiany-w-esperanto}}'
                        b.saveChanges()

            history = word.getVersionHistory()
            done = 0
            for elem in history:
                if (elem[3] == 'reorganizacja esperanto (wydzielenie morfemów do osobnego języka)'
                        and elem[2] == 'AlkamidBot'):
                    done = 1

            if not done:
                h.push(
                    False,
                    'reorganizacja esperanto (wydzielenie morfemów do osobnego języka)'
                )
Example #18
File: zh.py  Project: jagnajoz/wiktionary
def main():
    test_mode = 0
    site = pywikibot.getSite()
    site_en = pywikibot.getSite('en', 'wiktionary')
    site_com = pywikibot.getSite('commons', 'commons')
    cat = Category(site, 'Kategoria:chiński standardowy (indeks)')
    cat_com = Category(site_com, 'Chinese kanji stroke order')  # Commons category: use site_com (the original passed the pl.wikt site)
    lista_stron = pagegenerators.CategorizedPageGenerator(cat)
    lista_com = pagegenerators.CategorizedPageGenerator(cat_com)
    log_site = pywikibot.Page(site, 'Wikipedysta:AlkamidBot/zch/log')

    lista = []
    istnieje = []

    han_char = re.compile(r'{{Han(_| )char\|(.*?)}')
    han_ref = re.compile(r'{{Han(_| )ref\|(.*})')
    zh_f = re.compile(r'{{zh-forms\|(.*)}')
    jap_f = re.compile(r'{{ja-forms\|(.*)}')
    kx = re.compile(r'kx=(.*?)(\||})')
    dkj = re.compile(r'\|dkj=(.*?)(\||})')
    dj = re.compile(r'\|dj=(.*?)(\||})')
    hdz = re.compile(r'\|hdz=(.*?)(\||})')
    rn = re.compile(r'rn=([0-9]*?)\|')
    rad = re.compile(r'rad=(.)')
    han_as = re.compile(r'as=([0-9]*?)\|')
    sn = re.compile(r'sn=([0-9]*?)\|')
    canj = re.compile(r'canj=([^\|]*)')
    cr = re.compile(r'four=(.*?)\|')
    alt = re.compile(r'alt=(.*?)\|')
    asj = re.compile(r'asj=(.*?)\|')
    tekst_przed = re.compile(r'(.*?)=', re.DOTALL)
    tekst_po = re.compile(r'.*?(=.*)', re.DOTALL)
    grafika = re.compile(
        r'(\-bw\.|\-red\.|\-order\.|{{zch\-cienie}}|{{zch\-animacja}}|{{zch\-komiks}})'
    )

    for page in lista_stron:
        if len(page.title()) == 1:
            lista.append(page)

    for a in lista:
        tekst = ''
        log = ''  # initialized here so the except branches below can append to it

        rn_abort = 0
        rad_abort = 0
        han_as_abort = 0
        sn_abort = 0
        canj_abort = 0
        cr_abort = 0

        try:
            strona = a.get()
        except pywikibot.IsRedirectPage:
            print('[[%s]] - przekierowanie' % a.title())
            log = log + '\n*[[%s]] - przekierowanie' % a.title()
        except pywikibot.Error:
            print('[[%s]] - błąd' % a.title())
            log = log + '\n*[[%s]] - błąd' % a.title()
        else:

            tekst_przed_s = re.search(tekst_przed, a.get())
            tekst_po_s = re.search(tekst_po, a.get())

            log = ''

            if test_mode == 1:
                sekcja_znak = 'fdssagrefadf'
            else:
                sekcja_znak = '{{znak chiński}}'

            if sekcja_znak in a.get():
                print('[[%s]] - istnieje już sekcja {{znak chiński}}' %
                      a.title())
                log = log + '\n*[[%s]] - istnieje już sekcja {{s|znak chiński}}' % a.title(
                )
                istnieje.append(a)
            else:
                ang = pywikibot.Page(site_en, a.title())
                han_char_s = re.search(han_char, ang.get())

                grafika_s = re.search(grafika, a.get())
                if grafika_s != None:
                    print('[[%s]] - znaleziono grafikę z CJK stroke order' %
                          a.title())
                    log = log + '\n*[[%s]] - znaleziono grafikę z CJK stroke order' % a.title(
                    )

                if han_char_s != None:

                    szablon_han = han_char_s.group(2)

                    rn_s = re.search(rn, szablon_han)
                    rad_s = re.search(rad, szablon_han)
                    han_as_s = re.search(han_as, szablon_han)
                    sn_s = re.search(sn, szablon_han)
                    canj_s = re.search(canj, szablon_han)
                    cr_s = re.search(cr, szablon_han)
                    alt_s = re.search(alt, szablon_han)
                    asj_s = re.search(asj, szablon_han)

                    if alt_s == None:
                        alter = 0
                    else:
                        if alt_s.group(1) == '':
                            alter = 0
                        else:
                            alter = 1
                    if asj_s == None:
                        alter1 = 0
                    else:
                        if asj_s.group(1) == '':
                            alter1 = 0
                        else:
                            alter1 = 1

                    if alter == 0 and alter1 == 0:

                        #print a.title()
                        if rn_s == None:
                            print('[[%s]] - Nie istnieje argument \'rn\'' %
                                  a.title())
                            log = log + '\n*[[%s]] - Nie istnieje argument \'rn\'' % a.title(
                            )
                            rn_abort = 1
                        if rad_s == None:
                            print('[[%s]] - Nie istnieje argument \'rad\'' %
                                  a.title())
                            log = log + '\n*[[%s]] - Nie istnieje argument \'rad\'' % a.title(
                            )
                            rad_abort = 1
                        if han_as_s != None:
                            #print han_as_s.group(1)
                            if han_as_s.group(1) == '0' or han_as_s.group(
                                    1) == '00':
                                as_output = '+ 0'
                            else:
                                if han_as_s.group(1)[0] == '0':
                                    as_output = '+ %s' % han_as_s.group(1)[1]
                                else:
                                    as_output = han_as_s.group(1)[1]
                            #print as_output
                        else:

                            han_as_abort = 1
                        if sn_s == None:

                            sn_abort = 1
                        if canj_s == None:

                            canj_abort = 1
                        if cr_s != None:
                            if cr_s.group(1).isspace() or cr_s.group(1) == '':
                                print(
                                    '[[%s]] - argument \'four\' na en.wikt jest pusty - dodać ręcznie'
                                    % a.title())
                                log = log + '\n*[[%s]] - argument \'four\' na en.wikt jest pusty - dodać ręcznie' % a.title(
                                )
                        else:
                            cr_abort = 1

                        kolejnosc_koncowa_c = ''

                        if pywikibot.ImagePage(site_en, '%s-bw.png' %
                                               a.title()).fileIsShared():
                            kolejnosc_koncowa_c = '{{zch-komiks}}'
                        else:
                            if pywikibot.ImagePage(site_en, '%s-red.png' %
                                                   a.title()).fileIsShared():
                                kolejnosc_koncowa_c = '{{zch-cienie}}'
                            else:
                                if pywikibot.ImagePage(
                                        site_en, '%s-order.gif' %
                                        a.title()).fileIsShared():
                                    kolejnosc_koncowa_c = '{{zch-animacja}}'

                        kolejnosc_koncowa_j = ''

                        if pywikibot.ImagePage(site_en, '%s-jbw.png' %
                                               a.title()).fileIsShared():
                            kolejnosc_koncowa_j = '{{zch-komiks|j}}'
                        else:
                            if pywikibot.ImagePage(site_en, '%s-jred.png' %
                                                   a.title()).fileIsShared():
                                kolejnosc_koncowa_j = '{{zch-cienie|j}}'
                            else:
                                if pywikibot.ImagePage(
                                        site_en, '%s-jorder.gif' %
                                        a.title()).fileIsShared():
                                    kolejnosc_koncowa_j = '{{zch-animacja|j}}'

                        kolejnosc_koncowa_t = ''

                        if pywikibot.ImagePage(site_en, '%s-tbw.png' %
                                               a.title()).fileIsShared():
                            kolejnosc_koncowa_t = '{{zch-komiks|t}}'
                        else:
                            if pywikibot.ImagePage(site_en, '%s-tred.png' %
                                                   a.title()).fileIsShared():
                                kolejnosc_koncowa_t = '{{zch-cienie|t}}'
                            else:
                                if pywikibot.ImagePage(
                                        site_en, '%s-torder.gif' %
                                        a.title()).fileIsShared():
                                    kolejnosc_koncowa_t = '{{zch-animacja|t}}'

                        kolejnosc_koncowa_a = ''

                        if pywikibot.ImagePage(site_en, '%s-abw.png' %
                                               a.title()).fileIsShared():
                            kolejnosc_koncowa_a = '{{zch-komiks|a}}'
                        else:
                            if pywikibot.ImagePage(site_en, '%s-ared.png' %
                                                   a.title()).fileIsShared():
                                kolejnosc_koncowa_a = '{{zch-cienie|a}}'
                            else:
                                if pywikibot.ImagePage(
                                        site_en, '%s-aorder.gif' %
                                        a.title()).fileIsShared():
                                    kolejnosc_koncowa_a = '{{zch-animacja|a}}'

                        tekst = '== {{zh|%s}} ({{znak chiński}}) ==\n{{klucz}}' % a.title(
                        )

                        if rn_abort or rad_abort or han_as_abort:
                            print(
                                '[[%s]] - w en.wikt nie istnieje któryś z argumentów do {{klucz}} - dodać ręcznie'
                                % a.title())
                            log = log + '\n*[[%s]] - w en.wikt nie istnieje któryś z argumentów do {{s|klucz}} - dodać ręcznie' % a.title(
                            )
                        else:
                            tekst = tekst + ' %s %s %s' % (
                                rn_s.group(1), rad_s.group(1), as_output)

                        tekst = tekst + '\n{{kreski}}'
                        if sn_abort:
                            print(
                                '[[%s]] - w en.wikt nie istnieje argument do {{kreski}} - dodać ręcznie'
                                % a.title())
                            log = log + '\n*[[%s]] - w en.wikt nie istnieje argument do {{s|kreski}} - dodać ręcznie' % a.title()
                        else:
                            tekst = tekst + ' %s\n' % sn_s.group(1)

                        zh_f_s = re.search(zh_f, ang.get())
                        ja_f_s = re.search(jap_f, ang.get())

                        warianty = '{{warianty'
                        warianty_obr = '{{warianty-obrazek'
                        ku = ''
                        xu = ''
                        sou = ''
                        sot = ''
                        ming = ''
                        upr = ''
                        trad = ''
                        shin = ''

                        if zh_f_s != None:
                            zh_f_str = zh_f_s.group(1).replace(
                                "[",
                                "").replace("]",
                                            "").replace("{{zh-lookup|",
                                                        "").replace("}", "")
                            zh_osobno = zh_f_str.split('|')
                            warianty = warianty + ' | {{zch-w|ct|%s}} | {{zch-w|cu|%s}}' % (
                                zh_osobno[1], zh_osobno[0])
                            '''
                            if pywikibot.ImagePage(site_en, u'%s-kaishu.svg' % zh_osobno[0]).fileIsShared():
                                    ku = u' | {{zch-obrazek|ku|%s}}' % zh_osobno[0]
                            else:
                                    if pywikibot.ImagePage(site_en, u'%s-kaishu.png' % zh_osobno[0]).fileIsShared():
                                            ku = u' | {{zch-obrazek|ku|%s|p}}' % zh_osobno[0]
                                    else:
                                            if pywikibot.ImagePage(site_en, u'%s-kaishu.gif' % zh_osobno[0]).fileIsShared():
                                                    ku = u' | {{zch-obrazek|ku|%s|g}}' % zh_osobno[0]

                            if pywikibot.ImagePage(site_en, u'%s-xinshu.svg' % zh_osobno[0]).fileIsShared():
                                    xu = u' | {{zch-obrazek|xu|%s}}' % zh_osobno[0]
                            else:
                                    if pywikibot.ImagePage(site_en, u'%s-xinshu.png' % zh_osobno[0]).fileIsShared():
                                            xu = u' | {{zch-obrazek|xu|%s|p}}' % zh_osobno[0]
                                    else:
                                            if pywikibot.ImagePage(site_en, u'%s-xinshu.gif' % zh_osobno[0]).fileIsShared():
                                                    xu = u' | {{zch-obrazek|xu|%s|g}}' % zh_osobno[0]

                            if pywikibot.ImagePage(site_en, u'%s-songti.svg' % zh_osobno[0]).fileIsShared():
                                    sou = u' | {{zch-obrazek|sou|%s}}' % zh_osobno[0]
                            else:
                                    if pywikibot.ImagePage(site_en, u'%s-songti.png' % zh_osobno[0]).fileIsShared():
                                            sou = u' | {{zch-obrazek|sou|%s|p}}' % zh_osobno[0]
                                    else:
                                            if pywikibot.ImagePage(site_en, u'%s-songti.gif' % zh_osobno[0]).fileIsShared():
                                                    sou = u' | {{zch-obrazek|sou|%s|g}}' % zh_osobno[0]

                            if ku != u'' or xu !=u'' or sou !=u'':
                                    warianty = warianty + u'{{warianty-obrazek'
                                    if ku != u'':
                                            warianty = warianty + ku
                                    if xu !=u'':
                                            warianty = warianty + xu
                                    if sou !=u'':
                                            warianty = warianty + sou
                                    warianty = warianty + u'}}'
                                    '''

                        if ja_f_s != None:
                            ja_f_str = ja_f_s.group(1).replace(
                                "[",
                                "").replace("]",
                                            "").replace("{{zh-lookup|",
                                                        "").replace("}", "")
                            ja_osobno = ja_f_str.split('|')
                            warianty = warianty + ' | {{zch-w|js|%s}} | {{zch-w|ct|%s}} | {{zch-w|cu|%s}}' % (
                                ja_osobno[0], ja_osobno[2], ja_osobno[1])
                            trad = ja_osobno[2]
                            upr = ja_osobno[1]
                            shin = ja_osobno[0]
                            '''if pywikibot.ImagePage(site_en, u'%s-kaishu.svg' % ja_osobno[1]).fileIsShared():
                                    ku = u' | {{zch-obrazek|ku|%s}}' % ja_osobno[1]
                            else:
                                    if pywikibot.ImagePage(site_en, u'%s-kaishu.png' % ja_osobno[1]).fileIsShared():
                                            ku = u' | {{zch-obrazek|ku|%s|p}}' % ja_osobno[1]
                                    else:
                                            if pywikibot.ImagePage(site_en, u'%s-kaishu.gif' % ja_osobno[1]).fileIsShared():
                                                    ku = u' | {{zch-obrazek|ku|%s|g}}' % ja_osobno[1]

                            if pywikibot.ImagePage(site_en, u'%s-xinshu.svg' % ja_osobno[1]).fileIsShared():
                                    xu = u' | {{zch-obrazek|xu|%s}}' % ja_osobno[1]
                            else:
                                    if pywikibot.ImagePage(site_en, u'%s-xinshu.png' % ja_osobno[1]).fileIsShared():
                                            xu = u' | {{zch-obrazek|xu|%s|p}}' % ja_osobno[1]
                                    else:
                                            if pywikibot.ImagePage(site_en, u'%s-xinshu.gif' % ja_osobno[1]).fileIsShared():
                                                    xu = u' | {{zch-obrazek|xu|%s|g}}' % ja_osobno[1]

                            if pywikibot.ImagePage(site_en, u'%s-songti.svg' % ja_osobno[1]).fileIsShared():
                                    sou = u' | {{zch-obrazek|sou|%s}}' % ja_osobno[1]
                            else:
                                    if pywikibot.ImagePage(site_en, u'%s-songti.png' % ja_osobno[1]).fileIsShared():
                                            sou = u' | {{zch-obrazek|sou|%s|p}}' % ja_osobno[1]
                                    else:
                                            if pywikibot.ImagePage(site_en, u'%s-songti.gif' % ja_osobno[1]).fileIsShared():
                                                    sou = u' | {{zch-obrazek|sou|%s|g}}' % ja_osobno[1]

                            if ku != u'' or xu !=u'' or sou !=u'':
                                    warianty = warianty + u'{{warianty-obrazek'
                                    if ku != u'':
                                            warianty = warianty + ku
                                    if xu !=u'':
                                            warianty = warianty + xu
                                    if sou !=u'':
                                            warianty = warianty + sou
                                    warianty = warianty + u'}}'''

                        if pywikibot.ImagePage(site_en, '%s-clerical.svg' %
                                               a.title()).fileIsShared():
                            warianty_obr = warianty_obr + ' | {{zch-obrazek|c|%s}}' % a.title(
                            )
                        else:
                            if pywikibot.ImagePage(
                                    site_en, '%s-clerical.png' %
                                    a.title()).fileIsShared():
                                warianty_obr = warianty_obr + ' | {{zch-obrazek|c|%s|p}}' % a.title(
                                )
                            else:
                                if pywikibot.ImagePage(
                                        site_en, '%s-clerical.gif' %
                                        a.title()).fileIsShared():
                                    warianty_obr = warianty_obr + ' | {{zch-obrazek|c|%s|g}}' % a.title(
                                    )

                        if pywikibot.ImagePage(site_en, '%s-xinshu.svg' %
                                               a.title()).fileIsShared():
                            warianty_obr = warianty_obr + ' | {{zch-obrazek|xt|%s}}' % a.title(
                            )
                        else:
                            if pywikibot.ImagePage(site_en, '%s-xinshu.png' %
                                                   a.title()).fileIsShared():
                                warianty_obr = warianty_obr + ' | {{zch-obrazek|xt|%s|p}}' % a.title(
                                )
                            else:
                                if pywikibot.ImagePage(
                                        site_en, '%s-xinshu.gif' %
                                        a.title()).fileIsShared():
                                    warianty_obr = warianty_obr + ' | {{zch-obrazek|xt|%s|g}}' % a.title(
                                    )

                        if pywikibot.ImagePage(site_en, '%s-still.svg' %
                                               a.title()).fileIsShared():
                            warianty_obr = warianty_obr + ' | {{zch-obrazek|st|%s}}' % a.title(
                            )
                        else:
                            if pywikibot.ImagePage(site_en, '%s-caoshu.svg' %
                                                   a.title()).fileIsShared():
                                warianty_obr = warianty_obr + ' | {{zch-obrazek|ca|%s}}' % a.title(
                                )
                            else:
                                if pywikibot.ImagePage(
                                        site_en, '%s-still.png' %
                                        a.title()).fileIsShared():
                                    warianty_obr = warianty_obr + ' | {{zch-obrazek|st|%s|p}}' % a.title(
                                    )
                                else:
                                    if pywikibot.ImagePage(
                                            site_en, '%s-caoshu.png' %
                                            a.title()).fileIsShared():
                                        warianty_obr = warianty_obr + ' | {{zch-obrazek|ca|%s|p}}' % a.title(
                                        )
                                    else:
                                        if pywikibot.ImagePage(
                                                site_en, '%s-still.gif' %
                                                a.title()).fileIsShared():
                                            warianty_obr = warianty_obr + ' | {{zch-obrazek|st|%s|g}}' % a.title(
                                            )
                                        else:
                                            if pywikibot.ImagePage(
                                                    site_en, '%s-caoshu.gif' %
                                                    a.title()).fileIsShared():
                                                warianty_obr = warianty_obr + ' | {{zch-obrazek|ca|%s|g}}' % a.title(
                                                )

                        if pywikibot.ImagePage(site_en, '%s-kaishu.svg' %
                                               a.title()).fileIsShared():
                            warianty_obr = warianty_obr + ' | {{zch-obrazek|kt|%s}}' % a.title(
                            )
                        else:
                            if pywikibot.ImagePage(site_en, '%s-kaishu.png' %
                                                   a.title()).fileIsShared():
                                warianty_obr = warianty_obr + ' | {{zch-obrazek|kt|%s|p}}' % a.title(
                                )
                            else:
                                if pywikibot.ImagePage(
                                        site_en, '%s-kaishu.gif' %
                                        a.title()).fileIsShared():
                                    warianty_obr = warianty_obr + ' | {{zch-obrazek|kt|%s|g}}' % a.title(
                                    )

                        if pywikibot.ImagePage(site_en, '%s-songti.svg' %
                                               a.title()).fileIsShared():
                            warianty_obr = warianty_obr + ' | {{zch-obrazek|sot|%s}}' % a.title(
                            )
                        else:
                            if pywikibot.ImagePage(site_en, '%s-songti.png' %
                                                   a.title()).fileIsShared():
                                warianty_obr = warianty_obr + ' | {{zch-obrazek|sot|%s|p}}' % a.title(
                                )
                            else:
                                if pywikibot.ImagePage(
                                        site_en, '%s-songti.gif' %
                                        a.title()).fileIsShared():
                                    warianty_obr = warianty_obr + ' | {{zch-obrazek|sot|%s|g}}' % a.title(
                                    )
                        '''if sot != u'':
                                ming = ming + sot
                        else:
                                if zh_f_s != None:
                                        ming = ming + u' | {{zch-w|ct|%s}}' % zh_osobno[1]
                                if ja_f_s != None:
                                        ming = ming + u' | {{zch-w|ct|%s}}' % ja_osobno[2]

                        if sou != u'':
                                ming = ming + sou
                        else:
                                if zh_f_s != None:
                                        ming = ming + u' | {{zch-w|cu|%s}}' % zh_osobno[0]
                                if ja_f_s != None:
                                        ming = ming + u' | {{zch-w|cu|%s}}' % ja_osobno[1]'''

                        if warianty == '{{warianty':
                            tekst = tekst + '{{warianty|{{zch-w}}'
                        else:
                            tekst = tekst + warianty

                        tekst = tekst + '}}'

                        if warianty_obr != '{{warianty-obrazek':
                            tekst = tekst + ' ' + warianty_obr + '}}'

                        tekst = tekst + '\n{{kolejność}}'

                        if kolejnosc_koncowa_c == '' and kolejnosc_koncowa_j == '' and kolejnosc_koncowa_t == '' and kolejnosc_koncowa_a == '':
                            print(
                                '[[%s]] - na commons nie znaleziono żadnej kolejności pisania'
                                % a.title())
                            log = log + '\n*[[%s]] - na commons nie znaleziono żadnej kolejności pisania' % a.title(
                            )
                        else:
                            tekst = tekst + '\n'

                        if kolejnosc_koncowa_c != '':
                            tekst = tekst + '%s ' % kolejnosc_koncowa_c
                        if kolejnosc_koncowa_j != '':
                            tekst = tekst + '%s ' % kolejnosc_koncowa_j
                        if kolejnosc_koncowa_t != '':
                            tekst = tekst + '%s ' % kolejnosc_koncowa_t
                        if kolejnosc_koncowa_a != '':
                            tekst = tekst + '%s ' % kolejnosc_koncowa_a

                        tekst = tekst + '\n{{znaczenia}}\n{{etymologia}}'

                        etym = ' {{warianty-obrazek'
                        if pywikibot.ImagePage(site_en, '%s-oracle.svg' %
                                               a.title()).fileIsShared():
                            etym = etym + ' | {{zch-obrazek|o|%s}}' % a.title()
                        else:
                            if pywikibot.ImagePage(site_en, '%s-oracle.png' %
                                                   a.title()).fileIsShared():
                                etym = etym + ' | {{zch-obrazek|o|%s|p}}' % a.title(
                                )

                        if pywikibot.ImagePage(site_en, '%s-bronze.svg' %
                                               a.title()).fileIsShared():
                            etym = etym + ' | {{zch-obrazek|br|%s}}' % a.title(
                            )
                        else:
                            if pywikibot.ImagePage(site_en, '%s-bronze.png' %
                                                   a.title()).fileIsShared():
                                etym = etym + ' | {{zch-obrazek|br|%s|p}}' % a.title(
                                )

                        if pywikibot.ImagePage(site_en, '%s-bigseal.svg' %
                                               a.title()).fileIsShared():
                            etym = etym + ' | {{zch-obrazek|bs|%s}}' % a.title(
                            )
                        else:
                            if pywikibot.ImagePage(
                                    site_en, '%s-bigseal.png' %
                                    a.title()).fileIsShared():
                                etym = etym + ' | {{zch-obrazek|bs|%s|p}}' % a.title(
                                )

                        if pywikibot.ImagePage(site_en, '%s-seal.svg' %
                                               a.title()).fileIsShared():
                            etym = etym + ' | {{zch-obrazek|ss|%s}}' % a.title(
                            )
                        else:
                            if pywikibot.ImagePage(site_en, '%s-seal.png' %
                                                   a.title()).fileIsShared():
                                etym = etym + ' | {{zch-obrazek|ss|%s|p}}' % a.title(
                                )

                        etym = etym + '}}'

                        if etym != ' {{warianty-obrazek}}':
                            tekst = tekst + etym

                        tekst = tekst + '\n{{kody|cjz='
                        if canj_abort:
                            print(
                                '[[%s]] - w en.wikt nie istnieje argument cjz - dodać ręcznie'
                                % a.title())
                            log = log + '\n*[[%s]] - w en.wikt nie istnieje argument cjz - dodać ręcznie' % a.title()
                        else:
                            tekst = tekst + '%s' % canj_s.group(1)
                        tekst = tekst + '|cr='
                        if cr_abort == 1:
                            print(
                                '[[%s]] - w en.wikt nie istnieje argument \'\'four\'\' - dodać ręcznie'
                                % a.title())
                            log = log + '\n*[[%s]] - w en.wikt nie istnieje argument \'\'four\'\' - dodać ręcznie' % a.title(
                            )
                        else:
                            tekst = tekst + '%s' % cr_s.group(1)
                        tekst = tekst + '|u=%x}}' % ord(a.title())

                        han_ref_s = re.search(han_ref, ang.get())
                        if han_ref_s != None:
                            tekst = tekst + '\n{{słowniki'

                            kx_s = re.search(kx, han_ref_s.group(2))
                            if kx_s != None:
                                tekst = tekst + '|kx=%s' % kx_s.group(1)

                            dkj_s = re.search(dkj, han_ref_s.group(2))
                            if dkj_s != None:
                                tekst = tekst + '|dkj=%s' % dkj_s.group(1)

                            dj_s = re.search(dj, han_ref_s.group(2))
                            if dj_s != None:
                                tekst = tekst + '|dj=%s' % dj_s.group(1)

                            hdz_s = re.search(hdz, han_ref_s.group(2))
                            if hdz_s != None:
                                tekst = tekst + '|hdz=%s' % hdz_s.group(1)

                            tekst = tekst + '}}'

                        tekst = tekst + '\n{{uwagi}}\n{{źródła}}\n\n'

                    else:
                        print(
                            '[[%s]] - znaleziono alternatywne zapisy, pomijam'
                            % a.title())
                        log = log + '\n*[[%s]] - znaleziono alternatywne zapisy, pomijam' % a.title(
                        )

                    final = tekst_przed_s.group(1) + tekst + tekst_po_s.group(
                        1)

                    if test_mode == 1:
                        print(final + '\n\n')
                    else:
                        a.put(final,
                              comment='bot dodaje sekcję {{znak chiński}}')
                else:
                    print('[[%s]] - Nie znaleziono szablonu {{Han char}}' %
                          a.title())
                    log = log + '\n*[[%s]] - Nie znaleziono szablonu {{s|Han char}}, pomijam' % a.title(
                    )

            log_site = pywikibot.Page(site, 'Wikipedysta:AlkamidBot/zch/log')
            log_stary = log_site.get()

            if test_mode == 1:
                print(log)
            else:
                log = log_stary + log
                log_site.put(log, comment='%s' % a.title())
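The deeply nested fileIsShared() ladders above all repeat one pattern: probe a list of image-name suffixes in priority order and keep the template for the first shared file. A refactoring sketch (helper name and structure are mine, not the source's):

import pywikibot

def first_shared_variant(site_en, title, candidates):
    # candidates: (suffix, template) pairs in priority order
    for suffix, template in candidates:
        if pywikibot.ImagePage(site_en, title + suffix).fileIsShared():
            return template
    return ''

# e.g. the kolejnosc_koncowa_c ladder collapses to:
# kolejnosc_koncowa_c = first_shared_variant(site_en, a.title(), [
#     ('-bw.png', '{{zch-komiks}}'),
#     ('-red.png', '{{zch-cienie}}'),
#     ('-order.gif', '{{zch-animacja}}'),
# ])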
Example #19
from pywikibot.pagegenerators import CategorizedPageGenerator
from pywikibot import Site, Category
import json

site = Site('commons', 'commons')
cat = Category(site, 'Category:Images_from_Nordiska_museet:_2019-06')

pages = list()
for page in CategorizedPageGenerator(cat, recurse=False, namespaces=6):
    item = {}
    item['id'] = page.pageid
    print(page.title())
    item['title'] = str(page.title())
    if (('(2)' in item['title']) or ('(3)' in item['title'])):
        continue
    pages.append(item)

    # rewritten after every page, apparently as a crash-safe checkpoint
    with open('../static/pages.json', 'w') as outfile:
        json.dump(pages, outfile, ensure_ascii=False)
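If the per-page checkpoint is not needed, a sketch of the same loop with a single write at the end:

pages = []
for page in CategorizedPageGenerator(cat, recurse=False, namespaces=6):
    title = page.title()
    if '(2)' in title or '(3)' in title:
        continue
    pages.append({'id': page.pageid, 'title': title})

with open('../static/pages.json', 'w') as outfile:
    json.dump(pages, outfile, ensure_ascii=False)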
Example #20
def maariv_papers_pages() -> Iterable[pw.Page]:
    maariv_papers = Category(site, maariv_papers_category)
    return pagegenerators.CategorizedPageGenerator(maariv_papers)
Example #21
def main():

    site = pywikibot.getSite()

    # mode = 1: update pages from recent changes
    # mode = 2: after adding new languages to the script, all words in those
    #           languages have to be checked
    mode = 1

    mylist = set()
    if mode == 1:
        RClimit = readRCLimit('headerIndexing').strip()
        mylist = RecentChanges(RClimit)
        writeRCLimit('headerIndexing')
    if mode == 2:
        newlangs = [
            'arabski', 'perski', 'paszto', 'dari', 'urdu', 'osmańsko-turecki'
        ]
        for elem in newlangs:
            cat = Category(site, 'Kategoria:%s (indeks)' % elem)
            pageSet = set(pagegenerators.CategorizedPageGenerator(cat))
            for page in pageSet:
                mylist.add(page.title())

    replace = {}
    replace['arabski'] = {'إ': 'ا', 'آ': 'ا', 'ا': 'ا', 'أ': 'ا'}
    replace['dari'] = {'إ': 'ا', 'آ': 'ا', 'ا': 'ا', 'أ': 'ا'}
    replace['francuski'] = {
        'À': 'A',
        'Â': 'A',
        'Ç': 'C',
        'É': 'E',
        'È': 'E',
        'Ë': 'E',
        'Ê': 'E',
        'Î': 'I',
        'Ï': 'I',
        'Ô': 'O',
        'Œ': 'OE',
        'Ù': 'U',
        'Ú': 'U',
        'Û': 'U',
        'à': 'a',
        'â': 'a',
        'ç': 'c',
        'é': 'e',
        'è': 'e',
        'ë': 'e',
        'ê': 'e',
        'î': 'i',
        'ï': 'i',
        'ô': 'o',
        'œ': 'oe',
        'ù': 'u',
        'ú': 'u',
        'û': 'u'
    }
    replace['hiszpański'] = {
        'Á': 'A',
        'É': 'E',
        'Í': 'I',
        'Ó': 'O',
        'Ú': 'U',
        'á': 'a',
        'é': 'e',
        'í': 'i',
        'ó': 'o',
        'ú': 'u'
    }
    #replace[u'kurdyjski'] = {u'É': u'E', u'Í': u'I', u'Ú': u'U', u'Ù': u'U', u'é': u'e', u'í': u'i', u'ú': u'u', u'ù': u'u'}
    replace['nowogrecki'] = {
        'Ά': 'Α',
        'Έ': 'Ε',
        'Ή': 'Η',
        'Ί': 'Ι',
        'Ϊ': 'Ι',
        'Ό': 'Ο',
        'Ύ': 'Υ',
        'Ϋ': 'Υ',
        'Ώ': 'Ω',
        'ά': 'α',
        'έ': 'ε',
        'ί': 'ι',
        'ϊ': 'ι',
        'ΐ': 'ι',
        'ό': 'ο',
        'ύ': 'υ',
        'ϋ': 'υ',
        'ΰ': 'υ',
        'ώ': 'ω',
        'ή': 'η',
        'ς': 'σ'
    }
    replace['osmańsko-turecki'] = {'إ': 'ا', 'آ': 'ا', 'ا': 'ا', 'أ': 'ا'}
    replace['perski'] = {'إ': 'ا', 'آ': 'ا', 'ا': 'ا', 'أ': 'ا'}
    replace['paszto'] = {'إ': 'ا', 'آ': 'ا', 'ا': 'ا', 'أ': 'ا'}
    replace['urdu'] = {'إ': 'ا', 'آ': 'ا', 'ا': 'ا', 'أ': 'ا'}
    #replace[u'wietnamski'] = {u'Ă': u'A', u'Â': u'A', u'Đ': u'D', u'Ê': u'E', u'Ô': u'O', u'Ơ': u'O', u'Ư': u'U', u'ă': u'a', u'â': u'a', u'đ': u'd', u'ê': u'e', u'ô': u'o', u'ơ': u'o', u'ư': u'u'}

    for mytitle in mylist:
        try:
            h = Haslo(mytitle)
        except sectionsNotFound:
            pass
        except WrongHeader:
            pass
        else:
            if h.type == 3:
                change = 0
                for c in h.listLangs:
                    try:
                        c.lang
                    except AttributeError:
                        pass
                    else:
                        if c.lang in replace:
                            first = c.title
                            temp = c.title
                            for rep in replace[c.lang]:
                                temp = temp.replace(rep, replace[c.lang][rep])
                            if first != temp:
                                c.headerArg = temp
                                c.updateHeader()
                                change = 1

                if change:
                    h.push(
                        False,
                        'modyfikacja nagłówka w celu poprawnego indeksowania haseł (usunięcie znaków diakrytycznych)'
                    )
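The per-character replace loop above can be precomputed with str.maketrans, which also accepts multi-character replacement values such as 'Œ' → 'OE' (a sketch; maketrans keys must be single code points, so decomposed Arabic forms would first need unicodedata normalization):

# build one translation table per language, outside the page loop
tables = {lang: str.maketrans(mapping) for lang, mapping in replace.items()}

def strip_diacritics(title: str, lang: str) -> str:
    # titles in languages without a table pass through unchanged
    return title.translate(tables[lang]) if lang in tables else title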
Example #22
def league_table_files():
    league_table_files_category = Category(site,
                                           league_table_files_category_name)
    return pagegenerators.CategorizedPageGenerator(league_table_files_category)
Example #23
def main():
    site = pywikibot.getSite()
    indeks = pywikibot.Page(site, 'Indeks:Francuski_-_Związki_frazeologiczne')
    cat = Category(site, 'Kategoria:francuski_(indeks)')
    gen1 = pagegenerators.CategorizedPageGenerator(cat)
    ex = pywikibot.Page(site, 'Wikipedysta:AlkamidBot/wykluczone')
    re_obj = re.compile(
        r"''związek frazeologiczny''\n:\s*\(1\.1\) (\[\[[^]]*\]\])(\n|<ref)")
    tekst_dodaj = " zobacz też [[Indeks:Francuski - Związki frazeologiczne]]"

    zw = []
    trad = []
    lista = []
    for page in gen1:
        if ('związek frazeologiczny' in page.get()
                and page.title() not in indeks.get()
                and page.title() not in ex.get()):
            tlum = re_obj.search(page.get())
            if tlum is not None:
                print(page, ' dodatek')
                zw.append(page.title())
                trad.append(tlum.group(1))
                print(tlum.group(1))
                if ('[[Indeks: Francuski - Związki frazeologiczne]]' not in page.get()
                        and '[[Indeks:Francuski - Związki frazeologiczne]]' not in page.get()
                        and '{{źródła}}' in page.get()):
                    sekcja_przed = re.search(r"(.*?)\n{{źródła}}", page.get(),
                                             re.DOTALL)
                    sekcja_po = re.search(r"({{źródła}}.*)", page.get(),
                                          re.DOTALL)
                    dozmiany = sekcja_przed.group(1)
                    dozmiany += tekst_dodaj
                    final = dozmiany + '\n' + sekcja_po.group(1)
                    page.put(
                        final,
                        comment='bot dodaje linka do indeksu związków frazeologicznych'
                    )
            else:
                text = ex.get()
                text += '\n* [[' + page.title() + ']]'
                ex.put(text, comment='bot dodaje wyjątek')
                print("zła: ", page)

    orig = indeks.get()
    for a, b in zip(zw, trad):
        lit = a[0]
        litcap = lit.capitalize()
        sekcja = re.search(r"== %s ==\n(.*?)\{\{do góry\}\}" % (litcap), orig,
                           re.DOTALL)
        sekcja_przed = re.search(r"(.*?== %s ==\n)" % litcap, orig, re.DOTALL)
        sekcja_po = re.search(r"== %s ==\n.*?({{do góry}}.*)" % litcap, orig,
                              re.DOTALL)
        lista = sekcja.group(1).split('\n')
        str = "* [[" + a + "]] → " + b
        lista.append(str)
        bez = [x for x in lista if len(x) > 1]
        bez.sort()
        bez1 = "\n".join(bez) + "\n\n"
        orig = sekcja_przed.group(1) + bez1 + sekcja_po.group(1)

    indeks.put(orig, comment='bot aktualizuje indeks', botflag=False)
Example #24
def main():

    site = pywikibot.Site()

    # fetch Polish pages only
    cat_allpages = Category(site, 'Kategoria:polski (indeks)')
    # dialects are excluded, it would be too difficult to link all of them
    cat_dialects = Category(site, 'Kategoria:Polski_(dialekty_i_gwary)')

    list_allpages = pagegenerators.CategorizedPageGenerator(cat_allpages)
    list_dialects = set(
        pagegenerators.CategorizedPageGenerator(cat_dialects, recurse=True))

    count_all = 0

    intro = (
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"\n'
        '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n'
        '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="pl">\n'
        '<head>\n<meta http-equiv="content-type" content="text/html; charset=UTF-8" />\n'
        '</head><body>')

    intro += (
        'Poniżej znajduje się lista polskich haseł, do których '
        'nie linkuje żadne inne hasło z głównej przestrzeni nazw. W związku '
        'z tym trudno trafić do takiego hasła inaczej niż przez bezpośrednie jego '
        'wyszukanie. Jeśli możesz, dodaj w innym haśle odnośnik do porzuconego '
        'słowa, np. w przykładach lub pokrewnych.')

    with open('{0}public_html/porzucone.html.1'.format(config.path['home']),
              'w',
              encoding='utf-8') as f:
        f.write(intro)
        for page in list_allpages:
            if page not in list_dialects:
                # only look in the main namespace: virtually every page is
                # referenced somewhere (e.g. in missing pronunciation lists),
                # and two hits are enough to decide
                refs = list(page.getReferences(namespaces=0, total=2))
                try:
                    # drop the page itself; on pl.wikt pages often self-link
                    # in their examples of usage
                    refs.remove(page)
                except ValueError:
                    pass

                if len(refs) == 0:
                    try:
                        f.write(
                            '\n<br /><a href="http://pl.wiktionary.org/wiki/{0}">{0}</a>'
                            .format(page.title()))
                    except UnicodeEncodeError:
                        print('Unicode Error: ', page.title())
                    count_all += 1

        date_now = datetime.datetime.now() + datetime.timedelta(hours=2)
        f.write(
            date_now.strftime(
                "\n<br />Ostatnia aktualizacja listy: %Y-%m-%d, %H:%M:%S"))
        f.write('<br />Licznik porzuconych: {0}'.format(count_all))
        f.write('</body></html>')

    move('{0}public_html/porzucone.html.1'.format(config.path['home']),
         '{0}public_html/porzucone.html'.format(config.path['home']))
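
# Standalone restatement of the orphan test above, as a helper (a sketch using
# the same pywikibot API the script already relies on):
def is_orphan(page):
    # at most two main-namespace referrers are fetched; one may be the page
    # itself, which pl.wikt entries often self-link in usage examples
    refs = list(page.getReferences(namespaces=0, total=2))
    if page in refs:
        refs.remove(page)
    return len(refs) == 0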
Example #25
def main():
    global expedListPeople
    global expedListPeopleOrig
    global expedListGrats
    global expedListGratsOrig
#    wikipedia.verbose = 1
    titleOfPageToLoad = u'2009-06-14_49_-122' # The "u" before the title means Unicode, important for special characters
#    pywikibot.put_throttle.setDelay(5, absolute = True)
#    wikipedia.get_throttle.setDelay(5, absolute = True)

    enwiktsite = pywikibot.Site('en', 'geohashing') # loading a defined project's page

#    os.unlink("graticules.sqlite")

#    db = GraticuleDatabase.GraticuleDatabase()
    db = GraticuleDatabase.GraticuleDatabase("graticules.sqlite")
    all = db.getAllKeys()

    # catdb = Category.CategoryDatabase()

    pp_list2 = Category(enwiktsite, u"Category:Expedition_planning").articles()
    # pp_list2 = get_all_category_pages(enwiktsite, u"Category:Expedition_planning", catdb)

#Produce a list of all pages from 3 weekdays ago through when coordinates are available
#  by looking at the [[Category:Meetup on YYYY-MM-DD]] pages

    expedListPeople = parseExpedLists(enwiktsite)
    expedListGrats = parseExpedListsGrats(enwiktsite)

    # Save off the original pages so we can only update exped lists if they've changed
    expedListPeopleOrig = copy.deepcopy(expedListPeople)
    expedListGratsOrig = copy.deepcopy(expedListGrats)

    all_text = u""
    first_date_obj = get_last_day_avail(datetime.date.today() + datetime.timedelta(7))
    last_date_obj = first_date_obj
    cur_dates = []
    plan_dates = []
    old_date_list = []
    try:
        for i in range(0,3):
            while (first_date_obj > datetime.date.today()):
                cur_dates.append(first_date_obj.isoformat())
                expedSums = ExpeditionSummaries.ExpeditionSummaries(enwiktsite, first_date_obj.isoformat(), db)
                expedListPeople = updateExpedLists(expedSums, expedListPeople, first_date_obj.isoformat())
                expedListGrats = updateExpedListsGrats(expedSums, expedListGrats, first_date_obj.isoformat())
                first_date_obj = first_date_obj - datetime.timedelta(1)

            cur_dates.append(first_date_obj.isoformat())
            expedSums = ExpeditionSummaries.ExpeditionSummaries(enwiktsite, first_date_obj.isoformat(), db)
            expedListPeople = updateExpedLists(expedSums, expedListPeople, first_date_obj.isoformat())
            expedListGrats = updateExpedListsGrats(expedSums, expedListGrats, first_date_obj.isoformat())
            first_date_obj = first_date_obj - datetime.timedelta(1)

            while (first_date_obj.weekday() > 4):
                cur_dates.append(first_date_obj.isoformat())
                expedSums = ExpeditionSummaries.ExpeditionSummaries(enwiktsite, first_date_obj.isoformat(), db)
                expedListPeople = updateExpedLists(expedSums, expedListPeople, first_date_obj.isoformat())
                expedListGrats = updateExpedListsGrats(expedSums, expedListGrats, first_date_obj.isoformat())
                first_date_obj = first_date_obj - datetime.timedelta(1)

        cur_dates.append(first_date_obj.isoformat())
        expedSums = ExpeditionSummaries.ExpeditionSummaries(enwiktsite, first_date_obj.isoformat(), db)
        expedListPeople = updateExpedLists(expedSums, expedListPeople, first_date_obj.isoformat())
        expedListGrats = updateExpedListsGrats(expedSums, expedListGrats, first_date_obj.isoformat())
        first_date = first_date_obj.isoformat()

        remove_dates(enwiktsite, cur_dates)

#Get a list of old date pages to update
        old_date_list = get_old_dates(enwiktsite, db)

#This looks at the pages in [[Category:Expedition planning]]
#  and produces the summaries for all the pages for far in the future
        plan_dates = getExpeditionSummaries(pp_list2, db, None, (last_date_obj+datetime.timedelta(1)).isoformat())
        for i in plan_dates.keys():
            cur_dates.append(i)

        if check_banana(enwiktsite) != 0:
            return 1

        updateUserTexts(enwiktsite)
        updateGratTexts(enwiktsite)
    except Exception as e:

        pywikibot.output("cur_dates: " + str(cur_dates))
        pywikibot.output("plan_dates: " + str(plan_dates))
        pywikibot.output("old_dates: " + str(old_date_list))
        bug_page = pywikibot.Page(enwiktsite, u"User:AperfectBot/BotBugs")
        bug_page_text = bug_page.get()
        bug_page_text = bug_page_text + u"\n== NEW REPORT ==\nDates:\n" + str(cur_dates) + str(plan_dates) + str(old_date_list) + u"\n"

        pywikibot.output(bug_page_text)
        page_write(bug_page, bug_page_text, enwiktsite)
        raise e

#Create the [[Template:Expedition_summaries/YYYY-MM-DD]] pages for planning page dates
    putExpeditionSummaries(plan_dates, enwiktsite)

#Build up the text for [[Template:Recent_expeditions]]
    recent_expedition_page_name = u"Template:Recent_expeditions"

    recent_exp_page = pywikibot.Page(enwiktsite, recent_expedition_page_name)
    recent_exp_text = recent_exp_page.get()
    recent_exp_res = re.findall("=== \[\[(\d{4}-\d{2}-\d{2}).*?\]\] ===\n([^=]*)", recent_exp_text, re.S)

    recent_exp_hash = {}
    for i in range(0,len(recent_exp_res)):
        recent_exp_hash[recent_exp_res[i][0]] = recent_exp_res[i][1]

    summary_text = u""
    summary_text += u"<noinclude>__NOTOC__</noinclude>\n"

    date_keys = cur_dates
    date_keys.sort()
    date_keys.reverse()
    if (date_keys[0] > last_date_obj.isoformat()):
        summary_text += u"== Upcoming Events ==\n"
    for i in date_keys:
        if summary_text[-1] != u"\n":
            summary_text += u"\n"

        if (i == (datetime.date.today() - datetime.timedelta(1)).isoformat()):
            summary_text += u"== Recent Expeditions ==\n"

        if (i == last_date_obj.isoformat()):
            summary_text += u"== Expeditions Being Planned ==\n"

        if i in recent_exp_hash:
            summary_text += recent_exp_hash[i]
        else:
            summary_text += u"{{Expedition_summaries|" + i + u"}}\n"
            summary_text += u"<!--Insert manual updates below this line.  Manual updates may not contain equal signs-->\n"

    recent_exp_page = pywikibot.Page(enwiktsite, recent_expedition_page_name)
    page_write(recent_exp_page, summary_text, enwiktsite)
def get_all_football_games_category_pages():
    games_category = Category(site, football_games_category_name)
    games_category_pages = list(
        pagegenerators.CategorizedPageGenerator(games_category))
    return games_category_pages
def get_all_football_players_category_pages():
    players_category = Category(site, football_players_category_name)
    players_category_pages = list(
        pagegenerators.CategorizedPageGenerator(players_category))
    return players_category_pages
from pywikibot.pagegenerators import CategorizedPageGenerator
from pywikibot import Site, Category
from dataskakare import GoogleTranslate
import mwparserfromhell
import hashlib
import uuid
import json

site = Site('commons', 'commons')
cat = Category(site, 'Category:Media_contributed_by_the_Swedish_Performing_Arts_Agency:_2019-03')
translate = GoogleTranslate(input('google service account file:'))

def thumb_from_title(title):
    # guess a 500px thumbnail URL: Commons shards thumbnails by the first one
    # and two hex digits of the MD5 of the file name
    safe_title = title.encode('utf-8')
    md5_title = hashlib.md5(safe_title).hexdigest()

    return 'https://upload.wikimedia.org/wikipedia/commons/thumb/{}/{}/{}/500px-{}.jpg'.format(md5_title[:1], md5_title[:2], title, title)
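
# Usage sketch with a hypothetical file name. Two caveats: the helper appends
# '.jpg' itself, so it appears to expect the name without its extension, and
# Commons hashes the underscored file name, so pass underscores, not spaces.
print(thumb_from_title('Some_file_name'))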

final_pages = list()
for page in CategorizedPageGenerator(cat, recurse=False, namespaces=6):
    wikicode = mwparserfromhell.parse(page.text)

    template_to_parse = False
    for template in wikicode.filter_templates():
        if template.name.matches('Musikverket-image'):
            template_to_parse = template

    if not template_to_parse:
        print('failed to find given template')
        continue
Example #29
def main():
    global test_mode
    test_mode = 0
    global site
    site = pywikibot.getSite()
    global site_en
    site_en = pywikibot.getSite('en', 'wiktionary')
    global site_com
    site_com = pywikibot.getSite('commons', 'commons')
    global log_site
    log_site = pywikibot.Page(site, 'Wikipedysta:AlkamidBot/zch/log')

    cat = Category(site, 'Kategoria:japoński (indeks)')
    cat_en = Category(site_en, 'Category:Han_characters')
    lista_stron_en = pagegenerators.CategorizedPageGenerator(cat_en)
    lista_stron = pagegenerators.CategorizedPageGenerator(cat)

    tekst_przed = re.compile('(.*?)=', re.DOTALL)
    tekst_po = re.compile('.*?(=.*)', re.DOTALL)
    lista = []
    grafika = re.compile(
        '(\-bw\.|\-red\.|\-order\.|{{zch\-cienie}}|{{zch\-animacja}}|{{zch\-komiks}})'
    )
    log = ''

    #for page in lista_stron_en:
    #       if len(page.title())==1:
    #               lista.append(page)

    lista.append(pywikibot.Page(site, '九'))
    lista.append(pywikibot.Page(site, '八'))

    for a in lista:

        final = ''
        log = ''
        a_pl = pywikibot.Page(site, a.title())

        try:
            strona = a_pl.get()
        except pywikibot.IsRedirectPage:
            print('[[%s]] - przekierowanie na pl.wikt' % a_pl.title())
            log = log + '\n*[[%s]] - przekierowanie na pl.wikt' % a_pl.title()
        except pywikibot.NoPage:
            result = zch(a_pl)
            if result != 0:
                if test_mode == 1:
                    print(result + '\n\n')
                else:
                    a_pl.put(result, comment='źródło: [[:en:%s]]' % a.title())
        except pywikibot.Error:
            print('[[%s]] - błąd na en.wikt' % a_pl.title())
            log = log + '\n*[[%s]] - błąd na pl.wikt' % a_pl.title()

        else:
            tekst_przed_s = re.search(tekst_przed, a_pl.get())
            tekst_po_s = re.search(tekst_po, a_pl.get())
            '''
            grafika_s = re.search(grafika, a.get())
            if grafika_s != None:
                    print u'[[%s]] - znaleziono grafikę z CJK stroke order' % a.title()
                    log = log + u'\n*[[%s]] - znaleziono grafikę z CJK stroke order' % a.title()
            '''
            if test_mode == 1:
                # nonsense marker that never matches, so a test run always
                # regenerates the section
                sekcja_znak = 'fdss73agrefadf'
            else:
                sekcja_znak = '{{znak chiński}}'

            if sekcja_znak not in a_pl.get():
                result = zch(a_pl)

                if result != 0:

                    final = (tekst_przed_s.group(1) + result + '\n\n'
                             + tekst_po_s.group(1))

                    if test_mode == 1:
                        print(final + '\n\n')
                    else:
                        a_pl.put(final,
                                 comment='źródło: [[:en:%s]]' % a.title())

        log_write(log_site, log, a_pl.title())
Example #30
def main():

    list = makeConversionList()
    global site_pl
    site_pl = pywikibot.getSite()
    global site_en
    site_en = pywikibot.getSite('en', 'wiktionary')
    global commons
    commons = pywikibot.getSite('commons', 'commons')
    global test_mode
    test_mode = 0
    global data_en
    data_en = '20120125'
    cat_en = Category(site_en, 'Category:Han_characters')
    lista_stron_en = pagegenerators.CategorizedPageGenerator(cat_en)

    file = open("%s/wikt/moje/log/zch.txt" % environ['HOME'], 'w')
    file.write('')
    file.close()

    #pagesDump1 = xmlreader.XmlDump('/mnt/user-store/dumps/enwiktionary/enwiktionary-%s-pages-articles.xml' % data_en)
    #pagesDump = xmlreader.XmlDump.parse(pagesDump1)

    # test override: process a single character instead of the whole category
    # (a bare string works because str.title() leaves a Han character unchanged)
    lista_stron_en = ['㭻']
    for elem in lista_stron_en:
        title = elem.title()
        if len(title) == 1:
            en = retrieveEnPlusCommons(title)
            pl = Haslo(title)
            sekcja = None
            if en and pl.type not in (0, 1, 2):
                try:
                    pl.listLangs
                except AttributeError:
                    log('*[[%s]] - brak listy sekcji!' % en.title)
                else:
                    for sec in pl.listLangs:
                        if sec.lang == 'znak chiński':
                            sec.pola()
                            if sec.type == 4:
                                sekcja = sec
            elif en and pl.type == 1:
                sekcja = Sekcja(title=title, type=4, lang='znak chiński')

            if sekcja:
                push = 0
                if compare(en, sekcja, 'klucz'):
                    push = 1
                if compare(en, sekcja, 'kreski'):
                    push = 1
                if compare(en, sekcja, 'kody'):
                    push = 1
                if compare(en, sekcja, 'warianty'):
                    push = 1
                if compare(en, sekcja, 'kolejnosc'):
                    push = 1
                if compare(en, sekcja, 'etymologia'):
                    push = 1
                if compare(en, sekcja, 'slowniki'):
                    push = 1
                if ordinal(sekcja):
                    push = 1

                tab = SimpTrad(title, list)
                if compare(en, sekcja, 'upr-trad', tab):
                    push = 1
                if push:
                    if pl.type == 1:
                        pl = Haslo(title, new=True)
                        pl.addSection(sekcja)
                        log('*[[%s]] - dodano' % title)

                    pl.push(
                        False,
                        myComment='aktualizacja danych o znaku chińskim; źródła: [[:en:%s]], http://simplify.codeplex.com/, commons' % title,
                        new=True)

    logPage = pywikibot.Page(site_pl,
                             'Wikipedysta:AlkamidBot/listy/znak chiński')
    logPageText = 'AlkamidBot cyklicznie sprawdza, czy w angielskim Wikisłowniku lub na commons pojawiły się nowe informacje o znakach chińskich (np. warianty pisania, zapisy etymologiczne itp.). Na tej liście zapisuje problemy, jakie napotkał: ("tabela" oznacza dane z http://simplify.codeplex.com/)\n\n'
    file = codecs.open("%s/wikt/moje/log/zch.txt" % environ['HOME'], 'r',
                       'utf-8')
    logPageText += file.read()
    file.close()
Example #31
collections = False
with open('src/static/collections.json') as json_file:
    collections = json.load(json_file)

found_collection = False
for c in collections:
    if c['generator_value'] == arg:
        found_collection = c['id']
        break

if not found_collection:
    print('Could not find a collection for the given category')

site = Site('commons', 'commons')
cat = Category(site, 'Category:{}'.format(arg))
gen = CategorizedPageGenerator(cat, recurse=False, namespaces=6)
endpoint = 'https://commons.wikimedia.org/w/api.php?format=json&action=wbgetentities&ids='

final_translations = list()


def chunks(l, n):
    """Yield successive n-sized chunks from l."""
    for i in range(0, len(l), n):
        yield l[i:i + n]
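
# Illustrative batch fetch (assumes the `requests` package; the batch size of
# 50 is the usual wbgetentities ceiling, not taken from the original):
import requests

def fetch_entities(media_ids):
    entities = {}
    for batch in chunks(media_ids, 50):
        response = requests.get(endpoint + '|'.join(batch))
        entities.update(response.json().get('entities', {}))
    return entities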


for page in gen:

    media_id = 'M{}'.format(page.pageid)