/
remove_i3rab.py
114 lines (103 loc) · 3.91 KB
/
remove_i3rab.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
import blib, pywikibot
from blib import msg, getparam, addparam
from arabiclib import *
# Module-level pywikibot site handle. It is not referenced by any code visible
# in this file (NOTE(review): confirm nothing else depends on it being created
# at import time before removing it).
site = pywikibot.Site()
# Passed as the `verbose` keyword to every blib.do_edit() call in this file.
verbose = True
def remove_i3rab(page, index, entry, word, nowarn=False):
    """Strip a final case ending (i3rab) from `word` and normalize its start.

    Processing order:
      1. `word` is passed through reorder_shadda() (defined outside this file;
         presumably canonicalizes shadda/vowel order -- confirm).
      2. A trailing UN or U is removed; a trailing UUNA is replaced by UUN.
         If none of those match but the last character is A, I or AN, a FIXME
         message is logged and the ending is left untouched.
      3. A leading ALIF_WASLA is replaced by ALIF.

    Step 3 runs on every path, so a word that has both an i3rab ending and an
    initial alif wasla comes back fully normalized.

    Args:
        page, index, entry: only used to label the log messages.
        word: the string to clean up.
        nowarn: when true, no messages are emitted.

    Returns:
        The cleaned-up word.
    """
    def mymsg(text):
        # Log with a uniform "Page <index> <page>: Entry <entry>:" prefix.
        if not nowarn:
            msg("Page %s %s: Entry %s: %s" % (index, page, entry, text))

    word = reorder_shadda(word)

    if word.endswith(UN):
        mymsg("Removing i3rab (UN) from %s" % word)
        word = word[:len(word) - len(UN)]
    elif word.endswith(U):
        mymsg("Removing i3rab (U) from %s" % word)
        word = word[:len(word) - len(U)]
    elif word.endswith(UUNA):
        mymsg("Removing i3rab (UUNA -> UUN) from %s" % word)
        word = word[:len(word) - len(UUNA)] + UUN
    elif word and word[-1] in [A, I, AN]:
        # U is not listed here: a word ending in U was already handled above.
        mymsg("FIXME: Strange diacritic at end of %s" % word)

    # Applied regardless of which branch ran above, so stripping an ending
    # never prevents the alif wasla normalization.
    if word and word[0] == ALIF_WASLA:
        mymsg("Changing alif wasla to plain alif for %s" % word)
        word = ALIF + word[1:]
    return word
def do_nouns(poses, headtempls, save, startFrom, upTo):
    """Remove i3rab from every parameter of the given headword templates.

    For each part of speech in `poses`, the pages yielded by
    blib.cat_articles("Arabic <pos>s", startFrom, upTo) are handed to
    blib.do_edit() together with a per-page callback.

    Args:
        poses: part-of-speech names; each is lower-cased and pluralized with
            "s" to build the category name.
        headtempls: template names whose parameters should be processed.
        save: forwarded to blib.do_edit() as `save`.
        startFrom, upTo: forwarded to blib.cat_articles().
    """
    def do_one_page_noun(page, index, text):
        # `text` must provide filter_templates() (presumably parsed wikicode
        # supplied by blib.do_edit -- confirm).
        title = page.title()
        seen = 0
        summaries = []
        for tmpl in text.filter_templates():
            if tmpl.name not in headtempls:
                continue
            seen += 1
            head = getparam(tmpl, "1")
            changed_params = []
            for prm in tmpl.params:
                before = prm.value
                after = remove_i3rab(title, index, head, str(before))
                if after != before:
                    prm.value = after
                    changed_params.append(str(prm.name))
            if changed_params:
                summaries.append("#%s %s %s (%s)" % (
                    seen, tmpl.name, head, ", ".join(changed_params)))
        # The summary is built even when nothing changed on the page.
        comment = "Remove i3rab from params in %s" % ('; '.join(summaries))
        return text, comment

    for pos in poses:
        category = "Arabic %ss" % pos.lower()
        for index, page in blib.cat_articles(category, startFrom, upTo):
            blib.do_edit(page, index, do_one_page_noun, save=save, verbose=verbose)
def do_verbs(save, startFrom, upTo):
    """Remove i3rab from the verbal nouns (`vn`) of `ar-conj` templates.

    The pages yielded by blib.cat_articles("Arabic verbs", startFrom, upTo)
    are handed to blib.do_edit() together with a per-page callback.

    Args:
        save: forwarded to blib.do_edit() as `save`.
        startFrom, upTo: forwarded to blib.cat_articles().
    """
    def do_one_page_verb(page, index, text):
        # `text` must provide filter_templates() (presumably parsed wikicode
        # supplied by blib.do_edit -- confirm).
        pagename = page.title()
        verbcount = 0
        verbids = []
        for template in text.filter_templates():
            if template.name != "ar-conj":
                continue
            verbcount += 1
            origvalue = getparam(template, "vn")
            vnvalue = origvalue
            # A trailing "?" marks the verbal noun(s) as uncertain; it is
            # set aside while processing and re-appended afterwards.
            uncertain = vnvalue.endswith("?")
            if uncertain:
                vnvalue = vnvalue[:-1]
                msg("Page %s %s: Verbal noun(s) identified as uncertain" % (
                    index, pagename))
            if not vnvalue:
                continue
            # Verbal nouns may be separated by an ASCII or an Arabic comma.
            vns = re.split("[,،]", vnvalue)
            form = getparam(template, "1")
            verbid = "#%s form %s" % (verbcount, form)
            # For form 1/I (optionally followed by "-..."), params 2 and 3
            # are added to the identifier.
            if re.match("^[1I](-|$)", form):
                verbid += " (%s,%s)" % (
                    getparam(template, "2"), getparam(template, "3"))
            no_i3rab_vns = [
                remove_i3rab(pagename, index, verbid, vn) for vn in vns
            ]
            # Rejoined with ASCII commas, whichever separator was used.
            newvn = ",".join(no_i3rab_vns)
            if uncertain:
                newvn += "?"
            # Compare against the untouched parameter value. Comparing against
            # the "?"-stripped value would flag every uncertain entry as
            # changed even when nothing was removed.
            if newvn != origvalue:
                msg("Page %s %s: Verb %s, replacing %s with %s" % (
                    index, pagename, verbid, origvalue, newvn))
                addparam(template, "vn", newvn)
                verbids.append(verbid)
        return text, "Remove i3rab from verbal nouns for verb(s) %s" % (
            ', '.join(verbids))

    for index, page in blib.cat_articles("Arabic verbs", startFrom, upTo):
        blib.do_edit(page, index, do_one_page_verb, save=save, verbose=verbose)
# Command-line driver: build the argument parser, then run the noun and/or
# verb pass depending on which flags were given.
pa = blib.create_argparser("Remove i3rab")
pa.add_argument(
    "--verb",
    action="store_true",
    help="Do verbal nouns in verbs",
)
pa.add_argument(
    "--noun",
    action="store_true",
    help="Do arguments in nouns",
)
params = pa.parse_args()
# params.start / params.end / params.save are not declared here; presumably
# they are added by blib.create_argparser() -- confirm.
startFrom, upTo = blib.parse_start_end(params.start, params.end)

if params.noun:
    do_nouns(
        poses=["noun", "adjective"],
        headtempls=[
            "ar-noun",
            "ar-coll-noun",
            "ar-sing-noun",
            "ar-nisba",
            "ar-noun-nisba",
            "ar-adj",
            "ar-numeral",
        ],
        save=params.save,
        startFrom=startFrom,
        upTo=upTo,
    )
if params.verb:
    do_verbs(save=params.save, startFrom=startFrom, upTo=upTo)