def __init__(self, card):
    """Download one water-registry card from textual.ru and parse it.

    ``/gvr/index.php`` is requested with ``card`` as its only parameter.
    Every response line containing ``class="cardv"`` is treated as one
    key/value row: once the HTML tags are split away, the first non-empty
    fragment is the key and every later non-empty fragment is stored as its
    value in ``self._data`` (a later fragment overwrites an earlier one).
    A line containing ``<td valign="top"><a href="`` is turned into a
    wikified sentence and stored under u"Реки".

    card -- card identifier, sent to the server as the ``card`` parameter.

    Sets ``self._card`` and ``self._data``; ``self._data[u"accessdate"]``
    holds today's date.
    """
    conn = httphelp()
    conn.server = "textual.ru"
    conn.codepage = "windows-1251"
    conn.scriptname = "/gvr/index.php"
    conn.parameters = {'card': card}

    self._card = card
    self._data = {}
    self._data[u"accessdate"] = datetime.date.today()

    # Keys whose values need clean-up before being stored.
    area_keys = (u"Площадь водоёма", u"Водосборная площадь")
    basin_keys = (u"Бассейновый округ", u"Речной бассейн",
                  u"Речной подбассейн")

    for line in conn.lines():
        if line.find('class="cardv"') > 0:
            key = ""
            for s in re.split('<.+?>', line):
                if s == "":
                    continue
                if key == "":
                    # First string in the row is the key, the next one is
                    # its value.
                    key = unicode(s)
                    continue

                paren = s.find("(")
                if key in area_keys:
                    # Values starting with "0" or "999" are data errors.
                    if s.startswith("0") or s.startswith("999"):
                        self._data[key] = ""
                    else:
                        # Drops the last 9 characters -- presumably the
                        # unit suffix; TODO confirm against a live page.
                        self._data[key] = unicode(s[:-9])
                elif key == u"Название" and paren > 0:
                    # "Name (other names)": split into the two parts.
                    self._data[key] = unicode(s[:paren - 1])
                    self._data[u"Названия"] = unicode(
                        s[paren + 1:s.find(")")])
                elif key == u"Вытекает" and s.startswith(u"река"):
                    # Skip the leading word and the character after it
                    # (5 characters), and cut any parenthesised tail.
                    if paren > 0:
                        self._data[key] = unicode(s[5:paren])
                    else:
                        self._data[key] = unicode(s[5:])
                elif key in basin_keys and paren > 0:
                    # Cut everything from the last "(" onwards.
                    self._data[key] = unicode(s[:s.rfind("(")])
                else:
                    # Any other row, including a basin row without a
                    # parenthesised tail: keep the value as it is.
                    self._data[key] = unicode(s)

        if line.find(u'<td valign="top"><a href="') >= 0:
            # Build the list of rivers found on the page.
            rivers = []
            for piece in re.split("<.+?>", line):
                if piece == "":
                    continue
                # Wikification: everything up to and including the first
                # space stays outside the link; the link ends one
                # character before "(" or at the end of the fragment.
                link_start = piece.find(" ") + 1
                paren = piece.find("(")
                if paren == -1:
                    link_end = len(piece)
                else:
                    link_end = paren - 1
                rivers.append(piece[:link_start] + u"[["
                              + piece[link_start:link_end] + u"]]"
                              + piece[link_end:])
            if rivers:
                sentence = (u"\nВ озеро впадают: " + u", ".join(rivers)
                            + u".")
            else:
                sentence = u"\nВ озеро впадают."
            self._data[u"Реки"] = sentence
def __init__(self, cat):
    """Gather statistics for one Incubator test-wiki category.

    cat -- category page object; must provide ``titleWithoutNamespace()``
           and ``templatesWithParams()``.

    The title without namespace must start with u"Wp"; everything after
    its third character is taken as the language code (``self.lang``,
    UTF-8 encoded). ``self.localname`` defaults to that code and is
    replaced by the ``localname`` parameter of a u"Test language"
    template when one is present. Counters are then read from the
    catanalysis page on toolserver.org; any counter that is not found
    keeps its u"-" placeholder.

    Raises IncubatorException when the title does not start with u"Wp".
    """
    self.cat = cat
    # Placeholders for figures that could not be determined.
    # NOTE(review): ``articles`` and ``redir`` are never updated in this
    # method; the "articles" figure from the page goes into ``pages``.
    self.articles = u"-"
    self.pages = u"-"
    self.redir = u"-"
    self.editors = u"-"
    self.edits = u"-"
    self.littleedits = u"-"

    title = self.cat.titleWithoutNamespace()
    if title[:2] != u"Wp":
        raise IncubatorException
    self.lang = title[3:].encode("utf-8")
    self.localname = self.lang
    self.rusname = u"---"

    for tl in self.cat.templatesWithParams():
        if tl[0] == u"Test language":
            for param in tl[1]:
                # Containment test rather than ``find(...) > 0`` so that a
                # parameter starting with "localname" is matched too.
                if u"localname" in param:
                    self.localname = param[param.find(u"=") + 1:].strip()
                    break

    self.conn = httphelp()
    self.conn.server = "toolserver.org"
    self.conn.scriptname = "/~pathoschild/catanalysis/index.php?title=Wp/%s&wiki=incubatorwiki_p&cat=0" % self.lang
    self.conn.parameters = {}
    self.conn.codepage = "utf-8"
    self.lines = self.conn.lines("GET")

    # (attribute to fill, pattern whose first group is the number)
    counters = (
        ("pages", r'<li>(\d*) articles'),
        ("editors", r'<li>(\d*) editors'),
        ("edits", r'<li>(\d*) revisions'),
        ("littleedits", r'including (\d*) minor'),
    )
    for l in self.lines:
        for attr, pattern in counters:
            m = re.search(pattern, l)
            if m is not None:
                # Figures are stored as UTF-8 encoded byte strings.
                setattr(self, attr, m.group(1).encode("utf-8"))
def __init__(self, bo="", rb="", subb="", hep="", wot="", name="", num="",
             loc="", start=0):
    """Run a registry search on textual.ru and collect the card ids found.

    All arguments are passed unchanged as request parameters of
    ``/gvr/index.php``. When ``bo`` is the empty string no request is made
    and the list stays empty.

    start -- value of the ``start`` request parameter; when the response
             announces a further page of results, another GVRList is
             built with ``start + 200`` and its data is appended.

    Sets ``self._data`` (list of card ids) and ``self._pointer`` (-1).
    """
    # Initialise both attributes up front so that an empty list has the
    # same attributes as a populated one.
    self._data = []
    self._pointer = -1
    if bo == "":
        return

    # Sample request:
    # http://textual.ru/gvr/index.php?bo=1&rb=68&subb=0&hep=0&wot=11&name=&num=&loc=&s=%CF%EE%E8%F1%EA
    conn = httphelp()
    conn.server = "textual.ru"
    conn.codepage = "windows-1251"
    conn.scriptname = "/gvr/index.php"
    conn.parameters = {'bo': bo, "rb": rb, "subb": subb, "hep": hep,
                       "wot": wot, "name": name, "num": num, "loc": loc,
                       "start": start}

    marker = '/gvr/index.php?card='
    for l in conn.lines():
        b = l.find(marker)
        if b > 0:
            # The id runs from the end of the marker to the next '&'.
            # NOTE(review): if no '&' follows, find() returns -1 and the
            # slice just drops the last character of the line -- confirm
            # that card links always carry further parameters.
            self._data.append(l[b + len(marker):l.find('&', b)])
        if l.find(u'следующая страница результатов') > 1:
            # Results are divided into pages: fetch the next one.
            self._data.extend(
                GVRList(bo, rb, subb, hep, wot, name, num, loc,
                        start + 200).get_data())
def __init__(self, basecat, tagcat, depth='12',
             lang='ru', project='wikipedia'):
    """Collect the articles found in both ``basecat`` and tag categories.

    For every tag category one request is sent to
    ``/catscan2/quick_intersection.php`` on tools.wmflabs.org, and the
    link targets found in the response are gathered, without duplicates,
    into the sorted list ``self.articles``.

    basecat -- base category, sent as the first line of ``cats``.
    tagcat  -- a single category name (byte or unicode string) or an
               iterable of names; each name is UTF-8 encoded before use.
    depth, lang, project -- passed through as request parameters.

    Raises CategoryIntersectException when a response line contains
    u"Database Error".
    """
    self.articles = []
    # Accept both string types: a unicode string must not be iterated
    # character by character.
    if isinstance(tagcat, (type(""), type(u""))):
        catlist = [tagcat]
    else:
        catlist = tagcat

    seen = set()  # titles already stored, for constant-time de-duplication
    for tag in catlist:
        # TODO: rework -- the remote script understands several categories
        # in a single request.
        tag = tag.encode('utf-8', 'ignore')
        conn = httphelp()
        conn.server = "tools.wmflabs.org"
        conn.scriptname = "/catscan2/quick_intersection.php"
        conn.parameters = {'cats': '%s\r\n%s' % (basecat, tag),
                           'depth': depth,
                           'format': 'wiki',
                           'lang': lang,
                           'max': 30000,
                           'ns': '0',
                           'project': project,
                           'start': '0',
                           'redirects': ''}
        conn.codepage = "utf-8"

        for data in conn.lines(method="GET"):
            # Check for a database error reported in the page body.
            if u"Database Error" in data:
                raise CategoryIntersectException(u"Database error")

            link_open = data.find("[[")
            link_close = data.find("]]")
            # Skip lines without a complete link, and lines that mention
            # "Category" past the second column.
            if link_open == -1 or link_close == -1 \
                    or data.find("Category") > 1:
                continue

            # Keep the link target only; a link without "|" is taken
            # whole instead of losing its last character.
            title = data[link_open + 2:link_close].split("|", 1)[0]
            if title not in seen:
                seen.add(title)
                self.articles.append(title)
    self.articles.sort()