Пример #1
0
    def __init__(self, card):
        """Download one card page from textual.ru and parse it into ``_data``.

        The page ``/gvr/index.php?card=<card>`` is requested through
        ``httphelp`` (windows-1251 code page) and scanned line by line:

        * lines containing ``class="cardv"`` are treated as "field name,
          value" rows and stored in ``self._data`` under the field name;
        * a line containing ``<td valign="top"><a href="`` is treated as the
          list of inflowing rivers and stored, wikified, under ``u"Реки"``.

        ``self._data[u"accessdate"]`` is set to today's date and
        ``self._card`` keeps the requested card id.

        card -- value sent as the ``card`` request parameter.
        """
        conn = httphelp()
        conn.server     = "textual.ru"
        conn.codepage   = "windows-1251"
        conn.scriptname = "/gvr/index.php"
        conn.parameters = {'card': card}

        self._card = card
        self._data = {}
        self._data[u"accessdate"] = datetime.date.today()

        for line in conn.lines():
            if line.find('class="cardv"') > 0:
                key = ""
                # Splitting on tags leaves the text fragments of the row.
                for s in re.split('<.+?>', line):
                    if s == "":
                        continue
                    if key == "":
                        # The first non-empty fragment is the field name;
                        # every later fragment is a value for that field.
                        key = unicode(s)
                        continue

                    paren = s.find("(")
                    # Per-field workarounds.
                    if key == u"Площадь водоёма" or key == u"Водосборная площадь":
                        if s.startswith(("0", "999")):
                            # Values starting with 0 or 999 are data errors.
                            self._data[key] = ""
                        else:
                            # Drop the last 9 characters (presumably the
                            # unit suffix -- verify against a live page).
                            self._data[key] = unicode(s[:-9])
                    elif key == u"Название" and paren > 0:
                        # "Name (other names)": split into two fields.
                        self._data[key] = unicode(s[:paren - 1])
                        self._data[u"Названия"] = unicode(s[paren + 1:s.find(")")])
                    elif key == u"Вытекает" and s.startswith(u"река"):
                        # Skip the 5-character u"река " prefix and any
                        # trailing parenthesised part.
                        if paren > 0:
                            self._data[u"Вытекает"] = unicode(s[5:paren])
                        else:
                            self._data[u"Вытекает"] = unicode(s[5:])
                    elif key == u"Бассейновый округ" or key == u"Речной бассейн" or key == u"Речной подбассейн":
                        # Only values with a parenthesised tail are stored,
                        # cut at the last "(".
                        if paren > 0:
                            self._data[key] = unicode(s[:s.rfind("(")])
                    else:
                        self._data[key] = unicode(s)

            if line.find(u'<td valign="top"><a href="') >= 0:
                # Collect the rivers listed on this line. A later matching
                # line replaces the value stored by an earlier one.
                rivers = []
                for piece in re.split("<.+?>", line):
                    if piece == "":
                        continue
                    # Wikification: wrap everything between the first space
                    # and the character before "(" in [[...]].
                    a = piece.find(" ") + 1
                    b = piece.find("(") - 1
                    if b == -2:
                        # No "(" at all: the link runs to the end.
                        b = len(piece)
                    rivers.append(piece[:a] + u"[[" + piece[a:b] + u"]]" + piece[b:])
                if rivers:
                    # Join once after the loop so several rivers come out as
                    # "A, B." rather than "A.B.".
                    self._data[u"Реки"] = u"\nВ озеро впадают: " + u", ".join(rivers) + u"."
Пример #2
0
 def __init__(self, cat):
     """Gather statistics for the test wiki described by category *cat*.

     The category title (without namespace) must start with ``Wp``; the
     text after the third character becomes ``self.lang`` (UTF-8 encoded).
     ``self.localname`` defaults to ``self.lang`` and is replaced by the
     ``localname`` parameter of a ``Test language`` template if the
     category page has one. The counters are then read from the
     catanalysis tool on toolserver.org; each counter stays ``u"-"`` when
     the corresponding text is not found in the response.

     cat -- category page object providing ``titleWithoutNamespace()``
            and ``templatesWithParams()``.

     Raises IncubatorException when the title does not start with ``Wp``.
     """
     self.cat = cat
     # u"-" marks a counter that has not been filled in.
     self.articles = u"-"
     self.pages = u"-"
     self.redir = u"-"
     self.editors = u"-"
     self.edits = u"-"
     self.littleedits = u"-"

     title = self.cat.titleWithoutNamespace()
     if title[:2] != u"Wp":
         raise IncubatorException
     self.lang = title[3:].encode("utf-8")
     self.localname = self.lang
     self.rusname = u"---"

     for tl in self.cat.templatesWithParams():
         if tl[0] != u"Test language":
             continue
         for param in tl[1]:
             # Substring test instead of ``find(...) > 0`` so that a
             # parameter starting with "localname" (index 0) matches too.
             if u"localname" in param:
                 self.localname = param[param.find(u"=") + 1:].strip()
                 break

     self.conn = httphelp()
     self.conn.server     = "toolserver.org"
     self.conn.scriptname = "/~pathoschild/catanalysis/index.php?title=Wp/%s&wiki=incubatorwiki_p&cat=0" % self.lang
     self.conn.parameters = {}
     self.conn.codepage = "utf-8"
     self.lines = self.conn.lines("GET")

     # (attribute, compiled pattern) pairs; the patterns are compiled once
     # rather than on every line. Note that the "articles" count is stored
     # in ``self.pages``; ``self.articles`` is not filled in here.
     counters = (
         ("pages", re.compile(u'<li>(\\d*) articles')),
         ("editors", re.compile(u'<li>(\\d*) editors')),
         ("edits", re.compile(u'<li>(\\d*) revisions')),
         ("littleedits", re.compile(u'including (\\d*) minor')),
     )
     for line in self.lines:
         for attr, pattern in counters:
             m = pattern.search(line)
             if m is not None:
                 setattr(self, attr, m.group(1).encode("utf-8"))
Пример #3
0
    def __init__(self, bo="", rb="", subb="", hep="", wot="", name="", num="", loc="", start=0):
        """Run a search on textual.ru and collect the card ids it returns.

        All arguments are sent as request parameters of
        ``/gvr/index.php``. Example query:
        http://textual.ru/gvr/index.php?bo=1&rb=68&subb=0&hep=0&wot=11&name=&num=&loc=&s=%CF%EE%E8%F1%EA

        With an empty *bo* no request is made and the list stays empty.
        When the response mentions a next page of results, the following
        page (``start + 200``) is fetched through a new ``GVRList`` and its
        ``get_data()`` result is appended.

        The card ids end up in ``self._data``; ``self._pointer`` starts
        at -1.
        """
        # Initialise both attributes before the early return so that an
        # empty list has the same attributes as a filled one.
        self._data = []
        self._pointer = -1
        if bo == "":
            return

        conn = httphelp()
        conn.server     = "textual.ru"
        conn.codepage   = "windows-1251"
        conn.scriptname = "/gvr/index.php"
        conn.parameters = {'bo': bo, "rb":rb, "subb":subb, "hep":hep, "wot": wot, "name":name, "num":num, "loc":loc, "start":start}

        marker = '/gvr/index.php?card='
        for line in conn.lines():
            b = line.find(marker)
            if b > 0:
                # The card id runs from the end of the marker to the next
                # "&". NOTE(review): if no "&" follows, find() returns -1
                # and the slice merely drops the last character.
                self._data.append(line[b + len(marker):line.find('&', b)])
            if line.find(u'следующая страница результатов') > 1:
                # Results are divided into pages of 200.
                self._data += GVRList(bo, rb, subb, hep, wot, name, num, loc, start+200).get_data()
Пример #4
0
 def __init__(self, basecat, tagcat, depth='12',
              lang='ru', project='wikipedia'):
     """Collect the articles that are in *basecat* and in a tag category.

     For every tag category a request is sent to the quick_intersection
     script on tools.wmflabs.org (``format=wiki``) and the link targets
     found in the response are gathered, without duplicates, into the
     sorted list ``self.articles``.

     basecat -- base category name.
     tagcat  -- a single category name (byte or unicode string) or an
                iterable of names.
     depth, lang, project -- passed through as request parameters.

     Raises CategoryIntersectException when a response line contains
     "Database Error".
     """
     self.articles = []
     # Accept both byte and unicode strings as a single name; a bare
     # ``type("")`` comparison would iterate a unicode name character by
     # character.
     if isinstance(tagcat, (str, type(u""))):
         catlist = [tagcat]
     else:
         catlist = tagcat

     seen = set()  # constant-time duplicate check alongside the list
     # TODO: rework -- the script accepts several categories at once.
     for tag in catlist:
         tag = tag.encode('utf-8', 'ignore')
         conn = httphelp()
         conn.server = "tools.wmflabs.org"
         conn.scriptname = "/catscan2/quick_intersection.php"
         conn.parameters = {'cats': '%s\r\n%s' % (basecat, tag),
                            'depth': depth,
                            'format': 'wiki',
                            'lang': lang,
                            'max': 30000,
                            'ns': '0',
                            'project': project,
                            'start': '0',
                            'redirects' : ''}
         conn.codepage = "utf-8"
         for data in conn.lines(method="GET"):
             if data.find(u"Database Error") > 0:  # check for db error
                 raise CategoryIntersectException(u"Database error")
             start = data.find("[[")
             end = data.find("]]")
             # Skip lines without a complete link and category lines.
             if start == -1 or end == -1 or data.find("Category") > 1:
                 continue
             # Keep the link target only. partition() leaves an unpiped
             # link intact, whereas slicing at find("|") == -1 would cut
             # off its last character.
             title = data[start + 2:end].partition("|")[0]
             if title not in seen:
                 seen.add(title)
                 self.articles.append(title)
     self.articles.sort()