def SearchCategorymember(self,categoryname):
    """List the members of a wiki category in the two result list controls.

    Clears ``self.subcategoryresultlist`` and ``self.pageresultlist``,
    requests the ``categorymembers`` list for ``Category:<categoryname>``
    via ``Extractor.wikiextractor().getjson`` and inserts one row per member:

    * titles containing ``Category:`` are added to the sub-category list,
      with the leading ``Category:`` prefix removed;
    * other titles are added to the page list as (page id, title), except
      titles that contain a colon, which are skipped.

    Any exception raised along the way is caught and its ``message`` is shown
    in field 0 of ``self.statusbar``.

    :param categoryname: value substituted into the ``cmtitle=Category:%s``
        part of the query string.
    """
    prefix = 'Category:'
    try:
        self.subcategoryresultlist.DeleteAllItems()
        self.pageresultlist.DeleteAllItems()
        wikiextractor = Extractor.wikiextractor()
        query = configuration.api_url_zh + (
            '&list=categorymembers&cmtitle=Category:%s&cmsort=timestamp&'
            'cmdir=desc&cmlimit=max' % categoryname)
        json_content = wikiextractor.getjson(query)
        members = json_content['query']['categorymembers']
        for member in members:
            # TODO: without a Category attribute there is no way to tell
            # whether a member is a sub-category (?)
            pageid = str(member['pageid'])
            title = member['title']
            if prefix in title:
                # Remove only the leading prefix. str.lstrip() treats its
                # argument as a set of characters, so it would also eat the
                # start of the real name ("Category:Cats" -> "s").
                if title.startswith(prefix):
                    subcategory = title[len(prefix):]
                else:
                    subcategory = title
                index = self.subcategoryresultlist.InsertStringItem(sys.maxint, subcategory)
                self.subcategoryresultlist.SetStringItem(index, 0, subcategory)
            else:
                # TODO: to be improved
                # A colon in the title is taken to mean "not a valid page".
                if ':' in title:
                    continue
                index = self.pageresultlist.InsertStringItem(sys.maxint, pageid)
                self.pageresultlist.SetStringItem(index, 0, pageid)
                self.pageresultlist.SetStringItem(index, 1, title)
    except Exception as e:
        self.statusbar.SetStatusText(e.message, 0)
def OnGeoExtract(self,evt):
    """Event handler: extract data for the geo result pages and save it.

    Passes the value returned by ``self.GetGeoList()`` together with an empty
    dict and the flag ``'f'`` to
    ``Extractor.wikiextractor().get_data_dict_from_pageid``, then hands that
    dict to ``FileWriter.filewriter().SaveToSQLite``. On success a
    confirmation message is shown in field 0 of ``self.statusbar``; on any
    exception the exception's ``message`` is shown there instead.

    :param evt: the triggering event object (not used by this handler).
    """
    try:
        wikiextractor = Extractor.wikiextractor()
        data_dict = {}
        members = self.GetGeoList()
        # presumably fills data_dict in place -- confirm against Extractor
        wikiextractor.get_data_dict_from_pageid(members, data_dict, 'f')
        filewriter = FileWriter.filewriter()
        filewriter.SaveToSQLite(data_dict)
        # Alternative output: filewriter.SaveToExcel(data_dict)
        # Unicode literal (as in OnExtract) so the non-ASCII text does not
        # depend on an implicit byte-string decode.
        # NOTE(review): the message refers to an excel file although the data
        # is written with SaveToSQLite -- confirm the intended wording.
        self.statusbar.SetStatusText(u"保存成功,请检查excel文件", 0)
    except Exception as e:
        self.statusbar.SetStatusText(e.message, 0)
def OnExtract(self,evt):
    """Event handler: extract the members of the entered category to Excel.

    Reads the category name from ``self.categoryname``, calls
    ``Extractor.wikiextractor().parse_members`` with an empty dict and the
    flag ``'t'`` when ``self.extractsubcategoryck`` is in the checked state
    (``'f'`` otherwise), then passes that dict to
    ``FileWriter.filewriter().SaveToExcel``. On success a confirmation
    message is shown in field 0 of ``self.statusbar``; on any exception the
    exception's ``message`` is shown there instead.

    :param evt: the triggering event object (not used by this handler).
    """
    try:
        name = self.categoryname.GetValue()
        extractor = Extractor.wikiextractor()
        collected = {}
        box_checked = self.extractsubcategoryck.Get3StateValue() == wx.CHK_CHECKED
        subcategory_flag = 't' if box_checked else 'f'
        extractor.parse_members(name, collected, subcategory_flag)
        writer = FileWriter.filewriter()
        # Alternative output: writer.SaveToSQLite(collected)
        writer.SaveToExcel(collected)
        self.statusbar.SetStatusText(u"保存成功,请检查excel文件", 0)
    except Exception as err:
        self.statusbar.SetStatusText(err.message, 0)
def SearchbyPrex(self,prex):
    """Fill the sub-category list with categories matching a name prefix.

    Clears ``self.subcategoryresultlist``, requests the ``allcategories``
    list with ``acprefix=<prex>`` via ``Extractor.wikiextractor().getjson``
    and inserts the ``'*'`` value of every returned entry as a row. Any
    exception is caught and its ``message`` is shown in field 0 of
    ``self.statusbar``.

    :param prex: value substituted into the ``acprefix=%s`` query parameter.
    """
    try:
        self.subcategoryresultlist.DeleteAllItems()
        extractor = Extractor.wikiextractor()
        base_url = configuration.api_url_zh
        url = base_url + '&list=allcategories&acprefix=%s' % prex
        response = extractor.getjson(url)
        entries = response['query']['allcategories']
        for entry in entries:
            # TODO: without a Category attribute there is no way to tell
            # whether an entry is a sub-category (?)
            name = entry['*']
            row = self.subcategoryresultlist.InsertStringItem(sys.maxint, name)
            self.subcategoryresultlist.SetStringItem(row, 0, name)
    except Exception as err:
        self.statusbar.SetStatusText(err.message, 0)
def SearchbyGeo(self, lat, lon,primay): try: self.geopageresultlist.DeleteAllItems(); sourcelist = self.GetSourceList() wikiextractor = Extractor.wikiextractor() query ="" queries = {} pagedatalist=[] for source in sourcelist: lastcount =-1 geopagelist = [] while(len(geopagelist)<100): while True: if primay: query = source + '&list=geosearch&gscoord=%s|%s&gsradius=10000&gsglobe=earth&gsnamespace=0&gslimit=500&gsprop=dim&gsprimary=primary' %(lat,lon) else: query = source + '&list=geosearch&gscoord=%s|%s&gsradius=10000&gsglobe=earth&gsnamespace=0&gslimit=500&gsprop=dim&gsprimary=all' %(lat,lon) json_content = wikiextractor.getjson(query) queries = json_content['query'] nowcount =len(geopagelist) if('geosearch' in queries.keys() and len(queries['geosearch'])>0): break; lat+=0.003; lon+=0.003; if(nowcount==lastcount): break; lastcount=len(geopagelist) pages = queries['geosearch'] for page in pages: try: #TODO:如果没有Category属性,无法判断是否为子类(?) lat = page['lat'] lon = page['lon'] pageid = page['pageid'] strpageid=str(pageid).decode('utf-8') title = page['title'].decode('utf8') strlat = str(page['lat']).decode('utf-8') strlon = str(page['lat']).decode('utf-8') strdim=str(page['dim']).decode('utf-8') if(pageid in self.geopagelist): continue else: geopagelist.append(pageid) #测试写入查询结果信息 pagedata = {} pagedata[u'文章ID']= pageid pagedata[u'标题']= title pagedata[u'经度']= lon pagedata[u'纬度']= lat pagedata[u'大小']= strdim pagedatalist.append(pagedata); index = self.geopageresultlist.InsertStringItem(sys.maxint, strpageid) self.geopageresultlist.SetStringItem(index, 0, strpageid) self.geopageresultlist.SetStringItem(index, 1, title) self.geopageresultlist.SetStringItem(index, 2, strlat) self.geopageresultlist.SetStringItem(index, 3, strlon) self.geopageresultlist.SetStringItem(index, 4, strdim) except Exception,e: continue #最后存储查询结果 self.resultdict[u'查询结果']=pagedatalist