def __init__(self):
    """Set up the plugin instance from ``sys.argv`` and the add-on settings.

    Reads the plugin url and handle from the command line, creates the
    cache and add-on objects, resolves the add-on root and profile paths,
    loads the user settings and finally builds the torrent provider and
    the category list. When a TMDB key is available, a movie-info provider
    is created and ``metaDataAvailable`` is switched on.
    """
    xbmc.log("__init__")

    # argv[0]: the plugin url in plugin:// notation.
    self._url = sys.argv[0]
    # argv[1]: the plugin handle, kept as an integer.
    self._handle = int(sys.argv[1])

    self.cache = simplecache.SimpleCache()
    self.addon = xbmcaddon.Addon()

    root_path = self.addon.getAddonInfo("path")
    self.addonRootPath = root_path.decode("utf-8")
    # A second Addon() instance is queried for the profile path, as before.
    profile_path = xbmc.translatePath(
        xbmcaddon.Addon().getAddonInfo("profile"))
    self.dataPath = profile_path.decode("utf-8")

    # settings
    read_setting = self.addon.getSetting
    self.username = read_setting("filelist.user")
    self.passkey = read_setting("filelist.passphrase")
    self.watchDir = read_setting("saveTorrentFolder")
    self.torrentAction = read_setting("torrentAction")
    self.tmdb_api_key = read_setting("tmdb_api_key")

    self.metaDataAvailable = False
    self.pageSize = 10

    if not self.tmdb_api_key:
        # Nothing configured: use the bundled key, stored base64-encoded.
        bundled_key = "NjI4YTFhNDAxZThiZDg1ZDFlZTc2OTA4MWUwZjFmYzE="
        self.tmdb_api_key = base64.urlsafe_b64decode(bundled_key)

    if self.tmdb_api_key:
        # Imported here so the module is only loaded when a key exists.
        from movieinfo import MovieInfoProvider
        self.movieInfoProvider = MovieInfoProvider(self.tmdb_api_key)
        self.metaDataAvailable = True

    self.FLTorrentProvider = flprovider.FLTorrentProvider(
        self.username, self.passkey)
    self.categories = categories.Categories(self.addonRootPath)
def getMembers():
    """Collect, de-duplicate and title the members of the configured categories.

    The category names come from ``params[params['subtype']]['categories']``.
    Members of every category are gathered into one list, passed through
    ``tools.dedupeMemberList`` (with 'timestamp' and 'page id'), and each
    remaining member gets a 'title' derived from its 'page path'.

    Returns:
        The de-duplicated member list.
    """
    category_names = params[params['subtype']]['categories']
    collected = []
    for category_name in category_names:
        # NOTE: not sure if namespace is optional in categories?
        source = categories.Categories(
            category_name, namespace=params['main namespace'])
        collected += source.getCatMembers()

    unique_members = tools.dedupeMemberList(collected, 'timestamp', 'page id')
    for entry in unique_members:
        entry['title'] = tools.titleFromPath(entry['page path'])
    return unique_members
def getGrantReports():
    """Return the members of 'Reports_for_WMF_grants_funded_in_FY_2011-12'."""
    return categories.Categories(
        'Reports_for_WMF_grants_funded_in_FY_2011-12').getCatMembers()
# NOTE(review): the statements down to output_excel() are the tail of a method
# whose header lies outside this view (they use `self`, `htmlDownloader` and
# `path`). The original indentation was lost, so the nesting shown here --
# in particular output_excel() running once after the loop -- is a best-effort
# reconstruction; confirm against the full file.
tmp_datas = []
dataOutput = DataOutput()
# One iteration per queued search url: download, parse, accumulate.
for i in range(self.searchUrlsManger.get_urls_num()):
    tmp_searchUrl = self.searchUrlsManger.get_new_url()
    tmp_content = htmlDownloader.download(tmp_searchUrl)
    tmp_data = self.htmlParser.parse(tmp_content, i)
    tmp_datas.extend(tmp_data)
dataOutput.output_excel(tmp_datas, path)


def runSpider(root_url, category_Nums, categories_Name, path):
    """Construct a SpyderMain for one category (used as a thread target).

    The instance is only assigned to a local; presumably the constructor
    itself performs the crawl -- TODO confirm against SpyderMain.
    """
    obj_spider = SpyderMain(root_url, category_Nums, categories_Name, path)


if __name__ == '__main__':
    categories = cateG.Categories().getcategories()
    root_url = r"https://zhidao.baidu.com/search?"
    # Number of answers to crawl for each category (see category_Nums below).
    threads = []
    paths = []
    category_Nums = 200
    # Disabled single-category run kept from the original:
    # cate="父母体检"
    # path="D:\\Works\\datas\\"+cate+"百度知道体检知识.xlsx"
    # runSpider(root_url,category_Nums,"父母体检",path)
    # Build one thread and one output path per category. Starting/joining the
    # threads is not visible in this view.
    for cate in categories:
        path = "D:\\Works\\datas\\outputFiles\\" + cate + "百度知道体检知识.xlsx"
        paths.append(path)
        t = threading.Thread(target=runSpider, args=(root_url, category_Nums, cate, path))
        threads.append(t)
def getPages():
    """Print the members of the "IEG_2013_round_2" category.

    Nothing is returned; the member list is written to standard output.
    """
    # NOTE(review): the original comment reads "namespace redundancy";
    # presumably 200 is the namespace argument -- confirm against
    # categories.Categories.
    category = categories.Categories("IEG_2013_round_2", 200)
    member_list = category.getCatMembers()
    # Parenthesised single-argument form prints identically on Python 2 and
    # is also valid on Python 3, unlike the bare `print member_list` statement.
    print(member_list)