def run(self):
    if self.flags["launch"]:
        return self.launch()
    if self.flags["dbrepair"]:
        return self.dbrepair()
    while True:
        while True:
            print "---------FETCHING WIKIPEDIA PAGE------------"
            scraper = wk.WikiRevisionScrape(
                title=self.params["title"],
                pageid=self.params["pageid"],
                domain=self.params["domain"],
                scrapemin=self.params["scrapemin"],
            )
            if scraper.scrape():
                pageid = scraper.getPageID()
                title = scraper.getTitle()
                domain = scraper.getDomain()
                break
            elif self.params["domain"]:
                # A specific page was requested but could not be fetched:
                # terminate instead of retrying.
                return -1
        print
        print "-----------------ANALYSING------------------"
        analyser = WikiAnalysis(title, pageid, domain)
        results = analyser.analyse()
        if not results:
            return -1
        if self.flags["plot"]:
            print
            print "--------------------PLOT--------------------"
            import wikiDataPlot as dpl
            plotter = dpl.WikiDataPlot(
                os.path.abspath(self.params["plotpath"])
                if self.params["plotpath"] else None
            )
            plotted = plotter.plot(title, pageid, domain)
            print len(plotted), "plotted"
        revidLog(title, pageid, domain)
        if not self.flags["trundle"]:
            break
    return 0
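# Usage sketch (hypothetical; the containing class is not shown in this
# module and is assumed to expose the `flags` and `params` dicts used by
# run() above -- `WikiJob` is an invented name for illustration):
#
#   job = WikiJob()
#   job.flags = {"launch": False, "dbrepair": False,
#                "plot": True, "trundle": False}
#   job.params = {"title": "Alan Turing", "pageid": None, "domain": "en",
#                 "scrapemin": 100, "plotpath": None}
#   sys.exit(job.run())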
def dbrepair(self, delete=False, clear=False):
    import database as db
    dtb = db.WikiDatabase()
    fetch = dtb.getallfetched()
    if delete:
        print "cleaning incomplete entries from the database"
        if clear:
            dtb.empty()
        else:
            dtb.cleanup()
        return 0
    else:
        piddoms = dtb.getallscraped()
        print "Checking", len(piddoms), "pageids for complete details"
        for pid, dom in piddoms:
            # Re-scrape each stored entry to fill in missing details;
            # assumes getallscraped() yields (pageid, domain) pairs.
            scraper = wk.WikiRevisionScrape(pageid=pid, domain=dom,
                                            scrapemin=0)
            if scraper.scrape():
                pageid = scraper.getPageID()
                title = scraper.getTitle()
                domain = scraper.getDomain()
        print "Checking", len(fetch), "fetched entries for analyses"
        for f in fetch:
            analyser = WikiAnalysis(*f)
            results = analyser.analyse()
            if not results:
                return -1
        return 0
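# Usage sketch for the repair modes (hypothetical, same assumed class as
# above):
#
#   job.dbrepair()                          # re-scrape and re-analyse entries
#   job.dbrepair(delete=True)               # prune incomplete entries
#   job.dbrepair(delete=True, clear=True)   # empty the database entirely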