Exemplo n.º 1
0
 def __parseTweet(self, tweet):
     """Classify each URL of ``tweet`` and hand the accepted ones to the model.

     URLs are processed in the order produced by ``tweet.urls()``. A URL is
     rejected when ``isError()`` is true, or when -- after its document
     classes have been set from the classifier -- ``isRoot()`` is true, its
     ``lang()`` is not ``"en"``, or ``"short"`` is among its document classes.

     The first rejected URL is deleted through the URL builder and ends the
     loop, so the remaining URLs of the tweet are not examined. URLs accepted
     before that point have already been passed to ``updateUrl``.
     """
     for link in tweet.urls():
         if link.isError():
             # Erroneous URLs are rejected without being classified.
             rejectPrefix = u"Tweet bad: wrong url: "
         else:
             classifier = TxtClassificatorWrapper.instance()
             link.setDocumentClasses(classifier.classify(link.getText()))
             unwanted = (link.isRoot()
                         or link.lang() != "en"
                         or "short" in link.documentClasses())
             rejectPrefix = u"Tweet bad: " if unwanted else None

         if rejectPrefix is not None:
             # Shared rejection path: log, drop the URL, stop with this tweet.
             logger.info(rejectPrefix + unicode(tweet) + u" " + unicode(link))
             self.__urlBuilder.delete(link)
             break

         logger.info(u"Tweet good: " + unicode(tweet) + u" " + unicode(link))
         logger.info(u"URL: " + unicode(link))
         self.__model.updateUrl(link)
Exemplo n.º 2
0
 def _doSmthElse(self):
     """Service the pending GUI-related request flags.

     Three independent flags are checked in turn (each exposes
     ``isSet()``/``clear()`` -- presumably ``threading.Event`` objects;
     confirm where they are created):

     * refresh GUI: publish the final URLs on topic ``"update.urls"``;
     * show probability distribution: take the pending URL, reset the
       request and run the classifier's ``probDist`` on the URL text;
     * refresh status bar: publish cache and iterator statistics on topic
       ``"update.statusBar"``.
     """
     if self.__refreshGui.isSet():
         logger.info("Send data to GUI")
         self.__refreshGui.clear()
         urlsPayload = {"urls": self.__tweetResolvedListener.finalUrls()}
         Publisher.sendMessage("update.urls", data=urlsPayload)

     if self.__showProbDist.isSet():
         # Read the pending URL before the request is cleared and reset.
         pendingUrl = self.__probDistUrl
         self.__showProbDist.clear()
         self.__probDistUrl = None
         # NOTE(review): the computed distribution is not used anywhere in
         # this method; the call is kept because it may have side effects.
         TxtClassificatorWrapper.instance().probDist(pendingUrl.getText())

     if self.__refreshStatusBar.isSet():
         self.__refreshStatusBar.clear()
         statusPayload = {
             "cache": self.__urlResolver.cacheHitRate(),
             "position": self.__iter.position(),
             "position_end": self.__iter.count(),
             "current_file_c": self.__iter.currentFile(),
             "last_file_c": self.__iter.filesCount(),
         }
         Publisher.sendMessage("update.statusBar", data=statusPayload)
Exemplo n.º 3
0
 def _classifier(self):
     """Return the object provided by ``TxtClassificatorWrapper.instance()``.

     Presumably a shared singleton wrapper around the text classifier;
     confirm against ``TxtClassificatorWrapper``.
     """
     wrapper = TxtClassificatorWrapper.instance()
     return wrapper