Пример #1
0
class MyStreamer(TwythonStreamer):
    """TwythonStreamer subclass that feeds weighted tweet words into Trendis.

    Every incoming message that carries a ``"text"`` field is lower-cased and
    split on whitespace. Words that pass the filters in ``on_success`` and
    receive a positive score from ``__weight`` are handed to
    ``self.trendis.insert`` as ``(word, weight)`` tuples.
    """

    # A token must be strictly longer than this to be considered at all.
    _MIN_TOKEN_LENGTH = 4
    # Tokens with at least this many characters earn a length bonus.
    _LONG_TOKEN_LENGTH = 8
    # Tokens longer than this are discarded.
    _MAX_TOKEN_LENGTH = 20
    # Extra weight granted for a hashtag and, separately, for a long token.
    _BONUS = 2
    # The same word character three or more times in a row ("hellooooo").
    # re.UNICODE keeps \w Unicode-aware on Python 2 as well as Python 3.
    _REPEATED_CHAR_RE = re.compile(r"(\w)\1{2,}", re.UNICODE)

    def __init__(self, *args, **kwargs):
        """Initialise the streamer, load the stop words and create the Trendis store.

        All positional and keyword arguments are forwarded unchanged to
        ``TwythonStreamer.__init__``.

        The stop words are read from ``stopwords.txt`` (resolved against the
        current working directory), one entry per line, with surrounding
        whitespace stripped.
        """
        # Function-scope import: io.open yields decoded text on both
        # Python 2 and Python 3, so stop words compare equal to tweet words.
        import io

        TwythonStreamer.__init__(self, *args, **kwargs)
        # Assumes the file is UTF-8 (the previous byte-wise comparison against
        # UTF-8-encoded tweet words implied the same). Undecodable bytes are
        # replaced rather than aborting start-up.
        with io.open("stopwords.txt", encoding="utf-8", errors="replace") as f:
            self.stopwords = {line.strip() for line in f}
        self.trendis = Trendis(namespace="twitter")

    def on_success(self, data):
        """Extract weighted words from one stream message and store them.

        Messages without a non-empty ``"text"`` entry are ignored. A word is
        kept when it is longer than four characters, is either a hashtag or
        purely alphanumeric, is not a stop word, and scores above zero.

        Args:
            data: mapping describing one stream message; only ``"text"`` is read.
        """
        text = data.get("text")
        if not text:
            return
        # Work on text, not UTF-8 bytes: len(), lower() and isalnum() must see
        # characters, otherwise non-ASCII words are mis-measured or dropped.
        if isinstance(text, bytes):
            text = text.decode("utf-8", "replace")

        tokens = []
        for word in text.lower().split():
            if len(word) <= self._MIN_TOKEN_LENGTH or word in self.stopwords:
                continue
            if not (word.startswith("#") or word.isalnum()):
                continue
            weight = self.__weight(word)
            # A zero weight means "discard"; do not insert such words at all.
            if weight > 0:
                tokens.append((word, weight))

        if tokens:
            # NOTE(review): tokens are passed as text rather than UTF-8 bytes;
            # confirm Trendis.insert accepts text keys.
            self.trendis.insert(*tokens)

    def on_error(self, status_code, data):
        """Print the status code reported for a stream error; ``data`` is unused."""
        # Parenthesised form prints identically on Python 2 and Python 3.
        print(status_code)

    def __weight(self, word):
        """Return the score for a lower-cased word; ``0`` means discard.

        Words ending in "ing", words containing a character repeated three or
        more times in a row, and words longer than 20 characters score 0.
        Every other word starts at 1, gains 2 if it is a hashtag and gains
        another 2 if it has at least 8 characters.
        """
        if len(word) > self._MAX_TOKEN_LENGTH or word.endswith("ing"):
            return 0
        if self._REPEATED_CHAR_RE.search(word):
            # get rid of hellooooooo's
            return 0

        weight = 1
        if word.startswith("#"):
            weight += self._BONUS
        # The upper bound needs no re-check: over-long words returned above.
        if len(word) >= self._LONG_TOKEN_LENGTH:
            weight += self._BONUS
        return weight
Пример #2
0
 def __init__(self, *args, **kwargs):
     """Set up the streamer with its stop-word set and a Trendis store.

     Positional and keyword arguments are forwarded untouched to
     ``TwythonStreamer.__init__``. Stop words are read from
     ``stopwords.txt``, one per line, with surrounding whitespace removed.
     """
     TwythonStreamer.__init__(self, *args, **kwargs)
     with open("stopwords.txt") as stopword_file:
         stripped_lines = [line.strip() for line in stopword_file.readlines()]
     # Duplicates collapse here; membership tests against a set are O(1).
     self.stopwords = set(stripped_lines)
     self.trendis = Trendis(namespace="twitter")