Exemplo n.º 1
0
 def __init__(self, wordlist):
     """Create the formatter and parser this object works with.

     ``wordlist`` is handed unchanged to ``PLDAFormatter``; the ``Parser``
     is built without arguments.
     """
     plda_formatter = PLDAFormatter(wordlist)
     self.formatter = plda_formatter
     text_parser = Parser()
     self.parser = text_parser
Exemplo n.º 2
0
 def __init__(self, wordlist):
     """Create the formatter and parser this object works with.

     ``wordlist`` is handed unchanged to ``PLDAFormatter``; the ``Parser``
     is built without arguments.
     """
     plda_formatter = PLDAFormatter(wordlist)
     self.formatter = plda_formatter
     text_parser = Parser()
     self.parser = text_parser
Exemplo n.º 3
0
class UserModelFactory(object):
    """Build user models (bag of words plus topic) from Twitter list data.

    Methods whose names start with ``load_`` fetch data from Twitter through
    the module-level ``api`` client.  Methods whose names start with ``get_``
    work on data that has already been loaded, apart from the calls they
    delegate to the ``load_`` methods and to ``get_topic_from_server``.
    """

    # Consecutive failed requests tolerated before list paging gives up.
    MAX_FAILED_REQUESTS = 3
    # Upper bound on member-page requests per list, failed requests included.
    MAX_MEMBER_REQUESTS = 3
    # Timeline tweets must be longer than this many characters to be kept.
    MIN_TWEET_LENGTH = 30

    # Open response-log file while get_user_model() is running, else None.
    log = None

    def __init__(self, wordlist):
        """Create the formatter (built from ``wordlist``) and the parser."""
        self.formatter = PLDAFormatter(wordlist)
        self.parser = Parser()

    def get_user_model(self, user):
        """Generate the user model for the screen name ``user``.

        Loads every list returned by ``load_lists``, collects the member
        profile descriptions of each list, parses them and formats the parsed
        result into a bag of words.  While this runs, every successful member
        response is appended as one JSON line to ``log_<user>.json`` in the
        current working directory.

        Returns:
            A dict with the keys ``"bag_of_words"`` (the formatter output) and
            ``"topic"`` (the result of ``get_topic`` for that bag of words).
        """
        profiles = []
        # The context manager guarantees the log file is closed even when a
        # load step raises; the original left the handle open forever.
        with open("log_" + user + ".json", "w") as log:
            self.log = log
            try:
                for lst in self.load_lists(user):
                    profiles.extend(self.load_member_profile(lst["id"]))
            finally:
                # Never leave a closed file behind for later direct calls.
                self.log = None
        # NOTE: list timelines (load_list_timeline) are not part of the model.
        words = []
        for profile in profiles:
            words.extend(self.parser.parse(profile))
        bag_of_words = self.formatter.format(words)
        return {
            "bag_of_words": bag_of_words,
            "topic": self.get_topic(bag_of_words)
        }

    def get_topic(self, bag_of_words):
        """Return the result of ``get_topic_from_server`` for ``bag_of_words``."""
        return get_topic_from_server(bag_of_words)

    def load_lists(self, user):
        """Load from Twitter the lists that ``user`` is a member of.

        Follows the cursor from ``-1`` until a response reports a falsy
        ``next_cursor``.  A failed request is retried with the same cursor,
        but after ``MAX_FAILED_REQUESTS`` consecutive failures paging stops
        and the lists collected so far are returned (best effort).

        Returns:
            A list with the ``"lists"`` entries of every fetched page.
        """
        cursor = -1
        lists = []
        failures = 0
        while cursor and failures < self.MAX_FAILED_REQUESTS:
            try:
                response = api.lists__memberships(screen_name=user, cursor=cursor)
                # Read both fields before mutating any state so a malformed
                # response can neither duplicate entries nor corrupt the cursor.
                page = list(response["lists"])
                next_cursor = response["next_cursor"]
            except Exception:
                # The client's exception types are not visible here; count
                # the failure instead of retrying forever.
                failures += 1
                continue
            failures = 0
            lists.extend(page)
            cursor = next_cursor
        return lists

    def load_list_timeline(self, list_id):
        """Load the tweet texts of the timeline of the list ``list_id``.

        Requests pages 1 and 2 with 200 statuses each.  A page whose request
        or processing fails is skipped.

        Returns:
            The texts that are longer than ``MIN_TWEET_LENGTH`` characters.
        """
        tweets = []
        for page in range(1, 3):
            try:
                statuses = api.lists__statuses(list_id=list_id,
                                               page=str(page),
                                               per_page=200)
                # Filter on the text length; the original measured the number
                # of keys in the tweet dict, which says nothing about the text.
                texts = [tweet["text"] for tweet in statuses
                         if len(tweet["text"]) > self.MIN_TWEET_LENGTH]
            except Exception:
                continue
            tweets.extend(texts)
        return tweets

    def load_member_profile(self, list_id):
        """Load the profile descriptions of the members of list ``list_id``.

        Issues at most ``MAX_MEMBER_REQUESTS`` requests; a failed request
        counts towards that limit and is retried with the same cursor.  Each
        successful response is written to the response log when one is open.

        Returns:
            A list with the ``"description"`` of every user on the fetched
            pages.
        """
        cursor = -1
        profiles = []
        for _ in range(self.MAX_MEMBER_REQUESTS):
            if not cursor:
                break
            try:
                response = api.lists__members(list_id=list_id, cursor=cursor)
                cursor = response["next_cursor"]
            except Exception:
                continue
            self._log_response(response)
            try:
                descriptions = [member["description"]
                                for member in response["users"]]
            except (KeyError, TypeError):
                # Malformed page: skip it, keep what was collected so far.
                continue
            profiles.extend(descriptions)
        return profiles

    def _log_response(self, response):
        """Append ``response`` as one JSON line to the open log, if any."""
        if self.log is None:
            return
        try:
            self.log.write(json.dumps(response) + "\n")
            self.log.flush()
        except (TypeError, ValueError, IOError, OSError):
            # The log is diagnostic only; a logging problem must not discard
            # the profiles of this page.
            pass
Exemplo n.º 4
0
class UserModelFactory(object):
    """Build user models (bag of words plus topic) from Twitter list data.

    Methods whose names start with ``load_`` fetch data from Twitter through
    the module-level ``api`` client.  Methods whose names start with ``get_``
    work on data that has already been loaded, apart from the calls they
    delegate to the ``load_`` methods and to ``get_topic_from_server``.
    """

    # Consecutive failed requests tolerated before list paging gives up.
    MAX_FAILED_REQUESTS = 3
    # Upper bound on member-page requests per list, failed requests included.
    MAX_MEMBER_REQUESTS = 3
    # Timeline tweets must be longer than this many characters to be kept.
    MIN_TWEET_LENGTH = 30

    # Open response-log file while get_user_model() is running, else None.
    log = None

    def __init__(self, wordlist):
        """Create the formatter (built from ``wordlist``) and the parser."""
        self.formatter = PLDAFormatter(wordlist)
        self.parser = Parser()

    def get_user_model(self, user):
        """Generate the user model for the screen name ``user``.

        Loads every list returned by ``load_lists``, collects the member
        profile descriptions of each list, parses them and formats the parsed
        result into a bag of words.  While this runs, every successful member
        response is appended as one JSON line to ``log_<user>.json`` in the
        current working directory.

        Returns:
            A dict with the keys ``"bag_of_words"`` (the formatter output) and
            ``"topic"`` (the result of ``get_topic`` for that bag of words).
        """
        profiles = []
        # The context manager guarantees the log file is closed even when a
        # load step raises; the original left the handle open forever.
        with open("log_" + user + ".json", "w") as log:
            self.log = log
            try:
                for lst in self.load_lists(user):
                    profiles.extend(self.load_member_profile(lst["id"]))
            finally:
                # Never leave a closed file behind for later direct calls.
                self.log = None
        # NOTE: list timelines (load_list_timeline) are not part of the model.
        words = []
        for profile in profiles:
            words.extend(self.parser.parse(profile))
        bag_of_words = self.formatter.format(words)
        return {"bag_of_words": bag_of_words, "topic": self.get_topic(bag_of_words)}

    def get_topic(self, bag_of_words):
        """Return the result of ``get_topic_from_server`` for ``bag_of_words``."""
        return get_topic_from_server(bag_of_words)

    def load_lists(self, user):
        """Load from Twitter the lists that ``user`` is a member of.

        Follows the cursor from ``-1`` until a response reports a falsy
        ``next_cursor``.  A failed request is retried with the same cursor,
        but after ``MAX_FAILED_REQUESTS`` consecutive failures paging stops
        and the lists collected so far are returned (best effort).

        Returns:
            A list with the ``"lists"`` entries of every fetched page.
        """
        cursor = -1
        lists = []
        failures = 0
        while cursor and failures < self.MAX_FAILED_REQUESTS:
            try:
                response = api.lists__memberships(screen_name=user, cursor=cursor)
                # Read both fields before mutating any state so a malformed
                # response can neither duplicate entries nor corrupt the cursor.
                page = list(response["lists"])
                next_cursor = response["next_cursor"]
            except Exception:
                # The client's exception types are not visible here; count
                # the failure instead of retrying forever.
                failures += 1
                continue
            failures = 0
            lists.extend(page)
            cursor = next_cursor
        return lists

    def load_list_timeline(self, list_id):
        """Load the tweet texts of the timeline of the list ``list_id``.

        Requests pages 1 and 2 with 200 statuses each.  A page whose request
        or processing fails is skipped.

        Returns:
            The texts that are longer than ``MIN_TWEET_LENGTH`` characters.
        """
        tweets = []
        for page in range(1, 3):
            try:
                statuses = api.lists__statuses(
                    list_id=list_id, page=str(page), per_page=200
                )
                # Filter on the text length; the original measured the number
                # of keys in the tweet dict, which says nothing about the text.
                texts = [
                    tweet["text"]
                    for tweet in statuses
                    if len(tweet["text"]) > self.MIN_TWEET_LENGTH
                ]
            except Exception:
                continue
            tweets.extend(texts)
        return tweets

    def load_member_profile(self, list_id):
        """Load the profile descriptions of the members of list ``list_id``.

        Issues at most ``MAX_MEMBER_REQUESTS`` requests; a failed request
        counts towards that limit and is retried with the same cursor.  Each
        successful response is written to the response log when one is open.

        Returns:
            A list with the ``"description"`` of every user on the fetched
            pages.
        """
        cursor = -1
        profiles = []
        for _ in range(self.MAX_MEMBER_REQUESTS):
            if not cursor:
                break
            try:
                response = api.lists__members(list_id=list_id, cursor=cursor)
                cursor = response["next_cursor"]
            except Exception:
                continue
            self._log_response(response)
            try:
                descriptions = [member["description"] for member in response["users"]]
            except (KeyError, TypeError):
                # Malformed page: skip it, keep what was collected so far.
                continue
            profiles.extend(descriptions)
        return profiles

    def _log_response(self, response):
        """Append ``response`` as one JSON line to the open log, if any."""
        if self.log is None:
            return
        try:
            self.log.write(json.dumps(response) + "\n")
            self.log.flush()
        except (TypeError, ValueError, IOError, OSError):
            # The log is diagnostic only; a logging problem must not discard
            # the profiles of this page.
            pass