Exemplo n.º 1
0
def build_graph():
    pair_list = TwitterUser.get_top_100_pair()
    DG = nx.DiGraph()
    DG.add_edges_from([(foer, twitter_user) for twitter_user, foer in
        pair_list])
    betweenness = nx.betweenness_centrality(DG)
    closeness = nx.closeness_centrality(DG)
    edge_betweenness = nx.edge_betweenness(DG)
    clustering_co = nx.clustering(nx.Graph(DG))
    page_rank = nx.pagerank(DG)
    for twitter_id in DG.nodes():
        t = TwitterUser.get_by_id(twitter_id)
        node = DG.node[twitter_id]
        node['user_id'] = t.user_id
        node['label'] = t.scrn_name
        node['follower_count'] = t.foer_cnt
        node['friend_count'] = t.friend_cnt
        node['status_count'] = t.status_cnt
        node['location'] = t.location
        node['verified'] = t.verified
        node['twitter_age'] = (date.today() - t.created_at).days
        node['daily_tweet'] = t.status_cnt*1.0/node['twitter_age']
        node['indegree'] = len([(id, foer) for id, foer 
            in pair_list if id == twitter_id])
        node['outdegree'] = len([(id, foer) for id, foer 
            in pair_list if foer == twitter_id])
        node['cluster'] = clustering_co[twitter_id]
        node['betweenness'] = betweenness[twitter_id]
        node['closeness'] = closeness[twitter_id]
        node['page_rank'] = page_rank[twitter_id]
    for out_n, in_n in DG.edges():
        DG[out_n][in_n]['edge_betweenness'] = edge_betweenness[(out_n,in_n)]

    return DG
Exemplo n.º 2
0
def build_graph_encoded():
    pair_list = TwitterUser.get_top_100_pair()
    print
    DG = nx.DiGraph()
    DG.add_edges_from(pair_list)
    for twitter_id in DG.nodes():
        t = TwitterUser.get_by_id(twitter_id)
        node = DG.node[twitter_id]
        node['twitter_id'] = t.user_id
        node['label'] = t.scrn_name.encode('utf-8')
        node['screen_name'] = t.scrn_name.encode('utf-8')
        node['name'] = t.name.encode('utf-8')
        node['follower_count'] = t.foer_cnt
        node['friend_count'] = t.friend_cnt
        node['status_count'] = t.status_cnt
        node['description']  = t.desc.encode('utf-8')
        node['location'] = t.location.encode('utf-8')
        node['created_at'] = str(t.created_at)
        node['verified'] = t.verified
        node['twitter_age'] = (date.today() - t.created_at).days
        node['daily_tweet'] = t.status_cnt*1.0/node['twitter_age']
        node['follower_count_top100'] = len([(id, foer) for id, foer 
            in pair_list if id == twitter_id])
        node['friend_count_top100'] = len([(id, foer) for id, foer 
            in pair_list if foer == twitter_id])

    return DG
Exemplo n.º 3
0
def save_user_followers(user):
    try:
        c = Cursor(api.followers,user.user_id)
    except TweepError:
        print "tweep breaks!"
        print TweepError.message
    while(True):
        try:
            print 'taking a rest before move to next page'
            sleep(10)
            page = c.pages().next()
            print "start a new page of user ", user.scrn_name, \
                'page', c.pages().count
        except TweepError:
            print "tweep breaks!"
            print TweepError.message
            continue
        except StopIteration:
            print "Move to next unscanned"
            break
        
        for tweepy_user in page:
            print "follower -----", tweepy_user.screen_name, "----- found......"
            if TwitterUser.get_by_id(tweepy_user.id) or \
                is_in_no_chn(tweepy_user.id):
                print 'ALREADY in DB!!, skip'
                continue
            try:
                if not tweepy_user.protected or \
                        (tweepy_user.protected and tweepy_user.following):
                        if is_chn(tweepy_user):
                            print "and speaks Chinese! Saving...."
                            TwitterUser.save_tweepy_user(tweepy_user)
                        else:
                            save_non_chn(tweepy_user.id)
                            print "pitty, s/he is not Chinese Speaker, next..."
                            continue
            except TweepError:
                print "tweep breaks!"
                print TweepError.message
            try:
                print "the remaining hit is ", \
                    api.rate_limit_status()['remaining_hits']
            except TweepError:
                print "tweep breaks!"
                print TweepError.message
        page =[]
    user.update_scanned()
Exemplo n.º 4
0
def get_relation(top_list):
    top_list.reverse()

    for twitter_id in top_list:
        print twitter_id, 'analyzing.....'
        sleep(3)
        print 'getting followers id...'
        twitter_user = TwitterUser.get_by_id(twitter_id)
        try:
            tweepy_obj = twitter_user.tweepy_obj
            foer_ids = get_follower_ids(tweepy_obj)
            api.rate_limit_status()['remaining_hits']
            top_100_foer = list(set(foer_ids).intersection(TOP_100))
            print 'saving relation....'
            for id in top_100_foer:
                TwitterUser.save_relationship(twitter_id, id)
        except TweepError:
            print "tweep breaks!"
            print TweepError.message