def analyze_users_in_search_results(t, q, max_batches=5, count=100):
    """Run a search and index the users who appear in the results.

    Parameters:
      t           -- an authenticated Twitter API connection (as returned
                     by oauth_login -- assumed; confirm against caller)
      q           -- the search query string
      max_batches -- maximum number of result batches to fetch
      count       -- number of results to request per batch

    Returns a 3-tuple of dicts, each keyed by screen name:
      (screen_name_to_info, screen_name_to_location, screen_name_to_tweet_ids)
    """

    # Search for something
    statuses = search(t, q=q, max_batches=max_batches, count=count)

    # Extract the screen names from the results
    # and optionally map them to a useful field like the tweet id
    screen_name_to_tweet_ids = {}
    screen_name_to_info = {}
    screen_name_to_location = {}

    for status in statuses:

        screen_name = status['user']['screen_name']

        # setdefault replaces the deprecated dict.has_key() check
        # (has_key was removed in Python 3) and avoids a second lookup.
        screen_name_to_tweet_ids.setdefault(screen_name, []).append(status['id'])

        screen_name_to_info[screen_name] = status['user']

        screen_name_to_location[screen_name] = status['user']['location']

        # Note that the "location" field can
        # be anything a user has typed in, and may be something like "Everywhere",
        # "United States" or something else that won't geocode to a specific coordinate
        # on a map.

    # Use the various screen_name_to{tweet_ids, info, location} maps to determine
    # interesting things about the people who appear in the search results.

    return screen_name_to_info, screen_name_to_location, screen_name_to_tweet_ids
def analyze_users_in_search_results(t, q, max_batches=5, count=100):
    """Search for *q* and build per-user indexes from the results.

    Parameters:
      t           -- an authenticated Twitter API connection (assumed to be
                     the object oauth_login returns -- confirm at the caller)
      q           -- search query string
      max_batches -- maximum number of result batches to fetch
      count       -- results requested per batch

    Returns (screen_name_to_info, screen_name_to_location,
    screen_name_to_tweet_ids), all keyed by screen name.
    """

    # Search for something
    statuses = search(t, q=q, max_batches=max_batches, count=count)

    # Extract the screen names from the results
    # and optionally map them to a useful field like the tweet id
    screen_name_to_tweet_ids = {}
    screen_name_to_info = {}
    screen_name_to_location = {}

    for status in statuses:

        screen_name = status['user']['screen_name']

        # Membership test replaces the deprecated dict.has_key()
        # (removed in Python 3; "in" works in both Python 2 and 3).
        if screen_name not in screen_name_to_tweet_ids:
            screen_name_to_tweet_ids[screen_name] = []

        screen_name_to_tweet_ids[screen_name].append(status['id'])

        screen_name_to_info[screen_name] = status['user']

        screen_name_to_location[screen_name] = status['user']['location']

        # Note that the "location" field can
        # be anything a user has typed in, and may be something like "Everywhere",
        # "United States" or something else that won't geocode to a specific coordinate
        # on a map.

    # Use the various screen_name_to{tweet_ids, info, location} maps to determine
    # interesting things about the people who appear in the search results.

    return screen_name_to_info, screen_name_to_location, screen_name_to_tweet_ids
) return g if __name__ == '__main__': # Your query Q = ' '.join(sys.argv[1]) # How many batches of data to grab for the search results MAX_BATCHES = 2 # How many search results per page COUNT = 100 # Get some search results for a query t = oauth_login() search_results = search(t, q=Q, max_batches=MAX_BATCHES, count=COUNT) g = create_rt_graph(search_results) # Print out some stats print >> sys.stderr, "Number nodes:", g.number_of_nodes() print >> sys.stderr, "Num edges:", g.number_of_edges() print >> sys.stderr, "Num connected components:", \ len(nx.connected_components(g.to_undirected())) print >> sys.stderr, "Node degrees:", sorted(nx.degree(g))
) return g if __name__ == "__main__": # Your query Q = " ".join(sys.argv[1]) # How many batches of data to grab for the search results MAX_BATCHES = 2 # How many search results per page COUNT = 100 # Get some search results for a query t = oauth_login() search_results = search(t, q=Q, max_batches=MAX_BATCHES, count=COUNT) g = create_rt_graph(search_results) # Print out some stats print >>sys.stderr, "Number nodes:", g.number_of_nodes() print >>sys.stderr, "Num edges:", g.number_of_edges() print >>sys.stderr, "Num connected components:", len(nx.connected_components(g.to_undirected())) print >>sys.stderr, "Node degrees:", sorted(nx.degree(g))
# Inspect the tweet to see if was produced with /statuses/retweet/:id # See http://dev.twitter.com/doc/post/statuses/retweet/:id if tweet.has_key('retweeted_status'): rt_origins += [ tweet['retweeted_status']['user']['screen_name'].lower() ] # Also, inspect the tweet for the presence of "legacy" retweet # patterns such as "RT" and "via" try: rt_origins += [ mention.strip() for mention in rt_patterns.findall(tweet['text'])[0][1].split() ] except IndexError, e: pass # Filter out any duplicates return list(set([rto.strip("@").lower() for rto in rt_origins])) if __name__ == '__main__': t = oauth_login() tweets = search(t, q='Python', max_batches=1, count=100) for tweet in tweets: print tweet['text'], get_rt_origins(tweet)
# -*- coding: utf-8 -*- import json from recipe__oauth_login import oauth_login from recipe__search import search def get_entities(tweet): return tweet['entities'] if __name__ == '__main__': t = oauth_login() tweets = search(t, "Python", max_batches=1, count=10) # Use "Python" as a sample query to get some tweets to process entities = [ get_entities(tweet) for tweet in tweets ] print json.dumps(entities, indent=1)
# See http://dev.twitter.com/doc/post/statuses/retweet/:id if tweet.has_key('retweeted_status'): rt_origins += [ tweet['retweeted_status']['user']['screen_name'].lower() ] # Also, inspect the tweet for the presence of "legacy" retweet # patterns such as "RT" and "via" try: rt_origins += [ mention.strip() for mention in rt_patterns.findall(tweet['text'])[0][1].split() ] except IndexError, e: pass # Filter out any duplicates return list(set([rto.strip("@").lower() for rto in rt_origins])) if __name__ == '__main__': t = oauth_login() tweets = search(t, q='RKP1107', max_batches=1, count=100) for tweet in tweets: print tweet['text'], get_rt_origins(tweet)
# Extract geo coordinates from search results import sys import twitter from recipe__oauth_login import oauth_login from recipe__search import search Q = ' '.join(sys.argv[1:]) t = oauth_login() statuses = search(t, q=Q, max_batches=10, count=100) # Extract geocoordinates from tweets in search results coords = [ status['geo'] for status in statuses if status['geo'] is not None ] print coords
# Extract geo coordinates from search results import sys import twitter from recipe__oauth_login import oauth_login from recipe__search import search Q = ' '.join(sys.argv[1:]) t = oauth_login() statuses = search(t, q=Q, max_batches=10, count=100) # Extract geocoordinates from tweets in search results coords = [status['geo'] for status in statuses if status['geo'] is not None] print coords
# See http://dev.twitter.com/doc/post/statuses/retweet/:id if tweet.has_key('retweeted_status'): rt_origins += [ tweet['retweeted_status']['user']['screen_name'].lower() ] # Also, inspect the tweet for the presence of "legacy" retweet # patterns such as "RT" and "via" try: rt_origins += [ mention.strip() for mention in rt_patterns.findall(tweet['text'])[0][1].split() ] except IndexError, e: pass # Filter out any duplicates return list(set([rto.strip("@").lower() for rto in rt_origins])) if __name__ == '__main__': t = oauth_login() tweets = search(t, q='Python', max_batches=1, count=100) for tweet in tweets: print tweet['text'], get_rt_origins(tweet)