def thinking_about_this_stuff():
    import networkx as nx
    import community
    #from make_graph import make_graph

    api_path = '../api_keys/'
    screen_name = 'graphlabteam'

    apis = oauth_login(api_path)

    target, target_tweets, followers, following, user_lists = \
        get_user_data(apis[0], screen_name)

    edges = make_graph(target['id'], followers, apis)

    g = nx.Graph(data=edges)

    # Iterate the Louvain partitioning until it stops changing.
    p0 = community.best_partition(g)
    p1 = community.best_partition(g, partition=p0)

    while p0 != p1:
        p0 = community.best_partition(g, partition=p1)
        p1 = community.best_partition(g, partition=p0)

    # Group node ids by the community id they were assigned.
    partitions = [[k for k in p1.keys() if p1[k] == v]
                  for v in set(p1.values())]
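# For reference, the same best_partition / grouping idiom can be exercised
# on a toy graph. A minimal sketch using networkx's built-in karate club
# graph; the graph and variable names here are illustrative only, not part
# of the project code above.
import networkx as nx
import community   # python-louvain

toy_graph = nx.karate_club_graph()

# best_partition returns a dict mapping each node id to a community id.
node_to_community = community.best_partition(toy_graph)

# Group node ids by community id, mirroring the `partitions` list above.
toy_partitions = [[n for n, cid in node_to_community.items() if cid == c]
                  for c in set(node_to_community.values())]

print 'found %d communities' % len(toy_partitions)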
def load(screen_name=None, user_id=None, force_db_update=False,
         force_twitter_update=False, debug=False):
    '''
    Main entry point into the gravitty module. Should be used by importing
    gravitty and calling gravitty.load('<your_screen_name>'). Please see
    the readme at github.com/ericjeske/gravitty for mandatory setup
    instructions and api requirements.

    The load function will make every attempt to load data from cache
    sources (mongoDB) before using twitter's api. It is, however, suggested
    that multiple twitter api keys are utilized with this app to avoid rate
    limiting restrictions.

    By default, running this function will return a json object that can be
    parsed by d3.js to create a community graph. Additional information,
    including the raw twitter data, parsed twitter data, user similarity,
    community clustering dendrogram, community analytics data, community
    networkx graph, and community json object, can be returned by passing
    in debug=True.

    Also, by default, this app will create two pickled objects, one
    containing the debug data described above, the other containing the
    community json file. Subsequent calls for the same user will use this
    data to save time (and api calls).

    To override the use of pickled data, use force_db_update=True. Data for
    each follower will be pulled from mongoDB if possible, otherwise it
    will be pulled from twitter. To do a clean-slate download, downloading
    everything from twitter, use force_twitter_update=True.
    '''

    if screen_name is None and user_id is None:
        raise Exception('Please enter an id or name')

    # Assume that if screen_name was not provided (only user id) then a
    # pickle has not been created.
    if screen_name is not None:
        ABS_PKL_PATH = os.path.join(os.path.dirname(__file__), PKL_PATH)
        sn_file = ABS_PKL_PATH + str(screen_name) + '.' + PKL_FILE_EXT
        sn_file_debug = ABS_PKL_PATH + str(screen_name) + '.' + DBG_FILE_EXIT

        # Check to see if there are pickles for the user. Note that this
        # will be overridden if force_db_update is set to true.
        if os.path.isfile(sn_file_debug) and debug \
                and not force_twitter_update and not force_db_update:
            return pickle.load(open(sn_file_debug, 'rb'))

        if os.path.isfile(sn_file) \
                and not force_twitter_update and not force_db_update:
            return pickle.load(open(sn_file, 'rb'))

    # Use api credentials from files located in the API_PATH.
    ABS_API_PATH = os.path.join(os.path.dirname(__file__), API_PATH)
    apis = oauth_login(ABS_API_PATH)

    # Try to start up a mongo database connection to cache data in.
    try:
        conn = pymongo.MongoClient("localhost", 27017)
    except pymongo.errors.ConnectionFailure:
        print 'Please run mongod and re-run program'
        raise Exception('DBError')

    db = conn[DB_NAME]

    # Get the target user's data from either the screen_name or user_id.
    user_data = get_user_data(db, apis[0], name=screen_name, uid=user_id,
                              force=force_twitter_update)

    # If the user is protected (or has more than the maximum
    # followers/friends), then return an error.
    if user_data is None:
        print 'Was unable to access data for %s / %s' % (screen_name, user_id)
        raise Exception('TargetError')

    user_info, user_tweets, followers, following, user_lists = user_data

    # Using the target user's list of followers (user ids), get the same
    # information we just got for the target user for each of its followers.
    raw_df = get_follower_data(db, apis, followers,
                               force=force_twitter_update)

    # Filter the dataframe for inactive users, then parse the raw dataframe
    # to extract the relevant features from the raw data.
    df = parse_dataframe(filter_dataframe(raw_df))

    # With the features in hand, calculate the latent similarity between
    # each pair of users. See similarity.py for more detail on the
    # calculation of this similarity metric. The resulting dataframe will
    # be a square matrix indexed/columned by user_id and contain the
    # undirected edge weights between each pair of users.
    df_similarity = make_similarity_dataframe(df)

    # Make an undirected graph representing the relationship between each
    # pair of users, if any. Each node ID is the user ID, and each edge
    # weight is equal to the similarity score between those two users.
    graph = make_graph(df, df_similarity)

    # Using the louvain method, find communities within the weighted graph.
    # The returned dendrogram is a list of dictionaries where the values of
    # each dictionary are the keys of the next dictionary. The length of
    # the dendrogram indicates the number of levels of community clusters
    # detected.
    dendrogram = generate_dendrogram(graph)

    # Add a final mapping to the dendrogram that maps everyone into the
    # same community. They are, after all, followers of the same user.
    dendrogram.append({k: 0 for k in dendrogram[-1].values()})

    # Modify the dataframe to contain columns titled 'cid + <level>'. Each
    # column contains the community ids for that level for each user.
    # Also, this is a convenient time to calculate graph modularity at each
    # level, so produce that here as well.
    df, modularity = get_community_assignment(df, graph, dendrogram)

    num_levels = len(dendrogram)

    # For each community at each level of the dendrogram, find the topics,
    # sentiment, biggest influencers, etc.
    data = get_community_analytics(df, graph, num_levels,
                                   community_modularity=modularity)

    # Both the mentioned and most connected users fields from the community
    # analytics function are user ids. Turn them into screen names.
    data = get_screen_names(data, 'mentioned', df, db, apis[0])
    data = get_screen_names(data, 'most_connected', df, db, apis[0])

    # Close the database connection. It is no longer needed.
    conn.close()

    # Create a networkx graph where each node represents a community. Edges
    # represent membership in larger communities at the next level up the
    # dendrogram and have no edge weights. The data obtained in the
    # previous steps from community_analytics is loaded into the attributes
    # of each node.
    community_graph = create_community_graph(data, dendrogram)

    # Parse this graph into a json representation for use & consumption by
    # d3.js.
    community_json = create_community_json(community_graph, user_info)

    # Just in case we don't have the screen name, grab it.
    if screen_name is None:
        screen_name = user_info['screen_name']

    # Pickle the objects for reuse.
    ABS_PKL_PATH = os.path.join(os.path.dirname(__file__), PKL_PATH)
    sn_file = ABS_PKL_PATH + str(screen_name) + '.' + PKL_FILE_EXT
    sn_file_debug = ABS_PKL_PATH + str(screen_name) + '.' + DBG_FILE_EXIT

    pickle.dump((raw_df, df, df_similarity, dendrogram, data,
                 community_graph, community_json),
                open(sn_file_debug, 'wb'))
    pickle.dump(community_json, open(sn_file, 'wb'))

    # If debug is true, return all of the precursor objects along with the
    # json.
    if debug:
        return (raw_df, df, df_similarity, dendrogram, data,
                community_graph, community_json)

    # Otherwise return the json object.
    return community_json
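# A hedged usage sketch of the load() entry point above, assuming gravitty
# is importable and the api-key / mongoDB setup described in the docstring
# is in place. The screen name is only an example taken from elsewhere in
# these scripts.
import gravitty

# Default call: returns the d3.js-ready community json.
community_json = gravitty.load(screen_name='ZipfianAcademy')

# Debug call: also returns every intermediate object.
(raw_df, df, df_similarity, dendrogram, data,
 community_graph, community_json) = gravitty.load(
    screen_name='ZipfianAcademy', debug=True)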
NUMBER_OF_COLLECTIONS = 5
COLLECTION_TIME = 2.5   # in minutes
WAIT_TIME = 10          # in minutes

date_format = '%m/%d/%Y %H:%M:%S'

broncos, panthers, counts = [], [], []

for counter in range(1, NUMBER_OF_COLLECTIONS + 1):

    print '------------------------------------------'
    print 'COLLECTION NUMBER', counter

    broncos_counter, panthers_counter = 0, 0
    count_dict = {'start_time': datetime.now().strftime(format=date_format)}

    # Create a stream instance
    auth = oauth_login(consumer_key=CONSUMER_KEY,
                       consumer_secret=CONSUMER_SECRET,
                       token=OAUTH_TOKEN,
                       token_secret=OAUTH_TOKEN_SECRET)
    twitter_stream = TwitterStream(auth=auth)
    stream = twitter_stream.statuses.filter(track=track, locations=locations)

    endTime = datetime.now() + timedelta(minutes=COLLECTION_TIME)

    # collect tweets while the current time is less than endTime
    while datetime.now() <= endTime:
        for tweet in stream:
            if 'text' in tweet.keys():
                if datetime.now() > endTime:
                    break
                elif 'Broncos' in tweet['text'] and 'Panthers' in tweet['text']:
                    broncos.append(tweet)
                    panthers.append(tweet)
                    broncos_counter += 1
                    panthers_counter += 1
                    print 'Panthers: %s, Broncos: %s' % (panthers_counter,
                                                         broncos_counter)
                elif 'Broncos' in tweet['text']:
                    broncos.append(tweet)
                    broncos_counter += 1
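    # The listing above is truncated after the Broncos-only branch. One
    # plausible continuation of the outer collection loop (not the original
    # code; the count_dict keys are guesses and `import time` would be
    # needed at the top of the script):
    count_dict['broncos'] = broncos_counter
    count_dict['panthers'] = panthers_counter
    counts.append(count_dict)

    # Pause before the next collection window (WAIT_TIME is in minutes).
    time.sleep(WAIT_TIME * 60)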
# -*- coding: utf-8 -*-
import csv

from clint import resources

from utils import oauth_login, base_url, bank_id, account_id

resources.init('openbankproject', 'evmakesgeo')
key_file = resources.user.read('config.ini')

openbank = oauth_login(base_url, key_file)

# Parse coordinates from the csv
reader = csv.reader(open('static/ireland_points.csv'))
firstline = True
csv_data = []
for row in reader:
    if firstline:   # skip the header line
        firstline = False
        continue
    csv_data.append((row[0], row[1]))

# Get transaction ids
response = openbank.get(
    "{}obp/v1.2.1/banks/{}/accounts/{}/owner/transactions".format(
        base_url, bank_id, account_id))

transactions = [each['id'] for each in
                response.json()['transactions']][0:len(csv_data)]

# Create geo metadata for each transaction id in the list
for index, transaction in enumerate(transactions):
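# The loop body above is truncated. A sketch of pairing each transaction
# with a coordinate and posting it as geolocation metadata; the
# metadata/where endpoint path and payload shape are assumptions about the
# OBP v1.2.1 API, not taken from this script, and it is assumed the
# openbank session exposes a requests-style post().
import json

for index, transaction in enumerate(transactions):
    latitude, longitude = csv_data[index]

    where_url = ("{}obp/v1.2.1/banks/{}/accounts/{}/owner/"
                 "transactions/{}/metadata/where").format(
                     base_url, bank_id, account_id, transaction)
    payload = {"where": {"latitude": float(latitude),
                         "longitude": float(longitude)}}

    response = openbank.post(where_url, data=json.dumps(payload),
                             headers={'content-type': 'application/json'})
    print transaction, response.status_code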
# TODO: add secret key to url (or POST?)
# also look at invalid signature error message (oauth) or if that is part
# of missing secret key (scala obp importer has it working)

#import json

from clint import resources

from utils import oauth_login, base_url

resources.init('openbankproject', 'evmakesgeo')
key_file = resources.user.read('config.ini')
# key file in a place like:
# /Users/simonredfern/Library/Application\ Support/evmakesgeo/config.ini
print "key file is: %s" % key_file

openbank = oauth_login(base_url, key_file)

# Load a json file for sandbox creation.
with open('/Users/simonredfern/Documents/OpenBankProject/DATA/BNPP/OBP-sandbox-bnpp-fr_compact.json') as data_file:
    #data = json.load(data_file)
    data = data_file.read().replace('\n', '')

url = "{}obp/vsandbox/v1.0/data-import".format(base_url)
print 'url is: %s' % url
#print 'data is: %s' % data

headers = {'content-type': 'application/json', 'Accept': 'application/json'}
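# The script above stops before sending the request. A minimal sketch of
# the POST, assuming the openbank session object exposes a requests-style
# post() (its companion script uses get()) and that the sandbox import
# endpoint accepts the raw json body as-is.
response = openbank.post(url, data=data, headers=headers)
print 'status is: %s' % response.status_code
print 'body is: %s' % response.text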
# -*- coding: utf-8 -*-
from utils import oauth_login
from data import get_user_data, get_follower_data
#from make_graph import make_graph

api_path = '../api_keys/'
screen_name = 'ZipfianAcademy'
user_id = 1244850380
#screen_name = 'graphlabteam'

apis = oauth_login(api_path)[3:]

target, target_tweets, followers, following, user_lists = \
    get_user_data(apis[2], screen_name=screen_name, user_id=user_id)

print target['id']

ind = followers.index(562363) + 1

data = get_follower_data(apis, followers[ind:])

#g = make_graph(target['id'], followers, apis)


def thinking_about_this_stuff():
    import networkx as nx
    import community
    #from make_graph import make_graph

    api_path = '../api_keys/'
    screen_name = 'graphlabteam'

    apis = oauth_login(api_path)
from constants import CONSUMER_KEY, CONSUMER_SECRET, OAUTH_TOKEN, \
    OAUTH_TOKEN_SECRET
from twitter import TwitterStream
from utils import oauth_login, save_json

track = "Patriots"   # Tweets for Patriots

TOTAL_TWEETS = 2500

patriots = []
patriots_counter = 0

# collect tweets until TOTAL_TWEETS have been gathered
while patriots_counter < TOTAL_TWEETS:

    # Create a stream instance
    auth = oauth_login(consumer_key=CONSUMER_KEY,
                       consumer_secret=CONSUMER_SECRET,
                       token=OAUTH_TOKEN,
                       token_secret=OAUTH_TOKEN_SECRET)
    twitter_stream = TwitterStream(auth=auth)
    stream = twitter_stream.statuses.filter(track=track)

    counter = 0
    for tweet in stream:
        if patriots_counter == TOTAL_TWEETS:
            print 'break'
            break
        elif counter % 500 == 0 and counter != 0:
            # Re-open the stream every 500 tweets to avoid a stale connection.
            print 'get new stream'
            break
        else:
            patriots.append(tweet)
            patriots_counter += 1
            counter += 1
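# save_json is imported above but the persistence step is not shown here.
# An illustrative way to dump the collected tweets with the standard
# library instead (the filename is arbitrary, and it assumes each collected
# item is json-serializable, as the dict-like tweets are).
import json

with open('patriots_tweets.json', 'w') as f:
    json.dump(patriots, f)

print 'saved %d tweets' % len(patriots)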