def read_object_from_S3(client, key):
    """Fetch the object stored under *key* and return its parsed JSON content.

    The bucket name is read from the ``'s3_bucket'`` entry of the mapping
    returned by ``get_credentials()``.

    Args:
        client: Object exposing ``get_object(Key=..., Bucket=...)``
            (presumably a boto3 S3 client -- confirm against callers).
        key: Key of the object to read.

    Returns:
        The value produced by ``json.loads`` for the decoded object body.
    """
    bucket_name = get_credentials()['s3_bucket']
    response = client.get_object(Key=key, Bucket=bucket_name)
    # The body is a stream: read it fully, then decode with the default codec.
    raw_bytes = response['Body'].read()
    return json.loads(raw_bytes.decode())
def list_files_in_S3_bucket(client):
    """Return the keys of the objects in the configured S3 bucket.

    The bucket name is read from the ``'s3_bucket'`` entry of the mapping
    returned by ``get_credentials()``.

    Args:
        client: Object exposing ``list_objects(Bucket=...)``
            (presumably a boto3 S3 client -- confirm against callers).

    Returns:
        A list with the ``'Key'`` of every entry in the response's
        ``'Contents'``; an empty list when the response has no ``'Contents'``.
    """
    s3_bucket = get_credentials()['s3_bucket']
    response = client.list_objects(Bucket=s3_bucket)
    # 'Contents' is absent from the response when the bucket holds no
    # objects, so fall back to an empty listing instead of raising KeyError.
    # NOTE(review): only a single list_objects call is made; if the service
    # truncates the listing, later pages are not fetched -- confirm whether
    # pagination is needed for this bucket.
    return [entry['Key'] for entry in response.get('Contents', [])]
def write_file_to_S3(client, filename):
    """Upload the text content of a local file to the configured S3 bucket.

    The object is stored under a key equal to *filename*. The bucket name is
    read from the ``'s3_bucket'`` entry of the mapping returned by
    ``get_credentials()``.

    Args:
        client: Object exposing ``put_object(Key=..., Body=..., Bucket=...)``
            (presumably a boto3 S3 client -- confirm against callers).
        filename: Path of the local file to read; also used as the object key.
    """
    bucket_name = get_credentials()['s3_bucket']
    # Read the whole file up front; the handle is released before uploading.
    with open(filename) as source_file:
        payload = source_file.read()
    client.put_object(Key=filename, Body=payload, Bucket=bucket_name)
def create_mongo_client_to_database_collection():
    """Connect to MongoDB and return the configured collection handle.

    Connection details come from the ``'mongo'`` section of the mapping
    returned by ``get_credentials()``: ``'ip'`` and ``'port'`` are passed to
    ``MongoClient``, while ``'database'`` and ``'collection'`` select the
    collection.

    Returns:
        The object returned by ``get_collection`` for the configured names.
    """
    mongo_settings = get_credentials()['mongo']
    connection = MongoClient(mongo_settings['ip'], mongo_settings['port'])
    target_db = connection.get_database(mongo_settings['database'])
    target_collection = target_db.get_collection(mongo_settings['collection'])
    print("Created Mongo Client")
    return target_collection
def create_timestamped_filename():
    """Build a ``tweets-<username>-<timestamp>.json`` file name.

    The username is the ``'username'`` entry of the mapping returned by
    ``get_credentials()``. The timestamp is ``str(datetime.now())`` with the
    space replaced by ``_`` and every ``.`` and ``:`` replaced by ``-``.

    Returns:
        The assembled file name as a string.
    """
    username = get_credentials()['username']
    stamp = str(datetime.now())
    # Swap out the space, dot and colons of the default datetime rendering.
    for old, new in ((' ', '_'), ('.', '-'), (':', '-')):
        stamp = stamp.replace(old, new)
    return "".join(("tweets-", username, '-', stamp, ".json"))
def create_boto_client():
    """Create and return an S3 client using the configured AWS keys.

    The access key id and secret access key are read from the ``'aws'``
    section of the mapping returned by ``get_credentials()`` and passed
    explicitly to ``boto3.client``.

    Returns:
        The client object returned by ``boto3.client('s3', ...)``.
    """
    # The previous version also built an unused ``boto3.resource('s3')``
    # before reading the credentials; it was never referenced, so only the
    # explicitly-keyed client is created now.
    aws_settings = get_credentials()['aws']
    client = boto3.client(
        's3',
        aws_access_key_id=aws_settings['aws_access_key_id'],
        aws_secret_access_key=aws_settings['aws_secret_access_key'])
    print("Created S3 Client")
    return client
def create_tweet_iterator():
    """Open a filtered Twitter stream and return the resulting iterator.

    OAuth values (``'token'``, ``'token_secret'``, ``'consumer_key'``,
    ``'consumer_secret'``) come from the ``'twitter'`` section of the mapping
    returned by ``get_credentials()``; the ``'bounding_box'`` entry of the
    same mapping is passed as the ``locations`` filter.

    Returns:
        The object returned by ``TwitterStream(...).statuses.filter(...)``.
    """
    settings = get_credentials()
    twitter_keys = settings['twitter']
    auth = OAuth(
        twitter_keys['token'],
        twitter_keys['token_secret'],
        twitter_keys['consumer_key'],
        twitter_keys['consumer_secret'])
    stream = TwitterStream(auth=auth)
    tweet_iterator = stream.statuses.filter(locations=settings['bounding_box'])
    print("Created Tweet Iterator.")
    return tweet_iterator
from datetime import datetime
import json
from os import rename
import lib
from s3 import create_boto_client, process_local_file_to_S3
from twitter_funcs import collect_tweets, create_tweet_iterator
from mongo import create_mongo_client_to_database_collection, insert_to_mongo
from utility import get_credentials, timestamp, write_to_disk
from requests import HTTPError


# Script entry point: collect tweets in batches, write each batch to disk,
# then hand the file to the S3 and Mongo helpers. Runs until interrupted.
if __name__ == "__main__":
    # Stop early when the Twitter token has not been filled in.
    twitter_token = get_credentials()['twitter']['token']
    if twitter_token is None:
        print(
            "Did you forget to add your twitter tokens to the credentials.json file?"
        )
        raise HTTPError

    # Set up the three long-lived handles once, in the same order as before.
    tweet_iterator = create_tweet_iterator()
    s3_client = create_boto_client()
    collection_client = create_mongo_client_to_database_collection()

    while True:
        timestamp()
        # 100 is passed straight to collect_tweets (presumably the batch
        # size -- confirm in twitter_funcs).
        batch = collect_tweets(tweet_iterator, 100)
        batch_file = write_to_disk(batch)
        process_local_file_to_S3(s3_client, batch_file)
        insert_to_mongo(s3_client, collection_client, batch_file)