예제 #1
0
def read_object_from_S3(client, key):
    """Download one object from the configured bucket and parse it as JSON.

    The bucket name is read from the ``s3_bucket`` entry of the mapping
    returned by ``get_credentials()``.

    Args:
        client: Object exposing ``get_object(Key=..., Bucket=...)``
            (presumably a boto3 S3 client -- confirm against callers).
        key: Key of the object to fetch.

    Returns:
        Whatever ``json.loads`` yields for the decoded object body.
    """
    bucket_name = get_credentials()['s3_bucket']
    response = client.get_object(Key=key, Bucket=bucket_name)
    # The body is read fully into memory, decoded, then parsed.
    raw_bytes = response['Body'].read()
    return json.loads(raw_bytes.decode())
예제 #2
0
def list_files_in_S3_bucket(client):
    """Return the keys of the objects stored in the configured S3 bucket.

    The bucket name is read from the ``s3_bucket`` entry of the mapping
    returned by ``get_credentials()``.

    Args:
        client: Object exposing ``list_objects(Bucket=...)`` (presumably a
            boto3 S3 client -- confirm against callers).

    Returns:
        A list with the ``Key`` of every entry in the response's
        ``Contents``; an empty list when the response has no ``Contents``.
    """
    s3_bucket = get_credentials()['s3_bucket']
    response = client.list_objects(Bucket=s3_bucket)

    # The response carries no 'Contents' entry when the bucket holds no
    # objects, so fall back to an empty listing instead of raising KeyError.
    # NOTE(review): a single list_objects call returns at most 1,000 keys;
    # larger buckets would need pagination -- confirm whether that matters.
    return [entry['Key'] for entry in response.get('Contents', [])]
예제 #3
0
def write_file_to_S3(client, filename):
    """Upload a local file to the configured S3 bucket under its own name.

    The bucket name is read from the ``s3_bucket`` entry of the mapping
    returned by ``get_credentials()``. The file is opened in text mode and
    its full contents are sent as the object body.

    Args:
        client: Object exposing ``put_object(Key=..., Body=..., Bucket=...)``
            (presumably a boto3 S3 client -- confirm against callers).
        filename: Path of the local file; also used as the object key.
    """
    bucket_name = get_credentials()['s3_bucket']
    with open(filename) as source:
        payload = source.read()
        # Upload happens while the file is still open, as before.
        client.put_object(Bucket=bucket_name, Key=filename, Body=payload)
예제 #4
0
def create_mongo_client_to_database_collection():
    """Connect to MongoDB and return the configured collection handle.

    Connection settings come from the ``mongo`` entry of the mapping
    returned by ``get_credentials()``: ``ip`` and ``port`` for the client,
    then ``database`` and ``collection`` to select the target.

    Returns:
        The object returned by ``get_collection`` on the selected database.
    """
    mongo_settings = get_credentials()['mongo']

    connection = MongoClient(mongo_settings['ip'], mongo_settings['port'])
    db = connection.get_database(mongo_settings['database'])
    target_collection = db.get_collection(mongo_settings['collection'])

    print("Created Mongo Client")
    return target_collection
예제 #5
0
def create_timestamped_filename():
    """Build a ``tweets-<username>-<timestamp>.json`` file name.

    The username is the ``username`` entry of the mapping returned by
    ``get_credentials()``. The timestamp is ``str(datetime.now())`` with
    spaces turned into underscores and dots and colons turned into hyphens.

    Returns:
        The generated file name as a string.
    """
    username = get_credentials()['username']

    stamp = str(datetime.now())
    # Swap out the space, dot and colon characters of the timestamp.
    for unwanted, substitute in ((' ', '_'), ('.', '-'), (':', '-')):
        stamp = stamp.replace(unwanted, substitute)

    return '-'.join(("tweets", username, stamp)) + ".json"
예제 #6
0
def create_boto_client():
    """Create an S3 client authenticated with the configured AWS keys.

    The access key id and secret access key are read from the ``aws`` entry
    of the mapping returned by ``get_credentials()``.

    Returns:
        The client object returned by ``boto3.client('s3', ...)``.
    """
    # The previous version also built a ``boto3.resource('s3')`` that was
    # never used; only the explicitly-credentialed client is needed.
    aws_settings = get_credentials()['aws']

    client = boto3.client(
        's3',
        aws_access_key_id=aws_settings['aws_access_key_id'],
        aws_secret_access_key=aws_settings['aws_secret_access_key'],
    )
    print("Created S3 Client")
    return client
def create_tweet_iterator():
    """Open a filtered Twitter status stream for the configured bounding box.

    OAuth values come from the ``twitter`` entry of the mapping returned by
    ``get_credentials()``, and the ``locations`` filter from its
    ``bounding_box`` entry.

    Returns:
        The object returned by ``statuses.filter(...)`` on the stream.
    """
    credentials = get_credentials()
    twitter_keys = credentials['twitter']

    # Argument order matters: token, token secret, consumer key, consumer
    # secret are passed positionally.
    auth = OAuth(
        twitter_keys['token'],
        twitter_keys['token_secret'],
        twitter_keys['consumer_key'],
        twitter_keys['consumer_secret'],
    )

    stream = TwitterStream(auth=auth)
    bounding_box = credentials['bounding_box']
    tweets = stream.statuses.filter(locations=bounding_box)

    print("Created Tweet Iterator.")
    return tweets
예제 #8
0
from datetime import datetime
import json
from os import rename
import lib

from s3 import create_boto_client, process_local_file_to_S3
from twitter_funcs import collect_tweets, create_tweet_iterator
from mongo import create_mongo_client_to_database_collection, insert_to_mongo
from utility import get_credentials, timestamp, write_to_disk
from requests import HTTPError

if __name__ == "__main__":
    # Script entry point: collect tweets in batches of 100, write each batch
    # to disk, push the file to S3 and insert it into Mongo, forever.

    # Fail fast when the Twitter token was never filled in.
    twitter_token = get_credentials()['twitter']['token']
    if twitter_token is None:
        print(
            "Did you forget to add your twitter tokens to the credentials.json file?"
        )
        raise HTTPError

    # Set up the three external connections once, before the loop.
    tweet_stream = create_tweet_iterator()
    s3 = create_boto_client()
    mongo_collection = create_mongo_client_to_database_collection()

    while True:
        timestamp()
        batch = collect_tweets(tweet_stream, 100)
        batch_file = write_to_disk(batch)
        process_local_file_to_S3(s3, batch_file)
        insert_to_mongo(s3, mongo_collection, batch_file)