Exemplo n.º 1
0
def _logging_service():
    """Create a ``LoggingService`` named ``'test'`` and verify its initial state.

    Asserts that the name matches the one passed in, that the logging format
    and level equal ``default_logging_format`` and ``default_level``, and that
    the private ``_logger`` attribute is ``None`` right after construction.

    Returns:
        The freshly constructed ``LoggingService`` instance.
    """
    name = 'test'
    service = LoggingService(name)

    # A new service must carry the given name and the default settings.
    assert name == service.name
    assert default_logging_format == service.logging_format
    assert default_level == service.level
    # The private logger attribute is expected to be unset at this point.
    assert service._logger is None

    return service
Exemplo n.º 2
0
from ingestor.readers.read_tweet_json import read_tweet_json
from ingestor.senders.send_event_hub_message import send_event_hub_message, stop_client
from ingestor.utils.env import env
from ingestor.utils.logging_service import LoggingService

# Module-level logger obtained from the project's LoggingService.
logger = LoggingService('Send tweet.json').logger

# Profile name read through the project's env() helper.
PROFILE = env('PROFILE_NAME')

# Message type label for tweet events.
message_type = 'tweet'
# Default separator placed between the fields of a structured message.
data_delimiter = '|~|'


def build_message(profile: str,
                  message_type: str,
                  event_id: str,
                  message: str,
                  *,
                  delimiter: str = data_delimiter):
    """Assemble the structured, delimiter-separated message string.

    The result has the form
    ``profile:<profile><d>type:<message_type><d>id:<event_id><d>message:<message>``
    where ``<d>`` is ``delimiter``.

    Args:
        profile: Value written after ``profile:``.
        message_type: Value written after ``type:``.
        event_id: Value written after ``id:``.
        message: Value written after ``message:``.
        delimiter: Separator between fields (keyword-only); defaults to
            ``data_delimiter``.

    Returns:
        The assembled message string.
    """
    fields = (
        f'profile:{profile}',
        f'type:{message_type}',
        f'id:{event_id}',
        f'message:{message}',
    )
    # Format the delimiter exactly as an f-string placeholder would before
    # using it as the joiner.
    separator = f'{delimiter}'
    return separator.join(fields)


if __name__ == "__main__":
    # Load the tweets through read_tweet_json() with its default profile.
    # NOTE(review): the script body appears to be cut off after this line in
    # this excerpt — the remaining steps are not visible here.
    tweets = read_tweet_json()
Exemplo n.º 3
0
from ingestor.utils.logging_service import LoggingService
from tests.integration import build_firebase_connection_for_tests

# Module-level logger obtained from the project's LoggingService.
logger = LoggingService('Write Firebase').logger


def test_write():
    """Exercise push, set, get and update on the ``agents`` node.

    Uses the database handle and ID token returned by
    ``build_firebase_connection_for_tests`` and passes the token to every
    database call.
    """
    db, id_token = build_firebase_connection_for_tests()

    # create: push a new record under "agents"
    archer_record = {"name": "Sterling Archer", "agency": "Figgis Agency"}
    db.child("agents").push(archer_record, id_token)

    # set: store a record under the explicit key "Lana"
    lana_record = {"name": "Lana Kane", "agency": "Figgis Agency"}
    db.child("agents").child("Lana").set(lana_record, id_token)

    # read all: expect at least two entries under "agents"
    agents_snapshot = db.child("agents").get(id_token)
    assert len(agents_snapshot.val()) >= 2

    # read one: the record stored under "Lana" carries the name that was set
    lana_snapshot = db.child("agents").child("Lana").get(id_token)
    stored_lana = lana_snapshot.val()
    assert "Lana Kane" == stored_lana['name']

    # update: change the name, then read it back
    name_change = {"name": "Lana Anthony Kane"}
    db.child("agents").child("Lana").update(name_change, id_token)
    updated_lana = db.child("agents").child("Lana").get(id_token).val()
    assert 'Lana Anthony Kane' == updated_lana['name']
Exemplo n.º 4
0
import operator

from ingestor.extractors.extract_hashtags_from_tweets_list import extract_hashtags
from ingestor.readers.read_tweet_json import read_tweet_json
from ingestor.utils.logging_service import LoggingService

# Module-level logger obtained from the project's LoggingService.
logger = LoggingService('Read tweet.json').logger


def _add_to_aggregate(aggregate_dict: dict, key: str):
    aggregate_dict[key] = aggregate_dict[key] + 1


if __name__ == "__main__":
    # Load the tweets and echo them.
    tweets = read_tweet_json()
    print(tweets)

    # Count the hashtags, then order the (hashtag, count) pairs by count,
    # highest count first.
    hashtags = extract_hashtags(tweets)
    hashtags_sorted = list(hashtags.items())
    hashtags_sorted.sort(key=operator.itemgetter(1), reverse=True)

    print(hashtags_sorted)
Exemplo n.º 5
0
import time

from azure.eventhub import EventHubClient, Offset

from ingestor.builders.build_firebase import get_firebase_auth_and_db
from ingestor.daos.last_received_event_offset import LastReceivedEventOffset
from ingestor.retrievers.event_hub_retriever import EventHubRetriever
from ingestor.utils.env import env
from ingestor.utils.logging_service import LoggingService

# Module-level logger obtained from the project's LoggingService.
logger = LoggingService('Receive').logger

# Event Hub settings read through the project's env() helper.
CONNECTION_STRING = env('EVENT_HUB_CONNECTION_STRING')
EVENT_HUB_TOPIC = env('EVENT_HUB_TOPIC_HELLO_WORLD_NAME')
EVENT_HUB_PARTITION = env('EVENT_HUB_TOPIC_HELLO_WORLD_PARTITION')

# Receiver settings.
# NOTE(review): the code consuming these is not visible in this excerpt —
# presumably they are passed to the Event Hub receiver; confirm.
CONSUMER_GROUP = "$default"
PREFETCH = 1
OFFSET = Offset('1', inclusive=True)

# Firebase credentials used by receive_one() to sign in.
USER = env('FIREBASE_AUTH_USER')
PASSWORD = env('FIREBASE_AUTH_PASSWORD')


def receive_one():
    """Sign in to Firebase and create the last-received-offset accessor.

    NOTE(review): the body appears to be cut off in this excerpt — nothing is
    returned and ``last_received_event_offset`` is not used after it is
    created. Confirm against the full source before relying on this.
    """
    auth, db = get_firebase_auth_and_db()

    # Authenticate with the configured credentials and keep the ID token,
    # which is handed to LastReceivedEventOffset below.
    user = auth.sign_in_with_email_and_password(USER, PASSWORD)
    id_token = user['idToken']

    last_received_event_offset = LastReceivedEventOffset(db, id_token)
Exemplo n.º 6
0
import time

from azure.eventhub import EventData

from ingestor.builders.build_sender_client import build_sender_client
from ingestor.utils.logging_service import LoggingService

# Module-level logger obtained from the project's LoggingService.
logger = LoggingService('send_event_hub_message').logger

# Created by send_event_hub_message() on first use and reused afterwards.
client = None
sender = None


def send_event_hub_message(message: str):
    """Send ``message`` through the Event Hub sender and log the send duration.

    The module-level ``client`` and ``sender`` are created through
    ``build_sender_client()`` whenever either of them is falsy (in particular
    on the first call) and are reused on later calls.

    Args:
        message: Payload wrapped in an ``EventData`` and handed to
            ``sender.send``.
    """
    global client
    global sender

    if not client or not sender:
        client, sender = build_sender_client()

    logger.info(f"Sending tweet message: {message}")

    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # (or turn negative) when the system clock is adjusted during the send.
    start_time = time.perf_counter()
    sender.send(EventData(message))
    run_time = time.perf_counter() - start_time

    logger.info(f"Runtime: {run_time} seconds")


def stop_client():
from collections import defaultdict

from ingestor.utils.env import env
from ingestor.utils.logging_service import LoggingService

# Module-level logger obtained from the project's LoggingService.
logger = LoggingService('extract_hashtags_from_tweets_list').logger

# Profile name read through the project's env() helper.
profile = env('PROFILE_NAME')


def extract_hashtags(tweets: list) -> dict:
    hashtags_found = defaultdict(int)

    for tweet in tweets:
        tweet_hashtags = tweet['entities']['hashtags']
        [_add_to_aggregate(hashtags_found, tweet_hashtag['text'].lower()) for tweet_hashtag in tweet_hashtags]

    return hashtags_found


def _add_to_aggregate(aggregate_dict: dict, key: str):
    aggregate_dict[key] = aggregate_dict[key] + 1
Exemplo n.º 8
0
from ingestor.utils.env import env
from ingestor.utils.logging_service import LoggingService
from ingestor.utils.read_json import read_json

# Module-level logger obtained from the project's LoggingService.
logger = LoggingService('read_tweet_json').logger

# Profile name read through the project's env() helper; used as the default
# profile of read_tweet_json().
PROFILE = env('PROFILE_NAME')

# Tweet files are looked up at <data_path>/<profile>/<tweet_json_file_name>.
data_path = './data'
tweet_json_file_name = 'tweet.json'


def read_tweet_json(profile_to_read: str = PROFILE) -> list:
    """Read the tweet JSON file that belongs to a profile.

    The file path is ``<data_path>/<profile_to_read>/<tweet_json_file_name>``;
    it is logged and then passed to ``read_json``.

    Args:
        profile_to_read: Profile directory name; defaults to ``PROFILE``.

    Returns:
        Whatever ``read_json`` returns for that path (annotated as a list).
    """
    source_path = '{}/{}/{}'.format(data_path,
                                    profile_to_read,
                                    tweet_json_file_name)

    logger.info(f"Reading tweet file: {source_path}")

    tweets = read_json(source_path)
    return tweets
Exemplo n.º 9
0
from azure.eventhub import EventHubClient

from ingestor.utils.env import env
from ingestor.utils.logging_service import LoggingService

# Module-level logger obtained from the project's LoggingService.
logger = LoggingService('build_sender_client').logger

# Event Hub settings read through the project's env() helper; the topic and
# partition serve as the defaults of build_sender_client().
CONNECTION_STRING = env('EVENT_HUB_CONNECTION_STRING')
EVENT_HUB_TOPIC = env('EVENT_HUB_TOPIC_HELLO_WORLD_NAME')
EVENT_HUB_PARTITION = env('EVENT_HUB_TOPIC_HELLO_WORLD_PARTITION')


def build_sender_client(*,
                        topic: str = EVENT_HUB_TOPIC,
                        partition: str = EVENT_HUB_PARTITION) -> tuple:
    """Create an Event Hub client with one sender and start the client.

    Args:
        topic: Event Hub name passed to
            ``EventHubClient.from_connection_string``; defaults to
            ``EVENT_HUB_TOPIC``.
        partition: Partition passed to ``add_sender``; defaults to
            ``EVENT_HUB_PARTITION``.

    Returns:
        A ``(client, sender)`` tuple.

    Raises:
        ValueError: If ``CONNECTION_STRING`` is empty or unset.
    """
    connection_missing = not CONNECTION_STRING
    if connection_missing:
        raise ValueError("No EventHubs URL supplied.")

    hub_client = EventHubClient.from_connection_string(CONNECTION_STRING, topic)
    # The sender is registered first; the client is started afterwards.
    partition_sender = hub_client.add_sender(partition)
    hub_client.run()

    return hub_client, partition_sender