def sim_msg_stream(self, sleep_time=0.25):
    """Send an endless stream of simulated messages to the Kafka topic "messages".

    Each iteration picks a random county/state pair from
    ``self.county_state_list``, builds a message with
    ``IngUt.create_json_message`` and publishes it through ``self.producer``.
    The loop has no exit condition: it runs until the process is interrupted
    or an exception propagates out of it.

    Args:
        sleep_time: delay in seconds between consecutive messages. Any
            non-positive value disables the delay instead of being handed to
            ``time.sleep``, which rejects negative durations.

    Returns:
        None. In practice the method does not return normally.
    """
    msg_cnt = 0
    while True:
        # Parenthesised single-argument print behaves the same as the
        # Python 2 print statement and is also valid Python 3 syntax.
        print(len(self.county_state_list))

        county, state = IngUt.select_random_county(self.county_state_list)

        # First six fields of the local time struct:
        # [year, month, day, hour, minute, second].
        timestamp = list(time.localtime()[0:6])

        message_info = IngUt.create_json_message(
            county=county,
            state=state,
            rank=0,
            timestamp=timestamp,
            creator_id=random.randint(0, NUM_USERS-1),
            sender_id=random.randint(0, NUM_USERS-1),
            message_id=msg_cnt,  # running counter doubles as the message id
            message=fake.text())

        self.producer.send_messages('messages', message_info)
        print(timestamp)

        # Only sleep for a strictly positive delay: a negative value would
        # make time.sleep raise and abort the stream.
        if sleep_time > 0:
            time.sleep(sleep_time)

        msg_cnt += 1
__author__ = 'aouyang1'

import time
from datetime import datetime

import IngestionUtilities as IngUt
from faker import Factory

fake = Factory.create()

# Sizing knobs handed to IngUt.gen_random_messages below
# (as num_messages, num_users and reps respectively).
NUM_MESSAGES = 375000
NUM_USERS = 1000000
NUM_REPLICATIONS = 75

# NOTE(review): not referenced anywhere in the visible code; kept because
# other code may read it from this module.
hadoop_remote_path = "/user/PuppyPlaydate/history/"

# County/state pairs are parsed from a file resolved against the current
# working directory.
county_state_list = IngUt.parse_county_list("county_list.txt")

# Bounds of the date range passed to the generator: 2013-01-01 00:00:00
# through 2015-02-04 00:00:00. time.mktime interprets the struct_time values
# as local time and yields seconds since the epoch.
start_dtt = datetime(2013, 1, 1, 0, 0, 0).timetuple()
end_dtt = datetime(2015, 2, 4, 0, 0, 0).timetuple()
start_ts, end_ts = time.mktime(start_dtt), time.mktime(end_dtt)

# Kick off generation at import/run time with the settings above.
IngUt.gen_random_messages(
    county_state_list,
    start_ts,
    end_ts,
    reps=NUM_REPLICATIONS,
    num_messages=NUM_MESSAGES,
    num_users=NUM_USERS,
    date_model=IngUt.gen_random_date,
)
def __init__(self, addr, county_list_path='ingestion/county_list.txt'):
    """Initialize the producer for the Kafka broker at ``addr``.

    Args:
        addr: address of the Kafka broker, passed unchanged to
            ``KafkaClient``.
        county_list_path: path of the county list file handed to
            ``IngUt.parse_county_list``. Defaults to the previously
            hard-coded ``'ingestion/county_list.txt'``, which is resolved
            against the current working directory; pass an explicit path
            when running from another directory.
    """
    # Load the county list first so that a missing or unreadable file fails
    # before the Kafka client is constructed.
    self.county_state_list = IngUt.parse_county_list(county_list_path)

    self.client = KafkaClient(addr)
    self.producer = SimpleProducer(self.client)