class Station(Producer):
    """Defines a single station that produces train arrival events.

    The Avro key and value schemas are loaded once, when the class body
    executes, from the ``schemas`` directory next to this file.
    """

    key_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_key.json")
    value_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_value.json")

    def __init__(self, station_id, name, color, direction_a=None, direction_b=None):
        """Configure the producer for this station's arrival topic.

        Args:
            station_id: Identifier of the station; stored as ``int``.
            name: Station name; a sanitized form becomes the topic suffix.
            color: Line colour object; ``run`` reads its ``.name``.
            direction_a: Optional neighbour in the 'a' direction;
                ``__str__`` reads its ``.name``.
            direction_b: Optional neighbour in the 'b' direction;
                ``__str__`` reads its ``.name``.

        Raises:
            ValueError/TypeError: If ``station_id`` cannot be converted
                to ``int``.
        """
        self.name = name
        # Lower-case the name and replace/strip "/", " ", "-" and "'" so it
        # can be embedded in the topic name.
        station_name = (
            self.name.lower()
            .replace("/", "_and_")
            .replace(" ", "_")
            .replace("-", "_")
            .replace("'", "")
        )
        topic_name = f"org.chicago.cta.station.arrival.{station_name}"
        super().__init__(
            topic_name,
            key_schema=Station.key_schema,
            value_schema=Station.value_schema,
            num_partitions=5,
            num_replicas=1,
        )

        # Normalise to a built-in int so the id has one type no matter what
        # the caller passes (e.g. a numeric string).
        # NOTE(review): presumably arrival_value.json declares station_id as
        # an integer - confirm against the schema file.
        self.station_id = int(station_id)
        self.color = color
        self.dir_a = direction_a
        self.dir_b = direction_b
        self.a_train = None
        self.b_train = None
        self.turnstile = Turnstile(self)

    def run(self, train, direction, prev_station_id, prev_direction):
        """Produce one arrival message for ``train`` to this station's topic.

        Args:
            train: Arriving train; ``train_id`` and ``status.name`` are read.
            direction: Direction label of the arrival ("a" or "b" when
                called through ``arrive_a`` / ``arrive_b``).
            prev_station_id: Forwarded unchanged into the message.
            prev_direction: Forwarded unchanged into the message.
        """
        print(f"producing arrival to {self.topic_name}")
        self.producer.produce(
            topic=self.topic_name,
            key={"timestamp": self.time_millis()},
            value={
                "station_id": self.station_id,
                "train_id": train.train_id,
                "direction": direction,
                "line": self.color.name,
                "train_status": train.status.name,
                "prev_station_id": prev_station_id,
                "prev_direction": prev_direction,
            },
            key_schema=self.key_schema,
            value_schema=self.value_schema,
        )

    def __str__(self):
        """Return a fixed-width, pipe-separated summary of the station.

        ``"---"`` is shown for a missing train or neighbouring station.
        """
        return (
            "Station | {:^5} | {:<30} | Direction A: | {:^5} | departing to {:<30} | "
            "Direction B: | {:^5} | departing to {:<30} | "
        ).format(
            self.station_id,
            self.name,
            self.a_train.train_id if self.a_train is not None else "---",
            self.dir_a.name if self.dir_a is not None else "---",
            self.b_train.train_id if self.b_train is not None else "---",
            self.dir_b.name if self.dir_b is not None else "---",
        )

    def __repr__(self):
        """Return the same text as ``__str__``."""
        return str(self)

    def arrive_a(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'a' direction"""
        self.a_train = train
        self.run(train, "a", prev_station_id, prev_direction)

    def arrive_b(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'b' direction"""
        self.b_train = train
        self.run(train, "b", prev_station_id, prev_direction)

    def close(self):
        """Close the turnstile first, then the producer base class."""
        self.turnstile.close()
        super(Station, self).close()
class Station(Producer):
    """A single station that publishes an arrival record per train arrival.

    Both Avro schemas are read from the ``schemas`` directory beside this
    file when the class body is executed.
    """

    key_schema = avro.load(f"{Path(__file__).parents[0]}/schemas/arrival_key.json")
    value_schema = avro.load(f"{Path(__file__).parents[0]}/schemas/arrival_value.json")

    def __init__(self, station_id, name, color, direction_a=None, direction_b=None):
        """Initialise the producer and the station's bookkeeping fields.

        Args:
            station_id: Station identifier; converted with ``int()``.
            name: Station name; a sanitized form is appended to the topic.
            color: Line colour object; ``run`` reads its ``.name``.
            direction_a: Optional neighbour in the 'a' direction.
            direction_b: Optional neighbour in the 'b' direction.
        """
        self.name = name

        # Same substitutions, in the same order, as a chained .replace().
        slug = self.name.lower()
        for old, new in (("/", "_and_"), (" ", "_"), ("-", "_"), ("'", "")):
            slug = slug.replace(old, new)

        super().__init__(
            f"org.chicago.cta.station.arrivals.{slug}",
            key_schema=Station.key_schema,
            value_schema=Station.value_schema,
            num_partitions=2,
            num_replicas=1,
        )

        self.station_id = int(station_id)
        self.color = color
        self.dir_a, self.dir_b = direction_a, direction_b
        self.a_train = None
        self.b_train = None
        self.turnstile = Turnstile(self)

    def run(self, train, direction, prev_station_id, prev_direction):
        """Produce one arrival message for ``train``.

        A truthy ``prev_station_id`` is converted to ``int``; a falsy one
        (``None``, ``0``, ``""``) is forwarded unchanged.
        """
        previous_id = int(prev_station_id) if prev_station_id else prev_station_id

        event_key = {"timestamp": self.time_millis()}
        event_value = {
            "station_id": self.station_id,
            "train_id": train.train_id,
            "direction": direction,
            "line": self.color.name,
            "train_status": train.status.name,
            "prev_station_id": previous_id,
            "prev_direction": prev_direction,
        }
        self.producer.produce(topic=self.topic_name, key=event_key, value=event_value)

    def __str__(self):
        """Return a fixed-width summary; ``"---"`` marks a missing value."""
        placeholder = "---"
        train_a = placeholder if self.a_train is None else self.a_train.train_id
        next_a = placeholder if self.dir_a is None else self.dir_a.name
        train_b = placeholder if self.b_train is None else self.b_train.train_id
        next_b = placeholder if self.dir_b is None else self.dir_b.name
        template = (
            "Station | {:^5} | {:<30} | Direction A: | {:^5} | departing to {:<30} | "
            "Direction B: | {:^5} | departing to {:<30} | "
        )
        return template.format(
            self.station_id, self.name, train_a, next_a, train_b, next_b
        )

    def __repr__(self):
        """Return the same text as ``__str__``."""
        return str(self)

    def arrive_a(self, train, prev_station_id, prev_direction):
        """Record ``train`` as the 'a'-direction train and publish the arrival."""
        self.a_train = train
        self.run(train, "a", prev_station_id, prev_direction)

    def arrive_b(self, train, prev_station_id, prev_direction):
        """Record ``train`` as the 'b'-direction train and publish the arrival."""
        self.b_train = train
        self.run(train, "b", prev_station_id, prev_direction)

    def close(self):
        """Close the turnstile, then the underlying producer."""
        self.turnstile.close()
        super().close()
class Station(Producer):
    """A single station that emits an Avro-encoded arrival event per train.

    The key and value schemas are loaded from the ``schemas`` directory
    beside this file when the class body runs.
    """

    key_schema = avro.load(f"{Path(__file__).parents[0]}/schemas/arrival_key.json")
    value_schema = avro.load(f"{Path(__file__).parents[0]}/schemas/arrival_value.json")

    def __init__(self, station_id, name, color, direction_a=None, direction_b=None):
        """Initialise the producer; the sanitized station name is the topic.

        Args:
            station_id: Station identifier; converted with ``int()``.
            name: Station name.
            color: Line colour object; ``run`` reads its ``.name``.
            direction_a: Optional neighbour in the 'a' direction.
            direction_b: Optional neighbour in the 'b' direction.
        """
        self.name = name

        # Same substitutions, in the same order, as a chained .replace().
        topic_name = self.name.lower()
        for old, new in (("/", "_and_"), (" ", "_"), ("-", "_"), ("'", "")):
            topic_name = topic_name.replace(old, new)

        # Partition sizing rule of thumb, from
        # https://www.confluent.io/blog/how-choose-number-topics-partitions-kafka-cluster/
        #   partitions = max(t / p, t / c)
        # where t is the target throughput, p the throughput achievable on a
        # single partition for production and c the same for consumption.
        # Example: 100 MB/s target, 3 producers and 5 consumers at 10 MB/s
        # each -> max(100 / 30, 100 / 50) = max(3.33, 2), i.e. about 4.
        # More partitions raise throughput but also latency.
        super().__init__(
            topic_name,
            key_schema=Station.key_schema,
            value_schema=Station.value_schema,
            num_partitions=2,
            num_replicas=1,
        )

        self.station_id = int(station_id)
        self.color = color
        self.dir_a, self.dir_b = direction_a, direction_b
        self.a_train = None
        self.b_train = None
        self.turnstile = Turnstile(self)

    def run(self, train, direction, prev_station_id, prev_direction):
        """Produce one arrival event for ``train`` and log completion.

        Args:
            train: Arriving train; ``train_id`` and ``status.name`` are read.
            direction: Direction label of the arrival.
            prev_station_id: Forwarded unchanged into the event.
            prev_direction: Forwarded unchanged into the event.
        """
        event_key = {"timestamp": self.time_millis()}
        event_value = {
            "station_id": self.station_id,
            "train_id": train.train_id,
            "direction": direction,
            "line": self.color.name,
            "train_status": train.status.name,
            "prev_station_id": prev_station_id,
            "prev_direction": prev_direction,
        }
        self.producer.produce(topic=self.topic_name, key=event_key, value=event_value)
        logger.info("producing arrival event to kafka is complete")

    def __str__(self):
        """Return a fixed-width summary; ``"---"`` marks a missing value."""
        placeholder = "---"
        train_a = placeholder if self.a_train is None else self.a_train.train_id
        next_a = placeholder if self.dir_a is None else self.dir_a.name
        train_b = placeholder if self.b_train is None else self.b_train.train_id
        next_b = placeholder if self.dir_b is None else self.dir_b.name
        template = (
            "Station | {:^5} | {:<30} | Direction A: | {:^5} | departing to {:<30} | "
            "Direction B: | {:^5} | departing to {:<30} | "
        )
        return template.format(
            self.station_id, self.name, train_a, next_a, train_b, next_b
        )

    def __repr__(self):
        """Return the same text as ``__str__``."""
        return str(self)

    def arrive_a(self, train, prev_station_id, prev_direction):
        """Record ``train`` as the 'a'-direction train and publish the arrival."""
        self.a_train = train
        self.run(train, "a", prev_station_id, prev_direction)

    def arrive_b(self, train, prev_station_id, prev_direction):
        """Record ``train`` as the 'b'-direction train and publish the arrival."""
        self.b_train = train
        self.run(train, "b", prev_station_id, prev_direction)

    def close(self):
        """Close the turnstile, then the underlying producer."""
        self.turnstile.close()
        super().close()
class Station(Producer):
    ''' A single station that reports train arrivals to the arrivals topic '''

    def __init__(self, station_id, name, color, direction_a=None, direction_b=None):
        '''
        Loads the arrival key/value schemas through ``utils.load_avro_schema``
        and hands them, with ``constants.TOPIC_ARRIVALS_V1`` and five
        partitions, to the ``Producer`` initialiser.
        '''
        arrival_key = utils.load_avro_schema('arrival_key.json')
        arrival_value = utils.load_avro_schema('arrival_value.json')
        super().__init__(
            constants.TOPIC_ARRIVALS_V1,
            arrival_key,
            value_schema=arrival_value,
            num_partitions=5,
        )

        self.station_id = int(station_id)
        self.name = name
        self.color = color
        self.dir_a, self.dir_b = direction_a, direction_b
        self.a_train = None
        self.b_train = None
        self.turnstile = Turnstile(self)

    def run(self, train, direction, prev_station_id, prev_direction):
        ''' Logs the arrival, then produces one arrival message for ``train`` '''
        logger.info(f'train {train.train_id} arrived to station {self.station_id}')

        event_key = {'timestamp': self.time_millis()}
        event_value = {
            'station_id': self.station_id,
            'train_id': train.train_id,
            'direction': direction,
            'line': self.color.name,
            'train_status': train.status.name,
            'prev_station_id': prev_station_id,
            'prev_direction': prev_direction,
        }
        self.producer.produce(topic=self.topic_name, key=event_key, value=event_value)

    def __str__(self):
        ''' Returns a fixed-width summary; '---' marks a missing value '''
        placeholder = '---'
        train_a = placeholder if self.a_train is None else self.a_train.train_id
        next_a = placeholder if self.dir_a is None else self.dir_a.name
        train_b = placeholder if self.b_train is None else self.b_train.train_id
        next_b = placeholder if self.dir_b is None else self.dir_b.name
        template = (
            'Station | {:^5} | {:<30} | Direction A: | {:^5} | departing to {:<30} | '
            'Direction B: | {:^5} | departing to {:<30} | '
        )
        return template.format(
            self.station_id, self.name, train_a, next_a, train_b, next_b
        )

    def __repr__(self):
        ''' Returns the same text as ``__str__`` '''
        return str(self)

    def arrive_a(self, train, prev_station_id, prev_direction):
        ''' Records ``train`` as the 'a'-direction train and publishes the arrival '''
        self.a_train = train
        self.run(train, 'a', prev_station_id, prev_direction)

    def arrive_b(self, train, prev_station_id, prev_direction):
        ''' Records ``train`` as the 'b'-direction train and publishes the arrival '''
        self.b_train = train
        self.run(train, 'b', prev_station_id, prev_direction)

    def close(self):
        ''' Closes the turnstile, then the underlying producer '''
        self.turnstile.close()
        super().close()
class Station(Producer):
    """Defines a single station that produces train arrival events.

    The Avro key and value schemas are loaded once, when the class body
    executes, from the ``schemas`` directory next to this file.
    """

    key_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_key.json")
    value_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_value.json")

    def __init__(self, station_id, name, color, direction_a=None, direction_b=None):
        """Configure the producer for this station's topic.

        Args:
            station_id: Station identifier; converted with ``int()``.
            name: Station name; its sanitized form plus ``-mod`` is the topic.
            color: Line colour object; ``run`` reads its ``.name``.
            direction_a: Optional neighbour in the 'a' direction.
            direction_b: Optional neighbour in the 'b' direction.
        """
        self.name = name
        # Lower-case the name and replace/strip "/", " ", "-" and "'" so it
        # can be used as a topic name.
        station_name = (
            self.name.lower()
            .replace("/", "_and_")
            .replace(" ", "_")
            .replace("-", "_")
            .replace("'", "")
        )
        topic_name = f"{station_name}-mod"
        # Diagnostic output goes through the logger rather than stdout.
        logger.debug(
            "station_id: %s station_name: %s topic_name: %s",
            station_id, station_name, topic_name,
        )
        super().__init__(
            topic_name,
            key_schema=Station.key_schema,
            value_schema=Station.value_schema,
            num_partitions=1,
            num_replicas=1,
        )

        self.station_id = int(station_id)
        self.color = color
        self.dir_a = direction_a
        self.dir_b = direction_b
        self.a_train = None
        self.b_train = None
        self.turnstile = Turnstile(self)

    def run(self, train, direction, prev_station_id, prev_direction):
        """Produce one arrival message for ``train`` to this station's topic.

        The message carries the station id, the train's id and status name,
        the line name and the direction / previous-stop details.
        NOTE(review): field names follow the other arrival producers for
        this schema - confirm against ``schemas/arrival_value.json``.

        Args:
            train: Arriving train; ``train_id`` and ``status.name`` are read.
            direction: Direction label of the arrival.
            prev_station_id: Forwarded unchanged into the message.
            prev_direction: Forwarded unchanged into the message.
        """
        logger.debug("producing arrival to %s", self.topic_name)
        self.producer.produce(
            topic=self.topic_name,
            key={"timestamp": self.time_millis()},
            value={
                "station_id": self.station_id,
                # Send the train's identifier, not the text form of the
                # whole train object.
                "train_id": train.train_id,
                "direction": direction,
                "line": self.color.name,
                "train_status": train.status.name,
                "prev_station_id": prev_station_id,
                "prev_direction": prev_direction,
            },
        )

    def __str__(self):
        """Return a fixed-width, pipe-separated summary of the station.

        ``"---"`` is shown for a missing train or neighbouring station.
        """
        return (
            "Station | {:^5} | {:<30} | Direction A: | {:^5} | departing to {:<30} | "
            "Direction B: | {:^5} | departing to {:<30} | "
        ).format(
            self.station_id,
            self.name,
            self.a_train.train_id if self.a_train is not None else "---",
            self.dir_a.name if self.dir_a is not None else "---",
            self.b_train.train_id if self.b_train is not None else "---",
            self.dir_b.name if self.dir_b is not None else "---",
        )

    def __repr__(self):
        """Return the same text as ``__str__``."""
        return str(self)

    def arrive_a(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'a' direction"""
        self.a_train = train
        self.run(train, "a", prev_station_id, prev_direction)

    def arrive_b(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'b' direction"""
        self.b_train = train
        self.run(train, "b", prev_station_id, prev_direction)

    def close(self):
        """Close the turnstile first, then the producer base class."""
        self.turnstile.close()
        super(Station, self).close()
class Station(Producer):
    """A single station that publishes an arrival record per train arrival.

    Both Avro schemas are read from the ``schemas`` directory beside this
    file when the class body is executed.
    """

    key_schema = avro.load(f"{Path(__file__).parents[0]}/schemas/arrival_key.json")
    value_schema = avro.load(f"{Path(__file__).parents[0]}/schemas/arrival_value.json")

    def __init__(self, station_id, name, color, direction_a=None, direction_b=None):
        """Initialise the producer and the station's bookkeeping fields.

        The topic is ``STATION_TOPIC_PREFIX`` joined by a dot to the
        sanitized station name; partition and replica counts come from
        ``NUM_PARTITIONS_PER_TOPIC`` and ``NUM_REPLICAS_PER_TOPIC``.
        """
        self.name = name

        # Same substitutions, in the same order, as a chained .replace().
        slug = self.name.lower()
        for old, new in (("/", "_and_"), (" ", "_"), ("-", "_"), ("'", "")):
            slug = slug.replace(old, new)

        super().__init__(
            f"{STATION_TOPIC_PREFIX}.{slug}",
            key_schema=Station.key_schema,
            value_schema=Station.value_schema,
            num_partitions=NUM_PARTITIONS_PER_TOPIC,
            num_replicas=NUM_REPLICAS_PER_TOPIC,
        )

        self.station_id = int(station_id)
        self.color = color
        self.dir_a, self.dir_b = direction_a, direction_b
        self.a_train = None
        self.b_train = None
        self.turnstile = Turnstile(self)

    def to_dict(self, train, direction, prev_station_id, prev_direction):
        """Build the arrival message value for ``train`` as a plain dict."""
        payload = {"station_id": self.station_id, "train_id": train.train_id}
        payload["direction"] = direction
        payload["line"] = self.color.name
        payload["train_status"] = train.status.name
        payload["prev_station_id"] = prev_station_id
        payload["prev_direction"] = prev_direction
        return payload

    def run(self, train, direction, prev_station_id, prev_direction):
        """Produce one arrival message; failures are logged, not raised."""
        try:
            # Everything, including payload construction and the success
            # log, stays inside the try so any failure is only logged.
            event_key = {"timestamp": self.time_millis()}
            value_schema = self.value_schema
            key_schema = self.key_schema
            event_value = self.to_dict(
                train, direction, prev_station_id, prev_direction
            )
            self.producer.produce(
                topic=self.topic_name,
                key=event_key,
                value=event_value,
                key_schema=key_schema,
                value_schema=value_schema,
            )
            logger.debug("Station produce success")
        except Exception as err:
            logger.error(f"error produce station data {self.topic_name}" + str(err))

    def __str__(self):
        """Return a fixed-width summary; ``"---"`` marks a missing value."""
        placeholder = "---"
        train_a = placeholder if self.a_train is None else self.a_train.train_id
        next_a = placeholder if self.dir_a is None else self.dir_a.name
        train_b = placeholder if self.b_train is None else self.b_train.train_id
        next_b = placeholder if self.dir_b is None else self.dir_b.name
        template = (
            "Station | {:^5} | {:<30} | Direction A: | {:^5} | departing to {:<30} | "
            "Direction B: | {:^5} | departing to {:<30} | "
        )
        return template.format(
            self.station_id, self.name, train_a, next_a, train_b, next_b
        )

    def __repr__(self):
        """Return the same text as ``__str__``."""
        return str(self)

    def arrive_a(self, train, prev_station_id, prev_direction):
        """Record ``train`` as the 'a'-direction train and publish the arrival."""
        self.a_train = train
        self.run(train, "a", prev_station_id, prev_direction)

    def arrive_b(self, train, prev_station_id, prev_direction):
        """Record ``train`` as the 'b'-direction train and publish the arrival."""
        self.b_train = train
        self.run(train, "b", prev_station_id, prev_direction)

    def close(self):
        """Close the turnstile, then the underlying producer."""
        self.turnstile.close()
        super().close()
class Station(Producer):
    """Defines a single station that produces train arrival events.

    The Avro key and value schemas are loaded once, when the class body
    executes, from the ``schemas`` directory next to this file. Every
    station instance produces to the same topic.
    """

    # ref: https://stackoverflow.com/questions/30218802/get-parent-of-current-directory-from-python-script
    key_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_key.json")
    value_schema = avro.load(
        f"{Path(__file__).parents[0]}/schemas/arrival_value.json")

    def __init__(self, station_id, name, color, direction_a=None, direction_b=None):
        """Configure the producer for the shared station topic.

        Args:
            station_id: Station identifier; converted with ``int()``.
            name: Station name, used in ``__str__``.
            color: Line colour object; ``run`` reads its ``.name``.
            direction_a: Optional neighbour in the 'a' direction.
            direction_b: Optional neighbour in the 'b' direction.
        """
        self.name = name
        topic_name = "streaming.project1.station"
        # Explicit counts: this class never assigns self.num_partitions or
        # self.num_replicas, so reading them here, before the base
        # initialiser has run, cannot be relied on.
        # NOTE(review): 1/1 is a conservative choice - tune for the cluster.
        super().__init__(
            topic_name,
            key_schema=Station.key_schema,
            value_schema=Station.value_schema,
            num_partitions=1,
            num_replicas=1,
        )

        self.station_id = int(station_id)
        self.color = color
        self.dir_a = direction_a
        self.dir_b = direction_b
        self.a_train = None
        self.b_train = None
        self.turnstile = Turnstile(self)

    def run(self, train, direction, prev_station_id, prev_direction):
        """Produce one arrival message for ``train``.

        The message is built from the arguments of this call. Reading
        ``self.a_train`` / ``self.b_train`` instead would fail whenever the
        opposite-direction slot is still ``None`` and would describe the
        wrong train.

        Args:
            train: Arriving train; ``train_id`` and ``status.name`` are read.
            direction: Direction label of the arrival ("a" or "b" when
                called through ``arrive_a`` / ``arrive_b``).
            prev_station_id: Forwarded unchanged into the message.
            prev_direction: Forwarded unchanged into the message.
        """
        logger.debug("producing arrival to %s", self.topic_name)
        self.producer.produce(
            topic=self.topic_name,
            key={"timestamp": self.time_millis()},
            value={
                "station_id": self.station_id,
                "train_id": train.train_id,
                "direction": direction,
                # Send the names, not the colour/status objects themselves.
                "line": self.color.name,
                "train_status": train.status.name,
                "prev_station_id": prev_station_id,
                "prev_direction": prev_direction,
            },
        )

    def __str__(self):
        """Return a fixed-width, pipe-separated summary of the station.

        ``"---"`` is shown for a missing train or neighbouring station.
        """
        return (
            "Station | {:^5} | {:<30} | Direction A: | {:^5} | departing to {:<30} | "
            "Direction B: | {:^5} | departing to {:<30} | "
        ).format(
            self.station_id,
            self.name,
            self.a_train.train_id if self.a_train is not None else "---",
            self.dir_a.name if self.dir_a is not None else "---",
            self.b_train.train_id if self.b_train is not None else "---",
            self.dir_b.name if self.dir_b is not None else "---",
        )

    def __repr__(self):
        """Return the same text as ``__str__``."""
        return str(self)

    def arrive_a(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'a' direction"""
        self.a_train = train
        self.run(train, "a", prev_station_id, prev_direction)

    def arrive_b(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'b' direction"""
        self.b_train = train
        self.run(train, "b", prev_station_id, prev_direction)

    def close(self):
        """Close the turnstile first, then the producer base class."""
        self.turnstile.close()
        super(Station, self).close()
class Station(Producer):
    """A single station that publishes an arrival record per train arrival.

    Both Avro schemas are read from the ``schemas`` directory beside this
    file when the class body is executed.
    """

    this_fp = Path(__file__)
    key_schema = avro.load(f"{this_fp.parents[0]}/schemas/arrival_key.json")
    value_schema = avro.load(f"{this_fp.parents[0]}/schemas/arrival_value.json")

    def __init__(self, station_id, name, color, direction_a=None, direction_b=None):
        """Initialise the producer and the station's bookkeeping fields.

        All stations use the single topic
        ``org.chicago.cta.station.arrivals.v1`` with three partitions and
        one replica.
        """
        self.name = name

        # The sanitized name is not used for the topic, but it is still
        # computed so construction behaves exactly as before (e.g. it
        # still fails for a name without ``lower()``).
        station_name = self.name.lower()
        for old, new in (("/", "_and_"), (" ", "_"), ("-", "_"), ("'", "")):
            station_name = station_name.replace(old, new)

        # The base initialiser also receives both schemas, which is why
        # ``run`` does not pass them again.
        super().__init__(
            "org.chicago.cta.station.arrivals.v1",
            key_schema=Station.key_schema,
            value_schema=Station.value_schema,
            num_partitions=3,
            num_replicas=1,
        )

        self.station_id = int(station_id)
        self.color = color
        self.dir_a, self.dir_b = direction_a, direction_b
        self.a_train = None
        self.b_train = None
        self.turnstile = Turnstile(self)

    def run(self, train, direction, prev_station_id, prev_direction):
        """Produce one arrival message; log and re-raise on any failure."""
        try:
            event_key = {"timestamp": self.time_millis()}
            event_value = {
                "station_id": self.station_id,
                "train_id": train.train_id,
                "direction": direction,
                "line": self.color.name,
                "train_status": train.status.name,
                "prev_station_id": prev_station_id,
                "prev_direction": prev_direction,
            }
            self.producer.produce(
                topic=self.topic_name, key=event_key, value=event_value
            )
        except Exception as err:
            logger.fatal(err)
            raise

    def __str__(self):
        """Return a fixed-width summary; ``"---"`` marks a missing value."""
        placeholder = "---"
        train_a = placeholder if self.a_train is None else self.a_train.train_id
        next_a = placeholder if self.dir_a is None else self.dir_a.name
        train_b = placeholder if self.b_train is None else self.b_train.train_id
        next_b = placeholder if self.dir_b is None else self.dir_b.name
        template = (
            "Station | {:^5} | {:<30} | Direction A: | {:^5} | departing to {:<30} | "
            "Direction B: | {:^5} | departing to {:<30} | "
        )
        return template.format(
            self.station_id, self.name, train_a, next_a, train_b, next_b
        )

    def __repr__(self):
        """Return the same text as ``__str__``."""
        return str(self)

    def arrive_a(self, train, prev_station_id, prev_direction):
        """Record ``train`` as the 'a'-direction train and publish the arrival."""
        self.a_train = train
        self.run(train, "a", prev_station_id, prev_direction)

    def arrive_b(self, train, prev_station_id, prev_direction):
        """Record ``train`` as the 'b'-direction train and publish the arrival."""
        self.b_train = train
        self.run(train, "b", prev_station_id, prev_direction)

    def close(self):
        """Close the turnstile, then the underlying producer."""
        self.turnstile.close()
        super().close()
class Station(Producer):
    """Defines a single station that produces train arrival events.

    The Avro key and value schemas are loaded once, when the class body
    executes, from the ``schemas`` directory next to this file.
    """

    key_schema = avro.load(f"{Path(__file__).parents[0]}/schemas/arrival_key.json")
    value_schema = avro.load(f"{Path(__file__).parents[0]}/schemas/arrival_value.json")

    def __init__(self, station_id, name, color, direction_a=None, direction_b=None):
        """Configure the producer; the sanitized station name is the topic.

        Args:
            station_id: Station identifier; converted with ``int()``.
            name: Station name.
            color: Line colour object; ``run`` reads its ``.name``.
            direction_a: Optional neighbour in the 'a' direction.
            direction_b: Optional neighbour in the 'b' direction.
        """
        self.name = name
        # Lower-case the name and replace/strip "/", " ", "-" and "'" so it
        # can be used as a topic name.
        station_name = (
            self.name.lower()
            .replace("/", "_and_")
            .replace(" ", "_")
            .replace("-", "_")
            .replace("'", "")
        )
        # Use the sanitized local computed above.
        # NOTE(review): the original referenced an upper-case STATION_NAME
        # that is not defined in this class - confirm no module constant of
        # that name was intended.
        super().__init__(
            station_name,
            key_schema=Station.key_schema,
            value_schema=Station.value_schema,
            num_partitions=1,
            num_replicas=1,
        )

        self.station_id = int(station_id)
        self.color = color
        self.dir_a = direction_a
        self.dir_b = direction_b
        self.a_train = None
        self.b_train = None
        self.turnstile = Turnstile(self)

    def run(self, train, direction, prev_station_id, prev_direction):
        """Produce one arrival message for ``train`` to this station's topic.

        Args:
            train: Arriving train; ``train_id`` and ``status.name`` are read.
            direction: Direction label of the arrival ("a" or "b" when
                called through ``arrive_a`` / ``arrive_b``).
            prev_station_id: Forwarded unchanged into the message.
            prev_direction: Forwarded unchanged into the message.
        """
        logger.debug("producing arrival to %s", self.topic_name)
        self.producer.produce(
            topic=self.topic_name,
            key={"timestamp": self.time_millis()},
            # Dict entries use "key: value"; "key=value" is a syntax error
            # inside a dict display.
            value={
                "station_id": self.station_id,
                "train_id": train.train_id,
                "direction": direction,
                "line": self.color.name,
                "train_status": train.status.name,
                "prev_station_id": prev_station_id,
                "prev_direction": prev_direction,
            },
        )

    def __str__(self):
        """Return a fixed-width, pipe-separated summary of the station.

        ``"---"`` is shown for a missing train or neighbouring station.
        """
        return (
            "Station | {:^5} | {:<30} | Direction A: | {:^5} | departing to {:<30} | "
            "Direction B: | {:^5} | departing to {:<30} | "
        ).format(
            self.station_id,
            self.name,
            self.a_train.train_id if self.a_train is not None else "---",
            self.dir_a.name if self.dir_a is not None else "---",
            self.b_train.train_id if self.b_train is not None else "---",
            self.dir_b.name if self.dir_b is not None else "---",
        )

    def __repr__(self):
        """Return the same text as ``__str__``."""
        return str(self)

    # Kept for backward compatibility with the earlier single-underscore
    # spelling, which Python never used for repr().
    _repr_ = __repr__

    def arrive_a(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'a' direction"""
        self.a_train = train
        self.run(train, "a", prev_station_id, prev_direction)

    def arrive_b(self, train, prev_station_id, prev_direction):
        """Denotes a train arrival at this station in the 'b' direction"""
        self.b_train = train
        self.run(train, "b", prev_station_id, prev_direction)

    def close(self):
        """Close the turnstile first, then the producer base class."""
        self.turnstile.close()
        super(Station, self).close()