def test_time_series_upload_queue2(self):
    self.client.time_series.create(TimeSeries(external_id=self.time_series1))

    queue = TimeSeriesUploadQueue(cdf_client=self.client, max_upload_interval=1)
    queue.start()

    # Create some synthetic data
    now = int(datetime.now(tz=timezone.utc).timestamp() * 1000)
    points1 = [(now + i * 107, random.randint(0, 10)) for i in range(10)]
    points2 = [(now + i * 107, random.randint(0, 10)) for i in range(10, 20)]

    queue.add_to_upload_queue(external_id=self.time_series1, datapoints=points1)
    queue.add_to_upload_queue(external_id="noSuchExternalId", datapoints=points2)

    time.sleep(20)

    recv_points1 = self.client.datapoints.retrieve(
        external_id=self.time_series1, start="1w-ago", end="now", limit=None
    )

    self.assertListEqual([int(p) for p in recv_points1.value], [p[1] for p in points1])

    queue.stop()
def test_time_series_upload_queue_create_missing(self):
    id1 = self.time_series1 + "_missing"
    id2 = self.time_series2 + "_missing"
    id3 = self.time_series3 + "_missing"

    self.client.time_series.delete(external_id=[id1, id2], ignore_unknown_ids=True)

    queue = TimeSeriesUploadQueue(cdf_client=self.client, create_missing=True)

    # Create some synthetic data
    now = int(datetime.now(tz=timezone.utc).timestamp() * 1000)
    points1 = [(now + i * 107, random.randint(0, 10)) for i in range(10)]
    points2 = [
        (now + i * 107, "".join([random.choice(string.printable) for j in range(16)]))
        for i in range(10, 20)
    ]
    points3 = [{"timestamp": now + i * 107, "value": random.randint(0, 10)} for i in range(10)]

    queue.add_to_upload_queue(external_id=id1, datapoints=points1)
    queue.add_to_upload_queue(external_id=id2, datapoints=points2)
    queue.add_to_upload_queue(external_id=id3, datapoints=points3)
    queue.upload()

    time.sleep(3)

    recv_points1 = self.client.datapoints.retrieve(external_id=id1, start="1w-ago", end="now", limit=None)
    recv_points2 = self.client.datapoints.retrieve(external_id=id2, start="1w-ago", end="now", limit=None)
    recv_points3 = self.client.datapoints.retrieve(external_id=id3, start="1w-ago", end="now", limit=None)

    self.assertListEqual([int(p) for p in recv_points1.value], [p[1] for p in points1])
    self.assertListEqual([p for p in recv_points2.value], [p[1] for p in points2])
    self.assertListEqual([int(p) for p in recv_points3.value], [p["value"] for p in points3])

    queue.stop()
    self.client.time_series.delete(external_id=[id1, id2, id3], ignore_unknown_ids=True)
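# With create_missing=True the queue is expected to create any time series that are
# missing in CDF (inferring is_string from the values) before inserting datapoints,
# which is what the test above relies on. A minimal stand-alone sketch, assuming only
# the constructor flag demonstrated above ("brand-new-ts" is a placeholder external ID):
from datetime import datetime, timezone

from cognite.client import CogniteClient
from cognite.extractorutils.uploader import TimeSeriesUploadQueue

client = CogniteClient()  # assumes credentials are configured in the environment
queue = TimeSeriesUploadQueue(cdf_client=client, create_missing=True)

now = int(datetime.now(tz=timezone.utc).timestamp() * 1000)
queue.add_to_upload_queue(external_id="brand-new-ts", datapoints=[(now, 1.0)])
queue.upload()  # creates "brand-new-ts" if it is missing, then inserts the point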
def test_ts_uploader1(self, MockCogniteClient):
    client: CogniteClient = MockCogniteClient()
    queue = TimeSeriesUploadQueue(client)

    queue.add_to_upload_queue(id=1, datapoints=[(1, 1), (2, 2)])
    queue.add_to_upload_queue(id=2, datapoints=[(3, 3), (4, 4)])
    queue.add_to_upload_queue(id=1, datapoints=[(5, 5), (6, 6)])
    queue.add_to_upload_queue(id=3, datapoints=[(7, 7), (8, 8)])

    # Nothing is sent until upload() is called explicitly
    client.datapoints.insert_multiple.assert_not_called()

    queue.upload()

    # Points queued for the same ID are merged into a single insert request
    client.datapoints.insert_multiple.assert_called_with(
        [
            {"id": 1, "datapoints": [(1, 1), (2, 2), (5, 5), (6, 6)]},
            {"id": 2, "datapoints": [(3, 3), (4, 4)]},
            {"id": 3, "datapoints": [(7, 7), (8, 8)]},
        ]
    )
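# test_ts_uploader1 and test_ts_uploader2 receive a MockCogniteClient argument, which
# implies a unittest.mock.patch decorator on the test methods. A self-contained sketch
# of the same pattern using a plain MagicMock (this assumes the extractor-utils version
# used in these snippets, where the queue calls client.datapoints.insert_multiple):
from unittest.mock import MagicMock

from cognite.extractorutils.uploader import TimeSeriesUploadQueue

client = MagicMock()
queue = TimeSeriesUploadQueue(client)

queue.add_to_upload_queue(id=1, datapoints=[(1, 1), (2, 2)])
queue.upload()

# The mock records the insert call made by the queue, so it can be asserted on
client.datapoints.insert_multiple.assert_called_once()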
def test_ts_uploader2(self, MockCogniteClient):
    client: CogniteClient = MockCogniteClient()

    post_upload_test = {"value": False}

    def post(x):
        post_upload_test["value"] = True

    queue = TimeSeriesUploadQueue(client, max_upload_interval=2, post_upload_function=post)
    queue.start()

    queue.add_to_upload_queue(id=1, datapoints=[(1, 1), (2, 2)])
    queue.add_to_upload_queue(id=2, datapoints=[(3, 3), (4, 4)])
    queue.add_to_upload_queue(id=1, datapoints=[(5, 5), (6, 6)])
    queue.add_to_upload_queue(id=3, datapoints=[(7, 7), (8, 8)])

    # Wait for the upload thread to trigger on its own (max_upload_interval=2)
    time.sleep(2.1)

    client.datapoints.insert_multiple.assert_called_with(
        [
            {"id": 1, "datapoints": [(1, 1), (2, 2), (5, 5), (6, 6)]},
            {"id": 2, "datapoints": [(3, 3), (4, 4)]},
            {"id": 3, "datapoints": [(7, 7), (8, 8)]},
        ]
    )
    self.assertTrue(post_upload_test["value"])

    queue.stop()
def test_time_series_upload_queue1(self):
    created = self.client.time_series.create(
        [
            TimeSeries(external_id=self.time_series1),
            TimeSeries(external_id=self.time_series2, is_string=True),
        ]
    )

    last_point = {"timestamp": 0}

    def store_latest(points):
        # Each entry in the callback payload has a "datapoints" list;
        # track the newest timestamp that has been uploaded
        last_point["timestamp"] = max(
            last_point["timestamp"], *[ts["datapoints"][-1][0] for ts in points]
        )

    queue = TimeSeriesUploadQueue(
        cdf_client=self.client, post_upload_function=store_latest, max_upload_interval=1
    )
    queue.start()

    # Create some synthetic data
    now = int(datetime.now(tz=timezone.utc).timestamp() * 1000)
    points1_1 = [(now + i * 107, random.randint(0, 10)) for i in range(10)]
    points1_2 = [(now + i * 107, random.randint(0, 10)) for i in range(10, 100)]
    points2 = [(now + i * 93, chr(97 + i)) for i in range(26)]

    queue.add_to_upload_queue(external_id=self.time_series1, datapoints=points1_1)
    queue.add_to_upload_queue(external_id=self.time_series1, datapoints=points1_2)
    queue.add_to_upload_queue(id=created[1].id, datapoints=points2)

    time.sleep(30)

    recv_points1 = self.client.datapoints.retrieve(
        external_id=self.time_series1, start="1w-ago", end="now", limit=None
    )
    recv_points2 = self.client.datapoints.retrieve(
        external_id=self.time_series2, start="1w-ago", end="now", limit=None
    )

    self.assertListEqual([int(p) for p in recv_points1.value], [p[1] for p in points1_1 + points1_2])
    self.assertListEqual(recv_points2.value, [p[1] for p in points2])
    self.assertEqual(last_point["timestamp"], points1_2[-1][0])

    queue.stop()
def test_upload_queue_integration(self, MockCogniteClient):
    state_store = NoStateStore()
    upload_queue = TimeSeriesUploadQueue(
        cdf_client=MockCogniteClient(), post_upload_function=state_store.post_upload_handler()
    )

    upload_queue.add_to_upload_queue(external_id="testId", datapoints=[(1, 1), (4, 4)])
    upload_queue.upload()
    self.assertTupleEqual(state_store.get_state("testId"), (1, 4))

    # Points inside the known range should not move the watermarks
    upload_queue.add_to_upload_queue(external_id="testId", datapoints=[(2, 2), (3, 3)])
    upload_queue.upload()
    self.assertTupleEqual(state_store.get_state("testId"), (1, 4))

    # A newer point extends the high watermark
    upload_queue.add_to_upload_queue(external_id="testId", datapoints=[(5, 5)])
    upload_queue.upload()
    self.assertTupleEqual(state_store.get_state("testId"), (1, 5))

    # An older point extends the low watermark
    upload_queue.add_to_upload_queue(external_id="testId", datapoints=[(0, 0)])
    upload_queue.upload()
    self.assertTupleEqual(state_store.get_state("testId"), (0, 5))
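# A minimal sketch of what a post-upload handler like state_store.post_upload_handler()
# appears to do, judging from the assertions above: keep a (low, high) timestamp
# watermark per time series. WatermarkStore and the "externalId" payload key are
# assumptions for illustration, not the library's implementation.
from typing import Dict, List, Optional, Tuple


class WatermarkStore:
    def __init__(self) -> None:
        self._states: Dict[str, Tuple[int, int]] = {}

    def get_state(self, external_id: str) -> Optional[Tuple[int, int]]:
        return self._states.get(external_id)

    def post_upload_handler(self):
        # Returns a callback suitable for TimeSeriesUploadQueue's post_upload_function
        def handler(uploaded: List[dict]) -> None:
            for entry in uploaded:
                ext_id = entry["externalId"]
                timestamps = [point[0] for point in entry["datapoints"]]
                low, high = self._states.get(ext_id, (min(timestamps), max(timestamps)))
                self._states[ext_id] = (min(low, *timestamps), max(high, *timestamps))

        return handler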
# Reroute ctrl-C to trigger the stopping condition instead of exiting uncleanly
def sigint_handler(sig, frame):
    print()  # ensure newline before log
    logger.warning("Interrupt signal received, stopping")
    stop.set()
    logger.info("Waiting for threads to complete")

signal.signal(signal.SIGINT, sigint_handler)

if config.metrics:
    config.metrics.start_pushers(cdf)

with TimeSeriesUploadQueue(
    cdf,
    post_upload_function=state_store.post_upload_handler(),
    max_upload_interval=config.extractor.upload_interval,
    trigger_log_level="INFO",
    thread_name="CDF-Uploader",
) as upload_queue:
    if config.backfill:
        logger.info("Starting backfiller")
        backfiller = Backfiller(upload_queue, stop, frost, weather_stations, config, state_store)
        Thread(target=backfiller.run, name="Backfiller").start()

    # Fill in gap in data between end of last run and now
    logger.info("Starting frontfiller")
    frontfill(upload_queue, frost, weather_stations, config, state_store)

    # Start streaming live data
    logger.info("Starting streamer")
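# The with-block above relies on TimeSeriesUploadQueue being a context manager:
# entering starts the periodic upload thread, and exiting stops it and flushes
# whatever is still queued. A stand-alone sketch of the same pattern (the client
# argument and "my-ts" external ID are placeholders):
from cognite.extractorutils.uploader import TimeSeriesUploadQueue


def stream_points(cdf, points):
    with TimeSeriesUploadQueue(cdf, max_upload_interval=10) as queue:
        for timestamp, value in points:
            queue.add_to_upload_queue(external_id="my-ts", datapoints=[(timestamp, value)])
    # Any remaining datapoints are uploaded when the with-block exits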
class IotHubExtractor:
    """
    Main extractor class

    Args:
        cdf_client: Cognite client to use
        metrics: Collection of metrics to use
        config: Configuration parameters
    """

    def __init__(self, cdf_client: CogniteClient, metrics: Metrics, config: IotHubConfig):
        self.cdf_client = cdf_client
        self.metrics = metrics
        self.logger = logging.getLogger(__name__)

        self.asset_id = None
        self.time_series_ensured = set()

        self.queue = TimeSeriesUploadQueue(
            cdf_client,
            max_queue_size=config.extractor.upload_queue_size,
            max_upload_interval=config.extractor.upload_interval,
            post_upload_function=self.upload_callback,
        )

    def run(self, config: IotHubConfig) -> None:
        """
        Process queue and upload to CDF

        Args:
            config: Configuration parameters
        """
        connection_str = (
            f"Endpoint={config.azureiothub.eventhub_compatible_endpoint}/;"
            f"SharedAccessKeyName=service;"
            f"SharedAccessKey={config.azureiothub.iot_sas_key};"
            f"EntityPath={config.azureiothub.eventhub_compatible_path}"
        )

        client = EventHubConsumerClient.from_connection_string(
            conn_str=connection_str,
            consumer_group="$default",
            # transport_type=TransportType.AmqpOverWebsocket,  # uncomment to use web sockets
            # http_proxy={  # uncomment to use a proxy
            #     "proxy_hostname": "127.0.0.1",  # proxy hostname
            #     "proxy_port": 3128,  # proxy port
            #     "username": "******",
            #     "password": "******",
            # },
        )

        self.asset_id = self.cdf_client.assets.retrieve(external_id=config.azureiothub.iot_root).id

        try:
            with client:
                client.receive_batch(
                    on_event_batch=self.on_event_batch,
                    on_error=self.on_error,
                )
        except KeyboardInterrupt:
            print("Receiving has stopped.")

    # Define callbacks to process events
    def on_event_batch(self, partition_context, events):
        for event in events:
            self.metrics.messages_consumed.inc()
            values = event.body_as_json()
            device = event.system_properties[b"iothub-connection-device-id"].decode("utf-8")

            for key in values:
                ext_id = f"{device}_{key}"

                # Create the time series on first encounter, then cache its external ID
                if ext_id not in self.time_series_ensured:
                    ensure_time_series(
                        self.cdf_client,
                        [
                            TimeSeries(
                                external_id=ext_id,
                                name=f"{device} {key}",
                                asset_id=self.asset_id,
                            )
                        ],
                    )
                    self.time_series_ensured.add(ext_id)
                    self.metrics.iouthub_timeseries_ensured.inc()

                timestamp = event.system_properties[b"iothub-enqueuedtime"]

                self.queue.add_to_upload_queue(
                    external_id=ext_id,
                    datapoints=[(timestamp, values[key])],
                )

        self.queue.upload()  # upload to CDF
        partition_context.update_checkpoint()

    def on_error(self, partition_context, error):
        # partition_context can be None in the on_error callback
        if partition_context:
            print(
                "An exception: {} occurred during receiving from Partition: {}.".format(
                    error, partition_context.partition_id
                )
            )
        else:
            print("An exception: {} occurred during the load balance process.".format(error))

    def upload_callback(self, uploaded_datapoints):
        count = 0
        for entry in uploaded_datapoints:
            count += len(entry["datapoints"])

        self.logger.info(f"Uploaded {count} datapoints to CDF")
        self.metrics.datapoints_written.inc(count)
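# A hypothetical entry point wiring the extractor above together; load_config and the
# Metrics constructor are assumptions for illustration, only IotHubExtractor itself is
# from the source:
from cognite.client import CogniteClient


def main() -> None:
    config = load_config("config.yaml")  # hypothetical config loader
    cdf = CogniteClient()  # assumes credentials are configured in the environment
    metrics = Metrics()  # assumed no-argument constructor
    extractor = IotHubExtractor(cdf, metrics, config)
    extractor.run(config)


if __name__ == "__main__":
    main()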