Example #1
    def test_time_series_upload_queue2(self):
        self.client.time_series.create(
            TimeSeries(external_id=self.time_series1))

        queue = TimeSeriesUploadQueue(cdf_client=self.client,
                                      max_upload_interval=1)
        queue.start()

        # Create some synthetic data
        now = int(datetime.now(tz=timezone.utc).timestamp() * 1000)

        points1 = [(now + i * 107, random.randint(0, 10)) for i in range(10)]
        points2 = [(now + i * 107, random.randint(0, 10))
                   for i in range(10, 20)]

        queue.add_to_upload_queue(external_id=self.time_series1,
                                  datapoints=points1)
        queue.add_to_upload_queue(external_id="noSuchExternalId",
                                  datapoints=points2)

        time.sleep(20)

        recv_points1 = self.client.datapoints.retrieve(
            external_id=self.time_series1,
            start="1w-ago",
            end="now",
            limit=None)

        self.assertListEqual([int(p) for p in recv_points1.value],
                             [p[1] for p in points1])

        queue.stop()
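
The test above exercises the periodic-upload mode of TimeSeriesUploadQueue: start() launches a background loop that flushes the queue every max_upload_interval seconds, and stop() shuts it down. For quick reference, here is a minimal sketch of the same pattern outside a test harness; the client setup, the "my_ts" external id, and the import paths are assumptions for illustration, not taken from the example itself.

import time
from datetime import datetime, timezone

from cognite.client import CogniteClient
from cognite.extractorutils.uploader import TimeSeriesUploadQueue

client = CogniteClient()  # assumed to be configured with project/credentials elsewhere

queue = TimeSeriesUploadQueue(cdf_client=client, max_upload_interval=1)
queue.start()  # start the background upload loop

# Datapoints are (millisecond timestamp, value) tuples, as in the tests above
now = int(datetime.now(tz=timezone.utc).timestamp() * 1000)
queue.add_to_upload_queue(external_id="my_ts", datapoints=[(now, 42)])

time.sleep(2)  # let the background loop flush at least once
queue.stop()   # stop the background loop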
Example #2
    def test_time_series_upload_queue_create_missing(self):
        id1 = self.time_series1 + "_missing"
        id2 = self.time_series2 + "_missing"
        id3 = self.time_series3 + "_missing"

        self.client.time_series.delete(external_id=[id1, id2, id3],
                                       ignore_unknown_ids=True)

        queue = TimeSeriesUploadQueue(cdf_client=self.client,
                                      create_missing=True)

        # Create some synthetic data
        now = int(datetime.now(tz=timezone.utc).timestamp() * 1000)
        points1 = [(now + i * 107, random.randint(0, 10)) for i in range(10)]
        points2 = [
            (now + i * 107,
             "".join([random.choice(string.printable) for j in range(16)]))
            for i in range(10, 20)
        ]
        points3 = [{
            "timestamp": now + i * 107,
            "value": random.randint(0, 10)
        } for i in range(10)]

        queue.add_to_upload_queue(external_id=id1, datapoints=points1)
        queue.add_to_upload_queue(external_id=id2, datapoints=points2)
        queue.add_to_upload_queue(external_id=id3, datapoints=points3)

        queue.upload()
        time.sleep(3)

        recv_points1 = self.client.datapoints.retrieve(external_id=id1,
                                                       start="1w-ago",
                                                       end="now",
                                                       limit=None)
        recv_points2 = self.client.datapoints.retrieve(external_id=id2,
                                                       start="1w-ago",
                                                       end="now",
                                                       limit=None)
        recv_points3 = self.client.datapoints.retrieve(external_id=id3,
                                                       start="1w-ago",
                                                       end="now",
                                                       limit=None)

        self.assertListEqual([int(p) for p in recv_points1.value],
                             [p[1] for p in points1])
        self.assertListEqual([p for p in recv_points2.value],
                             [p[1] for p in points2])
        self.assertListEqual([int(p) for p in recv_points3.value],
                             [p["value"] for p in points3])

        queue.stop()
        self.client.time_series.delete(external_id=[id1, id2, id3],
                                       ignore_unknown_ids=True)
Example #3
    def test_ts_uploader1(self, MockCogniteClient):
        client: CogniteClient = MockCogniteClient()

        queue = TimeSeriesUploadQueue(client)

        queue.add_to_upload_queue(id=1, datapoints=[(1, 1), (2, 2)])
        queue.add_to_upload_queue(id=2, datapoints=[(3, 3), (4, 4)])
        queue.add_to_upload_queue(id=1, datapoints=[(5, 5), (6, 6)])
        queue.add_to_upload_queue(id=3, datapoints=[(7, 7), (8, 8)])

        client.datapoints.insert_multiple.assert_not_called()
        queue.upload()
        client.datapoints.insert_multiple.assert_called_with([
            {
                "id": 1,
                "datapoints": [(1, 1), (2, 2), (5, 5), (6, 6)]
            },
            {
                "id": 2,
                "datapoints": [(3, 3), (4, 4)]
            },
            {
                "id": 3,
                "datapoints": [(7, 7), (8, 8)]
            },
        ])
Example #4
    def __init__(self, cdf_client: CogniteClient, metrics: Metrics,
                 config: IotHubConfig):
        self.cdf_client = cdf_client
        self.metrics = metrics
        self.logger = logging.getLogger(__name__)
        self.asset_id = None
        self.time_series_ensured = set()

        self.queue = TimeSeriesUploadQueue(
            cdf_client,
            max_queue_size=config.extractor.upload_queue_size,
            max_upload_interval=config.extractor.upload_interval,
            post_upload_function=self.upload_callback,
        )
Example #5
    def test_ts_uploader2(self, MockCogniteClient):
        client: CogniteClient = MockCogniteClient()

        post_upload_test = {"value": False}

        def post(x):
            post_upload_test["value"] = True

        queue = TimeSeriesUploadQueue(client,
                                      max_upload_interval=2,
                                      post_upload_function=post)
        queue.start()

        queue.add_to_upload_queue(id=1, datapoints=[(1, 1), (2, 2)])
        queue.add_to_upload_queue(id=2, datapoints=[(3, 3), (4, 4)])
        queue.add_to_upload_queue(id=1, datapoints=[(5, 5), (6, 6)])
        queue.add_to_upload_queue(id=3, datapoints=[(7, 7), (8, 8)])

        time.sleep(2.1)

        client.datapoints.insert_multiple.assert_called_with([
            {
                "id": 1,
                "datapoints": [(1, 1), (2, 2), (5, 5), (6, 6)]
            },
            {
                "id": 2,
                "datapoints": [(3, 3), (4, 4)]
            },
            {
                "id": 3,
                "datapoints": [(7, 7), (8, 8)]
            },
        ])
        self.assertTrue(post_upload_test["value"])

        queue.stop()
Example #6
    def test_time_series_upload_queue1(self):
        created = self.client.time_series.create([
            TimeSeries(external_id=self.time_series1),
            TimeSeries(external_id=self.time_series2, is_string=True)
        ])

        last_point = {"timestamp": 0}

        def store_latest(points):
            last_point["timestamp"] = max(
                last_point["timestamp"],
                *[ts["datapoints"][-1][0] for ts in points])

        queue = TimeSeriesUploadQueue(cdf_client=self.client,
                                      post_upload_function=store_latest,
                                      max_upload_interval=1)
        queue.start()

        # Create some synthetic data
        now = int(datetime.now(tz=timezone.utc).timestamp() * 1000)

        points1_1 = [(now + i * 107, random.randint(0, 10)) for i in range(10)]
        points1_2 = [(now + i * 107, random.randint(0, 10))
                     for i in range(10, 100)]
        points2 = [(now + i * 93, chr(97 + i)) for i in range(26)]

        queue.add_to_upload_queue(external_id=self.time_series1,
                                  datapoints=points1_1)
        queue.add_to_upload_queue(external_id=self.time_series1,
                                  datapoints=points1_2)
        queue.add_to_upload_queue(id=created[1].id, datapoints=points2)

        time.sleep(30)

        recv_points1 = self.client.datapoints.retrieve(
            external_id=self.time_series1,
            start="1w-ago",
            end="now",
            limit=None)
        recv_points2 = self.client.datapoints.retrieve(
            external_id=self.time_series2,
            start="1w-ago",
            end="now",
            limit=None)

        self.assertListEqual([int(p) for p in recv_points1.value],
                             [p[1] for p in points1_1 + points1_2])
        self.assertListEqual(recv_points2.value, [p[1] for p in points2])
        self.assertEqual(last_point["timestamp"], points1_2[-1][0])

        queue.stop()
Example #7
    def test_upload_queue_integration(self, MockCogniteClient):
        state_store = NoStateStore()

        upload_queue = TimeSeriesUploadQueue(
            cdf_client=MockCogniteClient(), post_upload_function=state_store.post_upload_handler()
        )

        upload_queue.add_to_upload_queue(external_id="testId", datapoints=[(1, 1), (4, 4)])
        upload_queue.upload()

        self.assertTupleEqual(state_store.get_state("testId"), (1, 4))

        upload_queue.add_to_upload_queue(external_id="testId", datapoints=[(2, 2), (3, 3)])
        upload_queue.upload()

        self.assertTupleEqual(state_store.get_state("testId"), (1, 4))

        upload_queue.add_to_upload_queue(external_id="testId", datapoints=[(5, 5)])
        upload_queue.upload()

        self.assertTupleEqual(state_store.get_state("testId"), (1, 5))

        upload_queue.add_to_upload_queue(external_id="testId", datapoints=[(0, 0)])
        upload_queue.upload()

        self.assertTupleEqual(state_store.get_state("testId"), (0, 5))
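
The assertions in Example #7 suggest that the state store's post_upload_handler keeps a (low, high) timestamp range per external id and only widens it when uploaded points fall outside the current range. For comparison, a hand-rolled post_upload_function in the same spirit as store_latest in Example #6 could look like the sketch below; it relies only on the "datapoints" key of each uploaded entry (the only key the surrounding examples demonstrably use) and assumes (timestamp, value) tuples. The client variable is again an assumed, pre-configured CogniteClient.

# Sketch of a custom post_upload_function tracking a global (low, high) watermark
# across all uploaded datapoints, modelled on store_latest in Example #6.
watermark = {"low": None, "high": None}

def track_watermark(uploaded):
    for entry in uploaded:
        if not entry["datapoints"]:
            continue
        timestamps = [p[0] for p in entry["datapoints"]]
        low, high = min(timestamps), max(timestamps)
        watermark["low"] = low if watermark["low"] is None else min(watermark["low"], low)
        watermark["high"] = high if watermark["high"] is None else max(watermark["high"], high)

queue = TimeSeriesUploadQueue(cdf_client=client, post_upload_function=track_watermark)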
Example #8
    # Reroute ctrl-C to trigger the stopping condition instead of exiting uncleanly
    def sigint_handler(sig, frame):
        print()  # ensure newline before log
        logger.warning("Interrupt signal received, stopping")
        stop.set()
        logger.info("Waiting for threads to complete")

    signal.signal(signal.SIGINT, sigint_handler)

    if config.metrics:
        config.metrics.start_pushers(cdf)

    with TimeSeriesUploadQueue(
            cdf,
            post_upload_function=state_store.post_upload_handler(),
            max_upload_interval=config.extractor.upload_interval,
            trigger_log_level="INFO",
            thread_name="CDF-Uploader",
    ) as upload_queue:
        if config.backfill:
            logger.info("Starting backfiller")
            backfiller = Backfiller(upload_queue, stop, frost,
                                    weather_stations, config, state_store)
            Thread(target=backfiller.run, name="Backfiller").start()

        # Fill in gap in data between end of last run and now
        logger.info("Starting frontfiller")
        frontfill(upload_queue, frost, weather_stations, config, state_store)

        # Start streaming live data
        logger.info("Starting streamer")
Example #9
class IotHubExtractor:
    """
    Main extractor class

    Args:
        cdf_client: Cognite client to use
        metrics: Collection of metrics to use
        config: Configuration parameters
    """
    def __init__(self, cdf_client: CogniteClient, metrics: Metrics,
                 config: IotHubConfig):
        self.cdf_client = cdf_client
        self.metrics = metrics
        self.logger = logging.getLogger(__name__)
        self.asset_id = None
        self.time_series_ensured = set()

        self.queue = TimeSeriesUploadQueue(
            cdf_client,
            max_queue_size=config.extractor.upload_queue_size,
            max_upload_interval=config.extractor.upload_interval,
            post_upload_function=self.upload_callback,
        )

    def run(self, config: IotHubConfig) -> None:
        """
        Process queue and upload to CDF

        Args:
            config: Configuration parameters
        """

        CONNECTION_STR = f"Endpoint={config.azureiothub.eventhub_compatible_endpoint}/;SharedAccessKeyName=service;SharedAccessKey={config.azureiothub.iot_sas_key};EntityPath={config.azureiothub.eventhub_compatible_path}"

        client = EventHubConsumerClient.from_connection_string(
            conn_str=CONNECTION_STR,
            consumer_group="$default",
            # transport_type=TransportType.AmqpOverWebsocket,  # uncomment it if you want to use web socket
            # http_proxy={  # uncomment if you want to use proxy
            #     'proxy_hostname': '127.0.0.1',  # proxy hostname.
            #     'proxy_port': 3128,  # proxy port.
            #     'username': '******',
            #     'password': '******'
            # }
        )

        self.asset_id = self.cdf_client.assets.retrieve(
            external_id=config.azureiothub.iot_root).id

        try:
            with client:
                client.receive_batch(
                    on_event_batch=self.on_event_batch,
                    on_error=self.on_error,
                )
        except KeyboardInterrupt:
            print("Receiving has stopped.")

    # Define callbacks to process events
    def on_event_batch(self, partition_context, events):
        for event in events:
            self.metrics.messages_consumed.inc()
            values = event.body_as_json()
            device = event.system_properties[
                b"iothub-connection-device-id"].decode("utf-8")
            for key in values.keys():
                ext_id = f"{device}_{key}"

                if ext_id not in self.time_series_ensured:
                    ensure_time_series(
                        self.cdf_client,
                        [
                            TimeSeries(
                                external_id=ext_id,
                                name=f"{device} {key}",
                                asset_id=self.asset_id,
                            )
                        ],
                    )
                    self.time_series_ensured.add(ext_id)
                    self.metrics.iouthub_timeseries_ensured.inc()

                timestamp = event.system_properties[b"iothub-enqueuedtime"]

                self.queue.add_to_upload_queue(
                    external_id=ext_id,
                    datapoints=[(timestamp, values[key])],
                )

        self.queue.upload()  # upload to CDF
        partition_context.update_checkpoint()

    def on_error(self, partition_context, error):
        # Put your code here. partition_context can be None in the on_error callback.
        if partition_context:
            print(
                "An exception: {} occurred during receiving from Partition: {}."
                .format(error, partition_context.partition_id))
        else:
            print("An exception: {} occurred during the load balance process.".
                  format(error))

    def upload_callback(self, uploaded_datapoints):

        count = sum(len(entry["datapoints"]) for entry in uploaded_datapoints)

        self.logger.info(f"Uploaded {count} datapoints to CDF")
        self.metrics.datapoints_written.inc(count)