Exemplo n.º 1
0
 def test_updated_date_lte_filter_datetime(self):
     """Check filter_record is truthy for dates at or before the cutoff.

     Two dates are exercised against a 2020-01-01 filter datetime: one
     strictly earlier than it and one falling exactly on it.
     """
     cutoff = datetime(2020, 1, 1, tzinfo=UTC)

     for updated_date in ("2019-01-01", "2020-01-01"):
         # A fresh context per call, so each assertion is independent.
         context = MagicMock(filter_datetime=cutoff)
         outcome = filter_record({"updated_date": updated_date}, context)
         self.assertTrue(outcome)
Exemplo n.º 2
0
    def test_update_date_none(self):
        """Check filter_record is falsy for a record lacking updated_date."""
        context = MagicMock(filter_datetime=datetime(2020, 1, 1, tzinfo=UTC))
        record_without_date = {}

        outcome = filter_record(record_without_date, context)

        self.assertFalse(outcome)
Exemplo n.º 3
0
 def test_updated_date_gt_filter_datetime(self):
     """Check filter_record is falsy when updated_date is after the cutoff.

     The record is dated 2020-01-02, one day later than the 2020-01-01
     filter datetime.
     """
     cutoff = datetime(2020, 1, 1, tzinfo=UTC)
     newer_record = {"updated_date": "2020-01-02"}
     context = MagicMock(filter_datetime=cutoff)

     self.assertFalse(filter_record(newer_record, context))
Exemplo n.º 4
0
def process_stream(
    stream_def: Union[Stream, ResponseSubstream, EndpointSubstream],
    stream_version: Optional[int],
    state: Dict[str, Any],
    json_message: Dict[str, Any],
    filter_datetime: "datetime",
) -> None:
    """Process a single consumed message against ``stream_def``.

    The message is logged, then its ``"record"`` entry is passed to
    ``filter_record``; a truthy result ends processing immediately.
    Otherwise the untransformed record is passed to ``handle_record``.
    When ``stream_def.is_selected`` is truthy, the record is also run
    through the stream's transformer (and, for a ``Stream``, through its
    selected ``ResponseSubstream`` children); every resulting record is
    passed to ``handle_record`` and the final state to ``write_state``.

    Args:
        stream_def: Stream definition used for selection checks, substream
            lookup and transformation.
        stream_version: Forwarded unchanged to every ``handle_record`` call.
        state: State threaded through (and replaced by the return value of)
            every ``handle_record`` call.
        json_message: Message providing the ``"object"`` and ``"record"``
            keys.
        filter_datetime: Forwarded to ``filter_record`` and the transformer
            via ``DataContext``, and to ``sync_sub_records``.

    Returns:
        Always ``None``.
    """
    LOGGER.info("Message: %s", json.dumps(json_message))

    # The stream id is derived from the message's object name.
    stream_id = pluralize(underscore(json_message["object"]))
    raw_record = json_message["record"]

    # Open question carried over from the author: should filtering be based
    # on the message timestamp or on the replication key?
    should_skip = filter_record(
        raw_record,
        DataContext(
            tap_stream_id=stream_id,
            stream=stream_def,
            filter_datetime=filter_datetime,
        ),
    )
    if should_skip:
        return None

    state = handle_record(stream_id, raw_record, stream_def, stream_version,
                          state)

    # Only a selected stream goes on to have its records transformed.
    if not stream_def.is_selected:
        return None

    if isinstance(stream_def, Stream):
        for substream in stream_def.substreams:
            # EndpointSubstreams cannot be handled this way; the author
            # assumes the producer pushes data similarly to the API.
            if not (substream.is_selected
                    and isinstance(substream, ResponseSubstream)):
                continue

            # .sync_sub_records performs the transformations itself, so no
            # separate transform step is needed for sub-records.
            sub_records = stream_def.sync_sub_records(
                substream, raw_record, filter_datetime)
            for tap_substream_id, sub_record in sub_records:
                state = handle_record(tap_substream_id, sub_record,
                                      stream_def, stream_version, state)

        with stream_def.transformer_class() as transformer:
            transformed_records = transformer.transform(
                raw_record,
                stream_def.schema_dict,
                context=DataContext(
                    stream=stream_def,
                    filter_datetime=filter_datetime,
                    tap_stream_id=stream_id,
                ),
                metadata=stream_def.mapped_metadata,
            )
            for transformed in transformed_records:
                state = handle_record(stream_id, transformed, stream_def,
                                      stream_version, state)

    elif isinstance(stream_def, EndpointSubstream):
        # Assumes the consumed data mirrors the API, i.e. something like
        # /customer/<id>/notes arrives as its own individual message.
        endpoint_context = DataContext(
            tap_stream_id=stream_def.tap_stream_id,
            stream=stream_def,
            filter_datetime=filter_datetime,
        )

        with stream_def.transformer_class() as transformer:
            transformed_records = transformer.transform(
                raw_record,
                stream_def.schema_dict,
                context=endpoint_context,
                metadata=stream_def.mapped_metadata,
            )
            for transformed in transformed_records:
                state = handle_record(stream_id, transformed, stream_def,
                                      stream_version, state)

    # State is written for every selected stream, whichever branch ran.
    write_state(state)
    return None