def test_returns_list_of_overflow_dates_depending_on_cutoff_when_start_and_end_datetime_are_none(
    cutoff_days, expected_overflow_dates
):
    """Check the overflow dates of a window built without start/end datetimes.

    Both arguments are supplied from outside this function (presumably by a
    parametrize decorator that is not visible here): ``cutoff_days`` is the
    conversation cutoff in days and ``expected_overflow_dates`` is the list
    ``get_overflow_dates()`` must return for it.
    """
    cutoff = timedelta(days=cutoff_days)
    window = ReportingWindow(
        start_datetime=None,
        end_datetime=None,
        conversation_cutoff=cutoff,
    )

    assert window.get_overflow_dates() == expected_overflow_dates
def test_returns_empty_list_given_cutoff_of_0():
    """A zero-day conversation cutoff must produce no overflow dates."""
    window_start = datetime(2022, 1, 12, 0, 0, 0, tzinfo=UTC)
    window_end = datetime(2022, 1, 13, 0, 0, 0, tzinfo=UTC)
    no_cutoff = timedelta(days=0)

    window = ReportingWindow(window_start, window_end, conversation_cutoff=no_cutoff)
    overflow_dates = window.get_overflow_dates()

    expected: List = []
    assert overflow_dates == expected
def spine_messages(self, reporting_window: ReportingWindow) -> List[str]:
    """Return one spine-message path per date covered by ``reporting_window``.

    The dates are the window's own dates followed by its overflow dates, in
    that order. Each path is built by ``self._s3_path`` from the spine bucket,
    the spine messages version, the year/month/day components (each passed
    through ``add_leading_zero``) and the per-date file name.
    """
    window_dates = reporting_window.get_dates()
    overflow_dates = reporting_window.get_overflow_dates()

    paths = []
    for day in window_dates + overflow_dates:
        year_part = f"{add_leading_zero(day.year)}"
        month_part = f"{add_leading_zero(day.month)}"
        day_part = f"{add_leading_zero(day.day)}"
        paths.append(
            self._s3_path(
                self._gp2gp_spine_bucket,
                self._SPINE_MESSAGES_VERSION,
                year_part,
                month_part,
                day_part,
                self._spine_message_filename(day),
            )
        )
    return paths
def test_get_overflow_dates_returns_list_of_datetimes_within_cutoff_period():
    """A three-day cutoff yields the end date and the two days after it.

    The window runs from 30 to 31 December 2019, so the expected overflow
    dates cross the year boundary into January 2020.
    """
    window = ReportingWindow(
        datetime(2019, 12, 30, 0, 0, 0, tzinfo=UTC),
        datetime(2019, 12, 31, 0, 0, 0, tzinfo=UTC),
        timedelta(days=3),
    )

    overflow_dates = window.get_overflow_dates()

    assert overflow_dates == [
        datetime(2019, 12, 31, tzinfo=UTC),
        datetime(2020, 1, 1, tzinfo=UTC),
        datetime(2020, 1, 2, tzinfo=UTC),
    ]
class TransferClassifier:
    """Pipeline that turns spine messages into per-day transfer outputs.

    For the reporting window described by the config it reads spine messages
    and ODS metadata, groups the messages into GP2GP conversations, and writes
    one set of transfers for every date returned by the window's
    ``get_dates()``.
    """

    def __init__(self, config: TransferClassifierConfig):
        """Wire up the S3 access, reporting window, URI resolver and IO helper.

        Args:
            config: Supplies the S3 endpoint URL, the start/end datetimes and
                conversation cutoff of the reporting window, and the names of
                the input/output buckets.
        """
        s3_manager = S3DataManager(boto3.resource("s3", endpoint_url=config.s3_endpoint_url))

        self._reporting_window = ReportingWindow(
            config.start_datetime,
            config.end_datetime,
            config.conversation_cutoff,
        )
        self._config = config
        self._uris = TransferClassifierS3UriResolver(
            gp2gp_spine_bucket=config.input_spine_data_bucket,
            transfers_bucket=config.output_transfer_data_bucket,
            ods_metadata_bucket=config.input_ods_metadata_bucket,
        )
        self._io = TransferClassifierIO(s3_manager)

    def _read_spine_messages(self) -> Iterator[Message]:
        """Read the spine messages located at the paths resolved for the reporting window."""
        return self._io.read_spine_messages(self._uris.spine_messages(self._reporting_window))

    def _read_ods_metadata(self) -> OrganisationMetadataMonthly:
        """Read the ODS metadata files located at the paths resolved for the reporting window."""
        return self._io.read_ods_metadata_files(self._uris.ods_metadata(self._reporting_window))

    def _write_transfers(
        self,
        transfers: Iterator[Transfer],
        daily_start_datetime: datetime,
        cutoff: timedelta,
        metadata: Dict[str, str],
    ):
        """Write ``transfers`` with ``metadata`` to the path resolved for the given day and cutoff."""
        destination = self._uris.gp2gp_transfers(
            daily_start_datetime=daily_start_datetime,
            cutoff=cutoff,
        )
        self._io.write_transfers(transfers, destination, metadata)

    def _construct_json_log_date_range_info(self) -> dict:
        """Build the date-range fields attached to this classifier's log events.

        Returns:
            A dict holding the configured start/end datetimes, the conversation
            cutoff, and the reporting window's dates and overflow dates, all
            converted to strings.
        """
        window_dates = self._reporting_window.get_dates()
        overflow_dates = self._reporting_window.get_overflow_dates()
        config = self._config

        info = {
            "config_start_datetime": convert_to_datetime_string(config.start_datetime),
            "config_end_datetime": convert_to_datetime_string(config.end_datetime),
            "conversation_cutoff": str(config.conversation_cutoff),
            "reporting_window_dates": convert_to_datetimes_string(window_dates),
            "reporting_window_overflow_dates": convert_to_datetimes_string(overflow_dates),
        }
        return info

    def run(self):
        """Classify conversations for the reporting window and write daily transfers.

        Logs an "attempting" event, reads the inputs, groups the spine messages
        into GP2GP conversations, writes one transfers output per reporting
        window date, and finally logs a "classified" event.
        """
        probe = TransferObservabilityProbe(logger=module_logger)

        date_range_info = self._construct_json_log_date_range_info()
        attempt_extra = {
            "event": "ATTEMPTING_CLASSIFY_CONVERSATIONS_FOR_A_DATE_RANGE",
            **date_range_info,
        }
        logger.info("Attempting to classify conversations for a date range", extra=attempt_extra)

        messages = self._read_spine_messages()
        ods_metadata_monthly = self._read_ods_metadata()

        service = TransferService(
            message_stream=messages,
            cutoff=self._config.conversation_cutoff,
            observability_probe=probe,
        )
        grouped = service.group_into_conversations()
        gp2gp_conversations = service.parse_conversations_into_gp2gp_conversations(grouped)

        for day_start in self._reporting_window.get_dates():
            # Each output covers exactly one day starting at day_start.
            day_end = day_start + timedelta(days=1)
            output_metadata = {
                "cutoff-days": str(self._config.conversation_cutoff.days),
                "build-tag": self._config.build_tag,
                "start-datetime": convert_to_datetime_string(day_start),
                "end-datetime": convert_to_datetime_string(day_end),
                "ods-metadata-month": f"{day_start.year}-{day_start.month}",
            }

            days_conversations = filter_conversations_by_day(gp2gp_conversations, day_start)
            # Organisation lookup is keyed by the (year, month) of the day being processed.
            lookup = ods_metadata_monthly.get_lookup((day_start.year, day_start.month))
            days_transfers = service.convert_to_transfers(
                days_conversations,
                organisation_lookup=lookup,
            )

            self._write_transfers(
                transfers=days_transfers,
                daily_start_datetime=day_start,
                cutoff=self._config.conversation_cutoff,
                metadata=output_metadata,
            )

        success_extra = {
            "event": "CLASSIFIED_CONVERSATIONS_FOR_A_DATE_RANGE",
            **date_range_info,
        }
        logger.info("Successfully classified conversations for a date range", extra=success_extra)