def test_determine_output_date_order_style_datetime(self):
    """Verify ISO-style dateformats yield the ('ISO', None) output style.

    Records Mover only supports Postgres in ISO format at this
    point (YYYY-MM-DD) - see comments in types.py and in
    date_output_style.py for more detail.
    """
    unhandled_hints = set()
    expected_failures = {
        'MM-DD-YYYY',
        'DD-MM-YYYY',
        'MM/DD/YY',
        'DD/MM/YY',
        'DD-MM-YY',
    }
    for dateformat in DATE_CASES:
        records_format = DelimitedRecordsFormat(
            hints={
                'dateformat': dateformat,
                'timeonlyformat': 'HH24:MI:SS',
                'datetimeformattz': f'{dateformat} HH:MI:SSOF',
                'datetimeformat': f'{dateformat} HH24:MI:SS'
            })
        fail_if_cant_handle_hint = True
        validated_hints =\
            records_format.validate(fail_if_cant_handle_hint=fail_if_cant_handle_hint)
        try:
            out = determine_date_output_style(unhandled_hints,
                                              validated_hints,
                                              fail_if_cant_handle_hint)
        except NotImplementedError:
            if dateformat in expected_failures:
                # BUGFIX: was 'pass', which fell through to the
                # assertEqual below against a stale (or, on the first
                # iteration, undefined) 'out' value.
                continue
            else:
                raise
        self.assertEqual(out, ('ISO', None))
def test_vertica(self):
    """A vertica-variant format with a backslash escape hint is rejected by Postgres COPY."""
    fmt = DelimitedRecordsFormat(variant='vertica', hints={'compression': None})
    fmt.hints['escape'] = '\\'
    remaining = set(fmt.hints)
    instructions = ProcessingInstructions()
    plan = RecordsLoadPlan(instructions, fmt)
    with self.assertRaises(NotImplementedError):
        postgres_copy_from_options(remaining, plan)
def test_bluelabs_with_doublequoting(self):
    """A bluelabs-variant format with a doublequote hint is rejected by Postgres COPY."""
    fmt = DelimitedRecordsFormat(variant='bluelabs', hints={'compression': None})
    fmt.hints['doublequote'] = '"'
    remaining = set(fmt.hints)
    instructions = ProcessingInstructions()
    plan = RecordsLoadPlan(instructions, fmt)
    with self.assertRaises(NotImplementedError):
        postgres_copy_from_options(remaining, plan)
def test_new_compression_hint(self):
    """An unknown hint value is rejected by Postgres COPY option mapping.

    NOTE(review): despite the test name mentioning compression, this
    sets an unrecognized 'encoding' hint — confirm intent with the
    original author before renaming.
    """
    fmt = DelimitedRecordsFormat(variant='bluelabs', hints={'compression': None})
    fmt.hints['encoding'] = 'NEWNEWENCODING'
    remaining = set(fmt.hints)
    instructions = ProcessingInstructions()
    plan = RecordsLoadPlan(instructions, fmt)
    with self.assertRaises(NotImplementedError):
        postgres_copy_from_options(remaining, plan)
def test_determine_output_date_order_style_datetimeformat(self):
    """Verify ISO-compatible datetimeformats yield the ('ISO', None) output style.

    Records Mover only supports Postgres in ISO format at this
    point (YYYY-MM-DD) - see comments in types.py and in
    date_output_style.py for more detail.
    """
    unhandled_hints = set()
    expected_failures = {
        # no timezone, even though otherwise in ISO format
        'YYYY-MM-DD HH:MI:SS',
        # not in ISO format
        'MM/DD/YY HH24:MI',
        # not in ISO format
        'YYYY-MM-DD HH12:MI AM',
    }
    natural_dateformat = {
        'YYYY-MM-DD HH:MI:SS': 'YYYY-MM-DD',
        'MM/DD/YY HH24:MI': 'MM/DD/YY',
        'YYYY-MM-DD HH24:MI:SS': 'YYYY-MM-DD',
        'YYYY-MM-DD HH12:MI AM': 'YYYY-MM-DD',
    }
    natural_timeonlyformat = {
        'YYYY-MM-DD HH:MI:SS': 'HH:MI:SS',
        'MM/DD/YY HH24:MI': 'HH24:MI',
        'YYYY-MM-DD HH24:MI:SS': 'HH24:MI:SS',
        'YYYY-MM-DD HH12:MI AM': 'HH12:MI AM',
    }
    natural_datetimeformattz = {
        'YYYY-MM-DD HH:MI:SS': 'YYYY-MM-DD HH:MI:SSOF',
        'MM/DD/YY HH24:MI': 'MM/DD/YY HH24:MIOF',
        'YYYY-MM-DD HH24:MI:SS': 'YYYY-MM-DD HH24:MI:SSOF',
        'YYYY-MM-DD HH12:MI AM': 'YYYY-MM-DD HH12:MI AM'
    }
    for datetimeformat in DATETIME_CASES:
        records_format = DelimitedRecordsFormat(
            hints={
                'dateformat': natural_dateformat[datetimeformat],
                'timeonlyformat': natural_timeonlyformat[datetimeformat],
                'datetimeformattz': natural_datetimeformattz[datetimeformat],
                'datetimeformat': datetimeformat,
            })
        fail_if_cant_handle_hint = True
        validated_hints =\
            records_format.validate(fail_if_cant_handle_hint=fail_if_cant_handle_hint)
        try:
            out = determine_date_output_style(unhandled_hints,
                                              validated_hints,
                                              fail_if_cant_handle_hint)
        except NotImplementedError:
            if datetimeformat in expected_failures:
                # BUGFIX: was 'pass', which fell through to the
                # assertEqual below against a stale (or, on the first
                # iteration, undefined) 'out' value.
                continue
            else:
                raise
        self.assertEqual(out, ('ISO', None))
def test_postgres_copy_options_csv_no_quoting(self):
    """Unloading CSV without any quoting is unsupported and must raise."""
    fmt = DelimitedRecordsFormat(variant='csv',
                                 hints={
                                     'quoting': None,
                                     'compression': None,
                                 })
    remaining = set(fmt.hints)
    strict = True
    with self.assertRaises(NotImplementedError):
        postgres_copy_options_csv(
            remaining,
            fmt.validate(fail_if_cant_handle_hint=True),
            strict,
            CopyOptionsMode.UNLOADING)
def test_determine_output_date_order_style_iso(self):
    """Fully ISO-formatted hints map to the ('ISO', None) output style."""
    leftover_hints = set()
    fmt = DelimitedRecordsFormat(hints={
        'dateformat': 'YYYY-MM-DD',
        'timeonlyformat': 'HH24:MI:SS',
        'datetimeformattz': 'YYYY-MM-DD HH:MI:SSOF',
        'datetimeformat': 'YYYY-MM-DD HH24:MI:SS'
    })
    strict = True
    validated = fmt.validate(fail_if_cant_handle_hint=strict)
    result = determine_date_output_style(leftover_hints, validated, strict)
    self.assertEqual(result, ('ISO', None))
def test_dateformat(self) -> None:
    """For each supported dateformat, the pandas read_csv options parse a sample date."""
    class DateFormatExpectations(TypedDict):
        # Use the datetimeformat/datetimeformattz which is
        # compatible, as pandas doesn't let you configure those
        # separately
        dayfirst: bool

    testcases: Dict[HintDateFormat, DateFormatExpectations] = {
        'YYYY-MM-DD': {'dayfirst': False},
        'MM-DD-YYYY': {'dayfirst': False},
        'DD-MM-YYYY': {'dayfirst': True},
        'MM/DD/YY': {'dayfirst': False},
        'DD/MM/YY': {'dayfirst': True},
        'DD-MM-YY': {'dayfirst': True},
    }
    for dateformat in DATE_CASES:
        delimited_format = DelimitedRecordsFormat(hints={
            'dateformat': dateformat,
            'datetimeformat': f"{dateformat} HH:MI:SS",
            'datetimeformattz': f"{dateformat} HH:MI:SSOF",
            'compression': None,
        })
        schema = RecordsSchema.from_data({
            'schema': 'bltypes/v1',
            'fields': {
                'first': {
                    'type': 'date'
                }
            },
        })
        leftover = set(delimited_format.hints)
        instructions = ProcessingInstructions()
        expected = testcases[dateformat]
        try:
            options = pandas_read_csv_options(delimited_format,
                                              schema,
                                              leftover,
                                              instructions)
        except NotImplementedError:
            self.fail(f'Could not handle combination for {dateformat}')
        # every expectation must appear among the generated options
        self.assertTrue(all(item in options.items()
                            for item in expected.items()))
        buf = io.StringIO(create_sample(dateformat))
        frame = pandas.read_csv(filepath_or_buffer=buf, **options)
        parsed = frame['untitled_0'][0]
        self.assertEqual(parsed.year, SAMPLE_YEAR)
        self.assertEqual(parsed.month, SAMPLE_MONTH)
        self.assertEqual(parsed.day, SAMPLE_DAY)
def test_unload(self, mock_text, mock_UnloadFromSelect):
    """Unloading to an s3-scheme directory uses the configured AWS creds directly."""
    mock_text.side_effect = fake_text
    plan = self.mock_records_unload_plan
    plan.processing_instructions.fail_if_dont_understand = True
    plan.processing_instructions.fail_if_cant_handle_hint = True
    plan.records_format = DelimitedRecordsFormat(variant='bluelabs',
                                                 hints=bluelabs_format_hints)
    self.mock_directory.scheme = 's3'
    self.mock_db_engine.execute.return_value.scalar.return_value = 456
    row_count = self.redshift_db_driver.unloader().unload(
        schema='myschema',
        table='mytable',
        unload_plan=plan,
        directory=self.mock_directory)
    mock_UnloadFromSelect.assert_called_with(
        access_key_id='fake_aws_id',
        add_quotes=False,
        delimiter=',',
        escape=True,
        gzip=True,
        manifest=True,
        secret_access_key='fake_aws_secret',
        select=('SELECT * FROM myschema.mytable',),
        session_token='fake_aws_token',
        unload_location='s3://mybucket/myparent/mychild/')
    self.assertEqual(456, row_count)
def test_unload_to_non_s3(self, mock_text, mock_UnloadFromSelect):
    """Unloading to a non-s3 scheme routes through a temporary s3 location."""
    mock_text.side_effect = fake_text
    plan = self.mock_records_unload_plan
    plan.processing_instructions.fail_if_dont_understand = True
    plan.processing_instructions.fail_if_cant_handle_hint = True
    plan.records_format = DelimitedRecordsFormat(variant='bluelabs',
                                                 hints=bluelabs_format_hints)
    self.mock_directory.scheme = 'mumble'
    self.mock_db_engine.execute.return_value.scalar.return_value = 456
    row_count = self.redshift_db_driver.unloader().unload(
        schema='myschema',
        table='mytable',
        unload_plan=plan,
        directory=self.mock_directory)
    # the temp-location mocks return stable child mocks, so these
    # lookups match the ones made inside unload()
    temp_dir = self.mock_s3_temp_base_loc.temporary_directory().__enter__()
    creds = temp_dir.aws_creds()
    mock_UnloadFromSelect.assert_called_with(
        access_key_id=creds.access_key,
        add_quotes=False,
        delimiter=',',
        escape=True,
        gzip=True,
        manifest=True,
        secret_access_key=creds.secret_key,
        select=('SELECT * FROM myschema.mytable',),
        session_token=creds.token,
        unload_location=temp_dir.url)
    self.assertEqual(456, row_count)
def test_vertica_export_options_datetimeformattz(self):
    """Only the default YYYY-MM-DD HH24:MI:SSOF is accepted on export.

    Vertica doesn't currently allow any configurability on
    output datetimeformattz. Check again before adding any test
    cases here!
    """
    should_raise = {
        'YYYY-MM-DD HH:MI:SS': True,
        'YYYY-MM-DD HH24:MI:SSOF': False,
        'MM/DD/YY HH24:MI': True,
    }
    for datetimeformattz in DATETIMETZ_CASES:
        records_format = DelimitedRecordsFormat(
            variant='vertica',
            hints={'datetimeformattz': datetimeformattz})
        unhandled_hints = set(records_format.hints)
        processing_instructions = ProcessingInstructions(
            max_failure_rows=123)
        load_plan = RecordsLoadPlan(
            processing_instructions=processing_instructions,
            records_format=records_format)
        # Records Mover passes no particular option for dateformat on
        # export in Vertica; it always uses YYYY-MM-DD as a result.
        try:
            vertica_export_options(unhandled_hints, load_plan)
        except NotImplementedError:
            if not should_raise[datetimeformattz]:
                self.fail()
        else:
            # BUGFIX: previously a format expected to raise that did
            # NOT raise passed silently; assert the expectation both
            # ways.
            if should_raise[datetimeformattz]:
                self.fail()
def test_vertica_export_options_timeonlyformat(self):
    """Only HH:MI:SS-style time-only formats are accepted on export.

    Vertica doesn't currently allow any configurability on
    output timeonlyformat. Check again before adding any test
    cases here!
    """
    should_raise = {
        'HH:MI:SS': False,
        'HH24:MI:SS': False,
        'HH24:MI': True,
        'HH12:MI AM': True,
    }
    for timeonlyformat in TIMEONLY_CASES:
        records_format = DelimitedRecordsFormat(variant='vertica',
                                                hints={
                                                    'timeonlyformat': timeonlyformat,
                                                })
        unhandled_hints = set(records_format.hints)
        processing_instructions = ProcessingInstructions(
            max_failure_rows=123)
        load_plan = RecordsLoadPlan(
            processing_instructions=processing_instructions,
            records_format=records_format)
        # Records Mover passes no particular option for dateformat on
        # export in Vertica; it always uses YYYY-MM-DD as a result.
        try:
            vertica_export_options(unhandled_hints, load_plan)
        except NotImplementedError:
            if not should_raise[timeonlyformat]:
                raise
        else:
            # BUGFIX: previously a format expected to raise that did
            # NOT raise passed silently; assert the expectation both
            # ways.
            if should_raise[timeonlyformat]:
                self.fail()
def test_mysql_load_options_dateformat(self) -> None:
    """Non-ISO dateformats are rejected by mysql_load_options; ISO is accepted."""
    expected_failures: Set[str] = {
        'MM-DD-YYYY',
        'DD-MM-YYYY',
        'MM/DD/YY',
        'DD/MM/YY',
        'DD-MM-YY',
    }
    for fmt in DATE_CASES:
        delimited = DelimitedRecordsFormat(variant='bluelabs',
                                           hints={
                                               'dateformat': fmt,
                                               'compression': None,
                                           })
        leftover = set(delimited.hints.keys())
        try:
            mysql_load_options(leftover,
                               delimited,
                               fail_if_cant_handle_hint=True)
        except NotImplementedError:
            if fmt not in expected_failures:
                raise
            continue
        self.assertNotIn(fmt, expected_failures)
def test_redshift_unload_options_datetimeformattz(self):
    """Non-default datetimeformattz hints are rejected on unload.

    Redshift offers no options and only unloads YYYY-MM-DD
    HH:MI:SSOF, so we should reject everything else. Double
    check with the docs just in case, though--maybe that's
    changed!
    """
    expected_failures = {
        'YYYY-MM-DD HH:MI:SS',
        'MM/DD/YY HH24:MI',
    }
    for tz_format in DATETIMETZ_CASES:
        delimited = DelimitedRecordsFormat(
            variant='bluelabs',
            hints={'datetimeformattz': tz_format})
        leftover = set(delimited.hints.keys())
        try:
            redshift_unload_options(leftover,
                                    delimited,
                                    fail_if_cant_handle_hint=True)
        except NotImplementedError:
            if tz_format not in expected_failures:
                raise
            continue
        self.assertNotIn(tz_format, expected_failures)
def test_vertica_import_options_datetimeformat(self):
    """Only YYYY-MM-DD HH24:MI:SS is accepted as an input datetimeformat.

    Vertica doesn't currently allow any configurability on
    input datetimeformat. Check again before adding any test cases
    here!
    """
    should_raise = {
        'YYYY-MM-DD HH:MI:SS': True,
        'YYYY-MM-DD HH24:MI:SS': False,
        'MM/DD/YY HH24:MI': True,
        'YYYY-MM-DD HH12:MI AM': True,
    }
    for datetimeformat in DATETIME_CASES:
        records_format = DelimitedRecordsFormat(variant='vertica',
                                                hints={
                                                    'datetimeformat': datetimeformat,
                                                })
        unhandled_hints = set(records_format.hints)
        processing_instructions = ProcessingInstructions(
            max_failure_rows=123)
        load_plan = RecordsLoadPlan(
            processing_instructions=processing_instructions,
            records_format=records_format)
        try:
            vertica_import_options(unhandled_hints, load_plan)
        except NotImplementedError:
            if not should_raise[datetimeformat]:
                self.fail()
        else:
            # BUGFIX: previously a format expected to raise that did
            # NOT raise passed silently; assert the expectation both
            # ways.
            if should_raise[datetimeformat]:
                self.fail()
def test_redshift_copy_options_dateformat(self):
    """Every records-spec dateformat passes through to Redshift unchanged.

    The records spec's date/time formats are based on Redshift's
    spec originally, so it's expected that everything here would
    be accepted as-is, but please double-check with Redshift's
    docs as new test cases are added.
    """
    accept_as_is = {
        'YYYY-MM-DD': True,
        'MM-DD-YYYY': True,
        'DD-MM-YYYY': True,
        'MM/DD/YY': True,
        'DD/MM/YY': True,
        'DD-MM-YY': True,
    }
    for fmt in DATE_CASES:
        delimited = DelimitedRecordsFormat(variant='bluelabs',
                                           hints={
                                               'dateformat': fmt
                                           })
        leftover = set(delimited.hints.keys())
        result = redshift_copy_options(leftover,
                                       delimited,
                                       fail_if_cant_handle_hint=True,
                                       fail_if_row_invalid=True,
                                       max_failure_rows=0)
        if not accept_as_is[fmt]:
            self.fail('define what to expect here')
        self.assertIs(result['date_format'], fmt)
def test_redshift_copy_options_datetimeformattz(self):
    """Offset ('OF') formats map to 'auto'; others pass through as time_format.

    Redshift's time_format doesn't support separate
    configuration for datetimeformat vs datetimeformattz, but
    the 'auto' flag seems to work with specific things (see
    tests run in records_copy.py).

    Please verify new formats have a test run
    and documented in records_copy.py before putting an entry in
    here.
    """
    expectations = {
        'YYYY-MM-DD HH:MI:SSOF': 'auto',
        'YYYY-MM-DD HH:MI:SS': 'YYYY-MM-DD HH:MI:SS',
        'YYYY-MM-DD HH24:MI:SSOF': 'auto',
        'MM/DD/YY HH24:MIOF': 'auto',
        'MM/DD/YY HH24:MI': 'MM/DD/YY HH24:MI',
    }
    for datetimeformattz in DATETIMETZ_CASES:
        hints = {
            'datetimeformattz': datetimeformattz,
            'datetimeformat': datetimeformattz.replace('OF', '')
        }
        records_format =\
            DelimitedRecordsFormat(variant='bluelabs', hints=hints)
        unhandled_hints = set(records_format.hints.keys())
        out = redshift_copy_options(unhandled_hints,
                                    records_format,
                                    fail_if_cant_handle_hint=True,
                                    fail_if_row_invalid=True,
                                    max_failure_rows=0)
        # BUGFIX: assertEquals is a deprecated alias removed in
        # Python 3.12; use assertEqual.
        self.assertEqual(out['time_format'],
                         expectations[datetimeformattz])
def test_determine_date_order_style_(self):
    # Exercise determine_input_date_order_style across consistent,
    # inconsistent, and invalid hint combinations.
    unhandled_hints = set()
    tests = [
        (
            # No ambiguity, can handle all
            {
                'datetimeformattz': 'YYYY-MM-DD HH:MI:SSOF',
                'datetimeformat': "YYYY-MM-DD HH12:MI AM",
                'timeonlyformat': "HH12:MI AM",
                'dateformat': "YYYY-MM-DD",
            },
            None),
        (
            # Invalid datetimeformattz hint is rejected outright
            {
                'datetimeformattz': 'INVALID',
                'datetimeformat': "YYYY-MM-DD HH12:MI AM",
                'timeonlyformat': "HH12:MI AM",
                'dateformat': "YYYY-MM-DD",
            },
            NotImplementedError),
        (
            # Can't parse MDY and DMY at the same time
            {
                'datetimeformattz': 'MM/DD/YY HH24:MI',
                'datetimeformat': "MM/DD/YY HH24:MI",
                'timeonlyformat': "HH12:MI AM",
                'dateformat': "DD-MM-YYYY",
            },
            NotImplementedError),
        (
            # Unrecognized datetimeformat hint is rejected
            {
                'datetimeformattz': 'MM/DD/YY HH24:MI',
                'datetimeformat': "other",
                'timeonlyformat': "HH12:MI AM",
                'dateformat': "MM-DD-YYYY",
            },
            NotImplementedError),
    ]
    fail_if_cant_handle_hint = True
    for raw_hints, expected_result in tests:
        records_format = DelimitedRecordsFormat(hints=raw_hints)
        if expected_result == NotImplementedError:
            # validate() itself may raise for invalid hints, so it
            # lives inside the assertRaises block too
            with self.assertRaises(NotImplementedError):
                validated_hints = records_format.\
                    validate(fail_if_cant_handle_hint=fail_if_cant_handle_hint)
                determine_input_date_order_style(unhandled_hints,
                                                 validated_hints,
                                                 fail_if_cant_handle_hint)
        else:
            validated_hints = records_format.\
                validate(fail_if_cant_handle_hint=fail_if_cant_handle_hint)
            out = determine_input_date_order_style(
                unhandled_hints,
                validated_hints,
                fail_if_cant_handle_hint)
            self.assertEqual(out, expected_result)
def test_prep_df_for_csv_output_include_index(self):
    """With include_index=True, the index and time columns are stringified."""
    records_schema = RecordsSchema.from_data({
        'schema': "bltypes/v1",
        'fields': {
            "date": {
                "type": "date",
                "index": 1,
            },
            "time": {
                "type": "time",
                "index": 2,
            },
            "timetz": {
                "type": "timetz",
                "index": 3,
            },
        }
    })
    records_format = DelimitedRecordsFormat(variant='bluelabs')
    processing_instructions = ProcessingInstructions()
    sample_time = pd.Timestamp(year=1970, month=1, day=1,
                               hour=12, minute=33, second=53,
                               microsecond=1234)
    # timetz is not well supported in records mover yet. For
    # instance, specifying how it's turned into a CSV is not
    # currently part of the records spec:
    #
    # https://github.com/bluelabsio/records-mover/issues/76
    #
    # In addition, Vertica suffers from a driver limitation:
    #
    # https://github.com/bluelabsio/records-mover/issues/77
    #
    # so no timetz data is included here.
    frame = pd.DataFrame({'time': [sample_time]},
                         index=[pd.Timestamp(year=1970, month=1, day=1)],
                         columns=['time', 'timetz'])
    result = prep_df_for_csv_output(df=frame,
                                    include_index=True,
                                    records_schema=records_schema,
                                    records_format=records_format,
                                    processing_instructions=processing_instructions)
    self.assertEqual(result.index[0], '1970-01-01')
    self.assertEqual(result['time'][0], '12:33:53')
    # self.assertEqual(result['timetz'][0], '12:33:53-05')
    self.assertIsNotNone(result)
def test_pandas_read_csv_options_bzip(self):
    """The BZIP compression hint maps to pandas' 'bz2' compression option."""
    fmt = DelimitedRecordsFormat(hints={'compression': 'BZIP'})
    schema = RecordsSchema.from_data({'schema': 'bltypes/v1'})
    leftover = set(fmt.hints)
    instructions = ProcessingInstructions()
    expected = {'compression': 'bz2'}
    actual = pandas_read_csv_options(fmt,
                                     schema,
                                     leftover,
                                     instructions)
    self.assertTrue(all(item in actual.items()
                        for item in expected.items()))
def test_mysql_load_options_encoding_utf8bom_fallback(self) -> None:
    """When not failing on unsupported hints, UTF8BOM falls back to utf8."""
    delimited = DelimitedRecordsFormat(variant='bluelabs',
                                       hints={
                                           'encoding': 'UTF8BOM',
                                           'compression': None,
                                       })
    leftover = set(delimited.hints.keys())
    result = mysql_load_options(leftover,
                                delimited,
                                fail_if_cant_handle_hint=False)
    self.assertEqual(result.character_set, 'utf8')
def test_csv_quote_all(self):
    """CSV with quoting='all' is rejected by Postgres COPY FROM options."""
    fmt = DelimitedRecordsFormat(variant='csv',
                                 hints={
                                     'compression': None,
                                     'quoting': 'all'
                                 })
    remaining = set(fmt.hints)
    instructions = ProcessingInstructions()
    plan = RecordsLoadPlan(instructions, fmt)
    with self.assertRaises(NotImplementedError):
        postgres_copy_from_options(remaining, plan)
def test_mysql_load_options_encoding_utf8bom_fail(self) -> None:
    """When failing on unsupported hints, UTF8BOM raises and names the hint."""
    delimited = DelimitedRecordsFormat(variant='bluelabs',
                                       hints={
                                           'encoding': 'UTF8BOM',
                                           'compression': None,
                                       })
    leftover = set(delimited.hints.keys())
    with self.assertRaises(NotImplementedError) as caught:
        mysql_load_options(leftover,
                           delimited,
                           fail_if_cant_handle_hint=True)
    self.assertIn('UTF8BOM', str(caught.exception))
def test_mysql_load_options_nonnumeric_quoting(self) -> None:
    """'nonnumeric' quoting maps to optional enclosure with double quotes."""
    delimited = DelimitedRecordsFormat(variant='bluelabs',
                                       hints={
                                           'quoting': 'nonnumeric',
                                           'doublequote': True,
                                           'compression': None,
                                       })
    leftover = set(delimited.hints.keys())
    result = mysql_load_options(leftover,
                                delimited,
                                fail_if_cant_handle_hint=True)
    self.assertEqual(result.fields_optionally_enclosed_by, '"')
def test_redshift_copy_options_encodings(self):
    """Hint encoding spellings map to the corresponding Encoding enum members."""
    cases = {
        'UTF16': Encoding.utf16,
        'UTF16LE': Encoding.utf16le,
        'UTF16BE': Encoding.utf16be
    }
    for hint_spelling, expected_member in cases.items():
        fmt = DelimitedRecordsFormat(variant='bluelabs',
                                     hints={
                                         'encoding': hint_spelling
                                     })
        leftover = set(fmt.hints.keys())
        result = redshift_copy_options(
            leftover,
            fmt.validate(fail_if_cant_handle_hint=True),
            fail_if_cant_handle_hint=True,
            fail_if_row_invalid=True,
            max_failure_rows=0)
        self.assertIs(result['encoding'], expected_member)
def unload(self, variant, directory, hints=None) -> None:
    """Move this test's table to a local directory and assert the move count.

    :param variant: delimited records variant to unload as
    :param directory: local filesystem path to unload into
    :param hints: optional records-format hint overrides
    """
    # BUGFIX: the previous default of 'hints={}' was a mutable
    # default argument shared across calls; use a None sentinel.
    if hints is None:
        hints = {}
    records_format = DelimitedRecordsFormat(variant=variant, hints=hints)
    directory_url = pathlib.Path(directory).resolve().as_uri() + '/'
    targets = self.records.targets
    sources = self.records.sources
    source = sources.table(schema_name=self.schema_name,
                           table_name=self.table_name,
                           db_engine=self.engine)
    target = targets.directory_from_url(output_url=directory_url,
                                        records_format=records_format)
    out = self.records.move(source, target)
    self.assertTrue(out.move_count in [1, None])
def test_postgres_copy_options_csv_minimal_quoting(self):
    """Minimal quoting maps to the standard Postgres CSV COPY option set."""
    records_format = DelimitedRecordsFormat(variant='csv',
                                            hints={
                                                'quoting': 'minimal',
                                                'compression': None,
                                            })
    unhandled_hints = set(records_format.hints)
    fail_if_cant_handle_hint = True
    mode = CopyOptionsMode.UNLOADING
    hints = records_format.validate(fail_if_cant_handle_hint=True)
    out = postgres_copy_options_csv(unhandled_hints,
                                    hints,
                                    fail_if_cant_handle_hint,
                                    mode)
    # BUGFIX: the expected dict literal previously listed
    # 'format': 'csv' twice; duplicate keys silently collapse, so
    # drop the repeat.
    self.assertEqual(
        out,
        {
            'format': 'csv',
            'quote': '"',
            'delimiter': ',',
            'encoding': 'UTF8',
            'header': True,
        })
def test_redshift_copy_options_datetimeformat(self):
    """Paired tz/non-tz formats map to 'auto'; identical pairs pass through.

    Redshift's time_format doesn't support separate
    configuration for datetimeformat vs datetimeformattz, but
    the 'auto' flag seems to work with specific things (see
    tests run in records_copy.py).

    Please verify new formats have a test run
    and documented in records_copy.py before putting an entry in
    here.
    """
    # when the tz format is the plain format plus an 'OF' suffix,
    # 'auto' handles both
    for fmt in DATETIME_CASES:
        delimited = DelimitedRecordsFormat(
            variant='bluelabs',
            hints={
                'datetimeformattz': f"{fmt}OF",
                'datetimeformat': fmt,
            })
        leftover = set(delimited.hints.keys())
        result = redshift_copy_options(leftover,
                                       delimited,
                                       fail_if_cant_handle_hint=True,
                                       fail_if_row_invalid=True,
                                       max_failure_rows=0)
        self.assertEqual(result['time_format'], 'auto')
    # when both hints are identical, the format is passed through
    for fmt in DATETIME_CASES:
        delimited = DelimitedRecordsFormat(
            variant='bluelabs',
            hints={
                'datetimeformattz': fmt,
                'datetimeformat': fmt,
            })
        leftover = set(delimited.hints.keys())
        result = redshift_copy_options(leftover,
                                       delimited,
                                       fail_if_cant_handle_hint=True,
                                       fail_if_row_invalid=True,
                                       max_failure_rows=0)
        self.assertEqual(result['time_format'], fmt)
def test_timeonlyformat(self):
    """Time values render per the timeonlyformat hint for both dtypes."""
    records_schema = RecordsSchema.from_data({
        'schema': "bltypes/v1",
        'fields': {
            "time_as_timestamp": {
                "type": "time",
                "index": 1,
            },
            "time_as_time": {
                "type": "time",
                "index": 2,
            },
        }
    })
    processing_instructions = ProcessingInstructions()
    for timeonlyformat in TIMEONLY_CASES:
        records_format = DelimitedRecordsFormat(variant='bluelabs',
                                                hints={
                                                    'timeonlyformat': timeonlyformat,
                                                })
        stamp = pd.Timestamp(year=SAMPLE_YEAR, month=SAMPLE_MONTH,
                             day=SAMPLE_DAY, hour=SAMPLE_HOUR,
                             minute=SAMPLE_MINUTE, second=SAMPLE_SECOND)
        clock = datetime.time(hour=SAMPLE_HOUR,
                              minute=SAMPLE_MINUTE,
                              second=SAMPLE_SECOND)
        frame = pd.DataFrame({'time_as_timestamp': [stamp],
                              'time_as_time': [clock]},
                             columns=['time_as_timestamp', 'time_as_time'])
        result = prep_df_for_csv_output(df=frame,
                                        include_index=False,
                                        records_schema=records_schema,
                                        records_format=records_format,
                                        processing_instructions=processing_instructions)
        expected = create_sample(timeonlyformat)
        self.assertEqual(result['time_as_timestamp'][0],
                         expected,
                         timeonlyformat)
        self.assertEqual(result['time_as_time'][0],
                         expected,
                         timeonlyformat)
        # timetz output is not yet part of the records spec:
        # self.assertEqual(result['timetz'][0], '12:33:53-05')
        self.assertIsNotNone(result)
def test_mysql_load_options_valid_quoting_no_doublequote(self) -> None:
    """quoting='all' without doublequoting is unsupported and names the hint."""
    delimited = DelimitedRecordsFormat(variant='bluelabs',
                                       hints={
                                           'quoting': 'all',
                                           'doublequote': False,
                                           'compression': None,
                                       })
    leftover = set(delimited.hints.keys())
    with self.assertRaises(NotImplementedError) as caught:
        mysql_load_options(leftover,
                           delimited,
                           fail_if_cant_handle_hint=True)
    self.assertIn('doublequote=False', str(caught.exception))