def test_determine_output_date_order_style_datetime(self):
     # For every date format in DATE_CASES, determine_date_output_style()
     # must either return ('ISO', None) or raise NotImplementedError for
     # a format listed in expected_failures.
     unhandled_hints = set()
     fail_if_cant_handle_hint = True
     # Records Mover only supports Postgres in ISO format at this
     # point (YYYY-MM-DD) - see comments in types.py and in
     # date_output_style.py for more detail.
     expected_failures = {
         'MM-DD-YYYY',
         'DD-MM-YYYY',
         'MM/DD/YY',
         'DD/MM/YY',
         'DD-MM-YY',
     }
     for dateformat in DATE_CASES:
         records_format = DelimitedRecordsFormat(
             hints={
                 'dateformat': dateformat,
                 'timeonlyformat': 'HH24:MI:SS',
                 'datetimeformattz': f'{dateformat} HH:MI:SSOF',
                 'datetimeformat': f'{dateformat} HH24:MI:SS'
             })
         validated_hints = records_format.validate(
             fail_if_cant_handle_hint=fail_if_cant_handle_hint)
         try:
             out = determine_date_output_style(unhandled_hints,
                                               validated_hints,
                                               fail_if_cant_handle_hint)
         except NotImplementedError:
             # Only the formats known to be unsupported may be rejected.
             if dateformat not in expected_failures:
                 raise
         else:
             # Compare only when the call returned: after a rejected
             # format `out` would be stale (or unbound on the first
             # iteration).
             self.assertEqual(out, ('ISO', None))
示例#2
0
 def test_vertica(self):
     # postgres_copy_from_options() must raise NotImplementedError for
     # the 'vertica' variant once its escape hint is set to a backslash.
     vertica_format = DelimitedRecordsFormat(hints={'compression': None},
                                             variant='vertica')
     vertica_format.hints['escape'] = '\\'
     pending_hints = set(vertica_format.hints)
     plan = RecordsLoadPlan(ProcessingInstructions(), vertica_format)
     with self.assertRaises(NotImplementedError):
         postgres_copy_from_options(pending_hints, plan)
示例#3
0
 def test_bluelabs_with_doublequoting(self):
     # postgres_copy_from_options() must raise NotImplementedError for
     # the 'bluelabs' variant once its doublequote hint is overridden.
     bluelabs_format = DelimitedRecordsFormat(hints={'compression': None},
                                              variant='bluelabs')
     bluelabs_format.hints['doublequote'] = '"'
     pending_hints = set(bluelabs_format.hints)
     plan = RecordsLoadPlan(ProcessingInstructions(), bluelabs_format)
     with self.assertRaises(NotImplementedError):
         postgres_copy_from_options(pending_hints, plan)
示例#4
0
 def test_new_compression_hint(self):
     # postgres_copy_from_options() must raise NotImplementedError when
     # the encoding hint carries a value it cannot know about.
     # NOTE(review): the test name mentions compression, but the hint
     # being overridden here is 'encoding'.
     bluelabs_format = DelimitedRecordsFormat(hints={'compression': None},
                                              variant='bluelabs')
     bluelabs_format.hints['encoding'] = 'NEWNEWENCODING'
     pending_hints = set(bluelabs_format.hints)
     plan = RecordsLoadPlan(ProcessingInstructions(), bluelabs_format)
     with self.assertRaises(NotImplementedError):
         postgres_copy_from_options(pending_hints, plan)
 def test_determine_output_date_order_style_datetimeformat(self):
     # For every datetime format in DATETIME_CASES (paired with the
     # date/time/datetimetz hints that naturally go with it),
     # determine_date_output_style() must either return ('ISO', None)
     # or raise NotImplementedError for a format listed in
     # expected_failures.
     unhandled_hints = set()
     fail_if_cant_handle_hint = True
     # Records Mover only supports Postgres in ISO format at this
     # point (YYYY-MM-DD) - see comments in types.py and in
     # date_output_style.py for more detail.
     expected_failures = {
         # no timezone, even though otherwise in ISO format
         'YYYY-MM-DD HH:MI:SS',
         # not in ISO format
         'MM/DD/YY HH24:MI',
         # not in ISO format
         'YYYY-MM-DD HH12:MI AM',
     }
     # The three tables below give, per datetimeformat, the companion
     # hint values used to build a consistent records format.
     natural_dateformat = {
         'YYYY-MM-DD HH:MI:SS': 'YYYY-MM-DD',
         'MM/DD/YY HH24:MI': 'MM/DD/YY',
         'YYYY-MM-DD HH24:MI:SS': 'YYYY-MM-DD',
         'YYYY-MM-DD HH12:MI AM': 'YYYY-MM-DD',
     }
     natural_timeonlyformat = {
         'YYYY-MM-DD HH:MI:SS': 'HH:MI:SS',
         'MM/DD/YY HH24:MI': 'HH24:MI',
         'YYYY-MM-DD HH24:MI:SS': 'HH24:MI:SS',
         'YYYY-MM-DD HH12:MI AM': 'HH12:MI AM',
     }
     natural_datetimeformattz = {
         'YYYY-MM-DD HH:MI:SS': 'YYYY-MM-DD HH:MI:SSOF',
         'MM/DD/YY HH24:MI': 'MM/DD/YY HH24:MIOF',
         'YYYY-MM-DD HH24:MI:SS': 'YYYY-MM-DD HH24:MI:SSOF',
         'YYYY-MM-DD HH12:MI AM': 'YYYY-MM-DD HH12:MI AM'
     }
     for datetimeformat in DATETIME_CASES:
         records_format = DelimitedRecordsFormat(
             hints={
                 'dateformat': natural_dateformat[datetimeformat],
                 'timeonlyformat': natural_timeonlyformat[datetimeformat],
                 'datetimeformattz':
                 natural_datetimeformattz[datetimeformat],
                 'datetimeformat': datetimeformat,
             })
         validated_hints = records_format.validate(
             fail_if_cant_handle_hint=fail_if_cant_handle_hint)
         try:
             out = determine_date_output_style(unhandled_hints,
                                               validated_hints,
                                               fail_if_cant_handle_hint)
         except NotImplementedError:
             # Only the formats known to be unsupported may be rejected.
             if datetimeformat not in expected_failures:
                 raise
         else:
             # Compare only when the call returned: after a rejected
             # format `out` would be stale (or unbound on the first
             # iteration).
             self.assertEqual(out, ('ISO', None))
    def test_postgres_copy_options_csv_no_quoting(self):
        # Asking for the 'csv' variant with quoting turned off must end
        # in NotImplementedError when building UNLOADING copy options.
        csv_format = DelimitedRecordsFormat(
            variant='csv',
            hints={'quoting': None, 'compression': None})
        pending_hints = set(csv_format.hints)
        strict = True

        # validate() stays inside the context manager so that a
        # NotImplementedError from either call satisfies the assertion.
        with self.assertRaises(NotImplementedError):
            validated = csv_format.validate(fail_if_cant_handle_hint=True)
            postgres_copy_options_csv(pending_hints,
                                      validated,
                                      strict,
                                      CopyOptionsMode.UNLOADING)
    def test_determine_output_date_order_style_iso(self):
        # An all-ISO set of date/time hints must be reported as
        # ('ISO', None) by determine_date_output_style().
        iso_hints = {
            'dateformat': 'YYYY-MM-DD',
            'timeonlyformat': 'HH24:MI:SS',
            'datetimeformattz': 'YYYY-MM-DD HH:MI:SSOF',
            'datetimeformat': 'YYYY-MM-DD HH24:MI:SS'
        }
        strict = True
        pending_hints = set()
        iso_format = DelimitedRecordsFormat(hints=iso_hints)
        validated = iso_format.validate(fail_if_cant_handle_hint=strict)

        style = determine_date_output_style(pending_hints, validated, strict)
        self.assertEqual(style, ('ISO', None))
示例#8
0
    def test_dateformat(self) -> None:
        # For each date format in DATE_CASES: build read_csv options,
        # check they contain the expected 'dayfirst' setting, then parse
        # a sample with pandas and verify year/month/day.
        class DateFormatExpectations(TypedDict):
            # Use the datetimeformat/datetimeformattz which is
            # compatible, as pandas doesn't let you configure those
            # separately
            dayfirst: bool

        testcases: Dict[HintDateFormat, DateFormatExpectations] = {
            'YYYY-MM-DD': {'dayfirst': False},
            'MM-DD-YYYY': {'dayfirst': False},
            'DD-MM-YYYY': {'dayfirst': True},
            'MM/DD/YY': {'dayfirst': False},
            'DD/MM/YY': {'dayfirst': True},
            'DD-MM-YY': {'dayfirst': True},
        }
        for dateformat in DATE_CASES:
            format_hints = {
                'dateformat': dateformat,
                'datetimeformat': f"{dateformat} HH:MI:SS",
                'datetimeformattz': f"{dateformat} HH:MI:SSOF",
                'compression': None,
            }
            records_format = DelimitedRecordsFormat(hints=format_hints)
            schema_data = {
                'schema': 'bltypes/v1',
                'fields': {
                    'first': {
                        'type': 'date'
                    }
                },
            }
            records_schema = RecordsSchema.from_data(schema_data)
            pending_hints = set(records_format.hints)
            instructions = ProcessingInstructions()
            expected = testcases[dateformat]
            try:
                options = pandas_read_csv_options(records_format,
                                                  records_schema,
                                                  pending_hints,
                                                  instructions)
            except NotImplementedError:
                self.fail(f'Could not handle combination for {dateformat}')
            # Every expected key/value pair must appear in the options.
            present = options.items()
            self.assertTrue(all(pair in present for pair in expected.items()))
            sample = io.StringIO(create_sample(dateformat))
            df = pandas.read_csv(filepath_or_buffer=sample, **options)
            parsed = df['untitled_0'][0]
            self.assertEqual(parsed.year, SAMPLE_YEAR)
            self.assertEqual(parsed.month, SAMPLE_MONTH)
            self.assertEqual(parsed.day, SAMPLE_DAY)
    def test_unload(self,
                    mock_text,
                    mock_UnloadFromSelect):
        # Unloading to a directory whose scheme is 's3' should invoke
        # UnloadFromSelect with the arguments below and return the
        # mocked scalar row count.
        mock_text.side_effect = fake_text
        plan = self.mock_records_unload_plan
        plan.processing_instructions.fail_if_dont_understand = True
        plan.processing_instructions.fail_if_cant_handle_hint = True
        plan.records_format = DelimitedRecordsFormat(
            variant='bluelabs', hints=bluelabs_format_hints)
        self.mock_directory.scheme = 's3'
        self.mock_db_engine.execute.return_value.scalar.return_value = 456

        unloader = self.redshift_db_driver.unloader()
        rows = unloader.unload(schema='myschema',
                               table='mytable',
                               unload_plan=plan,
                               directory=self.mock_directory)

        mock_UnloadFromSelect.assert_called_with(
            access_key_id='fake_aws_id',
            add_quotes=False,
            delimiter=',',
            escape=True,
            gzip=True,
            manifest=True,
            secret_access_key='fake_aws_secret',
            select=('SELECT * FROM myschema.mytable',),
            session_token='fake_aws_token',
            unload_location='s3://mybucket/myparent/mychild/')
        self.assertEqual(456, rows)
    def test_unload_to_non_s3(self,
                              mock_text,
                              mock_UnloadFromSelect):
        # With a directory scheme other than 's3', UnloadFromSelect is
        # expected to be called with the credentials and URL of the
        # mocked temporary S3 directory instead.
        mock_text.side_effect = fake_text
        plan = self.mock_records_unload_plan
        plan.processing_instructions.fail_if_dont_understand = True
        plan.processing_instructions.fail_if_cant_handle_hint = True
        plan.records_format = DelimitedRecordsFormat(
            variant='bluelabs', hints=bluelabs_format_hints)
        self.mock_directory.scheme = 'mumble'
        self.mock_db_engine.execute.return_value.scalar.return_value = 456

        unloader = self.redshift_db_driver.unloader()
        rows = unloader.unload(schema='myschema',
                               table='mytable',
                               unload_plan=plan,
                               directory=self.mock_directory)

        # The mock hands back the same child objects on every call, so
        # these are the values the code under test saw.
        temp_dir = self.mock_s3_temp_base_loc.temporary_directory().__enter__()
        creds = temp_dir.aws_creds()
        mock_UnloadFromSelect.assert_called_with(
            access_key_id=creds.access_key,
            add_quotes=False,
            delimiter=',',
            escape=True,
            gzip=True,
            manifest=True,
            secret_access_key=creds.secret_key,
            select=('SELECT * FROM myschema.mytable',),
            session_token=creds.token,
            unload_location=temp_dir.url)
        self.assertEqual(456, rows)
示例#11
0
 def test_vertica_export_options_datetimeformattz(self):
     # Vertica doesn't currently allow any configurability on
     # output datetimeformattz.  Check again before adding any test
     # cases here!
     #
     # Maps a datetimeformattz hint to whether a NotImplementedError
     # from vertica_export_options() is acceptable for it.
     should_raise = {
         'YYYY-MM-DD HH:MI:SS': True,
         'YYYY-MM-DD HH24:MI:SSOF': False,
         'MM/DD/YY HH24:MI': True,
     }
     for datetimeformattz in DATETIMETZ_CASES:
         vertica_format = DelimitedRecordsFormat(
             hints={'datetimeformattz': datetimeformattz},
             variant='vertica')
         pending_hints = set(vertica_format.hints)
         instructions = ProcessingInstructions(max_failure_rows=123)
         plan = RecordsLoadPlan(records_format=vertica_format,
                                processing_instructions=instructions)
         try:
             vertica_export_options(pending_hints, plan)
         except NotImplementedError:
             # NOTE(review): only an unexpected rejection is flagged; a
             # format marked True that does not raise passes silently.
             rejection_allowed = should_raise[datetimeformattz]
             if not rejection_allowed:
                 self.fail()
示例#12
0
 def test_vertica_export_options_timeonlyformat(self):
     # Vertica doesn't currently allow any configurability on
     # output timeonlyformat.  Check again before adding any test
     # cases here!
     #
     # Maps a timeonlyformat hint to whether a NotImplementedError
     # from vertica_export_options() is acceptable for it.
     should_raise = {
         'HH:MI:SS': False,
         'HH24:MI:SS': False,
         'HH24:MI': True,
         'HH12:MI AM': True,
     }
     for timeonlyformat in TIMEONLY_CASES:
         vertica_format = DelimitedRecordsFormat(
             hints={'timeonlyformat': timeonlyformat},
             variant='vertica')
         pending_hints = set(vertica_format.hints)
         instructions = ProcessingInstructions(max_failure_rows=123)
         plan = RecordsLoadPlan(records_format=vertica_format,
                                processing_instructions=instructions)
         try:
             vertica_export_options(pending_hints, plan)
         except NotImplementedError:
             # NOTE(review): only an unexpected rejection propagates; a
             # format marked True that does not raise passes silently.
             rejection_allowed = should_raise[timeonlyformat]
             if not rejection_allowed:
                 raise
示例#13
0
    def test_mysql_load_options_dateformat(self) -> None:
        # mysql_load_options() must raise NotImplementedError for exactly
        # the date formats listed here, and accept every other entry of
        # DATE_CASES.
        expected_failures: Set[str] = {
            'MM-DD-YYYY',
            'DD-MM-YYYY',
            'MM/DD/YY',
            'DD/MM/YY',
            'DD-MM-YY',
        }

        for dateformat in DATE_CASES:
            load_hints = {
                'dateformat': dateformat,
                'compression': None,
            }
            records_format = DelimitedRecordsFormat(variant='bluelabs',
                                                    hints=load_hints)
            pending_hints = set(records_format.hints.keys())
            try:
                mysql_load_options(pending_hints,
                                   records_format,
                                   fail_if_cant_handle_hint=True)
            except NotImplementedError:
                if dateformat not in expected_failures:
                    raise
            else:
                # The call succeeded, so this format must not have been
                # one that is expected to be rejected.
                self.assertNotIn(dateformat, expected_failures)
 def test_redshift_unload_options_datetimeformattz(self):
     # Redshift offers no options and only unloads YYYY-MM-DD
     # HH:MI:SSOF, so we should reject everything else.  Double
     # check with the docs just in case, though--maybe that's
     # changed!
     expected_failures = {
         'YYYY-MM-DD HH:MI:SS',
         'MM/DD/YY HH24:MI',
     }
     for datetimeformattz in DATETIMETZ_CASES:
         bluelabs_format = DelimitedRecordsFormat(
             hints={'datetimeformattz': datetimeformattz},
             variant='bluelabs')
         pending_hints = set(bluelabs_format.hints.keys())
         try:
             redshift_unload_options(pending_hints,
                                     bluelabs_format,
                                     fail_if_cant_handle_hint=True)
         except NotImplementedError:
             if datetimeformattz not in expected_failures:
                 raise
         else:
             # The call succeeded, so this format must not have been
             # one that is expected to be rejected.
             self.assertNotIn(datetimeformattz, expected_failures)
示例#15
0
 def test_vertica_import_options_datetimeformat(self):
     # Vertica doesn't currently allow any configurability on
     # input datetimeformat.  Check again before adding any test cases
     # here!
     #
     # Maps a datetimeformat hint to whether a NotImplementedError
     # from vertica_import_options() is acceptable for it.
     should_raise = {
         'YYYY-MM-DD HH:MI:SS': True,
         'YYYY-MM-DD HH24:MI:SS': False,
         'MM/DD/YY HH24:MI': True,
         'YYYY-MM-DD HH12:MI AM': True,
     }
     for datetimeformat in DATETIME_CASES:
         vertica_format = DelimitedRecordsFormat(
             hints={'datetimeformat': datetimeformat},
             variant='vertica')
         pending_hints = set(vertica_format.hints)
         instructions = ProcessingInstructions(max_failure_rows=123)
         plan = RecordsLoadPlan(records_format=vertica_format,
                                processing_instructions=instructions)
         try:
             vertica_import_options(pending_hints, plan)
         except NotImplementedError:
             # NOTE(review): only an unexpected rejection is flagged; a
             # format marked True that does not raise passes silently.
             rejection_allowed = should_raise[datetimeformat]
             if not rejection_allowed:
                 self.fail()
示例#16
0
 def test_redshift_copy_options_dateformat(self):
     # The records spec's date/time formats are based on Redshift's
     # spec originally, so it's expected that everything here would
     # be accepted as-is, but please double-check with Redshift's
     # docs as new test cases are added
     accept_as_is = {
         'YYYY-MM-DD': True,
         'MM-DD-YYYY': True,
         'DD-MM-YYYY': True,
         'MM/DD/YY': True,
         'DD/MM/YY': True,
         'DD-MM-YY': True,
     }
     for dateformat in DATE_CASES:
         records_format =\
             DelimitedRecordsFormat(variant='bluelabs',
                                    hints={
                                        'dateformat': dateformat
                                    })
         unhandled_hints = set(records_format.hints.keys())
         out = redshift_copy_options(unhandled_hints,
                                     records_format,
                                     fail_if_cant_handle_hint=True,
                                     fail_if_row_invalid=True,
                                     max_failure_rows=0)
         if accept_as_is[dateformat]:
             # Compare by value: whether the hint string comes back as
             # the very same object is an implementation detail, not
             # something this test should depend on.
             self.assertEqual(out['date_format'], dateformat)
         else:
             self.fail('define what to expect here')
示例#17
0
 def test_redshift_copy_options_datetimeformattz(self):
     # Redshift's time_format doesn't support separate
     # configuration for datetimeformat vs datetimeformattz, but
     # the 'auto' flag seems to work with specific things (see
     # tests run in records_copy.py).
     #
     # Please verify new formats have a test run
     # and documented in records_copy.py before putting an entry in
     # here.
     #
     # Maps each datetimeformattz hint to the 'time_format' option
     # expected back from redshift_copy_options().
     expectations = {
         'YYYY-MM-DD HH:MI:SSOF': 'auto',
         'YYYY-MM-DD HH:MI:SS': 'YYYY-MM-DD HH:MI:SS',
         'YYYY-MM-DD HH24:MI:SSOF': 'auto',
         'MM/DD/YY HH24:MIOF': 'auto',
         'MM/DD/YY HH24:MI': 'MM/DD/YY HH24:MI',
     }
     for datetimeformattz in DATETIMETZ_CASES:
         hints = {
             'datetimeformattz': datetimeformattz,
             # The non-tz hint is the same format minus the offset marker.
             'datetimeformat': datetimeformattz.replace('OF', '')
         }
         records_format =\
             DelimitedRecordsFormat(variant='bluelabs',
                                    hints=hints)
         unhandled_hints = set(records_format.hints.keys())
         out = redshift_copy_options(unhandled_hints,
                                     records_format,
                                     fail_if_cant_handle_hint=True,
                                     fail_if_row_invalid=True,
                                     max_failure_rows=0)
         # assertEqual, not the deprecated assertEquals alias, which
         # was removed from unittest in Python 3.12.
         self.assertEqual(out['time_format'],
                          expectations[datetimeformattz])
 def test_determine_date_order_style_(self):
     # Table-driven check of determine_input_date_order_style(): each
     # entry pairs raw hints with either the expected return value or
     # NotImplementedError when validation/detection should fail.
     unhandled_hints = set()
     strict = True
     tests = [
         (
             # No ambiguity, can handle all
             {
                 'datetimeformattz': 'YYYY-MM-DD HH:MI:SSOF',
                 'datetimeformat': "YYYY-MM-DD HH12:MI AM",
                 'timeonlyformat': "HH12:MI AM",
                 'dateformat': "YYYY-MM-DD",
             },
             None),
         (
             # 'INVALID' datetimeformattz hint
             {
                 'datetimeformattz': 'INVALID',
                 'datetimeformat': "YYYY-MM-DD HH12:MI AM",
                 'timeonlyformat': "HH12:MI AM",
                 'dateformat': "YYYY-MM-DD",
             },
             NotImplementedError),
         (
             # Can't parse MDY and DMY at the same time
             {
                 'datetimeformattz': 'MM/DD/YY HH24:MI',
                 'datetimeformat': "MM/DD/YY HH24:MI",
                 'timeonlyformat': "HH12:MI AM",
                 'dateformat': "DD-MM-YYYY",
             },
             NotImplementedError),
         (
             # 'other' datetimeformat hint
             {
                 'datetimeformattz': 'MM/DD/YY HH24:MI',
                 'datetimeformat': "other",
                 'timeonlyformat': "HH12:MI AM",
                 'dateformat': "MM-DD-YYYY",
             },
             NotImplementedError),
     ]
     for raw_hints, expected_result in tests:
         records_format = DelimitedRecordsFormat(hints=raw_hints)
         if expected_result is not NotImplementedError:
             validated = records_format.validate(
                 fail_if_cant_handle_hint=strict)
             detected = determine_input_date_order_style(unhandled_hints,
                                                         validated,
                                                         strict)
             self.assertEqual(detected, expected_result)
             continue
         # Either validation or detection may be the step that raises.
         with self.assertRaises(NotImplementedError):
             validated = records_format.validate(
                 fail_if_cant_handle_hint=strict)
             determine_input_date_order_style(unhandled_hints,
                                              validated,
                                              strict)
示例#19
0
    def test_prep_df_for_csv_output_include_index(self):
        # With include_index=True, prep_df_for_csv_output() is expected
        # to render both the timestamp index (as a date) and the 'time'
        # column as strings.
        field_types = {
            "date": {
                "type": "date",
                "index": 1,
            },
            "time": {
                "type": "time",
                "index": 2,
            },
            "timetz": {
                "type": "timetz",
                "index": 3,
            },
        }
        records_schema = RecordsSchema.from_data({
            'schema': "bltypes/v1",
            'fields': field_types,
        })
        records_format = DelimitedRecordsFormat(variant='bluelabs')
        instructions = ProcessingInstructions()
        # us_eastern = pytz.timezone('US/Eastern')
        sample_time = pd.Timestamp(year=1970, month=1, day=1,
                                   hour=12, minute=33, second=53,
                                   microsecond=1234)
        data = {
            'time': [sample_time],
            # timetz is not well supported in records mover yet.  For
            # instance, specifying how it's turned into a CSV is not
            # currently part of the records spec:
            #
            #   https://github.com/bluelabsio/records-mover/issues/76
            #
            # In addition, Vertica suffers from a driver limitation:
            #
            #   https://github.com/bluelabsio/records-mover/issues/77
            #
            # 'timetz': [
            #     us_eastern.localize(pd.Timestamp(year=1970, month=1, day=1,
            #                                      hour=12, minute=33, second=53,
            #                                      microsecond=1234)),
            # ],
        }
        epoch_day = pd.Timestamp(year=1970, month=1, day=1)
        df = pd.DataFrame(data,
                          index=[epoch_day],
                          columns=['time', 'timetz'])

        prepared = prep_df_for_csv_output(df=df,
                                          include_index=True,
                                          records_schema=records_schema,
                                          records_format=records_format,
                                          processing_instructions=instructions)
        self.assertEqual(prepared.index[0], '1970-01-01')
        self.assertEqual(prepared['time'][0], '12:33:53')
        # self.assertEqual(new_df['timetz'][0], '12:33:53-05')
        self.assertIsNotNone(prepared)
 def test_pandas_read_csv_options_bzip(self):
     # A 'BZIP' compression hint should surface as compression='bz2'
     # in the options produced by pandas_read_csv_options().
     bzip_format = DelimitedRecordsFormat(hints={'compression': 'BZIP'})
     schema = RecordsSchema.from_data({'schema': 'bltypes/v1'})
     pending_hints = set(bzip_format.hints)
     instructions = ProcessingInstructions()
     expected = {'compression': 'bz2'}
     options = pandas_read_csv_options(bzip_format, schema,
                                       pending_hints, instructions)
     produced = options.items()
     self.assertTrue(all(pair in produced for pair in expected.items()))
示例#21
0
 def test_mysql_load_options_encoding_utf8bom_fallback(self) -> None:
     # With fail_if_cant_handle_hint=False, a 'UTF8BOM' encoding hint
     # is expected to yield a 'utf8' character set.
     bom_hints = {
         'encoding': 'UTF8BOM',
         'compression': None,
     }
     bom_format = DelimitedRecordsFormat(variant='bluelabs',
                                         hints=bom_hints)
     pending_hints = set(bom_format.hints.keys())
     load_options = mysql_load_options(pending_hints,
                                       bom_format,
                                       fail_if_cant_handle_hint=False)
     self.assertEqual(load_options.character_set, 'utf8')
示例#22
0
 def test_csv_quote_all(self):
     # postgres_copy_from_options() must raise NotImplementedError for
     # the 'csv' variant when quoting is set to 'all'.
     quote_all_hints = {
         'compression': None,
         'quoting': 'all'
     }
     csv_format = DelimitedRecordsFormat(variant='csv',
                                         hints=quote_all_hints)
     pending_hints = set(csv_format.hints)
     plan = RecordsLoadPlan(ProcessingInstructions(), csv_format)
     with self.assertRaises(NotImplementedError):
         postgres_copy_from_options(pending_hints, plan)
示例#23
0
 def test_mysql_load_options_encoding_utf8bom_fail(self) -> None:
     # With fail_if_cant_handle_hint=True, a 'UTF8BOM' encoding hint
     # must raise NotImplementedError whose message names the encoding.
     bom_hints = {
         'encoding': 'UTF8BOM',
         'compression': None,
     }
     bom_format = DelimitedRecordsFormat(variant='bluelabs',
                                         hints=bom_hints)
     pending_hints = set(bom_format.hints.keys())
     with self.assertRaises(NotImplementedError) as caught:
         mysql_load_options(pending_hints,
                            bom_format,
                            fail_if_cant_handle_hint=True)
     message = str(caught.exception)
     self.assertIn('UTF8BOM', message)
示例#24
0
 def test_mysql_load_options_nonnumeric_quoting(self) -> None:
     # 'nonnumeric' quoting with doublequote enabled should produce a
     # double-quote character as fields_optionally_enclosed_by.
     quoting_hints = {
         'quoting': 'nonnumeric',
         'doublequote': True,
         'compression': None,
     }
     quoted_format = DelimitedRecordsFormat(variant='bluelabs',
                                            hints=quoting_hints)
     pending_hints = set(quoted_format.hints.keys())
     load_options = mysql_load_options(pending_hints,
                                       quoted_format,
                                       fail_if_cant_handle_hint=True)
     self.assertEqual(load_options.fields_optionally_enclosed_by, '"')
示例#25
0
    def test_redshift_copy_options_encodings(self):
        # Each UTF16 spelling of the encoding hint should come back from
        # redshift_copy_options() as the matching Encoding member.
        expected_by_hint = {
            'UTF16': Encoding.utf16,
            'UTF16LE': Encoding.utf16le,
            'UTF16BE': Encoding.utf16be
        }
        for hint_spelling, expected_member in expected_by_hint.items():
            encoded_format = DelimitedRecordsFormat(
                variant='bluelabs',
                hints={'encoding': hint_spelling})
            pending_hints = set(encoded_format.hints.keys())
            validated = encoded_format.validate(fail_if_cant_handle_hint=True)
            copy_options = redshift_copy_options(
                pending_hints,
                validated,
                fail_if_cant_handle_hint=True,
                fail_if_row_invalid=True,
                max_failure_rows=0)
            self.assertIs(copy_options['encoding'], expected_member)
示例#26
0
    def unload(self, variant, directory, hints=None) -> None:
        # Move the table identified by self.schema_name/self.table_name
        # into `directory` as delimited records of the given variant,
        # then check the reported move count.
        #
        # variant:   records format variant name passed to
        #            DelimitedRecordsFormat.
        # directory: filesystem path of the output directory.
        # hints:     optional hint overrides; None means no overrides.
        #            (None replaces a shared mutable `{}` default.)
        if hints is None:
            hints = {}
        records_format = DelimitedRecordsFormat(variant=variant, hints=hints)

        # Directory URLs need a trailing slash after the file:// URI.
        directory_url = pathlib.Path(directory).resolve().as_uri() + '/'
        targets = self.records.targets
        sources = self.records.sources
        source = sources.table(schema_name=self.schema_name,
                               table_name=self.table_name,
                               db_engine=self.engine)
        target = targets.directory_from_url(output_url=directory_url,
                                            records_format=records_format)
        out = self.records.move(source, target)
        # assertIn reports the offending value on failure, unlike
        # assertTrue(x in [...]).
        self.assertIn(out.move_count, [1, None])
    def test_postgres_copy_options_csv_minimal_quoting(self):
        # The 'csv' variant with minimal quoting should map onto the
        # Postgres COPY options below when unloading.
        records_format = DelimitedRecordsFormat(variant='csv',
                                                hints={
                                                    'quoting': 'minimal',
                                                    'compression': None,
                                                })
        unhandled_hints = set(records_format.hints)
        fail_if_cant_handle_hint = True
        mode = CopyOptionsMode.UNLOADING
        hints = records_format.validate(fail_if_cant_handle_hint=True)

        out = postgres_copy_options_csv(unhandled_hints, hints,
                                        fail_if_cant_handle_hint, mode)
        # Each key appears once; the expected literal used to repeat
        # 'format', which is redundant and trips duplicate-key linters.
        self.assertEqual(
            out, {
                'format': 'csv',
                'quote': '"',
                'delimiter': ',',
                'encoding': 'UTF8',
                'header': True,
            })
示例#28
0
 def test_redshift_copy_options_datetimeformat(self):
     # Redshift's time_format doesn't support separate
     # configuration for datetimeformat vs datetimeformattz, but
     # the 'auto' flag seems to work with specific things (see
     # tests run in records_copy.py).
     #
     # Please verify new formats have a test run
     # and documented in records_copy.py before putting an entry in
     # here.
     def time_format_for(tz_hint, plain_hint):
         # Build a bluelabs format from the two hints and return the
         # 'time_format' option redshift_copy_options() derives from it.
         bluelabs_format = DelimitedRecordsFormat(
             variant='bluelabs',
             hints={
                 'datetimeformattz': tz_hint,
                 'datetimeformat': plain_hint,
             })
         pending_hints = set(bluelabs_format.hints.keys())
         copy_options = redshift_copy_options(pending_hints,
                                              bluelabs_format,
                                              fail_if_cant_handle_hint=True,
                                              fail_if_row_invalid=True,
                                              max_failure_rows=0)
         return copy_options['time_format']

     # First pass: the tz hint is the plain format plus an offset.
     for datetimeformat in DATETIME_CASES:
         self.assertEqual(
             time_format_for(f"{datetimeformat}OF", datetimeformat),
             'auto')
     # Second pass: both hints carry the identical format.
     for datetimeformat in DATETIME_CASES:
         self.assertEqual(
             time_format_for(datetimeformat, datetimeformat),
             datetimeformat)
示例#29
0
    def test_timeonlyformat(self):
        # For each entry in TIMEONLY_CASES, push a one-row DataFrame
        # holding the sample time (once as a pandas Timestamp, once as
        # a datetime.time) through prep_df_for_csv_output and check
        # that both columns equal create_sample() for that format.
        column_names = ['time_as_timestamp', 'time_as_time']
        records_schema = RecordsSchema.from_data({
            'schema': "bltypes/v1",
            'fields': {
                name: {"type": "time", "index": position}
                for position, name in enumerate(column_names, start=1)
            },
        })
        processing_instructions = ProcessingInstructions()
        for timeonlyformat in TIMEONLY_CASES:
            records_format = DelimitedRecordsFormat(
                variant='bluelabs',
                hints={'timeonlyformat': timeonlyformat})
            sample_timestamp = pd.Timestamp(year=SAMPLE_YEAR,
                                            month=SAMPLE_MONTH,
                                            day=SAMPLE_DAY,
                                            hour=SAMPLE_HOUR,
                                            minute=SAMPLE_MINUTE,
                                            second=SAMPLE_SECOND)
            sample_time = datetime.time(hour=SAMPLE_HOUR,
                                        minute=SAMPLE_MINUTE,
                                        second=SAMPLE_SECOND)
            df = pd.DataFrame(
                {
                    'time_as_timestamp': [sample_timestamp],
                    'time_as_time': [sample_time],
                },
                columns=column_names)

            new_df = prep_df_for_csv_output(
                df=df,
                include_index=False,
                records_schema=records_schema,
                records_format=records_format,
                processing_instructions=processing_instructions)
            # The format is passed as the assertion message so a
            # failure names the case that broke.
            for column in column_names:
                self.assertEqual(new_df[column][0],
                                 create_sample(timeonlyformat),
                                 timeonlyformat)
            # NOTE: no timezone-aware ('timetz') column is exercised here.
            self.assertIsNotNone(new_df)
示例#30
0
    def test_mysql_load_options_valid_quoting_no_doublequote(self) -> None:
        # quoting='all' combined with doublequote=False must make
        # mysql_load_options raise NotImplementedError, and the error
        # text must mention the offending 'doublequote=False' hint.
        hints = {
            'quoting': 'all',
            'doublequote': False,
            'compression': None,
        }
        records_format = DelimitedRecordsFormat(variant='bluelabs',
                                                hints=hints)
        unhandled_hints = set(records_format.hints)
        with self.assertRaises(NotImplementedError) as raised:
            mysql_load_options(unhandled_hints,
                               records_format,
                               fail_if_cant_handle_hint=True)

        self.assertIn('doublequote=False', str(raised.exception))