def known_supported_records_formats_for_load(self) -> List[BaseRecordsFormat]:
    """Return the delimited variants this loader can ingest natively.

    MySQL supports a healthy amount of load types, but doesn't support
    loading compressed files, so compression is disabled on each variant.
    """
    supported_variants = ('bluelabs', 'bigquery', 'vertica')
    return [
        DelimitedRecordsFormat(variant=variant, hints={'compression': None})
        for variant in supported_variants
    ]
def test_christmas_tree_format_1_permissive(self):
    """Unsupported hints are logged as warnings (not raised) when permissive."""
    records_format = DelimitedRecordsFormat(variant='dumb',
                                            hints=christmas_tree_format_1_hints)
    instructions = ProcessingInstructions(fail_if_cant_handle_hint=False)
    plan = RecordsLoadPlan(processing_instructions=instructions,
                           records_format=records_format)
    remaining_hints = set(plan.records_format.hints.keys())
    with patch.object(driver_logger, 'warning') as mock_warning:
        options = vertica_import_options(remaining_hints, plan)
    expected_options = {
        'abort_on_error': True,
        'delimiter': '\x01',
        'enforcelength': True,
        'error_tolerance': False,
        'escape_as': '\\',
        'load_method': 'AUTO',
        'no_commit': False,
        'null_as': None,
        'record_terminator': '\x02',
        'rejectmax': 1,
        'skip': 1,
        'trailing_nullcols': False,
    }
    self.assertDictEqual(options, expected_options)
    # The two hints Vertica can't honor should each have been warned about.
    self.assertListEqual(mock_warning.mock_calls,
                         [call("Ignoring hint compression = 'LZO'"),
                          call("Ignoring hint quoting = 'nonnumeric'")])
    self.assertEqual(remaining_hints, set())
def test_load_job_config_permissive(self):
    """bigquery-variant hints map onto the expected BigQuery load config."""
    records_format = DelimitedRecordsFormat(variant='bigquery')
    instructions = ProcessingInstructions(
        fail_if_dont_understand=True,
        fail_if_cant_handle_hint=True,
        fail_if_row_invalid=False)
    plan = RecordsLoadPlan(
        processing_instructions=instructions,
        records_format=records_format)
    remaining_hints = set(records_format.hints.keys())
    config = load_job_config(remaining_hints, plan)
    expectations = {
        'allowJaggedRows': True,
        'allowQuotedNewlines': True,
        'autodetect': False,
        'createDisposition': 'CREATE_NEVER',
        'destinationTableProperties': {},
        'encoding': 'UTF-8',
        'fieldDelimiter': ',',
        'ignoreUnknownValues': False,
        'maxBadRecords': 999999,
        'quote': '"',
        'schemaUpdateOptions': None,
        'skipLeadingRows': '1',
        'sourceFormat': 'CSV',
        'writeDisposition': 'WRITE_APPEND'
    }
    self.assertEqual(config.to_api_repr()['load'], expectations)
def test_timeonlyformat(self):
    """Check which timeonlyformat hints BigQuery's load config accepts.

    Double check this before adding anything else in here to see if it
    has changed, but HH:MI:SS is the only format accepted by BigQuery
    as of this writing.
    """
    should_raise = {
        'HH:MI:SS': False,
        'HH24:MI:SS': False,
        'HH12:MI AM': True,
    }
    processing_instructions = ProcessingInstructions(
        fail_if_dont_understand=True,
        fail_if_cant_handle_hint=True,
        fail_if_row_invalid=True)
    for timeonlyformat in TIMEONLY_CASES:
        records_format =\
            DelimitedRecordsFormat(variant='bigquery',
                                   hints={
                                       'timeonlyformat': timeonlyformat,
                                   })
        load_plan = RecordsLoadPlan(
            processing_instructions=processing_instructions,
            records_format=records_format)
        unhandled_hints = set(records_format.hints.keys())
        # Bug fix: the old try/except only *tolerated* NotImplementedError
        # when should_raise was True -- it never verified the exception
        # actually fired, so a format that should be rejected but wasn't
        # would pass silently.  assertRaises makes the expectation explicit.
        if should_raise.get(timeonlyformat, False):
            with self.assertRaises(NotImplementedError):
                load_job_config(unhandled_hints, load_plan)
        else:
            load_job_config(unhandled_hints, load_plan)
def test_pandas_read_csv_options_vertica(self):
    """Vertica-style hints produce the expected pandas read_csv kwargs."""
    self.maxDiff = None
    expected = {
        'dayfirst': False,
        'compression': None,
        'delimiter': '\x01',
        'doublequote': False,
        'engine': 'c',
        'error_bad_lines': True,
        'header': None,
        'lineterminator': '\x02',
        'prefix': 'untitled_',
        'quotechar': '"',
        'quoting': 3,
        'warn_bad_lines': True,
        'parse_dates': [0, 1, 2, 3],
    }
    instructions = ProcessingInstructions()
    records_format = DelimitedRecordsFormat(hints=vertica_format_hints)
    remaining_hints = set(records_format.hints)
    result = pandas_read_csv_options(records_format,
                                     self.records_schema,
                                     remaining_hints,
                                     instructions)
    self.assertEqual(expected, result)
    # Every hint should have been consumed.
    self.assertFalse(remaining_hints)
def test_datetimeformat(self):
    """Check which datetimeformat hints BigQuery's load config accepts.

    Double check this before adding anything else in here to see if it
    has changed, but YYYY-MM-DD HH:MI:SS, YYYY-MM-DD HH24:MI:SS and
    YYYY-MM-DD HH:MI:SS are the only formats accepted by BigQuery as of
    this writing.
    """
    should_raise = {
        'YYYY-MM-DD HH12:MI AM': True,
        'MM/DD/YY HH24:MI': True,
    }
    processing_instructions = ProcessingInstructions(
        fail_if_dont_understand=True,
        fail_if_cant_handle_hint=True,
        fail_if_row_invalid=True)
    for datetimeformat in DATETIME_CASES:
        records_format =\
            DelimitedRecordsFormat(variant='bigquery',
                                   hints={
                                       'datetimeformat': datetimeformat
                                   })
        load_plan = RecordsLoadPlan(
            processing_instructions=processing_instructions,
            records_format=records_format)
        unhandled_hints = set(records_format.hints.keys())
        # Bug fix: the old try/except only *tolerated* NotImplementedError
        # when should_raise was True -- it never verified the exception
        # actually fired, so a format that should be rejected but wasn't
        # would pass silently.  assertRaises makes the expectation explicit.
        if should_raise.get(datetimeformat, False):
            with self.assertRaises(NotImplementedError):
                load_job_config(unhandled_hints, load_plan)
        else:
            load_job_config(unhandled_hints, load_plan)
def test_pandas_to_csv_options_christmas_tree_format_3(self):
    """Unsupportable hints fall back to warnings when permissive."""
    expected = {
        'compression': 'bz2',
        'date_format': '%d-%m-%Y %H:%M:%S.%f%z',
        'doublequote': True,
        'encoding': 'UTF8',
        'escapechar': '\\',
        'header': False,
        'line_terminator': '\x02',
        'quotechar': '"',
        'quoting': 0,
        'sep': '\x01',
    }
    instructions = ProcessingInstructions(fail_if_cant_handle_hint=False)
    records_format = DelimitedRecordsFormat(
        hints=christmas_tree_format_3_hints)
    remaining_hints = set(records_format.hints)
    with patch.object(driver_logger, 'warning') as mock_warning:
        result = pandas_to_csv_options(records_format,
                                       remaining_hints,
                                       instructions)
    self.assertEqual(expected, result)
    # Each hint pandas can't express should produce exactly one warning.
    expected_warnings = [
        call("Ignoring hint quoting = 'some_future_option_not_supported_now'"),
        call("Ignoring hint escape = '@'"),
        call("Ignoring hint datetimeformattz = 'HH:MI:SSOF YYYY-MM-DD'"),
        call("Ignoring hint datetimeformattz = 'YYYY-MM-DD HH24:MI:SSOF'"),
        call("Ignoring hint datetimeformat = 'YYYY-MM-DD HH24:MI:SS'"),
    ]
    self.assertListEqual(mock_warning.mock_calls, expected_warnings)
    self.assertFalse(remaining_hints)
def test_vertica_format_permissive(self):
    """vertica variant with permissive row handling yields lax import options."""
    records_format = DelimitedRecordsFormat(variant='vertica')
    instructions = ProcessingInstructions(fail_if_row_invalid=False)
    plan = RecordsLoadPlan(processing_instructions=instructions,
                           records_format=records_format)
    remaining_hints = set(plan.records_format.hints.keys())
    options = vertica_import_options(remaining_hints, plan)
    expected_options = {
        'abort_on_error': False,
        'delimiter': '\x01',
        'enclosed_by': None,
        'enforcelength': False,
        'error_tolerance': True,
        'escape_as': None,
        'gzip': False,
        'load_method': 'AUTO',
        'no_commit': False,
        'null_as': None,
        'record_terminator': '\x02',
        'rejectmax': None,
        'skip': 0,
        'trailing_nullcols': True,
    }
    self.assertDictEqual(options, expected_options)
    self.assertEqual(remaining_hints, set())
def test_pandas_read_csv_options_bluelabs(self):
    """bluelabs-style hints produce the expected pandas read_csv kwargs."""
    expected = {
        'dayfirst': False,
        'compression': 'gzip',
        'delimiter': ',',
        'doublequote': False,
        'encoding': 'UTF8',
        'engine': 'python',
        'error_bad_lines': True,
        'escapechar': '\\',
        'header': None,
        'prefix': 'untitled_',
        'quotechar': '"',
        'quoting': 3,
        'warn_bad_lines': True,
        'parse_dates': [0, 1, 2, 3],
    }
    instructions = ProcessingInstructions()
    records_format = DelimitedRecordsFormat(hints=bluelabs_format_hints)
    remaining_hints = set(records_format.hints)
    result = pandas_read_csv_options(records_format,
                                     self.records_schema,
                                     remaining_hints,
                                     instructions)
    self.assertEqual(expected, result)
    # Every hint should have been consumed.
    self.assertFalse(remaining_hints)
def test_can_unload_format_delimited_false(self):
    """BigQueryUnloader reports it cannot unload delimited formats."""
    db = Mock(name='mock_db')
    url_resolver = MagicMock(name='mock_url_resolver')
    gcs_temp_base_loc = MagicMock(name='gcs_temp_base_loc')
    unloader = BigQueryUnloader(db=db,
                                url_resolver=url_resolver,
                                gcs_temp_base_loc=gcs_temp_base_loc)
    self.assertFalse(unloader.can_unload_format(DelimitedRecordsFormat()))
def test_known_supported_records_formats_for_load(self):
    """Redshift loader advertises the expected formats, in priority order."""
    expected = [
        DelimitedRecordsFormat(variant='csv',
                               hints={
                                   'dateformat': 'YYYY-MM-DD',
                                   'timeonlyformat': 'HH24:MI:SS',
                                   'datetimeformat': 'YYYY-MM-DD HH:MI:SS',
                                   'datetimeformattz': 'YYYY-MM-DD HH:MI:SSOF',
                               }),
        DelimitedRecordsFormat(variant='bigquery'),
        DelimitedRecordsFormat(variant='csv'),
        DelimitedRecordsFormat(variant='bluelabs', hints={'quoting': 'all'}),
        DelimitedRecordsFormat(variant='bluelabs'),
    ]
    actual = self.redshift_loader.known_supported_records_formats_for_load()
    self.assertEqual(actual, expected)
def test_known_supported_records_formats_for_unload(self):
    """Vertica unloader supports exactly the vertica delimited variant."""
    db = Mock(name='db')
    source_format = Mock(name='source_records_format',
                         spec=DelimitedRecordsFormat)
    source_format.hints = {}
    s3_temp_base_loc = Mock(name='s3_temp_base_loc')
    unloader = VerticaUnloader(db=db, s3_temp_base_loc=s3_temp_base_loc)
    actual = unloader.known_supported_records_formats_for_unload()
    self.assertEqual(actual, [DelimitedRecordsFormat(variant='vertica')])
def test_weird_timeonlyformat(self):
    """An unrecognized timeonlyformat hint raises when strict (the default)."""
    vertica_format = DelimitedRecordsFormat(variant='dumb',
                                            hints={
                                                'timeonlyformat': 'something else'
                                            })
    processing_instructions = ProcessingInstructions()
    load_plan = RecordsLoadPlan(processing_instructions=processing_instructions,
                                records_format=vertica_format)
    unhandled_hints = set(load_plan.records_format.hints.keys())
    # assertRaisesRegexp is a deprecated alias (since Python 3.2);
    # use assertRaisesRegex, matching the other tests in this suite.
    with self.assertRaisesRegex(NotImplementedError,
                                "Implement hint timeonlyformat='something else' or try again "
                                "with fail_if_cant_handle_hint=False"):
        vertica_import_options(unhandled_hints, load_plan)
def test_load_job_config_vertica(self):
    """vertica's \\x02 record terminator is not supported by BigQuery loads."""
    records_format = DelimitedRecordsFormat(variant='vertica')
    instructions = ProcessingInstructions(fail_if_dont_understand=True,
                                          fail_if_cant_handle_hint=True,
                                          fail_if_row_invalid=True)
    plan = RecordsLoadPlan(processing_instructions=instructions,
                           records_format=records_format)
    remaining_hints = set(records_format.hints.keys())
    with self.assertRaisesRegex(NotImplementedError,
                                r"Implement hint record-terminator='\\x02' "
                                "or try again with fail_if_cant_handle_hint=False"):
        load_job_config(remaining_hints, plan)
def test_quote_all_with_doublequote(self):
    """quoting='all' with (default) doublequote can't be expressed for Vertica."""
    vertica_format = DelimitedRecordsFormat(variant='csv',
                                            hints={
                                                'quoting': 'all'
                                            })
    processing_instructions = ProcessingInstructions()
    load_plan = RecordsLoadPlan(processing_instructions=processing_instructions,
                                records_format=vertica_format)
    unhandled_hints = set(load_plan.records_format.hints.keys())
    # assertRaisesRegexp is a deprecated alias (since Python 3.2);
    # use assertRaisesRegex, matching the other tests in this suite.
    with self.assertRaisesRegex(NotImplementedError,
                                r"Implement hint doublequote=True or try again with "
                                "fail_if_cant_handle_hint=False"):
        vertica_import_options(unhandled_hints, load_plan)
def test_json(self):
    """The default records format serializes to the documented bluelabs JSON."""
    records_format = DelimitedRecordsFormat()
    expected = {
        'hints': {
            'compression': 'GZIP',
            'dateformat': 'YYYY-MM-DD',
            'datetimeformat': 'YYYY-MM-DD HH24:MI:SS',
            'datetimeformattz': 'YYYY-MM-DD HH:MI:SSOF',
            'doublequote': False,
            'encoding': 'UTF8',
            'escape': '\\',
            'field-delimiter': ',',
            'header-row': False,
            'quotechar': '"',
            'quoting': None,
            'record-terminator': '\n',
            'timeonlyformat': 'HH24:MI:SS'
        },
        'type': 'delimited',
        'variant': 'bluelabs'
    }
    self.assertEqual(expected, json.loads(records_format.json()))
def load(self, hints, fail_if):
    """Drive the redshift loader with the given hints.

    fail_if toggles all three strictness flags on the processing
    instructions at once.
    """
    instructions = ProcessingInstructions()
    instructions.fail_if_cant_handle_hint = fail_if
    instructions.fail_if_dont_understand = fail_if
    instructions.fail_if_row_invalid = fail_if
    self.mock_records_load_plan.records_format = DelimitedRecordsFormat(
        hints=hints)
    self.mock_records_load_plan.processing_instructions = instructions
    loader = self.redshift_db_driver.loader()
    return loader.load(schema='myschema',
                       table='mytable',
                       load_plan=self.mock_records_load_plan,
                       directory=self.mock_directory)
def test_load_job_config_unknown_quoting(self):
    """An unrecognized quoting hint raises NotImplementedError when strict."""
    records_format = DelimitedRecordsFormat(variant='bigquery',
                                            hints={'quoting': 'blah'})
    instructions = ProcessingInstructions(
        fail_if_dont_understand=True,
        fail_if_cant_handle_hint=True,
        fail_if_row_invalid=True)
    plan = RecordsLoadPlan(
        processing_instructions=instructions,
        records_format=records_format)
    remaining_hints = set(records_format.hints.keys())
    with self.assertRaises(NotImplementedError):
        load_job_config(remaining_hints, plan)
def test_move_from_dataframe_compressed_with_header_row(self,
                                                        mock_complain_on_unhandled_hints,
                                                        mock_io,
                                                        mock_prep_df_for_csv_output):
    # Moving two dataframes into a gzipped fileobj with header-row=True:
    # both chunks are appended (mode="a"), but only the first chunk should
    # carry the header line (header=True vs header=False below).
    mock_fileobj = Mock(name='fileobj')
    mock_records_format = DelimitedRecordsFormat(hints={
        'encoding': 'UTF8',
        'compression': 'GZIP',
        'header-row': True,
        'quoting': 'all'
    })
    fileobj_target = FileobjTarget(fileobj=mock_fileobj,
                                   records_format=mock_records_format)
    # Two single-row dataframes; prep_df_for_csv_output is patched to
    # return them unchanged, in order.
    mock_df_1 = Mock(name='df_1')
    mock_df_1.index = ['a']
    mock_df_2 = Mock(name='df_2')
    mock_df_2.index = ['a']
    mock_processing_instructions = Mock(name='processing_instructions')
    mock_dfs_source = Mock(name='dfs_source')
    mock_dfs_source.dfs = [mock_df_1, mock_df_2]
    mock_prep_df_for_csv_output.side_effect = [mock_df_1, mock_df_2]
    out = fileobj_target.move_from_dataframes_source(mock_dfs_source,
                                                     mock_processing_instructions)
    # First chunk writes the header row (header=True); quoting=1 is
    # csv.QUOTE_ALL, matching the 'quoting': 'all' hint above.
    mock_df_1.to_csv.assert_called_with(path_or_buf=ANY,
                                        index=mock_dfs_source.include_index,
                                        mode="a",
                                        compression='gzip',
                                        date_format='%Y-%m-%d %H:%M:%S.%f%z',
                                        doublequote=False,
                                        encoding='UTF8',
                                        escapechar='\\',
                                        header=True,
                                        line_terminator='\n',
                                        quotechar='"',
                                        quoting=1,
                                        sep=',')
    # Second chunk appends with no header so the header isn't duplicated.
    mock_df_2.to_csv.assert_called_with(path_or_buf=ANY,
                                        index=mock_dfs_source.include_index,
                                        mode="a",
                                        compression='gzip',
                                        date_format='%Y-%m-%d %H:%M:%S.%f%z',
                                        doublequote=False,
                                        encoding='UTF8',
                                        escapechar='\\',
                                        header=False,
                                        line_terminator='\n',
                                        quotechar='"',
                                        quoting=1,
                                        sep=',')
    # One move per dataframe; a fileobj target produces no output URLs.
    self.assertEqual(out, MoveResult(move_count=2, output_urls=None))
def test_pandas_read_csv_options_inconsistent_date_format(self):
    """Mismatched date/datetime hint families cannot be expressed to pandas."""
    instructions = ProcessingInstructions()
    hints = bluelabs_format_hints.copy()
    hints['dateformat'] = 'DD-MM-YYYY'
    hints['datetimeformattz'] = 'MM-DD-YYYY HH24:MIOF'
    hints['datetimeformat'] = 'DD-MM-YYYY HH24:MI'
    records_format = DelimitedRecordsFormat(hints=hints)
    remaining_hints = set(records_format.hints)
    with self.assertRaises(NotImplementedError):
        pandas_read_csv_options(records_format,
                                self.records_schema,
                                remaining_hints,
                                instructions)
def test_known_supported_records_formats_for_unload(self):
    """With the S3 export library installed, the vertica variant is offered."""
    db = Mock(name='db')
    source_format = Mock(name='source_records_format',
                         spec=DelimitedRecordsFormat)
    source_format.hints = {}
    s3_temp_base_loc = Mock(name='s3_temp_base_loc')
    unloader = VerticaUnloader(db=db, s3_temp_base_loc=s3_temp_base_loc)
    # Simulate the awslib library being present in user_libraries.
    resultset = Mock(name='resultset')
    resultset.fetchall.return_value = ['awslib']
    db.execute.return_value = resultset
    out = unloader.known_supported_records_formats_for_unload()
    db.execute.assert_called_with(
        "SELECT lib_name from user_libraries where lib_name = 'awslib'")
    self.assertEqual(out, [DelimitedRecordsFormat(variant='vertica')])
def test_load_job_config_no_bzip_support(self):
    """BigQuery cannot load BZIP-compressed delimited files."""
    records_format = DelimitedRecordsFormat(variant='bigquery',
                                            hints={'compression': 'BZIP'})
    instructions = ProcessingInstructions(
        fail_if_dont_understand=True,
        fail_if_cant_handle_hint=True,
        fail_if_row_invalid=True)
    plan = RecordsLoadPlan(
        processing_instructions=instructions,
        records_format=records_format)
    remaining_hints = set(records_format.hints.keys())
    with self.assertRaisesRegex(
            NotImplementedError,
            r"Implement hint compression='BZIP' "
            "or try again with fail_if_cant_handle_hint=False"):
        load_job_config(remaining_hints, plan)
def test_quote_all_without_doublequote(self):
    """quoting='all' without doublequote maps to enclosed_by '"'."""
    hints = {
        'quoting': 'all',
        'doublequote': False,
        # Vertica doesn't support exporting CSV variant style dates by
        # default, so let's pick some it can for purposes of this
        # test:
        'dateformat': 'YYYY-MM-DD',
        'datetimeformat': 'YYYY-MM-DD HH:MI:SS',
        'datetimeformattz': 'YYYY-MM-DD HH:MI:SSOF',
    }
    records_format = DelimitedRecordsFormat(variant='csv', hints=hints)
    instructions = ProcessingInstructions()
    plan = RecordsLoadPlan(processing_instructions=instructions,
                           records_format=records_format)
    remaining_hints = set(plan.records_format.hints.keys())
    options = vertica_import_options(remaining_hints, plan)
    self.assertEqual(options['enclosed_by'], '"')
def test_load_job_config_unsupported_datetimeformattz(self):
    """A datetimeformattz BigQuery can't parse raises when strict."""
    records_format = DelimitedRecordsFormat(
        variant='bigquery',
        hints={'datetimeformattz': 'MM/DD/YY HH:MI:SSOF'})
    instructions = ProcessingInstructions(
        fail_if_dont_understand=True,
        fail_if_cant_handle_hint=True,
        fail_if_row_invalid=True)
    plan = RecordsLoadPlan(
        processing_instructions=instructions,
        records_format=records_format)
    remaining_hints = set(records_format.hints.keys())
    with self.assertRaisesRegex(
            NotImplementedError,
            r"Implement hint datetimeformattz='MM/DD/YY HH:MI:SSOF' "
            "or try again with fail_if_cant_handle_hint=False"):
        load_job_config(remaining_hints, plan)
def test_pandas_to_csv_options_vertica(self):
    """Vertica-style hints produce the expected pandas to_csv kwargs."""
    expected = {
        'date_format': '%Y-%m-%d %H:%M:%S.%f%z',
        'doublequote': False,
        'encoding': 'UTF8',
        'header': False,
        'line_terminator': '\x02',
        'quotechar': '"',
        'quoting': 3,
        'sep': '\x01',
    }
    instructions = ProcessingInstructions()
    records_format = DelimitedRecordsFormat(hints=vertica_format_hints)
    remaining_hints = set(records_format.hints)
    result = pandas_to_csv_options(records_format,
                                   remaining_hints,
                                   instructions)
    self.assertEqual(expected, result)
    # Every hint should have been consumed.
    self.assertFalse(remaining_hints)
def test_pandas_to_csv_options_csv(self):
    """csv-style hints produce the expected pandas to_csv kwargs."""
    expected = {
        'compression': 'gzip',
        'date_format': '%m/%d/%y %H:%M',
        'doublequote': True,
        'encoding': 'UTF8',
        'header': True,
        'line_terminator': '\n',
        'quotechar': '"',
        'quoting': 0,
        'sep': ','
    }
    instructions = ProcessingInstructions(fail_if_cant_handle_hint=True)
    records_format = DelimitedRecordsFormat(hints=csv_format_hints)
    remaining_hints = set(records_format.hints)
    result = pandas_to_csv_options(records_format,
                                   remaining_hints,
                                   instructions)
    self.assertEqual(expected, result)
    # Every hint should have been consumed.
    self.assertFalse(remaining_hints)
def test_dumb(self):
    """The 'dumb' variant's default hints match the records spec.

    Should match up with
    https://github.com/bluelabsio/records-mover/blob/master/docs/RECORDS_SPEC.md#dumb-variant
    """
    expected_hints = {
        'compression': 'GZIP',
        'dateformat': 'YYYY-MM-DD',
        'datetimeformat': 'YYYY-MM-DD HH:MI:SS',
        'datetimeformattz': 'YYYY-MM-DD HH:MI:SSOF',
        'doublequote': False,
        'encoding': 'UTF8',
        'escape': None,
        'field-delimiter': ',',
        'quotechar': '"',
        'quoting': None,
        'record-terminator': '\n',
        'timeonlyformat': 'HH24:MI:SS',
        'header-row': False,
    }
    records_format = DelimitedRecordsFormat(variant='dumb')
    self.assertEqual(expected_hints, records_format.hints)
def test_with_altered_hints(self):
    """alter_hints() overrides one hint and records it as a custom hint.

    Should match up with
    https://github.com/bluelabsio/records-mover/blob/master/docs/RECORDS_SPEC.md#csv-variant
    """
    records_format = DelimitedRecordsFormat(variant='csv').alter_hints(
        {'quotechar': 'A'})
    expected_hints = {
        'compression': 'GZIP',
        'dateformat': 'MM/DD/YY',
        'datetimeformat': 'MM/DD/YY HH24:MI',
        'datetimeformattz': 'MM/DD/YY HH24:MI',
        'doublequote': True,
        'encoding': 'UTF8',
        'escape': None,
        'field-delimiter': ',',
        'quotechar': 'A',
        'quoting': 'minimal',
        'record-terminator': '\n',
        'timeonlyformat': 'HH24:MI:SS',
        'header-row': True,
    }
    self.assertEqual(expected_hints, records_format.hints)
    # Only the overridden hint counts as custom.
    self.assertEqual({'quotechar': 'A'}, records_format.custom_hints)
def postgres_copy_to_options(unhandled_hints: Set[str],
                             delimited_records_format: DelimitedRecordsFormat,
                             fail_if_cant_handle_hint: bool) ->\
        Tuple[DateOutputStyle, Optional[DateOrderStyle], PostgresCopyOptions]:
    """Translate a delimited records format into Postgres COPY TO settings.

    Returns a (date output style, optional date order style, copy options)
    tuple for unloading.
    """
    hints = delimited_records_format.validate(
        fail_if_cant_handle_hint=fail_if_cant_handle_hint)
    # CSV-flavored hints need Postgres' CSV mode; otherwise text mode works.
    build_options = (postgres_copy_options_csv
                     if needs_csv_format(hints)
                     else postgres_copy_options_text)
    copy_options = build_options(unhandled_hints,
                                 hints,
                                 fail_if_cant_handle_hint,
                                 CopyOptionsMode.UNLOADING)
    date_output_style, date_order_style = determine_date_output_style(
        unhandled_hints, hints, fail_if_cant_handle_hint)
    return (date_output_style, date_order_style, copy_options)
def test_pandas_to_csv_options_christmas_tree_format_1(self):
    """LZO compression can't be expressed to pandas; warned when permissive."""
    expected = {
        'date_format': '%Y-%m-%d %H:%M:%S.%f%z',
        'doublequote': False,
        'encoding': 'UTF8',
        'escapechar': '\\',
        'header': True,
        'line_terminator': '\x02',
        'quotechar': '"',
        'quoting': 2,
        'sep': '\x01'
    }
    instructions = ProcessingInstructions(fail_if_cant_handle_hint=False)
    records_format = DelimitedRecordsFormat(
        hints=christmas_tree_format_1_hints)
    remaining_hints = set(records_format.hints)
    with patch.object(driver_logger, 'warning') as mock_warning:
        result = pandas_to_csv_options(records_format,
                                       remaining_hints,
                                       instructions)
    self.assertEqual(expected, result)
    self.assertListEqual(mock_warning.mock_calls,
                         [call("Ignoring hint compression = 'LZO'")])
    self.assertFalse(remaining_hints)