def test_timeout_backoff__get_file_handle(self, mocked_sftp, mocked_sleep):
    """
    Verify that 'get_file_handle' backs off and retries on socket.timeout,
    giving up (re-raising) after 5 attempts.
    """
    # mock 'open' so every call raises 'socket.timeout'
    mocked_open = mock.Mock()
    mocked_open.side_effect = socket.timeout
    mocked_sftp.open.side_effect = mocked_open
    config = {
        "host": "10.0.0.1",
        "port": 22,
        "username": "******",
        "password": "",
        "start_date": "2020-01-01"
    }
    # create connection
    conn = client.connection(config=config)
    with self.assertRaises(socket.timeout):
        # function call
        conn.get_file_handle({"filepath": "/root/file.csv"})
    # verify that the tap backed off 5 times
    # (assertEqual: 'assertEquals' is deprecated and removed in Python 3.12)
    self.assertEqual(mocked_sftp.open.call_count, 5)
def test_timeout_backoff__get_files_by_prefix(self, mocked_sftp):
    """
    Verify that 'get_files_by_prefix' backs off and retries on
    socket.timeout, spending at least 60 seconds retrying before
    re-raising (sleep is intentionally NOT mocked here so that real
    elapsed wall-clock time can be measured).
    """
    # force every 'listdir_attr' call to raise 'socket.timeout'
    timeout_mock = mock.Mock()
    timeout_mock.side_effect = socket.timeout
    mocked_sftp.listdir_attr.side_effect = timeout_mock

    config = {
        "host": "10.0.0.1",
        "port": 22,
        "username": "******",
        "password": "",
        "start_date": "2020-01-01"
    }

    # create connection
    conn = client.connection(config=config)

    started_at = datetime.now()
    with self.assertRaises(socket.timeout):
        # function call
        conn.get_files_by_prefix(".")
    finished_at = datetime.now()

    # the retries must have kept the tap busy for at least 60 seconds
    elapsed_seconds = (finished_at - started_at).total_seconds()
    self.assertGreaterEqual(elapsed_seconds, 60)
def test_timeout_backoff__sync_file(self, mocked_get_row_iterators, mocked_get_file_handle, mocked_sleep):
    """
    Verify that 'sync_file' backs off and retries on socket.timeout,
    giving up (re-raising) after 5 attempts.
    """
    # mock 'get_row_iterators' so every call raises 'socket.timeout'
    mocked_get_row_iterators.side_effect = socket.timeout
    # mock 'get_file_handle' so no real SFTP access happens
    mocked_get_file_handle.return_value = None
    config = {
        "host": "10.0.0.1",
        "port": 22,
        "username": "******",
        "password": "",
        "start_date": "2020-01-01"
    }
    table_spec = {
        "key_properties": "test_key_properties",
        "delimiter": ","
    }
    file = {"filepath": "/root/file.csv"}
    # create connection
    conn = client.connection(config=config)
    with self.assertRaises(socket.timeout):
        # function call
        sync.sync_file(conn=conn, f=file, stream="test_stream", table_spec=table_spec)
    # verify that the tap backed off 5 times
    # (assertEqual: 'assertEquals' is deprecated and removed in Python 3.12)
    self.assertEqual(mocked_get_row_iterators.call_count, 5)
def discover_streams(config):
    """
    Build the catalog of discoverable streams.

    For each table configured in config['tables'] (a JSON-encoded list of
    table specs), derive a JSON schema and metadata via 'get_schema' and
    return one catalog entry per table.

    :param config: tap configuration dict; must contain a 'tables' key
                   holding a JSON string of table specifications
    :return: list of stream dicts with 'stream', 'tap_stream_id',
             'schema' and 'metadata' keys
    """
    streams = []
    conn = client.connection(config)
    # NOTE: the original code computed an unused 'prefix' local from
    # config['user_dir']; it has been removed as dead code.
    tables = json.loads(config['tables'])
    for table_spec in tables:
        schema, stream_md = get_schema(conn, table_spec)
        streams.append({
            'stream': table_spec['table_name'],
            'tap_stream_id': table_spec['table_name'],
            'schema': schema,
            'metadata': stream_md
        })
    return streams
def sync_stream(config, state, stream):
    """
    Sync one stream: fetch every file modified since the stored bookmark
    (or config['start_date'] when no bookmark exists), emit its records,
    and advance the 'modified_since' bookmark after each file.

    :param config: tap configuration dict (must contain 'tables' JSON and
                   'start_date')
    :param state: singer state dict holding per-table bookmarks
    :param stream: the catalog stream being synced
    :return: number of records streamed (0 when the stream is skipped)
    """
    table_name = stream.tap_stream_id
    bookmark = singer.get_bookmark(state, table_name, 'modified_since')
    modified_since = utils.strptime_to_utc(bookmark or config['start_date'])

    LOGGER.info('Syncing table "%s".', table_name)
    LOGGER.info('Getting files modified since %s.', modified_since)

    conn = client.connection(config)
    matching_specs = [
        spec for spec in json.loads(config["tables"])
        if spec["table_name"] == table_name
    ]

    # skip the stream unless exactly one table configuration matches
    if not matching_specs:
        LOGGER.info("No table configuration found for '%s', skipping stream", table_name)
        return 0
    if len(matching_specs) > 1:
        LOGGER.info("Multiple table configurations found for '%s', skipping stream", table_name)
        return 0

    table_spec = matching_specs[0]

    files = conn.get_files(table_spec["search_prefix"],
                           table_spec["search_pattern"],
                           modified_since)

    LOGGER.info('Found %s files to be synced.', len(files))

    records_streamed = 0
    if not files:
        return records_streamed

    for f in files:
        records_streamed += sync_file(conn, f, stream, table_spec)
        # checkpoint after every file so an interrupted run resumes here
        state = singer.write_bookmark(state, table_name, 'modified_since',
                                      f['last_modified'].isoformat())
        singer.write_state(state)

    LOGGER.info('Wrote %s records for table "%s".', records_streamed, table_name)
    return records_streamed
def test_timeout_value_not_passed_in_config(self):
    """
    Verify that the connection falls back to the default timeout of 300
    seconds when 'request_timeout' is absent from the config.
    """
    # create config without 'request_timeout'
    config = {
        "host": "10.0.0.1",
        "port": 22,
        "username": "******",
        "password": "",
        "start_date": "2020-01-01"
    }
    # create connection
    conn = client.connection(config=config)
    # verify the expected default timeout value is set
    # (assertEqual: 'assertEquals' is deprecated and removed in Python 3.12)
    self.assertEqual(conn.request_timeout, 300)
def discover_streams(config):
    """
    Build the catalog of discoverable streams.

    For each table configured in config['tables'] (a JSON-encoded list of
    table specs), sample records to derive a JSON schema, attach standard
    singer metadata (INCREMENTAL replication), and return one catalog
    entry per table.

    :param config: tap configuration dict; must contain a 'tables' key
                   holding a JSON string of table specifications
    :return: list of stream dicts with 'stream', 'tap_stream_id',
             'schema' and 'metadata' keys
    """
    streams = []
    conn = client.connection(config)
    # NOTE: the original code computed an unused 'prefix' local from
    # config['user_dir']; it has been removed as dead code.
    tables = json.loads(config['tables'])
    for table_spec in tables:
        LOGGER.info('Sampling records to determine table JSON schema "%s".',
                    table_spec['table_name'])
        schema = json_schema.get_schema_for_table(conn, table_spec)
        stream_md = metadata.get_standard_metadata(
            schema,
            key_properties=table_spec.get('key_properties'),
            replication_method='INCREMENTAL')
        streams.append(
            {
                'stream': table_spec['table_name'],
                'tap_stream_id': table_spec['table_name'],
                'schema': schema,
                'metadata': stream_md
            }
        )
    return streams