    def test_run(self):

        self.setUpTestEnvironment(COMPRESSION_FOLDER_PATH)

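        # Run the discovery (check) job and verify it completes successfully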
        runner.run_check_job_and_check_status(self)

        found_catalogs = menagerie.get_catalogs(self.conn_id)
        self.assertEqual(
            len(found_catalogs),
            1,
            msg="unable to locate schemas for connection {}".format(
                self.conn_id))

        found_catalog_names = {c['tap_stream_id'] for c in found_catalogs}
        subset = self.expected_check_streams().issubset(found_catalog_names)
        self.assertTrue(
            subset,
            msg="Expected check streams are not subset of discovered catalog")

        # Clear state before our run
        menagerie.set_state(self.conn_id, {})

        self.select_specific_catalog(found_catalogs,
                                     "gz_file_having_empty_csv")

        runner.run_sync_job_and_check_status(self)

        # The selected gz file contains only an empty CSV, so the sync
        # should emit no records
        expected_records = 0
        records = runner.get_upserts_from_target_output()

        self.assertEqual(expected_records, len(records))
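
    # A minimal sketch of how select_specific_catalog might be implemented,
    # assuming the tap-tester helpers menagerie.get_annotated_schema and
    # connections.select_catalog_and_fields_via_metadata exist with these
    # signatures; both calls (and the `connections` import) are assumptions,
    # not taken from this test.
    def select_specific_catalog(self, found_catalogs, catalog_name):
        for catalog in found_catalogs:
            if catalog['tap_stream_id'] != catalog_name:
                continue
            schema = menagerie.get_annotated_schema(self.conn_id,
                                                    catalog['stream_id'])
            connections.select_catalog_and_fields_via_metadata(
                self.conn_id, catalog, schema)
            break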
Example #2
    def test_run(self):
        runner.run_check_job_and_check_status(self)

        found_catalogs = menagerie.get_catalogs(self.conn_id)
        self.check_all_streams_in_catalogs(found_catalogs)
        self.select_found_catalogs(found_catalogs)

        # clear state and run the actual sync
        menagerie.set_state(self.conn_id, {})
        runner.run_sync_job_and_check_status(self)
        self.check_output_record_counts()

        max_bookmarks_from_records = runner.get_max_bookmarks_from_target(self)
        state = menagerie.get_state(self.conn_id)
        bookmarks = state.get("bookmarks", {})
        self.check_bookmarks(bookmarks, max_bookmarks_from_records)
        self.check_offsets(bookmarks)
        self.look_for_unexpected_bookmarks(bookmarks)
        self.assertIsNone(state.get("currently_syncing"))
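
    # A rough sketch of the bookmark comparison this test relies on: each
    # stream's bookmark in state should equal the maximum replication-key
    # value seen in the target output. The helper body and the state layout
    # assumed here are hypothetical, inferred from how the test calls it.
    def check_bookmarks(self, bookmarks, max_bookmarks_from_records):
        for stream, max_bookmark in max_bookmarks_from_records.items():
            stream_bookmark = bookmarks.get(stream, {})
            for replication_key, max_value in max_bookmark.items():
                self.assertEqual(
                    stream_bookmark.get(replication_key),
                    max_value,
                    msg="bookmark for stream {} lags the synced records".format(
                        stream))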
Example #3
    def test_run(self):

        self.setUpTestEnvironment()

        runner.run_check_job_and_check_status(self)

        found_catalogs = menagerie.get_catalogs(self.conn_id)
        self.assertEqual(
            len(found_catalogs),
            len(self.expected_check_streams()),
            msg="unable to locate schemas for connection {}".format(
                self.conn_id))

        found_catalog_names = {c['tap_stream_id'] for c in found_catalogs}
        subset = self.expected_check_streams().issubset(found_catalog_names)
        self.assertTrue(
            subset,
            msg="Expected check streams are not subset of discovered catalog")

        # Clear state before our run
        menagerie.set_state(self.conn_id, {})

        self.select_found_catalogs(found_catalogs)

        runner.run_sync_job_and_check_status(self)

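        # Expected record counts for each supported file format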
        num_csv_records = 998
        num_jsonl_records = 10
        num_gz_has_csv_records = 998
        num_gz_has_jsonl_records = 2
        num_zip_records = 40

        expected_records = (num_csv_records + num_jsonl_records +
                            num_gz_has_csv_records +
                            num_gz_has_jsonl_records + num_zip_records)

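        # Load the expected records for each format from the JSON fixtures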
        with open(
                utils.get_resources_path(
                    "output_csv_records.json",
                    ALL_SUPPORTED_FOLDER_PATH)) as json_file:
            expected_csv_records = simplejson.load(json_file,
                                                   use_decimal=True).get(
                                                       "records", [])
        with open(
                utils.get_resources_path(
                    "output_jsonl_records.json",
                    ALL_SUPPORTED_FOLDER_PATH)) as json_file:
            expected_jsonl_records = simplejson.load(json_file,
                                                     use_decimal=True).get(
                                                         "records", [])
        with open(
                utils.get_resources_path(
                    "output_gz_csv_records.json",
                    ALL_SUPPORTED_FOLDER_PATH)) as json_file:
            expected_gz_has_csv_records = simplejson.load(
                json_file, use_decimal=True).get("records", [])
        with open(
                utils.get_resources_path(
                    "output_gz_jsonl_records.json",
                    ALL_SUPPORTED_FOLDER_PATH)) as json_file:
            expected_gz_has_jsonl_records = simplejson.load(
                json_file, use_decimal=True).get("records", [])
        with open(
                utils.get_resources_path(
                    "output_zip_records.json",
                    ALL_SUPPORTED_FOLDER_PATH)) as json_file:
            expected_zip_records = simplejson.load(json_file,
                                                   use_decimal=True).get(
                                                       "records", [])

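        # Read the target output and keep only the upsert messages per stream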
        synced_records = runner.get_records_from_target_output()

        csv_upsert_messages = [
            m for m in synced_records.get('all_support_csv').get('messages')
            if m['action'] == 'upsert'
        ]
        jsonl_upsert_messages = [
            m for m in synced_records.get('all_support_jsonl').get('messages')
            if m['action'] == 'upsert'
        ]
        gz_with_csv_upsert_messages = [
            m for m in synced_records.get('all_support_gz_has_csv').get(
                'messages') if m['action'] == 'upsert'
        ]
        gz_with_jsonl_upsert_messages = [
            m for m in synced_records.get('all_support_gz_has_jsonl').get(
                'messages') if m['action'] == 'upsert'
        ]
        zip_upsert_messages = [
            m for m in synced_records.get('all_support_zip').get('messages')
            if m['action'] == 'upsert'
        ]

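        # Extract the record payloads from the upsert messages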
        csv_records = [message.get('data') for message in csv_upsert_messages]
        jsonl_records = [
            message.get('data') for message in jsonl_upsert_messages
        ]
        gz_has_csv_records = [
            message.get('data') for message in gz_with_csv_upsert_messages
        ]
        gz_has_jsonl_records = [
            message.get('data') for message in gz_with_jsonl_upsert_messages
        ]
        zip_records = [message.get('data') for message in zip_upsert_messages]

        num_records = len(csv_records) + len(jsonl_records) + len(
            gz_has_csv_records) + len(gz_has_jsonl_records) + len(zip_records)
        self.assertEqual(expected_records, num_records)

        self.assertEqual(expected_csv_records, csv_records)
        self.assertEqual(expected_jsonl_records, jsonl_records)
        self.assertEqual(expected_gz_has_csv_records, gz_has_csv_records)
        self.assertEqual(expected_gz_has_jsonl_records, gz_has_jsonl_records)
        self.assertEqual(expected_zip_records, zip_records)
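
    # The five load/filter/compare sequences above share one shape, so a
    # table-driven helper like the sketch below (using only the runner, utils,
    # and simplejson calls already present in the test) would remove the
    # duplication. The helper name is hypothetical; the stream and fixture
    # names are copied from the test above.
    def assert_streams_match_fixtures(self):
        expected_outputs = {
            'all_support_csv': 'output_csv_records.json',
            'all_support_jsonl': 'output_jsonl_records.json',
            'all_support_gz_has_csv': 'output_gz_csv_records.json',
            'all_support_gz_has_jsonl': 'output_gz_jsonl_records.json',
            'all_support_zip': 'output_zip_records.json',
        }
        synced_records = runner.get_records_from_target_output()
        for stream, fixture in expected_outputs.items():
            # Load the expected records for this stream from its fixture file
            with open(utils.get_resources_path(
                    fixture, ALL_SUPPORTED_FOLDER_PATH)) as json_file:
                expected = simplejson.load(
                    json_file, use_decimal=True).get('records', [])
            # Compare against the upsert payloads the sync actually emitted
            actual = [
                m['data'] for m in synced_records.get(stream).get('messages')
                if m['action'] == 'upsert'
            ]
            self.assertEqual(expected, actual)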