Example #1
    def test_run(self):
        """
        Verify that when all streams and fields are selected, records that have
        custom fields replicate those custom fields to the target.

        PREREQUISITE
        The static board (utils.NEVER_DELETE_BOARD_ID) must have cards with at
        least one populated value for every expected custom field type, otherwise
        the custom field assertions below are invalid.
        """
        print("\n\nRUNNING {}\n\n".format(self.name()))

        # Resetting tracked parent objects prior to test
        utils.reset_tracked_parent_objects()

        # ensure data exists for sync streams and set expectations
        _, existing_boards = utils.get_total_record_count_and_objects('boards')
        custom_fields_dict = {x: []
                              for x in self.expected_custom_fields()
                              }  # custom fields grouped by type
        custom_fields_by_board = {
            x.get('id'): copy.deepcopy(custom_fields_dict)
            for x in existing_boards
        }  # per-board custom fields grouped by type

        # get existing custom fields for each board
        print("Getting objects on board with static custom field set")
        for board_id, board_cfields in custom_fields_by_board.items():
            cfields = utils.get_custom_fields('boards', board_id)
            for field in self.expected_custom_fields():
                cfields_type_field = [f for f in cfields if f['type'] == field]
                if cfields_type_field:
                    board_cfields[field] += cfields_type_field

        # get expected cards with custom fields
        expected_records_cfields = list()
        board_id = utils.NEVER_DELETE_BOARD_ID
        all_cards_on_board = utils.get_objects('cards', parent_id=board_id)
        print("Setting custom fields expectations based on static data")
        for card in all_cards_on_board:
            card_with_cfields = utils.get_objects('cards',
                                                  obj_id=card.get('id'),
                                                  parent_id=board_id,
                                                  custom_fields=True)

            if card_with_cfields:
                expected_records_cfields += card_with_cfields

        # verify at least 1 record exists for each custom field type or else our assertions are invalid
        fields_exist = {x: False for x in self.expected_custom_fields()}
        for record in expected_records_cfields:
            if all(v for _, v in fields_exist.items()):
                break
            value = record.get('value')
            if value:
                key = next(iter(value))
                if key in self.expected_custom_fields(
                ) and not fields_exist.get(key):
                    fields_exist[key] = True
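                # value keys 'checked' and 'option' correspond to the 'checkbox' and 'list' custom field types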
                elif key == 'checked':
                    fields_exist['checkbox'] = True
                elif key == 'option':
                    fields_exist['list'] = True

        self.assertTrue(all(v for _, v in fields_exist.items()),
                        msg="Not all custom field types have data. Data must be restored manually on Trello account" +\
                        "\nCurrent data: {}".format(fields_exist))

        conn_id = connections.ensure_connection(self)

        # run in check mode
        check_job_name = runner.run_check_mode(self, conn_id)

        # verify check exit codes
        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
        menagerie.verify_check_exit_status(self, exit_status, check_job_name)

        found_catalogs = menagerie.get_catalogs(conn_id)
        self.assertGreater(
            len(found_catalogs),
            0,
            msg="unable to locate schemas for connection {}".format(conn_id))

        found_catalog_names = set(
            map(lambda c: c['tap_stream_id'], found_catalogs))
        diff = self.expected_check_streams().symmetric_difference(
            found_catalog_names)
        self.assertEqual(
            len(diff),
            0,
            msg="discovered schemas do not match: {}".format(diff))
        print("discovered schemas are OK")

        # Select all streams and all fields
        self.select_all_streams_and_fields(conn_id,
                                           found_catalogs,
                                           select_all_fields=True)

        for cat in found_catalogs:
            catalog_entry = menagerie.get_annotated_schema(
                conn_id, cat['stream_id'])
            for k in self.expected_automatic_fields()[cat['stream_name']]:
                mdata = next(
                    (m for m in catalog_entry['metadata']
                     if len(m['breadcrumb']) == 2 and m['breadcrumb'][1] == k),
                    None)
                print("Validating inclusion on {}: {}".format(
                    cat['stream_name'], mdata))
                self.assertTrue(
                    mdata and mdata['metadata']['inclusion'] == 'automatic')

        catalogs = menagerie.get_catalogs(conn_id)

        #clear state
        menagerie.set_state(conn_id, {})

        # run sync
        sync_job_name = runner.run_sync_mode(self, conn_id)

        # Verify tap exit codes
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        # read target output
        first_record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks())
        replicated_row_count = reduce(lambda accum, c: accum + c,
                                      first_record_count_by_stream.values())
        synced_records = runner.get_records_from_target_output()

        # Verify target has records for all synced streams
        for stream, count in first_record_count_by_stream.items():
            assert stream in self.expected_sync_streams()
            self.assertGreater(
                count,
                0,
                msg="failed to replicate any data for: {}".format(stream))
        print("total replicated row count: {}".format(replicated_row_count))

        # Testing streams with custom fields
        for stream in self.testable_streams():
            with self.subTest(stream=stream):

                data = synced_records.get(stream)
                record_messages = [row['data'] for row in data['messages']]
                record_ids = [message.get('id') for message in record_messages]

                record_custom_fields = [
                    message.get('customFieldItems')
                    for message in record_messages
                    if message.get('customFieldItems', None)
                ]
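                # Flatten the replicated custom field items into a list of their ids for the assertions below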
                record_cfield_ids = []
                for record in record_custom_fields:
                    for cfield in record:
                        record_cfield_ids.append(cfield.get('id'))

                # Verify that we replicated the records with custom_fields
                for card in all_cards_on_board:
                    if card.get('id') in expected_records_cfields:
                        self.assertIn(
                            card.get('id'),
                            record_ids,
                            msg="Missing a record that has custom fields:\n{}".
                            format(card.get('id')))

                # Verify that we replicated the expected custom fields on those records
                for expected_cfield in expected_records_cfields:
                    self.assertIn(
                        expected_cfield.get('id'),
                        record_cfield_ids,
                        msg="Missing custom field from expected {} record id={}"
                        .format(stream, expected_cfield.get('id')))

                    # Verify the expected custom field attributes match the replicated data
                    for actual_cfields in record_custom_fields:
                        expected_cfield_replicated = expected_cfield in actual_cfields
                        if expected_cfield_replicated:
                            break
                    self.assertTrue(expected_cfield_replicated)

        # Reset the parent objects that we have been tracking
        utils.reset_tracked_parent_objects()
Example #2
    def test_run(self):
        """
        Verify that when no fields are selected, only the automatic fields are
        replicated and the replicated records match the data in Trello.

        PREREQUISITE
        For EACH stream at least one record should exist. If a stream has no
        data, a single record is created for it before the sync.
        """
        print("\n\nRUNNING {}\n\n".format(self.name()))

        # Resetting tracked parent objects prior to test
        utils.reset_tracked_parent_objects()

        # ensure data exists for sync streams and set expectations
        expected_records = {x: []
                            for x in self.expected_sync_streams()
                            }  # expected records by stream
        for stream in self.testable_streams():
            since = None
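            # Incremental streams only expect records on or after the configured start_date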
            if stream in self.expected_incremental_streams():
                since = dt.strptime(self.get_properties()['start_date'],
                                    self.START_DATE_FORMAT).strftime(
                                        self.TEST_TIME_FORMAT)
            _, existing_objects = utils.get_total_record_count_and_objects(
                stream, since=since)
            if existing_objects:
                logging.info("Data exists for stream: {}".format(stream))
                for obj in existing_objects:
                    expected_records[stream].append({
                        field: obj.get(field)
                        for field in self.expected_automatic_fields().get(
                            stream)
                    })
                continue

            logging.info("Data does not exist for stream: {}".format(stream))

            new_object = utils.create_object(stream)
            logging.info("Data generated for stream: {}".format(stream))
            expected_records[stream].append({
                field: new_object.get(field)
                for field in self.expected_automatic_fields().get(stream)
            })

        conn_id = connections.ensure_connection(self)

        #run in check mode
        check_job_name = runner.run_check_mode(self, conn_id)

        #verify check exit codes
        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
        menagerie.verify_check_exit_status(self, exit_status, check_job_name)

        found_catalogs = menagerie.get_catalogs(conn_id)
        self.assertGreater(
            len(found_catalogs),
            0,
            msg="unable to locate schemas for connection {}".format(conn_id))

        found_catalog_names = set(
            map(lambda c: c['tap_stream_id'], found_catalogs))
        diff = self.expected_check_streams().symmetric_difference(
            found_catalog_names)
        self.assertEqual(
            len(diff),
            0,
            msg="discovered schemas do not match: {}".format(diff))
        print("discovered schemas are OK")

        # Select all streams but only automatic fields
        self.select_all_streams_and_fields(conn_id,
                                           found_catalogs,
                                           select_all_fields=False)

        for cat in found_catalogs:
            catalog_entry = menagerie.get_annotated_schema(
                conn_id, cat['stream_id'])
            for k in self.expected_automatic_fields()[cat['stream_name']]:
                mdata = next(
                    (m for m in catalog_entry['metadata']
                     if len(m['breadcrumb']) == 2 and m['breadcrumb'][1] == k),
                    None)
                print("Validating inclusion on {}: {}".format(
                    cat['stream_name'], mdata))
                self.assertTrue(
                    mdata and mdata['metadata']['inclusion'] == 'automatic')

        catalogs = menagerie.get_catalogs(conn_id)

        #clear state
        menagerie.set_state(conn_id, {})

        # run sync
        sync_job_name = runner.run_sync_mode(self, conn_id)

        # Verify tap exit codes
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        # read target output
        first_record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks())
        replicated_row_count = reduce(lambda accum, c: accum + c,
                                      first_record_count_by_stream.values())
        synced_records = runner.get_records_from_target_output()

        # Verify target has records for all synced streams
        for stream, count in first_record_count_by_stream.items():
            assert stream in self.expected_sync_streams()
            self.assertGreater(
                count,
                0,
                msg="failed to replicate any data for: {}".format(stream))
        print("total replicated row count: {}".format(replicated_row_count))

        for stream in self.testable_streams():
            with self.subTest(stream=stream):
                data = synced_records.get(stream)
                record_messages_keys = [
                    set(row['data'].keys()) for row in data['messages']
                ]
                expected_keys = self.expected_automatic_fields().get(stream)

                # Verify that ONLY automatic fields are emitted
                for actual_keys in record_messages_keys:
                    self.assertEqual(
                        actual_keys.symmetric_difference(expected_keys),
                        set(),
                        msg="Expected automatic fields and nothing else.")

                actual_records = [row['data'] for row in data['messages']]

                # Verify the number of records match expectations
                # NOTE: actions seem to be getting updated by Trello's backend resulting in an action from a previous
                #       test run getting synced again, so we will be less strict for this stream
                if stream == 'actions':
                    self.assertLessEqual(len(expected_records.get(stream)),
                                         len(actual_records),
                                         msg="Number of actual records does not match expectations. " +\
                                         "We probably have duplicate records.")
                else:
                    self.assertEqual(len(expected_records.get(stream)),
                                     len(actual_records),
                                     msg="Number of actual records does not match expectations. " +\
                                     "We probably have duplicate records.")

                # verify by values, that we replicated the expected records
                for actual_record in actual_records:
                    if stream != 'actions':  # see NOTE above
                        self.assertTrue(
                            actual_record in expected_records.get(stream),
                            msg="Actual record missing from expectations")
                for expected_record in expected_records.get(stream):
                    self.assertTrue(expected_record in actual_records,
                                    msg="Expected record missing from target.")

        # CLEAN UP
        stream_to_delete = 'boards'
        boards_remaining = 5
        print("Deleting all but {} records for stream {}.".format(
            boards_remaining, stream_to_delete))
        board_count = len(expected_records.get(stream_to_delete, []))
        for obj_to_delete in expected_records.get(
                stream_to_delete, []):  # Delete boards down to the remaining count
            if board_count > boards_remaining:
                utils.delete_object(stream_to_delete, obj_to_delete.get('id'))
                board_count -= 1
            else:
                break

        # Reset the parent objects that we have been tracking
        utils.reset_tracked_parent_objects()
Example #3
    def test_run(self):
        """
        Verify that for each stream you can get multiple pages of data and that
        both the automatic fields and the other selected fields are replicated.

        PREREQUISITE
        For EACH stream add enough data that you surpass the limit of a single
        fetch of data.  For instance if you have a limit of 250 records ensure
        that 251 (or more) records have been posted for that stream.
        """
        print("\n\nRUNNING {}\n\n".format(self.name()))

        # Ensure tested streams have a record count which exceeds the API LIMIT
        expected_records = {x: [] for x in self.expected_sync_streams()} # expected records by stream
        final_count = {x: 0 for x in self.expected_sync_streams()}
        for stream in self.testable_streams(): # just actions at the moment
            # Look for parent object with most number of stream records
            start_date = dt.strptime(self.get_properties().get('start_date'), self.START_DATE_FORMAT)
            since = start_date.strftime(self.TEST_TIME_FORMAT)
            parent_stream = utils.get_parent_stream(stream)
            record_count, parent_id = self.get_highest_record_count_by_parent_obj_id(parent_stream, stream, since)

            if record_count > 0: # If we do have data already add it to expectations
                logging.info("Data exists for stream: {}".format(stream))
                existing_objects = utils.get_objects(obj_type=stream, parent_id=parent_id, since=since)
                assert record_count == len(existing_objects), "TEST ISSUE | referencing wrong parent obj."
                for obj in existing_objects:
                    expected_records[stream].append(
                        {field: obj.get(field)
                         for field in self.expected_automatic_fields().get(stream)}
                    )

            if record_count <= self.API_LIMIT:
                logging.info("Not enough data to paginate: {} has {} records".format(stream, record_count))
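                # Create records one at a time until the stream has more than a single page of data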
                while record_count <= self.API_LIMIT:
                    new_object = utils.create_object(obj_type=stream, parent_id=parent_id)
                    record_count += 1
                    logging.info("Record Created: {} has {} records".format(stream, record_count))
                    expected_records[stream].append({field: new_object.get(field)
                                                     for field in self.expected_automatic_fields().get(stream)})
                final_count[stream] = record_count
                logging.info("FINAL RECORD COUNT: {} has {} records".format(stream, final_count[stream]))

                # Verify we did in fact generate enough records to exceed the API LIMIT
                # If we are failing here, it is most likely an issue with /tests/trello_utils.py
                self.assertGreater(final_count[stream], self.API_LIMIT,
                                   msg="Failed to create sufficient data prior to sync.")

        conn_id = connections.ensure_connection(self)

        #run in check mode
        check_job_name = runner.run_check_mode(self, conn_id)

        #verify check exit codes
        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
        menagerie.verify_check_exit_status(self, exit_status, check_job_name)

        found_catalogs = menagerie.get_catalogs(conn_id)
        self.assertGreater(len(found_catalogs), 0, msg="unable to locate schemas for connection {}".format(conn_id))

        found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs))

        diff = self.expected_check_streams().symmetric_difference( found_catalog_names )
        self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff))
        print("discovered schemas are OK")

        #select all catalogs
        for cat in found_catalogs:
            catalog_entry = menagerie.get_annotated_schema(conn_id, cat['stream_id'])

            for k in self.expected_automatic_fields()[cat['stream_name']]:
                mdata = next((m for m in catalog_entry['metadata']
                              if len(m['breadcrumb']) == 2 and m['breadcrumb'][1] == k), None)
                print("Validating inclusion on {}: {}".format(cat['stream_name'], mdata))
                self.assertTrue(mdata and mdata['metadata']['inclusion'] == 'automatic')

            connections.select_catalog_and_fields_via_metadata(conn_id, cat, catalog_entry)

        #clear state
        menagerie.set_state(conn_id, {})

        # run sync
        sync_job_name = runner.run_sync_mode(self, conn_id)

        # Verify tap exit codes
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        # read target output
        record_count_by_stream = runner.examine_target_output_file(self, conn_id,
                                                                         self.expected_sync_streams(),
                                                                         self.expected_pks())
        replicated_row_count = reduce(lambda accum, c: accum + c, record_count_by_stream.values())
        synced_records = runner.get_records_from_target_output()

        for stream in self.testable_streams():
            with self.subTest(stream=stream):

                # Verify we are paginating for testable synced streams
                self.assertGreater(record_count_by_stream.get(stream, -1), self.API_LIMIT,
                                   msg="We didn't guarantee pagination. The number of records should exceed the API limit.")

                data = synced_records.get(stream, [])
                record_messages_keys = [set(row['data'].keys()) for row in data['messages']]

                for actual_keys in record_messages_keys:

                    # Verify that the automatic fields are sent to the target for paginated streams
                    self.assertEqual(self.expected_automatic_fields().get(stream) - actual_keys,
                                     set(), msg="A paginated synced stream has a record that is missing automatic fields.")

                    # Verify we have more fields sent to the target than just automatic fields (this is set above)
                    # SKIP THIS ASSERTION IF ALL FIELDS ARE INTENTIONALLY AUTOMATIC FOR THIS STREAM
                    self.assertGreater(actual_keys, self.expected_automatic_fields().get(stream),
                                      msg="A paginated synced stream has a record that is missing non-automatic fields.")

        # Reset the parent objects that we have been tracking
        utils.reset_tracked_parent_objects()
Example #4
    def test_run(self):
        print("\n\nRUNNING {}\n\n".format(self.name()))

        # ensure data exists for sync streams and set expectations
        expected_records_1 = {x: [] for x in self.expected_sync_streams()} # expected records by stream (sync 1)
        for stream in self.expected_sync_streams().difference(self.untestable_streams()):
            if stream in self.expected_incremental_sync_streams():
                start_date = dt.strptime(self.get_properties().get('start_date'), self.START_DATE_FORMAT)
                since = start_date.strftime(self.TEST_TIME_FORMAT)
                _, existing_objects = utils.get_total_record_count_and_objects(stream, since=since)
            else:
                _, existing_objects = utils.get_total_record_count_and_objects(stream)

            if existing_objects:
                logging.info("Data exists for stream: {}".format(stream))
                for obj in existing_objects:  # add existing records to expectations
                    expected_records_1[stream].append(obj)
                continue
            # Create 1 record if none exist
            logging.info("Data does not exist for stream: {}".format(stream))
            new_object = utils.create_object(stream)
            logging.info("Data generated for stream: {}".format(stream))
            expected_records_1[stream].append(new_object)

        # Create comment actions
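        # Comments are the only action type that can be updated, so create two to exercise the update path between syncs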
        start_date = dt.strptime(self.get_properties().get('start_date'), self.START_DATE_FORMAT)
        since = start_date.strftime(self.TEST_TIME_FORMAT)
        # count_before, before_records = utils.get_total_record_count_and_objects('actions', since=since)
        action_comments = []
        action_comments.append(utils.create_object('actions', action_type="comment"))
        action_comments.append(utils.create_object('actions', action_type="comment"))
        for action in action_comments:
            expected_records_1['actions'].append(action)
        # count_after, after_records = utils.get_total_record_count_and_objects('actions', since=since)


        # run in check mode
        conn_id = connections.ensure_connection(self)
        check_job_name = runner.run_check_mode(self, conn_id)

        #verify check exit codes
        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
        menagerie.verify_check_exit_status(self, exit_status, check_job_name)

        found_catalogs = menagerie.get_catalogs(conn_id)
        self.assertGreater(len(found_catalogs), 0, msg="unable to locate schemas for connection {}".format(conn_id))

        found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs))

        diff = self.expected_check_streams().symmetric_difference( found_catalog_names )
        self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff))
        print("discovered schemas are OK")

        #select all catalogs
        for c in found_catalogs:
            catalog_entry = menagerie.get_annotated_schema(conn_id, c['stream_id'])

            for k in self.expected_automatic_fields()[c['stream_name']]:
                mdata = next((m for m in catalog_entry['metadata']
                              if len(m['breadcrumb']) == 2 and m['breadcrumb'][1] == k), None)
                print("Validating inclusion on {}: {}".format(c['stream_name'], mdata))
                self.assertTrue(mdata and mdata['metadata']['inclusion'] == 'automatic')

            connections.select_catalog_and_fields_via_metadata(conn_id, c, catalog_entry)
            
        #clear state
        menagerie.set_state(conn_id, {})

        sync_job_name = runner.run_sync_mode(self, conn_id)

        #verify tap and target exit codes
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        # verify data was replicated
        record_count_by_stream_1 = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks()
        )
        replicated_row_count_1 = reduce(lambda accum, c: accum + c, record_count_by_stream_1.values())
        self.assertGreater(replicated_row_count_1, 0, msg="failed to replicate any data: {}".format(record_count_by_stream_1))
        print("total replicated row count: {}".format(replicated_row_count_1))

        # get emitted with records
        synced_records_1 = runner.get_records_from_target_output()

        # Verify bookmarks were saved for all streams
        state_1 = menagerie.get_state(conn_id)
        for stream in self.expected_incremental_sync_streams():
            self.assertTrue(state_1.get('bookmarks', {}).get(stream, {}).get('window_start', {}))
        print("Bookmarks meet expectations")

        # Generate data between syncs for bookmarking streams
        print("Generating more data prior to 2nd sync")
        expected_records_2 = {x: [] for x in self.expected_sync_streams()}
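        # Add one new record per full table stream so the 2nd sync has new data to pick up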
        for stream in self.expected_full_table_sync_streams().difference(self.untestable_streams()):
            for _ in range(1):
                new_object = utils.create_object(stream)
                expected_records_2[stream].append({field: new_object.get(field)
                                                   for field in self.expected_automatic_fields().get(stream)})

        # Update a single comment action before second sync
        print("Updating existing data prior to 2nd sync")
        updated_records = {x: [] for x in self.expected_sync_streams()}
        action_id_to_update = random.choice(action_comments).get('id')
        updated_action = utils.update_object_action(obj_id=action_id_to_update)
        updated_records['actions'].append(updated_action)

        # Get new actions from data manipulation between syncs
        print("Acquiring in-test actions prior to 2nd sync")
        for stream in self.expected_incremental_sync_streams().difference(self.untestable_streams()):
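            # Expectations start at the bookmarked window_start minus LOOKBACK_WINDOW days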
            state = dt.strptime(state_1.get('bookmarks').get(stream).get('window_start'), self.TEST_TIME_FORMAT)
            since = (state - timedelta(days=self.LOOKBACK_WINDOW)).strftime(self.TEST_TIME_FORMAT)
            # start_date = dt.strptime(self.get_properties().get('start_date'), self.START_DATE_FORMAT)
            # since = start_date.strftime(self.TEST_TIME_FORMAT)
            _, objects = utils.get_total_record_count_and_objects(stream, since=since)
            for obj in objects:
                expected_records_2[stream].append({field: obj.get(field)
                                                   for field in self.expected_automatic_fields().get(stream)})

        # Run another sync
        print("Running 2nd sync job")
        sync_job_name_2 = runner.run_sync_mode(self, conn_id)

        #verify tap and target exit codes
        exit_status_2 = menagerie.get_exit_status(conn_id, sync_job_name_2)
        menagerie.verify_sync_exit_status(self, exit_status_2, sync_job_name_2)

        # verify data was replicated
        record_count_by_stream_2 = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks()
        )
        replicated_row_count_2 = reduce(lambda accum, c: accum + c, record_count_by_stream_2.values())
        self.assertGreater(replicated_row_count_2, 0,
                           msg="failed to replicate any data: {}".format(record_count_by_stream_2))
        print("total replicated row count: {}".format(replicated_row_count_2))

        # get emitted with records
        synced_records_2 = runner.get_records_from_target_output()

        # Verify bookmarks were saved as expected inc streams
        state_2 = menagerie.get_state(conn_id)
        for stream in self.expected_incremental_sync_streams():
            self.assertTrue(state_2.get('bookmarks', {}).get(stream, {}).get('window_start', {}))
        print("Bookmarks meet expectations")

        # TESTING FULL TABLE STREAMS
        for stream in self.expected_full_table_sync_streams().difference(self.untestable_streams()):
            with self.subTest(stream=stream):
                record_count_1 = record_count_by_stream_1.get(stream, 0)
                record_count_2 = record_count_by_stream_2.get(stream, 0)

                # Assert we have data for both syncs for full table streams
                self.assertGreater(record_count_1, 0)
                self.assertGreater(record_count_2, 0)

                # Assert that we are capturing the expected number of records for full table streams
                self.assertGreater(record_count_2, record_count_1,
                                   msg="Full table streams should have more data in second sync.")
                self.assertEqual((record_count_2 - record_count_1),
                                 len(expected_records_2.get(stream, [])),
                                 msg="The difference in record counts between syncs should " +\
                                 "equal the number of records we created between syncs.\n" +\
                                 "This is not the case for {}".format(stream))

                # Test that we are capturing the expected records for full table streams
                expected_ids_1 = set(record.get('id') for record in expected_records_1.get(stream))
                data_1 = synced_records_1.get(stream, [])
                record_messages_1 = [row.get('data') for row in data_1['messages']]
                record_ids_1 = set(row.get('data').get('id') for row in data_1['messages'])
                expected_ids_2 = set(record.get('id') for record in expected_records_2.get(stream))
                data_2 = synced_records_2.get(stream, [])
                record_messages_2 = [row.get('data') for row in data_2['messages']]
                record_ids_2 = set(row.get('data').get('id') for row in data_2['messages'])

                # verify all expected records are replicated for both syncs
                self.assertEqual(expected_ids_1, record_ids_1,
                                 msg="Data discrepancy. Expected records do not match actual in sync 1.")
                self.assertTrue(expected_ids_1.issubset(record_ids_2),
                                 msg="Data discrepancy. Expected records do not match actual in sync 2.")

                for expected_record in expected_records_1.get(stream):
                    actual_record = [message for message in record_messages_1
                                     if message.get('id') == expected_record.get('id')].pop()
                    self.assertEqual(set(expected_record.keys()), set(actual_record.keys()),
                                     msg="Field mismatch between expectations and replicated records in sync 1.")

                # verify the 2nd sync gets records created after the 1st sync
                self.assertEqual(set(record_ids_2).difference(set(record_ids_1)),
                                 expected_ids_2,
                                 msg="We did not get the new record(s)")

        print("Full table streams tested.")

        # TESTING INCREMENTAL STREAMS
        for stream in self.expected_incremental_sync_streams().difference(self.untestable_streams()):
            with self.subTest(stream=stream):
                record_count_1 = record_count_by_stream_1.get(stream, 0)
                record_count_2 = record_count_by_stream_2.get(stream, 0)

                # Assert we have data for both syncs for inc streams
                self.assertGreater(record_count_1, 0)
                self.assertGreater(record_count_2, 0)

                # Assert that we are capturing the expected number of records for inc streams
                self.assertEqual(record_count_1, len(expected_records_1.get(stream, [])),
                                 msg="Stream {} replicated an unexpected number of records on 1st sync.".format(stream))
                self.assertEqual(record_count_2, len(expected_records_2.get(stream, [])),
                                 msg="Stream {} replicated an unexpected number of records on 2nd sync.".format(stream))

                # Assert that we are capturing the expected records for inc streams
                data_1 = synced_records_1.get(stream, [])
                record_messages_1 = [row.get('data').get('id') for row in data_1['messages']]
                data_2 = synced_records_2.get(stream, [])
                record_messages_2 = [row.get('data').get('id') for row in data_2['messages']]
                for record in expected_records_1.get(stream):
                    self.assertTrue(record.get('id') in record_messages_1,
                                    msg="Missing an expected record from sync 1.")
                for record in expected_records_2.get(stream):
                    self.assertTrue(record.get('id') in record_messages_2,
                                    msg="Missing an expected record from sync 2.")

                record_data_1 = [row.get('data') for row in data_1['messages']]
                record_data_2 = [row.get('data') for row in data_2['messages']]

                # Testing action comments (the only action type that can be updated)
                for action in action_comments:

                    # Get text value for action comment from sync 1
                    original_action_text = ""
                    for record in record_data_1:
                        if record.get('id') == action.get('id'):
                            original_action_text = record.get('data').get('text')
                    assert original_action_text, "Record {} is missing from 1st sync.".format(action.get('id'))
                    # Get text value for action comment from sync 2
                    current_action_text = ""
                    for record in record_data_2:
                        if record.get('id') == action.get('id'):
                            current_action_text = record.get('data').get('text')
                    assert current_action_text, "Record {} is missing from 2nd sync.".format(action.get('id'))

                    # Verify the action comment text matches expectations
                    if action.get('id') == action_id_to_update:
                        self.assertNotEqual(original_action_text, current_action_text, msg="Update was not captured.")
                        self.assertIn("UPDATE", current_action_text, msg="Update was captured but not as expected.")
                    else:
                        self.assertEqual(original_action_text, current_action_text, msg="Text does not match expected.")

        print("Incremental streams tested.")

        # CLEANING UP
        stream_to_delete = 'boards'
        boards_remaining = 5
        print("Deleting all but {} records for stream {}.".format(boards_remaining, stream_to_delete))
        board_count = len(expected_records_1.get(stream_to_delete, [])) + len(expected_records_2.get(stream_to_delete, []))
        for obj_to_delete in expected_records_2.get(stream_to_delete, []): # Delete boards down to the remaining count
            if board_count > boards_remaining:
                utils.delete_object(stream_to_delete, obj_to_delete.get('id'))
                board_count -= 1
            else:
                break
        for obj_to_delete in expected_records_1.get(stream_to_delete, []): # Delete boards down to the remaining count
            if board_count > boards_remaining:
                utils.delete_object(stream_to_delete, obj_to_delete.get('id'))
                board_count -= 1
            else:
                break
        # Reset the parent objects that we have been tracking
        utils.reset_tracked_parent_objects()
Example #5
    def test_run(self):
        print("\n\nRUNNING {}\n\n".format(self.name()))

        # Initialize start date prior to first sync
        self.START_DATE = self.get_properties().get('start_date')

        # ensure data exists for sync streams and set expectations
        records_to_create = 3
        expected_records = {x: []
                            for x in self.expected_sync_streams()
                            }  # expected records by stream
        for stream in self.expected_sync_streams().difference(
                self.untestable_streams()):
            if stream in self.expected_incremental_sync_streams():
                since = dt.strptime(self.START_DATE,
                                    self.START_DATE_FORMAT).strftime(
                                        self.TEST_TIME_FORMAT)
                _, existing_objects = utils.get_total_record_count_and_objects(
                    stream, since=since)
            else:
                _, existing_objects = utils.get_total_record_count_and_objects(
                    stream)

            if existing_objects:
                logging.info("Data exists for stream: {}".format(stream))
                for obj in existing_objects:  # add existing records to expectations
                    expected_records[stream].append({
                        field: obj.get(field)
                        for field in self.expected_automatic_fields().get(
                            stream)
                    })
            else:
                logging.info(
                    "Data does not exist for stream: {}".format(stream))
            while len(expected_records.get(stream)) < records_to_create:
                # Create more records if necessary
                new_object = utils.create_object(stream)
                logging.info("Data generated for stream: {}".format(stream))
                expected_records[stream].append({
                    field: new_object.get(field)
                    for field in self.expected_automatic_fields().get(stream)
                })

        # run in check mode
        conn_id = connections.ensure_connection(self)
        check_job_name = runner.run_check_mode(self, conn_id)

        #verify check exit codes
        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
        menagerie.verify_check_exit_status(self, exit_status, check_job_name)

        found_catalogs = menagerie.get_catalogs(conn_id)
        self.assertGreater(
            len(found_catalogs),
            0,
            msg="unable to locate schemas for connection {}".format(conn_id))

        found_catalog_names = set(
            map(lambda c: c['tap_stream_id'], found_catalogs))

        diff = self.expected_check_streams().symmetric_difference(
            found_catalog_names)
        self.assertEqual(
            len(diff),
            0,
            msg="discovered schemas do not match: {}".format(diff))
        print("discovered schemas are OK")

        #select all catalogs
        for c in found_catalogs:
            catalog_entry = menagerie.get_annotated_schema(
                conn_id, c['stream_id'])

            for k in self.expected_automatic_fields()[c['stream_name']]:
                mdata = next(
                    (m for m in catalog_entry['metadata']
                     if len(m['breadcrumb']) == 2 and m['breadcrumb'][1] == k),
                    None)
                print("Validating inclusion on {}: {}".format(
                    c['stream_name'], mdata))
                self.assertTrue(
                    mdata and mdata['metadata']['inclusion'] == 'automatic')

            connections.select_catalog_and_fields_via_metadata(
                conn_id, c, catalog_entry)

        #clear state
        menagerie.set_state(conn_id, {})

        # Run sync
        sync_job_name = runner.run_sync_mode(self, conn_id)

        #verify tap and target exit codes
        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)

        # verify data was replicated
        record_count_by_stream = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks())
        replicated_row_count = reduce(lambda accum, c: accum + c,
                                      record_count_by_stream.values())
        self.assertGreater(replicated_row_count,
                           0,
                           msg="failed to replicate any data: {}".format(
                               record_count_by_stream))
        print("total replicated row count: {}".format(replicated_row_count))
        synced_records = runner.get_records_from_target_output()

        # Verify bookmarks were saved for all streams
        state = menagerie.get_state(conn_id)
        for stream in self.expected_incremental_sync_streams():
            self.assertTrue(
                state.get('bookmarks', {}).get(stream,
                                               {}).get('window_start', {}))
        print("Bookmarks meet expectations")

        # Grab the empty formatted states to test
        states_to_test = [
            self.get_states_formatted(i)
            for i in range(len(self.ACTIONS_STATES))
        ]

        ##########################################################################
        ### Testing standard sync state_0
        ##########################################################################
        version_0 = menagerie.get_state_version(conn_id)

        # Set window_start to start_date
        window_start_0 = dt.strptime(self.START_DATE, self.START_DATE_FORMAT)
        states_to_test[0]['bookmarks']['actions'][
            'window_start'] = window_start_0.strftime(self.TEST_TIME_FORMAT)

        print("Interjecting test state:\n{}".format(states_to_test[0]))
        menagerie.set_state(conn_id, states_to_test[0], version_0)

        # Run another sync
        print("Running sync job 0")
        sync_job_name_0 = runner.run_sync_mode(self, conn_id)

        #verify tap and target exit codes
        exit_status_0 = menagerie.get_exit_status(conn_id, sync_job_name_0)
        menagerie.verify_sync_exit_status(self, exit_status_0, sync_job_name_0)

        # verify data was replicated
        record_count_by_stream_0 = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks())
        replicated_row_count_0 = reduce(lambda accum, c: accum + c,
                                        record_count_by_stream_0.values())
        self.assertGreater(replicated_row_count_0,
                           0,
                           msg="failed to replicate any data: {}".format(
                               record_count_by_stream_0))
        print("total replicated row count: {}".format(replicated_row_count_0))
        synced_records_0 = runner.get_records_from_target_output()

        # Test state_0
        print("Testing State 0")
        state_0 = menagerie.get_state(conn_id)
        for stream in self.expected_incremental_sync_streams():
            # Verify bookmarks were saved as expected inc streams
            self.assertTrue(
                state_0.get('bookmarks', {}).get(stream,
                                                 {}).get('window_start', {}))
            print("Bookmarks meet expectations")
        for stream in self.expected_sync_streams().difference(
                self.untestable_streams()):
            data = synced_records.get(stream)
            record_messages = [set(row['data']) for row in data['messages']]

            data_0 = synced_records_0.get(stream)
            record_messages_0 = [
                set(row['data']) for row in data_0['messages']
            ]

            # Verify we got the same number of records as the first sync
            self.assertEqual(
                record_count_by_stream_0.get(stream),
                record_count_by_stream.get(stream),
                msg="Syncs should replicate the same number of records")
            self.assertEqual(
                record_messages_0,
                record_messages,
                msg="Syncs should replicate the same records")

            # Verify we got the exact same records as the first sync
            for record_message in record_messages:
                self.assertTrue(record_message in record_messages_0,
                                msg="Expected {} to be in this sync.".format(
                                    record_message))

        ##########################################################################
        ### Testing interrupted sync state_1 with date-windowing
        ##########################################################################
        version_1 = menagerie.get_state_version(conn_id)

        # Set parent_id to id of second-to-last board the tap will replicate
        sorted_parent_objs = self.get_tap_sorted_stream()
        penultimate_created_parent_id, _ = sorted_parent_objs[-2]
        last_created_parent_id, _ = sorted_parent_objs[-1]
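        # Bookmarking the second-to-last board simulates a sync that was interrupted partway through the boards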
        states_to_test[1]['bookmarks']['actions'][
            'parent_id'] = penultimate_created_parent_id

        # Set window_end based off current time
        window_end_1 = dt.utcnow().strftime(self.TEST_TIME_FORMAT)
        # window_end_1 = state['bookmarks']['actions']['window_start']
        states_to_test[1]['bookmarks']['actions']['window_end'] = window_end_1

        # Set sub_window_end to two days after the start date
        sub_window_end_1 = dt.strptime(
            self.START_DATE, self.START_DATE_FORMAT) + timedelta(days=2)
        states_to_test[1]['bookmarks']['actions'][
            'sub_window_end'] = sub_window_end_1.strftime(
                self.TEST_TIME_FORMAT)

        # Set window_start to start_date
        window_start_1 = dt.strptime(self.START_DATE, self.START_DATE_FORMAT)
        states_to_test[1]['bookmarks']['actions'][
            'window_start'] = window_start_1.strftime(self.TEST_TIME_FORMAT)

        print("Interjecting test state:\n{}".format(states_to_test[1]))
        menagerie.set_state(conn_id, states_to_test[1], version_1)

        # Run another sync (state_1)
        print("Running sync job 1")
        sync_job_name_1 = runner.run_sync_mode(self, conn_id)

        #verify tap and target exit codes
        exit_status_1 = menagerie.get_exit_status(conn_id, sync_job_name_1)
        menagerie.verify_sync_exit_status(self, exit_status_1, sync_job_name_1)

        # verify data was replicated
        record_count_by_stream_1 = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks())
        replicated_row_count_1 = reduce(lambda accum, c: accum + c,
                                        record_count_by_stream_1.values())
        self.assertGreater(replicated_row_count_1,
                           0,
                           msg="failed to replicate any data: {}".format(
                               record_count_by_stream_1))
        print("total replicated row count: {}".format(replicated_row_count_1))

        synced_records_1 = runner.get_records_from_target_output()

        # Test state_1
        print("Testing State 1")
        state_1 = menagerie.get_state(conn_id)
        for stream in self.expected_incremental_sync_streams():
            # Verify bookmarks were saved as expected inc streams
            self.assertTrue(
                state_1.get('bookmarks', {}).get(stream,
                                                 {}).get('window_start', {}))
            print("Bookmarks for {} meet expectations".format(stream))

            # Verify the original sync catches more data since current test state bookmarks on the second most recent board
            self.assertGreater(
                record_count_by_stream.get(stream, 0),
                record_count_by_stream_1.get(stream, 0),
                msg="Expected to have more records for {}".format(stream))

            # Verify sync 1 only replicates data from the bookmarked parent objects (the two most recently created boards)
            records_last_board = utils.get_objects(
                stream, parent_id=last_created_parent_id, since=window_start_1)
            record_count_last_board = len(records_last_board)

            records_penult_window_start = utils.get_objects(
                stream,
                parent_id=penultimate_created_parent_id,
                since=window_start_1)
            record_count_penult_window_start = len(records_penult_window_start)

            records_penult_sub_window = utils.get_objects(
                stream,
                parent_id=penultimate_created_parent_id,
                since=sub_window_end_1)
            record_count_penult_sub_window = len(records_penult_sub_window)

            record_count_penult_board = record_count_penult_window_start - record_count_penult_sub_window
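            # i.e. penultimate-board records dated between window_start and sub_window_end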
            for record in records_penult_sub_window:  # records_penult_window_start - records_penult_sub_window
                for rec in records_penult_window_start:
                    if record.get('id') == rec.get('id'):
                        records_penult_window_start.remove(rec)
                        break

            assert record_count_penult_board == len(
                records_penult_window_start)
            expected_record_count_1 = record_count_penult_board + record_count_last_board
            # expected_records_1 = records_last_board + records_penult_window_start SEE FOR LOOPS

            synced_actions = synced_records_1.get(stream)
            actual_data = [
                row.get('data').get('id') for row in synced_actions['messages']
            ]

            for record in records_last_board:
                if record.get('id') in actual_data:
                    continue
                print("MISSING RECORD {}".format(record))

            for record in records_penult_window_start:
                if record.get('id') in actual_data:
                    continue
                print("MISSING RECORD {}".format(record))

            self.assertEqual(
                expected_record_count_1,
                record_count_by_stream_1.get(stream, 0),
                msg="Sync 1 should replicate the expected records from the "
                "penultimate and most recently created boards.")

        ##########################################################################
        ### Testing interrupted sync state_2 without date-windowing
        ##########################################################################
        version_2 = menagerie.get_state_version(conn_id)

        # Set parent_id to id of last board the tap will replicate
        # Set window_end based off current time
        window_end_2 = dt.utcnow().strftime(self.TEST_TIME_FORMAT)
        # Set window_start to two days after the start date
        window_start_2 = dt.strptime(
            self.START_DATE, self.START_DATE_FORMAT) + timedelta(days=2)
        states_to_test[2]['bookmarks']['actions'] = {}
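        # Bookmark each full table stream (except boards) on the most recently created board; no sub_window_end is set for this scenario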
        for stream in self.expected_full_table_sync_streams().difference(
            {'boards'}):
            states_to_test[2]['bookmarks'][stream] = {
                'window_start': window_start_2.strftime(self.TEST_TIME_FORMAT),
                'window_end': window_end_2,
                'parent_id': last_created_parent_id
            }

        print("Interjecting test state:\n{}".format(states_to_test[2]))
        menagerie.set_state(conn_id, states_to_test[2], version_2)

        # Run another sync
        print("Running sync job 2")
        sync_job_name_2 = runner.run_sync_mode(self, conn_id)

        #verify tap and target exit codes
        exit_status_2 = menagerie.get_exit_status(conn_id, sync_job_name_2)
        menagerie.verify_sync_exit_status(self, exit_status_2, sync_job_name_2)

        # verify data was replicated
        record_count_by_stream_2 = runner.examine_target_output_file(
            self, conn_id, self.expected_sync_streams(), self.expected_pks())
        replicated_row_count_2 = reduce(lambda accum, c: accum + c,
                                        record_count_by_stream_2.values())
        self.assertGreater(replicated_row_count_2,
                           0,
                           msg="failed to replicate any data: {}".format(
                               record_count_by_stream_2))
        print("total replicated row count: {}".format(replicated_row_count_2))
        synced_records_2 = runner.get_records_from_target_output()

        # Test state_2
        print("Testing State 2")
        state_2 = menagerie.get_state(conn_id)
        for stream in self.expected_full_table_sync_streams().difference(
                self.untestable_streams()):
            # Verify bookmarks were saved as expected for these streams
            self.assertTrue(
                state_2.get('bookmarks', {}).get(stream,
                                                 {}).get('window_start', {}),
                msg="{} should have a bookmark value".format(stream))
            print("Bookmarks meet expectations")

            # Verify the smaller window replicates less data
            self.assertLessEqual(
                record_count_by_stream_2.get(stream, 0),
                record_count_by_stream.get(stream, 0),
                msg="Expected {} to replicate no more records than the original sync".format(stream))

            # Verify all records for the bookmarked (most recently created) board are caught in this sync
            expected_record_count_2 = len(
                utils.get_objects(stream, parent_id=last_created_parent_id))
            self.assertEqual(
                expected_record_count_2,
                record_count_by_stream_2.get(stream, 0),
                msg="Sync 2 should replicate every record for the bookmarked board.")

        ##########################################################################
        ### CLEAN UP
        ##########################################################################
        stream_to_delete = 'boards'
        boards_remaining = 5
        print("Deleting all but {} records for stream {}.".format(
            boards_remaining, stream_to_delete))
        board_count = len(expected_records.get(stream_to_delete, []))
        for obj_to_delete in expected_records.get(
                stream_to_delete, []):  # Delete boards down to the remaining count
            if board_count > boards_remaining:
                utils.delete_object(stream_to_delete, obj_to_delete.get('id'))
                board_count -= 1
            else:
                break

        # Reset the parent objects that we have been tracking
        utils.reset_tracked_parent_objects()