Python get_catalogs示例

编程语言: Python

命名空间/包名称: utils

方法/功能: get_catalogs

hotexamples.com的示例: 4

Python get_catalogs - 共找到 4 个示例。以下是从开源项目中提取的、评价最高的 utils.get_catalogs 真实 Python 代码示例。您可以为示例评分，帮助我们提高示例质量。

示例#1

显示文件

    def do_test(self, conn_id):
        """Test we get a lot of data back based on the start date configured in base.

        Steps performed:

        1. Select all fields of the incremental streams under test, create one
           object per stream and run a first sync on ``conn_id``.
        2. Check that at least one bookmark of the first sync parses as a date.
        3. Set ``self.start_date`` to yesterday, create a new connection, select
           the same streams, update one first-sync record per stream and run a
           second sync.
        4. Assert that the second sync returned more records than the first
           (in total and per stream) and that the smallest "created" bookmark
           of the second sync is not earlier than ``self.start_date``.

        Args:
            conn_id: Connection used for the first sync. It is replaced by a
                newly created connection before the second sync.

        Raises:
            ValueError: If no bookmark of the first sync can be parsed as a date.
        """
        # NOTE(review): the result of this call is not used; the call itself is kept
        # in case it matters for the connection -- confirm and drop it if it does not.
        menagerie.get_catalogs(conn_id)
        incremental_streams = {
            stream
            for stream, method in self.expected_replication_method().items()
            if method == self.INCREMENTAL
        }

        # IF THERE ARE STREAMS THAT SHOULD NOT BE TESTED
        # ADD THEM TO THE SET BELOW
        untested_streams = self.child_streams().union(
            {'disputes', 'events', 'transfers', 'payout_transactions'})
        # Computed once; every later step works on the same set of streams
        tested_streams = incremental_streams.difference(untested_streams)

        # Select all tested streams and all fields within them
        our_catalogs = get_catalogs(conn_id, tested_streams)
        self.select_all_streams_and_fields(conn_id,
                                           our_catalogs,
                                           select_all_fields=True)

        # Create a record for each stream under test prior to the first sync
        new_objects = {stream: create_object(stream) for stream in tested_streams}

        # Run a sync job using orchestrator
        first_sync_record_count = self.run_sync(conn_id)
        # sum() returns 0 for an empty result, where reduce() without an initial
        # value would raise TypeError
        first_total_records = sum(first_sync_record_count.values())

        # Count actual rows synced
        first_sync_records = runner.get_records_from_target_output()

        # Collect every first-sync bookmark that can be read as a date
        first_max_bookmarks = self.max_bookmarks_by_stream(first_sync_records)
        bookmark_dates = []
        for book in first_max_bookmarks.values():
            bookmark = next(iter(book.values()))
            try:
                bookmark_dates.append(parse(bookmark))
            except (ValueError, OverflowError, TypeError):
                # Bookmarks that are not dates are deliberately skipped
                continue

        if not bookmark_dates:
            # THERE WERE NO BOOKMARKS THAT ARE DATES.
            # REMOVE CODE TO FIND A START DATE AND ENTER ONE MANUALLY
            raise ValueError(
                "none of the bookmarks from the first sync could be parsed as a date")

        # The start date is fixed to yesterday; it is not derived from the largest
        # bookmark found above.
        self.start_date = dt.strftime(dt.today() - timedelta(days=1),
                                      self.START_DATE_FORMAT)

        # create a new connection with the new start_date
        conn_id = self.create_connection(original_properties=False)

        # Select all tested streams and all fields within them on the new connection
        found_catalogs = menagerie.get_catalogs(conn_id)
        our_catalogs = [
            catalog for catalog in found_catalogs
            if catalog.get('tap_stream_id') in tested_streams
        ]
        self.select_all_streams_and_fields(conn_id,
                                           our_catalogs,
                                           select_all_fields=True)

        # TODO remove the updates, this is unnecessary. Verify with Harvest
        # Update a record for each stream under test prior to the 2nd sync
        first_sync_created, _ = self.split_records_into_created_and_updated(
            first_sync_records)
        updated = {}  # holds id for updated objects in each stream
        for stream in new_objects:
            # There needs to be some test data for each stream, otherwise this will break
            record = first_sync_created[stream]["messages"][0]["data"]
            update_object(stream, record["id"])
            updated[stream] = record["id"]

        # Run a sync job using orchestrator
        second_sync_record_count = self.run_sync(conn_id)

        # tap-stripe uses events for updates, so these need filtered to validate bookmark
        second_sync_records = runner.get_records_from_target_output()
        second_sync_created, _ = self.split_records_into_created_and_updated(
            second_sync_records)
        second_total_records = sum(second_sync_record_count.values())

        # Only examine bookmarks for "created" objects, not updates
        second_min_bookmarks = self.min_bookmarks_by_stream(
            second_sync_created)

        # verify that at least one record synced and that the second connection
        # synced more records than the first one
        self.assertGreater(second_total_records, 0)
        self.assertLess(first_total_records, second_total_records)

        # validate that all newly created records are greater than the start_date
        for stream in tested_streams:
            with self.subTest(stream=stream):

                # verify that each stream has less records in the first sync than the second
                self.assertGreater(
                    second_sync_record_count.get(stream, 0),
                    first_sync_record_count.get(stream, 0),
                    msg="first had more records, start_date usage not verified"
                )

                # verify all data from 2nd sync >= start_date
                target_mark = second_min_bookmarks.get(stream, {"mark": None})
                target_value = next(iter(
                    target_mark.values()))  # there should be only one

                if target_value:

                    # it's okay if there isn't target data for a stream
                    try:
                        target_value = self.local_to_utc(parse(target_value))
                        expected_value = self.local_to_utc(
                            parse(self.start_date))
                        # verify that the minimum bookmark sent to the target for the second sync
                        # is greater than or equal to the start date
                        self.assertGreaterEqual(target_value, expected_value)

                    except (OverflowError, ValueError, TypeError):
                        print("bookmarks cannot be converted to dates, "
                              "can't test start_date for {}".format(stream))

                # Clean up the record this test updated
                if stream in updated:
                    delete_object(stream, updated[stream])

示例#2

显示文件

    def do_test(self, conn_id):
        """
        Verify that the sync only sent records to the target for selected streams
        Update one record for each stream that receives event updates
        Verify that the second sync includes at least one update for each stream
        Verify that the second sync includes less records than the first sync
        Verify that the updated record has a greater "updated" value and a
        different metadata["test_value"] on the second sync

        PREREQUISITE
        For EACH stream that gets updates through events stream, there's at least 1 row
            of data

        Args:
            conn_id: Connection on which both syncs are run.
        """

        def find_data(split_records, stream, record_id):
            """Return the data of the first message of ``stream`` with that id, or None."""
            messages = split_records.get(stream, {}).get("messages", [])
            return next(
                (message["data"] for message in messages
                 if message.get("data", {}).get("id") == record_id),
                None,
            )

        event_update_streams = {
            # "balance_transactions"  # Cannot be directly updated
            "charges",
            "coupons",
            "customers",
            # "disputes",  # Cannot create directly with api
            "invoice_items",
            # "invoice_line_items",  # Can't be updated via api
            "invoices",
            # "payout_transactions",  # See bug in create_test
            "payouts",
            "plans",
            "products",
            # "subscription_items", # BUG https://stitchdata.atlassian.net/browse/SUP-1214
            "subscriptions",
            # "transfers",  # Cannot be updated directly via api
        }

        our_catalogs = get_catalogs(conn_id, event_update_streams)

        self.select_all_streams_and_fields(conn_id,
                                           our_catalogs,
                                           select_all_fields=True)

        # Ensure each stream under test has data to start
        new_objects = {
            stream: create_object(stream)
            for stream in event_update_streams
        }

        # Some streams will be updated implicitly
        streams_to_update = event_update_streams.difference({
            "invoice_line_items",
            "subscription_items",
        })

        # Run a sync job using orchestrator
        first_sync_record_count = self.run_sync(conn_id)

        # verify that the sync only sent records to the target for selected streams (catalogs)
        self.assertEqual(set(first_sync_record_count.keys()),
                         event_update_streams)

        # Get the set of records from a first sync
        first_sync_records = runner.get_records_from_target_output()

        first_sync_created, _ = self.split_records_into_created_and_updated(
            first_sync_records)

        updated = {}  # holds id for updated objects in each stream
        for stream in streams_to_update:

            # There needs to be some test data for each stream, otherwise this will break
            self.assertGreater(len(first_sync_created[stream]["messages"]),
                               0,
                               msg='We did not get any new records from '
                               'the first sync for {}'.format(stream))
            record = first_sync_created[stream]["messages"][0]["data"]

            # We need to make sure the data actually changes, otherwise no event update
            # will get created
            update_object(stream, record["id"])
            updated[stream] = record["id"]

        # Run a second sync job using orchestrator
        second_sync_record_count = self.run_sync(conn_id)

        # Get the set of records from a second sync
        second_sync_records = runner.get_records_from_target_output()

        _, second_sync_updated = self.split_records_into_created_and_updated(
            second_sync_records)

        # # THIS MAKES AN ASSUMPTION THAT CHILD STREAMS DO NOT NEED TESTING.
        # # ADJUST IF NECESSARY
        for stream in event_update_streams.difference(self.child_streams()):
            with self.subTest(stream=stream):
                # verify that there is more than 1 record of data - setup necessary
                self.assertGreater(
                    first_sync_record_count.get(stream, 0),
                    1,
                    msg="Data isn't set up to be able to test event updates",
                )

                # verify that you get at least one updated record on the second sync
                self.assertGreaterEqual(
                    len(
                        second_sync_updated.get(stream,
                                                {}).get("messages", [])),
                    1,
                    msg="second syc didn't have updates",
                )

                # verify that you get less data the 2nd time around since only updates
                # should be picked up
                self.assertLess(
                    second_sync_record_count.get(stream, 0),
                    first_sync_record_count.get(stream, 0),
                    msg="second syc had the same or more records",
                )

                # Locate the updated record in both syncs. A missing record is reported
                # as a test failure with a message instead of a bare StopIteration.
                first_data = find_data(first_sync_created, stream,
                                       updated[stream])
                self.assertIsNotNone(
                    first_data,
                    msg="updated record not found among first sync created records",
                )

                second_data = find_data(second_sync_updated, stream,
                                        updated[stream])
                self.assertIsNotNone(
                    second_data,
                    msg="updated record not found among second sync updated records",
                )

                # verify the updated timestamp is greater in the second sync
                self.assertGreater(
                    second_data["updated"],
                    first_data["updated"],
                    "updated timestamp for second sync is not greater than first sync",
                )

                # verify the metadata["test_value"] value actually changed
                self.assertNotEqual(
                    second_data["metadata"].get("test_value", 0),
                    first_data["metadata"].get("test_value", 0),
                    "the test metadata should be different",
                )

                # Clean up the object created for this stream at the start of the test
                if stream in new_objects:
                    delete_object(stream, new_objects[stream]["id"])

示例#3

显示文件

    def test_run(self):
        """
        Verify that for each stream you can get multiple pages of data
        and that when all fields are selected more than the automatic fields are replicated.

        For every tested stream other than "events", objects are created until the
        record count tracked for it exceeds ``self.API_LIMIT``.

        PREREQUISITE
        For EACH stream add enough data that you surpass the limit of a single
        fetch of data.  For instance if you have a limit of 250 records ensure
        that 251 (or more) records have been posted for that stream.
        """
        conn_id = connections.ensure_connection(self)

        incremental_streams = {
            stream
            for stream, method in self.expected_replication_method().items()
            if method == self.INCREMENTAL
        }
        untested_streams = self.child_streams().union({
            'balance_transactions',
            # 'charges',
            # 'coupons',
            # 'customers',
            'disputes',
            # 'invoice_items',
            'invoice_line_items',
            # 'invoices',
            'payout_transactions',
            # 'payouts',
            # 'plans',
            # 'products',
            'subscription_items',
            # 'subscriptions',
            'transfers',
        })
        tested_streams = incremental_streams.difference(untested_streams)

        # Run check mode (its return value is not needed here), then select all
        # tested streams and all fields within them
        self.run_and_verify_check_mode(conn_id)
        our_catalogs = get_catalogs(conn_id, tested_streams)
        self.select_all_streams_and_fields(conn_id, our_catalogs, select_all_fields=True)

        # Ensure tested streams have a record count which exceeds the API LIMIT
        logging.info("Checking record counts for tested streams...")
        streams_to_create = {}  # stream name -> number of records counted so far
        for stream in tested_streams:
            records = list_all_object(stream)
            record_count = len(records)

            streams_to_create[stream] = record_count
            logging.info("   Stream {} has {} records created today".format(stream, record_count))

        logging.info("Creating records for tested streams...")
        new_objects = {stream: [] for stream in streams_to_create}
        for stream in streams_to_create:
            if stream != "events" and streams_to_create[stream] <= self.API_LIMIT:
                while streams_to_create[stream] <= self.API_LIMIT:
                    logging.info("Creating a record for {} | {} records created today ".format(stream,
                                                                                        streams_to_create[stream]))
                    new_objects[stream].append(create_object(stream))
                    streams_to_create[stream] += 1
                records = list_all_object(stream)
                # NOTE(review): 100 is hard-coded here while the loop above uses
                # self.API_LIMIT -- confirm whether the two are meant to be the same.
                self.assertEqual(100, len(records))
                logging.info("   Stream {} has at least {} records created today".format(stream, len(records) + 1))

        # Run a sync job using orchestrator
        record_count_by_stream = self.run_and_verify_sync(conn_id)

        actual_fields_by_stream = runner.examine_target_output_for_fields()

        for stream in tested_streams:
            with self.subTest(stream=stream):

                # verify that we can paginate with all fields selected.
                # The fallback is API_LIMIT itself: with the previous default of 0 a
                # missing metadata entry reduced this check to "at least one record".
                # NOTE(review): API_LIMIT is compared against record counts above, so
                # it is presumably numeric and unlikely to be a key of
                # expected_metadata() -- confirm whether the lookup is needed at all.
                page_limit = self.expected_metadata().get(stream, {}).get(
                    self.API_LIMIT, self.API_LIMIT)
                self.assertGreater(
                    record_count_by_stream.get(stream, -1),
                    page_limit,
                    msg="The number of records is not over the stream max limit")

                # verify that the automatic fields are sent to the target
                actual = actual_fields_by_stream.get(stream) or set()
                expected = self.expected_automatic_fields().get(stream, set())
                self.assertTrue(actual.issuperset(expected),
                                msg="The fields sent to the target don't include all automatic fields. "
                                "Expected: {}, Actual: {}". format(expected, actual)
                )

                # verify we have more fields sent to the target than just automatic fields
                # SKIP THIS ASSERTION IF ALL FIELDS ARE INTENTIONALLY AUTOMATIC FOR THIS STREAM
                # (actual is a superset of expected at this point, so the plain
                # difference is exactly the set of non-automatic fields)
                self.assertTrue(actual.difference(expected),
                                msg="The fields sent to the target don't include any non-automatic fields"
                )

                # NOTE(review): the two values below are computed but never asserted on
                # in the visible code; presumably a comparison was intended -- confirm.
                # The extra guard avoids an IndexError for streams that needed no new
                # objects.
                if stream != "events" and new_objects[stream]:
                    actual = actual_fields_by_stream.get(stream, set())
                    expected = set(new_objects[stream][0].keys())

示例#4

显示文件

    def do_test(self, conn_id):
        """
        Verify that the sync only sent records to the target for selected streams
        Create a new object for each stream
        Verify that the second sync includes at least one create for each stream
        Verify that the created record was picked up on the second sync

        Args:
            conn_id: Connection on which both syncs are run.
        """
        streams_to_create = {
            "balance_transactions",  # should be created implicitly with a create in the payouts or charges streams
            "charges",
            "coupons",
            "customers",
            "invoice_items",
            "invoice_line_items",  # this is created implicitly by invoices, it just creates another invoice TODO get this outa here
            "invoices",  # this will create an invoice_item
            "payouts",
            "plans",
            "products",
            "subscription_items",
            "subscriptions",  # this will create a new plan and payment method
        }

        # Streams this test does NOT create objects for:
        #   "disputes"            - can be created by simulating a dispute transaction with a
        #                           specific card number; no way to create directly,
        #                           see: https://stripe.com/docs/testing#disputes
        #   "payout_transactions" - BUG (https://stitchdata.atlassian.net/browse/SUP-1294),
        #                           depends on payouts and transactions
        #   "transfers"           - needs an account that we can transfer to, not sure
        #                           how to set up a test account we can use to create a transfer

        our_catalogs = get_catalogs(conn_id, streams_to_create)

        self.select_all_streams_and_fields(conn_id,
                                           our_catalogs,
                                           select_all_fields=True)

        # Run a sync job using orchestrator
        first_sync_record_count = self.run_sync(conn_id)

        # verify that the sync only sent records to the target for selected streams (catalogs)
        self.assertEqual(set(first_sync_record_count.keys()),
                         streams_to_create)

        # Get the set of records from a first sync.
        # NOTE(review): the split result is not used afterwards; both calls are kept
        # because it is not visible here whether they have side effects -- confirm.
        first_sync_records = runner.get_records_from_target_output()
        self.split_records_into_created_and_updated(first_sync_records)

        # balance_transactions are not created directly; the payouts and charges
        # created here are expected to show up as their sources (checked below)
        new_objects = {
            stream: create_object(stream)
            for stream in streams_to_create.difference(
                {"balance_transactions"})
        }

        # Run a second sync job using orchestrator (the record counts are not needed)
        self.run_sync(conn_id)

        # Get the set of records from a second sync
        second_sync_records = runner.get_records_from_target_output()

        second_sync_created, _ = self.split_records_into_created_and_updated(
            second_sync_records)

        # # THIS MAKES AN ASSUMPTION THAT CHILD STREAMS DO NOT NEED TESTING.
        # # ADJUST IF NECESSARY
        for stream in streams_to_create.difference(self.child_streams()):
            with self.subTest(stream=stream):

                second_sync_created_objects = second_sync_created.get(
                    stream, {}).get("messages", [])

                # verify that you get at least one new record on the second sync
                self.assertGreaterEqual(
                    len(second_sync_created_objects),
                    1,
                    msg="second sync didn't have created objects",
                )

                if stream == "balance_transactions":
                    sources = [
                        record.get("data", {}).get("source")
                        for record in second_sync_created_objects
                    ]

                    # assertIn reports the missing id and the candidates on failure
                    self.assertIn(new_objects['payouts']['id'], sources)
                    self.assertIn(new_objects['charges']['id'], sources)

                    continue

                # verify the new object is in the list of created objects
                # from the second sync
                created_ids = [
                    record.get("data", {}).get("id")
                    for record in second_sync_created_objects
                ]
                self.assertIn(new_objects[stream]["id"], created_ids)

                # Clean up the object created by this test for the stream
                if stream in new_objects:
                    delete_object(stream, new_objects[stream]["id"])