def test_open_connections_with_default_session_sqls(self):
        """With no custom session SQLs in the config, the default ones are executed."""
        with patch('tap_mysql.connection.MySQLConnection.connect'), \
                patch('tap_mysql.connection.run_sql') as patched_run_sql:
            # Route run_sql through this test case's own stand-in
            # (presumably it records into self.executed_queries - confirm).
            patched_run_sql.side_effect = self.run_sql_mock
            mock_conn = MySQLConnectionMock(config=test_utils.get_db_config())
            connect_with_backoff(mock_conn)

        # The statements seen while connecting must equal the module defaults.
        self.assertEqual(
            self.executed_queries,
            tap_mysql.connection.DEFAULT_SESSION_SQLS,
        )
Exemplo n.º 2
0
def main_impl():
    """Parse CLI args, connect, tune the session, then discover or sync.

    Three session variables are set on a best-effort basis: a
    ``pymysql.err.InternalError`` from any of them is collected and logged
    rather than aborting the run. Afterwards the server parameters are logged
    and the mode is picked from the parsed arguments (discover, catalog,
    properties, or none).
    """
    args = utils.parse_args(REQUIRED_CONFIG_KEYS)
    connection = MySQLConnection(args.config)

    connect_with_backoff(connection)

    # (statement, warning template) pairs; each statement is tried on its own
    # so one failure does not prevent the remaining settings from being applied.
    session_settings = (
        ('SET @@session.time_zone="+0:00"',
         'Could not set session.time_zone. Error: ({}) {}'),
        ('SET @@session.wait_timeout=2700',
         'Could not set session.wait_timeout. Error: ({}) {}'),
        ('SET @@session.innodb_lock_wait_timeout=2700',
         'Could not set session.innodb_lock_wait_timeout. Error: ({}) {}'),
    )

    warnings = []
    with connection.cursor() as cur:
        for statement, warning_template in session_settings:
            try:
                cur.execute(statement)
            except pymysql.err.InternalError as err:
                warnings.append(warning_template.format(*err.args))

    if warnings:
        LOGGER.info((
            "Encountered non-fatal errors when configuring MySQL session that could "
            "impact performance:"))
    for message in warnings:
        LOGGER.warning(message)

    log_server_params(connection)

    if args.discover:
        do_discover(connection)
        return

    if args.catalog:
        do_sync(connection, args.config, args.catalog, args.state or {})
        return

    if args.properties:
        # Legacy-style input: build the catalog from the raw properties dict.
        catalog = Catalog.from_dict(args.properties)
        do_sync(connection, args.config, catalog, args.state or {})
        return

    LOGGER.info("No properties were selected")
Exemplo n.º 3
0
    def setUp(self):
        """Create ``full_table`` with one row and select every stream for FULL_TABLE."""
        self.conn = test_utils.get_test_connection()

        fixture_statements = (
            "CREATE TABLE full_table (val int)",
            "INSERT INTO full_table (val) VALUES (1)",
        )
        with connect_with_backoff(self.conn) as open_conn:
            with open_conn.cursor() as cursor:
                for statement in fixture_statements:
                    cursor.execute(statement)

        self.catalog = test_utils.discover_catalog(self.conn, {})

        for stream in self.catalog.streams:
            stream.key_properties = []

            # Built inside the loop so each stream gets its own metadata objects.
            table_entry = {
                "breadcrumb": (),
                "metadata": {
                    "selected": True,
                    "database-name": "tap_mysql_test",
                },
            }
            column_entry = {
                "breadcrumb": ("properties", "val"),
                "metadata": {"selected": True},
            }
            stream.metadata = [table_entry, column_entry]

            stream.stream = stream.table
            test_utils.set_replication_method_and_key(stream, "FULL_TABLE", None)
Exemplo n.º 4
0
def pks_are_integer_or_varchar(mysql_conn, config, catalog_entry):
    """Tell whether every primary-key column has an integer or character type.

    The check only runs when ``config['allow_non_auto_increment_pks']`` is the
    string ``'true'`` and the catalog entry has key properties; otherwise the
    function returns False without querying the server.

    Args:
        mysql_conn: connection object accepted by ``connect_with_backoff``.
        config: tap configuration mapping.
        catalog_entry: catalog entry whose key columns are inspected.

    Returns:
        True if the ``data_type`` of every key column is one of tinyint,
        smallint, mediumint, int, bigint, varchar or char; False otherwise.

    Raises:
        Exception: if a key column is not found in
            ``information_schema.columns``.
    """
    database_name = common.get_database_name(catalog_entry)
    key_properties = common.get_key_properties(catalog_entry)

    if config.get('allow_non_auto_increment_pks') != 'true' or not key_properties:
        return False

    # Every entry needs its own comma: adjacent string literals are silently
    # concatenated, which previously produced 'smallintmediumint' and made
    # both smallint and mediumint keys fail the check.
    valid_column_types = {
        'tinyint', 'smallint', 'mediumint', 'int', 'bigint', 'varchar', 'char',
    }

    # Values are bound by the driver instead of being formatted into the SQL,
    # so names containing quotes cannot break or alter the statement.
    sql = """SELECT data_type
               FROM information_schema.columns
              WHERE table_schema = %s
                AND table_name = %s
                AND column_name = %s
    """

    with connect_with_backoff(mysql_conn) as open_conn:
        with open_conn.cursor() as cur:
            for pk in key_properties:
                cur.execute(sql, (database_name, catalog_entry.table, pk))

                result = cur.fetchone()

                if not result:
                    raise Exception(
                        "Primary key column {} does not exist.".format(pk))

                if result[0] not in valid_column_types:
                    return False

    return True
Exemplo n.º 5
0
def log_server_params(mysql_conn):
    """Log the server version, selected session variables and SSL status.

    This is informational only. A ``pymysql.err.InternalError`` raised by
    either query is logged as a warning instead of being propagated.

    Args:
        mysql_conn: connection object accepted by ``connect_with_backoff``.
    """
    with connect_with_backoff(mysql_conn) as open_conn:
        try:
            with open_conn.cursor() as cur:
                cur.execute('''
                SELECT VERSION() as version,
                       @@session.wait_timeout as wait_timeout,
                       @@session.innodb_lock_wait_timeout as innodb_lock_wait_timeout,
                       @@session.max_allowed_packet as max_allowed_packet,
                       @@session.interactive_timeout as interactive_timeout''')
                row = cur.fetchone()
                LOGGER.info('Server Parameters: '
                            'version: %s, '
                            'wait_timeout: %s, '
                            'innodb_lock_wait_timeout: %s, '
                            'max_allowed_packet: %s, '
                            'interactive_timeout: %s',
                            *row)
            with open_conn.cursor() as cur:
                cur.execute('''
                show session status where Variable_name IN ('Ssl_version', 'Ssl_cipher')''')
                ssl_status = {r[0]: r[1] for r in cur.fetchall()}
                # .get() keeps this log-only helper from raising KeyError when
                # the server does not report one of the SSL status variables;
                # a missing value is shown blank, matching the message below.
                LOGGER.info(
                    'Server SSL Parameters(blank means SSL is not active): [ssl_version: %s], [ssl_cipher: %s]',
                    ssl_status.get('Ssl_version', ''),
                    ssl_status.get('Ssl_cipher', ''))

        except pymysql.err.InternalError as exc:
            LOGGER.warning("Encountered error checking server params. Error: (%s) %s", *exc.args)
Exemplo n.º 6
0
def pks_are_auto_incrementing(mysql_conn, catalog_entry):
    """Tell whether every primary-key column is declared auto_increment.

    Args:
        mysql_conn: connection object accepted by ``connect_with_backoff``.
        catalog_entry: catalog entry whose key columns are inspected.

    Returns:
        False when the entry has no key properties or when any key column has
        no ``information_schema.columns`` row whose ``extra`` contains
        ``auto_increment``; True otherwise.
    """
    database_name = common.get_database_name(catalog_entry)
    key_properties = common.get_key_properties(catalog_entry)

    if not key_properties:
        return False

    # All values, including the LIKE pattern, are bound by the driver. Passing
    # the pattern as a parameter avoids having to escape its literal '%'
    # characters in a statement that also uses %s placeholders.
    sql = """SELECT 1
               FROM information_schema.columns
              WHERE table_schema = %s
                AND table_name = %s
                AND column_name = %s
                AND extra LIKE %s
    """

    with connect_with_backoff(mysql_conn) as open_conn:
        with open_conn.cursor() as cur:
            for pk in key_properties:
                cur.execute(
                    sql,
                    (database_name, catalog_entry.table, pk, '%auto_increment%'),
                )

                if not cur.fetchone():
                    return False

    return True
Exemplo n.º 7
0
def log_engine(mysql_conn, catalog_entry):
    """Log a "Beginning sync" line for the entry, naming the engine for tables.

    Views are logged without touching the database. For tables the storage
    engine is read from ``information_schema.tables``; nothing is logged when
    that lookup returns no row.
    """
    is_view = common.get_is_view(catalog_entry)
    database_name = common.get_database_name(catalog_entry)
    table_name = catalog_entry.table

    if is_view:
        LOGGER.info("Beginning sync for view %s.%s", database_name, table_name)
        return

    engine_query = """
                    SELECT engine
                      FROM information_schema.tables
                     WHERE table_schema = %s
                       AND table_name   = %s
                """

    with connect_with_backoff(mysql_conn) as open_conn:
        with open_conn.cursor() as cur:
            cur.execute(engine_query, (database_name, table_name))
            engine_row = cur.fetchone()

            if not engine_row:
                return

            LOGGER.info(
                "Beginning sync for %s table %s.%s",
                engine_row[0],
                database_name,
                table_name,
            )
Exemplo n.º 8
0
def verify_binlog_config(mysql_conn):
    """Raise unless the server's binlog settings permit binlog replication.

    ``@@binlog_format`` must be 'ROW' and ``@@binlog_row_image`` must be 'FULL'.

    Raises:
        Exception: if either setting has a different value, or if reading
            ``@@binlog_row_image`` fails with error code 1193 (reported here
            as the system variable not existing).
        pymysql.err.InternalError: any other InternalError raised while
            reading ``@@binlog_row_image`` is re-raised as is.
    """
    missing_variable_errno = 1193

    with connect_with_backoff(mysql_conn) as open_conn:
        with open_conn.cursor() as cur:
            cur.execute("SELECT  @@binlog_format")
            current_format = cur.fetchone()[0]

            if current_format != 'ROW':
                raise Exception(
                    "Unable to replicate binlog stream because binlog_format is not set to 'ROW': {}."
                    .format(current_format))

            try:
                cur.execute("SELECT  @@binlog_row_image")
                current_row_image = cur.fetchone()[0]
            except pymysql.err.InternalError as ex:
                if ex.args[0] != missing_variable_errno:
                    raise ex

                raise Exception(
                    "Unable to replicate binlog stream because binlog_row_image system variable does not exist. MySQL version must be at least 5.6.2 to use binlog replication."
                )

            if current_row_image != 'FULL':
                raise Exception(
                    "Unable to replicate binlog stream because binlog_row_image is not set to 'FULL': {}."
                    .format(current_row_image))
Exemplo n.º 9
0
    def setUp(self):
        """Create table ``a`` with a column whose name contains a space, and select it.

        The first discovered stream is renamed to "some_stream_name" and set to
        FULL_TABLE replication.
        """
        self.conn = test_utils.get_test_connection()

        fixture_statements = (
            "CREATE TABLE a (`b c` int)",
            "INSERT INTO a (`b c`) VALUES (1)",
        )
        with connect_with_backoff(self.conn) as open_conn:
            with open_conn.cursor() as cursor:
                for statement in fixture_statements:
                    cursor.execute(statement)

        self.catalog = test_utils.discover_catalog(self.conn, {})

        first_stream = self.catalog.streams[0]
        first_stream.stream = "some_stream_name"

        table_entry = {
            "breadcrumb": (),
            "metadata": {
                "selected": True,
                "table-key-properties": [],
                "database-name": "tap_mysql_test",
            },
        }
        column_entry = {
            "breadcrumb": ("properties", "b c"),
            "metadata": {"selected": True},
        }
        first_stream.metadata = [table_entry, column_entry]

        test_utils.set_replication_method_and_key(first_stream, "FULL_TABLE", None)
Exemplo n.º 10
0
    def setUp(self):
        """Prepare a one-row ``full_table`` and configure all streams for FULL_TABLE sync."""
        self.conn = test_utils.get_test_connection()

        with connect_with_backoff(self.conn) as open_conn:
            with open_conn.cursor() as cursor:
                for ddl_or_dml in ('CREATE TABLE full_table (val int)',
                                   'INSERT INTO full_table (val) VALUES (1)'):
                    cursor.execute(ddl_or_dml)

        self.catalog = test_utils.discover_catalog(self.conn, {})

        for stream in self.catalog.streams:
            stream.key_properties = []

            # New dict objects on every iteration, so streams never share metadata.
            stream_level = {
                'breadcrumb': (),
                'metadata': {
                    'selected': True,
                    'database-name': 'tap_mysql_test',
                },
            }
            val_column = {
                'breadcrumb': ('properties', 'val'),
                'metadata': {'selected': True},
            }
            stream.metadata = [stream_level, val_column]

            stream.stream = stream.table
            test_utils.set_replication_method_and_key(stream, 'FULL_TABLE', None)
Exemplo n.º 11
0
    def setUpClass(cls):
        """Create ``test_type_mapping`` and keep the first stream's schema and metadata.

        The table has one column per MySQL type under test; discovery is run
        once and its result for the first stream is stored on the class.
        """
        conn = test_utils.get_test_connection()

        create_table_sql = '''
                CREATE TABLE test_type_mapping (
                c_pk INTEGER PRIMARY KEY,
                c_decimal DECIMAL,
                c_decimal_2_unsigned DECIMAL(5, 2) UNSIGNED,
                c_decimal_2 DECIMAL(11, 2),
                c_tinyint TINYINT,
                c_tinyint_1 TINYINT(1),
                c_tinyint_1_unsigned TINYINT(1) UNSIGNED,
                c_smallint SMALLINT,
                c_mediumint MEDIUMINT,
                c_int INT,
                c_bigint BIGINT,
                c_bigint_unsigned BIGINT(20) UNSIGNED,
                c_float FLOAT,
                c_double DOUBLE,
                c_bit BIT(4),
                c_date DATE,
                c_time TIME,
                c_year YEAR
                )'''

        with connect_with_backoff(conn) as open_conn:
            with open_conn.cursor() as cur:
                cur.execute(create_table_sql)

        discovered = test_utils.discover_catalog(conn, {})
        cls.schema = discovered.streams[0].schema
        cls.metadata = discovered.streams[0].metadata
    def runTest(self):
        """Sync table ``tab`` with only column ``a`` selected in metadata.

        The emitted schema is expected to contain all three columns
        (``id``, ``a``, ``b``).
        """
        conn = test_utils.get_test_connection()

        create_table_sql = '''
                    CREATE TABLE tab (
                      id INTEGER PRIMARY KEY,
                      a INTEGER,
                      b INTEGER)
                '''
        with connect_with_backoff(conn) as open_conn:
            with open_conn.cursor() as cur:
                cur.execute(create_table_sql)

        catalog = test_utils.discover_catalog(conn, {})
        tab_stream = catalog.streams[0]
        tab_stream.stream = 'tab'
        tab_stream.metadata = [
            {
                'breadcrumb': (),
                'metadata': {'selected': True, 'database-name': 'tap_mysql_test'},
            },
            {
                'breadcrumb': ('properties', 'a'),
                'metadata': {'selected': True},
            },
        ]

        test_utils.set_replication_method_and_key(tab_stream, 'FULL_TABLE', None)

        global SINGER_MESSAGES
        SINGER_MESSAGES.clear()
        tap_mysql.do_sync(conn, {}, catalog, {})

        schema_messages = [
            message for message in SINGER_MESSAGES
            if isinstance(message, singer.SchemaMessage)
        ]
        schema_message = schema_messages[0]
        self.assertTrue(isinstance(schema_message, singer.SchemaMessage))

        # Column "b" has no metadata entry above, yet it is expected in the
        # schema: the original note says tap-mysql selects new fields by default.
        expected_keys = {'id', 'a', 'b'}

        self.assertEqual(schema_message.schema['properties'].keys(), expected_keys)
    def setUp(self):
        """Create two three-row tables and configure all streams for INCREMENTAL sync.

        ``incremental`` uses a datetime ``updated`` column, ``integer_incremental``
        an int one; ``updated`` is the replication key for every stream.
        """
        self.conn = test_utils.get_test_connection()

        fixture_statements = (
            "CREATE TABLE incremental (val int, updated datetime)",
            "INSERT INTO incremental (val, updated) VALUES (1, '2017-06-01')",
            "INSERT INTO incremental (val, updated) VALUES (2, '2017-06-20')",
            "INSERT INTO incremental (val, updated) VALUES (3, '2017-09-22')",
            "CREATE TABLE integer_incremental (val int, updated int)",
            "INSERT INTO integer_incremental (val, updated) VALUES (1, 1)",
            "INSERT INTO integer_incremental (val, updated) VALUES (2, 2)",
            "INSERT INTO integer_incremental (val, updated) VALUES (3, 3)",
        )
        with connect_with_backoff(self.conn) as open_conn:
            with open_conn.cursor() as cursor:
                for statement in fixture_statements:
                    cursor.execute(statement)

        self.catalog = test_utils.discover_catalog(self.conn, {})

        for stream in self.catalog.streams:
            # Only "val" gets an explicit column-level selection entry.
            table_entry = {
                'breadcrumb': (),
                'metadata': {
                    'selected': True,
                    'table-key-properties': [],
                    'database-name': 'tap_mysql_test',
                },
            }
            val_entry = {
                'breadcrumb': ('properties', 'val'),
                'metadata': {'selected': True},
            }
            stream.metadata = [table_entry, val_entry]

            stream.stream = stream.table
            test_utils.set_replication_method_and_key(stream, 'INCREMENTAL', 'updated')
Exemplo n.º 14
0
def fetch_server_id(mysql_conn):
    """Return the first column of ``SELECT @@server_id`` for this connection."""
    with connect_with_backoff(mysql_conn) as open_conn:
        with open_conn.cursor() as cur:
            cur.execute("SELECT @@server_id")
            id_row = cur.fetchone()
            return id_row[0]
    def test_open_connections_with_session_sqls(self):
        """Session SQLs supplied in the config are the ones executed on connect."""
        session_sqls = [
            'SET SESSION max_statement_time=0',
            'SET SESSION wait_timeout=28800'
        ]
        # Base DB config with the custom statements layered on top.
        config = {**test_utils.get_db_config(), 'session_sqls': session_sqls}

        with patch('tap_mysql.connection.MySQLConnection.connect'), \
                patch('tap_mysql.connection.run_sql') as patched_run_sql:
            patched_run_sql.side_effect = self.run_sql_mock
            mock_conn = MySQLConnectionMock(config=config)
            connect_with_backoff(mock_conn)

        # Exactly the configured statements must have been run.
        self.assertEqual(self.executed_queries, session_sqls)
Exemplo n.º 16
0
    def setUp(self):
        """Create table ``a`` (column ``b c``) and select its stream for FULL_TABLE sync."""
        self.conn = test_utils.get_test_connection()

        with connect_with_backoff(self.conn) as open_conn:
            with open_conn.cursor() as cursor:
                for statement in ('CREATE TABLE a (`b c` int)',
                                  'INSERT INTO a (`b c`) VALUES (1)'):
                    cursor.execute(statement)

        self.catalog = test_utils.discover_catalog(self.conn, {})

        target_stream = self.catalog.streams[0]
        # The stream name is deliberately different from the table name.
        target_stream.stream = 'some_stream_name'

        stream_level = {
            'breadcrumb': (),
            'metadata': {
                'selected': True,
                'table-key-properties': [],
                'database-name': 'tap_mysql_test',
            },
        }
        spaced_column = {
            'breadcrumb': ('properties', 'b c'),
            'metadata': {'selected': True},
        }
        target_stream.metadata = [stream_level, spaced_column]

        test_utils.set_replication_method_and_key(target_stream, 'FULL_TABLE', None)
def init_tables(conn):
    """Create ``table_1`` and ``table_2``, load their fixture rows, return the catalog.

    Both tables share one layout (auto-increment ``id``, ``foo``, ``bar``).
    Rows come from ``TABLE_1_DATA`` and ``TABLE_2_DATA`` and are inserted
    through ``insert_record`` after both tables exist.
    """
    create_table_template = """
            CREATE TABLE {} (
            id  BIGINT AUTO_INCREMENT PRIMARY KEY,
            foo BIGINT,
            bar VARCHAR(10)
            )"""

    with connect_with_backoff(conn) as open_conn:
        with open_conn.cursor() as cur:
            for table_name in ("table_1", "table_2"):
                cur.execute(create_table_template.format(table_name))

    fixtures = (("table_1", TABLE_1_DATA), ("table_2", TABLE_2_DATA))
    for table_name, rows in fixtures:
        for record in rows:
            insert_record(conn, table_name, record)

    return test_utils.discover_catalog(conn, {})
Exemplo n.º 18
0
def calculate_bookmark(mysql_conn, binlog_streams_map, state):
    """Pick the binlog file and position from which replication should resume.

    The per-file minimum positions come from ``get_min_log_pos_per_log_file``.
    The server's logs (``SHOW BINARY LOGS``) are walked in sorted name order
    and the first one with a truthy entry in that mapping wins.

    Returns:
        A ``(log_file, log_pos)`` tuple.

    Raises:
        Exception: if a log file named in the state is not listed by the
            server, or if no position could be returned (which includes the
            server listing no binary logs at all).
    """
    min_log_pos_per_file = get_min_log_pos_per_log_file(
        binlog_streams_map, state)

    with connect_with_backoff(mysql_conn) as open_conn:
        with open_conn.cursor() as cur:
            cur.execute("SHOW BINARY LOGS")
            log_rows = cur.fetchall()

            if log_rows:
                logs_on_server = {log_row[0] for log_row in log_rows}
                logs_in_state = set(min_log_pos_per_file.keys())
                missing_logs = logs_in_state - logs_on_server

                if missing_logs:
                    raise Exception(
                        "Unable to replicate binlog stream because the following binary log(s) no longer exist: {}"
                        .format(", ".join(missing_logs)))

                for candidate in sorted(logs_on_server):
                    position_entry = min_log_pos_per_file.get(candidate)
                    if position_entry:
                        return candidate, position_entry['log_pos']

            # Reached when the server lists no logs, and also when none of
            # the listed logs has a usable entry in the state.
            raise Exception(
                "Unable to replicate binlog stream because no binary logs exist on the server."
            )
Exemplo n.º 19
0
def sync_table(mysql_conn, catalog_entry, state, columns, stream_version):
    """Run a full-table sync of one stream, resuming by primary key when possible.

    An ACTIVATE_VERSION message is always written once the sync has finished.
    It is also written up front unless ``initial_full_table_complete`` is
    bookmarked, or the bookmark has a ``version`` key whose value is None.

    The SELECT is built from the catalog entry and columns, with a primary-key
    clause appended when ``sync_is_resumable`` says so. If
    ``_create_temp_table`` succeeds, the statement it returns replaces that
    SELECT; if it raises, the failure is logged and the original is used.
    """
    stream_id = catalog_entry.tap_stream_id

    common.whitelist_bookmark_keys(
        generate_bookmark_keys(catalog_entry), stream_id, state)

    stream_bookmark = state.get('bookmarks', {}).get(stream_id, {})
    version_exists = 'version' in stream_bookmark

    initial_full_table_complete = singer.get_bookmark(
        state, stream_id, 'initial_full_table_complete')
    state_version = singer.get_bookmark(state, stream_id, 'version')

    activate_version_message = singer.ActivateVersionMessage(
        stream=catalog_entry.stream, version=stream_version)

    # For the initial replication, emit ACTIVATE_VERSION first so the records
    # show up right away.
    version_key_is_blank = version_exists and state_version is None
    if not initial_full_table_complete and not version_key_is_blank:
        singer.write_message(activate_version_message)

    perform_resumable_sync = sync_is_resumable(mysql_conn, catalog_entry)

    pk_clause = ""

    with connect_with_backoff(mysql_conn) as open_conn:
        with open_conn.cursor() as cur:
            select_sql = common.generate_select_sql(catalog_entry, columns)

            if perform_resumable_sync:
                LOGGER.info(
                    "Full table sync is resumable based on primary key definition, will replicate incrementally"
                )

                state = update_incremental_full_table_state(
                    catalog_entry, state, cur)
                pk_clause = generate_pk_clause(catalog_entry, state)

            select_sql += pk_clause

            # Best effort: on failure keep the SELECT built above.
            try:
                select_sql = _create_temp_table(
                    mysql_conn, catalog_entry, columns, pk_clause)
            except Exception as ex:
                logging.warning("creating temp table failed: {}".format(
                    str(ex)))

            params = {}

            common.sync_query(cur, catalog_entry, state, select_sql, columns,
                              stream_version, params)

    # A completed sync no longer needs the resume markers.
    for resume_key in ('max_pk_values', 'last_pk_fetched'):
        singer.clear_bookmark(state, stream_id, resume_key)

    singer.write_message(activate_version_message)
Exemplo n.º 20
0
def sync_table(mysql_conn,
               catalog_entry,
               state,
               columns,
               original_state_file=''):
    """Incrementally sync one stream using its replication key.

    If the replication key in the stream metadata matches the bookmarked one,
    the bookmarked ``replication_key_value`` is used as the lower bound of the
    query. Otherwise the bookmark is switched to the new key and the stored
    value is cleared, so the SELECT runs without a WHERE clause.

    An ACTIVATE_VERSION message, with the stream named
    ``<database>_<stream>``, is written before any rows are queried.
    """
    stream_id = catalog_entry.tap_stream_id

    common.whitelist_bookmark_keys(BOOKMARK_KEYS, stream_id, state)

    stream_metadata = metadata.to_map(catalog_entry.metadata).get((), {})

    configured_key = stream_metadata.get('replication-key')
    bookmarked_key = singer.get_bookmark(state, stream_id, 'replication_key')

    if configured_key == bookmarked_key:
        lower_bound = singer.get_bookmark(
            state, stream_id, 'replication_key_value')
    else:
        # The key changed (or was never bookmarked): start over with the new one.
        lower_bound = None
        state = singer.write_bookmark(
            state, stream_id, 'replication_key', configured_key)
        state = singer.clear_bookmark(
            state, stream_id, 'replication_key_value')

    stream_version = common.get_stream_version(stream_id, state)
    state = singer.write_bookmark(state, stream_id, 'version', stream_version)

    qualified_stream_name = '%s_%s' % (
        common.get_database_name(catalog_entry), catalog_entry.stream)
    activate_version_message = singer.ActivateVersionMessage(
        stream=qualified_stream_name, version=stream_version)

    singer.write_message(activate_version_message)

    with connect_with_backoff(mysql_conn) as open_conn:
        with open_conn.cursor() as cur:
            select_sql = common.generate_select_sql(catalog_entry, columns)
            params = {}

            if lower_bound is not None:
                key_schema = catalog_entry.schema.properties[configured_key]
                if key_schema.format == 'date-time':
                    # Bookmarked date-times are parsed before being bound.
                    lower_bound = pendulum.parse(lower_bound)

                select_sql += ' WHERE `{}` >= %(replication_key_value)s ORDER BY `{}` ASC'.format(
                    configured_key, configured_key)

                params['replication_key_value'] = lower_bound

            common.sync_query(cur, catalog_entry, state, select_sql, columns,
                              stream_version, params, original_state_file)
    def test_open_connections_with_invalid_session_sqls(self):
        """An invalid entry in session_sqls is skipped; the valid ones still run."""
        valid_first = 'SET SESSION max_statement_time=0'
        invalid_entry = 'INVALID-SQL-SHOULD-BE-SILENTLY-IGNORED'
        valid_last = 'SET SESSION wait_timeout=28800'

        session_sqls = [valid_first, invalid_entry, valid_last]
        config = {**test_utils.get_db_config(), 'session_sqls': session_sqls}

        with patch('tap_mysql.connection.MySQLConnection.connect'), \
                patch('tap_mysql.connection.run_sql') as patched_run_sql:
            patched_run_sql.side_effect = self.run_sql_mock
            mock_conn = MySQLConnectionMock(config=config)
            connect_with_backoff(mock_conn)

        # Only the two valid statements are expected among the executed queries.
        self.assertEqual(self.executed_queries, [valid_first, valid_last])
    def setUp(self):
        """Build two binlog fixture tables and bookmark the pre-fixture log position.

        The current binlog file and position are read before any fixture
        statement runs. ``binlog_1`` and ``binlog_2`` are then created and
        receive inserts, an update and a delete each. Every discovered stream
        is set to LOG_BASED replication and bookmarked with that file and
        position plus a ``version`` taken from ``singer.utils.now()``.
        """
        self.maxDiff = None
        self.state = {}
        self.conn = test_utils.get_test_connection()

        log_file, log_pos = binlog.fetch_current_log_file_and_pos(self.conn)

        fixture_statements = (
            "CREATE TABLE binlog_1 (id int, updated datetime)",
            "CREATE TABLE binlog_2 (id int, updated datetime)",
            "INSERT INTO binlog_1 (id, updated) VALUES (1, '2017-06-01')",
            "INSERT INTO binlog_1 (id, updated) VALUES (2, '2017-06-20')",
            "INSERT INTO binlog_1 (id, updated) VALUES (3, '2017-09-22')",
            "INSERT INTO binlog_2 (id, updated) VALUES (1, '2017-10-22')",
            "INSERT INTO binlog_2 (id, updated) VALUES (2, '2017-11-10')",
            "INSERT INTO binlog_2 (id, updated) VALUES (3, '2017-12-10')",
            "UPDATE binlog_1 set updated='2018-06-18' WHERE id = 3",
            "UPDATE binlog_2 set updated='2018-06-18' WHERE id = 2",
            "DELETE FROM binlog_1 WHERE id = 2",
            "DELETE FROM binlog_2 WHERE id = 1",
        )

        with connect_with_backoff(self.conn) as open_conn:
            with open_conn.cursor() as cursor:
                for statement in fixture_statements:
                    cursor.execute(statement)

            open_conn.commit()

        self.catalog = test_utils.discover_catalog(self.conn, {})

        for stream in self.catalog.streams:
            stream.stream = stream.table

            # NOTE(review): 'table-key-propertes' looks like a typo for
            # 'table-key-properties'. It is kept as is because assertions
            # elsewhere may depend on the current behavior - confirm.
            table_entry = {
                'breadcrumb': (),
                'metadata': {
                    'selected': True,
                    'database-name': 'tap_mysql_test',
                    'table-key-propertes': ['id'],
                },
            }
            id_entry = {
                'breadcrumb': ('properties', 'id'),
                'metadata': {'selected': True},
            }
            updated_entry = {
                'breadcrumb': ('properties', 'updated'),
                'metadata': {'selected': True},
            }
            stream.metadata = [table_entry, id_entry, updated_entry]

            test_utils.set_replication_method_and_key(stream, 'LOG_BASED', None)

            stream_id = stream.tap_stream_id
            self.state = singer.write_bookmark(
                self.state, stream_id, 'log_file', log_file)
            self.state = singer.write_bookmark(
                self.state, stream_id, 'log_pos', log_pos)
            self.state = singer.write_bookmark(
                self.state, stream_id, 'version', singer.utils.now())
    def test_sync_messages_are_correct(self):
        """Run an initial sync, change rows, and check the follow-up sync's records.

        The first sync runs with an empty state and must emit the four
        existing rows. Its last STATE message is then fed into a second sync,
        which must emit the four updated rows and the two inserted ones.
        """
        streams = self.catalog.streams
        streams[0] = test_utils.set_replication_method_and_key(streams[0], 'LOG_BASED', None)
        streams[0] = test_utils.set_selected(streams[0], True)

        def emitted(message_type):
            # Snapshot of the captured messages of one type, in emission order.
            return [msg for msg in SINGER_MESSAGES if isinstance(msg, message_type)]

        global SINGER_MESSAGES
        SINGER_MESSAGES.clear()

        # initial sync
        tap_mysql.do_sync(self.conn, {}, self.catalog, {})

        # the schema message must list all of the table's columns
        schema_message = next(
            msg for msg in SINGER_MESSAGES if isinstance(msg, singer.SchemaMessage))
        expected_keys = {'good_pk', 'age'}

        self.assertEqual(schema_message.schema['properties'].keys(), expected_keys)

        # these records are generated by Full table replication
        record_messages = emitted(singer.RecordMessage)

        self.assertEqual(len(record_messages), 4)
        self.assertListEqual(
            [
                {'age': 20, 'good_pk': '61'},
                {'age': 30, 'good_pk': '62'},
                {'age': 30, 'good_pk': '63'},
                {'age': 40, 'good_pk': '64'},
            ],
            [message.record for message in record_messages],
        )

        # the last state message is the input of the next sync
        state_message = emitted(singer.StateMessage)[-1]

        SINGER_MESSAGES.clear()

        # change existing rows and add two new ones
        with connect_with_backoff(self.conn) as open_conn:
            with open_conn.cursor() as cursor:
                cursor.execute("UPDATE good_pk_tab set age=age+5")
                cursor.execute(
                    "INSERT INTO good_pk_tab (good_pk, age) VALUES "
                    "(BINARY('e'), 16), "
                    "(BINARY('f'), 5)"
                )

        # sync again from the saved state so binlog replication starts at the
        # last synced position
        tap_mysql.do_sync(
            self.conn, test_utils.get_db_config(), self.catalog, state_message.value)

        # the changed and new records
        record_messages = emitted(singer.RecordMessage)

        self.assertEqual(len(record_messages), 6)
        self.assertListEqual(
            [
                {'age': 25, 'good_pk': '61'},
                {'age': 35, 'good_pk': '62'},
                {'age': 35, 'good_pk': '63'},
                {'age': 45, 'good_pk': '64'},
                {'age': 16, 'good_pk': '65'},
                {'age': 5, 'good_pk': '66'},
            ],
            [message.record for message in record_messages],
        )
def insert_record(conn, table_name, record):
    """Insert one record into ``<test_utils.DB_NAME>.<table_name>``.

    The statement always targets the ``foo`` and ``bar`` columns and gets one
    ``%s`` placeholder per element of ``record``; the values are passed to the
    cursor as query arguments.
    """
    placeholders = ",".join(["%s"] * len(record))

    statement = """
        INSERT INTO {}.{}
               ( `foo`, `bar` )
        VALUES ( {} )""".format(test_utils.DB_NAME, table_name, placeholders)

    with connect_with_backoff(conn) as open_conn:
        with open_conn.cursor() as cur:
            cur.execute(statement, record)
Exemplo n.º 25
0
def sync_table(mysql_conn, catalog_entry, state, columns):
    """Sync one stream using its replication key.

    The replication key configured in the stream-level metadata is
    compared with the one stored in ``state``. When they match, the stored
    ``replication_key_value`` bookmark is used as the lower bound of the
    query; otherwise the new key is bookmarked and the stored value is
    cleared, so the whole table is selected. An ACTIVATE_VERSION message
    is written before the query runs.

    Args:
        mysql_conn: connection object accepted by ``connect_with_backoff``.
        catalog_entry: catalog entry of the stream being synced.
        state: Singer state; rebound locally as bookmarks are written.
        columns: columns forwarded to ``common.generate_select_sql`` and
            ``common.sync_query``.
    """
    stream_id = catalog_entry.tap_stream_id

    # NOTE(review): presumably prunes bookmark keys not listed in
    # BOOKMARK_KEYS for this stream -- confirm against common.
    common.whitelist_bookmark_keys(BOOKMARK_KEYS, stream_id, state)

    # Stream-level metadata lives under the empty breadcrumb.
    stream_md = metadata.to_map(catalog_entry.metadata).get((), {})

    replication_key_metadata = stream_md.get("replication-key")
    bookmarked_key = singer.get_bookmark(state, stream_id, "replication_key")

    keys_match = replication_key_metadata == bookmarked_key
    if not keys_match:
        # The configured key differs from the bookmarked one: record the new
        # key and drop the old value so no lower bound is applied.
        state = singer.write_bookmark(
            state, stream_id, "replication_key", replication_key_metadata
        )
        state = singer.clear_bookmark(state, stream_id, "replication_key_value")
        replication_key_value = None
    else:
        replication_key_value = singer.get_bookmark(
            state, stream_id, "replication_key_value"
        )

    stream_version = common.get_stream_version(stream_id, state)
    state = singer.write_bookmark(state, stream_id, "version", stream_version)

    singer.write_message(
        singer.ActivateVersionMessage(
            stream=catalog_entry.stream, version=stream_version
        )
    )

    with connect_with_backoff(mysql_conn) as open_conn, open_conn.cursor() as cur:
        select_sql = common.generate_select_sql(catalog_entry, columns)
        params = {}

        if replication_key_value is not None:
            key_schema = catalog_entry.schema.properties[replication_key_metadata]
            if key_schema.format == "date-time":
                # Bookmarked date-times are parsed before being bound.
                replication_key_value = pendulum.parse(replication_key_value)

            # ">=" means rows equal to the bookmark are selected again.
            select_sql += " WHERE `{}` >= %(replication_key_value)s ORDER BY `{}` ASC".format(
                replication_key_metadata, replication_key_metadata
            )
            params["replication_key_value"] = replication_key_value
        elif replication_key_metadata is not None:
            select_sql += " ORDER BY `{}` ASC".format(replication_key_metadata)

        common.sync_query(
            cur, catalog_entry, state, select_sql, columns, stream_version, params
        )
Exemplo n.º 26
0
def fetch_current_log_file_and_pos(mysql_conn):
    """Return the first two columns of ``SHOW MASTER STATUS`` as a pair.

    Args:
        mysql_conn: connection object accepted by ``connect_with_backoff``.

    Returns:
        A ``(current_log_file, current_log_pos)`` tuple taken from the
        first row of the result.

    Raises:
        Exception: when the statement returns no row, which is reported
            as binary logging not being enabled.
    """
    with connect_with_backoff(mysql_conn) as open_conn, open_conn.cursor() as cur:
        cur.execute("SHOW MASTER STATUS")
        status_row = cur.fetchone()

        if status_row is not None:
            # Only the first two columns are of interest.
            log_file, log_pos = status_row[0:2]
            return log_file, log_pos

        raise Exception("MySQL binary logging is not enabled.")
Exemplo n.º 27
0
    def setUp(self):
        """Build the incremental-replication fixtures.

        Creates and fills the ``incremental`` and ``integer_incremental``
        tables (three rows each), discovers the catalog, then overwrites
        each stream's metadata and sets it to INCREMENTAL replication on
        the ``updated`` column.
        """
        self.conn = test_utils.get_test_connection()

        # Executed in order: each table is created before its rows.
        fixture_statements = (
            "CREATE TABLE incremental (val int, updated datetime)",
            "INSERT INTO incremental (val, updated) VALUES (1, '2017-06-01')",
            "INSERT INTO incremental (val, updated) VALUES (2, '2017-06-20')",
            "INSERT INTO incremental (val, updated) VALUES (3, '2017-09-22')",
            "CREATE TABLE integer_incremental (val int, updated int)",
            "INSERT INTO integer_incremental (val, updated) VALUES (1, 1)",
            "INSERT INTO integer_incremental (val, updated) VALUES (2, 2)",
            "INSERT INTO integer_incremental (val, updated) VALUES (3, 3)",
        )

        with connect_with_backoff(self.conn) as open_conn, open_conn.cursor() as cursor:
            for statement in fixture_statements:
                cursor.execute(statement)

        self.catalog = test_utils.discover_catalog(self.conn, {})

        for stream in self.catalog.streams:
            # Fresh dicts per stream so no metadata object is shared.
            stream_level_entry = {
                "breadcrumb": (),
                "metadata": {
                    "selected": True,
                    "table-key-properties": [],
                    "database-name": "tap_mysql_test",
                },
            }
            val_column_entry = {
                "breadcrumb": ("properties", "val"),
                "metadata": {"selected": True},
            }
            stream.metadata = [stream_level_entry, val_column_entry]

            stream.stream = stream.table
            test_utils.set_replication_method_and_key(stream, "INCREMENTAL", "updated")
Exemplo n.º 28
0
    def setUp(self):
        """Create ``bad_pk_tab`` and ``good_pk_tab`` with one row each.

        ``bad_pk_tab`` uses a BINARY primary key and ``good_pk_tab`` an
        INT primary key.
        """
        self.conn = test_utils.get_test_connection()

        fixture_statements = (
            # BINARY not presently supported
            'CREATE TABLE bad_pk_tab (bad_pk BINARY, age INT, PRIMARY KEY (bad_pk))',
            'CREATE TABLE good_pk_tab (good_pk INT, age INT, PRIMARY KEY (good_pk))',
            "INSERT INTO bad_pk_tab (bad_pk, age) VALUES ('a', 100)",
            "INSERT INTO good_pk_tab (good_pk, age) VALUES (1, 100)",
        )

        with connect_with_backoff(self.conn) as open_conn, open_conn.cursor() as cursor:
            for statement in fixture_statements:
                cursor.execute(statement)
Exemplo n.º 29
0
    def setUp(self):
        """Create table ``a_table`` and the view ``a_view`` selecting from it."""
        self.conn = test_utils.get_test_connection()

        # SQL text is kept verbatim, including its embedded whitespace.
        create_table_sql = '''
                    CREATE TABLE a_table (
                      id int primary key,
                      a int,
                      b int)
                    '''
        create_view_sql = '''
                    CREATE VIEW a_view AS SELECT id, a FROM a_table
                    '''

        with connect_with_backoff(self.conn) as open_conn, open_conn.cursor() as cursor:
            # The view depends on the table, so the table comes first.
            cursor.execute(create_table_sql)
            cursor.execute(create_view_sql)
    def setUp(self):
        self.conn = test_utils.get_test_connection()

        with connect_with_backoff(self.conn) as open_conn:
            with open_conn.cursor() as cursor:
                cursor.execute(
                    # BINARY is presently supported
                    'CREATE TABLE good_pk_tab (good_pk BINARY(10), age INT, PRIMARY KEY (good_pk))')

                cursor.execute("INSERT INTO good_pk_tab (good_pk, age) VALUES "
                               "(BINARY('a'), 20), "
                               "(BINARY('b'), 30), "
                               "(BINARY('c'), 30), "
                               "(BINARY('d'), 40)")

        self.catalog = test_utils.discover_catalog(self.conn, {})