# Imports reconstructed from usage in the functions below. The tap-local
# helpers (common, full_table, MSSQLConnection, connect_with_backoff,
# RESUMABLE_PK_TYPES, generate_pk_bookmark_clause, quote_where_clause_value)
# are assumed to be in scope in the original modules; their exact module
# paths are not shown here.
import copy

import singer

LOGGER = singer.get_logger()


def write_schema_message(catalog_entry, bookmark_properties=None):
    """Emit a Singer SCHEMA message describing the stream."""
    key_properties = common.get_key_properties(catalog_entry)

    singer.write_message(singer.SchemaMessage(
        stream=catalog_entry.stream,
        schema=catalog_entry.schema.to_dict(),
        key_properties=key_properties,
        bookmark_properties=bookmark_properties or []))
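# A minimal usage sketch, not part of the original module: emit a SCHEMA
# message per stream in the catalog, passing the replication key (when one
# is set on the catalog entry) as a bookmark property. `catalog.streams` and
# `catalog_entry.replication_key` are standard singer-python attributes.
def write_all_schema_messages(catalog):
    for catalog_entry in catalog.streams:
        if catalog_entry.replication_key:
            write_schema_message(catalog_entry, [catalog_entry.replication_key])
        else:
            write_schema_message(catalog_entry)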
def do_sync_full_table(mssql_conn, config, catalog_entry, state, columns):
    key_properties = common.get_key_properties(catalog_entry)
    # Re-open the connection from config so the sync runs on a fresh session.
    mssql_conn = MSSQLConnection(config)

    write_schema_message(catalog_entry)

    stream_version = common.get_stream_version(catalog_entry.tap_stream_id, state)

    full_table.sync_table(mssql_conn, config, catalog_entry, state, columns, stream_version)

    # Prefer initial_full_table_complete going forward
    singer.clear_bookmark(state, catalog_entry.tap_stream_id, "version")

    state = singer.write_bookmark(
        state, catalog_entry.tap_stream_id, "initial_full_table_complete", True
    )

    singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))
def do_sync_full_table(mssql_conn, config, catalog_entry, state, columns):
    LOGGER.info("Stream %s is using full table replication", catalog_entry.stream)

    key_properties = common.get_key_properties(catalog_entry)

    write_schema_message(catalog_entry)

    stream_version = common.get_stream_version(catalog_entry.tap_stream_id, state)

    full_table.sync_table(mssql_conn, catalog_entry, state, columns, stream_version)

    # Prefer initial_full_table_complete going forward
    singer.clear_bookmark(state, catalog_entry.tap_stream_id, 'version')

    state = singer.write_bookmark(
        state, catalog_entry.tap_stream_id, 'initial_full_table_complete', True)

    singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))
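# Hedged sketch of how a tap's top-level sync loop might dispatch to
# do_sync_full_table. The column-selection logic and the function name here
# are assumptions for illustration, not part of the original modules.
def sync_stream(mssql_conn, config, catalog_entry, state):
    # Assumes every column in the discovered schema is selected.
    columns = list(catalog_entry.schema.properties.keys())
    do_sync_full_table(mssql_conn, config, catalog_entry, state, columns)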
def generate_pk_clause(catalog_entry, state):
    key_properties = common.get_key_properties(catalog_entry)
    max_pk_values = singer.get_bookmark(state, catalog_entry.tap_stream_id, 'max_pk_values')
    last_pk_fetched = singer.get_bookmark(state, catalog_entry.tap_stream_id, 'last_pk_fetched')

    last_pk_clause = ''
    max_pk_comparisons = []

    if not max_pk_values:
        return ""

    if last_pk_fetched:
        # The bookmark clause is the same for every key, so build it once;
        # the trailing AND interpolates it with the max_pk_values clauses.
        last_pk_clause = '({}) AND '.format(
            generate_pk_bookmark_clause(key_properties, last_pk_fetched, catalog_entry))

        for pk in key_properties:
            column_type = catalog_entry.schema.properties.get(pk).type
            max_pk_comparisons.append("{} <= {}".format(
                common.escape(pk),
                quote_where_clause_value(max_pk_values[pk], column_type)))
    else:
        for pk in key_properties:
            column_type = catalog_entry.schema.properties.get(pk).type
            pk_val = quote_where_clause_value(max_pk_values[pk], column_type)
            max_pk_comparisons.append("{} <= {}".format(common.escape(pk), pk_val))

    order_by_columns = [common.escape(c) for c in key_properties]
    sql = " WHERE {}{} ORDER BY {} ASC".format(
        last_pk_clause,
        " AND ".join(max_pk_comparisons),
        ", ".join(order_by_columns))

    return sql
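# Illustration only: the bookmark shape generate_pk_clause reads. The keys
# ('max_pk_values', 'last_pk_fetched') match the singer.get_bookmark lookups
# above; the stream id and values are made up.
example_state = {
    'bookmarks': {
        'dbo-orders': {
            'max_pk_values': {'id': 1000},
            'last_pk_fetched': {'id': 250},
        }
    }
}
# With a single integer key `id`, the resulting SQL is shaped roughly like
#   " WHERE (<clause from generate_pk_bookmark_clause>) AND <id> <= 1000 ORDER BY <id> ASC"
# where the exact identifier quoting comes from common.escape.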
def sync_is_resumable(mysql_conn, catalog_entry):
    '''In order to resume a full table sync, a table requires a primary key
    whose columns all have resumable (orderable) types.
    '''
    database_name = common.get_database_name(catalog_entry)
    key_properties = common.get_key_properties(catalog_entry)

    if not key_properties:
        return False

    sql = """SELECT data_type
               FROM information_schema.columns
              WHERE table_schema = '{}'
                AND table_name = '{}'
                AND column_name = '{}'"""

    with connect_with_backoff(mysql_conn) as open_conn:
        with open_conn.cursor() as cur:
            for pk in key_properties:
                cur.execute(sql.format(database_name, catalog_entry.table, pk))

                result = cur.fetchone()

                if not result:
                    raise Exception("Primary key column {} does not exist.".format(pk))

                if result[0] not in RESUMABLE_PK_TYPES:
                    LOGGER.warning(
                        "Found primary key column %s with type %s. Will not be able "
                        "to resume interrupted FULL_TABLE sync using this key.",
                        pk, result[0])
                    return False

    return True
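# Hedged sketch (not from the original module) of the decision this check
# feeds: use a PK-ordered sync that can resume from bookmarks when possible,
# otherwise fall back to an unordered scan that restarts if interrupted.
# Both branch helpers below are hypothetical names.
def choose_full_table_strategy(mysql_conn, catalog_entry, state, columns):
    if sync_is_resumable(mysql_conn, catalog_entry):
        # Interrupted runs can resume from the last_pk_fetched bookmark.
        perform_resumable_sync(mysql_conn, catalog_entry, state, columns)  # hypothetical
    else:
        # No resumable key: an interrupted sync starts over from scratch.
        perform_unordered_sync(mysql_conn, catalog_entry, state, columns)  # hypothetical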