Пример #1
0
def get_max_pk_values(cursor, catalog_entry):
    database_name = common.get_database_name(catalog_entry)
    escaped_db = common.escape(database_name)
    escaped_table = common.escape(catalog_entry.table)

    key_properties = common.get_key_properties(catalog_entry)
    escaped_columns = [common.escape(c) for c in key_properties]

    sql = """SELECT {}
               FROM {}.{}
    """

    select_column_clause = ", ".join(
        ["max(" + pk + ")" for pk in escaped_columns])

    cursor.execute(sql.format(select_column_clause, escaped_db, escaped_table))
    result = cursor.fetchone()
    processed_results = []
    for bm in result:
        if isinstance(bm,
                      (datetime.date, datetime.datetime, datetime.timedelta)):
            processed_results += [common.to_utc_datetime_str(bm)]
        elif bm is not None:
            processed_results += [bm]

    max_pk_values = {}
    if processed_results:
        max_pk_values = dict(zip(key_properties, processed_results))

    return max_pk_values
Пример #2
0
def get_max_pk_values(cursor, catalog_entry):
    database_name = common.get_database_name(catalog_entry)
    escaped_db = common.escape(database_name)
    escaped_table = common.escape(catalog_entry.table)

    key_properties = common.get_key_properties(catalog_entry)
    escaped_columns = [common.escape(c) for c in key_properties]

    sql = """SELECT {}
               FROM {}.{}
              ORDER BY {}
              LIMIT 1
    """

    select_column_clause = ", ".join(escaped_columns)
    order_column_clause = ", ".join([pk + " DESC" for pk in escaped_columns])

    cursor.execute(
        sql.format(select_column_clause, escaped_db, escaped_table,
                   order_column_clause))
    result = cursor.fetchone()

    if result:
        max_pk_values = dict(zip(key_properties, result))
    else:
        max_pk_values = {}

    return max_pk_values
Пример #3
0
def generate_pk_clause(catalog_entry, state):
    key_properties = common.get_key_properties(catalog_entry)
    escaped_columns = [common.escape(c) for c in key_properties]

    where_clause = " AND ".join([pk + " > `{}`" for pk in escaped_columns])
    order_by_clause = ", ".join(['`{}`, ' for pk in escaped_columns])

    max_pk_values = singer.get_bookmark(state,
                                        catalog_entry.tap_stream_id,
                                        'max_pk_values')

    last_pk_fetched = singer.get_bookmark(state,
                                          catalog_entry.tap_stream_id,
                                          'last_pk_fetched')

    if last_pk_fetched:
        pk_comparisons = ["({} > {} AND {} <= {})".format(common.escape(pk),
                                                          last_pk_fetched[pk],
                                                          common.escape(pk),
                                                          max_pk_values[pk])
                          for pk in key_properties]
    else:
        pk_comparisons = ["{} <= {}".format(common.escape(pk), max_pk_values[pk])
                          for pk in key_properties]

    sql = " WHERE {} ORDER BY {} ASC".format(" AND ".join(pk_comparisons),
                                             ", ".join(escaped_columns))

    return sql
Пример #4
0
def generate_pk_clause(catalog_entry, state):
    key_properties = common.get_key_properties(catalog_entry)
    escaped_columns = [common.escape(c) for c in key_properties]

    where_clause = " AND ".join([pk + " > `{}`" for pk in escaped_columns])
    order_by_clause = ", ".join(['`{}`, ' for pk in escaped_columns])

    max_pk_values = singer.get_bookmark(state, catalog_entry.tap_stream_id,
                                        'max_pk_values')

    last_pk_fetched = singer.get_bookmark(state, catalog_entry.tap_stream_id,
                                          'last_pk_fetched')

    pk_comparisons = []

    if not max_pk_values:
        return ""

    if last_pk_fetched:
        for pk in key_properties:
            column_type = catalog_entry.schema.properties.get(pk).type

            # quote last/max PK val if column is VARCHAR
            if 'string' in column_type:
                last_pk_val = "'" + last_pk_fetched[pk] + "'"
                max_pk_val = "'" + max_pk_values[pk] + "'"
            else:
                last_pk_val = last_pk_fetched[pk]
                max_pk_val = max_pk_values[pk]

            pk_comparisons.append("({} > {} AND {} <= {})".format(
                common.escape(pk), last_pk_val, common.escape(pk), max_pk_val))
    else:
        for pk in key_properties:
            column_schema = catalog_entry.schema.properties.get(pk)
            column_type = column_schema.type

            # quote last/max PK val if column is VARCHAR
            if 'string' in column_type:
                pk_val = "'{}'".format(max_pk_values[pk])
            else:
                pk_val = max_pk_values[pk]

            pk_comparisons.append("{} <= {}".format(common.escape(pk), pk_val))

    sql = " WHERE {} ORDER BY {} ASC".format(" AND ".join(pk_comparisons),
                                             ", ".join(escaped_columns))

    return sql
Пример #5
0
def generate_pk_clause(catalog_entry, state):
    key_properties = common.get_key_properties(catalog_entry)

    max_pk_values = singer.get_bookmark(state, catalog_entry.tap_stream_id,
                                        'max_pk_values')

    last_pk_fetched = singer.get_bookmark(state, catalog_entry.tap_stream_id,
                                          'last_pk_fetched')

    last_pk_clause = ''
    max_pk_comparisons = []

    if not max_pk_values:
        return ""

    if last_pk_fetched:
        for pk in key_properties:
            column_type = catalog_entry.schema.properties.get(pk).type

            # Add AND to interpolate along with max_pk_values clauses
            last_pk_clause = '({}) AND '.format(
                generate_pk_bookmark_clause(key_properties, last_pk_fetched,
                                            catalog_entry))
            max_pk_comparisons.append("{} <= {}".format(
                common.escape(pk),
                quote_where_clause_value(max_pk_values[pk], column_type)))
    else:
        for pk in key_properties:
            column_schema = catalog_entry.schema.properties.get(pk)
            column_type = column_schema.type

            pk_val = quote_where_clause_value(max_pk_values[pk], column_type)

            max_pk_comparisons.append("{} <= {}".format(
                common.escape(pk), pk_val))

    order_by_columns = [common.escape(c) for c in key_properties]
    sql = " WHERE {}{} ORDER BY {} ASC".format(
        last_pk_clause, " AND ".join(max_pk_comparisons),
        ", ".join(order_by_columns))

    return sql
Пример #6
0
def _create_temp_table(open_connection, catalog_entry, columns,
                       pk_clause: str):
    with open_connection.cursor() as c:
        temporary_catalog_entry = copy.deepcopy(catalog_entry)
        logging.info("try creating temp table")
        db_name = common.get_database_name(temporary_catalog_entry)
        select_sql = common.generate_select_sql(catalog_entry, columns)
        if pk_clause:
            select_sql += pk_clause
        tmp_uuid = str(uuid.uuid4()).replace("-", "")
        temp_table_name = "tmp_" + tmp_uuid
        temp_sql = "CREATE TEMPORARY TABLE {}.{}  {}".format(
            common.escape(db_name), common.escape(temp_table_name), select_sql)
        logging.info("write data to temporary table: {}".format(temp_sql))
        c.execute(temp_sql)
        logging.info("created temporary table successful")
        temporary_catalog_entry.table = temp_table_name
        select_temp_sql = common.generate_select_sql(temporary_catalog_entry,
                                                     columns)
        if pk_clause:
            select_temp_sql += pk_clause
        return select_temp_sql
Пример #7
0
def generate_pk_bookmark_clause(key_properties, last_pk_fetched,
                                catalog_entry):
    """
    Generates a bookmark clause based on `key_properties`, and
    `last_pk_fetched` bookmark. This ensures that the stream is resumed at
    the location in the data set per primary key component. Inclusivity is
    not maintained, since these are primary keys.

    Example:

    key_properties = ['name','birthday']
    last_pk_fetched = {'name': "Phil Collins", 'birthday': "1951-01-30"}

    Returns:
    "(`name` > 'Phil Collins') OR (`name` = 'Phil Collins' AND `birthday` > '1951-01-30')
    """
    assert last_pk_fetched is not None, \
        "Must call generate_pk_bookmark with a non-null 'last_pk_fetched' dict"

    clause_terms = []
    inclusive_pk_values = []
    for pk in key_properties:
        term = []
        for prev_pk, prev_pk_val, prev_col_type in inclusive_pk_values:
            term.append(
                common.escape(prev_pk) + ' = ' +
                quote_where_clause_value(prev_pk_val, prev_col_type))

        column_type = catalog_entry.schema.properties.get(pk).type
        term.append(
            common.escape(pk) + ' > ' +
            quote_where_clause_value(last_pk_fetched[pk], column_type))
        inclusive_pk_values.append((pk, last_pk_fetched[pk], column_type))

        clause_terms.append(' AND '.join(term))
    return '({})'.format(') OR ('.join(clause_terms)) if clause_terms else ''