Пример #1
0
def up(client):
    """Append the four rank-level INTEGER columns to the table schema.

    The update is skipped, with a warning, when the last column of the
    current schema is already ``client_paid_rank_level``.

    Returns the dataset looked up through ``migration.dataset(dataset_name)``.
    """
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)
    table = migration.client.get_table(dataset.table(table_name))

    # The trailing column acts as the "already migrated" marker.
    if table.schema[-1].name == 'client_paid_rank_level':
        logging.warning(
            f'rank levels  already added to {table_name} in {dataset_name} dataset!'
        )
        return dataset

    rank_level_columns = (
        'lifetime_rank_level',
        'paid_rank_level',
        'client_lifetime_rank_level',
        'client_paid_rank_level',
    )
    extended_schema = table.schema.copy()
    extended_schema.extend(
        bigquery.SchemaField(column, 'INTEGER')
        for column in rank_level_columns)

    table.schema = extended_schema
    migration.client.update_table(table, ['schema'])
    return dataset
def up(client):
    """Rebuild the table schema with ``leo_eid`` as a NULLABLE STRING.

    Every other column is recreated from its name, field type and mode.
    Nothing is changed (a warning is logged) when the second column of the
    current schema is already NULLABLE.

    Returns the dataset looked up through ``migration.dataset(dataset_name)``.
    """
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)
    table = migration.client.get_table(dataset.table(table_name))

    # NOTE(review): the guard looks at position 1 only; presumably that is
    # where leo_eid lives in this table -- confirm against the real schema.
    if table.schema[1].mode == 'NULLABLE':
        logging.warning(
            'leo_eid is already set to NULLABLE. Cannot do it again')
        return dataset

    def _rebuild(field):
        # leo_eid gets the relaxed mode; all other columns keep their own.
        if field.name == 'leo_eid':
            return bigquery.SchemaField('leo_eid', 'STRING', mode='NULLABLE')
        return bigquery.SchemaField(field.name, field.field_type,
                                    mode=field.mode)

    table.schema = [_rebuild(field) for field in table.schema]
    migration.client.update_table(table, ['schema'])
    return dataset
Пример #3
0
def up(client):
    """Set the ``client_paid_rank`` and ``paid_rank`` columns to NULLABLE.

    The schema is rebuilt column by column. As soon as one of the two rank
    columns is found to be NULLABLE already, a warning is logged and the
    function returns without touching the table.

    Returns the dataset looked up through ``migration.dataset(dataset_name)``.
    """
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)
    table = migration.client.get_table(dataset.table(table_name))

    rank_columns = ('client_paid_rank', 'paid_rank')
    rebuilt = []
    for field in table.schema:
        is_rank_column = field.name in rank_columns

        if is_rank_column and field.mode == 'NULLABLE':
            logging.warning(
                'paid_rank fields are already set to NULLABLE. Cannot do it again'
            )
            return dataset

        # Rank columns are relaxed; everything else keeps its current mode.
        mode = 'NULLABLE' if is_rank_column else field.mode
        rebuilt.append(
            bigquery.SchemaField(field.name, field.field_type, mode=mode))

    table.schema = rebuilt
    migration.client.update_table(table, ['schema'])
    return dataset
Пример #4
0
def up(client):
    """Create the time-partitioned prediction table in an existing dataset.

    The clustering fields are the migration's defaults with the first entry
    replaced by ``client_wrench_id:STRING``.

    Returns the dataset looked up through ``migration.dataset(dataset_name)``.

    Raises:
        IndexError: if the default clustering field list is empty (there is
            no first entry to replace).
    """
    migration = BigQueryMigration(client)

    dataset = migration.dataset(
        dataset_name
    )  # use me if you are NOT creating a new dataset. -- ndg 2/5/20

    # Work on a copy: editing the list returned by the migration object in
    # place could leak this table's clustering into whatever else shares it.
    clusters = list(migration.default_clustering_fields)
    clusters[0] = 'client_wrench_id:STRING'

    schema = [
        bigquery.SchemaField('entity_id', 'STRING', mode='REQUIRED'),
        bigquery.SchemaField("tree_user_id", "INTEGER", mode="REQUIRED"),
        bigquery.SchemaField("prediction", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("client_wrench_id", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("expirement_name", "STRING"),
        bigquery.SchemaField("processing_datetime", "DATETIME")
    ]

    migration.create_table(name=table_name,
                           project=migration.client.project,
                           schema=schema,
                           dataset=dataset,
                           partition={'type': 'time'},
                           clustering_fields=clusters)

    return dataset
def down(client):
    """Delete the ``bluesun`` row from ``system.clients``.

    Returns the dataset looked up through ``migration.dataset('system')``.
    """
    migration = BigQueryMigration(client)
    dataset = migration.dataset('system')

    job = migration.client.query(
        f'DELETE FROM {migration.client.project}.system.clients WHERE icentris_client = "bluesun"'
    )
    # Wait for the DML job to finish so that a failed delete raises here
    # instead of being dropped silently when the function returns.
    job.result()

    return dataset
Пример #6
0
def down(client):
    """Log a warning for each current column that is listed in ``new_fields``.

    The table itself is not modified.

    Returns the dataset looked up with ``dataset_name`` formatted by
    ``client.env``.
    """
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name.format(client.env))
    table = migration.client.get_table(dataset.table(table_name))

    for table_field in (column.name for column in table.schema):
        if table_field not in new_fields:
            continue
        logging.warning(f'{table_field} already added to {table_name} in {dataset_name} dataset!')

    return dataset
def down(client):
    """Delete every table listed in the dataset, then the dataset itself.

    Unlike most of the sibling migrations, nothing is returned.
    """
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)

    bq = migration.client
    # Tables go first, then the dataset that held them.
    for listed_table in bq.list_tables(dataset):
        bq.delete_table(listed_table)

    bq.delete_dataset(dataset)
Пример #8
0
def down(client):
    """Warn that the rank-level columns cannot be removed.

    The warning is logged only when the last schema column is
    ``client_paid_rank_level``; the table is never modified.

    Returns the dataset looked up through ``migration.dataset(dataset_name)``.
    """
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)
    table = migration.client.get_table(dataset.table(table_name))

    last_column = table.schema[-1]
    if last_column.name == 'client_paid_rank_level':
        logging.warning(
            f'Cannot remove rank_level columns from table {table_name}')

    return dataset
def down(client):
    """Warn that relaxing ``leo_eid`` to NULLABLE cannot be rolled back.

    The warning is logged only when the second schema column is NULLABLE;
    the table is never modified.

    Returns the dataset looked up through ``migration.dataset(dataset_name)``.
    """
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)
    table = migration.client.get_table(dataset.table(table_name))

    second_column = table.schema[1]
    if second_column.mode == 'NULLABLE':
        logging.warning(
            'leo_eid is already set to NULLABLE. Cannot roll this one back')

    return dataset
def down(client):
    """Warn that the ``product_type`` column cannot be removed.

    The warning is logged only when ``product_type`` is the last schema
    column; the table is never modified.

    Returns the dataset looked up through ``migration.dataset(dataset_name)``.
    """
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)
    table = migration.client.get_table(dataset.table(table_name))

    last_column = table.schema[-1]
    if last_column.name == 'product_type':
        logging.warning(
            f'Cannot remove column product_type from table {table_name}')

    return dataset
def up(client):
    """Insert the ``bluesun`` row into ``system.clients``.

    The query result is printed once the job has finished.

    Returns the dataset looked up through ``migration.dataset('system')``.
    """
    migration = BigQueryMigration(client)
    dataset = migration.dataset('system')

    insert_statement = f"""
        INSERT INTO {migration.client.project}.system.clients (icentris_client, partition_id, wrench_id) VALUES
        ('bluesun', 4, '5396d067-9e31-4572-951a-a7d1b0a5eaf6')"""

    # result() is called on the job before anything is printed.
    print(migration.client.query(insert_statement).result())

    return dataset
Пример #12
0
def down(client):
    """Log a warning for each paid-rank column that is NULLABLE.

    Checks ``client_paid_rank`` and ``paid_rank``; the table is never
    modified.

    Returns the dataset looked up through ``migration.dataset(dataset_name)``.
    """
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)
    table = migration.client.get_table(dataset.table(table_name))

    rank_columns = ('client_paid_rank', 'paid_rank')
    for field in table.schema:
        if field.name in rank_columns and field.mode == 'NULLABLE':
            logging.warning(
                'paid_rank fields are already set to NULLABLE. Cannot do it again'
            )

    return dataset
Пример #13
0
def up(client):
    """Create the product-reviews table, range-partitioned by client.

    Returns the dataset looked up through ``migration.dataset(dataset_name)``.
    """
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)

    required_columns = (
        ('client_partition_id', 'INTEGER'),
        ('client_wrench_id', 'STRING'),
        ('icentris_client', 'STRING'),
        ('site_id', 'INTEGER'),
        ('user_id', 'INTEGER'),
    )
    optional_columns = (
        ('tree_user_id', 'INTEGER'),
        ('product_code', 'STRING'),
        ('product_name', 'STRING'),
        ('product_description', 'STRING'),
        ('product_short_description', 'STRING'),
        ('product_available_on', 'DATETIME'),
        ('product_discontinued_on', 'DATETIME'),
        ('product_slug', 'STRING'),
        ('product_avg_rating', 'NUMERIC'),
        ('product_reviews_count', 'INTEGER'),
        ('review_name', 'STRING'),
        ('location', 'STRING'),
        ('rating', 'INTEGER'),
        ('title', 'STRING'),
        ('review', 'STRING'),
        ('approved', 'BOOLEAN'),
        ('created_at', 'DATETIME'),
        ('updated_at', 'DATETIME'),
        ('ip_address', 'STRING'),
        ('show_identifier', 'BOOLEAN'),
    )

    # Required columns come first, followed by the default-mode ones.
    product_reviews = [
        bigquery.SchemaField(column, kind, mode='REQUIRED')
        for column, kind in required_columns
    ]
    product_reviews += [
        bigquery.SchemaField(column, kind)
        for column, kind in optional_columns
    ]

    range_partition = {
        'type': 'range',
        'field': 'client_partition_id',
        'start': 1,
        'end': 100,
        'interval': 1
    }
    # NOTE(review): the clustering columns are not part of the schema list
    # above; presumably create_table adds them -- confirm.
    migration.create_table(
        name=table_name,
        project=migration.client.project,
        schema=product_reviews,
        dataset=dataset,
        partition=range_partition,
        clustering_fields=['leo_eid:STRING', 'ingestion_timestamp:TIMESTAMP'])
    return dataset
Пример #14
0
def up(client):
    """Recreate the table with a REQUIRED ``commission_user_id`` column.

    The new INTEGER column is inserted at schema position 1. The existing
    table is deleted and then created again with the extended schema.

    Returns the dataset looked up through ``migration.dataset(dataset_name)``.
    """
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)
    table = migration.client.get_table(dataset.table(table_name))

    commission_column = bigquery.SchemaField(
        'commission_user_id', 'INTEGER', mode='REQUIRED')
    extended_schema = table.schema.copy()
    extended_schema.insert(1, commission_column)

    range_partition = {
        'type': 'range',
        'field': 'client_partition_id',
        'start': 1,
        'end': 100,
        'interval': 1
    }

    # Drop and recreate: the table is rebuilt from scratch with the new schema.
    migration.delete_table(table)
    migration.create_table(
        name=table_name,
        project=migration.client.project,
        schema=extended_schema,
        dataset=dataset,
        clustering_fields=['leo_eid:STRING', 'ingestion_timestamp:TIMESTAMP'],
        partition=range_partition)
    return dataset
Пример #15
0
def down(client):
    """Recreate the table without the ``commission_user_id`` column.

    Nothing happens unless the column at schema position 1 is
    ``commission_user_id``. When it is, the table is deleted and created
    again with that column removed.

    Returns the dataset looked up through ``migration.dataset(dataset_name)``.
    """
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)
    table = migration.client.get_table(dataset.table(table_name))

    reverted_schema = table.schema.copy()
    if reverted_schema[1].name != 'commission_user_id':
        # Column is not where the up migration put it: leave the table alone.
        return dataset

    reverted_schema.pop(1)
    migration.delete_table(table)
    migration.create_table(
        name=table_name,
        project=migration.client.project,
        schema=reverted_schema,
        dataset=dataset,
        partition={
            'type': 'range',
            'field': 'client_partition_id',
            'start': 1,
            'end': 100,
            'interval': 1
        },
        clustering_fields=['leo_eid:STRING', 'ingestion_timestamp:TIMESTAMP'])

    return dataset
def up(client):
    """Create the ``staging`` dataset and one table per entry in ``schemas``.

    Every table is range-partitioned on ``client_partition_id`` and clustered
    on ``leo_eid`` and ``ingestion_timestamp``.

    Returns the dataset produced by ``create_dataset('staging')``.
    """
    migration = BigQueryMigration(client)
    dataset = migration.create_dataset('staging')

    for table_id, table_schema in schemas.items():
        # Fresh partition/cluster objects are built for every table.
        range_partition = {
            'type': 'range',
            'field': 'client_partition_id',
            'start': 1,
            'end': 100,
            'interval': 1,
        }
        clusters = ['leo_eid:STRING', 'ingestion_timestamp:TIMESTAMP']

        migration.create_table(
            name=table_id,
            project=migration.client.project,
            schema=table_schema,
            dataset=dataset,
            partition=range_partition,
            clustering_fields=clusters)

    return dataset
def up(client):
    """Create the flat site-visitors table, range-partitioned by client.

    Returns the dataset looked up through ``migration.dataset(dataset_name)``.
    """
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)

    required_columns = (
        ('client_partition_id', 'INTEGER'),
        ('client_wrench_id', 'STRING'),
        ('icentris_client', 'STRING'),
        ('site_id', 'INTEGER'),
        ('user_id', 'INTEGER'),
    )
    optional_columns = (
        ('tree_user_id', 'INTEGER'),
        ('visitor_id', 'STRING'),
        ('last_visit_date', 'DATETIME'),
        ('visit_count', 'INTEGER'),
        ('ipaddress', 'STRING'),
        ('browser_agent', 'STRING'),
        ('created_at', 'DATETIME'),
        ('site_template_id', 'INTEGER'),
        ('active', 'INTEGER'),
        ('third_party_tracking_company', 'STRING'),
        ('tracking_code', 'STRING'),
        ('owner_name', 'STRING'),
        ('email', 'STRING'),
        ('story', 'STRING'),
        ('avatar_file_name', 'STRING'),
    )

    # Required columns come first, followed by the default-mode ones.
    flat_site_visitors = [
        bigquery.SchemaField(column, kind, mode='REQUIRED')
        for column, kind in required_columns
    ]
    flat_site_visitors += [
        bigquery.SchemaField(column, kind)
        for column, kind in optional_columns
    ]

    range_partition = {
        'type': 'range',
        'field': 'client_partition_id',
        'start': 1,
        'end': 100,
        'interval': 1
    }
    migration.create_table(
        name=table_name,
        project=migration.client.project,
        schema=flat_site_visitors,
        dataset=dataset,
        partition=range_partition,
        clustering_fields=['leo_eid:STRING', 'ingestion_timestamp:TIMESTAMP'])
    return dataset
def up(client):
    """Create the checkpoint table with three REQUIRED columns.

    No partition or clustering arguments are passed to ``create_table`` here.

    Returns the dataset looked up through ``migration.dataset(dataset_name)``.
    """
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)

    # Original author's note: in order to use clusters in BigQuery, they must
    # be inside partitions. Even though TIMESTAMP is supported as a partition
    # type, a partition can only be done by date, not datetime.
    # (The original note continued "Furthermore, if timestamp is used" and
    # was left unfinished.)
    column_specs = (
        ("table", "STRING"),
        ("leo_eid", "STRING"),  # marked "Partition" by the original author
        ("checkpoint", "TIMESTAMP"),  # marked "Partition" by the original author
    )
    checkpoint_schema = [
        bigquery.SchemaField(column, kind, mode="REQUIRED")
        for column, kind in column_specs
    ]

    migration.create_table(name=table_name,
                           project=migration.client.project,
                           schema=checkpoint_schema,
                           dataset=dataset)
    return dataset
def up(client):
    """Append a ``product_type`` STRING column to the table schema.

    The update is skipped, with a warning, when ``product_type`` is already
    the last column of the current schema.

    Returns the dataset looked up through ``migration.dataset(dataset_name)``.
    """
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)
    table = migration.client.get_table(dataset.table(table_name))

    # The trailing column acts as the "already migrated" marker.
    if table.schema[-1].name == 'product_type':
        logging.warning(
            f'product_type already added to {table_name} in {dataset_name} dataset!'
        )
        return dataset

    table.schema = [
        *table.schema,
        bigquery.SchemaField('product_type', 'STRING'),
    ]
    migration.client.update_table(table, ['schema'])

    return dataset
Пример #20
0
def up(client):
    """Append the columns from ``new_fields`` that the table does not have yet.

    ``new_fields`` maps column name to field type. A warning is logged for
    each column that already exists, and the table is updated only when at
    least one column is actually missing.

    Returns the dataset looked up through ``migration.dataset(dataset_name)``.
    """
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)

    table = migration.client.get_table(dataset.table(table_name))
    existing_names = [f.name for f in table.schema]

    for table_field in existing_names:
        if table_field in new_fields:
            logging.warning(f'{table_field} already added to {table_name} in {dataset_name} dataset!')

    # Build the to-do list separately instead of popping entries out of the
    # shared module-level dict, so a rerun or the paired down() in the same
    # process still sees the full set of fields.
    present = set(existing_names)
    pending = {k: v for k, v in new_fields.items() if k not in present}

    if pending:
        table.schema = [
            *table.schema,
            *(bigquery.SchemaField(k, v) for k, v in pending.items()),
        ]
        migration.client.update_table(table, ['schema'])

    return dataset
Пример #21
0
def up(client):
    """Create a new dataset mirroring every table of the bluesun dataset.

    Each table found in ``pyr_bluesun_<env>`` is created in the new dataset
    with a copy of its schema, time partitioning, and the default clustering
    fields prefixed with ``icentris_client:STRING``.

    Returns the dataset produced by ``create_dataset(dataset_name)``.
    """
    migration = BigQueryMigration(client)

    dataset = migration.create_dataset(dataset_name)

    parent_dataset = migration.dataset('pyr_bluesun_{}'.format(client.env))
    tbls = migration.client.list_tables(parent_dataset)

    # Build a new list rather than inserting into the one returned by the
    # migration object, so the defaults are not altered for anything else
    # that shares them.
    clusters = ['icentris_client:STRING', *migration.default_clustering_fields]

    for item in tbls:
        tbl = migration.client.get_table(item.reference)

        migration.create_table(name=tbl.table_id,
                               project=migration.client.project,
                               schema=tbl.schema.copy(),
                               dataset=dataset,
                               partition={'type': 'time'},
                               clustering_fields=clusters)

    return dataset
def up(client):
    """Create the clients table and seed it with three client rows.

    Returns the dataset looked up through ``migration.dataset(dataset_name)``.

    Raises:
        RuntimeError: if the seed insert reports per-row errors.
    """
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)

    schema = [
        bigquery.SchemaField("icentris_client", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("partition_id", "INTEGER", mode="REQUIRED"),
        bigquery.SchemaField("wrench_id", "STRING", mode="REQUIRED")
    ]

    tbl = migration.create_table(name=table_name,
                                 project=migration.client.project,
                                 schema=schema,
                                 dataset=dataset)

    seed_rows = [
        ('monat', 1, '2c889143-9169-436a-b610-48c8fe31bb87'),
        ('worldventures', 2, 'd7d3e26f-d105-4816-825d-d5858b9cf0d1'),
        ('naturessunshine', 3, '16bcfb48-153a-4c7d-bb65-19074d9edb17'),
    ]

    # insert_rows reports failures through its return value rather than by
    # raising, so the result has to be checked or a failed seed goes unnoticed.
    errors = migration.client.insert_rows(migration.client.get_table(tbl),
                                          seed_rows)
    if errors:
        raise RuntimeError(
            f'Failed to seed {table_name} with client rows: {errors}')

    return dataset
Пример #23
0
def up(client):
    """Create the ``<dataset_name>_<env>`` dataset from the bluesun schema file.

    The JSON file maps each table name to an object whose ``fields`` list
    supplies ``name``, ``type`` and ``mode`` for every column. One table is
    created per entry.

    Returns the dataset produced by ``create_dataset``.
    """
    migration = BigQueryMigration(client)
    name = dataset_name + '_{}'.format(client.env)

    schema_path = PosixPath(
        '/workspace/bigquery/migrations/bluesun_schema.json')
    with schema_path.open(mode='r') as schema_file:
        tbls = json.load(schema_file)

    dataset = migration.create_dataset(name)

    for tbl, raw in tbls.items():
        columns = [
            bigquery.SchemaField(spec['name'], spec['type'], spec['mode'])
            for spec in raw['fields']
        ]
        migration.create_table(name=tbl,
                               project=migration.client.project,
                               schema=columns,
                               dataset=dataset)

    return dataset
def up(client):
    """Create a new dataset and one table per entry in ``schemas``.

    Every table is range-partitioned on ``client_partition_id`` and uses the
    migration's default clustering fields.

    Returns the dataset produced by ``create_dataset(dataset_name)``.
    """
    migration = BigQueryMigration(client)

    # use me if you are creating a new dataset. -- ndg 2/5/20
    dataset = migration.create_dataset(dataset_name)

    for table_id, table_schema in schemas.items():
        # A fresh partition spec is built for every table.
        range_partition = {
            'type': 'range',
            'field': 'client_partition_id',
            'start': 1,
            'end': 100,
            'interval': 1
        }
        migration.create_table(
            name=table_id,
            project=migration.client.project,
            schema=table_schema,
            dataset=dataset,
            partition=range_partition,
            clustering_fields=migration.default_clustering_fields)

    return dataset
Пример #25
0
def up(client):
    """Create the ``pii`` dataset and its ``users`` table.

    The table is range-partitioned on ``client_partition_id``.

    Returns the dataset produced by ``create_dataset('pii')``.
    """
    migration = BigQueryMigration(client)
    dataset = migration.create_dataset('pii')

    required_columns = (
        ('client_partition_id', 'INTEGER'),
        ('client_wrench_id', 'STRING'),
        ('icentris_client', 'STRING'),
    )
    optional_columns = (
        ('tree_user_id', 'INTEGER'),
        ('first_name', 'STRING'),
        ('last_name', 'STRING'),
        ('company_name', 'STRING'),
        ('email', 'STRING'),
        ('phone', 'STRING'),
        ('mobile_phone', 'STRING'),
        ('street', 'STRING'),
        ('city', 'STRING'),
        ('state', 'STRING'),
        ('country', 'STRING'),
        ('birth_date', 'DATE'),
        ('gender', 'STRING'),
    )

    # Required columns come first, followed by the default-mode ones.
    pii_schema = [
        bigquery.SchemaField(column, kind, mode='REQUIRED')
        for column, kind in required_columns
    ]
    pii_schema += [
        bigquery.SchemaField(column, kind)
        for column, kind in optional_columns
    ]

    range_partition = {
        'type': 'range',
        'field': 'client_partition_id',
        'start': 1,
        'end': 100,
        'interval': 1
    }
    migration.create_table(
        name='users',
        project=migration.client.project,
        schema=pii_schema,
        dataset=dataset,
        partition=range_partition,
        clustering_fields=['leo_eid:STRING', 'ingestion_timestamp:TIMESTAMP'])
    return dataset
Пример #26
0
def up(client):
    """Recreate the table with a leading REQUIRED ``dag_id`` column.

    The column at schema position 1 is dropped and ``dag_id`` is placed at
    position 0. The existing table is deleted and then created again with
    the resulting schema.

    Returns the dataset looked up through ``migration.dataset(dataset_name)``.
    """
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)
    table = migration.client.get_table(dataset.table(table_name))

    reshaped_schema = table.schema.copy()
    # NOTE(review): the paired down() re-adds leo_eid at position 1, so the
    # dropped column is presumably leo_eid -- confirm.
    reshaped_schema.pop(1)
    reshaped_schema.insert(
        0, bigquery.SchemaField('dag_id', 'STRING', mode='REQUIRED'))

    migration.delete_table(table)
    migration.create_table(name=table_name,
                           project=migration.client.project,
                           schema=reshaped_schema,
                           dataset=dataset)
    return dataset
Пример #27
0
def down(client):
    """Recreate the table with ``dag_id`` removed and ``leo_eid`` restored.

    Nothing happens unless the first schema column is ``dag_id``. When it
    is, that column is dropped, a NULLABLE ``leo_eid`` STRING is inserted at
    position 1, and the table is deleted and created again.

    Returns the dataset looked up through ``migration.dataset(dataset_name)``.
    """
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)
    table = migration.client.get_table(dataset.table(table_name))

    reverted_schema = table.schema.copy()
    if reverted_schema[0].name != 'dag_id':
        # The up migration has not been applied: leave the table alone.
        return dataset

    reverted_schema.pop(0)
    reverted_schema.insert(
        1, bigquery.SchemaField('leo_eid', 'STRING', mode="NULLABLE"))

    migration.delete_table(table)
    migration.create_table(name=table_name,
                           project=migration.client.project,
                           schema=reverted_schema,
                           dataset=dataset)

    return dataset
def up(client):
    """Create a dataset that mirrors every table of ``pyr_bluesun_<env>``.

    The module-level ``dataset_name`` is rebound to its value formatted with
    ``client.env`` before the dataset is created.

    Returns the newly created dataset.
    """
    global dataset_name

    migration = BigQueryMigration(client)

    # Rebinds the module global, so later readers see the formatted name.
    dataset_name = dataset_name.format(client.env)
    wv_ds = migration.create_dataset(dataset_name)
    bs_ds = migration.dataset('pyr_bluesun_{}'.format(client.env))

    for listed in client.list_tables(bs_ds):
        tbl_ref = bs_ds.table(listed.table_id)
        source_table = client.get_table(tbl_ref)
        # Positional order is kept exactly as the original call had it.
        migration.create_table(tbl_ref.table_id,
                               tbl_ref.project,
                               wv_ds,
                               schema=source_table.schema)

    return wv_ds
def down(client):
    """Delete the table named ``table_name`` from the dataset.

    Nothing is returned.
    """
    migration = BigQueryMigration(client)
    table_ref = migration.dataset(dataset_name).table(table_name)
    # The table is fetched first and the fetched object is what gets deleted.
    migration.delete_table(migration.client.get_table(table_ref))
def down(client):
    """Delete the dataset named by ``dataset_name`` formatted with ``client.env``.

    Nothing is returned.
    """
    migration = BigQueryMigration(client)

    #  required code for deleting a newly created dataset -- ndg 2/5/20
    target_dataset = dataset_name.format(client.env)
    migration.delete_dataset(target_dataset)