def main():
    """Generate the customer / product / orders dummy dataset and export it.

    Builds a schema list of ``(generator_kind, spec)`` pairs, hands it to
    ``StarSchemaModel.from_list``, generates every dataset, and then writes
    the result to ``folder`` through ``to_csv`` and ``to_pickled_pyschema``.
    Relies on module-level ``num_iterations``, ``scale_factor`` and ``folder``.
    """
    num_products = get_num_products(num_iterations, scale_factor)

    # DIMS
    customer_dim = {
        "name": "customer",
        "entity_generator": generate_customer,
        "num_iterations": num_iterations,
        "mutation_rate": 0.3,  # mutating columns are updated 30% of the time
        "mutating_cols": ["address"],  # only the address column is updated
    }
    product_dim = {
        "name": "product",
        "entity_generator": generate_product,
        "num_iterations": num_products,
    }

    # FACTS
    orders_fact = {
        "name": "orders",
        "entity_generator": generate_order,
        "num_iterations": num_iterations * scale_factor,
        "relations": [{"name": "customer"}, {"name": "product"}],
    }

    schema = [
        ("naive_type2_scd", customer_dim),
        ("naive", product_dim),
        ("naive", orders_fact),
    ]

    dummy_data = StarSchemaModel.from_list(schema)
    dummy_data.generate_all_datasets(print_progress=True)

    # Both exports target the same output folder.
    dummy_data.to_csv(folder)
    dummy_data.to_pickled_pyschema(folder)
    print("Done")
def main():
    """Generate a six-table dummy star schema (orders with items and FX rates).

    Dimensions: customer, product, currency.
    Facts: orders, order_item, currency_conversion.

    The schema is a list of ``(generator_kind, spec)`` pairs passed to
    ``StarSchemaModel.from_list``; after generation the data is written to
    ``folder`` through ``to_csv`` and ``to_pickled_pyschema``. Relies on
    module-level ``num_iterations``, ``scale_factor``, ``num_currencies``,
    ``num_days`` and ``folder``.
    """
    num_products = get_num_products(num_iterations, scale_factor)

    # DIMS
    customer_dim = {
        "name": "customer",  # name of the entity/table
        "entity_generator": generate_customer,  # function that defines one entity
        "num_iterations": num_iterations,  # how many times that function is run
    }
    product_dim = {
        "name": "product",
        "entity_generator": generate_product,
        "num_iterations": num_products,
    }
    currency_dim = {
        "name": "currency",
        "entity_generator": generate_currency,
        "num_iterations": num_currencies,
    }

    # FACTS
    orders_fact = {
        "name": "orders",
        "entity_generator": generate_order,
        "num_iterations": num_iterations * scale_factor,
        "relations": [{"name": "customer"}, {"name": "currency"}],
    }

    # One iteration yields several facts (e.g. 3 items for 1 order).
    # one_to_many relations keep the same linked entity for the whole
    # iteration (one order_id shared by its order items), while many_to_many
    # links are sampled; with uniqueness requested they are sampled without
    # replacement, so each item in an order points at a different product.
    # If an order could repeat a product, uniqueness would be switched off.
    # NOTE(review): earlier notes call this flag `unique_per_fact`; the key
    # actually passed here is 'unique' - confirm they are the same option.
    order_item_fact = {
        "name": "order_item",
        "entity_generator": generate_order_item,
        "num_iterations": num_iterations * scale_factor,
        "num_entities_per_iteration": lambda: random.randint(1, 3),
        "relations": [
            {"name": "orders", "unique": True},
            {"name": "product", "type": "many_to_many", "unique": True},
        ],
    }

    # One record per currency per day. The relation type defaults to
    # one_to_many, so every iteration gets its own value sampled from the
    # source table; this fails if there are more iterations than values in
    # that table.
    currency_conversion_fact = {
        "name": "currency_conversion",
        "entity_generator": generate_currency_conv,
        "num_iterations": num_currencies,
        "num_entities_per_iteration": num_days,
        "relations": [{"name": "currency", "unique": True}],
    }

    schema = [
        ("naive", customer_dim),
        ("naive", product_dim),
        ("naive", currency_dim),
        ("naive", orders_fact),
        ("naive", order_item_fact),
        ("naive", currency_conversion_fact),
    ]

    dummy_data = StarSchemaModel.from_list(schema)
    dummy_data.generate_all_datasets(print_progress=True)

    # Both exports target the same output folder.
    dummy_data.to_csv(folder)
    dummy_data.to_pickled_pyschema(folder)
    print("Done")
def main():
    """Generate the dummy star schema with a type-2 SCD customer dimension.

    Dimensions: customer (``naive_type2_scd``), product, currency.
    Facts: orders, order_item, currency_conversion.

    After generation the data is written as CSV to ``data_path`` and a dbt
    schema is emitted to ``schema_path`` by the Postgres adapter and then by
    the BigQuery adapter. Relies on module-level ``num_iterations``,
    ``scale_factor``, ``num_currencies``, ``num_days``, ``low_date``,
    ``high_date``, ``data_path`` and ``schema_path``.
    """
    num_products = get_num_products(num_iterations, scale_factor)

    # DIMS
    customer_dim = {
        "name": "customer",
        "min_valid_from": low_date,
        "max_valid_from": high_date,
        "entity_generator": generate_customer,
        "num_iterations": num_iterations,
        "mutation_rate": 0.1,  # mutating columns are updated 10% of the time
        "mutating_cols": ["address"],  # only the address column is updated
    }
    product_dim = {
        "name": "product",
        "entity_generator": generate_product,
        "num_iterations": num_products,
    }
    currency_dim = {
        "name": "currency",
        "entity_generator": generate_currency,
        "num_iterations": num_currencies,
    }

    # FACTS
    orders_fact = {
        "name": "orders",
        "entity_generator": generate_order,
        "num_iterations": num_iterations * scale_factor,
        "relations": [{"name": "customer"}, {"name": "currency"}],
    }

    # One iteration yields several facts (e.g. 3 items for 1 order).
    # one_to_many relations keep the same linked entity for the whole
    # iteration (one order_id shared by its order items), while many_to_many
    # links are sampled; with uniqueness requested they are sampled without
    # replacement, so each item in an order points at a different product.
    # If an order could repeat a product, uniqueness would be switched off.
    # NOTE(review): earlier notes call this flag `unique_per_fact`; the key
    # actually passed here is 'unique' - confirm they are the same option.
    order_item_fact = {
        "name": "order_item",
        "entity_generator": generate_order_item,
        "num_iterations": num_iterations * scale_factor,
        "num_entities_per_iteration": lambda: random.randint(1, 3),
        "relations": [
            {"name": "orders", "unique": True},
            {"name": "product", "type": "many_to_many", "unique": True},
        ],
    }

    # One record per currency per day. The relation type defaults to
    # one_to_many, so every iteration gets its own value sampled from the
    # source table; this fails if there are more iterations than values in
    # that table.
    currency_conversion_fact = {
        "name": "currency_conversion",
        "entity_generator": generate_currency_conv,
        "num_iterations": num_currencies,
        "num_entities_per_iteration": num_days,
        "relations": [{"name": "currency", "unique": True}],
    }

    schema = [
        ("naive_type2_scd", customer_dim),
        ("naive", product_dim),
        ("naive", currency_dim),
        ("naive", orders_fact),
        ("naive", order_item_fact),
        ("naive", currency_conversion_fact),
    ]

    dummy_data = StarSchemaModel.from_list(schema)
    dummy_data.generate_all_datasets(print_progress=True)
    dummy_data.to_csv(data_path)

    # Emit the dbt schema once per warehouse flavour, Postgres first; both
    # adapters are pointed at the same schema_path.
    PostgresSchemaAdapter(dummy_data).to_dbt_schema(path=schema_path)
    BigquerySchemaAdapter(dummy_data).to_dbt_schema(path=schema_path)
    print("Done")