def run_stripe_test(api_key: str):
    """Run the Stripe import -> clean charges graph and check the output.

    When ``api_key`` is empty, ``TEST_API_KEY`` is used instead. The
    ``clean_charges`` node is produced into a temporary sqlite storage with
    a one second execution time limit; the test asserts that at least 100
    records come back and that the first record has a positive ``amount``.
    """
    from snapflow_stripe import module as stripe

    effective_key = api_key or TEST_API_KEY
    db_url = get_tmp_sqlite_db_url()
    dataspace = DataspaceCfg(metadata_storage="sqlite://", storages=[db_url])
    env = Environment(dataspace)
    env.add_module(stripe)

    # Initial graph: the raw import feeds the cleaning step.
    import_node = GraphCfg(
        key="import_charges",
        function="stripe.import_charges",
        params={"api_key": effective_key},
    )
    clean_node = GraphCfg(
        key="clean_charges",
        function="stripe.clean_charges",
        input="import_charges",
    )
    graph_cfg = GraphCfg(nodes=[import_node, clean_node])

    results = env.produce(
        clean_node.key,
        graph_cfg,
        target_storage=db_url,
        execution_timelimit_seconds=1,
    )
    records = results[0].stdout().as_records()
    assert len(records) >= 100
    assert records[0]["amount"] > 0
def as_dataframe(self, env: Environment, sess: Session):
    """Build a dataframe from ``self.data`` via ``str_as_dataframe``.

    If ``self.schema`` is set, it is resolved with ``env.get_schema`` (using
    ``sess``) and passed along as the nominal schema; otherwise ``None`` is
    passed.
    """
    nominal = env.get_schema(self.schema, sess) if self.schema else None
    return str_as_dataframe(self.data, module=self.module, nominal_schema=nominal)
def as_dataframe(self, env: Environment):
    """Build a dataframe from ``self.data`` via ``str_as_dataframe``.

    The nominal schema is ``env.get_schema(self.schema)`` when ``self.schema``
    is set, and ``None`` otherwise.
    """
    if not self.schema:
        return str_as_dataframe(
            env, self.data, package=self.package, nominal_schema=None
        )
    resolved = env.get_schema(self.schema)
    return str_as_dataframe(
        env, self.data, package=self.package, nominal_schema=resolved
    )
def ensure_function(
    env: Environment, function_like: Union[DataFunctionLike, str]
) -> DataFunction:
    """Coerce ``function_like`` into a ``DataFunction``.

    * a ``DataFunction`` instance is returned unchanged;
    * a string is looked up with ``env.get_function``;
    * anything else is handed to ``make_function``.
    """
    if isinstance(function_like, str):
        # Strings are treated as keys to look up in the environment.
        return env.get_function(function_like)
    if isinstance(function_like, DataFunction):
        return function_like
    return make_function(function_like)
def get_translation_to(
    self, env: Environment, sess: Session, other: Schema
) -> Optional[SchemaTranslation]:
    """Return a translation from this schema to ``other``, if one is declared.

    Each entry of ``self.implementations`` is resolved through
    ``env.get_schema``; the first whose schema key equals ``other.key``
    supplies the translation. Returns ``None`` when there are no
    implementations or none of them matches.
    """
    candidates = self.implementations
    if candidates:
        for candidate in candidates:
            resolved = env.get_schema(candidate.schema_key, sess)
            if resolved.key == other.key:
                return candidate.as_schema_translation(env, sess, other)
    return None
def test_bigcommerce_order_products():
    """Produce the BigCommerce order-products import node and expect records.

    Runs against an in-memory sqlite metadata store with a two second
    execution time limit and asserts the first output block is non-empty.
    """
    from snapflow_bigcommerce import module as snapflow_bigcommerce

    env = Environment(metadata_storage="sqlite://")
    env.add_module(snapflow_bigcommerce)
    g = graph()

    # Initial graph
    api_key = ensure_api_key()
    store_id = ensure_store_id()
    node_params = {"api_key": api_key, "store_id": store_id}
    orders_node = g.create_node(
        snapflow_bigcommerce.functions.import_order_products,
        params=node_params,
    )

    produced = env.produce(orders_node, g, execution_timelimit_seconds=2)
    first_block_records = produced[0].as_records()
    assert len(first_block_records) > 0
def produce_function_output_for_static_input(
    function: DataFunction,
    params: Dict[str, Any] = None,
    input: Any = None,
    inputs: Any = None,
    env: Optional[Environment] = None,
    module: Optional[SnapflowModule] = None,
    target_storage: Optional[Storage] = None,
    upstream: Any = None,  # TODO: DEPRECATED
) -> Iterator[List[DataBlock]]:
    """Run ``function`` once against static input data and yield its output.

    The first truthy value among ``input``, ``inputs`` and ``upstream`` is
    used as the input data. It may be a dict keyed by input name, or a single
    value, in which case the function must have exactly one non-recursive
    input. String inputs are wrapped in ``DataInput``. Each input becomes a
    ``core.import_dataframe`` node feeding a test node for ``function``,
    which is then produced with ``to_exhaustion=False``.

    When ``env`` is not given, a new ``Environment`` backed by a temporary
    sqlite database is created. This is a generator that yields exactly once,
    while the test storages and the metadata transaction are still open.
    NOTE(review): presumably wrapped as a context manager by a decorator
    outside this view -- confirm.
    """
    static_inputs = input or inputs or upstream
    if env is None:
        env = Environment(metadata_storage=get_tmp_sqlite_db_url())
    if module:
        env.add_module(module)

    with provide_test_storages(function, target_storage) as storage:
        if storage:
            storage = env.add_storage(storage)
        with env.md_api.begin():
            test_graph = Graph(env)
            interface = function.get_interface()

            if isinstance(static_inputs, dict):
                data_by_name = static_inputs
            else:
                # A bare value is only unambiguous with a single real input.
                assert len(interface.get_non_recursive_inputs()) == 1
                only_input = interface.get_single_non_recursive_input()
                data_by_name = {only_input.name: static_inputs}

            nodes_by_name: Dict[str, Node] = {}
            for declared in interface.inputs.values():
                if declared.is_self_reference:
                    # Self references are not fed from static data.
                    continue
                assert declared.name is not None
                payload = data_by_name[declared.name]
                if isinstance(payload, str):
                    payload = DataInput(data=payload)
                assert isinstance(payload, DataInput)
                nodes_by_name[declared.name] = test_graph.create_node(
                    key=f"_input_{declared.name}",
                    function="core.import_dataframe",
                    params={
                        "dataframe": payload.as_dataframe(env),
                        "schema": payload.get_schema_key(),
                    },
                )

            node_under_test = test_graph.create_node(
                key=f"{function.name}",
                function=function,
                params=params,
                inputs=nodes_by_name,
            )
            produced = env.produce(
                node_under_test, to_exhaustion=False, target_storage=storage
            )
            yield produced
def update_matching_field_definitions(
    env: Environment, schema: Schema, update_with_schema: Schema
) -> Schema:
    """Replace fields of ``schema`` with same-named fields of ``update_with_schema``.

    For every field in ``schema``, ``update_with_schema.get_field`` is asked
    for a field with the same name; a ``NameError`` from that lookup means
    "no such field" and the original field is kept. If no field was
    replaced, ``schema`` itself is returned. Otherwise a new schema named
    ``"<schema.name>_with_<update_with_schema.name>"`` is built, registered
    via ``env.add_new_generated_schema``, and returned.
    """
    merged_fields = []
    any_replaced = False
    for original in schema.fields:
        try:
            replacement = update_with_schema.get_field(original.name)
        except NameError:
            merged_fields.append(original)
        else:
            any_replaced = True
            merged_fields.append(replacement)

    if not any_replaced:
        return schema

    as_dict = asdict(schema)
    as_dict["name"] = f"{schema.name}_with_{update_with_schema.name}"
    as_dict["fields"] = merged_fields
    generated = Schema.from_dict(as_dict)
    env.add_new_generated_schema(generated)
    return generated
def test_shopify():
    """Produce the Shopify ``import_orders`` node and expect at least one record.

    The value from ``ensure_api_key()`` is passed as the ``admin_url``
    parameter. Output goes to a temporary sqlite storage with a one second
    execution time limit.
    """
    from snapflow_shopify import module as shopify

    api_key = ensure_api_key()
    db_url = get_tmp_sqlite_db_url()
    dataspace = DataspaceCfg(metadata_storage="sqlite://", storages=[db_url])
    env = Environment(dataspace)

    # Initial graph
    orders_node = GraphCfg(
        key="import_orders",
        function="shopify.import_orders",
        params={"admin_url": api_key},
    )
    graph_cfg = GraphCfg(nodes=[orders_node])

    results = env.produce(
        orders_node.key,
        graph_cfg,
        target_storage=db_url,
        execution_timelimit_seconds=1,
    )
    records = results[0].stdout().as_records()
    assert len(records) > 0
def produce_pipe_output_for_static_input(
    pipe: Pipe,
    config: Dict[str, Any] = None,
    input: Any = None,
    upstream: Any = None,
    env: Optional[Environment] = None,
    module: Optional[SnapflowModule] = None,
    target_storage: Optional[Storage] = None,
) -> Iterator[Optional[DataBlock]]:
    """Run ``pipe`` once against static input data and yield its output.

    ``input`` (or ``upstream`` when ``input`` is falsy) is either a dict
    keyed by input name, or a single value, in which case the pipe must have
    exactly one non-recursive input. String inputs are wrapped in
    ``DataInput``. Each input becomes a ``core.extract_dataframe`` node
    feeding a test node for ``pipe``, which is produced with
    ``to_exhaustion=False``.

    When ``env`` is not given, a new ``Environment`` backed by a temporary
    sqlite database is created. ``module`` is accepted but not used in this
    body. This is a generator that yields exactly once, while the session
    scope is still open.
    NOTE(review): presumably wrapped as a context manager by a decorator
    outside this view -- confirm.
    """
    static_input = input or upstream
    if env is None:
        env = Environment(metadata_storage=get_tmp_sqlite_db_url())
    if target_storage:
        target_storage = env.add_storage(target_storage)

    with env.session_scope() as sess:
        test_graph = Graph(env)
        interface = pipe.get_interface()

        if isinstance(static_input, dict):
            data_by_name = static_input
        else:
            # A bare value is only unambiguous with a single real input.
            assert len(interface.get_non_recursive_inputs()) == 1
            data_by_name = {
                interface.get_non_recursive_inputs()[0].name: static_input
            }

        nodes_by_name: Dict[str, Node] = {}
        for declared in interface.inputs:
            if declared.is_self_ref:
                # Self references are not fed from static data.
                continue
            assert declared.name is not None
            payload = data_by_name[declared.name]
            if isinstance(payload, str):
                payload = DataInput(data=payload)
            nodes_by_name[declared.name] = test_graph.create_node(
                key=f"_input_{declared.name}",
                pipe="core.extract_dataframe",
                config={
                    "dataframe": payload.as_dataframe(env, sess),
                    "schema": payload.get_schema_key(),
                },
            )

        node_under_test = test_graph.create_node(
            key=f"{pipe.name}",
            pipe=pipe,
            config=config,
            upstream=nodes_by_name,
        )
        produced = env.produce(
            node_under_test, to_exhaustion=False, target_storage=target_storage
        )
        yield produced
def run_test_crunchbase_import_funding_rounds(user_key):
    """Produce the Crunchbase funding-rounds importer in sample mode.

    Uses a default ``Environment`` with a ``file://.`` storage, passes
    ``use_sample=True`` to the importer, and asserts that the first output
    block's dataframe has exactly 50 rows.
    """
    from snapflow_crunchbase import module as snapflow_crunchbase

    env = Environment()
    env.add_module(snapflow_crunchbase)
    env.add_storage("file://.")
    g = graph()

    # test funding rounds importer
    importer_params = {"user_key": user_key, "use_sample": True}
    importer_node = g.create_node(
        snapflow_crunchbase.functions.import_funding_rounds,
        params=importer_params,
    )

    produced = env.produce(node_like=importer_node, graph=g)
    first_frame = produced[0].as_dataframe()
    assert len(first_frame) == 50
def test_fred():
    """Import the FRED ``gdp`` series and check the record count is plausible.

    The count must be at least ``(current_year - 1946) * 4 - 1`` and strictly
    less than the same expression evaluated for the following year.
    """
    api_key = ensure_api_key()
    from snapflow_fred import module as fred

    env = Environment(metadata_storage="sqlite://")
    g = graph()

    # Initial graph
    series_params = {"api_key": api_key, "series_id": "gdp"}
    gdp_node = g.create_node("fred.import_observations", params=series_params)

    produced = produce(gdp_node, env=env, modules=[fred])
    records = produced[0].as_records()
    record_count = len(records)

    minimum_expected = (utcnow().year - 1946) * 4 - 1
    assert record_count >= minimum_expected
    exclusive_maximum = (utcnow().year + 1 - 1946) * 4 - 1
    assert record_count < exclusive_maximum
def schema(self, env: Environment, sess: Session) -> Schema:
    """Look up ``self.schema_key`` with ``env.get_schema`` and return the result."""
    resolved = env.get_schema(self.schema_key, sess)
    return resolved
return "select customer, sum(amount) as amount from txs group by customer" # Can use jinja templates too # return template("sql/customer_lifetime_sales.sql", ctx) g = graph_from_yaml(""" nodes: - key: stripe_charges function: stripe.import_charges params: api_key: sk_test_4eC39HqLyjWDarjtT1zdp7dc - key: accumulated_stripe_charges function: core.accumulator input: stripe_charges - key: stripe_customer_lifetime_sales function: customer_lifetime_sales input: accumulated_stripe_charges """) # print(g) assert len(g._nodes) == 3 env = Environment(modules=[stripe]) run(g, env=env, execution_timelimit_seconds=1) # Get the final output block datablock = env.get_latest_output("stripe_customer_lifetime_sales", g) df = datablock.as_dataframe() assert len(df.columns) == 2 assert len(df) > 1 and len(df) <= 100 # Stripe data varies
def ensure_pipe(env: Environment, pipe_like: Union[PipeLike, str]) -> Pipe:
    """Coerce ``pipe_like`` into a ``Pipe``.

    A ``Pipe`` instance is returned unchanged, a string is looked up with
    ``env.get_pipe``, and any other value is passed to ``make_pipe``.
    """
    if isinstance(pipe_like, Pipe):
        resolved = pipe_like
    elif isinstance(pipe_like, str):
        resolved = env.get_pipe(pipe_like)
    else:
        resolved = make_pipe(pipe_like)
    return resolved
def ensure_snap(env: Environment, snap_like: Union[SnapLike, str]) -> _Snap:
    """Coerce ``snap_like`` into a ``_Snap``.

    Strings are looked up with ``env.get_snap``; an existing ``_Snap`` is
    returned unchanged; everything else is passed to ``make_snap``.
    """
    if isinstance(snap_like, str):
        return env.get_snap(snap_like)
    already_snap = isinstance(snap_like, _Snap)
    return snap_like if already_snap else make_snap(snap_like)