def test_postgres_sql_spec(tmp_pipeline_sql, pg_client_and_schema,
                           add_current_to_sys_path, monkeypatch):
    """Build a postgres-backed pipeline from its YAML spec and verify the
    upstream dependencies are wired correctly.
    """
    _, schema = pg_client_and_schema

    with open('pipeline-postgres.yaml') as f:
        spec_dict = yaml.safe_load(f)

    # clients for this pipeline are initialized without custom create_engine
    # args but we need to set the default schema, so patch the call to
    # inject that info
    monkeypatch.setattr(db, 'create_engine',
                        create_engine_with_schema(schema))

    # seed the database with a sample "sales" table
    purchase_dates = _random_date_from(datetime(2016, 1, 1), 365, 100)
    sales = pd.DataFrame({
        'customer_id': np.random.randint(0, 5, 100),
        'value': np.random.rand(100),
        'purchase_date': purchase_dates
    })

    init_client = load_dotted_path(spec_dict['clients']['SQLScript'])
    sql_client = init_client()
    sales.to_sql('sales', sql_client.engine, if_exists='replace')
    sql_client.engine.dispose()

    dag = DAGSpec(spec_dict).to_dag()

    # FIXME: this does not show the custom Upstream key missing error
    dag.build()

    assert not dag['load'].upstream
    assert list(dag['filter'].upstream.keys()) == ['load']
    assert list(dag['transform'].upstream.keys()) == ['filter']
def test_mixed_db_sql_spec(tmp_pipeline_sql, add_current_to_sys_path,
                           pg_client_and_schema, monkeypatch):
    """Build a pipeline whose spec mixes postgres and sqlite clients."""
    _, schema = pg_client_and_schema

    with open('pipeline-multiple-dbs.yaml') as f:
        spec_dict = yaml.safe_load(f)

    # clients for this pipeline are initialized without custom create_engine
    # args but we need to set the default schema, so patch the call to
    # inject that info
    monkeypatch.setattr(db, 'create_engine',
                        create_engine_with_schema(schema))

    purchase_dates = _random_date_from(datetime(2016, 1, 1), 365, 100)
    sales = pd.DataFrame({
        'customer_id': np.random.randint(0, 5, 100),
        'value': np.random.rand(100),
        'purchase_date': purchase_dates
    })

    # seed the sales table in postgres
    init_pg = load_dotted_path(spec_dict['clients']['PostgresRelation'])
    pg = init_pg()
    sales.to_sql('sales', pg.engine, if_exists='replace')
    pg.engine.dispose()

    # seed the sales table in sqlite
    init_sqlite = load_dotted_path(spec_dict['clients']['SQLiteRelation'])
    sqlite = init_sqlite()
    sales.to_sql('sales', sqlite.engine)
    sqlite.engine.dispose()

    dag = DAGSpec(spec_dict).to_dag()

    # FIXME: this does not show the custom Upstream key missing error
    dag.build()
def test_pipeline_r(tmp_pipeline_r):
    """Build an R pipeline from its YAML spec."""
    Path('output').mkdir()

    with open('pipeline.yaml') as f:
        spec_dict = yaml.safe_load(f)

    DAGSpec(spec_dict).to_dag().build()
def test_notebook_spec(processor, tmp_nbs):
    """Build a notebook pipeline after running the spec through a processor."""
    Path('output').mkdir()

    with open('pipeline.yaml') as f:
        raw_spec = yaml.safe_load(f)

    processed = processor(raw_spec)
    DAGSpec(processed).to_dag().build()
def test_sql_spec_w_products_in_source(tmp_pipeline_sql_products_in_source,
                                       add_current_to_sys_path):
    """Build a SQL pipeline whose products are declared inside the sources."""
    with open('pipeline.yaml') as f:
        spec_dict = yaml.safe_load(f)

    # seed the database with a sample "sales" table
    purchase_dates = _random_date_from(datetime(2016, 1, 1), 365, 100)
    sales = pd.DataFrame({
        'customer_id': np.random.randint(0, 5, 100),
        'value': np.random.rand(100),
        'purchase_date': purchase_dates
    })

    init_client = load_dotted_path(spec_dict['clients']['SQLScript'])
    sql_client = init_client()
    sales.to_sql('sales', sql_client.engine, if_exists='replace')
    sql_client.engine.dispose()

    DAGSpec(spec_dict).to_dag().build()
def test_sqlite_sql_spec(spec, tmp_pipeline_sql, add_current_to_sys_path):
    """Build a sqlite-backed pipeline from a parametrized spec and verify
    upstream dependencies.
    """
    with open(spec) as f:
        spec_dict = yaml.safe_load(f)

    # seed the database with a sample "sales" table
    purchase_dates = _random_date_from(datetime(2016, 1, 1), 365, 100)
    sales = pd.DataFrame({
        'customer_id': np.random.randint(0, 5, 100),
        'value': np.random.rand(100),
        'purchase_date': purchase_dates
    })

    init_client = load_dotted_path(spec_dict['clients']['SQLScript'])
    sql_client = init_client()
    sales.to_sql('sales', sql_client.engine)
    sql_client.engine.dispose()

    dag = DAGSpec(spec_dict).to_dag()

    # FIXME: this does not show the custom Upstream key missing error
    dag.build()

    assert not dag['load'].upstream
    assert list(dag['filter'].upstream.keys()) == ['load']
    assert list(dag['transform'].upstream.keys()) == ['filter']
def test_notebook_spec_w_location(tmp_nbs, add_current_to_sys_path):
    """Build a notebook pipeline loaded straight from a spec file path."""
    Path('output').mkdir()
    DAGSpec('pipeline-w-location.yaml').to_dag().build()
def test_notebook_spec_nested(tmp_nbs_nested):
    """Build a notebook pipeline located in a nested directory layout."""
    Path('output').mkdir()
    DAGSpec('pipeline.yaml').to_dag().build()