def discover(tap_name, disable_colors: bool = False):
    """Run discover for a singer tap.

    Args:
        tap_name: name of the tap as configured in ``mara_singer.config.tap_names``
        disable_colors: when True, the pipeline runner does not colorize its output

    Exits the process with code -1 when the tap pipeline cannot be found,
    when the found node is not a pipeline, or when the discover run fails.
    """
    # make the internal singer pipeline the root pipeline so that node lookup works
    patch(mara_pipelines.config.root_pipeline)(lambda: _internal_root_pipeline())

    # the pipeline to run (tap names use '-', node ids use '_')
    pipeline, found = pipelines.find_node(['_singer', tap_name.replace('-', '_')])
    if not found:
        print(f'Could not find pipeline. You have to add {tap_name} to config mara_singer.config.tap_names to be able to use this command',
              file=sys.stderr)
        sys.exit(-1)
    if not isinstance(pipeline, pipelines.Pipeline):
        # fixed typo in the error message: 'Note' -> 'Node'
        print(f'Internal error: Node is not a pipeline, but a {pipeline.__class__.__name__}',
              file=sys.stderr)
        sys.exit(-1)

    # run only the 'discover' node of the pipeline; fail clearly when it is missing
    # (previously a missing node silently put None into the node set)
    discover_node = pipeline.nodes.get('discover')
    if not discover_node:
        print(f"Internal error: pipeline for {tap_name} has no 'discover' node", file=sys.stderr)
        sys.exit(-1)
    nodes = {discover_node}

    if not mara_pipelines.ui.cli.run_pipeline(pipeline, nodes, interactively_started=False,
                                              disable_colors=disable_colors):
        sys.exit(-1)
def client_unauth():
    """Yield a test client while the ACL check denies access to the docs resource only.

    Temporarily monkey-patches ``mara_page.acl.current_user_has_permissions`` so that
    ``mara_markdown_docs.docs.documentation_acl_resource`` is reported as denied and
    every other resource as allowed. The original function is restored after the
    client generator (``_client()``) is exhausted.
    """
    import mara_page.acl
    import mara_markdown_docs.docs

    # keep a handle on the unpatched permission check so it can be re-installed below
    orig_func = mara_page.acl.current_user_has_permissions

    @patch(mara_page.acl.current_user_has_permissions)
    def current_user_has_permissions(resources):
        # returns a list of [resource, allowed?] pairs, one per requested resource
        def _exclude_docs(resource):
            if resource is mara_markdown_docs.docs.documentation_acl_resource:
                return [resource, False]  # deny only the documentation resource
            return [resource, True]
        return list(map(_exclude_docs, resources))

    yield from _client()

    # undo the patch once the consuming test is done with the client
    patch(mara_page.acl.current_user_has_permissions)(orig_func)
import os
from data_integration.pipelines import Pipeline, Task
from data_integration.ui.cli import run_pipeline
import mara_db.auto_migration
import mara_db.config
import mara_db.dbs
import data_integration
import data_integration.config
from mara_app.monkey_patch import patch
from bcreg.bcreg_pipelines import bc_reg_root_pipeline

# collect system statistics every 15 seconds while the ETL runs
patch(data_integration.config.system_statistics_collection_period)(lambda: 15)


@patch(data_integration.config.root_pipeline)
def root_pipeline():
    """Use the BC Registries pipeline as the root pipeline of this project."""
    return bc_reg_root_pipeline()


# connection parameters for the mara metadata database, read from the environment;
# MARA_DB_PASSWORD has no default and must be provided
mara_host = os.environ.get('MARA_DB_HOST', 'bcregdb')
mara_database = os.environ.get('MARA_DB_DATABASE', 'mara_db')
mara_port = os.environ.get('MARA_DB_PORT', '5432')
mara_user = os.environ.get('MARA_DB_USER', 'mara_db')
mara_password = os.environ.get('MARA_DB_PASSWORD')

mara_db.config.databases \
    = lambda: {'mara': mara_db.dbs.PostgreSQLDB(user=mara_user, password=mara_password,
                                                host=mara_host, database=mara_database,
                                                port=mara_port)}

# look up the event-processor pipeline in the node tree
(child_pipeline, success) = data_integration.pipelines.find_node(['bc_reg_event_processor'])
if success:
    # NOTE(review): the chunk is truncated here — the body of this branch is not visible
# NOTE(review): this chunk starts mid-function — the enclosing `def` (apparently a
# databases() config function, judging by the bare `return`) is outside the visible range.
mara_host = os.environ.get('MARA_DB_HOST', 'bcregdb')
mara_database = os.environ.get('MARA_DB_DATABASE', 'mara_db')
mara_port = os.environ.get('MARA_DB_PORT', '5432')
mara_user = os.environ.get('MARA_DB_USER', 'mara_db')
mara_password = os.environ.get('MARA_DB_PASSWORD')  # no default: must be provided
return {
    'mara': mara_db.dbs.PostgreSQLDB(user=mara_user, password=mara_password,
                                     host=mara_host, database=mara_database,
                                     port=mara_port)
}


# How many cores to use for running the ETL, defaults to the number of CPUs of the machine
# On production, make sure the ETL does not slow down other services too much
patch(data_integration.config.max_number_of_parallel_tasks)(lambda: 4)

# The first day for which to download and process data (default 2017-01-01).
# Locally, a few days of data is enough to test a pipeline.
# On production, size of days that can be processed depends on machine size.
# One year of data amounts to roughly 50GB database size
# NOTE(review): this patches app.config.first_date rather than
# data_integration.config.first_date — verify this is intended
patch(app.config.first_date)(
    lambda: datetime.date.today() - datetime.timedelta(days=5))

# Whether it is possible to run the ETL from the web UI
# Disable on production
patch(data_integration.config.allow_run_from_web_ui)(lambda: True)
"""Configures the data integration pipelines of the project""" import datetime import functools import data_integration.config from data_integration.pipelines import Pipeline, Task from mara_app.monkey_patch import patch import app.config from bcreg.bcreg_pipelines import bc_reg_root_pipeline patch(data_integration.config.data_dir)(lambda: app.config.data_dir()) patch(data_integration.config.first_date)(lambda: app.config.first_date()) patch(data_integration.config.default_db_alias)(lambda: 'dwh') patch(data_integration.config.system_statistics_collection_period)(lambda: 15) @patch(data_integration.config.root_pipeline) @functools.lru_cache(maxsize=None) def root_pipeline(): return bc_reg_root_pipeline()
# NOTE(review): this chunk starts mid-function — `result` and the enclosing `def`
# are defined before the visible range.
for data_set in mt_data_sets():
    # split each data set's attributes into personal-data columns and
    # default (important) columns for the data explorer
    personal_data_column_names = []
    default_column_names = []
    for path, attributes in data_set.connected_attributes().items():
        for prefixed_name, attribute in attributes.items():
            if attribute.personal_data:
                personal_data_column_names.append(prefixed_name)
            if attribute.important_field:
                default_column_names.append(prefixed_name)
    # metrics flagged as important are shown by default as well
    for metric in data_set.metrics.values():
        if metric.important_field:
            default_column_names.append(metric.name)
    result.append(
        mara_data_explorer.data_set.DataSet(
            id=data_set.id(),
            name=data_set.name,
            database_alias='dwh',
            database_schema='data_sets',
            database_table=data_set.id(),
            personal_data_column_names=personal_data_column_names,
            default_column_names=default_column_names,
            use_attributes_table=True))
return result


# adapt to the favorite chart color of your company
patch(mara_data_explorer.config.charts_color)(lambda: '#0275d8')
def test_state_read_not_existing_file():
    """A bookmark lookup against a state file that does not exist returns None."""
    # point the state directory at the test fixtures
    patch(config.state_dir)(lambda: './tests/')

    state = SingerTapState(tap_name='does_not_exist-state')
    bk_value = state.get_bookmark(tap_stream_id='STREAM_NAME', key='date')
    # PEP 8: compare to None with `is`, not `==`
    assert bk_value is None
def test_state_read_sample_state_file():
    """Reading a bookmark from an existing sample state file yields its stored value."""
    # point the state directory at the test fixtures
    patch(config.state_dir)(lambda: './tests/')

    tap_state = SingerTapState(tap_name='sample-state1')
    bookmark = tap_state.get_bookmark(tap_stream_id='STREAM_NAME', key='date')
    assert bookmark == '2020-01-01T00:00:00.000000Z'
# NOTE(review): this chunk starts mid-expression — the opening of the data-set list
# (including its first DataSet entry) precedes the visible range.
            '# Closed pull requests'
        ],
        use_attributes_table=True),
    mara_data_explorer.data_set.DataSet(
        id='github-repo-activity',
        name='Github repo activities',
        database_alias='dwh',
        database_schema='gh_dim',
        database_table='repo_activity_data_set',
        default_column_names=[
            'Date', 'User', 'Repo', '# Forks', '# Commits',
            '# Closed pull requests'
        ],
        use_attributes_table=True),
    mara_data_explorer.data_set.DataSet(
        id='pypi-download-counts',
        name='PyPI download counts',
        database_alias='dwh',
        database_schema='pypi_dim',
        database_table='download_counts_data_set',
        default_column_names=[
            'Download date', 'Project', 'Project version', 'Installer',
            'Python version', '# Downloads'
        ],
        use_attributes_table=True),
]


# adapt to the favorite chart color of your company
patch(mara_data_explorer.config.charts_color)(lambda: '#008000')
import mara_page.acl
from mara_app import monkey_patch
from mara_page import acl
from mara_page import navigation

from app.ui import start_page

# blueprint serving this package's own static assets (logo, favicon, css)
blueprint = flask.Blueprint('ui', __name__, url_prefix='/ui', static_folder='static')

MARA_FLASK_BLUEPRINTS = [start_page.blueprint, blueprint]

# replace logo and favicon
monkey_patch.patch(mara_app.config.favicon_url)(
    lambda: flask.url_for('ui.static', filename='favicon.ico'))
monkey_patch.patch(mara_app.config.logo_url)(
    lambda: flask.url_for('ui.static', filename='logo.png'))


# add custom css
@monkey_patch.wrap(mara_app.layout.css_files)
def css_files(original_function, response):
    """Append the project's stylesheet to the css files collected by mara_app."""
    files = original_function(response)
    files.append(flask.url_for('ui.static', filename='styles.css'))
    return files


# define protected ACL resources
@monkey_patch.patch(mara_acl.config.resources)
def acl_resources():
    # NOTE(review): the chunk is truncated here — the function body is not visible
"""Configures the data integration pipelines of the project""" import datetime import functools import data_integration.config from data_integration.pipelines import Pipeline, Task from mara_app.monkey_patch import patch import app.config from bcreg.bcreg_pipelines import db_init_pipeline, bc_reg_pipeline, bc_reg_pipeline_status, bc_reg_pipeline_initial_load, bc_reg_pipeline_post_credentials from bcreg.bcreg_pipelines import bc_init_test_data, bc_reg_test_corps, bc_reg_pipeline_jsonbender patch(data_integration.config.data_dir)(lambda: app.config.data_dir()) patch(data_integration.config.first_date)(lambda: app.config.first_date()) patch(data_integration.config.default_db_alias)(lambda: 'dwh') @patch(data_integration.config.root_pipeline) @functools.lru_cache(maxsize=None) def root_pipeline(): parent_pipeline = Pipeline( id='holder_for_pipeline_versions', description= 'Holder for the different versions of the BC Registries pipeline.') parent_pipeline.add(bc_reg_pipeline()) parent_pipeline.add(bc_reg_pipeline_status())
"""Configures the data integration pipelines of the project""" import datetime import functools import mara_pipelines.config import etl_tools.config from mara_pipelines.pipelines import Pipeline from mara_app.monkey_patch import patch import app.config patch(mara_pipelines.config.data_dir)(lambda: app.config.data_dir()) patch(mara_pipelines.config.first_date)(lambda: app.config.first_date()) patch(mara_pipelines.config.default_db_alias)(lambda: 'dwh') @patch(mara_pipelines.config.root_pipeline) @functools.lru_cache(maxsize=None) def root_pipeline(): import app.pipelines.initialize_db import app.pipelines.load_data.load_ecommerce_data import app.pipelines.load_data.load_marketing_data import app.pipelines.e_commerce import app.pipelines.marketing import app.pipelines.generate_artifacts import app.pipelines.update_frontends import app.pipelines.consistency_checks import app.pipelines.update_frontends pipeline = Pipeline(
@patch(data_sets.config.data_sets)
def _data_sets():
    """Register the data sets that the data explorer UI offers."""
    # (id, display name, schema, table, default columns) for each explorer data set
    specs = [
        ('python-project-activity', 'Python project activities', 'pp_dim',
         'python_project_activity_data_set',
         ['Date', 'Project', '# Downloads', '# Forks', '# Commits',
          '# Closed pull requests']),
        ('github-repo-activity', 'Github repo activities', 'gh_dim',
         'repo_activity_data_set',
         ['Date', 'User', 'Repo', '# Forks', '# Commits',
          '# Closed pull requests']),
        ('pypi-download-counts', 'PyPI download counts', 'pypi_dim',
         'download_counts_data_set',
         ['Download date', 'Project', 'Project version', 'Installer',
          'Python version', '# Downloads']),
    ]
    # all data sets live in the 'dwh' database and use an attributes table
    return [data_sets.data_set.DataSet(id=set_id, name=set_name,
                                       database_alias='dwh',
                                       database_schema=schema,
                                       database_table=table,
                                       default_column_names=columns,
                                       use_attributes_table=True)
            for set_id, set_name, schema, table, columns in specs]


# adapt to the favorite chart color of your company
patch(data_sets.config.charts_color)(lambda: '#008000')