def query_narrativedx(service):
    """Export the NarrativeDX query result for one service to a CSV file.

    Reads ``narrativedx_query.sql`` from ``basepath``, fills in its
    ``start_date``, ``end_date`` and ``surv`` placeholders, runs it against
    the ``fi_dm_ebi`` SQL Server connection and writes the rows to
    ``NarrativeDX - {service} - {exec_date}.csv`` under ``basepath``.

    The date window is taken from the Airflow Variables
    ``narrativedx_start_date`` and ``narrativedx_end_date`` when they exist;
    otherwise it defaults to the first and last day of the previous month.

    Args:
        service: Value substituted for the ``surv`` placeholder of the query
            template; also used in the output file name.

    Note:
        ``exec_date`` is not a local name; it is looked up in the enclosing
        module scope.
    """
    ebi = get_json_secret('ebi_db_conn')['db_connections']['fi_dm_ebi']
    ppw_params = quote_plus('DRIVER={driver};'
                            'SERVER={server};'
                            'DATABASE={database};'
                            'UID={user};'
                            'PWD={password};'
                            'PORT={port};'
                            'TDS_Version={tds_version};'.format(**ebi))
    ppw_engine = sa.create_engine(
        f'mssql+pyodbc:///?odbc_connect={ppw_params}')

    with open(basepath.joinpath('narrativedx_query.sql')) as file:
        sql = file.read()

    # get custom dates if they exist in Airflow variables, otherwise do first
    # and last day of prev month
    first_of_month = date.today().replace(day=1)
    default_end = first_of_month - timedelta(days=1)
    # Derive the default start from the *default* end date, never from the
    # value returned by Variable.get: a stored Variable comes back as a string
    # and has no ``.day`` attribute, and ``default_var`` is evaluated eagerly
    # even when the start-date Variable itself is set.
    default_start = default_end.replace(day=1)

    end_date = Variable.get('narrativedx_end_date', default_var=default_end)
    start_date = Variable.get('narrativedx_start_date',
                              default_var=default_start)

    sql = sql.format(start_date=start_date, end_date=end_date, surv=service)
    df = pd.read_sql(sql, ppw_engine)
    df.to_csv(basepath.joinpath(f'NarrativeDX - {service} - {exec_date}.csv'))
def kill_job(**context):
    """Cancel the Tableau Server job identified by the ``get_job_id`` task.

    Args:
        **context: Airflow task context. ``context['task_instance']`` is used
            to pull the XCom value pushed by the task with id ``get_job_id``.
    """
    tb_job_id = context['task_instance'].xcom_pull(task_ids='get_job_id')

    ebi = get_json_secret('ebi_db_conn')['db_connections']['fi_dm_ebi']
    # The stored user is split on a backslash and the second part is used as
    # the Tableau login name.
    auth = TSC.TableauAuth(ebi['user'].split(sep='\\')[1], ebi['password'])
    server = TSC.Server(Variable.get('tableau_server_url'),
                        use_server_version=True)

    # Use sign_in as a context manager so the session is signed out even when
    # the cancel request raises.
    with server.auth.sign_in(auth):
        server.jobs.cancel(tb_job_id)
def get_job_id(tb_job_id):
    """Return the ``luid`` of a Tableau background job as a string.

    Looks the job up in the ``background_jobs`` table through the
    ``tableau_pg`` PostgreSQL connection.

    Args:
        tb_job_id: Value matched against ``background_jobs.id``.

    Returns:
        ``str`` of the ``luid`` column of the first matching row.

    Raises:
        KeyError: If no row matches ``tb_job_id``.
    """
    # TODO allow for a list to be input, but how is the variable returned from
    # the UI if it's a list?
    pg = get_json_secret('ebi_db_conn')['db_connections']['tableau_pg']
    pg_params = '{user}:{password}@{server}:{port}/{database}'.format(**pg)
    tpg_engine = sa.create_engine('postgresql+psycopg2://{}'.format(pg_params))

    # Bind the id as a parameter instead of formatting it into the statement,
    # so an operator-supplied value can never alter the SQL text.
    sql = sa.text('select id, luid from background_jobs where id = :job_id')
    df = pd.read_sql(sql, tpg_engine, params={'job_id': tb_job_id})

    if df.empty:
        raise KeyError(f'no background job found with id {tb_job_id!r}')
    return str(df.luid[0])
def refresh_extracts():
    """Request an extract refresh for published datasources tagged ``dbt``.

    Runs a metadata query filtered on the tag name ``dbt``, takes the
    ``publishedDatasources`` of the first tag in the response and calls
    ``datasources.refresh`` for each of them. The whole exchange happens
    inside one signed-in session.
    """
    server_url = 'https://ebi.coh.org'
    ebi = get_json_secret('ebi_db_conn')['db_connections']['fi_dm_ebi']
    # The stored user is split on a backslash and the second part is used as
    # the Tableau login name.
    tableau_auth = TSC.TableauAuth(ebi['user'].split(sep='\\')[1],
                                   ebi['password'])
    server = TSC.Server(server_url, use_server_version=True)

    query = '''{ tags (filter: {name: "dbt"}){ publishedDatasources { luid } } }'''

    with server.auth.sign_in(tableau_auth):
        ds_ls = server.metadata.query(
            query)['data']['tags'][0]['publishedDatasources']
        # Plain loop: the refresh calls are made for their side effect only,
        # so there is no result list worth building.
        for ds in ds_ls:
            datasource = server.datasources.get_by_id(ds['luid'])
            server.datasources.refresh(datasource)
'email_on_failure': True, 'email_on_retry': False, 'retries': 2, 'retry_delay': timedelta(minutes=5) } dag = DAG('send_hims', default_args=default_args, catchup=False, schedule_interval='0 9 * * 1') pw = get_json_secret('ebi_db_conn')['db_connections']['fi_dm_ebi']['password'] path = 'C:\\Airflow\\send_hims' ebi_db_server_prod = Variable.get('ebi_db_server_prod') airflow_server_prod = Variable.get('airflow_server_prod') query_cmd = (f'sqlcmd -S {ebi_db_server_prod} -d FI_DM_EBI -E ' f'-i {path}\\hims_query.sql ' f'-o {path}\\hims_results.csv ' '-s"|" -W -X -I') copy_cmd = (f'pscp -pw {pw} {path}\\hims_results.csv ' f'{airflow_server_prod}:/var/nfsshare/files') query = SSHOperator(ssh_conn_id='tableau_server', task_id='query_hims',
from airflow import DAG from airflow.operators.python import PythonOperator from auxiliary.outils import get_json_secret from sqlalchemy.exc import ProgrammingError def attempt_connection(db_engine): while True: try: db_engine.connect() break except ProgrammingError as e: sleep(300) ebi = get_json_secret('ebi_db_conn')['db_connections']['fi_dm_ebi'] params = quote_plus('DRIVER={driver};' 'SERVER={server};' 'DATABASE={database};' 'UID={user};' 'PWD={password};' 'PORT={port};' 'TDS_Version={tds_version};'.format(**ebi)) engine = sa.create_engine('mssql+pyodbc:///?odbc_connect={}'.format(params)) default_args = { 'owner': 'airflow', 'depends_on_past':
'email_on_failure': True, 'email_on_retry': False, 'retries': 1, 'retry_delay': timedelta(minutes=2) } dag = DAG('tableau_server_maintenance', default_args=default_args, catchup=False, schedule_interval='0 1 * * *') ebi = get_json_secret('ebi_db_conn')['db_connections']['fi_dm_ebi'] pg = get_json_secret('ebi_db_conn')['db_connections']['tableau_pg'] pg_params = '{user}:{password}@{server}:{port}/{database}'.format(**pg) tpg_engine = sa.create_engine('postgresql+psycopg2://{}'.format(pg_params)) server_url = Variable.get('tableau_server_url') tableau_auth = TSC.TableauAuth(ebi['user'].split(sep='\\')[1], ebi['password']) server = TSC.Server(server_url, use_server_version=True) tagsql = """select s.name as site ,p.name as project ,w.name as workbook ,w.luid as luid from workbooks w inner join sites s