Example #1

from datetime import datetime
from airflow.models import DAG
from airflow.operators.latest_only_operator import LatestOnlyOperator
import utils.helpers as helpers

args = {
    'owner': 'airflow',
    'depends_on_past': False,
    # assumed; the original excerpt does not show the real start_date
    'start_date': datetime(2016, 12, 1),
    'retries': 1,
}

dag = DAG(
    dag_id='actrn',
    default_args=args,
    max_active_runs=1,
    schedule_interval='@monthly'
)

latest_only_task = LatestOnlyOperator(
    task_id='latest_only',
    dag=dag,
)

collector_task = helpers.create_collector_task(
    name='actrn',
    dag=dag,
    command='make start actrn 2001-01-01'
)

processor_task = helpers.create_processor_task(
    name='actrn',
    dag=dag
)

merge_identifiers_and_reindex_task = helpers.create_trigger_subdag_task(
    trigger_dag_id='merge_identifiers_and_reindex',
    dag=dag
)

collector_task.set_upstream(latest_only_task)
processor_task.set_upstream(collector_task)
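
Every example in this collection builds its tasks through a project-local utils.helpers module whose source is not included here. Purely to give a sense of the shape such helpers could take, the following is a minimal sketch assuming they are thin wrappers around BashOperator; the task naming, the default make commands, and the use of BashOperator itself are assumptions, not the original project's implementation (which may well wrap a different operator).

# Hypothetical sketch of utils/helpers.py -- not the original implementation.
from airflow.operators.bash_operator import BashOperator


def create_collector_task(name, dag, command=None, environment=None):
    # Assumed behaviour: run a shell command (defaulting to `make start <name>`)
    # with an optional environment, using the given name as the task_id.
    return BashOperator(
        task_id=name,
        bash_command=command or 'make start {}'.format(name),
        env=environment,
        dag=dag,
    )


def create_processor_task(name, dag):
    # Assumed behaviour: kick off a processing step keyed by name.
    return BashOperator(
        task_id='process_{}'.format(name),
        bash_command='make process {}'.format(name),
        dag=dag,
    )
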
Example #2

import datetime
from airflow.models import DAG
import utils.helpers as helpers

args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime.datetime.utcnow(),
    'retries': 1,
}

dag = DAG(dag_id='takeda',
          default_args=args,
          max_active_runs=1,
          schedule_interval='@monthly')

collector_task = helpers.create_collector_task(name='takeda_collector',
                                               dag=dag)

processor_task = helpers.create_processor_task(name='takeda_processor',
                                               dag=dag)

merge_trials_identifiers_task = helpers.create_processor_task(
    name='merge_trials_identifiers', dag=dag)

processor_task.set_upstream(collector_task)
merge_trials_identifiers_task.set_upstream(processor_task)
Example #3

latest_only_task = LatestOnlyOperator(
    task_id='latest_only',
    dag=dag,
)

save_nct_xml_to_s3_task = HTTPToS3Transfer(
    task_id='save_nct_xml_to_s3',
    dag=dag,
    url='https://clinicaltrials.gov/search',
    url_params={
        'resultsxml': 'True',
        'rcv_s': '01/01/2001',
        'rcv_e': '{{ macros.ds_format(end_date, "%Y-%m-%d", "%d/%m/%Y") }}',
    },
    s3_conn_id='datastore_s3',
    s3_url=NCT_DATA_URL.replace('http://', 's3://'),
)

collector_task = helpers.create_collector_task(
    name='nct',
    dag=dag,
    command='make start nct {url}'.format(url=NCT_DATA_URL))

processor_task = helpers.create_processor_task(name='nct', dag=dag)

merge_identifiers_and_reindex_task = helpers.create_trigger_subdag_task(
    trigger_dag_id='merge_identifiers_and_reindex', dag=dag)

save_nct_xml_to_s3_task.set_upstream(latest_only_task)
collector_task.set_upstream(save_nct_xml_to_s3_task)
processor_task.set_upstream(collector_task)
merge_identifiers_and_reindex_task.set_upstream(processor_task)
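
Example #3 is an excerpt: NCT_DATA_URL, the surrounding DAG definition, and the custom HTTPToS3Transfer operator are defined earlier in the original file and are not shown here. As a rough illustration of what a download-then-upload operator of that shape could look like, here is a minimal sketch built on requests and boto3; the constructor parameters mirror the call above, but the body, the direct use of boto3 instead of the s3_conn_id connection, and the URL handling are assumptions rather than the project's actual code.

# Hypothetical sketch of an HTTPToS3Transfer-style operator -- not the original code.
try:
    from urllib.parse import urlparse  # Python 3
except ImportError:
    from urlparse import urlparse      # Python 2 (Airflow 1.x era)

import boto3
import requests
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults


class HTTPToS3Transfer(BaseOperator):
    # Fields listed here are rendered by Jinja before execute() runs,
    # which is what allows macros inside url_params in the example above.
    template_fields = ('url', 'url_params', 's3_url')

    @apply_defaults
    def __init__(self, url, s3_url, url_params=None, s3_conn_id=None,
                 *args, **kwargs):
        super(HTTPToS3Transfer, self).__init__(*args, **kwargs)
        self.url = url
        self.url_params = url_params
        self.s3_conn_id = s3_conn_id  # unused in this sketch; kept for signature parity
        self.s3_url = s3_url

    def execute(self, context):
        # Download the document, then write it to the s3://bucket/key target.
        response = requests.get(self.url, params=self.url_params)
        response.raise_for_status()

        parsed = urlparse(self.s3_url)
        boto3.client('s3').put_object(Bucket=parsed.netloc,
                                      Key=parsed.path.lstrip('/'),
                                      Body=response.content)
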
Example #4

from datetime import datetime
from airflow.models import DAG
import utils.helpers as helpers

args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2016, 12, 1),
    'retries': 1,
}

dag = DAG(
    dag_id='icdcm',
    default_args=args,
    max_active_runs=1,
    schedule_interval='@monthly'
)

collector_task = helpers.create_collector_task(
    name='icdcm',
    dag=dag
)

processor_task = helpers.create_processor_task(
    name='icdcm',
    dag=dag
)

processor_task.set_upstream(collector_task)
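
Example #4 is the smallest pattern in this collection: a collector feeding a processor. Since Airflow 1.8 the same dependency can be expressed with the bitshift operators, which read left to right and chain naturally:

# Equivalent to processor_task.set_upstream(collector_task):
collector_task >> processor_task

# Longer pipelines, like the nct example above, chain the same way:
# collector_task >> processor_task >> merge_identifiers_and_reindex_task
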
Example #5

from datetime import datetime
from airflow.models import DAG
import utils.helpers as helpers

args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2016, 12, 1),
    'retries': 1,
}

dag = DAG(dag_id='gsk',
          default_args=args,
          max_active_runs=1,
          schedule_interval='@monthly')

collector_task = helpers.create_collector_task(
    name='gsk', dag=dag, command='make start gsk 2001-01-01')

processor_task = helpers.create_processor_task(name='gsk', dag=dag)

merge_trials_identifiers_task = helpers.create_processor_task(
    name='merge_trials_identifiers', dag=dag)

processor_task.set_upstream(collector_task)
merge_trials_identifiers_task.set_upstream(processor_task)
Example #6

from datetime import datetime
from airflow.models import DAG
import utils.helpers as helpers

args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2016, 12, 1),
    'retries': 1,
}

dag = DAG(dag_id='euctr',
          default_args=args,
          max_active_runs=1,
          schedule_interval='@monthly')

collector_task = helpers.create_collector_task(
    name='euctr', dag=dag, command='make start euctr 2001-01-01')

processor_task = helpers.create_processor_task(name='euctr', dag=dag)

merge_trials_identifiers_task = helpers.create_processor_task(
    name='merge_trials_identifiers', dag=dag)

processor_task.set_upstream(collector_task)
merge_trials_identifiers_task.set_upstream(processor_task)
Example #7

import datetime
from airflow.models import DAG
import utils.helpers as helpers

args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime.datetime.utcnow(),
    'retries': 1,
}

dag = DAG(dag_id='isrctn',
          default_args=args,
          max_active_runs=1,
          schedule_interval='@monthly')

collector_task = helpers.create_collector_task(
    name='isrctn', dag=dag, command='make start isrctn 2001-01-01')

processor_task = helpers.create_processor_task(name='isrctn', dag=dag)

merge_trials_identifiers_task = helpers.create_processor_task(
    name='merge_trials_identifiers', dag=dag)

processor_task.set_upstream(collector_task)
merge_trials_identifiers_task.set_upstream(processor_task)
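
Examples #2 and #7 set start_date to datetime.datetime.utcnow(). Airflow's documentation recommends against a dynamic start_date: the value changes on every parse of the file, so a scheduled period after it may never be seen as complete and runs can fail to trigger. The fixed-date form used by the other examples is the safer pattern; the specific date below is only illustrative.

# Prefer a static start_date over utcnow(); a moving start_date confuses scheduling.
args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime.datetime(2017, 1, 1),  # illustrative fixed date
    'retries': 1,
}
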
Example #8

from datetime import datetime
import airflow.models
from airflow.operators.latest_only_operator import LatestOnlyOperator
import utils.helpers as helpers

args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2016, 12, 1),
    'retries': 1,
}

dag = airflow.models.DAG(dag_id='ictrp',
                         default_args=args,
                         max_active_runs=1,
                         schedule_interval='@monthly')

latest_only_task = LatestOnlyOperator(
    task_id='latest_only',
    dag=dag,
)

collector_task = helpers.create_collector_task(
    name='ictrp',
    dag=dag,
    environment={
        'ICTRP_USER': airflow.models.Variable.get('ICTRP_USER'),
        'ICTRP_PASS': airflow.models.Variable.get('ICTRP_PASS'),
    })

processor_task = helpers.create_processor_task(name='ictrp', dag=dag)

merge_identifiers_and_reindex_task = helpers.create_trigger_subdag_task(
    trigger_dag_id='merge_identifiers_and_reindex', dag=dag)

collector_task.set_upstream(latest_only_task)
processor_task.set_upstream(collector_task)
merge_identifiers_and_reindex_task.set_upstream(processor_task)
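
Examples #8, #10 and #12 pull credentials and URLs from Airflow Variables at DAG-definition time. Two things are worth knowing about Variable.get: it queries the metadata database on every parse of the file, and it raises an exception when the variable has not been created, which breaks DAG import. Passing default_var avoids the latter; the empty-string fallbacks below are placeholders, not values from the original DAG.

# Sketch: tolerate missing Airflow Variables at parse time with default_var.
import airflow.models

ictrp_environment = {
    'ICTRP_USER': airflow.models.Variable.get('ICTRP_USER', default_var=''),
    'ICTRP_PASS': airflow.models.Variable.get('ICTRP_PASS', default_var=''),
}
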
Example #9

from datetime import datetime
from airflow.models import DAG
from airflow.operators.latest_only_operator import LatestOnlyOperator
import utils.helpers as helpers

args = {
    'owner': 'airflow',
    'depends_on_past': False,
    # assumed; the original excerpt does not show the real start_date
    'start_date': datetime(2016, 12, 1),
    'retries': 1,
}

dag = DAG(
    dag_id='jprn',
    default_args=args,
    max_active_runs=1,
    schedule_interval='@monthly'
)

latest_only_task = LatestOnlyOperator(
    task_id='latest_only',
    dag=dag,
)

collector_task = helpers.create_collector_task(
    name='jprn',
    dag=dag,
    command='make start jprn 1 100000'
)

processor_task = helpers.create_processor_task(
    name='jprn',
    dag=dag
)

merge_identifiers_and_reindex_task = helpers.create_trigger_subdag_task(
    trigger_dag_id='merge_identifiers_and_reindex',
    dag=dag
)

collector_task.set_upstream(latest_only_task)
processor_task.set_upstream(collector_task)
Example #10

from datetime import datetime
import airflow.models
import utils.helpers as helpers

args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2016, 12, 1),
    'retries': 1,
}

dag = airflow.models.DAG(dag_id='hra',
                         default_args=args,
                         max_active_runs=1,
                         schedule_interval='@monthly')

collector_task = helpers.create_collector_task(
    name='hra',
    dag=dag,
    environment={
        'HRA_ENV': airflow.models.Variable.get('HRA_ENV'),
        'HRA_URL': airflow.models.Variable.get('HRA_URL'),
        'HRA_USER': airflow.models.Variable.get('HRA_USER'),
        'HRA_PASS': airflow.models.Variable.get('HRA_PASS'),
    })

processor_task = helpers.create_processor_task(name='hra', dag=dag)

hra_linker_task = helpers.create_processor_task(name='hra_linker', dag=dag)

processor_task.set_upstream(collector_task)
hra_linker_task.set_upstream(processor_task)
Example #11

import datetime
from airflow.models import DAG
from airflow.operators.latest_only_operator import LatestOnlyOperator
import utils.helpers as helpers

args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime.datetime(2017, 4, 1),
    'retries': 1,
}

dag = DAG(dag_id='pubmed',
          default_args=args,
          max_active_runs=1,
          schedule_interval='@monthly')

latest_only_task = LatestOnlyOperator(
    task_id='latest_only',
    dag=dag,
)

collector_task = helpers.create_collector_task(
    name='pubmed', dag=dag, command='make start pubmed 1900-01-01 2100-01-01')

unregistered_trials_task = helpers.create_processor_task(
    name='pubmed_unregistered_trials', dag=dag)

trials_remover_task = helpers.create_processor_task(name='trial_remover',
                                                    dag=dag)

pubmed_publications_task = helpers.create_processor_task(
    name='pubmed_publications', dag=dag)

merge_identifiers_and_reindex_task = helpers.create_trigger_subdag_task(
    trigger_dag_id='merge_identifiers_and_reindex', dag=dag)

collector_task.set_upstream(latest_only_task)
unregistered_trials_task.set_upstream(collector_task)
Example #12

from datetime import datetime
import airflow.models
from airflow.operators.latest_only_operator import LatestOnlyOperator
import utils.helpers as helpers

args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2016, 12, 1),
    'retries': 1,
}

dag = airflow.models.DAG(dag_id='cochrane_reviews',
                         default_args=args,
                         max_active_runs=1,
                         schedule_interval='@monthly')

latest_only_task = LatestOnlyOperator(
    task_id='latest_only',
    dag=dag,
)

collector_task = helpers.create_collector_task(
    name='cochrane_reviews',
    dag=dag,
    environment={
        'COCHRANE_ARCHIVE_URL':
        airflow.models.Variable.get('COCHRANE_ARCHIVE_URL'),
    })

processor_task = helpers.create_processor_task(name='cochrane_reviews',
                                               dag=dag)

collector_task.set_upstream(latest_only_task)
processor_task.set_upstream(collector_task)
Example #13

import datetime
from airflow.models import DAG
from airflow.operators.latest_only_operator import LatestOnlyOperator
import utils.helpers as helpers

args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime.datetime(2017, 4, 1),
    'retries': 1,
}

dag = DAG(dag_id='pfizer',
          default_args=args,
          max_active_runs=1,
          schedule_interval='@monthly')

latest_only_task = LatestOnlyOperator(
    task_id='latest_only',
    dag=dag,
)

collector_task = helpers.create_collector_task(name='pfizer_collector',
                                               dag=dag)

processor_task = helpers.create_processor_task(name='pfizer_processor',
                                               dag=dag)

collector_task.set_upstream(latest_only_task)
processor_task.set_upstream(collector_task)
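
Several of the DAGs above (actrn, nct, ictrp, jprn, pubmed) hand off to a separate merge_identifiers_and_reindex DAG via helpers.create_trigger_subdag_task, another helper whose source is not shown. A plausible minimal sketch, assuming it simply wraps the Airflow 1.x TriggerDagRunOperator, is given below; the task_id convention and the always-trigger callable are assumptions, not the original helper.

# Hypothetical sketch of create_trigger_subdag_task -- not the original helper.
from airflow.operators.dagrun_operator import TriggerDagRunOperator


def _always_trigger(context, dag_run_obj):
    # In Airflow 1.x the callable receives a DagRunOrder object;
    # returning it tells the operator to go ahead and trigger the run.
    return dag_run_obj


def create_trigger_subdag_task(trigger_dag_id, dag):
    return TriggerDagRunOperator(
        task_id='trigger_{}'.format(trigger_dag_id),
        trigger_dag_id=trigger_dag_id,
        python_callable=_always_trigger,
        dag=dag,
    )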