def get_list(conn):
    """Return the pending ``core_executor`` stages that are ready to run.

    A row of ``process_stages`` is selected when:

    * it is the ``core_executor`` row and its start, end and error columns
      are all null;
    * neither the ``analysis_reporter`` nor the ``process_archival`` stage of
      the same process has a start or an end time;
    * the ``data_provider`` stage of the same process has both a start and
      an end time.

    :param conn: open database connection exposing ``cursor()``
    :return: list of ``stages`` objects, one per selected
        (process_id, selection_id) row
    """
    data_provider_stage = 'data_provider'
    core_executor_stage = 'core_executor'
    analysis_reporter_stage = 'analysis_reporter'
    process_archival_stage = 'process_archival'

    # stage_error is deliberately NOT checked for data_provider: according to
    # the original author that column is always filled in when running under
    # LSF, so requiring it to be null would select nothing.
    # NOTE(review): an earlier note here mentioned pulling rows in chunks of
    # 300 for cron runs, but this query applies no LIMIT -- confirm intent.
    query = (
        "select process_id,selection_id from process_stages where stage_start is null "
        "and stage_end is null and stage_error is null and stage_name='{}' "
        "and process_id not in (select distinct(process_id) from process_stages where "
        "(stage_start is not null or stage_end is not null) and stage_name in ('{}','{}')) "
        "and process_id in (select distinct(process_id) from process_stages where "
        "stage_start is not null and stage_end is not null and stage_name='{}')"
    ).format(core_executor_stage, analysis_reporter_stage,
             process_archival_stage, data_provider_stage)

    print('-' * 100)
    print(query)
    print('-' * 100)

    cursor = conn.cursor()
    try:
        cursor.execute(query)
        # Drain the result set before closing so no unread rows are left
        # behind on the connection.
        rows = cursor.fetchall()
    finally:
        cursor.close()

    return [stages(process_id, selection_id, core_executor_stage)
            for process_id, selection_id in rows]
def get_list(conn):
    """Return the pending ``analysis_reporter`` stages that are ready to run.

    A row of ``process_stages`` is selected when:

    * it is the ``analysis_reporter`` row and its start, end and error
      columns are all null;
    * the ``process_archival`` stage of the same process has neither a start
      nor an end time;
    * both the ``data_provider`` and the ``core_executor`` stages of the same
      process have a start time, an end time and no error.

    :param conn: open database connection exposing ``cursor()``
    :return: list of ``stages`` objects, one per selected
        (process_id, selection_id) row
    """
    data_provider_stage = 'data_provider'
    core_executor_stage = 'core_executor'
    analysis_reporter_stage = 'analysis_reporter'
    process_archival_stage = 'process_archival'

    query = (
        "select process_id,selection_id from process_stages where stage_start is null and stage_end is null and stage_error "
        "is null and stage_name='{}' and process_id not in (select distinct(process_id) from process_stages where "
        "(stage_start is not null or stage_end is not null) and stage_name='{}') and process_id in "
        "(select distinct(process_id) from process_stages where stage_start is not null and stage_end is not null "
        "and stage_error is null and stage_name='{}') and process_id in (select distinct(process_id) "
        "from process_stages where stage_start is not null and stage_end is not null and stage_error is "
        "null and stage_name='{}')"
    ).format(analysis_reporter_stage, process_archival_stage,
             data_provider_stage, core_executor_stage)

    print('-' * 100)
    print(query)
    print('-' * 100)

    cursor = conn.cursor()
    try:
        cursor.execute(query)
        # Read every row first: the cursor must only be closed once the
        # result set has been consumed, never before iterating it.
        rows = cursor.fetchall()
    finally:
        cursor.close()

    return [stages(process_id, selection_id, analysis_reporter_stage)
            for process_id, selection_id in rows]
def insert_default_stages(conn, process_id, selection_id):
    """Register the default pipeline stages for one process/selection pair.

    Collects the four stage names exposed on ``stages`` (data provider, core
    executor, analysis reporter, process archival), echoes them to standard
    output between two rows of asterisks, and then hands them to a ``stages``
    object whose ``insert_all_into_process_stages`` is called with ``conn``.

    :param conn: database connection forwarded to
        ``insert_all_into_process_stages``
    :param process_id: process identifier
    :param selection_id: selection identifier
    :return: None
    """
    pipeline_stage_names = [
        stages.data_provider_stage_name,
        stages.core_executor_stage_name,
        stages.analysis_reporter_stage_name,
        stages.process_archival_stage_name,
    ]
    banner = '*' * 100
    summary = (process_id, selection_id, pipeline_stage_names)

    print(banner)
    print(*summary)
    print(banner)
    # The summary is intentionally emitted a second time, as in the original.
    print(*summary, file=sys.stdout)

    stages(process_id, selection_id, pipeline_stage_names).insert_all_into_process_stages(conn)
def get_list(conn):
    """Return the ``data_provider`` stages that have not been started yet.

    Selects every ``process_stages`` row named ``data_provider`` whose start
    and end times are both null.

    :param conn: open database connection exposing ``cursor()``
    :return: list of ``stages`` objects (not bare process ids), one per
        selected (process_id, selection_id) row
    """
    stage_name = 'data_provider'
    query = (
        "select process_id,selection_id from process_stages where "
        "stage_start is null and stage_end is null and stage_name='{}'"
    ).format(stage_name)

    cursor = conn.cursor()
    try:
        cursor.execute(query)
        # Drain the result set before closing so no unread rows are left
        # behind on the connection.
        rows = cursor.fetchall()
    finally:
        cursor.close()

    return [stages(process_id, selection_id, stage_name)
            for process_id, selection_id in rows]
def insert_default_stages(conn, process_id, selection_id):
    """
    insert_default_stages: create the default pipeline stage entries
    (data_provider, core_executor, analysis_reporter, process_archival) for
    one process/selection pair.

    The stage names are read from ``stages``, echoed to standard output
    between two ``ruler`` lines, and passed to a ``stages`` object whose
    ``insert_all_into_process_stages`` is then called with ``conn``.

    :param conn: database connection forwarded to
        ``insert_all_into_process_stages``
    :param process_id: process id
    :param selection_id: selection id
    :return: None
    """
    pipeline_stage_names = [
        stages.data_provider_stage_name,
        stages.core_executor_stage_name,
        stages.analysis_reporter_stage_name,
        stages.process_archival_stage_name,
    ]
    summary = (process_id, selection_id, pipeline_stage_names)

    print(ruler)
    print(*summary)
    print(ruler)
    # The summary is intentionally emitted a second time, as in the original.
    print(*summary, file=sys.stdout)

    stages(process_id, selection_id, pipeline_stage_names).insert_all_into_process_stages(conn)
def get_list(conn):
    """Return the pending ``process_archival`` stages that are ready to run.

    A row of ``process_stages`` is selected when it is the
    ``process_archival`` row with null start, end and error columns, and the
    ``data_provider``, ``core_executor`` and ``analysis_reporter`` stages of
    the same process each have a start time, an end time and no error.

    :param conn: open database connection exposing ``cursor()``
    :return: list of ``stages`` objects, one per selected
        (process_id, selection_id) row
    """
    data_provider_stage = 'data_provider'
    core_executor_stage = 'core_executor'
    analysis_reporter_stage = 'analysis_reporter'
    process_archival_stage = 'process_archival'

    # The sub-select self-joins process_stages three times (aliases a, b, c)
    # so that all three upstream stages must be complete for one process_id.
    query = (
        "select process_id,selection_id from process_stages where stage_start is null and "
        "stage_end is null and stage_error is null and stage_name='{}' and "
        "process_id in (select distinct(a.process_id) from process_stages a,process_stages b, "
        "process_stages c where a.stage_start is not null and a.stage_end is not null and "
        "a.stage_error is null and a.stage_name='{}' and b.stage_start is not null "
        "and b.stage_end is not null and b.stage_error is null and b.stage_name='{}' "
        "and c.stage_start is not null and c.stage_end is not null and c.stage_error is null "
        "and c.stage_name='{}' and a.process_id=b.process_id and b.process_id= "
        "c.process_id)"
    ).format(process_archival_stage, data_provider_stage,
             core_executor_stage, analysis_reporter_stage)
    print(query)

    cursor = conn.cursor()
    try:
        cursor.execute(query)
        # Drain the result set before closing so no unread rows are left
        # behind on the connection.
        rows = cursor.fetchall()
    finally:
        cursor.close()

    return [stages(process_id, selection_id, process_archival_stage)
            for process_id, selection_id in rows]