import json
import logging
import os
import signal
import time
from tempfile import NamedTemporaryFile

# NOTE: the import block was not part of this fragment; the module paths
# below are assumptions based on the APIs in use here (Airflow 1.x,
# python-cloudant, GRID_LRT, and the AGLOW helper modules).
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults
from airflow.utils.state import State
from cloudant.client import CouchDB
from GRID_LRT.token import caToken, TokenList, TokenJsonBuilder, TokenView
from GRID_LRT.auth import get_picas_credentials
from GRID_LRT.Staging.srmlist import srmlist, slice_dicts
# get_task_instance and create_gsiftp_directory are AGLOW helpers; this
# module path is an assumption.
from AGLOW.airflow.utils.AGLOW_utils import get_task_instance, create_gsiftp_directory

# Fragment of a token-building test script (see the test_with_GRID_LRT parset
# path below); tok_type, db, d, cal_obsid, jsonfile, and obsid are defined
# earlier in that script.
tl = TokenList(token_type=tok_type, database=db)
for k, v in d.items():
    print(v)
    tok = caToken(database=db,
                  token_type=tok_type,
                  token_id=tok_type + '_sub6asec_' + str(cal_obsid) + '_SB' + str(k))
    # Dump this token's srms into a text file and attach it to the token.
    with open('temp_srm.txt', 'w') as f:
        f.write('\n'.join(v))
    tok.build(TokenJsonBuilder(jsonfile))
    tok.save()
    tok.add_attachment(attachment_name='srm.txt', filename='temp_srm.txt')
    tl.append(tok)
tl.save()

for tok in tl:
    tok['OBSID'] = obsid
    tok['PIPELINE_STEP'] = 'lb_sub6asec_cal1'
    tok['status'] = 'queued'
    tok.save()

tl.add_attachment(
    attachment_name='step1_subtract_lotss.parset',
    filename='/project/sksp/Software/lofar-highres-widefield/testdir/test_with_GRID_LRT/step1_subtract_lotss.parset')

print('5) Adding status views.')
view_cal = TokenView('step1_subtract_lotss',
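# The operator below packages the same token-building flow as the snippet
# above into an Airflow operator: it reads the staged srms from an upstream
# task's xcom, slices them into per-token srm lists, uploads one PiCaS token
# (plus an srm.txt attachment) per slice, and returns a summary dict.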
class TokenCreator(BaseOperator):
    """
    Using a Token template as input, this operator creates the tokens for a
    LOFAR job. The tokens are a set of documents that contain the metadata
    for each processing job, as well as the job's progress, step completion
    times, and so on.

    :param tok_config: path to the json file describing the token template
    :type tok_config: string
    :param staging_task: the upstream task (a dict with 'name' and
        'parent_dag' keys) whose xcom holds the srmfile of staged srms
    :type staging_task: dict
    :param append_task: optional task whose xcom key:value pairs are appended
        to every token (e.g. calibration results)
    :type append_task: dict
    :param files_per_token: number of srms grouped into each token
    :type files_per_token: int
    :param output_encoding: output encoding of the bash command
    :type output_encoding: string
    """
    template_fields = ()
    template_ext = ()
    ui_color = '#f3f92c'

    @apply_defaults
    def __init__(self,
                 tok_config,
                 staging_task,
                 append_task=None,
                 fields_task=None,
                 pc_database=None,
                 subband_prefix=None,
                 subband_suffix=None,
                 token_type='test_',
                 files_per_token=10,
                 output_encoding='utf-8',
                 *args,
                 **kwargs):
        super(TokenCreator, self).__init__(*args, **kwargs)
        self.pc_database = pc_database
        self.tok_config = tok_config
        self.fields_task = fields_task
        # Default subband naming is 'SB<number>_'.
        self.subband_prefix = subband_prefix if subband_prefix else "SB"
        self.subband_suffix = subband_suffix if subband_suffix else "_"
        self.staging_task = staging_task
        self.append_task = append_task
        self.files_per_token = files_per_token
        self.output_encoding = output_encoding
        self.t_type = token_type
        self.state = State.QUEUED

    def execute(self, context):
        """
        Build one PiCaS token per slice of staged srms, upload the tokens
        (and their srm.txt attachments) to CouchDB, and return a summary of
        the created jobs.
        """
        srms = self.get_staged_srms(context)
        if not srms:
            logging.warning("Could not get the list of staged srms!")
        logging.info("The list of staged srms is {0}".format(srms))

        pc = get_picas_credentials.picas_cred()
        if self.pc_database:
            pc.database = self.pc_database

        if self.fields_task:
            task_name = self.fields_task['name']
            task_parent_dag = self.fields_task['parent_dag']
            try:
                app = get_task_instance(
                    context, task_name, task_parent_dag)['sanitized_field_name']
            except KeyError:
                app = get_task_instance(
                    context, task_name, task_parent_dag)['field_name']
        else:
            app = srms.obsid
        self.t_type = self.t_type + app

        with open(self.tok_config, 'r') as config_file:
            tok_settings = json.load(config_file)
        token_variables = tok_settings['Job']['variables']
        client = CouchDB(pc.user,
                         pc.password,
                         url='https://picas-lofar.grid.surfsara.nl:6984',
                         connect=True)
        self.db = client[pc.database]
        pipe_type = token_variables['PIPELINE_STEP']
        self.token_list = TokenList(database=self.db, token_type=self.t_type)

        # Group the staged srms into one dictionary entry per token.
        if self.files_per_token != 1:
            d = slice_dicts(
                srms.sbn_dict(pref=self.subband_prefix,
                              suff=self.subband_suffix),
                self.files_per_token)
        else:
            d = {}
            for i in srms.sbn_dict(pref=self.subband_prefix,
                                   suff=self.subband_suffix):
                d[i[0]] = i[1]

        for token_file in d:
            logging.info("Token file is {}".format(token_file))
            # Dump the srms for this token into a temporary file that will be
            # attached to the token as srm.txt.
            with NamedTemporaryFile(delete=False) as savefile:
                for line in d[token_file]:
                    savefile.write("{}\n".format(line).encode('utf-8'))
            # Token IDs follow the pattern
            # pref3_$FIELDNAME_$OBSID_$PIPELINE_SB$SBNUMBER
            pipeline_step = pipe_type.split('_')[1]
            if 'cal' in pipe_type:
                token_id = "{}_{}_{}".format(self.t_type, srms.obsid,
                                             pipeline_step)
            elif 'targ' in pipe_type:
                token_id = "{}_{}_{}_SB{}".format(self.t_type, srms.obsid,
                                                  pipeline_step, token_file)
            else:
                token_id = "fields_$FIELDNAME_$OBSID_$PIPELINE: {}_{}_{}_{}_{}".format(
                    self.t_type, token_file, srms.obsid,
                    pipe_type.split('_')[1], time.time())
            logging.info(token_id)
            self.token_list.append(
                self.build_token(token_id,
                                 attachment={'name': 'srm.txt',
                                             'location': savefile.name}))
            self.token_list[-1]['STARTSB'] = token_file
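            # The temporary srm list was attached to the token by
            # build_token, so the local copy can now be removed.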
            os.remove(savefile.name)

        self.token_list.add_token_views()
        if self.append_task:
            logging.info(self.append_task)
            logging.info(context)
            self.modify_fields(context)

        for token in self.token_list:
            token["OBSID"] = srms.obsid
            token['RESULTS_DIR'] += "/" + str(srms.obsid)
        token_variables['OBSID'] = srms.obsid
        token_variables['RESULTS_DIR'] += "/" + str(srms.obsid)
        # Create the results directory if it does not exist.
        create_gsiftp_directory(token_variables['RESULTS_DIR'])

        logging.info('Token type is ' + self.t_type)
        logging.info('Tokens are available at '
                     'https://picas-lofar.grid.surfsara.nl:6984/_utils/database.html?'
                     + pc.database + '/_design/' + self.t_type +
                     '/_view/overview_total')
        logging.info("Token settings are:")
        for i in token_variables.items():
            logging.info(str(i))
        logging.debug(srms)
        self.token_list.save()

        results = dict()
        results['num_jobs'] = len(d.keys())
        results['output_dir'] = token_variables['RESULTS_DIR']
        logging.info("Output directory is {}".format(results['output_dir']))
        results['token_type'] = str(self.t_type)
        results['view'] = pipe_type
        results['OBSID'] = token_variables['OBSID']
        results['token_ids'] = [i['_id'] for i in self.token_list]
        return results

    def upload_tokens(self, tokens):
        pass

    def upload_attachments(self, attachment):
        pass

    def build_token(self, token_id, attachment=None):
        t1 = caToken(database=self.db, token_type=self.t_type,
                     token_id=token_id)
        t1.build(TokenJsonBuilder(self.tok_config))
        logging.info(self.tok_config)
        t1.save()
        if attachment:
            t1.add_attachment(attachment_name=attachment['name'],
                              filename=attachment['location'])
        return t1

    def modify_fields(self, context):
        """If the append_task exists, this method adds every key:value pair
        from its xcom to each token. This is used, for example, to take the
        results of the calibration task and store them as 'CAL_RESULTS'."""
        logging.info(self.append_task)
        append_xcom = get_task_instance(
            context,
            self.append_task['name'],
            parent_dag=self.append_task['parent_dag'])
        if append_xcom is None:
            logging.info("No calibration results found!")
            return
        for k in append_xcom:
            for token in self.token_list:
                token[k] = append_xcom[k]
        self.token_list.save()

    def get_staged_srms(self, context):
        task_name = self.staging_task['name']
        task_parent_dag = self.staging_task['parent_dag']
        srm_xcom = get_task_instance(context, task_name, task_parent_dag)
        srmfile = srm_xcom['srmfile']
        if srmfile is None:
            raise RuntimeError("Could not get the srm list from the " +
                               str(self.staging_task) + " task")
        logging.info("Srmfile is " + srmfile)
        return self.get_list_from_files(srmfile)

    def get_list_from_files(self, filename):
        loaded_srmlist = srmlist()
        with open(filename, 'rb') as srm_file:
            for link in srm_file.readlines():
                loaded_srmlist.append(link.decode('utf-8').strip('\n'))
        return loaded_srmlist

    def success(self):
        self.state = State.SUCCESS
        logging.info("Successfully uploaded " +
                     str(self.progress['Percent done']) +
                     " % of the tokens.")

    def on_kill(self):
        logging.warning('Sending SIGTERM signal to staging group')
        self.state = State.SHUTDOWN
        os.killpg(os.getpgid(self.sp.pid), signal.SIGTERM)
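# A minimal usage sketch, not part of the original module: the DAG name, the
# config path, and the upstream task name 'stage_srms' are hypothetical, and
# the staging task is assumed to publish an 'srmfile' key in its xcom.
if __name__ == '__main__':
    from datetime import datetime

    from airflow import DAG

    with DAG('token_creation_example',
             start_date=datetime(2019, 1, 1),
             schedule_interval=None) as dag:
        create_tokens = TokenCreator(
            task_id='create_tokens',
            tok_config='/path/to/token_config.json',  # hypothetical path
            staging_task={'name': 'stage_srms',       # hypothetical upstream task
                          'parent_dag': False},
            token_type='pref3_',
            files_per_token=1)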