for tok in tl:  # loop header reconstructed; the excerpt starts mid-script
    tok['OBSID'] = obsid
    tok['PIPELINE_STEP'] = 'lb_sub6asec_cal1'
    tok['status'] = 'queued'
    tok.save()

tl.add_attachment(
    attachment_name='step1_subtract_lotss.parset',
    filename='/project/sksp/Software/lofar-highres-widefield/testdir/'
             'test_with_GRID_LRT/step1_subtract_lotss.parset')

print('5) Adding status views.')
view_cal = TokenView('step1_subtract_lotss',
                     condition='doc.PIPELINE_STEP=="lb_sub6asec_cal1"',
                     emit_values=('doc._id', 'doc.status'))
tl.add_token_views()
tl.add_view(view_cal)

tl.save()

print('6) Create and launch the jobs.')
j = submit.SpiderLauncher(numjobs=len(d.keys()),
                          token_type=tok_type,
                          wholenode=True,
                          parameter_step=4,
                          NCPU=24)

with j:
    url = j.launch()
print('Job ID: ' + str(url))
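
A possible follow-up, sketched here under assumptions: once the jobs are launched, the status view added above can be polled with the python-cloudant client (the same client the TokenCreator example below uses). The credentials, database name, and the 'done' terminal status are placeholders, not taken from the source.

import time

from cloudant.client import CouchDB

client = CouchDB('picas_user', 'picas_password',  # placeholder credentials
                 url='https://picas-lofar.grid.surfsara.nl:6984',
                 connect=True)
db = client['picas_database']  # placeholder database name

# The design document is named after the token type, and the view emits
# (doc._id, doc.status), so each row's value should be the token status.
while True:
    statuses = [row['value'] for row in
                db.get_view_result('_design/' + tok_type,
                                   'step1_subtract_lotss')]
    print('{} tokens, statuses: {}'.format(len(statuses),
                                           sorted(set(statuses))))
    if statuses and all(status == 'done' for status in statuses):
        break
    time.sleep(60)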
Example #2
# Imports reconstructed for this example; the GRID_LRT/AGLOW module paths
# are assumed, not taken from the source.
import json
import logging
import os
import signal
import time
from tempfile import NamedTemporaryFile

from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults
from airflow.utils.state import State
from cloudant.client import CouchDB

from GRID_LRT.token import caToken, TokenList, TokenJsonBuilder
from GRID_LRT.auth import get_picas_credentials
from GRID_LRT.Staging.srmlist import srmlist, slice_dicts
from AGLOW.airflow.utils.AGLOW_utils import (get_task_instance,
                                             create_gsiftp_directory)


class TokenCreator(BaseOperator):
    """
    Using a Token template input, this class creates the tokens for a LOFAR job
    The tokens are a set of documents that contain the metadata for each processing
    job as well as the job's progress, step completion times, and etc. 

    :type sbx_task: string
    :param srms: a list of the srms that need to be staged
    :type srms: list
    :param stageID: In case staging was already done
    # 
    # 
    # 
    # 
    # 
    :type stageID: string
    :type output_encoding: output encoding of bash command
    """
    template_fields = ()
    template_ext = ()
    ui_color = '#f3f92c'

    @apply_defaults
    def __init__(self,
                 tok_config,
                 staging_task,
                 append_task=None,
                 fields_task=None,
                 pc_database=None,
                 subband_prefix=None,
                 subband_suffix=None,
                 token_type='test_',
                 files_per_token=10,
                 output_encoding='utf-8',
                 *args,
                 **kwargs):

        super(TokenCreator, self).__init__(*args, **kwargs)
        self.pc_database = pc_database
        self.tok_config = tok_config
        self.fields_task = fields_task
        self.subband_prefix = subband_prefix or "SB"
        self.subband_suffix = subband_suffix or "_"
        self.staging_task = staging_task
        self.append_task = append_task
        self.files_per_token = files_per_token
        self.output_encoding = output_encoding
        self.t_type = token_type
        self.state = State.QUEUED

    def execute(self, context):
        """
        Execute the bash command in a temporary directory
        which will be cleaned afterwards
        """
        srms = self.get_staged_srms(context)
        if not srms:
            logging.warn("Could not get the list of staged srms!")
        logging.info("the list of staged srms is {0}".format(srms))
        pc = get_picas_credentials.picas_cred()
        if self.pc_database:
            pc.database = self.pc_database
        if self.fields_task:
            task_name = self.fields_task['name']
            task_parent_dag = self.fields_task['parent_dag']
            try:
                app = get_task_instance(
                    context, task_name,
                    task_parent_dag)['sanitized_field_name']
            except KeyError:
                app = get_task_instance(context, task_name,
                                        task_parent_dag)['field_name']
        else:
            app = srms.obsid
        self.t_type = self.t_type + app
        with open(self.tok_config) as config_file:
            tok_settings = json.load(config_file)
        token_variables = tok_settings['Job']['variables']
        client = CouchDB(pc.user,
                         pc.password,
                         url='https://picas-lofar.grid.surfsara.nl:6984',
                         connect=True)
        self.db = client[pc.database]

        pipe_type = token_variables['PIPELINE_STEP']
        self.token_list = TokenList(database=self.db, token_type=self.t_type)

        if self.files_per_token != 1:
            d = slice_dicts(
                srms.sbn_dict(pref=self.subband_prefix,
                              suff=self.subband_suffix), self.files_per_token)
        else:
            d = dict(srms.sbn_dict(pref=self.subband_prefix,
                                   suff=self.subband_suffix))

        for token_file in d:
            logging.info("Token file is  {}".format(token_file))
            with NamedTemporaryFile(delete=False) as savefile:
                for line in d[token_file]:
                    savefile.write("{}\n".format(line).encode('utf-8'))
            # pref3_$FIELDNAME_$OBSID_$PIPELINE_SB$SBNUMBER
            pipeline_step = pipe_type.split('_')[1]
            # logging.info("Pipeline step is {}, type pipe_type is {}.".format(pipe_type, type(pipe_type)))
            if 'cal' in pipe_type:
                token_id = "{}_{}_{}".format(self.t_type, srms.obsid,
                                             pipeline_step)
            elif 'targ' in pipe_type:
                token_id = "{}_{}_{}_SB{}".format(self.t_type, srms.obsid,
                                                  pipeline_step, token_file)
            else:
                token_id = "fields_$FIELDNAME_$OBSID_$PIPELINE: {}_{}_{}_{}_{}".format(
                    self.t_type, token_file, srms.obsid,
                    pipe_type.split('_')[1], time.time())

            logging.info(token_id)
            self.token_list.append(
                self.build_token(token_id,
                                 attachment={
                                     'name': 'srm.txt',
                                     'location': savefile.name
                                 }))
            self.token_list[-1]['STARTSB'] = token_file
            os.remove(savefile.name)
        self.token_list.add_token_views()

        if self.append_task:
            logging.info(self.append_task)
            logging.info(context)
            self.modify_fields(context)

        for token in self.token_list:
            token["OBSID"] = srms.obsid
            token['RESULTS_DIR'] += "/" + str(srms.obsid)

        token_variables['OBSID'] = srms.obsid
        token_variables['RESULTS_DIR'] += "/" + str(srms.obsid)

        # create result directory if not exist
        create_gsiftp_directory(token_variables['RESULTS_DIR'])

        logging.info('Token type is ' + self.t_type)
        logging.info(
            'Tokens are available at https://picas-lofar.grid.surfsara.nl:6984/_utils/database.html?'
            + pc.database + '/_design/' + self.t_type +
            '/_view/overview_total')
        logging.info("Token settings are :")
        for i in token_variables.items():
            logging.info(str(i))
        logging.debug(srms)

        self.token_list.save()
        results = dict()
        results['num_jobs'] = len(d.keys())
        results['output_dir'] = token_variables['RESULTS_DIR']
        logging.info("output directory is {}".format(results['output_dir']))
        results['token_type'] = str(self.t_type)
        results['view'] = pipe_type
        results['OBSID'] = token_variables['OBSID']
        results['token_ids'] = [i['_id'] for i in self.token_list]
        return results

    def upload_tokens(self, tokens):
        pass

    def upload_attachments(self, attachment):
        pass

    def build_token(self, token_id, attachment=None):
        t1 = caToken(database=self.db,
                     token_type=self.t_type,
                     token_id=token_id)
        t1.build(TokenJsonBuilder(self.tok_config))
        logging.info(self.tok_config)
        t1.save()
        if attachment:
            t1.add_attachment(attachment_name=attachment['name'],
                              filename=attachment['location'])
        return t1

    def modify_fields(self, context):
        """If the append_task exists, this moethod will add all key:value pairs
        in its xcom to every token. This is used to for example take the results
        of the calibration taks and name it 'CAL_RESULTS'"""
        logging.info(self.append_task)
        append_xcom = get_task_instance(
            context,
            self.append_task['name'],
            parent_dag=self.append_task['parent_dag'])
        if append_xcom is None:
            logging.info("No calibration results found!")
            return

        for k in append_xcom:
            for token in self.token_list:
                token[k] = append_xcom[k]
        self.token_list.save()

    def get_staged_srms(self, context):
        task_name = self.staging_task['name']
        task_parent_dag = self.staging_task['parent_dag']
        srm_xcom = get_task_instance(context, task_name, task_parent_dag)
        srmfile = srm_xcom['srmfile']
        if srmfile is None:
            raise RuntimeError("Could not get the srm list from the " +
                               str(self.staging_task) + " task")
        logging.info("Srmfile is " + srmfile)
        return self.get_list_from_files(srmfile)

    def get_list_from_files(self, filename):
        loaded_srmlist = srmlist()
        with open(filename, 'rb') as srm_file:
            for link in srm_file:
                loaded_srmlist.append(link.decode('utf-8').strip('\n'))
        return loaded_srmlist

    def success(self):
        self.state = State.SUCCESS
        logging.info("Successfully uploaded " +
                     str(self.progress['Percent done']) + " % of the tokens.")

    def on_kill(self):
        logging.warning('Sending SIGTERM signal to staging group')
        self.state = State.SHUTDOWN
        os.killpg(os.getpgid(self.sp.pid), signal.SIGTERM)
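
A usage sketch, finally: wiring the operator into an Airflow 1.x DAG. The DAG id, task names, config path, and the upstream staging task are hypothetical placeholders, not taken from the source.

from datetime import datetime

from airflow import DAG

dag = DAG('token_creator_example',
          start_date=datetime(2020, 1, 1),
          schedule_interval=None)

# 'stage_srms' is a hypothetical upstream task whose xcom carries
# {'srmfile': <path to the staged srm list>}, as get_staged_srms() expects;
# the parent_dag value is likewise assumed.
create_tokens = TokenCreator(
    task_id='create_tokens',
    tok_config='/path/to/step1_token_template.json',  # placeholder path
    staging_task={'name': 'stage_srms', 'parent_dag': None},
    token_type='test_',
    files_per_token=10,
    dag=dag)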