Пример #1
0
    def go(self):
        """
        Dispatch one thumbnail_combine node job to each target host.

        The hosts come from ``self.inputs['target_hosts']`` and every job
        receives the same five input values as arguments. Returns 1 when
        the error flag is set after scheduling, 0 otherwise.
        """
        self.logger.info("Starting thumbnail_combine run")
        super(thumbnail_combine, self).go()

        target_hosts = self.inputs['target_hosts']
        # The node script path is this file's path with 'master' replaced
        # by 'nodes'
        node_script = self.__file__.replace('master', 'nodes')
        command = "python %s" % (node_script)

        jobs = [
            ComputeJob(target,
                       command,
                       arguments=[
                           self.inputs['executable'],
                           self.inputs['file_pattern'],
                           self.inputs['input_dir'],
                           self.inputs['output_file'],
                           self.inputs['clobber']
                       ])
            for target in target_hosts
        ]
        self._schedule_jobs(jobs)

        if not self.error.isSet():
            return 0
        self.logger.warn("Failed compute job process detected")
        return 1
Пример #2
0
    def go(self):
        """
        Run the thumbnail_combine node script with hard-coded settings.

        A job is created for the one listed host, using literal argument
        values. Returns 1 when the error flag is set after scheduling,
        0 otherwise.
        """
        self.logger.info("Starting thumbnail_combine run")
        super(thumbnail_combine, self).go()

        # Where the jobs will be executed
        hosts = ['lce019']

        # The node script lives at this file's path, with 'master'
        # replaced by 'nodes'
        node_script = self.__file__.replace('master', 'nodes')
        command = "python %s" % (node_script)

        # One job per host; each job gets its own argument list
        jobs = [
            ComputeJob(
                host,
                command,
                arguments=[
                    "/usr/bin/montage",  # executable
                    "*.th.png",  # file_pattern
                    "/path/to/png/files",  # input_dir
                    "/path/to/output.png",  # output file
                    True  # clobber
                ])
            for host in hosts
        ]

        # Hand the jobs to the scheduler
        self._schedule_jobs(jobs)

        # A set error flag means a job failed
        if not self.error.isSet():
            return 0
        self.logger.warn("Failed compute job process detected")
        return 1
Пример #3
0
 def append_job(self, host, arguments):
     """
     Queue a new compute job for ``host``.

     The job is built from ``self._command`` and the given list of
     ``arguments`` and is appended to ``self._jobs``.
     """
     self._jobs.append(ComputeJob(host, self._command, arguments))
Пример #4
0
    def go(self):
        """
        Run the demixing node script on every entry of the input mapfile.

        ``self.inputs['args'][0]`` names the input mapfile. An optional
        second entry names an output mapfile, which is validated against
        the input; without it the output file names are derived from the
        input file names. On success the output map is stored at
        ``self.inputs['mapfile']`` and 0 is returned; 1 is returned when
        mapfile validation fails or the error flag is set.
        """
        self.logger.info("Starting demixing run")
        super(demixing, self).go()

        job_dir = os.path.join(self.inputs['working_directory'],
                               self.inputs['job_name'])

        #                       Load file <-> compute node mapping from disk
        # ------------------------------------------------------------------
        args = self.inputs['args']
        self.logger.debug("Loading input-data mapfile: %s" % args[0])
        indata = load_data_map(args[0])
        if len(args) > 1:
            self.logger.debug("Loading output-data mapfile: %s" % args[1])
            outdata = load_data_map(args[1])
            if not validate_data_maps(indata, outdata):
                self.logger.error(
                    "Validation of input/output data mapfiles failed")
                return 1
        else:
            # Bit of a kludge: input MS names are expected to contain "_uv".
            # The output name is the input base name with "_uv" replaced by
            # "_" + self.inputs['ms_target'] + "_sub", placed in job_dir.
            outdata = []
            for host, infile in indata:
                out_name = os.path.basename(infile).replace(
                    '_uv', '_' + self.inputs['ms_target'] + '_sub')
                outdata.append((host, os.path.join(job_dir, out_name)))

        node_script = self.__file__.replace('master', 'nodes')
        command = "python %s" % (node_script)
        jobs = [
            ComputeJob(
                host,
                command,
                arguments=[
                    infile,
                    job_dir,
                    self.inputs['initscript'],
                    self.inputs['demix_sources'],
                    self.inputs['ms_target'],
                    self.config.get('cluster', 'clusterdesc'),
                    self.inputs['timestep'],
                    self.inputs['freqstep'],
                    self.inputs['half_window'],
                    self.inputs['threshold'],
                    self.inputs['demix_parset_dir'],
                    self.inputs['skymodel'],
                    self.inputs['db_host']
                ])
            for host, infile in indata
        ]
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

        if self.error.isSet():
            return 1

        self.logger.debug("Writing mapfile %s" % self.inputs['mapfile'])
        store_data_map(self.inputs['mapfile'], outdata)
        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
Пример #5
0
    def _run_create_dbs_node(self, input_map, slice_paths_map,
             assoc_theta, source_list_map):
        """
        Decompose the input mapfiles into tasks for specific nodes and
        distribute these to the node recipes.

        One job is created per entry yielded by the three input maps once
        their iterator has been switched to ``DataMap.SkipIterator``. Jobs
        are only handed to the scheduler when at least one was created.

        Returns a tuple ``(jobs, output_map)``: the list of created jobs and
        a deep copy of ``input_map`` that was also passed through
        ``align_data_maps``.
        """
        # Compile the command to be executed on the remote machine
        node_command = " python3 %s" % (self.__file__.replace("master", "nodes"))
        # create jobs
        jobs = []
        output_map = copy.deepcopy(input_map)

        # Update the skip fields of the four maps. If 'skip' is True in any of
        # these maps, then 'skip' must be set to True in all maps.
        align_data_maps(input_map, output_map, slice_paths_map,
                        source_list_map)

        source_list_map.iterator = slice_paths_map.iterator = \
               input_map.iterator = DataMap.SkipIterator
        for idx, (input_item, slice_item, source_list_item) in enumerate(zip(
                                  input_map, slice_paths_map, source_list_map)):
            host_ms, concat_ms = input_item.host, input_item.file
            slice_paths = slice_item.file

            # Create the parameters depending on the input_map
            sourcedb_target_path = concat_ms + self.inputs["sourcedb_suffix"]

            # use unique working directories per job, to prevent interference
            # between jobs on a global fs
            working_dir = os.path.join(self.inputs['working_directory'],
                                       "imager_create_dbs_{0}".format(idx))

            # The actual call for the node script
            arguments = [concat_ms,
                         sourcedb_target_path,
                         self.inputs["monetdb_hostname"],
                         self.inputs["monetdb_port"],
                         self.inputs["monetdb_name"],
                         self.inputs["monetdb_user"],
                         self.inputs["monetdb_password"],
                         assoc_theta,
                         self.inputs["parmdb_executable"],
                         slice_paths,
                         self.inputs["parmdb_suffix"],
                         self.environment,
                         working_dir,
                         self.inputs["makesourcedb_path"],
                         source_list_item.file,
                         self.inputs["major_cycle"]]

            jobs.append(ComputeJob(host_ms, node_command, arguments))
        # Only involve the scheduler when there is something to run
        if jobs:
            self._schedule_jobs(jobs)

        return jobs, output_map
Пример #6
0
    def go(self):
        """
        Group the files of the input mapfile per host and run the
        rficonsole node script on every group.

        The mapfile is ``self.inputs['args'][0]``. When the
        'nmeasurementsets' input is present, a new group is started for a
        host once its current group holds at least that many files;
        otherwise each host gets a single group. Returns 1 when the error
        flag is set after scheduling, 0 otherwise.
        """
        self.logger.info("Starting rficonsole run")
        super(rficonsole, self).go()

        #                           Load file <-> compute node mapping from disk
        # ----------------------------------------------------------------------
        # Log the mapfile that is actually loaded rather than the whole
        # argument sequence
        mapfile = self.inputs['args'][0]
        self.logger.debug("Loading map from %s" % mapfile)
        data = load_data_map(mapfile)

        #        Jobs being dispatched to each host are arranged in a dict. Each
        #          entry in the dict is a list of lists of filenames to process.
        # ----------------------------------------------------------------------
        hostlist = defaultdict(lambda: [[]])
        for host, filename in data:
            batches = hostlist[host]
            if (
                'nmeasurementsets' in self.inputs and
                len(batches[-1]) >= self.inputs['nmeasurementsets']
            ):
                # Current group is full: open a new one with this file
                batches.append([filename])
            else:
                batches[-1].append(filename)

        # The strategy is optional; None is passed to the node when absent
        strategy = self.inputs['strategy'] if 'strategy' in self.inputs else None

        command = "python3 %s" % (self.__file__.replace('master', 'nodes'))
        jobs = []
        for host, file_lists in hostlist.items():
            for file_list in file_lists:
                jobs.append(
                    ComputeJob(
                        host, command,
                        arguments=[
                            self.inputs['executable'],
                            self.inputs['nthreads'],
                            strategy,
                            self.inputs['indirect_read'],
                            self.inputs['skip_flagged'],
                            self.inputs['working_dir']
                        ] + file_list,
                        resources={
                            "cores": self.inputs['nthreads']
                        }
                    )
                )
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

        if self.error.isSet():
            self.logger.warn("Failed rficonsole process detected")
            return 1
        return 0
Пример #7
0
 def go(self):
     """
     Schedule a single example job on localhost.

     Returns 1 when the error flag is set after scheduling, 0 otherwise.
     """
     super(example_parallel, self).go()
     # Node script path: this file's path with "master" replaced by "nodes"
     node_script = self.__file__.replace("master", "nodes")
     job = ComputeJob("localhost",
                      "python %s" % (node_script),
                      arguments=["example_argument"])
     self._schedule_jobs([job])
     return 1 if self.error.isSet() else 0
Пример #8
0
 def _run_jobs(self):
     """
     Build one compute job per (data, instrument, sky) map entry, add it
     to ``self.jobs`` and schedule all of ``self.jobs``.
     """
     node_script = self.__file__.replace('master', 'nodes')
     command = "python3 %s" % (node_script)

     # All three maps are iterated with the same iterator type
     # (presumably one that leaves out skipped entries -- confirm)
     for data_map in (self.data_map, self.inst_map, self.sky_map):
         data_map.iterator = DataMap.SkipIterator

     for data, inst, sky in zip(self.data_map, self.inst_map, self.sky_map):
         # The three file names travel together as the first argument
         file_triple = (data.file, inst.file, sky.file)
         job = ComputeJob(data.host,
                          command,
                          arguments=[file_triple,
                                     self.inputs['executable'],
                                     self.inputs['parset'],
                                     self.environment],
                          resources={"cores": self.inputs['nthreads']})
         self.jobs.append(job)
     self._schedule_jobs(self.jobs)
    def _run_create_dbs_node(self, input_map, slice_paths_map, assoc_theta):
        """
        Decompose the input mapfiles into tasks for specific nodes and
        distribute these to the node recipes.

        One job is created per entry yielded by ``input_map`` and
        ``slice_paths_map`` once their iterator has been switched to
        ``DataMap.SkipIterator``. Jobs are only handed to the scheduler
        when at least one was created.

        Returns a tuple ``(jobs, output_map)``: the list of created jobs and
        a deep copy of ``input_map`` whose skip fields have been aligned
        with the input maps.
        """
        # Compile the command to be executed on the remote machine
        node_command = " python %s" % (self.__file__.replace(
            "master", "nodes"))
        # create jobs
        jobs = []
        output_map = copy.deepcopy(input_map)

        # Update the skip fields of the three maps. If 'skip' is True in any
        # of these maps, then 'skip' must be set to True in all maps.
        for w, x, y in zip(input_map, output_map, slice_paths_map):
            w.skip = x.skip = y.skip = (w.skip or x.skip or y.skip)
        slice_paths_map.iterator = input_map.iterator = DataMap.SkipIterator
        for (input_item, slice_item) in zip(input_map, slice_paths_map):
            host_ms, concat_ms = input_item.host, input_item.file
            slice_paths = slice_item.file

            # Create the parameters depending on the input_map
            sourcedb_target_path = concat_ms + self.inputs["sourcedb_suffix"]

            # The actual call for the node script
            arguments = [
                concat_ms, sourcedb_target_path,
                self.inputs["monetdb_hostname"], self.inputs["monetdb_port"],
                self.inputs["monetdb_name"], self.inputs["monetdb_user"],
                self.inputs["monetdb_password"], assoc_theta,
                self.inputs["parmdb_executable"], slice_paths,
                self.inputs["parmdb_suffix"], self.environment,
                self.inputs["working_directory"],
                self.inputs["makesourcedb_path"],
                self.inputs["source_list_path"]
            ]

            jobs.append(ComputeJob(host_ms, node_command, arguments))
        # Only involve the scheduler when there is something to run
        if jobs:
            self._schedule_jobs(jobs)

        return jobs, output_map
Пример #10
0
    def go(self):
        """
        Run the flag_baseline node script on every entry of the input
        mapfile.

        The 'baselines' input is serialised to a temporary file in the job
        directory; its path is passed to every node job together with the
        measurement set. The temporary file is removed afterwards, whether
        or not serialising or scheduling succeeded. On success the input
        mapfile path is published as ``self.outputs['mapfile']`` and 0 is
        returned; 1 is returned when the error flag is set.
        """
        self.logger.info("Starting flag_baseline run")
        super(flag_baseline, self).go()

        #       Serialise list of baselines to disk for compute nodes to pick up
        # ----------------------------------------------------------------------
        fd, baseline_filename = mkstemp(
            dir=self.config.get("layout", "job_directory")
        )

        #                 try block ensures baseline_filename is always unlinked
        # ----------------------------------------------------------------------
        try:
            # The write sits inside the try so that a failing dump neither
            # leaks the open file nor leaves the temporary file behind.
            # NOTE(review): text mode "w" is kept from the original; confirm
            # it matches what `dump` expects.
            with os.fdopen(fd, "w") as baseline_file:
                dump(self.inputs["baselines"], baseline_file)

            #                       Load file <-> compute node mapping from disk
            # ------------------------------------------------------------------
            self.logger.debug("Loading map from %s" % self.inputs['args'][0])
            data = load_data_map(self.inputs['args'][0])

            command = "python %s" % (self.__file__.replace('master', 'nodes'))
            jobs = [
                ComputeJob(
                    host, command,
                    arguments=[
                        ms,
                        baseline_filename
                    ]
                )
                for host, ms in data
            ]
            self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

        finally:
            os.unlink(baseline_filename)

        if self.error.isSet():
            return 1
        self.outputs['mapfile'] = self.inputs['args'][0]
        return 0
Пример #11
0
    def go(self):
        """
        Run the count_timesteps node script on every entry of the input
        mapfile and publish the overall time range.

        On success ``self.outputs['start_time']`` is the smallest
        'start_time' and ``self.outputs['end_time']`` the largest 'end_time'
        found in the job results, and 0 is returned. 1 is returned, without
        setting the outputs, when the error flag is set or when no jobs
        were run.
        """
        self.logger.info("Starting count_timesteps run")
        super(count_timesteps, self).go()

        self.logger.debug("Loading map from %s" % self.inputs['args'][0])
        data = load_data_map(self.inputs['args'][0])

        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        jobs = [ComputeJob(host, command, arguments=[ms]) for host, ms in data]
        scheduled = self._schedule_jobs(
            jobs, max_per_node=self.inputs['nproc'])

        # Check for failure before touching the results: a failed job
        # presumably carries no 'start_time'/'end_time' entries, and
        # aggregating first would raise instead of returning 1.
        if self.error.isSet():
            return 1

        results = [job.results for job in scheduled.values()]
        if not results:
            # min()/max() raise ValueError on an empty sequence
            self.logger.error("No jobs were run: cannot determine time range")
            return 1

        self.outputs['start_time'] = min(
            result['start_time'] for result in results)
        self.outputs['end_time'] = max(
            result['end_time'] for result in results)
        return 0
Пример #12
0
    def go(self):
        """
        Run the make_flaggable node script on every entry of the input
        mapfile.

        Each job receives the measurement set and the 'makeflagwritable'
        input. On success the input mapfile path is published as
        ``self.outputs['mapfile']`` and 0 is returned; 1 is returned when
        the error flag is set.
        """
        self.logger.info("Starting make_flaggable run")
        super(make_flaggable, self).go()

        #                       Load file <-> compute node mapping from disk
        # ------------------------------------------------------------------
        self.logger.debug("Loading map from %s" % self.inputs['args'][0])
        data = load_data_map(self.inputs['args'][0])

        node_script = self.__file__.replace('master', 'nodes')
        command = "python %s" % (node_script)
        jobs = [
            ComputeJob(host,
                       command,
                       arguments=[ms, self.inputs['makeflagwritable']])
            for host, ms in data
        ]
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

        if self.error.isSet():
            return 1

        self.outputs['mapfile'] = self.inputs['args'][0]
        return 0
Пример #13
0
    def go(self):
        """
        Entry point for recipe: Called by the pipeline framework.

        Loads the input mapfile (``self.inputs['args'][0]``) and the target
        mapfile, validates them, and dispatches one node job per entry of
        the target map. Afterwards three mapfiles are written: the
        concatenated MS map (entries of failed jobs marked as skipped), the
        time-slice multi-map, and the map of per-image input mapfiles.

        Returns 1 when input validation fails or when no job reports a
        return code of 0; otherwise 0, even if some jobs failed.
        """
        super(imager_prepare, self).go()
        self.logger.info("Starting imager_prepare run")
        # *********************************************************************
        # input data
        input_map = DataMap.load(self.inputs['args'][0])
        output_map = DataMap.load(self.inputs['target_mapfile'])
        slices_per_image = self.inputs['slices_per_image']
        subbands_per_image = self.inputs['subbands_per_image']
        # Validate input
        if not self._validate_input_map(input_map, output_map,
                                        slices_per_image, subbands_per_image):
            return 1

        # outputs
        output_ms_mapfile_path = self.inputs['mapfile']

        # *********************************************************************
        # schedule the actual work
        # TODO: Refactor this function into: load data, perform work,
        # create output
        node_command = " python %s" % (self.__file__.replace(
            "master", "nodes"))

        # The job directory does not change between subband groups
        job_directory = self.config.get("layout", "job_directory")

        jobs = []
        paths_to_image_mapfiles = []
        n_subband_groups = len(output_map)
        for idx_sb_group, item in enumerate(output_map):
            # create the input files for this node
            self.logger.debug("Creating input data subset for processing "
                              "on: {0}".format(item.host))
            inputs_for_image_map = self._create_input_map_for_sbgroup(
                slices_per_image, n_subband_groups,
                subbands_per_image, idx_sb_group, input_map)

            # Save the mapfile
            inputs_for_image_mapfile_path = os.path.join(
                job_directory, "mapfiles",
                "ms_per_image_{0}".format(idx_sb_group))
            self._store_data_map(inputs_for_image_mapfile_path,
                                 inputs_for_image_map, "inputmap for location")

            # save the (input) ms, as a list of mapfiles
            paths_to_image_mapfiles.append(
                (item.host, inputs_for_image_mapfile_path, False))

            arguments = [
                self.environment, self.inputs['parset'],
                self.inputs['working_directory'],
                self.inputs['processed_ms_dir'], self.inputs['ndppp_exec'],
                item.file, slices_per_image, subbands_per_image,
                inputs_for_image_mapfile_path,
                self.inputs['asciistat_executable'],
                self.inputs['statplot_executable'],
                self.inputs['msselect_executable'],
                self.inputs['rficonsole_executable'],
                self.inputs['add_beam_tables']
            ]

            jobs.append(ComputeJob(item.host, node_command, arguments))

        # Hand over the job(s) to the pipeline scheduler
        self._schedule_jobs(jobs)

        # *********************************************************************
        # validate the output, cleanup, return output
        if self.error.isSet():  # if one of the nodes failed
            self.logger.warn("Failed prepare_imager run detected: Generating "
                             "new output_ms_mapfile_path without failed runs:"
                             " {0}".format(output_ms_mapfile_path))

        concat_ms = copy.deepcopy(output_map)
        slices = []
        finished_runs = 0
        # scan the job results for completed runs
        for (item, job) in zip(concat_ms, jobs):
            # only save the slices if the node has completed successfully
            if job.results["returncode"] == 0:
                finished_runs += 1
                slices.append((item.host, job.results["time_slices"], False))
            else:
                # Set the dataproduct to skipped!!
                item.skip = True
                slices.append((item.host, ["/Failed"], True))
                msg = "Failed run on {0}. NOT Created: {1} ".format(
                    item.host, item.file)
                self.logger.warn(msg)

        if finished_runs == 0:
            self.logger.error(
                "None of the started compute node finished: "
                "The current recipe produced no output, aborting")
            return 1

        # Write the output mapfiles:
        # concat.ms paths:
        self._store_data_map(output_ms_mapfile_path, concat_ms,
                             "mapfile with concat.ms")

        # timeslices
        MultiDataMap(slices).save(self.inputs['slices_mapfile'])
        self.logger.info(
            "Wrote MultiMapfile with produces timeslice: {0}".format(
                self.inputs['slices_mapfile']))

        # map with actual input mss.
        self._store_data_map(self.inputs["raw_ms_per_image_mapfile"],
                             DataMap(paths_to_image_mapfiles),
                             "mapfile containing (raw) input ms per image:")

        # Set the return values
        self.outputs['mapfile'] = output_ms_mapfile_path
        self.outputs['slices_mapfile'] = self.inputs['slices_mapfile']
        self.outputs['raw_ms_per_image_mapfile'] = \
            self.inputs["raw_ms_per_image_mapfile"]
        return 0
Пример #14
0
    def go(self):
        """
        Run the imager_source_finding node script on every non-skipped
        entry of the input mapfile and collect the results.

        The input mapfile is ``self.inputs['args'][0]``. Each job gets its
        own working directory and index-suffixed output paths. A job counts
        as successful when its results contain a "source_db" entry; entries
        of unsuccessful jobs are marked as skipped in both output maps.

        Returns 1 when no job succeeded; otherwise the two output mapfiles
        are written, published in ``self.outputs`` and 0 is returned.
        """
        super(imager_source_finding, self).go()
        self.logger.info("Starting imager_source_finding run")
        # ********************************************************************
        # 1. load mapfiles with input images and collect some parameters from
        # The input ingredients
        input_map = DataMap.load(self.inputs['args'][0])
        catalog_output_path = self.inputs["catalog_output_path"]

        # ********************************************************************
        # 2. Start the node script
        node_command = " python %s" % (self.__file__.replace(
            "master", "nodes"))
        jobs = []
        input_map.iterator = DataMap.SkipIterator
        for idx, item in enumerate(input_map):
            # use unique working directories per job, to prevent interference
            # between jobs on a global fs
            working_dir = os.path.join(self.inputs['working_directory'],
                                       "imager_source_finding_{0}".format(idx))

            arguments = [
                item.file, self.inputs["bdsm_parset_file_run1"],
                self.inputs["bdsm_parset_file_run2x"],
                "%s-%s" % (catalog_output_path, idx),
                os.path.join(self.inputs["working_directory"],
                             "bdsm_output-%s.img" % (idx, )),
                "%s-%s" % (self.inputs['sourcedb_target_path'], idx),
                self.environment, working_dir, self.inputs['makesourcedb_path']
            ]

            jobs.append(ComputeJob(item.host, node_command, arguments))

        # Hand over the job(s) to the pipeline scheduler
        self._schedule_jobs(jobs)

        # ********************************************************************
        # 3. Test for errors and return output
        if self.error.isSet():
            self.logger.warn("Failed imager_source_finding run detected")

        # Collect the nodes that succeeded
        source_dbs_from_nodes = copy.deepcopy(input_map)
        catalog_output_path_from_nodes = copy.deepcopy(input_map)
        source_dbs_from_nodes.iterator = \
            catalog_output_path_from_nodes.iterator = DataMap.SkipIterator

        # Must be bound before the loop: it is otherwise only assigned in
        # the success branch, and the check below would raise
        # UnboundLocalError when every job failed (or none was created).
        succesfull_job = False
        for job, sourcedb_item, catalog_item in zip(
                jobs, source_dbs_from_nodes, catalog_output_path_from_nodes):

            if "source_db" in job.results:
                succesfull_job = True
                sourcedb_item.file = job.results["source_db"]
                catalog_item.file = job.results["catalog_output_path"]
            else:
                # Mark both products of this job as failed and skipped
                sourcedb_item.file = "failed"
                sourcedb_item.skip = True
                catalog_item.file = "failed"
                catalog_item.skip = True

        # Abort if none of the recipes succeeded
        if not succesfull_job:
            self.logger.error("None of the source finding recipes succeeded")
            self.logger.error("Exiting with a failure status")
            return 1

        self._store_data_map(self.inputs['mapfile'],
                             catalog_output_path_from_nodes,
                             "datamap with created sourcelists")
        self._store_data_map(self.inputs['sourcedb_map_path'],
                             source_dbs_from_nodes,
                             " datamap with created sourcedbs")

        self.outputs["mapfile"] = self.inputs['mapfile']
        self.outputs["sourcedb_map_path"] = self.inputs['sourcedb_map_path']

        return 0
Пример #15
0
    def go(self):
        self.logger.info("Starting BBS run")
        super(bbs, self).go()

        #             Generate source and parameter databases for all input data
        # ----------------------------------------------------------------------
        inputs = LOFARinput(self.inputs)
        inputs['args'] = self.inputs['args']
        inputs['executable'] = self.inputs['parmdbm']
        inputs['working_directory'] = self.config.get(
            "DEFAULT", "default_working_directory")
        inputs['mapfile'] = self.task_definitions.get('parmdb', 'mapfile')
        inputs['suffix'] = ".instrument"
        outputs = LOFARoutput(self.inputs)
        if self.cook_recipe('parmdb', inputs, outputs):
            self.logger.warn("parmdb reports failure")
            return 1
        inputs['args'] = self.inputs['args']
        inputs['executable'] = self.inputs['makesourcedb']
        inputs['skymodel'] = self.inputs['skymodel']
        inputs['mapfile'] = self.task_definitions.get('sourcedb', 'mapfile')
        inputs['suffix'] = ".sky"
        outputs = LOFARoutput(self.inputs)
        if self.cook_recipe('sourcedb', inputs, outputs):
            self.logger.warn("sourcedb reports failure")
            return 1

        #              Build a GVDS file describing all the data to be processed
        # ----------------------------------------------------------------------
        self.logger.debug("Building VDS file describing all data for BBS")
        vds_file = os.path.join(self.config.get("layout", "job_directory"),
                                "vds", "bbs.gvds")
        inputs = LOFARinput(self.inputs)
        inputs['args'] = self.inputs['args']
        inputs['gvds'] = vds_file
        inputs['unlink'] = False
        inputs['makevds'] = self.inputs['makevds']
        inputs['combinevds'] = self.inputs['combinevds']
        inputs['nproc'] = self.inputs['nproc']
        inputs['directory'] = os.path.dirname(vds_file)
        outputs = LOFARoutput(self.inputs)
        if self.cook_recipe('vdsmaker', inputs, outputs):
            self.logger.warn("vdsmaker reports failure")
            return 1
        self.logger.debug("BBS GVDS is %s" % (vds_file, ))

        #      Iterate over groups of subbands divided up for convenient cluster
        #          procesing -- ie, no more than nproc subbands per compute node
        # ----------------------------------------------------------------------
        for to_process in gvds_iterator(vds_file, int(self.inputs["nproc"])):
            #               to_process is a list of (host, filename, vds) tuples
            # ------------------------------------------------------------------
            hosts, ms_names, vds_files = map(list, zip(*to_process))

            #             The BBS session database should be cleared for our key
            # ------------------------------------------------------------------
            self.logger.debug("Cleaning BBS database for key %s" %
                              (self.inputs["key"]))
            with closing(
                    psycopg2.connect(
                        host=self.inputs["db_host"],
                        user=self.inputs["db_user"],
                        database=self.inputs["db_name"])) as db_connection:
                db_connection.set_isolation_level(
                    psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
                with closing(db_connection.cursor()) as db_cursor:
                    db_cursor.execute(
                        "DELETE FROM blackboard.session WHERE key=%s",
                        (self.inputs["key"], ))

            #     BBS GlobalControl requires a GVDS file describing all the data
            #          to be processed. We assemble that from the separate parts
            #                                         already available on disk.
            # ------------------------------------------------------------------
            self.logger.debug("Building VDS file describing data for BBS run")
            vds_dir = tempfile.mkdtemp(suffix=".%s" %
                                       (os.path.basename(__file__), ))
            vds_file = os.path.join(vds_dir, "bbs.gvds")
            combineproc = utilities.spawn_process([
                self.inputs['combinevds'],
                vds_file,
            ] + vds_files, self.logger)
            sout, serr = combineproc.communicate()
            log_process_output(self.inputs['combinevds'], sout, serr,
                               self.logger)
            if combineproc.returncode != 0:
                raise subprocess.CalledProcessError(combineproc.returncode,
                                                    command)

            #      Construct a parset for BBS GlobalControl by patching the GVDS
            #           file and database information into the supplied template
            # ------------------------------------------------------------------
            self.logger.debug("Building parset for BBS control")
            bbs_parset = utilities.patch_parset(
                self.inputs['parset'],
                {
                    'Observation': vds_file,
                    'BBDB.Key': self.inputs['key'],
                    'BBDB.Name': self.inputs['db_name'],
                    'BBDB.User': self.inputs['db_user'],
                    'BBDB.Host': self.inputs['db_host'],
                    #                'BBDB.Port': self.inputs['db_name'],
                })
            self.logger.debug("BBS control parset is %s" % (bbs_parset, ))

            try:
                #        When one of our processes fails, we set the killswitch.
                #      Everything else will then come crashing down, rather than
                #                                         hanging about forever.
                # --------------------------------------------------------------
                self.killswitch = threading.Event()
                self.killswitch.clear()
                signal.signal(signal.SIGTERM, self.killswitch.set)

                #                           GlobalControl runs in its own thread
                # --------------------------------------------------------------
                run_flag = threading.Event()
                run_flag.clear()
                bbs_control = threading.Thread(target=self._run_bbs_control,
                                               args=(bbs_parset, run_flag))
                bbs_control.start()
                run_flag.wait()  # Wait for control to start before proceeding

                #      We run BBS KernelControl on each compute node by directly
                #                             invoking the node script using SSH
                #      Note that we use a job_server to send out job details and
                #           collect logging information, so we define a bunch of
                #    ComputeJobs. However, we need more control than the generic
                #     ComputeJob.dispatch method supplies, so we'll control them
                #                                          with our own threads.
                # --------------------------------------------------------------
                command = "python %s" % (self.__file__.replace(
                    'master', 'nodes'))
                env = {
                    "LOFARROOT":
                    utilities.read_initscript(
                        self.logger, self.inputs['initscript'])["LOFARROOT"],
                    "PYTHONPATH":
                    self.config.get('deploy', 'engine_ppath'),
                    "LD_LIBRARY_PATH":
                    self.config.get('deploy', 'engine_lpath')
                }
                jobpool = {}
                bbs_kernels = []
                with job_server(self.logger, jobpool,
                                self.error) as (jobhost, jobport):
                    self.logger.debug("Job server at %s:%d" %
                                      (jobhost, jobport))
                    for job_id, details in enumerate(to_process):
                        host, file, vds = details
                        jobpool[job_id] = ComputeJob(
                            host,
                            command,
                            arguments=[
                                self.inputs['kernel_exec'],
                                self.inputs['initscript'], file,
                                self.inputs['key'], self.inputs['db_name'],
                                self.inputs['db_user'], self.inputs['db_host']
                            ])
                        bbs_kernels.append(
                            threading.Thread(target=self._run_bbs_kernel,
                                             args=(host, command, env, job_id,
                                                   jobhost, str(jobport))))
                    self.logger.info("Starting %d threads" % len(bbs_kernels))
                    [thread.start() for thread in bbs_kernels]
                    self.logger.debug("Waiting for all kernels to complete")
                    [thread.join() for thread in bbs_kernels]

                #         When GlobalControl finishes, our work here is done
                # ----------------------------------------------------------
                self.logger.info("Waiting for GlobalControl thread")
                bbs_control.join()
            finally:
                os.unlink(bbs_parset)
                shutil.rmtree(vds_dir)
                if self.killswitch.isSet():
                    #  If killswitch is set, then one of our processes failed so
                    #                                   the whole run is invalid
                    # ----------------------------------------------------------
                    return 1

        return 0
Пример #16
0
    def go(self):
        """
        Master side of the setupsourcedb recipe.

        Loads the input mapfile (``args[0]``) and either loads the output
        mapfile (``args[1]``) or derives one from the input map, schedules
        one node-script job for every item yielded by
        ``DataMap.SkipIterator`` and saves the resulting map to
        ``self.inputs['mapfile']``.

        Returns 1 when validation of the maps fails, or when the error flag
        is set and every job returned a non-zero returncode; 0 otherwise.
        """
        self.logger.info("Starting setupsourcedb run")
        super(setupsourcedb, self).go()

        # ---------------------------------------------------------------------
        # Step 1: read the input map; read or derive the output map
        mapfile_args = self.inputs['args']
        self.logger.debug("Loading input-data mapfile: %s" % mapfile_args[0])
        source_map = DataMap.load(mapfile_args[0])
        if len(mapfile_args) <= 1:
            # No output map given: every product goes to the job's working
            # directory, keeping its base name with the suffix appended.
            target_map = copy.deepcopy(source_map)
            for product in target_map:
                job_dir = os.path.join(self.inputs['working_directory'],
                                       self.inputs['job_name'])
                product.file = os.path.join(
                    job_dir,
                    os.path.basename(product.file) + self.inputs['suffix'])
        else:
            self.logger.debug(
                "Loading output-data mapfile: %s" % mapfile_args[1])
            target_map = DataMap.load(mapfile_args[1])
            if not validate_data_maps(source_map, target_map):
                self.logger.error(
                    "Validation of input/output data mapfiles failed")
                return 1

        # ---------------------------------------------------------------------
        # Step 2: the skymodel input is optional; fall back to an empty name
        try:
            skymodel = self.inputs['skymodel']
        except KeyError:
            skymodel = ""
            self.logger.info("No skymodel specified. Using an empty one")

        # ---------------------------------------------------------------------
        # Step 3: run the node script once per item of the output map
        node_command = "python %s" % (
            self.__file__.replace('master', 'nodes'))
        target_map.iterator = DataMap.SkipIterator
        jobs = [
            ComputeJob(
                product.host,
                node_command,
                arguments=[
                    self.inputs['executable'],
                    skymodel,
                    product.file,
                    self.inputs['type'],
                ])
            for product in target_map
        ]
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

        # Flag the products of failed jobs so they are marked as skipped in
        # the saved map.
        for job, product in zip(jobs, target_map):
            if job.results['returncode'] != 0:
                product.skip = True

        # ---------------------------------------------------------------------
        # Step 4: evaluate the job results and write the output mapfile
        if self.error.isSet():
            any_succeeded = any(
                job.results['returncode'] == 0 for job in jobs)
            if not any_succeeded:
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            self.logger.warn(
                "Some jobs failed, continuing with succeeded runs")

        mapfile = self.inputs['mapfile']
        self.logger.debug("Writing sky map file: %s" % mapfile)
        target_map.save(mapfile)
        self.outputs['mapfile'] = mapfile
        return 0
Пример #17
0
    def go(self):
        """
        Master side of the executable_args recipe.

        Loads the input mapfile(s), loads or derives the output mapfile(s),
        builds for every item an argument list and a parset dictionary in
        which the input/output keys are replaced by the actual file names,
        schedules one ComputeJob per item yielded by
        ``DataMap.SkipIterator`` and finally writes the result and output
        mapfiles.

        Returns 0 on success. Returns 1 when a mapfile cannot be loaded or
        validated, when the number of input keys and input mapfiles differ,
        when the node script cannot be located, when a job fails while
        ``error_tolerance`` is not set, or when the error flag is set and
        all jobs failed.
        """
        # NOTE(review): 'executable' stays unbound when it is not present in
        # self.inputs, yet it is used unconditionally below -- confirm that
        # this input is always supplied.
        if 'executable' in self.inputs:
            executable = self.inputs['executable']

        if self.inputs['nthreads']:
            self.environment["OMP_NUM_THREADS"] = str(self.inputs['nthreads'])

        if 'environment' in self.inputs:
            self.environment.update(self.inputs['environment'])

        self.logger.info("Starting %s run" % executable)
        super(executable_args, self).go()

        # args format stuff; this dictionary is handed to the node script
        # unchanged, so its keys must stay exactly as they are.
        args_format = {'args_format': self.inputs['args_format'],
                       'args_format_argument': self.inputs['args_format_argument'],
                       'args_format_option': self.inputs['args_format_option'],
                       'args_formatlongoption': self.inputs['args_format_longoption'],
                       'args_format_option_argument': self.inputs['args_format_option_argument']}
        mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles")
        work_dir = os.path.join(self.inputs['working_directory'], self.inputs['job_name'])
        # *********************************************************************
        # try loading input/output data file, validate output vs the input location if
        #    output locations are provided
        try:
            inputmapfiles = []
            inlist = []
            if self.inputs['mapfile_in']:
                inlist.append(self.inputs['mapfile_in'])

            if self.inputs['mapfiles_in']:
                for item in self.inputs['mapfiles_in']:
                    inlist.append(item)
                self.inputs['mapfile_in'] = self.inputs['mapfiles_in'][0]

            for item in inlist:
                inputmapfiles.append(DataMap.load(item))

        except Exception:
            self.logger.error('Could not load input Mapfile %s' % inlist)
            return 1

        outputmapfiles = []
        if self.inputs['mapfile_out']:
            try:
                outdata = DataMap.load(self.inputs['mapfile_out'])
                outputmapfiles.append(outdata)
            except Exception:
                self.logger.error('Could not load output Mapfile %s' % self.inputs['mapfile_out'])
                return 1
            # sync skip fields in the mapfiles
            align_data_maps(inputmapfiles[0], outputmapfiles[0])

        elif self.inputs['mapfiles_out']:
            for item in self.inputs['mapfiles_out']:
                outputmapfiles.append(DataMap.load(item))
            self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

        else:
            # output will be directed in the working directory if no output mapfile is specified
            outdata = copy.deepcopy(inputmapfiles[0])
            if not self.inputs['inplace']:
                for item in outdata:
                    item.file = os.path.join(
                        self.inputs['working_directory'],
                        self.inputs['job_name'],
                        os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname']
                    )
                self.inputs['mapfile_out'] = os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + 'mapfile')
                self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
            else:
                # In-place processing: the output map is the input map
                self.inputs['mapfile_out'] = self.inputs['mapfile_in']
                self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
            outputmapfiles.append(outdata)

        if not validate_data_maps(inputmapfiles[0], outputmapfiles[0]):
            self.logger.error(
                "Validation of data mapfiles failed!"
            )
            return 1

        if self.inputs['outputsuffixes']:
            # Handle multiple outputfiles: one extra output map per suffix,
            # each derived from the first input map
            for name in self.inputs['outputsuffixes']:
                outputmapfiles.append(copy.deepcopy(inputmapfiles[0]))
                self.inputs['mapfiles_out'].append(os.path.join(mapfile_dir, self.inputs['stepname'] + name + '.' + 'mapfile'))
                for item in outputmapfiles[-1]:
                    item.file = os.path.join(
                        work_dir,
                        os.path.splitext(os.path.basename(item.file))[0] + '.' + self.inputs['stepname'] + name
                    )
            self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

        # prepare arguments
        arglist = self.inputs['arguments']
        parsetdict = {}
        if 'parset' in self.inputs:
            parset = Parset()
            parset.adoptFile(self.inputs['parset'])
            for k in parset.keys:
                parsetdict[k] = str(parset[k])

        # construct multiple input data
        if self.inputs['inputkey'] and not self.inputs['inputkey'] in self.inputs['inputkeys']:
            self.inputs['inputkeys'].insert(0, self.inputs['inputkey'])

        if not self.inputs['outputkeys'] and self.inputs['outputkey']:
            self.inputs['outputkeys'].append(self.inputs['outputkey'])

        # Compare the counts by value: identity comparison of ints only works
        # by accident for small numbers.
        if not self.inputs['skip_infile'] and len(self.inputs['inputkeys']) != len(inputmapfiles):
            self.logger.error("Number of input mapfiles %d and input keys %d have to match." %
                              (len(inputmapfiles), len(self.inputs['inputkeys'])))
            return 1

        # filedict maps every input/output key to the list of values that
        # replace it, one entry per map item.
        # NOTE(review): these lists are filled before the SkipIterator is
        # installed below, while they are indexed with the job counter of the
        # skip-aware loop -- confirm that the indices still line up when a
        # map contains skipped items.
        filedict = {}
        if self.inputs['inputkeys'] and not self.inputs['skip_infile']:
            for key, filemap, mapname in zip(self.inputs['inputkeys'], inputmapfiles, inlist):
                if not mapname in self.inputs['mapfiles_as_string']:
                    filedict[key] = []
                    for inp in filemap:
                        filedict[key].append(inp.file)
                else:
                    # Pass the mapfile name itself instead of the file names
                    if key != mapname:
                        filedict[key] = []
                        for inp in filemap:
                            filedict[key].append(mapname)

        if self.inputs['outputkey']:
            filedict[self.inputs['outputkey']] = []
            for item in outputmapfiles[0]:
                filedict[self.inputs['outputkey']].append(item.file)

        # ********************************************************************
        # Call the node side of the recipe
        # Create and schedule the compute jobs
        # The node script is searched for in PYTHONPATH first and in the
        # configured recipe directories afterwards; the last match wins.
        recipe_dir_str = str(self.config.get('DEFAULT', 'recipe_directories'))
        recipe_directories = [
            entry.strip()
            for entry in recipe_dir_str.rstrip(']').lstrip('[').split(',')
        ]
        # An unset PYTHONPATH must not crash the recipe
        pylist = os.getenv('PYTHONPATH', '').split(':')
        command = None
        for pl in pylist:
            if os.path.isfile(os.path.join(pl,'lofarpipe/recipes/nodes/'+self.inputs['nodescript']+'.py')):
                command = "python3 %s" % os.path.join(pl,'lofarpipe/recipes/nodes/'+self.inputs['nodescript']+'.py')
        for pl in recipe_directories:
            if os.path.isfile(os.path.join(pl,'nodes/'+self.inputs['nodescript']+'.py')):
                command = "python3 %s" % os.path.join(pl,'nodes/'+self.inputs['nodescript']+'.py')

        if command is None:
            # Without a node script there is nothing sensible to schedule
            self.logger.error(
                "Could not find node script %s.py in PYTHONPATH or in the "
                "recipe directories" % self.inputs['nodescript'])
            return 1

        inputmapfiles[0].iterator = outputmapfiles[0].iterator = DataMap.SkipIterator
        jobs = []
        for i, (outp, inp,) in enumerate(zip(
            outputmapfiles[0], inputmapfiles[0])
        ):
            # Every job gets its own copies so the replacements below do not
            # leak into the next job
            arglist_copy = copy.deepcopy(arglist)
            parsetdict_copy = copy.deepcopy(parsetdict)

            if filedict:
                for name, value in filedict.items():
                    replaced = False
                    if arglist_copy:
                        for arg in arglist:
                            if name == arg:
                                ind = arglist_copy.index(arg)
                                arglist_copy[ind] = arglist_copy[ind].replace(name, value[i])
                                replaced = True
                    if parsetdict_copy:
                        if name in list(parsetdict_copy.values()):
                            for k, v in parsetdict_copy.items():
                                if v == name:
                                    parsetdict_copy[k] = value[i]
                        else:
                            # Key was not used as a value anywhere: add it as
                            # a parset entry unless it was already consumed
                            # as a command line argument
                            if not replaced:
                                parsetdict_copy[name] = value[i]

            jobs.append(
                ComputeJob(
                    inp.host, command,
                    arguments=[
                        inp.file,
                        executable,
                        arglist_copy,
                        parsetdict_copy,
                        work_dir,
                        self.inputs['parsetasfile'],
                        args_format,
                        self.environment
                    ],
                    resources={
                        "cores": self.inputs['nthreads']
                    }
                )
            )
        max_per_node = self.inputs['max_per_node']
        self._schedule_jobs(jobs, max_per_node)
        jobresultdict = {}
        resultmap = {}
        for job, outp in zip(jobs, outputmapfiles[0]):
            if job.results['returncode'] != 0:
                outp.skip = True
                if not self.inputs['error_tolerance']:
                    self.logger.error("A job has failed with returncode %d and error_tolerance is not set. Bailing out!" % job.results['returncode'])
                    return 1
            # Collect every result entry of the job, grouped per result key
            for k, v in list(job.results.items()):
                if not k in jobresultdict:
                    jobresultdict[k] = []
                jobresultdict[k].append(DataProduct(job.host, job.results[k], outp.skip))
                if k == 'break':
                    self.outputs.update({'break': v})

        # temp solution. write all output dict entries to a mapfile
        # check directory for stand alone mode
        if not os.path.isdir(mapfile_dir):
            try:
                os.mkdir(mapfile_dir)
            except OSError as exc:
                # Tolerate the directory having been created in the meantime
                if exc.errno == errno.EEXIST and os.path.isdir(mapfile_dir):
                    pass
                else:
                    raise
        for k, v in list(jobresultdict.items()):
            dmap = DataMap(v)
            dmap.save(os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile'))
            resultmap[k + '.mapfile'] = os.path.join(mapfile_dir, self.inputs['stepname'] + '.' + k + '.mapfile')
        self.outputs.update(resultmap)
        # *********************************************************************
        # Check job results, and create output data map file
        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warning(
                    "Some jobs failed, continuing with succeeded runs"
                )
        mapdict = {}
        for item, name in zip(outputmapfiles, self.inputs['mapfiles_out']):
            self.logger.debug("Writing data map file: %s" % name)
            item.save(name)
            mapdict[os.path.basename(name)] = name

        self.outputs['mapfile'] = self.inputs['mapfile_out']
        if self.inputs['outputsuffixes']:
            self.outputs.update(mapdict)

        return 0
Пример #18
0
    def go(self):
        """
        Master side of the setupparmdb recipe.

        Creates a template parmdb in a temporary directory by feeding the
        module-level ``template`` to the parmdbm executable, schedules one
        node-script job for every item yielded by ``DataMap.SkipIterator``
        with the template and the target location as arguments, and saves
        the resulting map to ``self.inputs['mapfile']``. The temporary
        directory is removed on every exit path.

        Returns 1 when parmdbm cannot be spawned, when validation of the
        maps fails, or when the error flag is set and every job returned a
        non-zero returncode; 0 otherwise.
        """
        self.logger.info("Starting setupparmdb run")
        super(setupparmdb, self).go()

        # *********************************************************************
        # 1. Create a temporary template parmdb at the master side of the recipe
        self.logger.info("Generating template parmdb")

        # generate a temp dir
        pdbdir = tempfile.mkdtemp(
            dir=self.config.get("layout", "job_directory"),
            suffix=".%s" % (os.path.basename(__file__), ))
        pdbfile = os.path.join(pdbdir, self.inputs['suffix'])

        # Create a template use tempdir for location
        # NOTE(review): the return code of parmdbm is not inspected here --
        # confirm that a failing parmdbm run is detected elsewhere.
        try:
            parmdbm_process = subprocess.Popen([self.inputs['executable']],
                                               stdin=subprocess.PIPE,
                                               stdout=subprocess.PIPE,
                                               stderr=subprocess.PIPE)
            sout, serr = communicate_returning_strings(
                parmdbm_process, input=(template % pdbfile).encode())
            log_process_output("parmdbm", sout, serr, self.logger)
        except OSError as err:
            self.logger.error("Failed to spawn parmdbm: %s" % str(err))
            # The try/finally below is never reached on this path, so the
            # temporary directory has to be removed here.
            shutil.rmtree(pdbdir, ignore_errors=True)
            return 1

        # *********************************************************************
        # 2. Call node side of recipe with template and possible targets
        #    If output location are provided as input these are validated.
        try:
            #                       Load file <-> compute node mapping from disk
            # ------------------------------------------------------------------
            args = self.inputs['args']
            self.logger.debug("Loading input-data mapfile: %s" % args[0])
            indata = DataMap.load(args[0])
            if len(args) > 1:
                # If output location provide validate the input and outputmap
                self.logger.debug("Loading output-data mapfile: %s" % args[1])
                outdata = DataMap.load(args[1])
                if not validate_data_maps(indata, outdata):
                    self.logger.error(
                        "Validation of input/output data mapfiles failed")
                    return 1
                # else output location is inputlocation+suffix
            else:
                outdata = copy.deepcopy(indata)
                for item in outdata:
                    item.file = os.path.join(
                        self.inputs['working_directory'],
                        self.inputs['job_name'],
                        os.path.basename(item.file) + self.inputs['suffix'])
            #  Call the node side
            command = "python3 %s" % (self.__file__.replace('master', 'nodes'))
            outdata.iterator = DataMap.SkipIterator
            jobs = []
            for outp in outdata:
                jobs.append(
                    ComputeJob(outp.host,
                               command,
                               arguments=[pdbfile, outp.file]))
            self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])
            for job, outp in zip(jobs, outdata):
                # If the returncode is 123456, failing ssh
                if job.results['returncode'] == 123456:
                    self.logger.warning(
                        "ssh connection with {0} failed."
                        "Skipping further work on this task".format(outp.host))
                    self.logger.warning("Error code 123456.")
                    outp.skip = True
                elif job.results['returncode'] != 0:
                    outp.skip = True

        # *********************************************************************
        # 3. validate performance, cleanup of temp files, construct output
        finally:
            self.logger.debug("Removing template parmdb")
            shutil.rmtree(pdbdir, ignore_errors=True)

        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warning(
                    "Some jobs failed, continuing with succeeded runs")
        self.logger.debug("Writing parmdb map file: %s" %
                          self.inputs['mapfile'])
        outdata.save(self.inputs['mapfile'])
        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
Пример #19
0
    def go(self):
        """
        Master side of the gainoutliercorrection recipe.

        Loads the input mapfile (``args[0]``) and either loads the output
        mapfile (``args[1]``) or derives one from the input map, propagates
        the skip flags between both maps, schedules one node-script job for
        every pair yielded by ``DataMap.SkipIterator`` and saves the output
        map to ``self.inputs['mapfile']``.

        Returns 1 when validation of the maps fails or when the error flag
        is set after scheduling; 0 otherwise.
        """
        super(gainoutliercorrection, self).go()
        self.logger.info("Starting gainoutliercorrection run")

        # ---------------------------------------------------------------------
        # Step 1: check the executable. An empty name is accepted silently;
        # a non-executable path only triggers warnings.
        executable = self.inputs['executable']
        if executable != "" and not os.access(executable, os.X_OK):
            self.logger.warn(
                "No parmexportcal excecutable is not found on the suplied"
                "path: {0}".format(self.inputs['executable']))
            self.logger.warn("Defaulting to edit_parmdb behaviour")

        # ---------------------------------------------------------------------
        # Step 2: read the input map; read or derive the output map
        mapfile_args = self.inputs['args']
        self.logger.debug("Loading input-data mapfile: %s" % mapfile_args[0])
        source_map = DataMap.load(mapfile_args[0])
        if len(mapfile_args) <= 1:
            # No output map given: every product goes to the job's working
            # directory, with its extension replaced by the suffix.
            target_map = copy.deepcopy(source_map)
            for product in target_map:
                job_dir = os.path.join(self.inputs['working_directory'],
                                       self.inputs['job_name'])
                stem = os.path.splitext(os.path.basename(product.file))[0]
                product.file = os.path.join(
                    job_dir, stem + self.inputs['suffix'])
        else:
            self.logger.debug(
                "Loading output-data mapfile: %s" % mapfile_args[1])
            target_map = DataMap.load(mapfile_args[1])
            if not validate_data_maps(source_map, target_map):
                self.logger.error(
                    "Validation of input/output data mapfiles failed")
                return 1

        # An entry skipped in one of the maps has to be skipped in both
        for src, dst in zip(source_map, target_map):
            skip_both = src.skip or dst.skip
            src.skip = skip_both
            dst.skip = skip_both

        # ---------------------------------------------------------------------
        # Step 3: run the node script once per input/output pair
        node_command = "python3 %s" % (
            self.__file__.replace('master', 'nodes'))
        source_map.iterator = target_map.iterator = DataMap.SkipIterator
        jobs = [
            ComputeJob(
                dst.host,
                node_command,
                arguments=[
                    src.file,
                    dst.file,
                    self.inputs['executable'],
                    self.environment,
                    self.inputs['sigma'],
                    self.inputs['export_instrument_model'],
                ])
            for src, dst in zip(source_map, target_map)
        ]
        self._schedule_jobs(jobs)

        # Flag the products of failed jobs as skipped
        for job, dst in zip(jobs, target_map):
            if job.results['returncode'] != 0:
                dst.skip = True

        # ---------------------------------------------------------------------
        # Step 4: evaluate the run and write the output mapfile
        if self.error.isSet():
            self.logger.warn("Detected failed gainoutliercorrection job")
            return 1

        self.logger.debug("Writing instrument map file: %s" %
                          self.inputs['mapfile'])
        target_map.save(self.inputs['mapfile'])
        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
Пример #20
0
    def go(self):
        """
        Master side of the imager_prepare recipe, called by the pipeline
        framework.

        For every entry of the target map a mapfile with the input
        measurement sets of that subband group is stored and, unless the
        entry is flagged as skipped, a node job is created. After the jobs
        have been scheduled three mapfiles are written: the map of the
        target entries (failed entries flagged as skipped), a MultiDataMap
        with the time slices reported by the jobs, and a map of the
        per-image input mapfiles.

        Returns 1 when the input validation fails or when no job finished
        with returncode 0; 0 otherwise.
        """
        super(imager_prepare, self).go()
        self.logger.info("Starting imager_prepare run")
        job_directory = self.config.get("layout", "job_directory")

        # ---------------------------------------------------------------------
        # Collect and validate the inputs
        input_map = DataMap.load(self.inputs['args'][0])
        output_map = DataMap.load(self.inputs['target_mapfile'])
        slices_per_image = self.inputs['slices_per_image']
        subbands_per_image = self.inputs['subbands_per_image']
        inputs_are_valid = self._validate_input_map(
            input_map, output_map, slices_per_image, subbands_per_image)
        if not inputs_are_valid:
            return 1

        # Location of the mapfile produced by this recipe
        output_ms_mapfile_path = self.inputs['mapfile']

        # ---------------------------------------------------------------------
        # Create the jobs
        # TODO: Refactor this function into: load data, perform work,
        # create output
        node_command = " python %s" % (
            self.__file__.replace("master", "nodes"))

        jobs = []
        per_image_mapfiles = []
        # total number of groups, needed to build the subband subsets
        n_subband_groups = len(output_map)

        # globalfs is only enabled when the option exists and is true
        globalfs = self.config.has_option("remote", "globalfs")
        if globalfs:
            globalfs = self.config.getboolean("remote", "globalfs")

        for group_idx, target in enumerate(output_map):
            # Select the input data belonging to this subband group
            self.logger.debug("Creating input data subset for processing"
                              "on: {0}".format(target.host))
            group_input_map = self._create_input_map_for_sbgroup(
                slices_per_image, n_subband_groups,
                subbands_per_image, group_idx, input_map)

            # The per-group mapfile is stored for skipped entries as well
            group_mapfile_path = os.path.join(
                job_directory, "mapfiles",
                "ms_per_image_{0}.map".format(group_idx))
            self._store_data_map(group_mapfile_path, group_input_map,
                                 "inputmap for location")

            # A skip iterator cannot be used here because the position in
            # the map is needed; skipped entries are handled by hand and
            # still get a (skipped) placeholder in the output map.
            if target.skip == True:
                per_image_mapfiles.append((target.host, [], True))
                continue

            # Remember the mapfile holding the (input) ms of this image
            per_image_mapfiles.append(
                (target.host, group_mapfile_path, False))

            # Every job works in its own directory so that jobs cannot
            # interfere with each other on a global file system
            working_dir = os.path.join(
                self.inputs['working_directory'],
                "imager_prepare_{0}".format(group_idx))

            arguments = [
                self.environment,
                self.inputs['parset'],
                working_dir,
                self.inputs['processed_ms_dir'],
                self.inputs['ndppp_exec'],
                target.file,
                slices_per_image,
                subbands_per_image,
                group_mapfile_path,
                self.inputs['asciistat_executable'],
                self.inputs['statplot_executable'],
                self.inputs['msselect_executable'],
                self.inputs['rficonsole_executable'],
                self.inputs['do_rficonsole'],
                self.inputs['add_beam_tables'],
                globalfs,
            ]

            jobs.append(
                ComputeJob(target.host,
                           node_command,
                           arguments,
                           resources={"cores": self.inputs['nthreads']}))

        # Let the pipeline scheduler run the job(s)
        self._schedule_jobs(jobs)

        # ---------------------------------------------------------------------
        # Evaluate the results and build the output maps
        if self.error.isSet():  # at least one node failed
            self.logger.warn("Failed prepare_imager run detected: Generating "
                             "new output_ms_mapfile_path without failed runs:"
                             " {0}".format(output_ms_mapfile_path))

        concat_map = copy.deepcopy(output_map)
        slice_entries = []
        n_succeeded = 0
        # Walk over all potential jobs, including the skipped ones. Only
        # non-skipped entries have a job, so the position in the job list is
        # tracked separately from the position in the map.
        next_job = 0
        for product in concat_map:
            # Entries skipped beforehand stay skipped in the slices map
            if product.skip:
                slice_entries.append((product.host, [], True))
                continue

            job = jobs[next_job]
            next_job += 1

            if job.results["returncode"] != 0:
                # The node failed: flag the data product as skipped
                product.skip = True
                slice_entries.append((product.host, [], True))
                msg = "Failed run on {0}. NOT Created: {1} ".format(
                    product.host, product.file)
                self.logger.warn(msg)
                continue

            # Only successful jobs contribute their time slices
            n_succeeded += 1
            slice_entries.append(
                (product.host, job.results["time_slices"], False))

        if n_succeeded == 0:
            self.logger.error(
                "None of the started compute node finished:"
                "The current recipe produced no output, aborting")
            return 1

        # Write the output mapfiles, starting with the concat.ms paths
        self._store_data_map(output_ms_mapfile_path, concat_map,
                             "mapfile with concat.ms")

        # the time slices
        slices_mapfile = self.inputs['slices_mapfile']
        MultiDataMap(slice_entries).save(slices_mapfile)
        self.logger.info(
            "Wrote MultiMapfile with produces timeslice: {0}".format(
                self.inputs['slices_mapfile']))

        # the map with the input mss that were actually used
        self._store_data_map(self.inputs["ms_per_image_mapfile"],
                             DataMap(per_image_mapfiles),
                             "mapfile containing (used) input ms per image:")

        # Publish the return values
        self.outputs['mapfile'] = output_ms_mapfile_path
        self.outputs['slices_mapfile'] = self.inputs['slices_mapfile']
        self.outputs['ms_per_image_mapfile'] = \
            self.inputs["ms_per_image_mapfile"]
        return 0
Пример #21
0
    def go(self):
        """
        Run the DPPP node script for every non-skipped entry of the input map.

        Steps:

        1. Load the input data map; load the output data map, or derive it
           from the input map when no output mapfile is given
        2. Load the optional parmdb and sourcedb data maps
        3. Run the node side of the recipe, one compute job per entry
        4. Mark the entries of failed jobs as skipped and write the output
           data map

        Returns 1 when the data maps fail validation, or when the error flag
        is set and every compute job returned a non-zero code; 0 otherwise.
        """
        self.logger.info("Starting DPPP run")
        super(dppp, self).go()

        # NOTE: the search for "Fully flagged baselines" messages in the DPPP
        # logs (and the 'fullyflagged' output derived from it) is disabled in
        # this version of the recipe.

        # *********************************************************************
        # 1. load input data file, validate output vs the input location if
        #    output locations are provided
        args = self.inputs['args']
        self.logger.debug("Loading input-data mapfile: %s" % args[0])
        indata = DataMap.load(args[0])
        if len(args) > 1:
            self.logger.debug("Loading output-data mapfile: %s" % args[1])
            outdata = DataMap.load(args[1])
        else:
            # No output mapfile given: place each output in the job's working
            # directory, named after the input file plus the suffix.
            outdata = copy.deepcopy(indata)
            for item in outdata:
                item.file = os.path.join(
                    self.inputs['working_directory'], self.inputs['job_name'],
                    os.path.basename(item.file) + self.inputs['suffix'])

        # ********************************************************************
        # 2. Load parmdb and sourcedb
        # Load parmdb-mapfile, if one was given. The `in` operator is used
        # instead of has_key(), which no longer exists on Python 3 mappings.
        if 'parmdb_mapfile' in self.inputs:
            self.logger.debug("Loading parmdb mapfile: %s" %
                              self.inputs['parmdb_mapfile'])
            parmdbdata = DataMap.load(self.inputs['parmdb_mapfile'])
        else:
            # Same hosts as the input map, but with empty file names
            parmdbdata = copy.deepcopy(indata)
            for item in parmdbdata:
                item.file = ''

        # Load sourcedb-mapfile, if one was given.
        if 'sourcedb_mapfile' in self.inputs:
            self.logger.debug("Loading sourcedb mapfile: %s" %
                              self.inputs['sourcedb_mapfile'])
            sourcedbdata = DataMap.load(self.inputs['sourcedb_mapfile'])
        else:
            # Same hosts as the input map, but with empty file names
            sourcedbdata = copy.deepcopy(indata)
            for item in sourcedbdata:
                item.file = ''

        # Validate all the data maps.
        if not validate_data_maps(indata, outdata, parmdbdata, sourcedbdata):
            self.logger.error("Validation of data mapfiles failed!")
            return 1

        # Update the skip fields of the four maps. If 'skip' is True in any of
        # these maps, then 'skip' must be set to True in all maps.
        for w, x, y, z in zip(indata, outdata, parmdbdata, sourcedbdata):
            w.skip = x.skip = y.skip = z.skip = (w.skip or x.skip or y.skip
                                                 or z.skip)

        # ********************************************************************
        # 3. Call the node side of the recipe
        # Create and schedule the compute jobs
        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        indata.iterator = outdata.iterator = DataMap.SkipIterator
        parmdbdata.iterator = sourcedbdata.iterator = DataMap.SkipIterator
        jobs = []
        for inp, outp, pdb, sdb in zip(indata, outdata, parmdbdata,
                                       sourcedbdata):
            jobs.append(
                ComputeJob(inp.host,
                           command,
                           arguments=[
                               inp.file, outp.file, pdb.file, sdb.file,
                               self.inputs['parset'],
                               self.inputs['executable'], self.environment,
                               self.inputs['demix_always'],
                               self.inputs['demix_if_needed'],
                               self.inputs['data_start_time'],
                               self.inputs['data_end_time'],
                               self.inputs['nthreads'], self.inputs['clobber']
                           ],
                           resources={"cores": self.inputs['nthreads']}))
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

        # Flag the output of every failed job as skipped. `outdata` uses the
        # skip iterator, so it is walked in step with `jobs`.
        for job, outp in zip(jobs, outdata):
            if job.results['returncode'] != 0:
                outp.skip = True

        # *********************************************************************
        # 4. Check job results, and create output data map file
        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn(
                    "Some jobs failed, continuing with succeeded runs")
        self.logger.debug("Writing data map file: %s" % self.inputs['mapfile'])
        outdata.save(self.inputs['mapfile'])
        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
Пример #22
0
    def go(self):
        """
        Steps:

        1. Load and validate the input datamaps
        2. Run the node parts of the recipe
        3. Validate node output and format the recipe output

        Returns 1 when not a single node run produced an "hdf5" result,
        0 otherwise. Entries whose node run failed are flagged as skipped in
        both written mapfiles.
        """
        super(selfcal_finalize, self).go()
        # *********************************************************************
        # 1. Load the datamaps
        awimager_output_map = DataMap.load(self.inputs["awimager_output_map"])
        ms_per_image_map = DataMap.load(self.inputs["ms_per_image_map"])
        sourcelist_map = DataMap.load(self.inputs["sourcelist_map"])
        sourcedb_map = DataMap.load(self.inputs["sourcedb_map"])
        target_mapfile = DataMap.load(self.inputs["target_mapfile"])
        output_image_mapfile = DataMap.load(
            self.inputs["output_image_mapfile"])
        concat_ms_mapfile = DataMap.load(self.inputs["concat_ms_map_path"])
        output_correlated_map = DataMap.load(
            self.inputs["output_correlated_mapfile"])
        processed_ms_dir = self.inputs["processed_ms_dir"]
        fillrootimagegroup_exec = self.inputs["fillrootimagegroup_exec"]

        # Align the skip fields
        align_data_maps(awimager_output_map, ms_per_image_map, sourcelist_map,
                        target_mapfile, output_image_mapfile, sourcedb_map,
                        concat_ms_mapfile, output_correlated_map)

        # Set the correct iterator
        sourcelist_map.iterator = awimager_output_map.iterator = \
            ms_per_image_map.iterator = target_mapfile.iterator = \
            output_image_mapfile.iterator = sourcedb_map.iterator = \
            concat_ms_mapfile.iterator = output_correlated_map.iterator = \
            DataMap.SkipIterator

        # *********************************************************************
        # 2. Run the node side of the recipe
        command = " python3 %s" % (self.__file__.replace("master", "nodes"))
        jobs = []
        for (awimager_output_item, ms_per_image_item, sourcelist_item,
             target_item, output_image_item, sourcedb_item, concat_ms_item,
             correlated_item) in zip(awimager_output_map, ms_per_image_map,
                                     sourcelist_map, target_mapfile,
                                     output_image_mapfile, sourcedb_map,
                                     concat_ms_mapfile, output_correlated_map):
            # collect the files as argument
            arguments = [
                awimager_output_item.file,
                ms_per_image_item.file,
                sourcelist_item.file,
                target_item.file,
                output_image_item.file,
                self.inputs["minbaseline"],
                self.inputs["maxbaseline"],
                processed_ms_dir,
                fillrootimagegroup_exec,
                self.environment,
                sourcedb_item.file,
                concat_ms_item.file,
                correlated_item.file,
                self.inputs["msselect_executable"],
            ]

            self.logger.info(
                "Starting finalize with the folowing args: {0}".format(
                    arguments))
            jobs.append(ComputeJob(target_item.host, command, arguments))

        self._schedule_jobs(jobs)

        # *********************************************************************
        # 3. Validate the performance of the node script and assign output
        succesful_run = False
        for (job, output_image_item,
             output_correlated_item) in zip(jobs, output_image_mapfile,
                                            output_correlated_map):
            if "hdf5" not in job.results:
                # If the output failed set the skip to True on BOTH output
                # maps. The skip attribute must be assigned: rebinding the
                # loop variable would leave the correlated map untouched.
                output_image_item.skip = True
                output_correlated_item.skip = True
            else:
                # signal that we have at least a single run finished ok.
                # No need to set skip in this case
                succesful_run = True

        if not succesful_run:
            self.logger.warn("Not a single finalizer succeeded")
            return 1

        # Save the location of the output images
        output_image_mapfile.save(self.inputs['placed_image_mapfile'])
        self.logger.debug(
            "Wrote mapfile containing placed hdf5 images: {0}".format(
                self.inputs['placed_image_mapfile']))

        # save the location of measurements sets
        output_correlated_map.save(self.inputs['placed_correlated_mapfile'])
        self.logger.debug("Wrote mapfile containing placed mss: {0}".format(
            self.inputs['placed_correlated_mapfile']))

        self.outputs["placed_image_mapfile"] = self.inputs[
            'placed_image_mapfile']
        self.outputs["placed_correlated_mapfile"] = self.inputs[
            'placed_correlated_mapfile']

        return 0
Пример #23
0
    def go(self):
        """
        Master side of imager_bbs, called by the framework.

        Steps:

        1. Load the measurement set, parmdb and sourcedb maps
        2. Write one set of mapfiles per non-skipped entry and start a node
           script for each of them
        3. Abort if any node failed, otherwise write the output mapfile

        Returns 1 when the error flag is set after the node runs, else 0.
        """
        super(imager_bbs, self).go()
        self.logger.info("Starting imager_bbs run")

        # ********************************************************************
        # 1. Load the data
        # TODO: DataMap extension: validate that the maps have equal length
        # and live on the same nodes (validate_data_maps is not used here).
        ms_map = MultiDataMap.load(self.inputs['args'][0])
        parmdb_map = MultiDataMap.load(self.inputs['instrument_mapfile'])
        sourcedb_map = DataMap.load(self.inputs['sourcedb_mapfile'])

        # *********************************************************************
        # 2. Start the node scripts
        jobs = []
        node_command = " python3 %s" % (self.__file__.replace("master", "nodes"))
        map_dir = os.path.join(
            self.config.get("layout", "job_directory"), "mapfiles")
        run_id = str(self.inputs.get("id"))

        # Synchronise the skip fields: an entry skipped in any one of the
        # three maps must be skipped in all of them.
        for ms_entry, parmdb_entry, sourcedb_entry in zip(ms_map, parmdb_map,
                                                          sourcedb_map):
            skip_entry = (ms_entry.skip or parmdb_entry.skip
                          or sourcedb_entry.skip)
            ms_entry.skip = skip_entry
            parmdb_entry.skip = skip_entry
            sourcedb_entry.skip = skip_entry

        # From here on only the non-skipped entries are visited
        for data_map in (ms_map, parmdb_map, sourcedb_map):
            data_map.iterator = DataMap.SkipIterator

        work_items = zip(ms_map, parmdb_map, sourcedb_map)
        for idx, (ms, parmdb, sourcedb) in enumerate(work_items):
            # host is taken from the ms entry and used for all three maps
            host = ms.host
            ms_list = ms.file

            # Target paths of the per-entry mapfiles
            ms_list_path = os.path.join(
                map_dir, "%s-%s_map_%s.map" % (host, idx, run_id))
            parmdb_list_path = os.path.join(
                map_dir, "%s-%s_parmdb_%s.map" % (host, idx, run_id))
            sourcedb_list_path = os.path.join(
                map_dir, "%s-%s_sky_%s.map" % (host, idx, run_id))

            # Write each payload as a single-entry MultiDataMap; the single
            # sourcedb file is wrapped in a list.
            payloads = (
                (ms_list_path, ms_list),
                (parmdb_list_path, parmdb.file),
                (sourcedb_list_path, [sourcedb.file]),
            )
            for target_path, files in payloads:
                MultiDataMap([(host, files, False)]).save(target_path)

            node_arguments = [
                self.inputs['bbs_executable'],
                self.inputs['parset'],
                ms_list_path,
                parmdb_list_path,
                sourcedb_list_path,
            ]
            node_resources = {"cores": self.inputs['nthreads']}
            jobs.append(ComputeJob(host, node_command, node_arguments,
                                   resources=node_resources))

        # start and wait till all are finished
        self._schedule_jobs(jobs)

        # **********************************************************************
        # 3. validate the node output and construct the output mapfile.
        if self.error.isSet():
            # A single failed node aborts the whole recipe
            self.logger.error("One of the nodes failed while performing"
                              "a BBS run. Aborting: concat.ms corruption")
            return 1

        # The calibrated data is stored in the measurement sets themselves,
        # so the ms map is what gets written as output.
        output_mapfile = self.inputs['mapfile']
        MultiDataMap(ms_map).save(output_mapfile)
        self.logger.info("Wrote file with  calibrated data")

        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
Пример #24
0
    def go(self):
        """
        Contains functionality of the vdsmaker.

        Steps:

        1. Load the input data map and derive the output VDS file names
        2. Run the makevds node script for every non-skipped entry
        3. Combine the VDS files of the successful runs into one GVDS file

        Sets ``self.outputs['gvds']`` when combinevds succeeds.

        Returns 1 when all makevds runs failed or when combinevds exits with
        a non-zero status, 0 otherwise.
        """
        super(vdsmaker, self).go()
        # **********************************************************************
        # 1. Load data from disk create output files
        args = self.inputs['args']
        self.logger.debug("Loading input-data mapfile: %s" % args[0])
        data = DataMap.load(args[0])

        # Skip items in `data` that have 'skip' set to True
        data.iterator = DataMap.SkipIterator

        # Create output vds names
        vdsnames = [
            os.path.join(self.inputs['directory'],
                         os.path.basename(item.file) + '.vds') for item in data
        ]

        # *********************************************************************
        # 2. Call vdsmaker
        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        jobs = []
        for inp, vdsfile in zip(data, vdsnames):
            jobs.append(
                ComputeJob(inp.host,
                           command,
                           arguments=[
                               inp.file,
                               self.config.get('cluster', 'clusterdesc'),
                               vdsfile, self.inputs['makevds']
                           ]))
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

        # Only keep the VDS files of the runs that succeeded
        vdsnames = [
            vds for vds, job in zip(vdsnames, jobs)
            if job.results['returncode'] == 0
        ]
        if not vdsnames:
            self.logger.error("All makevds processes failed. Bailing out!")
            return 1

        # *********************************************************************
        # 3. Combine VDS files to produce GDS
        self.logger.info("Combining VDS files")
        executable = self.inputs['combinevds']
        gvds_out = self.inputs['gvds']
        # Create the gvds directory for output files, needed for combine
        create_directory(os.path.dirname(gvds_out))

        try:
            command = [executable, gvds_out] + vdsnames
            combineproc = subprocess.Popen(command,
                                           close_fds=True,
                                           stdout=subprocess.PIPE,
                                           stderr=subprocess.PIPE)
            sout, serr = combineproc.communicate()
            log_process_output(executable, sout, serr, self.logger)
            if combineproc.returncode != 0:
                raise subprocess.CalledProcessError(combineproc.returncode,
                                                    command)
            self.outputs['gvds'] = gvds_out
            self.logger.info("Wrote combined VDS file: %s" % gvds_out)
        except subprocess.CalledProcessError as cpe:
            # `serr` is always bound here: the exception is only raised after
            # communicate() has returned.
            self.logger.exception("combinevds failed with status %d: %s" %
                                  (cpe.returncode, serr))
            # Report the failure to the caller instead of silently dropping it
            return 1

        return 0
Пример #25
0
    def go(self):
        """
        selfcal_bbs functionality. Called by framework performing all the work

        Steps:

        1. Load the input data maps
        2. Write the mapfiles for each non-skipped work item and start a
           node script for it
        3. Flag the work items of failed node runs as skipped and write the
           ms and concat_ms mapfiles

        Returns 0; failed node runs are reported through the skip fields of
        the written mapfiles rather than through the return value.
        """
        super(selfcal_bbs, self).go()
        self.logger.info("Starting selfcal_bbs run")

        # ********************************************************************
        # 1. Load the and validate the data
        ms_map = MultiDataMap.load(self.inputs['args'][0])
        parmdb_map = MultiDataMap.load(self.inputs['instrument_mapfile'])
        sourcedb_map = DataMap.load(self.inputs['sourcedb_mapfile'])
        concat_ms_map = DataMap.load(self.inputs['concat_ms_map_path'])

        # *********************************************************************
        # 2. Start the node scripts
        jobs = []
        node_command = " python %s" % (self.__file__.replace(
            "master", "nodes"))
        map_dir = os.path.join(self.config.get("layout", "job_directory"),
                               "mapfiles")
        run_id = str(self.inputs.get("id"))

        # Update the skip fields of the four maps. If 'skip' is True in any of
        # these maps, then 'skip' must be set to True in all maps.
        align_data_maps(ms_map, parmdb_map, sourcedb_map, concat_ms_map)

        ms_map.iterator = parmdb_map.iterator = sourcedb_map.iterator = \
            concat_ms_map.iterator = DataMap.SkipIterator

        # *********************************************************************
        for idx, (ms, parmdb, sourcedb, concat_ms) in enumerate(
                zip(ms_map, parmdb_map, sourcedb_map, concat_ms_map)):
            # host is taken from the ms entry and used for all maps
            host, ms_list = ms.host, ms.file

            # Write data maps to MultiDataMaps. The work item index is part
            # of the file name so that several work items on the same host
            # do not overwrite each other's mapfiles before the jobs run.
            ms_list_path = os.path.join(
                map_dir, "%s-%s_ms_%s.map" % (host, idx, run_id))
            MultiDataMap([tuple([host, ms_list, False])]).save(ms_list_path)

            parmdb_list_path = os.path.join(
                map_dir, "%s-%s_parmdb_%s.map" % (host, idx, run_id))
            MultiDataMap([tuple([host, parmdb.file,
                                 False])]).save(parmdb_list_path)

            sourcedb_list_path = os.path.join(
                map_dir, "%s-%s_sky_%s.map" % (host, idx, run_id))
            MultiDataMap([tuple([host, [sourcedb.file],
                                 False])]).save(sourcedb_list_path)

            # The concat ms does not have to be written: It already is a
            # singular item (it is the output of the reduce step)
            # redmine issue #6021
            arguments = [
                self.inputs['bbs_executable'], self.inputs['parset'],
                ms_list_path, parmdb_list_path, sourcedb_list_path,
                concat_ms.file, self.inputs['major_cycle']
            ]
            jobs.append(ComputeJob(host, node_command, arguments))

        # start and wait till all are finished
        self._schedule_jobs(jobs)

        # **********************************************************************
        # 3. validate the node output and construct the output mapfile.
        if self.error.isSet():  # if one of the nodes failed
            self.logger.warn("Failed bbs node run detected, skipping work "
                             "on this work item for further computations")

        # find failed job and set the skip field
        for (ms_item, concat_item, job) in zip(ms_map, concat_ms_map, jobs):
            if job.results["returncode"] == 0:
                continue
            ms_item.skip = True
            concat_item.skip = True
            self.logger.warn("bbs failed on item: {0}".format(
                ms_item.file))

        # return the output: The measurement set that are calibrated:
        # calibrated data is placed in the ms sets
        MultiDataMap(ms_map).save(self.inputs['mapfile'])
        # also save the concat_ms map with possible skips
        DataMap(concat_ms_map).save(self.inputs['concat_ms_map_path'])
        self.logger.info("Wrote file with  calibrated data")

        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
Пример #26
0
    def go(self):
        """
        Collect meta data of the data products and write it to a parset file.

        Steps:

        1. Parse and validate inputs
        2. Load the input data map
        3. Run the node side of the recipe for every non-skipped entry
        4. Flag entries of failed jobs as skipped and update the data map
        5. Create the parset file from the job results and write it to disk

        Returns 1 when the error flag is set and all jobs failed, or when
        writing the parset raises a RuntimeError; 0 otherwise.
        """
        super(get_metadata, self).go()
        # ********************************************************************
        # 1. Parse and validate inputs
        args = self.inputs['args']
        product_type = self.inputs['product_type']
        global_prefix = self.inputs['parset_prefix']
        # Add a trailing dot (.) if not present in the prefix.
        if global_prefix and not global_prefix.endswith('.'):
            global_prefix += '.'

        # An unknown product type is only logged; processing continues.
        if product_type not in self.valid_product_types:
            self.logger.error(
                "Unknown product type: %s\n\tValid product types are: %s" %
                (product_type, ', '.join(self.valid_product_types)))

        # ********************************************************************
        # 2. Load mapfiles
        self.logger.debug("Loading input-data mapfile: %s" % args[0])
        data = DataMap.load(args[0])

        # ********************************************************************
        # 3. call node side of the recipe
        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        data.iterator = DataMap.SkipIterator
        jobs = []
        for inp in data:
            jobs.append(
                ComputeJob(inp.host,
                           command,
                           arguments=[inp.file, self.inputs['product_type']]))
        self._schedule_jobs(jobs)
        # `data` uses the skip iterator, so it is walked in step with `jobs`
        for job, inp in zip(jobs, data):
            if job.results['returncode'] != 0:
                inp.skip = True

        # ********************************************************************
        # 4. Check job results, and create output data map file
        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn(
                    "Some jobs failed, continuing with succeeded runs")
        # The input mapfile itself is overwritten with the updated skip flags
        self.logger.debug("Updating data map file: %s" % args[0])
        data.save(args[0])

        # ********************************************************************
        # 5. Create the parset-file and write it to disk.
        parset = parameterset()
        prefix = "Output_%s_" % product_type
        parset.replace('%snrOf%s' % (global_prefix, prefix), str(len(jobs)))
        prefix = global_prefix + prefix
        for idx, job in enumerate(jobs):
            self.logger.debug("job[%d].results = %s" % (idx, job.results))
            parset.adoptCollection(metadata.to_parset(job.results),
                                   '%s[%d].' % (prefix, idx))
        try:
            create_directory(os.path.dirname(self.inputs['parset_file']))
            parset.writeFile(self.inputs['parset_file'])
            self.logger.info("Wrote meta data to: " +
                             self.inputs['parset_file'])
        except RuntimeError as err:
            self.logger.error("Failed to write meta-data: %s" % str(err))
            return 1

        # Explicit success status, like the other recipes
        return 0
Пример #27
0
    def go(self):
        """
        Image the input data with cimager, optionally split into timesteps.

        A GVDS file describing the input is produced by cooking the vdsmaker
        recipe. For every timestep one node job is started per part listed
        in that GVDS file; the image names found in the generated parsets
        are appended to ``self.outputs['images']``.

        Returns 1 if vdsmaker reports failure or if the error flag is set
        once all timesteps have been processed, 0 otherwise.
        """
        self.logger.info("Starting cimager run")
        super(cimager, self).go()
        self.outputs['images'] = []

        # --- Build a GVDS file describing all the data to be processed ------
        self.logger.debug("Building VDS file describing all data for cimager")
        gvds_file = os.path.join(self.config.get("layout", "job_directory"),
                                 "vds", "cimager.gvds")
        vds_inputs = LOFARinput(self.inputs)
        vds_inputs['args'] = self.inputs['args']
        vds_inputs['gvds'] = gvds_file
        vds_inputs['unlink'] = False
        # These settings are passed through unchanged from our own inputs
        for key in ('makevds', 'combinevds', 'nproc'):
            vds_inputs[key] = self.inputs[key]
        vds_inputs['directory'] = os.path.dirname(gvds_file)
        vds_outputs = LOFARoutput(self.inputs)
        if self.cook_recipe('vdsmaker', vds_inputs, vds_outputs):
            self.logger.warn("vdsmaker reports failure")
            return 1
        self.logger.debug("cimager GVDS is %s" % (gvds_file, ))

        # --- Read data for processing from the GVDS file --------------------
        gvds_parset = Parset(gvds_file)
        # One (host, vds) pair per part; the host is the text before the
        # first ':' of the part's FileSys entry.
        data = [
            (gvds_parset.getString("Part%d.FileSys" % part).split(":")[0],
             gvds_parset.getString("Part%d.Name" % part))
            for part in range(gvds_parset.getInt('NParts'))
        ]

        # --- Divide data into timesteps for imaging -------------------------
        # timesteps is a list of (start, end, results directory) tuples
        timesteps = []
        results_dir = self.inputs['results_dir']
        if self.inputs['timestep'] == 0:
            self.logger.info("No timestep specified; imaging all data")
            timesteps = [(None, None, results_dir)]
        else:
            self.logger.info("Using timestep of %s s" %
                             self.inputs['timestep'])
            gvds = get_parset(gvds_file)
            start_time = quantity(gvds['StartTime'].get()).get('s').get_value()
            end_time = quantity(gvds['EndTime'].get()).get('s').get_value()
            step = float(self.inputs['timestep'])
            while start_time < end_time:
                step_end = start_time + step
                step_dir = os.path.join(results_dir, str(start_time))
                timesteps.append((start_time, step_end, step_dir))
                start_time = step_end

        # --- Run each cimager process in a separate thread ------------------
        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        for label, (start_time, end_time, resultsdir) in enumerate(timesteps):
            self.logger.info("Processing timestep %d" % label)
            jobs = []
            parsets = []
            for host, vds in data:
                vds_data = Parset(vds)
                frequency_range = [
                    vds_data.getDoubleVector("StartFreqs")[0],
                    vds_data.getDoubleVector("EndFreqs")[-1]
                ]
                # Name of the data set: file name up to the first dot
                dataset_name = os.path.basename(
                    vds_data.getString('FileName')).split('.')[0]
                dataset_path = vds_data.getString("FileName")
                direction_type = vds_data.getStringVector(
                    "Extra.FieldDirectionType")[0]
                direction_ra = vds_data.getStringVector(
                    "Extra.FieldDirectionRa")[0]
                direction_dec = vds_data.getStringVector(
                    "Extra.FieldDirectionDec")[0]
                job_parset = self.__get_parset(
                    dataset_name,
                    dataset_path,
                    str(frequency_range),
                    direction_type,
                    direction_ra,
                    direction_dec,
                    'True',  # cimager bug: non-restored image unusable
                )
                parsets.append(job_parset)
                job_arguments = [
                    self.inputs['imager_exec'], vds, job_parset, resultsdir,
                    start_time, end_time
                ]
                jobs.append(ComputeJob(host, command,
                                       arguments=job_arguments))
            self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

            # Collect the image names from every generated parset first ...
            for parset_path in parsets:
                image_names = Parset(parset_path).getStringVector(
                    "Cimager.Images.Names")
                self.outputs['images'].extend(image_names)
            # ... and only then remove the parset files
            for parset_path in parsets:
                os.unlink(parset_path)

        # --- Check if we recorded a failing process before returning --------
        if self.error.isSet():
            self.logger.warn("Failed imager process detected")
            return 1
        return 0
Пример #28
0
    def go(self):
        """
        Run BBS: one GlobalControl thread plus one KernelControl thread per
        entry in ``self.bbs_map``.

        Steps, in order:

        1. Read the parset named by ``self.inputs['parset']`` and pull the
           blackboard database settings from it via ``_set_input``.
        2. Delete any existing blackboard session for ``db_key`` with psql
           (a failure here is only logged as a warning).
        3. Build the bbs map and a GVDS file (``vdsmaker`` task).
        4. Patch the parset and store it as ``bbs_control.parset`` in the
           ``parsets`` directory below the configured job directory.
        5. Start GlobalControl in a thread, then start and join one kernel
           thread per bbs map entry, then join GlobalControl.

        Returns 1 if the bbs map could not be created or if the killswitch
        was set during the run; otherwise sets ``self.outputs['mapfile']``
        and returns 0.
        """
        self.logger.info("Starting BBS run")
        super(new_bbs, self).go()

        #                Check for relevant input parameters in the parset-file
        # ---------------------------------------------------------------------
        self.logger.debug("Reading parset from %s" % self.inputs['parset'])
        self.parset = parameterset(self.inputs['parset'])

        self._set_input('db_host', 'BBDB.Host')
        self._set_input('db_user', 'BBDB.User')
        self._set_input('db_name', 'BBDB.Name')
        self._set_input('db_key', 'BBDB.Key')

        #                                         Clean the blackboard database
        # ---------------------------------------------------------------------
        self.logger.info(
            "Cleaning BBS database for key '%s'" % (self.inputs['db_key'])
        )
        # NOTE(review): db_key is interpolated into the SQL text unescaped.
        # The command is passed as an argument list (no shell involved), but
        # a key containing a single quote would still break or alter the
        # statement -- confirm that keys are always generated internally.
        clean_command = [
            "psql",
            "-h", self.inputs['db_host'],
            "-U", self.inputs['db_user'],
            "-d", self.inputs['db_name'],
            "-c", "DELETE FROM blackboard.session WHERE key='%s';" %
                  self.inputs['db_key']
        ]
        self.logger.debug(clean_command)
        if subprocess.call(clean_command) != 0:
            self.logger.warning(
                "Failed to clean BBS database for key '%s'" %
                self.inputs['db_key']
            )

        #                  Create a bbs_map describing the file mapping on disk
        # ---------------------------------------------------------------------
        if not self._make_bbs_map():
            return 1

        # Produce a GVDS file, describing the data that must be processed.
        gvds_file = self.run_task(
            "vdsmaker",
            self.inputs['data_mapfile'],
            gvds=self.inputs['gvds']
        )['gvds']

        #      Construct a parset for BBS GlobalControl by patching the GVDS
        #           file and database information into the supplied template
        # ------------------------------------------------------------------
        self.logger.debug("Building parset for BBS control")
        # Create a location for parsets
        job_directory = self.config.get("layout", "job_directory")
        parset_directory = os.path.join(job_directory, "parsets")
        create_directory(parset_directory)
        bbs_parset_path = os.path.join(parset_directory, "bbs_control.parset")

        # Patch the parset *before* entering the try block: if patching
        # itself fails there is no temporary file to clean up, and the
        # finally clause must not hide the real error behind an unbound name.
        bbs_parset = utilities.patch_parset(
            self.parset,
            {
                'Observation': gvds_file,
                'BBDB.Key': self.inputs['db_key'],
                'BBDB.Name': self.inputs['db_name'],
                'BBDB.User': self.inputs['db_user'],
                'BBDB.Host': self.inputs['db_host'],
            }
        )
        try:
            # Keep a copy of the patched parset in the job directory
            shutil.copyfile(bbs_parset, bbs_parset_path)
            self.logger.debug("BBS control parset is %s" % (bbs_parset_path,))
        finally:
            # Always remove the file in the tempdir
            os.remove(bbs_parset)

        #        When one of our processes fails, we set the killswitch.
        #      Everything else will then come crashing down, rather than
        #                                         hanging about forever.
        # --------------------------------------------------------------
        self.killswitch = threading.Event()
        self.killswitch.clear()
        # Signal handlers are invoked with (signum, frame); Event.set()
        # accepts no arguments, so it has to be wrapped.
        signal.signal(
            signal.SIGTERM,
            lambda signum, frame: self.killswitch.set()
        )

        #                           GlobalControl runs in its own thread
        # --------------------------------------------------------------
        run_flag = threading.Event()
        run_flag.clear()
        # The temporary parset has been removed above, so GlobalControl is
        # given the copy stored in the job directory.
        bbs_control = threading.Thread(
            target=self._run_bbs_control,
            args=(bbs_parset_path, run_flag)
        )
        bbs_control.start()
        run_flag.wait()    # Wait for control to start before proceeding

        #      We run BBS KernelControl on each compute node by directly
        #                             invoking the node script using SSH
        #      Note that we use a job_server to send out job details and
        #           collect logging information, so we define a bunch of
        #    ComputeJobs. However, we need more control than the generic
        #     ComputeJob.dispatch method supplies, so we'll control them
        #                                          with our own threads.
        # --------------------------------------------------------------
        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        jobpool = {}
        bbs_kernels = []
        with job_server(self.logger, jobpool, self.error) as (jobhost,
                                                              jobport):
            self.logger.debug("Job server at %s:%d" % (jobhost, jobport))
            for job_id, details in enumerate(self.bbs_map):
                host, files = details
                jobpool[job_id] = ComputeJob(
                    host, command,
                    arguments=[
                        self.inputs['kernel_exec'],
                        files,
                        self.inputs['db_key'],
                        self.inputs['db_name'],
                        self.inputs['db_user'],
                        self.inputs['db_host']
                    ]
                )
                bbs_kernels.append(
                    threading.Thread(
                        target=self._run_bbs_kernel,
                        args=(host, command, job_id, jobhost, str(jobport))
                    )
                )
            self.logger.info("Starting %d threads" % len(bbs_kernels))
            for thread in bbs_kernels:
                thread.start()
            self.logger.debug("Waiting for all kernels to complete")
            for thread in bbs_kernels:
                thread.join()

        #         When GlobalControl finishes, our work here is done
        # ----------------------------------------------------------
        self.logger.info("Waiting for GlobalControl thread")
        bbs_control.join()

        if self.killswitch.isSet():
            #  If killswitch is set, then one of our processes failed so
            #                                   the whole run is invalid
            # ----------------------------------------------------------
            return 1

        self.outputs['mapfile'] = self.inputs['data_mapfile']
        return 0
Пример #29
0
class setupparmdb(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Create a distributed parameter database (ParmDB) for a distributed
    Measurement set (MS).

    1. Create a parmdb template at the master side of the recipe
    2. Call node side of recipe with template and possible targets
    3. Validate performance, cleanup of temp files, construct output

    **Command line arguments**

    1. A mapfile describing the data to be processed.
    2. Optionally, a mapfile with output locations. If provided, the input
       and output mapfiles are validated against each other.

    """
    inputs = {
        'executable':
        ingredient.ExecField(
            '--executable',
            help="Full path to parmdbm executable",
        ),
        'nproc':
        ingredient.IntField(
            '--nproc',
            help="Maximum number of simultaneous processes per compute node",
            default=8),
        'suffix':
        ingredient.StringField(
            '--suffix',
            help="Suffix of the table name of the empty parmameter database",
            default=".parmdb"),
        'working_directory':
        ingredient.StringField('-w',
                               '--working-directory',
                               help="Working directory used on output nodes. "
                               "Results will be written here."),
        'mapfile':
        ingredient.StringField(
            '--mapfile',
            help="Full path of mapfile to produce; it will contain "
            "a list of the generated empty parameter database files")
    }

    outputs = {'mapfile': ingredient.FileField()}

    def go(self):
        """
        Build a template parmdb locally and run the node script once per
        entry of the output data map.

        Returns 1 when parmdbm cannot be spawned, when the supplied
        input/output mapfiles fail validation, or when the error flag is
        set and every job returned a non-zero code. Otherwise the output
        data map is saved to ``self.inputs['mapfile']`` (entries of failed
        jobs are marked as skipped) and 0 is returned.

        The temporary directory holding the template is removed on every
        exit path.
        """
        self.logger.info("Starting setupparmdb run")
        super(setupparmdb, self).go()

        # *********************************************************************
        # 1. Create a temporary template parmdb at the master side of the recipe
        self.logger.info("Generating template parmdb")

        # generate a temp dir
        pdbdir = tempfile.mkdtemp(
            dir=self.config.get("layout", "job_directory"))
        pdbfile = os.path.join(pdbdir, self.inputs['suffix'])

        # Everything that follows runs under a single try/finally so that the
        # temporary directory is also removed when spawning parmdbm fails.
        try:
            # Create a template use tempdir for location
            try:
                parmdbm_process = subprocess.Popen(
                    [self.inputs['executable']],
                    stdin=subprocess.PIPE,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE)
                sout, serr = parmdbm_process.communicate(template % pdbfile)
                log_process_output("parmdbm", sout, serr, self.logger)
            except OSError as err:
                self.logger.error("Failed to spawn parmdbm: %s" % str(err))
                return 1

            # *****************************************************************
            # 2. Call node side of recipe with template and possible targets
            #    If output location are provided as input these are validated.

            #                   Load file <-> compute node mapping from disk
            # --------------------------------------------------------------
            args = self.inputs['args']
            self.logger.debug("Loading input-data mapfile: %s" % args[0])
            indata = DataMap.load(args[0])
            if len(args) > 1:
                # If output location provide validate the input and outputmap
                self.logger.debug("Loading output-data mapfile: %s" % args[1])
                outdata = DataMap.load(args[1])
                if not validate_data_maps(indata, outdata):
                    self.logger.error(
                        "Validation of input/output data mapfiles failed")
                    return 1
            else:
                # No output map given: derive each output location from the
                # working directory, the job name and the input file's
                # basename plus suffix.
                outdata = copy.deepcopy(indata)
                for item in outdata:
                    item.file = os.path.join(
                        self.inputs['working_directory'],
                        self.inputs['job_name'],
                        os.path.basename(item.file) + self.inputs['suffix'])

            #  Call the node side
            command = "python %s" % (self.__file__.replace('master', 'nodes'))
            outdata.iterator = DataMap.SkipIterator
            jobs = [
                ComputeJob(outp.host, command, arguments=[pdbfile, outp.file])
                for outp in outdata
            ]
            self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])

            for job, outp in zip(jobs, outdata):
                returncode = job.results['returncode']
                # If the returncode is 123456, failing ssh
                if returncode == 123456:
                    self.logger.warning(
                        "ssh connection with {0} failed. "
                        "Skipping further work on this task".format(outp.host))
                    self.logger.warning("Error code 123456.")
                # Any failing job has its output entry marked as skipped
                if returncode != 0:
                    outp.skip = True

        # *********************************************************************
        # 3. validate performance, cleanup of temp files, construct output
        finally:
            self.logger.debug("Removing template parmdb")
            shutil.rmtree(pdbdir, ignore_errors=True)

        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            self.logger.warning(
                "Some jobs failed, continuing with succeeded runs")

        self.logger.debug("Writing parmdb map file: %s" %
                          self.inputs['mapfile'])
        outdata.save(self.inputs['mapfile'])
        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
Пример #30
0
    def go(self):
        """
        Run the node side of the awimager recipe for every measurement set /
        sourcedb pair and collect the resulting images in an output mapfile.

        Steps:

        1. Load the input mapfile (``self.inputs['args'][0]``) and the
           sourcedb mapfile and validate them against each other.
        2. Build one ComputeJob per pair and schedule them.
        3. For each job, store the ``"image"`` result in the output map, or
           mark the entry as failed/skipped when no image was reported.

        Returns 1 when the mapfiles fail validation or when no run produced
        an image. Otherwise the output map is stored at
        ``self.inputs['mapfile']`` and 0 is returned, even if some runs
        failed.
        """
        super(selfcal_awimager, self).go()
        self.logger.info("Starting imager_awimager run")

        # *********************************************************************
        # 1. collect the inputs and validate
        input_map = DataMap.load(self.inputs['args'][0])
        sourcedb_map = DataMap.load(self.inputs['sourcedb_path'])

        if not validate_data_maps(input_map, sourcedb_map):
            self.logger.error(
                "the supplied input_ms mapfile and sourcedb mapfile "
                "are incorrect. Aborting")
            self.logger.error(repr(input_map))
            self.logger.error(repr(sourcedb_map))
            return 1

        # *********************************************************************
        # 2. Start the node side of the awimager recipe
        # Compile the command to be executed on the remote machine
        node_command = "python3 %s" % (self.__file__.replace("master", "nodes"))

        output_map = copy.deepcopy(input_map)
        align_data_maps(input_map, output_map, sourcedb_map)

        sourcedb_map.iterator = input_map.iterator = output_map.iterator = \
            DataMap.SkipIterator

        # jobs holds a None placeholder for every skipped pair, so that it
        # can be zipped against output_map below.
        jobs = []
        for measurement_item, source_item in zip(input_map, sourcedb_map):
            if measurement_item.skip or source_item.skip:
                jobs.append(None)
                continue

            arguments = [self.inputs['executable'],
                         self.environment,
                         self.inputs['parset'],
                         self.inputs['working_directory'],
                         self.inputs['output_image'],
                         measurement_item.file,
                         source_item.file,
                         self.inputs['mask_patch_size'],
                         self.inputs['autogenerate_parameters'],
                         self.inputs['specify_fov'],
                         self.inputs['fov'],
                         self.inputs['major_cycle'],
                         self.inputs['nr_cycles'],
                         self.inputs['perform_self_cal']
                         ]

            # The job runs on the host that holds the measurement set
            jobs.append(
                ComputeJob(measurement_item.host, node_command, arguments))

        # Only real jobs are handed to the scheduler; the placeholders carry
        # no work and have no dispatch logic.
        self._schedule_jobs([job for job in jobs if job is not None])

        # *********************************************************************
        # 3. Check output of the node scripts
        for job, output_item in zip(jobs, output_map):
            # job is None for a skipped pair: treat it like a failed run
            # instead of dereferencing it.
            if job is None or "image" not in job.results:
                output_item.file = "failed"
                output_item.skip = True
            else:
                output_item.file = job.results["image"]
                output_item.skip = False

        # Check if there are finished runs
        if not any(not item.skip for item in output_map):
            self.logger.error(
                    "None of the started awimager run finished correct")
            self.logger.error(
                    "No work left to be done: exiting with error status")
            return 1

        # If partial succes
        if self.error.isSet():
            self.logger.error("Failed awimager node run detected. continue with "
                              "successful tasks.")

        self._store_data_map(self.inputs['mapfile'], output_map,
                             "mapfile containing produces awimages")

        self.outputs["mapfile"] = self.inputs['mapfile']
        return 0