Example #1
    def execute(self, slot, subindex, ignored_roi, result):
        configFilePath = self.ConfigFilePath.value
        config = parseClusterConfigFile(configFilePath)

        blockwiseFileset = self._primaryBlockwiseFileset

        # Check axis compatibility
        inputAxes = self.Input.meta.getTaggedShape().keys()
        outputAxes = list(blockwiseFileset.description.axes)
        assert set(inputAxes) == set(outputAxes), \
            "Output dataset has the wrong set of axes.  Input axes: {}, Output axes: {}".format( "".join(inputAxes), "".join(outputAxes) )

        roiString = self.RoiString.value
        roi = Roi.loads(roiString)
        assert len(roi.start) == len(self.Input.meta.shape), \
            "Task roi: {} is not valid for this input.  Did the master launch this task correctly?".format(roiString)

        logger.info("Executing for roi: {}".format(roi))

        if config.use_node_local_scratch:
            assert False, "FIXME."

        assert (blockwiseFileset.getEntireBlockRoi(roi.start)[1] == roi.stop).all(), \
            "Each task must execute exactly one full block.  ({},{}) is not a valid block roi.".format(roi.start, roi.stop)
        assert self.Input.ready(), "Input slot is not ready."

        # Convert the task subrequest shape dict into a shape for this dataset (and axisordering)
        # (List comprehension instead of map(): under Python 3, map() returns a
        # lazy iterator, but a concrete sequence is needed here.)
        subrequest_shape = [config.task_subrequest_shape[tag.key]
                            for tag in self.Input.meta.axistags]
        primary_subrequest_shape = self._primaryBlockwiseFileset.description.sub_block_shape
        if primary_subrequest_shape is not None:
            # If the output dataset specified a sub_block_shape, override the cluster config
            subrequest_shape = primary_subrequest_shape

        with Timer() as computeTimer:
            # Stream the data out to disk.
            streamer = BigRequestStreamer(self.Input, (roi.start, roi.stop),
                                          subrequest_shape,
                                          config.task_parallel_subrequests)
            streamer.progressSignal.subscribe(self.progressSignal)
            streamer.resultSignal.subscribe(self._handlePrimaryResultBlock)
            streamer.execute()

            # Now the block is ready.  Update the status.
            blockwiseFileset.setBlockStatus(roi.start,
                                            BlockwiseFileset.BLOCK_AVAILABLE)

        logger.info("Finished task in {} seconds".format(
            computeTimer.seconds()))
        result[0] = True
        return result
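
The RoiString handed to each task is produced by the master with Roi.dumps() (see Example #2) and parsed back with Roi.loads() above. Below is a minimal sketch of that round-trip plus the full-block check, assuming lazyflow's Roi and SubRegion as used in these examples; the helper function name is hypothetical.

    from lazyflow.rtype import Roi, SubRegion

    def roundtrip_block_roi(blockwiseFileset, block_start, block_stop):
        # Master side: wrap the block bounds in a SubRegion and serialize it
        # for the task's command line (the --_node_work_ argument in Example #2).
        subregion = SubRegion(None, start=block_start, stop=block_stop)
        roiString = Roi.dumps(subregion)

        # Task side: parse the string back and verify that it spans exactly
        # one block of the output dataset, mirroring the assertion in execute().
        roi = Roi.loads(roiString)
        assert (blockwiseFileset.getEntireBlockRoi(roi.start)[1] == roi.stop).all(), \
            "({},{}) is not a full block roi".format(roi.start, roi.stop)
        return roi
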
Example #2
    def _prepareTaskInfos(self, roiList):
        # Divide up the workload into large pieces
        logger.info("Dividing into {} node jobs.".format(len(roiList)))

        taskInfos = collections.OrderedDict()
        for roiIndex, roi in enumerate(roiList):
            roi = (tuple(roi[0]), tuple(roi[1]))
            taskInfo = OpClusterize.TaskInfo()
            taskInfo.subregion = SubRegion(None, start=roi[0], stop=roi[1])

            taskName = "J{:02}".format(roiIndex)

            commandArgs = []
            commandArgs.append("--option_config_file=" +
                               self.ConfigFilePath.value)
            commandArgs.append("--project=" + self.ProjectFilePath.value)
            commandArgs.append("--_node_work_=\"" +
                               Roi.dumps(taskInfo.subregion) + "\"")
            commandArgs.append("--process_name={}".format(taskName))
            commandArgs.append("--output_description_file={}".format(
                self.OutputDatasetDescription.value))
            for slot in self.SecondaryOutputDescriptions:
                commandArgs.append(
                    "--secondary_output_description_file={}".format(
                        slot.value))

            # Check the command format string: We need to know where to put our args...
            commandFormat = self._config.command_format
            assert "{task_args}" in commandFormat, \
                "command_format must contain a {task_args} placeholder"

            # Output log directory might be a relative path (relative to config file)
            absLogDir, _ = getPathVariants(
                self._config.output_log_directory,
                os.path.split(self.ConfigFilePath.value)[0])
            taskOutputLogFilename = taskName + ".log"
            taskOutputLogPath = os.path.join(absLogDir, taskOutputLogFilename)

            allArgs = " " + " ".join(commandArgs) + " "
            taskInfo.taskName = taskName
            taskInfo.command = commandFormat.format(
                task_args=allArgs,
                task_name=taskName,
                task_output_file=taskOutputLogPath)
            taskInfos[roi] = taskInfo

        return taskInfos
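
For context on the template above: command_format comes from the cluster config and must contain a {task_args} placeholder; {task_name} and {task_output_file} are also filled in when present (str.format ignores unused keyword arguments). A hypothetical template and the command it yields; the qsub invocation is illustrative, not taken from the source:

    command_format = "qsub -N {task_name} -o {task_output_file} run_task.sh {task_args}"
    command = command_format.format(
        task_args="--option_config_file=cluster.json --process_name=J00",
        task_name="J00",
        task_output_file="/logs/J00.log")
    # -> "qsub -N J00 -o /logs/J00.log run_task.sh --option_config_file=cluster.json --process_name=J00"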