def fileTasksForFrames(self):
        numTotalFrames = self.ds.getNumSlices()
        logging.debug('numTotalFrames: %s, currentFrameNum: %d' % (numTotalFrames, self.currentFrameNum))
        numFramesOutstanding = 0
        while numTotalFrames > (self.currentFrameNum + 1):
            logging.debug('we have unpublished frames - push them')

            #turn our metadata to a string once (outside the loop)
            #mdstring = self.mdh.to_JSON() #TODO - use a URI instead
            
            newFrameNum = min(self.currentFrameNum + 100000, numTotalFrames-1)

            #release the tasks for this batch of frames (the rule already defines them)

            s = clusterIO._getSession(self.taskQueueURI)
            r = s.get('%s/release_rule_tasks?ruleID=%s&release_start=%d&release_end=%d' % (self.taskQueueURI, self._ruleID, self.currentFrameNum, newFrameNum),
                       data='',
                       headers={'Content-Type': 'application/json'})

            if r.status_code == 200 and r.json()['ok']:
                logging.debug('Successfully released tasks')
            else:
                logging.error('Failed on releasing tasks with status code: %d' % r.status_code)

            self.currentFrameNum = newFrameNum

            numFramesOutstanding = numTotalFrames - 1 - self.currentFrameNum

        return numFramesOutstanding
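A minimal sketch of how fileTasksForFrames might be driven while a series is still being acquired (the pusher instance, poll interval, and loop structure below are assumptions for illustration, not from the source):

import time

def poll_until_complete(pusher, interval=1.0):
    # keep releasing newly-arrived frames; stop once the data source reports
    # completion and no frames remain unpublished
    while True:
        outstanding = pusher.fileTasksForFrames()
        if pusher.ds.isComplete() and outstanding == 0:
            break
        time.sleep(interval)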
Example #2
    def fileTasksForInputs(self, **kwargs):
        from PYME.IO import clusterIO
        input_names = kwargs.keys()
        inputs = {k : kwargs[k] if isinstance(kwargs[k], list) else clusterIO.cglob(kwargs[k], include_scheme=True) for k in input_names}

        numTotalFrames = len(list(inputs.values())[0])
        self.currentFrameNum = 0

        logger.debug('numTotalFrames = %d' % numTotalFrames)
        logger.debug('inputs = %s' % inputs)
        
        inputs_by_task = {frameNum: {k : inputs[k][frameNum] for k in inputs.keys()} for frameNum in range(numTotalFrames)}

        rule = {'template': self._taskTemplate, 'inputsByTask' : inputs_by_task}

        s = clusterIO._getSession(self.taskQueueURI)
        r = s.post('%s/add_integer_id_rule?max_tasks=%d&release_start=%d&release_end=%d' % (self.taskQueueURI, numTotalFrames, 0, numTotalFrames),
                   data=json.dumps(rule),
                   headers={'Content-Type': 'application/json'})

        if r.status_code == 200:
            resp = r.json()
            self._ruleID = resp['ruleID']
            logging.debug('Successfully created rule')
        else:
            logging.error('Failed creating rule with status code: %d' % r.status_code)
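A hedged usage sketch: each keyword argument names an input and maps it to either an explicit list of cluster URIs or a glob pattern expanded via clusterIO.cglob (the series path and URI scheme below are made up for illustration):

# hypothetical call - one task per matched frame file
pusher.fileTasksForInputs(frames='pyme-cluster:///example_series/frame*.h5')

# equivalent explicit-list form, bypassing the glob expansion
pusher.fileTasksForInputs(frames=['pyme-cluster:///example_series/frame0.h5',
                                  'pyme-cluster:///example_series/frame1.h5'])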
Example #3
    def _post_rule(self,
                   timeout=3600,
                   max_tasks=1e6,
                   release_start=None,
                   release_end=None):
        """ wrapper around add_integer_rule api endpoint"""
        from PYME.IO import clusterIO

        s = clusterIO._getSession(self.taskQueueURI)
        if release_start is None:
            cmd = '%s/add_integer_id_rule?timeout=%d&max_tasks=%d' % (
                self.taskQueueURI, timeout, max_tasks)
        else:
            # TODO - can we get rid of this special casing?
            cmd = '%s/add_integer_id_rule?timeout=%d&max_tasks=%d&release_start=%d&release_end=%d' % (
                self.taskQueueURI, timeout, max_tasks, release_start,
                release_end)

        r = s.post(cmd,
                   data=json.dumps(self.rule),
                   headers={'Content-Type': 'application/json'})

        if r.status_code == 200:
            resp = r.json()
            self._ruleID = resp['ruleID']
            logger.debug('Successfully created rule')
        else:
            logger.error('Failed creating rule with status code: %d' %
                         r.status_code)
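For reference, the same request can be issued with plain requests, bypassing the clusterIO session cache (a sketch assuming, as above, that the rule is a dict with a 'template' key; the function name is hypothetical):

import json
import requests

def post_rule_raw(task_queue_uri, rule, timeout=3600, max_tasks=int(1e6)):
    # POST the rule to the add_integer_id_rule endpoint and return the new rule ID
    url = '%s/add_integer_id_rule?timeout=%d&max_tasks=%d' % (
        task_queue_uri, timeout, max_tasks)
    r = requests.post(url, data=json.dumps(rule),
                      headers={'Content-Type': 'application/json'})
    r.raise_for_status()
    return r.json()['ruleID']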
Example #4
    def _postTasks(self, task_list):
        from PYME.IO import clusterIO
        if isinstance(task_list[0], string_types):
            task_list = '[' + ',\n'.join(task_list) + ']'
        else:
            task_list = json.dumps(task_list)

        s = clusterIO._getSession(self.taskQueueURI)
        r = s.post('%s/distributor/tasks?queue=%s' % (self.taskQueueURI, self.queueID), data=task_list,
                   headers={'Content-Type': 'application/json'})

        if r.status_code == 200 and r.json()['ok']:
            logging.debug('Successfully posted tasks')
        else:
            logging.error('Failed on posting tasks with status code: %d' % r.status_code)
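The string branch lets callers hand in pre-serialized task JSON without a decode/re-encode round trip; both branches produce the same request body. A small equivalence check (task contents are made up):

import json

tasks = [{'id': 0, 'type': 'localization'}, {'id': 1, 'type': 'localization'}]
pre_serialized = [json.dumps(t) for t in tasks]
body_a = json.dumps(tasks)
body_b = '[' + ',\n'.join(pre_serialized) + ']'
assert json.loads(body_a) == json.loads(body_b)  # same payload, different whitespace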
Example #5
    def _mark_complete(self):
        """ Thin wrapper around release_rule_tasks api endpoint"""
        from PYME.IO import clusterIO
        s = clusterIO._getSession(self.taskQueueURI)
        r = s.get('%s/mark_release_complete?ruleID=%s' %
                  (self.taskQueueURI, self._ruleID),
                  data='',
                  headers={'Content-Type': 'application/json'})

        if r.status_code == 200 and r.json()['ok']:
            logging.debug('Successfully marked rule as complete')
        else:
            logging.error('Failed to mark rule complete with status code: %d' %
                          r.status_code)
Example #6
    def _release_tasks(self, release_start, release_end):
        """ Thin wrapper around release_rule_tasks api endpoint"""
        from PYME.IO import clusterIO
        s = clusterIO._getSession(self.taskQueueURI)
        r = s.get(
            '%s/release_rule_tasks?ruleID=%s&release_start=%d&release_end=%d' %
            (self.taskQueueURI, self._ruleID, release_start, release_end),
            data='',
            headers={'Content-Type': 'application/json'})

        if r.status_code == 200 and r.json()['ok']:
            logging.debug('Successfully released tasks (%d:%d)' %
                          (release_start, release_end))
        else:
            logging.error('Failed on releasing tasks with status code: %d' %
                          r.status_code)
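Taken together, _post_rule, _release_tasks, and _mark_complete cover the rule lifecycle. A hypothetical end-to-end sketch against the endpoints shown above (post_rule_raw is the sketch from earlier; the URI and template are placeholders):

import requests

# 1. create the rule - this defines tasks but releases none of them
rule_id = post_rule_raw(task_queue_uri, {'template': task_template})

# 2. release tasks in batches as their inputs become available
requests.get('%s/release_rule_tasks?ruleID=%s&release_start=%d&release_end=%d'
             % (task_queue_uri, rule_id, 0, 100))

# 3. tell the server that no further tasks will be released
requests.get('%s/mark_release_complete?ruleID=%s' % (task_queue_uri, rule_id))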
Example #7
    def post_rule(self):
        rule = {'template' : self._taskTemplate}

        if self.ds.isComplete():
            queueSize = self.ds.getNumSlices()
        else:
            queueSize = 1e6
        
        s = clusterIO._getSession(self.taskQueueURI)
        r = s.post('%s/add_integer_id_rule?timeout=300&max_tasks=%d' % (self.taskQueueURI, queueSize),
                   data=json.dumps(rule),
                   headers={'Content-Type': 'application/json'})

        if r.status_code == 200:
            resp = r.json()
            self._ruleID = resp['ruleID']
            logging.debug('Successfully created rule')
        else:
            logging.error('Failed creating rule with status code: %d' % r.status_code)
Example #8
    def _get_tasks(self):
        """

        Query nodeserver for tasks and place them in the queue for this worker,
        if available

        Returns
        -------
        new_tasks : bool
            flag to report whether _get_tasks added new tasks to the taskWorker queue

        """
        tasks = []
        queueURL = self._local_queue_url

        try:
            # ask the queue for tasks
            s = clusterIO._getSession(queueURL)
            r = s.get(queueURL +
                      'node/tasks?workerID=%s&numWant=50' % self.procName)
            if r.status_code == 200:
                resp = r.json()
                if resp['ok']:
                    res = resp['result']
                    if isinstance(res, list):
                        tasks += [(queueURL, t) for t in res]
                    else:
                        tasks.append((queueURL, res))
        except requests.Timeout:
            logger.info('Read timeout requesting tasks from %s' % queueURL)

        except Exception:
            import traceback
            logger.exception(traceback.format_exc())

        if len(tasks) != 0:
            for t in tasks:
                self.inputQueue.put(t)
            return True
        else:
            # flag that there were no new tasks
            return False
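A sketch of how a worker main loop might drive _get_tasks, backing off briefly when the nodeserver has nothing to offer (the loop and sleep interval are assumptions):

import time

def work_loop(worker):
    # poll for tasks; the False return means nothing new was queued
    while True:
        if not worker._get_tasks():
            time.sleep(0.1)  # avoid hammering an empty nodeserver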
Example #9
    def _return_task_results(self):
        """

        File all results that this worker has completed

        Returns
        -------
        None
        """
        while True:  # loop over results queue until it's empty
            # print 'getting results'
            try:
                queueURL, taskDescr, res = self.resultsQueue.get_nowait()
                outputs = taskDescr.get('outputs', {})
            except Queue.Empty:
                # queue is empty
                return

            if isinstance(res, TaskError):
                # failure
                clusterResults.fileResults(res.log_url, res.to_string())

                s = clusterIO._getSession(queueURL)
                r = s.post(queueURL + 'node/handin?taskID=%s&status=failure' %
                           taskDescr['id'])
                if not r.status_code == 200:
                    logger.error('Returning task failed with error: %s' %
                                 r.status_code)
            elif res is None:
                # failure
                s = clusterIO._getSession(queueURL)
                r = s.post(queueURL + 'node/handin?taskID=%s&status=failure' %
                           taskDescr['id'])
                if not r.status_code == 200:
                    logger.error('Returning task failed with error: %s' %
                                 r.status_code)

            elif res is True:  # isinstance(res, ModuleCollection): #recipe output
                # res.save(outputs) #abuse outputs dictionary as context

                s = clusterIO._getSession(queueURL)
                r = s.post(queueURL + 'node/handin?taskID=%s&status=success' %
                           taskDescr['id'])
                if not r.status_code == 200:
                    logger.error('Returning task failed with error: %s' %
                                 r.status_code)

            else:
                # success
                try:
                    if 'results' in outputs.keys():
                        # old style pickled results
                        clusterResults.fileResults(outputs['results'], res)
                    else:
                        if len(res.results) > 0:
                            clusterResults.fileResults(outputs['fitResults'],
                                                       res.results)

                        if len(res.driftResults) > 0:
                            clusterResults.fileResults(outputs['driftResults'],
                                                       res.driftResults)
                except requests.Timeout:
                    logger.exception('Filing results failed on timeout.')
                    s = clusterIO._getSession(queueURL)
                    r = s.post(queueURL +
                               'node/handin?taskID=%s&status=failure' %
                               taskDescr['id'])
                    if not r.status_code == 200:
                        logger.error('Returning task failed with error: %s' %
                                     r.status_code)
                else:
                    s = clusterIO._getSession(queueURL)
                    r = s.post(queueURL +
                               'node/handin?taskID=%s&status=success' %
                               taskDescr['id'])
                    if not r.status_code == 200:
                        logger.error('Returning task failed with error: %s' %
                                     r.status_code)
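The hand-in POST above is repeated five times with only the status varying; a hypothetical refactor (not in the source) could factor it into a helper:

def _handin(queueURL, task_id, status):
    # report a finished task back to the nodeserver as 'success' or 'failure'
    s = clusterIO._getSession(queueURL)
    r = s.post(queueURL + 'node/handin?taskID=%s&status=%s' % (task_id, status))
    if r.status_code != 200:
        logger.error('Returning task failed with error: %s' % r.status_code)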
Example #10
    def _get_tasks(self, local_queue_name):
        """

        Query nodeserver for tasks and place them in the queue for this worker, if available

        Parameters
        ----------
        local_queue_name : str
            computer name prepended by 'PYMENodeServer: '

        Returns
        -------
        new_tasks : bool
            flag to report whether _get_tasks added new tasks to the taskWorker queue

        """
        queue_URLs = distribution.getNodeInfo()
        queue_URLs = {
            k: v
            for k, v in queue_URLs.items() if k == local_queue_name
        }

        # loop over all queues, looking for tasks to process
        tasks = []
        while len(tasks) == 0 and len(queue_URLs) > 0:
            # try queue on current machine first
            # TODO - only try local machine?
            # print queueNames

            if local_queue_name in queue_URLs.keys():
                qName = local_queue_name
                queueURL = queue_URLs.pop(qName)
            else:
                # bail out rather than proceeding with queueURL unbound
                logger.error('Could not find local node server')
                return False

            try:
                # ask the queue for tasks
                # TODO - make the server actually return a list of tasks, not just one (or implement pipelining in another way)
                # try:
                s = clusterIO._getSession(queueURL)
                r = s.get(queueURL + 'node/tasks?workerID=%s&numWant=50' %
                          self.procName)  # , timeout=0)
                if r.status_code == 200:
                    resp = r.json()
                    if resp['ok']:
                        res = resp['result']
                        if isinstance(res, list):
                            tasks += [(queueURL, t) for t in res]
                        else:
                            tasks.append((queueURL, res))
            except requests.Timeout:
                logger.info('Read timeout requesting tasks from %s' % queueURL)

            except Exception:
                import traceback
                logger.exception(traceback.format_exc())

        if len(tasks) != 0:
            for t in tasks:
                self.inputQueue.put(t)
            return True
        else:
            # flag that there were no new tasks
            return False
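A hedged usage sketch following the docstring's naming convention (the hostname lookup is an assumption; the real caller may build the name differently):

import socket

local_name = 'PYMENodeServer: %s' % socket.gethostname()
got_new_tasks = worker._get_tasks(local_name)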