Example #1
def test_s3import_via_pilotapi():
    COORDINATION_URL="redis://localhost:6379"
    from pilot import PilotComputeService, PilotDataService, ComputeDataService, State
    pilot_data_service = PilotDataService(coordination_url=COORDINATION_URL)
    
    ###################################################################################################
    # Pick one of the Pilot Data Descriptions below    
    
    pilot_data_description_aws = {
                                "service_url": "s3://pilot-data-andre-workflow",
                                "size": 100,
                                "affinity_datacenter_label": "us-east-1",
                                "affinity_machine_label": "",
                                # replace with your own AWS credentials; never hard-code real keys
                                "access_key_id": "<AWS_ACCESS_KEY_ID>",
                                "secret_access_key": "<AWS_SECRET_ACCESS_KEY>",
                                }

    pd = pilot_data_service.create_pilot(pilot_data_description=pilot_data_description_aws)
     
    data_unit_description = {
                              "file_urls": ['s3://pilot-data-cec5d816-fa8f-11e1-ab5e-e61f1322a75c/du-67b4c762-fa90-11e1-ab5e-e61f1322a75c/ip-10-84-173-21512MB_2.input-chunk-02'],
                              "affinity_datacenter_label": "us-east-1",              
                              "affinity_machine_label": ""
                             }    
      
    # submit pilot data to a pilot store 
    input_data_unit = pd.submit_data_unit(data_unit_description)
    input_data_unit.wait()
def create_pilotdata():
    pilot_data_service = PilotDataService(coordination_url=COORDINATION_URL)
    pilot_data_description={
        "service_url": "ssh://localhost/tmp/pilot-data/",
    }
    pilotdata=pilot_data_service.create_pilot(pilot_data_description=pilot_data_description)
    return pilotdata
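
# A minimal usage sketch (not part of the original example): staging a local file
# into the pilot returned by create_pilotdata(), using only calls shown in the
# examples above (submit_data_unit / wait). The file path and description values
# are illustrative assumptions.
def stage_file_example():
    pilotdata = create_pilotdata()
    data_unit_description = {
        "file_urls": ["/tmp/test.txt"],   # hypothetical local file
        "affinity_datacenter_label": "",
        "affinity_machine_label": ""
    }
    data_unit = pilotdata.submit_data_unit(data_unit_description)
    data_unit.wait()                      # block until the file has been staged
    return data_unit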
Example #3
    def start(self):
        darelogger.info("Creating Compute Engine service ")
        self.pilot_compute_service = PilotComputeService(
            coordination_url=COORDINATION_URL)
        self.pilot_data_service = PilotDataService(
            coordination_url=COORDINATION_URL)

        for compute_pilot, desc in list(
                self.workflow.compute_pilot_repo.items()):
            self.pilot_compute_service.create_pilot(
                pilot_compute_description=desc)

        for data_pilot, desc in list(self.workflow.data_pilot_repo.items()):
            self.data_pilot_service_repo.append(
                self.pilot_data_service.create_pilot(
                    pilot_data_description=desc))

        self.compute_data_service = ComputeDataServiceDecentral()
        self.compute_data_service.add_pilot_compute_service(
            self.pilot_compute_service)
        self.compute_data_service.add_pilot_data_service(
            self.pilot_data_service)

        ### run the steps
        self.step_start_lock = threading.RLock()
        self.step_run_lock = threading.RLock()

        for step_id in list(self.workflow.step_units_repo.keys()):
            darelogger.info(" Sumitted step %s " % step_id)
            self.step_start_lock.acquire()
            self.start_thread_step_id = step_id
            self.step_start_lock.release()
            self.step_threads[step_id] = threading.Thread(
                target=self.start_step)
            self.step_threads[step_id].start()

        while (1):
            count_step = [
                v.is_alive() for k, v in list(self.step_threads.items())
            ]
            darelogger.info('count_step %s' % count_step)
            if count_step and not any(count_step):
                break
            time.sleep(10)

        darelogger.info(" All Steps Done processing")

        self.quit(message='quit gracefully')
Example #4
    def start(self):         
       # try:
            from pilot import PilotComputeService, PilotDataService, ComputeDataService, State

            darelogger.info("Create Compute Engine service ")

            self.pilot_compute_service = PilotComputeService(coordination_url=COORDINATION_URL)
            self.pilot_data_service = PilotDataService()

            for compute_pilot, desc in self.workflow.compute_pilot_repo.items():
                self.compute_pilot_service_repo.append(self.pilot_compute_service.create_pilot(pilot_compute_description=desc))

            #for data_pilot, desc in self.workflow.data_pilot_repo.items():            
             #   self.data_pilot_service_repo.append(self.pilot_data_service.create_pilot(pilot_data_description=desc))
    
            self.compute_data_service = ComputeDataService()
            self.compute_data_service.add_pilot_compute_service(self.pilot_compute_service)
           # self.compute_data_service.add_pilot_data_service(self.pilot_data_service) 

            self.step_thread= {}

            ### run the steps
            self.step_start_lock=threading.RLock()
            self.step_run_lock=threading.RLock()

            for step_id in self.workflow.step_units_repo.keys():
                    darelogger.info(" Sumitted step %s "%step_id)
                    self.step_start_lock.acquire()
                    self.start_thread_step_id =step_id
                    self.step_start_lock.release()

                    self.step_thread[step_id] = threading.Thread(target=self.start_step)
                    self.step_thread[step_id].start()
                    
            while(1):     
                count_step = [v.is_alive() for k,v in self.step_thread.items()]
                darelogger.info('count_step %s'%count_step)
                if count_step and not any(count_step):
                    break
                time.sleep(10)
                       
            darelogger.info(" All Steps Done processing")

            self.cancel()
 base_dir = "/Users/luckow/workspace-saga/applications/pilot-store/test/data1"
 url_list = os.listdir(base_dir)
 # make absolute paths
 absolute_url_list = [os.path.join(base_dir, i) for i in url_list]
 data_unit_description = {
                            "file_urls":absolute_url_list,
                            "number_of_replicas": 2
                          }
 logging.debug("Pilot Data Description: \n%s"%str(data_unit_description))
 
 
     
 
 
 # create pilot data service (factory for pilot stores (physical, distributed storage))
 pilot_data_service = PilotDataService(coordination_url=COORDINATION_URL)
 pd1 = pilot_data_service.create_pilot({
                             'service_url': "ssh://localhost/tmp/pilotdata-1/",
                             'size':100,
                            'affinity_datacenter_label': "eu-de-south",              
                            'affinity_machine_label': "mymachine-1"
                             })
 
 pd2 = pilot_data_service.create_pilot({
                             'service_url': "ssh://localhost/tmp/pilotdata-2/",
                             'size':100,
                            'affinity_datacenter_label': "eu-de-south",              
                            'affinity_machine_label': "mymachine-2"
                             })
 
 
logging.basicConfig(level=logging.DEBUG)

sys.path.append(os.path.join(os.path.dirname(__file__), "../.."))
from pilot import PilotDataService, ComputeDataService, DataUnit, State


if __name__ == "__main__":        
    
    if len(sys.argv) == 2:
        reconnect_url = sys.argv[1]
    else:
        print("Usage: " + sys.executable + " " + __file__ + " <Data Unit URL to Reconnect to>")
        sys.exit(-1)
        
    # create pilot data service (factory for pilot stores (physical, distributed storage))
    pilot_data_service = PilotDataService()
    pd_new = pilot_data_service.create_pilot({
                                'service_url': "ssh://localhost/tmp/pilotdata-reconnect/",
                                'size':100,
                               'affinity_datacenter_label': "eu-de-south",              
                               'affinity_machine_label': "mymachine-1"
                                })
    
    
    logging.debug("Pilot Data URL: %s"%pilot_data_service.url)
    
    
    ###########################################################################
    # PD should only be scheduled to machine 1    
    logging.debug("Connect to PD URL: %s"%reconnect_url)
    pd = DataUnit(du_url=reconnect_url)
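    # Hedged follow-up (not in the original listing): once reconnected, the Data Unit can be
    # waited on before its files are used, mirroring the wait() call in the first example.
    pd.wait()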
Example #7
sys.path.append(os.path.join(os.path.dirname(__file__), "../.."))
from pilot import PilotComputeService, PilotDataService, ComputeDataService, DataUnit, State

COORDINATION_URL = "redis://localhost:6379"

if __name__ == "__main__":        
    
    if len(sys.argv) == 2:
        reconnect_url = sys.argv[1]
    else:
        print("Usage: " + sys.executable + " " + __file__ + " <Data Unit URL to Reconnect to>")
        sys.exit(-1)
        
    # create pilot data service (factory for pilot stores (physical, distributed storage))
    pilot_data_service = PilotDataService(coordination_url=COORDINATION_URL)
    pd_new = pilot_data_service.create_pilot({
                                'service_url': "ssh://localhost/tmp/pilotdata-reconnect/",
                                'size':100,
                               'affinity_datacenter_label': "eu-de-south",              
                               'affinity_machine_label': "mymachine-1"
                                })
    
    
    logging.debug("Pilot Data URL: %s"%pilot_data_service.url)
    
    
    ###########################################################################
    # PD should only be scheduled to machine 1    
    logging.debug("Connect to PD URL: %s"%reconnect_url)
    pd = DataUnit(du_url=reconnect_url)
    # create pilot job service and initiate a pilot job
    pilot_compute_service = PilotComputeService(coordination_url=COORDINATION_URL)
    pilot_compute_description = {
                             "service_url": 'fork://localhost',
                             "number_of_processes": 1,                             
                             "working_directory": "/tmp/pilot-compute/",
                             'affinity_datacenter_label': "eu-de-south",              
                             'affinity_machine_label': "mymachine-1" 
                            }
    
    pilotjob = pilot_compute_service.create_pilot(pilot_compute_description=pilot_compute_description)
    
    
    # create pilot data service (factory for data pilots (physical, distributed storage))
    # and pilot data
    pilot_data_service = PilotDataService()
    pilot_data_description={
                                "service_url": "ssh://localhost/tmp/pilot-data/",
                                "size": 100,   
                                "affinity_datacenter_label": "eu-de-south",              
                                "affinity_machine_label": "mymachine-1"                              
                             }
    ps = pilot_data_service.create_pilot(pilot_data_description=pilot_data_description)
     
    compute_data_service = ComputeDataService()
    compute_data_service.add_pilot_compute_service(pilot_compute_service)
    compute_data_service.add_pilot_data_service(pilot_data_service)
    
    # Create Data Unit Description
    base_dir = "/Users/luckow/workspace-saga/applications/pilot-store/test/data1"
    url_list = os.listdir(base_dir)
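    # The listing is truncated here; a hedged sketch of the remaining step, mirroring the
    # absolute-path pattern used in the earlier snippet (base_dir / url_list) and using the
    # compute_data_service created above. Label values are illustrative assumptions.
    absolute_url_list = [os.path.join(base_dir, i) for i in url_list]
    data_unit_description = {
        "file_urls": absolute_url_list,
        "affinity_datacenter_label": "eu-de-south",
        "affinity_machine_label": "mymachine-1"
    }
    input_data_unit = compute_data_service.submit_data_unit(data_unit_description)
    input_data_unit.wait()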
Example #9
    # What files? Create a Data Unit Description using remote SSH URLs
    # make remote paths
    remote_url_list = ["ssh://localhost" + os.path.join(base_dir, i) for i in url_list]
    data_unit_description2 = {
        "file_urls": remote_url_list,
        "affinity_datacenter_label": "eu-de-south",
        "affinity_machine_label": "mymachine-2",
    }

    logging.debug("Pilot Data Description 2: \n%s" % str(data_unit_description2))

    # create compute data service
    compute_data_service = ComputeDataService()

    # create pilot data service (factory for pilot stores (physical, distributed storage))
    pilot_data_service = PilotDataService()
    ps1 = pilot_data_service.create_pilot(
        {
            "service_url": "ssh://localhost/tmp/pilotdata-1/",
            "size": 100,
            "affinity_datacenter_label": "eu-de-south",
            "affinity_machine_label": "mymachine-1",
        }
    )

    ps2 = pilot_data_service.create_pilot(
        {
            "service_url": "ssh://localhost/tmp/pilotdata-2/",
            "size": 100,
            "affinity_datacenter_label": "eu-de-south",
            "affinity_machine_label": "mymachine-2",
Example #10
class DareManager(object):
    """DARE manager:
       - reads different configuration files
       - submits compute/data units as specified in the various steps"""
    """Constructor"""
    def __init__(self, conffile="/path/to/conf/file"):
        "" ""
        self.dare_conffile = conffile
        self.workflow = PrepareWorkFlow(self.dare_conffile)
        self.updater = Updater(self.workflow.update_site_db,
                               self.workflow.dare_web_id)
        self.dare_id = "dare-" + str(uuid.uuid1())
        self.data_pilot_service_repo = []
        self.step_threads = {}
        try:
            self.start()
        except KeyboardInterrupt:
            self.quit(message='KeyboardInterrupt')

    def start(self):
        darelogger.info("Creating Compute Engine service ")
        self.pilot_compute_service = PilotComputeService(
            coordination_url=COORDINATION_URL)
        self.pilot_data_service = PilotDataService(
            coordination_url=COORDINATION_URL)

        for compute_pilot, desc in list(
                self.workflow.compute_pilot_repo.items()):
            self.pilot_compute_service.create_pilot(
                pilot_compute_description=desc)

        for data_pilot, desc in list(self.workflow.data_pilot_repo.items()):
            self.data_pilot_service_repo.append(
                self.pilot_data_service.create_pilot(
                    pilot_data_description=desc))

        self.compute_data_service = ComputeDataServiceDecentral()
        self.compute_data_service.add_pilot_compute_service(
            self.pilot_compute_service)
        self.compute_data_service.add_pilot_data_service(
            self.pilot_data_service)

        ### run the steps
        self.step_start_lock = threading.RLock()
        self.step_run_lock = threading.RLock()

        for step_id in list(self.workflow.step_units_repo.keys()):
            darelogger.info(" Sumitted step %s " % step_id)
            self.step_start_lock.acquire()
            self.start_thread_step_id = step_id
            self.step_start_lock.release()
            self.step_threads[step_id] = threading.Thread(
                target=self.start_step)
            self.step_threads[step_id].start()

        while (1):
            count_step = [
                v.is_alive() for k, v in list(self.step_threads.items())
            ]
            darelogger.info('count_step %s' % count_step)
            if count_step and not any(count_step):
                break
            time.sleep(10)

        darelogger.info(" All Steps Done processing")

        self.quit(message='quit gracefully')

    def check_to_start_step(self, step_id):
        flags = []
        darelogger.info(self.workflow.step_units_repo[step_id].
                        UnitInfo['start_after_steps'])
        if self.workflow.step_units_repo[step_id].get_status(
        ) == StepUnitStates.New:
            for dep_step_id in self.workflow.step_units_repo[step_id].UnitInfo[
                    'start_after_steps']:
                if self.workflow.step_units_repo[dep_step_id].get_status(
                ) != StepUnitStates.Done:
                    flags.append(False)
                darelogger.info(
                    self.workflow.step_units_repo[dep_step_id].get_status())
        return False if False in flags else True

    def start_step(self):
        self.step_start_lock.acquire()
        step_id = self.start_thread_step_id
        self.step_start_lock.release()

        while (1):
            darelogger.info(" Checking to start step %s " % step_id)
            if self.check_to_start_step(step_id):
                self.run_step(step_id)
                break
            else:
                darelogger.info(" Cannot start this step %s sleeping..." %
                                step_id)
                time.sleep(10)

    def run_step(self, step_id):
        #self.step_run_lock.acquire()
        #job started update status
        this_su = self.workflow.step_units_repo[step_id].UnitInfo
        self.updater.update_status(
            this_su['dare_web_id'],
            "%s in step %s" % ('Running', this_su['name']))

        darelogger.info(" Started running %s " % step_id)

        jobs = []
        job_start_times = {}
        job_states = {}
        NUMBER_JOBS = len(
            self.workflow.step_units_repo[step_id].UnitInfo['compute_units'])
        for cu_id in self.workflow.step_units_repo[step_id].UnitInfo[
                'compute_units']:
            compute_unit_desc = self.workflow.compute_units_repo[cu_id]
            input_dus = compute_unit_desc.pop('input_data_units')
            output_dus = compute_unit_desc.pop('output_data_units')
            input_data_units = []
            for du_id in input_dus:
                input_data_units.append(
                    self.compute_data_service.submit_data_unit(
                        self.workflow.data_units_repo[du_id]))
            output_data_units = []
            for du_id in output_dus:
                output_data_units.append(
                    self.compute_data_service.submit_data_unit(
                        self.workflow.data_units_repo[du_id]))

            compute_unit_desc["input_data"] = [
                du.get_url() for du in input_data_units
            ]
            compute_unit_desc["output_data"] = [{
                du.get_url(): ['std*']
            } for du in output_data_units]
            compute_unit = self.compute_data_service.submit_compute_unit(
                compute_unit_desc)

            darelogger.info("Compute Unit: Description: \n%s" %
                            (str(self.workflow.compute_units_repo[cu_id])))
            jobs.append(compute_unit)
            job_start_times[compute_unit] = time.time()
            job_states[compute_unit] = compute_unit.get_state()

        darelogger.debug(
            "************************ All Jobs submitted ************************"
        )

        while 1:
            finish_counter = 0
            result_map = {}
            for i in range(0, NUMBER_JOBS):
                old_state = job_states[jobs[i]]
                state = jobs[i].get_state()
                if state not in result_map:
                    result_map[state] = 0
                result_map[state] = result_map.get(state, 0) + 1
                #print "counter: " + str(i) + " job: " + str(jobs[i]) + " state: " + state
                if old_state != state:
                    darelogger.debug("Job " + str(jobs[i]) +
                                     " changed from: " + old_state + " to " +
                                     state)
                if old_state != state and self.has_finished(state) == True:
                    darelogger.info("%s step Job: " %
                                    (self.workflow.step_units_repo[step_id].
                                     UnitInfo['name']) + str(jobs[i]) +
                                    " Runtime: " +
                                    str(time.time() -
                                        job_start_times[jobs[i]]) + " s.")
                if self.has_finished(state) == True:
                    finish_counter = finish_counter + 1
                job_states[jobs[i]] = state

            darelogger.debug("Current states: " + str(result_map))
            time.sleep(5)
            if finish_counter == NUMBER_JOBS:
                break

        self.workflow.step_units_repo[step_id].set_status(StepUnitStates.Done)

        #self.compute_data_service.wait()
        darelogger.debug(" Compute jobs for step %s complete" % step_id)

        #runtime = time.time()-starttime

        #all jobs done update status
        self.updater.update_status(this_su['dare_web_id'],
                                   "%s is Done" % this_su['name'])

        #self.step_run_lock.release()

    def has_finished(self, state):
        state = state.lower()
        if state == "done" or state == "failed" or state == "canceled":
            return True
        else:
            return False

    def quit(self, message=None):
        if message:
            darelogger.debug(message)
        darelogger.debug("Terminating steps")
        for step, thread in list(self.step_threads.items()):
            darelogger.debug("Stoppping step %s" % step)
            thread._Thread__stop()

        darelogger.debug("Terminating Pilot Compute/Data Service")
        try:
            self.compute_data_service.cancel()
            self.pilot_data_service.cancel()
            self.pilot_compute_service.cancel()
        except:
            pass
    # create pilot job service and initiate a pilot job
    pilot_compute_service = PilotComputeService(coordination_url=COORDINATION_URL)
    pilot_compute_description = {
        "service_url": 'fork://localhost',
        "number_of_processes": 1,
        "working_directory": os.getcwd() + "/work/",
        'affinity_datacenter_label': "eu-de-south",
        'affinity_machine_label': "mymachine-1"
    }

    pilot_compute_service.create_pilot(
        pilot_compute_description=pilot_compute_description)

    # create pilot data service (factory for data pilots (physical, distributed storage))
    # and pilot data
    pilot_data_service = PilotDataService(coordination_url=COORDINATION_URL)
    pilot_data_description = {
        "service_url": "ssh://localhost/tmp/pilot-data/",
        "size": 100,
        "affinity_datacenter_label": "eu-de-south",
        "affinity_machine_label": "mymachine-1",
        #"userkey":"/Users/luckow/.ssh/rsa_osg",
    }

    pilot_data_service.create_pilot(
        pilot_data_description=pilot_data_description)

    compute_data_service = ComputeDataService()
    compute_data_service.add_pilot_compute_service(pilot_compute_service)
    compute_data_service.add_pilot_data_service(pilot_data_service)
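
    # A hedged sketch of the next step (not part of the original listing): submitting a
    # compute unit through the compute_data_service wired up above. The description keys
    # (executable, arguments, output, error) follow common Pilot-API examples but are
    # assumptions here; only submit_compute_unit() and wait() appear in this listing.
    compute_unit_description = {
        "executable": "/bin/cat",       # illustrative command
        "arguments": ["test.txt"],
        "number_of_processes": 1,
        "output": "stdout.txt",
        "error": "stderr.txt",
        "affinity_datacenter_label": "eu-de-south",
        "affinity_machine_label": "mymachine-1"
    }
    compute_unit = compute_data_service.submit_compute_unit(compute_unit_description)
    compute_data_service.wait()         # block until submitted units finish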
Example #12
    # What files? Create a Data Unit Description using remote SSH URLs
    # make remote paths
    remote_url_list = [
        "ssh://localhost" + os.path.join(base_dir, i) for i in url_list
    ]
    data_unit_description2 = {
        "file_urls": remote_url_list,
        'affinity_datacenter_label': "eu-de-south",
        'affinity_machine_label': "mymachine-2"
    }

    logging.debug("Pilot Data Description 2: \n%s" %
                  str(data_unit_description2))

    # create pilot data service (factory for pilot stores (physical, distributed storage))
    pilot_data_service = PilotDataService(coordination_url=COORDINATION_URL)
    ps1 = pilot_data_service.create_pilot({
        'service_url': "ssh://localhost/tmp/pilotdata-1/",
        'size': 100,
        'affinity_datacenter_label': "eu-de-south",
        'affinity_machine_label': "mymachine-1"
    })

    ps2 = pilot_data_service.create_pilot({
        'service_url': "ssh://localhost/tmp/pilotdata-2/",
        'size':
import uuid
#logging.basicConfig(level=logging.DEBUG)

#sys.path.append(os.path.join(os.path.dirname(__file__), "../.."))
from pilot import PilotComputeService, PilotDataService, ComputeDataService, State
from bigjob import logger 

#COORDINATION_URL = "redis://*****:*****@gw68.quarry.iu.teragrid.org:6379/pcs/pcs-4867ff08-e192-11e1-a694-00003e980000"

if __name__ == "__main__":      
    
    print(COORDINATION_URL)
    # create pilot data service (factory for data pilots (physical, distributed storage))
    # and pilot data
    pilot_data_service = PilotDataService(coordination_url=COORDINATION_URL)
    
    ###################################################################################################
    # Pick one of the Pilot Data Descriptions below    
    
#    pilot_data_description_aws={
#                                "service_url": "s3://pilot-data-" + str(uuid.uuid1()),
#                                "size": 100,   
#                                "affinity_datacenter_label": "us-east-1",              
#                                "affinity_machine_label": ""                              
#                             }
    
    pilot_data_description_india = {
                                "service_url": "walrus://149.165.146.135/pilot-data-" + str(uuid.uuid1()),
                                #"service_url": "ssh://localhost/tmp/pilot-data-" + str(uuid.uuid1()),
                                "size": 100,   
Example #14
class DareManager(object):
    """DARE manager:
       - reads different configuration files
       - submits compute/data units as specified in the various steps"""

    """Constructor"""
    def __init__(self, conffile="/path/to/conf/file"):
        "" ""
        self.dare_conffile = conffile
        self.workflow = PrepareWorkFlow(self.dare_conffile)
        self.updater = Updater(self.workflow.update_site_db, self.workflow.dare_web_id)
        self.dare_id = "dare-" + str(uuid.uuid1())
        self.data_pilot_service_repo = []
        self.step_threads = {}
        try:
            self.start()
        except KeyboardInterrupt:
            self.quit(message='KeyboardInterrupt')

    def start(self):
        darelogger.info("Creating Compute Engine service ")
        self.pilot_compute_service = PilotComputeService(coordination_url=COORDINATION_URL)
        self.pilot_data_service = PilotDataService(coordination_url=COORDINATION_URL)

        for compute_pilot, desc in self.workflow.compute_pilot_repo.items():
            self.pilot_compute_service.create_pilot(pilot_compute_description=desc)

        for data_pilot, desc in self.workflow.data_pilot_repo.items():
            self.data_pilot_service_repo.append(self.pilot_data_service.create_pilot(pilot_data_description=desc))

        self.compute_data_service = ComputeDataServiceDecentral()
        self.compute_data_service.add_pilot_compute_service(self.pilot_compute_service)
        self.compute_data_service.add_pilot_data_service(self.pilot_data_service)

        ### run the steps
        self.step_start_lock = threading.RLock()
        self.step_run_lock = threading.RLock()

        for step_id in self.workflow.step_units_repo.keys():
                darelogger.info(" Sumitted step %s " % step_id)
                self.step_start_lock.acquire()
                self.start_thread_step_id = step_id
                self.step_start_lock.release()
                self.step_threads[step_id] = threading.Thread(target=self.start_step)
                self.step_threads[step_id].start()

        while(1):
            count_step = [v.is_alive() for k, v in self.step_threads.items()]
            darelogger.info('count_step %s' % count_step)
            if count_step and not any(count_step):
                break
            time.sleep(10)

        darelogger.info(" All Steps Done processing")

        self.quit(message='quit gracefully')

    def check_to_start_step(self, step_id):
        flags = []
        darelogger.info(self.workflow.step_units_repo[step_id].UnitInfo['start_after_steps'])
        if self.workflow.step_units_repo[step_id].get_status() == StepUnitStates.New:
            for dep_step_id in self.workflow.step_units_repo[step_id].UnitInfo['start_after_steps']:
                if self.workflow.step_units_repo[dep_step_id].get_status() != StepUnitStates.Done:
                    flags.append(False)
                darelogger.info(self.workflow.step_units_repo[dep_step_id].get_status())
        return False if False in flags else True

    def start_step(self):
        self.step_start_lock.acquire()
        step_id = self.start_thread_step_id
        self.step_start_lock.release()

        while(1):
            darelogger.info(" Checking to start step %s " % step_id)
            if self.check_to_start_step(step_id):
                self.run_step(step_id)
                break
            else:
                darelogger.info(" Cannot start this step %s sleeping..." % step_id)
                time.sleep(10)

    def run_step(self, step_id):
        #self.step_run_lock.acquire()
        #job started update status
        this_su = self.workflow.step_units_repo[step_id].UnitInfo
        self.updater.update_status(this_su['dare_web_id'], "%s in step %s" % ('Running',  this_su['name']))

        darelogger.info(" Started running %s " % step_id)

        jobs = []
        job_start_times = {}
        job_states = {}
        NUMBER_JOBS = len(self.workflow.step_units_repo[step_id].UnitInfo['compute_units'])
        for cu_id in self.workflow.step_units_repo[step_id].UnitInfo['compute_units']:
            compute_unit_desc = self.workflow.compute_units_repo[cu_id]
            input_dus = compute_unit_desc.pop('input_data_units')
            output_dus = compute_unit_desc.pop('output_data_units')
            input_data_units = []
            for du_id in input_dus:
                input_data_units.append(self.compute_data_service.submit_data_unit(self.workflow.data_units_repo[du_id]))
            output_data_units = []
            for du_id in output_dus:
                output_data_units.append(self.compute_data_service.submit_data_unit(self.workflow.data_units_repo[du_id]))

            compute_unit_desc["input_data"] = [du.get_url() for du in input_data_units]
            compute_unit_desc["output_data"] = [{du.get_url(): ['std*']} for du in output_data_units]
            compute_unit = self.compute_data_service.submit_compute_unit(compute_unit_desc)

            darelogger.info("Compute Unit: Description: \n%s" % (str(self.workflow.compute_units_repo[cu_id])))
            jobs.append(compute_unit)
            job_start_times[compute_unit] = time.time()
            job_states[compute_unit] = compute_unit.get_state()

        darelogger.debug("************************ All Jobs submitted ************************")

        while 1:
            finish_counter = 0
            result_map = {}
            for i in range(0, NUMBER_JOBS):
                old_state = job_states[jobs[i]]
                state = jobs[i].get_state()
                if state not in result_map:
                    result_map[state] = 0
                result_map[state] = result_map.get(state, 0) + 1
                #print "counter: " + str(i) + " job: " + str(jobs[i]) + " state: " + state
                if old_state != state:
                    darelogger.debug("Job " + str(jobs[i]) + " changed from: " + old_state + " to " + state)
                if old_state != state and self.has_finished(state) == True:
                    darelogger.info("%s step Job: " % (self.workflow.step_units_repo[step_id].UnitInfo['name']) + str(jobs[i]) + " Runtime: " + str(time.time() - job_start_times[jobs[i]]) + " s.")
                if self.has_finished(state) == True:
                    finish_counter = finish_counter + 1
                job_states[jobs[i]] = state

            darelogger.debug("Current states: " + str(result_map))
            time.sleep(5)
            if finish_counter == NUMBER_JOBS:
                break

        self.workflow.step_units_repo[step_id].set_status(StepUnitStates.Done)

        #self.compute_data_service.wait()
        darelogger.debug(" Compute jobs for step %s complete" % step_id)

        #runtime = time.time()-starttime

        #all jobs done update status
        self.updater.update_status(this_su['dare_web_id'], "%s is Done" % this_su['name'])

        #self.step_run_lock.release()

    def has_finished(self, state):
        state = state.lower()
        if state == "done" or state == "failed" or state == "canceled":
            return True
        else:
            return False

    def quit(self, message=None):
        if message:
            darelogger.debug(message)
        darelogger.debug("Terminating steps")
        for step, thread  in self.step_threads.items():
            darelogger.debug("Stoppping step %s" % step)
            thread._Thread__stop()

        darelogger.debug("Terminating Pilot Compute/Data Service")
        try:
            self.compute_data_service.cancel()
            self.pilot_data_service.cancel()
            self.pilot_compute_service.cancel()
        except:
            pass
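
# A hedged usage sketch (not in the original listing): driving DareManager from the
# command line. The workflow configuration path is an illustrative assumption; the
# constructor itself calls start() and handles KeyboardInterrupt, as shown above.
if __name__ == "__main__":
    conffile = sys.argv[1] if len(sys.argv) > 1 else "/path/to/conf/file"
    DareManager(conffile=conffile)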