Exemplo n.º 1
0
 def __init__(self, pilot_data_service=None, pilot_data_description=None, pd_url=None):
     """ Initialize PilotData at the given service URL::

             ssh://<hostname>
             gsissh://<hostname>

         Currently only ssh schemes are supported. In the future all
         SAGA URL schemes/adaptors should be supported.

         Pass pilot_data_service + pilot_data_description to create a new
         Pilot Data, or pd_url alone to reconnect to an existing one.
     """
     self.id = None
     self.url = None
     self.pilot_data_description = None
     self.service_url = None
     self.size = None
     self.data_unit_description = None
     self.data_units = {}

     # use identity checks for None (PEP 8) instead of ==/!=
     if pd_url is None and pilot_data_service is not None:  # new pd
         self.id = self.PD_ID_PREFIX + str(uuid.uuid1())
         self.pilot_data_description = pilot_data_description
         self.url = CoordinationAdaptor.add_pd(CoordinationAdaptor.get_base_url(application_id) + "/" + pilot_data_service.id, self)
     elif pd_url is not None:  # reconnect to existing pd
         logger.warn("Reconnect to PilotData: %s" % pd_url)
         dictionary = CoordinationAdaptor.get_pd(pd_url)
         pd_dict = dictionary["pilot_data"]
         # restore the persisted attributes onto this instance
         for i in pd_dict:
             self.__setattr__(i, pd_dict[i])

     self.initialize_pilot_data()
Exemplo n.º 2
0
 def __create_remote_directory(self, target_url):
     """ Create the directory referenced by target_url.

         target_url is either a local path (fork/localhost) or an
         ssh-style URL ``<scheme>://[user@]host/path``.
         Returns True on success, False otherwise.
     """
     # Python 2.6 compatible URL parsing (urlparse mishandles ssh:// URLs there)
     scheme = target_url[:target_url.find("://")+3]
     target_host = target_url[len(scheme):target_url.find("/", len(scheme))]
     target_path = target_url[len(scheme)+len(target_host):]
     target_user = None
     # split off an optional "user@" prefix; user names can be a single
     # character, so test index > 0 (the original > 1 missed e.g. "a@host")
     if target_host.find("@") > 0:
         comp = target_host.split("@")
         target_host = comp[1]
         target_user = comp[0]
     logger.debug("Create remote directory; scheme: %s, host: %s, path: %s"%(scheme, target_host, target_path))
     if scheme.startswith("fork") or target_host.startswith("localhost"):
         os.makedirs(target_path)
         return True
     else:
         try:
             client = self.__get_ssh_client(target_host, target_user)
             sftp = client.open_sftp()
             sftp.mkdir(target_path)
             sftp.close()
             client.close()
             return True
         except Exception:  # narrowed from bare except: don't swallow SystemExit/KeyboardInterrupt
             self.__print_traceback()
             logger.warn("Error creating directory: " + str(target_path)
                          + " at: " + str(target_host) + " SSH password-less login activated?" )
             return False
Exemplo n.º 3
0
 def __refresh(self):
     """ Update the list of data unit items from the coordination service.

         Best-effort: on any failure the current (possibly stale) list is kept.
     """
     try:
         if self.url is not None:
             du_dict = CoordinationAdaptor.get_du(self.url)
             # SECURITY NOTE: eval() of data from the coordination backend -
             # safe only if that service is trusted; consider ast.literal_eval.
             data_unit_dict_list = eval(du_dict["data_unit_items"])
             self.data_unit_items = [DataUnitItem.create_data_unit_from_dict(i) for i in data_unit_dict_list]
     except Exception:  # narrowed from bare except
         logger.warn("Refresh of DU %s failed"%(self.get_url()))
Exemplo n.º 4
0
 def __refresh(self):
     """ Update the list of data unit items from the coordination service.

         Failures are logged and otherwise ignored (stale list is kept).
     """
     try:
         if self.url is not None:
             du_dict = CoordinationAdaptor.get_du(self.url)
             # SECURITY NOTE: eval() on backend-provided data; only safe with
             # a trusted coordination service (ast.literal_eval would be safer).
             data_unit_dict_list = eval(du_dict["data_unit_items"])
             self.data_unit_items = [
                 DataUnitItem.create_data_unit_from_dict(i)
                 for i in data_unit_dict_list
             ]
     except Exception:  # narrowed from bare except
         logger.warn("Refresh of DU %s failed" % (self.get_url()))
Exemplo n.º 5
0
    def __init__(self,
                 pilot_data_service=None,
                 pilot_data_description=None,
                 pd_url=None):
        """ Initialize PilotData at given service url::

                ssh://<hostname>
                gsissh://<hostname>
                go://<hostname>
                gs://google.com
                s3://aws.amazon.com

            In the future more SAGA/Bliss URL schemes/adaptors are supported.

            Pass pilot_data_service + pilot_data_description to create a new
            Pilot Data, or pd_url alone to reconnect to an existing one.
        """
        self.id = None
        self.url = pd_url
        self.pilot_data_description = None
        self.pilot_data_service = pilot_data_service
        self.service_url = None
        self.size = None
        self.data_unit_urls = []
        self.security_context = None

        if pd_url is None and pilot_data_service is not None:  # new pd
            self.id = self.PD_ID_PREFIX + str(uuid.uuid1())
            self.pilot_data_description = pilot_data_description
            self.url = CoordinationAdaptor.add_pd(
                CoordinationAdaptor.get_base_url(application_id) + ":" +
                pilot_data_service.id, self)
        elif pd_url is not None:  # reconnect to existing pd
            logger.warn("Reconnect to PilotData: %s" % pd_url)
            dictionary = CoordinationAdaptor.get_pd(pd_url)
            # "in" replaces dict.has_key (removed in Python 3, works in both)
            if "security_context" in dictionary:
                self.security_context = dictionary["security_context"]
            # SECURITY NOTE: eval() of backend data - trusted service assumed
            pd_dict = eval(dictionary["pilot_data"])
            for i in pd_dict:
                self.__setattr__(i, pd_dict[i])
            # A Pilot Data does not hold a direct reference to a Data Unit (only URL refs are stored)
            self.data_unit_urls = eval(dictionary["data_unit_urls"])

        self.__initialize_pilot_data()
        CoordinationAdaptor.update_pd(self)
Exemplo n.º 6
0
    def __init__(self, pilot_data_service=None, pilot_data_description=None, pd_url=None):
        """ Initialize PilotData at given service url::

                ssh://<hostname>
                gsissh://<hostname>
                go://<hostname>
                gs://google.com
                s3://aws.amazon.com

            In the future more SAGA/Bliss URL schemes/adaptors are supported.

            Either creates a new Pilot Data (pilot_data_service +
            pilot_data_description) or reconnects to an existing one (pd_url).
        """
        self.id = None
        self.url = pd_url
        self.pilot_data_description = None
        self.pilot_data_service = pilot_data_service
        self.service_url = None
        self.size = None
        self.data_unit_urls = []
        self.security_context = None

        if pd_url is None and pilot_data_service is not None:  # new pd
            self.id = self.PD_ID_PREFIX + str(uuid.uuid1())
            self.pilot_data_description = pilot_data_description
            self.url = CoordinationAdaptor.add_pd(
                CoordinationAdaptor.get_base_url(application_id) + ":" + pilot_data_service.id, self
            )
        elif pd_url is not None:  # reconnect to existing pd
            logger.warn("Reconnect to PilotData: %s" % pd_url)
            dictionary = CoordinationAdaptor.get_pd(pd_url)
            # "in" replaces dict.has_key (removed in Python 3, works in both)
            if "security_context" in dictionary:
                self.security_context = dictionary["security_context"]
            # SECURITY NOTE: eval() of backend data - trusted service assumed
            pd_dict = eval(dictionary["pilot_data"])
            for i in pd_dict:
                self.__setattr__(i, pd_dict[i])
            # A Pilot Data does not hold a direct reference to a Data Unit (only URL refs are stored)
            self.data_unit_urls = eval(dictionary["data_unit_urls"])

        self.__initialize_pilot_data()
        CoordinationAdaptor.update_pd(self)
Exemplo n.º 7
0
 def __create_remote_directory(self, target_url):
     """ Create the directory referenced by target_url.

         target_url is either a local path (host "localhost") or an
         ssh-style URL ``<scheme>://host/path``. Failure to create a remote
         directory (e.g. because it already exists) is only logged.
     """
     # Python 2.6 compatible URL parsing (urlparse mishandles ssh:// URLs there)
     scheme = target_url[:target_url.find("://")+3]
     target_host = target_url[len(scheme):target_url.find("/", len(scheme))]
     target_path = target_url[len(scheme)+len(target_host):]
     if target_host == "localhost":
         os.makedirs(target_path)
     else:
         try:
             client = paramiko.SSHClient()
             client.load_system_host_keys()
             client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
             client.connect(target_host)
             sftp = client.open_sftp()
             sftp.mkdir(target_path)
             sftp.close()
             client.close()
         except Exception:  # narrowed from bare except; mkdir of an existing dir is expected
             logger.warn("Error creating directory: " + str(target_path)
                          + " at: " + str(target_host) + " Already exists?" )
Exemplo n.º 8
0
#!/usr/bin/env python

import textwrap
import re

from bigjob import logger

try:
    import saga
except:
    logger.warn("pbs-ssh://<hostname> plugin not compatible with SAGA Bliss. Use pbs+ssh://<hostname>")

import os

class pbsssh:
    """Constructor"""
    def __init__(self,bootstrap_script,lrms_saga_url,walltime,nodes,ppn,userproxy,working_directory=None):
        self.job_id = ""
        self.lrms_saga_url = lrms_saga_url
        self.lrms_saga_url.scheme="ssh"
        self.userproxy = userproxy
        self.working_directory = ""
        if working_directory == None:
            self.working_directory = ""
        else:
            self.working_directory = working_directory

        self.bootstrap_script = textwrap.dedent("""import sys
import os
import urllib
import sys
Exemplo n.º 9
0
    pass 


# import other BigJob packages
# import API
import api.base
sys.path.append(os.path.dirname(__file__))


if SAGA_BLISS == False:
    try:
        import saga
        logger.info("Using SAGA C++/Python.")
        is_bliss=False
    except:
        logger.warn("SAGA C++ and Python bindings not found. Using Bliss.")
        try:
            import bliss.saga as saga
            is_bliss=True
        except:
            logger.warn("SAGA Bliss not found")
else:
    logger.info("Using SAGA Bliss.")
    try:
        import bliss.saga as saga
        is_bliss=True 
    except:
        logger.warn("SAGA Bliss not found")


"""BigJob Job Description is always derived from BLISS Job Description
Exemplo n.º 10
0
Set environment variable BIGJOB_HOME to installation directory
"""

import sys
from bigjob import logger
import time
import os
import traceback
import logging
import textwrap
import urlparse

try:
    import paramiko
except:
    logger.warn("Paramiko not found. Without Paramiko file staging is not supported!")

from bigjob import SAGA_BLISS 
from bigjob.state import Running, New, Failed, Done, Unknown

if SAGA_BLISS == False:
    try:
        import saga
        logger.debug("Using SAGA C++/Python.")
        is_bliss=False
    except:
        logger.warn("SAGA C++ and Python bindings not found. Using Bliss.")
        try:
            import bliss.sagacompat as saga
            is_bliss=True
        except:
Exemplo n.º 11
0
    def start_pilot_job(self,
                        lrms_url,
                        number_nodes=1,
                        queue=None,
                        project=None,
                        working_directory=None,
                        userproxy=None,
                        walltime=None,
                        processes_per_node=1,
                        filetransfers=None,
                        external_queue="",
                        pilot_compute_description=None):
        """ Start a batch job (using SAGA Job API) at resource manager. Currently, the following resource manager are supported:
            fork://localhost/ (Default Job Adaptor
            gram://qb1.loni.org/jobmanager-pbs (Globus Adaptor)
            pbspro://localhost (PBS Pro Adaptor)

            Returns the pilot URL of the started BigJob agent.
            Raises BigJobError if a BigJob is already active on this object.
        """
        if self.job != None:
            raise BigJobError(
                "One BigJob already active. Please stop BigJob first.")
            # NOTE(review): unreachable - the raise above already left this method
            return

        ##############################################################################
        # initialization of coordination and communication subsystem
        # Communication & Coordination initialization
        lrms_saga_url = SAGAUrl(lrms_url)
        self.url = lrms_saga_url
        self.pilot_url = self.app_url + ":" + lrms_saga_url.host
        self.number_nodes = int(number_nodes) * int(processes_per_node)

        # Store references to BJ in global dict
        _pilot_url_dict[self.pilot_url] = self
        _pilot_url_dict[external_queue] = self

        logger.debug("create pilot job entry on backend server: " +
                     self.pilot_url)
        self.coordination.set_pilot_state(self.pilot_url, str(Unknown), False)
        self.coordination.set_pilot_description(self.pilot_url, filetransfers)
        logger.debug("set pilot state to: " + str(Unknown))

        ##############################################################################
        # Create Job Service (Default: SAGA Job Service, alternative Job Services supported)
        # cloud schemes (gce/ec2/euca/nova) get dedicated services, everything
        # else goes through the generic SAGA job service
        self.js = None
        if lrms_saga_url.scheme == "gce+ssh":
            self.js = GCEService(lrms_saga_url, pilot_compute_description)
        elif lrms_saga_url.scheme=="ec2+ssh" or lrms_saga_url.scheme=="euca+ssh" \
            or lrms_saga_url.scheme=="nova+ssh":
            self.js = EC2Service(lrms_saga_url, pilot_compute_description)
        else:
            self.js = SAGAJobService(lrms_saga_url)

        ##############################################################################
        # create job description
        jd = SAGAJobDescription()

        #  Attempt to create working directory (e.g. in local scenario)
        if working_directory != None:
            if not os.path.isdir(working_directory) \
                and (lrms_saga_url.scheme.startswith("fork") or lrms_saga_url.scheme.startswith("condor")) \
                and working_directory.startswith("go:")==False:
                os.mkdir(working_directory)
            self.working_directory = working_directory
        else:
            # if no working dir is set assume use home directory
            # will fail if home directory is not the same on remote machine
            # but this is just a guess to avoid failing
            #self.working_directory = os.path.expanduser("~")
            self.working_directory = ""

        if queue != None:
            jd.queue = queue
        if project != None:
            jd.project = project
        if walltime != None:
            # Bliss expects the walltime as int, SAGA C++ as string
            if is_bliss:
                jd.wall_time_limit = int(walltime)
            else:
                jd.wall_time_limit = str(walltime)

        ##############################################################################
        # File Management and Stage-In
        # Determine whether target machine use gsissh or ssh to logon.
        # logger.debug("Detect launch method for: " + lrms_saga_url.host)
        # self.launch_method = self.__get_launch_method(lrms_saga_url.host,lrms_saga_url.username)
        self.bigjob_working_directory_url = ""
        if lrms_saga_url.scheme.startswith("gce") or lrms_saga_url.scheme.startswith("ec2")\
            or lrms_saga_url.scheme.startswith("euca") or lrms_saga_url.scheme.startswith("nova"):
            logger.debug(
                "File Staging for Cloud Instances currently not supported.")
        elif lrms_saga_url.scheme.startswith("condor") == True:
            logger.debug("Using Condor file staging")
        else:
            # build target url for working directory
            # this will also create the remote directory for the BJ
            # Fallback if working directory is not a valid URL
            if not (self.working_directory.startswith("go:")
                    or self.working_directory.startswith("ssh://")):
                if lrms_saga_url.username != None and lrms_saga_url.username != "":
                    self.bigjob_working_directory_url = "ssh://" + lrms_saga_url.username + "@" + lrms_saga_url.host + self.__get_bigjob_working_dir(
                    )
                else:
                    self.bigjob_working_directory_url = "ssh://" + lrms_saga_url.host + self.__get_bigjob_working_dir(
                    )
            elif self.working_directory.startswith("go:"):
                self.bigjob_working_directory_url = os.path.join(
                    self.working_directory, self.uuid)
            else:
                # working directory is a valid file staging URL
                self.bigjob_working_directory_url = self.working_directory

            # initialize file manager that takes care of file movement and directory creation
            if self.__filemanager == None:
                self.__initialize_pilot_data(
                    self.bigjob_working_directory_url)  # determines the url

            if self.__filemanager != None and not self.working_directory.startswith(
                    "/"):
                self.working_directory = self.__filemanager.get_path(
                    self.bigjob_working_directory_url)

            # determine working directory of bigjob
            # if a remote sandbox can be created via ssh => create a own dir for each bj job id
            # otherwise use specified working directory
            logger.debug("BigJob working directory: %s" %
                         self.bigjob_working_directory_url)
            if self.__filemanager != None and self.__filemanager.create_remote_directory(
                    self.bigjob_working_directory_url) == True:
                self.working_directory = self.__get_bigjob_working_dir()
                self.__stage_files(filetransfers,
                                   self.bigjob_working_directory_url)
            else:
                logger.warn("No file staging adaptor found.")

            logger.debug("BJ Working Directory: %s", self.working_directory)

        if lrms_saga_url.scheme.startswith("condor") == False:
            jd.working_directory = self.working_directory
        else:
            jd.working_directory = ""

        ##############################################################################
        # Create and process BJ bootstrap script
        bootstrap_script = self.__generate_bootstrap_script(
            self.coordination.get_address(),
            self.pilot_url,  # Queue 1 used by this BJ object
            external_queue  # Queue 2 used by Pilot Compute Service
            # or another external scheduler
        )
        logger.debug("Adaptor specific modifications: " +
                     str(lrms_saga_url.scheme))
        # escape the bootstrap script for the chosen submission backend
        if is_bliss:
            bootstrap_script = self.__escape_bliss(bootstrap_script)
        else:
            if lrms_saga_url.scheme == "gram":
                bootstrap_script = self.__escape_rsl(bootstrap_script)
            elif lrms_saga_url.scheme == "pbspro" or lrms_saga_url.scheme == "xt5torque" or lrms_saga_url.scheme == "torque":
                bootstrap_script = self.__escape_pbs(bootstrap_script)
            elif lrms_saga_url.scheme == "ssh":
                bootstrap_script = self.__escape_ssh(bootstrap_script)
        logger.debug(bootstrap_script)

        # Define Agent Executable in Job description
        # in Condor case bootstrap script is staged
        # (Python app cannot be passed inline in Condor job description)
        if lrms_saga_url.scheme.startswith("condor") == True:

            condor_bootstrap_filename = os.path.join(
                "/tmp", "bootstrap-" + str(self.uuid))
            condor_bootstrap_file = open(condor_bootstrap_filename, "w")
            condor_bootstrap_file.write(bootstrap_script)
            condor_bootstrap_file.close()
            logger.debug("Using Condor - bootstrap file: " +
                         condor_bootstrap_filename)

            jd.executable = "/usr/bin/env"
            jd.arguments = [
                "python",
                os.path.basename(condor_bootstrap_filename)
            ]
            bj_file_transfers = []
            file_transfer_spec = condor_bootstrap_filename + " > " + os.path.basename(
                condor_bootstrap_filename)
            bj_file_transfers.append(file_transfer_spec)
            output_file_name = "output-" + str(self.uuid) + ".tar.gz"
            output_file_transfer_spec = os.path.join(
                self.working_directory,
                output_file_name) + " < " + output_file_name
            #output_file_transfer_spec = os.path.join(self.working_directory, "output.tar.gz") +" < output.tar.gz"
            logger.debug("Output transfer: " + output_file_transfer_spec)
            bj_file_transfers.append(output_file_transfer_spec)
            if filetransfers != None:
                for t in filetransfers:
                    bj_file_transfers.append(t)
            logger.debug("Condor file transfers: " + str(bj_file_transfers))
            jd.file_transfer = bj_file_transfers
        else:
            if is_bliss:
                jd.total_cpu_count = int(number_nodes)
            else:
                jd.number_of_processes = str(number_nodes)
                jd.processes_per_host = str(processes_per_node)
            jd.spmd_variation = "single"
            # agent is launched inline: python -c <bootstrap_script>
            jd.arguments = ["python", "-c", bootstrap_script]
            jd.executable = "/usr/bin/env"

        logger.debug("Working directory: " + jd.working_directory)

        jd.output = os.path.join(self.working_directory,
                                 "stdout-" + self.uuid + "-agent.txt")
        jd.error = os.path.join(self.working_directory,
                                "stderr-" + self.uuid + "-agent.txt")

        ##############################################################################
        # Create and submit pilot job to job service
        logger.debug("Creating pilot job with description: %s" % str(jd))
        self.job = self.js.create_job(jd)
        logger.debug("Submit pilot job to: " + str(lrms_saga_url))
        self.job.run()
        return self.pilot_url
Exemplo n.º 12
0
import threading
import time
import pdb
import Queue

import saga
sys.path.append(os.path.join(os.path.dirname(__file__), "../.."))
from bigjob import logger

from pilot.api import PilotData, DataUnit, PilotDataService
from pilot.api import State
from pilot.filemanagement.ssh_adaptor import SSHFileAdaptor 
try:
    from pilot.filemanagement.webhdfs_adaptor import WebHDFSFileAdaptor
except:
    logger.warn("WebHDFS package not found.") 
from pilot.coordination.advert import AdvertCoordinationAdaptor as CoordinationAdaptor


# generate global application id for this instance
#application_id = str(uuid.uuid1())
application_id = "bigdata"

class PilotData(PilotData):
    """ PilotData. 
    
        Reserves a space of physical storage on the resource specified in the pilot_data_description
    """   
    
    PD_ID_PREFIX="pd-"   
Exemplo n.º 13
0
#!/usr/bin/env python

import textwrap
import re
import os
import pdb

from bigjob import logger
import bigjob

try:
    import bliss.saga as saga
except:
    logger.warn("slurm+ssh://<hostname> plugin not compatible with SAGA Bliss. Use slurm+ssh://<hostname>")
    
    

    
class Service(object):
    """ Pilot-Compute plugin for SLURM (submission via ssh). """

    def __init__(self, resource_url, pilot_compute_description=None):
        """ Store the resource URL and optional pilot compute description. """
        self.resource_url = resource_url
        self.pilot_compute_description = pilot_compute_description

    def create_job(self, job_description):
        """ Create and return a Job bound to this service's resource. """
        j = Job(job_description, self.resource_url, self.pilot_compute_description)
        return j
            
Exemplo n.º 14
0
except:
    pass

# import other BigJob packages
# import API
import api.base

sys.path.append(os.path.dirname(__file__))

if SAGA_BLISS == False:
    try:
        import saga
        logger.info("Using SAGA C++/Python.")
        is_bliss = False
    except:
        logger.warn("SAGA C++ and Python bindings not found. Using Bliss.")
        try:
            import bliss.saga as saga
            is_bliss = True
        except:
            logger.warn("SAGA Bliss not found")
else:
    logger.info("Using SAGA Bliss.")
    try:
        import bliss.saga as saga
        is_bliss = True
    except:
        logger.warn("SAGA Bliss not found")
"""BigJob Job Description is always derived from BLISS Job Description
   BLISS Job Description behaves compatible to SAGA C++ job description
"""
Exemplo n.º 15
0
#!/usr/bin/env python

import textwrap
import re

from bigjob import logger
import bigjob

try:
    import saga
except:
    logger.warn("sge-ssh://<hostname> plugin not compatible with SAGA Bliss.")

import os

class sgessh:
    """Constructor"""
    def __init__(self, bootstrap_script, lrms_saga_url, walltime, number_nodes, processes_per_node, userproxy, project, queue, working_directory=None, bj_working_directory=None):
        self.job_id = ""
        self.lrms_saga_url = lrms_saga_url
        self.lrms_saga_url.scheme="ssh"
        self.userproxy = userproxy
        self.working_directory = ""
        if working_directory == None:
            self.working_directory = ""
        else:
            self.working_directory = working_directory
        if bj_working_directory==None:
            bj_working_directory=self.working_directory
        ### convert walltime in minutes to PBS representation of time ###
        walltime_sge="1:00:00"
Exemplo n.º 16
0
    def __init__(self, args):
        """ Initialize the BigJob agent from command-line arguments.

            args[1]: coordination backend URL (advert://, redis://, tcp://, ...)
            args[2]: pilot/base URL of this BigJob on the backend
            args[3]: (optional) external queue URL of the Pilot Compute Service
        """

        self.coordination_url = args[1]
        # objects to store running jobs and processes
        self.jobs = []
        self.processes = {}
        self.freenodes = []
        self.busynodes = []
        self.restarted = {}

        # read config file
        conf_file = os.path.dirname(
            os.path.abspath(__file__)) + "/../" + CONFIG_FILE
        if not os.path.exists(conf_file):
            # fall back to a config file in the installation prefix
            conf_file = os.path.join(sys.prefix, CONFIG_FILE)
        logging.debug("read configfile: " + conf_file)
        config = ConfigParser.ConfigParser()
        config.read(conf_file)
        default_dict = config.defaults()
        self.CPR = False
        if default_dict.has_key("cpr"):
            self.CPR = default_dict["cpr"]
        self.SHELL = "/bin/bash"
        if default_dict.has_key("shell"):
            self.SHELL = default_dict["shell"]
        self.MPIRUN = "mpirun"
        # On TACC resources the default MPICH is
        # linked under mpirun_rsh
        if default_dict.has_key("mpirun"):
            self.MPIRUN = default_dict["mpirun"]

        # NOTE(review): this assignment makes THREAD_POOL_SIZE local to the
        # whole method; if neither this config key nor the pilot description
        # (below) sets it, ThreadPool(THREAD_POOL_SIZE) at the end raises
        # UnboundLocalError - confirm a module-level default is intended.
        if default_dict.has_key("number_executor_threads"):
            THREAD_POOL_SIZE = int(default_dict["number_executor_threads"])

        self.OUTPUT_TAR = False
        if default_dict.has_key("create_output_tar"):
            # config values are strings; eval turns "True"/"False" into bools
            self.OUTPUT_TAR = eval(default_dict["create_output_tar"])
            logger.debug("Create output tar: %r", self.OUTPUT_TAR)

        self.failed_polls = 0

        ##############################################################################
        # initialization of coordination and communication subsystem
        # Redis initialization
        self.base_url = args[2]
        self.cds_queue_url = None
        if len(args) == 4:
            self.cds_queue_url = args[3]
        logger.debug("External queue: " + str(self.cds_queue_url))
        self.id = self.__get_bj_id(self.base_url)
        logger.debug("BigJob Agent arguments: " + str(args))
        logger.debug("Initialize C&C subsystem to pilot-url: " + self.base_url)
        logger.debug("BigJob ID: %s" % self.id)

        # create bj directory
        self.work_dir = os.getcwd()
        if self.work_dir.find(
                self.id) == -1:  # working directory already contains BJ id
            self.bj_dir = os.path.join(os.getcwd(), self.id)
            logger.debug("Agent working directory: %s" % self.bj_dir)
            try:
                os.makedirs(self.bj_dir)
            except:
                logger.debug("Directory already exists.")
        else:
            self.bj_dir = os.getcwd()

        os.chdir(self.bj_dir)

        # select the coordination backend implementation by URL scheme
        if (self.coordination_url.startswith("advert://")
                or self.coordination_url.startswith("sqlasyncadvert://")):
            try:
                from coordination.bigjob_coordination_advert import bigjob_coordination
                logging.debug("Utilizing ADVERT Backend: " +
                              self.coordination_url)
            except:
                logger.error("Advert Backend could not be loaded")
                exc_type, exc_value, exc_traceback = sys.exc_info()
                traceback.print_exc(file=sys.stderr)
                traceback.print_tb(exc_traceback, file=sys.stderr)
        elif (self.coordination_url.startswith("redis://")):
            try:
                from coordination.bigjob_coordination_redis import bigjob_coordination
                logger.debug("Utilizing Redis Backend: " +
                             self.coordination_url + ".")
            except:
                logger.error(
                    "Error loading pyredis. Check configuration in bigjob_coordination_redis.py."
                )
        elif (self.coordination_url.startswith("tcp://")):
            try:
                from coordination.bigjob_coordination_zmq import bigjob_coordination
                logger.debug("Utilizing ZMQ Backend")
            except:
                logger.error(
                    "ZMQ Backend not found. Please install ZeroMQ (http://www.zeromq.org/intro:get-the-software) and "
                    + "PYZMQ (http://zeromq.github.com/pyzmq/)")

        ###
        # Initiate coordination sub-system of both BJ agent and Pilot Data
        self.coordination = bigjob_coordination(
            server_connect_url=self.coordination_url)
        try:
            # initialize coordination subsystem of pilot data
            self.pilot_data_service = PilotDataService(
                coordination_url=self.coordination_url)
        except:
            logger.warn("Pilot-Data could not be initialized.")

        # update state of pilot job to running
        logger.debug("set state to : " + str(bigjob.state.Running))
        self.coordination.set_pilot_state(self.base_url,
                                          str(bigjob.state.Running), False)
        self.pilot_description = self.coordination.get_pilot_description(
            self.base_url)
        try:
            self.pilot_description = ast.literal_eval(self.pilot_description)
        except:
            logger.warn("Unable to parse pilot description")
            self.pilot_description = None

        ############################################################################
        # Detect launch method
        self.LAUNCH_METHOD = "ssh"
        if default_dict.has_key("launch_method"):
            self.LAUNCH_METHOD = default_dict["launch_method"]

        self.LAUNCH_METHOD = self.__get_launch_method(self.LAUNCH_METHOD)

        logging.debug("Launch Method: " + self.LAUNCH_METHOD + " mpi: " +
                      self.MPIRUN + " shell: " + self.SHELL)

        # init rms (SGE/PBS)
        self.init_rms()

        ##############################################################################
        # start background thread for polling new jobs and monitoring current jobs
        # check whether user requested a certain threadpool size
        if self.pilot_description != None and self.pilot_description.has_key(
                "number_executor_threads"):
            THREAD_POOL_SIZE = int(
                self.pilot_description["number_executor_threads"])
        logger.debug("Creating executor thread pool of size: %d" %
                     (THREAD_POOL_SIZE))
        self.resource_lock = threading.RLock()
        self.threadpool = ThreadPool(THREAD_POOL_SIZE)

        self.launcher_thread = threading.Thread(target=self.dequeue_new_jobs)
        self.launcher_thread.start()

        self.monitoring_thread = threading.Thread(
            target=self.start_background_thread)
        self.monitoring_thread.start()
Exemplo n.º 17
0
    def start_pilot_job(self, 
                 lrms_url, 
                 bigjob_agent_executable=None,
                 number_nodes=1,
                 queue=None,
                 project=None,
                 working_directory=None,
                 userproxy=None,
                 walltime=None,
                 processes_per_node=1,
                 filetransfers=None):
        """ Start a batch job (using the SAGA Job API) at a resource manager.

            Currently the following resource managers are supported:
                fork://localhost/                   (default job adaptor)
                gram://qb1.loni.org/jobmanager-pbs  (Globus adaptor)
                pbspro://localhost                  (PBS Pro adaptor)

            The pbs-ssh and sge-ssh schemes are handled by dedicated plugins
            (pbsssh/sgessh) which submit immediately and return None early;
            all other schemes submit through a SAGA job service and return
            the pilot URL.

            Raises BigJobError if this BigJob object already has an active job.
        """
        if self.job != None:
            # Fixed: removed the unreachable 'return' that followed this raise.
            raise BigJobError("One BigJob already active. Please stop BigJob first.")

        ##############################################################################
        # initialization of coordination and communication subsystem
        # Communication & Coordination initialization
        lrms_saga_url = saga.url(lrms_url)
        self.pilot_url = self.app_url + ":" + lrms_saga_url.host
        pilot_url_dict[self.pilot_url]=self
        
        logger.debug("create pilot job entry on backend server: " + self.pilot_url)
        self.coordination.set_pilot_state(self.pilot_url, str(Unknown), False)
                
        logger.debug("set pilot state to: " + str(Unknown))
        ##############################################################################
        
        self.number_nodes=int(number_nodes)        
        
        # create job description
        jd = saga.job.description()
        
        # The working directory refers to the remote site; it is only
        # created locally when the fork (local) adaptor is used.
        if working_directory != None:
            if not os.path.isdir(working_directory) and lrms_saga_url.scheme=="fork":
                os.mkdir(working_directory)
            self.working_directory = working_directory
        else:
            # if no working dir is set assume use home directory
            # will fail if home directory is not the same on remote machine
            # but this is just a guess to avoid failing
            self.working_directory = os.path.expanduser("~") 
            
        # Stage BJ input files: build the target URL, which is also used to
        # create the remote directory for this BJ.
        if lrms_saga_url.username!=None and lrms_saga_url.username!="":
            bigjob_working_directory_url = "ssh://" + lrms_saga_url.username + "@" + lrms_saga_url.host + self.__get_bigjob_working_dir()
        else:
            bigjob_working_directory_url = "ssh://" + lrms_saga_url.host + self.__get_bigjob_working_dir()
        
        # determine working directory of bigjob 
        # if a remote sandbox can be created via ssh => create an own dir for each bj job id
        # otherwise use specified working directory
        if self.__create_remote_directory(bigjob_working_directory_url)==True:
            self.working_directory = self.__get_bigjob_working_dir()
            self.__stage_files(filetransfers, bigjob_working_directory_url)
        else:        
            # Fixed garbled log message.
            logger.warn("For file staging, SSH (incl. password-less authentication) is required.")
         
        logger.debug("BJ Working Directory: %s", self.working_directory)
        logger.debug("Adaptor specific modifications: "  + str(lrms_saga_url.scheme))
        if lrms_saga_url.scheme == "condorg":
            jd.arguments = [ self.coordination.get_address(), self.pilot_url]
            agent_exe = os.path.abspath(os.path.join(os.path.dirname(__file__),"..","bootstrap","bigjob-condor-bootstrap.py"))
            # Fixed: use a %s placeholder -- passing agent_exe as a bare extra
            # argument caused a lazy string-formatting error inside logging.
            logger.debug("agent_exe: %s", agent_exe)
            jd.executable = agent_exe             
        else:
            # Escape the inline bootstrap script according to the submission scheme.
            bootstrap_script = self.generate_bootstrap_script(self.coordination.get_address(), self.pilot_url)
            if lrms_saga_url.scheme == "gram":
                bootstrap_script = self.escape_rsl(bootstrap_script)
            elif lrms_saga_url.scheme == "pbspro" or lrms_saga_url.scheme=="xt5torque" or lrms_saga_url.scheme=="torque":                
                bootstrap_script = self.escape_pbs(bootstrap_script)
            elif lrms_saga_url.scheme == "ssh":
                bootstrap_script = self.escape_ssh(bootstrap_script)
            ############ submit pbs script which launches bigjob agent using ssh adaptors########## 
            elif lrms_saga_url.scheme == "pbs-ssh":
                bootstrap_script = self.escape_ssh(bootstrap_script)
                # PBS specific BJ plugin
                pbssshj = pbsssh(bootstrap_script, lrms_saga_url, walltime, number_nodes, 
                                 processes_per_node, userproxy, self.working_directory, self.working_directory)
                self.job = pbssshj
                self.job.run()
                return
            ############ submit sge script which launches bigjob agent using ssh adaptors########## 
            elif lrms_saga_url.scheme == "sge-ssh":
                bootstrap_script = self.escape_ssh(bootstrap_script)
                # SGE specific BJ plugin (comment fixed: this is the SGE branch)
                sgesshj = sgessh(bootstrap_script, lrms_saga_url, walltime, number_nodes, 
                                 processes_per_node, userproxy, project, queue, self.working_directory, self.working_directory)
                self.job = sgesshj
                self.job.run()
                return
            elif is_bliss:
                bootstrap_script = self.escape_bliss(bootstrap_script)

            # Bliss uses TotalCPUCount; classic SAGA uses processes/hosts.
            if is_bliss==False:
                jd.number_of_processes = str(number_nodes)
                jd.processes_per_host=str(processes_per_node)
            else:
                jd.TotalCPUCount=str(int(number_nodes)*int(processes_per_node))
                
            jd.spmd_variation = "single"
            # The agent bootstrap script is passed inline to the Python interpreter.
            jd.arguments = ["python", "-c", bootstrap_script]
            jd.executable = "/usr/bin/env"
            if queue != None:
                jd.queue = queue
            if project !=None:
                jd.job_project = [project]
            if walltime!=None:
                jd.wall_time_limit=str(walltime)
        
            jd.working_directory = self.working_directory
    
            logger.debug("Working directory: " + jd.working_directory)
            jd.output = os.path.join(self.working_directory, "stdout-bigjob_agent.txt")
            jd.error = os.path.join(self.working_directory,"stderr-bigjob_agent.txt")
          
        # Submit job; with an explicit X509 user proxy a dedicated SAGA
        # session/context is created for Globus-based submission.
        js = None    
        if userproxy != None and userproxy != '':
            s = saga.session()
            os.environ["X509_USER_PROXY"]=userproxy
            ctx = saga.context("x509")
            ctx.set_attribute ("UserProxy", userproxy)
            s.add_context(ctx)
            logger.debug("use proxy: " + userproxy)
            js = saga.job.service(s, lrms_saga_url)
        else:
            logger.debug("use standard proxy")
            js = saga.job.service(lrms_saga_url)

        logger.debug("Creating pilot job with description: %s" % str(jd))

        self.job = js.create_job(jd)
        logger.debug("Submit pilot job to: " + str(lrms_saga_url))
        self.job.run()
        return self.pilot_url
Exemplo n.º 18
0
# Silence tldextract's noisy default logging.
import tldextract
tldextract.tldextract.LOG.setLevel(logging.WARNING)

from pilot.api.api import PilotError

sys.path.append(os.path.join(os.path.dirname(__file__), "../.."))
from bigjob import logger
from pilot.api import PilotData, DataUnit, PilotDataService, State


# Load file management adaptors. Only the SSH adaptor is mandatory; the
# others are optional and degrade to a warning when their third-party
# dependencies are missing. except Exception (not a bare except) so that
# SystemExit/KeyboardInterrupt still propagate.
from pilot.filemanagement.ssh_adaptor import SSHFileAdaptor
try:
    from pilot.filemanagement.webhdfs_adaptor import WebHDFSFileAdaptor
except Exception:
    logger.warn("WebHDFS package not found.")
try:
    from pilot.filemanagement.globusonline_adaptor import GlobusOnlineFileAdaptor
except Exception:
    logger.warn("Globus Online package not found.")

try:
    from pilot.filemanagement.gs_adaptor import GSFileAdaptor
except Exception:
    # Fixed typo in log message ("Goggle" -> "Google").
    logger.warn("Google Storage package not found.")


try:
    from pilot.filemanagement.s3_adaptor import S3FileAdaptor
except Exception:
    logger.warn("Amazon S3 package not found.")
Exemplo n.º 19
0
    def start_pilot_job(self, 
                 lrms_url, 
                 number_nodes=1,
                 queue=None,
                 project=None,
                 working_directory=None,
                 userproxy=None,
                 walltime=None,
                 processes_per_node=1,
                 filetransfers=None,
                 external_queue="",
                 pilot_compute_description=None):
        """ Start a batch job (using the SAGA Job API) at a resource manager.

            Currently the following resource managers are supported:
                fork://localhost/                   (default job adaptor)
                gram://qb1.loni.org/jobmanager-pbs  (Globus adaptor)
                pbspro://localhost                  (PBS Pro adaptor)
            Cloud schemes (gce+ssh, ec2+ssh, euca+ssh, nova+ssh) and
            slurm+ssh are dispatched to dedicated job services; condor
            schemes stage the bootstrap script as a file instead of
            passing it inline.

            Returns the pilot URL of the started BigJob.

            Raises BigJobError if this BigJob object already has an active job.
        """
        if self.job != None:
            # Fixed: removed the unreachable 'return' that followed this raise.
            raise BigJobError("One BigJob already active. Please stop BigJob first.")

        ##############################################################################
        # initialization of coordination and communication subsystem
        # Communication & Coordination initialization
        lrms_saga_url = SAGAUrl(lrms_url)
        self.url = lrms_saga_url
        self.pilot_url = self.app_url + ":" + lrms_saga_url.host
        self.number_nodes=int(number_nodes)*int(processes_per_node)  
        
        # Store references to BJ in global dict
        _pilot_url_dict[self.pilot_url]=self
        _pilot_url_dict[external_queue]=self
        
        logger.debug("create pilot job entry on backend server: " + self.pilot_url)
        self.coordination.set_pilot_state(self.pilot_url, str(Unknown), False)
        if pilot_compute_description==None:
            pilot_compute_description={"service_url": lrms_url, 
                                       "number_of_processes": number_nodes, 
                                       "processes_per_node": processes_per_node,
                                       "working_directory": working_directory}
        self.coordination.set_pilot_description(self.pilot_url, pilot_compute_description)    
        logger.debug("set pilot state to: " + str(Unknown))

        # Create Job Service (Default: SAGA Job Service, alternative Job Services supported)
        self.js =None
        if lrms_saga_url.scheme=="gce+ssh":
            self.js = GCEService(lrms_saga_url, pilot_compute_description) 
        elif lrms_saga_url.scheme=="ec2+ssh" or lrms_saga_url.scheme=="euca+ssh" \
            or lrms_saga_url.scheme=="nova+ssh":
            self.js = EC2Service(lrms_saga_url, pilot_compute_description)    
        elif lrms_saga_url.scheme=="slurm+ssh":
            self.js = SlurmService(lrms_saga_url, pilot_compute_description)          
        else:
            self.js = SAGAJobService(lrms_saga_url)        
        ##############################################################################
        # create job description
        jd = SAGAJobDescription()
        
        #  Attempt to create working directory (e.g. in local scenario)
        if working_directory != None:
            if not os.path.isdir(working_directory) \
                and (lrms_saga_url.scheme.startswith("fork") or lrms_saga_url.scheme.startswith("condor")) \
                and working_directory.startswith("go:")==False:
                os.mkdir(working_directory)
            self.working_directory = working_directory
        else:
            # if no working dir is set assume use home directory
            # will fail if home directory is not the same on remote machine
            # but this is just a guess to avoid failing
            #self.working_directory = os.path.expanduser("~")
            self.working_directory = "" 
        
        if queue != None:
            jd.queue = queue
        if project !=None:
            jd.project=project       
        if walltime!=None:
            logger.debug("setting walltime to: " + str(walltime))
            # Bliss expects an int walltime; classic SAGA expects a string.
            if is_bliss:
                jd.wall_time_limit=int(walltime)
            else:
                jd.wall_time_limit=str(walltime)
    
        
        ##############################################################################
        # File Management and Stage-In            
        # Determine whether target machine use gsissh or ssh to logon.
        # logger.debug("Detect launch method for: " + lrms_saga_url.host)        
        # self.launch_method = self.__get_launch_method(lrms_saga_url.host,lrms_saga_url.username)
        self.bigjob_working_directory_url=""
        if lrms_saga_url.scheme.startswith("gce") or lrms_saga_url.scheme.startswith("ec2")\
            or lrms_saga_url.scheme.startswith("euca") or lrms_saga_url.scheme.startswith("nova"):
            logger.debug("File Staging for Cloud Instances currently not supported.")
        elif lrms_saga_url.scheme.startswith("condor") == True:
            logger.debug("Using Condor file staging")
        else:           
            # build target url for working directory
            # this will also create the remote directory for the BJ
            # Fallback if working directory is not a valid URL
            if not (self.working_directory.startswith("go:") or self.working_directory.startswith("ssh://")):            
                if lrms_saga_url.username!=None and lrms_saga_url.username!="":
                    self.bigjob_working_directory_url = "ssh://" + lrms_saga_url.username + "@" + lrms_saga_url.host + "/" + self.__get_bigjob_working_dir()
                else:
                    self.bigjob_working_directory_url = "ssh://" + lrms_saga_url.host + "/" + self.__get_bigjob_working_dir()
            elif self.working_directory.startswith("go:"):
                    self.bigjob_working_directory_url=os.path.join(self.working_directory, self.uuid)
            else:
                # working directory is a valid file staging URL
                self.bigjob_working_directory_url=self.working_directory            
                
            # initialize file manager that takes care of file movement and directory creation
            if self.__filemanager==None:
                self.__initialize_pilot_data(self.bigjob_working_directory_url) # determines the url
            
            if self.__filemanager != None and not self.working_directory.startswith("/"):
                self.working_directory = self.__filemanager.get_path(self.bigjob_working_directory_url)
            
            # determine working directory of bigjob 
            # if a remote sandbox can be created via ssh => create an own dir for each bj job id
            # otherwise use specified working directory
            logger.debug("BigJob working directory: %s"%self.bigjob_working_directory_url)
            if self.__filemanager!=None and self.__filemanager.create_remote_directory(self.bigjob_working_directory_url)==True:
                self.working_directory = self.__get_bigjob_working_dir()
                self.__stage_files(filetransfers, self.bigjob_working_directory_url)
            else:        
                logger.warn("No file staging adaptor found.")
            
            logger.debug("BJ Working Directory: %s", self.working_directory)      
        
        if lrms_saga_url.scheme.startswith("condor")==False:
            jd.working_directory = self.working_directory
        else:
            jd.working_directory=""
        
        
        
        ##############################################################################
        # Create and process BJ bootstrap script
        bootstrap_script = self.__generate_bootstrap_script(
                                                          self.coordination.get_address(), 
                                                          self.pilot_url, # Queue 1 used by this BJ object 
                                                          external_queue  # Queue 2 used by Pilot Compute Service 
                                                                          # or another external scheduler
                                                          )
        logger.debug("Adaptor specific modifications: "  + str(lrms_saga_url.scheme))

        if is_bliss and lrms_saga_url.scheme.startswith("condor")==False:
            bootstrap_script = self.__escape_bliss(bootstrap_script)
        else:
            if lrms_saga_url.scheme == "gram":
                bootstrap_script = self.__escape_rsl(bootstrap_script)
            elif lrms_saga_url.scheme == "pbspro" or lrms_saga_url.scheme=="xt5torque" or lrms_saga_url.scheme=="torque":                
                bootstrap_script = self.__escape_pbs(bootstrap_script)
            # Fixed: the original condition used 'and', which can never be
            # true (a scheme cannot equal both "ssh" and "slurm+ssh"), so
            # the ssh escaping never ran. 'or' matches the intended behavior.
            elif lrms_saga_url.scheme == "ssh" or lrms_saga_url.scheme == "slurm+ssh":
                bootstrap_script = self.__escape_ssh(bootstrap_script)                    
                
        logger.debug(bootstrap_script)
        
        
        # Define Agent Executable in Job description
        # in Condor case bootstrap script is staged 
        # (Python app cannot be passed inline in Condor job description)
        if lrms_saga_url.scheme.startswith("condor")==True:

            bootstrap_script = self.__generate_bootstrap_script_from_binary(
                                                          self.coordination.get_address(), 
                                                          self.pilot_url, # Queue 1 used by this BJ object 
                                                          external_queue  # Queue 2 used by Pilot Compute Service 
                                                                          # or another external scheduler
                                                          )
     
            condor_bootstrap_filename = os.path.join("/tmp", "bootstrap-"+str(self.uuid))
            condor_bootstrap_file = open(condor_bootstrap_filename, "w")
            condor_bootstrap_file.write(bootstrap_script)
            condor_bootstrap_file.close()
            logger.debug("Using Condor - bootstrap file: " + condor_bootstrap_filename)
           
            jd.executable = "/usr/bin/env"
            jd.arguments = ["python",  os.path.basename(condor_bootstrap_filename)]                
            if pilot_compute_description.has_key("candidate_hosts"):
                jd.candidate_hosts = pilot_compute_description["candidate_hosts"]
            bj_file_transfers = []
            file_transfer_spec = condor_bootstrap_filename + " > " + os.path.basename(condor_bootstrap_filename)
            bj_file_transfers.append(file_transfer_spec)
            output_file_name = "output-" + str(self.uuid) + ".tar.gz"
            output_file_transfer_spec = output_file_name +" < " + output_file_name
            if filetransfers != None:
                for t in filetransfers:
                    bj_file_transfers.append(t)
            logger.debug("Condor file transfers: " + str(bj_file_transfers))
            jd.file_transfer = bj_file_transfers
        else:
            # Bliss uses total_cpu_count; classic SAGA uses processes/hosts.
            if is_bliss:
                jd.total_cpu_count=int(number_nodes)                   
            else:
                jd.number_of_processes=str(number_nodes)
                jd.processes_per_host=str(processes_per_node)
            jd.spmd_variation = "single"
            if pilot_compute_description!=None and pilot_compute_description.has_key("spmd_variation"):
                jd.spmd_variation=pilot_compute_description["spmd_variation"]
            # The agent bootstrap script is passed inline to the Python interpreter.
            jd.arguments = ["python", "-c", bootstrap_script]
            jd.executable = "/usr/bin/env"           
      
        logger.debug("Working directory: " + jd.working_directory + " Job Description: " + str(jd))
        
        jd.output = os.path.join(self.working_directory, "stdout-" + self.uuid + "-agent.txt")
        jd.error = os.path.join(self.working_directory, "stderr-" + self.uuid + "-agent.txt")
      
        ##############################################################################
        # Create and submit pilot job to job service
        logger.debug("Creating pilot job with description: %s" % str(jd))
        self.job = self.js.create_job(jd)
        logger.debug("Submit pilot job to: " + str(lrms_saga_url))
        self.job.run()
        return self.pilot_url
Exemplo n.º 20
0
    def __init__(self, args):
        """ Initialize the BigJob agent from command-line arguments.

            args[1]: coordination backend URL (advert://, sqlasyncadvert://,
                     redis:// or tcp://)
            args[2]: base/pilot URL of this BigJob on the backend
            args[3]: (optional) external queue URL of a Pilot Compute Service

            Reads the agent config file, initializes the resource management
            system (SGE/PBS), selects and connects the coordination backend,
            creates and chdirs into the agent working directory, marks the
            pilot as Running, and starts the launcher and monitoring threads.
        """
        
        self.coordination_url = args[1]
        # objects to store running jobs and processes
        self.jobs = []
        self.processes = {}
        self.freenodes = []
        self.busynodes = []
        self.restarted = {}

        # read config file
        # Prefer the config next to the source tree; fall back to sys.prefix.
        conf_file = os.path.dirname(os.path.abspath( __file__ )) + "/../" + CONFIG_FILE
        if not os.path.exists(conf_file):
            conf_file = os.path.join(sys.prefix, CONFIG_FILE)
        logging.debug ("read configfile: " + conf_file)
        config = ConfigParser.ConfigParser()
        config.read(conf_file)
        default_dict = config.defaults()        
        self.CPR=False
        if default_dict.has_key("cpr"):
            self.CPR = default_dict["cpr"]
        self.SHELL="/bin/bash"
        if default_dict.has_key("shell"):
            self.SHELL=default_dict["shell"]
        self.MPIRUN="mpirun"
        # On TACC resources the default MPICH is 
        # linked under mpirun_rsh
        if default_dict.has_key("mpirun"):
            self.MPIRUN=default_dict["mpirun"]
        self.OUTPUT_TAR=False
        if default_dict.has_key("create_output_tar"):
            # NOTE(review): eval() of a config value -- assumes the config
            # file is trusted; a strict boolean parse would be safer.
            self.OUTPUT_TAR=eval(default_dict["create_output_tar"])
            logger.debug("Create output tar: %r", self.OUTPUT_TAR)
        
        self.LAUNCH_METHOD="ssh"                    
        if default_dict.has_key("launch_method"):
            self.LAUNCH_METHOD=self.__get_launch_method(default_dict["launch_method"])
        
        logging.debug("Launch Method: " + self.LAUNCH_METHOD + " mpi: " + self.MPIRUN + " shell: " + self.SHELL)
        
        # init rms (SGE/PBS)
        self.init_rms()
        self.failed_polls = 0
        
        ##############################################################################
        # initialization of coordination and communication subsystem
        # Redis initialization
        self.base_url = args[2]
        self.cds_queue_url = None
        if len(args)==4:
            self.cds_queue_url = args[3]
        logger.debug("External queue: " + str(self.cds_queue_url))
        self.id = self.__get_bj_id(self.base_url)
        logger.debug("BigJob Agent arguments: " + str(args))
        logger.debug("Initialize C&C subsystem to pilot-url: " + self.base_url)
        logger.debug("BigJob ID: %s"%self.id)
        
        # create bj directory
        self.work_dir = os.getcwd()
        if self.work_dir.find(self.id)==-1: # BJ id not yet part of cwd -> create a sub-directory for it
            self.bj_dir = os.path.join(os.getcwd(), self.id)
            logger.debug("Agent working directory: %s"%self.bj_dir)
            try:
                os.makedirs(self.bj_dir)
            except:
                # best-effort: directory may already exist
                logger.debug("Directory already exists.")
        else:
            self.bj_dir = os.getcwd()
        
        os.chdir(self.bj_dir)
        
        # Select the coordination backend implementation from the URL scheme;
        # the chosen module's bigjob_coordination class is imported locally.
        if(self.coordination_url.startswith("advert://") or self.coordination_url.startswith("sqlasyncadvert://")):
            try:
                from coordination.bigjob_coordination_advert import bigjob_coordination
                logging.debug("Utilizing ADVERT Backend: " + self.coordination_url)
            except:
                logger.error("Advert Backend could not be loaded")
                exc_type, exc_value, exc_traceback = sys.exc_info()
                traceback.print_exc(file=sys.stderr)
                traceback.print_tb(exc_traceback, file=sys.stderr)
        elif (self.coordination_url.startswith("redis://")):
            try:
                from coordination.bigjob_coordination_redis import bigjob_coordination      
                logger.debug("Utilizing Redis Backend: " + self.coordination_url + ". Please make sure Redis server is configured in bigjob_coordination_redis.py")
            except:
                logger.error("Error loading pyredis.")
        elif (self.coordination_url.startswith("tcp://")):
            try:
                from coordination.bigjob_coordination_zmq import bigjob_coordination
                logger.debug("Utilizing ZMQ Backend")
            except:
                logger.error("ZMQ Backend not found. Please install ZeroMQ (http://www.zeromq.org/intro:get-the-software) and " 
                      +"PYZMQ (http://zeromq.github.com/pyzmq/)")

        ###
        # Initiate coordination sub-system of both BJ agent and Pilot Data
        self.coordination = bigjob_coordination(server_connect_url=self.coordination_url)
        try:
            # initialize coordination subsystem of pilot data
            self.pilot_data_service = PilotDataService(coordination_url=self.coordination_url)
        except:
            # Pilot-Data is optional; the agent keeps running without it.
            logger.warn("Pilot-Data could not be initialized.")
            
        # update state of pilot job to running
        logger.debug("set state to : " +  str(bigjob.state.Running))
        self.coordination.set_pilot_state(self.base_url, str(bigjob.state.Running), False)
        self.pilot_description = self.coordination.get_pilot_description(self.base_url)
        
        ##############################################################################
        # start background thread for polling new jobs and monitoring current jobs
        self.resource_lock=threading.RLock()
        self.threadpool = ThreadPool(THREAD_POOL_SIZE)
        
        self.launcher_thread=threading.Thread(target=self.dequeue_new_jobs)
        self.launcher_thread.start()
        
        self.monitoring_thread=threading.Thread(target=self.start_background_thread)
        self.monitoring_thread.start()
Exemplo n.º 21
0
import random
import threading
import time
import pdb
import Queue

from pilot.api.api import PilotError

sys.path.append(os.path.join(os.path.dirname(__file__), "../.."))
from bigjob import logger
from pilot.api import PilotData, DataUnit, PilotDataService, State

# Load file management adaptors. Only the SSH adaptor is mandatory; the
# others are optional and degrade to a warning when their third-party
# dependencies are missing. except Exception (not a bare except) so that
# SystemExit/KeyboardInterrupt still propagate.
from pilot.filemanagement.ssh_adaptor import SSHFileAdaptor
try:
    from pilot.filemanagement.webhdfs_adaptor import WebHDFSFileAdaptor
except Exception:
    logger.warn("WebHDFS package not found.")
try:
    from pilot.filemanagement.globusonline_adaptor import GlobusOnlineFileAdaptor
except Exception:
    logger.warn("Globus Online package not found.")

try:
    from pilot.filemanagement.gs_adaptor import GSFileAdaptor
except Exception:
    # Fixed typo in log message ("Goggle" -> "Google").
    logger.warn("Google Storage package not found.")

try:
    from pilot.filemanagement.s3_adaptor import S3FileAdaptor
except Exception:
    logger.warn("Amazon S3 package not found.")