Example #1
    def runWorker(self, params, dxsock, client_address):
        buffer = SocketWorkerBuffer(dxsock, params.HDescriptors, log=self.log)
        # Include the process ID in the module name so concurrent server processes
        # do not collide when writing and importing the worker module.
        module_name = "%s_%s" % (params.WorkerModuleName, os.getpid())
        worker_module = sys.modules.get(module_name)
        module_path = None

        #print "worker text=", params.WorkerText

        if worker_module is None:
            module_path = "%s/%s.py" % (self.ModuleStorage, module_name)
            with open(module_path, "w") as f:
                f.write(params.WorkerText)
            worker_module = __import__(module_name, {}, {}, ["Worker"])

        self.WorkerModules[module_name] = time.time()    # record last-used time for purging

        worker_class = worker_module.Worker
        dataset_name = params.DatasetName
        rgids = params.RGIDs
        wid = params.WID
        user_params = params.UserParams
        use_data_cache = params.UseDataCache
        jid = params.JID

        self.log("request from %s (%s): jid/wid=%s/%s, dataset=%s, %d frames" %
                 (client_address, gethostbyaddr(client_address[0])[0], jid,
                  wid, dataset_name, len(rgids)))

        data_mod_client = None
        if params.DataModURL is not None and params.DataModToken is not None:
            data_mod_client = StripedClient(
                params.DataModURL, data_modification_token=params.DataModToken)
        worker = WorkerDriver(jid, wid, self.Client, worker_class,
                              dataset_name, rgids, params.NWorkers, buffer,
                              user_params, use_data_cache, data_mod_client,
                              self.log)
        nevents = worker.run()
        self.log("jid/wid=%s/%s: worker.run() ended with nevents=%s" %
                 (jid, wid, nevents))

        buffer.close(nevents)
        if module_path:
            os.unlink(module_path)
            # also remove the compiled bytecode file, if the interpreter produced one
            pyc_path = module_path + "c"
            if os.path.exists(pyc_path):
                os.unlink(pyc_path)
        self.purgeWorkerModules()
        self.log("jid/wid=%s/%s: exit from runWorker" % (jid, wid))
Example #2
 def __init__(self, wid, nworkers, striped_server_url, logfile_template, cache_limit, module_storage):
     multiprocessing.Process.__init__(self)
     self.ID = wid
     self.NWorkers = nworkers
     self.Client = StripedClient(striped_server_url, cache="long", cache_limit=cache_limit, log=self.log)
     self.ModuleStorage = module_storage
     self.Stop = False
     self.LogFile = None
     self.Sock = socket(AF_INET, SOCK_STREAM)
     self.Sock.bind(("127.0.0.1", 0))         # port 0: let the OS pick a free ephemeral port
     self.Port = self.Sock.getsockname()[1]   # the port actually assigned
     self.Address = self.Sock.getsockname()
     self.Tasks = TaskQueue(2, capacity=10)
     if logfile_template is not None:
         self.LogFile = LogFile(logfile_template % {"wid":self.ID}, keep=3)
     self.log("created at port %d" % (self.Port,))
Example #3
    def runWorker(self, params, dxsock, frames, wid):
        t0 = time.time()
        self.log("------ runWorker entry for job %s worker %s" % (params.JID, self.ID))
        buffer_id = "%s_%s" % (params.JID, self.ID)
        buffer = SocketWorkerBuffer(buffer_id, dxsock, params.HDescriptors, log=self.log)
        worker_module = __import__(params.WorkerModuleName, {}, {}, ["Worker"])

        T = Tracer()

        bulk_storage = None
        if params.BulkDataName:
            with T["open_bulk_storage"]:
                bulk_storage = BulkStorage.open(params.BulkDataName)
            self.log("t=%.3f: bulk data received %d bytes, %d keys" % (
                time.time() - t0, len(bulk_storage), len(bulk_storage.keys())))
        
        worker_class = worker_module.Worker
        dataset_name = params.DatasetName
        user_params = params.UserParams
        use_data_cache = params.UseDataCache
        jid = params.JID

        data_mod_client = None
        if params.DataModURL is not None and params.DataModToken is not None:
            data_mod_client = StripedClient(params.DataModURL, data_modification_token=params.DataModToken)
            
        self.log("t=%.3f: StripedClient initialized" % (time.time() - t0,))
            
        worker = WorkerDriver(jid, wid, self.Client, worker_class, dataset_name, frames,
                              self.NWorkers, buffer, user_params, bulk_storage,
                              use_data_cache, data_mod_client,
                              tracer=T, log=self.log)
        self.log("t=%.3f: Worker driver created for frames: %s" % (time.time() - t0, frames))
        with T["worker.run"]:
            nevents = worker.run()
        self.log("t=%.3f: worker.run() ended with nevents=%s" % (time.time() - t0, nevents))
        
        buffer.close(nevents)
        # Drop the module so the next job re-imports fresh worker code.
        del sys.modules[params.WorkerModuleName]
        self.log("------ Worker %s stats: -----\n%s" % (self.ID, T.formatStats()))
        self.log("t=%.3f: ------ exit from runWorker" % (time.time() - t0,))
Example #4
 def __init__(self, port, striped_server_url, module_storage,
              registry_address, tag, logfile, cache_limit):
     multiprocessing.Process.__init__(self)
     self.Client = StripedClient(striped_server_url,
                                 cache="long",
                                 cache_limit=cache_limit,
                                 log=self.log)
     self.Sock = socket(AF_INET, SOCK_STREAM)
     # Bind to an ephemeral port chosen by the OS; the original kept a disabled
     # (if False) alternative that bound the given fixed port with SO_REUSEADDR.
     self.Sock.bind(("", 0))
     port = self.Sock.getsockname()[1]
     self.Port = port
     self.Sock.listen(10)
     self.ModuleStorage = module_storage
     self.Pinger = WorkerRegistryPinger(registry_address, port, tag)
     self.Stop = False
     self.WorkerModules = {}  # {module_name -> time_accessed}
     self.LogFile = None
     if logfile is not None:
         logfile = "%s.%d.log" % (logfile, port)
         self.LogFile = open(logfile, "w")
Example #5
class JobServer(PyThread):
    def __init__(self, host, port, worker_registry, authenticator,
                 data_server_url, bulk_transport_port, queue_capacity,
                 max_jobs_running, source_archive, log_file_dir):
        PyThread.__init__(self)
        self.DataServerURL = data_server_url
        self.WorkerRegistry = worker_registry
        self.Sock = None
        self.Port = port
        self.Host = host
        self.Stop = False
        self.Authenticator = authenticator
        self.MaxJobs = max_jobs_running
        self.QueueCapacity = queue_capacity
        self.JobQueue = TaskQueue(max_jobs_running, capacity=queue_capacity)
        self.JIDPrefix = "%03d" % (os.getpid() % 1000, )
        self.NextJID = 1
        self.DataClient = StripedClient(data_server_url)
        self.SourceArchive = source_archive
        self.LogFileDir = log_file_dir
        self.JobHistory = []
        self.BulkTransportPort = bulk_transport_port

    @synchronized
    def purgeJobHistory(self):
        now = time.time()
        # Keep only jobs that ended within the last 24 hours.
        cutoff = now - 24 * 3600
        self.JobHistory = [j for j in self.JobHistory if j.Ended and j.Ended > cutoff]

    @synchronized
    def jid(self):
        t = "%s%04d" % (
            self.JIDPrefix,
            self.NextJID,
        )
        self.NextJID = (self.NextJID + 1) % 10000
        return t

    def log(self, msg):
        log("[server]: %s" % (msg, ))

    def run(self):
        self.Sock = socket(AF_INET, SOCK_STREAM)
        self.Sock.setsockopt(SOL_SOCKET, SO_REUSEADDR, 1)
        self.Sock.bind(('', self.Port))
        self.Sock.listen(5)
        data_exchange_listener = DataExchangeSocket(self.Sock)

        while not self.Stop:
            data_exchange = None
            try:
                data_exchange = data_exchange_listener.accept()
                msg = data_exchange.recv()
                #print "msg:", msg.Type
                if msg and msg.Type == 'job_request':
                    job_description = JobDescription.fromDXMsg(msg)
                    exists = self.DataClient.dataset(
                        job_description.DatasetName).exists
                    #print "exists:", exists
                    if not exists:
                        self.log("Dataset not found: %s" %
                                 (job_description.DatasetName, ))
                        data_exchange.send(
                            DXMessage("job_failed").append(
                                reason="Dataset '%s' not found" %
                                (job_description.DatasetName, )))
                    else:
                        jid = self.jid()
                        self.log(
                            "Job description received. Job id %s assigned" %
                            (jid, ))
                        job_log_file_path = None if self.LogFileDir is None else "%s/job_%s.log" % (
                            self.LogFileDir, jid)
                        jt = JobTask(self, jid, job_description,
                                     self.DataServerURL,
                                     self.BulkTransportPort, self.DataClient,
                                     data_exchange, job_log_file_path)
                        self.JobQueue << jt
                        data_exchange = None  # the job task owns it now !
                        if self.SourceArchive is not None:
                            with open("%s/ws_%s.txt" % (self.SourceArchive, jid), "w") as f:
                                f.write(job_description.WorkerText)
                self.purgeJobHistory()
            except Exception:
                dump = traceback.format_exc()
                self.log("Uncaught exception: %s" % (dump, ))
                if data_exchange is not None:
                    data_exchange.send(
                        DXMessage("job_failed").append(reason="Exception: %s" %
                                                       (dump, )))
            finally:
                if data_exchange is not None:
                    data_exchange.close()
                    data_exchange = None

    def workers(self, tags=None):
        return self.WorkerRegistry.workers(tags=tags)

    def validate_job(self, job_description):
        validated, identity = self.Authenticator.validate(
            job_description.AuthToken, job_description.Username)
        if validated:
            job_description.Identity = identity
        return validated

    @synchronized
    def jobStarted(self, job_task):
        self.log("Jobs running: " +
                 ",".join([j.JID for j in self.JobQueue.activeTasks()]))

    @synchronized
    def jobs(self):
        self.purgeJobHistory()
        queued, running = self.JobQueue.tasks()
        ids = set(j.JID for j in queued + running)
        return queued, running, [
            j for j in self.JobHistory if j.JID not in ids
        ]

    @synchronized
    def jobEnded(self, job_task):
        self.JobHistory.append(job_task)
        jid = job_task.JID
        self.log("Jobs running: " + ",".join(
            [j.JID for j in self.JobQueue.activeTasks() if j.JID != jid]))

    @synchronized
    def jobFailed(self, job_task, reason):
        self.JobHistory.append(job_task)
        jid = job_task.JID
        self.log("Jobs running: " + ",".join(
            [j.JID for j in self.JobQueue.activeTasks() if j.JID != jid]))
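A minimal sketch of constructing and starting the server. The registry and authenticator objects, ports, and URL below are hypothetical placeholders, and PyThread is assumed to follow the threading.Thread start()/join() convention:

registry = WorkerRegistry()        # hypothetical registry constructor
auth = Authenticator()             # hypothetical authenticator

server = JobServer(
    host="", port=8766,                                # placeholder listen address
    worker_registry=registry,
    authenticator=auth,
    data_server_url="http://localhost:8765/striped",   # placeholder data server URL
    bulk_transport_port=8767,
    queue_capacity=10,
    max_jobs_running=2,
    source_archive=None,                               # do not archive worker sources
    log_file_dir=None,                                 # no per-job log files
)
server.start()    # run() begins accepting job_request messages
server.join()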
Example #6
# Inspect column descriptors and stripe data for one row group of a dataset.
import sys, getopt
from striped.client import StripedClient

opts, args = getopt.getopt(sys.argv[1:], "")
data_url = args[0]      # striped data server URL
dataset = args[1]       # dataset name
rgid = int(args[2])     # row group id
columns = args[3:]      # column names to inspect

c = StripedClient(data_url)
ds = c.dataset(dataset)

for cn in columns:
    col = ds.column(cn)      # "col", not "c", to avoid shadowing the client above
    print("column %s: %s" % (cn, col.descriptor))

print("RGInfo: %s" % (ds.rginfo(rgid),))

for cn in columns:
    stripe = ds.stripe(cn, rgid)
    print("Data %s: %s %s %s" % (cn, stripe.dtype, stripe.shape, stripe))
Example #7
"""

ServerURL = None

global_opts, rest = getopt.getopt(sys.argv[1:], "s:")
global_opts = dict(global_opts)

if not rest or rest[0] == "help" or "-s" not in global_opts:
    print(Usage)
    sys.exit(1)

ServerURL = global_opts["-s"]

command, args = rest[0], rest[1:]

client = StripedClient(ServerURL)

if command == "stripe":
    print "stripe..."
    opts, args = getopt.getopt(args, "a")
    opts = dict(opts)
    assembled = "-a" in opts
    dataset, column, rgid = args[0], args[1], int(args[2])
    ds = client.dataset(dataset)
    col = ds.column(column)
    s = ds.stripe(column, rgid)
    if hasattr(s, "shape"):
        print("%s %s" % (s.shape, s))
    else:
        for x in s[:10]:
            print(x)
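A typical invocation of the stripe command, again with a hypothetical script name and a placeholder server URL: python striped_tool.py -s http://localhost:8765/striped stripe mydataset x 0. The optional -a flag selects assembled mode.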