Example #1
 def __init__(self, file_path, schema):
     self.Path = file_path
     self.Tree = uproot.open(file_path)["Events"]
     self.BranchSizeArrays = {}
     self.Schema = schema
     self.T = Tracer()
     self.Converted = {}
Example #2
 def __init__(self, dxsock, request, workers, storage, bulk_data_transport, log_file):
     Task.__init__(self)
     self.DXSock = dxsock
     self.Request = request
     self.JID = request.JID
     self.Workers = workers
     self.ModuleStorage = storage
     self.Accumulator = None
     self.EventsSeen = 0
     self.EventsReported = 0
     self.T = Tracer()
     self.BulkDataTransport = bulk_data_transport
     self.LogFile = log_file
     self.HAccumulators = {hid:HAccumulator(desc) for hid, desc in request.HDescriptors.items()}
     self.HistSentTime = 0.0
     self.HistSendInterval = 20.0
Example #3
 def __init__(self, jid, data_server_url, bulk_transport_port, dx,
              data_client, workers, job_description, log_file_path):
     multiprocessing.Process.__init__(self)
     self.daemon = True
     self.JID = jid
     self.DataExchange = dx
     self.Workers = workers  # list of WorkerInfo objects
     self.JobDesc = job_description
     self.DataClient = data_client
     self.TotalEvents = 0
     self.T = Tracer()
     self.ContractStartedT = None
     self.FirstWorkerExitT = self.LastWorkerExitT = None
     self.DataServerURL = data_server_url
     self.Contract = None
     self.LogFile = None
     self.LogFilePath = log_file_path
     self.BulkTransportPort = bulk_transport_port
Example #4
 def __init__(self, backend, dataset_name):
     PyThread.__init__(self)
     self.Queue = Queue(self.QUEUESIZE)
     self.DatasetName = dataset_name            
     self.Backend = backend
     self.Buf = {}
     self.TotalBytes = 0
     self.Shutdown = False
     self.T = Tracer()
Example #5
    def runWorker(self, params, dxsock, frames, wid):
        t0 = time.time()
        self.log("------ runWorker entry for job %s worker %s" % (params.JID, self.ID))
        buffer_id = "%s_%s" % (params.JID, self.ID)
        buffer = SocketWorkerBuffer(buffer_id, dxsock, params.HDescriptors, log=self.log)
        #worker_module = sandbox_import_module(module_name, ["Worker"])
        worker_module = __import__(params.WorkerModuleName, {}, {}, ["Worker"])

        T = Tracer()


        bulk_storage = None
        if params.BulkDataName:
            with T["open_bulk_storage"]:
                bulk_storage = BulkStorage.open(params.BulkDataName)
                #print "Worker: len(bulk_storage)=%d" % (len(bulk_storage),)
            self.log("t=%.3f: bulk data received %d bytes, %d keys" % (time.time() - t0, len(bulk_storage), len(bulk_storage.keys())))
        
        worker_class = worker_module.Worker
        dataset_name = params.DatasetName
        user_params = params.UserParams
        use_data_cache = params.UseDataCache
        jid = params.JID

        data_mod_client = None
        if params.DataModURL is not None and params.DataModToken is not None:
            data_mod_client = StripedClient(params.DataModURL, data_modification_token=params.DataModToken)
            
        self.log("t=%.3f: StripedClient initialized" % (time.time() - t0,))
            
        worker = WorkerDriver(jid, wid, self.Client, worker_class, dataset_name, frames, self.NWorkers, buffer, 
                user_params, bulk_storage, use_data_cache, 
                data_mod_client,
                tracer = T, log = self.log)
        self.log("t=%.3f: Worker driver created for frames: %s" % (time.time() - t0, frames))
        with T["worker.run"]:
            nevents = worker.run()
        self.log("t=%.3f: worker.run() ended with nevents=%s" % (time.time() - t0, nevents))
        
        buffer.close(nevents)
        del sys.modules[params.WorkerModuleName]
        self.log("------ Worker %s stats: -----\n%s" % (self.ID, T.formatStats()))
        self.log("t=%.3f: ------ exit from runWorker" % (time.time() - t0,))
Example #6
 def __init__(self, jid, wid, client, worker_class, dataset_name, rgids, nworkers, buffer, user_params, bulk_data, use_data_cache, 
             data_mod_client, tracer = None, log = None):
     self.JID = jid
     self.Client = client
     self.DataModClient = data_mod_client
     self.WorkerClass = worker_class
     self.DatasetName = dataset_name
     self.MyID = wid
     self.NWorkers = nworkers
     self.Buffer = buffer
     self.RGIDs = rgids
     self.UserParams = user_params
     self.BulkData = bulk_data
     self.T = tracer or Tracer()
     self.Buffer.set_trace(self.T)
     self.UseDataCache = use_data_cache
     self.Log = log
     self.SeenEvents = 0
Example #7
class AccumulatorDriver(Task):

    class JobInterface(object):
        def __init__(self, driver):
            self.Driver = driver

        @property
        def job_id(self):
            return self.Driver.JID
            
        def message(self, text):
            self.Driver.message(text)
            
    class DBInterface(object):
        #
        # dummy for now
        #
        def __init__(self, driver):
            self.Driver = driver
        
        
    def __init__(self, dxsock, request, workers, storage, bulk_data_transport, log_file):
        Task.__init__(self)
        self.DXSock = dxsock
        self.Request = request
        self.JID = request.JID
        self.Workers = workers
        self.ModuleStorage = storage
        self.Accumulator = None
        self.EventsSeen = 0
        self.EventsReported = 0
        self.T = Tracer()
        self.BulkDataTransport = bulk_data_transport
        self.LogFile = log_file
        self.HAccumulators = {hid:HAccumulator(desc) for hid, desc in request.HDescriptors.items()}
        self.HistSentTime = 0.0
        self.HistSendInterval = 20.0

    def eventsDelta(self, n=0):
        self.EventsSeen += n
        delta = self.EventsSeen - self.EventsReported
        self.EventsReported = self.EventsSeen
        return delta

    def log(self, msg):
        msg = ("AccumulatorDriver(%s): %s" % (self.JID, msg))
        print(msg)
        if self.LogFile is not None:
            self.LogFile.log(msg)
        
    def run(self):
        try:
            storage = None
            bulk_data = None
            
            worker_module_name = "m_%s_%s" % (os.getpid(), self.Request.JID)     
            module_file = "%s/%s.py" % (self.ModuleStorage, worker_module_name)
            open(module_file, "w").write(self.Request.WorkerText)

            frames = self.Request.RGIDs
            frames_by_worker = distribute_items(frames, len(self.Workers))
            params = WorkerParams.fromRequest(self.Request, worker_module_name)

            #
            # Store bulk data in shared memory
            #
            if self.Request.BulkDataName:
                with self.T["wait_for_bulk_data"]:
                    t0 = time.time()
                    bulk_data = self.BulkDataTransport.pop(self.Request.BulkDataName, timeout=30)
                    t1 = time.time()
                    self.log("bulk data %s received, %d bytes encoded, %.2f wait time" % (self.Request.BulkDataName, len(bulk_data), t1-t0))
                    bulk_data = decodeData(bulk_data)
                with self.T["store_bulk_data"]:
                    assert isinstance(bulk_data, dict)
                    n = sum([len(v.data)+1000 for v in bulk_data.values()])
                    n = int(n*1.1)+1000000      # for safety
                    storage = BulkStorage.create(params.BulkDataName, bulk_data)
                    storage.save()
                self.log("bulk data stored. %f MB allocated" % (float(n)/1024/1024,))

            #
            # Create Accumulator if specified
            #
            worker_module = __import__(worker_module_name, {}, {}, ["Accumulator"])
            if hasattr(worker_module, "Accumulator"):
                job_interface = self.JobInterface(self)
                db_interface = self.DBInterface(self)
                self.Accumulator = worker_module.Accumulator(
                    params.UserParams, bulk_data,
                    job_interface, db_interface
                )        

            worker_interfaces = []
            for iw, (w, frames) in enumerate(zip(self.Workers, frames_by_worker)):
                if frames:
                    wid = "%s/%d" % (self.Request.WID, iw)
                    wi = WorkerInterface(self, w.Address, params, wid, frames)
                    wi.start()
                    worker_interfaces.append(wi)

            for wi in worker_interfaces:
                wi.join()
            self.log("all worker interfaces closed")

            if self.Accumulator is not None:
                data = self.Accumulator.values()
                if data is not None:
                    with self.T["send accumulated data"]:
                        events_delta = self.eventsDelta()
                        self.log("sending accumulated data with events_delta=%d" % (events_delta,))
                        self.DXSock.send(DXMessage("data", events_delta = events_delta,
                                format="encode")(data=encodeData(data)))

            self.sendHistograms()

            #self.DXSock.send(DXMessage("flush", nevents=self.EventsAccumulated))
                        
        except:
            self.DXSock.send(DXMessage("exception").append(info=traceback.format_exc()))

        finally:
            self.DXSock.close()
            self.log("socket closed")

            if storage:
                storage.unlink()
                self.log("bulk storage unlinked")

            os.unlink(module_file)
            if module_file.endswith(".py"):
                try:
                    os.unlink(module_file + "c")
                except OSError:
                    pass
                
            self.log("---- Accumulator stats ----\n" + self.T.formatStats())

    @synchronized
    def message(self, message):
        self.DXSock.send(DXMessage("message", nevents=0).append(message=message))

    @synchronized            
    def messageFromWorker(self, worker_interface, msg):
        # Can be message, hist, stream, flush, exception
        if msg.Type == "data":
            storage = BulkStorage.open(msg["storage"])
            #print "Accumulator.messageFromWorker(data): keys:", storage.keys()
            events_delta = msg["events_delta"]
            #self.log("data message: events_delta=%s" % (events_delta,))
            data = storage.asDict()
            if self.Accumulator is None:
                msg = DXMessage("data", events_delta = self.eventsDelta(events_delta), format="encode")(data=encodeData(data))
                self.DXSock.send(msg)
            else:
                through = None
                try:
                    with self.T["accumulate"]:
                        through = self.Accumulator.add(data)
                except:
                    self.DXSock.send(DXMessage("exception").append(info=traceback.format_exc()))
                if through is not None:
                    with self.T["send through data"]:
                        msg = DXMessage("data", events_delta = self.eventsDelta(events_delta), format="encode")(data=encodeData(through))
                        self.DXSock.send(msg)
                else:
                    self.EventsSeen += events_delta
            storage.unlink()
        elif msg.Type == "hist":
            for k, v in msg.items():
                if k.startswith("h:"):
                    hid = k[2:]
                    self.HAccumulators[hid].add(v)
                    #print("AccumulatorDriver: h(%s).Counts->%s" % (hid, self.HAccumulators[hid].H.Counts))
            now = time.time()
            if now > self.HistSentTime + self.HistSendInterval:
                self.sendHistograms()
                self.HistSentTime = now
        else:
            self.DXSock.send(msg)       

    def sendHistograms(self):
        msg = DXMessage("hist")
        nhist = 0
        for hid, hacc in self.HAccumulators.items():
            if hacc.NFills:
                #print ("sendHistograms: counts=", hacc.H.Counts)
                msg.append("h:"+hid, hacc.dump())
                nhist += 1
        if nhist:
            self.DXSock.send(msg)
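
For reference, the user-side protocol this driver relies on can be summarized in a minimal sketch. Only the constructor arguments and the add()/values() contract are taken from the AccumulatorDriver calls above; the merging logic and the Totals attribute are illustrative, not from the striped sources.

# Minimal sketch of a user-side Accumulator, inferred from how
# AccumulatorDriver calls it; the aggregation logic is illustrative.
class Accumulator:
    def __init__(self, user_params, bulk_data, job_interface, db_interface):
        # Same four arguments the driver passes on creation.
        self.Totals = {}

    def add(self, data):
        # Called once per worker "data" message. Return a dict to have the
        # driver forward it to the client immediately ("through" data), or
        # None to keep accumulating.
        for key, value in data.items():
            self.Totals[key] = self.Totals.get(key, 0) + value
        return None

    def values(self):
        # Called once after all worker interfaces join; the return value is
        # encoded and sent to the client as the final "data" message.
        return self.Totals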
Example #8
from striped.common import Tracer
T = Tracer()

with T["run"]:
        with T["imports"]:
                        from striped.job import SinglePointStripedSession as Session
                        import numpy as np
                        from numpy.lib.recfunctions import append_fields
                        import fitsio, healpy as hp
                        import sys, time


        #job_server_address = ("dbwebdev.fnal.gov", 8765) #development
        job_server_address = ("ifdb01.fnal.gov", 8765) #production

        session = Session(job_server_address)

        input_file = sys.argv[1]
        input_filename = input_file.rsplit("/",1)[-1].rsplit(".",1)[-1]

        with T["fits/read"]:
                input_data = fitsio.read(input_file, ext=2, columns=["ALPHAWIN_J2000","DELTAWIN_J2000"])
        with T["hpix"]:
                hpix = hp.ang2pix(nside=16384,theta=input_data['ALPHAWIN_J2000'],phi=input_data['DELTAWIN_J2000'],
                        lonlat=True, nest=True)

        hpix = np.asarray(hpix, np.float64)
        input_data = append_fields(input_data, "HPIX", hpix)
        np.sort(input_data, order="HPIX")

        input_data = np.array(list(zip(input_data['ALPHAWIN_J2000'], input_data['DELTAWIN_J2000'], input_data['HPIX'])))
Example #9
class JobProcess(multiprocessing.Process):
    def __init__(self, jid, data_server_url, bulk_transport_port, dx,
                 data_client, workers, job_description, log_file_path):
        multiprocessing.Process.__init__(self)
        self.daemon = True
        self.JID = jid
        self.DataExchange = dx
        self.Workers = workers  # list of WorkerInfo objects
        self.JobDesc = job_description
        self.DataClient = data_client
        self.TotalEvents = 0
        self.T = Tracer()
        self.ContractStartedT = None
        self.FirstWorkerExitT = self.LastWorkerExitT = None
        self.DataServerURL = data_server_url
        self.Contract = None
        self.LogFile = None
        self.LogFilePath = log_file_path
        self.BulkTransportPort = bulk_transport_port

    def log(self, msg):
        print(("%s: %s" % (time.ctime(time.time()), msg)))
        if self.LogFile is not None:
            self.LogFile.write("%s: %s\n" % (time.ctime(time.time()), msg))
            self.LogFile.flush()

    def run(self):

        self.log("job process %s started" % (self.JID, ))

        if self.LogFilePath is not None:
            self.LogFile = open(self.LogFilePath, "w")

        try:
            with self.T["JobProcess/run"]:
                setproctitle("striped_job %s" % (self.JID, ))
                self.log("started: dataset: %s, fraction: %s, %d workers" %
                         (self.JobDesc.DatasetName, self.JobDesc.Fraction,
                          len(self.Workers)))
                callback_delegate = self
                with self.T["JobProcess/run/create_contract"]:
                    self.Contract = Contract(
                        self.JID, self.DataServerURL, self.BulkTransportPort,
                        self.DataClient.dataset(self.JobDesc.DatasetName),
                        self.JobDesc, self.Workers, callback_delegate,
                        self.log, self.T)

                self.DataExchange.send(
                    DXMessage("job_started",
                              nworkers=len(self.Workers),
                              jid=self.JID,
                              total_events=self.Contract.TotalEvents,
                              selected_events=self.Contract.SelectedEvents,
                              selected_frames=json.dumps(
                                  self.Contract.SelectedFrames)))

                self.log("job_started sent")

                with self.T["JobProcess/run/start_contract"]:
                    self.Contract.start()

                self.ContractStartedT = self.FirstWorkerExitT = self.LastWorkerExitT = time.time()

                self.log("contract started. waiting...")

                with self.T["JobProcess/run/wait_contract"]:
                    self.Contract.wait()

                self.DataExchange.send(
                    DXMessage("job_done", total_events=self.TotalEvents))

                self.log(
                    "Job finished. Worker exit timestamps: first: %.5f, last:%.5f"
                    % (self.FirstWorkerExitT - self.ContractStartedT,
                       self.LastWorkerExitT - self.ContractStartedT))
                self.DataExchange.close()
                self.log("---- exit ----")
        except:
            tb = traceback.format_exc()
            self.DataExchange.send(DXMessage("job_failed").append(reason=tb))
            self.log("Exception: ------------\n%s" % (tb, ))
        finally:
            self.log("----- job stats: -----\n" + self.T.formatStats())
            if self.LogFile is not None: self.LogFile.close()

    def updateReceived(self, wid, hists, streams, nevents_delta):

        self.TotalEvents += nevents_delta
        client_disconnected = False

        if hists:
            msg = DXMessage("histograms",
                            total_events=self.TotalEvents,
                            wid=wid)
            for k, v in hists.items():
                msg[k] = v
            try:
                self.DataExchange.send(msg)
            except:
                self.log("Error sending message to the client:\n%s" %
                         (traceback.format_exc(), ))
                client_disconnected = True

        if streams:
            for k, data in streams.items():
                msg = DXMessage("stream",
                                name=k,
                                format="pickle",
                                total_events=self.TotalEvents,
                                wid=wid)
                msg.append(
                    data=data
                )  # this is still pickled data because the WorkerInterface does not unpickle
                try:
                    self.DataExchange.send(msg)
                except:
                    self.log("Error sending message to the client:\n%s" %
                             (traceback.format_exc(), ))
                    client_disconnected = True

        if not streams and not hists:
            #print "sending empty(%d)" % (self.TotalEvents,)
            msg = DXMessage("empty", total_events=self.TotalEvents, wid=wid)
            try:
                self.DataExchange.send(msg)
            except:
                self.log("Error sending message to the client:\n%s" %
                         (traceback.format_exc(), ))
                client_disconnected = True
        if client_disconnected:
            self.log(
                "Client disconnected (because of the communication error). Aborting"
            )
            self.Contract.abort()

    def forward(self, msg):
        with self.T["callback/forward/%s" % (msg.Type, )]:
            self.DataExchange.send(msg)

    def eventsDelta(self, wid, events_delta):
        with self.T["callback/eventsDelta"]:
            self.DataExchange.send(
                DXMessage("events", wid=wid, events_delta=events_delta))

    def dataReceived(self, wid, events_delta, data):
        with self.T["callback/data"]:
            self.DataExchange.send(
                DXMessage("data", wid=wid,
                          events_delta=events_delta).append(data=data))

    def exceptionReceived(self, wid, info):
        with self.T["callback/exception"]:
            self.DataExchange.send(
                DXMessage("exception", wid=wid).append(info=info))

    def messageReceived(self, wid, nevents, message):
        with self.T["callback/message"]:
            self.DataExchange.send(
                DXMessage("message", wid=wid,
                          nevents=nevents).append(message=message))

    def dataLoadFailureReceived(self, wid, rgid):
        with self.T["callback/data_load_failure"]:
            self.DataExchange.send(
                DXMessage("data_load_failure", wid=wid, rgid=rgid))

    def workerExited(self, wid, status, t, nevents, nrunning):
        if self.FirstWorkerExitT is None:
            self.FirstWorkerExitT = time.time()
        self.LastWorkerExitT = time.time()
        with self.T["callback/worker_exit"]:
            self.DataExchange.send(
                DXMessage("worker_exit",
                          nrunning=nrunning,
                          wid=wid,
                          status=status,
                          t=t,
                          nevents=nevents))
Example #10
    def __init__(self,
                 striped_client,
                 data_buffer,
                 dataset_name,
                 columns,
                 schema=None,
                 trace=None):
        self.T = trace or Tracer()
        global T
        T = self.T
        self.Name = dataset_name

        self.BranchNames = set()
        self.AttrNames = set()
        self.Columns = set(columns)
        data_columns = set(columns)

        if not schema:
            self.ClientDataset = striped_client.dataset(dataset_name, columns)
            columns_dict = self.ClientDataset.columns(
                columns, include_size_columns=True)

            # check if any columns are missing in the dataset
            missing = [cn for cn in columns if cn not in columns_dict]
            if missing:
                raise KeyError(
                    "The following columns are not found in the dataset: %s" %
                    (",".join(missing), ))

            self.ColumnToBranch = {
                cn: (cc.descriptor.ParentArray, cc.descriptor.SizeColumn)
                for cn, cc in columns_dict.items()
            }
            for cn in columns:
                bn, sn = self.ColumnToBranch.get(cn)
                if bn:
                    self.BranchNames.add(bn)
                    data_columns.add(bn + ".@size")
                else:
                    self.AttrNames.add(cn)
                    if sn:
                        data_columns.add(sn)
            self.FetchColumns = self.ClientDataset.columnsAndSizes(columns)
        else:
            self.ClientDataset = None
            columns_to_branch = {}
            fetch_columns = set()
            missing = []
            for cn in columns:
                if '.' in cn:
                    bn, an = cn.split('.', 1)
                    sn = bn + ".@size"
                    columns_to_branch[cn] = (bn, sn)
                    fetch_columns.add(sn)
                    self.BranchNames.add(bn)
                else:
                    columns_to_branch[cn] = (None, None)
                    self.AttrNames.add(cn)
                fetch_columns.add(cn)

            self.ColumnToBranch = columns_to_branch
            self.FetchColumns = list(fetch_columns)

        self.TagConditions = []
        self.ProcessedEvents = 0
        #print self.EventTemplate.branchTemplate
        #print "Q Dataset: fetch columns:", self.FetchColumns
        self.Filter = None
        self.DataBuffer = data_buffer
Example #11
from striped.common import Tracer
from QArrays2 import QAEventGroup
from Vault import Vault
import numpy as np, sys, traceback

T = Tracer()


class Frame:
    def __init__(
        self,
        rginfo,
        column_to_branch,
        raw_stroll,
        tagged_event_ids,
    ):
        self.RGInfo = rginfo
        #self.RGID = rginfo.RGID
        self.NEvents = rginfo.NEvents
        #print "Frame: created for rgid=%d, nevents=%d" % (rginfo.RGID, rginfo.NEvents)
        self.AttrVault = Vault()
        self.VarAttrVaults = {}
        self.BranchVaults = {}  # name -> branch vault

        for cn, (bn, sc) in column_to_branch.items():
            if bn:
                prefix = bn + '.'
                assert cn.startswith(prefix)
                aname = cn[len(prefix):]
                if sc != bn + ".@size":
                    raise NotImplementedError(
Example #12
 def __init__(self, file_path, schema):
     self.Schema = schema
     self.T = Tracer()
     self.Config = yaml.load(open(file_path, "r"))
     self.NEvents = self.Config["NEvents"]
     self.NBPerEvent = self.Config["NBPerEvent"]