Exemplo n.º 1
0
        self.Optimizer = optimizer
        self.Sum = None

    def on_data(self, wid, nevents, data):
        if "sum" in data:
            self.NSamples += data["n"]
            if self.Sum is None: self.Sum = data["sum"].copy()
            else: self.Sum += data["sum"]

    def run(self):
        """Submit the MNIST job with the model configuration and record its runtime.

        NOTE(review): `model` is a module-level global defined below this
        class -- confirm it is constructed before run() is called.
        """
        params = {"model": {"config": model.config()}}
        job = self.Session.createJob(
            "MNIST",
            user_params=params,
            callbacks=[self],
            worker_class_file="sumup_worker.py")
        job.run()
        self.Runtime = job.runtime


# Build the model and run a distributed summing job over MNIST, then
# report the accumulated statistics.
# NOTE(review): create_model, Session, MomentumOptimizer and MLJob are
# defined elsewhere in the original file/project -- not visible here.
model = create_model()

session = Session("striped_dev.yaml")
optimizer = MomentumOptimizer()
job = MLJob(session, model, optimizer)
job.run()
# Python 2 print statements.  NSamples and Sum are accumulated by the
# MLJob callback while workers stream partial sums back.
print "NSamples:", job.NSamples
print "Average:", job.Sum / job.NSamples
print job.Runtime
Exemplo n.º 2
0
from striped.common import Tracer

T = Tracer()

with T["run"]:
    with T["imports"]:
        from striped.job import SinglePointStripedSession as Session
        import numpy as np
        from numpy.lib.recfunctions import append_fields
        import fitsio, healpy as hp
        import sys, time

    #job_server_address = ("dbwebdev.fnal.gov", 8765) #development
    job_server_address = ("ifdb01.fnal.gov", 8765) #production

    session = Session(job_server_address)

    input_file = sys.argv[1]
    # Bare file name: strip the directory, then the extension.
    # BUG FIX: the second rsplit used [-1], which yields the *extension*
    # rather than the name.
    input_filename = input_file.rsplit("/", 1)[-1].rsplit(".", 1)[0]

    with T["fits/read"]:
        # Read RA/Dec columns from HDU 2 of the input catalog.
        input_data = fitsio.read(input_file, ext=2,
                                 columns=["ALPHAWIN_J2000", "DELTAWIN_J2000"])
    with T["hpix"]:
        # Nested-scheme HEALPix index at nside=16384; lonlat=True means
        # theta/phi are interpreted as (RA, Dec) in degrees.
        hpix = hp.ang2pix(nside=16384,
                          theta=input_data['ALPHAWIN_J2000'],
                          phi=input_data['DELTAWIN_J2000'],
                          lonlat=True, nest=True)

    hpix = np.asarray(hpix, np.float64)
    input_data = append_fields(input_data, "HPIX", hpix)
    # BUG FIX: np.sort returns a sorted *copy*; the original call discarded
    # the result, leaving input_data unsorted.
    input_data = np.sort(input_data, order="HPIX")

    # Python 2: zip() returns a list, so np.array gets an (N, 3) table.
    input_data = np.array(zip(input_data['ALPHAWIN_J2000'],
                              input_data['DELTAWIN_J2000'],
                              input_data['HPIX']))
Exemplo n.º 3
0
import os
from striped.job import Session
import numpy as np

# Open a Striped session with the 130 TB cluster configuration and pick
# the NanoTuples QCD dataset to analyze.
session = Session("striped_130tb.yaml")
dataset = "NanoTuples-2016_QCD_HT1500to2000_TuneCUETP8M1_13TeV-madgraphMLM-pythia8"


class DataCallback:
    """Striped job callback that accumulates jet-pt statistics.

    Workers report per-chunk counts and sums through on_data(); the
    average pt is computed once when the job finishes.
    """

    def __init__(self):
        self.Sum_pt = 0.0      # running sum of jet pt over all workers
        self.N = 0             # running count of jets
        self.AveragePt = None  # computed in on_job_finish

    def on_data(self, wid, nevents, data):
        # Assumes each frame carries "n_jets" and "sum_pt" -- TODO confirm
        # against nano_worker.py.
        self.N += data["n_jets"]
        self.Sum_pt += data["sum_pt"]

    def on_job_finish(self, nsamples, error):
        # BUG FIX: guard the division -- an empty job (N == 0) previously
        # raised ZeroDivisionError; leave AveragePt as None in that case.
        self.AveragePt = self.Sum_pt / self.N if self.N else None


# Create the accumulating callback and submit the job.  The bulk_data
# blob is broadcast to every worker; the worker logic itself lives in
# nano_worker.py.
data_collector = DataCallback()

job = session.createJob(
    dataset,
    user_params={"dataset": dataset},
    bulk_data={"calibrations": np.random.random((100, 100))},
    callbacks=[data_collector],
    worker_class_file="nano_worker.py")
job.run()
Exemplo n.º 4
0
from striped.job.callbacks import ProgressBarCallback
import numpy as np

class MyCallback:
    """Counts records reported through the "count" field of each data frame."""

    def __init__(self):
        self.N = 0

    def on_data(self, wid, nevents, data):
        # Frames without a "count" key contribute nothing.
        self.N += data.get("count", 0)

cb = MyCallback()

dataset = "mnist"

# NOTE(review): Session is not imported in this example -- presumably
# `from striped.job import Session` was present in the original file;
# verify before running.
session = Session("striped.yaml")

# 1000x1000 random array broadcast to every worker under the key "data".
bulk_data = np.random.random((1000,1000))

job = session.createJob(dataset, 
				bulk_data = {"data":bulk_data},
				callbacks = [ProgressBarCallback(), cb],
				user_params = {"param":{"a":"b"}},
                            worker_class_file = "worker.py"
)
print ("running...")
job.run()
# TFinish/TStart and EventsProcessed are filled in by job.run().
runtime = job.TFinish - job.TStart
nevents = job.EventsProcessed
print ("%s: %.6fM events, %.6fM events/second" % (dataset, float(nevents)/1000000, nevents/runtime/1000000))
Exemplo n.º 5
0
from striped.job import Session

from striped.ml import ML_Job
from model import create_model

# Train the model for 5 epochs, submitting one distributed ML_Job per
# epoch.  NOTE(review): Python 2 code (print statement); job.Runtime and
# job.Loss are presumably filled in by ML_Job.run -- confirm.
model = create_model()

session = Session("striped_130tb.yaml")

for epoch in range(5):
	job = ML_Job(session, model, worker_file="worker.py")
	# Arguments: dataset name and (apparently) a learning-rate/fraction
	# parameter -- semantics of 0.1 not visible here.
	job.run("MNIST", 0.1)
	print "epoch: %d, runtime: %f, loss: %s" % (epoch+1, job.Runtime, job.Loss)
Exemplo n.º 6
0
    def on_job_end(self, nsamples, error):
        """Average the accumulated weight deltas and apply them to the model.

        Called once when the striped job completes; does nothing if the
        job reported an error.
        """
        if not error:
            # BUG FIX: the original divided self.Deltas (the list itself)
            # inside the loop; divide each per-layer delta array in place.
            for d in self.Deltas:
                d /= nsamples
            # BUG FIX: get_weights was referenced without calling it.
            weights = [
                w + d for w, d in zip(self.Model.get_weights(), self.Deltas)
            ]
            self.Model.set_weights(weights)

    def run(self):
        """Submit the CIFAR-10 training job and record its runtime.

        NOTE(review): pack_model is defined elsewhere in the original
        file -- not visible in this fragment.
        """
        # Bundle the model with its training configuration before shipping
        # it to the workers.
        packed = pack_model(self.Model,
                            loss="categorical_crossentropy",
                            lr=0.001)
        job = self.Session.createJob("CIFAR-10",
                                     user_params={"model": packed},
                                     callbacks=[self],
                                     worker_class_file="cifar10_worker.py")
        job.run()
        self.Runtime = job.runtime


# Build the model and run the distributed CIFAR-10 training job.
# NOTE(review): create_model, Session and MLJob are defined elsewhere in
# the original file/project -- not visible in this fragment.
model = create_model()

session = Session("striped.yaml")
job = MLJob(session, model)
job.run()
# Python 2 print statement; Runtime is set at the end of MLJob.run.
print job.Runtime
Exemplo n.º 7
0
import numpy as np
import pandas as pd

# Worker source shipped to the striped workers as a string.  The final
# `x = 5/0` raises ZeroDivisionError on purpose, so the on_exception
# callback below can be demonstrated.
# NOTE(review): that line is tab-indented while the rest uses spaces --
# an IndentationError under Python 3; presumably the workers run
# Python 2 -- confirm.
worker_class = """
import cloudpickle

class Worker(object):

    Columns = ["NJets"]

    def run(self, events, job):
        job.message("%d events" % (len(events),))
	x = 5/0
"""

session = Session(("ifdb01.fnal.gov", 8765))

# NJets histogram, grouped by dataset.
# NOTE(review): Hist, hbin and groupby are imported elsewhere in the
# original file -- not visible in this fragment.
h_by_dataset = Hist(hbin("NJets", 20, 0, 20), groupby("dataset"))

datasets = [
        "Summer16.TTHH_TuneCUETP8M2T4_13TeV-madgraph-pythia8"          		# 100000 events
]

class Callback:
	"""Prints worker messages and exceptions (Python 2 print statements)."""
	def on_message(self, wid, nevents, message):
		print "Message received from worker %d after seeing %d events: <%s>" % (wid, nevents, message)

	def on_exception(self, wid, info):
		print "Worker %d failed with exception:\n%s" % (wid, info)

# Single shared callback instance handed to the job.
callback = Callback()
Exemplo n.º 8
0
# Parse the command line: [-m max_matchers] [-s stagger] outprefix file...
opts, args = getopt.getopt(sys.argv[1:], "?hm:s:")
opts = dict(opts)
# -m: maximum number of concurrent matchers; -s: stagger between job
# starts (seconds) -- semantics presumed from names, confirm in MatchJob.
max_matchers = int(opts.get("-m", 5))
stagger = float(opts.get("-s", 0.1))

# Python 2 print statement; Usage is defined elsewhere in the file.
if len(args) < 2 or "-?" in opts or "-h" in opts:
    print Usage
    sys.exit(1)

outprefix = args[0]
files = args[1:]

#job_server_address = ("dbwebdev.fnal.gov", 8765)
job_server_address = ("ifdb01.fnal.gov", 8765)
session = Session(job_server_address)  #, worker_tags=["DES"])

# MatchJob is defined elsewhere in the original file; wait() blocks until
# the matchers finish and fills Matches/Unmatches.
job = MatchJob(session, files, max_matchers, stagger)
job.wait()

all_matches = job.Matches
all_unmatches = job.Unmatches

if len(all_matches):
    all_matches = np.concatenate(all_matches)
    # Build a per-rgid (OBJECT_ID min, max) range map.
    # NOTE(review): `range` shadows the builtin -- left unchanged here.
    oidmap = {}
    for i in xrange(len(all_matches)):
        rgid = all_matches["rgid"][i]
        oid = all_matches["OBJECT_ID"][i]
        range = oidmap.get(rgid, (oid, oid))
        omin, omax = range
        # (loop body continues beyond this chunk -- fragment truncated)
Exemplo n.º 9
0
import pandas as pd

# Worker source shipped to the workers as a string: fills histograms for
# jet/muon/electron multiplicities and pt spectra.
# NOTE(review): the embedded source mixes tab and 8-space indentation --
# consistent under Python 2 (tab expands to 8 columns) but an
# IndentationError under Python 3; confirm the workers run Python 2.
worker_class = """
class Worker(object):

    Columns = ["nJet","nMuon","nElectron","Jet.pt", "Muon.pt"]

    def run(self, events, job):
        job.fill(nJet=events.nJet)
	job.fill(nElectron=events.nElectron)
	job.fill(nMuon=events.nMuon)
        job.fill(JetPt = events.Jet.pt)
	job.fill(MuonPt = events.Muon.pt)
"""

session = Session()

# One histogram per filled quantity.
# NOTE(review): Hist and hbin come from elsewhere in the original file
# -- not visible in this fragment.
njets = Hist(hbin("nJet", 20, 0, 20))
nmuon = Hist(hbin("nMuon", 20, 0, 20))
nelectron = Hist(hbin("nElectron", 20, 0, 20))
muon_pt = Hist(hbin("MuonPt", 70, 0., 700.))
jet_pt = Hist(hbin("JetPt", 70, 0., 700.))


dataset = "QCD_HT200to300_PUMoriond17_05Feb2018_94X_mcRun2_asymptotic_v2-v1_NANOAODSIM"
#dataset = "JetHT_Run2016H_05Feb2018_ver2-v1_NANOAOD"

class Callback:
	"""Prints worker-side exceptions (Python 2 print statement)."""
	def on_exception(self, wid, info):
		print "Exception:", info
        
Exemplo n.º 10
0
# Worker source shipped as a string: reads the broadcast "calib200" blob
# as little-endian float32 and histograms it.
# NOTE(review): the embedded source mixes tab and 8-space indentation --
# fine under Python 2, IndentationError under Python 3.
# NOTE(review): no bulk_data containing "calib200" is passed to
# createJob below -- the worker's db lookup may fail; confirm where the
# blob is supplied.
worker_class = """
import numpy as np

class Worker(object):

    Columns = ["NJets"]

    def run(self, events, job, db):
	data = np.frombuffer(db["calib200"], "<f4")
        job.fill(x = data)
	job.message("average=%f" % (np.mean(data),))
"""

job_server = ("ifdb02.fnal.gov", 8765)
session = Session(job_server)

# 20-bin histogram of the calibration values on [0, 1).
h = Hist(hbin("x", 20, 0, 1))

dataset = "Summer16.TTHH_TuneCUETP8M2T4_13TeV-madgraph-pythia8"

job = session.createJob(dataset,
                        worker_class_source=worker_class,
                        histograms=[h])
job.run()
# Python 2 print statement; timing/counters are set by job.run().
runtime = job.TFinish - job.TStart
nevents = job.EventsProcessed
print "%s: %.6fM events, %.6fM events/second" % (
    dataset, float(nevents) / 1000000, nevents / runtime / 1000000)

# Collect the filled histogram as a pandas DataFrame.
data_frame = h.pandas()