from striped.job import Session
from striped.ml import MomentumOptimizer       # import path assumed; the fragment omitted it
from model import create_model

class MLJob:

    def __init__(self, session, model, optimizer):
        # Reconstructed: the original fragment begins mid-constructor
        self.Session = session
        self.Model = model
        self.Optimizer = optimizer
        self.Sum = None
        self.NSamples = 0

    def on_data(self, wid, nevents, data):
        if "sum" in data:
            self.NSamples += data["n"]
            if self.Sum is None:
                self.Sum = data["sum"].copy()
            else:
                self.Sum += data["sum"]

    def run(self):
        job = self.Session.createJob("MNIST",
                user_params={"model": {"config": self.Model.config()}},
                callbacks=[self],
                worker_class_file="sumup_worker.py")
        job.run()
        self.Runtime = job.runtime

model = create_model()
session = Session("striped_dev.yaml")
optimizer = MomentumOptimizer()

job = MLJob(session, model, optimizer)
job.run()

print("NSamples:", job.NSamples)
print("Average:", job.Sum / job.NSamples)
print(job.Runtime)
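The driver above ships sumup_worker.py to the workers, but that file is not shown. Below is a hypothetical sketch of its shape, modeled on the inline Worker classes later in this file; the "image" column name and job.send() as the channel feeding on_data() are assumptions, not the original code.

# sumup_worker.py -- hypothetical sketch, not the original file
import numpy as np

class Worker(object):

    Columns = ["image"]         # assumed column name for the MNIST pixel data

    def run(self, events, job):
        # Report the per-batch pixel sum and batch size under the "sum"/"n"
        # keys that MLJob.on_data() accumulates; job.send() is assumed.
        job.send(sum=np.sum(events.image, axis=0), n=len(events))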
from striped.common import Tracer

T = Tracer()

with T["run"]:
    with T["imports"]:
        from striped.job import SinglePointStripedSession as Session
        import numpy as np
        from numpy.lib.recfunctions import append_fields
        import fitsio, healpy as hp
        import sys, time

    #job_server_address = ("dbwebdev.fnal.gov", 8765)   # development
    job_server_address = ("ifdb01.fnal.gov", 8765)      # production

    session = Session(job_server_address)

    input_file = sys.argv[1]
    # strip the directory and the extension to get the bare file name
    input_filename = input_file.rsplit("/", 1)[-1].rsplit(".", 1)[0]

    with T["fits/read"]:
        input_data = fitsio.read(input_file, ext=2,
                columns=["ALPHAWIN_J2000", "DELTAWIN_J2000"])

    with T["hpix"]:
        hpix = hp.ang2pix(nside=16384,
                theta=input_data['ALPHAWIN_J2000'],
                phi=input_data['DELTAWIN_J2000'],
                lonlat=True, nest=True)
        hpix = np.asarray(hpix, np.float64)
        input_data = append_fields(input_data, "HPIX", hpix)
        # np.sort returns a sorted copy -- the original discarded the result
        input_data = np.sort(input_data, order="HPIX")
        # convert the structured array into a plain 2-D float array
        input_data = np.array(list(zip(input_data['ALPHAWIN_J2000'],
                                       input_data['DELTAWIN_J2000'],
                                       input_data['HPIX'])))
from striped.job import Session
import numpy as np

session = Session("striped_130tb.yaml")

dataset = "NanoTuples-2016_QCD_HT1500to2000_TuneCUETP8M1_13TeV-madgraphMLM-pythia8"

class DataCallback:

    def __init__(self):
        self.Sum_pt = 0.0
        self.N = 0
        self.AveragePt = None

    def on_data(self, wid, nevents, data):
        self.N += data["n_jets"]
        self.Sum_pt += data["sum_pt"]

    def on_job_finish(self, nsamples, error):
        self.AveragePt = self.Sum_pt / self.N

data_collector = DataCallback()

job = session.createJob(dataset,
        user_params={"dataset": dataset},
        bulk_data={"calibrations": np.random.random((100, 100))},
        callbacks=[data_collector],
        worker_class_file="nano_worker.py")
job.run()
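nano_worker.py is referenced but not shown; here is a hypothetical sketch, modeled on the db-aware inline worker in the last example of this file. The "Jet.pt" column, the handling of the calibrations array, and job.send() as the transport to on_data() are all assumptions.

# nano_worker.py -- hypothetical sketch, not the original file
import numpy as np

class Worker(object):

    Columns = ["Jet.pt"]        # assumed column

    def run(self, events, job, db):
        # db["calibrations"] carries the bulk array shipped by the driver (unused here).
        pt = events.Jet.pt
        # Stream the per-batch jet count and pt sum that DataCallback.on_data() expects.
        job.send(n_jets=len(pt), sum_pt=float(np.sum(pt)))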
from striped.job import Session
from striped.job.callbacks import ProgressBarCallback
import numpy as np

class MyCallback:

    def __init__(self):
        self.N = 0

    def on_data(self, wid, nevents, data):
        #print("on_data: %s %s %s" % (wid, nevents, data))
        self.N += data.get("count", 0)

cb = MyCallback()

dataset = "mnist"
session = Session("striped.yaml")

bulk_data = np.random.random((1000, 1000))

job = session.createJob(dataset,
        bulk_data={"data": bulk_data},
        callbacks=[ProgressBarCallback(), cb],
        user_params={"param": {"a": "b"}},
        worker_class_file="worker.py")

print("running...")
job.run()

runtime = job.TFinish - job.TStart
nevents = job.EventsProcessed
print("%s: %.6fM events, %.6fM events/second" % (
        dataset, float(nevents)/1000000, nevents/runtime/1000000))
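A plausible shape for the worker.py this driver loads: all it has to do is stream an event count back under the "count" key that MyCallback reads. The empty Columns list and job.send() are assumptions.

# worker.py -- hypothetical sketch, not the original file
class Worker(object):

    Columns = []                # assumes no event columns are needed just to count

    def run(self, events, job, db):
        # db holds the bulk "data" array shipped by the driver; unused here.
        job.send(count=len(events))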
from striped.job import Session
from striped.ml import ML_Job
from model import create_model

model = create_model()
session = Session("striped_130tb.yaml")

for epoch in range(5):
    job = ML_Job(session, model, worker_file="worker.py")
    job.run("MNIST", 0.1)
    print("epoch: %d, runtime: %f, loss: %s" % (epoch + 1, job.Runtime, job.Loss))
from striped.job import Session
from striped.ml import pack_model              # import path assumed; the fragment omitted it
from model import create_model

class MLJob:

    def __init__(self, session, model):
        self.Session = session
        self.Model = model
        # The fragment omits the callback that accumulates the per-worker
        # weight deltas into self.Deltas; it is assumed to run before on_job_end().
        self.Deltas = None

    def on_job_end(self, nsamples, error):
        if not error:
            # average the accumulated deltas over the sample count
            # (the original divided the whole list once per loop iteration)
            self.Deltas = [d / nsamples for d in self.Deltas]
            weights = [w + d for w, d in zip(self.Model.get_weights(), self.Deltas)]
            self.Model.set_weights(weights)

    def run(self):
        job = self.Session.createJob("CIFAR-10",
                user_params={
                    "model": pack_model(self.Model,
                            loss="categorical_crossentropy",
                            lr=0.001)
                },
                callbacks=[self],
                worker_class_file="cifar10_worker.py")
        job.run()
        self.Runtime = job.runtime

model = create_model()
session = Session("striped.yaml")

job = MLJob(session, model)
job.run()

print(job.Runtime)
import numpy as np
import pandas as pd
from striped.job import SinglePointStripedSession as Session   # import assumed; omitted in the original
# Hist, hbin and groupby come from striped's histogram helpers (import also omitted)

worker_class = """
import cloudpickle

class Worker(object):

    Columns = ["NJets"]

    def run(self, events, job):
        job.message("%d events" % (len(events),))
        x = 5/0         # deliberate error, to demonstrate the on_exception() callback
"""

session = Session(("ifdb01.fnal.gov", 8765))

h_by_dataset = Hist(hbin("NJets", 20, 0, 20), groupby("dataset"))

datasets = [
    "Summer16.TTHH_TuneCUETP8M2T4_13TeV-madgraph-pythia8"       # 100000 events
]

class Callback:

    def on_message(self, wid, nevents, message):
        print("Message received from worker %d after seeing %d events: <%s>" % (
                wid, nevents, message))

    def on_exception(self, wid, info):
        print("Worker %d failed with exception:\n%s" % (wid, info))

callback = Callback()
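The script above stops after constructing the callback and never launches the job. A plausible completion, following the createJob() pattern used elsewhere in this file:

# hypothetical completion -- the original snippet ends before this point
for dataset in datasets:
    job = session.createJob(dataset,
            worker_class_source=worker_class,
            histograms=[h_by_dataset],
            callbacks=[callback])
    job.run()       # the worker divides by zero, so on_exception() should fire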
import sys, getopt
import numpy as np
from striped.job import SinglePointStripedSession as Session   # import assumed; omitted in the original
# the Usage string and the MatchJob class are defined elsewhere in this script

opts, args = getopt.getopt(sys.argv[1:], "?hm:s:")
opts = dict(opts)
max_matchers = int(opts.get("-m", 5))
stagger = float(opts.get("-s", 0.1))

if len(args) < 2 or "-?" in opts or "-h" in opts:
    print(Usage)
    sys.exit(1)

outprefix = args[0]
files = args[1:]

#job_server_address = ("dbwebdev.fnal.gov", 8765)
job_server_address = ("ifdb01.fnal.gov", 8765)

session = Session(job_server_address)       #, worker_tags=["DES"]

job = MatchJob(session, files, max_matchers, stagger)
job.wait()

all_matches = job.Matches
all_unmatches = job.Unmatches

if len(all_matches):
    all_matches = np.concatenate(all_matches)
    oidmap = {}
    for i in range(len(all_matches)):
        rgid = all_matches["rgid"][i]
        oid = all_matches["OBJECT_ID"][i]
        # do not shadow the builtin "range", as the original did
        omin, omax = oidmap.get(rgid, (oid, oid))
        # inferred next step -- the original fragment cuts off here
        oidmap[rgid] = (min(omin, oid), max(omax, oid))
import pandas as pd
from striped.job import Session     # import assumed; omitted in the original
# Hist and hbin come from striped's histogram helpers (import also omitted)

worker_class = """
class Worker(object):

    Columns = ["nJet", "nMuon", "nElectron", "Jet.pt", "Muon.pt"]

    def run(self, events, job):
        job.fill(nJet=events.nJet)
        job.fill(nElectron=events.nElectron)
        job.fill(nMuon=events.nMuon)
        job.fill(JetPt=events.Jet.pt)
        job.fill(MuonPt=events.Muon.pt)
"""

session = Session()

njets = Hist(hbin("nJet", 20, 0, 20))
nmuon = Hist(hbin("nMuon", 20, 0, 20))
nelectron = Hist(hbin("nElectron", 20, 0, 20))
muon_pt = Hist(hbin("MuonPt", 70, 0., 700.))
jet_pt = Hist(hbin("JetPt", 70, 0., 700.))

dataset = "QCD_HT200to300_PUMoriond17_05Feb2018_94X_mcRun2_asymptotic_v2-v1_NANOAODSIM"
#dataset = "JetHT_Run2016H_05Feb2018_ver2-v1_NANOAOD"

class Callback:

    def on_exception(self, wid, info):
        print("Exception:", info)
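This driver likewise defines its histograms and callback but never creates the job. A plausible completion, modeled on the final example below:

# hypothetical completion -- not part of the original snippet
job = session.createJob(dataset,
        worker_class_source=worker_class,
        histograms=[njets, nmuon, nelectron, muon_pt, jet_pt],
        callbacks=[Callback()])
job.run()

jet_pt_frame = jet_pt.pandas()      # Hist.pandas(), as in the next example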
from striped.job import SinglePointStripedSession as Session   # import assumed; omitted in the original
# Hist and hbin come from striped's histogram helpers (import also omitted)

worker_class = """
import numpy as np

class Worker(object):

    Columns = ["NJets"]

    def run(self, events, job, db):
        # "calib200" is read from the worker-side data object db;
        # it is expected to hold little-endian float32 calibration values
        data = np.frombuffer(db["calib200"], "<f4")
        job.fill(x=data)
        job.message("average=%f" % (np.mean(data),))
"""

job_server = ("ifdb02.fnal.gov", 8765)
session = Session(job_server)

h = Hist(hbin("x", 20, 0, 1))

dataset = "Summer16.TTHH_TuneCUETP8M2T4_13TeV-madgraph-pythia8"

job = session.createJob(dataset,
        worker_class_source=worker_class,
        histograms=[h])
job.run()

runtime = job.TFinish - job.TStart
nevents = job.EventsProcessed
print("%s: %.6fM events, %.6fM events/second" % (
        dataset, float(nevents)/1000000, nevents/runtime/1000000))

data_frame = h.pandas()
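Once converted, the histogram is an ordinary pandas DataFrame; the column names below are illustrative guesses, not a documented contract:

# illustrative only -- assumed column names
print(data_frame)
data_frame.plot(x="x", y="count", kind="bar")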