/
fastloop.py
154 lines (126 loc) · 5.44 KB
/
fastloop.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
"""
A module for a fast looping through a dataset (using python's multiprocessing)
Author: Benedikt J. Daurer (benedikt@xray.bmc.uu.se) and Max Hantke
Last update: March 9, 2014
"""
import multiprocessing
import numpy as np
import h5py, os, sys, time
import utils
def _worker_call(worker, pipe):
work_package = pipe.recv()
while work_package != "fika":
pipe.send(worker(work_package))
work_package = pipe.recv()
class FastLoop(object):
def __init__(self, inputfile, outputfile, Nprocesses, Njobs, loopfunc, *args, **kwargs):
self.inputfile = inputfile
self.outputfile = outputfile
self.index = 0
self.Nprocesses = Nprocesses
self.Njobs = Njobs
self.framerate_update = int(Njobs/100.)
self.loopfunc = loopfunc
self.args = args
self.kwargs = kwargs
self.load()
def load(self):
with h5py.File(self.inputfile, 'r') as datafile: self.input = {item:datafile[item][:] for item in datafile.keys()}
with h5py.File(self.outputfile, 'r') as datafile: self.output = {item:datafile[item][:] for item in datafile.keys()}
#@profile
def next(self):
self.index += 1
data = {item:self.input[item][self.index-1] for item in self.input}
input = {'i':self.index-1, 'data':data, 'args':self.args, 'kwargs':self.kwargs}
return input
#@profile
def save(self, res):
for k in res:
if not k == 'i': self.output[k][res['i']] = res[k]
#@profile
def write(self, progress = 1.):
utils.progressbar(progress, 'Saving results to file')
with h5py.File(self.outputfile, 'a') as datafile:
for item in self.output: datafile[item][...] = self.output[item]
#@profile
def start(self):
# Decide how many parallel processes to use
available_cpus = multiprocessing.cpu_count()
if self.Nprocesses == 'max': self.Nprocesses = available_cpus
elif self.Nprocesses > available_cpus: self.Nprocesses = available_cpus
# Initialize lists keeping track of read/write pipes
pipes_end_host = list(np.zeros(self.Nprocesses))
pipes_end_worker = list(np.zeros(self.Nprocesses))
processes = list(np.zeros(self.Nprocesses))
# Start multiple processes
for i in range(self.Nprocesses):
pipes_end_host[i], pipes_end_worker[i] = multiprocessing.Pipe()
processes[i] = multiprocessing.Process(target=_worker_call, args=(self.loopfunc, pipes_end_worker[i],) )
processes[i].start()
# Variables to keep track of jobs started/done
Njobs_done = 0
Njobs_started = 0
# Send initial jobs
for r in pipes_end_host:
r.send(self.next())
Njobs_started += 1
# Some parameters for diagnostics
t_start = time.time()
message = 'Datarate %.2f Hz; job %i/%i; process'
# This is the main loop, it waits for new jobs and sends the input
while Njobs_done < self.Njobs:
for r in pipes_end_host:
if r.poll():
result = r.recv()
if Njobs_started < self.Njobs:
r.send(self.next())
Njobs_started += 1
self.save(result)
Njobs_done += 1
# Give some feedback to the command line once in a while
if ((Njobs_done + 1) % self.framerate_update) == 0:
progress = float(Njobs_done) / self.Njobs
datarate = (Njobs_done + 1) / (time.time() - t_start)
utils.progressbar(progress, message %(datarate, Njobs_done + 1, self.Njobs), t_start)
# Close all processes
for i in range(self.Nprocesses):
pipes_end_host[i].send('fika')
processes[i].join()
pipes_end_host[i].close()
# ===================================
# Some code for testing/profiling
# ===================================
if __name__ == '__main__':
from fitting import fit_photon_histograms
import logging
logging.captureWarnings(True)
histfile = h5py.File('testing/histogram.h5', 'r')
Hmap = histfile['data/histogram']
Hbinsize = histfile['data/histogramBinsize'][0]
Hcount = histfile['data/histogramCount'][0]
Hmin = histfile['data/histogramMin'][0]
Hnbins = histfile['data/histogramNbins'][0]
Hbins = np.linspace(Hmin, Hnbins + Hmin - 1, Hnbins)
NY = Hmap.shape[0]
NX = Hmap.shape[1]
SH = (NY, NX)
NXY = NX * NY
infile = h5py.File('testing/tmpin.h5', 'w')
infile['histogram'] = Hmap[:].reshape(NXY, Hnbins)
infile['mask'] = np.array(NXY * [False])
infile.close()
histfile.close()
outfile = h5py.File('testing/tmpout.h5', 'w')
outfile.create_dataset('bg_offset', (NXY,))
outfile.create_dataset('bg_amp', (NXY,))
outfile.create_dataset('bg_sigma', (NXY,))
outfile.create_dataset('photon_offset', (NXY,))
outfile.create_dataset('photon_amp', (NXY,))
outfile.create_dataset('photon_sigma', (NXY,))
outfile.create_dataset('status', (NXY,), dtype=h5py.special_dtype(vlen=str))
outfile.close()
fastloop = FastLoop('testing/tmpin.h5', 'testing/tmpout.h5', 30, NXY, fit_photon_histograms, Hbins)
fastloop.start()
fastloop.write()
os.remove('testing/tmpin.h5')
os.remove('testing/tmpout.h5')