-
Notifications
You must be signed in to change notification settings - Fork 0
/
volumetric_processing.py
545 lines (506 loc) · 22.2 KB
/
volumetric_processing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
import cv2
from scipy.ndimage.morphology import binary_fill_holes
import numpy as np
import vsi_metadata as v
import bioformats
import javabridge
import pandas as pd
import skimage.filters as f
import skimage
import skimage.feature as feature
from skimage import exposure
import matplotlib.pyplot as plt
def single_volume_time_series(path,meta_number=None, cycle_vm=True,
                              crop_img=False,print_number=5,image_channel=1,
                              edge=True,stats=False,show_linear=False,
                              t_lag_level=10,zero_index='None',rescale_factor=255,meta_stage_loop=True,
                              t_sample=1,show=False,rebin=True,compute_height=False,canny_range=.99,
                              hist_cutoff=0.01,manual_thresh=False,m_thresh_low=100
                              ):
    """Compute the thrombus volume time series for a single image file.

    The per-slice areas come from ``z_and_t_area`` and are integrated over z
    by ``thrombus_volume_time_series``.

    Parameters forwarded unchanged to ``z_and_t_area``: ``path``,
    ``meta_number``, ``cycle_vm``, ``crop_img``, ``print_number``,
    ``image_channel``, ``edge``, ``rescale_factor``, ``meta_stage_loop``,
    ``t_sample``, ``show``, ``rebin``, ``compute_height``, ``canny_range``,
    ``hist_cutoff``, ``manual_thresh`` and ``m_thresh_low``.

    Other parameters:
        stats: when true, also compute kinetic metrics with ``get_v_metrics``.
        show_linear, t_lag_level: forwarded to ``get_v_metrics``.
        zero_index: row position of the first time point to keep. The string
            ``'None'`` (the default) or ``None`` keeps every row. Otherwise
            earlier rows are dropped and time is shifted to start at zero.

    Returns:
        ``volume_df`` alone, or a tuple that additionally carries
        ``vol_metrics`` (when ``stats``) and/or ``height_df`` (when
        ``compute_height``), in the order
        ``(volume_df[, vol_metrics][, height_df])``.
    """
    area_kwargs = dict(
        meta_number=meta_number, cycle_vm=cycle_vm,
        crop_img=crop_img, print_number=print_number, image_channel=image_channel,
        edge=edge, rescale_factor=rescale_factor, meta_stage_loop=meta_stage_loop,
        t_sample=t_sample, show=show, rebin=rebin, compute_height=compute_height,
        canny_range=canny_range, hist_cutoff=hist_cutoff, manual_thresh=manual_thresh,
        m_thresh_low=m_thresh_low,
    )
    # z_and_t_area returns a (area, height) pair only when compute_height is set
    if compute_height:
        area_df, height_df = z_and_t_area(path, **area_kwargs)
    else:
        area_df = z_and_t_area(path, **area_kwargs)

    volume_df = thrombus_volume_time_series(area_df)

    if zero_index is not None and zero_index != 'None':
        zero_index = int(zero_index)
        volume_df = volume_df.iloc[zero_index:, :]
        # reset_index() is called without drop=True, so the previous index is
        # kept as an extra 'index' column (unchanged from the original code)
        volume_df = volume_df.reset_index()
        volume_df['time (s)'] = volume_df['time (s)'] - np.min(volume_df['time (s)'])

    if stats:
        vol_metrics = get_v_metrics(volume_df, show_linear=show_linear, t_lag_level=t_lag_level)
        if compute_height:
            # the original returned the misspelled name ``volme_df`` here,
            # which raised NameError
            return volume_df, vol_metrics, height_df
        return volume_df, vol_metrics
    if compute_height:
        return volume_df, height_df
    return volume_df
# calculates volume versus time for different segments
def segmented_volume_time_series(area_df, segments=3):
    """Compute volume versus time separately for consecutive z-ranges.

    The unique ``'Z loc (um)'`` values are split by ``segment_series``; for
    each resulting ``[low, high]`` pair the rows with ``low <= z <= high``
    (both bounds inclusive) are integrated with
    ``thrombus_volume_time_series``.

    Parameters:
        area_df: DataFrame with at least the columns ``'Z loc (um)'`` plus
            whatever ``thrombus_volume_time_series`` reads.
        segments: requested number of z-segments.

    Returns:
        One DataFrame holding the per-segment volume tables stacked
        vertically, with two extra columns: ``'Z-range (um)'`` (a
        ``'low-high'`` string with two decimals) and ``'Z-segment'``
        (1-based segment number). An empty DataFrame is returned when no
        segment was produced.
    """
    z_range = area_df['Z loc (um)'].unique()
    # Segment z-stacks into even groups
    z_segments = segment_series(z_range, segments)
    segment_frames = []
    for i, (z_low, z_high) in enumerate(z_segments):
        # rows of area_df that fall inside this z-range
        in_range = (area_df['Z loc (um)'] >= z_low) & (area_df['Z loc (um)'] <= z_high)
        # take volume integral
        loop_volume_df = thrombus_volume_time_series(area_df[in_range])
        # store the z-range and segment number
        loop_volume_df['Z-range (um)'] = '{:.2f}-{:.2f}'.format(z_low, z_high)
        loop_volume_df['Z-segment'] = i + 1
        segment_frames.append(loop_volume_df)
    # DataFrame.append was removed in pandas 2.0; concatenate once at the end
    if not segment_frames:
        return pd.DataFrame()
    return pd.concat(segment_frames, ignore_index=True)
# function for making segments out of the z stacks to analyze different regions
# of the data
def segment_series(series,segments):
    """Split an ordered series into consecutive ``[start, end]`` value pairs.

    The step between segment starts is ``rint(len(series) / segments)``
    (at least 1). Neighbouring segments share their boundary value. When the
    series does not divide evenly, the last segment is truncated so that it
    ends on the final element; this is what the original unreachable
    "truncate the last entry" branch was meant to do, and it keeps the
    trailing values from being dropped.

    Parameters:
        series: indexable sequence of values (list or 1-D array).
        segments: requested number of segments; the number actually returned
            can differ by one because of the rounding of the step.

    Returns:
        ``numpy`` array of shape ``(n_segments, 2)`` holding the first and
        last value of each segment; an empty array if ``series`` has fewer
        than two elements.
    """
    n_points = len(series)
    # guard against a zero step, which would make range() raise
    step = max(1, int(np.rint(n_points / segments)))
    last = n_points - 1
    segment_list = [
        [series[start], series[min(start + step, last)]]
        for start in range(0, last, step)
    ]
    return np.array(segment_list)
# calculate the thrombus volume as a function of time
def thrombus_volume_time_series(area_df):
    """Integrate slice areas over z to get the volume at each time point.

    For every unique ``'time (s)'`` value the ``'Thrombus Area (um^2)'``
    column is resampled on 50 evenly spaced z positions with
    ``interpolate_in_z``; the samples are summed and multiplied by
    ``z_extent / 50``, i.e. the mean interpolated area times the z extent.

    Parameters:
        area_df: DataFrame with the columns ``'time (s)'``, ``'Z loc (um)'``
            and ``'Thrombus Area (um^2)'``.

    Returns:
        DataFrame with the columns ``'time (s)'``,
        ``'Thrombus Volume (um^3)'`` and ``'dV/dt'``. ``dV/dt[k]`` is the
        forward difference ``(V[k+1] - V[k]) / (t[k+1] - t[k])``; the last
        entry is NaN. The original divided by ``(t_max - t_min) / len(t)``,
        which is not the sampling interval, and left the first entry unset.
    """
    n_fine = 50  # number of interpolated z samples used for the integral
    # get the nonrepeating timesteps
    time = area_df['time (s)'].unique()
    if len(time) == 0:
        return pd.DataFrame({'time (s)': [], 'Thrombus Volume (um^3)': [], 'dV/dt': []})
    # get nonrepeating z steps and the spacing used in the sum
    z_range = area_df['Z loc (um)'].unique()
    z_step = (np.max(z_range) - np.min(z_range)) / n_fine
    volume = np.zeros(len(time))
    for count, t in enumerate(time):
        time_step = area_df[area_df['time (s)'] == t]
        # interpolate the z series for a finer integration
        area_fine = interpolate_in_z(time_step, 'Thrombus Area (um^2)', n_fine)
        # take the sum of this and multiply by dZ to get the volume integral
        volume[count] = np.sum(area_fine) * z_step
    # volumetric growth rate (dV/dt) from consecutive time points
    dV = np.full(len(time), np.nan)
    if len(time) > 1:
        dV[:-1] = np.diff(volume) / np.diff(time)
    # store values in dataframe
    return pd.DataFrame({'time (s)': time, 'Thrombus Volume (um^3)': volume, 'dV/dt': dV})
# function to interpolate the z data more finely to more accurately calculate
# volume of the images
def interpolate_in_z(df,variable,steps):
    """Resample one column of ``df`` on an evenly spaced z grid.

    Parameters:
        df: DataFrame with a ``'Z loc (um)'`` column and the column named by
            ``variable``.
        variable: name of the column to resample.
        steps: number of points on the fine grid, which runs from the
            smallest to the largest ``'Z loc (um)'`` value.

    Returns:
        Array of ``steps`` linearly interpolated values, or zeros of the same
        length when ``df`` has no rows.
    """
    z_coords = df['Z loc (um)']
    fine_grid = np.linspace(np.min(z_coords), np.max(z_coords), steps)
    samples = df[variable].values
    if len(samples):
        return np.interp(fine_grid, z_coords, samples)
    # nothing to interpolate: report zero everywhere on the fine grid
    return np.zeros(len(fine_grid))
# combines in height profile into loops of z and t
def z_and_t_area(path,meta_number=None, cycle_vm=True,crop_img=False,print_number=5,edge=False,binary=False,
                 image_channel=1,rescale_factor=255,meta_stage_loop=True,t_sample=1,show=True,
                 rebin=True,compute_height=False,canny_range=.33,
                 hist_cutoff=0.01,manual_thresh=False,m_thresh_high=1000,m_thresh_low=100):
    """Measure the segmented area of every z-slice at every sampled time point.

    Images are read with ``bioformats.load_image``, segmented (edge
    detection via ``edge_and_fill``, manual threshold via
    ``thresh_and_fill``, or triangle auto-threshold via ``auto_threshold``)
    and the foreground area is computed with ``get_area``.

    Parameters:
        path: image file passed to ``v.extract_metadata`` and
            ``bioformats.load_image``.
        meta_number, meta_stage_loop: forwarded to ``v.extract_metadata``.
        cycle_vm: start the javabridge VM on entry and kill it on exit.
        crop_img: ask for a crop region once (via ``get_bounds``) and apply
            it to every slice before edge detection.
        print_number: number of progress thresholds (progress printing itself
            is commented out).
        edge: use ``edge_and_fill``; otherwise threshold.
        image_channel: channel taken from multi-channel images.
        rescale_factor: multiplier applied to the image used for choosing the
            crop region.
        t_sample: keep every ``t_sample``-th time index.
        show: plot sample images next to their segmentation.
        rebin: apply 2x2 binning with ``bin2d``.
        compute_height: also build a height profile with ``calc_height``.
        canny_range: ``canny_range`` forwarded to ``edge_and_fill``.
        hist_cutoff: ``clip_limit`` for ``exposure.equalize_adapthist``.
        manual_thresh, m_thresh_low: use ``thresh_and_fill`` with this lower
            threshold when ``edge`` is false.
        binary, m_thresh_high: accepted but not used by this function.

    Returns:
        ``area_df`` with the columns ``'Thrombus Area (um^2)'``,
        ``'Z loc (um)'`` and ``'time (s)'``; when ``compute_height`` is set,
        the tuple ``(area_df, height_df)`` where ``height_df`` has the
        columns ``'Length (um)'``, ``'Height (um)'`` and ``'time (s)'``.
    """
    # start javabridge
    if cycle_vm:
        javabridge.start_vm(class_path=bioformats.JARS)
    # per-time-point tables; concatenated once at the end because
    # DataFrame.append was removed in pandas 2.0
    area_frames = []
    height_frames = []
    try:
        # read in metadata using bioformats and make arrays for t and z slices
        metadata = v.extract_metadata(path, cycle_vm=False, meta_number=meta_number,
                                      stage_loop=meta_stage_loop)
        z_slices = np.arange(0, metadata['size_Z'])
        z_slices_scaled = z_slices * float(metadata['relative step width'])
        t_slices = np.arange(0, metadata['size_T'] - 1)
        t_slices = t_slices[::t_sample]
        t_slices_scaled = t_slices * float(metadata['cycle time'])
        # NOTE(review): the original used this same factor whether or not
        # rebin was set; after 2x2 binning the pixel area is presumably four
        # times larger - confirm before changing.
        scale = metadata['scale'] ** 2
        # crop image option to do if specified
        if crop_img:
            # determine cropping bounds
            img = bioformats.load_image(path=path, z=z_slices[3], t=t_slices[-1], series=0)
            img = (img * rescale_factor).astype('uint8')
            bounds = get_bounds(img, scale=0.4)
        print_thresh = np.linspace(0, 100, print_number)
        print_count = 0
        plot_grid_count = 1
        finished_plotting = False
        # iterate through time slices, stop before the last one
        for i in range(len(t_slices) - 1):
            percent = t_slices[i] / (len(t_slices) - 1)
            # bounds check avoids IndexError once every threshold was passed
            if print_count < len(print_thresh) and percent > print_thresh[print_count]:
                # print('\tt: {:.2f} S'.format(t_slices_scaled[i]))
                print_count += 1
            # declare surface_area array outside of z-stack loop
            area = np.zeros(len(z_slices))
            for j in range(len(z_slices)):
                # read image
                img = bioformats.load_image(path=path, z=z_slices[j], t=t_slices[i],
                                            rescale=False)
                # if the image consists of multiple channels take just one channel
                if len(np.shape(img)) > 2:
                    img = img[:, :, image_channel]
                equalize_hist = exposure.equalize_adapthist(img, clip_limit=hist_cutoff, nbins=65535)
                ratio = np.amax(equalize_hist) / 65535
                equalize_hist = np.round(equalize_hist / ratio, 0).astype(int)
                # if rebin is specified rebin image using 2x2 binning
                # NOTE(review): this bins the raw image and discards the
                # equalized one computed just above - confirm intent.
                if rebin:
                    equalize_hist = bin2d(img, 2)
                if crop_img:
                    crop = equalize_hist[int(bounds[1]):int(bounds[1] + bounds[3]),
                                         int(bounds[0]):int(bounds[0] + bounds[2])]
                else:
                    crop = equalize_hist
                # threshold image
                # NOTE(review): only the edge path uses the cropped/binned
                # image; both threshold paths operate on the raw ``img``.
                if edge:
                    bw = edge_and_fill(crop, canny_range=canny_range)
                else:
                    if manual_thresh:
                        bw = thresh_and_fill(img, low=m_thresh_low)
                    else:
                        bw = auto_threshold(img, 'triangle')
                # if compute height specified and on the first step of loop make a zeros array
                # the length and width of the image and height of the z_stacks number
                if compute_height:
                    if j == 0:
                        img_shape = np.shape(img)
                        img_stack = np.zeros([img_shape[0], img_shape[1], len(z_slices)])
                    img_stack[:, :, j] = bw
                # subpart of program to show images comparing plots and bin
                # NOTE(review): a new figure is opened and both counters are
                # reset for every slice, as in the original - confirm intent.
                if show:
                    f1 = plt.figure()
                    z_count = 0
                    t_count = 0
                    count_satisfied, t_count, z_count = show_handler(i, len(t_slices),
                                                                     j, len(z_slices),
                                                                     t_count, z_count)
                    if count_satisfied and not finished_plotting:
                        plt.subplot(6, 6, plot_grid_count)
                        plt.imshow(equalize_hist)
                        plt.axis('off')
                        plt.subplot(6, 6, plot_grid_count + 1)
                        plt.imshow(bw)
                        plt.axis('off')
                        plot_grid_count += 2
                        if plot_grid_count >= 36:
                            finished_plotting = True
                            f1.show()
                # calculate area of thresholded image
                area[j] = get_area(bw, scale)
            if compute_height:
                average_height, length = calc_height(img_stack, scale,
                                                     metadata['relative step width'])
                height_frames.append(pd.DataFrame({
                    'Length (um)': length,
                    'Height (um)': average_height,
                    'time (s)': [t_slices_scaled[i]] * len(length)
                }))
            # store in dataframe along with other important variables
            area_frames.append(pd.DataFrame({'Thrombus Area (um^2)': area,
                                             'Z loc (um)': z_slices_scaled,
                                             'time (s)': [t_slices_scaled[i]] * len(area)
                                             }))
    finally:
        # release the JVM even when reading or segmentation fails
        if cycle_vm:
            javabridge.kill_vm()
    area_df = pd.concat(area_frames, ignore_index=True) if area_frames else pd.DataFrame()
    if compute_height:
        height_df = pd.concat(height_frames, ignore_index=True) if height_frames else pd.DataFrame()
        return area_df, height_df
    return area_df
# compute height as a func of length
def calc_height(img_stack,scale,z_scale):
    """Compute the mean foreground height along the second image axis.

    For each pixel ``(i, j)`` the height is the index of the last nonzero
    z-slice times ``z_scale``. Pixels with no nonzero slice get height 0
    (the original raised ValueError on them). Heights are then averaged over
    the first axis, giving one value per column.

    Parameters:
        img_stack: array-like of shape ``(rows, cols, n_z)``; nonzero marks
            foreground.
        scale: multiplier converting a pixel count to length.
        z_scale: spacing between z-slices (anything ``float()`` accepts).

    Returns:
        ``(avg_height, length_array)``, both of length ``cols``.
        ``length_array`` runs linearly from 0 to ``cols * scale``. The
        original sized it from ``rows``, which only matched ``avg_height``
        for square images.
    """
    stack = np.asarray(img_stack)
    n_cols, n_z = stack.shape[1], stack.shape[2]
    occupied = stack != 0
    # position of the last nonzero slice: search the reversed z axis for the
    # first hit and map it back to the original index
    top_index = (n_z - 1) - np.argmax(occupied[:, :, ::-1], axis=2)
    height = np.where(occupied.any(axis=2), top_index * float(z_scale), 0.0)
    avg_height = np.mean(height, axis=0)
    length_array = np.linspace(0, n_cols * scale, n_cols)
    return avg_height, length_array
def bin2d(a,K):
    """Sum a 2-D array over non-overlapping ``K`` x ``K`` blocks.

    Rows and columns that do not fill a complete block at the bottom/right
    edge are dropped; the original reshape raised ValueError for such
    shapes.

    Parameters:
        a: 2-D array-like.
        K: block edge length in pixels.

    Returns:
        Array of shape ``(rows // K, cols // K)`` with the block sums.
    """
    a = np.asarray(a)
    m_bins = a.shape[0] // K
    n_bins = a.shape[1] // K
    # trim to a whole number of blocks so the reshape is always valid
    trimmed = a[:m_bins * K, :n_bins * K]
    return trimmed.reshape(m_bins, K, n_bins, K).sum(3).sum(1)
def show_handler(i,t_slices,j,z_slices,t_count,z_count,
                 z_regions=(1,50,100),t_points=(25,50,90)):
    """Decide whether the current (time, z) position should be plotted.

    Parameters:
        i, t_slices: current time index and total number of time slices.
        j, z_slices: current z index and total number of z slices.
        t_count, z_count: index of the next threshold to test in
            ``t_points`` / ``z_regions``.
        z_regions, t_points: percentage thresholds (immutable defaults
            replace the original mutable list defaults).

    Returns:
        ``(count_satisfied, t_count, z_count)``. ``count_satisfied`` is True
        when both percentages exceed their current threshold, in which case
        both counters are advanced by one. When either counter is already
        past the end of its threshold list the result is
        ``(False, t_count, z_count)`` instead of an IndexError.
    """
    if t_count >= len(t_points) or z_count >= len(z_regions):
        return False, t_count, z_count
    t_percent = i / t_slices * 100
    z_percent = j / z_slices * 100
    if t_percent > t_points[t_count] and z_percent > z_regions[z_count]:
        return True, t_count + 1, z_count + 1
    return False, t_count, z_count
# function to calculate the area of a thresholded image
def get_area(bw,scale):
    """Return the sum of all entries of ``bw`` multiplied by ``scale``.

    Parameters:
        bw: thresholded image (array-like).
        scale: factor applied to the summed pixel values.
    """
    pixel_total = np.sum(bw)
    return pixel_total * scale
# autothresholding function
# - works by passing in a bw image and the filter_type
# - Filter options are seen below in the if statements
def auto_threshold(image,filter_type):
    """Binarize ``image`` with an automatically chosen threshold.

    Parameters:
        image: grayscale image array.
        filter_type: one of
            ``'otsu'``     -> ``f.threshold_otsu``
            ``'isodata'``  -> ``f.threshold_isodata``
            ``'triangle'`` -> ``f.threshold_triangle``
            ``'entropy'``  -> ``f.threshold_li``
            ``'mixed'``    -> triangle when ``mean / std > 2``, else otsu.

    Returns:
        Integer array, 1 where ``image`` exceeds the threshold and 0
        elsewhere.

    Raises:
        ValueError: for an unknown ``filter_type`` (the original failed with
            UnboundLocalError on the return statement).
    """
    if filter_type == 'otsu':
        filt = f.threshold_otsu(image)
    elif filter_type == 'isodata':
        filt = f.threshold_isodata(image)
    elif filter_type == 'triangle':
        filt = f.threshold_triangle(image)
    elif filter_type == 'entropy':
        filt = f.threshold_li(image)
    elif filter_type == 'mixed':
        snr = np.mean(image) / np.std(image)
        if snr > 2:
            filt = f.threshold_triangle(image)
        else:
            filt = f.threshold_otsu(image)
    else:
        raise ValueError(
            "unknown filter_type {!r}; expected 'otsu', 'isodata', "
            "'triangle', 'entropy' or 'mixed'".format(filter_type))
    return (image > filt).astype(int)
# Manual thresholding function
def thresh_and_fill(img,low=125):
    """Threshold ``img`` at ``low`` and fill enclosed holes.

    Parameters:
        img: image array; pixels strictly greater than ``low`` are
            foreground.
        low: lower intensity threshold.

    Returns:
        ``uint8`` array, 1 for foreground (after ``binary_fill_holes``) and 0
        elsewhere.
    """
    above = img > low
    return np.array(binary_fill_holes(above), dtype=np.uint8)
def edge_and_fill(img,canny_range=0.99):
    """Segment ``img`` by edge detection, dilation and hole filling.

    Steps: ``auto_canny`` edges -> dilation with a 3x3 footprint (to connect
    broken edge lines) -> ``binary_fill_holes`` -> every pixel that does not
    belong to the most frequent connected-component label is set to 1.

    Parameters:
        img: grayscale image array.
        canny_range: passed to ``auto_canny`` as ``sigma``.

    Returns:
        Integer array with 1 for foreground and 0 for background.
    """
    edges = auto_canny(img, sigma=canny_range)
    # 3x3 footprint used to dilate the edges so unconnected lines join up
    kernel = np.ones((3, 3), np.uint8)
    # footprint is passed positionally: the ``selem`` keyword was renamed to
    # ``footprint`` in scikit-image 0.19 and later removed
    dilate = skimage.morphology.dilation(edges, kernel)
    # fill the holes
    fill = binary_fill_holes(dilate).astype(int)
    # treat the most frequent label as background and mark everything else
    # as foreground, which also fills regions touching the image border
    labels = skimage.morphology.label(fill)
    label_count = np.bincount(labels.ravel())
    background = np.argmax(label_count)
    fill[labels != background] = 1
    return fill
def auto_canny(image, sigma=0.33,std_eval=False,correct_fact=1):
    """Run Canny edge detection with thresholds derived from the image mean.

    The image is converted to float32 and blurred with a 3x3 Gaussian
    kernel. Thresholds are then computed from the mean (not the median) of
    the blurred image and truncated to integers.

    Parameters:
        image: grayscale image array.
        sigma: fractional band around the mean; thresholds are
            ``(1 - sigma) * mean`` (floored at 0) and ``(1 + sigma) * mean``
            (capped at 65535). Used when ``std_eval`` is false.
        std_eval: when true, use ``mean -/+ 2 * std`` instead.
        correct_fact: multiplier applied to the ``std_eval`` thresholds.

    Returns:
        The edge map returned by ``feature.canny``.
    """
    float_img = np.float32(image)
    blurred = cv2.GaussianBlur(float_img, (3, 3), 0)
    # mean and spread of the blurred intensities; the unused signal-to-noise
    # ratio of the original was dropped because it divided by zero on
    # uniform images
    mean_val = np.mean(blurred)
    std_val = np.std(blurred)
    if std_eval:
        lower = int((mean_val - 2 * std_val) * correct_fact)
        # NOTE(review): the upper bound applies correct_fact twice, as in the
        # original - confirm this is intended.
        upper = int((mean_val + 2 * std_val) * correct_fact * correct_fact)
    else:
        lower = int(max(0, (1.0 - sigma) * mean_val))
        upper = int(min(65535, (1.0 + sigma) * mean_val))
    edged = feature.canny(blurred, low_threshold=lower, high_threshold=upper)
    return edged
# Function that uses crop function to get bounds of the image
def get_bounds(img,scale=0.4):
    """Let the user pick a crop region on a downscaled copy of ``img``.

    The image is resized by ``scale`` so it fits on screen, the region is
    selected with ``crop_img(..., get_bounds=True)``, and the selection is
    scaled back to the coordinates of the full-size image.

    Parameters:
        img: image array; only its first two dimensions set the resize
            target.
        scale: resize factor applied before selection.

    Returns:
        List of the selected bounds, each divided by ``scale`` and rounded to
        an integer.
    """
    height, width = np.shape(img)[:2]
    # cv2.resize takes dsize as (width, height); the original passed the
    # numpy shape (rows, cols, ...) directly, which swapped the axes for
    # non-square images and failed for multi-channel ones
    adj_dims = (int(np.rint(scale * width)), int(np.rint(scale * height)))
    img_scale = cv2.resize(img, dsize=adj_dims, interpolation=cv2.INTER_CUBIC)
    # crop image and return the bounds
    bounds = crop_img(img_scale, get_bounds=True)
    # rescale bounds based on rescale factor
    return [int(np.rint(b / scale)) for b in bounds]
# Function for cropping image
def crop_img(img,get_bounds=False,scale=1):
    """Interactively select a region of ``img`` with ``cv2.selectROI``.

    Parameters:
        img: image to display and crop.
        get_bounds: when equal to ``True``, return the raw selection instead
            of the cropped image.
        scale: accepted but not used.

    Returns:
        The selection ``r`` from ``cv2.selectROI`` when ``get_bounds`` is
        set; otherwise the sub-image whose first axis spans ``r[1]`` to
        ``r[1] + r[3]`` and whose second axis spans ``r[0]`` to
        ``r[0] + r[2]``.
    """
    # Select ROI, then close the selection window
    roi = cv2.selectROI(img)
    cv2.destroyAllWindows()
    if get_bounds == True:
        return roi
    # Crop image
    col_start, row_start, roi_w, roi_h = roi[0], roi[1], roi[2], roi[3]
    rows = slice(int(row_start), int(row_start + roi_h))
    cols = slice(int(col_start), int(col_start + roi_w))
    return img[rows, cols]
def get_v_metrics(df,show_linear=False,max_t_lag=5*60,t_lag_level=10):
    """Extract lag time, maximum volume and growth slope from a volume curve.

    Parameters:
        df: DataFrame with the columns ``'time (s)'`` and
            ``'Thrombus Volume (um^3)'``.
        show_linear: plot the data, the fitted line, the lag time and the
            maximum.
        max_t_lag: lag time reported when the curve never reaches
            ``t_lag_level`` (forwarded to ``get_lag_time``).
        t_lag_level: volume rise above the first sample that defines the lag
            time (forwarded to ``get_lag_time``).

    Returns:
        One-row DataFrame with the columns ``'V T-lag'``, ``'V Max'`` and
        ``'V Slope'``.
    """
    # maximum volume reached
    maxSC = np.max(df['Thrombus Volume (um^3)'])
    # resample series to finer grain for better analysis
    x, y = interpolate_series(df)
    # index of the first sample within tolerance of the maximum
    max_index = get_max_index(x, y)
    # get lag time and update certain indices if need be
    t_lag, t_lag_index, max_index = get_lag_time(x, y, t_lag_level, max_t_lag, max_index, maxSC)
    # Get slope of linear region
    slope, intercept = get_slope(x, y, t_lag_index, max_index)
    if show_linear:
        plt.figure(2)
        y_linear = x[t_lag_index:max_index] * slope + intercept
        plt.plot(x[t_lag_index:max_index], y_linear, '-r', label='Linear Fit', linewidth=2)
        plt.plot(df['time (s)'], df['Thrombus Volume (um^3)'], 'ob', label='Experimental Data', alpha=0.5)
        plt.axvline(x=t_lag, color='k', linestyle='--', label='T-lag')
        plt.axhline(y=maxSC, color='g', linestyle='--', label='Max')
        plt.xlabel('time (s)')
        plt.ylabel('Thrombus Volume (um^3)')
        plt.title('Experimental Data and Fitted Kinetic Metrics')
        plt.legend()
        plt.show()
    # Store data using pandas
    return pd.DataFrame([{'V T-lag': t_lag, 'V Max': maxSC, 'V Slope': slope}])
def get_slope(x,y,t_lag_index,max_index,metric='max_slope',min_fit=0.5):
    """Estimate the growth slope between the lag index and the max index.

    Parameters:
        x, y: resampled time and volume arrays.
        t_lag_index, max_index: index window handed to the fitting routine.
        metric: ``'max_slope'`` (largest gradient, via ``max_slope``) or
            ``'best_fit'`` (most linear window, via ``chi_squared_min``).
        min_fit: fraction of ``len(x) + 1`` used as the minimum fit length.

    Returns:
        ``(slope, intercept)``. Negative slopes are clamped to 0; the
        intercept is 0 when the fitting routine returned no second
        coefficient.

    Raises:
        ValueError: for an unknown ``metric`` (the original failed with
            UnboundLocalError).
    """
    if metric not in ('max_slope', 'best_fit'):
        raise ValueError(
            "unknown metric {!r}; expected 'max_slope' or 'best_fit'".format(metric))
    min_fit_length = int(round(min_fit * (len(x) + 1)))
    # coarser stepping for long series; never allow a zero step
    if max_index >= 100:
        iterator = max(1, int(round(len(x) / 100)))
    else:
        iterator = 1
    if metric == 'max_slope':
        # largest gradient inside the window
        coefs = max_slope(x, y, t_lag_index, max_index, iterator, min_fit_length)
    else:
        # most linear region inside the window
        coefs = chi_squared_min(x, y, t_lag_index, max_index, iterator, min_fit_length)
    slope = coefs[0]
    # no negative slopes!
    if slope < 0:
        slope = 0
    # chi_squared_min returns a single NaN when no fit was possible
    intercept = coefs[1] if len(coefs) > 1 else 0
    return slope, intercept
def max_slope(x,y,t_lag_index,max_index,iterator,min_fit_length=25):
    """Return the largest gradient of ``y`` between two indices.

    ``y`` is sampled at the positions ``t_lag_index .. max_index - 1`` with
    ``np.interp`` and differentiated with ``np.gradient``.

    NOTE(review): the sample positions are index values but are used as
    x-coordinates, so this assumes ``x`` starts at 0 with unit spacing -
    confirm against the caller.

    Parameters:
        x, y: time and volume arrays.
        t_lag_index, max_index: start (inclusive) and end (exclusive) of the
            sampled window.
        iterator, min_fit_length: accepted but not used.

    Returns:
        ``numpy`` array ``[slope, intercept]`` where ``intercept`` is
        ``y[t_lag_index]``. The slope is NaN when the window holds fewer
        than two samples (the original raised ValueError from
        ``np.gradient``).
    """
    intercept = y[t_lag_index]
    x_resample = np.arange(t_lag_index, max_index)
    if len(x_resample) < 2:
        # a gradient needs at least two samples
        return np.array([np.nan, intercept])
    y_resample = np.interp(x_resample, x, y)
    slope = np.max(np.gradient(y_resample, x_resample))
    return np.array([slope, intercept])
def chi_squared_min(x,y,t_lag_index,max_index,iterator,min_fit_length):
    """Find the window whose straight-line fit has the smallest squared error.

    Every window ``[i, j)`` with ``t_lag_index <= i``, ``j < max_index`` and
    ``j - i >= min_fit_length`` (stepping both ends by ``iterator``) is
    fitted with a first-order polynomial; the window with the smallest sum of
    squared residuals wins.

    The original started the running minimum at ``1e-3``, so unless some
    window fitted almost perfectly no window was ever selected and the
    function failed with UnboundLocalError. The minimum now starts at
    infinity, and residuals are evaluated on the window slice so indices past
    the end of the data cannot be touched.

    Parameters:
        x, y: time and volume sequences.
        t_lag_index, max_index: search range for the window ends.
        iterator: step between candidate window ends (values below 1 are
            treated as 1).
        min_fit_length: minimum window length in samples.

    Returns:
        ``np.polyfit`` coefficients ``[slope, intercept]`` of the best
        window, or ``[nan]`` when no window could be fitted.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    step = max(1, int(iterator))
    chi_min = np.inf
    best_window = None
    for i in range(t_lag_index, max_index, step):
        # j starts at i + min_fit_length, so every window is long enough
        for j in range(i + min_fit_length, max_index, step):
            x_win = x[i:j]
            y_win = y[i:j]
            if not x_win.size or not y_win.size:
                print('Array Empty in Loop')
                print(i, j)
                continue
            coefs_loop = np.polyfit(x_win, y_win, 1)
            residuals = np.polyval(coefs_loop, x_win) - y_win
            chi = np.sum(residuals ** 2)
            if chi < chi_min:
                best_window = (i, j)
                chi_min = chi
    if best_window is None:
        print('Array Empty Outside of Loop')
        print(best_window)
        return [float('nan')]
    i_best, j_best = best_window
    return np.polyfit(x[i_best:j_best], y[i_best:j_best], 1)
# Find the lag time: the first point whose rise above the initial value
# reaches t_lag_level
def get_lag_time(x,y,t_lag_level,max_t_lag,max_index,maxSC):
    """Locate the first sample whose rise above ``y[0]`` reaches a level.

    Parameters:
        x, y: time and volume arrays.
        t_lag_level: rise (rounded to two decimals) that defines the lag
            time.
        max_t_lag: lag time reported when the level is never reached.
        max_index: returned unchanged.
        maxSC: accepted but not used.

    Returns:
        ``(t_lag, t_lag_index, max_index)``. ``t_lag_index`` is reset to 0
        when no sample reaches the level or when the hit lies beyond 60 % of
        the series length; ``t_lag`` itself is not reset in the latter case.
    """
    baseline_removed = y - y[0]
    t_lag, t_lag_index = max_t_lag, None
    for position in range(len(baseline_removed)):
        if round(baseline_removed[position], 2) >= t_lag_level:
            t_lag, t_lag_index = x[position], position
            break
    # fall back to the start of the series when no lag was found, or when
    # the lag is so late that little data would remain for the slope fit
    if t_lag_index is None or t_lag_index > 0.6 * len(x):
        t_lag_index = 0
    return t_lag, t_lag_index, max_index
# Get index for first data point within 20% of the max
def get_max_index(x,y,tolerance=0.2):
    """Return the index of the first sample within ``tolerance`` of the peak.

    Parameters:
        x: accepted but not used.
        y: volume samples.
        tolerance: allowed relative distance ``|y[i] - max| / max``.

    Returns:
        Index of the first sample within tolerance, or -1 if there is none.
        When the peak itself is below 100 the result is ``len(y) + 1``
        regardless of the search.
    """
    peak = np.max(y)
    first_hit = -1
    for idx in range(len(y)):
        relative_gap = np.absolute((y[idx] - peak) / peak)
        if relative_gap <= tolerance:
            first_hit = idx
            break
    # small peaks override the search result
    return len(y) + 1 if peak < 100 else first_hit
def interpolate_series(df,interp_interval=1):
    """Resample the volume curve on an evenly spaced time grid.

    The grid starts at the smallest time and stops before
    ``max time + original sampling interval``; samples past the last
    measurement take the last measured value (``np.interp`` clamps).

    Parameters:
        df: DataFrame with the columns ``'time (s)'`` and
            ``'Thrombus Volume (um^3)'``.
        interp_interval: spacing of the new grid. The original accepted this
            argument but always used 1; the default keeps that behaviour.

    Returns:
        ``(x, y)`` arrays with the new time grid and interpolated volumes.
    """
    # positional access so a DataFrame whose index does not start at 0
    # (e.g. after slicing) still works
    times = df['time (s)'].values
    volumes = df['Thrombus Volume (um^3)'].values
    # original sampling interval; fall back to the grid spacing when there is
    # only one sample
    interval = times[1] - times[0] if len(times) > 1 else interp_interval
    start = np.min(times)
    stop = np.max(times) + interval
    x = np.arange(start, stop, interp_interval)
    y = np.interp(x, times, volumes)
    return x, y