def read_data_generator(masked_file, trajectories_data):
    roi_generator = generateMoviesROI(masked_file, trajectories_data)
    for worms_in_frame in roi_generator:
        for ind, roi_dat in worms_in_frame.items():
            row_data = trajectories_data.loc[ind]
            worm_img, roi_corner = roi_dat
            skeleton_id = int(row_data['skeleton_id'])
            yield (ind, worm_img, roi_corner, skeleton_id)
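# Usage sketch for read_data_generator (a minimal example, not part of the original
# module): the file paths are placeholders, and the skeletons file is assumed to
# hold a '/trajectories_data' table as in the other snippets in this collection.
import pandas as pd

masked_file = 'example.hdf5'                  # placeholder masked video
skeletons_file = 'example_skeletons.hdf5'     # placeholder skeletons file

with pd.HDFStore(skeletons_file, 'r') as fid:
    trajectories_data = fid['/trajectories_data']

# consume the (row index, ROI image, ROI corner, skeleton_id) tuples
for ind, worm_img, roi_corner, skeleton_id in read_data_generator(masked_file, trajectories_data):
    print(ind, worm_img.shape, roi_corner, skeleton_id)
    break  # only inspect the first ROI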
def indentifyValidWorms(masked_file, trajectories_data, model_path, frame_subsampling):
    '''
    Use a pre-trained neural network to identify blobs that correspond to worms
    or worm aggregates.

    frame_subsampling - number of frames skipped. We do not need to calculate
    in every frame; a value near the number of fps is sensible.
    '''
    if model_path.endswith('.pth'):
        is_pytorch = True
        model = load_pytorch_model(model_path)
        roi_size = MODEL_ROIS_TRAINED_SIZE
    else:
        is_pytorch = False
        model = load_model(model_path)
        roi_size = model.input_shape[2]

    proba_func = partial(getWormProba,
                         roi_size=roi_size,
                         model=model,
                         is_pytorch=is_pytorch)

    frame_numbers = trajectories_data['frame_number'].unique()
    frame_numbers = frame_numbers[::frame_subsampling]
    trajectories_data_rec = trajectories_data[
        trajectories_data['frame_number'].isin(frame_numbers)].copy()

    base_name = masked_file.rpartition('.')[0].rpartition(os.sep)[-1]
    progress_prefix = base_name + ' Identifying valid worm trajectories.'

    # get generators to extract the ROIs and calculate the worm probabilities from them
    ROIs_generator = generateMoviesROI(masked_file,
                                       trajectories_data_rec,
                                       roi_size=roi_size,
                                       progress_prefix=progress_prefix)
    worm_probs_gen = map(proba_func, ROIs_generator)

    # here we really execute the code
    out_per_frame = [x for x in worm_probs_gen]

    # pull all the outputs into a nice format and add the results into the table
    indexes, worm_probs = [np.concatenate(x) for x in zip(*out_per_frame)]
    trajectories_data_rec['worm_prob'] = pd.Series(worm_probs, indexes)

    worm_ind_prob = trajectories_data_rec.groupby(
        'worm_index_joined').aggregate({'worm_prob': np.median})['worm_prob']

    valid_worms_indexes = worm_ind_prob.index[worm_ind_prob > 0.5]

    return valid_worms_indexes
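# Usage sketch for indentifyValidWorms (illustrative only): keep the trajectories
# the network classifies as worms. The file and model paths are placeholders;
# frame_subsampling is set close to the video fps, as the docstring suggests.
with pd.HDFStore('example_skeletons.hdf5', 'r') as fid:
    trajectories_data = fid['/trajectories_data']

valid_indexes = indentifyValidWorms('example.hdf5',
                                    trajectories_data,
                                    model_path='model_isworm.h5',   # placeholder model file
                                    frame_subsampling=25)           # ~25 fps video assumed

trajectories_data = trajectories_data[
    trajectories_data['worm_index_joined'].isin(valid_indexes)]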
def getBlobsFeats(skeletons_file, masked_image_file, strel_size):
    # extract the base name from the masked_image_file. This is used in the
    # progress status.
    base_name = masked_image_file.rpartition('.')[0].rpartition(os.sep)[-1]
    progress_prefix = base_name + ' Calculating individual blobs features.'

    # read the trajectories data with pandas
    with pd.HDFStore(skeletons_file, 'r') as ske_file_id:
        trajectories_data = ske_file_id['/trajectories_data']

    with tables.File(skeletons_file, 'r') as ske_file_id:
        dd = ske_file_id.get_node('/trajectories_data')
        is_light_background = dd._v_attrs['is_light_background']
        expected_fps = dd._v_attrs['expected_fps']
        bgnd_param = dd._v_attrs['bgnd_param']
        bgnd_param = json.loads(bgnd_param.decode("utf-8"))

    # get generators to extract the ROIs for each frame
    ROIs_generator = generateMoviesROI(masked_image_file,
                                       trajectories_data,
                                       bgnd_param=bgnd_param,
                                       progress_prefix=progress_prefix)

    def _gen_rows_blocks():
        block_size = 1000

        # group the rows from ROIs_generator into blocks; this should balance
        # the data given to each thread
        block = []
        for roi_dicts in ROIs_generator:
            for irow, (roi_image, roi_corner) in roi_dicts.items():
                block.append((irow, (roi_image.copy(), roi_corner)))
                if len(block) == block_size:
                    yield block
                    block = []
        if len(block) > 0:
            yield block

    def _roi2feats(block):
        # calculate the blob features from a block of ROIs
        output = []
        for irow, (roi_image, roi_corner) in block:
            row_data = trajectories_data.loc[irow]
            blob_mask, blob_cnt, _ = getWormMask(
                roi_image,
                row_data['threshold'],
                strel_size,
                min_blob_area=row_data['area'] / 2,
                is_light_background=is_light_background)
            feats = _getBlobFeatures(blob_cnt, blob_mask, roi_image, roi_corner)
            output.append((irow, feats))
        return output

    # initialize the output data as a numpy recarray (pytables friendly format)
    feats_names = ['coord_x', 'coord_y', 'area', 'perimeter', 'box_length',
                   'box_width', 'quirkiness', 'compactness', 'box_orientation',
                   'solidity', 'intensity_mean', 'intensity_std',
                   'hu0', 'hu1', 'hu2', 'hu3', 'hu4', 'hu5', 'hu6']
    features_df = np.recarray(len(trajectories_data),
                              dtype=[(x, np.float32) for x in feats_names])

    feats_generator = map(_roi2feats, _gen_rows_blocks())
    for block in feats_generator:
        for irow, row_dat in block:
            features_df[irow] = row_dat

    # save the features into the skeletons file
    with tables.File(skeletons_file, 'r+') as fid:
        if '/blob_features' in fid:
            fid.remove_node('/blob_features')
        fid.create_table('/',
                         'blob_features',
                         obj=features_df,
                         filters=TABLE_FILTERS)
        assert all(x in feats_names for x in fid.get_node('/blob_features').colnames)
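# Usage sketch for getBlobsFeats (illustrative only): the paths are placeholders and
# strel_size=5 matches the default used by trajectories2Skeletons below. The call
# writes the '/blob_features' table into the skeletons file, which can be read back.
getBlobsFeats('example_skeletons.hdf5', 'example.hdf5', strel_size=5)

with tables.File('example_skeletons.hdf5', 'r') as fid:
    blob_features = fid.get_node('/blob_features')[:]
print(blob_features['area'][:5])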
def trajectories2Skeletons(skeletons_file,
                           masked_image_file,
                           resampling_N=49,
                           min_blob_area=50,
                           strel_size=5,
                           worm_midbody=(0.35, 0.65),
                           analysis_type="WORM",
                           skel_args={'num_segments': 24, 'head_angle_thresh': 60}):

    # get the index range used for the midbody width
    midbody_ind = (int(np.floor(worm_midbody[0] * resampling_N)),
                   int(np.ceil(worm_midbody[1] * resampling_N)))

    # read the trajectories data with pandas
    with pd.HDFStore(skeletons_file, 'r') as ske_file_id:
        trajectories_data = ske_file_id['/trajectories_data']

    # extract the base name from the masked_image_file. This is used in the
    # progress status.
    base_name = masked_image_file.rpartition('.')[0].rpartition(os.sep)[-1]
    progress_prefix = base_name + ' Calculating skeletons.'

    # open the skeleton file for append and the compressed video as read
    with tables.File(skeletons_file, "r+") as ske_file_id:
        # attributes useful to understand if we are dealing with dark or light worms
        bgnd_param = ske_file_id.get_node('/trajectories_data')._v_attrs['bgnd_param']
        bgnd_param = json.loads(bgnd_param.decode("utf-8"))
        is_light_background = ske_file_id.get_node('/trajectories_data')._v_attrs['is_light_background']
        if len(bgnd_param) > 0:
            # background subtraction inverts the blob polarity
            # (at least if is_light_background is true)
            is_light_background = not is_light_background

        # get generators to extract the ROIs for each frame
        ROIs_generator = generateMoviesROI(masked_image_file,
                                           trajectories_data,
                                           bgnd_param=bgnd_param,
                                           progress_prefix=progress_prefix)

        # add data from the experiment info (currently only for singleworm)
        with tables.File(masked_image_file, "r") as mask_fid:
            if '/experiment_info' in ske_file_id:
                ske_file_id.remove_node('/', 'experiment_info')
            if '/experiment_info' in mask_fid:
                dd = mask_fid.get_node('/experiment_info').read()
                ske_file_id.create_array('/', 'experiment_info', obj=dd)

        # initialize arrays to save the skeletons data
        tot_rows = len(trajectories_data)
        skel_arrays, has_skeleton = _initSkeletonsArrays(ske_file_id,
                                                         tot_rows,
                                                         resampling_N,
                                                         worm_midbody)

        # dictionary to store the previous skeleton of each worm
        prev_skeleton = {}

        for worms_in_frame in ROIs_generator:
            for ind, roi_dat in worms_in_frame.items():
                row_data = trajectories_data.loc[ind]
                worm_img, roi_corner = roi_dat
                skeleton_id = int(row_data['skeleton_id'])

                # get the previous worm skeleton to orient the new one
                worm_index = row_data['worm_index_joined']
                if worm_index not in prev_skeleton:
                    prev_skeleton[worm_index] = np.zeros(0)

                if analysis_type == "ZEBRAFISH":
                    output = _zebra_func(worm_img, skel_args, resampling_N)
                else:
                    _, worm_cnt, _ = getWormMask(worm_img,
                                                 row_data['threshold'],
                                                 strel_size,
                                                 min_blob_area=row_data['area'] / 2,
                                                 is_light_background=is_light_background)
                    # get the skeleton
                    output = getSkeleton(worm_cnt,
                                         prev_skeleton[worm_index],
                                         resampling_N,
                                         **skel_args)

                if output is not None and output[0].size > 0:
                    skeleton, ske_len, cnt_side1, cnt_side2, cnt_widths, cnt_area = output
                    prev_skeleton[worm_index] = skeleton.copy()

                    # mark the row as a valid skeleton
                    has_skeleton[skeleton_id] = True

                    # save the segworm results
                    skel_arrays['skeleton_length'][skeleton_id] = ske_len
                    skel_arrays['contour_width'][skeleton_id, :] = cnt_widths

                    mid_width = np.median(cnt_widths[midbody_ind[0]:midbody_ind[1] + 1])
                    skel_arrays['width_midbody'][skeleton_id] = mid_width

                    # convert into the main image coordinates
                    skel_arrays['skeleton'][skeleton_id, :, :] = skeleton + roi_corner
                    skel_arrays['contour_side1'][skeleton_id, :, :] = cnt_side1 + roi_corner
                    skel_arrays['contour_side2'][skeleton_id, :, :] = cnt_side2 + roi_corner
                    skel_arrays['contour_area'][skeleton_id] = cnt_area
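# Usage sketch for trajectories2Skeletons (illustrative only): the file names are
# placeholders and the keyword arguments simply spell out the defaults defined above.
trajectories2Skeletons('example_skeletons.hdf5',
                       'example.hdf5',
                       resampling_N=49,
                       min_blob_area=50,
                       strel_size=5,
                       analysis_type='WORM')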
from pathlib import Path
import json

import pandas as pd

from tierpsy.analysis.ske_create.helperIterROI import generateMoviesROI

mask_file = Path(
    '/Users/avelinojaver/OneDrive - Nexus365/worms/Bertie_movies/CX11314_Ch1_04072017_103259.hdf5'
)
root_dir = '/Users/avelinojaver/OneDrive - Nexus365/worms/Bertie_movies/'

for mask_file in list(Path(root_dir).glob('*.hdf5')):
    skeletons_file = mask_file.parent / 'Results' / (mask_file.stem + '_skeletons.hdf5')

    with pd.HDFStore(str(skeletons_file), "r") as ske_file_id:
        # attribute useful to understand if we are dealing with dark or light worms
        bgnd_param = ske_file_id.get_node('/plate_worms')._v_attrs['bgnd_param']
        bgnd_param = json.loads(bgnd_param.decode("utf-8"))
        print(bgnd_param)
#%%
# use the last mask_file / skeletons_file from the loop above
with pd.HDFStore(str(skeletons_file), 'r') as fid:
    trajectories_data = fid['/trajectories_data']

ROIs_generator = generateMoviesROI(str(mask_file),
                                   trajectories_data,
                                   bgnd_param=bgnd_param,
                                   progress_prefix='')
for frame_props in ROIs_generator:
    break
""" Created on Thu Dec 17 21:57:30 2015 @author: ajaver """ import pandas as pd import matplotlib.pylab as plt from tierpsy.analysis.ske_create.helperIterROI import generateMoviesROI, getROIfromInd masked_file = "/Volumes/behavgenom_archive$/Avelino/screening/CeNDR/MaskedVideos/CeNDR_Exp_250417/BRC20067_worms10_food1-3_Set4_Pos5_Ch2_25042017_140346.hdf5" skeletons_file = masked_file.replace('MaskedVideos', 'Results').replace('.hdf5', '_skeletons.hdf5') with pd.HDFStore(skeletons_file, 'r') as fid: trajectories_data = fid['/trajectories_data'] roi_generator = generateMoviesROI(masked_file, trajectories_data) frame_data = next(roi_generator) for row in frame_data: img_roi, roi_corner = frame_data[row] plt.figure() plt.imshow(img_roi, interpolation=None, cmap='gray') #import h5py #import pandas as pd #import numpy as np #from skimage.filters import threshold_otsu #from scipy.signal import medfilt
def _process_file(row):
    experiment_id, skel_file = row
    mask_file = skel_file.replace(results_dir, mask_dir).replace(traj_ext, '.hdf5')

    with pd.HDFStore(skel_file, 'r') as fid:
        trajectories_data = fid['/trajectories_data']

    trajectories_data['roi_index'] = np.float32(-1)

    # reduce the table: keep only trajectories longer than min_num_frames
    traj_sizes = trajectories_data['worm_index_joined'].value_counts()
    valid_ind = traj_sizes[traj_sizes > min_num_frames].index
    good = trajectories_data['worm_index_joined'].isin(valid_ind)
    trajectories_data = trajectories_data[good]

    # sort by worm_index and frame_number so the ROIs are stored in a friendlier way
    trajectories_data_r = trajectories_data[valid_columns]
    trajectories_data_r = trajectories_data_r.sort_values(by=['worm_index_joined', 'frame_number'])

    tot_rows = len(trajectories_data_r)
    trajectories_data_r['roi_index'] = np.arange(tot_rows, dtype=np.int32)

    # make the file used to save the ROI video
    roi_file = mask_file.replace(mask_dir, roi_dir).replace('.hdf5', '_ROIs.hdf5')
    dname = os.path.dirname(roi_file)
    if not os.path.exists(dname):
        os.makedirs(dname)

    if os.path.exists(roi_file):
        try:
            with tables.File(roi_file, 'r') as fid:
                has_finished = fid.get_node('/mask')._v_attrs['has_finished']
                if has_finished == 1:
                    return
        except (OSError, tables.exceptions.HDF5ExtError, KeyError):
            pass
        except Exception as e:
            print(type(e))
            raise e

    with tables.File(roi_file, 'w') as fid_roi:
        rois_c = fid_roi.create_carray('/',
                                       'mask',
                                       atom=tables.UInt8Atom(),
                                       shape=(tot_rows, roi_size, roi_size),
                                       chunkshape=(25, roi_size, roi_size),
                                       filters=TABLE_FILTERS)
        rois_c._v_attrs['has_finished'] = 0

        fid_roi.create_table('/',
                             "trajectories_data",
                             trajectories_data_r.to_records(index=False),
                             filters=TABLE_FILTERS)

        progress_prefix = '{} of {} | {}'.format(experiment_id + 1,
                                                 len(fnames),
                                                 os.path.basename(mask_file))
        gen = generateMoviesROI(mask_file,
                                trajectories_data,
                                roi_size,
                                progress_prefix=progress_prefix)

        for worms_in_frame in gen:
            for row_ind, (roi_img, roi_corner) in worms_in_frame.items():
                roi_img, roi_corner = pad_if_necessary(roi_img, roi_corner, roi_size)
                roi_index = trajectories_data_r.loc[row_ind, 'roi_index']
                rois_c[roi_index] = roi_img

        rois_c._v_attrs['has_finished'] = 1
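# Usage sketch for _process_file (illustrative only). The function reads several
# module-level names (results_dir, mask_dir, roi_dir, traj_ext, roi_size,
# min_num_frames, valid_columns, fnames, TABLE_FILTERS, pad_if_necessary); the
# values below are placeholder assumptions, not the project's real configuration.
import glob

results_dir = '/data/Results'
mask_dir = '/data/MaskedVideos'
roi_dir = '/data/ROIs'
traj_ext = '_skeletons.hdf5'
roi_size = 128
min_num_frames = 250
valid_columns = ['worm_index_joined', 'frame_number', 'coord_x', 'coord_y',
                 'threshold', 'roi_size', 'area']
fnames = sorted(glob.glob(os.path.join(results_dir, '*' + traj_ext)))

for experiment_id, skel_file in enumerate(fnames):
    _process_file((experiment_id, skel_file))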
def get_egg_probabilities(masked_file,
                          trajectories_data,
                          model,
                          roi_size=-1,
                          progress_prefix=''):
    tot_frames = trajectories_data['frame_number'].max() + 1
    progress_prefix = progress_prefix + ' Searching egg events'

    ROIs_generator = generateMoviesROI(masked_file,
                                       trajectories_data,
                                       roi_size=roi_size,
                                       progress_prefix=progress_prefix)

    roi_model = model.input_shape[1]
    buff_size = model.input_shape[-1]
    output_size = model.output_shape[-2]
    assert model.output_shape[-1] == 2

    worm_buff = []
    seq_dat = []
    worm_probs = np.full((tot_frames, output_size, 2), np.nan)

    for worms_in_frame in ROIs_generator:
        # we are only dealing with the single worm case
        assert len(worms_in_frame) == 1

        for ind, (worm_img, roi_corner) in worms_in_frame.items():
            row_data = trajectories_data.loc[ind]
            frame_number = row_data['frame_number']

            worm_img = _fix_padding(worm_img, roi_corner, row_data['roi_size'])

            if len(worm_buff) < buff_size:
                worm_buff.append(worm_img)
            else:
                worm_buff = worm_buff[1:] + [worm_img]

            worm_seq = np.array(worm_buff, np.float32)
            worm_seq = normalize_seq(worm_seq, channel_axis=0)

            if worm_img.shape[0] != roi_model:
                worm_seq = [cv2.resize(x, (roi_model, roi_model)) for x in worm_seq]

            worm_seq = np.rollaxis(np.array(worm_seq), 0, 3)
            #worm_seq = shift_and_normalize(worm_seq)

            seq_dat.append((frame_number - 1, worm_seq))

            if (len(seq_dat) + 1) % 100 == 0:
                frame_numbers, worm_seq_batch = map(np.array, zip(*seq_dat))
                worm_prob_batch = model.predict(worm_seq_batch, verbose=0)
                worm_probs[frame_numbers] = worm_prob_batch
                seq_dat = []

    # flush any remaining sequences
    if len(seq_dat) > 0:
        frame_numbers, worm_seq_batch = map(np.array, zip(*seq_dat))
        worm_prob = model.predict(worm_seq_batch, verbose=0)
        worm_probs[frame_numbers] = worm_prob

    return worm_probs
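# Usage sketch for get_egg_probabilities (illustrative only). A Keras model is
# assumed here, since the function relies on model.input_shape / model.predict;
# the file paths and the model file are placeholders.
import pandas as pd
from keras.models import load_model

masked_file = 'example.hdf5'
with pd.HDFStore('example_skeletons.hdf5', 'r') as fid:
    trajectories_data = fid['/trajectories_data']

model = load_model('egg_model.h5')   # placeholder trained model
egg_probs = get_egg_probabilities(masked_file, trajectories_data, model)
print(egg_probs.shape)   # (tot_frames, output_size, 2)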