def getFoodContour(mask_file,
                   skeletons_file,
                   use_nn_food_cnt,
                   model_path,
                   solidity_th=0.98,
                   _is_debug=False):
    """Calculate the food contour and store it in both HDF5 files.

    The contour is obtained from ``calculate_food_cnt`` and written as the
    node ``/food_cnt_coord`` in ``skeletons_file`` and in ``mask_file``.
    Any previous ``/food_cnt_coord`` node is removed first; a new one is
    only created when the contour is a 2D array with two columns and at
    least two elements.

    Parameters
    ----------
    mask_file, skeletons_file : str
        HDF5 files opened in ``'r+'`` mode to receive the contour.
    use_nn_food_cnt, model_path, solidity_th, _is_debug
        Forwarded unchanged to ``calculate_food_cnt``; ``use_nn_food_cnt``
        is also stored (as int) as an attribute of the saved node.
    """
    video_name = get_base_name(mask_file)
    timer = TimeCounter('')
    print_flush("{} Calculating food contour {}".format(
        video_name, timer.get_time_str()))

    food_cnt = calculate_food_cnt(mask_file,
                                  use_nn_food_cnt=use_nn_food_cnt,
                                  model_path=model_path,
                                  solidity_th=solidity_th,
                                  _is_debug=_is_debug)

    # The contour does not change between files, so validate it only once.
    has_valid_cnt = (food_cnt is not None
                     and food_cnt.size >= 2
                     and food_cnt.ndim == 2
                     and food_cnt.shape[1] == 2)

    # store contour coordinates into the skeletons file and the mask file
    for target_file in (skeletons_file, mask_file):
        with tables.File(target_file, 'r+') as h5:
            # always drop stale results, even if nothing new is written
            if '/food_cnt_coord' in h5:
                h5.remove_node('/food_cnt_coord')

            if not has_valid_cnt:
                continue

            cnt_node = h5.create_array('/', 'food_cnt_coord', obj=food_cnt)
            cnt_node._v_attrs['use_nn_food_cnt'] = int(use_nn_food_cnt)
def get_food_contour(mask_video,
                     min_area=None,
                     n_bins=180,
                     frac_lowess=0.1,
                     is_debug=False):
    '''
    Identify the contour of a food patch.

    I tested this for the worm rig. It assumes the food has a semi-circular
    shape. The food lawn is very thin so the challenge was to estimate the
    contour of a very dim area.

    Parameters
    ----------
    mask_video : str
        HDF5 file containing a ``/full_data`` node with full frames.
    min_area
        Forwarded to ``get_patch_mask``.
    n_bins, frac_lowess
        Forwarded to ``mask_to_food_contour``.
    is_debug : bool
        If True, plot the image, the skeletonized mask, the fitted circle
        and the contour with matplotlib.

    Returns
    -------
    (circx, circy)
        The first two outputs of ``mask_to_food_contour``, or
        ``(None, None)`` if the file has no ``/full_data`` node.
    '''
    progress_timer = TimeCounter('')
    base_name = get_base_name(mask_video)
    print_flush('{} Calculating food contour...'.format(base_name))

    try:
        with tables.File(mask_video, 'r') as fid:
            # Only the first two full frames are used to build the image,
            # so there is no need to read more than that from disk.
            full_data = fid.get_node('/full_data')[:2]
    except tables.exceptions.NoSuchNodeError:
        return None, None

    # pixel-wise maximum over the frames that were read
    img = np.max(full_data, axis=0)

    mask = get_patch_mask(img, min_area=min_area)
    circx, circy, best_fit = mask_to_food_contour(mask,
                                                  n_bins=n_bins,
                                                  frac_lowess=frac_lowess)

    dd = '{} Food contour calculated. Total time: {}'.format(
        base_name, progress_timer.get_time_str())
    print_flush(dd)

    if is_debug:
        # plotting dependencies are only needed in debug mode
        from skimage.draw import circle_perimeter
        import matplotlib.pylab as plt

        cpx, cpy = circle_perimeter(*best_fit[1:])

        plt.figure(figsize=(5, 5))
        plt.gca().xaxis.set_ticklabels([])
        plt.gca().yaxis.set_ticklabels([])

        (px, py) = np.where(skeletonize(mask))
        plt.imshow(img, cmap='gray')
        plt.plot(py, px, '.')
        plt.plot(cpx, cpy, '.r')
        plt.plot(circy, circx, '.')
        plt.suptitle(base_name)
        # A boolean is required here: the string 'off' is truthy and would
        # switch the grid ON in current matplotlib versions.
        plt.grid(False)

    return circx, circy
def fix_wrong_merges(mask_video, skeletons_file, min_area_limit=50):
    """Split trajectories that were wrongly merged and re-index them.

    Reads ``/trajectories_data`` from ``skeletons_file``, then repeatedly
    filters the table by area and calls ``split_trajectories`` (at most 10
    times, stopping early when an iteration reports nothing to split). The
    resulting indexes are written into a ``worm_index_auto`` column; rows
    that did not get a new index are set to -1.

    Parameters
    ----------
    mask_video : str
        Masked video file, forwarded to ``split_trajectories``.
    skeletons_file : str
        HDF5 file holding the ``/trajectories_data`` table.
    min_area_limit : int
        Area limit forwarded to ``filter_table_by_area``.

    Returns
    -------
    trajectories_data : pandas.DataFrame
        The table with the ``worm_index_auto`` column filled in.
    splitted_points : dict
        Split points accumulated over ALL iterations, keyed as returned by
        ``split_trajectories``. (Previously only the result of the last
        iteration was returned, which is empty whenever the loop converged.)
    """
    # get the trajectories table
    base_name = get_base_name(skeletons_file)
    with pd.HDFStore(skeletons_file, 'r') as fid:
        trajectories_data = fid['/trajectories_data']

    trajectories_data_f = trajectories_data.copy()
    trajectories_data_f['worm_index_auto'] = \
        trajectories_data_f['worm_index_joined']

    print_flush(base_name + ' Spliting wrong merge events.')

    # I do a few iterations because sometimes new trajectories are formed
    # when previous ones are splitted
    max_n_iter = 10
    splitted_points = {}
    for _ in range(max_n_iter):
        trajectories_data_f = filter_table_by_area(
            trajectories_data_f,
            worm_index_type='worm_index_auto',
            min_area_limit=min_area_limit)

        worm_index_new, points2split = split_trajectories(
            mask_video,
            trajectories_data_f,
            worm_index_type='worm_index_auto',
            min_area_limit=25,
            min_area_ratio=0.5,
            buf_size=11)

        # like that it forces the unassigned indexes to be empty
        trajectories_data['worm_index_auto'] = np.int32(-1)
        trajectories_data.loc[worm_index_new.index,
                              'worm_index_auto'] = worm_index_new.values

        if len(points2split) == 0:
            # nothing else to split, the procedure has converged
            break

        for key in points2split:
            splitted_points.setdefault(key, [])
            splitted_points[key] += points2split[key]

    return trajectories_data, splitted_points
def get_indexes_for_training(csv_dir, thresh=0.99, win_size=5):
    """Collect training samples from the prediction CSV files in a directory.

    For every ``*.csv`` file in ``csv_dir`` the rows where ``egg_prob`` is
    above ``thresh`` (predictions) are compared with the rows where
    ``true_events`` is positive (real events). When at least one real event
    has no prediction within one row, the mismatching predictions are
    grouped into runs and the median index of each run is added to the
    real-event indexes. The selected indexes are then passed to
    ``_get_indexes`` together with ``true_events`` and ``win_size``.

    Parameters
    ----------
    csv_dir : str
        Directory that is searched (non-recursively) for ``*.csv`` files.
    thresh : float
        Threshold applied to the ``egg_prob`` column.
    win_size : int
        Forwarded to ``_get_indexes``.

    Returns
    -------
    (BN, X, Y) : tuple of lists
        ``BN`` holds the base name (minus its last five characters) once per
        sample; ``X`` and ``Y`` hold the paired outputs of ``_get_indexes``.
    """
    fnames = glob.glob(os.path.join(csv_dir, '*.csv'))

    def _group_by_index(inds):
        # Group sorted indexes into runs whose consecutive gap is below 3.
        # An index that ends a run starts the next one, so none is dropped.
        groups = []
        prev = None
        for gi in sorted(inds):
            if prev is not None and gi - prev < 3:
                groups[-1].append(gi)
            else:
                groups.append([gi])
            prev = gi
        return groups

    X = []
    Y = []
    BN = []
    for fname in fnames:
        base_name = get_base_name(fname)
        print(base_name)

        results = pd.read_csv(fname, index_col=0)
        pred_inds = np.where(results['egg_prob'] > thresh)[0]
        real_inds = np.where(results['true_events'] > 0)[0]

        bad_inds = []
        # With no predictions (or no real events) there is nothing to compare;
        # np.min over an empty axis would raise otherwise.
        if pred_inds.size > 0 and real_inds.size > 0:
            # rows: real events, columns: predictions
            dist_d = np.abs(pred_inds - real_inds[:, np.newaxis])
            bad = np.min(dist_d, axis=1) > 1
            if np.any(bad):
                bad_pred = pred_inds[np.any(dist_d > 1, axis=0)]
                bad_inds = [
                    int(np.median(x)) for x in _group_by_index(bad_pred)
                ]

        indexes2add = bad_inds + list(real_inds)
        dat = _get_indexes(indexes2add, results['true_events'], win_size)
        xx, yy = zip(*dat)

        BN += [base_name[:-5]] * len(xx)
        X.extend(xx)
        Y.extend(yy)

    return BN, X, Y
def orient_pharinx(masked_file, skeletons_file=None, n_batch=1):
    """Run ``_process_row`` over every individual ROI of a masked video.

    Parameters
    ----------
    masked_file : str
        Masked video file used to generate the ROIs.
    skeletons_file : str, optional
        HDF5 file with ``/trajectories_data``. When omitted it is derived
        from ``masked_file`` by replacing ``MaskedVideos`` with ``Results``
        and ``.hdf5`` with ``_skeletons.hdf5``.
    n_batch : int
        Number of ROIs processed per batch and number of worker processes.
        With ``n_batch <= 1`` the rows are processed serially and the
        results are NOT saved into the HDF5 file.
    """
    if skeletons_file is None:
        skeletons_file = masked_file.replace(
            'MaskedVideos', 'Results').replace('.hdf5', '_skeletons.hdf5')

    base_name = get_base_name(masked_file)
    progress_prefix = base_name + ' Calculating skeletons.'

    with pd.HDFStore(skeletons_file, 'r') as ske_file_id:
        trajectories_data = ske_file_id['/trajectories_data']

    frame_generator = generateMoviesROI(masked_file,
                                        trajectories_data,
                                        progress_prefix=progress_prefix)
    ROIs_generator = generateIndividualROIs(frame_generator,
                                            trajectories_data)

    with tables.File(skeletons_file, "r+") as ske_file_id:
        tot_rows = len(trajectories_data)
        skeletons, has_skeleton = init_data(ske_file_id, tot_rows)

        if n_batch <= 1:
            # run a non-parallel version of the code and do not save the
            # data into the hdf5
            for input_data in ROIs_generator:
                _process_row(input_data)
            return

        def _process_batch(pool, batch):
            # Process one batch in parallel and store its results.
            output = pool.map(_process_row, batch)
            skeletons_id, peaks_coords = map(np.array, zip(*output))
            skeletons[skeletons_id, :, :] = peaks_coords
            for ind in skeletons_id:
                has_skeleton[ind] = True

        # A single pool is reused for all the batches and is released by the
        # context manager (previously a new pool was leaked per batch).
        with mp.Pool(processes=n_batch) as pool:
            batch_input = []
            for input_data in ROIs_generator:
                batch_input.append(input_data)
                if len(batch_input) >= n_batch:
                    _process_batch(pool, batch_input)
                    batch_input = []

            # do not lose the last, possibly incomplete, batch
            if batch_input:
                _process_batch(pool, batch_input)
def get_train_test_files(prev_results=None):
    """Split the available mask files into training and test sets.

    A mask file belongs to the training set when its base name matches one
    of the ``*_res.png`` files found under ``<food_root>/segmentation``;
    otherwise it goes to the test set. Files whose base name is listed in
    ``prev_results`` are ignored. All directories are hard-coded below.

    As a side effect the directory ``<food_root>/train_set_corr`` is created
    if it does not exist.

    Parameters
    ----------
    prev_results : container of str, optional
        Base names to exclude. Defaults to no exclusions.

    Returns
    -------
    (train_files, test_files) : tuple of lists of str
    """
    if prev_results is None:
        prev_results = []

    food_root = '/Users/ajaver/OneDrive - Imperial College London/food/'
    valid_files = glob.glob(os.path.join(food_root, 'segmentation', '**',
                                         '*_res.png'),
                            recursive=True)
    # a set gives O(1) membership tests in the train/test split below
    base_names_f = {
        os.path.basename(x).replace('_res.png', '') for x in valid_files
    }

    all_mask_dir = [
        '/Volumes/behavgenom_archive$/Avelino/screening/CeNDR/MaskedVideos/',
        '/Users/ajaver/OneDrive - Imperial College London/optogenetics/',
        '/Volumes/behavgenom_archive$/Avelino/screening/Development/MaskedVideos/',
        '/Volumes/behavgenom_archive$/Avelino/Worm_Rig_Tests/'
    ]

    food_train_dir = os.path.join(food_root, 'train_set_corr')
    # exist_ok avoids the race between checking and creating the directory
    os.makedirs(food_train_dir, exist_ok=True)

    reserved_ext = tuple(RESERVED_EXT)
    mask_files = []
    for mask_dir in all_mask_dir:
        fnames = glob.glob(os.path.join(mask_dir, '**', '*.hdf5'),
                           recursive=True)
        # skip files produced by the analysis itself
        mask_files += [x for x in fnames if not x.endswith(reserved_ext)]

    bnames = [(x, get_base_name(x)) for x in mask_files]
    bnames = [x for x in bnames if x[1] not in prev_results]

    train_files = [x for x, bn in bnames if bn in base_names_f]
    test_files = [x for x, bn in bnames if bn not in base_names_f]

    return train_files, test_files
if __name__ == '__main__':
    # Script entry point: for every '*_features.hdf5' file found under
    # ./example/, compare its features against the matching '.mat' file and
    # save the comparison as a pdf inside ./plots.
    save_plot_dir = os.path.join('.', 'plots')
    if not os.path.exists(save_plot_dir):
        os.makedirs(save_plot_dir)

    main_dir = './example/'
    search_pattern = os.path.join(main_dir, '**', '*_features.hdf5')
    feat_files = glob.glob(search_pattern, recursive=True)

    # None is passed through to save_features_pdf unchanged
    feats2plot = None

    for feat_file in feat_files:
        print(feat_file)

        # the segworm results share the file name, only the extension differs
        segworm_feat_file = feat_file.replace('.hdf5', '.mat')
        basename = get_base_name(feat_file)
        pdf_file = os.path.join(save_plot_dir,
                                basename + '_feat_comparison.pdf')

        feats_reader = FeatsReaderComp(feat_file, segworm_feat_file)
        tierpsy_feats = feats_reader.read_plate_features()
        segworm_feats = feats_reader.read_feats_segworm()

        tierpsy_feats, segworm_feats = save_features_pdf(
            tierpsy_feats, segworm_feats, pdf_file, feats2plot=feats2plot)
bgnd_s = [ rescale(x, resize_factor, mode='constant') * 255 for x in bgnd ] input_size, output_size, pad_size, tile_corners = get_sizes( bgnd_s[0].shape) for b_img in bgnd_s: Y_pred = [] for mod in [model_patch, model_border]: yy = background_prediction(b_img, mod, n_flips=1, n_tiles=4) Y_pred.append(yy) if IS_PLOT: import matplotlib.pylab as plt n_rows = len(Y_pred) + 1 plt.figure() plt.subplot(1, n_rows, 1) plt.imshow(b_img, cmap='gray') for irow, yy in enumerate(Y_pred): plt.subplot(1, n_rows, irow + 2) plt.imshow(yy, interpolation='none') print(time.time() - tic) if SAVE_RESULTS: result = np.stack([b_img] + Y_pred) bn = get_base_name(mask_file) sname = os.path.join(save_dir, bn + '_food.npy') np.save(sname, result)
def getFoodFeatures(mask_file,
                    skeletons_file,
                    features_file=None,
                    cnt_method='NN',
                    solidity_th=0.98,
                    batch_size=100000,
                    _is_debug=False):
    """Calculate the food contour and the food-related features.

    The contour returned by ``calculate_food_cnt`` is stored, divided by
    ``microns_per_pixel``, in ``/food_cnt_coord`` of ``skeletons_file``, and
    undivided in ``/food/cnt_coordinates`` of ``features_file``. The
    features returned by ``get_cnt_feats`` for every skeleton are stored in
    the table ``/food/features`` of ``features_file``. Rows stay NaN when
    there is no contour.

    Parameters
    ----------
    mask_file : str
        Masked video file used to calculate the contour.
    skeletons_file : str
        HDF5 file holding the ``/skeleton`` node; it is also updated with
        the contour.
    features_file : str, optional
        Output file. Defaults to ``skeletons_file`` without its extension
        plus ``_featuresN.hdf5``.
    cnt_method, solidity_th, _is_debug
        Forwarded to ``calculate_food_cnt``.
    batch_size : int
        Number of skeletons read and processed at a time.
    """
    if features_file is None:
        features_file = remove_ext(skeletons_file) + '_featuresN.hdf5'

    base_name = get_base_name(mask_file)
    progress_timer = TimeCounter('')
    print_flush("{} Calculating food features {}".format(
        base_name, progress_timer.get_time_str()))

    food_cnt = calculate_food_cnt(mask_file,
                                  method=cnt_method,
                                  solidity_th=solidity_th,
                                  _is_debug=_is_debug)
    microns_per_pixel = read_microns_per_pixel(skeletons_file)

    # store contour coordinates in pixels into the skeletons file for
    # visualization purposes
    with tables.File(skeletons_file, 'r+') as fid:
        if '/food_cnt_coord' in fid:
            fid.remove_node('/food_cnt_coord')
        if _is_valid_cnt(food_cnt):
            # the conversion is only done for a valid contour, so a missing
            # contour does not break the arithmetic
            food_cnt_pix = food_cnt / microns_per_pixel
            tab = fid.create_array('/', 'food_cnt_coord', obj=food_cnt_pix)
            tab._v_attrs['method'] = cnt_method

    print_flush("{} Calculating food features {}".format(
        base_name, progress_timer.get_time_str()))

    feats_names = [
        'orient_to_food_cnt', 'dist_from_food_cnt', 'closest_cnt_ind'
    ]
    feats_dtypes = [(x, np.float32) for x in feats_names]

    with tables.File(skeletons_file, 'r') as fid:
        tot_rows = fid.get_node('/skeleton').shape[0]
        # rows are left as NaN unless features can be calculated
        features_df = np.full(tot_rows, np.nan, dtype=feats_dtypes)

        if food_cnt is not None and food_cnt.size > 0:
            for ii in range(0, tot_rows, batch_size):
                skeletons = fid.get_node('/skeleton')[ii:ii + batch_size]
                # scale the skeletons by the same factor used above to
                # relate the contour to its pixel version
                skeletons *= microns_per_pixel

                outputs = get_cnt_feats(skeletons,
                                        food_cnt,
                                        _is_debug=_is_debug)
                for irow, row in enumerate(zip(*outputs)):
                    features_df[irow + ii] = row

    with tables.File(features_file, 'a') as fid:
        # replace any previous results
        if '/food' in fid:
            fid.remove_node('/food', recursive=True)
        fid.create_group('/', 'food')
        if _is_valid_cnt(food_cnt):
            fid.create_carray('/food',
                              'cnt_coordinates',
                              obj=food_cnt,
                              filters=TABLE_FILTERS)
        fid.create_table('/food',
                         'features',
                         obj=features_df,
                         filters=TABLE_FILTERS)

    print_flush("{} Calculating food features {}".format(
        base_name, progress_timer.get_time_str()))
def alignStageMotion(masked_file, skeletons_file):
    """Align the stage log with the video and save the result.

    The frame-to-frame differences of the video (``getFrameDiffVar``) are
    matched against the media times of ``/stage_log`` using
    ``findStageMovement``. Everything is stored in the group
    ``/stage_movement`` of ``skeletons_file``; its attribute
    ``has_finished`` is 0 while the alignment is running and 1 once the
    results were saved.

    Parameters
    ----------
    masked_file : str
        HDF5 file with the nodes ``/xml_info``, ``/mask`` and ``/stage_log``.
    skeletons_file : str
        HDF5 file that receives the ``/stage_movement`` group. Its
        ``/timestamp/raw`` node is used if it is present and has no NaN.

    Raises
    ------
    ValueError
        If the number of timestamps does not match the number of frame
        differences.
    """
    base_name = get_base_name(masked_file)
    print_flush(base_name + ' Aligning Stage Motion...')

    fps = read_fps(skeletons_file)

    # Open the information file and read the tracking delay time.
    # (help from segworm findStageMovement)
    # 2. The info file contains the tracking delay. This delay represents the
    # minimum time between stage movements and, conversely, the maximum time it
    # takes for a stage movement to complete. If the delay is too small, the
    # stage movements become chaotic. We load the value for the delay.
    with tables.File(masked_file, 'r') as fid:
        xml_info = fid.get_node('/xml_info').read().decode()
        g_mask = fid.get_node('/mask')

        tot_frames = g_mask.shape[0]
        # Read the scale conversions, we would need this when we want to
        # convert the pixels into microns
        pixelPerMicronX = 1 / g_mask._v_attrs['pixels2microns_x']
        pixelPerMicronY = 1 / g_mask._v_attrs['pixels2microns_y']

    with pd.HDFStore(masked_file, 'r') as fid:
        stage_log = fid['/stage_log']

    # this is not the cleanest but matlab does not have a xml parser from
    # text string
    delay_str = xml_info.partition('<delay>')[-1].partition('</delay>')[0]
    delay_time = float(delay_str) / 1000  # the value is divided by 1000
    delay_frames = np.ceil(delay_time * fps)

    # root mean square of both scales (X and Y)
    normScale = np.sqrt((pixelPerMicronX**2 + pixelPerMicronY**2) / 2)
    pixelPerMicronScale = normScale * np.array(
        (np.sign(pixelPerMicronX), np.sign(pixelPerMicronY)))

    # Compute the rotation matrix.
    angle = np.arctan(pixelPerMicronY / pixelPerMicronX)
    if angle > 0:
        angle = np.pi / 4 - angle
    else:
        angle = np.pi / 4 + angle

    cosAngle = np.cos(angle)
    sinAngle = np.sin(angle)
    rotation_matrix = np.array(((cosAngle, -sinAngle), (sinAngle, cosAngle)))

    # Ev's code uses the full vectors without dropping frames
    # 1. video2Diff differentiates a video frame by frame and outputs the
    # differential variance. We load these frame differences.
    frame_diffs_d = getFrameDiffVar(masked_file)

    print_flush(base_name + ' Aligning Stage Motion...')

    # Read the media times and locations from the log file.
    # (help from segworm findStageMovement)
    # 3. The log file contains the initial stage location at media time 0 as
    # well as the subsequent media times and locations per stage movement. Our
    # algorithm attempts to match the frame differences in the video (see step
    # 1) to the media times in this log file. Therefore, we load these media
    # times and stage locations.
    # from the .log.csv file
    mediaTimes = stage_log['stage_time'].values
    locations = stage_log[['stage_x', 'stage_y']].values

    # ini stage movement fields
    with tables.File(skeletons_file, 'r+') as fid:
        # delete data from previous analysis if any
        if '/stage_movement' in fid:
            fid.remove_node('/stage_movement', recursive=True)
        g_stage_movement = fid.create_group('/', 'stage_movement')
        g_stage_movement._v_attrs['has_finished'] = 0

        # read and prepare timestamp
        try:
            video_timestamp_ind = fid.get_node('/timestamp/raw')[:]
            if np.any(np.isnan(video_timestamp_ind)):
                raise ValueError()
            else:
                # the alias np.int was removed from numpy, the builtin int
                # is the same type
                video_timestamp_ind = video_timestamp_ind.astype(int)
        except (tables.exceptions.NoSuchNodeError, ValueError):
            warnings.warn(
                'It is corrupt or do not exist. I will assume no dropped frames and deduce it from the number of frames.'
            )
            video_timestamp_ind = np.arange(tot_frames, dtype=int)

    # The shift makes everything a bit more complicated. I have to remove the
    # first frame, before resizing the array considering the dropping frames.
    if video_timestamp_ind.size > frame_diffs_d.size + 1:
        # i can tolerate one frame (two with respect to the frame_diff)
        # extra at the end of the timestamp
        video_timestamp_ind = video_timestamp_ind[:frame_diffs_d.size + 1]

    dd = video_timestamp_ind - np.min(video_timestamp_ind) - 1  # shift data
    dd = dd[dd >= 0]

    if frame_diffs_d.size != dd.size:
        raise ValueError(
            'Number of timestamps do not match the number of frames in the movie.'
        )

    # positions without a matching timestamp are left as nan
    frame_diffs = np.full(int(np.max(video_timestamp_ind)), np.nan)
    frame_diffs[dd] = frame_diffs_d

    # save stage data into the skeletons.hdf5
    with tables.File(skeletons_file, 'r+') as fid:
        # I am saving this data before for debugging purposes
        g_stage_movement = fid.get_node('/stage_movement')
        fid.create_carray(g_stage_movement, 'frame_diffs', obj=frame_diffs_d)
        g_stage_movement._v_attrs['fps'] = fps
        g_stage_movement._v_attrs['delay_frames'] = delay_frames
        g_stage_movement._v_attrs[
            'microns_per_pixel_scale'] = pixelPerMicronScale
        g_stage_movement._v_attrs['rotation_matrix'] = rotation_matrix

    # run the alignment; any exception raised here propagates to the caller
    # and leaves 'has_finished' as 0
    is_stage_move, movesI, stage_locations = \
        findStageMovement(frame_diffs, mediaTimes, locations, delay_frames, fps)
    stage_vec_d, is_stage_move_d = shift2video_ref(is_stage_move, movesI,
                                                   stage_locations,
                                                   video_timestamp_ind)

    # save stage data into the skeletons.hdf5
    with tables.File(skeletons_file, 'r+') as fid:
        g_stage_movement = fid.get_node('/stage_movement')
        fid.create_carray(g_stage_movement, 'stage_vec', obj=stage_vec_d)
        fid.create_carray(g_stage_movement,
                          'is_stage_move',
                          obj=is_stage_move_d)
        g_stage_movement._v_attrs['has_finished'] = 1

    _h_add_stage_position_pix(masked_file, skeletons_file)
    print_flush(base_name + ' Aligning Stage Motion. Finished.')
def _get_timeseries_feats(features_file, delta_time=1 / 3):
    '''
    Get all the time series features from the skeletons.

    For every trajectory (rows of ``/trajectories_data`` grouped by
    ``worm_index_joined``) the coordinates stored under ``/coordinates`` are
    read and passed to ``get_timeseries_features``.

    Parameters
    ----------
    features_file : str
        HDF5 file with ``/trajectories_data``, ``/coordinates`` and,
        optionally, ``/food_cnt_coord``.
    delta_time : float
        NOTE(review): this argument is not used anywhere in the body.

    Returns
    -------
    pandas.DataFrame
        Features of all the trajectories concatenated, with ``worm_index``
        as the first column. ``worm_index`` and ``timestamp`` are int32, the
        remaining columns are float32.
    '''
    fps = read_fps(features_file)

    with pd.HDFStore(features_file, 'r') as fid:
        trajectories_data = fid['/trajectories_data']
    trajectories_data_g = trajectories_data.groupby('worm_index_joined')

    progress_timer = TimeCounter('')
    base_name = get_base_name(features_file)
    tot_worms = len(trajectories_data_g)

    def _display_progress(n):
        # display progress
        dd = " Calculating tierpsy features. Worm %i of %i done." % (
            n + 1, tot_worms)
        print_flush(base_name + dd + ' Total time:' +
                    progress_timer.get_time_str())

    _display_progress(0)

    # If i find the ventral side in the multiworm case this has to change
    ventral_side = read_ventral_side(features_file)

    coord_names = ('skeletons', 'widths', 'dorsal_contours',
                   'ventral_contours')

    timeseries_features = []
    # The file is opened only once instead of once per trajectory.
    with tables.File(features_file, 'r') as fid:
        if '/food_cnt_coord' in fid:
            food_cnt = fid.get_node('/food_cnt_coord')[:]
        else:
            food_cnt = None

        for ind_n, (worm_index, worm_data) in enumerate(trajectories_data_g):
            skel_id = worm_data['skeleton_id'].values

            # deal with any nan in the skeletons (negative ids have no data)
            good_id = skel_id >= 0
            skel_id_val = skel_id[good_id]
            traj_size = skel_id.size

            args = []
            for p in coord_names:
                node = fid.get_node('/coordinates/' + p)

                # rows without a valid id are left as nan
                dat = np.full((traj_size, *node.shape[1:]), np.nan)
                if skel_id_val.size > 0:
                    if len(node.shape) == 3:
                        dd = node[skel_id_val, :, :]
                    else:
                        dd = node[skel_id_val, :]
                    dat[good_id] = dd
                args.append(dat)

            timestamp = worm_data['timestamp_raw'].values.astype(np.int32)

            feats = get_timeseries_features(*args,
                                            timestamp=timestamp,
                                            food_cnt=food_cnt,
                                            fps=fps,
                                            ventral_side=ventral_side)

            # save timeseries features data
            feats = feats.astype(np.float32)
            feats['worm_index'] = worm_index

            # move the last field to the first column
            cols = feats.columns.tolist()
            cols = cols[-1:] + cols[:-1]
            feats = feats[cols]

            feats['worm_index'] = feats['worm_index'].astype(np.int32)
            feats['timestamp'] = feats['timestamp'].astype(np.int32)

            timeseries_features.append(feats)
            _display_progress(ind_n)

    timeseries_features = pd.concat(timeseries_features, ignore_index=True)
    return timeseries_features
def save_timeseries_feats_table(features_file,
                                derivate_delta_time,
                                fovsplitter_param=None):
    """Calculate the time series features and save them in ``features_file``.

    The nodes ``/timeseries_data``, ``/event_durations`` and
    ``/timeseries_features`` are removed if present, and a new table
    ``/timeseries_data`` is filled, one trajectory at a time, with the output
    of ``get_timeseries_features``.

    Parameters
    ----------
    features_file : str
        HDF5 file with ``/trajectories_data`` and ``/coordinates``. It is
        modified in place.
    derivate_delta_time
        Forwarded to ``get_timeseries_features``.
    fovsplitter_param : dict, optional
        Keyword arguments for ``FOVMultiWellsSplitter``. When it is not
        empty it must contain ``total_n_wells`` (> 0), ``whichsideup`` and
        ``well_shape``; the wells are then written to ``features_file`` and
        each row is labelled with its well. Otherwise ``well_name`` is
        ``'n/a'``.
    """
    if fovsplitter_param is None:
        fovsplitter_param = {}

    fps = read_fps(features_file)

    # initialise class for splitting fov
    is_fov_tosplit = len(fovsplitter_param) > 0
    if is_fov_tosplit:
        assert all(key in fovsplitter_param
                   for key in ['total_n_wells', 'whichsideup', 'well_shape'])
        assert fovsplitter_param['total_n_wells'] > 0
    print('is fov to split?', is_fov_tosplit)

    if is_fov_tosplit:
        # split fov in wells
        masked_image_file = features_file.replace('Results', 'MaskedVideos')
        masked_image_file = masked_image_file.replace('_featuresN.hdf5',
                                                      '.hdf5')
        fovsplitter = FOVMultiWellsSplitter(masked_image_file,
                                            **fovsplitter_param)
        # store wells data in the features file
        fovsplitter.write_fov_wells_to_file(features_file)

    with pd.HDFStore(features_file, 'r') as fid:
        trajectories_data = fid['/trajectories_data']
    trajectories_data_g = trajectories_data.groupby('worm_index_joined')

    progress_timer = TimeCounter('')
    base_name = get_base_name(features_file)
    tot_worms = len(trajectories_data_g)

    def _display_progress(n):
        # display progress
        dd = " Calculating tierpsy features. Worm %i of %i done." % (
            n + 1, tot_worms)
        print_flush(base_name + dd + ' Total time:' +
                    progress_timer.get_time_str())

    _display_progress(0)

    with tables.File(features_file, 'r+') as fid:
        # remove results from previous runs
        for gg in [
                '/timeseries_data', '/event_durations',
                '/timeseries_features'
        ]:
            if gg in fid:
                fid.remove_node(gg)

        feat_dtypes = [(x, np.float32) for x in timeseries_all_columns]
        feat_dtypes = [('worm_index', np.int32), ('timestamp', np.int32),
                       ('well_name', 'S3')] + feat_dtypes

        timeseries_features = fid.create_table('/',
                                               'timeseries_data',
                                               obj=np.recarray(0, feat_dtypes),
                                               filters=TABLE_FILTERS)

        if '/food_cnt_coord' in fid:
            food_cnt = fid.get_node('/food_cnt_coord')[:]
        else:
            food_cnt = None

        # If i find the ventral side in the multiworm case this has to change
        ventral_side = read_ventral_side(features_file)

        for ind_n, (worm_index, worm_data) in enumerate(trajectories_data_g):
            skel_id = worm_data['skeleton_id'].values

            # deal with any nan in the skeletons (negative ids have no data)
            good_id = skel_id >= 0
            skel_id_val = skel_id[good_id]
            traj_size = skel_id.size

            args = []
            for p in ('skeletons', 'widths', 'dorsal_contours',
                      'ventral_contours'):
                node_str = '/coordinates/' + p
                if node_str in fid:
                    node = fid.get_node(node_str)
                    # rows without a valid id are left as nan
                    dat = np.full((traj_size, *node.shape[1:]), np.nan)
                    if skel_id_val.size > 0:
                        if len(node.shape) == 3:
                            dd = node[skel_id_val, :, :]
                        else:
                            dd = node[skel_id_val, :]
                        dat[good_id] = dd
                else:
                    # a missing node is passed on as None
                    dat = None
                args.append(dat)

            timestamp = worm_data['timestamp_raw'].values.astype(np.int32)

            feats = get_timeseries_features(
                *args,
                timestamp=timestamp,
                food_cnt=food_cnt,
                fps=fps,
                ventral_side=ventral_side,
                derivate_delta_time=derivate_delta_time)

            # save timeseries features data
            feats = feats.astype(np.float32)
            feats['worm_index'] = worm_index
            if is_fov_tosplit:
                feats['well_name'] = \
                    fovsplitter.find_well_from_trajectories_data(worm_data)
            else:
                feats['well_name'] = 'n/a'

            # cast well_name to the correct type
            # (before shuffling columns, so it remains the last entry)
            # needed because for some reason this does not work:
            # feats['well_name'] = feats['well_name'].astype('S3')
            feats['_well_name'] = feats['well_name'].astype('S3')
            feats.drop(columns='well_name', inplace=True)
            feats.rename(columns={'_well_name': 'well_name'}, inplace=True)

            # move the last two fields to the first columns and then swap
            # the second and third columns, so the order starts with
            # worm_index, <first original column>, well_name
            cols = feats.columns.tolist()
            cols = cols[-2:] + cols[:-2]
            cols[1], cols[2] = cols[2], cols[1]
            feats = feats[cols]

            feats['worm_index'] = feats['worm_index'].astype(np.int32)
            feats['timestamp'] = feats['timestamp'].astype(np.int32)
            feats = feats.to_records(index=False)

            timeseries_features.append(feats)
            _display_progress(ind_n)
def getWormFeaturesFilt(skeletons_file, features_file, use_skel_filter,
                        use_manual_join, is_single_worm, feat_filt_param,
                        split_traj_time):
    """Extract the features of every valid trajectory into ``features_file``.

    ``features_file`` is created from scratch (mode ``'w'``) and receives:
    ``/experiment_info`` (copied if present in ``skeletons_file``),
    ``/features_timeseries``, ``/features_events/worm_<index>``,
    ``/coordinates/{skeletons,dorsal_contours,ventral_contours}``,
    ``/features_summary/<stat>`` and ``<stat>_split`` for every ``stat`` in
    ``FUNC_FOR_DIV``, plus ``/features_means`` and ``/features_means_split``.

    Parameters
    ----------
    skeletons_file : str
        Input HDF5 file.
    features_file : str
        Output HDF5 file, overwritten.
    use_skel_filter, use_manual_join
        Forwarded to ``getGoodTrajIndexes``; ``use_skel_filter`` is also
        forwarded to ``WormFromTable``.
    is_single_worm : bool
        If True ``worm.correct_schafer_worm()`` is applied to each worm.
    feat_filt_param : dict
        Expanded as keyword arguments of ``min_num_skel_defaults``; the
        result must provide ``min_num_skel`` and ``bad_seg_thresh``.
    split_traj_time
        Multiplied by the fps and rounded to get the number of frames used
        to split each trajectory.
    """
    feat_filt_param = min_num_skel_defaults(skeletons_file, **feat_filt_param)

    def _iniFileGroups():
        # Create the empty tables/groups of the output file.
        # It uses wStats, features_fid, worm_index_type and tot_worms from
        # the enclosing scope, so it can only be called after they exist.

        # initialize groups for the timeseries and event features
        header_timeseries = {
            feat: tables.Float32Col(pos=ii)
            for ii, (feat, _) in enumerate(wStats.feat_timeseries_dtype)
        }
        table_timeseries = features_fid.create_table('/',
                                                     'features_timeseries',
                                                     header_timeseries,
                                                     filters=TABLE_FILTERS)
        # save some data used in the calculation as attributes
        # (the values returned here are not used afterwards)
        fps, microns_per_pixel, _ = copy_unit_conversions(
            table_timeseries, skeletons_file)
        table_timeseries._v_attrs['worm_index_type'] = worm_index_type
        # node to save features events
        group_events = features_fid.create_group('/', 'features_events')
        # save the skeletons
        with tables.File(skeletons_file, 'r') as ske_file_id:
            skel_shape = ske_file_id.get_node('/skeleton').shape
        worm_coords_array = {}
        w_node = features_fid.create_group('/', 'coordinates')
        for array_name in ['skeletons', 'dorsal_contours', 'ventral_contours']:
            worm_coords_array[array_name] = features_fid.create_earray(
                w_node,
                array_name,
                shape=(0, skel_shape[1], skel_shape[2]),
                atom=tables.Float32Atom(shape=()),
                filters=TABLE_FILTERS)
        # initialize rec array with the averaged features of each worm
        stats_features_df = {
            stat: np.full(tot_worms, np.nan, dtype=wStats.feat_avg_dtype)
            for stat in FUNC_FOR_DIV
        }
        return header_timeseries, table_timeseries, group_events, worm_coords_array, stats_features_df

    # timer used to report the elapsed time in the progress messages
    progress_timer = TimeCounter('')

    def _displayProgress(n):
        # display progress (base_name and tot_worms are defined further down)
        dd = " Extracting features. Worm %i of %i done." % (n, tot_worms)
        print_flush(base_name + dd + ' Total time:' +
                    progress_timer.get_time_str())

    #get the valid number of worms
    good_traj_index, worm_index_type = getGoodTrajIndexes(
        skeletons_file, use_skel_filter, use_manual_join, is_single_worm,
        feat_filt_param)
    fps = read_fps(skeletons_file)
    split_traj_frames = int(np.round(split_traj_time *
                                     fps))  #the fps could be non integer
    # name used as prefix of the messages printed below
    base_name = get_base_name(skeletons_file)
    with tables.File(features_file, 'w') as features_fid:
        # copy the experiment information, if any, into the new file
        with tables.File(skeletons_file, 'r') as skel_fid:
            if '/experiment_info' in skel_fid:
                dd = skel_fid.get_node('/experiment_info').read()
                features_fid.create_array('/', 'experiment_info', obj=dd)
        #total number of worms
        tot_worms = len(good_traj_index)
        if tot_worms == 0:
            # the file is left with, at most, the experiment_info node
            print_flush(
                base_name +
                ' No valid worms found to calculate features. Creating empty file.'
            )
            return
        # initialize by getting the specs data subdivision
        wStats = WormStats()
        all_splitted_feats = {stat: [] for stat in FUNC_FOR_DIV}
        #initialize file
        header_timeseries, table_timeseries, group_events, \
        worm_coords_array, stats_features_df = _iniFileGroups()
        _displayProgress(0)
        # start to calculate features for each worm trajectory
        for ind_N, worm_index in enumerate(good_traj_index):
            # initialize worm object, and extract data from skeletons file
            worm = WormFromTable(skeletons_file,
                                 worm_index,
                                 use_skel_filter=use_skel_filter,
                                 worm_index_type=worm_index_type,
                                 smooth_window=5)
            if is_single_worm:
                #worm with the stage correction applied
                worm.correct_schafer_worm()
                if np.all(np.isnan(worm.skeleton[:, 0, 0])):
                    # NOTE(review): the message says the worm is skipped, but
                    # the whole function returns here and the output file is
                    # left without the summary tables - confirm it is intended.
                    print_flush(
                        '{} Not valid skeletons found after stage correction. Skiping worm index {}'
                        .format(base_name, worm_index))
                    return
            # calculate features
            timeseries_data, events_data, worm_stats = getOpenWormData(
                worm, wStats)
            #get splitted features (only the pieces with enough valid skeletons)
            splitted_worms = [
                x for x in worm.split(split_traj_frames)
                if x.n_valid_skel > feat_filt_param['min_num_skel']
                and x.n_valid_skel / x.n_frames >= feat_filt_param['bad_seg_thresh']
            ]
            dd = [getFeatStats(x, wStats)[1] for x in splitted_worms]
            splitted_feats = {
                stat: [x[stat] for x in dd]
                for stat in FUNC_FOR_DIV
            }
            #% add data to save
            # save timeseries data
            table_timeseries.append(timeseries_data)
            table_timeseries.flush()
            # save skeletons
            worm_coords_array['skeletons'].append(worm.skeleton)
            worm_coords_array['dorsal_contours'].append(worm.dorsal_contour)
            worm_coords_array['ventral_contours'].append(worm.ventral_contour)
            # save event data as a subgroup per worm
            worm_node = features_fid.create_group(group_events,
                                                  'worm_%i' % worm_index)
            worm_node._v_attrs['worm_index'] = worm_index
            worm_node._v_attrs['frame_range'] = np.array(
                (worm.first_frame, worm.last_frame))
            for feat in events_data:
                tmp_data = events_data[feat]
                # consider the cases where the output is a single number, empty
                # or None
                if isinstance(tmp_data, (float, int)):
                    tmp_data = np.array([tmp_data])
                if tmp_data is None or tmp_data.size == 0:
                    tmp_data = np.array([np.nan])
                features_fid.create_carray(worm_node,
                                           feat,
                                           obj=tmp_data,
                                           filters=TABLE_FILTERS)
            # store the average for each worm feature
            for stat in FUNC_FOR_DIV:
                stats_features_df[stat][ind_N] = worm_stats[stat]
                #append the splitted traj features
                all_splitted_feats[stat] += splitted_feats[stat]
            # report progress
            _displayProgress(ind_N + 1)
        # create and save a table containing the averaged worm feature for each
        # worm
        f_node = features_fid.create_group('/', 'features_summary')
        for stat, stats_df in stats_features_df.items():
            splitted_feats = all_splitted_feats[stat]
            #check that the array is not empty
            if len(splitted_feats) > 0:
                splitted_feats_arr = np.array(splitted_feats)
            else:
                #return a row full of nan to indicate a fail
                splitted_feats_arr = np.full(1,
                                             np.nan,
                                             dtype=wStats.feat_avg_dtype)
            features_fid.create_table(f_node,
                                      stat,
                                      obj=stats_df,
                                      filters=TABLE_FILTERS)
            feat_stat_split = features_fid.create_table(f_node,
                                                        stat + '_split',
                                                        obj=splitted_feats_arr,
                                                        filters=TABLE_FILTERS)
            feat_stat_split._v_attrs['split_traj_frames'] = split_traj_frames
            if stat == 'means':
                #FUTURE: I am duplicating this field for backward compatibility, I should remove it later on.
                features_fid.create_table('/',
                                          'features_means',
                                          obj=stats_df,
                                          filters=TABLE_FILTERS)
                features_fid.create_table('/',
                                          'features_means_split',
                                          obj=splitted_feats_arr,
                                          filters=TABLE_FILTERS)
    print_flush(base_name + ' Feature extraction finished: ' +
                progress_timer.get_time_str())
def filterByPopulationMorphology(skeletons_file,
                                 good_skel_row,
                                 critical_alpha=0.01):
    """Flag as bad the skeletons that are outliers in the population.

    The nodes ``/skeleton_length``, ``/contour_area`` and ``/width_midbody``
    of ``skeletons_file`` are passed to ``_h_getMahalanobisRobust``. The rows
    it reports as outliers are removed from ``is_good_skel``, and a bit flag
    per feature set is stored in ``skel_outliers_flag``. The modified
    ``trajectories_data`` table is saved back into ``skeletons_file``.

    Parameters
    ----------
    skeletons_file : str
        HDF5 file with the ``/trajectories_data`` table. Modified in place.
    good_skel_row : numpy.ndarray
        Forwarded to ``_h_getMahalanobisRobust``. If it is empty no outlier
        is calculated, but the table is still saved.
    critical_alpha : float
        Forwarded to ``_h_getMahalanobisRobust``.
    """
    base_name = get_base_name(skeletons_file)
    progress_timer = TimeCounter('')

    print_flush(base_name + ' Filter Skeletons: Starting...')
    with pd.HDFStore(skeletons_file, 'r') as table_fid:
        trajectories_data = table_fid['/trajectories_data']

    if 'is_good_skel' not in trajectories_data:
        trajectories_data['is_good_skel'] = trajectories_data['has_skeleton']

    # nothing to do if there are not valid skeletons left.
    if good_skel_row.size > 0:
        print_flush(
            base_name +
            ' Filter Skeletons: Reading features for outlier identification.')

        # add possible missing fields that were not calculated in older
        # versions of the software
        _addMissingFields(skeletons_file)

        # calculate classifier for the outliers
        nodes4fit = ['/skeleton_length', '/contour_area', '/width_midbody']
        worm_morph = _h_nodes2Array(skeletons_file, nodes4fit, -1)
        feats4fit = [worm_morph]

        print_flush(base_name +
                    ' Filter Skeletons: Calculating outliers. Total time:' +
                    progress_timer.get_time_str())

        tot_rows2fit = feats4fit[0].shape[0]
        # check all the data to fit has the same size in the first axis
        assert all(tot_rows2fit == featdat.shape[0] for featdat in feats4fit)

        # The aliases np.bool and np.int were removed from numpy; the
        # builtins give exactly the same dtypes.
        outliers_rob = np.zeros(tot_rows2fit, bool)
        outliers_flag = np.zeros(tot_rows2fit, int)
        assert len(feats4fit) < 64  # otherwise the outlier flag will not work

        for out_ind, dat in enumerate(feats4fit):
            maha, out_d, lim_d = _h_getMahalanobisRobust(
                dat, critical_alpha, good_skel_row)
            outliers_rob = outliers_rob | out_d

            # flag the outlier flag by turning on the corresponding bit
            outliers_flag += (out_d) * (2**out_ind)

        print_flush(
            base_name +
            ' Filter Skeletons: Labeling valid skeletons. Total time:' +
            progress_timer.get_time_str())

        # labeled rows of valid individual skeletons as GOOD_SKE
        trajectories_data['is_good_skel'] &= ~outliers_rob
        trajectories_data['skel_outliers_flag'] = outliers_flag

    # Save the new is_good_skel column (booleans are stored as uint8)
    if trajectories_data['is_good_skel'].dtypes == bool:
        trajectories_data['is_good_skel'] = trajectories_data[
            'is_good_skel'].astype(np.uint8)
    save_modified_table(skeletons_file, trajectories_data,
                        'trajectories_data')

    print_flush(base_name + ' Filter Skeletons: Finished. Total time:' +
                progress_timer.get_time_str())
tables.Float32Atom(dflt=np.nan), dims, filters=TABLE_FILTERS) traj_dat = ske_file_id.get_node('/trajectories_data') has_skeleton = traj_dat.cols.has_skeleton has_skeleton[:] = np.zeros_like(has_skeleton) #delete previous return skeletons, has_skeleton if __name__ == '__main__': masked_file = '/Volumes/behavgenom_archive$/Serena/SpikingDatasetRecordings51-64/MaskedVideos/recording60/recording60.2g/recording60.2g_X1.hdf5' skeletons_file = masked_file.replace('MaskedVideos', 'Results').replace('.hdf5', '_skeletons.hdf5') base_name = get_base_name(masked_file) progress_prefix = base_name + ' Calculating skeletons.' with pd.HDFStore(skeletons_file, 'r') as ske_file_id: trajectories_data = ske_file_id['/trajectories_data'] n_batch = mp.cpu_count() frame_generator = generateMoviesROI(masked_file, trajectories_data, progress_prefix=progress_prefix) ROIs_generator = generateIndividualROIs(frame_generator) tot = 0
#is this head coord vs tail coord? orientation = skel_coords[:, 0, :] - skel_coords[:, -1, :] speed = np.sqrt(np.sum(velocity**2, 1)) signed_speed = np.sign(np.sum(velocity * orientation, 1)) * speed return signed_speed, velocity DEBUG = False if __name__ == '__main__': filenames = glob.glob( "/data2/shared/data/twoColour/Results/*/*/*52.1g_X1_skeletons.hdf5") for skeletons_file in filenames: base_name = get_base_name(skeletons_file) progress_prefix = base_name + ' Calculating skeletons.' with pd.HDFStore(skeletons_file, 'r') as ske_file_id: trajectories_data = ske_file_id['/trajectories_data'] blob_features = ske_file_id['/blob_features'] #I want to update blob_features blob_features['signed_speed'] = np.float32(np.nan) blob_features['velocity_x'] = np.float32(np.nan) blob_features['velocity_y'] = np.float32(np.nan) progress_timer = TimeCounter('') with tables.File(skeletons_file, 'r') as fid: skeletons = fid.get_node('/skeleton') grouped_by_index = trajectories_data.groupby('worm_index_joined')
def smooth_skeletons_table(skeletons_file,
                           features_file,
                           is_WT2=False,
                           skel_smooth_window=5,
                           coords_smooth_window_s=0.25,
                           gap_to_interp_s=0.25):
    '''
    Smooth the skeletons stored in `skeletons_file` and write the result,
    worm by worm, into a freshly created `features_file`.

    Parameters
    ----------
    skeletons_file : str
        Path of the HDF5 file with the raw skeletons (must contain
        '/skeleton'; the remaining data is read through project helpers).
    features_file : str
        Path of the output HDF5 file. It is opened in 'w' mode, so any
        existing file is overwritten.
    is_WT2 : bool
        If True `worm.correct_schafer_worm()` is called on every worm and
        the flag is forwarded to `_r_fill_blob_features`.
    skel_smooth_window : int
        Forwarded unchanged to `SmoothedWorm`.
    coords_smooth_window_s, gap_to_interp_s : float
        Window sizes in seconds; converted to frames using the fps read
        from `skeletons_file`.

    Output layout of `features_file`
    --------------------------------
    /food_cnt_coord            (only if a food contour could be read)
    /coordinates/skeletons, dorsal_contours, ventral_contours, widths
    /trajectories_data         (attrs: is_WT2, ventral_side + unit conversions)
    /blob_features             (only if `_r_fill_blob_features` returns data)

    Raises
    ------
    ValueError
        If the timestamps of a worm cannot be matched against the
        'timestamp_raw' column of its trajectories data.
    '''
    fps = read_fps(skeletons_file)
    coords_smooth_window = int(np.round(fps * coords_smooth_window_s))
    gap_to_interp = int(np.round(fps * gap_to_interp_s))

    if coords_smooth_window <= 3:
        # window too short to be useful; None is handed to SmoothedWorm
        # (original note: "do not interpolate")
        coords_smooth_window = None

    trajectories_data = _r_fill_trajectories_data(skeletons_file)
    trajectories_data_g = trajectories_data.groupby('worm_index_joined')

    progress_timer = TimeCounter('')
    base_name = get_base_name(skeletons_file)
    tot_worms = len(trajectories_data_g)

    def _display_progress(n):
        # display progress
        dd = " Smoothing skeletons. Worm %i of %i done." % (n, tot_worms)
        print_flush(base_name + dd + ' Total time:' +
                    progress_timer.get_time_str())

    _display_progress(0)

    food_cnt = read_food_contour(skeletons_file)
    with tables.File(skeletons_file, 'r') as fid:
        n_segments = fid.get_node('/skeleton').shape[1]

    # output array name -> attribute of the worm object that feeds it
    coord_fields = (
        ('skeletons', 'skeleton'),
        ('dorsal_contours', 'dorsal_contour'),
        ('ventral_contours', 'ventral_contour'),
        ('widths', 'widths'),
    )

    with tables.File(features_file, 'w') as fid_features:
        if food_cnt is not None:
            fid_features.create_array('/',
                                      'food_cnt_coord',
                                      obj=food_cnt.astype(np.float32))

        # initialize the extendable arrays
        worm_coords_array = {}
        w_node = fid_features.create_group('/', 'coordinates')
        for array_name, _ in coord_fields:
            # widths hold one value per segment, the rest an (x, y) pair
            if array_name != 'widths':
                a_shape = (0, n_segments, 2)
            else:
                a_shape = (0, n_segments)

            worm_coords_array[array_name] = fid_features.create_earray(
                w_node,
                array_name,
                shape=a_shape,
                atom=tables.Float32Atom(shape=()),
                filters=TABLE_FILTERS)

        tot_skeletons = 0
        for ind_n, (worm_index, worm_data) in enumerate(trajectories_data_g):
            # nothing to smooth with fewer than two skeletonized frames
            if worm_data['was_skeletonized'].sum() < 2:
                continue

            worm = WormFromTable(skeletons_file,
                                 worm_index,
                                 worm_index_type='worm_index_joined')
            if is_WT2:
                worm.correct_schafer_worm()

            if np.sum(~np.isnan(worm.skeleton[:, 0, 0])) <= 2:
                warnings.warn('Not enough data to smooth. Empty file?')
                # fall back to the unsmoothed worm
                wormN = worm
            else:
                wormN = SmoothedWorm(worm.skeleton,
                                     worm.widths,
                                     worm.ventral_contour,
                                     worm.dorsal_contour,
                                     skel_smooth_window=skel_smooth_window,
                                     coords_smooth_window=coords_smooth_window,
                                     gap_to_interp=gap_to_interp)

            # flag the rows of trajectories_data that have a matching
            # timestamp in the worm object
            dat_index = pd.Series(False,
                                  index=worm_data['timestamp_raw'].values)
            try:
                dat_index[worm.timestamp] = True
            except ValueError as err:
                # previously this dropped into pdb, which stalls or aborts
                # an unattended run; fail with an explicit message instead
                raise ValueError(
                    "{}: timestamps of worm {} do not match the "
                    "'timestamp_raw' values in trajectories_data.".format(
                        base_name, worm_index)) from err

            # rows in the output arrays that this worm will occupy
            skeleton_id = np.arange(wormN.skeleton.shape[0]) + tot_skeletons
            tot_skeletons = skeleton_id[-1] + 1
            row_ind = worm_data.index[dat_index.values]
            trajectories_data.loc[row_ind, 'skeleton_id'] = skeleton_id

            # add data
            for array_name, attr_name in coord_fields:
                worm_coords_array[array_name].append(
                    getattr(wormN, attr_name))

            # display progress
            _display_progress(ind_n + 1)

        # save trajectories data
        newT = fid_features.create_table(
            '/',
            'trajectories_data',
            obj=trajectories_data.to_records(index=False),
            filters=TABLE_FILTERS)
        copy_unit_conversions(newT, skeletons_file)
        newT._v_attrs['is_WT2'] = is_WT2
        newT._v_attrs['ventral_side'] = read_ventral_side(skeletons_file)

        # save blob features interpolating in dropped frames and stage
        # movement (WT2)
        blob_features = _r_fill_blob_features(skeletons_file,
                                              trajectories_data, is_WT2)
        if blob_features is not None:
            fid_features.create_table(
                '/',
                'blob_features',
                obj=blob_features.to_records(index=False),
                filters=TABLE_FILTERS)
#mask_dir = '/Users/ajaver/OneDrive - Imperial College London/optogenetics/ATR_210417' #mask_dir = '/Users/ajaver/OneDrive - Imperial College London/optogenetics/Arantza/MaskedVideos/**/' fnames = glob.glob(os.path.join(mask_dir, '*.hdf5')) fnames = [ x for x in fnames if any(fnmatch.fnmatch(x, ext) for ext in exts) ] for mask_video in fnames: circx, circy = get_food_contour(mask_video, n_bins=n_bins, frac_lowess=frac_lowess, is_debug=True) #%% base_name = get_base_name(mask_video) with tables.File(mask_video, 'r') as fid: full_data = fid.get_node('/full_data')[:] full_min = np.max(full_data, axis=0) full_max = np.min(full_data, axis=0) #%% import matplotlib.pylab as plt for nn in range(full_data.shape[0]): plt.figure() plt.imshow(full_data[nn], cmap='gray') plt.plot(circy, circx) break #%%
def save_timeseries_feats_table(features_file, derivate_delta_time):
    '''
    Compute the timeseries features of every worm in `features_file` and
    store them in the '/timeseries_data' table of the same file.

    Parameters
    ----------
    features_file : str
        Path of the HDF5 file. '/trajectories_data' is read from it and
        the '/coordinates/*' arrays are used when present. The nodes
        '/timeseries_data', '/event_durations' and '/timeseries_features'
        are removed if they already exist before the new table is created.
    derivate_delta_time :
        Forwarded unchanged to `get_timeseries_features`.

    Notes
    -----
    Rows with `skeleton_id < 0` are kept: their coordinates are passed to
    `get_timeseries_features` as NaN.
    '''
    fps = read_fps(features_file)

    with pd.HDFStore(features_file, 'r') as fid:
        trajectories_data = fid['/trajectories_data']

    trajectories_data_g = trajectories_data.groupby('worm_index_joined')
    progress_timer = TimeCounter('')
    base_name = get_base_name(features_file)
    tot_worms = len(trajectories_data_g)

    def _display_progress(n):
        # display progress; `n` is the number of worms already processed
        dd = " Calculating tierpsy features. Worm %i of %i done." % (
            n, tot_worms)
        print_flush(base_name + dd + ' Total time:' +
                    progress_timer.get_time_str())

    _display_progress(0)

    coord_names = ('skeletons', 'widths', 'dorsal_contours',
                   'ventral_contours')

    with tables.File(features_file, 'r+') as fid:
        # start from a clean slate
        for gg in [
                '/timeseries_data', '/event_durations', '/timeseries_features'
        ]:
            if gg in fid:
                fid.remove_node(gg)

        feat_dtypes = [(x, np.float32) for x in timeseries_all_columns]
        feat_dtypes = [('worm_index', np.int32),
                       ('timestamp', np.int32)] + feat_dtypes
        timeseries_features = fid.create_table('/',
                                               'timeseries_data',
                                               obj=np.recarray(0, feat_dtypes),
                                               filters=TABLE_FILTERS)

        if '/food_cnt_coord' in fid:
            food_cnt = fid.get_node('/food_cnt_coord')[:]
        else:
            food_cnt = None

        # If i find the ventral side in the multiworm case this has to change
        ventral_side = read_ventral_side(features_file)

        for ind_n, (worm_index, worm_data) in enumerate(trajectories_data_g):
            skel_id = worm_data['skeleton_id'].values

            # deal with any nan in the skeletons
            good_id = skel_id >= 0
            skel_id_val = skel_id[good_id]
            traj_size = skel_id.size

            # the coordinates are read through the handle that is already
            # open instead of re-opening the file for every worm
            args = []
            for p in coord_names:
                node_str = '/coordinates/' + p
                if node_str in fid:
                    node = fid.get_node(node_str)
                    # NaN everywhere, then fill the rows with a valid id
                    dat = np.full((traj_size, *node.shape[1:]), np.nan)
                    if skel_id_val.size > 0:
                        if len(node.shape) == 3:
                            dd = node[skel_id_val, :, :]
                        else:
                            dd = node[skel_id_val, :]
                        dat[good_id] = dd
                else:
                    dat = None
                args.append(dat)

            timestamp = worm_data['timestamp_raw'].values.astype(np.int32)

            feats = get_timeseries_features(
                *args,
                timestamp=timestamp,
                food_cnt=food_cnt,
                fps=fps,
                ventral_side=ventral_side,
                derivate_delta_time=derivate_delta_time)

            # save timeseries features data
            feats = feats.astype(np.float32)
            feats['worm_index'] = worm_index

            # move the last field (worm_index) to the first column
            cols = feats.columns.tolist()
            cols = cols[-1:] + cols[:-1]
            feats = feats[cols]

            feats['worm_index'] = feats['worm_index'].astype(np.int32)
            feats['timestamp'] = feats['timestamp'].astype(np.int32)
            feats = feats.to_records(index=False)

            timeseries_features.append(feats)
            _display_progress(ind_n + 1)
#mask_dir = '/Users/ajaver/OneDrive - Imperial College London/optogenetics/ATR_210417' #mask_dir = '/Users/ajaver/OneDrive - Imperial College London/optogenetics/Arantza/MaskedVideos/**/' mask_dir = '/Users/ajaver/OneDrive - Imperial College London/aggregation/' fnames = glob.glob(os.path.join(mask_dir, '*.hdf5')) #fnames = glob.glob(os.path.join(mask_dir, 'oig-8_ChR2_ATR_herms_3_Ch1_11052017_170502.hdf5')) fnames = [x for x in fnames if not any(x.endswith(ext) for ext in RESERVED_EXT)] for ivid, mask_video in enumerate(fnames): skeletons_file = mask_video.replace('MaskedVideos','Results').replace('.hdf5', '_skeletons.hdf5') if not os.path.exists(skeletons_file): continue base_name = get_base_name(mask_video) print('{} of {} {}'.format(ivid+1, len(fnames), base_name)) #%% trajectories_data, splitted_points = \ fix_wrong_merges(mask_video, skeletons_file, min_area_limit ) #%% print_flush('{} Creating trajectories graph network.'.format(base_name)) node_weights, DG, trajectories_data = \ get_node_weights(trajectories_data, mask_video, **args_graph)
#is this head coord vs tail coord? orientation = skel_coords[:,0,:]-skel_coords[:,-1,:] #remove last point for consistency with the velocity vector orientation = orientation[:-1] speed = np.sqrt(np.sum(velocity**2, 1)); signed_speed = np.sign(np.sum(velocity*orientation, 1))*speed; return signed_speed DEBUG = False if __name__ == '__main__': skeletons_file = '/Volumes/behavgenom_archive$/Serena/SpikingDatasetRecordings51-64/Results/recording60/recording60.2g/recording60.2g_X1_skeletons.hdf5' base_name = get_base_name(skeletons_file) progress_prefix = base_name + ' Calculating skeletons.' with pd.HDFStore(skeletons_file, 'r') as ske_file_id: trajectories_data = ske_file_id['/trajectories_data'] blob_features = ske_file_id['/blob_features'] #I want to update blob_features blob_features['signed_speed'] = np.nan progress_timer = TimeCounter('') with tables.File(skeletons_file, 'r') as fid: skeletons = fid.get_node('/skeleton') grouped_by_index = trajectories_data.groupby('worm_index_joined') tot_worms = len(grouped_by_index) for ii, (worm_index, worm_data) in enumerate(grouped_by_index):