Пример #1
0
    def correct_schafer_worm(self):
        """Add the inverse stage vector to the worm coordinates.

        If the attribute ``stage_vec_inv`` already exists the method only
        prints a notice and returns. Otherwise it stores ``ventral_side``
        and ``stage_vec_inv``, flags in ``is_stage_move`` the frames whose
        stage vector is NaN, sets ``widths`` to NaN on those frames and adds
        the stage vector to each of ``skeleton``, ``ventral_contour`` and
        ``dorsal_contour`` that is present on the instance.

        Raises:
            AssertionError: if ``isGoodStageAligment(self.file_name)`` is
                falsy.
        """
        already_corrected = hasattr(self, 'stage_vec_inv')
        if already_corrected:
            print(
                'The worm has been previously corrected. The attribute "stage_vec_inv" exists. '
            )
            return

        self.ventral_side = read_ventral_side(self.file_name)

        assert isGoodStageAligment(self.file_name)
        stage_inv, _ = _h_get_stage_inv(self.file_name, self.timestamp)
        self.stage_vec_inv = stage_inv

        # Drop the data recorded while the stage is moving (the blurred image
        # can induce artifacts); those frames have a NaN stage vector.
        self.is_stage_move = np.isnan(self.stage_vec_inv[:, 0])
        self.widths[self.is_stage_move, :] = np.nan

        for coord_name in ('skeleton', 'ventral_contour', 'dorsal_contour'):
            if not hasattr(self, coord_name):
                continue
            # The new axis broadcasts the per-frame vector over the middle
            # axis of the coordinate array.
            shifted = (getattr(self, coord_name) +
                       self.stage_vec_inv[:, np.newaxis, :])
            setattr(self, coord_name, shifted)
Пример #2
0
def _get_ventral_side(features_file):
    """Return the ventral orientation code for ``features_file``.

    The value is ``'?'`` when ``read_ventral_side`` yields a falsy value or
    the string ``'unknown'``; otherwise it is ``'CW'``.
    """
    side = read_ventral_side(features_file)
    side_is_known = bool(side) and side != 'unknown'
    # The ventral and dorsal contours are merged so that the ventral contour
    # is clockwise, hence any known side is reported as 'CW'.
    return 'CW' if side_is_known else '?'
def save_timeseries_feats_table(features_file,
                                derivate_delta_time,
                                fovsplitter_param=None):
    """Compute the time-series features of every worm and store them on disk.

    The features are appended, one trajectory at a time, to a freshly
    created ``/timeseries_data`` table inside ``features_file``. Any existing
    ``/timeseries_data``, ``/event_durations`` or ``/timeseries_features``
    node is removed first.

    Args:
        features_file: path of the HDF5 features file. It is read for
            ``/trajectories_data``, ``/coordinates/*`` and, if present,
            ``/food_cnt_coord``, and it is modified in place.
        derivate_delta_time: forwarded unchanged to
            ``get_timeseries_features``.
        fovsplitter_param: optional mapping of keyword arguments for
            ``FOVMultiWellsSplitter``. When non-empty it must contain the
            keys ``'total_n_wells'`` (> 0), ``'whichsideup'`` and
            ``'well_shape'``. ``None`` or an empty mapping disables the
            splitting and every row gets the well name ``'n/a'``.

    Raises:
        AssertionError: if ``fovsplitter_param`` is non-empty but lacks a
            required key or has a non-positive ``'total_n_wells'``.
    """
    fps = read_fps(features_file)

    # ``None`` replaces the former shared mutable ``{}`` default; both mean
    # "do not split the field of view".
    if fovsplitter_param is None:
        fovsplitter_param = {}

    is_fov_tosplit = len(fovsplitter_param) > 0
    if is_fov_tosplit:
        assert all(key in fovsplitter_param
                   for key in ['total_n_wells', 'whichsideup', 'well_shape'])
        assert fovsplitter_param['total_n_wells'] > 0
    print('is fov to split?', is_fov_tosplit)

    if is_fov_tosplit:
        # The masked video path is derived from the features file path.
        masked_image_file = features_file.replace('Results', 'MaskedVideos')
        masked_image_file = masked_image_file.replace('_featuresN.hdf5',
                                                      '.hdf5')
        fovsplitter = FOVMultiWellsSplitter(masked_image_file,
                                            **fovsplitter_param)
        # store wells data in the features file
        fovsplitter.write_fov_wells_to_file(features_file)

    with pd.HDFStore(features_file, 'r') as fid:
        trajectories_data = fid['/trajectories_data']

    trajectories_data_g = trajectories_data.groupby('worm_index_joined')
    progress_timer = TimeCounter('')
    base_name = get_base_name(features_file)
    tot_worms = len(trajectories_data_g)

    def _display_progress(n):
        # display progress
        dd = " Calculating tierpsy features. Worm %i of %i done." % (n + 1,
                                                                     tot_worms)
        print_flush(base_name + dd + ' Total time:' +
                    progress_timer.get_time_str())

    _display_progress(0)
    with tables.File(features_file, 'r+') as fid:

        # Start from a clean slate: drop the results of any previous run.
        for gg in [
                '/timeseries_data', '/event_durations', '/timeseries_features'
        ]:
            if gg in fid:
                fid.remove_node(gg)

        feat_dtypes = [(x, np.float32) for x in timeseries_all_columns]

        # Column order of the table: the two integer ids, the well name
        # (stored as a 3-byte string) and then the float32 features.
        feat_dtypes = [('worm_index', np.int32), ('timestamp', np.int32),
                       ('well_name', 'S3')] + feat_dtypes

        timeseries_features = fid.create_table('/',
                                               'timeseries_data',
                                               obj=np.recarray(0, feat_dtypes),
                                               filters=TABLE_FILTERS)

        if '/food_cnt_coord' in fid:
            food_cnt = fid.get_node('/food_cnt_coord')[:]
        else:
            food_cnt = None

        # If the ventral side is ever found in the multiworm case this has
        # to change.
        ventral_side = read_ventral_side(features_file)

        for ind_n, (worm_index, worm_data) in enumerate(trajectories_data_g):

            skel_id = worm_data['skeleton_id'].values

            # Rows with a negative skeleton id have no stored coordinates;
            # they stay as NaN in the arrays built below.
            good_id = skel_id >= 0
            skel_id_val = skel_id[good_id]
            traj_size = skel_id.size

            args = []
            for p in ('skeletons', 'widths', 'dorsal_contours',
                      'ventral_contours'):

                node_str = '/coordinates/' + p
                if node_str in fid:
                    node = fid.get_node(node_str)
                    dat = np.full((traj_size, *node.shape[1:]), np.nan)
                    if skel_id_val.size > 0:
                        if len(node.shape) == 3:
                            dd = node[skel_id_val, :, :]
                        else:
                            dd = node[skel_id_val, :]
                        dat[good_id] = dd
                else:
                    # A missing coordinates node is passed on as None.
                    dat = None

                args.append(dat)

            timestamp = worm_data['timestamp_raw'].values.astype(np.int32)

            feats = get_timeseries_features(
                *args,
                timestamp=timestamp,
                food_cnt=food_cnt,
                fps=fps,
                ventral_side=ventral_side,
                derivate_delta_time=derivate_delta_time)
            # save timeseries features data
            feats = feats.astype(np.float32)
            feats['worm_index'] = worm_index
            if is_fov_tosplit:
                feats[
                    'well_name'] = fovsplitter.find_well_from_trajectories_data(
                        worm_data)
            else:
                feats['well_name'] = 'n/a'
            # cast well_name to the correct type
            # (before shuffling columns, so it remains the last entry)
            # needed because for some reason this does not work:
            # feats['well_name'] = feats['well_name'].astype('S3')
            feats['_well_name'] = feats['well_name'].astype('S3')
            feats.drop(columns='well_name', inplace=True)
            feats.rename(columns={'_well_name': 'well_name'}, inplace=True)

            # Move the last two columns (worm_index, well_name) to the front
            # and then swap positions 1 and 2 so that the column that used to
            # be first (presumably 'timestamp' - TODO confirm) sits between
            # them, matching the order declared in feat_dtypes.
            cols = feats.columns.tolist()
            cols = cols[-2:] + cols[:-2]
            cols[1], cols[2] = cols[2], cols[1]

            feats = feats[cols]

            feats['worm_index'] = feats['worm_index'].astype(np.int32)
            feats['timestamp'] = feats['timestamp'].astype(np.int32)
            feats = feats.to_records(index=False)

            timeseries_features.append(feats)
            _display_progress(ind_n)
def save_timeseries_feats_table(features_file, derivate_delta_time):
    """Compute the time-series features of every worm and store them on disk.

    The features are appended, one trajectory at a time, to a freshly
    created ``/timeseries_data`` table inside ``features_file``. Any existing
    ``/timeseries_data``, ``/event_durations`` or ``/timeseries_features``
    node is removed first.

    Args:
        features_file: path of the HDF5 features file. It is read for
            ``/trajectories_data``, ``/coordinates/*`` and, if present,
            ``/food_cnt_coord``, and it is modified in place.
        derivate_delta_time: forwarded unchanged to
            ``get_timeseries_features``.
    """
    fps = read_fps(features_file)

    with pd.HDFStore(features_file, 'r') as fid:
        trajectories_data = fid['/trajectories_data']

    # Rows that were not skeletonized are kept on purpose; they are filled
    # with NaN further down.
    trajectories_data_g = trajectories_data.groupby('worm_index_joined')
    progress_timer = TimeCounter('')
    base_name = get_base_name(features_file)
    tot_worms = len(trajectories_data_g)

    def _display_progress(n):
        # display progress
        dd = " Calculating tierpsy features. Worm %i of %i done." % (n + 1,
                                                                     tot_worms)
        print_flush(base_name + dd + ' Total time:' +
                    progress_timer.get_time_str())

    _display_progress(0)
    with tables.File(features_file, 'r+') as fid:

        # Start from a clean slate: drop the results of any previous run.
        for gg in [
                '/timeseries_data', '/event_durations', '/timeseries_features'
        ]:
            if gg in fid:
                fid.remove_node(gg)

        feat_dtypes = [(x, np.float32) for x in timeseries_all_columns]

        # Column order of the table: the two integer ids followed by the
        # float32 features.
        feat_dtypes = [('worm_index', np.int32),
                       ('timestamp', np.int32)] + feat_dtypes
        timeseries_features = fid.create_table('/',
                                               'timeseries_data',
                                               obj=np.recarray(0, feat_dtypes),
                                               filters=TABLE_FILTERS)

        if '/food_cnt_coord' in fid:
            food_cnt = fid.get_node('/food_cnt_coord')[:]
        else:
            food_cnt = None

        # If the ventral side is ever found in the multiworm case this has
        # to change.
        ventral_side = read_ventral_side(features_file)

        for ind_n, (worm_index, worm_data) in enumerate(trajectories_data_g):
            # The coordinates are read through the handle that is already
            # open; the file is not opened a second time and ``fid`` is not
            # rebound to a handle that would be closed after each worm.
            skel_id = worm_data['skeleton_id'].values

            # Rows with a negative skeleton id have no stored coordinates;
            # they stay as NaN in the arrays built below.
            good_id = skel_id >= 0
            skel_id_val = skel_id[good_id]
            traj_size = skel_id.size

            args = []
            for p in ('skeletons', 'widths', 'dorsal_contours',
                      'ventral_contours'):

                node_str = '/coordinates/' + p
                if node_str in fid:
                    node = fid.get_node(node_str)
                    dat = np.full((traj_size, *node.shape[1:]), np.nan)
                    if skel_id_val.size > 0:
                        if len(node.shape) == 3:
                            dd = node[skel_id_val, :, :]
                        else:
                            dd = node[skel_id_val, :]
                        dat[good_id] = dd
                else:
                    # A missing coordinates node is passed on as None.
                    dat = None

                args.append(dat)

            timestamp = worm_data['timestamp_raw'].values.astype(np.int32)

            feats = get_timeseries_features(
                *args,
                timestamp=timestamp,
                food_cnt=food_cnt,
                fps=fps,
                ventral_side=ventral_side,
                derivate_delta_time=derivate_delta_time)
            # save timeseries features data
            feats = feats.astype(np.float32)
            feats['worm_index'] = worm_index
            # Move the last column (worm_index, just added) to the front so
            # the order matches the one declared in feat_dtypes.
            cols = feats.columns.tolist()
            cols = cols[-1:] + cols[:-1]
            feats = feats[cols]

            feats['worm_index'] = feats['worm_index'].astype(np.int32)
            feats['timestamp'] = feats['timestamp'].astype(np.int32)
            feats = feats.to_records(index=False)

            timeseries_features.append(feats)
            _display_progress(ind_n)
Пример #5
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 16 14:52:11 2017

@author: ajaver
"""

import glob
import os
import tables
from tierpsy.helper.params import read_ventral_side

# Root folder that is searched recursively for "*_featuresN.hdf5" files.
main_dir = '/Volumes/behavgenom_archive$/single_worm/finished'

features_pattern = os.path.join(main_dir, '**', '*_featuresN.hdf5')
fnames = glob.glob(features_pattern, recursive=True)

for file_number, fname in enumerate(fnames, start=1):
    print(file_number, len(fnames))
    # The ventral side is read from the matching "_skeletons" file and
    # copied into the attributes of /trajectories_data in the features file.
    skeletons_file = fname.replace('_featuresN', '_skeletons')
    try:
        with tables.File(fname, 'r+') as fid:
            ventral_side = read_ventral_side(skeletons_file)
            traj_node = fid.get_node('/trajectories_data')
            traj_node._v_attrs['ventral_side'] = ventral_side
    except tables.exceptions.NoSuchNodeError:
        # Files where a required node is missing are skipped silently.
        pass
Пример #6
0
def smooth_skeletons_table(skeletons_file,
                           features_file,
                           is_WT2=False,
                           skel_smooth_window=5,
                           coords_smooth_window_s=0.25,
                           gap_to_interp_s=0.25):
    """Smooth the skeletons of every trajectory and write them to a new file.

    ``features_file`` is opened in write mode and receives:

    * ``/food_cnt_coord`` when ``read_food_contour`` returns a contour,
    * ``/coordinates/{skeletons,dorsal_contours,ventral_contours,widths}``
      as extendable float32 arrays,
    * ``/trajectories_data`` with the ``skeleton_id`` column updated and
      the attributes ``is_WT2`` and ``ventral_side`` set,
    * ``/blob_features`` when ``_r_fill_blob_features`` returns data.

    Args:
        skeletons_file: path of the input skeletons HDF5 file.
        features_file: path of the output HDF5 file.
        is_WT2: if true, ``correct_schafer_worm`` is called on every worm
            before smoothing.
        skel_smooth_window: forwarded to ``SmoothedWorm``.
        coords_smooth_window_s: smoothing window in seconds; it is converted
            to frames with the file's fps and replaced by ``None`` when the
            result is 3 frames or fewer.
        gap_to_interp_s: gap length in seconds, converted to frames with the
            file's fps and forwarded to ``SmoothedWorm``.

    Raises:
        ValueError: if the timestamps of a worm cannot be mapped onto its
            ``timestamp_raw`` values.
    """
    fps = read_fps(skeletons_file)
    coords_smooth_window = int(np.round(fps * coords_smooth_window_s))
    gap_to_interp = int(np.round(fps * gap_to_interp_s))

    if coords_smooth_window <= 3:  # do not interpolate
        coords_smooth_window = None

    trajectories_data = _r_fill_trajectories_data(skeletons_file)
    trajectories_data_g = trajectories_data.groupby('worm_index_joined')
    progress_timer = TimeCounter('')
    base_name = get_base_name(skeletons_file)
    tot_worms = len(trajectories_data_g)

    def _display_progress(n):
        # display progress
        dd = " Smoothing skeletons. Worm %i of %i done." % (n, tot_worms)
        print_flush(base_name + dd + ' Total time:' +
                    progress_timer.get_time_str())

    _display_progress(0)

    # initialize arrays
    food_cnt = read_food_contour(skeletons_file)
    with tables.File(skeletons_file, 'r') as fid:
        n_segments = fid.get_node('/skeleton').shape[1]

    with tables.File(features_file, 'w') as fid_features:
        if food_cnt is not None:
            fid_features.create_array('/',
                                      'food_cnt_coord',
                                      obj=food_cnt.astype(np.float32))

        worm_coords_array = {}
        w_node = fid_features.create_group('/', 'coordinates')
        for array_name in [
                'skeletons', 'dorsal_contours', 'ventral_contours', 'widths'
        ]:
            # Widths hold one value per segment, the other arrays an
            # (x, y) pair per segment.
            if array_name != 'widths':
                a_shape = (0, n_segments, 2)
            else:
                a_shape = (0, n_segments)

            worm_coords_array[array_name] = fid_features.create_earray(
                w_node,
                array_name,
                shape=a_shape,
                atom=tables.Float32Atom(shape=()),
                filters=TABLE_FILTERS)

        # Number of rows written so far; used to hand out skeleton ids.
        tot_skeletons = 0
        for ind_n, (worm_index, worm_data) in enumerate(trajectories_data_g):
            if worm_data['was_skeletonized'].sum() < 2:
                continue

            worm = WormFromTable(skeletons_file,
                                 worm_index,
                                 worm_index_type='worm_index_joined')

            if is_WT2:
                worm.correct_schafer_worm()
            if np.sum(~np.isnan(worm.skeleton[:, 0, 0])) <= 2:
                # Too few valid skeletons: store the worm as it is.
                warnings.warn('Not enough data to smooth. Empty file?')
                wormN = worm

            else:
                wormN = SmoothedWorm(worm.skeleton,
                                     worm.widths,
                                     worm.ventral_contour,
                                     worm.dorsal_contour,
                                     skel_smooth_window=skel_smooth_window,
                                     coords_smooth_window=coords_smooth_window,
                                     gap_to_interp=gap_to_interp)
            dat_index = pd.Series(False,
                                  index=worm_data['timestamp_raw'].values)

            try:
                dat_index[worm.timestamp] = True
            except ValueError as err:
                # Fail loudly with context instead of dropping into an
                # interactive debugger, which blocks unattended runs.
                raise ValueError(
                    'Could not map the timestamps of worm %s onto its '
                    '"timestamp_raw" values.' % worm_index) from err

            # Give the rows of this worm consecutive ids that continue after
            # the rows already written.
            skeleton_id = np.arange(wormN.skeleton.shape[0]) + tot_skeletons
            tot_skeletons = skeleton_id[-1] + 1
            row_ind = worm_data.index[dat_index.values]
            trajectories_data.loc[row_ind, 'skeleton_id'] = skeleton_id

            # add data
            worm_coords_array['skeletons'].append(wormN.skeleton)
            worm_coords_array['dorsal_contours'].append(wormN.dorsal_contour)
            worm_coords_array['ventral_contours'].append(
                wormN.ventral_contour)
            worm_coords_array['widths'].append(wormN.widths)

            # display progress
            _display_progress(ind_n + 1)

        # save trajectories data
        newT = fid_features.create_table(
            '/',
            'trajectories_data',
            obj=trajectories_data.to_records(index=False),
            filters=TABLE_FILTERS)
        copy_unit_conversions(newT, skeletons_file)
        newT._v_attrs['is_WT2'] = is_WT2
        newT._v_attrs['ventral_side'] = read_ventral_side(skeletons_file)

        # save blob features interpolating in dropped frames and stage
        # movement (WT2)
        blob_features = _r_fill_blob_features(skeletons_file,
                                              trajectories_data, is_WT2)
        if blob_features is not None:
            fid_features.create_table(
                '/',
                'blob_features',
                obj=blob_features.to_records(index=False),
                filters=TABLE_FILTERS)
Пример #7
0
def _get_timeseries_feats(features_file, delta_time=1 / 3):
    """Return the time-series features of every worm in ``features_file``.

    Args:
        features_file: path of the HDF5 features file. It is read for
            ``/trajectories_data``, ``/coordinates/*`` and, if present,
            ``/food_cnt_coord``.
        delta_time: NOTE(review): accepted but never used in this function;
            it is not forwarded to ``get_timeseries_features`` - confirm
            whether that is intended.

    Returns:
        A single ``pandas.DataFrame`` with the features of all the worms
        concatenated, with ``worm_index`` as the first column and a fresh
        integer index.
    """
    fps = read_fps(features_file)

    with pd.HDFStore(features_file, 'r') as fid:
        trajectories_data = fid['/trajectories_data']

    # Rows that were not skeletonized are kept on purpose; they are filled
    # with NaN further down.
    trajectories_data_g = trajectories_data.groupby('worm_index_joined')
    progress_timer = TimeCounter('')
    base_name = get_base_name(features_file)
    tot_worms = len(trajectories_data_g)

    def _display_progress(n):
        # display progress
        dd = " Calculating tierpsy features. Worm %i of %i done." % (n + 1,
                                                                     tot_worms)
        print_flush(base_name + dd + ' Total time:' +
                    progress_timer.get_time_str())

    _display_progress(0)

    with tables.File(features_file, 'r') as fid:
        if '/food_cnt_coord' in fid:
            food_cnt = fid.get_node('/food_cnt_coord')[:]
        else:
            food_cnt = None

        # If the ventral side is ever found in the multiworm case this has
        # to change.
        ventral_side = read_ventral_side(features_file)

        timeseries_features = []
        for ind_n, (worm_index, worm_data) in enumerate(trajectories_data_g):
            # The coordinates are read through the handle that is already
            # open; the file is not opened a second time and ``fid`` is not
            # rebound to a handle that would be closed after each worm.
            skel_id = worm_data['skeleton_id'].values

            # Rows with a negative skeleton id have no stored coordinates;
            # they stay as NaN in the arrays built below.
            good_id = skel_id >= 0
            skel_id_val = skel_id[good_id]
            traj_size = skel_id.size

            args = []
            for p in ('skeletons', 'widths', 'dorsal_contours',
                      'ventral_contours'):
                node = fid.get_node('/coordinates/' + p)

                dat = np.full((traj_size, *node.shape[1:]), np.nan)
                if skel_id_val.size > 0:
                    if len(node.shape) == 3:
                        dd = node[skel_id_val, :, :]
                    else:
                        dd = node[skel_id_val, :]
                    dat[good_id] = dd

                args.append(dat)

            timestamp = worm_data['timestamp_raw'].values.astype(np.int32)

            feats = get_timeseries_features(*args,
                                            timestamp=timestamp,
                                            food_cnt=food_cnt,
                                            fps=fps,
                                            ventral_side=ventral_side)
            # save timeseries features data
            feats = feats.astype(np.float32)
            feats['worm_index'] = worm_index
            # Move the last column (worm_index, just added) to the front.
            cols = feats.columns.tolist()
            cols = cols[-1:] + cols[:-1]
            feats = feats[cols]

            feats['worm_index'] = feats['worm_index'].astype(np.int32)
            feats['timestamp'] = feats['timestamp'].astype(np.int32)

            timeseries_features.append(feats)
            _display_progress(ind_n)

        timeseries_features = pd.concat(timeseries_features, ignore_index=True)

    return timeseries_features
Пример #8
0
import os
import pandas as pd
from tierpsy.helper.params import read_fps, read_ventral_side

if __name__ == '__main__':
    # Compare the orientation tag in each file name ("_CW_" / "_CCW_") with
    # the ventral side returned by read_ventral_side for that file.
    experiments_df = pd.read_csv('ageing_celine.csv')

    bad_f = []   # paths whose name tag disagrees with the stored value
    good_f = []  # (ventral_side, path) pairs for every other file
    for row_number, experiment in experiments_df.iterrows():
        print(row_number, len(experiments_df))
        skeletons_name = experiment['base_name'] + '_skeletons.hdf5'
        skel_file = os.path.join(experiment['directory'], skeletons_name)
        print(skel_file)

        ventral_side = read_ventral_side(skel_file)
        cw_mismatch = ('_CW_' in skel_file
                       and ventral_side != 'clockwise')
        ccw_mismatch = ('_CCW_' in skel_file
                        and ventral_side != 'anticlockwise')

        if cw_mismatch or ccw_mismatch:
            bad_f.append(skel_file)
            print(skel_file)
        else:
            good_f.append((ventral_side, skel_file))
    #%%
#    import tables
#
#    for ii, skel_file in enumerate(bad_f):
#        print(ii, len(bad_f))
#
#        bn = skel_file.replace('_skeletons.hdf5', '')