示例#1
0
def add_trajectory_info(df_stats, worm_index, timeseries_data, fps):
    """
    Add trajectory-level information columns to a summary and put them first.

    Parameters
    ----------
    df_stats : pandas.DataFrame
        Summary statistics of one trajectory. It is modified in place: the
        information columns are added to it before the reordered frame is
        returned.
    worm_index :
        Identifier of the trajectory, stored in the 'worm_index' column.
    timeseries_data : pandas.DataFrame
        Time series of the trajectory. The 'timestamp' and 'length' columns
        are read, plus 'well_name' when `was_fov_split` reports a split FOV.
    fps : number
        Divisor applied to the timestamp-based quantities.

    Returns
    -------
    pandas.DataFrame
        `df_stats` with the columns 'worm_index', 'ini_time', 'tot_time',
        'frac_valid_skels' (and 'well_name' if the FOV was split) moved to
        the front, followed by all the other columns in their original order.

    Raises
    ------
    ValueError
        If the FOV was split and the trajectory does not map to exactly one
        well name once the 'n/a' placeholder is discarded.
    """
    df_stats['worm_index'] = worm_index
    df_stats['ini_time'] = timeseries_data['timestamp'].min() / fps
    df_stats['tot_time'] = timeseries_data['timestamp'].size / fps
    df_stats['frac_valid_skels'] = (~timeseries_data['length'].isnull()).mean()
    info_cols = ['worm_index', 'ini_time', 'tot_time', 'frac_valid_skels']

    if was_fov_split(timeseries_data):
        well_names = set(timeseries_data['well_name']) - {'n/a'}
        # Fail loudly instead of dropping into a debugger: with zero or
        # several candidate wells there is no meaningful well to report.
        if len(well_names) != 1:
            raise ValueError(
                'A single trajectory must belong to exactly one well, but '
                'worm_index {} maps to {} wells: {}'.format(
                    worm_index, len(well_names),
                    sorted(str(name) for name in well_names)))
        df_stats['well_name'] = next(iter(well_names))
        info_cols.append('well_name')

    # Select the information columns by name rather than by position, so the
    # ordering stays right even if df_stats already contained one of them.
    feat_cols = [col for col in df_stats.columns if col not in info_cols]
    return df_stats[info_cols + feat_cols]
示例#2
0
def tierpsy_trajectories_summary(
        fname, time_windows, time_units, only_abs_ventral=False,
        selected_feat=None, is_manual_index=False, delta_time=1/3):
    """
    Compute per-trajectory summaries of file `fname` for each time window.

    `time_windows`, `time_units` and `is_manual_index` are forwarded to
    `read_data`; `only_abs_ventral`, `selected_feat` and `delta_time` are
    forwarded to `get_summary_stats`.

    Returns a list with one dataframe per time window. Each dataframe has one
    row per trajectory (worm index); it is empty when the window holds no
    data. When `read_data` returns None, a list of empty dataframes (one per
    window) is returned instead. If the FOV was split into wells, the
    'is_good_well' flag of each trajectory's well is merged in.
    """
    fps = read_fps(fname)
    loaded = read_data(fname, time_windows, time_units, fps, is_manual_index)
    if loaded is None:
        return [pd.DataFrame() for _ in range(len(time_windows))]
    timeseries_data, blob_features = loaded

    # the first window decides whether the FOV is treated as split in wells
    fov_is_split = was_fov_split(timeseries_data[0])
    if fov_is_split:
        splitter = FOVMultiWellsSplitter(fname)
        well_quality = splitter.wells[['well_name', 'is_good_well']].copy()

    summaries_per_window = []
    for win_idx, _ in enumerate(time_windows):
        window_ts = timeseries_data[win_idx]

        # nothing was tracked in this window: keep an empty placeholder
        if window_ts.empty:
            summaries_per_window.append(pd.DataFrame([]))
            continue

        # one single-row dataframe per trajectory
        per_worm_rows = []
        for worm_id, worm_ts in window_ts.groupby('worm_index'):
            # blob rows are picked with the original index labels, so this
            # has to happen before the time series index is reset
            worm_blobs = blob_features[win_idx].loc[worm_ts.index]
            worm_blobs = worm_blobs.reset_index(drop=True)
            worm_ts = worm_ts.reset_index(drop=True)

            stats = get_summary_stats(
                worm_ts, fps, worm_blobs, delta_time,
                only_abs_ventral=only_abs_ventral,
                selected_feat=selected_feat,
            )
            stats_row = pd.DataFrame(stats).T
            per_worm_rows.append(
                add_trajectory_info(stats_row, worm_id, worm_ts, fps))

        # stack all the trajectories of this window in a single dataframe
        window_summary = pd.concat(
            per_worm_rows, ignore_index=True, sort=False)

        # flag good/bad wells, which only makes sense if there are wells
        if fov_is_split:
            window_summary = window_summary.merge(
                well_quality, on='well_name', how='left')

        summaries_per_window.append(window_summary)

    return summaries_per_window
示例#3
0
def tierpsy_plate_summary(fname, time_windows, time_units, is_manual_index=False, delta_time=1/3):
    """
    Calculate the plate summaries for a given file fname, one per time window.

    `time_windows`, `time_units` and `is_manual_index` are forwarded to
    `read_data`; `delta_time` is forwarded to `get_summary_stats`.

    Returns a list with one dataframe per time window. If the FOV was not
    split, each dataframe is the single-row summary of the whole window. If
    the FOV was split in wells, each dataframe has one row per well found in
    that window (with a leading 'well_name' column), ordered by first
    appearance of the well in the time series; it is empty when no well was
    found. When `read_data` returns None, a list of empty dataframes (one per
    window) is returned.
    """
    fps = read_fps(fname)
    data_in = read_data(fname, time_windows, time_units, fps, is_manual_index)

    # if manual annotation was chosen and the trajectories_data does not contain
    # worm_index_manual, then data_in is None
    # if time_windows in seconds and fps is not defined (fps=-1), then data_in is None
    if data_in is None:
        return [pd.DataFrame() for _ in range(len(time_windows))]

    timeseries_data, blob_features = data_in

    # was the fov split in wells? only use the first window to detect that
    is_fov_tosplit = was_fov_split(timeseries_data[0])

    # initialize list of plate summaries for all time windows
    plate_feats_list = []
    for iwin, _ in enumerate(time_windows):
        window_ts = timeseries_data[iwin]
        window_blobs = blob_features[iwin]

        if not is_fov_tosplit:
            plate_feats = get_summary_stats(window_ts, fps, window_blobs, delta_time)
            plate_feats_list.append(pd.DataFrame(plate_feats).T)
            continue

        # get list of well names in this time window
        # (maybe some wells looked empty during a whole window,
        # this prevents errors later on).
        # unique() keeps the order of first appearance, so the row order of
        # the output does not depend on set iteration order, which for
        # strings changes from one interpreter run to the next.
        well_names_list = [name for name in window_ts['well_name'].unique()
                           if name != 'n/a']

        # create a list of well-specific, one-line long dataframes
        well_feats_list = []
        for well_name in well_names_list:
            # find entries in this window belonging to the right well
            idx_well = window_ts['well_name'] == well_name
            well_feats = get_summary_stats(window_ts[idx_well].reset_index(),
                                           fps,
                                           window_blobs[idx_well].reset_index(),
                                           delta_time)
            # first prepend the well_name_s to the well_feats series,
            # then transpose it so it is a single-row dataframe,
            # and append it to the well_feats_list
            well_name_s = pd.Series({'well_name': well_name})
            well_feats_list.append(
                pd.DataFrame(pd.concat([well_name_s, well_feats])).T)

        # check: did we find any well?
        if not well_feats_list:
            plate_feats_list.append(pd.DataFrame())
        else:
            # now concatenate all the single-row df in well_feats_list in a
            # single df and append it to the growing list (1 entry = 1 window)
            plate_feats = pd.concat(well_feats_list, ignore_index=True, sort=False)
            plate_feats_list.append(plate_feats)

    return plate_feats_list
def save_feats_stats(features_file, derivate_delta_time):
    """
    Compute the summary statistics of a features file and store them in it.

    Reads '/timeseries_data', the optional '/blob_features' table and the
    'fps' attribute of '/trajectories_data' from `features_file`, computes the
    summary statistics with `get_summary_stats` (once per well if the FOV was
    split, otherwise once for the whole video) and writes them to the
    '/features_stats' table of the same file, replacing any existing one.
    Nothing is written when no statistic was produced.

    Parameters
    ----------
    features_file : str
        Path of the HDF5 features file; read first, then opened in 'r+' mode.
    derivate_delta_time : number
        Forwarded as-is to `get_summary_stats`.

    Raises
    ------
    AssertionError
        If the FOV was split but '/timeseries_data' has no 'well_name' column.
    """
    with pd.HDFStore(features_file, 'r') as fid:
        fps = fid.get_storer('/trajectories_data').attrs['fps']
        timeseries_data = fid['/timeseries_data']
        blob_features = (
            fid['/blob_features'] if '/blob_features' in fid else None)
        # do we need split-FOV summaries?
        is_fov_tosplit = was_fov_split(features_file)

    # check
    if is_fov_tosplit:
        assert 'well_name' in timeseries_data.columns, (
            'fov_wells in features file but no well_name in timeseries_data')

    # Now I want to calculate the stats of the video
    if is_fov_tosplit:
        # get summary stats per well and then concatenate them all.
        # unique() keeps the order of first appearance, so the row order of
        # the saved table does not depend on set iteration order.
        well_name_list = [name for name in timeseries_data['well_name'].unique()
                          if name != 'n/a']
        n_wells = len(well_name_list)

        per_well_feats = []
        for wc, well in enumerate(well_name_list, start=1):
            print('Processing well {} out of {}'.format(wc, n_wells))
            idx = timeseries_data['well_name'] == well
            # blob_features is None when the file has no '/blob_features'
            well_blobs = (None if blob_features is None
                          else blob_features[idx].reset_index())
            # calculate stats per well
            tmp = get_summary_stats(timeseries_data[idx].reset_index(), fps,
                                    well_blobs, derivate_delta_time)
            tmp = pd.DataFrame(zip(tmp.index, tmp), columns=['name', 'value'])
            tmp['well_name'] = well
            per_well_feats.append(tmp)

        # now concat all; pd.concat raises on an empty list, so fall back to
        # an empty frame that makes the function skip the write below
        if per_well_feats:
            exp_feats = pd.concat(per_well_feats, ignore_index=True)
        else:
            exp_feats = pd.DataFrame(columns=['name', 'value', 'well_name'])

    else:  # we don't need to split the FOV
        exp_feats = get_summary_stats(timeseries_data, fps, blob_features,
                                      derivate_delta_time)

    # nothing to save on disk
    if len(exp_feats) == 0:
        return

    # if is_fov_tosplit exp_feats is a dataframe, otherwise a series:
    # different syntax to build the record array in the two cases
    if is_fov_tosplit:
        tot = max(len(x) for x in exp_feats['name'])
        # NOTE(review): 'S3' presumably truncates well names longer than
        # 3 bytes -- confirm it fits every plate layout in use.
        dtypes = {
            'name': 'S{}'.format(tot),
            'value': np.float32,
            'well_name': 'S3'
        }
        exp_feats_rec = exp_feats.to_records(index=False,
                                             column_dtypes=dtypes)
    else:
        tot = max(len(x) for x in exp_feats.index)
        dtypes = [('name', 'S{}'.format(tot)), ('value', np.float32)]
        exp_feats_rec = np.array(list(zip(exp_feats.index, exp_feats)),
                                 dtype=dtypes)

    # write on hdf5 file, replacing a previously saved table if present
    with tables.File(features_file, 'r+') as fid:
        if '/features_stats' in fid:
            fid.remove_node('/features_stats')
        fid.create_table('/',
                         'features_stats',
                         obj=exp_feats_rec,
                         filters=TABLE_FILTERS)