Example #1
def getFoodContour(mask_file,
                skeletons_file,
                use_nn_food_cnt,
                model_path,
                solidity_th=0.98,
                _is_debug = False
                ):
    base_name = get_base_name(mask_file)

    progress_timer = TimeCounter('')
    print_flush("{} Calculating food contour {}".format(base_name, progress_timer.get_time_str()))


    food_cnt = calculate_food_cnt(mask_file,
                                  use_nn_food_cnt = use_nn_food_cnt,
                                  model_path = model_path,
                                  solidity_th = solidity_th,
                                  _is_debug = _is_debug)

    #store the contour coordinates in both the skeletons file and the mask file
    for fname in [skeletons_file, mask_file]:
        with tables.File(fname, 'r+') as fid:
            if '/food_cnt_coord' in fid:
                fid.remove_node('/food_cnt_coord')

            #if it is a valid contour save it
            if food_cnt is not None and \
               food_cnt.size >= 2 and \
               food_cnt.ndim == 2 and \
               food_cnt.shape[1] == 2:

                tab = fid.create_array('/',
                                       'food_cnt_coord',
                                       obj=food_cnt)
                tab._v_attrs['use_nn_food_cnt'] = int(use_nn_food_cnt)
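A minimal sketch (the file name is hypothetical) of reading the stored contour back from a file written by the function above:

import tables

with tables.File('sample_skeletons.hdf5', 'r') as fid:
    if '/food_cnt_coord' in fid:
        node = fid.get_node('/food_cnt_coord')
        food_cnt = node[:]                               # (N, 2) array of contour coordinates
        was_nn = bool(node._v_attrs['use_nn_food_cnt'])  # whether the neural-network contour was used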
Example #2
def generateMoviesROI(masked_file,
                      trajectories_data,
                      roi_size=-1,
                      progress_prefix='',
                      bgnd_param={},
                      progress_refresh_rate_s=20):

    if len(trajectories_data) == 0:
        print_flush(progress_prefix + ' No valid data. Exiting.')

    else:
        frames = trajectories_data['frame_number'].unique()

        img_generator = generateImages(masked_file,
                                       frames=frames,
                                       bgnd_param=bgnd_param)

        traj_group_by_frame = trajectories_data.groupby('frame_number')
        progress_time = TimeCounter(progress_prefix, max(frames))

        fps = read_fps(masked_file, dflt=25)
        progress_refresh_rate = int(round(fps * progress_refresh_rate_s))

        for ii, (current_frame, img) in enumerate(img_generator):
            frame_data = traj_group_by_frame.get_group(current_frame)

            #dictionary where keys are the table rows and the values are the worm ROIs
            yield getAllImgROI(img, frame_data, roi_size)

            if current_frame % progress_refresh_rate == 0:
                print_flush(progress_time.get_str(current_frame))

        print_flush(progress_time.get_str(current_frame))
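A hedged usage sketch (file names are hypothetical; trajectories_data would normally come from the '/trajectories_data' table of the corresponding skeletons file):

import pandas as pd

with pd.HDFStore('sample_skeletons.hdf5', 'r') as fid:
    trajectories_data = fid['/trajectories_data']

for worms_in_frame in generateMoviesROI('sample.hdf5', trajectories_data, roi_size=128):
    # keys are trajectories_data row indexes, values are the worm ROIs for that frame
    for row_index, worm_roi in worms_in_frame.items():
        pass  # process each ROI here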
Example #3
    def filterFiles(self, valid_files, print_cmd=False):
        # for ii, video_file in enumerate(valid_files):
        #     label, ap_obj, unfinished_points = self._checkIndFile(video_file)
        #     self.filtered_files[label].append((ap_obj, unfinished_points))

        #     if (ii % 10) == 0:

        progress_timer = TimeCounter('')

        n_batch = mp.cpu_count()
        if self.is_parallel_check:
            lock = mp.Lock()
            p = mp.Pool(n_batch,
                        initializer=init_analysis_point_lock,
                        initargs=(lock, ))

        all_points = []
        tot_files = len(valid_files)
        for ii in range(0, tot_files, n_batch):
            dat = valid_files[ii:ii + n_batch]

            if self.is_parallel_check:
                res = list(p.map(self._checkIndFile, dat))
            else:
                res = list(map(self._checkIndFile, dat))

            all_points.append(res)
            n_files = len(dat)
            print('Checking file {} of {}. Total time: {}'.format(
                ii + n_files, tot_files, progress_timer.get_time_str()))
        all_points = sum(all_points, [])  #flatten

        # initialize filtered files lists
        filtered_files_fields = ('SOURCE_GOOD', 'SOURCE_BAD', 'FINISHED_GOOD',
                                 'FINISHED_BAD', 'EMPTY_ANALYSIS_LIST')
        self.filtered_files = {key: [] for key in filtered_files_fields}
        for label, ap_obj, unfinished_points in all_points:
            self.filtered_files[label].append((ap_obj, unfinished_points))

        print(BREAK_L)
        print('''Finished checking files.\nTotal time elapsed {}'''.format(
            progress_timer.get_time_str()))
        print(BREAK_L + '\n')

        cmd_list = self.getCMDlist()
        if print_cmd:
            #print the commands to be executed
            print(BREAK_L)
            print('Commands to be executed.')
            print(BREAK_L)
            print_cmd_list(cmd_list)
            print(BREAK_L + '\n')

        print(self.summary_msg)
        return cmd_list
Example #4
def createSampleVideo(masked_image_file,
                      sample_video_name='',
                      time_factor=8,
                      size_factor=5,
                      skip_factor=2,
                      dflt_fps=30,
                      codec='MPEG',
                      shift_bgnd=False):
    #the skip factor reduces the size of the movie by using fewer frames (e.g. 15fps instead of 30fps)

    #%%
    if not sample_video_name:
        sample_video_name = getSubSampleVidName(masked_image_file)

    # initialize timers
    base_name = masked_image_file.rpartition('.')[0].rpartition(os.sep)[-1]
    progressTime = TimeCounter(
        '{} Generating subsampled video.'.format(base_name))

    with tables.File(masked_image_file, 'r') as fid:
        masks = fid.get_node('/mask')
        tot_frames, im_h, im_w = masks.shape
        im_h, im_w = im_h // size_factor, im_w // size_factor

        fps = read_fps(masked_image_file, dflt_fps)

        tt_vec = _getCorrectedTimeVec(fid, tot_frames)
        #%%
        #codec values that work: 'H264', 'MPEG', 'XVID'
        vid_writer = cv2.VideoWriter(
            sample_video_name, cv2.VideoWriter_fourcc(*codec),
            fps / skip_factor, (im_w, im_h), isColor=False)
        assert vid_writer.isOpened()

        if shift_bgnd:
            #lazy bgnd calculation, just take the first and last frames and use their 97.5th percentile pixel value
            mm = masks[[0, -1], :, :]
            _bgnd_val = np.percentile(mm[mm != 0], [97.5])[0]

        for frame_number in range(0, tot_frames,
                                  int(time_factor * skip_factor)):
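            # stepping by time_factor*skip_factor while writing at fps/skip_factor
            # speeds the output up by roughly time_factor relative to real time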
            current_frame = int(tt_vec[frame_number])
            img = masks[current_frame]

            if shift_bgnd:
                img[img == 0] = _bgnd_val

            im_new = cv2.resize(img, (im_w, im_h))
            vid_writer.write(im_new)

            if frame_number % (500 * time_factor) == 0:
                # calculate the progress and put it in a string
                print_flush(progressTime.get_str(frame_number))

        vid_writer.release()
        print_flush(progressTime.get_str(frame_number) + ' DONE.')
Example #5
def get_food_contour(mask_video,
                     min_area=None,
                     n_bins=180,
                     frac_lowess=0.1,
                     is_debug=False):
    '''
    Identify the contour of a food patch. I tested this for the worm rig.
    It assumes the food has a semi-circular shape. 
    The food lawn is very thin so the challenge was to estimate the contour of a very dim area.
    '''
    #%%
    progress_timer = TimeCounter('')
    base_name = get_base_name(mask_video)
    print_flush('{} Calculating food contour...'.format(base_name))

    try:
        with tables.File(mask_video, 'r') as fid:
            #read the first few full-resolution frames; only the first two are used below
            full_data = fid.get_node('/full_data')[:5]
    except tables.exceptions.NoSuchNodeError:
        return None, None

    img = np.max(full_data[:2], axis=0)
    #dark_mask = get_dark_mask(full_data)

    mask = get_patch_mask(img, min_area=min_area)
    circx, circy, best_fit = mask_to_food_contour(mask,
                                                  n_bins=n_bins,
                                                  frac_lowess=frac_lowess)
    #%%
    dd = '{} Food contour calculated. Total time: {}'.format(
        base_name, progress_timer.get_time_str())
    print_flush(dd)
    #%%
    if is_debug:
        from skimage.draw import circle_perimeter
        import matplotlib.pylab as plt

        cpx, cpy = circle_perimeter(*best_fit[1:])

        plt.figure(figsize=(5, 5))
        plt.gca().xaxis.set_ticklabels([])
        plt.gca().yaxis.set_ticklabels([])

        (px, py) = np.where(skeletonize(mask))
        plt.imshow(img, cmap='gray')
        plt.plot(py, px, '.')
        plt.plot(cpx, cpy, '.r')
        plt.plot(circy, circx, '.')
        plt.suptitle(base_name)
        plt.grid(False)
    #%%
    return circx, circy
Example #6
def reformatRigMaskedVideo(original_file, new_file, plugin_param_file,
                           expected_fps, microns_per_pixel):
    plugin_params = _getWormEnconderParams(plugin_param_file)

    base_name = original_file.rpartition('.')[0].rpartition(os.sep)[-1]

    if not _isValidSource(original_file):
        print_flush(new_file + ' ERROR. File might be corrupt. ' +
                    original_file)

        return
    save_full_interval, buffer_size, mask_params = _getReformatParams(
        plugin_params)
    with tables.File(original_file, 'r') as fid_old, \
        tables.File(new_file, 'w') as fid_new:
        mask_old = fid_old.get_node('/mask')
        tot_frames, im_height, im_width = mask_old.shape
        progress_timer = TimeCounter('Reformatting Gecko plugin hdf5 video.',
                                     tot_frames)

        attr_params = dict(expected_fps=expected_fps,
                           microns_per_pixel=microns_per_pixel,
                           is_light_background=True)
        mask_new, full_new, _ = initMasksGroups(fid_new,
                                                tot_frames,
                                                im_height,
                                                im_width,
                                                attr_params,
                                                save_full_interval,
                                                is_expandable=False)
        mask_new.attrs['plugin_params'] = json.dumps(plugin_params)

        img_buff_ini = mask_old[:buffer_size]
        full_new[0] = img_buff_ini[0]
        mask_new[:buffer_size] = img_buff_ini * (mask_old[buffer_size] != 0)
        for frame in range(buffer_size, tot_frames):
            if frame % save_full_interval != 0:
                mask_new[frame] = mask_old[frame]
            else:

                full_frame_n = frame // save_full_interval

                img = mask_old[frame]
                full_new[full_frame_n] = img
                mask_new[frame] = img * (mask_old[frame - 1] != 0)

            if frame % 500 == 0:
                # calculate the progress and put it in a string
                progress_str = progress_timer.get_str(frame)
                print_flush(base_name + ' ' + progress_str)

        print_flush(base_name + ' Compressed video done. Total time:' +
                    progress_timer.get_time_str())
Example #7
def generateROIBuff(masked_image_file,
                    buffer_size,
                    bgnd_param,
                    progress_str='',
                    progress_refresh_rate_s=20):
    img_generator = generateImages(masked_image_file, bgnd_param=bgnd_param)

    with tables.File(masked_image_file, 'r') as mask_fid:
        tot_frames, im_h, im_w = mask_fid.get_node("/mask").shape

    #loop, save data and display progress
    base_name = masked_image_file.rpartition('.')[0].rpartition(os.sep)[-1]
    progress_str = base_name + progress_str
    fps = read_fps(masked_image_file, dflt=25)
    progress_refresh_rate = fps * progress_refresh_rate_s

    progress_time = TimeCounter(progress_str, tot_frames)
    for frame_number, image in img_generator:
        if frame_number % buffer_size == 0:
            if frame_number + buffer_size > tot_frames:
                buffer_size = tot_frames - frame_number  #shrink the buffer for the last chunk, otherwise it would never fill up
            image_buffer = np.zeros((buffer_size, im_h, im_w), np.uint8)
            ini_frame = frame_number

        image_buffer[frame_number - ini_frame] = image

        #compress if it is the last frame in the buffer
        if (frame_number + 1) % buffer_size == 0 or (frame_number + 1) == tot_frames:
            # z projection and select pixels as connected regions that were selected as worms at
            # least once in the masks
            main_mask = np.any(image_buffer, axis=0)

            # change from bool to uint since same datatype is required in
            # opencv
            main_mask = main_mask.astype(np.uint8)

            #calculate the contours, only keeping the external contours (no holes)
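            # NOTE: the 3-value return below matches the OpenCV 3.x API;
            # OpenCV 4.x returns only (contours, hierarchy)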
            _, ROI_cnts, _ = cv2.findContours(main_mask, cv2.RETR_EXTERNAL,
                                              cv2.CHAIN_APPROX_NONE)

            yield ROI_cnts, image_buffer, ini_frame

        if frame_number % progress_refresh_rate == 0:
            print_flush(progress_time.get_str(frame_number))

    print_flush(progress_time.get_str(frame_number))
def generateImages(masked_image_file,
                   frames=[],
                   bgnd_param={},
                   progress_str='',
                   progress_refresh_rate_s=20):

    #loop, save data and display progress
    base_name = Path(masked_image_file).stem
    progress_str = base_name + progress_str
    fps = read_fps(masked_image_file, dflt=25)

    progress_refresh_rate = fps * progress_refresh_rate_s

    with tables.File(masked_image_file, 'r') as mask_fid:
        mask_dataset = mask_fid.get_node("/mask")

        tot_frames = mask_dataset.shape[0]
        progress_time = TimeCounter(progress_str, tot_frames)

        if len(bgnd_param) > 0:

            if '/bgnd' in mask_fid:
                bgnd_subtractor = BackgroundSubtractorPrecalculated(
                    masked_image_file, **bgnd_param)
            else:
                bgnd_subtractor = BackgroundSubtractorMasked(
                    masked_image_file, **bgnd_param)
        else:
            bgnd_subtractor = None

        if len(frames) == 0:
            frames = range(mask_dataset.shape[0])

        for frame_number in frames:
            if frame_number % progress_refresh_rate == 0:
                print_flush(progress_time.get_str(frame_number))

            image = mask_dataset[frame_number]

            if bgnd_subtractor is not None:
                image = bgnd_subtractor.apply(image, frame_number)

            yield frame_number, image

    print_flush(progress_time.get_str(frame_number))
Example #9
def exec_parallel(input_data, func):
    print('*******', len(input_data))
    progress_timer = TimeCounter()
    
    n_batch = mp.cpu_count()
    p = mp.Pool(n_batch)
    tot = len(input_data)
    
    #all_files = all_files[slice(0, len(all_files), 10)] #FOR TESTING
    output_data = []
    for ii in range(0, tot, n_batch):
        dat = input_data[ii:ii + n_batch]
        for x in p.map(func, dat):
            output_data.append(x)
        
        print('{} of {}. Total time: {}'.format(min(ii + n_batch, tot), 
                  tot, progress_timer.get_time_str()))
    
    return output_data
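A minimal usage sketch (the worker function and file list are hypothetical; the function passed to exec_parallel must be picklable, i.e. defined at module level):

import tables

def _count_frames(fname):
    with tables.File(fname, 'r') as fid:
        return fid.get_node('/mask').shape[0]

n_frames_per_file = exec_parallel(list_of_masked_files, _count_frames)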
Example #10
def getWormFeaturesFilt(
        skeletons_file,
        features_file,
        use_skel_filter,
        use_manual_join,
        is_single_worm,
        feat_filt_param,
        split_traj_time):
    
    feat_filt_param = min_num_skel_defaults(skeletons_file, **feat_filt_param)


    def _iniFileGroups():
        # initialize groups for the timeseries and event features
        header_timeseries = {
            feat: tables.Float32Col(pos=ii)
            for ii, (feat, _) in enumerate(wStats.feat_timeseries_dtype)}
                
        table_timeseries = features_fid.create_table(
            '/', 'features_timeseries', header_timeseries, filters=TABLE_FILTERS)

        # save some data used in the calculation as attributes
        fps, microns_per_pixel, _ = copy_unit_conversions(table_timeseries, skeletons_file)
        table_timeseries._v_attrs['worm_index_type'] = worm_index_type

        # node to save features events
        group_events = features_fid.create_group('/', 'features_events')

        # save the skeletons
        with tables.File(skeletons_file, 'r') as ske_file_id:
            skel_shape = ske_file_id.get_node('/skeleton').shape

        

        worm_coords_array = {}
        w_node = features_fid.create_group('/', 'coordinates')
        for array_name in ['skeletons', 'dorsal_contours', 'ventral_contours']:
            worm_coords_array[array_name] = features_fid.create_earray(
                w_node,
                array_name,
                shape=(0, skel_shape[1], skel_shape[2]),
                atom=tables.Float32Atom(shape=()),
                filters=TABLE_FILTERS)
        
        # initialize rec array with the averaged features of each worm
        stats_features_df = {stat:np.full(tot_worms, np.nan, dtype=wStats.feat_avg_dtype) for stat in FUNC_FOR_DIV}
    
        return header_timeseries, table_timeseries, group_events, worm_coords_array, stats_features_df
    
    progress_timer = TimeCounter('')
    def _displayProgress(n):
        # display progress
        dd = " Extracting features. Worm %i of %i done." % (n, tot_worms)
        print_flush(
            base_name +
            dd +
            ' Total time:' +
            progress_timer.get_time_str())

    #get the valid number of worms
    good_traj_index, worm_index_type = getGoodTrajIndexes(
        skeletons_file,
        use_skel_filter,
        use_manual_join,
        is_single_worm,
        feat_filt_param)
    
    fps = read_fps(skeletons_file)
    split_traj_frames = int(np.round(split_traj_time*fps)) #the fps could be non integer
    
    # function to calculate the progress time. Useful to display progress
    base_name = skeletons_file.rpartition('.')[0].rpartition(os.sep)[-1].rpartition('_')[0]
    
    with tables.File(features_file, 'w') as features_fid:
        #check if the stage was not aligned correctly. Return empty features file otherwise.
        with tables.File(skeletons_file, 'r') as skel_fid:
            if '/experiment_info' in skel_fid:
                dd = skel_fid.get_node('/experiment_info').read()
                features_fid.create_array(
                    '/', 'experiment_info', obj=dd)

        #total number of worms
        tot_worms = len(good_traj_index)
        if tot_worms == 0:
            print_flush(base_name + ' No valid worms found to calculate features. Creating empty file.')
            return

        # initialize by getting the specs data subdivision
        wStats = WormStats()
        all_splitted_feats = {stat:[] for stat in FUNC_FOR_DIV}
    

        #initialize file
        header_timeseries, table_timeseries, group_events, \
        worm_coords_array, stats_features_df = _iniFileGroups()



        _displayProgress(0)
        # start to calculate features for each worm trajectory
        for ind_N, worm_index in enumerate(good_traj_index):
            # initialize worm object, and extract data from skeletons file
            worm = WormFromTable(
                skeletons_file,
                worm_index,
                use_skel_filter=use_skel_filter,
                worm_index_type=worm_index_type,
                smooth_window=5)
            
            if is_single_worm:
                #worm with the stage correction applied
                worm.correct_schafer_worm()
                if np.all(np.isnan(worm.skeleton[:, 0, 0])):
                    print_flush('{} No valid skeletons found after stage correction. Skipping worm index {}'.format(base_name, worm_index))
                    return
            # calculate features
            timeseries_data, events_data, worm_stats = getOpenWormData(worm, wStats)
            
            #get splitted features
            splitted_worms = [x for x in worm.split(split_traj_frames)
                              if x.n_valid_skel > feat_filt_param['min_num_skel'] and
                              x.n_valid_skel / x.n_frames >= feat_filt_param['bad_seg_thresh']]
            
            dd = [getFeatStats(x, wStats)[1] for x in splitted_worms]
            splitted_feats = {stat:[x[stat] for x in dd] for stat in FUNC_FOR_DIV}

            #% add data to save
            # save timeseries data
            table_timeseries.append(timeseries_data)
            table_timeseries.flush()


            # save skeletons
            worm_coords_array['skeletons'].append(worm.skeleton)
            worm_coords_array['dorsal_contours'].append(worm.dorsal_contour)
            worm_coords_array['ventral_contours'].append(worm.ventral_contour)
            
            # save event data as a subgroup per worm
            worm_node = features_fid.create_group(
                group_events, 'worm_%i' % worm_index)
            worm_node._v_attrs['worm_index'] = worm_index
            worm_node._v_attrs['frame_range'] = np.array(
                (worm.first_frame, worm.last_frame))

            for feat in events_data:
                tmp_data = events_data[feat]
                # consider the cases where the output is a single number, empty
                # or None
                if isinstance(tmp_data, (float, int)):
                    tmp_data = np.array([tmp_data])
                if tmp_data is None or tmp_data.size == 0:
                    tmp_data = np.array([np.nan])
                features_fid.create_carray(
                    worm_node, feat, obj=tmp_data, filters=TABLE_FILTERS)

            # store the average for each worm feature
            for stat in FUNC_FOR_DIV:
                stats_features_df[stat][ind_N] = worm_stats[stat]
                
                #append the splitted traj features
                all_splitted_feats[stat] += splitted_feats[stat]
            # report progress
            _displayProgress(ind_N + 1)
        # create and save a table containing the averaged worm feature for each
        # worm
       
        f_node = features_fid.create_group('/', 'features_summary')
        for stat, stats_df in stats_features_df.items():
            splitted_feats = all_splitted_feats[stat]

            #check that the array is not empty
            if len(splitted_feats) > 0:
                splitted_feats_arr = np.array(splitted_feats)
            else:
                #return a row full of nan to indicate a fail
                splitted_feats_arr = np.full(1, np.nan, dtype=wStats.feat_avg_dtype)

            features_fid.create_table(
                f_node, 
                stat, 
                obj = stats_df, 
                filters = TABLE_FILTERS
                )
            
            feat_stat_split = features_fid.create_table(
                f_node, 
                stat + '_split', 
                obj=splitted_feats_arr, 
                filters=TABLE_FILTERS
                )
            feat_stat_split._v_attrs['split_traj_frames'] = split_traj_frames
        
            

            if stat == 'means':
                #FUTURE: I am duplicating this field for backward compatibility, I should remove it later on.
                features_fid.create_table(
                    '/', 
                    'features_means', 
                    obj = stats_df, 
                    filters = TABLE_FILTERS
                    )
                
                features_fid.create_table(
                    '/', 
                    'features_means_split', 
                    obj=splitted_feats_arr, 
                    filters=TABLE_FILTERS
                    )
        
        
    print_flush(
        base_name +
        ' Feature extraction finished: ' +
        progress_timer.get_time_str())
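For orientation, the HDF5 nodes written by the function above (names taken directly from the create_* calls in the code):

# /features_timeseries                                   table, one row per skeletonized frame
# /features_events/worm_<i>/<feat>                       per-worm event arrays
# /coordinates/{skeletons, dorsal_contours, ventral_contours}
# /features_summary/<stat> and <stat>_split              per-worm averages and split-trajectory averages
# /features_means, /features_means_split                 duplicates of the 'means' tables (backward compatibility)
# /experiment_info                                       copied from the skeletons file when present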
Example #11
def RunMultiCMD(cmd_list, 
                local_obj='', 
                max_num_process=3, 
                refresh_time=10,
                is_debug = True):
    '''Start different processes using the commands in cmd_list'''
    

    start_obj = partial(StartProcess, local_obj=local_obj, is_debug=is_debug)

    total_timer = TimeCounter() #timer to measure the total time

    cmd_list = cmd_list[::-1]  # since I am using pop to get the next element, I need to invert the list to preserve the original order
    tot_tasks = len(cmd_list)
    if tot_tasks < max_num_process:
        max_num_process = tot_tasks

    # initialize the first max_number_process in the list
    finished_tasks = []
    
    current_tasks = []
    for ii in range(max_num_process):
        cmd = cmd_list.pop()
        current_tasks.append(start_obj(cmd))

    # keep loop tasks as long as there are tasks in the list
    while current_tasks:
        time.sleep(refresh_time)

        print(GUI_CLEAR_SIGNAL)
        os.system(['clear', 'cls'][os.name == 'nt'])

        # print info of the finished tasks
        for task_finish_msg in finished_tasks:
            sys.stdout.write(task_finish_msg)

        # loop along the process list to update output and see if there is any
        # task finished
        next_tasks = []
        
        #I want to close the tasks after starting the next ones. This has the disadvantage of
        #requiring more disk space (files required for the new task + the finished files),
        #but at least a new task can start while the old results are being copied.
        tasks_to_close = [] 
        
        for task in current_tasks:
            task.read_buff()
            if task.proc.poll() is None:
                # add task to the new list if it hasn't completed
                next_tasks.append(task)
                sys.stdout.write(task.output[-1])
            else:
                # close the task and add its last output to the finished_tasks
                # list
                tasks_to_close.append(task)
                # add new task once the previous one was finished
                if cmd_list and len(next_tasks) < max_num_process:
                    cmd = cmd_list.pop()
                    next_tasks.append(start_obj(cmd))

        # if there is still space, add new tasks.
        while cmd_list and len(next_tasks) < max_num_process:
            cmd = cmd_list.pop()
            next_tasks.append(start_obj(cmd))


        #close tasks (copy finished files to final destination)
        for task in tasks_to_close:
            task.close()
            sys.stdout.write(task.output[-1])
            finished_tasks.append(task.output[-1])
                
        #start the new loop
        current_tasks = next_tasks


        #display progress
        n_finished = len(finished_tasks)
        n_remaining = len(current_tasks) + len(cmd_list)
        progress_str = 'Tasks: {} finished, {} remaining. Total_time {}.'.format(
            n_finished, n_remaining, total_timer.get_time_str())
        
        print('*************************************************')
        print(progress_str)
        print('*************************************************')

    #if I don't add this, the GUI could terminate before displaying the last text.
    sys.stdout.flush()
    time.sleep(1)
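A hedged usage sketch (the command entries are hypothetical; their exact format is whatever StartProcess in the surrounding module expects, typically one command per file to process):

cmd_list = [['python', 'process_file.py', fname] for fname in files_to_process]
RunMultiCMD(cmd_list, max_num_process=4, refresh_time=5)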
Example #12
def correctHeadTailIntensity(skeletons_file,
                             intensities_file,
                             smooth_W=5,
                             gap_size=-1,
                             min_block_size=-1,
                             local_avg_win=-1,
                             min_frac_in=0.85,
                             head_tail_param={},
                             head_tail_int_method='MEDIAN_INT'):

    output = head_tail_int_defaults(skeletons_file,
                                    smooth_W=smooth_W,
                                    gap_size=gap_size,
                                    min_block_size=min_block_size,
                                    local_avg_win=local_avg_win)
    smooth_W = output['smooth_W']
    gap_size = output['gap_size']
    min_block_size = output['min_block_size']
    local_avg_win = output['local_avg_win']

    head_tail_param = head_tail_defaults(skeletons_file, **head_tail_param)

    # get the trajectories table
    with pd.HDFStore(skeletons_file, 'r') as fid:
        trajectories_data = fid['/trajectories_data']
        # at this point the int_map_id with the intensity maps indexes must
        # exist in the table
        assert 'int_map_id' in trajectories_data

    grouped_trajectories = trajectories_data.groupby('worm_index_joined')

    tot_worms = len(grouped_trajectories)

    # variables to report progress
    base_name = skeletons_file.rpartition('.')[0].rpartition(
        os.sep)[-1].rpartition('_')[0]
    progress_timer = TimeCounter('')

    bad_worms = []  # worms with not enough difference between the normal and inverted median intensity profile
    switched_blocks = []  # data from the blocks that were switched

    #ind2check = [765]
    for index_n, (worm_index,
                  trajectories_worm) in enumerate(grouped_trajectories):
        # if not worm_index in ind2check: continue

        if index_n % 10 == 0:
            dd = " Correcting Head-Tail using intensity profiles. Worm %i of %i." % (
                index_n + 1, tot_worms)
            dd = base_name + dd + ' Total time:' + progress_timer.get_time_str()
            print_flush(dd)

        # correct head tail using the intensity profiles
        dd = correctHeadTailIntWorm(trajectories_worm, skeletons_file,
                                    intensities_file, smooth_W, gap_size,
                                    min_block_size, local_avg_win, min_frac_in,
                                    head_tail_int_method)

        switched_blocks += [(worm_index, t0, tf) for t0, tf in dd]

        # check that the final orientation is correct, otherwise switch the
        # whole trajectory

        if head_tail_int_method != 'HEAD_BRIGHTER':
            p_tot, skel_group, int_group = checkFinalOrientation(
                skeletons_file, intensities_file, trajectories_worm,
                min_block_size, head_tail_param)

            if p_tot < 0.5:
                switchBlocks(skel_group, skeletons_file, int_group,
                             intensities_file)

    # label the process as finished and store the indexes of the switched worms
    with tables.File(skeletons_file, 'r+') as fid:
        if '/intensity_analysis' not in fid:
            fid.create_group('/', 'intensity_analysis')

        if '/intensity_analysis/bad_worms' in fid:
            fid.remove_node('/intensity_analysis/bad_worms')
        if '/intensity_analysis/switched_head_tail' in fid:
            fid.remove_node('/intensity_analysis/switched_head_tail')

        if bad_worms:
            fid.create_array('/intensity_analysis', 'bad_worms',
                             np.array(bad_worms))

        if switched_blocks:
            # to rec array
            switched_blocks = np.array(switched_blocks,
                                       dtype=[('worm_index', int),
                                              ('ini_frame', int),
                                              ('last_frame', int)])
            fid.create_table('/intensity_analysis', 'switched_head_tail',
                             switched_blocks)

        fid.get_node('/skeleton')._v_attrs['has_finished'] = 4

    print_flush(base_name +
                ' Head-Tail correction using intensity profiles finished: ' +
                progress_timer.get_time_str())
Example #13
        "/data2/shared/data/twoColour/Results/*/*/*52.1g_X1_skeletons.hdf5")

    for skeletons_file in filenames:
        base_name = get_base_name(skeletons_file)
        progress_prefix = base_name + ' Calculating skeletons.'

        with pd.HDFStore(skeletons_file, 'r') as ske_file_id:
            trajectories_data = ske_file_id['/trajectories_data']
            blob_features = ske_file_id['/blob_features']

        #I want to update blob_features
        blob_features['signed_speed'] = np.float32(np.nan)
        blob_features['velocity_x'] = np.float32(np.nan)
        blob_features['velocity_y'] = np.float32(np.nan)

        progress_timer = TimeCounter('')
        with tables.File(skeletons_file, 'r') as fid:
            skeletons = fid.get_node('/skeleton')
            grouped_by_index = trajectories_data.groupby('worm_index_joined')
            tot_worms = len(grouped_by_index)
            for ii, (worm_index, worm_data) in enumerate(grouped_by_index):
                feats = blob_features.loc[worm_data.index]
                skel_coords = skeletons[worm_data.index]
                xx = feats['coord_x']
                yy = feats['coord_y']

                signed_speed, velocity = _get_signed_velocity(
                    xx, yy, skel_coords)
                #import pdb
                #pdb.set_trace()
                blob_features.loc[worm_data.index,
Example #14
def calculate_summaries(root_dir,
                        feature_type,
                        summary_type,
                        is_manual_index,
                        time_windows,
                        time_units,
                        _is_debug=False,
                        **fold_args):
    """
    Gets input from the GUI, calls the function that chooses the type of summary 
    and runs the summary calculation for each file in the root_dir.
    """
    save_base_name = 'summary_{}_{}'.format(feature_type, summary_type)
    if is_manual_index:
        save_base_name += '_manual'
    save_base_name += '_' + datetime.datetime.now().strftime('%Y%m%d_%H%M%S')

    #check the options are valid
    check_in_list(feature_type, valid_feature_types, 'feature_type')
    check_in_list(summary_type, valid_summary_types, 'summary_type')

    #convert time windows to list of integers in frame number units
    time_windows_ints = time_windows_parser(time_windows)

    #get summary function
    summary_func = get_summary_func(feature_type, summary_type,
                                    is_manual_index, **fold_args)

    #get extension of results file
    possible_ext = feature_files_ext[feature_type]
    ext = possible_ext[1] if is_manual_index else possible_ext[0]

    fnames = glob.glob(os.path.join(root_dir, '**', '*' + ext), recursive=True)
    if not fnames:
        print_flush('No valid files found. Nothing to do here.')
        return

    # EM :Make df_files list with one dataframe per time window
    dd = tuple(zip(*enumerate(sorted(fnames))))
    df_files = [
        pd.DataFrame({
            'file_id': dd[0],
            'file_name': dd[1]
        }) for x in range(len(time_windows_ints))
    ]
    for iwin in range(len(time_windows_ints)):
        df_files[iwin]['is_good'] = False

    progress_timer = TimeCounter('')

    def _displayProgress(n):
        args = (n + 1, len(df_files[0]), progress_timer.get_time_str())
        dd = "Extracting features summary. File {} of {} done. Total time: {}".format(
            *args)
        print_flush(dd)

    _displayProgress(-1)

    # EM :Make all_summaries list with one dataframe per time window
    all_summaries = [[] for x in range(len(time_windows_ints))]
    for ifile, row in df_files[0].iterrows():
        fname = row['file_name']

        df_list = summary_func(fname, time_windows_ints, time_units)
        for iwin, df in enumerate(df_list):
            try:
                df.insert(0, 'file_id', ifile)
                all_summaries[iwin].append(df)
            except (AttributeError, IOError, KeyError,
                    tables.exceptions.HDF5ExtError,
                    tables.exceptions.NoSuchNodeError):
                continue
            else:
                if not df.empty:
                    df_files[iwin].loc[ifile, 'is_good'] = True
        _displayProgress(ifile)

    for iwin in range(len(time_windows_ints)):
        all_summaries[iwin] = pd.concat(all_summaries[iwin],
                                        ignore_index=True,
                                        sort=False)

        f1 = os.path.join(
            root_dir,
            'filenames_{}_window_{}.csv'.format(save_base_name, iwin))
        df_files[iwin].to_csv(f1, index=False)

        f2 = os.path.join(
            root_dir, 'features_{}_window_{}.csv'.format(save_base_name, iwin))
        all_summaries[iwin].to_csv(f2, index=False)

    out = '****************************'
    out += '\nFINISHED. Created Files:\n-> {}\n-> {}'.format(f1, f2)

    print_flush(out)

    return df_files, all_summaries
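A hedged call sketch (the directory is hypothetical; feature_type and summary_type must be members of valid_feature_types / valid_summary_types, and the time_windows / time_units values follow whatever time_windows_parser accepts, so the literals below are placeholders only):

df_files, all_summaries = calculate_summaries(
    '/path/to/Results',
    feature_type='tierpsy',
    summary_type='plate',
    is_manual_index=False,
    time_windows='0:end',
    time_units='seconds')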
Example #15
def _get_timeseries_feats(features_file, delta_time=1 / 3):
    '''
    Get all the time series features from the skeletons
    '''
    timeseries_features = []
    fps = read_fps(features_file)

    with pd.HDFStore(features_file, 'r') as fid:
        trajectories_data = fid['/trajectories_data']

    #only use data that was skeletonized
    #trajectories_data = trajectories_data[trajectories_data['skeleton_id']>=0]

    trajectories_data_g = trajectories_data.groupby('worm_index_joined')
    progress_timer = TimeCounter('')
    base_name = get_base_name(features_file)
    tot_worms = len(trajectories_data_g)

    def _display_progress(n):
        # display progress
        dd = " Calculating tierpsy features. Worm %i of %i done." % (n + 1,
                                                                     tot_worms)
        print_flush(base_name + dd + ' Total time:' +
                    progress_timer.get_time_str())

    _display_progress(0)

    with tables.File(features_file, 'r') as fid:
        if '/food_cnt_coord' in fid:
            food_cnt = fid.get_node('/food_cnt_coord')[:]
        else:
            food_cnt = None

        #If i find the ventral side in the multiworm case this has to change
        ventral_side = read_ventral_side(features_file)

        timeseries_features = []
        for ind_n, (worm_index, worm_data) in enumerate(trajectories_data_g):
            with tables.File(features_file, 'r') as fid:
                skel_id = worm_data['skeleton_id'].values

                #deal with any nan in the skeletons
                good_id = skel_id >= 0
                skel_id_val = skel_id[good_id]
                traj_size = skel_id.size

                args = []
                for p in ('skeletons', 'widths', 'dorsal_contours',
                          'ventral_contours'):
                    node = fid.get_node('/coordinates/' + p)

                    dat = np.full((traj_size, *node.shape[1:]), np.nan)
                    if skel_id_val.size > 0:
                        if len(node.shape) == 3:
                            dd = node[skel_id_val, :, :]
                        else:
                            dd = node[skel_id_val, :]
                        dat[good_id] = dd

                    args.append(dat)

                timestamp = worm_data['timestamp_raw'].values.astype(np.int32)

            feats = get_timeseries_features(*args,
                                            timestamp=timestamp,
                                            food_cnt=food_cnt,
                                            fps=fps,
                                            ventral_side=ventral_side)
            #save timeseries features data
            feats = feats.astype(np.float32)
            feats['worm_index'] = worm_index
            #move the last fields to the first columns
            cols = feats.columns.tolist()
            cols = cols[-1:] + cols[:-1]
            feats = feats[cols]

            feats['worm_index'] = feats['worm_index'].astype(np.int32)
            feats['timestamp'] = feats['timestamp'].astype(np.int32)

            timeseries_features.append(feats)
            _display_progress(ind_n)

        timeseries_features = pd.concat(timeseries_features, ignore_index=True)

    return timeseries_features
Example #16
def compressVideo(video_file,
                  masked_image_file,
                  mask_param,
                  expected_fps=25,
                  microns_per_pixel=None,
                  bgnd_param={},
                  buffer_size=-1,
                  save_full_interval=-1,
                  max_frame=1e32,
                  is_extract_timestamp=False,
                  fovsplitter_param={}):
    '''
    Compresses the video by selecting pixels that are likely to contain worms and setting the rest of
    the image to zero. By creating a large amount of redundant data, any lossless compression
    algorithm will dramatically increase its efficiency. The masked images are saved as hdf5 with gzip compression.
    The mask is calculated over a minimum projection of an image stack. This projection preserves darker regions
    (or brighter regions, in the case of fluorescent labelling)
    where the worm is more likely to be located. Additionally it has the advantage of reducing
    the processing load by only requiring the mask to be calculated once per image stack.
     video_file --  original video file
     masked_image_file --
     buffer_size -- size of the image stack used to calculate the minimal projection and the mask
     save_full_interval -- how often a full image is saved
     max_frame -- last frame saved (default a very large number, so it goes until the end of the video)
     mask_param -- parameters used to calculate the mask
    '''
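    # Illustration (an assumption, not part of this function): for a light background the
    # "minimum projection" mentioned above is simply the darkest value each pixel takes
    # across the buffer, e.g. img_min = np.min(image_stack, axis=0); a fluorescence setup
    # with a dark background would use np.max instead (this is presumably what reduceBuffer
    # selects between below via mask_param['is_light_background']).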

    #get the default values if there is any bad parameter
    output = compress_defaults(masked_image_file,
                               expected_fps,
                               buffer_size=buffer_size,
                               save_full_interval=save_full_interval)

    buffer_size = output['buffer_size']
    save_full_interval = output['save_full_interval']

    if len(bgnd_param) > 0:
        is_bgnd_subtraction = True
        assert bgnd_param['buff_size'] > 0 and bgnd_param['frame_gap'] > 0
    else:
        is_bgnd_subtraction = False

    if len(fovsplitter_param) > 0:
        is_fov_tosplit = True
        assert all(key in fovsplitter_param
                   for key in ['total_n_wells', 'whichsideup', 'well_shape'])
        assert fovsplitter_param['total_n_wells'] > 0
    else:
        is_fov_tosplit = False

    # processes identifier.
    base_name = masked_image_file.rpartition('.')[0].rpartition(os.sep)[-1]

    # select the video reader class according to the file type.
    vid = selectVideoReader(video_file)

    # delete any previous file if it existed
    with tables.File(masked_image_file, "w") as mask_fid:
        pass

    #Extract metadata
    if is_extract_timestamp:
        # extract and store video metadata using ffprobe
        #NOTE: i cannot calculate /timestamp until i am sure of the total number of frames
        print_flush(base_name + ' Extracting video metadata...')
        expected_frames = store_meta_data(video_file, masked_image_file)

    else:
        expected_frames = 1

    # Initialize background subtraction if required

    if is_bgnd_subtraction:
        print_flush(base_name + ' Initializing background subtraction.')
        bgnd_subtractor = BackgroundSubtractorVideo(video_file, **bgnd_param)

    # initialize some variables
    max_intensity, min_intensity = np.nan, np.nan
    frame_number = 0
    full_frame_number = 0
    image_prev = np.zeros([])

    # Initialise FOV splitting if needed
    if is_bgnd_subtraction:
        img_fov = bgnd_subtractor.bgnd.astype(np.uint8)
    else:
        ret, img_fov = vid.read()
        # close and reopen the video, to restart from the beginning
        vid.release()
        vid = selectVideoReader(video_file)

    if is_fov_tosplit:
        # TODO: change class creator so it only needs the video name? by using
        # Tierpsy's functions such as selectVideoReader it can then read the first image by itself

        camera_serial = parse_camera_serial(masked_image_file)

        fovsplitter = FOVMultiWellsSplitter(img_fov,
                                            camera_serial=camera_serial,
                                            px2um=microns_per_pixel,
                                            **fovsplitter_param)
        wells_mask = fovsplitter.wells_mask
    else:
        wells_mask = None

    # initialize timers
    print_flush(base_name + ' Starting video compression.')

    if expected_frames == 1:
        progressTime = TimeCounter('Compressing video.')
    else:
        #if we know the number of frames display it in the progress
        progressTime = TimeCounter('Compressing video.', expected_frames)

    with tables.File(masked_image_file, "r+") as mask_fid:

        #initialize masks groups
        attr_params = dict(expected_fps=expected_fps,
                           microns_per_pixel=microns_per_pixel,
                           is_light_background=int(
                               mask_param['is_light_background']))
        mask_dataset, full_dataset, mean_intensity = initMasksGroups(
            mask_fid, expected_frames, vid.height, vid.width, attr_params,
            save_full_interval)

        if is_bgnd_subtraction:
            bg_dataset = createImgGroup(mask_fid,
                                        "/bgnd",
                                        1,
                                        vid.height,
                                        vid.width,
                                        is_expandable=False)
            bg_dataset[0, :, :] = img_fov

        if vid.dtype != np.uint8:
            # this will work as a flag to be sure that the normalization took place.
            normalization_range = mask_fid.create_earray(
                '/',
                'normalization_range',
                atom=tables.Float32Atom(),
                shape=(0, 2),
                expectedrows=expected_frames,
                filters=TABLE_FILTERS)

        while frame_number < max_frame:

            ret, image = vid.read()
            if ret != 0:
                # increase frame number
                frame_number += 1

                # opencv can give an artificial rgb image. Let's get it back to
                # gray scale.
                if image.ndim == 3:
                    image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

                if image.dtype != np.uint8:
                    # normalise image intensities if the data type is other
                    # than uint8
                    image, img_norm_range = normalizeImage(image)
                    normalization_range.append(img_norm_range)

                #limit the image range to 1 to 255, 0 is a reserved value for the background
                assert image.dtype == np.uint8
                image = np.clip(image, 1, 255)

                # Add a full frame every save_full_interval
                if frame_number % save_full_interval == 1:
                    full_dataset.append(image[np.newaxis, :, :])
                    full_frame_number += 1

                # buffer index
                ind_buff = (frame_number - 1) % buffer_size

                # initialize the buffer when the index corresponds to 0
                if ind_buff == 0:
                    Ibuff = np.zeros((buffer_size, vid.height, vid.width),
                                     dtype=np.uint8)

                # add image to the buffer
                Ibuff[ind_buff, :, :] = image.copy()
                mean_int = np.mean(image)
                assert mean_int >= 0
                mean_intensity.append(np.array([mean_int]))

            else:
                # sometimes the last image is all zeros, control for this case
                if np.all(Ibuff[ind_buff] == 0):
                    frame_number -= 1
                    ind_buff -= 1

                # close the buffer
                Ibuff = Ibuff[:ind_buff + 1]

            # mask buffer and save data into the hdf5 file
            if (ind_buff == buffer_size - 1 or ret == 0) and Ibuff.size > 0:
                if is_bgnd_subtraction:
                    Ibuff_b = bgnd_subtractor.apply(Ibuff, frame_number)
                else:
                    Ibuff_b = Ibuff

                #calculate the max/min in the of the buffer
                img_reduce = reduceBuffer(Ibuff_b,
                                          mask_param['is_light_background'])

                mask = getROIMask(img_reduce,
                                  wells_mask=wells_mask,
                                  **mask_param)

                Ibuff *= mask

                # now apply the well_mask if is MWP
                if is_fov_tosplit:
                    fovsplitter.apply_wells_mask(
                        Ibuff)  # Ibuff will be modified after this

                # add buffer to the hdf5 file
                frame_first_buff = frame_number - Ibuff.shape[0]
                mask_dataset.append(Ibuff)

            if frame_number % 500 == 0:
                # calculate the progress and put it in a string
                progress_str = progressTime.get_str(frame_number)
                print_flush(base_name + ' ' + progress_str)

            # finish process
            if ret == 0:
                break

        # now that the whole video is read, we definitely have a better estimate
        # for its number of frames. so set the save_interval again
        if is_bgnd_subtraction:
            # bg_dataset._v_attrs['save_interval'] = len(vid)
            # the above line is not accurate when using ffmpeg,
            # it's just safer to do:
            bg_dataset._v_attrs['save_interval'] = mask_dataset.shape[0]

        # close the video
        vid.release()

    # save fovsplitting data
    if is_fov_tosplit:
        fovsplitter.write_fov_wells_to_file(masked_image_file)
        if fovsplitter.is_dubious:
            print(f'Check {masked_image_file} for plate alignment')

    read_and_save_timestamp(masked_image_file)
    print_flush(base_name + ' Compressed video done.')
def save_timeseries_feats_table(features_file,
                                derivate_delta_time,
                                fovsplitter_param={}):
    timeseries_features = []
    fps = read_fps(features_file)

    # initialise class for splitting fov
    if len(fovsplitter_param) > 0:
        is_fov_tosplit = True
        assert all(key in fovsplitter_param
                   for key in ['total_n_wells', 'whichsideup', 'well_shape'])
        assert fovsplitter_param['total_n_wells'] > 0
    else:
        is_fov_tosplit = False
    print('is fov to split?', is_fov_tosplit)

    if is_fov_tosplit:
        # split fov in wells
        masked_image_file = features_file.replace('Results', 'MaskedVideos')
        masked_image_file = masked_image_file.replace('_featuresN.hdf5',
                                                      '.hdf5')
        #        fovsplitter = FOVMultiWellsSplitter(masked_image_file=masked_image_file,
        #                                            total_n_wells=fovsplitter_param['total_n_wells'],
        #                                            whichsideup=fovsplitter_param['whichsideup'],
        #                                            well_shape=fovsplitter_param['well_shape'])
        fovsplitter = FOVMultiWellsSplitter(masked_image_file,
                                            **fovsplitter_param)
        # store wells data in the features file
        fovsplitter.write_fov_wells_to_file(features_file)

    with pd.HDFStore(features_file, 'r') as fid:
        trajectories_data = fid['/trajectories_data']

    trajectories_data_g = trajectories_data.groupby('worm_index_joined')
    progress_timer = TimeCounter('')
    base_name = get_base_name(features_file)
    tot_worms = len(trajectories_data_g)

    def _display_progress(n):
        # display progress
        dd = " Calculating tierpsy features. Worm %i of %i done." % (n + 1,
                                                                     tot_worms)
        print_flush(base_name + dd + ' Total time:' +
                    progress_timer.get_time_str())

    _display_progress(0)
    with tables.File(features_file, 'r+') as fid:

        for gg in [
                '/timeseries_data', '/event_durations', '/timeseries_features'
        ]:
            if gg in fid:
                fid.remove_node(gg)

        feat_dtypes = [(x, np.float32) for x in timeseries_all_columns]

        feat_dtypes = [('worm_index', np.int32), ('timestamp', np.int32),
                       ('well_name', 'S3')] + feat_dtypes

        timeseries_features = fid.create_table('/',
                                               'timeseries_data',
                                               obj=np.recarray(0, feat_dtypes),
                                               filters=TABLE_FILTERS)

        if '/food_cnt_coord' in fid:
            food_cnt = fid.get_node('/food_cnt_coord')[:]
        else:
            food_cnt = None

        #If i find the ventral side in the multiworm case this has to change
        ventral_side = read_ventral_side(features_file)

        for ind_n, (worm_index, worm_data) in enumerate(trajectories_data_g):

            skel_id = worm_data['skeleton_id'].values

            #deal with any nan in the skeletons
            good_id = skel_id >= 0
            skel_id_val = skel_id[good_id]
            traj_size = skel_id.size

            args = []
            for p in ('skeletons', 'widths', 'dorsal_contours',
                      'ventral_contours'):

                node_str = '/coordinates/' + p
                if node_str in fid:
                    node = fid.get_node(node_str)
                    dat = np.full((traj_size, *node.shape[1:]), np.nan)
                    if skel_id_val.size > 0:
                        if len(node.shape) == 3:
                            dd = node[skel_id_val, :, :]
                        else:
                            dd = node[skel_id_val, :]
                        dat[good_id] = dd
                else:
                    dat = None

                args.append(dat)

            timestamp = worm_data['timestamp_raw'].values.astype(np.int32)

            feats = get_timeseries_features(
                *args,
                timestamp=timestamp,
                food_cnt=food_cnt,
                fps=fps,
                ventral_side=ventral_side,
                derivate_delta_time=derivate_delta_time)
            #save timeseries features data
            feats = feats.astype(np.float32)
            feats['worm_index'] = worm_index
            if is_fov_tosplit:
                feats[
                    'well_name'] = fovsplitter.find_well_from_trajectories_data(
                        worm_data)
            else:
                feats['well_name'] = 'n/a'
            # cast well_name to the correct type
            # (before shuffling columns, so it remains the last entry)
            # needed because for some reason this does not work:
            # feats['well_name'] = feats['well_name'].astype('S3')
            feats['_well_name'] = feats['well_name'].astype('S3')
            feats.drop(columns='well_name', inplace=True)
            feats.rename(columns={'_well_name': 'well_name'}, inplace=True)

            #move the last fields to the first columns
            cols = feats.columns.tolist()
            cols = cols[-2:] + cols[:-2]
            cols[1], cols[2] = cols[2], cols[1]

            feats = feats[cols]

            feats['worm_index'] = feats['worm_index'].astype(np.int32)
            feats['timestamp'] = feats['timestamp'].astype(np.int32)
            feats = feats.to_records(index=False)

            timeseries_features.append(feats)
            _display_progress(ind_n)
Example #18
DEBUG = False

if __name__ == '__main__':
    skeletons_file = '/Volumes/behavgenom_archive$/Serena/SpikingDatasetRecordings51-64/Results/recording60/recording60.2g/recording60.2g_X1_skeletons.hdf5'

    base_name = get_base_name(skeletons_file)
    progress_prefix =  base_name + ' Calculating skeletons.'
    
    with pd.HDFStore(skeletons_file, 'r') as ske_file_id:
        trajectories_data = ske_file_id['/trajectories_data']
        blob_features = ske_file_id['/blob_features']
    
    #I want to update blob_features
    blob_features['signed_speed'] = np.nan
    
    progress_timer = TimeCounter('')
    with tables.File(skeletons_file, 'r') as fid:
        skeletons = fid.get_node('/skeleton')
        grouped_by_index = trajectories_data.groupby('worm_index_joined')
        tot_worms = len(grouped_by_index)
        for ii, (worm_index, worm_data) in enumerate(grouped_by_index):
            feats = blob_features.loc[worm_data.index]
            skel_coords = skeletons[worm_data.index]
            xx = feats['coord_x']
            yy = feats['coord_y']
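            # _get_signed_velocity presumably returns one value per transition between
            # consecutive frames, so there is one value fewer than rows and it is
            # assigned to every row except the last one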
            
            signed_speed = _get_signed_velocity(xx, yy, skel_coords)
            blob_features.loc[worm_data.index[:-1], 'signed_speed'] = signed_speed
            
            if ii % 100 == 0:
                dd = " Calculating signed speed. Worm %i of %i." % (ii + 1, tot_worms)
def save_timeseries_feats_table(features_file, derivate_delta_time):
    timeseries_features = []
    fps = read_fps(features_file)

    with pd.HDFStore(features_file, 'r') as fid:
        trajectories_data = fid['/trajectories_data']

    #only use data that was skeletonized
    #trajectories_data = trajectories_data[trajectories_data['skeleton_id']>=0]

    trajectories_data_g = trajectories_data.groupby('worm_index_joined')
    progress_timer = TimeCounter('')
    base_name = get_base_name(features_file)
    tot_worms = len(trajectories_data_g)

    def _display_progress(n):
        # display progress
        dd = " Calculating tierpsy features. Worm %i of %i done." % (n + 1,
                                                                     tot_worms)
        print_flush(base_name + dd + ' Total time:' +
                    progress_timer.get_time_str())

    _display_progress(0)
    with tables.File(features_file, 'r+') as fid:

        for gg in [
                '/timeseries_data', '/event_durations', '/timeseries_features'
        ]:
            if gg in fid:
                fid.remove_node(gg)

        feat_dtypes = [(x, np.float32) for x in timeseries_all_columns]

        feat_dtypes = [('worm_index', np.int32),
                       ('timestamp', np.int32)] + feat_dtypes
        timeseries_features = fid.create_table('/',
                                               'timeseries_data',
                                               obj=np.recarray(0, feat_dtypes),
                                               filters=TABLE_FILTERS)
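        # the table starts empty; the per-worm records computed in the loop below are appended to it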

        if '/food_cnt_coord' in fid:
            food_cnt = fid.get_node('/food_cnt_coord')[:]
        else:
            food_cnt = None

        #If I find the ventral side in the multiworm case this has to change
        ventral_side = read_ventral_side(features_file)

        for ind_n, (worm_index, worm_data) in enumerate(trajectories_data_g):
            # the file is already open above in 'r+' mode as fid, so it is not reopened here
            skel_id = worm_data['skeleton_id'].values

            #deal with any nan in the skeletons
            good_id = skel_id >= 0
            skel_id_val = skel_id[good_id]
            traj_size = skel_id.size

            args = []
            for p in ('skeletons', 'widths', 'dorsal_contours',
                      'ventral_contours'):

                node_str = '/coordinates/' + p
                if node_str in fid:
                    node = fid.get_node(node_str)
                    dat = np.full((traj_size, *node.shape[1:]), np.nan)
                    if skel_id_val.size > 0:
                        if len(node.shape) == 3:
                            dd = node[skel_id_val, :, :]
                        else:
                            dd = node[skel_id_val, :]
                        dat[good_id] = dd
                else:
                    dat = None

                args.append(dat)

            timestamp = worm_data['timestamp_raw'].values.astype(np.int32)

            feats = get_timeseries_features(
                *args,
                timestamp=timestamp,
                food_cnt=food_cnt,
                fps=fps,
                ventral_side=ventral_side,
                derivate_delta_time=derivate_delta_time)
            #save timeseries features data
            feats = feats.astype(np.float32)
            feats['worm_index'] = worm_index
            #move the last fields to the first columns
            cols = feats.columns.tolist()
            cols = cols[-1:] + cols[:-1]
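            # move worm_index to the front; the field order should then match the
            # (worm_index, timestamp, ...) dtypes declared for /timeseries_data above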
            feats = feats[cols]

            feats['worm_index'] = feats['worm_index'].astype(np.int32)
            feats['timestamp'] = feats['timestamp'].astype(np.int32)
            feats = feats.to_records(index=False)

            timeseries_features.append(feats)
            _display_progress(ind_n)
Example #20
0
def calculate_summaries(root_dir,
                        feature_type,
                        summary_type,
                        is_manual_index,
                        time_windows,
                        time_units,
                        select_feat='all',
                        keywords_include='',
                        keywords_exclude='',
                        n_processes=1,
                        _is_debug=False,
                        **fold_args):
    """
    Gets input from the GUI, calls the function that chooses the type of summary 
    and runs the summary calculation for each file in the root_dir.
    """
    save_base_name = 'summary_{}_{}'.format(feature_type, summary_type)
    if is_manual_index:
        save_base_name += '_manual'
    save_base_name += '_' + datetime.datetime.now().strftime('%Y%m%d_%H%M%S')

    #check the options are valid
    check_in_list(feature_type, valid_feature_types, 'feature_type')
    check_in_list(summary_type, valid_summary_types, 'summary_type')

    # EM : convert time windows to list of integers in frame number units
    time_windows_ints = time_windows_parser(time_windows)

    # EM : get list of keywords to include and to exclude
    # TODO: catch conflicts
    keywords_in = keywords_parser(keywords_include)
    keywords_ex = keywords_parser(keywords_exclude)

    # EM : get full path to feature set file
    selected_feat = feat_set_parser(select_feat)

    #get summary function
    # INPUT time windows time units here
    summary_func = get_summary_func(feature_type, summary_type,
                                    time_windows_ints, time_units,
                                    is_manual_index, **fold_args)

    #get extension of results file
    possible_ext = feature_files_ext[feature_type]
    ext = possible_ext[1] if is_manual_index else possible_ext[0]

    fnames = glob.glob(os.path.join(root_dir, '**', '*' + ext), recursive=True)

    if len(fnames) == 0:
        print_flush('No valid files found. Nothing to do here.')
        return None, None

    # EM :Make df_files list with one features_summaries dataframe per time window
    df_files = make_df_filenames(fnames, time_windows_ints, time_units)

    progress_timer = TimeCounter('')

    def _displayProgress(n):
        args = (n + 1, len(df_files[0]), progress_timer.get_time_str())
        dd = "Extracting features summary. File {} of {} done. Total time: {}".format(
            *args)
        print_flush(dd)

    _displayProgress(-1)

    # EM :Make all_summaries list with one element per time window. Each element contains
    # the extracted feature summaries from all the files for the given time window.

    all_summaries = [[] for x in range(len(time_windows_ints))]

    #use partial to redefine this function, otherwise it will not be picklable for multiprocessing
    _process_row = partial(process_helper,
                           summary_func=summary_func,
                           time_windows_ints=time_windows_ints,
                           time_units=time_units)

    data2process = [x for x in df_files[0].iterrows()]

    n_processes = max(n_processes, 1)
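    # run serially with the builtin map when a single process is requested;
    # otherwise distribute the rows over a multiprocessing pool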
    if n_processes <= 1:
        gen = map(_process_row, data2process)
    else:
        p = mp.Pool(n_processes)
        gen = p.imap(_process_row, data2process)

    for ii, (ifile, df_list) in enumerate(gen):
        #reformat the outputs and remove any failed
        for iwin, df in enumerate(df_list):
            df.insert(0, 'file_id', ifile)
            all_summaries[iwin].append(df)
            if not df.empty:
                df_files[iwin].loc[ifile, 'is_good'] = True
        _displayProgress(ii + 1)

    # EM : Concatenate summaries for each window into one dataframe and select features
    for iwin in range(len(time_windows_ints)):
        all_summaries[iwin] = pd.concat(all_summaries[iwin],
                                        ignore_index=True,
                                        sort=False)
        all_summaries[iwin] = select_features(all_summaries[iwin], keywords_in,
                                              keywords_ex, selected_feat)

        # EM : Save results
        if select_feat != 'all':
            win_save_base_name = save_base_name.replace(
                'tierpsy', select_feat + '_tierpsy')
        else:
            win_save_base_name = save_base_name

        if not (len(time_windows_ints) == 1
                and time_windows_ints[0] == [0, -1]):
            win_save_base_name = win_save_base_name + '_window_{}'.format(iwin)

        f1 = os.path.join(root_dir,
                          'filenames_{}.csv'.format(win_save_base_name))
        f2 = os.path.join(root_dir,
                          'features_{}.csv'.format(win_save_base_name))

        df_files[iwin].to_csv(f1, index=False)
        all_summaries[iwin].to_csv(f2, index=False)

    out = '****************************'
    out += '\nFINISHED. Created Files:\n-> {}\n-> {}'.format(f1, f2)

    print_flush(out)

    return df_files, all_summaries
Example #21
0
def smooth_skeletons_table(skeletons_file,
                           features_file,
                           is_WT2=False,
                           skel_smooth_window=5,
                           coords_smooth_window_s=0.25,
                           gap_to_interp_s=0.25):

    #%%

    #%%
    fps = read_fps(skeletons_file)
    coords_smooth_window = int(np.round(fps * coords_smooth_window_s))
    gap_to_interp = int(np.round(fps * gap_to_interp_s))

    if coords_smooth_window <= 3:  #window too small, do not smooth
        coords_smooth_window = None

    trajectories_data = _r_fill_trajectories_data(skeletons_file)
    #%%
    trajectories_data_g = trajectories_data.groupby('worm_index_joined')
    progress_timer = TimeCounter('')
    base_name = get_base_name(skeletons_file)
    tot_worms = len(trajectories_data_g)

    def _display_progress(n):
        # display progress
        dd = " Smoothing skeletons. Worm %i of %i done." % (n, tot_worms)
        print_flush(base_name + dd + ' Total time:' +
                    progress_timer.get_time_str())

    _display_progress(0)
    #%%

    #initialize arrays
    food_cnt = read_food_contour(skeletons_file)
    with tables.File(skeletons_file, 'r') as fid:
        n_segments = fid.get_node('/skeleton').shape[1]

    with tables.File(features_file, 'w') as fid_features:
        if food_cnt is not None:
            fid_features.create_array('/',
                                      'food_cnt_coord',
                                      obj=food_cnt.astype(np.float32))

        worm_coords_array = {}
        w_node = fid_features.create_group('/', 'coordinates')
        for array_name in [
                'skeletons', 'dorsal_contours', 'ventral_contours', 'widths'
        ]:
            if array_name != 'widths':
                a_shape = (0, n_segments, 2)
            else:
                a_shape = (0, n_segments)

            worm_coords_array[array_name] = fid_features.create_earray(
                w_node,
                array_name,
                shape=a_shape,
                atom=tables.Float32Atom(shape=()),
                filters=TABLE_FILTERS)

        tot_skeletons = 0
        for ind_n, (worm_index, worm_data) in enumerate(trajectories_data_g):
            if worm_data['was_skeletonized'].sum() < 2:
                continue

            worm = WormFromTable(skeletons_file,
                                 worm_index,
                                 worm_index_type='worm_index_joined')

            if is_WT2:
                worm.correct_schafer_worm()
            if np.sum(~np.isnan(worm.skeleton[:, 0, 0])) <= 2:
                warnings.warn('Not enough data to smooth. Empty file?')
                wormN = worm

            else:
                wormN = SmoothedWorm(worm.skeleton,
                                     worm.widths,
                                     worm.ventral_contour,
                                     worm.dorsal_contour,
                                     skel_smooth_window=skel_smooth_window,
                                     coords_smooth_window=coords_smooth_window,
                                     gap_to_interp=gap_to_interp)
            dat_index = pd.Series(False,
                                  index=worm_data['timestamp_raw'].values)

            try:
                dat_index[worm.timestamp] = True
            except ValueError:
                import pdb
                pdb.set_trace()

            #%%
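            # assign consecutive global skeleton ids to this worm and point
            # trajectories_data at the rows that are appended to /coordinates below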
            skeleton_id = np.arange(wormN.skeleton.shape[0]) + tot_skeletons
            tot_skeletons = skeleton_id[-1] + 1
            row_ind = worm_data.index[dat_index.values]
            trajectories_data.loc[row_ind, 'skeleton_id'] = skeleton_id
            #%%
            #add data
            worm_coords_array['skeletons'].append(getattr(wormN, 'skeleton'))
            worm_coords_array['dorsal_contours'].append(
                getattr(wormN, 'dorsal_contour'))
            worm_coords_array['ventral_contours'].append(
                getattr(wormN, 'ventral_contour'))
            worm_coords_array['widths'].append(getattr(wormN, 'widths'))

            #display progress
            _display_progress(ind_n + 1)

        #save trajectories data
        newT = fid_features.create_table(
            '/',
            'trajectories_data',
            obj=trajectories_data.to_records(index=False),
            filters=TABLE_FILTERS)
        copy_unit_conversions(newT, skeletons_file)
        newT._v_attrs['is_WT2'] = is_WT2
        newT._v_attrs['ventral_side'] = read_ventral_side(skeletons_file)

        #save blob features interpolating in dropped frames and stage movement (WT2)
        blob_features = _r_fill_blob_features(skeletons_file,
                                              trajectories_data, is_WT2)
        if blob_features is not None:
            fid_features.create_table(
                '/',
                'blob_features',
                obj=blob_features.to_records(index=False),
                filters=TABLE_FILTERS)
Example #22
0
def getFoodFeatures(mask_file,
                    skeletons_file,
                    features_file=None,
                    cnt_method='NN',
                    solidity_th=0.98,
                    batch_size=100000,
                    _is_debug=False):
    if features_file is None:
        features_file = remove_ext(skeletons_file) + '_featuresN.hdf5'

    base_name = get_base_name(mask_file)

    progress_timer = TimeCounter('')
    print_flush("{} Calculating food features {}".format(
        base_name, progress_timer.get_time_str()))

    food_cnt = calculate_food_cnt(mask_file,
                                  method=cnt_method,
                                  solidity_th=solidity_th,
                                  _is_debug=_is_debug)
    microns_per_pixel = read_microns_per_pixel(skeletons_file)

    #store contour coordinates in pixels into the skeletons file for visualization purposes
    food_cnt_pix = food_cnt / microns_per_pixel
    with tables.File(skeletons_file, 'r+') as fid:
        if '/food_cnt_coord' in fid:
            fid.remove_node('/food_cnt_coord')
        if _is_valid_cnt(food_cnt):
            tab = fid.create_array('/', 'food_cnt_coord', obj=food_cnt_pix)
            tab._v_attrs['method'] = cnt_method

    print_flush("{} Calculating food features {}".format(
        base_name, progress_timer.get_time_str()))

    feats_names = [
        'orient_to_food_cnt', 'dist_from_food_cnt', 'closest_cnt_ind'
    ]
    feats_dtypes = [(x, np.float32) for x in feats_names]

    with tables.File(skeletons_file, 'r') as fid:
        tot_rows = fid.get_node('/skeleton').shape[0]
        features_df = np.full(tot_rows, np.nan, dtype=feats_dtypes)

        if food_cnt.size > 0:
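            # process the skeletons in batches to bound memory use; they are converted
            # from pixels to microns so they can be compared against the food contour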
            for ii in range(0, tot_rows, batch_size):
                skeletons = fid.get_node('/skeleton')[ii:ii + batch_size]
                skeletons *= microns_per_pixel

                outputs = get_cnt_feats(skeletons,
                                        food_cnt,
                                        _is_debug=_is_debug)
                for irow, row in enumerate(zip(*outputs)):
                    features_df[irow + ii] = row

    with tables.File(features_file, 'a') as fid:
        if '/food' in fid:
            fid.remove_node('/food', recursive=True)
        fid.create_group('/', 'food')
        if _is_valid_cnt(food_cnt):
            fid.create_carray('/food',
                              'cnt_coordinates',
                              obj=food_cnt,
                              filters=TABLE_FILTERS)

        fid.create_table('/food',
                         'features',
                         obj=features_df,
                         filters=TABLE_FILTERS)
    #%%
    print_flush("{} Calculating food features {}".format(
        base_name, progress_timer.get_time_str()))
Example #23
0
def get_ffprobe_metadata(video_file):
    if not os.path.exists(video_file):
        raise FileNotFoundError(video_file)

    if not os.path.exists(FFPROBE_CMD):
        raise FileNotFoundError('ffprobe not found.')
        
    command = [
        FFPROBE_CMD,
        '-v',
        'error',
        '-show_frames',
        '-print_format',
        'compact',
        video_file]
    
    base_name = video_file.rpartition('.')[0].rpartition(os.sep)[-1]
    progressTime = TimeCounter(base_name + ' Extracting video metadata.')
    
    frame_number = 0
    buff = []
    buff_err = []
    proc = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE)
    buf_reader = ReadEnqueue(proc.stdout, timeout=1)
    buf_reader_err = ReadEnqueue(proc.stderr)
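    # ReadEnqueue presumably buffers each pipe in a background thread so that
    # read() returns None instead of blocking when no new output is available yet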

    while proc.poll() is None:
        # read line without blocking
        line = buf_reader.read()
        if line is None:
            print('cannot read')
        else:
            buff.append(line)
            if "media_type" in line: #i use the filed "media_type" as a proxy for frame number (just in case the media does not have frame number)
                frame_number += 1
                if frame_number % 500 == 0:
                    print_flush(progressTime.get_str(frame_number))
        
        line = buf_reader_err.read()
        if line is not None:
            buff_err.append(line)


    #the buff is in the shape
    # frame|feat1=val1|feat2=val2|feat3=val3\n 
    # I want to store each property as a vector
    dat = [[d.split('=') for d in x.split('|')] for x in ''.join(buff).split('\n')]
    
    # use the first frame as reference
    frame_fields = [x[0] for x in dat[0] if len(x) == 2]
    
    # store data into numpy arrays
    video_metadata = OrderedDict()
    for row in dat:
        for dd in row:
            if (len(dd) != 2) or (not dd[0] in frame_fields):
                continue
            field, value = dd

            if not field in video_metadata:
                video_metadata[field] = []

            try:  # if possible convert the data into float
                value = float(value)
            except (ValueError, TypeError):
                if value == 'N/A':
                    value = np.nan
                else:
                    # pytables does not support unicode strings (python3)
                    #the str before is to convert a possible dictionary into a string before converting it to bytes
                    value = bytes(str(value), 'utf-8')

            video_metadata[field].append(value)


    #convert all the lists into numpy arrays
    video_metadata = {field:np.asarray(values) for field,values in video_metadata.items()}
    
    #convert data into a recarray to store in pytables
    video_metadata = dict2recarray(video_metadata)

    #sometimes the last frame throws a nan in the timestamp. I want to remove it
    if np.isnan(video_metadata[-1]['best_effort_timestamp']):
        video_metadata = video_metadata[:-1]

    #if there is still nan's raise an error
    if np.any(np.isnan(video_metadata['best_effort_timestamp'])):
        raise ValueError('The timestamp contains nan values')
    return video_metadata
Example #24
0
def calculate_summaries(root_dir,
                        feature_type,
                        summary_type,
                        is_manual_index,
                        _is_debug=False,
                        **fold_args):
    save_base_name = 'summary_{}_{}'.format(feature_type, summary_type)
    if is_manual_index:
        save_base_name += '_manual'
    save_base_name += '_' + datetime.datetime.now().strftime('%Y%m%d_%H%M%S')

    #check the options are valid
    check_in_list(feature_type, valid_feature_types, 'feature_type')
    check_in_list(summary_type, valid_summary_types, 'summary_type')

    summary_func = get_summary_func(feature_type, summary_type,
                                    is_manual_index, **fold_args)

    possible_ext = feature_files_ext[feature_type]
    ext = possible_ext[1] if is_manual_index else possible_ext[0]

    fnames = glob.glob(os.path.join(root_dir, '**', '*' + ext), recursive=True)
    if not fnames:
        print_flush('No valid files found. Nothing to do here.')
        return

    dd = tuple(zip(*enumerate(sorted(fnames))))
    df_files = pd.DataFrame({'file_id': dd[0], 'file_name': dd[1]})
    df_files['is_good'] = False

    progress_timer = TimeCounter('')

    def _displayProgress(n):
        args = (n + 1, len(df_files), progress_timer.get_time_str())
        dd = "Extracting features summary. File {} of {} done. Total time: {}".format(
            *args)
        print_flush(dd)

    _displayProgress(-1)
    all_summaries = []
    for ifile, row in df_files.iterrows():
        fname = row['file_name']
        try:
            df = summary_func(fname)
            df.insert(0, 'file_id', ifile)
            all_summaries.append(df)
        except (IOError, KeyError, tables.exceptions.HDF5ExtError,
                tables.exceptions.NoSuchNodeError):
            continue

        df_files.loc[ifile, 'is_good'] = True
        _displayProgress(ifile)

    all_summaries = pd.concat(all_summaries, ignore_index=True, sort=False)

    f1 = os.path.join(root_dir, 'filenames_{}.csv'.format(save_base_name))
    df_files.to_csv(f1, index=False)

    f2 = os.path.join(root_dir, 'features_{}.csv'.format(save_base_name))
    all_summaries.to_csv(f2, index=False)

    out = '****************************'
    out += '\nFINISHED. Created Files:\n-> {}\n-> {}'.format(f1, f2)

    print_flush(out)

    return df_files, all_summaries
def filterByPopulationMorphology(skeletons_file,
                                 good_skel_row,
                                 critical_alpha=0.01):
    base_name = get_base_name(skeletons_file)
    progress_timer = TimeCounter('')

    print_flush(base_name + ' Filter Skeletons: Starting...')
    with pd.HDFStore(skeletons_file, 'r') as table_fid:
        trajectories_data = table_fid['/trajectories_data']

    if not 'is_good_skel' in trajectories_data:
        trajectories_data['is_good_skel'] = trajectories_data['has_skeleton']

    if good_skel_row.size > 0:
        # only run the outlier filtering if there are valid skeletons left

        print_flush(
            base_name +
            ' Filter Skeletons: Reading features for outlier identification.')

        #add possible missing fields that were not calculated in older versions of the software
        _addMissingFields(skeletons_file)

        # calculate classifier for the outliers
        nodes4fit = ['/skeleton_length', '/contour_area', '/width_midbody']
        worm_morph = _h_nodes2Array(skeletons_file, nodes4fit, -1)
        #worm_morph[~trajectories_data['is_good_skel'].values] = np.nan
        feats4fit = [worm_morph]

        #feats4fit = _h_readFeat2Check(skeletons_file)

        print_flush(base_name +
                    ' Filter Skeletons: Calculating outliers. Total time:' +
                    progress_timer.get_time_str())

        tot_rows2fit = feats4fit[0].shape[0]
        # check all the data to fit has the same size in the first axis
        assert all(tot_rows2fit == featdat.shape[0] for featdat in feats4fit)
        outliers_rob = np.zeros(tot_rows2fit, dtype=bool)
        outliers_flag = np.zeros(tot_rows2fit, dtype=int)
        assert len(feats4fit) < 64  # otherwise the outlier flag will not work

        for out_ind, dat in enumerate(feats4fit):
            maha, out_d, lim_d = _h_getMahalanobisRobust(
                dat, critical_alpha, good_skel_row)
            outliers_rob = outliers_rob | out_d

            # flag the outlier flag by turning on the corresponding bit
            outliers_flag += (out_d) * (2**out_ind)

        print_flush(
            base_name +
            ' Filter Skeletons: Labeling valid skeletons. Total time:' +
            progress_timer.get_time_str())

        # labeled rows of valid individual skeletons as GOOD_SKE
        trajectories_data['is_good_skel'] &= ~outliers_rob
        trajectories_data['skel_outliers_flag'] = outliers_flag

    # Save the new is_good_skel column
    if trajectories_data['is_good_skel'].dtypes == bool:
        trajectories_data['is_good_skel'] = trajectories_data[
            'is_good_skel'].astype(np.uint8)
    save_modified_table(skeletons_file, trajectories_data, 'trajectories_data')

    print_flush(base_name + ' Filter Skeletons: Finished. Total time:' +
                progress_timer.get_time_str())
Example #26
0
def calculate_summaries(root_dir,
                        feature_type,
                        summary_type,
                        is_manual_index,
                        abbreviate_features,
                        dorsal_side_known,
                        time_windows='0:end',
                        time_units=None,
                        select_feat='all',
                        keywords_include='',
                        keywords_exclude='',
                        _is_debug=False,
                        **kwargs):
    """
    Gets input from the GUI, calls the function that chooses the type of
    summary and runs the summary calculation for each file in the root_dir.
    """
    filter_args = {k: kwargs[k] for k in kwargs.keys() if 'filter' in k}
    fold_args = {k: kwargs[k] for k in kwargs.keys() if 'filter' not in k}

    save_base_name = 'summary_{}_{}'.format(feature_type, summary_type)
    if is_manual_index:
        save_base_name += '_manual'
    save_base_name += '_' + datetime.datetime.now().strftime('%Y%m%d_%H%M%S')

    #check the options are valid
    check_in_list(feature_type, valid_feature_types, 'feature_type')
    check_in_list(summary_type, valid_summary_types, 'summary_type')

    # EM : convert time windows to list of integers in frame number units
    time_windows_ints = time_windows_parser(time_windows)
    filter_params = filter_args_parser(filter_args)
    # EM: get lists of strings (in a tuple) defining the feature selection
    # from keywords_in,
    # keywords_ex and select_feat.
    selected_feat = select_parser(feature_type, keywords_include,
                                  keywords_exclude, select_feat,
                                  dorsal_side_known)

    #get summary function
    # INPUT time windows time units here
    summary_func = get_summary_func(feature_type, summary_type,
                                    time_windows_ints, time_units,
                                    selected_feat, dorsal_side_known,
                                    filter_params, is_manual_index,
                                    **fold_args)

    #get extension of results file
    possible_ext = feature_files_ext[feature_type]
    ext = possible_ext[1] if is_manual_index else possible_ext[0]

    fnames = glob.glob(os.path.join(root_dir, '**', '*' + ext), recursive=True)
    if not fnames:
        print_flush('No valid files found. Nothing to do here.')
        return None, None

    # EM :Make df_files dataframe with filenames and file ids
    df_files = make_df_filenames(fnames)

    # EM : Create features_summaries and filenames_summaries files
    #      and write headers
    fnames_files = []
    featsum_files = []
    for iwin in range(len(time_windows_ints)):
        # EM : Create features_summaries and filenames_summaries files
        if select_feat != 'all':
            win_save_base_name = save_base_name.replace(
                'tierpsy', select_feat + '_tierpsy')
        else:
            win_save_base_name = save_base_name

        if not (len(time_windows_ints) == 1
                and time_windows_ints[0] == [0, -1]):
            win_save_base_name = win_save_base_name + '_window_{}'.format(iwin)

        f1 = os.path.join(root_dir,
                          'filenames_{}.csv'.format(win_save_base_name))
        f2 = os.path.join(root_dir,
                          'features_{}.csv'.format(win_save_base_name))

        fnamesum_headers = get_fnamesum_headers(f2, feature_type, summary_type,
                                                iwin, time_windows_ints[iwin],
                                                time_units,
                                                len(time_windows_ints),
                                                select_feat, filter_params,
                                                df_files.columns.to_list())
        featsum_headers = get_featsum_headers(f1)

        with open(f1, 'w') as fid:
            fid.write(fnamesum_headers)

        with open(f2, 'w') as fid:
            fid.write(featsum_headers)

        fnames_files.append(f1)
        featsum_files.append(f2)

    progress_timer = TimeCounter('')

    def _displayProgress(n):
        args = (n + 1, len(df_files), progress_timer.get_time_str())
        dd = "Extracting features summary. "
        dd += "File {} of {} done. Total time: {}".format(*args)
        print_flush(dd)

    _displayProgress(-1)

    # EM : Extract feature summaries from all the files for all time windows.
    is_featnames_written = [False for i in range(len(time_windows_ints))]

    for ifile, row in df_files.iterrows():
        fname = row['filename']
        file_id = row['file_id']

        summaries_per_win = summary_func(fname)
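        # summary_func returns one dataframe per time window; each one is written
        # to its own filenames/features csv pair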

        for iwin, df in enumerate(summaries_per_win):

            f1 = fnames_files[iwin]
            f2 = featsum_files[iwin]

            try:
                df.insert(0, 'file_id', file_id)
                df = sort_columns(df, selected_feat)
            except (AttributeError, IOError, KeyError,
                    tables.exceptions.HDF5ExtError,
                    tables.exceptions.NoSuchNodeError):
                continue
            else:
                # Get the filename summary line
                filenames = row.copy()
                if not df.empty:
                    filenames['is_good'] = True
                # Store the filename summary line
                with open(f1, 'a') as fid:
                    fid.write(','.join([str(x)
                                        for x in filenames.values]) + "\n")

                if not df.empty:
                    # Abbreviate names
                    if abbreviate_features:
                        df = shorten_feature_names(df)

                    # Store line(s) of features summaries for the given file
                    # and given window
                    with open(f2, 'a') as fid:
                        if not is_featnames_written[iwin]:
                            df.to_csv(fid, header=True, index=False)
                            is_featnames_written[iwin] = True
                        else:
                            df.to_csv(fid, header=False, index=False)

        _displayProgress(ifile)

    out = '****************************'
    out += '\nFINISHED. Created Files:'
    for f1, f2 in zip(fnames_files, featsum_files):
        out += '\n-> {}\n-> {}'.format(f1, f2)

    print_flush(out)

    return df_files
Example #27
0
def calculate_summaries(root_dir, feature_type, summary_type, is_manual_index, time_windows, time_units,
                        select_feat, keywords_include, keywords_exclude, abbreviate_features, _is_debug = False, **fold_args):
    """
    Gets input from the GUI, calls the function that chooses the type of summary
    and runs the summary calculation for each file in the root_dir.
    """
    save_base_name = 'summary_{}_{}'.format(feature_type, summary_type)
    if is_manual_index:
        save_base_name += '_manual'
    save_base_name += '_' + datetime.datetime.now().strftime('%Y%m%d_%H%M%S')

    #check the options are valid
    check_in_list(feature_type, valid_feature_types, 'feature_type')
    check_in_list(summary_type, valid_summary_types, 'summary_type')

    # EM : convert time windows to list of integers in frame number units
    time_windows_ints = time_windows_parser(time_windows)

    # EM : get list of keywords to include and to exclude
    # TODO: catch conflicts
    keywords_in = keywords_parser(keywords_include)
    keywords_ex = keywords_parser(keywords_exclude)

    # EM : get full path to feature set file
    selected_feat = feat_set_parser(select_feat)

    #get summary function
    # INPUT time windows time units here
    summary_func = get_summary_func(feature_type, summary_type, time_windows_ints, time_units, is_manual_index, **fold_args)

    #get extension of results file
    possible_ext = feature_files_ext[feature_type]
    ext = possible_ext[1] if is_manual_index else possible_ext[0]

    fnames = glob.glob(os.path.join(root_dir, '**', '*' + ext), recursive=True)
    if not fnames:
        print_flush('No valid files found. Nothing to do here.')
        return None,None

    # EM :Make df_files list with one features_summaries dataframe per time window
    df_files = make_df_filenames(fnames,time_windows_ints)

    progress_timer = TimeCounter('')
    def _displayProgress(n):
        args = (n + 1, len(df_files[0]), progress_timer.get_time_str())
        dd = "Extracting features summary. File {} of {} done. Total time: {}".format(*args)
        print_flush(dd)

    _displayProgress(-1)

    # EM :Make all_summaries list with one element per time window. Each element contains
    # the extracted feature summaries from all the files for the given time window.
    all_summaries = [[] for x in range(len(time_windows_ints))]
    for ifile, row in df_files[0].iterrows():
        fname = row['file_name']

        df_list = summary_func(fname)
        for iwin,df in enumerate(df_list):
            try:
                df.insert(0, 'file_id', ifile)
                all_summaries[iwin].append(df)
            except (AttributeError, IOError, KeyError, tables.exceptions.HDF5ExtError, tables.exceptions.NoSuchNodeError):
                continue
            else:
                if not df.empty:
                    df_files[iwin].loc[ifile, 'is_good'] = True
        _displayProgress(ifile)

    # EM : Concatenate summaries for each window into one dataframe and select features
    for iwin in range(len(time_windows_ints)):
        all_summaries[iwin] = pd.concat(all_summaries[iwin], ignore_index=True, sort=False)
        all_summaries[iwin] = select_features(all_summaries[iwin],keywords_in,keywords_ex,selected_feat)

    #IB : add in the option to abbreviate features
        if abbreviate_features:
            all_summaries[iwin] = shorten_feature_names(all_summaries[iwin])

    # EM : Save results
        if select_feat != 'all':
            win_save_base_name = save_base_name.replace('tierpsy',select_feat+'_tierpsy')
        else:
            win_save_base_name = save_base_name

        if not (len(time_windows_ints)==1 and time_windows_ints[0]==[0,-1]):
            win_save_base_name = win_save_base_name+'_window_{}'.format(iwin)

        f1 = os.path.join(root_dir, 'filenames_{}.csv'.format(win_save_base_name))
        f2 = os.path.join(root_dir,'features_{}.csv'.format(win_save_base_name))

        fnamesum_headers = get_fnamesum_headers(
            f2,feature_type,summary_type,iwin,time_windows_ints[iwin],
            time_units,len(time_windows_ints),select_feat)
        featsum_headers = get_featsum_headers(f1)

        with open(f1,'w') as fid:
            fid.write(fnamesum_headers)
            df_files[iwin].to_csv(fid, index=False)
        with open(f2,'w') as fid:
            fid.write(featsum_headers)
            all_summaries[iwin].to_csv(fid, index=False)

    out = '****************************'
    out += '\nFINISHED. Created Files:\n-> {}\n-> {}'.format(f1,f2)

    print_flush(out)


    return df_files, all_summaries
def getIntensityProfile(masked_image_file,
                        skeletons_file,
                        intensities_file,
                        width_resampling=15,
                        length_resampling=131,
                        min_num_skel=100,
                        smooth_win=11,
                        pol_degree=3,
                        width_percentage=0.5,
                        save_maps=False):

    min_num_skel = min_num_skel_defaults(skeletons_file,
                                         min_num_skel=min_num_skel)

    assert smooth_win > pol_degree
    assert min_num_skel > 0
    assert 0 < width_percentage < 1

    # we want to use a symmetrical distance centered on the skeleton
    if length_resampling % 2 == 0:
        length_resampling += 1
    if width_resampling % 2 == 0:
        width_resampling += 1

    # get the limits to be averaged from the intensity map
    if save_maps:
        width_win_ind = getWidthWinLimits(width_resampling, width_percentage)
    else:
        width_win_ind = (0, width_resampling)

    # filters for the tables structures
    table_filters = tables.Filters(complevel=5,
                                   complib='zlib',
                                   shuffle=True,
                                   fletcher32=True)

    # Get a reduced version of the trajectories_data table with only the valid skeletons.
    # The rows of this new table are going to be saved into skeletons_file
    trajectories_data_valid = setIntMapIndexes(skeletons_file, min_num_skel)

    # let's save this new table into the intensities file
    with tables.File(intensities_file, 'w') as fid:
        fid.create_table('/',
                         'trajectories_data_valid',
                         obj=trajectories_data_valid.to_records(index=False),
                         filters=table_filters)

    tot_rows = len(trajectories_data_valid)
    if tot_rows == 0:
        with tables.File(intensities_file, "r+") as int_file_id:
            # nothing to do here let's save empty data and go out
            worm_int_avg_tab = int_file_id.create_array(
                "/", "straighten_worm_intensity_median", obj=np.zeros(0))
            worm_int_avg_tab._v_attrs['has_finished'] = 1
        return

    with tables.File(masked_image_file, 'r')  as mask_fid, \
            tables.File(skeletons_file, 'r') as ske_file_id, \
            tables.File(intensities_file, "r+") as int_file_id:

        # pointer to the compressed videos
        mask_dataset = mask_fid.get_node("/mask")

        # pointer to skeletons
        skel_tab = ske_file_id.get_node('/skeleton')
        skel_width_tab = ske_file_id.get_node('/width_midbody')

        # we are using Float16 to save space, assuming the intensities fit
        # within the uint8 range
        worm_int_avg_tab = int_file_id.create_carray(
            "/",
            "straighten_worm_intensity_median",
            tables.Float16Atom(dflt=np.nan), (tot_rows, length_resampling),
            chunkshape=(1, length_resampling),
            filters=table_filters)

        worm_int_avg_tab._v_attrs['has_finished'] = 0
        worm_int_avg_tab.attrs['width_win_ind'] = width_win_ind

        if save_maps:
            worm_int_tab = int_file_id.create_carray(
                "/",
                "straighten_worm_intensity",
                tables.Float16Atom(dflt=np.nan),
                (tot_rows, length_resampling, width_resampling),
                chunkshape=(1, length_resampling, width_resampling),
                filters=table_filters)

        grouped_frames = trajectories_data_valid.groupby('frame_number')
        # variables used to report progress
        base_name = skeletons_file.rpartition('.')[0].rpartition(
            os.sep)[-1].rpartition('_')[0]
        progressTime = TimeCounter('Obtaining intensity maps.',
                                   len(grouped_frames))
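        # iterating frame by frame means each masked image is read only once, and every
        # worm detected in that frame is straightened from the same image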

        for frame, frame_data in grouped_frames:
            img = mask_dataset[frame, :, :]
            for ii, row_data in frame_data.iterrows():
                skeleton_id = int(row_data['skeleton_id'])
                worm_index = int(row_data['worm_index_joined'])
                int_map_id = int(row_data['int_map_id'])

                # read ROI and skeleton, and put them in the same coordinates
                # map
                worm_img, roi_corner = getWormROI(img, row_data['coord_x'],
                                                  row_data['coord_y'],
                                                  row_data['roi_size'])
                skeleton = skel_tab[skeleton_id, :, :] - roi_corner

                half_width = skel_width_tab[skeleton_id] / 2
                assert not np.isnan(skeleton[0, 0])

                skel_smooth = smoothSkeletons(
                    skeleton,
                    length_resampling=length_resampling,
                    smooth_win=smooth_win,
                    pol_degree=pol_degree)
                straighten_worm, grid_x, grid_y = getStraightenWormInt(
                    worm_img,
                    skel_smooth,
                    half_width=half_width,
                    width_resampling=width_resampling)

                # if you use the mean it is better not to use float16
                int_avg = np.median(
                    straighten_worm[width_win_ind[0]:width_win_ind[1], :],
                    axis=0)

                worm_int_avg_tab[int_map_id] = int_avg

                # only save the full map if it is specified by the user
                if save_maps:
                    worm_int_tab[int_map_id] = straighten_worm.T

            if frame % 500 == 0:
                progress_str = progressTime.get_str(frame)
                print_flush(base_name + ' ' + progress_str)

        worm_int_avg_tab._v_attrs['has_finished'] = 1
Example #29
0
def assignBlobTrajDF(traj_df, max_allowed_dist, area_ratio_lim, base_name=''):
    def _get_cost_matrix(frame_data, frame_data_prev):
        coord = frame_data[['coord_x', 'coord_y']].values
        coord_prev = frame_data_prev[['coord_x', 'coord_y']].values
        costMatrix = cdist(coord_prev, coord)  # calculate the cost matrix

        # assign a large value to non-valid combinations by area
        area = frame_data['area'].values
        area_prev = frame_data_prev['area'].values
        area_ratio = area_prev[:, None] / area[None, :]
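        # area_ratio[i, j] compares blob i in the previous frame with blob j in the current frame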
        area_ratio[np.isnan(area_ratio)] = 1e20

        bad_ratio = ((area_ratio < area_ratio_lim[0]) |
                     (area_ratio > area_ratio_lim[1]) |
                     np.isnan(costMatrix))

        costMatrix[bad_ratio] = 1e20
        return costMatrix

    def _get_prev_ind_match(costMatrix):
        def _label_bad_ind(indexes, dist, max_allowed_dist):
            #label as bad the pairs that have a distance larger than max_allowed_dist
            indexes[dist > max_allowed_dist] = -1
            #remove indexes that were assigned twice (either a merge or a split event)
            uind, counts = np.unique(indexes, return_counts=True)
            duplicated_ind = uind[counts > 1]
            bad_ind = np.in1d(indexes, duplicated_ind)
            indexes[bad_ind] = -1
            return indexes

        #I get the corresponding index in the previous data_frame
        #I remove pairs located at positions larger than max_allowed_dist
        #and indexes that were assigned twice or more (split events)
        map_to_prev = np.argmin(costMatrix,
                                axis=0)  #must have dimensions of frame_data
        min_dist_pp = costMatrix[map_to_prev, np.arange(costMatrix.shape[1])]
        _label_bad_ind(map_to_prev, min_dist_pp, max_allowed_dist)

        #here i am looking at in the prev indexes that would have been
        #assigned twice or more to the next indexes (merge events)
        map_to_next = np.argmin(
            costMatrix, axis=1)  #must have dimensions of frame_data_prev
        min_dist_pp = costMatrix[np.arange(costMatrix.shape[0]), map_to_next]
        _label_bad_ind(map_to_next, min_dist_pp, max_allowed_dist)

        bad_prev_ind = np.where(map_to_next == -1)[
            0]  #technically either index too far away or duplicated
        possible_merges = np.in1d(map_to_prev, bad_prev_ind)
        map_to_prev[possible_merges] = -1
        return map_to_prev

    frame_data_prev = None
    tot_worms = 0
    all_indexes = []
    frames_grouped = traj_df.groupby('frame_number')

    #if isinstance(area_ratio_lim, (float, int)):
    #    area_ratio_lim = (1/area_ratio_lim, area_ratio_lim)

    progress_time = TimeCounter(base_name + ' Assigning trajectories.',
                                len(frames_grouped))
    for frame, frame_data in frames_grouped:
        #what happens if the frames are not continuous?
        if frame_data_prev is not None:
            _, prev_traj_ind = all_indexes[-1]
            costMatrix = _get_cost_matrix(frame_data, frame_data_prev)
            map_to_prev = _get_prev_ind_match(costMatrix)

            traj_indexes = np.zeros_like(map_to_prev)
            unmatched = map_to_prev == -1
            matched = ~unmatched

            #assign matched index from the previous indexes
            traj_indexes[matched] = prev_traj_ind[map_to_prev[matched]]

            vv = np.arange(1, np.sum(unmatched) + 1) + tot_worms
            if vv.size > 0:
                tot_worms = vv[-1]
                traj_indexes[unmatched] = vv

        else:
            # initialize worm indexes
            traj_indexes = tot_worms + np.arange(1, len(frame_data) + 1)
            tot_worms = traj_indexes[-1]

        all_indexes.append((frame_data.index, traj_indexes))

        frame_data_prev = frame_data
        if frame % 500 == 0:
            # calculate the progress and put it in a string
            print_flush(progress_time.get_str(frame))

    if all_indexes:
        row_ind, traj_ind = map(np.concatenate, zip(*all_indexes))
        traj_ind = traj_ind[np.argsort(row_ind)]
        return traj_ind
Example #30
0
def correctHeadTail(skeletons_file, **params):
    '''
    Correct head-tail orientation using the skeleton movement. The head must move more than the tail (i.e. have a higher rolling standard deviation). This might fail if the number of contiguously skeletonized frames is too small (a few seconds). The head must be in the first position of the single-frame skeleton array, and the tail in the last.

    max_gap_allowed - maximum number of consecutive lost skeletons before the trajectory is considered a new block
    window_std - frame window used to calculate the standard deviation
    segment4angle - separation between skeleton segments used to calculate the angles
    min_block_size - consider only blocks of roughly 10 s to determine which end is the head and which is the tail
    '''

    params = head_tail_defaults(skeletons_file, **params)
    max_gap_allowed = params['max_gap_allowed']
    window_std = params['window_std']
    segment4angle = params['segment4angle']
    min_block_size = params['min_block_size']

    base_name = skeletons_file.rpartition('.')[0].rpartition(
        os.sep)[-1].rpartition('_')[0]

    with pd.HDFStore(skeletons_file, 'r') as ske_file_id:
        indexes_data = ske_file_id['/trajectories_data'][[
            'worm_index_joined', 'skeleton_id'
        ]]
        # get the first and last frame of each worm_index
        rows_indexes = indexes_data.groupby('worm_index_joined').agg(
            [min, max])['skeleton_id']
        del indexes_data

    # check if the skeletonization finished successfully
    with tables.File(skeletons_file, "r") as ske_file_id:
        skeleton_table = ske_file_id.get_node('/skeleton')
        if 'has_finished' in dir(skeleton_table._v_attrs):
            assert skeleton_table._v_attrs['has_finished'] >= 2

    progress_timer = TimeCounter('')
    for ii, dat in enumerate(rows_indexes.iterrows()):
        if ii % 10 == 0:
            dd = " Correcting Head-Tail using worm movement. Worm %i of %i." % (
                ii + 1, len(rows_indexes))
            dd = base_name + dd + ' Total time:' + progress_timer.get_time_str(
            )
            print_flush(dd)

        worm_index, row_range = dat
        worm_data = WormClass(skeletons_file,
                              worm_index,
                              rows_range=(row_range['min'], row_range['max']))

        if not np.all(np.isnan(worm_data.skeleton_length)):
            is_switched_skel, roll_std = isWormHTSwitched(
                worm_data.skeleton,
                segment4angle=segment4angle,
                max_gap_allowed=max_gap_allowed,
                window_std=window_std,
                min_block_size=min_block_size)

            worm_data.switchHeadTail(is_switched_skel)

        worm_data.writeData()
        #%%
    print_flush('Head-Tail correction using worm movement finished:' +
                progress_timer.get_time_str())

    with tables.File(skeletons_file, "r+") as ske_file_id:
        # Mark a successful termination
        ske_file_id.get_node('/skeleton')._v_attrs['has_finished'] = 3