def filterpredictions(config,video,shuffle=1,trainingsetindex=0,comparisonbodyparts='all',p_bound=.01,ARdegree=3,MAdegree=1,alpha=.01):
    """
    Fits frame-by-frame pose predictions with SARIMAX model.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    video : string
        Full path of the video whose predictions are fitted. The video must
        already be analyzed: its ``<video stem><scorer>.h5`` file is read from
        the folder that contains the video.

    shuffle : int, optional
        The shuffle index of the training dataset. Default is 1.

    trainingsetindex : int, optional
        Integer specifying which TrainingsetFraction to use. By default the first
        (note that TrainingFraction is a list in config.yaml).

    comparisonbodyparts : list of strings, optional
        Selects the body parts for which SARIMAX models are fit. Either ``all``
        (all body parts from config.yaml) or a list of strings that is a subset
        of the full list, e.g. ['hand','Joystick'].

    p_bound : float between 0 and 1, optional
        Likelihood threshold below which a body part is flagged as a putative
        outlier (forwarded to ``ComputeDeviations``).

    ARdegree : int, optional
        Autoregressive degree of the SARIMAX model. See
        https://www.statsmodels.org/dev/generated/statsmodels.tsa.statespace.sarimax.SARIMAX.html

    MAdegree : int, optional
        Moving average degree of the SARIMAX model (same reference as above).

    alpha : float, optional
        Significance level for detecting outliers based on the confidence
        interval of the fitted SARIMAX model.

    Returns
    -------
    tuple
        ``(data, Dataframe, d, o)``: the filtered pandas array (incl. confidence
        interval), the original data read from disk, and the distance and
        average outlier vectors as returned by ``ComputeDeviations``.

    Raises
    ------
    FileNotFoundError
        If the analysis file of the video does not exist.
    """
    cfg = auxiliaryfunctions.read_config(config)
    scorer = auxiliaryfunctions.GetScorerName(cfg, shuffle, trainFraction=cfg['TrainingFraction'][trainingsetindex])
    print("network parameters:", scorer)

    videofolder = str(Path(video).parents[0])
    dataname = str(Path(video).stem) + scorer
    bodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(cfg, comparisonbodyparts)

    try:
        Dataframe = pd.read_hdf(os.path.join(videofolder, dataname + '.h5'))
    except FileNotFoundError:
        # The original handler caught FileExistsError, which is never raised
        # for a missing file, so this hint was unreachable. Re-raise so the
        # caller still sees the same exception as before.
        print("Could not find data.")
        raise

    data, d, o = ComputeDeviations(Dataframe, cfg, bodyparts, scorer, dataname, p_bound, alpha, ARdegree, MAdegree, storeoutput='full')
    return data, Dataframe, d, o
def refine_tracklets(self, event):
    """Start tracklet refinement for the current video and enable the export control.

    The tracklet file is expected next to the video and is named
    ``<video stem><scorer>_<el|bx|sk>.h5``, where the suffix follows the
    ``default_track_method`` entry of the project configuration
    ("ellipse" when the entry is absent).

    Parameters
    ----------
    event :
        Not used in this method (presumably the GUI event that triggered
        the callback).
    """
    # The scorer is derived from the selected shuffle and the LAST training fraction.
    DLCscorer, _ = auxiliaryfunctions.GetScorerName(
        self.cfg,
        self.shuffle.GetValue(),
        self.cfg["TrainingFraction"][-1],
    )

    # Anything that is neither "ellipse" nor "box" falls back to the "sk" suffix.
    tracker = self.cfg.get("default_track_method", "ellipse")
    suffix = "el" if tracker == "ellipse" else ("bx" if tracker == "box" else "sk")

    video_path = Path(self.video)
    datafile = os.path.join(
        str(video_path.parents[0]),
        video_path.stem + DLCscorer + f"_{suffix}.h5",
    )

    refinement = deeplabcut.refine_tracklets(
        self.config,
        datafile,
        self.video,
        min_swap_len=self.slider_swap.GetValue(),
        trail_len=self.length_track.GetValue(),
        max_gap=self.slider_gap.GetValue(),
    )
    self.manager, self.viz = refinement
    self.export.Enable()
def plot_trajectories(config, videos, videotype='.avi', shuffle=1, trainingsetindex=0, showfigures=False):
    """
    Plots the trajectories of various bodyparts across the video.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    videos : list
        Full paths of the videos to process, or the path of a directory that
        holds all videos with the same extension.

    videotype : string, optional
        Extension used to pick the videos when ``videos`` is a directory.
        The default is ``.avi``.

    shuffle : int, optional
        Shuffle index of the training dataset. The default is 1.

    trainingsetindex : int, optional
        Integer specifying which TrainingsetFraction to use. By default the first
        (note that TrainingFraction is a list in config.yaml).

    showfigures : bool, default False
        If true then plots are also displayed.

    Example
    --------
    >>> deeplabcut.plot_trajectories('home/alex/analysis/project/reaching-task/config.yaml',['/home/alex/analysis/project/videos/reachingvideo1.avi'])
    """
    cfg = auxiliaryfunctions.read_config(config)
    # The scorer name identifies the model (shuffle + training fraction) that produced the data.
    DLCscorer = auxiliaryfunctions.GetScorerName(
        cfg, shuffle, cfg['TrainingFraction'][trainingsetindex])

    for video in auxiliaryfunctions.Getlistofvideos(videos, videotype):
        print(video)
        video_path = Path(video)
        videofolder = str(video_path.parents[0])  # folder that contains the video
        videotype = str(video_path.suffix)
        print("Starting % ", videofolder, videos)
        # Results are written into the video's own folder.
        auxiliaryfunctions.attempttomakefolder(videofolder)
        RunTrajectoryAnalysis(video, videofolder, DLCscorer, videofolder, cfg,
                              showfigures)

    print(
        'Plots created! Please check the directory "plot-poses" within the video directory'
    )
def init_session(cfg, gputouse=None, shuffle=1, trainIndex=0, locate_on_gpu=False):
    """Build a pose-prediction session object for the given project configuration.

    Parameters
    ----------
    cfg : str, path or mapping
        Either the project configuration itself, or a path to it (loaded
        with ``_load_config`` when a ``str`` or ``_Path`` is given).
    gputouse : optional
        Accepted but not used inside this function.
    shuffle : int, optional
        Shuffle index used to locate the model folder and snapshot.
    trainIndex : int, optional
        Index into ``cfg['TrainingFraction']``.
    locate_on_gpu : bool, optional
        Forwarded to ``_dlc_setup_pose_prediction``.

    Returns
    -------
    DirectTFSession or BatchTFSession
        ``DirectTFSession`` when ``cfg["batch_size"] == 1``, otherwise
        ``BatchTFSession``.
    """
    if isinstance(cfg, (str, _Path)):
        cfg = _load_config(cfg)
    TF.reset_default_graph()

    project_root = cfg['project_path']
    train_fraction = cfg['TrainingFraction'][trainIndex]
    model_dir = project_root / _aux.GetModelFolder(train_fraction, shuffle, cfg)

    pose_cfg = _get_pose_config(cfg, model_dir, shuffle=shuffle, trainIndex=trainIndex)
    snapshot, iteration = _get_snapshot(cfg, model_dir, shuffle=shuffle)

    # Override the pose configuration with the snapshot and the project-level settings.
    pose_cfg['init_weights'] = str(snapshot)
    pose_cfg['batch_size'] = cfg['batch_size']
    pose_cfg['num_outputs'] = cfg.get('num_outputs', 1)
    print('num_outputs = ', pose_cfg['num_outputs'])

    # The scorer name is computed as in the original code; it is not used afterwards.
    DLCscorer = _aux.GetScorerName(cfg, shuffle, train_fraction, trainingsiterations=iteration)

    if cfg["batch_size"] == 1:
        session_cls = DirectTFSession
    else:
        session_cls = BatchTFSession
    prediction_setup = _dlc_setup_pose_prediction(pose_cfg, locate_on_gpu=locate_on_gpu)
    return session_cls(pose_cfg, *prediction_setup)
def plot_trajectories(
    config,
    videos,
    videotype=".avi",
    shuffle=1,
    trainingsetindex=0,
    filtered=False,
    displayedbodyparts="all",
    displayedindividuals="all",
    showfigures=False,
    destfolder=None,
    modelprefix="",
    imagetype=".png",
    resolution=100,
    linewidth=1.0,
    track_method="",
):
    """
    Plots the trajectories of various bodyparts across the video.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    videos : list
        Full paths of the videos to process, or the path of a directory that
        holds all videos with the same extension.

    videotype : string, optional
        Extension used to pick the videos when ``videos`` is a directory.
        The default is ``.avi``.

    shuffle : int, optional
        Shuffle index of the training dataset. The default is 1.

    trainingsetindex : int, optional
        Integer specifying which TrainingsetFraction to use. By default the first
        (note that TrainingFraction is a list in config.yaml).

    filtered : bool, default False
        Plot the filtered output rather than the frame-by-frame predictions.
        The filtered version can be calculated with deeplabcut.filterpredictions.

    displayedbodyparts : list of strings, optional
        Body parts to plot. Either ``all`` (all body parts from config.yaml) or
        a list of strings that is a subset of the full list.

    displayedindividuals : list of strings, optional
        Individuals to plot, intersected with those defined in the config. If
        none of them is present in the data, every individual found in the data
        is plotted.

    showfigures : bool, default False
        If true then plots are also displayed.

    destfolder : string, optional
        Folder that was used for storing the analysis data (default is the
        folder of the video).

    modelprefix : string, optional
        Forwarded to ``GetScorerName``.

    imagetype : string, default ".png"
        Output image format; tested '.tif', '.jpg', '.svg' and ".png".

    resolution : int, default 100
        Resolution (in dpi) of saved figures. Higher resolution figures take
        longer to generate.

    linewidth : float, default 1.0
        Width of line for line and histogram plots.

    track_method : string, optional
        Tracker used to generate the data. Empty by default (single animal
        project). For multiple animals, must be either 'box', 'skeleton', or
        'ellipse' and will be taken from the config.yaml file if none is given.

    Example
    --------
    >>> deeplabcut.plot_trajectories('home/alex/analysis/project/reaching-task/config.yaml',['/home/alex/analysis/project/videos/reachingvideo1.avi'])
    """
    cfg = auxiliaryfunctions.read_config(config)
    track_method = auxfun_multianimal.get_track_method(
        cfg, track_method=track_method)

    # The scorer name identifies the model (shuffle + training fraction) that produced the data.
    DLCscorer, DLCscorerlegacy = auxiliaryfunctions.GetScorerName(
        cfg,
        shuffle,
        cfg["TrainingFraction"][trainingsetindex],
        modelprefix=modelprefix,
    )
    bodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
        cfg, displayedbodyparts)
    individuals = auxfun_multianimal.IntersectionofIndividualsandOnesGivenbyUser(
        cfg, displayedindividuals)

    Videos = auxiliaryfunctions.Getlistofvideos(videos, videotype)
    if len(Videos) == 0:
        print(
            "No videos found. Make sure you passed a list of videos and that *videotype* is right."
        )
        return

    # One flag per load attempt; True means the analysis file was missing.
    failed = []
    for video in Videos:
        video_path = Path(video)
        videofolder = str(video_path.parents[0]) if destfolder is None else destfolder
        vname = str(video_path.stem)
        print("Loading ", video, "and data.")
        try:
            df, _, _, suffix = auxiliaryfunctions.load_analyzed_data(
                videofolder, vname, DLCscorer, filtered, track_method)
            failed.append(False)

            tmpfolder = os.path.join(videofolder, "plot-poses", vname)
            auxiliaryfunctions.attempttomakefolder(tmpfolder, recursive=True)

            # Restrict the requested body parts to those present in the data.
            labeled_bpts = []
            for bp in df.columns.get_level_values("bodyparts").unique():
                if bp in bodyparts:
                    labeled_bpts.append(bp)

            # Data without an "individuals" level is treated as one unnamed animal.
            try:
                animals = set(df.columns.get_level_values("individuals"))
            except KeyError:
                animals = {""}

            # Prefer the configured individuals; fall back to everything in the data.
            selected = animals.intersection(individuals)
            if not selected:
                selected = animals
            for animal in selected:
                PlottingResults(
                    tmpfolder,
                    df,
                    cfg,
                    labeled_bpts,
                    animal,
                    showfigures,
                    suffix + animal + imagetype,
                    resolution=resolution,
                    linewidth=linewidth,
                )
        except FileNotFoundError as e:
            failed.append(True)
            print(e)
            # Detections without stitched tracklets get a more specific hint.
            try:
                _ = auxiliaryfunctions.load_detection_data(
                    video, DLCscorer, track_method)
                print('Call "deeplabcut.stitch_tracklets()"'
                      " prior to plotting the trajectories.")
            except FileNotFoundError as e:
                print(e)
                print(
                    f"Make sure {video} was previously analyzed, and that "
                    f'detections were successively converted to tracklets using "deeplabcut.convert_detections2tracklets()" '
                    f'and "deeplabcut.stitch_tracklets()".')

    if all(failed):
        print(
            f"Plots could not be created! "
            f"Videos were not evaluated with the current scorer {DLCscorer}.")
    else:
        print(
            'Plots created! Please check the directory "plot-poses" within the video directory'
        )
def bayesian_search(
    config_path,
    inferencecfg,
    pbounds,
    edgewisecondition=True,
    shuffle=1,
    trainingsetindex=0,
    modelprefix="",
    snapshotindex=-1,
    target="rpck_test",
    maximize=True,
    init_points=20,
    n_iter=50,
    acq="ei",
    log_file=None,
    dcorr=5,
    leastbpts=3,
    printingintermediatevalues=True,
):
    """Tune inference parameters with Bayesian optimization on preloaded evaluation data.

    Parameters
    ----------
    config_path : string
        Path of the project config file.
    inferencecfg : dict-like
        Inference parameters. Updated IN PLACE, both on every objective
        evaluation and with the best parameters at the end.
    pbounds : dict
        Maps each optimized parameter name to a ``(lower, upper)`` pair. The
        type of the lower bound is used to cast the sampled values.
    edgewisecondition : bool, default True
        If true, per-edge distance bounds are read from (or computed and
        written to) ``inferencebounds.yaml`` in the model's ``test`` folder and
        scaled by ``inferencecfg["upperbound_factor"]`` /
        ``inferencecfg["lowerbound_factor"]``. Otherwise both bounds are None.
    shuffle, trainingsetindex, modelprefix :
        Select the model and evaluation folders and the scorer name.
    snapshotindex : int, default -1
        Index into the files returned by ``return_evaluate_network_data``.
    target : string, default "rpck_test"
        Column of the computed statistics that is optimized.
    maximize : bool, default True
        Must be true for "rpck" targets and false for "rmse" targets. When
        false the objective value is negated, since the optimizer maximizes.
    init_points, n_iter, acq :
        Forwarded to ``BayesianOptimization.maximize``.
    log_file : optional
        If given, earlier optimization logs are loaded from it first.
    dcorr, leastbpts :
        Forwarded to ``compute_crossval_metrics_preloadeddata``.
    printingintermediatevalues : bool, default True
        Print a few metrics after every objective evaluation.

    Returns
    -------
    tuple
        ``(inferencecfg, opt)``: the updated inference parameters and the
        optimizer object.
    """
    if "rpck" in target:
        assert maximize == True
    if "rmse" in target:
        assert maximize == False

    cfg = auxiliaryfunctions.read_config(config_path)
    project_path = cfg["project_path"]
    fraction = cfg["TrainingFraction"][int(trainingsetindex)]

    evaluationfolder = os.path.join(
        project_path,
        str(
            auxiliaryfunctions.GetEvaluationFolder(
                fraction, shuffle, cfg, modelprefix=modelprefix)),
    )
    DLCscorer, DLCscorerlegacy = auxiliaryfunctions.GetScorerName(
        cfg,
        shuffle,
        fraction,
        cfg["iteration"],
        modelprefix=modelprefix,
    )

    # Load the stored predictions of the requested snapshot once, up front.
    prediction_files = return_evaluate_network_data(
        config_path,
        shuffle=shuffle,
        trainingsetindex=trainingsetindex,
        modelprefix=modelprefix,
    )
    data, metadata = auxfun_multianimal.LoadFullMultiAnimalData(
        prediction_files[snapshotindex])
    params = set_up_evaluation(data)

    columns = ["train_iter", "train_frac", "shuffle"]
    for split in ("train", "test"):
        for metric in ("rmse", "hits", "misses", "falsepos", "ndetects",
                       "pck", "rpck"):
            columns.append("_".join((metric, split)))

    # The "train_iter" column carries the training set index.
    train_iter = trainingsetindex
    train_frac = cfg["TrainingFraction"][train_iter]
    trainIndices = metadata["data"]["trainIndices"]
    testIndices = metadata["data"]["testIndices"]

    if not edgewisecondition:
        lowerbound = None
        upperbound = None
    else:
        modelfolder = os.path.join(
            project_path,
            str(
                auxiliaryfunctions.GetModelFolder(
                    fraction, shuffle, cfg, modelprefix=modelprefix)),
        )
        bounds_file = Path(modelfolder) / "test" / "inferencebounds.yaml"
        try:
            inferenceboundscfg = auxiliaryfunctions.read_plainconfig(bounds_file)
        except FileNotFoundError:
            # No cached bounds yet: compute them and store them for next time.
            print("Computing distances...")
            from deeplabcut.pose_estimation_tensorflow import calculatepafdistancebounds

            inferenceboundscfg = calculatepafdistancebounds(
                config_path, shuffle, trainingsetindex)
            auxiliaryfunctions.write_plainconfig(bounds_file, inferenceboundscfg)

        paf_graph = params["paf_graph"]

        def _edge_values(field):
            # One value per graph edge, looked up under the key "<node0>_<node1>".
            return np.array([
                float(inferenceboundscfg[str(edge[0]) + "_" + str(edge[1])][field])
                for edge in paf_graph
            ])

        upperbound = _edge_values("intra_max")
        lowerbound = _edge_values("intra_min")
        upperbound *= inferencecfg["upperbound_factor"]
        lowerbound *= inferencecfg["lowerbound_factor"]

    def _objective(**kwargs):
        inferencecfg.update(kwargs)
        # Cast every optimized parameter back to the type of its lower bound.
        for key, (low, _) in pbounds.items():
            inferencecfg[key] = type(low)(inferencecfg[key])

        stats = compute_crossval_metrics_preloadeddata(
            params,
            columns,
            inferencecfg,
            data,
            trainIndices,
            testIndices,
            train_iter,
            train_frac,
            shuffle,
            lowerbound,
            upperbound,
            dcorr=dcorr,
            leastbpts=leastbpts,
        )

        if printingintermediatevalues:
            print(
                "rpck",
                stats["rpck_test"].values[0],
                "rpck train:",
                stats["rpck_train"].values[0],
            )
            print(
                "rmse",
                stats["rmse_test"].values[0],
                "miss",
                stats["misses_test"].values[0],
                "hit",
                stats["hits_test"].values[0],
            )

        score = stats[target].values[0]
        if np.isnan(score):
            # Undefined metrics are replaced by the worst possible value.
            score = -1e9 if maximize else 1e9
        # The optimizer maximizes, so minimization targets are negated.
        return score if maximize else -score

    opt = BayesianOptimization(f=_objective, pbounds=pbounds, random_state=42)
    if log_file:
        load_logs(opt, log_file)
    log_path = os.path.join(evaluationfolder, "opti_log" + DLCscorer + ".json")
    logger = JSONLogger(path=log_path)
    opt.subscribe(Events.OPTIMIZATION_STEP, logger)
    opt.maximize(init_points=init_points, n_iter=n_iter, acq=acq)

    # Store the best parameters, cast to the bound's type (NumPy floats rounded to 2 decimals).
    inferencecfg.update(opt.max["params"])
    for key, (low, _) in pbounds.items():
        best = type(low)(inferencecfg[key])
        if isinstance(best, np.floating):
            best = np.round(best, 2).item()
        inferencecfg[key] = best

    return inferencecfg, opt
def create_video_with_all_detections(
    config,
    videos,
    shuffle=1,
    trainingsetindex=0,
    displayedbodyparts="all",
    destfolder=None,
    modelprefix="",
):
    """
    Create a video labeled with all the detections stored in a '*_full.pickle' file.

    Parameters
    ----------
    config : str
        Absolute path to the config.yaml file

    videos : list of str
        Full paths of the videos to label. The list is iterated as given.

    shuffle : int, optional
        Shuffle index of the training dataset. Default is set to 1.

    trainingsetindex : int, optional
        Integer specifying which TrainingsetFraction to use. By default the first
        (note that TrainingFraction is a list in config.yaml).

    displayedbodyparts : list of strings, optional
        Body parts that are plotted in the video. Either ``all`` (all body parts
        stored in the pickle's metadata) or a list of strings that is a subset
        of the full list.

    destfolder : string, optional
        Folder that was used for storing the analysis data (default is the
        folder of the video). The output video is written there as well.

    modelprefix : string, optional
        Forwarded to ``GetScorerName``.

    Notes
    -----
    Videos whose ``*_full.mp4`` output already exists are skipped.
    """
    from deeplabcut.pose_estimation_tensorflow.lib.inferenceutils import Assembler
    import pickle, re

    cfg = auxiliaryfunctions.read_config(config)
    trainFraction = cfg["TrainingFraction"][trainingsetindex]
    DLCscorername, _ = auxiliaryfunctions.GetScorerName(
        cfg, shuffle, trainFraction, modelprefix=modelprefix)

    for video in videos:
        videofolder = os.path.splitext(video)[0]  # video path without its extension
        if destfolder is None:
            outputname = "{}_full.mp4".format(videofolder + DLCscorername)
            full_pickle = os.path.join(videofolder + DLCscorername + "_full.pickle")
        else:
            auxiliaryfunctions.attempttomakefolder(destfolder)
            stem = str(Path(video).stem)
            outputname = os.path.join(destfolder, stem + DLCscorername + "_full.mp4")
            full_pickle = os.path.join(destfolder, stem + DLCscorername + "_full.pickle")

        if os.path.isfile(outputname):
            print("Detections already plotted, ", outputname)
            continue

        print("Creating labeled video for ", str(Path(video).stem))
        # NOTE(review): pickle.load can execute arbitrary code; only open
        # detection files produced by a trusted analysis run.
        with open(full_pickle, "rb") as handle:
            data = pickle.load(handle)

        header = data.pop("metadata")
        all_jointnames = header["all_joints_names"]
        if displayedbodyparts == "all":
            bpts = range(len(all_jointnames))
        else:
            # Keep only the indices of the requested body parts.
            bpts = [
                bptindex for bptindex, bp in enumerate(all_jointnames)
                if bp in displayedbodyparts
            ]
        numjoints = len(bpts)

        # Map each frame number (first integer in the key) to its key. The
        # first key wins on duplicates, matching the former list.index lookup.
        frame_lookup = {}
        for name in data:
            frame_lookup.setdefault(int(re.findall(r"\d+", name)[0]), name)

        colorclass = plt.cm.ScalarMappable(cmap=cfg["colormap"])
        C = colorclass.to_rgba(np.linspace(0, 1, numjoints))
        colors = (C[:, :3] * 255).astype(np.uint8)
        # Constant-time label -> color lookup instead of bpts.index per detection.
        color_by_label = dict(zip(bpts, colors))

        pcutoff = cfg["pcutoff"]
        dotsize = cfg["dotsize"]
        clip = vp(fname=video, sname=outputname, codec="mp4v")
        try:
            ny, nx = clip.height(), clip.width()
            for n in trange(clip.nframes):
                frame = clip.load_frame()
                name = frame_lookup.get(n)
                if name is None:
                    # No data stored for that particular frame
                    print(n, "no data")
                else:
                    try:
                        for det in Assembler._flatten_detections(data[name]):
                            color = color_by_label.get(det.label)
                            if color is None or det.confidence < pcutoff:
                                continue
                            x, y = det.pos
                            rr, cc = disk((y, x), dotsize, shape=(ny, nx))
                            frame[rr, cc] = color
                    except ValueError:
                        # Kept from the original: a ValueError while drawing
                        # is reported the same way and the frame is still written.
                        print(n, "no data")
                try:
                    clip.save_frame(frame)
                except Exception:
                    # Best effort per frame, but no longer swallows
                    # KeyboardInterrupt/SystemExit like the bare except did.
                    print(n, "frame writing error.")
        finally:
            # Release the reader/writer even if the frame loop fails.
            clip.close()
for bpindex,bp in enumerate(bodyparts): Index=data[scorer][bp]['likelihood'].values #plot the histogram of cumulative frequency distribution of each bodypart plt.hist(Index, bins = 20,normed=True,cumulative=True, color = colors(bpindex)) sm = plt.cm.ScalarMappable(cmap=plt.get_cmap(cfg['colormap']), norm=plt.Normalize(vmin=0, vmax=len(bodyparts)-1)) sm._A = [] cbar = plt.colorbar(sm,ticks=range(len(bodyparts))) cbar.set_ticklabels(bodyparts) plt.xlabel('Likelihood') plt.ylabel('Cumulative frequency distribution') plt.title(bp) plt.savefig(tmpfolder+"/{}-cumulative-2095.png".format(bp)) plt.clf() if __name__=='__main__': #type the correct address of yours configname = 'full path of the config.yaml' video_path = 'full path of the videos' savefolder = 'full path of the file you would like to save figures in' cfg = auxiliaryfunctions.read_config(configname) trainFraction = cfg['TrainingFraction'][0] bodyparts2plot = cfg['bodyparts'] colors = plt.get_cmap(lut=len(bodyparts2plot), name=cfg['colormap']) DLCscorer = auxiliaryfunctions.GetScorerName(cfg,1,trainFraction) DF = read_the_data(video=video_path,cfg=cfg,scorer=DLCscorer) plot_histogram(data=DF,cfg=cfg,bodyparts=bodyparts2plot,scorer=DLCscorer,tmpfolder=savefolder) plot_cumulative_histogram(data=DF,cfg=cfg,bodyparts=bodyparts2plot,scorer=DLCscorer,tmpfolder=savefolder)
def plot_trajectories(config, videos, videotype='.avi', shuffle=1, trainingsetindex=0, filtered=False, displayedbodyparts='all', showfigures=False, destfolder=None):
    """
    Plots the trajectories of various bodyparts across the video.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    videos : list
        Full paths of the videos to process, or the path of a directory that
        holds all videos with the same extension.

    videotype : string, optional
        Extension used to pick the videos when ``videos`` is a directory.
        The default is ``.avi``.

    shuffle : int, optional
        Shuffle index of the training dataset. The default is 1.

    trainingsetindex : int, optional
        Integer specifying which TrainingsetFraction to use. By default the first
        (note that TrainingFraction is a list in config.yaml).

    filtered : bool, default False
        Plot the filtered output rather than the frame-by-frame predictions.
        The filtered version can be calculated with deeplabcut.filterpredictions.

    displayedbodyparts : list of strings, optional
        Body parts to plot. Either ``all`` (all body parts from config.yaml) or
        a list of strings that is a subset of the full list.

    showfigures : bool, default False
        If true then plots are also displayed.

    destfolder : string, optional
        Folder that was used for storing the analysis data (default is the
        folder of the video).

    Example
    --------
    >>> deeplabcut.plot_trajectories('home/alex/analysis/project/reaching-task/config.yaml',['/home/alex/analysis/project/videos/reachingvideo1.avi'])
    """
    cfg = auxiliaryfunctions.read_config(config)
    # The scorer name identifies the model; a legacy name is kept for older result files.
    scorer, scorer_legacy = auxiliaryfunctions.GetScorerName(
        cfg, shuffle, cfg['TrainingFraction'][trainingsetindex])
    bodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
        cfg, displayedbodyparts)

    for video in auxiliaryfunctions.Getlistofvideos(videos, videotype):
        print(video)
        videofolder = destfolder if destfolder is not None else str(Path(video).parents[0])
        vname = str(Path(video).stem)
        print("Starting % ", videofolder, video)

        # The check returns the scorer name to use from here on.
        notanalyzed, dataname, scorer = auxiliaryfunctions.CheckifNotAnalyzed(
            videofolder, vname, scorer, scorer_legacy, flag='checking')
        if notanalyzed:
            print("The video was not analyzed with this scorer:", scorer)
            continue

        print("Loading ", video, "and data.")
        datafound, metadata, Dataframe, scorer, suffix = auxiliaryfunctions.LoadAnalyzedData(
            str(videofolder), vname, scorer, filtered)
        if not datafound:
            continue

        # Figures go to <videofolder>/plot-poses/<video name>/.
        auxiliaryfunctions.attempttomakefolder(videofolder)
        plot_root = os.path.join(videofolder, 'plot-poses')
        auxiliaryfunctions.attempttomakefolder(plot_root)
        tmpfolder = os.path.join(videofolder, 'plot-poses', vname)
        auxiliaryfunctions.attempttomakefolder(tmpfolder)
        PlottingResults(tmpfolder, Dataframe, scorer, cfg, bodyparts,
                        showfigures, suffix + '.png')

    print(
        'Plots created! Please check the directory "plot-poses" within the video directory'
    )
def filterpredictions(config, video, videotype='avi', shuffle=1, trainingsetindex=0, filtertype='median', windowlength=5, p_bound=.001, ARdegree=3, MAdegree=1, alpha=.01, save_as_csv=True, destfolder=None):
    """
    Fits frame-by-frame pose predictions with ARIMA model (filtertype='arima') or median filter (default).

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    video : string or list of strings
        Video(s) whose predictions are filtered; resolved with
        ``Getlistofvideos`` together with ``videotype``. The videos must
        already be analyzed.

    videotype : string, optional
        Extension used to pick the videos when ``video`` is a directory.
        The default is ``avi``.

    shuffle : int, optional
        The shuffle index of the training dataset. Default is 1.

    trainingsetindex : int, optional
        Integer specifying which TrainingsetFraction to use. By default the first
        (note that TrainingFraction is a list in config.yaml).

    filtertype : string
        'arima' selects the SARIMAX fit; any other value uses the median filter.

    windowlength : int
        For the median filter: size of the local window. The array is
        automatically zero-padded and the window length should be an odd number.
        https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.medfilt.html

    p_bound : float between 0 and 1, optional
        For filtertype 'arima': likelihood below which a body part is considered
        missing data for filtering purposes.

    ARdegree : int, optional
        For filtertype 'arima': autoregressive degree of the SARIMAX model. See
        https://www.statsmodels.org/dev/generated/statsmodels.tsa.statespace.sarimax.SARIMAX.html

    MAdegree : int, optional
        For filtertype 'arima': moving average degree of the SARIMAX model
        (same reference as above).

    alpha : float, optional
        Significance level for detecting outliers based on the confidence
        interval of the fitted SARIMAX model.

    save_as_csv : bool, optional
        Also save the filtered predictions in a .csv file. The default is ``True``.

    destfolder : string, optional
        Folder holding the analysis data (default is the folder of each video).
        Note that for subsequent analysis this folder also needs to be passed.

    Returns
    -------
    None
        The filtered data (same structure as the normal network output, with
        unchanged likelihoods) is written to the output .h5 file, and to a
        .csv file when ``save_as_csv`` is true.

    Example
    --------
    Arima model:
    deeplabcut.filterpredictions('C:\\\\myproject\\\\reaching-task\\\\config.yaml',['C:\\\\myproject\\\\trailtracking-task\\\\test.mp4'],shuffle=3,filtertype='arima',ARdegree=5,MAdegree=2)

    Median filter over a window of 5 frames:
    deeplabcut.filterpredictions('C:\\\\myproject\\\\reaching-task\\\\config.yaml',['C:\\\\myproject\\\\trailtracking-task\\\\test.mp4'],shuffle=3,windowlength=5)

    One can then use the filtered rather than the frame-by-frame predictions by calling:
    deeplabcut.plot_trajectories('C:\\\\myproject\\\\reaching-task\\\\config.yaml',['C:\\\\myproject\\\\trailtracking-task\\\\test.mp4'],shuffle=3,filtered=True)
    deeplabcut.create_labeled_video('C:\\\\myproject\\\\reaching-task\\\\config.yaml',['C:\\\\myproject\\\\trailtracking-task\\\\test.mp4'],shuffle=3,filtered=True)
    """
    cfg = auxiliaryfunctions.read_config(config)
    DLCscorer, DLCscorerlegacy = auxiliaryfunctions.GetScorerName(
        cfg, shuffle, trainFraction=cfg['TrainingFraction'][trainingsetindex])
    Videos = auxiliaryfunctions.Getlistofvideos(video, videotype)

    for videopath in Videos:
        # Resolve the data folder per video. The original overwrote
        # ``destfolder`` on the first iteration, so later videos were looked
        # up (and written) in the first video's folder.
        if destfolder is None:
            videofolder = str(Path(videopath).parents[0])
        else:
            videofolder = destfolder

        print("Filtering with %s model %s" % (filtertype, videopath))
        vname = Path(videopath).stem
        notanalyzed, outdataname, sourcedataname, scorer = auxiliaryfunctions.CheckifPostProcessing(
            videofolder, vname, DLCscorer, DLCscorerlegacy, suffix='filtered')
        if not notanalyzed:
            continue

        Dataframe = pd.read_hdf(sourcedataname, 'df_with_missing')
        filtered_parts = []
        for bp in tqdm(cfg['bodyparts']):
            pdindex = pd.MultiIndex.from_product(
                [[scorer], [bp], ['x', 'y', 'likelihood']],
                names=['scorer', 'bodyparts', 'coords'])
            x = Dataframe[scorer][bp]['x'].values
            y = Dataframe[scorer][bp]['y'].values
            p = Dataframe[scorer][bp]['likelihood'].values

            if filtertype == 'arima':
                meanx, CIx = FitSARIMAXModel(x, p, p_bound, alpha, ARdegree, MAdegree, False)
                meany, CIy = FitSARIMAXModel(y, p, p_bound, alpha, ARdegree, MAdegree, False)
                # The first sample is taken from the raw prediction.
                meanx[0] = x[0]
                meany[0] = y[0]
            else:
                meanx = signal.medfilt(x, kernel_size=windowlength)
                meany = signal.medfilt(y, kernel_size=windowlength)

            # Likelihoods are passed through unchanged.
            filtered_parts.append(
                pd.DataFrame(np.column_stack([meanx, meany, p]), columns=pdindex))

        # Join all body parts column-wise in one step instead of repeatedly
        # transposing and concatenating the growing frame.
        data = pd.concat(filtered_parts, axis=1)

        data.to_hdf(outdataname, 'df_with_missing', format='table', mode='w')
        if save_as_csv:
            print("Saving filtered csv poses!")
            data.to_csv(outdataname.split('.h5')[0] + '.csv')
def evaluate_multianimal_crossvalidate(
    config,
    Shuffles=[1],
    trainingsetindex=0,
    pbounds=None,
    edgewisecondition=True,
    target="rpck_train",
    inferencecfg=None,
    init_points=20,
    n_iter=50,
    dcorr=10.0,
    leastbpts=1,
    printingintermediatevalues=True,
    modelprefix="",
    plotting=False,
):
    """
    Cross-validate inference parameters on evaluation data; the optimal
    parameters are written to "inference_cfg.yaml" in the model's test folder.
    They are then used for inference (i.e. for the analysis of videos).
    Performs Bayesian optimization with
    https://github.com/fmfn/BayesianOptimization

    This is a crucial step. The most important variable (in inferencecfg) to
    cross-validate is minimalnumberofconnections. Pass a reasonable range to
    optimize (e.g. if you have 5 edges, from 1 to 5; if you have 4 bodyparts
    and 11 connections, from 3 to 9).

    Parameters
    ----------
    config: string
        Full path of the config.yaml file as a string.

    Shuffles: list of int, optional
        Shuffle indices of the training datasets to cross-validate.
        The default is [1]. The list is only iterated, never modified.

    trainingsetindex: int, optional
        Integer specifying which TrainingsetFraction to use. By default the
        first (note that TrainingFraction is a list in config.yaml).

    pbounds: dictionary of variables with ranges to cross-validate.
        Entries given here override the defaults, which are:
        pbounds = {
            'pafthreshold': (0.05, 0.7),
            'detectionthresholdsquare': (0, 0.9),
            'minimalnumberofconnections': (1, # connections in your skeleton),
        }

    edgewisecondition: bool, default True
        Estimates Euclidean distances for each skeleton edge and uses those
        distances for excluding possible connections. If False, uses only one
        distance for all bodyparts (which is obviously suboptimal).

    target: string, default='rpck_train'
        Metric to optimize. Options are pck/rpck/rmse on the train/test set.
        Targets containing "rmse" are minimized, targets containing "pck"
        (which includes "rpck") are maximized.

    inferencecfg: dict, optional
        For the variables that are *not* cross-validated the parameters from
        inference_cfg.yaml are used, or you can overwrite them by passing a
        dictionary with your preferred parameters.

    init_points: int, optional (default=20)
        Number of random initial explorations. Probing random regions helps
        diversify the exploration space. Parameter from BayesianOptimization.

    n_iter: int, optional (default=50)
        Number of iterations of Bayesian optimization to perform. The larger
        it is, the higher the likelihood of finding a good extremum.
        Parameter from BayesianOptimization.

    dcorr: float, default 10.0
        Distance threshold for percent correct keypoints / relative percent
        correct keypoints (see paper).

    leastbpts: integer (should be a small number)
        If an animal has less than or equal to this many body parts in an
        image, it will not be used for cross-validation. Imagine e.g. only a
        single bodypart is present: if animals need a certain minimal number
        of bodyparts for assembly (minimalnumberofconnections), this might not
        be predictable.

    printingintermediatevalues: bool, default True
        If intermediate metrics RMSE/hits/.. per sample should be printed.
        NOTE(review): this argument is not referenced in this function's body.

    modelprefix: str, optional
        Forwarded to the helpers that locate the model and evaluation folders
        and build the scorer name.

    plotting: bool, default False
        If True, labeled images comparing ground truth and the best
        predictions are saved in a "LabeledImages_..." folder inside the
        evaluation folder.

    Raises
    ------
    ValueError
        If ``target`` contains neither "pck" nor "rmse".
    FileNotFoundError
        If the test pose_cfg.yaml of a shuffle is missing, or no snapshot is
        found in its train folder.

    Examples
    --------

    first run evaluate:

    deeplabcut.evaluate_network(path_config_file,Shuffles=[shuffle],plotting=True)

    Then e.g. for finding inference parameters to minimize rmse on test set:

    deeplabcut.evaluate_multianimal_crossvalidate(path_config_file,Shuffles=[shuffle],target='rmse_test')
    """
    # Decide the optimization direction before doing any work. Previously an
    # unsupported target left `maximize` unbound and only failed much later.
    if "rmse" in target:
        maximize = False  # i.e. minimize
    elif "pck" in target:  # also covers "rpck"
        maximize = True
    else:
        raise ValueError(
            "Unsupported target %r: the target must contain 'pck', 'rpck' or 'rmse'."
            % (target,))

    from deeplabcut.pose_estimation_tensorflow.lib import crossvalutils
    from deeplabcut.utils import auxfun_multianimal, auxiliaryfunctions
    from easydict import EasyDict as edict

    cfg = auxiliaryfunctions.read_config(config)
    trainFraction = cfg["TrainingFraction"][trainingsetindex]
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    Data = pd.read_hdf(
        os.path.join(
            cfg["project_path"],
            str(trainingsetfolder),
            "CollectedData_" + cfg["scorer"] + ".h5",
        ),
        "df_with_missing",
    )
    comparisonbodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
        cfg, "all")
    colors = visualization.get_cmap(len(comparisonbodyparts),
                                    name=cfg["colormap"])

    # wild guesses for a wide range:
    maxconnections = len(cfg["skeleton"])
    minconnections = 1  # len(cfg['multianimalbodyparts'])-1

    _pbounds = {
        "pafthreshold": (0.05, 0.7),
        "detectionthresholdsquare": (
            0,
            0.9,
        ),  # TODO: set to minimum (from pose_cfg.yaml)
        "minimalnumberofconnections": (minconnections, maxconnections),
    }
    if pbounds is not None:
        _pbounds.update(pbounds)

    for shuffle in Shuffles:
        evaluationfolder = os.path.join(
            cfg["project_path"],
            str(
                auxiliaryfunctions.GetEvaluationFolder(
                    trainFraction, shuffle, cfg, modelprefix=modelprefix)),
        )
        auxiliaryfunctions.attempttomakefolder(evaluationfolder,
                                               recursive=True)

        _, metadatafn = auxiliaryfunctions.GetDataandMetaDataFilenames(
            trainingsetfolder, trainFraction, shuffle, cfg)
        _, trainIndices, testIndices, _ = auxiliaryfunctions.LoadMetadata(
            os.path.join(cfg["project_path"], metadatafn))
        modelfolder = os.path.join(
            cfg["project_path"],
            str(
                auxiliaryfunctions.GetModelFolder(trainFraction,
                                                  shuffle,
                                                  cfg,
                                                  modelprefix=modelprefix)),
        )
        path_test_config = Path(modelfolder) / "test" / "pose_cfg.yaml"
        try:
            dlc_cfg = load_config(str(path_test_config))
        except FileNotFoundError:
            raise FileNotFoundError(
                "It seems the model for shuffle %s and trainFraction %s does not exist."
                % (shuffle, trainFraction))

        # Check which snapshots are available and sort them by # iterations
        Snapshots = np.array([
            fn.split(".")[0]
            for fn in os.listdir(os.path.join(str(modelfolder), "train"))
            if "index" in fn
        ])
        if len(Snapshots) == 0:
            raise FileNotFoundError(
                "Snapshots not found! It seems the dataset for shuffle %s and trainFraction %s is not trained."
                % (shuffle, trainFraction))
        # os.listdir gives no ordering guarantee, so sort explicitly; only
        # then does index -1 reliably select the latest snapshot.
        increasing_indices = np.argsort(
            [int(m.split("-")[1]) for m in Snapshots])
        Snapshots = Snapshots[increasing_indices]

        snapindex = -1
        dlc_cfg["init_weights"] = os.path.join(
            str(modelfolder), "train",
            Snapshots[snapindex])  # setting weights to corresponding snapshot.
        trainingsiterations = (dlc_cfg["init_weights"].split(
            os.sep)[-1]).split("-")[
                -1]  # read how many training iterations that corresponds to.
        DLCscorer, _ = auxiliaryfunctions.GetScorerName(
            cfg,
            shuffle,
            trainFraction,
            trainingsiterations,
            modelprefix=modelprefix)

        path_inference_config = Path(
            modelfolder) / "test" / "inference_cfg.yaml"
        if inferencecfg is None:  # then load or initialize
            inferencecfg = auxfun_multianimal.read_inferencecfg(
                path_inference_config, cfg)
        else:
            inferencecfg = edict(inferencecfg)
            auxfun_multianimal.check_inferencecfg_sanity(cfg, inferencecfg)

        # No cap on retained assemblies while searching.
        inferencecfg.topktoretain = np.inf
        inferencecfg, opt = crossvalutils.bayesian_search(
            config,
            inferencecfg,
            _pbounds,
            edgewisecondition=edgewisecondition,
            shuffle=shuffle,
            trainingsetindex=trainingsetindex,
            target=target,
            maximize=maximize,
            init_points=init_points,
            n_iter=n_iter,
            acq="ei",
            dcorr=dcorr,
            leastbpts=leastbpts,
            modelprefix=modelprefix,
        )

        # update number of individuals to retain.
        inferencecfg.topktoretain = len(
            cfg["individuals"]) + 1 * (len(cfg["uniquebodyparts"]) > 0)

        # calculating result at best solution
        DataOptParams, poses_gt, poses = crossvalutils.compute_crossval_metrics(
            config, inferencecfg, shuffle, trainingsetindex, modelprefix)

        path_inference_config = str(path_inference_config)
        # print("Quantification:", DataOptParams.head())
        DataOptParams.to_hdf(
            path_inference_config.split(".yaml")[0] + ".h5",
            "df_with_missing",
            format="table",
            mode="w",
        )
        DataOptParams.to_csv(path_inference_config.split(".yaml")[0] + ".csv")
        print("Saving optimal inference parameters...")
        print(DataOptParams.to_string())
        auxiliaryfunctions.write_plainconfig(path_inference_config,
                                             dict(inferencecfg))

        # Store best predictions: one row per image, padded with NaN up to
        # the largest number of individuals found in any image.
        max_indivs = max(pose.shape[0] for pose in poses)
        bpts = dlc_cfg["all_joints_names"]
        container = np.full((len(poses), max_indivs * len(bpts) * 3), np.nan)
        for n, pose in enumerate(poses):
            temp = pose.flatten()
            container[n, :len(temp)] = temp

        header = pd.MultiIndex.from_product(
            [
                [DLCscorer],
                [f"individual{i}" for i in range(1, max_indivs + 1)],
                bpts,
                ["x", "y", "likelihood"],
            ],
            names=["scorer", "individuals", "bodyparts", "coords"],
        )
        df = pd.DataFrame(container, columns=header)
        df.to_hdf(os.path.join(evaluationfolder, f"{DLCscorer}.h5"),
                  key="df_with_missing")

        if plotting:
            foldername = os.path.join(
                str(evaluationfolder),
                "LabeledImages_" + DLCscorer + "_" + Snapshots[snapindex],
            )
            auxiliaryfunctions.attempttomakefolder(foldername)
            for imageindex, imagename in tqdm(enumerate(Data.index)):
                image_path = os.path.join(cfg["project_path"], imagename)
                image = io.imread(image_path)
                frame = img_as_ubyte(skimage.color.gray2rgb(image))
                groundtruthcoordinates = poses_gt[imageindex]
                coords_pred = poses[imageindex][:, :, :2]
                probs_pred = poses[imageindex][:, :, -1:]
                fig = visualization.make_multianimal_labeled_image(
                    frame,
                    groundtruthcoordinates,
                    coords_pred,
                    probs_pred,
                    colors,
                    cfg["dotsize"],
                    cfg["alphavalue"],
                    cfg["pcutoff"],
                )
                visualization.save_labeled_frame(fig, image_path, foldername,
                                                 imageindex in trainIndices)
def analyzeskeleton(
    config,
    videos,
    videotype="",
    shuffle=1,
    trainingsetindex=0,
    filtered=False,
    save_as_csv=False,
    destfolder=None,
    modelprefix="",
    track_method="",
):
    """Extracts length and orientation of each "bone" of the skeleton.

    The bone and skeleton information is defined in the config file.

    Parameters
    ----------
    config: str
        Full path of the config.yaml file.

    videos: list[str]
        The full paths to videos for analysis or a path to the directory, where all the
        videos with same extension are stored.

    videotype: str, optional, default=""
        Checks for the extension of the video in case the input to the video is a
        directory. Only videos with this extension are analyzed.
        If left unspecified, videos with common extensions
        ('avi', 'mp4', 'mov', 'mpeg', 'mkv') are kept.

    shuffle : int, optional, default=1
        The shuffle index of the training dataset; it is used to build the name of
        the scorer whose predictions are loaded.

    trainingsetindex: int, optional, default=0
        Integer specifying which TrainingsetFraction to use.
        Note that TrainingFraction is a list in config.yaml.

    filtered: bool, optional, default=False
        Boolean variable indicating if filtered output should be used rather than
        frame-by-frame predictions. Filtered version can be calculated with
        ``deeplabcut.filterpredictions``.

    save_as_csv: bool, optional, default=False
        Additionally saves the skeleton data in a .csv file.

    destfolder: string or None, optional, default=None
        Specifies the folder holding the analysis data. If ``None``, the folder of
        each video is used. Note that for subsequent analysis this folder also needs
        to be passed.

    modelprefix: str, optional, default=""
        Directory containing the deeplabcut models to use when evaluating the
        network. By default, the models are assumed to exist in the project folder.

    track_method: string, optional, default=""
        Specifies the tracker used to generate the data.
        Empty by default (corresponding to a single animal project).
        For multiple animals, must be either 'box', 'skeleton', or 'ellipse'.
        The value is resolved through ``auxfun_multianimal.get_track_method``
        together with the project configuration.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If no skeleton is defined in the config.yaml.
    """
    # Load config file, scorer and videos
    cfg = auxiliaryfunctions.read_config(config)
    if not cfg["skeleton"]:
        raise ValueError("No skeleton defined in the config.yaml.")

    track_method = auxfun_multianimal.get_track_method(
        cfg, track_method=track_method)
    DLCscorer, DLCscorerlegacy = auxiliaryfunctions.GetScorerName(
        cfg,
        shuffle,
        trainFraction=cfg["TrainingFraction"][trainingsetindex],
        modelprefix=modelprefix,
    )

    Videos = auxiliaryfunctions.get_list_of_videos(videos, videotype)
    for video in Videos:
        print("Processing %s" % (video))
        # Resolve the data folder per video. Overwriting `destfolder` here
        # would make every later video use the first video's folder.
        if destfolder is None:
            videofolder = str(Path(video).parents[0])
        else:
            videofolder = destfolder

        vname = Path(video).stem
        try:
            df, filepath, scorer, _ = auxiliaryfunctions.load_analyzed_data(
                videofolder, vname, DLCscorer, filtered, track_method)
            output_name = filepath.replace(".h5", "_skeleton.h5")
            if os.path.isfile(output_name):
                print(
                    f"Skeleton in video {vname} already processed. Skipping..."
                )
                continue

            bones = {}
            if "individuals" in df.columns.names:
                # Multi-animal data: one set of bones per individual; the
                # "single" group is skipped.
                for animal_name, df_ in df.groupby(level="individuals",
                                                   axis=1):
                    temp = df_.droplevel(["scorer", "individuals"], axis=1)
                    if animal_name != "single":
                        for bp1, bp2 in cfg["skeleton"]:
                            name = "{}_{}_{}".format(animal_name, bp1, bp2)
                            bones[name] = analyzebone(temp[bp1], temp[bp2])
            else:
                for bp1, bp2 in cfg["skeleton"]:
                    name = "{}_{}".format(bp1, bp2)
                    bones[name] = analyzebone(df[scorer][bp1],
                                              df[scorer][bp2])

            skeleton = pd.concat(bones, axis=1)
            skeleton.to_hdf(output_name,
                            "df_with_missing",
                            format="table",
                            mode="w")
            if save_as_csv:
                skeleton.to_csv(output_name.replace(".h5", ".csv"))

        except FileNotFoundError as e:
            # Missing analysis data for this video: report and move on.
            print(e)
            continue
def evaluate_multianimal_full(
    config,
    Shuffles=[1],
    trainingsetindex=0,
    plotting=None,
    show_errors=True,
    comparisonbodyparts="all",
    gputouse=None,
    modelprefix="",
    c_engine=False,
):
    """
    Evaluate a multi-animal network on the human-annotated data and store the
    raw detections together with per-bodypart distance statistics.

    For every shuffle / training fraction / selected snapshot, predictions are
    computed for all annotated images, matched to the ground truth (closest
    detection per ground-truth keypoint), and written to the evaluation
    folder: a "dist_<iterations>.csv" file with distances and confidences, a
    "sd_<iterations>.csv" file with per-bodypart standard deviations, and the
    full prediction data saved via auxfun_multianimal.SaveFullMultiAnimalData.
    A snapshot whose "_full.pickle" result already exists is skipped.

    Parameters
    ----------
    config: string
        Full path of the config.yaml file as a string.

    Shuffles: list of int, optional
        Shuffle indices to evaluate. The default is [1]. The list is only
        iterated, never modified.

    trainingsetindex: int or "all", optional
        Index into TrainingFraction (a list in config.yaml); "all" evaluates
        every training fraction.

    plotting: bool, optional
        If truthy, labeled images are saved in a "LabeledImages_..." folder
        inside the evaluation folder.

    show_errors: bool, default True
        If True, a summary of train/test errors is printed.

    comparisonbodyparts: list of strings or "all", optional
        Passed to auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser;
        in this function the result is only used to size the colormap.

    gputouse: int, optional
        If not None, CUDA_VISIBLE_DEVICES is set to this value.

    modelprefix: str, optional
        Forwarded to the helpers that locate the model and evaluation folders
        and build the scorer name.

    c_engine: bool, default False
        Forwarded to predictma.get_detectionswithcostsandGT.

    Raises
    ------
    FileNotFoundError
        If the test pose_cfg.yaml for a shuffle / training fraction is missing.
    """
    from deeplabcut.pose_estimation_tensorflow.nnet import predict
    from deeplabcut.pose_estimation_tensorflow.nnet import (
        predict_multianimal as predictma,
    )
    from deeplabcut.utils import auxiliaryfunctions, auxfun_multianimal

    import tensorflow as tf

    if "TF_CUDNN_USE_AUTOTUNE" in os.environ:
        del os.environ[
            "TF_CUDNN_USE_AUTOTUNE"]  # was potentially set during training

    tf.reset_default_graph()
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  #
    if gputouse is not None:  # gpu selection
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gputouse)

    start_path = os.getcwd()

    ##################################################
    # Load data...
    ##################################################
    cfg = auxiliaryfunctions.read_config(config)
    if trainingsetindex == "all":
        TrainingFractions = cfg["TrainingFraction"]
    else:
        TrainingFractions = [cfg["TrainingFraction"][trainingsetindex]]

    # Loading human annotated data
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    Data = pd.read_hdf(
        os.path.join(
            cfg["project_path"],
            str(trainingsetfolder),
            "CollectedData_" + cfg["scorer"] + ".h5",
        ),
        "df_with_missing",
    )
    # Handle data previously annotated on a different platform
    sep = "/" if "/" in Data.index[0] else "\\"
    if sep != os.path.sep:
        Data.index = Data.index.str.replace(sep, os.path.sep)
    # Get list of body parts to evaluate network for
    comparisonbodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
        cfg, comparisonbodyparts)
    all_bpts = np.asarray(
        len(cfg["individuals"]) * cfg["multianimalbodyparts"] +
        cfg["uniquebodyparts"])
    colors = visualization.get_cmap(len(comparisonbodyparts),
                                    name=cfg["colormap"])
    # Make folder for evaluation
    auxiliaryfunctions.attempttomakefolder(
        str(cfg["project_path"] + "/evaluation-results/"))
    for shuffle in Shuffles:
        for trainFraction in TrainingFractions:
            ##################################################
            # Load and setup CNN part detector
            ##################################################
            _, metadatafn = auxiliaryfunctions.GetDataandMetaDataFilenames(
                trainingsetfolder, trainFraction, shuffle, cfg)
            modelfolder = os.path.join(
                cfg["project_path"],
                str(
                    auxiliaryfunctions.GetModelFolder(
                        trainFraction, shuffle, cfg, modelprefix=modelprefix)),
            )
            path_test_config = Path(modelfolder) / "test" / "pose_cfg.yaml"

            # Load meta data (this rebinds trainFraction to the stored value)
            (
                _,
                trainIndices,
                testIndices,
                trainFraction,
            ) = auxiliaryfunctions.LoadMetadata(
                os.path.join(cfg["project_path"], metadatafn))

            try:
                dlc_cfg = load_config(str(path_test_config))
            except FileNotFoundError:
                raise FileNotFoundError(
                    "It seems the model for shuffle %s and trainFraction %s does not exist."
                    % (shuffle, trainFraction))

            # TODO: IMPLEMENT for different batch sizes?
            dlc_cfg["batch_size"] = 1  # due to differently sized images!!!

            joints = dlc_cfg["all_joints_names"]

            # Create folder structure to store results.
            evaluationfolder = os.path.join(
                cfg["project_path"],
                str(
                    auxiliaryfunctions.GetEvaluationFolder(
                        trainFraction, shuffle, cfg, modelprefix=modelprefix)),
            )
            auxiliaryfunctions.attempttomakefolder(evaluationfolder,
                                                   recursive=True)
            # path_train_config = modelfolder / 'train' / 'pose_cfg.yaml'

            # Check which snapshots are available and sort them by # iterations
            Snapshots = np.array([
                fn.split(".")[0]
                for fn in os.listdir(os.path.join(str(modelfolder), "train"))
                if "index" in fn
            ])

            if len(Snapshots) == 0:
                print(
                    "Snapshots not found! It seems the dataset for shuffle %s and trainFraction %s is not trained.\nPlease train it before evaluating.\nUse the function 'train_network' to do so."
                    % (shuffle, trainFraction))
            else:
                increasing_indices = np.argsort(
                    [int(m.split("-")[1]) for m in Snapshots])
                Snapshots = Snapshots[increasing_indices]

                if cfg["snapshotindex"] == -1:
                    snapindices = [-1]
                elif cfg["snapshotindex"] == "all":
                    snapindices = range(len(Snapshots))
                elif cfg["snapshotindex"] < len(Snapshots):
                    snapindices = [cfg["snapshotindex"]]
                else:
                    print(
                        "Invalid choice, only -1 (last), any integer up to last, or all (as string)!"
                    )
                    # Evaluate nothing instead of hitting an unbound (or
                    # stale, from a previous iteration) `snapindices`.
                    snapindices = []

                final_result = []
                ##################################################
                # Compute predictions over images
                ##################################################
                for snapindex in snapindices:
                    dlc_cfg["init_weights"] = os.path.join(
                        str(modelfolder), "train", Snapshots[snapindex]
                    )  # setting weights to corresponding snapshot.
                    trainingsiterations = (
                        dlc_cfg["init_weights"].split(os.sep)[-1]
                    ).split(
                        "-"
                    )[-1]  # read how many training iterations that corresponds to.

                    # name for deeplabcut net (based on its parameters)
                    DLCscorer, DLCscorerlegacy = auxiliaryfunctions.GetScorerName(
                        cfg,
                        shuffle,
                        trainFraction,
                        trainingsiterations,
                        modelprefix=modelprefix,
                    )
                    print(
                        "Running ",
                        DLCscorer,
                        " with # of trainingiterations:",
                        trainingsiterations,
                    )
                    (
                        _,
                        resultsfilename,
                        DLCscorer,
                    ) = auxiliaryfunctions.CheckifNotEvaluated(
                        str(evaluationfolder),
                        DLCscorer,
                        DLCscorerlegacy,
                        Snapshots[snapindex],
                    )

                    if os.path.isfile(
                            resultsfilename.split(".h5")[0] + "_full.pickle"):
                        print("Model already evaluated.", resultsfilename)
                    else:
                        if plotting:
                            foldername = os.path.join(
                                str(evaluationfolder),
                                "LabeledImages_" + DLCscorer + "_" +
                                Snapshots[snapindex],
                            )
                            auxiliaryfunctions.attempttomakefolder(foldername)

                        # print(dlc_cfg)
                        # Specifying state of model (snapshot / training state)
                        sess, inputs, outputs = predict.setup_pose_prediction(
                            dlc_cfg)

                        PredicteData = {}
                        dist = np.full((len(Data), len(all_bpts)), np.nan)
                        conf = np.full_like(dist, np.nan)
                        distnorm = np.full(len(Data), np.nan)
                        print("Analyzing data...")
                        for imageindex, imagename in tqdm(enumerate(
                                Data.index)):
                            image_path = os.path.join(cfg["project_path"],
                                                      imagename)
                            image = io.imread(image_path)
                            frame = img_as_ubyte(skimage.color.gray2rgb(image))

                            GT = Data.iloc[imageindex]
                            df = GT.unstack("coords").reindex(
                                joints, level='bodyparts')

                            # Evaluate PAF edge lengths to calibrate `distnorm`
                            temp = GT.unstack("bodyparts")[joints]
                            xy = temp.values.reshape(
                                (-1, 2, temp.shape[1])).swapaxes(1, 2)
                            edges = xy[:, dlc_cfg["partaffinityfield_graph"]]
                            lengths = np.sum(
                                (edges[:, :, 0] - edges[:, :, 1])**2, axis=2)
                            distnorm[imageindex] = np.nanmax(lengths)

                            # FIXME Is having an empty array vs nan really that necessary?!
                            groundtruthidentity = list(
                                df.index.get_level_values(
                                    "individuals").to_numpy().reshape((-1, 1)))
                            groundtruthcoordinates = list(
                                df.values[:, np.newaxis])
                            for i, coords in enumerate(groundtruthcoordinates):
                                if np.isnan(coords).any():
                                    groundtruthcoordinates[i] = np.empty(
                                        (0, 2), dtype=float)
                                    groundtruthidentity[i] = np.array(
                                        [], dtype=str)

                            PredicteData[imagename] = {}
                            PredicteData[imagename]["index"] = imageindex

                            pred = predictma.get_detectionswithcostsandGT(
                                frame,
                                groundtruthcoordinates,
                                dlc_cfg,
                                sess,
                                inputs,
                                outputs,
                                outall=False,
                                nms_radius=dlc_cfg.nmsradius,
                                det_min_score=dlc_cfg.minconfidence,
                                c_engine=c_engine,
                            )
                            PredicteData[imagename]["prediction"] = pred
                            PredicteData[imagename]["groundtruth"] = [
                                groundtruthidentity,
                                groundtruthcoordinates,
                                GT,
                            ]

                            coords_pred = pred["coordinates"][0]
                            probs_pred = pred["confidence"]
                            for bpt, xy_gt in df.groupby(level="bodyparts"):
                                inds_gt = np.flatnonzero(
                                    np.all(~np.isnan(xy_gt), axis=1))
                                n_joint = joints.index(bpt)
                                xy = coords_pred[n_joint]
                                if inds_gt.size and xy.size:
                                    # Pick the predictions closest to ground truth,
                                    # rather than the ones the model is most confident in
                                    d = cdist(xy_gt.iloc[inds_gt], xy)
                                    rows, cols = linear_sum_assignment(d)
                                    min_dists = d[rows, cols]
                                    inds = np.flatnonzero(all_bpts == bpt)
                                    sl = imageindex, inds[inds_gt[rows]]
                                    dist[sl] = min_dists
                                    conf[sl] = probs_pred[n_joint][
                                        cols].squeeze()

                            if plotting:
                                fig = visualization.make_multianimal_labeled_image(
                                    frame,
                                    groundtruthcoordinates,
                                    coords_pred,
                                    probs_pred,
                                    colors,
                                    cfg["dotsize"],
                                    cfg["alphavalue"],
                                    cfg["pcutoff"],
                                )

                                visualization.save_labeled_frame(
                                    fig,
                                    image_path,
                                    foldername,
                                    imageindex in trainIndices,
                                )

                        sess.close()  # closes the current tf session

                        # Compute all distance statistics; `df` from the last
                        # image supplies the (individuals, bodyparts) columns.
                        df_dist = pd.DataFrame(dist, columns=df.index)
                        df_conf = pd.DataFrame(conf, columns=df.index)
                        df_joint = pd.concat([df_dist, df_conf],
                                             keys=["rmse", "conf"],
                                             names=["metrics"],
                                             axis=1)
                        df_joint = df_joint.reorder_levels(list(
                            np.roll(df_joint.columns.names, -1)),
                                                           axis=1)
                        df_joint.sort_index(axis=1,
                                            level=["individuals", "bodyparts"],
                                            ascending=[True, True],
                                            inplace=True)
                        write_path = os.path.join(
                            evaluationfolder,
                            f"dist_{trainingsiterations}.csv")
                        df_joint.to_csv(write_path)

                        # Calculate overall prediction error
                        error = df_joint.xs("rmse", level="metrics", axis=1)
                        mask = df_joint.xs("conf", level="metrics",
                                           axis=1) >= cfg["pcutoff"]
                        error_masked = error[mask]
                        error_train = np.nanmean(error.iloc[trainIndices])
                        error_train_cut = np.nanmean(
                            error_masked.iloc[trainIndices])
                        error_test = np.nanmean(error.iloc[testIndices])
                        error_test_cut = np.nanmean(
                            error_masked.iloc[testIndices])
                        results = [
                            trainingsiterations,
                            int(100 * trainFraction),
                            shuffle,
                            np.round(error_train, 2),
                            np.round(error_test, 2),
                            cfg["pcutoff"],
                            np.round(error_train_cut, 2),
                            np.round(error_test_cut, 2),
                        ]
                        final_result.append(results)

                        # For OKS/PCK, compute the standard deviation error across all frames
                        sd = df_dist.groupby("bodyparts",
                                             axis=1).mean().std(axis=0)
                        sd["distnorm"] = np.sqrt(np.nanmax(distnorm))
                        # Use a dedicated file: replacing "dist.csv" in
                        # `write_path` never matched "dist_<iters>.csv", so the
                        # distance table above was silently overwritten.
                        sd.to_csv(
                            os.path.join(evaluationfolder,
                                         f"sd_{trainingsiterations}.csv"))

                        if show_errors:
                            string = (
                                "Results for {} training iterations: {}, shuffle {}:\n"
                                "Train error: {} pixels. Test error: {} pixels.\n"
                                "With pcutoff of {}:\n"
                                "Train error: {} pixels. Test error: {} pixels."
                            )
                            print(string.format(*results))

                            print("##########################################")
                            print(
                                "Average Euclidean distance to GT per individual (in pixels)"
                            )
                            print(
                                error_masked.groupby(
                                    'individuals',
                                    axis=1).mean().mean().to_string())
                            print(
                                "Average Euclidean distance to GT per bodypart (in pixels)"
                            )
                            print(
                                error_masked.groupby(
                                    'bodyparts',
                                    axis=1).mean().mean().to_string())

                        PredicteData["metadata"] = {
                            "nms radius":
                            dlc_cfg.nmsradius,
                            "minimal confidence":
                            dlc_cfg.minconfidence,
                            "PAFgraph":
                            dlc_cfg.partaffinityfield_graph,
                            "all_joints":
                            [[i] for i in range(len(dlc_cfg.all_joints))],
                            "all_joints_names": [
                                dlc_cfg.all_joints_names[i]
                                for i in range(len(dlc_cfg.all_joints))
                            ],
                            "stride":
                            dlc_cfg.get("stride", 8),
                        }
                        print(
                            "Done and results stored for snapshot: ",
                            Snapshots[snapindex],
                        )
                        dictionary = {
                            "Scorer": DLCscorer,
                            "DLC-model-config file": dlc_cfg,
                            "trainIndices": trainIndices,
                            "testIndices": testIndices,
                            "trainFraction": trainFraction,
                        }
                        metadata = {"data": dictionary}
                        auxfun_multianimal.SaveFullMultiAnimalData(
                            PredicteData, metadata, resultsfilename)

                        tf.reset_default_graph()

                if len(final_result
                       ) > 0:  # Only append if results were calculated
                    make_results_file(final_result, evaluationfolder,
                                      DLCscorer)

    # returning to initial folder
    os.chdir(str(start_path))
def analyze_videos(config,
                   videos,
                   videotype='avi',
                   shuffle=1,
                   trainingsetindex=0,
                   gputouse=None,
                   save_as_csv=False,
                   destfolder=None,
                   cropping=None):
    """
    Makes prediction based on a trained network. The index of the trained network is specified by parameters in the config file (in particular the variable 'snapshotindex')

    You can crop the video (before analysis), by changing 'cropping'=True and setting 'x1','x2','y1','y2' in the config file. The same cropping parameters will then be used for creating the video.
    Note: you can also pass cropping = [x1,x2,y1,y2] coordinates directly, that then will be used for all videos. You can of course loop over videos & pass specific coordinates for each case.

    Output: The labels are stored as MultiIndex Pandas Array, which contains the name of the network, body part name, (x, y) label position
    in pixels, and the likelihood for each frame per body part. These arrays are stored in an efficient Hierarchical Data Format (HDF)
    in the same directory, where the video is stored. However, if the flag save_as_csv is set to True, the data can also be exported in
    comma-separated values format (.csv), which in turn can be imported in many programs, such as MATLAB, R, Prism, etc.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    videos : list
        A list of strings containing the full paths to videos for analysis or a path to the directory, where all the videos with same extension are stored.

    videotype: string, optional
        Checks for the extension of the video in case the input to the video is a directory.
        Only videos with this extension are analyzed. The default is ``avi``

    shuffle: int, optional
        An integer specifying the shuffle index of the training dataset used for training the network. The default is 1.

    trainingsetindex: int, optional
        Integer specifying which TrainingsetFraction to use. By default the first (note that TrainingFraction is a list in config.yaml).

    gputouse: int, optional. Natural number indicating the number of your GPU (see number in nvidia-smi). If you do not have a GPU put None.
        See: https://nvidia.custhelp.com/app/answers/detail/a_id/3751/~/useful-nvidia-smi-queries

    save_as_csv: bool, optional
        Saves the predictions in a .csv file. The default is ``False``; if provided it must be either ``True`` or ``False``

    destfolder: string, optional
        Specifies the destination folder for analysis data (default is the path of the video). Note that for subsequent analysis this folder also needs to be passed.

    cropping: list, optional
        Cropping coordinates [x1, x2, y1, y2]. If given, they overwrite the cropping parameters of the loaded configuration
        for this call only (they are not saved to the config file) and are used for all videos.

    Returns
    -------
    The scorer name built by ``auxiliaryfunctions.GetScorerName`` for the network that was used.

    Raises
    ------
    FileNotFoundError
        If the test pose_cfg.yaml or the snapshots of the requested shuffle cannot be found.

    Examples
    --------

    Windows example for analyzing 1 video
    >>> deeplabcut.analyze_videos('C:\\myproject\\reaching-task\\config.yaml',['C:\\yourusername\\rig-95\\Videos\\reachingvideo1.avi'])
    --------

    If you want to analyze only 1 video
    >>> deeplabcut.analyze_videos('/analysis/project/reaching-task/config.yaml',['/analysis/project/videos/reachingvideo1.avi'])
    --------

    If you want to analyze all videos of type avi in a folder:
    >>> deeplabcut.analyze_videos('/analysis/project/reaching-task/config.yaml',['/analysis/project/videos'],videotype='.avi')
    --------

    If you want to analyze multiple videos
    >>> deeplabcut.analyze_videos('/analysis/project/reaching-task/config.yaml',['/analysis/project/videos/reachingvideo1.avi','/analysis/project/videos/reachingvideo2.avi'])
    --------

    If you want to analyze multiple videos with shuffle = 2
    >>> deeplabcut.analyze_videos('/analysis/project/reaching-task/config.yaml',['/analysis/project/videos/reachingvideo1.avi','/analysis/project/videos/reachingvideo2.avi'], shuffle=2)

    --------
    If you want to analyze multiple videos with shuffle = 2 and save results as an additional csv file too
    >>> deeplabcut.analyze_videos('/analysis/project/reaching-task/config.yaml',['/analysis/project/videos/reachingvideo1.avi','/analysis/project/videos/reachingvideo2.avi'], shuffle=2,save_as_csv=True)
    --------

    """
    if 'TF_CUDNN_USE_AUTOTUNE' in os.environ:
        del os.environ[
            'TF_CUDNN_USE_AUTOTUNE']  #was potentially set during training

    if gputouse is not None:  #gpu selection
        os.environ['CUDA_VISIBLE_DEVICES'] = str(gputouse)

    # Newer TensorFlow 1.x releases expose the graph API under tf.compat.v1.
    vers = (tf.__version__).split('.')
    if int(vers[0]) == 1 and int(vers[1]) > 12:
        TF = tf.compat.v1
    else:
        TF = tf

    TF.reset_default_graph()
    start_path = os.getcwd(
    )  #record cwd to return to this directory in the end

    cfg = auxiliaryfunctions.read_config(config)

    if cropping is not None:
        cfg['cropping'] = True
        cfg['x1'], cfg['x2'], cfg['y1'], cfg['y2'] = cropping
        print("Overwriting cropping parameters:", cropping)
        print(
            "These are used for all videos, but won't be save to the cfg file."
        )

    trainFraction = cfg['TrainingFraction'][trainingsetindex]

    modelfolder = os.path.join(
        cfg["project_path"],
        str(auxiliaryfunctions.GetModelFolder(trainFraction, shuffle, cfg)))
    path_test_config = Path(modelfolder) / 'test' / 'pose_cfg.yaml'
    try:
        dlc_cfg = load_config(str(path_test_config))
    except FileNotFoundError:
        raise FileNotFoundError(
            "It seems the model for shuffle %s and trainFraction %s does not exist."
            % (shuffle, trainFraction))

    # Check which snapshots are available and sort them by # iterations
    missing_snapshots_msg = (
        "Snapshots not found! It seems the dataset for shuffle %s has not been trained/does not exist.\n Please train it before using it to analyze videos.\n Use the function 'train_network' to train the network for shuffle %s."
        % (shuffle, shuffle))
    try:
        Snapshots = np.array([
            fn.split('.')[0]
            for fn in os.listdir(os.path.join(modelfolder, 'train'))
            if "index" in fn
        ])
    except FileNotFoundError:
        raise FileNotFoundError(missing_snapshots_msg)

    # An existing but empty train folder would otherwise end in an opaque
    # IndexError when the snapshot is selected below.
    if len(Snapshots) == 0:
        raise FileNotFoundError(missing_snapshots_msg)

    if cfg['snapshotindex'] == 'all':
        print(
            "Snapshotindex is set to 'all' in the config.yaml file. Running video analysis with all snapshots is very costly! Use the function 'evaluate_network' to choose the best the snapshot. For now, changing snapshot index to -1!"
        )
        snapshotindex = -1
    else:
        snapshotindex = cfg['snapshotindex']

    increasing_indices = np.argsort([int(m.split('-')[1]) for m in Snapshots])
    Snapshots = Snapshots[increasing_indices]

    print("Using %s" % Snapshots[snapshotindex], "for model", modelfolder)

    ##################################################
    # Load and setup CNN part detector
    ##################################################

    # Check if data already was generated:
    dlc_cfg['init_weights'] = os.path.join(modelfolder, 'train',
                                           Snapshots[snapshotindex])
    trainingsiterations = (dlc_cfg['init_weights'].split(
        os.sep)[-1]).split('-')[-1]

    #update batchsize (based on parameters in config.yaml)
    dlc_cfg['batch_size'] = cfg['batch_size']
    # update number of outputs
    dlc_cfg['num_outputs'] = cfg.get('num_outputs', 1)

    print('num_outputs = ', dlc_cfg['num_outputs'])

    # Name for scorer:
    DLCscorer = auxiliaryfunctions.GetScorerName(
        cfg, shuffle, trainFraction, trainingsiterations=trainingsiterations)

    sess, inputs, outputs = predict.setup_pose_prediction(dlc_cfg)
    try:
        xyz_labs_orig = ['x', 'y', 'likelihood']
        suffix = [str(s + 1) for s in range(dlc_cfg['num_outputs'])]
        suffix[
            0] = ''  # first one has empty suffix for backwards compatibility
        xyz_labs = [x + s for s in suffix for x in xyz_labs_orig]

        pdindex = pd.MultiIndex.from_product(
            [[DLCscorer], dlc_cfg['all_joints_names'], xyz_labs],
            names=['scorer', 'bodyparts', 'coords'])

        ##################################################
        # Datafolder
        ##################################################
        Videos = auxiliaryfunctions.Getlistofvideos(videos, videotype)
        if len(Videos) > 0:
            #looping over videos
            for video in Videos:
                AnalyzeVideo(video, DLCscorer, trainFraction, cfg, dlc_cfg,
                             sess, inputs, outputs, pdindex, save_as_csv,
                             destfolder)

            print(
                "The videos are analyzed. Now your research can truly start! \n You can create labeled videos with 'create_labeled_video'."
            )
            print(
                "If the tracking is not satisfactory for some videos, consider expanding the training set. You can use the function 'extract_outlier_frames' to extract any outlier frames!"
            )
        else:
            print("No video was found in the path/ or single video with path:",
                  videos)
            print(
                "Perhaps the videotype is distinct from the videos in the path, I was looking for:",
                videotype)
    finally:
        # Release the TensorFlow session and return to the initial working
        # directory even if the analysis of a video fails.
        sess.close()
        os.chdir(str(start_path))

    return DLCscorer
def analyzeskeleton(
    config,
    videos,
    videotype="avi",
    shuffle=1,
    trainingsetindex=0,
    filtered=False,
    save_as_csv=False,
    destfolder=None,
    modelprefix="",
    track_method="",
):
    """
    Extracts length and orientation of each "bone" of the skeleton as defined in the config file.

    For every video the analyzed data are loaded, each pair of body parts listed under
    ``skeleton`` in the config is passed to ``analyzebone``, and the concatenated result is
    written next to the analyzed data as ``*_skeleton.h5`` (and optionally ``*_skeleton.csv``).
    Videos whose skeleton file already exists, or whose analyzed data cannot be found, are skipped.

    Parameter
    ----------
    config : string
        Full path of the config.yaml file as a string.

    videos : list
        A list of strings containing the full paths to videos for analysis or a path to the
        directory, where all the videos with same extension are stored.

    videotype: string, optional
        Forwarded together with ``videos`` to ``auxiliaryfunctions.Getlistofvideos`` to build
        the list of videos. The default is ``avi``.

    shuffle : int, optional
        The shuffle index of training dataset. The extracted frames will be stored in the
        labeled-dataset for the corresponding shuffle of training dataset. Default is set to 1

    trainingsetindex: int, optional
        Integer specifying which TrainingsetFraction to use. By default the first (note that
        TrainingFraction is a list in config.yaml).

    filtered: bool, default false
        Boolean variable indicating if filtered output should be plotted rather than
        frame-by-frame predictions. Filtered version can be calculated with
        deeplabcut.filterpredictions

    save_as_csv: bool, optional
        Saves the predictions in a .csv file. The default is ``False``; if provided it must be
        either ``True`` or ``False``

    destfolder: string, optional
        Specifies the destination folder for analysis data (default is the path of the video).
        Note that for subsequent analysis this folder also needs to be passed.

    modelprefix: string, optional
        Forwarded to ``auxiliaryfunctions.GetScorerName`` when the scorer name is built.
        Empty by default.

    track_method: string, optional
        Specifies the tracker used to generate the data. Empty by default (corresponding to a
        single animal project). For multiple animals, must be either 'box', 'skeleton', or
        'ellipse'.

    Raises
    ------
    ValueError
        If the ``skeleton`` entry of the config is empty.
    """
    # Load config file, scorer and videos
    cfg = auxiliaryfunctions.read_config(config)
    if not cfg["skeleton"]:
        raise ValueError("No skeleton defined in the config.yaml.")

    DLCscorer, _ = auxiliaryfunctions.GetScorerName(
        cfg,
        shuffle,
        trainFraction=cfg["TrainingFraction"][trainingsetindex],
        modelprefix=modelprefix,
    )
    Videos = auxiliaryfunctions.Getlistofvideos(videos, videotype)
    for video in Videos:
        print("Processing %s" % (video))
        # Resolve the data folder per video. Overwriting `destfolder` itself
        # would make every later video reuse the first video's folder.
        if destfolder is None:
            datafolder = str(Path(video).parents[0])
        else:
            datafolder = destfolder
        vname = Path(video).stem
        try:
            df, filepath, scorer, _ = auxiliaryfunctions.load_analyzed_data(
                datafolder, vname, DLCscorer, filtered, track_method
            )
            output_name = filepath.replace(".h5", "_skeleton.h5")
            if os.path.isfile(output_name):
                print(f"Skeleton in video {vname} already processed. Skipping...")
                continue

            bones = {}
            if "individuals" in df.columns.names:
                # Data with an "individuals" column level: one set of bones per
                # animal; the group named "single" is left out.
                for animal_name, df_ in df.groupby(level="individuals", axis=1):
                    temp = df_.droplevel(["scorer", "individuals"], axis=1)
                    if animal_name != "single":
                        for bp1, bp2 in cfg["skeleton"]:
                            name = "{}_{}_{}".format(animal_name, bp1, bp2)
                            bones[name] = analyzebone(temp[bp1], temp[bp2])
            else:
                for bp1, bp2 in cfg["skeleton"]:
                    name = "{}_{}".format(bp1, bp2)
                    bones[name] = analyzebone(df[scorer][bp1], df[scorer][bp2])

            skeleton = pd.concat(bones, axis=1)
            skeleton.to_hdf(output_name, "df_with_missing", format="table", mode="w")
            if save_as_csv:
                skeleton.to_csv(output_name.replace(".h5", ".csv"))
        except FileNotFoundError as e:
            # Best effort: report the missing data and move on to the next video.
            print(e)
            continue
def extract_outlier_frames(config,
                           videos,
                           shuffle=1,
                           trainingsetindex=0,
                           outlieralgorithm='fitting',
                           comparisonbodyparts='all',
                           epsilon=20,
                           p_bound=.01,
                           ARdegree=3,
                           MAdegree=1,
                           alpha=.01,
                           extractionalgorithm='uniform',
                           automatic=False):
    """
    Extracts the outlier frames in case, the predictions are not correct for a certain video from the cropped video running from
    start to stop as defined in config.yaml.

    Another crucial parameter in config.yaml is how many frames to extract ('numframes2pick').

    Parameter
    ----------
    config : string
        Full path of the config.yaml file as a string.

    videos: list
        Full paths of the videos to extract the frames from. Make sure that these videos are already analyzed.
        A single path given as a plain string is treated as a one-element list.

    shuffle : int, optional
        The shuffle index of training dataset. The extracted frames will be stored in the labeled-dataset for
        the corresponding shuffle of training dataset. Default is set to 1

    trainingsetindex: int, optional
        Integer specifying which TrainingsetFraction to use. By default the first (note that TrainingFraction is a list in config.yaml).

    outlieralgorithm: 'fitting', 'jump', or 'uncertain', optional
        String specifying the algorithm used to detect the outliers. Currently, deeplabcut supports three methods. 'Fitting'
        fits a Auto Regressive Integrated Moving Average model to the data and computes the distance to the estimated data. Larger distances than
        epsilon are then potentially identified as outliers. The methods 'jump' identifies larger jumps than 'epsilon' in any body part; and 'uncertain'
        looks for frames with confidence below p_bound. The default is set to ``fitting``.

    comparisonbodyparts: list of strings, optional
        This select the body parts for which the comparisons with the outliers are carried out. Either ``all``, then all body parts
        from config.yaml are used or a list of strings that are a subset of the full list.
        E.g. ['hand','Joystick'] for the demo Reaching-Mackenzie-2018-08-30/config.yaml to select only these two body parts.

    p_bound: float between 0 and 1, optional
        For outlieralgorithm 'uncertain' this parameter defines the likelihood below which a body part will be flagged as a putative outlier.

    epsilon: float, optional
        Meaning depends on outlieralgorithm. The default is set to 20 pixels.
        For outlieralgorithm 'fitting': Float bound according to which frames are picked when the (average) body part estimate deviates from model fit
        For outlieralgorithm 'jump': Float bound specifying the distance by which body points jump from one frame to next (Euclidean distance)

    ARdegree: int, optional
        For outlieralgorithm 'fitting': Autoregressive degree of ARIMA model degree. (Note we use SARIMAX without exogeneous and seasonal part)
        see https://www.statsmodels.org/dev/generated/statsmodels.tsa.statespace.sarimax.SARIMAX.html

    MAdegree: int
        For outlieralgorithm 'fitting': MovingAverage degree of ARIMA model degree. (Note we use SARIMAX without exogeneous and seasonal part)
        See https://www.statsmodels.org/dev/generated/statsmodels.tsa.statespace.sarimax.SARIMAX.html

    alpha: float
        Significance level for detecting outliers based on confidence interval of fitted ARIMA model. Only the distance is used however.

    extractionalgorithm : string, optional
        String specifying the algorithm to use for selecting the frames from the identified putative outlier frames. Currently, deeplabcut
        supports either ``kmeans`` or ``uniform`` based selection (same logic as for extract_frames).
        The default is set to ``uniform``, if provided it must be either ``uniform`` or ``kmeans``.

    automatic : bool, optional
        Set it to True, if you want to extract outliers without being asked for user feedback.

    Example
    --------
    for extracting the frames with default settings
    >>> deeplabcut.extract_outlier_frames('/analysis/project/reaching-task/config.yaml',['/analysis/project/video/reachinvideo1.avi'])
    --------
    for extracting the frames with kmeans
    >>> deeplabcut.extract_outlier_frames('/analysis/project/reaching-task/config.yaml',['/analysis/project/video/reachinvideo1.avi'],extractionalgorithm='kmeans')
    --------
    for extracting the frames with kmeans and epsilon = 5 pixels.
    >>> deeplabcut.extract_outlier_frames('/analysis/project/reaching-task/config.yaml',['/analysis/project/video/reachinvideo1.avi'],epsilon = 5,extractionalgorithm='kmeans')
    --------
    """
    cfg = auxiliaryfunctions.read_config(config)
    scorer = auxiliaryfunctions.GetScorerName(
        cfg, shuffle, trainFraction=cfg['TrainingFraction'][trainingsetindex])
    print("network parameters:", scorer)

    # A bare string would otherwise be iterated character by character.
    if isinstance(videos, str):
        videos = [videos]

    for video in videos:
        videofolder = str(Path(video).parents[0])
        dataname = str(Path(video).stem) + scorer
        try:
            Dataframe = pd.read_hdf(os.path.join(videofolder, dataname + '.h5'))
            nframes = np.size(Dataframe.index)
            # extract min and max index based on start stop interval.
            startindex = max([int(np.floor(nframes * cfg['start'])), 0])
            stopindex = min([int(np.ceil(nframes * cfg['stop'])), nframes])
            Index = np.arange(stopindex - startindex) + startindex
            # figure out body part list:
            bodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
                cfg, comparisonbodyparts)

            Indices = []
            if outlieralgorithm == 'uncertain':
                for bp in bodyparts:
                    if bp in cfg['bodyparts']:  # filter [who knows what users put in...]
                        p = Dataframe[scorer][bp]['likelihood'].values[Index]
                        # all indices between start and stop that are below p_bound.
                        Indices.extend(np.where(p < p_bound)[0] + startindex)
            elif outlieralgorithm == 'jump':
                for bp in bodyparts:
                    if bp in cfg['bodyparts']:  # filter [who knows what users put in...]
                        dx = np.diff(Dataframe[scorer][bp]['x'].values[Index])
                        dy = np.diff(Dataframe[scorer][bp]['y'].values[Index])
                        # all indices between start and stop with jump larger than epsilon
                        # (leading up to this point, hence the +1 after np.diff)
                        Indices.extend(
                            np.where((dx**2 + dy**2) > epsilon**2)[0] + startindex + 1)
            elif outlieralgorithm == 'fitting':
                # Calculate deviations for video.
                # NOTE(review): the raw `comparisonbodyparts` (possibly the string 'all') is
                # forwarded here rather than the resolved `bodyparts` list -- confirm that
                # ComputeDeviations expects that.
                d, _ = ComputeDeviations(Dataframe, cfg, comparisonbodyparts,
                                         scorer, dataname, p_bound, alpha,
                                         ARdegree, MAdegree)
                # Some heuristics for extracting frames based on distance:
                # time points with at least average difference of epsilon
                Indices = np.where(d > epsilon)[0]
                # If too few points qualify, extract the most distant ones. The count that
                # matters is the number of qualifying frames (`Indices`), not the size of
                # the start/stop window (`Index`).
                if (len(Indices) < cfg['numframes2pick'] * 2
                        and len(d) > cfg['numframes2pick'] * 2):
                    Indices = np.argsort(d)[::-1][:cfg['numframes2pick'] * 2]

            Indices = np.sort(list(set(Indices)))  # remove repetitions.
            print("Method ", outlieralgorithm, " found ", len(Indices),
                  " putative outlier frames.")
            print("Do you want to proceed with extracting ",
                  cfg['numframes2pick'], " of those?")
            if outlieralgorithm == 'uncertain':
                print(
                    "If this list is very large, perhaps consider changing the paramters (start, stop, p_bound, comparisonbodyparts) or use a different method."
                )
            elif outlieralgorithm == 'jump':
                print(
                    "If this list is very large, perhaps consider changing the paramters (start, stop, epsilon, comparisonbodyparts) or use a different method."
                )
            elif outlieralgorithm == 'fitting':
                print(
                    "If this list is very large, perhaps consider changing the paramters (start, stop, epsilon, ARdegree, MAdegree, alpha, comparisonbodyparts) or use a different method."
                )

            if not automatic:
                askuser = input("yes/no")
            else:
                # Automatic mode must pick an accepted answer, otherwise nothing
                # is ever extracted.
                askuser = 'yes'

            if askuser in ('y', 'yes', 'Ja', 'ha'):  # multilanguage support :)
                # Now extract from those Indices!
                ExtractFramesbasedonPreselection(Indices, extractionalgorithm,
                                                 Dataframe, dataname, scorer,
                                                 video, cfg, config)
            else:
                print("Nothing extracted, change parameters and start again...")
        except FileNotFoundError:
            print(
                "The video has not been analyzed yet!. You can only refine the labels, after the pose has been estimate. Please run 'analyze_video' first."
            )
def filterpredictions(config,
                      video,
                      videotype='avi',
                      shuffle=1,
                      trainingsetindex=0,
                      p_bound=.001,
                      ARdegree=3,
                      MAdegree=1,
                      alpha=.01,
                      save_as_csv=True,
                      destfolder=None):
    """
    Fits frame-by-frame pose predictions with SARIMAX model.

    For every video the analyzed predictions are read, a SARIMAX model is fitted to the x and y
    trace of every body part listed in the config, and the fitted means (together with the
    original likelihoods) are written to ``<video><scorer>filtered.h5``. Videos for which that
    file already exists are skipped.

    Parameter
    ----------
    config : string
        Full path of the config.yaml file as a string.

    video : string
        Full path of the video to filter (forwarded to ``auxiliaryfunctions.Getlistofvideos``).
        Make sure that this video is already analyzed.

    videotype: string, optional
        Forwarded together with ``video`` to ``auxiliaryfunctions.Getlistofvideos``.
        The default is ``avi``.

    shuffle : int, optional
        The shuffle index of training dataset. Default is set to 1

    trainingsetindex: int, optional
        Integer specifying which TrainingsetFraction to use. By default the first (note that TrainingFraction is a list in config.yaml).

    p_bound: float between 0 and 1, optional
        Likelihood below which a body part will be considered as missing data for filtering purposes.

    ARdegree: int, optional
        Autoregressive degree of Sarimax model degree.
        see https://www.statsmodels.org/dev/generated/statsmodels.tsa.statespace.sarimax.SARIMAX.html

    MAdegree: int
        Moving Average degree of Sarimax model degree.
        See https://www.statsmodels.org/dev/generated/statsmodels.tsa.statespace.sarimax.SARIMAX.html

    alpha: float
        Significance level for detecting outliers based on confidence interval of fitted SARIMAX model.

    save_as_csv: bool, optional
        Also saves the filtered predictions in a .csv file. The default is ``True``.

    destfolder: string, optional
        Specifies the folder the analysis data are read from and the filtered data are written to
        (default is the path of the video). Note that for subsequent analysis this folder also
        needs to be passed.

    Example
    --------
    deeplabcut.filterpredictions('C:\\myproject\\reaching-task\\config.yaml',['C:\\myproject\\trailtracking-task\\test.mp4'],shuffle=3,ARdegree=5,MAdegree=2)

    One can then use the filtered rather than the frame-by-frame predictions by calling:

    deeplabcut.plot_trajectories('C:\\myproject\\reaching-task\\config.yaml',['C:\\myproject\\trailtracking-task\\test.mp4'],shuffle=3,filtered=True)

    deeplabcut.create_labeled_video('C:\\myproject\\reaching-task\\config.yaml',['C:\\myproject\\trailtracking-task\\test.mp4'],shuffle=3,filtered=True)
    --------

    Returns
    -------
    None. The filtered data have the same column structure (scorer / bodyparts / coords) as the
    normal output of the network and are stored on disk.
    """
    cfg = auxiliaryfunctions.read_config(config)
    scorer = auxiliaryfunctions.GetScorerName(
        cfg, shuffle, trainFraction=cfg['TrainingFraction'][trainingsetindex])
    Videos = auxiliaryfunctions.Getlistofvideos(video, videotype)

    for video in Videos:
        # Resolve the folder per video; a user-supplied destfolder takes precedence.
        if destfolder is None:
            videofolder = str(Path(video).parents[0])
        else:
            videofolder = destfolder

        print("Filtering with ARIMA model %s" % video)
        dataname = str(Path(video).stem) + scorer
        filteredname = dataname.split('.h5')[0] + 'filtered.h5'
        filteredpath = os.path.join(videofolder, filteredname)

        # Skip videos that already have a readable filtered file.
        try:
            pd.read_hdf(filteredpath)
        except FileNotFoundError:
            pass
        else:
            print("Video already filtered...")
            continue

        try:
            Dataframe = pd.read_hdf(os.path.join(videofolder, dataname + '.h5'))
        except FileNotFoundError:
            print("Video not analyzed -- Run analyze_videos first.")
            continue

        # One (x, y, likelihood) frame per body part, joined column-wise at the end.
        frames = []
        for bp in tqdm(cfg['bodyparts']):
            pdindex = pd.MultiIndex.from_product(
                [[scorer], [bp], ['x', 'y', 'likelihood']],
                names=['scorer', 'bodyparts', 'coords'])
            x = Dataframe[scorer][bp]['x'].values
            y = Dataframe[scorer][bp]['y'].values
            p = Dataframe[scorer][bp]['likelihood'].values

            meanx, CIx = FitSARIMAXModel(x, p, p_bound, alpha, ARdegree,
                                         MAdegree, False)
            meany, CIy = FitSARIMAXModel(y, p, p_bound, alpha, ARdegree,
                                         MAdegree, False)
            # The first sample is taken from the raw prediction rather than the fit.
            meanx[0] = x[0]
            meany[0] = y[0]

            frames.append(
                pd.DataFrame(np.hstack([
                    np.expand_dims(meanx, axis=1),
                    np.expand_dims(meany, axis=1),
                    np.expand_dims(p, axis=1)
                ]),
                             columns=pdindex))

        data = pd.concat(frames, axis=1)
        data.to_hdf(filteredpath, 'df_with_missing', format='table', mode='w')
        if save_as_csv:
            print("Saving filtered csv poses!")
            data.to_csv(
                os.path.join(videofolder,
                             filteredname.split('.h5')[0] + '.csv'))
) print("Analyzing video...") deeplabcut.analyze_videos( config_path, [new_video_path], "mp4", robust_nframes=True, allow_growth=True, use_shelve=USE_SHELVE, ) print("Video analyzed.") print("Create video with all detections...") scorer, _ = auxiliaryfunctions.GetScorerName(cfg, 1, TRAIN_SIZE) deeplabcut.create_video_with_all_detections( config_path, [new_video_path], shuffle=1, displayedbodyparts=["bodypart1"] ) print("Video created.") print("Convert detections to tracklets...") deeplabcut.convert_detections2tracklets( config_path, [new_video_path], "mp4", track_method=TESTTRACKER ) print("Tracklets created...")
def analyze_videos(config,videos,shuffle=1,trainingsetindex=0,videotype='avi',gputouse=None,save_as_csv=False, destfolder=None):
    """
    Makes prediction based on a trained network. The index of the trained network is specified by parameters in the config file (in particular the variable 'snapshotindex')

    You can crop the video (before analysis), by changing 'cropping'=True and setting 'x1','x2','y1','y2' in the config file. The same cropping parameters will then be used for creating the video.

    Output: The labels are stored as MultiIndex Pandas Array, which contains the name of the network, body part name, (x, y) label position
            in pixels, and the likelihood for each frame per body part. These arrays are stored in an efficient Hierarchical Data Format (HDF)
            in the same directory, where the video is stored. However, if the flag save_as_csv is set to True, the data can also be exported in
            comma-separated values format (.csv), which in turn can be imported in many programs, such as MATLAB, R, Prism, etc.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    videos : list
        A list of strings containing the full paths to videos for analysis or a path to the directory where all the videos with same extension are stored.

    shuffle: int, optional
        An integer specifying the shuffle index of the training dataset used for training the network. The default is 1.

    trainingsetindex: int, optional
        Integer specifying which TrainingsetFraction to use. By default the first (note that TrainingFraction is a list in config.yaml).

    videotype: string, optional
        Checks for the extension of the video in case the input to the video is a directory.
        Only files whose name contains this string are analyzed. The default is ``avi``

    gputouse: int, optional. Natural number indicating the number of your GPU (see number in nvidia-smi). If you do not have a GPU put None.
        See: https://nvidia.custhelp.com/app/answers/detail/a_id/3751/~/useful-nvidia-smi-queries

    save_as_csv: bool, optional
        Saves the predictions in a .csv file. The default is ``False``; if provided it must be either ``True`` or ``False``

    destfolder: string, optional
        Specifies the destination folder for analysis data (default is the path of the video)

    Raises
    ------
    FileNotFoundError
        If the test ``pose_cfg.yaml`` of the model or its ``train`` folder cannot be found.

    Examples
    --------
    If you want to analyze only 1 video
    >>> deeplabcut.analyze_videos('/analysis/project/reaching-task/config.yaml',['/analysis/project/videos/reachingvideo1.avi'])
    --------

    If you want to analyze all videos of type avi in a folder:
    >>> deeplabcut.analyze_videos('/analysis/project/reaching-task/config.yaml',['/analysis/project/videos'],videotype='.avi')
    --------

    If you want to analyze multiple videos
    >>> deeplabcut.analyze_videos('/analysis/project/reaching-task/config.yaml',['/analysis/project/videos/reachingvideo1.avi','/analysis/project/videos/reachingvideo2.avi'])
    --------

    If you want to analyze multiple videos with shuffle = 2
    >>> deeplabcut.analyze_videos('/analysis/project/reaching-task/config.yaml',['/analysis/project/videos/reachingvideo1.avi','/analysis/project/videos/reachingvideo2.avi'], shuffle=2)
    --------

    If you want to analyze multiple videos with shuffle = 2 and save results as an additional csv file too
    >>> deeplabcut.analyze_videos('/analysis/project/reaching-task/config.yaml',['/analysis/project/videos/reachingvideo1.avi','/analysis/project/videos/reachingvideo2.avi'], shuffle=2,save_as_csv=True)
    --------
    """
    if 'TF_CUDNN_USE_AUTOTUNE' in os.environ:
        del os.environ['TF_CUDNN_USE_AUTOTUNE'] #was potentially set during training

    # GPU selection has to happen before the prediction session is set up below;
    # changing CUDA_VISIBLE_DEVICES afterwards comes too late to influence it.
    if gputouse is not None:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(gputouse)

    tf.reset_default_graph()
    start_path=os.getcwd() #record cwd to return to this directory in the end

    cfg = auxiliaryfunctions.read_config(config)
    trainFraction = cfg['TrainingFraction'][trainingsetindex]

    modelfolder=os.path.join(cfg["project_path"],str(auxiliaryfunctions.GetModelFolder(trainFraction,shuffle,cfg)))
    path_test_config = Path(modelfolder) / 'test' / 'pose_cfg.yaml'
    try:
        dlc_cfg = load_config(str(path_test_config))
    except FileNotFoundError:
        raise FileNotFoundError("It seems the model for shuffle %s and trainFraction %s does not exist."%(shuffle,trainFraction))

    # Check which snapshots are available and sort them by # iterations
    try:
        Snapshots = np.array([fn.split('.')[0]for fn in os.listdir(os.path.join(modelfolder , 'train'))if "index" in fn])
    except FileNotFoundError:
        raise FileNotFoundError("Snapshots not found! It seems the dataset for shuffle %s has not been trained/does not exist.\n Please train it before using it to analyze videos.\n Use the function 'train_network' to train the network for shuffle %s."%(shuffle,shuffle))

    if cfg['snapshotindex'] == 'all':
        print("Snapshotindex is set to 'all' in the config.yaml file. Running video analysis with all snapshots is very costly! Use the function 'evaluate_network' to choose the best the snapshot. For now, changing snapshot index to -1!")
        snapshotindex = -1
    else:
        snapshotindex=cfg['snapshotindex']

    increasing_indices = np.argsort([int(m.split('-')[1]) for m in Snapshots])
    Snapshots = Snapshots[increasing_indices]

    print("Using %s" % Snapshots[snapshotindex], "for model", modelfolder)

    ##################################################
    # Load and setup CNN part detector
    ##################################################

    # Check if data already was generated:
    dlc_cfg['init_weights'] = os.path.join(modelfolder , 'train', Snapshots[snapshotindex])
    trainingsiterations = (dlc_cfg['init_weights'].split(os.sep)[-1]).split('-')[-1]

    #update batchsize (based on parameters in config.yaml)
    dlc_cfg['batch_size']=cfg['batch_size']
    # Name for scorer:
    DLCscorer = auxiliaryfunctions.GetScorerName(cfg,shuffle,trainFraction,trainingsiterations=trainingsiterations)

    sess, inputs, outputs = predict.setup_pose_prediction(dlc_cfg)
    pdindex = pd.MultiIndex.from_product([[DLCscorer], dlc_cfg['all_joints_names'], ['x', 'y', 'likelihood']],names=['scorer', 'bodyparts', 'coords'])

    ##################################################
    # Datafolder
    ##################################################
    try:
        #checks if input is a directory
        if [os.path.isdir(i) for i in videos] == [True]:
            # Analyzes all the videos in the directory.
            print("Analyzing all the videos in the directory")
            videofolder= videos[0]
            # The collected names are relative, so the folder becomes the cwd.
            os.chdir(videofolder)
            videolist=[fn for fn in os.listdir(os.curdir) if (videotype in fn) and ('_labeled.mp4' not in fn)] #exclude labeled-videos!
            Videos = sample(videolist,len(videolist)) # this is useful so multiple nets can be used to analyze simultaneously
        else:
            if isinstance(videos,str):
                if os.path.isfile(videos): # or just one direct path!
                    Videos=[videos]
                else:
                    Videos=[]
            else:
                Videos=[v for v in videos if os.path.isfile(v)]

        if len(Videos)>0:
            #looping over videos
            for video in Videos:
                AnalyzeVideo(video,DLCscorer,trainFraction,cfg,dlc_cfg,sess,inputs, outputs,pdindex,save_as_csv, destfolder)

            print("The videos are analyzed. Now your research can truly start! \n You can create labeled videos with 'create_labeled_video'.")
            print("If the tracking is not satisfactory for some videos, consider expanding the training set. You can use the function 'extract_outlier_frames' to extract any outlier frames!")
        else:
            print("No video was found in the path/ or single video with path:", videos)
            print("Perhaps the videotype is distinct from the videos in the path, I was looking for:", videotype)
    finally:
        # Always return to the caller's working directory, also when no video
        # was found or the analysis of a video raised.
        os.chdir(str(start_path))
def extract_outlier_frames(
    config,
    videos,
    videotype=".avi",
    shuffle=1,
    trainingsetindex=0,
    outlieralgorithm="jump",
    comparisonbodyparts="all",
    epsilon=20,
    p_bound=0.01,
    ARdegree=3,
    MAdegree=1,
    alpha=0.01,
    extractionalgorithm="kmeans",
    automatic=False,
    cluster_resizewidth=30,
    cluster_color=False,
    opencv=True,
    savelabeled=False,
    destfolder=None,
    modelprefix="",
    track_method="",
):
    """
    Extracts the outlier frames in case, the predictions are not correct for a certain video from the cropped video running from
    start to stop as defined in config.yaml.

    Another crucial parameter in config.yaml is how many frames to extract ('numframes2pick').

    Parameter
    ----------
    config : string
        Full path of the config.yaml file as a string.

    videos : list
        A list of strings containing the full paths to videos for analysis or a path to the directory, where all the videos with same extension are stored.

    videotype: string, optional
        Checks for the extension of the video in case the input to the video is a directory.
        Only videos with this extension are analyzed. The default is ``.avi``

    shuffle : int, optional
        The shuffle index of training dataset. The extracted frames will be stored in the labeled-dataset for
        the corresponding shuffle of training dataset. Default is set to 1

    trainingsetindex: int, optional
        Integer specifying which TrainingsetFraction to use. By default the first (note that TrainingFraction is a list in config.yaml).

    outlieralgorithm: 'fitting', 'jump', 'uncertain', or 'manual'
        String specifying the algorithm used to detect the outliers. Currently, deeplabcut supports three methods + a manual GUI option. 'Fitting'
        fits a Auto Regressive Integrated Moving Average model to the data and computes the distance to the estimated data. Larger distances than
        epsilon are then potentially identified as outliers. The methods 'jump' identifies larger jumps than 'epsilon' in any body part; and 'uncertain'
        looks for frames with confidence below p_bound. The default is set to ``jump``.

    comparisonbodyparts: list of strings, optional
        This selects the body parts for which the comparisons with the outliers are carried out. Either ``all``, then all body parts
        from config.yaml are used or a list of strings that are a subset of the full list.
        E.g. ['hand','Joystick'] for the demo Reaching-Mackenzie-2018-08-30/config.yaml to select only these two body parts.

    p_bound: float between 0 and 1, optional
        For outlieralgorithm 'uncertain' this parameter defines the likelihood below which a body part will be flagged as a putative outlier.

    epsilon: float, optional
        Meaning depends on outlieralgorithm. The default is set to 20 pixels.
        For outlieralgorithm 'fitting': Float bound according to which frames are picked when the (average) body part estimate deviates from model fit
        For outlieralgorithm 'jump': Float bound specifying the distance by which body points jump from one frame to next (Euclidean distance)

    ARdegree: int, optional
        For outlieralgorithm 'fitting': Autoregressive degree of ARIMA model degree. (Note we use SARIMAX without exogeneous and seasonal part)
        see https://www.statsmodels.org/dev/generated/statsmodels.tsa.statespace.sarimax.SARIMAX.html

    MAdegree: int
        For outlieralgorithm 'fitting': MovingAverage degree of ARIMA model degree. (Note we use SARIMAX without exogeneous and seasonal part)
        See https://www.statsmodels.org/dev/generated/statsmodels.tsa.statespace.sarimax.SARIMAX.html

    alpha: float
        Significance level for detecting outliers based on confidence interval of fitted ARIMA model. Only the distance is used however.

    extractionalgorithm : string, optional
        String specifying the algorithm to use for selecting the frames from the identified putative outlier frames. Currently, deeplabcut
        supports either ``kmeans`` or ``uniform`` based selection (same logic as for extract_frames).
        The default is set to ``kmeans``, if provided it must be either ``uniform`` or ``kmeans``.

    automatic : bool, optional
        Set it to True, if you want to extract outliers without being asked for user feedback.

    cluster_resizewidth: number, default: 30
        For k-means one can change the width to which the images are downsampled (aspect ratio is fixed).

    cluster_color: bool, default: False
        If false then each downsampled image is treated as a grayscale vector (discarding color information). If true, then the color channels are considered. This increases
        the computational complexity.

    opencv: bool, default: True
        Uses openCV for loading & extracting (otherwise moviepy (legacy))

    savelabeled: bool, default: False
        If true also saves frame with predicted labels in each folder.

    destfolder: string, optional
        Specifies the destination folder that was used for storing analysis data (default is the path of the video).

    modelprefix: string, optional
        Forwarded to ``auxiliaryfunctions.GetScorerName`` when the scorer name is built. Empty by default.

    track_method: string, optional
        Specifies the tracker used to generate the data. Empty by default (corresponding to a single animal project).
        For multiple animals, must be either 'box', 'skeleton', or 'ellipse' and will be taken from the config.yaml file if none is given.

    Raises
    ------
    ValueError
        If none of the requested body parts is valid.

    Examples

    Windows example for extracting the frames with default settings
    >>> deeplabcut.extract_outlier_frames('C:\\myproject\\reaching-task\\config.yaml',['C:\\yourusername\\rig-95\\Videos\\reachingvideo1.avi'])
    --------
    for extracting the frames with default settings
    >>> deeplabcut.extract_outlier_frames('/analysis/project/reaching-task/config.yaml',['/analysis/project/video/reachinvideo1.avi'])
    --------
    for extracting the frames with kmeans
    >>> deeplabcut.extract_outlier_frames('/analysis/project/reaching-task/config.yaml',['/analysis/project/video/reachinvideo1.avi'],extractionalgorithm='kmeans')
    --------
    for extracting the frames with kmeans and epsilon = 5 pixels.
    >>> deeplabcut.extract_outlier_frames('/analysis/project/reaching-task/config.yaml',['/analysis/project/video/reachinvideo1.avi'],epsilon = 5,extractionalgorithm='kmeans')
    --------
    """
    cfg = auxiliaryfunctions.read_config(config)
    bodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
        cfg, comparisonbodyparts
    )
    if not len(bodyparts):
        raise ValueError("No valid bodyparts were selected.")

    track_method = auxfun_multianimal.get_track_method(cfg, track_method=track_method)

    DLCscorer, _ = auxiliaryfunctions.GetScorerName(
        cfg,
        shuffle,
        trainFraction=cfg["TrainingFraction"][trainingsetindex],
        modelprefix=modelprefix,
    )

    Videos = auxiliaryfunctions.Getlistofvideos(videos, videotype)
    if len(Videos) == 0:
        print("No suitable videos found in", videos)

    for video in Videos:
        if destfolder is None:
            videofolder = str(Path(video).parents[0])
        else:
            videofolder = destfolder
        vname = os.path.splitext(os.path.basename(video))[0]

        try:
            df, dataname, _, _ = auxiliaryfunctions.load_analyzed_data(
                videofolder, vname, DLCscorer, track_method=track_method
            )
            nframes = len(df)
            # Restrict the data to the start/stop window from the config.
            startindex = max([int(np.floor(nframes * cfg["start"])), 0])
            stopindex = min([int(np.ceil(nframes * cfg["stop"])), nframes])
            Index = np.arange(stopindex - startindex) + startindex

            df = df.iloc[Index]
            mask = df.columns.get_level_values("bodyparts").isin(bodyparts)
            df_temp = df.loc[:, mask]
            Indices = []
            if outlieralgorithm == "uncertain":
                p = df_temp.xs("likelihood", level="coords", axis=1)
                ind = df_temp.index[(p < p_bound).any(axis=1)].tolist()
                Indices.extend(ind)
            elif outlieralgorithm == "jump":
                temp_dt = df_temp.diff(axis=0) ** 2
                temp_dt.drop("likelihood", axis=1, level="coords", inplace=True)
                # Same grouping as the former `sum(axis=1, level=1)`, whose `level`
                # argument is no longer available in pandas >= 2.0.
                sum_ = temp_dt.groupby(level=1, axis=1, sort=False).sum()
                ind = df_temp.index[(sum_ > epsilon ** 2).any(axis=1)].tolist()
                Indices.extend(ind)
            elif outlieralgorithm == "fitting":
                d, o = compute_deviations(
                    df_temp, dataname, p_bound, alpha, ARdegree, MAdegree
                )
                # Some heuristics for extracting frames based on distance:
                # time points with at least average difference of epsilon
                # NOTE(review): these are positions into `d`, whereas the other two
                # branches collect index labels of `df_temp` -- confirm they agree
                # when the start/stop window does not begin at frame 0.
                ind = np.flatnonzero(d > epsilon)
                if (
                    len(ind) < cfg["numframes2pick"] * 2
                    and len(d) > cfg["numframes2pick"] * 2
                ):  # if too few points qualify, extract the most distant ones.
                    ind = np.argsort(d)[::-1][: cfg["numframes2pick"] * 2]
                Indices.extend(ind)
            elif outlieralgorithm == "manual":
                wd = Path(config).resolve().parents[0]
                os.chdir(str(wd))
                from deeplabcut.gui import outlier_frame_extraction_toolbox

                outlier_frame_extraction_toolbox.show(
                    config,
                    video,
                    shuffle,
                    df,
                    savelabeled,
                    cfg.get("multianimalproject", False),
                )

            # Run always except when the outlieralgorithm == manual.
            if outlieralgorithm != "manual":
                Indices = np.sort(list(set(Indices)))  # remove repetitions.
                print(
                    "Method ",
                    outlieralgorithm,
                    " found ",
                    len(Indices),
                    " putative outlier frames.",
                )
                print(
                    "Do you want to proceed with extracting ",
                    cfg["numframes2pick"],
                    " of those?",
                )
                if outlieralgorithm == "uncertain":
                    print(
                        "If this list is very large, perhaps consider changing the parameters "
                        "(start, stop, p_bound, comparisonbodyparts) or use a different method."
                    )
                elif outlieralgorithm == "jump":
                    # 'jump' is governed by epsilon, not by p_bound.
                    print(
                        "If this list is very large, perhaps consider changing the parameters "
                        "(start, stop, epsilon, comparisonbodyparts) or use a different method."
                    )
                elif outlieralgorithm == "fitting":
                    print(
                        "If this list is very large, perhaps consider changing the parameters "
                        "(start, stop, epsilon, ARdegree, MAdegree, alpha, comparisonbodyparts) "
                        "or use a different method."
                    )

                if not automatic:
                    askuser = input("yes/no")
                else:
                    # Automatic mode must pick an accepted answer, otherwise nothing
                    # is ever extracted.
                    askuser = "yes"

                if askuser in ("y", "yes", "Ja", "ha"):  # multilanguage support :)
                    # Now extract from those Indices!
                    ExtractFramesbasedonPreselection(
                        Indices,
                        extractionalgorithm,
                        df,
                        video,
                        cfg,
                        config,
                        opencv,
                        cluster_resizewidth,
                        cluster_color,
                        savelabeled,
                    )
                else:
                    print(
                        "Nothing extracted, please change the parameters and start again..."
                    )
        except FileNotFoundError as e:
            print(e)
            print(
                "It seems the video has not been analyzed yet, or the video is not found! "
                "You can only refine the labels after the a video is analyzed. Please run 'analyze_video' first. "
                "Or, please double check your video file path"
            )
def analyze_time_lapse_frames(config, directory, frametype='.png', shuffle=1, trainingsetindex=0, gputouse=None, save_as_csv=False):
    """
    Analyze all images (of type ``frametype``) in a folder and store the output in one file.

    Cropping settings ('cropping', 'x1', 'x2', 'y1', 'y2') are read from the config file.

    Output: The labels are stored as MultiIndex Pandas Array, which contains the name of the network,
    body part name, (x, y) label position in pixels, and the likelihood for each frame per body part.
    The array is stored in Hierarchical Data Format (HDF) inside ``directory`` as
    ``<directory name><scorer name>.h5``. If that file can already be read, nothing is recomputed.
    If the flag save_as_csv is set to True, the data can also be exported in comma-separated values
    format (.csv), which in turn can be imported in many programs, such as MATLAB, R, Prism, etc.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    directory: string
        Full path to directory containing the frames that shall be analyzed

    frametype: string, optional
        Only files whose name contains this string are analyzed. The default is ``.png``

    shuffle: int, optional
        An integer specifying the shuffle index of the training dataset used for training the network.
        The default is 1.

    trainingsetindex: int, optional
        Integer specifying which TrainingsetFraction to use. By default the first
        (note that TrainingFraction is a list in config.yaml).

    gputouse: int, optional.
        Natural number indicating the number of your GPU (see number in nvidia-smi).
        If you do not have a GPU put None.
        See: https://nvidia.custhelp.com/app/answers/detail/a_id/3751/~/useful-nvidia-smi-queries

    save_as_csv: bool, optional
        Saves the predictions in a .csv file. The default is ``False``; if provided it must be
        either ``True`` or ``False``

    Raises
    ------
    FileNotFoundError
        If the test ``pose_cfg.yaml`` of the model cannot be loaded, or if no snapshots exist
        for the requested shuffle.

    Examples
    --------
    If you want to analyze all frames in /analysis/project/timelapseexperiment1
    >>> deeplabcut.analyze_time_lapse_frames('/analysis/project/reaching-task/config.yaml','/analysis/project/timelapseexperiment1')
    --------

    If you want to analyze all .bmp frames in /analysis/project/timelapseexperiment1
    >>> deeplabcut.analyze_time_lapse_frames('/analysis/project/reaching-task/config.yaml','/analysis/project/timelapseexperiment1', frametype='.bmp')
    --------

    Note: for test purposes one can extract all frames from a video with ffmpeg, e.g.
    ffmpeg -i testvideo.avi thumb%04d.png
    """
    if 'TF_CUDNN_USE_AUTOTUNE' in os.environ:
        del os.environ['TF_CUDNN_USE_AUTOTUNE']  # was potentially set during training

    # GPU selection must happen before the TensorFlow session is created further down;
    # setting CUDA_VISIBLE_DEVICES afterwards has no effect on an initialised session.
    if gputouse is not None:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(gputouse)

    tf.reset_default_graph()
    start_path = os.getcwd()  # record cwd to return to this directory in the end

    cfg = auxiliaryfunctions.read_config(config)
    trainFraction = cfg['TrainingFraction'][trainingsetindex]
    modelfolder = os.path.join(cfg["project_path"], str(auxiliaryfunctions.GetModelFolder(trainFraction, shuffle, cfg)))
    path_test_config = Path(modelfolder) / 'test' / 'pose_cfg.yaml'
    try:
        dlc_cfg = load_config(str(path_test_config))
    except FileNotFoundError:
        raise FileNotFoundError("It seems the model for shuffle %s and trainFraction %s does not exist." % (shuffle, trainFraction))

    # Check which snapshots are available and sort them by # iterations
    no_snapshots_msg = "Snapshots not found! It seems the dataset for shuffle %s has not been trained/does not exist.\n Please train it before using it to analyze videos.\n Use the function 'train_network' to train the network for shuffle %s." % (shuffle, shuffle)
    try:
        Snapshots = np.array([fn.split('.')[0] for fn in os.listdir(os.path.join(modelfolder, 'train')) if "index" in fn])
    except FileNotFoundError:
        raise FileNotFoundError(no_snapshots_msg)
    if len(Snapshots) == 0:
        # The train folder exists but holds no snapshot; fail with the same
        # explanation instead of an IndexError when indexing below.
        raise FileNotFoundError(no_snapshots_msg)

    if cfg['snapshotindex'] == 'all':
        print("Snapshotindex is set to 'all' in the config.yaml file. Running video analysis with all snapshots is very costly! Use the function 'evaluate_network' to choose the best the snapshot. For now, changing snapshot index to -1!")
        snapshotindex = -1
    else:
        snapshotindex = cfg['snapshotindex']

    increasing_indices = np.argsort([int(m.split('-')[1]) for m in Snapshots])
    Snapshots = Snapshots[increasing_indices]

    print("Using %s" % Snapshots[snapshotindex], "for model", modelfolder)

    ##################################################
    # Load and setup CNN part detector
    ##################################################

    dlc_cfg['init_weights'] = os.path.join(modelfolder, 'train', Snapshots[snapshotindex])
    trainingsiterations = (dlc_cfg['init_weights'].split(os.sep)[-1]).split('-')[-1]

    # update batchsize (based on parameters in config.yaml)
    dlc_cfg['batch_size'] = cfg['batch_size']

    # Name for scorer:
    DLCscorer = auxiliaryfunctions.GetScorerName(cfg, shuffle, trainFraction, trainingsiterations=trainingsiterations)
    sess, inputs, outputs = predict.setup_pose_prediction(dlc_cfg)
    pdindex = pd.MultiIndex.from_product([[DLCscorer], dlc_cfg['all_joints_names'], ['x', 'y', 'likelihood']], names=['scorer', 'bodyparts', 'coords'])

    ##################################################
    # Loading the images
    ##################################################
    try:
        if os.path.isdir(directory):
            # Analyzes all the frames in the directory.
            print("Analyzing all frames in the directory: ", directory)
            os.chdir(directory)
            framelist = np.sort([fn for fn in os.listdir(os.curdir) if (frametype in fn)])

            vname = Path(directory).stem
            dataname = os.path.join(directory, vname + DLCscorer + '.h5')
            try:
                # Attempt to load data...
                pd.read_hdf(dataname)
                print("Frames already analyzed!", dataname)
            except FileNotFoundError:
                nframes = len(framelist)
                if nframes > 0:
                    start = time.time()
                    PredicteData, nframes, nx, ny = GetPosesofFrames(cfg, dlc_cfg, sess, inputs, outputs, directory, framelist, nframes, dlc_cfg['batch_size'])
                    stop = time.time()

                    if cfg['cropping'] == True:
                        coords = [cfg['x1'], cfg['x2'], cfg['y1'], cfg['y2']]
                    else:
                        coords = [0, nx, 0, ny]

                    dictionary = {
                        "start": start,
                        "stop": stop,
                        "run_duration": stop - start,
                        "Scorer": DLCscorer,
                        "config file": dlc_cfg,
                        "batch_size": dlc_cfg["batch_size"],
                        "frame_dimensions": (ny, nx),
                        "nframes": nframes,
                        "cropping": cfg['cropping'],
                        "cropping_parameters": coords
                    }
                    metadata = {'data': dictionary}

                    print("Saving results in %s..." % (directory))

                    auxiliaryfunctions.SaveData(PredicteData[:nframes, :], metadata, dataname, pdindex, framelist, save_as_csv)
                    print("The folder was analyzed. Now your research can truly start!")
                    print("If the tracking is not satisfactory for some frome, consider expanding the training set.")
                else:
                    print("No frames were found. Consider changing the path or the frametype.")
        else:
            print("The given path is not a directory, nothing was analyzed: ", directory)
    finally:
        # Always return to the initial folder, even if the analysis failed.
        os.chdir(str(start_path))
def return_evaluate_network_data(
    config,
    shuffle=0,
    trainingsetindex=0,
    comparisonbodyparts="all",
    Snapindex=None,
    rescale=False,
    fulldata=False,
    show_errors=True,
    modelprefix="",
    returnjustfns=True,
):
    """
    Returns the results for (previously evaluated) network. deeplabcut.evaluate_network(..)

    If returnjustfns=True (the default), only the list of result file names (one per selected
    snapshot) is returned and no result file is read.

    Otherwise returns list of (per evaluated snapshot):
    [trainingsiterations,trainfraction,shuffle,trainerror,testerror,pcutoff,trainerrorpcutoff,testerrorpcutoff,Snapshots[snapindex],scale,net_type]

    If fulldata=True (and returnjustfns=False), returns ``(DATA, results)`` where DATA holds, for
    every snapshot whose results could be loaded, the complete annotation and prediction arrays:
    [DataMachine, Data, data, trainIndices, testIndices, trainFraction, DLCscorer, comparisonbodyparts, cfg, evaluationfolder, Snapshots[snapindex]]

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    shuffle: integer
        integer specifying shuffle index of the training dataset. The default is 0.

    trainingsetindex: int, optional
        Integer specifying which TrainingsetFraction to use. By default the first
        (note that TrainingFraction is a list in config.yaml).

    comparisonbodyparts: list of bodyparts, Default is "all".
        The average error will be computed for those body parts only (Has to be a subset of the body parts).

    Snapindex: int or "all", optional
        Which snapshot(s) to report: -1 (last), an integer index, or "all".
        If None, ``snapshotindex`` from config.yaml is used.

    rescale: bool, default False
        If True, the ground truth is multiplied by the 'global_scale' variable (as set in the
        test/pose_config.yaml file for a particular project) before it is compared to the predictions.

    fulldata: bool, default False
        Additionally return the underlying data arrays (see above).

    show_errors: bool, default True
        Print train/test errors for every snapshot that is read.

    modelprefix: string, optional
        Passed on to the helpers that locate the model and evaluation folders.

    returnjustfns: bool, default True
        Only return the result file names.

    Examples
    --------
    Get the result file names only
    >>> deeplabcut.return_evaluate_network_data('/analysis/project/reaching-task/config.yaml', shuffle=1)
    --------
    Get the error table
    >>> deeplabcut.return_evaluate_network_data('/analysis/project/reaching-task/config.yaml', shuffle=1, returnjustfns=False)
    """
    import os

    from deeplabcut.pose_estimation_tensorflow.config import load_config
    from deeplabcut.utils import auxiliaryfunctions

    start_path = os.getcwd()
    # Read file path for pose_config file. >> pass it on
    cfg = auxiliaryfunctions.read_config(config)

    # Loading human annotatated data
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)

    # Get list of body parts to evaluate network for
    comparisonbodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
        cfg, comparisonbodyparts)

    ##################################################
    # Load data...
    ##################################################
    trainFraction = cfg["TrainingFraction"][trainingsetindex]
    _, metadatafn = auxiliaryfunctions.GetDataandMetaDataFilenames(
        trainingsetfolder, trainFraction, shuffle, cfg)
    modelfolder = os.path.join(
        cfg["project_path"],
        str(auxiliaryfunctions.GetModelFolder(trainFraction, shuffle, cfg, modelprefix=modelprefix)),
    )
    path_test_config = Path(modelfolder) / "test" / "pose_cfg.yaml"

    # Load meta data
    data, trainIndices, testIndices, trainFraction = auxiliaryfunctions.LoadMetadata(
        os.path.join(cfg["project_path"], metadatafn))

    try:
        dlc_cfg = load_config(str(path_test_config))
    except FileNotFoundError:
        raise FileNotFoundError(
            "It seems the model for shuffle %s and trainFraction %s does not exist."
            % (shuffle, trainFraction))

    ########################### RESCALING (to global scale)
    if rescale:
        scale = dlc_cfg["global_scale"]
        print("Rescaling Data to ", scale)
    else:
        scale = 1

    Data = pd.read_hdf(
        os.path.join(
            cfg["project_path"],
            str(trainingsetfolder),
            "CollectedData_" + cfg["scorer"] + ".h5",
        ))
    if rescale:
        Data = Data * scale

    evaluationfolder = os.path.join(
        cfg["project_path"],
        str(auxiliaryfunctions.GetEvaluationFolder(trainFraction, shuffle, cfg, modelprefix=modelprefix)),
    )

    # Check which snapshots are available and sort them by # iterations
    Snapshots = np.array([
        fn.split(".")[0]
        for fn in os.listdir(os.path.join(str(modelfolder), "train"))
        if "index" in fn
    ])

    if len(Snapshots) == 0:
        print(
            "Snapshots not found! It seems the dataset for shuffle %s and trainFraction %s is not trained.\nPlease train it before evaluating.\nUse the function 'train_network' to do so."
            % (shuffle, trainFraction))
        snapindices = []
    else:
        increasing_indices = np.argsort([int(m.split("-")[1]) for m in Snapshots])
        Snapshots = Snapshots[increasing_indices]
        if Snapindex is None:
            Snapindex = cfg["snapshotindex"]

        if Snapindex == -1:
            snapindices = [-1]
        elif Snapindex == "all":
            snapindices = range(len(Snapshots))
        elif Snapindex < len(Snapshots):
            snapindices = [Snapindex]
        else:
            print(
                "Invalid choice, only -1 (last), any integer up to last, or all (as string)!"
            )
            # Nothing to iterate over; without this the loop below hits an undefined name.
            snapindices = []

    DATA = []
    results = []
    resultsfns = []
    for snapindex in snapindices:
        # setting weights to corresponding snapshot.
        dlc_cfg["init_weights"] = os.path.join(
            str(modelfolder), "train", Snapshots[snapindex])
        # read how many training siterations that corresponds to.
        trainingsiterations = (dlc_cfg["init_weights"].split(os.sep)[-1]).split("-")[-1]

        # name for deeplabcut net (based on its parameters)
        DLCscorer, DLCscorerlegacy = auxiliaryfunctions.GetScorerName(
            cfg, shuffle, trainFraction, trainingsiterations, modelprefix=modelprefix)
        if not returnjustfns:
            print(
                "Retrieving ",
                DLCscorer,
                " with # of trainingiterations:",
                trainingsiterations,
            )

        (
            notanalyzed,
            resultsfilename,
            DLCscorer,
        ) = auxiliaryfunctions.CheckifNotEvaluated(str(evaluationfolder), DLCscorer, DLCscorerlegacy, Snapshots[snapindex])
        print(resultsfilename)
        resultsfns.append(resultsfilename)
        if returnjustfns:
            continue

        if not notanalyzed and os.path.isfile(resultsfilename):  # data exists..
            DataMachine = pd.read_hdf(resultsfilename)
            DataCombined = pd.concat([Data.T, DataMachine.T], axis=0).T
            RMSE, RMSEpcutoff = pairwisedistances(
                DataCombined,
                cfg["scorer"],
                DLCscorer,
                cfg["pcutoff"],
                comparisonbodyparts,
            )

            testerror = np.nanmean(RMSE.iloc[testIndices].values.flatten())
            trainerror = np.nanmean(RMSE.iloc[trainIndices].values.flatten())
            testerrorpcutoff = np.nanmean(RMSEpcutoff.iloc[testIndices].values.flatten())
            trainerrorpcutoff = np.nanmean(RMSEpcutoff.iloc[trainIndices].values.flatten())
            if show_errors:
                print(
                    "Results for",
                    trainingsiterations,
                    " training iterations:",
                    int(100 * trainFraction),
                    shuffle,
                    "train error:",
                    np.round(trainerror, 2),
                    "pixels. Test error:",
                    np.round(testerror, 2),
                    " pixels.",
                )
                print(
                    "With pcutoff of",
                    cfg["pcutoff"],
                    " train error:",
                    np.round(trainerrorpcutoff, 2),
                    "pixels. Test error:",
                    np.round(testerrorpcutoff, 2),
                    "pixels",
                )
                print("Snapshot", Snapshots[snapindex])

            results.append([
                trainingsiterations,
                int(100 * trainFraction),
                shuffle,
                np.round(trainerror, 2),
                np.round(testerror, 2),
                cfg["pcutoff"],
                np.round(trainerrorpcutoff, 2),
                np.round(testerrorpcutoff, 2),
                Snapshots[snapindex],
                scale,
                dlc_cfg["net_type"],
            ])

            # Only collect the full data when DataMachine was actually loaded for
            # this snapshot; otherwise it would be undefined or stale.
            if fulldata:
                DATA.append([
                    DataMachine,
                    Data,
                    data,
                    trainIndices,
                    testIndices,
                    trainFraction,
                    DLCscorer,
                    comparisonbodyparts,
                    cfg,
                    evaluationfolder,
                    Snapshots[snapindex],
                ])
        else:
            print("Model not trained/evaluated!")

    os.chdir(start_path)
    if returnjustfns:
        return resultsfns
    if fulldata:
        return DATA, results
    return results
def evaluate_multianimal_full(
    config,
    Shuffles=[1],
    trainingsetindex=0,
    plotting=None,
    show_errors=True,
    comparisonbodyparts="all",
    gputouse=None,
    modelprefix="",
    c_engine=False,
):
    """
    WIP multi animal project.

    For every requested shuffle / training fraction / snapshot, runs the network on all labeled
    images listed in ``CollectedData_<scorer>.h5``, collects detections together with the ground
    truth, and stores them via ``auxfun_multianimal.SaveFullMultiAnimalData``. Snapshots for
    which a ``*_full.pickle`` result file already exists are skipped.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    Shuffles: list of int, optional
        Shuffle indices to evaluate. Only iterated over, never modified. The default is [1].

    trainingsetindex: int or "all", optional
        Index into TrainingFraction from config.yaml, or "all" to evaluate every fraction.

    plotting: bool, optional
        If truthy, labeled images are created and saved per snapshot in a ``LabeledImages_...``
        folder inside the evaluation folder.

    show_errors: bool, optional
        Currently not used by this function.

    comparisonbodyparts: list of bodyparts, Default is "all".
        Body parts used to size the colormap for plotting.

    gputouse: int, optional
        If given, written to the CUDA_VISIBLE_DEVICES environment variable.

    modelprefix: string, optional
        Passed on to the helpers that locate the model and evaluation folders.

    c_engine: bool, optional
        Passed on to ``predict_multianimal.get_detectionswithcostsandGT``.

    Raises
    ------
    FileNotFoundError
        If the test ``pose_cfg.yaml`` of a model cannot be loaded.
    """
    import os

    from deeplabcut.pose_estimation_tensorflow.nnet import predict
    from deeplabcut.pose_estimation_tensorflow.nnet import (
        predict_multianimal as predictma,
    )
    from deeplabcut.utils import auxiliaryfunctions, auxfun_multianimal

    import tensorflow as tf

    if "TF_CUDNN_USE_AUTOTUNE" in os.environ:
        del os.environ["TF_CUDNN_USE_AUTOTUNE"]  # was potentially set during training

    tf.reset_default_graph()
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
    if gputouse is not None:  # gpu selection
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gputouse)

    start_path = os.getcwd()

    ##################################################
    # Load data...
    ##################################################
    cfg = auxiliaryfunctions.read_config(config)
    if trainingsetindex == "all":
        TrainingFractions = cfg["TrainingFraction"]
    else:
        TrainingFractions = [cfg["TrainingFraction"][trainingsetindex]]

    # Loading human annotatated data
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    Data = pd.read_hdf(
        os.path.join(
            cfg["project_path"],
            str(trainingsetfolder),
            "CollectedData_" + cfg["scorer"] + ".h5",
        ),
        "df_with_missing",
    )
    # Get list of body parts to evaluate network for
    comparisonbodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
        cfg, comparisonbodyparts)
    colors = visualization.get_cmap(len(comparisonbodyparts), name=cfg["colormap"])
    # Make folder for evaluation
    auxiliaryfunctions.attempttomakefolder(
        str(cfg["project_path"] + "/evaluation-results/"))

    for shuffle in Shuffles:
        for trainFraction in TrainingFractions:
            ##################################################
            # Load and setup CNN part detector
            ##################################################
            _, metadatafn = auxiliaryfunctions.GetDataandMetaDataFilenames(
                trainingsetfolder, trainFraction, shuffle, cfg)
            modelfolder = os.path.join(
                cfg["project_path"],
                str(auxiliaryfunctions.GetModelFolder(
                    trainFraction, shuffle, cfg, modelprefix=modelprefix)),
            )
            path_test_config = Path(modelfolder) / "test" / "pose_cfg.yaml"

            # Load meta data
            (
                data,
                trainIndices,
                testIndices,
                trainFraction,
            ) = auxiliaryfunctions.LoadMetadata(
                os.path.join(cfg["project_path"], metadatafn))

            try:
                dlc_cfg = load_config(str(path_test_config))
            except FileNotFoundError:
                raise FileNotFoundError(
                    "It seems the model for shuffle %s and trainFraction %s does not exist."
                    % (shuffle, trainFraction))

            # TODO: IMPLEMENT for different batch sizes?
            dlc_cfg["batch_size"] = 1  # due to differently sized images!!!

            # Create folder structure to store results.
            evaluationfolder = os.path.join(
                cfg["project_path"],
                str(auxiliaryfunctions.GetEvaluationFolder(
                    trainFraction, shuffle, cfg, modelprefix=modelprefix)),
            )
            auxiliaryfunctions.attempttomakefolder(evaluationfolder, recursive=True)

            # Check which snapshots are available and sort them by # iterations
            Snapshots = np.array([
                fn.split(".")[0]
                for fn in os.listdir(os.path.join(str(modelfolder), "train"))
                if "index" in fn
            ])
            if len(Snapshots) == 0:
                print(
                    "Snapshots not found! It seems the dataset for shuffle %s and trainFraction %s is not trained.\nPlease train it before evaluating.\nUse the function 'train_network' to do so."
                    % (shuffle, trainFraction))
                continue

            increasing_indices = np.argsort([int(m.split("-")[1]) for m in Snapshots])
            Snapshots = Snapshots[increasing_indices]

            if cfg["snapshotindex"] == -1:
                snapindices = [-1]
            elif cfg["snapshotindex"] == "all":
                snapindices = range(len(Snapshots))
            elif cfg["snapshotindex"] < len(Snapshots):
                snapindices = [cfg["snapshotindex"]]
            else:
                print(
                    "Invalid choice, only -1 (last), any integer up to last, or all (as string)!"
                )
                # Nothing to evaluate; without this the loop below would use an
                # undefined (or stale, from a previous iteration) value.
                snapindices = []

            (
                individuals,
                uniquebodyparts,
                multianimalbodyparts,
            ) = auxfun_multianimal.extractindividualsandbodyparts(cfg)

            ##################################################
            # Compute predictions over images
            ##################################################
            for snapindex in snapindices:
                # setting weights to corresponding snapshot.
                dlc_cfg["init_weights"] = os.path.join(
                    str(modelfolder), "train", Snapshots[snapindex])
                # read how many training siterations that corresponds to.
                trainingsiterations = (
                    dlc_cfg["init_weights"].split(os.sep)[-1]
                ).split("-")[-1]

                # name for deeplabcut net (based on its parameters)
                DLCscorer, DLCscorerlegacy = auxiliaryfunctions.GetScorerName(
                    cfg,
                    shuffle,
                    trainFraction,
                    trainingsiterations,
                    modelprefix=modelprefix,
                )
                print(
                    "Running ",
                    DLCscorer,
                    " with # of trainingiterations:",
                    trainingsiterations,
                )
                (
                    notanalyzed,
                    resultsfilename,
                    DLCscorer,
                ) = auxiliaryfunctions.CheckifNotEvaluated(
                    str(evaluationfolder),
                    DLCscorer,
                    DLCscorerlegacy,
                    Snapshots[snapindex],
                )

                if os.path.isfile(resultsfilename.split(".h5")[0] + "_full.pickle"):
                    print("Model already evaluated.", resultsfilename)
                    continue

                if plotting:
                    foldername = os.path.join(
                        str(evaluationfolder),
                        "LabeledImages_" + DLCscorer + "_" + Snapshots[snapindex],
                    )
                    auxiliaryfunctions.attempttomakefolder(foldername)

                # Specifying state of model (snapshot / training state)
                sess, inputs, outputs = predict.setup_pose_prediction(dlc_cfg)

                PredicteData = {}
                print("Analyzing data...")
                try:
                    # tqdm wraps the index itself so the progress bar knows the total.
                    for imageindex, imagename in enumerate(tqdm(Data.index)):
                        image_path = os.path.join(cfg["project_path"], imagename)
                        image = io.imread(image_path)
                        frame = img_as_ubyte(skimage.color.gray2rgb(image))

                        GT = Data.iloc[imageindex]

                        # Storing GT data as dictionary, so it can be used for calculating connection costs
                        groundtruthcoordinates = []
                        groundtruthidentity = []
                        for bpt in dlc_cfg["all_joints_names"]:
                            coords = np.full((len(individuals), 2), np.nan)
                            identity = []
                            for prfxindex, prefix in enumerate(individuals):
                                is_unique = bpt in uniquebodyparts and prefix == "single"
                                is_multi = bpt in multianimalbodyparts and prefix != "single"
                                if is_unique or is_multi:
                                    annotation = GT[cfg["scorer"]][prefix][bpt]
                                    coords[prfxindex, :] = np.array(
                                        [annotation["x"], annotation["y"]])
                                    identity.append(prefix)
                                else:
                                    identity.append("nix")

                            # keep only the individuals for which this body part is annotated
                            labeled = np.isfinite(coords[:, 0])
                            groundtruthcoordinates.append(coords[labeled, :])
                            groundtruthidentity.append(np.array(identity)[labeled])

                        PredicteData[imagename] = {}
                        PredicteData[imagename]["index"] = imageindex

                        pred = predictma.get_detectionswithcostsandGT(
                            frame,
                            groundtruthcoordinates,
                            dlc_cfg,
                            sess,
                            inputs,
                            outputs,
                            outall=False,
                            nms_radius=dlc_cfg.nmsradius,
                            det_min_score=dlc_cfg.minconfidence,
                            c_engine=c_engine,
                        )
                        PredicteData[imagename]["prediction"] = pred
                        PredicteData[imagename]["groundtruth"] = [
                            groundtruthidentity,
                            groundtruthcoordinates,
                            GT,
                        ]

                        if plotting:
                            coords_pred = pred["coordinates"][0]
                            probs_pred = pred["confidence"]
                            fig = visualization.make_multianimal_labeled_image(
                                frame,
                                groundtruthcoordinates,
                                coords_pred,
                                probs_pred,
                                colors,
                                cfg["dotsize"],
                                cfg["alphavalue"],
                                cfg["pcutoff"],
                            )
                            visualization.save_labeled_frame(
                                fig,
                                image_path,
                                foldername,
                                imageindex in trainIndices,
                            )
                finally:
                    sess.close()  # closes the current tf session, also on failure

                PredicteData["metadata"] = {
                    "nms radius": dlc_cfg.nmsradius,
                    "minimal confidence": dlc_cfg.minconfidence,
                    "PAFgraph": dlc_cfg.partaffinityfield_graph,
                    "all_joints": [[i] for i in range(len(dlc_cfg.all_joints))],
                    "all_joints_names": [
                        dlc_cfg.all_joints_names[i]
                        for i in range(len(dlc_cfg.all_joints))
                    ],
                    "stride": dlc_cfg.get("stride", 8),
                }
                print(
                    "Done and results stored for snapshot: ",
                    Snapshots[snapindex],
                )

                dictionary = {
                    "Scorer": DLCscorer,
                    "DLC-model-config file": dlc_cfg,
                    "trainIndices": trainIndices,
                    "testIndices": testIndices,
                    "trainFraction": trainFraction,
                }
                metadata = {"data": dictionary}
                auxfun_multianimal.SaveFullMultiAnimalData(
                    PredicteData, metadata, resultsfilename)

                tf.reset_default_graph()

    # returning to intial folder
    os.chdir(str(start_path))
def filterpredictions(
    config,
    video,
    videotype="avi",
    shuffle=1,
    trainingsetindex=0,
    filtertype="median",
    windowlength=5,
    p_bound=0.001,
    ARdegree=3,
    MAdegree=1,
    alpha=0.01,
    save_as_csv=True,
    destfolder=None,
    modelprefix="",
    track_method="",
):
    """
    Fits frame-by-frame pose predictions with ARIMA model (filtertype='arima'), a spline
    (filtertype='spline') or median filter (default).

    The filtered data are written next to the analyzed data as ``*_filtered.h5``
    (and ``*_filtered.csv`` if save_as_csv is True). Videos whose filtered data already
    exist are skipped. The function itself returns None.

    Parameter
    ----------
    config : string
        Full path of the config.yaml file as a string.

    video : string
        Full path of the video(s) whose predictions are filtered; passed to
        ``auxiliaryfunctions.Getlistofvideos``. Make sure that the video is already analyzed.

    videotype: string, optional
        Passed to ``auxiliaryfunctions.Getlistofvideos`` together with ``video``.
        The default is ``avi``.

    shuffle : int, optional
        The shufle index of training dataset. Default is set to 1

    trainingsetindex: int, optional
        Integer specifying which TrainingsetFraction to use. By default the first
        (note that TrainingFraction is a list in config.yaml).

    filtertype: string
        Select which filter, 'arima', 'median' or 'spline'.

    windowlength: int
        For filtertype='median' filters the input array using a local window-size given by windowlength.
        The array will automatically be zero-padded.
        https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.medfilt.html
        The windowlength should be an odd number.
        If filtertype='spline', windowlength is the maximal gap size to fill.

    p_bound: float between 0 and 1, optional
        For filtertype 'arima' this parameter defines the likelihood below which a body part
        will be considered as missing data for filtering purposes.

    ARdegree: int, optional
        For filtertype 'arima' Autoregressive degree of Sarimax model degree.
        see https://www.statsmodels.org/dev/generated/statsmodels.tsa.statespace.sarimax.SARIMAX.html

    MAdegree: int
        For filtertype 'arima' Moving Average degree of Sarimax model degree.
        See https://www.statsmodels.org/dev/generated/statsmodels.tsa.statespace.sarimax.SARIMAX.html

    alpha: float
        Significance level for detecting outliers based on confidence interval of fitted SARIMAX model.

    save_as_csv: bool, optional
        Saves the predictions in a .csv file. The default is ``True``; if provided it must be
        either ``True`` or ``False``

    destfolder: string, optional
        Specifies the destination folder for analysis data (default is the path of each video).
        Note that for subsequent analysis this folder also needs to be passed.

    modelprefix: string, optional
        Passed on to ``auxiliaryfunctions.GetScorerName``.

    track_method: string, optional
        Passed on to ``auxiliaryfunctions.load_analyzed_data``.

    Raises
    ------
    ValueError
        If ``filtertype`` is not one of 'arima', 'median' or 'spline'.

    Example
    --------
    Arima model:
    deeplabcut.filterpredictions('C:\\myproject\\reaching-task\\config.yaml',['C:\\myproject\\trailtracking-task\\test.mp4'],shuffle=3,filtertype='arima',ARdegree=5,MAdegree=2)

    Use median filter over 11 bins:
    deeplabcut.filterpredictions('C:\\myproject\\reaching-task\\config.yaml',['C:\\myproject\\trailtracking-task\\test.mp4'],shuffle=3,windowlength=11)

    One can then use the filtered rather than the frame-by-frame predictions by calling:

    deeplabcut.plot_trajectories('C:\\myproject\\reaching-task\\config.yaml',['C:\\myproject\\trailtracking-task\\test.mp4'],shuffle=3,filtered=True)

    deeplabcut.create_labeled_video('C:\\myproject\\reaching-task\\config.yaml',['C:\\myproject\\trailtracking-task\\test.mp4'],shuffle=3,filtered=True)
    --------
    """
    cfg = auxiliaryfunctions.read_config(config)
    DLCscorer, DLCscorerlegacy = auxiliaryfunctions.GetScorerName(
        cfg,
        shuffle,
        trainFraction=cfg["TrainingFraction"][trainingsetindex],
        modelprefix=modelprefix,
    )
    Videos = auxiliaryfunctions.Getlistofvideos(video, videotype)

    if not len(Videos):
        print(
            "No video(s) were found. Please check your paths and/or 'video_type'."
        )
        return

    for video in Videos:
        # Resolve the folder per video. Overwriting `destfolder` itself would pin the
        # first video's directory for every following video.
        if destfolder is None:
            datafolder = str(Path(video).parents[0])
        else:
            datafolder = destfolder

        print("Filtering with %s model %s" % (filtertype, video))
        vname = Path(video).stem

        try:
            auxiliaryfunctions.load_analyzed_data(datafolder, vname, DLCscorer, True, track_method)
        except FileNotFoundError:
            pass  # Data haven't been filtered yet
        else:
            print(f"Data from {vname} were already filtered. Skipping...")
            continue

        try:
            df, filepath, _, _ = auxiliaryfunctions.load_analyzed_data(
                datafolder, vname, DLCscorer, track_method=track_method)
            nrows = df.shape[0]
            if filtertype == "arima":
                # view the columns as (frame, label, [x, y, likelihood])
                temp = df.values.reshape((nrows, -1, 3))
                placeholder = np.empty_like(temp)
                for i in range(temp.shape[1]):
                    x, y, p = temp[:, i].T
                    meanx, _ = FitSARIMAXModel(x, p, p_bound, alpha, ARdegree, MAdegree, False)
                    meany, _ = FitSARIMAXModel(y, p, p_bound, alpha, ARdegree, MAdegree, False)
                    # keep the original prediction for the first frame
                    meanx[0] = x[0]
                    meany[0] = y[0]
                    placeholder[:, i] = np.c_[meanx, meany, p]
                data = pd.DataFrame(
                    placeholder.reshape((nrows, -1)),
                    columns=df.columns,
                    index=df.index,
                )
            elif filtertype == "median":
                data = df.copy()
                mask = data.columns.get_level_values("coords") != "likelihood"
                data.loc[:, mask] = df.loc[:, mask].apply(
                    signal.medfilt, args=(windowlength,), axis=0)
            elif filtertype == "spline":
                data = df.copy()
                mask_data = data.columns.get_level_values("coords").isin(("x", "y"))
                xy = data.loc[:, mask_data].values
                prob = data.loc[:, ~mask_data].values
                missing = np.isnan(xy)
                xy_filled = columnwise_spline_interp(xy, windowlength)
                filled = ~np.isnan(xy_filled)
                xy[filled] = xy_filled[filled]
                inds = np.argwhere(missing & filled)
                if inds.size:
                    # Retrieve original individual label indices
                    inds[:, 1] //= 2
                    inds = np.unique(inds, axis=0)
                    prob[inds[:, 0], inds[:, 1]] = 0.01
                    data.loc[:, ~mask_data] = prob
                data.loc[:, mask_data] = xy
            else:
                raise ValueError(f"Unknown filter type {filtertype}")

            outdataname = filepath.replace(".h5", "_filtered.h5")
            # `key` is passed by keyword: positional use is deprecated in newer pandas.
            data.to_hdf(outdataname, key="df_with_missing", format="table", mode="w")
            if save_as_csv:
                print("Saving filtered csv poses!")
                data.to_csv(outdataname.split(".h5")[0] + ".csv")
        except FileNotFoundError as e:
            print(e)
def extract_outlier_frames(config, videos, videotype='avi', shuffle=1, trainingsetindex=0,
                           outlieralgorithm='jump', comparisonbodyparts='all', epsilon=20,
                           p_bound=.01, ARdegree=3, MAdegree=1, alpha=.01,
                           extractionalgorithm='kmeans', automatic=False,
                           cluster_resizewidth=30, cluster_color=False, opencv=True,
                           savelabeled=True, destfolder=None):
    """
    Extract putative outlier frames from already analyzed videos.

    For every video the stored predictions are loaded, restricted to the
    ``start``/``stop`` interval defined in config.yaml, and searched for outlier
    frames with the selected ``outlieralgorithm``. After confirmation (or
    directly when ``automatic`` is True) the candidates are handed to
    ``ExtractFramesbasedonPreselection``; ``numframes2pick`` in config.yaml
    is the number of frames the user is asked to extract.

    Parameter
    ----------
    config : string
        Full path of the config.yaml file as a string.

    videos : list
        A list of strings containing the full paths to videos for analysis or a path to the
        directory, where all the videos with same extension are stored.

    videotype: string, optional
        Checks for the extension of the video in case the input to the video is a directory.
        Only videos with this extension are analyzed. The default is ``avi``

    shuffle : int, optional
        The shuffle index of the training dataset. Default is set to 1

    trainingsetindex: int, optional
        Integer specifying which TrainingsetFraction to use. By default the first (note that
        TrainingFraction is a list in config.yaml).

    outlieralgorithm: 'fitting', 'jump', 'uncertain', or 'manual'
        String specifying the algorithm used to detect the outliers.
        'fitting' computes deviations from a fitted time-series model (via ``ComputeDeviations``)
        and flags frames whose distance exceeds ``epsilon``; if fewer than
        ``2 * numframes2pick`` frames qualify, the ``2 * numframes2pick`` most distant frames are
        used instead. 'jump' flags frames in which a body part moved more than ``epsilon``
        (Euclidean distance) with respect to the previous frame. 'uncertain' flags frames in which
        the likelihood of a body part is below ``p_bound``. 'manual' opens the outlier frame
        extraction GUI. The default is set to ``jump``.

    comparisonbodyparts: list of strings, optional
        Body parts for which the outlier search is carried out. Either ``all``, then all body
        parts from config.yaml are used, or a list of strings that are a subset of the full list.
        E.g. ['hand','Joystick'].

    epsilon: float, optional
        Meaning depends on outlieralgorithm. The default is set to 20 pixels.
        For 'fitting': bound on the deviation from the model fit.
        For 'jump': bound on the frame-to-frame jump distance.

    p_bound: float between 0 and 1, optional
        For outlieralgorithm 'uncertain': likelihood below which a body part is flagged as a
        putative outlier. Also passed to ``ComputeDeviations`` for 'fitting'.

    ARdegree: int, optional
        For outlieralgorithm 'fitting': autoregressive degree of the model.
        See https://www.statsmodels.org/dev/generated/statsmodels.tsa.statespace.sarimax.SARIMAX.html

    MAdegree: int
        For outlieralgorithm 'fitting': moving average degree of the model.
        See https://www.statsmodels.org/dev/generated/statsmodels.tsa.statespace.sarimax.SARIMAX.html

    alpha: float
        Significance level passed to ``ComputeDeviations`` for 'fitting'. Only the distance is
        used for selecting frames here.

    extractionalgorithm : string, optional
        Algorithm used for selecting frames among the putative outliers, either ``kmeans`` or
        ``uniform`` (same logic as for extract_frames). The default is ``kmeans``.

    automatic : bool, optional
        Set it to True, if you want to extract outliers without being asked for user feedback.

    cluster_resizewidth: number, default: 30
        For k-means one can change the width to which the images are downsampled (aspect ratio is
        fixed).

    cluster_color: bool, default: False
        If false then each downsampled image is treated as a grayscale vector (discarding color
        information). If true, then the color channels are considered.

    opencv: bool, default: True
        Uses openCV for loading & extracting (otherwise moviepy (legacy))

    savelabeled: bool, default: True
        If true also saves frame with predicted labels in each folder.

    destfolder: string, optional
        Specifies the destination folder that was used for storing analysis data (default is the
        path of the video).

    Examples

    Windows example for extracting the frames with default settings
    >>> deeplabcut.extract_outlier_frames('C:\\myproject\\reaching-task\\config.yaml',['C:\\yourusername\\rig-95\\Videos\\reachingvideo1.avi'])
    --------
    for extracting the frames with default settings
    >>> deeplabcut.extract_outlier_frames('/analysis/project/reaching-task/config.yaml',['/analysis/project/video/reachinvideo1.avi'])
    --------
    for extracting the frames with kmeans
    >>> deeplabcut.extract_outlier_frames('/analysis/project/reaching-task/config.yaml',['/analysis/project/video/reachinvideo1.avi'],extractionalgorithm='kmeans')
    --------
    for extracting the frames with kmeans and epsilon = 5 pixels.
    >>> deeplabcut.extract_outlier_frames('/analysis/project/reaching-task/config.yaml',['/analysis/project/video/reachinvideo1.avi'],epsilon = 5,extractionalgorithm='kmeans')
    --------
    """
    cfg = auxiliaryfunctions.read_config(config)
    DLCscorer, DLCscorerlegacy = auxiliaryfunctions.GetScorerName(
        cfg, shuffle, trainFraction=cfg['TrainingFraction'][trainingsetindex])
    Videos = auxiliaryfunctions.Getlistofvideos(videos, videotype)
    for video in Videos:
        # Analysis data live next to the video unless a destination folder was given.
        if destfolder is None:
            videofolder = str(Path(video).parents[0])
        else:
            videofolder = destfolder

        notanalyzed, dataname, DLCscorer = auxiliaryfunctions.CheckifNotAnalyzed(
            videofolder, str(Path(video).stem), DLCscorer, DLCscorerlegacy,
            flag='checking')
        if notanalyzed:
            print(
                "It seems the video has not been analyzed yet, or the video is not found! You can only refine the labels after the a video is analyzed. Please run 'analyze_video' first. Or, please double check your video file path"
            )
            continue

        Dataframe = pd.read_hdf(dataname, 'df_with_missing')
        # The scorer name is read from the first level of the stored columns.
        scorer = Dataframe.columns.get_level_values(0)[0]
        nframes = np.size(Dataframe.index)
        # extract min and max index based on start stop interval.
        startindex = max([int(np.floor(nframes * cfg['start'])), 0])
        stopindex = min([int(np.ceil(nframes * cfg['stop'])), nframes])
        Index = np.arange(stopindex - startindex) + startindex
        # figure out body part list:
        bodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
            cfg, comparisonbodyparts)

        Indices = []
        if outlieralgorithm == 'uncertain':
            for bp in bodyparts:
                if bp in cfg['bodyparts']:  # filter [who knows what users put in...]
                    p = Dataframe[scorer][bp]['likelihood'].values[Index]
                    # all indices between start and stop that are below p_bound.
                    Indices.extend(np.where(p < p_bound)[0] + startindex)
        elif outlieralgorithm == 'jump':
            for bp in bodyparts:
                if bp in cfg['bodyparts']:  # filter [who knows what users put in...]
                    dx = np.diff(Dataframe[scorer][bp]['x'].values[Index])
                    dy = np.diff(Dataframe[scorer][bp]['y'].values[Index])
                    # all indices between start and stop with jump larger than epsilon
                    # (leading up to this point!); +1 because diff shortens the series by one.
                    Indices.extend(
                        np.where((dx**2 + dy**2) > epsilon**2)[0] + startindex + 1)
        elif outlieralgorithm == 'fitting':
            # Calculate deviations for video
            [d, o] = ComputeDeviations(Dataframe, cfg, bodyparts, scorer, dataname,
                                       p_bound, alpha, ARdegree, MAdegree)
            # Some heuristics for extracting frames based on distance:
            # time points with at least average difference of epsilon
            Indices = np.where(d > epsilon)[0]
            # If too few points qualify, extract the most distant ones. The number of
            # *qualifying* frames (Indices) is what matters here, not the length of the
            # start/stop window (Index).
            if len(Indices) < cfg['numframes2pick'] * 2 and len(d) > cfg[
                    'numframes2pick'] * 2:
                Indices = np.argsort(d)[::-1][:cfg['numframes2pick'] * 2]
        elif outlieralgorithm == 'manual':
            # The GUI is started from the project directory.
            wd = Path(config).resolve().parents[0]
            os.chdir(str(wd))
            from deeplabcut.refine_training_dataset import outlier_frame_extraction_toolbox
            outlier_frame_extraction_toolbox.show(config, video, shuffle, Dataframe,
                                                  scorer, savelabeled)

        # Run always except when the outlieralgorithm == manual.
        if outlieralgorithm == 'manual':
            continue

        Indices = np.sort(list(set(Indices)))  # remove repetitions.
        print("Method ", outlieralgorithm, " found ", len(Indices),
              " putative outlier frames.")
        print("Do you want to proceed with extracting ", cfg['numframes2pick'],
              " of those?")
        if outlieralgorithm == 'uncertain':
            print(
                "If this list is very large, perhaps consider changing the paramters (start, stop, p_bound, comparisonbodyparts) or use a different method."
            )
        elif outlieralgorithm == 'jump':
            print(
                "If this list is very large, perhaps consider changing the paramters (start, stop, epsilon, comparisonbodyparts) or use a different method."
            )
        elif outlieralgorithm == 'fitting':
            print(
                "If this list is very large, perhaps consider changing the paramters (start, stop, epsilon, ARdegree, MAdegree, alpha, comparisonbodyparts) or use a different method."
            )

        if not automatic:
            askuser = input("yes/no")
        else:
            # Automatic mode must answer with one of the accepted values below,
            # otherwise nothing would ever be extracted.
            askuser = 'yes'

        if askuser in ('y', 'yes', 'Ja', 'ha'):  # multilanguage support :)
            # Now extract from those Indices!
            ExtractFramesbasedonPreselection(
                Indices, extractionalgorithm, Dataframe, dataname, scorer, video,
                cfg, config, opencv, cluster_resizewidth, cluster_color,
                savelabeled)
        else:
            print(
                "Nothing extracted, please change the parameters and start again..."
            )
def evaluate_multianimal_full(
    config,
    Shuffles=[1],
    trainingsetindex=0,
    plotting=False,
    show_errors=True,
    comparisonbodyparts="all",
    gputouse=None,
    modelprefix="",
):
    """
    Evaluate multi-animal networks against the human-annotated data.

    For every requested shuffle / training fraction and every selected
    snapshot, predictions are computed for all annotated images, the distance
    between ground truth and the closest predictions is stored in
    ``dist_<iterations>.csv``, the full predictions are saved through
    ``auxfun_multianimal.SaveFullMultiAnimalData`` and, unless there is a
    single multi-animal body part, PAF graphs are cross-validated with
    ``crossvalutils.cross_validate_paf_graphs`` and the resulting scores are
    pickled next to the predictions (``*_map.pickle``).

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    Shuffles : list of int, optional
        Shuffle indices to evaluate. The default list is only iterated over,
        never modified.

    trainingsetindex : int or "all", optional
        Index into ``TrainingFraction`` of config.yaml, or ``"all"`` to
        evaluate every training fraction.

    plotting : bool or string, optional
        ``True`` is treated as ``"bodypart"`` (labeled images drawn while
        predicting); ``"individual"`` draws the best assemblies after
        cross-validation. Any falsy value disables plotting.

    show_errors : bool, optional
        If true, the error summary is printed for every evaluated snapshot.

    comparisonbodyparts : list of strings or "all", optional
        Passed to ``IntersectionofBodyPartsandOnesGivenbyUser``; here the
        result only determines the number of colors of the colormap.

    gputouse : int, optional
        If not None, written to the ``CUDA_VISIBLE_DEVICES`` environment variable.

    modelprefix : string, optional
        Forwarded to the helpers that build the model / evaluation folder and
        scorer names.

    Returns
    -------
    None. Results are written to the evaluation folder.
    """
    from deeplabcut.pose_estimation_tensorflow.core import (
        predict,
        predict_multianimal as predictma,
    )
    from deeplabcut.utils import (
        auxiliaryfunctions,
        auxfun_multianimal,
        auxfun_videos,
        conversioncode,
    )

    import tensorflow as tf

    if "TF_CUDNN_USE_AUTOTUNE" in os.environ:
        del os.environ["TF_CUDNN_USE_AUTOTUNE"]  # was potentially set during training

    tf.compat.v1.reset_default_graph()
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
    if gputouse is not None:  # gpu selection
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gputouse)

    # Remembered so the working directory can be restored at the end.
    start_path = os.getcwd()

    if plotting is True:
        plotting = "bodypart"

    ##################################################
    # Load data...
    ##################################################
    cfg = auxiliaryfunctions.read_config(config)
    if trainingsetindex == "all":
        TrainingFractions = cfg["TrainingFraction"]
    else:
        TrainingFractions = [cfg["TrainingFraction"][trainingsetindex]]

    # Loading human annotated data
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    Data = pd.read_hdf(
        os.path.join(
            cfg["project_path"],
            str(trainingsetfolder),
            "CollectedData_" + cfg["scorer"] + ".h5",
        )
    )
    conversioncode.guarantee_multiindex_rows(Data)

    # Get list of body parts to evaluate network for
    comparisonbodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
        cfg, comparisonbodyparts
    )
    # One entry per (individual, multi-animal body part) followed by the unique
    # body parts; used to map body part names to columns of `dist` / `conf`.
    all_bpts = np.asarray(
        len(cfg["individuals"]) * cfg["multianimalbodyparts"] + cfg["uniquebodyparts"]
    )
    colors = visualization.get_cmap(len(comparisonbodyparts), name=cfg["colormap"])
    # Make folder for evaluation
    auxiliaryfunctions.attempttomakefolder(
        str(cfg["project_path"] + "/evaluation-results/")
    )
    for shuffle in Shuffles:
        for trainFraction in TrainingFractions:
            ##################################################
            # Load and setup CNN part detector
            ##################################################
            datafn, metadatafn = auxiliaryfunctions.GetDataandMetaDataFilenames(
                trainingsetfolder, trainFraction, shuffle, cfg
            )
            modelfolder = os.path.join(
                cfg["project_path"],
                str(
                    auxiliaryfunctions.GetModelFolder(
                        trainFraction, shuffle, cfg, modelprefix=modelprefix
                    )
                ),
            )
            path_test_config = Path(modelfolder) / "test" / "pose_cfg.yaml"

            # Load meta data
            (
                data,
                trainIndices,
                testIndices,
                trainFraction,
            ) = auxiliaryfunctions.LoadMetadata(
                os.path.join(cfg["project_path"], metadatafn)
            )

            try:
                dlc_cfg = load_config(str(path_test_config))
            except FileNotFoundError:
                raise FileNotFoundError(
                    "It seems the model for shuffle %s and trainFraction %s does not exist."
                    % (shuffle, trainFraction)
                )

            # Optional resizing of images (and keypoints) prior to prediction.
            pipeline = iaa.Sequential(random_order=False)
            pre_resize = dlc_cfg.get("pre_resize")
            if pre_resize:
                width, height = pre_resize
                pipeline.add(iaa.Resize({"height": height, "width": width}))

            # TODO: IMPLEMENT for different batch sizes?
            dlc_cfg["batch_size"] = 1  # due to differently sized images!!!

            stride = dlc_cfg["stride"]
            # Ignore best edges possibly defined during a prior evaluation
            _ = dlc_cfg.pop("paf_best", None)
            joints = dlc_cfg["all_joints_names"]

            # Create folder structure to store results.
            evaluationfolder = os.path.join(
                cfg["project_path"],
                str(
                    auxiliaryfunctions.GetEvaluationFolder(
                        trainFraction, shuffle, cfg, modelprefix=modelprefix
                    )
                ),
            )
            auxiliaryfunctions.attempttomakefolder(evaluationfolder, recursive=True)

            # Check which snapshots are available and sort them by # iterations
            Snapshots = np.array(
                [
                    fn.split(".")[0]
                    for fn in os.listdir(os.path.join(str(modelfolder), "train"))
                    if "index" in fn
                ]
            )
            if len(Snapshots) == 0:
                print(
                    "Snapshots not found! It seems the dataset for shuffle %s and trainFraction %s is not trained.\nPlease train it before evaluating.\nUse the function 'train_network' to do so."
                    % (shuffle, trainFraction)
                )
            else:
                increasing_indices = np.argsort(
                    [int(m.split("-")[1]) for m in Snapshots]
                )
                Snapshots = Snapshots[increasing_indices]

                if cfg["snapshotindex"] == -1:
                    snapindices = [-1]
                elif cfg["snapshotindex"] == "all":
                    snapindices = range(len(Snapshots))
                elif cfg["snapshotindex"] < len(Snapshots):
                    snapindices = [cfg["snapshotindex"]]
                else:
                    print(
                        "Invalid choice, only -1 (last), any integer up to last, or all (as string)!"
                    )
                    # Nothing to evaluate for this shuffle / fraction. Without this
                    # the loop below would raise a NameError (or silently reuse the
                    # snapshot indices of a previous iteration).
                    snapindices = []

                final_result = []
                ##################################################
                # Compute predictions over images
                ##################################################
                for snapindex in snapindices:
                    dlc_cfg["init_weights"] = os.path.join(
                        str(modelfolder), "train", Snapshots[snapindex]
                    )  # setting weights to corresponding snapshot.
                    # read how many training iterations that corresponds to.
                    trainingsiterations = (
                        dlc_cfg["init_weights"].split(os.sep)[-1]
                    ).split("-")[-1]

                    # name for deeplabcut net (based on its parameters)
                    DLCscorer, DLCscorerlegacy = auxiliaryfunctions.GetScorerName(
                        cfg,
                        shuffle,
                        trainFraction,
                        trainingsiterations,
                        modelprefix=modelprefix,
                    )
                    print(
                        "Running ",
                        DLCscorer,
                        " with # of trainingiterations:",
                        trainingsiterations,
                    )
                    (
                        notanalyzed,
                        resultsfilename,
                        DLCscorer,
                    ) = auxiliaryfunctions.CheckifNotEvaluated(
                        str(evaluationfolder),
                        DLCscorer,
                        DLCscorerlegacy,
                        Snapshots[snapindex],
                    )

                    data_path = resultsfilename.split(".h5")[0] + "_full.pickle"

                    if plotting:
                        foldername = os.path.join(
                            str(evaluationfolder),
                            "LabeledImages_" + DLCscorer + "_" + Snapshots[snapindex],
                        )
                        auxiliaryfunctions.attempttomakefolder(foldername)
                        if plotting == "bodypart":
                            fig, ax = visualization.create_minimal_figure()

                    # The presence of the "_full.pickle" file marks a finished evaluation.
                    if os.path.isfile(data_path):
                        print("Model already evaluated.", resultsfilename)
                    else:
                        (sess, inputs, outputs,) = predict.setup_pose_prediction(
                            dlc_cfg
                        )

                        PredicteData = {}
                        # Rows: annotated images; columns: entries of `all_bpts`.
                        dist = np.full((len(Data), len(all_bpts)), np.nan)
                        conf = np.full_like(dist, np.nan)
                        print("Network Evaluation underway...")
                        for imageindex, imagename in tqdm(enumerate(Data.index)):
                            image_path = os.path.join(cfg["project_path"], *imagename)
                            frame = auxfun_videos.imread(image_path, mode="skimage")

                            GT = Data.iloc[imageindex]
                            if not GT.any():
                                continue

                            # Pass the image and the keypoints through the resizer;
                            # this has no effect if no augmenters were added to it.
                            keypoints = [GT.to_numpy().reshape((-1, 2)).astype(float)]
                            frame_, keypoints = pipeline(
                                images=[frame], keypoints=keypoints
                            )
                            frame = frame_[0]
                            GT[:] = keypoints[0].flatten()

                            df = GT.unstack("coords").reindex(joints, level="bodyparts")

                            # FIXME Is having an empty array vs nan really that necessary?!
                            groundtruthidentity = list(
                                df.index.get_level_values("individuals")
                                .to_numpy()
                                .reshape((-1, 1))
                            )
                            groundtruthcoordinates = list(df.values[:, np.newaxis])
                            for i, coords in enumerate(groundtruthcoordinates):
                                if np.isnan(coords).any():
                                    groundtruthcoordinates[i] = np.empty(
                                        (0, 2), dtype=float
                                    )
                                    groundtruthidentity[i] = np.array([], dtype=str)

                            # Form 2D array of shape (n_rows, 4) where the last dimension
                            # is (sample_index, peak_y, peak_x, bpt_index) to slice the PAFs.
                            temp = df.reset_index(level="bodyparts").dropna()
                            # Assign the mapped column back explicitly: an in-place
                            # replace on the selected column is a chained update that
                            # does not reach `temp` under pandas Copy-on-Write.
                            temp["bodyparts"] = temp["bodyparts"].replace(
                                dict(zip(joints, range(len(joints))))
                            )
                            temp["sample"] = 0
                            peaks_gt = temp.loc[
                                :, ["sample", "y", "x", "bodyparts"]
                            ].to_numpy()
                            # Convert image coordinates to score-map coordinates.
                            peaks_gt[:, 1:3] = (peaks_gt[:, 1:3] - stride // 2) / stride

                            pred = predictma.predict_batched_peaks_and_costs(
                                dlc_cfg,
                                np.expand_dims(frame, axis=0),
                                sess,
                                inputs,
                                outputs,
                                peaks_gt.astype(int),
                            )

                            if not pred:
                                continue
                            else:
                                pred = pred[0]

                            PredicteData[imagename] = {}
                            PredicteData[imagename]["index"] = imageindex
                            PredicteData[imagename]["prediction"] = pred
                            PredicteData[imagename]["groundtruth"] = [
                                groundtruthidentity,
                                groundtruthcoordinates,
                                GT,
                            ]

                            coords_pred = pred["coordinates"][0]
                            probs_pred = pred["confidence"]
                            for bpt, xy_gt in df.groupby(level="bodyparts"):
                                inds_gt = np.flatnonzero(
                                    np.all(~np.isnan(xy_gt), axis=1)
                                )
                                n_joint = joints.index(bpt)
                                xy = coords_pred[n_joint]
                                if inds_gt.size and xy.size:
                                    # Pick the predictions closest to ground truth,
                                    # rather than the ones the model has most confident in
                                    xy_gt_values = xy_gt.iloc[inds_gt].values
                                    neighbors = _find_closest_neighbors(
                                        xy_gt_values, xy, k=3
                                    )
                                    found = neighbors != -1
                                    min_dists = np.linalg.norm(
                                        xy_gt_values[found] - xy[neighbors[found]],
                                        axis=1,
                                    )
                                    inds = np.flatnonzero(all_bpts == bpt)
                                    sl = imageindex, inds[inds_gt[found]]
                                    dist[sl] = min_dists
                                    conf[sl] = probs_pred[n_joint][
                                        neighbors[found]
                                    ].squeeze()

                            if plotting == "bodypart":
                                temp_xy = GT.unstack("bodyparts")[joints].values
                                gt = temp_xy.reshape(
                                    (-1, 2, temp_xy.shape[1])
                                ).T.swapaxes(1, 2)
                                h, w, _ = np.shape(frame)
                                fig.set_size_inches(w / 100, h / 100)
                                ax.set_xlim(0, w)
                                ax.set_ylim(0, h)
                                ax.invert_yaxis()
                                ax = visualization.make_multianimal_labeled_image(
                                    frame,
                                    gt,
                                    coords_pred,
                                    probs_pred,
                                    colors,
                                    cfg["dotsize"],
                                    cfg["alphavalue"],
                                    cfg["pcutoff"],
                                    ax=ax,
                                )
                                visualization.save_labeled_frame(
                                    fig,
                                    image_path,
                                    foldername,
                                    imageindex in trainIndices,
                                )
                                visualization.erase_artists(ax)

                        sess.close()  # closes the current tf session

                        # Compute all distance statistics
                        # NOTE(review): `df` is the frame of the last processed image;
                        # it is only defined if at least one image was not skipped.
                        df_dist = pd.DataFrame(dist, columns=df.index)
                        df_conf = pd.DataFrame(conf, columns=df.index)
                        df_joint = pd.concat(
                            [df_dist, df_conf],
                            keys=["rmse", "conf"],
                            names=["metrics"],
                            axis=1,
                        )
                        # Move the "metrics" level from first to last position.
                        df_joint = df_joint.reorder_levels(
                            list(np.roll(df_joint.columns.names, -1)), axis=1
                        )
                        df_joint.sort_index(
                            axis=1,
                            level=["individuals", "bodyparts"],
                            ascending=[True, True],
                            inplace=True,
                        )
                        write_path = os.path.join(
                            evaluationfolder, f"dist_{trainingsiterations}.csv"
                        )
                        df_joint.to_csv(write_path)

                        # Calculate overall prediction error
                        error = df_joint.xs("rmse", level="metrics", axis=1)
                        mask = (
                            df_joint.xs("conf", level="metrics", axis=1)
                            >= cfg["pcutoff"]
                        )
                        error_masked = error[mask]
                        error_train = np.nanmean(error.iloc[trainIndices])
                        error_train_cut = np.nanmean(error_masked.iloc[trainIndices])
                        error_test = np.nanmean(error.iloc[testIndices])
                        error_test_cut = np.nanmean(error_masked.iloc[testIndices])
                        results = [
                            trainingsiterations,
                            int(100 * trainFraction),
                            shuffle,
                            np.round(error_train, 2),
                            np.round(error_test, 2),
                            cfg["pcutoff"],
                            np.round(error_train_cut, 2),
                            np.round(error_test_cut, 2),
                        ]
                        final_result.append(results)

                        if show_errors:
                            string = (
                                "Results for {} training iterations, training fraction of {}, and shuffle {}:\n"
                                "Train error: {} pixels. Test error: {} pixels.\n"
                                "With pcutoff of {}:\n"
                                "Train error: {} pixels. Test error: {} pixels."
                            )
                            print(string.format(*results))

                            print("##########################################")
                            print(
                                "Average Euclidean distance to GT per individual (in pixels; test-only)"
                            )
                            print(
                                error_masked.iloc[testIndices]
                                .groupby("individuals", axis=1)
                                .mean()
                                .mean()
                                .to_string()
                            )
                            print(
                                "Average Euclidean distance to GT per bodypart (in pixels; test-only)"
                            )
                            print(
                                error_masked.iloc[testIndices]
                                .groupby("bodyparts", axis=1)
                                .mean()
                                .mean()
                                .to_string()
                            )

                        PredicteData["metadata"] = {
                            "nms radius": dlc_cfg["nmsradius"],
                            "minimal confidence": dlc_cfg["minconfidence"],
                            "sigma": dlc_cfg.get("sigma", 1),
                            "PAFgraph": dlc_cfg["partaffinityfield_graph"],
                            "PAFinds": np.arange(
                                len(dlc_cfg["partaffinityfield_graph"])
                            ),
                            "all_joints": [
                                [i] for i in range(len(dlc_cfg["all_joints"]))
                            ],
                            "all_joints_names": [
                                dlc_cfg["all_joints_names"][i]
                                for i in range(len(dlc_cfg["all_joints"]))
                            ],
                            "stride": dlc_cfg.get("stride", 8),
                        }
                        print(
                            "Done and results stored for snapshot: ",
                            Snapshots[snapindex],
                        )
                        dictionary = {
                            "Scorer": DLCscorer,
                            "DLC-model-config file": dlc_cfg,
                            "trainIndices": trainIndices,
                            "testIndices": testIndices,
                            "trainFraction": trainFraction,
                        }
                        metadata = {"data": dictionary}
                        _ = auxfun_multianimal.SaveFullMultiAnimalData(
                            PredicteData, metadata, resultsfilename
                        )

                        tf.compat.v1.reset_default_graph()

                    n_multibpts = len(cfg["multianimalbodyparts"])
                    if n_multibpts == 1:
                        # No edges can be formed with a single body part.
                        continue

                    # Skip data-driven skeleton selection unless
                    # the model was trained on the full graph.
                    max_n_edges = n_multibpts * (n_multibpts - 1) // 2
                    n_edges = len(dlc_cfg["partaffinityfield_graph"])
                    if n_edges == max_n_edges:
                        print("Selecting best skeleton...")
                        n_graphs = 10
                        paf_inds = None
                    else:
                        n_graphs = 1
                        paf_inds = [list(range(n_edges))]
                    (
                        results,
                        paf_scores,
                        best_assemblies,
                    ) = crossvalutils.cross_validate_paf_graphs(
                        config,
                        str(path_test_config).replace("pose_", "inference_"),
                        data_path,
                        data_path.replace("_full.", "_meta."),
                        n_graphs=n_graphs,
                        paf_inds=paf_inds,
                        oks_sigma=dlc_cfg.get("oks_sigma", 0.1),
                        margin=dlc_cfg.get("bbox_margin", 0),
                        symmetric_kpts=dlc_cfg.get("symmetric_kpts"),
                    )
                    if plotting == "individual":
                        assemblies, assemblies_unique, image_paths = best_assemblies
                        fig, ax = visualization.create_minimal_figure()
                        n_animals = len(cfg["individuals"])
                        if cfg["uniquebodyparts"]:
                            n_animals += 1
                        colors = visualization.get_cmap(n_animals, name=cfg["colormap"])
                        for k, v in tqdm(assemblies.items()):
                            imname = image_paths[k]
                            image_path = os.path.join(cfg["project_path"], *imname)
                            frame = auxfun_videos.imread(image_path, mode="skimage")

                            h, w, _ = np.shape(frame)
                            fig.set_size_inches(w / 100, h / 100)
                            ax.set_xlim(0, w)
                            ax.set_ylim(0, h)
                            ax.invert_yaxis()

                            gt = [
                                s.to_numpy().reshape((-1, 2))
                                for _, s in Data.loc[imname].groupby("individuals")
                            ]
                            coords_pred = []
                            coords_pred += [ass.xy for ass in v]
                            probs_pred = []
                            probs_pred += [ass.data[:, 2:3] for ass in v]
                            if assemblies_unique is not None:
                                unique = assemblies_unique.get(k, None)
                                if unique is not None:
                                    coords_pred.append(unique[:, :2])
                                    probs_pred.append(unique[:, 2:3])
                            # Pad with NaNs so there is one prediction entry per
                            # ground truth group.
                            while len(coords_pred) < len(gt):
                                coords_pred.append(np.full((1, 2), np.nan))
                                probs_pred.append(np.full((1, 2), np.nan))
                            ax = visualization.make_multianimal_labeled_image(
                                frame,
                                gt,
                                coords_pred,
                                probs_pred,
                                colors,
                                cfg["dotsize"],
                                cfg["alphavalue"],
                                cfg["pcutoff"],
                                ax=ax,
                            )
                            visualization.save_labeled_frame(
                                fig, image_path, foldername, k in trainIndices,
                            )
                            visualization.erase_artists(ax)

                    df = results[1].copy()
                    df.loc(axis=0)[("mAP_train", "mean")] = [
                        d[0]["mAP"] for d in results[2]
                    ]
                    df.loc(axis=0)[("mAR_train", "mean")] = [
                        d[0]["mAR"] for d in results[2]
                    ]
                    df.loc(axis=0)[("mAP_test", "mean")] = [
                        d[1]["mAP"] for d in results[2]
                    ]
                    df.loc(axis=0)[("mAR_test", "mean")] = [
                        d[1]["mAR"] for d in results[2]
                    ]
                    with open(data_path.replace("_full.", "_map."), "wb") as file:
                        pickle.dump((df, paf_scores), file)

                if len(final_result) > 0:  # Only append if results were calculated
                    make_results_file(final_result, evaluationfolder, DLCscorer)

    os.chdir(str(start_path))
def triangulate(
    config,
    video_path,
    videotype="avi",
    filterpredictions=True,
    filtertype="median",
    gputouse=None,
    destfolder=None,
    save_as_csv=False,
):
    """
    This function triangulates the detected DLC-keypoints from the two camera views
    using the camera matrices (derived from calibration) to calculate 3D predictions.

    Videos that were not analyzed yet (or whose stored scorer name differs) are
    analyzed with the 2D project configured for their camera; a pair is
    (re-)triangulated when its 3D output is missing, when the stored stereo
    parameters differ from the current ones, or when a video had to be re-analyzed.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    video_path : string/list of list
        Full path of the directory where videos are saved. If the user wants to analyze only a pair of videos, the user needs to pass them as a list of list of videos,
        i.e. [['video1-camera-1.avi','video1-camera-2.avi']]

    videotype: string, optional
        Checks for the extension of the video in case the input to the video is a directory.
        Only videos with this extension are analyzed. The default is ``avi``

    filterpredictions: Bool, optional
        Filter the predictions with filter specified by "filtertype". If specified it should be either ``True`` or ``False``.

    filtertype: string
        Select which filter, 'arima' or 'median' filter (currently supported).

    gputouse: int, optional. Natural number indicating the number of your GPU (see number in nvidia-smi). If you do not have a GPU put None.
        See: https://nvidia.custhelp.com/app/answers/detail/a_id/3751/~/useful-nvidia-smi-queries

    destfolder: string, optional
        Specifies the destination folder for analysis data. If not given, the folder of the
        first video of each pair is used for that pair.

    save_as_csv: bool, optional
        Saves the predictions in a .csv file. The default is ``False``

    Example
    -------
    Linux/MacOS
    To analyze all the videos in the directory:
    >>> deeplabcut.triangulate(config,'/data/project1/videos/')

    To analyze only a few pairs of videos:
    >>> deeplabcut.triangulate(config,[['/data/project1/videos/video1-camera-1.avi','/data/project1/videos/video1-camera-2.avi'],['/data/project1/videos/video2-camera-1.avi','/data/project1/videos/video2-camera-2.avi']])


    Windows
    To analyze all the videos in the directory:
    >>> deeplabcut.triangulate(config,'C:\\yourusername\\rig-95\\Videos')

    To analyze only a few pair of videos:
    >>> deeplabcut.triangulate(config,[['C:\\yourusername\\rig-95\\Videos\\video1-camera-1.avi','C:\\yourusername\\rig-95\\Videos\\video1-camera-2.avi'],['C:\\yourusername\\rig-95\\Videos\\video2-camera-1.avi','C:\\yourusername\\rig-95\\Videos\\video2-camera-2.avi']])
    """
    from deeplabcut.pose_estimation_tensorflow import predict_videos
    from deeplabcut.post_processing import filtering

    cfg_3d = auxiliaryfunctions.read_config(config)
    cam_names = cfg_3d["camera_names"]
    pcutoff = cfg_3d["pcutoff"]
    scorer_3d = cfg_3d["scorername_3d"]

    # 2D project config file of every camera.
    snapshots = {}
    for cam in cam_names:
        snapshots[cam] = cfg_3d[str("config_file_" + cam)]
        # Check if the config file exists
        if not os.path.exists(snapshots[cam]):
            raise Exception(
                str("It seems the file specified in the variable config_file_" + str(cam))
                + " does not exist. Please edit the config file with correct file path and retry."
            )

    # video_path is either a directory (string) or a list of lists of videos.
    is_directory = isinstance(video_path, str)
    if is_directory:
        video_list = auxiliaryfunctions_3d.get_camerawise_videos(
            video_path, cam_names, videotype=videotype)
    else:
        video_list = video_path

    if video_list == []:
        print("No videos found in the specified video path.", video_path)
        print(
            "Please make sure that the video names are specified with correct camera names as entered in the config file or"
        )
        print(
            "perhaps the videotype is distinct from the videos in the path, I was looking for:",
            videotype,
        )

    print("List of pairs:", video_list)
    scorer_name = {}
    # Set once and never reset: after the first pair that needs triangulation,
    # all following pairs are triangulated as well.
    run_triangulate = False
    for video_pair in video_list:
        dataname = []
        # A user-supplied destfolder applies to every pair; otherwise the folder
        # is derived from the first video of *this* pair, so it cannot leak from
        # one pair to the next.
        pair_destfolder = destfolder
        for j, video_entry in enumerate(video_pair):  # looping over cameras
            cam_name = cam_names[j]
            if cam_name not in video_entry:
                continue

            print("Analyzing video %s using %s" %
                  (video_entry, str("config_file_" + cam_name)))

            config_2d = snapshots[cam_name]
            cfg = auxiliaryfunctions.read_config(config_2d)
            shuffle = cfg_3d[str("shuffle_" + cam_name)]
            trainingsetindex = cfg_3d[str("trainingsetindex_" + cam_name)]
            trainFraction = cfg["TrainingFraction"][trainingsetindex]
            if is_directory:
                video = os.path.join(video_path, video_entry)
            else:
                # The entry already is the path of the video. Joining it with its
                # own parent folder would duplicate the folder for relative paths.
                video = str(video_entry)

            if pair_destfolder is None:
                pair_destfolder = str(Path(video).parents[0])

            # The 3D output name is the video name without the camera name.
            vname = Path(video).stem
            prefix = str(vname).split(cam_name)[0]
            suffix = str(vname).split(cam_name)[-1]
            if prefix != "" and prefix[-1] in ("_", "-"):
                prefix = prefix[:-1]
            if suffix != "" and suffix[0] in ("_", "-"):
                suffix = suffix[1:]

            if prefix == "":
                output_file = os.path.join(pair_destfolder, suffix)
            elif suffix == "":
                output_file = os.path.join(pair_destfolder, prefix)
            else:
                output_file = os.path.join(pair_destfolder, prefix + "_" + suffix)

            output_filename = os.path.join(output_file + "_" + scorer_3d)
            # Check if the videos are already analyzed for 3d
            if os.path.isfile(output_filename + ".h5"):
                if save_as_csv is True and not os.path.exists(
                        output_filename + ".csv"):
                    # In case user adds save_as_csv is True after triangulating
                    pd.read_hdf(output_filename + ".h5").to_csv(
                        str(output_filename + ".csv"))

                print(
                    "Already analyzed...Checking the meta data for any change in the camera matrices and/or scorer names",
                    vname,
                )
                pickle_file = str(output_filename + "_meta.pickle")
                metadata_ = auxiliaryfunctions_3d.LoadMetadata3d(pickle_file)
                (
                    img_path,
                    path_corners,
                    path_camera_matrix,
                    path_undistort,
                ) = auxiliaryfunctions_3d.Foldernames3Dproject(cfg_3d)
                path_stereo_file = os.path.join(path_camera_matrix,
                                                "stereo_params.pickle")
                stereo_file = auxiliaryfunctions.read_pickle(path_stereo_file)
                cam_pair = str(cam_names[0] + "-" + cam_names[1])
                # Check for the camera matrix
                for k in metadata_["stereo_matrix"].keys():
                    if not np.all(
                            metadata_["stereo_matrix"][k] == stereo_file[cam_pair][k]):
                        run_triangulate = True

                # Check for scorer names in the pickle file of 3d output
                DLCscorer, DLCscorerlegacy = auxiliaryfunctions.GetScorerName(
                    cfg, shuffle, trainFraction, trainingsiterations="unknown")

                # TODO: CHECK FOR BOTH?
                if_video_analyzed = metadata_["scorer_name"][cam_name] in (
                    DLCscorer, DLCscorerlegacy)

                if if_video_analyzed:
                    print("This file is already analyzed!")
                    dataname.append(
                        os.path.join(pair_destfolder, vname + DLCscorer + ".h5"))
                    scorer_name[cam_name] = DLCscorer
                else:
                    # Analyze video if score name is different
                    DLCscorer = predict_videos.analyze_videos(
                        config_2d,
                        [video],
                        videotype=videotype,
                        shuffle=shuffle,
                        trainingsetindex=trainingsetindex,
                        gputouse=gputouse,
                        destfolder=pair_destfolder,
                    )
                    scorer_name[cam_name] = DLCscorer
                    run_triangulate = True
                    if filterpredictions:
                        filtering.filterpredictions(
                            config_2d,
                            [video],
                            videotype=videotype,
                            shuffle=shuffle,
                            trainingsetindex=trainingsetindex,
                            filtertype=filtertype,
                            destfolder=pair_destfolder,
                        )
                    # NOTE(review): the file registered here is "<video><scorer>.h5";
                    # confirm whether the filtered output should be used instead.
                    dataname.append(
                        os.path.join(pair_destfolder, vname + DLCscorer + ".h5"))

            else:  # need to do the whole jam.
                DLCscorer = predict_videos.analyze_videos(
                    config_2d,
                    [video],
                    videotype=videotype,
                    shuffle=shuffle,
                    trainingsetindex=trainingsetindex,
                    gputouse=gputouse,
                    destfolder=pair_destfolder,
                )
                scorer_name[cam_name] = DLCscorer
                run_triangulate = True
                print(pair_destfolder, vname, DLCscorer)
                if filterpredictions:
                    filtering.filterpredictions(
                        config_2d,
                        [video],
                        videotype=videotype,
                        shuffle=shuffle,
                        trainingsetindex=trainingsetindex,
                        filtertype=filtertype,
                        destfolder=pair_destfolder,
                    )
                # NOTE(review): see above - the unfiltered file name is registered.
                dataname.append(
                    os.path.join(pair_destfolder, vname + DLCscorer + ".h5"))

        if run_triangulate:
            # undistort points for this pair
            print("Undistorting...")
            (
                dataFrame_camera1_undistort,
                dataFrame_camera2_undistort,
                stereomatrix,
                path_stereo_file,
            ) = undistort_points(config, dataname,
                                 str(cam_names[0] + "-" + cam_names[1]))
            if len(dataFrame_camera1_undistort) != len(
                    dataFrame_camera2_undistort):
                import warnings

                warnings.warn(
                    "The number of frames do not match in the two videos. Please make sure that your videos have same number of frames and then retry! Excluding the extra frames from the longer video."
                )
                # Truncate the longer recording to the length of the shorter one.
                n_common = min(len(dataFrame_camera1_undistort),
                               len(dataFrame_camera2_undistort))
                dataFrame_camera1_undistort = dataFrame_camera1_undistort[:n_common]
                dataFrame_camera2_undistort = dataFrame_camera2_undistort[:n_common]

            scorer_cam1 = dataFrame_camera1_undistort.columns.get_level_values(0)[0]
            scorer_cam2 = dataFrame_camera2_undistort.columns.get_level_values(0)[0]
            df_3d, scorer_3d, bodyparts = auxiliaryfunctions_3d.create_empty_df(
                dataFrame_camera1_undistort, scorer_3d, flag="3d")
            P1 = stereomatrix["P1"]
            P2 = stereomatrix["P2"]

            print("Computing the triangulation...")
            X_final = []
            for bp in bodyparts:
                data_cam1 = dataFrame_camera1_undistort[scorer_cam1][bp]
                data_cam2 = dataFrame_camera2_undistort[scorer_cam2][bp]

                # Frames in which the likelihood of this body part is below
                # pcutoff in at least one of the two views.
                likelihoods = np.array([
                    data_cam1["likelihood"].values[:],
                    data_cam2["likelihood"].values[:],
                ]).T
                low_likelihood_frames = np.flatnonzero(
                    np.any(likelihoods < pcutoff, axis=1))

                # For both cameras: assign nans to x and y values of the body part
                # in those frames, then add a singleton axis for triangulatePoints.
                points_cam1_undistort = np.array([
                    data_cam1["x"].values[:],
                    data_cam1["y"].values[:],
                ]).T
                points_cam1_undistort[low_likelihood_frames] = np.nan, np.nan
                points_cam1_undistort = np.expand_dims(points_cam1_undistort, axis=1)

                points_cam2_undistort = np.array([
                    data_cam2["x"].values[:],
                    data_cam2["y"].values[:],
                ]).T
                points_cam2_undistort[low_likelihood_frames] = np.nan, np.nan
                points_cam2_undistort = np.expand_dims(points_cam2_undistort, axis=1)

                X_l = auxiliaryfunctions_3d.triangulatePoints(
                    P1, P2, points_cam1_undistort, points_cam2_undistort)

                # ToDo: speed up func. below by saving in numpy.array
                X_final.append(X_l)

            # Indexed as [bodypart, coordinate, frame] below. (Named so as not to
            # shadow this function.)
            points_3d = np.asanyarray(X_final)
            metadata = {}
            metadata["stereo_matrix"] = stereomatrix
            metadata["stereo_matrix_file"] = path_stereo_file
            metadata["scorer_name"] = {
                cam_names[0]: scorer_name[cam_names[0]],
                cam_names[1]: scorer_name[cam_names[1]],
            }

            # Fill the (so far empty) dataframe with x,y,z of the 3d data. Written
            # with a single .loc call: a chained `df.iloc[:][col] = ...` may assign
            # to a temporary object instead of df_3d.
            for bpindex, bp in enumerate(bodyparts):
                df_3d.loc[:, (scorer_3d, bp, "x")] = points_3d[bpindex, 0, :]
                df_3d.loc[:, (scorer_3d, bp, "y")] = points_3d[bpindex, 1, :]
                df_3d.loc[:, (scorer_3d, bp, "z")] = points_3d[bpindex, 2, :]

            df_3d.to_hdf(
                str(output_filename + ".h5"),
                "df_with_missing",
                format="table",
                mode="w",
            )
            auxiliaryfunctions_3d.SaveMetadata3d(
                str(output_filename + "_meta.pickle"), metadata)

            if save_as_csv:
                df_3d.to_csv(str(output_filename + ".csv"))

            print("Triangulated data for video", video_pair)
            print("Results are saved under: ", pair_destfolder)

    if len(video_list) > 0:
        print("All videos were analyzed...")
        print(
            "Now you can create 3D video(s) using deeplabcut.create_labeled_video_3d"
        )
def create_labeled_video(
    config,
    videos,
    videotype="avi",
    shuffle=1,
    trainingsetindex=0,
    filtered=False,
    fastmode=True,
    save_frames=False,
    keypoints_only=False,
    Frames2plot=None,
    displayedbodyparts="all",
    displayedindividuals="all",
    codec="mp4v",
    outputframerate=None,
    destfolder=None,
    draw_skeleton=False,
    trailpoints=0,
    displaycropped=False,
    color_by="bodypart",
    modelprefix="",
    track_method="",
):
    """
    Labels the bodyparts in a video. Make sure the video is already analyzed by the function 'analyze_video'.

    One worker process is started per video (capped at the number of CPUs) and
    each video is handed to ``proc_video`` together with the options below.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    videos : list
        A list of strings containing the full paths to videos for analysis or a path to the directory, where all the
        videos with same extension are stored.

    videotype: string, optional
        Checks for the extension of the video in case the input to the video is a directory.
        Only videos with this extension are analyzed. The default is ``avi``.

    shuffle : int, optional
        Number of shuffles of training dataset. Default is set to 1.

    trainingsetindex: int, optional
        Integer specifying which TrainingsetFraction to use. By default the first (note that TrainingFraction is a
        list in config.yaml).

    filtered: bool, default false
        Boolean variable indicating if filtered output should be plotted rather than frame-by-frame predictions.
        Filtered version can be calculated with deeplabcut.filterpredictions

    fastmode: bool
        If true uses openCV (much faster but less customization of video) vs matplotlib (if false). You can also
        "save_frames" individually or not in the matplotlib mode (if you set the "save_frames" variable accordingly).
        However, using matplotlib to create the frames allows much more flexibility (one can set transparency of
        markers, crop, and easily customize). Forced to ``False`` when ``save_frames`` is true.

    save_frames: bool
        If true creates each frame individually and then combines them into a video. This variant is relatively slow
        as it stores all individual frames. When true, ``fastmode`` and ``keypoints_only`` are both reset to ``False``.

    keypoints_only: bool, optional
        By default, both video frames and keypoints are visible. If true, only the keypoints are shown. These clips
        are an hommage to Johansson movies, see https://www.youtube.com/watch?v=1F5ICP9SYLU and of course his seminal
        paper: "Visual perception of biological motion and a model for its analysis" by Gunnar Johansson in
        Perception & Psychophysics 1973.

    Frames2plot: List of indices
        If not None & save_frames=True then the frames corresponding to the index will be plotted. For example,
        Frames2plot=[0,11] will plot the first and the 12th frame.

    displayedbodyparts: list of strings, optional
        This selects the body parts that are plotted in the video. Either ``all``, then all body parts
        from config.yaml are used, or a list of strings that are a subset of the full list.
        E.g. ['hand','Joystick'] for the demo Reaching-Mackenzie-2018-08-30/config.yaml to select only these two
        body parts.

    displayedindividuals: list of strings, optional
        Individuals plotted in the video. By default, all individuals present in the config will be shown.

    codec: codec for labeled video. Options see http://www.fourcc.org/codecs.php [depends on your ffmpeg
        installation.]

    outputframerate: positive number, output frame rate for labeled video (only available for the mode with saving
        frames.) By default: None, which results in the original video rate.

    destfolder: string, optional
        Specifies the destination folder that was used for storing analysis data (default is the path of the video).

    draw_skeleton: bool
        If ``True`` adds a line connecting the body parts, making a skeleton on each frame. The body parts to be
        connected and the color of these connecting lines are read from the ``skeleton`` and ``skeleton_color``
        entries of the config file. By default: ``False``

    trailpoints: int
        Number of previous frames whose body parts are plotted in a frame (for displaying history). Default is set
        to 0.

    displaycropped: bool, optional
        Specifies whether only cropped frame is displayed (with labels analyzed therein), or the original frame with
        the labels analyzed in the cropped subset.

    color_by : string, optional (default='bodypart')
        Coloring rule. By default, each bodypart is colored differently.
        If set to 'individual', points belonging to a single individual are colored the same.

    modelprefix: string, optional
        Forwarded to ``auxiliaryfunctions.GetScorerName`` when resolving the scorer name.
        (Presumably the sub-directory prefix of the model folder; confirm against that helper.)

    track_method: string, optional
        Forwarded unchanged to ``proc_video``. (Presumably selects the tracker whose output is
        plotted for multi-animal projects; confirm against ``proc_video``.)

    Returns
    -------
    None. If no video is found a message is printed and the function returns early.

    Examples
    --------
    If you want to create the labeled video for only 1 video
    >>> deeplabcut.create_labeled_video('/analysis/project/reaching-task/config.yaml',['/analysis/project/videos/reachingvideo1.avi'])
    --------

    If you want to create the labeled video for only 1 video and store the individual frames
    >>> deeplabcut.create_labeled_video('/analysis/project/reaching-task/config.yaml',['/analysis/project/videos/reachingvideo1.avi'],fastmode=True, save_frames=True)
    --------

    If you want to create the labeled video for multiple videos
    >>> deeplabcut.create_labeled_video('/analysis/project/reaching-task/config.yaml',['/analysis/project/videos/reachingvideo1.avi','/analysis/project/videos/reachingvideo2.avi'])
    --------

    If you want to create the labeled video for all the videos (as .avi extension) in a directory.
    >>> deeplabcut.create_labeled_video('/analysis/project/reaching-task/config.yaml',['/analysis/project/videos/'])
    --------

    If you want to create the labeled video for all the videos (as .mp4 extension) in a directory.
    >>> deeplabcut.create_labeled_video('/analysis/project/reaching-task/config.yaml',['/analysis/project/videos/'],videotype='mp4')
    --------

    """
    cfg = auxiliaryfunctions.read_config(config)
    trainFraction = cfg["TrainingFraction"][trainingsetindex]
    # automatically loads corresponding model (even training iteration based on snapshot index)
    DLCscorer, DLCscorerlegacy = auxiliaryfunctions.GetScorerName(
        cfg, shuffle, trainFraction, modelprefix=modelprefix
    )

    if save_frames:
        fastmode = False  # otherwise one cannot save frames
        keypoints_only = False

    bodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
        cfg, displayedbodyparts
    )
    individuals = auxfun_multianimal.IntersectionofIndividualsandOnesGivenbyUser(
        cfg, displayedindividuals
    )

    if draw_skeleton:
        bodyparts2connect = cfg["skeleton"]
        skeleton_color = cfg["skeleton_color"]
    else:
        bodyparts2connect = None
        skeleton_color = None

    start_path = os.getcwd()
    Videos = auxiliaryfunctions.Getlistofvideos(videos, videotype)

    if not Videos:
        print(
            "No video(s) were found. Please check your paths and/or 'video_type'."
        )
        return

    # Bind every per-call option once; the pool then only supplies the video path.
    # Note that the raw ``videos`` argument (not the resolved ``Videos`` list) is
    # what gets bound as the first positional argument.
    func = partial(
        proc_video,
        videos,
        destfolder,
        filtered,
        DLCscorer,
        DLCscorerlegacy,
        track_method,
        cfg,
        individuals,
        color_by,
        bodyparts,
        codec,
        bodyparts2connect,
        trailpoints,
        save_frames,
        outputframerate,
        Frames2plot,
        draw_skeleton,
        skeleton_color,
        displaycropped,
        fastmode,
        keypoints_only,
    )

    # os.cpu_count() may return None when the CPU count cannot be determined;
    # fall back to a single worker instead of failing inside min().
    n_workers = min(os.cpu_count() or 1, len(Videos))
    try:
        with Pool(n_workers) as pool:
            pool.map(func, Videos)
    finally:
        # Restore the caller's working directory even if a worker raised.
        os.chdir(start_path)
def evaluate_network(
    config,
    Shuffles=[1],
    trainingsetindex=0,
    plotting=False,
    show_errors=True,
    comparisonbodyparts="all",
    gputouse=None,
    rescale=False,
    modelprefix="",
):
    """
    Evaluates the network based on the saved models at different stages of the training network.

    The evaluation results are stored in .h5 and .csv files under the project sub-directory
    'evaluation-results'. Change the snapshotindex parameter in the config file to 'all' in order
    to evaluate all the saved models.

    For multi-animal projects (``multianimalproject`` set in the config) the work is delegated to
    ``evaluate_multianimal_full``; note that ``show_errors`` and ``rescale`` are not forwarded
    in that case.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    Shuffles: list, optional
        List of integers specifying the shuffle indices of the training dataset. The default is [1].
        (The default is a shared list object; this function itself only iterates over it.)

    trainingsetindex: int, optional
        Integer specifying which TrainingsetFraction to use. By default the first (note that TrainingFraction is a
        list in config.yaml). This variable can also be set to "all".

    plotting: bool or str, optional
        Plots the predictions on the train and test images. The default is ``False``; if provided it must be either
        ``True``, ``False``, "bodypart", or "individual". Setting to ``True`` defaults as "bodypart" for
        multi-animal projects. For single-animal projects any accepted string is converted to ``True``.

    show_errors: bool, optional
        Display train and test errors. The default is ``True``.

    comparisonbodyparts: list of bodyparts, Default is "all".
        The average error will be computed for those body parts only (Has to be a subset of the body parts).

    gputouse: int, optional.
        Natural number indicating the number of your GPU (see number in nvidia-smi). If you do not have a GPU put
        None. When given, it is written to the ``CUDA_VISIBLE_DEVICES`` environment variable.
        See: https://nvidia.custhelp.com/app/answers/detail/a_id/3751/~/useful-nvidia-smi-queries

    rescale: bool, default False
        Evaluate the model at the 'global_scale' variable (as set in the test/pose_config.yaml file for a particular
        project). I.e. every image will be resized according to that scale and prediction will be compared to the
        resized ground truth. The error will be reported in pixels at rescaled to the *original* size. I.e. for a
        [200,200] pixel image evaluated at global_scale=.5, the predictions are calculated on [100,100] pixel images,
        compared to 1/2*ground truth and this error is then multiplied by 2!. The evaluation images are also shown
        for the original size!

    modelprefix: string, optional
        Forwarded to the helpers that resolve the model folder, the evaluation folder and the scorer name.
        (Presumably a directory prefix for the model location; confirm against those helpers.)

    Raises
    ------
    ValueError
        If ``plotting`` is not one of the accepted values, or ``snapshotindex`` in the config is not
        -1, "all", or an integer smaller than the number of snapshots.
    FileNotFoundError
        If the test pose config cannot be loaded or no snapshots are found in the train folder.
    Exception
        If ``trainingsetindex`` is not "all" and lies outside the range of ``TrainingFraction``.

    Examples
    --------
    If you do not want to plot, just evaluate shuffle 1.
    >>> deeplabcut.evaluate_network('/analysis/project/reaching-task/config.yaml', Shuffles=[1])
    --------
    If you want to plot and evaluate shuffle 0 and 1.
    >>> deeplabcut.evaluate_network('/analysis/project/reaching-task/config.yaml',Shuffles=[0, 1],plotting = True)
    --------
    If you want to plot assemblies for a maDLC project:
    >>> deeplabcut.evaluate_network('/analysis/project/reaching-task/config.yaml',Shuffles=[1],plotting = "individual")

    Note: this defaults to standard plotting for single-animal projects.
    """
    if plotting not in (True, False, "bodypart", "individual"):
        raise ValueError(f"Unknown value for `plotting`={plotting}")

    import os

    # Remember where we started so the working directory can be restored at the end.
    start_path = os.getcwd()
    from deeplabcut.utils import auxiliaryfunctions

    cfg = auxiliaryfunctions.read_config(config)

    if cfg.get("multianimalproject", False):
        from .evaluate_multianimal import evaluate_multianimal_full

        # TODO: Make this code not so redundant!
        evaluate_multianimal_full(
            config=config,
            Shuffles=Shuffles,
            trainingsetindex=trainingsetindex,
            plotting=plotting,
            comparisonbodyparts=comparisonbodyparts,
            gputouse=gputouse,
            modelprefix=modelprefix,
        )
    else:
        from deeplabcut.utils.auxfun_videos import imread, imresize
        from deeplabcut.pose_estimation_tensorflow.core import predict
        from deeplabcut.pose_estimation_tensorflow.config import load_config
        from deeplabcut.pose_estimation_tensorflow.datasets.utils import data_to_input
        from deeplabcut.utils import auxiliaryfunctions, conversioncode
        import tensorflow as tf

        # If a string was passed in, auto-convert to True for backward compatibility
        plotting = bool(plotting)

        if "TF_CUDNN_USE_AUTOTUNE" in os.environ:
            del os.environ[
                "TF_CUDNN_USE_AUTOTUNE"]  # was potentially set during training

        tf.compat.v1.reset_default_graph()
        os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  #
        # tf.logging.set_verbosity(tf.logging.WARN)

        start_path = os.getcwd()
        # Read file path for pose_config file. >> pass it on
        cfg = auxiliaryfunctions.read_config(config)
        if gputouse is not None:  # gpu selection
            os.environ["CUDA_VISIBLE_DEVICES"] = str(gputouse)

        # Resolve which training fractions to evaluate: all of them, or the single
        # one addressed by trainingsetindex (range-checked).
        if trainingsetindex == "all":
            TrainingFractions = cfg["TrainingFraction"]
        else:
            if (trainingsetindex < len(cfg["TrainingFraction"])
                    and trainingsetindex >= 0):
                TrainingFractions = [
                    cfg["TrainingFraction"][int(trainingsetindex)]
                ]
            else:
                raise Exception(
                    "Please check the trainingsetindex! ",
                    trainingsetindex,
                    " should be an integer from 0 .. ",
                    int(len(cfg["TrainingFraction"]) - 1),
                )

        # Loading human annotated data
        trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
        Data = pd.read_hdf(
            os.path.join(
                cfg["project_path"],
                str(trainingsetfolder),
                "CollectedData_" + cfg["scorer"] + ".h5",
            ))

        # Get list of body parts to evaluate network for
        comparisonbodyparts = (
            auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
                cfg, comparisonbodyparts))
        # Make folder for evaluation
        auxiliaryfunctions.attempttomakefolder(
            str(cfg["project_path"] + "/evaluation-results/"))
        for shuffle in Shuffles:
            for trainFraction in TrainingFractions:
                ##################################################
                # Load and setup CNN part detector
                ##################################################
                datafn, metadatafn = auxiliaryfunctions.GetDataandMetaDataFilenames(
                    trainingsetfolder, trainFraction, shuffle, cfg)
                modelfolder = os.path.join(
                    cfg["project_path"],
                    str(
                        auxiliaryfunctions.GetModelFolder(
                            trainFraction,
                            shuffle,
                            cfg,
                            modelprefix=modelprefix)),
                )

                path_test_config = Path(modelfolder) / "test" / "pose_cfg.yaml"
                # Load meta data
                # (note: this rebinds the loop variable trainFraction to the value
                # stored in the metadata file)
                (
                    data,
                    trainIndices,
                    testIndices,
                    trainFraction,
                ) = auxiliaryfunctions.LoadMetadata(
                    os.path.join(cfg["project_path"], metadatafn))

                try:
                    dlc_cfg = load_config(str(path_test_config))
                except FileNotFoundError:
                    raise FileNotFoundError(
                        "It seems the model for shuffle %s and trainFraction %s does not exist."
                        % (shuffle, trainFraction))

                # change batch size, if it was edited during analysis!
                dlc_cfg[
                    "batch_size"] = 1  # in case this was edited for analysis.

                # Create folder structure to store results.
                evaluationfolder = os.path.join(
                    cfg["project_path"],
                    str(
                        auxiliaryfunctions.GetEvaluationFolder(
                            trainFraction,
                            shuffle,
                            cfg,
                            modelprefix=modelprefix)),
                )
                auxiliaryfunctions.attempttomakefolder(evaluationfolder,
                                                       recursive=True)
                # path_train_config = modelfolder / 'train' / 'pose_cfg.yaml'

                # Check which snapshots are available and sort them by # iterations.
                # A snapshot is identified by every file in the train folder whose
                # name contains "index"; the part before the first "." is kept.
                Snapshots = np.array([
                    fn.split(".")[0] for fn in os.listdir(
                        os.path.join(str(modelfolder), "train"))
                    if "index" in fn
                ])
                try:  # check if any were found?
                    Snapshots[0]
                except IndexError:
                    raise FileNotFoundError(
                        "Snapshots not found! It seems the dataset for shuffle %s and trainFraction %s is not trained.\nPlease train it before evaluating.\nUse the function 'train_network' to do so."
                        % (shuffle, trainFraction))

                # Sort numerically by the integer that follows the "-" in the name.
                increasing_indices = np.argsort(
                    [int(m.split("-")[1]) for m in Snapshots])
                Snapshots = Snapshots[increasing_indices]

                # Translate the config's snapshotindex into a list of positions
                # within the sorted Snapshots array.
                if cfg["snapshotindex"] == -1:
                    snapindices = [-1]
                elif cfg["snapshotindex"] == "all":
                    snapindices = range(len(Snapshots))
                elif cfg["snapshotindex"] < len(Snapshots):
                    snapindices = [cfg["snapshotindex"]]
                else:
                    raise ValueError(
                        "Invalid choice, only -1 (last), any integer up to last, or all (as string)!"
                    )

                # Collects one result row per newly evaluated snapshot.
                final_result = []

                ########################### RESCALING (to global scale)
                if rescale:
                    scale = dlc_cfg["global_scale"]
                    # Re-read the annotations so the scale is applied to fresh data
                    # rather than to an already scaled frame from a previous iteration.
                    Data = (pd.read_hdf(
                        os.path.join(
                            cfg["project_path"],
                            str(trainingsetfolder),
                            "CollectedData_" + cfg["scorer"] + ".h5",
                        )) * scale)
                else:
                    scale = 1

                conversioncode.guarantee_multiindex_rows(Data)
                ##################################################
                # Compute predictions over images
                ##################################################
                for snapindex in snapindices:
                    dlc_cfg["init_weights"] = os.path.join(
                        str(modelfolder), "train", Snapshots[snapindex]
                    )  # setting weights to corresponding snapshot.
                    trainingsiterations = (
                        dlc_cfg["init_weights"].split(os.sep)[-1]
                    ).split(
                        "-"
                    )[-1]  # read how many training iterations that corresponds to.

                    # Name for deeplabcut net (based on its parameters)
                    DLCscorer, DLCscorerlegacy = auxiliaryfunctions.GetScorerName(
                        cfg,
                        shuffle,
                        trainFraction,
                        trainingsiterations,
                        modelprefix=modelprefix,
                    )
                    print(
                        "Running ",
                        DLCscorer,
                        " with # of training iterations:",
                        trainingsiterations,
                    )
                    # The helper also returns the scorer name to use from here on,
                    # which replaces the one computed above.
                    (
                        notanalyzed,
                        resultsfilename,
                        DLCscorer,
                    ) = auxiliaryfunctions.CheckifNotEvaluated(
                        str(evaluationfolder),
                        DLCscorer,
                        DLCscorerlegacy,
                        Snapshots[snapindex],
                    )
                    if notanalyzed:
                        # Specifying state of model (snapshot / training state)
                        sess, inputs, outputs = predict.setup_pose_prediction(
                            dlc_cfg)
                        Numimages = len(Data.index)
                        # One row per image; three columns (x, y, likelihood) per joint.
                        PredicteData = np.zeros(
                            (Numimages, 3 * len(dlc_cfg["all_joints_names"])))
                        print("Running evaluation ...")
                        for imageindex, imagename in tqdm(enumerate(
                                Data.index)):
                            image = imread(
                                os.path.join(cfg["project_path"], *imagename),
                                mode="skimage",
                            )
                            if scale != 1:
                                image = imresize(image, scale)

                            image_batch = data_to_input(image)
                            # Compute prediction with the CNN
                            outputs_np = sess.run(
                                outputs, feed_dict={inputs: image_batch})
                            scmap, locref = predict.extract_cnn_output(
                                outputs_np, dlc_cfg)

                            # Extract maximum scoring location from the heatmap, assume 1 person
                            pose = predict.argmax_pose_predict(
                                scmap, locref, dlc_cfg["stride"])
                            PredicteData[imageindex, :] = (
                                pose.flatten()
                            )  # NOTE: thereby cfg_test['all_joints_names'] should be same order as bodyparts!

                        sess.close()  # closes the current tf session

                        index = pd.MultiIndex.from_product(
                            [
                                [DLCscorer],
                                dlc_cfg["all_joints_names"],
                                ["x", "y", "likelihood"],
                            ],
                            names=["scorer", "bodyparts", "coords"],
                        )

                        # Saving results
                        DataMachine = pd.DataFrame(PredicteData,
                                                   columns=index,
                                                   index=Data.index)
                        DataMachine.to_hdf(resultsfilename, "df_with_missing")

                        print(
                            "Analysis is done and the results are stored (see evaluation-results) for snapshot: ",
                            Snapshots[snapindex],
                        )
                        # Put human and machine labels side by side, one row per image.
                        DataCombined = pd.concat([Data.T, DataMachine.T],
                                                 axis=0,
                                                 sort=False).T

                        RMSE, RMSEpcutoff = pairwisedistances(
                            DataCombined,
                            cfg["scorer"],
                            DLCscorer,
                            cfg["pcutoff"],
                            comparisonbodyparts,
                        )
                        # Mean error over the train/test rows, ignoring NaN entries.
                        testerror = np.nanmean(
                            RMSE.iloc[testIndices].values.flatten())
                        trainerror = np.nanmean(
                            RMSE.iloc[trainIndices].values.flatten())
                        testerrorpcutoff = np.nanmean(
                            RMSEpcutoff.iloc[testIndices].values.flatten())
                        trainerrorpcutoff = np.nanmean(
                            RMSEpcutoff.iloc[trainIndices].values.flatten())
                        results = [
                            trainingsiterations,
                            int(100 * trainFraction),
                            shuffle,
                            np.round(trainerror, 2),
                            np.round(testerror, 2),
                            cfg["pcutoff"],
                            np.round(trainerrorpcutoff, 2),
                            np.round(testerrorpcutoff, 2),
                        ]
                        final_result.append(results)

                        if show_errors:
                            print(
                                "Results for",
                                trainingsiterations,
                                " training iterations:",
                                int(100 * trainFraction),
                                shuffle,
                                "train error:",
                                np.round(trainerror, 2),
                                "pixels. Test error:",
                                np.round(testerror, 2),
                                " pixels.",
                            )
                            print(
                                "With pcutoff of",
                                cfg["pcutoff"],
                                " train error:",
                                np.round(trainerrorpcutoff, 2),
                                "pixels. Test error:",
                                np.round(testerrorpcutoff, 2),
                                "pixels",
                            )
                            if scale != 1:
                                print(
                                    "The predictions have been calculated for rescaled images (and rescaled ground truth). Scale:",
                                    scale,
                                )
                            print(
                                "Thereby, the errors are given by the average distances between the labels by DLC and the scorer."
                            )

                        if plotting:
                            print("Plotting...")
                            foldername = os.path.join(
                                str(evaluationfolder),
                                "LabeledImages_" + DLCscorer + "_" +
                                Snapshots[snapindex],
                            )
                            auxiliaryfunctions.attempttomakefolder(foldername)
                            Plotting(
                                cfg,
                                comparisonbodyparts,
                                DLCscorer,
                                trainIndices,
                                DataCombined * 1.0 / scale,
                                foldername,
                            )  # Rescaling coordinates to have figure in original size!

                        tf.compat.v1.reset_default_graph()
                        # print(final_result)
                    else:
                        # Results for this snapshot already exist: reuse them and
                        # only (re)create the plots if requested.
                        DataMachine = pd.read_hdf(resultsfilename)
                        conversioncode.guarantee_multiindex_rows(DataMachine)
                        if plotting:
                            DataCombined = pd.concat([Data.T, DataMachine.T],
                                                     axis=0,
                                                     sort=False).T
                            print(
                                "Plotting...(attention scale might be inconsistent in comparison to when data was analyzed; i.e. if you used rescale)"
                            )
                            foldername = os.path.join(
                                str(evaluationfolder),
                                "LabeledImages_" + DLCscorer + "_" +
                                Snapshots[snapindex],
                            )
                            auxiliaryfunctions.attempttomakefolder(foldername)
                            Plotting(
                                cfg,
                                comparisonbodyparts,
                                DLCscorer,
                                trainIndices,
                                DataCombined * 1.0 / scale,
                                foldername,
                            )

                if len(final_result
                       ) > 0:  # Only append if results were calculated
                    make_results_file(final_result, evaluationfolder,
                                      DLCscorer)
                    print(
                        "The network is evaluated and the results are stored in the subdirectory 'evaluation_results'."
                    )
                    print(
                        "Please check the results, then choose the best model (snapshot) for prediction. You can update the config.yaml file with the appropriate index for the 'snapshotindex'.\nUse the function 'analyze_video' to make predictions on new videos."
                    )
                    print(
                        "Otherwise, consider adding more labeled-data and retraining the network (see DeepLabCut workflow Fig 2, Nath 2019)"
                    )

    # returning to initial folder
    os.chdir(str(start_path))
def analyze_videos(self, event):
    """Run video analysis with the options currently selected in the GUI.

    Reads the shuffle and training-set index from the widgets, derives the
    cropping window from the project config, and then calls
    ``deeplabcut.analyze_videos``:

    * multi-animal projects: analysis uses the "robust" selection, optionally
      followed by ``deeplabcut.create_video_with_all_detections``;
    * single-animal projects: analysis uses the CSV and dynamic-cropping
      selections, optionally followed by median filtering
      (``deeplabcut.filterpredictions``) and trajectory plotting
      (``deeplabcut.plot_trajectories``).

    Parameters
    ----------
    event :
        The GUI event that triggered the handler; it is not used.
    """
    shuffle = self.shuffle.GetValue()
    trainingsetindex = self.trainingset.GetValue()
    is_multianimal = self.cfg.get("multianimalproject", False)

    if is_multianimal:
        print("Analyzing ... ")
    else:
        save_as_csv = self.csv.GetStringSelection() == "Yes"
        # Anything other than an explicit "No" enables dynamic cropping; the
        # remaining two values of the tuple are fixed at 0.5 and 10.
        dynamic = (self.dynamic.GetStringSelection() != "No", 0.5, 10)

    # NOTE(review): this compares against the *string* "True"; if the config
    # stores a boolean, cropping is never enabled here. Confirm intent.
    if self.cfg["cropping"] == "True":
        crop = self.cfg["x1"], self.cfg["x2"], self.cfg["y1"], self.cfg["y2"]
    else:
        crop = None

    if is_multianimal:
        robust = self.robust.GetStringSelection() != "No"
        scorername = deeplabcut.analyze_videos(
            self.config,
            self.filelist,
            videotype=self.videotype.GetValue(),
            shuffle=shuffle,
            trainingsetindex=trainingsetindex,
            gputouse=None,
            cropping=crop,
            robust_nframes=robust,
        )
        if self.create_video_with_all_detections.GetStringSelection() == "Yes":
            trainFrac = self.cfg["TrainingFraction"][trainingsetindex]
            # The scorer name is recomputed from the config here rather than
            # reusing the value returned by analyze_videos above.
            scorername, DLCscorerlegacy = auxiliaryfunctions.GetScorerName(
                self.cfg, shuffle, trainFraction=trainFrac
            )
            print(scorername)
            deeplabcut.create_video_with_all_detections(
                self.config, self.filelist, DLCscorername=scorername
            )
    else:
        scorername = deeplabcut.analyze_videos(
            self.config,
            self.filelist,
            videotype=self.videotype.GetValue(),
            shuffle=shuffle,
            trainingsetindex=trainingsetindex,
            gputouse=None,
            save_as_csv=save_as_csv,
            cropping=crop,
            dynamic=dynamic,
        )
        if self.filter.GetStringSelection() == "Yes":
            deeplabcut.filterpredictions(
                self.config,
                self.filelist,
                videotype=self.videotype.GetValue(),
                shuffle=shuffle,
                trainingsetindex=trainingsetindex,
                filtertype="median",
                windowlength=5,
                save_as_csv=True,
            )

        if self.trajectory.GetStringSelection() == "Yes":
            showfig = self.showfigs.GetStringSelection() != "No"
            # NOTE(review): filtered=True is passed regardless of whether the
            # filter step above ran; confirm this is intended.
            deeplabcut.plot_trajectories(
                self.config,
                self.filelist,
                displayedbodyparts=self.bodyparts,
                videotype=self.videotype.GetValue(),
                shuffle=shuffle,
                trainingsetindex=trainingsetindex,
                filtered=True,
                showfigures=showfig,
            )