def MakeTest_pose_yaml(dictionary, keys2save, saveasfile):
    """Write the test-time pose configuration file.

    Copies the entries listed in ``keys2save`` from ``dictionary``, then sets
    ``scoremap_dir`` to ``"test"`` and ``global_scale`` to ``1.0`` (overriding
    any copied value), and hands the result to
    ``auxiliaryfunctions.write_plainconfig`` together with ``saveasfile``.

    Raises ``KeyError`` if a requested key is missing from ``dictionary``.
    """
    test_settings = {name: dictionary[name] for name in keys2save}
    # Fixed test-time settings always win over whatever was copied above.
    test_settings.update(scoremap_dir="test", global_scale=1.0)
    auxiliaryfunctions.write_plainconfig(saveasfile, test_settings)
def read_inferencecfg(path_inference_config, cfg):
    """Load the inference configuration, creating it on first use.

    Reads the file at ``path_inference_config``. If that file does not exist,
    a default configuration is built with ``form_default_inferencecfg(cfg)``,
    written to ``path_inference_config`` and returned instead.
    """
    config_file = str(path_inference_config)
    try:
        return auxiliaryfunctions.read_plainconfig(config_file)
    except FileNotFoundError:
        # Nothing on disk yet: fall back to the defaults and persist them.
        defaults = form_default_inferencecfg(cfg)
        auxiliaryfunctions.write_plainconfig(config_file, dict(defaults))
        return defaults
def evaluate_multianimal_crossvalidate(
    config,
    Shuffles=[1],
    trainingsetindex=0,
    pbounds=None,
    edgewisecondition=True,
    target="rpck_train",
    inferencecfg=None,
    init_points=20,
    n_iter=50,
    dcorr=10.0,
    leastbpts=1,
    printingintermediatevalues=True,
    modelprefix="",
    plotting=False,
):
    """
    Crossvalidate inference parameters on evaluation data; optimal parameters will be stored in "inference_cfg.yaml".

    They will then be used for inference (for analysis of videos).
    Performs Bayesian Optimization with https://github.com/fmfn/BayesianOptimization

    This is a crucial step. The most important variable (in inferencecfg) to cross-validate is
    minimalnumberofconnections. Pass a reasonable range to optimize (e.g. if you have 5 edges from 1 to 5.
    If you have 4 bpts and 11 connections from 3 to 9).

    config: string
        Full path of the config.yaml file as a string.

    Shuffles: list of int, optional
        Shuffle indices of the training dataset used for training the network; every listed shuffle is
        cross-validated in turn. The default is [1].

    trainingsetindex: int, optional
        Integer specifying which TrainingsetFraction to use. By default the first (note that
        TrainingFraction is a list in config.yaml).

    pbounds: dictionary of variables with ranges to crossvalidate.
        By default: pbounds = {
                        'pafthreshold': (0.05, 0.7),
                        'detectionthresholdsquare': (0.0, 0.9),
                        'minimalnumberofconnections': (1, # connections in your skeleton),
                    }
        Entries passed here override the defaults key by key.

    edgewisecondition: bool, default True
        Estimates Euclidean distances for each skeleton edge and uses those distances for excluding
        possible connections. If false, uses only one distance for all bodyparts (which is obviously
        suboptimal).

    target: string, default='rpck_train'
        What metric to optimize. Options are pck/rpck/rmse on train/test set. Targets containing "rmse"
        are minimized, targets containing "pck" are maximized.

    inferencecfg: dict, OPTIONAL
        For the variables that are *not* crossvalidated the parameters from inference_cfg.yaml (of the
        respective shuffle) are used, or you can overwrite them by passing a dictionary with your
        preferred parameters.

    init_points: int, optional (default=20)
        Number of random initial explorations. Probing random regions helps diversify the exploration
        space. Parameter from BayesianOptimization.

    n_iter: int, optional (default=50)
        Number of iterations of Bayesian optimization to perform. The larger it is, the higher the
        likelihood of finding a good extremum. Parameter from BayesianOptimization.

    dcorr: float,
        Distance threshold for percent correct keypoints / relative percent correct keypoints (see paper).

    leastbpts: integer (should be a small number)
        If an animal has this many body parts or fewer in an image, it will not be used for cross
        validation. Imagine e.g. if only a single bodypart is present, then if animals need a certain
        minimal number of bodyparts for assembly (minimalnumberofconnections), this might not be
        predictable.

    printingintermediatevalues: bool, default True
        If intermediate metrics RMSE/hits/.. per sample should be printed.

    modelprefix: string, optional
        Passed on to the helpers that locate the evaluation and model folders.

    plotting: bool, default False
        If True, images showing ground truth and best predictions are saved to a
        "LabeledImages_<scorer>_<snapshot>" folder inside the evaluation folder.

    Raises
    ------
    ValueError
        If ``target`` contains neither "pck" nor "rmse".
    FileNotFoundError
        If the test pose_cfg.yaml or the training snapshots of a shuffle cannot be found.

    Examples
    --------

    first run evaluate:

    deeplabcut.evaluate_network(path_config_file,Shuffles=[shuffle],plotting=True)

    Then e.g. for finding inference parameters to minimize rmse on test set:

    deeplabcut.evaluate_multianimal_crossvalidate(path_config_file,Shuffles=[shuffle],target='rmse_test')
    """
    from deeplabcut.pose_estimation_tensorflow.lib import crossvalutils
    from deeplabcut.utils import auxfun_multianimal, auxiliaryfunctions
    from easydict import EasyDict as edict

    cfg = auxiliaryfunctions.read_config(config)
    trainFraction = cfg["TrainingFraction"][trainingsetindex]
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    Data = pd.read_hdf(
        os.path.join(
            cfg["project_path"],
            str(trainingsetfolder),
            "CollectedData_" + cfg["scorer"] + ".h5",
        ),
        key="df_with_missing",
    )
    comparisonbodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
        cfg, "all"
    )
    colors = visualization.get_cmap(len(comparisonbodyparts), name=cfg["colormap"])

    # wild guesses for a wide range:
    maxconnections = len(cfg["skeleton"])
    minconnections = 1  # len(cfg['multianimalbodyparts'])-1

    _pbounds = {
        "pafthreshold": (0.05, 0.7),
        # The lower bound must be a float: the search casts each parameter to the
        # type of its lower bound, so an integer 0 would truncate every candidate
        # threshold to 0.
        "detectionthresholdsquare": (
            0.0,
            0.9,
        ),  # TODO: set to minimum (from pose_cfg.yaml)
        "minimalnumberofconnections": (minconnections, maxconnections),
    }
    if pbounds is not None:
        _pbounds.update(pbounds)

    # Decide the optimization direction up front so that an unsupported target
    # fails with a clear message instead of an unbound local later on.
    if "rmse" in target:
        maximize = False  # i.e. minimize
    elif "pck" in target:  # covers both pck and rpck
        maximize = True
    else:
        raise ValueError(
            "Unsupported target %r: expected a pck, rpck or rmse metric." % (target,)
        )

    for shuffle in Shuffles:
        evaluationfolder = os.path.join(
            cfg["project_path"],
            str(
                auxiliaryfunctions.GetEvaluationFolder(
                    trainFraction, shuffle, cfg, modelprefix=modelprefix
                )
            ),
        )
        auxiliaryfunctions.attempttomakefolder(evaluationfolder, recursive=True)

        _, metadatafn = auxiliaryfunctions.GetDataandMetaDataFilenames(
            trainingsetfolder, trainFraction, shuffle, cfg
        )
        _, trainIndices, _, _ = auxiliaryfunctions.LoadMetadata(
            os.path.join(cfg["project_path"], metadatafn)
        )
        modelfolder = os.path.join(
            cfg["project_path"],
            str(
                auxiliaryfunctions.GetModelFolder(
                    trainFraction, shuffle, cfg, modelprefix=modelprefix
                )
            ),
        )
        path_test_config = Path(modelfolder) / "test" / "pose_cfg.yaml"

        try:
            dlc_cfg = load_config(str(path_test_config))
        except FileNotFoundError:
            raise FileNotFoundError(
                "It seems the model for shuffle %s and trainFraction %s does not exist."
                % (shuffle, trainFraction)
            )

        # Check which snapshots are available and sort them by # iterations.
        # os.listdir returns entries in arbitrary order, so an explicit sort is
        # needed for the last element to be the most trained snapshot.
        train_dir = os.path.join(str(modelfolder), "train")
        snapshot_names = [
            fn.split(".")[0] for fn in os.listdir(train_dir) if "index" in fn
        ]
        if not snapshot_names:
            raise FileNotFoundError(
                "No snapshots found in %s for shuffle %s and trainFraction %s."
                % (train_dir, shuffle, trainFraction)
            )
        Snapshots = np.array(
            sorted(snapshot_names, key=lambda name: int(name.split("-")[-1]))
        )
        snapindex = -1
        dlc_cfg["init_weights"] = os.path.join(
            str(modelfolder), "train", Snapshots[snapindex]
        )  # setting weights to corresponding snapshot.
        trainingsiterations = (dlc_cfg["init_weights"].split(os.sep)[-1]).split("-")[
            -1
        ]  # read how many training siterations that corresponds to.

        DLCscorer, _ = auxiliaryfunctions.GetScorerName(
            cfg, shuffle, trainFraction, trainingsiterations, modelprefix=modelprefix
        )

        path_inference_config = Path(modelfolder) / "test" / "inference_cfg.yaml"
        # Use a per-shuffle variable: rebinding the ``inferencecfg`` argument would
        # make later shuffles start from the first shuffle's optimized settings
        # instead of their own inference_cfg.yaml.
        if inferencecfg is None:  # then load or initialize
            shuffle_inferencecfg = auxfun_multianimal.read_inferencecfg(
                path_inference_config, cfg
            )
        else:
            shuffle_inferencecfg = edict(inferencecfg)
            auxfun_multianimal.check_inferencecfg_sanity(cfg, shuffle_inferencecfg)

        shuffle_inferencecfg.topktoretain = np.inf
        shuffle_inferencecfg, _ = crossvalutils.bayesian_search(
            config,
            shuffle_inferencecfg,
            _pbounds,
            edgewisecondition=edgewisecondition,
            shuffle=shuffle,
            trainingsetindex=trainingsetindex,
            target=target,
            maximize=maximize,
            init_points=init_points,
            n_iter=n_iter,
            acq="ei",
            dcorr=dcorr,
            leastbpts=leastbpts,
            modelprefix=modelprefix,
            printingintermediatevalues=printingintermediatevalues,
        )

        # update number of individuals to retain.
        shuffle_inferencecfg.topktoretain = len(cfg["individuals"]) + 1 * (
            len(cfg["uniquebodyparts"]) > 0
        )

        # calculating result at best best solution
        DataOptParams, poses_gt, poses = crossvalutils.compute_crossval_metrics(
            config, shuffle_inferencecfg, shuffle, trainingsetindex, modelprefix
        )

        path_inference_config = str(path_inference_config)
        # print("Quantification:", DataOptParams.head())
        DataOptParams.to_hdf(
            path_inference_config.split(".yaml")[0] + ".h5",
            key="df_with_missing",
            format="table",
            mode="w",
        )
        DataOptParams.to_csv(path_inference_config.split(".yaml")[0] + ".csv")
        print("Saving optimal inference parameters...")
        print(DataOptParams.to_string())
        auxiliaryfunctions.write_plainconfig(
            path_inference_config, dict(shuffle_inferencecfg)
        )

        # Store best predictions
        max_indivs = max(pose.shape[0] for pose in poses)
        bpts = dlc_cfg["all_joints_names"]
        # One row per image; images with fewer detected individuals stay NaN-padded.
        container = np.full((len(poses), max_indivs * len(bpts) * 3), np.nan)
        for n, pose in enumerate(poses):
            temp = pose.flatten()
            container[n, : len(temp)] = temp

        header = pd.MultiIndex.from_product(
            [
                [DLCscorer],
                [f"individual{i}" for i in range(1, max_indivs + 1)],
                bpts,
                ["x", "y", "likelihood"],
            ],
            names=["scorer", "individuals", "bodyparts", "coords"],
        )

        df = pd.DataFrame(container, columns=header)
        df.to_hdf(
            os.path.join(evaluationfolder, f"{DLCscorer}.h5"), key="df_with_missing"
        )

        if plotting:
            foldername = os.path.join(
                str(evaluationfolder),
                "LabeledImages_" + DLCscorer + "_" + Snapshots[snapindex],
            )
            auxiliaryfunctions.attempttomakefolder(foldername)
            for imageindex, imagename in tqdm(enumerate(Data.index)):
                image_path = os.path.join(cfg["project_path"], imagename)
                image = io.imread(image_path)
                frame = img_as_ubyte(skimage.color.gray2rgb(image))
                groundtruthcoordinates = poses_gt[imageindex]
                # The last entry along the final axis is the likelihood.
                coords_pred = poses[imageindex][:, :, :2]
                probs_pred = poses[imageindex][:, :, -1:]
                fig = visualization.make_multianimal_labeled_image(
                    frame,
                    groundtruthcoordinates,
                    coords_pred,
                    probs_pred,
                    colors,
                    cfg["dotsize"],
                    cfg["alphavalue"],
                    cfg["pcutoff"],
                )
                visualization.save_labeled_frame(
                    fig, image_path, foldername, imageindex in trainIndices
                )
def _paf_distance_bounds(distances, numdigits):
    """Return ``(max, min)`` of ``distances`` as rounded strings, ignoring NaNs.

    When there is no usable distance (empty input or only NaNs) the permissive
    fallback ``(str(1e5), str(0))`` is returned.
    """
    values = np.asarray(distances, dtype=float)
    if values.size == 0 or np.isnan(values).all():
        # large number (larger than image diameters in typical experiments)
        return str(1e5), str(0)
    return (
        str(round(np.nanmax(values), numdigits)),
        str(round(np.nanmin(values), numdigits)),
    )


def calculatepafdistancebounds(
    config, shuffle=0, trainingsetindex=0, modelprefix="", numdigits=0, onlytrain=False
):
    """
    Returns distances along paf edges in train/test data

    For every edge of the part affinity field graph, the Euclidean distance between
    the two annotated body parts (divided by the network stride) is collected within
    each individual ("intra") and across different individuals ("inter"). The
    minimum and maximum per edge are written to "inferencebounds.yaml" in the test
    folder of the model and returned.

    ----------
    config : string
        Full path of the config.yaml file as a string.

    shuffle: integer
        integers specifying shuffle index of the training dataset. The default is 0.

    trainingsetindex: int, optional
        Integer specifying which TrainingsetFraction to use. By default the first (note that
        TrainingFraction is a list in config.yaml).

    modelprefix: string, optional
        Passed on to the helper that locates the model folder.

    numdigits: number of digits to round for distances.

    onlytrain: bool, default False
        If True, only the rows listed in the training indices of the shuffle are used.

    Returns
    -------
    dict
        Maps "<joint index 1>_<joint index 2>" to a dict with the keys "intra_max",
        "intra_min", "inter_max" and "inter_min" (values stored as strings). Edges
        without any usable distance get the permissive bounds "100000.0" and "0".
        An empty dict is returned for projects that are not multi-animal projects.
    """
    import os
    from deeplabcut.utils import auxiliaryfunctions, auxfun_multianimal
    from deeplabcut.pose_estimation_tensorflow.config import load_config

    # Read file path for pose_config file. >> pass it on
    cfg = auxiliaryfunctions.read_config(config)

    if not cfg["multianimalproject"]:
        print("You might as well bring owls to Athens.")
        return {}

    individuals, _, _ = auxfun_multianimal.extractindividualsandbodyparts(cfg)

    # Loading human annotated data
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    trainFraction = cfg["TrainingFraction"][trainingsetindex]
    _, metadatafn = auxiliaryfunctions.GetDataandMetaDataFilenames(
        trainingsetfolder, trainFraction, shuffle, cfg
    )
    modelfolder = os.path.join(
        cfg["project_path"],
        str(
            auxiliaryfunctions.GetModelFolder(
                trainFraction, shuffle, cfg, modelprefix=modelprefix
            )
        ),
    )

    # Load meta data & annotations
    _, trainIndices, _, _ = auxiliaryfunctions.LoadMetadata(
        os.path.join(cfg["project_path"], metadatafn)
    )
    Data = pd.read_hdf(
        os.path.join(
            cfg["project_path"],
            str(trainingsetfolder),
            "CollectedData_" + cfg["scorer"] + ".h5",
        )
    )[cfg["scorer"]]

    path_test_config = Path(modelfolder) / "test" / "pose_cfg.yaml"
    dlc_cfg = load_config(str(path_test_config))

    # get the graph!
    partaffinityfield_graph = dlc_cfg["partaffinityfield_graph"]
    jointnames = [
        dlc_cfg["all_joints_names"][i] for i in range(len(dlc_cfg["all_joints"]))
    ]
    path_inferencebounds_config = Path(modelfolder) / "test" / "inferencebounds.yaml"

    # Loop invariants: available annotation columns, stride, and the individuals
    # that take part in the comparison ("single" is skipped).
    columns = Data.keys()
    stride = dlc_cfg["stride"]
    animals = [ind for ind in individuals if ind != "single"]

    inferenceboundscfg = {}
    for edge in partaffinityfield_graph:
        j1, j2 = jointnames[edge[0]], jointnames[edge[1]]
        ds_within = []
        ds_across = []
        for ind in animals:
            for ind2 in animals:
                if (ind, j1, "x") not in columns or (ind2, j2, "y") not in columns:
                    continue
                distances = (
                    np.sqrt(
                        (Data[ind, j1, "x"] - Data[ind2, j2, "x"]) ** 2
                        + (Data[ind, j1, "y"] - Data[ind2, j2, "y"]) ** 2
                    )
                    / stride
                )
                if onlytrain:
                    distances = distances.iloc[trainIndices]
                bucket = ds_within if ind == ind2 else ds_across
                bucket.extend(distances.values.flatten())

        edgeencoding = str(edge[0]) + "_" + str(edge[1])
        intra_max, intra_min = _paf_distance_bounds(ds_within, numdigits)
        # NOTE: the inter-animal distances are currently not used, but are interesting to compare to intra_*
        inter_max, inter_min = _paf_distance_bounds(ds_across, numdigits)
        inferenceboundscfg[edgeencoding] = {
            "intra_max": intra_max,
            "intra_min": intra_min,
            "inter_max": inter_max,
            "inter_min": inter_min,
        }

    auxiliaryfunctions.write_plainconfig(
        str(path_inferencebounds_config), dict(inferenceboundscfg)
    )
    return inferenceboundscfg
def bayesian_search(
    config_path,
    inferencecfg,
    pbounds,
    edgewisecondition=True,
    shuffle=1,
    trainingsetindex=0,
    modelprefix="",
    snapshotindex=-1,
    target="rpck_test",
    maximize=True,
    init_points=20,
    n_iter=50,
    acq="ei",
    log_file=None,
    dcorr=5,
    leastbpts=3,
    printingintermediatevalues=True,
):
    """Optimize inference parameters with Bayesian optimization.

    The parameters named in ``pbounds`` are varied within their bounds; for every
    candidate the cross-validation metrics are recomputed from the stored
    evaluation data and the metric ``target`` is used as the objective.

    Parameters
    ----------
    config_path : string
        Full path of the config.yaml file.
    inferencecfg : dict-like
        Inference configuration. It is updated in place with every candidate and
        finally with the best parameters found.
    pbounds : dict
        Maps parameter names to ``(lower, upper)`` bounds. A parameter is cast to
        the type of its lower bound, unless the lower bound is an integer and the
        upper bound is not, in which case the type of the upper bound is used
        (so ``(0, 0.9)`` is treated as a float range).
    edgewisecondition : bool, default True
        If True, per-edge distance bounds are read from "inferencebounds.yaml"
        (computed first if the file is missing) and scaled by
        ``inferencecfg["upperbound_factor"]`` / ``inferencecfg["lowerbound_factor"]``.
        Otherwise no bounds are passed to the metric computation.
    shuffle, trainingsetindex, modelprefix
        Identify the evaluated model (shuffle index, index into
        ``cfg["TrainingFraction"]``, optional model prefix).
    snapshotindex : int, default -1
        Index into the list of evaluation data files returned by
        ``return_evaluate_network_data``.
    target : string, default "rpck_test"
        Column of the computed metrics to optimize.
    maximize : bool, default True
        Whether ``target`` is maximized; if False its negative is maximized.
    init_points, n_iter, acq
        Passed to ``BayesianOptimization.maximize``.
    log_file : optional
        If given, previous optimization logs are loaded from it before optimizing.
    dcorr, leastbpts
        Passed to the metric computation.
    printingintermediatevalues : bool, default True
        Print rpck/rmse/misses/hits for every evaluated candidate.

    Returns
    -------
    inferencecfg, opt
        The updated inference configuration and the ``BayesianOptimization`` object.

    Raises
    ------
    ValueError
        If an "rpck" target is combined with ``maximize=False`` or an "rmse" target
        with ``maximize=True``.
    """
    # Explicit exceptions (instead of asserts) so the checks survive ``python -O``.
    if "rpck" in target and not maximize:
        raise ValueError("Target %r has to be maximized (maximize=True)." % (target,))
    if "rmse" in target and maximize:
        raise ValueError("Target %r has to be minimized (maximize=False)." % (target,))

    def _bound_type(lower, upper):
        # An integer lower bound with a non-integer upper bound, e.g. (0, 0.9),
        # describes a float range; casting to int would truncate every candidate.
        integer_types = (int, np.integer)
        if isinstance(lower, integer_types) and not isinstance(upper, integer_types):
            return type(upper)
        return type(lower)

    cfg = auxiliaryfunctions.read_config(config_path)
    train_fraction = cfg["TrainingFraction"][int(trainingsetindex)]
    evaluationfolder = os.path.join(
        cfg["project_path"],
        str(
            auxiliaryfunctions.GetEvaluationFolder(
                train_fraction,
                shuffle,
                cfg,
                modelprefix=modelprefix,
            )
        ),
    )

    DLCscorer, _ = auxiliaryfunctions.GetScorerName(
        cfg,
        shuffle,
        train_fraction,
        cfg["iteration"],
        modelprefix=modelprefix,
    )

    # load params
    fns = return_evaluate_network_data(
        config_path,
        shuffle=shuffle,
        trainingsetindex=trainingsetindex,
        modelprefix=modelprefix,
    )
    predictionsfn = fns[snapshotindex]
    data, metadata = auxfun_multianimal.LoadFullMultiAnimalData(predictionsfn)
    params = set_up_evaluation(data)
    columns = ["train_iter", "train_frac", "shuffle"]
    columns += [
        "_".join((b, a))
        for a in ("train", "test")
        for b in ("rmse", "hits", "misses", "falsepos", "ndetects", "pck", "rpck")
    ]

    train_iter = trainingsetindex  # int(predictionsfn.split('-')[-1].split('.')[0])
    train_frac = cfg["TrainingFraction"][
        train_iter
    ]  # int(predictionsfn.split('trainset')[1].split('shuffle')[0])
    trainIndices = metadata["data"]["trainIndices"]
    testIndices = metadata["data"]["testIndices"]

    if edgewisecondition:
        mf = str(
            auxiliaryfunctions.GetModelFolder(
                train_fraction,
                shuffle,
                cfg,
                modelprefix=modelprefix,
            )
        )
        modelfolder = os.path.join(cfg["project_path"], mf)
        path_inferencebounds_config = (
            Path(modelfolder) / "test" / "inferencebounds.yaml"
        )
        try:
            inferenceboundscfg = auxiliaryfunctions.read_plainconfig(
                path_inferencebounds_config
            )
        except FileNotFoundError:
            print("Computing distances...")
            from deeplabcut.pose_estimation_tensorflow import (
                calculatepafdistancebounds,
            )

            # modelprefix must be forwarded, otherwise the bounds would be
            # computed from the model folder without the prefix.
            inferenceboundscfg = calculatepafdistancebounds(
                config_path, shuffle, trainingsetindex, modelprefix=modelprefix
            )
            auxiliaryfunctions.write_plainconfig(
                path_inferencebounds_config, inferenceboundscfg
            )

        partaffinityfield_graph = params["paf_graph"]
        upperbound = np.array(
            [
                float(
                    inferenceboundscfg[str(edge[0]) + "_" + str(edge[1])]["intra_max"]
                )
                for edge in partaffinityfield_graph
            ]
        )
        lowerbound = np.array(
            [
                float(
                    inferenceboundscfg[str(edge[0]) + "_" + str(edge[1])]["intra_min"]
                )
                for edge in partaffinityfield_graph
            ]
        )

        upperbound *= inferencecfg["upperbound_factor"]
        lowerbound *= inferencecfg["lowerbound_factor"]

    else:
        lowerbound = None
        upperbound = None

    def dlc_hyperparams(**kwargs):
        """Objective: metric ``target`` for the candidate parameters in ``kwargs``."""
        inferencecfg.update(kwargs)
        # Ensure type consistency
        for k, (lower, upper) in pbounds.items():
            inferencecfg[k] = _bound_type(lower, upper)(inferencecfg[k])

        stats = compute_crossval_metrics_preloadeddata(
            params,
            columns,
            inferencecfg,
            data,
            trainIndices,
            testIndices,
            train_iter,
            train_frac,
            shuffle,
            lowerbound,
            upperbound,
            dcorr=dcorr,
            leastbpts=leastbpts,
        )

        # stats = compute_crossval_metrics(config_path, inferencecfg, shuffle,trainingsetindex,
        #                                    dcorr=dcorr,leastbpts=leastbpts,modelprefix=modelprefix)

        if printingintermediatevalues:
            print(
                "rpck",
                stats["rpck_test"].values[0],
                "rpck train:",
                stats["rpck_train"].values[0],
            )
            print(
                "rmse",
                stats["rmse_test"].values[0],
                "miss",
                stats["misses_test"].values[0],
                "hit",
                stats["hits_test"].values[0],
            )

        # val = stats['rmse_test'].values[0]*(1+stats['misses_test'].values[0]*1./stats['hits_test'].values[0])
        val = stats[target].values[0]
        if np.isnan(val):
            if maximize:  # pck case
                val = -1e9  # random small number
            else:  # RMSE, return a large RMSE
                val = 1e9
        # The optimizer always maximizes, so flip the sign when minimizing.
        if not maximize:
            val = -val

        return val

    opt = BayesianOptimization(f=dlc_hyperparams, pbounds=pbounds, random_state=42)
    if log_file:
        load_logs(opt, log_file)
    logger = JSONLogger(
        path=os.path.join(evaluationfolder, "opti_log" + DLCscorer + ".json")
    )
    opt.subscribe(Events.OPTIMIZATION_STEP, logger)
    opt.maximize(init_points=init_points, n_iter=n_iter, acq=acq)

    inferencecfg.update(opt.max["params"])
    for k, (lower, upper) in pbounds.items():
        tmp = _bound_type(lower, upper)(inferencecfg[k])
        # NOTE(review): only NumPy floating values are rounded here; a plain Python
        # float (e.g. from bounds such as (0.05, 0.7)) is not an np.floating
        # instance and is stored unrounded - confirm whether that is intended.
        if isinstance(tmp, np.floating):
            tmp = np.round(tmp, 2).item()
        inferencecfg[k] = tmp

    return inferencecfg, opt
def UpdateTrain_pose_yaml(dict_train, dict2change, saveasfile):
    """Apply overrides to the training pose configuration and save it.

    Every entry of ``dict2change`` is assigned into ``dict_train`` (which is
    modified in place); the result is then passed to
    ``auxiliaryfunctions.write_plainconfig`` together with ``saveasfile``.
    """
    for name, new_value in dict2change.items():
        dict_train[name] = new_value
    auxiliaryfunctions.write_plainconfig(saveasfile, dict_train)