def convertannotationdata_fromwindows2unixstyle(config, userfeedback=True, win2linux=True):
    """
    Convert image paths in annotation files (CollectedData_<scorer>.h5) under
    labeled-data/<videofolder> from Windows to Unix style (or vice versa).

    This is important when e.g. labeling on Windows, but re-labeling or
    checking labels on a Linux computer (and vice versa). Note that for
    training on Linux with data annotated on Windows this is not necessary,
    as the data gets converted during training set creation.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.
    userfeedback : bool, optional
        If True, the user will be asked specifically for each folder in
        labeled-data whether its annotation file shall be converted.
        If False, all folders are converted without asking.
    win2linux : bool, optional
        By default converts from Windows to Linux path separators.
        If False, converts from Unix to Windows.
    """
    cfg = auxiliaryfunctions.read_config(config)
    folders = [
        Path(config).parent
        / "labeled-data"
        / trainingsetmanipulation._robust_path_split(vid)[1]
        for vid in cfg["video_sets"]
    ]
    for folder in folders:
        if userfeedback:
            print("Do you want to convert the annotationdata in folder:", folder, "?")
            askuser = input("yes/no")
        else:
            # Non-interactive mode: proceed with the conversion for every
            # folder. (Previously this was set to a sentinel that never
            # matched the accepted answers, so nothing was ever converted.)
            askuser = "yes"
        if askuser == "y" or askuser == "yes" or askuser == "Ja" or askuser == "ha":
            fn = os.path.join(str(folder), "CollectedData_" + cfg["scorer"])
            if os.path.exists(fn + ".h5"):
                Data = pd.read_hdf(fn + ".h5")
                if win2linux:
                    convertpaths_to_unixstyle(Data, fn)
                else:
                    convertpaths_to_windowsstyle(Data, fn)
            else:
                # Best-effort: a missing annotation file is not fatal.
                warnings.warn(f"Could not find '{fn + '.h5'}'. skipping")
def convert_cropped_to_standard_dataset(
    config_path,
    recreate_datasets=True,
    delete_crops=True,
    back_up=True,
):
    """
    Convert a project whose labeled data were cropped back to a standard
    (uncropped) dataset.

    Restores the original (uncropped) video sets in the config, optionally
    deletes the cropped labeled-data folders, and optionally recreates the
    training datasets while preserving the original train/test splits
    (remapped from cropped image names to their uncropped counterparts).

    Parameters
    ----------
    config_path : string
        Full path of the config.yaml file.
    recreate_datasets : bool, optional
        If True (default), rebuild the multi-animal training datasets from
        the merged annotations after restoring the original videos.
    delete_crops : bool, optional
        If True (default), remove the "_cropped" labeled-data folders.
    back_up : bool, optional
        If True (default), copy the whole project tree to "<project>_bak"
        before modifying anything.
    """
    import pandas as pd
    import pickle
    import shutil
    from deeplabcut.generate_training_dataset import trainingsetmanipulation
    from deeplabcut.utils import read_plainconfig, write_config

    cfg = auxiliaryfunctions.read_config(config_path)
    # NOTE(review): pop() without a default raises KeyError if these keys are
    # absent — presumably the config always contains them; confirm.
    videos_orig = cfg.pop("video_sets_original")
    is_cropped = cfg.pop("croppedtraining")
    if videos_orig is None or not is_cropped:
        print("Labeled data do not appear to be cropped. "
              "Project will remain unchanged...")
        return

    project_path = cfg["project_path"]
    if back_up:
        # Full project backup before any destructive operation below.
        print("Backing up project...")
        shutil.copytree(project_path, project_path + "_bak", symlinks=True)

    if delete_crops:
        print("Deleting crops...")
        data_path = os.path.join(project_path, "labeled-data")
        for video in cfg["video_sets"]:
            _, filename, _ = trainingsetmanipulation._robust_path_split(video)
            if "_cropped" in video:  # One can never be too safe...
                shutil.rmtree(os.path.join(data_path, filename), ignore_errors=True)

    # Restore the original (uncropped) video sets and persist the config.
    cfg["video_sets"] = videos_orig
    write_config(config_path, cfg)
    if not recreate_datasets:
        return

    datasets_folder = os.path.join(
        project_path,
        auxiliaryfunctions.GetTrainingSetFolder(cfg),
    )
    # Annotations of the cropped dataset; their index holds cropped image paths.
    df_old = pd.read_hdf(
        os.path.join(datasets_folder, "CollectedData_" + cfg["scorer"] + ".h5"),
    )

    def strip_cropped_image_name(path):
        # Map a cropped image path back to its original uncropped path:
        # drop the "_cropped" folder suffix and the "c<idx>" crop suffix
        # appended to the file stem.
        # NOTE(review): split(".") assumes exactly one dot in the filename,
        # and split("c") assumes the stem contains no 'c' before the crop
        # suffix — confirm against the crop naming scheme.
        head, filename = os.path.split(path)
        head = head.replace("_cropped", "")
        file, ext = filename.split(".")
        file = file.split("c")[0]
        return os.path.join(head, file + "." + ext)

    img_names_old = np.asarray(
        [strip_cropped_image_name(img) for img in df_old.index.to_list()])
    # Merged annotations of the restored (uncropped) dataset.
    df = merge_annotateddatasets(cfg, datasets_folder)
    img_names = df.index.to_numpy()
    train_idx = []
    test_idx = []
    pickle_files = []
    for filename in os.listdir(datasets_folder):
        if filename.endswith("pickle"):
            pickle_file = os.path.join(datasets_folder, filename)
            pickle_files.append(pickle_file)
            if filename.startswith("Docu"):
                # Documentation pickles store the original split; remap the
                # cropped-image indices onto the uncropped image names.
                with open(pickle_file, "rb") as f:
                    _, train_inds, test_inds, train_frac = pickle.load(f)
                    train_inds_temp = np.flatnonzero(
                        np.isin(img_names, img_names_old[train_inds]))
                    test_inds_temp = np.flatnonzero(
                        np.isin(img_names, img_names_old[test_inds]))
                    train_inds, test_inds = pad_train_test_indices(
                        train_inds_temp, test_inds_temp, train_frac)
                    train_idx.append(train_inds)
                    test_idx.append(test_inds)

    # Search a pose_config.yaml file to parse missing information
    pose_config_path = ""
    for dirpath, _, filenames in os.walk(
            os.path.join(project_path, "dlc-models")):
        for file in filenames:
            if file.endswith("pose_cfg.yaml"):
                pose_config_path = os.path.join(dirpath, file)
                break
    # NOTE(review): if no pose_cfg.yaml exists, pose_config_path stays ""
    # and the read below will fail — presumably trained models always exist
    # at this point; confirm.
    pose_cfg = read_plainconfig(pose_config_path)
    net_type = pose_cfg["net_type"]
    if net_type == "resnet_50" and pose_cfg.get("multi_stage", False):
        net_type = "dlcrnet_ms5"

    # Clean the training-datasets folder prior to recreating the data pickles
    shuffle_inds = set()
    for file in pickle_files:
        os.remove(file)
        shuffle_inds.add(int(re.findall(r"shuffle(\d+)", file)[0]))
    create_multianimaltraining_dataset(
        config_path,
        trainIndices=train_idx,
        testIndices=test_idx,
        Shuffles=sorted(shuffle_inds),
        net_type=net_type,
        paf_graph=pose_cfg["partaffinityfield_graph"],
        crop_size=pose_cfg.get("crop_size", [400, 400]),
        crop_sampling=pose_cfg.get("crop_sampling", "hybrid"),
    )
def convert2_maDLC(config, userfeedback=True, forceindividual=None):
    """
    Convert a single-animal annotation file into a multi-animal annotation
    file, by introducing an individuals column level.

    Multi-animal bodyparts are assigned either to the first individual in the
    individuals list in config.yaml, or to whatever is passed via
    ``forceindividual``. Unique bodyparts are assigned to 'single'.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.
    userfeedback : bool, optional
        If True, a dialog asks for each labeled-data folder whether its
        annotation file should be converted. If False, all folders are
        converted without asking.
    forceindividual : None or string, optional
        If a string is given, it is used as the label in the individuals
        column for the multi-animal bodyparts.

    Examples
    --------
    Convert multianimalbodyparts under the first individual in the
    individuals list in config.yaml and uniquebodyparts under 'single':

    >>> deeplabcut.convert2_maDLC('/socialrearing-task/config.yaml')

    Convert multianimalbodyparts under the individual label mus17 and
    uniquebodyparts under 'single':

    >>> deeplabcut.convert2_maDLC('/socialrearing-task/config.yaml', forceindividual='mus17')
    """
    cfg = auxiliaryfunctions.read_config(config)
    videos = cfg["video_sets"].keys()
    video_names = [
        trainingsetmanipulation._robust_path_split(i)[1] for i in videos
    ]
    folders = [
        Path(config).parent / "labeled-data" / Path(i) for i in video_names
    ]
    individuals, uniquebodyparts, multianimalbodyparts = extractindividualsandbodyparts(
        cfg)

    if forceindividual is None:
        if len(individuals) == 0:
            print("At least one individual should exist...")
            folders = []
            forceindividual = ""
        else:
            # Note that 'single' is added at the end of the individuals
            # list, so index 0 is the first real individual.
            forceindividual = individuals[0]
        if forceindividual == "single":  # no specific individual ()
            if len(multianimalbodyparts) > 0:  # there should be an individual name...
                print(
                    "At least one individual should exist beyond 'single', as there are multianimalbodyparts..."
                )
                folders = []

    for folder in folders:
        if userfeedback:
            print("Do you want to convert the annotation file in folder:", folder, "?")
            askuser = input("yes/no")
        else:
            # Non-interactive mode: proceed with every folder. (Previously
            # this was set to a sentinel that never matched the accepted
            # answers, so userfeedback=False converted nothing.)
            askuser = "yes"

        if (askuser == "y" or askuser == "yes" or askuser == "Ja"
                or askuser == "ha"):  # multilanguage support :)
            fn = os.path.join(str(folder), "CollectedData_" + cfg["scorer"])
            Data = pd.read_hdf(fn + ".h5")
            imindex = Data.index
            print("This is a single animal data set, converting to multi...", folder)

            # -> adding (single, bpt) for uniquebodyparts
            for j, bpt in enumerate(uniquebodyparts):
                index = pd.MultiIndex.from_arrays(
                    np.array([
                        2 * [cfg["scorer"]], 2 * ["single"], 2 * [bpt],
                        ["x", "y"]
                    ]),
                    names=["scorer", "individuals", "bodyparts", "coords"],
                )
                if bpt in Data[cfg["scorer"]].keys():
                    frame = pd.DataFrame(Data[cfg["scorer"]][bpt].values,
                                         columns=index,
                                         index=imindex)
                else:
                    # Bodypart not present in the old file: fill with NaNs.
                    frame = pd.DataFrame(
                        np.ones((len(imindex), 2)) * np.nan,
                        columns=index,
                        index=imindex,
                    )
                if j == 0:
                    dataFrame = frame
                else:
                    dataFrame = pd.concat([dataFrame, frame], axis=1)
            if len(uniquebodyparts) == 0:
                dataFrame = None

            # -> adding (individual, bpt) for multianimalbodyparts
            for j, bpt in enumerate(multianimalbodyparts):
                index = pd.MultiIndex.from_arrays(
                    np.array([
                        2 * [cfg["scorer"]],
                        2 * [str(forceindividual)],
                        2 * [bpt],
                        ["x", "y"],
                    ]),
                    names=["scorer", "individuals", "bodyparts", "coords"],
                )
                if bpt in Data[cfg["scorer"]].keys():
                    frame = pd.DataFrame(Data[cfg["scorer"]][bpt].values,
                                         columns=index,
                                         index=imindex)
                else:
                    frame = pd.DataFrame(
                        np.ones((len(imindex), 2)) * np.nan,
                        columns=index,
                        index=imindex,
                    )
                if j == 0 and dataFrame is None:
                    dataFrame = frame
                else:
                    dataFrame = pd.concat([dataFrame, frame], axis=1)

            # Back up the original single-animal annotations, then overwrite
            # the annotation file with the multi-animal format.
            Data.to_hdf(fn + "singleanimal.h5",
                        "df_with_missing",
                        format="table",
                        mode="w")
            Data.to_csv(fn + "singleanimal.csv")
            dataFrame.to_hdf(fn + ".h5",
                             "df_with_missing",
                             format="table",
                             mode="w")
            dataFrame.to_csv(fn + ".csv")