def save_checkpoint(model, path, optimizer=None, metadata=None):
    """ Write a checkpoint dict to ``path`` via ``torch.save``.

    The checkpoint always contains ``state_dict`` and ``metadata``;
    ``optimizer`` is added only when an optimizer is supplied.

    Args:
        model (Module): module whose params are to be saved. If it exposes
            a ``module`` attribute (e.g. wrapped by nn.DataParallel), the
            wrapped module is saved instead.
        path (str): path to save the checkpoint file. Its parent directory
            is created through ``mv.mkdirs``.
        optimizer ('Optimizer', optional): optimizer whose ``state_dict()``
            is stored under ``optimizer``.
        metadata (dict, optional): metadata stored under ``metadata``.
            ``None`` is stored as an empty dict.
    """
    assert isinstance(metadata, (dict, type(None)))

    mv.mkdirs(mv.parentdir(path))

    # Strip a wrapper such as nn.DataParallel so the wrapped module's
    # own state dict is what gets saved.
    net = model.module if hasattr(model, 'module') else model

    # Key order matches the original: state_dict, optimizer, metadata.
    checkpoint = {'state_dict': _weights_to_cpu(net.state_dict())}
    if optimizer is not None:
        checkpoint['optimizer'] = optimizer.state_dict()
    checkpoint['metadata'] = {} if metadata is None else metadata

    torch.save(checkpoint, path)
def init_logging(log_dir=None, config_file=None):
    """ Configure the ``logging`` module from a YAML dict config.

    Args:
        log_dir (str, optional): directory that receives 'info.log' and
            'error.log'. Defaults to the current working directory.
        config_file (str, optional): YAML logging configuration. Defaults
            to 'configs/default_log_config.yaml' located two directory
            levels above this file.

    Note:
        The configuration must define the handlers 'info_file_handler'
        and 'error_file_handler'; their 'filename' entries are overwritten.
    """
    if log_dir is None:
        log_dir = os.getcwd()

    if config_file is None:
        grandparent_dir = mv.parentdir(mv.parentdir(__file__))
        config_file = mv.joinpath(grandparent_dir,
                                  'configs/default_log_config.yaml')

    with open(config_file, 'rt') as f:
        config = yaml.safe_load(f.read())

    # Redirect both file handlers into the requested log directory.
    log_files = (
        ('info_file_handler', 'info.log'),
        ('error_file_handler', 'error.log'),
    )
    for handler_name, log_name in log_files:
        config['handlers'][handler_name]['filename'] = \
            mv.joinpath(log_dir, log_name)

    # The directory has to exist before the handlers are configured.
    mv.mkdirs(log_dir)
    logging.config.dictConfig(config)
def split_dsmd_file(dsmd_filepath, datasplit=None, shuffle=True,
                    suffix='.csv'):
    """ Split a dataset metadata file into several parts.

    One output file is written per entry of ``datasplit`` (e.g.
    'train.csv', 'val.csv' and 'test.csv'), in the same directory as the
    given dsmd file. Splits are taken as consecutive slices of the
    (optionally shuffled) keys, in the iteration order of ``datasplit``.

    Args:
        dsmd_filepath (str): file path of dataset metadata.
        datasplit (dict[str, float], optional): how to split the dataset,
            e.g. {'train': 0.9, 'val': 0.1, 'test': 0.0}. Each key is
            used as the output file name (plus ``suffix``). Defaults to
            {'train': 0.9, 'val': 0.1}.
        shuffle (bool): whether to shuffle the dataset before splitting.
        suffix (str): file name suffix appended to each split name.

    Raises:
        AssertionError: if the ratios do not satisfy
            0.0 < sum(ratios) <= 1.0.

    Note:
        0.0 < datasplit['train'] + datasplit['val'] + datasplit['test'] <= 1.0

        The size of each split is ``int(num_total * ratio)``, i.e. it is
        truncated towards zero.

        If there is no entry in a split, the corresponding dsmd file is
        not saved.
    """
    if datasplit is None:
        datasplit = {'train': 0.9, 'val': 0.1}

    dsmd_dir = mv.parentdir(dsmd_filepath)
    dsmd = mv.load_dsmd(dsmd_filepath)
    num_total = len(dsmd)

    keys = list(dsmd.keys())
    if shuffle:
        random.shuffle(keys)

    # Map each output file to the number of entries it should receive.
    sum_ratio = 0.0
    splits = {}
    for mode, ratio in datasplit.items():
        file_path = mv.joinpath(dsmd_dir, mode + suffix)
        splits[file_path] = int(num_total * ratio)
        sum_ratio += ratio
    assert 0.0 < sum_ratio <= 1.0

    start_index = 0
    for file_path, num_cur_split in splits.items():
        # Clamp both bounds into [0, num_total] so slicing never wraps
        # around or runs past the end of the key list.
        start_index = min(max(start_index, 0), num_total)
        end_index = min(max(start_index + num_cur_split, 0), num_total)

        keys_split = natsorted(keys[start_index:end_index])
        dsmd_split = {key: dsmd[key] for key in keys_split}

        # Write each split exactly once, and skip empty splits as
        # documented (the original issued a second, duplicate save).
        # NOTE(review): argument order (path, dsmd) is kept from the
        # original call - confirm it matches mv.save_dsmd's signature.
        if dsmd_split:
            mv.save_dsmd(file_path, dsmd_split)

        start_index = end_index
def save_cls_dsmd(dsmd_path, data, auto_mkdirs=True):
    """ Write dataset metadata to a file, one 'key,value' line per entry.

    Args:
        dsmd_path (str): file path to save dataset metadata.
        data: input passed to ``mv.make_dsmd`` to build the metadata
            mapping that is written out.
        auto_mkdirs (bool): If the parent folder of `dsmd_path` does not
            exist, whether to create it automatically.

    Note:
        Array-like values (multi-label case) are flattened into a
        comma-separated list on the same line.
    """
    if auto_mkdirs:
        mv.mkdirs(mv.parentdir(dsmd_path))

    records = mv.make_dsmd(data)
    with open(dsmd_path, 'w') as fd:
        for name, label in records.items():
            # multi-label case: join all labels with commas
            if mv.isarrayinstance(label):
                label = ','.join(str(item) for item in label)
            fd.write('%s,%s\n' % (str(name), str(label)))
def save_dsmd(dsmd, file_path, auto_mkdirs=True):
    """ Write dataset metadata to a file, naturally sorted by key.

    Each entry becomes one 'key, value' line.

    Args:
        dsmd (dict): dataset metadata.
        file_path (str): file path to save dataset metadata.
        auto_mkdirs (bool): If the parent folder of `file_path` does not
            exist, whether to create it automatically.

    Note:
        Array-like values (multi-label case) are written as a
        ', '-separated list on the same line.
    """
    if auto_mkdirs:
        mv.mkdirs(mv.parentdir(file_path))

    # Natural sort on the key so the output order is stable.
    sorted_items = natsorted(dsmd.items(), key=lambda item: item[0])
    ordered_dsmd = collections.OrderedDict(sorted_items)

    with open(file_path, 'w') as fd:
        for name, entry in ordered_dsmd.items():
            # multi-label case: join all labels on one line
            if mv.isarrayinstance(entry):
                entry = ', '.join(str(item) for item in entry)
            fd.write('%s, %s\n' % (str(name), str(entry)))
def load_rws_contour(filepath):
    """ Load a contour annotation from a JSON label file.

    Args:
        filepath (str): path of the JSON label file. It must contain the
            keys 'imagePath' and 'shapes'; 'imageHeight' and 'imageWidth'
            are optional.

    Returns:
        (dict): with the keys
            'height': value of 'imageHeight' (None if absent),
            'width': value of 'imageWidth' (None if absent),
            'image_path': 'imagePath' joined onto the label file's
                parent directory,
            'shapes': list of (label, points) tuples, one per shape.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # 'imagePath' is relative to the label file; re-anchor it on the
    # label file's directory.
    image_path = mv.joinpath(mv.parentdir(filepath), data['imagePath'])

    shapes = [(entry['label'], entry['points']) for entry in data['shapes']]

    return {
        'height': data.get('imageHeight'),
        'width': data.get('imageWidth'),
        'image_path': image_path,
        'shapes': shapes,
    }
def imwrite(file_path, img, auto_mkdirs=True):
    """ Write an image array to disk with OpenCV.

    Args:
        file_path (str): specified file path to save to.
        img (ndarray): image array to be written.
        auto_mkdirs (bool): If the parent folder of `file_path` does not
            exist, whether to create it automatically.

    Returns:
        (bool): the result of ``cv2.imwrite``, i.e. whether the image is
            saved successfully.

    Note:
        A 3-dimensional array is treated as a color image in RGB format
        and is converted to BGR before writing.
    """
    if auto_mkdirs:
        mv.mkdirs(mv.parentdir(file_path))

    # Only 3-dimensional (color) arrays need the RGB -> BGR swap.
    to_write = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) if img.ndim == 3 else img
    return cv2.imwrite(file_path, to_write)
def save_det_dsmd(dsmd_path, data, class2label, auto_mkdirs=True):
    """ Write dataset metadata to a file, one record per entry.

    Args:
        dsmd_path (str): file path to save dataset metadata.
        data (dict): dataset metadata, refer to 'load_dsmd'.
        class2label (str or dict): class-to-label file or class2label
            dict. A string is loaded through ``mv.load_c2l``.
        auto_mkdirs (bool): If the parent folder of `dsmd_path` does not
            exist, whether to create it automatically.
    """
    if auto_mkdirs:
        mv.mkdirs(mv.parentdir(dsmd_path))

    # Resolve the mapping, then invert it to label -> class.
    mapping = (mv.load_c2l(class2label)
               if isinstance(class2label, str) else class2label)
    label2class = {label: classname for classname, label in mapping.items()}

    records = mv.make_dsmd(data)
    with open(dsmd_path, 'w') as fd:
        for name, record in records.items():
            _write_record(fd, name, record, label2class)
def gen_cls_ds_from_datafolder(
        in_dir, out_dir, auto_mkdirs=True, classnames=None):
    """ Collect the images of a DataFolder into a single directory.

    Every file found in the selected class sub-directories of ``in_dir``
    is copied into ``out_dir``. The original DataFolder is left unchanged.

    Args:
        in_dir (str): DataFolder root directory.
        out_dir (str): directory to save all the images in DataFolder.
            It is emptied through ``mv.empty_dir`` before copying.
        auto_mkdirs (bool): whether to create the parent directory of
            `out_dir` automatically.
        classnames (list[str]): names of specified classes to be
            collected. If not given, every entry listed in `in_dir` is
            considered.

    Note:
        This function is expected to be used together with
        gen_cls_dsmd_file_from_datafolder().
        Filename of each image in DataFolder should be unique, since files
        are copied with ``non_overwrite=True``; otherwise a
        FileExistsError will be thrown.
        DataFolder is described in 'gen_cls_dsmd_file_from_datafolder()'.
    """
    assert mv.isdir(in_dir)

    # prepare a clean output directory
    if auto_mkdirs:
        mv.mkdirs(mv.parentdir(out_dir))
    mv.empty_dir(out_dir)

    selected = mv.listdir(in_dir) if classnames is None else classnames

    for classname in selected:
        src_dir = mv.joinpath(in_dir, classname)
        assert mv.isdir(src_dir)

        # copy in natural-sort order
        mv.copyfiles(natsorted(mv.listdir(src_dir)), out_dir, src_dir,
                     non_overwrite=True)
import math
import numpy as np
import torch
import medvision as mv
import pytest


DATA_DIR = mv.joinpath(mv.parentdir(__file__), 'data')
PNG_IMG_PATH = mv.joinpath(DATA_DIR, 'pngs', 'Blue-Ogi.png')
IM_GRAY = mv.imread(PNG_IMG_PATH, mv.ImreadMode.GRAY)
IM_RGB = mv.imread(PNG_IMG_PATH)


def assert_image_equal(a, b):
    """ Assert that two image arrays match in shape, dtype and content. """
    assert a.shape == b.shape
    assert a.dtype == b.dtype

    # Compare in float32 so the subtraction cannot wrap around for
    # unsigned integer images.
    max_abs_diff = np.abs(a.astype(np.float32) - b.astype(np.float32)).max()
    assert math.isclose(max_abs_diff, 0.0)


@pytest.mark.parametrize('img', [IM_GRAY, IM_RGB])
def test_make_np(img):
    """ A tensor built from an image converts back to the same image. """
    img_converted = mv.make_np(torch.from_numpy(img))
    assert_image_equal(img, img_converted)


@mv.nograd
def test_no_grad():
    """ Results computed under ``mv.nograd`` do not require grad. """
    x = torch.zeros(1, requires_grad=True)
    y = x * 2
    assert y.requires_grad is False