Example #1
def get_records_from_datadir(datadir: Union[str, bytes, os.PathLike]) -> dict:
    """ Gets a dictionary of record dictionaries from a data directory

    Parameters
    ----------
    datadir: str, bytes, os.PathLike
        path to a deepethogram data directory, e.g. '/path/to/DATA'

    Returns
    -------
    records: dict
        e.g.
        {'animal0':
            {'rgb': '/path/to/DATA/animal0/rgb_video.mp4',
             'label': '/path/to/DATA/animal0/labels.csv',
             'output': '/path/to/DATA/animal0/outputs.h5',
             'stats': '/path/to/DATA/animal0/stats.yaml'
             },
         'animal1': {...},
         ...
        }
    """
    assert os.path.isdir(datadir), 'datadir does not exist: {}'.format(datadir)
    subdirs = get_subfiles(datadir, return_type='directory')
    records = {}
    for subdir in subdirs:
        parsed_record = get_record_from_subdir(os.path.join(datadir, subdir))
        records[parsed_record['key']] = parsed_record
    write_all_records(datadir)
    return records
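# A minimal usage sketch (hedged): the paths and keys below are hypothetical,
# following the layout shown in the docstring above.
records = get_records_from_datadir('/path/to/DATA')
rgb_video = records['animal0']['rgb']     # path to this animal's RGB movie
label_file = records['animal0']['label']  # path to its label .csv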
Example #2
def find_flowfiles(root: Union[str, bytes, os.PathLike]) -> list:
    """ DEPRECATED.

    Args:
        root ():

    Returns:

    """
    files = get_subfiles(root, return_type='any')
    endings = [os.path.splitext(i)[1] for i in files]
    valid_endings = ['.avi', '.mp4', '.h5']
    movies = [files[i] for i in range(len(files)) if endings[i] in valid_endings
              and 'flow' in os.path.basename(files[i])]
    framedirs = [i for i in get_subfiles(root, return_type='directory') if 'frame' in i
                 and 'flow' in os.path.basename(i)]
    return movies + framedirs
Example #3
def check_if_annotations_exist(annotation_paths):
    """ Checks that each split's annotation directory contains the expected .txt files """
    all_correct = True
    for split, directory in annotation_paths.items():
        subfiles = utils.get_subfiles(directory, 'file')
        txts = [i for i in subfiles if i.endswith('.txt')]
        # each split is expected to contain at least 21 annotation files
        if len(txts) < 21:
            all_correct = False
            break
    return all_correct
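# Usage sketch (hedged): annotation_paths is assumed to map split names to
# directories of .txt annotation files; the paths below are hypothetical.
annotation_paths = {'train': '/data/annotations/train', 'val': '/data/annotations/val'}
assert check_if_annotations_exist(annotation_paths)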
Example #4
def main(cfg: DictConfig):
    # turn "models" in your project configuration to "full/path/to/models"
    cfg = utils.get_absolute_paths_from_cfg(cfg)
    log.info('configuration used: ')
    log.info(cfg.pretty())

    weights = projects.get_weightfile_from_cfg(cfg, model_type='sequence')
    assert weights is not None, 'Must either specify a weightfile or use reload.latest=True'

    if cfg.sequence.latent_name is None:
        # find the latent name used in the weight file you loaded
        rundir = os.path.dirname(weights)
        loaded_cfg = utils.load_yaml(os.path.join(rundir, 'config.yaml'))
        latent_name = loaded_cfg['sequence']['latent_name']
        # if this latent name is also None, use the arch of the feature extractor
        # this should never happen
        if latent_name is None:
            latent_name = loaded_cfg['feature_extractor']['arch']
    else:
        latent_name = cfg.sequence.latent_name

    # the output name will be a group in the output hdf5 dataset containing probabilities, etc
    if cfg.sequence.output_name is None:
        output_name = cfg.sequence.arch
    else:
        output_name = cfg.sequence.output_name
    directory_list = cfg.inference.directory_list
    if directory_list is None or len(directory_list) == 0:
        raise ValueError('must pass list of directories from command line. '
                         'Ex: directory_list=[path_to_dir1,path_to_dir2] or directory_list=all')
    elif type(directory_list) == str and directory_list == 'all':
        basedir = cfg.project.data_path
        directory_list = utils.get_subfiles(basedir, 'directory')

    outputfiles = []
    for directory in directory_list:
        assert os.path.isdir(directory), 'Not a directory: {}'.format(directory)
        record = projects.get_record_from_subdir(directory)
        assert record['output'] is not None
        outputfiles.append(record['output'])

    # 1024: assumed dimensionality of the saved feature vectors; depends on the feature extractor
    model = build_model_from_cfg(cfg, 1024, len(cfg.project.class_names))
    log.info('model: {}'.format(model))

    model = utils.load_weights(model, weights)
    metrics_file = os.path.join(os.path.dirname(weights), 'classification_metrics.h5')
    with h5py.File(metrics_file, 'r') as f:
        thresholds = f['threshold_curves']['val']['optimum'][:]
        log.info('thresholds: {}'.format(thresholds))
    device = 'cuda:{}'.format(cfg.compute.gpu_id)
    class_names = cfg.project.class_names
    class_names = np.array(class_names)
    extract(model, outputfiles, thresholds, cfg.feature_extractor.final_activation, latent_name, output_name,
            cfg.sequence.sequence_length, None, True, device, cfg.inference.ignore_error,
            cfg.inference.overwrite, class_names=class_names)
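# Invocation sketch (hedged): this entry point is driven by Hydra-style command-line
# overrides, as in the feature extractor test below; the exact module path is an assumption.
#   python -m deepethogram.sequence.inference reload.latest=True \
#       inference.directory_list=[/path/to/DATA/animal0,/path/to/DATA/animal1]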
Example #5
def find_rgbfiles(root: Union[str, bytes, os.PathLike]) -> list:
    """Finds all possible RGB video files in a deepethogram data directory

    Args:
        root (str, pathlike): deepethogram data directory

    Returns:
        list of absolute paths to RGB videos, or subdirectories containing individual images (framedirs)
    """
    files = get_subfiles(root, return_type='any')
    valid_endings = ['.avi', '.mp4', '.h5']
    excluded = ['flow', 'label', 'output', 'score']
    movies = [i for i in files if os.path.splitext(i)[1] in valid_endings]
    movies = exclude_strings_from_filelist(movies, excluded)

    framedirs = get_subfiles(root, return_type='directory')
    framedirs = exclude_strings_from_filelist(framedirs, excluded)
    return movies + framedirs
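# Usage sketch (hedged, hypothetical directory): returns both movie files and
# frame directories, with flow/label/output/score files excluded.
rgbs = find_rgbfiles('/path/to/DATA/animal0')
# e.g. ['/path/to/DATA/animal0/rgb_video.mp4']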
Example #6
def clean_deg_directory(testing_directory):
    """ Removes model runs and per-video artifacts from a deepethogram testing directory """
    model_subfiles = utils.get_subfiles(
        os.path.join(testing_directory, 'models'))

    for subfile in model_subfiles:
        if os.path.isfile(subfile):
            os.remove(subfile)
        elif os.path.isdir(subfile):
            shutil.rmtree(subfile)

    datadir = os.path.join(testing_directory, 'DATA')
    subfiles = utils.get_subfiles(datadir)
    for subfile in subfiles:
        if os.path.isdir(subfile):
            clean_subdir(subfile)
        elif os.path.basename(subfile) != 'split.yaml':
            os.remove(subfile)
Example #7
def find_outputfiles(root: Union[str, bytes, os.PathLike]) -> list:
    """ Finds deepethogram outputfiles, containing RGB and flow features, along with P(K)

    Args:
        root (str, pathlike): deepethogram data directory

    Returns:
        list of output files; should contain only one element
    """
    files = get_subfiles(root, return_type='file')
    files = [i for i in files if 'output' in os.path.basename(i) and os.path.splitext(i)[1] == '.h5']
    return files
Example #8
def find_labelfiles(root: Union[str, bytes, os.PathLike]) -> list:
    """ Gets label files from a deepethogram data directory

    Args:
        root (str, pathlike): directory containing labels, movies, etc

    Returns:
        files: list of score or label files
    """
    files = get_subfiles(root, return_type='file')
    files = [i for i in files if 'label' in os.path.basename(i) or 'score' in os.path.basename(i)]
    return files
Example #9
def find_statsfiles(root: Union[str, bytes, os.PathLike]) -> list:
    """ Finds normalization statistics in deepethogram data directory

    Args:
        root (str, pathlike)
            deepethogram data directory

    Returns:
        list of stats files, should only have 1 or 0 elements
    """
    files = get_subfiles(root, return_type='file')
    files = [i for i in files if 'stats' in os.path.basename(i) and os.path.splitext(i)[1] == '.yaml']
    return files
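# Usage sketch for the find_* helpers above (hedged, hypothetical directory):
outputs = find_outputfiles('/path/to/DATA/animal0')  # at most one *outputs.h5 file
labels = find_labelfiles('/path/to/DATA/animal0')    # label and/or score files
stats = find_statsfiles('/path/to/DATA/animal0')     # zero or one stats .yaml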
Example #10
def test_feature_extraction():
    # the reason for this complexity is that I don't want to run inference on all directories
    string = 'python -m deepethogram.feature_extractor.inference preset=deg_f reload.latest=True '
    datadir = os.path.join(testing_directory, 'DATA')
    subdirs = utils.get_subfiles(datadir, 'directory')
    np.random.seed(42)
    subdirs = np.random.choice(subdirs, size=100, replace=False)
    dir_string = ','.join([str(i) for i in subdirs])
    dir_string = '[' + dir_string + ']'
    string += f'inference.directory_list={dir_string} inference.overwrite=True '
    string = add_default_arguments(string, train=False)
    command = command_from_string(string)
    ret = subprocess.run(command)
    assert ret.returncode == 0
Example #11
def clean_subdir(subdir):
    files = utils.get_subfiles(subdir)

    for file in files:
        basename = os.path.basename(file)
        # keep the record, the normalization stats, and the original video/label files
        if basename in ('record.yaml', 'stats.yaml'):
            continue
        if basename.startswith('video') and basename.endswith(('.csv', '.mp4')):
            continue
        os.remove(file)
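# Effect sketch (hedged, hypothetical filenames): which basenames clean_subdir keeps.
for name in ['record.yaml', 'stats.yaml', 'video0_labels.csv', 'video0.mp4', 'scratch.txt']:
    keep = (name in ('record.yaml', 'stats.yaml')
            or (name.startswith('video') and name.endswith(('.csv', '.mp4'))))
    print(name, 'kept' if keep else 'removed')  # only 'scratch.txt' is removed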
Example #12
def check_if_videos_exist(video_paths, n_videos):
    """ Checks that the val/test video directories contain the expected number and names of videos """
    all_correct = True
    filelists = read_lists_for_verification()
    for split in ['val', 'test']:
        directory = video_paths[split]
        subfiles = utils.get_subfiles(directory, return_type='file')
        assert len(subfiles) >= n_videos[split]

        basenames = [
            os.path.splitext(os.path.basename(i))[0] for i in subfiles
        ]

        for file_to_check in filelists[split]:
            if file_to_check not in basenames:
                all_correct = False
                break
    return all_correct
Example #13
def write_all_records(root: Union[str, bytes, os.PathLike], preference: list = None):
    """ For a given data directory, finds all subdirs and their files. Saves their records as .yaml files

    Parameters
    ----------
    root: str, bytes, os.PathLike
        data directory, e.g. '/path/to/DATA', which contains subdirectories 'animal0', 'animal1', 'animal2'
    preference: list
        list of filetype preferences. see parse_subdir

    Returns
    -------
    None
    """
    subdirs = get_subfiles(root, return_type='directory')
    for subdir in subdirs:
        record = parse_subdir(subdir, preference=preference)
        outfile = os.path.join(subdir, 'record.yaml')
        utils.save_dict_to_yaml(record, outfile)
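# Usage sketch (hedged, hypothetical path): writes one record.yaml per subdirectory.
write_all_records('/path/to/DATA')
# -> /path/to/DATA/animal0/record.yaml, /path/to/DATA/animal1/record.yaml, ...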
Example #14
def get_weights_from_model_path(model_path: Union[str, os.PathLike]) -> dict:
    """ Finds absolute path to weight files for each model type and architecture

    Parameters
    ----------
    model_path: str, os.PathLike
        /path/to/models

    Returns
    -------
    model_weights: dict of dicts of lists
        Easiest to understand by example
        {'flow_generator':
            {'TinyMotionNet': ['/path/to/oldest/tinymotionnet_checkpoint.pt',
                               '/path/to/newest/tinymotionnet_checkpoint.pt'],
             'MotionNet': ['/path/to/oldest/motionnet_checkpoint.pt',
                           '/path/to/newest/motionnet_checkpoint.pt'],
             'TinyMotionNet3D': ...
             },
         'feature_extractor':
            {'resnet18': ['/path/to/oldest/resnet18.checkpoint',
                          '/path/to/newest/resnet18.checkpoint'],
             'resnet50': ...
             },
         'sequence':
            {'tgmj': ...
             }
        }
    """
    rundirs = get_subfiles(model_path, return_type='directory')
    # models are assumed to be at most one subdirectory deep; note that extending
    # rundirs while iterating means deeper nesting is also traversed
    for rundir in rundirs:
        subdirs = get_subfiles(rundir, return_type='directory')
        rundirs += subdirs
    rundirs.sort()

    model_weights = {'flow_generator': {},
                     'feature_extractor': {},
                     'sequence': {}}
    for rundir in rundirs:
        # for backwards compatibility
        paramfile = os.path.join(rundir, 'hyperparameters.yaml')
        if not os.path.isfile(paramfile):
            paramfile = os.path.join(rundir, 'config.yaml')

            if not os.path.isfile(paramfile):
                continue
        params = utils.load_yaml(paramfile)

        # this horrible if-else tree is for backwards compatibility with how I used to save config files
        if 'model' in params.keys():
            model_type = params['model']
            if params['model'] in params.keys():
                arch = params[params['model']]
            elif params['model'] == 'feature_extractor':
                arch = params['classifier']
            elif 'arch' in params.keys():
                arch = params['arch']
            else:
                raise ValueError('Could not find architecture from config: {}'.format(params))

        elif 'run' in params.keys():
            model_type = params['run']['model']
            arch = params[model_type]['arch']
        else:
            continue

        weightfile = os.path.join(rundir, 'checkpoint.pt')
        if os.path.isfile(weightfile):
            if arch in model_weights[model_type].keys():
                model_weights[model_type][arch].append(weightfile)
            else:
                model_weights[model_type][arch] = [weightfile]
    for model in model_weights.keys():
        for arch, runlist in model_weights[model].items():
            model_weights[model][arch] = sort_runs_by_date(runlist)
    return model_weights
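# Usage sketch (hedged, hypothetical path): runs within each architecture are
# sorted by date, so the last element is the most recent checkpoint.
model_weights = get_weights_from_model_path('/path/to/models')
latest_resnet18 = model_weights['feature_extractor']['resnet18'][-1]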
Example #15
def feature_extractor_inference(cfg: DictConfig):
    """Runs inference on the feature extractor from an OmegaConf configuration. 

    Parameters
    ----------
    cfg : DictConfig
        Configuration, e.g. that returned by deepethogram.configuration.make_feature_extractor_inference_cfg

    Raises
    ------
    ValueError
        cfg.inference.directory_list must contain a list of input directories, or 'all'
    ValueError
        Checks directory list types
    """
    cfg = projects.setup_run(cfg)
    # turn "models" in your project configuration to "full/path/to/models"
    log.info('args: {}'.format(' '.join(sys.argv)))

    log.info('configuration used in inference: ')
    log.info(OmegaConf.to_yaml(cfg))
    if ('sequence' not in cfg.keys() or 'latent_name' not in cfg.sequence.keys()
            or cfg.sequence.latent_name is None):
        latent_name = cfg.feature_extractor.arch
    else:
        latent_name = cfg.sequence.latent_name
    log.info('Latent name used in HDF5 file: {}'.format(latent_name))
    directory_list = cfg.inference.directory_list

    if directory_list is None or len(directory_list) == 0:
        raise ValueError('must pass list of directories from command line. '
                         'Ex: directory_list=[path_to_dir1,path_to_dir2]')
    elif type(directory_list) == str and directory_list == 'all':
        basedir = cfg.project.data_path
        directory_list = utils.get_subfiles(basedir, 'directory')
    elif isinstance(directory_list, str):
        directory_list = [directory_list]
    elif isinstance(directory_list, list):
        pass
    elif isinstance(directory_list, ListConfig):
        directory_list = OmegaConf.to_container(directory_list)
    else:
        raise ValueError(
            'unknown value for directory list: {}'.format(directory_list))

    # video files are found in your input list of directories using the records.yaml file that should be present
    # in each directory
    records = []
    for directory in directory_list:
        assert os.path.isdir(directory), 'Not a directory: {}'.format(
            directory)
        record = projects.get_record_from_subdir(directory)
        assert record['rgb'] is not None
        records.append(record)
    assert cfg.feature_extractor.n_flows + 1 == cfg.flow_generator.n_rgb, 'Flow generator inputs must be one greater ' \
                                                                          'than feature extractor num flows '

    input_images = cfg.feature_extractor.n_flows + 1
    mode = '3d' if '3d' in cfg.feature_extractor.arch.lower() else '2d'
    # get the validation transforms. should have resizing, etc
    cpu_transform = get_cpu_transforms(cfg.augs)['val']
    gpu_transform = get_gpu_transforms(cfg.augs, mode)['val']
    log.info('gpu_transform: {}'.format(gpu_transform))

    rgb = []
    for record in records:
        rgb.append(record['rgb'])

    feature_extractor_weights = projects.get_weightfile_from_cfg(
        cfg, 'feature_extractor')
    assert os.path.isfile(feature_extractor_weights)
    run_files = utils.get_run_files_from_weights(feature_extractor_weights)
    if cfg.inference.use_loaded_model_cfg:
        loaded_config_file = run_files['config_file']
        loaded_cfg = OmegaConf.load(loaded_config_file)
        loaded_model_cfg = loaded_cfg.feature_extractor
        current_model_cfg = cfg.feature_extractor
        model_cfg = OmegaConf.merge(current_model_cfg, loaded_model_cfg)
        cfg.feature_extractor = model_cfg
        # we don't want to use the weights that the trained model was initialized with, but the weights after training
        # therefore, overwrite the loaded configuration with the current weights
        cfg.feature_extractor.weights = feature_extractor_weights

    model_components = build_feature_extractor(cfg)
    _, _, _, _, model = model_components
    device = 'cuda:{}'.format(cfg.compute.gpu_id)

    metrics_file = run_files['metrics_file']
    assert os.path.isfile(metrics_file)
    best_epoch = utils.get_best_epoch_from_weightfile(
        feature_extractor_weights)
    log.info('best epoch from loaded file: {}'.format(best_epoch))
    with h5py.File(metrics_file, 'r') as f:
        try:
            thresholds = f['val']['metrics_by_threshold']['optimum'][
                best_epoch, :]
        except KeyError:
            # backwards compatibility
            thresholds = f['threshold_curves']['val']['optimum'][best_epoch, :]
    log.info('thresholds: {}'.format(thresholds))

    class_names = list(cfg.project.class_names)
    if len(thresholds) != len(class_names):
        error_message = '''Number of classes in trained model: {}
            Number of classes in project: {}
            Did you add or remove behaviors after training this model? If so, please retrain!
        '''.format(len(thresholds), len(class_names))
        raise ValueError(error_message)
    class_names = np.array(class_names)
    postprocessor = get_postprocessor_from_cfg(cfg, thresholds)
    extract(rgb,
            model,
            final_activation=cfg.feature_extractor.final_activation,
            thresholds=thresholds,
            postprocessor=postprocessor,
            mean_by_channels=cfg.augs.normalization.mean,
            fusion=cfg.feature_extractor.fusion,
            num_rgb=input_images,
            latent_name=latent_name,
            device=device,
            cpu_transform=cpu_transform,
            gpu_transform=gpu_transform,
            ignore_error=cfg.inference.ignore_error,
            overwrite=cfg.inference.overwrite,
            class_names=class_names,
            num_workers=cfg.compute.num_workers,
            batch_size=cfg.compute.batch_size)
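# Usage sketch (hedged): build a configuration as the docstring suggests and run
# inference on every directory. make_feature_extractor_inference_cfg is named in the
# docstring above; its exact signature is an assumption.
from deepethogram import configuration

cfg = configuration.make_feature_extractor_inference_cfg(project_path='/path/to/project')
cfg.inference.directory_list = 'all'  # expands to every subdirectory of project.data_path
cfg.inference.overwrite = True
feature_extractor_inference(cfg)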
Example #16
def main(cfg: DictConfig):
    # turn "models" in your project configuration to "full/path/to/models"
    cfg = utils.get_absolute_paths_from_cfg(cfg)
    log.info('configuration used in inference: ')
    log.info(cfg.pretty())
    if cfg.sequence.latent_name is None:
        latent_name = cfg.feature_extractor.arch
    else:
        latent_name = cfg.sequence.latent_name
    directory_list = cfg.inference.directory_list
    if directory_list is None or len(directory_list) == 0:
        raise ValueError('must pass list of directories from command line. '
                         'Ex: directory_list=[path_to_dir1,path_to_dir2]')
    elif type(directory_list) == str and directory_list == 'all':
        basedir = cfg.project.data_path
        directory_list = utils.get_subfiles(basedir, 'directory')

    # video files are found in your input list of directories using the records.yaml file that should be present
    # in each directory
    records = []
    for directory in directory_list:
        assert os.path.isdir(directory), 'Not a directory: {}'.format(
            directory)
        record = projects.get_record_from_subdir(directory)
        assert record['rgb'] is not None
        records.append(record)
    assert cfg.feature_extractor.n_flows + 1 == cfg.flow_generator.n_rgb, 'Flow generator inputs must be one greater ' \
                                                                          'than feature extractor num flows '

    input_images = cfg.feature_extractor.n_flows + 1
    mode = '3d' if '3d' in cfg.feature_extractor.arch.lower() else '2d'
    # get the validation transforms. should have resizing, etc
    transform = get_transforms(cfg.augs, input_images, mode)['val']

    rgb = []
    for record in records:
        rgb.append(record['rgb'])

    model = build_feature_extractor(cfg)
    device = 'cuda:{}'.format(cfg.compute.gpu_id)
    feature_extractor_weights = projects.get_weightfile_from_cfg(
        cfg, 'feature_extractor')
    metrics_file = os.path.join(os.path.dirname(feature_extractor_weights),
                                'classification_metrics.h5')
    assert os.path.isfile(metrics_file)
    with h5py.File(metrics_file, 'r') as f:
        thresholds = f['threshold_curves']['val']['optimum'][:]
        log.info('thresholds: {}'.format(thresholds))
    class_names = list(cfg.project.class_names)
    class_names = np.array(class_names)
    extract(rgb,
            model,
            final_activation=cfg.feature_extractor.final_activation,
            thresholds=thresholds,
            fusion=cfg.feature_extractor.fusion,
            num_rgb=input_images,
            latent_name=latent_name,
            device=device,
            transform=transform,
            ignore_error=cfg.inference.ignore_error,
            overwrite=cfg.inference.overwrite,
            class_names=class_names,
            conv_2d=(mode == '2d'))

    # update each record file in the subdirectory to add our new output files
    projects.write_all_records(cfg.project.data_path)
Example #17
def flow_generator_inference(cfg):
    # make configuration
    cfg = projects.setup_run(cfg)
    # turn "models" in your project configuration to "full/path/to/models"
    log.info('args: {}'.format(' '.join(sys.argv)))
    log.info('configuration used in inference: ')
    log.info(OmegaConf.to_yaml(cfg))
    if ('sequence' not in cfg.keys() or 'latent_name' not in cfg.sequence.keys()
            or cfg.sequence.latent_name is None):
        latent_name = cfg.feature_extractor.arch
    else:
        latent_name = cfg.sequence.latent_name
    log.info('Latent name used in HDF5 file: {}'.format(latent_name))
    directory_list = cfg.inference.directory_list

    # figure out which videos to run inference on
    if directory_list is None or len(directory_list) == 0:
        raise ValueError('must pass list of directories from command line. '
                         'Ex: directory_list=[path_to_dir1,path_to_dir2]')
    elif type(directory_list) == str and directory_list == 'all':
        basedir = cfg.project.data_path
        directory_list = utils.get_subfiles(basedir, 'directory')
    elif isinstance(directory_list, str):
        directory_list = [directory_list]
    elif isinstance(directory_list, list):
        pass
    elif isinstance(directory_list, ListConfig):
        directory_list = OmegaConf.to_container(directory_list)
    else:
        raise ValueError(
            'unknown value for directory list: {}'.format(directory_list))

    # video files are found in your input list of directories using the records.yaml file that should be present
    # in each directory
    records = []
    for directory in directory_list:
        assert os.path.isdir(directory), 'Not a directory: {}'.format(
            directory)
        record = projects.get_record_from_subdir(directory)
        assert record['rgb'] is not None
        records.append(record)
    rgb = []
    for record in records:
        rgb.append(record['rgb'])

    assert cfg.feature_extractor.n_flows + 1 == cfg.flow_generator.n_rgb, 'Flow generator inputs must be one greater ' \
                                                                          'than feature extractor num flows '
    # set up gpu augmentation
    input_images = cfg.feature_extractor.n_flows + 1
    mode = '3d' if '3d' in cfg.feature_extractor.arch.lower() else '2d'
    # get the validation transforms. should have resizing, etc
    cpu_transform = get_cpu_transforms(cfg.augs)['val']
    gpu_transform = get_gpu_transforms(cfg.augs, mode)
    log.info('gpu_transform: {}'.format(gpu_transform))

    flow_generator_weights = projects.get_weightfile_from_cfg(
        cfg, 'flow_generator')
    assert os.path.isfile(flow_generator_weights)
    run_files = get_run_files_from_weights(flow_generator_weights,
                                           'opticalflow')
    if cfg.inference.use_loaded_model_cfg:
        loaded_config_file = run_files['config_file']
        loaded_cfg = OmegaConf.load(loaded_config_file)
        loaded_model_cfg = loaded_cfg.flow_generator
        current_model_cfg = cfg.flow_generator
        model_cfg = OmegaConf.merge(current_model_cfg, loaded_model_cfg)
        cfg.flow_generator = model_cfg
        # we don't want to use the weights that the trained model was initialized with, but the weights after training
        # therefore, overwrite the loaded configuration with the current weights
        cfg.flow_generator.weights = flow_generator_weights
    model = build_flow_generator(cfg)
    model = utils.load_weights(model, flow_generator_weights, device='cpu')
    log.info('model loaded')
    device = 'cuda:{}'.format(cfg.compute.gpu_id)
    model = model.to(device)

    # hard-coded visualization settings for the saved flow movies
    movie_format = 'ffmpeg'
    maxval = 5
    polar = True
    save_rgb_side_by_side = True
    for movie in tqdm(rgb):
        out_video = os.path.splitext(movie)[0] + '_flows'
        if movie_format == 'directory':
            pass
        elif movie_format == 'hdf5':
            out_video += '.h5'
        elif movie_format == 'ffmpeg':
            out_video += '.mp4'
        else:
            out_video += '.avi'
        if os.path.isdir(out_video):
            shutil.rmtree(out_video)
        elif os.path.isfile(out_video):
            os.remove(out_video)

        extract_movie(movie,
                      out_video,
                      model,
                      device,
                      cpu_transform,
                      gpu_transform,
                      mean_by_channels=cfg.augs.normalization.mean,
                      num_workers=1,
                      num_rgb=input_images,
                      maxval=maxval,
                      polar=polar,
                      movie_format=movie_format,
                      save_rgb_side_by_side=save_rgb_side_by_side)
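# Output naming sketch, restating the movie_format branches above:
#   'directory' -> <movie>_flows/     'hdf5'  -> <movie>_flows.h5
#   'ffmpeg'    -> <movie>_flows.mp4  (other) -> <movie>_flows.avi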
Example #18
def sequence_inference(cfg: DictConfig):
    cfg = projects.setup_run(cfg)
    log.info('args: {}'.format(' '.join(sys.argv)))
    # turn "models" in your project configuration to "full/path/to/models"
    log.info('configuration used: ')
    log.info(OmegaConf.to_yaml(cfg))

    weights = projects.get_weightfile_from_cfg(cfg, model_type='sequence')
    assert weights is not None, 'Must either specify a weightfile or use reload.latest=True'

    run_files = utils.get_run_files_from_weights(weights)
    if cfg.sequence.latent_name is None:
        # find the latent name used in the weight file you loaded
        loaded_cfg = utils.load_yaml(run_files['config_file'])
        latent_name = loaded_cfg['sequence']['latent_name']
        # if this latent name is also None, use the arch of the feature extractor
        # this should never happen
        if latent_name is None:
            latent_name = loaded_cfg['feature_extractor']['arch']
    else:
        latent_name = cfg.sequence.latent_name

    if cfg.inference.use_loaded_model_cfg:
        output_name = cfg.sequence.output_name
        loaded_config_file = run_files['config_file']
        loaded_model_cfg = OmegaConf.load(loaded_config_file).sequence
        current_model_cfg = cfg.sequence
        model_cfg = OmegaConf.merge(current_model_cfg, loaded_model_cfg)
        cfg.sequence = model_cfg
        # we don't want to use the weights that the trained model was initialized with, but the weights after training
        # therefore, overwrite the loaded configuration with the current weights
        cfg.sequence.weights = weights
        cfg.sequence.latent_name = latent_name
        cfg.sequence.output_name = output_name
    log.info('latent name used for running sequence inference: {}'.format(latent_name))

    # the output name will be a group in the output hdf5 dataset containing probabilities, etc
    if cfg.sequence.output_name is None:
        output_name = cfg.sequence.arch
    else:
        output_name = cfg.sequence.output_name
    directory_list = cfg.inference.directory_list
    if directory_list is None or len(directory_list) == 0:
        raise ValueError('must pass list of directories from command line. '
                         'Ex: directory_list=[path_to_dir1,path_to_dir2] or directory_list=all')
    elif type(directory_list) == str and directory_list == 'all':
        basedir = cfg.project.data_path
        directory_list = utils.get_subfiles(basedir, 'directory')

    outputfiles = []
    for directory in directory_list:
        assert os.path.isdir(directory), 'Not a directory: {}'.format(directory)
        record = projects.get_record_from_subdir(directory)
        assert record['output'] is not None
        outputfiles.append(record['output'])

    # 1024: assumed dimensionality of the saved feature vectors; depends on the feature extractor
    model = build_model_from_cfg(cfg, 1024, len(cfg.project.class_names))
    log.info('model: {}'.format(model))

    model = utils.load_weights(model, weights)

    metrics_file = run_files['metrics_file']
    assert os.path.isfile(metrics_file)
    best_epoch = utils.get_best_epoch_from_weightfile(weights)
    log.info('best epoch from loaded file: {}'.format(best_epoch))
    with h5py.File(metrics_file, 'r') as f:
        try:
            thresholds = f['val']['metrics_by_threshold']['optimum'][best_epoch, :]
        except KeyError:
            # backwards compatibility
            thresholds = f['threshold_curves']['val']['optimum'][:]  # [best_epoch, :]
            if thresholds.ndim > 1:
                thresholds = thresholds[best_epoch, :]
    log.info('thresholds: {}'.format(thresholds))

    class_names = list(cfg.project.class_names)
    if len(thresholds) != len(class_names):
        error_message = '''Number of classes in trained model: {}
            Number of classes in project: {}
            Did you add or remove behaviors after training this model? If so, please retrain!
        '''.format(len(thresholds), len(class_names))
        raise ValueError(error_message)

    device = 'cuda:{}'.format(cfg.compute.gpu_id)
    class_names = np.array(class_names)
    extract(model,
            outputfiles,
            thresholds,
            cfg.feature_extractor.final_activation,
            latent_name,
            output_name,
            cfg.sequence.sequence_length,
            True,
            device,
            cfg.inference.ignore_error,
            cfg.inference.overwrite,
            class_names=class_names)
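# Invocation sketch (hedged; module path is an assumption): run sequence inference
# on all directories, as the error message above suggests:
#   python -m deepethogram.sequence.inference reload.latest=True \
#       inference.directory_list=all inference.overwrite=True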