Пример #1
0
def compute_gmm_given_dataset(src_cfg, nr_clusters, **kwargs):
    """ Fits a GMM on PCA-projected descriptors and stores it with the dataset.

    A `Dataset` is built from `src_cfg`; a subsample of its descriptors is
    loaded, projected with the PCA object read from
    `<FEAT_DIR>/pca/pca_<nr_pca_components>.pkl`, and handed to `compute_gmm`
    together with `nr_clusters`. The result is passed to `save_gmm` with the
    path `<FEAT_DIR>/gmm/gmm_<nr_clusters>`. Nothing is returned.

    Optional keyword arguments and their defaults:
        ip_type -- forwarded to `Dataset` ('dense5.track15mbh').
        suffix -- forwarded to `Dataset` ('').
        nr_pca_components -- selects the PCA file (64).
        nr_iterations -- forwarded to `compute_gmm` (100).
        nr_threads -- forwarded to `compute_gmm` (number of CPUs).
        seed -- forwarded to `compute_gmm` (1).
        nr_redos -- forwarded to `compute_gmm` (4).

    Unrecognised keyword arguments are ignored.

    """
    # Defaults first; anything the caller supplied overrides them.
    options = dict(
        {
            'ip_type': 'dense5.track15mbh',
            'nr_iterations': 100,
            'nr_threads': multiprocessing.cpu_count(),
            'seed': 1,
            'nr_redos': 4,
            'suffix': '',
            'nr_pca_components': 64,
        },
        **kwargs)

    dataset = Dataset(
        src_cfg, ip_type=options['ip_type'], suffix=options['suffix'])
    pca_path = os.path.join(
        dataset.FEAT_DIR, 'pca', 'pca_%d.pkl' % options['nr_pca_components'])

    # Descriptors are loaded before the PCA object, then projected.
    descriptors = load_subsample_descriptors(dataset)
    projected = load_pca(pca_path).transform(descriptors)

    # Do the computation.
    gmm = compute_gmm(
        projected,
        nr_clusters,
        options['nr_iterations'],
        options['nr_threads'],
        options['seed'],
        options['nr_redos'])

    gmm_path = os.path.join(dataset.FEAT_DIR, 'gmm', 'gmm_%d' % nr_clusters)
    save_gmm(gmm, gmm_path)
Пример #2
0
def compute_gmm_given_dataset(src_cfg, nr_clusters, **kwargs):
    """ Trains a GMM for a dataset, following the dataset's file layout.

    Steps, in order:
      1. Build a `Dataset` from `src_cfg` (with `ip_type` and `suffix`).
      2. Load a subsample of descriptors with `load_subsample_descriptors`.
      3. Load the PCA object from
         `<FEAT_DIR>/pca/pca_<nr_pca_components>.pkl` and transform the
         descriptors with it.
      4. Call `compute_gmm` on the transformed data with `nr_clusters`,
         `nr_iterations`, `nr_threads`, `seed` and `nr_redos`.
      5. Hand the result to `save_gmm` with the path
         `<FEAT_DIR>/gmm/gmm_<nr_clusters>`.

    Keyword defaults: ip_type='dense5.track15mbh', nr_iterations=100,
    nr_threads=multiprocessing.cpu_count(), seed=1, nr_redos=4, suffix='',
    nr_pca_components=64. Other keyword arguments are silently ignored.
    The function returns nothing.

    """
    option = kwargs.get

    # The CPU count is queried up front, even when the caller passes
    # `nr_threads` explicitly.
    cpu_total = multiprocessing.cpu_count()

    # Positional tail of the `compute_gmm` call, in the order it expects.
    fit_settings = (
        option('nr_iterations', 100),
        option('nr_threads', cpu_total),
        option('seed', 1),
        option('nr_redos', 4),
    )

    dataset = Dataset(
        src_cfg,
        ip_type=option('ip_type', 'dense5.track15mbh'),
        suffix=option('suffix', ''))
    pca_file = os.path.join(
        dataset.FEAT_DIR, 'pca',
        'pca_%d.pkl' % option('nr_pca_components', 64))

    samples = load_subsample_descriptors(dataset)
    reducer = load_pca(pca_file)
    reduced = reducer.transform(samples)

    # Do the computation.
    mixture = compute_gmm(reduced, nr_clusters, *fit_settings)

    save_gmm(
        mixture,
        os.path.join(dataset.FEAT_DIR, 'gmm', 'gmm_%d' % nr_clusters))
Пример #3
0
def compute_pca_given_dataset(src_cfg, **kwargs):
    """ Computes a PCA for a dataset, following the dataset's file layout.

    A `Dataset` is built from `src_cfg`, a subsample of its descriptors is
    loaded with `load_subsample_descriptors`, and `compute_pca` is run on it.
    The result is passed to `save_pca` with the path
    `<FEAT_DIR>/pca/pca_<n_components>.pkl`. Nothing is returned.

    Optional keyword arguments and their defaults:
        ip_type -- forwarded to `Dataset` ('dense5.track15mbh').
        suffix -- forwarded to `Dataset` ('').
        n_components -- forwarded to `compute_pca` and used in the output
            file name (64).

    Unrecognised keyword arguments are ignored.

    """
    settings = dict(
        {'ip_type': 'dense5.track15mbh', 'n_components': 64, 'suffix': ''},
        **kwargs)
    n_components = settings['n_components']

    dataset = Dataset(
        src_cfg, ip_type=settings['ip_type'], suffix=settings['suffix'])

    # Do the computation.
    pca = compute_pca(load_subsample_descriptors(dataset), n_components)

    pca_path = os.path.join(
        dataset.FEAT_DIR, 'pca', 'pca_%d.pkl' % n_components)
    save_pca(pca, pca_path)