Пример #1
0
def refine_akmeans(data, datax2_clusterx, centroids, max_iters=5,
                   flann_params=None, cache_dir=None, cfgstr='',
                   use_data_hash=True, akmeans_cfgstr=None):
    """Refine previously computed approximate k-means centroids.

    Continues iterating from the given assignments and centroids via
    ``_akmeans_iterate`` and then writes both results to the cache.

    Args:
        data: the data that was clustered.
        datax2_clusterx: current data-to-cluster assignments (presumably one
            cluster index per data item -- confirm against the caller).
        centroids: current cluster centers.
        max_iters: iteration limit forwarded to ``_akmeans_iterate``.
        flann_params: dict of FLANN parameters. ``None`` (the default) is
            treated as an empty dict.
        cache_dir: directory handed to ``ut.save_cache``.
        cfgstr: extra configuration string, used only when
            ``akmeans_cfgstr`` has to be built.
        use_data_hash: forwarded to ``nn.get_flann_cfgstr``; used only when
            ``akmeans_cfgstr`` has to be built.
        akmeans_cfgstr: complete cache configuration string. When ``None`` it
            is built with ``nn.get_flann_cfgstr``.

    Returns:
        tuple: ``(datax2_clusterx, centroids)`` after refinement.
    """
    # A dict literal as a default would be one object shared by every call;
    # create a fresh one per call instead.
    if flann_params is None:
        flann_params = {}
    print('[akmeans.precompute] refining:')
    if akmeans_cfgstr is None:
        akmeans_cfgstr = nn.get_flann_cfgstr(data, flann_params, cfgstr, use_data_hash)
    # NOTE(review): the trailing 0 and 10 are positional arguments whose
    # meaning is defined by _akmeans_iterate (not visible here) -- presumably
    # convergence settings; confirm before changing them.
    (datax2_clusterx, centroids) = _akmeans_iterate(
        data, centroids, datax2_clusterx, max_iters, flann_params, 0, 10)
    # Persist the refined clustering under the same configuration string.
    ut.save_cache(cache_dir, CLUSTERS_FNAME, akmeans_cfgstr, centroids)
    ut.save_cache(cache_dir, DATAX2CL_FNAME, akmeans_cfgstr, datax2_clusterx)
    return (datax2_clusterx, centroids)
Пример #2
0
def precompute_akmeans(data, num_clusters, max_iters=5, flann_params=None,
                       cache_dir=None, force_recomp=False, use_data_hash=True,
                       cfgstr='', refine=False, akmeans_cfgstr=None):
    """Compute approximate k-means with built-in caching.

    Unless ``force_recomp`` is set, a previously cached clustering is loaded
    (and optionally refined). On a cache miss, or when recomputation is
    forced, ``akmeans`` is run and its results are written to the cache.

    Args:
        data: the data to cluster.
        num_clusters: number of clusters, forwarded to ``akmeans``.
        max_iters: iteration limit forwarded to ``akmeans`` and
            ``refine_akmeans``.
        flann_params: dict of FLANN parameters. ``None`` (the default) is
            treated as an empty dict.
        cache_dir: cache directory; required.
        force_recomp: when true, skip the cache lookup and recompute.
        use_data_hash: forwarded to ``nn.get_flann_cfgstr``; used only when
            ``akmeans_cfgstr`` has to be built.
        cfgstr: extra configuration string, used only when
            ``akmeans_cfgstr`` has to be built.
        refine: when true and the cache load succeeds, refine the loaded
            clustering with ``refine_akmeans`` before returning it.
        akmeans_cfgstr: complete cache configuration string. When ``None`` it
            is built with ``nn.get_flann_cfgstr``.

    Returns:
        tuple: ``(datax2_clusterx, centroids)``.

    Raises:
        AssertionError: if ``cache_dir`` is ``None``.

    Note:
        Only an ``IOError`` raised while loading the cache counts as a cache
        miss. Errors raised during refinement propagate to the caller instead
        of silently triggering a full recomputation.
    """
    print('[akmeans] pre_akmeans()')
    # Raised explicitly (rather than via ``assert``) so the check is still
    # enforced when Python runs with -O; the exception type is unchanged.
    if cache_dir is None:
        raise AssertionError('choose a cache directory')
    # A dict literal as a default would be one object shared by every call;
    # create a fresh one per call instead.
    if flann_params is None:
        flann_params = {}
    # Build a cfgstr if the full one is not specified
    if akmeans_cfgstr is None:
        akmeans_cfgstr = nn.get_flann_cfgstr(data, flann_params, cfgstr, use_data_hash)
    if not force_recomp:
        try:
            # Try to load a previous clustering
            centroids = ut.load_cache(cache_dir, CLUSTERS_FNAME, akmeans_cfgstr)
            datax2_clusterx = ut.load_cache(cache_dir, DATAX2CL_FNAME, akmeans_cfgstr)
        except IOError as ex:
            ut.printex(ex, 'cache miss', iswarning=True)
        else:
            print('[akmeans.precompute] load successful')
            if refine:
                # Refine the cluster centers if specified
                (datax2_clusterx, centroids) = refine_akmeans(
                    data, datax2_clusterx, centroids,
                    max_iters=max_iters, flann_params=flann_params,
                    cache_dir=cache_dir, akmeans_cfgstr=akmeans_cfgstr)
            return (datax2_clusterx, centroids)
    # First time computation (cache miss or forced recompute)
    print('[akmeans.precompute] pre_akmeans(): calling akmeans')
    (datax2_clusterx, centroids) = akmeans(data, num_clusters, max_iters, flann_params)
    print('[akmeans.precompute] save and return')
    ut.save_cache(cache_dir, CLUSTERS_FNAME, akmeans_cfgstr, centroids)
    ut.save_cache(cache_dir, DATAX2CL_FNAME, akmeans_cfgstr, datax2_clusterx)
    return (datax2_clusterx, centroids)