def refine_akmeans(data, datax2_clusterx, centroids, max_iters=5,
                   flann_params=None, cache_dir=None, cfgstr='',
                   use_data_hash=True, akmeans_cfgstr=None):
    """
    Refine existing approximate centroids and write the result to the cache.

    Continues the clustering from the supplied centroids / assignments by
    calling ``_akmeans_iterate``, then stores both refined results with
    ``ut.save_cache`` under ``akmeans_cfgstr``.

    Args:
        data: the data being clustered.
        datax2_clusterx: current cluster assignments; handed to
            ``_akmeans_iterate`` as the previous assignment.
        centroids: current cluster centers to start refining from.
        max_iters (int): iteration budget forwarded to ``_akmeans_iterate``.
        flann_params (dict or None): FLANN parameters. ``None`` (the default)
            is treated as an empty dict; a fresh dict is created per call so
            no state is shared between calls.
        cache_dir: directory forwarded to ``ut.save_cache``.
            NOTE(review): the default ``None`` is forwarded unchanged --
            confirm ``ut.save_cache`` accepts it.
        cfgstr (str): extra config string, only used to build
            ``akmeans_cfgstr`` when that is not given.
        use_data_hash (bool): forwarded to ``nn.get_flann_cfgstr`` when
            ``akmeans_cfgstr`` is not given.
        akmeans_cfgstr (str or None): full cache key. Computed with
            ``nn.get_flann_cfgstr`` when ``None``.

    Returns:
        tuple: ``(datax2_clusterx, centroids)`` after refinement.
    """
    print('[akmeans.precompute] refining:')
    # Avoid a shared mutable default: build a new dict for every call.
    if flann_params is None:
        flann_params = {}
    if akmeans_cfgstr is None:
        akmeans_cfgstr = nn.get_flann_cfgstr(
            data, flann_params, cfgstr, use_data_hash)
    # NOTE(review): the trailing 0 and 10 are positional tuning arguments of
    # _akmeans_iterate whose meaning is not visible here -- confirm against
    # its signature before changing them.
    (datax2_clusterx, centroids) = _akmeans_iterate(
        data, centroids, datax2_clusterx, max_iters, flann_params, 0, 10)
    # Persist the refined results under the same cache key.
    ut.save_cache(cache_dir, CLUSTERS_FNAME, akmeans_cfgstr, centroids)
    ut.save_cache(cache_dir, DATAX2CL_FNAME, akmeans_cfgstr, datax2_clusterx)
    return (datax2_clusterx, centroids)
def precompute_akmeans(data, num_clusters, max_iters=5, flann_params=None,
                       cache_dir=None, force_recomp=False, use_data_hash=True,
                       cfgstr='', refine=False, akmeans_cfgstr=None):
    """
    Compute approximate k-means with built-in caching.

    Tries to load a previous clustering from ``cache_dir``. On a cache hit
    the cached result is returned (optionally refined first). On a cache
    miss, or when ``force_recomp`` is set, ``akmeans`` is run and its result
    is saved to the cache before being returned.

    Args:
        data: the data to cluster.
        num_clusters: number of clusters, forwarded to ``akmeans``.
        max_iters (int): iteration budget forwarded to ``akmeans`` /
            ``refine_akmeans``.
        flann_params (dict or None): FLANN parameters. ``None`` (the default)
            is treated as an empty dict; a fresh dict is created per call so
            no state is shared between calls.
        cache_dir: cache directory; must not be ``None``.
        force_recomp (bool): skip the cache lookup and always recompute.
        use_data_hash (bool): forwarded to ``nn.get_flann_cfgstr`` when
            ``akmeans_cfgstr`` is not given.
        cfgstr (str): extra config string, only used to build
            ``akmeans_cfgstr`` when that is not given.
        refine (bool): on a cache hit, refine the loaded clustering with
            ``refine_akmeans`` before returning it.
        akmeans_cfgstr (str or None): full cache key. Computed with
            ``nn.get_flann_cfgstr`` when ``None``.

    Returns:
        tuple: ``(datax2_clusterx, centroids)``.

    Raises:
        AssertionError: if ``cache_dir`` is ``None``.
    """
    print('[akmeans] pre_akmeans()')
    assert cache_dir is not None, 'choose a cache directory'
    # Avoid a shared mutable default: build a new dict for every call.
    if flann_params is None:
        flann_params = {}
    # Build a cfgstr if the full one is not specified
    if akmeans_cfgstr is None:
        # compute a hashstr based on the data
        akmeans_cfgstr = nn.get_flann_cfgstr(
            data, flann_params, cfgstr, use_data_hash)

    if not force_recomp:
        # Only the two cache reads are guarded: an IOError here means a cache
        # miss. Errors raised later (e.g. while refining or saving) are real
        # failures and must not be mistaken for a miss.
        try:
            centroids = ut.load_cache(
                cache_dir, CLUSTERS_FNAME, akmeans_cfgstr)
            datax2_clusterx = ut.load_cache(
                cache_dir, DATAX2CL_FNAME, akmeans_cfgstr)
        except IOError as ex:
            ut.printex(ex, 'cache miss', iswarning=True)
        else:
            print('[akmeans.precompute] load successful')
            if refine:
                # Refines the cluster centers if specified
                (datax2_clusterx, centroids) = refine_akmeans(
                    data, datax2_clusterx, centroids,
                    max_iters=max_iters, flann_params=flann_params,
                    cache_dir=cache_dir, akmeans_cfgstr=akmeans_cfgstr)
            return (datax2_clusterx, centroids)

    # First time computation (cache miss or forced recompute)
    print('[akmeans.precompute] pre_akmeans(): calling akmeans')
    (datax2_clusterx, centroids) = akmeans(
        data, num_clusters, max_iters, flann_params)
    print('[akmeans.precompute] save and return')
    ut.save_cache(cache_dir, CLUSTERS_FNAME, akmeans_cfgstr, centroids)
    ut.save_cache(cache_dir, DATAX2CL_FNAME, akmeans_cfgstr, datax2_clusterx)
    return (datax2_clusterx, centroids)