Exemplo n.º 1
0
 def get_model_ll(points, labels, clusters):
     """
     Query DPMMSubClusters cluster statistics and keep only the first entry.

     :param points: data
     :param labels: labels
     :param clusters: vector of clusters distributions
     :return: first element of the ``cluster_statistics`` result, i.e. the
         vector holding each cluster's average log-likelihood
     """
     statistics = DPMMSubClusters.cluster_statistics(points, labels, clusters)
     avg_ll_per_cluster = statistics[0]
     return avg_ll_per_cluster
Exemplo n.º 2
0
 def predict(model, data):
     """
     Predict cluster labels for ``data`` with an already fitted DPMM model.

     The model is the one located in ``fit(...)[2][-1]`` (kept there for
     backwards compatibility). Prediction uses each cluster's predictive
     posterior, in contrast to training, which samples from the posterior.

     :param model: a DPMM (Julia object) model, returned from fit
     :param data: the data to predict on, DxN (same layout as the fit argument)
     :return: labels
     """
     predicted_labels = DPMMSubClusters.predict(model, data)
     return predicted_labels
Exemplo n.º 3
0
 def fit(data,alpha, prior = None,
         iterations= 100, verbose = False,
         burnout = 15, gt = None, outlier_weight = 0, outlier_params = None):
     """
     Wrapper for DPMMSubClusters fit, refer to "https://bgu-cs-vil.github.io/DPMMSubClusters.jl/stable/usage/" for specification.
     Note that directly working with the returned clusters can be problematic for software displaying the workspace (such as PyCharm debugger).

     :param data: data passed through to DPMMSubClusters.fit
     :param alpha: concentration parameter passed through to DPMMSubClusters.fit
     :param prior: optional prior object exposing ``to_julia_prior()``; when None the
         prior argument is omitted from the underlying call
     :param iterations: forwarded as ``iters``
     :param verbose: forwarded as ``verbose``
     :param burnout: forwarded as ``burnout``
     :param gt: forwarded as ``gt``
     :param outlier_weight: forwarded as ``outlier_weight``
     :param outlier_params: forwarded as ``outlier_params``
     :return: labels, clusters, sublabels, renormalized weights
         (``results[0]``, ``results[1]``, ``results[-1]`` and ``results[2]`` divided by its sum)
     """
     # Keyword arguments are identical for both call shapes; build them once.
     fit_kwargs = dict(iters=iterations,
                       verbose=verbose, burnout=burnout,
                       gt=gt, outlier_weight=outlier_weight,
                       outlier_params=outlier_params)
     # Identity test: ``== None`` would dispatch to a user-defined ``__eq__``.
     if prior is None:
         results = DPMMSubClusters.fit(data, alpha, **fit_kwargs)
     else:
         results = DPMMSubClusters.fit(data, prior.to_julia_prior(), alpha,
                                       **fit_kwargs)

     # Renormalize the raw weights so that they sum to one.
     weights = results[2] / np.sum(results[2])
     return results[0],results[1],results[-1],weights
Exemplo n.º 4
0
 def generate_gaussian_data(sample_count,dim,components,var):
     """
     Wrapper for DPMMSubClusters generate_gaussian_data.

     :param sample_count: number of samples
     :param dim: samples dimension
     :param components: number of components
     :param var: variance between components means
     :return: (data, gt) -- the first and second entries of the generator's result
     """
     generated = DPMMSubClusters.generate_gaussian_data(sample_count, dim, components, var)
     gt = generated[1]
     samples = generated[0]
     return samples, gt
Exemplo n.º 5
0
 def fit(data,
         alpha,
         prior=None,
         iterations=100,
         verbose=False,
         burnout=15,
         gt=None,
         outlier_weight=0,
         outlier_params=None):
     """
     Wrapper for DPMMSubClusters fit, refer to "https://bgu-cs-vil.github.io/DPMMSubClusters.jl/stable/usage/" for specification.

     Data should be DxN.
     Burnout is the interval between cluster creation and the time it may start splitting; lower burnout will lead to faster convergence, but less stable results. 15 works great.
     Note that directly working with the returned clusters can be problematic for software displaying the workspace (such as PyCharm debugger).

     :param data: data passed through to DPMMSubClusters.fit
     :param alpha: concentration parameter passed through to DPMMSubClusters.fit
     :param prior: optional prior object exposing ``to_julia_prior()``; when None the
         prior argument is omitted from the underlying call
     :param iterations: forwarded as ``iters``
     :param verbose: forwarded as ``verbose``
     :param burnout: forwarded as ``burnout``
     :param gt: forwarded as ``gt``
     :param outlier_weight: forwarded as ``outlier_weight``
     :param outlier_params: forwarded as ``outlier_params``
     :return: labels, clusters, sublabels (``results[0]``, ``results[1]``, ``results[-1]``)
     """
     # Keyword arguments are identical for both call shapes; build them once.
     fit_kwargs = dict(iters=iterations,
                       verbose=verbose,
                       burnout=burnout,
                       gt=gt,
                       outlier_weight=outlier_weight,
                       outlier_params=outlier_params)
     # Identity test: ``== None`` would dispatch to a user-defined ``__eq__``.
     if prior is None:
         results = DPMMSubClusters.fit(data, alpha, **fit_kwargs)
     else:
         results = DPMMSubClusters.fit(data,
                                       prior.to_julia_prior(),
                                       alpha,
                                       **fit_kwargs)
     return results[0], results[1], results[-1]
Exemplo n.º 6
0
 def fit(data,
         alpha,
         prior=None,
         iterations=100,
         verbose=False,
         burnout=15,
         gt=None,
         outlier_weight=0,
         outlier_params=None,
         gpu=False,
         force_kernel=2):
     """
     Wrapper for DPMMSubClusters fit, refer to "https://bgu-cs-vil.github.io/DPMMSubClusters.jl/stable/usage/" for specification.
     Note that directly working with the returned clusters can be problematic for software displaying the workspace (such as PyCharm debugger).

     With ``gpu`` set, the fit is delegated to an external executable whose path is
     read from the environment variable ``DPMM_GPU_FULL_PATH_TO_PACKAGE_IN_WINDOWS``
     or ``DPMM_GPU_FULL_PATH_TO_PACKAGE_IN_LINUX`` (chosen by ``platform.system()``).
     The data (first two axes swapped) is written to ``modelData.npy`` and the
     parameters to ``modelParams.json`` in the current working directory; the
     executable's ``result.json`` is read back and then deleted.

     :param data: data to cluster
     :param alpha: concentration parameter
     :param prior: prior object; optional on the CPU path (needs ``to_julia_prior()``),
         required on the GPU path (needs ``to_JSON()`` and ``get_type()``)
     :param iterations: number of iterations
     :param verbose: verbosity flag
     :param burnout: burnout period
     :param gt: optional ground truth; on the GPU path it must expose ``tolist()``
     :param outlier_weight: outlier weight
     :param outlier_params: outlier hyper-parameters
     :param gpu: run through the external GPU executable instead of Julia
     :param force_kernel: forwarded to the GPU executable as ``force_kernel``
     :return: CPU path: ``results[0], results[1], results[2:]``.
         GPU path: ``labels, None, [weights, iter_count]``; if the executable reports
         an error it is printed and the two-element ``[], []`` is returned
         (kept as-is for backward compatibility).
     :raises ValueError: GPU path requested without a prior
     :raises RuntimeError: GPU path on an unsupported OS, or executable path
         environment variable not set
     """
     if gpu:
         # Validate everything up front so nothing is written to disk on failure.
         if prior is None:
             raise ValueError(
                 'GPU mode requires a prior (its to_JSON() and get_type() are '
                 'passed to the executable)')
         system_name = platform.system()
         if system_name.startswith('Windows'):
             env_var = 'DPMM_GPU_FULL_PATH_TO_PACKAGE_IN_WINDOWS'
         elif system_name.startswith("Linux"):
             env_var = 'DPMM_GPU_FULL_PATH_TO_PACKAGE_IN_LINUX'
         else:
             # Previously only printed, then crashed on the unbound process handle.
             raise RuntimeError(f'Not support {system_name} OS')
         executable = os.environ.get(env_var)
         if not executable:
             raise RuntimeError(
                 f'Environment variable {env_var} must hold the full path to '
                 'the GPU executable')

         np.save("modelData.npy", np.swapaxes(data, 0, 1))
         modelParams = {
             'alpha': alpha,
             'iterations': iterations,
             'use_verbose': verbose,
             'burnout_period': burnout,
             'force_kernel': force_kernel,
             'outlier_mod': outlier_weight,
             'outlier_hyper_params': outlier_params,
             'hyper_params': prior.to_JSON()
         }
         if gt is not None:
             modelParams['gt'] = gt.tolist()
         with open('modelParams.json', 'w') as f:
             json.dump(modelParams, f)

         # Blocks until the executable exits; its output streams are inherited.
         subprocess.run([
             executable,
             "--prior_type=" + prior.get_type(),
             "--model_path=modelData.npy",
             "--params_path=modelParams.json",
             "--result_path=result.json"
         ])

         with open('result.json') as f:
             results_json = json.load(f)
         # Remove the result file on every path so a stale (possibly error)
         # result can never be picked up by a later run.
         os.remove("result.json")
         if "error" in results_json:
             print(f'Error:{results_json["error"]}')
             return [], []
         return results_json["labels"], None, [
             results_json["weights"], results_json["iter_count"]
         ]

     # CPU path: keyword arguments are identical for both call shapes.
     fit_kwargs = dict(iters=iterations,
                       verbose=verbose,
                       burnout=burnout,
                       gt=gt,
                       outlier_weight=outlier_weight,
                       outlier_params=outlier_params)
     if prior is None:
         results = DPMMSubClusters.fit(data, alpha, **fit_kwargs)
     else:
         results = DPMMSubClusters.fit(data,
                                       prior.to_julia_prior(),
                                       alpha,
                                       **fit_kwargs)
     return results[0], results[1], results[2:]
Exemplo n.º 7
0
 def to_julia_prior(self):
     """
     Build the Julia-side prior by passing ``self.alpha`` to
     ``DPMMSubClusters.multinomial_hyper``.

     :return: the object returned by ``DPMMSubClusters.multinomial_hyper``
     """
     julia_prior = DPMMSubClusters.multinomial_hyper(self.alpha)
     return julia_prior
Exemplo n.º 8
0
 def to_julia_prior(self):
     """
     Build the Julia-side prior by passing ``self.kappa``, ``self.mu``,
     ``self.nu`` and ``self.psi`` (in that order) to
     ``DPMMSubClusters.niw_hyperparams``.

     :return: the object returned by ``DPMMSubClusters.niw_hyperparams``
     """
     julia_prior = DPMMSubClusters.niw_hyperparams(
         self.kappa, self.mu, self.nu, self.psi)
     return julia_prior