Exemplo n.º 1
0
        def test_dbscan_spmd(self):
            """Check that distributed DBSCAN clusters core points like batch DBSCAN.

            The full data set is clustered once in batch mode, then this
            process's slice of rows is clustered with ``distributed=True``.
            Cluster ids may be numbered differently by the two runs, so the
            test verifies that every batch cluster id maps to exactly one
            distributed cluster id over the core points of this slice.
            """
            epsilon = 0.04
            minObservations = 45
            data = np_read_csv(
                os.path.join(".", 'data', 'batch', 'dbscan_dense.csv'))

            batch_algo = d4p.dbscan(minObservations=minObservations,
                                    epsilon=epsilon,
                                    resultsToCompute='computeCoreIndices')
            batch_result = batch_algo.compute(data)

            # Rows per process: integer division, each process gets an
            # equally sized contiguous range of rows.
            rpp = data.shape[0] // d4p.num_procs()
            node_stride = rpp * d4p.my_procid()
            node_range = range(node_stride, node_stride + rpp)
            node_data = data[node_range, :]

            spmd_algo = d4p.dbscan(minObservations=minObservations,
                                   epsilon=epsilon,
                                   distributed=True)
            spmd_result = spmd_algo.compute(node_data)

            # Build the core-index lookup once; testing membership against
            # the array inside the loop would rescan it for every row.
            core_indices = set(batch_result.coreIndices.ravel().tolist())

            # clusters can get different indexes in batch and spmd algos,
            # so map each batch cluster id to the first spmd id seen for it
            cluster_index_dict = {}
            for i in node_range:
                # border points assignments can be different
                # with different amount of nodes but cores are the same
                if i not in core_indices:
                    continue
                batch_label = batch_result.assignments[i][0]
                spmd_label = spmd_result.assignments[i - node_stride][0]
                # First occurrence registers the mapping; later occurrences
                # must agree with it.
                expected = cluster_index_dict.setdefault(batch_label,
                                                         spmd_label)
                self.assertEqual(expected, spmd_label)
Exemplo n.º 2
0
 def verify_on_dbscan(self, X):
     """Check DBSCAN gives identical labels for X and its contiguous copy.

     Runs d4p.dbscan with the same settings on ``X`` as given and on
     ``np.ascontiguousarray(X)``, asserts the assignments are equal, and
     asserts that more than two distinct assignment values were produced.
     """
     settings = dict(epsilon=2.0, minObservations=5, fptype='double')

     result_as_given = d4p.dbscan(**settings).compute(X)
     contiguous_input = np.ascontiguousarray(X)
     result_contiguous = d4p.dbscan(**settings).compute(contiguous_input)

     labels_match = np.array_equal(result_as_given.assignments,
                                   result_contiguous.assignments)
     self.assertTrue(labels_match)

     # Guard against a degenerate clustering that would make the
     # comparison above trivially true.
     distinct_labels = np.unique(result_as_given.assignments)
     self.assertTrue(len(distinct_labels) > 2)
Exemplo n.º 3
0
def _daal_dbscan(X, eps=0.5, min_samples=5, sample_weight=None):
    if eps <= 0.0:
        raise ValueError("eps must be positive.")

    X = check_array(X, dtype=[np.float64, np.float32])
    if sample_weight is not None:
        sample_weight = _check_sample_weight(sample_weight, X)
        ww = make2d(sample_weight)
    else:
        ww = None

    XX = make2d(X)

    fpt = getFPType(XX)
    alg = daal4py.dbscan(method='defaultDense',
                         fptype=fpt,
                         epsilon=float(eps),
                         minObservations=int(min_samples),
                         memorySavingMode=False,
                         resultsToCompute="computeCoreIndices")

    daal_res = alg.compute(XX, ww)
    n_clusters = daal_res.nClusters[0, 0]
    assignments = daal_res.assignments.ravel()
    if daal_res.coreIndices is not None:
        core_ind = daal_res.coreIndices.ravel()
    else:
        core_ind = np.array([], dtype=np.intc)

    return (core_ind, assignments)
Exemplo n.º 4
0
def main(readcsv=read_csv, method='defaultDense'):
    """Run batch DBSCAN on the bundled dense data set and return the result.

    ``readcsv`` is called as ``readcsv(path, range(2))`` to load the data.
    ``method`` is accepted but not used inside this function.
    The returned result object carries assignments plus the core indices
    and core observations that are requested from the algorithm.
    """
    csv_path = "./data/batch/dbscan_dense.csv"
    eps = 0.02
    min_obs = 180

    # Read the input table
    table = readcsv(csv_path, range(2))

    # Set up DBSCAN, additionally asking for the indices and the
    # observations of the cluster cores, then run it.
    # (Construction and compute() could also be chained into a single
    # expression.)
    dbscan_algo = d4p.dbscan(
        epsilon=eps,
        minObservations=min_obs,
        resultsToCompute='computeCoreIndices|computeCoreObservations',
    )
    result = dbscan_algo.compute(table)

    # Sanity-check result shapes: one assignment per input row, and one
    # core observation row (with all input columns) per core index.
    expected_assignments_shape = (table.shape[0], 1)
    assert result.assignments.shape == expected_assignments_shape
    assert result.coreObservations.shape == (result.coreIndices.shape[0],
                                             table.shape[1])

    return result
Exemplo n.º 5
0
def compute(data, minObservations, epsilon):
    """Run d4p.dbscan on ``data`` in memory-saving mode and return its result.

    Core indices and core observations are requested in addition to the
    default outputs.
    """
    dbscan_options = {
        'minObservations': minObservations,
        'epsilon': epsilon,
        'resultsToCompute': 'computeCoreIndices|computeCoreObservations',
        'memorySavingMode': True,
    }
    return d4p.dbscan(**dbscan_options).compute(data)
Exemplo n.º 6
0
def main(method='defaultDense'):
    """Run distributed DBSCAN on this process's share of the dense data set.

    The CSV file is loaded in full, then cut down to the contiguous block
    of rows belonging to the current process before computing.
    ``method`` is accepted but not used inside this function.
    """
    csv_path = "./data/batch/dbscan_dense.csv"
    eps = 0.04
    min_obs = 45

    # Read everything, then keep only the rows for this process
    full_table = np.loadtxt(csv_path, delimiter=',')
    rows_per_proc = int(full_table.shape[0] / d4p.num_procs())
    first_row = rows_per_proc * d4p.my_procid()
    local_rows = full_table[first_row: first_row + rows_per_proc, :]

    # Build the distributed algorithm object and run it on the local rows
    dbscan_algo = d4p.dbscan(epsilon=eps,
                             minObservations=min_obs,
                             distributed=True)
    return dbscan_algo.compute(local_rows)
Exemplo n.º 7
0
def _daal_dbscan(X, eps=0.5, min_samples=5, sample_weight=None):
    """Cluster X with daal4py DBSCAN and return ``(core_ind, assignments)``.

    Both returned arrays are flattened with ``ravel()``. When daal4py
    reports no core indices, ``core_ind`` is an empty ``np.intc`` array.
    """
    if sample_weight is None:
        weights_2d = None
    else:
        weights_2d = make2d(sample_weight)
    data_2d = make2d(X)

    dbscan_options = dict(
        method='defaultDense',
        fptype=getFPType(data_2d),
        epsilon=float(eps),
        minObservations=int(min_samples),
        memorySavingMode=False,
        resultsToCompute="computeCoreIndices",
    )
    outcome = daal4py.dbscan(**dbscan_options).compute(data_2d, weights_2d)

    labels = outcome.assignments.ravel()
    core = outcome.coreIndices
    # Substitute an empty index array when no core indices are reported.
    core_ind = np.array([], dtype=np.intc) if core is None else core.ravel()

    return (core_ind, labels)
Exemplo n.º 8
0
def test_dbscan(X):
    """Run DBSCAN on X using eps/min_samples from ``params``; return the result.

    The floating-point type is derived from ``X`` via ``getFPType`` and
    core indices are requested from the algorithm.
    """
    dbscan_options = {
        'fptype': getFPType(X),
        'epsilon': params.eps,
        'minObservations': params.min_samples,
        'resultsToCompute': 'computeCoreIndices',
    }
    return dbscan(**dbscan_options).compute(X)