Пример #1
0
def set_reach_dist(SetOfObjects, point_index, epsilon):
    """
    Update reachability distances from one point and pick the next point.

    This function is the primary workhorse of the OPTICS algorithm. Row
    ``point_index`` of ``SetOfObjects.data`` is read as the distances from
    that point to every object (presumably ``data`` is a precomputed
    distance matrix -- confirm against the caller).

    SetOfObjects: Instantiated and prepped instance of 'setOfObjects' class.
        Reads ``data``, ``_processed`` and ``_core_dist``; updates
        ``_reachability`` in place.
    point_index: Index of the point whose row is examined. (int)
    epsilon: Kept for interface compatibility; this implementation never
        reads it. (float)

    Returns the index of the unprocessed object with the smallest
    reachability distance (ties go to the nearest object), or
    ``point_index`` when every object is already processed.
    """
    row = np.asarray(SetOfObjects.data[point_index, :]).ravel()

    # A single stable sort yields both the visiting order and the sorted
    # distances, and keeps equidistant objects in a deterministic order.
    order = np.argsort(row, kind="stable")
    distances = row[order]

    # Flattening the flags accepts both (n,) and (n, 1) shaped arrays.
    pending = np.asarray(SetOfObjects._processed).ravel()[order] < 1
    unprocessed = order[pending]
    if unprocessed.size == 0:
        # Everything is processed: hand control back to the main loop.
        return point_index

    # Reachability is the distance, floored at the core distance of the
    # current point, and never larger than a previously recorded value.
    rdistances = np.maximum(distances[pending],
                            SetOfObjects._core_dist[point_index])
    reach = np.minimum(SetOfObjects._reachability[unprocessed], rdistances)
    SetOfObjects._reachability[unprocessed] = reach

    # argmin returns the first minimum, i.e. the nearest object among ties.
    return unprocessed[np.argmin(reach)]
Пример #2
0
def set_reach_dist(SetOfObjects, point_index, epsilon):
    """
    Update reachability distances around one point and pick the next point.

    Assumes that ``SetOfObjects.query`` returns entries ordered by distance
    (smallest first). This is the case for the balltree query; switching to
    a query structure that does not do this will break things in a
    non-obvious way. When several entries are tied in reachability
    distance, the next point to be processed would no longer be the closest
    one, so different runs of OPTICS could produce different ordered lists
    and hence different clustering structure, removing reproducibility.

    SetOfObjects: prepped object exposing ``query``, ``data``,
        ``_nneighbors``, ``_processed``, ``_core_dist`` and
        ``_reachability`` (the last one is updated in place).
    point_index: index of the point whose neighborhood is queried.
    epsilon: kept for interface compatibility; never read here.

    Returns the index of the unprocessed neighbor with the smallest
    reachability distance (first one in query order among ties), or
    ``point_index`` when there is nothing left to visit.
    """
    # Local import: the NumPy aliases old SciPy re-exported at top level
    # (scipy.maximum, scipy.iterable, ...) are deprecated and missing from
    # recent SciPy releases, so NumPy is used directly.
    import numpy as np

    distances, indices = SetOfObjects.query(
        SetOfObjects.data[point_index],
        SetOfObjects._nneighbors[point_index])

    # A non-iterable (scalar) answer means there is no neighborhood to walk.
    if not np.iterable(distances):
        return point_index

    distances = np.asarray(distances).ravel()
    indices = np.asarray(indices).ravel()

    # Mask out processed neighbors; flattening the flags accepts both
    # (n,) and (n, 1) shaped arrays.
    pending = np.asarray(SetOfObjects._processed).ravel()[indices] < 1
    unprocessed = indices[pending]
    if unprocessed.size == 0:
        # Everything is already processed: return control to the main loop.
        return point_index

    rdistances = np.maximum(distances[pending],
                            SetOfObjects._core_dist[point_index])
    reach = np.minimum(SetOfObjects._reachability[unprocessed], rdistances)
    SetOfObjects._reachability[unprocessed] = reach

    # argmin keeps the first minimum, matching the stable sort it replaces.
    return unprocessed[np.argmin(reach)]
Пример #3
0
def _set_reach_dist(setofobjects, point_index, epsilon):
    """
    Update reachability distances around one point and pick the next point.

    Assumes that ``setofobjects.query`` returns entries ordered by distance
    (smallest first). This is the case for the balltree query.

    setofobjects: prepped object exposing ``query``, ``data``,
        ``_nneighbors``, ``_processed``, ``core_dists_`` and
        ``reachability_`` (the last one is updated in place).
    point_index: index of the point whose neighborhood is queried.
    epsilon: kept for interface compatibility; never read here.

    Returns the index of the unprocessed neighbor with the smallest
    reachability distance, or ``point_index`` when there is nothing left to
    visit (including the case where the query result is not iterable,
    which previously fell through and returned ``None``).
    """
    # Local import: the NumPy aliases old SciPy re-exported at top level
    # (sp.maximum, sp.argmin, ...) are deprecated and missing from recent
    # SciPy releases, so NumPy is used directly.
    import numpy as np

    dists, indices = setofobjects.query(setofobjects.data[point_index],
                                        setofobjects._nneighbors[point_index])

    # A non-iterable (scalar) answer means there is no neighborhood to walk.
    if not np.iterable(dists):
        return point_index

    dists = np.asarray(dists).ravel()
    indices = np.asarray(indices).ravel()

    # Masking processed values; n_pr is 'not processed'. Flattening the
    # flags accepts both (n,) and (n, 1) shaped arrays.
    pending = np.asarray(setofobjects._processed).ravel()[indices] < 1
    n_pr = indices[pending]
    if n_pr.size == 0:
        # Everything is already processed: return control to the main loop.
        return point_index

    rdists = np.maximum(dists[pending], setofobjects.core_dists_[point_index])
    new_reach = np.minimum(setofobjects.reachability_[n_pr], rdists)
    setofobjects.reachability_[n_pr] = new_reach

    # Define return order based on reachability distance.
    return n_pr[np.argmin(new_reach)]
Пример #4
0
def _set_reach_dist(SetOfObjects, point_index, epsilon):
    """
    Update reachability distances around one point and pick the next point.

    Assumes that ``SetOfObjects.query`` returns entries ordered by distance
    (smallest first). This is the case for the balltree query.

    SetOfObjects: prepped object exposing ``query``, ``data``,
        ``_nneighbors``, ``_processed``, ``_core_dist`` and
        ``_reachability`` (the last one is updated in place).
    point_index: index of the point whose neighborhood is queried.
    epsilon: kept for interface compatibility; never read here.

    Returns the index of the unprocessed neighbor with the smallest
    reachability distance (first one in query order among ties), or
    ``point_index`` when there is nothing left to visit.
    """
    # Local import: the NumPy aliases old SciPy re-exported at top level
    # (scipy.maximum, scipy.iterable, ...) are deprecated and missing from
    # recent SciPy releases, so NumPy is used directly.
    import numpy as np

    distances, indices = SetOfObjects.query(
        SetOfObjects.data[point_index], SetOfObjects._nneighbors[point_index])

    # A non-iterable (scalar) answer means there is no neighborhood to walk.
    if not np.iterable(distances):
        return point_index

    flat_dist = np.asarray(distances).ravel()
    flat_idx = np.asarray(indices).ravel()

    # Masking processed values, computed once and reused for both the
    # indices and the distances. Flattening the flags accepts both (n,)
    # and (n, 1) shaped arrays.
    keep = np.asarray(SetOfObjects._processed).ravel()[flat_idx] < 1
    unprocessed = flat_idx[keep]
    if unprocessed.size == 0:
        # Everything is already processed: return control to the main loop.
        return point_index

    rdistances = np.maximum(flat_dist[keep],
                            SetOfObjects._core_dist[point_index])
    updated = np.minimum(SetOfObjects._reachability[unprocessed], rdistances)
    SetOfObjects._reachability[unprocessed] = updated

    # Define return order based on reachability distance; argmin keeps the
    # first minimum, matching the stable sort it replaces.
    return unprocessed[np.argmin(updated)]
Пример #5
0
def set_reach_dist(SetOfObjects, point_index, epsilon):
    """
    Sets reachability distance and ordering. This function is the primary
    workhorse of the OPTICS algorithm.

    The distances from ``point_index`` to all objects are taken from row
    ``point_index`` of ``SetOfObjects.data`` (presumably a precomputed
    distance matrix -- confirm against the caller).

    SetOfObjects: Instantiated and prepped instance of 'setOfObjects' class;
        its ``_reachability`` array is updated in place.
    point_index: Index of the point whose row is examined. (int)
    epsilon: Not read by this implementation; kept so callers can keep
        passing it. (float)

    Returns the index of the unprocessed object with the lowest
    reachability distance (the nearest one among ties), or ``point_index``
    if no unprocessed object remains.
    """
    dist_row = np.asarray(SetOfObjects.data[point_index, :]).ravel()

    # Sort once; the stable kind makes the order of equal distances
    # deterministic, and the sorted distances follow from the same order.
    by_distance = np.argsort(dist_row, kind="stable")
    sorted_dist = dist_row[by_distance]

    # Flags are flattened so (n,) and (n, 1) layouts both work.
    processed = np.asarray(SetOfObjects._processed).ravel()
    not_done = processed[by_distance] < 1
    unprocessed = by_distance[not_done]
    if unprocessed.size == 0:
        return point_index

    # max(distance, core distance of the current point), then keep the
    # smaller of that and any reachability recorded earlier.
    core = SetOfObjects._core_dist[point_index]
    candidate = np.maximum(sorted_dist[not_done], core)
    best = np.minimum(SetOfObjects._reachability[unprocessed], candidate)
    SetOfObjects._reachability[unprocessed] = best

    # First minimum wins, so ties resolve to the nearest object.
    return unprocessed[np.argmin(best)]
Пример #6
0
def set_reach_dist(SetOfObjects, point_index, epsilon):
    """
    Update reachability distances from one point and pick the next point.

    Distances are read from row ``point_index`` of ``SetOfObjects.data``
    and sorted here, so no tree query is involved (presumably ``data`` is a
    precomputed distance matrix -- confirm against the caller).

    SetOfObjects: prepped object exposing ``data``, ``_processed``,
        ``_core_dist`` and ``_reachability`` (updated in place).
    point_index: index of the point whose row is examined.
    epsilon: kept for interface compatibility; never read here.

    Returns the index of the unprocessed object with the smallest
    reachability distance (the nearest one among ties), or ``point_index``
    when every object is already processed.
    """
    row = np.asarray(SetOfObjects.data[point_index, :]).ravel()

    # Smallest distance first; the stable kind keeps equidistant objects
    # in index order so repeated runs visit points identically.
    indices = np.argsort(row, kind="stable")
    distances = row[indices]

    # Masking processed values, computed once. Flattening the flags
    # accepts both (n,) and (n, 1) shaped arrays.
    mask = np.asarray(SetOfObjects._processed).ravel()[indices] < 1
    unprocessed = indices[mask]

    # Checks to see if everything is already processed;
    # if so, return control to main loop.
    if unprocessed.size == 0:
        return point_index

    rdistances = np.maximum(distances[mask],
                            SetOfObjects._core_dist[point_index])
    reach = np.minimum(SetOfObjects._reachability[unprocessed], rdistances)
    SetOfObjects._reachability[unprocessed] = reach

    # Define return order based on reachability distance; argmin keeps the
    # first minimum, matching the stable sort it replaces.
    return unprocessed[np.argmin(reach)]
 def set_reach_dist(self, point_index, epsilon, dtype):
     """
     Update reachability distances around one point and pick the next point.

     Neighbors and their distances come from
     ``self.get_neighbors_dist(point_index, epsilon, dtype)``.
     ``self._reachability`` is updated in place for the neighbors that are
     not yet processed.

     NOTE(review): assumes ``filter_list(values, mask)`` keeps the entries
     of ``values`` whose mask entry is truthy -- confirm against its
     definition. The previous code rebound ``unprocessed`` to the filtered
     indices and then passed it as the mask when filtering ``distances``.

     Returns the unprocessed neighbor with the smallest reachability
     distance (first one among ties), or ``point_index`` when there is
     nothing left to visit.
     """
     # Local import: the NumPy aliases old SciPy re-exported at top level
     # (scipy.maximum, scipy.iterable, ...) are deprecated and missing from
     # recent SciPy releases, so NumPy is used directly.
     import numpy as np

     indices, distances = self.get_neighbors_dist(point_index, epsilon, dtype)
     if not np.iterable(distances):
         return point_index

     # Keep the boolean mask under its own name so that the same mask
     # selects both the indices and their distances.
     pending = ((self._processed[indices] < 1).T)[0].tolist()
     unprocessed = filter_list(indices, pending)
     if len(unprocessed) == 0:
         return point_index

     rdistances = np.maximum(filter_list(distances, pending),
                             self._core_dist[point_index])
     reach = np.minimum(self._reachability[unprocessed], rdistances)
     self._reachability[unprocessed] = reach

     # argmin keeps the first minimum, matching the stable sort it replaces.
     return unprocessed[int(np.argmin(reach))]
Пример #8
0
def set_reach_dist(SetOfObjects, point_index, epsilon):
    """
    Update reachability of all unprocessed objects from one point's query.

    Queries ``SetOfObjects`` for the ``_nneighbors[point_index]`` nearest
    entries of ``data[point_index]`` within ``epsilon``. When the last
    returned distance is finite, that distance is used as an upper bound
    for the reachability of every unprocessed object.

    SetOfObjects: prepped object exposing ``query``, ``data``,
        ``_nneighbors``, ``_index``, ``_processed`` and ``_reachability``
        (the last one is updated in place).
    point_index: index of the point whose neighborhood is queried.
    epsilon: passed to the query as ``distance_upper_bound``.

    Returns the first unprocessed entry of ``_index``, or ``point_index``
    when the query result is a scalar, its last distance is not finite, or
    no unprocessed object remains.
    """
    # Local import: the NumPy aliases old SciPy re-exported at top level
    # (scipy.where, scipy.isfinite, ...) are deprecated and missing from
    # recent SciPy releases, so NumPy is used directly.
    import numpy as np

    distances, _ = SetOfObjects.query(SetOfObjects.data[point_index],
                                      SetOfObjects._nneighbors[point_index],
                                      distance_upper_bound=epsilon)

    if not np.iterable(distances) or not np.isfinite(distances[-1]):
        return point_index

    c_dist = distances[-1]

    # Read the flags from the object that was passed in; the earlier code
    # referenced an unrelated global name (`test_set`) here.
    unprocessed = SetOfObjects._index[np.where(SetOfObjects._processed < 1)[0]]
    if len(unprocessed) == 0:
        # Nothing left to visit; avoid indexing an empty array below.
        return point_index

    SetOfObjects._reachability[unprocessed] = np.minimum(
        SetOfObjects._reachability[unprocessed], c_dist)
    return unprocessed[0]
 def set_reach_dist(self, point_index, epsilon, dtype):
     """
     Set reachability distances around ``point_index``; return the next point.

     Calls ``self.get_neighbors_dist(point_index, epsilon, dtype)`` for the
     neighbor indices and distances, lowers ``self._reachability`` in place
     for the neighbors whose ``_processed`` flag is still below 1, and
     chooses among them.

     NOTE(review): this relies on ``filter_list(values, mask)`` selecting
     the entries of ``values`` at truthy mask positions -- verify against
     its definition. Earlier, the name holding the mask was overwritten
     with the filtered indices before ``distances`` was filtered, so the
     distances were selected with indices instead of the mask.

     Returns the unprocessed neighbor whose reachability distance is
     lowest (the first such neighbor on ties), or ``point_index`` if
     there is none or if ``distances`` is not iterable.
     """
     # Local import: the NumPy aliases old SciPy re-exported at top level
     # (scipy.maximum, scipy.iterable, ...) are deprecated and missing from
     # recent SciPy releases, so NumPy is used directly.
     import numpy as np

     indices, distances = self.get_neighbors_dist(point_index, epsilon,
                                                  dtype)
     if not np.iterable(distances):
         return point_index

     # One mask, two uses: the indices and the distances must be filtered
     # by the same flags to stay aligned.
     is_open = ((self._processed[indices] < 1).T)[0].tolist()
     unprocessed = filter_list(indices, is_open)
     if len(unprocessed) == 0:
         return point_index

     open_distances = filter_list(distances, is_open)
     rdistances = np.maximum(open_distances, self._core_dist[point_index])
     lowered = np.minimum(self._reachability[unprocessed], rdistances)
     self._reachability[unprocessed] = lowered

     # First minimum wins, as with the stable sort this replaces.
     return unprocessed[int(np.argmin(lowered))]
Пример #10
0
def _set_reach_dist(setofobjects, point_index, epsilon):
    """
    Update reachability distances within ``epsilon`` of one point.

    Finds the neighbors of ``setofobjects.data[point_index]`` with
    ``query_radius(r=epsilon)``, computes distances only to the neighbors
    that are not yet processed, and lowers ``setofobjects.reachability_``
    in place for them.

    setofobjects: prepped object exposing ``query_radius``, ``get_arrays``,
        ``metric``, ``data``, ``_processed``, ``core_dists_`` and
        ``reachability_``.
    point_index: index of the point whose neighborhood is examined.
    epsilon: search radius passed to ``query_radius``.

    Returns the entry of the unprocessed neighbors selected by
    ``quick_scan`` (presumably the one with the smallest reachability,
    using the distances to break ties -- confirm against ``quick_scan``),
    or ``point_index`` when there is no unprocessed neighbor. The
    non-iterable case previously fell through and returned ``None``.
    """
    X = np.array(setofobjects.data[point_index]).reshape(1, -1)
    indices = setofobjects.query_radius(X,
                                        r=epsilon,
                                        return_distance=False,
                                        count_only=False,
                                        sort_results=False)[0]

    # np.iterable replaces the deprecated top-level SciPy alias.
    if not np.iterable(indices):
        return point_index

    # Masking processed values; n_pr is 'not processed'
    n_pr = np.compress(
        (np.take(setofobjects._processed, indices, axis=0) < 1).ravel(),
        indices,
        axis=0)

    # Checks to see if everything is already processed;
    # if so, return control to main loop
    if n_pr.size == 0:
        return point_index

    dists = pairwise_distances(X,
                               np.take(setofobjects.get_arrays()[0],
                                       n_pr,
                                       axis=0),
                               setofobjects.metric,
                               n_jobs=1).ravel()

    rdists = np.maximum(dists, setofobjects.core_dists_[point_index])
    new_reach = np.minimum(
        np.take(setofobjects.reachability_, n_pr, axis=0), rdists)
    setofobjects.reachability_[n_pr] = new_reach

    # Define return order based on reachability distance
    return (n_pr[quick_scan(
        np.take(setofobjects.reachability_, n_pr, axis=0), dists)])