Пример #1
0
def compute_cluster_labels(feature_vectors, search_box_half_span,
                           min_cluster_size):
    """Use DBSCAN to compute clusters for a set of points.

    DBSCAN is a clustering algorithm that looks for regions of high
    density in a set of points.  Connected regions of high density are
    identified as clusters.  Small regions of low density or even
    single points get identified as noise (belonging to no cluster).

    There are three arguments to the process.  First, you supply the points
    to cluster.  Then you ask for cluster labels with respect to
    two parameters: the search box size (defining "nearby" points) and
    the minimum number of points that you're willing to call a
    cluster.

    Args:
        feature_vectors: Indexable, sized sequence of points.  Each one
            is passed through convert_to_feature_vector.  The dimension
            of the first point selects the native clustering routine, so
            all points are assumed to share that dimension.
        search_box_half_span: Search box extent, converted with
            convert_to_feature_vector just like the points.
        min_cluster_size: Minimum number of points that may form a
            cluster; forwarded unchanged to the native routine.

    Returns:
        Whatever the selected native DBSCAN routine returns.  An empty
        list is returned when no points are supplied.
    """

    # With no points there is nothing to cluster, and there is no first
    # point from which to read the dimension.
    if len(feature_vectors) == 0:
        return []

    native_feature_vectors = [
        convert_to_feature_vector(p) for p in feature_vectors
    ]
    native_box_half_span = convert_to_feature_vector(search_box_half_span)

    # The native module exposes one routine per dimension; pick the one
    # whose name ends with the dimension of the first point.
    cluster_engine_name = 'dbscan_learn_cluster_ids_{}'.format(
        len(feature_vectors[0]))
    dbscan_learn_cluster_labels = getattr(_dbscan_clustering,
                                          cluster_engine_name)
    return dbscan_learn_cluster_labels(native_feature_vectors,
                                       native_box_half_span, min_cluster_size)
Пример #2
0
def test_feature_vector_repr(dimension):
    """Check that repr() of a feature vector has the expected form.

    A feature vector with components 0.5, 1.5, 2.5, ... is built and
    its repr() is compared against a string such as
    'tracktable.domain.feature_vectors.FeatureVector3(0.5, 1.5, 2.5)'.

    Arguments:
        dimension {int}: How many components to give the feature vector

    Returns:
        0 on success, 1 on error (the mismatch is also logged as an error)
    """

    values = [index + 0.5 for index in range(dimension)]
    vector = convert_to_feature_vector(values)

    component_text = ', '.join(str(value) for value in values)
    expected = 'tracktable.domain.feature_vectors.FeatureVector{}({})'.format(
        dimension, component_text)
    actual = repr(vector)

    # Success path first; everything below reports the mismatch.
    if expected == actual:
        return 0

    message = (
        'Expected repr(my_feature_vector) to be "{}" but got "{}" instead'
    ).format(expected, actual)
    logging.getLogger(__name__).error(message)
    return 1
def distance_geometry_signature(trajectory,
                                num_control_points=4,
                                normalize_distance=True):
    """Compute a distance-geometry signature for a trajectory.

    Control points are sampled from the trajectory at evenly spaced
    length fractions running from 0 to 1 using point_at_length_fraction.
    The signature is the list of distances between pairs of control
    points, ordered from the widest index separation (first to last
    control point) down to adjacent control points.

    Args:
        trajectory: Trajectory to sample; passed unchanged to
            point_at_length_fraction.
        num_control_points (int): Number of control points to sample.
            Must be at least 2 so that there is at least one pair.
        normalize_distance (bool): When true, every distance is divided
            by the largest distance in the signature so that shapes can
            be compared independent of scale.  If the largest distance
            is zero, every entry becomes 0.

    Returns:
        The result of convert_to_feature_vector applied to the signature,
        which has n * (n - 1) / 2 entries for n control points.

    Raises:
        ValueError: If num_control_points is less than 2.
    """
    # Fewer than two control points would divide by zero below (n == 1)
    # or produce an empty signature (n <= 0), so reject them up front.
    if num_control_points < 2:
        raise ValueError(
            'num_control_points must be at least 2 (got {})'.format(
                num_control_points))

    # Evenly spaced fractions of the trajectory length at which the
    # control points are sampled.
    control_point_increment = 1.0 / (num_control_points - 1)
    control_point_fractions = [
        control_point_increment * i for i in range(num_control_points)
    ]
    control_points = [
        point_at_length_fraction(trajectory, t)
        for t in control_point_fractions
    ]

    # Collect pairwise distances, widest separation first.
    signature = []
    for stepsize in range(num_control_points - 1, 0, -1):
        for start in range(0, num_control_points - stepsize):
            end = start + stepsize
            signature.append(
                distance(control_points[start], control_points[end]))

    # Normalize distances to compare trajectory shapes
    if normalize_distance:
        largest_distance = max(signature)
        signature = [
            0 if not largest_distance else d / largest_distance
            for d in signature
        ]

    # Convert distances to a feature vector
    return convert_to_feature_vector(signature)
Пример #4
0
    def __init__(self, points=None):
        """Initialize the object, optionally populating it from points.

        Args:
            points: Optional iterable of points.  When given, it is kept
                as the original point collection, each point is
                converted with convert_to_feature_vector, and
                _setup_tree() is called.  When omitted, the tree and
                the point collections are left as None.
        """
        self._tree = None
        self._original_points = None
        # Always define this attribute so that an instance created
        # without points does not raise AttributeError when it is read.
        self._feature_vectors = None

        if points is not None:
            self._original_points = points
            self._feature_vectors = [
                convert_to_feature_vector(p) for p in points
            ]
            self._setup_tree()
Пример #5
0
    def __init__(self, points=None):
        """Set up an empty instance, or one built from the given points.

        Args:
            points: Optional iterable of points.  If supplied, it is
                stored as the original points, a converted copy is
                built with convert_to_feature_vector, and
                _setup_tree() is invoked.  If None, no tree is built.
        """
        self._tree = None
        self._original_points = None
        # Defined unconditionally: without this, an instance constructed
        # with no points would lack the attribute entirely.
        self._feature_vectors = None

        if points is not None:
            self._original_points = points
            self._feature_vectors = [
                convert_to_feature_vector(p) for p in points
            ]
            self._setup_tree()
Пример #6
0
    def points(self, new_points):
        """Replace the stored points and rebuild the r-tree.

        Supply points (points in space or feature vectors) with
        dimension between 1 and 30.  The tree is rebuilt from copies
        of those points.  Nothing happens when the new points compare
        equal to the ones already stored.

        NOTE: This version of the code does indeed copy the points.  A
        future version might get around that.

        Args:
           new_points: List of points to use
        """

        # Keep the original `!=` comparison; just bail out early when
        # it reports no change.
        if not (new_points != self._original_points):
            return

        copied_points = list(new_points)
        self._original_points = copied_points
        self._feature_vectors = list(
            map(convert_to_feature_vector, copied_points))
        self._setup_tree()
Пример #7
0
    def points(self, new_points):
        """Populate the r-tree with a new set of points.

        The points (points in space or feature vectors) must have
        dimension between 1 and 30.  They are copied into a list,
        converted with convert_to_feature_vector, and the tree is set
        up again.  If the new points compare equal to the stored ones,
        the call is a no-op.

        NOTE: This version of the code does indeed copy the points.  A
        future version might get around that.

        Args:
           new_points: List of points to use
        """

        points_changed = new_points != self._original_points
        if not points_changed:
            return

        self._original_points = list(new_points)
        converted = []
        for original in self._original_points:
            converted.append(convert_to_feature_vector(original))
        self._feature_vectors = converted
        self._setup_tree()
Пример #8
0
 def find_nearest_neighbors(self, seed_point, num_neighbors):
     """Query the underlying tree for the neighbors nearest a point.

     Args:
        seed_point: Point to search around; converted with
            convert_to_feature_vector before the query.
        num_neighbors: Forwarded unchanged to the tree's query.

     Returns:
        Whatever self._tree.find_nearest_neighbors returns.
     """
     # Resolve the tree method first, as the original expression did.
     tree_query = self._tree.find_nearest_neighbors
     native_seed = convert_to_feature_vector(seed_point)
     return tree_query(native_seed, num_neighbors)
Пример #9
0
 def find_points_in_box(self, min_corner, max_corner):
     """Query the underlying tree for points inside a box.

     Args:
        min_corner: One corner of the box; converted with
            convert_to_feature_vector.
        max_corner: The opposite corner; converted the same way.

     Returns:
        Whatever self._tree.find_points_in_box returns.
     """
     # Resolve the tree method first, then convert the corners in order.
     box_query = self._tree.find_points_in_box
     native_min = convert_to_feature_vector(min_corner)
     native_max = convert_to_feature_vector(max_corner)
     return box_query(native_min, native_max)
Пример #10
0
 def find_nearest_neighbors(self, seed_point, num_neighbors):
     """Delegate a nearest-neighbor search to the wrapped tree.

     Args:
        seed_point: Search origin; passed through
            convert_to_feature_vector first.
        num_neighbors: Passed to the tree as given.

     Returns:
        The result of self._tree.find_nearest_neighbors.
     """
     search = self._tree.find_nearest_neighbors
     query_arguments = (convert_to_feature_vector(seed_point), num_neighbors)
     return search(*query_arguments)
Пример #11
0
 def find_points_in_box(self, min_corner, max_corner):
     """Delegate a box search to the wrapped tree.

     Args:
        min_corner: First box corner; passed through
            convert_to_feature_vector.
        max_corner: Second box corner; passed through
            convert_to_feature_vector.

     Returns:
        The result of self._tree.find_points_in_box.
     """
     search = self._tree.find_points_in_box
     # A list display evaluates left to right: min corner, then max.
     corners = [
         convert_to_feature_vector(min_corner),
         convert_to_feature_vector(max_corner),
     ]
     return search(*corners)
def get_features(trajectory):
    """Build a two-component feature vector for a trajectory.

    The components are, in order, the value of cha(trajectory) and the
    distance between the trajectory's first and last points.

    Args:
        trajectory: Indexable, sized sequence of points.

    Returns:
        The result of convert_to_feature_vector on the two components.
    """
    first_component = cha(trajectory)
    start_point = trajectory[0]
    end_point = trajectory[len(trajectory) - 1]
    end_to_end = distance(start_point, end_point)
    return convert_to_feature_vector([first_component, end_to_end])
Пример #13
0
def compute_cluster_labels(feature_vectors, search_box_half_span, min_cluster_size):
    """Use DBSCAN to compute clusters for a set of points.

    DBSCAN is a clustering algorithm that looks for regions of high
    density in a set of points.  Connected regions of high density are
    identified as clusters.  Small regions of low density or even
    single points get identified as noise (belonging to no cluster).

    There are three arguments to the process.  First, you supply the points
    to cluster.  Then you ask for cluster labels with respect to
    two parameters: the search box size (defining "nearby" points) and
    the minimum number of points that you're willing to call a
    cluster.

    You will get back a list of (vertex_id, cluster_id) pairs.  If you
    supplied a list of points as input the vertex IDs will be indices
    into that list.  If you supplied pairs of (point, my_vertex_id)
    instead, the vertex IDs will be whatever you supplied.

    Args:
        feature_vectors: Indexable, sized sequence of either bare points
            or (point, vertex_id) pairs.  The first element decides
            which form is assumed for the whole sequence.
        search_box_half_span: Search box extent, converted with
            convert_to_feature_vector.
        min_cluster_size: Minimum number of points that may form a
            cluster; forwarded unchanged to the native routine.

    Returns:
        List of (vertex_id, cluster_id) tuples.  Empty when no points
        are supplied.
    """

    logger = logging.getLogger(__name__)

    # Nothing to cluster; this also avoids indexing into an empty
    # sequence when inspecting the first point below.
    if len(feature_vectors) == 0:
        return []

    first_point = feature_vectors[0]
    logger.debug("Testing for point decoration.  First point: {}".format(
        first_point))

    # Are we dealing with decorated points?  A decorated point is a
    # 2-element pair whose first element is itself a sized point.
    decorated_points = False
    try:
        if len(first_point) == 2 and len(first_point[0]) > 0:
            decorated_points = True
    except TypeError:
        # The first element of the point is something that doesn't
        # have a len().  It is probably a coordinate, meaning we've
        # got bare points.
        pass

    if decorated_points:
        logger.debug(
            ("Points are decorated. First point: {}").format(
                first_point))
        vertex_ids = [pair[1] for pair in feature_vectors]
        bare_points = [pair[0] for pair in feature_vectors]
        point_size = len(first_point[0])
    else:
        logger.debug("Points are not decorated")
        vertex_ids = list(range(len(feature_vectors)))
        bare_points = feature_vectors
        point_size = len(first_point)

    native_feature_vectors = [
        convert_to_feature_vector(p) for p in bare_points
    ]
    native_box_half_span = convert_to_feature_vector(search_box_half_span)

    # The native module exposes one routine per dimension; pick the one
    # whose name ends with the dimension of the points.
    cluster_engine_name = 'dbscan_learn_cluster_ids_{}'.format(point_size)
    dbscan_learn_cluster_labels = getattr(_dbscan_clustering, cluster_engine_name)
    integer_labels = dbscan_learn_cluster_labels(
        native_feature_vectors,
        native_box_half_span,
        min_cluster_size
        )

    # The native routine reports positions in the input sequence;
    # translate them back to the caller's vertex IDs.
    return [
        (vertex_ids[vertex_index], cluster_id)
        for (vertex_index, cluster_id) in integer_labels
    ]