def find_collocations(self, primary_data, secondary_data, max_interval,
                      max_distance, **kwargs):
    """Find collocations by comparing every primary with every secondary point.

    Both datasets are converted to cartesian coordinates and the full
    pairwise distance matrix is evaluated, so memory and run time grow with
    the product of the two dataset sizes.

    Args:
        primary_data: Mapping-like object providing "lat", "lon" and (if
            *max_interval* is given) "time" fields.
        secondary_data: Same layout as *primary_data*.
        max_interval: Upper bound (exclusive) for the time difference of a
            pair, compared directly against the difference of the "time"
            fields. None disables the temporal check.
        max_distance: Upper bound (exclusive) for the spatial distance.
            NOTE(review): presumably kilometres, because it is multiplied
            by 1000 before being compared with distances derived from
            typhon.constants.earth_radius - confirm the unit of that
            constant.
        **kwargs: Accepted but not used here.

    Returns:
        Tuple of two Array objects: the indices of the collocated primary
        points and the indices of the matching secondary points.
    """
    started = time.time()

    # Spatial search: project both datasets onto cartesian coordinates.
    radius = typhon.constants.earth_radius
    primary_xyz = geocentric2cart(
        radius, primary_data["lat"], primary_data["lon"])
    secondary_xyz = geocentric2cart(
        radius, secondary_data["lat"], secondary_data["lon"])

    # Rows of the matrix belong to primary points, columns to secondaries.
    within_range = distance_matrix(
        np.column_stack(primary_xyz),
        np.column_stack(secondary_xyz),
    ) < max_distance * 1000
    primary_indices, secondary_indices = np.nonzero(within_range)

    message = (
        "\tFound {} primary and {} secondary spatial collocations in "
        "{:.2f}s."
    )
    logging.debug(message.format(
        primary_indices.size, secondary_indices.size, time.time() - started))

    # Without a temporal bound every spatial match is a collocation.
    if max_interval is None:
        return Array(primary_indices), Array(secondary_indices)

    # Temporal search: keep only the pairs that are close enough in time.
    time_gaps = np.abs(
        primary_data["time"][primary_indices]
        - secondary_data["time"][secondary_indices])
    close_in_time = time_gaps < max_interval

    return (Array(primary_indices[close_in_time]),
            Array(secondary_indices[close_in_time]))
def test_geocentric2cart2geocentric(self):
    """Check that geocentric -> cartesian -> geocentric is a round trip."""
    reference = (1, -13, 42)

    # Feed the cartesian result straight back into the inverse conversion.
    round_trip = geodesy.cart2geocentric(
        *geodesy.geocentric2cart(*reference))

    assert np.allclose(reference, round_trip)
def test_geocentric2cart(self):
    """Test conversion from geocentric to cartesian system."""
    # Three points with the same first coordinate (r), one column each.
    radius = np.array([1, 1, 1])
    latitude = np.array([0, 0, 90])
    longitude = np.array([0, 90, 0])

    # Expected cartesian components, ordered as (x, y, z).
    expected = (
        np.array([1, 0, 0]),
        np.array([0, 1, 0]),
        np.array([0, 0, 1]),
    )

    result = geodesy.geocentric2cart(radius, latitude, longitude)

    assert np.allclose(result, expected)
def _to_metric(self, lat, lon):
    """Convert latitudes and longitudes into the layout of ``self.metric``.

    Args:
        lat: numpy.ndarray with the latitudes.
        lon: numpy.ndarray with the longitudes.

    Returns:
        A 2-d numpy.ndarray with one row per point. For the "minkowski"
        metric the columns are whatever ``geocentric2cart`` returns for
        ``earth_radius``, *lat* and *lon*; for the "haversine" metric they
        are latitude and longitude converted to radians.

    Raises:
        ValueError: If *lat* or *lon* is not a numpy.ndarray, or if
            ``self.metric`` is neither "minkowski" nor "haversine".
    """
    # Both inputs have to be plain arrays. The previous check used `or`
    # and therefore only failed when *neither* argument was an ndarray.
    if not (isinstance(lat, np.ndarray) and isinstance(lon, np.ndarray)):
        raise ValueError(
            "lat and lon must be numpy.ndarray objects (no "
            "pandas.Series or xarray.DataArray)!")

    if self.metric == "minkowski":
        return np.column_stack(geocentric2cart(earth_radius, lat, lon))

    if self.metric == "haversine":
        return np.radians(np.column_stack([lat, lon]))

    raise ValueError(f"Unknown metric '{self.metric}'!")
def test_geocentric2cart(self):
    """Test conversion from geocentric to cartesian system."""
    # One row per test point: (r, lat, lon) and the expected (x, y, z).
    cases = [
        ((1, 0, 0), (1, 0, 0)),
        ((1, 0, 90), (0, 1, 0)),
        ((1, 90, 0), (0, 0, 1)),
    ]

    # Transpose the table so that each coordinate becomes one array.
    r, lat, lon = np.array([geo for geo, _ in cases]).T
    expected = np.array([cart for _, cart in cases]).T

    converted = geodesy.geocentric2cart(r, lat, lon)

    assert np.allclose(converted, expected)
def find_collocations(self, primary_data, secondary_data, max_interval,
                      max_distance, **kwargs):
    """Find collocations between two datasets with a ball tree.

    Args:
        primary_data: Mapping-like object providing "time" and, for a
            spatial search, "lat" and "lon" fields.
        secondary_data: Same layout as *primary_data*.
        max_interval: Maximum time difference of a pair; converted with
            ``to_timedelta``. None disables the temporal condition.
        max_distance: Maximum spatial distance of a pair in kilometres.
            None requests a purely temporal search, which requires
            *max_interval*.
        **kwargs: Accepted but not used here.

    Returns:
        A numpy array with two rows: the first holds the indices of the
        primary points, the second the indices of the matching secondary
        points. If the tree search yields no pair at all, the empty array
        produced by the search is returned unchanged.

    Raises:
        ValueError: If both *max_interval* and *max_distance* are None.
    """
    if max_interval is not None:
        max_interval = to_timedelta(max_interval)

    if max_distance is None:
        if max_interval is None:
            # Previously this ended in an AttributeError on None below.
            raise ValueError(
                "Either max_interval or max_distance must be given!")

        # Search for temporal collocations only
        primary_time = primary_data["time"].astype("M8[s]").astype("int")
        secondary_time = \
            secondary_data["time"].astype("M8[s]").astype("int")

        # The BallTree implementation only allows 2-dimensional data, hence
        # we need to add an empty second dimension
        # TODO: Change this
        primary_points = np.column_stack(
            [primary_time, np.zeros_like(primary_time)])
        secondary_points = np.column_stack(
            [secondary_time, np.zeros_like(secondary_time)])

        max_radius = max_interval.total_seconds()
    else:
        # We try to find collocations by building one 3-d ball tree and
        # searching for the nearest neighbours. Since the tree cannot handle
        # latitude / longitude data, we have to convert them to
        # 3D-cartesian coordinates. This introduces an error of the
        # distance calculation since it is now the distance in a 3D
        # euclidean space and not the distance along the sphere any longer.
        # When having two points with a distance of 5 degrees in longitude,
        # the error is smaller than 177 meters.
        primary_points = np.column_stack(geocentric2cart(
            6371000.0,  # typhon.constants.earth_radius,
            primary_data["lat"], primary_data["lon"]))
        secondary_points = np.column_stack(geocentric2cart(
            6371000.0,  # typhon.constants.earth_radius,
            secondary_data["lat"], secondary_data["lon"]))

        # The parameter max_distance is in kilometers, so we have to
        # convert it to meters.
        max_radius = max_distance * 1000

    # It is more efficient to build the tree with the largest data corpus:
    tree_with_primary = primary_points.size > secondary_points.size
    if tree_with_primary:
        tree_points, query_points = primary_points, secondary_points
    else:
        tree_points, query_points = secondary_points, primary_points

    # Search for all collocations
    tree = SklearnBallTree(tree_points, leaf_size=self.leaf_size)
    results = tree.query_radius(query_points, r=max_radius)

    # Build the list of the collocation pairs, at first in the order
    # (index in the tree, index of the query point):
    pairs = np.array([
        [tree_index, query_index]
        for query_index, tree_indices in enumerate(results)
        for tree_index in tree_indices
    ]).T

    # No collocations were found. Test the size and not `pairs.any()`: the
    # latter is also False for the valid pair (0, 0), which would then
    # bypass the temporal check below.
    if pairs.size == 0:
        return pairs

    # Callers expect the primary indices in the first row.
    if not tree_with_primary:
        pairs = pairs[[1, 0]]

    # Check here for temporal collocations:
    if max_distance is not None and max_interval is not None:
        # Check whether the time differences between the spatial
        # collocations are less than the temporal boundary:
        time_gaps = np.abs(
            primary_data["time"][pairs[0]]
            - secondary_data["time"][pairs[1]])
        passed_time_check = time_gaps < np.timedelta64(max_interval)

        # Just keep all indices which satisfy the temporal condition.
        pairs = pairs[:, passed_time_check]

    return pairs