def create_all_polygons_on_grid(self):
    """
    Build one polygon for every cell of the virtual grid.

    Cells are visited row by row, starting at (lat_min_y, lon_min_x) and
    advancing by cell_size_by_degree in each direction. Every polygon is
    created from vertices given as (latitude, longitude) pairs. The
    resulting 2-D array, indexed as [latitude index][longitude index],
    is stored in ``self.grid_polygon``.

    Raises
    ------
    Exception
        Any error raised while building the grid is re-raised after the
        operation has been closed and stored in ``self.last_operation``.

    """
    operation = begin_operation('create_all_polygons_on_grid')

    try:
        print('\nCreating all polygons on virtual grid', flush=True)

        n_rows = self.grid_size_lat_y
        n_cols = self.grid_size_lon_x
        # placeholder matrix, filled cell by cell below
        cells = np.array(
            [[None for _ in range(n_cols)] for _ in range(n_rows)]
        )

        step = self.cell_size_by_degree
        lat_start = self.lat_min_y
        for row in progress_bar(range(n_rows)):
            lat_end = lat_start + step
            lon_start = self.lon_min_x
            for col in range(n_cols):
                lon_end = lon_start + step
                # polygon of the current cell
                cells[row][col] = Polygon((
                    (lat_start, lon_start),
                    (lat_end, lon_start),
                    (lat_end, lon_end),
                    (lat_start, lon_end),
                ))
                lon_start = lon_end
            lat_start = lat_end

        self.grid_polygon = cells
        print('...geometries saved on Grid grid_polygon property')
        self.last_operation = end_operation(operation)
    except Exception as e:
        self.last_operation = end_operation(operation)
        raise e
def save_grid_pkl(self, filename):
    """
    Serialize the grid description to a .pkl file.

    The object returned by ``self.get_grid()`` is written to `filename`
    with ``joblib.dump``.

    Parameters
    ----------
    filename : String
        Path of the file to be written.

    Raises
    ------
    Exception
        Any error raised while writing is re-raised after the operation
        has been closed and stored in ``self.last_operation``.

    """
    operation = begin_operation('save_grid_pkl')

    try:
        with open(filename, 'wb') as pkl_file:
            # the file is opened first, then the grid description is fetched
            joblib.dump(self.get_grid(), pkl_file)
        print('\nA file was saved')
        self.last_operation = end_operation(operation)
    except Exception as e:
        self.last_operation = end_operation(operation)
        raise e
def read_grid_pkl(self, filename):
    """
    Load a grid description from a .pkl file.

    Parameters
    ----------
    filename : String
        Path of the file to be read.

    Returns
    -------
    dict
        The object stored in the file, as returned by ``joblib.load``.
        Presumably the dictionary written by ``save_grid_pkl`` (minimum
        longitude/latitude, grid size along each axis and cell size in
        degrees) -- verify against the writer.

    Raises
    ------
    Exception
        Any error raised while reading is re-raised after the operation
        has been closed and stored in ``self.last_operation``.

    Notes
    -----
    ``joblib.load`` unpickles the file content, so only files from a
    trusted source should be loaded.

    """
    operation = begin_operation('read_grid_pkl')

    try:
        with open(filename, 'rb') as pkl_file:
            loaded_grid = joblib.load(pkl_file)
        self.last_operation = end_operation(operation)
        return loaded_grid
    except Exception as e:
        self.last_operation = end_operation(operation)
        raise e
def create_update_index_grid_feature(self, data, label_dtype=np.int64, sort=True):
    """
    Create or update the grid index columns of a dataframe.

    The latitude and longitude columns of `data` are converted to grid
    indexes with ``self.point_to_index_grid`` and written, in place, to
    the INDEX_GRID_LAT and INDEX_GRID_LON columns.

    Parameters
    ----------
    data : pandas.core.frame.DataFrame
        Dataset holding the latitude, longitude, trajectory id and
        datetime columns. It is modified in place.
    label_dtype : callable, optional
        Conversion applied to the computed indexes before they are
        stored, by default np.int64.
    sort : boolean, optional
        If True, `data` is first sorted in place by trajectory id and
        datetime, by default True.

    Raises
    ------
    Exception
        Any error raised while indexing is re-raised after the operation
        has been closed and stored in ``self.last_operation``.

    """
    operation = begin_operation('create_update_index_grid_feature')
    print('\nCreating or updating index of the grid feature..\n')

    try:
        if sort:
            data.sort_values([TRAJ_ID, DATETIME], inplace=True)

        lat_idx, lon_idx = self.point_to_index_grid(
            data[LATITUDE], data[LONGITUDE]
        )
        # convert and store one axis at a time: latitude first, then longitude
        for column, indexes in (
            (INDEX_GRID_LAT, lat_idx),
            (INDEX_GRID_LON, lon_idx),
        ):
            data[column] = label_dtype(indexes)

        self.last_operation = end_operation(operation)
    except Exception as e:
        self.last_operation = end_operation(operation)
        raise e
def create_all_polygons_to_all_point_on_grid(self, data, unique_index=True):
    """
    Create the grid-cell polygon of every distinct (id, cell) pair.

    The grid index columns of `data` are (re)computed, the distinct
    combinations of 'id', 'index_grid_lat' and 'index_grid_lon' are
    extracted, and one polygon is built for each of those rows.

    Parameters
    ----------
    data : pandas.core.frame.DataFrame
        Dataset holding the latitude, longitude and datetime columns.
        The grid index columns are added to it in place.
    unique_index : boolean, optional
        Accepted for interface compatibility; it is not used, the grid
        is always indexed with separate latitude/longitude columns.

    Returns
    -------
    pandas.core.frame.DataFrame
        The de-duplicated 'id', 'index_grid_lat' and 'index_grid_lon'
        columns plus a 'polygon' column holding the polygon of each row.

    Raises
    ------
    Exception
        Any error raised while building the polygons is re-raised after
        the operation has been closed and stored in
        ``self.last_operation``.

    """
    operation = begin_operation('create_all_polygons_to_all_point_on_grid')

    # bound up-front so the diagnostic in the except block can never fail
    # with a NameError that would hide the original exception
    size = None
    i = None

    try:
        self.create_update_index_grid_feature(data, unique_index=False)

        datapolygons = data.loc[
            :, ['id', 'index_grid_lat', 'index_grid_lon']
        ].drop_duplicates()
        size = datapolygons.shape[0]

        # Read the indexes from the de-duplicated frame: position i here
        # must describe row i of `datapolygons`, not row i of `data`.
        index_grid_lat = np.array(datapolygons['index_grid_lat'])
        index_grid_lon = np.array(datapolygons['index_grid_lon'])

        # Pre-allocated object array: avoids the repeated reallocation of
        # np.append and keeps every polygon as a single opaque element.
        polygons = np.empty(size, dtype=object)
        for i in progress_bar(range(size)):
            polygons[i] = self.create_one_polygon_to_point_on_grid(
                index_grid_lat[i], index_grid_lon[i]
            )
        print('...polygons were created')

        datapolygons['polygon'] = polygons
        self.last_operation = end_operation(operation)
        return datapolygons
    except Exception as e:
        self.last_operation = end_operation(operation)
        print('size:{}, i:{}'.format(size, i))
        raise e
def create_all_polygons_on_grid(self):
    """
    Build one polygon for every cell of the virtual grid.

    Cells are visited row by row, starting at (lat_min_y, lon_min_x) and
    advancing by cell_size_by_degree in each direction. Every polygon is
    created from vertices given as (longitude, latitude) pairs. The
    resulting 2-D array, indexed as [latitude index][longitude index],
    is stored in ``self.grid_polygon``.

    """
    operation = begin_operation('create_all_polygons_on_grid')
    logger.debug('\nCreating all polygons on virtual grid')

    n_rows = self.grid_size_lat_y
    n_cols = self.grid_size_lon_x
    # placeholder matrix, filled cell by cell below
    cells = np.array(
        [[None for _ in range(n_cols)] for _ in range(n_rows)]
    )

    step = self.cell_size_by_degree
    lat_start = self.lat_min_y
    for row in progress_bar(range(n_rows), desc='Creating polygons'):
        lat_end = lat_start + step
        lon_start = self.lon_min_x
        for col in range(n_cols):
            lon_end = lon_start + step
            # polygon of the current cell
            cells[row][col] = Polygon((
                (lon_start, lat_start),
                (lon_start, lat_end),
                (lon_end, lat_end),
                (lon_end, lat_start),
            ))
            lon_start = lon_end
        lat_start = lat_end

    self.grid_polygon = cells
    logger.debug('...geometries saved on Grid grid_polygon property')
    self.last_operation = end_operation(operation)
def create_all_polygons_to_all_point_on_grid(self, data: DataFrame) -> DataFrame:
    """
    Create the grid-cell polygon of every distinct (trajectory, cell) pair.

    If `data` lacks either grid index column, both are created first
    (in place) through ``create_update_index_grid_feature``.

    Parameters
    ----------
    data : DataFrame
        Dataset holding the trajectory id and either the grid index
        columns or the coordinates needed to compute them.

    Returns
    -------
    DataFrame
        The de-duplicated trajectory id and grid index columns plus a
        'polygon' column holding the polygon of each row.

    """
    operation = begin_operation('create_all_polygons_to_all_point_on_grid')

    already_indexed = INDEX_GRID_LAT in data and INDEX_GRID_LON in data
    if not already_indexed:
        self.create_update_index_grid_feature(data, unique_index=False)

    def _cell_polygon(row):
        # polygon of the grid cell referenced by one dataframe row
        return self.create_one_polygon_to_point_on_grid(
            row[INDEX_GRID_LAT], row[INDEX_GRID_LON]
        )

    unique_cells = data[
        [TRAJ_ID, INDEX_GRID_LAT, INDEX_GRID_LON]
    ].drop_duplicates()
    cell_polygons = unique_cells.apply(_cell_polygon, axis=1)
    logger.debug('...polygons were created')

    unique_cells['polygon'] = cell_polygons
    self.last_operation = end_operation(operation)
    return unique_cells
def point_to_index_grid(self, event_lat: float, event_lon: float) -> tuple[int, int]:
    """
    Locate the grid cell indexes of a point (lat, long).

    Each coordinate is shifted by the grid origin, divided by the cell
    size in degrees and rounded down with ``np.floor``.

    Parameters
    ----------
    event_lat : float
        Represents the latitude of a point
    event_lon : float
        Represents the longitude of a point

    Returns
    -------
    Tuple[int, int]
        Represents the index y in a grid of a point (lat, long)
        Represents the index x in a grid of a point (lat, long)

    Notes
    -----
    The values are returned exactly as produced by ``np.floor``, i.e.
    whole numbers with a floating point type; callers that need an
    integer type must convert them.

    """
    # the operation is registered under this method's own name
    operation = begin_operation('point_to_index_grid')

    cell_size = self.cell_size_by_degree
    indexes_lat_y = np.floor(
        (np.float64(event_lat) - self.lat_min_y) / cell_size
    )
    indexes_lon_x = np.floor(
        (np.float64(event_lon) - self.lon_min_x) / cell_size
    )

    # lazy logging arguments: the message is only formatted when emitted
    logger.debug(
        '...[%s,%s] indexes were created to lat and lon',
        indexes_lat_y.size, indexes_lon_x.size
    )
    self.last_operation = end_operation(operation)

    return indexes_lat_y, indexes_lon_x
def create_one_polygon_to_point_on_grid(self, index_grid_lat: int, index_grid_lon: int) -> Polygon:
    """
    Build the polygon of a single grid cell.

    Parameters
    ----------
    index_grid_lat : int
        Index of the cell along the latitude axis.
    index_grid_lon : int
        Index of the cell along the longitude axis.

    Returns
    -------
    Polygon
        Square with side cell_size_by_degree whose first vertex is the
        cell origin; vertices are given as (longitude, latitude) pairs.

    """
    operation = begin_operation('create_one_polygon_to_point_on_grid')

    step = self.cell_size_by_degree
    # origin of the cell, offset from the grid origin by whole cells
    lat_start = self.lat_min_y + step * index_grid_lat
    lon_start = self.lon_min_x + step * index_grid_lon
    lat_end = lat_start + step
    lon_end = lon_start + step

    corners = (
        (lon_start, lat_start),
        (lon_start, lat_end),
        (lon_end, lat_end),
        (lon_end, lat_start),
    )
    cell = Polygon(corners)

    self.last_operation = end_operation(operation)
    return cell
def _create_virtual_grid(self, data: DataFrame, cell_size: float, meters_by_degree: float):
    """
    Compute the parameters of a virtual grid covering the data bounding box.

    The cell size is converted to degrees, the upper bounds of the
    bounding box are pushed outwards until each side spans a whole
    number of cells, and the grid origin, size and cell size are stored
    on the instance. No polygon is created here.

    Parameters
    ----------
    data : DataFrame
        Dataset whose ``get_bbox()`` result is read as
        (lat_min, lon_min, lat_max, lon_max).
    cell_size : float
        Size of a grid cell, in the unit used by `meters_by_degree`.
    meters_by_degree : float
        Number of meters in one degree; divides `cell_size`.

    """
    operation = begin_operation('_create_virtual_grid')

    bbox = data.get_bbox()
    print('\nCreating a virtual grid without polygons')

    # Original author's note: latitude in Fortaleza: -3.8162973555
    cell_size_by_degree = cell_size / meters_by_degree
    print('...cell size by degree: %s' % cell_size_by_degree)

    lat_min_y, lon_min_x = bbox[0], bbox[1]
    lat_max_y, lon_max_x = bbox[2], bbox[3]

    def _fit_upper_bound(lower, upper):
        # Keep `upper` when the span is an exact multiple of the cell
        # size; otherwise expand it to the end of the next whole cell.
        span = upper - lower
        if math.fmod(span, cell_size_by_degree) == 0:
            return upper
        whole_cells = math.floor(span / cell_size_by_degree)
        return lower + cell_size_by_degree * (whole_cells + 1)

    lat_max_y = _fit_upper_bound(lat_min_y, lat_max_y)
    lon_max_x = _fit_upper_bound(lon_min_x, lon_max_x)

    # number of cells along each axis
    grid_size_lat_y = int(
        round((lat_max_y - lat_min_y) / cell_size_by_degree)
    )
    grid_size_lon_x = int(
        round((lon_max_x - lon_min_x) / cell_size_by_degree)
    )
    print(
        '...grid_size_lat_y:%s\ngrid_size_lon_x:%s'
        % (grid_size_lat_y, grid_size_lon_x)
    )

    self.lon_min_x = lon_min_x
    self.lat_min_y = lat_min_y
    self.grid_size_lat_y = grid_size_lat_y
    self.grid_size_lon_x = grid_size_lon_x
    self.cell_size_by_degree = cell_size_by_degree

    print('\n..A virtual grid was created')
    self.last_operation = end_operation(operation)
def create_update_index_grid_feature(
    self,
    data: DataFrame,
    unique_index: Optional[bool] = True,
    label_dtype: Optional[Callable] = np.int64,
    sort: Optional[bool] = True
):
    """
    Create or update the grid index column(s) of a dataframe.

    The latitude and longitude columns of `data` are converted to grid
    indexes and written back in place, either combined in the single
    INDEX_GRID column or split in INDEX_GRID_LAT and INDEX_GRID_LON.

    Parameters
    ----------
    data : DataFrame
        Dataset holding the latitude, longitude, trajectory id and
        datetime columns. It is modified in place.
    unique_index : bool, optional
        If True a single index, computed as
        ``lon * grid_size_lat_y + lat``, is stored; otherwise the two
        indexes are stored separately, by default True
    label_dtype : Optional[Callable], optional
        Conversion applied to the computed indexes, by default np.int64
    sort : bool, optional
        If True, `data` is first sorted in place by trajectory id and
        datetime, by default True

    Raises
    ------
    Exception
        Any error raised while indexing is re-raised after the operation
        has been closed and stored in ``self.last_operation``.

    """
    operation = begin_operation('create_update_index_grid_feature')
    print('\nCreating or updating index of the grid feature..\n')

    try:
        if sort:
            data.sort_values([TRAJ_ID, DATETIME], inplace=True)

        raw_lat, raw_lon = self.point_to_index_grid(
            data[LATITUDE], data[LONGITUDE]
        )
        lat_idx = label_dtype(raw_lat)
        lon_idx = label_dtype(raw_lon)

        grid_info = self.get_grid()
        if not unique_index:
            data[INDEX_GRID_LAT] = lat_idx
            data[INDEX_GRID_LON] = lon_idx
        else:
            # flatten the (lat, lon) pair into one cell number
            data[INDEX_GRID] = lon_idx * grid_info['grid_size_lat_y'] + lat_idx

        self.last_operation = end_operation(operation)
    except Exception as e:
        self.last_operation = end_operation(operation)
        raise e
def save_grid_pkl(self, filename: str):
    """
    Serialize the grid description to a .pkl file.

    The object returned by ``self.get_grid()`` is written to `filename`
    with ``joblib.dump``.

    Parameters
    ----------
    filename : Text
        Path of the file to be written.

    """
    operation = begin_operation('save_grid_pkl')

    with open(filename, 'wb') as pkl_file:
        # the file is opened first, then the grid description is fetched
        joblib.dump(self.get_grid(), pkl_file)

    self.last_operation = end_operation(operation)
def discretize_based_grid(self, region_size: int = 1000):
    """
    Discretize the dataframe space into grid cells of equal size.

    A ``Grid`` is built from this dataframe and used to add the grid
    index feature to it; the dataframe index is then reset in place,
    dropping the previous index.

    Parameters
    ----------
    region_size: int, optional
        Size of grid cell, by default 1000

    """
    operation = begin_operation('discretize based on grid')
    logger.debug('\nDiscretizing dataframe...')

    # the grid is only needed to compute the index feature
    Grid(self, cell_size=region_size).create_update_index_grid_feature(self)
    self.reset_index(drop=True, inplace=True)

    self.last_operation = end_operation(operation)
def read_grid_pkl(self, filename: str) -> 'Grid':
    """
    Build a Grid from a description stored in a .pkl file.

    Parameters
    ----------
    filename : str
        Path of the file to be read.

    Returns
    -------
    Grid
        New Grid object created from the content of the file.

    Notes
    -----
    ``joblib.load`` unpickles the file content, so only files from a
    trusted source should be loaded.

    """
    operation = begin_operation('read_grid_pkl')

    with open(filename, 'rb') as pkl_file:
        stored_grid = joblib.load(pkl_file)
    loaded = Grid(data=stored_grid)

    self.last_operation = end_operation(operation)
    return loaded
def generate_prev_local_features(
    self,
    label_id: str = TRAJ_ID,
    local_label: str = LOCAL_LABEL,
    sort: bool = True,
    inplace: bool = True
) -> 'PandasDiscreteMoveDataFrame | None':
    """
    Create a feature prev_local with the label of previous local to current point.

    Parameters
    ----------
    label_id : str, optional
        Represents name of column of trajectory id, by default TRAJ_ID
    local_label : str, optional
        Indicates name of column of place labels on symbolic trajectory,
        by default LOCAL_LABEL
    sort : bool, optional
        Whether the dataframe will be sorted, by default True
    inplace : bool, optional
        Represents whether the operation will be performed on
        the data provided or in a copy, by default True

    Returns
    -------
    PandasDiscreteMoveDataFrame
        Object with new features when `inplace` is False, otherwise None

    """
    operation = begin_operation('generate_prev_equ_feature')
    data_ = self if inplace else self.copy()

    # Prepare the frame that is actually modified below, so that a copy
    # requested with inplace=False is the one being prepared.
    # NOTE(review): assumes the helper works on the frame it receives --
    # confirm against _prepare_generate_data.
    ids, size_id, idx = self._prepare_generate_data(data_, sort, label_id)

    message = '\nCreating generate_prev_equ_feature in previous equ\n'
    logger.debug(message)

    # integer labels are converted to float so NaN can mark "no previous"
    # NOTE(review): float16 represents integers exactly only up to 2048;
    # confirm that local labels never exceed that.
    if (data_[local_label].dtype == 'int'):
        data_[local_label] = data_[local_label].astype(np.float16)

    for idx in progress_bar(ids, desc=f'Generating previous {local_label}'):
        current_local = np.array(data_.at[idx, local_label])
        size_id = current_local.size

        if size_id <= 1:
            # a single point has no previous local
            data_.at[idx, PREV_LOCAL] = np.nan
        else:
            # previous to current point
            data_.at[idx, PREV_LOCAL] = shift(current_local, 1)

    data_.reset_index(inplace=True)
    data_.last_operation = end_operation(operation)

    if not inplace:
        return data_
    return None
def show_grid_polygons(
    self,
    data: DataFrame,
    markersize: Optional[float] = 10,
    linewidth: Optional[float] = 2,
    figsize: Optional[Tuple[int, int]] = (10, 10),
    return_fig: Optional[bool] = True,
    save_fig: Optional[bool] = False,
    name: Optional[Text] = 'grid.png',
) -> Optional[figure]:
    """
    Generate a visualization with grid polygons.

    Rows without a polygon are ignored. Every remaining polygon is
    outlined in green; the first one is additionally marked with blue
    circles and the last one with blue crosses.

    Parameters
    ----------
    data : DataFrame
        Input trajectory data; it is not modified
    markersize : float, optional
        Represents visualization size marker, by default 10
    linewidth : float, optional
        Represents visualization size line, by default 2
    figsize : tuple(int, int), optional
        Represents the size (float: width, float: height) of a figure,
            by default (10, 10)
    return_fig : bool, optional
        Represents whether or not to return the generated picture,
        by default True
    save_fig : bool, optional
        Whether to save the figure, by default False
    name : str, optional
        Represents name of a file, by default 'grid.png'

    Returns
    -------
    Optional[figure]
        The generated picture or None

    Raises
    ------
    KeyError
        If the dataframe does not contains the POLYGON feature
    IndexError
        If no row of the dataframe has a polygon

    """
    if POLYGON not in data:
        raise KeyError('POLYGON feature not in dataframe')

    # filter into a new frame so the caller's dataframe is left untouched
    data = data.dropna(subset=[POLYGON])

    operation = begin_operation('show_grid_polygons')

    fig = plt.figure(figsize=figsize)

    # the x/y sequences are swapped on purpose when plotting, as in the
    # rest of this method
    for polygon in data[POLYGON]:
        xs, ys = polygon.exterior.xy
        plt.plot(ys, xs, 'g', linewidth=linewidth, markersize=markersize)

    xs_start, ys_start = data.iloc[0][POLYGON].exterior.xy
    xs_end, ys_end = data.iloc[-1][POLYGON].exterior.xy
    # start point
    plt.plot(ys_start, xs_start, 'bo', markersize=markersize * 1.5)
    # end point
    plt.plot(ys_end, xs_end, 'bX', markersize=markersize * 1.5)

    if save_fig:
        # save this figure explicitly; savefig has no `fig` keyword
        fig.savefig(name)

    self.last_operation = end_operation(operation)

    if return_fig:
        return fig
    return None
def show_grid_polygons(
    self,
    data,
    id_,
    figsize=(10, 10),
    return_fig=True,
    save_fig=False,
    name='grid.png',
):
    """
    Generate a visualization with the grid polygons of one trajectory id.

    The first available polygon of the selected rows is marked with blue
    circles and every available polygon is outlined in green.

    Parameters
    ----------
    data : pymove.core.MoveDataFrameAbstract subclass.
        Input trajectory data.
    id_ : String
        Represents the id.
    figsize : tuple
        Represents the size (float: width, float: height) of a figure.
    return_fig : bool, optional, default True.
        Represents whether or not to return the generated picture.
    save_fig : bool, optional, default False.
        Represents whether or not to save the generated picture.
    name : String, optional, default 'grid.png'.
        Represents name of a file.

    Returns
    -------
    matplotlib.pyplot.figure or None
        The generated picture.

    Raises
    ------
    KeyError
        If the dataframe does not contains the POLYGON feature
    IndexError
        If there is no user with the id passed

    """
    if POLYGON not in data:
        raise KeyError('POLYGON feature not in dataframe')

    df_ = data[data[TRAJ_ID] == id_]

    if not len(df_):
        raise IndexError('No user with id %s in dataframe' % id_)

    operation = begin_operation('show_grid_polygons')

    fig = plt.figure(figsize=figsize)

    # Float entries (presumably NaN placeholders for rows without a
    # polygon) are skipped for the start marker as well as the outlines.
    polygons = [
        polygon for polygon in df_[POLYGON]
        if not isinstance(polygon, float)
    ]

    if polygons:
        # start point
        xs_start, ys_start = polygons[0].exterior.xy
        plt.plot(ys_start, xs_start, 'bo', markersize=20)

    for polygon in polygons:
        xs, ys = polygon.exterior.xy
        plt.plot(ys, xs, 'g', linewidth=2, markersize=5)

    if save_fig:
        # save this figure explicitly; savefig has no `fig` keyword
        fig.savefig(name)

    self.last_operation = end_operation(operation)

    if return_fig:
        return fig
    return None
def generate_prev_local_features(
    self,
    label_id: Optional[Text] = TRAJ_ID,
    local_label: Optional[Text] = LOCAL_LABEL,
    sort: Optional[bool] = True,
    inplace: Optional[bool] = True
) -> Optional['PandasDiscreteMoveDataFrame']:
    """
    Add the PREV_LOCAL feature: the local label of the preceding point.

    For each id returned by ``_prepare_generate_data`` the labels are
    shifted by one position; an id with at most one point receives NaN.

    Parameters
    ----------
    label_id : str, optional
        Name of the trajectory id column, by default TRAJ_ID
    local_label : str, optional
        Name of the column with the place labels of the symbolic
        trajectory, by default LOCAL_LABEL
    sort : bool, optional
        Whether the dataframe will be sorted, by default True
    inplace : bool, optional
        Whether the operation is performed on this object or on a copy,
        by default True

    Returns
    -------
    PandasDiscreteMoveDataFrame
        Whatever ``_return_generated_data`` returns for the given
        `inplace` value: object with new features or None

    Raises
    ------
    Exception
        Any error raised while generating the feature is re-raised after
        diagnostics are printed and the operation has been closed.

    """
    operation = begin_operation('generate_prev_equ_feature')
    original_columns = set(self.columns)
    ids, sum_size_id, size_id, idx = self._prepare_generate_data(
        self, sort, label_id
    )

    try:
        print(
            '\nCreating generate_prev_equ_feature' + ' in previous equ\n'
        )

        # integer labels are converted to float so NaN can be stored
        if (self[local_label].dtype == 'int'):
            self[local_label] = self[local_label].astype(np.float16)

        bar_title = 'Generating previous {}'.format(local_label)
        for idx in progress_bar(ids, desc=bar_title):
            current_local = np.array(self.at[idx, local_label])
            size_id = current_local.size

            if size_id > 1:
                # previous to current point
                self.at[idx, PREV_LOCAL] = shift(current_local, 1)
            else:
                self.at[idx, PREV_LOCAL] = np.nan

        return self._return_generated_data(
            self, original_columns, operation, inplace
        )
    except Exception as e:
        print(
            'label_tid:%s\nidx:%s\nsize_id:%s\nsum_size_id:%s'
            % (label_id, idx, size_id, sum_size_id)
        )
        self.last_operation = end_operation(operation)
        raise e