def get_lats_lon(h5_data: h5py.File, h5_size: int):
    """Scan an HDF5 file for the first index holding both latitudes and longitudes.

    Indices 0 .. h5_size - 1 are tried in order; the scan stops as soon as
    ``utils.fetch_hdf5_sample`` returns a non-None value for both "lat" and
    "lon" at the same index.

    Arguments:
        h5_data {h5py.File} -- opened HDF5 file to read from.
        h5_size {int} -- number of indices to try.

    Returns:
        tuple -- (lats, lons) from the last index tried; either element may
        be None when no index provides both (both are None if h5_size <= 0).
    """
    latitudes = None
    longitudes = None
    for sample_idx in range(h5_size):
        latitudes = utils.fetch_hdf5_sample("lat", h5_data, sample_idx)
        longitudes = utils.fetch_hdf5_sample("lon", h5_data, sample_idx)
        if latitudes is not None and longitudes is not None:
            break
    return latitudes, longitudes
def get_lats_longs_goes13_coords(catalog):
    """Return one latitude/longitude sample read from the catalog's HDF5 files.

    The distinct values of the ``hdf5_16bit_path`` column are visited in
    turn. In each file a random offset is drawn in
    [0, global_dataframe_end_idx - global_dataframe_start_idx) and the "lat"
    and "lon" samples at that offset are fetched. The search stops at the
    first file for which BOTH samples are available.

    Arguments:
        catalog -- object supporting ``groupby('hdf5_16bit_path')``
            (presumably a pandas DataFrame -- confirm against caller).

    Returns:
        tuple -- (lats, lons) as returned by ``utils.fetch_hdf5_sample``.

    Raises:
        AssertionError -- if no file provides both latitudes and longitudes.
    """
    goes13_ds = 'hdf5_16bit_path'
    lats = None
    lons = None
    all_paths = list(catalog.groupby(goes13_ds).groups.keys())

    for path in all_paths:
        with h5py.File(path, "r") as h5_data:
            offset = np.random.randint(
                0,
                h5_data.attrs["global_dataframe_end_idx"] -
                h5_data.attrs["global_dataframe_start_idx"])
            lats = utils.fetch_hdf5_sample("lat", h5_data, offset)
            lons = utils.fetch_hdf5_sample("lon", h5_data, offset)
        # Keep searching while EITHER value is missing: stopping as soon as
        # only one of them is found would fail the check below even though a
        # later file may hold both.
        if lats is not None and lons is not None:
            break

    # Raised explicitly (rather than with `assert`) so the check still runs
    # under `python -O`; the exception type is unchanged for callers.
    if lats is None or lons is None:
        raise AssertionError('No latitude and longitude values were found')
    return lats, lons
def get_image(self, timestamp: datetime, station_coords: tuple):
    """Return the normalized 5-channel crop centered on a station.

    Arguments:
        timestamp {datetime} -- time used to look up the HDF5 path and offset
            through ``self.metadata.get_path``.
        station_coords {tuple} -- station coordinates; index 0 is matched
            against the file's latitudes and index 1 against its longitudes.

    Returns:
        np.ndarray -- channels-last array of the cropped, normalized
        channels. An all-zero array of shape (image_size, image_size, 5) is
        returned when the timestamp has no file or its lat/lon are missing.
    """
    path_offset = self.metadata.get_path(timestamp)
    if path_offset is None:
        # No file is known for this timestamp: return an empty image.
        return np.zeros((self.image_size, self.image_size, 5))
    path, offset = path_offset

    # The context manager closes the file on every exit path, including the
    # early return below.
    with h5py.File(path, "r") as h5_data:
        # Get latitude & longitude stored in the file
        lats = utils.fetch_hdf5_sample("lat", h5_data, offset)
        lons = utils.fetch_hdf5_sample("lon", h5_data, offset)
        if lats is None or lons is None:
            return np.zeros((self.image_size, self.image_size, 5))

        # Get data from the 5 channels
        images = []
        for channel in ('ch1', 'ch2', 'ch3', 'ch4', 'ch6'):
            img = utils.fetch_hdf5_sample(channel, h5_data, offset)
            if isinstance(img, np.ndarray):
                images.append(img)
            else:
                # Missing channel: substitute zeros.
                # NOTE(review): this placeholder is (image_size, image_size)
                # while the crop indices below come from the lat/lon grid, so
                # its crop may not match the other channels' shape -- confirm.
                images.append(np.zeros((self.image_size, self.image_size)))

    # Crop image around the pixel closest to the station
    pixel_coords = (np.argmin(np.abs(lats - station_coords[0])),
                    np.argmin(np.abs(lons - station_coords[1])))
    pixels = self.image_size // 2
    adjustement = self.image_size % 2  # Adjustement if image_size is odd

    cropped_images = []
    for img, mean, std in zip(images, data.images_mean.values(),
                              data.images_std.values()):
        img = (img - mean) / std  # Normalize image
        cropped_images.append(
            img[pixel_coords[0] - pixels:pixel_coords[0] + pixels + adjustement,
                pixel_coords[1] - pixels:pixel_coords[1] + pixels + adjustement])

    # Stack as (channel, row, col) then move channels to the last axis.
    return np.moveaxis(np.array(cropped_images), 0, -1)
def fetch_frames(datetimes, frames_df, channels, seqs):
    """Load and normalize the frames listed in ``frames_df`` into one array.

    Rows are grouped by their 'path' column so that each HDF5 file is opened
    only once.

    Arguments:
        datetimes -- sized collection; only its length is used (first axis).
        frames_df -- table with the columns 'path' (HDF5 file), 'offset'
            (frame index in that file) and 'position' (pair of indices into
            the first two axes of the output).
        channels -- names of the channels to fetch, in output order.
        seqs {int} -- size of the second axis of the output.

    Returns:
        np.ndarray -- array of shape
        (len(datetimes), seqs, len(channels), IMAGE_HEIGHT, IMAGE_WIDTH).
        A slot is zero when its frame is missing, has an unexpected shape, or
        has no row in ``frames_df``.
    """
    # Zero-initialized (not np.empty) so that positions without a row in
    # frames_df hold zeros instead of uninitialized memory.
    output = np.zeros(
        (len(datetimes), seqs, len(channels), IMAGE_HEIGHT, IMAGE_WIDTH))
    expected_shape = (IMAGE_HEIGHT, IMAGE_WIDTH)

    for path, group in frames_df.groupby('path', sort=False):
        with h5py.File(path, "r") as h5_data:
            for position, frame in zip(group['position'], group['offset']):
                for c, channel in enumerate(channels):
                    channel_idx_data = utils.fetch_hdf5_sample(
                        channel, h5_data, frame)
                    if (channel_idx_data is None
                            or channel_idx_data.shape != expected_shape):
                        # Written explicitly so a duplicated position cannot
                        # keep stale data from an earlier row.
                        output[position[0], position[1], c] = 0
                    else:
                        output[position[0], position[1], c] = \
                            tf.keras.utils.normalize(channel_idx_data)
    return output
def get_full_image(image_data):
    """Build the multi-channel crop around the station for one frame.

    Relies on names from the enclosing scope: ``channels``,
    ``cropped_img_size``, ``image_time_offset_idx`` and
    ``station_pixel_coords``.

    Arguments:
        image_data -- HDF5 handle passed to ``utils.fetch_hdf5_sample``.

    Returns:
        np.ndarray -- float64 array of shape
        (cropped_img_size, cropped_img_size, len(channels)), or None when a
        channel is missing, is not of shape (650, 1500), or cannot be cropped.
    """
    all_channels = np.empty(
        [cropped_img_size, cropped_img_size, len(channels)])
    for ch_idx, channel in enumerate(channels):
        raw_img = utils.fetch_hdf5_sample(channel, image_data,
                                          image_time_offset_idx)
        if raw_img is None or raw_img.shape != (650, 1500):
            return None
        try:
            # Crop a copy so the fetched frame itself is left untouched.
            array_cropped = utils.crop(copy.deepcopy(raw_img),
                                       station_pixel_coords, cropped_img_size)
        except Exception:
            # Best effort: a failed crop makes the whole frame unavailable.
            # `Exception` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            return None
        # TODO: an earlier version flipped the crop vertically
        # (cv.flip(array_cropped, 0)); confirm whether that is needed.
        # TODO: decide whether a min-max normalization to uint8 belongs here.
        all_channels[:, :, ch_idx] = array_cropped.astype(
            np.float64)  # convert to image format
    return all_channels
def generator(self, data):
    """Yield, channel by channel, the per-frame station crops of one HDF5 file.

    Nothing is yielded when the path does not point to an existing file.
    The station pixel centres are computed once, from the first lat/lon
    sample found in the file, and cached in ``self.stations_px_center``.

    Arguments:
        data {tuple} -- pair whose second element is the HDF5 file path; the
            first element is not used here.

    Yields:
        list -- one list per channel ('ch1', 'ch2', 'ch3', 'ch4', 'ch6').
        Each entry corresponds to a frame offset in 0..95 and is a list with
        one 2-D array per station: a copy of the frame sliced
        ``self.px_offset`` pixels on each side of the station centre, or
        zeros of shape (crop_size, crop_size) when the frame is missing or
        not of shape (IMAGE_HEIGHT, IMAGE_WIDTH).
    """
    channel_names = ['ch1', 'ch2', 'ch3', 'ch4', 'ch6']
    frames_per_file = 96
    _, path = data
    lats = None
    lons = None
    # Bound before the `try` so the cleanup in `finally` never hits an
    # unbound name (e.g. when the path is not a file or opening it fails).
    ch_images = None
    try:
        if os.path.isfile(path):
            with h5py.File(path, "r") as h5_data:
                for channel in channel_names:
                    ch_images = []
                    for offset in range(frames_per_file):
                        channel_idx_data = utils.fetch_hdf5_sample(
                            channel, h5_data, offset)
                        if channel_idx_data is None or channel_idx_data.shape != (
                                IMAGE_HEIGHT, IMAGE_WIDTH):
                            # Unusable frame. The same zero array object is
                            # repeated for every station.
                            ch_images.append([
                                np.zeros((self.config.crop_size,
                                          self.config.crop_size))
                            ] * len(self.stations))
                            continue

                        if not self.stations_px_center:
                            # Look for the first frame holding both lat and
                            # lon; bounded so a file without coordinates
                            # cannot loop forever.
                            i = 0
                            while (lats is None
                                   or lons is None) and i < frames_per_file:
                                lats = utils.fetch_hdf5_sample(
                                    "lat", h5_data, i)
                                lons = utils.fetch_hdf5_sample(
                                    "lon", h5_data, i)
                                i += 1
                            if lats is None or lons is None:
                                # No coordinates available: skip this frame.
                                continue

                            # Map each station's (lat, lon) linearly onto the
                            # index range of the lat/lon vectors.
                            centers = {}
                            for s in self.stations:
                                lat, lon, _ = self.stations[s]
                                px_lat = len(lats) * (
                                    (lat - lats[0]) / (lats[-1] - lats[0]))
                                px_lon = len(lons) * (
                                    (lon - lons[0]) / (lons[-1] - lons[0]))
                                centers[s] = (int(px_lat), int(px_lon))
                            self.stations_px_center = centers

                            # Release the coordinate arrays; rebinding to
                            # None keeps the names defined for later frames.
                            lats = None
                            lons = None
                            gc.collect()

                        crops = []
                        for s in self.stations_px_center:
                            center = self.stations_px_center[s]
                            px_x_ = center[0] - self.px_offset
                            px_x = center[0] + self.px_offset
                            px_y_ = center[1] - self.px_offset
                            px_y = center[1] + self.px_offset
                            # Copy so the crop does not keep the full frame
                            # alive.
                            crops.append(
                                channel_idx_data[px_x_:px_x,
                                                 px_y_:px_y].copy())
                        ch_images.append(crops)
                    # NOTE(review): one yield per channel is assumed here --
                    # confirm the intended nesting of this yield.
                    yield ch_images
    finally:
        del ch_images
        gc.collect()
def create_crops(args: tuple):
    """Function executed by multiple threads to create crops around stations and save them to disk as pickle.

    One file named ``<date>.pkl`` is written in ``dest`` per calendar day. It
    holds a dict mapping each timestamp of that day to a dict of
    station name -> normalized crop (channels last). Timestamps whose lat/lon
    samples or any of the 5 channels are unavailable are skipped with a
    warning.

    Arguments:
        args {tuple} -- dest, paths, image_size, thread_nb
            dest: directory receiving the pickle files.
            paths: iterable of (time, path, offset) triples; a new pickle is
                started each time the date of ``time`` changes.
            image_size: side, in pixels, of the square crops.
            thread_nb: position/label of this worker's progress bar.
    """
    dest, paths, image_size, thread_nb = args
    channel_names = ('ch1', 'ch2', 'ch3', 'ch4', 'ch6')

    # Crop geometry does not depend on the timestamp or the station.
    pixels = image_size // 2
    adjustement = image_size % 2  # Adjustement if image_size is odd

    open_path = None
    h5_data = None
    curr_date = None
    cropped_images_day = {}

    try:
        # Iterate over all the existing timestamps
        for timestamp, path, offset in tqdm(paths,
                                            position=thread_nb,
                                            desc=f'Thread {thread_nb}',
                                            leave=False):
            # Open hdf5 file if it is not already opened
            if open_path != path:
                if h5_data is not None:
                    h5_data.close()
                    h5_data = None
                h5_data = h5py.File(path, "r")
                open_path = path

            # Save cropped images of the previous day when the date changes
            if curr_date != timestamp.date():
                if curr_date is not None:
                    with open(os.path.join(dest,
                                           str(curr_date) + '.pkl'),
                              'wb') as f:
                        pickle.dump(cropped_images_day, f)
                cropped_images_day = {}
                curr_date = timestamp.date()

            # Get latitude & longitude stored in the file
            lats = utils.fetch_hdf5_sample("lat", h5_data, offset)
            lons = utils.fetch_hdf5_sample("lon", h5_data, offset)
            if lats is None or lons is None:
                logger.warning(
                    f'latlong of date {timestamp} is unavailable, skipping...')
                continue

            # Get data from the 5 channels
            images = []
            for channel in channel_names:
                img = utils.fetch_hdf5_sample(channel, h5_data, offset)
                if isinstance(img, np.ndarray):
                    images.append(img)
            if len(images) < len(channel_names):
                logger.warning(
                    f'{len(channel_names) - len(images)} channels are not available at date {timestamp}, skipping...'
                )
                continue

            # Normalize each channel once; the result is shared by all the
            # stations instead of being recomputed per station.
            normalized = [(img - mean) / std
                          for img, mean, std in zip(images,
                                                    images_mean.values(),
                                                    images_std.values())]

            # Crop stations
            cropped_images_stations = {}
            for station_name, station_coords in stations.items():
                row = np.argmin(np.abs(lats - station_coords[0]))
                col = np.argmin(np.abs(lons - station_coords[1]))

                # Crop the images with the station centered
                # TODO : Check if the slice is out of bounds
                cropped_images = [
                    img[row - pixels:row + pixels + adjustement,
                        col - pixels:col + pixels + adjustement]
                    for img in normalized
                ]
                cropped_images_stations[station_name] = np.moveaxis(
                    np.array(cropped_images), 0, -1)

            cropped_images_day[timestamp] = cropped_images_stations
    finally:
        # Close the last opened file, including when an error interrupts
        # the loop.
        if h5_data is not None:
            h5_data.close()

    # Save the last day (nothing to do when `paths` was empty)
    if cropped_images_day:
        with open(os.path.join(dest, str(curr_date) + '.pkl'), 'wb') as f:
            pickle.dump(cropped_images_day, f)