Exemplo n.º 1
0
    def touch_timeline(self):
        """
            Build the timeline tensor for the interpolated data and save it.

            The day of year is parsed from the name of every file in the
            interpolated directory, which is expected to contain the date
            as *YYYYDDD* (4 digits followed by 3 digits). Only the DDD part
            is stored; files whose name has no such date are skipped.

            The result is written with np.save to
            self.get_timeline_path(extension='npy') as a float array of
            shape (1, number_of_matched_files).
        """
        int_data_dir_path = self.get_interpolated_path()
        iutils.guard(os.path.isdir(int_data_dir_path),
                     'Run touch_interpolated_data() before this.')

        # The unified tensor and the timeline itself are not data chunks.
        filenames = [
            f for f in iutils.ls(int_data_dir_path) if
            self.unified_tensor_stem not in f and self.timeline_stem not in f
        ]

        date_pattern = re.compile(r'\d{4}(\d{3})')
        timeline = []
        for f in filenames:
            # Look at the file name only, so that a run of digits in the
            # directory part of the path is never mistaken for the date.
            m = date_pattern.search(os.path.basename(f))
            if m:
                timeline.append(m.group(1))

        # np.float was only an alias of the builtin float and was removed
        # in NumPy 1.24; the builtin gives the same float64 dtype.
        timeline = np.array([timeline], dtype=float)

        timeline_path = self.get_timeline_path(extension='npy')
        np.save(timeline_path, timeline)
        logger.info(f'timeline is created here: {timeline_path}')
Exemplo n.º 2
0
    def touch_unified_tensor(self, move_new_axis_to_end):
        """
            Stack every interpolated chunk into one tensor and save it.

            All files of the interpolated directory, except the unified
            tensor and the timeline, are loaded with np.load and stacked
            along a new leading axis. When move_new_axis_to_end is truthy
            that axis is moved to the last position. The tensor is written
            with np.save to self.get_unified_tensor_path(extension='npy').
        """
        interpolated_dir = self.get_interpolated_path()
        iutils.guard(os.path.isdir(interpolated_dir),
                     'Run touch_interpolated_data() before this.')

        # Collect the chunk paths, leaving out previously generated outputs.
        chunk_paths = []
        for path in iutils.ls(interpolated_dir):
            if self.unified_tensor_stem in path or self.timeline_stem in path:
                continue
            chunk_paths.append(path)

        has_npy_ext = [path.split('.')[-1] == 'npy' for path in chunk_paths]
        iutils.guard(all(has_npy_ext),
                     'Files in dir_path should have .npy ext')

        stacked = np.array([np.load(path) for path in chunk_paths])
        if move_new_axis_to_end:
            result = np.moveaxis(stacked, 0, -1)
        else:
            result = stacked

        out_path = self.get_unified_tensor_path(extension='npy')
        np.save(out_path, result)
        logger.info(f'unified_tensor is created here: {out_path}')
Exemplo n.º 3
0
    def read_raw_data_files(self):
        """
            Lazily read every NetCDF file found in self.raw_data_dir.

            For each file a tuple
            (lons, lats, inv_obj, inv_obj_mask, raw_data_file) is yielded:

            lons, lats -- unmasked 'longitude' / 'latitude' variables of
                the 'navigation_data' group;
            inv_obj -- the self.investigated_obj variable of the
                'geophysical_data' group with masked points set to nan;
            inv_obj_mask -- boolean array, True where inv_obj was NOT
                masked in the file;
            raw_data_file -- the path that was read.

            Each file is closed before its tuple is yielded.
        """
        data_files = iutils.ls(self.raw_data_dir)
        iutils.guard(all(d.split('.')[-1] == 'nc' for d in data_files),
                     'NetCDF format is only supported format')

        for raw_data_file in data_files:
            # Slicing with [:] copies the variables into memory, so the
            # dataset can be closed before control goes to the consumer.
            # Otherwise one handle per file would stay open (and a consumer
            # could not delete the file on platforms that lock open files).
            # NOTE(review): relies on nc.Dataset being a context manager,
            # as netCDF4.Dataset is - confirm `nc` is netCDF4.
            with nc.Dataset(raw_data_file, mode='r') as ds:
                nav_group = ds.groups['navigation_data']
                # Initially data is masked, unmask it for simpler usage
                lons = np.ma.getdata(nav_group.variables['longitude'][:])
                lats = np.ma.getdata(nav_group.variables['latitude'][:])

                geo_group = ds.groups['geophysical_data']
                inv_obj = geo_group.variables[self.investigated_obj][:]

            # Initially mask consists of: False - lake, True - land
            # I want: True - lake, False - land
            # getmaskarray always returns a full boolean array, even when
            # nothing is masked and `.mask` is the scalar `nomask`.
            inv_obj_mask = np.invert(np.ma.getmaskarray(inv_obj))

            # Unmask and place nan in land's points
            inv_obj = np.ma.filled(inv_obj, np.nan)

            yield lons, lats, inv_obj, inv_obj_mask, raw_data_file
Exemplo n.º 4
0
    def preserve_best_day_only(self):
        """
            Keep only the best interpolated matrix for every day.

            Names of the files in interpolated should start with optional
            letters followed by the date in format *YYYYDDD*.
            .npy extension is only supported.

            Files are grouped by the DDD part of their name. Inside a
            group the file with the highest iutils.calculate_fullness()
            against the static grid mask is kept (the first one wins a
            tie) and all the others are removed from disk.

            NOTE(review): the year is not part of the grouping key, so the
            same day of year from different years competes for one slot -
            confirm the data never spans several years.
        """
        static_grid_dir_path = self.get_static_grid_path()
        iutils.guard(os.path.isdir(static_grid_dir_path),
                     'Run touch_static_grid() before this.')

        int_data_dir_path = self.get_interpolated_path()
        iutils.guard(os.path.isdir(int_data_dir_path),
                     'Run touch_interpolated_data() before this.')

        geo_obj_mask = np.load(self.get_static_grid_mask_path())

        data_files = [
            f for f in iutils.ls(int_data_dir_path) if
            self.unified_tensor_stem not in f and self.timeline_stem not in f
        ]
        iutils.guard(
            all(f.split('.')[-1] == 'npy' for f in data_files),
            'Interpolated chunks in interpolated/ should have .npy ext')

        # Match against the bare file name: the directory path may contain
        # regex metacharacters or a different separator, so it must not be
        # pasted into the pattern.
        day_pattern = re.compile(r'[a-z]*\d{4}(\d{3})', re.I)

        # One pass: day of year -> files of that day, in listing order.
        files_by_day = {}
        for f in data_files:
            m = day_pattern.match(os.path.basename(f))
            iutils.guard(m is not None,
                         f'Cannot find YYYYDDD date in filename: {f}')
            files_by_day.setdefault(int(m.group(1)), []).append(f)

        files_to_keep = set()
        for day_files in files_by_day.values():
            best_file = day_files[0]
            best_fullness = iutils.calculate_fullness(
                np.load(best_file), geo_obj_mask)
            for f in day_files[1:]:
                fullness = iutils.calculate_fullness(np.load(f), geo_obj_mask)
                # Strict comparison: the earliest file wins a tie.
                if fullness > best_fullness:
                    best_fullness = fullness
                    best_file = f
            files_to_keep.add(best_file)

        # Nothing is deleted until every day has been fully analysed.
        for f in data_files:
            if f not in files_to_keep:
                os.remove(f)

        logger.info(f'Best day is only kept in {int_data_dir_path}.')
Exemplo n.º 5
0
    def touch_interpolated_data(
            self,
            fullness_threshold,
            remove_low_fullness,
            interpolation_strategy  # Either radius or neighbours
    ):
        """
            Interpolate raw data in raw_data_dir to static grid and save
            every result as <raw file stem>.npy in the interpolated
            directory (self.get_interpolated_path()).

            fullness_threshold -- results whose fullness is below this
                value are not saved; a falsy value disables the check.
            remove_low_fullness -- when truthy, the raw file of a result
                rejected by the threshold is deleted as well.
            interpolation_strategy -- forwarded unchanged to
                self.interpolate_raw_data_obj().

            Raw files that already have an interpolated counterpart are
            skipped. If interpolation raises, a warning is logged and the
            file is treated as an all-nan result with fullness 0.
        """
        # np.bool was only an alias of the builtin bool and was removed in
        # NumPy 1.24; the builtin gives the same dtype.
        mask = np.load(os.path.join(self.get_static_grid_path(),
                                    'mask.npy')).astype(bool)
        interpolated_dir = self.get_interpolated_path()
        os.makedirs(interpolated_dir, exist_ok=True)

        logger.info('Interpolating data.')
        raw_data_files_count = len(iutils.ls(self.raw_data_dir))
        for i, (raw_lons, raw_lats, raw_inv_obj, raw_inv_obj_mask,
                raw_data_file) in tqdm(enumerate(self.read_raw_data_files()),
                                       total=raw_data_files_count):
            int_data_path = os.path.join(
                interpolated_dir, f'{Path(raw_data_file).stem}.npy')

            # Already interpolated on a previous run
            if os.path.exists(int_data_path):
                continue

            try:
                int_inv_obj = self.interpolate_raw_data_obj(
                    raw_lons, raw_lats, raw_inv_obj, raw_inv_obj_mask,
                    interpolation_strategy)
                fullness = iutils.calculate_fullness(int_inv_obj, mask)
            except Exception as e:
                # Deliberately best-effort: one bad file must not stop the
                # whole run, it is recorded as an empty result instead.
                logger.warning(f'{i} is empty. {e}')
                fullness = 0
                int_inv_obj = np.full(mask.shape, np.nan)

            if fullness_threshold and fullness < fullness_threshold:
                if remove_low_fullness:
                    os.remove(raw_data_file)
                continue

            np.save(int_data_path, int_inv_obj)

        logger.success(
            f'Interpolation is completed, interpolated data is here: {interpolated_dir}'
        )
Exemplo n.º 6
0
    def preserve_day_range_only(self, day_range):
        """
            Delete every raw file whose day of year is not in day_range.

            Names of the files in raw_data_dir should start with optional
            letters followed by the date in format *YYYYDDD*; only .nc
            files are supported. A file is kept when its DDD part equals
            one of the integers of day_range (zero padded to 3 digits).
            Files that match no day, including files without a date in
            their name, are removed from disk, so an empty day_range
            removes every file.
        """
        data_files = iutils.ls(self.raw_data_dir)
        iutils.guard(all(d.split('.')[-1] == 'nc' for d in data_files),
                     'NetCDF format is only supported format')

        # Patterns are matched against the bare file name. Pasting the
        # directory path into the regex would break on metacharacters or a
        # trailing separator, match nothing, and so delete all the data.
        day_patterns = [
            re.compile(r'[a-z]*\d{4}' + f'{day:03d}', re.I)
            for day in day_range
        ]

        for f in data_files:
            name = os.path.basename(f)
            if not any(p.match(name) for p in day_patterns):
                os.remove(f)

        logger.info(
            f'Day range: {day_range} is only kept in {self.raw_data_dir}.')