def fetch_previous_file(self, datastream_name: str, start_time: str) -> DisposableLocalTempFile:
    """Fetch the stored file that immediately precedes ``start_time``.

    Candidate files are looked up with ``self.find`` in a window running from
    one day before to one day after ``start_time``, restricted to
    ``DatastreamStorage.default_file_type``.  Among those candidates, the one
    whose filename date sorts directly before ``start_time`` is selected.

    :param datastream_name: Name of the datastream to search.
    :param start_time: Reference timestamp formatted as ``%Y%m%d.%H%M%S``.
    :return: The result of ``self._tmp.fetch`` for the selected file, or a
        ``DisposableLocalTempFile`` wrapping ``None`` when no candidate has a
        date earlier than ``start_time``.
    :raises ValueError: If ``start_time`` does not match ``%Y%m%d.%H%M%S``.
    """
    timestamp_format = "%Y%m%d.%H%M%S"
    one_day = datetime.timedelta(days=1)

    # Search one day on either side of the reference time; the upper bound is
    # padded because find() treats its end time as exclusive.
    anchor = datetime.datetime.strptime(start_time, timestamp_format)
    window_start = (anchor - one_day).strftime(timestamp_format)
    window_end = (anchor + one_day).strftime(timestamp_format)

    candidates = self.find(
        datastream_name,
        window_start,
        window_end,
        filetype=DatastreamStorage.default_file_type,
    )
    candidate_dates = [DSUtil.get_date_from_filename(path) for path in candidates]

    # bisect_left gives the index of the first date >= start_time, so the
    # entry just before it (if any) is the closest strictly-earlier file.
    insertion_index = 0
    if candidate_dates:
        insertion_index = bisect.bisect_left(candidate_dates, start_time)

    if insertion_index <= 0:
        # Nothing earlier than start_time: hand back an empty temp-file wrapper.
        return DisposableLocalTempFile(None)

    return self._tmp.fetch(candidates[insertion_index - 1])
def test_plotting_utilities(dataset):
    """Exercise the DSUtil plot-naming, date-parsing, plotting and image-check helpers."""
    plot_name = DSUtil.get_plot_filename(dataset, "height", "png")
    plot_path = os.path.join(STORAGE_PATH, "test.SortedDataset.a1", plot_name)

    # The generated name must follow the datastream/date/variable convention,
    # and the date must be recoverable from the full path.
    assert plot_name == "test.SortedDataset.a1.20211001.000000.height.png"
    assert DSUtil.get_date_from_filename(plot_path) == "20211001.000000"

    # Writing the QC plot should produce something recognised as an image,
    # whereas the processed netCDF file must not be.
    DSUtil.plot_qc(dataset, "height_out", plot_path)
    assert DSUtil.is_image(plot_path)
    assert not DSUtil.is_image(PROCESSED_NC)
def find(self, datastream_name: str, start_time: str, end_time: str, filetype: str = None) -> List[str]:
    """List the stored files of a datastream whose date falls in a time window.

    The datastream directory is resolved under ``self._root``.  A file is
    selected when the date extracted from its name satisfies
    ``start_time <= date < end_time`` (plain string comparison).

    :param datastream_name: Name of the datastream to search.
    :param start_time: Inclusive lower bound of the window.
    :param end_time: Exclusive upper bound of the window.
    :param filetype: Optional key into ``DatastreamStorage.file_filters``;
        when given, only paths accepted by that filter are kept.
    :return: Sorted list of matching file paths; empty if the datastream
        directory does not exist.
    """
    # TODO: think about refactoring so that you don't need both start and end time
    # TODO: if times don't include hours/min/sec, then add .000000 to the string
    datastream_dir = DSUtil.get_datastream_directory(datastream_name=datastream_name, root=self._root)

    if os.path.isdir(datastream_dir):
        matches = [
            os.path.join(datastream_dir, entry)
            for entry in os.listdir(datastream_dir)
            if start_time <= DSUtil.get_date_from_filename(entry) < end_time
        ]
    else:
        matches = []

    if filetype is not None:
        accepts = DatastreamStorage.file_filters[filetype]
        matches = [path for path in matches if accepts(path)]

    matches.sort()
    return matches
def find(self, datastream_name: str, start_time: str, end_time: str, filetype: str = None) -> List[S3Path]:
    """List the stored objects of a datastream whose date falls in a time window.

    The datastream sub-path is joined onto ``self.root`` and listed through
    ``self.tmp.listdir``.  An entry is selected when the date extracted from
    its ``bucket_path`` satisfies ``start_time <= date < end_time``.

    :param datastream_name: Name of the datastream to search.
    :param start_time: Inclusive lower bound of the window.
    :param end_time: Exclusive upper bound of the window.
    :param filetype: Optional key into ``DatastreamStorage.file_filters``;
        when given, only entries accepted by that filter are kept.
    :return: Sorted list of the matching entries.
    """
    # TODO: think about refactoring so that you don't need both start and end time
    # TODO: if times don't include hours/min/sec, then add .000000 to the string
    datastream_subpath = DSUtil.get_datastream_directory(datastream_name=datastream_name)
    datastream_dir = self.root.join(datastream_subpath)

    matches = [
        entry
        for entry in self.tmp.listdir(datastream_dir)
        if start_time <= DSUtil.get_date_from_filename(entry.bucket_path) < end_time
    ]

    if filetype is not None:
        accepts = DatastreamStorage.file_filters[filetype]
        matches = [entry for entry in matches if accepts(entry)]

    return sorted(matches)