def save_local_path(self, local_path: str, new_filename: str = None) -> str:
    """Copy a local file into its datastream directory under the storage root.

    The destination directory is derived from the datastream name embedded in
    the filename, and is created if it does not yet exist.

    :param local_path: Path to an existing local file to store.
    :param new_filename: Optional replacement filename; if None, the basename
        of ``local_path`` is used.
    :return: The destination path the file was copied to.
    """
    # TODO: we should perform a REGEX check to make sure that the filename is valid
    filename = new_filename if new_filename else os.path.basename(local_path)
    datastream_name = DSUtil.get_datastream_name_from_filename(filename)
    dest_dir = DSUtil.get_datastream_directory(
        datastream_name=datastream_name, root=self._root
    )
    os.makedirs(dest_dir, exist_ok=True)  # make sure the dest folder exists
    dest_path = os.path.join(dest_dir, filename)
    shutil.copy(local_path, dest_path)
    return dest_path
def save_local_path(self, local_path: str, new_filename: str = None):
    """Upload a local file to its datastream location in S3 and return the
    resulting S3 path.

    :param local_path: Path to an existing local file to upload.
    :param new_filename: Optional replacement filename; if None, the basename
        of ``local_path`` is used.
    :return: The S3 path the file was uploaded to.
    """
    # TODO: we should perform a REGEX check to make sure that the filename is valid
    if new_filename:
        filename = new_filename
    else:
        filename = os.path.basename(local_path)
    datastream_name = DSUtil.get_datastream_name_from_filename(filename)
    subpath = DSUtil.get_datastream_directory(datastream_name=datastream_name)
    s3_path = self.root.join(subpath, filename)
    self.tmp.upload(local_path, s3_path)
    return s3_path
def find(self, datastream_name: str, start_time: str, end_time: str, filetype: str = None) -> List[str]:
    """Return the sorted paths of stored files for a datastream whose
    filename date falls in ``[start_time, end_time)``.

    :param datastream_name: Name of the datastream to search.
    :param start_time: Inclusive lower bound, compared as a string against
        the date extracted from each filename.
    :param end_time: Exclusive upper bound, compared the same way.
    :param filetype: Optional key into ``DatastreamStorage.file_filters``
        used to narrow the results.
    :return: Sorted list of matching file paths (empty if the datastream
        directory does not exist).
    """
    # TODO: think about refactoring so that you don't need both start and end time
    # TODO: if times don't include hours/min/sec, then add .000000 to the string
    dir_to_check = DSUtil.get_datastream_directory(
        datastream_name=datastream_name, root=self._root
    )
    storage_paths = []
    if os.path.isdir(dir_to_check):
        storage_paths = [
            os.path.join(dir_to_check, name)
            for name in os.listdir(dir_to_check)
            if start_time <= DSUtil.get_date_from_filename(name) < end_time
        ]
    if filetype is not None:
        keep = DatastreamStorage.file_filters[filetype]
        storage_paths = [path for path in storage_paths if keep(path)]
    return sorted(storage_paths)
def find(self, datastream_name: str, start_time: str, end_time: str, filetype: str = None) -> List[S3Path]:
    """Return the sorted S3 paths of stored files for a datastream whose
    filename date falls in ``[start_time, end_time)``.

    :param datastream_name: Name of the datastream to search.
    :param start_time: Inclusive lower bound, compared as a string against
        the date extracted from each file's bucket path.
    :param end_time: Exclusive upper bound, compared the same way.
    :param filetype: Optional key into ``DatastreamStorage.file_filters``
        used to narrow the results.
    :return: Sorted list of matching ``S3Path`` objects.
    """
    # TODO: think about refactoring so that you don't need both start and end time
    # TODO: if times don't include hours/min/sec, then add .000000 to the string
    subpath = DSUtil.get_datastream_directory(datastream_name=datastream_name)
    dir_to_check = self.root.join(subpath)
    storage_paths = [
        entry
        for entry in self.tmp.listdir(dir_to_check)
        if start_time <= DSUtil.get_date_from_filename(entry.bucket_path) < end_time
    ]
    if filetype is not None:
        keep = DatastreamStorage.file_filters[filetype]
        storage_paths = [path for path in storage_paths if keep(path)]
    return sorted(storage_paths)