예제 #1
0
    def save_local_path(self, local_path: str, new_filename: str = None) -> Any:
        """Copy a local file into the directory of the datastream it belongs to.

        The datastream name is derived from the target filename via
        ``DSUtil.get_datastream_name_from_filename``; the destination directory
        is resolved from that name and ``self._root`` and is created when it
        does not exist yet.

        :param local_path: Path of the file to copy.
        :param new_filename: Name to store the file under. When it is omitted
            (or otherwise falsy) the basename of ``local_path`` is used.
        :return: The destination path the file was copied to.
        """
        # TODO: we should perform a REGEX check to make sure that the filename is valid
        target_name = new_filename or os.path.basename(local_path)
        stream = DSUtil.get_datastream_name_from_filename(target_name)
        target_dir = DSUtil.get_datastream_directory(datastream_name=stream, root=self._root)

        # The destination folder may not exist the first time a datastream is saved.
        os.makedirs(target_dir, exist_ok=True)

        target_path = os.path.join(target_dir, target_name)
        shutil.copy(local_path, target_path)
        return target_path
예제 #2
0
    def save_local_path(self, local_path: str, new_filename: str = None):
        """Upload a local file to the location of the datastream it belongs to.

        The datastream name is derived from the target filename via
        ``DSUtil.get_datastream_name_from_filename``. The destination is built
        by joining the datastream's sub-directory and the filename onto
        ``self.root``, and the file is sent there with ``self.tmp.upload``.

        :param local_path: Path of the local file to upload.
        :param new_filename: Name to store the file under. When it is omitted
            (or otherwise falsy) the basename of ``local_path`` is used.
        :return: The destination path object produced by ``self.root.join``.
        """
        # TODO: we should perform a REGEX check to make sure that the filename is valid
        target_name = new_filename or os.path.basename(local_path)
        stream = DSUtil.get_datastream_name_from_filename(target_name)
        relative_dir = DSUtil.get_datastream_directory(datastream_name=stream)

        destination = self.root.join(relative_dir, target_name)
        self.tmp.upload(local_path, destination)
        return destination
예제 #3
0
    def find(self, datastream_name: str, start_time: str, end_time: str,
             filetype: str = None) -> List[str]:
        """List stored files of a datastream whose date is in a half-open range.

        Every entry of the datastream's directory (resolved from
        ``datastream_name`` and ``self._root``) is kept when
        ``start_time <= DSUtil.get_date_from_filename(entry) < end_time``.

        :param datastream_name: Name of the datastream to search.
        :param start_time: Inclusive lower bound of the range.
        :param end_time: Exclusive upper bound of the range.
        :param filetype: Optional key into ``DatastreamStorage.file_filters``;
            when given, only paths accepted by the associated filter are kept.
            The lookup is performed only when the directory exists.
        :return: Sorted list of matching paths; empty when the directory does
            not exist.
        """
        # TODO: think about refactoring so that you don't need both start and end time
        # TODO: if times don't include hours/min/sec, then add .000000 to the string
        search_dir = DSUtil.get_datastream_directory(datastream_name=datastream_name, root=self._root)

        # Nothing has been stored for this datastream yet.
        if not os.path.isdir(search_dir):
            return []

        matches = [
            os.path.join(search_dir, entry)
            for entry in os.listdir(search_dir)
            if start_time <= DSUtil.get_date_from_filename(entry) < end_time
        ]

        if filetype is not None:
            keep = DatastreamStorage.file_filters[filetype]
            matches = list(filter(keep, matches))

        matches.sort()
        return matches
예제 #4
0
    def find(self,
             datastream_name: str,
             start_time: str,
             end_time: str,
             filetype: str = None) -> List[S3Path]:
        """List stored files of a datastream whose date is in a half-open range.

        The datastream's sub-directory is joined onto ``self.root`` and listed
        with ``self.tmp.listdir``. An entry is kept when
        ``start_time <= DSUtil.get_date_from_filename(entry.bucket_path) < end_time``.

        :param datastream_name: Name of the datastream to search.
        :param start_time: Inclusive lower bound of the range.
        :param end_time: Exclusive upper bound of the range.
        :param filetype: Optional key into ``DatastreamStorage.file_filters``;
            when given, only entries accepted by the associated filter are kept.
        :return: Sorted list of the matching entries returned by
            ``self.tmp.listdir``.
        """
        # TODO: think about refactoring so that you don't need both start and end time
        # TODO: if times don't include hours/min/sec, then add .000000 to the string
        relative_dir = DSUtil.get_datastream_directory(datastream_name=datastream_name)
        search_dir = self.root.join(relative_dir)

        matches = [
            entry
            for entry in self.tmp.listdir(search_dir)
            if start_time <= DSUtil.get_date_from_filename(entry.bucket_path) < end_time
        ]

        if filetype is not None:
            keep = DatastreamStorage.file_filters[filetype]
            matches = list(filter(keep, matches))

        matches.sort()
        return matches