def data(self) -> List[CalVT]:
    """Return the calendar expressed in ``self.freq``.

    The raw calendar comes from ``self._read_calendar()``. When
    ``self.enable_read_cache`` is truthy, the raw calendar is stored in
    ``H["c"]`` under a key derived from ``self.uri`` and reused on later
    calls. If the frequency read from file (``self._freq_file``) differs
    from ``self.freq``, the raw calendar is converted with
    ``resam_calendar`` before being returned.

    Returns:
        The calendar as read, or the resampled calendar when the two
        frequencies differ.
    """
    self.check()

    if not self.enable_read_cache:
        raw_calendar = self._read_calendar()
    else:
        # Serve the raw calendar from the shared cache, filling it on a miss.
        cache_key = "orig_file" + str(self.uri)
        if cache_key not in H["c"]:
            H["c"][cache_key] = self._read_calendar()
        raw_calendar = H["c"][cache_key]

    file_freq = self._freq_file
    if Freq(file_freq) != Freq(self.freq):
        # The stored calendar is at a different frequency: resample it.
        timestamps = np.array([pd.Timestamp(entry) for entry in raw_calendar])
        raw_calendar = resam_calendar(timestamps, file_freq, self.freq, self.region)
    return raw_calendar
def _freq_file(self) -> str:
    """Return the frequency to read from file, resolving it on first use.

    The resolved value is stored on the instance as ``_freq_file_cache``
    and returned as-is on later calls.

    Raises:
        ValueError: if ``self.freq`` is not in ``self.support_freq`` and
            ``Freq.get_recent_freq`` returns ``None`` for it.
    """
    if hasattr(self, "_freq_file_cache"):
        return self._freq_file_cache

    resolved = Freq(self.freq)
    if resolved not in self.support_freq:
        # NOTE: uri
        #   1. If `uri` does not exist
        #       - Get the `min_uri` of the closest `freq` under the same "directory" as the `uri`
        #       - Read data from `min_uri` and resample to `freq`
        resolved = Freq.get_recent_freq(resolved, self.support_freq)
        if resolved is None:
            raise ValueError(f"can't find a freq from {self.support_freq} that can resample to {self.freq}!")

    self._freq_file_cache = resolved
    return self._freq_file_cache
def data(self) -> List[CalVT]:
    """Return the calendar for ``self.freq``, resampling from another file if needed.

    The calendar is first read directly (``self.check()`` followed by
    ``self._read_calendar()``). If either step raises ``ValueError``, the
    closest frequency available in storage is looked up, ``self.file_name``
    is switched to that frequency's file, the file is read through the
    ``H["c"]`` cache (keyed by ``self.uri``), and the result is resampled
    to ``self.freq`` with ``resam_calendar``.

    Side effects:
        On the fallback path ``self.file_name`` is overwritten and
        ``H["c"]`` may gain an entry.

    Raises:
        ValueError: if no stored frequency can be resampled to ``self.freq``.
    """
    # NOTE: uri
    #   1. If `uri` does not exist
    #       - Get the `min_uri` of the closest `freq` under the same "directory" as the `uri`
    #       - Read data from `min_uri` and resample to `freq`
    try:
        self.check()
        _calendar = self._read_calendar()
    except ValueError as err:
        freq_list = self._get_storage_freq()
        _freq = Freq.get_recent_freq(self.freq, freq_list)
        if _freq is None:
            raise ValueError(
                f"can't find a freq from {freq_list} that can resample to {self.freq}!"
            ) from err
        # Lower-case the whole file name. Previously `.lower()` bound only to
        # the non-future operand of the conditional expression, so the
        # `_future` file name was left as-is.
        file_name = f"{_freq}_future.txt" if self.future else f"{_freq}.txt"
        self.file_name = file_name.lower()
        # The cache is useful for the following cases
        # - multiple frequencies are sampled from the same calendar
        cache_key = self.uri
        if cache_key not in H["c"]:
            H["c"][cache_key] = self._read_calendar()
        _calendar = H["c"][cache_key]
        _calendar = resam_calendar(
            np.array(list(map(pd.Timestamp, _calendar))), _freq, self.freq)
    return _calendar
def support_freq(self) -> List[str]:
    """Return the frequencies available to this storage, computed once per instance.

    The result is stored on the instance as ``_support_freq`` and returned
    unchanged on later calls.

    When ``self.provider_uri`` has exactly one entry and that entry is
    ``C.DEFAULT_FREQ``, the frequencies are the stems of the ``*.txt``
    files in the ``calendars`` directory of the default data uri, skipping
    stems that end with ``_future``. Otherwise they are the keys of
    ``self.provider_uri``.

    Returns:
        A list built by passing each name through ``Freq`` (so the
        elements are ``Freq`` objects despite the ``List[str]`` annotation).
    """
    cache_attr = "_support_freq"
    if hasattr(self, cache_attr):
        return getattr(self, cache_attr)

    if len(self.provider_uri) == 1 and C.DEFAULT_FREQ in self.provider_uri:
        calendars_dir = self.dpm.get_data_uri(C.DEFAULT_FREQ).joinpath("calendars")
        freq_names = [
            txt_path.stem
            for txt_path in calendars_dir.glob("*.txt")
            if not txt_path.stem.endswith("_future")
        ]
    else:
        freq_names = self.provider_uri.keys()

    supported = [Freq(name) for name in freq_names]
    setattr(self, cache_attr, supported)
    return supported
def set_params(self, tdx_files, data_dir, freq):
    """Configure the input files, frequency and output directories.

    Args:
        tdx_files: sequence of input files; truncated to the first
            ``int(self.limit_nums)`` items unless ``self.limit_nums`` is None.
        data_dir: base directory object providing ``joinpath``; the
            calendars, features and instruments directories are derived
            from it.
        freq: frequency specifier. It is normalised through ``Freq`` and
            stored as a string in ``self.freq``.

    Note:
        The calendar format is chosen by comparing the raw ``freq``
        argument (not the normalised ``self.freq``) with ``"day"``.
    """
    limit = self.limit_nums
    self.csv_files = tdx_files if limit is None else tdx_files[:int(limit)]

    self.freq = str(Freq(freq))
    if freq == "day":
        self.calendar_format = self.DAILY_FORMAT
    else:
        self.calendar_format = self.HIGH_FREQ_FORMAT

    # Start with an empty calendar list and derive the output directories.
    self._calendars_list = []
    self._calendars_dir = data_dir.joinpath(self.CALENDARS_DIR_NAME)
    self._features_dir = data_dir.joinpath(self.FEATURES_DIR_NAME)
    self._instruments_dir = data_dir.joinpath(self.INSTRUMENTS_DIR_NAME)
def uri(self) -> Path:
    """Return the path of this storage's file.

    The path is ``self.dpm.get_data_uri(self.use_freq)`` joined with
    ``"<storage_name>s"`` and ``self.file_name``.

    If ``self.freq`` is not in ``self.support_freq``, the closest
    frequency is looked up with ``Freq.get_recent_freq``. The resulting
    frequency is stored in ``self.resample_freq``.

    Raises:
        ValueError: if ``Freq.get_recent_freq`` returns ``None``.
    """
    freq = self.freq
    if freq not in self.support_freq:
        # NOTE: uri
        #   1. If `uri` does not exist
        #       - Get the `min_uri` of the closest `freq` under the same "directory" as the `uri`
        #       - Read data from `min_uri` and resample to `freq`
        freq = Freq.get_recent_freq(freq, self.support_freq)
        if freq is None:
            raise ValueError(
                f"can't find a freq from {self.support_freq} that can resample to {self.freq}!"
            )
    # NOTE(review): the nesting of this assignment could not be recovered from
    # the flattened source -- confirm whether it should apply only on the
    # fallback path above. `self.use_freq` is not assigned in this method;
    # presumably it is derived from `resample_freq` elsewhere -- TODO confirm.
    self.resample_freq = freq
    return self.dpm.get_data_uri(self.use_freq).joinpath(
        f"{self.storage_name}s", self.file_name)