def test_sync_ndays(self, repo_ledger, sink_fs: FileRepoFS, missing: Iterator[RepoObjectPath]) -> None:
    """
    Syncs the objects reported as missing and verifies the sink and the ledger

    Parameters
    ----------
    repo_ledger
        the ledger handed to the pipe; its ``mock_calls`` are verified
    sink_fs: FileRepoFS
        the repository the pipe writes into
    missing: Iterator[RepoObjectPath]
        the object paths ``iterate_missing`` is patched to report
    """
    # Materialise the paths so they can be walked a second time after the
    # pipe has consumed them; a one-shot iterator would leave the
    # verification loop below with nothing to check.
    missing_paths = list(missing)

    src_fs = mock.MagicMock()
    src_fs.find.side_effect = self.mock_find
    pipe: RepoPipe = RepoPipe(repo_ledger, src_fs, sink_fs)

    with mock.patch("edgar.utils.repo.file_repo_fs.FileRepoFS.iterate_missing") as patched_missing:
        patched_missing.return_value = iter(missing_paths)
        pipe.sync()

    # Every reported path must now exist in the sink with the expected content
    for obj_path in missing_paths:
        period_type = obj_path.date_period_type()
        the_date = obj_path.date()
        o: RepoObject = sink_fs.find(period_type, the_date)
        assert ' '.join([str(period_type), str(the_date)]) == next(o.inp(bufsize=1024))
        assert o.exists()

    tracker: CallTracker = CallTracker()
    tracker.add_expected('next_period', [])
    tracker.add_expected('start', [Date('2021-01-01')])
    tracker.add_expected('record', [Date('2021-07-12'), DatePeriodType.DAY])
    tracker.add_expected('record', [Date('2021-07-13'), DatePeriodType.DAY])
    tracker.add_expected('record', [Date('2021-07-14'), DatePeriodType.DAY])
    tracker.add_expected('end', [Date('2021-08-01')])
    tracker.assertCalls(repo_ledger.mock_calls)

    # The boundary dates of the period were not among the synced objects
    (beg_date, end_date) = repo_ledger.next_period()
    assert sink_fs.find(DatePeriodType.DAY, beg_date) is None
    assert sink_fs.find(DatePeriodType.DAY, end_date) is None
def test_backfill_diff_quarters(self, from_date_str, to_date_str, elems):
    """
    Verifies the sequence of period types produced by a backfill

    The string forms of the period types yielded by
    ``to_date.backfill(from_date)`` are concatenated and compared with
    ``elems``.
    """
    to_date: Date = Date(to_date_str)
    from_date: Date = Date(from_date_str)
    grains: str = "".join(
        str(period.period_type) for period in to_date.backfill(from_date))
    assert grains == elems
def test_backfill_same_date(self, from_date_str, to_date_str, has_backfill):
    """
    Verifies whether a backfill yields anything at all

    ``has_backfill`` states whether ``to_date.backfill(from_date)`` is
    expected to produce at least one element.
    """
    to_date: Date = Date(to_date_str)
    from_date: Date = Date(from_date_str)
    # any() stops at the first element, so at most one period is consumed
    produced_any = any(True for _ in to_date.backfill(from_date))
    if produced_any:
        assert has_backfill, "should return an empty iterator"
    else:
        assert not has_backfill
def test_add_days(self, from_date_str: str, to_date_str: str, days: int) -> None:
    """
    Counts the single-day increments needed to step past the end date

    Parameters
    ----------
    from_date_str: str
        the date the walk starts from
    to_date_str: str
        the last date that still counts
    days: int
        the expected number of increments
    """
    cursor: Date = Date(from_date_str)
    last: Date = Date(to_date_str)
    steps: int = 0
    while cursor <= last:
        steps += 1
        cursor += 1
    assert steps == days
def format(self, period_type: DatePeriodType, the_date: Date, **kwargs) -> List[str]:
    """
    Builds the path elements and the object name for a date

    Parameters
    ----------
    period_type: DatePeriodType
        selects the name specification to use
    the_date: Date
        the date every specification is formatted with
    **kwargs
        extra values passed on to ``the_date.format``; a registered macro
        with the same name replaces the caller's value

    Returns
    -------
    List[str]
        the formatted path specifications followed by the formatted name
    """
    name_spec = self.__format.name_spec[period_type]
    path_spec = self.__format.path_spec

    # Caller-supplied values first, then the macros evaluated for this date
    values = dict(kwargs)
    values.update((name, macro(period_type, the_date))
                  for name, macro in self.__macros.items())

    parts = [the_date.format(spec, period_type, **values) for spec in path_spec]
    parts.append(the_date.format(name_spec, period_type, **values))
    return parts
def test_backfill_same_quarter(self, from_date_str: str, to_date_str: str, grain_expected: str, num_expected: int):
    """
    Verifies every period of a backfill spans exactly the requested dates

    Parameters
    ----------
    from_date_str: str
        the start date
    to_date_str: str
        the end date
    grain_expected: str
        the value each period's ``period_type`` must equal
    num_expected: int
        the value each period's ``num_days`` must equal
    """
    to_date: Date = Date(to_date_str)
    from_date: Date = Date(from_date_str)
    periods_seen = 0
    for periods_seen, period in enumerate(to_date.backfill(from_date), start=1):
        assert period.period_type == grain_expected
        assert period.num_days == num_expected
        assert period.start_date == from_date
        assert period.end_date == to_date
    # The backfill must have produced at least one period
    assert periods_seen > 0
def edgar_fs() -> tempfile.TemporaryDirectory:
    """
    Builds a temporary directory tree populated with index files

    For each of ``DatePeriodType.DAY`` and ``DatePeriodType.QUARTER`` a
    sub-tree ``<period type>/<year>/<quarter>`` is created for every entry
    of ``EDGAR_QUARTER``. Entries are split on ``-`` into a year, a quarter
    label whose fourth character is the quarter number, and a number of
    days. The quarterly tree receives one ``master.idx`` per quarter; the
    daily tree receives that many consecutive daily files starting on the
    first day of the quarter. Every file contains its own path.

    Returns
    -------
    tempfile.TemporaryDirectory
        the temporary directory holding the tree
    """
    temp: tempfile.TemporaryDirectory = tempfile.TemporaryDirectory(
        suffix="_edgar_fs")
    root: Path = Path(temp.name)
    for period_type in [DatePeriodType.DAY, DatePeriodType.QUARTER]:
        base: Path = root / str(period_type)
        base.mkdir()
        for spec in EDGAR_QUARTER:
            year, quarter, num_days = spec.split('-')[:3]
            quarter_dir: Path = base / year / quarter
            # Quarters of the same year share the year directory, so an
            # already existing directory must not be an error
            quarter_dir.mkdir(parents=True, exist_ok=True)
            if period_type == DatePeriodType.QUARTER:
                index_file: Path = quarter_dir / 'master.idx'
                index_file.write_text(str(index_file))
            else:
                first_month: int = (int(quarter[3]) - 1) * 3 + 1
                day: Date = Date(date(int(year), first_month, 1))
                for _ in range(int(num_days)):
                    index_file = quarter_dir / day.format(
                        'master{y}{m:02}{d:02}.idx')
                    index_file.write_text(str(index_file))
                    # NOTE(review): relies on add_days() advancing `day`
                    # in place - confirm against Date
                    day.add_days(1)
    return temp
def test_sync_create_error(self, create, repo_ledger, sink_fs: FileRepoFS, missing: Iterator[RepoObjectPath]) -> None:
    """
    Verifies the ledger calls when creating an object fails during a sync

    Parameters
    ----------
    create
        mock whose side effect is set to ``self.mock_create``
    repo_ledger
        the ledger handed to the pipe; its ``mock_calls`` are verified
    sink_fs: FileRepoFS
        the repository the pipe writes into
    missing: Iterator[RepoObjectPath]
        the object paths ``iterate_missing`` is patched to report
    """
    with mock.patch("edgar.utils.repo.file_repo_fs.FileRepoFS.iterate_missing") as patched_missing:
        patched_missing.return_value = missing
        create.side_effect = self.mock_create
        src_fs = mock.MagicMock()
        src_fs.find.side_effect = self.mock_find
        RepoPipe(repo_ledger, src_fs, sink_fs).sync()

    # The sync is expected to stop recording at the first error
    expected_calls = (
        ('next_period', []),
        ('start', [Date('2021-01-01')]),
        ('record', [Date('2021-07-12'), DatePeriodType.DAY]),
        ('error', [Date('2021-07-13'), repr(FileExistsError())]),
    )
    tracker: CallTracker = CallTracker()
    for call_name, call_args in expected_calls:
        tracker.add_expected(call_name, call_args)
    tracker.assertCalls(repo_ledger.mock_calls)
def iterate_missing(self, from_date: Date, to_date: Date) -> Iterator[RepoURI]:
    """
    Walks the dates starting at ``from_date`` and yields the paths of
    objects that are absent from the repository index

    Weekends and US holidays are skipped. The first time a daily object is
    found missing in a quarter, the path of that quarter's object is
    yielded just before it.

    Parameters
    ----------
    from_date: Date
        the first date examined
    to_date: Date
        the end date; ``to_date.diff_days(from_date)`` dates are examined

    Returns
    -------
    Iterator[RepoURI]
        the quarterly and daily object paths that are missing
    """
    self.refresh()

    seen_year = 0
    seen_quarter = 0
    holidays: us_holidays = None
    # Work on a copy: the cursor is advanced with += below
    day: Date = from_date.copy()

    for _ in range(to_date.diff_days(from_date)):
        year, quarter, *_rest = day.tuple()

        if year != seen_year:
            # Entering the first or a new year: load its holidays and
            # forget which quarter was already reported
            holidays = us_holidays(year)
            seen_year, seen_quarter = year, 0

        if not day.is_weekend() and day not in holidays:
            daily_path: RepoObjectPath = RepoObjectPath.from_date(
                DatePeriodType.DAY, day, self.__format)
            if str(daily_path) not in self.__index:
                if quarter != seen_quarter:
                    # Report the quarterly file once per quarter, ahead of
                    # the first missing daily file
                    yield RepoObjectPath.from_date(DatePeriodType.QUARTER,
                                                   day, self.__format)
                    seen_quarter = quarter
                yield daily_path

        # move on to the next date
        day += 1
def __init__(self, year: int) -> None:
    """
    Builds the list of US holidays for a year

    Fixed-date holidays are added first, followed by the holidays defined
    as the n-th weekday of a month. A holiday falling on a Saturday is
    then moved one day back and one falling on a Sunday one day forward.

    Parameters
    ----------
    year: int
        the year the holidays are computed for
    """
    self.list: List[Date] = []
    self.names: Dict[str, str] = {}

    # Holidays on the same calendar day every year: (month, day, name)
    fixed_holidays = [
        (self.JANUARY, 1, "New Year's Day"),
        (self.JULY, 4, 'Independence Day'),
        (self.NOVEMBER, 11, 'Veterans Day'),
        (self.DECEMBER, 25, 'Christmas Day'),
    ]
    for month, day, name in fixed_holidays:
        holiday: Date = Date(date(year, month, day))
        self.names[str(holiday)] = name
        self.list.append(holiday)

    # Holidays on the n-th weekday of a month: (month, weekday, week, name)
    floating_holidays = [
        (self.JANUARY, self.MONDAY, self.THIRD_WEEK,
         'Birthday of Martin Luther King, Jr.'),
        (self.FEBRUARY, self.MONDAY, self.THIRD_WEEK,
         "Washington's Birthday"),
        (self.MAY, self.MONDAY, self.LAST_WEEK, 'Memorial Day'),
        (self.SEPTEMBER, self.MONDAY, self.FIRST_WEEK, 'Labor Day'),
        (self.OCTOBER, self.MONDAY, self.SECOND_WEEK, 'Columbus Day'),
        (self.NOVEMBER, self.THURSDAY, self.FOURTH_WEEK, 'Thanksgiving Day'),
    ]
    for month, weekday, week, name in floating_holidays:
        holiday = Date(date(year, month, 1)).nthday_of_nthweek(weekday, week)
        self.names[str(holiday)] = name
        self.list.append(holiday)

    # Shift weekend holidays to the adjacent weekday. The result is written
    # back so the shift holds whether += mutates the Date or returns a new one.
    # NOTE(review): self.names was keyed by the date string taken before
    # this shift, so a shifted holiday keeps its unshifted key - confirm
    # that this is what consumers of `names` expect.
    for index, holiday in enumerate(self.list):
        day_of_week: int = holiday.isoweekday()
        if day_of_week == self.SATURDAY:
            holiday += -1
        elif day_of_week == self.SUNDAY:
            holiday += 1
        self.list[index] = holiday
def test_macros(self, period_type: DatePeriodType, date_str: str, path_spec: List[str], name_spec: str, expected: str):
    """
    Verifies that a macro registered on the formatter is applied

    A macro named ``z`` is registered that yields ``'DAY'`` for the daily
    period type and ``'QUARTER'`` otherwise; the formatted elements joined
    with ``/`` must equal ``expected``.
    """
    def grain_label(period_type, date):
        if period_type == DatePeriodType.DAY:
            return 'DAY'
        return 'QUARTER'

    repo_format = RepoFormat({period_type: name_spec}, path_spec)
    formatter: RepoFormatter = RepoFormatter(repo_format)
    formatter['z'] = grain_label
    parts = formatter.format(period_type, Date(date_str))
    assert '/'.join(parts) == expected
def test_getitem(self, path: List[str], date_period: str, quarter: str, year: int, date_str: str) -> None:
    """
    Verifies indexed access to the elements of an object path

    Indexes 0 to 3 must yield the date period, the year, the quarter and
    the daily file name derived from ``date_str``, in that order.
    """
    obj_path: RepoObjectPath = RepoObjectPath.from_list(path, self.REPO_FORMAT)
    assert obj_path[0] == date_period
    assert obj_path[1] == year
    assert obj_path[2] == quarter
    file_name = Date(date_str).format('master{y}{m:02}{d:02}.idx')
    assert obj_path[3] == file_name
def test_init_with_list(self, path: List[str], date_period: str, quarter: int, year: int, date_str: str) -> None:
    """
    Verifies the accessors of an object path built from a list

    Parameters
    ----------
    path: List[str]
        the path elements the object path is built from
    date_period: str
        string form of the expected date period type
    quarter: int
        the expected quarter
    year: int
        the expected year
    date_str: str
        the expected date
    """
    obj_path: RepoObjectPath = RepoObjectPath.from_list(path, self.REPO_FORMAT)
    expected_type = DatePeriodType.from_string(date_period)
    assert obj_path.date_period_type() == expected_type
    assert obj_path.year() == year
    assert obj_path.quarter() == quarter
    assert obj_path.date() == Date(date_str)
def test_record(self, ledger: DbRepoLedger) -> None:
    """
    Verifies that recording a date stores exactly one timestamped row

    The row must hold the action ``'record'``, the date string, the period
    code ``'D'`` and a timestamp taken between the start and the end of
    the call.
    """
    started: int = int(datetime.now().timestamp())
    ledger.record(Date('2021-11-11'), DatePeriodType.DAY)
    finished: int = int(datetime.now().timestamp())

    rows: list = ledger.dump()
    print(rows)
    assert len(rows) == 1

    row = rows[0]
    assert row[0] == 'record'
    assert row[1] == '2021-11-11'
    assert row[2] == 'D'
    assert started <= row[3] <= finished
def test_sync_missing_error(self, iterate_missing, repo_ledger, sink_fs: FileRepoFS) -> None:
    """
    Verifies the ledger calls when listing the missing objects fails

    Parameters
    ----------
    iterate_missing
        mock made to raise ``FileNotFoundError``
    repo_ledger
        the ledger handed to the pipe; its ``mock_calls`` are verified
    sink_fs: FileRepoFS
        the repository the pipe writes into
    """
    src_fs = mock.MagicMock()
    src_fs.find.side_effect = self.mock_find
    failure: FileNotFoundError = FileNotFoundError()
    iterate_missing.side_effect = failure

    RepoPipe(repo_ledger, src_fs, sink_fs).sync()

    # The error is expected to be reported without a date
    expected_calls = (
        ('next_period', []),
        ('start', [Date('2021-01-01')]),
        ('error', [None, repr(failure)]),
    )
    tracker: CallTracker = CallTracker()
    for call_name, call_args in expected_calls:
        tracker.add_expected(call_name, call_args)
    tracker.assertCalls(repo_ledger.mock_calls)
def date(self) -> Date:
    """
    Returns the date of the object the path points to

    The date is parsed from the last path element with the daily name
    specification on first use and kept for later calls.

    Returns
    -------
    Date
        the date
    """
    if self.__date:
        return self.__date

    day_spec = self.__format.name_spec[DatePeriodType.DAY]
    fields = parse(day_spec, self.__list[-1])
    year, month, day = int(fields['y']), int(fields['m']), int(fields['d'])
    self.__date = Date(date(year, month, day))
    return self.__date
def test_iterator(self) -> None:
    """
    Verifies that iterating the 2020 holidays yields the expected dates
    """
    date_obj: Date = Date("2020-01-01")
    holidays: us_holidays = us_holidays(date_obj.year())
    yielded = {str(holiday) for holiday in holidays}

    expected_dates = (
        "2020-01-01",
        "2020-01-20",
        "2020-02-17",
        "2020-05-25",
        "2020-07-03",
        "2020-09-07",
        "2020-11-11",
        "2020-11-26",
        "2020-12-25",
    )
    for expected in expected_dates:
        assert expected in yielded
def test_contains(self, date_str: str, expected_result: bool) -> None:
    """
    Verifies the membership test of a date against its year's holidays

    Parameters
    ----------
    date_str: str
        the date to look up
    expected_result: bool
        whether the date is expected to be reported as a holiday
    """
    candidate: Date = Date(date_str)
    holidays: us_holidays = us_holidays(candidate.year())
    is_holiday = candidate in holidays
    assert is_holiday == expected_result
def test_find_object(self, period_type, date_str, expected) -> None:
    """
    Verifies the URI of the object found for a period type and a date
    """
    base_uri = 'https://www.sec.gov/Archives/edgar/'
    repo: HttpRepoFS = HttpRepoFS(base_uri, self.__formatter)
    found: HttpRepoObject = repo.find(period_type, Date(date_str))
    assert found.as_uri() == expected
def test_kwargs(self, period_type: DatePeriodType, date_str: str, path_spec: List[str], name_spec: str, expected: str):
    """
    Verifies that keyword arguments are available to the format specs

    The formatter is called with ``z='X'``; the formatted elements joined
    with ``/`` must equal ``expected``.
    """
    repo_format = RepoFormat({period_type: name_spec}, path_spec)
    formatter: RepoFormatter = RepoFormatter(repo_format)
    parts = formatter.format(period_type, Date(date_str), z='X')
    assert '/'.join(parts) == expected
def test_quarter(self, date_str: str, expected_result: int):
    """
    Verifies the quarter reported for a date
    """
    actual = Date(date_str).quarter()
    assert actual == expected_result
def test_add_days(self, date_str: str, expected_result: str, days: int):
    """
    Verifies the string form of the date returned by ``add_days``

    Parameters
    ----------
    date_str: str
        the starting date
    expected_result: str
        the expected string form of the returned date
    days: int
        the number of days passed to ``add_days``
    """
    shifted: Date = Date(date_str).add_days(days)
    assert str(shifted) == expected_result
def test_format_args(self, date_str: str, date_period_type: DatePeriodType, args: Dict, format_spec: str, expected: str) -> None:
    """
    Verifies date formatting with a period type and extra keyword values

    Parameters
    ----------
    date_str: str
        the date to format
    date_period_type: DatePeriodType
        the period type passed to ``format``
    args: Dict
        extra keyword arguments passed to ``format``
    format_spec: str
        the format specification
    expected: str
        the expected result
    """
    formatted = Date(date_str).format(format_spec, date_period_type, **args)
    assert formatted == expected
def test_format(self, date_str: str, format_spec: str, expected: str) -> None:
    """
    Verifies date formatting with a format specification only
    """
    formatted = Date(date_str).format(format_spec)
    assert formatted == expected
def test_init_bad_format(self):
    """
    Verifies that an unparsable date string raises ``ValueError``
    """
    with pytest.raises(ValueError):
        Date("XXX")
def test_init_success(self, date_str):
    """
    Verifies that a date converts back to the string it was built from
    """
    round_tripped = str(Date(date_str))
    assert date_str == round_tripped
def is_weekend(self, date_str: str, expected_result: bool) -> None:
    """
    Verifies the weekend check of a date

    NOTE(review): unlike its siblings this method has no ``test_`` prefix,
    so it is presumably not collected under pytest's default naming rules -
    confirm whether it should be renamed to ``test_is_weekend``.

    Parameters
    ----------
    date_str: str
        the date to check
    expected_result: bool
        the value ``is_weekend`` is expected to return
    """
    candidate: Date = Date(date_str)
    actual = candidate.is_weekend()
    assert actual == expected_result
def test_diff_days(self, from_date_str: str, to_date_str: str, expected_result: int):
    """
    Verifies the day difference between an end date and a start date

    Parameters
    ----------
    from_date_str: str
        the date passed to ``diff_days``
    to_date_str: str
        the date ``diff_days`` is called on
    expected_result: int
        the expected difference
    """
    to_date: Date = Date(to_date_str)
    from_date: Date = Date(from_date_str)
    assert to_date.diff_days(from_date) == expected_result
def test_nthday_of_nthweek(self, date_str: str, dayofweek: int, whichweek: int, expected_result: str) -> None:
    """
    Verifies the date returned by ``nthday_of_nthweek``

    Parameters
    ----------
    date_str: str
        the date the lookup starts from
    dayofweek: int
        the ``dayofweek`` argument
    whichweek: int
        the ``whichweek`` argument
    expected_result: str
        the expected string form of the returned date
    """
    anchor: Date = Date(date_str)
    found: Date = anchor.nthday_of_nthweek(dayofweek=dayofweek,
                                           whichweek=whichweek)
    assert str(found) == expected_result
def test_quarter_dates(self, date_str: str, expected_quarter_start: str, expected_quarter_end: str):
    """
    Verifies the start and end dates reported for a date's quarter

    Parameters
    ----------
    date_str: str
        a date inside the quarter
    expected_quarter_start: str
        the expected first element of the returned pair
    expected_quarter_end: str
        the expected second element of the returned pair
    """
    actual = Date(date_str).quarter_dates()
    expected = (Date(expected_quarter_start), Date(expected_quarter_end))
    assert actual == expected