def test_date_is_sanitized(self, monkeypatch):
    """Check that Filing keeps the given dates and mirrors them into params.

    The end date is expected under ``params['dateb']`` and the start date
    under ``params['datea']``, each as an eight-digit string.
    """
    begin = datetime.datetime(2012, 3, 1)
    finish = datetime.datetime(2015, 1, 1)
    filing = Filing(
        cik='aapl',
        filing_type=FilingType.FILING_10Q,
        count=10,
        start_date=begin,
        end_date=finish,
    )

    # Query-string form of the date window.
    assert filing.params['dateb'] == '20150101'
    assert filing.params['datea'] == '20120301'

    # The datetime attributes themselves must be left untouched.
    assert filing.start_date == datetime.datetime(2012, 3, 1)
    assert filing.end_date == datetime.datetime(2015, 1, 1)
def test_date_is_sanitized(self):
    """Verify the date window is stored as given and exposed in ``params``.

    ``params["dateb"]`` must hold the end date and ``params["datea"]`` the
    start date, each as an eight-digit string.
    """
    window_start = datetime.datetime(2012, 3, 1)
    window_end = datetime.datetime(2015, 1, 1)
    filing = Filing(
        cik_lookup="aapl",
        filing_type=FilingType.FILING_10Q,
        count=10,
        start_date=window_start,
        end_date=window_end,
    )

    assert filing.params["dateb"] == "20150101"
    assert filing.params["datea"] == "20120301"
    assert filing.start_date == datetime.datetime(2012, 3, 1)
    assert filing.end_date == datetime.datetime(2015, 1, 1)
def test_filing_raises_warning_when_less_filings_than_count(
        self, monkeypatch, recwarn, count, raises_error, tmp_data_directory):
    """Check whether saving filings records a ``UserWarning``.

    CIK lookup and network responses are replaced with mocks, three lookups
    are saved with the requested ``count``, and the warnings captured by
    ``recwarn`` are inspected afterwards.

    Args:
        monkeypatch: pytest fixture used to install the mocks.
        recwarn: pytest fixture recording warnings raised during the test.
        count: Number of filings requested per lookup.
        raises_error: When truthy a ``UserWarning`` must have been recorded;
            otherwise none may have been.
        tmp_data_directory: Directory the filings are saved into.
    """
    monkeypatch.setattr(_CIKValidator, "get_ciks", MockCIKValidatorGetCIKs.get_ciks)
    monkeypatch.setattr(NetworkClient, "get_response",
                        MockSingleCIKFilingLimitedResponses(10))
    f = Filing(cik_lookup=['aapl', 'msft', 'amzn'],
               filing_type=FilingType.FILING_10Q,
               count=count,
               client=NetworkClient(batch_size=10))
    f.save(tmp_data_directory)

    # Inspect the recorded warnings directly instead of using
    # pop()/pytest.fail() inside try/except AssertionError: that form only
    # works because pytest.fail's exception is not an AssertionError.
    user_warnings = [w for w in recwarn.list if issubclass(w.category, UserWarning)]
    if raises_error:
        assert user_warnings, "Expected a UserWarning, but none was recorded."
    else:
        assert not user_warnings, "Expected no UserWarning, but received one."
from secedgar.filings import Filing, FilingType

# Request up to 15 10Q filings for Apple (ticker "aapl") ...
my_filings = Filing(
    cik_lookup='aapl',
    filing_type=FilingType.FILING_10Q,
    count=15,
)

# ... and write them below ./data.
my_filings.save('./data')
# In[8]:

import nest_asyncio

nest_asyncio.apply()

# In[19]:

from secedgar.filings import Filing, FilingType

# 10-K filings for ticker "gme"
# NOTE(review): count=1326380 looks like a CIK-style identifier rather than a
# number of filings -- confirm the intended value.
my_filings = Filing(cik_lookup=['gme'],
                    filing_type=FilingType.FILING_10K,
                    count=1326380,
                    user_agent='deeptendies')
my_filings.save('filings')

# # Parse Data

# In[1]:

import glob
from secedgar.parser import MetaParser
from pathlib import Path

# Make sure the output directory for parsed filings exists.
out_dir = 'parsed_filings'
Path(out_dir).mkdir(parents=True, exist_ok=True)
def test_validate_cik_inside_filing(self, monkeypatch):
    """Resolving CIKs for an invalid lookup must raise ``EDGARQueryError``.

    The network response is replaced with ``MockSingleCIKNotFound``.
    """
    monkeypatch.setattr(NetworkClient, "get_response", MockSingleCIKNotFound)
    with pytest.raises(EDGARQueryError):
        filing = Filing(cik_lookup='0notvalid0', filing_type=FilingType.FILING_10K)
        # Accessing .ciks is what is under test.
        _ = filing.cik_lookup.ciks
def test_filing_save_multiple_ciks(self, tmp_data_directory, monkeypatch):
    """Save 10-Q filings for three lookups with CIK validation and network mocked."""
    monkeypatch.setattr(
        _CIKValidator,
        "get_ciks",
        MockCIKValidatorMultipleCIKs.get_ciks,
    )
    monkeypatch.setattr(
        NetworkClient,
        "get_response",
        MockSingleCIKFiling,
    )

    lookups = ['aapl', 'amzn', 'msft']
    filing = Filing(lookups, FilingType.FILING_10Q, count=3)
    filing.save(tmp_data_directory)
def test_txt_urls(self, mock_cik_validator_get_single_cik, mock_single_cik_filing):
    """The first URL returned for 'aapl' must end in a ``txt`` extension."""
    filing = Filing(cik_lookup='aapl', filing_type=FilingType.FILING_10Q, count=10)
    aapl_urls = filing.get_urls()['aapl']
    # Text after the last dot of the first URL.
    extension = aapl_urls[0].rsplit('.', 1)[-1]
    assert extension == 'txt'
def test_invalid_filing_type_types(self, bad_filing_type):
    """Constructing a Filing with ``bad_filing_type`` must raise ``FilingTypeError``."""
    init_kwargs = {
        'cik_lookup': '0000320193',
        'filing_type': bad_filing_type,
    }
    with pytest.raises(FilingTypeError):
        Filing(**init_kwargs)
def test_count_setter_on_init(self, count):
    """The ``count`` given to the constructor is readable back unchanged."""
    created = Filing(
        cik_lookup='aapl',
        filing_type=FilingType.FILING_10Q,
        count=count,
    )
    observed = created.count
    assert observed == count
def test_invalid_filing_type_types(self):
    """Each invalid filing type value must make the constructor raise ``FilingTypeError``."""
    invalid_types = ('10j', '10-k', 'ssd', 'invalid', 1)
    for candidate in invalid_types:
        with pytest.raises(FilingTypeError):
            Filing(cik='0000320193', filing_type=candidate)
def test_filing_get_urls_returns_single_list_of_urls(self,
                                                     mock_cik_validator_get_multiple_ciks,
                                                     mock_single_cik_filing):
    """Every lookup returned by ``get_urls`` must map to exactly five URLs.

    ``get_urls`` is called once and its result reused; the original called it
    again for the keys and for every key.
    """
    # Uses same response for filing links (will all be filings for aapl)
    f = Filing(cik_lookup=['aapl', 'msft', 'amzn'],
               filing_type=FilingType.FILING_10Q,
               count=5)
    urls = f.get_urls()
    # Guard against all() passing vacuously on an empty mapping.
    assert urls
    assert all(len(cik_urls) == 5 for cik_urls in urls.values())
def test_filing_simple_example(self, tmp_data_directory):
    """Run the minimal usage example: build a 10-Q Filing for 'IBM' and save it.

    NOTE(review): no mock fixtures are requested here, so this presumably
    performs real requests -- confirm.
    """
    Filing(
        cik_lookup='IBM',
        filing_type=FilingType.FILING_10Q,
    ).save(tmp_data_directory)
def test_filing_save_single_cik(self,
                                tmp_data_directory,
                                mock_cik_validator_get_single_cik,
                                mock_single_cik_filing):
    """Saving three 10-Q filings for a single lookup should complete without error."""
    single_lookup = 'aapl'
    filing = Filing(single_lookup, FilingType.FILING_10Q, count=3)
    filing.save(tmp_data_directory)
def test_filing_save_multiple_ciks(self,
                                   tmp_data_directory,
                                   mock_cik_validator_get_multiple_ciks,
                                   mock_single_cik_filing):
    """Saving three 10-Q filings for a list of lookups should complete without error."""
    lookups = ['aapl', 'amzn', 'msft']
    filing = Filing(lookups, FilingType.FILING_10Q, count=3)
    filing.save(tmp_data_directory)
def test_validate_cik_inside_filing(self, mock_single_cik_not_found):
    """Resolving CIKs for an invalid lookup must raise ``EDGARQueryError``."""
    with pytest.raises(EDGARQueryError):
        filing = Filing(cik_lookup='0notvalid0', filing_type=FilingType.FILING_10K)
        # Accessing .ciks is what is under test.
        _ = filing.cik_lookup.ciks
def test_validate_cik_type_inside_filing(self):
    """A ``cik`` of 1234567891011 or 123.0 must each be rejected with ``TypeError``."""
    for bad_cik in (1234567891011, 123.0):
        with pytest.raises(TypeError):
            Filing(cik=bad_cik, filing_type=FilingType.FILING_10K)
def test_filing_type_setter(self, new_filing_type):
    """Assigning ``filing_type`` after construction is reflected when read back."""
    filing = Filing(
        cik_lookup='aapl',
        filing_type=FilingType.FILING_10Q,
    )
    filing.filing_type = new_filing_type
    current = filing.filing_type
    assert current == new_filing_type
def test_good_start_date_setter_on_init(self, start_date):
    """A valid ``start_date`` given to the constructor is readable back unchanged."""
    init_kwargs = dict(
        cik_lookup='aapl',
        filing_type=FilingType.FILING_10Q,
        start_date=start_date,
    )
    created = Filing(**init_kwargs)
    assert created.start_date == start_date
def test_end_date_setter(self, date, expected):
    """Setting ``end_date`` updates both the attribute and ``params['dateb']``.

    Args:
        date: Value assigned to ``end_date``.
        expected: Value ``params.get("dateb")`` must equal afterwards.
    """
    initial_start = datetime.datetime(2010, 1, 1)
    initial_end = datetime.datetime(2015, 1, 1)
    filing = Filing('aapl',
                    FilingType.FILING_10Q,
                    start_date=initial_start,
                    end_date=initial_end)

    filing.end_date = date

    assert filing.end_date == date
    assert filing.params.get("dateb") == expected
def test_invalid_filing_type_enum(self):
    """Using ``FilingType.INVALID`` to build a Filing must raise ``AttributeError``.

    NOTE(review): the error presumably comes from the ``FilingType.INVALID``
    lookup itself rather than from ``Filing`` -- confirm.
    """
    with pytest.raises(AttributeError):
        Filing(
            cik_lookup='0000320193',
            filing_type=FilingType.INVALID,
        )
def test_bad_start_date_setter_on_init(self, bad_start_date):
    """An invalid ``start_date`` passed to the constructor must raise ``TypeError``."""
    init_kwargs = {
        'cik_lookup': 'aapl',
        'filing_type': FilingType.FILING_10Q,
        'start_date': bad_start_date,
    }
    with pytest.raises(TypeError):
        Filing(**init_kwargs)
def test_bad_filing_type_setter(self, bad_filing_type):
    """Assigning an invalid ``filing_type`` after construction raises ``FilingTypeError``."""
    filing = Filing(
        cik_lookup='aapl',
        filing_type=FilingType.FILING_10Q,
    )
    # Only the assignment sits inside the raises-block, so a failure while
    # constructing the Filing is not mistaken for the expected error.
    with pytest.raises(FilingTypeError):
        filing.filing_type = bad_filing_type
def test_count_setter_bad_values(self, count, expected_error):
    """Constructing a Filing with an invalid ``count`` raises ``expected_error``."""
    init_kwargs = {
        'cik_lookup': 'aapl',
        'filing_type': FilingType.FILING_10Q,
        'count': count,
    }
    with pytest.raises(expected_error):
        Filing(**init_kwargs)
def test_validate_cik_type_inside_filing(self, bad_cik_lookup):
    """A ``cik_lookup`` of an unsupported type must be rejected with ``TypeError``."""
    init_kwargs = {
        'cik_lookup': bad_cik_lookup,
        'filing_type': FilingType.FILING_10K,
    }
    with pytest.raises(TypeError):
        Filing(**init_kwargs)
def __get_data(self, cik, filing_type, data_set):
    """Build per-filing violation rows and an aggregate word count for one company.

    Filings are saved under ``../data/company_filings/{cik}_{filing_type.value}/``
    unless that directory already exists, in which case it is used as a
    cache. Every file found below that directory is then examined with
    ``__get_file_metadata`` and ``__get_violations_for_file``.

    Args:
        cik: Company identifier; converted with ``str`` before being passed
            to ``Filing``.
        filing_type: ``FilingType`` member. Its ``.value`` is used in the
            cache path and in the ``'filing type'`` column.
        data_set: Label copied unchanged into the ``'dataset'`` column.

    Returns:
        tuple: ``(result, filing_word_count)``. ``result`` is a DataFrame
        with one row per processed file (each row has index 0, as before);
        it is empty when no file qualified. ``filing_word_count`` is a dict
        summing the per-file word counts.

    Raises:
        AssertionError: If a file's metadata, after the URL has been
            attached, does not have exactly eight entries.
    """
    my_filings = Filing(cik=str(cik), filing_type=filing_type)
    path = f'../data/company_filings/{cik}_{filing_type.value}/'

    if not os.path.exists(path):
        try:
            print(f'Fetching data for cik={cik}, filing_type={filing_type}')
            my_filings.save(path)
        except Exception as fetch_error:
            # Best effort: report the failure and remove any partial download
            # so a later run does not mistake it for a complete cache.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            print(f'Fetching data failed for cik={cik}: {fetch_error!r}')
            try:
                if os.path.exists(path):
                    shutil.rmtree(path)
            except OSError as cleanup_error:
                print("Error: %s : %s" % (path, cleanup_error.strerror))
    else:
        print(f'Skipping data fetching. Using cache at {path}')

    frames = []
    filing_word_count = dict()
    # File name -> URL, filled on first use so get_urls() runs at most once
    # per call and not at all when there are no files.
    url_by_name = None

    for subdir, _dirs, files in os.walk(path):
        for file in files:
            file_path = f'{subdir}/{file}'
            file_metadata = self.__get_file_metadata(file_path)

            if url_by_name is None:
                url_by_name = {}
                for url in my_filings.get_urls():
                    # setdefault keeps the first URL for a name, matching the
                    # original first-match-then-break search.
                    url_by_name.setdefault(url.rsplit('/')[-1].strip(), url)
            if file in url_by_name:
                file_metadata['url'] = url_by_name[file]

            assert len(file_metadata) == 8, \
                "Could not get all relevant metadata: %r" % file_metadata

            if file_metadata['year'] < 2007 or \
                    (file_metadata['form_type'] != '10-K' and file_metadata['form_type'] != '10-Q'):
                print(
                    f'Skipping file. year={file_metadata["year"]} form_type={file_metadata["form_type"]}'
                )
                continue

            violations_in_file, local_word_count = self.__get_violations_for_file(file_path)

            file_info = {
                'cik': cik,
                'firm name': file_metadata['company_name'],
                'firm address': file_metadata['address'],
                'zip code': str(file_metadata['zip']),
                'year': file_metadata['year'],
                'quarter': file_metadata['quarter'] if filing_type is FilingType.FILING_10Q else None,
                'url': file_metadata['url'],
                'filing type': filing_type.value,
                'dataset': data_set,
                'has covenant violation': 0 if violations_in_file == 0 else 1,
                'total violations': violations_in_file
            }
            # Collect one-row frames and concatenate once at the end;
            # DataFrame.append in a loop is quadratic and gone in pandas 2.0.
            frames.append(pd.DataFrame(file_info, index=[0]))

            for word, occurrences in local_word_count.items():
                if word in filing_word_count:
                    filing_word_count[word] = filing_word_count[word] + occurrences
                else:
                    filing_word_count[word] = occurrences

    result = pd.concat(frames) if frames else pd.DataFrame()
    return result, filing_word_count
def test_save_no_filings_raises_error(self, tmp_data_directory, monkeypatch, no_urls):
    """``save`` must raise ``ValueError`` when ``get_urls`` yields ``no_urls``."""

    def fake_get_urls(_filing):
        # Stand-in for Filing.get_urls that ignores the instance.
        return no_urls

    monkeypatch.setattr(Filing, "get_urls", fake_get_urls)
    filing = Filing(cik_lookup='aapl', filing_type=FilingType.FILING_10K)
    with pytest.raises(ValueError):
        filing.save(tmp_data_directory)
from secedgar.utils import get_cik_map
from secedgar.filings import Filing, FilingType

# Print the first five entries of the CIK map.
print(list(get_cik_map().items())[:5])

# Request a single 10-Q filing for "aapl" and save it to a fixed local path.
my_filings = Filing(
    cik_lookup='aapl',
    filing_type=FilingType.FILING_10Q,
    count=1,
)
my_filings.save('/home/sroot/kaizha/temp')
def test_filing_save_single_cik(self, tmp_data_directory, monkeypatch):
    """Save 10-Q filings for one lookup with CIK validation and network mocked.

    The Filing is built before the mocks are installed, as in the original
    ordering.
    """
    filing = Filing('aapl', FilingType.FILING_10Q, count=3)

    monkeypatch.setattr(
        _CIKValidator,
        "get_ciks",
        MockCIKValidatorGetCIKs.get_ciks,
    )
    monkeypatch.setattr(
        NetworkClient,
        "get_response",
        MockSingleCIKFiling,
    )

    filing.save(tmp_data_directory)
from secedgar.filings import Filing, FilingType
import os
from tqdm import tqdm

# Read one ticker per line. The context manager closes the file, and strip()
# removes the line ending without cutting a real character off a final line
# that has no trailing newline (which ticker[:-1] did). Blank lines are skipped.
with open('tickers.txt', 'r') as f:
    tickers = [line.strip() for line in f if line.strip()]

# Every download goes to ./filings/ under the current working directory.
file_dir = os.getcwd() + '/filings/'

for ticker in tqdm(tickers):
    try:
        my_filings = Filing(cik_lookup=ticker,
                            filing_type=FilingType.FILING_10K,
                            count=1)
        my_filings.save(file_dir)
        print(ticker + " 10k downloaded")
    except OSError as err:
        # Keep going with the remaining tickers after a failed download.
        print("OS error: {0}".format(err))
        print('Unable to download ' + ticker + ' 10k!')