示例#1
0
 def test_date_is_sanitized(self, monkeypatch):
     """Check that datetime bounds are kept and rendered as YYYYMMDD query params."""
     window_start = datetime.datetime(2012, 3, 1)
     window_end = datetime.datetime(2015, 1, 1)
     filing = Filing(
         cik='aapl',
         filing_type=FilingType.FILING_10Q,
         count=10,
         start_date=window_start,
         end_date=window_end,
     )
     # 'dateb' is compared against the end bound, 'datea' against the start.
     assert filing.params['dateb'] == '20150101'
     assert filing.params['datea'] == '20120301'
     # The attributes themselves must still compare equal to the inputs.
     assert filing.start_date == window_start
     assert filing.end_date == window_end
示例#2
0
 def test_date_is_sanitized(self):
     """Verify date bounds survive construction and show up in params as YYYYMMDD."""
     bounds = {
         "start_date": datetime.datetime(2012, 3, 1),
         "end_date": datetime.datetime(2015, 1, 1),
     }
     filing = Filing(cik_lookup="aapl",
                     filing_type=FilingType.FILING_10Q,
                     count=10,
                     **bounds)
     assert filing.params["dateb"] == "20150101"
     assert filing.params["datea"] == "20120301"
     assert filing.start_date == bounds["start_date"]
     assert filing.end_date == bounds["end_date"]
示例#3
0
 def test_filing_raises_warning_when_less_filings_than_count(
         self, monkeypatch, recwarn, count, raises_error,
         tmp_data_directory):
     """Check whether saving filings emits a ``UserWarning``.

     ``raises_error`` (parametrized outside this block) tells whether a
     ``UserWarning`` is expected for the given ``count``; despite its name
     the test checks for a warning, not an exception.
     """
     monkeypatch.setattr(_CIKValidator, "get_ciks",
                         MockCIKValidatorGetCIKs.get_ciks)
     # NOTE(review): presumably the mock serves at most 10 filings per CIK
     # -- confirm against MockSingleCIKFilingLimitedResponses.
     monkeypatch.setattr(NetworkClient, "get_response",
                         MockSingleCIKFilingLimitedResponses(10))
     f = Filing(cik_lookup=['aapl', 'msft', 'amzn'],
                filing_type=FilingType.FILING_10Q,
                count=count,
                client=NetworkClient(batch_size=10))
     f.save(tmp_data_directory)

     # Inspect the recorded warnings directly instead of driving the
     # negative case through recwarn.pop() raising AssertionError.
     user_warnings = [
         warning for warning in recwarn
         if issubclass(warning.category, UserWarning)
     ]
     if raises_error:
         assert user_warnings, "Expected a UserWarning, but none was recorded."
     else:
         assert not user_warnings, "Expected no UserWarning, but received one."
示例#4
0
文件: p.py 项目: clojure-quant/edgar
from secedgar.filings import Filing, FilingType

# 10-Q filings for Apple (ticker "aapl"), requested with count=15.

my_filings = Filing(cik_lookup='aapl',
                    filing_type=FilingType.FILING_10Q,
                    count=15)

# Write the retrieved filings under ./data.
my_filings.save('./data')
# In[8]:

import nest_asyncio

# NOTE(review): presumably needed so the save() call below can run inside the
# notebook's already-running event loop -- confirm.
nest_asyncio.apply()

# In[19]:

from secedgar.filings import Filing, FilingType

# 10-K filings for ticker "gme".
# NOTE(review): the import below repeats the one directly above.
from secedgar.filings import Filing, FilingType

# NOTE(review): count=1326380 looks like a CIK number rather than a filing
# count -- confirm the intended value.
my_filings = Filing(cik_lookup=['gme'],
                    filing_type=FilingType.FILING_10K,
                    count=1326380,
                    user_agent='deeptendies')

# Write the retrieved filings into the relative directory 'filings'.
my_filings.save('filings')

# # Parse Data

# In[1]:

import glob
from secedgar.parser import MetaParser
from pathlib import Path

# Create the output directory (and any missing parents); an existing
# directory is not an error.
out_dir = 'parsed_filings'
Path(out_dir).mkdir(parents=True, exist_ok=True)
示例#6
0
 def test_validate_cik_inside_filing(self, monkeypatch):
     """Resolving the CIKs of an unknown lookup is expected to raise EDGARQueryError."""
     monkeypatch.setattr(NetworkClient, "get_response", MockSingleCIKNotFound)
     with pytest.raises(EDGARQueryError):
         filing = Filing(cik_lookup='0notvalid0',
                         filing_type=FilingType.FILING_10K)
         # Reading .ciks happens inside the raises-block, as does construction.
         _ = filing.cik_lookup.ciks
示例#7
0
 def test_filing_save_multiple_ciks(self, tmp_data_directory, monkeypatch):
     """Smoke test: saving 10-Q filings for three lookups must not raise."""
     patches = (
         (_CIKValidator, "get_ciks", MockCIKValidatorMultipleCIKs.get_ciks),
         (NetworkClient, "get_response", MockSingleCIKFiling),
     )
     for target, attribute, replacement in patches:
         monkeypatch.setattr(target, attribute, replacement)
     lookups = ['aapl', 'amzn', 'msft']
     Filing(lookups, FilingType.FILING_10Q, count=3).save(tmp_data_directory)
示例#8
0
 def test_txt_urls(self, mock_cik_validator_get_single_cik, mock_single_cik_filing):
     """The first URL returned under the 'aapl' key must end in the 'txt' extension."""
     filing = Filing(cik_lookup='aapl',
                     filing_type=FilingType.FILING_10Q,
                     count=10)
     aapl_urls = filing.get_urls()['aapl']
     extension = aapl_urls[0].split('.')[-1]
     assert extension == 'txt'
示例#9
0
 def test_invalid_filing_type_types(self, bad_filing_type):
     """Constructing a Filing with a bad filing type is expected to raise FilingTypeError."""
     init_kwargs = {
         'cik_lookup': '0000320193',
         'filing_type': bad_filing_type,
     }
     with pytest.raises(FilingTypeError):
         Filing(**init_kwargs)
示例#10
0
 def test_count_setter_on_init(self, count):
     """The ``count`` passed to the constructor must be readable back unchanged."""
     created = Filing(
         cik_lookup='aapl',
         filing_type=FilingType.FILING_10Q,
         count=count,
     )
     assert created.count == count
示例#11
0
 def test_invalid_filing_type_types(self):
     """Each listed non-FilingType value is expected to raise FilingTypeError."""
     rejected_values = ('10j', '10-k', 'ssd', 'invalid', 1)
     for candidate in rejected_values:
         # One raises-context per value so every value is checked on its own.
         with pytest.raises(FilingTypeError):
             Filing(cik='0000320193', filing_type=candidate)
示例#12
0
 def test_filing_get_urls_returns_single_list_of_urls(self,
                                                      mock_cik_validator_get_multiple_ciks,
                                                      mock_single_cik_filing):
     """Every lookup's URL collection must hold exactly ``count`` (5) entries."""
     # Uses same response for filing links (will all be filings for aapl)
     f = Filing(cik_lookup=['aapl', 'msft', 'amzn'], filing_type=FilingType.FILING_10Q, count=5)
     # Fetch once: the original re-ran get_urls() for the keys and again
     # for every key inside the generator.
     urls_by_lookup = f.get_urls()
     assert all(len(urls) == 5 for urls in urls_by_lookup.values())
示例#13
0
 def test_filing_simple_example(self, tmp_data_directory):
     """Smoke test: build a 10-Q Filing for 'IBM' and save it without raising."""
     Filing(
         cik_lookup='IBM',
         filing_type=FilingType.FILING_10Q,
     ).save(tmp_data_directory)
示例#14
0
 def test_filing_save_single_cik(self, tmp_data_directory,
                                 mock_cik_validator_get_single_cik,
                                 mock_single_cik_filing):
     """Smoke test: saving three 10-Q filings for a single lookup must not raise."""
     single_lookup_filing = Filing('aapl', FilingType.FILING_10Q, count=3)
     destination = tmp_data_directory
     single_lookup_filing.save(destination)
示例#15
0
 def test_filing_save_multiple_ciks(self, tmp_data_directory,
                                    mock_cik_validator_get_multiple_ciks,
                                    mock_single_cik_filing):
     """Smoke test: saving 10-Q filings for a list of lookups must not raise."""
     lookups = ['aapl', 'amzn', 'msft']
     Filing(lookups, FilingType.FILING_10Q, count=3).save(tmp_data_directory)
示例#16
0
 def test_validate_cik_inside_filing(self, mock_single_cik_not_found):
     """Resolving the CIKs of an unknown lookup is expected to raise EDGARQueryError."""
     with pytest.raises(EDGARQueryError):
         unknown = Filing(cik_lookup='0notvalid0',
                          filing_type=FilingType.FILING_10K)
         _ = unknown.cik_lookup.ciks
示例#17
0
 def test_validate_cik_type_inside_filing(self):
     """An int or float ``cik`` is expected to be rejected with TypeError."""
     for wrongly_typed_cik in (1234567891011, 123.0):
         with pytest.raises(TypeError):
             Filing(cik=wrongly_typed_cik, filing_type=FilingType.FILING_10K)
示例#18
0
 def test_filing_type_setter(self, new_filing_type):
     """Assigning ``filing_type`` after construction must be reflected on read."""
     filing = Filing(
         cik_lookup='aapl',
         filing_type=FilingType.FILING_10Q,
     )
     filing.filing_type = new_filing_type
     assert filing.filing_type == new_filing_type
示例#19
0
 def test_good_start_date_setter_on_init(self, start_date):
     """A valid ``start_date`` given to the constructor must be readable back unchanged."""
     init_kwargs = dict(cik_lookup='aapl',
                        filing_type=FilingType.FILING_10Q,
                        start_date=start_date)
     created = Filing(**init_kwargs)
     assert created.start_date == start_date
示例#20
0
 def test_end_date_setter(self, date, expected):
     """Setting ``end_date`` must update both the attribute and the 'dateb' param."""
     initial_start = datetime.datetime(2010, 1, 1)
     initial_end = datetime.datetime(2015, 1, 1)
     filing = Filing('aapl', FilingType.FILING_10Q,
                     start_date=initial_start, end_date=initial_end)
     filing.end_date = date
     # Checked in the same order as a short-circuiting ``and`` would.
     assert filing.end_date == date
     assert filing.params.get("dateb") == expected
示例#21
0
 def test_invalid_filing_type_enum(self):
     """Using the nonexistent ``FilingType.INVALID`` member is expected to raise AttributeError."""
     with pytest.raises(AttributeError):
         # The attribute lookup is inside the raises-block, as in a direct
         # keyword-argument expression.
         missing_member = FilingType.INVALID
         Filing(cik_lookup='0000320193', filing_type=missing_member)
示例#22
0
 def test_bad_start_date_setter_on_init(self, bad_start_date):
     """A bad ``start_date`` given to the constructor is expected to raise TypeError."""
     init_kwargs = {
         'cik_lookup': 'aapl',
         'filing_type': FilingType.FILING_10Q,
         'start_date': bad_start_date,
     }
     with pytest.raises(TypeError):
         Filing(**init_kwargs)
示例#23
0
 def test_bad_filing_type_setter(self, bad_filing_type):
     """Assigning a bad ``filing_type`` to an existing Filing is expected to raise FilingTypeError."""
     # Construction uses a valid type and stays outside the raises-block, so
     # only the assignment is under test.
     filing = Filing(
         cik_lookup='aapl',
         filing_type=FilingType.FILING_10Q,
     )
     with pytest.raises(FilingTypeError):
         filing.filing_type = bad_filing_type
示例#24
0
 def test_count_setter_bad_values(self, count, expected_error):
     """A bad ``count`` given to the constructor must raise ``expected_error``."""
     init_kwargs = dict(cik_lookup='aapl',
                        filing_type=FilingType.FILING_10Q,
                        count=count)
     with pytest.raises(expected_error):
         Filing(**init_kwargs)
示例#25
0
 def test_validate_cik_type_inside_filing(self, bad_cik_lookup):
     """A badly typed ``cik_lookup`` is expected to raise TypeError on construction."""
     init_kwargs = {
         'cik_lookup': bad_cik_lookup,
         'filing_type': FilingType.FILING_10K,
     }
     with pytest.raises(TypeError):
         Filing(**init_kwargs)
    def __get_data(self, cik, filing_type, data_set):
        """Collect per-filing covenant-violation rows and word counts for one company.

        Downloads the company's filings into a local cache directory unless
        that directory already exists, then walks the cached files and builds
        one row per file whose form type is 10-K or 10-Q and whose year is
        2007 or later.

        Args:
            cik: Company identifier; stringified before it is passed to
                ``Filing`` and used verbatim in the cache path.
            filing_type: ``FilingType`` member; its ``value`` names the cache
                directory and fills the 'filing type' column.
            data_set: Label copied into each row's 'dataset' column.

        Returns:
            A ``(result, filing_word_count)`` tuple: a DataFrame with one row
            per processed file (every row keeps index label 0) and a dict
            holding the per-word sum of the per-file word counts.

        Raises:
            AssertionError: If a file's metadata, after the URL lookup, does
                not contain exactly 8 entries.
        """
        rows = []
        filing_word_count = dict()
        my_filings = Filing(cik=str(cik), filing_type=filing_type)
        path = f'../data/company_filings/{cik}_{filing_type.value}/'
        if not os.path.exists(path):
            try:
                print(
                    f'Fetching data for cik={cik}, filing_type={filing_type}')
                my_filings.save(path)
            except BaseException as exc:
                # Drop a partially written cache so a later run fetches again
                # instead of trusting incomplete data.
                try:
                    if os.path.exists(path):
                        shutil.rmtree(path)
                except OSError as e:
                    print("Error: %s : %s" % (path, e.strerror))
                # Download failures stay best-effort, but KeyboardInterrupt
                # and SystemExit must not be swallowed.
                if not isinstance(exc, Exception):
                    raise
        else:
            print(f'Skipping data fetching. Using cache at {path}')

        # File name -> URL, built lazily on the first cached file so that
        # get_urls() runs at most once (and not at all without files).
        url_by_file_name = None
        for subdir, _dirs, files in os.walk(path):
            for file in files:
                file_path = f'{subdir}/{file}'
                file_metadata = self.__get_file_metadata(file_path)
                if url_by_file_name is None:
                    url_by_file_name = {}
                    for url in my_filings.get_urls():
                        # setdefault keeps the first URL for a given name.
                        url_by_file_name.setdefault(
                            url.rsplit('/', 1)[-1].strip(), url)
                if file in url_by_file_name:
                    file_metadata['url'] = url_by_file_name[file]
                # Explicit raise instead of ``assert`` so the check still
                # runs under ``python -O``; the exception type is unchanged.
                if len(file_metadata) != 8:
                    raise AssertionError(
                        "Could not get all relevant metadata: %r" %
                        file_metadata)
                if file_metadata['year'] < 2007 or \
                        file_metadata['form_type'] not in ('10-K', '10-Q'):
                    print(
                        f'Skipping file. year={file_metadata["year"]} form_type={file_metadata["form_type"]}'
                    )
                    continue
                violations_in_file, local_word_count = self.__get_violations_for_file(
                    file_path)

                file_info = {
                    'cik': cik,
                    'firm name': file_metadata['company_name'],
                    'firm address': file_metadata['address'],
                    'zip code': str(file_metadata['zip']),
                    'year': file_metadata['year'],
                    # Quarter is recorded only for 10-Q filings.
                    'quarter': file_metadata['quarter']
                    if filing_type is FilingType.FILING_10Q else None,
                    'url': file_metadata['url'],
                    'filing type': filing_type.value,
                    'dataset': data_set,
                    'has covenant violation':
                    0 if violations_in_file == 0 else 1,
                    'total violations': violations_in_file,
                }
                rows.append(pd.DataFrame(file_info, index=[0]))
                for word in local_word_count:
                    filing_word_count[word] = (
                        filing_word_count.get(word, 0) +
                        local_word_count[word])

        # One concat at the end replaces the per-row DataFrame.append, which
        # was removed in pandas 2.0 and copied the whole frame on every row.
        result = pd.concat(rows) if rows else pd.DataFrame()
        return result, filing_word_count
示例#27
0
 def test_save_no_filings_raises_error(self, tmp_data_directory,
                                       monkeypatch, no_urls):
     """``save`` is expected to raise ValueError when ``get_urls`` yields ``no_urls``."""

     def _fake_get_urls(_filing):
         # Stand-in for Filing.get_urls that always returns the fixture value.
         return no_urls

     monkeypatch.setattr(Filing, "get_urls", _fake_get_urls)
     filing = Filing(cik_lookup='aapl', filing_type=FilingType.FILING_10K)
     with pytest.raises(ValueError):
         filing.save(tmp_data_directory)
示例#28
0
from secedgar.utils import get_cik_map
from secedgar.filings import Filing, FilingType
# Show the first five (key, value) pairs of the CIK map.
cik_map_items = list(get_cik_map().items())
print(cik_map_items[:5])

# Request 10-Q filings for "aapl" with count=1 and save them to a fixed path.
my_filings = Filing(
    cik_lookup='aapl',
    filing_type=FilingType.FILING_10Q,
    count=1,
)
my_filings.save('/home/sroot/kaizha/temp')
示例#29
0
 def test_filing_save_single_cik(self, tmp_data_directory, monkeypatch):
     """Smoke test: saving three 10-Q filings for a single lookup must not raise."""
     # The Filing is built before the patches are installed, as in the
     # original ordering.
     filing = Filing('aapl', FilingType.FILING_10Q, count=3)
     patches = (
         (_CIKValidator, "get_ciks", MockCIKValidatorGetCIKs.get_ciks),
         (NetworkClient, "get_response", MockSingleCIKFiling),
     )
     for target, attribute, replacement in patches:
         monkeypatch.setattr(target, attribute, replacement)
     filing.save(tmp_data_directory)
示例#30
0
from secedgar.filings import Filing, FilingType
import os
from tqdm import tqdm

# Read one ticker per line. The context manager closes the file, and strip()
# removes the newline without cutting the last character off a final line
# that has no trailing newline; blank lines are skipped.
with open('tickers.txt', 'r') as f:
    tickers = [line.strip() for line in f if line.strip()]

# Loop-invariant download directory, computed once.
file_dir = os.getcwd() + '/filings/'

for ticker in tqdm(tickers):
    try:
        my_filings = Filing(cik_lookup=ticker,
                            filing_type=FilingType.FILING_10K,
                            count=1)
        my_filings.save(file_dir)
        print(ticker + " 10k downloaded")

    except OSError as err:
        # Report the failure and move on to the next ticker.
        print("OS error: {0}".format(err))
        print('Unable to download ' + ticker + ' 10k!')