Пример #1
0
 def test_filing_get_urls_returns_single_list_of_urls(self, monkeypatch):
     """Check that ``get_urls`` maps every key to a list of exactly 3 URLs.

     ``_CIKValidator.get_ciks`` and ``NetworkClient.get_response`` are
     replaced through ``monkeypatch`` before the lookup and the filing are
     built, so the same mocked response is used for each request.
     """
     monkeypatch.setattr(_CIKValidator, "get_ciks", MockCIKValidatorMultipleCIKs.get_ciks)
     # Use same response for each request
     monkeypatch.setattr(NetworkClient, "get_response", MockSingleCIKFiling)
     ciks = CIKLookup(['aapl', 'msft', 'amzn'])
     f = Filing(ciks, FilingType.FILING_10Q, count=3)
     # Call get_urls() once and reuse the mapping instead of re-running the
     # whole lookup for the key set and then again for every key.
     urls = f.get_urls()
     # all() over an empty mapping is vacuously true, so an empty result
     # must be rejected explicitly.
     assert urls
     assert all(len(cik_urls) == 3 for cik_urls in urls.values())
Пример #2
0
 def test_filing_returns_correct_number_of_urls(
         self,
         count,
         mock_cik_validator_get_multiple_ciks,
         mock_single_cik_filing):
     """Check that ``get_urls`` maps every key to exactly ``count`` URLs.

     The two ``mock_*`` fixtures are requested but never referenced in the
     body; presumably they install the validator and response mocks
     (NOTE(review): confirm against the fixture definitions).
     """
     # Uses same response for filing links (will all be filings for aapl)
     f = Filing(cik_lookup=['aapl', 'msft', 'amzn'], filing_type=FilingType.FILING_10Q,
                count=count, client=NetworkClient(batch_size=10))
     # Call get_urls() once and reuse the mapping instead of re-running the
     # whole lookup for the key set and then again for every key.
     urls = f.get_urls()
     # all() over an empty mapping is vacuously true, so an empty result
     # must be rejected explicitly.
     assert urls
     assert all(len(cik_urls) == count for cik_urls in urls.values())
Пример #3
0
 def test_filing_get_urls_returns_single_list_of_urls(
         self, mock_cik_validator_get_multiple_ciks,
         mock_single_cik_filing):
     """Check that ``get_urls`` maps every key to a list of exactly 5 URLs.

     The two ``mock_*`` fixtures are requested but never referenced in the
     body; presumably they install the validator and response mocks
     (NOTE(review): confirm against the fixture definitions).
     """
     # Uses same response for filing links (will all be filings for aapl)
     f = Filing(cik_lookup=["aapl", "msft", "amzn"],
                filing_type=FilingType.FILING_10Q,
                count=5)
     # Call get_urls() once and reuse the mapping instead of re-running the
     # whole lookup for the key set and then again for every key.
     urls = f.get_urls()
     # all() over an empty mapping is vacuously true, so an empty result
     # must be rejected explicitly.
     assert urls
     assert all(len(cik_urls) == 5 for cik_urls in urls.values())
Пример #4
0
 def test_filing_returns_correct_number_of_urls(self, monkeypatch, count):
     """Check that ``get_urls`` maps every key to exactly ``count`` URLs.

     ``_CIKValidator.get_ciks`` and ``NetworkClient.get_response`` are
     replaced through ``monkeypatch`` before the filing is built, so the
     same mocked response is used for each request.
     """
     monkeypatch.setattr(_CIKValidator, "get_ciks",
                         MockCIKValidatorMultipleCIKs.get_ciks)
     # Use same response for each request
     monkeypatch.setattr(NetworkClient, "get_response", MockSingleCIKFiling)
     f = Filing(cik_lookup=['aapl', 'msft', 'amzn'],
                filing_type=FilingType.FILING_10Q,
                count=count,
                client=NetworkClient(batch_size=10))
     # Call get_urls() once and reuse the mapping instead of re-running the
     # whole lookup for the key set and then again for every key.
     urls = f.get_urls()
     # all() over an empty mapping is vacuously true, so an empty result
     # must be rejected explicitly.
     assert urls
     assert all(len(cik_urls) == count for cik_urls in urls.values())
Пример #5
0
 def test_txt_urls(self, monkeypatch):
     """Check that the first URL returned by ``get_urls`` ends in ``txt``."""
     # Install both mocks before building the Filing, so nothing done during
     # construction can reach the unpatched validator or client.
     monkeypatch.setattr(CIKValidator, "get_ciks",
                         MockCIKValidatorGetCIKs.get_ciks)
     monkeypatch.setattr(NetworkClient, "get_response", MockSingleCIKFiling)
     aapl = Filing(cik='aapl', filing_type=FilingType.FILING_10Q, count=10)
     first_txt_url = aapl.get_urls()[0]
     # Compare the text after the last "." with the expected extension.
     assert first_txt_url.split('.')[-1] == 'txt'
Пример #6
0
 def test_txt_urls(self, mock_cik_validator_get_single_cik,
                   mock_single_cik_filing):
     """The first URL listed under the "aapl" key must end in ``txt``."""
     filing = Filing(
         cik_lookup="aapl",
         filing_type=FilingType.FILING_10Q,
         count=10,
     )
     urls_by_key = filing.get_urls()
     leading_url = urls_by_key["aapl"][0]
     # Everything after the final "." is the extension under test.
     *_, extension = leading_url.split(".")
     assert extension == "txt"
Пример #7
0
 def test_count_returns_exact(self, monkeypatch):
     """Check that the URL list for "aapl" has exactly ``client.count`` entries.

     Raises:
         AssertionError: If the number of URLs differs from
             ``aapl.client.count``.
     """
     # Install both mocks before building the Filing, so nothing done during
     # construction can reach the unpatched validator or client.
     monkeypatch.setattr(_CIKValidator, "get_ciks", MockCIKValidatorGetCIKs.get_ciks)
     monkeypatch.setattr(NetworkClient, "get_response", MockSingleCIKFiling)
     aapl = Filing(cik_lookup='aapl', filing_type=FilingType.FILING_10Q, count=10)
     urls = aapl.get_urls()['aapl']
     if len(urls) != aapl.client.count:
         # Report the number of URLs received, not the URL list itself, so
         # the message reads as "Got <n>, but expected <m> URLs".
         raise AssertionError("""Count should return exact number of filings.
                              Got {0}, but expected {1} URLs.""".format(
                 len(urls), aapl.client.count))
Пример #8
0
 def test_count_returns_exact(self,
                              mock_cik_validator_get_single_cik,
                              mock_single_cik_filing):
     """Check that the URL list for "aapl" has exactly the requested count.

     The two ``mock_*`` fixtures are requested but never referenced in the
     body; presumably they install the validator and response mocks
     (NOTE(review): confirm against the fixture definitions).

     Raises:
         AssertionError: If the number of URLs differs from ``count``.
     """
     count = 10
     aapl = Filing(cik_lookup='aapl', filing_type=FilingType.FILING_10Q, count=count)
     urls = aapl.get_urls()['aapl']
     if len(urls) != count:
         # Report the number of URLs received, not the URL list itself, so
         # the message reads as "Got <n>, but expected <m> URLs".
         raise AssertionError("""Count should return exact number of filings.
                              Got {0}, but expected {1} URLs.""".format(
             len(urls), count))
    def __get_data(self, cik, filing_type, data_set):
        """Collect per-file covenant-violation rows and word counts for one CIK.

        Filings are saved under
        ``../data/company_filings/{cik}_{filing_type.value}/`` unless that
        directory already exists, in which case it is used as a cache. Every
        file found below the directory is then processed. Files whose
        metadata year is below 2007, or whose form type is neither ``10-K``
        nor ``10-Q``, are skipped.

        Args:
            cik: Company identifier. It is passed to ``Filing`` as
                ``str(cik)`` and stored unchanged in the ``cik`` column.
            filing_type: Expected to be a ``FilingType`` member (it is
                compared against ``FilingType.FILING_10Q``). Its ``.value``
                is used in the cache path and the ``filing type`` column.
            data_set: Stored unchanged in the ``dataset`` column.

        Returns:
            tuple: ``(result, filing_word_count)``. ``result`` is a
            ``pandas.DataFrame`` with one row per processed file (an empty
            frame when nothing was processed); every row keeps index
            label 0. ``filing_word_count`` is a dict mapping each word to
            its count summed over all processed files.

        Raises:
            AssertionError: If a file's metadata does not contain exactly
                8 entries after the URL lookup.
        """
        filing_word_count = dict()
        my_filings = Filing(cik=str(cik), filing_type=filing_type)
        path = f'../data/company_filings/{cik}_{filing_type.value}/'
        if not os.path.exists(path):
            try:
                print(
                    f'Fetching data for cik={cik}, filing_type={filing_type}')
                my_filings.save(path)
            except BaseException as fetch_error:
                # Remove a partial download so a later run does not mistake
                # it for a complete cache.
                try:
                    if os.path.exists(path):
                        shutil.rmtree(path)
                except OSError as e:
                    print("Error: %s : %s" % (path, e.strerror))
                if not isinstance(fetch_error, Exception):
                    # KeyboardInterrupt / SystemExit must never be swallowed.
                    raise
                # Ordinary failures stay best-effort (the walk below then
                # finds nothing), but they are no longer silent.
                print(f'Failed to fetch data for cik={cik}, '
                      f'filing_type={filing_type}: {fetch_error!r}')
        else:
            print(f'Skipping data fetching. Using cache at {path}')

        row_frames = []
        # Built lazily on the first file so get_urls() runs at most once,
        # and not at all when the directory holds no files.
        url_by_filename = None
        for subdir, _dirs, files in os.walk(path):
            for filename in files:
                file_path = f'{subdir}/{filename}'
                file_metadata = self.__get_file_metadata(file_path)
                if url_by_filename is None:
                    url_by_filename = {}
                    for url in my_filings.get_urls():
                        # setdefault keeps the first URL for a given file
                        # name, matching a first-match linear search.
                        url_by_filename.setdefault(
                            url.rsplit('/')[-1].strip(), url)
                if filename in url_by_filename:
                    file_metadata['url'] = url_by_filename[filename]
                # Raised explicitly (not via ``assert``) so the check still
                # runs when Python is started with -O.
                if len(file_metadata) != 8:
                    raise AssertionError(
                        "Could not get all relevant metadata: %r" % file_metadata)
                if file_metadata['year'] < 2007 or \
                        file_metadata['form_type'] not in ('10-K', '10-Q'):
                    print(
                        f'Skipping file. year={file_metadata["year"]} form_type={file_metadata["form_type"]}'
                    )
                    continue
                violations_in_file, local_word_count = self.__get_violations_for_file(
                    file_path)

                file_info = {
                    'cik': cik,
                    'firm name': file_metadata['company_name'],
                    'firm address': file_metadata['address'],
                    'zip code': str(file_metadata['zip']),
                    'year': file_metadata['year'],
                    # The quarter is only recorded for 10-Q filings.
                    'quarter': file_metadata['quarter']
                    if filing_type is FilingType.FILING_10Q else None,
                    'url': file_metadata['url'],
                    'filing type': filing_type.value,
                    'dataset': data_set,
                    'has covenant violation':
                    0 if violations_in_file == 0 else 1,
                    'total violations': violations_in_file,
                }
                row_frames.append(pd.DataFrame(file_info, index=[0]))
                for word in local_word_count:
                    filing_word_count[word] = (
                        filing_word_count.get(word, 0) + local_word_count[word])

        # DataFrame.append was removed in pandas 2.0 and copied the whole
        # frame on every call; concatenate the per-file rows once instead.
        # pd.concat raises on an empty list, hence the explicit fallback.
        result = pd.concat(row_frames) if row_frames else pd.DataFrame()
        return result, filing_word_count
Пример #10
0
 def test_txt_urls(self, mock_cik_validator_get_single_cik, mock_single_cik_filing):
     """The first URL listed under the 'aapl' key must end in ``txt``."""
     filing = Filing(
         cik_lookup='aapl',
         filing_type=FilingType.FILING_10Q,
         count=10,
     )
     aapl_urls = filing.get_urls()['aapl']
     # Split on every "." and keep the last piece as the extension.
     pieces = aapl_urls[0].split('.')
     extension = pieces[-1]
     assert extension == 'txt'