def test_edgar_download_xbrl():
    """Check that the XBRL ingest of "AAPL" leaves the expected file on disk.

    Builds an ``Ingestor`` and an ``Edgar("xbrl", "2014-01-01")``, hands the
    result of ``ingest_stock("AAPL")`` to ``file_downloader`` together with
    ``docs_directory``, then asserts that ``aapl-20130928.xml`` exists
    directly under ``docs_directory``.
    """
    downloader = Ingestor()
    xbrl_source = Edgar("xbrl", "2014-01-01")

    filings = xbrl_source.ingest_stock("AAPL")
    downloader.file_downloader(filings, docs_directory)

    expected_file = docs_directory + "/aapl-20130928.xml"
    # os.path.exists already returns a bool, so it can be asserted directly.
    assert os.path.exists(expected_file)
def test_edgar_download_html():
    """Check that the HTML ingest of "AAPL" leaves the expected file on disk.

    Builds an ``Ingestor`` and an ``Edgar("html", "2013-01-01")``, hands the
    result of ``ingest_stock("AAPL")`` to ``file_downloader`` together with
    ``docs_directory``, then asserts that ``a2124888z10-k.htm`` exists
    directly under ``docs_directory``.
    """
    downloader = Ingestor()
    html_source = Edgar("html", "2013-01-01")

    filings = html_source.ingest_stock("AAPL")
    downloader.file_downloader(filings, docs_directory)

    expected_file = docs_directory + "/a2124888z10-k.htm"
    # os.path.exists already returns a bool, so it can be asserted directly.
    assert os.path.exists(expected_file)
exit(0)  # NOTE(review): final statement of a definition that begins above this excerpt (presumably the quit_gracefully handler registered below) - re-indent to match that body

# The SIGINT handler is registered before any other set-up runs.
signal.signal(signal.SIGINT, quit_gracefully)

env = lucene.initVM()
queryer = Queryer("index", "hits")
print('Using Directory: ', queryer.store_dir)

# Folder name handed to both the downloader and the Indexer below.
directoryToWalk = "docs"

# Create the indexer.  Per the original note, the indexer thread is a daemon,
# which causes it to terminate on a SIGINT.
indexer = Indexer(queryer.store_dir, queryer.writer, directoryToWalk)

ingestor = Ingestor()
edgar = Edgar()

# Each line of data.txt (trailing whitespace stripped) is passed to
# edgar.ingest_stock(); the result is handed to ingestor.file_downloader().
with open('data.txt', 'r') as reader:
    for line in reader:
        symbol = line.rstrip()
        ingestor.file_downloader(edgar.ingest_stock(symbol), directoryToWalk)

# NOTE(review): the collapsed source does not show whether this call sat
# inside the loop above; it is assumed to run once after all downloads - confirm.
indexer.indexDocs()

# Start the terminal query interface.
queryer.run(queryer.writer, queryer.analyzer)

# When the query interface returns, invoke the signal handler directly so the
# writer is cleaned up.
quit_gracefully()
exit(0)  # NOTE(review): final statement of a definition that begins above this excerpt (presumably the quit_gracefully handler registered below) - re-indent to match that body

# The SIGINT handler is registered before any other set-up runs.
signal.signal(signal.SIGINT, quit_gracefully)

env = lucene.initVM()
queryer = Queryer("index", "hits")

# Was a Python 2 print statement, which is a SyntaxError on Python 3.  A
# single pre-formatted argument prints the same text on both versions; the two
# spaces after the colon reproduce the separator the print statement inserted.
print('Using Directory:  %s' % (queryer.store_dir,))

# Folder name handed to both the downloader and the Indexer below.
directoryToWalk = "docs"

# Create the indexer.  Per the original note, the indexer thread is a daemon,
# which causes it to terminate on a SIGINT.
indexer = Indexer(queryer.store_dir, queryer.writer, directoryToWalk)

ingestor = Ingestor()
edgar = Edgar()

# Each line of edgar_tickers.txt (trailing whitespace stripped) is passed to
# edgar.ingest_stock(); the result is handed to ingestor.file_downloader().
with open('edgar_tickers.txt', 'r') as reader:
    for line in reader:
        ingestor.file_downloader(edgar.ingest_stock(line.rstrip()), directoryToWalk)

# NOTE(review): the collapsed source does not show whether this call sat
# inside the loop above; it is assumed to run once after all downloads - confirm.
indexer.indexDocs()

# Start the terminal query interface.
queryer.run(queryer.writer, queryer.analyzer)

# When the query interface returns, invoke the signal handler directly so the
# writer is cleaned up.
quit_gracefully()
#! /usr/bin/env python
# encoding: utf-8
from ingestor import Ingestor, Edgar, Sedar, IngestorException
import requests
import requests.utils
import os
import shutil
import sys
# NOTE(review): this runs after `from ingestor import ...` above, so it cannot
# influence that import - confirm whether it was meant to precede it.
sys.path.insert(0, os.path.abspath('./'))
import pytest

# Folder that setup_module() creates for downloaded documents.
docs_directory = "test"

# Skip marker evaluated once at import time: runs `ping -c 1 <host>`, where
# <host> is Edgar().org_root with "http://" removed, and skips the decorated
# test when the command exits non-zero.
# The command previously read "ping -c 1 \ " - a leftover line-continuation
# backslash that is an unrecognized Python escape and, in the shell, glued a
# leading space onto the host argument.
ping_edgar = pytest.mark.skipif(
    os.system("ping -c 1 " + Edgar().org_root.replace("http://", "")) != 0,
    reason="could not reach Edgar")


def setup_module():
    """Create the ``docs_directory`` folder if it does not already exist.

    Named after pytest's module-level setup hook, which pytest runs once
    before the tests in this module.
    """
    if not os.path.exists(docs_directory):
        os.mkdir(docs_directory)


@ping_edgar
def test_edgar_download_html():
    ingestor = Ingestor()
    edgar = Edgar("html", "2013-01-01")