Пример #1
0
def test_edgar_download_xbrl():
    """Download the AAPL documents via Edgar("xbrl", "2014-01-01").

    Passes when ``aapl-20130928.xml`` exists under ``docs_directory`` after
    the download call returns.
    """
    downloader = Ingestor()
    source = Edgar("xbrl", "2014-01-01")
    filings = source.ingest_stock("AAPL")
    downloader.file_downloader(filings, docs_directory)

    expected_file = docs_directory + "/aapl-20130928.xml"
    assert os.path.exists(expected_file)
Пример #2
0
def test_edgar_download_html():
    """Download the AAPL documents via Edgar("html", "2013-01-01").

    Passes when ``a2124888z10-k.htm`` exists under ``docs_directory`` after
    the download call returns.
    """
    downloader = Ingestor()
    source = Edgar("html", "2013-01-01")
    filings = source.ingest_stock("AAPL")
    downloader.file_downloader(filings, docs_directory)

    expected_file = docs_directory + "/a2124888z10-k.htm"
    assert os.path.exists(expected_file)
Пример #3
0
def test_edgar_download_xbrl():
    """Download the AAPL documents via Edgar("xbrl", "2014-01-01").

    Passes when ``aapl-20130928.xml`` exists under ``docs_directory`` after
    the download call returns.
    """
    downloader = Ingestor()
    source = Edgar("xbrl", "2014-01-01")
    filings = source.ingest_stock("AAPL")
    downloader.file_downloader(filings, docs_directory)

    expected_file = docs_directory + "/aapl-20130928.xml"
    assert os.path.exists(expected_file)
Пример #4
0
def test_edgar_download_html():
    """Download the AAPL documents via Edgar("html", "2013-01-01").

    Passes when ``a2124888z10-k.htm`` exists under ``docs_directory`` after
    the download call returns.
    """
    downloader = Ingestor()
    source = Edgar("html", "2013-01-01")
    filings = source.ingest_stock("AAPL")
    downloader.file_downloader(filings, docs_directory)

    expected_file = docs_directory + "/a2124888z10-k.htm"
    assert os.path.exists(expected_file)
Пример #5
0
    exit(0)

# Script flow: install the SIGINT handler, start Lucene, download and index
# the documents for every line of data.txt, then hand control to the
# terminal query interface.

# always declare the signal handler first
signal.signal(signal.SIGINT, quit_gracefully)

# initialise the Lucene VM and keep the value it returns
env = lucene.initVM()
# NOTE(review): "index" and "hits" are passed straight to Queryer; their
# meaning is not visible here -- confirm against the Queryer constructor
queryer = Queryer("index", "hits")
print('Using Directory: ', queryer.store_dir)

# directory for storing downloaded docs
directoryToWalk = "docs"

# and start the indexer
# note the indexer thread is set to daemon causing it to terminate on a SIGINT
# (the indexer is given the queryer's store directory and writer)
indexer = Indexer(queryer.store_dir, queryer.writer, directoryToWalk)
ingestor = Ingestor()
edgar = Edgar()

# every line of data.txt, with trailing whitespace stripped, is handed to
# ingest_stock; presumably one stock ticker per line -- verify the file format
with open('data.txt', 'r') as reader:
    for line in reader:
        ingestor.file_downloader(edgar.ingest_stock(line.rstrip()),
         directoryToWalk)
        # index again after each download
        indexer.indexDocs()

# start up the terminal query interface
queryer.run(queryer.writer, queryer.analyzer)

# if return from Querying then call the signal handler
# to clean up the writer cleanly
# NOTE(review): called here with no arguments although it is also registered
# as a signal handler -- confirm its parameters have defaults
quit_gracefully()
Пример #6
0
    exit(0)

# Script flow: install the SIGINT handler, start Lucene, download and index
# the documents for every line of edgar_tickers.txt, then hand control to the
# terminal query interface.

# always declare the signal handler first
signal.signal(signal.SIGINT, quit_gracefully)

# initialise the Lucene VM and keep the value it returns
env = lucene.initVM()
# NOTE(review): "index" and "hits" are passed straight to Queryer; their
# meaning is not visible here -- confirm against the Queryer constructor
queryer = Queryer("index", "hits")
# A single parenthesised argument prints the same text under Python 2 and
# Python 3; the old print statement form is a SyntaxError on Python 3.
# The two spaces reproduce the separator the print statement inserted.
print('Using Directory:  %s' % (queryer.store_dir,))

# directory for storing downloaded docs
directoryToWalk = "docs"

# and start the indexer
# note the indexer thread is set to daemon causing it to terminate on a SIGINT
# (the indexer is given the queryer's store directory and writer)
indexer = Indexer(queryer.store_dir, queryer.writer, directoryToWalk)
ingestor = Ingestor()
edgar = Edgar()

# every line of edgar_tickers.txt, with trailing whitespace stripped, is
# handed to ingest_stock; presumably one ticker per line -- verify the format
with open('edgar_tickers.txt', 'r') as reader:
    for line in reader:
        ingestor.file_downloader(edgar.ingest_stock(line.rstrip()),
                                 directoryToWalk)
        # index again after each download
        indexer.indexDocs()

# start up the terminal query interface
queryer.run(queryer.writer, queryer.analyzer)

# if return from Querying then call the signal handler
# to clean up the writer cleanly
# NOTE(review): called here with no arguments although it is also registered
# as a signal handler -- confirm its parameters have defaults
quit_gracefully()
Пример #7
0
#! /usr/bin/env python
# encoding: utf-8

from ingestor import Ingestor, Edgar, Sedar, IngestorException
import requests
import requests.utils
import os
import shutil
import sys
sys.path.insert(0, os.path.abspath('./'))
import pytest

# Folder the tests download documents into.
docs_directory = "test"

# Skip marker for tests that need Edgar: the host taken from
# Edgar().org_root (with "http://" removed) is pinged once when this module
# is imported, and marked tests are skipped if the command exits non-zero.
ping_edgar = pytest.mark.skipif(
    0 != os.system(
        "ping -c 1     " + Edgar().org_root.replace("http://", "")
    ),
    reason="could not reach Edgar",
)


def setup_module():
    """Make sure the download folder named by ``docs_directory`` exists."""
    # Nothing to do when the path is already present.
    if os.path.exists(docs_directory):
        return
    os.mkdir(docs_directory)


@ping_edgar
def test_edgar_download_html():

    ingestor = Ingestor()
    edgar = Edgar("html", "2013-01-01")