def download_filings(feedpath, args=None):
    """Go through all entries in the given EDGAR RSS feed and download any missing or new filings."""
    logger.info("Processing RSS feed %s", feedpath)

    dir = filings_dir(feedpath)
    os.makedirs(dir, exist_ok=True)

    filing_urls = []
    for filing in feed_tools.read_feed(feedpath):
        if args:
            if args.company_re and not bool(
                    args.company_re.match(filing['companyName'])):
                continue
            if args.cik and args.cik != filing['cikNumber']:
                continue
            if args.sic and args.sic != filing['assignedSic']:
                continue
            if args.form_type and args.form_type != filing['formType']:
                continue
        if 'enclosureUrl' in filing and not exists_filing(
                dir, filing['enclosureUrl'], filing['enclosureLength']):
            filing_urls.append(filing['enclosureUrl'])

    logger.info("Start downloading %d new filings", len(filing_urls))
    # Guard against the default args=None: let the executor pick its own
    # worker count and fall back to an assumed retry count of 3
    max_threads = args.max_threads if args else None
    max_retries = args.max_retries if args else 3
    with concurrent.futures.ThreadPoolExecutor(
            max_workers=max_threads) as executor:
        futures = [
            executor.submit(download_filing, dir, url, max_retries)
            for url in filing_urls
        ]
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception:
                logger.exception("Failed to download filing")
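A minimal usage sketch for the threaded variant above; the argparse.Namespace attributes and the feed path are assumptions inferred from what download_filings reads, not confirmed by the source:

import argparse
import re

# Hypothetical namespace mirroring the attributes download_filings expects;
# the project's real parse_args() may define different flags and defaults.
args = argparse.Namespace(
    company_re=re.compile(r'APPLE INC', re.IGNORECASE),  # company name filter
    cik=None,                # no CIK filter
    sic=None,                # no SIC industry filter
    form_type='10-K',        # annual reports only
    max_threads=8,           # worker threads for the download pool
    max_retries=3)           # per-download retry budget
download_filings('feeds/xbrlrss-2017-01.xml', args)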
Example #2
def build_secdb(feeds):
    # Set up the Python logging framework (note that this function relies on
    # a module-level `args` namespace populated by the script's argument parser)
    setup_logging(args.log_file)

    # Load the CIK -> ticker symbol mapping
    tickers = load_ticker_symbols()

    # Set up the DB connection
    global db_connect
    db_connect = setup_db_connect(args.db_driver, args.db_name)

    # Create all required DB tables
    if args.create_tables:
        create_db_tables()
        create_db_indices()
        insert_ticker_symbols(tickers)

    # Process all filings in the given RSS feeds, one month after another
    for filepath in feeds:

        # Load EDGAR filing metadata from the RSS feed, keeping only 10-K/10-Q
        # filings from companies with an assigned ticker symbol
        filings = {}
        for filing in feed_tools.read_feed(filepath):
            if args.cik is None or args.cik == filing['cikNumber']:
                if filing['formType'] in ('10-K', '10-K/A', '10-Q', '10-Q/A') and filing['cikNumber'] in tickers:
                    filing['ticker'] = tickers[filing['cikNumber']]
                    filings.setdefault(filing['cikNumber'], []).append(filing)

        # Process the selected XBRL filings
        process_filings(filings)
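A minimal sketch of how the feeds argument might be assembled, assuming one monthly RSS feed file per month in a hypothetical feeds/ directory:

import glob

# Process every downloaded monthly feed in chronological order
feeds = sorted(glob.glob('feeds/xbrlrss-*.xml'))
build_secdb(feeds)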
Example #3
def download_filings(feedpath, args=None):
    """Go through all entries in the given EDGAR RSS feed and download any missing or new filings."""
    logger.info("Processing RSS feed %s", feedpath)

    dir = filings_dir(feedpath)
    os.makedirs(dir, exist_ok=True)

    filing_urls = []
    for filing in feed_tools.read_feed(feedpath):
        if args:
            if args.company_re and not bool(
                    args.company_re.match(filing['companyName'])):
                continue
            if args.cik and args.cik != filing['cikNumber']:
                continue
            if args.sic and args.sic != filing['assignedSic']:
                continue
            if args.form_type and args.form_type != filing['formType']:
                continue
        if 'enclosureUrl' in filing and not exists_filing(
                dir, filing['enclosureUrl'], filing['enclosureLength']):
            filing_urls.append(filing['enclosureUrl'])

    logger.info("Start downloading %d new filings", len(filing_urls))
    # Guard against the default args=None (a retry count of 3 is an assumed default)
    max_retries = args.max_retries if args else 3
    for url in filing_urls:
        download_filing(dir, url, max_retries)
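Both download variants rely on an exists_filing helper that is not shown in these examples. A plausible sketch, under two assumptions: the local filename is the last path segment of the enclosure URL, and enclosureLength is the expected size in bytes:

import os

def exists_filing(dir, url, length):
    # Assumption: download_filing saves each enclosure under the last path
    # segment of its URL inside the per-month filings directory
    filepath = os.path.join(dir, url.rpartition('/')[2])
    # Treat a filing as already downloaded only when its on-disk size matches
    # the enclosureLength advertised in the RSS feed
    return os.path.isfile(filepath) and os.path.getsize(filepath) == length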
Example #4

def generate_project(feedpath):
    # Materialize the feed entries so they can be iterated more than once
    # below (read_feed may well return a generator)
    filings = list(feed_tools.read_feed(feedpath))
    filings_by_company = {}
    for filing in filings:
        filings_by_company.setdefault(filing['companyName'], []).append(filing)

    # Derive the YYYY-MM month from the feed filename, e.g. xbrlrss-2017-01.xml
    month = re.fullmatch(r'.*xbrlrss-(\d{4}-\d{2})\.xml',
                         os.path.basename(feedpath)).group(1)
    dir = os.path.join(feed_tools.filings_dir, month)
    file = os.path.join(dir, '%s.spp' % month)
    print('Generating project file', file)
    with open(file, 'w') as f:
        f.write("""\
<?xml version="1.0" encoding="UTF-8"?>
<Project>
""")

        f.write("""\
	<Folder FolderName="Filings by name" ExtStr="xml">
""")
        for filing in filings:
            if filing['instanceUrl']:
                # Strip the fixed-length 'filings/YYYY-MM/' prefix and turn the
                # URL-escaped '%7Czip/' back into the '|zip\' archive notation
                f.write("""\
		<File FilePath="%s" HomeFolder="Yes"/>
""" % filing['instanceUrl'][len('filings/YYYY-MM/'):].replace(
                    '%7Czip/', '|zip\\'))
        f.write("""\
	</Folder>
""")

        f.write("""\
	<Folder FolderName="Filings by company">
""")
        for company in sorted(filings_by_company.keys()):
            f.write("""\
		<Folder FolderName="%s" ExtStr="xml">
""" % company.replace('&', '&amp;').replace('<', '&lt;'))
            for filing in filings_by_company[company]:
                if filing['instanceUrl']:
                    f.write("""\
			<File FilePath="%s" HomeFolder="Yes"/>
""" % filing['instanceUrl'][len('filings/YYYY-MM/'):].replace(
                        '%7Czip/', '|zip\\'))
            f.write("""\
		</Folder>
""")
        f.write("""\
	</Folder>
""")

        f.write("""\
</Project>
""")
Example #6
def main():
    # Parse script arguments
    args = parse_args()
    # Setup python logging framework
    setup_logging(args)

    # Validate all filings in the given RSS feeds one month after another
    for filepath in collect_feeds(args):

        # Load EDGAR filing metadata from the RSS feed, keeping only filings
        # that match the requested form type, SIC code, and CIK
        filings = []
        for filing in feed_tools.read_feed(filepath):
            # Map the pre-reorganization Google CIK to the Alphabet CIK
            if filing['cikNumber'] == 1288776:
                filing['cikNumber'] = 1652044
            if args.form_type is None or args.form_type == filing['formType']:
                if args.sic is None or args.sic == filing['assignedSic']:
                    if args.cik is None or args.cik == filing['cikNumber']:
                        filings.append(filing)

        # Validate the selected XBRL filings (capped at the first 100 per feed)
        validate_filings(filings[:100], args.max_threads)
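main() assumes a parse_args() helper. A minimal sketch using argparse, where every flag name and default is an assumption inferred from the attributes the examples above read off args:

import argparse
import re

def parse_args():
    # Hypothetical parser; the real script may define different flags
    parser = argparse.ArgumentParser(description='Process EDGAR XBRL feeds')
    parser.add_argument('feeds', nargs='*', help='monthly RSS feed files')
    parser.add_argument('--form-type', help='filter by form type, e.g. 10-K')
    parser.add_argument('--sic', type=int, help='filter by SIC industry code')
    parser.add_argument('--cik', type=int, help='filter by CIK number')
    parser.add_argument('--company-re', type=re.compile,
                        help='regex matched against company names')
    parser.add_argument('--max-threads', type=int, default=8)
    parser.add_argument('--max-retries', type=int, default=3)
    parser.add_argument('--log-file', help='optional log file path')
    return parser.parse_args()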