def nonthreaded_get_filings(): # create object print 'getting filings using a single lonely execution thread :(' seccrawler = SecCrawler() date = '20170313' # date from which filings should be downloaded # date = '20160922' count = '100' # no of filings # sp_500 = open('missed_companies2.txt') sp_500 = open('sp_500.txt') lines = sp_500.readlines() sp_500.close() companies = [line.split('\t')[1:3] for line in lines[100:]] # OVERRIDES FOR TESTING # companies = [line.split('\t')[1:3] for line in lines[2:4]] # companies = [['AMZN', 'AMZN']] start = time.time() for companyCode, cik in companies: t1 = time.time() seccrawler.getFiling(str(companyCode), str(date), str(count), "10-K") # seccrawler.getFiling(str(companyCode), str(cik), str(date), str(count), "10-Q") # seccrawler.getFiling(str(companyCode), str(cik), str(date), str(count), "8-K") t2 = time.time() print "Total Time taken for ", companyCode, ": ", str(t2 - t1) end = time.time() print '\n\n\n FINAL TIME:' print end - start
def test():
    """Read (ticker, CIK, prior-to date, count) rows from data.txt and
    download SD, 10-K, 8-K and 10-Q filings for each company.

    The first data row (index 0) is skipped by the download loop — it is
    presumably a header row; verify against the data file. Prints the
    total elapsed time.
    """
    t1 = time.time()
    seccrawler = SecCrawler()

    company_code_list = list()  # company ticker symbols
    cik_list = list()           # SEC CIK codes
    date_list = list()          # prior-to dates (YYYYMMDD)
    count_list = list()         # max filings per company

    try:
        with open("data.txt", "r") as f:
            # get the company quotes and CIK numbers from the file
            for columns in (raw.strip().split() for raw in f):
                company_code_list.append(columns[0])
                cik_list.append(columns[1])
                date_list.append(columns[2])
                count_list.append(columns[3])
    except OSError:
        # was a bare `except:` that also hid parse bugs (e.g. short rows);
        # keep the best-effort behavior for a genuinely missing file only
        print("No input file Found")

    # call different API from the crawler; start at 1 to skip the header row
    for i in range(1, len(cik_list)):
        seccrawler.filing_SD(company_code_list[i], cik_list[i],
                             date_list[i], count_list[i])
        seccrawler.filing_10K(company_code_list[i], cik_list[i],
                              date_list[i], count_list[i])
        seccrawler.filing_8K(company_code_list[i], cik_list[i],
                             date_list[i], count_list[i])
        seccrawler.filing_10Q(company_code_list[i], cik_list[i],
                              date_list[i], count_list[i])

    t2 = time.time()
    print("Total Time taken: {0}".format(t2 - t1))
def find_and_save_10K_to_folder(ticker, from_date=None, number_of_documents=40, doc_format='xbrl'):
    """Download 10-K filings for *ticker* via SecCrawler.

    from_date defaults to today's date (YYYYMMDD). The CIK and company
    name are resolved from the ticker before delegating to filing_10K.
    """
    if from_date is None:
        from_date = datetime.today().strftime('%Y%m%d')
    cik, company_name = get_cik_and_name_from_ticker(ticker)
    crawler = SecCrawler()
    crawler.filing_10K(ticker, cik, company_name, from_date,
                       number_of_documents, doc_format)
def test():
    """Read (ticker, CIK, prior-to date, count) rows from data.txt and
    download 10-Q, 10-K and 8-K filings for each company.

    The first data row (index 0) is skipped by the download loop — it is
    presumably a header row; verify against the data file. Prints the
    total elapsed time.
    """
    start_time = time.time()
    seccrawler = SecCrawler()

    company_code_list = list()  # company ticker symbols
    cik_list = list()           # SEC CIK codes
    date_list = list()          # prior-to dates (YYYYMMDD)
    count_list = list()         # max filings per company

    try:
        crs = open("data.txt", "r")
    except OSError:
        # the original fell through here and crashed later with a
        # NameError on `crs`; bail out instead
        print("No input file Found")
        return

    # get the company quotes and CIK number from the file;
    # `with` guarantees the handle is closed even if parsing fails
    with crs:
        for columns in (raw.strip().split() for raw in crs):
            company_code_list.append(columns[0])
            cik_list.append(columns[1])
            date_list.append(columns[2])
            count_list.append(columns[3])

    # call different API from the crawler; start at 1 to skip the header row
    for i in range(1, len(cik_list)):
        seccrawler.filing_10Q(str(company_code_list[i]), str(cik_list[i]),
                              str(date_list[i]), str(count_list[i]))
        seccrawler.filing_10K(str(company_code_list[i]), str(cik_list[i]),
                              str(date_list[i]), str(count_list[i]))
        seccrawler.filing_8K(str(company_code_list[i]), str(cik_list[i]),
                             str(date_list[i]), str(count_list[i]))

    end_time = time.time()
    print("Total Time taken: "),
    print(end_time - start_time)
def test(): t1 = time.time() # file containig company name and corresponding cik codes seccrawler = SecCrawler() companyCodeList = list() # company code list cikList = list() # cik code list dateList = list() # pror date list countList = list() try: crs = open("file.txt", "r") except: print "No input file Found" # get the comapny quotes and cik number from the file. for columns in ( raw.strip().split() for raw in crs ): companyCodeList.append(columns[0]) cikList.append(columns[1]) dateList.append(columns[2]) countList.append(columns[3]) del cikList[0]; del companyCodeList[0]; del dateList[0] for i in range(len(cikList)): seccrawler.filing_10Q(str(companyCodeList[i]), str(cikList[i]), str(dateList[i]), str(countList[i])) seccrawler.filing_10K(str(companyCodeList[i]), str(cikList[i]), str(dateList[i]), str(countList[i])) seccrawler.filing_8K(str(companyCodeList[i]), str(cikList[i]), str(dateList[i]), str(countList[i])) t2 = time.time() print "Total Time taken: ", print (t2-t1) crs.close()
def test(): t1 = time.time() # file containig company name and corresponding cik codes seccrawler = SecCrawler() companyCodeList = list() # company code list cikList = list() # cik code list dateList = list() # pror date list countList = list() try: crs = open("data.txt", "r") except: print "No input file Found" # get the comapny quotes and cik number from the file. for columns in ( raw.strip().split() for raw in crs ): companyCodeList.append(columns[0]) cikList.append(columns[1]) dateList.append(columns[2]) countList.append(columns[3]) del cikList[0]; del companyCodeList[0]; del dateList[0] for i in range(len(cikList)): seccrawler.filing_10Q(str(companyCodeList[i]), str(cikList[i]), str(dateList[i]), str(countList[i])) seccrawler.filing_10K(str(companyCodeList[i]), str(cikList[i]), str(dateList[i]), str(countList[i])) seccrawler.filing_8K(str(companyCodeList[i]), str(cikList[i]), str(dateList[i]), str(countList[i])) t2 = time.time() print "Total Time taken: ", print (t2-t1) crs.close()
def getSingleCompanyFiling(companyCode, logPath='downloaded_companies.txt'):
    """Download up to 100 10-K filings for companyCode and append the
    code to a completion log.

    Returns a human-readable timing string. Any download failure is
    re-raised as a plain Exception carrying the formatted traceback, so
    the error text survives crossing thread/process-pool boundaries.
    """
    date = '20170315'  # date from which filings should be downloaded
    count = '100'      # no of filings
    seccrawler = SecCrawler()

    t1 = time.time()
    try:
        seccrawler.getFiling(str(companyCode), str(date), str(count), "10-K")
    except:
        # deliberately broad: capture *any* failure's traceback as text
        raise Exception("".join(traceback.format_exception(*sys.exc_info())))
    # seccrawler.getFiling(str(companyCode), str(cik), str(date), str(count), "10-Q")
    # seccrawler.getFiling(str(companyCode), str(cik), str(date), str(count), "8-K")
    t2 = time.time()

    logString = "Total Time taken for " + companyCode + ": " + str(t2 - t1)
    # `with` guarantees the log handle is closed even when the write
    # raises (the original leaked the handle in that case)
    with open(logPath, 'a+') as f:
        f.write(companyCode + '\n')
    return logString
def test():
    """Prompt for a company name and a cutoff date, then fetch that
    company's two most recent 10-Q and 10-K filings via SecCrawler.

    The CIK is looked up from the entered name; prints the total
    elapsed time.
    """
    started = time.time()
    crawler = SecCrawler()

    tickers = list()  # company codes
    ciks = list()     # CIK codes resolved from the company name
    dates = list()    # prior-to dates (YYYYMMDD)
    counts = list()   # filings per company

    print("Enter the company name?")
    company_name = str(input())
    company_cik = get_cik(company_name)
    print("Enter the date in format YYYYMMDD?")
    cutoff_date = str(input())

    tickers.append(company_name)
    ciks.append(company_cik)
    dates.append(cutoff_date)
    counts.append("2")

    # call different API from the crawler
    for ticker, cik, cutoff, limit in zip(tickers, ciks, dates, counts):
        crawler.filing_10Q(str(ticker), str(cik), str(cutoff), str(limit))
        crawler.filing_10K(str(ticker), str(cik), str(cutoff), str(limit))
        # crawler.filing_8K(str(ticker), str(cik), str(cutoff), str(limit))

    finished = time.time()
    print("Total Time taken: "),
    print(finished - started)
def extractSectionFromExistingFilings(numThreads): filename = '../check.txt' completedFilename = 'extraction_log.txt' f = open(completedFilename, 'r') completedLines = f.readlines() f.close() completed = [line.strip() for line in completedLines] with open(filename) as check: lines = check.readlines() readLines = [[line.split('\t')[0].strip(), [line.split('\t')[6].strip()], \ [line.split('\t')[5].strip()], [line.split('\t')[6].strip()], '10-K'] for line in lines] sectionsToGet = [] for line in reversed(readLines): companyCode, filingURLList, docNameList, indexURLList, filingType = line if docNameList[0] not in completed: sectionsToGet.append(line) else: print 'Already downloaded ', docNameList[0] if numThreads > 1: print 'THREADED' results = calculateParallel(sectionsToGet, extractSingleSection, numThreads) else: print 'NONTHREADED' seccrawler = SecCrawler() for lineElem in sectionsToGet: companyCode, filingURLList, docNameList, indexURLList, filingType = lineElem if '.txt' not in filingURLList[0]: seccrawler.save_in_directory(companyCode, filingURLList, docNameList, indexURLList, filingType) f = open(completedFilename, 'a+') f.write(docNameList[0] + '\n') f.close()
def extractSingleSection(inputs):
    """Worker for the thread/process pool: extract sections for one filing.

    inputs is a 5-tuple (companyCode, filingURLList, docNameList,
    indexURLList, filingType). Plain-text (.txt) filings are skipped.
    Returns a human-readable timing or skip string; any failure is
    re-raised as a plain Exception carrying the formatted traceback so
    the error text survives crossing pool boundaries.
    """
    companyCode, filingURLList, docNameList, indexURLList, filingType = inputs
    logString = 'Skipping text file.'
    if '.txt' not in filingURLList[0]:
        t1 = time.time()
        seccrawler = SecCrawler()
        try:
            seccrawler.save_in_directory(companyCode, filingURLList,
                                         docNameList, indexURLList,
                                         filingType)
            t2 = time.time()
            # record the completed document; `with` closes the log handle
            # even if the write fails (the original leaked it then)
            completedFilename = 'extraction_log.txt'
            with open(completedFilename, 'a+') as f:
                f.write(docNameList[0] + '\n')
            logString = "Total Time taken for " + companyCode + "sections: " + str(
                t2 - t1)
        except:
            # deliberately broad: package any failure's traceback as text
            raise Exception("".join(
                traceback.format_exception(*sys.exc_info())))
    return logString
def main():
    """Pull recent filings for Apple and Google via SecCrawler.

    Downloads the two most recent 10-Q filings for Apple and the two
    most recent 10-K filings for Google, dated on/after 2017-01-01.
    """
    sec_crawler = SecCrawler()
    apple = COMPANIES["Apple"]
    google = COMPANIES["Google"]
    # Pull 10Q's for apple
    sec_crawler.filing_10q(apple.code, apple.cik, '20170101', 2)
    # Pull 10K's for google (original comment said "apple", but the call
    # clearly uses google's code and CIK)
    sec_crawler.filing_10k(google.code, google.cik, '20170101', 2)
def test():
    """Read (ticker, CIK, prior-to date, count) rows from the file named
    by sys.argv[1] and download N-CSR, N-CSRS and N-Q filings for each
    company. Prints the total elapsed time.
    """
    t1 = time.time()
    seccrawler = SecCrawler()

    company_code_list = list()  # company ticker symbols
    cik_list = list()           # SEC CIK codes
    date_list = list()          # prior-to dates (YYYYMMDD)
    count_list = list()         # max filings per company

    try:
        crs = open(sys.argv[1], "r")
    except (OSError, IndexError):
        # missing argument or unreadable file; the original fell through
        # here and crashed later with a NameError on `crs`
        print("No input file Found")
        return

    # get the company quotes and cik number from the file;
    # `with` guarantees the handle is closed even if parsing fails
    with crs:
        for columns in (raw.strip().split() for raw in crs):
            company_code_list.append(columns[0])
            cik_list.append(columns[1])
            date_list.append(columns[2])
            count_list.append(columns[3])

    # call different API from the crawler
    for i in range(0, len(cik_list)):
        seccrawler.filing_NCSR(str(company_code_list[i]), str(cik_list[i]),
                               str(date_list[i]), str(count_list[i]))
        seccrawler.filing_NCSRS(str(company_code_list[i]), str(cik_list[i]),
                                str(date_list[i]), str(count_list[i]))
        seccrawler.filing_NQ(str(company_code_list[i]), str(cik_list[i]),
                             str(date_list[i]), str(count_list[i]))

    t2 = time.time()
    print("Total Time taken: "),
    print(t2 - t1)
def test():
    """Build data.txt from listofITfirms.xls (ticker in column 1, CIK in
    column 4) and download a 10-K for each listed company.

    The generated file has a header row, which the download loop skips.
    Prints the sheet's row count and the total elapsed time.
    """
    t1 = time.time()
    seccrawler = SecCrawler()

    company_code_list = list()  # company ticker symbols
    cik_list = list()           # SEC CIK codes
    date_list = list()          # prior-to dates (YYYYMMDD)
    count_list = list()         # max filings per company

    try:
        workbook = xlrd.open_workbook('listofITfirms.xls')
    except IOError:
        # the original printed and then crashed with a NameError on
        # `workbook`; bail out instead
        print("No input file found")
        return

    sheet = workbook.sheet_by_index(0)
    # Extracting number of rows
    print(sheet.nrows)

    try:
        with open("data.txt", "w") as f:
            f.write("Ticker CIK priorto(YYYYMMDD) Count\n")
            # NOTE(review): the range end is hard-coded for testing; the
            # full sheet would run from row 1 to sheet.nrows (the original
            # comment said "range from 2 to 1514", header in row 0)
            for i in range(1, 3):
                # skip rows with a missing ticker or CIK cell
                if sheet.cell_value(i, 1) and sheet.cell_value(i, 4):
                    f.write("%s %d 20190101 100\n"
                            % (sheet.cell_value(i, 1), sheet.cell_value(i, 4)))
    except IOError:
        print("No input file found")
        return

    try:
        crs = open("data.txt", "r")
    except IOError:
        print("No input file Found")
        return

    # get the company tickers and cik number from the file
    with crs:
        for columns in (raw.strip().split() for raw in crs):
            company_code_list.append(columns[0])
            cik_list.append(columns[1])
            date_list.append(columns[2])
            count_list.append(columns[3])

    # call the crawler; start at 1 to skip the header row written above
    for i in range(1, len(cik_list)):
        seccrawler.filing_10K(str(company_code_list[i]), str(cik_list[i]),
                              str(date_list[i]), str(count_list[i]))

    t2 = time.time()
    print("Total Time taken: "),
    print(t2 - t1)