Exemplo n.º 1
0
def get_comp_tuples(companies,data_folder):
    '''
    Fuzzy-match company names against the pickled 'sp500tickcik' table.

    For each entry of ``companies`` the table row whose field 3 scores highest
    under ``fuzz.ratio`` is selected; its name (apostrophes and periods
    removed) is passed to ``edgar.Edgar().findCompanyName`` and the first hit
    is paired with field 1 of that row.

    Args:
        companies: iterable of company names to resolve.
        data_folder: not used by the code visible here.

    Raises:
        ValueError: if ``companies`` is non-empty but the table has no rows,
            so there is nothing to take the maximum of.

    NOTE(review): the accumulated ``tick_cik`` array is never returned in the
    code visible here -- confirm whether a ``return`` is missing.
    '''

    # NOTE(review): pickle.load can execute arbitrary code from the file; this
    # is only safe while 'sp500tickcik' is a trusted, locally produced file.
    with open('sp500tickcik', 'rb') as file:
        data = pickle.load(file)

    # A plain list avoids np.append, which copies the whole array on every
    # call and made the original loops quadratic. Rows are still read by
    # position because the container type of `data` is not visible here.
    names = [data[idx][3] for idx in range(len(data))]

    tick_cik = []
    edgar_t = edgar.Edgar()
    for name in companies:
        scores = [fuzz.ratio(candidate, name) for candidate in names]

        # argmax yields the first index of the maximum, the same row that
        # np.where(score == np.amax(score))[0][0] selected.
        maxInd = int(np.argmax(scores))
        # Strip apostrophes and periods before querying EDGAR.
        bmc = str(data[maxInd][3]).replace("'", "").replace(".", "")
        comp = edgar_t.findCompanyName(bmc)
        # Left as np.append so the flat array built here keeps its shape.
        tick_cik = np.append(tick_cik, (comp[0], data[maxInd][1]))
Exemplo n.º 2
0
import os

import requests
import xlrd
import edgar
from company import Company
import xlsxwriter
from bs4 import BeautifulSoup

# Shared edgar.Edgar instance, created once when the module is imported.
edg = edgar.Edgar()
# NOTE(review): presumably a large "infinity"-style sentinel; its use is not
# visible in this part of the file -- confirm.
oo = 1e9


def print_log(mess, log_file, verbose=True):
    """Print ``mess`` and mirror it to ``log_file`` when ``verbose`` is set.

    Args:
        mess: text to emit.
        log_file: object with a ``write`` method, or ``None`` to skip the
            file copy.
        verbose: when false, nothing is printed and nothing is written.
    """
    if not verbose:
        return
    print(mess)
    if log_file is None:
        return
    # One message per line in the log.
    log_file.write(mess + "\n")


def format_cik_file(input_file="cik.xlsx",
                    output_file="cik.xlsx",
                    verbose=True,
                    log_file=None):
    """Open the CIK workbook and start a fresh output workbook.

    The first sheet of ``input_file`` is opened before any existing
    ``output_file`` is removed; both default to the same path, so this order
    must be kept.

    Args:
        input_file: path of the workbook to read with xlrd.
        output_file: path of the workbook to create with xlsxwriter; an
            existing file at this path is deleted first.
        verbose: not used by the code visible here.
        log_file: not used by the code visible here.
    """
    source_book = xlrd.open_workbook(input_file)
    first_sheet = source_book.sheet_by_index(0)

    # Drop any stale output so the new workbook starts from scratch.
    if os.path.exists(output_file):
        os.remove(output_file)

    workbook = xlsxwriter.Workbook(output_file)
    worksheet = workbook.add_worksheet()
Exemplo n.º 3
0
import edgar
# Keep the name `edgar` bound to the module. Rebinding it to an Edgar instance
# made the later edgar.Company / edgar.getDocuments lookups resolve against
# the instance instead of the module.
edgar_index = edgar.Edgar()
possible_companies = edgar_index.findCompanyName("Cisco System")
print(possible_companies)

# Get Oracle Corp's last 5 form 10-K's
company = edgar.Company("Oracle Corp", "0001341439")
tree = company.getAllFilings(filingType="10-K")
docs = edgar.getDocuments(tree, noOfDocuments=5)
# docs is an array of strings, each one is the full text doc

# SIC codes
url = "https://www.sec.gov/info/edgar/siccodes.htm"
# Developer page
# https://www.sec.gov/developer
Exemplo n.º 4
0
                # Get the page number of the "financial report" section
                y = re.sub("[^0-9]", "", line)  # Remove non-numeric characters
                fyear = int(y[-4:])
                return fyear
    return fyear


def countStatesApperance(doc):
    """Return how many distinct words of ``doc`` appear in ``states``.

    ``doc`` is split on whitespace; a word that occurs several times is
    counted once. ``states`` is defined elsewhere in the file.
    """
    matched = {token for token in doc.split() if token in states}
    return len(matched)


ed = edgar.Edgar()

# Left-pad the CIK with zeros to 10 characters.
c = "785814"
c = c.zfill(10)

#n = ed.getCompanyNameByCik(c)
company = edgar.Company("INTEGRATED HEALTH SVCS INC", c)
tree = company.getAllFilings(filingType="10-K")
docs = edgar.getDocuments(tree, noOfDocuments=30)
# Every docs[0] access sits behind the emptiness check. Previously the file
# was opened and docs[0] written before the check, so an empty result
# truncated the output file and raised IndexError.
if len(docs) > 0:
    with io.open("C:/Users/William/Desktop/Output.txt", "w",
                 encoding="utf-8") as f:
        f.write(docs[0])
    print(countStatesApperance(docs[0]))
    print(extractYear(docs[0]))
Exemplo n.º 5
0
URL = 'http://www.sec.gov/cgi-bin/browse-edgar?CIK={}&Find=Search&owner=exclude&action=getcompany'
CIK_RE = re.compile(r'.*CIK=(\d{10}).*')

# Change each ticker into its CIK by scraping the EDGAR company page.
cik_dict = {}
cik = None  # last CIK successfully resolved, if any
for ticker in DEFAULT_TICKERS:
    f = requests.get(URL.format(ticker), stream=True)
    results = CIK_RE.findall(f.text)
    if results:
        cik = str(results[0])
        cik_dict[str(ticker).upper()] = cik
print(cik_dict)

# Fail with a clear message instead of an IndexError/NameError further down
# when no ticker produced a CIK.
if cik is None:
    raise RuntimeError("no CIK could be resolved for any ticker")

# Use edgar to get text compilation of the lxml
# Get Company name from CIK. `cik` is used for both calls; `results` only
# reflects the final loop iteration and may be empty.
edgar1 = edgar.Edgar()
cmp_name = edgar1.getCompanyNameByCik(cik)
print(cmp_name)
company = edgar.Company(cmp_name, cik)

# Creating filename and url structure
file_name = [
    f for f in os.listdir(out_path)
    if os.path.isfile(os.path.join(out_path, f))
]
# NOTE(review): raises IndexError when out_path contains no regular files.
switched_filename = file_name[0]
switched_filename = switched_filename.replace('-', '').replace(
    '.txt', '/index.json')
print(switched_filename)
print(file_name)