def __init__(self, errorKey, filename, replace, previousStatus=None,
             actualStatus=None, isNullPool=False):
    """Store the import parameters and load the FileData row for ``filename``.

    Calls ``initLogger()``, opens a new session through ``DBConnector``
    (forwarding ``isNullPool``), looks up the FileData record for
    ``filename`` in that session, and calls ``initCache()`` when
    ``AbstractImporter.cacheDict`` is falsy.
    """
    self.initLogger()

    # Plain parameters kept on the instance.
    self.errorKey = errorKey
    self.filename = filename
    self.replace = replace
    self.previousStatus = previousStatus
    self.actualStatus = actualStatus

    # Collaborators: DAO instance and a dedicated session.
    self.fileDataDao = FileDataDao()
    connector = DBConnector(isNullPool=isNullPool)
    self.session = connector.getNewSession()

    self.fileData = FileDataDao.getFileData(self.filename, self.session)

    # The cache lives on AbstractImporter; nothing to do once it is filled.
    if AbstractImporter.cacheDict:
        return
    self.initCache()
def importMasterIndexFor(self, period, replaceMasterFile, session=None, threadNumber=1):
    """Create FileData records for the 10-Q / 10-K filings of one quarter.

    Fetches the gzip-compressed EDGAR ``master.gz`` index for ``period``
    through ``getBinaryFileFromCache``, parses its pipe-separated table and,
    for each row whose form type is "10-Q" or "10-K" and whose filename has
    no FileData record yet, creates the record (linked to the company
    returned by ``CompanyEngine.getOrCreateCompany``) and commits it.

    Args:
        period: object exposing ``year`` and ``quarter`` attributes.
        replaceMasterFile: forwarded unchanged to ``getBinaryFileFromCache``
            (presumably forces a re-download -- TODO confirm).
        session: database session to use; when ``None`` a new one is
            obtained from ``DBConnector``.
        threadNumber: not used by this method; kept so existing callers
            keep working.

    Raises:
        ValueError: if the decoded index contains no "CIK" header line.
    """
    if session is None:
        # Only build a connector when the caller did not supply a session.
        # NOTE(review): a session created here is never closed by this
        # method -- confirm whether the caller or the connector owns it.
        session = DBConnector().getNewSession()

    year = str(period.year)
    quarter = str(period.quarter)
    localURL = Constant.CACHE_FOLDER + 'master' + year + "-Q" + quarter + '.gz'
    secURL = ("https://www.sec.gov/Archives/edgar/full-index/" + year
              + "/QTR" + quarter + "/master.gz")
    print(localURL, secURL)

    compressed = getBinaryFileFromCache(localURL, secURL, replaceMasterFile)
    with gzip.open(BytesIO(compressed), 'rb') as f:
        text = f.read().decode("ISO-8859-1")

    # Skip the free-text preamble: the table starts at the "CIK|..." header.
    headerStart = text.find("CIK")
    if headerStart == -1:
        # Previously -1 was used as a slice index, which silently kept only
        # the last character and failed later with an unrelated error.
        raise ValueError("No 'CIK' header found in master index " + secURL)
    table = text[headerStart:]

    # Drop the line right after the header (a row of dashes in the EDGAR
    # index) while keeping the header and every data row.
    headerEnd = table.find("\n")
    separatorEnd = table.find("\n", headerEnd + 1) if headerEnd != -1 else -1
    if separatorEnd == -1:
        # Header only (possibly followed by an unterminated line): no rows.
        csvText = table if headerEnd == -1 else table[:headerEnd]
    else:
        csvText = table[:headerEnd] + table[separatorEnd:]

    df = pandas.read_csv(StringIO(csvText), sep="|")
    df.set_index("CIK", inplace=True)

    print("STARTED FOR PERIOD " + year + "-" + quarter)

    # Filter once up front instead of inspecting every row in Python.
    wanted = df[df["Form Type"].isin(["10-Q", "10-K"])]
    for CIK, filename in zip(wanted.index, wanted["Filename"]):
        if FileDataDao.getFileData(filename, session) is not None:
            continue  # already registered
        company = CompanyEngine().getOrCreateCompany(CIK=CIK, session=session)
        fd = FileData()
        fd.fileName = filename
        fd.company = company
        Dao().addObject(objectToAdd=fd, session=session, doCommit=True)
        print("FD Added " + filename)

    print("FINISHED")