def __processInstrument__(self, instrument, ratioList, fromDate, toDate,
                          wrongInstrumentsSymbolLists, outputDict):
    logger.debug('%s_%s processing to vectorize' % (instrument.symbol, instrument.currency))
    # instrument-level and ratio-level parallelism cannot be nested: disable the inner one
    if self.parallelDownloadInstruments and self.parallelDownloadRatios:
        self.parallelDownloadRatios = False
    if self.parallelDownloadRatios and len(ratioList) > self.threads:
        logger.info('__processInstrument__ parallel into %d threads' % self.threads)
        outputDict = mpPandasObj(
            func=self.__processInstrumentToVectorize__,
            pdObj=('ratioList', ratioList),
            numThreads=self.threads,
            mpBatches=1,
            isVerticalParallel=True,
            instrument=instrument,
            fromDate=fromDate,
            toDate=toDate,
            outputDict=outputDict,
            wrongInstrumentsSymbolLists=wrongInstrumentsSymbolLists,
        )
    else:
        outputDict = self.__processInstrumentToVectorize__(
            instrument, ratioList, fromDate, toDate, outputDict,
            wrongInstrumentsSymbolLists)
    return outputDict
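# A minimal sketch (not part of the original module) of the split-and-merge idea that
# mpPandasObj applies above: partition a list across workers, run the same callable on
# each chunk, and merge the per-chunk result dicts. The helper and its usage are
# hypothetical illustrations, not the project's actual internals.
from concurrent.futures import ProcessPoolExecutor

def run_chunked(func, items, numThreads, **kwargs):
    # split items into up to numThreads strided chunks (func must be picklable)
    chunks = [items[i::numThreads] for i in range(numThreads)]
    chunks = [chunk for chunk in chunks if chunk]
    merged = {}
    with ProcessPoolExecutor(max_workers=numThreads) as pool:
        futures = [pool.submit(func, chunk, **kwargs) for chunk in chunks]
        for future in futures:
            merged.update(future.result())  # later chunks overwrite duplicate keys
    return merged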
def getFactorData(self, instrumentList, ratioList, fromDate, toDate=None):
    logger.info(
        "Request factor data from db for\ninstrumentList: %s\nratioList: %s\nfrom %s to %s"
        % (instrumentList, ratioList, fromDate, toDate))
    return self.factor_service.getDataDictOfMatrixAlphalens(
        instrumentList=instrumentList,
        ratioList=ratioList,
        fromDate=fromDate,
        toDate=toDate)
def __getAllInstrumentData__(self, instrumentList, ratioList, fromDate, toDate):
    wrongInstrumentsSymbolLists = []
    columnList = DataDictKeys.keys + ratioList
    outputDict = {}
    if len(instrumentList) < self.threads:
        self.threads = int(np.ceil(len(instrumentList) / 2))
        logger.debug('Modified threads of __getAllInstrumentData__ to %d' % self.threads)
    if self.threads > 1:
        mpBatches = 1
        linMols = True
        if self.threads > 3:
            mpBatches = float(len(instrumentList)) / float(self.threads)
            mpBatches = int(min(int(mpBatches / 5), 50))
            if mpBatches < 1:
                mpBatches = 1
            logger.debug('mpBatches of __getAllInstrumentData__ to %d' % mpBatches)
        outputDict = mpPandasObj(
            func=self.__getAllInstrumentSerial__,
            pdObj=('instrumentList', instrumentList),
            numThreads=self.threads,
            mpBatches=mpBatches,
            isVerticalParallel=True,
            linMols=linMols,
            columnList=columnList,
            fromDate=fromDate,
            toDate=toDate,
            outputDict=outputDict,
            wrongInstrumentsSymbolLists=wrongInstrumentsSymbolLists,
        )
    else:
        logger.info('Downloading data serialized')
        outputDict = self.__getAllInstrumentSerial__(
            instrumentList, columnList, fromDate, toDate, outputDict,
            wrongInstrumentsSymbolLists)
    for key in outputDict.keys():
        if isinstance(outputDict[key], pd.DataFrame):
            # forward-fill gaps, then zero-fill what remains (typically the first rows)
            outputDict[key].fillna(method='ffill', inplace=True)
            outputDict[key].fillna(0, inplace=True)
    # filter down to the columns that were requested
    if outputDict is not None:
        outputDictFinal = {}
        for ratioAsked in columnList:
            outputDictFinal[ratioAsked] = outputDict[ratioAsked]
    else:
        outputDictFinal = None
    return outputDictFinal
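# Minimal pandas illustration of the two-step NaN handling used above: forward-fill
# carries the last observation across gaps, and the remaining NaNs (rows before the
# first observation) are zeroed. The toy frame is illustrative only.
import numpy as np
import pandas as pd

df = pd.DataFrame({'close': [np.nan, 10.0, np.nan, 11.0],
                   'volume': [np.nan, np.nan, 100.0, np.nan]})
df = df.ffill().fillna(0)  # equivalent to fillna(method='ffill') then fillna(0)
# the first rows become 0 because there is nothing earlier to forward-fill from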
def __init__(self, user_settings):
    HistoricalMarketData.__init__(self, user_settings)
    self.inputPath = getDukascopyInputPath(user_settings)
    self.filesInDirectory = glob.glob(self.inputPath + os.sep + "*.csv")
    logger.info("dukascopy detected %i files that can be processed" % len(self.filesInDirectory))
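# Side note on the glob above: os.path.join builds the same pattern portably instead
# of concatenating with os.sep by hand. The path below is illustrative only.
import glob
import os

files = glob.glob(os.path.join('/path/to/dukascopy', '*.csv'))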
def historical_data_handler(self, msg):
    logger.debug("IB historical received %s" % msg)
    if self.messageCounter == 0:
        logger.info("IB historical started! %s" % msg.date)
    self.messageCounter += 1
    if 'finished' in str(msg.date):
        logger.info("IB historical finished! %s" % msg.date)
        self.receivedAllHistorical = True
    else:
        self.appendData(msg)
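# Sketch of the accumulate-until-sentinel pattern the handler above relies on: IB
# delivers one bar per callback and marks the end of the series with a date field
# containing 'finished'. This ReceivedData class is a hypothetical stand-in for the
# object that collects bars in this module, not its actual implementation.
class ReceivedData:
    def __init__(self):
        self.rows = []
        self.receivedAllHistorical = False

    def on_message(self, msg):
        if 'finished' in str(msg.date):
            self.receivedAllHistorical = True  # unblocks the waiting requester
        else:
            self.rows.append(msg)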
def getFundamentalDataProvider(self, ratio, instrument):
    asset_requested = instrument.asset_type
    try:
        provider = self.user_settings.asset_type_to_fundamental_data[asset_requested]
    except (AttributeError, KeyError):
        logger.info(
            'No fundamental data provider configured in user_settings dict '
            'asset_type_to_fundamental_data => using default')
        provider = QuandlFundamentalData
    return provider
def getHistoricalData(self, instrument, period, number_of_periods, fromDate,
                      toDate=None, bar_type=BarType.time_bar):
    # TODO: needs to be on pystore
    logger.info("Request historical data from db for %s_%s from %s to %s"
                % (instrument.symbol, instrument.currency, fromDate, toDate))
    return self.historical_market_data_service.getHistoricalData(
        instrument, period, number_of_periods, fromDate, toDate, bar_type)
def getBroker(self, instrument):
    asset_requested = instrument.asset_type
    try:
        provider = self.user_settings.asset_type_to_broker[asset_requested]
    except (AttributeError, KeyError):
        logger.info(
            'No broker configured in user_settings dict asset_type_to_broker => using default')
        if asset_requested == AssetType.crypto:
            provider = GdaxConnector
        else:
            provider = EmailConnector
    return provider
def getHistoricalMarketDataProvider(self, instrument):
    asset_requested = instrument.asset_type
    try:
        provider = self.user_settings.asset_type_to_historical_market_data[asset_requested]
    except (AttributeError, KeyError):
        logger.info(
            'No historical market data provider configured in user_settings dict '
            'asset_type_to_historical_market_data => using default')
        if asset_requested == AssetType.crypto:
            provider = CryptoCompareHistoricalMarketData
        else:
            provider = YahooHistoricalMarketData
    return provider
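# Sketch of the configuration shape the three lookups above assume: user_settings
# carries dicts keyed by AssetType, and a missing entry falls back to a default.
# A dict.get with a default expresses the same pattern without try/except. The
# mapping contents below are an illustrative assumption.
#
# user_settings.asset_type_to_historical_market_data = {
#     AssetType.crypto: CryptoCompareHistoricalMarketData,
#     AssetType.equity: YahooHistoricalMarketData,
# }

def get_provider(mapping, asset_type, default):
    # mapping may be None when the user did not configure it at all
    if not mapping:
        return default
    return mapping.get(asset_type, default)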
def getDataDictOfMatrix(self, instrumentList, ratioList, fromDate,
                        toDate=None, persistTempFile=None):
    import datetime
    # normalize dates to midnight (pd.datetime is deprecated; use datetime directly)
    fromDate = datetime.datetime(day=fromDate.day, month=fromDate.month, year=fromDate.year)
    if toDate is None:
        toDate = datetime.datetime.today()
    toDate = datetime.datetime(day=toDate.day, month=toDate.month, year=toDate.year)
    if self.useFunctionTemp:
        logger.debug('getDataDictOfMatrix downloading/loading...')
        logger.debug('instrumentList: %s' % instrumentList)
        logger.debug('ratioList: %s' % ratioList)
        logger.debug('fromDate: %s' % fromDate)
        logger.debug('toDate: %s' % toDate)
        logger.debug('persistTempFile: %s' % persistTempFile)
        functionTemp = self.cacher.cache(self.__getDataDictOfMatrix__, ignore=['self'])
        ratioList = self.getLongestRatioListDownloaded(
            instrumentList, ratioList, fromDate, toDate)
        outputDictFinal = functionTemp(
            __unstackInstrumentList__(instrumentList), ratioList, fromDate, toDate, None)
    else:
        logger.info('Not using cache function!')
        outputDictFinal = self.__getDataDictOfMatrix__(
            __unstackInstrumentList__(instrumentList), ratioList, fromDate, toDate, None)
    if persistTempFile is False:
        persistTempFile = None
    if persistTempFile is not None:
        logger.debug('Persisting')
        # 'not' instead of bitwise '~': ~bool is always truthy, so the original
        # check appended '.xlsx' even when the extension was already present
        if not persistTempFile.endswith('.xlsx'):
            persistTempFile += '.xlsx'
        self.__createTempExcelFile__(outputDictFinal, filenameWithExtension=persistTempFile)
    self.__save_ratio_list__(instrumentList, ratioList, fromDate, toDate)
    return outputDictFinal
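# The self.cacher.cache(..., ignore=['self']) call above matches the joblib.Memory
# API, so here is a minimal sketch of that disk-memoization pattern, assuming the
# project's cacher is indeed a joblib Memory (the cache directory is illustrative):
from joblib import Memory

memory = Memory('/tmp/data_cache', verbose=0)

def slow_square(x):
    return x * x  # stand-in for an expensive download

slow_square_cached = memory.cache(slow_square)
slow_square_cached(4)  # computed and persisted to disk
slow_square_cached(4)  # served from the on-disk cache
# ignore=['self'] keeps the bound instance out of the cache key when wrapping methods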
def download(self, instrument, period, number_of_periods, fromDate, toDate=None):
    import datetime
    logger.debug("Downloading %s" % instrument)
    oandaInstrument = '%s_%s' % (instrument.symbol, instrument.currency)
    if period == Period.day:
        oandaGranularity = self.period_dict[period]
    else:
        oandaGranularity = '%s%i' % (self.period_dict[period], number_of_periods)
    # expected format: 2014-07-03T04:00:00.000000Z
    startDate = fromDate.strftime(self.formatDate)
    if toDate is None:
        toDate = datetime.datetime.today()
    endDate = toDate.strftime(self.formatDate)
    try:
        data_downloaded = self.oanda.get_history(
            instrument=oandaInstrument,
            granularity=oandaGranularity,
            start=startDate,
            end=endDate,
            candleFormat=self.candleFormat,
            dailyAlignment=self.dailyAlignment,
            alignmentTimezone=self.alignmentTimezone,
            weeklyAlignment=self.weeklyAlignment,
        )
    except Exception as e:
        logger.error("Can't download from oanda %s %s => returning None: %s"
                     % (instrument.symbol, period, e))
        return None
    logger.info("formatting oanda data for %s" % oandaInstrument)
    # time correction is already applied inside formatHistorical
    outputComplete = self.formatHistorical(data_downloaded, period=period)
    return outputComplete
def getDataDictOfMatrix(self, instrumentList, ratioList, fromDate,
                        toDate=None, persistTempFile=None):
    import datetime
    import os
    start = time.time()
    # normalize dates to midnight (pd.datetime is deprecated; use datetime directly)
    fromDate = datetime.datetime(day=fromDate.day, month=fromDate.month, year=fromDate.year)
    if toDate is None:
        toDate = datetime.datetime.today()
    toDate = datetime.datetime(day=toDate.day, month=toDate.month, year=toDate.year)
    fromDate = convert_date(fromDate)
    toDate = convert_date(toDate)
    logger.debug('getDataDictOfMatrix downloading/loading...')
    logger.debug('instrumentList: %s' % instrumentList)
    logger.debug('ratioList: %s' % ratioList)
    logger.debug('fromDate: %s' % fromDate)
    logger.debug('toDate: %s' % toDate)
    logger.debug('persistTempFile: %s' % persistTempFile)
    outputDict = self.__getAllInstrumentData__(instrumentList, ratioList, fromDate, toDate)
    assetType = instrumentList[0].asset_type
    outputDictFinal = self.__cleanData__(outputDict, assetType=assetType,
                                         persistTempFile=persistTempFile)
    end = time.time()
    minutesTime = (end - start) / 60
    logger.info('******')
    logger.info('Took %f minutes to finish getDataDictOfMatrix' % minutesTime)
    logger.info('******')
    outputDictFinal = self.__allignSymbolsDictMatrix__(outputDictFinal)
    outputDictFinal = self.__cleanSymbolsDictMatrix__(outputDictFinal)
    logger.info('Finished => saving dictOfMatrix_last.pickle')
    save_to_file(
        outputDictFinal,
        getTempPath(self.user_settings) + os.sep + 'dictOfMatrix_last.pickle')
    return outputDictFinal
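# The save_to_file call above persists the dict of DataFrames to disk; a minimal
# sketch of the equivalent with the standard pickle module (helper name and path
# are illustrative, not the project's save_to_file):
import pickle

def save_dict_of_matrices(obj, path):
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)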
def __makeRequestSingle__(self, contract, durationStr, toDateString, barSizeSetting):
    self.receivedDataObject.reset()
    self.ib_object.reqHistoricalData(self.tickId, contract, toDateString,
                                     durationStr, barSizeSetting, self.whatToShow,
                                     1, self.ib_formatDate_return)
    self.tickId += 1
    logger.info("sent historical request for %s: waiting" % contract.m_symbol)
    # poll until the handler flags that all historical bars have arrived
    while self.receivedDataObject.receivedAllHistorical is False:
        sleep(3)
    logger.debug("finished single request %s" % contract.m_symbol)
    dataframeReceived = self.receivedDataObject.getDataframe()
    if dataframeReceived is None:
        logger.error("An error occurred on the single request: check it")
        return None
    outputComplete = self.__formatHistorical__(dataframeReceived)
    return outputComplete
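# The polling loop above blocks forever if IB never sends the 'finished' marker.
# Sketch of a bounded wait with threading.Event as a possible refactor (hypothetical,
# not the module's current behavior): the handler would call done.set() instead of
# flipping a boolean flag.
import threading

done = threading.Event()

def wait_for_historical(timeout_seconds=120):
    # True when the data arrived, False on timeout
    return done.wait(timeout=timeout_seconds)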
def __getDataDictOfMatrix__(self, instrumentStringStacked, ratioList,
                            fromDate, toDate=None, persistTempFile=None):
    logger.debug('__getDataDictOfMatrix__ downloading...')
    instrumentList = __stackInstrumentList__(instrumentStringStacked[0],
                                             instrumentStringStacked[1],
                                             instrumentStringStacked[2])
    if ratioList is None:
        ratioList = []  # compatibility
    if persistTempFile is False:
        persistTempFile = None
    fromDate = convert_date(fromDate)
    toDate = convert_date(toDate)
    assetType = instrumentList[0].asset_type
    wrongInstrumentsSymbolLists = []
    start = time.time()
    if self.parallelDownloadInstruments and len(instrumentList) > self.threads:
        mpBatches = float(len(instrumentList)) / float(self.threads)
        mpBatches = min(int(mpBatches / 5), 50)
        mpBatches = 1  # batching disabled: the computed value is overridden
        logger.info(
            'Downloading data in parallel [%d threads, %d mpBatches] with '
            '__processAllInstruments__: %d instruments, %d ratios'
            % (self.threads, mpBatches, len(instrumentList), len(ratioList)))
        outputDict = mpPandasObj(
            func=self.__processAllInstruments__,
            pdObj=('instrumentList', instrumentList),
            isVerticalParallel=True,
            numThreads=self.threads,
            mpBatches=mpBatches,
            linMols=True,
            ratioList=ratioList,
            fromDate=fromDate,
            toDate=toDate,
            wrongInstrumentsSymbolLists=wrongInstrumentsSymbolLists,
        )
        typeDownload = 'parallel'
    else:
        logger.info(
            'Downloading data serially with __processAllInstruments__: '
            '%d instruments, %d ratios' % (len(instrumentList), len(ratioList)))
        outputDict = self.__processAllInstruments__(
            instrumentList, ratioList, fromDate, toDate, wrongInstrumentsSymbolLists)
        typeDownload = 'serial'
    end = time.time()
    minutesTime = (end - start) / 60
    logger.info('******')
    logger.info('Took %f minutes to finish %s __getDataDictOfMatrix__'
                % (minutesTime, typeDownload))
    logger.info('******')
    # intersect the dates where every matrix has at least one non-zero row
    dateFinal = None
    for key in outputDict.keys():
        if key == 'wrong':
            continue
        outputDict[key] = outputDict[key][~outputDict[key].index.duplicated(keep='last')]
        dateIndex = outputDict[key][(outputDict[key].fillna(0).sum(axis=1) != 0)].index
        if len(dateIndex) == 0:
            dateIndex = outputDict[key].index
        if dateFinal is None:
            dateFinal = dateIndex
        else:
            dateFinal = dateFinal.intersection(dateIndex)
    for key in outputDict.keys():
        if key == 'wrong':
            continue
        mask = dateFinal.searchsorted(outputDict[key].index)
        outputDict[key] = outputDict[key][mask > 0]
        outputDict[key].fillna(0, inplace=True)
    logger.debug('all instruments processed => cleaning')
    # guard before copying: the original copied outputDict['wrong'] before
    # checking whether the key existed, which could raise KeyError
    if 'wrong' in outputDict:
        wrongInstrumentsSymbolLists = outputDict['wrong'].copy()
        del outputDict['wrong']
    wrongInstrumentsSymbolLists = self.__formatWrongInstrumentList__(
        wrongInstrumentsSymbolLists, outputDict)
    outputDictFinal = self.__cleanOutputDict__(outputDict, wrongInstrumentsSymbolLists)
    if assetType not in (AssetType.forex, AssetType.crypto):
        outputDictFinal = self.__cleanBankHolidays__(outputDictFinal)
    df = outputDictFinal[DataDictKeys.close]
    logger.debug('all dictOfMatrix cleaned => finished %d matrixes of %d columns'
                 % (len(outputDictFinal), df.shape[1]))
    if persistTempFile is not None:
        # 'not' instead of bitwise '~' (see getDataDictOfMatrix)
        if not persistTempFile.endswith('.xlsx'):
            persistTempFile += '.xlsx'
        self.__createTempExcelFile__(outputDictFinal, filenameWithExtension=persistTempFile)
    return outputDictFinal
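# Note on the filtering step above: Index.searchsorted returns insertion positions,
# so 'mask > 0' keeps every row dated after the first common date rather than testing
# membership in dateFinal. For comparison, a direct membership filter looks like this
# (illustration only, not a drop-in change to the method above):
import pandas as pd

df = pd.DataFrame({'x': [1, 2, 3]},
                  index=pd.to_datetime(['2020-01-01', '2020-01-02', '2020-01-03']))
common = pd.to_datetime(['2020-01-02', '2020-01-03'])
filtered = df[df.index.isin(common)]  # keeps exactly the shared dates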
def __sendEmail__(self, recipient, subject, body, html=None, fileToSendArray=[]):
    import smtplib
    import mimetypes
    import time
    from email.mime.multipart import MIMEMultipart
    from email import encoders
    from email.mime.audio import MIMEAudio
    from email.mime.base import MIMEBase
    from email.mime.image import MIMEImage
    from email.mime.text import MIMEText
    fromEmail = self.user_settings.email_address
    try:
        msg = MIMEMultipart()
        msg['From'] = fromEmail
        msg['To'] = recipient
        msg['Subject'] = subject
        msg.attach(MIMEText(body, 'plain'))
        if html is not None and isinstance(html, str):
            msg.attach(MIMEText(html, 'html'))
        # %% Attachments
        if fileToSendArray is not None and len(fileToSendArray) > 0:
            for fileToSend in fileToSendArray:
                if fileToSend is not None and os.path.isfile(fileToSend):
                    logger.debug('adding file ' + fileToSend)
                    ctype, encoding = mimetypes.guess_type(fileToSend)
                    if ctype is None or encoding is not None:
                        ctype = "application/octet-stream"
                    maintype, subtype = ctype.split("/", 1)
                    if maintype == "text":
                        fp = open(fileToSend)
                        # Note: we should handle calculating the charset
                        attachment = MIMEText(fp.read(), _subtype=subtype)
                        fp.close()
                    elif maintype == "image":
                        fp = open(fileToSend, "rb")
                        attachment = MIMEImage(fp.read(), _subtype=subtype)
                        fp.close()
                    elif maintype == "audio":
                        fp = open(fileToSend, "rb")
                        attachment = MIMEAudio(fp.read(), _subtype=subtype)
                        fp.close()
                    else:
                        fp = open(fileToSend, "rb")
                        attachment = MIMEBase(maintype, subtype)
                        attachment.set_payload(fp.read())
                        fp.close()
                        encoders.encode_base64(attachment)
                    attachment.add_header("Content-Disposition", "attachment",
                                          filename=fileToSend)
                    msg.attach(attachment)
        result = False
        counter = 3
        while not result and counter > 0:
            try:
                server = smtplib.SMTP(self.user_settings.email_smtp_host,
                                      self.user_settings.email_smtp_port)
                server.ehlo()
                server.starttls()
                server.ehlo()
                server.login(fromEmail, self.user_settings.email_password)
                text = msg.as_string()
                server.sendmail(fromEmail, recipient, text)
                server.quit()
                result = True
            except Exception as e:
                logger.error("Error: unable to send email retry[%d]: %s" % (counter, str(e)))
                result = False
                counter -= 1
                time.sleep(5)  # os.sleep does not exist; use time.sleep
        if result:
            logger.info("Successfully sent email")
        else:
            logger.error("Error: unable to send email")
    except Exception as e:
        logger.error("Error: unable to send email: %s" % str(e))
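# Sketch of the retry pattern the sender above implements inline: attempt an action a
# fixed number of times, pausing between failures. Generic helper for illustration
# (names are hypothetical, not part of the original module):
import time

def retry(action, attempts=3, delay_seconds=5):
    last_error = None
    for _ in range(attempts):
        try:
            return action()
        except Exception as e:
            last_error = e
            time.sleep(delay_seconds)
    raise last_error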