Пример #1
0
def calcAllDay(residPath, stdPath, tradingDates, keyCols, includeCols, excludeCols, createDate, settleDate):
    """Compute and save the residual data for every trading day.

    For each day the standardized data is loaded with fileutil.loadPickle
    from stdPath, handed to calcResidual, and the result is written with
    fileutil.savePickle under residPath. When loadPickle returns None a
    failure message is printed and that day is skipped. The elapsed time
    of every day is printed in both cases.

    Args:
        residPath: directory where the residual data is saved.
        stdPath: directory where the standardized data is stored.
        tradingDates: list of trading days; items are datetime.date.
        keyCols: columns that must be present in the result (list).
        includeCols: names of the columns to process (list).
        excludeCols: names of the columns that must not be processed (list).
        createDate: name of the start-time column.
        settleDate: name of the end-time column.
    """
    for td in tradingDates:
        start = datetime.datetime.now()
        # Format the day once; it is reused by every message below.
        dayLabel = td.strftime('%Y%m%d')

        df = fileutil.loadPickle(stdPath, td)
        if df is not None:
            # print(...) with a single argument behaves the same on
            # Python 2 and Python 3, unlike the print statement.
            print('process: {0}'.format(dayLabel))

            # Work on a copy so the caller's keyCols list is not mutated.
            actualKeyCols = list(keyCols)

            # Date columns, when present in the data, are put in front of
            # the key columns; createDate is inserted last so it ends up
            # first.
            if settleDate in df.columns:
                actualKeyCols.insert(0, settleDate)
            if createDate in df.columns:
                actualKeyCols.insert(0, createDate)

            newdf = calcResidual(td, df, actualKeyCols, includeCols, excludeCols)
            fileutil.savePickle(residPath, td, newdf)
        else:
            print('Fail to load data: {0}, {1}'.format(dayLabel, stdPath))

        end = datetime.datetime.now()
        print('cost: {0} on: {1}'.format(end - start, dayLabel))
Пример #2
0
def insertAllData(filepath, tradingDays, dtype):
    """Load the pickled data of every trading day and insert it into the database.

    For each day the data is loaded with fileutil.loadPickle from filepath;
    when something was loaded, its 'SecuAbbr' column is re-decoded and the
    frame is passed to insertData together with dtype and the engine.
    Days for which loadPickle returns None are skipped silently. The
    elapsed time of every day is printed.

    Args:
        filepath: directory holding the per-day pickle files.
        tradingDays: iterable of days; each item must provide strftime().
        dtype: forwarded unchanged as the first argument of insertData.
    """
    # NOTE(review): the connection settings (including the password) are
    # hard-coded in source. They are kept as-is so behavior is unchanged,
    # but they should be moved to configuration or the environment.
    engine = dbaccessor.getdb('localhost', 'zhenggq', 'Yuzhong0931', 'advancedb', 1433)
    for td in tradingDays:
        dayLabel = td.strftime('%Y%m%d')
        # print(...) with a single argument behaves the same on Python 2
        # and Python 3, unlike the print statement.
        print('insert: {0}'.format(dayLabel))
        start = datetime.datetime.now()

        df = fileutil.loadPickle(filepath, td)
        if df is not None:
            # The Chinese text was wrongly handled as unicode code points
            # although the underlying bytes are gbk-encoded: turn each
            # value back into raw bytes and decode those bytes as gbk.
            df['SecuAbbr'] = df['SecuAbbr'].apply(
                lambda x: x.encode('raw-unicode-escape').decode('gbk'))
            insertData(dtype, engine, df)

        end = datetime.datetime.now()
        print('Cost: {0} on {1}'.format(end - start, dayLabel))
Пример #3
0
def testWeeklyData(td):
    """Load the weekly data of one day and insert it into the database.

    The data is loaded with fileutil.loadPickle from a fixed local
    directory. When loadPickle returns None nothing happens. Otherwise a
    database connection is opened, the frame is passed to insertWeekly,
    and the time spent connecting and inserting is printed.

    Args:
        td: the day to load; forwarded to fileutil.loadPickle.
    """
    filepath = 'D:/workspace/python/residual/resid/weekly/'
    df = fileutil.loadPickle(filepath, td)
    if df is None:
        return

    t1 = datetime.datetime.now()
    # NOTE(review): connection settings (including the password) are
    # hard-coded in source; kept unchanged here, but they belong in
    # configuration.
    engine = dbaccessor.getdb('localhost', 'zhenggq', 'Yuzhong0931',
                              'advancedb', 1433)
    conn = engine.connect()
    try:
        t2 = datetime.datetime.now()
        # print(...) with a single argument behaves the same on Python 2
        # and Python 3, unlike the print statement.
        print('cost connect db: {0}'.format(t2 - t1))

        insertWeekly(conn, df)

        t3 = datetime.datetime.now()
        print('cost insert db: {0}'.format(t3 - t2))
    finally:
        # Release the connection even when insertWeekly raises; it was
        # previously never closed. Assumes the object returned by
        # engine.connect() exposes close(), as SQLAlchemy connections do.
        conn.close()
Пример #4
0
def test():
    """Ad-hoc check: run calcResidual on the standardized data of one fixed day.

    Loads the data for 2017-03-16 with fileutil.loadPickle from a fixed
    local directory and, when something was loaded, passes it to
    calcResidual with the column lists taken from dataapi. The result is
    not saved or returned.
    """
    day = datetime.date(2017, 3, 16)
    stdPath = 'D:/workspace/python/residual/result/monthly/'
    residPath = 'D:/workspace/python/residual/residtest/'  # currently unused

    keyCols, includeCols, excludeCols = (
        dataapi.keyCols, dataapi.includeCols, dataapi.excludeCols)
    newIndusColumns = indusColumns  # currently unused

    frame = fileutil.loadPickle(stdPath, day)
    if frame is None:
        # Nothing was loaded for that day, so there is nothing to check.
        return

    # Disabled preprocessing: drop the rows whose industry is empty.
    #frame = frame.dropna(subset=['IndustrySecuCode_I'])
    #newIndusColumns = preprocessData(frame)

    # NOTE(review): another calcResidual call in this listing passes the
    # day as the first argument - confirm which signature applies here.
    newdf = calcResidual(frame, keyCols, includeCols, excludeCols)
Пример #5
0
def calcAllDay(deltaPath, stdPath, tradingDays, keyCols, includeCols,
               excludeCols, createDate, settleDate):
    """Compute and save the delta data for every trading day.

    For each day the standardized data is loaded with fileutil.loadPickle
    from stdPath, handed to calcDelta, and the result is written with
    fileutil.savePickle under deltaPath. When loadPickle returns None a
    failure message is printed and that day is skipped. The elapsed time
    of every day is printed in both cases.

    Args:
        deltaPath: directory where the data holding the difference between
            the mean of the top 10% and the mean of the bottom 10% is saved.
        stdPath: directory where the standardized data is stored.
        tradingDays: list of trading days; items are datetime.date.
        keyCols: columns that must be present in the result (list).
        includeCols: names of the columns to process (list).
        excludeCols: names of the columns that must not be processed (list).
        createDate: name of the start-time column.
        settleDate: name of the end-time column.
    """
    for td in tradingDays:
        start = datetime.datetime.now()
        # Format the day once; it is reused by every message below.
        dayLabel = td.strftime('%Y%m%d')

        df = fileutil.loadPickle(stdPath, td)
        if df is not None:
            # print(...) with a single argument behaves the same on
            # Python 2 and Python 3, unlike the print statement.
            print('process: {0}'.format(dayLabel))

            # Work on a copy so the caller's keyCols list is not mutated.
            actualKeyCols = list(keyCols)

            # Date columns, when present in the data, are put in front of
            # the key columns; createDate is inserted last so it ends up
            # first.
            if settleDate in df.columns:
                actualKeyCols.insert(0, settleDate)
            if createDate in df.columns:
                actualKeyCols.insert(0, createDate)

            newdf = calcDelta(td, df, actualKeyCols, includeCols, excludeCols)
            # Only the pickle output is produced; a CSV export that used to
            # sit here as commented-out code is disabled.
            fileutil.savePickle(deltaPath, td, newdf)
        else:
            print('Fail to load data: {0}, {1}'.format(dayLabel, stdPath))

        end = datetime.datetime.now()
        print('cost: {0} on: {1}'.format(end - start, dayLabel))