Пример #1
0
def findActiveCodes(df, InstrumentCode):
    """Pick the two most heavily traded contracts of one product.

    Column 2 of ``df`` is scanned for contract codes that start with
    ``InstrumentCode`` and have the expected length (product code plus four
    characters, or plus three when the exchange is CZCE). For each such
    contract the value in column 12 of its *last* row is taken as its volume
    (the column is cumulative, so the last row holds the largest value).

    Args:
        df: Tick DataFrame; column 2 holds contract codes and column 12 the
            cumulative volume.
        InstrumentCode: Product prefix used to select contracts.

    Returns:
        ``(highest_volume_code, second_highest_volume_code)``, or ``None``
        when fewer than two contracts qualify. The second code is the
        contract with the highest volume strictly below the maximum; if every
        qualifying contract has the same volume, the next qualifying contract
        (in order of first appearance) is returned instead of raising.
    """
    ExchangeCode = GetExchangeCode(InstrumentCode)
    # Bool arithmetic: CZCE contract codes are one character shorter.
    expected_len = 4 - (ExchangeCode == 'CZCE') + len(InstrumentCode)

    # One pass gives the last row position of every code. Dict order follows
    # first appearance, so tie-breaking is reproducible between runs (a set
    # of strings is not).
    last_row = {code: pos for pos, code in enumerate(df.iloc[:, 2])}

    unique = []
    volumes = []  # plain ints: no 32-bit overflow on large cumulative volumes
    for code, pos in last_row.items():
        if not isinstance(code, str):
            continue  # e.g. NaN coming from an empty cell
        if len(code) != expected_len or not code.startswith(InstrumentCode):
            continue
        lastvolume = df.iloc[pos, 12]  # column 12 = volume
        if lastvolume >= 0:  # also filters NaN, which compares False
            unique.append(code)
            volumes.append(int(lastvolume))

    if len(volumes) < 2:
        return None

    top = max(volumes)
    i1 = volumes.index(top)  # highest volume
    below_top = [v for v in volumes if v < top]
    if below_top:
        i2 = volumes.index(max(below_top))  # second highest distinct volume
    else:
        # Every candidate is tied at the maximum: there is no lower value to
        # look up, so take the first candidate other than i1.
        i2 = next(i for i in range(len(volumes)) if i != i1)
    return unique[i1], unique[i2]
def main():
    """Write RawSpread CSVs for the CZCE products over every 2020 date folder.

    For each 8-character entry of the ``2020`` folder and each product code,
    the exchange tick file of that day (and of the preceding day, when there
    is one) is loaded, the two active contracts are chosen with
    ``findActiveCodes`` and the spread is built with ``createSpread``.
    Results go to ``SpreadData\\<code>``; a day that follows a "too many zero
    volumes" day is written to the ``Alternates`` sub-folder instead. Every
    skipped code/date is appended to ``passesCZCE.txt``.
    """
    fadd = 'D:\\Hongze\\Dropbox'
    if not os.path.isdir(fadd):
        fadd = 'C:\\Users\\Hongze\\Dropbox'

    AllInstrumentCode = [
        'AP', 'CF', 'CJ', 'FG', 'MA', 'OI', 'RM', 'SA', 'SF', 'SM', 'SR', 'TA',
        'UR', 'ZC'
    ]
    # Dates to process, and the full calendar used to find the previous day.
    AllTestDates = [
        entry for entry in os.listdir(fadd + '\\2020') if len(entry) == 8
    ]
    actualAllDates = [
        entry for entry in os.listdir(fadd + '\\2020') if len(entry) == 8
    ]
    epsilon = 1e-10

    log_path = fadd + '\\SpreadData\\_Test Data\\passesCZCE.txt'
    with open(log_path, 'w') as txtfile:
        txtfile.write('No RawSpread created for code/date:\n')

    def log_pass(code, date, reason):
        # Append one "<code>: <date><reason>" line to the pass log.
        with open(log_path, 'a') as txtfile:
            txtfile.write(code + ': ' + date + reason)

    def read_ticks(path):
        # Load an exchange dump, keeping rows where column 24 or column 26
        # is at least epsilon in magnitude.
        raw = pd.read_csv(path,
                          sep=',',
                          encoding="ISO-8859-1",
                          error_bad_lines=False)
        keep = ((abs(raw[raw.columns[24]]) >= epsilon) |
                (abs(raw[raw.columns[26]]) >= epsilon))
        return raw[keep].reset_index(drop=True)

    # Per-product list of dates still eligible for a regular (non-alternate) file.
    datedict = {code: copy.deepcopy(AllTestDates) for code in AllInstrumentCode}

    for d, fname in enumerate(AllTestDates):
        calendar_pos = actualAllDates.index(fname)
        # No previous day exists for the first calendar entry.
        pname = actualAllDates[calendar_pos - 1] if calendar_pos > 0 else None
        print('Date = ' + fname + '\n')
        lastread = None  # index of the product the cached frames were read for

        for j, InstrumentCode in enumerate(AllInstrumentCode):
            print('Code = ' + InstrumentCode)
            ExchangeCode = GetExchangeCode(InstrumentCode)
            cfname = (fadd + "\\" + fname[0:4] + "\\" + fname + "\\" +
                      ExchangeCode + "_" + fname + ".csv")
            if pname is not None:
                cfname_prev = (fadd + "\\" + pname[0:4] + "\\" + pname + "\\" +
                               ExchangeCode + "_" + pname + ".csv")
            if not os.path.isfile(cfname):
                print('Source file not found; skipping\n')
                continue

            destdir = fadd + "\\SpreadData\\" + InstrumentCode
            if not os.path.exists(destdir):
                os.makedirs(destdir)
            pattern = "RawSpread_" + fname
            already_done = [
                existing for existing in os.listdir(destdir)
                if pattern in existing
            ]
            if len(already_done) > 0:
                print('File already exists; skipping\n')
                continue

            # Re-read the dumps only when nothing is cached for this date or
            # the exchange differs from the one the cache was loaded for.
            must_reload = (
                lastread is None
                or j == 0
                or GetExchangeCode(InstrumentCode) != GetExchangeCode(
                    AllInstrumentCode[lastread])
                or 'df_curr' not in locals()
            )
            if must_reload:
                df_curr = read_ticks(cfname)
                if pname is not None:
                    df_prev = read_ticks(cfname_prev)
                else:
                    df_prev = None  # nothing to read without a previous day
                lastread = j

            activeCodes = findActiveCodes(df_curr, InstrumentCode)
            if activeCodes is None:
                print("Nothing created for " + InstrumentCode + " on " + fname)
                log_pass(InstrumentCode, fname, ' (insufficient uniques)\n')
                continue
            activeCode1, activeCode2 = activeCodes

            dfObj = createSpread(df_curr, df_prev, activeCode1, activeCode2,
                                 InstrumentCode)
            if dfObj is None:
                print("Nothing created for " + InstrumentCode + " on " + fname)
                log_pass(InstrumentCode, fname, ' (not long enough)\n')
                continue

            remaining = datedict.get(InstrumentCode)
            any_zero_volume = ((dfObj['AskVolume1'] == 0) |
                               (dfObj['AskVolume2'] == 0) |
                               (dfObj['BidVolume1'] == 0) |
                               (dfObj['BidVolume2'] == 0))
            if len(dfObj[any_zero_volume]) > len(dfObj) / 2:
                print("Too many zero volume ticks; skipping...\n")
                if fname in remaining:
                    remaining.remove(fname)
                log_pass(InstrumentCode, fname, ' (too many 0 volumes)\n')
                # The following day is struck off too, which later routes it
                # to the Alternates folder.
                if d < len(AllTestDates) - 1:
                    remaining.remove(AllTestDates[d + 1])
                continue

            suffix = "_" + activeCode1 + "_" + activeCode2 + ".csv"
            if fname in remaining:
                destfile = destdir + "\\RawSpread_" + fname + suffix
            else:
                # This date was struck off by the previous day's zero-volume skip.
                print('Creating alternate spread for ' + InstrumentCode +
                      ' on ' + fname)
                log_pass(InstrumentCode, fname,
                         ' (previous day too many 0 volumes)\n')
                if not os.path.exists(destdir + '\\Alternates'):
                    os.mkdir(destdir + '\\Alternates')
                destfile = destdir + "\\Alternates\\RawSpread_" + fname + suffix
            dfObj.to_csv(destfile)
            print('Complete.\n')
Пример #3
0
def main():
    """Write de-duplicated RawSpread CSVs for a fixed list of products and dates.

    For every date/product pair the exchange tick file is loaded (re-using
    the frame already in memory while consecutive products share an
    exchange), the two active contracts are picked with ``findActiveCodes``
    and the spread from ``createSpread`` is saved under
    ``SpreadData\\<code>`` with a ``_1.csv`` suffix. Existing output files
    are overwritten.
    """
    AllInstrumentCode = ['cu', 'au', 'ag', 'ni', 'rb', 'al', 'fu', 'ru', 'bu',
                         'sn', 'ss', 'sp', 'zn', 'pb', 'hc']
    fadd = 'D:\\Hongze\\Dropbox'
    AllTestDates = ['20200102', '20200103', '20200106', '20200107',
                    '20200108', '20200109', '20200110', '20200113',
                    '20200114', '20200115', '20200116', '20200117',
                    '20200120', '20200121', '20200122', '20200123']
    epsilon = 1e-10

    for fname in AllTestDates:  # one pass per date
        print('Date = ' + fname)
        for j, InstrumentCode in enumerate(AllInstrumentCode):  # one pass per product
            print('Code = ' + InstrumentCode)
            ExchangeCode = GetExchangeCode(InstrumentCode)
            cfname = (fadd + "\\" + fname[0:4] + "\\" + fname + "\\" +
                      ExchangeCode + "_" + fname + ".csv")
            if not os.path.isfile(cfname):
                print('Source file not found; skipping')
                continue

            destdir = fadd + "\\SpreadData\\" + InstrumentCode
            if not os.path.exists(destdir):
                os.makedirs(destdir)  # create destination directory

            # Load the raw ticks unless the previous product in the list
            # belongs to the same exchange and a frame is already in memory.
            exchange_changed = (
                j == 0
                or GetExchangeCode(InstrumentCode) != GetExchangeCode(
                    AllInstrumentCode[j - 1])
                or 'df' not in locals()
            )
            if exchange_changed:
                df = pd.read_csv(cfname, sep=',', encoding="ISO-8859-1",
                                 error_bad_lines=False)
                # Keep rows where column 24 or column 26 is at least epsilon
                # in magnitude.
                has_quote = ((abs(df[df.columns[24]]) >= epsilon) |
                             (abs(df[df.columns[26]]) >= epsilon))
                df = df[has_quote].reset_index(drop=True)

            activeCodes = findActiveCodes(df, InstrumentCode)
            if activeCodes is None:
                print("Nothing created for " + InstrumentCode + " on " + fname)
                continue
            activeCode1, activeCode2 = activeCodes

            destfile = (destdir + "\\RawSpread_" + fname + "_" + activeCode1 +
                        "_" + activeCode2 + "_1.csv")

            dfObj = createSpread(df, activeCode1, activeCode2, InstrumentCode)
            if dfObj is None:
                print("Nothing created for " + InstrumentCode + " on " + fname)
                continue
            deduplicated = dfObj.drop_duplicates()
            dfObj = deduplicated.reset_index(drop=True)
            dfObj.to_csv(destfile)
            print('Complete.')
        prevfilename = filename[:10] + previousDate + filename[18:]

        if os.path.isfile(codepath + prevfilename):  #search for file
            print('Found file: ' + prevfilename)
            df_prev = pd.read_csv(codepath + prevfilename, index_col=0)
        elif os.path.isfile(alternatePath + prevfilename):
            print('Found alternate file: ' + prevfilename)
            df_prev = pd.read_csv(alternatePath + prevfilename, index_col=0)
        else:
            print("Creating spread for " + prevfilename + '...')
            #Since we can't guarantee the length of the codes...
            underscore = filename.index('_', 19)
            code1 = filename[19:underscore]
            code2 = filename[underscore + 1:-4]
            # actually works for CZCE too!
            exchangeCode = GetExchangeCode(code)
            cfname = dataPath + "\\" + previousDate + "\\" + exchangeCode + "_" + previousDate + ".csv"
            previous_day = pd.read_csv(cfname,
                                       sep=',',
                                       encoding="ISO-8859-1",
                                       error_bad_lines=False)
            if exchangeCode == 'CZCE':
                if currIndex == 1:
                    #just one day
                    df_prev = createSpread(
                        previous_day, None, code1, code2, code
                    )  #previous file doesn't have 20s, current file does
                else:
                    #go another day back
                    previouserDate = dateList[currIndex - 2]
                    previouser_day = pd.read_csv(
Пример #5
0
def createSpread1(df, activeCode1, activeCode2, InstrumentCode, june_1=False):
    """Build a tick-by-tick two-leg spread table from one raw tick DataFrame.

    The rows of ``df`` whose column 2 equals ``activeCode1`` / ``activeCode2``
    form leg 1 / leg 2. Selected columns of each leg are copied (by position)
    into per-leg frames, the legs are outer-merged on (Time, MilliSecond),
    gaps are forward-filled, and per-tick trade volume, average price,
    buy/sell volume, spread and mid prices are derived.

    Args:
        df: Raw tick DataFrame; columns are addressed by position
            (2, 5, 12, 13, 17, 18, 21-26).
        activeCode1: Contract code of leg 1.
        activeCode2: Contract code of leg 2.
        InstrumentCode: Product code, passed to ``GetMultiplier`` and
            ``GetExchangeCode``.
        june_1: When True, duplicates are dropped on a column subset that
            leaves out TotalVolume (for non-DCE exchanges only on rows whose
            sortable Time is >= 'a09:00:00'); when False whole-row duplicates
            are dropped.

    Returns:
        The assembled DataFrame, or None when the two legs together have
        fewer than 10 rows.

    Raises:
        Exception: if a duplicated (Time, MilliSecond) key common to both
            legs yields an empty selection for either leg.
    """
    df1 = df[df[df.columns[2]] == activeCode1].reset_index().drop(columns='index')
    df2 = df[df[df.columns[2]] == activeCode2].reset_index().drop(columns='index')
    if len(df1) + len(df2) < 10: 
        return None
    multiplier = GetMultiplier(InstrumentCode)
    #dfObj1 = pd.DataFrame(columns=['Time','MilliSecond','LastPrice1','BidVolume1','BidPrice1','AskPrice1','AskVolume1','TradeVolume1','AveragePrice1','TotalVolume1','Amount1','UpperLimitPrice1','LowerLimitPrice1'])
    #dfObj2 = pd.DataFrame(columns=['Time','MilliSecond','LastPrice2','BidVolume2','BidPrice2','AskPrice2','AskVolume2','TradeVolume2','AveragePrice2','TotalVolume2','Amount2','UpperLimitPrice2','LowerLimitPrice2'])
    dfObj1 = pd.DataFrame()
    dfObj2 = pd.DataFrame()
    
    # Leg 1: copy the needed raw columns (by position) under named headers.
    dfObj1['Time'] = df1[df1.columns[21]]
    dfObj1['MilliSecond'] = df1[df1.columns[22]]
    dfObj1['LastPrice1'] = df1[df1.columns[5]]
    dfObj1['BidVolume1'] = df1[df1.columns[24]]
    dfObj1['BidPrice1'] = df1[df1.columns[23]]
    dfObj1['AskPrice1'] = df1[df1.columns[25]]
    dfObj1['AskVolume1'] = df1[df1.columns[26]]
    dfObj1['TotalVolume1'] = df1[df1.columns[12]]
    dfObj1['Amount1'] = df1[df1.columns[13]]
    dfObj1['UpperLimitPrice1'] = df1[df1.columns[17]]
    dfObj1['LowerLimitPrice1'] = df1[df1.columns[18]]
    
    #TradeVolume and Buy/Sellvolume has been moved after merge
    
    # Leg 2: same column mapping with the "2" suffix.
    dfObj2['Time'] = df2[df2.columns[21]]
    dfObj2['MilliSecond'] = df2[df2.columns[22]]
    dfObj2['LastPrice2'] = df2[df2.columns[5]]
    dfObj2['BidVolume2'] = df2[df2.columns[24]]
    dfObj2['BidPrice2'] = df2[df2.columns[23]]
    dfObj2['AskPrice2'] = df2[df2.columns[25]]
    dfObj2['AskVolume2'] = df2[df2.columns[26]]
    dfObj2['TotalVolume2'] = df2[df2.columns[12]]
    dfObj2['Amount2'] = df2[df2.columns[13]]
    dfObj2['UpperLimitPrice2'] = df2[df2.columns[17]]
    dfObj2['LowerLimitPrice2'] = df2[df2.columns[18]]
    
    # Fix the time column for sorting, hackily:
    #  - times that are not later than '17:00:00' get an 'a' prefix, so as
    #    strings they sort after the times later than 17:00:00;
    #  - rows before the index returned by findFirstTick get a further '!'
    #    prefix, which sorts before any digit.
    # Both prefixes are stripped again near the end of the function.
    # NOTE(review): the in-place where/mask on a column selection relies on
    # pandas writing through to the parent frame - confirm for the pandas
    # version in use.
    #dfObj1 = deleteFirstTicks(dfObj1)
    dfObj1['Time'].where(dfObj1['Time']>'17:00:00', other='a'+dfObj1['Time'], inplace=True)
    dfObj1['Time'].mask(dfObj1.index<findFirstTick(dfObj1), other='!'+dfObj1['Time'], inplace=True)
#    = np.where(dfObj1['Time'].str.contains('^2'), dfObj1['Time'], 'a'+dfObj1['Time'])
    #dfObj2 = deleteFirstTicks(dfObj2)
    dfObj2['Time'].where(dfObj2['Time']>'17:00:00', other='a'+dfObj2['Time'], inplace=True)
    dfObj2['Time'].mask(dfObj2.index<findFirstTick(dfObj2), other='!'+dfObj2['Time'], inplace=True)
#    = np.where(dfObj2['Time'].str.contains('^2'), dfObj2['Time'], 'a'+dfObj2['Time'])
    #print(len(dfObj1), len(dfObj2))
    
    exchangeCode = GetExchangeCode(InstrumentCode)
    
    # De-duplicate each leg. The subset used in the june_1 branches is every
    # copied column except TotalVolume.
    if not june_1:
        dfObj1 = dfObj1.drop_duplicates().reset_index(drop=True)
        dfObj2 = dfObj2.drop_duplicates().reset_index(drop=True)
    elif exchangeCode != 'DCE':
        # Rows sorting before 'a09:00:00' keep whole-row de-duplication; the
        # rest use the subset. The two parts are then concatenated again.
        d1 = dfObj1[dfObj1.Time < 'a09:00:00'].drop_duplicates()
        d2 = dfObj2[dfObj2.Time < 'a09:00:00'].drop_duplicates()
        dfObj1 = dfObj1[dfObj1.Time >= 'a09:00:00'].drop_duplicates(subset=['Time','MilliSecond','LastPrice1','BidVolume1','BidPrice1','AskPrice1','AskVolume1','Amount1','UpperLimitPrice1','LowerLimitPrice1'])
        dfObj2 = dfObj2[dfObj2.Time >= 'a09:00:00'].drop_duplicates(subset=['Time','MilliSecond','LastPrice2','BidVolume2','BidPrice2','AskPrice2','AskVolume2','Amount2','UpperLimitPrice2','LowerLimitPrice2'])
        dfObj1 = d1.append(dfObj1, ignore_index=True)
        dfObj2 = d2.append(dfObj2, ignore_index=True)
    else:
        dfObj1 = dfObj1.drop_duplicates(subset=['Time','MilliSecond','LastPrice1','BidVolume1','BidPrice1','AskPrice1','AskVolume1','Amount1','UpperLimitPrice1','LowerLimitPrice1'])
        dfObj2 = dfObj2.drop_duplicates(subset=['Time','MilliSecond','LastPrice2','BidVolume2','BidPrice2','AskPrice2','AskVolume2','Amount2','UpperLimitPrice2','LowerLimitPrice2'])
    
    #dfObj1.to_csv('D:\\Hongze\\Dropbox\\SpreadData\\_Test Data\\dfObj1.csv')
    #dfObj2.to_csv('D:\\Hongze\\Dropbox\\SpreadData\\_Test Data\\dfObj2.csv')
    
    # HANDLE DCE/CZCE:
    if exchangeCode=='DCE':
        # Keep the original milliseconds in MS1/MS2 and snap the merge key to
        # 0 (< 500) or 500 (otherwise).
        dfObj1['MS1'] = dfObj1['MilliSecond']
        dfObj1['MilliSecond'] = np.where(dfObj1['MilliSecond'] < 500, 0, 500)
        dfObj2['MS2'] = dfObj2['MilliSecond']
        dfObj2['MilliSecond'] = np.where(dfObj2['MilliSecond'] < 500, 0, 500)
    elif exchangeCode=='CZCE':
        # Return value is not used; presumably CZCEfix edits the frame in
        # place - defined elsewhere, TODO confirm.
        CZCEfix(dfObj1)
        CZCEfix(dfObj2)
    
    # HANDLE THE DUPLICATES
    # TM is the (Time, MilliSecond) merge key as a tuple.
    dfObj1['TM'] = list(zip(dfObj1['Time'],dfObj1['MilliSecond']))
    dfObj2['TM'] = list(zip(dfObj2['Time'],dfObj2['MilliSecond']))
    
    # Keys that occur more than once within a leg ...
    l1 = list(dfObj1[dfObj1.duplicated(subset='TM', keep=False)].TM)
    l2 = list(dfObj2[dfObj2.duplicated(subset='TM', keep=False)].TM)
    # ... and are duplicated in BOTH legs.
    s = set(l1).intersection(set(l2))
    print(s)
    #print(l1, l2)
    #print(len(list(s)),len(l1)+len(l2)-len(list(s)))
    if len(s) > 0:
        # Pull the rows with those keys out of both legs; they are paired up
        # by matchMerge after the main merge below.
        dup1 = dfObj1[dfObj1['TM'].isin(s)]
        dup2 = dfObj2[dfObj2['TM'].isin(s)]
        dfObj1 = dfObj1[dfObj1['TM'].isin(s)==False]
        dfObj2 = dfObj2[dfObj2['TM'].isin(s)==False]
        #print(len(dup1),len(dfObj1),len(dup2), len(dfObj2))
    dfObj1.drop(columns='TM',inplace=True)
    dfObj2.drop(columns='TM',inplace=True)
     
    #now merge!
    dfObj = pd.merge(dfObj1, dfObj2, how='outer', on=['Time','MilliSecond'], sort=True)
    #Note: merge does not sort by default
    if len(s) > 0:
        if len(dup1)==0 or len(dup2)==0:
            raise Exception('How did this happen?')
        # Append the separately merged duplicate rows and restore time order.
        dfObj = dfObj.append(matchMerge(dup1,dup2),ignore_index=True).sort_values(by=['Time','MilliSecond']).reset_index(drop=True)
    
    #dfObj.to_csv('D:\\Hongze\\Dropbox\\SpreadData\\_Test Data\\dfObj.csv')
    
    #dfObj.iloc[:findFirstTick(dfObj)] = dfObj.iloc[:findFirstTick(dfObj)].fillna(method='ffill')
    # Rows present in only one leg carry the other leg's last known values.
    dfObj.fillna(method='ffill', inplace=True)
    
    # In case the first few ticks are missing:
    # forward-fill cannot fill leading gaps, so limit prices are back-filled,
    # a missing LastPrice takes the upper limit price, and anything still
    # missing becomes 0.
    if True in list(dfObj.iloc[0].isna()):
        print("There's some ticks missing from the start.")
        dfObj[['UpperLimitPrice1','UpperLimitPrice2','LowerLimitPrice1','LowerLimitPrice2']] = dfObj[['UpperLimitPrice1','UpperLimitPrice2','LowerLimitPrice1','LowerLimitPrice2']].fillna(method='bfill')
        dfObj['LastPrice1'].fillna(value=dfObj['UpperLimitPrice1'], inplace=True)
        dfObj['LastPrice2'].fillna(value=dfObj['UpperLimitPrice2'], inplace=True)
        dfObj.fillna(value=0, inplace=True)
    
    # deleteFirstTicks / findFirstTick are defined elsewhere; presumably they
    # trim the leading rows marked above - TODO confirm.
    if exchangeCode=='DCE' or findFirstTick(dfObj)!=1:
        dfObj = deleteFirstTicks(dfObj)
    
    #TradeVolume and AveragePrice are calculated with TotalVolume and amount, which carries between rows
    # diff() leaves row 0 as NaN, so row 0 is set to the cumulative value itself.
    dfObj['TradeVolume1'] = dfObj.TotalVolume1.diff()
    dfObj.at[0,'TradeVolume1'] = dfObj['TotalVolume1'][0]
    dfObj['TradeAmount1'] = dfObj.Amount1.diff() #temporary column
    dfObj.at[0,'TradeAmount1'] = dfObj['Amount1'][0]
    dfObj['AveragePrice1'] = dfObj['TradeAmount1']/dfObj['TradeVolume1']/multiplier #nan if no trades
    dfObj.drop(columns='TradeAmount1', inplace=True)
    dfObj['BuyVolume1'], dfObj['SellVolume1'] = calculateBuySellVolume(dfObj, instrument=1)
    dfObj['TradeVolume2'] = dfObj.TotalVolume2.diff()
    dfObj.at[0,'TradeVolume2'] = dfObj['TotalVolume2'][0]
    dfObj['TradeAmount2'] = dfObj.Amount2.diff() #temporary column
    dfObj.at[0,'TradeAmount2'] = dfObj['Amount2'][0]
    dfObj['AveragePrice2'] = dfObj['TradeAmount2']/dfObj['TradeVolume2']/multiplier
    dfObj.drop(columns='TradeAmount2', inplace=True)
    dfObj['BuyVolume2'], dfObj['SellVolume2'] = calculateBuySellVolume(dfObj, instrument=2)
    
    #dfObj['TradeVolume1'].fillna(value=0, inplace=True)
    #dfObj['TradeVolume2'].fillna(value=0, inplace=True)
    #dfObj['AveragePrice1'].fillna(value=-1,inplace=True)
    #dfObj['AveragePrice2'].fillna(value=-1,inplace=True)
    # Missing buy/sell volumes count as zero.
    dfObj['BuyVolume1'].fillna(value=0, inplace=True)
    dfObj['SellVolume1'].fillna(value=0, inplace=True)
    dfObj['BuyVolume2'].fillna(value=0, inplace=True)
    dfObj['SellVolume2'].fillna(value=0, inplace=True)
    #now restore averageprice back to normal.
    #dfObj['AveragePrice1'].replace(to_replace=-1, value=np.nan, inplace=True)
    #dfObj['AveragePrice2'].replace(to_replace=-1, value=np.nan, inplace=True)
    
    #restore the time column
    # Strip the sort prefixes: first a '!' if present, then an 'a' if present
    # (each step removes the first character of matching values).
    dfObj['Time'].mask(dfObj['Time'].str.contains('!'), other=dfObj['Time'].str.slice(start=1), inplace=True)
    dfObj['Time'].mask(dfObj['Time'].str.contains('a'), other=dfObj['Time'].str.slice(start=1), inplace=True)
    
    #finally, the few columns that are missing.
    dfObj['Spread'] = -(dfObj['LastPrice1'] - dfObj['LastPrice2']) #2-1 now!
    
    # "N" prices: when the quoted volume on a side is 0, use the limit price
    # instead of the quoted price (upper limit for asks, lower limit for bids).
    dfObj['AskPrice1N'] = np.where(dfObj['AskVolume1']!=0, dfObj['AskPrice1'], dfObj['UpperLimitPrice1'])
    dfObj['BidPrice1N'] = np.where(dfObj['BidVolume1']!=0, dfObj['BidPrice1'], dfObj['LowerLimitPrice1'])
    dfObj['AskPrice2N'] = np.where(dfObj['AskVolume2']!=0, dfObj['AskPrice2'], dfObj['UpperLimitPrice2'])
    dfObj['BidPrice2N'] = np.where(dfObj['BidVolume2']!=0, dfObj['BidPrice2'], dfObj['LowerLimitPrice2'])
    
    # Mid prices per leg and their difference (leg 2 minus leg 1).
    dfObj['MidSpread'] = (dfObj['BidPrice2N'] + dfObj['AskPrice2N'] - dfObj['BidPrice1N'] - dfObj['AskPrice1N'])/2
    dfObj['MidPrice1'] = (dfObj['BidPrice1N']+dfObj['AskPrice1N'])/2
    dfObj['MidPrice2'] = (dfObj['BidPrice2N']+dfObj['AskPrice2N'])/2        

    #and order and clean up.
    # Select the output columns in their final order; the helper "N" columns
    # are left out, and MS1/MS2 exist only for DCE.
    
    if exchangeCode=='DCE':
        dfObj = dfObj[['Time', 'MilliSecond', 'MS1', 'MS2', 'Spread', 'MidSpread', 'LastPrice1', 'MidPrice1', 'BidVolume1','BidPrice1','AskPrice1','AskVolume1','TradeVolume1','AveragePrice1','BuyVolume1','SellVolume1', 'MidPrice2', 'LastPrice2','BidVolume2','BidPrice2','AskPrice2','AskVolume2','TradeVolume2','AveragePrice2','BuyVolume2','SellVolume2','TotalVolume1','TotalVolume2','Amount1','Amount2', 'UpperLimitPrice1', 'LowerLimitPrice1', "UpperLimitPrice2", "LowerLimitPrice2"]]
    else:
        dfObj = dfObj[['Time', 'MilliSecond', 'Spread', 'MidSpread', 'LastPrice1', 'MidPrice1', 'BidVolume1','BidPrice1','AskPrice1','AskVolume1','TradeVolume1','AveragePrice1','BuyVolume1','SellVolume1', 'MidPrice2', 'LastPrice2','BidVolume2','BidPrice2','AskPrice2','AskVolume2','TradeVolume2','AveragePrice2','BuyVolume2','SellVolume2','TotalVolume1','TotalVolume2','Amount1','Amount2', 'UpperLimitPrice1', 'LowerLimitPrice1', "UpperLimitPrice2", "LowerLimitPrice2"]]
    #print(null_counts[null_counts > 0].sort_values(ascending=False))
    # Volume and amount columns are stored as integers.
    dfObj = dfObj.astype({'BidVolume1':'int64','AskVolume1':'int64','TradeVolume1':'int64','BidVolume2':'int64','AskVolume2':'int64','TradeVolume2':'int64','TotalVolume1':'int64','TotalVolume2':'int64','Amount1':'int64','Amount2':'int64'})
    return dfObj
Пример #6
0
def main():
    """Write RawSpread CSVs for every configured product and 2020 date folder.

    For each 8-character entry of the ``2020`` folder (processed in sorted,
    i.e. chronological, order) and each product code, the exchange tick file
    is loaded, the two active contracts are chosen with ``findActiveCodes``
    and the spread is built with ``createSpread1``. Results are written to
    ``SpreadData\\<code>``; a day that follows a "too many zero volumes" day
    goes to the ``Alternates`` sub-folder. Every skipped code/date is
    appended to ``passes.txt`` under the same Dropbox root that the data is
    read from.
    """
    fadd = 'D:\\Hongze\\Dropbox'
    if not os.path.isdir(fadd):
        fadd = 'C:\\Users\\Hongze\\Dropbox'
    AllInstrumentCode = ['cu', 'au', 'ag', 'ni', 'rb', 'al', 'ru', 'zn', 'bu',
                         'fu', 'hc', 'pb', 'sn', 'sp', 'ss', 'sc', 'IF', 'IC',
                         'T']
    AllInstrumentCode.extend(['cs', 'eb', 'eg', 'jd', 'jm', 'pp', 'pg', 'rr',
                              'a', 'b', 'c', 'i', 'j', 'l', 'm', 'p', 'v',
                              'y'])
    # os.listdir returns entries in arbitrary order, but the "strike off the
    # next day" step below needs chronological order; YYYYMMDD names sort
    # chronologically as strings.
    AllTestDates = sorted(
        entry for entry in os.listdir(fadd + '\\2020') if len(entry) == 8)
    epsilon = 1e-10

    # Build the log path from the resolved root. It used to be hard-coded to
    # the D: drive, so logging failed whenever the C: fallback was in use.
    log_path = fadd + '\\SpreadData\\_Test Data\\passes.txt'
    with open(log_path, 'w') as txtfile:
        txtfile.write('No RawSpread created for code/date:\n')

    def log_pass(code, date, reason):
        # Append one "<code>: <date><reason>" line to the pass log.
        with open(log_path, 'a') as txtfile:
            txtfile.write(code + ': ' + date + reason)

    # Per-product list of dates still eligible for a regular (non-alternate) file.
    datedict = {code: list(AllTestDates) for code in AllInstrumentCode}

    #--------------------------------------------------------------------------------
    for d, fname in enumerate(AllTestDates):
        print('Date = ' + fname + '\n')
        loaded_exchange = None  # exchange whose dump is currently held in df
        for InstrumentCode in AllInstrumentCode:
            # Access/Prepare files
            print('Code = ' + InstrumentCode)
            ExchangeCode = GetExchangeCode(InstrumentCode)
            cfname = (fadd + "\\" + fname[0:4] + "\\" + fname + "\\" +
                      ExchangeCode + "_" + fname + ".csv")
            if not os.path.isfile(cfname):
                print('Source file not found; skipping\n')
                continue
            destdir = fadd + "\\SpreadData\\" + InstrumentCode
            if not os.path.exists(destdir):
                os.makedirs(destdir)  # create destination directory
            pattern = "RawSpread_" + fname
            if any(pattern in existing for existing in os.listdir(destdir)):
                print('File already exists; skipping\n')
                continue

            # Create df (raw data). The dump is per exchange and per date, so
            # it is re-read only when the exchange differs from the one that
            # is already loaded for this date.
            if ExchangeCode != loaded_exchange:
                # NOTE(review): error_bad_lines was deprecated in pandas 1.3
                # and removed in 2.0 (replacement: on_bad_lines='skip').
                df = pd.read_csv(cfname, sep=',', encoding="ISO-8859-1",
                                 error_bad_lines=False)
                # Keep rows where column 24 or column 26 is at least epsilon
                # in magnitude.
                has_quote = ((abs(df[df.columns[24]]) >= epsilon) |
                             (abs(df[df.columns[26]]) >= epsilon))
                df = df[has_quote].reset_index(drop=True)
                loaded_exchange = ExchangeCode

            activeCodes = findActiveCodes(df, InstrumentCode)
            if activeCodes is None:
                print("Nothing created for " + InstrumentCode + " on " + fname)
                log_pass(InstrumentCode, fname, ' (insufficient uniques)\n')
                continue
            activeCode1, activeCode2 = activeCodes

    #-------------------------------------------------------------------------------------------------------------
            dfObj = createSpread1(df, activeCode1, activeCode2, InstrumentCode,
                                  fname == '20200601')
            if dfObj is None:
                print("Nothing created for " + InstrumentCode + " on " + fname)
                log_pass(InstrumentCode, fname, ' (not long enough)\n')
                continue

            remaining = datedict[InstrumentCode]
            any_zero_volume = ((dfObj['AskVolume1'] == 0) |
                               (dfObj['AskVolume2'] == 0) |
                               (dfObj['BidVolume1'] == 0) |
                               (dfObj['BidVolume2'] == 0))
            if len(dfObj[any_zero_volume]) > len(dfObj) / 2:
                print("Too many zero volume ticks; skipping...\n")
                if fname in remaining:
                    remaining.remove(fname)
                log_pass(InstrumentCode, fname, ' (too many 0 volumes)\n')
                # Strike off the following day as well, so its spread is
                # written to the Alternates folder.
                if d < len(AllTestDates) - 1:
                    remaining.remove(AllTestDates[d + 1])
                continue

            suffix = "_" + activeCode1 + "_" + activeCode2 + ".csv"
            if fname not in remaining:
                # Not skipped today, but struck off by the previous day.
                print('Creating alternate spread for ' + InstrumentCode +
                      ' on ' + fname)
                log_pass(InstrumentCode, fname,
                         ' (previous day too many 0 volumes)\n')
                if not os.path.exists(destdir + '\\Alternates'):
                    os.mkdir(destdir + '\\Alternates')
                destfile = destdir + "\\Alternates\\RawSpread_" + fname + suffix
            else:
                destfile = destdir + "\\RawSpread_" + fname + suffix
            dfObj.to_csv(destfile)
            print('Complete.\n')
Пример #7
0
fadd = 'D:\\Hongze\\Dropbox'
if os.path.isdir(fadd) == False:
    fadd = 'C:\\Users\\Hongze\\Dropbox'
#AllInstrumentCode = ['AP','CF','CJ','FG','MA','OI','RM','SA','SF','SM','SR','TA','UR','ZC']
AllInstrumentCode = ['OI']
#AllTestDates = [i for i in list(os.listdir(fadd+'\\2020')) if len(i)==8 and i>'20200520']
AllTestDates = ['20200511']
for d in range(len(AllTestDates)):
    fname = AllTestDates[d]
    print('Date = ' + fname + '\n')
    lastread = None  #just to initialize
    for j in range(len(AllInstrumentCode)):
        #Access/Prepare files
        InstrumentCode = AllInstrumentCode[j]
        print('Code = ' + InstrumentCode)
        ExchangeCode = GetExchangeCode(InstrumentCode)
        cfname = fadd + "\\" + fname[
            0:4] + "\\" + fname + "\\" + ExchangeCode + "_" + fname + ".csv"
        if os.path.isfile(cfname) == False:
            print('Source file not found; skipping\n')
            continue
        destdir = fadd + "\\SpreadData\\" + InstrumentCode
        if not os.path.exists(destdir):
            os.makedirs(destdir)  #create destination directory
        pattern = "RawSpread_" + fname
        #if len([i for i in os.listdir(destdir) if pattern in i]) > 0:
        #    print('File already exists; skipping\n')
        #    continue
        if lastread == None or j == 0 or GetExchangeCode(
                InstrumentCode) != GetExchangeCode(
                    AllInstrumentCode[lastread]) or 'df' not in locals():
Пример #8
0
# Script: for each test date and each product code, locate the exchange tick
# file and load it with rows dropped where columns 24 and 26 are both
# (near) zero.
# NOTE(review): AllInstrumentCode is used below but its only assignment here
# is commented out - it must be defined elsewhere before this runs.
#AllInstrumentCode = list(['ag','au','rb','ni'])'
# Data root (note the trailing backslash); falls back to a second machine's path.
fadd = 'D:\\Dropbox\\Dropbox\\Backup\\Futures Data\\'
if os.path.isdir(fadd) == False :
    fadd = 'C:\\Users\\xingt\\Dropbox\\Backup\\Futures Data\\'
fadd2 = fadd + '2020'
# The directory listing is overwritten by the hard-coded date list two lines
# below, so only '20200401' is processed.
AllTestDates = os.listdir(fadd2)
#AllTestDates = list(['20200318','20200319','20200320','20200323','20200324'])
AllTestDates = list(['20200401'])
# Magnitudes below this threshold are treated as zero.
epsilon = 1e-10
for fname in AllTestDates:
#    if fname < '20200108':
#        continue
    print('Date = ' + fname)
    for j in range(len(AllInstrumentCode)):   
        InstrumentCode = AllInstrumentCode[j]       
        ExchangeCode = GetExchangeCode(InstrumentCode)
        
        # fadd already ends with a backslash, so this path contains a doubled
        # separator after the root.
        cfname = fadd + "\\" + fname[0:4] + "\\" +  fname + "\\" + ExchangeCode + "_" + fname + ".csv"
            
        # No tick file for this exchange/date: skip the product.
        if os.path.isfile(cfname) == False: continue
    
        destdir = fadd + "SpreadData\\"+InstrumentCode
        if not os.path.exists(destdir):
            os.makedirs(destdir)
        # pattern = "RawSpread_" + fname         
        # if len([i for i in os.listdir(destdir) if pattern in i]) > 0:continue

        # Re-read the dump only for the first product or when the exchange
        # differs from that of the previous product in the list.
        if j == 0 or GetExchangeCode(InstrumentCode) != GetExchangeCode(AllInstrumentCode[j-1]):
            df = pd.read_csv(cfname,sep=',',encoding = "ISO-8859-1", engine='python', error_bad_lines=False)
            # Row positions where both column 24 and column 26 are below
            # epsilon in magnitude (checked one row at a time).
            i_drop = [i for i in range(len(df)) if abs(df.iat[i,24]) < epsilon and abs(df.iat[i,26]) < epsilon]
            # drop() takes index labels; these equal the positions here
            # because read_csv was given no index column.
            df = df.drop(i_drop)