def _ShapingLineCounts(IntervalUnit, IntervalQuantity):
    """Look up how one scaled-data interval maps onto input lines per day.

    Args:
        IntervalUnit: 'Second', 'Minute' or 'Hour', as taken from the input
            file name.
        IntervalQuantity: Integer number of units per interval.

    Returns:
        A tuple ``(NumberOfInputLinesPerDay, NumberOfInputLinesTo11AM,
        WriteOutput)``, or ``None`` when the unit/quantity combination is not
        in the table.  ``WriteOutput`` is False for intervals that are
        recognized but for which no shaped output is produced.
    """
    LineCountsByInterval = {
        ('Second', 20): (1171, 811, False),
        ('Second', 30): (781, 541, False),
        ('Second', 40): (586, 406, False),
        ('Minute', 1): (391, 271, False),
        ('Minute', 5): (79, 55, False),
        ('Minute', 10): (40, 28, True),
        ('Minute', 15): (27, 19, True),
        ('Minute', 30): (14, 10, True),
        ('Hour', 1): (8, 5, True),
        ('Hour', 2): (5, 3, True),
    }
    return LineCountsByInterval.get((IntervalUnit, IntervalQuantity))


def _BandLabels(CloseValue, FiveDollarRailBelow, FiveDollarRailAbove):
    """Classify a close value against the $5 band around the 11:00 value.

    Returns:
        A tuple ``(Label3, Label5)`` of strings.  Label3 is '1' / '-1' / '0'
        for above / below / within the band.  Label5 starts out equal to
        Label3 and becomes '2' / '-2' when the close is more than a further
        $5 beyond the band.
    """
    if CloseValue > FiveDollarRailAbove:
        Label3 = '1'
    elif CloseValue < FiveDollarRailBelow:
        Label3 = '-1'
    else:
        Label3 = '0'

    Label5 = Label3
    if CloseValue > FiveDollarRailAbove + 5.0:
        Label5 = '2'
    elif CloseValue < FiveDollarRailBelow - 5.0:
        Label5 = '-2'
    return Label3, Label5


def _BuildShapedLabelString(ElevenOclockValueString, TodayCloseValueString,
                            TomorrowCloseValueString):
    """Build one comma-separated label line for a sample.

    The line holds the three raw value strings followed by the 2-way, 3-way
    and 5-way labels, each given for today and then for tomorrow, in the same
    order as the label file's column header.
    """
    ElevenOclockValue = float(ElevenOclockValueString)
    TodayCloseValue = float(TodayCloseValueString)
    TomorrowCloseValue = float(TomorrowCloseValueString)

    # The $5 band is bounded by the multiples of 5 on either side of the
    # 11:00 value.
    FiveDollarRailBelow = float((math.floor(ElevenOclockValue) // 5) * 5)
    FiveDollarRailAbove = FiveDollarRailBelow + 5.0

    # Simple binary "above/below" labels (a close equal to 11:00 counts as
    # below).
    Label2Today = '1' if TodayCloseValue > ElevenOclockValue else '-1'
    Label2Tomorrow = '1' if TomorrowCloseValue > ElevenOclockValue else '-1'

    Label3Today, Label5Today = _BandLabels(
        TodayCloseValue, FiveDollarRailBelow, FiveDollarRailAbove)
    Label3Tomorrow, Label5Tomorrow = _BandLabels(
        TomorrowCloseValue, FiveDollarRailBelow, FiveDollarRailAbove)

    return ','.join((
        ElevenOclockValueString, TodayCloseValueString,
        TomorrowCloseValueString,
        Label2Today, Label2Tomorrow,
        Label3Today, Label3Tomorrow,
        Label5Today, Label5Tomorrow,
    ))


def ShapeAllScaledData():
    """Shape every scaled SPX file into sample-vector and label CSV files.

    Each ``SPX-<quantity>-<unit>-<letter>.csv`` file found in
    ``SharedVars.ScaledDataPath`` is read line by line through a sliding
    window of values.  Whenever the window is full, one sample line and one
    label line are produced and written to ``SharedVars.ShapedDataPath``:

    * the "generic" (``G``) files receive every sample/label pair;
    * the "semi-generic" (``g``) files receive only pairs whose final input
      line is stamped 13:00;
    * the "particular" (``P``) files receive the semi-generic pairs whose
      final input line's date satisfies
      ``IbViewUtilities.DateIsAnSpxExpirationDay``.

    Files that do not end in ``.csv`` or do not start with ``SPX`` are
    reported with ``print`` and skipped.  Intervals that are recognized but
    flagged as not shaped (all Second intervals, 1 and 5 Minute) are skipped
    silently.  Any other interval is reported through
    ``IbViewUtilities.ErrorExit`` (presumably terminating the program; if it
    returns, the file is skipped).

    Raises:
        ValueError / IndexError: if a file name or an input line does not
            have the expected fields (not handled here).
    """
    # Function-scope import: only this function needs ExitStack.
    import contextlib

    LabelFileColumnNames = '11 oclock,Today close,Tomorrow close,Today 2,Tomorrow 2,Today 3,Tomorrow 3,Today 5,Tomorrow 5'

    # Traverse the list of input files
    for InputFileName in sorted(os.listdir(SharedVars.ScaledDataPath)):
        if InputFileName[-4:] != '.csv':
            print(f'Unexpected input file not ending in .csv: {InputFileName}')
            continue
        InputFileNameParts = InputFileName[:-4].split('-')
        if InputFileNameParts[0] != 'SPX':
            print(f'Unexpected input file not starting with SPX: {InputFileName}')
            continue

        IntervalQuantity = int(InputFileNameParts[1])
        IntervalUnit = InputFileNameParts[2]
        ASLetter = InputFileNameParts[3]

        # Convert the given interval for use in scanning the input file
        LineCounts = _ShapingLineCounts(IntervalUnit, IntervalQuantity)
        if LineCounts is None:
            IbViewUtilities.ErrorExit(
                f'Unexpected scale interval: {IntervalQuantity} {IntervalUnit}s')
            # Only reached if ErrorExit returns: never shape with unknown
            # line counts.
            continue
        NumberOfInputLinesPerDay, NumberOfInputLinesTo11AM, WriteOutput = LineCounts
        if not WriteOutput:
            continue

        # How many elements go into a sample vector: all but the last sample
        # day in full, plus the last day up to 11:00.
        SampleVectorLength = (
            (SharedVars.NumberOfDaysInASample - 1) * NumberOfInputLinesPerDay
            + NumberOfInputLinesTo11AM)
        # How many input lines lie between the end of the sample vector
        # (11:00 today) and the "today close" label value.
        NumberOfInputLinesFrom11AMToTodayClose = (
            NumberOfInputLinesPerDay - NumberOfInputLinesTo11AM)
        TodayCloseIndex = (
            SampleVectorLength + NumberOfInputLinesFrom11AMToTodayClose - 1)
        # The window spans the sample days plus one more day, so that its
        # last element is the "tomorrow close" label value.
        NumberOfInputLinesInCompleteOutputBuffer = (
            (SharedVars.NumberOfDaysInASample + 1) * NumberOfInputLinesPerDay)

        # Pandas read_csv is pretty cranky if there's no first line with
        # column labels: t(-(N-1)), ..., t(-1), t(0)
        SampleFileColumnNames = ', '.join(
            [f't({Offset})' for Offset in range(1 - SampleVectorLength, 0)]
            + ['t(0)'])

        # ExitStack closes every file opened below, even if an input line
        # turns out to be malformed part-way through.
        with contextlib.ExitStack() as OpenFiles:
            InputFile = OpenFiles.enter_context(open(
                os.path.join(SharedVars.ScaledDataPath, InputFileName), 'rt'))

            # Keyed by (kind, role): kind is G / g / P (generic,
            # semi-generic, particular); role is S (samples) or L (labels).
            OutputFiles = {}
            for Kind in ('G', 'g', 'P'):
                for Role in ('S', 'L'):
                    OutputFileName = f'SPX-{IntervalQuantity}-{IntervalUnit}-{ASLetter}-{Kind}-{Role}-{SampleVectorLength}.csv'
                    OutputFiles[Kind, Role] = OpenFiles.enter_context(open(
                        os.path.join(SharedVars.ShapedDataPath, OutputFileName),
                        'wt'))
                print(f'{SampleFileColumnNames}', file=OutputFiles[Kind, 'S'])
                print(f'{LabelFileColumnNames}', file=OutputFiles[Kind, 'L'])

            IbViewUtilities.AddLineToTextWindow(
                f'Shaping {IntervalQuantity} {IntervalUnit} {ASLetter}')
            SharedVars.GuiWindow.update()

            # Sliding window holding all the sequential values from the first
            # element of a sample vector through its furthest label value.
            OutputValueStorage = []
            for InputFileLine in InputFile:
                InputLineParts = InputFileLine.rstrip('\n').split(',')
                # Fields: year, month, day, hour, minute, second, value
                OutputValueStorage.append(InputLineParts[6])
                if len(OutputValueStorage) != NumberOfInputLinesInCompleteOutputBuffer:
                    continue

                # The window is full, so build one sample vector and its
                # label line.
                OutputLabelString = _BuildShapedLabelString(
                    OutputValueStorage[SampleVectorLength - 1].strip(),
                    OutputValueStorage[TodayCloseIndex].strip(),
                    OutputValueStorage[-1].strip())
                OutputSampleString = ', '.join(
                    OutputValueStorage[:SampleVectorLength])

                # Write ALL samples and labels to the generic files
                print(f'{OutputSampleString}', file=OutputFiles['G', 'S'])
                print(f'{OutputLabelString}', file=OutputFiles['G', 'L'])

                # If the label time is 1:00 PM then this is a semi-generic
                # data set so write it to those files, too
                if int(InputLineParts[3]) == 13 and int(InputLineParts[4]) == 0:
                    print(f'{OutputSampleString}', file=OutputFiles['g', 'S'])
                    print(f'{OutputLabelString}', file=OutputFiles['g', 'L'])

                    # If the label value is also from an SPX expiration date,
                    # then the data set is "particular" as well
                    LabelDate = datetime.date(
                        int(InputLineParts[0]), int(InputLineParts[1]),
                        int(InputLineParts[2]))
                    if IbViewUtilities.DateIsAnSpxExpirationDay(LabelDate):
                        print(f'{OutputSampleString}',
                              file=OutputFiles['P', 'S'])
                        print(f'{OutputLabelString}',
                              file=OutputFiles['P', 'L'])

                # Drop the oldest value to make room for the next one
                del OutputValueStorage[0]
def _ScaleOneInputFile(InputFile, InputYear, InputMonth, InputDay,
                       DeltaHours, DeltaMinutes, DeltaSeconds,
                       AveragingOutputFile, SamplingOutputFile):
    """Scale the lines of one day's input file into both output files.

    Args:
        InputFile: Open text file whose comma-separated lines carry hour,
            minute and second in fields 0-2 and the value in field 4.
        InputYear, InputMonth, InputDay: Date stamped on every output line.
        DeltaHours, DeltaMinutes, DeltaSeconds: Interval step handed to
            IncrementOutputTime.
        AveragingOutputFile, SamplingOutputFile: Open output files.
    """
    # Averaging state
    WaitingForSecondSamplePoint = True
    Accumulator = 0.0
    Count = 0
    SavedAccumulator = 0.0
    SavedCount = 0
    LaggingHour = 0
    LaggingMinute = 0
    LaggingSecond = 0

    # Start with the first time to be added to the output files
    OutputHour = 6
    OutputMinute = 30
    OutputSecond = 0

    # End when we get to 1:00
    EndHour = 13
    EndMinute = 0
    EndSecond = 0

    for InputFileLine in InputFile:
        InputFileLineParts = InputFileLine.split(',')
        InputHour = int(InputFileLineParts[0])
        InputMinute = int(InputFileLineParts[1])
        InputSecond = int(InputFileLineParts[2])
        InputValue = float(InputFileLineParts[4])

        if OutputHour == 6 and OutputMinute == 30 and OutputSecond == 0:
            # The target is still the start time, so this is treated as the
            # first entry of the file: copy it straight across to both files.
            WriteToScaledOutputFile(AveragingOutputFile, InputYear, InputMonth,
                                    InputDay, OutputHour, OutputMinute,
                                    OutputSecond, InputValue)
            WriteToScaledOutputFile(SamplingOutputFile, InputYear, InputMonth,
                                    InputDay, OutputHour, OutputMinute,
                                    OutputSecond, InputValue)
            # Initialize interval recognition
            OutputHour, OutputMinute, OutputSecond = IncrementOutputTime(
                OutputHour, OutputMinute, OutputSecond,
                DeltaHours, DeltaMinutes, DeltaSeconds)
            # Initialize averaging
            Accumulator = 0.0
            Count = 0
            SavedAccumulator = 0.0
            SavedCount = 0
            continue

        # This is beyond the first entry
        TargetTimeReached = TimesAreEqual(InputHour, InputMinute, InputSecond,
                                          OutputHour, OutputMinute,
                                          OutputSecond)
        EndOfInputDayReached = TimesAreEqual(InputHour, InputMinute,
                                             InputSecond, EndHour, EndMinute,
                                             EndSecond)

        if not (TargetTimeReached or EndOfInputDayReached):
            # This entry falls between the times that go into the output
            if Time1IsAfterTime2(InputHour, InputMinute, InputSecond,
                                 OutputHour, OutputMinute, OutputSecond):
                # We apparently missed our target time.
                # NOTE(review): the target is not advanced here, so later
                # entries appear to keep reporting a miss until the end of
                # the day - confirm that is intended.
                IbViewUtilities.AddLineToTextWindow(
                    f'Missed target {InputYear}-{InputMonth}-{InputDay}@{OutputHour}:{OutputMinute}:{OutputSecond}'
                )
            else:
                # This is one of those "in-between" input entries
                Accumulator += InputValue
                Count += 1
            continue

        # This entry falls on the next target time or the end of the input
        # day.  The end-of-day sample is written further down, stamped with
        # the end time.
        if not EndOfInputDayReached:
            WriteToScaledOutputFile(SamplingOutputFile, InputYear, InputMonth,
                                    InputDay, OutputHour, OutputMinute,
                                    OutputSecond, InputValue)

        # The averaging file gets the average of the last TWO intervals as
        # the value for the PREVIOUS time point, so the first sample point
        # has to be skipped before averages can be written.
        if WaitingForSecondSamplePoint:
            WaitingForSecondSamplePoint = False
        else:
            AverageValue = (Accumulator + SavedAccumulator) / (
                Count + SavedCount)
            WriteToScaledOutputFile(AveragingOutputFile, InputYear, InputMonth,
                                    InputDay, LaggingHour, LaggingMinute,
                                    LaggingSecond, AverageValue)

        if EndOfInputDayReached:
            # Last time point: write the plain value at 1:00 into both files
            # and ignore whatever is left of this input file.
            WriteToScaledOutputFile(AveragingOutputFile, InputYear, InputMonth,
                                    InputDay, EndHour, EndMinute, EndSecond,
                                    InputValue)
            WriteToScaledOutputFile(SamplingOutputFile, InputYear, InputMonth,
                                    InputDay, EndHour, EndMinute, EndSecond,
                                    InputValue)
            break

        # Update things for processing the next interval
        LaggingHour = OutputHour
        LaggingMinute = OutputMinute
        LaggingSecond = OutputSecond
        OutputHour, OutputMinute, OutputSecond = IncrementOutputTime(
            OutputHour, OutputMinute, OutputSecond,
            DeltaHours, DeltaMinutes, DeltaSeconds)
        # The value at this sample point counts towards the interval that
        # just ended.
        SavedAccumulator = Accumulator + InputValue
        SavedCount = Count + 1
        Accumulator = 0.0
        Count = 0


def ScaleUnderlying(IntervalUnit, IntervalQuantity):
    """Rescale the checked input files to one interval, by averaging and by sampling.

    Every file in ``SharedVars.CheckedDataPath`` (in sorted name order) is
    scanned from the 6:30:00 start time to the 13:00:00 end time.  Results
    for all files are written to two files in ``SharedVars.ScaledDataPath``:
    ``SPX-<quantity>-<unit>-A.csv`` (averaged values) and
    ``SPX-<quantity>-<unit>-S.csv`` (sampled values).

    Args:
        IntervalUnit: 'Second', 'Minute' or 'Hour'.
        IntervalQuantity: 20/30/40 for Second, 1/5/10/15/30 for Minute,
            1/2 for Hour.

    Any other unit/quantity combination is reported through
    ``IbViewUtilities.ErrorExit`` (presumably terminating the program); if
    that call returns, this function returns without creating output files.

    Raises:
        ValueError / IndexError: if an input file name or line does not have
            the expected fields (not handled here).
    """
    SupportedQuantities = {
        'Second': (20, 30, 40),
        'Minute': (1, 5, 10, 15, 30),
        'Hour': (1, 2),
    }
    if IntervalQuantity not in SupportedQuantities.get(IntervalUnit, ()):
        IbViewUtilities.ErrorExit(
            f'Unexpected scale interval: {IntervalQuantity} {IntervalUnit}s')
        # With no usable step the target time could never advance, so do not
        # produce output at all.
        return

    # Convert the given interval for use in scanning the input files
    DeltaSeconds = int(IntervalQuantity) if IntervalUnit == 'Second' else 0
    DeltaMinutes = int(IntervalQuantity) if IntervalUnit == 'Minute' else 0
    DeltaHours = int(IntervalQuantity) if IntervalUnit == 'Hour' else 0

    # Set up a file for averaging and another for sampling
    AveragingOutputFileName = f'SPX-{str(IntervalQuantity)}-{IntervalUnit}-A.csv'
    SamplingOutputFileName = f'SPX-{str(IntervalQuantity)}-{IntervalUnit}-S.csv'
    AveragingOutputPath = SharedVars.ScaledDataPath + '/' + AveragingOutputFileName
    SamplingOutputPath = SharedVars.ScaledDataPath + '/' + SamplingOutputFileName

    # The with-statements close every file even if an input line turns out
    # to be malformed.
    with open(AveragingOutputPath, 'wt') as AveragingOutputFile, \
            open(SamplingOutputPath, 'wt') as SamplingOutputFile:
        # Traverse the list of input files
        for InputFileName in sorted(os.listdir(SharedVars.CheckedDataPath)):
            # The date for the output lines comes from the input file name
            InputFileNameParts = InputFileName.split('-')
            with open(SharedVars.CheckedDataPath + '/' + InputFileName,
                      'rt') as InputFile:
                InputYear = int(InputFileNameParts[1])
                InputMonth = int(InputFileNameParts[2])
                InputDay = int(InputFileNameParts[3][0:2])
                _ScaleOneInputFile(
                    InputFile, InputYear, InputMonth, InputDay,
                    DeltaHours, DeltaMinutes, DeltaSeconds,
                    AveragingOutputFile, SamplingOutputFile)