Пример #1
0
def _ShapeLabelsForClose(CloseValue, ElevenOclockValue):
    """Classify one closing value against the 11 o'clock reference value.

    Args:
        CloseValue: the closing value (float) to be labelled.
        ElevenOclockValue: the value (float) at the end of the sample vector.

    Returns:
        A ``(Label2, Label3, Label5)`` tuple of strings.
        Label2 is '1' when the close is strictly above the reference value,
        otherwise '-1'.
        Label3 is '1' / '-1' when the close is above / below the $5 band that
        contains the reference value, and '0' when it is inside the band
        (rails included).
        Label5 equals Label3, except that it becomes '2' / '-2' when the close
        is more than a further $5 beyond the band.
    """
    # The band runs from the multiple of 5 at or below the reference value to the next multiple of 5 above it
    FiveDollarRailBelow = float((math.floor(ElevenOclockValue) // 5) * 5)
    FiveDollarRailAbove = FiveDollarRailBelow + 5.0
    TenDollarRailBelow = FiveDollarRailBelow - 5.0
    TenDollarRailAbove = FiveDollarRailAbove + 5.0

    # Simple binary "above/below" label
    Label2 = '1' if CloseValue > ElevenOclockValue else '-1'

    # "above/below/within" the $5 band
    if CloseValue > FiveDollarRailAbove:
        Label3 = '1'
    elif CloseValue < FiveDollarRailBelow:
        Label3 = '-1'
    else:
        Label3 = '0'

    # Same as the 3-way label unless the close is more than $5 outside the band
    if CloseValue > TenDollarRailAbove:
        Label5 = '2'
    elif CloseValue < TenDollarRailBelow:
        Label5 = '-2'
    else:
        Label5 = Label3
    return Label2, Label3, Label5


def ShapeAllScaledData():
    """Shape every scaled SPX file into sample-vector and label CSV files.

    Every entry of ``SharedVars.ScaledDataPath`` is visited in sorted order.
    Entries whose name does not end in '.csv' or does not start with 'SPX'
    are reported with ``print`` and skipped.  The remaining names are expected
    to look like ``SPX-<quantity>-<unit>-<letter>.csv``.

    Second-based intervals and the 1- and 5-minute intervals are recognised
    but produce no output.  For every other interval six files are written to
    ``SharedVars.ShapedDataPath``: a sample file (``S``) and a label file
    (``L``) for each of the generic (``G``), semi-generic (``g``) and
    particular (``P``) data sets.

    Each input line is comma separated: fields 0-2 are year, month and day,
    fields 3-4 are hour and minute, and field 6 is the value.  A sliding
    window of ``(NumberOfDaysInASample + 1)`` days of values is kept; every
    time it is full one sample/label pair is emitted:

    * generic files receive every pair;
    * semi-generic files receive the pairs whose newest line is at 13:00;
    * particular files receive the semi-generic pairs whose newest line falls
      on a date accepted by ``IbViewUtilities.DateIsAnSpxExpirationDay``.

    All files are closed even if an exception interrupts the processing.
    """
    # Imported here so the function does not rely on a module-level import being present
    from contextlib import ExitStack

    # (unit, quantity) -> (input lines per day, input lines up to the "11 AM" cut-off, write output?)
    KnownIntervals = {
        ('Second', 20): (1171, 811, False),
        ('Second', 30): (781, 541, False),
        ('Second', 40): (586, 406, False),
        ('Minute', 1): (391, 271, False),
        ('Minute', 5): (79, 55, False),
        ('Minute', 10): (40, 28, True),
        ('Minute', 15): (27, 19, True),
        ('Minute', 30): (14, 10, True),
        ('Hour', 1): (8, 5, True),
        ('Hour', 2): (5, 3, True),
    }
    LabelFileColumnNames = '11 oclock,Today close,Tomorrow close,Today 2,Tomorrow 2,Today 3,Tomorrow 3,Today 5,Tomorrow 5'

    # Traverse the list of input files
    for InputFileName in sorted(os.listdir(SharedVars.ScaledDataPath)):
        if InputFileName[-4:] != '.csv':
            print(f'Unexpected input file not ending in .csv: {InputFileName}')
            continue
        InputFileNameParts = InputFileName[:-4].split('-')
        if InputFileNameParts[0] != 'SPX':
            print(
                f'Unexpected input file not starting with SPX: {InputFileName}'
            )
            continue

        # Every file opened for this input is registered here and closed when the block is left
        with ExitStack() as OpenFiles:
            InputFile = OpenFiles.enter_context(
                open(SharedVars.ScaledDataPath + '/' + InputFileName, 'rt'))
            IntervalQuantity = int(InputFileNameParts[1])
            IntervalUnit = InputFileNameParts[2]
            ASLetter = InputFileNameParts[3]

            # Convert the given interval for use in scanning the input file
            IntervalKey = (IntervalUnit, IntervalQuantity)
            if IntervalKey in KnownIntervals:
                (NumberOfInputLinesPerDay, NumberOfInputLinesTo11AM,
                 WriteOutput) = KnownIntervals[IntervalKey]
            else:
                NumberOfInputLinesPerDay = 0
                NumberOfInputLinesTo11AM = 0
                # Second-based files are never written, whatever the quantity
                WriteOutput = IntervalUnit != 'Second'
                if IntervalUnit in ('Second', 'Minute', 'Hour'):
                    IbViewUtilities.ErrorExit(
                        f'Unexpected scale interval: {IntervalQuantity} {IntervalUnit}s'
                    )
                # NOTE(review): an unrecognised unit is not reported, and if ErrorExit returns
                #  processing carries on with zero line counts - confirm this is intended
            if not WriteOutput:
                continue

            # How many elements will go into a sample vector?
            SampleVectorLength = (
                SharedVars.NumberOfDaysInASample - 1
            ) * NumberOfInputLinesPerDay + NumberOfInputLinesTo11AM
            # How many input lines lie between the end of the sample vector and the today close value
            NumberOfInputLinesFrom11AMToTodayClose = NumberOfInputLinesPerDay - NumberOfInputLinesTo11AM
            # How many input lines span a sample vector all the way to the furthest label value (tomorrow close)?
            NumberOfInputLinesInCompleteOutputBuffer = (
                SharedVars.NumberOfDaysInASample +
                1) * NumberOfInputLinesPerDay
            ElevenOclockIndex = SampleVectorLength - 1
            TodayCloseIndex = (SampleVectorLength +
                               NumberOfInputLinesFrom11AMToTodayClose - 1)

            # Open the six output files: (data set letter, S=sample / L=label) -> file
            OutputFiles = {}
            for DataSetLetter in ('G', 'g', 'P'):
                for KindLetter in ('S', 'L'):
                    OutputFileName = f'SPX-{IntervalQuantity}-{IntervalUnit}-{ASLetter}-{DataSetLetter}-{KindLetter}-{SampleVectorLength}.csv'
                    OutputFiles[DataSetLetter,
                                KindLetter] = OpenFiles.enter_context(
                                    open(
                                        SharedVars.ShapedDataPath + '/' +
                                        OutputFileName, 'wt'))

            # Pandas read_csv is pretty cranky if there's no first line with column labels...
            SampleColumnNameList = [
                f't({ColumnIndex - SampleVectorLength + 1})'
                for ColumnIndex in range(SampleVectorLength - 1)
            ]
            SampleColumnNameList.append('t(0)')
            SampleFileColumnNames = ', '.join(SampleColumnNameList)
            for DataSetLetter in ('G', 'g', 'P'):
                print(SampleFileColumnNames,
                      file=OutputFiles[DataSetLetter, 'S'])
            for DataSetLetter in ('G', 'g', 'P'):
                print(LabelFileColumnNames,
                      file=OutputFiles[DataSetLetter, 'L'])

            IbViewUtilities.AddLineToTextWindow(
                f'Shaping {IntervalQuantity} {IntervalUnit} {ASLetter}')
            SharedVars.GuiWindow.update()

            # A buffer to hold all the sequential values from the first in a sample vector through the furthest label value
            OutputValueStorage = []
            for InputFileLine in InputFile:
                InputLineParts = InputFileLine.rstrip('\n').split(',')
                # Add the next input value to the end of the buffer
                OutputValueStorage.append(InputLineParts[6])
                if len(OutputValueStorage
                       ) != NumberOfInputLinesInCompleteOutputBuffer:
                    continue

                # There are enough values in the buffer to make an output sample and label so...
                # 1) Build the label set: the three raw values followed by the 2-, 3- and 5-way labels
                ElevenOclockValueString = OutputValueStorage[
                    ElevenOclockIndex].strip()
                TodayCloseValueString = OutputValueStorage[
                    TodayCloseIndex].strip()
                TomorrowCloseValueString = OutputValueStorage[-1].strip()
                ElevenOclockValue = float(ElevenOclockValueString)
                TodayCloseValue = float(TodayCloseValueString)
                TomorrowCloseValue = float(TomorrowCloseValueString)
                Label2Today, Label3Today, Label5Today = _ShapeLabelsForClose(
                    TodayCloseValue, ElevenOclockValue)
                (Label2Tomorrow, Label3Tomorrow,
                 Label5Tomorrow) = _ShapeLabelsForClose(
                     TomorrowCloseValue, ElevenOclockValue)
                OutputLabelString = ','.join([
                    ElevenOclockValueString, TodayCloseValueString,
                    TomorrowCloseValueString, Label2Today, Label2Tomorrow,
                    Label3Today, Label3Tomorrow, Label5Today, Label5Tomorrow
                ])
                # 2) Build the sample vector: the oldest SampleVectorLength values, exactly as read
                OutputSampleString = ', '.join(
                    OutputValueStorage[:SampleVectorLength])

                # 3) Write the sample and label to their output files
                # Write ALL samples and labels to the generic files
                print(OutputSampleString, file=OutputFiles['G', 'S'])
                print(OutputLabelString, file=OutputFiles['G', 'L'])
                # If the label time is 13:00 then this is a semi-generic data set so write it to those files, too
                if int(InputLineParts[3]) == 13 and int(
                        InputLineParts[4]) == 0:
                    print(OutputSampleString, file=OutputFiles['g', 'S'])
                    print(OutputLabelString, file=OutputFiles['g', 'L'])
                    # If the label value is also from an SPX expiration date, then it's "particular" as well
                    if IbViewUtilities.DateIsAnSpxExpirationDay(
                            datetime.date(int(InputLineParts[0]),
                                          int(InputLineParts[1]),
                                          int(InputLineParts[2]))):
                        print(OutputSampleString, file=OutputFiles['P', 'S'])
                        print(OutputLabelString, file=OutputFiles['P', 'L'])
                # Remove the first element from the beginning to make room for the next one at the end
                del OutputValueStorage[0]
Пример #2
0
def _ScaleOneInputFile(InputFile, InputYear, InputMonth, InputDay, DeltaHours,
                       DeltaMinutes, DeltaSeconds, AveragingOutputFile,
                       SamplingOutputFile):
    """Scale the lines of one input file into the two output files.

    Args:
        InputFile: open text file whose comma-separated lines carry hour,
            minute and second in fields 0-2 and the value in field 4.
        InputYear, InputMonth, InputDay: date written on every output line.
        DeltaHours, DeltaMinutes, DeltaSeconds: step handed to
            ``IncrementOutputTime`` to obtain the next target time.
        AveragingOutputFile: open output file receiving averaged values.
        SamplingOutputFile: open output file receiving sampled values.

    Reading stops as soon as a line at the end time (13:00:00) has been
    handled, or when the input is exhausted.
    """
    # Averaging variables
    WaitingForSecondSamplePoint = True
    Accumulator = 0.0
    Count = 0
    SavedAccumulator = 0.0
    SavedCount = 0
    LaggingHour = 0
    LaggingMinute = 0
    LaggingSecond = 0

    # Start with the first time to be added to the output file
    OutputHour = 6
    OutputMinute = 30
    OutputSecond = 0
    # End when we get to 13:00
    EndHour = 13
    EndMinute = 0
    EndSecond = 0

    # Traverse the lines in the current input file
    for InputFileLine in InputFile:
        InputFileLineParts = InputFileLine.split(',')
        InputHour = int(InputFileLineParts[0])
        InputMinute = int(InputFileLineParts[1])
        InputSecond = int(InputFileLineParts[2])
        InputValue = float(InputFileLineParts[4])

        if OutputHour == 6 and OutputMinute == 30 and OutputSecond == 0:
            # The target is still the start time, so this is treated as the first entry:
            #  copy it straight across to both output files, stamped with the start time
            WriteToScaledOutputFile(AveragingOutputFile, InputYear,
                                    InputMonth, InputDay, OutputHour,
                                    OutputMinute, OutputSecond, InputValue)
            WriteToScaledOutputFile(SamplingOutputFile, InputYear, InputMonth,
                                    InputDay, OutputHour, OutputMinute,
                                    OutputSecond, InputValue)
            # Initialize interval recognition
            OutputHour, OutputMinute, OutputSecond = IncrementOutputTime(
                OutputHour, OutputMinute, OutputSecond, DeltaHours,
                DeltaMinutes, DeltaSeconds)
            # Initialize averaging
            Accumulator = 0.0
            Count = 0
            SavedAccumulator = 0.0
            SavedCount = 0
            continue

        # This is beyond the first entry
        TargetTimeReached = TimesAreEqual(InputHour, InputMinute, InputSecond,
                                          OutputHour, OutputMinute,
                                          OutputSecond)
        EndOfInputDayReached = TimesAreEqual(InputHour, InputMinute,
                                             InputSecond, EndHour, EndMinute,
                                             EndSecond)

        if not (TargetTimeReached or EndOfInputDayReached):
            # This entry falls between the times that are to be included in the output file
            if Time1IsAfterTime2(InputHour, InputMinute, InputSecond,
                                 OutputHour, OutputMinute, OutputSecond):
                # We apparently passed the target time without matching it: report it.
                #  The target is not advanced and the value is not accumulated.
                IbViewUtilities.AddLineToTextWindow(
                    f'Missed target {InputYear}-{InputMonth}-{InputDay}@{OutputHour}:{OutputMinute}:{OutputSecond}'
                )
            else:
                # This is one of those "in-between" input entries
                Accumulator += InputValue
                Count += 1
            continue

        # This entry falls on the next target time or on the end of the input day
        if not EndOfInputDayReached:
            WriteToScaledOutputFile(SamplingOutputFile, InputYear, InputMonth,
                                    InputDay, OutputHour, OutputMinute,
                                    OutputSecond, InputValue)
        # The averaging file gets the average of the last TWO intervals as the value for the PREVIOUS
        #  target time, so nothing can be written until a second sample point has been seen
        if WaitingForSecondSamplePoint:
            WaitingForSecondSamplePoint = False
        else:
            AverageValue = (Accumulator + SavedAccumulator) / (Count +
                                                               SavedCount)
            WriteToScaledOutputFile(AveragingOutputFile, InputYear,
                                    InputMonth, InputDay, LaggingHour,
                                    LaggingMinute, LaggingSecond,
                                    AverageValue)

        if EndOfInputDayReached:
            # Last time point: both files get the plain value at the end time, then this file is done
            WriteToScaledOutputFile(AveragingOutputFile, InputYear,
                                    InputMonth, InputDay, EndHour, EndMinute,
                                    EndSecond, InputValue)
            WriteToScaledOutputFile(SamplingOutputFile, InputYear, InputMonth,
                                    InputDay, EndHour, EndMinute, EndSecond,
                                    InputValue)
            return

        # Update things for processing the next interval
        LaggingHour = OutputHour
        LaggingMinute = OutputMinute
        LaggingSecond = OutputSecond
        OutputHour, OutputMinute, OutputSecond = IncrementOutputTime(
            OutputHour, OutputMinute, OutputSecond, DeltaHours, DeltaMinutes,
            DeltaSeconds)
        # The value at the sample point itself is counted in the interval that just ended
        SavedAccumulator = Accumulator + InputValue
        SavedCount = Count + 1
        Accumulator = 0.0
        Count = 0


def ScaleUnderlying(IntervalUnit, IntervalQuantity):
    """Rescale all checked input files to one interval, by averaging and by sampling.

    Args:
        IntervalUnit: 'Second', 'Minute' or 'Hour'.
        IntervalQuantity: interval length in that unit.  Accepted values are
            20/30/40 seconds, 1/5/10/15/30 minutes and 1/2 hours; any other
            quantity for a known unit is reported through
            ``IbViewUtilities.ErrorExit``.

    Two files are written to ``SharedVars.ScaledDataPath``:
    ``SPX-<quantity>-<unit>-A.csv`` (averaged values) and
    ``SPX-<quantity>-<unit>-S.csv`` (sampled values).  Every file in
    ``SharedVars.CheckedDataPath`` is processed in sorted name order; the
    date is taken from the '-'-separated pieces 1, 2 and 3 of the file name
    (only the first two characters of piece 3 are used for the day).

    All files are closed even if an exception interrupts the processing.
    """
    # Convert the given interval for use in scanning the input files
    AcceptedQuantities = {
        'Second': (20, 30, 40),
        'Minute': (1, 5, 10, 15, 30),
        'Hour': (1, 2),
    }
    Deltas = {'Second': 0, 'Minute': 0, 'Hour': 0}
    if IntervalUnit in AcceptedQuantities:
        if IntervalQuantity in AcceptedQuantities[IntervalUnit]:
            Deltas[IntervalUnit] = int(IntervalQuantity)
        else:
            IbViewUtilities.ErrorExit(
                f'Unexpected scale interval: {IntervalQuantity} {IntervalUnit}s'
            )
    # NOTE(review): an unrecognised unit is not reported and leaves every delta at 0 - confirm this is intended

    # Set up a file for averaging and another for sampling
    AveragingOutputFileName = f'SPX-{str(IntervalQuantity)}-{IntervalUnit}-A.csv'
    SamplingOutputFileName = f'SPX-{str(IntervalQuantity)}-{IntervalUnit}-S.csv'
    with open(SharedVars.ScaledDataPath + '/' + AveragingOutputFileName,
              'wt') as AveragingOutputFile:
        with open(SharedVars.ScaledDataPath + '/' + SamplingOutputFileName,
                  'wt') as SamplingOutputFile:
            # Traverse the list of input files
            for InputFileName in sorted(
                    os.listdir(SharedVars.CheckedDataPath)):
                with open(SharedVars.CheckedDataPath + '/' + InputFileName,
                          'rt') as InputFile:
                    # For each input file, set up the date for the output file lines
                    InputFileNameParts = InputFileName.split('-')
                    InputYear = int(InputFileNameParts[1])
                    InputMonth = int(InputFileNameParts[2])
                    InputDay = int(InputFileNameParts[3][0:2])
                    _ScaleOneInputFile(InputFile, InputYear, InputMonth,
                                       InputDay, Deltas['Hour'],
                                       Deltas['Minute'], Deltas['Second'],
                                       AveragingOutputFile,
                                       SamplingOutputFile)