from sklearn.svm import SVR  # NOTE(review): SVR appears unused in this chunk — confirm before removing
from utility import Utility
from datetime import datetime
import sys

# Get the command line arguments: the profile name selects the dataset file.
directoryName = "Datasets/"
profileName = sys.argv[1]
datasetFileName = directoryName + profileName + "_time_interaction_rate.csv"

# Horizon: how far ahead to predict; depth: how far back features reach.
daysOfHorizon = 30
daysOfDepth = 14
horizon = 24 * daysOfHorizon  # 30 days ahead, in hours
depth = 24 * daysOfDepth      # 14 days of history, in hours

util = Utility.SeriesUtility()

# Step 1 - Convert the dataset into a pandas series
series = util.convertDatasetsToSeries(datasetFileName)

# Step 2 - Resample the series to hourly means
resampledSeries = util.resampleSeriesMean(series, "H")
del series  # release the raw series before the next large allocation

# Step 3 - Keep only the most recent data: the last `yearsOfData` years
# plus enough extra points to cover the prediction horizon.
yearsOfData = 5
recentCount = yearsOfData * 365 * 24 + horizon
filteredSeries = util.filterRecent(resampledSeries, recentCount)
del resampledSeries

# Step 4 - Scale the series
normalizedSeries = util.scaleSeries(filteredSeries)
] profileNames = ["Audi", "Benz", "Dodge", "Ferrari", "Jeep"] # For plotting seriesList = [] seriesNames = [] # Parameters for training daysOfHorizon = 4 daysOfDepth = 30 horizon = 24 * daysOfHorizon depth = 24 * daysOfDepth yearsOfData = 5 # Util Object globalUtil = Utility.SeriesUtility() # Preprocess and sample the series( Series Intersection) resampledSeries = [] for i in range(len(datasetFileNames)): # Convert the dataset into pandas series series = globalUtil.convertDatasetsToSeries(datasetFileNames[i]) # Resample the series (to hourly) resampled = globalUtil.resampleSeries(series, "H") resampledSeries.append(resampled) del series # Find intersection processedSeries = globalUtil.intersect(resampledSeries)
def predictSeries(seriesName):
    """Train an ESN on a profile's hourly interaction series and predict
    `horizon` hours ahead.

    Parameters:
        seriesName: profile name; selects Datasets/<name>_time_interaction.csv

    Returns:
        (actualSeries, predictedSeries) — de-scaled test series and the
        de-scaled prediction over the same horizon.

    NOTE(review): relies on module-level `horizon`, `pd`, `np`, `tsi`
    and `Utility` being in scope — confirm against the full file.
    """
    # Dataset location
    directoryName = "Datasets/"
    datasetFileName = directoryName + seriesName + "_time_interaction.csv"

    util = Utility.SeriesUtility()

    # Step 1 - Convert the dataset into a pandas series
    series = util.convertDatasetsToSeries(datasetFileName)

    # Step 2 - Re-sample the series to hourly sums
    resampledSeries = util.resampleSeriesSum(series, "H")
    del series

    # Step 3 - Keep only the most recent data (3 years plus the horizon)
    yearsOfData = 3
    recentCount = yearsOfData * 365 * 24 + horizon
    filteredSeries = util.filterRecent(resampledSeries, recentCount)
    del resampledSeries

    # Step 4 - Scale the series
    normalizedSeries = util.scaleSeries(filteredSeries)
    del filteredSeries

    # Step 5 - Split into training and testing
    trainingSeries, testingSeries = util.splitIntoTrainingAndTestingSeries(
        normalizedSeries, horizon)

    # Step 6 - Form the feature and target vectors for training.
    # Features are the values at the same hour on each of the previous
    # `period` days (i.e. 60 daily lags at -24h, -48h, ...); the target
    # is the current hour.
    period = 60
    featureIntervalList = [pd.Timedelta(hours=-24 * i)
                           for i in range(period, 0, -1)]
    targetIntervalList = [pd.Timedelta(hours=0)]

    featureTrainingVectors, targetTrainingVectors = tsi.TimeSeriesIntervalProcessor(
        trainingSeries, featureIntervalList, targetIntervalList).getProcessedData()

    # Prepend a bias (all-ones) column to the feature matrix
    featureTrainingVectors = np.hstack(
        (np.ones((featureTrainingVectors.shape[0], 1)), featureTrainingVectors))

    # Step 7 - Train the network; reservoir size is one tenth of the
    # number of training vectors.
    networkSize = int(featureTrainingVectors.shape[0] / 10)
    util.trainESNWithoutTuning(size=networkSize,
                               featureVectors=featureTrainingVectors,
                               targetVectors=targetTrainingVectors,
                               initialTransient=50,
                               inputConnectivity=1.0,
                               reservoirConnectivity=0.3,
                               inputScaling=0.5,
                               reservoirScaling=0.5,
                               spectralRadius=0.79,
                               leakingRate=0.30,
                               learningMethod=Utility.LearningMethod.Batch)

    # Step 8 - Predict the future
    predictedSeries = util.predictFutureWithFeatureInterval(
        trainingSeries, featureIntervalList, horizon)

    # Step 9 - De-scale both actual and predicted series
    actualSeries = util.descaleSeries(testingSeries)
    predictedSeries = util.descaleSeries(predictedSeries)

    return actualSeries, predictedSeries
def predictSeries(seriesName):
    """Select features, tune the leaking rate, train an ESN, and predict
    `horizon` hours ahead for the given profile.

    Parameters:
        seriesName: profile name; selects Datasets/<name>_time_interaction.csv

    Returns:
        (actualSeries, predictedSeries) — de-scaled test series and the
        de-scaled prediction over the same horizon.

    NOTE(review): relies on module-level `horizon`, `yearsOfData`,
    `networkParameters` and `Utility` being in scope, and mutates
    networkParameters['size'] as a side effect — confirm callers expect this.
    """
    # Dataset location
    directoryName = "Datasets/"
    datasetFileName = directoryName + seriesName + "_time_interaction.csv"

    util = Utility.SeriesUtility()

    # Step 1 - Convert the dataset into a pandas series
    series = util.convertDatasetsToSeries(datasetFileName)

    # Step 2 - Re-sample the series to hourly sums
    resampledSeries = util.resampleSeriesSum(series, "H")
    del series

    # Step 3 - Keep only the most recent data (yearsOfData years + horizon)
    recentCount = yearsOfData * 365 * 24 + horizon
    filteredSeries = util.filterRecent(resampledSeries, recentCount)
    del resampledSeries

    # Step 4 - Scale the series
    normalizedSeries = util.scaleSeries(filteredSeries)
    del filteredSeries

    # Step 5 - Split into training, validation and testing series
    trainingSeries, testingSeries = util.splitIntoTrainingAndTestingSeries(
        normalizedSeries, horizon)
    # Keep the full pre-test history for the final prediction step
    availableSeries = trainingSeries
    trainingSeries, validationSeries = util.splitIntoTrainingAndValidationSeries(
        trainingSeries, trainingSetRatio=0.9)

    # Reservoir size: one tenth of the length of the training set
    networkParameters['size'] = int(trainingSeries.shape[0] / 10)

    # Step 6 - Select the best depth/features on the validation set
    bestDepth, bestFeaturesIndices, bestFeatures, targetVectors = util.getBestFeatures(
        trainingSeries, validationSeries, networkParameters,
        Utility.FeatureSelectionMethod.Pattern_Analysis,
        args={'threshold': 0.5})

    # Step 7 - Tune the leaking rate by brute force
    optimalLeakingRate = util.getBestLeakingRate(bestFeaturesIndices, bestDepth,
                                                 bestFeatures, targetVectors,
                                                 trainingSeries, validationSeries,
                                                 networkParameters)

    # Step 8 - Train the network with the selected features and tuned rate
    util.trainESNWithoutTuning(
        size=networkParameters['size'],
        featureVectors=bestFeatures,
        targetVectors=targetVectors,
        initialTransient=networkParameters['initialTransient'],
        inputConnectivity=networkParameters['inputConnectivity'],
        reservoirConnectivity=networkParameters['reservoirConnectivity'],
        inputScaling=networkParameters['inputScaling'],
        reservoirScaling=networkParameters['reservoirScaling'],
        spectralRadius=networkParameters['spectralRadius'],
        leakingRate=optimalLeakingRate)

    # Step 9 - Predict the future from the full available history
    predictedSeries = util.predict(util.esn, availableSeries, bestDepth,
                                   horizon, bestFeaturesIndices)

    # Step 10 - De-scale both actual and predicted series
    actualSeries = util.descaleSeries(testingSeries)
    predictedSeries = util.descaleSeries(predictedSeries)

    return actualSeries, predictedSeries