def generateDataset():
    """Build the first preprocessed dataset and save it as a CSV.

    Steps, in order:
      1. Load headings and rows from "results.csv".
      2. Load the pit-stop lookup from "pitStops.csv" and the race-lap
         lookup from "lapTimes.csv".
      3. Hand the result headings/rows to InsertFirstLapChange and
         InsertPitStopsTime together with the matching lookup.
         NOTE(review): presumably both helpers add a column to the headings
         and rows in place; confirm against their definitions.
      4. Put the headings in front of the rows and write everything out
         under the name "PreprocessedDataset1".

    Takes no arguments and returns None; the output is the saved file.
    """
    ResultHeadings, ResultData = getDataFromResults("results.csv")
    # Only the lookup dictionaries of these loaders are needed here; the
    # remaining returned values are unpacked (to keep the arity check) and
    # deliberately ignored.
    _pitHeadings, _pitRows, PitStopsDict = getPitStops("pitStops.csv")
    _lapHeadings, _lapRows, _lapDict, RaceLapDict = getLapTimes("lapTimes.csv")

    InsertFirstLapChange(ResultHeadings, ResultData, RaceLapDict)
    InsertPitStopsTime(ResultHeadings, ResultData, PitStopsDict)

    # The heading row becomes the first row of the saved table.
    ResultData.insert(0, ResultHeadings)
    saveLListAsCSV("PreprocessedDataset1", ResultData)
def generateDataset():
    """Build the first preprocessed dataset, filter it, and save it as a CSV.

    Steps, in order:
      1. Load headings and rows from "results.csv".
      2. Load the pit-stop lookup from "pitStops.csv" and the race-lap
         lookup from "lapTimes.csv".
      3. Hand the result headings/rows to InsertFirstLapChange and
         InsertPitStopsTime together with the matching lookup.
         NOTE(review): presumably both helpers add a column to the headings
         and rows in place; confirm against their definitions.
      4. Drop every row whose column 7 compares between "A" and "Z"
         (inclusive, plain string comparison).
         NOTE(review): column 7 presumably holds a letter code instead of a
         numeric value for such rows; verify against the results schema.
      5. Put the headings in front of the remaining rows and write them out
         under the name "PreprocessedDataset1".

    Takes no arguments and returns None; the output is the saved file.
    """
    ResultHeadings, ResultData = getDataFromResults("results.csv")
    # Only the lookup dictionaries of these loaders are needed here; the
    # remaining returned values are unpacked (to keep the arity check) and
    # deliberately ignored.
    _pitHeadings, _pitRows, PitStopsDict = getPitStops("pitStops.csv")
    _lapHeadings, _lapRows, _lapDict, RaceLapDict = getLapTimes("lapTimes.csv")

    InsertFirstLapChange(ResultHeadings, ResultData, RaceLapDict)
    InsertPitStopsTime(ResultHeadings, ResultData, PitStopsDict)

    # Keep a row unless its column 7 falls in the range "A".."Z".
    # A chained comparison expresses the range test directly; the earlier
    # bitwise `&` only worked because both operands were parenthesised bools.
    keptRows = [row for row in ResultData if not ("A" <= row[7] <= "Z")]

    # The heading row becomes the first row of the saved table.
    keptRows.insert(0, ResultHeadings)
    saveLListAsCSV("PreprocessedDataset1", keptRows)
def _summariseFirstLapChanges(changes):
    """Return [min, max, q1, median, q3, mean, sd] for a non-empty int list.

    Quartiles and median are picked by index from the sorted values
    (positions n//4, n//2 and n//4*3), without interpolation. The standard
    deviation is the population form (divides by n).
    """
    ordered = sorted(changes)
    count = len(ordered)
    return [
        ordered[0],
        ordered[-1],
        # Floor division keeps these usable as list indices regardless of
        # whether `/` means classic or true division in this module.
        ordered[count // 4],
        ordered[count // 2],
        ordered[count // 4 * 3],
        # np.mean / np.std always compute in floating point; dividing the
        # integer sums with `/` truncated the result under Python 2.
        float(np.mean(ordered)),
        float(np.std(ordered)),
    ]


def getCircuitSpecificStatisticsOfFirstLapChange(headings):
    """Write per-driver, per-circuit statistics of the first-lap change.

    The twelve output column names are appended to ``headings`` (the caller's
    list is modified in place) and that list is used as the first row of the
    output.

    Input is obtained through getCircuitRaceList("races.csv"),
    getPreprocessedData("PreprocessedDataset1.csv"), getRecentTracksName()
    and getDriverIDName("drivers.csv"). For every preprocessed row, column 9
    is collected under the driver id in column 1 and under every circuit
    whose race collection contains the race id in column 0.

    Values equal to "-999" are left out.
    NOTE(review): "-999" is presumably the missing-value marker written by
    the preprocessing step; confirm.

    One row is produced per (driver, circuit) pair that has at least one
    remaining value; pairs without any are skipped. Drivers and circuits are
    ordered by the integer value of their ids. Each row holds: driver id,
    driver name, circuit id, circuit name, the pair's values as text (each
    value followed by a comma, in input order), then min, max, first
    quartile, median, third quartile, mean and standard deviation.

    The table is saved with saveLListAsCSV under "PreprocessedDataset3".
    Returns None.
    """
    headings.extend([
        "DriverId",
        "DriverName",
        "CircuitId",
        "CircuitName",
        "FirstLapChangeList",
        "FirstLapChangeMin",
        "FirstLapChangeMax",
        "FirstLapChangeFirstQuartile",
        "FirstLapChangeMedian",
        "FirstLapChangeThirdQuartile",
        "FirstLapChangeMean",
        "FirstLapChangeSd",
    ])

    circuitRaceList = getCircuitRaceList("races.csv")
    _preprocessedHeadings, PreprocessedDataset = getPreprocessedData(
        "PreprocessedDataset1.csv")
    circuitNameDict = getRecentTracksName()
    driverIDNameDict = getDriverIDName("drivers.csv")

    # Temporary structure: {driverId: {circuitId: [first lap change, ...]}}
    changesByDriver = defaultdict(lambda: defaultdict(list))
    for data in PreprocessedDataset:
        for circuitKey, raceIds in circuitRaceList.items():
            if data[0] in raceIds:
                changesByDriver[data[1]][circuitKey].append(data[9])

    rows = [headings]
    for driverId in sorted(changesByDriver, key=int):
        driverName = driverIDNameDict[driverId]
        changesByCircuit = changesByDriver[driverId]
        for circuitId in sorted(changesByCircuit, key=int):
            # Looked up before the emptiness check so that an unknown
            # circuit id still raises KeyError, as it always has.
            circuitName = circuitNameDict[circuitId]

            validValues = [
                value for value in changesByCircuit[circuitId]
                if value != "-999"
            ]
            if not validValues:
                continue

            # Built fresh for every circuit so the text column matches the
            # values the statistics are computed from; a per-driver
            # accumulator would leak earlier circuits' values into later
            # rows. The trailing comma after each value is kept for
            # compatibility with existing consumers of the file.
            changeListText = "".join(value + "," for value in validValues)

            stats = _summariseFirstLapChanges(
                [int(value) for value in validValues])
            rows.append(
                [driverId, driverName, circuitId, circuitName, changeListText]
                + stats)

    saveLListAsCSV("PreprocessedDataset3", rows)
def createDataFrame(headings, data):
    """Prepend a fixed header row to ``data`` and save it as a CSV.

    ``headings`` is accepted for call compatibility but its value is not
    used: the header row is always ["driverId", "ranking", "circuits"].
    ``data`` is modified in place (the header row is inserted at index 0)
    and then passed to saveLListAsCSV under the name
    "DriversTopPerformingCircuits". Returns None.
    """
    columnNames = ["driverId", "ranking", "circuits"]
    data.insert(0, columnNames)
    saveLListAsCSV("DriversTopPerformingCircuits", data)
_listOfList.append([ConstructorIDName[data[2]],\ data[1], \ DriverName[data[1]],\ circuitIDNameDict[_circuitID],\ _totalpoints[data[1]]/len(_driverYear[data[1]]),\ returnCategoryNumber(rank) ]) return {"listofList": _listOfList} if __name__ == '__main__': dataset = getRaceDriverConstRankPts("results.csv") DPS = getDriverIDPointsConstructor(dataset) # print DPS["listofList"] # newArray = np.array(DPS["listofList"])[] DCAPR = getDriverIDConstructorAveragePointsRanking(dataset) # print DCAPR FilteredDataset = filterDataAccordingToRecentDrivers(DPS["listofList"]) FilteredDCAPR = filterDataAccordingToRecentDrivers2(DCAPR["listofList"]) saveLListAsCSV("PreprocessedDataset4", FilteredDataset) saveLListAsCSV("PreprocessedDataset5", FilteredDCAPR) headings = [] createDataFrame(headings, getDriverTopPerformingCircuits())
_ret_list_of_list = list(listOfList) _ret_list_of_list.insert(0, headings) return _ret_list_of_list elif (typeOfDataFrame == "similarityList"): headings = ["circuitId1", "circuitId2", "n"] for i in xrange(1000): headings.append("P" + str(i + 1)) _ret_list_of_list = listOfList _ret_list_of_list.insert(0, headings) return _ret_list_of_list if __name__ == "__main__": print "start" """ obtain the frequency of active circuit and plot a scatter plot """ circuitDict_InfoList = getCircuitInfoList("new_track_data.csv") # print len(circuitDict_InfoList["20"]) # shiftRightNTimes(4, circuitDict_InfoList["20"]) # optimisedSimilarity(circuitDict_InfoList["20"], circuitDict_InfoList["18"]) covarianceMatrixHeadings, covarianceMatrix, similarityList = getCovarianceMatrix( circuitDict_InfoList) CovarianceMatrix = createDataFrame(covarianceMatrix, "covarianceMatrix", covarianceMatrixHeadings) SimilarityInfoList = createDataFrame(similarityList, "similarityList", []) saveLListAsCSV("CovarianceMatrixOfCircuits", CovarianceMatrix) saveLListAsCSV("SimilarityInfoList", SimilarityInfoList) print "end"