Exemplo n.º 1
0
def generateDataset():
    """Build the first preprocessed dataset and write it out as a CSV.

    Loads the race results, the pit-stop table and the lap-time table, hands
    the result headings/rows to ``InsertFirstLapChange`` and
    ``InsertPitStopsTime`` (their return values are not used, so they
    presumably extend the headings and rows in place -- confirm against
    their definitions), then saves the headings followed by every result
    row under the name "PreprocessedDataset1".
    """
    result_headings, result_rows = getDataFromResults("results.csv")
    # Only the lookup dictionaries are needed from the two loaders below;
    # the headings and raw rows they also return are not used here.
    _, _, pit_stops_dict = getPitStops("pitStops.csv")
    _, _, _, race_lap_dict = getLapTimes("lapTimes.csv")

    InsertFirstLapChange(result_headings, result_rows, race_lap_dict)
    InsertPitStopsTime(result_headings, result_rows, pit_stops_dict)

    # The header row goes first so the saved file starts with column names.
    result_rows.insert(0, result_headings)
    saveLListAsCSV("PreprocessedDataset1", result_rows)
Exemplo n.º 2
0
def generateDataset():
    """Build the first preprocessed dataset, drop lettered rows, save as CSV.

    Loads the race results, the pit-stop table and the lap-time table, hands
    the result headings/rows to ``InsertFirstLapChange`` and
    ``InsertPitStopsTime`` (their return values are not used, so they
    presumably extend the headings and rows in place -- confirm against
    their definitions), filters the rows, and saves the headings followed by
    the remaining rows under the name "PreprocessedDataset1".

    A row is dropped when its column 7 compares between "A" and "Z"
    inclusive, i.e. the value starts with an uppercase letter.
    NOTE(review): column 7 presumably holds a finishing position that is a
    letter code for non-classified entries -- confirm against results.csv.
    """
    result_headings, result_rows = getDataFromResults("results.csv")
    # Only the lookup dictionaries are needed from the two loaders below;
    # the headings and raw rows they also return are not used here.
    _, _, pit_stops_dict = getPitStops("pitStops.csv")
    _, _, _, race_lap_dict = getLapTimes("lapTimes.csv")

    InsertFirstLapChange(result_headings, result_rows, race_lap_dict)
    InsertPitStopsTime(result_headings, result_rows, pit_stops_dict)

    # Chained comparison replaces the original bitwise "&" on two booleans;
    # the truth table is the same, the intent is clearer.
    kept_rows = [row for row in result_rows if not ("A" <= row[7] <= "Z")]

    saveLListAsCSV("PreprocessedDataset1", [result_headings] + kept_rows)
Exemplo n.º 3
0
def _summarizeFirstLapChanges(rawValues):
    """Summarise the first-lap position changes recorded for one circuit.

    ``rawValues`` is a list of strings. Entries equal to "-999" are skipped
    (presumably a missing-value marker -- confirm against the preprocessing
    step); every other entry must parse with ``int``.

    Returns a list ``[listText, min, max, firstQuartile, median,
    thirdQuartile, mean, sd]`` where ``listText`` is the kept values joined
    with a trailing comma after each one. When no value is kept, ``listText``
    is empty and every statistic is 0.0.
    """
    validValues = [value for value in rawValues if value != "-999"]
    listText = "".join(value + "," for value in validValues)
    changes = sorted(int(value) for value in validValues)
    if not changes:
        return [listText, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]

    count = len(changes)
    return [
        listText,
        changes[0],
        changes[-1],
        # "//" keeps these indices integral on Python 2 and Python 3 alike.
        changes[count // 4],
        changes[count // 2],
        changes[count // 4 * 3],
        # np.mean / np.std work in floating point; dividing the integer sums
        # with "/" truncated the mean and the variance under Python 2.
        np.mean(changes),
        np.std(changes),  # population standard deviation (divides by n)
    ]


def getCircuitSpecificStatisticsOfFirstLapChange(headings):
    """Write per-driver, per-circuit first-lap-change statistics to a CSV.

    The twelve output column names are appended to ``headings`` (the
    caller's list is modified). Rows of "PreprocessedDataset1.csv" are
    grouped by driver (column 1) and by every circuit whose race list in
    "races.csv" contains the row's column 0; column 9 is collected as the
    first-lap change. One output row is produced per (driver, circuit) pair,
    ordered by numeric driver id and then numeric circuit id, and the result
    is saved under the name "PreprocessedDataset3".

    Each row now describes only its own circuit: the value list and the
    statistics are rebuilt for every circuit instead of being carried over
    from the driver's previous circuits.

    Raises ``KeyError`` if a driver id or circuit id has no entry in the
    name lookups, and ``ValueError`` if a collected value is not an integer
    string.
    """
    headings.extend([
        "DriverId",
        "DriverName",
        "CircuitId",
        "CircuitName",
        "FirstLapChangeList",
        "FirstLapChangeMin",
        "FirstLapChangeMax",
        "FirstLapChangeFirstQuartile",
        "FirstLapChangeMedian",
        "FirstLapChangeThirdQuartile",
        "FirstLapChangeMean",
        "FirstLapChangeSd",
    ])

    circuitRaceList = getCircuitRaceList("races.csv")
    _, preprocessedRows = getPreprocessedData("PreprocessedDataset1.csv")
    circuitNameDict = getRecentTracksName()
    driverIDNameDict = getDriverIDName("drivers.csv")

    # {driverId: {circuitId: [first lap change, ...]}}
    changesByDriver = defaultdict(lambda: defaultdict(list))
    for row in preprocessedRows:
        for circuitId, raceIds in circuitRaceList.items():
            if row[0] in raceIds:
                changesByDriver[row[1]][circuitId].append(row[9])

    outputRows = [headings]
    for driverId in sorted(changesByDriver, key=int):
        driverName = driverIDNameDict[driverId]
        changesByCircuit = changesByDriver[driverId]
        for circuitId in sorted(changesByCircuit, key=int):
            outputRows.append(
                [driverId, driverName, circuitId, circuitNameDict[circuitId]]
                + _summarizeFirstLapChanges(changesByCircuit[circuitId]))

    saveLListAsCSV("PreprocessedDataset3", outputRows)
Exemplo n.º 4
0
def createDataFrame(headings, data):
	"""Save ``data`` under "DriversTopPerformingCircuits" with a fixed header.

	``headings`` is accepted for call compatibility but ignored: the header
	row is always ["driverId", "ranking", "circuits"].
	``data`` is a list of rows; it is copied rather than modified, so the
	caller's list does not gain a header row as a side effect.
	"""
	fixedHeadings = ["driverId", "ranking", "circuits"]
	saveLListAsCSV("DriversTopPerformingCircuits", [fixedHeadings] + list(data))
Exemplo n.º 5
0
		_listOfList.append([ConstructorIDName[data[2]],\
			data[1], \
			DriverName[data[1]],\
			circuitIDNameDict[_circuitID],\
			_totalpoints[data[1]]/len(_driverYear[data[1]]),\
			returnCategoryNumber(rank)
			])


	return {"listofList": _listOfList}



if __name__ == '__main__':
	# Script entry point: derive two driver/constructor tables from
	# results.csv, filter each one, save them, then save the drivers'
	# top-performing-circuits table.
	dataset = getRaceDriverConstRankPts("results.csv")
	# Both helpers below return a dict; only its "listofList" entry is used.
	DPS = getDriverIDPointsConstructor(dataset)
	# debug: print DPS["listofList"]
	# debug: newArray = np.array(DPS["listofList"])[]
	DCAPR = getDriverIDConstructorAveragePointsRanking(dataset)
	# debug: print DCAPR
	FilteredDataset = filterDataAccordingToRecentDrivers(DPS["listofList"])
	FilteredDCAPR = filterDataAccordingToRecentDrivers2(DCAPR["listofList"])
	saveLListAsCSV("PreprocessedDataset4", FilteredDataset)
	saveLListAsCSV("PreprocessedDataset5", FilteredDCAPR)
	# NOTE(review): an empty headings list is passed here; presumably
	# createDataFrame supplies its own header row -- confirm.
	headings = []
	createDataFrame(headings, getDriverTopPerformingCircuits())




Exemplo n.º 6
0
        _ret_list_of_list = list(listOfList)
        _ret_list_of_list.insert(0, headings)
        return _ret_list_of_list
    elif (typeOfDataFrame == "similarityList"):
        headings = ["circuitId1", "circuitId2", "n"]
        for i in xrange(1000):
            headings.append("P" + str(i + 1))
        _ret_list_of_list = listOfList
        _ret_list_of_list.insert(0, headings)
        return _ret_list_of_list


if __name__ == "__main__":
    print("start")

    # Load the circuit info lists, derive the covariance matrix and the
    # similarity list from them, then save each one with a header row.
    circuitDict_InfoList = getCircuitInfoList("new_track_data.csv")
    (covarianceMatrixHeadings, covarianceMatrix,
     similarityList) = getCovarianceMatrix(circuitDict_InfoList)

    CovarianceMatrix = createDataFrame(
        covarianceMatrix, "covarianceMatrix", covarianceMatrixHeadings)
    # The headings argument is passed as an empty list for this variant.
    SimilarityInfoList = createDataFrame(
        similarityList, "similarityList", [])

    saveLListAsCSV("CovarianceMatrixOfCircuits", CovarianceMatrix)
    saveLListAsCSV("SimilarityInfoList", SimilarityInfoList)

    print("end")