def GetProba(drive,driverID,tripInd):
    """Average best-component Gaussian density of one trip's (v, acc) samples.

    Loads the cleaned CSV for trip ``tripInd`` of driver ``driverID``, builds
    a ``Trip`` from it, computes its features and evaluates every
    (v, acc) sample against each row of ``drive``.

    Parameters
    ----------
    drive : DataFrame
        One row per Gaussian component. For row ``i`` the first two columns
        are used as the mean, columns 2:4 as the first covariance row and
        the remaining columns as the second covariance row.
        NOTE(review): presumably the table written out by SaveGmm -- confirm.
    driverID : driver identifier used to build the CSV file name.
    tripInd : trip identifier used to build the CSV file name.

    Returns
    -------
    float
        Mean over samples of the largest per-component density. Only the
        component densities are used; no mixture weights enter the result.

    NOTE(review): this file defines GetProba twice with different first
    arguments (``drive`` here, ``clf`` in the other); whichever definition
    executes last is the one bound to the name.
    """
    driverDir = '/home/user1/Desktop/SharedFolder/Kaggle/DriversCleaned/'+str(driverID)
    csv_path = driverDir+'_' + str(tripInd)+'.csv'
    trip = Trip(driverID,tripInd,pd.read_csv(csv_path))

    # Feature pipeline: speed and acceleration first, then the feature table.
    trip.getSpeed()
    trip.getAcc()
    #trip.getRadius()
    #trip.getCacc()
    trip.getFeatures()
    samples = trip.features[['v','acc']]

    n_components = drive.shape[0]
    densities = np.zeros((samples.shape[0], n_components))
    for k in range(n_components):
        component_mean = array(drive.ix[k,:2])
        component_cov = [array(drive.ix[k,2:4]), array(drive.ix[k,4:])]
        densities[:,k] = multivariate_normal.pdf(samples, mean=component_mean, cov=component_cov)

    # Keep, for every sample, the density of its best-fitting component.
    best_density = densities.max(axis=1)
    return best_density.mean()
def GetProba(clf,driverID,tripID):
    """Return the mean per-sample score of one trip under a fitted model.

    Loads the cleaned CSV for trip ``tripID`` of driver ``driverID``, builds
    a ``Trip`` from it, computes its features and scores each (v, acc)
    sample individually with ``clf.score``.

    Parameters
    ----------
    clf : object exposing ``score(sample)``.
        NOTE(review): presumably a fitted sklearn.mixture.GMM, in which case
        the score is a log-likelihood -- confirm against the caller.
    driverID : driver identifier used to build the CSV file name.
    tripID : trip identifier used to build the CSV file name.

    Returns
    -------
    float
        Mean of the per-sample scores. The same value is also printed.

    NOTE(review): this file defines GetProba twice with different first
    arguments (``clf`` here, ``drive`` in the other); whichever definition
    executes last is the one bound to the name.
    """
    #print driverID,tripID
    driverDir = '/home/user1/Desktop/SharedFolder/Kaggle/DriversCleaned/'+str(driverID)
    # Use the tripID argument. The name tripInd is not defined inside this
    # function, so referring to it would pick up a module-level variable (or
    # raise NameError) and ignore the trip the caller asked for.
    df = pd.read_csv(driverDir+'_' + str(tripID)+'.csv')
    trip = Trip(driverID,tripID,df)
    trip.getSpeed()
    trip.getAcc()
    #trip.getRadius()
    #trip.getCacc()
    trip.getFeatures()
    X=trip.features[['v','acc']]

    # Score one sample at a time; X.loc[i] is a label lookup, so this assumes
    # the feature table is indexed 0..n-1 -- TODO confirm.
    probas = np.zeros(X.shape[0])
    for i in range(X.shape[0]):
        probas[i]=clf.score(X.loc[i])

    mean_score = probas.mean()
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print(mean_score)
    return mean_score
def GetFeatures(driverID,j):
    """Fit a mixture model on all trips of one driver and save it as CSV.

    Reads trips 1..200 of ``driverID`` from the cleaned-drivers folder,
    computes each trip's features, stacks them, keeps only rows with
    ``vlim[0] < v < vlim[1]`` and ``clim[0] < acc < clim[1]`` (strict
    bounds), fits a model on the (v, acc) columns via ``GetGmm``, converts it
    with ``SaveGmm`` and writes the result to
    ``.../FeaturesCleaned/GMM/All/<driverID>.csv`` without the index.

    Parameters
    ----------
    driverID : driver identifier used to build input and output file names.
    j : unused; kept so existing callers keep working.

    Returns
    -------
    int
        Always 0.

    Relies on the module-level names ``selectedCols``, ``vlim``, ``clim``,
    ``GetGmm`` and ``SaveGmm``.
    """
    driverDir = '/home/user1/Desktop/SharedFolder/Kaggle/DriversCleaned/'+str(driverID)

    # Collect the per-trip frames and concatenate once at the end. Growing a
    # DataFrame with append() inside the loop re-copies everything gathered
    # so far on each of the 200 iterations. The empty frame goes first so the
    # selectedCols columns are present in the result, as before.
    frames = [pd.DataFrame(columns=selectedCols)]
    for tripID in range(1,201):
        trip = Trip(driverID,tripID,pd.read_csv(driverDir+'_' + str(tripID) + '.csv'))
        trip.getSpeed()
        trip.getAcc()
        #trip.getRadius()
        #trip.getCacc()
        trip.getFeatures()
        # NOTE: a disabled variant (formerly kept here as a string literal)
        # restricted each trip to rows within roughly five samples of a speed
        # below vlim[0] and dropped rows with v == 0 before stacking.
        frames.append(trip.features)

    X = pd.concat(frames)
    X=X.reset_index(drop=True)
    # Strict range filters on speed, then on acceleration.
    X=X[(X.v<vlim[1]) & (X.v>vlim[0])]
    X=X[(X.acc<clim[1]) & (X.acc>clim[0])]
    X=X.reset_index(drop=True)

    clf=GetGmm(np.asanyarray(X[['v','acc']]))
    cos = SaveGmm(clf)
    cos.to_csv('/home/user1/Desktop/SharedFolder/Kaggle/FeaturesCleaned/GMM/All/' + str(driverID) + '.csv', index=False)
    #del cos
    return 0
import matplotlib.pyplot as plt import matplotlib as mpl from sklearn.mixture import GMM from sklearn.neighbors import KernelDensity as kde from scipy.stats import multivariate_normal from scipy.spatial.distance import mahalanobis # <codecell> driverID = 1 driverDir = '/home/user1/Desktop/SharedFolder/Kaggle/DriversCleaned/'+str(driverID) tripInd = 1 df = pd.read_csv(driverDir+'_' + str(tripInd)+'.csv') trip = Trip(driverID,tripInd,df) trip.getSpeed() trip.getAcc() #trip.getRadius() #trip.getCacc() trip.getFeatures() X=trip.features[['v','acc']] # <codecell> def GetProba(drive,driverID,tripInd): driverDir = '/home/user1/Desktop/SharedFolder/Kaggle/DriversCleaned/'+str(driverID) df = pd.read_csv(driverDir+'_' + str(tripInd)+'.csv') trip = Trip(driverID,tripInd,df) trip.getSpeed() trip.getAcc() #trip.getRadius() #trip.getCacc()