import time

import pandas as pd
from sklearn.ensemble import RandomForestClassifier

from IO import Input
from IO import Output

start_time = time.time()

# Load train data
df_trainset_caf = Input.load_trainset_caffefeatures()
df_trainset_lab = Input.load_trainset_labels()

# Load test data
df_validationset_caf = Input.load_validationset_caffefeatures()

print("--- load data: %s seconds ---" % round((time.time() - start_time), 2))
start_time = time.time()

x_train = df_trainset_caf
# Flatten the labels to the 1-D array scikit-learn expects
y_train = df_trainset_lab.values.ravel()
x_test = df_validationset_caf

# Train model
rf = RandomForestClassifier(n_estimators=500)
rf.fit(x_train, y_train)

print("--- train model: %s seconds ---" % round((time.time() - start_time), 2))
start_time = time.time()

# Predict
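# --- Hedged sketch: the original script ends at the "# Predict" comment. The lines
# --- below complete that step under assumptions: class probabilities are computed
# --- for the validation set, and Output.save_validationset_predictions is a
# --- hypothetical helper name (the real Output API is not shown in this snippet).
y_pred = rf.predict(x_test)         # hard class labels
y_prob = rf.predict_proba(x_test)   # per-class probabilities, one column per class

Output.save_validationset_predictions(
    pd.DataFrame(y_prob, index=x_test.index, columns=rf.classes_))

print("--- predict: %s seconds ---" % round((time.time() - start_time), 2))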
import numpy as np
import pandas as pd

from IO import Input

trainset_filenames = Input.load_trainset_filenames()
validationset_filenames = Input.load_validationset_filenames()
traindata_filenames = Input.load_traindata_filenames()
testset_filenames = Input.load_testdata_filenames()

# Load the features
feat = pd.read_csv('skinTrainFeatures.csv', index_col=0)

# Select the features for each dataset (.loc replaces the removed .ix indexer)
x_trainset = feat.loc[trainset_filenames]
x_validationset = feat.loc[validationset_filenames]
x_testset = feat.loc[testset_filenames]
x_traindata = feat.loc[traindata_filenames]

# Load the labels for each dataset
y_trainset = np.asarray(Input.load_trainset_labels())
y_validationset = np.asarray(Input.load_validationset_labels())
y_traindata = np.asarray(Input.load_traindata_labels())

# Restructure the features so they can be used in the SVM
# (transformXY is a helper defined elsewhere in the project)
x_trainset = x_trainset.groupby(x_trainset.index).apply(transformXY)
x_validationset = x_validationset.groupby(x_validationset.index).apply(transformXY)
x_testset = x_testset.groupby(x_testset.index).apply(transformXY)
x_traindata = x_traindata.groupby(x_traindata.index).apply(transformXY)

# Normalise the data: per-row min-max scaling, skipping the first column.
# .sub/.div with axis=0 are needed so the row statistics broadcast along rows
# instead of being aligned against the column labels.
df = x_traindata.iloc[:, 1:]
df_norm = df.sub(df.mean(axis=1), axis=0).div(df.max(axis=1) - df.min(axis=1), axis=0)
x_traindata = df_norm

# Train classifier
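# --- Hedged sketch: the original script stops at the "# Train classifier" comment.
# --- Below is one plausible continuation with scikit-learn's SVC, since the comments
# --- above mention an SVM; the kernel and C value are illustrative assumptions, and
# --- the validation features are given the same per-row scaling as x_traindata
# --- before scoring.
from sklearn.svm import SVC

clf = SVC(kernel='rbf', C=1.0)
clf.fit(x_traindata, y_traindata)

# Apply the same per-row normalisation to the validation features
x_val = x_validationset.iloc[:, 1:]
x_val = x_val.sub(x_val.mean(axis=1), axis=0).div(
    x_val.max(axis=1) - x_val.min(axis=1), axis=0)

print("validation accuracy: %.3f" % clf.score(x_val, y_validationset))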