Пример #1
0
import pandas as pd
import time

from sklearn.ensemble import RandomForestClassifier
from IO import Input
from IO import Output

# Timestamp used to report how long each stage of the script takes.
start_time = time.time()

# Read the training features and labels, followed by the validation features.
df_trainset_caf = Input.load_trainset_caffefeatures()
df_trainset_lab = Input.load_trainset_labels()
df_validationset_caf = Input.load_validationset_caffefeatures()

# Report the loading time, then restart the clock for the next stage.
elapsed = round(time.time() - start_time, 2)
print("--- load data: %s seconds ---" % elapsed)
start_time = time.time()

# Short aliases for the model inputs.
x_train, y_train = df_trainset_caf, df_trainset_lab
x_test = df_validationset_caf

# Fit a random forest with 500 trees on the training set.
rf = RandomForestClassifier(n_estimators=500)
rf.fit(X=x_train, y=y_train)

# Report the training time, then restart the clock for the next stage.
elapsed = round(time.time() - start_time, 2)
print("--- train model: %s seconds ---" % elapsed)
start_time = time.time()

# Predict
Пример #2
0
# Filenames that identify the rows belonging to each split of the data.
trainset_filenames = Input.load_trainset_filenames()
validationset_filenames = Input.load_validationset_filenames()
traindata_filenames = Input.load_traindata_filenames()
testset_filenames = Input.load_testdata_filenames()

# Load the features; the first CSV column becomes the row index.
feat = pd.read_csv('skinTrainFeatures.csv', index_col = 0)

# Select the feature rows for each dataset by index label.
# `.loc` replaces the `.ix` indexer, which was deprecated in pandas 0.20 and
# removed in pandas 1.0.
# NOTE(review): `.loc` raises KeyError for labels that are missing from the
# index -- presumably every filename is present in the CSV; confirm.
x_trainset = feat.loc[trainset_filenames]
x_validationset = feat.loc[validationset_filenames]
x_testset = feat.loc[testset_filenames]
x_traindata = feat.loc[traindata_filenames]

# Load the labels for each dataset as numpy arrays.
y_trainset = np.asarray(Input.load_trainset_labels())
y_validationset = np.asarray(Input.load_validationset_labels())
y_traindata = np.asarray(Input.load_traindata_labels())

# Restructure the features so they can be used in the SVM: rows sharing an
# index label are grouped and each group is passed through transformXY.
x_trainset = x_trainset.groupby(x_trainset.index).apply(transformXY)
x_validationset = x_validationset.groupby(x_validationset.index).apply(transformXY)
x_testset = x_testset.groupby(x_testset.index).apply(transformXY)
x_traindata = x_traindata.groupby(x_traindata.index).apply(transformXY)

# Normalise the data (all columns except the first): centre every row on its
# mean and scale it by its range.
# The per-row statistics are Series indexed like the rows, so they must be
# applied with axis=0. The plain `-` and `/` operators align a Series against
# the frame's columns instead, which produces NaN wherever labels do not match.
# NOTE(review): statistics are taken per row (axis=1) as originally written;
# confirm that per-sample rather than per-feature scaling is intended.
df = x_traindata.iloc[:,1:]
df_norm = (
    df.sub(df.mean(axis=1), axis=0)
    .div(df.max(axis=1) - df.min(axis=1), axis=0)
)
x_traindata = df_norm

#Train classifier