def network(is_test):
    """Assemble the reader -> resnet -> log-loss graph.

    Args:
        is_test: when truthy, read 'test.recordio' with no extra reader
            decorators; otherwise read 'train.recordio' (opened with
            pass_num=PASS_NUM) and wrap the reader with double_buffer,
            shuffle (buffer_size=8192) and batch (batch_size=BATCH_SIZE),
            in that order.

    Returns:
        A tuple (avg_cost, prediction, label): the mean of the log-loss
        output, the output of ``resnet`` on the image slot, and the label
        slot returned by ``read_file``.
    """
    image_shape = [-1, 3, IMAGE_RESIZE_SIZE, IMAGE_RESIZE_SIZE]

    # Slot description shared by the train and test readers:
    # slot 0 is the image, slot 1 is a single-column label.
    slot_spec = dict(
        shapes=[image_shape, [-1, 1]],
        lod_levels=[0, 0],
        dtypes=['float32', 'float32'],
    )

    if is_test:
        reader = fluid.layers.open_files(
            filenames=['test.recordio'], **slot_spec)
    else:
        reader = fluid.layers.open_files(
            filenames=['train.recordio'], pass_num=PASS_NUM, **slot_spec)
        # The decorator order is kept exactly as in the original code.
        reader = fluid.layers.double_buffer(reader)
        reader = fluid.layers.shuffle(reader, buffer_size=8192)
        reader = fluid.layers.batch(reader, batch_size=BATCH_SIZE)

    # Everything that creates named variables runs under a fresh
    # unique-name guard.
    with fluid.unique_name.guard():
        image, label = fluid.layers.read_file(reader)
        prediction = resnet(image)
        per_sample_cost = log_loss(input=prediction, label=label)
        avg_cost = fluid.layers.mean(per_sample_cost)

    return avg_cost, prediction, label
###

# Fit a calibrator on the raw training scores and use it to map the raw test
# scores to calibrated probabilities.
# NOTE(review): LR is presumably sklearn's LogisticRegression (i.e. Platt
# scaling) -- confirm against the imports at the top of the file.
lr = LR()  # default param values
lr.fit(p_train.reshape(-1, 1), y_train)  # LR needs X to be 2-dimensional
p_calibrated = lr.predict_proba(p_test.reshape(-1, 1))[:, 1]

###

# Compare the raw and the calibrated scores with three metrics.
acc = accuracy_score(y_test, np.round(p_test))
acc_calibrated = accuracy_score(y_test, np.round(p_calibrated))

auc = AUC(y_test, p_test)
auc_calibrated = AUC(y_test, p_calibrated)

ll = log_loss(y_test, p_test)
ll_calibrated = log_loss(y_test, p_calibrated)

# Single-argument parenthesized prints run on both Python 2 and Python 3.
# The format strings reproduce the spacing that the former
# `print a, b, c` statements emitted (one space between items).
print("accuracy - before/after: %s / %s" % (acc, acc_calibrated))
print("AUC - before/after:  %s / %s" % (auc, auc_calibrated))
print("log loss - before/after: %s / %s" % (ll, ll_calibrated))

"""
accuracy - before/after: 0.847788697789 / 0.846805896806
AUC - before/after: 0.878139845077 / 0.878139845077
log loss - before/after: 0.630525772871 / 0.364873617584
"""

###

print("creating diagrams...")
# Fragment: finishes the per-row accumulation of features into X / labels into
# Y, then runs a 10-fold cross-validation (n_folds=10) and prints the log loss
# of every fold followed by the average over all folds.
# NOTE(review): the statements below are collapsed onto one physical line and
# the enclosing loop header is not visible here -- restore the original line
# breaks before running. Also uses Python 2 print statements, and KFold is
# presumably the legacy sklearn.cross_validation.KFold(n, n_folds=...) -- confirm.
Y[index] = int(data[i, 0]) # Store the type of the trip of the current visit else: # If visit number has not changed, it's still the same visit num_products += 1 # Increase the number of products of the current visit X[index, departmentIndex[data[i, 5]]] += 1 X[index, length - 1] += float(data[i, 4]) if data[i, 6] == "": X[index, length - 2] = 0 else: X[index, length - 2] += float(data[i, 6]) cnt += 1 # X[index,filenumberIndex[data[i,6]]] += 1 kf = KFold(X.shape[0], n_folds=10) # Initialize cross validation iterations = 0 # Variable that will store the total iterations totalLogloss = 0 # Running sum of the log loss of every fold for trainIndex, testIndex in kf: trainSet = X[trainIndex] testSet = X[testIndex] trainLabels = Y[trainIndex] testLabels = Y[testIndex] predictions, trips = classify(trainSet, trainLabels, testSet) logloss = log_loss(testLabels, predictions, trips) print "Log Loss: ", logloss totalLogloss += logloss iterations += 1 print "Average Log Loss: ", totalLogloss / iterations
# Fragment: finishes the per-row accumulation of features into X, then runs a
# 10-fold cross-validation (n_folds=10) and prints the log loss of every fold
# followed by the average over all folds.
# NOTE(review): the statements below are collapsed onto one physical line and
# the `if` that this `else` belongs to is not visible here -- restore the
# original line breaks before running. Also uses Python 2 print statements, and
# KFold is presumably the legacy sklearn.cross_validation.KFold(n, n_folds=...) -- confirm.
else: # If visit number has not changed, it's still the same visit num_products += 1 # Increase the number of products of the current visit X[index,departmentIndex[data[i,5]]] += 1 X[index,length -1] += float(data[i,4]) if data[i,6] == '' : X[index, length - 2] = 0 else: X[index, length - 2] += float(data[i,6]) cnt += 1 #X[index,filenumberIndex[data[i,6]]] += 1 kf = KFold(X.shape[0], n_folds=10) # Initialize cross validation iterations = 0 # Variable that will store the total iterations totalLogloss = 0 # Running sum of the log loss of every fold for trainIndex, testIndex in kf: trainSet = X[trainIndex] testSet = X[testIndex] trainLabels = Y[trainIndex] testLabels = Y[testIndex] predictions, trips = classify(trainSet, trainLabels, testSet) logloss = log_loss(testLabels, predictions, trips) print 'Log Loss: ', logloss totalLogloss += logloss iterations += 1 print 'Average Log Loss: ', totalLogloss/iterations
from image import Image
from preprocess import Preprocess
from classifier import Classifier
from log_loss import log_loss
from postprocess import PostProcess

# Driver script: load the data, build a degree-2 polynomial feature matrix,
# train on the first half of the rows and score the ensemble's predictions
# for the second half with log loss.

genders = Image.genders()
d, _ = Image.data()

# Feature pipeline; the matrix shape is printed after each step that is
# expected to change it.
matrix = Preprocess.to_matrix(d)
print(matrix.shape)
matrix = Preprocess.remove_constants(matrix)
print(matrix.shape)
matrix = Preprocess.scale(matrix)
matrix = Preprocess.polynomial(matrix, 2)
matrix = Preprocess.scale(matrix)  # re-scale after the polynomial expansion
print(matrix.shape)

matrix = matrix.tolist()

# Floor division keeps `half` an int on Python 3 as well; on Python 2 it is
# identical to the former `len(matrix)/2`.
half = len(matrix) // 2
train, cv = matrix[:half], matrix[half:]
train_genders, cv_genders = genders[:half], genders[half:]

# Only every 4th cross-validation label is kept.
# NOTE(review): presumably ensemble_preds returns one prediction per group of
# 4 consecutive cv rows -- confirm against Classifier.ensemble_preds.
cv_genders = cv_genders[0::4]

preds = Classifier.ensemble_preds(train, train_genders, cv)

# Single-argument print works on Python 2 and 3; the two spaces reproduce the
# output of the former `print "Score: ", value` statement.
print("Score:  %s" % (log_loss(preds, cv_genders),))