Example #1
def network(is_test):
    data_shape = [-1, 3, IMAGE_RESIZE_SIZE, IMAGE_RESIZE_SIZE]

    if not is_test:
        # Training reader: multi-pass RecordIO reader, shuffled and batched.
        file_obj = fluid.layers.open_files(
            filenames=['train.recordio'],
            shapes=[data_shape, [-1, 1]],
            lod_levels=[0, 0],
            dtypes=['float32', 'float32'],
            pass_num=PASS_NUM,
        )
        file_obj = fluid.layers.double_buffer(file_obj)
        file_obj = fluid.layers.shuffle(file_obj, buffer_size=8192)
        file_obj = fluid.layers.batch(file_obj, batch_size=BATCH_SIZE)
    else:
        # Test reader: a single pass over the test file, no shuffling.
        file_obj = fluid.layers.open_files(
            filenames=['test.recordio'],
            shapes=[data_shape, [-1, 1]],
            lod_levels=[0, 0],
            dtypes=['float32', 'float32'],
        )
    # Build the model inside a unique_name guard so the train and test
    # networks generate matching parameter names and share weights.
    with fluid.unique_name.guard():
        image_layer, label_layer = fluid.layers.read_file(file_obj)
        prediction_layer = resnet(image_layer)
        cost_layer = log_loss(input=prediction_layer, label=label_layer)
        avg_cost_layer = fluid.layers.mean(cost_layer)
    return avg_cost_layer, prediction_layer, label_layer
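
network() only builds the reader and the cost/prediction ops. Below is a minimal, hypothetical sketch of how the training side might be driven in the old fluid 1.x style; the optimizer, learning rate, and the EOF-based loop are assumptions, not part of the original example.

import paddle.fluid as fluid  # assumed to be imported by the original file

avg_cost, prediction, label = network(is_test=False)
fluid.optimizer.Adam(learning_rate=1e-3).minimize(avg_cost)  # optimizer choice is an assumption

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())

try:
    while True:
        # The multi-pass file reader raises EOFException once PASS_NUM passes are exhausted.
        loss_value, = exe.run(fetch_list=[avg_cost])
except fluid.core.EOFException:
    pass
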
###

# Assumed imports for this fragment (LR and AUC are aliases used below):
import numpy as np
from sklearn.linear_model import LogisticRegression as LR
from sklearn.metrics import accuracy_score, log_loss, roc_auc_score as AUC

lr = LR()  # default param values
lr.fit(p_train.reshape(-1, 1), y_train)  # LR needs X to be 2-dimensional
p_calibrated = lr.predict_proba(p_test.reshape(-1, 1))[:, 1]

###

acc = accuracy_score(y_test, np.round(p_test))
acc_calibrated = accuracy_score(y_test, np.round(p_calibrated))

auc = AUC(y_test, p_test)
auc_calibrated = AUC(y_test, p_calibrated)

ll = log_loss(y_test, p_test)
ll_calibrated = log_loss(y_test, p_calibrated)

print "accuracy - before/after:", acc, "/", acc_calibrated
print "AUC - before/after:     ", auc, "/", auc_calibrated
print "log loss - before/after:", ll, "/", ll_calibrated

"""
accuracy - before/after: 0.847788697789 / 0.846805896806
AUC - before/after:      0.878139845077 / 0.878139845077
log loss - before/after: 0.630525772871 / 0.364873617584
"""

###

print "creating diagrams..."
Example #3
        Y[index] = int(data[i, 0])  # Store the type of the trip of the current visit
    else:  # If visit number has not changed, it's still the same visit
        num_products += 1  # Increase the number of products of the current visit
        X[index, departmentIndex[data[i, 5]]] += 1
        X[index, length - 1] += float(data[i, 4])
        if data[i, 6] == "":
            X[index, length - 2] = 0
        else:
            X[index, length - 2] += float(data[i, 6])
        cnt += 1
        # X[index,filenumberIndex[data[i,6]]] += 1

kf = KFold(X.shape[0], n_folds=10)  # Initialize cross validation


iterations = 0  # Number of folds processed
totalLogloss = 0  # Accumulated log loss across folds

for trainIndex, testIndex in kf:
    trainSet = X[trainIndex]
    testSet = X[testIndex]
    trainLabels = Y[trainIndex]
    testLabels = Y[testIndex]

    # classify() and log_loss() are project-specific helpers defined elsewhere
    # (this log_loss takes a third argument, unlike sklearn's).
    predictions, trips = classify(trainSet, trainLabels, testSet)
    logloss = log_loss(testLabels, predictions, trips)
    print "Log Loss: ", logloss
    totalLogloss += logloss
    iterations += 1
print "Average Log Loss: ", totalLogloss / iterations
Example #4
from image import Image
from preprocess import Preprocess
from classifier import Classifier
from log_loss import log_loss
from postprocess import PostProcess

genders = Image.genders()  # target labels
d, _ = Image.data()
matrix = Preprocess.to_matrix(d)
print matrix.shape
matrix = Preprocess.remove_constants(matrix)  # drop constant columns
print matrix.shape
matrix = Preprocess.scale(matrix)
matrix = Preprocess.polynomial(matrix, 2)     # degree-2 polynomial features
matrix = Preprocess.scale(matrix)
print matrix.shape
matrix = matrix.tolist()
half = len(matrix) / 2                        # first half for training, second half for validation
train, cv = matrix[:half], matrix[half:]
train_genders, cv_genders = genders[:half], genders[half:]
cv_genders = cv_genders[0::4]
preds = Classifier.ensemble_preds(train, train_genders, cv)
print "Score: ", log_loss(preds, cv_genders)