from scipy.stats.mstats import mquantiles

# Module-level string setting.
# NOTE(review): not referenced in the visible code — confirm where (or whether)
# it is read before changing or removing it.
thumbnail = "img"


def process_galaxy(galaxy_id, transform=0):
    """Compute features for a single training-set galaxy image.

    The image path is built as ``<data root>/images_training_rev1/<galaxy_id>.jpg``
    using a hard-coded data root, and handed to ``galaxy.get_features``.

    Args:
        galaxy_id: Image file stem. It is concatenated into the path, so it
            must be a string.
        transform: Forwarded unchanged to ``galaxy.get_features``
            (defaults to 0).

    Returns:
        Whatever ``galaxy.get_features`` returns when called with
        ``image_statistics=True``.
    """
    # Alternative data root: "/media/kevin/0026A5FD26A5F3B6/kaggle/galaxy/"
    data_root = "/vol/biomedic/users/kpk09/kaggle/galaxy/data/"
    image_dir = data_root + "images_training_rev1/"
    image_path = image_dir + galaxy_id + ".jpg"
    return galaxy.get_features(
        image_path,
        transform=transform,
        image_statistics=True,
    )


# Location of the training solutions CSV; the commented-out line below is an
# alternative path ending in the same file name.
f = "/vol/biomedic/users/kpk09/kaggle/galaxy/data/training_solutions_rev1.csv"
#f = "/media/kevin/0026A5FD26A5F3B6/kaggle/galaxy/training_solutions_rev1.csv"
# read_responses returns a pair, unpacked here as (responses, ids).
# NOTE(review): presumably one response row per galaxy id — confirm in galaxy.py.
responses, ids = galaxy.read_responses(f)

# Configure the root logger: INFO level, "time level message" format.
# NOTE: basicConfig without a `stream`/`filename` argument attaches a handler
# that writes to stderr, not stdout.
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')
# NOTE(review): compared element-wise against a class number below, so this is
# presumably a NumPy array of class labels — confirm.
mapping = galaxy.get_classes()

# Visit class labels 1 through 11 (xrange excludes the upper bound; Python 2).
for Class in xrange(1, 12):
    # Indices along the first axis where mapping equals the current class.
    classes = np.nonzero(mapping == Class)[0]

    # Fresh containers/counter for this class; how they are filled is not
    # visible in this excerpt.
    X = []
    Y = []
    svm_class = 0

    for c in classes:
        # NOTE(review): the excerpt ends here, so the rest of this loop body is
        # not visible. q is presumably a quantile level (mquantiles is
        # imported) — confirm.
        q = 0.95
from sklearn import decomposition

from scipy.stats.mstats import mquantiles

# Module-level string setting.
# NOTE(review): not referenced in the visible code — confirm where (or whether)
# it is read before changing or removing it.
thumbnail = "img"

def process_galaxy(galaxy_id, transform=0):
    """Extract features from the training image belonging to ``galaxy_id``.

    Looks for ``<galaxy_id>.jpg`` inside the ``images_training_rev1/``
    directory under a hard-coded data root and passes that path to
    ``galaxy.get_features``.

    Args:
        galaxy_id: String used as the image file name without extension
            (it is concatenated into the path).
        transform: Passed through as-is to ``galaxy.get_features``;
            defaults to 0.

    Returns:
        The value produced by ``galaxy.get_features`` with
        ``image_statistics=True``.
    """
    # Alternative data root: "/media/kevin/0026A5FD26A5F3B6/kaggle/galaxy/"
    base_dir = "/vol/biomedic/users/kpk09/kaggle/galaxy/data/"
    jpeg_path = base_dir + "images_training_rev1/" + galaxy_id + ".jpg"
    features = galaxy.get_features(
        jpeg_path,
        image_statistics=True,
        transform=transform,
    )
    return features
    
# Location of the training solutions CSV; the commented-out line below is an
# alternative path ending in the same file name.
f = "/vol/biomedic/users/kpk09/kaggle/galaxy/data/training_solutions_rev1.csv"
#f = "/media/kevin/0026A5FD26A5F3B6/kaggle/galaxy/training_solutions_rev1.csv"
# read_responses returns a pair, unpacked here as (responses, ids).
# NOTE(review): presumably one response row per galaxy id — confirm in galaxy.py.
responses, ids = galaxy.read_responses( f )

# Configure the root logger: INFO level, "time level message" format.
# NOTE: basicConfig without a `stream`/`filename` argument attaches a handler
# that writes to stderr, not stdout.
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')
# NOTE(review): compared element-wise against a class number below, so this is
# presumably a NumPy array of class labels — confirm.
mapping = galaxy.get_classes()

# Visit class labels 1 through 11 (xrange excludes the upper bound; Python 2).
for Class in xrange(1,12):
    # Indices along the first axis where mapping equals the current class.
    classes = np.nonzero(mapping==Class)[0]

    # Fresh containers/counter for this class; how they are filled is not
    # visible in this excerpt.
    X = []
    Y = []
    svm_class = 0
    
    for c in classes:
        # NOTE(review): the excerpt ends here, so the rest of this loop body is
        # not visible. q is presumably a quantile level (mquantiles is
        # imported) — confirm.
        q = 0.95
# Example #3
0
import scipy.ndimage as nd
import csv
import math

import joblib
from joblib import Parallel, delayed

import galaxy

def to_dict(responses, id_responses):
    """Build a lookup table from id to response.

    Items are paired positionally with ``zip``: surplus entries in the longer
    sequence are ignored, and when an id occurs more than once the response
    paired with its last occurrence wins.

    Args:
        responses: Iterable of values to store.
        id_responses: Iterable of hashable keys, aligned with ``responses``.

    Returns:
        A new ``dict`` mapping each id to its response.
    """
    return {key: value for value, key in zip(responses, id_responses)}

# Load the training solutions (used as ground truth below) and their ids.
responses,id_responses = galaxy.read_responses("/vol/biomedic/users/kpk09/kaggle/galaxy/data/training_solutions_rev1.csv" )

# Load the predictions to score from the file named by the first
# command-line argument; they are read with the same reader as the solutions.
predictions,id_predictions = galaxy.read_responses(sys.argv[1])

# Index the ground truth by id so each prediction can be matched to its target.
ground_truth = to_dict(responses,id_responses)
# One row of squared errors per prediction id, 37 columns wide.
# NOTE(review): 37 is presumably the number of response values per galaxy —
# confirm against the CSV layout.
MSE = np.zeros( (len(id_predictions),37), dtype="float" )
# n is the row of MSE filled by the current prediction.
n = 0
for p,i in zip(predictions,id_predictions):
    # Squared difference between prediction and target for id i; a KeyError is
    # raised if i is missing from the ground truth.
    # NOTE(review): assumes p and ground_truth[i] are arrays that fit a row of
    # 37 values — confirm.
    MSE[n] = (p - ground_truth[i])**2
    n += 1
    
# Average over rows (axis 0), then over the resulting column means: the
# overall mean squared error.
mse = np.mean(MSE,axis=0).mean()
print "MSE:", mse
print "RMSE:", math.sqrt(mse)

# Mean squared error of each individual prediction (mean across its columns).
# NOTE(review): not used in the visible code — the script presumably continues.
scores = MSE.mean(axis=1)