import csv

from sklearn.feature_selection import VarianceThreshold

from IO import Input


def select_indices():
    print 'reading in features'
    test_features = Input.load_testdata_caffefeatures(padded=True)
    train_features = Input.load_traindata_caffefeatures(padded=True)

    print 'selecting indices'
    # get indices of features that have non-zero variance in the test data
    selector1 = VarianceThreshold()
    selector1.fit(test_features)
    indices_test = selector1.get_support(indices=True)

    # get indices of features that have non-zero variance in the train data
    selector2 = VarianceThreshold()
    selector2.fit(train_features)
    indices_train = selector2.get_support(indices=True)

    # only keep indices that have variance in both the test and the train data
    indices = list(set(indices_test) & set(indices_train))

    # add 1 to all indices
    indices = [x + 1 for x in indices]

    # save the indices to a csv file
    with open('caffefeature_indices_padded.csv', 'wb') as myfile:
        wr = csv.writer(myfile)
        wr.writerow(indices)
Example #2
from IO import Input


def sort_dataframe(df_data, df_filenames):
    '''Reorder the rows of df_data so that they match the canonical testdata filename order.'''
    correct_order = Input.load_testdata_filenames()
    current_order = list(df_filenames.values)
    indices = [current_order.index(filename) for filename in correct_order]
    df_data = df_data.reindex(indices)
    df_data = df_data.reset_index()  # adds a fresh index; the old indices become column 'index'
    return df_data.drop('index', axis=1)  # remove this new column 'index'
Example #3
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        from copy import deepcopy
        new_kwargs = deepcopy(kwargs)
        del new_kwargs["name"]
        if "expression" in new_kwargs:
            del new_kwargs["expression"]
        if "central_name_gen" in new_kwargs:
            # share the original name generator rather than the deep copy
            new_kwargs["central_name_gen"] = kwargs["central_name_gen"]
        self.taps = []
        self.add_input(Input(name="clk",
                             width=1,
                             expression="",
                             **new_kwargs))

        self.input_list = kwargs["input_list"]
        num_resets = 0
        num_sets = 0
        num_toggles = 0
        self.input_name_list = []
        # create one "rst_", "set_" or "tog_" input per character in input_list
        for one_char in kwargs["input_list"]:
            if one_char.lower() == "r":
                one_name = "rst_" + ("%04d" % num_resets)
                one_sig = Input(name=one_name, width=1, expression="", **new_kwargs)
                self.add_input(one_sig)
                self.add_device(one_sig)
                num_resets += 1
                self.input_name_list.append(one_name)
            if one_char.lower() == "s":
                one_name = "set_" + ("%04d" % num_sets)
                one_sig = Input(name=one_name, width=1, expression="", **new_kwargs)
                self.add_input(one_sig)
                self.add_device(one_sig)
                num_sets += 1
                self.input_name_list.append(one_name)
            if one_char.lower() == "t":
                one_name = "tog_" + ("%04d" % num_toggles)
                one_sig = Input(name=one_name, width=1, expression="", **new_kwargs)
                self.add_input(one_sig)
                self.add_device(one_sig)
                num_toggles += 1
                self.input_name_list.append(one_name)

        out_sig = Signal(name="q_sig", width=1, expression="", **new_kwargs)
        self.signal_list.append(out_sig)
        self.add_output(Output(name="q", width=1, expression="", **new_kwargs))
Example #4
import math
from itertools import chain

import numpy as np
import pandas as pd

from IO import Input


def compute_logloss(df_filenames, df_data):
    # STEP 1: clip all values away from 0 and 1 so that log() stays finite
    replacer = lambda x: max(float(min(x, 0.999999999999)), 0.0000000000000001)
    df_data = df_data.applymap(replacer)

    # STEP 2: rescale every row so that the ten class probabilities sum to 1
    df_subsum = df_data.sum(axis=1)
    df_sum = pd.concat([df_subsum] * 10, axis=1)
    df_sum.columns = ['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9']
    df_data = df_data / df_sum

    # STEP 3: logloss
    # load the correct validation-set labels
    labels = Input.load_validationset_labels()
    df_labels = pd.get_dummies(labels)  # one-hot encoding, returns a DataFrame
    df_labels.columns = ['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9']

    # sort the data to have the same order as the labels
    correct_order = Input.load_validationset_filenames()
    current_order = list(df_filenames.values)
    indices = [current_order.index(filename) for filename in correct_order]
    df_data = df_data.reindex(indices)
    df_data = df_data.reset_index()  # adds a fresh index; the old indices become column 'index'
    df_data = df_data.drop('index', axis=1)  # remove this new column 'index'

    # select the probabilities of the correct classes only
    df_sparse_probs = df_data * df_labels
    probs = df_sparse_probs.values
    probs = list(chain.from_iterable(probs))  # flatten
    probs = filter(lambda x: x != 0, probs)  # drop the zeros left by the one-hot mask

    # take the log of each probability and average
    log_probs = [math.log(p) for p in probs]
    return -(np.mean(log_probs))
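# A minimal, self-contained check of what the function computes (hypothetical
# values, not from the original source): the multiclass log loss is the negated
# mean of log(probability assigned to the true class).
probs_of_true_class = [0.8, 0.5]  # e.g. two samples scored 0.8 and 0.5 on their true class
print(-np.mean([math.log(p) for p in probs_of_true_class]))  # ~0.458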
Example #5
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        from copy import deepcopy
        new_kwargs = deepcopy(kwargs)
        del new_kwargs["name"]
        del new_kwargs["width"]
        if "expression" in new_kwargs:
            del new_kwargs["expression"]
        if "central_name_gen" in new_kwargs:
            # share the original name generator rather than the deep copy
            new_kwargs["central_name_gen"] = kwargs["central_name_gen"]
        self.children.append(BasicDelay(**kwargs))
        self.taps = []
        self.add_input(Input(name="clk", width=1, expression="", **new_kwargs))
        self.add_input(Input(name="rst", width=1, expression="", **new_kwargs))
        self.add_input(Input(name="d_in", width=self.children[0].width, expression="", **new_kwargs))
        self.add_output(Output(name="d_out", width=self.children[0].width, expression="", **new_kwargs))
Example #6
def main():
    print("Searching csv files")
    directory_path = "./todosLosAngulos"
    file_searching_pattern = "**/*.csv"
    paths = Utilities.get_csv_filenames(directory_path, file_searching_pattern)
    db_path = "locomotionAnalysis.db"
    #print("Listing " + str(len(paths)) + " files \n");

    input_list = []
    # Create object Input based on csv files
    for path in paths:
        input_obj = Input(str(path))
        input_obj.compute_metadata()
        input_list.append(input_obj)

    # only the first Input object is inserted into the database here
    input_obj = input_list[0]
    print(input_obj.get_path())
    input_obj.insert_steps_into_db(db_path)
    print("------------------------------")
Example #7
    def decompress(self, in_file_name, out_file_name=''):
        # assign output filename (default: input basename with a .png extension)
        if out_file_name == '':
            out_file_name = in_file_name
        out_file_name = out_file_name.split('.')[0] + ".png"

        print('Decompressing "%s" -> "%s"' % (in_file_name, out_file_name))
        print('Reading...')
        stream = Input(in_file_name)
        decoder = Decoder(stream)

        # decode image dimensions
        height, width = decoder.decode_header()
        stream.flush()
        size_header = stream.bytes_read
        print('* Header: %d bytes' % size_header)

        # decode Huffman table
        tree = decoder.decode_tree()
        stream.flush()
        size_tree = stream.bytes_read - size_header
        print('* Tree: %d bytes' % size_tree)

        # decode image pixel data
        image = decoder.decode_pixels(height, width, tree)
        stream.close()
        size_pixels = stream.bytes_read - size_tree - size_header
        print('* Pixels: %d bytes' % size_pixels)

        size_read = stream.bytes_read
        print('Decompressed %d bytes.' % size_read)
        print('Image dimensions: %d x %dpx' % (width, height))
        image.save(out_file_name)
        size_raw = raw_size(width, height)
        print('RAW size: %d bytes' % size_raw)
        space_expand = 100 * (float(size_raw) / size_read - 1)
        print('Memory expanded by %0.2f%%.' % space_expand)
Example #8
import pandas as pd
import time

from sklearn.ensemble import RandomForestClassifier
from IO import Input
from IO import Output

start_time = time.time()

# load train data
df_trainset_caf = Input.load_trainset_caffefeatures()
df_trainset_lab = Input.load_trainset_labels()

# Load test data
df_validationset_caf = Input.load_validationset_caffefeatures()

print("--- load data: %s seconds ---" % round((time.time() - start_time),2))
start_time = time.time()

x_train = df_trainset_caf
y_train = df_trainset_lab
x_test = df_validationset_caf

# Train model
rf = RandomForestClassifier(n_estimators=500)
rf.fit(x_train, y_train)

print("--- train model: %s seconds ---" % round((time.time() - start_time),2))
start_time = time.time()

# Predict
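# The snippet is cut off at the prediction step; a minimal sketch of how it
# might continue (not from the original source; writing the results out via the
# project's Output helper is omitted because its API is not shown):
predictions = rf.predict_proba(x_test)  # per-class probabilities for the validation set
print("--- predict: %s seconds ---" % round((time.time() - start_time), 2))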
Example #9
from IO import IO
from Camera import Camera
from IO import Input
from IO import OutputClock
from Image import Image
import code

print("Beholder v0.0.1")
IO = IO()
Camera = Camera()


def takePhoto():
    img = Image(Camera.takePhoto())
    img.save()


sumitomoInput = Input(24, "Sumitomo", takePhoto)
clockTestOutput = OutputClock(25, "Fake Signal", 0.1)


def repl(IO, Camera, takePhoto):
    code.interact(local=locals())


repl(IO, Camera, takePhoto)
Example #10
from sklearn.feature_selection import chi2
from IO import Input
import numpy as np
import csv

print 'loading data'
X = Input.load_trainset_caffefeatures()
Y = Input.load_trainset_labels()

print 'compute chi2 values'
chi, p = chi2(X, Y)
chi = map(lambda x: np.inf if np.isnan(x) else x, chi)  # turn all NaNs into infs
count_inf = (np.isinf(chi)).sum()
print 'number of infinities: ' + str(count_inf) + ' of ' + str(len(chi))

print 'sort features on relevance'
indices = np.argsort(chi)

print 'save feature indices to csv'
with open('feature_importance_trainset_chi2.csv', 'wb') as myfile:
    wr = csv.writer(myfile)
    wr.writerow(indices)
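# The saved ranking is presumably what the percentile-based feature loaders
# consume later on; Example #12 shows the corresponding call, e.g.:
# features = Input.load_traindata_caffefeatures(featureSelectionMethod='chi2', Percentile=5)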
Example #11
import numpy as np
import pandas as pd
#from Output import *
import pickle
#import xgboost as xgb
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
from IO import Input

print('loading data')
#Load data
x_traindata = pd.read_csv('HOG_features_train_8_16_1.csv',
                          sep=',',
                          header=None).values
x_testdata = pd.read_csv('HOG_features_test_8_16_1.csv', sep=',',
                         header=None).values

#load classification
y_traindata = np.asarray(Input.load_traindata_labels())

print('training classifier')
#Train classifier
clf = OneVsRestClassifier(SVC(kernel='poly', probability=True))
clf.fit(x_traindata, y_traindata)

# now you can save it to a file
with open('classifierpolytraindata_HOG_8_16_1.pkl', 'wb') as f:
    pickle.dump(clf, f)

## and later you can load it
with open('classifierpolytraindata_HOG_8_16_1.pkl', 'rb') as f:
    clf = pickle.load(f)

#Make predictions
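# The snippet ends at "#Make predictions"; a sketch of a possible continuation
# (not from the original source):
predictions = clf.predict_proba(x_testdata)  # per-class probabilities for the HOG test features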
Example #12
'''Simple test file to check whether loading caffefeatures works properly:
selecting percentiles, selecting rows, and giving error messages.
@author: Diede Kemper'''

from IO import Input

features = Input.load_validationset_caffefeatures()
print features.shape
print 'should be: 8061x3983'

features = Input.load_traindata_caffefeatures(userows=range(3000, 5500))
print features.shape
print 'should be: 2500x3983'

features = Input.load_validationset_caffefeatures(
    featureSelectionMethod='chi2', Percentile=100)
print features.shape
print 'should be: 8061x3983'

features = Input.load_validationset_caffefeatures(featureSelectionMethod='hoi',
                                                  Percentile=90)
print features.shape
print 'should print error message'

features = Input.load_validationset_caffefeatures(
    featureSelectionMethod='chi2', Percentile=210)
print features.shape
print 'should print error message'

features = Input.load_traindata_caffefeatures(featureSelectionMethod='chi2',
                                              Percentile=5)
print features.shape
Example #13
import pandas as pd
import time

from sklearn.ensemble import RandomForestClassifier
from IO import Input
from IO import Output

start_time = time.time()

# load train data
df_traindata_caf = Input.load_traindata_caffefeatures()
df_traindata_lab = Input.load_traindata_labels()

# Load test data
df_testdata_caf = Input.load_testdata_caffefeatures()

print("--- load data: %s seconds ---" % round((time.time() - start_time), 2))
start_time = time.time()

x_train = df_traindata_caf
y_train = df_traindata_lab
x_test = df_testdata_caf

# Train model
rf = RandomForestClassifier(n_estimators=500)
rf.fit(x_train, y_train)

print("--- train model: %s seconds ---" % round((time.time() - start_time), 2))
start_time = time.time()

# Predict
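# As in Example #8, the prediction step is cut off; a sketch of how it might
# continue (not from the original source):
predictions = rf.predict_proba(x_test)  # per-class probabilities for the test set
print("--- predict: %s seconds ---" % round((time.time() - start_time), 2))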
Example #14
from sklearn.multiclass import OneVsOneClassifier
from sklearn.svm import SVC
from sklearn.svm import NuSVC
from sklearn.svm import LinearSVC
import numpy as np
import pandas as pd
import pickle
from IO import Input
from IO import Output

'''
Helper function used when grouping the dataframe: it turns 3 rows of coordinates into a single row.
'''
def transformXY(coords):
    return pd.Series(np.asarray(coords).ravel())

#Load the file names of the various datasets
trainset_filenames = Input.load_trainset_filenames()
validationset_filenames = Input.load_validationset_filenames()
traindata_filenames = Input.load_traindata_filenames()
testset_filenames = Input.load_testdata_filenames()

#Load the features
feat = pd.read_csv('skinTrainFeatures.csv', index_col = 0)

#Select the features for each dataset
x_trainset = feat.loc[trainset_filenames]
x_validationset = feat.loc[validationset_filenames]
x_testset = feat.loc[testset_filenames]
x_traindata = feat.loc[traindata_filenames]

#Load the labels for each dataset
y_trainset = np.asarray(Input.load_trainset_labels())
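# The snippet breaks off after loading the trainset labels; the imports above
# suggest an SVM is fitted next, along these lines (a sketch, not from the
# original source):
clf = OneVsOneClassifier(SVC())
clf.fit(x_trainset, y_trainset)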
Example #15
import pandas as pd
import sys

from IO import Input

print 'reading in features'

df = pd.read_csv('features_test_padded.csv', header=None)

print 'Old dataframe'
print df.head()

#
# TESTDATA
#

# get filenames
testdata_filenames = Input.load_testdata_filenames()
caffefeatures_filenames = list(df[0].values)

# check whether there are files without caffefeatures
missing_filenames = list(set(testdata_filenames) - set(caffefeatures_filenames))
if not missing_filenames:  # no files are missing
    print 'All testdata files have caffefeatures.'
else:
    print str(len(missing_filenames)) + ' testdata files do not have caffefeatures'
    sys.exit("Execution stopped: not all testdata files have caffefeatures. Fix this first!")

# sort features on testdata filenames
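# The sorting step is cut off here; a sketch of how the rows might be reordered
# to match testdata_filenames, mirroring sort_dataframe in Example #2 (not from
# the original source):
indices = [caffefeatures_filenames.index(f) for f in testdata_filenames]
df = df.reindex(indices).reset_index(drop=True)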
Example #16
import pandas as pd
import sys

from IO import Input

print 'reading in features'

df = pd.read_csv('features_train_padded.csv', header=None)

print 'Old dataframe'
print df.head()

#
# TRAINDATA
#

# get filenames
traindata_filenames = Input.load_traindata_filenames()
caffefeatures_filenames = list(df[0].values)

# check whether there are files without caffefeatures
missing_filenames = list(set(traindata_filenames) - set(caffefeatures_filenames))
if not missing_filenames:  # no files are missing
    print 'All traindata files have caffefeatures.'
else:
    print str(len(missing_filenames)) + ' traindata files do not have caffefeatures'
    sys.exit("Execution stopped: not all traindata files have caffefeatures. Fix this first!")

# sort features on traindata filenames
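# As in Example #15, the sorting step is cut off; a sketch of the same
# reordering applied to the traindata filenames (not from the original source):
indices = [caffefeatures_filenames.index(f) for f in traindata_filenames]
df = df.reindex(indices).reset_index(drop=True)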