示例#1
0
def prepareData(dir_labelDictionary):
    """Run the preparation pipeline for every labelled input directory.

    Each entry is passed through ``cleanCSV`` -> ``fillnan`` -> ``pixel2mm``
    -> ``addVelocityColumnsBothFeet`` -> ``isStepUpFrame``.  The directory
    returned by the last step is copied into ``<outputAbsPath()>/<label>``
    and then deleted.

    :param dir_labelDictionary: iterable of mappings with a ``'dir'`` key
        (input directory) and a ``'label'`` key (output sub-directory name).
    :return: the output directory of the last entry that was processed.
    """
    pBound = 0.9

    # One spec per tracked point: its x/y columns plus its likelihood column.
    trackedPoints = ('Rightpaw', 'Leftpaw', 'Tailbase', 'Rotarodtop',
                     'Rotarodbottom')
    columnSpecs = [
        {
            'column': [point + ' x', point + ' y'],
            'likelihood': point + ' likelihood',
        }
        for point in trackedPoints
    ]

    outputRoot = outputAbsPath()

    for entry in dir_labelDictionary:
        inputDir, label = entry['dir'], entry['label']

        outputDir = os.path.join(outputRoot, label)
        if not os.path.exists(outputDir):
            os.mkdir(outputDir)

        # TODO: For each functions below, add progress bar, especially fillnan (t).
        # Every stage receives the result of the previous one.
        stage = cleanCSV(inputDir)
        stage = fillnan(columnSpecs, pBound, stage)
        stage = pixel2mm(stage)
        stage = addVelocityColumnsBothFeet(stage)
        lastDir = isStepUpFrame(stage)

        # Move the final stage's files under the labelled output directory.
        copy_tree(lastDir, outputDir)
        shutil.rmtree(lastDir)
    return outputDir
示例#2
0
def cleanCSV(inputDir):
    """Rewrite every CSV under ``inputDir`` with a single merged header row.

    For each non-hidden ``*.csv`` file the first row is dropped, the second
    and third rows are joined cell by cell with a space to form the new
    header, and the remaining lines are copied unchanged.  Results are
    written to ``<outputAbsPath()>/cleanCSV`` (emptied on every call) as
    ``cl_<deleteCommonWordsInFileName(name)>``.

    :param inputDir: directory that is walked for CSV files.
    :return: path of the output directory.
    :raises ValueError: if a CSV file has fewer than three rows.
    """
    prefix = 'cl_'
    outputDir = os.path.join(outputAbsPath(), 'cleanCSV')

    # Always start from an empty output directory.
    if os.path.exists(outputDir):
        shutil.rmtree(outputDir)
    os.mkdir(outputDir)

    # Strip one trailing slash while keeping the rest of the path
    # (``[:-1]``; indexing with ``[-1]`` would leave only the slash itself).
    if len(inputDir) > 1 and inputDir.endswith('/'):
        inputDir = inputDir[:-1]

    for root, dirs, files in os.walk(inputDir):
        for file in files:
            if file.startswith('.') or not file.endswith('.csv'):
                continue

            # Join on ``root`` so files found in sub-directories open too.
            inputPath = os.path.join(root, file)
            with open(inputPath, 'r') as readFile:
                csvReader = csv.reader(readFile)

                # Delete the first row and join the second and third rows.
                # Reading them explicitly guarantees that rows from a
                # previously processed file are never reused.
                try:
                    next(csvReader)
                    row1 = next(csvReader)
                    row2 = next(csvReader)
                except StopIteration:
                    raise ValueError(
                        '%s has fewer than three rows' % inputPath) from None

                row0Col = [' '.join(pair) for pair in zip(row1, row2)]
                outputFileName = os.path.join(
                    outputDir, prefix + deleteCommonWordsInFileName(file))

                with open(outputFileName, 'w') as outputFile:
                    csv.writer(outputFile).writerow(row0Col)
                    # The file iterator continues after the rows consumed
                    # above, so this copies the data lines as they are.
                    for line in readFile:
                        outputFile.write(line)

    return outputDir
示例#3
0
def concatenateCSVs(dir_labelDictionaries):
    '''
    Concatenate the CSV files of two directories into one file each.

    The files of each directory are taken in sorted order; the first line of
    every file except the first one is skipped so that only one header line
    is kept.  The results are written to ``concat0.csv`` and ``concat1.csv``
    inside ``outputAbsPath()``.

    :param dir_labelDictionaries: sequence of two mappings with ``'dir'`` and
        ``'label'`` keys.  If the first mapping's label is the string ``'0'``
        the given order is kept, otherwise the two entries are swapped.
    :return: The first element in the array is with label of 0 and the second is of 1.
    '''
    prefix = 'concat'
    outputCSVs = []
    if dir_labelDictionaries[0]['label'] == '0':
        inputDir0 = dir_labelDictionaries[0]['dir']
        inputDir1 = dir_labelDictionaries[1]['dir']
    else:
        inputDir0 = dir_labelDictionaries[1]['dir']
        inputDir1 = dir_labelDictionaries[0]['dir']

    # Both directories are listed before anything is written, so an output
    # file can never show up in the second listing.
    allCSVsArray = [
        sorted(glob.glob(inputDir + '/*.csv'))
        for inputDir in (inputDir0, inputDir1)
    ]

    # enumerate() gives each list its own position; list.index() would return
    # 0 for both whenever the two lists compare equal (e.g. both empty) and
    # make the second output overwrite the first.
    for position, allCSVs in enumerate(allCSVsArray):
        count = 0
        outputPath = os.path.join(outputAbsPath(),
                                  prefix + str(position) + '.csv')
        outputCSVs.append(outputPath)
        with open(outputPath, 'w') as outputFile:
            for i, inputPath in enumerate(allCSVs):
                with open(inputPath, 'r') as inputFile:
                    if i != 0:
                        # Skip the header line of every file but the first.
                        inputFile.readline()
                    shutil.copyfileobj(inputFile, outputFile)
                    count += 1
            print('%d files have been combined.' % count)

    return outputCSVs
def calculateStepUpHeight(dir_labelDictionaries,
                          framesPerSecond=20,
                          secondsPerInterval=30,
                          estimatedMaxFrames=6500):
    """Write the mean right-foot step-up height per interval for each directory.

    For every CSV under an entry's ``'dir'`` the column ``'rel RightY mm'``
    is scanned for runs of consecutive decreasing values.  The summed
    absolute decrease of a run is stored on the run's last frame.  The
    frames are then cut into intervals of
    ``framesPerSecond * secondsPerInterval`` frames and, per interval, the
    sum of the stored values is divided by the number of non-zero entries.
    One table per directory (rows: interval number, columns: file names) is
    written to
    ``<outputAbsPath()>/stepHeightCalculation/stepUpHeightRight_<dir>.csv``.

    :param dir_labelDictionaries: iterable of mappings with a ``'dir'`` key.
    :param framesPerSecond: frames per second of the recordings.
    :param secondsPerInterval: length of one interval in seconds.
    :param estimatedMaxFrames: used only to size the number of interval rows.
    :return: list of the CSV paths that were written.
    """
    prefix = 'stepUpHeightRight_'
    col = 'rel RightY mm'
    framesPerInterval = framesPerSecond * secondsPerInterval
    n = int(np.ceil(estimatedMaxFrames / framesPerInterval))

    csvs = []
    for dir_labelDictionary in dir_labelDictionaries:
        inputDir = dir_labelDictionary['dir']

        dfRightStepUpHeight = pd.DataFrame(index=list(range(n)))

        for roots, dirs, files in os.walk(inputDir):
            for inputFile in files:
                if inputFile.startswith('.') or not inputFile.endswith('.csv'):
                    continue

                dfRightStepUpHeight[inputFile] = np.nan
                # Join on the walked directory so nested files can be read.
                df = pd.read_csv(os.path.join(roots, inputFile),
                                 index_col=0)
                d = df[col].diff()  # frame-to-frame change
                m = d.lt(0)  # True where the value decreased
                # Group id that only advances on non-decreasing frames, so
                # a run of decreasing frames shares one id.
                b = (~m).cumsum()
                # Total absolute decrease of the run each frame belongs to.
                s = d.mask(~m).abs().groupby(b).transform('sum')
                # Last frame of a decreasing run -> run total; unchanged
                # frame -> '1e3'; everything else -> ''.
                # NOTE(review): the '1e3' entries are non-zero and therefore
                # enter the interval mean below -- confirm this is intended.
                df['right foot step up height'] = pd.DataFrame(
                    np.select([~b.duplicated(keep='last') & m,
                               d.eq(0)], [s, '1e3'], ''))

                dfs = df['right foot step up height']
                for i in range(n):
                    start = i * framesPerInterval
                    if start >= len(dfs):
                        break
                    # Slicing past the end simply yields the shorter tail.
                    splits = dfs[start:start + framesPerInterval]
                    splits = splits.replace('', '0')
                    # Builtin float: the np.float alias no longer exists in
                    # current NumPy releases.
                    splits = splits.astype(float)
                    nSplitStepUp = np.sum(splits != 0)
                    if nSplitStepUp == 0:
                        dfRightStepUpHeight.loc[i, inputFile] = np.nan
                    else:
                        dfRightStepUpHeight.loc[i, inputFile] = (
                            np.sum(splits) / nSplitStepUp)

        outputDir = os.path.join(outputAbsPath(), 'stepHeightCalculation')
        os.makedirs(outputDir, exist_ok=True)

        inputDirLastPath = getLastDirectory(inputDir)
        outputCSVPath = os.path.join(outputDir,
                                     prefix + inputDirLastPath + '.csv')
        dfRightStepUpHeight.to_csv(outputCSVPath)
        csvs.append(outputCSVPath)
    return csvs
示例#5
0
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from utils.getDirAbsPath import outputAbsPath
import os
import string

from sklearn.ensemble import AdaBoostClassifier
from matplotlib import pyplot
import pandas as pd
from numpy.random import shuffle
from sklearn.model_selection import train_test_split
import numpy as np
from utils.getDirAbsPath import outputAbsPath

# Directory that receives the feature-importance outputs.
outputDir = os.path.join(outputAbsPath(), 'featureImportance')
framesPerInterval = 200
maxFrames = 6500

if not os.path.exists(outputDir):
    os.mkdir(outputDir)

nLoops = range(100)
nFeatures = range(1, int(maxFrames / framesPerInterval))

importance_columns = ['feature' + str(i) for i in nFeatures]
index = [str(i) for i in nFeatures]
# Build the column list by concatenation: list.append() returns None, which
# would create the frame without any columns.
acc_mp = pd.DataFrame(
    index=nLoops,
    columns=['accuracy'] + importance_columns)
示例#6
0
# Split df01 into len(df01) // interval nearly equal chunks and keep the
# second one.  Integer division passes a whole number of sections instead of
# a float.
bins = np.array_split(df01, len(df01) // interval)[1]
df01 = pd.DataFrame(bins)

footnote = ''
# fancyBoxPlot(pd.DataFrame(bins),
#              xlabel='trained up to (i)th interval',
#              ylabel='accuracy',
#              title='Training Accuracy with Different Windows of Time Series Data (step up height) %i iterations' % interval,
#              outputPath=os.path.join(outputAbsPath(), 'featureImportance', 'accuracies.png'),
#              footnote=footnote)

xlabel = '(i)th interval'
ylabel = 'feature importance'
title = 'Feature importance in classifying genotypes (%i iterations)' % interval
outputPath = os.path.join(outputAbsPath(), 'featureImportance', 'featureImportanceDecisionTree.png')
footnote = 'Classified with mean step up height as the model\'s input. Classified with mean step up height as the model\'s input. Classified with mean step up height as the model\'s input. Classified with mean step up height as the model\'s input. Classified with mean step up height as the model\'s input. '
# Plotting

fig, ax = plt.subplots(figsize=(15, 11), tight_layout=True)
plt.subplots_adjust(hspace=1.0, wspace=0.02, bottom=0.17)

# Creating axes instance
# notch takes a bool; the string 'True' only worked because any non-empty
# string is truthy.
bp = ax.boxplot(bins, patch_artist=True,
                notch=True)

# changing color and linewidth of
# whiskers
for whisker in bp['whiskers']:
    whisker.set(color='#8B008B',
                linestyle="-.", linewidth=3)
示例#7
0
import os
import string

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from utils.getDirAbsPath import outputAbsPath

# Table produced by the decision-tree run; read back below for plotting.
inputCSV = os.path.join(
    outputAbsPath(),
    'featureImportance',
    'featureImportance_decisionTree.csv',
)

# Plot importance when the number of intervals is 2.

# One row per lowercase letter makes up a chunk.
interval = len(string.ascii_lowercase)
df = pd.read_csv(inputCSV, index_col=0)
df = df.drop(columns=['accuracy'])

# Keep only the last of the len(df) / interval chunks.
bins = np.array_split(df, len(df) / interval)
bins = bins[-1]
df = pd.DataFrame(bins)

# Text and destination used for the figure.
xlabel = '(i)th interval'
ylabel = 'feature importance'
title = 'Feature importance in classifying genotypes (%i iterations)' % interval
outputPath = os.path.join(
    outputAbsPath(),
    'featureImportance',
    'featureImportance10intervals.png',
)
footnote = 'Classified with mean step up height as the model\'s input. Classified with mean step up height as the model\'s input. Classified with mean step up height as the model\'s input. Classified with mean step up height as the model\'s input. Classified with mean step up height as the model\'s input. '
# Plotting

fig, ax = plt.subplots(figsize=(15, 11), tight_layout=True)
plt.subplots_adjust(hspace=1.0, wspace=0.02, bottom=0.17)

# Creating axes instance
示例#8
0
import os

# from dataAnalysis.allAnalysis import allAnalysis
from dataPrep.main_dataPrep import prepareData
from utils.getDirAbsPath import outputAbsPath

# Class ids assigned to the two groups.
classWT = 0
classYAC = 1
# TODO: Do not assume the names of columns (ex. Rightpaw x, Rotarod top). Ask for them to the user.
# TODO: Create `output` dir main project dir
userinput_secondsPerInterval = 30  # TODO: Ask for interval length.
userinput_framesPerSecond = 20  # TODO: Ask for frames per second
# TODO: Ask which one should be 0 and 1.
userinput_classification = ['WT', 'YAC']
# TODO: User input
userinput_0Day3Dir = (
    '/Users/ksb7640/Documents/UBC_Academic/Raymond_Lab/448/rotarod_git/rotarod_ML/data_all/Day4_2and3monthOld_rotarodAnalysis/WT'
)
# TODO: User input
userinput_1Day3Dir = (
    '/Users/ksb7640/Documents/UBC_Academic/Raymond_Lab/448/rotarod_git/rotarod_ML/data_all/Day4_2and3monthOld_rotarodAnalysis/YAC128'
)
userinput_maxFrames = 6500

# Each entry pairs an input directory with the label of its output folder.
dir0 = dict(dir=userinput_0Day3Dir, label='Day4_WT')
dir1 = dict(dir=userinput_1Day3Dir, label='Day4_YAC')
dirs = [dir0, dir1]

# The output root has to exist before the preparation step runs.
outputDirAbsolutePath = outputAbsPath()
if not os.path.exists(outputDirAbsolutePath):
    os.mkdir(outputDirAbsolutePath)

dirs = prepareData(dirs)
# allAnalysis(dirs, userinput_secondsPerInterval, userinput_framesPerSecond, userinput_maxFrames)
import os
import string

from sklearn.tree import DecisionTreeClassifier
from matplotlib import pyplot
import pandas as pd
from numpy.random import shuffle
from sklearn.model_selection import train_test_split
import numpy as np
from utils.getDirAbsPath import outputAbsPath

# Directory that receives the feature-importance outputs.
outputDir = os.path.join(outputAbsPath(), 'featureImportance')

if not os.path.exists(outputDir):
    os.mkdir(outputDir)

maxFrames = 6500
nLoops = list(string.ascii_lowercase)
nFeatures = int(maxFrames / 2)

importance_columns = ['feature' + str(i) for i in range(1, 11)]
# nFeatures is an int and cannot be iterated; the row labels are built over
# the same 1..10 range as importance_columns (letter + number, e.g. 'a1').
index = [s + str(i) for i in range(1, 11) for s in nLoops]
# Concatenate instead of list.append(), which returns None and would create
# the frame without any columns.
acc_mp = pd.DataFrame(index=index, columns=['accuracy'] + importance_columns)

# One pass per entry of nLoops; within each pass i runs from 1 to 10.
for s in nLoops:
    for i in range(1, 11):
        # Absolute paths of the two step-up-height tables.
        # NOTE(review): YAC is assigned but not used in the visible part of
        # this loop.
        WT = '/Users/ksb7640/Documents/UBC_Academic/Raymond_Lab/448/rotarod_git/rotarod_ML/output/stepHeightCalculation/stepUpHeightRight_Day4_WT.csv'
        YAC = '/Users/ksb7640/Documents/UBC_Academic/Raymond_Lab/448/rotarod_git/rotarod_ML/output/stepHeightCalculation/stepUpHeightRight_Day4_YAC.csv'

        # The table is re-read on every iteration and cut to its first i rows.
        dfWT = pd.read_csv(WT, index_col=0)
        dfWT = dfWT.iloc[0:i]
import os
import string

from sklearn.tree import DecisionTreeClassifier
from matplotlib import pyplot
import pandas as pd
from numpy.random import shuffle
from sklearn.model_selection import train_test_split
import numpy as np
from utils.getDirAbsPath import outputAbsPath

# Directory that receives the feature-importance outputs.
outputDir = os.path.join(outputAbsPath(), 'featureImportance')

if not os.path.exists(outputDir):
    os.mkdir(outputDir)

nLoops = list(string.ascii_lowercase)
nFeatures = range(1, 11)

importance_columns = ['feature' + str(i) for i in range(1, 11)]
# Row labels combine a letter with a feature count (e.g. 'a1').
index = [s + str(i) for i in nFeatures for s in list(string.ascii_lowercase)]
# Concatenate instead of list.append(), which returns None and would create
# the frame without any columns.
acc_mp = pd.DataFrame(
    index=index,
    columns=['accuracy'] + importance_columns)

for s in nLoops:
    for i in range(1, 11):
        WT = '/Users/ksb7640/Documents/UBC_Academic/Raymond_Lab/448/rotarod_git/rotarod_ML/output/stepHeightCalculation/stepUpHeightRight_Day4_WT.csv'
        YAC = '/Users/ksb7640/Documents/UBC_Academic/Raymond_Lab/448/rotarod_git/rotarod_ML/output/stepHeightCalculation/stepUpHeightRight_Day4_YAC.csv'

        dfWT = pd.read_csv(WT, index_col=0)