示例#1
0
def getFeatureValuesDataFrame(featureName,
                              numFolders,
                              numFilesPerFolder,
                              featureFileType='.csv'):
    '''
    Returns a single dataframe holding the feature values of several
    performances, concatenated row-wise with a fresh integer index.

    Inputs:
        :featureName:       name of the feature; passed to FFP to locate the
                            root path and the per-piece feature folders
        :numFolders:        maximum number of piece folders to read
                            (None reads every folder)
        :numFilesPerFolder: maximum number of feature files to read from each
                            piece folder (None reads every file)
        :featureFileType:   extension of the files to load: '.csv' or '.pkl'
    Raises:
        ValueError if featureFileType is not supported, or (from pd.concat)
        if no feature files were found
    '''
    # Choose the loader once, and fail early with a clear message instead of
    # silently loading nothing and failing later inside pd.concat
    if featureFileType == '.csv':

        def loadFeatureFile(fn):
            return pd.read_csv(fn, header=None, index_col=0)
    elif featureFileType == '.pkl':
        loadFeatureFile = pd.read_pickle
    else:
        raise ValueError("unsupported featureFileType %r "
                         "(expected '.csv' or '.pkl')" % (featureFileType, ))

    # Get names of feature folders; slicing with None keeps every folder
    rootPath = FFP.getRootPath(featureName)
    pieceFolders = getFolderNames(rootPath,
                                  contains='mazurka',
                                  orderAlphabetically=True)[:numFolders]

    featureDataFrames = []
    # Iterate over pieces
    for pieceFolder in pieceFolders:
        print('processing folder: %s' % pieceFolder)
        featuresPath = FFP.getFeatureFolderPath(rootPath + pieceFolder,
                                                featureName)
        performanceFiles = getFileNames(
            featuresPath, endsWith=featureFileType,
            orderAlphabetically=True)[:numFilesPerFolder]
        # Iterate over performances
        for performanceFile in performanceFiles:
            print('\tprocessing file: %s' % performanceFile)
            featureFn = os.path.join(featuresPath, performanceFile)
            featureDataFrames.append(loadFeatureFile(featureFn))

    return pd.concat(featureDataFrames, ignore_index=True)
示例#2
0
    def getFeatureFolderPath(cls, piecePath, featureName):
        '''
        Returns the path (with a trailing '/') of the folder that holds the
        files of one feature for one piece.
        Inputs:
            :piecePath:     path of the piece folder; trailing slashes are
                            normalised to exactly one
            :featureName:   the name of the feature
        If featureName contains 'CENS' or 'FENS' the normalised piecePath is
        returned as is. Otherwise the piece folder must contain exactly one
        sub-folder whose name contains
        FeatureFileProps.folderPrefix[featureName]; that sub-folder's path is
        returned.
        '''
        normalisedPath = piecePath.rstrip('/') + '/'

        # CENS / FENS feature files sit directly in the piece folder
        storedInPieceFolder = 'CENS' in featureName or 'FENS' in featureName
        if storedInPieceFolder:
            return normalisedPath

        prefix = FeatureFileProps.folderPrefix[featureName]
        matches = []
        for name in getFolderNames(normalisedPath, orderAlphabetically=True):
            if prefix in name:
                matches.append(name)

        numMatches = len(matches)
        assert numMatches == 1, (
            'number of feature folders must be 1 (it is %i)' % numMatches)

        return normalisedPath + matches[0] + '/'
def downloadAllFeatureHistograms():
    '''
    Passes the name of every folder found under newFeaturesPath (in
    alphabetical order) to download_Feature_histograms as featureNames,
    together with an empty list of transformed feature names.
    '''
    download_Feature_histograms(
        featureNames=getFolderNames(newFeaturesPath, orderAlphabetically=True),
        transformedFeatureNames=[])
示例#4
0
def getFeatureFilesDetails(featureNames):
    '''
    Writes 'Feature File Details.csv' (relative to the working directory)
    with one row per .csv feature file found for the given features.

    Each row records the feature name, the piece folder, the file name and
    the performance, which is the file name passed through rcut together
    with FFP.fileSuffix[featureName]. Nothing is returned.
    '''
    rows = []

    for featureName in featureNames:
        featuresPath = FFP.getFeatureFolderPath(FFP.getRootPath(featureName),
                                                featureName)
        # One sub-folder per piece, each holding the performance files
        for pieceFolder in getFolderNames(featuresPath,
                                          orderAlphabetically=True):
            csvNames = getFileNames(featuresPath + pieceFolder,
                                    endsWith='.csv',
                                    orderAlphabetically=True)
            for csvName in csvNames:
                performance = rcut(csvName, FFP.fileSuffix[featureName])
                rows.append({
                    'Feature': featureName,
                    'Piece': pieceFolder,
                    'Performance': performance,
                    'Filename': csvName
                })

    pd.DataFrame(rows).to_csv('Feature File Details.csv')
示例#5
0
def loadFeatureFileDict(featureName, numFolders, numFilesPerFolder):
    '''
    Loads the feature file dict for the first numFolders 'mazurka' folders
    (alphabetical order) under the feature's root path.

    Returns a tuple (featureFileDict, pieceIds), where pieceIds is the list
    of folder names that was handed to FFP.loadFeatureFileDictAllFolders.
    '''
    rootPath = FFP.getRootPath(featureName)
    mazurkaFolders = getFolderNames(rootPath,
                                    contains='mazurka',
                                    orderAlphabetically=True)
    selectedIds = mazurkaFolders[:numFolders]

    print('Loading feature file dict...')
    loadedDict = FFP.loadFeatureFileDictAllFolders(rootPath, selectedIds,
                                                   featureName,
                                                   numFilesPerFolder)
    print('...done.')

    return loadedDict, selectedIds
def download_NN_training_results(NNtype='dA'):
    '''
    Copies the 'training_records.pkl' of every results folder under
    <NNpath>training_results/<NNtype>/ into <transferPath>NN training
    results/, naming each copy '<results folder>.pkl'.

    Results folders without a training_records.pkl are skipped.
    '''
    sourceRoot = '%straining_results/%s/' % (NNpath, NNtype)
    runNames = getFolderNames(sourceRoot, orderAlphabetically=True)

    destination = transferPath + 'NN training results/'
    createPath(destination)

    for runName in runNames:
        recordsFn = sourceRoot + runName + '/' + 'training_records.pkl'
        if not os.path.exists(recordsFn):
            continue
        shutil.copyfile(recordsFn, destination + runName + '.pkl')
def download_MAP_csv_results(resultsIndex=None):
    '''
    Copies the .csv files of <NCDpath>run<resultsIndex>/ to transferPath.

    When resultsIndex is None, every folder under NCDpath whose name starts
    with 'run' is processed in alphabetical order; the index is the integer
    value of whatever follows the first three characters of the folder name.
    '''
    if resultsIndex is None:
        runFolders = getFolderNames(NCDpath,
                                    startsWith='run',
                                    orderAlphabetically=True)
        for runFolder in runFolders:
            # Strip the leading 'run' to recover the numeric index
            download_MAP_csv_results(int(runFolder[3:]))
        return

    runPath = '%srun%i/' % (NCDpath, resultsIndex)
    copyFiles(runPath, transferPath, filesThatEndWith='.csv')
示例#8
0
def getFeatureFolderPathAndName(piecePath, featureName):
    '''
    Finds the single sub-folder of piecePath whose name contains
    featuresDict[featureName]['folder prefix'].

    Returns a tuple (featuresPath, featuresFolder): the sub-folder's path
    (piecePath + folder name + '/') and the folder name itself. piecePath is
    used as given, so it needs to end with a path separator already.
    '''
    prefix = featuresDict[featureName]['folder prefix']

    matches = []
    for name in getFolderNames(piecePath):
        if prefix in name:
            matches.append(name)

    numMatches = len(matches)
    assert numMatches == 1, (
        'number of feature folders must be 1 (it is %i)' % numMatches)

    folderName = matches[0]
    return piecePath + folderName + '/', folderName
示例#9
0
def createNCDfiles(existingNCDsDataFrame=None):
    '''
    Creates NCD files for every combination of CRP time delay, dimension,
    method and feature name (taken from the module-level CRPtimeDelays,
    CRPdimensions, CRPmethods and featureNames).

    For each combination:
        1. a CRP file is created for every entry of the feature file dict
           (N.B. only the first two mazurka folders are used at the moment)
        2. the CRP files are loaded back with loadCRPfiles
        3. an NCD file is created for every unordered pair of loaded CRPs
        4. the CRP files are deleted again (best effort)
    Finally an empty 'finished.txt' is written to NCDpath.

    Inputs:
        :existingNCDsDataFrame: currently unused; kept so existing callers
                                keep working
    '''
    mazurkaIds = getFolderNames(mazurkasPath)

    # For each time delay
    for timeDelay in CRPtimeDelays:
        print('Time Delay: %s' % str(timeDelay))
        # For each dimension
        for dimension in CRPdimensions:
            print('\tDimension: %s' % str(dimension))
            # For each method
            for method in CRPmethods:
                print('\t\tMethod: %s' % method)
                # For each feature
                for featureName in featureNames:
                    print('\t\t\tFeature: %s' % featureName)
                    # Get performances from each pair of folders (N.B. this
                    # only does the first pair at the moment)
                    featureFileDict = getFeatureFileDictAllFolders(
                        mazurkasPath, mazurkaIds[:2], featureName)
                    # Create CRPs for all files
                    print('Creating CRP files')
                    for featureFileId, fileProps in featureFileDict.items():
                        createCRPfile(fileProps['FilePath'],
                                      fileProps['PieceId'], featureFileId,
                                      method, dimension, timeDelay)
                    # Load CRP files into memory
                    CRPfiles, CRPs = loadCRPfiles()
                    numCRPfiles = len(CRPfiles)
                    # Create NCDs for all unordered pairs of CRPs
                    print('Creating NCD files')
                    for i1 in range(numCRPfiles - 1):
                        for i2 in range(i1 + 1, numCRPfiles):
                            createNCDfile(CRPfiles[i1], CRPfiles[i2], CRPs[i1],
                                          CRPs[i2])
                    # Delete CRP files
                    print('Deleting CRP files')
                    for CRPfile in CRPfiles:
                        try:
                            os.remove(CRPpath + CRPfile)
                        except OSError:
                            # Best-effort clean-up: a file that is already
                            # gone or cannot be removed must not abort the
                            # run. Only file-system errors are ignored, so
                            # KeyboardInterrupt etc. still propagate.
                            pass

    # Empty marker file signalling that every combination was processed
    with open(NCDpath + 'finished.txt', 'w'):
        pass
示例#10
0
    def getFeatureFileDictValidationSetFolders(cls, rootPath, featureName):
        '''
        Returns one featureFileDict for the given feature, merged from the
        per-piece dicts of every validation-set folder under rootPath.

        The validation-set folders are whatever getValidationSetFolders
        selects from the alphabetically ordered folder names in rootPath.
        See getFeatureFileDict for details of the featureFileDict.
        '''
        allFolders = getFolderNames(rootPath, orderAlphabetically=True)

        mergedDict = {}
        for pieceFolder in getValidationSetFolders(allFolders):
            piecePath = rootPath + pieceFolder + '/'
            featuresPath = FeatureFileProps.getFeatureFolderPath(
                piecePath, featureName)
            pieceDict = FeatureFileProps.getFeatureFileDictValidationSet(
                pieceFolder, featuresPath, featureName)
            # Merge a deep copy so the merged dict shares no state with the
            # per-piece dict
            mergedDict.update(copy.deepcopy(pieceDict))

        return mergedDict
示例#11
0
def getFeatureFrequenciesDataFrame(featureName, weightMatrix, biasesMatrix,
                                   featureOffset, featureScaling,
                                   NNtimeStacking, numFolders,
                                   numFilesPerFolder):
    '''
    Returns one dataframe with the transformed features of several
    performances, concatenated row-wise with a fresh integer index.

    At most the first 20 'mazurka' folders (alphabetical order) are used,
    further limited to numFolders when that is not None. Within each folder
    the .csv files are taken in alphabetical order, limited to
    numFilesPerFolder when that is not None. Every file is loaded and
    transformed by loadAndTransformFeatureFile using the given offset,
    scaling, time stacking, weights and biases.
    '''
    piecesPath = FFP.getRootPath(featureName)

    # The 'contains' filter avoids the new powerspectrum folder
    mazurkaFolders = getFolderNames(piecesPath,
                                    contains='mazurka',
                                    orderAlphabetically=True)
    selectedFolders = mazurkaFolders[:20]
    if numFolders is not None:
        selectedFolders = selectedFolders[:numFolders]

    transformedFrames = []
    for folder in selectedFolders:
        featuresPath = FFP.getFeatureFolderPath(piecesPath + folder,
                                                featureName)
        csvFiles = getFileNames(featuresPath,
                                orderAlphabetically=True,
                                endsWith='.csv')
        if numFilesPerFolder is not None:
            csvFiles = csvFiles[:numFilesPerFolder]

        # The progress counter restarts at 1 for every folder
        for fileNumber, csvFile in enumerate(csvFiles, 1):
            print('Transforming Features %i' % fileNumber)
            transformedFrames.append(
                loadAndTransformFeatureFile(featuresPath + csvFile,
                                            featureOffset, featureScaling,
                                            NNtimeStacking, weightMatrix,
                                            biasesMatrix))

    # Stack the transformed features of all performances
    return pd.concat(transformedFrames, ignore_index=True)
示例#12
0
for settingsDict in settingsDicts:

    resultsFn = '/u7.swansea/s11/abpg162/project/results_files/validation/' + settingsDict[
        'Run Name'] + '.pkl'
    if not os.path.exists(resultsFn) and settingsDict['Run Name'] is not None:

        cleanCRPfolder()
        cleanNCDfolder()
        startDateTime = datetime.now()

        # Load base features
        baseFeatureName = settingsDict['Feature Name']
        piecesPath = FFP.getRootPath(baseFeatureName)
        pieceIds = getValidationSetFolders(
            getFolderNames(piecesPath,
                           contains='mazurka',
                           orderAlphabetically=True))
        print 'Loading feature file dict...'
        featureFileDict = FFP.loadFeatureFileDictValidationSetFolders(
            piecesPath, settingsDict['Feature Name'])
        print '...done.'

        # load weights etc. if this is for a neural net run
        if settingsDict['NN Type'] is not None:
            weightMatrix, biases, featureOffset, featureScaling = get_NN_NCD_params(
                NNtype=settingsDict['NN Type'],
                featureName=settingsDict['Feature Name'],
                learningRate=settingsDict['dA Learning Rate'],
                learningRateBoostFactor=settingsDict[
                    'dA Learning Rate Boost Factor'],
                corruptionLevel=settingsDict['dA Corruption Level'],