def getFeatureValuesDataFrame(featureName, numFolders, numFilesPerFolder,
                              featureFileType='.csv'):
    '''
    Returns a DataFrame concatenating the feature values for a number of
    performances of a number of pieces.

    Inputs:
        :featureName: name of the feature whose files are loaded
        :numFolders: maximum number of piece folders to process
            (None = all folders).  BUG FIX: this argument was previously
            accepted but ignored, so every folder was always processed.
        :numFilesPerFolder: maximum number of performance files read per
            folder (None slices to all files)
        :featureFileType: '.csv' or '.pkl'; files of any other type are
            silently skipped, in which case pd.concat raises ValueError
            on the empty list
    '''
    # Get names of the piece folders for this feature
    rootPath = FFP.getRootPath(featureName)
    pieceFolders = getFolderNames(rootPath, contains='mazurka',
                                  orderAlphabetically=True)
    if numFolders is not None:
        pieceFolders = pieceFolders[:numFolders]
    featureDataFrames = []
    # Iterate over pieces
    for pieceFolder in pieceFolders:
        print('processing folder: %s' % pieceFolder)
        featuresPath = FFP.getFeatureFolderPath(rootPath + pieceFolder,
                                                featureName)
        performanceFiles = getFileNames(
            featuresPath, endsWith=featureFileType,
            orderAlphabetically=True)[:numFilesPerFolder]
        # Iterate over performances of this piece
        for performanceFile in performanceFiles:
            print('\tprocessing file: %s' % performanceFile)
            featureFn = os.path.join(featuresPath, performanceFile)
            if featureFileType == '.csv':
                featureDataFrames.append(
                    pd.read_csv(featureFn, header=None, index_col=0))
            elif featureFileType == '.pkl':
                featureDataFrames.append(pd.read_pickle(featureFn))
    # Stack all performances into a single frame with a fresh index
    dfAllPerformances = pd.concat(featureDataFrames, ignore_index=True)
    return dfAllPerformances
def getFeatureFolderPath(cls, piecePath, featureName):
    '''
    Locate the folder containing a specific feature within a piece folder.

    Inputs:
        :piecePath: path to the folder holding either per-feature
            sub-folders (original mazurka-dataset layout) or the feature
            files themselves (newly created features such as CENS)
        :featureName: the name of the feature

    Returns the feature folder path, always with a trailing slash.
    '''
    # Normalise to exactly one trailing slash
    normalisedPath = piecePath.rstrip('/') + '/'
    # CENS/FENS features live directly inside the piece folder
    if 'CENS' in featureName or 'FENS' in featureName:
        return normalisedPath
    # Otherwise find the unique sub-folder whose name contains the
    # feature's known folder prefix
    prefix = FeatureFileProps.folderPrefix[featureName]
    matches = [
        name
        for name in getFolderNames(normalisedPath, orderAlphabetically=True)
        if prefix in name
    ]
    assert len(matches) == 1, 'number of feature folders must be 1 (it is %i)' % len(matches)
    return normalisedPath + matches[0] + '/'
def downloadAllFeatureHistograms():
    '''
    Downloads histograms for every feature found in the new_features
    folder (no transformed features are included).
    '''
    featureNames = getFolderNames(newFeaturesPath, orderAlphabetically=True)
    download_Feature_histograms(featureNames=featureNames,
                                transformedFeatureNames=[])
def getFeatureFilesDetails(featureNames):
    '''
    Collect details (feature, piece, performance, filename) of every .csv
    file for each listed feature and write them to
    'Feature File Details.csv'.
    '''
    rows = []
    for featureName in featureNames:
        # Locate the feature's root and feature folders
        rootPath = FFP.getRootPath(featureName)
        featuresPath = FFP.getFeatureFolderPath(rootPath, featureName)
        # Walk every piece folder, then every performance file in it
        for pieceFolder in getFolderNames(featuresPath,
                                          orderAlphabetically=True):
            csvNames = getFileNames(featuresPath + pieceFolder,
                                    endsWith='.csv',
                                    orderAlphabetically=True)
            for csvName in csvNames:
                rows.append({
                    'Feature': featureName,
                    'Piece': pieceFolder,
                    # Strip the feature-specific suffix to get the
                    # performance identifier
                    'Performance': rcut(csvName,
                                        FFP.fileSuffix[featureName]),
                    'Filename': csvName,
                })
    pd.DataFrame(rows).to_csv('Feature File Details.csv')
def loadFeatureFileDict(featureName, numFolders, numFilesPerFolder):
    '''
    Load the featureFileDict covering the first numFolders mazurka pieces,
    with at most numFilesPerFolder performances each.

    Returns (featureFileDict, pieceIds).
    '''
    piecesPath = FFP.getRootPath(featureName)
    allPieceIds = getFolderNames(piecesPath, contains='mazurka',
                                 orderAlphabetically=True)
    pieceIds = allPieceIds[:numFolders]
    print('Loading feature file dict...')
    featureFileDict = FFP.loadFeatureFileDictAllFolders(
        piecesPath, pieceIds, featureName, numFilesPerFolder)
    print('...done.')
    return featureFileDict, pieceIds
def download_NN_training_results(NNtype='dA'):
    '''
    Copy each training run's training_records.pkl from the NN
    training_results folders into the transfer folder, renamed after the
    run folder. Runs without a training_records.pkl are skipped.
    '''
    trainingPath = '%straining_results/%s/' % (NNpath, NNtype)
    outputPath = transferPath + 'NN training results/'
    createPath(outputPath)
    for runFolder in getFolderNames(trainingPath, orderAlphabetically=True):
        sourceFn = trainingPath + runFolder + '/' + 'training_records.pkl'
        if os.path.exists(sourceFn):
            shutil.copyfile(sourceFn, outputPath + runFolder + '.pkl')
def download_MAP_csv_results(resultsIndex=None):
    '''
    Copy the .csv MAP results of run number resultsIndex into the transfer
    folder.  With resultsIndex=None, recurse over every 'run*' folder
    found in the NCD folder.
    '''
    if resultsIndex is None:
        # No index given: process every run folder in turn
        runFolders = getFolderNames(NCDpath, startsWith='run',
                                    orderAlphabetically=True)
        for runFolder in runFolders:
            # Folder names look like 'run<N>'; recurse with that index
            download_MAP_csv_results(int(runFolder[3:]))
    else:
        sourcePath = '%srun%i/' % (NCDpath, resultsIndex)
        copyFiles(sourcePath, transferPath, filesThatEndWith='.csv')
def getFeatureFolderPathAndName(piecePath, featureName):
    '''
    Returns the path and the name of the folder holding the given
    feature's files within a piece folder.  Exactly one matching folder
    must exist.
    '''
    prefix = featuresDict[featureName]['folder prefix']
    matching = [
        name for name in getFolderNames(piecePath) if prefix in name
    ]
    assert len(matching) == 1, 'number of feature folders must be 1 (it is %i)' % len(matching)
    folderName = matching[0]
    return piecePath + folderName + '/', folderName
def createNCDfiles(existingNCDsDataFrame=None):
    '''
    Create NCD files for every combination of CRP time delay, dimension,
    method and feature, over the first two mazurka folders.

    For each parameter combination: CRP files are created for every
    performance, NCD files are computed for every unordered pair of CRPs,
    and the temporary CRP files are then deleted.  Touches 'finished.txt'
    in the NCD folder as a completion marker.

    Inputs:
        :existingNCDsDataFrame: accepted for interface compatibility but
            currently unused -- TODO(review): use it to skip NCDs that
            were already computed.
    '''
    mazurkaIds = getFolderNames(mazurkasPath)
    for timeDelay in CRPtimeDelays:
        print('Time Delay: %s' % str(timeDelay))
        for dimension in CRPdimensions:
            print('\tDimension: %s' % str(dimension))
            for method in CRPmethods:
                print('\t\tMethod: %s' % method)
                for featureName in featureNames:
                    print('\t\t\tFeature: %s' % featureName)
                    # N.B. only the first pair of folders is processed
                    # at the moment
                    featureFileDict = getFeatureFileDictAllFolders(
                        mazurkasPath, mazurkaIds[:2], featureName)
                    # Create CRPs for all files
                    print('Creating CRP files')
                    for featureFileId in featureFileDict.keys():
                        filePath = featureFileDict[featureFileId]['FilePath']
                        pieceId = featureFileDict[featureFileId]['PieceId']
                        createCRPfile(filePath, pieceId, featureFileId,
                                      method, dimension, timeDelay)
                    # Load CRP files into memory
                    CRPfiles, CRPs = loadCRPfiles()
                    numCRPfiles = len(CRPfiles)
                    # Create NCDs for all unordered pairs of CRPs
                    print('Creating NCD files')
                    for i1 in np.arange(numCRPfiles - 1):
                        for i2 in np.arange(i1 + 1, numCRPfiles):
                            createNCDfile(CRPfiles[i1], CRPfiles[i2],
                                          CRPs[i1], CRPs[i2])
                    # Delete temporary CRP files.  Best effort: a file
                    # that cannot be removed is left behind rather than
                    # aborting the whole run (was a bare `except: pass`;
                    # narrowed to OSError so real bugs still surface).
                    print('Deleting CRP files')
                    for CRPfile in CRPfiles:
                        if os.path.exists(CRPpath + CRPfile):
                            try:
                                os.remove(CRPpath + CRPfile)
                            except OSError:
                                pass
    # Marker file signalling that the whole run completed
    fFinished = open(NCDpath + 'finished.txt', 'w')
    fFinished.close()
def getFeatureFileDictValidationSetFolders(cls, rootPath, featureName):
    '''
    Build a combined featureFileDict for the given feature covering every
    piece folder in the validation set under rootPath.

    See getFeatureFileDict for the structure of a featureFileDict.  Each
    per-piece dict is deep-copied before being merged so the combined
    dict shares no state with any cached per-piece data.
    '''
    combined = {}
    allFolders = getFolderNames(rootPath, orderAlphabetically=True)
    # Restrict to the validation-set subset of piece folders
    for pieceFolder in getValidationSetFolders(allFolders):
        featuresPath = FeatureFileProps.getFeatureFolderPath(
            rootPath + pieceFolder + '/', featureName)
        pieceDict = FeatureFileProps.getFeatureFileDictValidationSet(
            pieceFolder, featuresPath, featureName)
        combined.update(copy.deepcopy(pieceDict))
    return combined
def getFeatureFrequenciesDataFrame(featureName, weightMatrix, biasesMatrix,
                                   featureOffset, featureScaling,
                                   NNtimeStacking, numFolders,
                                   numFilesPerFolder):
    '''
    Transform the features of multiple performances through the neural
    net defined by weightMatrix/biasesMatrix and return all transformed
    features concatenated into a single DataFrame.

    Inputs:
        :featureName: name of the base feature to load
        :weightMatrix, :biasesMatrix: NN parameters for the transform
        :featureOffset, :featureScaling: feature normalisation parameters
        :NNtimeStacking: number of time frames stacked per NN input
        :numFolders: maximum number of piece folders (None = no limit).
            BUG FIX: removed a stray hard-coded `[:20]` cap that silently
            limited the folders even when numFolders was None; the
            contains='mazurka' filter already excludes the unrelated
            powerspectrum folder that the cap's comment referred to.
        :numFilesPerFolder: maximum performances per folder (None = all)
    '''
    # Get the piece folders ('mazurka' filter avoids the new
    # powerspectrum folder)
    piecesPath = FFP.getRootPath(featureName)
    piecesFolders = getFolderNames(piecesPath, contains='mazurka',
                                   orderAlphabetically=True)
    if numFolders is not None:
        piecesFolders = piecesFolders[:numFolders]
    featureDataFrames = []
    # For each piece
    for piecesFolder in piecesFolders:
        # Get performances of the piece
        featuresPath = FFP.getFeatureFolderPath(piecesPath + piecesFolder,
                                                featureName)
        performances = getFileNames(featuresPath, orderAlphabetically=True,
                                    endsWith='.csv')
        if numFilesPerFolder is not None:
            performances = performances[:numFilesPerFolder]
        for pf, performance in enumerate(performances, start=1):
            print('Transforming Features %i' % pf)
            # Load feature file and transform through the NN
            dfTransformedFeatures = loadAndTransformFeatureFile(
                featuresPath + performance, featureOffset, featureScaling,
                NNtimeStacking, weightMatrix, biasesMatrix)
            featureDataFrames.append(dfTransformedFeatures)
    # Concatenate the transformed features of all performances
    dfAllPerformances = pd.concat(featureDataFrames, ignore_index=True)
    return dfAllPerformances
for settingsDict in settingsDicts: resultsFn = '/u7.swansea/s11/abpg162/project/results_files/validation/' + settingsDict[ 'Run Name'] + '.pkl' if not os.path.exists(resultsFn) and settingsDict['Run Name'] is not None: cleanCRPfolder() cleanNCDfolder() startDateTime = datetime.now() # Load base features baseFeatureName = settingsDict['Feature Name'] piecesPath = FFP.getRootPath(baseFeatureName) pieceIds = getValidationSetFolders( getFolderNames(piecesPath, contains='mazurka', orderAlphabetically=True)) print 'Loading feature file dict...' featureFileDict = FFP.loadFeatureFileDictValidationSetFolders( piecesPath, settingsDict['Feature Name']) print '...done.' # load weights etc. if this is for a neural net run if settingsDict['NN Type'] is not None: weightMatrix, biases, featureOffset, featureScaling = get_NN_NCD_params( NNtype=settingsDict['NN Type'], featureName=settingsDict['Feature Name'], learningRate=settingsDict['dA Learning Rate'], learningRateBoostFactor=settingsDict[ 'dA Learning Rate Boost Factor'], corruptionLevel=settingsDict['dA Corruption Level'],