def stratified_fold(data_dir, annotation_file, n_folds=10, savefile=''):
    """Split the ``.pitch`` files under ``data_dir`` into stratified folds.

    Files are stratified by mode, where the mode of a file is the name of
    the folder that contains it. The tonic of each file is looked up in the
    annotations by the file's name without extension (its "mbid").

    Args:
        data_dir: Directory searched for ``.pitch`` files; also passed to
            ``fileOperations.getModeNames`` to obtain the mode list.
        annotation_file: Path to a JSON file holding a list of objects, each
            with at least the keys ``'mbid'`` and ``'tonic'``.
        n_folds: Number of folds handed to ``StratifiedKFold``.
        savefile: If non-empty, the folds are also written to this path as
            indented JSON.

    Returns:
        A dict keyed ``'fold0'``, ``'fold1'``, ...; each value is a dict with
        ``'train'`` and ``'test'`` lists whose items are dicts with the keys
        ``'file'``, ``'mode'``, ``'tonic'`` and ``'mbid'``.

    Raises:
        ValueError: If one or more files have no entry in the annotations.
            (Previously this misaligned the tonics with the files or failed
            later with an opaque ``IndexError``.)
    """
    modes = fileOperations.getModeNames(data_dir)
    [filepaths, basefolders, filenames] = fileOperations.getFileNamesInDir(
        data_dir, extension='.pitch')

    with open(annotation_file, 'r') as a:
        annotations = json.load(a)

    filemodes = [os.path.basename(b) for b in basefolders]
    mbids = [os.path.splitext(f)[0] for f in filenames]

    # Look tonics up by mbid so that every file gets exactly one tonic, kept
    # at the same index as the file. Only annotations of the files at hand
    # are read; if an mbid is annotated more than once, the first one wins.
    wanted = set(mbids)
    tonic_by_mbid = {}
    for annotation in annotations:
        mbid = annotation['mbid']
        if mbid in wanted and mbid not in tonic_by_mbid:
            tonic_by_mbid[mbid] = annotation['tonic']

    missing = [m for m in mbids if m not in tonic_by_mbid]
    if missing:
        raise ValueError('No annotation found in %s for: %s'
                         % (annotation_file, ', '.join(missing)))
    tonics = [tonic_by_mbid[m] for m in mbids]

    def make_entry(idx):
        # A fresh dict per call, so no entry is shared between folds.
        return {'file': filepaths[idx], 'mode': filemodes[idx],
                'tonic': tonics[idx], 'mbid': mbids[idx]}

    # get the stratified folds
    mode_idx = [modes.index(m) for m in filemodes]
    skf = cross_validation.StratifiedKFold(mode_idx, n_folds=n_folds,
                                           shuffle=True)

    folds = dict()
    for ff, (train_idx, test_idx) in enumerate(skf):
        folds['fold' + str(ff)] = {
            'train': [make_entry(i) for i in train_idx],
            'test': [make_entry(i) for i in test_idx],
        }

    # save the folds to a file if specified
    if savefile:
        with open(savefile, 'w') as f:
            json.dump(folds, f, indent=2)

    return folds
# coding: utf-8

import sys

# The project packages are imported relative to this location, so the path
# has to be extended before the imports below.
sys.path.insert(0, '../ModeTonicEstimation/')

import json
import os
from extras import fileOperations as fo
import numpy as np
from ModeTonicEstimation import Chordia

# I/O
base_dir = '../../experiments/raga/'
data_dir = os.path.join(base_dir, 'data')
experiments_dir = os.path.join(base_dir, 'experiments')

modes = fo.getModeNames(data_dir)

# NOTE(review): frame_rate is given as 128/44100 (about 0.0029); confirm the
# unit Chordia expects for this parameter.
che = Chordia.Chordia(
    step_size=10,
    smooth_factor=15,
    chunk_size=0,
    threshold=0.5,
    overlap=0,
    frame_rate=128.0 / 44100,
)

# indexing
n_exp = 20
n_folds = 14
n_modes = len(modes)

# The first command-line argument is a 1-based job number. It is converted to
# a 0-based flat index and unravelled over the (n_exp, n_folds, n_modes) grid.
input_num = int(sys.argv[1]) - 1
ex_idx, fo_idx, mode_idx = np.unravel_index(
    input_num, (n_exp, n_folds, n_modes))

cur_mode = modes[mode_idx]