import math
import os
from multiprocessing import Pool

import numpy as np
import torch
from sortedcontainers import SortedList
from torch.utils.data import Dataset

# Preprocess and FeatureTypes live in the project's preprocessing module; the
# import path below is an assumption, adjust it to the local package layout.
from utils.preprocess import FeatureTypes, Preprocess


class AudioDataset(Dataset):
    def __init__(self, config, root_dir='/data/music/chord_recognition',
                 dataset_names=('ce200', ), featuretype=FeatureTypes.cqt,
                 num_workers=20, train=False, preprocessing=False,
                 resize=None, kfold=4):
        super(AudioDataset, self).__init__()

        self.config = config
        self.root_dir = root_dir
        self.dataset_names = dataset_names
        self.preprocessor = Preprocess(config, featuretype, dataset_names, self.root_dir)
        self.resize = resize
        self.train = train
        self.ratio = config.experiment['data_ratio']

        # preprocessing hyperparameters:
        # song_hz, n_bins, bins_per_octave, hop_length
        mp3_config = config.mp3
        feature_config = config.feature
        self.mp3_string = "%d_%.1f_%.1f" % \
            (mp3_config['song_hz'], mp3_config['inst_len'], mp3_config['skip_interval'])
        self.feature_string = "%s_%d_%d_%d" % \
            (featuretype.value, feature_config['n_bins'],
             feature_config['bins_per_octave'], feature_config['hop_length'])

        if feature_config['large_voca']:
            # preprocess only if the result directory does not already exist
            is_preprocessed = os.path.exists(
                os.path.join(root_dir, 'result', dataset_names[0] + '_voca',
                             self.mp3_string, self.feature_string))
            if (not is_preprocessed) or preprocessing:
                midi_paths = self.preprocessor.get_all_files()
                print(' --------- need preprocessed -----------')
                if num_workers > 1:
                    num_path_per_process = math.ceil(len(midi_paths) / num_workers)
                    args = [midi_paths[i * num_path_per_process:(i + 1) * num_path_per_process]
                            for i in range(num_workers)]

                    # preprocess in parallel
                    p = Pool(processes=num_workers)
                    p.map(self.preprocessor.generate_labels_features_voca, args)
                    p.close()
                else:
                    self.preprocessor.generate_labels_features_voca(midi_paths)
            # kfold is the 5-fold index (0, 1, 2, 3, 4)
            self.song_names, self.paths = self.get_paths_voca(kfold=kfold)
        else:
            # preprocess only if the result directory does not already exist
            is_preprocessed = os.path.exists(
                os.path.join(root_dir, 'result', dataset_names[0],
                             self.mp3_string, self.feature_string))
            if (not is_preprocessed) or preprocessing:
                midi_paths = self.preprocessor.get_all_files()
                if num_workers > 1:
                    num_path_per_process = math.ceil(len(midi_paths) / num_workers)
                    args = [midi_paths[i * num_path_per_process:(i + 1) * num_path_per_process]
                            for i in range(num_workers)]

                    # preprocess in parallel
                    p = Pool(processes=num_workers)
                    p.map(self.preprocessor.generate_labels_features_new, args)
                    p.close()
                else:
                    self.preprocessor.generate_labels_features_new(midi_paths)
            # kfold is the 5-fold index (0, 1, 2, 3, 4)
            self.song_names, self.paths = self.get_paths(kfold=kfold)

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        instance_path = self.paths[idx]
        res = dict()
        data = torch.load(instance_path)
        res['feature'] = np.log(np.abs(data['feature']) + 1e-6)
        res['chord'] = data['chord']
        return res

    def get_paths(self, kfold=4):
        temp = {}
        used_song_names = list()
        for name in self.dataset_names:
            dataset_path = os.path.join(self.root_dir, "result", name,
                                        self.mp3_string, self.feature_string)
            song_names = os.listdir(dataset_path)
            for song_name in song_names:
                paths = []
                instance_names = os.listdir(os.path.join(dataset_path, song_name))
                if len(instance_names) > 0:
                    used_song_names.append(song_name)
                for instance_name in instance_names:
                    paths.append(os.path.join(dataset_path, song_name, instance_name))
                temp[song_name] = paths

        # throw away unused song names
        song_names = used_song_names
        song_names = SortedList(song_names)
        print('Total used song length : %d' % len(song_names))

        tmp = []
        for i in range(len(song_names)):
            tmp += temp[song_names[i]]
        print('Total instances (train and valid) : %d' % len(tmp))

        # divide train/valid dataset using k fold
        result = []
        total_fold = 5
        quotient = len(song_names) // total_fold
        remainder = len(song_names) % total_fold
        fold_num = [0]
        for i in range(total_fold):
            fold_num.append(quotient)
        for i in range(remainder):
            fold_num[i + 1] += 1
        for i in range(total_fold):
            fold_num[i + 1] += fold_num[i]

        if self.train:
            tmp = []
            # training folds: keep every (augmented) instance
            for k in range(total_fold):
                if k != kfold:
                    for i in range(fold_num[k], fold_num[k + 1]):
                        result += temp[song_names[i]]
                    tmp += song_names[fold_num[k]:fold_num[k + 1]]
            song_names = tmp
        else:
            # validation fold: keep only the non-augmented instances
            for i in range(fold_num[kfold], fold_num[kfold + 1]):
                instances = temp[song_names[i]]
                instances = [inst for inst in instances if "1.00_0" in inst]
                result += instances
            song_names = song_names[fold_num[kfold]:fold_num[kfold + 1]]

        return song_names, result

    def get_paths_voca(self, kfold=4):
        temp = {}
        used_song_names = list()
        for name in self.dataset_names:
            dataset_path = os.path.join(self.root_dir, "result", name + '_voca',
                                        self.mp3_string, self.feature_string)
            song_names = os.listdir(dataset_path)
            for song_name in song_names:
                paths = []
                instance_names = os.listdir(os.path.join(dataset_path, song_name))
                if len(instance_names) > 0:
                    used_song_names.append(song_name)
                for instance_name in instance_names:
                    paths.append(os.path.join(dataset_path, song_name, instance_name))
                temp[song_name] = paths

        # throw away unused song names
        song_names = used_song_names
        song_names = SortedList(song_names)
        print('Total used song length : %d' % len(song_names))

        tmp = []
        for i in range(len(song_names)):
            tmp += temp[song_names[i]]
        print('Total instances (train and valid) : %d' % len(tmp))

        # divide train/valid dataset using k fold
        result = []
        total_fold = 5
        quotient = len(song_names) // total_fold
        remainder = len(song_names) % total_fold
        fold_num = [0]
        for i in range(total_fold):
            fold_num.append(quotient)
        for i in range(remainder):
            fold_num[i + 1] += 1
        for i in range(total_fold):
            fold_num[i + 1] += fold_num[i]

        if self.train:
            tmp = []
            # training folds: keep every (augmented) instance
            for k in range(total_fold):
                if k != kfold:
                    for i in range(fold_num[k], fold_num[k + 1]):
                        result += temp[song_names[i]]
                    tmp += song_names[fold_num[k]:fold_num[k + 1]]
            song_names = tmp
        else:
            # validation fold: keep only the non-augmented instances
            for i in range(fold_num[kfold], fold_num[kfold + 1]):
                instances = temp[song_names[i]]
                instances = [inst for inst in instances if "1.00_0" in inst]
                result += instances
            song_names = song_names[fold_num[kfold]:fold_num[kfold + 1]]

        return song_names, result
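# --- Illustrative sketch (not part of the original source) -------------------
# The fold-boundary arithmetic used by get_paths / get_paths_voca above,
# extracted into a standalone helper so the 5-fold split can be checked in
# isolation. The function name and the example song count are hypothetical.
def _fold_boundaries_example(num_songs, total_fold=5):
    """Cumulative fold boundaries, e.g. 202 songs -> [0, 41, 82, 122, 162, 202]."""
    quotient = num_songs // total_fold
    remainder = num_songs % total_fold
    fold_num = [0] + [quotient] * total_fold
    # spread the remainder over the first `remainder` folds, then accumulate
    for i in range(remainder):
        fold_num[i + 1] += 1
    for i in range(total_fold):
        fold_num[i + 1] += fold_num[i]
    # fold `kfold` spans songs [fold_num[kfold], fold_num[kfold + 1]) and is
    # held out for validation; the remaining folds form the training set.
    return fold_num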
class AudioDataset(Dataset):
    def __init__(self, config, root_dir='../dataset', dataset_names=('isophonic', ),
                 featuretype=FeatureTypes.cqt, num_workers=20, train=False,
                 from_json=False, preprocessing=False, resize=None, kfold=4):
        super(AudioDataset, self).__init__()

        self.config = config
        self.root_dir = root_dir
        self.dataset_names = dataset_names
        self.preprocessor = Preprocess(config, featuretype, dataset_names, self.root_dir)
        self.resize = resize
        self.train = train
        self.ratio = config.experiment['data_ratio']
        self.train_from_json = from_json

        # preprocessing hyperparameters:
        # song_hz, n_bins, bins_per_octave, hop_length
        mp3_config = config.mp3
        feature_config = config.feature
        self.mp3_string = "%d_%.1f_%.1f" % \
            (mp3_config['song_hz'], mp3_config['inst_len'], mp3_config['skip_interval'])
        self.feature_string = "%s_%d_%d_%d" % \
            (featuretype.value, feature_config['n_bins'],
             feature_config['bins_per_octave'], feature_config['hop_length'])

        if feature_config['large_voca']:
            if self.train_from_json:
                # preprocess only if the separated/qualified .pt directory does not already exist
                is_preprocessed = os.path.exists(
                    os.path.join(root_dir, 'dataset',
                                 'CE200_separated_qualified_{inst_len}'.format(
                                     inst_len=mp3_config['inst_len'])))
                if (not is_preprocessed) or preprocessing:
                    # get all features from json files
                    features_and_labels = self.preprocessor.get_all_json_files()
                    self.preprocessor.get_separated_labels_qualified(features_and_labels)
                self.song_names, self.paths = self.get_CE200_pt_path(kfold=kfold)
            else:
                # preprocess only if the result directory does not already exist
                is_preprocessed = os.path.exists(
                    os.path.join(root_dir, 'result', dataset_names[0] + '_voca',
                                 self.mp3_string, self.feature_string))
                if (not is_preprocessed) or preprocessing:
                    midi_paths = self.preprocessor.get_all_files()
                    if num_workers > 1:
                        num_path_per_process = math.ceil(len(midi_paths) / num_workers)
                        args = [midi_paths[i * num_path_per_process:(i + 1) * num_path_per_process]
                                for i in range(num_workers)]

                        # preprocess in parallel
                        p = Pool(processes=num_workers)
                        p.map(self.preprocessor.generate_labels_features_voca, args)
                        p.close()
                    else:
                        self.preprocessor.generate_labels_features_voca(midi_paths)
                # kfold is the 5-fold index (0, 1, 2, 3, 4)
                self.song_names, self.paths = self.get_paths_voca(kfold=kfold)
        else:
            # preprocess only if the result directory does not already exist
            is_preprocessed = os.path.exists(
                os.path.join(root_dir, 'result', dataset_names[0],
                             self.mp3_string, self.feature_string))
            if (not is_preprocessed) or preprocessing:
                midi_paths = self.preprocessor.get_all_files()
                if num_workers > 1:
                    num_path_per_process = math.ceil(len(midi_paths) / num_workers)
                    args = [midi_paths[i * num_path_per_process:(i + 1) * num_path_per_process]
                            for i in range(num_workers)]

                    # preprocess in parallel
                    p = Pool(processes=num_workers)
                    p.map(self.preprocessor.generate_labels_features_new, args)
                    p.close()
                else:
                    self.preprocessor.generate_labels_features_new(midi_paths)
            # kfold is the 5-fold index (0, 1, 2, 3, 4)
            self.song_names, self.paths = self.get_paths(kfold=kfold)

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        if self.train_from_json:
            instance_path = self.paths[idx]
            res = dict()
            with open(instance_path, 'rb') as pt_file:
                try:
                    data = torch.load(pt_file)
                except EOFError:
                    print('file {file} might be empty'.format(file=pt_file))
                    raise RuntimeError
            res['feature'] = np.log(np.abs(data['feature']) + 1e-6)
            # time dimension = ceil(inst_len / (hop_length / song_hz)) = 431
            res['chord'] = data['chord']
            return res
        else:
            # return feature and chord from a single .pt file => 10 sec
            instance_path = self.paths[idx]
            res = dict()
            data = torch.load(instance_path)
            res['feature'] = np.log(np.abs(data['feature']) + 1e-6)
            # time dimension = ceil(inst_len / (hop_length / song_hz)) = 431
            res['chord'] = data['chord']
            return res

    def get_paths(self, kfold=4):
        temp = {}
        used_song_names = list()
        for name in self.dataset_names:
            dataset_path = os.path.join(self.root_dir, "result", name,
                                        self.mp3_string, self.feature_string)
            song_names = os.listdir(dataset_path)
            for song_name in song_names:
                paths = []
                instance_names = os.listdir(os.path.join(dataset_path, song_name))
                if len(instance_names) > 0:
                    used_song_names.append(song_name)
                for instance_name in instance_names:
                    paths.append(os.path.join(dataset_path, song_name, instance_name))
                temp[song_name] = paths

        # throw away unused song names
        song_names = used_song_names
        song_names = SortedList(song_names)
        print('Total used song length : %d' % len(song_names))

        tmp = []
        for i in range(len(song_names)):
            tmp += temp[song_names[i]]
        print('Total instances (train and valid) : %d' % len(tmp))

        # divide train/valid dataset using k fold
        result = []
        total_fold = 5
        quotient = len(song_names) // total_fold
        remainder = len(song_names) % total_fold
        fold_num = [0]
        for i in range(total_fold):
            fold_num.append(quotient)
        for i in range(remainder):
            fold_num[i + 1] += 1
        for i in range(total_fold):
            fold_num[i + 1] += fold_num[i]

        if self.train:
            tmp = []
            # training folds: keep every (augmented) instance
            for k in range(total_fold):
                if k != kfold:
                    for i in range(fold_num[k], fold_num[k + 1]):
                        result += temp[song_names[i]]
                    tmp += song_names[fold_num[k]:fold_num[k + 1]]
            song_names = tmp
        else:
            # validation fold: keep only the non-augmented instances
            for i in range(fold_num[kfold], fold_num[kfold + 1]):
                instances = temp[song_names[i]]
                instances = [inst for inst in instances if "1.00_0" in inst]
                result += instances
            song_names = song_names[fold_num[kfold]:fold_num[kfold + 1]]

        return song_names, result

    def get_paths_voca(self, kfold=4):
        temp = {}
        used_song_names = list()
        for name in self.dataset_names:
            dataset_path = os.path.join(self.root_dir, "result", name + '_voca',
                                        self.mp3_string, self.feature_string)
            song_names = os.listdir(dataset_path)
            for song_name in song_names:
                paths = []
                # HINT: instance_name => shifted or stretched (augmented) instance
                instance_names = os.listdir(os.path.join(dataset_path, song_name))
                if len(instance_names) > 0:
                    used_song_names.append(song_name)
                for instance_name in instance_names:
                    paths.append(os.path.join(dataset_path, song_name, instance_name))
                temp[song_name] = paths

        # throw away unused song names
        # song_names = used_song_names
        # sort the (numeric) song folder names numerically
        song_names = list(map(int, song_names))
        song_names = SortedList(song_names)
        song_names = list(map(str, song_names))
        print('Total used song length : %d' % len(song_names))

        tmp = []
        for i in range(len(song_names)):
            tmp += temp[song_names[i]]
        print('Total instances (train and valid) : %d' % len(tmp))

        # divide train/valid dataset using k fold
        result = []
        total_fold = 5
        quotient = len(song_names) // total_fold
        remainder = len(song_names) % total_fold
        fold_num = [0]
        for i in range(total_fold):
            fold_num.append(quotient)
        for i in range(remainder):
            fold_num[i + 1] += 1
        for i in range(total_fold):
            fold_num[i + 1] += fold_num[i]

        if self.train:
            tmp = []
            # training folds: keep every (augmented) instance
            for k in range(total_fold):
                if k != kfold:
                    for i in range(fold_num[k], fold_num[k + 1]):
                        result += temp[song_names[i]]
                    tmp += song_names[fold_num[k]:fold_num[k + 1]]
            song_names = tmp
        else:
            # validation fold: keep only the non-augmented instances
            for i in range(fold_num[kfold], fold_num[kfold + 1]):
                instances = temp[song_names[i]]
                instances = [inst for inst in instances if "1.00_0" in inst]
                result += instances
            song_names = song_names[fold_num[kfold]:fold_num[kfold + 1]]

        return song_names, result

    def get_CE200_pt_path(self, kfold=4):
        temp = {}
        used_song_names = list()
        for name in self.dataset_names:
            dataset_path = os.path.join(self.root_dir, "dataset",
                                        name + '_separated_qualified_10.0',
                                        self.mp3_string, self.feature_string)
            song_names = os.listdir(dataset_path)
            for song_name in song_names:
                paths = []
                # HINT: instance_name => shifted or stretched (augmented) instance
                instance_names = os.listdir(os.path.join(dataset_path, song_name))
                if len(instance_names) > 0:
                    used_song_names.append(song_name)
                for instance_name in instance_names:
                    paths.append(os.path.join(dataset_path, song_name, instance_name))
                temp[song_name] = paths

        # throw away unused song names
        song_names = used_song_names
        song_names = SortedList(song_names)

        # divide train/valid dataset using k fold
        result = []
        total_fold = 5
        quotient = len(song_names) // total_fold
        remainder = len(song_names) % total_fold
        fold_num = [0]
        for i in range(total_fold):
            fold_num.append(quotient)
        for i in range(remainder):
            fold_num[i + 1] += 1
        for i in range(total_fold):
            fold_num[i + 1] += fold_num[i]

        if self.train:
            tmp = []
            # training folds: keep every instance
            for k in range(total_fold):
                if k != kfold:
                    for i in range(fold_num[k], fold_num[k + 1]):
                        result += temp[song_names[i]]
                    tmp += song_names[fold_num[k]:fold_num[k + 1]]
            song_names = tmp
            print('Train: number of songs: %d' % len(song_names))
            print('Train: number of instances : %d' % len(result))
        else:
            # validation fold: the non-augmented filter is disabled here, so all instances are kept
            for i in range(fold_num[kfold], fold_num[kfold + 1]):
                instances = temp[song_names[i]]
                instances = [inst for inst in instances]  # if "1.00_0" in inst]
                result += instances
            song_names = song_names[fold_num[kfold]:fold_num[kfold + 1]]
            print('Validation: number of songs: %d' % len(song_names))
            print('Validation: number of instances : %d' % len(result))

        return song_names, result
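# --- Usage sketch (not part of the original source) ---------------------------
# A minimal example of wiring the dataset into a DataLoader. It assumes the
# project loads its configuration via an HParams-style helper exposing the
# `mp3`, `feature` and `experiment` dicts referenced in __init__; the module
# path, YAML file name, root_dir and batch size below are placeholders to be
# adapted to the local setup.
if __name__ == '__main__':
    from torch.utils.data import DataLoader

    from utils.hparams import HParams  # assumed config loader
    config = HParams.load('run_config.yaml')

    train_set = AudioDataset(config, root_dir='../dataset',
                             dataset_names=('isophonic', ),
                             train=True, from_json=False, kfold=4)
    valid_set = AudioDataset(config, root_dir='../dataset',
                             dataset_names=('isophonic', ),
                             train=False, from_json=False, kfold=4)

    train_loader = DataLoader(train_set, batch_size=32, shuffle=True, num_workers=4)
    valid_loader = DataLoader(valid_set, batch_size=32, shuffle=False, num_workers=4)

    for batch in train_loader:
        feature = batch['feature']  # log-magnitude feature, one instance per row
        chord = batch['chord']      # frame-level chord labels for the same instance
        break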