def get_samples(self, data_file):
    """Read an audio file and split it into fixed-size chunks.

    The file is opened with ``AudioFileClip``, its frame rate is set to
    16 kHz, every frame is averaged over axis 1 (presumably the audio
    channels -- confirm against the clip reader), and the resulting 1-D
    signal is zero-padded and reshaped into rows of 640 samples.

    Args:
        data_file: Path of the audio file to read. It is converted with
            ``str()`` before being handed to ``AudioFileClip``.

    Returns:
        A ``float32`` array of shape ``(num_chunks, 640)``.
    """
    sample_rate = 16000
    chunk_size = 640  # 640 samples at 16 kHz == 40 ms of audio per chunk

    audio_clip = AudioFileClip(str(data_file))
    try:
        clip = audio_clip.set_fps(sample_rate)
        # Materialise every frame before the clip is closed.
        data_frame = np.array(list(clip.subclip(0).iter_frames()))
    finally:
        # Always release the reader held by the clip, even if decoding fails.
        audio_clip.close()

    data_frame = data_frame.mean(1)

    # NOTE(review): when the signal length is already a multiple of
    # chunk_size this pads a full extra chunk of zeros. Left unchanged so the
    # number of returned chunks stays the same for existing callers.
    pad_length = chunk_size - data_frame.shape[0] % chunk_size
    audio = np.pad(data_frame, (0, pad_length), 'constant')

    return np.reshape(audio, (-1, chunk_size)).astype(np.float32)
def _get_samples(self, data_file):
    """Load the frames of ``data_file`` together with its labels.

    The timestamps and labels are looked up in ``self.dict_files[data_file]``
    (keys ``'time'`` and ``'labels'``). Depending on ``self.input_type`` the
    file is opened as an audio clip (frame rate set to 16 kHz) or as a video
    clip.

    * If there is exactly one label, all frames of the clip are averaged over
      axis 1 and returned as a single row of shape ``(1, -1)``.
    * Otherwise one array of frames is extracted per pair of consecutive
      timestamps. For audio each array is averaged over axis 1 and truncated
      to the number of samples spanned by the first interval.
    * If there is only a single interval, its data is additionally zero-padded
      and re-split into rows of 640 samples, which replace the frame list.

    Side effect: ``self.shape`` is set to the shape of the last extracted
    frame array (not set on the single-label path).

    Args:
        data_file: Key into ``self.dict_files``; also the path of the media
            file (converted with ``str()`` before opening).

    Returns:
        A tuple ``(frames, labels)`` where ``labels`` is
        ``self.dict_files[data_file]['labels']`` and ``frames`` is either a
        2-D array (single-label path) or a list of ``float32`` arrays.

    Raises:
        ValueError: If ``self.input_type`` names neither audio nor video, or
            if fewer than two timestamps are available when per-interval
            frames have to be extracted.
    """
    entry = self.dict_files[data_file]
    time = entry['time']
    labels = entry['labels']

    input_type = self.input_type.lower()
    is_audio = 'audio' in input_type

    # `source_clip` is the object that owns the reader and must be closed;
    # `clip` is what frames are read from (a re-sampled copy for audio).
    if is_audio:
        source_clip = AudioFileClip(str(data_file))
    elif 'video' in input_type:
        source_clip = VideoFileClip(str(data_file))
    else:
        raise ValueError(
            "input_type must contain 'audio' or 'video', got "
            "{!r}".format(self.input_type))

    try:
        if is_audio:
            clip = source_clip.set_fps(16000)
            # Number of samples covered by the first labelled interval.
            num_samples = int(clip.fps * (time[1] - time[0]))
        else:
            clip = source_clip

        if labels.shape[0] == 1:
            all_frames = np.array(list(clip.iter_frames()))
            clip_list = np.reshape(all_frames.mean(1), (1, -1))
            return clip_list, labels

        num_intervals = len(time) - 1
        if num_intervals < 1:
            raise ValueError(
                "need at least two timestamps to extract frames, got "
                "{}".format(len(time)))

        frames = []
        for start_time, end_time in zip(time[:-1], time[1:]):
            data_frame = np.array(
                list(clip.subclip(start_time, end_time).iter_frames()))

            if is_audio:
                data_frame = np.squeeze(data_frame)
                data_frame = data_frame.mean(1)[:num_samples]

            frames.append(data_frame.astype(np.float32))
    finally:
        # All frames are materialised above, so the reader can be released on
        # every exit path (early return, normal completion, or error).
        source_clip.close()

    self.shape = data_frame.shape

    if num_intervals == 1:
        # Only one interval: split its data into chunks of 640 samples
        # (40 ms at 16 kHz) instead of returning it as one frame.
        # NOTE(review): a length that is already a multiple of chunk_size
        # gets a full extra chunk of zeros; kept as in the original.
        chunk_size = 640
        pad_length = chunk_size - data_frame.shape[0] % chunk_size
        audio = np.pad(data_frame, (0, pad_length), 'constant')
        audio = np.reshape(audio, (-1, chunk_size)).astype(np.float32)
        frames = [audio]

    return frames, labels