import torch

# SoundNet, load_from_txt, local_config and LEN_WAVEFORM come from the
# surrounding project.
def extract_features():
    audio_txt = 'audio_files.txt'
    model = SoundNet()
    model.load_weights()

    # Extract a feature for every file listed in audio_files.txt
    sound_samples, audio_paths = load_from_txt(audio_txt, config=local_config)
    print(LEN_WAVEFORM / 6)
    print(model)

    features = {'feats': [], 'paths': []}
    model.eval()
    with torch.no_grad():  # inference only, no gradients needed
        for idx, sound_sample in enumerate(sound_samples):
            print(audio_paths[idx])
            new_sample = torch.from_numpy(sound_sample)
            output = model(new_sample)  # call the module, not model.forward()
            features['feats'].append(output)
            features['paths'].append(audio_paths[idx])
    return features
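The outputs collected above are still PyTorch tensors. A minimal sketch for persisting them to disk, assuming every clip was loaded at the same fixed load_size so all outputs share one shape (save_features and the feats.npz filename are illustrative, not part of the original code):

import numpy as np

def save_features(features, out_file='feats.npz'):
    # Stack per-clip outputs into a single array, keeping paths alongside
    feats = np.stack([f.numpy() for f in features['feats']])
    np.savez(out_file, feats=feats, paths=np.array(features['paths']))

save_features(extract_features())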
# Setup visible device
os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda_device

# Load pre-trained model (the .npy stores a pickled dict, so NumPy >= 1.16.3
# also requires allow_pickle=True)
G_name = './models/sound8.npy'
param_G = np.load(G_name, encoding='latin1', allow_pickle=True).item()

if args.phase == 'demo':
    # Demo: a single pre-extracted waveform, reshaped to [batch, length, 1, 1]
    sound_samples = [np.reshape(
        np.load('data/demo.npy', encoding='latin1', allow_pickle=True),
        [1, -1, 1, 1])]
else:
    # Extract a feature for every file listed in args.audio_txt
    sound_samples = load_from_txt(args.audio_txt, config=local_config)

# Make output path
if not os.path.exists(args.outpath):
    os.mkdir(args.outpath)

# Init. session (TensorFlow 1.x API)
sess_config = tf.ConfigProto()
sess_config.allow_soft_placement = True
sess_config.gpu_options.allow_growth = True

with tf.Session(config=sess_config) as session:
    # Build model and initialize variables
    model = Model(session, config=local_config, param_G=param_G)
    init = tf.global_variables_initializer()
    session.run(init)
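param_G is a plain Python dict mapping SoundNet layer names to their pre-trained parameters. A quick sketch for inspecting what was loaded, assuming the usual layout of one sub-dict of arrays per layer (purely illustrative):

import numpy as np

param_G = np.load('./models/sound8.npy', encoding='latin1', allow_pickle=True).item()
for layer_name, params in param_G.items():
    # Each entry is assumed to hold arrays such as weights and biases
    shapes = {k: np.asarray(v).shape for k, v in params.items()}
    print(layer_name, shapes)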
    'sample_rate': 22050,
    'load_size': 22050 * 20,  # 20 seconds of audio at 22.05 kHz
    'name_scope': 'SoundNet_TF',
    'phase': 'extract',
}

# Load model and weights
from soundnet2 import SoundNet8_pytorch
model = SoundNet8_pytorch()
model.load_state_dict(torch.load("sound8.pth"))
model.eval()  # BatchNorm layers need eval mode for inference

# Summarize model
from torchsummaryX import summary
# summary(model, torch.zeros(1, 1, 22050 * 20, 1))

# Load data and extract features
audio_txt = os.path.abspath(os.path.join(
    BASE_DIR, 'mydata', "audio_files.txt"))  # path of audio_files.txt
sound_samples, audio_paths = load_from_txt(audio_txt, config=local_config)
for idx, sound_sample in enumerate(sound_samples):
    print(audio_paths[idx])
    new_sample = torch.from_numpy(sound_sample)
    output = model(new_sample)  # call the module, not model.forward()

    # Classification: output[0] holds object logits, output[1] scene logits
    softmax = nn.Softmax(dim=1)
    id_obj = torch.max(softmax(output[0]), 1)
    id_scn = torch.max(softmax(output[1]), 1)
    print('#####objects class: %s' % torch.squeeze(id_obj[1]))
    print('#####places class: %s' % torch.squeeze(id_scn[1]))

    # Average pooling over the time windows
    avgpool_layer = nn.AvgPool2d((4, 1))
    avgpool_obj = avgpool_layer(softmax(output[0]))

    # Tensor --> ndarray
    a_feature = avgpool_obj.detach().numpy()
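The printed IDs are only class indices. A hedged sketch for turning them into names, assuming the object and scene label lists are available as plain-text files with one label per line (object_labels.txt and scene_labels.txt are placeholder names, not files shipped with the code above):

def load_labels(path):
    with open(path) as f:
        return [line.strip() for line in f]

obj_labels = load_labels('object_labels.txt')  # hypothetical path
scn_labels = load_labels('scene_labels.txt')   # hypothetical path

# One prediction per time window of the clip
for t, cls in enumerate(torch.squeeze(id_obj[1]).flatten().tolist()):
    print('window %d object: %s' % (t, obj_labels[cls]))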
# Build model and initialize variables
model = Model(session, config=local_config, param_G=param_G)
init = tf.global_variables_initializer()
session.run(init)
model.load()

while position < n_files:
    if args.phase == 'demo':
        # Demo: a single pre-extracted waveform
        sound_samples = [np.reshape(
            np.load('data/demo.npy', encoding='latin1'),
            [1, -1, 1, 1])]
    else:
        # Extract features for the current batch of files
        sound_samples = load_from_txt(args.audio_txt, position,
                                      config=local_config,
                                      batch_size=batch_size)

    # Make output path
    if not os.path.exists(args.outpath):
        os.mkdir(args.outpath)

    for idx in range(len(sound_samples)):
        sound_sample = sound_samples[idx]
        output = extract_feat(model, sound_sample, args,
                              filename=all_files[position + idx])

    position += batch_size
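The loop above assumes position, n_files, batch_size, and all_files were set up earlier. A minimal sketch of that bookkeeping, assuming args.audio_txt lists one audio path per line (the batch size of 16 is an arbitrary choice):

with open(args.audio_txt, 'r') as f:
    all_files = [line.strip() for line in f if line.strip()]

n_files = len(all_files)
batch_size = 16  # arbitrary; must match the batch_size given to load_from_txt
position = 0     # index of the first file in the current batch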