Example #1
import torch

# SoundNet, load_from_txt, local_config, and LEN_WAVEFORM come from this
# example's own project modules (not shown in the excerpt).

def extract_features():
    audio_txt = 'audio_files.txt'

    model = SoundNet()
    model.load_weights()

    # Extract features for every file listed in audio_files.txt
    sound_samples, audio_paths = load_from_txt(audio_txt, config=local_config)

    print(LEN_WAVEFORM / 6)  # debug: length of one sixth of the waveform
    print(model)             # debug: model architecture
    features = {'feats': [], 'paths': []}
    model.eval()  # inference mode: freezes dropout/batch-norm behavior
    with torch.no_grad():  # no gradients needed for feature extraction
        for idx, sound_sample in enumerate(sound_samples):
            print(audio_paths[idx])
            new_sample = torch.from_numpy(sound_sample)
            output = model(new_sample)  # call the module, not .forward()
            features['feats'].append(output)
            features['paths'].append(audio_paths[idx])
    return features
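The returned dict pairs each feature tensor with its source path. A minimal usage sketch, assuming the function above is importable and that pickling is an acceptable storage format (the output file name is made up):

import pickle

# Hypothetical usage: run the extractor and persist the features
# alongside their audio paths.
features = extract_features()
with open('soundnet_features.pkl', 'wb') as f:
    pickle.dump(features, f)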
Example #2
import os

import numpy as np
import tensorflow as tf  # TF1-style API (tf.Session, tf.ConfigProto)

# Model, load_from_txt, and local_config come from the example's own
# repository (not shown in this excerpt).

def main(args):
    # Setup visible device
    os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda_device

    # Load pre-trained model (a pickled dict of numpy weight arrays);
    # NumPy >= 1.16.5 also requires allow_pickle=True here
    G_name = './models/sound8.npy'
    param_G = np.load(G_name, encoding='latin1', allow_pickle=True).item()

    if args.phase == 'demo':
        # Demo: a single pre-extracted waveform shipped with the repo
        sound_samples = [
            np.reshape(np.load('data/demo.npy', encoding='latin1'),
                       [1, -1, 1, 1])
        ]
    else:
        # Extract features for every file listed in args.audio_txt
        sound_samples = load_from_txt(args.audio_txt, config=local_config)

    # Make output path
    if not os.path.exists(args.outpath):
        os.mkdir(args.outpath)

    # Init. session: allow CPU fallback for unplaced ops and grow GPU
    # memory on demand instead of reserving it all up front
    sess_config = tf.ConfigProto()
    sess_config.allow_soft_placement = True
    sess_config.gpu_options.allow_growth = True

    with tf.Session(config=sess_config) as session:
        # Build model and initialize variables
        model = Model(session, config=local_config, param_G=param_G)
        init = tf.global_variables_initializer()
        session.run(init)
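Examples #2 and #4 use the TF1 graph/session API. Under TensorFlow 2.x, a common workaround (a sketch, not part of the original examples) is to import through the v1 compatibility layer:

# Sketch: run the TF1-style session code above under TensorFlow 2.x
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()  # restores graph mode, tf.Session, tf.ConfigProto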
Example #3

local_config = {
    'sample_rate': 22050,
    'load_size': 22050 * 20,
    'name_scope': 'SoundNet_TF',
    'phase': 'extract',
}
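This config requests 22050 * 20 samples, i.e. 20 seconds of audio at 22,050 Hz. A hypothetical sketch of the per-file loading that load_from_txt presumably performs; librosa, the helper name load_one, and the padding policy are assumptions, while the [-256, 256] scaling follows SoundNet's published preprocessing:

import librosa
import numpy as np

def load_one(path, config):
    # Hypothetical helper: load mono audio at the configured sample rate
    wav, _ = librosa.load(path, sr=config['sample_rate'], mono=True)
    # Crop or zero-pad to exactly load_size samples (20 s here)
    n = config['load_size']
    wav = wav[:n] if len(wav) >= n else np.pad(wav, (0, n - len(wav)))
    # SoundNet takes [batch, length, 1, 1] waveforms scaled to [-256, 256]
    return np.reshape(wav * 256.0, [1, -1, 1, 1]).astype(np.float32)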
import os

import torch
import torch.nn as nn

# load_from_txt and local_config come from the example's own repository
# (not shown in this excerpt).

# Load model and weights
from soundnet2 import SoundNet8_pytorch
model = SoundNet8_pytorch()
model.load_state_dict(torch.load("sound8.pth"))
model.eval()  # inference mode

# Summarize model (optional)
from torchsummaryX import summary
# summary(model, torch.zeros(1, 1, 22050 * 20, 1))

# Load data and extract features; BASE_DIR is assumed to be the project
# root (it is not defined in the original excerpt)
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
audio_txt = os.path.abspath(os.path.join(
    BASE_DIR, 'mydata', "audio_files.txt"))  # path of audio_files.txt
sound_samples, audio_paths = load_from_txt(audio_txt, config=local_config)

softmax = nn.Softmax(dim=1)
for idx, sound_sample in enumerate(sound_samples):
    print(audio_paths[idx])
    new_sample = torch.from_numpy(sound_sample)
    output = model(new_sample)  # (object logits, scene logits)
    # Classification: most likely ImageNet object and Places scene class
    id_obj = torch.max(softmax(output[0]), 1)
    id_scn = torch.max(softmax(output[1]), 1)
    print('#####objects class: %s' % torch.squeeze(id_obj[1]))
    print('#####places class: %s' % torch.squeeze(id_scn[1]))
    # Average pooling over the temporal axis
    avgpool_layer = nn.AvgPool2d((4, 1))
    avgpool_obj = avgpool_layer(softmax(output[0]))
    # tensor --> ndarray
    a_feature = avgpool_obj.detach().numpy()
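Reusing `output` and `softmax` from the last loop iteration above, a hypothetical sketch of turning the predicted indices into readable labels. The category file names are assumptions; the original SoundNet release ships per-line label lists for its ImageNet-object and Places-scene heads:

# Hypothetical: per-line label files for the two classification heads
with open('categories_imagenet.txt') as f:
    object_labels = [line.strip() for line in f]
with open('categories_places.txt') as f:
    scene_labels = [line.strip() for line in f]

# Average class probabilities over the temporal axis, then take the top
obj_probs = softmax(output[0]).mean(dim=2).flatten()
scn_probs = softmax(output[1]).mean(dim=2).flatten()
print('object: %s' % object_labels[int(obj_probs.argmax())])
print('scene:  %s' % scene_labels[int(scn_probs.argmax())])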
Example #4

def main(args):
    # Setup as in Example #2 (imports, CUDA device, param_G, sess_config);
    # `position`, `n_files`, `batch_size`, and `all_files` are defined in
    # the part of the original source that this excerpt omits.
    with tf.Session(config=sess_config) as session:
        # Build model, initialize variables, then load pre-trained weights
        model = Model(session, config=local_config, param_G=param_G)
        init = tf.global_variables_initializer()
        session.run(init)
        model.load()

        # Walk the audio list in steps of `batch_size`
        while position < n_files:
            if args.phase == 'demo':
                # Demo: a single pre-extracted waveform shipped with the repo
                sound_samples = [
                    np.reshape(np.load('data/demo.npy', encoding='latin1'),
                               [1, -1, 1, 1])
                ]
            else:
                # Extract the next batch of waveforms from the list file
                sound_samples = load_from_txt(args.audio_txt,
                                              position,
                                              config=local_config,
                                              batch_size=batch_size)

            # Make output path (a no-op after the first iteration)
            if not os.path.exists(args.outpath):
                os.mkdir(args.outpath)

            # Run the network on each waveform and save its features
            for idx, sound_sample in enumerate(sound_samples):
                output = extract_feat(model,
                                      sound_sample,
                                      args,
                                      filename=all_files[position + idx])
            position += batch_size
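For completeness, a hypothetical argparse driver wired to the attributes the example reads (args.cuda_device, args.phase, args.audio_txt, args.outpath); the flag defaults are assumptions, not part of the original example:

import argparse

# Hypothetical driver; flag names mirror the attributes read above,
# defaults are assumed.
parser = argparse.ArgumentParser(description='SoundNet feature extraction')
parser.add_argument('--cuda_device', default='0')
parser.add_argument('--phase', default='extract', choices=['demo', 'extract'])
parser.add_argument('--audio_txt', default='audio_files.txt')
parser.add_argument('--outpath', default='./output')
main(parser.parse_args())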