Exemplo n.º 1
0
def wav_loader(path, num_frames=300):
    """Load features for ``path`` as a fixed-length, normalized array.

    Features are obtained from ``audio_processing.mk_MFB(path)`` and are
    indexed here as a 2-D array whose first axis is cropped or padded
    (presumably time frames x filterbank channels -- TODO confirm against
    ``mk_MFB``).

    * If the clip has at most ``num_frames`` rows, it is repeated
      cyclically until ``num_frames`` rows are filled.
    * Otherwise a random contiguous window of ``num_frames`` rows is taken
      (uses NumPy's global random state).

    The result is then normalized with the mean and standard deviation
    taken over the whole array (not per channel).

    Args:
        path: Passed unchanged to ``audio_processing.mk_MFB``.
        num_frames: Number of rows in the returned array.

    Returns:
        Array with ``num_frames`` rows, normalized with the global mean
        and standard deviation.

    Raises:
        ValueError: If the feature matrix has no rows.
    """
    feats = audio_processing.mk_MFB(path)
    total = feats.shape[0]
    if total == 0:
        # Previously surfaced as an opaque NumPy broadcast error.
        raise ValueError("no feature frames extracted from %r" % (path,))

    if total <= num_frames:
        # Wrap-around padding. The earlier single-copy padding only worked
        # when the clip covered at least half of the window; indexing
        # modulo the clip length handles arbitrarily short clips and gives
        # the same result as before for the cases that used to work.
        npys = np.zeros((num_frames, feats.shape[1]))
        npys[:] = feats[np.arange(num_frames) % total]
    else:
        # randint's upper bound is exclusive, so "+ 1" is needed for the
        # last valid start offset (total - num_frames) to be reachable.
        start = np.random.randint(total - num_frames + 1)
        npys = feats[start:start + num_frames, :]

    mu = np.average(npys)
    sigma = np.std(npys)
    # Floor the divisor so (near-)constant input does not divide by ~0.
    return (npys - mu) / max(sigma, 0.001)
Exemplo n.º 2
0
    def forward(self, anchor, positive, negative):
        """Compute the mean hinge loss over anchor/positive/negative triplets.

        Distances come from ``self.pdist``. For each triplet the quantity
        ``self.margin + d(anchor, positive) - d(anchor, negative)`` is
        clamped below at zero, and the mean of those values is returned.
        """
        pos_dist = self.pdist.forward(anchor, positive)
        neg_dist = self.pdist.forward(anchor, negative)

        # Triplets where the negative is already farther than the positive
        # by at least the margin contribute zero.
        violation = self.margin + pos_dist - neg_dist
        return torch.clamp(violation, min=0.0).mean()

# Extra keyword options, only populated when running with CUDA
# (presumably forwarded to the data loaders -- confirm at the use site).
if args.cuda:
    kwargs = {'num_workers': 0, 'pin_memory': True}
else:
    kwargs = {}
l2_dist = PairwiseDistance(2)


voxceleb = read_voxceleb_structure(args.dataroot)
if args.makemfb:
    # Run mk_MFB once for every wav file listed in the dataset index.
    for _, row in voxceleb.iterrows():
        mk_MFB(args.dataroot + '/voxceleb1_wav/' + row['filename'] + '.wav')

# Select the input pipeline and the matching file loader.
if not args.mfb:
    # Raw-audio path: truncate, convert to MFB, then to tensor
    # (the tonormal() step is intentionally left disabled).
    transform = transforms.Compose([truncatedinput(), toMFB(), totensor()])
    file_loader = read_audio
else:
    # MFB path: truncate the loaded features and convert them to tensor.
    transform = transforms.Compose([truncatedinputfromMFB(), totensor()])
    file_loader = read_MFB