def main():
    """Identify speakers in the newest group recording, assemble the raw and
    separated voice tracks, and hand them to facial recognition for video
    output.

    Reads wav files from ./Dropbox/group/group{N}/target/ (N = newest group)
    and writes the annotated video under ./Video/group/group{N}/.
    """
    n_people = 3
    grouppath = "./Dropbox/group/"
    ngroup = dircount(grouppath)
    SR = speaker_recognition(
        n_people, os.path.join(grouppath, "group{}".format(ngroup - 1)))
    # index maps output slots to separated-source file numbers.
    index = SR.transform()
    # First (aux) channel seeds the separated-voice matrix; afps is the
    # audio sample rate returned by wavread.
    fname = os.path.join(
        grouppath,
        "group{}/target/aux_{}.wav".format(ngroup - 1, int(index[0])))
    afps, data = cis.wavread(fname)
    x = np.array(data)
    for i in range(1, n_people):
        # BUG FIX: the original used int(index[1]) on every iteration, so all
        # ILRMA channels after the first loaded the same file; use index[i].
        fname = os.path.join(
            grouppath,
            "group{}/target/ilrma_{}.wav".format(ngroup - 1, int(index[i])))
        _, data = cis.wavread(fname)
        x = np.vstack([x, data.astype(float)])
    # Raw (unseparated) mixture used as the base soundtrack.
    fname = os.path.join(grouppath,
                         "group{}/target/input.wav".format(ngroup - 1))
    _, data = cis.wavread(fname)
    sepvoice = x
    rawvoice = np.array(data, dtype=np.float32)
    vfname = "./Video/group/group{}/input.avi".format(ngroup - 1)
    outfile = "./Video/group/group{}/output".format(ngroup - 1)
    # Only the first raw channel is passed; sepvoice is (n_people, samples).
    recog = FacialRecog(vfname, rawvoice[:, 0], sepvoice, afps, outfile)
    recog.main()
def generator_from_files(self, file_path1, file_path2, chunk_size=8000):
    """
    For testing 2-channel audio separation from a pair of wav files.

    Yields one AuxIVA separation result per chunk of the input signals.

    :param file_path1: wav file containing signal from the first microphone
    :param file_path2: wav file containing signal from the second microphone
    :param chunk_size: sequence size (samples) used for processing
    :return: generator of y, where y[0] is the separated 1st channel and
             y[1] the 2nd channel; yields np.array([0, 1]) as a sentinel
             for a chunk whose separation hit a singular matrix
    :raises ValueError: if the two files have different sampling rates
    """
    rate1, data1 = cis.wavread(file_path1)
    rate2, data2 = cis.wavread(file_path2)
    if rate1 != rate2:
        raise ValueError('Sampling_rate_Error')
    fs = rate1
    for ind in range(0, data1.shape[0] - chunk_size, chunk_size):
        start_time = time.time()
        try:
            x = np.array(
                [data1[ind:ind + chunk_size], data2[ind:ind + chunk_size]],
                dtype=np.float32)
            y = AuxIVA(x, sample_freq=fs, beta=0.3, nchannel=2).auxiva()
            print("--- %s seconds ---" % (time.time() - start_time))
        except np.linalg.LinAlgError as err:
            if 'Singular matrix' in str(err):
                print("Singular matrix")
                yield np.array([0, 1])
                # BUG FIX: the original fell through to `yield y` here,
                # re-yielding the previous chunk's result (or raising
                # NameError on the first chunk). Skip to the next chunk,
                # as the original's own comment intended.
                continue
            else:
                # Any other LinAlgError silently stops the generator.
                # NOTE(review): consider re-raising instead — confirm
                # callers rely on the silent stop before changing.
                break
        yield y
def main():
    """Run ILRMA on the newest group's 3-channel input.wav and write one
    separated wav per channel into the same target directory."""
    grouppath = "./Dropbox/group"
    ngroup = dircount(grouppath)
    dirname = os.path.join(grouppath, "group{}/target".format(ngroup - 1))
    fs, data = cis.wavread(os.path.join(dirname, "input.wav"))
    # Stack the three microphone channels into a (3, samples) float matrix.
    x = np.array([data[:, ch] for ch in range(3)], dtype=np.float32)
    y = ILRMA(x, fs, 2, 200).ilrma()
    for ch in range(3):
        cis.wavwrite(os.path.join(dirname, "ilrma_{}.wav".format(ch)),
                     fs, y[ch])
def add_vibrato(file, fv, p=0.1):
    """Apply vibrato to a wav file and play the modulated result.

    The read position of the signal is phase-modulated with a sine of
    frequency ``fv``, producing a pitch wobble.

    :param file: path of the wav file to process
    :param fv: frequency (Hz) of the sinusoidal pitch modulation
    :param p: modulation depth, the ratio b/a where a is the original
              frequency and b is the deviation applied to it
    """
    # BUG FIX: cis.wavread returns (rate, data) everywhere else in this
    # project; the original unpacked (y, fs), swapping the sample rate and
    # the samples, which corrupts the time axis and the playback rate.
    fs, y = cis.wavread(file)
    t = np.arange(0, y.shape[0] / fs, 1 / fs)
    # Warped time axis: g(t) = t - (p / (2*pi*fv)) * cos(2*pi*fv*t).
    g = t - p / (2 * np.pi * fv) * np.cos(2 * np.pi * fv * t)
    # Trim warped positions that run past the end of the signal.
    passing = np.argwhere(g > t[-1])
    if passing.size != 0:
        g = g[:passing[0, 0]]
    # Resample the signal at the warped positions.
    mody = np.interp(g, t, y)
    cis.audioplay(mody, fs)
        # Tail of an enclosing method whose definition is outside this view.
        # Expand the crop vertically by half the box height, clamped to the
        # frame bounds; presumably (x, y, w, h) is a face box — TODO confirm.
        fy1 = max(0, y-h//2)
        fy2 = min(y+h+h//2, src.shape[0])
        # Darken the whole frame to one third brightness, then restore full
        # brightness inside the widened box.
        output = output // 3
        output[fy1:fy2,fx1:fx2,:] *= 3
        return output

    def __audio_concat(self):
        # Build the output soundtrack: raw mixture for the first and last
        # segments, one separated speaker for each of the M middle segments.
        # sft: audio samples per video segment (frame count split into M+2
        # segments, converted from video frame rate to audio sample rate).
        sft = self.nframe // (self.M+2) * self.afps // self.vfps
        for m in range(self.M+2):
            if m < 1:
                # First segment: raw (unseparated) voice.
                output = self.rawvoice[sft*m:sft*(m+1)]
            elif m > self.M:
                # Last segment: raw voice through to the end of the track.
                output = np.hstack((output, self.rawvoice[sft*m:]))
            else:
                # Middle segments: separated voice of speaker m-1.
                output = np.hstack((output, self.sepvoice[m-1, sft*m:sft*(m+1)]))
        return output

if __name__ == "__main__":
    # Demo driver: three AuxIVA-separated channels plus the raw mix.
    rate1, data1 = cis.wavread('./samples/group/auxiva_2.wav')
    rate2, data2 = cis.wavread('./samples/group/auxiva_1.wav')
    rate3, data3 = cis.wavread('./samples/group/auxiva_0.wav')
    rate4, data4 = cis.wavread('./samples/group/output.wav')
    sepvoice = np.array([data1, data2, data3], dtype=np.float32)
    rawvoice = np.array(data4, dtype=np.float32)
    # Only the first raw channel is used; rate1 is passed as the audio rate.
    recog = FacialRecog("samples/WIN_20190515_18_14_38_Pro.mp4",
                        rawvoice[:,0], sepvoice, rate1, "samples/aaa")
    recog.main()
import numpy as np
import cis
from AuxIVA import AuxIVA
import time

# Smoke test: run AuxIVA on the first 10000 samples of a two-microphone
# recording and report the wall-clock time taken.
sr_a, mic_a = cis.wavread('./data/rssd_A.wav')
sr_b, mic_b = cis.wavread('./data/rssd_B.wav')
if sr_a != sr_b:
    raise ValueError('Sampling_rate_Error')
fs = sr_a
print(fs)
print(mic_a.shape)
t_start = time.time()
mixture = np.array([mic_a[:10000], mic_b[:10000]], dtype=np.float32)
y = AuxIVA(mixture, sample_freq=fs, beta=0.3, nchannel=2).auxiva()
print("--- %s seconds ---" % (time.time() - t_start))
import numpy as np
import sys
import cis
import scipy.io.wavfile as wf
from IVA import IVA
from sound_mixing import Preprocessing

# Separate three music tracks with IVA and write each estimated source out.
rate1, data1 = cis.wavread('./samples/music1.wav')
rate2, data2 = cis.wavread('./samples/music2.wav')
rate3, data3 = cis.wavread('./samples/music3.wav')
if not (rate1 == rate2 == rate3):
    raise ValueError('Sampling_rate_Error')
fs = rate1
# (3, samples) observation matrix, one row per input track.
mixed = np.array([data1, data2, data3], dtype=np.float32)
y = IVA(mixed, fs).iva()
for ch in range(3):
    cis.wavwrite('./samples/iva_{}_out.wav'.format(ch + 1), fs, y[ch])