Example #1
def writeWaveAtPitch(noteOffset, samplePath):
    # Name the file by its MIDI-style note number (offset from octave 6).
    fileName = samplePath + '/' + str(noteOffset + (12 * 6)) + ".wav"
    # Equal-temperament pitch ratio for the given semitone offset.
    factor = 2.0**(1.0 * noteOffset / 12.0)

    # `samples`, `x`, `getWave` and `pitchShift` come from the enclosing scope.
    snd = AudioSegment(data=pitchShift(getWave(samples, x['a'], x['b']),
                                       factor),
                       sample_width=1,
                       frame_rate=44100,
                       channels=1)

    snd = snd.set_channels(2)
    snd = snd.set_sample_width(2)
    snd.export(fileName, format="wav")

    # filthy hack for looping :(
    thing = wavfile.read(fileName)

    wavfile.write(fileName,
                  thing[0],
                  thing[1],
                  loops=[{
                      'cuepointid': 0,
                      'datatype': 0,
                      'start': 0,
                      'end': len(thing[1]),
                      'fraction': 0,
                      'playcount': 0
                  }])
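
Note that the `wavfile` module in these examples is not `scipy.io.wavfile`: its `write` accepts a `loops=` list and stores it in the WAV's smpl chunk. A minimal sketch of the same whole-file loop trick, assuming that forked API:

import numpy as np
import wavfile  # assumed: a fork of scipy's wavfile whose write() takes loops=

rate = 44100
tone = (np.sin(2 * np.pi * 440 * np.arange(rate) / rate) * 32767).astype(np.int16)

# A single forward loop spanning the whole file; the keys mirror the fields
# of a RIFF smpl-chunk loop descriptor.
wavfile.write('tone.wav', rate, tone, loops=[{
    'cuepointid': 0,  # ties the loop to cue point 0
    'datatype': 0,    # 0 = forward loop
    'start': 0,
    'end': len(tone),
    'fraction': 0,
    'playcount': 0,   # 0 = loop forever
}])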
Example #2
def parse_bin(input_filename, output_filename):
    with open(input_filename, "rb") as f:
        data = f.read()

    if data[0:4].decode('ascii') != "BMP\0":
        print("Not a BMP audio file")
        exit(1)

    data_size, loop_start, loop_end = struct.unpack(">III", data[0x04:0x10])
    channels, bits = struct.unpack("<HH", data[0x10:0x14])
    rate, = struct.unpack(">I", data[0x14:0x18])

    is_looped = loop_start > 0 or loop_end > 0

    if is_looped:
        loops = [(loop_start, loop_end)]
        print("Found loop offsets: start = %d, end = %d" %
              (loop_start, loop_end))

        # foobar2000 plugin (rename .wav to .wavloop): http://slemanique.com/software/foo_input_wave_loop.html
        print(
            "Loop information will be stored in a SMPL chunk for playback in players that have support for SMPL loops"
        )
    else:
        loops = None

    data = bytearray(data[0x20:])
    output = adpcmwave.decode_data(data, rate, channels, bits)
    output = numpy.ndarray((int(len(output) // 2 // channels), channels),
                           numpy.int16, output, 0)
    wavfile.write(output_filename, rate, output, loops=loops)
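
Assuming the module-level imports this snippet relies on (`struct`, `numpy`, `adpcmwave`, and the loop-aware `wavfile` module), a call would look like this, with hypothetical file names:

# Hypothetical invocation: decode a "BMP\0" ADPCM container to a looped WAV.
parse_bin('drum_sample.bin', 'drum_sample.wav')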
Example #3
def writeWaveAtPitch(snd, noteOffset, samplePath):
    fileName = samplePath + '/' + str(noteOffset + (12 * 6)) + ".wav"
    # Equal-temperament pitch ratio; round the scaled rate down to an even value.
    factor = 2.0**(1.0 * noteOffset / 12.0)
    new_sample_rate = (int(snd.frame_rate * factor) // 2) * 2

    shifted_sound = snd._spawn(snd.raw_data,
                               overrides={'frame_rate': new_sample_rate})
    shifted_sound = shifted_sound.set_frame_rate(44100)
    shifted_sound = shifted_sound.set_channels(2)
    shifted_sound = shifted_sound.set_sample_width(2)
    shifted_sound.export(fileName, format="wav")

    # filthy hack for looping :(
    thing = wavfile.read(fileName)

    wavfile.write(fileName,
                  thing[0],
                  thing[1],
                  loops=[{
                      'cuepointid': 0,
                      'datatype': 0,
                      'start': 0,
                      'end': len(thing[1]),
                      'fraction': 0,
                      'playcount': 0
                  }])
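
The `_spawn` override is the usual pydub trick for pitch shifting: reinterpret the same raw bytes at a scaled frame rate, then resample back to a standard rate. The trick in isolation, with a hypothetical input file:

from pydub import AudioSegment

snd = AudioSegment.from_wav('in.wav')  # hypothetical input file
factor = 2.0 ** (3 / 12.0)  # shift up three semitones

# Reinterpret the same raw bytes at a scaled frame rate (raises pitch and
# speed together), then resample so the file reports a standard rate.
shifted = snd._spawn(snd.raw_data,
                     overrides={'frame_rate': int(snd.frame_rate * factor)})
shifted = shifted.set_frame_rate(44100)
shifted.export('out.wav', format='wav')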
Example #4
def freqManip():
    # length of data to read.
    chunk = 1199520
    # open the file for reading.
    nh = wave.open("GameSong.wav")
    nwidth = nh.getsampwidth()

    # create an audio object
    p = pyaudio.PyAudio()

    # open stream based on the wave object which has been input.
    stream = p.open(format=p.get_format_from_width(nh.getsampwidth()),
                    channels=nh.getnchannels(),
                    rate=nh.getframerate(),
                    output=True)

    data = nh.readframes(chunk)

    # Manipulate the audio in the frequency domain. This runs once: the
    # frames were read a single time above, so the original `while True:`
    # would have re-processed stale data and crashed on the second pass.
    data = np.array(wave.struct.unpack("%dh" % (len(data) // nwidth), data)) * 2
    data = np.fft.fft(data)

    # Collapse each complex bin to real + imag -- a deliberately lossy,
    # distorting transform (magnitude, d.real**2 + d.imag**2, was another
    # variant tried here).
    data = data.real + data.imag

    data = np.fft.ifft(data)

    dataout = np.array(data.real, dtype='int16')
    # "%dh" packs that many signed 16-bit shorts; writing chunkout to the
    # stream would play the result aloud.
    chunkout = struct.pack("%dh" % len(dataout), *list(dataout))
    # Writing with a doubled frame rate also doubles playback speed and pitch.
    wavfile.write("realPlusImag.wav", nh.getframerate() * 2, dataout)
    stream.stop_stream()
    stream.close()
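
For contrast, a faithful FFT round trip keeps the complex spectrum intact; collapsing each bin to real + imag, as above, is what produces the distortion. A minimal check:

import numpy as np

rate = 44100
x = (np.sin(2 * np.pi * 440 * np.arange(rate) / rate) * 10000).astype(np.int16)

spectrum = np.fft.rfft(x)             # complex bins: magnitude and phase
y = np.fft.irfft(spectrum, n=len(x))  # exact inverse
assert np.allclose(x, y)              # round trip is lossless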
Example #5
def convolve(args):
    sample_res = wavfile.read(args['input'], normalized=True, forcestereo=True)
    impulse_res = wavfile.read(args['impulse'],
                               normalized=True,
                               forcestereo=True)

    debug = args['debug']
    stereo = args['channels'] == 'stereo'

    if debug:
        logger.debug('sample data: \n{data}', data=sample_res[1])
        logger.debug('impulse_res: \n{data}', data=impulse_res[1])

    sr = sample_res[1]
    ir = impulse_res[1]

    if debug:
        logger.debug('sample data as float: \n{data}', data=sr)

    if args['output'] == 'convolve':
        # NB: keyed off the *output* argument as written; a separate
        # 'method' option may have been intended.
        logger.info('Using numpy.convolve')
        out_0 = numpy.convolve(sr[:, 0], ir[:, 0])
        if stereo:
            out_1 = numpy.convolve(sr[:, 1], ir[:, 1])
    else:
        # use scipy fftconvolve
        logger.info('Using scipy.signal.fftconvolve')
        out_0 = signal.fftconvolve(sr[:, 0], ir[:, 0])
        if stereo:
            out_1 = signal.fftconvolve(sr[:, 1], ir[:, 1])

    if stereo:
        # merge channels
        out = numpy.vstack((out_0, out_1)).T
    else:
        out = out_0.T

    # save output
    wavfile.write(args['output'], sample_res[0], out, normalized=True)

    if args['play']:
        playsound(args['output'])
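
`numpy.convolve` and `scipy.signal.fftconvolve` compute the same 'full' convolution; the FFT variant is far faster for long impulse responses. A quick equivalence check:

import numpy
from scipy import signal

x = numpy.random.randn(4096)
h = numpy.random.randn(512)

direct = numpy.convolve(x, h)    # O(n*m) time-domain convolution
fast = signal.fftconvolve(x, h)  # FFT-based, same 'full' output
assert numpy.allclose(direct, fast)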
Example #6
def fastSong():
    chunk = 599760
    # open the file for reading.
    nh = wave.open("GameSong.wav")
    nwidth = nh.getsampwidth()

    # create an audio object
    p = pyaudio.PyAudio()

    # open stream based on the wave object which has been input.
    stream = p.open(format=p.get_format_from_width(nh.getsampwidth()),
                    channels=nh.getnchannels(),
                    rate=nh.getframerate(),
                    output=True)

    data = nh.readframes(chunk)
    data = np.array(wave.struct.unpack("%dh" % (len(data) // nwidth), data)) * 2
    data = np.array(data, dtype='int16')  # note: the *2 gain can wrap around int16
    wavfile.write("fast.wav", nh.getframerate()*4, data)
    stream.close()
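
The speed-up comes purely from the header: the same samples are written with a 4x frame rate, so players run through them four times as fast (and two octaves higher). The idea in isolation, assuming any `write(path, rate, data)`-compatible module:

import numpy as np
from scipy.io import wavfile  # any write(path, rate, data) compatible module

rate = 44100
x = (np.sin(2 * np.pi * 220 * np.arange(rate * 2) / rate) * 20000).astype(np.int16)

wavfile.write('normal.wav', rate, x)    # 2 s of a 220 Hz tone
wavfile.write('fast.wav', rate * 4, x)  # same samples, 0.5 s at 880 Hz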
Example #7
    # (tail of a per-class loop: `c`, `signal`, `rate`, `fbank` and `mfccs`
    # are defined above this excerpt)
    bank = logfbank(signal[:rate], rate, nfilt=26, nfft=1103).T
    fbank[c] = bank
    mel = mfcc(signal[:rate], rate, numcep=13, nfilt=26, nfft=1103).T
    mfccs[c] = mel

# plot_signals(signals)
# plt.show()

# plot_fft(fft)
# plt.show()

# plot_fbank(fbank)
# plt.show()

# plot_mfccs(mfccs)
# plt.show()

if not os.path.exists('clean'):
    os.makedirs('clean')
if len(os.listdir('clean')) == 0:
    for f in tqdm(df.fname):
        try:
            wavdir = find_wavdir(f)
            # signal, rate = librosa.load(wavdir, sr=22050)
            _, signal, _ = wavfile.read(wavdir)
            rate = 22050
            # mask = envelope(signal, rate, 0.0005)
            wavfile.write('clean/' + f, rate, signal)
        except Exception:
            logging.exception("AudioWavWritingError")
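
`logfbank` and `mfcc` here come from the python_speech_features package; both return (frames, features) arrays, transposed above for plotting. A minimal standalone call under that assumption:

import numpy as np
from python_speech_features import mfcc, logfbank

rate = 22050
signal = np.random.randn(rate).astype(np.float32)  # one second of noise

bank = logfbank(signal, rate, nfilt=26, nfft=1103).T  # shape: (26, n_frames)
mel = mfcc(signal, rate, numcep=13, nfilt=26, nfft=1103).T  # shape: (13, n_frames)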
Example #8
    def train(self, config, devices):
        """ Train the SEAE """

        print('Initializing optimizer...')
        # init optimizer
        g_opt = self.g_opt
        num_devices = len(devices)

        try:
            init = tf.global_variables_initializer()
        except AttributeError:
            # fall back to old implementation
            init = tf.initialize_all_variables()

        print('Initializing variables...')
        self.sess.run(init)
        self.saver = tf.train.Saver()
        self.g_sum = tf.summary.merge([
            self.g_loss_sum, self.gen_summ, self.rl_audio_summ,
            self.real_w_summ, self.gen_audio_summ
        ])

        if not os.path.exists(os.path.join(config.save_path, 'train')):
            os.makedirs(os.path.join(config.save_path, 'train'))

        self.writer = tf.summary.FileWriter(
            os.path.join(config.save_path, 'train'), self.sess.graph)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        print('Sampling some wavs to store sample references...')
        # Hang onto a copy of wavs so we can feed the same one every time
        # we store samples to disk for hearing
        # pick a single batch
        sample_noisy, \
        sample_wav = self.sess.run([self.gtruth_noisy[0],
                                    self.gtruth_wavs[0]])
        print('sample noisy shape: ', sample_noisy.shape)
        print('sample wav shape: ', sample_wav.shape)
        save_path = config.save_path
        counter = 0
        # count number of samples
        num_examples = 0
        for record in tf.python_io.tf_record_iterator(self.e2e_dataset):
            num_examples += 1
        print('total examples in TFRecords {}: {}'.format(
            self.e2e_dataset, num_examples))
        # last samples (those not filling a complete batch) are discarded
        num_batches = num_examples // self.batch_size

        print('Batches per epoch: ', num_batches)

        if self.load(self.save_path):
            print('[*] Load SUCCESS')
        else:
            print('[!] Load failed')
        batch_idx = 0
        curr_epoch = 0
        batch_timings = []
        g_losses = []
        try:
            while not coord.should_stop():
                start = timeit.default_timer()
                if counter % config.save_freq == 0:
                    # now G iterations
                    _g_opt, _g_sum, \
                    g_loss = self.sess.run([g_opt, self.g_sum,
                                            self.g_losses[0]])
                else:
                    _g_opt, \
                    g_loss = self.sess.run([g_opt, self.g_losses[0]])

                end = timeit.default_timer()
                batch_timings.append(end - start)
                g_losses.append(g_loss)
                print('{}/{} (epoch {}), g_loss = {:.5f},'
                      ' time/batch = {:.5f}, '
                      'mtime/batch = {:.5f}'.format(counter,
                                                    config.epoch * num_batches,
                                                    curr_epoch, g_loss,
                                                    end - start,
                                                    np.mean(batch_timings)))
                batch_idx += num_devices
                counter += num_devices
                if (counter / num_devices) % config.save_freq == 0:
                    self.save(config.save_path, counter)
                    self.writer.add_summary(_g_sum, counter)
                    fdict = {self.gtruth_noisy[0]: sample_noisy}
                    canvas_w = self.sess.run(self.Gs[0], feed_dict=fdict)
                    swaves = sample_wav
                    sample_dif = sample_wav - sample_noisy
                    for m in range(min(20, canvas_w.shape[0])):
                        print('w{} max: {} min: {}'.format(
                            m, np.max(canvas_w[m]), np.min(canvas_w[m])))
                        wavfile.write(
                            os.path.join(save_path,
                                         'sample_{}-{}.wav'.format(counter,
                                                                   m)), 16e3,
                            canvas_w[m])
                        if not os.path.exists(
                                os.path.join(save_path,
                                             'gtruth_{}.wav'.format(m))):
                            wavfile.write(
                                os.path.join(save_path,
                                             'gtruth_{}.wav'.format(m)), 16e3,
                                swaves[m])
                            wavfile.write(
                                os.path.join(save_path,
                                             'noisy_{}.wav'.format(m)), 16e3,
                                sample_noisy[m])
                            wavfile.write(
                                os.path.join(save_path,
                                             'dif_{}.wav'.format(m)), 16e3,
                                sample_dif[m])
                        np.savetxt(os.path.join(save_path, 'g_losses.txt'),
                                   g_losses)

                if batch_idx >= num_batches:
                    curr_epoch += 1
                    # re-set batch idx
                    batch_idx = 0
                if curr_epoch >= config.epoch:
                    # done training
                    print('Done training; epoch limit {} '
                          'reached.'.format(self.epoch))
                    print('Saving last model at iteration {}'.format(counter))
                    self.save(config.save_path, counter)
                    self.writer.add_summary(_g_sum, counter)
                    break
        except tf.errors.OutOfRangeError:
            print('[!] Reached queues limits in training loop')
        finally:
            coord.request_stop()
        coord.join(threads)
Example #9
    def train(self, config, devices):
        """ Train the SEGAN """

        print('Initializing optimizers...')
        # init optimizers
        d_opt = self.d_opt
        g_opt = self.g_opt
        num_devices = len(devices)

        try:
            init = tf.global_variables_initializer()
        except AttributeError:
            # fall back to old implementation
            init = tf.initialize_all_variables()

        print('Initializing variables...')
        self.sess.run(init)
        g_summs = [
            self.d_fk_sum,
            # self.d_nfk_sum,
            self.d_fk_loss_sum,
            # self.d_nfk_loss_sum,
            self.g_loss_sum,
            self.g_loss_l1_sum,
            self.g_loss_adv_sum,
            self.gen_summ,
            self.gen_audio_summ
        ]
        # if we have prelus, add them to summary
        if hasattr(self, 'alpha_summ'):
            g_summs += self.alpha_summ
        self.g_sum = tf.summary.merge(g_summs)
        self.d_sum = tf.summary.merge([
            self.d_loss_sum, self.d_rl_sum, self.d_rl_loss_sum,
            self.rl_audio_summ, self.real_w_summ, self.disc_noise_std_summ
        ])

        if not os.path.exists(os.path.join(config.save_path, 'train')):
            os.makedirs(os.path.join(config.save_path, 'train'))

        self.writer = tf.summary.FileWriter(
            os.path.join(config.save_path, 'train'), self.sess.graph)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        print('Sampling some wavs to store sample references...')
        # Hang onto a copy of wavs so we can feed the same one every time
        # we store samples to disk for hearing
        # pick a single batch
        sample_noisy, sample_wav, \
        sample_z = self.sess.run([self.gtruth_noisy[0],
                                  self.gtruth_wavs[0],
                                  self.zs[0]])
        print('sample noisy shape: ', sample_noisy.shape)
        print('sample wav shape: ', sample_wav.shape)
        print('sample z shape: ', sample_z.shape)

        save_path = config.save_path
        counter = 0
        # count number of samples
        num_examples = 0
        for record in tf.python_io.tf_record_iterator(self.e2e_dataset):
            num_examples += 1
        print('total examples in TFRecords {}: {}'.format(
            self.e2e_dataset, num_examples))
        # last samples (those not filling a complete batch) are discarded
        num_batches = num_examples // self.batch_size

        print('Batches per epoch: ', num_batches)

        if self.load(self.save_path):
            print('[*] Load SUCCESS')
        else:
            print('[!] Load failed')
        batch_idx = 0
        curr_epoch = 0
        batch_timings = []
        d_fk_losses = []
        # d_nfk_losses = []
        d_rl_losses = []
        g_adv_losses = []
        g_l1_losses = []
        try:
            while not coord.should_stop():
                start = timeit.default_timer()
                if counter % config.save_freq == 0:
                    for d_iter in range(self.disc_updates):
                        _d_opt, _d_sum, d_fk_loss, d_rl_loss = self.sess.run([
                            d_opt, self.d_sum, self.d_fk_losses[0],
                            self.d_rl_losses[0]
                        ])
                        if self.d_clip_weights:
                            self.sess.run(self.d_clip)
                        # d_nfk_loss, \

                    # now G iterations
                    _g_opt, _g_sum, g_adv_loss, g_l1_loss = self.sess.run([
                        g_opt, self.g_sum, self.g_adv_losses[0],
                        self.g_l1_losses[0]
                    ])
                else:
                    for d_iter in range(self.disc_updates):
                        _d_opt, \
                        d_fk_loss, \
                        d_rl_loss = self.sess.run([d_opt,
                                                   self.d_fk_losses[0],
                                                   # self.d_nfk_losses[0],
                                                   self.d_rl_losses[0]])
                        # d_nfk_loss, \
                        if self.d_clip_weights:
                            self.sess.run(self.d_clip)

                    _g_opt, \
                    g_adv_loss, \
                    g_l1_loss = self.sess.run([g_opt, self.g_adv_losses[0],
                                               self.g_l1_losses[0]])
                end = timeit.default_timer()
                batch_timings.append(end - start)
                d_fk_losses.append(d_fk_loss)
                # d_nfk_losses.append(d_nfk_loss)
                d_rl_losses.append(d_rl_loss)
                g_adv_losses.append(g_adv_loss)
                g_l1_losses.append(g_l1_loss)
                print('{}/{} (epoch {}), d_rl_loss = {:.5f}, '
                      'd_fk_loss = {:.5f}, '
                      'g_adv_loss = {:.5f}, g_l1_loss = {:.5f},'
                      ' time/batch = {:.5f}, '
                      'mtime/batch = {:.5f}'.format(counter,
                                                    config.epoch * num_batches,
                                                    curr_epoch, d_rl_loss,
                                                    d_fk_loss, g_adv_loss,
                                                    g_l1_loss, end - start,
                                                    np.mean(batch_timings)))
                batch_idx += num_devices
                counter += num_devices
                print("counter:", counter, " num_devices:", num_devices,
                      " config.save_freq:", config.save_freq)
                if (counter / num_devices) % config.save_freq == 0:
                    self.save(config.save_path, counter)
                    self.writer.add_summary(_g_sum, counter)
                    self.writer.add_summary(_d_sum, counter)
                    fdict = {
                        self.gtruth_noisy[0]: sample_noisy,
                        self.zs[0]: sample_z
                    }
                    canvas_w = self.sess.run(self.Gs[0], feed_dict=fdict)
                    swaves = sample_wav
                    sample_dif = sample_wav - sample_noisy
                    for m in range(min(20, canvas_w.shape[0])):
                        print('w{} max: {} min: {}'.format(
                            m, np.max(canvas_w[m]), np.min(canvas_w[m])))
                        wavfile.write(
                            os.path.join(save_path,
                                         'sample_{}-{}.wav'.format(counter,
                                                                   m)), 16e3,
                            de_emph(canvas_w[m], self.preemph))
                        m_gtruth_path = os.path.join(save_path,
                                                     'gtruth_{}.wav'.format(m))
                        if not os.path.exists(m_gtruth_path):
                            print('save_path:', save_path)
                            wavfile.write(
                                os.path.join(save_path,
                                             'gtruth_{}.wav'.format(m)), 16e3,
                                de_emph(swaves[m], self.preemph))

                            wavfile.write(
                                os.path.join(save_path,
                                             'noisy_{}.wav'.format(m)), 16e3,
                                de_emph(sample_noisy[m], self.preemph))

                            wavfile.write(
                                os.path.join(save_path,
                                             'dif_{}.wav'.format(m)), 16e3,
                                de_emph(sample_dif[m], self.preemph))

                        np.savetxt(os.path.join(save_path, 'd_rl_losses.txt'),
                                   d_rl_losses)
                        np.savetxt(os.path.join(save_path, 'd_fk_losses.txt'),
                                   d_fk_losses)
                        np.savetxt(os.path.join(save_path, 'g_adv_losses.txt'),
                                   g_adv_losses)
                        np.savetxt(os.path.join(save_path, 'g_l1_losses.txt'),
                                   g_l1_losses)

                if batch_idx >= num_batches:
                    curr_epoch += 1
                    # re-set batch idx
                    batch_idx = 0
                    # check if we have to deactivate L1
                    if curr_epoch >= config.l1_remove_epoch and not self.deactivated_l1:
                        print('** Deactivating L1 factor! **')
                        self.sess.run(tf.assign(self.l1_lambda, 0.))
                        self.deactivated_l1 = True
                    # check if we have to start decaying noise (if any)
                    if curr_epoch >= config.denoise_epoch and not self.deactivated_noise:
                        # apply noise std decay rate
                        decay = config.noise_decay
                        if not hasattr(self, 'curr_noise_std'):
                            self.curr_noise_std = self.init_noise_std
                        new_noise_std = decay * self.curr_noise_std
                        if new_noise_std < config.denoise_lbound:
                            print('New noise std {} < lbound {}, setting 0.'.
                                  format(new_noise_std, config.denoise_lbound))
                            print('** De-activating noise layer **')
                            # if it's below the lower bound, cancel the noise out completely
                            new_noise_std = 0.
                            self.deactivated_noise = True
                        else:
                            print(
                                'Applying decay {} to noise std {}: {}'.format(
                                    decay, self.curr_noise_std, new_noise_std))
                        self.sess.run(
                            tf.assign(self.disc_noise_std, new_noise_std))
                        self.curr_noise_std = new_noise_std
                if curr_epoch >= config.epoch:
                    # done training
                    print('Done training; epoch limit {} '
                          'reached.'.format(self.epoch))
                    print('Saving last model at iteration {}'.format(counter))
                    self.save(config.save_path, counter)
                    self.writer.add_summary(_g_sum, counter)
                    self.writer.add_summary(_d_sum, counter)
                    break
        except tf.errors.OutOfRangeError:
            print('Done training; epoch limit {} reached.'.format(self.epoch))
        finally:
            coord.request_stop()
        coord.join(threads)
Example #10
def fastAudio(ffmpeg: str, theFile: str, outFile: str, chunks: list,
              speeds: list, audioBit, samplerate, needConvert: bool, temp: str,
              log, fps: float):

    if (len(chunks) == 1 and chunks[0][2] == 0):
        log.error('Trying to create empty audio.')

    if (not os.path.isfile(theFile)):
        log.error('fastAudio.py could not find file: ' + theFile)

    if (needConvert):
        cmd = [ffmpeg, '-y', '-i', theFile]
        if (audioBit is not None):
            cmd.extend(['-b:a', str(audioBit)])
        cmd.extend(
            ['-ac', '2', '-ar',
             str(samplerate), '-vn', f'{temp}/faAudio.wav'])
        if (log.is_ffmpeg):
            cmd.extend(['-hide_banner'])
        else:
            cmd.extend(['-nostats', '-loglevel', '8'])
        subprocess.call(cmd)

        theFile = f'{temp}/faAudio.wav'

    samplerate, audioData = read(theFile)

    newL = getNewLength(chunks, speeds, fps)
    # Get the new length in samples with some extra leeway.
    estLeng = int(newL * samplerate * 1.5) + int(samplerate * 2)

    # Create an empty array for the new audio.
    newAudio = np.zeros((estLeng, 2), dtype=np.int16)

    channels = 2
    yPointer = 0
    totalChunks = len(chunks)
    beginTime = time.time()

    for chunkNum, chunk in enumerate(chunks):
        audioSampleStart = int(chunk[0] / fps * samplerate)
        audioSampleEnd = int(audioSampleStart + (samplerate / fps) *
                             (chunk[1] - chunk[0]))

        theSpeed = speeds[chunk[2]]
        if (theSpeed != 99999):
            spedChunk = audioData[audioSampleStart:audioSampleEnd]

            if (theSpeed == 1):
                yPointerEnd = yPointer + spedChunk.shape[0]
                newAudio[yPointer:yPointerEnd] = spedChunk
            else:
                spedupAudio = np.zeros((0, 2), dtype=np.int16)
                with ArrReader(spedChunk, channels, samplerate, 2) as reader:
                    with ArrWriter(spedupAudio, channels, samplerate,
                                   2) as writer:
                        phasevocoder(reader.channels,
                                     speed=theSpeed).run(reader, writer)
                        spedupAudio = writer.output

                yPointerEnd = yPointer + spedupAudio.shape[0]
                newAudio[yPointer:yPointerEnd] = spedupAudio

            myL = chunk[1] - chunk[0]
            mySamples = (myL / fps) * samplerate
            newSamples = int(mySamples / theSpeed)

            yPointer = yPointer + newSamples
        else:
            # Speed is too high so skip this section.
            yPointerEnd = yPointer

        progressBar(chunkNum,
                    totalChunks,
                    beginTime,
                    title='Creating new audio')

    log.debug('\n   - Total Samples: ' + str(yPointer))
    log.debug('   - Samples per Frame: ' + str(samplerate / fps))
    log.debug('   - Expected video length: ' + str(yPointer /
                                                   (samplerate / fps)))
    newAudio = newAudio[:yPointer]
    write(outFile, samplerate, newAudio)

    if (needConvert):
        conwrite('')
Example #11
def fastAudio(theFile, outFile, chunks: list, speeds: list, log, fps: float,
              machineReadable, hideBar):
    from wavfile import read, write
    import os

    import numpy as np

    log.checkType(chunks, 'chunks', list)
    log.checkType(speeds, 'speeds', list)

    def speedsOtherThan1And99999(a: list) -> bool:
        return any(x != 1 and x != 99999 for x in a)

    if (speedsOtherThan1And99999(speeds)):
        from audiotsm2 import phasevocoder
        from audiotsm2.io.array import ArrReader, ArrWriter

    if (len(chunks) == 1 and chunks[0][2] == 0):
        log.error('Trying to create an empty file.')

    if (not os.path.isfile(theFile)):
        log.error('fastAudio.py could not find file: ' + theFile)

    samplerate, audioData = read(theFile)

    newL = getNewLength(chunks, speeds, fps)
    # Get the new length in samples with some extra leeway.
    estLeng = int(newL * samplerate * 1.5) + int(samplerate * 2)

    # Create an empty array for the new audio.
    newAudio = np.zeros((estLeng, 2), dtype=np.int16)

    channels = 2
    yPointer = 0

    audioProgress = ProgressBar(len(chunks), 'Creating new audio',
                                machineReadable, hideBar)

    for chunkNum, chunk in enumerate(chunks):
        audioSampleStart = int(chunk[0] / fps * samplerate)
        audioSampleEnd = int(audioSampleStart + (samplerate / fps) *
                             (chunk[1] - chunk[0]))

        theSpeed = speeds[chunk[2]]
        if (theSpeed != 99999):
            spedChunk = audioData[audioSampleStart:audioSampleEnd]

            if (theSpeed == 1):
                yPointerEnd = yPointer + spedChunk.shape[0]
                newAudio[yPointer:yPointerEnd] = spedChunk
            else:
                spedupAudio = np.zeros((0, 2), dtype=np.int16)
                with ArrReader(spedChunk, channels, samplerate, 2) as reader:
                    with ArrWriter(spedupAudio, channels, samplerate,
                                   2) as writer:
                        phasevocoder(reader.channels,
                                     speed=theSpeed).run(reader, writer)
                        spedupAudio = writer.output

                yPointerEnd = yPointer + spedupAudio.shape[0]
                newAudio[yPointer:yPointerEnd] = spedupAudio

            myL = chunk[1] - chunk[0]
            mySamples = (myL / fps) * samplerate
            newSamples = int(mySamples / theSpeed)

            yPointer = yPointer + newSamples
        else:
            # Speed is too high so skip this section.
            yPointerEnd = yPointer

        audioProgress.tick(chunkNum)

    log.debug('\n   - Total Samples: ' + str(yPointer))
    log.debug('   - Samples per Frame: ' + str(samplerate / fps))
    log.debug('   - Expected video length: ' + str(yPointer /
                                                   (samplerate / fps)))
    newAudio = newAudio[:yPointer]
    write(outFile, samplerate, newAudio)
Example #12
byte_length = raw_pcm.size
nframes_per_channel = byte_length // block_align
byte_per_frame = bitrate // 8  # bytes per sample, per channel
length_sec = nframes_per_channel // fs

assert chunk_length > overlap >= 0, "Overlap must be non-negative and smaller than the chunk length"

nsegs, segs = split_segments(length_sec, chunk_length, overlap, incltail=True)

file, ext = os.path.splitext(input_file)
num_digits = get_number_of_digits(nsegs)
chunk_name_format = '{}__{:0' + str(num_digits) + 'd}{}'

for i, (start, end) in zip(list(range(nsegs)), segs):
    chunk_name = chunk_name_format.format(file, i + 1, ext)
    sample_start = start * fs * num_channels * byte_per_frame
    sample_end = end * fs * num_channels * byte_per_frame
    chunk_size = sample_end - sample_start
    chunk_size_per_channel = chunk_size // num_channels // byte_per_frame

    print(('Chunk #{}/{} named {} is from {} sec to {} sec'.format(
        i + 1, nsegs, chunk_name, start, end)))
    chunk_data = raw_pcm[sample_start:sample_end]
    uint8_data = chunk_data.reshape((chunk_size_per_channel, num_channels,
                                     byte_per_frame)).astype(np.uint8)

    if bitrate == 24:
        write_24b(chunk_name, fs, uint8_data)
    else:
        write(chunk_name, fs, uint8_data, bitrate=bitrate)
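
`split_segments` is defined elsewhere; from its use here it must return the segment count plus (start, end) offsets in seconds, covering `length_sec` with `chunk_length`-second windows overlapping by `overlap` seconds. A hypothetical implementation consistent with that contract:

def split_segments(length_sec, chunk_length, overlap, incltail=True):
    # Hypothetical reconstruction: consecutive windows step forward by
    # (chunk_length - overlap) seconds.
    step = chunk_length - overlap
    segs = []
    start = 0
    while start + chunk_length <= length_sec:
        segs.append((start, start + chunk_length))
        start += step
    if incltail and start < length_sec:
        segs.append((start, length_sec))  # short trailing segment
    return len(segs), segs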
Example #13
    def add_noise_and_filter(self, x, noise, play_sounds, files_prefix):
        t = np.arange(len(x)) / self.sample_rate

        # Plot original signal.
        figure()
        plot(t, x)
        title('Original signal')
        grid(True)

        #------------------------------------------------
        # Add noise to original signal
        #------------------------------------------------
        with_noise = x + noise

        # Plot the signal with noise.
        figure()
        plot(t, with_noise)
        title('Signal with noise')
        grid(True)

        # Save audio with noise.
        output_with_noise = ''.join(
            [OUTPUT_FOLDER, files_prefix, '_with_noise.wav'])
        wavfile.write(output_with_noise,
                      self.sample_rate,
                      with_noise,
                      normalized=True)

        # Override x signal.
        x = with_noise

        #------------------------------------------------
        # Create a FIR filter and apply it to x.
        #------------------------------------------------

        # The Nyquist rate of the signal.
        nyq_rate = self.sample_rate / 2.0

        # The desired width of the transition from pass to stop,
        # relative to the Nyquist rate.
        width = (self.cutoff_hz_2 - self.cutoff_hz_1) / nyq_rate

        # Compute the order and Kaiser parameter for the FIR filter.
        N, beta = kaiserord(self.ripple_db, width)
        N |= 1  # force an odd tap count (required for this filter type)

        # Use firwin with a Kaiser window to create a FIR filter. With two
        # cutoffs, pass_zero=True yields a band-stop (notch) design;
        # pass_zero=False would give a band-pass instead.
        taps = firwin(
            N, [self.cutoff_hz_1 / nyq_rate, self.cutoff_hz_2 / nyq_rate],
            window=('kaiser', beta),
            pass_zero=True)

        # Use lfilter to filter x with the FIR filter.
        filtered_x = lfilter(taps, 1.0, x)

        #------------------------------------------------
        # Plot the magnitude response of the filter.
        #------------------------------------------------

        figure()
        w, h = freqz(taps, worN=8000)
        plot((w / np.pi) * nyq_rate, np.absolute(h))
        xlabel('Frequency (Hz)')
        ylabel('Gain')
        title('Frequency response')
        ylim(-0.05, 1.05)
        grid(True)

        #------------------------------------------------
        # Plot the filtered signal.
        #------------------------------------------------

        # The phase delay of the filtered signal.
        delay = 0.5 * (N - 1) / self.sample_rate

        # Plot the filtered signal, shifted to compensate for the phase delay.
        figure()
        # Plot just the "good" part of the filtered signal.  The first N-1
        # samples are "corrupted" by the initial conditions.
        plot(t[N - 1:] - delay, filtered_x[N - 1:], 'g')

        title('Filtered signal')
        xlabel('t')
        grid(True)

        # Save filtered audio
        output_filtered = "".join(
            [OUTPUT_FOLDER, files_prefix, '_filtered.wav'])
        wavfile.write(output_filtered,
                      self.sample_rate,
                      filtered_x,
                      normalized=True)

        if play_sounds:
            playsound(output_with_noise)
            playsound(output_filtered)

        # Show plotted figures
        show()
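
One thing to double-check in designs like this: with two cutoffs, `pass_zero=True` gives a band-stop (notch) filter, while `pass_zero=False` gives a band-pass. A small standalone comparison:

import numpy as np
from scipy.signal import firwin, freqz

nyq = 22050.0
band = [1000.0 / nyq, 4000.0 / nyq]

stop = firwin(101, band, pass_zero=True)   # band-stop: attenuates 1-4 kHz
keep = firwin(101, band, pass_zero=False)  # band-pass: keeps only 1-4 kHz

w, h = freqz(stop, worN=8000)
hz = w / np.pi * nyq
print('band-stop gain at 2 kHz:', abs(h[np.argmin(abs(hz - 2000))]))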
Example #14
import numpy as np
from scipy import signal
import wavfile  # the normalized/bitrate-aware fork used in these examples

def padarray(A, length, before=0):
    # Reconstructed helper (the excerpt began mid-function): zero-pad A to
    # exactly `length` samples with `before` samples of leading silence.
    after = max(length - len(A) - before, 0)
    width = (before, after) if A.ndim == 1 else ((before, after), (0, 0))
    return np.pad(A[:length - before], pad_width=width, mode='constant')

def ratio(dB):
    # Assumed helper: convert a dB gain into a linear amplitude ratio.
    return 10 ** (dB / 20)

def filter20_20k(x, sr):  # filters everything outside 20 - 20000 Hz
    nyq = 0.5 * sr
    sos = signal.butter(5, [20.0 / nyq, 20000.0 / nyq], btype='band', output='sos')
    return signal.sosfilt(sos, x)

sr, a, br = wavfile.read(SWEEPFILE, normalized=True)
sr, b, br = wavfile.read(RECFILE, normalized=True)

a = padarray(a, sr*50, before=sr*10)
b = padarray(b, sr*50, before=sr*10)
h = np.zeros_like(b)

for chan in [0, 1]:
    b1 = b[:,chan]

    b1 = filter20_20k(b1, sr)
    ffta = np.fft.rfft(a)
    fftb = np.fft.rfft(b1)
    ffth = fftb / ffta
    h1 = np.fft.irfft(ffth)
    h1 = filter20_20k(h1, sr)

    h[:,chan] = h1

h = h[:10 * sr,:]
h *= ratio(dB=40)

wavfile.write('IR.wav', sr, h, normalized=True, bitrate=24)
Example #15
def fastAudio(ffmpeg,
              theFile,
              outFile,
              chunks,
              speeds,
              audioBit,
              samplerate,
              debug,
              needConvert,
              log,
              fps=30):

    if (not os.path.isfile(theFile)):
        log.error('Could not find file ' + theFile)

    if (needConvert):
        # Only print this here so other scripts can use this function.
        print('Running from fastAudio.py')

        import tempfile
        from shutil import rmtree

        TEMP = tempfile.mkdtemp()

        cmd = [
            ffmpeg, '-i', theFile, '-b:a', audioBit, '-ac', '2', '-ar',
            str(samplerate), '-vn', f'{TEMP}/fastAud.wav'
        ]
        if (not debug):
            cmd.extend(['-nostats', '-loglevel', '0'])
        subprocess.call(cmd)

        theFile = f'{TEMP}/fastAud.wav'

    samplerate, audioData = read(theFile)

    newL = getNewLength(chunks, speeds, fps)
    # Get the new length in samples with some extra leeway.
    estLeng = int(newL * samplerate * 1.5) + int(samplerate * 2)

    # Create an empty array for the new audio.
    newAudio = np.zeros((estLeng, 2), dtype=np.int16)

    channels = 2
    yPointer = 0
    totalChunks = len(chunks)
    beginTime = time.time()

    for chunkNum, chunk in enumerate(chunks):
        audioSampleStart = int(chunk[0] / fps * samplerate)
        audioSampleEnd = int(audioSampleStart + (samplerate / fps) *
                             (chunk[1] - chunk[0]))

        theSpeed = speeds[chunk[2]]
        if (theSpeed != 99999):
            spedChunk = audioData[audioSampleStart:audioSampleEnd]

            if (theSpeed == 1):
                yPointerEnd = yPointer + spedChunk.shape[0]
                newAudio[yPointer:yPointerEnd] = spedChunk
            else:
                spedupAudio = np.zeros((0, 2), dtype=np.int16)
                with ArrReader(spedChunk, channels, samplerate, 2) as reader:
                    with ArrWriter(spedupAudio, channels, samplerate,
                                   2) as writer:
                        phasevocoder(reader.channels,
                                     speed=theSpeed).run(reader, writer)
                        spedupAudio = writer.output

                yPointerEnd = yPointer + spedupAudio.shape[0]
                newAudio[yPointer:yPointerEnd] = spedupAudio

            myL = chunk[1] - chunk[0]
            mySamples = (myL / fps) * samplerate
            newSamples = int(mySamples / theSpeed)

            yPointer = yPointer + newSamples
        else:
            # Speed is too high so skip this section.
            yPointerEnd = yPointer

        progressBar(chunkNum,
                    totalChunks,
                    beginTime,
                    title='Creating new audio')

    log.debug('yPointer: ' + str(yPointer))
    log.debug('samples per frame: ' + str(samplerate / fps))
    log.debug('Expected video length: ' + str(yPointer / (samplerate / fps)))
    newAudio = newAudio[:yPointer]
    write(outFile, samplerate, newAudio)

    if ('TEMP' in locals()):
        rmtree(TEMP)

    if (needConvert):
        conwrite('')
Example #16
def read_vas3(input_filename,
              output_folder,
              force_hex=False,
              mix_audio=False,
              force_game=None):
    with open(input_filename, "rb") as infile:
        data = infile.read()

    if data[0:4].decode('ascii') != "VA3W":
        print("Not a valid VA3 file")
        exit(1)

    # v3 header is 1 0 0 2
    version_flag1, version_flag2, version_flag3, version_flag4, entry_count, gdx_size, gdx_start, entry_start, data_start = struct.unpack(
        "<BBBBIIIII", data[0x04:0x1c])

    if entry_count <= 0:
        print("No files to extract")
        exit(1)

    gdx_magic = data[gdx_start:gdx_start + 4].decode('ascii')
    if gdx_magic != "GDXH" and gdx_magic != "GDXG":
        print("Not a valid GDXH header")
        exit(1)

    default_hihat, default_snare, default_bass, default_hightom, default_lowtom, default_rightcymbal = struct.unpack(
        "<HHHHHH", data[gdx_start + 0x04:gdx_start + 0x10])
    if gdx_magic == "GDXH":
        default_leftcymbal = 0xfff0
        default_floortom = 0xfff1
        default_leftpedal = 0xfff2
        gdx_type_unk1 = data[gdx_start + 0x10]  # Not used anywhere?
        gdx_volume_flag = data[gdx_start +
                               0x11]  # How does this work with GDXG?

        filename_prefix = "g"

    elif gdx_magic == "GDXG":
        default_leftcymbal, default_floortom, default_leftpedal = struct.unpack(
            "<HHH", data[gdx_start + 0x10:gdx_start + 0x16])
        gdx_type_unk1 = 0
        gdx_volume_flag = 1

        filename_prefix = "d"

    if force_game:
        filename_prefix = force_game[0]

    metadata = {
        'type': gdx_magic,
        'version': version_flag4,
        'defaults': {
            'default_hihat': default_hihat,
            'default_snare': default_snare,
            'default_bass': default_bass,
            'default_hightom': default_hightom,
            'default_lowtom': default_lowtom,
            'default_rightcymbal': default_rightcymbal,
            'default_leftcymbal': default_leftcymbal,
            'default_floortom': default_floortom,
            'default_leftpedal': default_leftpedal,
        },
        'gdx_type_unk1': gdx_type_unk1,
        'gdx_volume_flag': gdx_volume_flag,
        'entries': [],
    }

    entries = []
    prev_filesize = 0
    for i in range(entry_count):
        # sound_flag seems to be related to defaults. If something is set to default, it is 0x02. Else it's 0x04 (for GDXG). Always 0 for GDXH?
        # entry_unk4 seems to always be 255??
        offset, filesize, channels, bits, rate, entry_unk1, entry_unk2, volume, pan, sound_id, sound_flag, entry_unk4 = struct.unpack(
            "<IIHHIIIBBHHH",
            data[entry_start + (i * 0x40):entry_start + (i * 0x40) + 0x20])
        filename = data[entry_start + (i * 0x40) + 0x20:entry_start +
                        (i * 0x40) + 0x40].decode("ascii").strip('\0')

        if filename_prefix == "g":
            if i + 1 == entry_count:
                filesize = len(data) - (data_start + offset)

            else:
                filesize = int.from_bytes(
                    data[entry_start + ((i + 1) * 0x40):entry_start +
                         ((i + 1) * 0x40) + 4], 'little')
                t = prev_filesize
                prev_filesize = filesize
                filesize -= t

        elif entry_unk1 != 0:
            filesize = entry_unk1

        volume = min(volume, 127)

        if sound_id >= 0xfff0:
            # Author's debug guard; while it exits here, the default
            # remapping below is unreachable.
            print("Verify when sound_id >= 0xfff0")
            exit(1)

        if sound_id == 0xfff0:
            sound_id = default_leftcymbal
        elif sound_id == 0xfff1:
            sound_id = default_floortom
        elif sound_id == 0xfff2:
            sound_id = default_leftpedal

        entries.append({
            'sound_id': sound_id,
            'filename': filename,
            'offset': offset,
            'filesize': filesize,
            'channels': channels,
            'bits': bits,
            'rate': rate,
            'volume': volume,
            'pan': pan,
            'sound_flag': sound_flag,  # keep per entry for the extraction loop
        })

        metadata['entries'].append({
            'sound_id': sound_id,
            'filename': filename,
            'volume': volume,
            'pan': pan,
            'extra': entry_unk4,  # Unknown flag, most likely always 255
            'flags': [],
        })

        if version_flag4 < 2:
            if (sound_flag & 0x02) != 0:
                metadata['entries'][-1]['flags'].append(0x02)

        if (sound_flag & 0x0100) != 0:
            metadata['entries'][-1]['flags'].append("NoFilename")

    if output_folder:
        basepath = output_folder
    else:
        basepath = os.path.splitext(os.path.basename(input_filename))[0]

    os.makedirs(basepath, exist_ok=True)

    for entry in entries:
        # print("Extracting", entry['filename'])

        print(entry)

        wave_data = bytearray(data[data_start + entry['offset']:data_start +
                                   entry['offset'] + entry['filesize']])

        output = adpcmwave.decode_data(wave_data, entry['rate'],
                                       entry['channels'], entry['bits'])

        output_filename = os.path.join(
            basepath, "{}_{}.wav".format(filename_prefix, entry['filename']))

        # Use the entry's own flag; the loop variable `sound_flag` would
        # otherwise hold only the value from the last parsed entry.
        if (entry['sound_flag'] & 0x100) != 0 or force_hex:
            output_filename = os.path.join(
                basepath, "%s_%04x.wav" % (filename_prefix, entry['sound_id']))

        output = numpy.ndarray(
            (int(len(output) // 2 // entry['channels']), entry['channels']),
            numpy.int16, output, 0)

        wavfile.write(output_filename, entry['rate'], output)

        # If mixing is enabled, mix using AudioSegment
        if mix_audio:
            # audio_segment = pydub.AudioSegment(
            #     output_stream.getbuffer(),
            #     frame_rate=entry['rate'],
            #     sample_width=entry['bits'] // 8,
            #     channels=entry['channels']
            # )

            audio_segment = pydub.AudioSegment.from_file(output_filename)
            pan = (entry['pan'] - (128 / 2)) / (128 / 2)
            audio_segment = audio_segment.pan(pan)
            db = 20 * math.log10(entry['volume'] / 127)
            audio_segment += db
            audio_segment.export(output_filename, format="wav")

            entry['volume'] = 127
            entry['pan'] = 64

        entry['duration'] = (entry['filesize'] * (entry['bits'] // 8) *
                             entry['channels']) / entry['rate']

        for idx in range(len(metadata['entries'])):
            if metadata['entries'][idx]['sound_id'] == entry['sound_id']:
                metadata['entries'][idx]['volume'] = entry['volume']
                metadata['entries'][idx]['pan'] = entry['pan']
                metadata['entries'][idx]['duration'] = entry['duration']
                metadata['entries'][idx]['rate'] = entry['rate']
                metadata['entries'][idx]['channels'] = entry['channels']
                metadata['entries'][idx]['bits'] = entry['bits']
                metadata['entries'][idx]['raw_filesize'] = entry['filesize']
                break

    open(os.path.join(basepath, "%s_metadata.json" % filename_prefix),
         "w").write(json.dumps(metadata, indent=4))
Example #17
import os
from wavfile import read, write

for root, dirs, files in os.walk('../data/raw'):
    for wav in files:
        rate, data, _, loops = read(f'{root}/{wav}', readloops=True)

        if loops is not None:
            print(wav, loops)
            start, end = loops[0]
            cut_data = data[start:end]

            assert (len(cut_data) == end - start)

            write(f'../data/cut/{wav}', rate, cut_data)
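
The `readloops=True` keyword is again the looping-aware `wavfile` fork; under that assumption the call surfaces the smpl-chunk loop table alongside the audio:

from wavfile import read  # assumed fork exposing readloops=

rate, data, _, loops = read('sample.wav', readloops=True)  # hypothetical file
# loops is e.g. [(24000, 96000)]: (start, end) sample offsets, or None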
Example #18
def fastAudio(ffmpeg,
              theFile,
              outFile,
              silentT,
              frameMargin,
              SAMPLE_RATE,
              audioBit,
              verbose,
              silentSpeed,
              soundedSpeed,
              needConvert,
              chunks=[],
              fps=30):

    if (not os.path.isfile(theFile)):
        print('Could not find file:', theFile)
        sys.exit(1)

    if (outFile == ''):
        fileName = theFile[:theFile.rfind('.')]
        outFile = f'{fileName}_ALTERED.wav'

    if (needConvert):
        # Only print this here so other scripts can use this function.
        print('Running from fastAudio.py')

        import tempfile
        from shutil import rmtree

        TEMP = tempfile.mkdtemp()

        cmd = [
            ffmpeg, '-i', theFile, '-b:a', audioBit, '-ac', '2', '-ar',
            str(SAMPLE_RATE), '-vn', f'{TEMP}/fastAud.wav'
        ]
        if (not verbose):
            cmd.extend(['-nostats', '-loglevel', '0'])
        subprocess.call(cmd)

        theFile = f'{TEMP}/fastAud.wav'

    speeds = [silentSpeed, soundedSpeed]

    sampleRate, audioData = read(theFile)
    if (chunks == []):
        print('Creating chunks')
        chunks = getAudioChunks(audioData, sampleRate, fps, silentT, 2,
                                frameMargin)

    newL = getNewLength(chunks, speeds, fps)
    # Get the new length in samples with some extra leeway.
    estLeng = int((newL * sampleRate) * 1.5) + int(sampleRate * 2)

    # Create an empty array for the new audio.
    newAudio = np.zeros((estLeng, 2), dtype=np.int16)

    channels = 2
    yPointer = 0

    totalChunks = len(chunks)
    beginTime = time.time()

    for chunkNum, chunk in enumerate(chunks):
        audioSampleStart = int(chunk[0] / fps * sampleRate)
        audioSampleEnd = int(audioSampleStart + (sampleRate / fps) *
                             (chunk[1] - chunk[0]))

        theSpeed = speeds[chunk[2]]

        if (theSpeed != 99999):
            spedChunk = audioData[audioSampleStart:audioSampleEnd]

            if (theSpeed == 1):
                yPointerEnd = yPointer + spedChunk.shape[0]
                newAudio[yPointer:yPointerEnd] = spedChunk
            else:
                spedupAudio = np.zeros((0, 2), dtype=np.int16)
                with ArrReader(spedChunk, channels, sampleRate, 2) as reader:
                    with ArrWriter(spedupAudio, channels, sampleRate,
                                   2) as writer:
                        phasevocoder(reader.channels,
                                     speed=theSpeed).run(reader, writer)
                        spedupAudio = writer.output

                yPointerEnd = yPointer + spedupAudio.shape[0]
                newAudio[yPointer:yPointerEnd] = spedupAudio

            myL = chunk[1] - chunk[0]
            mySamples = (myL / fps) * sampleRate
            newSamples = int(mySamples / theSpeed)

            yPointer = yPointer + newSamples
        else:
            # Speed is too high so skip this section.
            yPointerEnd = yPointer

        progressBar(chunkNum,
                    totalChunks,
                    beginTime,
                    title='Creating new audio')

    if (verbose):
        print('yPointer', yPointer)
        print('samples per frame', sampleRate / fps)
        print('Expected video length', yPointer / (sampleRate / fps))
    newAudio = newAudio[:yPointer]
    write(outFile, sampleRate, newAudio)

    if ('TEMP' in locals()):
        rmtree(TEMP)

    if (needConvert):
        return outFile
Example #19
def read_vas3(input_filename, output_folder, force_hex=False, mix_audio=False):
    with open(input_filename, "rb") as infile:
        data = infile.read()

    if data[0:4].decode('ascii') != "VA3W":
        print("Not a valid VA3 file")
        exit(1)

    # v3 header is 1 0 0 2
    version_flag1, version_flag2, version_flag3, version_flag4, entry_count, gdx_size, gdx_start, entry_start, data_start = struct.unpack(
        "<BBBBIIIII", data[0x04:0x1c])

    if entry_count <= 0:
        print("No files to extract")
        exit(1)

    gdx_magic = data[gdx_start:gdx_start + 4].decode('ascii')
    if gdx_magic != "GDXH" and gdx_magic != "GDXG":
        print("Not a valid GDXH header")
        exit(1)

    default_hihat, default_snare, default_bass, default_hightom, default_lowtom, default_rightcymbal = struct.unpack(
        "<HHHHHH", data[gdx_start + 0x04:gdx_start + 0x10])
    if gdx_magic == "GDXH":
        # Not used anywhere, can be ignored??
        # gdx_type_unk1 default is 0
        # gdx_type_unk2 default is 1
        default_leftcymbal = 0xfff0
        default_floortom = 0xfff1
        default_leftpedal = 0xfff2
        gdx_type_unk1 = data[gdx_start + 0x10]  # Not used anywhere?
        gdx_volume_flag = data[gdx_start +
                               0x11]  # How does this work with GDXG?
    elif gdx_magic == "GDXG":
        default_leftcymbal, default_floortom, default_leftpedal = struct.unpack(
            "<HHH", data[gdx_start + 0x10:gdx_start + 0x16])
        gdx_type_unk1 = 0
        gdx_volume_flag = 1

    metadata = {
        'type': gdx_magic,
        'version': version_flag4,
        'defaults': {
            'default_hihat': default_hihat,
            'default_snare': default_snare,
            'default_bass': default_bass,
            'default_hightom': default_hightom,
            'default_lowtom': default_lowtom,
            'default_rightcymbal': default_rightcymbal,
            'default_leftcymbal': default_leftcymbal,
            'default_floortom': default_floortom,
            'default_leftpedal': default_leftpedal,
        },
        'gdx_type_unk1': gdx_type_unk1,
        'gdx_volume_flag': gdx_volume_flag,
        'entries': [],
    }

    entries = []
    for i in range(entry_count):
        # sound_flag seems to be related to defaults. If something is set to default, it is 0x02. Else it's 0x04 (for GDXG). Always 0 for GDXH?
        # entry_unk4 seems to always be 255??
        offset, filesize, channels, bits, rate, entry_unk1, entry_unk2, volume, pan, sound_id, sound_flag, entry_unk4 = struct.unpack(
            "<IIHHIIIBBHHH",
            data[entry_start + (i * 0x40):entry_start + (i * 0x40) + 0x20])
        filename = data[entry_start + (i * 0x40) + 0x20:entry_start +
                        (i * 0x40) + 0x40].decode("ascii").strip('\0')

        if entry_unk1 != 0:
            filesize = entry_unk1

        # if entry_unk1 != 0 or entry_unk2 != 0:
        #     print("Unknown fields in entry: %08x %08x" % (entry_unk1, entry_unk2))
        #     exit(1)

        # Code for an older version of VA3 files?
        # I think there's some padding that it's trying to deal with here, but I'm not sure exactly.
        # Need a sample to verify this functionality.
        # entry_unk1 and entry_unk2 should always be 0 for v3
        # if entry_unk2 != 0 and (entry_unk2 == filesize or entry_unk2 == filesize + 0x20 or entry_unk2 == filesize * 4):
        #     entry_unk2 = 0

        # if version_flag4 == 0:
        #     if entry_unk2 > 0 and entry_unk2 >= 0x20:
        #         entry_unk2 -= 0x20
        #     if entry_unk1 > 0 and entry_unk1 >= 0x20:
        #         entry_unk1 -= 0x20

        # if entry_unk2 > filesize:
        #     entry_unk2 = filesize

        # if entry_unk1 != 0:
        #     valid_file = entry_unk1 == entry_unk2
        # else:
        #     valid_file = entry_unk2 == 0

        if gdx_volume_flag == 0:
            # ??
            # This code shouldn't be hit unless you're working
            # with some really old files I suspect
            volume = 3 * volume / 2
            print("Verify volume when gdx_volume_flag == 0")
            exit(1)
        else:
            volume = min(volume, 127)

        if version_flag1 == 1 and version_flag2 == 0 and version_flag3 == 0 and (
                version_flag4 == 0 or version_flag4 == 1):
            # v1 and v2 use a table for volume?
            # Need to find a sample to verify
            #volume2 = VOLUME_TABLE[min(volume, 0x7f)]
            #print(volume, volume2)
            #print("Verify when volume table is used (percentages or not?)")
            #exit(1)
            pass

        if sound_id >= 0xfff0:
            # Author's debug guard; while it exits here, the default
            # remapping below is unreachable.
            print("Verify when sound_id >= 0xfff0")
            exit(1)

        if sound_id == 0xfff0:
            sound_id = default_leftcymbal
        elif sound_id == 0xfff1:
            sound_id = default_floortom
        elif sound_id == 0xfff2:
            sound_id = default_leftpedal

        entries.append({
            'sound_id': sound_id,
            'filename': filename,
            'offset': offset,
            'filesize': filesize,
            'channels': channels,
            'bits': bits,
            'rate': rate,
            'volume': volume,
            'pan': pan,
            'sound_flag': sound_flag,  # keep per entry for the extraction loop
        })

        metadata['entries'].append({
            'sound_id': sound_id,
            'filename': filename,
            'volume': volume,
            'pan': pan,
            'extra': entry_unk4,  # Unknown flag, most likely always 255
            'flags': [],
        })

        if version_flag4 < 2:
            if (sound_flag & 0x02) != 0:
                metadata['entries'][-1]['flags'].append(0x02)

        # if (sound_flag & 0x04) != 0:
        #     metadata['entries'][-1]['flags'].append("DefaultSound") # Generate this by checking defaults in header
        #"DefaultSound" if (sound_flag & 0x04) != 0,

        if (sound_flag & 0x0100) != 0:
            metadata['entries'][-1]['flags'].append("NoFilename")

    if output_folder:
        basepath = output_folder
    else:
        basepath = os.path.splitext(os.path.basename(input_filename))[0]

    os.makedirs(basepath, exist_ok=True)

    for entry in entries:
        #print("Extracting", entry['filename'])
        #print(entry)

        wave_data = bytearray(data[data_start + entry['offset']:data_start +
                                   entry['offset'] + entry['filesize']])
        output = adpcmwave.decode_data(wave_data, entry['rate'],
                                       entry['channels'], entry['bits'])

        output_filename = os.path.join(basepath,
                                       "{}.wav".format(entry['filename']))

        # Use the entry's own flag; the loop variable `sound_flag` would
        # otherwise hold only the value from the last parsed entry.
        if (entry['sound_flag'] & 0x100) != 0 or force_hex:
            output_filename = os.path.join(basepath,
                                           "%04x.wav" % entry['sound_id'])

        output = numpy.ndarray(
            (int(len(output) // 2 // entry['channels']), entry['channels']),
            numpy.int16, output, 0)

        wavfile.write(output_filename, entry['rate'], output)

        # If mixing is enabled, mix using AudioSegment
        if mix_audio:
            # audio_segment = pydub.AudioSegment(
            #     output_stream.getbuffer(),
            #     frame_rate=entry['rate'],
            #     sample_width=entry['bits'] // 8,
            #     channels=entry['channels']
            # )

            audio_segment = pydub.AudioSegment.from_file(output_filename)
            pan = (entry['pan'] - (128 / 2)) / (128 / 2)
            audio_segment = audio_segment.pan(pan)
            db = 20 * math.log10(entry['volume'] / 127)
            audio_segment += db
            audio_segment.export(output_filename, format="wav")

            entry['volume'] = 127
            entry['pan'] = 64

        entry['duration'] = len(
            pydub.AudioSegment.from_file(output_filename)) / 1000

        for idx in range(len(metadata['entries'])):
            if metadata['entries'][idx]['sound_id'] == entry['sound_id']:
                metadata['entries'][idx]['volume'] = entry['volume']
                metadata['entries'][idx]['pan'] = entry['pan']
                metadata['entries'][idx]['duration'] = entry['duration']
                break

    open(os.path.join(basepath, "metadata.json"),
         "w").write(json.dumps(metadata, indent=4))
Example #20
array = np.zeros((nsamples,), dtype=np.float32)

frequency = 440
length = 5  # seconds of audio to produce

beep_t = np.linspace(0, 1, beep_length)  # sample times for one beep
beep_value_array = np.sin(frequency * 2 * np.pi * beep_t)  # ~440 Hz, assuming beep_length spans one second

previous_beep_position = 0
array[:half_beep_length] = beep_value_array[half_beep_length:]
while True:
    current_beep_position = previous_beep_position + gap_length

    if current_beep_position + half_beep_length > nsamples:
        if current_beep_position - half_beep_length < nsamples:
            last_beep_length = nsamples - (current_beep_position - half_beep_length)
            array[current_beep_position - half_beep_length:] = beep_value_array[:last_beep_length]
        break

    array[current_beep_position - half_beep_length:current_beep_position + half_beep_length] = beep_value_array
    previous_beep_position = current_beep_position

array[array > 1.0] = 1.0
array[array < -1.0] = -1.0
data = np.asarray(array * (2 ** 31 - 1), dtype=np.int32).astype(dtype)

uint8_data = np.frombuffer(data.tobytes(), dtype=np.uint8).reshape((nsamples, 1, bytes))  # `bytes` here is the per-sample byte count (it shadows the builtin)

write(filename, fs, uint8_data, bitrate=bits)
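
The final `frombuffer(...).reshape(...)` views each sample as raw bytes so the writer can emit an arbitrary bit depth; `bytes` is the per-sample byte count. A sketch of equivalent 24-bit packing with explicit, hypothetical names:

import numpy as np

fs = 44100
bits = 24                     # hypothetical target depth
bytes_per_sample = bits // 8  # the loose `bytes` variable above
nsamples = fs                 # one second

samples = np.zeros(nsamples, dtype=np.float32)  # stands in for `array`
ints = np.asarray(samples * (2 ** 31 - 1), dtype=np.int32)

# View each little-endian int32 as 4 bytes and keep only the top `bits` of
# them, giving (nsamples, 1, bytes_per_sample) uint8 frames for the writer.
raw = np.frombuffer(ints.tobytes(), dtype=np.uint8).reshape(nsamples, 4)
uint8_data = raw[:, 4 - bytes_per_sample:].reshape(nsamples, 1, bytes_per_sample)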