Exemplos de WaveObject em Python, exemplos de simpleaudio.WaveObject em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: irrbeat.py Projeto: JochemVanIterson/CSD

    def __init__(self, filepath):
        """Load a WAV file and build a full- and a half-amplitude WaveObject.

        Args:
            filepath: Path of the WAV file, opened for reading.

        Sets ``self.wave_obj_acc`` (original samples) and ``self.wave_obj``
        (samples floor-divided by two).
        """
        # The context manager closes the file once the frames are in memory;
        # the original left the handle open.
        with wave.open(filepath, 'rb') as wave_read:
            # getparams() -> (nchannels, sampwidth, framerate, nframes, ...)
            wave_params = wave_read.getparams()
            wave_framesNR = wave_params[3]  # number of frames
            wave_buffer = wave_read.readframes(wave_framesNR)

        # numpy.frombuffer replaces the deprecated binary mode of
        # numpy.fromstring; the floor division returns a new array, so the
        # read-only view over wave_buffer is never written to.
        # NOTE(review): assumes the file holds 16-bit samples -- confirm.
        wave_buffer_softer = numpy.frombuffer(
            wave_buffer, numpy.int16) // 2  # half amplitude

        self.wave_obj_acc = sa.WaveObject(wave_buffer, wave_params[0],
                                          wave_params[1], wave_params[2])
        self.wave_obj = sa.WaveObject(wave_buffer_softer, wave_params[0],
                                      wave_params[1], wave_params[2])

Exemplo n.º 2

0

Exibir arquivo

Arquivo: audio_info.py Projeto: dmyersturnbull/sauronlib

    def build(name: str,
              song: pydub.AudioSegment,
              applied_length: Optional[int] = None,
              volume: int = 255,
              volume_floor: int = -50,
              bytes_per_sample: int = 2,
              sample_rate: int = 44100):
        """Build an ``AudioInfo`` from ``song``, resized and gain-adjusted.

        Args:
            name: Passed through to ``AudioInfo``.
            song: Source audio segment.
            applied_length: Target length, in the units of ``len(song)``;
                the segment is repeated and cut to it. ``None`` keeps the
                segment unchanged.
            volume: Value from 0 to 255; 0 yields a short silent segment.
            volume_floor: Used together with ``volume`` to compute the gain
                added to the segment.
            bytes_per_sample: Sample width handed to ``sa.WaveObject``.
            sample_rate: Sample rate handed to ``sa.WaveObject``.

        Raises:
            BadAudioLengthException: If ``applied_length`` is negative.
            BadVolumeException: If ``volume`` is outside 0-255.
        """
        if applied_length is not None and applied_length < 0:
            length_message = "The length is {} but cannot be negative".format(
                applied_length)
            raise BadAudioLengthException(length_message)
        if volume < 0 or volume > 255:
            volume_message = "The volume is {} but must be 0–255".format(volume)
            raise BadVolumeException(volume_message)

        # Start from the untouched segment; loop and trim only on request.
        resized = song
        if applied_length is not None:
            n_repeats = math.ceil(applied_length / len(song))
            resized = (song * n_repeats)[0:applied_length]

        wants_sound = not (volume == 0 or applied_length == 0)
        if wants_sound:
            gain = volume * (volume_floor / 255) - volume_floor
            final = resized + gain
            if applied_length is not None:
                assert len(resized) << approxeq >> applied_length or applied_length == 1,\
                  "The actual audio stimulus length is {}, but the length in stimulus_frames is {}".format(len(resized), applied_length)
        else:
            final = pydub.AudioSegment.silent(duration=0.5)

        # Always handed over as a single channel.
        wave_object = sa.WaveObject(final.raw_data, 1, bytes_per_sample,
                                    sample_rate)
        return AudioInfo(name, wave_object, applied_length, volume)

Exemplo n.º 3

0

Exibir arquivo

Arquivo: tplink_smartbulb_class.py Projeto: dpark6060/tplink-smartplug

    def fun_party(self):
        """Play the party intro sound, run the light/progress show, then party.

        Reads ``data/PartyStart.wav`` and ``data/dancetypes.txt`` relative to
        the working directory, sends the ``color_mode`` and ``hue`` commands,
        and finally calls ``self.party()``.
        """
        os.environ['WRAP_STDERR'] = 'true'
        path_to_file = 'data/PartyStart.wav'
        # Close the wave reader as soon as the frames are in memory.
        with wave.open(path_to_file, 'rb') as wave_read:
            audio_data = wave_read.readframes(wave_read.getnframes())
            num_channels = wave_read.getnchannels()
            bytes_per_sample = wave_read.getsampwidth()
            sample_rate = wave_read.getframerate()

        wave_obj = sa.WaveObject(audio_data, num_channels, bytes_per_sample,
                                 sample_rate)
        # Playback runs in the background; the handle is kept for the
        # duration of this method.
        play_obj = wave_obj.play()

        self.send_command('color_mode')
        for m, t in zip(messages, message_times):
            progress_bar(m, t)

        self.send_command('hue', 240)

        party_list = 'data/dancetypes.txt'
        # The context manager closes the list file; the original leaked it.
        with open(party_list, 'r') as partyfile:
            lines = [line.rstrip() for line in partyfile]
        ntypes = len(lines)
        total_time = 11.2
        if ntypes:
            delay = total_time / ntypes
            for i in progressbar.progressbar(range(ntypes)):
                logging.error(f'{lines[i]}')
                time.sleep(delay)
        else:
            # An empty list used to raise ZeroDivisionError; keep the overall
            # pause so party() still starts after the same interval.
            time.sleep(total_time)

        self.party()

Exemplo n.º 4

0

Exibir arquivo

def play_signal(audio_signal):
    """Play ``audio_signal`` and block until playback has finished.

    The buffer is handed to ``sa.WaveObject`` as one channel with four bytes
    per sample at 16000 Hz.
    """
    # 4 bytes per one float32 sample
    playback = sa.WaveObject(audio_signal, 1, 4, 16000).play()
    playback.wait_done()

Exemplo n.º 5

0

Exibir arquivo

 def play_sound(self, wave):
     """Play ``wave`` once any playback already in progress has finished.

     Args:
         wave: Audio buffer handed to ``sa.WaveObject`` as one channel with
             one byte per sample at ``self.sample_rate``.

     Playback is best-effort: ordinary errors are ignored.
     """
     try:
         wave_obj = sa.WaveObject(wave, 1, 1, self.sample_rate)
         # Block on the previous playback instead of spinning on
         # is_playing(), which kept a CPU core busy while waiting.
         if self.play_obj:
             self.play_obj.wait_done()
         self.play_obj = wave_obj.play()
     except Exception:
         # Still best-effort, but KeyboardInterrupt and SystemExit are no
         # longer swallowed as they were by the bare ``except``.
         pass

Exemplo n.º 6

0

Exibir arquivo

Arquivo: control.py Projeto: WiktorJ/magichat

def sorting_mode():
    """Pick a random team name, synthesize it as speech and play it fully."""
    chosen_team = random.choice(TEAM_NAMES)
    request_input = texttospeech.types.SynthesisInput(text=chosen_team)
    synthesized = clientT2S.synthesize_speech(request_input, voice,
                                              audio_config)
    # One channel, two bytes per sample, 22050 Hz.
    playback = sa.WaveObject(synthesized.audio_content, 1, 2, 22050).play()
    playback.wait_done()

Exemplo n.º 7

0

Exibir arquivo

Arquivo: sounds.py Projeto: moormanlab/maze-control

 def addWhiteNoise(self, key, duration=5.0, volume=1.0, sample_rate=44100):
     """Generate white noise and store it as a WaveObject in ``self.sound``.

     Args:
         key: Key under which the sound is stored.
         duration: Multiplied by ``sample_rate`` to get the sample count.
         volume: Scale factor applied to the normalized noise.
         sample_rate: Sample rate handed to ``sa.WaveObject``.
     """
     logger.debug(
         'Sound %s : White noise random generated, duration = %s s, sample_rate = %s, volume = %s',
         key, duration, sample_rate, volume)
     n_samples = round(duration * sample_rate)
     samples = np.random.normal(0, 1, n_samples)
     # Scale so the largest magnitude becomes 32767 * volume.
     peak = np.max(np.abs(samples))
     samples *= 32767 * volume / peak
     pcm = samples.astype(np.int16)
     self.sound[key] = sa.WaveObject(pcm, 1, 2, sample_rate)

Exemplo n.º 8

0

Exibir arquivo

Arquivo: test_SampleService.py Projeto: AframGakk/SamplerGAN_BackEnd

    def test_getFileByLocation(self):
        """Fetch a sample by its location and wrap the data in a WaveObject."""
        sample_location = '2/heavy/KICK.wav'
        sample_service = SampleService()

        sample_data = sample_service.getFileByLocation(sample_location)

        # One channel, two bytes per sample, 16000 Hz.
        wave_obj = sa.WaveObject(sample_data, 1, 2, 16000)

        name = ''

Exemplo n.º 9

0

Exibir arquivo

 def step(self):
     """Read the wave-pattern register range and play its bytes as audio."""
     print('Called Wave')
     first_address = IO_Registers.WAVE_PATTERN_START
     # The end address is inclusive, hence the + 1.
     pattern = bytes(
         self.mmu.read_byte(address)
         for address in range(first_address,
                              IO_Registers.WAVE_PATTERN_END + 1))
     sa.WaveObject(pattern, 2, 2, 44100).play()

Exemplo n.º 10

0

Exibir arquivo

Arquivo: audio_backend.py Projeto: Bluemi/musicus

 def play(self):
     """Start playing ``self.audio_segment`` from its beginning."""
     self.playing = True
     segment = self.audio_segment
     wave_object = simpleaudio.WaveObject(
         segment.raw_data,
         segment.channels,
         segment.sample_width,
         segment.frame_rate,
     )
     # Record the start time before playback is launched.
     self.last_start_time = time.time()
     self.play_object = wave_object.play()
     self.played_duration = 0

Exemplo n.º 11

0

Exibir arquivo

Arquivo: audio_backend.py Projeto: Bluemi/musicus

 def resume(self):
     """Resume playback from the played position unless already playing.

     The remaining audio is the slice of ``self.audio_segment`` starting at
     index ``int(self.played_duration * 1000)``.
     """
     if self.playing:
         return

     self.playing = True
     start_index = int(self.played_duration * 1000)
     remaining = self.audio_segment[start_index:]
     wave_object = simpleaudio.WaveObject(remaining.raw_data,
                                          self.audio_segment.channels,
                                          self.audio_segment.sample_width,
                                          self.audio_segment.frame_rate)
     self.play_object = wave_object.play()
     self.last_start_time = time.time()

Exemplo n.º 12

0

Exibir arquivo

Arquivo: audio.py Projeto: TFarla/subs2srs-cross-platform

    def __init__(self, audio_bytes):
        """Parse in-memory WAV data and keep it as a WaveObject.

        Args:
            audio_bytes: Complete contents of a WAV file as bytes.
        """
        # The context manager closes the wave reader once the frames are
        # read; the original never closed it.
        with wave.open(BytesIO(audio_bytes)) as wave_read:
            audio_data = wave_read.readframes(wave_read.getnframes())
            num_channels = wave_read.getnchannels()
            bytes_per_sample = wave_read.getsampwidth()
            sample_rate = wave_read.getframerate()

        self._wave_obj = sa.WaveObject(audio_data, num_channels,
                                       bytes_per_sample, sample_rate)

Exemplo n.º 13

0

Exibir arquivo

    def play(self):
        """Play the SpectroGraphic sound and wait until playback has ended."""
        # The sound array is played with simpleaudio as one channel with two
        # bytes per sample.
        playback = sa.WaveObject(self.sound_array, 1, 2,
                                 self.SAMPLE_RATE).play()
        playback.wait_done()

Exemplo n.º 14

0

Exibir arquivo

        def p(f):
            """Load ``f``, scale a copy of its buffer to int16 and play it."""
            f.load()
            samples = f.getBuffer().copy()
            # Scale the copy in place, then convert to 16-bit integers.
            samples *= 32767
            pcm = samples.astype(np.int16)

            playback = sa.WaveObject(pcm, f.channels, 2, f.samplerate).play()
            playback.wait_done()

Exemplo n.º 15

0

Exibir arquivo

Arquivo: sounds.py Projeto: moormanlab/maze-control

 def addBufferSound(self, key, buff, sample_rate=44100):
     """Store a caller-supplied sample buffer as a WaveObject under ``key``.

     ``buff`` should be a numpy array of np.int16 with one or two dimensions
     (mono or stereo); it is converted to np.int16 here.
     """
     logger.debug('Custom sound channels %s', len(buff))
     # convert to 16-bit data
     pcm = buff.astype(np.int16)
     # NOTE(review): stereo is detected by a first-axis length of 2, so a
     # mono buffer holding exactly two samples also counts as stereo --
     # confirm the expected buffer layout with callers.
     chan = 2 if len(pcm) == 2 else 1
     self.sound[key] = sa.WaveObject(pcm, chan, 2, sample_rate)
     logger.debug(str(pcm))

Exemplo n.º 16

0

Exibir arquivo

 def play(self):
     """Compute the frame buffer and play it in a loop until stopped.

     The buffer from ``self._calculate_frames()`` is replayed until
     ``self.stop_playing`` is set. When ``self.write_to_disk`` is set, the
     buffer is written once to ``self.filename`` as a WAV file while the
     current playback runs.
     """
     t1 = time.time()
     self.buffer = self._calculate_frames()
     WO = simpleaudio.WaveObject(self.buffer, sample_rate=bw.FRAMERATE)
     t2 = time.time()
     # logging formats lazily with %-placeholders; the original passed the
     # values print-style, which made the record fail to format.
     logging.debug('_calculate_frames took %s seconds', t2 - t1)
     while not self.stop_playing:
         play_buffer = WO.play()
         if self.write_to_disk:
             # Clear the flag first so the file is written only once.
             self.write_to_disk = False
             with wave.open(self.filename, 'wb') as wav:
                 wav.setparams(
                     (bw.CHANNELS, bw.SAMPLEWIDTH, bw.FRAMERATE,
                      self.chunk_length, 'NONE', 'not compressed'))
                 wav.writeframes(self.buffer)
         play_buffer.wait_done()

Exemplo n.º 17

0

Exibir arquivo

def play(key, mode):
    """Play a random four-bar pattern drawn from the scale of ``mode``.

    Three bars (a, b, d) are built from four random picks each and stacked
    in the order a, b, a, d before being normalized, converted to int16 and
    played to completion.

    Args:
        key: Passed to each scale helper.
        mode: One of "ionian", "dorian", "mixolydian" or "locrian";
            any other value raises ``KeyError``.
    """
    scales = {
        "ionian": get_ionian_l(key),
        "dorian": get_dorian_pa(key),
        "mixolydian": get_mixolydian(key),
        "locrian": get_locrian(key)
    }
    choices = scales[mode]

    def random_bar():
        # Four independent picks, with replacement.
        return np.array(
            tuple(choices[random.randint(0, len(choices) - 1)]
                  for _ in range(4)))

    bar_a = random_bar()
    bar_b = random_bar()
    bar_d = random_bar()

    audio = np.vstack((bar_a, bar_b, bar_a, bar_d))
    # normalize to 16-bit range
    audio *= 32767 / np.max(np.abs(audio))
    # convert to 16-bit data
    pcm = audio.astype(np.int16)
    # start playback, then wait for it to finish before exiting
    playback = sa.WaveObject(pcm, 1, 2, sample_rate).play()
    playback.wait_done()

Exemplo n.º 18

0

Exibir arquivo

Arquivo: timer.py Projeto: BjoernLudwigPTB/intervaltimer

    def _prepare_audio(path_to_file: str) -> WaveObject:
        """Prepare an audio file from a specific path to be played by simpleaudio

        Parameters
        ----------
        path_to_file : str
            path to the file relative to the working directory or absolute
        Returns
        -------
        WaveObject
            the audio file to be played by simpleaudio's WaveObject's `play()`
        """
        # The context manager closes the file once the frames are in memory;
        # the original left the handle open.
        with wave.open(str(PurePath(path_to_file)), "rb") as wave_read:
            audio_data = wave_read.readframes(wave_read.getnframes())
            num_channels = wave_read.getnchannels()
            bytes_per_sample = wave_read.getsampwidth()
            sample_rate = wave_read.getframerate()
        return sa.WaveObject(audio_data, num_channels, bytes_per_sample,
                             sample_rate)

Exemplo n.º 19

0

Exibir arquivo

Arquivo: sound_driver.py Projeto: jaideraf/vsgb

    def play(self, left, right, ticks):
        """Buffer one left/right sample pair and play the buffer when due.

        Args:
            left: Value stored at the current buffer position.
            right: Value stored at the following buffer position.
            ticks: Added to ``self.ticks``; nothing is buffered until the
                accumulated ticks exceed ``self.div``.
        """
        self.ticks += ticks
        if self.ticks <= self.div:
            return

        self.ticks = 0

        self.buffer[self.i] = left
        self.buffer[self.i + 1] = right
        self.i += 2

        if self.i >= SoundDriver.BUFFER_SIZE / 2:
            frame_bytes = bytes(self.buffer)
            wave_obj = sa.WaveObject(frame_bytes, 2, 1, self.sample_rate)
            try:
                # Stop the previous playback, if there is one.
                self.play_obj.stop()
            except Exception:
                # Stopping stays best-effort, but KeyboardInterrupt and
                # SystemExit are no longer swallowed by a bare ``except``.
                pass
            self.play_obj = wave_obj.play()
            self.i = 0

Exemplo n.º 20

0

Exibir arquivo

    def handle_audio(obj, wait=False):
        """Handle an audio event.

        This function plays an audio file.
        Currently only `.wav` format is supported.

        :param obj: An :py:class:`~turberfield.dialogue.model.Model.Audio`
            object.
        :param bool wait: Force a blocking wait until playback is complete.
        :return: The supplied object.

        """
        if not simpleaudio:
            return obj

        fp = pkg_resources.resource_filename(obj.package, obj.resource)
        # The context manager closes the file once the frames are read;
        # the original left the handle open.
        with wave.open(fp, "rb") as data:
            nChannels = data.getnchannels()
            bytesPerSample = data.getsampwidth()
            sampleRate = data.getframerate()
            nFrames = data.getnframes()

            def framesFor(milliseconds):
                # A frame holds one sample for every channel, so the frame
                # count depends on the sample rate only. Multiplying before
                # dividing keeps rates such as 44100 Hz exact.
                return int(sampleRate * milliseconds // 1000)

            # Clamp so an offset beyond the end cannot produce a negative
            # frame count.
            offset = max(0, min(nFrames, framesFor(obj.offset)))
            duration = nFrames - offset
            if obj.duration is not None:
                duration = max(0, min(duration, framesFor(obj.duration)))

            data.readframes(offset)  # skip the frames before the offset
            frames = data.readframes(duration)

        # The same frames are replayed each time round, so build the
        # WaveObject once.
        waveObj = simpleaudio.WaveObject(frames, nChannels, bytesPerSample,
                                         sampleRate)
        for _ in range(obj.loop):
            playObj = waveObj.play()
            # Looped playback must finish before the next repetition starts.
            if obj.loop > 1 or wait:
                playObj.wait_done()
        return obj

Exemplo n.º 21

0

Exibir arquivo

Arquivo: sound_test.py Projeto: Pyro12341/sound_test

def measure(settings, SW):
    """Play a generated sound sample and time the listener's key press.

    Args:
        settings: Mapping forwarded to ``generate_waveform``; its
            ``channels``, ``bit_depth`` and optional ``sample_rate`` entries
            configure the WaveObject.
        SW: Object started before playback and joined afterwards; its
            ``getValue()`` supplies the measurement.

    Returns:
        ``(wf, raw, t)`` where ``t`` is ``SW.getValue() / 1e9`` or ``None``
        when no value was recorded.
    """
    print('\nGenerating the sound sample', end='...')
    wf, raw = generate_waveform(BASE_WAVEFORM, NOISE_PROFILE, **settings)
    # NOTE(review): 'bit_depth' is passed as bytes_per_sample -- confirm the
    # setting really holds bytes rather than bits.
    wave_obj = sa.WaveObject(
        wf,
        num_channels=settings['channels'],
        bytes_per_sample=settings['bit_depth'],
        sample_rate=settings.get('sample_rate', 44100),
    )
    print('Done!')

    print("\nPress '%s' when you hear the white noise." % DETECT_KEY)
    input('Press enter to start the experiment!')

    SW.start()
    play_obj = wave_obj.play()
    # Pressing the detect key stops playback early.
    hotkey_handle = add_hotkey(DETECT_KEY, play_obj.stop)

    play_obj.wait_done()
    remove_hotkey(hotkey_handle)
    SW.join(forceStop=True)

    if SW.getValue() is None:
        print("You didn't detect any white noise!")
        return wf, raw, None

    t = SW.getValue() / 1e9
    print('You detected noise at %.3f s.' % t)
    return wf, raw, t

Exemplo n.º 22

0

Exibir arquivo

    def _build_audio(audio_file_id: int, applied_length: Optional[int] = None, volume: int = 255):
        """Build an ``AudioInfo`` for a stored audio file.

        Args:
            audio_file_id: ID looked up in ``valarpy.model.AudioFiles``.
            applied_length: Target length, in the units of ``len(song)``;
                the audio is repeated and cut to it. ``None`` keeps the
                stored length.
            volume: Value from 0 to 255; 0 yields a short silent segment.

        Raises:
            BadWriteError: If ``applied_length`` is negative or ``volume``
                is outside 0-255.
            UnrecognizedKeyError: If no audio file has the given ID.
            AssertionError: If the decoded length differs from the length
                recorded for the file.
        """
        if applied_length is not None and applied_length < 0:
            raise BadWriteError(f"File {audio_file_id}: length {applied_length} < 0")
        if volume < 0 or volume > 255:
            raise BadWriteError(f"The volume is {volume} but must be 0–255")

        import valarpy.model as model

        valar_obj = model.AudioFiles.select().where(model.AudioFiles.id == audio_file_id).first()
        if valar_obj is None:
            # The original had a stray "." before the closing parenthesis,
            # which was a syntax error.
            raise UnrecognizedKeyError(f"No audio file with ID {audio_file_id}")
        song = pydub.AudioSegment(data=valar_obj.data, sample_width=2, frame_rate=44100, channels=1)
        # Compare the decoded length with the recorded length (seconds * 1000).
        n_sec_valar = valar_obj.n_seconds * 1000
        length_delta = abs(len(song) - n_sec_valar)
        if length_delta > 0.00001:
            raise AssertionError(f"File {audio_file_id} is {len(song)}, but Valar says it’s {n_sec_valar}")

        if applied_length is None:
            resized = song
        else:
            # Repeat the audio often enough to cover the target, then cut.
            n_repeats = math.ceil(applied_length / len(song))
            resized = (song * n_repeats)[0:applied_length]

        if volume == 0 or applied_length == 0:
            final = pydub.AudioSegment.silent(duration=0.5)
        else:
            # noinspection PyTypeChecker
            volume_floor = config.get_float("sauron.hardware.stimuli.audio.audio_floor")
            volume_ceil = config.get_float("sauron.hardware.stimuli.audio.audio_ceil")
            # The volume is mapped linearly between the configured floor
            # and ceiling and added to the segment.
            final = resized + volume * (volume_ceil - volume_floor) / 255 + volume_floor

        play_obj = sa.WaveObject(final.raw_data, 1, 2, 44100)

        return AudioInfo(play_obj, applied_length, volume)

Exemplo n.º 23

0

Exibir arquivo

Arquivo: sounds.py Projeto: moormanlab/maze-control

    def addTone(self,
                key,
                duration=1.0,
                freq=1000.0,
                volume=1.0,
                sample_rate=44100):
        """Generate a sine tone and store it as a WaveObject in ``self.sound``.

        Args:
            key: Key under which the sound is stored.
            duration: Note duration in seconds.
            freq: Frequency of the sine wave.
            volume: Scale factor applied to the normalized tone.
            sample_rate: Sample rate handed to ``sa.WaveObject``.
        """
        logger.debug(
            'Sound %s : Tone freq = %s Hz, duration = %s s, sample_rate = %s, volume = %s',
            key, freq, duration, sample_rate, volume)

        # One timestep per sample, excluding the endpoint.
        n_samples = int(duration * sample_rate)
        timesteps = np.linspace(0, duration, n_samples, endpoint=False)

        tone = np.sin(freq * timesteps * 2 * np.pi)
        logger.debug(str(tone))

        # Scale so the largest magnitude becomes 32767 * volume, then
        # convert to 16-bit data.
        peak = np.max(np.abs(tone))
        tone *= 32767 * volume / peak
        pcm = tone.astype(np.int16)
        self.sound[key] = sa.WaveObject(pcm, 1, 2, sample_rate)
        logger.debug(str(pcm))

Exemplo n.º 24

0

Exibir arquivo

Arquivo: main.py Projeto: jjarrett9/musi2525Project

 def play(self):
     """Convert the module-level ``output`` array to int16 and play it."""
     # ``output`` is only read here, so no ``global`` declaration is needed.
     pcm = output.astype("int16")
     self.wave_obj = sa.WaveObject(pcm, 2, 2, 44100)
     self.play_obj = self.wave_obj.play()

Exemplo n.º 25

0

Exibir arquivo

Arquivo: AwsIotNotifierThing.py Projeto: lilley2412/lookout

 def getPhraseWaveObj(self, key, phrase):
     """Return the cached WaveObject for ``key``, creating it on first use.

     A missing entry is built from ``self.getPollyPhrase(phrase)`` as one
     channel with two bytes per sample at 16000 Hz.
     """
     cache = self.pollyPhrases
     if key in cache:
         return cache[key]
     cache[key] = sa.WaveObject(self.getPollyPhrase(phrase), 1, 2, 16000)
     return cache[key]

Exemplo n.º 26

0

Exibir arquivo

# Render every part of the composition, mix the parts, then play the mix
# and write it to the WAV file named by sys.argv[3].
volume = reader.read_dynamic(composition['dynamic'])
# One list of rendered values per part.
musics = []
for part in composition['parts']:
    instrument_name = composition['parts'][part]
    instructions = reader.parse_music(composition['score'][part])
    musics.append([])
    count = 0
    for cmd in instructions:
        #  TODO: handle more than notes
        # The second play_note argument is (60 / BPM) * FPS * beats.
        musics[-1].extend(instruments[instrument_name].play_note(
            cmd['frequency'], (60 / BPM) * FPS * cmd['beats']))
        count += 1
        # Legato is switched on once more than 12 instructions were played.
        instruments[instrument_name].set_legato(count > 12)

# Mix down to the length of the shortest part by averaging the parts.
score_len = min([len(x) for x in musics])
for i in range(score_len):
    sound1.append(int(sum([x[i] for x in musics]) / len(musics)))

# Elapsed time since ``start``, which is set outside this excerpt.
print(time.time() - start)

# NOTE(review): ``sound1`` is created outside this excerpt; WaveObject and
# writeframes both need a bytes-like buffer -- confirm its type.
a = sa.WaveObject(sound1, 1, 2, FPS)
b = a.play()

# Write the same mix as a mono file with two bytes per sample while
# playback runs.
with wave.open(sys.argv[3], mode='wb') as f:
    f.setframerate(FPS)
    f.setnchannels(1)
    f.setsampwidth(2)
    f.writeframes(sound1)

# Keep the script alive until playback has finished.
b.wait_done()

Exemplo n.º 27

0

Exibir arquivo

Arquivo: samplemaker.py Projeto: wizzwizz4/samplemaker

def play_from_buffer(buffer, bytes_per_sample=2, sample_rate=44100):
    """Start single-channel playback of ``buffer`` without waiting for it."""
    wave_object = simpleaudio.WaveObject(buffer, 1, bytes_per_sample,
                                         sample_rate)
    wave_object.play()

Exemplo n.º 28

0

Exibir arquivo

Arquivo: putzini_track.py Projeto: robertbuecker/nessundorma

    def __init__(self,
                 wave_file=None,
                 label_file=None,
                 start_time=0.,
                 volume=100.,
                 mqtt_client=None):
        """Load the audio and/or the timed labels of a track.

        Args:
            wave_file: Optional path of the WAV file to play.
            label_file: Optional path of a tab-separated label file with
                ``start``, ``end`` and ``text`` columns; ``text`` holds
                comma-separated comment, speed and trigger fields.
            start_time: Position in seconds at which the track starts;
                earlier labels and audio are dropped.
            volume: Volume in percent; 100 leaves the audio unscaled.
            mqtt_client: Client handed to ``TimedMessageDispatcher``; labels
                are only loaded when it is given.

        Raises:
            ValueError: If neither ``wave_file`` nor ``label_file`` is given.
        """
        self.name = ','.join(
            [fn for fn in [wave_file, label_file] if fn is not None])

        self.logger = logging.getLogger(f'putzini_track:{self.name}')

        if (wave_file is None) and (label_file is None):
            raise ValueError(
                'You must either specify a wave file or a label file!')

        self.wave_file = wave_file
        self.label_file = label_file
        self.mqtt_client = mqtt_client

        self.wave = None
        self.timing = None
        self.start_time = start_time
        self._loop = False
        self.playback = sa.PlayObject(0)

        if (label_file is not None) and (self.mqtt_client is not None):
            self.timing = TimedMessageDispatcher(self.mqtt_client)
            with open(self.label_file, newline='') as fh:
                reader = csv.DictReader(fh,
                                        delimiter='\t',
                                        fieldnames=['start', 'end', 'text'])
                lbls = []
                for row in reader:
                    stp = {}
                    stp['time'] = float(row['start'])
                    txt = row['text'].split(',')
                    stp['comment'] = txt[0]
                    # An empty speed field means "no speed".
                    stp['speed'] = int(txt[1]) if txt[1] else None
                    stp['trigger'] = txt[2].strip() == 'T'
                    lbls.append(stp)

            # Drop the labels that lie before the requested start position.
            lbls = [lbl for lbl in lbls if lbl['time'] >= self.start_time]
            self.timing.set_label_list(lbls)

        if wave_file is not None:
            t0 = time()
            if (volume == 100.) and (self.start_time == 0.):
                # Nothing to scale or cut: let simpleaudio load the file.
                self.wave = sa.WaveObject.from_wave_file(wave_file)
            else:
                self.logger.info(
                    'Loading wave file %s explicitly into array...', wave_file)
                sr, waveform = wavfile.read(wave_file)
                self.logger.info(
                    'Wave file has %.1f seconds (%.2f minutes) at %s Hz sample rate',
                    waveform.shape[0] / sr, waveform.shape[0] / sr / 60, sr)
                # wavfile.read returns a 1-D array for mono files, so slice
                # the first axis only and take the channel count from the
                # array; the original indexed a second axis and failed on
                # mono input.
                n_channels = waveform.shape[1] if waveform.ndim > 1 else 1
                # NOTE(review): the int16 cast assumes 16-bit PCM -- confirm.
                waveform = (volume / 100 *
                            waveform[int(sr * start_time):]).astype(np.int16)
                self.wave = sa.WaveObject(waveform,
                                          num_channels=n_channels,
                                          sample_rate=sr)
            self.logger.info('Loading wave file %s took %.1f seconds.',
                             wave_file,
                             time() - t0)

Exemplo n.º 29

0

Exibir arquivo

def PlayVideo(summary_frame_path, summary_audio_path):
    """Show the summary frames in an OpenCV window with synchronized audio.

    Frames are read from ``summary_frame_path`` as ``<number>.jpg`` files in
    numeric order; the path is joined by plain string concatenation, so it
    must end with a path separator. The audio is loaded from
    ``summary_audio_path`` as WAV.

    Keys: F play, P pause, R previous frame, N next frame, Esc exit.

    NOTE(review): the 'F' and 'S' trackbars read and written below are not
    created in this function -- confirm they exist on the 'image' window.
    """

    # video = sys.argv[1]
    videobuffer = []
    # Frame numbers are taken from the file names without extension.
    files = [
        int(os.path.splitext(f)[0]) for f in os.listdir(summary_frame_path)
        if isfile(join(summary_frame_path, f))
    ]
    # sort the files
    # see python reference https://docs.python.org/3/howto/sorting.html
    files.sort()
    # Load every frame into memory up front.
    for i in range(len(files)):
        filename = summary_frame_path + str(files[i]) + ".jpg"
        img = cv2.imread(filename)
        videobuffer.append(img)

    audiocap = AudioSegment.from_file(summary_audio_path, "wav")
    # audiocap = AudioSegment.from_wav(debug_audio)

    cv2.namedWindow('image')
    cv2.moveWindow('image', 320, 180)
    cv2.namedWindow('controls')
    cv2.moveWindow('controls', 250, 50)

    # Static help strip shown in the 'controls' window.
    controls = np.zeros((50, 750), np.uint8)
    cv2.putText(
        controls,
        "F: Resume/Play, P: Pause, R: Rewind, N: Fast Forward, Esc: Exit",
        (120, 30), cv2.FONT_HERSHEY_PLAIN, 1, 200)

    framerate = audiocap.frame_rate

    wave_obj = sa.WaveObject(audiocap.raw_data,
                             num_channels=audiocap.channels,
                             bytes_per_sample=audiocap.sample_width,
                             sample_rate=audiocap.frame_rate)

    # No playback has been started yet.
    play_obj = None

    FPS = 1.0 / 30.0 * 1000.0

    tots = len(videobuffer)

    # Index of the frame currently shown.
    i = 0

    frame_rate = 30

    def process(im):
        # Convert a BGR frame to grayscale.
        return cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)

    # Player state: 'stay', 'play', 'prev_frame', 'next_frame' or 'exit'.
    status = 'stay'

    # Frames shown since the audio was last restarted at the video position.
    last_audio_sync = 0

    while True:
        new_time = time.time()
        cv2.imshow("controls", controls)
        try:
            # Wrap around to the first frame on reaching the last index.
            if i == tots - 1:
                i = 0
            # cap.set(cv2.CAP_PROP_POS_FRAMES, i)
            im = videobuffer[i]

            # r = 750.0 / im.shape[1]
            # dim = (750, int(im.shape[0] * r))
            # im = cv2.resize(im, dim, interpolation = cv2.INTER_AREA)
            # if im.shape[0]>600:
            #     im = cv2.resize(im, (500,500))
            #     controls = cv2.resize(controls, (im.shape[1],25))
            #cv2.putText(im, status, )
            cv2.imshow('image', im)
            # Map the pressed key to the next state; -1 (no key) keeps the
            # current one and any other key raises KeyError, handled below.
            status = {
                ord('p'): 'stay',
                ord('P'): 'stay',
                ord('f'): 'play',
                ord('F'): 'play',
                ord('r'): 'prev_frame',
                ord('R'): 'prev_frame',
                ord('n'): 'next_frame',
                ord('N'): 'next_frame',
                -1: status,
                27: 'exit'
            }[cv2.waitKey(10)]

            if status == 'play':
                frame_rate = cv2.getTrackbarPos('F', 'image')

                if play_obj is None:
                    play_obj = wave_obj.play()

                # Restart the audio at the current frame when it stopped or
                # after more than 30 frames since the last restart.
                if not play_obj.is_playing() or last_audio_sync > 30:
                    if play_obj is not None:
                        play_obj.stop()
                    # must have changed position
                    # Frame index converted to a slice position at 30 frames
                    # per second.
                    audio_frame_index = (i * 1000.0) // 30
                    newaudiocap = audiocap[audio_frame_index:]
                    wave_obj = sa.WaveObject(
                        newaudiocap.raw_data,
                        num_channels=audiocap.channels,
                        bytes_per_sample=audiocap.sample_width,
                        sample_rate=audiocap.frame_rate)
                    play_obj = wave_obj.play()
                    last_audio_sync = 0
                # audio_frame_index = i / 30.0 * 1000.0
                #print(str(i) + ", " + str(audio_frame_index))
                # asa = audiocap[audio_frame_index:audio_frame_index+msbetweenframes]
                # play_buffer(asa.raw_data, 2, 2, 48000)
                last_audio_sync += 1
                i += 1
                # Busy-wait until 1/30 s has passed since this iteration began.
                while time.time() - new_time < 1.0 / 30.0:
                    pass
                cv2.setTrackbarPos('S', 'image', i)
                continue
            if status == 'stay':
                # i = cv2.getTrackbarPos('S','image')
                if play_obj is not None:
                    play_obj.stop()
            if status == 'exit':
                break
            # Single-frame steps fall back to 'stay' afterwards.
            if status == 'prev_frame':
                i -= 1
                status = 'stay'
            if status == 'next_frame':
                i += 1
                status = 'stay'

            # Busy-wait until 1/30 s has passed since this iteration began.
            while time.time() - new_time < 1.0 / 30.0:
                pass

        except KeyError:
            # A key outside the mapping above was pressed.
            print("Invalid Key was pressed")
    cv2.destroyWindow('image')

Exemplo n.º 30

0

Exibir arquivo

Arquivo: train.py Projeto: jakobwoegerbauer/kagglebirds2020

def main():
    """Train a model as specified by the command line and configuration.

    Parses the command line, sets up the training and validation data
    feeds, builds the model (and optionally a teacher model), the optimizer
    and the learning rate scheduler, then runs the training loop with a
    validation pass after every epoch. Training state is written to
    ``<modelfile>.resume`` after each epoch so that an interrupted run can
    be continued; the resume file is removed once the final model is saved.

    Two diagnostic modes short-circuit training: ``benchmark_datafeed``
    times the creation of minibatches and returns, ``play_datafeed`` plays
    a key of the training batches as audio.

    Returns:
        ``1`` if training was aborted after five consecutive non-finite
        training losses, otherwise ``None``.

    Raises:
        RuntimeError: if the training loss is not finite and ``debug`` is
            enabled in the configuration.
    """
    # parse command line
    parser = opts_parser()
    options = parser.parse_args()
    modelfile = options.modelfile
    cfg = config.from_parsed_arguments(options)
    if not options.cuda_device:
        device = torch.device('cpu')
    else:
        device = torch.device('cuda:%d' % options.cuda_device[0])
        torch.cuda.set_device(options.cuda_device[0])
        if options.cuda_sync_mode != 'auto':
            set_cuda_sync_mode(options.cuda_sync_mode)

    # prepare training data generator
    print("Preparing training data feed...")
    train_data = get_dataset(cfg, 'train')
    print_data_info(train_data)
    train_loader = get_dataloader(cfg, train_data, 'train')

    # start training data generation in background
    train_batches = iterate_infinitely(train_loader)
    train_batches = iterate_data(train_batches, device, cfg)

    # if told so, benchmark the creation of a given number of minibatches
    if cfg.get('benchmark_datafeed'):
        print("Benchmark: %d minibatches of %d items..." %
              (cfg['benchmark_datafeed'], cfg['batchsize']))
        import itertools
        t0 = time.time()
        # islice(it, n, n) yields nothing but advances `it` by n items, so
        # this consumes exactly that many minibatches without storing them
        next(
            itertools.islice(train_batches, cfg['benchmark_datafeed'],
                             cfg['benchmark_datafeed']), None)
        t1 = time.time()
        print("%.3gs per minibatch." % ((t1 - t0) / cfg['benchmark_datafeed']))
        return

    # if told so, play back a given key of the training data as audio
    if cfg.get('play_datafeed'):
        import simpleaudio as sa
        for batch in train_batches:
            for wav in batch[cfg['play_datafeed']]:
                if wav.dtype.is_floating_point:
                    # scale floating-point samples to 16-bit integers
                    wav = (wav * np.iinfo(np.int16).max).short()
                # Transposing interleaves the channels (assumes wav is laid
                # out as (channels, samples), matching num_channels below --
                # TODO confirm). The transposed view is not C-contiguous for
                # more than one channel, so copy it into a contiguous array
                # before exposing its buffer to the audio backend.
                samples = np.ascontiguousarray(wav.cpu().numpy().T)
                sa.WaveObject(
                    samples.data,
                    num_channels=wav.shape[0],
                    bytes_per_sample=2,
                    sample_rate=cfg['data.sample_rate']).play().wait_done()

    # prepare validation data generator
    print("Preparing validation data feed...")
    val_data = get_dataset(cfg, 'valid')
    print_data_info(val_data)
    val_loader = get_dataloader(cfg, val_data, 'valid')

    # enable cuDNN auto-tuning if on GPU and all data sizes are constant
    if options.cuda_device and not any(s is None
                                       for data in (train_data, val_data)
                                       for shape in data.shapes.values()
                                       for s in shape):
        torch.backends.cudnn.benchmark = True

    # prepare model
    print("Preparing network...")
    # instantiate neural network
    model = get_model(cfg, train_data.shapes, train_data.dtypes,
                      train_data.num_classes, options.cuda_device)
    print(model)
    print_model_info(model)

    if cfg['train.teacher_model']:
        print("Preparing teacher network...")
        teacher_modelfile = cfg['train.teacher_model']
        teacher_device = torch.device(cfg['train.teacher_model.device']
                                      or device)
        # the teacher's configuration is the current one, overridden by the
        # `.vars` file stored next to the teacher model file
        teacher_cfg = dict(cfg)
        teacher_cfg.update(
            config.parse_config_file(
                teacher_modelfile.rsplit('.', 1)[0] + '.vars'))
        teacher_model = get_model(teacher_cfg, train_data.shapes,
                                  train_data.dtypes, train_data.num_classes,
                                  teacher_device.index)
        teacher_model.load_state_dict(
            torch.load(teacher_modelfile, map_location=teacher_device))
        teacher_model.train(False)

    # obtain cost functions
    train_metrics = get_metrics(cfg, 'train')
    val_metrics = get_metrics(cfg, 'valid')
    extract_loss = get_loss_from_metrics(cfg)

    # initialize optimizer
    params = model.parameters()
    if cfg['train.first_params']:
        first_params_count = cfg['train.first_params']
        # if a string, treat as a submodule name, figure out its param count
        if isinstance(first_params_count, str):
            first_params_count = sum(
                len(list(reduce(getattr, name.split('.'), model).parameters()))
                for name in first_params_count.split('+'))
        # advance the `params` iterator, keep the first parameters separately
        params = iter(params)
        first_params = [next(params) for _ in range(first_params_count)]
    optimizer = get_optimizer(cfg, params)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        factor=cfg['train.eta_decay'],
        patience=cfg['train.patience'],
        cooldown=cfg['train.cooldown'],
        verbose=True)

    # initialize mixed-precision training
    if cfg['float16']:
        from apex import amp
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=cfg['float16.opt_level'])
        if cfg['train.teacher_model']:
            teacher_model = amp.initialize(teacher_model,
                                           opt_level=cfg['float16.opt_level'])

    # initialize tensorboard logger, if requested
    if options.logdir:
        from tensorboardize import TensorboardLogger
        logger = TensorboardLogger(options.logdir,
                                   cfg=cfg,
                                   dataloader=val_loader,
                                   model=model,
                                   optimizer=optimizer)
    else:
        logger = None

    # resume training state if possible
    if options.resume and os.path.exists(options.modelfile + '.resume'):
        state = torch.load(options.modelfile + '.resume', map_location=device)
        model.load_state_dict(state.pop('model'))
        optimizer.load_state_dict(state.pop('optimizer'))
        scheduler.load_state_dict(state.pop('scheduler'))
        history = state.pop('history')
        epoch = state.pop('epoch')
        if cfg['float16']:
            amp.load_state_dict(state.pop('amp'))
        # if the delay had already passed when the state was saved, the
        # first parameters must be part of the optimizer again
        if (cfg['train.first_params']
                and epoch > cfg['train.first_params.delay']):
            add_optimizer_params(optimizer, scheduler, first_params,
                                 cfg['train.first_params.eta_scale'])
    else:
        history = {}
        epoch = 0
        # load pretrained weights if requested
        if cfg['model.init_from']:
            init_path = os.path.join(os.path.dirname(__file__),
                                     cfg['model.init_from'])
            # `map_location` is an argument of torch.load(), not of
            # load_state_dict(); it maps the stored tensors onto `device`
            model.load_state_dict(torch.load(init_path, map_location=device))
        else:
            # run custom initializations
            init_model(model, cfg)
        # log initial state
        if logger is not None:
            logger.log_start()

    # warn about unused configuration keys
    config.warn_unused_variables(
        cfg, ('train.epochs', 'train.epochsize', 'train.min_eta',
              'train.patience_reference', 'loss'))

    # run training loop
    print("Training:")
    for epoch in range(epoch, cfg['train.epochs']):
        # add first_params to optimizer when the delay has passed
        if (cfg['train.first_params']
                and cfg['train.first_params.delay'] == epoch):
            add_optimizer_params(optimizer, scheduler, first_params,
                                 cfg['train.first_params.eta_scale'])
            if cfg['debug']:
                print(
                    'Training first %d parameters with learning rate '
                    'scaled by %f.' %
                    (first_params_count, cfg['train.first_params.eta_scale']))
        # training pass
        model.train(True)
        if cfg['debug']:
            torch.autograd.set_detect_anomaly(True)
        train_errors = AverageMetrics()
        nans_in_a_row = 0
        for _ in tqdm.trange(cfg['train.epochsize'],
                             desc='Epoch %d/%d' %
                             (epoch + 1, cfg['train.epochs']),
                             ascii=bool(cfg['tqdm.ascii'])):
            # grab the next minibatch
            batch = next(train_batches)
            # reset gradients
            optimizer.zero_grad()
            # compute output
            preds = model(batch)
            # compute born-again output, if needed
            if cfg['train.teacher_model']:
                teacher_batch = copy_to_device(batch, teacher_device)
                with torch.no_grad():
                    teacher_preds = teacher_model(teacher_batch)
                teacher_preds = copy_to_device(teacher_preds, device)
                # expose the teacher predictions to the metrics under a
                # 'teacher.' prefix
                batch.update(
                    ('teacher.' + k, v) for k, v in teacher_preds.items())
            # compute training metrics and loss
            metrics = OrderedDict(
                (k, fn(preds, batch)) for k, fn in train_metrics.items())
            loss = extract_loss(metrics)
            # bail out if Not a Number
            if not np.isfinite(loss.item()):
                if cfg['debug']:
                    raise RuntimeError('Training error is NaN!')
                nans_in_a_row += 1
                if nans_in_a_row < 5:
                    print('Training error is NaN! Skipping step.')
                    continue
                else:
                    print('Training error is NaN! Stopping training.')
                    return 1
            else:
                nans_in_a_row = 0
            train_errors += metrics
            train_errors += {'loss': loss.item()}
            # backprop and update
            if cfg['float16']:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            optimizer.step()
        print_metrics('Train', train_errors.aggregate())
        # drop references to the last minibatch before the validation pass
        del batch, preds, loss

        # validation pass
        model.train(False)
        val_errors = AverageMetrics()
        for batch in iterate_data(iter(val_loader), device, cfg):
            with torch.no_grad():
                preds = model(batch)
                metrics = {
                    k: fn(preds, batch)
                    for k, fn in val_metrics.items()
                }
            val_loss = float(extract_loss(metrics).item())
            val_errors += metrics
            val_errors += {'loss': val_loss}
        print_metrics('Valid', val_errors.aggregate())
        del batch, preds, val_loss

        log_metrics(train_errors.aggregate(), val_errors.aggregate(), history,
                    modelfile)
        if logger is not None:
            logger.log_epoch(epoch, {k: v[-1] for k, v in history.items()})

        # learning rate update
        # a leading '-' on the reference name negates the monitored value
        # before it is handed to the scheduler
        reference = history[cfg['train.patience_reference'].lstrip('-')][-1]
        if hasattr(reference, 'mean'):
            reference = reference.mean()
        if cfg['train.patience_reference'].startswith('-'):
            reference *= -1
        scheduler.step(reference)
        if optimizer.param_groups[0]['lr'] < cfg['train.min_eta']:
            print('Learning rate fell below threshold. Stopping training.')
            break

        # save training state to resume file
        # (stores epoch + 1 so that a resumed run starts with the next epoch)
        resume_state = dict(model=model.state_dict(),
                            optimizer=optimizer.state_dict(),
                            scheduler=scheduler.state_dict(),
                            epoch=epoch + 1,
                            history=history)
        if cfg['float16']:
            resume_state['amp'] = amp.state_dict()
        torch.save(resume_state, options.modelfile + '.resume')
        del resume_state

    # save final network and the configuration used
    print("Saving final model")
    save_model(modelfile, model, cfg)

    # delete resume file if any
    if os.path.exists(options.modelfile + '.resume'):
        os.remove(options.modelfile + '.resume')

    # log the final state
    if logger is not None:
        logger.log_end(history)