def resample(samples, input_rate, output_rate):

    if output_rate != 24000:
        raise ValueError(
            'Sorry, but resampling_utils.resample only supports '
            'resampling to 24000 Hz.')

    return resampling_utils.resample_to_24000_hz(samples, input_rate)
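# Hypothetical usage of the wrapper above: resample one second of a 1 kHz
# sine from 22050 Hz to 24000 Hz. The sine and the rates are illustrative
# values only, not taken from the project.
import numpy as np

input_rate = 22050
t = np.arange(input_rate) / input_rate
sine = np.sin(2 * np.pi * 1000 * t).astype('float32')

resampled = resample(sine, input_rate, 24000)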
def get_clip_data(clip):

    waveform = clip[:]

    attrs = clip.attrs

    # Convert the stored absolute indices to indices within the extracted
    # waveform.
    extraction_start_index = attrs['extraction_start_index']
    clip_start_index = attrs['clip_start_index'] - extraction_start_index
    clip_end_index = clip_start_index + attrs['clip_length']
    call_start_index = attrs['call_start_index'] - extraction_start_index
    call_end_index = attrs['call_end_index'] - extraction_start_index

    # Resample if needed.
    sample_rate = attrs['sample_rate']
    if sample_rate != OUTPUT_SAMPLE_RATE:
        waveform = resampling_utils.resample_to_24000_hz(waveform, sample_rate)
        clip_start_index = adjust_index(clip_start_index, sample_rate,
                                        OUTPUT_SAMPLE_RATE)
        clip_end_index = adjust_index(clip_end_index, sample_rate,
                                      OUTPUT_SAMPLE_RATE)
        call_start_index = adjust_index(call_start_index, sample_rate,
                                        OUTPUT_SAMPLE_RATE)
        call_end_index = adjust_index(call_end_index, sample_rate,
                                      OUTPUT_SAMPLE_RATE)

    return (waveform, clip_start_index, clip_end_index, call_start_index,
            call_end_index)
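# `adjust_index` is used above but not defined in these snippets. A minimal
# sketch, assuming it simply scales a sample index from the input rate to the
# output rate (an assumption, not the project's actual implementation):
def adjust_index(index, input_rate, output_rate):
    return int(round(index * output_rate / input_rate))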
def test_resampling_utils(samples, input_rate, output_rate, pdf_file):
    
    if output_rate != 24000:
        raise ValueError(
            'Sorry, but resampling_utils.resample only supports '
            'resampling to 24000 Hz.')
        
    # Resample chirp.
    samples = resampling_utils.resample_to_24000_hz(samples, input_rate)
    
    # Plot spectrogram of result.
    plot_spectrogram(samples, output_rate, 'resampling_utils', pdf_file)
def test_resampling_utils(samples, input_rate, output_rate, pdf_file):

    if output_rate != 24000:
        raise ValueError('Sorry, but resampling_utils.resample only supports '
                         'resampling to 24000 Hz.')

    # Resample chirp.
    samples = resampling_utils.resample_to_24000_hz(samples, input_rate)

    show_stats('resampling_utils', samples)

    # Plot spectrogram of result.
    plot_spectrogram(samples, output_rate, 'resampling_utils', pdf_file)
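# `show_stats` is called above but not defined in these snippets. A minimal
# sketch of what such a helper might do (an assumption, not the project's
# actual implementation) is to print basic statistics of the samples:
import numpy as np

def show_stats(name, samples):
    samples = np.asarray(samples)
    print(f'{name}: length {len(samples)}, min {samples.min():.3f}, '
          f'max {samples.max():.3f}, mean {samples.mean():.3f}')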
def create_tf_example(clip_ds):

    waveform = clip_ds[:]
    attrs = clip_ds.attrs

    sample_rate = attrs['sample_rate']

    # Trim waveform.
    start_index = int(round(EXAMPLE_START_OFFSET * sample_rate))
    length = int(round(EXAMPLE_DURATION * sample_rate))
    waveform = waveform[start_index:start_index + length]

    # Get call start index in waveform.
    waveform_start_index = attrs['extraction_start_index'] + start_index
    call_start_index = attrs['call_start_index'] - waveform_start_index

    # Resample waveform if needed.
    if sample_rate != EXAMPLE_SAMPLE_RATE:
        waveform = resampling_utils.resample_to_24000_hz(waveform, sample_rate)
        rate_factor = EXAMPLE_SAMPLE_RATE / sample_rate
        call_start_index = int(round(call_start_index * rate_factor))

    waveform_feature = create_bytes_feature(waveform.tobytes())

    classification = attrs['classification']
    classification = CLASSIFICATION_CHANGES.get(classification, classification)
    label = CLASSIFICATION_LABELS[classification]
    label_feature = create_int64_feature(label)

    clip_id = attrs['clip_id']
    clip_id_feature = create_int64_feature(clip_id)

    call_start_index_feature = create_int64_feature(call_start_index)

    if call_start_index < 0:
        print(f'Warning: Call start index {call_start_index} is less than '
              f'zero for clip {clip_id}.')

    # Call start time within the example, in milliseconds.
    call_start_time = int(round(1000 * call_start_index / EXAMPLE_SAMPLE_RATE))
    if call_start_time != 500:
        station = attrs['station']
        start_time = attrs['clip_start_time']
        print(call_start_time, clip_id, station, start_time, classification)


        # for key in attrs.keys():
        #     print(f'    {key} {attrs[key]}')

    if len(waveform) != int(round(EXAMPLE_DURATION * EXAMPLE_SAMPLE_RATE)):
        print(f'Unexpected waveform length {len(waveform)}.')

    start_time_counts[call_start_time] += 1

    features = tf.train.Features(
        feature={
            'waveform': waveform_feature,
            'label': label_feature,
            'clip_id': clip_id_feature,
            'call_start_index': call_start_index_feature
        })

    return tf.train.Example(features=features)
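# `create_bytes_feature` and `create_int64_feature` are used above but not
# shown in these snippets. One common way to write such helpers with the
# TensorFlow tf.train protos (an assumption about this project's actual code):
import tensorflow as tf

def create_bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def create_int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))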
    def _process_input_chunk(self, samples):

        input_length = len(samples)

        if self._classifier_sample_rate != self._input_sample_rate:
            # need to resample input

            # When the input sample rate is 22050 Hz or 44100 Hz,
            # we resample as though it were 22000 Hz or 44000 Hz,
            # respectively, resulting in an actual resampled rate of
            # about 24055 Hz rather than 24000 Hz. This allows us to
            # resample much faster, and has little or no effect on the
            # clips [NEED TO SHOW THIS] output by the detector, since
            # the change to the resampled rate is small (only about a
            # quarter of a percent), and the detector is fairly
            # insensitive to small changes in the frequency and duration
            # of NFCs. We account for such sample rate substitutions
            # when computing the start index in the input signal of a
            # detected clip in the `_notify_listener_of_clips` method,
            # below.
            #
            # The lack of rigor inherent in this trick will always make
            # the processing of 22050 Hz and 44100 Hz input a little
            # questionable. In the future, I hope to obviate the trick by
            # implementing faster but proper resampling of 22050 Hz and
            # 44100 Hz input.
            if self._input_sample_rate == 22050:
                self._purported_input_sample_rate = 22000
            elif self._input_sample_rate == 44100:
                self._purported_input_sample_rate = 44000
            else:
                self._purported_input_sample_rate = self._input_sample_rate

            # start_time = time.time()

            samples = resampling_utils.resample_to_24000_hz(
                samples, self._purported_input_sample_rate)

            # processing_time = time.time() - start_time
            # input_duration = input_length / self._input_sample_rate
            # rate = input_duration / processing_time
            # print((
            #     'Resampled {:.1f} seconds of input in {:.1f} seconds, '
            #     'or {:.1f} times faster than real time.').format(
            #         input_duration, processing_time, rate))

        else:
            # don't need to resample input

            self._purported_input_sample_rate = self._input_sample_rate

        self._waveforms = _get_analysis_records(
            samples, self._classifier_waveform_length, self._hop_size)

        #         print('Scoring chunk waveforms...')
        #         start_time = time.time()

        scores = classifier_utils.score_dataset_examples(
            self._estimator, self._create_dataset)

        #         elapsed_time = time.time() - start_time
        #         num_waveforms = self._waveforms.shape[0]
        #         rate = num_waveforms / elapsed_time
        #         print((
        #             'Scored {} waveforms in {:.1f} seconds, a rate of {:.1f} '
        #             'waveforms per second.').format(
        #                 num_waveforms, elapsed_time, rate))

        if _SCORE_OUTPUT_ENABLED:
            self._score_file_writer.write(samples, scores)

        for threshold in self._thresholds:
            peak_indices = signal_utils.find_peaks(scores, threshold)
            peak_scores = scores[peak_indices]
            self._notify_listener_of_clips(peak_indices, peak_scores,
                                           input_length, threshold)

        self._input_chunk_start_index += input_length
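# A quick check of the sample rate substitution described in the comment in
# `_process_input_chunk` above. The resampler converts 22000 Hz (or 44000 Hz)
# to 24000 Hz; applying that same conversion to samples that are really at
# 22050 Hz (or 44100 Hz) yields an effective output rate of about 24054.5 Hz,
# roughly a quarter of a percent above the nominal 24000 Hz. The rates below
# are illustrative; this is not part of the project's code.
for actual_rate, purported_rate in ((22050, 22000), (44100, 44000)):
    effective_rate = actual_rate * 24000 / purported_rate
    error = (effective_rate - 24000) / 24000
    print(f'{actual_rate} Hz -> {effective_rate:.1f} Hz '
          f'({100 * error:.2f}% above 24000 Hz)')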
def create_output_files(night_file_infos):

    max_length = int(
        signal_utils.seconds_to_frames(MAX_NIGHT_DURATION * 3600,
                                       INPUT_SAMPLE_RATE))
    input_samples = np.empty(max_length, dtype='int16')

    night_intervals = sorted(night_file_infos.keys())

    for night_interval in night_intervals:

        partitions = night_file_infos[night_interval]

        for file_infos in partitions:

            start_time = time.time()
            partition_input_length = 0

            first_input_start = file_infos[0][1].start

            # Get output file start time.
            if night_interval.start >= first_input_start:
                output_start_time = night_interval.start
            else:
                output_start_time = first_input_start

            output_file_name = create_output_file_name(output_start_time)
            print('Creating recording {}...'.format(output_file_name))

            partition_read_interval = get_partition_read_interval(
                night_interval, first_input_start)

            input_start_index = 0

            for i, (input_file_path, _, input_length) in enumerate(file_infos):

                input_end_index = input_start_index + input_length
                input_interval = Interval(input_start_index, input_end_index)

                # Get read interval as partition indices.
                read_interval = intersect_intervals(input_interval,
                                                    partition_read_interval)

                # Get read interval as input file indices.
                read_interval = Interval(
                    read_interval.start - input_start_index,
                    read_interval.end - input_start_index)

                read_size = read_interval.end - read_interval.start

                with soundfile.SoundFile(str(input_file_path)) as sound_file:

                    if read_interval.start != 0:
                        sound_file.seek(read_interval.start)

                    samples = sound_file.read(read_size, dtype='int16')

                    start = partition_input_length
                    end = partition_input_length + read_size
                    input_samples[start:end] = samples

                print('    Reading {} {} {} {} {} {}...'.format(
                    i, input_file_path.name, input_length, read_interval.start,
                    read_interval.end, read_size))

                partition_input_length += read_size

                input_start_index += input_length

            duration = partition_input_length / INPUT_SAMPLE_RATE / 3600
            print('    Resampling {:.1f} hours of audio...'.format(duration))
            output_samples = resampling_utils.resample_to_24000_hz(
                input_samples[:partition_input_length], INPUT_SAMPLE_RATE)

            # Give the samples a single-channel, 2-D shape for write_wave_file.
            output_samples.shape = (1, len(output_samples))
            output_file_path = OUTPUT_DIR_PATH / output_file_name
            audio_file_utils.write_wave_file(str(output_file_path),
                                             output_samples,
                                             OUTPUT_SAMPLE_RATE)

            end_time = time.time()
            elapsed_time = end_time - start_time
            partition_duration = partition_input_length / INPUT_SAMPLE_RATE
            rate = partition_duration / elapsed_time
            print(
                ('    Processed {:.1f} seconds of audio in {:.1f} seconds, or '
                 '{:.1f} times faster than real time.').format(
                     partition_duration, elapsed_time, rate))
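# `Interval` and `intersect_intervals` are used above but not defined in these
# snippets. A minimal sketch, assuming Interval is a simple (start, end) pair
# of frame indices and intersection clamps one interval to another (an
# assumption, not the project's actual implementation):
from collections import namedtuple

Interval = namedtuple('Interval', ('start', 'end'))

def intersect_intervals(a, b):
    # Intersection of two half-open index intervals [start, end).
    return Interval(max(a.start, b.start), min(a.end, b.end))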