def set_name(cls, objs, name, extras=None):
        """Rename a single audio file, keeping disk and database consistent.

        :param objs: list containing exactly one AudioFile
        :param name: the new name; must be unique within the file's database
        :param extras: unused; kept for interface compatibility (was a mutable
            default ``{}``, now ``None``)
        :raises CustomAssertionError: if more than one object is given, the
            name is already taken, or renaming the physical files fails
        """
        if len(objs) != 1:
            raise CustomAssertionError(
                'Can\'t set the same name to more than 1 song.')
        obj = objs[0]

        is_unique = not AudioFile.objects.filter(database=obj.database,
                                                 name=name).exists()
        if not is_unique:
            raise CustomAssertionError('File {} already exists'.format(name))

        # If audio file is original, change the actual audio files' names as well
        if obj.is_original():
            old_name = obj.name
            old_name_wav = wav_path(obj)
            old_name_compressed = audio_path(obj,
                                             settings.AUDIO_COMPRESSED_FORMAT)

            try:
                obj.name = name
                obj.save()
                new_name_wav = wav_path(obj)
                new_name_compressed = audio_path(
                    obj, settings.AUDIO_COMPRESSED_FORMAT)

                os.rename(old_name_wav, new_name_wav)
                try:
                    os.rename(old_name_compressed, new_name_compressed)
                except Exception:
                    # Roll back the wav rename so the files on disk stay
                    # consistent with the restored database name below
                    os.rename(new_name_wav, old_name_wav)
                    raise
            except Exception as e:
                # Restore the database record before reporting failure;
                # chain the original exception for debuggability
                obj.name = old_name
                obj.save()
                raise CustomAssertionError('Error changing name') from e
        else:
            obj.name = name
            obj.save()
Exemplo n.º 2
0
def delete_audio_files_async(*args, **kwargs):
    """Permanently remove all AudioFile records previously marked inactive.

    Cascades in three steps: deactivate and delete dependent Segments, delete
    the files' ExtraAttrValue rows, then handle each audio file:
    - clones (``original`` is not None): only the DB row is deleted
    - originals with clones: the first clone is promoted to original and the
      physical wav/compressed files are renamed to the clone's paths
    - originals without clones: the physical files are deleted from disk
    """
    audio_files = AudioFile.fobjs.filter(active=False)

    # Mark all segments belong to these audio files as to be deleted, then delete them
    segments = Segment.objects.filter(audio_file__in=audio_files)
    segments.update(active=False)
    delete_segments_async()

    # Now delete the audio files
    audio_files_ids = audio_files.values_list('id', flat=True)

    ExtraAttrValue.objects.filter(attr__klass=AudioFile.__name__,
                                  owner_id__in=audio_files_ids).delete()

    # If the audio file is not original - just delete the model
    # Otherwise, search if there are clones. If there are, make one of the clones the new original
    # If there is no clone, delete the real audio files (wav and mp4)

    for af in audio_files:
        # original is None means this file IS an original
        if af.original is None:
            clones = AudioFile.objects.filter(original=af).order_by('id')
            first_clone = clones.first()

            # If there are clones, make the first clone original of the remaining
            # Also move the real audio file to the database's folder of the clone
            if first_clone:
                old_wav_file = wav_path(af)
                old_mp4_file = audio_path(af, settings.AUDIO_COMPRESSED_FORMAT)

                clones.update(original=first_clone)
                first_clone.original = None
                first_clone.save()

                new_wav_file = wav_path(first_clone)
                new_mp4_file = audio_path(first_clone,
                                          settings.AUDIO_COMPRESSED_FORMAT)

                os.rename(old_wav_file, new_wav_file)
                os.rename(old_mp4_file, new_mp4_file)

            # Otherwise, delete the audio files too
            else:
                wav = wav_path(af)
                mp4 = audio_path(af, settings.AUDIO_COMPRESSED_FORMAT)
                if os.path.isfile(wav):
                    os.remove(wav)
                if os.path.isfile(mp4):
                    os.remove(mp4)
        af.delete()
def spect_from_seg(seg, extractor):
    """Compute the spectrogram of one segment via the supplied extractor."""
    audio_file = seg.audio_file
    return extractor(wav_path(audio_file),
                     fs=audio_file.fs,
                     start=seg.start_time_ms,
                     end=seg.end_time_ms)
    def handle(self, testfile, fmt, *args, **options):
        """Compress audio to `fmt`: every original file, or just `testfile`."""
        if testfile is not None:
            # Single-file smoke test: convert into /tmp and discard the result
            target_file_path = '/tmp/test-compress-wav.' + fmt
            convert(dict(wav=testfile, other=(fmt, target_file_path)))
            os.remove(target_file_path)
            return

        # Batch mode: build the full work list first so the progress bar
        # knows its total size
        conversion_list = []
        for af in AudioFile.objects.filter(original=None):
            conversion_list.append(dict(wav=wav_path(af),
                                        other=(fmt, audio_path(af, fmt))))

        bar = Bar('Converting song ...', max=len(conversion_list))
        for conversion_scheme in conversion_list:
            convert(conversion_scheme, print_stats=False)
            bar.next()
        bar.finish()
Exemplo n.º 5
0
def extract_syllables(database_name, spect_dir, format):
    """Export a spectrogram image for every segment of the named database."""
    database = get_or_error(Database, dict(name__iexact=database_name))
    segments = Segment.objects.filter(audio_file__database=database)

    extractor = extractors[format]

    # Group (id, start, end) triplets by audio file so each wav is read once
    audio_file_dict = {}
    for seg in segments:
        triplets = audio_file_dict.setdefault(seg.audio_file, [])
        triplets.append((seg.id, seg.start_time_ms, seg.end_time_ms))

    bar = Bar('Exporting segments ...', max=len(segments))

    for af, triplets in audio_file_dict.items():
        wav_file_path = wav_path(af)
        fs = af.fs

        for sid, start, end in triplets:
            spect_path = os.path.join(spect_dir, '{}.{}'.format(sid, format))

            # Skip segments whose spectrogram already exists on disk
            if not os.path.isfile(spect_path):
                extractor(wav_file_path, fs, start, end, spect_path)

            bar.next()
    bar.finish()
def extract_spectrogram(audio_file_id):
    """
    Extract raw spectrograms for all segments (not the masked spectrogram from
    Luscinia) of an audio file.
    :param audio_file_id: id of the AudioFile whose segments are rendered
    :return: None; a PNG is saved for each segment missing its spectrogram
    """
    audio_file = AudioFile.objects.get(id=audio_file_id)
    segs_info = Segment.objects.filter(audio_file=audio_file).values_list(
        'tid', 'start_time_ms', 'end_time_ms')

    # Only render segments whose spectrogram image doesn't already exist
    missing_segs_info = []

    for tid, start, end in segs_info:
        seg_spect_path = spect_fft_path(tid, 'syllable')
        ensure_parent_folder_exists(seg_spect_path)
        if not os.path.isfile(seg_spect_path):
            missing_segs_info.append((seg_spect_path, start, end))

    if len(missing_segs_info) > 0:
        filepath = wav_path(audio_file)

        fs, sig = wav_2_mono(filepath)
        duration_ms = len(sig) * 1000 / fs

        # STFT of the whole file; each segment is cropped out of it below
        _, _, s = signal.stft(sig,
                              fs=fs,
                              window=window,
                              noverlap=noverlap,
                              nfft=window_size,
                              return_onesided=True)
        file_spect = np.abs(s * scale)

        height, width = np.shape(file_spect)
        file_spect = np.flipud(file_spect)

        # Quantise log-magnitudes into 64 colour-map bins
        file_spect = np.log10(file_spect)
        file_spect = ((file_spect - global_min_spect_pixel) / interval64)
        file_spect[np.isinf(file_spect)] = 0
        # BUGFIX: np.int was removed in NumPy 1.24; builtin int is equivalent
        file_spect = file_spect.astype(int)

        file_spect = file_spect.reshape((width * height, ), order='C')
        file_spect[file_spect >= 64] = 63
        file_spect_rgb = np.empty((height, width, 3), dtype=np.uint8)
        file_spect_rgb[:, :, 0] = cm_red[file_spect].reshape(
            (height, width)) * 255
        file_spect_rgb[:, :, 1] = cm_green[file_spect].reshape(
            (height, width)) * 255
        file_spect_rgb[:, :, 2] = cm_blue[file_spect].reshape(
            (height, width)) * 255

        for path, start, end in missing_segs_info:
            # Map segment times (ms) to spectrogram column indices
            roi_start = int(start / duration_ms * width)
            roi_end = int(np.ceil(end / duration_ms * width))

            seg_spect_rgb = file_spect_rgb[:, roi_start:roi_end, :]
            seg_spect_img = Image.fromarray(seg_spect_rgb)

            seg_spect_img.save(path, format='PNG')
Exemplo n.º 7
0
def extract_segment_features_for_segments(task,
                                          sids,
                                          features,
                                          f2bs,
                                          force=False):
    """Compute the given acoustic features for the segments identified by sids.

    :param task: progress object with start(limit=...) and tick()
    :param sids: segment ids; returned tids follow this order
    :param features: feature objects to extract
    :param f2bs: map feature -> (index_filename, value_filename) of the
        binary storage holding previously computed values
    :param force: if True, recompute even tids already present in storage
    :return: (tids, f2tid2fvals) where f2tid2fvals maps
        feature -> (list of tids, list of feature values)
    """
    # Preserve the caller's sid ordering in the queryset result
    preserved = Case(*[When(id=id, then=pos) for pos, id in enumerate(sids)])
    segments = Segment.objects.filter(id__in=sids).order_by(preserved)
    tids = np.array(segments.values_list('tid', flat=True), dtype=np.int32)

    f2tid2fvals = {}
    f2af2segments = {}
    n_calculations = 0

    for feature in features:
        index_filename, value_filename = f2bs[feature]

        if force:
            tids_target = tids
        else:
            # Restrict work to tids not already present in storage
            existing_tids = binstorage.retrieve_ids(index_filename)
            sorted_ids, sort_order = np.unique(existing_tids,
                                               return_index=True)

            non_existing_idx = np.where(
                np.logical_not(np.isin(tids, sorted_ids)))
            missing_tids = tids[non_existing_idx]
            tids_target = missing_tids

        af_to_segments = {}

        # Group target segments by audio file so each wav is opened only once
        for segment in segments.order_by('audio_file', 'start_time_ms'):
            tid = segment.tid
            af = segment.audio_file
            if tid in tids_target:
                if af not in af_to_segments:
                    af_to_segments[af] = []
                af_to_segments[af].append(
                    (tid, segment.start_time_ms, segment.end_time_ms))

        f2af2segments[feature] = af_to_segments
        n_calculations += len(af_to_segments)

    if n_calculations:
        task.start(limit=n_calculations)
        for feature, af_to_segments in f2af2segments.items():
            _tids = []
            _fvals = []
            for af, segs_info in af_to_segments.items():
                wav_file_path = wav_path(af)
                __tids, __fvals = extract_segment_feature_for_audio_file(
                    wav_file_path, segs_info, feature)
                _tids += __tids
                _fvals += __fvals
                task.tick()
            f2tid2fvals[feature] = (_tids, _fvals)

    return tids, f2tid2fvals
Exemplo n.º 8
0
    def handle(self, *args, **options):
        """Sanity-check that every segment of every audio file can be read.

        Fast-forwards past files already fully covered by a previous run,
        using `already_tested` (presumably a module-level counter set from a
        prior partial run -- TODO confirm where it is defined).
        """
        # NOTE: a large block of commented-out one-off resampling code was
        # removed here; see version control history if it is ever needed.
        audio_files = AudioFile.objects.all()
        num_segments = Segment.objects.all().count()
        num_tested = 0

        bar = Bar('Testing...', max=num_segments)
        for audio_file in audio_files:
            segments = Segment.objects.filter(audio_file=audio_file)
            num_segments = segments.count()
            # Skip whole files that a previous run already verified
            if num_tested + num_segments < already_tested:
                num_tested += num_segments
                bar.next(num_segments)
                continue

            audio_file_path = wav_path(audio_file)
            for segment in segments:
                begin_ms = segment.start_time_ms
                end_ms = segment.end_time_ms
                # Missing wav files are skipped rather than treated as errors
                if os.path.isfile(audio_file_path):
                    read_segment(audio_file_path,
                                 beg_ms=begin_ms,
                                 end_ms=end_ms,
                                 mono=True,
                                 normalised=True)
                bar.next()
                num_tested += 1

        bar.finish()
Exemplo n.º 9
0
def extract_spectrogram(audio_file, segs_info):
    """
    Extract raw spectrograms for the given segments (not the masked
    spectrogram from Luscinia) of an audio file.
    :param audio_file: the AudioFile to read audio from
    :param segs_info: iterable of (tid, start_ms, end_ms) triplets
    :return: None; one PNG per segment is saved at get_abs_spect_path(tid)
    :raises CustomAssertionError: if the wav file does not exist
    """
    filepath = wav_path(audio_file)

    # BUGFIX: check existence BEFORE get_wav_info -- the original called
    # get_wav_info first, so a missing file failed with a low-level error
    # and the friendly message below was unreachable
    if not os.path.isfile(filepath):
        raise CustomAssertionError("File {} not found".format(audio_file.name))

    fs, duration = get_wav_info(filepath)

    for tid, start, end in segs_info:
        seg_spect_path = get_abs_spect_path(tid)
        ensure_parent_folder_exists(seg_spect_path)

        sig = read_segment(filepath,
                           beg_ms=start,
                           end_ms=end,
                           mono=True,
                           normalised=True,
                           return_fs=False,
                           retype=True,
                           winlen=window_size)
        _, _, s = signal.stft(sig,
                              fs=fs,
                              window=window,
                              noverlap=noverlap,
                              nfft=window_size,
                              return_onesided=True)
        spect = np.abs(s * scale)

        height, width = np.shape(spect)
        spect = np.flipud(spect)

        # Quantise log-magnitudes into 64 colour-map bins
        spect = np.log10(spect)
        spect = ((spect - global_min_spect_pixel) / interval64)
        spect[np.isinf(spect)] = 0
        # BUGFIX: np.int was removed in NumPy 1.24; builtin int is equivalent
        spect = spect.astype(int)

        spect = spect.reshape((width * height, ), order='C')
        spect[spect >= 64] = 63
        spect_rgb = np.empty((height, width, 3), dtype=np.uint8)
        spect_rgb[:, :, 0] = cm_red[spect].reshape((height, width)) * 255
        spect_rgb[:, :, 1] = cm_green[spect].reshape((height, width)) * 255
        spect_rgb[:, :, 2] = cm_blue[spect].reshape((height, width)) * 255

        seg_spect_img = Image.fromarray(spect_rgb)

        seg_spect_img.save(seg_spect_path, format='PNG')
        celerylogger.info('spectrogram {} created'.format(seg_spect_path))
Exemplo n.º 10
0
def extract_psd(extractor, audio_file):
    """
    Run `extractor` over the whole audio file and min-max normalise the result.
    :param extractor: callable(path, fs, start, end) returning a spectrogram
    :param audio_file: the AudioFile to process
    :return: the normalised spectrogram (spectrogram - wise, not dimension wise)
    """
    spect = extractor(wav_path(audio_file), audio_file.fs, 0, None)
    lo = np.min(spect)
    hi = np.max(spect)
    return (spect - lo) / (hi - lo)
Exemplo n.º 11
0
def spect_from_seg(seg, extractor):
    """Spectrogram of one segment, using its database's FFT settings."""
    audio_file = seg.audio_file
    db = audio_file.database
    return extractor(wav_path(audio_file),
                     fs=audio_file.fs,
                     start=seg.start_time_ms,
                     end=seg.end_time_ms,
                     nfft=db.nfft,
                     noverlap=db.noverlap)
Exemplo n.º 12
0
    def handle(self, dbs, *args, **options):
        """Fix wrong fs/length on AudioFile rows, then verify that 16-bit PCM
        import round-trips for every song in the given Luscinia databases.

        :param dbs: connection spec understood by get_dbconf
        """
        # Correct false wav info
        for af in AudioFile.objects.all():
            # NOTE(review): wav_path is called with a second argument here,
            # unlike elsewhere in this codebase -- confirm the signature
            wav_file_path = wav_path(af, 'wav')
            fs, length = get_wav_info(wav_file_path)
            if fs != af.fs or length != af.length:
                print(
                    'Correct file {}, originally length={} fs={}, now length={}, fs={}'
                    .format(af.name, af.length, af.fs, length, fs))
                af.fs = fs
                af.length = length
                af.save()

        conns = None
        try:
            conns = get_dbconf(dbs)
            for pop in conns:
                conn = conns[pop]
                cur = conn.cursor()
                bitrate = 16
                song_cur = conn.cursor(
                    cursor_factory=psycopg2.extras.RealDictCursor)

                song_cur.execute(
                    'select w.framesize, w.stereo, w.samplerate, w.ssizeinbits, w.songid, s.name '
                    'from wavs w join songdata s on w.songid=s.id where w.ssizeinbits={}'
                    .format(bitrate))

                songs = song_cur.fetchall()
                for song in songs:
                    song_name = song['name']
                    # Import WAV data and save as WAV and MP3 files
                    wav_file_path = '/tmp/{}'.format(song_name)
                    mp3_file_path = '/tmp/{}.mp3'.format(song_name)
                    fs, length = import_pcm(song, cur, song_name,
                                            wav_file_path, mp3_file_path)

                    fs1, length1 = get_wav_info(wav_file_path)

                    if fs != fs1 or length != length1:
                        print('-------SHIT--------')

                    print(
                        'Song {} length = {} fs = {} time = {}, length1 = {} fs1 = {} time1 = {}'
                        .format(song_name, length, fs, length / fs, length1,
                                fs1, length1 / fs1))
        finally:
            # BUGFIX: if get_dbconf raised, conns is still None -- iterating
            # it here raised TypeError and masked the original exception
            if conns is not None:
                for dbconf in conns:
                    conn = conns[dbconf]
                    if conn is not None:
                        conn.close()
Exemplo n.º 13
0
def import_pcm(song, cur, audio_file, wav_file_path=None, compressed_url=None):
    """Materialise a song's raw PCM (from the Luscinia DB) as wav + compressed files.

    :param song: dict row with keys 'songid', 'stereo', 'ssizeinbits', 'samplerate'
    :param cur: database cursor used to fetch the raw PCM blob
    :param audio_file: used to derive default file paths when none are given
    :param wav_file_path: target wav path; defaults to wav_path(audio_file)
    :param compressed_url: target compressed path; defaults to
        audio_path(audio_file, settings.AUDIO_COMPRESSED_FORMAT)
    :return: (fs, length) sample rate and per-channel frame count
    """
    if wav_file_path is None:
        wav_file_path = wav_path(audio_file)
    if compressed_url is None:
        compressed_url = audio_path(audio_file,
                                    settings.AUDIO_COMPRESSED_FORMAT)

    # Only import from the database if the wav doesn't already exist on disk
    if not os.path.isfile(wav_file_path):
        # print('Importing {}'.format(song_name))
        song_id = song['songid']
        cur.execute('select wav from wavs where songid={};'.format(song_id))

        data = cur.fetchone()
        raw_pcm = str_to_bytes(data[0])

        nchannels = song['stereo']
        bitrate = int(song['ssizeinbits'])
        fs = int(song['samplerate'])

        byte_per_frame = int(bitrate / 8)
        nframes_all_channel = int(len(raw_pcm) / byte_per_frame)
        nframes_per_channel = int(nframes_all_channel / nchannels)
        length = nframes_per_channel
        ensure_parent_folder_exists(wav_file_path)

        if bitrate == 24:
            # 24-bit PCM needs the custom writer; pydub can't emit it directly
            array1 = np.frombuffer(raw_pcm, dtype=np.ubyte)
            array2 = array1.reshape((nframes_per_channel, nchannels,
                                     byte_per_frame)).astype(np.uint8)
            wf.write_24b(wav_file_path, fs, array2)
        else:
            data = array.array('i', raw_pcm)
            sound = pydub.AudioSegment(data=data,
                                       sample_width=byte_per_frame,
                                       frame_rate=fs,
                                       channels=nchannels)
            sound.export(wav_file_path, 'wav')
    else:
        # Wav already exists -- just read its metadata
        fs, length = get_wav_info(wav_file_path)

    # Create the compressed companion file if it is missing
    if not os.path.isfile(compressed_url):
        ensure_parent_folder_exists(compressed_url)
        sound = pydub.AudioSegment.from_wav(wav_file_path)
        sound.export(compressed_url, format=settings.AUDIO_COMPRESSED_FORMAT)

    return fs, length
    def handle(self, *args, **options):
        """Export each segment listed in the CSV as an individual wav file."""
        segment_csv = options['segment_csv']
        folder = options['folder']
        mkdirp(folder)

        with open(segment_csv, 'r', encoding='utf-8') as f:
            reader = csv.DictReader(f, delimiter='\t')
            supplied_fields = reader.fieldnames

            # The first field is always id, the second field is always the primary label type
            primary_label_level = supplied_fields[1]

            sid_to_label = {}
            for row in reader:
                sid_to_label[int(row['id'])] = row[primary_label_level]

        sids = sid_to_label.keys()
        # Group (id, start, end) triplets by audio file so each wav is read once
        audio_file_dict = {}
        for segment in Segment.objects.filter(id__in=sids):
            triplets = audio_file_dict.setdefault(segment.audio_file, [])
            triplets.append(
                (segment.id, segment.start_time_ms, segment.end_time_ms))

        bar = Bar('Exporting segments ...', max=len(sid_to_label))
        for af, triplets in audio_file_dict.items():
            fullwav = pydub.AudioSegment.from_wav(wav_path(af))

            for seg_id, start, end in triplets:
                audio_segment = fullwav[start:end]

                filepath = os.path.join(folder, '{}.wav'.format(seg_id))
                with open(filepath, 'wb') as out:
                    audio_segment.export(out, format='wav')
                bar.next()
        bar.finish()
Exemplo n.º 15
0
    def handle(self, testfile, tofile, fmt, *args, **options):
        """Convert wav audio to `fmt` (or to the extension of `tofile`)."""
        if fmt is None:
            if tofile is None:
                raise Exception("Either format or --to-file must be provided")
            # Infer the target format from the output file's extension
            fmt = tofile.split('.')[-1]

        print("Format = {}".format(fmt))

        if testfile is not None:
            # Single-file mode: write to `tofile`, or to a throwaway temp file
            if tofile is None:
                target_file_path = '/tmp/test-compress-wav.' + fmt
                convert(dict(wav=testfile, other=(fmt, target_file_path)))
                os.remove(target_file_path)
            else:
                convert(dict(wav=testfile, other=(fmt, tofile)))
            return

        # Batch mode: convert every original audio file in the database
        conversion_list = []
        for af in AudioFile.objects.filter(original=None):
            conversion_list.append(dict(wav=wav_path(af),
                                        other=(fmt, audio_path(af, fmt))))

        bar = Bar('Converting song ...', max=len(conversion_list))
        for conversion_scheme in conversion_list:
            convert(conversion_scheme, print_stats=False)
            bar.next()
        bar.finish()
Exemplo n.º 16
0
def extract_psd(extractor, audio_file, normalise=True):
    """
    Extract an audio file's spectrogram using its database's FFT settings.
    :param extractor: callable(path, fs, start, end, nfft, noverlap)
    :param audio_file: the AudioFile to process (whole file: start=0, end=None)
    :param normalise: if True, min-max scale the spectrogram
    :return: the normalised spectrogram (spectrogram - wise, not dimension
        wise), or the raw spectrogram when normalise is False
    """
    wav_file_path = wav_path(audio_file)
    database = audio_file.database
    spect = extractor(wav_file_path,
                      audio_file.fs,
                      start=0,
                      end=None,
                      nfft=database.nfft,
                      noverlap=database.noverlap)

    if normalise:
        # FIX: min/max are now only computed when actually normalising
        # (spect_min was previously computed unconditionally)
        spect_min = np.min(spect)
        spect_max = np.max(spect)
        return (spect - spect_min) / (spect_max - spect_min)
    return spect
Exemplo n.º 17
0
def extract_segment_features_for_segments(runner, sids, features, force=False):
    """Compute acoustic features for the given segments and persist them.

    Results are flushed to per-feature binary storage in batches of >= 100
    tids; nothing is returned.

    :param runner: progress object with start(limit=...) and tick(n)
    :param sids: segment ids to process
    :param features: feature objects; feature.name selects the storage folder
    :param force: if True, recompute tids that already exist in storage
    """
    segments = Segment.objects.filter(id__in=sids)
    tids = np.array(segments.values_list('tid', flat=True), dtype=np.int32)

    if len(tids) == 0:
        return

    tid_min = tids.min()
    tid_max = tids.max()

    storage_loc_template = get_storage_loc_template()

    f2af2segments = {}
    n_calculations = 0

    for feature in features:
        storage_loc = storage_loc_template.format(feature.name)
        mkdirp(storage_loc)

        if force:
            tids_target = tids
        else:
            # Restrict work to tids not already present in storage
            existing_tids = bs.retrieve_ids(storage_loc, (tid_min, tid_max))
            sorted_ids, sort_order = np.unique(existing_tids,
                                               return_index=True)

            non_existing_idx = np.where(
                np.logical_not(np.isin(tids, sorted_ids)))
            missing_tids = tids[non_existing_idx]
            tids_target = missing_tids

        af_to_segments = {}

        # Fetch target segments with their database's FFT/filter parameters,
        # grouped by audio file so each wav is opened only once
        vl = segments.filter(tid__in=tids_target).order_by('audio_file', 'start_time_ms')\
                     .values_list('tid', 'audio_file', 'start_time_ms', 'end_time_ms',
                                  'audio_file__database__nfft', 'audio_file__database__noverlap',
                                  'audio_file__database__lpf', 'audio_file__database__hpf')

        if len(vl):
            for tid, afid, start_time_ms, end_time_ms, nfft, noverlap, lpf, hpf in vl:
                if afid not in af_to_segments:
                    af_to_segments[afid] = []
                af_to_segments[afid].append((tid, start_time_ms, end_time_ms,
                                             nfft, noverlap, lpf, hpf))

            f2af2segments[feature] = af_to_segments
            n_calculations += len(tids_target)

    if n_calculations:
        runner.start(limit=n_calculations)
        for ind, (feature, af_to_segments) in enumerate(f2af2segments.items()):
            _tids = []
            _fvals = []
            storage_loc = storage_loc_template.format(feature.name)

            # Resolve AudioFile objects for all involved ids in one query
            afids = list(af_to_segments.keys())
            af_lookup = {
                x.id: x
                for x in AudioFile.objects.filter(id__in=afids)
            }
            for afid, segs_info in af_to_segments.items():
                af = af_lookup[afid]
                wav_file_path = wav_path(af)
                try:
                    __tids, __fvals = extract_segment_feature_for_audio_file(
                        wav_file_path, segs_info, feature)
                except Exception as e:
                    raise Exception(
                        'Error extracting [{}] for file {}. Error message: {}'.
                        format(feature.name, af.name, str(e)))
                #
                _tids += __tids
                _fvals += __fvals

                # Flush to storage in batches to bound memory usage
                if len(_tids) >= 100:
                    bs.store(_tids, _fvals, storage_loc)
                    runner.tick(len(_tids))
                    _tids = []
                    _fvals = []

            if len(_tids):
                bs.store(_tids, _fvals, storage_loc)
                runner.tick(len(_tids))
Exemplo n.º 18
0
def extract_xfcc(segments, config, is_pattern=False, method_name='mfcc'):
    """Extract cepstral-coefficient features (mfcc/bfcc/gfcc/...) for segments.

    :param segments: Segment queryset to extract features for
    :param config: dict of settings (lower, upper, delta, nfilt, nmfcc, ...)
    :param is_pattern: if True, extract from synthetic chirps matched to each
        segment's duration and sample rate instead of the real audio
    :param method_name: one of mfcc/bfcc/lfcc/gfcc (filterbank + cepstrum) or
        mfc/bfc/lfc/gfc (filterbank only)
    :return: np.ndarray of features, in the queryset's original order
    """
    nsegs = len(segments)

    lower = int(config.get('lower', 20))
    upper = int(config.get('upper', 8000))
    ndelta = int(config.get('delta', 0))
    nfilt = int(config.get('nfilt', 26))
    nmfcc = int(config.get('nmfcc', nfilt / 2))

    assert nmfcc <= nfilt
    xtrargs = {
        'name': method_name,
        'lowfreq': lower,
        'highfreq': upper,
        'numcep': nmfcc,
        'nfilt': nfilt
    }
    if 'cepsfunc' in config:
        xtrargs['cepsfunc'] = config['cepsfunc']

    if method_name in ['mfcc', 'bfcc', 'lfcc']:
        method = xfcc
    elif method_name == 'gfcc':
        # Gammatone variants take extra hearing-range parameters
        xtrargs['lowhear'] = int(config.get('lowhear', 500))
        xtrargs['hihear'] = int(config.get('hihear', 12000))
        method = xfcc
    elif method_name in ['mfc', 'bfc', 'lfc']:
        method = xfc
    elif method_name == 'gfc':
        xtrargs['lowhear'] = int(config.get('lowhear', 500))
        xtrargs['hihear'] = int(config.get('hihear', 12000))
        method = xfc
    else:
        raise Exception('No such method: {}'.format(method_name))

    lower = xtrargs['lowfreq']
    upper = xtrargs['highfreq']
    nmfcc = xtrargs['numcep']
    bar = Bar('Extracting {} Range={}~{}, nCoefs={}, delta={}'.format(
        method_name, lower, upper, nmfcc, ndelta),
              max=nsegs,
              suffix='%(index)d/%(max)d %(elapsed)ds/%(eta)ds')

    if is_pattern:
        # Cache chirp features keyed by (duration, fs) so identical segments
        # are synthesised and analysed only once
        cache = {}

        original_segment_ids = np.array(segments.values_list('id', flat=True),
                                        dtype=np.int32)

        # Sort by duration so that we can cache them effectively
        segments = segments.annotate(duration=F('end_time_ms') -
                                     F('start_time_ms')).order_by('duration')
        duration_sorted_segment_ids = np.array(segments.values_list('id',
                                                                    flat=True),
                                               dtype=np.int32)

        # We need the index array in order to restore the original order:
        ascending_sorted_idx = np.sort(original_segment_ids)

        ascending_sorted_to_original_order = np.searchsorted(
            ascending_sorted_idx, original_segment_ids)
        duration_sorted_to_ascending_sorted_order = np.argsort(
            duration_sorted_segment_ids)
        duration_sorted_to_original_order = duration_sorted_to_ascending_sorted_order[
            ascending_sorted_to_original_order]

        sorted_mfcc = []

        segments_info = segments.values_list('duration', 'audio_file__fs')
        for duration, fs in segments_info:
            # BUGFIX: the original did `cache = {duration: {}}` here, which
            # reset the entire cache on every new duration, defeating it
            if duration not in cache:
                cache[duration] = {}
            # BUGFIX: features are now cached per (duration, fs); the original
            # stored them under a single shared 'ft' key, so segments with the
            # same duration but a different sample rate reused wrong features
            if fs not in cache[duration]:
                mfcc_fts = []
                for amp_profile_name in amp_profile_names:
                    for f0_profile_name in f0_profile_names:
                        chirp = generate_chirp(f0_profile_name,
                                               amp_profile_name, duration, fs)
                        mfcc_fts.append(
                            _extract_xfcc(chirp, fs, method, xtrargs, ndelta))
                cache[duration][fs] = mfcc_fts
            else:
                mfcc_fts = cache[duration][fs]

            sorted_mfcc.append(mfcc_fts)
            bar.next()
        mfccs = np.array(sorted_mfcc)[duration_sorted_to_original_order]

    else:
        mfccs = []
        segment_data = {}

        for segment in segments:
            fs = segment.audio_file.fs
            file_url = wav_path(segment.audio_file)
            sig = wavfile.read_segment(file_url,
                                       segment.start_time_ms,
                                       segment.end_time_ms,
                                       mono=True)
            mfcc_fts = _extract_xfcc(sig, fs, method, xtrargs, ndelta)

            # BUGFIX: the original keyed this dict on the builtin `id`
            # function object; use the segment's actual id instead
            segment_data['s' + str(segment.id)] = dict(sig=sig, fs=fs,
                                                       ft=mfcc_fts)

            mfccs.append(mfcc_fts)
            bar.next()
        mfccs = np.array(mfccs)

        import scipy.io as sio
        sio.savemat('/tmp/segment_data.mat', segment_data)

    bar.finish()
    return mfccs
Exemplo n.º 19
0
def extract_spectrogram():
    """
    Extract raw sepectrograms for all segments (Not the masked spectrogram from Luscinia)

    For each audio file: compute an STFT spectrogram of the whole song, map the
    quantised log-magnitudes through the module-level colour maps
    (cm_red/cm_green/cm_blue) into an RGB image, save one PNG per song and one
    cropped PNG per syllable (segment). Existing PNGs are never overwritten.
    :return: None
    """
    # Group segments by their parent audio file so each wav is read only once
    audio_to_segs = {}
    for segment in Segment.objects.all():
        audio_file = segment.audio_file
        if audio_file not in audio_to_segs:
            audio_to_segs[audio_file] = [(segment.id, segment.start_time_ms,
                                          segment.end_time_ms)]
        else:
            audio_to_segs[audio_file].append(
                (segment.id, segment.start_time_ms, segment.end_time_ms))

    n = len(audio_to_segs)
    bar = Bar('Exporting spects ...', max=n)

    for audio_file, seg_list in audio_to_segs.items():
        # Skip this audio file entirely if every syllable PNG already exists
        count = 0
        for seg_id, start, end in seg_list:
            seg_spect_path = spect_fft_path(seg_id, 'syllable')
            if os.path.isfile(seg_spect_path):
                count += 1
        if count == len(seg_list):
            bar.next()
            continue

        filepath = wav_path(audio_file)

        fs, sig = wav_2_mono(filepath)
        duration_ms = len(sig) * 1000 / fs

        # window, noverlap, window_size and scale are module-level STFT settings
        _, _, s = signal.stft(sig,
                              fs=fs,
                              window=window,
                              noverlap=noverlap,
                              nfft=window_size,
                              return_onesided=True)
        file_spect = np.abs(s * scale)

        height, width = np.shape(file_spect)
        # Flip so low frequencies end up at the bottom of the image
        file_spect = np.flipud(file_spect)

        try:
            # Quantise log-magnitudes into 64 bins indexing the colour maps;
            # -inf values (log of zero magnitude) are clamped to bin 0
            file_spect = np.log10(file_spect)
            file_spect = ((file_spect - global_min_spect_pixel) / interval64)
            file_spect[np.isinf(file_spect)] = 0
            # BUGFIX: np.int was deprecated in NumPy 1.20 and removed in 1.24;
            # the builtin int is the documented replacement
            file_spect = file_spect.astype(int)

            file_spect = file_spect.reshape((width * height, ), order='C')
            file_spect[file_spect >= 64] = 63
            file_spect_rgb = np.empty((height, width, 3), dtype=np.uint8)
            file_spect_rgb[:, :, 0] = cm_red[file_spect].reshape(
                (height, width)) * 255
            file_spect_rgb[:, :, 1] = cm_green[file_spect].reshape(
                (height, width)) * 255
            file_spect_rgb[:, :, 2] = cm_blue[file_spect].reshape(
                (height, width)) * 255

            # Whole-song spectrogram PNG (only written if absent)
            file_spect_img = Image.fromarray(file_spect_rgb)
            file_spect_path = spect_fft_path(audio_file.id, 'song')
            ensure_parent_folder_exists(file_spect_path)
            if not os.path.isfile(file_spect_path):
                file_spect_img.save(file_spect_path, format='PNG')

            # Crop each syllable out of the song image by time proportion
            for seg_id, start, end in seg_list:
                roi_start = int(start / duration_ms * width)
                roi_end = int(np.ceil(end / duration_ms * width))

                seg_spect_rgb = file_spect_rgb[:, roi_start:roi_end, :]
                seg_spect_img = Image.fromarray(seg_spect_rgb)
                seg_spect_path = spect_fft_path(seg_id, 'syllable')
                ensure_parent_folder_exists(seg_spect_path)

                if not os.path.isfile(seg_spect_path):
                    seg_spect_img.save(seg_spect_path, format='PNG')

        except Exception:
            warning('Error occured at song id: {}'.format(audio_file.id))
            # Bare raise preserves the original traceback
            raise

        bar.next()
    bar.finish()
    def handle(self, *args, **options):
        """
        Export labelled segments of a database as individual audio files plus a
        metadata.tsv (id, filename, label, label_enum, fold) for training.

        Options read: database_name, annotator_name, label_level, save_to,
        format, min_occur, num_instances, normalised.
        """
        database_name = options['database_name']
        annotator_name = options['annotator_name']
        label_level = options['label_level']
        save_to = options['save_to']
        # NOTE(review): 'format' and (below) 'id' shadow builtins; kept as-is
        format = options['format']
        min_occur = options['min_occur']
        num_instances = options['num_instances']
        normalised = options['normalised']

        if num_instances is not None:
            assert num_instances >= min_occur, 'num_instances must be >= min_occur'

        # Resolve the database and annotator, then collect this database's segments
        database = get_or_error(Database, dict(name__iexact=database_name))
        annotator = get_or_error(User, dict(username__iexact=annotator_name))
        segments = Segment.objects.filter(audio_file__database=database)

        sids = np.array(list(segments.order_by('id').values_list('id', flat=True)))

        # Drop segments without a label; optionally subsample to num_instances
        labels, no_label_ids = get_labels_by_sids(sids, label_level, annotator, min_occur)
        if len(no_label_ids) > 0:
            sids, _, labels = exclude_no_labels(sids, None, labels, no_label_ids)

        if num_instances:
            sids, _, labels = select_instances(sids, None, labels, num_instances)

        # Enumerate labels and assign each segment a cross-validation fold
        unique_labels, enum_labels = np.unique(labels, return_inverse=True)
        fold_indices = get_kfold_indices(enum_labels, min_occur)

        segments_info = {sid: (label, label_enum, fold_ind) for sid, label, label_enum, fold_ind in
                         zip(sids, labels, enum_labels, fold_indices)}

        segs = Segment.objects.filter(id__in=sids)

        # Group (id, start_ms, end_ms) tuples per audio file so each wav is
        # loaded only once below
        audio_file_dict = {}
        for seg in segs:
            af = seg.audio_file
            if af in audio_file_dict:
                info = audio_file_dict[af]
            else:
                info = []
                audio_file_dict[af] = info
            info.append((seg.id, seg.start_time_ms, seg.end_time_ms))

        audio_info = []

        bar = Bar('Exporting segments ...', max=len(segs))
        metadata_file_path = os.path.join(save_to, 'metadata.tsv')

        # extractors is a module-level registry; presumably maps format name to
        # a callable(wav_path, fs, start, end, out_path) — verify at definition
        extractor = extractors.get(format, None)

        for af, info in audio_file_dict.items():
            wav_file_path = wav_path(af)
            fullwav = pydub.AudioSegment.from_wav(wav_file_path)

            for id, start, end in info:
                label, label_enum, fold_ind = segments_info[id]

                # pydub slices by milliseconds
                audio_segment = fullwav[start: end]

                filename = '{}.{}'.format(id, format)

                filepath = os.path.join(save_to, filename)
                ensure_parent_folder_exists(filepath)

                # Never overwrite an existing export; use the registered
                # extractor when available, otherwise let pydub encode
                if not os.path.isfile(filepath):
                    if extractor is not None:
                        extractor(wav_file_path, af.fs, start, end, filepath)
                    else:
                        with open(filepath, 'wb') as f:
                            audio_segment.export(f, format=format)

                audio_info.append(
                    (id, filename, label, label_enum, fold_ind)
                )

                bar.next()

        # Write one TSV row per exported segment
        with open(metadata_file_path, 'w') as f:
            f.write('id\tfilename\tlabel\tlabel_enum\tfold\n')
            for id, filename, label, label_enum, fold_ind in audio_info:
                f.write('{}\t{}\t{}\t{}\t{}\n'.format(id, filename, label, label_enum, fold_ind))

        bar.finish()

        # Optionally write min-max normalised copies into save_to/normalised
        if normalised:
            norm_folder = os.path.join(save_to, 'normalised')
            mkdirp(norm_folder)
            global_min, global_max = extract_global_min_max(save_to, format)
            save_global_min_max(norm_folder, global_min, global_max)
            normalise_all(save_to, norm_folder, format, global_min, global_max)