Example #1
def copy_audio_files(request):
    """
    Copy files from the source database to the target database, not copying the actual files, but everything database-
    wise is copied, so the copies don't affect the original.
    :param request:
    :return:
    """
    user = request.user
    ids = json.loads(get_or_error(request.POST, 'ids'))
    target_database_name = get_or_error(request.POST, 'target-database-name')
    source_database_id = get_or_error(request.POST, 'source-database-id')
    target_database = get_or_error(Database, dict(name=target_database_name))
    source_database = get_or_error(Database, dict(id=source_database_id))
    assert_permission(user, source_database, DatabasePermission.COPY_FILES)
    assert_permission(user, target_database, DatabasePermission.ADD_FILES)

    # Make sure all those IDs belong to the source database
    source_audio_files = AudioFile.objects.filter(id__in=ids,
                                                  database=source_database)
    if len(source_audio_files) != len(ids):
        raise CustomAssertionError(
            'There\'s a mismatch between the song IDs you provided and the actual songs in the database'
        )

    song_values = source_audio_files \
        .values_list('id', 'fs', 'length', 'name', 'track', 'individual', 'quality', 'original')
    old_song_id_to_name = {x[0]: x[3] for x in song_values}
    old_song_names = old_song_id_to_name.values()
    old_song_ids = old_song_id_to_name.keys()

    # Make sure there is no duplication:
    duplicate_audio_files = AudioFile.objects.filter(database=target_database,
                                                     name__in=old_song_names)
    if duplicate_audio_files:
        raise CustomAssertionError(
            'Some file(s) you\'re trying to copy already exist in {}'.format(
                target_database_name))

    # We need to map old and new IDs of AudioFiles so that we can copy their ExtraAttrValue later
    songs_old_id_to_new_id = {}

    # Create AudioFile objects one by one because they can't be bulk created
    for old_id, fs, length, name, track, individual, quality, original in song_values:
        # Make sure that we always point to the true original. E.g. if AudioFile #2 is a copy of #1 and someone makes
        # a copy of AudioFile #2, the new AudioFile must still reference #1 as its original

        original_id = old_id if original is None else original

        audio_file = AudioFile.objects.create(fs=fs,
                                              length=length,
                                              name=name,
                                              track_id=track,
                                              individual_id=individual,
                                              quality=quality,
                                              original_id=original_id,
                                              database=target_database)

        songs_old_id_to_new_id[old_id] = audio_file.id

    segments = Segment.objects.filter(
        audio_file__in=songs_old_id_to_new_id.keys())
    segments_values = segments.values_list('id', 'start_time_ms',
                                           'end_time_ms', 'mean_ff', 'min_ff',
                                           'max_ff', 'audio_file__name',
                                           'audio_file__id', 'tid')

    # We need this to map old and new IDs of Segments so that we can copy their ExtraAttrValue later
    # The only reliable way to match old and new Segments is through the tuple (start_time_ms, end_time_ms, song_name)
    # since it is guaranteed to be unique
    segments_old_id_to_start_end = {
        x[0]: (x[1], x[2], x[6])
        for x in segments_values
    }

    new_segments_info = {}
    for seg_id, start, end, mean_ff, min_ff, max_ff, song_name, song_old_id, tid in segments_values:
        segment_info = (seg_id, start, end, mean_ff, min_ff, max_ff, tid)
        if song_old_id not in new_segments_info:
            new_segments_info[song_old_id] = [segment_info]
        else:
            new_segments_info[song_old_id].append(segment_info)

    segments_to_copy = []
    for song_old_id, segment_info in new_segments_info.items():
        for seg_id, start, end, mean_ff, min_ff, max_ff, tid in segment_info:
            song_new_id = songs_old_id_to_new_id[song_old_id]
            segment = Segment(start_time_ms=start,
                              end_time_ms=end,
                              mean_ff=mean_ff,
                              min_ff=min_ff,
                              max_ff=max_ff,
                              audio_file_id=song_new_id,
                              tid=tid)
            segments_to_copy.append(segment)

    Segment.objects.bulk_create(segments_to_copy)

    copied_segments = Segment.objects.filter(
        audio_file__in=songs_old_id_to_new_id.values())
    copied_segments_values = copied_segments.values_list(
        'id', 'start_time_ms', 'end_time_ms', 'audio_file__name')
    segments_new_start_end_to_new_id = {(x[1], x[2], x[3]): x[0]
                                        for x in copied_segments_values}

    # From the two maps - new segments' (start, end, name) keys to their IDs, and old segments' IDs to their
    # (start, end, name) keys - we can now map old Segment IDs -> new Segment IDs
    segments_old_id_to_new_id = {}
    for old_segment_id, segment_start_end in segments_old_id_to_start_end.items():
        new_segment_id = segments_new_start_end_to_new_id[segment_start_end]
        old_segment_id = int(old_segment_id)
        new_segment_id = int(new_segment_id)
        segments_old_id_to_new_id[old_segment_id] = new_segment_id

    # Query all ExtraAttrValues of the songs and duplicate them, replacing old song IDs with new song IDs
    song_attrs = ExtraAttr.objects.filter(klass=AudioFile.__name__)
    old_song_extra_attrs = ExtraAttrValue.objects \
        .filter(owner_id__in=old_song_ids, user=user, attr__in=song_attrs).values_list('owner_id', 'attr', 'value')
    new_song_extra_attrs = []
    for old_song_id, attr_id, value in old_song_extra_attrs:
        new_song_id = songs_old_id_to_new_id[old_song_id]
        new_song_extra_attrs.append(
            ExtraAttrValue(user=user,
                           attr_id=attr_id,
                           value=value,
                           owner_id=new_song_id))

    old_segment_ids = segments_old_id_to_start_end.keys()

    # Query all ExtraAttrValues of the segments and duplicate them, replacing old segment IDs with new segment IDs
    segment_attrs = ExtraAttr.objects.filter(klass=Segment.__name__)
    old_segment_extra_attrs = ExtraAttrValue.objects \
        .filter(owner_id__in=old_segment_ids, user=user, attr__in=segment_attrs)\
        .values_list('owner_id', 'attr', 'value')
    new_segment_extra_attrs = []
    for old_segment_id, attr_id, value in old_segment_extra_attrs:
        new_segment_id = segments_old_id_to_new_id[int(old_segment_id)]
        new_segment_extra_attrs.append(
            ExtraAttrValue(user=user,
                           attr_id=attr_id,
                           value=value,
                           owner_id=new_segment_id))

    # Remove any ExtraAttrValues already attached to the new songs, then bulk create the copies
    ExtraAttrValue.objects.filter(owner_id__in=songs_old_id_to_new_id.values(),
                                  attr__in=song_attrs).delete()

    try:
        ExtraAttrValue.objects.bulk_create(new_song_extra_attrs)
    except IntegrityError as e:
        raise CustomAssertionError(e)

    ExtraAttrValue.objects.filter(
        owner_id__in=segments_old_id_to_new_id.values(),
        attr__in=segment_attrs).delete()

    try:
        ExtraAttrValue.objects.bulk_create(new_segment_extra_attrs)
    except IntegrityError as e:
        raise CustomAssertionError(e)

    return True
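
A minimal usage sketch for the view above: how a client might POST to it, assuming the Django test client and a URL such as /copy-audio-files/ (the URL, credentials and IDs are illustrative, not taken from the project).

import json
from django.test import Client

client = Client()
# hypothetical account that holds COPY_FILES on the source and ADD_FILES on the target database
client.login(username='alice', password='secret')
response = client.post('/copy-audio-files/', {   # hypothetical URL routed to copy_audio_files
    'ids': json.dumps([1, 2, 3]),                # AudioFile IDs belonging to the source database
    'target-database-name': 'target-db',
    'source-database-id': 5,
})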
Example #2
def import_history_with_segmentation(database, user, filelist):
    """
    For version 4 - the segment endpoints are also stored, so object IDs don't matter.
    - Recreate segmentation for files that haven't got segmentation, or theirs are different
    - Populate labels using file names and segment endpoints
    :param database:
    :param user:
    :param filelist:
    :return:
    """
    with transaction.atomic():
        try:
            _extra_attrs = json.loads(filelist['extraattr.json'])
            segment_attr_values = json.loads(
                filelist['segment.extraattrvalue.json'])
            song_attr_values = json.loads(
                filelist['audiofile.extraattrvalue.json'])
            _song_info = json.loads(filelist['songinfo.json'])
        except Exception:
            raise CustomAssertionError(
                'The history content is malformed and cannot be parsed.')

        # Match saved song IDs to their actual IDs in the database (if they exist)
        # Songs that don't exist in the database are ignored
        song_names = list(_song_info.keys())

        existing_segments = Segment.objects \
            .filter(audio_file__name__in=song_names, audio_file__database=database) \
            .values_list('id', 'audio_file__name', 'audio_file', 'start_time_ms', 'end_time_ms')

        song_name_to_new_id = {
            x[0]: x[1]
            for x in AudioFile.objects.filter(name__in=song_names,
                                              database=database).values_list(
                                                  'name', 'id')
        }

        seg_old_to_new_id = {}
        song_info = {}
        new_segments = []
        song_old_to_new_id = {}

        for seg_id, song_name, song_id, start, end in existing_segments:
            if song_name not in song_info:
                song_info[song_name] = (song_id, [])
            song_info[song_name][1].append((seg_id, start, end))

        seg_key_to_new_id = {}
        seg_key_to_old_id = {}
        seg_key_to_extras = {}
        for song_name, (_song_id, _syls_info) in _song_info.items():

            # Ignore songs that exist in the saved but not in this database
            if song_name not in song_name_to_new_id:
                continue

            for syl_info in _syls_info:
                _seg_id, start, end, mean_ff, min_ff, max_ff = syl_info[:6]

                # Newer versions also save the Segment's TID
                if len(syl_info) == 7:
                    tid = syl_info[6]
                else:
                    tid = None
                seg_key = (start, end, song_name)
                seg_key_to_old_id[seg_key] = _seg_id
                seg_key_to_extras[seg_key] = (mean_ff, min_ff, max_ff, tid)

            if song_name in song_info:
                song_id, info = song_info[song_name]
                song_old_to_new_id[_song_id] = song_id

                for seg_id, start, end in info:
                    seg_key_to_new_id[(start, end, song_name)] = seg_id
            else:
                song_old_to_new_id[_song_id] = song_name_to_new_id[song_name]

        for seg_key, _seg_id in seg_key_to_old_id.items():
            if seg_key in seg_key_to_new_id:
                seg_id = seg_key_to_new_id[seg_key]
                seg_old_to_new_id[_seg_id] = seg_id
            else:
                start, end, song_name = seg_key
                mean_ff, min_ff, max_ff, tid = seg_key_to_extras[seg_key]
                song_id = song_name_to_new_id[song_name]
                new_segments.append(
                    Segment(start_time_ms=start,
                            end_time_ms=end,
                            audio_file_id=song_id,
                            mean_ff=mean_ff,
                            min_ff=min_ff,
                            max_ff=max_ff,
                            tid=tid))

        seg_key_to_new_id = {}
        with transaction.atomic():
            for segment in new_segments:
                segment.save()
                if segment.tid is None:
                    segment.tid = segment.id
                    segment.save()

                seg_key = (segment.start_time_ms, segment.end_time_ms,
                           segment.audio_file.name)
                seg_key_to_new_id[seg_key] = segment.id

        for seg_key, _seg_id in seg_key_to_old_id.items():
            if seg_key in seg_key_to_new_id:
                seg_old_to_new_id[_seg_id] = seg_key_to_new_id[seg_key]

        segment_attr_values = change_owner_and_attr_ids(
            segment_attr_values, _extra_attrs, seg_old_to_new_id, True)
        song_attr_values = change_owner_and_attr_ids(song_attr_values,
                                                     _extra_attrs,
                                                     song_old_to_new_id)

        update_extra_attr_values(user, segment_attr_values)
        update_extra_attr_values(user, song_attr_values)

    return True
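
The heart of the function above is reconciling saved segments with existing ones through the (start_time_ms, end_time_ms, song_name) key, which is assumed unique. A self-contained sketch of that pattern, with made-up keys and IDs:

def reconcile(old_by_key, new_by_key):
    """Map old IDs to new IDs where keys match; collect unmatched keys for later creation."""
    old_to_new, to_create = {}, []
    for key, old_id in old_by_key.items():
        if key in new_by_key:
            old_to_new[old_id] = new_by_key[key]
        else:
            to_create.append(key)
    return old_to_new, to_create

old_to_new, to_create = reconcile(
    {(0, 120, 'song_a.wav'): 11, (130, 250, 'song_a.wav'): 12},
    {(0, 120, 'song_a.wav'): 91})
# old_to_new == {11: 91}; to_create == [(130, 250, 'song_a.wav')]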
Example #3
def save_segmentation(request):
    """
    Save the segmentation scheme sent from the client. Compare with the existing segmentation, there are three cases:
    1. segments that currently exist but not found in the client's scheme - they need to be deleted
    2. segments that currently exist and found in the client's scheme - they need to be updated
    3. segments that doesn't currently exist but found in the client's scheme - they need to be created

    Finally, create or update the spectrogram image (not the mask - can't do anything about the mask)
    :param request:
    :return:
    """
    user = request.user
    items = json.loads(get_or_error(request.POST, 'items'))
    file_id = int(get_or_error(request.POST, 'file-id'))
    audio_file = get_or_error(AudioFile, dict(id=file_id))
    assert_permission(user, audio_file.database,
                      DatabasePermission.MODIFY_SEGMENTS)
    segments = Segment.objects.filter(audio_file=audio_file)

    new_segments = []
    old_segments = []
    for item in items:
        item_id = item['id']
        if isinstance(item_id, str) and item_id.startswith('new:'):
            segment = Segment(audio_file=audio_file,
                              start_time_ms=item['start'],
                              end_time_ms=item['end'])
            label = item.get('label', None)
            family = item.get('label_family', None)
            subfamily = item.get('label_subfamily', None)
            note = item.get('note', None)

            new_segments.append((segment, label, family, subfamily, note))
        else:
            old_segments.append(item)

    id_to_existing_item = {x['id']: x for x in old_segments}

    to_update = []
    to_delete_id = []

    for segment in segments:
        segment_id = segment.id
        if segment_id in id_to_existing_item:
            item = id_to_existing_item[segment_id]
            segment.start_time_ms = item['start']
            segment.end_time_ms = item['end']

            to_update.append(segment)
        else:
            to_delete_id.append(segment.id)

    label_attr = settings.ATTRS.segment.label
    family_attr = settings.ATTRS.segment.family
    subfamily_attr = settings.ATTRS.segment.subfamily
    note_attr = settings.ATTRS.segment.note

    with transaction.atomic():
        for segment in to_update:
            segment.save()

        Segment.objects.filter(id__in=to_delete_id).update(active=False)

        for segment, label, family, subfamily, note in new_segments:
            segment.save()
            segment.tid = segment.id
            segment.save()
            if label:
                ExtraAttrValue.objects.create(user=user,
                                              attr=label_attr,
                                              owner_id=segment.id,
                                              value=label)
            if family:
                ExtraAttrValue.objects.create(user=user,
                                              attr=family_attr,
                                              owner_id=segment.id,
                                              value=family)
            if subfamily:
                ExtraAttrValue.objects.create(user=user,
                                              attr=subfamily_attr,
                                              owner_id=segment.id,
                                              value=subfamily)
            if note:
                ExtraAttrValue.objects.create(user=user,
                                              attr=note_attr,
                                              owner_id=segment.id,
                                              value=note)

    segments = Segment.objects.filter(audio_file=audio_file)
    _, rows = bulk_get_segments_for_audio(segments,
                                          DotMap(file_id=file_id, user=user))

    delay_in_production(extract_spectrogram, audio_file.id)
    delay_in_production(delete_segments_async)

    return rows
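
A sketch of the 'items' payload the view above expects, inferred only from the keys it reads; the values are made up:

import json

items = json.dumps([
    # case 3: a segment created on the client gets an id prefixed with 'new:'
    {'id': 'new:1', 'start': 0, 'end': 120,
     'label': 'A', 'label_family': 'fam', 'label_subfamily': 'sub', 'note': 'first syllable'},
    # case 2: an existing segment keeps its numeric id and carries updated endpoints
    {'id': 42, 'start': 130, 'end': 250},
    # case 1: any existing segment whose id is absent from the list is deactivated
])
# POSTed as {'items': items, 'file-id': <AudioFile id>}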
Example #4
def import_syllables(conn):
    """
    :param conn: the database connection
    :return:
    """
    cur = conn.cursor()
    el_cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)

    # Import syllables for all songs
    cur.execute(
        'SELECT sg.name, s.starttime, s.endtime, w.songid FROM syllable s '
        'JOIN wavs w ON s.songid=w.songid '
        'JOIN songdata sg ON w.songid=sg.id ORDER BY w.filename, s.starttime')
    song_syllable_rows = cur.fetchall()
    songs_2_syllables = {}

    # Song #79 PKI_2017_02_25_WHW028_01_M.EX..PipeClicksGrowlcough.wav has more than one syllable at position 1124:1136.
    # Db Syllable #2924

    for row in song_syllable_rows:
        song_name = row[0]
        syl_starttime = row[1]
        syl_endtime = row[2]
        song_id = row[3]

        el_cur.execute(
            'SELECT starttime, timelength FROM element WHERE songid=%s AND starttime >= %s '
            'AND (starttime + timelength) <= %s ORDER BY starttime',
            (song_id, syl_starttime, syl_endtime))
        el_rows = el_cur.fetchall()
        if len(el_rows) == 0:
            warning(
                'Syllable with starttime={} endtime={} of song: "{}" doesn\'t enclose any element.'
                .format(syl_starttime, syl_endtime, song_name))
            continue

        real_syl_starttime = el_rows[0]['starttime']
        real_syl_endtime = utils.get_syllable_end_time(el_rows)

        syllable = (real_syl_starttime, real_syl_endtime)

        if song_name not in songs_2_syllables:
            songs_2_syllables[song_name] = []
        songs_2_syllables[song_name].append(syllable)

    # delete all existing manual segmentation:
    Segment.objects.filter(
        audio_file__name__in=songs_2_syllables.keys()).delete()

    bar = Bar('Importing syllables ...', max=len(songs_2_syllables))
    for song in songs_2_syllables:
        syllables = songs_2_syllables[song]
        audio_file = AudioFile.objects.filter(name=song).first()
        if audio_file is None:
            warning(
                'File {} has not been imported. Please run import_luscinia_songs again.'
                ' Ignoring it for now'.format(song))
            continue

        for syllable in syllables:
            segment = Segment()
            segment.start_time_ms = syllable[0]
            segment.end_time_ms = syllable[1]
            segment.audio_file = audio_file
            segment.save()
            segment.tid = segment.id
            segment.save()

        # print('Processed song {}'.format(song))
        bar.next()
    bar.finish()
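
A minimal usage sketch, assuming the syllables live in a Luscinia PostgreSQL dump reachable with psycopg2; the connection parameters are placeholders:

import psycopg2

conn = psycopg2.connect(host='localhost', dbname='luscinia',
                        user='postgres', password='postgres')  # placeholder credentials
try:
    import_syllables(conn)
finally:
    conn.close()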