def copy_audio_files(request):
    """
    Copy audio files from the source database to the target database. The actual files are not copied, but
    everything database-wise is (AudioFile rows, their Segment rows, and the requesting user's ExtraAttrValue
    rows for both), so the copies don't affect the original.

    All rows are written inside a single transaction: either the whole copy succeeds or nothing is written.

    :param request: request whose POST carries 'ids' (JSON list of AudioFile IDs), 'target-database-name' and
                    'source-database-id'. The user needs COPY_FILES on the source database and ADD_FILES on the
                    target database (checked through assert_permission).
    :return: True once everything has been copied
    :raises CustomAssertionError: if some IDs don't belong to the source database, if a file with the same name
                                  already exists in the target database, or if creating the copied ExtraAttrValue
                                  rows violates a database constraint
    """
    user = request.user
    ids = json.loads(get_or_error(request.POST, 'ids'))
    target_database_name = get_or_error(request.POST, 'target-database-name')
    source_database_id = get_or_error(request.POST, 'source-database-id')
    target_database = get_or_error(Database, dict(name=target_database_name))
    source_database = get_or_error(Database, dict(id=source_database_id))
    assert_permission(user, source_database, DatabasePermission.COPY_FILES)
    assert_permission(user, target_database, DatabasePermission.ADD_FILES)

    # Make sure all those IDs belong to the source database. Compare against the distinct IDs so that a request
    # which repeats an ID isn't rejected as a mismatch.
    source_audio_files = AudioFile.objects.filter(id__in=ids, database=source_database)
    if source_audio_files.count() != len(set(ids)):
        raise CustomAssertionError(
            'There\'s a mismatch between the song IDs you provided and the actual songs in the database'
        )

    song_values = list(
        source_audio_files.values_list('id', 'fs', 'length', 'name', 'track', 'individual', 'quality', 'original'))
    old_song_ids = [x[0] for x in song_values]
    old_song_names = [x[3] for x in song_values]

    # Make sure there is no duplication:
    if AudioFile.objects.filter(database=target_database, name__in=old_song_names).exists():
        raise CustomAssertionError(
            'Some file(s) you\'re trying to copy already exist in {}'.format(target_database_name))

    # Everything below writes to the database. Doing it atomically means a failure half-way doesn't leave
    # partial copies behind, which would otherwise trip the duplication check above on the next attempt.
    with transaction.atomic():
        # We need to map old and new IDs of AudioFiles so that we can copy their ExtraAttrValue later
        songs_old_id_to_new_id = {}

        # Create Song objects one by one because they can't be bulk created
        for old_id, fs, length, name, track, individual, quality, original in song_values:
            # Make sure that we always point to the true original. E.g if AudioFile #2 is a copy of #1 and
            # someone makes a copy of AudioFile #2, the new AudioFile must still reference #1 as its original
            original_id = old_id if original is None else original
            audio_file = AudioFile.objects.create(fs=fs, length=length, name=name, track_id=track,
                                                  individual_id=individual, quality=quality,
                                                  original_id=original_id, database=target_database)
            songs_old_id_to_new_id[old_id] = audio_file.id

        segments_values = Segment.objects.filter(audio_file__in=old_song_ids).values_list(
            'id', 'start_time_ms', 'end_time_ms', 'mean_ff', 'min_ff', 'max_ff', 'audio_file__name',
            'audio_file__id', 'tid')

        # bulk_create is not relied upon to give us the new IDs, so old and new Segments are matched through
        # the key (start_time_ms, end_time_ms, song_name); a copy keeps the name of the file it was copied from.
        segments_old_id_to_key = {}
        segments_to_copy = []
        for seg_id, start, end, mean_ff, min_ff, max_ff, song_name, song_old_id, tid in segments_values:
            segments_old_id_to_key[seg_id] = (start, end, song_name)
            segments_to_copy.append(
                Segment(start_time_ms=start, end_time_ms=end, mean_ff=mean_ff, min_ff=min_ff, max_ff=max_ff,
                        audio_file_id=songs_old_id_to_new_id[song_old_id], tid=tid))

        Segment.objects.bulk_create(segments_to_copy)

        copied_segments_values = Segment.objects.filter(
            audio_file__in=list(songs_old_id_to_new_id.values())
        ).values_list('id', 'start_time_ms', 'end_time_ms', 'audio_file__name')
        segments_key_to_new_id = {
            (start, end, song_name): new_id for new_id, start, end, song_name in copied_segments_values
        }

        # Combine the two maps (old ID -> key, key -> new ID) into old Segment ID -> new Segment ID
        segments_old_id_to_new_id = {
            int(old_segment_id): int(segments_key_to_new_id[key])
            for old_segment_id, key in segments_old_id_to_key.items()
        }

        # Query all ExtraAttrValue of Songs, and make duplicate by replacing old song IDs by new song IDs
        song_attrs = ExtraAttr.objects.filter(klass=AudioFile.__name__)
        old_song_extra_attrs = ExtraAttrValue.objects \
            .filter(owner_id__in=old_song_ids, user=user, attr__in=song_attrs) \
            .values_list('owner_id', 'attr', 'value')

        # owner_id is coerced with int() in the same way as for segments below, so that an owner_id that comes
        # back as text still matches the integer keys of the map.
        # NOTE(review): confirm the column type of ExtraAttrValue.owner_id.
        new_song_extra_attrs = [
            ExtraAttrValue(user=user, attr_id=attr_id, value=value,
                           owner_id=songs_old_id_to_new_id[int(old_song_id)])
            for old_song_id, attr_id, value in old_song_extra_attrs
        ]

        # Query all ExtraAttrValue of Segments, and make duplicate by replacing old IDs by new IDs
        old_segment_ids = list(segments_old_id_to_key.keys())
        segment_attrs = ExtraAttr.objects.filter(klass=Segment.__name__)
        old_segment_extra_attrs = ExtraAttrValue.objects \
            .filter(owner_id__in=old_segment_ids, user=user, attr__in=segment_attrs) \
            .values_list('owner_id', 'attr', 'value')

        new_segment_extra_attrs = [
            ExtraAttrValue(user=user, attr_id=attr_id, value=value,
                           owner_id=segments_old_id_to_new_id[int(old_segment_id)])
            for old_segment_id, attr_id, value in old_segment_extra_attrs
        ]

        # Now bulk create. Any value already stored under one of the new owner IDs is removed first.
        ExtraAttrValue.objects.filter(owner_id__in=list(songs_old_id_to_new_id.values()),
                                      attr__in=song_attrs).delete()
        try:
            ExtraAttrValue.objects.bulk_create(new_song_extra_attrs)
        except IntegrityError as e:
            # Re-raised, so the surrounding transaction is rolled back
            raise CustomAssertionError(e)

        ExtraAttrValue.objects.filter(owner_id__in=list(segments_old_id_to_new_id.values()),
                                      attr__in=segment_attrs).delete()
        try:
            ExtraAttrValue.objects.bulk_create(new_segment_extra_attrs)
        except IntegrityError as e:
            raise CustomAssertionError(e)

    return True
def import_history_with_segmentation(database, user, filelist):
    """
    For version 4 - the segment endpoints are also stored, so object IDs don't matter.
    - Recreate segmentation for files that haven't got segmentation, or theirs are different
    - Populate labels using file names and segment endpoints

    Segments are matched between the saved history and the database through the key
    (start, end, song name). Saved segments with no match are created. All database work happens inside one
    transaction.

    :param database: only audio files (and their segments) of this database are considered
    :param user: passed on to update_extra_attr_values
    :param filelist: indexable by file name, giving JSON text for 'extraattr.json',
                     'segment.extraattrvalue.json', 'audiofile.extraattrvalue.json' and 'songinfo.json'.
                     'songinfo.json' maps each song name to a pair (saved song ID, list of syllable records);
                     a syllable record is (saved segment ID, start, end, mean_ff, min_ff, max_ff) with the
                     segment's TID as an optional seventh element.
    :return: True
    :raises CustomAssertionError: if any of the files is missing or cannot be parsed
    """
    # Parsing needs no database access, so it is done before the transaction is opened
    try:
        _extra_attrs = json.loads(filelist['extraattr.json'])
        segment_attr_values = json.loads(filelist['segment.extraattrvalue.json'])
        song_attr_values = json.loads(filelist['audiofile.extraattrvalue.json'])
        _song_info = json.loads(filelist['songinfo.json'])
    except Exception:
        raise CustomAssertionError('The history content is malformed and cannot be parsed.')

    with transaction.atomic():
        # Match saved song IDs to their actual IDs on the database (if they exist)
        # Songs that don't exist in the database are ignored
        song_names = list(_song_info.keys())
        existing_segments = Segment.objects \
            .filter(audio_file__name__in=song_names, audio_file__database=database) \
            .values_list('id', 'audio_file__name', 'audio_file', 'start_time_ms', 'end_time_ms')

        song_name_to_new_id = {
            name: song_id
            for name, song_id in AudioFile.objects.filter(name__in=song_names, database=database)
                                                  .values_list('name', 'id')
        }

        # song name -> (song ID, [(segment ID, start, end), ...]) for segments already in the database
        song_info = {}
        for seg_id, song_name, song_id, start, end in existing_segments:
            song_info.setdefault(song_name, (song_id, []))[1].append((seg_id, start, end))

        song_old_to_new_id = {}
        seg_key_to_new_id = {}
        seg_key_to_old_id = {}
        seg_key_to_extras = {}

        for song_name, (_song_id, _syls_info) in _song_info.items():
            # Ignore songs that exist in the saved but not in this database
            if song_name not in song_name_to_new_id:
                continue

            for syl_info in _syls_info:
                _seg_id, start, end, mean_ff, min_ff, max_ff = syl_info[:6]

                # New version also save Segment's TID
                tid = syl_info[6] if len(syl_info) == 7 else None

                seg_key = (start, end, song_name)
                seg_key_to_old_id[seg_key] = _seg_id
                seg_key_to_extras[seg_key] = (mean_ff, min_ff, max_ff, tid)

            if song_name in song_info:
                song_id, info = song_info[song_name]
                song_old_to_new_id[_song_id] = song_id

                for seg_id, start, end in info:
                    seg_key_to_new_id[(start, end, song_name)] = seg_id
            else:
                song_old_to_new_id[_song_id] = song_name_to_new_id[song_name]

        seg_old_to_new_id = {}

        # Pairs of (saved segment ID, unsaved Segment). Keeping the saved ID next to the object lets us map
        # old -> new IDs right after saving, instead of rebuilding the key from segment.audio_file.name, which
        # has to load the related AudioFile for every new segment.
        segments_to_create = []

        for seg_key, _seg_id in seg_key_to_old_id.items():
            if seg_key in seg_key_to_new_id:
                seg_old_to_new_id[_seg_id] = seg_key_to_new_id[seg_key]
            else:
                start, end, song_name = seg_key
                mean_ff, min_ff, max_ff, tid = seg_key_to_extras[seg_key]
                song_id = song_name_to_new_id[song_name]
                segment = Segment(start_time_ms=start, end_time_ms=end, audio_file_id=song_id,
                                  mean_ff=mean_ff, min_ff=min_ff, max_ff=max_ff, tid=tid)
                segments_to_create.append((_seg_id, segment))

        for _seg_id, segment in segments_to_create:
            segment.save()
            # A segment saved without TID uses its own ID as TID, which is only known after the first save
            if segment.tid is None:
                segment.tid = segment.id
                segment.save()
            seg_old_to_new_id[_seg_id] = segment.id

        segment_attr_values = change_owner_and_attr_ids(segment_attr_values, _extra_attrs, seg_old_to_new_id, True)
        song_attr_values = change_owner_and_attr_ids(song_attr_values, _extra_attrs, song_old_to_new_id)

        update_extra_attr_values(user, segment_attr_values)
        update_extra_attr_values(user, song_attr_values)

    return True
def save_segmentation(request):
    """
    Store the segmentation scheme submitted by the client for one audio file.

    The submitted items are reconciled with the segments currently stored for the file:
        1. stored segments that are absent from the submission are marked inactive (active=False)
        2. stored segments that are present in the submission get their start/end updated
        3. submitted items whose id is a string starting with 'new:' are created, together with any label,
           family, subfamily and note supplied with them
    Finally, extract_spectrogram and delete_segments_async are handed to delay_in_production
    (nothing is done about the mask)

    :param request: request whose POST carries 'items' (JSON list of segment items) and 'file-id'. The user
                    needs MODIFY_SEGMENTS permission on the file's database (checked through assert_permission).
    :return: the rows returned by bulk_get_segments_for_audio for the file's segments after saving
    """
    user = request.user
    items = json.loads(get_or_error(request.POST, 'items'))
    file_id = int(get_or_error(request.POST, 'file-id'))
    audio_file = get_or_error(AudioFile, dict(id=file_id))
    assert_permission(user, audio_file.database, DatabasePermission.MODIFY_SEGMENTS)
    stored_segments = Segment.objects.filter(audio_file=audio_file)

    # Split the submission into brand new segments and items that refer to stored segments
    pending_creations = []
    submitted_existing = []
    for item in items:
        item_id = item['id']
        if not (isinstance(item_id, str) and item_id.startswith('new:')):
            submitted_existing.append(item)
            continue

        segment = Segment(audio_file=audio_file, start_time_ms=item['start'], end_time_ms=item['end'])
        attr_values = (
            item.get('label', None),
            item.get('label_family', None),
            item.get('label_subfamily', None),
            item.get('note', None),
        )
        pending_creations.append((segment, attr_values))

    submitted_by_id = {item['id']: item for item in submitted_existing}

    # Stored segments are either updated in place or scheduled for deactivation
    to_update = []
    to_delete_id = []
    for segment in stored_segments:
        try:
            item = submitted_by_id[segment.id]
        except KeyError:
            to_delete_id.append(segment.id)
        else:
            segment.start_time_ms = item['start']
            segment.end_time_ms = item['end']
            to_update.append(segment)

    # Same order as the values collected for each new segment above
    segment_attrs = settings.ATTRS.segment
    attrs = (segment_attrs.label, segment_attrs.family, segment_attrs.subfamily, segment_attrs.note)

    with transaction.atomic():
        for segment in to_update:
            segment.save()

        Segment.objects.filter(id__in=to_delete_id).update(active=False)

        for segment, attr_values in pending_creations:
            # The TID of a new segment is its own ID, which is only known after the first save
            segment.save()
            segment.tid = segment.id
            segment.save()

            for attr, value in zip(attrs, attr_values):
                if value:
                    ExtraAttrValue.objects.create(user=user, attr=attr, owner_id=segment.id, value=value)

    saved_segments = Segment.objects.filter(audio_file=audio_file)
    _, rows = bulk_get_segments_for_audio(saved_segments, DotMap(file_id=file_id, user=user))

    delay_in_production(extract_spectrogram, audio_file.id)
    delay_in_production(delete_segments_async)

    return rows
def import_syllables(conn):
    """
    Import the syllables of all songs found through ``conn`` and store each of them as a Segment.

    The endpoints of a syllable are not taken from the syllable row itself but from the elements it encloses:
    the start is the start time of the first enclosed element and the end is computed by
    utils.get_syllable_end_time. Syllables that enclose no element are skipped with a warning.

    Existing segments of every audio file whose name matches an imported song are deleted before the new
    segments are created. Songs that have no AudioFile are skipped with a warning.

    :param conn: the database connection
    :return: None
    """
    cur = conn.cursor()
    el_cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)

    # Import syllables for all songs
    cur.execute(
        'SELECT sg.name, s.starttime, s.endtime, w.songid FROM syllable s '
        'JOIN wavs w ON s.songid=w.songid '
        'JOIN songdata sg ON w.songid=sg.id ORDER BY w.filename, s.starttime')
    song_syllable_rows = cur.fetchall()
    songs_2_syllables = {}

    # Values are handed to the driver as query parameters rather than formatted into the SQL text
    element_query = (
        'select starttime, timelength from element where songid=%s and starttime >= %s '
        'and (starttime + timelength) <= %s order by starttime')

    # Song #79 PKI_2017_02_25_WHW028_01_M.EX..PipeClicksGrowlcough.wav has more than one syllable at position
    # 1124:1136. Db Syllable #2924
    for song_name, syl_starttime, syl_endtime, song_id in song_syllable_rows:
        el_cur.execute(element_query, (song_id, syl_starttime, syl_endtime))
        el_rows = el_cur.fetchall()
        if not el_rows:
            warning(
                'Syllable with starttime={} endtime={} of song: "{}" doesn\'t enclose any element.'
                .format(syl_starttime, syl_endtime, song_name))
            continue

        real_syl_starttime = el_rows[0]['starttime']
        real_syl_endtime = utils.get_syllable_end_time(el_rows)

        # Always look the list up by song name. The rows are ordered by wav file name, so rows of the same
        # song are not guaranteed to be contiguous, and reusing the list of the previously seen song would
        # attach this syllable to the wrong song.
        songs_2_syllables.setdefault(song_name, []).append((real_syl_starttime, real_syl_endtime))

    # delete all existing manual segmentation:
    Segment.objects.filter(audio_file__name__in=list(songs_2_syllables.keys())).delete()

    bar = Bar('Importing syllables ...', max=len(songs_2_syllables))
    for song, syllables in songs_2_syllables.items():
        audio_file = AudioFile.objects.filter(name=song).first()
        if audio_file is None:
            warning(
                'File {} has not been imported. Please run import_luscinia_songs again.'
                ' Ignore for now'.format(song))
            # Skipped songs count towards the progress too, so the bar still reaches its maximum
            bar.next()
            continue

        for start_time_ms, end_time_ms in syllables:
            segment = Segment(start_time_ms=start_time_ms, end_time_ms=end_time_ms, audio_file=audio_file)
            segment.save()
            # The TID of a new segment is its own ID, which is only known after the first save
            segment.tid = segment.id
            segment.save()

        bar.next()
    bar.finish()