Пример #1
0
def get_audio_file_url(request):
    """Return the compressed-audio URL for a file plus its real sample rate.

    :param request: POST must contain 'file-id'
    :return: dict with 'url' and 'real-fs' ('real-fs' is None when the rate
             embedded in the compressed file is the one the browser should use)
    """
    user = request.user

    file_id = get_or_error(request.POST, 'file-id')
    audio_file = get_or_error(AudioFile, dict(id=file_id))
    assert_permission(user, audio_file.database, DatabasePermission.VIEW)

    # The browser only ever sees the (possibly faked) sample rate, so the real
    # fs must be sent explicitly when it differs. However, conversion to MP3
    # can itself change the real fs (e.g. 44100Hz wav -> 48000Hz mp3); when we
    # send None the browser falls back to the rate embedded in the MP3 file.
    has_fake_fs = audio_file.fake_fs is not None
    if has_fake_fs or audio_file.fs > 48000:
        real_fs = audio_file.fs
    else:
        real_fs = None

    url = audio_path(audio_file, settings.AUDIO_COMPRESSED_FORMAT, for_url=True)
    return {'url': url, 'real-fs': real_fs}
Пример #2
0
def save_history(request):
    """
    Save a copy of all ExtraAttrValue (labels, notes, ...) in a HistoryEntry
    :param request: must specify a comment to store with this copy, the
        database id, and the backup type ('labels' or 'segmentation')
    :return: the rendered table row of the newly created HistoryEntry
    """
    # Backup format version, written into the archive's meta.json so a restore
    # routine knows how to interpret the contents.
    version = 4
    user = request.user

    comment = get_or_error(request.POST, 'comment')
    database_id = get_or_error(request.POST, 'database')
    backup_type = get_or_error(request.POST, 'type')

    database = get_or_error(Database, dict(id=database_id))
    assert_permission(user, database, DatabasePermission.VIEW)
    assert_values(backup_type, ['labels', 'segmentation'])

    meta = dict(database=database_id,
                user=user.id,
                time=timezone.now(),
                version=version,
                note=comment,
                type=backup_type)

    # Build the zip entirely in memory, then persist it in one write below.
    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, "a", zipfile.ZIP_BZIP2,
                         False) as zip_file:
        zip_file.writestr('meta.json', json.dumps(meta))
        # Sentinel entry, apparently used to validate the archive on restore.
        zip_file.writestr('root.extraattrvalue.json',
                          'here for checking purpose')

        if backup_type == 'labels':
            save_label_history(database, user, zip_file)
        else:
            save_segmentation_history(database, user, zip_file)

    binary_content = zip_buffer.getvalue()

    he = HistoryEntry.objects.create(user=user,
                                     time=timezone.now(),
                                     database=database,
                                     version=version,
                                     note=comment,
                                     type=backup_type)
    filename = he.filename
    filepath = history_path(filename)
    ensure_parent_folder_exists(filepath)

    with open(filepath, 'wb') as f:
        f.write(binary_content)

    # Render the new entry in the requester's local timezone.
    tz_offset = request.session['detected_tz']
    tz = offset_to_timezone(tz_offset)

    _, rows = bulk_get_history_entries([he],
                                       DotMap(user=user,
                                              database=database_id,
                                              tz=tz))
    return rows[0]
Пример #3
0
def delete_audio_files(request):
    """
    Mark audio files (and their segments) inactive given their ids.
    Actual removal is performed asynchronously by the delete_*_async tasks.
    :param request: must contain a list of ids and the id of the database where these files come from
    :return: True on success
    :raises CustomAssertionError: if any id does not belong to this database
    """
    user = request.user
    ids = json.loads(get_or_error(request.POST, 'ids'))
    database_id = get_or_error(request.POST, 'database')
    database = get_or_error(Database, dict(id=database_id))
    assert_permission(user, database, DatabasePermission.DELETE_FILES)

    # Check that the ids to delete actually come from this database.
    # Bug fix: the filter previously did not restrict by database, so the
    # check (and the error message below) did not actually enforce ownership.
    audio_files = AudioFile.objects.filter(id__in=ids, database=database)

    # Materialise into a set once: `x in queryset` would issue one DB query
    # per id, whereas set membership is a single query plus O(1) lookups.
    audio_files_ids = set(audio_files.values_list('id', flat=True))

    non_existent_ids = [x for x in ids if x not in audio_files_ids]

    if non_existent_ids:
        raise CustomAssertionError(
            'You\'re trying to delete files that don\'t belong to database {}. '
            'Are you messing with Javascript?'.format(database.name))

    segments = Segment.objects.filter(audio_file__in=audio_files)

    # Soft delete: flag inactive now, purge asynchronously.
    segments.update(active=False)
    audio_files.update(active=False)

    delay_in_production(delete_segments_async)
    delay_in_production(delete_audio_files_async)

    return True
Пример #4
0
def add_collaborator(request):
    """Grant VIEW permission on a database to another user.

    :param request: POST must contain 'user' (username or email) and 'database'
    :return: the rendered table row for the new DatabaseAssignment
    """
    you = request.user
    user_name_or_email = get_or_error(request.POST, 'user')
    database_id = get_or_error(request.POST, 'database')
    database = get_or_error(Database, dict(id=database_id))

    assert_permission(you, database, DatabasePermission.ASSIGN_USER)

    # Look the collaborator up by username or email, case-insensitively.
    matches = (Q(username__iexact=user_name_or_email)
               | Q(email__iexact=user_name_or_email))
    user = User.objects.filter(matches).first()
    if user is None:
        raise CustomAssertionError('This user doesn\'t exist.')

    if DatabaseAssignment.objects.filter(user=user, database=database).exists():
        raise CustomAssertionError(
            'User\'s already been granted access. You can change their permission in the table.'
        )

    # New collaborators always start with the lowest (VIEW) permission.
    database_assignment = DatabaseAssignment(user=user,
                                             database=database,
                                             permission=DatabasePermission.VIEW)
    database_assignment.save()

    _, rows = bulk_get_database_assignment([database_assignment],
                                           DotMap(database=database.id))
    return rows[0]
Пример #5
0
def remove_collaborators(request):
    """Revoke collaborators' access to a database.

    :param request: POST must contain 'ids' (JSON list of DatabaseAssignment
        ids) and 'database'
    :return: True on success
    """
    you = request.user
    assignment_ids = json.loads(get_or_error(request.POST, 'ids'))
    database_id = get_or_error(request.POST, 'database')
    database = get_or_error(Database, dict(id=database_id))
    assignments = DatabaseAssignment.objects.filter(id__in=assignment_ids,
                                                    database=database)

    assert_permission(you, database, DatabasePermission.ASSIGN_USER)

    # Every requested assignment must actually belong to this database.
    if len(assignments) != len(assignment_ids):
        raise CustomAssertionError(
            'ERROR: one or more collaborators are not assigned to this database.'
        )

    # Guard rails: no self-removal, and admins cannot remove other admins.
    if assignments.filter(user=you).exists():
        raise CustomAssertionError('ERROR: you can\'t remove yourself.')

    admin_assignments = assignments.filter(
        permission=DatabasePermission.ASSIGN_USER)
    if admin_assignments.exists():
        raise CustomAssertionError(
            'ERROR: you can\'t remove other admins of this database.')

    assignments.delete()
    return True
Пример #6
0
def get_segment_audio_data(request):
    """
    Return a playable audio segment given the segment id
    :param request: must specify segment-id, this is the ID of a Segment object to be played
    :return: a binary blob specified as audio/ogg (or whatever the format is), playable and volume set to -10dB
    """
    user = request.user

    segment_id = get_or_error(request.POST, 'segment-id')
    segment = get_or_error(Segment, dict(id=segment_id))
    audio_file = segment.audio_file
    assert_permission(user, audio_file.database, DatabasePermission.VIEW)

    # For a derived (copied) file the actual bytes live under the original
    # file's name and database, so resolve to the true source first.
    if audio_file.is_original():
        source = audio_file
    else:
        source = audio_file.original

    return _cached_get_segment_audio_data(source.name, source.database.id,
                                          audio_file.fs,
                                          segment.start_time_ms,
                                          segment.end_time_ms)
def get_audio_file_url(request):
    """Return the URL of the compressed copy of an audio file.

    :param request: POST must contain 'file-id'
    :return: URL string pointing at the compressed audio
    """
    user = request.user

    file_id = get_or_error(request.POST, 'file-id')
    audio_file = get_or_error(AudioFile, dict(id=file_id))
    assert_permission(user, audio_file.database, DatabasePermission.VIEW)

    return audio_path(audio_file, settings.AUDIO_COMPRESSED_FORMAT,
                      for_url=True)
Пример #8
0
def get_label_options(request):
    """Return per-user counts of label / label_family / label_subfamily values
    for the segments of a file, a database, or a temporary database.

    :param request: POST must contain one of 'file-id', 'database-id',
        'tmpdb-id' (checked in that priority order)
    :return: response envelope whose payload maps each label attribute name to
        a {value: count} dict
    """
    file_id = request.POST.get('file-id', None)
    database_id = request.POST.get('database-id', None)
    tmpdb_id = request.POST.get('tmpdb-id', None)

    if file_id is None and database_id is None and tmpdb_id is None:
        raise CustomAssertionError('Need file-id or database-id or tmpdb-id')

    # Resolve the scope to either a Database or a TemporaryDatabase.
    if file_id:
        audio_file = get_or_error(AudioFile, dict(id=file_id))
        database = audio_file.database
    elif database_id:
        database = get_or_error(Database, dict(id=database_id))
    else:
        database = get_or_error(TemporaryDatabase, dict(id=tmpdb_id))

    user = request.user

    if isinstance(database, Database):
        assert_permission(user, database, DatabasePermission.VIEW)
        sids = list(
            Segment.objects.filter(audio_file__database=database).values_list(
                'id', flat=True))
    else:
        # A TemporaryDatabase carries its segment ids directly.
        sids = database.ids

    label_attr = ExtraAttr.objects.get(klass=Segment.__name__, name='label')
    family_attr = ExtraAttr.objects.get(klass=Segment.__name__,
                                        name='label_family')
    subfamily_attr = ExtraAttr.objects.get(klass=Segment.__name__,
                                           name='label_subfamily')

    extra_attr_values = ExtraAttrValue.objects.filter(user=user,
                                                      owner_id__in=sids)

    def _value_counts(attr):
        # Count occurrences of each distinct value of one label attribute.
        pairs = extra_attr_values.filter(attr=attr).values_list(
            'value').annotate(c=Count('value'))
        return dict(pairs)

    retval = {
        'label': _value_counts(label_attr),
        'label_family': _value_counts(family_attr),
        'label_subfamily': _value_counts(subfamily_attr)
    }

    # NOTE(review): origin 'request_database_access' looks copy-pasted from
    # another view -- confirm whether the client relies on this exact value
    # before changing it.
    return dict(origin='request_database_access',
                success=True,
                warning=None,
                payload=retval)
Пример #9
0
def delete_database(request):
    """Soft-delete a database: flag it inactive and schedule async removal.

    :param request: POST must contain 'database-id'
    """
    user = request.user
    database_id = get_or_error(request.POST, 'database-id')
    database = get_or_error(Database, dict(id=database_id))

    assert_permission(user, database, DatabasePermission.ASSIGN_USER)

    # Soft delete; delete_database_async performs the actual purge later.
    database.active = False
    database.save()

    delay_in_production(delete_database_async)
Пример #10
0
def merge_audio_chunks(request):
    """
    This action should be called after the last audio chunk is uploaded.
    It will merge all the saved chunks (foo.wav__1, foo.wav__2, etc...) into foo.wav
    And import to the database
    :param request: POST must contain 'name', 'chunkCount' and 'database';
        optional 'browser-fs' (max sample rate the browser can play)
    :return: response envelope whose payload is the rendered rows of the
        newly imported file
    """
    user = request.user
    params = request.POST
    name = params['name']
    chunk_count = int(params['chunkCount'])
    # Highest sample rate the uploading browser can play; 0 = unknown.
    max_fs = int(request.POST.get('browser-fs', 0))

    # Strip the extension; it is re-appended when building the path below.
    if name.lower().endswith('.wav'):
        name = name[:-4]

    database_id = get_or_error(request.POST, 'database')
    database = get_or_error(Database, dict(id=database_id))
    assert_permission(user, database, DatabasePermission.ADD_FILES)

    wav_file_path = data_path('audio/wav/{}'.format(database_id),
                              name + '.wav')

    # Concatenate the chunk files (foo.wav__0, foo.wav__1, ...) in order.
    with open(wav_file_path, 'wb') as combined_file:
        for i in range(chunk_count):
            chunk_file_path = wav_file_path + '__' + str(i)
            with open(chunk_file_path, 'rb') as chunk_file:
                combined_file.write(chunk_file.read())

    # NOTE: `bytes` in this unpacking shadows the builtin (local scope only).
    size, comp, num_channels, fs, sbytes, block_align, bitrate, bytes, dtype = read_wav_info(
        wav_file_path)
    # comp == 3 indicates IEEE-float WAV; re-export it as standard WAV.
    if comp == 3:
        warning('File is IEEE format. Convert to standard WAV')
        audio = pydub.AudioSegment.from_file(wav_file_path)
        audio.export(wav_file_path, format='wav')

    # NOTE(review): `combined_file` is already closed here (its `with` block
    # has exited) -- presumably the helper only uses the handle's `.name`;
    # confirm it does not try to read from the file object itself.
    audio_file = _import_and_convert_audio_file(database, combined_file,
                                                max_fs)

    # Remove the now-merged chunk files.
    for i in range(chunk_count):
        chunk_file_path = wav_file_path + '__' + str(i)
        os.remove(chunk_file_path)

    added_files = AudioFile.objects.filter(id=audio_file.id)
    _, rows = get_sequence_info_empty_songs(added_files)
    return dict(origin='merge_audio_chunks',
                success=True,
                warning=None,
                payload=rows)
Пример #11
0
def delete_segments(request):
    """Flag segments inactive; actual removal happens asynchronously.

    :param request: POST must contain 'ids' (JSON list) and 'database-id'
    :return: True on success
    """
    user = request.user
    segment_ids = json.loads(get_or_error(request.POST, 'ids'))
    database_id = get_or_error(request.POST, 'database-id')
    database = get_or_error(Database, dict(id=database_id))
    assert_permission(user, database, DatabasePermission.MODIFY_SEGMENTS)

    # Only segments belonging to this database are affected (soft delete).
    Segment.objects.filter(id__in=segment_ids,
                           audio_file__database=database).update(active=False)

    delay_in_production(delete_segments_async)

    return True
Пример #12
0
def delete_database(request):
    """Soft-delete a database and schedule its asynchronous removal.

    :param request: POST must contain 'database-id'
    :return: standard response envelope with an empty payload
    """
    user = request.user
    database_id = get_or_error(request.POST, 'database-id')
    database = get_or_error(Database, dict(id=database_id))

    assert_permission(user, database, DatabasePermission.ASSIGN_USER)

    # Flag inactive now; delete_database_async performs the actual purge.
    database.active = False
    database.save()

    delay_in_production(delete_database_async)

    response = dict(origin='delete_database',
                    success=True,
                    warning=None,
                    payload=None)
    return response
Пример #13
0
def get_unsegmented_songs(request):
    """List names of audio files in a database that have no segments yet.

    :param request: POST must contain 'database-id'
    :return: list of audio file names that have no Segment
    """
    database_id = get_or_error(request.POST, 'database-id')
    user = request.user

    database = get_or_error(Database, dict(id=database_id))
    assert_permission(user, database, DatabasePermission.MODIFY_SEGMENTS)

    all_names = AudioFile.objects.filter(database=database)\
        .values_list('name', flat=True)
    segmented_names = Segment.objects.filter(audio_file__database=database)\
        .values_list('audio_file__name', flat=True).distinct()

    # Set difference gives the files nobody has segmented yet.
    return list(set(all_names) - set(segmented_names))
Пример #14
0
def get_audio_files_urls(request):
    """Return URLs for a batch of audio files in the requested audio format.

    :param request: POST must contain 'file-ids' (JSON list); optional
        'format' (defaults to the configured compressed format)
    :return: list of URL strings, one per audio file
    """
    user = request.user

    file_ids = get_or_error(request.POST, 'file-ids')
    file_ids = json.loads(file_ids)
    # Renamed from `format` to avoid shadowing the builtin of the same name.
    audio_format = request.POST.get('format', settings.AUDIO_COMPRESSED_FORMAT)
    audio_files = AudioFile.objects.filter(id__in=file_ids)

    # The files may span several databases; the user needs VIEW on every one.
    database_ids = audio_files.values_list('database', flat=True).distinct()
    databases = Database.objects.filter(id__in=database_ids)
    for database in databases:
        assert_permission(user, database, DatabasePermission.VIEW)

    return [
        audio_path(audio_file, audio_format, for_url=True)
        for audio_file in audio_files
    ]
Пример #15
0
def import_audio_chunk(request):
    """
    To facilitate sending big files, Dropzone allows uploading by chunk
    Each chunk is uploaded in one request. This function will save this chunk to the database
    by using the chunk's index as enumeration appended to the file's name
    :param request: POST must contain 'database', 'dzFilename', 'dzChunkIndex'
        and the chunk under FILES['file']
    :return: standard response envelope with empty payload
    """
    user = request.user
    params = request.POST

    database_id = get_or_error(request.POST, 'database')
    database = get_or_error(Database, dict(id=database_id))
    assert_permission(user, database, DatabasePermission.ADD_FILES)

    file = File(file=request.FILES['file'])
    name = params['dzFilename']
    chunk_index = int(params['dzChunkIndex'])

    # Strip the .wav extension; it is re-added when building the path below.
    if name.lower().endswith('.wav'):
        name = name[:-4]

    wav_file_path = data_path('audio/wav/{}'.format(database_id),
                              name + '.wav')

    # Validate name uniqueness only once, on the very first chunk.
    if chunk_index == 0:
        name_taken = AudioFile.objects.filter(database=database,
                                              name=name).exists()
        if name_taken:
            raise CustomAssertionError(
                'Error: file {} already exists in this database'.format(name))

    # Each chunk lands as <file>.wav__<index>; merge_audio_chunks joins them.
    chunk_file_path = wav_file_path + '__' + str(chunk_index)
    with open(chunk_file_path, 'wb') as f:
        f.write(file.read())

    return dict(origin='import_audio_chunk',
                success=True,
                warning=None,
                payload=None)
def import_audio_file(request):
    """
    Store uploaded file (only wav is accepted)
    :param request: must contain the file, 'database-id', 'item' (JSON with
        start/end/id and optional quality/individual/note/type/sex) and
        'track-id'
    :return: dict(id=..., name=...) of the stored AudioFile
    :raises CustomAssertionError: if a new name collides with an existing file
    """
    user = request.user
    f = request.FILES['file']

    database_id = get_or_error(request.POST, 'database-id')
    item = json.loads(get_or_error(request.POST, 'item'))
    track_id = get_or_error(request.POST, 'track-id')

    database = get_or_error(Database, dict(id=database_id))
    track = get_or_error(AudioTrack, dict(id=track_id))
    assert_permission(user, database, DatabasePermission.ADD_FILES)

    start = item['start']
    end = item['end']
    song_id = item['id']

    file = File(file=f)
    name = file.name
    if name.lower().endswith('.wav'):
        name = name[:-4]

    # If the item refers to an existing song (id not 'new:...') and the name
    # is unchanged, this is an in-place update and no unique name is needed.
    audio_file = None
    need_unique_name = True
    if not isinstance(song_id, str) or not song_id.startswith('new:'):
        audio_file = AudioFile.objects.filter(id=song_id).first()
        if audio_file and audio_file.name == name:
            need_unique_name = False

    if need_unique_name:
        is_unique = not AudioFile.objects.filter(database=database,
                                                 name=name).exists()
        if not is_unique:
            raise CustomAssertionError('File {} already exists'.format(name))

    name_wav = data_path('audio/wav/{}'.format(database.id),
                         '{}.wav'.format(name))
    name_compressed = data_path(
        'audio/{}/{}'.format(settings.AUDIO_COMPRESSED_FORMAT, database.id),
        '{}.{}'.format(name, settings.AUDIO_COMPRESSED_FORMAT))

    # Persist the raw wav, then export a compressed copy alongside it.
    with open(name_wav, 'wb') as wav_file:
        wav_file.write(file.read())

    audio = pydub.AudioSegment.from_file(name_wav)

    ensure_parent_folder_exists(name_compressed)
    audio.export(name_compressed, format=settings.AUDIO_COMPRESSED_FORMAT)
    fs = audio.frame_rate
    # Number of frames = raw byte length / bytes per frame.
    length = len(audio.raw_data) // audio.frame_width

    if audio_file is None:
        audio_file = AudioFile(name=name,
                               length=length,
                               fs=fs,
                               database=database,
                               track=track,
                               start=start,
                               end=end)
    else:
        if audio_file.name != name:
            AudioFile.set_name([audio_file], name)
        audio_file.start = start
        audio_file.end = end
        audio_file.length = length
        audio_file.save()

    quality = item.get('quality', None)
    individual_name = item.get('individual', None)
    note = item.get('note', None)
    type = item.get('type', None)
    sex = item.get('sex', None)

    if individual_name is not None:
        # Reuse an existing Individual by name, updating its gender if the
        # uploader supplied one; otherwise create it.
        individual = Individual.objects.filter(name=individual_name).first()
        if individual is None:
            individual = Individual.objects.create(name=individual_name,
                                                   gender=sex)
        elif sex is not None:
            individual.gender = sex
            individual.save()

        audio_file.individual = individual

    if quality:
        audio_file.quality = quality

    audio_file.save()
    audio_file_attrs = settings.ATTRS.audio_file

    # Bug fix: the previous code assigned `.value` on a queryset (note) or on
    # a freshly-created row without saving (type), so both values were
    # silently dropped. update_or_create persists the value either way.
    if note:
        ExtraAttrValue.objects.update_or_create(
            user=user,
            owner_id=audio_file.id,
            attr=audio_file_attrs.note,
            defaults=dict(value=note))

    if type:
        ExtraAttrValue.objects.update_or_create(
            user=user,
            owner_id=audio_file.id,
            attr=audio_file_attrs.type,
            defaults=dict(value=type))

    return dict(id=audio_file.id, name=audio_file.name)
Пример #17
0
def copy_audio_files(request):
    """
    Copy files from the source database to the target database, not copying the actual files, but everything database-
    wise is copied, so the copies don't affect the original.
    :param request: must contain 'ids' (JSON list of AudioFile ids),
        'target-database-name' and 'source-database-id'
    :return: True on success
    :raises CustomAssertionError: if the ids don't all belong to the source
        database, if any name already exists in the target, or on an
        integrity error while bulk-creating the attribute copies
    """
    user = request.user
    ids = json.loads(get_or_error(request.POST, 'ids'))
    target_database_name = get_or_error(request.POST, 'target-database-name')
    source_database_id = get_or_error(request.POST, 'source-database-id')
    target_database = get_or_error(Database, dict(name=target_database_name))
    source_database = get_or_error(Database, dict(id=source_database_id))
    assert_permission(user, source_database, DatabasePermission.COPY_FILES)
    assert_permission(user, target_database, DatabasePermission.ADD_FILES)

    # Make sure all those IDs belong to the source database
    source_audio_files = AudioFile.objects.filter(id__in=ids,
                                                  database=source_database)
    if len(source_audio_files) != len(ids):
        raise CustomAssertionError(
            'There\'s a mismatch between the song IDs you provided and the actual songs in the database'
        )

    song_values = source_audio_files \
        .values_list('id', 'fs', 'length', 'name', 'track', 'individual', 'quality', 'original')
    old_song_id_to_name = {x[0]: x[3] for x in song_values}
    old_song_names = old_song_id_to_name.values()
    old_song_ids = old_song_id_to_name.keys()

    # Make sure there is no duplication:
    duplicate_audio_files = AudioFile.objects.filter(database=target_database,
                                                     name__in=old_song_names)
    if duplicate_audio_files:
        raise CustomAssertionError(
            'Some file(s) you\'re trying to copy already exist in {}'.format(
                target_database_name))

    # We need to map old and new IDs of AudioFiles so that we can copy their ExtraAttrValue later
    songs_old_id_to_new_id = {}

    # Create Song objects one by one because they can't be bulk created
    for old_id, fs, length, name, track, individual, quality, original in song_values:
        # Make sure that we always point to the true original. E.g if AudioFile #2 is a copy of #1 and someone makes
        # a copy of AudioFile #2, the new AudioFile must still reference #1 as its original

        original_id = old_id if original is None else original

        audio_file = AudioFile.objects.create(fs=fs,
                                              length=length,
                                              name=name,
                                              track_id=track,
                                              individual_id=individual,
                                              quality=quality,
                                              original_id=original_id,
                                              database=target_database)

        songs_old_id_to_new_id[old_id] = audio_file.id

    segments = Segment.objects.filter(
        audio_file__in=songs_old_id_to_new_id.keys())
    segments_values = segments.values_list('id', 'start_time_ms',
                                           'end_time_ms', 'mean_ff', 'min_ff',
                                           'max_ff', 'audio_file__name',
                                           'audio_file__id', 'tid')

    # We need this to map old and new IDs of Segments so that we can copy their ExtraAttrValue later
    # The only reliable way to map new to old Segments is through the pair (start_time_ms, end_time_ms, song_name)
    # since they are guaranteed to be unique
    segments_old_id_to_start_end = {
        x[0]: (x[1], x[2], x[6])
        for x in segments_values
    }

    # Group the segment tuples by the (old) song they belong to.
    new_segments_info = {}
    for seg_id, start, end, mean_ff, min_ff, max_ff, song_name, song_old_id, tid in segments_values:
        segment_info = (seg_id, start, end, mean_ff, min_ff, max_ff, tid)
        if song_old_id not in new_segments_info:
            new_segments_info[song_old_id] = [segment_info]
        else:
            new_segments_info[song_old_id].append(segment_info)

    # Build unsaved Segment copies pointing at the new AudioFiles.
    segments_to_copy = []
    for song_old_id, segment_info in new_segments_info.items():
        for seg_id, start, end, mean_ff, min_ff, max_ff, tid in segment_info:
            song_new_id = songs_old_id_to_new_id[song_old_id]
            segment = Segment(start_time_ms=start,
                              end_time_ms=end,
                              mean_ff=mean_ff,
                              min_ff=min_ff,
                              max_ff=max_ff,
                              audio_file_id=song_new_id,
                              tid=tid)
            segments_to_copy.append(segment)

    Segment.objects.bulk_create(segments_to_copy)

    # Re-query the copies to learn the IDs the database assigned them.
    copied_segments = Segment.objects.filter(
        audio_file__in=songs_old_id_to_new_id.values())
    copied_segments_values = copied_segments.values_list(
        'id', 'start_time_ms', 'end_time_ms', 'audio_file__name')
    segments_new_start_end_to_new_id = {(x[1], x[2], x[3]): x[0]
                                        for x in copied_segments_values}

    # Based on two maps: from new segment (start,end) key to their ID and old segment's ID to (start,end) key
    # we can now map Segment new IDs -> old IDs
    segments_old_id_to_new_id = {}
    for old_segment_id, segment_start_end in segments_old_id_to_start_end.items(
    ):
        new_segment_id = segments_new_start_end_to_new_id[segment_start_end]
        old_segment_id = int(old_segment_id)
        new_segment_id = int(new_segment_id)
        segments_old_id_to_new_id[old_segment_id] = new_segment_id

    # Query all ExtraAttrValue of Songs, and make duplicate by replacing old song IDs by new song IDs
    song_attrs = ExtraAttr.objects.filter(klass=AudioFile.__name__)
    old_song_extra_attrs = ExtraAttrValue.objects \
        .filter(owner_id__in=old_song_ids, user=user, attr__in=song_attrs).values_list('owner_id', 'attr', 'value')
    new_song_extra_attrs = []
    for old_song_id, attr_id, value in old_song_extra_attrs:
        new_song_id = songs_old_id_to_new_id[old_song_id]
        new_song_extra_attrs.append(
            ExtraAttrValue(user=user,
                           attr_id=attr_id,
                           value=value,
                           owner_id=new_song_id))

    old_segment_ids = segments_old_id_to_start_end.keys()

    # Query all ExtraAttrValue of Segments, and make duplicate by replacing old IDs by new IDs
    segment_attrs = ExtraAttr.objects.filter(klass=Segment.__name__)
    old_segment_extra_attrs = ExtraAttrValue.objects \
        .filter(owner_id__in=old_segment_ids, user=user, attr__in=segment_attrs)\
        .values_list('owner_id', 'attr', 'value')
    new_segment_extra_attrs = []
    for old_segment_id, attr_id, value in old_segment_extra_attrs:
        new_segment_id = segments_old_id_to_new_id[int(old_segment_id)]
        new_segment_extra_attrs.append(
            ExtraAttrValue(user=user,
                           attr_id=attr_id,
                           value=value,
                           owner_id=new_segment_id))

    # Now bulk create
    # Clear any attrs that might already exist for the new songs first, to
    # avoid duplicates when the copies are inserted below.
    ExtraAttrValue.objects.filter(owner_id__in=songs_old_id_to_new_id.values(),
                                  attr__in=song_attrs).delete()

    try:
        ExtraAttrValue.objects.bulk_create(new_song_extra_attrs)
    except IntegrityError as e:
        raise CustomAssertionError(e)

    # Same clearing step for the copied segments' attrs.
    ExtraAttrValue.objects.filter(
        owner_id__in=segments_old_id_to_new_id.values(),
        attr__in=segment_attrs).delete()

    try:
        ExtraAttrValue.objects.bulk_create(new_segment_extra_attrs)
    except IntegrityError as e:
        raise CustomAssertionError(e)

    return True
def import_audio_files(request):
    """
    Store uploaded files (only wav is accepted)
    :param request: must contain a list of files and the id of the database to be stored against
    :return: the rendered rows of the newly imported files
    :raises CustomAssertionError: if any uploaded name already exists in the
        database (in which case nothing at all is imported)
    """
    user = request.user
    files = request.FILES.values()

    database_id = get_or_error(request.POST, 'database')
    database = get_or_error(Database, dict(id=database_id))
    assert_permission(user, database, DatabasePermission.ADD_FILES)

    added_files = []
    not_importable_filenames = []
    importable_files = []

    # First pass: validate every name so the import is all-or-nothing.
    for f in files:
        file = File(file=f)
        name = file.name
        if name.lower().endswith('.wav'):
            name = name[:-4]

        is_unique = not AudioFile.objects.filter(database=database,
                                                 name=name).exists()

        if not is_unique:
            not_importable_filenames.append(name)
        else:
            importable_files.append(file)

    if len(not_importable_filenames) > 0:
        raise CustomAssertionError(
            'Error: No files were imported because the following files already exist: {}'
            .format(', '.join(not_importable_filenames)))
    else:
        # Second pass: write each wav to disk, export a compressed copy, and
        # build the (unsaved) AudioFile rows for one bulk insert at the end.
        for file in importable_files:
            name = file.name
            if name.lower().endswith('.wav'):
                name = name[:-4]

            name_wav = data_path('audio/wav/{}'.format(database.id),
                                 '{}.wav'.format(name))
            name_compressed = data_path(
                'audio/{}/{}'.format(settings.AUDIO_COMPRESSED_FORMAT,
                                     database.id),
                '{}.{}'.format(name, settings.AUDIO_COMPRESSED_FORMAT))

            with open(name_wav, 'wb') as wav_file:
                wav_file.write(file.read())

            audio = pydub.AudioSegment.from_file(name_wav)

            ensure_parent_folder_exists(name_compressed)
            audio.export(name_compressed,
                         format=settings.AUDIO_COMPRESSED_FORMAT)

            fs = audio.frame_rate
            # Number of frames = raw byte length / bytes per frame.
            length = audio.raw_data.__len__() // audio.frame_width
            audio_file = AudioFile(name=name,
                                   length=length,
                                   fs=fs,
                                   database=database)
            added_files.append(audio_file)

        AudioFile.objects.bulk_create(added_files)
        # Re-query by name because bulk_create does not reliably populate IDs.
        added_files = AudioFile.objects.filter(
            database=database, name__in=[x.name for x in added_files])
        _, rows = get_sequence_info_empty_songs(added_files)
        return rows
Пример #19
0
def import_audio_metadata(request):
    """
    Update metadata (individual, quality, track) of existing AudioFiles from an uploaded CSV.
    :param request: POST must contain 'database' (id of the database the files belong to);
                    FILES must contain 'file', a CSV with columns: filename, genus, species,
                    quality, date, individual, sex, track
    :return: True if the metadata was applied successfully
    """
    user = request.user

    file = get_or_error(request.FILES, 'file')
    database_id = get_or_error(request.POST, 'database')
    database = get_or_error(Database, dict(id=database_id))
    assert_permission(user, database, DatabasePermission.ADD_FILES)

    file_data = file.read().decode("utf-8")
    reader = csv.DictReader(io.StringIO(file_data))

    supplied_fields = reader.fieldnames
    # BUG FIX: the row loop below reads row['sex'], so 'sex' (not 'gender') must be the
    # required column - otherwise a CSV that passes this validation can still crash
    # with a raw KeyError instead of a friendly error message.
    required_fields = [
        'filename', 'genus', 'species', 'quality', 'date', 'individual',
        'sex', 'track'
    ]
    missing_fields = [x for x in required_fields if x not in supplied_fields]

    if missing_fields:
        raise CustomAssertionError(
            'Field(s) {} are required but not found in your CSV file'.format(
                ','.join(missing_fields)))

    filename_to_metadata = {}

    # Cache existing objects so each individual/species/track is looked up (and created) once
    existing_individuals = {(x.name, x.species.genus, x.species.species): x
                            for x in Individual.objects.all()
                            if x.species is not None}
    existing_species = {(x.genus, x.species): x for x in Species.objects.all()}
    existing_tracks = {x.name: x for x in AudioTrack.objects.all()}

    for row in reader:
        filename = row['filename']
        species_code = row['species']
        genus = row['genus']
        quality = row['quality']
        individual_name = row['individual']
        gender = row['sex']
        date_str = row['date']
        track_name = row['track']

        # Date is optional; parse only when provided
        date = None
        if date_str:
            date = datetime.datetime.strptime(
                date_str, settings.DATE_INPUT_FORMAT).date()

        species_key = (genus, species_code)
        if species_key in existing_species:
            species = existing_species[species_key]
        else:
            species = Species(genus=genus, species=species_code)
            species.save()
            existing_species[species_key] = species

        # Create the individual on first sight; reuse it for subsequent rows
        individual_key = (individual_name, genus, species_code)
        individual = existing_individuals.get(individual_key, None)
        if individual is None:
            individual = Individual(name=individual_name,
                                    gender=gender,
                                    species=species)
            individual.save()
            existing_individuals[individual_key] = individual

        if track_name in existing_tracks:
            track = existing_tracks[track_name]
        else:
            track = AudioTrack(name=track_name, date=date)
            track.save()
            existing_tracks[track_name] = track

        filename_to_metadata[filename] = (individual, quality, track)

    existing_audio_files = AudioFile.objects.filter(
        name__in=filename_to_metadata.keys(), database=database)

    # Apply all updates atomically so a mid-way failure leaves the database unchanged
    with transaction.atomic():
        for audio_file in existing_audio_files:
            individual, quality, track = filename_to_metadata[audio_file.name]
            audio_file.individual = individual
            audio_file.quality = quality
            audio_file.track = track
            audio_file.save()

    return True
Пример #20
0
def save_segmentation(request):
    """
    Save the segmentation scheme sent from the client. Compare with the existing segmentation, there are three cases:
    1. segments that currently exist but not found in the client's scheme - they need to be deleted
    2. segments that currently exist and found in the client's scheme - they need to be updated
    3. segments that doesn't currently exist but found in the client's scheme - they need to be created

    Finally, create or update the spectrogram image (not the mask - can't do anything about the mask)
    :param request: POST must contain 'items' (JSON list of segment dicts) and 'file-id'
    :return: rows describing the saved segments, as consumed by the client-side grid
    """
    user = request.user
    items = json.loads(get_or_error(request.POST, 'items'))
    file_id = int(get_or_error(request.POST, 'file-id'))
    audio_file = get_or_error(AudioFile, dict(id=file_id))
    assert_permission(user, audio_file.database,
                      DatabasePermission.MODIFY_SEGMENTS)
    segments = Segment.objects.filter(audio_file=audio_file)

    new_segments = []
    old_segments = []
    for item in items:
        # Client marks not-yet-persisted segments with an id of the form 'new:xxx'
        item_id = item['id']
        if isinstance(item_id, str) and item_id.startswith('new:'):
            segment = Segment(audio_file=audio_file,
                              start_time_ms=item['start'],
                              end_time_ms=item['end'])
            label = item.get('label', None)
            family = item.get('label_family', None)
            subfamily = item.get('label_subfamily', None)
            note = item.get('note', None)

            new_segments.append((segment, label, family, subfamily, note))
        else:
            old_segments.append(item)

    id_to_existing_item = {x['id']: x for x in old_segments}

    to_update = []
    to_delete_id = []

    for segment in segments:
        if segment.id in id_to_existing_item:
            item = id_to_existing_item[segment.id]
            segment.start_time_ms = item['start']
            segment.end_time_ms = item['end']

            to_update.append(segment)
        else:
            # Existing segment absent from the client's scheme: soft-delete it
            to_delete_id.append(segment.id)

    label_attr = settings.ATTRS.segment.label
    family_attr = settings.ATTRS.segment.family
    subfamily_attr = settings.ATTRS.segment.subfamily
    note_attr = settings.ATTRS.segment.note

    with transaction.atomic():
        for segment in to_update:
            segment.save()

        Segment.objects.filter(id__in=to_delete_id).update(active=False)

        for segment, label, family, subfamily, note in new_segments:
            # Save once to obtain an id, then reuse it as the segment's tid
            segment.save()
            segment.tid = segment.id
            segment.save()
            # Persist the optional annotations, skipping empty values
            annotations = [(label_attr, label), (family_attr, family),
                           (subfamily_attr, subfamily), (note_attr, note)]
            for attr, value in annotations:
                if value:
                    ExtraAttrValue.objects.create(user=user,
                                                  attr=attr,
                                                  owner_id=segment.id,
                                                  value=value)

    segments = Segment.objects.filter(audio_file=audio_file)
    _, rows = bulk_get_segments_for_audio(segments,
                                          DotMap(file_id=file_id, user=user))

    # Regenerate the spectrogram and purge soft-deleted segments (async in production)
    delay_in_production(extract_spectrogram, audio_file.id)
    delay_in_production(delete_segments_async)

    return rows
Пример #21
0
def populate_context(obj, context):
    """
    Populate the template context with the user's current database, accessible
    databases, temporary databases, co-users and the request's GET parameters.
    :param obj: a view object exposing `request` and a `page_name` attribute
    :param context: the context dict to fill (modified in place)
    """
    page_name = getattr(obj, 'page_name', None)
    if page_name is None:
        page_name = obj.__class__.page_name

    user = obj.request.user
    gets = obj.request.GET

    # Expose GET params: '__x' -> 'external__x', '_x' -> 'internal_x', others verbatim
    for key, value in gets.items():
        if key.startswith('__'):
            prefix = 'external'
        elif key.startswith('_'):
            prefix = 'internal'
        else:
            prefix = ''
        context['{}{}'.format(prefix, key)] = value

    current_database = get_user_databases(user)

    specified_db = None
    db_class = current_database.__class__ if current_database is not None else Database

    if 'database' in gets:
        specified_db = gets['database']
        db_class = Database
    elif 'tmpdb' in gets:
        specified_db = gets['tmpdb']
        db_class = TemporaryDatabase

    switching = specified_db and (current_database is None
                                  or specified_db != current_database.name)
    if switching:
        current_database = get_or_error(db_class, dict(name=specified_db))

        # Persist the user's selection as an ExtraAttrValue, creating it if missing
        pref = ExtraAttrValue.objects.filter(
            attr=settings.ATTRS.user.current_database,
            owner_id=user.id,
            user=user).first()
        if pref is None:
            pref = ExtraAttrValue(attr=settings.ATTRS.user.current_database,
                                  owner_id=user.id,
                                  user=user)
        pref.value = '{}_{}'.format(db_class.__name__, current_database.id)
        pref.save()

    if db_class == Database:
        db_assignment = assert_permission(user, current_database,
                                          DatabasePermission.VIEW)
    else:
        # Temporary databases belong to the user; viewing is always permitted
        db_assignment = {'can_view': True}

    context['databases'] = get_user_accessible_databases(user)
    context['current_database'] = current_database
    context['db_assignment'] = db_assignment

    context['my_tmpdbs'] = TemporaryDatabase.objects.filter(user=user)

    if db_class == Database:
        underlying_databases = [current_database]
    else:
        underlying_databases = current_database.get_databases()

    # Everyone with at least VIEW permission on any underlying database
    other_user_ids = DatabaseAssignment.objects\
        .filter(database__in=underlying_databases, permission__gte=DatabasePermission.VIEW)\
        .values_list('user__id', flat=True)
    other_users = User.objects.filter(id__in=other_user_ids)

    viewas_name = gets.get('viewas', user.username)
    context['viewas'] = get_or_error(User, dict(username=viewas_name))
    context['other_users'] = other_users

    context['granularity'] = gets.get('granularity', 'label')
    context['page'] = page_name
Пример #22
0
def update_segments_from_csv(request):
    """
    Create/match segments from uploaded CSV rows, then update their properties.
    :param request: POST must contain 'rows' (JSON), 'grid-type', 'database-id',
                    'missing-attrs' (JSON) and 'attrs' (JSON list of column names,
                    which must include 'song', 'start_time_ms' and 'end_time_ms')
    :return: response dict with the payload produced by _change_properties_table
    """
    rows = json.loads(get_or_error(request.POST, 'rows'))
    grid_type = get_or_error(request.POST, 'grid-type')
    database_id = get_or_error(request.POST, 'database-id')
    missing_attrs = json.loads(get_or_error(request.POST, 'missing-attrs'))
    attrs = json.loads(get_or_error(request.POST, 'attrs'))
    user = request.user

    database = get_or_error(Database, dict(id=database_id))
    assert_permission(user, database, DatabasePermission.MODIFY_SEGMENTS)

    song_attr_idx = attrs.index('song')
    start_attr_idx = attrs.index('start_time_ms')
    end_attr_idx = attrs.index('end_time_ms')

    song_names = set()
    starts = set()
    ends = set()

    # The logic here is as follow: from the given CSV data, try to match each row with existing syllable in the database
    # To match, a row must contain the same song, start and end time as a segment. We use this as the key and find
    # matching value from two dictionaries: key => row and key => segment

    # Build key => row
    key2row = {}
    for row in rows:
        song = row[song_attr_idx]
        start = row[start_attr_idx]
        end = row[end_attr_idx]
        song_names.add(song)
        starts.add(start)
        ends.add(end)

        key2row[(song, start, end)] = row

    songs = AudioFile.objects.filter(name__in=song_names, database=database)
    name2song = {song.name: song for song in songs}

    # Build key => seg
    segs = Segment.objects.filter(audio_file__in=songs,
                                  start_time_ms__in=starts,
                                  end_time_ms__in=ends)
    key2seg = {(seg.audio_file.name, seg.start_time_ms, seg.end_time_ms): seg
               for seg in segs}

    # It is possible that a segment with same song, start and end time that doesn't exist in the current database
    # (and has to be created) exists elsewhere in other database. In this case, we must use the same TID given to the
    # existing segment when creating this new segment in this database

    # Query for all segments with song, start and end from ALL database
    segs_all_db = Segment.objects.filter(audio_file__name__in=song_names,
                                         start_time_ms__in=starts,
                                         end_time_ms__in=ends)
    segs_all_db_vl = segs_all_db.values_list('audio_file__name',
                                             'start_time_ms', 'end_time_ms',
                                             'tid')

    # Similarly, we map key => tid for lookup
    key2tid = {(name, start, end): tid
               for name, start, end, tid in segs_all_db_vl}

    # This is used to re-extract segment's spectrograms for songs that have new segments added
    song_to_seg_ids = {name: [] for name in name2song.keys()}

    for key, row in key2row.items():
        (song_name, start, end) = key
        start = Decimal(start)
        end = Decimal(end)
        seg = key2seg.get(key, None)
        if seg is None:
            song = name2song[song_name]
            seg = Segment.objects.create(audio_file=song,
                                         start_time_ms=start,
                                         end_time_ms=end)
            # Reuse the TID of a matching segment from another database, if any
            tid = key2tid.get(key, seg.id)
            seg.tid = tid
            seg.save()

            song_to_seg_ids[song_name].append((song, tid, start, end))

        # Although the ids of modified rows are sent back and stored at the end of each row,
        # We cannot rely on this ID because there might be errors in the uploaded CSV
        # We store the correct segment ID here
        row[-1] = seg.id

    # Re-extract spectrograms for every song that received new segments.
    # (The previous `try: ... except Exception as e: raise e` wrapper was a no-op
    # and has been removed - exceptions propagate exactly as before.)
    for song_name, song_info in song_to_seg_ids.items():
        if len(song_info) > 0:
            segs_info = [(tid, start, end)
                         for song, tid, start, end in song_info]
            song = song_info[0][0]
            extract_spectrogram(song, segs_info)

    # Finally to change all other properties (label, family, note...)
    retval = _change_properties_table(rows, grid_type, missing_attrs, attrs,
                                      user)
    return dict(origin='update_segments_from_csv',
                success=True,
                warning=None,
                payload=retval)
Пример #23
0
def import_history(request):
    """
    Import a HistoryEntry from any user to this user.
    If this operation fails, the database is intact.
    :param request: must specify either : version-id, which is the id of the HistoryEntry object to be imported to
                                          or FILES['zipfile'] which should be created somewhere by Koe for someone
    :return: True if everything goes well.
    """
    version_id = request.POST.get('version-id', None)
    zip_file = request.FILES.get('zipfile', None)
    user = request.user

    current_database = get_user_databases(user)
    if current_database is None:
        raise CustomAssertionError(
            'You don\'t have a current working database')

    assert_permission(user, current_database, DatabasePermission.ANNOTATE)

    if not (version_id or zip_file):
        raise CustomAssertionError('No ID or file provided. Abort.')

    if version_id:
        he = HistoryEntry.objects.get(id=version_id)
        file = open(history_path(he.filename), 'rb')
    else:
        file = File(file=zip_file)

    # Read the entire archive into memory, then close the underlying file.
    # BUG FIX: the handle opened from disk above was previously never closed (leak).
    filelist = {}
    try:
        with zipfile.ZipFile(file, "r") as zip_archive:
            for name in zip_archive.namelist():
                filelist[name] = zip_archive.read(name)
    finally:
        file.close()

    meta = json.loads(get_or_error(filelist, 'meta.json'))
    version = get_or_error(meta, 'version')
    backup_type = get_or_error(meta, 'type')

    if version < 4:
        raise CustomAssertionError(
            'This file format is too old and not supported anymore.')

    if backup_type == 'segmentation':
        retval = import_history_with_segmentation(current_database, user,
                                                  filelist)
        return dict(origin='import_history',
                    success=True,
                    warning=None,
                    payload=retval)

    try:
        contents = [
            get_or_error(filelist, 'segment.extraattrvalue.json'),
            get_or_error(filelist, 'audiofile.extraattrvalue.json')
        ]
        extra_attrs = json.loads(get_or_error(filelist, 'extraattr.json'))
        new_entries = []
        for content in contents:
            loaded = json.loads(content)
            new_entries += loaded
    except Exception:
        raise CustomAssertionError(
            'The history content is malformed and cannot be parsed.')

    # Remap owner and attribute ids from the exporting installation to this one
    new_entries = change_owner_and_attr_ids(new_entries, extra_attrs)

    retval = update_extra_attr_values(user, new_entries)
    return dict(origin='import_history',
                success=True,
                warning=None,
                payload=retval)
Пример #24
0
def import_audio_file(request):
    """
    Store uploaded file (only wav is accepted)
    :param request: FILES must contain 'file'; POST must contain 'database-id',
                    'item' (JSON song info), 'real-fs', 'max-fs' and 'track-id'
    :return: dict with the id and name of the stored AudioFile
    """
    user = request.user
    f = request.FILES['file']

    database_id = get_or_error(request.POST, 'database-id')
    item = json.loads(get_or_error(request.POST, 'item'))
    real_fs = int(get_or_error(request.POST, 'real-fs'))
    max_fs = int(get_or_error(request.POST, 'max-fs'))
    track_id = get_or_error(request.POST, 'track-id')

    database = get_or_error(Database, dict(id=database_id))
    track = get_or_error(AudioTrack, dict(id=track_id))
    assert_permission(user, database, DatabasePermission.ADD_FILES)

    start = item['start']
    end = item['end']
    song_id = item['id']

    file = File(file=f)

    # An id of the form 'new:xxx' means the song doesn't exist yet; otherwise look it up
    audio_file = None
    if not isinstance(song_id, str) or not song_id.startswith('new:'):
        audio_file = AudioFile.objects.filter(database=database,
                                              id=song_id).first()

    audio_file = _import_and_convert_audio_file(database, file, max_fs,
                                                real_fs, audio_file, track,
                                                start, end)

    quality = item.get('quality', None)
    individual_name = item.get('individual', None)
    note = item.get('note', None)
    song_type = item.get('type', None)  # renamed local: avoid shadowing builtin `type`
    sex = item.get('sex', None)

    if individual_name is not None:
        individual = Individual.objects.filter(name=individual_name).first()
        if individual is None:
            individual = Individual.objects.create(name=individual_name,
                                                   gender=sex)
        elif sex is not None:
            individual.gender = sex
            individual.save()

        audio_file.individual = individual

    if quality:
        audio_file.quality = quality

    audio_file.save()
    audio_file_attrs = settings.ATTRS.audio_file

    # BUG FIX: previously `.value` was assigned to a QuerySet (note) or to an object
    # that was never saved afterwards (type), so both values were silently discarded.
    # update_or_create persists them and updates any pre-existing entry.
    if note:
        ExtraAttrValue.objects.update_or_create(
            user=user,
            owner_id=audio_file.id,
            attr=audio_file_attrs.note,
            defaults=dict(value=note))

    if song_type:
        ExtraAttrValue.objects.update_or_create(
            user=user,
            owner_id=audio_file.id,
            attr=audio_file_attrs.type,
            defaults=dict(value=song_type))

    return dict(id=audio_file.id, name=audio_file.name)