Exemplo n.º 1
0
def get_syntactically_similar_pairs(request):
    """
    Build a class-to-class adjacency matrix over the syllables of the user's
    current database, used to find syntactically similar label pairs.

    :param request: POST request. POST['extras'] is a JSON-encoded dict that
                    must contain 'granularity' (the label level to query).
    :return: 3-tuple of plain lists: (adjacency matrix, per-class label
             frequencies, class label names)
    :raises CustomAssertionError: if the user lacks ANNOTATE permission on
                                  their current database.
    """
    # BUG FIX: the fallback must be a JSON *string* — json.loads() rejects a
    # dict, so the old default of {} raised TypeError whenever 'extras' was
    # absent instead of yielding an empty dict.
    extra_args = json.loads(request.POST.get('extras', '{}'))
    granularity = extra_args['granularity']
    user = request.user
    database = get_user_databases(user)
    permission = database.get_assigned_permission(user)
    if permission < DatabasePermission.ANNOTATE:
        raise CustomAssertionError(
            'You don\'t have permission to annotate this database')

    sids, tids = get_sids_tids(database)

    # One label per segment id; unlabelled segments get a blank label so the
    # arrays stay aligned with `sids`.
    label_arr = get_syllable_labels(user,
                                    granularity,
                                    sids,
                                    on_no_label='set_blank')
    # cls_labels: sorted unique label strings.
    # syl_label_enum_arr: per-segment index into cls_labels ("label enum").
    cls_labels, syl_label_enum_arr = np.unique(label_arr, return_inverse=True)

    enum2label = {enum: label for enum, label in enumerate(cls_labels)}
    sid2enumlabel = {
        sid: enum_label
        for sid, enum_label in zip(sids, syl_label_enum_arr)
    }

    adjacency_mat, classes_info = calc_class_ajacency(database,
                                                      syl_label_enum_arr,
                                                      enum2label,
                                                      sid2enumlabel,
                                                      count_style='forward',
                                                      self_count='append')
    # Occurrence count of each label enum, ordered by enum value so it lines
    # up with cls_labels.
    counter = Counter(syl_label_enum_arr)
    nlabels = len(counter)
    frequencies = np.array([counter[i] for i in range(nlabels)])

    return adjacency_mat.tolist(), frequencies.tolist(), cls_labels.tolist()
Exemplo n.º 2
0
def get_home_page(request):
    """
    Landing page dispatcher: anonymous visitors see the public home page;
    authenticated users are sent to their song list, or to the dashboard if
    they have no current database yet.
    """
    user = request.user
    if not user.is_authenticated:
        return render(request, 'home_page.html')

    has_database = get_user_databases(user) is not None
    destination = 'songs' if has_database else 'dashboard'
    return redirect(destination)
Exemplo n.º 3
0
def import_history(request):
    """
    Import a HistoryEntry from any user to this user.
    If this operation fails, the database is intact.
    :param request: must specify either : version-id, which is the id of the HistoryEntry object to be imported to
                                          or FILES['zipfile'] which should be created somewhere by Koe for someone
    :return: True if everything goes well.
    """
    version_id = request.POST.get('version-id', None)
    zip_file = request.FILES.get('zipfile', None)
    user = request.user

    current_database = get_user_databases(user)
    if current_database is None:
        raise CustomAssertionError(
            'You don\'t have a current working database')

    assert_permission(user, current_database, DatabasePermission.ANNOTATE)

    if not (version_id or zip_file):
        raise CustomAssertionError('No ID or file provided. Abort.')

    # A version-id takes precedence over an uploaded file.
    if version_id:
        he = HistoryEntry.objects.get(id=version_id)
        file = open(history_path(he.filename), 'rb')
    else:
        file = File(file=zip_file)

    # Slurp every archive member into memory up front so the backing file can
    # be closed immediately. FIX: the opened file was previously leaked, and
    # the name `zip_file` was shadowed (upload object vs ZipFile).
    filelist = {}
    try:
        with zipfile.ZipFile(file, "r") as archive:
            for name in archive.namelist():
                filelist[name] = archive.read(name)
    finally:
        file.close()

    meta = json.loads(get_or_error(filelist, 'meta.json'))
    version = get_or_error(meta, 'version')
    backup_type = get_or_error(meta, 'type')

    if version < 4:
        raise CustomAssertionError(
            'This file format is too old and not supported anymore.')

    # Segmentation backups follow a dedicated import path.
    if backup_type == 'segmentation':
        retval = import_history_with_segmentation(current_database, user,
                                                  filelist)
        return dict(origin='import_history',
                    success=True,
                    warning=None,
                    payload=retval)

    try:
        # Label-style backups: extra attribute values for segments and audio
        # files, plus the attribute definitions themselves.
        contents = [
            get_or_error(filelist, 'segment.extraattrvalue.json'),
            get_or_error(filelist, 'audiofile.extraattrvalue.json')
        ]
        extra_attrs = json.loads(get_or_error(filelist, 'extraattr.json'))
        new_entries = []
        for content in contents:
            loaded = json.loads(content)
            new_entries += loaded
    except Exception:
        raise CustomAssertionError(
            'The history content is malformed and cannot be parsed.')

    # Rewrite owner/attr ids so the imported entries belong to this user.
    new_entries = change_owner_and_attr_ids(new_entries, extra_attrs)

    retval = update_extra_attr_values(user, new_entries)
    return dict(origin='import_history',
                success=True,
                warning=None,
                payload=retval)
Exemplo n.º 4
0
def populate_context(obj, context):
    """
    Populate the template `context` dict with the state every page needs:
    pass-through GET parameters, the current database (possibly switched via
    GET and persisted as an ExtraAttrValue), permission assignment, temporary
    databases, other users with access, the view-as user and granularity.

    :param obj: a view-like object exposing `request` and (on itself or its
                class) `page_name`
    :param context: dict to be mutated in place; nothing is returned
    """
    page_name = getattr(obj, 'page_name', None)
    if page_name is None:
        page_name = obj.__class__.page_name

    user = obj.request.user
    gets = obj.request.GET

    # Forward GET params into the context; '__'-prefixed keys become
    # 'external__…', '_'-prefixed become 'internal_…', the rest pass through.
    for key, value in gets.items():
        if key.startswith('__'):
            context['external{}'.format(key)] = value
        elif key.startswith('_'):
            context['internal{}'.format(key)] = value
        else:
            context[key] = value

    current_database = get_user_databases(user)

    specified_db = None
    # Default to the class of the user's current database (Database if none).
    db_class = Database if current_database is None else current_database.__class__

    # An explicit ?database= or ?tmpdb= in the URL overrides the current one.
    if 'database' in gets:
        specified_db = gets['database']
        db_class = Database
    elif 'tmpdb' in gets:
        specified_db = gets['tmpdb']
        db_class = TemporaryDatabase

    if specified_db and (current_database is None
                         or specified_db != current_database.name):
        current_database = get_or_error(db_class, dict(name=specified_db))

        # Persist the switch as the user's 'current_database' ExtraAttrValue,
        # creating the record if it does not exist yet.
        current_database_value = ExtraAttrValue.objects.filter(
            attr=settings.ATTRS.user.current_database,
            owner_id=user.id,
            user=user).first()
        if current_database_value is None:
            current_database_value = ExtraAttrValue(
                attr=settings.ATTRS.user.current_database,
                owner_id=user.id,
                user=user)
        # Stored as '<ClassName>_<id>' so both Database and TemporaryDatabase
        # can be recovered later.
        current_database_value.value = '{}_{}'.format(db_class.__name__,
                                                      current_database.id)
        current_database_value.save()

    # Permissions only apply to real databases; temporary ones are viewable.
    if db_class == Database:
        db_assignment = assert_permission(user, current_database,
                                          DatabasePermission.VIEW)
    else:
        db_assignment = {'can_view': True}

    context['databases'] = get_user_accessible_databases(user)
    context['current_database'] = current_database
    context['db_assignment'] = db_assignment

    context['my_tmpdbs'] = TemporaryDatabase.objects.filter(user=user)
    # context['other_tmpdbs'] = TemporaryDatabase.objects.exclude(user=user)

    # A TemporaryDatabase aggregates several real databases; collect them so
    # we can list every user who can view any of them.
    if db_class == Database:
        underlying_databases = [current_database]
    else:
        underlying_databases = current_database.get_databases()

    other_users = DatabaseAssignment.objects\
        .filter(database__in=underlying_databases, permission__gte=DatabasePermission.VIEW)\
        .values_list('user__id', flat=True)
    other_users = User.objects.filter(id__in=other_users)

    # 'viewas' lets the user browse annotations as another user; defaults to
    # themselves and must name an existing User.
    viewas = gets.get('viewas', user.username)
    viewas = get_or_error(User, dict(username=viewas))
    context['viewas'] = viewas
    context['other_users'] = other_users

    granularity = gets.get('granularity', 'label')
    context['granularity'] = granularity
    context['page'] = page_name
Exemplo n.º 5
0
def bulk_get_song_sequence_associations(all_songs, extras):
    """
    Mine association rules over per-song syllable sequences using SPADE.

    Each song becomes a sequence of label enums (with pseudo start/end
    tokens); SPADE then finds frequent sub-sequences, returned as table rows.

    :param all_songs: a QuerySet (or list) of AudioFile-like objects
    :param extras: request extras providing granularity, user, viewas,
                   support, usegap, maxgap, mingap
    :return: (ids, rows) where each row describes one mined rule
    """
    granularity = extras.granularity
    current_database = get_user_databases(extras.user)
    viewas = extras.viewas

    # Minimum support as a fraction of sequences; converted to an absolute
    # count (>= 1) further below.
    support = float(extras.get('support', 0.01))
    use_gap = extras.usegap
    # Without gap handling, positions are consecutive (maxgap=1) and mingap
    # is effectively unbounded below.
    maxgap = extras.maxgap if use_gap else 1
    mingap = extras.mingap if use_gap else -99999

    if isinstance(current_database, Database):
        if isinstance(all_songs, QuerySet):
            all_songs = all_songs.filter(database=current_database)
        else:
            all_songs = [
                x.id for x in all_songs if x.database == current_database
            ]
        segs = Segment.objects.filter(audio_file__in=all_songs).order_by(
            'audio_file__name', 'start_time_ms')
    else:
        # TemporaryDatabase: its `ids` are segment ids directly.
        segs = Segment.objects.filter(id__in=current_database.ids)

    # Timing columns are only needed when gaps matter.
    if use_gap:
        values = segs.values_list('id', 'audio_file__id', 'start_time_ms',
                                  'end_time_ms')
    else:
        values = segs.values_list('id', 'audio_file__id')

    seg_ids = segs.values_list('id', flat=True)

    # Labels are stored as ExtraAttrValues owned by the segment, under the
    # viewas user's name.
    label_attr = ExtraAttr.objects.get(klass=Segment.__name__,
                                       name=granularity)
    labels = ExtraAttrValue.objects.filter(attr=label_attr, owner_id__in=seg_ids, user__username=viewas) \
        .values_list('owner_id', 'value')

    seg_id_to_label = {x: y for x, y in labels}
    label_set = set(seg_id_to_label.values())
    # Enums start at 1; 0 and len+1 are reserved for pseudo start/end tokens.
    labels2enums = {y: x + 1 for x, y in enumerate(label_set)}

    enums2labels = {x: y for y, x in labels2enums.items()}
    pseudo_end_id = len(label_set) + 1
    enums2labels[pseudo_end_id] = '__PSEUDO_END__'
    enums2labels[0] = '__PSEUDO_START__'

    seg_id_to_label_enum = {
        x: labels2enums[y]
        for x, y in seg_id_to_label.items()
    }

    # Bagging song syllables by song name
    songs = {}
    sequences = []
    sequence_ind = 1

    for value in values:
        seg_id = value[0]
        song_id = value[1]

        # None marks an unlabelled segment; such songs are dropped later.
        label2enum = seg_id_to_label_enum.get(seg_id, None)
        if use_gap:
            start = value[2]
            end = value[3]
            seg_info = (label2enum, start, end)
        else:
            seg_info = label2enum

        if song_id not in songs:
            segs_info = []
            songs[song_id] = segs_info
        else:
            segs_info = songs[song_id]

        segs_info.append(seg_info)

    for song_id, segs_info in songs.items():
        song_sequence = []
        has_unlabelled = False

        # This helps keep track of the current position of the syllable when the song is rid of syllable duration and
        # only gaps are retained.
        # NOTE(review): the initial offset of 10 appears to leave headroom for
        # the pseudo-start event id — confirm against the SPADE input format.
        accum_gap = 10

        # This helps keep track of the gap between this current syllable and the previous one,
        # such that we can decide to merge two syllables if their gap is too small (could also be negative)
        gap = 0

        last_syl_end = None
        for ind, seg_info in enumerate(segs_info):
            if use_gap:
                label2enum, start, end = seg_info
                if last_syl_end is None:
                    gap = 0
                else:
                    gap = start - last_syl_end

                last_syl_end = end
                accum_gap += gap

                # If the gap is too small, merge this one with the previous one, which means the eid stays the same
                if ind > 0 and gap < mingap:
                    song_sequence[-1][2].append(label2enum)
                else:
                    eid = accum_gap
                    song_sequence.append([sequence_ind, eid, [label2enum]])
            else:
                label2enum = seg_info
                eid = ind + 1
                song_sequence.append([sequence_ind, eid, [label2enum]])

            if label2enum is None:
                has_unlabelled = True
                break

        # Wrap the sequence in pseudo start/end events.
        # NOTE(review): `eid` here is the value left over from the loop above;
        # if a song had an empty segs_info this would raise NameError/IndexError
        # — presumably every song reaching here has at least one segment.
        pseudo_start = max(0, song_sequence[0][1] - 1)
        song_sequence.insert(0, [sequence_ind, pseudo_start, [0]])
        song_sequence.append([sequence_ind, eid + 1, [pseudo_end_id]])

        # Songs containing any unlabelled syllable are excluded entirely.
        if not has_unlabelled:
            sequences += song_sequence
            sequence_ind += 1

    ids = []
    rows = []
    nsequences = sequence_ind - 1

    if nsequences == 0:
        return ids, rows

    # Convert fractional support to at least one sequence, then back to a
    # fraction of nsequences as SPADE expects.
    support = max(int(nsequences * support), 1) / nsequences

    try:
        result = spade(data=sequences, support=support, maxgap=maxgap)
    except RuntimeError as e:
        raise CustomAssertionError('SPADE error: {}'.format(str(e)))
    mined_objects = result['mined_objects']

    for idx, seq in enumerate(mined_objects):
        items = seq.items
        # -1 stands in for "not applicable" in the result table.
        conf = -1 if seq.confidence is None else seq.confidence
        lift = -1 if seq.lift is None else seq.lift

        # Render each item as its label(s); multi-element items (merged
        # syllables) are parenthesised and joined with ' -&- '.
        items_str = []
        for item in items:
            item_str = '{}' if len(item.elements) == 1 else '({})'
            labels = ' -&- '.join(
                [enums2labels[element] for element in item.elements])
            item_str = item_str.format(labels)
            items_str.append(item_str)
        assocrule = ' => '.join(items_str)

        row = dict(id=idx,
                   chainlength=len(items),
                   transcount=seq.noccurs,
                   confidence=conf,
                   lift=lift,
                   support=seq.noccurs / nsequences,
                   assocrule=assocrule)

        rows.append(row)
        ids.append(idx)

    return ids, rows
Exemplo n.º 6
0
def bulk_get_song_sequences(all_songs, extras):
    """
    For the song sequence page. For each song, send the sequence of syllables in order of appearance
    :param all_songs: a QuerySet list of AudioFile
    :param extras: request extras providing granularity, user, viewas; the
                   resolved permission is written back to extras.permission
    :return: (ids, rows) — one row per song, including songs with no
             segmentation yet
    :raises CustomAssertionError: if the user lacks VIEW permission
    """
    granularity = extras.granularity
    current_database = get_user_databases(extras.user)
    permission = current_database.get_assigned_permission(extras.user)
    viewas = extras.viewas

    if permission < DatabasePermission.VIEW:
        raise CustomAssertionError(
            'You don\'t have permission to view this database')

    # Side effect: callers read the resolved permission off `extras`.
    extras.permission = permission

    # Resolve the segments and song ids for either a real Database (filter
    # songs by database) or a TemporaryDatabase (its `ids` are segment ids).
    if isinstance(current_database, Database):
        if isinstance(all_songs, QuerySet):
            all_songs = all_songs.filter(database=current_database)
            song_ids = all_songs.values_list('id', flat=True)
        else:
            all_songs = [
                x.id for x in all_songs if x.database == current_database
            ]
            song_ids = all_songs
        segs = Segment.objects.filter(audio_file__in=all_songs).order_by(
            'audio_file__name', 'start_time_ms')
    else:
        seg_ids = current_database.ids
        segs = Segment.objects.filter(id__in=seg_ids)
        song_ids = segs.values_list('audio_file').distinct()
        all_songs = AudioFile.objects.filter(id__in=song_ids)

    # Pull all segment + song metadata in one query; unpacked per-row below.
    values = segs.values_list(
        'id', 'tid', 'start_time_ms', 'end_time_ms', 'audio_file__name',
        'audio_file__id', 'audio_file__quality', 'audio_file__length',
        'audio_file__fs', 'audio_file__added', 'audio_file__track__name',
        'audio_file__track__date', 'audio_file__individual__name',
        'audio_file__individual__gender',
        'audio_file__individual__species__name')
    seg_ids = segs.values_list('id', flat=True)

    # Labels are ExtraAttrValues owned by segments, under the viewas user.
    label_attr = ExtraAttr.objects.get(klass=Segment.__name__,
                                       name=granularity)
    labels = ExtraAttrValue.objects.filter(attr=label_attr, owner_id__in=seg_ids, user__username=viewas) \
        .values_list('owner_id', 'value')

    seg_id_to_label = {x: y for x, y in labels}

    ids = []
    rows = []

    # Bagging song syllables by song name
    songs = {}

    for seg_id, tid, start, end, filename, song_id, quality, length, fs, added, track, date, indv, gender, species \
            in values:
        if song_id not in songs:
            # First segment of this song: build the song-level info once.
            url = reverse('segmentation', kwargs={'file_id': song_id})
            # Markdown-style link rendered by the grid: [url](filename).
            url = '[{}]({})'.format(url, filename)
            # length is in samples; fs is the sample rate.
            duration_ms = round(length * 1000 / fs)
            song_info = dict(filename=url,
                             track=track,
                             individual=indv,
                             sex=gender,
                             quality=quality,
                             record_date=date,
                             added=added.date(),
                             duration=duration_ms,
                             species=species)
            segs_info = []
            songs[song_id] = dict(song=song_info, segs=segs_info)
        else:
            segs_info = songs[song_id]['segs']

        label = seg_id_to_label.get(seg_id, '__NONE__')
        segs_info.append((tid, label, start, end))

    # Flatten each song's segments into parallel sequence columns.
    for song_id, info in songs.items():
        song_info = info['song']
        segs_info = info['segs']

        sequence_labels = []
        sequence_starts = []
        sequence_ends = []
        sequence_tids = []

        for tid, label, start, end in segs_info:
            sequence_labels.append(label)
            sequence_starts.append(start)
            sequence_ends.append(end)
            sequence_tids.append(tid)

        # Quoted, dash-joined display form, e.g. "a"-"b"-"c".
        sequence_str = '-'.join('\"{}\"'.format(x) for x in sequence_labels)

        row = song_info
        row['id'] = song_id
        row['sequence'] = sequence_str
        row['sequence-labels'] = sequence_labels
        row['sequence-starts'] = sequence_starts
        row['sequence-ends'] = sequence_ends
        row['sequence-tids'] = sequence_tids

        ids.append(song_id)
        rows.append(row)

    # Now we have to deal with songs without any segmentation done
    empty_songs = all_songs.exclude(id__in=songs.keys())

    _ids, _rows = get_sequence_info_empty_songs(empty_songs)
    ids += _ids
    rows += _rows

    # Merge user-defined song-level extra attributes into each row.
    extra_attrs = ExtraAttr.objects.filter(klass=AudioFile.__name__)
    extra_attr_values_list = ExtraAttrValue.objects\
        .filter(user__username=viewas, attr__in=extra_attrs, owner_id__in=song_ids)\
        .values_list('owner_id', 'attr__name', 'value')

    extra_attr_values_lookup = {}
    for id, attr, value in extra_attr_values_list:
        if id not in extra_attr_values_lookup:
            extra_attr_values_lookup[id] = {}
        extra_attr_dict = extra_attr_values_lookup[id]
        extra_attr_dict[attr] = value

    for song_id, row in zip(ids, rows):
        extra_attr_dict = extra_attr_values_lookup.get(song_id, {})
        for attr in extra_attr_dict:
            row[attr] = extra_attr_dict[attr]

    return ids, rows
Exemplo n.º 7
0
def bulk_get_exemplars(objs, extras):
    """
    Return rows containing n exemplars per class. Each row is one class. Class can be label, label_family,
    label_subfamily
    :param objs: a list of Segments
    :param extras: must contain key 'class', value can be one of 'label', 'label_family', 'label_subfamily'
    :return: (ids, rows) — ids are class names; each row has the class name,
             its total count, and (segment id, tid) pairs for its exemplars
    """
    granularity = extras.granularity
    viewas = extras.viewas
    current_database = get_user_databases(extras.user)

    # Build id -> tid for the segments in scope, handling both a real
    # Database (filter by the segment's audio file's database) and a
    # TemporaryDatabase (its `ids` are segment ids directly).
    if isinstance(current_database, Database):
        if isinstance(objs, QuerySet):
            id2tid = {
                x: y
                for x, y in objs.filter(
                    audio_file__database=current_database).values_list(
                        'id', 'tid')
            }
            ids = id2tid.keys()
        else:
            objs = [
                x for x in objs if x.audio_file.database == current_database
            ]
            id2tid = {x.id: x.tid for x in objs}
            ids = id2tid.keys()
    else:
        ids = current_database.ids
        segs = Segment.objects.filter(id__in=ids)
        id2tid = {x: y for x, y in segs.values_list('id', 'tid')}

    # Labels sorted case-insensitively so equal classes are adjacent — the
    # group-by loop below relies on this ordering.
    values = ExtraAttrValue.objects.filter(attr__klass=Segment.__name__, attr__name=granularity, owner_id__in=ids,
                                           user__username=viewas) \
        .order_by(Lower('value'), 'owner_id').values_list('value', 'owner_id')

    # Accumulates (class name, count, exemplar segment ids); the first entry
    # is a ('' , 0, None) sentinel which is filtered out when building rows.
    class_to_exemplars = []
    current_class = ''
    current_exemplars_list = None
    current_exemplars_count = 0
    total_exemplars_count = 0

    for cls, owner_id in values:
        if cls:
            cls = cls.strip()
            if cls:
                # Class boundary (case-insensitive): flush the previous
                # class's accumulated state and start a new group.
                if cls.lower() != current_class.lower():
                    class_to_exemplars.append(
                        (current_class, total_exemplars_count,
                         current_exemplars_list))
                    current_exemplars_count = 0
                    current_class = cls
                    total_exemplars_count = 0
                    current_exemplars_list = [owner_id]
                else:
                    current_exemplars_list.append(owner_id)
                    # NOTE(review): current_exemplars_count is maintained but
                    # never read — possibly a leftover from capping exemplars
                    # at n per class; confirm before removing.
                    current_exemplars_count += 1

                total_exemplars_count += 1

    # Flush the final class group (also re-appends the sentinel when there
    # were no labelled values; it is filtered out below).
    class_to_exemplars.append(
        (current_class, total_exemplars_count, current_exemplars_list))

    rows = []
    ids = []
    for cls, count, exemplar_ids in class_to_exemplars:
        # Skip the empty-name sentinel entry.
        if cls:
            exemplar_id2tid = [(x, id2tid[x]) for x in exemplar_ids]
            row = dict(id=cls, count=count, spectrograms=exemplar_id2tid)
            row['class'] = cls
            rows.append(row)
            ids.append(cls)

    return ids, rows