Пример #1
0
def update_collections_that_reference_this_collection(collection,
                                                      query_builder, **kwargs):
    """Update all collections that reference the input collection.

    :param collection: a collection model.
    :param query_builder: an :class:`SQLAQueryBuilder` instance.
    :param bool kwargs['contents_changed']: indicates whether the input
        collection's ``contents`` value has changed.
    :param bool kwargs['deleted']: indicates whether the input collection has
        just been deleted.
    :param bool kwargs['restricted']: indicates whether the input collection
        has just been tagged as restricted.
    :returns: ``None``

    The collections to update are whatever
    ``get_collections_referencing_this_collection`` returns for
    ``collection`` (presumably the direct referrers plus, recursively, the
    collections that reference those; confirm against that helper).  If none
    of the three flags above is truthy, this function does nothing.

    If ``kwargs['restricted']`` is truthy, the restricted tag is added to
    every referencing collection that does not already carry it.

    If ``kwargs['contents_changed']`` is truthy, the ``contents_unpacked``,
    ``html`` and ``forms`` attributes of every referencing collection are
    regenerated from its ``contents``.

    If ``kwargs['deleted']`` is truthy, references to ``collection`` are first
    removed from the ``contents`` of every referencing collection and then
    ``contents_unpacked``, ``html`` and ``forms`` are regenerated.

    Whenever at least one flag is set, the ``datetime_modified`` and
    ``modifier`` values of every referencing collection are updated, the
    pre-update state of each one is passed to ``backup_collection`` and the
    changes are committed.

    """
    def update_contents_unpacked_etc(collection, **kwargs):
        # Regenerate the attributes derived from ``collection.contents``.
        deleted = kwargs.get('deleted', False)
        collection_id = kwargs.get('collection_id')
        if deleted:
            collection.contents = remove_references_to_this_collection(
                collection.contents, collection_id)
        collections_referenced = get_collections_referenced(
            collection.contents)
        collection.contents_unpacked = generate_contents_unpacked(
            collection.contents, collections_referenced)
        collection.html = h.get_HTML_from_contents(
            collection.contents_unpacked, collection.markup_language)
        # ``form_id`` rather than ``id`` so the builtin is not shadowed.
        collection.forms = [
            Session.query(Form).get(int(form_id)) for form_id in
            h.form_reference_pattern.findall(collection.contents_unpacked)
        ]

    def update_modification_values(collection, now):
        collection.datetime_modified = now
        session['user'] = Session.merge(session['user'])
        collection.modifier = session['user']

    restricted = kwargs.get('restricted', False)
    contents_changed = kwargs.get('contents_changed', False)
    deleted = kwargs.get('deleted', False)
    if not (restricted or contents_changed or deleted):
        return

    referrers = get_collections_referencing_this_collection(
        collection, query_builder)
    # Snapshot every referrer before it is mutated; the snapshots are what get
    # backed up below.
    referrer_dicts = [referrer.get_full_dict() for referrer in referrers]
    now = h.now()
    if restricted:
        restricted_tag = h.get_restricted_tag()
        for referrer in referrers:
            # Do not attach the restricted tag a second time to a collection
            # that is already restricted.
            if restricted_tag not in referrer.tags:
                referrer.tags.append(restricted_tag)
    if contents_changed:
        for referrer in referrers:
            update_contents_unpacked_etc(referrer)
    if deleted:
        for referrer in referrers:
            update_contents_unpacked_etc(referrer,
                                         collection_id=collection.id,
                                         deleted=True)
    for referrer in referrers:
        update_modification_values(referrer, now)
    for referrer_dict in referrer_dicts:
        backup_collection(referrer_dict)
    Session.add_all(referrers)
    Session.commit()
Пример #2
0
def update_collections_that_reference_this_collection(collection, query_builder, **kwargs):
    """Update all collections that reference the input collection.

    :param collection: a collection model.
    :param query_builder: an :class:`SQLAQueryBuilder` instance.
    :param bool kwargs['contents_changed']: indicates whether the input
        collection's ``contents`` value has changed.
    :param bool kwargs['deleted']: indicates whether the input collection has
        just been deleted.
    :param bool kwargs['restricted']: indicates whether the input collection
        has just been tagged as restricted.
    :returns: ``None``

    The collections to update are whatever
    ``get_collections_referencing_this_collection`` returns for
    ``collection`` (presumably the direct referrers plus, recursively, the
    collections that reference those; confirm against that helper).  If none
    of the three flags above is truthy, this function does nothing.

    If ``kwargs['restricted']`` is truthy, the restricted tag is added to
    every referencing collection that does not already carry it.

    If ``kwargs['contents_changed']`` is truthy, the ``contents_unpacked``,
    ``html`` and ``forms`` attributes of every referencing collection are
    regenerated from its ``contents``.

    If ``kwargs['deleted']`` is truthy, references to ``collection`` are first
    removed from the ``contents`` of every referencing collection and then
    ``contents_unpacked``, ``html`` and ``forms`` are regenerated.

    Whenever at least one flag is set, the ``datetime_modified`` and
    ``modifier`` values of every referencing collection are updated, the
    pre-update state of each one is passed to ``backup_collection`` and the
    changes are committed.

    """
    def update_contents_unpacked_etc(collection, **kwargs):
        # Regenerate the attributes derived from ``collection.contents``.
        deleted = kwargs.get('deleted', False)
        collection_id = kwargs.get('collection_id')
        if deleted:
            collection.contents = remove_references_to_this_collection(
                collection.contents, collection_id)
        collections_referenced = get_collections_referenced(collection.contents)
        collection.contents_unpacked = generate_contents_unpacked(
            collection.contents, collections_referenced)
        collection.html = h.get_HTML_from_contents(collection.contents_unpacked,
                                                   collection.markup_language)
        # ``form_id`` rather than ``id`` so the builtin is not shadowed.
        collection.forms = [
            Session.query(Form).get(int(form_id)) for form_id in
            h.form_reference_pattern.findall(collection.contents_unpacked)]

    def update_modification_values(collection, now):
        collection.datetime_modified = now
        session['user'] = Session.merge(session['user'])
        collection.modifier = session['user']

    restricted = kwargs.get('restricted', False)
    contents_changed = kwargs.get('contents_changed', False)
    deleted = kwargs.get('deleted', False)
    if not (restricted or contents_changed or deleted):
        return

    referrers = get_collections_referencing_this_collection(
        collection, query_builder)
    # Snapshot every referrer before it is mutated; the snapshots are what get
    # backed up below.
    referrer_dicts = [referrer.get_full_dict() for referrer in referrers]
    now = h.now()
    if restricted:
        restricted_tag = h.get_restricted_tag()
        for referrer in referrers:
            # Do not attach the restricted tag a second time to a collection
            # that is already restricted.
            if restricted_tag not in referrer.tags:
                referrer.tags.append(restricted_tag)
    if contents_changed:
        for referrer in referrers:
            update_contents_unpacked_etc(referrer)
    if deleted:
        for referrer in referrers:
            update_contents_unpacked_etc(
                referrer, collection_id=collection.id, deleted=True)
    for referrer in referrers:
        update_modification_values(referrer, now)
    for referrer_dict in referrer_dicts:
        backup_collection(referrer_dict)
    Session.add_all(referrers)
    Session.commit()
Пример #3
0
def _create_test_forms(n=100):
    """Create and commit ``n`` forms whose attributes vary with their index.

    The forms are numbered from 1 to ``n``; the number determines which
    optional attributes (tags, files, speaker, elicitor, elicitation method,
    elicitation date, source, translations) each form receives.  This gives
    the search tests a varied data set to query.

    Related models come from ``_get_test_models()`` and ``get_users()``.  The
    timestamps and dates (``today_timestamp``, ``yesterday_timestamp``,
    ``jan1`` .. ``jan4``) are names defined outside this function.

    :param int n: the number of forms to create.
    :returns: ``None``
    """
    test_models = _get_test_models()
    _viewer, contributor, administrator = get_users()
    restricted_tag = h.get_restricted_tag()
    for index in range(1, n + 1):
        upper_half = index > 50

        form = model.Form()
        transcription = u'transcription %d' % index
        # Forms numbered above 50 get an upper-cased transcription.
        form.transcription = (
            transcription.upper() if upper_half else transcription)
        form.morpheme_break = u'morpheme_break %d' % index
        form.morpheme_gloss = u'morpheme_gloss %d' % index
        form.comments = u'comments %d' % index
        form.speaker_comments = u'speaker_comments %d' % index
        form.morpheme_break_ids = u'[[[]]]'
        form.morpheme_gloss_ids = u'[[[]]]'

        translation = model.Translation()
        translation.transcription = u'translation %d' % index

        form.enterer = contributor
        form.syntactic_category = test_models['syntactic_categories'][index - 1]

        # Phonetic transcriptions, an own tag and an ungrammatical
        # translation for forms numbered above 75.
        if index > 75:
            form.phonetic_transcription = u'phonetic_transcription %d' % index
            form.narrow_phonetic_transcription = (
                u'narrow_phonetic_transcription %d' % index)
            form.tags.append(test_models['tags'][index - 1])
            translation.grammaticality = u'*'

        # Forms 66 through 85 each get one file.
        if 65 < index < 86:
            form.files.append(test_models['files'][index - 1])

        if upper_half:
            form.elicitor = contributor
            form.tags.append(restricted_tag)
            # Form 100 is left without speaker and timestamps.
            if index != 100:
                form.speaker = test_models['speakers'][0]
                form.datetime_modified = today_timestamp
                form.datetime_entered = today_timestamp
        else:
            form.elicitor = administrator
            form.speaker = test_models['speakers'][-1]
            form.datetime_modified = yesterday_timestamp
            form.datetime_entered = yesterday_timestamp

        # Elicitation method and date by block of 25 forms.
        elicitation_methods = test_models['elicitation_methods']
        if index >= 76:
            form.elicitation_method = elicitation_methods[74]
            # Forms 99 and above keep no elicitation date.
            if index < 99:
                form.date_elicited = jan4
        elif index >= 51:
            form.elicitation_method = elicitation_methods[49]
            form.date_elicited = jan3
        elif index >= 26:
            form.elicitation_method = elicitation_methods[24]
            form.date_elicited = jan2
        else:
            form.elicitation_method = elicitation_methods[0]
            form.date_elicited = jan1

        if 41 < index < 53 or index in (86, 92, 3):
            form.source = test_models['sources'][index]

        # Form 87 is the only one without its default translation.
        if index != 87:
            form.translations.append(translation)

        # Form 79 gets a second translation and a second tag.
        if index == 79:
            second_translation = model.Translation()
            second_translation.transcription = (
                u'translation %d the second' % index)
            form.translations.append(second_translation)
            form.tags.append(test_models['tags'][index - 2])

        Session.add(form)
    Session.commit()
Пример #4
0
def _create_test_forms(n=100):
    """Create and commit ``n`` forms whose attributes vary with their index.

    The forms are numbered from 1 to ``n``; the number determines which
    optional attributes (tags, files, speaker, elicitor, elicitation method,
    elicitation date, source, translations) each form receives.  This gives
    the search tests a varied data set to query.

    Related models come from ``_get_test_models()`` and ``get_users()``.  The
    timestamps and dates (``today_timestamp``, ``yesterday_timestamp``,
    ``jan1`` .. ``jan4``) are names defined outside this function.

    :param int n: the number of forms to create.
    :returns: ``None``
    """
    test_models = _get_test_models()
    _viewer, contributor, administrator = get_users()
    restricted_tag = h.get_restricted_tag()
    for index in range(1, n + 1):
        upper_half = index > 50

        form = model.Form()
        transcription = u'transcription %d' % index
        # Forms numbered above 50 get an upper-cased transcription.
        form.transcription = (
            transcription.upper() if upper_half else transcription)
        form.morpheme_break = u'morpheme_break %d' % index
        form.morpheme_gloss = u'morpheme_gloss %d' % index
        form.comments = u'comments %d' % index
        form.speaker_comments = u'speaker_comments %d' % index
        form.morpheme_break_ids = u'[[[]]]'
        form.morpheme_gloss_ids = u'[[[]]]'

        translation = model.Translation()
        translation.transcription = u'translation %d' % index

        form.enterer = contributor
        form.syntactic_category = test_models['syntactic_categories'][index - 1]

        # Phonetic transcriptions, an own tag and an ungrammatical
        # translation for forms numbered above 75.
        if index > 75:
            form.phonetic_transcription = u'phonetic_transcription %d' % index
            form.narrow_phonetic_transcription = (
                u'narrow_phonetic_transcription %d' % index)
            form.tags.append(test_models['tags'][index - 1])
            translation.grammaticality = u'*'

        # Forms 66 through 85 each get one file.
        if 65 < index < 86:
            form.files.append(test_models['files'][index - 1])

        if upper_half:
            form.elicitor = contributor
            form.tags.append(restricted_tag)
            # Form 100 is left without speaker and timestamps.
            if index != 100:
                form.speaker = test_models['speakers'][0]
                form.datetime_modified = today_timestamp
                form.datetime_entered = today_timestamp
        else:
            form.elicitor = administrator
            form.speaker = test_models['speakers'][-1]
            form.datetime_modified = yesterday_timestamp
            form.datetime_entered = yesterday_timestamp

        # Elicitation method and date by block of 25 forms.
        elicitation_methods = test_models['elicitation_methods']
        if index >= 76:
            form.elicitation_method = elicitation_methods[74]
            # Forms 99 and above keep no elicitation date.
            if index < 99:
                form.date_elicited = jan4
        elif index >= 51:
            form.elicitation_method = elicitation_methods[49]
            form.date_elicited = jan3
        elif index >= 26:
            form.elicitation_method = elicitation_methods[24]
            form.date_elicited = jan2
        else:
            form.elicitation_method = elicitation_methods[0]
            form.date_elicited = jan1

        if 41 < index < 53 or index in (86, 92, 3):
            form.source = test_models['sources'][index]

        # Form 87 is the only one without its default translation.
        if index != 87:
            form.translations.append(translation)

        # Form 79 gets a second translation and a second tag.
        if index == 79:
            second_translation = model.Translation()
            second_translation.transcription = (
                u'translation %d the second' % index)
            form.translations.append(second_translation)
            form.tags.append(test_models['tags'][index - 2])

        Session.add(form)
    Session.commit()
Пример #5
0
    def test_search(self):
        """Tests that corpora search works correctly.

        Creates a corpus containing every form of syntactic category ``S``
        whose transcription has six or more space-separated words, restricts
        one of those forms, and then checks: a filtered and paginated search,
        a vacuous search as administrator and as viewer, a search against a
        nonexistent corpus (404), a search on a nonexistent attribute (400)
        and the ``new_search`` action.

        """

        # Create a corpus defined by ``content`` that contains all sentences
        # with six or more words.

        # Get ids of all sentences with six or more words: the regular
        # expression requires five "word + space" groups followed by the start
        # of a further word.
        long_sentence_forms = Session.query(model.Form).\
            filter(and_(
                model.Form.syntactic_category.has(model.SyntacticCategory.name==u'S'),
                model.Form.transcription.op('regexp')(u'^([^ ]+ ){5}[^ ]+'))).all()
        long_sentence = long_sentence_forms[0]
        len_long_sentences = len(long_sentence_forms)
        long_sentence_ids = [f.id for f in long_sentence_forms]
        # The corpus content is the comma-delimited list of form ids.
        content = u','.join(map(str, long_sentence_ids))

        # Restrict one of the forms that will be in the corpus.
        restricted_tag = h.get_restricted_tag()
        long_sentence.tags.append(restricted_tag)
        Session.add(long_sentence)
        Session.commit()

        # Create the corpus
        name = u'Sentences with 6 or more words.'
        params = self.corpus_create_params.copy()
        params.update({
            'name': name,
            'content': content
        })
        params = json.dumps(params)
        original_corpus_count = Session.query(Corpus).count()
        response = self.app.post(url('corpora'), params, self.json_headers,
                                 self.extra_environ_admin)
        resp = json.loads(response.body)
        corpus_id = resp['id']
        new_corpus_count = Session.query(Corpus).count()
        corpus = Session.query(Corpus).get(corpus_id)
        corpus_dir = os.path.join(self.corpora_path, 'corpus_%d' % corpus_id)
        corpus_dir_contents = os.listdir(corpus_dir)
        assert new_corpus_count == original_corpus_count + 1
        assert resp['name'] == name
        assert corpus_dir_contents == []
        assert response.content_type == 'application/json'
        assert resp['content'] == content
        # The ``forms`` attribute is a collection, no repeats, that's why the following is true:
        assert len(corpus.forms) == len_long_sentences

        # Search the corpus for forms beginning in vowels.
        query = json.dumps({"query": {"filter": ['Form', 'transcription', 'regex', '^[AEIOUaeiou]']},
                "paginator": {'page': 1, 'items_per_page': 10}})
        response = self.app.post(url('/corpora/%d/search' % corpus_id), query,
            self.json_headers, self.extra_environ_admin)
        resp = json.loads(response.body)
        matches = resp['items']
        # Every match must belong to the corpus, begin with a vowel and have
        # six or more words.  ``all`` is used instead of ``not filter(...)``
        # because the latter is only meaningful where ``filter`` returns a
        # list; a filter iterator is always truthy.
        assert set(f['id'] for f in matches).issubset(long_sentence_ids)
        assert all(
            f['transcription'][0].lower() in ['a', 'e', 'i', 'o', 'u']
            for f in matches)
        assert all(len(f['transcription'].split(' ')) >= 6 for f in matches)

        # Vacuous search of the corpus returns everything.
        vacuous_query = json.dumps({"query": {"filter": ['Form', 'transcription', 'like', '%']}})
        response = self.app.post(url('/corpora/%d/search' % corpus_id), vacuous_query,
            self.json_headers, self.extra_environ_admin)
        resp = json.loads(response.body)
        assert set(f['id'] for f in resp) == set(long_sentence_ids)

        # Vacuous search as the viewer returns everything that is not restricted.
        response = self.app.post(url('/corpora/%d/search' % corpus_id), vacuous_query,
            self.json_headers, self.extra_environ_view)
        resp2 = json.loads(response.body)
        # Viewer will get 1 or 2 forms fewer (2 are restricted, 1 assuredly a long sentence.)
        assert len(resp) > len(resp2)

        # Failed search with an invalid corpus id
        response = self.app.post(url('/corpora/123456789/search'), vacuous_query,
            self.json_headers, self.extra_environ_admin, status=404)
        resp = json.loads(response.body)
        assert resp['error'] == u'There is no corpus with id 123456789'

        # Failed search with an invalid query
        query = json.dumps({"query": {"filter": ['Form', 'thingamafracasicle', 'like', '%']}})
        response = self.app.post(url('/corpora/%d/search' % corpus_id), query,
            self.json_headers, self.extra_environ_admin, status=400)
        resp = json.loads(response.body)
        assert resp['errors']['Form.thingamafracasicle'] == 'There is no attribute thingamafracasicle of Form'

        # Request GET /corpora/new_search
        response = self.app.get(url(controller='corpora', action='new_search'),
            headers=self.json_headers, extra_environ=self.extra_environ_admin)
        resp = json.loads(response.body)
        assert resp == {'search_parameters': h.get_search_parameters(SQLAQueryBuilder('Form'))}
Пример #6
0
    def test_search(self):
        """Tests that corpora search works correctly.

        Creates a corpus containing every form of syntactic category ``S``
        whose transcription has six or more space-separated words, restricts
        one of those forms, and then checks: a filtered and paginated search,
        a vacuous search as administrator and as viewer, a search against a
        nonexistent corpus (404), a search on a nonexistent attribute (400)
        and the ``new_search`` action.

        """

        # Create a corpus defined by ``content`` that contains all sentences
        # with six or more words.

        # Get ids of all sentences with six or more words: the regular
        # expression requires five "word + space" groups followed by the start
        # of a further word.
        long_sentence_forms = Session.query(model.Form).\
            filter(and_(
                model.Form.syntactic_category.has(model.SyntacticCategory.name==u'S'),
                model.Form.transcription.op('regexp')(u'^([^ ]+ ){5}[^ ]+'))).all()
        long_sentence = long_sentence_forms[0]
        len_long_sentences = len(long_sentence_forms)
        long_sentence_ids = [f.id for f in long_sentence_forms]
        # The corpus content is the comma-delimited list of form ids.
        content = u','.join(map(str, long_sentence_ids))

        # Restrict one of the forms that will be in the corpus.
        restricted_tag = h.get_restricted_tag()
        long_sentence.tags.append(restricted_tag)
        Session.add(long_sentence)
        Session.commit()

        # Create the corpus
        name = u'Sentences with 6 or more words.'
        params = self.corpus_create_params.copy()
        params.update({'name': name, 'content': content})
        params = json.dumps(params)
        original_corpus_count = Session.query(Corpus).count()
        response = self.app.post(url('corpora'), params, self.json_headers,
                                 self.extra_environ_admin)
        resp = json.loads(response.body)
        corpus_id = resp['id']
        new_corpus_count = Session.query(Corpus).count()
        corpus = Session.query(Corpus).get(corpus_id)
        corpus_dir = os.path.join(self.corpora_path, 'corpus_%d' % corpus_id)
        corpus_dir_contents = os.listdir(corpus_dir)
        assert new_corpus_count == original_corpus_count + 1
        assert resp['name'] == name
        assert corpus_dir_contents == []
        assert response.content_type == 'application/json'
        assert resp['content'] == content
        # The ``forms`` attribute is a collection, no repeats, that's why the following is true:
        assert len(corpus.forms) == len_long_sentences

        # Search the corpus for forms beginning in vowels.
        query = json.dumps({
            "query": {
                "filter": ['Form', 'transcription', 'regex', '^[AEIOUaeiou]']
            },
            "paginator": {
                'page': 1,
                'items_per_page': 10
            }
        })
        response = self.app.post(url('/corpora/%d/search' % corpus_id), query,
                                 self.json_headers, self.extra_environ_admin)
        resp = json.loads(response.body)
        matches = resp['items']
        # Every match must belong to the corpus, begin with a vowel and have
        # six or more words.  ``all`` is used instead of ``not filter(...)``
        # because the latter is only meaningful where ``filter`` returns a
        # list; a filter iterator is always truthy.
        assert set(f['id'] for f in matches).issubset(long_sentence_ids)
        assert all(
            f['transcription'][0].lower() in ['a', 'e', 'i', 'o', 'u']
            for f in matches)
        assert all(len(f['transcription'].split(' ')) >= 6 for f in matches)

        # Vacuous search of the corpus returns everything.
        vacuous_query = json.dumps(
            {"query": {
                "filter": ['Form', 'transcription', 'like', '%']
            }})
        response = self.app.post(url('/corpora/%d/search' % corpus_id),
                                 vacuous_query, self.json_headers,
                                 self.extra_environ_admin)
        resp = json.loads(response.body)
        assert set(f['id'] for f in resp) == set(long_sentence_ids)

        # Vacuous search as the viewer returns everything that is not restricted.
        response = self.app.post(url('/corpora/%d/search' % corpus_id),
                                 vacuous_query, self.json_headers,
                                 self.extra_environ_view)
        resp2 = json.loads(response.body)
        # Viewer will get 1 or 2 forms fewer (2 are restricted, 1 assuredly a long sentence.)
        assert len(resp) > len(resp2)

        # Failed search with an invalid corpus id
        response = self.app.post(url('/corpora/123456789/search'),
                                 vacuous_query,
                                 self.json_headers,
                                 self.extra_environ_admin,
                                 status=404)
        resp = json.loads(response.body)
        assert resp['error'] == u'There is no corpus with id 123456789'

        # Failed search with an invalid query
        query = json.dumps(
            {"query": {
                "filter": ['Form', 'thingamafracasicle', 'like', '%']
            }})
        response = self.app.post(url('/corpora/%d/search' % corpus_id),
                                 query,
                                 self.json_headers,
                                 self.extra_environ_admin,
                                 status=400)
        resp = json.loads(response.body)
        assert resp['errors'][
            'Form.thingamafracasicle'] == 'There is no attribute thingamafracasicle of Form'

        # Request GET /corpora/new_search
        response = self.app.get(url(controller='corpora', action='new_search'),
                                headers=self.json_headers,
                                extra_environ=self.extra_environ_admin)
        resp = json.loads(response.body)
        assert resp == {
            'search_parameters':
            h.get_search_parameters(SQLAQueryBuilder('Form'))
        }