Пример #1
0
    def test_get_articles(self):
        """Check get_articles() for invalid splits, the no-split copy and real splits."""
        from amcat.models import Sentence

        def _get_articles(art, sents):
            # list() forces full iteration, so an error that get_articles()
            # raises only while being iterated still surfaces in assertRaises.
            return list(get_articles(art, sents))

        # Should raise exception if sentences not in article
        article, sentences = self.create_test_sentences()
        s1 = Sentence.objects.filter(id=amcattest.create_test_sentence().id)
        self.assertRaises(ValueError, _get_articles, article, s1)

        # Should raise an exception if we try to split on headline
        self.assertRaises(ValueError, _get_articles, article,
                          sentences.filter(parnr=1))

        # Should return a "copy", with byline in "text" property
        arts = _get_articles(article, Sentence.objects.none())
        # Explicit loop instead of map(): map() is lazy on Python 3, so the
        # save() calls would never run there.
        for art in arts:
            art.save()

        self.assertEqual(len(arts), 1)
        sbd.create_sentences(arts[0])

        self.assertEqual([s.sentence for s in sentences[1:]],
                         [s.sentence for s in arts[0].sentences.all()[1:]])

        self.assertIn("foo", arts[0].text)

        # Should be able to split on byline
        self.assertEqual(2, len(_get_articles(article, sentences[1:2])))
        a, b = _get_articles(article, sentences[4:5])

        # Check that the text of the split articles contains what we expect
        self.assertNotIn("Einde", a.text)
        self.assertIn("Einde", b.text)
Пример #2
0
    def run(self, _input=None):
        """Create sentences for the not-yet-split articles of the given sets.

        The article sets are read from ``self.options['articlesets']``. Every
        article in those sets that has no matching sentence row is passed to
        ``sbd.create_sentences``; progress is logged every 100 articles.
        ``_input`` is not used.
        """
        sets = self.options['articlesets']
        log.info("Listing articles from sets {sets}".format(sets=sets))

        # Ids of every article in the sets, and of those that already have a
        # sentence (i.e. were split before)
        in_sets = Article.objects.filter(articlesets_set__in=sets)
        all_ids = in_sets.values_list("id", flat=True)
        split_ids = in_sets.filter(sentences__id__gte=0).values_list(
            "id", flat=True)

        # Load only headline, byline and text for the articles still to split
        pending_ids = set(all_ids) - set(split_ids)
        to_split = Article.objects.filter(id__in=pending_ids)
        to_split = to_split.only("headline", "byline", "text")
        n = len(to_split)

        log.info("Total articles: {m}. To be split: {n}.".format(
            m=len(all_ids), n=n))
        for i, article in enumerate(to_split):
            if i % 100 == 0:
                log.info("Processing article {i}/{n}".format(i=i, n=n))

            sbd.create_sentences(article)

        log.info("Splitted {n} articles!".format(n=n))
    def test_get_articles(self):
        """Verify get_articles(): rejected inputs, the plain copy, and two-way splits."""
        from amcat.models import Sentence

        def _get_articles(art, sents):
            # Materialise the result so that an exception raised during
            # iteration is triggered inside this call.
            return list(get_articles(art, sents))

        # Should raise exception if sentences not in article
        article, sentences = self.create_test_sentences()
        s1 = Sentence.objects.filter(id=amcattest.create_test_sentence().id)
        self.assertRaises(ValueError, _get_articles, article, s1)

        # Should raise an exception if we try to split on headline
        self.assertRaises(ValueError, _get_articles, article, sentences.filter(parnr=1))

        # Should return a "copy", with byline in "text" property
        arts = _get_articles(article, Sentence.objects.none())
        # A for-loop is used because map() returns a lazy iterator on
        # Python 3; with map() the articles would never be saved there.
        for art in arts:
            art.save()

        self.assertEqual(len(arts), 1)
        sbd.create_sentences(arts[0])

        self.assertEqual(
            [s.sentence for s in sentences[1:]],
            [s.sentence for s in arts[0].sentences.all()[1:]]
        )

        self.assertIn("foo", arts[0].text)

        # Should be able to split on byline
        self.assertEqual(2, len(_get_articles(article, sentences[1:2])))
        a, b = _get_articles(article, sentences[4:5])

        # Check that the text of the split articles contains what we expect
        self.assertNotIn("Einde", a.text)
        self.assertIn("Einde", b.text)
Пример #4
0
 def test_create_sentences(self):
     """create_sentences() should store the title plus every sentence of the text."""
     title = "This is the title"
     body = "A sentence.\n\nAnother sentence. And yet a third"
     article = amcattest.create_test_article(title=title, text=body)
     create_sentences(article)

     # Compare as a set of (parnr, sentnr, sentence) so row order is irrelevant
     stored = {
         (row.parnr, row.sentnr, row.sentence)
         for row in Sentence.objects.filter(article=article.id)
     }
     expected = {
         (1, 1, title),
         (2, 1, "A sentence"),
         (3, 1, "Another sentence"),
         (3, 2, "And yet a third"),
     }
     self.assertEqual(stored, expected)
Пример #5
0
 def test_create_sentences(self):
     """Splitting a two-paragraph article yields the title row and three sentences."""
     headline = "This is the title"
     content = "A sentence.\n\nAnother sentence. And yet a third"
     art = amcattest.create_test_article(title=headline, text=content)
     create_sentences(art)

     # Expected (parnr, sentnr, sentence) triples; the title is paragraph 1
     wanted = {
         (1, 1, headline),
         (2, 1, "A sentence"),
         (3, 1, "Another sentence"),
         (3, 2, "And yet a third"),
     }
     rows = Sentence.objects.filter(article=art.id)
     found = {(r.parnr, r.sentnr, r.sentence) for r in rows}
     self.assertEqual(found, wanted)
Пример #6
0
def handle_split(form, project, article, sentences):
    """Split ``article`` on ``sentences`` and apply the set changes from ``form``.

    The articles produced by ``get_articles(article, sentences)`` are saved
    and sentence-split, then added to / removed from article sets according
    to these ``form.cleaned_data`` keys: ``add_splitted_to_sets``,
    ``add_splitted_to_all``, ``remove_from_sets``, ``remove_from_all_sets``,
    ``add_splitted_to_new_set``, ``add_to_sets`` and ``add_to_new_set``.

    Returns ``locals()``; the local variable names are therefore part of the
    result and must not be renamed. ``new_splitted_set`` and ``new_set`` are
    only present when the corresponding form option was given.

    Raises ValueError if ``form`` is invalid.
    """
    articles = list(get_articles(article, sentences))

    # Validate before writing anything, so that an invalid form does not
    # leave freshly saved split articles behind.
    if not form.is_valid():
        raise ValueError("Form invalid: {form.errors}".format(form=form))

    # We won't use bulk_create yet, as it bypasses save() and doesn't
    # insert ids
    for art in articles:
        art.save()
        sbd.create_sentences(art)

    # Context variables for template
    form_data = form.cleaned_data
    all_sets = list(project.all_articlesets().filter(articles=article))

    # Add split articles to existing sets
    for aset in form_data["add_splitted_to_sets"]:
        aset.add_articles(articles)

    # Add split articles to the sets in which the original article lives
    if form_data["add_splitted_to_all"]:
        asets = project.all_articlesets().filter(articles=article).only("id")
        for aset in asets:
            aset.add_articles(articles)

    # Remove the original article from the selected sets
    if form_data["remove_from_sets"]:
        for aset in form_data["remove_from_sets"]:
            aset.remove_articles([article])

    # Remove the original article from every set of this project
    if form_data["remove_from_all_sets"]:
        for aset in ArticleSet.objects.filter(project=project,
                                              articles=article).distinct():
            aset.remove_articles([article])

    # Put the split articles in a newly created set
    if form_data["add_splitted_to_new_set"]:
        new_splitted_set = ArticleSet.create_set(
            project, form_data["add_splitted_to_new_set"], articles)

    # Add the original article to the selected sets
    if form_data["add_to_sets"]:
        for articleset in form_data["add_to_sets"]:
            articleset.add_articles([article])

    # Put the original article in a newly created set
    if form_data["add_to_new_set"]:
        new_set = ArticleSet.create_set(project, form_data["add_to_new_set"],
                                        [article])

    return locals()
Пример #7
0
def handle_split(form, project, article, sentences):
    """Split ``article`` on ``sentences`` and update article sets as ``form`` asks.

    The result of ``get_articles(article, sentences)`` is saved and
    sentence-split. The split articles and the original are then added to or
    removed from article sets, driven by these ``form.cleaned_data`` keys:
    ``add_splitted_to_sets``, ``add_splitted_to_all``, ``remove_from_sets``,
    ``remove_from_all_sets``, ``add_splitted_to_new_set``, ``add_to_sets``
    and ``add_to_new_set``.

    Returns ``locals()``, so local variable names are part of the result and
    are kept stable. ``new_splitted_set`` / ``new_set`` exist only when the
    matching form option was supplied.

    Raises ValueError if ``form`` is invalid.
    """
    articles = list(get_articles(article, sentences))

    # Reject an invalid form before any article is saved; otherwise the
    # error would be raised only after the split articles were written.
    if not form.is_valid():
        raise ValueError("Form invalid: {form.errors}".format(form=form))

    # We won't use bulk_create yet, as it bypasses save() and doesn't
    # insert ids
    for art in articles:
        art.save()
        sbd.create_sentences(art)

    # Context variables for template
    form_data = form.cleaned_data
    all_sets = list(project.all_articlesets().filter(articles=article))

    # Add split articles to existing sets
    for aset in form_data["add_splitted_to_sets"]:
        aset.add_articles(articles)

    # Add split articles to the sets in which the original article lives
    if form_data["add_splitted_to_all"]:
        asets = project.all_articlesets().filter(articles=article).only("id")
        for aset in asets:
            aset.add_articles(articles)

    # Remove the original article from the selected sets
    if form_data["remove_from_sets"]:
        for aset in form_data["remove_from_sets"]:
            aset.remove_articles([article])

    # Remove the original article from every set of this project
    if form_data["remove_from_all_sets"]:
        for aset in ArticleSet.objects.filter(project=project, articles=article).distinct():
            aset.remove_articles([article])

    # Put the split articles in a newly created set
    if form_data["add_splitted_to_new_set"]:
        new_splitted_set = ArticleSet.create_set(project, form_data["add_splitted_to_new_set"], articles)

    # Add the original article to the selected sets
    if form_data["add_to_sets"]:
        for articleset in form_data["add_to_sets"]:
            articleset.add_articles([article])

    # Put the original article in a newly created set
    if form_data["add_to_new_set"]:
        new_set = ArticleSet.create_set(project, form_data["add_to_new_set"], [article])

    return locals()
    def run(self, _input=None):
        """Sentence-split all articles in the configured sets that lack sentences.

        Takes the sets from ``self.options['articlesets']``, determines which
        of their articles have no matching sentence row, and calls
        ``sbd.create_sentences`` on each of those while logging progress.
        ``_input`` is not used.
        """
        sets = self.options['articlesets']
        log.info("Listing articles from sets {sets}".format(sets=sets))

        # Determine which articles are already split, and which are not
        candidates = Article.objects.filter(articlesets_set__in=sets)
        all_ids = candidates.values_list("id", flat=True)
        done_ids = candidates.filter(sentences__id__gte=0)
        done_ids = done_ids.values_list("id", flat=True)

        # Get articles to be split, restricted to headline, byline and text
        todo_ids = set(all_ids) - set(done_ids)
        to_split = Article.objects.filter(id__in=todo_ids).only(
            "headline", "byline", "text")
        n = len(to_split)

        summary = "Total articles: {m}. To be split: {n}."
        log.info(summary.format(m=len(all_ids), n=n))

        for i, article in enumerate(to_split):
            # Report progress on every 100th article, starting with the first
            if i % 100 == 0:
                log.info("Processing article {i}/{n}".format(i=i, n=n))

            sbd.create_sentences(article)

        log.info("Splitted {n} articles!".format(n=n))
    def test_handle_split(self):
        """Run handle_split() once per form option and check set membership."""
        # NOTE(review): ES is not referenced in this test; the import is kept
        # in case it is needed for its side effects - confirm and remove.
        from amcat.tools.amcates import ES
        from amcat.tools import amcattest
        from functools import partial

        article, _sentences = self.create_test_sentences()
        project = amcattest.create_test_project()
        aset1 = amcattest.create_test_set(4, project=project)
        aset2 = amcattest.create_test_set(5, project=project)
        # aset3 is created without project=...; the assertions below expect
        # it not to be one of project's article sets.
        aset3 = amcattest.create_test_set(0)

        for _set in [aset1, aset2]:
            for _article in _set.articles.all():
                sbd.create_sentences(_article)

        a1 = aset1.articles.all()[0]

        aset1.add_articles([article])
        aset3.add_articles([a1])

        # Factory for a form bound to this project/article; only the
        # submitted data differs per scenario.
        form = partial(navigator.forms.SplitArticleForm, project, article, initial={
            "remove_from_sets" : False
        })

        # Test form defaults (should do nothing!)
        f = form(dict())
        self.assertTrue(f.is_valid())
        handle_split(f, project, article, Sentence.objects.none())

        self.assertEqual(5, aset1.articles.all().count())
        self.assertEqual(5, aset2.articles.all().count())
        self.assertEqual(1, aset3.articles.all().count())

        self.assertTrue(self.article_in(aset1, article))
        self.assertFalse(self.article_in(aset2, article))
        self.assertFalse(self.article_in(aset3, article))

        # Passing invalid form should raise exception
        f = form(dict(add_to_sets=[-1]))
        self.assertFalse(f.is_valid())
        self.assertRaises(ValueError, handle_split, f, project, article, Sentence.objects.none())

        # Test add_to_new_set
        f = form(dict(add_to_new_set="New Set 1"))
        self.assertTrue(f.is_valid())
        handle_split(f, project, article, Sentence.objects.none())
        aset = project.all_articlesets().filter(name="New Set 1")
        self.assertTrue(aset.exists())
        self.assertEqual(project, aset[0].project)

        # Test add_to_sets
        f = form(dict(add_to_sets=[aset3.id]))
        self.assertFalse(f.is_valid())
        f = form(dict(add_to_sets=[aset2.id]))
        self.assertTrue(f.is_valid())
        handle_split(f, project, article, Sentence.objects.none())
        self.assertTrue(self.article_in(aset2, article))

        # Test add_splitted_to_new_set
        f = form(dict(add_splitted_to_new_set="New Set 2"))
        self.assertTrue(f.is_valid())
        handle_split(f, project, article, Sentence.objects.none())
        aset = project.all_articlesets().filter(name="New Set 2")
        self.assertTrue(aset.exists())
        self.assertEqual(project, aset[0].project)
        self.assertEqual(1, aset[0].articles.count())
        self.assertFalse(self.article_in(aset[0], article))

        # Test add_splitted_to_sets
        f = form(dict(add_splitted_to_sets=[aset2.id]))
        self.assertTrue(f.is_valid())
        handle_split(f, project, article, Sentence.objects.none())
        self.assertIn(article, aset2.articles.all())

        # Test remove_from_sets
        f = form(dict(remove_from_sets=[aset1.id]))
        self.assertTrue(f.is_valid())
        handle_split(f, project, article, Sentence.objects.none())
        self.assertNotIn(article, aset1.articles.all())

        # Test remove_from_all_sets
        aset1.add_articles([article])
        aset2.add_articles([article])
        aset3.add_articles([article])

        f = form(dict(remove_from_all_sets=True))
        self.assertTrue(f.is_valid())
        handle_split(f, project, article, Sentence.objects.none())

        self.assertIn(aset1, project.all_articlesets())
        self.assertIn(aset2, project.all_articlesets())
        self.assertNotIn(aset3, project.all_articlesets())

        # Only the sets belonging to project should have lost the article
        self.assertFalse(self.article_in(aset1, article))
        self.assertFalse(self.article_in(aset2, article))
        self.assertTrue(self.article_in(aset3, article))
 def create_test_sentences(self):
     """Create and sentence-split a test article; return it with its sentences.

     The article gets byline "foo" and a text of three identical two-sentence
     paragraphs followed by "Einde.". The second element of the returned
     pair is ``article.sentences.all()``.
     """
     body = "Dit is. Tekst.\n\n"*3 + "Einde."
     article = amcattest.create_test_article(byline="foo", text=body)
     sbd.create_sentences(article)
     return article, article.sentences.all()
def handle_split(form, project, article, sentences):
    """Split ``article`` on ``sentences``, update sets per ``form``, refresh indices.

    The articles from ``get_articles(article, sentences)`` are saved and
    sentence-split. Set membership of the split articles and of the original
    is then changed according to these ``form.cleaned_data`` keys:
    ``add_splitted_to_sets``, ``add_splitted_to_all``, ``remove_from_sets``,
    ``remove_from_all_sets``, ``add_splitted_to_new_set``, ``add_to_sets``
    and ``add_to_new_set``. Finally ``refresh_index()`` is called on every
    set collected in ``dirty_sets``.

    Returns ``locals()``, so local variable names are part of the result and
    are kept stable. ``new_splitted_set`` / ``new_set`` exist only when the
    matching form option was supplied.

    Raises ValueError if ``form`` is invalid.
    """
    articles = list(get_articles(article, sentences))

    # Validate before writing anything, so that an invalid form does not
    # leave freshly saved split articles behind.
    if not form.is_valid():
        raise ValueError("Form invalid: {form.errors}".format(form=form))

    # We won't use bulk_create yet, as it bypasses save() and doesn't
    # insert ids
    for art in articles:
        art.save()
        sbd.create_sentences(art)

    # Context variables for template
    form_data = form.cleaned_data
    all_sets = list(project.all_articlesets().filter(articles=article))

    # Keep a list of touched sets, so we can invalidate their indices
    dirty_sets = ArticleSet.objects.none()

    # Add split articles to existing sets by inserting the set/article link
    # rows directly
    ArticleSet.articles.through.objects.bulk_create([
        ArticleSet.articles.through(articleset=aset, article=art) for
            art in articles for aset in form_data["add_splitted_to_sets"]
    ])

    # Collect changed sets
    for field in ("add_splitted_to_sets", "remove_from_sets", "add_to_sets"):
        dirty_sets |= form_data[field]

    # Add split articles to the sets in which the original article lives
    if form_data["add_splitted_to_all"]:
        articlesetarts = ArticleSet.articles.through.objects.filter(article=article, articleset__project=project)

        ArticleSet.articles.through.objects.bulk_create([
            ArticleSet.articles.through(articleset=asetart.articleset, article=art)
                for art in articles for asetart in articlesetarts
        ])

        # NOTE(review): dirty_sets is only iterated at the end of this
        # function. If it is a lazily evaluated queryset, this
        # filter(articles=article) runs after the removals below and may miss
        # sets that have lost the original article by then - confirm.
        dirty_sets |= project.all_articlesets().filter(articles=article).only("id")

    # Remove the original article from the selected sets
    if form_data["remove_from_sets"]:
        for aset in form_data["remove_from_sets"]:
            aset.remove_articles([article])

    # Remove the original article from every set of this project.
    # NOTE(review): these sets are not added to dirty_sets here - confirm
    # whether their index needs refreshing too.
    if form_data["remove_from_all_sets"]:
        for aset in ArticleSet.objects.filter(project=project, articles=article).distinct():
            aset.remove_articles([article])

    # Put the split articles in a newly created set
    if form_data["add_splitted_to_new_set"]:
        new_splitted_set = ArticleSet.create_set(project, form_data["add_splitted_to_new_set"], articles)

    # Add the original article to the selected sets
    if form_data["add_to_sets"]:
        for articleset in form_data["add_to_sets"]:
            articleset.add_articles([article])

    # Put the original article in a newly created set
    if form_data["add_to_new_set"]:
        new_set = ArticleSet.create_set(project, form_data["add_to_new_set"], [article])

    for ds in dirty_sets:
        ds.refresh_index()

    return locals()
Пример #12
0
 def create_test_sentences(self):
     """Return a sentence-split test article together with its sentences.

     The article is created with byline "foo"; its text repeats one
     two-sentence paragraph three times and ends with "Einde.". The result is
     the pair ``(article, article.sentences.all())``.
     """
     paragraphs = "Dit is. Tekst.\n\n" * 3
     text = paragraphs + "Einde."
     article = amcattest.create_test_article(byline="foo", text=text)
     sbd.create_sentences(article)
     sentences = article.sentences.all()
     return article, sentences
Пример #13
0
    def test_handle_split(self):
        """Run handle_split() once per form option and check set membership.

        Each article set gets a coding job, which is passed to
        ``self.article_in`` together with the set.
        """
        from amcat.tools import amcattest
        from functools import partial

        article, _sentences = self.create_test_sentences()
        project = amcattest.create_test_project()
        aset1 = amcattest.create_test_set(4, project=project)
        aset2 = amcattest.create_test_set(5, project=project)
        # aset3 is created without project=...; the assertions below expect
        # it not to be one of project's article sets.
        aset3 = amcattest.create_test_set(0)

        # Creates a codingjob for each articleset, as handle_split should account
        # for "codedarticlesets" as well.
        cj1 = amcattest.create_test_job(articleset=aset1)
        cj2 = amcattest.create_test_job(articleset=aset2)
        cj3 = amcattest.create_test_job(articleset=aset3)

        for _set in [aset1, aset2]:
            for _article in _set.articles.all():
                sbd.create_sentences(_article)

        a1 = aset1.articles.all()[0]

        aset1.add_articles([article])
        aset3.add_articles([a1])

        # Factory for a form bound to this project/article; only the
        # submitted data differs per scenario.
        form = partial(navigator.forms.SplitArticleForm,
                       project,
                       article,
                       initial={"remove_from_sets": False})

        # Test form defaults (should do nothing!)
        f = form(dict())
        self.assertTrue(f.is_valid())
        handle_split(f, project, article, Sentence.objects.none())

        self.assertEqual(5, aset1.articles.all().count())
        self.assertEqual(5, aset2.articles.all().count())
        self.assertEqual(1, aset3.articles.all().count())

        self.assertTrue(self.article_in(cj1, aset1, article))
        self.assertFalse(self.article_in(cj2, aset2, article))
        self.assertFalse(self.article_in(cj3, aset3, article))

        # Passing invalid form should raise exception
        f = form(dict(add_to_sets=[-1]))
        self.assertFalse(f.is_valid())
        self.assertRaises(ValueError, handle_split, f, project, article,
                          Sentence.objects.none())

        # Test add_to_new_set
        f = form(dict(add_to_new_set="New Set 1"))
        self.assertTrue(f.is_valid())
        handle_split(f, project, article, Sentence.objects.none())
        aset = project.all_articlesets().filter(name="New Set 1")
        self.assertTrue(aset.exists())
        self.assertEqual(project, aset[0].project)

        # Test add_to_sets
        f = form(dict(add_to_sets=[aset3.id]))
        self.assertFalse(f.is_valid())
        f = form(dict(add_to_sets=[aset2.id]))
        self.assertTrue(f.is_valid())
        handle_split(f, project, article, Sentence.objects.none())
        self.assertTrue(self.article_in(cj2, aset2, article))

        # Test add_splitted_to_new_set
        f = form(dict(add_splitted_to_new_set="New Set 2"))
        self.assertTrue(f.is_valid())
        handle_split(f, project, article, Sentence.objects.none())
        aset = project.all_articlesets().filter(name="New Set 2")
        self.assertTrue(aset.exists())
        self.assertEqual(project, aset[0].project)
        self.assertEqual(1, aset[0].articles.count())
        self.assertFalse(self.article_in(None, aset[0], article))

        # Test add_splitted_to_sets
        f = form(dict(add_splitted_to_sets=[aset2.id]))
        self.assertTrue(f.is_valid())
        handle_split(f, project, article, Sentence.objects.none())
        self.assertIn(article, aset2.articles.all())

        # Test remove_from_sets
        f = form(dict(remove_from_sets=[aset1.id]))
        self.assertTrue(f.is_valid())
        handle_split(f, project, article, Sentence.objects.none())
        self.assertNotIn(article, aset1.articles.all())

        # Test remove_from_all_sets
        aset1.add_articles([article])
        aset2.add_articles([article])
        aset3.add_articles([article])

        f = form(dict(remove_from_all_sets=True))
        self.assertTrue(f.is_valid())
        handle_split(f, project, article, Sentence.objects.none())

        self.assertIn(aset1, project.all_articlesets())
        self.assertIn(aset2, project.all_articlesets())
        self.assertNotIn(aset3, project.all_articlesets())

        # Only the sets belonging to project should have lost the article
        self.assertFalse(self.article_in(cj1, aset1, article))
        self.assertFalse(self.article_in(cj2, aset2, article))
        self.assertTrue(self.article_in(cj3, aset3, article))