Пример #1
0
    def test_aggregate(self):
        """Can we make tables per medium/date interval?

        Also checks the statistics and the media list reported for the same set.
        """
        from amcat.models import Article

        nep_krant = amcattest.create_test_medium(name="De Nep-Krant")
        medium_b, medium_c = [amcattest.create_test_medium() for _ in range(2)]
        target_set = amcattest.create_test_set()
        other_set = amcattest.create_test_set()

        # Created in the other set; none of the expected values below include it.
        amcattest.create_test_article(text='aap noot mies', medium=medium_c, articleset=other_set)

        # (text, medium, date) of the unsaved articles that are created into target_set
        specs = [
            ('aap noot mies', nep_krant, '2001-01-01'),
            ('noot mies wim zus', medium_b, '2001-02-01'),
            ('mies bla bla bla wim zus jet', medium_b, '2002-01-01'),
            ('noot mies wim zus', medium_b, '2001-02-03'),
        ]
        unsaved = [
            amcattest.create_test_article(text=text, medium=medium, date=date, create=False)
            for text, medium, date in specs
        ]
        Article.create_articles(unsaved, articleset=target_set, check_duplicate=False, create_id=True)
        ES().flush()

        def aggregate(**options):
            # Aggregate over target_set only and return the result as a dict
            return dict(ES().aggregate_query(filters=dict(sets=target_set.id), **options))

        per_medium = aggregate(group_by="mediumid")
        self.assertEqual(per_medium, {nep_krant.id: 1, medium_b.id: 3})

        per_year = aggregate(group_by="date", date_interval="year")
        self.assertEqual(per_year, {datetime(2001, 1, 1): 3, datetime(2002, 1, 1): 1})

        per_month = aggregate(group_by="date", date_interval="month")
        self.assertEqual(
            per_month,
            {datetime(2001, 1, 1): 1, datetime(2002, 1, 1): 1, datetime(2001, 2, 1): 2},
        )

        # set statistics
        stats = ES().statistics(filters=dict(sets=target_set.id))
        self.assertEqual(stats.n, 4)
        self.assertEqual(stats.start_date, datetime(2001, 1, 1))
        self.assertEqual(stats.end_date, datetime(2002, 1, 1))

        # media list
        listed_media = set(ES().list_media(filters=dict(sets=target_set.id)))
        self.assertEqual(listed_media, {nep_krant.id, medium_b.id})
Пример #2
0
 def test_to_medium_ids(self):
     """to_medium_ids should accept a medium, a list of media, a queryset and a list of id values."""
     articles = amcattest.create_test_set(2).articles.all()
     first = amcattest.create_test_medium()
     second = amcattest.create_test_medium()

     # single medium object
     self.assertEqual(set(to_medium_ids(first)), {first.id})

     # plain list of medium objects
     self.assertEqual(set(to_medium_ids([first, second])), {first.id, second.id})

     # queryset of media
     media_queryset = Medium.objects.filter(id__in=[first.id, second.id])
     self.assertEqual(set(to_medium_ids(media_queryset)), {first.id, second.id})

     # flat values list of medium ids taken from the articles
     from_values = set(to_medium_ids(articles.values_list("medium__id", flat=True)))
     self.assertEqual(from_values, {article.medium_id for article in articles})
Пример #3
0
 def test_to_medium_ids(self):
     """Check to_medium_ids against four kinds of input: object, list, queryset, values list."""
     set_articles = amcattest.create_test_set(2).articles.all()
     medium_a = amcattest.create_test_medium()
     medium_b = amcattest.create_test_medium()

     ids_from_object = set(to_medium_ids(medium_a))
     self.assertEqual(ids_from_object, {medium_a.id})

     ids_from_list = set(to_medium_ids([medium_a, medium_b]))
     self.assertEqual(ids_from_list, {medium_a.id, medium_b.id})

     ids_from_queryset = set(to_medium_ids(Medium.objects.filter(id__in=[medium_a.id, medium_b.id])))
     self.assertEqual(ids_from_queryset, {medium_a.id, medium_b.id})

     ids_from_values = set(to_medium_ids(set_articles.values_list("medium__id", flat=True)))
     self.assertEqual(ids_from_values, {art.medium_id for art in set_articles})
Пример #4
0
    def set_up(self):
        """Create one article set with four articles spread over two media and flush the index.

        Returns the tuple (article set, first medium, second medium, and the
        four articles in creation order).
        """
        # We cannot use setUp, as use_elastic deletes indices
        article_set = amcattest.create_test_set()

        first_medium = amcattest.create_test_medium()
        second_medium = amcattest.create_test_medium()

        foo = amcattest.create_test_article(
            text="Foo", medium=first_medium, articleset=article_set, date=datetime(2014, 4, 3))
        bar = amcattest.create_test_article(
            text="Bar", medium=first_medium, articleset=article_set, date=datetime(2015, 4, 3))
        # the third article is created without an explicit date
        foobar = amcattest.create_test_article(
            text="FooBar", medium=second_medium, articleset=article_set)
        barfoo = amcattest.create_test_article(
            text="BarFoo", medium=second_medium, articleset=article_set, date=datetime(2014, 1, 3))

        ES().flush()
        return article_set, first_medium, second_medium, foo, bar, foobar, barfoo
Пример #5
0
    def setup(self):
        """Create three media, two sets and five articles, then flush the index.

        Four unsaved articles are created into the first set through
        Article.create_articles; the fifth is created directly in the second set.
        Returns (m1, m2, m3, s1, s2, a, b, c, d, e).
        """
        nep_krant = amcattest.create_test_medium(name="De Nep-Krant")
        medium_b, medium_c = [amcattest.create_test_medium() for _ in range(2)]
        first_set = amcattest.create_test_set()
        second_set = amcattest.create_test_set()

        # (text, medium, date) of the unsaved articles destined for first_set
        specs = [
            ('aap noot mies', nep_krant, '2001-01-01'),
            ('noot mies wim zus', medium_b, '2001-02-01'),
            ('mies bla bla bla wim zus jet', medium_b, '2002-01-01'),
            ('noot mies wim zus', medium_b, '2001-02-03'),
        ]
        a, b, c, d = [
            amcattest.create_test_article(text=text, medium=medium, date=date, create=False)
            for text, medium, date in specs
        ]
        e = amcattest.create_test_article(text='aap noot mies', medium=medium_c, articleset=second_set)

        Article.create_articles([a, b, c, d], articleset=first_set)
        ES().flush()
        return nep_krant, medium_b, medium_c, first_set, second_set, a, b, c, d, e
Пример #6
0
    def setup(self):
        """Build the shared fixture: three media, two sets, five articles; flush the index afterwards.

        Returns (m1, m2, m3, s1, s2, a, b, c, d, e) in that order.
        """
        named_medium = amcattest.create_test_medium(name="De Nep-Krant")
        second_medium, third_medium = [amcattest.create_test_medium() for _ in range(2)]
        main_set = amcattest.create_test_set()
        side_set = amcattest.create_test_set()

        def unsaved(text, medium, date):
            # Build an article object without creating it (create=False)
            return amcattest.create_test_article(text=text, medium=medium, date=date, create=False)

        a = unsaved('aap noot mies', named_medium, '2001-01-01')
        b = unsaved('noot mies wim zus', second_medium, '2001-02-01')
        c = unsaved('mies bla bla bla wim zus jet', second_medium, '2002-01-01')
        d = unsaved('noot mies wim zus', second_medium, '2001-02-03')
        # the fifth article is created straight into the side set
        e = amcattest.create_test_article(text='aap noot mies', medium=third_medium, articleset=side_set)

        Article.create_articles([a, b, c, d], articleset=main_set, check_duplicate=False, create_id=True)
        ES().flush()
        return named_medium, second_medium, third_medium, main_set, side_set, a, b, c, d, e
Пример #7
0
    def test_elastic_hash(self):
        """Can we reproduce a hash from elastic data alone?"""
        # Field values include non-ASCII text and control characters
        article_fields = {
            "date": datetime.date(2015, 1, 1),
            "section": "\u6f22\u5b57",
            "pagenr": 1928390,
            "headline": "Headline hier.",
            "byline": "byline..",
            "length": 1928,
            "metastring": "Even more strange characters.. \x0C ..",
            "url": "https://example.com",
            "externalid": None,
            "author": None,
            "addressee": "Hmm",
            "text": "Contains invalid char \x08 woo",
            "medium": create_test_medium(name="abc."),
            "project": create_test_project(),
        }
        article = Article(**article_fields)
        article.save()

        index = ES()
        index.add_articles([article.id])
        # hash computed from the model object, to compare with the indexed one
        expected_hash = get_article_dict(article)["hash"]
        index.flush()

        hits = index.query_all(filters={"ids": [article.id]}, fields=HASH_FIELDS + ["hash"])
        indexed = list(hits)[0]

        self.assertEqual(article.id, indexed.id)
        self.assertEqual(expected_hash, indexed.hash)
        # recomputing the hash from the indexed fields must give the same value
        self.assertEqual(_get_hash(indexed.to_dict()), expected_hash)
Пример #8
0
    def test_list_media(self):
        """Test that list media works for more than 10 media"""
        from amcat.models import Article

        media = [amcattest.create_test_medium() for _ in range(20)]
        # one unsaved article per medium
        unsaved = [amcattest.create_test_article(medium=medium, create=False) for medium in media]

        # first five articles go into one set
        small_set = amcattest.create_test_set()
        Article.create_articles(
            unsaved[:5], articleset=small_set, check_duplicate=False, create_id=True)
        ES().flush()
        self.assertEqual(set(small_set.get_mediums()), set(media[:5]))

        # the remaining fifteen go into a second set of the same project
        large_set = amcattest.create_test_set(project=small_set.project)
        Article.create_articles(
            unsaved[5:], articleset=large_set, check_duplicate=False, create_id=True)
        ES().flush()
        self.assertEqual(set(large_set.get_mediums()), set(media[5:]))

        # the project as a whole should report all twenty media
        self.assertEqual(set(small_set.project.get_mediums()), set(media))
Пример #9
0
def create_test_article(n):
    """Return a dict describing test article *n*.

    The dict has the keys "headline", "text", "date" (today's date in ISO
    format) and "medium" (the name of a newly created test medium).
    """
    headline = str(n)
    body = "test %s" % n
    today = datetime.date.today().isoformat()
    medium_name = amcattest.create_test_medium().name
    return {"headline": headline, "text": body, "date": today, "medium": medium_name}
Пример #10
0
    def test_elastic_hash(self):
        """Can we reproduce a hash from elastic data alone?"""
        article = Article(
            date=datetime.date(2015, 1, 1),
            section="\u6f22\u5b57",
            pagenr=1928390,
            headline="Headline hier.",
            byline="byline..",
            length=1928,
            metastring="Even more strange characters.. \x0C ..",
            url="https://example.com",
            externalid=None,
            author=None,
            addressee="Hmm",
            text="Contains invalid char \x08 woo",
            medium=create_test_medium(name="abc."),
            project=create_test_project(),
        )
        article.save()

        es = ES()
        es.add_articles([article.id])
        # reference hash, derived from the model object
        model_hash = get_article_dict(article)["hash"]
        es.flush()

        wanted_fields = HASH_FIELDS + ["hash"]
        results = es.query_all(filters={"ids": [article.id]}, fields=wanted_fields)
        first_hit = list(results)[0]

        self.assertEqual(article.id, first_hit.id)
        self.assertEqual(model_hash, first_hit.hash)
        # the hash rebuilt from the indexed document must match as well
        self.assertEqual(_get_hash(first_hit.to_dict()), model_hash)
Пример #11
0
    def test_scores(self):
        """test if scores (and matches) are as expected for various queries"""
        s = amcattest.create_test_set(articles=[
            amcattest.create_test_article(headline="a", text='dit is een test'),
        ])

        s.refresh_index()

        def q(query):
            # Map headline -> score for every hit of `query` within set s
            result = ES().query(query, filters={'sets': s.id}, fields=["headline"])
            return {a.headline: a.score for a in result}

        self.assertEqual(q("test"), {"a": 1})

        m1, m2 = [amcattest.create_test_medium() for _ in range(2)]
        a = amcattest.create_test_article(text='aap noot mies', medium=m1)
        b = amcattest.create_test_article(text='noot mies wim zus', medium=m2)
        c = amcattest.create_test_article(text='mies bla bla bla wim zus jet', medium=m2)
        d = amcattest.create_test_article(text='ik woon in een sociale huurwoning, net als anderen', medium=m2)
        ES().flush()

        # wildcard, without and with a medium filter
        self.assertEqual(set(ES().query_ids("no*")), {a.id, b.id})
        self.assertEqual(set(ES().query_ids("no*", filters=dict(mediumid=m2.id))), {b.id})
        # boolean operators
        self.assertEqual(set(ES().query_ids("zus AND jet", filters=dict(mediumid=m2.id))), {c.id})
        self.assertEqual(set(ES().query_ids("zus OR jet", filters=dict(mediumid=m2.id))), {b.id, c.id})
        # quoted phrase, then the same phrase with a ~5 suffix
        self.assertEqual(set(ES().query_ids('"mies wim"', filters=dict(mediumid=m2.id))), {b.id})
        self.assertEqual(set(ES().query_ids('"mies wim"~5', filters=dict(mediumid=m2.id))), {b.id, c.id})

        # wildcard inside a quoted phrase (this assertion used to appear twice verbatim)
        self.assertEqual(set(ES().query_ids('"sociale huur*"', filters=dict(mediumid=m2.id))), {d.id})
Пример #12
0
    def test_filters(self):
        """
        Do filters work properly?
        """
        medium_a, medium_b = [amcattest.create_test_medium() for _ in range(2)]
        art_2001 = amcattest.create_test_article(text='aap noot mies', medium=medium_a, date="2001-01-01")
        art_2002 = amcattest.create_test_article(text='noot mies wim zus', medium=medium_b, date="2002-01-01")
        art_2003 = amcattest.create_test_article(
            text='mies bla bla bla wim zus jet', medium=medium_b, date="2003-01-01")

        full_set = amcattest.create_test_set(articles=[art_2001, art_2002, art_2003])
        partial_set = amcattest.create_test_set(articles=[art_2001, art_2002])
        ES().flush()

        def ids(**filters):
            # Article ids matching the given filters, as a set
            return set(ES().query_ids(filters=filters))

        # MEDIUM FILTER
        self.assertEqual(ids(mediumid=medium_b.id), {art_2002.id, art_2003.id})

        #### DATE FILTERS
        self.assertEqual(ids(sets=full_set.id, start_date='2001-06-01'), {art_2002.id, art_2003.id})
        # start is inclusive
        self.assertEqual(
            ids(sets=full_set.id, start_date='2002-01-01', end_date="2002-06-01"),
            {art_2002.id})
        # end is exclusive
        self.assertEqual(
            ids(sets=full_set.id, start_date='2001-01-01', end_date="2003-01-01"),
            {art_2001.id, art_2002.id})

        # COMBINATION
        self.assertEqual(ids(sets=partial_set.id, start_date='2001-06-01'), {art_2002.id})
        self.assertEqual(ids(end_date='2002-06-01', mediumid=medium_b.id), {art_2002.id})
    def test_aggregation(self):
        """Can we create nice tables?"""
        project = amcattest.create_test_project()
        medium_a, medium_b = [amcattest.create_test_medium() for _ in range(2)]
        few = {amcattest.create_test_article(project=project, medium=medium_a) for _ in range(5)}
        many = {amcattest.create_test_article(project=project, medium=medium_b) for _ in range(15)}
        article_set = amcattest.create_test_set(project=project)
        article_set.add_articles(few.union(many))
        article_set.refresh_index()

        # can we select on mediumid
        self.assertEqual(self.list(projects=[project.id]), self.pks(few.union(many)))
        self.assertEqual(self.list(projects=[project.id], mediums=[medium_a.id]), self.pks(few))

        # can we make a table?
        table = self.aggr(projects=[project.id], xAxis='medium')
        self.assertEqual(set(table), {(5,), (15,)})

        # add second project with articles from first project in set
        second_project = amcattest.create_test_project()
        second_set = amcattest.create_test_set(project=second_project)
        second_set.add(*few.union(many))
        # The aggregation is run, but no assertion on its result follows within this method.
        table = self.aggr(projects=[second_project.id], articlesets=[second_set.id], xAxis='medium')
Пример #14
0
    def test_get_model_field(self):
        """get_model_field should resolve plain fields, related objects and "__" lookups."""
        guardian = create_test_medium(name="The Guardian")
        article = create_test_article(text="abc", medium=guardian)

        # sanity check on the fixture itself
        self.assertEqual(article.medium.name, "The Guardian")

        # lookup through the relation using the double-underscore path
        name_via_path = get_model_field(article, "medium__name")
        self.assertEqual(name_via_path, "The Guardian")

        # related object and plain field
        self.assertEqual(get_model_field(article, "medium"), article.medium)
        self.assertEqual(get_model_field(article, "text"), "abc")
Пример #15
0
def create_test_article(n):
    """Build the dict for test article number *n*.

    Keys: headline (str(n)), text ("test " followed by n), date (today's ISO
    date) and medium (name of a freshly created test medium).
    """
    return dict(
        headline=str(n),
        text="test %s" % n,
        date=datetime.date.today().isoformat(),
        medium=amcattest.create_test_medium().name,
    )
Пример #16
0
    def test_deduplication(self):
        """Does deduplication work as it is supposed to?"""
        # Shared keyword arguments: every article below is created from the same values
        art = dict(headline="test", byline="test", date='2001-01-01',
                   medium=amcattest.create_test_medium(),
                   project=amcattest.create_test_project(),
                   )

        a1 = amcattest.create_test_article(**art)

        def q(**filters):
            # Flush before every query, then return the matching article ids as a set
            amcates.ES().flush()
            return set(amcates.ES().query_ids(filters=filters))

        self.assertEqual(q(mediumid=art['medium']), {a1.id})

        # duplicate articles should not be added
        a2 = amcattest.create_test_article(check_duplicate=True, **art)
        self.assertFalse(Article.objects.filter(pk=a2.id).exists())
        self.assertEqual(a2.duplicate_of, a1.id)
        self.assertEqual(q(mediumid=art['medium']), {a1.id})

        # however, if an articleset is given the 'existing' article
        # should be added to that set
        s1 = amcattest.create_test_set()
        a3 = amcattest.create_test_article(check_duplicate=True, articleset=s1, **art)

        # Check a3 itself here; this line previously re-tested a2 (copy-paste slip)
        self.assertFalse(Article.objects.filter(pk=a3.id).exists())
        self.assertEqual(a3.duplicate_of, a1.id)
        self.assertEqual(q(mediumid=art['medium']), {a1.id})
        self.assertEqual(set(s1.get_article_ids()), {a1.id})
        self.assertEqual(q(sets=s1.id), {a1.id})

        # can we suppress duplicate checking?
        a4 = amcattest.create_test_article(check_duplicate=False, **art)
        self.assertTrue(Article.objects.filter(pk=a4.id).exists())
        self.assertFalse(hasattr(a4, 'duplicate_of'))
        self.assertIn(a4.id, q(mediumid=art['medium']))
Пример #17
0
    def test_filters(self):
        """
        Do filters work properly?
        """
        first_medium, second_medium = [amcattest.create_test_medium() for _ in range(2)]
        oldest = amcattest.create_test_article(text='aap noot mies', medium=first_medium, date="2001-01-01")
        middle = amcattest.create_test_article(text='noot mies wim zus', medium=second_medium, date="2002-01-01")
        newest = amcattest.create_test_article(
            text='mies bla bla bla wim zus jet', medium=second_medium, date="2003-01-01")

        all_three = amcattest.create_test_set(articles=[oldest, middle, newest])
        first_two = amcattest.create_test_set(articles=[oldest, middle])
        ES().flush()

        def matching(**filters):
            # Set of article ids returned for the given filters
            return set(ES().query_ids(filters=filters))

        # MEDIUM FILTER
        self.assertEqual(matching(mediumid=second_medium.id), {middle.id, newest.id})

        #### DATE FILTERS
        self.assertEqual(matching(sets=all_three.id, start_date='2001-06-01'), {middle.id, newest.id})
        # start is inclusive
        self.assertEqual(
            matching(sets=all_three.id, start_date='2002-01-01', end_date="2002-06-01"),
            {middle.id})
        # end is exclusive
        self.assertEqual(
            matching(sets=all_three.id, start_date='2001-01-01', end_date="2003-01-01"),
            {oldest.id, middle.id})

        # COMBINATION
        self.assertEqual(matching(sets=first_two.id, start_date='2001-06-01'), {middle.id})
        self.assertEqual(matching(end_date='2002-06-01', mediumid=second_medium.id), {middle.id})
Пример #18
0
    def test_query_args_from_form(self):
        """query_args_from_form should reduce a form dict to start, rows and filter strings."""
        m = amcattest.create_test_medium()
        s1 = amcattest.create_test_set()
        s2 = amcattest.create_test_set()

        columns = [u'article_id', u'date', u'medium_id', u'medium_name', u'headline']
        form = {
            'sortColumn': '',
            'useSolr': True,
            'start': 100,
            'length': 100,
            'articleids': [],
            'articlesets': [s1, s2],
            'mediums': [m],
            'projects': [],
            'columns': columns,
            'highlight': False,
            'columnInterval': 'month',
            'datetype': 'all',
            'sortOrder': '',
        }
        args = query_args_from_form(form)

        # one filter string for the medium, one OR-ing the two sets
        expected_filters = [
            u'mediumid:{m.id}'.format(m=m),
            u'sets:{s1.id} OR sets:{s2.id}'.format(s1=s1, s2=s2),
        ]
        self.assertEqual(args, {'start': 100, 'rows': 100, 'filters': expected_filters})
Пример #19
0
    def test_full_refresh(self):
        """test full refresh, e.g. document content change"""
        old_medium, new_medium = [amcattest.create_test_medium() for _ in range(2)]
        article = amcattest.create_test_article(text='aap noot mies', medium=old_medium)
        article_set = amcattest.create_test_set()
        article_set.add(article)
        article_set.refresh_index()

        def indexed_ids(medium):
            # Ids the index reports for this set when filtered on the given medium
            return set(ES().query_ids(filters=dict(sets=article_set.id, mediumid=medium.id)))

        self.assertEqual(indexed_ids(old_medium), {article.id})

        # change the medium in the database only
        article.medium = new_medium
        article.save()
        article_set.refresh_index(full_refresh=False)  # article should NOT be reindexed
        self.assertEqual(indexed_ids(old_medium), {article.id})
        self.assertEqual(indexed_ids(new_medium), set())

        # after a full refresh the index should report the new medium
        article_set.refresh_index(full_refresh=True)
        self.assertEqual(indexed_ids(old_medium), set())
        self.assertEqual(indexed_ids(new_medium), {article.id})
Пример #20
0
    def test_scores(self):
        """test if scores (and matches) are as expected for various queries"""
        s = amcattest.create_test_set(articles=[
                amcattest.create_test_article(headline="a", text='dit is een test'),
                ])

        s.refresh_index()

        def q(query):
            # Map headline -> score for every hit of `query` within set s
            result = ES().query(query, filters={'sets': s.id}, fields=["headline"])
            return {a.headline: a.score for a in result}

        self.assertEqual(q("test"), {"a": 1})

        m1, m2 = [amcattest.create_test_medium() for _ in range(2)]
        a = amcattest.create_test_article(text='aap noot mies', medium=m1)
        b = amcattest.create_test_article(text='noot mies wim zus', medium=m2)
        c = amcattest.create_test_article(text='mies bla bla bla wim zus jet', medium=m2)
        d = amcattest.create_test_article(text='ik woon in een sociale huurwoning, net als anderen', medium=m2)
        # index the new articles explicitly before querying
        ES().add_articles([a.id, b.id, c.id, d.id])
        ES().flush()

        # wildcard, without and with a medium filter
        self.assertEqual(set(ES().query_ids("no*")), {a.id, b.id})
        self.assertEqual(set(ES().query_ids("no*", filters=dict(mediumid=m2.id))), {b.id})
        # boolean operators
        self.assertEqual(set(ES().query_ids("zus AND jet", filters=dict(mediumid=m2.id))), {c.id})
        self.assertEqual(set(ES().query_ids("zus OR jet", filters=dict(mediumid=m2.id))), {b.id, c.id})
        # quoted phrase, then the same phrase with a ~5 suffix
        self.assertEqual(set(ES().query_ids('"mies wim"', filters=dict(mediumid=m2.id))), {b.id})
        self.assertEqual(set(ES().query_ids('"mies wim"~5', filters=dict(mediumid=m2.id))), {b.id, c.id})

        # wildcard inside a quoted phrase (this assertion used to appear twice verbatim)
        self.assertEqual(set(ES().query_ids('"sociale huur*"', filters=dict(mediumid=m2.id))), {d.id})
Пример #21
0
    def test_get_model_field(self):
        """Check get_model_field for a "__" path, a related object and a plain field."""
        medium = create_test_medium(name="The Guardian")
        article = create_test_article(text="abc", medium=medium)

        self.assertEqual(article.medium.name, "The Guardian")

        # (field path, expected value) pairs, checked in order
        expectations = [
            ("medium__name", "The Guardian"),
            ("medium", article.medium),
            ("text", "abc"),
        ]
        for path, expected in expectations:
            self.assertEqual(get_model_field(article, path), expected)
Пример #22
0
    def test_get_mediums(self):
        """After adding one article per medium and refreshing, get_mediums should return all ten media."""
        article_set = amcattest.create_test_set(0)
        media = [amcattest.create_test_medium(name="Test__" + str(number)) for number in range(10)]

        # create and add one article per medium, one at a time
        for medium in media:
            article = amcattest.create_test_article(medium=medium)
            article_set.add(article)
        article_set.refresh_index()

        # Test if medium really added
        reported = set(article_set.get_mediums())
        self.assertEqual(reported, set(media))
Пример #23
0
    def test_get_mediums(self):
        """get_mediums on a set should report every medium whose article was added to it."""
        aset = amcattest.create_test_set(0)

        media = []
        for index in range(10):
            media.append(amcattest.create_test_medium(name="Test__" + str(index)))

        for medium in media:
            aset.add(amcattest.create_test_article(medium=medium))
        aset.refresh_index()

        # Test if medium really added
        self.assertEqual(set(aset.get_mediums()), set(media))
Пример #24
0
    def test_date(self):
        """Run do_test on three articles of one medium, with and without ignore_date.

        Articles 1 and 2 are both dated 2001-01-01 (article 2 at 02:00);
        article 3 is dated 2001-01-02.
        """
        s = amcattest.create_test_set()
        m = amcattest.create_test_medium()
        arts = [
            amcattest.create_test_article(id=1, articleset=s, medium=m, date="2001-01-01"),
            amcattest.create_test_article(id=2, articleset=s, medium=m, date="2001-01-01 02:00"),
            amcattest.create_test_article(id=3, articleset=s, medium=m, date="2001-01-02"),
            ]

        # by default all three ids are expected back
        self.assertEqual(self.do_test(arts), {1, 2, 3})
        # with ignore_date=True article 2 is no longer expected in the result
        self.assertEqual(self.do_test(arts, ignore_date=True), {1, 3})
Пример #25
0
    def test_date(self):
        """Run do_test on three otherwise identical articles, with and without ignore_date.

        The first two articles are dated 2001-01-01 (the second at 02:00);
        the third is dated 2001-01-02.
        """
        s = amcattest.create_test_set()
        m = amcattest.create_test_medium()
        # keyword arguments shared by all three articles
        adict = dict(text="text", headline="headline", articleset=s, medium=m)
        arts = [
            amcattest.create_test_article(date="2001-01-01", **adict),
            amcattest.create_test_article(date="2001-01-01 02:00", **adict),
            amcattest.create_test_article(date="2001-01-02", **adict),
            ]

        # NOTE(review): no ids are passed to create_test_article here, yet 1..3 are
        # expected back - presumably the helper or do_test numbers them; confirm.
        self.assertEqual(self.do_test(arts), {1, 2, 3})
        self.assertEqual(self.do_test(arts, ignore_date=True), {1, 3})
Пример #26
0
    def test_get_mediums(self):
        """With a cleared cache and the mediums cache enabled, get_mediums should return all added media."""
        from django.core.cache import cache

        cache.clear()
        AmCAT.enable_mediums_cache()

        article_set = amcattest.create_test_set(0)
        media = [amcattest.create_test_medium(name="Test__" + str(number)) for number in range(10)]
        for medium in media:
            new_article = amcattest.create_test_article(medium=medium)
            article_set.add(new_article)
        article_set.refresh_index()

        # Test if medium really added
        self.assertEqual(set(article_set.get_mediums()), set(media))
Пример #27
0
    def test_get_mediums(self):
        """Clear the Django cache, enable the mediums cache, then check get_mediums on a filled set."""
        from django.core.cache import cache

        # start from an empty cache before enabling the mediums cache
        cache.clear()
        AmCAT.enable_mediums_cache()

        aset = amcattest.create_test_set(0)

        media = []
        for index in range(10):
            media.append(amcattest.create_test_medium(name="Test__" + str(index)))

        for medium in media:
            aset.add(amcattest.create_test_article(medium=medium))
        aset.refresh_index()

        # Test if medium really added
        found = set(aset.get_mediums())
        self.assertEqual(found, set(media))
Пример #28
0
    def test_dupe(self):
        """Test whether deduplication works"""
        medium = amcattest.create_test_medium()
        payload = test_article(medium=medium.name)

        # post the same article twice, calling setUp_set() in between
        first_id = self._post_articles(payload)['id']
        self.setUp_set()
        second_id = self._post_articles(payload)['id']

        # are the resulting ids identical?
        self.assertEqual(first_id, second_id)

        # is it not added (ie we only have one article with this medium)
        ids_for_medium = set(amcates.ES().query_ids(filters={'mediumid': medium.id}))
        self.assertEqual(ids_for_medium, {first_id})

        # is it added to elastic for this set?
        ids_for_set = set(amcates.ES().query_ids(filters={'sets': self.aset.id}))
        self.assertEqual(ids_for_set, {first_id})
Пример #29
0
    def test_fuzzy(self):
        """Run do_test with ignore_medium=True and several headline_ratio values on four similar headlines."""
        article_set = amcattest.create_test_set()
        medium = amcattest.create_test_medium()

        # articles get the ids 1..4 in this order
        headlines = [
            "Dit is een test",
            "Dit is ook een test",
            "Dit is ook een tesdt",
            "Is dit een test?",
        ]
        arts = [
            amcattest.create_test_article(id=number, articleset=article_set, medium=medium, headline=headline)
            for number, headline in enumerate(headlines, start=1)
        ]

        # (extra do_test options, expected ids), checked in order
        cases = [
            ({}, {1, 2, 3, 4}),
            ({'headline_ratio': 90}, {1, 2, 4}),
            ({'headline_ratio': 80}, {1, 4}),
            ({'headline_ratio': 50}, {1}),
        ]
        for options, expected in cases:
            self.assertEqual(self.do_test(arts, ignore_medium=True, **options), expected)
Пример #30
0
 def test_dedup(self):
     """Run do_test on five articles under the default, dry_run, ignore_medium and ignore_page options."""
     article_set = amcattest.create_test_set()
     first_medium, second_medium = [amcattest.create_test_medium() for _x in range(2)]

     # (medium, page number) per article; ids 1..5 are assigned in this order
     layout = [
         (first_medium, 1),
         (first_medium, 2),
         (second_medium, 1),
         (second_medium, 2),
         (second_medium, 2),
     ]
     arts = [
         amcattest.create_test_article(articleset=article_set, medium=medium, pagenr=page, id=number)
         for number, (medium, page) in enumerate(layout, start=1)
     ]

     self.assertEqual(self.do_test(arts), {1, 2, 3, 4})
     self.assertEqual(self.do_test(arts, dry_run=True), {1, 2, 3, 4, 5})
     self.assertEqual(self.do_test(arts, ignore_medium=True), {1, 2})
     self.assertEqual(self.do_test(arts, ignore_page=True), {1, 3})
Пример #31
0
    def test_refresh_index(self):
        """Are added/removed articles added/removed from the index?"""
        # TODO add/remove articles adds to index automatically (does remove?)
        # so refresh isn't really used. Rewrite to add to db manually
        def indexed_ids(article_set):
            # Article ids that the index currently reports for the given set
            return set(ES().query_ids(filters=dict(sets=article_set.id)))

        first_set = amcattest.create_test_set()
        article = amcattest.create_test_article()

        first_set.add(article)
        self.assertEqual(set(), indexed_ids(first_set))
        first_set.refresh_index()
        self.assertEqual({article.id}, indexed_ids(first_set))

        # check adding of existing articles to a new set:
        second_set = amcattest.create_test_set()
        second_set.add(article)
        second_set.refresh_index()
        self.assertEqual({article.id}, indexed_ids(second_set))
        # check that removing of articles from a set works and does not affect
        # other sets
        second_set.remove_articles([article])
        second_set.refresh_index()
        self.assertEqual(set(), indexed_ids(second_set))
        self.assertEqual({article.id}, indexed_ids(first_set))

        # removal is only expected to reach the index after the refresh
        first_set.remove_articles([article])
        self.assertEqual({article.id}, indexed_ids(first_set))
        first_set.refresh_index()
        self.assertEqual(set(), indexed_ids(first_set))

        # test that remove from index works for larger sets
        big_set = amcattest.create_test_set()
        batch = [amcattest.create_test_article(medium=article.medium) for _ in range(20)]
        big_set.add(*batch)

        big_set.refresh_index()
        self.assertEqual(indexed_ids(big_set), {art.id for art in batch})

        big_set.remove_articles([batch[0]])
        big_set.remove_articles([batch[-1]])
        big_set.refresh_index()
        self.assertEqual(indexed_ids(big_set), {art.id for art in batch[1:-1]})

        # test that changing an article's properties can be reindexed
        batch[1].medium = amcattest.create_test_medium()
        batch[1].save()
Пример #32
0
    def test_refresh_index(self):
        """Are added/removed articles added/removed from the index?"""
        # TODO add/remove articles adds to index automatically (does remove?)
        # so refresh isn't really used. Rewrite to add to db manually
        def ids_in_index(aset):
            # Query the index for the ids of all articles in the given set
            hits = ES().query_ids(filters=dict(sets=aset.id))
            return set(hits)

        set_one = amcattest.create_test_set()
        shared = amcattest.create_test_article()

        # adding alone is expected to leave the index untouched until the refresh
        set_one.add(shared)
        self.assertEqual(set(), ids_in_index(set_one))
        set_one.refresh_index()
        self.assertEqual({shared.id}, ids_in_index(set_one))

        # check adding of existing articles to a new set:
        set_two = amcattest.create_test_set()
        set_two.add(shared)
        set_two.refresh_index()
        self.assertEqual({shared.id}, ids_in_index(set_two))
        # check that removing of articles from a set works and does not affect
        # other sets
        set_two.remove_articles([shared])
        set_two.refresh_index()
        self.assertEqual(set(), ids_in_index(set_two))
        self.assertEqual({shared.id}, ids_in_index(set_one))

        set_one.remove_articles([shared])
        self.assertEqual({shared.id}, ids_in_index(set_one))
        set_one.refresh_index()
        self.assertEqual(set(), ids_in_index(set_one))

        # test that remove from index works for larger sets
        larger_set = amcattest.create_test_set()
        many = [amcattest.create_test_article(medium=shared.medium) for _ in range(20)]
        larger_set.add(*many)

        larger_set.refresh_index()
        expected_all = {item.id for item in many}
        self.assertEqual(ids_in_index(larger_set), expected_all)

        # drop the first and the last article, one call each
        larger_set.remove_articles([many[0]])
        larger_set.remove_articles([many[-1]])
        larger_set.refresh_index()
        expected_middle = {item.id for item in many[1:-1]}
        self.assertEqual(ids_in_index(larger_set), expected_middle)

        # test that changing an article's properties can be reindexed
        many[1].medium = amcattest.create_test_medium()
        many[1].save()
Пример #33
0
    def test_fuzzy(self):
        """Run do_test with ignore_medium=True and decreasing headline_ratio values."""
        article_set = amcattest.create_test_set()
        medium = amcattest.create_test_medium()
        # keyword arguments shared by every article; only the headline varies
        shared = dict(text="text", articleset=article_set, medium=medium)

        headlines = [
            "Dit is een test",
            "Dit is ook een test",
            "Dit is ook een tesdt",
            "Is dit een test?",
        ]
        arts = [amcattest.create_test_article(headline=headline, **shared) for headline in headlines]

        everything = self.do_test(arts, ignore_medium=True)
        self.assertEqual(everything, {1, 2, 3, 4})

        at_90 = self.do_test(arts, ignore_medium=True, headline_ratio=90)
        self.assertEqual(at_90, {1, 2, 4})

        at_80 = self.do_test(arts, ignore_medium=True, headline_ratio=80)
        self.assertEqual(at_80, {1, 4})

        at_50 = self.do_test(arts, ignore_medium=True, headline_ratio=50)
        self.assertEqual(at_50, {1})
Пример #34
0
 def test_dedup(self):
     """Run do_test on five articles that differ only in medium and page number."""
     article_set = amcattest.create_test_set()
     first_medium, second_medium = [amcattest.create_test_medium() for _x in range(2)]
     # keyword arguments shared by every article
     shared = dict(text="text", headline="headline", articleset=article_set, deduplicate=False)

     # (medium, page number) per article, in creation order
     layout = [
         (first_medium, 1),
         (first_medium, 2),
         (second_medium, 1),
         (second_medium, 2),
         (second_medium, 2),
     ]
     arts = [
         amcattest.create_test_article(medium=medium, pagenr=page, **shared)
         for medium, page in layout
     ]

     self.assertEqual(self.do_test(arts), {1, 2, 3, 4})
     self.assertEqual(self.do_test(arts, dry_run=True), {1, 2, 3, 4, 5})
     self.assertEqual(self.do_test(arts, ignore_medium=True), {1, 2})
     self.assertEqual(self.do_test(arts, ignore_page=True), {1, 3})
Пример #35
0
    def test_add_many(self):
        """Can we add a large number of articles from one set to another?

        Builds 1213 articles with create=False, stores them in the first set
        through Article.create_articles, then adds the same articles to a
        second set. After each step the index is flushed and the set's
        reported count must equal the number of articles.
        """
        s = amcattest.create_test_set()
        s2 = amcattest.create_test_set()
        m = amcattest.create_test_medium()
        p = amcattest.create_test_project()

        arts = [amcattest.create_test_article(project=p, medium=m, create=False)
                for _x in range(1213)]
        Article.create_articles(arts, s)
        ES().flush()
        self.assertEqual(len(arts), s.get_count())

        s2.add_articles(arts, monitor=ProgressMonitor())
        ES().flush()
        # The assertion is the check; no debug print of the count afterwards.
        self.assertEqual(len(arts), s2.get_count())
Пример #36
0
    def todo_test_zip(self):
        """Upload a two-member zip through Text and check the created articles.

        The todo_ prefix presumably keeps this method out of test discovery.
        """
        project_id = amcattest.create_test_project().id
        set_id = amcattest.create_test_set().id
        medium_id = amcattest.create_test_medium().id
        options = dict(project=project_id, articlesets=[set_id], medium=medium_id)

        # (archive member name, content)
        members = (("headline1.txt", "TEXT1"), ("x/headline2.txt", "TEXT2"))

        with NamedTemporaryFile(prefix=u"upload_test", suffix=".zip") as tmp:
            with zipfile.ZipFile(tmp, "w") as archive:
                for member, content in members:
                    archive.writestr(member, content)
            tmp.flush()

            script = Text(file=File(tmp), date='2010-01-01', **options)
            # run() returns article set ids; the first one holds the upload
            result_set = ArticleSet.objects.get(id=script.run()[0])
            arts = list(result_set.articles.all())

            # (article attribute, expected set of values)
            expectations = (
                ("headline", {"headline1", "headline2"}),
                ("section", {'', "x"}),
                ("text", {"TEXT1", "TEXT2"}),
            )
            for attribute, expected in expectations:
                self.assertEqual({getattr(art, attribute) for art in arts}, expected)
Пример #37
0
    def todo_test_zip(self):
        """Check that zip members uploaded through Text become separate articles.

        The todo_ prefix presumably keeps this method out of test discovery.
        """
        project_id = amcattest.create_test_project().id
        set_id = amcattest.create_test_set().id
        medium_id = amcattest.create_test_medium().id
        upload_options = dict(project=project_id,
                              articlesets=[set_id],
                              medium=medium_id)

        with NamedTemporaryFile(prefix=u"upload_test", suffix=".zip") as zip_file:
            with zipfile.ZipFile(zip_file, "w") as archive:
                archive.writestr("headline1.txt", "TEXT1")
                archive.writestr("x/headline2.txt", "TEXT2")
            zip_file.flush()

            script = Text(file=File(zip_file), date='2010-01-01', **upload_options)
            created_set_id = script.run()[0]
            created_set = ArticleSet.objects.get(id=created_set_id)
            arts = list(created_set.articles.all())

            headlines = {art.headline for art in arts}
            self.assertEqual(headlines, {"headline1", "headline2"})
            sections = {art.section for art in arts}
            self.assertEqual(sections, {'', "x"})
            texts = {art.text for art in arts}
            self.assertEqual(texts, {"TEXT1", "TEXT2"})
Пример #38
0
    def test_deduplication(self):
        """Does deduplication work as it is supposed to?"""

        # fill the db with unrelated articles so it is not empty
        for _ in range(10):
            amcattest.create_test_article()
        amcates.ES().flush()

        medium = amcattest.create_test_medium()
        project = amcattest.create_test_project()
        art = dict(headline="test", text="test", byline="test", date='2001-01-01',
                   medium=medium, project=project)

        a1 = amcattest.create_test_article(**art)
        amcates.ES().flush()
        self.assertEqual(_q(mediumid=medium), {a1.id})

        # creating the same article again yields the existing one
        a2 = amcattest.create_test_article(**art)
        amcates.ES().flush()
        self.assertEqual(a2.id, a1.id)
        self.assertTrue(a2.duplicate)
        self.assertEqual(_q(mediumid=medium), {a1.id})

        # when an articleset is given, the existing article
        # must end up in that set
        s1 = amcattest.create_test_set()
        a3 = amcattest.create_test_article(articleset=s1, **art)
        amcates.ES().flush()
        self.assertEqual(a3.id, a1.id)
        self.assertEqual(_q(mediumid=medium), {a1.id})
        self.assertEqual(set(s1.get_article_ids()), {a1.id})
        self.assertEqual(_q(sets=s1.id), {a1.id})

        # same content but a different uuid: not a duplicate
        fresh_uuid = uuid.uuid4()
        a4 = amcattest.create_test_article(uuid=fresh_uuid, **art)
        self.assertFalse(a4.duplicate)
        self.assertEqual(a4.uuid, fresh_uuid)

        # an existing uuid with identical content is accepted as a duplicate
        art['uuid'] = a1.uuid
        amcattest.create_test_article(**art)
        amcates.ES().flush()
        self.assertEqual(_q(mediumid=medium), {a1.id, a4.id})

        # an existing uuid with different content must be rejected
        art['headline'] = "not the same"
        with self.assertRaises(ValueError):
            amcattest.create_test_article(**art)
Пример #39
0
    def test_save_parent(self):
        """Can we save objects with new and existing parents?

        Builds small article trees hanging off an already created root
        article, saves them through Article.create_articles and checks both
        the number of queries used and the parent links read back from the
        database. The same tree is then saved again, and finally saved once
        more together with two extra articles.
        """
        m = amcattest.create_test_medium()
        root = create_test_article()
        s = create_test_set()
        # Maps child index -> parent index within the list built by _articles
        structure = {1:0, 2:1, 3:1, 4:0}
        adict= dict(medium=m, date=datetime.date(2001,1,1), project=s.project)
        def _articles(n, structure):
            """Build n Article objects wired up according to structure.

            Article 0 gets root as parent; headline and text of each article
            are its index, which is used further down to look articles up.
            """
            articles = [Article(headline=str(i), text=str(i), **adict) for i in range(n)]
            articles[0].parent = root
            for child, parent in structure.items():
                articles[child].parent = articles[parent]
            return articles

        # Trees are 3 levels deep, so it should take 3 queries to complete this request
        articles = _articles(5, structure)
        self.assertNumQueries(3, Article.create_articles, articles)

        ids = _q(mediumid=m.id)
        self.assertEqual(len(ids), 5)
        # Index the stored articles by the number kept in their text field
        a = {int(a.text):a for a in Article.objects.filter(pk__in=ids)}

        # Are the parent properties set correctly?
        self.assertEqual(a[0].parent, root)        
        for child, parent in structure.items():
            # NOTE(review): re-assigns the parent _articles already set; looks
            # like a copy-paste leftover, the assertion only reads from `a`
            articles[child].parent = articles[parent]
            self.assertEqual(a[child].parent, a[parent])

        # Can we save it again without errors? (And without queries, since it's all dupes.)
        articles = _articles(5, structure)
        self.assertNumQueries(0, Article.create_articles, articles)
        self.assertEqual(len(_q(mediumid=m.id)), 5)
        
        # Can we insert new articles together with dupes?
        structure.update({5:1, 6:1})
        articles = _articles(7, structure)
        articles[6].parent = a[1] # existing article
        amcates.ES().flush()
        # (inefficiency: it knows it can save 6 immediately, doesn't know it can also save 5 until dedup)
        self.assertNumQueries(2, Article.create_articles, articles)
        ids = _q(mediumid=m.id)
        self.assertEqual(len(ids), 7)
        a = {int(a.text):a for a in Article.objects.filter(pk__in=ids)}
        self.assertEqual(a[0].parent, root)        
        for child, parent in structure.items():
            # NOTE(review): this also replaces articles[6].parent (set to the
            # stored a[1] above) with articles[1]; the assertion only reads `a`
            articles[child].parent = articles[parent]
            self.assertEqual(a[child].parent, a[parent])
Пример #40
0
    def test_list_media(self):
        """Check get_mediums on sets and on the project with 20 distinct media.

        Five articles go into the first set and fifteen into a second set of
        the same project; each article has its own medium.
        """
        from amcat.models import Article

        media = [amcattest.create_test_medium() for _ in range(20)]
        arts = [amcattest.create_test_article(medium=medium, create=False)
                for medium in media]
        split = 5

        def store(articles, articleset):
            # save the articles into the set and make them searchable
            Article.create_articles(articles, articleset=articleset,
                                    check_duplicate=False, create_id=True)
            ES().flush()

        s1 = amcattest.create_test_set()
        store(arts[:split], s1)
        self.assertEqual(set(s1.get_mediums()), set(media[:split]))

        s2 = amcattest.create_test_set(project=s1.project)
        store(arts[split:], s2)
        self.assertEqual(set(s2.get_mediums()), set(media[split:]))

        # the project sees the media of both sets
        self.assertEqual(set(s1.project.get_mediums()), set(media))
Пример #41
0
    def test_article(self):
        """Upload a plain-text file through Text and check the stored article.

        Three uploads of the same temporary file are checked: one with an
        explicit date and headline, one with only a date (headline expected
        from the file name), and one with neither (date and headline expected
        from the file name).
        """
        from django.core.files import File
        from tempfile import NamedTemporaryFile

        base = dict(project=amcattest.create_test_project().id,
                    articleset=amcattest.create_test_set().id,
                    medium=amcattest.create_test_medium().id)

        with NamedTemporaryFile(prefix=u"1999-12-31_\u0409\u0429\u0449\u04c3",
                                suffix=".txt") as f:
            text = u'H. C. Andersens for\xe6ldre tilh\xf8rte samfundets laveste lag.'
            f.write(text.encode('utf-8'))
            f.flush()

            dn, fn = os.path.split(f.name)
            fn, _ext = os.path.splitext(fn)

            def upload(**options):
                """Run Text on the temp file; return the one article, re-read from the db."""
                # Open inside `with` so the handle is closed after the run
                # instead of being leaked once per upload.
                with open(f.name) as handle:
                    created, = Text(dict(file=File(handle), encoding=0,
                                         **dict(base, **options))).run()
                return Article.objects.get(pk=created.id)

            # explicit date and headline
            a = upload(date='2010-01-01', headline='simple testxxx')
            # NOTE(review): the upload passes 'simple testxxx' while the
            # assertion expects 'simple test' -- confirm this is intended
            self.assertEqual(a.headline, 'simple test')
            self.assertEqual(a.date.isoformat()[:10], '2010-01-01')
            self.assertEqual(a.text, text)

            # test autodetect headline from filename
            a = upload(date='2010-01-01')
            self.assertEqual(a.headline, fn)
            self.assertEqual(a.date.isoformat()[:10], '2010-01-01')
            self.assertEqual(a.text, text)
            self.assertEqual(a.section, dn)

            # test autodetect date and headline from filename
            a = upload()
            self.assertEqual(a.headline, fn.replace("1999-12-31_", ""))
            self.assertEqual(a.date.isoformat()[:10], '1999-12-31')
            self.assertEqual(a.text, text)
Пример #42
0
    def test_add_many(self):
        """Can we add a large number of articles from one set to another?

        Builds 1213 articles with create=False, stores them in the first set
        through Article.create_articles (with create_id=True), then adds the
        same articles to a second set. After each step the index is flushed
        and the set's reported count must equal the number of articles.
        """
        s = amcattest.create_test_set()
        s2 = amcattest.create_test_set()
        m = amcattest.create_test_medium()
        p = amcattest.create_test_project()

        arts = [
            amcattest.create_test_article(project=p, medium=m, create=False)
            for _x in range(1213)
        ]
        Article.create_articles(arts, s, create_id=True)
        ES().flush()
        self.assertEqual(len(arts), s.get_count())

        s2.add_articles(arts, monitor=ProgressMonitor())
        ES().flush()
        # The assertion is the check; no debug print of the count afterwards.
        self.assertEqual(len(arts), s2.get_count())
Пример #43
0
    def test_full_refresh(self):
        """Check that only refresh_index(full_refresh=True) picks up a changed medium."""
        old_medium = amcattest.create_test_medium()
        new_medium = amcattest.create_test_medium()
        art = amcattest.create_test_article(text='aap noot mies', medium=old_medium)
        articleset = amcattest.create_test_set()
        articleset.add(art)
        articleset.refresh_index()

        def indexed_under(medium):
            # ids of articles in the set that the index files under `medium`
            filters = dict(sets=articleset.id, mediumid=medium.id)
            return set(ES().query_ids(filters=filters))

        self.assertEqual(indexed_under(old_medium), {art.id})

        art.medium = new_medium
        art.save()

        # without a full refresh the index must still show the old medium
        articleset.refresh_index(full_refresh=False)
        self.assertEqual(indexed_under(old_medium), {art.id})
        self.assertEqual(indexed_under(new_medium), set())

        # a full refresh moves the article to the new medium
        articleset.refresh_index(full_refresh=True)
        self.assertEqual(indexed_under(old_medium), set())
        self.assertEqual(indexed_under(new_medium), {art.id})
Пример #44
0
    def test_deduplication(self):
        """Does deduplication work as it is supposed to?

        Creates one article, then the same article again with and without an
        article set (check_duplicate=True), and finally once more with
        duplicate checking switched off.
        """
        art = dict(
            headline="test",
            byline="test",
            date='2001-01-01',
            medium=amcattest.create_test_medium(),
            project=amcattest.create_test_project(),
        )

        a1 = amcattest.create_test_article(**art)

        def q(**filters):
            """Flush the index and return the ids matching the filters as a set."""
            amcates.ES().flush()
            return set(amcates.ES().query_ids(filters=filters))

        self.assertEqual(q(mediumid=art['medium']), {a1.id})

        # duplicate articles should not be added
        a2 = amcattest.create_test_article(check_duplicate=True, **art)
        self.assertFalse(Article.objects.filter(pk=a2.id).exists())
        self.assertEqual(a2.duplicate_of, a1.id)
        self.assertEqual(q(mediumid=art['medium']), {a1.id})

        # however, if an articleset is given the 'existing' article
        # should be added to that set
        s1 = amcattest.create_test_set()
        a3 = amcattest.create_test_article(check_duplicate=True,
                                           articleset=s1,
                                           **art)

        # Check a3 itself here: it is the duplicate created in this step and
        # must not have been stored under its own id.
        self.assertFalse(Article.objects.filter(pk=a3.id).exists())
        self.assertEqual(a3.duplicate_of, a1.id)
        self.assertEqual(q(mediumid=art['medium']), {a1.id})
        self.assertEqual(set(s1.get_article_ids()), {a1.id})
        self.assertEqual(q(sets=s1.id), {a1.id})

        # can we suppress duplicate checking?
        a4 = amcattest.create_test_article(check_duplicate=False, **art)
        self.assertTrue(Article.objects.filter(pk=a4.id).exists())
        self.assertFalse(hasattr(a4, 'duplicate_of'))
        self.assertIn(a4.id, q(mediumid=art['medium']))
Пример #45
0
    def test_create_article_dicts(self):
        """Check the dicts that _get_article_dicts builds for two stored articles.

        The first article is in both sets and has a control character in its
        headline; the second is in one set only and has non-ASCII text.
        """
        from amcat.models.article import Article

        s1 = amcattest.create_test_set()
        s2 = amcattest.create_test_set()
        p = amcattest.create_test_project()
        m = amcattest.create_test_medium()

        a1 = amcattest.create_test_article(
            headline="bla \x1d blo", text="test", project=p, medium=m)
        a2 = amcattest.create_test_article(
            headline="blabla", text="t\xe9st!", byline="\u0904\u0905 test",
            project=p, medium=m)

        s1.add(a1)
        for article in (a1, a2):
            s2.add(article)

        # query the articles back so the dicts are built from db values
        stored = Article.objects.filter(pk__in=[a1.id, a2.id])
        ad1, ad2 = list(_get_article_dicts(stored))

        def check(actual, expected, template):
            # compare field by field so a failure names the offending key
            for key, value in expected.items():
                self.assertEqual(actual[key], value, template % (key, actual[key], value))

        expected1 = dict(id=a1.id,
                         headline="bla   blo",
                         body="test",
                         byline=None,
                         section=None,
                         projectid=p.id,
                         mediumid=m.id,
                         sets={s1.id, s2.id})
        check(ad1, expected1, "Article 1 %s %r!=%r")

        expected2 = dict(id=a2.id,
                         headline="blabla",
                         body="t\xe9st!",
                         byline="\u0904\u0905 test",
                         section=None,
                         projectid=p.id,
                         mediumid=m.id,
                         sets={s2.id})
        check(ad2, expected2, "Article 2 %s %r!=%r")
Пример #46
0
    def test_zip(self):
        """Upload a two-member zip through Text and check the articles of the returned set."""
        from tempfile import NamedTemporaryFile
        from django.core.files import File
        import zipfile

        project_id = amcattest.create_test_project().id
        set_id = amcattest.create_test_set().id
        medium_id = amcattest.create_test_medium().id
        options = dict(project=project_id, articleset=set_id, medium=medium_id)

        # (archive member name, content)
        members = (("headline1.txt", "TEXT1"), ("x/headline2.txt", "TEXT2"))

        with NamedTemporaryFile(prefix=u"upload_test", suffix=".zip") as tmp:
            with zipfile.ZipFile(tmp, "w") as archive:
                for member, content in members:
                    archive.writestr(member, content)
            tmp.flush()

            script = Text(file=File(tmp), date='2010-01-01', **options)
            arts = list(script.run().articles.all())

            # (article attribute, expected set of values)
            expectations = (
                ("headline", {"headline1", "headline2"}),
                ("section", {'', "x"}),
                ("text", {"TEXT1", "TEXT2"}),
            )
            for attribute, expected in expectations:
                self.assertEqual({getattr(art, attribute) for art in arts}, expected)
Пример #47
0
    def test_zip(self):
        """Upload a two-member zip through Text and check the articles run() returns."""
        from tempfile import NamedTemporaryFile
        from django.core.files import File
        import zipfile

        project_id = amcattest.create_test_project().id
        set_id = amcattest.create_test_set().id
        medium_id = amcattest.create_test_medium().id
        upload_options = dict(project=project_id,
                              articleset=set_id,
                              medium=medium_id)

        with NamedTemporaryFile(prefix=u"upload_test", suffix=".zip") as zip_file:
            with zipfile.ZipFile(zip_file, "w") as archive:
                archive.writestr("headline1.txt", "TEXT1")
                archive.writestr("x/headline2.txt", "TEXT2")
            zip_file.flush()

            script = Text(file=File(zip_file), date='2010-01-01', **upload_options)
            arts = script.run()

            headlines = {art.headline for art in arts}
            self.assertEqual(headlines, {"headline1", "headline2"})
            sections = {art.section for art in arts}
            self.assertEqual(sections, {'', "x"})
            texts = {art.text for art in arts}
            self.assertEqual(texts, {"TEXT1", "TEXT2"})
Пример #48
0
    def test_aggregation(self):
        """Check listing and per-medium aggregation for a project with two media."""
        project = amcattest.create_test_project()
        m1 = amcattest.create_test_medium()
        m2 = amcattest.create_test_medium()
        arts1 = set(amcattest.create_test_article(project=project, medium=m1)
                    for _ in range(5))
        arts2 = set(amcattest.create_test_article(project=project, medium=m2)
                    for _ in range(15))
        everything = arts1 | arts2

        # selecting by project, and by project plus medium
        self.assertEqual(self.list(projects=[project.id]), everything)
        self.assertEqual(self.list(projects=[project.id], mediums=[m1.id]), arts1)

        # one row per medium, holding the article count
        table = self.aggr(projects=[project.id], xAxis='medium')
        self.assertEqual(set(table), {(5,), (15,)})

        # second project whose set contains the first project's articles
        other_project = amcattest.create_test_project()
        articleset = amcattest.create_test_set(project=other_project)
        articleset.add(*everything)
        # the result of this call is not asserted on
        table = self.aggr(projects=[other_project.id],
                          articlesets=[articleset.id],
                          xAxis='medium')
Пример #49
0
    def test_article(self):
        """Upload a plain-text file through Text and check the stored article.

        The same temporary file is uploaded three times: with an explicit
        date and headline, with only a date (headline expected from the file
        name), and with neither (both expected from the file name).
        """
        from django.core.files import File
        from tempfile import NamedTemporaryFile

        base = dict(project=amcattest.create_test_project().id,
                    articleset=amcattest.create_test_set().id,
                    medium=amcattest.create_test_medium().id)

        with NamedTemporaryFile(prefix=u"1999-12-31_\u0409\u0429\u0449\u04c3", suffix=".txt") as f:
            text = u'H. C. Andersens for\xe6ldre tilh\xf8rte samfundets laveste lag.'
            f.write(text.encode('utf-8'))
            f.flush()

            dn, fn = os.path.split(f.name)
            fn, _ext = os.path.splitext(fn)

            def run_upload(**options):
                """Run Text on the temp file; return the one article, re-read from the db."""
                form_data = dict(base, encoding=0, **options)
                # Open inside `with` so each upload closes its handle rather
                # than leaving it to the garbage collector.
                with open(f.name) as handle:
                    form_data['file'] = File(handle)
                    created, = Text(form_data).run()
                return Article.objects.get(pk=created.id)

            # explicit date and headline
            a = run_upload(date='2010-01-01', headline='simple testxxx')
            # NOTE(review): the upload passes 'simple testxxx' while the
            # assertion expects 'simple test' -- confirm this is intended
            self.assertEqual(a.headline, 'simple test')
            self.assertEqual(a.date.isoformat()[:10], '2010-01-01')
            self.assertEqual(a.text, text)

            # test autodetect headline from filename
            a = run_upload(date='2010-01-01')
            self.assertEqual(a.headline, fn)
            self.assertEqual(a.date.isoformat()[:10], '2010-01-01')
            self.assertEqual(a.text, text)
            self.assertEqual(a.section, dn)

            # test autodetect date and headline from filename
            a = run_upload()
            self.assertEqual(a.headline, fn.replace("1999-12-31_", ""))
            self.assertEqual(a.date.isoformat()[:10], '1999-12-31')
            self.assertEqual(a.text, text)
Пример #50
0
    def test_aggregate(self):
        """Check aggregate_query, statistics and list_media for one article set.

        Four articles across two media and three dates are stored in the set
        under test; a fifth article in another set must not show up.
        """
        from amcat.models import Article

        m1 = amcattest.create_test_medium(name="De Nep-Krant")
        m2 = amcattest.create_test_medium()
        m3 = amcattest.create_test_medium()
        s1 = amcattest.create_test_set()
        s2 = amcattest.create_test_set()

        # article in a different set and medium; never part of the results
        amcattest.create_test_article(text='aap noot mies', medium=m3, articleset=s2)

        # (text, medium, date) of the articles that go into s1
        specs = [
            ('aap noot mies', m1, '2001-01-01'),
            ('noot mies wim zus', m2, '2001-02-01'),
            ('mies bla bla bla wim zus jet', m2, '2002-01-01'),
            ('noot mies wim zus', m2, '2001-02-03'),
        ]
        new_articles = [
            amcattest.create_test_article(text=text, medium=medium, date=date, create=False)
            for text, medium, date in specs
        ]
        Article.create_articles(new_articles, articleset=s1, check_duplicate=False)
        ES().flush()

        def grouped(**options):
            # aggregate over s1 only, returning {group: count}
            return dict(ES().aggregate_query(filters=dict(sets=s1.id), **options))

        # counts per mediumid
        self.assertEqual(grouped(group_by="mediumid"), {m1.id: 1, m2.id: 3})

        # counts per medium (name)
        self.assertEqual(grouped(group_by="medium"), {m1.name: 1, m2.name: 3})

        # counts per year
        per_year = {datetime(2001, 1, 1): 3, datetime(2002, 1, 1): 1}
        self.assertEqual(grouped(group_by="date", date_interval="year"), per_year)

        # counts per month
        per_month = {
            datetime(2001, 1, 1): 1,
            datetime(2002, 1, 1): 1,
            datetime(2001, 2, 1): 2,
        }
        self.assertEqual(grouped(group_by="date", date_interval="month"), per_month)

        # set statistics
        stats = ES().statistics(filters=dict(sets=s1.id))
        self.assertEqual(stats.n, 4)
        self.assertEqual(stats.start_date, datetime(2001, 1, 1))
        self.assertEqual(stats.end_date, datetime(2002, 1, 1))

        # media list
        listed = set(ES().list_media(filters=dict(sets=s1.id)))
        self.assertEqual(listed, {m1.id, m2.id})
Пример #51
0
    def test_deduplicate(self):
        """One article should be deleted from artset and added to project 2"""
        p = amcattest.create_test_project()

        m = amcattest.create_test_medium()

        art2 = amcattest.create_test_article(headline='blaat1',
                                             project=p,
                                             medium=m,
                                             text="""
bla bla bla
bla bla bla bla


var c=0;
var t;
var timer_is_on=0;

function timedCount()
{
document.getElementById('txt').value=c;
c=c+1;
t=setTimeout(function(){timedCount()},1000);
}

function doTimer()
{
if (!timer_is_on)
  {
  timer_is_on=1;
  timedCount();
  }
}

function stopCount()
{
clearTimeout(t);
timer_is_on=0;
}
""",
                                             date=m_date(2012, 01, 01),
                                             section="kaas",
                                             metastring={
                                                 'moet_door':
                                                 False,
                                                 'delete?':
                                                 True,
                                                 'mist':
                                                 'link, heeft wel meer tekst'
                                             })

        art1 = amcattest.create_test_article(
            headline='blaat1',
            project=p,
            text="""
bla bla bla
[bla](http://www.bla.com) bla bla bla
""",
            date=m_date(2012, 01, 01),
            section="kaas",
            metastring={
                'moet_door': True,
                'delete?': False,
                'mist': 'niets'
            },
            medium=m,
        )

        art3 = amcattest.create_test_article(headline='blaat1',
                                             project=p,
                                             medium=m,
                                             text="""
bla bla bla
[bla](http://www.bla.com) bla bla bla
""",
                                             date=m_date(2012, 01, 01),
                                             metastring={'mist': '3 fields'})

        art4 = amcattest.create_test_article(headline='blaat1',
                                             project=p,
                                             medium=m,
                                             text="""
bla bla bla
[bla](http://www.bla.com) bla bla bla
""",
                                             date=m_date(2012, 01, 01),
                                             section="kaas",
                                             metastring={
                                                 'moet_door': False,
                                                 'delete?': True,
                                                 'mist': 'later gemaakt'
                                             })

        artset = amcattest.create_test_set(articles=[art1, art2, art3, art4])
        d = DeduplicateScript(articleset=artset.id, slow=True)
        d.run(None)
        self.assertEqual(len(artset.articles.all()), 1)
        self.assertEqual(len(Article.objects.filter(project=p)), 4)
        self.assertEqual(art1.pk, artset.articles.all()[0].pk)
Пример #52
0
 def test_medium_name(self):
     m = amcattest.create_test_medium(name="de testkrant")
     a = amcattest.create_test_article(medium=m)
     r = amcates.ES().query(filters={"id": a.id}, fields=["medium"])
     print(r)