def test_get_rows(self):
    """Check which row tuples _get_rows yields for different flag combinations."""
    # Only the schema is used below; the other names keep the 5-way unpacking intact.
    schema, _codebook, _strf, _intf, _codef = amcattest.create_test_schema_with_fields()
    job = amcattest.create_test_job(unitschema=schema, articleschema=schema, narticles=5)
    articles = list(job.articleset.articles.all())
    first = articles[0]
    coding = amcattest.create_test_coding(codingjob=job, article=first)
    coded_first = job.get_coded_article(first)

    def fetch(include_sentences, include_uncoded_articles):
        """Return the rows for [job] as a set, always with include_multiple=True."""
        return set(_get_rows([job], include_sentences=include_sentences, include_multiple=True,
                             include_uncoded_articles=include_uncoded_articles))

    article_only = (job, coded_first, first, None, coding, None)

    # simple coding
    rows = fetch(include_sentences=False, include_uncoded_articles=False)
    self.assertEqual(rows, {article_only})

    # test uncoded_articles: the other articles are expected as rows without codings
    rows = fetch(include_sentences=False, include_uncoded_articles=True)
    uncoded = {(job, job.get_coded_article(a), a, None, None, None) for a in articles[1:]}
    self.assertEqual(rows, {article_only} | uncoded)

    # test sentence: a sentence coding only shows up when include_sentences is set
    sentence = amcattest.create_test_sentence(article=first)
    sentence_coding = amcattest.create_test_coding(codingjob=job, article=first, sentence=sentence)
    rows = fetch(include_sentences=False, include_uncoded_articles=False)
    self.assertEqual(rows, {article_only})
    rows = fetch(include_sentences=True, include_uncoded_articles=False)
    first_sentence_row = (job, coded_first, first, sentence, coding, sentence_coding)
    self.assertEqual(rows, {first_sentence_row})

    # multiple sentence codings on the same article should duplicate article(coding)
    sentence2 = amcattest.create_test_sentence(article=first)
    sentence_coding2 = amcattest.create_test_coding(codingjob=job, article=first, sentence=sentence2)
    rows = fetch(include_sentences=True, include_uncoded_articles=False)
    second_sentence_row = (job, coded_first, first, sentence2, coding, sentence_coding2)
    self.assertEqual(rows, {first_sentence_row, second_sentence_row})

    # if an article contains an article coding but no sentence coding,
    # it should still show up with sentence=True
    coding2 = amcattest.create_test_coding(codingjob=job, article=articles[1])
    rows = fetch(include_sentences=True, include_uncoded_articles=False)
    article_only2 = (job, job.get_coded_article(articles[1]), articles[1], None, coding2, None)
    self.assertEqual(rows, {first_sentence_row, second_sentence_row, article_only2})
示例#2
0
 def test_get_rows(self):
     """Check which row tuples _get_rows yields for different flag combinations."""
     # Only the schema is used below; the other names keep the 7-way unpacking intact.
     schema, _codebook, _strf, _intf, _codef, _, _ = amcattest.create_test_schema_with_fields()
     job = amcattest.create_test_job(unitschema=schema, articleschema=schema, narticles=5)
     articles = list(job.articleset.articles.all())
     first = articles[0]
     coding = amcattest.create_test_coding(codingjob=job, article=first)
     coded_first = job.get_coded_article(first)

     def fetch(include_sentences, include_uncoded_articles):
         """Return the rows for [job] as a set, always with include_multiple=True."""
         return set(_get_rows([job], include_sentences=include_sentences, include_multiple=True,
                              include_uncoded_articles=include_uncoded_articles))

     article_only = (job, coded_first, first, None, coding, None)

     # simple coding
     rows = fetch(include_sentences=False, include_uncoded_articles=False)
     self.assertEqual(rows, {article_only})

     # test uncoded_articles: the other articles are expected as rows without codings
     rows = fetch(include_sentences=False, include_uncoded_articles=True)
     uncoded = {(job, job.get_coded_article(a), a, None, None, None) for a in articles[1:]}
     self.assertEqual(rows, {article_only} | uncoded)

     # test sentence: a sentence coding only shows up when include_sentences is set
     sentence = amcattest.create_test_sentence(article=first)
     sentence_coding = amcattest.create_test_coding(codingjob=job, article=first, sentence=sentence)
     rows = fetch(include_sentences=False, include_uncoded_articles=False)
     self.assertEqual(rows, {article_only})
     rows = fetch(include_sentences=True, include_uncoded_articles=False)
     first_sentence_row = (job, coded_first, first, sentence, coding, sentence_coding)
     self.assertEqual(rows, {first_sentence_row})

     # multiple sentence codings on the same article should duplicate article(coding)
     sentence2 = amcattest.create_test_sentence(article=first)
     sentence_coding2 = amcattest.create_test_coding(codingjob=job, article=first, sentence=sentence2)
     rows = fetch(include_sentences=True, include_uncoded_articles=False)
     second_sentence_row = (job, coded_first, first, sentence2, coding, sentence_coding2)
     self.assertEqual(rows, {first_sentence_row, second_sentence_row})

     # if an article contains an article coding but no sentence coding,
     # it should still show up with sentence=True
     coding2 = amcattest.create_test_coding(codingjob=job, article=articles[1])
     rows = fetch(include_sentences=True, include_uncoded_articles=False)
     article_only2 = (job, job.get_coded_article(articles[1]), articles[1], None, coding2, None)
     self.assertEqual(rows, {first_sentence_row, second_sentence_row, article_only2})
示例#3
0
    def setUp(self):
        """Build the shared fixture: a schema with three fields, five jobs and some codings."""
        from amcat.models.coding.coding import CodingValue

        # Schema plus a codebook that contains a single code labelled "CODED".
        self.schema = amcattest.create_test_schema()
        self.codebook = amcattest.create_test_codebook()
        self.code = amcattest.create_test_code(label="CODED")
        self.codebook.add_code(self.code)

        # Field types are fetched by primary key; presumably 1/2/5 are the
        # text/integer/code types -- verify against the installed fixture data.
        field_types = CodingSchemaFieldType.objects
        texttype = field_types.get(pk=1)
        inttype = field_types.get(pk=2)
        codetype = field_types.get(pk=5)

        add_field = CodingSchemaField.objects.create
        self.textfield = add_field(codingschema=self.schema, fieldnr=1,
                                   fieldtype=texttype, label="Text")
        self.intfield = add_field(codingschema=self.schema, fieldnr=2,
                                  fieldtype=inttype, label="Number")
        self.codefield = add_field(codingschema=self.schema, fieldnr=3,
                                   fieldtype=codetype, label="Code",
                                   codebook=self.codebook)

        self.users = [amcattest.create_test_user() for _ in range(2)]

        # Four jobs for the first user and one for the second; the set for
        # position i is requested with articles=2 * (i + 1).
        self.articles, self.jobs, self.asets = [], [], []
        for position, user_index in enumerate([0, 0, 0, 0, 1]):
            aset = amcattest.create_test_set(articles=2 * (position + 1))
            self.articles.extend(aset.articles.all())
            self.asets.append(aset)
            self.jobs.append(amcattest.create_test_job(articleschema=self.schema,
                                                       unitschema=self.schema,
                                                       coder=self.users[user_index],
                                                       articleset=aset))

        first_job = self.jobs[0]
        first_article = self.articles[0]

        # Two article-level codings; the second one is completed and commented.
        self.an1 = Coding.objects.create(codingjob=first_job, article=first_article)
        self.an2 = Coding.objects.create(codingjob=first_job, article=self.articles[1])
        self.an2.set_status(STATUS_COMPLETE)
        self.an2.comments = 'Makkie!'
        self.an2.save()

        # Two sentence-level codings that share one sentence.
        sent = amcattest.create_test_sentence()
        self.sa1 = Coding.objects.create(codingjob=first_job, article=first_article,
                                         sentence=sent)
        self.sa2 = Coding.objects.create(codingjob=first_job, article=first_article,
                                         sentence=sent)

        add_value = CodingValue.objects.create
        add_value(coding=self.sa1, field=self.intfield, intval=1)
        add_value(coding=self.sa1, field=self.textfield, strval="bla")
        add_value(coding=self.sa2, field=self.textfield, strval="blx")
        add_value(coding=self.sa1, field=self.codefield, intval=self.code.id)
示例#4
0
    def test_results(self):
        """Check _get_results for article codings, code parents and sentence codings."""
        codebook, codes = amcattest.create_test_codebook_with_codes()
        # Article-level schema and its fields.
        article_parts = amcattest.create_test_schema_with_fields(codebook=codebook,
                                                                 isarticleschema=True)
        schema, codebook, strf, intf, codef, _, _ = article_parts
        # Sentence-level (unit) schema and its fields.
        sentence_parts = amcattest.create_test_schema_with_fields(codebook=codebook)
        sschema, codebook, sstrf, sintf, scodef, _, _ = sentence_parts

        job = amcattest.create_test_job(unitschema=sschema, articleschema=schema, narticles=5)
        articles = list(job.articleset.articles.all())

        first_coding = amcattest.create_test_coding(codingjob=job, article=articles[0])

        # test simple coding with a codebook code
        first_coding.update_values({strf: "bla", intf: 1, codef: codes["A1b"].id})
        by_id = self._get_results([job], {strf: {}, intf: {}, codef: {"ids": True}})
        self.assertEqual(by_id, [('bla', 1, codes["A1b"].id)])

        # test multiple codings and parents
        second_coding = amcattest.create_test_coding(codingjob=job, article=articles[1])
        second_coding.update_values({strf: "blx", intf: 1, codef: codes["B1"].id})
        with_parents = self._get_results(
            [job], {strf: {}, intf: {}, codef: {"labels": True, "parents": 2}})
        self.assertEqual(set(with_parents),
                         {('bla', 1, "A", "A1", "A1b"), ('blx', 1, "B", "B1", "B1")})

        # test sentence result
        sentence = amcattest.create_test_sentence(article=articles[0])
        sentence_coding = amcattest.create_test_coding(codingjob=job, article=articles[0],
                                                       sentence=sentence)
        sentence_coding.update_values({sstrf: "z", sintf: -1, scodef: codes["A"].id})

        per_sentence = self._get_results([job], {strf: {}, sstrf: {}, sintf: {}}, export_level=2)
        self.assertEqual(set(per_sentence), {('bla', 'z', -1), ('blx', None, None)})
    def test_results(self):
        """Check _get_results for article codings, code parents and sentence codings."""
        codebook, codes = amcattest.create_test_codebook_with_codes()
        # Article-level schema and its fields.
        article_parts = amcattest.create_test_schema_with_fields(codebook=codebook,
                                                                 isarticleschema=True)
        schema, codebook, strf, intf, codef = article_parts
        # Sentence-level (unit) schema and its fields.
        sentence_parts = amcattest.create_test_schema_with_fields(codebook=codebook)
        sschema, codebook, sstrf, sintf, scodef = sentence_parts

        job = amcattest.create_test_job(unitschema=sschema, articleschema=schema, narticles=5)
        articles = list(job.articleset.articles.all())

        first_coding = amcattest.create_test_coding(codingjob=job, article=articles[0])

        # test simple coding with a codebook code
        first_coding.update_values({strf: "bla", intf: 1, codef: codes["A1b"].id})
        by_id = self._get_results([job], {strf: {}, intf: {}, codef: {"ids": True}})
        self.assertEqual(by_id, [('bla', 1, codes["A1b"].id)])

        # test multiple codings and parents
        second_coding = amcattest.create_test_coding(codingjob=job, article=articles[1])
        second_coding.update_values({strf: "blx", intf: 1, codef: codes["B1"].id})
        with_parents = self._get_results(
            [job], {strf: {}, intf: {}, codef: {"labels": True, "parents": 2}})
        self.assertEqual(set(with_parents),
                         {('bla', 1, "A", "A1", "A1b"), ('blx', 1, "B", "B1", "B1")})

        # test sentence result
        sentence = amcattest.create_test_sentence(article=articles[0])
        sentence_coding = amcattest.create_test_coding(codingjob=job, article=articles[0],
                                                       sentence=sentence)
        sentence_coding.update_values({sstrf: "z", sintf: -1, scodef: codes["A"].id})

        per_sentence = self._get_results([job], {strf: {}, sstrf: {}, sintf: {}}, export_level=2)
        self.assertEqual(set(per_sentence), {('bla', 'z', -1), ('blx', None, None)})
示例#6
0
    def test_get_articles(self):
        """Exercise get_articles: invalid splits raise ValueError, valid ones yield the parts."""
        from amcat.models import Sentence

        def _get_articles(article, sentences):
            # list() forces evaluation in case get_articles produces its result lazily.
            return list(get_articles(article, sentences))

        # Should raise exception if sentences not in article
        article, sentences = self.create_test_sentences()
        s1 = Sentence.objects.filter(id=amcattest.create_test_sentence().id)
        self.assertRaises(ValueError, _get_articles, article, s1)

        # Should raise an exception if we try to split on headline
        self.assertRaises(ValueError, _get_articles, article,
                          sentences.filter(parnr=1))

        # Should return a "copy", with byline in "text" property
        arts = _get_articles(article, Sentence.objects.none())
        # Save with an explicit loop: map() is lazy on Python 3, so using it
        # purely for its side effects would never call save().
        for art in arts:
            art.save()

        self.assertEqual(len(arts), 1)
        sbd.create_sentences(arts[0])

        self.assertEqual([s.sentence for s in sentences[1:]],
                         [s.sentence for s in arts[0].sentences.all()[1:]])

        self.assertTrue("foo" in arts[0].text)

        # Should be able to split on byline
        self.assertEqual(2, len(_get_articles(article, sentences[1:2])))
        a, b = _get_articles(article, sentences[4:5])

        # Check if text on splitted articles contains expected
        self.assertTrue("Einde" not in a.text)
        self.assertTrue("Einde" in b.text)
    def test_get_articles(self):
        """Exercise get_articles: invalid splits raise ValueError, valid ones yield the parts."""
        from amcat.models import Sentence

        def _get_articles(article, sentences):
            # list() forces evaluation in case get_articles produces its result lazily.
            return list(get_articles(article, sentences))

        # Should raise exception if sentences not in article
        article, sentences = self.create_test_sentences()
        s1 = Sentence.objects.filter(id=amcattest.create_test_sentence().id)
        self.assertRaises(ValueError, _get_articles, article, s1)

        # Should raise an exception if we try to split on headline
        self.assertRaises(ValueError, _get_articles, article, sentences.filter(parnr=1))

        # Should return a "copy", with byline in "text" property
        arts = _get_articles(article, Sentence.objects.none())
        # Save with an explicit loop: map() is lazy on Python 3, so using it
        # purely for its side effects would never call save().
        for art in arts:
            art.save()

        self.assertEqual(len(arts), 1)
        sbd.create_sentences(arts[0])

        self.assertEqual(
            [s.sentence for s in sentences[1:]],
            [s.sentence for s in arts[0].sentences.all()[1:]]
        )

        self.assertTrue("foo" in arts[0].text)

        # Should be able to split on byline
        self.assertEqual(2, len(_get_articles(article, sentences[1:2])))
        a, b = _get_articles(article, sentences[4:5])

        # Check if text on splitted articles contains expected
        self.assertTrue("Einde" not in a.text)
        self.assertTrue("Einde" in b.text)
示例#8
0
    def test_codedarticle(self):
        """Check that CodedArticle exposes its article coding and sentence codings."""
        article_coding = amcattest.create_test_coding()
        sentence = amcattest.create_test_sentence()
        job = article_coding.codingjob
        # Two sentence-level codings in the same job, both on the same sentence.
        first = amcattest.create_test_coding(sentence=sentence, codingjob=job)
        second = amcattest.create_test_coding(sentence=sentence, codingjob=job)
        coded = CodedArticle(article_coding)

        self.assertEqual(set(coded.sentence_codings), {first, second})
        self.assertEqual(coded.coding, article_coding)
示例#9
0
    def test_codedarticle(self):
        """Check that CodedArticle exposes its article coding and sentence codings."""
        article_coding = amcattest.create_test_coding()
        sentence = amcattest.create_test_sentence()
        job = article_coding.codingjob
        # Two sentence-level codings in the same job, both on the same sentence.
        first = amcattest.create_test_coding(sentence=sentence, codingjob=job)
        second = amcattest.create_test_coding(sentence=sentence, codingjob=job)
        coded = CodedArticle(article_coding)

        self.assertEqual(set(coded.sentence_codings), {first, second})
        self.assertEqual(coded.coding, article_coding)
示例#10
0
 def test_create(self):
     """Can we create a coding, and does it pick the schema matching its level?"""
     article_schema = amcattest.create_test_schema()
     job = amcattest.create_test_job(unitschema=self.schema, articleschema=article_schema)

     # A coding without a sentence is expected to use the job's article schema.
     article_coding = amcattest.create_test_coding(codingjob=job)
     self.assertIsNotNone(article_coding)
     self.assertIn(article_coding.article, job.articleset.articles.all())
     self.assertEqual(article_coding.schema, article_schema)

     # A coding with a sentence is expected to use the job's unit schema.
     sentence = amcattest.create_test_sentence()
     sentence_coding = amcattest.create_test_coding(codingjob=job, sentence=sentence)
     self.assertEqual(sentence_coding.schema, self.schema)
示例#11
0
 def test_create(self):
     """Can we create a coding, and does it pick the schema matching its level?"""
     article_schema = amcattest.create_test_schema()
     job = amcattest.create_test_job(unitschema=self.schema, articleschema=article_schema)

     # A coding without a sentence is expected to use the job's article schema.
     article_coding = amcattest.create_test_coding(codingjob=job)
     self.assertIsNotNone(article_coding)
     self.assertIn(article_coding.coded_article.article, job.articleset.articles.all())
     self.assertEqual(article_coding.schema, article_schema)

     # A coding with a sentence is expected to use the job's unit schema.
     sentence = amcattest.create_test_sentence()
     sentence_coding = amcattest.create_test_coding(codingjob=job, sentence=sentence)
     self.assertEqual(sentence_coding.schema, self.schema)
示例#12
0
    def todo_test_process_sentence(self):
        """Check the lemmata and POS categories Frog returns for a short Dutch phrase.

        The todo_ prefix means the unittest runner does not collect this method.
        """
        sentence = amcattest.create_test_sentence(sentence="de groenste huizen")
        analysis_sentence = amcattest.create_test_analysis_sentence(sentence=sentence)

        frog = Frog(None)
        tokens, triples = frog.process_sentence(analysis_sentence)
        # Materialise once so the tokens can be traversed twice.
        tokens = list(tokens)

        expected_lemmata = ["de", "groen", "huis"]
        self.assertEqual([tok.lemma for tok in tokens], expected_lemmata)

        expected_pos = ["D", "A", "N"]
        self.assertEqual([tok.pos for tok in tokens], expected_pos)
示例#13
0
    def todo_test_process_sentence(self):
        """Check the lemmata and POS categories Frog returns for a short Dutch phrase.

        The todo_ prefix means the unittest runner does not collect this method.
        """
        sentence = amcattest.create_test_sentence(sentence="de groenste huizen")
        analysis_sentence = amcattest.create_test_analysis_sentence(sentence=sentence)

        frog = Frog(None)
        tokens, triples = frog.process_sentence(analysis_sentence)
        # Materialise once so the tokens can be traversed twice.
        tokens = list(tokens)

        expected_lemmata = ["de", "groen", "huis"]
        self.assertEqual([tok.lemma for tok in tokens], expected_lemmata)

        expected_pos = ["D", "A", "N"]
        self.assertEqual([tok.pos for tok in tokens], expected_pos)
示例#14
0
 def todo_test_triples(self):
     """Check the triples FrogTriples returns for "hij gaf hem een boek".

     The todo_ prefix means the unittest runner does not collect this method.
     """
     sentence = amcattest.create_test_sentence(sentence="hij gaf hem een boek")
     s = amcattest.create_test_analysis_sentence(sentence=sentence)

     found = set(FrogTriples(None).get_triples(s))

     # (second argument, third argument, relation label) of each expected TripleValues
     expected_parts = [
         (0, 1, 'su'),
         (2, 1, 'obj2'),
         (3, 4, 'det'),
         (4, 1, 'obj1'),
     ]
     expected = {TripleValues(s, left, right, label) for left, right, label in expected_parts}
     self.assertEqual(found, expected)
示例#15
0
 def todo_test_triples(self):
     """Check the triples FrogTriples returns for "hij gaf hem een boek".

     The todo_ prefix means the unittest runner does not collect this method.
     """
     sentence = amcattest.create_test_sentence(sentence="hij gaf hem een boek")
     s = amcattest.create_test_analysis_sentence(sentence=sentence)

     found = set(FrogTriples(None).get_triples(s))

     # (second argument, third argument, relation label) of each expected TripleValues
     expected_parts = [
         (0, 1, "su"),
         (2, 1, "obj2"),
         (3, 4, "det"),
         (4, 1, "obj1"),
     ]
     expected = {TripleValues(s, left, right, label) for left, right, label in expected_parts}
     self.assertEqual(found, expected)
示例#16
0
    def setUp(self):
        """Build the shared fixture: a schema with three fields, five jobs and some codings."""
        from amcat.models.coding.coding import CodingValue

        # Schema plus a codebook that contains a single code labelled "CODED".
        self.schema = amcattest.create_test_schema()
        self.codebook = amcattest.create_test_codebook()
        self.code = amcattest.create_test_code(label="CODED")
        self.codebook.add_code(self.code)

        # Field types are fetched by primary key; presumably 1/2/5 are the
        # text/integer/code types -- verify against the installed fixture data.
        field_types = CodingSchemaFieldType.objects
        texttype = field_types.get(pk=1)
        inttype = field_types.get(pk=2)
        codetype = field_types.get(pk=5)

        add_field = CodingSchemaField.objects.create
        self.textfield = add_field(codingschema=self.schema, fieldnr=1,
                                   fieldtype=texttype, label="Text")
        self.intfield = add_field(codingschema=self.schema, fieldnr=2,
                                  fieldtype=inttype, label="Number")
        self.codefield = add_field(codingschema=self.schema, fieldnr=3,
                                   fieldtype=codetype, label="Code",
                                   codebook=self.codebook)

        self.users = [amcattest.create_test_user() for _ in range(2)]

        # Four jobs for the first user and one for the second; the set for
        # position i is requested with articles=2 * (i + 1).
        self.articles, self.jobs, self.asets = [], [], []
        for position, user_index in enumerate([0, 0, 0, 0, 1]):
            aset = amcattest.create_test_set(articles=2 * (position + 1))
            self.articles.extend(aset.articles.all())
            self.asets.append(aset)
            self.jobs.append(amcattest.create_test_job(articleschema=self.schema,
                                                       unitschema=self.schema,
                                                       coder=self.users[user_index],
                                                       articleset=aset))

        first_job = self.jobs[0]
        first_article = self.articles[0]

        # Two article-level codings; the second one is completed and commented.
        self.an1 = Coding.objects.create(codingjob=first_job, article=first_article)
        self.an2 = Coding.objects.create(codingjob=first_job, article=self.articles[1])
        self.an2.set_status(STATUS_COMPLETE)
        self.an2.comments = 'Makkie!'
        self.an2.save()

        # Two sentence-level codings that share one sentence.
        sent = amcattest.create_test_sentence()
        self.sa1 = Coding.objects.create(codingjob=first_job, article=first_article,
                                         sentence=sent)
        self.sa2 = Coding.objects.create(codingjob=first_job, article=first_article,
                                         sentence=sent)

        add_value = CodingValue.objects.create
        add_value(coding=self.sa1, field=self.intfield, intval=1)
        add_value(coding=self.sa1, field=self.textfield, strval="bla")
        add_value(coding=self.sa2, field=self.textfield, strval="blx")
        add_value(coding=self.sa1, field=self.codefield, intval=self.code.id)
示例#17
0
    def test_nqueries_table_sentence_codings(self):
        """Check that building the sentence codings table needs at most 20 queries."""
        from amcat.models.coding.coding import CodingValue
        from amcat.tools.djangotoolkit import list_queries, query_list_to_table
        coded_article = CodedArticle(self.an1)

        # NOTE(review): an earlier comment spoke of 1000 sentence annotations,
        # but the loop runs once and so adds a single extra annotation.
        for value in range(1):
            sentence = amcattest.create_test_sentence()
            sentence_coding = Coding.objects.create(codingjob=self.jobs[0],
                                                    article=self.articles[0],
                                                    sentence=sentence)
            CodingValue.objects.create(coding=sentence_coding, field=self.intfield, intval=value)

        language = coded_article.codingjob.coder.userprofile.language
        with list_queries() as queries:
            table = get_table_sentence_codings_article(coded_article, language)
            table.output()  # force getting all values
        # For debugging: query_list_to_table(queries, output=print, maxqlen=190)
        query_count = len(queries)
        self.assertTrue(query_count <= 20, "Retrieving table used %i queries" % query_count)
示例#18
0
    def test_process(self):
        """Run a minimal token-only AnalysisScript subclass through process_sentence."""
        from amcat.models.token import TokenValues

        class X(AnalysisScript):
            """Script whose tokens are the whitespace-separated words of the sentence."""

            def __init__(self):
                super(X, self).__init__(analysis=None, tokens=True, triples=False)

            def get_tokens(self, analysis_sentence, memo=None):
                words = analysis_sentence.sentence.sentence.split()
                # Positions are 1-based.
                for position, word in enumerate(words, 1):
                    yield TokenValues(analysis_sentence, position, word, None, None, None, None)

        text = amcattest.create_test_sentence(sentence="dit is een test")
        a = amcattest.create_test_analysis_sentence(sentence=text)

        outcome = list(X().process_sentence(a))
        tokens, triples = outcome
        print(tokens)
        self.assertIsNone(triples)

        expected_first = TokenValues(a, 1, "dit", None, None, None, None)
        self.assertEqual(list(tokens)[0], expected_first)