def test_get_rows(self):
    """Verify the row tuples produced by _get_rows for article and sentence codings.

    Each expected row has the form
    (job, coded article, article, sentence, article coding, sentence coding).
    """
    # Only the schema is used below; the other unpacked values are not needed in this test.
    schema, _codebook, _strf, _intf, _codef = amcattest.create_test_schema_with_fields()
    job = amcattest.create_test_job(unitschema=schema, articleschema=schema, narticles=5)
    articles = list(job.articleset.articles.all())
    first_article = articles[0]
    article_coding = amcattest.create_test_coding(codingjob=job, article=first_article)
    coded_article = job.get_coded_article(first_article)

    def collect(include_sentences, include_uncoded_articles=False):
        """Run _get_rows for the single job and gather the rows into a set."""
        return set(_get_rows(
            [job],
            include_sentences=include_sentences,
            include_multiple=True,
            include_uncoded_articles=include_uncoded_articles,
        ))

    article_only_row = (job, coded_article, first_article, None, article_coding, None)

    # A lone article coding gives exactly one row.
    self.assertEqual(collect(False), {article_only_row})

    # When requested, uncoded articles show up as rows without any coding.
    rows = collect(False, include_uncoded_articles=True)
    expected = {article_only_row}
    expected.update(
        (job, job.get_coded_article(other), other, None, None, None)
        for other in articles[1:]
    )
    self.assertEqual(rows, expected)

    # A sentence coding is only reported when sentences are included.
    sentence = amcattest.create_test_sentence(article=first_article)
    sentence_coding = amcattest.create_test_coding(
        codingjob=job, article=first_article, sentence=sentence
    )
    self.assertEqual(collect(False), {article_only_row})
    first_sentence_row = (
        job, coded_article, first_article, sentence, article_coding, sentence_coding
    )
    self.assertEqual(collect(True), {first_sentence_row})

    # Multiple sentence codings on the same article should duplicate the article (coding).
    sentence2 = amcattest.create_test_sentence(article=first_article)
    sentence_coding2 = amcattest.create_test_coding(
        codingjob=job, article=first_article, sentence=sentence2
    )
    second_sentence_row = (
        job, coded_article, first_article, sentence2, article_coding, sentence_coding2
    )
    self.assertEqual(collect(True), {first_sentence_row, second_sentence_row})

    # An article with an article coding but no sentence coding should still
    # show up when sentences are included.
    second_article = articles[1]
    article_coding2 = amcattest.create_test_coding(codingjob=job, article=second_article)
    rows = collect(True)
    lone_article_row = (
        job, job.get_coded_article(second_article), second_article, None, article_coding2, None
    )
    self.assertEqual(rows, {first_sentence_row, second_sentence_row, lone_article_row})
def test_get_rows(self):
    """Verify the row tuples produced by _get_rows for article and sentence codings.

    Each expected row has the form
    (job, coded article, article, sentence, article coding, sentence coding).
    """
    # Only the schema is used below; the other unpacked values are not needed in this test.
    schema, _codebook, _strf, _intf, _codef, _, _ = amcattest.create_test_schema_with_fields()
    job = amcattest.create_test_job(unitschema=schema, articleschema=schema, narticles=5)
    articles = list(job.articleset.articles.all())
    first_article = articles[0]
    article_coding = amcattest.create_test_coding(codingjob=job, article=first_article)
    coded_article = job.get_coded_article(first_article)

    def collect(include_sentences, include_uncoded_articles=False):
        """Run _get_rows for the single job and gather the rows into a set."""
        return set(_get_rows(
            [job],
            include_sentences=include_sentences,
            include_multiple=True,
            include_uncoded_articles=include_uncoded_articles,
        ))

    article_only_row = (job, coded_article, first_article, None, article_coding, None)

    # A lone article coding gives exactly one row.
    self.assertEqual(collect(False), {article_only_row})

    # When requested, uncoded articles show up as rows without any coding.
    rows = collect(False, include_uncoded_articles=True)
    expected = {article_only_row}
    expected.update(
        (job, job.get_coded_article(other), other, None, None, None)
        for other in articles[1:]
    )
    self.assertEqual(rows, expected)

    # A sentence coding is only reported when sentences are included.
    sentence = amcattest.create_test_sentence(article=first_article)
    sentence_coding = amcattest.create_test_coding(
        codingjob=job, article=first_article, sentence=sentence
    )
    self.assertEqual(collect(False), {article_only_row})
    first_sentence_row = (
        job, coded_article, first_article, sentence, article_coding, sentence_coding
    )
    self.assertEqual(collect(True), {first_sentence_row})

    # Multiple sentence codings on the same article should duplicate the article (coding).
    sentence2 = amcattest.create_test_sentence(article=first_article)
    sentence_coding2 = amcattest.create_test_coding(
        codingjob=job, article=first_article, sentence=sentence2
    )
    second_sentence_row = (
        job, coded_article, first_article, sentence2, article_coding, sentence_coding2
    )
    self.assertEqual(collect(True), {first_sentence_row, second_sentence_row})

    # An article with an article coding but no sentence coding should still
    # show up when sentences are included.
    second_article = articles[1]
    article_coding2 = amcattest.create_test_coding(codingjob=job, article=second_article)
    rows = collect(True)
    lone_article_row = (
        job, job.get_coded_article(second_article), second_article, None, article_coding2, None
    )
    self.assertEqual(rows, {first_sentence_row, second_sentence_row, lone_article_row})
def setUp(self):
    """Build the fixture: a schema with text/number/code fields, five jobs and a few codings."""
    from amcat.models.coding.coding import CodingValue

    # Schema plus a codebook that holds a single code labelled "CODED".
    self.schema = amcattest.create_test_schema()
    self.codebook = amcattest.create_test_codebook()
    self.code = amcattest.create_test_code(label="CODED")
    self.codebook.add_code(self.code)

    # Field types are looked up by primary key (1, 2 and 5).
    get_fieldtype = CodingSchemaFieldType.objects.get
    texttype = get_fieldtype(pk=1)
    inttype = get_fieldtype(pk=2)
    codetype = get_fieldtype(pk=5)

    add_field = CodingSchemaField.objects.create
    self.textfield = add_field(
        codingschema=self.schema, fieldnr=1, fieldtype=texttype, label="Text"
    )
    self.intfield = add_field(
        codingschema=self.schema, fieldnr=2, fieldtype=inttype, label="Number"
    )
    self.codefield = add_field(
        codingschema=self.schema, fieldnr=3, fieldtype=codetype, label="Code",
        codebook=self.codebook,
    )

    self.users = [amcattest.create_test_user() for _ in range(2)]

    self.articles, self.jobs, self.asets = [], [], []
    # One job per entry; each entry is the index into self.users of the job's coder.
    # The n-th article set (counting from 1) is requested with 2 * n articles.
    for position, coder_index in enumerate((0, 0, 0, 0, 1)):
        aset = amcattest.create_test_set(articles=2 * (position + 1))
        self.articles.extend(aset.articles.all())
        self.asets.append(aset)
        self.jobs.append(
            amcattest.create_test_job(
                articleschema=self.schema,
                unitschema=self.schema,
                coder=self.users[coder_index],
                articleset=aset,
            )
        )

    first_job = self.jobs[0]

    # Two article-level codings in the first job; the second is completed and commented.
    self.an1 = Coding.objects.create(codingjob=first_job, article=self.articles[0])
    self.an2 = Coding.objects.create(codingjob=first_job, article=self.articles[1])
    self.an2.set_status(STATUS_COMPLETE)
    self.an2.comments = 'Makkie!'
    self.an2.save()

    # Two sentence-level codings that share one sentence.
    sent = amcattest.create_test_sentence()
    self.sa1 = Coding.objects.create(
        codingjob=first_job, article=self.articles[0], sentence=sent
    )
    self.sa2 = Coding.objects.create(
        codingjob=first_job, article=self.articles[0], sentence=sent
    )

    add_value = CodingValue.objects.create
    add_value(coding=self.sa1, field=self.intfield, intval=1)
    add_value(coding=self.sa1, field=self.textfield, strval="bla")
    add_value(coding=self.sa2, field=self.textfield, strval="blx")
    add_value(coding=self.sa1, field=self.codefield, intval=self.code.id)
def test_results(self):
    """Check exported values for article codings, code labels with parents, and sentence codings."""
    codebook, codes = amcattest.create_test_codebook_with_codes()
    # The codebook returned by the first call is the one passed to the second call.
    article_schema, codebook, strf, intf, codef, _, _ = (
        amcattest.create_test_schema_with_fields(codebook=codebook, isarticleschema=True)
    )
    sentence_schema, codebook, sstrf, sintf, scodef, _, _ = (
        amcattest.create_test_schema_with_fields(codebook=codebook)
    )
    job = amcattest.create_test_job(
        unitschema=sentence_schema, articleschema=article_schema, narticles=5
    )
    articles = list(job.articleset.articles.all())

    # Simple coding with a codebook code, exported by id.
    first_coding = amcattest.create_test_coding(codingjob=job, article=articles[0])
    first_coding.update_values({strf: "bla", intf: 1, codef: codes["A1b"].id})
    by_id = self._get_results([job], {strf: {}, intf: {}, codef: {"ids": True}})
    self.assertEqual(by_id, [('bla', 1, codes["A1b"].id)])

    # Multiple codings, exported as labels together with two levels of parents.
    second_coding = amcattest.create_test_coding(codingjob=job, article=articles[1])
    second_coding.update_values({strf: "blx", intf: 1, codef: codes["B1"].id})
    with_parents = self._get_results(
        [job], {strf: {}, intf: {}, codef: {"labels": True, "parents": 2}}
    )
    self.assertEqual(
        set(with_parents),
        {('bla', 1, "A", "A1", "A1b"), ('blx', 1, "B", "B1", "B1")},
    )

    # Sentence-level result: the article without a sentence coding yields None values.
    sentence = amcattest.create_test_sentence(article=articles[0])
    sentence_coding = amcattest.create_test_coding(
        codingjob=job, article=articles[0], sentence=sentence
    )
    sentence_coding.update_values({sstrf: "z", sintf: -1, scodef: codes["A"].id})
    sentence_level = self._get_results(
        [job], {strf: {}, sstrf: {}, sintf: {}}, export_level=2
    )
    self.assertEqual(set(sentence_level), {('bla', 'z', -1), ('blx', None, None)})
def test_results(self):
    """Check exported values for article codings, code labels with parents, and sentence codings."""
    codebook, codes = amcattest.create_test_codebook_with_codes()
    # The codebook returned by the first call is the one passed to the second call.
    article_schema, codebook, strf, intf, codef = (
        amcattest.create_test_schema_with_fields(codebook=codebook, isarticleschema=True)
    )
    sentence_schema, codebook, sstrf, sintf, scodef = (
        amcattest.create_test_schema_with_fields(codebook=codebook)
    )
    job = amcattest.create_test_job(
        unitschema=sentence_schema, articleschema=article_schema, narticles=5
    )
    articles = list(job.articleset.articles.all())

    # Simple coding with a codebook code, exported by id.
    first_coding = amcattest.create_test_coding(codingjob=job, article=articles[0])
    first_coding.update_values({strf: "bla", intf: 1, codef: codes["A1b"].id})
    by_id = self._get_results([job], {strf: {}, intf: {}, codef: {"ids": True}})
    self.assertEqual(by_id, [('bla', 1, codes["A1b"].id)])

    # Multiple codings, exported as labels together with two levels of parents.
    second_coding = amcattest.create_test_coding(codingjob=job, article=articles[1])
    second_coding.update_values({strf: "blx", intf: 1, codef: codes["B1"].id})
    with_parents = self._get_results(
        [job], {strf: {}, intf: {}, codef: {"labels": True, "parents": 2}}
    )
    self.assertEqual(
        set(with_parents),
        {('bla', 1, "A", "A1", "A1b"), ('blx', 1, "B", "B1", "B1")},
    )

    # Sentence-level result: the article without a sentence coding yields None values.
    sentence = amcattest.create_test_sentence(article=articles[0])
    sentence_coding = amcattest.create_test_coding(
        codingjob=job, article=articles[0], sentence=sentence
    )
    sentence_coding.update_values({sstrf: "z", sintf: -1, scodef: codes["A"].id})
    sentence_level = self._get_results(
        [job], {strf: {}, sstrf: {}, sintf: {}}, export_level=2
    )
    self.assertEqual(set(sentence_level), {('bla', 'z', -1), ('blx', None, None)})
def test_get_articles(self):
    """Check get_articles: invalid split points raise, valid ones split the article."""
    from amcat.models import Sentence

    def _get_articles(a, s):
        """Materialise the get_articles result so errors surface immediately."""
        return list(get_articles(a, s))

    # Should raise exception if sentences not in article
    article, sentences = self.create_test_sentences()
    s1 = Sentence.objects.filter(id=amcattest.create_test_sentence().id)
    self.assertRaises(ValueError, _get_articles, article, s1)

    # Should raise an exception if we try to split on headline
    self.assertRaises(ValueError, _get_articles, article, sentences.filter(parnr=1))

    # Should return a "copy", with byline in "text" property
    arts = _get_articles(article, Sentence.objects.none())
    # Save with an explicit loop: on Python 3 `map` is lazy, so a bare
    # map(...) statement would never call save().
    for art in arts:
        art.save()

    self.assertEqual(len(arts), 1)
    sbd.create_sentences(arts[0])

    self.assertEqual(
        [s.sentence for s in sentences[1:]],
        [s.sentence for s in arts[0].sentences.all()[1:]],
    )
    self.assertIn("foo", arts[0].text)

    # Should be able to split on byline
    self.assertEqual(2, len(_get_articles(article, sentences[1:2])))
    a, b = _get_articles(article, sentences[4:5])

    # Check that the text of the split articles contains what is expected
    self.assertNotIn("Einde", a.text)
    self.assertIn("Einde", b.text)
def test_get_articles(self):
    """Check get_articles: invalid split points raise, valid ones split the article."""
    from amcat.models import Sentence

    def _get_articles(a, s):
        """Materialise the get_articles result so errors surface immediately."""
        return list(get_articles(a, s))

    # Should raise exception if sentences not in article
    article, sentences = self.create_test_sentences()
    s1 = Sentence.objects.filter(id=amcattest.create_test_sentence().id)
    self.assertRaises(ValueError, _get_articles, article, s1)

    # Should raise an exception if we try to split on headline
    self.assertRaises(ValueError, _get_articles, article, sentences.filter(parnr=1))

    # Should return a "copy", with byline in "text" property
    arts = _get_articles(article, Sentence.objects.none())
    # Save with an explicit loop: on Python 3 `map` is lazy, so a bare
    # map(...) statement would never call save().
    for art in arts:
        art.save()

    self.assertEqual(len(arts), 1)
    sbd.create_sentences(arts[0])

    self.assertEqual(
        [s.sentence for s in sentences[1:]],
        [s.sentence for s in arts[0].sentences.all()[1:]],
    )
    self.assertIn("foo", arts[0].text)

    # Should be able to split on byline
    self.assertEqual(2, len(_get_articles(article, sentences[1:2])))
    a, b = _get_articles(article, sentences[4:5])

    # Check that the text of the split articles contains what is expected
    self.assertNotIn("Einde", a.text)
    self.assertIn("Einde", b.text)
def test_codedarticle(self):
    """CodedArticle should expose the article coding and its sentence codings."""
    article_coding = amcattest.create_test_coding()
    sentence = amcattest.create_test_sentence()

    # Two sentence-level codings in the same job as the article coding.
    first_sentence_coding = amcattest.create_test_coding(
        sentence=sentence, codingjob=article_coding.codingjob
    )
    second_sentence_coding = amcattest.create_test_coding(
        sentence=sentence, codingjob=article_coding.codingjob
    )

    coded = CodedArticle(article_coding)
    expected_sentence_codings = {first_sentence_coding, second_sentence_coding}
    self.assertEqual(set(coded.sentence_codings), expected_sentence_codings)
    self.assertEqual(coded.coding, article_coding)
def test_create(self):
    """A new coding uses the job's article schema, or the unit schema when it has a sentence."""
    article_schema = amcattest.create_test_schema()
    job = amcattest.create_test_job(unitschema=self.schema, articleschema=article_schema)

    # Article-level coding: belongs to the job's article set and uses the article schema.
    article_coding = amcattest.create_test_coding(codingjob=job)
    self.assertIsNotNone(article_coding)
    job_articles = job.articleset.articles.all()
    self.assertIn(article_coding.article, job_articles)
    self.assertEqual(article_coding.schema, article_schema)

    # Sentence-level coding: uses the unit schema instead.
    sentence = amcattest.create_test_sentence()
    sentence_coding = amcattest.create_test_coding(codingjob=job, sentence=sentence)
    self.assertEqual(sentence_coding.schema, self.schema)
def test_create(self):
    """A new coding uses the job's article schema, or the unit schema when it has a sentence."""
    article_schema = amcattest.create_test_schema()
    job = amcattest.create_test_job(unitschema=self.schema, articleschema=article_schema)

    # Article-level coding: its coded article's article is in the job's
    # article set, and it uses the article schema.
    article_coding = amcattest.create_test_coding(codingjob=job)
    self.assertIsNotNone(article_coding)
    job_articles = job.articleset.articles.all()
    self.assertIn(article_coding.coded_article.article, job_articles)
    self.assertEqual(article_coding.schema, article_schema)

    # Sentence-level coding: uses the unit schema instead.
    sentence = amcattest.create_test_sentence()
    sentence_coding = amcattest.create_test_coding(codingjob=job, sentence=sentence)
    self.assertEqual(sentence_coding.schema, self.schema)
def todo_test_process_sentence(self):
    """Check the lemmata and POS categories Frog yields for a short Dutch sentence.

    The "todo_" prefix keeps the test runner from collecting this method.
    """
    raw_sentence = amcattest.create_test_sentence(sentence="de groenste huizen")
    analysis_sentence = amcattest.create_test_analysis_sentence(sentence=raw_sentence)

    frog = Frog(None)
    # process_sentence is unpacked into two values; only the tokens are checked.
    token_iter, _triples = frog.process_sentence(analysis_sentence)
    tokens = list(token_iter)

    self.assertEqual([tok.lemma for tok in tokens], ["de", "groen", "huis"])
    self.assertEqual([tok.pos for tok in tokens], ["D", "A", "N"])
def todo_test_process_sentence(self):
    """Check the lemmata and POS categories Frog yields for a short Dutch sentence.

    The "todo_" prefix keeps the test runner from collecting this method.
    """
    raw_sentence = amcattest.create_test_sentence(sentence="de groenste huizen")
    analysis_sentence = amcattest.create_test_analysis_sentence(sentence=raw_sentence)

    frog = Frog(None)
    # process_sentence is unpacked into two values; only the tokens are checked.
    token_iter, _triples = frog.process_sentence(analysis_sentence)
    tokens = list(token_iter)

    self.assertEqual([tok.lemma for tok in tokens], ["de", "groen", "huis"])
    self.assertEqual([tok.pos for tok in tokens], ["D", "A", "N"])
def todo_test_triples(self):
    """Check the dependency triples FrogTriples yields for a short Dutch sentence.

    The "todo_" prefix keeps the test runner from collecting this method.
    """
    raw_sentence = amcattest.create_test_sentence(sentence="hij gaf hem een boek")
    s = amcattest.create_test_analysis_sentence(sentence=raw_sentence)

    parser = FrogTriples(None)
    found = set(parser.get_triples(s))

    expected = {
        TripleValues(s, 0, 1, 'su'),
        TripleValues(s, 2, 1, 'obj2'),
        TripleValues(s, 3, 4, 'det'),
        TripleValues(s, 4, 1, 'obj1'),
    }
    self.assertEqual(found, expected)
def todo_test_triples(self):
    """Check the dependency triples FrogTriples yields for a short Dutch sentence.

    The "todo_" prefix keeps the test runner from collecting this method.
    """
    raw_sentence = amcattest.create_test_sentence(sentence="hij gaf hem een boek")
    s = amcattest.create_test_analysis_sentence(sentence=raw_sentence)

    parser = FrogTriples(None)
    found = set(parser.get_triples(s))

    expected = {
        TripleValues(s, 0, 1, "su"),
        TripleValues(s, 2, 1, "obj2"),
        TripleValues(s, 3, 4, "det"),
        TripleValues(s, 4, 1, "obj1"),
    }
    self.assertEqual(found, expected)
def setUp(self):
    """Build the fixture: a schema with text/number/code fields, five jobs and a few codings."""
    from amcat.models.coding.coding import CodingValue

    # Schema plus a codebook that holds a single code labelled "CODED".
    self.schema = amcattest.create_test_schema()
    self.codebook = amcattest.create_test_codebook()
    self.code = amcattest.create_test_code(label="CODED")
    self.codebook.add_code(self.code)

    # Field types are looked up by primary key (1, 2 and 5).
    get_fieldtype = CodingSchemaFieldType.objects.get
    texttype = get_fieldtype(pk=1)
    inttype = get_fieldtype(pk=2)
    codetype = get_fieldtype(pk=5)

    add_field = CodingSchemaField.objects.create
    self.textfield = add_field(
        codingschema=self.schema, fieldnr=1, fieldtype=texttype, label="Text"
    )
    self.intfield = add_field(
        codingschema=self.schema, fieldnr=2, fieldtype=inttype, label="Number"
    )
    self.codefield = add_field(
        codingschema=self.schema, fieldnr=3, fieldtype=codetype, label="Code",
        codebook=self.codebook,
    )

    self.users = [amcattest.create_test_user() for _ in range(2)]

    self.articles, self.jobs, self.asets = [], [], []
    # One job per entry; each entry is the index into self.users of the job's coder.
    # The n-th article set (counting from 1) is requested with 2 * n articles.
    for position, coder_index in enumerate((0, 0, 0, 0, 1)):
        aset = amcattest.create_test_set(articles=2 * (position + 1))
        self.articles.extend(aset.articles.all())
        self.asets.append(aset)
        self.jobs.append(
            amcattest.create_test_job(
                articleschema=self.schema,
                unitschema=self.schema,
                coder=self.users[coder_index],
                articleset=aset,
            )
        )

    first_job = self.jobs[0]

    # Two article-level codings in the first job; the second is completed and commented.
    self.an1 = Coding.objects.create(codingjob=first_job, article=self.articles[0])
    self.an2 = Coding.objects.create(codingjob=first_job, article=self.articles[1])
    self.an2.set_status(STATUS_COMPLETE)
    self.an2.comments = 'Makkie!'
    self.an2.save()

    # Two sentence-level codings that share one sentence.
    sent = amcattest.create_test_sentence()
    self.sa1 = Coding.objects.create(
        codingjob=first_job, article=self.articles[0], sentence=sent
    )
    self.sa2 = Coding.objects.create(
        codingjob=first_job, article=self.articles[0], sentence=sent
    )

    add_value = CodingValue.objects.create
    add_value(coding=self.sa1, field=self.intfield, intval=1)
    add_value(coding=self.sa1, field=self.textfield, strval="bla")
    add_value(coding=self.sa2, field=self.textfield, strval="blx")
    add_value(coding=self.sa1, field=self.codefield, intval=self.code.id)
def test_nqueries_table_sentence_codings(self):
    """Building the sentence-codings table for an article should take at most 20 queries."""
    from amcat.models.coding.coding import CodingValue
    # query_list_to_table is imported but not called below.
    # NOTE(review): presumably kept at hand for dumping the recorded queries while debugging.
    from amcat.tools.djangotoolkit import list_queries, query_list_to_table

    coded_article = CodedArticle(self.an1)

    # Add sentence codings with an int value each (the loop currently runs once).
    for number in range(1):
        sentence = amcattest.create_test_sentence()
        sentence_coding = Coding.objects.create(
            codingjob=self.jobs[0], article=self.articles[0], sentence=sentence
        )
        CodingValue.objects.create(
            coding=sentence_coding, field=self.intfield, intval=number
        )

    # Resolve the coder's language before recording starts, so these lookups are not counted.
    language = coded_article.codingjob.coder.userprofile.language

    with list_queries() as queries:
        table = get_table_sentence_codings_article(coded_article, language)
        table.output()  # force getting all values

    query_count = len(queries)
    self.assertTrue(query_count <= 20, "Retrieving table used %i queries" % query_count)
def test_process(self):
    """process_sentence on a tokens-only script should yield tokens and no triples."""
    from amcat.models.token import TokenValues

    class X(AnalysisScript):
        """Minimal script that turns each whitespace-separated word into a token."""

        def __init__(self):
            super(X, self).__init__(analysis=None, tokens=True, triples=False)

        def get_tokens(self, analysis_sentence, memo=None):
            words = analysis_sentence.sentence.sentence.split()
            # Token positions are 1-based.
            for position, word in enumerate(words, 1):
                yield TokenValues(analysis_sentence, position, word, None, None, None, None)

    a = amcattest.create_test_analysis_sentence(
        sentence=amcattest.create_test_sentence(sentence="dit is een test")
    )

    # The result must unpack into exactly two values: (tokens, triples).
    tokens, triples = list(X().process_sentence(a))

    self.assertIsNone(triples)
    self.assertEqual(
        list(tokens)[0],
        TokenValues(a, 1, "dit", None, None, None, None),
    )