Exemplo n.º 1
0
 def _test_parse_file(self,
                      file,
                      n_articles,
                      min_article_length=20,
                      expected_articles=()):
     """
     Tests the parsing of a file using the scrape_2 format.

     Runs the BZK script on the file, checks that the result is an
     ArticleSet holding exactly n_articles articles, and then verifies
     every entry of expected_articles against the parsed article that has
     the same title.

     @param file                  The file handed to the BZK script.
     @param n_articles            The exact number of articles the resulting set must contain.
     @param min_article_length    Minimum length of the text of each expected article.
     @param expected_articles    A sequence of {field: value} dicts, each with at least a 'title' field.
     """
     field_map = self._create_id_field_map("title", "text", "medium",
                                           "date")
     script = BZK(field_map=field_map,
                  file=file,
                  project=self.project.id,
                  encoding="UTF-8")
     result_set = script.run()
     self.assertIsInstance(result_set, ArticleSet)
     articles = list(result_set.articles.all())
     self.assertEqual(len(articles), n_articles)
     # Index by title so expected articles can be looked up directly.
     article_map = {a.title: a for a in articles}
     for fields in expected_articles:
         title = fields['title']
         # Fail with a readable message instead of a bare KeyError when an
         # expected article was not produced by the parser.
         self.assertIn(
             title, article_map,
             "No parsed article with title '{}'".format(title))
         article = article_map[title]
         self.assertGreaterEqual(len(article.text), min_article_length)
         for field, value in fields.items():
             self.assertEqual(
                 article.get_property(field), value,
                 "Expected value '{}' for article field '{}'".format(
                     value, field))
Exemplo n.º 2
0
    def setUp(self):
        """Run BZK on the ``test.html`` fixture and keep the resulting articles.

        Sets ``self.dir`` (fixture directory next to this module),
        ``self.bzk`` (the BZK instance) and ``self.result`` (the articles of
        the article set whose id is the first element returned by
        ``self.bzk.run()``).
        """
        from django.core.files import File
        import os.path

        self.dir = os.path.join(os.path.dirname(__file__), 'test_files', 'bzk')
        project_id = amcattest.create_test_project().id
        # Keep the raw handle so it can be closed after the test; wrapping
        # open() directly in File() left the file descriptor open.
        handle = open(os.path.join(self.dir, 'test.html'))
        self.addCleanup(handle.close)
        self.bzk = BZK(project=project_id,
                       file=File(handle),
                       articlesets=[amcattest.create_test_set().id])
        self.result = ArticleSet.objects.get(
            id=self.bzk.run()[0]).articles.all()
Exemplo n.º 3
0
 def test_get_fields(self):
     """The labels reported by BZK.get_fields include title, date and text."""
     detected = BZK.get_fields(self.file_scrape1, encoding="utf-8")
     labels = {field.label for field in detected}
     for required in ("title", "date", "text"):
         self.assertIn(required, labels)
Exemplo n.º 4
0
    def setUp(self):
        """Prepare the fixture: run BZK on ``test.html`` and store its articles.

        After this method ``self.dir`` is the fixture directory,
        ``self.bzk`` the BZK instance and ``self.result`` the articles of
        the article set identified by the first id in the ``run()`` result.
        """
        from django.core.files import File
        import os.path

        self.dir = os.path.join(os.path.dirname(__file__), 'test_files', 'bzk')
        project_id = amcattest.create_test_project().id
        # The handle used to be opened inline and never closed; register
        # its close() so it is released even when the test fails.
        fixture = open(os.path.join(self.dir, 'test.html'))
        self.addCleanup(fixture.close)
        self.bzk = BZK(project=project_id,
                       file=File(fixture),
                       articlesets=[amcattest.create_test_set().id])
        self.result = ArticleSet.objects.get(id=self.bzk.run()[0]).articles.all()
Exemplo n.º 5
0
class TestBZK(amcattest.AmCATTestCase):
    """Tests for the BZK script, run against the ``test.html`` fixture."""

    def setUp(self):
        """Run BZK on the fixture file and store the resulting articles.

        Sets ``self.dir``, ``self.bzk`` and ``self.result`` (the articles of
        the article set whose id is the first element of ``run()``'s result).
        """
        from django.core.files import File
        import os.path

        self.dir = os.path.join(os.path.dirname(__file__), 'test_files', 'bzk')
        project_id = amcattest.create_test_project().id
        # Close the fixture file once the test is over; previously the
        # handle passed to File() was never closed.
        handle = open(os.path.join(self.dir, 'test.html'))
        self.addCleanup(handle.close)
        self.bzk = BZK(project=project_id,
                       file=File(handle),
                       articlesets=[amcattest.create_test_set().id])
        self.result = ArticleSet.objects.get(id=self.bzk.run()[0]).articles.all()

    # NOTE: the ``todo_`` prefix means default unittest discovery does not
    # collect the two methods below; rename to ``test_...`` to enable them.

    def todo_test_scrape_unit(self):
        """The run must have produced at least one article."""
        self.assertTrue(self.result)

    def todo_test_scrape_file(self):
        """Every article must have a truthy value for each required property."""
        prop_names = ('headline', 'text', 'medium', 'date')
        # One list of values per property, collected over all articles.
        values_per_prop = [[getattr(a, prop) for a in self.result] for prop in prop_names]

        for values in values_per_prop:
            self.assertTrue(all(values))
Exemplo n.º 6
0
class TestBZK(amcattest.AmCATTestCase):
    """Exercises the BZK script using the ``test.html`` fixture file."""

    def setUp(self):
        """Build the fixture: run BZK on ``test.html`` and keep its articles.

        Populates ``self.dir`` (fixture directory), ``self.bzk`` (the BZK
        instance) and ``self.result`` (articles of the article set whose id
        is the first element returned by ``self.bzk.run()``).
        """
        from django.core.files import File
        import os.path

        self.dir = os.path.join(os.path.dirname(__file__), 'test_files', 'bzk')
        project_id = amcattest.create_test_project().id
        # Opening the file inline inside File(...) leaked the handle, so
        # hold on to it and let the test case close it during cleanup.
        html_file = open(os.path.join(self.dir, 'test.html'))
        self.addCleanup(html_file.close)
        self.bzk = BZK(project=project_id,
                       file=File(html_file),
                       articlesets=[amcattest.create_test_set().id])
        self.result = ArticleSet.objects.get(
            id=self.bzk.run()[0]).articles.all()

    # NOTE: methods prefixed ``todo_`` are not collected by default unittest
    # discovery; they stay disabled until renamed to ``test_...``.

    def todo_test_scrape_unit(self):
        """The result set must not be empty."""
        self.assertTrue(self.result)

    def todo_test_scrape_file(self):
        """Each required property must be truthy on every article."""
        required = ('headline', 'text', 'medium', 'date')
        # Collect, per property, its value on every article in the result.
        collected = [[getattr(a, prop) for a in self.result]
                     for prop in required]

        for proplist in collected:
            self.assertTrue(all(proplist))