def test_medium(self):
    """Check medium resolution precedence: explicit column > existing id > default name."""
    import functools

    header = ('kop', 'datum', 'tekst', 'med')
    data = [('kop1', '2001-01-01', '', 'Bla')]
    run = functools.partial(_run_test_csv, header, data,
                            text="tekst", headline="kop", date="datum")

    def single(**options):
        # Every scenario should yield exactly one article
        result = run(**options)
        self.assertEqual(len(result), 1)
        return result[0]

    # Medium taken from the 'med' column when no default name is given
    self.assertEqual(single(medium_name=None, medium="med").medium.name, "Bla")
    # Medium taken from an existing medium id
    self.assertEqual(single(medium_existing=Medium.get_or_create("1").id).medium.name, "1")
    # Column value wins over an existing id
    self.assertEqual(
        single(medium_existing=Medium.get_or_create("1").id, medium="med").medium.name, "Bla")
    # Column value wins over a default name
    self.assertEqual(single(medium_name="bla2", medium="med").medium.name, "Bla")
    # Existing id wins over a default name
    self.assertEqual(
        single(medium_name="bla2", medium_existing=Medium.get_or_create("2").id).medium.name, "2")
def to_internal_value(self, data):
    """Accept a medium given either as a numeric id or as a name.

    Numeric values are delegated to the default (id-based) lookup of the
    parent field; any non-numeric value is treated as a medium name and
    resolved via Medium.get_or_create.
    """
    try:
        int(data)
    except ValueError:
        # Not an id: resolve by name, creating the medium if needed
        return Medium.get_or_create(data)
    return super(MediumField, self).to_internal_value(data)
def test_bulk_insert_returning_ids(self):
    """bulk_insert_returning_ids should return saved copies and leave the originals unsaved."""
    originals = [Medium(name="test_bi_1"), Medium(name="test_bi_2")]
    for medium in originals:
        self.assertIsNone(medium.id)

    saved = bulk_insert_returning_ids(originals)

    # The input objects must not have been mutated...
    for medium in originals:
        self.assertIsNone(medium.id)
    # ...while the returned copies carry fresh database ids
    self.assertIsNotNone(saved[0].id)
    self.assertIsNotNone(saved[1].id)
    for index, expected_name in enumerate(("test_bi_1", "test_bi_2")):
        self.assertEqual(expected_name, Medium.objects.get(id=saved[index].id).name)
def _create_medium(self, source):
    """Ensure a Medium named *source* exists, creating it if necessary.

    The lookup is case-insensitive. Newly created media get the first five
    characters of the name as abbreviation, a circulation of 1, and the
    language with id 1.
    """
    from amcat.models import Language
    try:
        Medium.objects.get(name__iexact=source)
    except Medium.DoesNotExist:
        # NOTE(review): language id 1 is assumed to be the default language
        # of this installation -- confirm against the deployment fixtures.
        language = Language.objects.get(id=1)
        Medium(name=source, abbrev=source[:5], circulation=1,
               language=language).save()
def _scrape_unit(self, row):
    """Turn one CSV row (byte-string values) into an Article.

    Known columns are popped off the row; whatever remains is preserved
    verbatim as JSON metadata on the article.
    """
    row = {key: value.decode("utf-8") for key, value in row.iteritems()}
    # Track which saved queries produced results
    self.queries.add(row.pop('zoekopdracht'))

    fields = dict(
        medium=Medium.get_or_create(row.pop('type bron')),
        date=datetime.datetime.strptime(row.pop('datum'), "%Y-%m-%d %H:%M"),
        headline=row.pop('titel'),
        pagenr=row.pop('bereik') or None,  # empty string -> no page number
        text=row.pop('bericht tekst'),
        url=row.pop('url'),
        author=row.pop('auteur'),
    )
    # All leftover columns become metadata
    fields['metastring'] = json.dumps(row)
    yield Article(**fields)
def restore_fields(self, data, files):
    """Normalise incoming article data before deserialisation.

    Resolves a medium given by name to its id (caching lookups on the
    serializer instance), expands a date-only value to midnight, and
    defaults the project from the view context.
    """
    data = data.copy()  # make data mutable
    if 'medium' in data:
        try:
            int(data['medium'])
        except ValueError:
            # Medium was given by name; translate to an id, caching
            # lookups on this serializer instance.
            cache = getattr(self, 'media', None)
            if cache is None:
                cache = self.media = {}
            name = data['medium']
            if name not in cache:
                cache[name] = Medium.get_or_create(name).id
            data['medium'] = cache[name]
    # Date without a time component: append midnight
    if 'date' in data and len(data['date']) == 10:
        data['date'] += "T00:00"
    if 'project' not in data:
        data['project'] = self.context['view'].project.id
    return super(ArticleSerializer, self).restore_fields(data, files)
def restore_fields(self, data, files):
    """Normalise incoming article data before deserialisation.

    Resolves a medium given by name to its id via the shared
    medium_ids/medium_names caches, expands a date-only value to
    midnight, and defaults the project from the view context.
    """
    data = data.copy()  # make data mutable
    if 'medium' in data:
        try:
            int(data['medium'])
        except ValueError:
            # Medium was given by name; translate to an id and record
            # the mapping in both directions in the shared caches.
            name = data['medium']
            if name not in self.medium_ids:
                medium_id = Medium.get_or_create(name).id
                self.medium_ids[name] = medium_id
                self.medium_names[medium_id] = name
            data['medium'] = self.medium_ids[name]
    # Date without a time component: append midnight
    if 'date' in data and len(data['date']) == 10:
        data['date'] += "T00:00"
    if 'project' not in data:
        data['project'] = self.context['view'].project.id
    return super(ArticleSerializer, self).restore_fields(data, files)
def parse_document(self, paragraphs):
    """Build an Article from parsed paragraphs, resolving the medium by name."""
    metadata, text = parse_page(paragraphs)
    # The parser yields the medium as a name; replace it with a Medium object
    medium_name = metadata["medium"]
    metadata["medium"] = Medium.get_or_create(medium_name)
    return Article(text=text, **metadata)