def handle(self, *args, **options):
        """Import the XML files of processed sources as sections of one instance.

        Reads ``limit``, ``instance``, ``reimport`` and ``id`` from
        ``options``. Each selected source with an XML file path is imported
        with ``ImportAkomaNtoso``; the resulting section is attached to the
        source, its descendant speeches are tagged "hansard", and the section
        is placed under the parents named by ``section_parent_titles``.
        A source whose import raises is reported on stderr and skipped.

        Raises:
            CommandError: if no Instance has the requested label.
        """
        limit = options['limit']

        try:
            instance = Instance.objects.get(label=options['instance'])
        except Instance.DoesNotExist:
            raise CommandError("Instance specified not found (%s)" % options['instance'])

        # Only sources with a recorded processing success are candidates.
        candidates = Source.objects.filter(last_processing_success__isnull=False)

        # Unless re-importing or targeting one specific source, leave out
        # sources that already have a section attached.
        if not options['reimport'] and not options['id']:
            candidates = candidates.filter(sayit_section__isnull=True)

        if options['id']:
            candidates = candidates.filter(id=options['id'])

        imported = []

        hansard_tag, _created = Tag.objects.get_or_create(instance=instance, name="hansard")

        if limit:
            candidates = candidates[:limit]
        else:
            candidates = candidates.all()

        for source in candidates:
            xml_path = source.xml_file_path()
            if not xml_path:
                continue

            importer = ImportAkomaNtoso(
                instance=instance,
                popit_url='http://za-peoples-assembly.popit.mysociety.org/api/v0.1/',
            )
            try:
                self.stdout.write("TRYING %s\n" % xml_path)
                section = importer.import_document(xml_path)
            except Exception as e:
                # Best effort: report the failure and carry on with the rest.
                self.stderr.write(
                    'WARN: failed to import %d: %s' % (source.id, str(e)))
                continue

            imported.append(section)
            source.sayit_section = section
            source.last_sayit_import = datetime.datetime.now(pytz.utc)
            source.save()

            for speech in section.descendant_speeches():
                speech.tags.add(hansard_tag)

            # Get or create the chain of parent sections and hang the newly
            # imported section underneath it.
            section.parent = Section.objects.get_or_create_with_parents(
                instance=instance, titles=source.section_parent_titles)
            section.save()

        self.stdout.write(
            'Imported %d / %d sections\n' % (len(imported), len(candidates)))

        self.stdout.write(str([section.id for section in imported]))
        self.stdout.write('\n')
Пример #2
0
    def test_empty_title(self):
        # Imports test_empty_title.xml five times (default importer, then
        # clobber='skip', 'merge', 'replace', then a fresh default importer)
        # and compares self._list_sections() with an expected dict each time.
        #
        # NOTE(review): every expected dict literal below repeats the key
        # 'Untitled' (and the last one also repeats 'Conclusions'). Python
        # keeps only the LAST value given for a repeated key in a dict
        # display, so the earlier entries are never actually compared.
        # Presumably _list_sections() is keyed by title as well and collapses
        # same-titled sections in the same way -- confirm, and consider
        # comparing ordered (title, texts) pairs instead.
        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_empty_title.xml')
        self.assertEqual(
            self._list_sections(), {
                'Untitled': ['<p>Hello</p>'],
                'Untitled': ['<p>Howdy</p>'],
                'Conclusions': ['<p>Bye</p>'],
            })

        ImportAkomaNtoso(
            instance=self.instance, commit=True,
            clobber='skip').import_document(
                'speeches/fixtures/test_inputs/test_empty_title.xml')
        self.assertEqual(
            self._list_sections(), {
                'Untitled': ['<p>Hello</p>'],
                'Untitled': ['<p>Howdy</p>'],
                'Conclusions': ['<p>Bye</p>'],
            })

        ImportAkomaNtoso(
            instance=self.instance, commit=True,
            clobber='merge').import_document(
                'speeches/fixtures/test_inputs/test_empty_title.xml')
        self.assertEqual(
            self._list_sections(), {
                'Untitled': ['<p>Hello</p>', '<p>Hello</p>', '<p>Howdy</p>'],
                'Untitled': ['<p>Howdy</p>'],
                'Conclusions': ['<p>Bye</p>', '<p>Bye</p>'],
            })

        ImportAkomaNtoso(
            instance=self.instance, commit=True,
            clobber='replace').import_document(
                'speeches/fixtures/test_inputs/test_empty_title.xml')
        self.assertEqual(
            self._list_sections(), {
                'Untitled': ['<p>Hello</p>'],
                'Untitled': ['<p>Howdy</p>'],
                'Conclusions': ['<p>Bye</p>'],
            })

        # Import again without a clobber argument. The literal lists six
        # entries, but because of the repeated keys it is equal to a
        # two-entry dict.
        ImportAkomaNtoso(instance=self.instance, commit=True).import_document(
            'speeches/fixtures/test_inputs/test_empty_title.xml')
        self.assertEqual(
            self._list_sections(), {
                'Untitled': ['<p>Hello</p>'],
                'Untitled': ['<p>Howdy</p>'],
                'Conclusions': ['<p>Bye</p>'],
                'Untitled': ['<p>Hello</p>'],
                'Untitled': ['<p>Howdy</p>'],
                'Conclusions': ['<p>Bye</p>'],
            })
Пример #3
0
    def form_valid(self, form):
        """Import the document named by the form and flash a result message.

        Builds an ``ImportAkomaNtoso`` for the request's instance, using the
        form's ``existing_sections`` value as the clobber mode, and imports
        the form's ``location``. If the import raises, a non-field error is
        attached to the form and ``form_invalid`` is returned. Otherwise a
        SUCCESS message with the speaker/section/speech counts (or an INFO
        message when all counts are zero) is added before delegating to the
        parent class.
        """
        try:
            importer = ImportAkomaNtoso(
                instance=self.request.instance,
                commit=True,
                clobber=form.cleaned_data.get('existing_sections'),
            )

            stats = importer.import_document(form.cleaned_data['location'])
        except Exception:
            # Deliberately broad (any import failure becomes a form error),
            # but not a bare ``except:``, which would also swallow
            # SystemExit and KeyboardInterrupt.
            form._errors[NON_FIELD_ERRORS] = form.error_class(
                [_('Sorry - something went wrong with the import')])
            return self.form_invalid(form)

        speakers = stats.get(Speaker, 0)
        sections = stats.get(Section, 0)
        speeches = stats.get(Speech, 0)
        success = speakers or sections or speeches

        if success:
            messages.add_message(
                self.request, messages.SUCCESS,
                _('Created: ') + ', '.join((
                    ungettext(
                        "%(speakers)d speaker",
                        "%(speakers)d speakers",
                        speakers,
                    ) % {
                        'speakers': speakers
                    },
                    ungettext(
                        "%(sections)d section",
                        "%(sections)d sections",
                        sections,
                    ) % {
                        'sections': sections
                    },
                    ungettext(
                        "%(speeches)d speech",
                        "%(speeches)d speeches",
                        speeches,
                    ) % {
                        'speeches': speeches
                    },
                )))
        else:
            messages.add_message(
                self.request,
                messages.INFO,
                _('Nothing new to import.'),
            )

        return super(AkomaNtosoImportView, self).form_valid(form)
Пример #4
0
    def form_valid(self, form):
        """Import the document named by the form and flash a result message.

        Builds an ``ImportAkomaNtoso`` for the request's instance, using the
        form's ``existing_sections`` value as the clobber mode, and imports
        the form's ``location``. If the import raises, a non-field error is
        attached to the form and ``form_invalid`` is returned. Otherwise a
        SUCCESS message with the speaker/section/speech counts (or an INFO
        message when all counts are zero) is added before delegating to the
        parent class.
        """
        try:
            importer = ImportAkomaNtoso(
                instance=self.request.instance,
                commit=True,
                clobber=form.cleaned_data.get('existing_sections'),
                )

            stats = importer.import_document(form.cleaned_data['location'])
        except Exception:
            # Deliberately broad (any import failure becomes a form error),
            # but not a bare ``except:``, which would also swallow
            # SystemExit and KeyboardInterrupt.
            form._errors[NON_FIELD_ERRORS] = form.error_class(
                [_('Sorry - something went wrong with the import')])
            return self.form_invalid(form)

        speakers = stats.get(Speaker, 0)
        sections = stats.get(Section, 0)
        speeches = stats.get(Speech, 0)
        success = speakers or sections or speeches

        if success:
            messages.add_message(
                self.request,
                messages.SUCCESS,
                _('Created: ') + ', '.join(
                    (
                        ungettext(
                            "%(speakers)d speaker",
                            "%(speakers)d speakers",
                            speakers,
                            ) % {'speakers': speakers},
                        ungettext(
                            "%(sections)d section",
                            "%(sections)d sections",
                            sections,
                            ) % {'sections': sections},
                        ungettext(
                            "%(speeches)d speech",
                            "%(speeches)d speeches",
                            speeches,
                            ) % {'speeches': speeches},
                        )
                    )
                )
        else:
            messages.add_message(
                self.request,
                messages.INFO,
                _('Nothing new to import.'),
                )

        return super(AkomaNtosoImportView, self).form_valid(form)
Пример #5
0
class AkomaNtosoImportTestCase(InstanceTestCase):
    # Exercises ImportAkomaNtoso.import_document against fixture documents.

    def setUp(self):
        super(AkomaNtosoImportTestCase, self).setUp()
        # A committing importer bound to the test instance, shared by tests.
        self.importer = ImportAkomaNtoso(instance=self.instance, commit=True)

    def test_import_sample_file(self):
        self.importer.import_document(
            'speeches/fixtures/test_inputs/Debate_Bungeni_1995-10-31.xml')

        # As a first check, make sure the imported speeches have the right
        # types and come back in the right order.
        expected_types = [
            u'scene', u'other', u'narrative', u'speech', u'question',
            u'summary', u'speech', u'answer', u'narrative', u'speech',
            u'narrative',
        ]
        imported_types = [speech.type for speech in Speech.objects.all()]
        self.assertEqual(imported_types, expected_types)

    def test_xpath_preface_elements(self):
        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_xpath.xml')

        titles = [section.title for section in Section.objects.all()]
        self.assertEqual(titles, ['This is the title'])

        start_dates = [speech.start_date for speech in Speech.objects.all()]
        self.assertEqual(start_dates, [datetime.date(2014, 7, 24)])

    def test_unicode_character(self):
        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_unicode_character.xml')

        imported_types = [speech.type for speech in Speech.objects.all()]
        self.assertEqual(imported_types, ['other'])

    def test_blank_speakers(self):
        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_blank_speakers.xml')

        speaker = Speaker.objects.get(name='Speaker')
        speeches = Speech.objects.all()

        # Every imported speech must be of type 'speech'.
        self.assertEqual(
            speeches.count(), Speech.objects.filter(type='speech').count())

        # Expected (speaker, speaker_display) for the first four speeches.
        expected = [
            (None, None),
            (speaker, None),
            (None, 'Speaker'),
            (speaker, 'Speaker'),
        ]
        for position, (who, display) in enumerate(expected):
            self.assertEqual(speeches[position].speaker, who)
            self.assertEqual(speeches[position].speaker_display, display)
Пример #6
0
    def test_already_imported(self):
        # Import test_xpath.xml, then import test_clobber.xml under each
        # clobber mode in turn and check the section listing after each step.
        clobber_doc = 'speeches/tests/data/fake_http/test_clobber.xml'
        first_import = [('This is the title', ['<p>Hello</p>'])]
        clobber_only = [
            ('This is the title', ['<p>Howdy</p>']),
            ('Conclusions', ['<p>Bye</p>']),
        ]

        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_xpath.xml')
        self.assertEqual(self._list_sections(), first_import)

        # 'skip': the listing is expected to be unchanged.
        skipping = ImportAkomaNtoso(
            instance=self.instance, commit=True, clobber='skip')
        skipping.import_document(clobber_doc)
        self.assertEqual(self._list_sections(), first_import)

        # 'merge': the new speech joins the existing section.
        merging = ImportAkomaNtoso(
            instance=self.instance, commit=True, clobber='merge')
        merging.import_document(clobber_doc)
        self.assertEqual(
            self._list_sections(),
            [
                ('This is the title', ['<p>Hello</p>', '<p>Howdy</p>']),
                ('Conclusions', ['<p>Bye</p>']),
            ])

        # 'replace': only the new document's content is expected.
        replacing = ImportAkomaNtoso(
            instance=self.instance, commit=True, clobber='replace')
        replacing.import_document(clobber_doc)
        self.assertEqual(self._list_sections(), clobber_only)

        # No clobber argument: the document's sections are expected twice.
        plain = ImportAkomaNtoso(instance=self.instance, commit=True)
        plain.import_document(clobber_doc)
        self.assertEqual(self._list_sections(), clobber_only + clobber_only)
Пример #7
0
    def test_not_already_imported(self):
        # With no sections present beforehand, every clobber mode (and the
        # default with no clobber argument) is expected to give the same
        # listing. Sections are deleted after each round.
        document = 'speeches/tests/data/fake_http/test_clobber.xml'
        expected = {
            'This is the title': ['<p>Howdy</p>'],
            'Conclusions': ['<p>Bye</p>'],
        }
        importer_options = (
            {'clobber': 'skip'},
            {'clobber': 'merge'},
            {'clobber': 'replace'},
            {},
        )

        for extra in importer_options:
            importer = ImportAkomaNtoso(
                instance=self.instance, commit=True, **extra)
            importer.import_document(document)
            self.assertEqual(self._list_sections(), expected)
            Section.objects.all().delete()
Пример #8
0
    def test_import(self):
        # Imports NA200912.xml with title-casing enabled and checks that
        # (a) a section is returned, (b) no child title contains the
        # mis-capitalised "Member'S", and (c) at least `threshold` speakers
        # have a person attached.
        document_path = os.path.join(self._in_fixtures, 'NA200912.xml')

        an = ImportAkomaNtoso(instance=self.instance, commit=True, popit_url=popit_url, title_case=True)
        section = an.import_document(document_path)

        self.assertIsNotNone(section)

        # Check that all the sections have correct looking titles
        for sub in section.children.all():
            self.assertNotIn("Member'S", sub.title)

        speakers = Speaker.objects.all()
        # Build a real list: on Python 3 ``filter()`` returns an iterator,
        # so calling ``len()`` on its result raises TypeError.
        resolved = [s for s in speakers if s.person is not None]
        threshold = 48

        logging.info(
            "%d above threshold %d/%d?",
            len(resolved), threshold, len(speakers))

        self.assertGreaterEqual(
            len(resolved), threshold,
            "%d above threshold %d/%d"
            % (len(resolved), threshold, len(speakers)))
Пример #9
0
 def setUp(self):
     # Run the base-class set-up first, then build the committing importer
     # for the test instance that the tests use.
     super(AkomaNtosoImportTestCase, self).setUp()
     importer = ImportAkomaNtoso(instance=self.instance, commit=True)
     self.importer = importer
Пример #10
0
class AkomaNtosoImportTestCase(InstanceTestCase):
    # Exercises ImportAkomaNtoso.import_document against fixture documents.

    def setUp(self):
        super(AkomaNtosoImportTestCase, self).setUp()
        self.importer = ImportAkomaNtoso(instance=self.instance, commit=True)

    def _list_sections(self):
        # Return [(section title, [texts of its speeches])], one pair per
        # section, in the order Section.objects.all() yields them.
        #
        # A list of pairs is used instead of a dict keyed by title because
        # several sections can share a title (e.g. 'Untitled', or the same
        # document imported twice). A dict would keep only one of them, and
        # an expected dict literal with repeated keys silently keeps only
        # the last value, so the assertions would compare less than they
        # appear to.
        texts = {section.id: [] for section in Section.objects.all()}
        for speech in Speech.objects.all():
            if speech.section_id:
                texts[speech.section_id].append(speech.text)
        return [(section.title, texts[section.id])
                for section in Section.objects.all()]

    def test_import_sample_file(self):
        self.importer.import_document(
            'speeches/tests/data/fake_http/Debate_Bungeni_1995-10-31.xml')

        # To get us started, let's just check that we get the right kind of
        # speech in the right order.
        self.assertEqual([x.type for x in Speech.objects.all()], [
            u'scene', u'other', u'narrative', u'speech', u'question',
            u'summary', u'speech', u'answer', u'narrative', u'speech',
            u'narrative'
        ])

    def test_already_imported(self):
        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_xpath.xml')
        self.assertEqual(self._list_sections(),
                         [('This is the title', ['<p>Hello</p>'])])

        ImportAkomaNtoso(instance=self.instance, commit=True,
                         clobber='skip').import_document(
                             'speeches/tests/data/fake_http/test_clobber.xml')
        self.assertEqual(self._list_sections(),
                         [('This is the title', ['<p>Hello</p>'])])

        ImportAkomaNtoso(instance=self.instance, commit=True,
                         clobber='merge').import_document(
                             'speeches/tests/data/fake_http/test_clobber.xml')
        self.assertEqual(self._list_sections(), [
            ('This is the title', ['<p>Hello</p>', '<p>Howdy</p>']),
            ('Conclusions', ['<p>Bye</p>']),
        ])

        ImportAkomaNtoso(instance=self.instance,
                         commit=True,
                         clobber='replace').import_document(
                             'speeches/tests/data/fake_http/test_clobber.xml')
        self.assertEqual(self._list_sections(), [
            ('This is the title', ['<p>Howdy</p>']),
            ('Conclusions', ['<p>Bye</p>']),
        ])

        # Without a clobber mode the document is expected to be imported
        # again alongside the existing sections.
        ImportAkomaNtoso(instance=self.instance, commit=True).import_document(
            'speeches/tests/data/fake_http/test_clobber.xml')
        self.assertEqual(self._list_sections(), [
            ('This is the title', ['<p>Howdy</p>']),
            ('Conclusions', ['<p>Bye</p>']),
            ('This is the title', ['<p>Howdy</p>']),
            ('Conclusions', ['<p>Bye</p>']),
        ])

    def test_not_already_imported(self):
        ImportAkomaNtoso(instance=self.instance, commit=True,
                         clobber='skip').import_document(
                             'speeches/tests/data/fake_http/test_clobber.xml')
        self.assertEqual(self._list_sections(), [
            ('This is the title', ['<p>Howdy</p>']),
            ('Conclusions', ['<p>Bye</p>']),
        ])
        Section.objects.all().delete()

        ImportAkomaNtoso(instance=self.instance, commit=True,
                         clobber='merge').import_document(
                             'speeches/tests/data/fake_http/test_clobber.xml')
        self.assertEqual(self._list_sections(), [
            ('This is the title', ['<p>Howdy</p>']),
            ('Conclusions', ['<p>Bye</p>']),
        ])
        Section.objects.all().delete()

        ImportAkomaNtoso(instance=self.instance,
                         commit=True,
                         clobber='replace').import_document(
                             'speeches/tests/data/fake_http/test_clobber.xml')
        self.assertEqual(self._list_sections(), [
            ('This is the title', ['<p>Howdy</p>']),
            ('Conclusions', ['<p>Bye</p>']),
        ])
        Section.objects.all().delete()

        ImportAkomaNtoso(instance=self.instance, commit=True).import_document(
            'speeches/tests/data/fake_http/test_clobber.xml')
        self.assertEqual(self._list_sections(), [
            ('This is the title', ['<p>Howdy</p>']),
            ('Conclusions', ['<p>Bye</p>']),
        ])
        Section.objects.all().delete()

    def test_empty_title(self):
        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_empty_title.xml')
        self.assertEqual(self._list_sections(), [
            ('Untitled', ['<p>Hello</p>']),
            ('Untitled', ['<p>Howdy</p>']),
            ('Conclusions', ['<p>Bye</p>']),
        ])

        ImportAkomaNtoso(
            instance=self.instance, commit=True,
            clobber='skip').import_document(
                'speeches/fixtures/test_inputs/test_empty_title.xml')
        self.assertEqual(self._list_sections(), [
            ('Untitled', ['<p>Hello</p>']),
            ('Untitled', ['<p>Howdy</p>']),
            ('Conclusions', ['<p>Bye</p>']),
        ])

        ImportAkomaNtoso(
            instance=self.instance, commit=True,
            clobber='merge').import_document(
                'speeches/fixtures/test_inputs/test_empty_title.xml')
        self.assertEqual(self._list_sections(), [
            ('Untitled', ['<p>Hello</p>', '<p>Hello</p>', '<p>Howdy</p>']),
            ('Untitled', ['<p>Howdy</p>']),
            ('Conclusions', ['<p>Bye</p>', '<p>Bye</p>']),
        ])

        ImportAkomaNtoso(
            instance=self.instance, commit=True,
            clobber='replace').import_document(
                'speeches/fixtures/test_inputs/test_empty_title.xml')
        self.assertEqual(self._list_sections(), [
            ('Untitled', ['<p>Hello</p>']),
            ('Untitled', ['<p>Howdy</p>']),
            ('Conclusions', ['<p>Bye</p>']),
        ])

        ImportAkomaNtoso(instance=self.instance, commit=True).import_document(
            'speeches/fixtures/test_inputs/test_empty_title.xml')
        self.assertEqual(self._list_sections(), [
            ('Untitled', ['<p>Hello</p>']),
            ('Untitled', ['<p>Howdy</p>']),
            ('Conclusions', ['<p>Bye</p>']),
            ('Untitled', ['<p>Hello</p>']),
            ('Untitled', ['<p>Howdy</p>']),
            ('Conclusions', ['<p>Bye</p>']),
        ])

    def test_empty_docDate(self):
        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_empty_docDate.xml')
        self.assertEqual(
            [(x.start_date, x.title, x.source_url)
             for x in Section.objects.all()],
            [(datetime.date(2012, 3, 7), 'Title', 'http://example.org')])

    def test_xpath_preface_elements(self):
        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_xpath.xml')
        self.assertEqual([x.title for x in Section.objects.all()],
                         ['This is the title'])
        self.assertEqual([x.start_date for x in Speech.objects.all()],
                         [datetime.date(2014, 7, 24)])

    def test_unicode_character(self):
        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_unicode_character.xml')

        self.assertEqual([x.type for x in Speech.objects.all()], ['other'])

    def test_blank_speakers(self):
        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_blank_speakers.xml')

        speaker = Speaker.objects.get(name='Speaker')
        speeches = Speech.objects.all()
        speeches_s = Speech.objects.filter(type='speech')
        self.assertEqual(speeches.count(), speeches_s.count())

        for i in range(4):
            s = speaker if i % 2 else None
            sd = 'Speaker' if i > 1 else None
            self.assertEqual(speeches[i].speaker, s)
            self.assertEqual(speeches[i].speaker_display, sd)

    def test_import_remote_file(self):
        self.importer.import_document(
            'http://example.com/Debate_Bungeni_1995-10-31.xml')

        # To get us started, let's just check that we get the right kind of
        # speech in the right order.
        self.assertEqual([x.type for x in Speech.objects.all()], [
            u'scene', u'other', u'narrative', u'speech', u'question',
            u'summary', u'speech', u'answer', u'narrative', u'speech',
            u'narrative'
        ])
Пример #11
0
 def setUp(self):
     # Run the base-class set-up first, then build the committing importer
     # for the test instance that the tests use.
     super(AkomaNtosoImportTestCase, self).setUp()
     importer = ImportAkomaNtoso(instance=self.instance, commit=True)
     self.importer = importer
Пример #12
0
class AkomaNtosoImportTestCase(InstanceTestCase):
    # Exercises ImportAkomaNtoso.import_document against fixture documents.

    def setUp(self):
        super(AkomaNtosoImportTestCase, self).setUp()
        # A committing importer bound to the test instance, shared by tests.
        self.importer = ImportAkomaNtoso(instance=self.instance, commit=True)

    def _list_sections(self):
        # Build a list of (section title, [texts of its speeches]) pairs,
        # one per section.
        texts_by_section = {}
        for section in Section.objects.all():
            texts_by_section[section.id] = []

        for speech in Speech.objects.all():
            if not speech.section_id:
                continue
            texts_by_section[speech.section_id].append(speech.text)

        listing = []
        for section in Section.objects.all():
            listing.append((section.title, texts_by_section[section.id]))
        return listing

    def test_import_sample_file(self):
        self.importer.import_document(
            'speeches/tests/data/fake_http/Debate_Bungeni_1995-10-31.xml')

        # As a first check, make sure the imported speeches have the right
        # types and come back in the right order.
        expected_types = [
            u'scene', u'other', u'narrative', u'speech', u'question',
            u'summary', u'speech', u'answer', u'narrative', u'speech',
            u'narrative',
        ]
        imported_types = [speech.type for speech in Speech.objects.all()]
        self.assertEqual(imported_types, expected_types)

    def test_already_imported(self):
        clobber_doc = 'speeches/tests/data/fake_http/test_clobber.xml'
        first_import = [('This is the title', ['<p>Hello</p>'])]
        clobber_only = [
            ('This is the title', ['<p>Howdy</p>']),
            ('Conclusions', ['<p>Bye</p>']),
        ]

        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_xpath.xml')
        self.assertEqual(self._list_sections(), first_import)

        # 'skip': the listing is expected to be unchanged.
        skipping = ImportAkomaNtoso(
            instance=self.instance, commit=True, clobber='skip')
        skipping.import_document(clobber_doc)
        self.assertEqual(self._list_sections(), first_import)

        # 'merge': the new speech joins the existing section.
        merging = ImportAkomaNtoso(
            instance=self.instance, commit=True, clobber='merge')
        merging.import_document(clobber_doc)
        self.assertEqual(
            self._list_sections(),
            [
                ('This is the title', ['<p>Hello</p>', '<p>Howdy</p>']),
                ('Conclusions', ['<p>Bye</p>']),
            ])

        # 'replace': only the new document's content is expected.
        replacing = ImportAkomaNtoso(
            instance=self.instance, commit=True, clobber='replace')
        replacing.import_document(clobber_doc)
        self.assertEqual(self._list_sections(), clobber_only)

        # No clobber argument: the document's sections are expected twice.
        plain = ImportAkomaNtoso(instance=self.instance, commit=True)
        plain.import_document(clobber_doc)
        self.assertEqual(self._list_sections(), clobber_only + clobber_only)

    def test_not_already_imported(self):
        # With no sections present beforehand, every clobber mode (and the
        # default with no clobber argument) is expected to give the same
        # listing. Sections are deleted after each round.
        document = 'speeches/tests/data/fake_http/test_clobber.xml'
        expected = [
            ('This is the title', ['<p>Howdy</p>']),
            ('Conclusions', ['<p>Bye</p>']),
        ]
        importer_options = (
            {'clobber': 'skip'},
            {'clobber': 'merge'},
            {'clobber': 'replace'},
            {},
        )

        for extra in importer_options:
            importer = ImportAkomaNtoso(
                instance=self.instance, commit=True, **extra)
            importer.import_document(document)
            self.assertEqual(self._list_sections(), expected)
            Section.objects.all().delete()

    def test_empty_title(self):
        document = 'speeches/fixtures/test_inputs/test_empty_title.xml'
        single_import = [
            ('Untitled', ['<p>Hello</p>']),
            ('Untitled', ['<p>Howdy</p>']),
            ('Conclusions', ['<p>Bye</p>']),
        ]

        self.importer.import_document(document)
        self.assertEqual(self._list_sections(), single_import)

        skipping = ImportAkomaNtoso(
            instance=self.instance, commit=True, clobber='skip')
        skipping.import_document(document)
        self.assertEqual(self._list_sections(), single_import)

        merging = ImportAkomaNtoso(
            instance=self.instance, commit=True, clobber='merge')
        merging.import_document(document)
        self.assertEqual(
            self._list_sections(),
            [
                ('Untitled', ['<p>Hello</p>', '<p>Hello</p>', '<p>Howdy</p>']),
                ('Untitled', ['<p>Howdy</p>']),
                ('Conclusions', ['<p>Bye</p>', '<p>Bye</p>']),
            ])

        replacing = ImportAkomaNtoso(
            instance=self.instance, commit=True, clobber='replace')
        replacing.import_document(document)
        self.assertEqual(self._list_sections(), single_import)

        # No clobber argument: the document's sections are expected twice.
        plain = ImportAkomaNtoso(instance=self.instance, commit=True)
        plain.import_document(document)
        self.assertEqual(self._list_sections(), single_import + single_import)

    def test_empty_docDate(self):
        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_empty_docDate.xml')

        found = []
        for section in Section.objects.all():
            found.append(
                (section.start_date, section.title, section.source_url))
        self.assertEqual(
            found,
            [(datetime.date(2012, 3, 7), 'Title', 'http://example.org')])

    def test_xpath_preface_elements(self):
        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_xpath.xml')

        titles = [section.title for section in Section.objects.all()]
        self.assertEqual(titles, ['This is the title'])

        start_dates = [speech.start_date for speech in Speech.objects.all()]
        self.assertEqual(start_dates, [datetime.date(2014, 7, 24)])

    def test_unicode_character(self):
        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_unicode_character.xml')

        imported_types = [speech.type for speech in Speech.objects.all()]
        self.assertEqual(imported_types, ['other'])

    def test_blank_speakers(self):
        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_blank_speakers.xml')

        speaker = Speaker.objects.get(name='Speaker')
        speeches = Speech.objects.all()

        # Every imported speech must be of type 'speech'.
        self.assertEqual(
            speeches.count(), Speech.objects.filter(type='speech').count())

        # Expected (speaker, speaker_display) for the first four speeches.
        expected = [
            (None, None),
            (speaker, None),
            (None, 'Speaker'),
            (speaker, 'Speaker'),
        ]
        for position, (who, display) in enumerate(expected):
            self.assertEqual(speeches[position].speaker, who)
            self.assertEqual(speeches[position].speaker_display, display)

    def test_import_remote_file(self):
        self.importer.import_document(
            'http://example.com/Debate_Bungeni_1995-10-31.xml')

        # As a first check, make sure the imported speeches have the right
        # types and come back in the right order.
        expected_types = [
            u'scene', u'other', u'narrative', u'speech', u'question',
            u'summary', u'speech', u'answer', u'narrative', u'speech',
            u'narrative',
        ]
        imported_types = [speech.type for speech in Speech.objects.all()]
        self.assertEqual(imported_types, expected_types)
Пример #13
0
class AkomaNtosoImportTestCase(InstanceTestCase):
    def setUp(self):
        """Run the parent setUp, then build the shared importer with commit=True."""
        super(AkomaNtosoImportTestCase, self).setUp()
        # self.instance is read only after the parent setUp has run.
        importer_options = {"instance": self.instance, "commit": True}
        self.importer = ImportAkomaNtoso(**importer_options)

    def test_import_sample_file(self):
        """Import the local Debate_Bungeni fixture and check the speech types in order."""
        fixture_path = "speeches/fixtures/test_inputs/Debate_Bungeni_1995-10-31.xml"
        self.importer.import_document(fixture_path)

        # As a first check, only the kind of each speech and the order in
        # which they appear is verified.
        expected_types = [
            u"scene",
            u"other",
            u"narrative",
            u"speech",
            u"question",
            u"summary",
            u"speech",
            u"answer",
            u"narrative",
            u"speech",
            u"narrative",
        ]
        imported_types = [speech.type for speech in Speech.objects.all()]
        self.assertEqual(imported_types, expected_types)

    def test_import_remote_file(self):
        """Import a document given by URL and check the speech types in order."""
        remote_url = "http://examples.akomantoso.org/php/download.php?file=Debate_Bungeni_1995-10-31.xml"  # noqa
        self.importer.import_document(remote_url)

        # As a first check, only the kind of each speech and the order in
        # which they appear is verified.
        expected_types = [
            u"scene",
            u"other",
            u"narrative",
            u"speech",
            u"question",
            u"summary",
            u"speech",
            u"answer",
            u"narrative",
            u"speech",
            u"narrative",
        ]
        imported_types = [speech.type for speech in Speech.objects.all()]
        self.assertEqual(imported_types, expected_types)

    def test_already_imported(self):
        """Check speech texts after importing test_clobber.xml over an earlier import.

        test_xpath.xml is imported first; test_clobber.xml is then imported
        three times with different ``clobber`` settings.  The speech texts
        expected after each step are:

        * clobber=False -> ["<p>Hello</p>"]
        * clobber=True  -> ["<p>Howdy</p>"]
        * clobber unset -> ["<p>Howdy</p>", "<p>Howdy</p>"]
        """
        clobber_fixture = "speeches/fixtures/test_inputs/test_clobber.xml"

        def speech_texts():
            # Re-query each time so every assertion sees the current rows.
            return [speech.text for speech in Speech.objects.all()]

        self.importer.import_document("speeches/fixtures/test_inputs/test_xpath.xml")

        non_clobbering = ImportAkomaNtoso(instance=self.instance, commit=True, clobber=False)
        non_clobbering.import_document(clobber_fixture)
        self.assertEqual(speech_texts(), ["<p>Hello</p>"])

        clobbering = ImportAkomaNtoso(instance=self.instance, commit=True, clobber=True)
        clobbering.import_document(clobber_fixture)
        self.assertEqual(speech_texts(), ["<p>Howdy</p>"])

        default_importer = ImportAkomaNtoso(instance=self.instance, commit=True)
        default_importer.import_document(clobber_fixture)
        self.assertEqual(speech_texts(), ["<p>Howdy</p>", "<p>Howdy</p>"])

    def test_not_already_imported(self):
        """A first-time import gives the same section for every ``clobber`` setting.

        test_xpath.xml is imported with clobber=False, clobber=True and with
        clobber unset; all sections are deleted after each round so that
        every import starts without existing sections.
        """
        fixture_path = "speeches/fixtures/test_inputs/test_xpath.xml"

        # Extra constructor keyword arguments for each round, in order.
        clobber_variants = [{"clobber": False}, {"clobber": True}, {}]

        for extra_options in clobber_variants:
            importer = ImportAkomaNtoso(instance=self.instance, commit=True, **extra_options)
            importer.import_document(fixture_path)

            section_titles = [section.title for section in Section.objects.all()]
            self.assertEqual(section_titles, ["This is the title"])

            # Clear the sections so the next round is again a first import.
            Section.objects.all().delete()

    def test_empty_title(self):
        """Each import of test_empty_title.xml adds one more empty-titled section.

        After the initial import through the shared importer, the fixture is
        imported again with clobber=False, clobber=True and clobber unset; the
        list of section titles must grow by one empty string every time.
        """
        fixture_path = "speeches/fixtures/test_inputs/test_empty_title.xml"
        self.importer.import_document(fixture_path)

        # (extra constructor kwargs, section titles expected afterwards)
        rounds = [
            ({"clobber": False}, ["", ""]),
            ({"clobber": True}, ["", "", ""]),
            ({}, ["", "", "", ""]),
        ]

        for extra_options, expected_titles in rounds:
            importer = ImportAkomaNtoso(instance=self.instance, commit=True, **extra_options)
            importer.import_document(fixture_path)

            section_titles = [section.title for section in Section.objects.all()]
            self.assertEqual(section_titles, expected_titles)

    def test_xpath_preface_elements(self):
        """Importing test_xpath.xml gives the expected section title and speech date.

        Exactly one section titled "This is the title" and exactly one speech
        with start date 2014-07-24 must exist after the import.
        """
        fixture_path = "speeches/fixtures/test_inputs/test_xpath.xml"
        self.importer.import_document(fixture_path)

        section_titles = [section.title for section in Section.objects.all()]
        self.assertEqual(section_titles, ["This is the title"])

        speech_dates = [speech.start_date for speech in Speech.objects.all()]
        self.assertEqual(speech_dates, [datetime.date(2014, 7, 24)])

    def test_unicode_character(self):
        """Importing test_unicode_character.xml yields a single speech of type "other"."""
        fixture_path = "speeches/fixtures/test_inputs/test_unicode_character.xml"
        self.importer.import_document(fixture_path)

        imported_types = [speech.type for speech in Speech.objects.all()]
        self.assertEqual(imported_types, ["other"])

    def test_blank_speakers(self):
        """Check speaker attribution after importing test_blank_speakers.xml.

        Every imported speech must have type "speech".  For the first four
        speeches (zero-based positions 0-3) the expected values are:

        * ``speaker`` is the Speaker named "Speaker" at positions 1 and 3,
          and None at positions 0 and 2;
        * ``speaker_display`` is "Speaker" at positions 2 and 3, and None at
          positions 0 and 1.
        """
        self.importer.import_document("speeches/fixtures/test_inputs/test_blank_speakers.xml")

        speaker = Speaker.objects.get(name="Speaker")
        all_speeches = Speech.objects.all()
        self.assertEqual(all_speeches.count(), Speech.objects.filter(type="speech").count())

        # Evaluate the queryset once.  Indexing a lazy queryset inside the
        # loop would run a separate LIMIT/OFFSET query for every access; a
        # list gives one query and one consistent snapshot of the rows.
        speeches = list(all_speeches)

        expected = [
            (None, None),
            (speaker, None),
            (None, "Speaker"),
            (speaker, "Speaker"),
        ]
        for position, (linked_speaker, display_name) in enumerate(expected):
            # Indexing (rather than zip) keeps the test failing loudly if
            # fewer than four speeches were imported.
            speech = speeches[position]
            self.assertEqual(speech.speaker, linked_speaker)
            self.assertEqual(speech.speaker_display, display_name)