def to_xml(self):
     """Build and return a BioDesDoc describing this person.

     The document is filled, in this order, with:

     * basic information: the portal as publisher, a portal URL built from
       the bioport id of the first biography, the names and the sex;
     * the birth, death, funeral, baptism and floruit events that exist;
     * one figure per illustration;
     * one ``bibl`` element (publisher, ref and optional authors) for every
       biography whose source id is not 'bioport'.
     """
     biodes = BioDesDoc()
     bioport_id = self.get_biographies()[0].get_bioport_id()

     # Basic information about the person and the publishing portal.
     basic_info = {
         'naam_publisher': 'Het Biografisch Portaal',
         'url_biografie': 'http://www.biografischportaal.nl/persoon/%s' % bioport_id,
         'url_publisher': 'http://www.biografischportaal.nl',
         'namen': self.get_names(),
         'bioport_id': bioport_id,
         'sex': self.get_value('sex'),
     }
     biodes.from_args(**basic_info)

     # Events: only the types that are actually known for this person.
     for event_type in ('birth', 'death', 'funeral', 'baptism', 'floruit'):
         event = self.get_event(event_type)
         if event is None:
             continue
         biodes._add_event_element(event)

     # Illustrations.
     for illustration in self.get_illustrations():
         biodes._add_figure(
             url=illustration.source_url,
             head=illustration.caption,
         )

     # Links to every source other than the portal itself.
     for biography in self.get_biographies():
         if biography.get_source().id == 'bioport':
             continue

         bibl = SubElement(biodes.get_element_biography(), 'bibl')

         el_publisher = SubElement(bibl, 'publisher')
         el_publisher.text = biography.get_value('name_publisher')

         el_ref = SubElement(bibl, 'ref')
         el_ref.attrib['target'] = biography.get_value('url_biography')

         # A falsy author value (None, empty) yields no author elements.
         for author_name in biography.get_value('author') or ():
             el_author = SubElement(bibl, 'author')
             el_author.text = author_name

     return biodes
Пример #2
0
    def test_from_args(self):
        """The 'local_id' given to from_args is what get_idno() returns."""
        doc_args = dict(
            url_biografie='http://www.gerbrandy.com/bio?a&b',
            url_publisher='http://www.gerbrandy.com',
            naam_publisher='Website van Jelle',
            titel_biografie='Bio van Jelle',
            naam='Jelle Gerbrandy',
            local_id='123',
        )

        document = BioDesDoc()
        document.from_args(**doc_args)

        self.assertEqual(document.get_idno(), '123')
Пример #3
0
 def process(self):
     """Write one BioDes file per unique correspondent name found in 'in'.

     Every file in the 'in' directory is parsed as XML.  For each ``//item``
     element the text of ``title/from`` and ``title/to`` is taken as a
     candidate name and ``self.total`` is increased by 2.  Candidates that
     are missing, empty or consist only of dots/whitespace are skipped as
     "null name"; candidates already collected are skipped as "dupe name".

     For each remaining name (numbered from 1, in order of first
     appearance) a BioDesDoc is created and passed to ``self.write_file``.
     """
     names = []      # unique names, in order of first appearance
     seen = set()    # same names, for O(1) duplicate detection

     for filename in os.listdir('in'):
         tree = etree.parse(os.path.join('in', filename))
         for person in tree.xpath("//item"):
             self.total += 2

             # Text of the first match for each path, or None when absent.
             candidates = []
             for path in ('title/from', 'title/to'):
                 nodes = person.xpath(path)
                 candidates.append(nodes[0].text if nodes else None)

             for name in candidates:
                 # Covers None, "" and placeholders such as "....".
                 if not name or not name.replace('.', '').strip():
                     self.skip("null name")
                     continue
                 if name in seen:
                     self.skip("dupe name")
                     continue
                 seen.add(name)
                 names.append(name)

     # Loop-invariant parts of the biography URL.
     base_production = "http://www.inghist.nl/retroboeken/archives/"
     anchor = "#accessor=toc&accessor_href=toc%3Fcorrespondent%253Austring%253Autf-8%3D"
     url_prefix = base_production + anchor

     for index, name in enumerate(names, 1):
         # The name is quoted twice; presumably because it ends up inside an
         # already-encoded href within the URL fragment -- TODO confirm.
         encoded_name = urllib.quote(urllib.quote(name.encode('utf8')))
         bdes = BioDesDoc()
         bdes.from_args(
             naam=name,
             naam_publisher="Instituut voor Nederlandse Geschiedenis",
             url_publisher="http://www.inghist.nl/",
             url_biografie=url_prefix + encoded_name,
         )
         self.write_file(bdes, index)
Пример #4
0
    def write(self, names):
        """Write one BioDes file for every not-yet-processed name.

        ``names`` is sorted in place and ``self.total`` is set to its length.
        Each name is sanitized; names reported by
        ``self.name_already_processed`` are skipped as "dupe name".  The
        1-based position in the sorted list is passed to ``self.write_file``
        as the index, so skipped names leave gaps in the numbering.
        """
        url_prefix = "http://www.inghist.nl/retroboeken/gachard/#accessor=toc&accessor_href=toc%3FSearchSource%253Austring%253Autf-8%3D%26van_aan%3D%26correspondent%253Austring%253Autf-8%3D"

        names.sort()
        self.total = len(names)

        for index, raw_name in enumerate(names, 1):
            name = sanitize_name(raw_name)
            if self.name_already_processed(name):
                self.skip("dupe name")
                continue

            # The UTF-8 encoded name is quoted twice before being appended.
            quoted_once = urllib.quote(name.encode('utf8'))
            url = url_prefix + urllib.quote(quoted_once)

            bdes = BioDesDoc()
            bdes.from_args(
                naam=name,
                naam_publisher="XXX",
                url_publisher="http://XXX.nl",
                url_biografie=url,
            )
            self.write_file(bdes, index)
Пример #5
0
    def test_create_some_samples(self, **args):
        """Write a minimal and a maximal sample BioDes file.

        'biodes10_minimal.xml' is produced from a handful of arguments.
        The same document object is then fed the full set of arguments,
        one extra event and several states, and written to
        'biodes10_maximal.xml'.
        """
        # Minimal sample: publisher, biography URL/title and a single name.
        minimal_args = dict(
            url_biografie='http://www.gerbrandy.com/bio',
            url_publisher='http://www.gerbrandy.com',
            naam_publisher='Website van Jelle',
            titel_biografie='Bio van Jelle',
            naam='Jelle Gerbrandy',
        )

        doc = BioDesDoc()
        doc.from_args(**minimal_args)
        doc.to_file('biodes10_minimal.xml')

        # Maximal sample: (nearly) every supported argument.
        # 'naam' and 'beroep' are intentionally not passed here.
        maximal_args = {
            'bioport_id': 'biodesid',
            'url_biografie': 'http://url_van_de_biografie',
            'url_publisher': 'http://url_van_de_publisher',
            'titel_biografie': 'titel van de biografie',
            'naam_publisher': 'naam van depublisher',
            'auteur': 'auteur',
            'prepositie': 'prepositie',
            'voornaam': 'voornaam',
            'intrapositie': 'intrapositie',
            'geslachtsnaam': 'geslachtsnaam',
            'postpositie': 'postpositie',
            'laatst_veranderd': '2009-11-11',
            'publicatiedatum': '2009-11-11',
            'geboortedatum': '2009-11-11',
            'geboortedatum_tekst': '2009-11-11 in tekst',
            'geboorteplaats': 'geboorteplaats',
            'sterfdatum': '2011-11-11',
            'sterfdatum_tekst': 'sterfdatum_tekst',
            'sterfplaats': 'sterfplaats',
            'geslacht': '1',
            'illustraties': [
                'http://illustratie1.jpg',
                'http://illustratie2.jpg',
            ],
            'namen': [
                'Naam1',
                ('mr.', 'Jan', 'van', 'Voorbeeld', 'Esq.'),
            ],
            'namen_en': ['John'],
            'tekst': 'tekst van de biografie kan <em>Markup</em> <p>bevatten</p>',
        }
        # The document that already holds the minimal data is reused.
        doc.from_args(**maximal_args)

        doc._add_event(
            type='marriage',
            when='1901-12-12',
            text='getrouwd met marietje',
        )

        # States are added in this exact order.
        states = (
            dict(type='occupation', frm='1940', to='1960', text='schilder'),
            dict(type='residence', frm='1940', to='1960', text='Amsterdam'),
            dict(type='claim_to_fame', text='Superbekende persoon!'),
            dict(type='occupation', frm='1940', to='1960', text='schilder',
                 place="Amsterdam"),
        )
        for state in states:
            doc.add_state(**state)

        doc.to_file('biodes10_maximal.xml')