Exemplo n.º 1
0
    def write(self, names):
        """Write one BioDes document for every name, in sorted order.

        ``names`` is sorted in place (the caller's list is reordered) and
        ``self.total`` is set to its length.  For each name, progress is
        reported through ``self.print_progress`` using a 1-based counter,
        a search URL on the production site is built, and the resulting
        ``BioDesDoc`` is passed to ``self.write_file`` with that counter.

        :param names: list of names; each item must support
            ``.encode('utf8')``.
        """
        # Loop-invariant URL pieces, built once instead of per name.
        # Development counterpart of the base URL, kept for reference:
        #   http://dev.inghist.nl/retrotest2010/oldenbarnevelt/
        url_prefix = (
            "http://www.inghist.nl/retroboeken/oldenbarnevelt/"
            "#accessor=toc&accessor_href=toc%3FSearchSource%3D"
        )
        url_suffix = (
            "%26correspondent%3D%26day1%3D%26month1%3D%26year1%3D"
            "%26day2%3D%26month2%3D%26year2%3D"
        )

        names.sort()
        self.total = len(names)
        for index, name in enumerate(names, 1):
            self.print_progress(index, name)

            # The name is percent-quoted twice; presumably because it is
            # nested inside the already-encoded ``accessor_href`` value
            # (TODO confirm against the site's URL scheme).
            encoded_name = urllib.quote(urllib.quote(name.encode('utf8')))
            url = url_prefix + encoded_name + url_suffix

            bdes = BioDesDoc()
            bdes.from_args(
                naam=name,
                naam_publisher="XXX",
                url_publisher="http://XXX.nl",
                url_biografie=url,
            )
            self.write_file(bdes, index)
Exemplo n.º 2
0
 def process(self, people_dict):
     """Write one BioDes document per person in ``people_dict``.

     Names are handled in sorted order.  ``self.total`` is set to the
     number of entries.  For every entry a progress line is printed, the
     name is passed through ``sanitize_name`` and, if
     ``self.name_already_processed`` reports it as seen, ``self.skipped``
     is incremented and the entry is skipped.  Otherwise a document with
     a search URL on the production site is built and handed to
     ``self.write_file`` together with the 1-based position of the entry
     (skipped entries still consume a position).

     :param people_dict: mapping of name -> info mapping; each info
         mapping must provide the keys ``'born'`` and ``'dead'``.
     """
     # Loop-invariant part of the biography URL.
     # Development counterpart of the base URL, kept for reference:
     #   http://dev.inghist.nl/retrotest2010/groen/
     url_prefix = (
         "http://www.inghist.nl/retroboeken/groen/"
         "#accessor=accessor_index&accessor_href=accessor_index%3FSearchSource%253Autf-8%253Austring%3D"
     )

     people = sorted(people_dict.keys())
     total = len(people)
     self.total = total
     for x, raw_name in enumerate(people, 1):
         # Look the record up under the original key, before sanitizing.
         info = people_dict[raw_name]
         # Single parenthesised expression: prints identically whether
         # ``print`` is a statement or a function.
         print("processing: %s/%s - %s" % (x, total, raw_name))

         name = sanitize_name(raw_name)
         if self.name_already_processed(name):
             self.skipped += 1
             continue

         # The name is percent-quoted twice; presumably because it is
         # nested inside the already-encoded ``accessor_href`` value
         # (TODO confirm against the site's URL scheme).
         encoded_name = urllib.quote(urllib.quote(name.encode('utf8')))

         bdes = BioDesDoc()
         args = dict(
             naam=name,
             naam_publisher="XXX",
             url_publisher="http://XXX.nl",
             url_biografie=url_prefix + encoded_name,
         )

         # Dates are only included when the document class accepts them.
         birth_date = info['born']
         death_date = info['dead']
         if bdes.is_date(birth_date):
             args['geboortedatum'] = birth_date
         if bdes.is_date(death_date):
             args['sterfdatum'] = death_date

         bdes.from_args(**args)
         self.write_file(bdes, x)
Exemplo n.º 3
0
 def write(self, people):
     """Write a BioDes document for each not-yet-processed person.

     ``self.total`` is set to ``len(people)``.  Iterating ``people``
     yields names; ``people[name]['url']`` supplies the biography URL.
     Names for which ``self.name_already_processed`` is true increment
     ``self.skipped`` and are not written.  The 0-based enumeration
     position of the name is passed to ``self.write_file``.

     :param people: mapping of name -> mapping containing a ``'url'`` key.
     """
     self.total = len(people)
     for position, person_name in enumerate(people):
         already_done = self.name_already_processed(person_name)
         if already_done:
             self.skipped += 1
             continue

         document = BioDesDoc()
         document.from_args(
             naam=person_name,
             naam_publisher="XXX",
             url_publisher="http://XXX.nl",
             url_biografie=people[person_name]['url'],
         )
         self.write_file(document, position)
Exemplo n.º 4
0
 def process(self):
     """Collect correspondent names from the XML files in ``in/`` and
     write one BioDes document per unique name.

     Every file listed in the ``in`` directory is parsed with
     ``etree.parse``.  For each ``//item`` element, ``self.total`` is
     increased by 2 and the text of the first ``title/from`` and first
     ``title/to`` child is considered as a name.  Missing, empty or
     dots/whitespace-only names are reported via
     ``self.skip("null name")``; names seen before are reported via
     ``self.skip("dupe name")``.  The remaining names, in first-seen
     order, are each turned into a document that is passed to
     ``self.write_file`` with a 1-based counter.
     """
     names = []    # unique names, in first-seen order
     seen = set()  # the same names, for constant-time duplicate checks

     for filename in os.listdir('in'):
         tree = etree.parse("in/" + filename)
         for person in tree.xpath("//item"):
             # Two potential names (sender and recipient) per item.
             self.total += 2

             # Resolve both candidates before handling either of them;
             # a missing element yields None.
             candidates = []
             for path in ('title/from', 'title/to'):
                 matches = person.xpath(path)
                 candidates.append(matches[0].text if matches else None)

             for name in candidates:
                 # Covers None, "" and placeholders made up only of dots
                 # and whitespace (e.g. "....").
                 if not name or name.replace('.', '').strip() == "":
                     self.skip("null name")
                     continue
                 if name in seen:
                     self.skip("dupe name")
                     continue
                 seen.add(name)
                 names.append(name)

     # Loop-invariant part of the biography URL.
     url_prefix = (
         "http://www.inghist.nl/retroboeken/archives/"
         "#accessor=toc&accessor_href=toc%3Fcorrespondent%253Austring%253Autf-8%3D"
     )
     for index, name in enumerate(names, 1):
         # The name is percent-quoted twice; presumably because it is
         # nested inside the already-encoded ``accessor_href`` value
         # (TODO confirm against the site's URL scheme).
         encoded_name = urllib.quote(urllib.quote(name.encode('utf8')))
         bdes = BioDesDoc()
         bdes.from_args(
             naam=name,
             naam_publisher="Instituut voor Nederlandse Geschiedenis",
             url_publisher="http://www.inghist.nl/",
             url_biografie=url_prefix + encoded_name,
         )
         self.write_file(bdes, index)
Exemplo n.º 5
0
    def write(self, names):
        """Write a BioDes document for each new, sanitized name.

        ``names`` is sorted in place and ``self.total`` is set to its
        length.  Each name is passed through ``sanitize_name``; when
        ``self.name_already_processed`` is true for the result,
        ``self.skip("dupe name")`` is called and nothing is written.
        Otherwise a document carrying a search URL for the name is handed
        to ``self.write_file`` with the name's 1-based position in the
        sorted list.

        :param names: list of names; sanitized items must support
            ``.encode('utf8')``.
        """
        search_url = "http://www.inghist.nl/retroboeken/gachard/#accessor=toc&accessor_href=toc%3FSearchSource%253Austring%253Autf-8%3D%26van_aan%3D%26correspondent%253Austring%253Autf-8%3D"

        names.sort()
        self.total = len(names)
        for counter, raw_name in enumerate(names, 1):
            clean_name = sanitize_name(raw_name)
            if self.name_already_processed(clean_name):
                self.skip("dupe name")
                continue

            # Percent-quote the UTF-8 bytes twice before appending.
            quoted_once = urllib.quote(clean_name.encode('utf8'))
            quoted_twice = urllib.quote(quoted_once)

            document = BioDesDoc()
            document.from_args(
                naam=clean_name,
                naam_publisher="XXX",
                url_publisher="http://XXX.nl",
                url_biografie=search_url + quoted_twice,
            )
            self.write_file(document, counter)
Exemplo n.º 6
0
 def get_biodes_documents(self):
     """Lazily yield one result per link found in ``self.root``.

     Every ``href`` attribute of an ``<a>`` element under ``self.root``
     is passed to ``from_url`` on a fresh ``BioDesDoc``; whatever that
     call returns is yielded (presumably the loaded document -- verify
     against ``BioDesDoc.from_url``).
     """
     hrefs = self.root.xpath('//a/@href')
     for href in hrefs:
         document = BioDesDoc()
         yield document.from_url(href)