示例#1
0
from ETL.E.G1.authors import Authors
import os

# Gather authors from every XML file found anywhere below ./mch/.
authors = Authors()
for dirpath, dnames, fnames in os.walk("./mch/"):
    for f in fnames:
        # Only XML files are fed to the collection; everything else is skipped.
        if not f.endswith(".xml"):
            continue
        # Build the path once and reuse it for the trace line and the load.
        xml_path = os.path.join(dirpath, f)
        # A parenthesised single-argument print behaves the same on Python 2
        # and also parses on Python 3.
        print(xml_path)
        authors.add_from_xml(xml_path)

# Report len() of the collection once every file has been added.
print(len(authors))
示例#2
0
文件: main.py 项目: WillyMaikowski/bn
import xml.etree.ElementTree as ET
import json
import logging

from ETL.E.G1.authors import Authors
from ETL.E.G2.request.find_request import FindRequest
from ETL.E.G2.request.present_request import PresentRequest
from ETL.T.transformer import Transformer

# Give the root logger a default handler so the warning below is emitted.
logging.basicConfig()

# Author collection populated from a single XML file on disk.
authors = Authors()
authors.add_from_xml("data/000 - 999.xml")

# Empty <aleph> root element; it is not referenced again in the visible part
# of this script.
aleph_data = ET.fromstring("<aleph></aleph>")

# Endpoint handed to every FindRequest. It never changes, so bind it once
# instead of on each iteration.
url = 'http://www.bncatalogo.cl/X'

cnt = 0
total = 20
for author in authors:
    cnt += 1
    # NOTE(review): the break fires before author number `total` is handled,
    # so at most total - 1 authors are looked up and the progress line never
    # reaches 100%. Left unchanged -- confirm whether that is intended.
    if cnt == total:
        break
    # `//` keeps the percentage a whole number on Python 3 as it already is on
    # Python 2, and the parenthesised single-argument print parses on both.
    print("Loading authors: " + str(100 * cnt // total) + "%")
    request = FindRequest(base_url=url)
    metadata = request.find(name=author)
    # No keys at all means the lookup produced nothing for this author.
    if not metadata.keys():
        logging.warning("Author '" + author + "' not found.")
        continue

    # presumably the hit count and the result-set id returned by the lookup --
    # confirm against FindRequest.find
    no_entries = metadata['no_entries']
    author_id = metadata['set_number']
示例#3
0
文件: main.py 项目: WillyMaikowski/bn
import xml.etree.ElementTree as ET
import json
import logging

from ETL.E.G1.authors import Authors
from ETL.E.G2.request.find_request import FindRequest
from ETL.E.G2.request.present_request import PresentRequest
from ETL.T.transformer import Transformer

# Give the root logger a default handler so the warning below is emitted.
logging.basicConfig()

# Author collection populated from a single XML file on disk.
authors = Authors()
authors.add_from_xml("data/000 - 999.xml")

# Parse an empty <aleph> root element; it is not referenced again in the
# visible part of this script.
aleph_data = ET.fromstring("<aleph></aleph>")
# `cnt` is the 1-based position of the author being handled; `total` is both
# the loop cut-off and the denominator of the progress percentage.
cnt = 0
total = 20
for author in authors:
    cnt += 1
    # NOTE(review): the break fires before author number `total` is handled,
    # so at most total - 1 authors are looked up and the progress line never
    # reaches 100% -- confirm whether that is intended.
    if cnt == total:
        break
    # Python 2 print statement; both operands are ints, so `/` floors here and
    # the percentage is printed as a whole number.
    print "Loading authors: " + str(100 * cnt / total) + "%"
    url = 'http://www.bncatalogo.cl/X'
    # A new FindRequest is built for every author and queried by name.
    request = FindRequest(base_url=url)
    metadata = request.find(name=author)
    # No keys at all is treated as "no match": warn and move to the next one.
    # `author` is concatenated with str literals, so it is expected to be a
    # string.
    if len(metadata.keys()) == 0:
        logging.warning("Author '" + author + "' not found.")
        continue

    # presumably the hit count and the result-set id returned by the lookup --
    # confirm against FindRequest.find
    no_entries = metadata['no_entries']
    author_id = metadata['set_number']