Exemplo n.º 1
0
def read_oeb_metadata(metadata):
    """Print an 'OEB:' header and pass the metadata element to dump().

    When a direct child of *metadata* is tagged ``dc-metadata``, the first
    such child is dumped instead of *metadata* itself.  No information is
    extracted, so the function implicitly returns None.
    """
    # Fall back to the element itself when it has no dc-metadata child.
    target = next(
        (node for node in metadata if node.tag == 'dc-metadata'),
        metadata,
    )

    print('OEB:')
    dump(target)
Exemplo n.º 2
0
def read_opf_metadata(metadata):
    """Extract book meta-information from a namespaced OPF metadata element.

    Only namespace-qualified children are inspected; they are matched by
    their local name (``title``, ``language``, ``subject``, ``description``
    and ``creator``).  If the first child is a ``dc-metadata`` wrapper, its
    children are inspected instead.

    Returns None when *metadata* has no children, otherwise a dict with the
    keys 'title', 'language', 'authors', 'series', 'tags' and 'annotation'.
    """
    print('OPF:')
    dump(metadata)

    # len() rather than a truth test: truth-testing an Element is deprecated.
    if len(metadata) == 0:
        return None

    title = None
    language = None
    authors = []
    series = []     # never filled in here; kept so the result has the key
    tags = []
    annotation = None

    if metadata[0].tag.endswith('dc-metadata'):
        metadata = metadata[0]

    for child in metadata:
        index = child.tag.find('}')
        if index == -1:     # skip unqualified names
            continue

        name = child.tag[index + 1:]
        # An empty element has text None; treat it as an empty string
        # instead of crashing on None.strip().
        text = (child.text or '').strip()

        if name == 'title':
            title = text
        elif name == 'language':
            language = text
        elif name == 'subject':
            if text:    # add only non-empty tags
                tags.append(text)
        elif name == 'description':
            annotation = text
        elif name == 'creator':
            # The role attribute is normally namespace-qualified
            # ('{namespace}role'), so match it by local name.  Creators
            # without a role attribute are assumed to be authors.
            role = 'aut'
            for attr_name, attr_value in child.attrib.items():
                if attr_name == 'role' or attr_name.endswith('}role'):
                    role = attr_value
                    break

            if role == 'aut' and text:
                authors.append(text)

    result = {
        'title': title,
        'language': language,
        'authors': authors,
        'series': series,
        'tags': list(set(tags)),    # drop duplicates (order is not kept)
        'annotation': annotation,
    }

    LOG.debug('read_opf_metadata output: %s', result)

    return result
Exemplo n.º 3
0
def read(path):
    """Read a zip-based e-book and return its extracted meta-information.

    The archive's container file names the OPF package file; the OPF's
    ``metadata`` element is then handed to read_oeb_metadata() (unqualified
    tag) or read_opf_metadata() (namespace-qualified tag).

    Returns whatever the selected reader returns, or None when the
    container, the OPF file or the metadata element is missing or invalid
    (an error is logged in each of those cases).
    """
    LOG.debug("reading %s", path)

    # NOTE(review): the archive is never closed in this function; presumably
    # get_good_zip() returns a zipfile.ZipFile -- confirm and close it.
    archive = get_good_zip(path)

    try:
        container = archive.read(CONTAINER)
    except KeyError:
        LOG.error('%s has no container', path)
        return None

    container = parse_xml(container)

    if container is None:
        LOG.error('%s has invalid container', path)
        return None

    # find() returns None when there is no rootfile element, and get()
    # returns None when the attribute is absent; handle both explicitly.
    rootfile = container.find('.//{%s}rootfile' % CONTAINER_NS)
    opf_name = rootfile.get('full-path') if rootfile is not None else None

    if not opf_name:
        LOG.error('%s has no rootfile in its container', path)
        return None

    try:
        opf = archive.read(opf_name)
    except KeyError:
        LOG.error('Could not open opf file (%s)', opf_name.encode('utf-8'))
        return None

    opf = parse_xml(opf)

    if opf is None:
        LOG.error('%s has invalid opf file', path)
        return None

    metadata = None

    for child in opf:
        if child.tag == 'metadata' or child.tag.endswith('}metadata'):
            metadata = child
            break

    if metadata is None:
        LOG.error('Could not find metadata in the opf file')
        return None

    # An unqualified tag is handled by the OEB reader, a namespace-qualified
    # one by the OPF reader.
    if metadata.tag == 'metadata':
        return read_oeb_metadata(metadata)

    return read_opf_metadata(metadata)
Exemplo n.º 4
0
def get_root(data, path):
    """Parse *data* and return its root element, or None on failure.

    *path* is forwarded to parse_xml() and used in the error messages.
    None is returned (after logging an error) when parsing yields nothing
    or when the root element's tag is not ROOT_ELEM.
    """
    element = parse_xml(data, True, path)

    if element is None:
        LOG.error('Invalid data in %s', path)
    elif element.tag != ROOT_ELEM:
        LOG.error('%s has wrong root element: %s (expected %s)',
                  path, element.tag, ROOT_ELEM)
    else:
        return element

    return None
Exemplo n.º 5
0
def read(path):
    """Read a FB2 file and return extracted meta-information.

    Returns None when the file cannot be read, its root element is invalid,
    or it has no (non-empty) title-info element.  Otherwise returns a dict
    with the keys 'title', 'language', 'authors', 'series', 'tags',
    'annotation' and 'cover'.
    """
    LOG.debug("reading %s", path)

    data = read_fb2_file(path)
    if data is None:
        return None

    root = get_root(data, path)
    if root is None:
        # get_root() may return None; avoid calling find() on it.
        return None

    title_info = root.find('.//%s' % TITLE_INFO_ELEM)
    # Explicit test instead of `not title_info`: truth-testing an Element is
    # deprecated.  A childless title-info is still treated as missing.
    if title_info is None or len(title_info) == 0:
        LOG.error('Could not find title-info for %s', path)
        LOG.error('  elem: %s', TITLE_INFO_ELEM)
        dump(root, stream=sys.stderr)
        return None

    title = None
    language = None
    authors = []
    series = []
    tags = []
    annotation = None   # never filled in by this function
    cover = None

    for child in title_info:
        if child.tag == AUTHOR_ELEM:
            authors.append(person2str(child))
        elif child.tag == TITLE_ELEM:
            title = strip_text(child.text)
        elif child.tag == TAG_ELEM:
            tag = tag2tag(child)
            if tag:     # add only non-empty tags
                tags.append(tag)
        elif child.tag == LANG_ELEM:
            language = strip_text(child.text)
        elif child.tag == SERIES_ELEM:
            # A series entry is kept only when both attributes are non-empty.
            series_name = child.attrib.get('name')
            series_number = child.attrib.get('number')
            if series_name and series_number:
                series.append((series_name, series_number))
        elif child.tag == COVER_ELEM:
            if len(child) and \
               child[0].tag == IMAGE_ELEM and XLINK_HREF in child[0].attrib:
                href = child[0].attrib[XLINK_HREF]

                # startswith() is safe for an empty href, unlike href[0].
                if href.startswith('#'):
                    href = href[1:]

                matches = [node for node in root.findall(BINARY_ELEM)
                           if node.attrib.get('id') == href]

                # Use the image only when the reference is unambiguous and
                # the binary element actually carries data.
                if len(matches) == 1 and matches[0].text:
                    cover = Image.open(
                        StringIO(decodestring(matches[0].text)))

    return {
        'title': title,
        'language': language,
        'authors': authors,
        'series': series,
        'tags': normalize_tags(tags),
        'annotation': annotation,
        'cover': cover,
    }