示例#1
0
def check_opf(container):
    """Run the OPF consistency checks and return the problems found.

    The checks run in a fixed order and every problem is appended to one
    list: root element / version attribute, presence of the metadata,
    manifest and spine sections, empty ids and dangling idrefs, non-linear
    spine items, manifest items with missing or duplicated hrefs, duplicated
    spine idrefs, the spine ``toc`` reference, the navigation document (only
    when the parsed OPF major version is greater than 2), ``<meta
    name="cover">`` entries, the package unique identifier, and finally the
    MIME type of every spine item.

    :param container: object exposing ``opf``, ``opf_name``,
        ``opf_version_parsed``, ``book_type``, ``opf_xpath()``,
        ``href_to_name()``, ``exists()``, ``manifest_type_map``,
        ``manifest_id_map``, ``spine_iter`` and ``mime_map``.
    :return: list of error objects, in the order the checks produced them.
    """
    errors = []
    opf_version = container.opf_version_parsed

    # Root element and version attribute. The version is only checked when
    # the root element itself is correct.
    if container.opf.tag != OPF('package'):
        err = BaseError(_('The OPF does not have the correct root element'), container.opf_name, container.opf.sourceline)
        err.HELP = xml(_(
            'The opf must have the root element <package> in namespace {0}, like this: <package xmlns="{0}">')).format(OPF2_NS)
        errors.append(err)

    elif container.opf.get('version') is None and container.book_type == 'epub':
        err = BaseError(_('The OPF does not have a version'), container.opf_name, container.opf.sourceline)
        err.HELP = xml(_(
            'The <package> tag in the OPF must have a version attribute. This is usually version="2.0" for EPUB2 and AZW3 and version="3.0" for EPUB3'))
        errors.append(err)

    # Mandatory top-level sections.
    for tag in ('metadata', 'manifest', 'spine'):
        if not container.opf_xpath('/opf:package/opf:' + tag):
            errors.append(MissingSection(container.opf_name, tag))

    # Empty ids are reported on their own and then removed from the set, so
    # that an empty idref is also reported as incorrect.
    all_ids = set(container.opf_xpath('//*/@id'))
    if '' in all_ids:
        for empty_id_tag in container.opf_xpath('//*[@id=""]'):
            errors.append(EmptyID(container.opf_name, empty_id_tag.sourceline))
    all_ids.discard('')
    for elem in container.opf_xpath('//*[@idref]'):
        idref = elem.get('idref')
        if idref not in all_ids:
            errors.append(IncorrectIdref(container.opf_name, idref, elem.sourceline))

    nl_items = [elem.sourceline for elem in container.opf_xpath('//opf:spine/opf:itemref[@linear="no"]')]
    if nl_items:
        errors.append(NonLinearItems(container.opf_name, nl_items))

    # Manifest items: missing href attribute, href pointing at a file that
    # does not exist, and hrefs used more than once. ``seen`` maps an href to
    # the line of its first use; ``dups`` maps it to the lines of every use.
    seen, dups = {}, {}
    for item in container.opf_xpath('/opf:package/opf:manifest/opf:item'):
        href = item.get('href', None)
        if href is None:
            errors.append(NoHref(container.opf_name, item.get('id', None), item.sourceline))
        else:
            hname = container.href_to_name(href, container.opf_name)
            if not hname or not container.exists(hname):
                errors.append(MissingHref(container.opf_name, href, item.sourceline))
            if href in seen:
                dups.setdefault(href, [seen[href]]).append(item.sourceline)
            else:
                seen[href] = item.sourceline
    errors.extend(DuplicateHref(container.opf_name, eid, locs) for eid, locs in iteritems(dups))

    # Spine itemrefs that reference the same manifest id more than once.
    seen, dups = {}, {}
    for item in container.opf_xpath('/opf:package/opf:spine/opf:itemref[@idref]'):
        ref = item.get('idref')
        if ref in seen:
            dups.setdefault(ref, [seen[ref]]).append(item.sourceline)
        else:
            seen[ref] = item.sourceline
    errors.extend(DuplicateHref(container.opf_name, eid, locs, for_spine=True) for eid, locs in iteritems(dups))

    # Spine toc attribute: it must name a manifest item of the NCX type. If
    # the attribute is absent but an NCX is in the manifest, report that the
    # reference is missing.
    spine = container.opf_xpath('/opf:package/opf:spine[@toc]')
    if spine:
        spine = spine[0]
        mitems = [x for x in container.opf_xpath('/opf:package/opf:manifest/opf:item[@id]') if x.get('id') == spine.get('toc')]
        if mitems:
            mitem = mitems[0]
            if mitem.get('media-type', '') != guess_type('a.ncx'):
                errors.append(IncorrectToc(container.opf_name, mitem.sourceline, bad_mimetype=mitem.get('media-type')))
        else:
            errors.append(IncorrectToc(container.opf_name, spine.sourceline, bad_idref=spine.get('toc')))
    else:
        spine = container.opf_xpath('/opf:package/opf:spine')
        if spine:
            spine = spine[0]
            ncx = container.manifest_type_map.get(guess_type('a.ncx'))
            if ncx:
                ncx_name = ncx[0]
                # Reverse lookup: file name -> manifest id.
                rmap = {v:k for k, v in iteritems(container.manifest_id_map)}
                ncx_id = rmap.get(ncx_name)
                if ncx_id:
                    errors.append(MissingNCXRef(container.opf_name, spine.sourceline, ncx_id))

    # Navigation document, only for OPF major versions above 2.
    if opf_version.major > 2:
        existing_nav = find_existing_nav_toc(container)
        if existing_nav is None:
            errors.append(MissingNav(container.opf_name, 0))
        else:
            toc = parse_nav(container, existing_nav)
            if len(toc) == 0:
                errors.append(EmptyNav(existing_nav, 0))

    # <meta name="cover"> entries.
    covers = container.opf_xpath('/opf:package/opf:metadata/opf:meta[@name="cover"]')
    if covers:
        if len(covers) > 1:
            errors.append(MultipleCovers(container.opf_name, [c.sourceline for c in covers]))
        manifest_ids = set(container.opf_xpath('/opf:package/opf:manifest/opf:item/@id'))
        for cover in covers:
            if cover.get('content', None) not in manifest_ids:
                errors.append(IncorrectCover(container.opf_name, cover.sourceline, cover.get('content', '')))
            # Attribute order is only visible in the serialized tag: report
            # the tag when content="..." is written before name="...".
            raw = etree.tostring(cover)
            try:
                n, c = raw.index(b'name="'), raw.index(b'content="')
            except ValueError:
                n = c = -1
            if n > -1 and c > -1 and n > c:
                errors.append(NookCover(container.opf_name, cover.sourceline))

    # Unique identifier. The id is compared in Python rather than being
    # interpolated into the XPath expression: XPath string literals have no
    # escaping mechanism, so an id containing quotes or backslashes cannot be
    # embedded reliably.
    uid = container.opf.get('unique-identifier', None)
    identifiers = container.opf_xpath('/opf:package/opf:metadata/dc:identifier')
    if uid is None or not any(elem.get('id') == uid for elem in identifiers):
        errors.append(NoUID(container.opf_name))
    for elem in identifiers:
        if not elem.text or not elem.text.strip():
            errors.append(EmptyIdentifier(container.opf_name, elem.sourceline))

    # Every spine item must have the XHTML MIME type.
    for item, name, linear in container.spine_iter:
        mt = container.mime_map[name]
        if mt != XHTML_MIME:
            iid = item.get('idref', None)
            lnum = None
            if iid:
                # Same reasoning as for the unique identifier: match the id
                # in Python instead of building an XPath literal from it.
                mitems = [x for x in container.opf_xpath('/opf:package/opf:manifest/opf:item[@id]') if x.get('id') == iid]
                if mitems:
                    lnum = mitems[0].sourceline
                else:
                    iid = None
            errors.append(BadSpineMime(name, iid, mt, lnum, container.opf_name))

    return errors
示例#2
0
文件: opf.py 项目: MarioJC/calibre
def check_opf(container):
    """Run the OPF consistency checks and return the problems found.

    The checks run in a fixed order and every problem is appended to one
    list: root element / version attribute, presence of the metadata,
    manifest and spine sections, dangling idrefs, non-linear spine items,
    manifest items with missing or duplicated hrefs, duplicated spine
    idrefs, the spine ``toc`` reference, the navigation document (only when
    the parsed OPF major version is greater than 2), ``<meta name="cover">``
    entries, the package unique identifier, and finally the MIME type of
    every spine item.

    The body avoids constructs that exist on only one major Python version
    (``dict.iteritems()``, searching serialized XML with a text literal), so
    it runs on both Python 2 and Python 3.

    :param container: object exposing ``opf``, ``opf_name``,
        ``opf_version_parsed``, ``book_type``, ``opf_xpath()``,
        ``href_to_name()``, ``exists()``, ``manifest_type_map``,
        ``manifest_id_map``, ``spine_iter`` and ``mime_map``.
    :return: list of error objects, in the order the checks produced them.
    """
    errors = []
    opf_version = container.opf_version_parsed

    # Root element and version attribute. The version is only checked when
    # the root element itself is correct.
    if container.opf.tag != OPF('package'):
        err = BaseError(_('The OPF does not have the correct root element'), container.opf_name, container.opf.sourceline)
        err.HELP = xml(_(
            'The opf must have the root element <package> in namespace {0}, like this: <package xmlns="{0}">')).format(OPF2_NS)
        errors.append(err)

    elif container.opf.get('version') is None and container.book_type == 'epub':
        err = BaseError(_('The OPF does not have a version'), container.opf_name, container.opf.sourceline)
        err.HELP = xml(_(
            'The <package> tag in the OPF must have a version attribute. This is usually version="2.0" for EPUB2 and AZW3 and version="3.0" for EPUB3'))
        errors.append(err)

    # Mandatory top-level sections.
    for tag in ('metadata', 'manifest', 'spine'):
        if not container.opf_xpath('/opf:package/opf:' + tag):
            errors.append(MissingSection(container.opf_name, tag))

    # Every idref must point at an id that exists somewhere in the OPF.
    all_ids = set(container.opf_xpath('//*/@id'))
    for elem in container.opf_xpath('//*[@idref]'):
        idref = elem.get('idref')
        if idref not in all_ids:
            errors.append(IncorrectIdref(container.opf_name, idref, elem.sourceline))

    nl_items = [elem.sourceline for elem in container.opf_xpath('//opf:spine/opf:itemref[@linear="no"]')]
    if nl_items:
        errors.append(NonLinearItems(container.opf_name, nl_items))

    # Manifest items: missing href attribute, href pointing at a file that
    # does not exist, and hrefs used more than once. ``seen`` maps an href to
    # the line of its first use; ``dups`` maps it to the lines of every use.
    seen, dups = {}, {}
    for item in container.opf_xpath('/opf:package/opf:manifest/opf:item'):
        href = item.get('href', None)
        if href is None:
            errors.append(NoHref(container.opf_name, item.get('id', None), item.sourceline))
        else:
            hname = container.href_to_name(href, container.opf_name)
            if not hname or not container.exists(hname):
                errors.append(MissingHref(container.opf_name, href, item.sourceline))
            if href in seen:
                dups.setdefault(href, [seen[href]]).append(item.sourceline)
            else:
                seen[href] = item.sourceline
    # .items() instead of .iteritems(): the latter does not exist on Python 3.
    errors.extend(DuplicateHref(container.opf_name, eid, locs) for eid, locs in dups.items())

    # Spine itemrefs that reference the same manifest id more than once.
    seen, dups = {}, {}
    for item in container.opf_xpath('/opf:package/opf:spine/opf:itemref[@idref]'):
        ref = item.get('idref')
        if ref in seen:
            dups.setdefault(ref, [seen[ref]]).append(item.sourceline)
        else:
            seen[ref] = item.sourceline
    errors.extend(DuplicateHref(container.opf_name, eid, locs, for_spine=True) for eid, locs in dups.items())

    # Spine toc attribute: it must name a manifest item of the NCX type. If
    # the attribute is absent but an NCX is in the manifest, report that the
    # reference is missing.
    spine = container.opf_xpath('/opf:package/opf:spine[@toc]')
    if spine:
        spine = spine[0]
        mitems = [x for x in container.opf_xpath('/opf:package/opf:manifest/opf:item[@id]') if x.get('id') == spine.get('toc')]
        if mitems:
            mitem = mitems[0]
            if mitem.get('media-type', '') != guess_type('a.ncx'):
                errors.append(IncorrectToc(container.opf_name, mitem.sourceline, bad_mimetype=mitem.get('media-type')))
        else:
            errors.append(IncorrectToc(container.opf_name, spine.sourceline, bad_idref=spine.get('toc')))
    else:
        spine = container.opf_xpath('/opf:package/opf:spine')
        if spine:
            spine = spine[0]
            ncx = container.manifest_type_map.get(guess_type('a.ncx'))
            if ncx:
                ncx_name = ncx[0]
                # Reverse lookup: file name -> manifest id.
                rmap = {v:k for k, v in container.manifest_id_map.items()}
                ncx_id = rmap.get(ncx_name)
                if ncx_id:
                    errors.append(MissingNCXRef(container.opf_name, spine.sourceline, ncx_id))

    # Navigation document, only for OPF major versions above 2.
    if opf_version.major > 2:
        existing_nav = find_existing_nav_toc(container)
        if existing_nav is None:
            errors.append(MissingNav(container.opf_name, 0))
        else:
            toc = parse_nav(container, existing_nav)
            if len(toc) == 0:
                errors.append(EmptyNav(existing_nav, 0))

    # <meta name="cover"> entries.
    covers = container.opf_xpath('/opf:package/opf:metadata/opf:meta[@name="cover"]')
    if covers:
        if len(covers) > 1:
            errors.append(MultipleCovers(container.opf_name, [c.sourceline for c in covers]))
        manifest_ids = set(container.opf_xpath('/opf:package/opf:manifest/opf:item/@id'))
        for cover in covers:
            if cover.get('content', None) not in manifest_ids:
                errors.append(IncorrectCover(container.opf_name, cover.sourceline, cover.get('content', '')))
            # Attribute order is only visible in the serialized tag: report
            # the tag when content="..." is written before name="...".
            # etree.tostring() returns a byte string, so the needles must be
            # bytes literals; a text needle raises TypeError on Python 3,
            # which the ValueError handler below would not catch.
            raw = etree.tostring(cover)
            try:
                n, c = raw.index(b'name="'), raw.index(b'content="')
            except ValueError:
                n = c = -1
            if n > -1 and c > -1 and n > c:
                errors.append(NookCover(container.opf_name, cover.sourceline))

    # Unique identifier. The id is compared in Python rather than being
    # interpolated into the XPath expression with %r: repr() is not an XPath
    # quoting function (it can emit a u'' prefix or backslash escapes), so
    # unusual ids would produce an invalid or wrong expression.
    uid = container.opf.get('unique-identifier', None)
    identifiers = container.opf_xpath('/opf:package/opf:metadata/dc:identifier')
    if uid is None or not any(elem.get('id') == uid for elem in identifiers):
        errors.append(NoUID(container.opf_name))
    for elem in identifiers:
        if not elem.text or not elem.text.strip():
            errors.append(EmptyIdentifier(container.opf_name, elem.sourceline))

    # Every spine item must have the XHTML MIME type.
    for item, name, linear in container.spine_iter:
        mt = container.mime_map[name]
        if mt != XHTML_MIME:
            iid = item.get('idref', None)
            lnum = None
            if iid:
                # Same reasoning as for the unique identifier: match the id
                # in Python instead of building an XPath literal from it.
                mitems = [x for x in container.opf_xpath('/opf:package/opf:manifest/opf:item[@id]') if x.get('id') == iid]
                if mitems:
                    lnum = mitems[0].sourceline
                else:
                    iid = None
            errors.append(BadSpineMime(name, iid, mt, lnum, container.opf_name))

    return errors