Пример #1
0
def get_publication(xd):
    """Return the publication record that best matches *xd*, or None.

    Every record in ``metadb.xd_publications()`` is tested three ways and
    remembered as a ``(priority, publication)`` pair for each test it passes:

    1. its ``PublicationAbbr`` equals the lowercased pubid parsed from
       ``xd.filename``;
    2. its ``PublicationName`` occurs, case-insensitively, in the
       "Copyright" header of *xd*;
    3. its ``PublisherName`` occurs, case-insensitively, in that header.

    Pairs whose ``PublisherAbbr`` contains ``'self'`` are discarded unless
    that would leave nothing.  The smallest remaining pair wins: lowest
    priority number first, ties broken by comparing the publication records
    themselves.
    """
    # Only the "Copyright" header is consulted.  A None result is treated as
    # an empty header instead of crashing on .lower().
    all_headers = (xd.get_header("Copyright") or "").lower()

    # source filename/metadata must be the priority
    # Lowercased once here rather than on every loop iteration.
    abbr = (utils.parse_pubid(xd.filename) or "").lower()

    matching_publications = set()
    for publ in metadb.xd_publications().values():
        if publ.PublicationAbbr == abbr:
            matching_publications.add((1, publ))

        if publ.PublicationName and publ.PublicationName.lower() in all_headers:
            matching_publications.add((2, publ))

        if publ.PublisherName and publ.PublisherName.lower() in all_headers:
            matching_publications.add((3, publ))

    if not matching_publications:
        return None

    # Filter out 'self' publications; an empty/None PublisherAbbr counts as
    # "not self" rather than raising TypeError on the membership test.
    non_self = {
        (pri, publ)
        for pri, publ in matching_publications
        if 'self' not in (publ.PublisherAbbr or '')
    }

    # If everything was 'self', fall back to the unfiltered matches.
    candidates = non_self or matching_publications

    # min() selects the same pair sorted(...)[0] would, without a full sort;
    # for a single candidate no comparison happens at all.
    return min(candidates)[1]
Пример #2
0
def main():
    """Rewrite the receipts table, reshelving rows whose pubid has changed.

    For each row from ``metadb.xd_receipts_rows()`` the pubid parsed from the
    row's current ``xdid`` is compared with the pubid that
    ``catalog.find_pubid`` derives from the whole row (all fields joined with
    ``|``).  When they differ and a sequence number can be parsed from the
    ``xdid`` (or from ``SourceFilename`` when ``xdid`` is empty), the row's
    ``xdid`` is replaced by ``newpubid + seqnum``.  Every row, changed or
    not, is then written to ``metadb.RECEIPTS_TSV`` after
    ``metadb.xd_receipts_header``.
    """
    # The parsed arguments are not used here; the call is kept because it may
    # have side effects that are not visible from this file.
    utils.get_args()

    # Collect the output pieces and join once instead of repeated str +=.
    pieces = [metadb.xd_receipts_header]

    for r in metadb.xd_receipts_rows():
        oldpubid = utils.parse_pubid(r.xdid or '')
        newpubid = catalog.find_pubid("|".join(str(x) for x in r))

        d = r._asdict()

        if newpubid and newpubid != oldpubid:
            seqnum = utils.parse_seqnum(r.xdid or r.SourceFilename)
            if seqnum:
                newxdid = newpubid + seqnum
                utils.info("changing xdid from '%s' to '%s'" %
                           (r.xdid, newxdid))
                d["xdid"] = newxdid
            else:
                utils.info("no date or number in xdid, not reshelving")

        pieces.append(metadb.xd_receipts_row(**d))

    # Opened only after every row has been read: mode 'w' truncates the file,
    # which matters if xd_receipts_rows() reads the same file lazily
    # (assumption, not confirmed here).  The with-block guarantees the handle
    # is flushed and closed.
    with open(metadb.RECEIPTS_TSV, 'w') as out:
        out.write("".join(pieces))
Пример #3
0
def main():
    """Regenerate the receipts file with corrected ``xdid`` values.

    Each row returned by ``metadb.xd_receipts_rows()`` is checked: the pubid
    parsed from its ``xdid`` is compared against the pubid found by
    ``catalog.find_pubid`` over the row's fields joined with ``|``.  If a
    different, non-empty pubid is found and ``utils.parse_seqnum`` yields a
    sequence number from the ``xdid`` (or from ``SourceFilename`` when the
    ``xdid`` is empty), the row is given the xdid ``newpubid + seqnum``;
    otherwise the row is kept unchanged.  The header and all rows are then
    written to ``metadb.RECEIPTS_TSV``.
    """
    # Return value unused; the call itself is preserved since its side
    # effects, if any, are defined outside this file.
    utils.get_args()

    rows_out = []

    for r in metadb.xd_receipts_rows():
        oldpubid = utils.parse_pubid(r.xdid or '')
        newpubid = catalog.find_pubid("|".join(str(x) for x in r))

        d = r._asdict()

        if newpubid and newpubid != oldpubid:
            seqnum = utils.parse_seqnum(r.xdid or r.SourceFilename)
            if seqnum:
                newxdid = newpubid + seqnum
                utils.info("changing xdid from '%s' to '%s'" % (r.xdid, newxdid))
                d["xdid"] = newxdid
            else:
                utils.info("no date or number in xdid, not reshelving")

        rows_out.append(metadb.xd_receipts_row(**d))

    # A single join avoids the quadratic cost of growing a string with +=.
    all_receipts = metadb.xd_receipts_header + "".join(rows_out)

    # The file is opened only once all rows have been consumed, because mode
    # 'w' truncates it immediately; the context manager closes the handle
    # even if the write fails.
    with open(metadb.RECEIPTS_TSV, 'w') as receipts_file:
        receipts_file.write(all_receipts)
Пример #4
0
def get_publication(xd):
    """Pick the publication that best fits *xd*; return None when none fit.

    Candidates come from ``metadb.xd_publications().values()``.  A candidate
    is recorded with priority

    * 1 when its ``PublicationAbbr`` equals the lowercased pubid parsed from
      ``xd.filename``,
    * 2 when its lowercased ``PublicationName`` is a substring of the
      lowercased "Copyright" header,
    * 3 when its lowercased ``PublisherName`` is a substring of that header.

    A candidate may be recorded under several priorities.  Entries whose
    ``PublisherAbbr`` contains ``'self'`` are ignored as long as at least one
    other entry remains.  The result is the publication of the smallest
    ``(priority, publication)`` pair, so equal priorities are resolved by
    comparing the publication records themselves.
    """
    # A None header or pubid is treated as an empty string so that .lower()
    # cannot fail.
    copyright_text = (xd.get_header("Copyright") or "").lower()

    # source filename/metadata must be the priority
    # Normalised once up front; the original lowercased it per iteration.
    wanted_abbr = (utils.parse_pubid(xd.filename) or "").lower()

    matches = set()
    for publ in metadb.xd_publications().values():
        if publ.PublicationAbbr == wanted_abbr:
            matches.add((1, publ))

        pub_name = publ.PublicationName
        if pub_name and pub_name.lower() in copyright_text:
            matches.add((2, publ))

        publisher = publ.PublisherName
        if publisher and publisher.lower() in copyright_text:
            matches.add((3, publ))

    if not matches:
        return None

    # Otherwise, filter out 'self' publications.  A missing PublisherAbbr is
    # treated as "not self" instead of raising TypeError.
    preferred = {
        match for match in matches
        if 'self' not in (match[1].PublisherAbbr or '')
    }
    if not preferred:
        preferred = matches  # right back where we started

    # Same winner as sorted(preferred)[0], found in a single pass; a lone
    # entry is returned without any comparison.
    return min(preferred)[1]