def get_publication(xd):
    """Pick the publication record that best matches the puzzle ``xd``.

    Every record returned by ``metadb.xd_publications()`` is tested against
    three criteria, listed from most to least trusted:

    1. its ``PublicationAbbr`` equals the lower-cased pubid parsed from
       ``xd.filename`` (source filename/metadata must be the priority);
    2. its ``PublicationName`` occurs in the lower-cased "Copyright" header;
    3. its ``PublisherName`` occurs in the lower-cased "Copyright" header.

    If more than one (criterion, record) pair matches, records whose
    ``PublisherAbbr`` contains 'self' are discarded -- unless that would
    discard everything -- and the pair with the lowest criterion number wins.

    Args:
        xd: puzzle object exposing ``get_header(name)`` and ``filename``.

    Returns:
        The best-matching publication record, or None when nothing matches.
    """
    # Only the "Copyright" header is searched; tolerate it being absent.
    copyright_text = (xd.get_header("Copyright") or "").lower()

    # Loop-invariant, so normalise once.  A missing pubid is left as-is and
    # therefore simply fails the equality test below.
    abbr = utils.parse_pubid(xd.filename)
    wanted_abbr = abbr.lower() if abbr else abbr

    matches = set()
    for publ in metadb.xd_publications().values():
        if publ.PublicationAbbr == wanted_abbr:
            matches.add((1, publ))

        if publ.PublicationName and publ.PublicationName.lower() in copyright_text:
            matches.add((2, publ))

        if publ.PublisherName and publ.PublisherName.lower() in copyright_text:
            matches.add((3, publ))

    if not matches:
        return None
    if len(matches) == 1:
        return next(iter(matches))[1]

    # Otherwise, filter out 'self' publications; if nothing survives the
    # filter we are right back where we started.
    candidates = {
        (pri, p) for pri, p in matches
        if 'self' not in (p.PublisherAbbr or '')
    } or matches
    if len(candidates) == 1:
        return next(iter(candidates))[1]

    try:
        # Plain tuple ordering: lowest criterion number first, ties resolved
        # by the records' own ordering (what sorted(...)[0] used to yield).
        return min(candidates)[1]
    except TypeError:
        # Records tied on priority could not be ordered against each other
        # (e.g. a None field compared with a str).  Rank on the priority and
        # break ties on the abbreviations so the result stays deterministic.
        return min(
            candidates,
            key=lambda c: (c[0], str(c[1].PublicationAbbr), str(c[1].PublisherAbbr)),
        )[1]
def main():
    """Recompute the pubid of every receipt and rewrite the receipts TSV.

    For each row from ``metadb.xd_receipts_rows()`` a pubid is looked up with
    ``catalog.find_pubid`` over all of the row's fields joined by '|'.  When
    that pubid is non-empty and differs from the one parsed out of the row's
    current ``xdid``, a new xdid is built from the new pubid plus the
    sequence number parsed from the old xdid (or, failing that, from
    ``SourceFilename``).  Rows without a usable sequence number are left
    unchanged.  Finally the header and all rows are written to
    ``metadb.RECEIPTS_TSV``, replacing its previous contents.
    """
    # The parsed arguments are not used below; the call is kept in case it
    # has set-up side effects -- NOTE(review): confirm and drop if it has none.
    utils.get_args()

    # Collect the pieces and join once instead of growing a string with +=.
    pieces = [metadb.xd_receipts_header]

    for r in metadb.xd_receipts_rows():
        oldpubid = utils.parse_pubid(r.xdid or '')
        newpubid = catalog.find_pubid("|".join(str(x) for x in r))

        d = r._asdict()

        if newpubid and newpubid != oldpubid:
            seqnum = utils.parse_seqnum(r.xdid or r.SourceFilename)
            if seqnum:
                newxdid = newpubid + seqnum
                utils.info("changing xdid from '%s' to '%s'" % (r.xdid, newxdid))
                d["xdid"] = newxdid
            else:
                utils.info("no date or number in xdid, not reshelving")

        pieces.append(metadb.xd_receipts_row(**d))

    # Opened only after every row has been consumed, as before; the context
    # manager guarantees the data is flushed and the handle closed.
    with open(metadb.RECEIPTS_TSV, 'w') as out:
        out.write("".join(pieces))
def get_publication(xd):
    """Pick the publication record that best matches the puzzle ``xd``.

    Every record returned by ``metadb.xd_publications()`` is tested against
    three criteria, listed from most to least trusted:

    1. its ``PublicationAbbr`` equals the lower-cased pubid parsed from
       ``xd.filename`` (source filename/metadata must be the priority);
    2. its ``PublicationName`` occurs in the lower-cased "Copyright" header;
    3. its ``PublisherName`` occurs in the lower-cased "Copyright" header.

    If more than one (criterion, record) pair matches, records whose
    ``PublisherAbbr`` contains 'self' are discarded -- unless that would
    discard everything -- and the pair with the lowest criterion number wins.

    Args:
        xd: puzzle object exposing ``get_header(name)`` and ``filename``.

    Returns:
        The best-matching publication record, or None when nothing matches.
    """
    # Only the "Copyright" header is searched; tolerate it being absent.
    copyright_text = (xd.get_header("Copyright") or "").lower()

    # Loop-invariant, so normalise once.  A missing pubid is left as-is and
    # therefore simply fails the equality test below.
    abbr = utils.parse_pubid(xd.filename)
    wanted_abbr = abbr.lower() if abbr else abbr

    matches = set()
    for publ in metadb.xd_publications().values():
        if publ.PublicationAbbr == wanted_abbr:
            matches.add((1, publ))

        if publ.PublicationName and publ.PublicationName.lower() in copyright_text:
            matches.add((2, publ))

        if publ.PublisherName and publ.PublisherName.lower() in copyright_text:
            matches.add((3, publ))

    if not matches:
        return None
    if len(matches) == 1:
        return next(iter(matches))[1]

    # Otherwise, filter out 'self' publications; if nothing survives the
    # filter we are right back where we started.
    candidates = {
        (pri, p) for pri, p in matches
        if 'self' not in (p.PublisherAbbr or '')
    } or matches
    if len(candidates) == 1:
        return next(iter(candidates))[1]

    try:
        # Plain tuple ordering: lowest criterion number first, ties resolved
        # by the records' own ordering (what sorted(...)[0] used to yield).
        return min(candidates)[1]
    except TypeError:
        # Records tied on priority could not be ordered against each other
        # (e.g. a None field compared with a str).  Rank on the priority and
        # break ties on the abbreviations so the result stays deterministic.
        return min(
            candidates,
            key=lambda c: (c[0], str(c[1].PublicationAbbr), str(c[1].PublisherAbbr)),
        )[1]