def record_as_html(rec, bn, db, fn):
    """Render one MARC record as an HTML page and write it to ``fn``.

    Args:
        rec: Record object; the code calls ``author()``, ``subjects()``,
            ``addedentries()``, ``location()``, ``get_fields()`` and reads
            its ``'245'`` field.
        bn: Value passed as the first argument of ``hip_url``.
        db: Value passed as the second argument of ``hip_url``.
        fn: Path of the HTML file to write.

    If ``fn`` cannot be opened, ``write_error(fn)`` is called and nothing
    is written.
    """
    author = rec.author()
    creator = marc8_to_unicode(author) if author else ''
    creatordiv = build_div_html(creator, 'creator')

    title = marc8_to_unicode(rec['245'].format_field())
    titlediv = build_div_html(title, 'title')

    scopecontent = build_div(rec.get_fields('520'), 'abstract scopecontent')
    bioghist = build_div(rec.get_fields('545'), 'description bioghist')
    subjdiv = build_div(rec.subjects(), 'subject')
    addedentries = build_div(rec.addedentries(), 'addedentry')
    repos = build_div(rec.location(), 'isLocatedAt repos')

    # NOTE(review): catalogdiv is not assigned in this function; presumably
    # it is a module-level name -- confirm.
    recordbody = '\n%s%s%s%s%s%s%s%s' % (
        creatordiv, titlediv, scopecontent, bioghist,
        subjdiv, addedentries, repos, catalogdiv)
    recordbodydiv = build_div_html(recordbody, 'record')

    url = hip_url(bn, db)
    # NOTE(review): as written, replace('&', '&') is a no-op; one side was
    # presumably an HTML entity in the original -- confirm before changing.
    out = htmltemplate.substitute(creator=creator,
                                  title=title,
                                  url=url,
                                  jsurl=url.replace('&', '&'),
                                  recordbody=recordbodydiv)

    try:
        outfile = open(fn, 'w')
    except (IOError, OSError):
        # Without a handle there is nothing left to do; falling through
        # would fail on the unbound ``outfile`` name.
        write_error(fn)
        return
    # The context manager closes the file even if the write raises.
    with outfile:
        outfile.write(out.encode("utf-8"))
def test_eszett_euro(self):
    # MARC-8 mapping, revised June 2004: the Eszett (M+C7) and the Euro Sign
    # (M+C8) were added to the MARC-8 set.
    eszett = marc8_to_unicode(b'ESZETT SYMBOL: \xc7 is U+00DF')
    self.assertEqual(eszett, u'ESZETT SYMBOL: \u00df is U+00DF')

    euro = marc8_to_unicode(b'EURO SIGN: \xc8 is U+20AC')
    self.assertEqual(euro, u'EURO SIGN: \u20ac is U+20AC')
def build_div(fields, divclass):
    """Return the concatenated HTML divs for ``fields``.

    Each field is formatted with ``format_field()``, converted with
    ``marc8_to_unicode`` and wrapped by ``build_div_html`` using
    ``divclass``. An empty ``fields`` yields an empty string.
    """
    return "".join(
        build_div_html(marc8_to_unicode(entry.format_field()), divclass)
        for entry in fields
    )
def to_unicode(self):
    """Converts MARC8 encoded data to Unicode.

    The object's string form is passed through ``Parser().to_html`` and the
    result is converted with ``marc8_to_unicode``.

    Returns:
        The converted text. If the conversion fails and
        ``IGNORE_UNICODE_ERRORS`` is set, a notice followed by the
        unconverted string form is returned instead.

    Raises:
        Whatever ``marc8_to_unicode`` raised, when ``IGNORE_UNICODE_ERRORS``
        is not set.
    """
    result = self.__str__()
    result_html = Parser().to_html(result)
    try:
        return marc8_to_unicode(result_html)
    except Exception:
        # ``Exception`` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are never turned into the fallback notice.
        if not IGNORE_UNICODE_ERRORS:
            raise
        return (
            "<strong>NOTE: MARC8 to Unicode conversion failed on this "
            "record.</strong><br/>\n%s" % result
        )
def test_marc8_to_unicode(self):
    # Walk the MARC-8 fixture and the UTF-8 reference in lockstep; line N of
    # one must convert to line N of the other.
    # Opened in binary mode because marc8_to_unicode is fed raw bytes; the
    # with-block closes both files even when an assertion fails.
    count = 0
    with open('test/test_marc8.txt', 'rb') as marc8_file, \
            open('test/test_utf8.txt', 'rb') as utf8_file:
        while True:
            marc8 = marc8_file.readline().strip(b"\n")
            utf8 = utf8_file.readline().strip(b"\n")
            # An empty read means end of file (or a blank line) in either
            # fixture; the count check below catches a premature stop.
            if marc8 == b'' or utf8 == b'':
                break
            count += 1
            self.assertEqual(marc8_to_unicode(marc8).encode('utf8'), utf8)
    self.assertEqual(count, 1515)
def test_marc8_to_unicode(self):
    # Compare the MARC-8 fixture with the UTF-8 reference line by line.
    # The with-block guarantees both files are closed even when an
    # assertion fails part-way through the loop.
    count = 0
    with open("test/test_marc8.txt", "rb") as marc8_file, \
            open("test/test_utf8.txt", "rb") as utf8_file:
        while True:
            marc8 = marc8_file.readline().strip(b"\n")
            utf8 = utf8_file.readline().strip(b"\n")
            # Stop at the first empty read from either file.
            if marc8 == b"" or utf8 == b"":
                break
            count += 1
            self.assertEqual(marc8_to_unicode(marc8).encode("utf8"), utf8)
    # Guards against a fixture that ends (or has a blank line) too early.
    self.assertEqual(count, 1515)
def test_marc8_to_unicode(self):
    # Each line of the MARC-8 fixture must convert to the matching line of
    # the UTF-8 reference file.
    count = 0
    # Context managers replace the trailing close() calls, which were
    # skipped whenever an assertion failed inside the loop.
    with open('test/test_marc8.txt', 'rb') as marc8_file, \
            open('test/test_utf8.txt', 'rb') as utf8_file:
        while True:
            marc8 = marc8_file.readline().strip(b"\n")
            utf8 = utf8_file.readline().strip(b"\n")
            if marc8 == b'' or utf8 == b'':
                # End of either fixture (or a blank line) ends the walk.
                break
            count += 1
            self.assertEqual(marc8_to_unicode(marc8).encode('utf8'), utf8)
    self.assertEqual(count, 1515)
def test_subscript_2(self):
    # The "2" between the \x1bb and \x1bs escape sequences is expected to
    # come out as U+2082.
    # Inputs are bytes literals, matching the other marc8_to_unicode tests;
    # on Python 2 b'' and '' are the same type, so nothing changes there.
    self.assertEqual(marc8_to_unicode(b'CO\x1bb2\x1bs is a gas'),
                     u'CO\u2082 is a gas')
    self.assertEqual(marc8_to_unicode(b'CO\x1bb2\x1bs'), u'CO\u2082')
def test_alif(self):
    # MARC-8 mapping, revised March 2005: the Alif (M+2E) maps to U+02BC
    # instead of the earlier U+02BE.
    converted = marc8_to_unicode(b"ALIF: \xae is U+02BC")
    self.assertEqual(converted, u"ALIF: \u02bc is U+02BC")
def test_subscript_2(self):
    # The "2" between the \x1bb and \x1bs escape sequences is expected to
    # come out as U+2082, with and without trailing text.
    cases = (
        (b"CO\x1bb2\x1bs is a gas", u"CO\u2082 is a gas"),
        (b"CO\x1bb2\x1bs", u"CO\u2082"),
    )
    for marc8, expected in cases:
        self.assertEqual(marc8_to_unicode(marc8), expected)
def test_alif(self):
    # Per the March 2005 revision of the MARC-8 mapping, the Alif (M+2E)
    # converts to U+02BC rather than U+02BE.
    marc8 = b'ALIF: \xae is U+02BC'
    expected = u'ALIF: \u02bc is U+02BC'
    self.assertEqual(marc8_to_unicode(marc8), expected)
def utf8_join(in_list):
    """Join strings with spaces, convert from MARC-8, and trim the edges.

    The characters ``.:,;/`` and spaces are stripped from both ends of the
    converted result.
    """
    joined = ' '.join(in_list)
    return pymarc.marc8_to_unicode(joined).strip('.:,;/ ')
def test_subscript_2(self):
    # The "2" between the \x1bb and \x1bs escape sequences is expected to
    # come out as U+2082.
    in_sentence = marc8_to_unicode(b'CO\x1bb2\x1bs is a gas')
    self.assertEqual(in_sentence, u'CO\u2082 is a gas')

    at_end = marc8_to_unicode(b'CO\x1bb2\x1bs')
    self.assertEqual(at_end, u'CO\u2082')
def main(marcfile):
    """Collect oral-history interview (OHI) records into per-letter link lists.

    Reads ``marcfile``, keeps every record that has a 998 field with
    subfield ``c`` equal to ``'oh'``, and builds:

    * ``ohiindex``: initial letter -> ``<li>`` entries for all interviews;
    * ``transindex``: initial letter -> ``<li>`` entries for interviews that
      have an online transcript (an 856 ``u`` under
      ``http://www.aip.org/history/ohilist``).

    Each transcript URL is fetched once; failures are reported on stderr but
    the link is still emitted.

    Args:
        marcfile: Path of the MARC file to read.

    Exits via ``sys.exit`` with a status message when no OHI record is found.
    """
    letters = list(string.ascii_uppercase)
    interviews = []
    ohiindex = {}
    transindex = {}
    # NOTE(review): the titles and backlinks below are not used in the
    # visible part of this function; presumably the page-writing code that
    # consumes them (and the two indexes) lives outside this view.
    ohititle = 'Oral History Interviews held by the Niels Bohr Library'
    transtitle = 'Online Oral History Transcripts from the Niels Bohr Library'
    ohibacklink = """A <a href="http://aip.org/history/ohilist/transcripts.html">separate list of transcripts available online</a> is also available."""
    transbacklink = """A <a href="http://aip.org/history/ohilist/">separate list of all transcripts</a> is also available."""
    recordcounter = 0

    # Binary mode for MARC data; the with-block closes the handle once the
    # reader has been drained.
    with open(marcfile, 'rb') as marc_handle:
        for record in MARCReader(marc_handle):
            if record['998'] is None:
                continue
            for field in record.get_fields('998'):
                if field['c'] != 'oh':
                    continue
                catdb = get_catdb(record)
                bibno = get_bibno(record)

                # The last matching 856 wins, as before.
                transcript_url = None
                if record['856'] is not None:
                    for link in record.get_fields('856'):
                        if 'http://www.aip.org/history/ohilist' in link['u']:
                            transcript_url = link['u']

                url = 'http://www.aip.org/history/catalog/%s/%s.html' % (
                    catdb, bibno)
                interviewee = marc8_to_unicode(record.author())
                date = parse_date(record)
                interviewdate = '(Interview date: %s)' % date.rstrip(',. ')
                label = " ".join([interviewee, interviewdate])
                interviews.append(
                    (url, label, transcript_url, alafiling(label)))
                recordcounter += 1

    status = '%d OHI records found in %s' % (recordcounter, marcfile)
    if recordcounter == 0:
        sys.exit(status)
    sys.stderr.write(status)
    sys.stderr.write('\n')

    interviews.sort(key=lambda interviewkey: interviewkey[3])
    for url, label, transcript_url, _filing_key in interviews:
        initial = label.upper()[0]
        # Labels that do not start with A-Z are left out of both indexes.
        if initial not in letters:
            continue
        linkdata = '<li>%s' % make_link(url, label)
        if transcript_url is not None:
            # Probe the transcript URL; problems are only reported.
            # HTTPError is handled first because it subclasses URLError.
            try:
                urllib2.urlopen(transcript_url)
            except urllib2.HTTPError as e:
                if e.code == 404:
                    sys.stderr.write('404 on %s\n' % transcript_url)
                else:
                    sys.stderr.write('Fail: %d, %s' % (e.code, e.msg))
            except urllib2.URLError as e:
                sys.stderr.write('Fail: %s' % e.reason)
            linkdata = "%s - <strong>%s</strong>" % (
                linkdata,
                make_link(transcript_url, 'Online transcript available'))
            transonlydata = '<li>%s</li>\n' % make_link(transcript_url, label)
            transindex.setdefault(initial, []).append(transonlydata)
        linkdata = '%s</li>\n' % linkdata
        ohiindex.setdefault(initial, []).append(linkdata)
def _preset_repositories():
    """Return a fresh list of ``[name, code, url]`` rows for known repositories."""
    return [
        [u"American Institute of Physics", "MdCpAIP", "http://www.aip.org/history/nbl/index.html"],
        [u"Académie des Sciences", "FrACADEMIE", "http://www.academie-sciences.fr/en/archive.htm"],  # not an official MARC code
        [u"American Association for the Advancement of Science", "daaas", "http://archives.aaas.org/"],
        [u"American Philosophical Society", "ppamp", "http://www.amphilsoc.org/library"],
        [u"Amherst College", "ma", "https://www.amherst.edu/library/archives"],
        [u"Armagh Observatory", "ukARMAGH", "http://www.arm.ac.uk/history/"],  # not an official MARC code
        [u"Auburn University", "aapa", "http://www.lib.auburn.edu/sca/"],
        [u"Austin Public Library", "TxAu", "http://www.austinlibrary.com/ahc/"],
        [u"Australian Academy of Science", "AUAAS", "http://science.org.au/basser/"],  # not an official MARC code
        [u"Birmingham Reference Library", "BCA", "http://www.birmingham.gov.uk/cs/Satellite/localstudieslibrary?packedargs=website%3D1&rendermode=live"],
        [u"Brandeis University", "MWalB", "http://lts.brandeis.edu/research/archives-speccoll/"],
        [u"Brigham Young University", "upb", "http://library.byuh.edu/library/archives"],
        [u"British Antarctic Survey", "ukntnls-BAS", "http://www.antarctica.ac.uk/about_bas/our_organisation/eid/archives.php"],  # not an official MARC code
        [u"California Institute of Technology", "CPT", "http://archives.caltech.edu/"],
        [u"Cambridge University", "UkCU", "http://www.lib.cam.ac.uk/"],
        [u"Canisius College", "nbucc", "http://www.canisius.edu/archives/"],
        [u"Carnegie Institution of Washington", "dcit", "http://carnegiescience.edu/legacy/"],
        [u"Carnegie Mellon University", "cmu", "http://diva.library.cmu.edu/"],
        [u"Case Western Reserve University", "oclw", "http://library.case.edu/ksl/collections/special/"],
        [u"Catholic University of America", "dcu", "http://libraries.cua.edu/rarebooks/index.cfm"],
        [u"Central Michigan University", "MiMtpT", "http://quod.lib.umich.edu/c/clarke/"],
        [u"CERN (European Organization for Nuclear Research)", "szgecern", "http://library.web.cern.ch/library/Archives/"],
        [u"Chemical Heritage Foundation", "paphchf", "http://www.chemheritage.org/discover/collections/index.aspx"],
        [u"Christ's College", "UkCU-CHR", "http://www.christs.cam.ac.uk/current-students/library/"],
        [u"Churchill College", "UkCU-CHU", "http://www.chu.cam.ac.uk/archives/"],
        [u"Clark University", "MWC", "http://www.clarku.edu/research/archives/"],
        [u"Clemson University", "sccleu", "http://www.clemson.edu/library/special_collections/"],
        [u"College of William and Mary", "viw", "https://swem.wm.edu/research/special-collections"],
        [u"Columbia University", "nncrb", "http://library.columbia.edu/"],
        [u"Cornell University", "NIC-RMC", "http://rmc.library.cornell.edu/"],
        [u"Dartmouth College", "nhd", "http://www.dartmouth.edu/~library/rauner/"],
        [u"DePauw University", "ingradi", "http://www.depauw.edu/libraries/about/librarylocations/archives/"],
        [u"Dickinson College", "PCarlD", "http://archives.dickinson.edu/"],
        [u"Dudley Observatory", "nald", "http://www.dudleyobservatory.org/"],
        [u"Duke University", "NcD", "http://library.duke.edu/rubenstein/"],
        [u"Duke University Medical Center", "NcD-MC", "http://archives.mc.duke.edu/"],
        [u"Dwight D. Eisenhower Library", "KAbE", "http://www.eisenhower.archives.gov/"],
        [u"Earlham College", "InRE", "http://library.earlham.edu/"],
        [u"Eastern Kentucky University", "kyre", "http://libguides.eku.edu/archives"],
        [u"Fermilab", "IBatF", "http://history.fnal.gov/index.html"],
        [u"George Mason University", "vifgm", "http://sca.gmu.edu/"],
        [u"Georgetown University", "dgu", "http://www.library.georgetown.edu/dept/speccoll/"],
        [u"George Washington University", "DGW", "http://library.gwu.edu/collections/scrc"],
        [u"Georgia Institute of Technology", "GAT", "http://www.library.gatech.edu/"],
        [u"Georgia Southern University", "gstg", "http://library.georgiasouthern.edu/specialcollections/main.html"],
        [u"Gerald R. Ford Library", "MiAaF", "http://www.fordlibrarymuseum.gov/"],
        [u"Hagley Museum and Library", "DeGH", "http://www.hagley.org/library"],
        [u"Harry S. Truman Library", "MoIT", "http://www.trumanlibrary.org/"],
        [u"Harvard University", "MH", "http://hul.harvard.edu/"],
        [u"Hebrew University of Jerusalem", "IsJJNL", "http://www.huji.ac.il/huji/eng/library_e.htm"],
        [u"Henry E. Huntington Library", "CSmH", "http://www.huntington.org/huntingtonlibrary.aspx?id=554"],
        [u"Hoover Institution on War, Revolution and Peace", "csth", "http://www.hoover.org/library-and-archives"],
        [u"Houston Academy of Medicine", "TxHMC", "http://www.library.tmc.edu/"],
        [u"Houston Public Library. Houston Metropolitan Research Center", "TxH", "http://www2.houstonlibrary.org/hmrc/"],
        [u"Imperial College of Science and Technology", "UkLU-ICA", "http://www3.imperial.ac.uk/recordsandarchives"],
        [u"Imperial War Museum", "IWM", "http://www.iwm.org.uk/collections-research"],
        [u"Indiana University", "inu", "http://www.indiana.edu/~liblilly/index.php"],
        [u"Institute for Advanced Study", "NjPI", "http://library.ias.edu/"],
        [u"Institution of Engineering and Technology", "IET", "http://www.theiet.org/resources/library/archives/"],
        [u"Iowa State University", "iaamsusc", "http://www.lib.iastate.edu/spcl/index.html"],
        [u"Johns Hopkins University", "mdbj", "http://www.library.jhu.edu/collections/specialcollections/"],
        [u"King's College", "UkLU-K", "http://www.kcl.ac.uk/library/archivespec/index.aspx"],
        [u"Lawrence Berkeley National Laboratory", "culbla", "https://commons.lbl.gov/display/aro/Archives+and+Records"],
        [u"Lehigh University", "PBL", "http://www.lehigh.edu/library/speccoll/index.html"],
        [u"Library and Archives Canada", "OONL", "http://www.collectionscanada.gc.ca/index-e.html"],
        [u"Library of Congress", "DLC", "http://findingaids.loc.gov/"],
        [u"Louisiana State University", "LU", "http://www.lib.lsu.edu/special/"],
        [u"Lowell Observatory", "AzFLO", "http://www.lowell.edu/"],
        [u"Massachusetts Institute of Technology", "MCM-B", "http://libraries.mit.edu/archives/"],
        [u"McMaster University", "OHMA", "http://library.mcmaster.ca/archives/"],
        [u"Michigan State University", "MiEM", "http://archives.msu.edu/"],
        [u"Mount Holyoke College", "mshm", "http://www.mtholyoke.edu/archives/index.html"],
        [u"Museu de Astronomia e Ciências Afins (Brazil)", "BR-MAST", "http://www.mast.br/acervos_arquivistico.html"],  # not an official MARC code
        [u"National Academy of Sciences", "DNAS", "http://www.nasonline.org/about-nas/history/archives/"],
        [u"National Archives and Records Administration", "dna", "http://www.archives.gov/"],
        [u"National Center for Atmospheric Research/University Corporation for Atmospheric Research", "CoBA", "http://opensky.library.ucar.edu/"],
        [u"National Library of Australia", "AuCNL", "http://www.austehc.unimelb.edu.au/"],
        [u"National Radio Astronomy Observatory", "ViCRA", "http://www.nrao.edu/archives/"],
        [u"New Mexico State University", "NmLcU", "http://rmoa.unm.edu/"],
        [u"New York Public Library", "NN", "http://www.nypl.org/"],
        [u"New York University", "NNU", "http://library.nyu.edu/"],
        [u"Niels Bohr Archive", "DK-KoNBA", "http://nba.nbi.dk/webpage.html"],
        [u"North Carolina Department of Cultural Resources", "Nc-Ar", "http://www.ncdcr.gov/"],  # not the name associated with this MARC code
        [u"North Carolina State University", "ncrhsus", "http://www.lib.ncsu.edu/specialcollections/"],
        [u"Northwestern University", "IEN", "http://www.library.northwestern.edu/libraries-collections/evanston-campus/university-archives"],
        [u"Nuffield College, Oxford", "UkOxU-N", "http://www.nuffield.ox.ac.uk/library/"],
        [u"Oberlin College", "oo", "http://www.oberlin.edu/library/special/"],
        [u"Oregon State University", "orcs", "http://osulibrary.oregonstate.edu/"],
        [u"Pennsylvania State University", "PSt", "http://www.libraries.psu.edu/psul/home.html"],
        [u"Princeton University", "njp", "http://www.princeton.edu/~rbsc/"],
        [u"Queen's University", "canQUEEN", "http://archives.queensu.ca/"],  # not an official MARC code
        [u"Radcliffe Institute for Advanced Study", "MCR-S", "http://www.radcliffe.edu/schlesinger_library.aspx"],
        [u"Rice University", "TxHR", "http://library.rice.edu/collections/WRC/manuscripts"],
        [u"Rockefeller Archive Center", "NNttR", "http://www.rockarch.org/"],
        [u"Royal Astronomical Society", "uklors", "http://www.ras.org.uk/library"],
        [u"Royal Institution of Great Britain", "UkRi", "http://www.rigb.org/contentControl?action=displayContent&id=00000002889"],
        [u"Royal Society", "UkLRs", "http://royalsociety.org/Collections/"],
        [u"Rutgers University", "NjR", "http://www.libraries.rutgers.edu/rul/libs/scua/scua.shtml"],
        [u"Schenectady Museum", "NSchM", "http://www.schenectadymuseum.org/archives/archives.html"],
        [u"Science Museum (Great Britain)", "UkLS", "http://www.sciencemuseum.org.uk/"],
        [u"Scripps Institution of Oceanography", "CaLjSIOA", "http://libraries.ucsd.edu/locations/sio/scripps-archives/index.html"],
        [u"Simon Fraser University", "BVAS", "http://www.sfu.ca/archives/"],
        [u"Smith College", "MNS", "http://www.smith.edu/libraries/libs/archives/"],
        [u"Smithsonian Institution. Archives", "dsiai", "http://siarchives.si.edu/"],
        [u"Smithsonian Institution. National Air and Space Museum", "dsinas", "http://www.nasm.si.edu/research/"],
        [u"Smithsonian Institution. National Museum of American History", "dsimah", "http://americanhistory.si.edu/archives/ac-i.htm"],
        [u"Southern Methodist University", "TxDaM", "http://www.smu.edu/Libraries"],
        [u"Stanford Linear Accelerator Center (SLAC)", "CSt-SLAC", "http://www.slac.stanford.edu/history/"],  # not an official MARC code
        [u"Stanford University", "CSt-SCUA", "http://www-sul.stanford.edu/depts/spc/mss/"],
        [u"State University of New York at Albany", "nalsu", "http://library.albany.edu/speccoll/"],
        [u"State University of New York at Buffalo", "nbuuar", "http://library.buffalo.edu/specialcollections/"],
        [u"State University of New York at Stony Brook", "nsbsu", "http://www.stonybrook.edu/libspecial/"],
        [u"Syracuse University", "NSyU", "http://library.syr.edu/find/scrc/"],
        [u"Temple University", "PPT", "http://library.temple.edu/collections"],
        [u"Tennessee State Library and Archives", "T", "http://www.tn.gov/tsla/Collections.htm"],
        [u"Texas A&M University", "TxCcTAM", "http://library.tamu.edu/"],
        [u"Trinity College", "UkCU-TRI", "http://library.trincoll.edu/index.cfm"],
        [u"Tufts University", "MMeT-DCA", "http://sites.tufts.edu/dca/"],
        [u"United States Naval Academy", "MdAN", "http://usna.edu/Library/sca/"],
        [u"Université Louis Pasteur de Strasbourg", "FrSULP", "http://www.hp-physique.org/"],
        [u"University College, London", "UCL", "http://www.ucl.ac.uk/library/special-coll/"],
        [u"University of Adelaide", "AuAU", "http://www.adelaide.edu.au/library/special/"],
        [u"University of Alaska", "AkU", "http://library.uaf.edu/apr-collections"],
        [u"University of Alberta", "AEUA", "http://www.ualberta.edu/~archives/"],
        [u"University of Arizona", "AzU", "http://speccoll.library.arizona.edu/"],
        [u"University of Bath", "Uk-Bath", "http://www.bath.ac.uk/library/"],
        [u"University of Birmingham", "UkBU", "http://www.special-coll.bham.ac.uk/"],
        [u"University of Bristol", "UkBrU", "http://www.bristol.ac.uk/library/resources/specialcollections/"],
        [u"University of California, Berkeley", "CU-BANC", "http://bancroft.berkeley.edu/"],
        [u"University of California, Irvine", "CU-I", "http://special.lib.uci.edu/"],
        [u"University of California, Los Angeles", "CLSU", "http://www2.library.ucla.edu/libraries/special.cfm"],
        [u"University of California, San Diego", "CUS", "http://libraries.ucsd.edu/collections/sca/index.html"],
        [u"University of California, Santa Barbara", "cusb", "http://www.library.ucsb.edu/special-collections"],
        [u"University of California, Santa Cruz", "cmthl", "http://library.ucsc.edu/speccoll"],
        [u"University of Chicago", "ICU", "http://www.lib.uchicago.edu/e/scrc/"],
        [u"University of Cincinnati", "ohciuar", "http://libraries.uc.edu/collections/"],
        [u"University of Colorado", "CoU", "http://ucblibraries.colorado.edu/archives/index.htm"],
        [u"University of Dayton", "odau", "http://www.udayton.edu/libraries/archives_and_collections/"],
        [u"University of Delaware", "deneuar", "http://www.lib.udel.edu/ud/spec/"],
        [u"University of Denver", "CoDU", "http://library.du.edu/site/"],
        [u"University of Florida", "fu", "http://web.uflib.ufl.edu/spec/"],
        [u"University of Houston", "TxAHU-Li", "http://info.lib.uh.edu/about/campus-libraries-collections/special-collections"],
        [u"University of Idaho", "idu", "http://www.lib.uidaho.edu/special-collections/"],
        [u"University of Illinois at Chicago", "ICIU", "http://library.uic.edu/home/collections/manuscripts-and-rare-books"],
        [u"University of Illinois at Urbana-Champaign", "IU-Ar", "http://archives.library.illinois.edu/"],
        [u"University of Iowa", "IaU", "http://www.lib.uiowa.edu/spec-coll/"],
        [u"University of Kansas", "kus", "http://spencer.lib.ku.edu/collections/sc/"],
        [u"University of Leeds", "UkLeUBL", "http://library.leeds.ac.uk/special-collections"],
        [u"University of London, Birkbeck College", "UkLU-B", "http://www.bbk.ac.uk/lib/"],
        [u"University of London Library", "UkLU", "http://www.ull.ac.uk/"],
        [u"University of Manchester", "ukmajru", "http://www.library.manchester.ac.uk/specialcollections/"],
        [u"University of Maryland", "MdCpUHL", "http://www.lib.umd.edu/special/"],
        [u"University of Massachusetts at Amherst", "mu", "http://www.library.umass.edu/spcoll/"],
        [u"University of Melbourne", "AuMU", "http://library.unimelb.edu.au/"],
        [u"University of Miami", "fmu", "http://www.library.miami.edu/specialcollections/"],
        [u"University of Michigan", "miu", "http://bentley.umich.edu/"],
        [u"University of Minnesota", "MnU", "http://special.lib.umn.edu/"],
        [u"University of Mississippi", "MsU", "http://www.olemiss.edu/depts/general_library/archives/"],
        [u"University of Missouri", "mou", "http://mulibraries.missouri.edu/specialcollections/"],
        [u"University of Nebraska-Lincoln", "NbU", "http://libraries.unl.edu/spec"],
        [u"University of Nevada, Reno", "nvreusc", "http://knowledgecenter.unr.edu/materials/specoll/"],
        [u"University of New Hampshire", "nhu", "http://www.library.unh.edu/milne/"],
        [u"University of North Carolina at Chapel Hill", "ncush", "http://www.lib.unc.edu/wilson/"],
        [u"University of North Dakota", "NdU", "http://webapp.und.edu/dept/library/Collections/"],
        [u"University of Notre Dame", "inndh", "http://archives.nd.edu/index.htm"],
        [u"University of Nottingham", "UkNtU", "http://www.nottingham.ac.uk/manuscriptsandspecialcollections/index.aspx"],
        # Disabled in the original:
        # [u"University of Oregon", "OrU", "http://libweb.uoregon.edu/speccoll/archives/"],
        [u"University of Oxford", "UkOxU", "http://www.bodleian.ox.ac.uk/bodley"],
        [u"University of Pennsylvania", "puar", "http://www.library.upenn.edu/"],
        [u"University of Pittsburgh", "PPiU", "http://www.library.pitt.edu/libraries/archives/archives.html"],
        [u"University of Puget Sound", "WaTU", "http://www.pugetsound.edu/academics/academic-resources/collins-memorial-library/archives/"],
        [u"University of Reading", "UkReU", "http://www.reading.ac.uk/special-collections/sp-home.aspx"],
        [u"University of Rhode Island", "RUn", "http://www.uri.edu/library/special_collections/"],
        [u"University of Rochester", "NRU", "http://www.library.rochester.edu/"],
        [u"University of Sheffield", "UkShU", "http://www.sheffield.ac.uk/library/special"],
        [u"University of South Dakota", "SdU", "http://www.usd.edu/library/"],
        [u"University of Tennessee, Knoxville", "tusc", "http://www.lib.utk.edu/special/"],
        [u"University of Texas at Austin", "TxU", "http://www.cah.utexas.edu/"],
        [u"University of Toronto", "OTUP", "http://onesearch.library.utoronto.ca/special-collections"],
        [u"University of Utah", "uumlsc", "http://www.lib.utah.edu/collections/special-collections/"],
        [u"University of Virginia", "ViU", "http://www.lib.virginia.edu/index.html"],
        [u"University of Wales, Aberystwyth", "WlAbUW", "https://archives.aber.ac.uk/index.php/"],
        [u"University of Washington", "WaU-AR", "http://www.lib.washington.edu/specialcollections/"],
        [u"University of Wyoming", "WyU-AH", "http://ahc.uwyo.edu/"],
        [u"Vanderbilt University", "TNJ", "http://www.library.vanderbilt.edu/speccol/"],
        [u"Vassar College", "NPV", "http://specialcollections.vassar.edu/"],
        [u"Virginia Polytechnic Institute and State University", "viblbv", "http://spec.lib.vt.edu/"],
        [u"Washington University", "moslwua", "http://library.wustl.edu/units/spec/"],
        [u"Wellcome Institute for the History of Medicine", "UkLW", "http://library.wellcome.ac.uk/"],
        [u"Wellesley College", "MWelC", "http://www.wellesley.edu/lts/collections/archives"],
        # Disabled in the original:
        # [u"Western Historical Manuscript Collection", "MoCoJ", "http://shs.umsystem.edu/manuscripts/"],
        [u"Woods Hole Oceanographic Institution", "MWhB", "http://dla.whoi.edu/dla/"],
        [u"Worcester Polytechnic Institute", "MWP", "http://www.wpi.edu/academics/library/"],
        [u"Yale University Library", "CtY-BR", "http://www.library.yale.edu/"],
    ]


def _short_title(url):
    """Return the last path segment of ``url`` without its file extension."""
    tail_start = url.rfind("/") + 1
    tail = url[tail_start:]
    # No dot in the last segment means there is no extension to cut off.
    if tail.find(".") == -1:
        return tail
    return url[tail_start:url.rfind(".")]


def main(marcfile):
    """Build the PHFAWS finding-aid outputs from a MARC file.

    A record is used when a 903 ``a`` contains ``PHFAWS`` and it has 852 and
    856 fields; only 856 fields whose subfield ``3`` contains
    ``online finding aid`` produce output. Repositories (852 ``a``) that are
    not in the preset list get a placeholder ``NEWCODEn`` code and ``TEST``
    URL and are reported separately.

    Outputs, all produced by helpers defined elsewhere:
    ``make_nutch(..., 'nutch')``, ``make_browse_page(..., 'browse.html')``,
    ``make_newrepos_list(..., 'newrepos.txt')`` and
    ``make_counts(..., 'phfawscounts.txt')``.

    Args:
        marcfile: Path of the MARC file to read.
    """
    xmlURLs = []
    nutchURLs = []
    browselinks = []
    reposmasterlist = _preset_repositories()
    newreposlist = []
    newreposcounter = 0
    findingaidcounter = 0

    # Binary mode for MARC data; the with-block closes the handle once the
    # reader has been drained.
    with open(marcfile, 'rb') as marc_handle:
        for record in MARCReader(marc_handle):
            if not record['903']:
                continue
            for note in record.get_fields('903'):
                # Get only records where 903a contains "PHFAWS".
                if 'PHFAWS' not in note['a']:
                    continue
                # Get only records where 852/repository is not blank.
                if record['852'] is None:
                    continue
                for repofield in record.get_fields('852'):
                    reposname = marc8_to_unicode(repofield['a']).rstrip('.,')

                    # Match field 852 against the repository list; if there
                    # are several rows with this name the last one wins.
                    reposcode = None
                    reposurl = None
                    for row in reposmasterlist:
                        if row[0] == reposname:
                            reposcode = row[1]
                            reposurl = row[2]

                    author = marc8_to_unicode(record.author()).rstrip('.,')
                    title = marc8_to_unicode(record.title())
                    date = parse_date(record)
                    title = ('%s %s' % (title, date)).rstrip('.,')

                    # Get only records where 856 is not blank.
                    if record['856'] is None:
                        continue
                    for link in record.get_fields('856'):
                        # Use only 856 entries for finding aids; a record
                        # may have multiple 856es.
                        linksthree = link['3']
                        if (linksthree is None
                                or "online finding aid" not in linksthree):
                            continue

                        if reposcode is None:
                            # Repository not in the preset list: register
                            # it with placeholders so it is reported in the
                            # new-repositories output.
                            newreposcounter += 1
                            reposcode = "NEWCODE" + str(newreposcounter)
                            reposurl = "TEST"
                            reposmasterlist.append(
                                [reposname, reposcode, reposurl])
                            newreposlist.append(
                                [reposname, reposcode, reposurl])

                        human_url = link['u']
                        titlenote = link['z']
                        if titlenote is not None:
                            fulltitle = '%s, %s' % (title, titlenote)
                        else:
                            fulltitle = title
                        filingtitlejoin = '%s %s' % (author, fulltitle)

                        # Link row for the browse page, stored with its
                        # sorting metadata.
                        linkdata = '<tr><td width="35%%">%s</td><td width="65%%">%s</td></tr>' % (
                            make_link(human_url, author),
                            make_link(human_url, fulltitle))
                        browselinks.append(
                            [alafiling(filingtitlejoin), linkdata, reposname])
                        findingaidcounter += 1

                        # If there are separate URLs for indexing (857),
                        # pair them with this 856 by matching 856w to 857w;
                        # otherwise crawl the human-facing URL itself.
                        # NOTE(review): the collapsed original is ambiguous
                        # about which ``if`` the fallback ``else`` belonged
                        # to; this reading attaches it to the 857 check.
                        human_code = link['w']
                        if record['857'] is not None:
                            crawl_urls = [
                                crawlfield['u']
                                for crawlfield in record.get_fields('857')
                                if crawlfield['w'] == human_code
                            ]
                        else:
                            crawl_urls = [human_url]

                        for crawl_url in crawl_urls:
                            # shorttitle only fed the Verity XML output
                            # (xmlURLs / make_xml), which is disabled; it is
                            # still computed so that output can be restored.
                            shorttitle = _short_title(crawl_url)
                            nutchURLs.append([
                                reposname, alafiling(filingtitlejoin),
                                crawl_url, human_url, fulltitle, author,
                                reposurl
                            ])

    # Output lists needed by crawlers. Two passes of the stable sort order
    # the rows by repository first, then by title within a repository.
    xmlURLs.sort(key=lambda name: name[0])
    xmlURLs.sort(key=lambda repo: repo[3])
    nutchURLs.sort(key=lambda name: name[1])
    nutchURLs.sort(key=lambda repo: repo[0])
    make_nutch(nutchURLs, 'nutch')  # Output list for nutch

    # Output browse.html
    browserepos = [
        [row[0], row[1], row[2], alafiling(row[0])]
        for row in reposmasterlist
    ]
    reposcounter = len(browserepos)
    browselinks.sort(key=lambda name: name[0])
    make_browse_page(browserepos, browselinks, 'browse.html')

    # Output list of new repositories
    newreposlist.sort(key=lambda rep: rep[0])
    if newreposcounter != 0:
        status = '%d new repositories found. you must add information on these repositories, then run phfaws.py again. Please see the newly updated newrepos.txt for details.' % (
            newreposcounter)
        sys.stderr.write(status)
    # NOTE(review): written unconditionally here; the collapsed original
    # does not show whether this call sat inside the ``if`` above.
    make_newrepos_list(newreposlist, 'newrepos.txt')

    # Output file of counts
    make_counts(findingaidcounter, reposcounter, 'phfawscounts.txt')
def main(marcfile, newsletterissue, detailed):
    """Write the new-collections and new-finding-aids pages for an issue.

    Reads MARC records from ``marcfile``, keeps those whose 901 field
    contains ``newsletterissue``, groups them by repository and renders
    them through ``dppage`` into two files in the current directory:
    ``901a.html`` ("New Collections") and ``901b.html`` ("New Finding
    Aids").  A repository group goes to the finding-aids page when the
    901 value of its first record contains ``'fa' + newsletterissue``.

    marcfile        -- path of the MARC file to read.
    newsletterissue -- issue identifier matched against each record's 901.
    detailed        -- NOTE(review): the value passed in is never used; it
                       is replaced below by sys.argv[3] ('' when absent),
                       exactly as the original code did.  Confirm against
                       the callers before changing this.

    Records that raise while being processed are skipped with a message
    on stderr; a slightly incomplete list is preferred over none at all.
    """
    faissue = 'fa%s' % (newsletterissue)
    recordcounter = 0
    docpreslist = []
    try:
        detailed = sys.argv[3]
    except IndexError:
        detailed = ''

    # MARC is a binary format, so read it in binary mode, and make sure the
    # handle is released even if the reader itself fails.
    marc_handle = open(marcfile, 'rb')
    try:
        for position, record in enumerate(MARCReader(marc_handle)):
            # Default handler: one bad record must not abort the whole run.
            # Only Exception is caught so Ctrl-C / SystemExit still work.
            try:
                issue_field = record['901']
                if issue_field is None:
                    continue
                issue = issue_field.format_field()
                if newsletterissue not in issue:
                    continue

                # Missing fields get a visible placeholder instead of
                # causing the entire record to be skipped.
                if record['904'] is not None:
                    country = record['904']['a']
                else:
                    country = 'RECORD MISSING 904 TAG'
                if record['852'] is not None:
                    repos = record['852'].format_field()
                else:
                    repos = 'RECORD MISSING 852 TAG'
                creator = record.author()
                if creator is None:
                    creator = 'RECORD MISSING AUTHOR'

                title = subfield_list(record['245'], 'akhbcnps')
                title = '%s.' % strip_isbd(title)
                date = parse_date(record)
                extent = get_all_tag(record, '300')
                restrictions = get_all_tag(record, '506')
                scopecontent = get_all_tag(record, '520')
                bioghist = get_all_tag(record, '545')

                docpreslist.append({
                    'issue': issue,
                    'country': country,
                    'repos': marc8_to_unicode(repos),
                    'creator': marc8_to_unicode(creator),
                    'title': marc8_to_unicode(title),
                    'date': date,
                    'extent': extent,
                    'restrictions': marc8_to_unicode(restrictions),
                    'scopecontent': marc8_to_unicode(scopecontent),
                    'bioghist': marc8_to_unicode(bioghist),
                })
                recordcounter += 1
            except Exception:
                # sys.exc_info() keeps this working on old interpreters;
                # %r avoids encoding errors on non-ASCII messages.
                sys.stderr.write('Skipping record %d: %r\n'
                                 % (position + 1, sys.exc_info()[1]))
    finally:
        marc_handle.close()

    print('%s records matching "%s" found.'
          % (recordcounter, newsletterissue))

    docpreslist.sort(key=itemgetter('country', 'repos', 'creator', 'title'))
    # groupby only merges adjacent items, so this relies on the sort above.
    reposlist = [
        list(group)
        for _key, group in groupby(docpreslist, itemgetter('repos'))
    ]

    newcoll = []
    newfa = []
    for repos in reposlist:
        if faissue in repos[0]['issue']:
            build_list(repos, newfa, detailed)
        else:
            build_list(repos, newcoll, detailed)

    # Open each output file only once its content is ready, so a failure
    # while building the lists does not leave truncated pages behind.
    pages = (
        ('901a.html', ''.join(newcoll), 'New Collections'),
        ('901b.html', ''.join(newfa), 'New Finding Aids'),
    )
    for filename, collections, subtitle in pages:
        outfile = open(filename, 'w')
        try:
            outfile.write(
                dppage.substitute(collections=collections, subtitle=subtitle))
        finally:
            outfile.close()
repos_name = repos_address = repos_country = gc_address = normalized_address = address_source = u'' repos = {} repos_detail = {} repos_coll = {} if record['852'] is not None: repos_name = utf8_join(record['852'].get_subfields('a', 'b')) repos_address = utf8_join(record['852'].get_subfields('e')) else: (repos_name, repos_address) = (default_name, default_address) repos = { 'label': repos_name, 'type': 'repository' } auth = record.author() if auth == None: auth = '' repos_detail['id'] = repos_name repos_coll['id'] = repos_name repos_coll['repository'] = repos_name repos_coll['auth'] = pymarc.marc8_to_unicode(auth) repos_coll['label'] = pymarc.marc8_to_unicode(record['245'].format_field()) try: repos_country = pymarc.marc8_to_unicode(record['904']['a']).strip('.:,;/ ') except: pass if repos not in repos_list: repos_list.extend([repos]) repos_detail['country'] = repos_country repos_detail['address'] = repos_address for engine in engines: normalized_address = normalize_address(repos_address) try: canonical_address, (lat, lng) = engine.geocode(normalized_address) address_source = engine.__class__.__name__ repos_detail['normalized_address'] = normalized_address