def read(inputfile):
    """Copy the records of *inputfile* that fail ``testFor336To338`` to a new file.

    The output file is named ``<inputfile>_no338_<ISO timestamp>.mrc``.
    Every record that could be read is counted.  Records for which
    ``testFor336To338`` returns a false value are printed and written to the
    output file; records for which ``isSupplement`` returns a true value are
    tallied separately.  A two-line summary is printed at the end.

    Args:
        inputfile: Path of the binary MARC file to scan.
    """
    outputfile = inputfile + "_no338_" + \
        datetime.datetime.now().isoformat() + ".mrc"
    no338Count = 0
    totalCount = 0
    supplements = 0
    with open(inputfile, 'rb') as f:
        reader = MARCReader(f)
        writer = MARCWriter(open(outputfile, 'wb'))
        try:
            # next() is called by hand instead of using a for-loop so that a
            # decoding error on one record is reported without ending the
            # scan of the remaining records.
            while True:
                try:
                    record = next(reader)
                    totalCount += 1
                    if not testFor336To338(record):
                        print(record)
                        no338Count += 1
                        try:
                            writer.write(record)
                        except Exception:
                            # Best effort: a record that cannot be serialised
                            # is reported and skipped.
                            print("Error with writing.")
                    if isSupplement(record):
                        supplements += 1
                except UnicodeDecodeError:
                    print("There was a Unicode error.")
                except StopIteration:
                    print("End of file.")
                    break
        finally:
            # Close the output even when an unexpected error escapes the loop.
            writer.close()
    print(
        "{0} / {1} ({2} %) records have no 338 field.".format(
            no338Count, totalCount, countPercentage(no338Count, totalCount)))
    print("The file contained {0} supplement records.".format(supplements))
def campus_split():
    '''
    Finds the master format files created by fmt_split() in the current directory, then writes the
    records in each format file to separate files for holding campuses based on coding in MARC 049
    subfield a. Outputs one file per campus per format, named <format file stem>_<campus>.mrc.

    A record is written at most once per campus file, even when several of its 049 $a values
    contain the campus code.
    '''
    campuses = ['MNGE', 'MNXN']

    # List the inputs once, before any output exists, so every campus is
    # split from the same set of master files.
    master_files = [
        name for name in os.listdir()
        if re.match(r'.+(bks|ser|maps|vis|other)\.mrc', name)
    ]

    for campus in campuses:

        for master in master_files:

            # splitext only removes the final extension, so a file name that
            # contains additional dots no longer raises ValueError.
            fpref = os.path.splitext(str(master))[0]

            with open(master, 'rb') as source:

                reader = MARCReader(source)
                writer = MARCWriter(open(fpref + '_' + campus + '.mrc', 'wb'))

                try:
                    for rec in reader:
                        held_by_campus = any(
                            campus in suba
                            for field in rec.get_fields("049")
                            for suba in field.get_subfields("a")
                        )
                        if held_by_campus:
                            writer.write(rec)
                finally:
                    # Close the output even if a record fails to parse or write.
                    writer.close()
Пример #3
0
def save2marc(outfile, bib):
    """Append the MARC record *bib* to the binary file *outfile*.

    Args:
        outfile: Path of the MARC file; it is opened in append mode.
        bib: Record handed to ``MARCWriter.write``.

    Raises:
        OSError: If the file cannot be opened or written.  The original
            exception propagates unchanged, so its message and errno are
            preserved.
    """
    # The writer is created before the try block so that a failed open()
    # surfaces as itself instead of as an unbound-variable error in cleanup.
    writer = MARCWriter(open(outfile, 'ab'))
    try:
        writer.write(bib)
    finally:
        writer.close()
Пример #4
0
def write_marc21(outfile, bib):
    """Append the MARC record *bib* to the binary file *outfile*.

    Args:
        outfile: Path of the MARC file; it is opened in binary append mode.
        bib: Record handed to ``MARCWriter.write``.

    Raises:
        OSError: If the file cannot be opened or written.  The original
            exception propagates unchanged.
    """
    # MARC transmission data is binary, so the file must not be opened in
    # text mode.  The writer is created outside the try block so a failed
    # open() is not masked by an unbound-variable error during cleanup.
    writer = MARCWriter(open(outfile, "ab"))
    try:
        writer.write(bib)
    finally:
        writer.close()
Пример #5
0
    def test_unicode(self):
        """Write a record with a non-ASCII 245 $a to 'test/foo' and read it back."""
        record = Record()
        record.add_field(Field(245, ['1', '0'], ['a', unichr(0x1234)]))
        # MARC data is binary: open the scratch file in binary mode for both
        # directions so no newline or encoding translation is applied.
        writer = MARCWriter(open('test/foo', 'wb'))
        try:
            writer.write(record)
        finally:
            writer.close()

        # The with-block closes the read handle, which was previously leaked.
        with open('test/foo', 'rb') as handle:
            reader = MARCReader(handle)
            record = next(reader)
        self.assertEqual(record['245']['a'], unichr(0x1234))
Пример #6
0
    def test_writing_unicode(self):
        """Round-trip a record with a non-ASCII 245 $a through 'test/foo'.

        The leader is set explicitly before writing and the file is read
        back with ``to_unicode=True``.
        """
        record = Record()
        record.add_field(Field(245, ['1', '0'], ['a', unichr(0x1234)]))
        record.leader = '         a              '
        # MARC data is binary: use binary mode for writing and reading.
        writer = MARCWriter(open('test/foo', 'wb'))
        try:
            writer.write(record)
        finally:
            writer.close()

        try:
            with open('test/foo', 'rb') as handle:
                reader = MARCReader(handle, to_unicode=True)
                record = next(reader)
            self.assertEqual(record['245']['a'], unichr(0x1234))
        finally:
            # Remove the scratch file even when the assertion fails.
            os.remove('test/foo')
Пример #7
0
 def MakeMARCFile(self, recs, filename):
     """Write *recs* to ``<filename without extension>_OUT.mrc``.

     Args:
         recs: Iterable of records to write.
         filename: Name the output file name is derived from.

     Returns:
         The *recs* argument, unchanged as an object.
     """
     # Strip only a real trailing ".ext".  With the dot escaped, a name
     # without an extension is kept intact instead of being matched whole.
     filenameNoExt = re.sub(r'\.\w*$', '', filename)
     mrcFileName = filenameNoExt + '_OUT.mrc'
     print('\n<Compiling file to MARC>\n')
     writer = MARCWriter(open(mrcFileName, "wb"))
     try:
         for r in recs:
             try:
                 # NOTE(review): this passes the serialised form rather than
                 # the record; if the writer rejects that, every record takes
                 # the fallback below.  Left as-is because the output depends
                 # on it - confirm the intent.
                 writer.write(r.as_marc())
             except Exception:
                 r.force_utf8 = True
                 writer.write(r)
     finally:
         # Close the output even if a record cannot be written at all.
         writer.close()
     return recs
Пример #8
0
    def test_writing_unicode(self):
        """Round-trip a record with a non-ASCII 245 $a through 'test/foo'.

        The leader is set explicitly before writing and the file is read
        back with ``to_unicode=True``.
        """
        record = Record()
        record.add_field(Field(245, ['1', '0'], ['a', unichr(0x1234)]))
        record.leader = '         a              '
        writer = MARCWriter(open('test/foo', 'wb'))
        try:
            writer.write(record)
        finally:
            writer.close()

        try:
            reader = MARCReader(open('test/foo', 'rb'), to_unicode=True)
            try:
                record = next(reader)
                self.assertEqual(record['245']['a'], unichr(0x1234))
            finally:
                reader.close()
        finally:
            # Remove the scratch file even when the assertion fails.
            os.remove('test/foo')
Пример #9
0
    def test_writing_unicode(self):
        """Round-trip a record with a non-ASCII 245 $a through "test/foo".

        The leader is set explicitly before writing and the file is read
        back with ``to_unicode=True``.
        """
        record = Record()
        record.add_field(Field(245, ["1", "0"], ["a", chr(0x1234)]))
        record.leader = "         a              "
        writer = MARCWriter(open("test/foo", "wb"))
        try:
            writer.write(record)
        finally:
            writer.close()

        try:
            reader = MARCReader(open("test/foo", "rb"), to_unicode=True)
            try:
                record = next(reader)
                self.assertEqual(record["245"]["a"], chr(0x1234))
            finally:
                reader.close()
        finally:
            # Remove the scratch file even when the assertion fails.
            os.remove("test/foo")
Пример #10
0
def save2marc(outfile: str, record: Record) -> None:
    """
    Appends MARC records to outfile

    Args:
        outfile:                    file path
        record:                     MARC record as pymarc object

    Raises:
        OSError:                    if the file cannot be opened or written;
                                    the original exception propagates as-is
    """
    # Create the writer before entering try/finally: if open() fails there is
    # nothing to close, and the real error must not be replaced by an
    # unbound-variable error raised from the cleanup clause.
    writer = MARCWriter(open(outfile, "ab"))
    try:
        writer.write(record)
    finally:
        writer.close()
Пример #11
0
class file_writer:
    """Takes care of writing references to a MARC file.
    """
    def __init__(self, name, encoding="utf-8", path=""):
        """Prepare the output file for writing.

        Arguments:
            name {str} -- file name

        Keyword Arguments:
            encoding {str} -- file encoding (default: {"utf-8"}); accepted
            but not used by this method
            path {str} -- prefix prepended verbatim to the file name
            (default: {""})
        """
        target = path + name
        self.CONST_INDICATOR_1 = ' '
        self.CONST_INDICATOR_2 = ' '
        self.CONST_FIELD_LEADER = "nab a22001211a 4500"
        self.CONST_FIELD_008 = "|2018    ne || ||||   ||   ||eng |"
        self.writer = MARCWriter(open(target, 'wb'))

    def write_record(self, references, field035="", field008=""):
        """Write one record to the file.

        Arguments:
            references -- iterable of objects providing to_marc_field();
            one field is added per object (field 591 according to the
            original documentation)
            field035 -- string stored in subfield a of field 035
            field008 -- string stored in field 008; an empty string selects
            CONST_FIELD_008
        """
        control_data = self.CONST_FIELD_008 if field008 == "" else field008
        record = Record(force_utf8=True)
        record.add_field(Field(tag='008', data=control_data))
        blank_indicators = [self.CONST_INDICATOR_1, self.CONST_INDICATOR_2]
        record.add_field(
            Field(tag='035',
                  indicators=blank_indicators,
                  subfields=['a', field035]))
        for reference in references:
            record.add_field(reference.to_marc_field())
        # Overwrite leader positions 5, 7 and 18, one assignment per position.
        for position, code in ((5, 'n'), (7, 'b'), (18, 'a')):
            leader = record.leader
            record.leader = leader[:position] + code + leader[position + 1:]
        self.writer.write(record)

    def close(self):
        """Finish writing and close the file.
        """
        self.writer.close()
Пример #12
0
 def test_edit_mixed_code(self):
     """Add a 941 field to every record of 'test/mixed-code.dat' and re-read the copy.

     The edited records are written to 'test/foo'; every object read back
     from that file must be exactly a ``Record``.
     """
     # open() replaces the Python 2 only file() builtin; binary mode is used
     # because MARC data is binary.
     with open('test/mixed-code.dat', 'rb') as source:
         reader = MARCReader(
             source,
             to_unicode=True,
             force_utf8=True,
             utf8_handling='ignore'
         )
         writer = MARCWriter(open('test/foo', 'wb'))
         try:
             for record in reader:
                 field = Field(
                     tag = '941',
                     indicators = ['',''],
                     subfields = [ 'a', 'x' ]
                 )
                 record.add_field(field)
                 writer.write(record)
         finally:
             writer.close()
     try:
         with open('test/foo', 'rb') as written:
             for record in MARCReader(written, to_unicode=True):
                 self.assertEqual(type(record), Record)
     finally:
         # Remove the scratch file even when an assertion fails.
         os.remove('test/foo')
    # MARC field 700
    if creators:
        for creator in creators:
            marc_record.add_field(
                Field(
                    tag = '700',
                    indicators = ['1', ''],
                    subfields = [
                        'a', f'{creator}',
                        't', '',
                    ]))
            
    # MARC field 856
    if identifiers:
        for identifier in identifiers:
            marc_record.add_field(
                Field(
                    tag = '856',
                    indicators = ['4', '2'],
                    subfields = [
                        '3', 'Finding aid',
                        'u', f'{identifier}',
                    ]))
    
    # write to MARC output file
    writer = MARCWriter(open(save_file,'ab'))
    writer.write(marc_record)
    writer.close()

# open up MARC record in default viewer (NOTEPAD most likely)    
os.system(save_file)
Пример #14
0
            #If there are no $a and no $s
            elif len(subfieldA) == 0 and len(subfieldS) == 0:
                myRecord = [
                    SysNum, OclcNum, title, subfieldA, subfieldS,
                    "Unchanged Record no coverage info"
                ]
                onlyFileWriter.writerow(myRecord)
                unchangedRecordsWriter.write(record)
            else:
                myRecord = [
                    SysNum, OclcNum, title, subfieldA, subfieldS,
                    "Other unchanged record"
                ]
                onlyFileWriter.writerow(myRecord)
                unchangedRecordsWriter.write(record)
        else:
            deletedRecordsWriter.write(record)
        items = record.get_fields("930")
        finalItemCount = finalItemCount + len(items)

    print("Number of records for " + filename + ": " +
          str(len(sysNumCountArray)))
    print("Number of original items for " + filename + ": " +
          str(originalItemCount))
    print("Final number of items for " + filename + ": " + str(finalItemCount))
print("Number of deleted rows: " + str(rowsDeleted))
print("Number of added rows: " + str(rowsAdded))
unchangedRecordsWriter.close()
changedRecordsWriter.close()
Пример #15
0
                for domain in weird_domains:
                    if domain in u:
                        rec.remove_field(field)
                        num_fields_removed += 1

                for domain in subscription_domains:
                    if domain in u:
                        print rec.title()
                        prefix(field)
                        num_proxied_ebooks += 1

            if len(field.get_subfields('u')) == 0:
                # 856 is useless without $u, delete the field
                rec.remove_field(field)
                num_fields_removed += 1

            for z in field.get_subfields('z'):
                deleteZ(field)

        processed.write(rec)
        i += 1

processed.close()

# stats
print "\n"
print "Total Records Processed:", num_total_books
print "Ebooks Proxied:", num_proxied_ebooks
print "856s Deleted:", num_fields_removed
print "\n"
Пример #16
0
"""
Base script for DLF Forum 2014 Listening-Based Python workshop.

Modified from files at https://github.com/LibraryCodeYearIG/MARC-record-edit .
"""

import os
from pymarc import Field, MARCReader, MARCWriter, record_to_xml

# Split the dump into one .dat (binary MARC) and one .xml file per record.
# open() replaces the Python 2 only file() builtin, and every handle is
# opened in binary mode because both outputs are byte data.
with open('../../exampledump.mrc', 'rb') as dump:
    records = MARCReader(dump,
                         to_unicode=True,
                         force_utf8=True,
                         utf8_handling='ignore')

    # Running number used in the output file names; starts at 1.
    index = 1

    for marc in records:
        filename_dat = 'examplerecord_%s.dat' % index
        filename_xml = 'examplerecord_%s.xml' % index

        # Append mode is kept from the original: re-running the script adds
        # to existing files instead of replacing them.
        writer_dat = MARCWriter(open(filename_dat, 'ab'))
        try:
            with open(filename_xml, 'ab') as writer_xml:
                writer_dat.write(marc)
                writer_xml.write(record_to_xml(marc) + b"\n")
        finally:
            writer_dat.close()

        index += 1
def fmt_split(MARCfile):
    '''
    Parses a MARC binary file based on LDR/06-07 values into separate files for books, serials, maps, visual materials, and
    other formats. Output is one .mrc file for each format, named <input path without extension>_<format>.mrc.

    A 909 field (subfields a 'bcat', b 'MNU', c today's date, d 'marcive') is added to every record before it is written.
    '''
    import os  # local import: only needed to strip the extension below

    # splitext only removes the final extension, so a path that contains
    # additional dots no longer raises ValueError.
    fpref = os.path.splitext(str(MARCfile))[0]
    today = str(date.today())

    #regexes for string matching to determine format; compiled once, not per record
    bks_re = re.compile(r'^.{6}am')
    ser_re = re.compile(r'^.{6}a[si]')
    maps_re = re.compile(r'^.{6}e')
    vis_re = re.compile(r'^.{6}k')

    writers = {}

    try:
        with open(MARCfile, 'rb') as f:

            reader = MARCReader(f)

            #opens a file for each format
            for fmt in ('bks', 'ser', 'maps', 'vis', 'other'):
                writers[fmt] = MARCWriter(open(fpref + '_' + fmt + '.mrc', 'wb'))

            for rec in reader:

                field_909 = pymarc.Field(tag='909',
                                         indicators=[' ', ' '],
                                         subfields=[
                                             'a', 'bcat', 'b', 'MNU', 'c', today,
                                             'd', 'marcive'
                                         ])

                rec.add_ordered_field(field_909)

                ldr = rec.leader

                #writes record to correct file based on regex match of LDR/06-07 values
                if bks_re.match(ldr):
                    writers['bks'].write(rec)
                elif ser_re.match(ldr):
                    writers['ser'].write(rec)
                elif maps_re.match(ldr):
                    writers['maps'].write(rec)
                elif vis_re.match(ldr):
                    writers['vis'].write(rec)
                else:
                    writers['other'].write(rec)
    finally:
        #closes whichever master format files were opened, even after an error
        for writer in writers.values():
            writer.close()
Пример #18
0
#!/usr/bin/env python
"""
write all the records with 856 fields out to an ebooks-only MARC file
"""
from pymarc import MARCReader, MARCWriter

"""
the MARCReader params come from the penultimate comment here:
github.com/edsu/pymarc/issues/7
basically, these work around mixed character encodings
"""
errCount = 0

# open() replaces the Python 2 only file() builtin; both files are opened in
# binary mode because MARC data is binary.
with open( 'ebooks.MRC', 'rb' ) as allRecordsFile:
    allRecords = MARCReader( allRecordsFile, to_unicode=True, force_utf8=True, utf8_handling='ignore' )
    onlyEbooks = MARCWriter( open( 'ebooks-edited.MRC', 'wb' ) )

    try:
        for rec in allRecords:
            if rec[ '856' ] is not None:
                try:
                    onlyEbooks.write( rec )
                except UnicodeDecodeError:
                    # report the title field of the record that could not be written
                    print( rec[ '245' ] )
                    errCount += 1
    finally:
        # close the output even if reading or writing fails unexpectedly
        onlyEbooks.close()

# single-argument print() behaves the same under Python 2 and Python 3
print( "\nNumber of Errors:  %d" % errCount )
                                            "2", subfield2, "h", subfieldh,
                                            "i", subfieldi, "5", subfield5,
                                            "8", subfield8, "f", subfieldf,
                                            "F", subfieldF, "w", subfieldw
                                        ])

                        record.add_field(myfield)
                        myWriterFile.writerow([
                            SysNum, subfieldm, subfield1, subfield2, subfield5,
                            "item added"
                        ])

                        if subfieldm == "ISSUE":
                            SerialsRecordsWriter.write(record)

                        elif subfieldm == "BOOK":
                            BooksRecordsWriter.write(record)

        else:
            mydeletesFile.writerow([SysNum, "a deleted item match"])
    else:
        SystemNum = str(record["001"])
        SysNum = cleanSysNumber(SystemNum)
        myErrorFile.writerow([SysNum, "no 852 or a 930"])

for keys in multiple930Needed:
    myMultipleFile.writerow([keys, multiple930Needed[keys]])

SerialsRecordsWriter.close()
BooksRecordsWriter.close()
	fix_245_misspelling(title_a_raw,word,'a',title_245)
	fix_245_misspelling(title_b_raw,word,'b',title_245)
	
	#get the bib record from the 907 field prior to deletion
	n = marc.get_fields('907')
	for field in n:
		bib_rec_num_raw = field.get_subfields('a')

	bib_rec_num = subfield_to_string(bib_rec_num_raw)

	#add 949 local field for overlay of bib record and creation of order record when record is uploaded into Millennium
	marc.add_field(
		Field(
			tag = '949',
			indicators = [' ',' '],
			subfields = [
				'a', '*recs-b;ov-%s;' %(bib_rec_num)
			]))	

	#delete 907, 998, 910, 945 fields
	for f in marc.get_fields('907', '998', '910', '945'):
		  if f['a'] != '':
			  marc.remove_field(f) 
	
	#append record to a generic file.dat file
	writer = MARCWriter(file(word+'.dat','a'))
	writer.write(marc)

#closes .dat file
writer.close() 
Пример #21
0
def fchange_sort(MARCfile, fname):
    
    '''
    Sorts the records of a MARC binary file into update files based on the text of 960 subfield a.

    Every record that has a 960 field is written to <fpref>_update_bibs.mrc. Records whose 960 $a mentions
    'OCLC control number change' are also written to <fpref>_new_oclc_num.mrc, and records whose 960 $a
    mentions 'KB URL change' are also written to <fpref>_update_URLs.mrc. Those two files are only created
    when the input contains at least one such record. Records without a 960 field are not written anywhere.

    MARCfile is the path of the file to read; fname is the name the output file prefix is derived from.
    Relies on a module-level string named today for the dated output directory names.
    '''
    #open a path to put the files for the FTP server - both OCN and BIB updates
    # NOTE(review): this name ends in a space where the other two use "_" - confirm it is intended
    sorted_files_path = "C:/Users/kjthomps/Documents/WCM/file_fetching/updates/sorted_for_FTP " + today
    #make a place to put the files with OCN updates for manual checking
    ocn_updates_path = "C:/Users/kjthomps/Documents/WCM/file_fetching/updates/OCN_updates_" + today
    #make a place to put the files with URL updates for manual checking
    url_updates_path = "C:/Users/kjthomps/Documents/WCM/file_fetching/updates/URL_updates_" + today
    for directory in (sorted_files_path, ocn_updates_path, url_updates_path):
        if not os.path.isdir(directory):
            os.mkdir(directory)

    #drop every dot from the name, then restore the one in front of "mrc" so the name splits into prefix and suffix
    fname_str = str(fname)
    print(fname)
    fname_str = fname_str.replace(".","")
    fname_str = fname_str.replace("mrc",".mrc")
    print(fname_str)
    fpref, fsuf = fname_str.split('.')
    print(fpref)
    print(fsuf)
    
    print(MARCfile)

    # first, see if there are OCN or URL changes in the set; this will determine whether creating a file is necessary
    OCN_change_ct = 0
    URL_change_ct = 0
    with open(MARCfile,'rb') as f:
        for rec in MARCReader(f):
            if rec['960']:
                field_960 = str(rec['960']['a'])
                if 'OCLC control number change' in field_960:
                    OCN_change_ct += 1
                if 'KB URL change' in field_960:
                    URL_change_ct += 1
    print("OCN_change_ct " ,OCN_change_ct)
    #previously this line printed OCN_change_ct a second time
    print("URL_change_ct ",URL_change_ct)

    writer_new_oclc_num_manual = None
    writer_update_URLs = None
    writer_update_bibs = None
    try:
        #if there are OCN updates or KB URL changes, create files to put those records in
        if OCN_change_ct > 0:
            writer_new_oclc_num_manual = MARCWriter(open(ocn_updates_path + "/" + fpref + '_new_oclc_num.mrc', 'wb'))
            print(writer_new_oclc_num_manual is not None)
        if URL_change_ct > 0:
            writer_update_URLs = MARCWriter(open(url_updates_path + "/" + fpref + '_update_URLs.mrc', 'wb'))
            print(writer_update_URLs is not None)

        #create a file for all updates
        writer_update_bibs = MARCWriter(open(sorted_files_path + "/" + fpref + '_update_bibs.mrc', 'wb'))
        with open(MARCfile, 'rb') as f:
            for v, rec in enumerate(MARCReader(f), 1):
                print(v)
                if not rec['960']:
                    continue
                field_960 = str(rec['960']['a'])
                print(field_960)
                #every record with a 960 goes to the bib updates file, whatever the reason for the update
                writer_update_bibs.write(rec)
                #OCN changes (affects matching) and URL changes are additionally copied out for manual checking
                if 'OCLC control number change' in field_960:
                    writer_new_oclc_num_manual.write(rec)
                if 'KB URL change' in field_960:
                    writer_update_URLs.write(rec)
    finally:
        #closes whichever output files were opened, even after an error
        for writer in (writer_update_bibs, writer_update_URLs, writer_new_oclc_num_manual):
            if writer is not None:
                writer.close()
Пример #22
0
    filename_xml = 'examplerecord_%s.xml' % i 
    filename_out = 'examplerecord_%s.out' % i 

    records = MARCReader(open(filename),
                         to_unicode=True,
                         force_utf8=True,
                         utf8_handling='ignore')

    writer_dat = MARCWriter(file(filename_out,'a'))
    writer_xml = open(filename_xml,'a')

    for marc in records:

        isbn_list = marc.get_fields('020')
        try:
            isbn_field = isbn_list[0]
        except Exception, e:
            j = i - 10
            marc.add_ordered_field(
                Field(
                    tag='020',
                    indicators=[' ', ' '],
                    subfields = ['a', isbns[j]]
                    ))

        writer_dat.write(marc)
        writer_xml.write(record_to_xml(marc) + "\n")

    writer_dat.close()
    writer_xml.close()