def transpose_to_marc21(record):
    """Serialize a dict-shaped record into MARC21 transmission format.

    ``record`` must carry the leader under the ``"_LEADER"`` key. Every other
    key accepted by ``isint()`` is treated as a field tag:

    * tags below 10 hold control-field data, either a single string or a
      list of strings (one control field is emitted per list element);
    * all other tags hold a list of mappings ``{indicators: [subfield, ...]}``
      where each subfield entry maps a subfield code to a string or to a
      list of strings (a list repeats the code once per value).

    Keys that ``isint()`` rejects are ignored. Returns the result of
    ``Record.as_marc()``.
    """
    marc_record = Record(force_utf8=True)
    marc_record.leader = record["_LEADER"]

    for tag in record:
        if not isint(tag):
            continue

        if int(tag) < 10:
            # Control fields carry raw data only: no indicators, no subfields.
            content = record[tag]
            if isinstance(content, list):
                for value in content:
                    marc_record.add_field(Field(tag=tag, data=value))
            elif isinstance(content, str):
                marc_record.add_field(Field(tag=tag, data=content))
            continue

        for occurrence in record[tag]:
            for indicator_key, entries in occurrence.items():
                # Flat [code, value, code, value, ...] list for Field().
                flat_subfields = []
                for entry in entries:
                    for code, value in entry.items():
                        if isinstance(value, str):
                            flat_subfields.extend((code, value))
                        elif isinstance(value, list):
                            for repeated_value in value:
                                flat_subfields.extend((code, repeated_value))
                # Each element of the indicator key becomes one indicator.
                marc_record.add_field(Field(tag=str(tag),
                                            indicators=list(indicator_key),
                                            subfields=flat_subfields))

    return marc_record.as_marc()
Exemplo n.º 2
0
    def create_record(cls,
                      work,
                      annotator,
                      force_create=False,
                      integration=None):
        """Return a MARC record for ``work``, reusing cached data if allowed.

        :param work: the work to describe; either a ``BaseMaterializedWork``
            (its ``license_pool`` is used) or an object exposing
            ``active_license_pool()``.
        :param annotator: an annotator object, or a callable that is invoked
            without arguments to obtain one.
        :param force_create: when true, cached MARC data on the work is
            ignored and the record is rebuilt.
        :param integration: passed through unchanged to
            ``annotator.annotate_work_record``.
        :return: the record, or ``None`` when no license pool is available.
        """
        if callable(annotator):
            annotator = annotator()

        is_materialized = isinstance(work, BaseMaterializedWork)
        pool = (work.license_pool
                if is_materialized else work.active_license_pool())
        if not pool:
            return None

        edition = pool.presentation_edition
        identifier = pool.identifier

        _db = Session.object_session(work)

        # Start from the cached serialization unless a rebuild was requested.
        record = None
        cached_marc = getattr(work, annotator.marc_cache_field)
        if cached_marc and not force_create:
            record = Record(data=cached_marc.encode('utf-8'),
                            force_utf8=True)

        if not record:
            record = Record(leader=annotator.leader(work), force_utf8=True)
            annotator.add_control_fields(record, identifier, pool, edition)
            annotator.add_isbn(record, identifier)

            # TODO: The 240 and 130 fields are for translated works, so they
            # can be grouped even though they have different titles. We do
            # not group editions of the same work in different languages, so
            # we can't use those yet.

            annotator.add_title(record, edition)
            annotator.add_contributors(record, edition)
            annotator.add_publisher(record, edition)
            annotator.add_physical_description(record, edition)
            annotator.add_audience(record, work)
            annotator.add_series(record, edition)
            annotator.add_system_details(record)
            annotator.add_ebooks_subject(record)

            # Store the freshly built serialization; for a materialized work
            # it is written to the pool's work rather than to ``work`` itself.
            data = record.as_marc()
            cache_owner = pool.work if is_materialized else work
            setattr(cache_owner, annotator.marc_cache_field, data)

        # Fields added from here on are deliberately not part of the cache.
        annotator.annotate_work_record(work, pool, edition, identifier, record,
                                       integration)

        return record
Exemplo n.º 3
0
    def create_record(cls, work, annotator, force_create=False, integration=None):
        """Assemble the MARC record describing ``work``.

        ``annotator`` may be an annotator object or a zero-argument callable
        returning one. Cached MARC data stored on the work (under
        ``annotator.marc_cache_field``) is reused unless ``force_create`` is
        true; otherwise the record is built field by field and the result is
        cached. ``integration`` is handed unchanged to
        ``annotator.annotate_work_record``.

        Returns the record, or ``None`` if the work has no license pool.
        """
        if callable(annotator):
            annotator = annotator()

        materialized = isinstance(work, BaseMaterializedWork)
        if materialized:
            pool = work.license_pool
        else:
            pool = work.active_license_pool()
        if not pool:
            return None

        edition = pool.presentation_edition
        identifier = pool.identifier

        _db = Session.object_session(work)

        existing_record = getattr(work, annotator.marc_cache_field)
        reuse_cache = existing_record and not force_create
        record = (
            Record(data=existing_record.encode('utf-8'), force_utf8=True)
            if reuse_cache
            else None
        )

        if not record:
            record = Record(leader=annotator.leader(work), force_utf8=True)
            annotator.add_control_fields(record, identifier, pool, edition)
            annotator.add_isbn(record, identifier)

            # TODO: The 240 and 130 fields are for translated works, so they can be grouped even
            # though they have different titles. We do not group editions of the same work in
            # different languages, so we can't use those yet.

            annotator.add_title(record, edition)
            annotator.add_contributors(record, edition)
            annotator.add_publisher(record, edition)
            annotator.add_physical_description(record, edition)
            annotator.add_audience(record, work)
            annotator.add_series(record, edition)
            annotator.add_system_details(record)
            annotator.add_ebooks_subject(record)

            # Cache the cacheable part; a materialized work stores it on the
            # work reachable through its license pool.
            data = record.as_marc()
            target = pool.work if materialized else work
            setattr(target, annotator.marc_cache_field, data)

        # Everything added below is intentionally excluded from the cache.
        annotator.annotate_work_record(work, pool, edition, identifier, record, integration)

        return record
Exemplo n.º 4
0
# XML namespace prefixes (Clark notation) used to match element tags.
dc = '{http://purl.org/dc/elements/1.1/}'
dcq = '{http://purl.org/dc/terms/}'

# Harvest OSTI records: for every identifier in the first column of
# ostinos.csv, fetch its XML description, append a row to osti_recs.csv
# (via getRecs) and a MARC record to ostimarc.mrc (via getMarc).
#
# The context managers close all three files even if a fetch or parse fails
# part-way through. The MARC file is opened in binary mode because the
# serialized record is binary transmission data, not text.
with open('ostinos.csv') as f, \
        open('osti_recs.csv', 'w') as out, \
        open('ostimarc.mrc', 'wb') as marcOut:
    csv_f = csv.reader(f)

    data = csv.writer(out)
    data.writerow([
        'Title', 'Author', 'Date', 'Subjects', 'Description', 'OstiID', 'DOI',
        'Report Number', 'DOE Number', 'URL', ''
    ])

    for number in csv_f:
        if not number:
            # csv.reader yields an empty list for a blank line; there is no
            # identifier to look up, so skip it instead of raising IndexError.
            continue
        ostiId = number[0]
        marc = Record()  # Create a new record for each loop.
        tree = etree.parse(
            'http://www.osti.gov/scitech/scitechxml?Identifier=' + ostiId +
            '.xml')
        for node in tree.iter():
            # Only the record whose ostiId element matches the requested
            # identifier is exported; its sibling elements hold the metadata.
            if node.tag == dc + 'ostiId' and node.text == ostiId:
                o = node.getparent()
                osti = o.getchildren()
                getRecs(osti, data)
                getMarc(osti, marc)

        marcOut.write(marc.as_marc())  # Write each new record.
Exemplo n.º 5
0

#-------------------------------------------------

# XML namespace prefixes (Clark notation) used when matching element tags.
dc = '{http://purl.org/dc/elements/1.1/}'
dcq = '{http://purl.org/dc/terms/}'

# Input: one OSTI identifier per row, first column.
f = open('ostinos.csv')
csv_f = csv.reader(f)

# Output 1: flat CSV summary, header row first.
out = open('osti_recs.csv', 'w')
data = csv.writer(out)
data.writerow([
	'Title', 'Author', 'Date', 'Subjects', 'Description',
	'OstiID', 'DOI', 'Report Number', 'DOE Number', 'URL', '',
])

# Output 2: one MARC record per identifier.
marcOut = open('ostimarc.mrc', 'w')

for number in csv_f:
	ostiId = number[0]
	marc = Record() # Create a new record for each loop.
	tree = etree.parse(
		'http://www.osti.gov/scitech/scitechxml?Identifier=' + ostiId + '.xml'
	)
	for node in tree.iter():
		# Skip everything except the ostiId element of the requested record.
		if node.tag != dc + 'ostiId' or node.text != ostiId:
			continue
		o = node.getparent()
		osti = o.getchildren()
		getRecs(osti, data)
		getMarc(osti, marc)

	marcOut.write(marc.as_marc()) # Write each new record.
Exemplo n.º 6
0
def json_to_marc(infilename, outfilename):
    """Convert one JSON resource description into MARC file(s).

    The JSON document in ``infilename`` is mapped onto a MARC record
    (006/007/008, 022, 245, 246, 260, 506, 520, 856, 866). When the record
    has a URL (its own or that of ``is_part_of``), it is written to
    ``outfilename`` and every subordinate resource is written to a sibling
    file whose name is ``outfilename`` with ``".marc"`` replaced by
    ``"-sub<N>.marc"``.

    :param infilename: path of the JSON file to read.
    :param outfilename: path of the main MARC file to write.
    :return: None.
    :raises KeyError: if a key that is accessed unconditionally
        (e.g. ``"is_part_of"``, ``"title"``, ``"url"``) is absent.
    """
    print('Processing: ' + infilename)  #progress message
    # Close the input handle deterministically instead of leaking it.
    with open(infilename, "r") as infile:
        data = json.load(infile)
    record = Record(force_utf8=True)  #create MARC record, enforce Unicode

    # add fields 006, 007 and 008 with minimal physical information to every marc file
    record.add_field(Field(tag='006', data="m"))
    record.add_field(Field(tag='007', data="cr"))

    # the iana language code from the json file is taken, checked against the list of language codes,
    # substituted with its iso639-2 equivalent and put in position 21-24 of the field 008 content
    field008val = "            o       0eng d"  # DEFAULT ENG
    try:
        if 'languages' in data and data['languages'][0] is not None:
            field008val = field008val[0:21] + lang_map.get(
                data['languages'][0], "   ") + field008val[24:]
    except IndexError:
        # 'languages' is present but empty: blank out the language code.
        field008val = field008val[0:21] + "   " + field008val[24:]

    record.add_field(Field(tag='008', data=field008val))

    # extract issn, in json 'generic' and/or 'electronic', and put into separate subfields of 022
    if "identifiers" in data and "issn" in data["identifiers"]:
        issn = data["identifiers"]["issn"]
        field_issn = Field(tag='022', indicators=['0', '#'])

        if "generic" in issn:
            field_issn.add_subfield('a', issn["generic"][0])

        if "electronic" in issn:
            field_issn.add_subfield('l', issn["electronic"][0])

        record.add_field(field_issn)

    # title of the series or journal
    if data["is_part_of"] is not None and data["is_part_of"]['title_full']:
        record.add_field(
            Field(tag='245',
                  indicators=['0', '0'],
                  subfields=["a", data["is_part_of"]["title_full"][:9000]]))
    if data["title"]:
        record.add_field(
            Field(tag='246',
                  indicators=['0', '0'],
                  subfields=["a", data["title"][:9000]]))

    if data["year"]:
        record.add_field(
            Field(tag="260",
                  indicators=["#", "#"],
                  subfields=["c", data["year"]]))

    # add field 506 to all records, as not present in all json files
    record.add_field(
        Field(tag='506', indicators=['0', '#'], subfields=["a",
                                                           "Open access"]))

    # some json files contain a very long description; the maximum length of data in a variable field
    #in MARC21 is 9,999 bytes, so here only a certain amount of content is put into the 520 field
    if data["description"]:
        record.add_field(
            Field(tag='520',
                  indicators=['2', '#'],
                  subfields=["a", data["description"][:9000]]))

    # keep together the journal url, host and domain as different subfields of field 856
    # check if either exists, before initializing a new field instance
    if data['url'] or (data['is_part_of'] is not None
                       and data['is_part_of']['url']):
        field = Field(tag='856', indicators=['0', '0'])
        if data['domain']:
            field.add_subfield('a', data['domain'])

        if data['is_part_of'] is not None and data['is_part_of']['url']:
            field.add_subfield('d', data['is_part_of']['url'])

        if data['url']:
            field.add_subfield('u', data['url'])

        record.add_field(field)

        # NOTE(review): everything below is nested under the URL check, so a
        # resource without any URL produces no output file at all. Nesting is
        # kept as in the original; confirm whether that is intended.
        if data["volume"]:
            record.add_field(
                Field(tag='866',
                      indicators=['0', '0'],
                      subfields=["a", data["volume"]]))

        #output marc file with same filename in Output directory
        with open(outfilename, 'wb') as out:
            out.write(record.as_marc())

        # execute function for creating separate records for subordinate resources
        # Default to "no subordinate records" so the loop below is safe when
        # the JSON has subordinate_resources set to null (previously this
        # raised UnboundLocalError).
        subordinate_records = []
        if data['subordinate_resources'] is not None:
            subordinate_records = create_subordinate_records(
                record, data['subordinate_resources'])

        # add counter and "-sub" to filenames of subordinate records
        for counter, subordinate_record in enumerate(subordinate_records):
            sub_filename = outfilename.replace(
                ".marc", "-sub" + str(counter) + ".marc")
            with open(sub_filename, 'wb') as out:
                out.write(subordinate_record.as_marc())
Exemplo n.º 7
0
# Line-oriented conversion of rows from the DAMEDB table into MARC records.
# NOTE(review): this fragment is cut off after the last ``elif`` below; the
# remaining branches are not visible here.
notimpl = {}

# Finished records, keyed by the value of record['001'].
records = {}

c = conn.cursor()

# ``record`` is the record currently being assembled; ``myextra`` is an
# ad-hoc attribute attached to it (its use is not visible in this fragment).
record = Record(file_encoding="utf-8", force_utf8=True)
record.myextra = {}
last = None
# NOTE(review): each row ``l`` is used as a string (strip/startswith/slicing);
# presumably the connection is configured to yield strings - confirm.
for l in c.execute('SELECT * FROM DBF_database_library_DAMEDB_dbf'):
    if len(l.strip()) < 1:
        # A blank line ends the current record. It is only flushed when it
        # has a title: appended to marcout.mrc, remembered in ``records``,
        # and replaced by a fresh empty record.
        if record.title():
            log("**New record writing old one: ", record['001'], '\n',
                '-' * 20)
            with open('marcout.mrc', 'ab') as o:
                o.write(record.as_marc())
            records[record['001']] = record
            record = Record(file_encoding="utf-8", force_utf8=True)
            record.myextra = {}
        pass
    elif l.startswith("###"):
        # Lines starting with "###" are ignored.
        pass
    elif l.startswith('M10'):
        # "M10" lines carry a URL after the 4-character prefix; it is stored
        # in an 856 field, subfield u.
        log("Urls have a non number therefore are extracted first")
        record.add_field(
            Field(tag='856', indicators=['4', '2'], subfields=['u', l[4:]]))
    elif l.startswith('B03 '):
        # TODO teacher/student library
        pass
    elif l.startswith('651i'):
        # TODO teacher/student library
Exemplo n.º 8
0
#-------------------------------------------------

# XML namespace prefixes (Clark notation) used when matching element tags.
dc = '{http://purl.org/dc/elements/1.1/}'
dcq = '{http://purl.org/dc/terms/}'

# Input: one identifier per row, first column.
f = open('ostinos2.csv')
csv_f = csv.reader(f)

# Output 1: flat CSV summary, header row first.
out = open('osti_recs2.csv', 'w')
data = csv.writer(out)
data.writerow([
	'Title', 'Author', 'Date', 'Subjects', 'Description',
	'OstiID', 'DOI', 'Report Number', 'DOE Number', 'URL', '',
])

# Output 2: MARC records are appended to an existing file.
marcOut = open('ostimarc.mrc', 'a')

for number in csv_f:
	ostiId = number[0]
	marc = Record()
	results = requests.get(
		'http://www.osti.gov/scitech/scitechxml?Identifier="' + ostiId + '"'
	)
	tree = etree.fromstring(results.content)
	for node in tree.iter():
		# Every identifierReport element triggers an export; the element
		# text is not compared against the requested identifier.
		if node.tag != dc + 'identifierReport':
			continue
		o = node.getparent()
		osti = o.getchildren()
		getRecs(osti, data)
		getMarc(osti, marc)

	marcOut.write(marc.as_marc())
Exemplo n.º 9
0
def main(arglist):
    """Convert a comics spreadsheet into a file of MARC records.

    The first sheet of the workbook named on the command line is read; its
    first row holds the column headers and every following row becomes one
    MARC record. All records are written to ``records.mrc`` in the current
    working directory.

    :param arglist: command-line arguments (without the program name),
        handed to ``argparse``; the only positional argument is the path to
        the spreadsheet.
    :raises ValueError: from ``col_headers.index`` if one of the expected
        column headers is missing from the first row.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input', help='path to spreadsheet')
    # parser.add_argument('output', help='save directory')
    # parser.add_argument('--production', help='production DOIs', action='store_true')
    args = parser.parse_args(arglist)

    # NOTE(review): this local shadows the builtin ``input``.
    input = Path(args.input)

    # Read spreadsheet
    book_in = xlrd.open_workbook(str(input))
    sheet = book_in.sheet_by_index(0)  # get first sheet
    col_headers = sheet.row_values(0)
    # print(col_headers)
    # print()

    # Resolve each expected header to its column index once, up front.
    title_col = col_headers.index('Title')
    subj_col = col_headers.index('Subject')
    genre_col = col_headers.index('Genre')
    pages_col = col_headers.index('Pages')
    date_col = col_headers.index('Date')
    pub_place_col = col_headers.index('Pub_Place')
    publisher_col = col_headers.index('Publisher')
    source_col = col_headers.index('Source')
    writer_col = col_headers.index('Writer')
    penciller_col = col_headers.index('Penciller')
    inker_col = col_headers.index('Inker')
    colorist_col = col_headers.index('Colorist')
    letterer_col = col_headers.index('Letterer')
    cover_artist_col = col_headers.index('Cover Artist')
    editor_col = col_headers.index('Editor')
    hist_note_col = col_headers.index('Historical Note')
    note_col = col_headers.index('Note')
    characters_col = col_headers.index('Characters')
    story_arc_col = col_headers.index('Story Arc')
    toc_col = col_headers.index('Table of Contents')
    series_col = col_headers.index('Is Part of Series')

    # NOTE(review): the output file is closed only at the very end of the
    # function; an exception while processing a row leaves it unclosed.
    outmarc = open('records.mrc', 'wb')

    # Boilerplate fields
    # These Field objects are created once and added to every record below.
    field_ldr = '00000nam  2200000Ii 4500'
    field_040 = Field(
        tag='040',
        indicators=[' ', ' '],
        subfields=['a', 'VMC', 'b', 'eng', 'e', 'rda', 'c', 'VMC'])
    field_049 = Field(tag='049',
                      indicators=[' ', ' '],
                      subfields=['a', 'VMCM'])
    field_336_text = Field(
        tag='336',
        indicators=[' ', ' '],
        subfields=['a', 'text', 'b', 'txt', '2', 'rdacontent'])
    field_336_image = Field(
        tag='336',
        indicators=[' ', ' '],
        subfields=['a', 'still image', 'b', 'sti', '2', 'rdacontent'])
    field_337 = Field(tag='337',
                      indicators=[' ', ' '],
                      subfields=['a', 'unmediated', 'b', 'n', '2', 'rdamedia'])
    field_338 = Field(tag='338',
                      indicators=[' ', ' '],
                      subfields=['a', 'volume', 'b', 'nc', '2', 'rdacarrier'])
    field_380 = Field(tag='380',
                      indicators=[' ', ' '],
                      subfields=['a', 'Comic books and graphic novels.'])
    field_506 = Field(
        tag='506',
        indicators=['1', ' '],
        subfields=[
            'a',
            'Collection open to research. Researchers must register and agree to copyright and privacy laws before using this collection. Please contact Research Services staff before visiting the James Madison University Special Collections Library to use this collection.'
        ])
    field_542 = Field(tag='542',
                      indicators=[' ', ' '],
                      subfields=[
                          'a', 'Copyright not evaluated', 'u',
                          'http://rightsstatements.org/vocab/CNE/1.0/'
                      ])
    field_588 = Field(
        tag='588',
        indicators=['0', ' '],
        subfields=[
            'a', 'Description based on indicia and Grand Comics Database.'
        ])
    field_989 = Field(tag='989',
                      indicators=[' ', ' '],
                      subfields=['a', 'PN6728'])

    # Row 0 is the header row; every later row yields one record.
    for row in range(1, sheet.nrows):
        print('Record ' + str(row))

        # The title is parsed twice: ``lower_title`` (run through
        # lowercase_title first) feeds the series fields 490/830, while
        # ``title`` feeds 099 and 245. A three-part parse result means the
        # title includes a part title.
        title = sheet.cell(row, title_col).value
        print(title)
        lower_title = parse_title(lowercase_title(title))
        title = parse_title(sheet.cell(row, title_col).value)
        has_part_title = False
        if len(title) == 3:
            has_part_title = True

        # Subject and Genre are always split on ';'.
        # NOTE(review): an empty cell therefore yields [''] and, further
        # down, a 650/655 field whose $a is just '.'.
        subj = sheet.cell(row, subj_col).value
        subj = [x.strip() for x in subj.split(';')]
        genre = sheet.cell(row, genre_col).value
        genre = [x.strip() for x in genre.split(';')]
        pages = sheet.cell(row, pages_col).value
        # Only the first four characters of the Date cell are used.
        date = sheet.cell(row, date_col).value[0:4]
        pub_place = sheet.cell(row, pub_place_col).value
        publisher = sheet.cell(row, publisher_col).value
        source = sheet.cell(row, source_col).value
        # writer = sheet.cell(row, writer_col).value

        # Contributor and Characters columns: an empty cell leaves an empty
        # list; otherwise the cell is split on ';' into stripped names.
        writer = []
        if sheet.cell(row, writer_col).value:
            writer = sheet.cell(row, writer_col).value
            writer = [x.strip() for x in writer.split(';')]
        penciller = []
        if sheet.cell(row, penciller_col).value:
            penciller = sheet.cell(row, penciller_col).value
            penciller = [x.strip() for x in penciller.split(';')]
        inker = []
        if sheet.cell(row, inker_col).value:
            inker = sheet.cell(row, inker_col).value
            inker = [x.strip() for x in inker.split(';')]
        colorist = []
        if sheet.cell(row, colorist_col).value:
            colorist = sheet.cell(row, colorist_col).value
            # print(colorist)
            # print('COLORIST FROM SHEET=' + colorist + '=END')
            # print(bool(colorist))
            colorist = [x.strip() for x in colorist.split(';')]
        letterer = []
        if sheet.cell(row, letterer_col).value:
            letterer = sheet.cell(row, letterer_col).value
            letterer = [x.strip() for x in letterer.split(';')]
        cover_artist = []
        if sheet.cell(row, cover_artist_col).value:
            cover_artist = sheet.cell(row, cover_artist_col).value
            cover_artist = [x.strip() for x in cover_artist.split(';')]
        editor = []
        if sheet.cell(row, editor_col).value:
            editor = sheet.cell(row, editor_col).value
            editor = [x.strip() for x in editor.split(';')]
        # The note-like columns below are NOT split: they stay an empty list
        # when the cell is empty and hold the raw cell value otherwise.
        hist_note = []
        if sheet.cell(row, hist_note_col).value:
            hist_note = sheet.cell(row, hist_note_col).value
        note = []
        if sheet.cell(row, note_col).value:
            note = sheet.cell(row, note_col).value
        characters = []
        if sheet.cell(row, characters_col).value:
            characters = sheet.cell(row, characters_col).value
            characters = [x.strip() for x in characters.split(';')]
        story_arc = []
        if sheet.cell(row, story_arc_col).value:
            story_arc = sheet.cell(row, story_arc_col).value
        toc = []
        if sheet.cell(row, toc_col).value:
            toc = sheet.cell(row, toc_col).value
        series = sheet.cell(row, series_col).value

        # print(cover_artist)
        # print(characters)
        # print(writer)
        # print(subfields_from_string(writer[0]))
        # print(name_direct_order(subfields_from_string(writer[0])[1]))
        # print(title)
        # print(parse_title(title))

        record = Record()

        # Add boilerplate fields
        record.leader = field_ldr
        record.add_ordered_field(field_040)
        record.add_ordered_field(field_049)
        record.add_ordered_field(field_336_text)
        record.add_ordered_field(field_336_image)
        record.add_ordered_field(field_337)
        record.add_ordered_field(field_338)
        record.add_ordered_field(field_380)
        record.add_ordered_field(field_506)
        record.add_ordered_field(field_542)
        record.add_ordered_field(field_588)
        record.add_ordered_field(field_989)

        # Add other fields
        # 008: today's date (yymmdd), 't', the four-character date twice,
        # then a fixed tail.
        today = datetime.today().strftime('%y%m%d')
        data_008 = today + 't' + date + date + 'xx a     6    000 1 eng d'
        field_008 = Field(tag='008', data=data_008)
        record.add_ordered_field(field_008)

        # 099: two $a subfields; with a part title the first $a joins the
        # first two title parts with ': '.
        subfields_099 = []
        if has_part_title:
            subfields_099 = ['a', title[0] + ': ' + title[1], 'a', title[2]]
        else:
            subfields_099 = ['a', title[0], 'a', title[1]]
        field_099 = Field(tag='099',
                          indicators=[' ', '9'],
                          subfields=subfields_099)
        record.add_ordered_field(field_099)

        if writer:
            # Add 100 for first writer
            subfield_content = subfields_from_string_relator(
                writer[0], 'writer')
            field_100 = Field(tag='100',
                              indicators=['1', ' '],
                              subfields=subfield_content)
            record.add_ordered_field(field_100)
            # Multiple writers
            if len(writer) > 1:
                # Add 700s for all writers after the first
                for i in writer[1:]:
                    subfield_content = subfields_from_string_relator(
                        i, 'writer')
                    field_700 = Field(tag='700',
                                      indicators=['1', ' '],
                                      subfields=subfield_content)
                    record.add_ordered_field(field_700)

        # 245 first indicator: 1 when a writer exists (a 100 field was
        # added above), otherwise 0.
        if writer:
            f245_ind1 = 1
        else:
            f245_ind1 = 0

        # 245 second indicator: length of a leading English article
        # including its trailing space ('The ' -> 4, 'An ' -> 3, 'A ' -> 2).
        f245_ind2 = 0
        if str.startswith(title[0], 'The '):
            f245_ind2 = 4
        elif str.startswith(title[0], 'An '):
            f245_ind2 = 3
        elif str.startswith(title[0], 'A '):
            f245_ind2 = 2

        subfields_245 = []
        if has_part_title:
            subfields_245 = [
                'a', title[0] + '.', 'p', title[1] + ',', 'n', title[2]
            ]
        else:
            subfields_245 = ['a', title[0] + ',', 'n', title[1]]
        # If writer exists, add $c
        if writer:
            # The last subfield built so far gets ' /' appended before the
            # statement of responsibility in $c.
            subfields_245[-1] = subfields_245[-1] + ' /'
            subfields_245.append('c')
            subfields_245.append(
                name_direct_order(subfields_from_string(writer[0])[1]) +
                ', writer.')
        else:
            # If no writer, add 245 ending punctuation
            subfields_245[-1] = subfields_245[-1] + '.'
        field_245 = Field(tag='245',
                          indicators=[f245_ind1, f245_ind2],
                          subfields=subfields_245)
        record.add_ordered_field(field_245)

        # 264 with second indicator 1: place, publisher and date.
        field_264_1 = Field(tag='264',
                            indicators=[' ', '1'],
                            subfields=[
                                'a', pub_place + ' :', 'b', publisher + ',',
                                'c', date + '.'
                            ])
        record.add_ordered_field(field_264_1)

        # 264 with second indicator 4: the same date prefixed with '©'.
        field_264_4 = Field(tag='264',
                            indicators=[' ', '4'],
                            subfields=['c', '©' + date])
        record.add_ordered_field(field_264_4)

        # NOTE(review): ``pages`` is concatenated with a string, so the
        # Pages cell must hold text rather than a number - confirm.
        field_300 = Field(tag='300',
                          indicators=[' ', ' '],
                          subfields=[
                              'a', pages + ' pages :', 'b',
                              'chiefly color illustrations.'
                          ])
        record.add_ordered_field(field_300)

        # 490: series statement built from the lower-cased title parts.
        subfields_490 = []
        if has_part_title:
            subfields_490 = [
                'a', lower_title[0] + '. ' + lower_title[1] + ' ;', 'v',
                lower_title[2]
            ]
        else:
            subfields_490 = ['a', lower_title[0] + ' ;', 'v', lower_title[1]]
        field_490 = Field(tag='490',
                          indicators=['1', ' '],
                          subfields=subfields_490)
        record.add_ordered_field(field_490)

        if hist_note:
            field_500_hist = Field(tag='500',
                                   indicators=[' ', ' '],
                                   subfields=['a', hist_note + '.'])
            record.add_ordered_field(field_500_hist)

        if note:
            field_500_note = Field(tag='500',
                                   indicators=[' ', ' '],
                                   subfields=['a', note + '.'])
            record.add_ordered_field(field_500_note)

        if toc:
            # Add a final period unless the text already ends in '.', '?'
            # or '!'.
            if not toc.endswith('.') and not toc.endswith(
                    '?') and not toc.endswith('!'):
                toc += '.'
            field_505 = Field(tag='505',
                              indicators=['0', ' '],
                              subfields=['a', toc])
            record.add_ordered_field(field_505)

        if story_arc:
            # The story arc is quoted and attributed in a 520 field.
            field_520 = Field(tag='520',
                              indicators=[' ', ' '],
                              subfields=[
                                  'a', '"' + story_arc +
                                  '" -- Grand Comics Database.'
                              ])
            record.add_ordered_field(field_520)

        field_561 = Field(tag='561',
                          indicators=[' ', ' '],
                          subfields=['a', source + '.'])
        record.add_ordered_field(field_561)

        # One 650 per subject; a period is appended unless the heading
        # already ends in '.' or ')'.
        for i in subj:
            if not i.endswith('.') and not i.endswith(')'):
                i += '.'
            field_650 = Field(tag='650',
                              indicators=[' ', '0'],
                              subfields=['a', i])
            record.add_ordered_field(field_650)

        # One 655 per genre term, same punctuation rule, source 'lcgft'.
        for i in genre:
            if not i.endswith('.') and not i.endswith(')'):
                i += '.'
            field_655 = Field(tag='655',
                              indicators=[' ', '7'],
                              subfields=['a', i, '2', 'lcgft'])
            record.add_ordered_field(field_655)

        if characters:
            # print(characters)
            # 500 note listing all characters, separated by '; ' and ending
            # with a period.
            subfield_content = 'Characters: '
            for i in characters[:-1]:
                subfield_content += i + '; '
            subfield_content += characters[-1] + '.'
            field_500 = Field(tag='500',
                              indicators=[' ', ' '],
                              subfields=['a', subfield_content])
            record.add_ordered_field(field_500)

            # Create 600 and 650 for "Fictitious character" entries
            # TODO check for existing 650 and don't add if a duplicate
            if any('Fictitious character' in c for c in characters):
                fic_chars = [
                    c for c in characters if 'Fictitious character' in c
                ]
                for i in fic_chars:
                    # Split "Name (Fictitious character...)" into the name
                    # (group 1) and the parenthetical qualifier (group 2).
                    fic_char_name = re.sub(
                        r'^(.*?) (\(Fictitious character.*\))$', r'\g<1>', i)
                    fic_char_c = re.sub(
                        r'^(.*?) (\(Fictitious character.*\))$', r'\g<2>', i)
                    field_600 = Field(
                        tag='600',
                        indicators=['0', '0'],
                        subfields=['a', fic_char_name, 'c', fic_char_c])
                    record.add_ordered_field(field_600)

                    field_650 = Field(tag='650',
                                      indicators=[' ', '0'],
                                      subfields=['a', i])
                    record.add_ordered_field(field_650)

        # One 700 per remaining contributor, grouped by role in the order
        # penciller, inker, colorist, letterer, cover artist, editor.
        if penciller:
            for i in penciller:
                subfield_content = subfields_from_string_relator(
                    i, 'penciller')
                field_700 = Field(tag='700',
                                  indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)

        if inker:
            for i in inker:
                subfield_content = subfields_from_string_relator(i, 'inker')
                field_700 = Field(tag='700',
                                  indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)

        if colorist:
            for i in colorist:
                subfield_content = subfields_from_string_relator(i, 'colorist')
                field_700 = Field(tag='700',
                                  indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)

        if letterer:
            for i in letterer:
                subfield_content = subfields_from_string_relator(i, 'letterer')
                field_700 = Field(tag='700',
                                  indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)

        if cover_artist:
            for i in cover_artist:
                subfield_content = subfields_from_string_relator(
                    i, 'cover artist')
                field_700 = Field(tag='700',
                                  indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)

        if editor:
            for i in editor:
                subfield_content = subfields_from_string_relator(i, 'editor')
                field_700 = Field(tag='700',
                                  indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)

        # field_700 = Field(tag = '700',
        # indicators = ['7',' '],
        # subfields = [
        # 'a', doi,
        # '2', 'doi'])

        # 773: subfields derived from the 'Is Part of Series' cell.
        subfields_773 = subfields_from_string(series)
        field_773 = Field(tag='773',
                          indicators=['0', '8'],
                          subfields=subfields_773)
        record.add_ordered_field(field_773)

        # 830: series entry built from the lower-cased title parts.
        subfields_830 = []
        if has_part_title:
            subfields_830 = [
                'a', lower_title[0] + '.', 'p', lower_title[1] + ' ;', 'v',
                lower_title[2] + '.'
            ]
        else:
            subfields_830 = [
                'a', lower_title[0] + ' ;', 'v', lower_title[1] + '.'
            ]
        field_830 = Field(tag='830',
                          indicators=[' ', '0'],
                          subfields=subfields_830)
        record.add_ordered_field(field_830)

        # Append the serialized record to the output file.
        outmarc.write(record.as_marc())
        print()
    outmarc.close()
Exemplo n.º 10
0
# CSV summary output, header row first.
# NOTE(review): ``csv_f`` (iterated below) is defined before this fragment.
out = open('osti_recs.csv', 'w')
data = csv.writer(out)
data.writerow([
    'Title', 'Author', 'Date', 'Subjects', 'Description', 'OstiID', 'DOI',
    'Report Number', 'DOE Number', 'URL', ''
])

# NOTE(review): a single Record is created here, outside the loop, and it is
# written exactly once after the loop. If getMarc() fills this module-level
# ``marc``, all identifiers end up merged into one record - confirm intent.
marcOut = open('ostimarc.mrc', 'w')
marc = Record()

# XML namespace prefixes (Clark notation) used when matching element tags.
dc = '{http://purl.org/dc/elements/1.1/}'
dcq = '{http://purl.org/dc/terms/}'

for number in csv_f:
    # The identifier is the first column of each CSV row.
    ostiId = number[0]

    results = requests.get(
        'http://www.osti.gov/scitech/scitechxml?Identifier=' + ostiId)
    tree = etree.fromstring(results.content)
    for node in tree.iter():
        # Export only the record whose ostiId element matches the request.
        if node.tag == dc + 'ostiId':
            if node.text == ostiId:
                o = node.getparent()
                osti = o.getchildren()
                # getRecs/getMarc receive only the element list here;
                # presumably they use the module-level ``data`` and ``marc``
                # - verify against their definitions.
                getRecs(osti)
                getMarc(osti)
                # This ``continue`` is the last statement of the loop body,
                # so it has no effect.
                continue

marcOut.write(marc.as_marc())
Exemplo n.º 11
0
def to_marc(obj):
    """Build a MARC record for one article and return it in transmission format.

    Args:
        obj: mapping read with the keys ``z_authors``, ``title``,
            ``publisher``, ``year``, ``journal_name``, ``doi_url`` and
            ``best_oa_location`` (itself a mapping with a ``url`` key).

    Returns:
        The result of ``Record.as_marc()`` for the assembled record.
    """
    authors = obj['z_authors']
    primary_author = format_author(authors[0], reverse=True) if authors else None

    split_title = [part.strip() for part in title_splitter.split(obj['title'])]

    record = Record()
    # All four values are leader positions, so they must be set on the leader
    # object; assigning them to the Record itself leaves the leader unchanged.
    record.leader.type_of_record = 'a'
    record.leader.coding_scheme = 'a'
    record.leader.bibliographic_level = 'm'
    record.leader.cataloging_form = 'a'

    if primary_author:
        record.add_field(
            Field(tag='100',
                  indicators=['0', ' '],
                  subfields=['a', primary_author]))

    if len(split_title) > 2:
        # Long titles: first two parts go to $a, the rest to $b.
        title_subfields = [
            'a', f'{split_title[0]} {split_title[1]}',
            'b', f'{" ".join(split_title[2:])} /',
        ]
    else:
        # Short titles are kept verbatim in $a.
        title_subfields = ['a', f"{obj['title']} /"]
    title_subfields += ['c', format_authors(obj['z_authors'])]
    record.add_field(
        Field(tag='245', indicators=['0', '0'], subfields=title_subfields))

    record.add_field(
        Field(tag='260',
              indicators=[' ', ' '],
              subfields=['b', obj['publisher'], 'c',
                         str(obj['year'])]))

    record.add_field(
        Field(tag='500',
              indicators=[' ', ' '],
              subfields=[
                  'a',
                  f"Article from {obj['journal_name']}.",
              ]))

    record.add_field(
        Field(tag='856',
              indicators=['4', '0'],
              subfields=[
                  'u', obj['best_oa_location']['url'], 'y',
                  'View article as PDF'
              ]))

    record.add_field(
        Field(tag='856',
              indicators=[' ', ' '],
              subfields=['u', obj['doi_url'], 'y', 'DOI']))

    return record.as_marc()
Exemplo n.º 12
0
def output_to_marc(output_text, doc_year, doc_num, non_filing, doc_title,
                   doc_iss_year, num_pages, welcomes, this_res, date_field_008,
                   time_field_590, field_710, time_field_856, url_entry):
    '''
    Assemble one MARC record with PyMARC and write it to
    .\\marc_files\\marc_<doc_year>-<doc_num>.mrc.

    Each of the two 999 fields gets its own random ten-digit $i value, which
    works around the LMS requirement for unique item ids. ``output_text`` is
    accepted but not used here.
    '''
    blank = (' ', ' ')
    corporate = ('2', ' ')

    def holding(call_prefix, item_type):
        # Subfields of one 999 field; draws a fresh random item id per call.
        return [
            'a', f'{call_prefix} {doc_year}{doc_num}',
            'w', 'UDC',
            'c', '1',
            'i', f'{random.randint(1000000000,9999999999)}',
            'l', 'STORE',
            'm', 'COMPANY-LIB',
            'r', 'Y',
            's', 'Y',
            't', item_type,
        ]

    # (tag, data) pairs for the fields that carry raw data.
    control_fields = [
        ('000', 'im  0c'),
        ('008', f'{date_field_008}n                      000 0 eng u'),
    ]

    # (tag, indicators, subfields) triples, listed in output order.
    data_fields = [
        ('080', blank, ['a', f'657 {doc_year}{doc_num}']),
        ('110', corporate, ['a', 'COMPANY']),
        ('245', ('1', str(non_filing)), ['a', f'{doc_title}']),
        ('260', blank,
         ['a', 'London : ', 'b', 'COMPANY, ', 'c', f'{doc_iss_year}']),
        ('300', blank, ['a', str(num_pages) + ' pages']),
        ('336', blank, ['a', 'text |2rdacontent']),
        ('337', blank, ['a', 'unmediated |2rdamedia']),
        ('338', blank, ['a', 'volume |2rdacarrier']),
        ('490', blank, ['a', 'COMPANY ; ', 'v', f'{doc_num}/{doc_year}']),
        ('500', blank, ['a', f'"{welcomes}... {this_res}"']),
        ('590', blank, ['a', f'{time_field_590}']),
        ('650', blank, ['a', '']),
        ('650', blank, ['a', '']),
        ('710', corporate, ['a', f'{field_710}']),
        ('710', corporate, ['a', 'COMPANY']),
        ('830', blank, ['a', 'COMPANY']),
        ('856', blank,
         ['a', f'In {time_field_856} available at : ', 'u', f'{url_entry}']),
        ('999', blank, holding('087', 'REFERENCE')),
        ('999', blank, holding('657', 'LENDING')),
    ]

    record = Record()
    for tag, payload in control_fields:
        record.add_field(Field(tag=tag, data=payload))
    for tag, inds, subs in data_fields:
        # list(inds) hands every Field its own indicator list.
        record.add_field(Field(tag=tag, indicators=list(inds), subfields=subs))

    # Write MARC file to disk.
    target = f'.\\marc_files\\marc_{doc_year}-{doc_num}.mrc'
    with open(target, 'wb') as out:
        out.write(record.as_marc())
Exemplo n.º 13
0
def main(arglist):
    """Generate DOI update files for Sierra bib records of one bepress set.

    Steps:
      1. Read the bepress ``.xls`` export and map each ``calc_url`` to its
         ``doi``.
      2. Authenticate against the Sierra API (credentials come from the
         ``Sierra API`` section of ``local_settings.ini``) and run the query
         in ``query_setname_no_doi_bib_limiter.json`` for ``setname``,
         paging until fewer than ``limit`` records come back.
      3. Fetch ``varFields`` for the matching bibs and pull out the 001
         content and any 856 $u containing ``commons.lib.jmu.edu``.
      4. Write ``shortrecs.mrc`` (short MARC records), ``searchkeys.txt``
         (one OCLC number per line) and ``OCLC Changes.xls``.

    Args:
        arglist: command-line arguments (``setname``, ``input``) handed to
            argparse.

    Returns:
        None. Returns early when the spreadsheet has no ``doi`` column or
        when the query finds no records.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('setname',
                        help='bepress collection setname (e.g., diss201019)')
    parser.add_argument(
        'input',
        help=
        'path to bepress spreadsheet (containing DOIs) in "Excel 97-2003 Workbook (.xls)" format'
    )
    args = parser.parse_args(arglist)

    # Read config file and parse setnames into lists by category
    config = configparser.ConfigParser(allow_no_value=True)
    config.read('local_settings.ini')
    # Not consumed below yet; kept so a missing [ETD] section still fails fast.
    etd_setnames = [name for name, _ in config.items('ETD')]
    # Add additional categories here

    setname = args.setname
    input_path = Path(args.input)

    # jsonmerge setup: "entries" lists from successive responses are appended
    schema = {"properties": {"entries": {"mergeStrategy": "append"}}}
    merger = Merger(schema)

    bibs_url = 'https://catalog.lib.jmu.edu/iii/sierra-api/v5/bibs/'
    query_file = 'query_setname_no_doi_bib_limiter.json'

    # Timestamp output
    date_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print(date_time)
    print()
    print('------------------------------------------------------------')
    print('------------------------------------------------------------')

    # Read Bepress spreadsheet
    # TODO check that setname matches spreadsheet?
    book_in = xlrd.open_workbook(str(input_path))
    sheet1 = book_in.sheet_by_index(0)  # get first sheet
    sheet1_col_headers = sheet1.row_values(0)

    try:
        doi_col_index = sheet1_col_headers.index('doi')
    except ValueError:
        # Nothing useful can be produced without DOIs; stop here instead of
        # failing later on the unbound column index.
        print('DOI field not found in bepress metadata')
        return
    url_col_index = sheet1_col_headers.index('calc_url')

    # Read URLs and DOIs from spreadsheet
    bepress_data = {}
    for row in range(1, sheet1.nrows):
        bepress_url = sheet1.cell(row, url_col_index).value
        bepress_doi = sheet1.cell(row, doi_col_index).value
        bepress_data[bepress_url] = bepress_doi
    print(bepress_data)

    # Read query criteria from file, inserting setname; the starting bib
    # number placeholder is filled in per request.
    with open(query_file, 'r') as file:
        query_template = file.read().replace('SETNAME', setname)
    data = query_template.replace('bxxxxxxx', 'b1000000')

    # Authenticate to get token, using Client Credentials Grant https://techdocs.iii.com/sierraapi/Content/zReference/authClient.htm
    key_secret = config.get('Sierra API', 'key') + ':' + config.get(
        'Sierra API', 'secret')
    key_secret_encoded = base64.b64encode(
        key_secret.encode('UTF-8')).decode('UTF-8')
    headers = {
        'Authorization': 'Basic ' + key_secret_encoded,
        'Content-Type': 'application/x-www-form-urlencoded'
    }
    response = requests.request(
        'POST',
        'https://catalog.lib.jmu.edu/iii/sierra-api/v5/token',
        headers=headers)
    token = response.json()['access_token']
    headers = {'Accept': 'application/json', 'Authorization': 'Bearer ' + token}

    # Search Sierra for records with URL+setname and no DOI in 024 field
    limit = 2000
    query_url = bibs_url + 'query?offset=0&limit=' + str(limit)
    response = requests.request('POST',
                                query_url,
                                headers=headers,
                                data=data)
    page = response.json()
    records_returned = page['total']
    j_all = page

    if page['total'] == 0:
        print('No ' + setname + ' records in Sierra are missing DOIs')
        return

    # If limit was reached, repeat until all record IDs are retrieved
    while page['total'] == limit:
        last_record_id = page['entries'][-1]['link'].replace(bibs_url, '')
        next_record_id = str(int(last_record_id) + 1)

        # Restart the query just past the last bib already seen
        data = query_template.replace('bxxxxxxx', 'b' + next_record_id)
        response = requests.request('POST',
                                    query_url,
                                    headers=headers,
                                    data=data)
        page = response.json()
        records_returned += page['total']

        # Add new response to previous ones
        j_all = merger.merge(j_all, page)
        j_all['total'] = records_returned

    print('Found ' + str(records_returned) + ' ' + setname +
          ' Sierra records that are missing DOIs')

    # Put bib IDs in list
    bib_id_list = [
        entry['link'].replace(bibs_url, '') for entry in j_all['entries']
    ]

    # Get bib varField info for all records, 499 bib IDs per request
    fields = 'varFields'
    j_data_all = {}
    records_returned_data = 0
    chunk_size = 499
    for start in range(0, len(bib_id_list), chunk_size):
        querystring = {
            'id': ','.join(bib_id_list[start:start + chunk_size]),
            'fields': fields,
            'limit': limit
        }
        response = requests.request('GET',
                                    bibs_url,
                                    headers=headers,
                                    params=querystring)
        j_data = response.json()
        records_returned_data += j_data['total']
        j_data_all = merger.merge(j_data_all, j_data)
        j_data_all['total'] = records_returned_data

    # Parse varField data for OCLC number and URL
    sierra_data = {}
    for entry in j_data_all['entries']:
        bib_id = entry['id']
        # Reset per record so a bib without an 001 neither crashes nor
        # inherits the previous record's number.
        oclc_num = ''
        sierra_urls = []

        for var_field in entry['varFields']:
            if 'marcTag' not in var_field:
                continue
            if '001' in var_field['marcTag']:
                oclc_num = var_field['content']
            if '856' in var_field['marcTag']:
                for subfield in var_field['subfields']:
                    if ('u' in subfield['tag']
                            and 'commons.lib.jmu.edu' in subfield['content']):
                        sierra_urls.append(subfield['content'])
        sierra_url = ';'.join(sierra_urls)

        # Turn bib id into bib number: weight the digits 2, 3, 4, ... from
        # the right, sum, and take the result mod 11 (10 is written as 'x').
        total = sum((position + 2) * int(digit)
                    for position, digit in enumerate(reversed(bib_id)))
        checkdigit = total % 11
        if checkdigit == 10:
            checkdigit = 'x'
        bib_num = 'b' + bib_id + str(checkdigit)

        sierra_data[bib_num] = (oclc_num, sierra_url)
    print(sierra_data)

    # Create short MARC records with bib number and DOI fields, and create spreadsheet with OCLC numbers and DOI fields
    with open('shortrecs.mrc', 'wb') as outmarc, \
            open('searchkeys.txt', 'w') as outtext:
        outbook = xlwt.Workbook()
        outsheet = outbook.add_sheet('Sheet 1')
        col_headers = ['OCLC Number', 'Bib Number', '024', '856']
        for col, header in enumerate(col_headers):
            outsheet.write(0, col, header)
        outbook.save('OCLC Changes.xls')

        for row_num, bib_num in enumerate(sierra_data, 1):
            print(row_num)
            print(bib_num)

            oclcnum, sierra_url = sierra_data[bib_num]

            # Get DOI from spreadsheet data
            doi_url = bepress_data[sierra_url]
            print(doi_url)
            doi = doi_url.replace('https://doi.org/', '')

            # Literal backslash follows the "7"; escaped explicitly because
            # "\$" is not a valid escape sequence.
            spreadsheet_024 = '7\\$a' + doi + '$2doi'

            field_907 = Field(tag='907',
                              indicators=[' ', ' '],
                              subfields=['a', '.' + bib_num])
            field_024 = Field(tag='024',
                              indicators=['7', ' '],
                              subfields=['a', doi, '2', 'doi'])
            field_856 = Field(tag='856',
                              indicators=['4', '0'],
                              subfields=[
                                  'z',
                                  'Full-text of dissertation on the Internet',
                                  'u', doi_url
                              ])
            field_506_1 = Field(
                tag='506',
                indicators=['0', ' '],
                subfields=[
                    'a',
                    'James Madison University Libraries is providing a metadata record and hyperlink to this full-text resource.',
                    'f', 'Unrestricted online access', '2', 'star'
                ])
            field_506_2 = Field(tag='506',
                                indicators=['0', ' '],
                                subfields=[
                                    'a', 'Open access content.', 'f',
                                    'Open access content', '2', 'star'
                                ])
            field_540 = Field(
                tag='540',
                indicators=[' ', ' '],
                subfields=[
                    'a',
                    'This work is licensed under a Creative Commons Attribution-NonCommercial-No Derivative Works 4.0 License.',
                    'u',
                    'https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode'
                ])

            record = Record()
            for field in (field_907, field_024, field_856, field_506_1,
                          field_506_2, field_540):
                record.add_ordered_field(field)
            outmarc.write(record.as_marc())

            outtext.write(oclcnum + '\n')

            outsheet.write(row_num, 0, oclcnum)
            outsheet.write(row_num, 1, bib_num)
            outsheet.write(row_num, 2, spreadsheet_024)
            outsheet.write(row_num, 3, doi_url)
            # Saved after every row so partial results survive a later failure.
            outbook.save('OCLC Changes.xls')
Exemplo n.º 14
0
def main(arglist):
    parser = argparse.ArgumentParser()
    parser.add_argument('input', help='path to spreadsheet')
    # parser.add_argument('output', help='save directory')
    args = parser.parse_args(arglist)
    
    input = Path(args.input)
    
    # Read spreadsheet
    book_in = xlrd.open_workbook(str(input))
    sheet = book_in.sheet_by_index(0)  # get first sheet
    col_headers = sheet.row_values(0)
    
    title_col = col_headers.index('Title')
    subj_person_col = col_headers.index('Subject_Person')
    subj_topical_col = col_headers.index('Subject_Topical')
    subj_place_col = col_headers.index('Subject_Place')
    subj_corp_col = col_headers.index('Subject_Jurisdictional')
    genre_col = col_headers.index('Genre')
    pages_col = col_headers.index('Pages')
    pub_date_col = col_headers.index('Date') # previously Publication Date
    copy_date_col = col_headers.index('Copyright Date')
    pub_place_col = col_headers.index('Pub_Place')
    publisher_col = col_headers.index('Publisher')
    edition_col = col_headers.index('Edition')
    source_col = col_headers.index('Source')
    # source_acq_col = col_headers.index('Source of Acquisition')
    writer_col = col_headers.index('Writer')
    penciller_col = col_headers.index('Penciller')
    inker_col = col_headers.index('Inker')
    colorist_col = col_headers.index('Colorist')
    letterer_col = col_headers.index('Letterer')
    cover_artist_col = col_headers.index('Cover Artist')
    editor_col = col_headers.index('Editor')
    # hist_note_col = col_headers.index('Historical Note')
    notes_col = col_headers.index('Note')
    characters_col = col_headers.index('Characters')
    synopsis_col = col_headers.index('Story Arc')
    toc_col = col_headers.index('Table of Contents')
    in_series_col = col_headers.index('Is Part of Series')
    black_creators_col = col_headers.index('Black Creators (MARC 590)')
    black_chars_col = col_headers.index('Black Characters (MARC 590)')
    isbn_col = col_headers.index('ISBN')
    color_col = col_headers.index('Color?')
    series_note_col = col_headers.index('Series Note')
    copyright_holder_col = col_headers.index('Copyright holder')
    gcd_uri_col = col_headers.index('Grand Comics Database')
    
    outmarc = open('records.mrc', 'wb')
    
    # Boilerplate fields
    field_ldr = '00000nam a2200000Ii 4500'
    field_040 = Field(tag = '040',
                indicators = [' ',' '],
                subfields = [
                    'a', 'VMC',
                    'b', 'eng',
                    'e', 'rda',
                    'c', 'VMC'])
    field_049 = Field(tag = '049',
                indicators = [' ',' '],
                subfields = [
                    'a', 'VMCS'])
    field_336_text = Field(tag = '336',
                    indicators = [' ',' '],
                    subfields = [
                        'a', 'text',
                        'b', 'txt',
                        '2', 'rdacontent'])
    field_336_image = Field(tag = '336',
                indicators = [' ',' '],
                subfields = [
                    'a', 'still image',
                    'b', 'sti',
                    '2', 'rdacontent'])
    field_337 = Field(tag = '337',
                indicators = [' ',' '],
                subfields = [
                    'a', 'unmediated',
                    'b', 'n',
                    '2', 'rdamedia'])
    field_338 = Field(tag = '338',
                indicators = [' ',' '],
                subfields = [
                    'a', 'volume',
                    'b', 'nc',
                    '2', 'rdacarrier'])
    field_380 = Field(tag = '380',
                indicators = [' ',' '],
                subfields = [
                    'a', 'Comic books and graphic novels.'])
    field_506 = Field(tag = '506',
                    indicators = ['1',' '],
                    subfields = [
                        'a', 'Collection open to research. Researchers must register and agree to copyright and privacy laws before using this collection. Please contact Research Services staff before visiting the James Madison University Special Collections Library to use this collection.'])
    field_542 = Field(tag = '542',
                indicators = [' ',' '],
                subfields = [
                    'a', 'Copyright not evaluated',
                    'u', 'http://rightsstatements.org/vocab/CNE/1.0/'])
    field_588 = Field(tag = '588',
                indicators = ['0',' '],
                subfields = [
                    'a', 'Description based on indicia and Grand Comics Database.'])
    field_989 = Field(tag = '989',
                indicators = [' ',' '],
                subfields = [
                    'a', 'PN6728'])
    
    for row in range(1, sheet.nrows):
        print('Record ' + str(row))
        
        title = sheet.cell(row, title_col).value
        print(title)
        
        subj_person = sheet.cell(row, subj_person_col).value
        if subj_person:
            subj_person = [x.strip() for x in subj_person.split(';')]
        subj_topical = sheet.cell(row, subj_topical_col).value
        if subj_topical:
            subj_topical = [x.strip() for x in subj_topical.split(';')]
        subj_place = sheet.cell(row, subj_place_col).value
        if subj_place:
            subj_place = [x.strip() for x in subj_place.split(';')]
        subj_corp = sheet.cell(row, subj_corp_col).value
        if subj_corp:
            subj_corp = [x.strip() for x in subj_corp.split(';')]
        genre = sheet.cell(row, genre_col).value
        genre = [x.strip() for x in genre.split(';')]
        pages = str(sheet.cell(row, pages_col).value)
        pub_date = str(sheet.cell(row, pub_date_col).value)
        pub_date_str = date_from_string(pub_date)
        pub_date_year = year_from_date(pub_date_str)
        copy_date = ''
        copy_date = str(sheet.cell(row, copy_date_col).value)
        copy_date_str = date_from_string(copy_date)
        copy_date_year = year_from_date(copy_date_str)
        pub_place = sheet.cell(row, pub_place_col).value
        publisher = sheet.cell(row, publisher_col).value
        edition = sheet.cell(row, edition_col).value
        source = sheet.cell(row, source_col).value
        # source_acq = sheet.cell(row, source_acq_col).value
        characters = sheet.cell(row, characters_col).value
        black_creators = sheet.cell(row, black_creators_col).value
        if black_creators:
            black_creators = [x.strip() for x in black_creators.split(';')]
        black_chars = sheet.cell(row, black_chars_col).value
        if black_chars:
            black_chars = [x.strip() for x in black_chars.split(';')]
        isbn = str(sheet.cell(row, isbn_col).value)
        color = sheet.cell(row, color_col).value
        series_note = sheet.cell(row, series_note_col).value
        gcd_uri = sheet.cell(row, gcd_uri_col).value
        
        country_code = country_code_from_pub_place(pub_place)
        
        copyright_holder = []
        if sheet.cell(row, copyright_holder_col).value:
            copyright_holder = sheet.cell(row, copyright_holder_col).value
            copyright_holder = [x.strip() for x in copyright_holder.split(';')]
        writer = []
        if sheet.cell(row, writer_col).value:
            writer = sheet.cell(row, writer_col).value
            writer = [x.strip() for x in writer.split(';')]
        penciller = []
        if sheet.cell(row, penciller_col).value:
            penciller = sheet.cell(row, penciller_col).value
            penciller = [x.strip() for x in penciller.split(';')]
        inker = []
        if sheet.cell(row, inker_col).value:
            inker = sheet.cell(row, inker_col).value
            inker = [x.strip() for x in inker.split(';')]
        colorist = []
        if sheet.cell(row, colorist_col).value:
            colorist = sheet.cell(row, colorist_col).value
            colorist = [x.strip() for x in colorist.split(';')]
        letterer = []
        if sheet.cell(row, letterer_col).value:
            letterer = sheet.cell(row, letterer_col).value
            letterer = [x.strip() for x in letterer.split(';')]
        cover_artist = []
        if sheet.cell(row, cover_artist_col).value:
            cover_artist = sheet.cell(row, cover_artist_col).value
            cover_artist = [x.strip() for x in cover_artist.split(';')]
        editor = []
        if sheet.cell(row, editor_col).value:
            editor = sheet.cell(row, editor_col).value
            editor = [x.strip() for x in editor.split(';')]
        # hist_note = []
        # if sheet.cell(row, hist_note_col).value:
            # hist_note = sheet.cell(row, hist_note_col).value
        notes = []
        if sheet.cell(row, notes_col).value:
            notes = sheet.cell(row, notes_col).value
        synopsis = []
        if sheet.cell(row, synopsis_col).value:
            synopsis = sheet.cell(row, synopsis_col).value
        toc = []
        if sheet.cell(row, toc_col).value:
            toc = sheet.cell(row, toc_col).value
        in_series = sheet.cell(row, in_series_col).value
        
        contribs = {}
        if copyright_holder:
            for i in copyright_holder:
                contribs.update({i: ['copyright holder']})
        else:
            if writer:
                for i in writer:
                    contribs.update({i: ['writer']})
            if penciller:
                for i in penciller:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('penciller')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['penciller']})
            if inker:
                for i in inker:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('inker')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['inker']})
            if colorist:
                for i in colorist:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('colorist')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['colorist']})
            if letterer:
                for i in letterer:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('letterer')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['letterer']})
            if cover_artist:
                for i in cover_artist:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('cover artist')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['cover artist']})
            if editor:
                for i in editor:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('editor')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['editor']})
        
        record = Record()
        
        # Add boilerplate fields
        record.leader = field_ldr
        record.add_ordered_field(field_040)
        record.add_ordered_field(field_049)
        record.add_ordered_field(field_336_text)
        record.add_ordered_field(field_336_image)
        record.add_ordered_field(field_337)
        record.add_ordered_field(field_338)
        record.add_ordered_field(field_380)
        record.add_ordered_field(field_506)
        record.add_ordered_field(field_542)
        record.add_ordered_field(field_588)
        record.add_ordered_field(field_989)        
        
        # Add other fields
        today = datetime.today().strftime('%y%m%d')
        if copy_date:
            data_008 = today + 't' + pub_date_year + copy_date_year + country_code + 'a     6    000 1 eng d'
        else:
            data_008 = today + 's' + pub_date_year + '    ' + country_code + 'a     6    000 1 eng d'
        field_008 = Field(tag = '008',
                    data = data_008)
        record.add_ordered_field(field_008)
        
        if isbn:
            field_020 = Field(tag = '020',
                        indicators = [' ',' '],
                        subfields = [
                            'a', isbn])
            record.add_ordered_field(field_020)
        
        
        subfields_099 = subfields_from_string(title)
        if 'b' in subfields_099:
            subfields_099.pop(3)
            subfields_099.pop(2)
        if 'n' in subfields_099:
            subfields_099[subfields_099.index('n')] = 'a'
        if subfields_099[1].endswith(',') or subfields_099[1].endswith(':'):
            subfields_099[1] = subfields_099[1][:-1]
        field_099 = Field(tag = '099',
                    indicators = [' ','9'],
                    subfields = subfields_099)
        record.add_ordered_field(field_099)
        
        for i in contribs:
            if i == list(contribs.keys())[0] and 'copyright holder' in contribs[i]: # first contributor is copyright holder
                subfield_content = subfields_from_string_relator(i, contribs[i])
                field_110 = Field(tag = '110',
                        indicators = ['2', ' '],
                        subfields = subfield_content)
                record.add_ordered_field(field_110)
            elif i == list(contribs.keys())[0] and 'writer' in contribs[i]: # first contributor is a writer
                subfield_content = subfields_from_string_relator(i, contribs[i])
                field_100 = Field(tag = '100',
                        indicators = ['1', ' '],
                        subfields = subfield_content)
                record.add_ordered_field(field_100)
            else:
                subfield_content = subfields_from_string_relator(i, contribs[i])
                if ',' not in subfield_content[1]:
                    field_710 = Field(tag = '710',
                                indicators = ['2',' '],
                                subfields = subfield_content)
                    record.add_ordered_field(field_710)
                else:
                    field_700 = Field(tag = '700',
                                indicators = ['1',' '],
                                subfields = subfield_content)
                    record.add_ordered_field(field_700)
        
        if contribs and ('writer' in contribs[list(contribs.keys())[0]] or 'copyright holder' in contribs[list(contribs.keys())[0]]):
            f245_ind1 = 1
        else:
            f245_ind1 = 0
        
        f245_ind2 = 0
        if str.startswith(title, 'The '):
            f245_ind2 = 4
        elif str.startswith(title, 'An '):
            f245_ind2 = 3
        elif str.startswith(title, 'A '):
            f245_ind2 = 2
        
        subfields_245 = subfields_from_string(title)
        # If writer exists, add $c for first writer
        if writer:
            subfields_245[-1] = subfields_245[-1] + ' /'
            subfields_245.append('c')
            subfields_245.append(name_direct_order(subfields_from_string(writer[0])[1]) + ', writer.')
        else:
            # If no writer, add 245 ending punctuation
            subfields_245[-1] = subfields_245[-1] + '.'
        field_245 = Field(tag = '245',
                    indicators = [f245_ind1, f245_ind2],
                    subfields = subfields_245)
        record.add_ordered_field(field_245)
        
        if edition:
            if not edition.endswith('.'):
                edition += '.'
            field_250 = Field(tag = '250',
                    indicators = [' ', ' '],
                    subfields = [
                        'a', edition])
            record.add_ordered_field(field_250)
        
        field_264_1 = Field(tag = '264',
                    indicators = [' ','1'],
                    subfields = [
                        'a', pub_place + ' :',
                        'b', publisher + ',',
                        'c', pub_date_str + '.'])
        record.add_ordered_field(field_264_1)
        
        if copy_date:
            field_264_4 = Field(tag = '264',
                        indicators = [' ','4'],
                        subfields = [
                            'c', '©' + copy_date_str])
            record.add_ordered_field(field_264_4)
        
        if color == 'yes':
            subfields_300 = [
                'a', pages + ' pages :',
                'b', 'chiefly color illustrations.']
        elif color == 'no':
            subfields_300 = [
                'a', pages + ' pages :',
                'b', 'black and white illustrations.']
        
        field_300 = Field(tag = '300',
                    indicators = [' ',' '],
                    subfields = subfields_300)
        record.add_ordered_field(field_300)
        
        if title_to_series(title):
            subfields_490 = title_to_series(title)
            field_490 = Field(tag = '490',
                        indicators = ['1',' '],
                        subfields = subfields_490)
            record.add_ordered_field(field_490)
        
        if series_note:
            if not series_note.endswith('.'):
                series_note += '.'
            field_490_series_note = Field(tag = '490',
                                    indicators = ['1', ' '],
                                    subfields = ['a', series_note])
            record.add_ordered_field(field_490_series_note)
        
        # if hist_note:
        #     field_500_hist = Field(tag = '500',
        #                 indicators = [' ',' '],
        #                 subfields = [
        #                     'a', hist_note + '.'])
        #     record.add_ordered_field(field_500_hist)
        
        if notes:
            field_500_notes = Field(tag = '500',
                        indicators = [' ',' '],
                        subfields = [
                            'a', notes + '.'])
            record.add_ordered_field(field_500_notes)
        
        if toc:
            if not toc.endswith('.') and not toc.endswith('?') and not toc.endswith('!'):
                toc += '.'
            field_505 = Field(tag = '505',
                        indicators = ['0',' '],
                        subfields = [
                            'a', toc])
            record.add_ordered_field(field_505)
        
        if synopsis:
            field_520 = Field(tag = '520',
                        indicators = [' ',' '],
                        subfields = [
                            'a', synopsis])
            record.add_ordered_field(field_520)
        
        if black_creators:
            for i in black_creators:
                if not i.endswith('.'):
                    i += '.'
                field_590_creators = Field(tag = '590',
                            indicators = [' ',' '],
                            subfields = [
                                'a', i])
                record.add_ordered_field(field_590_creators)
        
        if black_chars:
            for i in black_chars:
                if not i.endswith('.'):
                    i += '.'
                field_590_chars = Field(tag = '590',
                            indicators = [' ',' '],
                            subfields = [
                                'a', i])
                record.add_ordered_field(field_590_chars)
        
        if source:
            field_541_source = Field(tag = '541',
                        indicators = [' ',' '],
                        subfields = [
                            'a', source + '.'])
            record.add_ordered_field(field_541_source)
        
        # if source_acq:
            # field_541_source_acq = Field(tag = '541',
                        # indicators = [' ',' '],
                        # subfields = [
                            # 'a', source_acq + '.'])
            # record.add_ordered_field(field_541_source_acq)
        
        if subj_person:
            for i in subj_person:
                i_subfields = subfields_from_string(i)
                
                # Set first indicator based on presence of comma in $a
                if 'a' in i_subfields:
                    if ',' in i_subfields[i_subfields.index('a') + 1]:
                        field_600_ind1 = '1'
                    else:
                        field_600_ind1 = '0'
                
                if '1' in i_subfields:
                    last_except_subf1 = i_subfields.index('1') - 1
                else:
                    last_except_subf1 = len(i_subfields) - 1
                
                if i_subfields[last_except_subf1].endswith(','):
                    i_subfields[last_except_subf1] = re.sub(r'^(.*),$', r'\g<1>.', i_subfields[last_except_subf1])
                if not i_subfields[last_except_subf1].endswith('.') and not i_subfields[last_except_subf1].endswith(')') and not i_subfields[last_except_subf1].endswith('?') and not i_subfields[last_except_subf1].endswith('-'):
                    i_subfields[last_except_subf1] += '.'
                
                field_600 = Field(tag = '600', 
                            indicators = [field_600_ind1,'0'],
                            subfields = i_subfields)
                record.add_ordered_field(field_600)
        
        if subj_topical:
            for i in subj_topical:
                i_subfields = subfields_from_string(i)
                if not i_subfields[-1].endswith('.') and not i_subfields[-1].endswith(')'):
                    i_subfields[-1] += '.'
                field_650 = Field(tag = '650',
                            indicators = [' ','0'],
                            subfields = i_subfields)
                record.add_ordered_field(field_650)
        
        if subj_place:
            for i in subj_place:
                i_subfields = subfields_from_string(i)
                if not i_subfields[-1].endswith('.') and not i_subfields[-1].endswith(')'):
                    i_subfields[-1] += '.'
                field_651 = Field(tag = '651',
                        indicators = [' ','0'],
                        subfields = i_subfields)
                record.add_ordered_field(field_651)
        
        if subj_corp:
            for i in subj_corp:
                i_subfields = subfields_from_string(i)
                if not i_subfields[-1].endswith('.') and not i_subfields[-1].endswith(')'):
                    i_subfields[-1] += '.'
                field_610 = Field(tag = '610',
                        indicators = ['1','0'],
                        subfields = i_subfields)
                record.add_ordered_field(field_610)
        
        if genre:
            for i in genre:
                if not i.endswith('.') and not i.endswith(')'):
                    i += '.'
                field_655 = Field(tag = '655',
                        indicators = [' ','7'],
                        subfields = [
                            'a', i,
                            '2', 'lcgft'])
                record.add_ordered_field(field_655)
        
        if characters:
            field_500_chars = Field(tag = '500',
                        indicators = [' ', ' '],
                        subfields = [
                            'a', characters])
            record.add_ordered_field(field_500_chars)
        
        if gcd_uri:
            title_758 = subfields_from_string(title)[1]
            if title_758.endswith(',') or title_758.endswith(':'):
                title_758 = title_758[:-1]
            field_758 = Field(tag = '758',
                        indicators = [' ',' '],
                        subfields = [
                            '4', 'http://rdaregistry.info/Elements/m/P30135',
                            'i', 'Has work manifested:',
                            'a', title_758,
                            '1', gcd_uri])
            record.add_ordered_field(field_758)
        
        if in_series:
            subfields_773 = subfields_from_string(in_series)
            field_773 = Field(tag = '773',
                        indicators = ['0','8'],
                        subfields = subfields_773)
            record.add_ordered_field(field_773)
        
        subfields_852 = [
            'b', 'CARRIER',
            'c', 'carrspec']
        if len(subfields_099) == 4:
            subfields_852.append('h')
            subfields_852.append(subfields_099[1])
            subfields_852.append('i')
            subfields_852.append(subfields_099[3])
        if len(subfields_099) == 2:
            subfields_852.append('h')
            subfields_852.append(subfields_099[1])
        if edition:
            if edition.endswith('.'):
                edition = edition[:-1]
            subfields_852.append('z')
            subfields_852.append(edition)
        
        field_852 = Field(tag = '852',
                    indicators = ['8',' '],
                    subfields = subfields_852)
        record.add_ordered_field(field_852)
        
        outmarc.write(record.as_marc())
        print()
    outmarc.close()
Exemplo n.º 15
0
Arquivo: logic.py Projeto: NateWr/rua
def book_to_mark21_file(book, owner, xml=False):
	"""Build a MARC 21 record for *book* and store it as a file.

	Args:
		book: Book object. Its ``languages``, ``author``, ``press_editors``,
			``series``, ``title``, ``id``, ``book_type``, ``pages`` and
			``publication_date`` attributes are read.
		owner: Passed through unchanged to ``handle_marc21_file``.
		xml: When true the record is serialised with ``record_to_xml`` and
			the filename ends in ``.xml``; otherwise ``record.as_marc()`` is
			used and the filename ends in ``.dat``.

	Returns:
		The ``pk`` of the object returned by ``handle_marc21_file``.
	"""
	# New record
	record = Record()

	# Number and value explanation : http://www.loc.gov/marc/bibliographic/bdleader.html
	# Adding Leader tags
	leader = list(record.leader)
	leader[5] = 'n'  # New
	leader[6] = 'a'  # For manuscript file use 't'
	leader[7] = 'm'  # Monograph
	leader[9] = 'a'
	leader[19] = '#'
	record.leader = "".join(leader)

	# Category of material - Text
	record.add_field(record_control_field('007', 't'))

	# Languages
	# NOTE(review): one 008 field is added per language; MARC 21 appears to
	# define 008 as non-repeatable - confirm before relying on this output.
	languages = book.languages.all()
	if languages:
		for lang in languages:
			record.add_field(record_control_field('008', lang.code))
	else:
		record.add_field(record_control_field('008', 'eng'))

	# ISBN - International Standard Book Number
	# Every identifier except pub_id, urn and doi is emitted as an 020 field.
	identifiers = (
		models.Identifier.objects.filter(book=book)
		.exclude(identifier='pub_id')
		.exclude(identifier='urn')
		.exclude(identifier='doi')
	)
	for identifier in identifiers:
		isbn_value = str(identifier.value)
		if book.book_type:
			isbn_value = isbn_value + ' ' + book.book_type
		record.add_field(record_field('020', ['#', '#'], ['a', isbn_value]))

	# Source of acquisition
	base_url = _general_setting('base_url', 'localhost:8000')
	book_url = 'http://%s/editor/submission/%s/' % (base_url, book.id)
	record.add_field(record_field('030', ['#', '#'], ['b', book_url]))

	# Main entry - Personal name
	# author_names collects the full names (each followed by a space) for the
	# 245 $c statement of responsibility.  The original assigned to a
	# misspelled variable, so $c was always empty.
	author_names = ''
	for author in book.author.all():
		author_names += author.full_name() + ' '
		name = author.last_name + ', ' + author.first_name
		if author.middle_name:
			name = name + ' ' + author.middle_name[:1] + '.'
		record.add_field(record_field('100', ['1', '#'], ['a', name]))

	# Title statement
	# Second indicator is '4' when the title starts with "the" (any case),
	# otherwise '0'.
	first_word = book.title.split(' ')[0]
	second_indicator = '4' if first_word.lower() == 'the' else '0'
	record.add_field(record_field(
		'245', ['1', second_indicator], ['a', book.title, 'c', author_names]))

	# Publication
	press_name = _general_setting('press_name')
	city = _general_setting('city')

	if book.publication_date:
		publication_info = []
		# Press' city
		if city:
			publication_info.extend(['a', str(city)])
		# Press' name
		if press_name:
			publication_info.extend(['b', str(press_name)])
		# Date of Publication
		publication_info.extend(['c', str(book.publication_date)])
		record.add_field(record_field('260', ['#', '#'], publication_info))

	# Physical details
	if book.pages:
		record.add_field(record_field('300', ['#', '#'], ['a', str(book.pages) + ' pages']))

	# Content type
	record.add_field(record_field('336', ['#', '#'], ['a', 'text', '2', 'rdacontent']))

	# Media type
	record.add_field(record_field('337', ['#', '#'], ['a', 'unmediated', '2', 'rdamedia']))

	# Carrier type
	record.add_field(record_field('338', ['#', '#'], ['a', 'volume', '2', 'rdacarrier']))

	# Language note
	if languages:
		for lang in languages:
			record.add_field(record_field('546', ['#', '#'], ['a', lang.display]))
	else:
		record.add_field(record_field('546', ['#', '#'], ['a', 'In English']))

	# Editors
	for editor in book.press_editors.all():
		editor_name = '%s, %s' % (editor.last_name, editor.first_name)
		record.add_field(record_field('700', ['1', '#'], ['a', editor_name, 'e', 'Press editor']))

	# Series
	if book.series:
		record.add_field(record_field('830', ['#', '0'], ['a', book.series.name]))
		series_editor = book.series.editor
		if series_editor:
			editor_name = '%s, %s' % (series_editor.last_name, series_editor.first_name)
			record.add_field(record_field('700', ['1', '#'], ['a', editor_name, 'e', 'Series editor']))

	# Add record to file
	# Only ASCII letters, digits, newlines and dots of the lower-cased title
	# are kept in the filename.
	safe_title = re.sub(r'[^a-zA-Z0-9\n\.]', '', book.title.lower())
	if xml:
		filename = 'book_%s_%s_marc21.xml' % (book.id, safe_title)
		content = record_to_xml(record, quiet=False, namespace=False)
	else:
		filename = 'book_%s_%s_marc21.dat' % (book.id, safe_title)
		content = record.as_marc()
	marc_file = handle_marc21_file(content, filename, book, owner)
	return marc_file.pk


def _general_setting(name, default=None):
	"""Return the value of the 'general' group setting *name*, or *default* if the lookup fails."""
	try:
		return models.Setting.objects.get(group__name='general', name=name).value
	except Exception:
		# Best-effort lookup: any failure falls back to the default, but
		# KeyboardInterrupt/SystemExit are no longer swallowed.
		return default
Exemplo n.º 16
0
 def write(self, record: Record) -> None:
     """Write one record to this writer's file handle.

     The call is first passed to the base ``Writer.write``; afterwards the
     output of ``record.as_marc()`` is written to ``self.file_handle``.
     """
     Writer.write(self, record)
     emit = self.file_handle.write
     emit(record.as_marc())
Exemplo n.º 17
0
            rec_505 = Field(tag='505',
                            indicators=['0', ' '],
                            subfields=['a', toc])
            new_marc_rec.add_ordered_field(rec_505)
        #--------------------------------------------
        # Create 520 field for the Abstract
        abstract = fields[16].strip()
        if not abstract == '':
            rec_520 = Field(tag='520',
                            indicators=['3', ' '],
                            subfields=['a', abstract])
            new_marc_rec.add_ordered_field(rec_520)
        #--------------------------------------------
        # Create 655 field for the Genre/Form term
        # NOTE: if multiple terms are used in the field, you may want to try to break them up if they're separated by commas or other consistent punctuation
        # Marked field as $2local since it may not be authorized term
        genre = fields[17].strip()
        if not genre == '':
            rec_655 = Field(tag='655',
                            indicators=[' ', '7'],
                            subfields=['a', genre, '2', 'local'])
            new_marc_rec.add_ordered_field(rec_655)

        print 'After:'
        print new_marc_rec
        marc_recs_out.write(new_marc_rec.as_marc())

    rec_cnt += 1

marc_recs_out.close()