Example #1
 def test_process_record(self):
     for record_type in self.records:
         for r in self.records[record_type]:
             original_record = Record()
             # the record type is read from position 6 of the leader:
             if record_type == "music":
                 original_record.leader = "XXXXXXcX"
             elif record_type == "text":
                 original_record.leader = "XXXXXXaX"
             elif record_type == "movie":
                 original_record.leader = "XXXXXXgX"
             else:
                 raise ValueError("Testattava aineistotyyppi on tuntematon")
             original_record.add_field(Field(tag='001', data='00000001'))
             if record_type == "movie":
                 original_record.add_field(Field(tag='007', data='v'))
             original_fields = []
             for field in r['original']:
                 original_fields.append(field)
                 original_record.add_field(self.str_to_marc(field))
             new_record = self.cc.process_record(original_record)
             new_fields = []
             result_fields = []
             for field in new_record.get_fields():
                 if field.tag not in ['001', '007']:
                     new_fields.append(str(field))
             for field in r['converted']:
                 result_fields.append(field)
             self.assertEqual(result_fields, new_fields)
Example #2
def platform2pymarc_obj(data=None):
    """
    converts platform bib data into pymarc object
    args:
        data in json format
    return:
        pymarc Record obj
    """
    record = Record(to_unicode=True, force_utf8=True)
    # parse variable fields
    varFields = data.get("varFields")
    for f in varFields:
        if f.get("fieldTag") == "_":
            record.leader = f.get("content")
        # control fields case
        elif f.get("subfields") is None:
            field = Field(
                tag=f.get("marcTag"),
                indicators=[f.get("ind1"), f.get("ind2")],
                data=f.get("content"),
            )
            record.add_field(field)
        else:  # variable fields
            subfields = []
            for d in f.get("subfields"):
                subfields.append(d.get("tag"))
                subfields.append(d.get("content"))
            field = Field(
                tag=f.get("marcTag"),
                indicators=[f.get("ind1"), f.get("ind2")],
                subfields=subfields,
            )
            record.add_field(field)
    return record
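
To show the expected input shape, here is a minimal usage sketch. The varFields dictionary below is hypothetical, inferred from the parsing branches above rather than taken from real Platform API output.

# Hypothetical input: "_" carries the leader, entries without "subfields"
# become control fields, and the rest become data fields.
sample = {
    "varFields": [
        {"fieldTag": "_", "content": "00000nam a2200000u  4500"},
        {"fieldTag": "y", "marcTag": "001", "ind1": " ", "ind2": " ",
         "subfields": None, "content": "b12345678"},
        {"fieldTag": "t", "marcTag": "245", "ind1": "0", "ind2": "0",
         "subfields": [{"tag": "a", "content": "Test title"}]},
    ]
}

record = platform2pymarc_obj(sample)
print(record["245"]["a"])  # -> Test title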
Example #3
    def test_nypl_branches_BT_SERIES_YA_graphic_novel_compound_name(self):
        bib = Record()
        bib.leader = "00000nam a2200000u  4500"
        tags = []
        tags.append(Field(tag="001", data="0001"))
        tags.append(
            Field(tag="245",
                  indicators=["0", "0"],
                  subfields=["a", "Test title"]))
        tags.append(
            Field(
                tag="091",
                indicators=[" ", " "],
                subfields=["a", "GRAPHIC GN FIC COMPOUND NAME"],
            ))
        for tag in tags:
            bib.add_ordered_field(tag)

        mod_bib = patches.bib_patches("nypl", "branches", "cat", "BT SERIES",
                                      bib)
        correct_indicators = [" ", " "]
        correct_subfields = [
            "f", "GRAPHIC", "a", "GN FIC", "c", "COMPOUND NAME"
        ]

        self.assertEqual(correct_indicators,
                         mod_bib.get_fields("091")[0].indicators)
        self.assertEqual(correct_subfields,
                         mod_bib.get_fields("091")[0].subfields)
Example #4
    def test_nypl_branch_BT_SERIES_Spanish_prefix(self):
        bib = Record()
        bib.leader = "00000nam a2200000u  4500"
        tags = []
        tags.append(Field(tag="001", data="0001"))
        tags.append(
            Field(tag="245",
                  indicators=["0", "0"],
                  subfields=["a", "Test title"]))
        tags.append(
            Field(
                tag="091",
                indicators=[" ", " "],
                subfields=["a", "J SPA E COMPOUND NAME"],
            ))
        for tag in tags:
            bib.add_ordered_field(tag)

        mod_bib = patches.bib_patches("nypl", "branches", "cat", "BT SERIES",
                                      bib)
        correct_indicators = [" ", " "]
        correct_subfields = ["p", "J SPA", "a", "E", "c", "COMPOUND NAME"]

        self.assertEqual(correct_indicators,
                         mod_bib.get_fields("091")[0].indicators)
        self.assertEqual(correct_subfields,
                         mod_bib.get_fields("091")[0].subfields)
Example #5
def transpose_to_marc21(record):
    Mrecord = Record(force_utf8=True)
    Mrecord.leader = record["_LEADER"]
    for field in record:
        if isint(field):
            if int(field) < 10:
                # control fields (001-009) carry their data directly
                if isinstance(record[field], list):
                    for elem in record[field]:
                        Mrecord.add_field(Field(tag=field, data=elem))
                elif isinstance(record[field], str):
                    Mrecord.add_field(Field(tag=field, data=record[field]))
            else:
                # data fields: each entry maps an indicator pair to a list of subfield dicts
                for subfield in record[field]:
                    for ind, values in subfield.items():
                        indicators = []
                        subfields = []
                        for elem in values:
                            for k, v in elem.items():
                                if isinstance(v, str):
                                    subfields.append(k)
                                    subfields.append(v)
                                elif isinstance(v, list):
                                    for subfield_elem in v:
                                        subfields.append(k)
                                        subfields.append(subfield_elem)
                        for elem in ind:
                            indicators.append(elem)
                        Mrecord.add_field(Field(tag=str(field),
                                                indicators=indicators,
                                                subfields=subfields))
    return Mrecord.as_marc()
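
The dict layout this transposer expects is easiest to see with a small, hypothetical example; the keys and values below are illustrative, inferred from the access pattern above (isint appears to be the module's helper for detecting numeric field keys).

# Hypothetical input record, inferred from how transpose_to_marc21 reads the dict.
example = {
    "_LEADER": "00000nam a2200000 a 4500",
    "001": "123456789",        # control field: a plain string (or a list of strings)
    "245": [                   # data field: list of {indicator pair: [subfield dicts]}
        {"10": [{"a": "Example title /"}, {"c": ["Author A", "Author B"]}]},
    ],
}

marc_bytes = transpose_to_marc21(example)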
Example #6
    def decode_record(self, record):
        r"""
        >>> reader = Reader('http://opac.uthsc.edu', 2)
        >>> raw = "\nLEADER 00000cas  2200517 a 4500 \n001    1481253 \n003    OCoLC \n005    19951109120000.0 \n008    750727c19589999fr qrzp   b   0   b0fre d \n010    sn 86012727 \n022    0003-3995 \n030    AGTQAH \n035    0062827|bMULS|aPITT  NO.  0639600000|asa64872000|bFULS \n040    MUL|cMUL|dFUL|dOCL|dCOO|dNYG|dHUL|dSER|dAIP|dNST|dAGL|dDLC\n       |dTUM \n041 0  engfre|bgeritaspa \n042    nsdp \n049    TUMS \n069 1  A32025000 \n210 0  Ann. genet. \n222  0 Annales de genetique \n229 00 Annales de genetique \n229    Ann Genet \n242 00 Annals on genetics \n245 00 Annales de genetique. \n260    Paris :|bExpansion scientifique,|c1958-2004. \n300    v. :|bill. &#59;|c28 cm. \n310    Quarterly \n321    Two no. a year \n362 0  1,1958-47,2004. \n510 1  Excerpta medica \n510 1  Index medicus|x0019-3879 \n510 2  Biological abstracts|x0006-3169 \n510 2  Chemical abstracts|x0009-2258 \n510 2  Life sciences collection \n510 0  Bulletin signaletique \n510 0  Current contents \n546    French and English, with summaries in German, Italian, and\n       Spanish. \n550    Journal of the Societe francaise de genetique. \n650  2 Genetics|vPeriodicals. \n710 2  Societ\xe9 fran\xe7aise de genetique. \n785 00 |tEuropean journal of medical genetics.  \n856 41 |uhttp://library.uthsc.edu/ems/eresource/3581|zFull text \n       at ScienceDirect: 43(1) Jan 2000 - 47(4) Dec 2004 \n936    Unknown|ajuin 1977 \n"
        >>> record = reader.decode_record(raw)
        >>> print record.title
        Annales de genetique
        """
        
        pseudo_marc = record.strip().split('\n')
        raw_fields = []
        if pseudo_marc[0][0:6] == 'LEADER':
            record = Record()
            record.leader = pseudo_marc[0][7:].strip()
        else:
            return None

        for field in pseudo_marc[1:]:
            tag = field[:3]
            data = unescape_entities(field[6:].decode('latin1')).encode('utf8')

            if tag.startswith(' '):
                # Additional field data needs to be prepended with an extra space 
                # for certain fields ...
                #for special_tag in ('55','260'):
                #    data = " %s" % (data,) if tag.startswith(special_tag) else data
                data = " %s" % (data.strip(),)
                raw_fields[-1]['value'] = "%s%s" % (raw_fields[-1]['value'], data)
                raw_fields[-1]['raw'] = "%s%s" % (raw_fields[-1]['raw'], field.strip())
            else:
                data = data if (tag < '010' and tag.isdigit()) else "a%s" % (data,)
                raw_fields.append({
                    'tag': tag, 
                    'indicator1': field[3], 
                    'indicator2': field[4], 
                    'value': data.strip(), 
                    'raw': field.strip()
                })
        
        for raw in raw_fields:
            tag = raw['tag']
            data = raw['value'].strip()
            field = Field(tag=tag, indicators=[raw['indicator1'], raw['indicator2']], data=data)
            if not field.is_control_field():
                for sub in data.split('|'):
                    try:
                        field.add_subfield(sub[0].strip(), sub[1:].strip())
                    except Exception:
                        # Skip blank/empty subfields
                        continue
            record.add_field(field)
            
        record.parse_leader()
        
        # Disregard record if no title present
        if not record.get_fields('245'):
            return None
        else:
            return record
Example #7
def record_sorted(record: Record) -> Record:
    result = Record()
    result.leader = record.leader
    for i in range(1000):
        field_name = str(i).zfill(3)  # zero-pad to a three-character MARC tag
        # collect every field stored under this tag in the original record
        old_fields = record.get_fields(field_name)
        for field in old_fields:
            result.add_field(field)
    return result
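
A minimal usage sketch for record_sorted (file names are placeholders): read records and write them back with their fields grouped in ascending tag order.

# Illustrative driver; any pymarc-readable MARC file works here.
from pymarc import MARCReader

with open("input.mrc", "rb") as src, open("sorted.mrc", "wb") as dst:
    for rec in MARCReader(src):
        dst.write(record_sorted(rec).as_marc())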
Example #8
    def test_writing_unicode(self):
        record = Record()
        record.add_field(Field(245, ['1', '0'], ['a', unichr(0x1234)]))
        record.leader = '         a              '
        writer = MARCWriter(open('test/foo', 'w'))
        writer.write(record)
        writer.close()

        reader = MARCReader(open('test/foo'), to_unicode=True)
        record = reader.next()
        self.assertEqual(record['245']['a'], unichr(0x1234))

        os.remove('test/foo')
Example #9
File: marc8.py Project: mjgiarlo/pymarc
    def test_writing_unicode(self):
        record = Record()
        record.add_field(Field(245, ['1', '0'], ['a', unichr(0x1234)]))
        record.leader = '         a              '
        writer = MARCWriter(open('test/foo', 'w'))
        writer.write(record)
        writer.close()

        reader = MARCReader(open('test/foo'), to_unicode=True)
        record = reader.next()
        self.assertEqual(record['245']['a'], unichr(0x1234))

        os.remove('test/foo')
Example #10
    def test_writing_unicode(self):
        record = Record()
        record.add_field(Field(245, ["1", "0"], ["a", chr(0x1234)]))
        record.leader = "         a              "
        writer = MARCWriter(open("test/foo", "wb"))
        writer.write(record)
        writer.close()

        reader = MARCReader(open("test/foo", "rb"), to_unicode=True)
        record = next(reader)
        self.assertEqual(record["245"]["a"], chr(0x1234))
        reader.close()

        os.remove("test/foo")
Example #11
    def write_record(self, references, field035="", field008=""):
        """Zapise do suboru jeden record vo forme iso2709        
        Arguments:
            field035 -- retazec obsahujuci data do pola 035
            field008 -- retazec obsahujuci data do pola 008
            references {set(reference)} -- set ohlasov na zapisanie
            do pola 591 
        """

        if (field008 == ""):
            field008 = self.CONST_FIELD_008
        record = Record(force_utf8=True)
        record.add_field(Field(tag='008', data=field008))
        record.add_field(
            Field(tag='035',
                  indicators=[self.CONST_INDICATOR_1, self.CONST_INDICATOR_2],
                  subfields=['a', field035]))
        for i in references:
            record.add_field(i.to_marc_field())
        record.leader = record.leader[:5] + 'n' + record.leader[6:]    # leader/05 record status: 'n' (new)
        record.leader = record.leader[:7] + 'b' + record.leader[8:]    # leader/07 bibliographic level
        record.leader = record.leader[:18] + 'a' + record.leader[19:]  # leader/18 descriptive cataloging form
        self.writer.write(record)
Example #12
 def test_nypl_branch_BT_SERIES_exception(self):
     bib = Record()
     bib.leader = "00000nam a2200000u  4500"
     tags = []
     tags.append(Field(tag="001", data="0001"))
     tags.append(
         Field(tag="245",
               indicators=["0", "0"],
               subfields=["a", "Test title"]))
     tags.append(
         Field(tag="091",
               indicators=[" ", " "],
               subfields=["a", "J B EDISON C"]))
     for tag in tags:
         bib.add_ordered_field(tag)
     with self.assertRaises(AssertionError):
         patches.bib_patches("nypl", "branches", "cat", "BT SERIES", bib)
Example #13
 def __next__(self):
     jobj = next(self.iter)
     rec = Record()
     rec.leader = jobj['leader']
     for field in jobj['fields']:
         k,v = list(field.items())[0]
         if 'subfields' in v and hasattr(v,'update'):
             # flatten m-i-j dict to list in pymarc
             subfields = []
             for sub in v['subfields']:
                 for code,value in sub.items():
                     subfields.extend((code,value))
             fld = Field(tag=k,subfields=subfields,indicators=[v['ind1'], v['ind2']])
         else:
             fld = Field(tag=k,data=v)
         rec.add_field(fld)
     return rec
Example #14
File: reader.py Project: termim/pymarc
 def __next__(self):
     jobj = next(self.iter)
     rec = Record()
     rec.leader = jobj['leader']
     for field in jobj['fields']:
         k,v = list(field.items())[0]
         if 'subfields' in v and hasattr(v,'update'):
             # flatten m-i-j dict to list in pymarc
             subfields = []
             for sub in v['subfields']:
                 for code,value in sub.items():
                     subfields.extend((code,value))
             fld = Field(tag=k,subfields=subfields,indicators=[v['ind1'], v['ind2']])
         else:
             fld = Field(tag=k,data=v)
         rec.add_field(fld)
     return rec
Example #15
 def __next__(self):
     jobj = next(self.iter)
     rec = Record()
     rec.leader = jobj["leader"]
     for field in jobj["fields"]:
         k, v = list(field.items())[0]
         if "subfields" in v and hasattr(v, "update"):
             # flatten m-i-j dict to list in pymarc
             subfields = []
             for sub in v["subfields"]:
                 for code, value in sub.items():
                     subfields.extend((code, value))
             fld = Field(tag=k,
                         subfields=subfields,
                         indicators=[v["ind1"], v["ind2"]])
         else:
             fld = Field(tag=k, data=v)
         rec.add_field(fld)
     return rec
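
The MARC-in-JSON object these three readers flatten has the following general shape; the values are illustrative, but the keys match what the code accesses.

# Illustrative MARC-in-JSON record: control fields map a tag to a string,
# data fields map a tag to a dict with ind1/ind2 and a list of subfield dicts.
jobj = {
    "leader": "00000nam a2200000 a 4500",
    "fields": [
        {"001": "123456789"},
        {"245": {"ind1": "1", "ind2": "0",
                 "subfields": [{"a": "Test title /"}, {"c": "Someone."}]}},
    ],
}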
Example #16
    def next(self):
        """
        To support iteration.
        """
        record_data = ''
        line = self.file_handle.readline()
        if not line:
            raise StopIteration
        key = line[0:9]
        current_key = key

        while key == current_key:
            record_data += line
            position = self.file_handle.tell()
            line = self.file_handle.readline()
            key = line[0:9]

        self.file_handle.seek(position)
        record = Record()
        for recordln in record_data.splitlines():
            tag = recordln[10:13]
            ind1 = recordln[13:14]
            ind2 = recordln[14:15]
            rest = recordln[18:]
            #if tag == 'FMT': pass
            if tag == 'LDR':
                record.leader = rest.replace('^', ' ')
            elif tag < '010' and tag.isdigit():
                if tag == '008': rest = rest.replace('^', ' ')
                record.add_field(Field(tag=tag, data=rest))
            else:
                subfields = list()
                subfield_data = rest.split('$$')
                subfield_data.pop(0)
                for subfield in subfield_data:
                    subfields.extend([subfield[0], subfield[1:]])
                record.add_field(
                    Field(tag=tag,
                          indicators=[ind1, ind2],
                          subfields=subfields))
        return record
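
The fixed-column line layout this iterator assumes is easiest to see with a small, hypothetical input. The offsets are inferred purely from the slicing above: columns 0-8 hold the record key that groups consecutive lines into one record, columns 10-12 the tag, 13-14 the indicators, and the field content starts at column 18, with '$$' introducing each subfield and '^' standing in for a blank.

# Hypothetical two-line record; consecutive lines share the same 9-character key.
sample_lines = [
    "000000001 LDR     00000nam^a2200000u^^4500",
    "000000001 24510   $$aTest title /$$cSomeone.",
]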
Example #17
    def test_bib_no_oclc_prefix(self):
        bib = Record()
        bib.leader = "00000nam a2200000u  4500"
        tags = []
        tags.append(Field(tag="001", data="bl00000001"))
        tags.append(
            Field(tag="245",
                  indicators=["0", "0"],
                  subfields=["a", "Test title"]))
        tags.append(
            Field(
                tag="091",
                indicators=[" ", " "],
                subfields=["a", "GRAPHIC GN FIC COMPOUND NAME"],
            ))
        for tag in tags:
            bib.add_ordered_field(tag)

        mod_bib = patches.bib_patches("nypl", "branches", "cat", "Amalivre",
                                      bib)

        self.assertEqual(mod_bib.get_fields("001")[0].data, "bl00000001")
Example #18
def record_crosswalk(record):

    # A new record object is created. As we walk through fields in the original record, we will add the mapped fields to this new one.
    newrecord = Record()

    # Local variables to hold values that will be used outside of the for loop are defined here.
    recordid = ''
    callnr = ''
    callorigin = ''
    newclassif = ''

    # The first 572 field is mapped differently, this variable enables this behaviour. After the first 572 is mapped, it is set to False.
    firstsubject = True

    # Walk through each field in the original record
    for field in record.get_fields():

        # 001 field will not be mapped as is, but is recorded as record ID. It will be stored as a 035 later on.
        if field.tag == '001':
            recordid = field.value()

        # 008 field is mapped as is (if it exists)
        elif field.tag == '008':
            newrecord.add_field(field)

        # 019__$a field is mapped to 680__$i
        elif field.tag == '019':
            try:
                indexnote = field.get_subfields('a')[0]
                newrecord.add_ordered_field(
                    Field(tag='680',
                          indicators=[' ', ' '],
                          subfields=['i', indexnote]))
            except IndexError:
                #print(f"WARNING: record {recordid} has no 172__$a.")
                print(
                    f"{recordid},019__$a,{field.value},Notice avec 019 mais sans 019__$a"
                )

        # 035 fields are mapped as is (if they exist)
        elif field.tag == '035':
            newrecord.add_field(field)

        # 172__$2 is mapped to 084__$a according to which vocabulary is being mapped.
        elif field.tag == '172':
            vocab = field.get_subfields('2')[0]
            if vocab in ["BCUR1", "BCUR2", "BCUR3"]:
                mappedvalue = "CLASBCUR"
            elif vocab in ["vddoc", "vddoc-la"]:
                mappedvalue = "vddoc"
            elif vocab == "laf":
                mappedvalue = "laf"
            else:
                mappedvalue = vocab
                #print(f"WARNING: 172__$2 for record {recordid} ({vocab}) is not in the list of mapped vocabularies.")
                print(
                    f"{recordid},172__$2,{vocab},Ne figure pas sur la liste des vocabulaires traités"
                )

            newrecord.add_ordered_field(
                Field(tag='084',
                      indicators=[' ', ' '],
                      subfields=['a', mappedvalue]))

            # 172__$a will be mapped to 153__$a later on
            try:
                callnr = field.get_subfields('a')[0]
            except IndexError:
                #print(f"WARNING: record {recordid} has no 172__$a.")
                print(f"{recordid},172__$a,,Notice sans champ  172__$a")

        # The first 572 is mapped to 153__$j (concatenating subfields)
        elif field.tag == '572':
            if firstsubject == True:
                # Extract subfields and concatenate them. The get_subfield() method will return them in the
                # order they are stored in the record, so no reordering is required.
                newclassif = ' -- '.join(
                    field.get_subfields('a', 'c', 'd', 'e', 'h', 'l', 'm', 's',
                                        't', 'v', 'x', 'X', 'y', 'z'))
                firstsubject = False

                # Look for unexpected subfields
                if len(field.get_subfields('9', '[')) > 0:
                    #print(f"WARNING: Record {recordid} has unexpected 752 subfields:")
                    #print(field)
                    print(
                        f"{recordid},752,{field.value},Sous-champ(s) 752 inattendu(s)"
                    )

            # All 572s are mapped to 753s
            # Keeping the original subfield structure
            subjectfield = field
            subjectfield.tag = '753'
            newrecord.add_ordered_field(subjectfield)

        # 680 fields are mapped as is (if they exist)
        elif field.tag == '680':
            newrecord.add_ordered_field(field)

        # Log all unmapped fields, except 003, 005, 039, 040 and 072
        elif field.tag not in ['003', '005', '039', '040', '072']:
            #print(f"SKIPPED: Field not mapped for record {recordid}: {field}")
            print(f"{recordid},{field.tag},{field},Champ non mappé")

    # Check for empty or missing call numbers
    if len(callnr) < 1:
        #print(f"WARNING: Record {recordid} has an empty call number in 153__$a")
        print(f"{recordid},153__$a,,Indice vide ou manquant")

    # Put the 153 field together
    if len(newclassif) < 1:
        # If there is no concatenated classification string, it was a record without 572, only store the call number.
        # If the target is in one of the BCUR* vocabularies, also add the target as a $a
        if target in ["BCURmu", "BCURpt", "BCURcg"]:
            newrecord.add_ordered_field(
                Field(tag='153',
                      indicators=[' ', ' '],
                      subfields=['a', callnr, 'a', target]))
        else:
            newrecord.add_ordered_field(
                Field(tag='153',
                      indicators=[' ', ' '],
                      subfields=['a', callnr]))
    else:
        # If there is a concatenated classification string, same process but with the new classification in a $j
        if target in ["BCURmu", "BCURpt", "BCURcg"]:
            newrecord.add_ordered_field(
                Field(tag='153',
                      indicators=[' ', ' '],
                      subfields=['a', callnr, 'a', target, 'j', newclassif]))
        else:
            newrecord.add_ordered_field(
                Field(tag='153',
                      indicators=[' ', ' '],
                      subfields=['a', callnr, 'j', newclassif]))

    # Add the existing 001 field (record id) as an additional 035 with (vtls_reroVD) prefix.
    newrecord.add_ordered_field(
        Field(tag='035',
              indicators=[' ', ' '],
              subfields=['a', "(vtls_reroVD)" + recordid]))

    # 040__$a is set to static value "RNV vdbcul"
    newrecord.add_ordered_field(
        Field(tag='040', indicators=[' ', ' '], subfields=['a', "RNV vdbcul"]))

    # Edit and map the leader field
    # Position 17 is set to 'o' for temporary classifications (input file includes "temp")
    leader = list(record.leader)
    leader[6] = 'w'
    if inputfile.find('temp') > -1:
        leader[17] = 'o'
    else:
        leader[17] = 'n'
    newrecord.leader = ''.join(leader)

    return newrecord
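
A minimal driver sketch for this crosswalk; the file names, and the module-level inputfile and target globals that the function relies on, are illustrative assumptions.

# Hypothetical driver; record_crosswalk reads module-level `inputfile` and `target`.
from pymarc import MARCReader, MARCWriter

inputfile = "BCUR1_temp.mrc"   # a name containing "temp" sets leader/17 to 'o'
target = "BCURmu"              # BCUR* targets add the target itself as an extra 153 $a

with open(inputfile, "rb") as src, open("crosswalk_out.mrc", "wb") as dst:
    writer = MARCWriter(dst)
    for rec in MARCReader(src):
        writer.write(record_crosswalk(rec))
    writer.close()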
Example #19
def stub_marc_bib():
    tags = []
    marc_bib = Record()
    marc_bib.leader = "00000nam a2200000u  4500"
    tags.append(Field(tag="001", data="ocm0001"))
    tags.append(
        Field(tag="245", indicators=["0", "0"], subfields=["a", "Test title"]))
    tags.append(Field(tag="001", data="ocn1111"))
    tags.append(
        Field(
            tag="019",
            indicators=[" ", " "],
            subfields=["a", "some-id-001"],
        ))
    tags.append(
        Field(
            tag="020",
            indicators=[" ", " "],
            subfields=["a", "isbn001", "b", "isbn002"],
        ))
    tags.append(
        Field(
            tag="024",
            indicators=[" ", " "],
            subfields=["a", "upc001"],
        ))
    tags.append(
        Field(
            tag="037",
            indicators=[" ", " "],
            subfields=["a", "some-id-0001", "b", "test-distributor"],
        ))
    tags.append(
        Field(
            tag="037",
            indicators=[" ", " "],
            subfields=["a", "some-id-0002", "b", "Overdrive, Inc."],
        ))
    tags.append(
        Field(
            tag="084",
            indicators=[" ", " "],
            subfields=["a", "some-classification", "2", "test-thesaurus"],
        ))
    tags.append(
        Field(
            tag="091",
            indicators=[" ", " "],
            subfields=["a", "some-callnumber"],
        ))
    tags.append(
        Field(
            tag="099",
            indicators=[" ", " "],
            subfields=["a", "some-callnumber"],
        ))
    tags.append(
        Field(
            tag="263",
            indicators=[" ", " "],
            subfields=["a", "some-date"],
        ))
    tags.append(
        Field(
            tag="856",
            indicators=[" ", "3"],
            subfields=["u", "url1", "3", "public-note-1"],
        ))
    tags.append(
        Field(
            tag="856",
            indicators=[" ", "3"],
            subfields=["u", "url2", "3", "public-note-2"],
        ))
    tags.append(
        Field(
            tag="856",
            indicators=[" ", "3"],
            subfields=["u", "url3", "3", "public-note-3"],
        ))
    tags.append(
        Field(
            tag="838",
            indicators=[" ", " "],
            subfields=["u", "EBSCOhost", "b", "EBSC", "n", "11111"],
        ))
    for tag in tags:
        marc_bib.add_ordered_field(tag)

    return marc_bib
Example #20
def main(arglist):
    parser = argparse.ArgumentParser()
    parser.add_argument('input', help='path to spreadsheet')
    # parser.add_argument('output', help='save directory')
    args = parser.parse_args(arglist)
    
    input = Path(args.input)
    
    # Read spreadsheet
    book_in = xlrd.open_workbook(str(input))
    sheet = book_in.sheet_by_index(0)  # get first sheet
    col_headers = sheet.row_values(0)
    
    title_col = col_headers.index('Title')
    subj_person_col = col_headers.index('Subject_Person')
    subj_topical_col = col_headers.index('Subject_Topical')
    subj_place_col = col_headers.index('Subject_Place')
    subj_corp_col = col_headers.index('Subject_Jurisdictional')
    genre_col = col_headers.index('Genre')
    pages_col = col_headers.index('Pages')
    pub_date_col = col_headers.index('Date') # previously Publication Date
    copy_date_col = col_headers.index('Copyright Date')
    pub_place_col = col_headers.index('Pub_Place')
    publisher_col = col_headers.index('Publisher')
    edition_col = col_headers.index('Edition')
    source_col = col_headers.index('Source')
    # source_acq_col = col_headers.index('Source of Acquisition')
    writer_col = col_headers.index('Writer')
    penciller_col = col_headers.index('Penciller')
    inker_col = col_headers.index('Inker')
    colorist_col = col_headers.index('Colorist')
    letterer_col = col_headers.index('Letterer')
    cover_artist_col = col_headers.index('Cover Artist')
    editor_col = col_headers.index('Editor')
    # hist_note_col = col_headers.index('Historical Note')
    notes_col = col_headers.index('Note')
    characters_col = col_headers.index('Characters')
    synopsis_col = col_headers.index('Story Arc')
    toc_col = col_headers.index('Table of Contents')
    in_series_col = col_headers.index('Is Part of Series')
    black_creators_col = col_headers.index('Black Creators (MARC 590)')
    black_chars_col = col_headers.index('Black Characters (MARC 590)')
    isbn_col = col_headers.index('ISBN')
    color_col = col_headers.index('Color?')
    series_note_col = col_headers.index('Series Note')
    copyright_holder_col = col_headers.index('Copyright holder')
    gcd_uri_col = col_headers.index('Grand Comics Database')
    
    outmarc = open('records.mrc', 'wb')
    
    # Boilerplate fields
    field_ldr = '00000nam a2200000Ii 4500'
    field_040 = Field(tag = '040',
                indicators = [' ',' '],
                subfields = [
                    'a', 'VMC',
                    'b', 'eng',
                    'e', 'rda',
                    'c', 'VMC'])
    field_049 = Field(tag = '049',
                indicators = [' ',' '],
                subfields = [
                    'a', 'VMCS'])
    field_336_text = Field(tag = '336',
                    indicators = [' ',' '],
                    subfields = [
                        'a', 'text',
                        'b', 'txt',
                        '2', 'rdacontent'])
    field_336_image = Field(tag = '336',
                indicators = [' ',' '],
                subfields = [
                    'a', 'still image',
                    'b', 'sti',
                    '2', 'rdacontent'])
    field_337 = Field(tag = '337',
                indicators = [' ',' '],
                subfields = [
                    'a', 'unmediated',
                    'b', 'n',
                    '2', 'rdamedia'])
    field_338 = Field(tag = '338',
                indicators = [' ',' '],
                subfields = [
                    'a', 'volume',
                    'b', 'nc',
                    '2', 'rdacarrier'])
    field_380 = Field(tag = '380',
                indicators = [' ',' '],
                subfields = [
                    'a', 'Comic books and graphic novels.'])
    field_506 = Field(tag = '506',
                    indicators = ['1',' '],
                    subfields = [
                        'a', 'Collection open to research. Researchers must register and agree to copyright and privacy laws before using this collection. Please contact Research Services staff before visiting the James Madison University Special Collections Library to use this collection.'])
    field_542 = Field(tag = '542',
                indicators = [' ',' '],
                subfields = [
                    'a', 'Copyright not evaluated',
                    'u', 'http://rightsstatements.org/vocab/CNE/1.0/'])
    field_588 = Field(tag = '588',
                indicators = ['0',' '],
                subfields = [
                    'a', 'Description based on indicia and Grand Comics Database.'])
    field_989 = Field(tag = '989',
                indicators = [' ',' '],
                subfields = [
                    'a', 'PN6728'])
    
    for row in range(1, sheet.nrows):
        print('Record ' + str(row))
        
        title = sheet.cell(row, title_col).value
        print(title)
        
        subj_person = sheet.cell(row, subj_person_col).value
        if subj_person:
            subj_person = [x.strip() for x in subj_person.split(';')]
        subj_topical = sheet.cell(row, subj_topical_col).value
        if subj_topical:
            subj_topical = [x.strip() for x in subj_topical.split(';')]
        subj_place = sheet.cell(row, subj_place_col).value
        if subj_place:
            subj_place = [x.strip() for x in subj_place.split(';')]
        subj_corp = sheet.cell(row, subj_corp_col).value
        if subj_corp:
            subj_corp = [x.strip() for x in subj_corp.split(';')]
        genre = sheet.cell(row, genre_col).value
        genre = [x.strip() for x in genre.split(';')]
        pages = str(sheet.cell(row, pages_col).value)
        pub_date = str(sheet.cell(row, pub_date_col).value)
        pub_date_str = date_from_string(pub_date)
        pub_date_year = year_from_date(pub_date_str)
        copy_date = ''
        copy_date = str(sheet.cell(row, copy_date_col).value)
        copy_date_str = date_from_string(copy_date)
        copy_date_year = year_from_date(copy_date_str)
        pub_place = sheet.cell(row, pub_place_col).value
        publisher = sheet.cell(row, publisher_col).value
        edition = sheet.cell(row, edition_col).value
        source = sheet.cell(row, source_col).value
        # source_acq = sheet.cell(row, source_acq_col).value
        characters = sheet.cell(row, characters_col).value
        black_creators = sheet.cell(row, black_creators_col).value
        if black_creators:
            black_creators = [x.strip() for x in black_creators.split(';')]
        black_chars = sheet.cell(row, black_chars_col).value
        if black_chars:
            black_chars = [x.strip() for x in black_chars.split(';')]
        isbn = str(sheet.cell(row, isbn_col).value)
        color = sheet.cell(row, color_col).value
        series_note = sheet.cell(row, series_note_col).value
        gcd_uri = sheet.cell(row, gcd_uri_col).value
        
        country_code = country_code_from_pub_place(pub_place)
        
        copyright_holder = []
        if sheet.cell(row, copyright_holder_col).value:
            copyright_holder = sheet.cell(row, copyright_holder_col).value
            copyright_holder = [x.strip() for x in copyright_holder.split(';')]
        writer = []
        if sheet.cell(row, writer_col).value:
            writer = sheet.cell(row, writer_col).value
            writer = [x.strip() for x in writer.split(';')]
        penciller = []
        if sheet.cell(row, penciller_col).value:
            penciller = sheet.cell(row, penciller_col).value
            penciller = [x.strip() for x in penciller.split(';')]
        inker = []
        if sheet.cell(row, inker_col).value:
            inker = sheet.cell(row, inker_col).value
            inker = [x.strip() for x in inker.split(';')]
        colorist = []
        if sheet.cell(row, colorist_col).value:
            colorist = sheet.cell(row, colorist_col).value
            colorist = [x.strip() for x in colorist.split(';')]
        letterer = []
        if sheet.cell(row, letterer_col).value:
            letterer = sheet.cell(row, letterer_col).value
            letterer = [x.strip() for x in letterer.split(';')]
        cover_artist = []
        if sheet.cell(row, cover_artist_col).value:
            cover_artist = sheet.cell(row, cover_artist_col).value
            cover_artist = [x.strip() for x in cover_artist.split(';')]
        editor = []
        if sheet.cell(row, editor_col).value:
            editor = sheet.cell(row, editor_col).value
            editor = [x.strip() for x in editor.split(';')]
        # hist_note = []
        # if sheet.cell(row, hist_note_col).value:
            # hist_note = sheet.cell(row, hist_note_col).value
        notes = []
        if sheet.cell(row, notes_col).value:
            notes = sheet.cell(row, notes_col).value
        synopsis = []
        if sheet.cell(row, synopsis_col).value:
            synopsis = sheet.cell(row, synopsis_col).value
        toc = []
        if sheet.cell(row, toc_col).value:
            toc = sheet.cell(row, toc_col).value
        in_series = sheet.cell(row, in_series_col).value
        
        contribs = {}
        if copyright_holder:
            for i in copyright_holder:
                contribs.update({i: ['copyright holder']})
        else:
            if writer:
                for i in writer:
                    contribs.update({i: ['writer']})
            if penciller:
                for i in penciller:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('penciller')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['penciller']})
            if inker:
                for i in inker:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('inker')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['inker']})
            if colorist:
                for i in colorist:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('colorist')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['colorist']})
            if letterer:
                for i in letterer:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('letterer')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['letterer']})
            if cover_artist:
                for i in cover_artist:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('cover artist')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['cover artist']})
            if editor:
                for i in editor:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('editor')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['editor']})
        
        record = Record()
        
        # Add boilerplate fields
        record.leader = field_ldr
        record.add_ordered_field(field_040)
        record.add_ordered_field(field_049)
        record.add_ordered_field(field_336_text)
        record.add_ordered_field(field_336_image)
        record.add_ordered_field(field_337)
        record.add_ordered_field(field_338)
        record.add_ordered_field(field_380)
        record.add_ordered_field(field_506)
        record.add_ordered_field(field_542)
        record.add_ordered_field(field_588)
        record.add_ordered_field(field_989)        
        
        # Add other fields
        today = datetime.today().strftime('%y%m%d')
        if copy_date:
            data_008 = today + 't' + pub_date_year + copy_date_year + country_code + 'a     6    000 1 eng d'
        else:
            data_008 = today + 's' + pub_date_year + '    ' + country_code + 'a     6    000 1 eng d'
        field_008 = Field(tag = '008',
                    data = data_008)
        record.add_ordered_field(field_008)
        
        if isbn:
            field_020 = Field(tag = '020',
                        indicators = [' ',' '],
                        subfields = [
                            'a', isbn])
            record.add_ordered_field(field_020)
        
        
        subfields_099 = subfields_from_string(title)
        if 'b' in subfields_099:
            subfields_099.pop(3)
            subfields_099.pop(2)
        if 'n' in subfields_099:
            subfields_099[subfields_099.index('n')] = 'a'
        if subfields_099[1].endswith(',') or subfields_099[1].endswith(':'):
            subfields_099[1] = subfields_099[1][:-1]
        field_099 = Field(tag = '099',
                    indicators = [' ','9'],
                    subfields = subfields_099)
        record.add_ordered_field(field_099)
        
        for i in contribs:
            if i == list(contribs.keys())[0] and 'copyright holder' in contribs[i]: # first contributor is copyright holder
                subfield_content = subfields_from_string_relator(i, contribs[i])
                field_110 = Field(tag = '110',
                        indicators = ['2', ' '],
                        subfields = subfield_content)
                record.add_ordered_field(field_110)
            elif i == list(contribs.keys())[0] and 'writer' in contribs[i]: # first contributor is a writer
                subfield_content = subfields_from_string_relator(i, contribs[i])
                field_100 = Field(tag = '100',
                        indicators = ['1', ' '],
                        subfields = subfield_content)
                record.add_ordered_field(field_100)
            else:
                subfield_content = subfields_from_string_relator(i, contribs[i])
                if ',' not in subfield_content[1]:
                    field_710 = Field(tag = '710',
                                indicators = ['2',' '],
                                subfields = subfield_content)
                    record.add_ordered_field(field_710)
                else:
                    field_700 = Field(tag = '700',
                                indicators = ['1',' '],
                                subfields = subfield_content)
                    record.add_ordered_field(field_700)
        
        if contribs and ('writer' in contribs[list(contribs.keys())[0]] or 'copyright holder' in contribs[list(contribs.keys())[0]]):
            f245_ind1 = '1'
        else:
            f245_ind1 = '0'
        
        # 245 second indicator: number of nonfiling characters for a leading article
        f245_ind2 = '0'
        if title.startswith('The '):
            f245_ind2 = '4'
        elif title.startswith('An '):
            f245_ind2 = '3'
        elif title.startswith('A '):
            f245_ind2 = '2'
        
        subfields_245 = subfields_from_string(title)
        # If writer exists, add $c for first writer
        if writer:
            subfields_245[-1] = subfields_245[-1] + ' /'
            subfields_245.append('c')
            subfields_245.append(name_direct_order(subfields_from_string(writer[0])[1]) + ', writer.')
        else:
            # If no writer, add 245 ending punctuation
            subfields_245[-1] = subfields_245[-1] + '.'
        field_245 = Field(tag = '245',
                    indicators = [f245_ind1, f245_ind2],
                    subfields = subfields_245)
        record.add_ordered_field(field_245)
        
        if edition:
            if not edition.endswith('.'):
                edition += '.'
            field_250 = Field(tag = '250',
                    indicators = [' ', ' '],
                    subfields = [
                        'a', edition])
            record.add_ordered_field(field_250)
        
        field_264_1 = Field(tag = '264',
                    indicators = [' ','1'],
                    subfields = [
                        'a', pub_place + ' :',
                        'b', publisher + ',',
                        'c', pub_date_str + '.'])
        record.add_ordered_field(field_264_1)
        
        if copy_date:
            field_264_4 = Field(tag = '264',
                        indicators = [' ','4'],
                        subfields = [
                            'c', '©' + copy_date_str])
            record.add_ordered_field(field_264_4)
        
        if color == 'yes':
            subfields_300 = [
                'a', pages + ' pages :',
                'b', 'chiefly color illustrations.']
        elif color == 'no':
            subfields_300 = [
                'a', pages + ' pages :',
                'b', 'black and white illustrations.']
        else:
            # avoid reusing the previous row's value when the Color? column is blank
            subfields_300 = ['a', pages + ' pages.']
        
        field_300 = Field(tag = '300',
                    indicators = [' ',' '],
                    subfields = subfields_300)
        record.add_ordered_field(field_300)
        
        if title_to_series(title):
            subfields_490 = title_to_series(title)
            field_490 = Field(tag = '490',
                        indicators = ['1',' '],
                        subfields = subfields_490)
            record.add_ordered_field(field_490)
        
        if series_note:
            if not series_note.endswith('.'):
                series_note += '.'
            field_490_series_note = Field(tag = '490',
                                    indicators = ['1', ' '],
                                    subfields = ['a', series_note])
            record.add_ordered_field(field_490_series_note)
        
        # if hist_note:
        #     field_500_hist = Field(tag = '500',
        #                 indicators = [' ',' '],
        #                 subfields = [
        #                     'a', hist_note + '.'])
        #     record.add_ordered_field(field_500_hist)
        
        if notes:
            field_500_notes = Field(tag = '500',
                        indicators = [' ',' '],
                        subfields = [
                            'a', notes + '.'])
            record.add_ordered_field(field_500_notes)
        
        if toc:
            if not toc.endswith('.') and not toc.endswith('?') and not toc.endswith('!'):
                toc += '.'
            field_505 = Field(tag = '505',
                        indicators = ['0',' '],
                        subfields = [
                            'a', toc])
            record.add_ordered_field(field_505)
        
        if synopsis:
            field_520 = Field(tag = '520',
                        indicators = [' ',' '],
                        subfields = [
                            'a', synopsis])
            record.add_ordered_field(field_520)
        
        if black_creators:
            for i in black_creators:
                if not i.endswith('.'):
                    i += '.'
                field_590_creators = Field(tag = '590',
                            indicators = [' ',' '],
                            subfields = [
                                'a', i])
                record.add_ordered_field(field_590_creators)
        
        if black_chars:
            for i in black_chars:
                if not i.endswith('.'):
                    i += '.'
                field_590_chars = Field(tag = '590',
                            indicators = [' ',' '],
                            subfields = [
                                'a', i])
                record.add_ordered_field(field_590_chars)
        
        if source:
            field_541_source = Field(tag = '541',
                        indicators = [' ',' '],
                        subfields = [
                            'a', source + '.'])
            record.add_ordered_field(field_541_source)
        
        # if source_acq:
            # field_541_source_acq = Field(tag = '541',
                        # indicators = [' ',' '],
                        # subfields = [
                            # 'a', source_acq + '.'])
            # record.add_ordered_field(field_541_source_acq)
        
        if subj_person:
            for i in subj_person:
                i_subfields = subfields_from_string(i)
                
                # Set first indicator based on presence of comma in $a
                if 'a' in i_subfields:
                    if ',' in i_subfields[i_subfields.index('a') + 1]:
                        field_600_ind1 = '1'
                    else:
                        field_600_ind1 = '0'
                
                if '1' in i_subfields:
                    last_except_subf1 = i_subfields.index('1') - 1
                else:
                    last_except_subf1 = len(i_subfields) - 1
                
                if i_subfields[last_except_subf1].endswith(','):
                    i_subfields[last_except_subf1] = re.sub(r'^(.*),$', r'\g<1>.', i_subfields[last_except_subf1])
                if not i_subfields[last_except_subf1].endswith('.') and not i_subfields[last_except_subf1].endswith(')') and not i_subfields[last_except_subf1].endswith('?') and not i_subfields[last_except_subf1].endswith('-'):
                    i_subfields[last_except_subf1] += '.'
                
                field_600 = Field(tag = '600', 
                            indicators = [field_600_ind1,'0'],
                            subfields = i_subfields)
                record.add_ordered_field(field_600)
        
        if subj_topical:
            for i in subj_topical:
                i_subfields = subfields_from_string(i)
                if not i_subfields[-1].endswith('.') and not i_subfields[-1].endswith(')'):
                    i_subfields[-1] += '.'
                field_650 = Field(tag = '650',
                            indicators = [' ','0'],
                            subfields = i_subfields)
                record.add_ordered_field(field_650)
        
        if subj_place:
            for i in subj_place:
                i_subfields = subfields_from_string(i)
                if not i_subfields[-1].endswith('.') and not i_subfields[-1].endswith(')'):
                    i_subfields[-1] += '.'
                field_651 = Field(tag = '651',
                        indicators = [' ','0'],
                        subfields = i_subfields)
                record.add_ordered_field(field_651)
        
        if subj_corp:
            for i in subj_corp:
                i_subfields = subfields_from_string(i)
                if not i_subfields[-1].endswith('.') and not i_subfields[-1].endswith(')'):
                    i_subfields[-1] += '.'
                field_610 = Field(tag = '610',
                        indicators = ['1','0'],
                        subfields = i_subfields)
                record.add_ordered_field(field_610)
        
        if genre:
            for i in genre:
                if not i.endswith('.') and not i.endswith(')'):
                    i += '.'
                field_655 = Field(tag = '655',
                        indicators = [' ','7'],
                        subfields = [
                            'a', i,
                            '2', 'lcgft'])
                record.add_ordered_field(field_655)
        
        if characters:
            field_500_chars = Field(tag = '500',
                        indicators = [' ', ' '],
                        subfields = [
                            'a', characters])
            record.add_ordered_field(field_500_chars)
        
        if gcd_uri:
            title_758 = subfields_from_string(title)[1]
            if title_758.endswith(',') or title_758.endswith(':'):
                title_758 = title_758[:-1]
            field_758 = Field(tag = '758',
                        indicators = [' ',' '],
                        subfields = [
                            '4', 'http://rdaregistry.info/Elements/m/P30135',
                            'i', 'Has work manifested:',
                            'a', title_758,
                            '1', gcd_uri])
            record.add_ordered_field(field_758)
        
        if in_series:
            subfields_773 = subfields_from_string(in_series)
            field_773 = Field(tag = '773',
                        indicators = ['0','8'],
                        subfields = subfields_773)
            record.add_ordered_field(field_773)
        
        subfields_852 = [
            'b', 'CARRIER',
            'c', 'carrspec']
        if len(subfields_099) == 4:
            subfields_852.append('h')
            subfields_852.append(subfields_099[1])
            subfields_852.append('i')
            subfields_852.append(subfields_099[3])
        if len(subfields_099) == 2:
            subfields_852.append('h')
            subfields_852.append(subfields_099[1])
        if edition:
            if edition.endswith('.'):
                edition = edition[:-1]
            subfields_852.append('z')
            subfields_852.append(edition)
        
        field_852 = Field(tag = '852',
                    indicators = ['8',' '],
                    subfields = subfields_852)
        record.add_ordered_field(field_852)
        
        outmarc.write(record.as_marc())
        print()
    outmarc.close()
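
A hypothetical invocation of the script's entry point (the spreadsheet name is a placeholder); the generated records are written to records.mrc in the working directory.

# Illustrative call; the spreadsheet must contain the column headers read above.
main(["comics_inventory.xlsx"])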
Example #21
def game_record(data, control_number, suppressed=True, status_code="-"):
    """
    Creates a record object from data namedtuple
    args:
        data: namedtuple
    returns:
        record: pymarc.Record object
    """

    record = Record()
    record.leader = "00000crm a2200000M  4500"

    tags = []

    # 001 - control field
    tags.append(Field(tag="001", data=control_number))

    # 005
    tags.append(
        Field(tag="005", data=datetime.strftime(datetime.now(), "%y%m%d%H%M%S.%f"))
    )

    # 008
    date_created = date.strftime(date.today(), "%y%m%d")
    if data.pub_date:
        t008 = f"{date_created}s{data.pub_date}    xxu               vneng d"
    else:
        t008 = f"{date_created}n        xxu               vneng d"
    tags.append(Field(tag="008", data=t008))

    # 020
    for isbn in data.isbn:
        tags.append(Field(tag="020", indicators=[" ", " "], subfields=["a", isbn]))

    # 024
    for upc in data.upc:
        tags.append(Field(tag="024", indicators=["1", " "], subfields=["a", upc]))

    # 040
    tags.append(
        Field(
            tag="040",
            indicators=[" ", " "],
            subfields=["a", "BKL", "b", "eng", "e", "rda", "c", "BKL"],
        )
    )

    # 099
    tags.append(Field(tag="099", indicators=[" ", " "], subfields=["a", "BOARD GAME"]))

    # 245 (no final punctuation needed per new PCC ISBD policy)
    subfields = []
    if not data.title:
        raise ValueError("Missing title data")
    else:
        subfields.extend(["a", data.title])

    if data.subtitle:
        subfields[-1] = f"{subfields[-1]} : "
        subfields.extend(["b", data.subtitle])

    if data.title_part:
        subfields[-1] = f"{subfields[-1]}. "
        subfields.extend(["p", data.title_part])

        # add 246 tag
        ind2 = check_article(data.title_part)
        tags.append(
            Field(
                tag="246",
                indicators=["1", ind2],
                subfields=["a", data.title_part[int(ind2) :]],
            )
        )

    if data.author:
        subfields[-1] = f"{subfields[-1]} / "
        subfields.extend(["c", data.author])

    ind2 = check_article(data.title)

    tags.append(Field(tag="245", indicators=["0", ind2], subfields=subfields))

    # 246 - other title
    for title in data.title_other:
        tags.append(Field(tag="246", indicators=["1", "3"], subfields=["a", title]))

    # 264 publication tags
    subfields = []
    if data.pub_place:
        subfields.extend(["a", f"{data.pub_place}:"])
    else:
        subfields.extend(["a", "[Place of publication not identified]:"])
    if data.publisher:
        subfields.extend(["b", f"{data.publisher},"])
    else:
        subfields.extend(["b", "[publisher not identified],"])
    if data.pub_date:
        subfields.extend(["c", data.pub_date])
    else:
        subfields.extend(["c", "[date of publication not identified]"])

    tags.append(Field(tag="264", indicators=[" ", "1"], subfields=subfields))

    # 300 tag
    tags.append(
        Field(tag="300", indicators=[" ", " "], subfields=["a", "1 board game"])
    )

    # RDA 3xx tags
    tags.append(
        Field(
            tag="336",
            indicators=[" ", " "],
            subfields=["a", "three-dimensional form", "b", "tdf", "2", "rdacontent"],
        )
    )
    tags.append(
        Field(
            tag="337",
            indicators=[" ", " "],
            subfields=["a", "unmediated", "b", "n", "2", "rdamedia"],
        )
    )
    tags.append(
        Field(
            tag="338",
            indicators=[" ", " "],
            subfields=["a", "object", "b", "nr", "2", "rdacarrier"],
        )
    )

    # 500 notes
    tags.append(
        Field(
            tag="500",
            indicators=[" ", " "],
            subfields=["a", f"Number of players: {data.players}"],
        )
    )

    tags.append(
        Field(
            tag="500",
            indicators=[" ", " "],
            subfields=["a", f"Game duration: {data.duration}"],
        )
    )

    # content note 505
    if data.content:
        tags.append(
            Field(tag="505", indicators=["0", " "], subfields=["a", data.content])
        )

    # 520 summary
    if data.desc:
        tags.append(Field(tag="520", indicators=[" ", " "], subfields=["a", data.desc]))

    # 521 note
    tags.append(Field(tag="521", indicators=[" ", " "], subfields=["a", data.age]))

    # 655 genre
    tags.append(
        Field(
            tag="655",
            indicators=[" ", "7"],
            subfields=["a", "Board games.", "2", "lcgft"],
        )
    )

    # 856 fields (link to project)
    tags.append(
        Field(
            tag="856",
            indicators=["4", " "],
            subfields=[
                "u",
                "https://www.bklynlibrary.org/boardgamelibrary",
                "z",
                "Board Game Library website",
            ],
        )
    )

    # 960 item field
    for barcode in data.central_barcodes:
        subfields = [
            "i",
            barcode,
            "l",
            "02abg",
            "p",
            data.price,
            "q",
            "11",
            "t",
            "53",
            "r",
            "i",
            "s",
            status_code,
        ]

        tags.append(Field(tag="960", indicators=[" ", " "], subfields=subfields))

    for barcode in data.crown_barcodes:
        subfields = [
            "i",
            barcode,
            "l",
            "30abg",
            "p",
            data.price,
            "q",
            "11",
            "t",
            "53",
            "r",
            "i",
            "s",
            status_code,
        ]

        tags.append(Field(tag="960", indicators=[" ", " "], subfields=subfields))

    # 949 command line
    if suppressed:
        opac_display_command = "b3=n"
    else:
        opac_display_command = ""
    tags.append(
        Field(
            tag="949",
            indicators=[" ", " "],
            subfields=["a", f"*b2=o;{opac_display_command}"],
        )
    )

    for tag in tags:
        record.add_ordered_field(tag)

    return record
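A minimal serialization sketch for a record built this way: MARCWriter is the standard pymarc writer used in the later examples; the output path is hypothetical and `record` is assumed to hold the Record returned by the builder above.

from pymarc import MARCWriter

writer = MARCWriter(open("board_games.mrc", "wb"))  # hypothetical output path
writer.write(record)  # the Record assembled by the builder above
writer.close()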
Example #22
        #--------------------------------------------
        # Modify the default LDR field in the new MARC record object
        rec_LDR = list(
            new_marc_rec.leader
        )  # split the LDR bytes into a list so you can modify based on index position
        rec_LDR[5] = 'n'  # code for new record
        res_type = fields[12].strip()
        if res_type == '':
            rec_LDR[6] = 'a'  # code for text
        else:
            rec_LDR[6] = res_type  # if the "Resource Type" is not blank, use that code instead of 'a'
        rec_LDR[7] = 'm'  # code for monographic record
        new_marc_rec.leader = ''.join(
            rec_LDR
        )  # join the list of LDR bytes into a string and assign to the 'leader' field of the MARC record
        #--------------------------------------------
        # Create 001 and 040 MARC fields for record number and cataloging source
        rec_001 = Field(tag='001', data='000' + str(rec_cnt))
        rec_040 = Field(tag='040',
                        indicators=[' ', ' '],
                        subfields=['a', 'NNU', 'b', 'eng', 'c', 'NNU'])
        new_marc_rec.add_ordered_field(rec_001)
        new_marc_rec.add_ordered_field(rec_040)
        #--------------------------------------------
        # Create a 245 Title MARC field
        title = fields[0].strip()
        if not title == '':
            rec_245a = title.split(':')[0]
            rec_245b = title.split(':')[1]
Example #23
#trash = open(TRASH, 'w')

bib = open(OUT, 'w')
#writer = MARCWriter(open('retrobi.mrc','wb'))

# MAIN -----------------

with open(IN, 'rb') as f:
    for LINE in f:

        # INIT -----------------

        #record = Record(force_utf8=True)
        record = Record()

        record.leader = '     nab a22     4a 4500'  # overwrite internal(pymarc.record) LDR tag
        record.add_ordered_field(Field(tag='FMT', data='RS'))
        record.add_ordered_field(Field(tag='003', data='CZ PrUCL'))
        record.add_ordered_field(Field(tag='005', data='20201231'))
        record.add_ordered_field(
            Field(tag='040',
                  indicators=['\\', '\\'],
                  subfields=['a', 'ABB060', 'b', 'cze']))
        #record.add_ordered_field(Field(tag='041', indicators=['0','\\'], subfields=['a', 'cze']))
        record.add_ordered_field(
            Field(tag='336',
                  indicators=['\\', '\\'],
                  subfields=['a', 'text', 'b', 'txt', '2', 'rdacontent']))
        record.add_ordered_field(
            Field(tag='337',
                  indicators=['\\', '\\'],
Example #24
def convert(cs, language, g):

    vocId = cs.get("vocabulary_code")

    # variables for the more complicated constants, cast/converted to the appropriate types
    helper_variables = {
        "vocCode" : (cs.get("vocabulary_code") + "/" + LANGUAGES[language] \
            if cs.getboolean("multilanguage", fallback=False) \
            else vocId),
        "groupingClasses" : [URIRef(x) for x in cs.get("groupingClasses", fallback=",".join(GROUPINGCLASSES)).split(",")],
        "groupingClassesDefault" : [URIRef(x) for x in cs.parser.get("DEFAULT", "groupingClasses", fallback=",".join(GROUPINGCLASSES)).split(",")],
        'modificationDates': cs.get("modificationDates", fallback=None),
        'keepModified' : cs.get("keepModifiedAfter", fallback=None),
        'keepGroupingClasses' : cs.getboolean("keepGroupingClasses", fallback=False),
        'defaultOutputFileName' : "yso2marc-" + cs.name.lower() + "-" + language + ".mrcx"
    }

    if helper_variables['keepModified']:
        helper_variables['keepModifiedLimit'] = False \
        if cs.get("keepModifiedAfter", fallback=KEEPMODIFIEDAFTER).lower() == "all" \
        else datetime.date(datetime.strptime(cs.get("keepModifiedAfter"), "%Y-%m-%d"))

    if cs.get("output", fallback=None):
        parts = cs.get("languages").split(",")
        if len(parts) > 1:
            output = cs.get("output")
            if len(output.split(".")) > 1:
                helper_variables["outputFileName"] = ".".join(
                    output.split(".")
                    [:-1]) + "-" + language + "." + output.split(".")[-1]
            else:
                helper_variables["outputFileName"] = output + "-" + language
    if not "outputFileName" in helper_variables:
        helper_variables["outputFileName"] = cs.get(
            "output", fallback=helper_variables["defaultOutputFileName"])

    # modified_dates is a dict keyed by record id; each value is a tuple of
    # the record's last modification date and an MD5 hash of the record content
    if helper_variables['modificationDates']:
        if os.path.isfile(helper_variables['modificationDates']):
            with open(helper_variables['modificationDates'],
                      'rb') as pickle_file:
                try:
                    modified_dates = pickle.load(pickle_file)
                except EOFError:
                    logging.error(
                        "The file %s for modification dates is empty " %
                        helper_variables['modificationDates'])
                    sys.exit(2)
        else:
            modified_dates = {}

    logging.info(
        "Processing vocabulary with vocabulary code '%s' in language '%s'" %
        (vocId, language))
    incrementor = 0
    writer_records_counter = 0
    ET_namespaces = {"marcxml": "http://www.loc.gov/MARC21/slim"}

    handle = open(
        cs.get("output", fallback=helper_variables["defaultOutputFileName"]),
        "wb")
    writer = XMLWriter(handle)

    # collect the prefLabels so that altLabels that appear as a prefLabel in another concept can be filtered out
    pref_labels = set()
    for conc in g.subjects(RDF.type, SKOS.Concept):
        pref_label = g.preferredLabel(conc, lang=language)
        if pref_label:
            pref_labels.add(str(pref_label[0][1]))

    # only these MTS concept groups are included; no MARC21 record is created from the group name itself
    ids = {"occupations": ['m2332'], "titles": ['m121', 'm3764']}

    uris = {}
    for key in ids:
        uris[key] = set()
        for id in ids[key]:
            uris[key].add(MTS + id)

    for group in g.subjects(RDF.type, ISOTHES.ConceptGroup):
        for key in uris:
            if any(str(group).endswith(uri) for uri in uris[key]):
                get_member_groups(g, group, uris[key])

    concs = []
    if helper_variables['keepModified']:
        concs = []
        for uri in modified_dates:
            if modified_dates[uri][0] >= helper_variables['keepModifiedLimit']:
                concs.append(URIRef(uri))
    else:
        for conc in g.subjects(RDF.type, SKOS.Concept):
            concs.append(conc)

    # identifiers of the created concepts, used with the modification_dates list to detect deleted concepts
    created_concepts = set()

    for concept in concs:
        # MARC21 records are created only for occupations and titles
        if not (concept in uris['occupations'] or concept in uris['titles']):
            continue
        created_concepts.add(str(concept))
        incrementor += 1
        if incrementor % 1000 == 0:
            logging.info("Processing %sth concept" % (incrementor))

        # skip grouping concepts
        if not helper_variables['keepGroupingClasses']:
            if any(conceptType in helper_variables["groupingClasses"]
                   for conceptType in g.objects(concept, RDF.type)):
                continue

        rec = Record()

        rec.leader = cs.get("leaderNew", fallback=LEADERNEW)

        # 024 other standard identifiers - the concept URI is stored here
        rec.add_field(
            Field(tag='024',
                  indicators=['7', ' '],
                  subfields=['a', concept, '2', "uri"]))

        # 040 cataloging agency
        rec.add_field(
            Field(tag='040',
                  indicators=[' ', ' '],
                  subfields=[
                      'a',
                      cs.get("creatorAgency", fallback=CREATOR_AGENCY), 'b',
                      LANGUAGES[language], 'f', helper_variables["vocCode"]
                  ]))

        valueProps = sorted(getValues(g,
                                      concept,
                                      SKOS.prefLabel,
                                      language=language),
                            key=lambda o: o.value)
        if len(valueProps) == 0:
            logging.warning(
                "Could not find preflabel for concept %s in language %s. Skipping the whole concept."
                % (concept, language))
            continue
        elif len(valueProps) != 1:
            logging.warning(
                "Multiple prefLabels detected for concept %s in language %s. Choosing the first."
                % (concept, language))

        if concept in uris['occupations']:
            tag = "174"
            subfield_code = "a"
        elif concept in uris['titles']:
            tag = "168"
            subfield_code = "d"

        rec.add_field(
            Field(tag=tag,
                  indicators=[' ', ' '],
                  subfields=[
                      subfield_code,
                      decomposedÅÄÖtoUnicodeCharacters(
                          unicodedata.normalize(NORMALIZATION_FORM,
                                                str(valueProps[0].value)))
                  ]))

        # skos:altLabel -> 467, 474
        # 450 see reference
        # skip producing 45X fields that appear as 15X fields in another concept, except for altLabels
        seen_values = set()

        for valueProp in sorted(getValues(g,
                                          concept, [SKOS.altLabel],
                                          language=language),
                                key=lambda o: str(o.value)):
            if valueProp.prop != SKOS.altLabel and str(
                    valueProp.value) in pref_labels:
                continue
            if valueProp.prop == SKOS.hiddenLabel:
                if str(valueProp.value) in seen_values:
                    continue
            seen_values.add(str(valueProp.value))
            if concept in uris['occupations']:
                tag = "474"
                subfield_code = "a"
            elif concept in uris['titles']:
                tag = "468"
                subfield_code = "d"

            rec.add_field(
                Field(tag=tag,
                      indicators=[' ', ' '],
                      subfields=[
                          subfield_code,
                          decomposedÅÄÖtoUnicodeCharacters(
                              unicodedata.normalize(NORMALIZATION_FORM,
                                                    str(valueProp.value)))
                      ]))

        valueProps = getValues(g, concept, [
            SKOS.prefLabel, SKOS.exactMatch, SKOS.closeMatch, SKOS.broadMatch,
            SKOS.narrowMatch, SKOS.relatedMatch
        ])

        fields = []  # collect the fields here; they are sorted and added at the end

        for valueProp in valueProps:
            if valueProp.prop == SKOS.prefLabel:
                # filter out same-language labels, which already went into the 1xx fields
                # here valueProp.value exceptionally already contains the desired literal
                # (in the other properties it is a node)
                if valueProp.value.language == language:
                    continue

            else:
                # only keep references to the same vocabulary
                continue

            if concept in uris['occupations']:
                tag = "774"
                subfield_code = "a"
            elif concept in uris['titles']:
                tag = "768"
                subfield_code = "d"

            sub2 = "mts" + "/" + LANGUAGES[valueProp.value.language]
            fields.append(
                Field(tag=tag,
                      indicators=[' ', ' '],
                      subfields=[
                          subfield_code,
                          decomposedÅÄÖtoUnicodeCharacters(
                              unicodedata.normalize(NORMALIZATION_FORM,
                                                    str(valueProp.value))),
                          '4', 'EQ', '2', sub2, '0', concept
                      ]))

        # sort fields and add them
        for sorted_field in sorted(fields,
                                   key=lambda o: (o.tag, o.value().lower())):
            rec.add_field(sorted_field)

        writer_records_counter += 1
        writer.write(rec)

        if helper_variables['modificationDates']:
            md5 = hashlib.md5()
            md5.update(str.encode(str(rec)))
            hash = md5.hexdigest()
            if str(concept) in modified_dates:
                if not hash == modified_dates[str(concept)][1]:
                    modified_dates[str(concept)] = (date.today(), hash)
            else:
                modified_dates[str(concept)] = (date.today(), hash)

    # produce deleted concepts when only modified concepts are requested:
    # if a record is in the file given by the modificationDates parameter but not in the graph,
    # it is interpreted as a deleted record (MTS has no deprecation dates)

    if helper_variables['keepModified']:
        concs = []
        for conc in g.subjects(RDF.type, SKOS.Concept):
            if conc in uris['occupations'] or conc in uris['titles']:
                concs.append(str(conc))
        for conc in modified_dates:
            if conc not in concs:
                # if the hash (2nd value of the tuple) is still set, create a deprecated record for the concept
                if modified_dates[conc][1]:
                    rec = Record()
                    rec.leader = cs.get("leaderDeleted0",
                                        fallback=LEADERDELETED0)
                    rec.add_field(
                        Field(tag='024',
                              indicators=['7', ' '],
                              subfields=['a', conc, '2', "uri"]))
                    modified_dates[conc] = (date.today(), "")
                    writer_records_counter += 1
                    writer.write(rec)

    if handle is not sys.stdout:
        writer.close()

    if helper_variables['modificationDates']:
        with open(helper_variables['modificationDates'], 'wb') as output:
            pickle.dump(modified_dates, output, pickle.HIGHEST_PROTOCOL)

    # when all concepts are created, finally output them in pretty-printed XML form
    #if not helper_variables['keepModified']:
    parser = ET.XMLParser(remove_blank_text=True, strip_cdata=False)
    file_path = helper_variables["outputFileName"]
    tree = ET.parse(file_path, parser)
    e = tree.getroot()
    handle = open(
        cs.get("output", fallback=helper_variables["defaultOutputFileName"]),
        "wb")
    handle.write(
        ET.tostring(e,
                    encoding='UTF-8',
                    pretty_print=True,
                    xml_declaration=True))

    if handle is not sys.stdout:
        handle.close()

    # log some information about the conversion
    logging.info("Processed %s concepts. Wrote %s MARCXML records." %
                 (incrementor, writer_records_counter))

    if cs.get("outputSpecified", fallback=None) == None:
        outputChannel = sys.stdout.buffer
        with open(
                cs.get("output",
                       fallback=helper_variables['defaultOutputFileName']),
                "rb") as f:
            shutil.copyfileobj(f, outputChannel)
    if cs.get("outputSpecified", fallback=None) == None:
        os.remove(
            cs.get("output",
                   fallback=helper_variables['defaultOutputFileName']))

    logging.info("Conversion completed: %s" %
                 datetime.now().replace(microsecond=0).isoformat())
Example #25
File: logic.py  Project: NateWr/rua
def book_to_mark21_file(book,owner, xml = False):
	#New record
	record = Record()
	
	# Number and value explanation : http://www.loc.gov/marc/bibliographic/bdleader.html
	# Adding Leader tags
	l = list(record.leader)
	l[5] = 'n' # New
	l[6] = 'a'   #For manuscript file use 't' 
	l[7] = 'm' # Monograph
	l[9] = 'a'
	l[19] = '#'
	record.leader = "".join(l)

	# Category of material  - Text
	record.add_field(record_control_field('007','t'))

	#Languages
	languages = book.languages.all()
	if languages:
		for lang in languages:
			record.add_field(record_control_field('008',lang.code)) 
	else:
		record.add_field(record_control_field('008','eng'))

	#ISBN - International Standard Book Number 
	isbn = models.Identifier.objects.filter(book=book).exclude(identifier='pub_id').exclude(identifier='urn').exclude(identifier='doi')
	for identifier in isbn:
		if book.book_type:
			record.add_field(record_field('020',['#','#'],['a', str(identifier.value)+' '+book.book_type]))
		else:
			record.add_field(record_field('020',['#','#'],['a', str(identifier.value)]))
	
	#Source of acquisition
	try:
		base_url = models.Setting.objects.get(group__name='general', name='base_url').value
	except:
		base_url='localhost:8000'
	book_url = 'http://%s/editor/submission/%s/' % (base_url, book.id)
	record.add_field(record_field('030',['#','#'],['b', book_url]))

	# Main entry - Personal name
	authors = book.author.all()
	author_names=''
	for author in authors:
		author_names=author_names+author.full_name()+' '
		name=author.last_name+', '+author.first_name
		if author.middle_name:
			name=name+' '+author.middle_name[:1]+'.'
		record.add_field(record_field('100',['1','#'],['a', name]))

	#Title statement
	title_words = (book.title).split(' ')
	first_word = title_words[0]
	if first_word.lower() == 'the':
		record.add_field(record_field('245',['1','4'],['a', book.title,'c',author_names]))
	else:
		record.add_field(record_field('245',['1','0'],['a', book.title,'c',author_names]))

	#Publication
	try:
		press_name = models.Setting.objects.get(group__name='general', name='press_name').value
	except:
		press_name=None
	try: 
		city = models.Setting.objects.get(group__name='general', name='city').value
	except:
		city = None

	publication_info=[]
	if book.publication_date:
		#Press' city
		if city :
			publication_info.append('a')
			publication_info.append(str(city))
		#Press' name
		if press_name:
			publication_info.append('b')
			publication_info.append(str(press_name))
		#Date of Publication
		publication_info.append('c')
		publication_info.append(str(book.publication_date))
		record.add_field(record_field('260',['#','#'],publication_info))

	#Physical details
	if book.pages:
		record.add_field(record_field('300',['#','#'],['a',str(book.pages)+' pages']))
	
	#Content type
	record.add_field(record_field('336',['#','#'],['a', 'text','2','rdacontent']))

	#Media type
	record.add_field(record_field('337',['#','#'],['a', 'unmediated','2','rdamedia']))

	#Carrier type
	record.add_field(record_field('338',['#','#'],['a', 'volume','2','rdacarrier']))

	#Language note
	if languages:
		for lang in languages:
			record.add_field(record_field('546',['#','#'],['a', lang.display]))
	else:
		record.add_field(record_field('546',['#','#'],['a', 'In English']))
	
	press_editors = book.press_editors.all()
	#editors
	for editor in press_editors:
		record.add_field(record_field('700',['1','#'],['a', '%s, %s' % (editor.last_name,editor.first_name),'e','Press editor']))
	
	#Series
	if book.series:
		record.add_field(record_field('830',['#','0'],['a', book.series.name ]))
		if book.series.editor:
			record.add_field(record_field('700',['1','#'],['a', '%s, %s' % (book.series.editor.last_name,book.series.editor.first_name),'e','Series editor']))
	#Add record to file
	title= book.title
	if not xml:
		filename='book_'+str(book.id)+'_'+re.sub('[^a-zA-Z0-9\n\.]', '', title.lower())+'_marc21.dat'
		file=handle_marc21_file(record.as_marc(),filename, book, owner)
	else:
		filename='book_'+str(book.id)+'_'+re.sub('[^a-zA-Z0-9\n\.]', '', title.lower())+'_marc21.xml'
		content=record_to_xml(record, quiet=False, namespace=False)
		file=handle_marc21_file(content,filename, book, owner)
	return file.pk
Example #26
def make_bib(data: namedtuple):
    bib = Record()
    tags = []
    locker_num = determine_locker_num(data.comp_name)

    # leader
    bib.leader = "00000nrm a2200000Mi 4500"

    # 008 tag
    dateCreated = date.strftime(date.today(), "%y%m%d")
    tags.append(
        Field(tag="008", data=f"{dateCreated}s2019    xx             00 r|und d")
    )

    # 099 tag
    tags.append(Field(tag="099", indicators=[" ", " "], subfields=["a", "LAPTOP"]))

    # 245 tag
    tags.append(
        Field(tag="245", indicators=["0", "0"], subfields=["a", f"{locker_num}."])
    )

    # single sub A 246 tags
    lap_num = determine_lap_num(data.comp_name)
    alt_titles = [
        "Laptop circulation",
        "Laptops in the branches",
        "Wireless laptops",
        "Circulating laptops",
        "Laptop computers",
        f"32_PUBLAP{lap_num}",
    ]
    for at in alt_titles:
        tags.append(Field(tag="246", indicators=["3", " "], subfields=["a", at]))

    # complex 246 tags

    tags.append(
        Field(
            tag="246",
            indicators=["3", " "],
            subfields=["a", f"{data.type}.", "n", locker_num],
        )
    )
    tags.append(
        Field(
            tag="246",
            indicators=["3", " "],
            subfields=["a", f"{data.type}.", "n", f"32_PUBLAP{lap_num}"],
        )
    )

    # 300 tag
    tags.append(
        Field(tag="300", indicators=[" ", " "], subfields=["a", "1 laptop computer"])
    )

    # 500 tag
    tags.append(
        Field(
            tag="500",
            indicators=[" ", " "],
            subfields=["a", f"Serial number: {data.serial}"],
        )
    )

    # 960 tag
    item_note = construct_item_note(locker_num, lap_num, data,)
    tags.append(
        Field(
            tag="960",
            indicators=[" ", " "],
            subfields=[
                "l",
                "32lap",
                "t",
                "49",
                "r",
                "7",
                "q",
                "7",
                "s",
                "g",
                "n",
                f"{item_note}",
            ],
        )
    )

    # command line tag
    tags.append(
        Field(tag="949", indicators=[" ", " "], subfields=["a", f"*b2=7;bn=32;"])
    )

    for t in tags:
        bib.add_ordered_field(t)

    return bib
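A hedged usage sketch for make_bib above: the namedtuple layout, its sample values, and the output path are assumptions, and the helpers it relies on (determine_locker_num, determine_lap_num, construct_item_note) must already be defined.

from collections import namedtuple

from pymarc import MARCWriter

LaptopData = namedtuple("LaptopData", ["comp_name", "type", "serial"])  # assumed shape
data = LaptopData(comp_name="32PUBLAP01", type="Laptop", serial="SN000123")  # sample values

bib = make_bib(data)
writer = MARCWriter(open("laptops.mrc", "wb"))  # hypothetical output path
writer.write(bib)
writer.close()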
Example #27
def create_record(row):
    """Take a  row from the csv dict and return a pymarc.Record"""
    rec = Record()
    rec.leader = "00000ntm#a22000005c#4500"
    rec.add_ordered_field(
        pymarc.Field(tag="005",
                     data=datetime.datetime.now().strftime("%Y%m%d%H%M%S.0")))
    # generate content for 245
    if not row['Signatur modern']:
        return "Keine Signatur vorhanden"
    else:
        if row["Bd."]:
            val245 = f"UBG Ms {row['Signatur modern'].strip()}/{row['Bd.'].strip()}"
        else:
            val245 = f"UBG Ms {row['Signatur modern'].strip()}"

        rec.add_ordered_field(
            Field(tag='245', indicators=['0', '0'], subfields=['a', val245]))

    # extent statement in 300
    if "rolle" in row["Umfang"].lower():
        sfa = row["Umfang"].strip()
    else:
        sfa = f'{row["Umfang"].strip()} Blätter'

    sfc = f'{row["Format"].strip()}, {row["Größe h : b   "].strip().replace(":", "x")}'

    if sfa.startswith(" "):
        sfa = ""
    if sfc.startswith(", "):
        sfc = sfc[2:]
    if sfc.endswith(", "):
        sfc = sfc[:-2]
    rec.add_ordered_field(
        Field(tag='300', indicators=[' ', ' '], subfields=["a", sfa, "c",
                                                           sfc]))
    if row["Signatur alt"]:
        rec.add_field(
            Field(
                tag='500',
                indicators=[' ', ' '],
                subfields=[
                    'a',
                    f'Historische Signatur der Universitätsbibliothek Graz: {row["Signatur alt"].strip()}'
                ]))
    rec.add_ordered_field(
        Field(tag="500", indicators=[" ", " "], subfields=["a", "Stand 2018"]))
    beschreibstoff = row["Beschreibstoff"].strip()
    rec.add_ordered_field(
        Field(tag="340",
              indicators=[" ", " "],
              subfields=["a", beschreibstoff]))
    rec.add_ordered_field(
        pymarc.Field(tag="264",
                     indicators=[" ", "1"],
                     subfields=["c", f"[{get_date(row)}]"]))

    rec.add_field(
        Field(tag="710",
              indicators=["2", " "],
              subfields=[
                  "a", "Universitätsbibliothek Graz", "0", "(DE-588)18018-X",
                  "4", "own"
              ]))
    date = get_date(row)

    if date == "Datum unbekannt":
        print("Kein Datum vorhanden: " + val245)
        return f"{val245}: Kein Datum vorhanden"
    else:
        year = date_008(date)
        if year is None:
            print("Keine Jahreszahl für 008 extrahierbar: " + val245)
            return f"{val245}: Keine Jahreszahl für 008 extrahierbar."

    date_on_file = datetime.datetime.now().strftime("%y%m%d")
    data008 = date_on_file + "s" + year + "    " + "xx " + "||||" "|" + " " + "||||" + " 00||||   ||"
    rec.add_ordered_field(Field(tag="008", data=data008))
    vorbes_nat_pers = []
    if row["1. VB natürl. Personen"] != '':
        vorbes_nat_pers.append(row["1. VB natürl. Personen"].strip())
    if row["2. VB natürl. Personen"] != '':
        vorbes_nat_pers.append(row["2. VB natürl. Personen"].strip())

    if len(vorbes_nat_pers) > 0:
        for pers in vorbes_nat_pers:
            if pers not in vb_pers:
                print(f"Person nicht vorhanden: {pers}")
                continue
            else:
                persfield = Field(tag='700',
                                  indicators=['1', ' '],
                                  subfields=vb_pers[pers] + ['4', 'fmo'])
                if "," not in vb_pers[pers][1]:
                    persfield.indicators = ['0', ' ']
                rec.add_ordered_field(persfield)
    vorbes_kor = []
    if row["1. Vorbesitz Institution"] != '':
        vorbes_kor.append(row["1. Vorbesitz Institution"].strip())
    if row["2. Vorbesitz Institution"] != '':
        vorbes_kor.append(row["2. Vorbesitz Institution"].strip())

    if len(vorbes_kor) > 0:
        for kor in vorbes_kor:
            if kor not in vb_kor:
                korfield = Field(tag='710',
                                 indicators=['2', ' '],
                                 subfields=['a', kor, '4', 'fmo'])
                rec.add_ordered_field(korfield)
                print(korfield)
            else:
                korfield = Field(tag='710',
                                 indicators=['2', ' '],
                                 subfields=vb_kor[kor] + ['4', 'fmo'])
                rec.add_ordered_field(korfield)
    standort = "SSHS"
    signatur = "Ms " + row["Signatur modern"]

    rec.add_field(
        Field(tag="995",
              indicators=[" ", " "],
              subfields=[
                  "b", "BHB", "c", standort, "h", signatur, "a",
                  row["Signatur alt"], "9", "LOCAL"
              ]))

    return rec
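A driver sketch for create_record, assuming a CSV with the column headers referenced above; rows that fail validation come back as plain strings, so only Record objects are written. Both file names are hypothetical.

import csv

from pymarc import MARCWriter, Record

writer = MARCWriter(open("manuscripts.mrc", "wb"))  # hypothetical output path
with open("inventory.csv", newline="", encoding="utf-8") as fh:  # hypothetical input
    for row in csv.DictReader(fh):
        result = create_record(row)
        if isinstance(result, Record):  # strings signal skipped rows
            writer.write(result)
writer.close()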
Example #28
def make_bib(fh, oclc_code, library_code, blanketPO, selector_code, order):
    """creates bib & order record in MARC21 format
       with UTF-8 encoded charset
    """

    record = Record()
    tags = []

    # MARC leader
    if order.mat_bib in ('h', 'v'):
        MARCmatType = 'g'
    elif order.mat_bib in ('i', 'u'):
        MARCmatType = 'i'
    elif order.mat_bib in ('j', 'y'):
        MARCmatType = 'j'
    elif order.mat_bib == 'a':
        MARCmatType = 'a'
    else:
        MARCmatType = 'a'

    if order.lang == 'eng':
        order_code3 = 'd'
    else:
        order_code3 = 'f'

    record.leader = f'00000n{MARCmatType}m a2200000u  4500'

    # 001 field
    tags.append(Field(tag='001', data=order.wlo))

    # 008 field
    # needs to take into account differences between different
    # non-print formats
    dateCreated = date.strftime(date.today(), '%y%m%d')
    tag008 = f'{dateCreated}s        xx            000 u {order.lang} d'
    if order.resource.pub_date is not None:
        tag008 = tag008[:7] + order.resource.pub_date + tag008[11:]
    tags.append(Field(tag='008', data=tag008))

    # 020 field
    if order.resource.isbn is not None:
        tags.append(Field(tag='020',
                          indicators=[' ', ' '],
                          subfields=['a', order.resource.isbn]))
    # 024 field
    if order.resource.upc is not None:
        tags.append(Field(tag='024',
                          indicators=['1', ' '],
                          subfields=['a', order.resource.upc]))

    # 028 field
    if order.resource.other_no is not None:
        tags.append(Field(tag='028',
                          indicators=['6', '0'],
                          subfields=['a', order.resource.other_no]))

    # 040 field
    tags.append(Field(
        tag='040',
        indicators=[' ', ' '],
        subfields=[
            'a', oclc_code,
            'b', 'eng',
            'c', oclc_code]))

    # 100
    author_present = False
    if order.resource.author is not None:
        author_present = True
        subfields = ['a', order.resource.author]

        tags.append(Field(
            tag='100',
            indicators=['1', ' '],
            subfields=subfields))

    # 245 field
    # add format to title for non-print mat
    if MARCmatType == 'g':
        order.resource.title += ' (DVD)'
    elif MARCmatType == 'i':
        order.resource.title += ' (Audiobook)'
    elif MARCmatType == 'j':
        order.resource.title += ' (CD)'

    if author_present:
        t245_ind1 = '1'
    else:
        t245_ind1 = '0'
    subfields = ['a', order.resource.title]

    tags.append(Field(
        tag='245',
        indicators=[t245_ind1, '0'],
        subfields=subfields))

    # 264
    subfields = []
    if order.resource.pub_place is not None:
        subfieldA = ['a', order.resource.pub_place]
        subfields.extend(subfieldA)
    if order.resource.publisher is not None:
        subfieldB = ['b', order.resource.publisher]
        subfields.extend(subfieldB)
    if order.resource.pub_date is None:
        subfieldC = ['c', '[date not specified]']
    else:
        subfieldC = ['c', order.resource.pub_date]
    subfields.extend(subfieldC)
    tags.append(Field(
        tag='264',
        indicators=[' ', '1'],
        subfields=subfields))

    # 300 field
    if MARCmatType == 'g':
        container = 'videodisc ; 4 3/4 in.'
    elif MARCmatType == 'i':
        container = 'sound disc ; 4 3/4 in.'
    elif MARCmatType == 'j':
        container = 'sound disc ; 4 3/4 in.'
    else:
        container = 'pages ; cm.'

    tags.append(Field(
        tag='300',
        indicators=[' ', ' '],
        subfields=['a', container]))

    # 940 field
    tags.append(Field(
        tag='940',
        indicators=[' ', ' '],
        subfields=['a', 'brief wlo record']))

    # 960 field
    subfields = []
    if oclc_code == 'BKL':
        # subfield_A = ['a', BPL_ORDERS['acqType']]  # set by load table
        subfield_C = ['c', selector_code]
        subfield_M = ['m', BPL_ORDERS['status']]
        subfield_N = ['n', BPL_ORDERS['tloc']]
        subfield_Z = ['z', BPL_ORDERS['currency']]
        subfields.extend(subfield_C)

    elif oclc_code == 'NYP':
        # subfield_A = ['a', NYPL_ORDERS['acqType']]  # set by load table
        subfield_D = ['d', library_code]
        subfield_E = ['e', order_code3]
        subfield_M = ['m', NYPL_ORDERS['status']]
        subfield_N = ['n', NYPL_ORDERS['tloc']]
        subfield_Y = ['y', NYPL_ORDERS['volumes']]
        subfield_Z = ['z', NYPL_ORDERS['currency']]
        subfields.extend(subfield_D)
        subfields.extend(subfield_E)
        subfields.extend(subfield_Y)

    subfield_F = ['f', order.audn]
    subfield_G = ['g', order.mat_ord]
    subfield_O = ['o', order.copies]
    subfield_Q = ['q', order.order_date]
    subfield_S = ['s', f'{order.resource.price_disc:.2f}']
    subfield_T = ['t', order.locs]
    subfield_U = ['u', order.funds]
    subfield_V = ['v', order.vendor]
    subfield_W = ['w', order.lang]

    subfields.extend(subfield_F)
    subfields.extend(subfield_G)
    subfields.extend(subfield_M)
    subfields.extend(subfield_N)
    subfields.extend(subfield_O)
    subfields.extend(subfield_Q)
    subfields.extend(subfield_S)
    subfields.extend(subfield_T)
    subfields.extend(subfield_U)
    subfields.extend(subfield_V)
    subfields.extend(subfield_W)
    subfields.extend(subfield_Z)

    tags.append(Field(tag='960',
                      indicators=[' ', ' '],
                      subfields=subfields))
    # 961 field
    subfields = []
    subfield_I = ['i', order.wlo]
    if order.poPerLine is not None:
        subfield_H = ['h', order.poPerLine]
        subfields.extend(subfield_H)
    if blanketPO is not None:
        subfield_M = ['m', blanketPO]
        subfields.extend(subfield_M)
    if order.note is not None:
        subfield_D = ['d', order.note]
        subfields.extend(subfield_D)
    subfields.extend(subfield_I)
    tags.append(Field(
        tag='961',
        indicators=[' ', ' '],
        subfields=subfields))

    # construct & send to file
    for tag in tags:
        record.add_ordered_field(tag)
    save2marc(fh, record)
def make_bib(row: namedtuple, sequence: int):
    bib = Record()
    # leader
    bib.leader = "00000cem a2200000Mi 4500"

    tags = []

    # 001 tag
    tags.append(Field(tag="001", data=f"bkops{sequence}"))

    # 003 tag
    tags.append(Field(tag="003", data="BookOps"))

    # 005 tag

    timestamp = create_timestamp()
    tags.append(Field(tag="005", data=timestamp))

    # 007 tag

    tags.append(Field(
        tag="007",
        data="aj canzn",
    ))

    # 008 tag
    dateCreated = date.strftime(date.today(), "%y%m%d")
    pub_year = encode_pub_year(row.pub_year)
    data = f"{dateCreated}s{pub_year}    xx |||||| a  |  |   und d"
    tags.append(Field(tag="008", data=data))

    # 034 tag

    esc = encode_scale(row.scale)
    if esc is not None:
        tags.append(
            Field(tag="034",
                  indicators=["1", " "],
                  subfields=["a", "a", "b", esc]))

    # 110 tag

    tags.append(
        Field(
            tag="110",
            indicators=["1", " "],
            subfields=["a", f"{row.author},", "e", "cartographer."],
        ))

    # 245 tag

    tags.append(
        Field(tag="245",
              indicators=["1", "0"],
              subfields=["a", f"{row.title}."]))

    # 246 tag
    if row.alt_title:
        tags.append(
            Field(tag="246",
                  indicators=["3", " "],
                  subfields=["a", row.alt_title]))

    # 255 tag

    nsc = norm_scale(row.scale)
    tags.append(Field(tag="255", indicators=[" ", " "], subfields=["a", nsc]))

    # 264 tag

    npub_date = norm_pub_date(row.pub_year)
    tags.append(
        Field(
            tag="264",
            indicators=[" ", "1"],
            subfields=[
                "a",
                "[Place of publication not identified] :",
                "b",
                f"{row.author},",
                "c",
                npub_date,
            ],
        ))

    # tag 300
    tags.append(
        Field(
            tag="300",
            indicators=[" ", " "],
            subfields=["a", "1 folded map :", "b", "color"],
        ))

    tags.append(
        Field(
            tag="336",
            indicators=[" ", " "],
            subfields=[
                "a", "cartographic image", "b", "cri", "2", "rdacontent"
            ],
        ))
    tags.append(
        Field(
            tag="337",
            indicators=[" ", " "],
            subfields=["a", "unmediated", "b", "n", "2", "rdamedia"],
        ))
    tags.append(
        Field(
            tag="338",
            indicators=[" ", " "],
            subfields=["a", "sheet", "b", "nb", "2", "rdacarrier"],
        ))

    # 490 tag
    if row.series:
        tags.append(
            Field(tag="490",
                  indicators=["0", " "],
                  subfields=["a", row.series]))

    # 500 tag
    if row.note:
        tags.append(
            Field(tag="500",
                  indicators=[" ", " "],
                  subfields=["a", f"{row.note}."]))

    # 505 tag

    if row.content:
        tags.append(
            Field(tag="505",
                  indicators=["0", " "],
                  subfields=["a", f"{row.content}."]))

    # 650 tags
    if row.subjects:
        subject_fields = encode_subjects(row.subjects)
        tags.extend(subject_fields)

    # 655 tag
    if row.genre:
        tags.append(
            Field(
                tag="655",
                indicators=[" ", "7"],
                subfields=["a", f"{row.genre}.", "2", "lcgft"],
            ))

    # tag 852
    if row.call_number:
        tags.append(
            Field(tag="852",
                  indicators=["8", " "],
                  subfields=["h", row.call_number]))

    for t in tags:
        bib.add_ordered_field(t)
    return bib
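A sketch that collects the map bibs above into one MARCXML file with pymarc's XMLWriter (the same writer used in the vocabulary example earlier); the rows iterable and the output path are assumptions.

from pymarc import XMLWriter

handle = open("maps.xml", "wb")  # hypothetical output path
writer = XMLWriter(handle)
for sequence, row in enumerate(rows, start=1):  # rows: assumed iterable of namedtuples
    writer.write(make_bib(row, sequence))
writer.close()  # writes the closing collection element and closes the file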
def epub_to_marc(fname, conf_file=None):
    ns = {
        'n': 'urn:oasis:names:tc:opendocument:xmlns:container',
        'pkg': 'http://www.idpf.org/2007/opf',
        'dc': 'http://purl.org/dc/elements/1.1/'
    }

    # prepare to read from the .epub file
    zip = zipfile.ZipFile(fname)

    # find the contents metafile
    txt = zip.read('META-INF/container.xml')
    tree = etree.fromstring(txt)
    for el in tree:
        for elel in el:
            for item in elel.items():
                if item[0] == 'full-path':
                    cfname = item[1]
    
    # grab the metadata block from the contents metafile
    cf = zip.read(cfname)
    tree = etree.fromstring(cf)
    p = tree.xpath('/pkg:package/pkg:metadata',namespaces=ns)[0]

    # Read from the config file
    conf = configparser.ConfigParser()
    if conf_file:
        conf.read(conf_file)
    else:
        conf.read_string(DEFAULT_CONF)
    leader_dict = {}
    tag_005_dict = {}
    tag_006_dict = {}
    tag_007_dict = {}
    tag_008_dict = {}
    tag_040_dict = {}
    tag_264_dict = {}

    sections = conf.sections()
    for section in sections:
        if section == 'leader':
            for option in conf.options(section):
                leader_dict[option] = conf.get(section, option)
        elif section == '006':
            for option in conf.options(section):
                tag_006_dict[option] = conf.get(section, option)
        elif section == '007':
            for option in conf.options(section):
                tag_007_dict[option] = conf.get(section, option)
        elif section == '008':
            for option in conf.options(section):
                tag_008_dict[option] = conf.get(section, option)
        elif section == '040':
            for option in conf.options(section):
                tag_040_dict[option] = conf.get(section, option)
        elif section == '264':
            for option in conf.options(section):
                tag_264_dict[option] = conf.get(section, option)

    record = Record(force_utf8=True)
    # set the leader
    record.leader = build_leader(leader_dict)
    # I *think* it's updating the 'Base Address of Data' position when
    # it is written to file, so I have kept characters 12-16 blank.
    # Field 005
    record.add_field(Field(tag='005', data=build_tag_005()))
    # Field 006
    record.add_field(Field(tag='006', data=build_tag_006(tag_006_dict, 
        tag_008_dict)))
    # Field 007
    record.add_field(Field(tag='007', data=build_tag_007(tag_007_dict)))
    # Field 008
    record.add_field(Field(tag='008', data=build_tag_008(tag_008_dict, 
        p, ns)))
    # Field 020
    if p.xpath('dc:identifier[@id="ISBN"]/text()', namespaces=ns):
        epub_isbn = p.xpath(
            'dc:identifier[@id="ISBN"]/text()', namespaces=ns)[0].strip()
        epub_field = Field(
            tag = '020',
            indicators = [' ', ' '],
            subfields = ['a', epub_isbn, 'q', 'epub']
                )
        record.add_field(epub_field)  # attach the ISBN field to the record
    elif p.xpath('dc:identifier[@pkg:scheme="ISBN"]/text()', namespaces=ns):
        epub_isbn = p.xpath(
                'dc:identifier[@pkg:scheme="ISBN"]/text()', namespaces=ns)[0].strip()
        epub_field = Field(
            tag = '020',
            indicators = [' ', ' '],
            subfields = ['a', epub_isbn, 'q', 'epub']
                )
        record.add_field(epub_field)  # attach the ISBN field to the record

    # Field 040
    # First, check if the indicators are empty and if they are,
    # turn them into single spaces.
    for value in ('indicator_1', 'indicator_2'):
        if tag_040_dict[value] == '':
            tag_040_dict[value] = ' '
    record.add_field(Field(
                tag = '040',
                indicators = [tag_040_dict['indicator_1'], 
                              tag_040_dict['indicator_2']],
                subfields = ['a', tag_040_dict['subfield_a'], 
                             'b', tag_040_dict['subfield_b'], 
                             'e', tag_040_dict['subfield_e'],
                             'c', tag_040_dict['subfield_c']]
    ))

    # Field 245
    title = subtitle = creator_statement = None
    if p.xpath('dc:title/text()', namespaces=ns):
        full_title = p.xpath('dc:title/text()', namespaces=ns)[0]
        if ":" in full_title:
            title = full_title[:full_title.index(':')].strip()
            subtitle = full_title[full_title.index(':') + 1:].strip()
        else:
            title = full_title
            subtitle = None
    if p.xpath('dc:creator/text()', namespaces=ns):
        creator_statement = p.xpath('dc:creator/text()', namespaces=ns)[0]
    if title and subtitle and creator_statement:
        offset = 0
        if ' ' in title:
            title_words = title.split(' ')
            if title_words[0].lower() in NON_FILING_WORDS:
                offset = len(title_words[0]) + 1
        record.add_field(
            Field('245', ['0', str(offset)], 
                ['a', title + " :", 
                 'b', subtitle + " /", 
                 'c', creator_statement]))
    elif title and creator_statement:
        offset = 0
        if ' ' in title:
            title_words = title.split(' ')
            if title_words[0].lower() in NON_FILING_WORDS:
                offset = len(title_words[0]) + 1
        record.add_field(
            Field('245', ['0', str(offset)], 
                ['a', title + " /", 
                 'c', creator_statement]))

    # Field 264
    if p.xpath('dc:publisher/text()', namespaces=ns) \
    and p.xpath('dc:date/text()', namespaces=ns):
        record.add_field(Field('264', [' ', '1'], 
            ['a', tag_264_dict['subfield_a'] + ' :', 
             'b', p.xpath('dc:publisher/text()', namespaces=ns)[0] + ", ",
             'c', p.xpath('dc:date/text()', namespaces=ns)[0]]))
    if p.xpath('dc:rights/text()', namespaces=ns):
        copyright_statement = ""
        copyright_symbol = "©"
        rights_words_array = p.xpath('dc:rights/text()', 
            namespaces=ns)[0].split()
        for word in rights_words_array:
            if word in copyright_year_range:
                copyright_statement = copyright_symbol + word
        if len(copyright_statement) > 4:
            record.add_field(Field('264', [' ', '4'], 
                ['c', copyright_statement]))
    return record
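A usage sketch for epub_to_marc, assuming a local EPUB file; record_to_xml is the same pymarc helper already used in an earlier example, and as_marc() gives the binary MARC21 form.

from pymarc import record_to_xml

record = epub_to_marc("sample.epub")  # hypothetical input file
with open("sample.mrc", "wb") as out:
    out.write(record.as_marc())  # binary MARC21
print(record_to_xml(record).decode("utf-8"))  # or inspect it as MARCXML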
    def test_barcodes_duplicates_in_two_bpl_files(self):
        bib = Record()
        bib.leader = '00000nam a2200000u  4500'
        tags = []
        tags.append(Field(tag='001', data='ocm00000003'))
        tags.append(
            Field(tag='245',
                  indicators=['0', '0'],
                  subfields=['a', 'Test title 1']))
        tags.append(
            Field(tag='960',
                  indicators=[' ', ' '],
                  subfields=[
                      'i', '34444849044539', 'l', '14afc', 'p', '14.95', 't',
                      '100', 'v', 'BTURBN'
                  ]))
        for tag in tags:
            bib.add_ordered_field(tag)

        bibs.write_marc21(self.fh1, bib)

        bib = Record()
        bib.leader = '00000nam a2200000u  4500'
        tags = []
        tags.append(Field(tag='001', data='ocm00000001'))
        tags.append(
            Field(tag='245',
                  indicators=['0', '0'],
                  subfields=['a', 'Test title 1']))
        tags.append(
            Field(tag='960',
                  indicators=[' ', ' '],
                  subfields=[
                      'i', '34444849044538', 'l', '14afc', 'p', '14.95', 't',
                      '100', 'v', 'BTURBN'
                  ]))
        for tag in tags:
            bib.add_ordered_field(tag)

        bibs.write_marc21(self.fh1, bib)

        bib = Record()
        bib.leader = '00000nam a2200000u  4500'
        tags = []
        tags.append(Field(tag='001', data='ocm00000001'))
        tags.append(
            Field(tag='245',
                  indicators=['0', '0'],
                  subfields=['a', 'Test title 1']))
        tags.append(
            Field(tag='960',
                  indicators=[' ', ' '],
                  subfields=[
                      'i', '34444849044538', 'l', '14afc', 'p', '14.95', 't',
                      '100', 'v', 'BTURBN'
                  ]))
        for tag in tags:
            bib.add_ordered_field(tag)

        bibs.write_marc21(self.fh2, bib)

        self.assertEqual(
            default.barcode_duplicates([self.fh1, self.fh2], 'bpl'), {
                u'34444849044538': [('barcode1_dup_test.mrc', 2),
                                    ('barcode2_dup_test.mrc', 1)]
            })
def output_iso(file_name: str) -> None:
    output_file_name = file_name[:-4] + ".iso"
    temp_file_name = "临时文件.iso"
    # first truncate output_file_name
    fp1 = open(output_file_name, 'w', encoding='utf-8')
    fp1.close()
    # read the CSV into a list of dicts
    dataFrame_temp = pd.read_csv(file_name, encoding='utf-8',
                                 dtype=str).to_dict(orient='records')
    dataFrame = []
    # first load all of the table's data into dataFrame; note: drop nan values, and also drop the Unnamed and continue columns
    for index, value in enumerate(dataFrame_temp):
        data_single = {}
        for k in value:
            v = str(value[k])
            if v == 'nan' or len(
                    v.strip()) == 0 or "Unnamed" in k or "continue" in k:
                pass
            else:
                data_single[k] = v.strip()
        dataFrame.append(data_single)

    for data in dataFrame:
        record = Record()
        # drop the isbn column and rename the head column to 000
        data2 = {}
        for key, value in data.items():
            if key == "head":
                data2["000"] = value
            elif '0' <= key[0] <= '9':
                data2[key] = value

        # then sort the field tags
        keys = list(data2.keys())
        keys.sort()
        # in sorted order, pick up each field and add it to the record
        for key in keys:
            # "000" holds the leader
            if key == "000":
                record.leader = data2[key]
            # tags "009" and below are control fields
            elif key <= "009":
                record.add_field(Field(tag=key, data=data2[key]))
            # for tags above "009", replace every "▼" with "|" and treat the text before the first "|" as the indicators
            elif key > "009":
                # replace the special delimiter character
                data2[key] = data2[key].replace("▼", "|")
                # pick out the indicators
                indicators = data2[key].split("|")[0]
                if len(indicators) == 0:
                    indicators = [" ", " "]
                elif len(indicators) == 1:
                    indicators = [indicators[0], " "]
                else:
                    indicators = [indicators[0], indicators[1]]
                # pick out the subfield data: split on "|"; each segment yields a subfield code followed by its value
                subfields = []
                for words in data2[key].split("|")[1:]:
                    subfields.append(words[0])
                    subfields.append(words[1:])
                # add the field
                record.add_field(
                    Field(tag=key[:3],
                          indicators=indicators,
                          subfields=subfields))

        # record complete; write it to the temporary file
        with open(temp_file_name, 'wb') as fh:
            writer = MARCWriter(fh)
            writer.write(record)
        # copy from the temporary file into the output file
        fp1, fp2 = open(temp_file_name, 'r',
                        encoding='utf-8'), open(output_file_name,
                                                'a',
                                                encoding='utf-8')
        fp2.write(fp1.readline())
        fp2.write('\n')
        fp1.close()
        fp2.close()
    # delete the temporary file
    os.remove(temp_file_name)
def output_iso_from_data(file_name: str, isbn_total: list,
                         data_total: dict) -> None:
    temp_file_name = "临时文件.iso"
    fp = open(file_name, 'w', encoding='utf-8')
    fp.close()
    records = []
    for isbn in isbn_total:
        record = Record()
        if isbn in data_total:
            data = data_total[isbn]
            for key, value in data.items():
                # strip characters that UTF-8 cannot handle
                for character in NON_CHARACTERS_IN_UTF_8:
                    key, value = str(key).replace(character,
                                                  ""), str(value).replace(
                                                      character, "")
                if key in ['continue']:
                    continue
                elif key[:3] == '000':
                    record.leader = value
                elif key[:3] <= '009':
                    record.add_field(Field(tag=key[:3], data=value))
                else:
                    subfields = []
                    words = value[2:].replace("$", " ").replace("|",
                                                                "$").strip()
                    for word in words.split("$"):
                        if len(word.strip()) == 0:
                            continue
                        else:
                            subfields.append(word.strip()[0])
                            subfields.append(word.strip()[1:])
                    record.add_field(
                        Field(tag=key[:3],
                              indicators=[value[0], value[1]],
                              subfields=subfields))
        if str(record.leader) == str(Record().leader):  # new record (no data found)
            record.add_field(Field(tag='001', data=isbn))
        record = record_sorted(record)
        records.append(record)

        # record complete; write it to the temporary file
        with open(temp_file_name, 'wb') as fh:
            writer = MARCWriter(fh)
            try:
                writer.write(record)
                # test whether writing fails (e.g. for 9780714827308)
            except UnicodeEncodeError:
                print("编号为:{}的数据格式有误,清空数据以利于输出.".format(isbn))
                record = Record()
                record.add_field(Field(tag='001', data=isbn))
                writer.write(record)

        # copy from the temporary file into the output file
        fp1, fp2 = open(temp_file_name, 'r',
                        encoding='utf-8'), open(file_name,
                                                'a',
                                                encoding='utf-8')
        try:
            fp2.write(fp1.readline())
        except UnicodeDecodeError:  # some records fail to decode, e.g. 9780714827308
            fp1.close()
            fp2.close()
            with open(temp_file_name, 'wb') as fh:
                writer = MARCWriter(fh)
                record = Record()
                record.add_field(Field(tag='001', data=isbn))
                writer.write(record)
            fp1, fp2 = open(temp_file_name, 'r',
                            encoding='utf-8'), open(file_name,
                                                    'a',
                                                    encoding='utf-8')
            fp2.write(fp1.readline())
        fp2.write('\n')
        fp1.close()
        fp2.close()

    # delete the temporary file
    os.remove(temp_file_name)
Example #34
def main(arglist):
    parser = argparse.ArgumentParser()
    parser.add_argument('input', help='path to spreadsheet')
    # parser.add_argument('output', help='save directory')
    # parser.add_argument('--production', help='production DOIs', action='store_true')
    args = parser.parse_args(arglist)

    input = Path(args.input)

    # Read spreadsheet
    book_in = xlrd.open_workbook(str(input))
    sheet = book_in.sheet_by_index(0)  # get first sheet
    col_headers = sheet.row_values(0)
    # print(col_headers)
    # print()

    title_col = col_headers.index('Title')
    subj_col = col_headers.index('Subject')
    genre_col = col_headers.index('Genre')
    pages_col = col_headers.index('Pages')
    date_col = col_headers.index('Date')
    pub_place_col = col_headers.index('Pub_Place')
    publisher_col = col_headers.index('Publisher')
    source_col = col_headers.index('Source')
    writer_col = col_headers.index('Writer')
    penciller_col = col_headers.index('Penciller')
    inker_col = col_headers.index('Inker')
    colorist_col = col_headers.index('Colorist')
    letterer_col = col_headers.index('Letterer')
    cover_artist_col = col_headers.index('Cover Artist')
    editor_col = col_headers.index('Editor')
    hist_note_col = col_headers.index('Historical Note')
    note_col = col_headers.index('Note')
    characters_col = col_headers.index('Characters')
    story_arc_col = col_headers.index('Story Arc')
    toc_col = col_headers.index('Table of Contents')
    series_col = col_headers.index('Is Part of Series')

    outmarc = open('records.mrc', 'wb')

    # Boilerplate fields
    field_ldr = '00000nam  2200000Ii 4500'
    field_040 = Field(
        tag='040',
        indicators=[' ', ' '],
        subfields=['a', 'VMC', 'b', 'eng', 'e', 'rda', 'c', 'VMC'])
    field_049 = Field(tag='049',
                      indicators=[' ', ' '],
                      subfields=['a', 'VMCM'])
    field_336_text = Field(
        tag='336',
        indicators=[' ', ' '],
        subfields=['a', 'text', 'b', 'txt', '2', 'rdacontent'])
    field_336_image = Field(
        tag='336',
        indicators=[' ', ' '],
        subfields=['a', 'still image', 'b', 'sti', '2', 'rdacontent'])
    field_337 = Field(tag='337',
                      indicators=[' ', ' '],
                      subfields=['a', 'unmediated', 'b', 'n', '2', 'rdamedia'])
    field_338 = Field(tag='338',
                      indicators=[' ', ' '],
                      subfields=['a', 'volume', 'b', 'nc', '2', 'rdacarrier'])
    field_380 = Field(tag='380',
                      indicators=[' ', ' '],
                      subfields=['a', 'Comic books and graphic novels.'])
    field_506 = Field(
        tag='506',
        indicators=['1', ' '],
        subfields=[
            'a',
            'Collection open to research. Researchers must register and agree to copyright and privacy laws before using this collection. Please contact Research Services staff before visiting the James Madison University Special Collections Library to use this collection.'
        ])
    field_542 = Field(tag='542',
                      indicators=[' ', ' '],
                      subfields=[
                          'a', 'Copyright not evaluated', 'u',
                          'http://rightsstatements.org/vocab/CNE/1.0/'
                      ])
    field_588 = Field(
        tag='588',
        indicators=['0', ' '],
        subfields=[
            'a', 'Description based on indicia and Grand Comics Database.'
        ])
    field_989 = Field(tag='989',
                      indicators=[' ', ' '],
                      subfields=['a', 'PN6728'])

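    # Build one MARC record per spreadsheet row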
    for row in range(1, sheet.nrows):
        print('Record ' + str(row))

        title = sheet.cell(row, title_col).value
        print(title)
        lower_title = parse_title(lowercase_title(title))
        title = parse_title(title)
        has_part_title = len(title) == 3

        subj = sheet.cell(row, subj_col).value
        subj = [x.strip() for x in subj.split(';')]
        genre = sheet.cell(row, genre_col).value
        genre = [x.strip() for x in genre.split(';')]
        pages = sheet.cell(row, pages_col).value
        date = sheet.cell(row, date_col).value[0:4]
        pub_place = sheet.cell(row, pub_place_col).value
        publisher = sheet.cell(row, publisher_col).value
        source = sheet.cell(row, source_col).value
        # writer = sheet.cell(row, writer_col).value

        writer = []
        if sheet.cell(row, writer_col).value:
            writer = sheet.cell(row, writer_col).value
            writer = [x.strip() for x in writer.split(';')]
        penciller = []
        if sheet.cell(row, penciller_col).value:
            penciller = sheet.cell(row, penciller_col).value
            penciller = [x.strip() for x in penciller.split(';')]
        inker = []
        if sheet.cell(row, inker_col).value:
            inker = sheet.cell(row, inker_col).value
            inker = [x.strip() for x in inker.split(';')]
        colorist = []
        if sheet.cell(row, colorist_col).value:
            colorist = sheet.cell(row, colorist_col).value
            # print(colorist)
            # print('COLORIST FROM SHEET=' + colorist + '=END')
            # print(bool(colorist))
            colorist = [x.strip() for x in colorist.split(';')]
        letterer = []
        if sheet.cell(row, letterer_col).value:
            letterer = sheet.cell(row, letterer_col).value
            letterer = [x.strip() for x in letterer.split(';')]
        cover_artist = []
        if sheet.cell(row, cover_artist_col).value:
            cover_artist = sheet.cell(row, cover_artist_col).value
            cover_artist = [x.strip() for x in cover_artist.split(';')]
        editor = []
        if sheet.cell(row, editor_col).value:
            editor = sheet.cell(row, editor_col).value
            editor = [x.strip() for x in editor.split(';')]
        hist_note = []
        if sheet.cell(row, hist_note_col).value:
            hist_note = sheet.cell(row, hist_note_col).value
        note = []
        if sheet.cell(row, note_col).value:
            note = sheet.cell(row, note_col).value
        characters = []
        if sheet.cell(row, characters_col).value:
            characters = sheet.cell(row, characters_col).value
            characters = [x.strip() for x in characters.split(';')]
        story_arc = []
        if sheet.cell(row, story_arc_col).value:
            story_arc = sheet.cell(row, story_arc_col).value
        toc = []
        if sheet.cell(row, toc_col).value:
            toc = sheet.cell(row, toc_col).value
        series = sheet.cell(row, series_col).value

        # print(cover_artist)
        # print(characters)
        # print(writer)
        # print(subfields_from_string(writer[0]))
        # print(name_direct_order(subfields_from_string(writer[0])[1]))
        # print(title)
        # print(parse_title(title))

        record = Record()

        # Add boilerplate fields
        record.leader = field_ldr
        record.add_ordered_field(field_040)
        record.add_ordered_field(field_049)
        record.add_ordered_field(field_336_text)
        record.add_ordered_field(field_336_image)
        record.add_ordered_field(field_337)
        record.add_ordered_field(field_338)
        record.add_ordered_field(field_380)
        record.add_ordered_field(field_506)
        record.add_ordered_field(field_542)
        record.add_ordered_field(field_588)
        record.add_ordered_field(field_989)

        # Add other fields
        today = datetime.today().strftime('%y%m%d')
        data_008 = today + 't' + date + date + 'xx a     6    000 1 eng d'
        field_008 = Field(tag='008', data=data_008)
        record.add_ordered_field(field_008)

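        # Local call number (099) built from the parsed title pieces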
        subfields_099 = []
        if has_part_title:
            subfields_099 = ['a', title[0] + ': ' + title[1], 'a', title[2]]
        else:
            subfields_099 = ['a', title[0], 'a', title[1]]
        field_099 = Field(tag='099',
                          indicators=[' ', '9'],
                          subfields=subfields_099)
        record.add_ordered_field(field_099)

        if writer:
            # Add 100 for first writer
            subfield_content = subfields_from_string_relator(
                writer[0], 'writer')
            field_100 = Field(tag='100',
                              indicators=['1', ' '],
                              subfields=subfield_content)
            record.add_ordered_field(field_100)
            # Multiple writers
            if len(writer) > 1:
                # Add 700s for all writers after the first
                for i in writer[1:]:
                    subfield_content = subfields_from_string_relator(
                        i, 'writer')
                    field_700 = Field(tag='700',
                                      indicators=['1', ' '],
                                      subfields=subfield_content)
                    record.add_ordered_field(field_700)

        # pymarc expects string indicators; ind2 counts nonfiling characters
        if writer:
            f245_ind1 = '1'
        else:
            f245_ind1 = '0'

        f245_ind2 = '0'
        if title[0].startswith('The '):
            f245_ind2 = '4'
        elif title[0].startswith('An '):
            f245_ind2 = '3'
        elif title[0].startswith('A '):
            f245_ind2 = '2'

        subfields_245 = []
        if has_part_title:
            subfields_245 = [
                'a', title[0] + '.', 'p', title[1] + ',', 'n', title[2]
            ]
        else:
            subfields_245 = ['a', title[0] + ',', 'n', title[1]]
        # If writer exists, add $c
        if writer:
            subfields_245[-1] = subfields_245[-1] + ' /'
            subfields_245.append('c')
            subfields_245.append(
                name_direct_order(subfields_from_string(writer[0])[1]) +
                ', writer.')
        else:
            # If no writer, add 245 ending punctuation
            subfields_245[-1] = subfields_245[-1] + '.'
        field_245 = Field(tag='245',
                          indicators=[f245_ind1, f245_ind2],
                          subfields=subfields_245)
        record.add_ordered_field(field_245)

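        # Publication statement (264 _1) and copyright date (264 _4)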
        field_264_1 = Field(tag='264',
                            indicators=[' ', '1'],
                            subfields=[
                                'a', pub_place + ' :', 'b', publisher + ',',
                                'c', date + '.'
                            ])
        record.add_ordered_field(field_264_1)

        field_264_4 = Field(tag='264',
                            indicators=[' ', '4'],
                            subfields=['c', '©' + date])
        record.add_ordered_field(field_264_4)

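        # Physical description (300)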
        field_300 = Field(tag='300',
                          indicators=[' ', ' '],
                          subfields=[
                              'a', pages + ' pages :', 'b',
                              'chiefly color illustrations.'
                          ])
        record.add_ordered_field(field_300)

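        # Series statement (490) from the lowercased title pieces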
        subfields_490 = []
        if has_part_title:
            subfields_490 = [
                'a', lower_title[0] + '. ' + lower_title[1] + ' ;', 'v',
                lower_title[2]
            ]
        else:
            subfields_490 = ['a', lower_title[0] + ' ;', 'v', lower_title[1]]
        field_490 = Field(tag='490',
                          indicators=['1', ' '],
                          subfields=subfields_490)
        record.add_ordered_field(field_490)

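        # Notes: general (500), contents (505), and summary (520), when present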
        if hist_note:
            field_500_hist = Field(tag='500',
                                   indicators=[' ', ' '],
                                   subfields=['a', hist_note + '.'])
            record.add_ordered_field(field_500_hist)

        if note:
            field_500_note = Field(tag='500',
                                   indicators=[' ', ' '],
                                   subfields=['a', note + '.'])
            record.add_ordered_field(field_500_note)

        if toc:
            if not toc.endswith(('.', '?', '!')):
                toc += '.'
            field_505 = Field(tag='505',
                              indicators=['0', ' '],
                              subfields=['a', toc])
            record.add_ordered_field(field_505)

        if story_arc:
            field_520 = Field(tag='520',
                              indicators=[' ', ' '],
                              subfields=[
                                  'a', '"' + story_arc +
                                  '" -- Grand Comics Database.'
                              ])
            record.add_ordered_field(field_520)

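        # Provenance note (561) recording the source of the item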
        field_561 = Field(tag='561',
                          indicators=[' ', ' '],
                          subfields=['a', source + '.'])
        record.add_ordered_field(field_561)

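        # Topical subject headings (650) and genre/form terms (655, LCGFT)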
        for i in subj:
            if not i.endswith('.') and not i.endswith(')'):
                i += '.'
            field_650 = Field(tag='650',
                              indicators=[' ', '0'],
                              subfields=['a', i])
            record.add_ordered_field(field_650)

        for i in genre:
            if not i.endswith('.') and not i.endswith(')'):
                i += '.'
            field_655 = Field(tag='655',
                              indicators=[' ', '7'],
                              subfields=['a', i, '2', 'lcgft'])
            record.add_ordered_field(field_655)

        if characters:
            # print(characters)
            subfield_content = 'Characters: ' + '; '.join(characters) + '.'
            field_500 = Field(tag='500',
                              indicators=[' ', ' '],
                              subfields=['a', subfield_content])
            record.add_ordered_field(field_500)

            # Create 600 and 650 for "Fictitious character" entries
            # TODO check for existing 650 and don't add if a duplicate
            if any('Fictitious character' in c for c in characters):
                fic_chars = [
                    c for c in characters if 'Fictitious character' in c
                ]
                for i in fic_chars:
                    fic_char_name = re.sub(
                        r'^(.*?) (\(Fictitious character.*\))$', r'\g<1>', i)
                    fic_char_c = re.sub(
                        r'^(.*?) (\(Fictitious character.*\))$', r'\g<2>', i)
                    field_600 = Field(
                        tag='600',
                        indicators=['0', '0'],
                        subfields=['a', fic_char_name, 'c', fic_char_c])
                    record.add_ordered_field(field_600)

                    field_650 = Field(tag='650',
                                      indicators=[' ', '0'],
                                      subfields=['a', i])
                    record.add_ordered_field(field_650)

        if penciller:
            for i in penciller:
                subfield_content = subfields_from_string_relator(
                    i, 'penciller')
                field_700 = Field(tag='700',
                                  indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)

        if inker:
            for i in inker:
                subfield_content = subfields_from_string_relator(i, 'inker')
                field_700 = Field(tag='700',
                                  indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)

        if colorist:
            for i in colorist:
                subfield_content = subfields_from_string_relator(i, 'colorist')
                field_700 = Field(tag='700',
                                  indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)

        if letterer:
            for i in letterer:
                subfield_content = subfields_from_string_relator(i, 'letterer')
                field_700 = Field(tag='700',
                                  indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)

        if cover_artist:
            for i in cover_artist:
                subfield_content = subfields_from_string_relator(
                    i, 'cover artist')
                field_700 = Field(tag='700',
                                  indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)

        if editor:
            for i in editor:
                subfield_content = subfields_from_string_relator(i, 'editor')
                field_700 = Field(tag='700',
                                  indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)

        # field_700 = Field(tag = '700',
        # indicators = ['7',' '],
        # subfields = [
        # 'a', doi,
        # '2', 'doi'])

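        # Host item entry (773) and series added entry (830)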
        subfields_773 = subfields_from_string(series)
        field_773 = Field(tag='773',
                          indicators=['0', '8'],
                          subfields=subfields_773)
        record.add_ordered_field(field_773)

        subfields_830 = []
        if has_part_title:
            subfields_830 = [
                'a', lower_title[0] + '.', 'p', lower_title[1] + ' ;', 'v',
                lower_title[2] + '.'
            ]
        else:
            subfields_830 = [
                'a', lower_title[0] + ' ;', 'v', lower_title[1] + '.'
            ]
        field_830 = Field(tag='830',
                          indicators=[' ', '0'],
                          subfields=subfields_830)
        record.add_ordered_field(field_830)

        outmarc.write(record.as_marc())
        print()
    outmarc.close()
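
The example above depends on several helper functions (parse_title, lowercase_title, subfields_from_string, subfields_from_string_relator, name_direct_order) that live elsewhere in its module and are not shown on this page. The sketch below is only a guess at how two of them might look, inferred from the way they are called above; the names come from the example, but the bodies are assumptions, not the original implementations.

# Hypothetical sketches, inferred from usage above -- not the original code.
from pymarc import Field


def name_direct_order(name):
    # Assumed: turn an inverted heading ('Lee, Stan') into direct order ('Stan Lee').
    parts = [p.strip() for p in name.split(',', 1)]
    return ' '.join(reversed(parts)) if len(parts) == 2 else name


def subfields_from_string_relator(name, relator):
    # Assumed: build 100/700 subfield content with an RDA $e relator term.
    return ['a', name + ',', 'e', relator + '.']


# Example use, mirroring the 700 fields created in the loop above:
field_700 = Field(tag='700',
                  indicators=['1', ' '],
                  subfields=subfields_from_string_relator('Lee, Stan', 'writer'))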