def transpose_to_marc21(record):
    """Serialize a dict-shaped record into MARC21 via pymarc.

    The leader is read from ``record["_LEADER"]``. Every key accepted by
    ``isint`` is treated as a tag:

    * tags below 10 become control fields, one per string (the value may
      be a single string or a list);
    * any other tag is iterated as a sequence of mappings whose keys are
      the indicator characters and whose values are lists of
      ``{code: value}`` mappings, where ``value`` is a string or a list
      of strings. Each indicator key yields one data field.

    Returns whatever ``Record.as_marc()`` produces.
    """
    marc = Record(force_utf8=True)
    marc.leader = record["_LEADER"]
    for tag in record:
        if not isint(tag):
            continue

        if int(tag) < 10:
            # Control field(s): plain data, no indicators or subfields.
            content = record[tag]
            if isinstance(content, list):
                for data in content:
                    marc.add_field(Field(tag=tag, data=data))
            elif isinstance(content, str):
                marc.add_field(Field(tag=tag, data=content))
            continue

        for occurrence in record[tag]:
            for ind, entries in occurrence.items():
                # pymarc's flat subfield layout: [code, value, code, value, ...]
                flat_subfields = []
                for entry in entries:
                    for code, value in entry.items():
                        if isinstance(value, str):
                            flat_subfields.extend((code, value))
                        elif isinstance(value, list):
                            for item in value:
                                flat_subfields.extend((code, item))
                marc.add_field(Field(tag=str(tag),
                                     indicators=list(ind),
                                     subfields=flat_subfields))
    return marc.as_marc()
Exemplo n.º 2
0
 def element(self, element_dict, name=None):
     """Turn one JSON-derived record mapping into a pymarc record.

     Called without `name` to start a new record; the method then
     re-enters itself for the 'leader', 'fields' and 'subfields' stages.
     Field and subfield mappings are consumed with ``popitem()``, so the
     input structure is modified. The finished record is passed to
     ``self.process_record``.
     """
     if not name:
         self._record = Record()
         self.element(element_dict, 'leader')
         return

     if name == 'leader':
         self._record.leader = element_dict[name]
         self.element(element_dict, 'fields')
         return

     if name == 'fields':
         for entry in element_dict[name]:
             tag, content = entry.popitem()
             self._field = Field(tag)
             if self._field.is_control_field():
                 self._field.data = content
             else:
                 # Subfields are attached first, then the two indicators.
                 self.element(content, 'subfields')
                 self._field.indicators.extend(
                     [content['ind1'], content['ind2']])
             self._record.add_field(self._field)
         self.process_record(self._record)
         return

     if name == 'subfields':
         for entry in element_dict[name]:
             code, text = entry.popitem()
             self._field.add_subfield(code, text)
Exemplo n.º 3
0
 def test_960_items_nonrepeatable_subfields(self):
     """Repeated 960 subfields are each reported as not repeatable."""
     # Every code is entered twice in a row; 'v' appears a second time at
     # the end of the field. '$p' carries a price, all others 'TEST'.
     repeated = []
     for code in 'ilpqotrsvnv':
         value = '9.99' if code == 'p' else 'TEST'
         repeated.extend([code, value, code, value])

     bib = Record()
     bib.add_field(
         Field(tag='960', indicators=[' ', ' '], subfields=repeated))
     bibs.write_marc21('specs_test.mrc', bib)
     passed, report = local_specs.local_specs_validation(
         'bpl', ['specs_test.mrc'], self.bcl)

     self.assertFalse(passed)
     for code in 'ilpqotrsvn':
         self.assertIn('"%s" subfield is not repeatable.' % code, report)
Exemplo n.º 4
0
    def test_add_title(self):
        """add_title writes a 245 with $a, plus $b/$c only when set."""
        edition = self._edition()
        edition.title = "The Good Soldier"
        edition.sort_title = "Good Soldier, The"
        edition.subtitle = "A Tale of Passion"

        full_record = Record()
        Annotator.add_title(full_record, edition)
        # Unpacking into a one-element list requires exactly one 245 field.
        [field] = full_record.get_fields("245")
        expected_subfields = {
            "a": edition.title,
            "b": edition.subtitle,
            "c": edition.author,
        }
        self._check_field(full_record, "245", expected_subfields, ["0", "4"])

        # With neither subtitle nor author, only the title subfield is left.
        edition.subtitle = None
        edition.author = None

        bare_record = Record()
        Annotator.add_title(bare_record, edition)
        [field] = bare_record.get_fields("245")
        self._check_field(
            bare_record, "245", {"a": edition.title}, ["0", "4"])
        for code in ("b", "c"):
            eq_([], field.get_subfields(code))
Exemplo n.º 5
0
    def startElementNS(self, name, qname, attrs):
        """Handle the opening of a namespaced XML element.

        ``name[1]`` (the local name) is either a bare element name or
        ``"<element>.<parameter>"``. Depending on the element this starts
        a new record, resets the leader, starts a new field, stores the
        indicators or remembers the current subfield code. The text
        buffer ``self._text`` is reset after every handled element.

        Raises:
            RuntimeError: if the element is not recognised, or if an
                ``IND``/``FIELD``/``SUBFIELD`` element has no
                ``.parameter`` suffix.
        """
        # NO Stricts
        local_name = name[1]
        try:
            element, parameter = local_name.split(".")
        except ValueError:
            # No dot (or more than one): the whole local name is the
            # element and there is no parameter.
            element, parameter = local_name, None

        if element == "rusmarc":
            self._record = Record()
        elif element == "mrk":
            self._record.leader = ""
        elif element.startswith("m_"):
            pass  # See endElementNS for implementation
        elif element in ("IND", "FIELD", "SUBFIELD"):
            # These elements are meaningless without their parameter;
            # previously this surfaced as an UnboundLocalError.
            if parameter is None:
                raise RuntimeError(
                    "cannot process tag %s: missing parameter" % element)
            if element == "IND":
                self._indicators = parameter.replace("_", " ")
                self._field.subfields = []
            elif element == "FIELD":
                self._field = Field(parameter, [" ", " "])
            else:
                self._subfield_code = parameter
        elif element == "RECORDS":
            pass
        else:
            raise RuntimeError("cannot process tag %s" % element)

        self._text = []
Exemplo n.º 6
0
    def test_add_simplified_genres(self):
        """Each genre yields a 650 field with source 'Library Simplified'."""
        work = self._work(with_license_pool=True)
        fantasy, ignore = Genre.lookup(self._db, "Fantasy", autocreate=True)
        romance, ignore = Genre.lookup(self._db, "Romance", autocreate=True)
        work.genres = [fantasy, romance]

        def genre_name(field):
            return field.get_subfields("a")[0]

        def assert_genre_fields(source):
            # Annotate a fresh record from `source` and verify both 650s.
            record = Record()
            Annotator.add_simplified_genres(record, source)
            fields = record.get_fields("650")
            [fantasy_field, romance_field] = sorted(fields, key=genre_name)
            expectations = (
                (fantasy_field, "Fantasy"),
                (romance_field, "Romance"),
            )
            for field, expected_name in expectations:
                eq_(["0", "7"], field.indicators)
                eq_(expected_name, genre_name(field))
                eq_("Library Simplified", field.get_subfields("2")[0])

        assert_genre_fields(work)

        # It also works with a materialized work.
        self.add_to_materialized_view([work])
        # The work is in the materialized view twice since it has two genres,
        # but we can use either one.
        [mw, ignore] = self._db.query(MaterializedWorkWithGenre).all()
        assert_genre_fields(mw)
Exemplo n.º 7
0
def filter_subject_headings(record: Record, librarySystemId: int) -> List[Field]:
    """
    Strips all subject headings from the record and returns the supported ones

    Every field returned by ``record.subjects()`` is removed from the
    record, whether or not it is approved.

    Args:
        record:                     pymarc.record.Record object
        librarySystemId:            library system identifier; children's
                                    headings are kept only when it is 1,
                                    and it is passed on to the
                                    vocabulary check

    Returns:
        list of the removed subject fields that were approved
    """
    approved_tags = []
    for tag in record.subjects():
        source = tag.indicator2
        if source == "0":
            # LCSH
            approved = True
        elif source == "1":
            # Children's LCSH
            approved = librarySystemId == 1
        elif source == "7":
            # source specified in $2
            src_vocab = tag["2"]
            approved = bool(
                src_vocab
                and is_approved_vacabulary(src_vocab, librarySystemId))
        else:
            approved = False

        if approved:
            approved_tags.append(tag)
        record.remove_field(tag)
    return approved_tags
Exemplo n.º 8
0
    def test_add_series(self):
        """add_series writes a 490 with $a and, when known, $v."""
        edition = self._edition()
        edition.series = self._str
        edition.series_position = 5

        with_position = Record()
        Annotator.add_series(with_position, edition)
        expected = {
            "a": edition.series,
            "v": str(edition.series_position),
        }
        self._check_field(with_position, "490", expected, ["0", " "])

        # If there's no series position, the same field is used without
        # the v subfield.
        edition.series_position = None
        without_position = Record()
        Annotator.add_series(without_position, edition)
        self._check_field(
            without_position, "490", {"a": edition.series}, ["0", " "])
        [field] = without_position.get_fields("490")
        eq_([], field.get_subfields("v"))

        # If there's no series, the field is left out.
        edition.series = None
        no_series = Record()
        Annotator.add_series(no_series, edition)
        eq_([], no_series.get_fields("490"))
Exemplo n.º 9
0
 def element(self, element_dict, name=None):
     """Build a pymarc record from a JSON-derived `element_dict`.

     With no `name` a new record is started; the method then calls
     itself for the "leader", "fields" and "subfields" stages. Field and
     subfield mappings are emptied via ``popitem()`` along the way, and
     the completed record is given to ``self.process_record``.
     """
     if not name:
         self._record = Record()
         self.element(element_dict, "leader")
         return

     if name == "leader":
         self._record.leader = element_dict[name]
         self.element(element_dict, "fields")
         return

     if name == "fields":
         for field_entry in element_dict[name]:
             tag, body = field_entry.popitem()
             self._field = Field(tag)
             if self._field.is_control_field():
                 self._field.data = body
             else:
                 # Attach the subfields before appending the indicators.
                 self.element(body, "subfields")
                 self._field.indicators.extend(
                     [body["ind1"], body["ind2"]])
             self._record.add_field(self._field)
         self.process_record(self._record)
         return

     if name == "subfields":
         for subfield_entry in element_dict[name]:
             code, text = subfield_entry.popitem()
             self._field.add_subfield(code, text)
Exemplo n.º 10
0
    def test_nypl_branches_BT_SERIES_YA_graphic_novel_compound_name(self):
        """The 091 'GRAPHIC GN FIC COMPOUND NAME' is split into $f, $a, $c."""
        bib = Record()
        bib.leader = "00000nam a2200000u  4500"
        fields = [
            Field(tag="001", data="0001"),
            Field(tag="245",
                  indicators=["0", "0"],
                  subfields=["a", "Test title"]),
            Field(tag="091",
                  indicators=[" ", " "],
                  subfields=["a", "GRAPHIC GN FIC COMPOUND NAME"]),
        ]
        for field in fields:
            bib.add_ordered_field(field)

        mod_bib = patches.bib_patches("nypl", "branches", "cat", "BT SERIES",
                                      bib)

        call_number = mod_bib.get_fields("091")[0]
        self.assertEqual([" ", " "], call_number.indicators)
        self.assertEqual(
            ["f", "GRAPHIC", "a", "GN FIC", "c", "COMPOUND NAME"],
            call_number.subfields)
Exemplo n.º 11
0
    def test_add_contributors(self):
        """A sole author goes to 100; several contributors go to 700s."""
        author = "a"
        author2 = "b"
        translator = "c"

        # Edition with one author gets a 100 field and no 700 fields.
        edition = self._edition(authors=[author])
        edition.sort_author = "sorted"

        single = Record()
        Annotator.add_contributors(single, edition)
        eq_([], single.get_fields("700"))
        self._check_field(single, "100", {"a": edition.sort_author}, ["1", " "])

        # Edition with two authors and a translator gets three 700 fields and no 100 fields.
        edition = self._edition(authors=[author, author2])
        edition.add_contributor(translator, Contributor.TRANSLATOR_ROLE)

        several = Record()
        Annotator.add_contributors(several, edition)
        eq_([], several.get_fields("100"))
        fields = several.get_fields("700")
        for field in fields:
            eq_(["1", " "], field.indicators)

        def contributor_name(field):
            return field.get_subfields("a")[0]

        [author_field, author2_field, translator_field] = sorted(
            fields, key=contributor_name)
        expectations = [
            (author_field, author, Contributor.PRIMARY_AUTHOR_ROLE),
            (author2_field, author2, Contributor.AUTHOR_ROLE),
            (translator_field, translator, Contributor.TRANSLATOR_ROLE),
        ]
        for field, name, role in expectations:
            eq_(name, contributor_name(field))
            eq_(role, field.get_subfields("e")[0])
Exemplo n.º 12
0
    def test_nypl_branch_BT_SERIES_Spanish_prefix(self):
        """The 091 'J SPA E COMPOUND NAME' is split into $p, $a, $c."""
        bib = Record()
        bib.leader = "00000nam a2200000u  4500"
        fields = [
            Field(tag="001", data="0001"),
            Field(tag="245",
                  indicators=["0", "0"],
                  subfields=["a", "Test title"]),
            Field(tag="091",
                  indicators=[" ", " "],
                  subfields=["a", "J SPA E COMPOUND NAME"]),
        ]
        for field in fields:
            bib.add_ordered_field(field)

        mod_bib = patches.bib_patches("nypl", "branches", "cat", "BT SERIES",
                                      bib)

        call_number = mod_bib.get_fields("091")[0]
        self.assertEqual([" ", " "], call_number.indicators)
        self.assertEqual(
            ["p", "J SPA", "a", "E", "c", "COMPOUND NAME"],
            call_number.subfields)
Exemplo n.º 13
0
    def test_add_physical_description(self):
        """Books and audiobooks each get their own set of 3xx fields."""
        book = self._edition()
        book.medium = Edition.BOOK_MEDIUM
        audio = self._edition()
        audio.medium = Edition.AUDIO_MEDIUM

        # (tag, expected subfields), checked in this order.
        book_fields = [
            ("300", {"a": "1 online resource"}),
            ("336", {"a": "text", "b": "txt", "2": "rdacontent"}),
            ("337", {"a": "computer", "b": "c", "2": "rdamedia"}),
            ("338", {"a": "online resource", "b": "cr", "2": "rdacarrier"}),
            ("347", {"a": "text file", "2": "rda"}),
            ("380", {"a": "eBook", "2": "tlcgt"}),
        ]
        audio_fields = [
            ("300", {"a": "1 sound file", "b": "digital"}),
            ("336", {"a": "spoken word", "b": "spw", "2": "rdacontent"}),
            ("337", {"a": "computer", "b": "c", "2": "rdamedia"}),
            ("338", {"a": "online resource", "b": "cr", "2": "rdacarrier"}),
            ("347", {"a": "audio file", "2": "rda"}),
        ]

        book_record = Record()
        Annotator.add_physical_description(book_record, book)
        for tag, subfields in book_fields:
            self._check_field(book_record, tag, subfields)

        audio_record = Record()
        Annotator.add_physical_description(audio_record, audio)
        for tag, subfields in audio_fields:
            self._check_field(audio_record, tag, subfields)
        # The audio record has no 380 field.
        eq_([], audio_record.get_fields("380"))
Exemplo n.º 14
0
    def test_add_physical_description(self):
        """Check the 3xx fields written for a book and for an audiobook."""
        book = self._edition()
        book.medium = Edition.BOOK_MEDIUM
        audio = self._edition()
        audio.medium = Edition.AUDIO_MEDIUM

        def described(edition, expected_fields):
            # Annotate a fresh record and check each expected field in order.
            record = Record()
            Annotator.add_physical_description(record, edition)
            for tag, subfields in expected_fields:
                self._check_field(record, tag, subfields)
            return record

        described(book, (
            ("300", {"a": "1 online resource"}),
            ("336", {"a": "text", "b": "txt", "2": "rdacontent"}),
            ("337", {"a": "computer", "b": "c", "2": "rdamedia"}),
            ("338", {"a": "online resource", "b": "cr", "2": "rdacarrier"}),
            ("347", {"a": "text file", "2": "rda"}),
            ("380", {"a": "eBook", "2": "tlcgt"}),
        ))

        audio_record = described(audio, (
            ("300", {"a": "1 sound file", "b": "digital"}),
            ("336", {"a": "spoken word", "b": "spw", "2": "rdacontent"}),
            ("337", {"a": "computer", "b": "c", "2": "rdamedia"}),
            ("338", {"a": "online resource", "b": "cr", "2": "rdacarrier"}),
            ("347", {"a": "audio file", "2": "rda"}),
        ))
        # No 380 field is expected on the audio record.
        eq_([], audio_record.get_fields("380"))
Exemplo n.º 15
0
    def tearDown(self):
        """Replace both test MARC records with fresh, empty ones."""
        # n_marc is the NYPL bib, b_marc is the BPL bib.
        self.n_marc, self.b_marc = Record(), Record()
Exemplo n.º 16
0
 def test_949_items_stat_code_incorrect(self):
     """A 949 item with $t '600' is reported as an incorrect value (NYPL)."""
     marc_path = 'specs_test.mrc'
     bib = Record()
     item = Field(tag='949', indicators=[' ', '1'], subfields=['t', '600'])
     bib.add_field(item)
     bibs.write_marc21(marc_path, bib)

     passed, report = local_specs.local_specs_validation(
         'nypl', [marc_path], self.ncl)
     self.assertIn('"t" subfield has incorrect value.', report)
Exemplo n.º 17
0
 def test_949_items_empty_price_subfield(self):
     """An empty 949 $p is reported as an incorrect price format (NYPL)."""
     marc_path = 'specs_test.mrc'
     bib = Record()
     item = Field(tag='949', indicators=[' ', '1'], subfields=['p', ''])
     bib.add_field(item)
     bibs.write_marc21(marc_path, bib)

     passed, report = local_specs.local_specs_validation(
         'nypl', [marc_path], self.ncl)
     self.assertIn('"p" subfield has incorrect price format.', report)
Exemplo n.º 18
0
 def test_960_items_incorrect_format(self):
     """A 960 item with $r 'z' is reported as an incorrect value (BPL)."""
     marc_path = 'specs_test.mrc'
     bib = Record()
     item = Field(tag='960', indicators=[' ', ' '], subfields=['r', 'z'])
     bib.add_field(item)
     bibs.write_marc21(marc_path, bib)

     passed, report = local_specs.local_specs_validation(
         'bpl', [marc_path], self.bcl)
     self.assertIn('"r" subfield has incorrect value.', report)
Exemplo n.º 19
0
 def test_949_subfield_a_mandatory(self):
     """A 949 with no subfields fails with '"a" subfield is mandatory.'"""
     marc_path = 'specs_test.mrc'
     bib = Record()
     empty_tag = Field(tag='949', indicators=[' ', ' '], subfields=[])
     bib.add_field(empty_tag)
     bibs.write_marc21(marc_path, bib)

     passed, report = local_specs.local_specs_validation(
         'nypl', [marc_path], self.ncl)
     self.assertFalse(passed)
     self.assertIn('"a" subfield is mandatory.', report)
Exemplo n.º 20
0
 def test_960_items_correct_price_format(self):
     """A 960 $p of '9.99' does not trigger the price-format error (BPL)."""
     marc_path = 'specs_test.mrc'
     bib = Record()
     item = Field(tag='960', indicators=[' ', '1'], subfields=['p', '9.99'])
     bib.add_field(item)
     bibs.write_marc21(marc_path, bib)

     passed, report = local_specs.local_specs_validation(
         'bpl', [marc_path], self.bcl)
     self.assertNotIn('"p" subfield has incorrect price format.', report)
Exemplo n.º 21
0
 def test_949_items_barcode_not_digits(self):
     """A 949 $i of 'TEST' fails validation as an incorrect barcode (NYPL)."""
     marc_path = 'specs_test.mrc'
     bib = Record()
     item = Field(tag='949', indicators=[' ', '1'], subfields=['i', 'TEST'])
     bib.add_field(item)
     bibs.write_marc21(marc_path, bib)

     passed, report = local_specs.local_specs_validation(
         'nypl', [marc_path], self.ncl)
     self.assertFalse(passed)
     self.assertIn('"i" subfield has incorrect barcode.', report)
Exemplo n.º 22
0
 def test_947_incorrect_subfield_a_value(self):
     """A 947 $a of 'TEST' fails validation as an incorrect value (BPL)."""
     marc_path = 'specs_test.mrc'
     bib = Record()
     tag_947 = Field(tag='947', indicators=[' ', ' '], subfields=['a', 'TEST'])
     bib.add_field(tag_947)
     bibs.write_marc21(marc_path, bib)

     passed, report = local_specs.local_specs_validation(
         'bpl', [marc_path], self.bcl)
     self.assertFalse(passed)
     self.assertIn('"a" subfield has incorrect value', report)
Exemplo n.º 23
0
 def test_960_items_mandatory(self):
     """A 960 with indicators ' 1' still reports the '960  ' tag as missing."""
     marc_path = 'specs_test.mrc'
     bib = Record()
     item = Field(tag='960', indicators=[' ', '1'], subfields=['a', 'TEST'])
     bib.add_field(item)
     bibs.write_marc21(marc_path, bib)

     passed, report = local_specs.local_specs_validation(
         'bpl', [marc_path], self.bcl)
     self.assertFalse(passed)
     self.assertIn('"960  " mandatory tag not found.', report)
Exemplo n.º 24
0
    def test_bib_with_vendor_910_tag(self):
        """After patching, the bib holds exactly one 910 field, reading $aRL."""
        bib = Record()
        vendor_tag = Field(
            tag="910", indicators=[" ", " "], subfields=["a", "foo"])
        bib.add_field(vendor_tag)

        patches.bib_patches("nypl", "research", "acq", "Amalivre", bib)

        self.assertEqual(len(bib.get_fields("910")), 1)
        self.assertEqual(str(bib["910"]), "=910  \\\\$aRL")
Exemplo n.º 25
0
 def test_091_no_subfield_a(self):
     """A 099 holding only $p fails with '"a" subfield is mandatory.' (BPL)."""
     marc_path = 'specs_test.mrc'
     bib = Record()
     call_number = Field(
         tag='099', indicators=[' ', ' '], subfields=['p', 'TEST'])
     bib.add_field(call_number)
     bibs.write_marc21(marc_path, bib)

     passed, report = local_specs.local_specs_validation(
         'bpl', [marc_path], self.bcl)
     self.assertFalse(passed)
     self.assertIn('"a" subfield is mandatory.', report)
Exemplo n.º 26
0
 def test_960_items_incorrect_location(self):
     """A 960 $l of 'mma0l' fails as an incorrect location code (BPL)."""
     marc_path = 'specs_test.mrc'
     bib = Record()
     item = Field(tag='960', indicators=[' ', ' '], subfields=['l', 'mma0l'])
     bib.add_field(item)
     bibs.write_marc21(marc_path, bib)

     passed, report = local_specs.local_specs_validation(
         'bpl', [marc_path], self.bcl)
     self.assertFalse(passed)
     self.assertIn('"l" subfield has incorrect location code.', report)
Exemplo n.º 27
0
    def create_record(cls,
                      work,
                      annotator,
                      force_create=False,
                      integration=None):
        """Return a MARC record for `work`, reusing the cached one if allowed.

        `annotator` may be an annotator instance or a callable producing
        one. When the work has no license pool, None is returned. The
        cacheable part of the record is read from (or, after building,
        written to) the attribute named by `annotator.marc_cache_field`;
        `force_create` skips the cached copy. Non-cacheable fields are
        then added via `annotator.annotate_work_record`.
        """
        if callable(annotator):
            annotator = annotator()

        is_materialized = isinstance(work, BaseMaterializedWork)
        pool = (work.license_pool
                if is_materialized else work.active_license_pool())
        if not pool:
            return None

        edition = pool.presentation_edition
        identifier = pool.identifier

        _db = Session.object_session(work)

        record = None
        cached_marc = getattr(work, annotator.marc_cache_field)
        if cached_marc and not force_create:
            record = Record(data=cached_marc.encode('utf-8'),
                            force_utf8=True)

        if not record:
            record = Record(leader=annotator.leader(work), force_utf8=True)
            annotator.add_control_fields(record, identifier, pool, edition)
            annotator.add_isbn(record, identifier)

            # TODO: The 240 and 130 fields are for translated works, so they can be grouped even
            # though they have different titles. We do not group editions of the same work in
            # different languages, so we can't use those yet.

            annotator.add_title(record, edition)
            annotator.add_contributors(record, edition)
            annotator.add_publisher(record, edition)
            annotator.add_physical_description(record, edition)
            annotator.add_audience(record, work)
            annotator.add_series(record, edition)
            annotator.add_system_details(record)
            annotator.add_ebooks_subject(record)

            # For a materialized work the cache is stored on pool.work.
            data = record.as_marc()
            cache_owner = pool.work if is_materialized else work
            setattr(cache_owner, annotator.marc_cache_field, data)

        # Add additional fields that should not be cached.
        annotator.annotate_work_record(work, pool, edition, identifier, record,
                                       integration)

        return record
Exemplo n.º 28
0
 def test_949_subfield_a_incorrect_value(self):
     """A 949 $a of 'b2=a;' fails validation as an incorrect value (NYPL)."""
     marc_path = 'specs_test.mrc'
     bib = Record()
     # The value lacks the leading '*'.
     command = Field(
         tag='949', indicators=[' ', ' '], subfields=['a', 'b2=a;'])
     bib.add_field(command)
     bibs.write_marc21(marc_path, bib)

     passed, report = local_specs.local_specs_validation(
         'nypl', [marc_path], self.ncl)
     self.assertFalse(passed)
     self.assertIn('"a" subfield has incorrect value', report)
Exemplo n.º 29
0
 def test_960_items_good_barcode(self):
     """A 960 $i of '34444987954328' raises no barcode error (BPL)."""
     marc_path = 'specs_test.mrc'
     bib = Record()
     item = Field(
         tag='960', indicators=[' ', '1'], subfields=['i', '34444987954328'])
     bib.add_field(item)
     bibs.write_marc21(marc_path, bib)

     passed, report = local_specs.local_specs_validation(
         'bpl', [marc_path], self.bcl)
     self.assertNotIn('"i" subfield has incorrect barcode.', report)
Exemplo n.º 30
0
    def test_unicode(self):
        """Write a record with a non-ASCII subfield and read it back."""
        record = Record()
        record.add_field(Field(245, ['1', '0'], ['a', unichr(0x1234)]))

        # MARC transmission format is binary data, so use binary mode.
        writer = MARCWriter(open('test/foo', 'wb'))
        try:
            writer.write(record)
        finally:
            writer.close()

        # Own the input handle so it is closed even if reading fails.
        with open('test/foo', 'rb') as handle:
            reader = MARCReader(handle)
            # next() works with both the Python 2 and 3 iterator protocols.
            record = next(reader)
        self.assertEqual(record['245']['a'], unichr(0x1234))
Exemplo n.º 31
0
def callZ3950(search_id, target, depth=0):
    """Look up a MARC record over Z39.50 and return ``(record, error)``.

    For target ``'UIU'`` the query goes to the CARLI Voyager database
    using attribute 1=12; for any other target it goes to the Library of
    Congress using attribute 1=9 on the formatted LCCN, choosing the
    'NAF' database when ``search_id`` contains ``'n'`` and 'SAF'
    otherwise.

    When nothing comes back, the lookup waits (``waitSixSeconds``) and
    retries, up to 20 additional attempts tracked by ``depth``.

    Returns:
        ``(record, None)`` on success,
        ``(False, 'BROKEN LEADER')`` or
        ``(False, 'BROKEN CHARACTER IN RECORD')`` for unusable data,
        ``(None, 'RECORD NOT FOUND')`` when no record is found, including
        the case where several results come back and none of them has a
        001 equal to ``search_id``.
    """
    # Single-argument print() calls produce the same output under the
    # Python 2 print statement and the Python 3 print function.
    if target == 'UIU':
        print("UIUC NUMBER:  %s" % (search_id,))
        query = zoom.Query('PQF', '@attr 1=12 %s' % str(search_id))

        database_address = 'z3950.carli.illinois.edu'
        username = '******'
        database_name = 'voyager'
    else:
        print("LC NUMBER:  %s" % (search_id,))
        query = zoom.Query('PQF', '@attr 1=9 %s' % str(formatLCCN(search_id)))

        database_address = 'lx2.loc.gov'
        username = ''
        if 'n' in search_id:
            database_name = 'NAF'
        else:
            database_name = 'SAF'

    res = queryZ3950(database_address, username, database_name, query)
    print(len(res))
    print(res)

    if len(res) > 0:
        # Initialised up front: with several results and no matching 001
        # this name used to be unbound at the return statement.
        marc_record = None
        several_results = len(res) > 1
        for r in res:
            if not checkLeader(r.data[:24]):
                return (False, 'BROKEN LEADER')

            try:
                candidate = Record(data=r.data)
            except UnicodeDecodeError:
                return (False, 'BROKEN CHARACTER IN RECORD')

            if several_results:
                # Only accept the result whose control number matches.
                lccn = candidate.get_fields('001')[0].data.replace(" ", "")
                if lccn != search_id:
                    continue

            marc_record = candidate
            fixNames(marc_record)

        if marc_record is None:
            return (None, 'RECORD NOT FOUND')
        return (marc_record, None)
    elif depth < 20:
        waitSixSeconds(datetime.datetime.now().time())
        return callZ3950(search_id, target, depth=depth + 1)
    else:
        return (None, 'RECORD NOT FOUND')
Exemplo n.º 32
0
 def setUp(self):
     """Create `self.bib` with a leader, a 001 and a 245 field."""
     self.bib = Record()
     self.bib.leader = "00000nam a2200000u  4500"
     fields = (
         Field(tag="001", data="0001"),
         Field(tag="245",
               indicators=["0", "0"],
               subfields=["a", "Test title"]),
     )
     for field in fields:
         self.bib.add_ordered_field(field)
Exemplo n.º 33
0
 def test_947_nonrepeatable_subfield_a(self):
     """Two $a subfields in a 947 fail as not repeatable (BPL)."""
     marc_path = 'specs_test.mrc'
     bib = Record()
     tag_947 = Field(
         tag='947',
         indicators=[' ', ' '],
         subfields=['a', 'TEST', 'a', 'TEST1'])
     bib.add_field(tag_947)
     bibs.write_marc21(marc_path, bib)

     passed, report = local_specs.local_specs_validation(
         'bpl', [marc_path], self.bcl)
     self.assertFalse(passed)
     self.assertIn('"a" subfield is not repeatable.', report)
Exemplo n.º 34
0
class TestFindMatches(unittest.TestCase):
    """Checks the count returned by vendors.find_matches.

    bib1 carries a 245, a 901 $a 'abcd' and a 001 '1234'; bib2 is the
    same record without the 001.
    """

    def setUp(self):
        self.bib1 = Record()
        bib1_fields = (
            Field(tag='245', indicators=['0', '0'], subfields=['a', 'Test ']),
            Field(tag='901', indicators=[' ', ' '], subfields=['a', 'abcd']),
            Field(tag='001', data='1234'),
        )
        for field in bib1_fields:
            self.bib1.add_field(field)

        self.bib2 = Record()
        bib2_fields = (
            Field(tag='245', indicators=['0', '0'], subfields=['a', 'Test ']),
            Field(tag='901', indicators=[' ', ' '], subfields=['a', 'abcd']),
        )
        for field in bib2_fields:
            self.bib2.add_field(field)

    def test_2_matches(self):
        # Both the 901 $a and the 001 conditions are met by bib1.
        both_met = [('901', 'a', 'abcd'), ('001', None, '1234')]
        matches = vendors.find_matches(self.bib1, both_met)
        self.assertEqual(matches, 2)

    def test_only_1_match(self):
        # The 001 value differs from bib1's, so only the 901 counts.
        one_met = [('901', 'a', 'abcd'), ('001', None, '12345')]
        matches = vendors.find_matches(self.bib1, one_met)
        self.assertEqual(matches, 1)

    def test_bib_missing_tag(self):
        # bib2 has no 001 at all, so only the 901 counts.
        conditions = [('901', 'a', 'abcd'), ('001', None, '1234')]
        matches = vendors.find_matches(self.bib2, conditions)
        self.assertEqual(matches, 1)
Exemplo n.º 35
0
    def test_writing_unicode(self):
        """Round-trip a non-ASCII subfield value through a MARC file."""
        record = Record()
        record.add_field(Field(245, ['1', '0'], ['a', unichr(0x1234)]))
        record.leader = '         a              '

        # MARC transmission format is binary data, so use binary mode.
        writer = MARCWriter(open('test/foo', 'wb'))
        try:
            try:
                writer.write(record)
            finally:
                writer.close()

            # Own the input handle so it is closed even if reading fails.
            with open('test/foo', 'rb') as handle:
                reader = MARCReader(handle, to_unicode=True)
                # next() works with both Python 2 and 3 iterator protocols.
                record = next(reader)
            self.assertEqual(record['245']['a'], unichr(0x1234))
        finally:
            # Remove the scratch file even when an assertion fails.
            os.remove('test/foo')
Exemplo n.º 36
0
 def test_091_subfields(self):
     """A 099 with two $a fails, but without a '"p" not repeatable' entry."""
     marc_path = 'specs_test.mrc'
     bib = Record()
     call_number = Field(
         tag='099',
         indicators=[' ', ' '],
         subfields=['a', 'TEST', 'a', 'TEST2'])
     bib.add_field(call_number)
     bibs.write_marc21(marc_path, bib)

     passed, report = local_specs.local_specs_validation(
         'bpl', [marc_path], self.bcl)
     self.assertFalse(passed)
     unexpected = (
         '"099  ": tag occurance 1:\n\t"p" subfield is not repeatable.')
     self.assertNotIn(unexpected, report)
Exemplo n.º 37
0
    def test_writing_unicode(self):
        """Write a record with a non-ASCII 245 $a and read it back intact."""
        scratch_path = 'test/foo'
        expected_title = unichr(0x1234)

        original = Record()
        original.add_field(Field(245, ['1', '0'], ['a', expected_title]))
        original.leader = '         a              '

        writer = MARCWriter(open(scratch_path, 'wb'))
        writer.write(original)
        writer.close()

        reader = MARCReader(open(scratch_path, 'rb'), to_unicode=True)
        round_tripped = next(reader)
        self.assertEqual(round_tripped['245']['a'], expected_title)
        reader.close()

        os.remove(scratch_path)
Exemplo n.º 38
0
    def create_record(cls, work, annotator, force_create=False, integration=None):
        """Produce the MARC record for `work`, building it if necessary.

        `annotator` is instantiated first when it is callable. None is
        returned when no license pool is available. Unless `force_create`
        is set, the value stored under `annotator.marc_cache_field` is
        parsed and reused; otherwise the record is assembled from the
        annotator's add_* methods and its serialized form is stored back
        under that attribute. `annotator.annotate_work_record` always
        runs last, on either kind of record.
        """
        if callable(annotator):
            annotator = annotator()

        materialized = isinstance(work, BaseMaterializedWork)
        if materialized:
            pool = work.license_pool
        else:
            pool = work.active_license_pool()
        if not pool:
            return None

        edition = pool.presentation_edition
        identifier = pool.identifier

        _db = Session.object_session(work)

        cache_field = annotator.marc_cache_field
        record = None
        existing_record = getattr(work, cache_field)
        if existing_record and not force_create:
            record = Record(data=existing_record.encode('utf-8'), force_utf8=True)

        if not record:
            record = Record(leader=annotator.leader(work), force_utf8=True)
            annotator.add_control_fields(record, identifier, pool, edition)
            annotator.add_isbn(record, identifier)

            # TODO: The 240 and 130 fields are for translated works, so they can be grouped even
            # though they have different titles. We do not group editions of the same work in
            # different languages, so we can't use those yet.

            annotator.add_title(record, edition)
            annotator.add_contributors(record, edition)
            annotator.add_publisher(record, edition)
            annotator.add_physical_description(record, edition)
            annotator.add_audience(record, work)
            annotator.add_series(record, edition)
            annotator.add_system_details(record)
            annotator.add_ebooks_subject(record)

            # Store the serialized record; a materialized work keeps it
            # on pool.work rather than on itself.
            data = record.as_marc()
            target = pool.work if materialized else work
            setattr(target, annotator.marc_cache_field, data)

        # Add additional fields that should not be cached.
        annotator.annotate_work_record(work, pool, edition, identifier, record, integration)

        return record
Exemplo n.º 39
0
def sort_6_subs(rec):
    """Return a new record in which every subfield $6 leads its field.

    Control fields, fields without a $6, and fields whose first subfield is
    already $6 are carried over as the very same ``Field`` objects.  Any
    other field is rebuilt with its $6 subfield(s) first, followed by the
    remaining subfields in their original order.
    """
    result = Record(to_unicode=True, force_utf8=True)
    for field in rec.get_fields():
        has_linkage = (not field.is_control_field()
                       and len(field.get_subfields('6')) > 0)
        if not has_linkage:
            result.add_field(field)
            continue

        # Each subfield is indexable: [0] is the code, [1] is the content.
        pairs = [(sub[0], sub[1]) for sub in field]
        if pairs[0][0] == '6':
            # $6 already comes first, so the field is reused untouched.
            result.add_field(field)
            continue

        rebuilt = Field(tag=field.tag,
                        indicators=[field.indicator1, field.indicator2],
                        subfields=[])
        linkage = [pair for pair in pairs if pair[0] == '6']
        remainder = [pair for pair in pairs if pair[0] != '6']
        for code, content in linkage + remainder:
            rebuilt.add_subfield(code, content)
        result.add_field(rebuilt)
    return result
Exemplo n.º 40
0
def writeMetadataToMarc(data, MARCMapping, saveLocation):
    """Build a MARC record from ``data`` and hand it to ``writeRecordToFile``.

    Every key of ``data`` that also appears in ``MARCMapping`` becomes one
    field.  The ``UUID`` key is written as a control-style field whose tag is
    the mapped value itself; for every other key the first three characters
    of the mapped value are the tag, the fourth character is the subfield
    code, and both indicators are ``'0'``.
    """
    record = Record()
    for key in data:
        if key not in MARCMapping:
            continue
        if key == u'UUID':
            field = Field(
                tag=MARCMapping[key],
                data=data[key])
        else:
            field = Field(
                tag=MARCMapping[key][:3],
                subfields=[MARCMapping[key][3], data[key]],
                indicators=['0', '0'])
        record.add_field(field)
    # NOTE(review): `filename` is not defined in this function and the
    # `saveLocation` parameter is never used.  Presumably the target path
    # should be derived from `saveLocation` -- confirm against the caller.
    writeRecordToFile(record, filename)
Exemplo n.º 41
0
def faulty015(record: Record) -> bool:
    """Return True when the first $a of any 015 field contains a space."""
    return any(
        "a" in field and " " in field["a"]
        for field in record.get_fields("015")
    )
Exemplo n.º 42
0
 def __next__(self):
     """Turn the next MARC-in-JSON object from ``self.iter`` into a Record."""
     source = next(self.iter)
     marc = Record()
     marc.leader = source['leader']
     for entry in source['fields']:
         # Each entry is a one-item mapping of tag -> content.
         tag, content = list(entry.items())[0]
         if 'subfields' in content and hasattr(content, 'update'):
             # Flatten the list of {code: value} mappings into the
             # alternating [code, value, ...] list passed to Field.
             flat = [
                 part
                 for sub in content['subfields']
                 for pair in sub.items()
                 for part in pair
             ]
             marc.add_field(Field(
                 tag=tag,
                 subfields=flat,
                 indicators=[content['ind1'], content['ind2']]))
         else:
             marc.add_field(Field(tag=tag, data=content))
     return marc
Exemplo n.º 43
0
def empty020a(record: Record) -> bool:
    """Return True when exactly one 020 $a is collected and it is empty.

    For each 020 field that has a subfield $a, the last $a value is taken;
    the result is True only if that yields the single value ``""``.
    """
    last_values = []
    if "020" in record:
        last_values = [
            field.get_subfields("a")[-1]
            for field in record.get_fields("020")
            if "a" in field
        ]
    return last_values == [""]
Exemplo n.º 44
0
def phrasesInFields(record: Record, phrases: list, fields: list) -> bool:
    """Return True when any phrase occurs in any of the listed fields.

    ``fields`` is a list of tag strings.  Field values are lower-cased
    before the substring test; the phrases are compared as given.
    """
    return any(
        phrase in marc_field.value().lower()
        for tag in fields
        for marc_field in record.get_fields(tag)
        for phrase in phrases
    )
Exemplo n.º 45
0
    def test_add_formats(self):
        """add_formats writes one 538 field per delivery mechanism."""
        edition, pool = self._edition(with_license_pool=True)
        epub_no_drm, ignore = DeliveryMechanism.lookup(
            self._db, Representation.EPUB_MEDIA_TYPE, DeliveryMechanism.NO_DRM)
        pool.delivery_mechanisms[0].delivery_mechanism = epub_no_drm
        LicensePoolDeliveryMechanism.set(
            pool.data_source, pool.identifier, Representation.PDF_MEDIA_TYPE,
            DeliveryMechanism.ADOBE_DRM, RightsStatus.IN_COPYRIGHT)

        record = Record()
        Annotator.add_formats(record, pool)
        format_fields = record.get_fields("538")
        eq_(2, len(format_fields))

        # Sorted by $a, the PDF description comes before the EPUB one.
        by_label = sorted(format_fields, key=lambda f: f.get_subfields("a")[0])
        expected_labels = ["Adobe PDF eBook", "EPUB eBook"]
        for expected_label, field in zip(expected_labels, by_label):
            eq_(expected_label, field.get_subfields("a")[0])
            eq_([" ", " "], field.indicators)
Exemplo n.º 46
0
def periodsMissing(record: Record) -> bool:
    """Return True when a relator term ($e) ends with a letter.

    Looks at every subfield $e of the 100, 110, 700 and 710 fields.  A term
    whose last character is alphabetic is taken to be missing its closing
    punctuation.  Empty $e values are ignored.
    """
    for field in record.get_fields("100", "110", "700", "710"):
        for term in field.get_subfields("e"):
            # Slicing (rather than indexing) yields "" for an empty
            # subfield, so isalpha() is False instead of raising IndexError.
            if term[-1:].isalpha():
                return True
    return False
Exemplo n.º 47
0
    def decode_record(self, record):
        r"""
        Parse one pseudo-MARC text record into a pymarc ``Record``.

        The input is line-oriented text whose first line must start with
        ``LEADER``.  Every following line carries a 3-character tag, the two
        indicator characters at columns 3 and 4, and the field content from
        column 6 onward, with ``|`` followed by a code introducing each
        further subfield.  A line whose tag area starts with a space
        continues the previous field.

        Returns ``None`` when the first line is not a ``LEADER`` line or when
        the decoded record has no 245 field; otherwise returns the record.

        >>> reader = Reader('http://opac.uthsc.edu', 2)
        >>> raw = "\nLEADER 00000cas  2200517 a 4500 \n001    1481253 \n003    OCoLC \n005    19951109120000.0 \n008    750727c19589999fr qrzp   b   0   b0fre d \n010    sn 86012727 \n022    0003-3995 \n030    AGTQAH \n035    0062827|bMULS|aPITT  NO.  0639600000|asa64872000|bFULS \n040    MUL|cMUL|dFUL|dOCL|dCOO|dNYG|dHUL|dSER|dAIP|dNST|dAGL|dDLC\n       |dTUM \n041 0  engfre|bgeritaspa \n042    nsdp \n049    TUMS \n069 1  A32025000 \n210 0  Ann. genet. \n222  0 Annales de genetique \n229 00 Annales de genetique \n229    Ann Genet \n242 00 Annals on genetics \n245 00 Annales de genetique. \n260    Paris :|bExpansion scientifique,|c1958-2004. \n300    v. :|bill. &#59;|c28 cm. \n310    Quarterly \n321    Two no. a year \n362 0  1,1958-47,2004. \n510 1  Excerpta medica \n510 1  Index medicus|x0019-3879 \n510 2  Biological abstracts|x0006-3169 \n510 2  Chemical abstracts|x0009-2258 \n510 2  Life sciences collection \n510 0  Bulletin signaletique \n510 0  Current contents \n546    French and English, with summaries in German, Italian, and\n       Spanish. \n550    Journal of the Societe francaise de genetique. \n650  2 Genetics|vPeriodicals. \n710 2  Societ\xe9 fran\xe7aise de genetique. \n785 00 |tEuropean journal of medical genetics.  \n856 41 |uhttp://library.uthsc.edu/ems/eresource/3581|zFull text \n       at ScienceDirect: 43(1) Jan 2000 - 47(4) Dec 2004 \n936    Unknown|ajuin 1977 \n"
        >>> record = reader.decode_record(raw)
        >>> print record.title
        Annales de genetique
        """
        
        pseudo_marc = record.strip().split('\n')
        raw_fields = []
        # The leader text follows the 'LEADER ' prefix on the first line.
        if pseudo_marc[0][0:6] == 'LEADER':
            record = Record()
            record.leader = pseudo_marc[0][7:].strip()
        else:
            return None

        # First pass: merge continuation lines and collect one dict per field.
        for field in pseudo_marc[1:]:
            tag = field[:3]
            # NOTE(review): .decode() on the input implies byte strings
            # (presumably Python 2) -- confirm before porting.
            data = unescape_entities(field[6:].decode('latin1')).encode('utf8')

            if tag.startswith(' '):
                # Additional field data needs to be prepended with an extra space 
                # for certain fields ...
                #for special_tag in ('55','260'):
                #    data = " %s" % (data,) if tag.startswith(special_tag) else data
                data = " %s" % (data.strip(),)
                # Continuation text is appended to the most recent field.
                raw_fields[-1]['value'] = "%s%s" % (raw_fields[-1]['value'], data)
                raw_fields[-1]['raw'] = "%s%s" % (raw_fields[-1]['raw'], field.strip())
            else:
                # Data fields get an 'a' prefix so that the text before the
                # first '|' is read as subfield $a in the second pass.
                data = data if (tag < '010' and tag.isdigit()) else "a%s" % (data,)
                raw_fields.append({
                    'tag': tag, 
                    'indicator1': field[3], 
                    'indicator2': field[4], 
                    'value': data.strip(), 
                    'raw': field.strip()
                })
        
        # Second pass: build the pymarc fields.
        for raw in raw_fields:
            tag = raw['tag']
            data = raw['value'].strip()
            field = Field(tag=tag, indicators=[raw['indicator1'], raw['indicator2']], data=data)
            if not field.is_control_field():
                # Each '|'-separated chunk is one subfield: first character
                # is the code, the rest is the value.
                for sub in data.split('|'):
                    try:
                        field.add_subfield(sub[0].strip(), sub[1:].strip())
                    except Exception:
                        # Skip blank/empty subfields
                        continue
            record.add_field(field)
            
        record.parse_leader()
        
        # Disregard record if no title present
        if not record.get_fields('245'):
            return None
        else:
            return record
Exemplo n.º 48
0
    def test_add_publisher(self):
        """add_publisher writes a 264 field only when a publisher is set."""
        edition = self._edition()
        edition.publisher = self._str
        edition.issued = datetime.datetime(1894, 4, 5)

        with_publisher = Record()
        Annotator.add_publisher(with_publisher, edition)
        expected_subfields = {
            "a": "[Place of publication not identified]",
            "b": edition.publisher,
            "c": "1894",
        }
        self._check_field(
            with_publisher, "264", expected_subfields, [" ", "1"])

        # Without a publisher the 264 field is omitted entirely.
        without_publisher = Record()
        edition.publisher = None
        Annotator.add_publisher(without_publisher, edition)
        eq_([], without_publisher.get_fields("264"))
Exemplo n.º 49
0
    def test_add_isbn(self):
        """add_isbn writes an 020 $a only when an ISBN can be found."""
        # Direct case: the identifier is itself an ISBN.
        isbn_identifier = self._identifier(identifier_type=Identifier.ISBN)
        direct_record = Record()
        Annotator.add_isbn(direct_record, isbn_identifier)
        self._check_field(
            direct_record, "020", {"a": isbn_identifier.identifier})

        # Indirect case: the identifier is not an ISBN but is marked as
        # equivalent to one, so the equivalent ISBN is written.
        linked_identifier = self._identifier()
        oclc = DataSource.lookup(self._db, DataSource.OCLC)
        linked_identifier.equivalent_to(oclc, isbn_identifier, 1)
        linked_record = Record()
        Annotator.add_isbn(linked_record, linked_identifier)
        self._check_field(
            linked_record, "020", {"a": isbn_identifier.identifier})

        # No ISBN at all: the 020 field is left out.
        unrelated_identifier = self._identifier()
        empty_record = Record()
        Annotator.add_isbn(empty_record, unrelated_identifier)
        eq_([], empty_record.get_fields("020"))
Exemplo n.º 50
0
def textIn020z(record: Record) -> bool:
    """Return True when the 020 $z values hold more than one letter in total.

    For each 020 field that has a subfield $z, the last $z value is taken;
    alphabetic characters are counted across all of those values.
    """
    if "020" not in record:
        return False
    cancelled = [
        field.get_subfields("z")[-1]
        for field in record.get_fields("020")
        if "z" in field
    ]
    letter_count = sum(
        1 for value in cancelled for char in value if char.isalpha())
    return letter_count > 1
Exemplo n.º 51
0
    def test_add_control_fields(self):
        """Check the 001, 005, 006, 007 and 008 fields add_control_fields writes.

        Two editions are exercised: one issued in 956 with a single format,
        and a French one issued in 2018 that also has a PDF delivery
        mechanism added.
        """
        # This edition has one format and was published before 1900.
        edition, pool = self._edition(with_license_pool=True)
        identifier = pool.identifier
        edition.issued = datetime.datetime(956, 1, 1)

        # Captured before the fields are generated; the 005 and 008 checks
        # below compare against this date.
        now = datetime.datetime.now()
        record = Record()
        
        Annotator.add_control_fields(record, identifier, pool, edition)
        self._check_control_field(record, "001", identifier.urn)
        assert now.strftime("%Y%m%d") in record.get_fields("005")[0].value()
        self._check_control_field(record, "006", "m        d        ")
        self._check_control_field(record, "007", "cr cn ---anuuu")
        self._check_control_field(
            record, "008",
            now.strftime("%y%m%d") + "s0956    xxu                 eng  ")

        # This French edition has two formats and was published in 2018.
        edition2, pool2 = self._edition(with_license_pool=True)
        identifier2 = pool2.identifier
        edition2.issued = datetime.datetime(2018, 2, 3)
        edition2.language = "fre"
        LicensePoolDeliveryMechanism.set(
            pool2.data_source, identifier2, Representation.PDF_MEDIA_TYPE,
            DeliveryMechanism.ADOBE_DRM, RightsStatus.IN_COPYRIGHT)

        record = Record()
        Annotator.add_control_fields(record, identifier2, pool2, edition2)
        self._check_control_field(record, "001", identifier2.urn)
        assert now.strftime("%Y%m%d") in record.get_fields("005")[0].value()
        self._check_control_field(record, "006", "m        d        ")
        # Only the ninth character of the expected 007 differs from the
        # single-format case above ('m' here, 'a' there).
        self._check_control_field(record, "007", "cr cn ---mnuuu")
        self._check_control_field(
            record, "008",
            now.strftime("%y%m%d") + "s2018    xxu                 fre  ")
Exemplo n.º 52
0
    def startElementNS(self, name, qname, attrs):
        """Open the record, field or subfield announced by a start tag.

        ``name`` is indexed as ``(namespace, local name)``.  In strict mode,
        elements outside ``MARC_XML_NS`` are ignored.
        """
        if self._strict and name[0] != MARC_XML_NS:
            return

        element = name[1]
        # Any text gathered so far belongs to a previous element.
        self._text = []

        if element == 'record':
            self._record = Record()
            return

        if element in ('controlfield', 'datafield'):
            tag = attrs.getValue((None, u'tag'))
            if element == 'controlfield':
                self._field = Field(tag)
            else:
                # A missing indicator attribute defaults to a blank.
                ind1 = attrs.get((None, u'ind1'), u' ')
                ind2 = attrs.get((None, u'ind2'), u' ')
                self._field = Field(tag, [ind1, ind2])
            return

        if element == 'subfield':
            self._subfield_code = attrs[(None, 'code')]
Exemplo n.º 53
0
    def test_add_web_client_urls(self):
        """Check the 856 fields add_web_client_urls writes for each URL source.

        Covers three situations in sequence: no URL configured, a URL from a
        library registry integration, and an additional URL configured on a
        MARC export integration.
        """
        # Web client URLs can come from either the MARC export integration or
        # a library registry integration.

        annotator = LibraryAnnotator(self._default_library)

        # If no web catalog URLs are set for the library, nothing will be changed.
        record = Record()
        identifier = self._identifier(foreign_id="identifier")
        annotator.add_web_client_urls(record, self._default_library, identifier)
        eq_([], record.get_fields("856"))

        # Add a URL from a library registry.
        registry = self._external_integration(
            ExternalIntegration.OPDS_REGISTRATION, ExternalIntegration.DISCOVERY_GOAL,
            libraries=[self._default_library])
        ConfigurationSetting.for_library_and_externalintegration(
            self._db, Registration.LIBRARY_REGISTRATION_WEB_CLIENT,
            self._default_library, registry).value = "http://web_catalog"

        record = Record()
        annotator.add_web_client_urls(record, self._default_library, identifier)
        [field] = record.get_fields("856")
        eq_(["4", "0"], field.indicators)
        eq_("http://web_catalog/book/Gutenberg%20ID%2Fidentifier",
            field.get_subfields("u")[0])

        # Add a manually configured URL on a MARC export integration.
        integration = self._external_integration(
            ExternalIntegration.MARC_EXPORT, ExternalIntegration.CATALOG_GOAL,
            libraries=[self._default_library])

        ConfigurationSetting.for_library_and_externalintegration(
            self._db, MARCExporter.WEB_CLIENT_URL,
            self._default_library, integration).value = "http://another_web_catalog"

        # The registry URL configured above is still in place, so two 856
        # fields are expected; the export-integration URL comes first.
        record = Record()
        annotator.add_web_client_urls(record, self._default_library, identifier, integration)
        [field1, field2] = record.get_fields("856")
        eq_(["4", "0"], field1.indicators)
        eq_("http://another_web_catalog/book/Gutenberg%20ID%2Fidentifier",
            field1.get_subfields("u")[0])

        eq_(["4", "0"], field2.indicators)
        eq_("http://web_catalog/book/Gutenberg%20ID%2Fidentifier",
            field2.get_subfields("u")[0])
Exemplo n.º 54
0
def handleXLSX(mapping, sheet, outputFolder, rowsToIgnore):
    """Write one MARCXML file per spreadsheet row that yields an ISBN.

    Args:
        mapping: Column mapping as produced by ``loadMapping``; entry ``i``
            describes column ``i`` and its ``"field"`` value is empty for
            columns that are not mapped.
        sheet: The Excel sheet holding the data to extract.
        outputFolder: Folder that receives ``<isbn>.xml`` for each row.
        rowsToIgnore: Number of rows at the top of the sheet to skip.
    """
    for row in sheet.rows[rowsToIgnore:]:
        record = Record()
        for i, key_entry in enumerate(mapping):
            key = key_entry["field"]
            # An empty key means this column is not mapped.
            if key:
                # The fourth character of the mapped field is the subfield code.
                addField(record, key_entry, [key[3], unicode(row[i].value)])
        if DEBUG:
            print(record.title())

        # Rows without an ISBN are skipped: the ISBN names the output file.
        isbn = record.isbn()
        if isbn is not None:
            # Create unique UUID & add to record
            bookUUID = str(uuid.uuid1())
            record.add_field(Field(tag='001', data=bookUUID))

            # Write out our marcxml for each row
            writeMARCXML(record, os.path.join(outputFolder, isbn + '.xml'))
'''
Script to migrate book information to the Koha database.
'''
from pymarc import Record,Field,record_to_xml
import MySQLdb
record = Record()
print(dir(record))

# Source database (local book_info table) and target Koha database.
dbLo = MySQLdb.connect("localhost", "shailesh", "123", "shailesh")
dbKoha = MySQLdb.connect("localhost", "root", "", "koha1")
try:
    curaKoha = dbKoha.cursor()
    curaLocl = dbLo.cursor()

    # One row per distinct book (bibliographic level) ...
    curaLocl.execute("select BookNo,BookName,authorName from book_info group by BookNo;")
    dat = curaLocl.fetchall()
    # ... and one row per physical copy (item level).
    curaLocl.execute("select accession,BookNo,callNo from book_info;")
    datIte = curaLocl.fetchall()

    for book_no, book_name, author_name in dat:
        record = Record()
        record.add_field(Field(tag='040', indicators=['0', '1'], subfields=['c', 'LIBRARY OF CONGRESS']))
        record.add_field(Field(tag='245', indicators=['0', '1'], subfields=['a', book_name]))
        record.add_field(Field(tag='942', indicators=['0', '1'], subfields=['2', 'book of parag', 'c', 'BOOK']))
        record.add_field(Field(tag='100', indicators=['0', '1'], subfields=['a', author_name]))
        record.add_field(Field(tag='999', indicators=['0', '1'], subfields=['c', '8', 'd', '8']))
        marcI = record_to_xml(record)
        # The book number doubles as biblionumber and biblioitemnumber.
        curaKoha.execute("insert into biblio(biblionumber,title,author) values(%s,%s,%s);", (book_no, book_name, author_name))
        curaKoha.execute("insert into biblioitems(biblionumber,biblioitemnumber,marcxml) values(%s,%s,%s);", (book_no, book_no, marcI))

    for accession, book_no, call_no in datIte:
        barcode = '1111' + str(accession)
        curaKoha.execute("insert into items(itemnumber,biblionumber,biblioitemnumber,barcode,itemcallnumber) values(%s,%s,%s,%s,%s);", (accession, book_no, book_no, barcode, call_no))

    # DB-API connections do not autocommit by default; without an explicit
    # commit the inserts are discarded when the connection closes.
    dbKoha.commit()
except Exception:
    # Leave the Koha database untouched if any insert fails.
    dbKoha.rollback()
    raise
finally:
    dbLo.close()
    dbKoha.close()
Exemplo n.º 56
0
def format_language(langcode):
    """Return the '<LNG>'-prefixed entry for ``langcode``.

    Codes missing from ``LANGUAGES`` fall back to ``PRIMARYLANG``.
    """
    language = LANGUAGES.get(langcode, PRIMARYLANG)
    return '<LNG>' + language
    
def preflabel_of(conc):
    """Return the preferred label of ``conc`` in ``PRIMARYLANG``.

    The label is the second item of the first result returned by
    ``g.preferredLabel``.  When there is none, a warning is written to
    stderr and an empty string is returned.
    """
    candidates = g.preferredLabel(conc, lang=PRIMARYLANG)
    try:
        first_match = candidates[0]
        return first_match[1]
    except IndexError:
        print >>sys.stderr, "WARNING: couldn't find label of %s, result: %s" % (conc,candidates)
        return ''

for conc in sorted(g.subjects(RDF.type, SKOS.Concept)):
    # Concepts flagged owl:deprecated are left out.
    if (conc, OWL.deprecated, Literal(True)) in g:
        continue
    rec = Record(leader=LEADER)

    # URI -> 001
    rec.add_field(Field(tag='001', data=conc))

    # dct:modified -> 005
    # Concepts without a modification date fall back to 2000-01-01;
    # otherwise toPython() gives a datetime.date or datetime.datetime.
    mod = g.value(conc, DCT.modified, None)
    modified = datetime.date(2000, 1, 1) if mod is None else mod.toPython()
Exemplo n.º 57
0
		# Fetch and parse the publication page at `url`.
		parsed_html = getParsedHTML(url)
		# Get raw publication metadata
		trList = parsed_html.find(class_="table itemDisplayTable").find_all('tr')
		# Sort metadata
		for tr in trList:
			# The label text, minus its last two characters and lower-cased,
			# becomes the key; the value cell's text becomes the value.
			data[tr.find(class_="metadataFieldLabel").string[:-2].lower()] = tr.find(class_="metadataFieldValue").string
		# Record where all this data came from
		data[u'originURL'] = url
		# If the pdf link exists, download book and print metadata
		aPdf = parsed_html.find('td', headers='t0', class_="standard").a
		if (aPdf):
			# Get PDF
			pdfUrl = 'http://apps.who.int' + aPdf['href']
			# NOTE(review): the saved file's extension is taken from `url`,
			# not from `pdfUrl` -- confirm that this is intended.
			urllib.urlretrieve(pdfUrl, SAVE_LOCATION + data['UUID'] + '.' + url.rsplit('.')[-1])
			# Print metadata in MARCXML
			record = Record()
			for key in data:
				if key in MARCMapping:
					if(key == u'UUID'):
						# The UUID mapping is used as the tag as-is, with
						# the value passed as field data.
						field = Field(
							tag = MARCMapping[key],
							data = data[key])
					else:
						# Mapped value: first three characters are the tag,
						# the fourth is the subfield code.
						field = Field(
							tag = MARCMapping[key][:3],
							subfields = [MARCMapping[key][3], data[key]],
							indicators=['0', '0'])  
					record.add_field(field)
			# The MARCXML is written next to the download, named after the UUID.
			writer = XMLWriter(open(SAVE_LOCATION + data[u'UUID'] + '.xml', 'wb'))
			writer.write(record)
			writer.close() 
Exemplo n.º 58
0

#-------------------------------------------------

# Input: one OSTI identifier per row, in the first column.
f = open('ostinos.csv')
csv_f = csv.reader(f)

# CSV report of the harvested metadata.
out = open('osti_recs.csv', 'w')
data = csv.writer(out)
data.writerow(['Title', 'Author', 'Date', 'Subjects', 'Description', 'OstiID', 'DOI', 'Report Number', 'DOE Number', 'URL', ''])

# MARC transmission format is binary, so the file is opened in binary mode
# to keep text-mode newline translation and encoding away from the records.
marcOut = open('ostimarc.mrc', 'wb')

dc = '{http://purl.org/dc/elements/1.1/}'
dcq = '{http://purl.org/dc/terms/}'

try:
    for number in csv_f:
        # Blank lines in the input produce empty rows; skip them.
        if not number:
            continue
        ostiId = number[0]
        marc = Record()  # Create a new record for each loop.
        tree = etree.parse('http://www.osti.gov/scitech/scitechxml?Identifier=' + ostiId + '.xml')
        for node in tree.iter():
            # Only the entry whose ostiId matches the requested one is used.
            if node.tag == dc + 'ostiId' and node.text == ostiId:
                osti = node.getparent().getchildren()
                getRecs(osti, data)
                getMarc(osti, marc)

        marcOut.write(marc.as_marc())  # Write each new record.
finally:
    # Flush and release all three handles even if a download fails.
    marcOut.close()
    out.close()
    f.close()
Exemplo n.º 59
0
class XmlHandler(ContentHandler):
    """SAX content handler that assembles pymarc records from MARCXML events.

    Each finished record is passed to ``process_record``, which by default
    appends it to ``self.records``.  Subclasses can override
    ``process_record`` to stream records elsewhere (for example into an
    RDBMS) instead of keeping them all in memory.
    """

    def __init__(self, strict=False, normalize_form=None):
        self._strict = strict
        self.normalize_form = normalize_form
        self.records = []
        # Parsing state for the element currently being read.
        self._record = None
        self._field = None
        self._subfield_code = None
        self._text = []

    def startElementNS(self, name, qname, attrs):
        """Open the record, field or subfield announced by a start tag."""
        if self._strict and name[0] != MARC_XML_NS:
            return

        element = name[1]
        self._text = []

        if element == 'record':
            self._record = Record()
        elif element == 'subfield':
            self._subfield_code = attrs[(None, 'code')]
        elif element in ('controlfield', 'datafield'):
            tag = attrs.getValue((None, u'tag'))
            if element == 'controlfield':
                self._field = Field(tag)
            else:
                # A missing indicator attribute defaults to a blank.
                indicators = [
                    attrs.get((None, u'ind1'), u' '),
                    attrs.get((None, u'ind2'), u' '),
                ]
                self._field = Field(tag, indicators)

    def endElementNS(self, name, qname):
        """Store the collected text and close the element that just ended."""
        if self._strict and name[0] != MARC_XML_NS:
            return

        element = name[1]
        text = u''.join(self._text)
        if self.normalize_form is not None:
            text = unicodedata.normalize(self.normalize_form, text)

        if element == 'record':
            self.process_record(self._record)
            self._record = None
        elif element == 'leader':
            self._record.leader = text
        elif element in ('controlfield', 'datafield'):
            # Only control fields carry their text directly as field data.
            if element == 'controlfield':
                self._field.data = text
            self._record.add_field(self._field)
            self._field = None
        elif element == 'subfield':
            self._field.subfields.extend((self._subfield_code, text))
            self._subfield_code = None

        self._text = []

    def characters(self, chars):
        """Buffer a chunk of character data for the current element."""
        self._text.append(chars)

    def process_record(self, record):
        """Keep a finished record; override to handle it differently."""
        self.records.append(record)
Exemplo n.º 60
0
def epub_to_marc(fname, conf_file=None):
    """Build a pymarc ``Record`` from the metadata embedded in an EPUB file.

    The package document is located through ``META-INF/container.xml`` and
    its Dublin Core metadata is mapped to MARC fields.  Fixed-field settings
    come from ``conf_file`` or, when that is not given, from
    ``DEFAULT_CONF``.

    Args:
        fname: The ``.epub`` file to read (anything ``zipfile.ZipFile``
            accepts).
        conf_file: Optional path of an INI file with ``leader``, ``006``,
            ``007``, ``008``, ``040`` and ``264`` sections.

    Returns:
        The populated ``Record``: fields 005-008 and 040 always, 020 when an
        ISBN identifier is present, 245 when both a title and a creator are
        present, and 264 when publisher/date or a copyright year is present.

    Raises:
        ValueError: If ``container.xml`` names no package document.
        KeyError: If a required ``040`` or ``264`` setting is missing from
            the configuration.
    """
    ns = {
        'n': 'urn:oasis:names:tc:opendocument:xmlns:container',
        'pkg': 'http://www.idpf.org/2007/opf',
        'dc': 'http://purl.org/dc/elements/1.1/',
    }

    # An .epub is a zip archive; the context manager makes sure it is closed
    # again once the two XML documents have been read.
    with zipfile.ZipFile(fname) as archive:
        # find the contents metafile
        container = etree.fromstring(archive.read('META-INF/container.xml'))
        cfname = None
        for el in container:
            for elel in el:
                for attr_name, attr_value in elel.items():
                    if attr_name == 'full-path':
                        cfname = attr_value
        if cfname is None:
            raise ValueError(
                "META-INF/container.xml has no rootfile with a "
                "full-path attribute")

        # grab the metadata block from the contents metafile
        package = etree.fromstring(archive.read(cfname))
    p = package.xpath('/pkg:package/pkg:metadata', namespaces=ns)[0]

    def first_text(expression):
        """Return the first text node matching *expression*, or None."""
        matches = p.xpath(expression, namespaces=ns)
        return matches[0] if matches else None

    # Read from the config file
    conf = configparser.ConfigParser()
    if conf_file:
        conf.read(conf_file)
    else:
        conf.read_string(DEFAULT_CONF)

    def section_options(section):
        """Return the options of *section* as a dict ({} when absent)."""
        if not conf.has_section(section):
            return {}
        return {option: conf.get(section, option)
                for option in conf.options(section)}

    leader_dict = section_options('leader')
    tag_006_dict = section_options('006')
    tag_007_dict = section_options('007')
    tag_008_dict = section_options('008')
    tag_040_dict = section_options('040')
    tag_264_dict = section_options('264')

    record = Record(force_utf8=True)
    # set the leader
    record.leader = build_leader(leader_dict)
    # I *think* it's updating the 'Base Address of Data' position when
    # it is written to file, so I have kept characters 12-16 blank.
    # Field 005
    record.add_field(Field(tag='005', data=build_tag_005()))
    # Field 006
    record.add_field(Field(tag='006', data=build_tag_006(tag_006_dict,
                                                         tag_008_dict)))
    # Field 007
    record.add_field(Field(tag='007', data=build_tag_007(tag_007_dict)))
    # Field 008
    record.add_field(Field(tag='008', data=build_tag_008(tag_008_dict,
                                                         p, ns)))

    # Field 020
    # The ISBN may be marked either by id="ISBN" or by an opf:scheme
    # attribute; the id form takes precedence.
    epub_isbn = first_text('dc:identifier[@id="ISBN"]/text()')
    if epub_isbn is None:
        epub_isbn = first_text('dc:identifier[@pkg:scheme="ISBN"]/text()')
    if epub_isbn is not None:
        # The field used to be built but never attached to the record.
        record.add_field(Field(
            tag='020',
            indicators=[' ', ' '],
            subfields=['a', epub_isbn.strip(), 'q', 'epub']))

    # Field 040
    # First, check if the indicators are empty and if they are,
    # turn them into single spaces.
    for value in ('indicator_1', 'indicator_2'):
        if tag_040_dict[value] == '':
            tag_040_dict[value] = ' '
    record.add_field(Field(
        tag='040',
        indicators=[tag_040_dict['indicator_1'],
                    tag_040_dict['indicator_2']],
        subfields=['a', tag_040_dict['subfield_a'],
                   'b', tag_040_dict['subfield_b'],
                   'e', tag_040_dict['subfield_e'],
                   'c', tag_040_dict['subfield_c']]))

    # Field 245
    # All three parts default to None so that an EPUB without a title or
    # creator skips the field instead of failing on an unbound name.
    title = None
    subtitle = None
    full_title = first_text('dc:title/text()')
    if full_title is not None:
        if ":" in full_title:
            colon = full_title.index(':')
            title = full_title[:colon].strip()
            subtitle = full_title[colon + 1:].strip()
        else:
            title = full_title
    creator_statement = first_text('dc:creator/text()')

    if title and creator_statement:
        # Second indicator: characters to skip when filing, i.e. a leading
        # non-filing word plus the space that follows it.
        offset = 0
        if ' ' in title:
            first_word = title.split(' ')[0]
            if first_word.lower() in NON_FILING_WORDS:
                offset = len(first_word) + 1
        if subtitle:
            title_subfields = ['a', title + " :",
                               'b', subtitle + " /",
                               'c', creator_statement]
        else:
            title_subfields = ['a', title + " /",
                               'c', creator_statement]
        # Indicators are passed as strings, like every other field here.
        record.add_field(Field('245', ['0', str(offset)], title_subfields))

    # Field 264
    publisher = first_text('dc:publisher/text()')
    publication_date = first_text('dc:date/text()')
    if publisher is not None and publication_date is not None:
        record.add_field(Field('264', [' ', '1'],
            ['a', tag_264_dict['subfield_a'] + ' :',
             'b', publisher + ", ",
             'c', publication_date]))

    rights = first_text('dc:rights/text()')
    if rights is not None:
        copyright_statement = ""
        copyright_symbol = "©"
        # The last word of the rights statement that is a known copyright
        # year wins.
        for word in rights.split():
            if word in copyright_year_range:
                copyright_statement = copyright_symbol + word
        if len(copyright_statement) > 4:
            record.add_field(Field('264', [' ', '4'],
                ['c', copyright_statement]))
    return record