def test_ht_marc(self):
    """Check the MARC record built by ``Utils.create_ht_marc`` for a barcode.

    Asserts that the serialized record contains exactly one OCLC marker,
    exactly one 999 field carrying the barcode in subfield ``i``, no
    remaining ``(Aleph`` reference, and the ``(GEU)Aleph...`` replacement in
    the last 035 field.
    """
    barcode = '010000666241'
    rec = Utils.create_ht_marc(barcode)

    # Only one field for the OCLC number.
    self.assertEqual(len(re.findall('OCoLC|ocm|ocn', rec.serialize())), 1)

    # Only one 999 field for HT ...
    self.assertEqual(len(rec.tag_999), 1)
    # ... and that one needs the barcode in subfield code "i".
    self.assertIn(
        '<subfield code="i">%s</subfield>' % barcode,
        rec.tag_999[0].serialize(),
    )

    # The Aleph reference must change from `(Aleph)..` to `(GEU)Aleph...`.
    # Raw string: `\(` is a regex escape, not a Python string escape.
    self.assertEqual(len(re.findall(r'\(Aleph', rec.serialize())), 0)
    # Make sure the new reference was added correctly; checking the last 035
    # field also verifies that it was put in the right spot.
    self.assertIn('(GEU)Aleph000116142', rec.field_035[-1].serialize())
def load(self, *args, **kwargs):
    """Scan the KDip data directory and create missing KDip objects.

    Per the original docstring this is a class method that scans the data
    directory specified in the ``localsettings`` **KDIP_DIR** and creates new
    KDIP objects in the database (``self.objects`` is used as the manager).

    :param args: optional. When given, ``args[0]`` is a KDip set to be
        reprocessed: it is re-fetched by primary key, its previous
        validation errors are deleted, it is validated again and
        ``Utils.create_ht_marc`` is called for it. No directory scan happens
        in that case.
    :param kwargs: optional ``kdip_enumcron`` (stored as the note of a newly
        created KDip and passed to ``Utils.update_999a``) and ``kdip_pid``
        (stored as the pid of a newly created KDip).
    """
    # The only caller that should send positional args is the reprocess
    # path; the KDip object is then the first (and only) argument.
    if args:
        reproc_kdip = args[0]
        # Re-fetch the object so we are working with the right type; just
        # using `args[0]` had issues, most notably with the METS validation.
        kdip = KDip.objects.get(pk=reproc_kdip.id)
        # Clear out previous validation errors.
        kdip.validationerror_set.all().delete()
        kdip.validate()
        Utils.create_ht_marc(kdip)
    else:
        kdip_list = {}
        exclude = ['%s/HT' % kdip_dir, '%s/out_of_scope' % kdip_dir, '%s/test' % kdip_dir]
        # An unset (None) or empty SKIP_DIR means "skip nothing". Testing
        # `None not in path` would raise TypeError and abort the whole scan.
        skip_dir = getattr(settings, 'SKIP_DIR', None)

        for path, subdirs, _files in os.walk(kdip_dir):
            for dirname in subdirs:
                starts_with_digit = re.search(r"^[0-9]", dirname)
                full_path = os.path.join(path, dirname)
                # Only queue KDips that are not in the database yet.
                try:
                    if not skip_dir or skip_dir not in path:
                        processed_kdip = KDip.objects.get(kdip_id=dirname)
                        # If a known KDip has moved, update its stored path.
                        # Compare the stored path (not the model instance)
                        # so unchanged records are not re-saved on each scan.
                        if processed_kdip.path != path:
                            processed_kdip.path = path
                            processed_kdip.save()
                except KDip.DoesNotExist:
                    if starts_with_digit and full_path not in exclude:
                        kdip_list[dirname] = path

        # Collects the ids of errant KDips.
        bad_kdips = []

        # Create the KDip if it does not exist.
        for k in kdip_list:
            try:
                # Look up the bib record; it supplies the OCLC number and
                # the note field.
                bib_rec = Utils.create_ht_marc(k[:12])

                # Make a readable list of the 035$a values by pulling the
                # text out of each serialized tag.
                oclc_tags = [
                    re.search('<.*>(.*?)</.*>', oclc_tag.serialize()).group(1)
                    for oclc_tag in bib_rec.tag_035a
                ]

                # The OCLC field can have a few patterns; take the first
                # match.
                # NOTE(review): `and` binds tighter than `or`, so the Alma
                # number exclusion only ever applied to the "ocn" pattern.
                # The parentheses make that existing behaviour explicit;
                # confirm whether it was meant to cover all three patterns.
                oclc = next(
                    oclc_val for oclc_val in oclc_tags
                    if "(OCoLC)" in oclc_val
                    or "ocm" in oclc_val
                    or ("ocn" in oclc_val
                        and bib_rec.alma_number not in oclc_val)
                )

                # Remove all non-numeric characters.
                oclc = re.sub("[^0-9]", "", oclc)

                # Fall back to 'EnumCron not found' if the 999a field is
                # empty or missing.
                note = bib_rec.note(k[:12]) or 'EnumCron not found'

                defaults = {
                    'create_date': datetime.fromtimestamp(
                        os.path.getctime('%s/%s' % (kdip_list[k], k))),
                    'note': note,
                    'path': kdip_list[k],
                    'oclc': oclc,
                }

                kdip, created = self.objects.get_or_create(kdip_id=k, defaults=defaults)
                if created:
                    logger.info("Created KDip %s", kdip.kdip_id)

                    enumcron = kwargs.get('kdip_enumcron')
                    if enumcron:
                        kdip.note = enumcron
                        Utils.update_999a(kdip.path, kdip.kdip_id, enumcron)

                    pid = kwargs.get('kdip_pid')
                    if pid:
                        kdip.pid = pid

                    kdip.validate()

                    # If the KDip had errors, add it to the list so an email
                    # alert can be sent.
                    if kdip.status == 'invalid':
                        bad_kdips.append(kdip.kdip_id)
            except Exception:
                # Best effort: one broken KDip must not stop the scan, so
                # record it and carry on. `Exception` (not a bare except)
                # lets KeyboardInterrupt/SystemExit propagate.
                bad_kdips.append(k)
                logger.error("Error creating KDip %s : %s", k, sys.exc_info()[0])

        # Newline-separated list of the errant KDip ids; not used within the
        # visible part of this function (presumably for the email alert).
        bad_kdip_list = '\n'.join(map(str, bad_kdips))