def _detect_collections_from_marcxml_file(recs): """ Extract all possible recIDs from MARCXML file and guess collections for these recIDs. """ from invenio.bibrecord import record_get_field_values from invenio.search_engine import guess_collection_of_a_record from invenio.bibupload import find_record_from_sysno, \ find_records_from_extoaiid, \ find_record_from_oaiid dbcollids = {} sysno_tag = CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG oaiid_tag = CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG oai_tag = CFG_OAI_ID_FIELD for rec, dummy1, dummy2 in recs: if rec: for tag001 in record_get_field_values(rec, '001'): collection = guess_collection_of_a_record(int(tag001)) dbcollids[collection] = 1 for tag_sysno in record_get_field_values(rec, tag=sysno_tag[:3], ind1=sysno_tag[3], ind2=sysno_tag[4], code=sysno_tag[5]): record = find_record_from_sysno(tag_sysno) if record: collection = guess_collection_of_a_record(int(record)) dbcollids[collection] = 1 for tag_oaiid in record_get_field_values(rec, tag=oaiid_tag[:3], ind1=oaiid_tag[3], ind2=oaiid_tag[4], code=oaiid_tag[5]): try: records = find_records_from_extoaiid(tag_oaiid) except Error: records = [] if records: record = records.pop() collection = guess_collection_of_a_record(int(record)) dbcollids[collection] = 1 for tag_oai in record_get_field_values(rec, tag=oai_tag[0:3], ind1=oai_tag[3], ind2=oai_tag[4], code=oai_tag[5]): record = find_record_from_oaiid(tag_oai) if record: collection = guess_collection_of_a_record(int(record)) dbcollids[collection] = 1 return dbcollids.keys()
break else: rec_id = search_pattern(p=identifier, f=matching, m='e') if not rec_id: errors.append((docfile, err_desc[2])) continue elif len(rec_id) > 1: errors.append((docfile, err_desc[1])) continue else: rec_id = str(list(rec_id)[0]) write_message("%s matches to record %s" % (docfile, rec_id)) # Check if user has rights to upload file if req is not None: file_collection = guess_collection_of_a_record(int(rec_id)) auth_code, auth_message = acc_authorize_action( req, 'runbatchuploader', collection=file_collection) if auth_code != 0: error_msg = err_desc[5] % file_collection errors.append((docfile, error_msg)) continue # Move document to be uploaded to temporary folder (fd, tmp_file) = tempfile.mkstemp( prefix=identifier + "_" + time.strftime("%Y%m%d%H%M%S", time.localtime()) + "_", suffix=extension, dir=CFG_TMPSHAREDDIR) shutil.copy(os.path.join(folder, docfile), tmp_file)
for bibdoc in rec_info.bibdocs: attached_files = bibdoc.list_all_files() file_md5 = md5(open(os.path.join(folder, docfile), "rb").read()).hexdigest() num_errors = len(errors) for attached_file in attached_files: if attached_file.checksum == file_md5: errors.append((docfile, err_desc[3])) break elif attached_file.get_full_name() == docfile: errors.append((docfile, err_desc[4])) break if len(errors) > num_errors: continue # Check if user has rights to upload file if req is not None: file_collection = guess_collection_of_a_record(int(rec_id)) auth_code, auth_message = acc_authorize_action(req, 'runbatchuploader', collection=file_collection) if auth_code != 0: error_msg = err_desc[5] % file_collection errors.append((docfile, error_msg)) continue tempfile.tempdir = CFG_TMPSHAREDDIR # Move document to be uploaded to temporary folder tmp_file = tempfile.mktemp(prefix=identifier + "_" + time.strftime("%Y%m%d%H%M%S", time.localtime()) + "_", suffix=extension) shutil.copy(os.path.join(folder, docfile), tmp_file) # Create MARC temporary file with FFT tag and call bibupload filename = tempfile.mktemp(prefix=identifier + '_') filedesc = open(filename, 'w') marc_content = """ <record> <controlfield tag="001">%(rec_id)s</controlfield> <datafield tag="FFT" ind1=" " ind2=" ">