def perform_upload_check(xml_record, mode):
    """
    Performs an upload simulation with the given record and mode.

    While the check runs, ``bibupload_module.write_message`` is replaced
    by a collector that stores the messages it receives.  The original
    writer is put back on every exit path (early return, normal return
    or exception).

    @param xml_record: record(s) in the form accepted by
        ``perform_basic_upload_checks`` and ``xml_marc_to_records``
    @param mode: upload mode option; its first two characters are dropped
        before the value is handed to ``bibupload``
    @return: string describing errors (collected messages joined by
        newlines; empty when nothing was collected)
    @rtype: string
    """
    error_cache = []

    def my_writer(msg, stream=sys.stdout, verbose=1):
        # ``stream`` is not used here; it is accepted so that callers
        # passing it to the replaced writer keep working.
        # Only verbosity-1 messages are kept, and lines containing
        # 'DONE' are skipped.
        if verbose == 1 and 'DONE' not in msg:
            error_cache.append(msg.strip())

    orig_writer = bibupload_module.write_message
    bibupload_module.write_message = my_writer
    # Everything after the monkeypatch lives inside try/finally so the
    # module-level writer can never stay replaced once we leave.
    try:
        error_cache.extend(perform_basic_upload_checks(xml_record))
        if error_cache:
            # There has been some critical error
            return '\n'.join(error_cache)

        recs = xml_marc_to_records(xml_record)
        upload_mode = mode[2:]
        # Adapt input data for bibupload function
        if upload_mode == "r insert-or-replace":
            upload_mode = "replace_or_insert"
        for record in recs:
            if record:
                record_strip_empty_volatile_subfields(record)
                record_strip_empty_fields(record)
                # pretend=True: simulate only, messages end up in error_cache
                bibupload(record, opt_mode=upload_mode, pretend=True)
    finally:
        bibupload_module.write_message = orig_writer
    return '\n'.join(error_cache)
# NOTE(review): the visible source defines ``generate_keywords`` a second
# time further down; the later definition rebinds this name -- confirm
# which copy is meant to stay.
def generate_keywords(req, recid, argd):
    """Extract keywords from the fulltexts.

    Do the extraction on the record with a recid equal to the parameter.
    It first checks whether the keywords are not already stored in the
    temp file (maybe from the previous run).  Otherwise it writes a page
    to ``req``: an access-denied message, a "generate" form, an
    "in progress" warning, a confirmation that extraction was scheduled,
    or a notice that no fulltext is available.

    :param req: req object; pages are emitted through ``req.write``.
    :param recid: record id.
    :param argd: arguments passed from web.  ``argd['ln']`` and
        ``argd['generate']`` are read, and the whole dict is forwarded
        as keyword arguments to ``tmpl_page_generate_keywords``.
    :return: the result of ``record_get_keywords`` when the temp file
        could be loaded; otherwise the tuple ``(0, {}, None)``.
    """
    ln = argd['ln']
    _ = gettext_set_language(ln)
    keywords = {}

    def _write_warning(text):
        # All warning pages share the same wrapping markup.
        req.write(template.tmpl_page_msg(
            msg='<div class="warningbox">%s</div>' % text))

    # check the files were not already generated
    abs_path = get_tmp_file(recid)
    if os.path.exists(abs_path):
        try:
            # Try to load the data from the tmp file
            recs = xml_marc_to_records(open_marc_file(abs_path))
            return record_get_keywords(recs[0])
        except Exception as err:
            # Best effort: an unreadable or empty temp file must not block
            # the page, so fall through -- but leave a trace, and let
            # KeyboardInterrupt/SystemExit propagate.
            log.info('Could not reuse stored keywords from %s: %s'
                     % (abs_path, err))

    # check it is allowed (for this user) to generate pages
    (exit_stat, msg) = acce.acc_authorize_action(req, 'runbibclassify')
    if exit_stat != 0:
        log.info('Access denied: ' + msg)
        msg = _("The site settings do not allow automatic keyword extraction")
        req.write(template.tmpl_page_msg(msg=msg))
        return 0, keywords, None

    # register generation
    bibdocfiles = BibRecDocs(recid).list_latest_files()
    if not bibdocfiles:
        # The continuation line starts at column 0 so that no source
        # indentation ends up inside the message.
        _write_warning(_(
            "Unfortunately, we don't have a PDF fulltext for this record in the storage, \
keywords cannot be generated using an automated process."))
        return 0, keywords, None

    # User arrived at a page, but no keywords are available
    inprogress, msg = _doc_already_submitted(recid)
    # Read the flag up front so a missing 'generate' key fails the same
    # way regardless of ``inprogress``.
    generate_requested = argd['generate'] == 'yes'

    if inprogress:
        # Same warning whether or not the "generate" button was clicked.
        _write_warning(_(msg))
    elif not generate_requested:
        # Display a form and give them possibility to generate keywords
        req.write(template.tmpl_page_generate_keywords(req=req, **argd))
    else:
        # after user clicked on "generate" button
        schedule_extraction(recid, taxonomy=bconfig.CFG_EXTRACTION_TAXONOMY)
        # NOTE(review): the two adjacent literals join as
        # "automatedkeyword" (missing space).  Left untouched because the
        # text is passed to ``_()`` and presumably serves as the
        # translation lookup key -- fix together with the catalogs.
        _write_warning(_(
            'We have registered your request, the automated'
            'keyword extraction will run after some time. \nPlease return back in a while.'))

    return 0, keywords, None
# NOTE(review): a function with this same name is already defined earlier
# in the visible source; being the later one, this definition is what the
# name ends up bound to -- confirm the duplicate is intended.
def generate_keywords(req, recid, argd):
    """Extract keywords from the fulltexts.

    Do the extraction on the record with a recid equal to the parameter.
    It first checks whether the keywords are not already stored in the
    temp file (maybe from the previous run).  When they are not, a page is
    written to ``req`` instead: access denied, a "generate" form, an
    "in progress" warning, a "request registered" confirmation, or a
    "no fulltext" notice.

    :param req: req object; output goes through ``req.write``.
    :param recid: record id.
    :param argd: arguments passed from web.  The keys ``"ln"`` and
        ``"generate"`` are read here; the full dict is also expanded into
        keyword arguments for ``tmpl_page_generate_keywords``.
    :return: whatever ``record_get_keywords`` returns when the temp file
        loads successfully; in every other case ``(0, {}, None)``.
    """
    ln = argd["ln"]
    _ = gettext_set_language(ln)
    keywords = {}

    def _warn(text):
        # Single place for the warning-box markup used by several branches.
        req.write(
            template.tmpl_page_msg(msg='<div class="warningbox">%s</div>' % text)
        )

    # check the files were not already generated
    abs_path = get_tmp_file(recid)
    if os.path.exists(abs_path):
        try:
            # Try to load the data from the tmp file
            recs = xml_marc_to_records(open_marc_file(abs_path))
            return record_get_keywords(recs[0])
        except Exception as err:
            # Deliberately non-fatal: continue with the normal flow when the
            # temp file cannot be used, but record why.  Unlike a bare
            # ``except``, this lets KeyboardInterrupt/SystemExit through.
            log.info(
                "Could not reuse stored keywords from %s: %s" % (abs_path, err)
            )

    # check it is allowed (for this user) to generate pages
    (exit_stat, msg) = acce.acc_authorize_action(req, "runbibclassify")
    if exit_stat != 0:
        log.info("Access denied: " + msg)
        msg = _("The site settings do not allow automatic keyword extraction")
        req.write(template.tmpl_page_msg(msg=msg))
        return 0, keywords, None

    # register generation
    bibdocfiles = BibRecDocs(recid).list_latest_files()
    if not bibdocfiles:
        # Continuation line kept at column 0 so the message carries no
        # source indentation.
        _warn(
            _(
                "Unfortunately, we don't have a PDF fulltext for this record in the storage, \
keywords cannot be generated using an automated process."
            )
        )
        return 0, keywords, None

    # User arrived at a page, but no keywords are available
    inprogress, msg = _doc_already_submitted(recid)
    # Evaluate the flag before branching so a missing "generate" key raises
    # in every case.
    generate_requested = argd["generate"] == "yes"

    if inprogress:
        # Identical warning before and after the "generate" click.
        _warn(_(msg))
    elif not generate_requested:
        # Display a form and give them possibility to generate keywords
        req.write(template.tmpl_page_generate_keywords(req=req, **argd))
    else:
        # after user clicked on "generate" button
        schedule_extraction(recid, taxonomy=bconfig.CFG_EXTRACTION_TAXONOMY)
        # NOTE(review): the adjacent literals concatenate to
        # "automatedkeyword" (no space).  Not changed here since the string
        # goes through ``_()`` and presumably is the translation key --
        # correct it together with the message catalogs.
        _warn(
            _(
                "We have registered your request, the automated"
                "keyword extraction will run after some time. \nPlease return back in a while."
            )
        )

    return 0, keywords, None