def process_bibcodes_to_delete(extraction_directory, upload_mode):
    """Create the MarcXML for the bibcodes to delete and upload it.

    Parameters:
        extraction_directory: name of the extraction directory (joined under
            settings.BASE_OUTPUT_PATH when writing output files).
        upload_mode: 'concurrent' to upload in-process via bibupload_merger,
            'bibupload' to write the XML to a file and submit a bibupload
            task; any other value is only logged as an error.

    Returns:
        True (always; failures are logged, not raised).
    """
    logger.info("In function %s" % (inspect.stack()[0][3],))

    #A single document is enough for all the bibcodes to delete:
    #the XML is really simple, so there is no need to split the content in groups.

    #base object for the tree
    doc = libxml2.newDoc("1.0")
    root = doc.newChild(None, "collection", None)

    #one record per bibcode to delete, with the 2 necessary datafields
    for bibcode in BIBCODES_TO_DELETE_LIST:
        record = root.newChild(None, 'record', None)
        #datafield 970 carries the bibcode itself
        d970 = record.newChild(None, 'datafield', None)
        d970.setProp('tag', '970')
        d970.setProp('ind1', '')
        d970.setProp('ind2', '')
        #libxml2's newChild does not escape entities in the content, so '&'
        #must be escaped explicitly (the previous replace('&', '&') was a no-op)
        sub = d970.newChild(None, 'subfield', bibcode.replace('&', '&amp;'))
        sub.setProp("code", "a")
        #datafield 980 carries the DELETED marker
        d980 = record.newChild(None, 'datafield', None)
        d980.setProp('tag', '980')
        d980.setProp('ind1', '')
        d980.setProp('ind2', '')
        sub = d980.newChild(None, 'subfield', "DELETED")
        sub.setProp("code", "c")

    #serialize the document, then free the underlying C structures
    marcxml_string = doc.serialize('UTF-8', 1)
    doc.freeDoc()
    del doc
    #record the bibcodes in the done bibcodes file
    w2f = write_files.WriteFile(extraction_directory, logger)
    w2f.write_done_bibcodes_to_file(BIBCODES_TO_DELETE_LIST)
    del w2f

    if upload_mode == 'concurrent':
        #transform the xml in bibrecords and upload with option append
        bibrecord_object = [elem[0] for elem in bibrecord.create_records(marcxml_string)]
        logger.warning('Upload of records to delete started.')
        bibupload_merger(bibrecord_object, logger, 'append')
        logger.warning('Upload of records to delete ended.')
    elif upload_mode == 'bibupload':
        #write the MarcXML to disk and submit it to bibupload ('-a' = append)
        filepath = os.path.join(settings.BASE_OUTPUT_PATH, extraction_directory, settings.BASE_BIBRECORD_FILES_DIR, settings.BIBCODE_TO_DELETE_OUT_NAME)
        with open(filepath, 'w') as marcxml_to_del_file:
            marcxml_to_del_file.write(marcxml_string)
        task_low_level_submission('bibupload', 'admin', '-a', filepath)
        logger.warning('File "%s" submitted to bibupload.' % filepath)
    else:
        logger.error('Upload mode "%s" not supported! File not uploaded' % upload_mode)
    return True
def upload_process(q_uplfile, lock_stdout, lock_donefiles, q_life, extraction_directory, extraction_name, upload_mode):
    """Worker that uploads the data in invenio.

    Consumes items from q_uplfile until the manager's 'WORKERS DONE'
    sentinel arrives.  Each work item is a (group_name, filepath) tuple;
    the file is uploaded either in-process ('concurrent' mode, pickled
    bibrecords fed to bibupload_merger) or by submitting a bibupload task
    ('bibupload' mode).  Signals completion on q_life before exiting.

    Parameters:
        q_uplfile: queue of files to upload (1- or 2-element sequences).
        lock_stdout: lock shared with the other workers (not used here).
        lock_donefiles: lock protecting the shared "uploaded files" list.
        q_life: queue used to notify the manager that this worker is done.
        extraction_directory: name of the extraction directory.
        extraction_name: name of the extraction (not used here).
        upload_mode: 'concurrent' or 'bibupload'; anything else is logged
            as an error and the file is skipped.
    """
    logger.warning(multiprocessing.current_process().name + ' (upload worker) Process started')

    #per-process logger writing to its own file inside the extraction directory
    fh = logging.FileHandler(os.path.join(pipeline_settings.BASE_OUTPUT_PATH, extraction_directory, pipeline_settings.BASE_LOGGING_PATH, multiprocessing.current_process().name+'_uploader_bibcodes.log'))
    fmt = logging.Formatter(pipeline_settings.LOGGING_FORMAT)
    fh.setFormatter(fmt)
    local_logger = logging.getLogger(pipeline_settings.LOGGING_UPLOAD_NAME)
    local_logger.addHandler(fh)
    local_logger.setLevel(logger.level)
    local_logger.propagate = False
    #I print the same message for the local logger
    local_logger.warning(multiprocessing.current_process().name + ' Process started')

    while(True):
        file_to_upload = q_uplfile.get()
        if len(file_to_upload) == 2:
            local_logger.info('Processing group "%s" with file "%s"' % (file_to_upload[0], file_to_upload[1]))
        else:
            local_logger.info('Message in queue "%s" ' % file_to_upload[0])
        #first of all I check if the group I'm getting is a message from the manager saying that the workers are done
        if file_to_upload[0] == 'WORKERS DONE':
            local_logger.info('No more workers active: stopping to upload...')
            break
        else:
            #otherwise I have to upload the file
            try:
                filepath = file_to_upload[1]
            except IndexError:
                logger.error('Received the unexpected message "%s" from upload queue.' % file_to_upload[0])
                break
            if upload_mode == 'concurrent':
                local_logger.warning('Upload of the group "%s" started' % file_to_upload[0])
                #load the pickled bibrecords; "with" guarantees the handle is
                #closed even if unpickling raises
                with open(filepath, 'rb') as file_obj:
                    merged_records = pickle.load(file_obj)
                #finally I upload
                bibupload_merger(merged_records, local_logger, 'replace_or_insert')
                #log the uploaded file; the lock is held as a context manager
                #so it is released even if the write raises
                with lock_donefiles:
                    with open(os.path.join(settings.BASE_OUTPUT_PATH, extraction_directory,settings.LIST_BIBREC_UPLOADED), 'a') as bibrec_file_obj:
                        bibrec_file_obj.write(filepath + '\n')
                local_logger.warning('Upload of the group "%s" ended' % file_to_upload[0])
                del merged_records
            elif upload_mode == 'bibupload':
                #NOTE(review): '--update-mode' is passed without a value here;
                #verify against the bibupload CLI that this is intended
                task_low_level_submission('bibupload', 'admin', '-i', '-r', '--pickled-input-file', '--update-mode', filepath)
                #take the same lock as the 'concurrent' branch: both branches
                #append to the shared "uploaded files" list
                with lock_donefiles:
                    with open(os.path.join(settings.BASE_OUTPUT_PATH, extraction_directory,settings.LIST_BIBREC_UPLOADED), 'a') as bibrec_file_obj:
                        bibrec_file_obj.write(filepath + '\n')
                local_logger.warning('File "%s" submitted to bibupload.' % filepath)
            else:
                local_logger.error('Upload mode "%s" not supported! File not uploaded' % upload_mode)

    #I tell the manager that I'm done and I'm exiting
    q_life.put(['UPLOAD DONE'])

    logger.warning(multiprocessing.current_process().name + ' (upload worker) job finished: exiting')
    local_logger.warning(multiprocessing.current_process().name + ' job finished: exiting')
    return
# Пример #3 ("Example #3" — page marker left over from web extraction,
# together with its vote count "0"; kept as a comment so the file parses)
def merge_bibcodes_and_upload(bibcodes):
    """Extract, merge and upload a batch of bibcodes.

    Raises the shared logger's threshold to WARNING, merges the records
    for *bibcodes*, and hands the merged result to bibupload_merger.
    """
    logger.setLevel(logging.WARNING)
    bibupload_merger(merge_bibcodes(bibcodes), logger)