Python intbitset.union примеры использования

Язык программирования: Python

Пространство имен/Пакет: invenio.intbitset

Класс/Тип: intbitset

Метод/Функция: union

Примеров на hotexamples.com: 6

Python intbitset.union — найдено 6 примеров. Это лучшие примеры кода на Python для invenio.intbitset.intbitset.union, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

union(3)

Основные методы

union (3)

Пример #1

Показать файл

Файл: oai_repository_updater.py Проект: pombredanne/invenio

def get_recids_for_set_spec(set_spec):
    """
    Collects the recids matched by the definitions of one OAI set.

    Every definition returned by get_set_definitions() for 'set_spec'
    is run through perform_request_search(), and the hits of all the
    definitions are merged together.

    Parameters:

      set_spec - *str* the set_spec for which we would like to get the
                 recids

    Returns the union of all the hits, as a HitSet (empty when the
    set has no definition).
    """
    matched = HitSet()

    for definition in get_set_definitions(set_spec):
        # The 'c' value is split on commas and each piece is stripped
        # before being passed on as the list of collections to search.
        collections = [name.strip() for name in definition['c'].split(',')]

        hits = perform_request_search(c=collections,
                                      p1=definition['p1'],
                                      f1=definition['f1'],
                                      m1=definition['m1'],
                                      op1=definition['op1'],
                                      p2=definition['p2'],
                                      f2=definition['f2'],
                                      m2=definition['m2'],
                                      op2=definition['op2'],
                                      p3=definition['p3'],
                                      f3=definition['f3'],
                                      m3=definition['m3'],
                                      ap=0)

        # union() returns a new set, hence the re-binding
        matched = matched.union(HitSet(hits))

    return matched

Пример #2

Показать файл

def get_recids_for_set_spec(set_spec):
    """
    Returns, as a HitSet, the recids that belong to the given OAI set.

    The set may be described by several definitions (as returned by
    get_set_definitions()); each one is searched separately with
    perform_request_search() and the results are unioned.

    Parameters:

      set_spec - *str* the set_spec for which we would like to get the
                 recids
    """
    # Search criteria copied verbatim from a set definition, in the
    # order in which they are looked up.
    criteria_keys = ('p1', 'f1', 'm1', 'op1',
                     'p2', 'f2', 'm2', 'op2',
                     'p3', 'f3', 'm3')

    all_hits = HitSet()

    for set_def in get_set_definitions(set_spec):
        # Collections come as one comma-separated string
        search_args = {'c': [coll.strip()
                             for coll in set_def['c'].split(',')]}
        for key in criteria_keys:
            search_args[key] = set_def[key]
        search_args['ap'] = 0

        found = perform_request_search(**search_args)
        all_hits = all_hits.union(HitSet(found))

    return all_hits

Пример #3

Показать файл

Файл: oai_repository_updater.py Проект: pombredanne/invenio

def oairepositoryupdater_task():
    """Main business logic code of oai_archive.

    When the "report" task option is greater than 1, only the
    repository status is printed.  Otherwise the records matching the
    OAI set definitions, together with the records currently exported
    through OAI, are examined, and a MARCXML correction record is
    written to a temporary file for each record whose OAI ID or OAI
    sets need to be updated.  Unless the "no_upload" task option is
    set, a non-empty file is then handed to bibupload and an empty one
    is removed.

    Always returns True.
    """
    no_upload = task_get_option("no_upload")
    report = task_get_option("report")

    if report > 1:
        print_repository_status(verbose=report)
        return True

    task_update_progress("Fetching records to process")

    # Build the list of records to be processed, that is, search for
    # the records that match one of the search queries defined in OAI
    # Repository admin interface.
    recids_for_set = {} # Remember exactly which record belongs to which set
    recids = HitSet() # "Flat" set of the recids_for_set values
    for set_spec in all_set_specs():
        task_sleep_now_if_required(can_stop_too=True)
        _recids = get_recids_for_set_spec(set_spec)
        recids_for_set[set_spec] = _recids
        recids = recids.union(_recids)

    # Also get the list of records that are currently exported through
    # OAI and that might need to be refreshed
    oai_recids = perform_request_search(c=CFG_SITE_NAME,
                                        p1='oai:%s:*' % CFG_OAI_ID_PREFIX,
                                        f1=CFG_OAI_ID_FIELD,
                                        m1="e", ap=0)
    recids = recids.union(HitSet(oai_recids))

    # Prepare to save results in a tmp file
    (fd, filename) = mkstemp(dir=CFG_TMPDIR,
                             prefix='oairepository_' + \
                             time.strftime("%Y%m%d_%H%M%S_",
                                           time.localtime()))
    oai_out = os.fdopen(fd, "w")

    # try/finally guarantees that the file handle is released even if
    # something raises while the records are being processed.
    try:
        # recids is not modified below, so its size is computed once
        nb_recids = len(recids)

        # Iterate over the recids
        for i, recid in enumerate(recids):
            task_sleep_now_if_required(can_stop_too=True)
            task_update_progress("Done %s out of %s records." % \
                                 (i + 1, nb_recids))

            # Check if an OAI identifier is already in the record or
            # not. oai_id_entry stays empty when one is already there.
            oai_id_entry = ""
            oai_ids = [_oai_id for _oai_id in \
                       get_fieldvalues(recid, CFG_OAI_ID_FIELD) \
                       if _oai_id.strip() != '']
            if not oai_ids:
                oai_id_entry = "<subfield code=\"%s\">oai:%s:%s</subfield>\n" % \
                               (CFG_OAI_ID_FIELD[5:6], CFG_OAI_ID_PREFIX, recid)

            # Get the sets to which this record already belongs according
            # to the metadata
            current_oai_sets = set(\
                [_oai_set for _oai_set in \
                 get_fieldvalues(recid, CFG_OAI_SET_FIELD) \
                 if _oai_set.strip() != ''])

            # Get the sets that should be in this record according to
            # settings
            updated_oai_sets = set(\
                [_set for _set, _recids in recids_for_set.iteritems()
                 if recid in _recids])

            # Ok, we have the old sets and the new sets. If they are equal
            # and oai ID does not need to be added, then great, nothing to
            # change . Otherwise apply the new sets.
            if current_oai_sets == updated_oai_sets and not oai_id_entry:
                continue # Jump to next recid

            # Generate the xml sets entry. Because of the trailing
            # newline this string is never empty.
            oai_set_entry = '\n'.join(["<subfield code=\"%s\">%s</subfield>" % \
                                       (CFG_OAI_SET_FIELD[5:6], _oai_set) \
                                       for _oai_set in updated_oai_sets]) + \
                                       "\n"

            # Fetch the rest of the record's data with the OAI set and
            # OAI ID fields filtered out, so that it can be written
            # back next to the regenerated entries.
            # NOTE(review): presumably these are the other subfields of
            # the datafields CFG_OAI_SET_FIELD[:5] / CFG_OAI_ID_FIELD[:5]
            # -- confirm against marcxml_filter_out_tags.
            other_data = marcxml_filter_out_tags(recid, [CFG_OAI_SET_FIELD,
                                                         CFG_OAI_ID_FIELD])

            if oai_id_entry or oai_set_entry:
                oai_out.write("<record>\n")
                oai_out.write("<controlfield tag=\"001\">%s"
                              "</controlfield>\n" % recid)
                if CFG_OAI_ID_FIELD[0:5] == CFG_OAI_SET_FIELD[0:5]:
                    # Put set and OAI ID in the same datafield
                    oai_out.write(DATAFIELD_ID_HEAD)
                    oai_out.write("\n")
                    oai_out.write(oai_id_entry)
                    oai_out.write(oai_set_entry)
                    oai_out.write("</datafield>\n")
                else:
                    # Set and OAI ID live in distinct datafields
                    if oai_id_entry:
                        oai_out.write(DATAFIELD_ID_HEAD)
                        oai_out.write("\n")
                        oai_out.write(oai_id_entry)
                        oai_out.write("</datafield>\n")
                    if oai_set_entry:
                        oai_out.write(DATAFIELD_SET_HEAD)
                        oai_out.write("\n")
                        oai_out.write(oai_set_entry)
                        oai_out.write("</datafield>\n")
                oai_out.write(other_data)
                oai_out.write("</record>\n")
    finally:
        oai_out.close()

    write_message("Wrote to file %s" % filename)

    if not no_upload:
        task_sleep_now_if_required(can_stop_too=True)
        # Check if file is empty or not:
        len_file = os.stat(filename)[ST_SIZE]
        if len_file > 0:
            command = "%s/bibupload -c %s -u oairepository" % (CFG_BINDIR, filename)
            os.system(command)
        else:
            os.remove(filename)

    return True

Пример #4

Показать файл

Файл: oai_repository_updater.py Проект: pombredanne/invenio

def print_repository_status(write_message=write_message,
                            verbose=0):
    """
    Prints the repository status through the given output function.

    Parameters:

      write_message - *function* the function used to write the output

            verbose - *int* the verbosity of the output
                       - 0: print repository size
                       - 1: print quick status of each set (numbers
                         can be wrong if the repository is in some
                         inconsistent state, i.e. a record is in an
                         OAI setSpec but has not OAI ID)
                       - 2: print detailed status of repository, with
                         number of records that needs to be
                         synchronized according to the sets
                         definitions. Precise, but ~slow...

    Returns nothing. With verbose > 1 the footer also shows the size
    of the union of the records of all the sets after an update; this
    is 0 when there is no set at all.
    """
    repository_size_s = "%d" % repository_size()
    # Union of the records that should be in any of the sets
    repository_recids_after_update = HitSet()

    write_message(CFG_SITE_NAME)
    write_message(" OAI Repository Status")

    set_spec_max_length = 19 # How many max char do we display for
    set_name_max_length = 20 # setName and setSpec?

    if verbose == 0:
        # Just print repository size
        write_message("  Total(**)" + " " * 29 +
                      repository_size_s.rjust(9))
        return
    elif verbose == 1:
        # We display few information: show longer set name and spec
        set_spec_max_length = 30
        set_name_max_length = 30

    write_message("=" * 80)
    header = "  setSpec" + " " * (set_spec_max_length - 7) + \
             "  setName" + " " * (set_name_max_length - 5) + " Volume"
    if verbose > 1:
        header += " " * 5 + "After update(*):"
    write_message(header)

    if verbose > 1:
        write_message(" " * 57 + "Additions  Deletions")

    write_message("-" * 80)

    for set_spec in all_set_specs():

        if verbose <= 1:
            # Get the records that are in this set. This is an
            # incomplete check, as it can happen that some records are
            # in this set (according to the metadata) but have no OAI
            # ID (so they are not exported). This can happen if the
            # repository has some records coming from external
            # sources, or if it has never been synchronized with this
            # tool.
            current_recids = perform_request_search(c=CFG_SITE_NAME,
                                                    p1=set_spec,
                                                    f1=CFG_OAI_SET_FIELD,
                                                    m1="e", ap=0)
            nb_current_recids = len(current_recids)
        else:
            # Get the records that are *currently* exported for this
            # setSpec
            current_recids = perform_request_search(c=CFG_SITE_NAME,
                                                    p1=set_spec,
                                                    f1=CFG_OAI_SET_FIELD,
                                                    m1="e", ap=0, op1="a",
                                                    p2="oai:*",
                                                    f2=CFG_OAI_ID_FIELD,
                                                    m2="e")
            nb_current_recids = len(current_recids)
            # Get the records that *should* be in this set according to
            # the admin defined settings, and compute how many should be
            # added or removed
            should_recids = get_recids_for_set_spec(set_spec)
            repository_recids_after_update = repository_recids_after_update.union(should_recids)

            current_set = HitSet(current_recids)
            should_set = HitSet(should_recids)
            nb_add_recids = len(should_set.difference(current_set))
            nb_remove_recids = len(current_set.difference(should_set))
            nb_should_recids = len(should_recids)

        # Adapt setName and setSpec strings lengths
        set_spec_str = set_spec
        if len(set_spec_str) > set_spec_max_length:
            set_spec_str = "%s.." % set_spec_str[:set_spec_max_length]
        set_name_str = get_set_name_for_set_spec(set_spec)
        if len(set_name_str) > set_name_max_length:
            set_name_str = "%s.." % set_name_str[:set_name_max_length]

        # Columns are padded to the max length plus 2, which leaves
        # room for the ".." appended to truncated values.
        row = "  " + set_spec_str.ljust(set_spec_max_length + 2) + \
              set_name_str.ljust(set_name_max_length + 2) + \
              str(nb_current_recids).rjust(7)
        if verbose > 1:
            row += ('+' + str(nb_add_recids)).rjust(10) + \
                   ('-' + str(nb_remove_recids)).rjust(8) + " = " + \
                   str(nb_should_recids).rjust(7)
        write_message(row)

    write_message("=" * 80)
    footer = "  Total(**)" + " " * (set_spec_max_length + set_name_max_length - 7) + \
             repository_size_s.rjust(9)
    if verbose > 1:
        # Measure the accumulated set here rather than relying on a
        # counter assigned inside the loop: the loop body never runs
        # when there is no set, which used to leave the counter
        # undefined.
        footer += str(len(repository_recids_after_update)).rjust(28)
    write_message(footer)

    if verbose > 1:
        write_message('  *The "after update" columns show the repository after you run this tool.')
    else:
        write_message(' *"Volume" is indicative if repository is out of sync. Use --detailed-report.')
    write_message('**The "total" is not the sum of the above numbers, but the union of the records.')

Пример #5

Показать файл

def oairepositoryupdater_task():
    """Main business logic code of oai_archive.

    When the "report" task option is greater than 1, only the
    repository status is printed.  Otherwise the records matching the
    OAI set definitions, together with the records currently exported
    through OAI, are examined, and a <collection> of MARCXML correction
    records is written to a temporary file, one for each record whose
    OAI ID is missing or whose OAI sets changed.  Unless the
    "no_upload" task option is set, the file is then handed to
    bibupload if at least one record was written, and removed
    otherwise.

    Always returns True.
    """
    no_upload = task_get_option("no_upload")
    report = task_get_option("report")

    if report > 1:
        print_repository_status(verbose=report)
        return True

    task_update_progress("Fetching records to process")

    # Build the list of records to be processed, that is, search for
    # the records that match one of the search queries defined in OAI
    # Repository admin interface.
    recids_for_set = {}  # Remember exactly which record belongs to which set
    recids = HitSet()  # "Flat" set of the recids_for_set values
    for set_spec in all_set_specs():
        task_sleep_now_if_required(can_stop_too=True)
        _recids = get_recids_for_set_spec(set_spec)
        recids_for_set[set_spec] = _recids
        recids = recids.union(_recids)

    # Also get the list of records that are currently exported through
    # OAI and that might need to be refreshed
    oai_recids = perform_request_search(c=CFG_SITE_NAME,
                                        p1='oai:%s:*' % CFG_OAI_ID_PREFIX,
                                        f1=CFG_OAI_ID_FIELD,
                                        m1="e",
                                        ap=0)
    recids = recids.union(HitSet(oai_recids))

    # Prepare to save results in a tmp file
    (fd, filename) = mkstemp(dir=CFG_TMPDIR,
                             prefix='oairepository_' + \
                             time.strftime("%Y%m%d_%H%M%S_",
                                           time.localtime()))
    oai_out = os.fdopen(fd, "w")
    has_updated_records = False

    # try/finally guarantees that the file handle is released even if
    # something raises while the records are being processed.
    try:
        oai_out.write('<collection>')

        # recids is not modified below, so its size is computed once
        nb_recids = len(recids)

        # Iterate over the recids
        for i, recid in enumerate(recids):
            task_sleep_now_if_required(can_stop_too=True)
            task_update_progress("Done %s out of %s records." % \
                                 (i + 1, nb_recids))

            # The OAI identifier entry is always written for a record
            # that gets updated; what matters for skipping a record is
            # whether it already has a non-blank OAI identifier.
            oai_id_entry = "<subfield code=\"%s\">oai:%s:%s</subfield>\n" % \
                           (CFG_OAI_ID_FIELD[5:6], CFG_OAI_ID_PREFIX, recid)
            oai_ids = [_oai_id for _oai_id in \
                       get_fieldvalues(recid, CFG_OAI_ID_FIELD) \
                       if _oai_id.strip() != '']
            already_has_oai_id = len(oai_ids) > 0

            # Get the sets to which this record already belongs according
            # to the metadata
            current_oai_sets = set(\
                [_oai_set for _oai_set in \
                 get_fieldvalues(recid, CFG_OAI_SET_FIELD) \
                 if _oai_set.strip() != ''])

            # Get the sets that should be in this record according to
            # settings (falsy set specs are left out)
            updated_oai_sets = set(\
                [_set for _set, _recids in recids_for_set.iteritems()
                 if recid in _recids and _set])

            # Ok, we have the old sets and the new sets. If they are equal
            # and oai ID does not need to be added, then great, nothing to
            # change . Otherwise apply the new sets.
            if current_oai_sets == updated_oai_sets and already_has_oai_id:
                continue  # Jump to next recid

            has_updated_records = True

            # Generate the xml sets entry
            oai_set_entry = '\n'.join(["<subfield code=\"%s\">%s</subfield>" % \
                                       (CFG_OAI_SET_FIELD[5:6], _oai_set) \
                                       for _oai_set in updated_oai_sets if \
                                       _oai_set]) + \
                                       "\n"

            # Fetch the rest of the record's data with the OAI set and
            # OAI ID fields filtered out, so that it can be written
            # back next to the regenerated entries.
            # NOTE(review): presumably these are the other subfields of
            # the datafields CFG_OAI_SET_FIELD[:5] / CFG_OAI_ID_FIELD[:5]
            # -- confirm against marcxml_filter_out_tags.
            other_data = marcxml_filter_out_tags(
                recid, [CFG_OAI_SET_FIELD, CFG_OAI_ID_FIELD])

            oai_out.write("<record>\n")
            oai_out.write("<controlfield tag=\"001\">%s"
                          "</controlfield>\n" % recid)
            oai_out.write(DATAFIELD_ID_HEAD)
            oai_out.write("\n")
            oai_out.write(oai_id_entry)
            if CFG_OAI_ID_FIELD[0:5] != CFG_OAI_SET_FIELD[0:5]:
                # Set and OAI ID live in distinct datafields: close the
                # ID datafield and open the one for the sets. Otherwise
                # both entries share the datafield opened above.
                oai_out.write("</datafield>\n")
                oai_out.write(DATAFIELD_SET_HEAD)
                oai_out.write("\n")
            oai_out.write(oai_set_entry)
            oai_out.write("</datafield>\n")
            oai_out.write(other_data)
            oai_out.write("</record>\n")

        oai_out.write('</collection>')
    finally:
        oai_out.close()

    write_message("Wrote to file %s" % filename)

    if not no_upload:
        task_sleep_now_if_required(can_stop_too=True)
        if has_updated_records:
            command = "%s/bibupload -c %s -u oairepository" % (CFG_BINDIR,
                                                               filename)
            os.system(command)
        else:
            os.remove(filename)

    return True

Пример #6

Показать файл

def print_repository_status(write_message=write_message, verbose=0):
    """
    Prints the repository status through the given output function.

    Parameters:

      write_message - *function* the function used to write the output

            verbose - *int* the verbosity of the output
                       - 0: print repository size
                       - 1: print quick status of each set (numbers
                         can be wrong if the repository is in some
                         inconsistent state, i.e. a record is in an
                         OAI setSpec but has not OAI ID)
                       - 2: print detailed status of repository, with
                         number of records that needs to be
                         synchronized according to the sets
                         definitions. Precise, but ~slow...

    Returns nothing.
    """
    repository_size_s = "%d" % repository_size()
    repository_recids_after_update = HitSet()
    # Size of the set above. Initialised here because the footer reads
    # it when verbose > 1 even if the loop over the sets never runs
    # (no set defined), which would otherwise leave it unbound.
    nb_recids_after_update = 0

    write_message(CFG_SITE_NAME)
    write_message(" OAI Repository Status")

    set_spec_max_length = 19  # How many max char do we display for
    set_name_max_length = 20  # setName and setSpec?

    if verbose == 0:
        # Just print repository size
        write_message("  Total(**)" + " " * 29 + " " *
                      (9 - len(repository_size_s)) + repository_size_s)
        return
    elif verbose == 1:
        # We display few information: show longer set name and spec
        set_spec_max_length = 30
        set_name_max_length = 30

    write_message("=" * 80)
    header = "  setSpec" + " " * (set_spec_max_length - 7) + \
             "  setName" + " " * (set_name_max_length - 5) + " Volume"
    if verbose > 1:
        header += " " * 5 + "After update(*):"
    write_message(header)

    if verbose > 1:
        write_message(" " * 57 + "Additions  Deletions")

    write_message("-" * 80)

    for set_spec in all_set_specs():

        if verbose <= 1:
            # Get the records that are in this set. This is an
            # incomplete check, as it can happen that some records are
            # in this set (according to the metadata) but have no OAI
            # ID (so they are not exported). This can happen if the
            # repository has some records coming from external
            # sources, or if it has never been synchronized with this
            # tool.
            current_recids = perform_request_search(c=CFG_SITE_NAME,
                                                    p1=set_spec,
                                                    f1=CFG_OAI_SET_FIELD,
                                                    m1="e",
                                                    ap=0)
            nb_current_recids = len(current_recids)
        else:
            # Get the records that are *currently* exported for this
            # setSpec
            current_recids = perform_request_search(c=CFG_SITE_NAME,
                                                    p1=set_spec,
                                                    f1=CFG_OAI_SET_FIELD,
                                                    m1="e",
                                                    ap=0,
                                                    op1="a",
                                                    p2="oai:*",
                                                    f2=CFG_OAI_ID_FIELD,
                                                    m2="e")
            nb_current_recids = len(current_recids)
            # Get the records that *should* be in this set according to
            # the admin defined settings, and compute how many should be
            # added or removed
            should_recids = get_recids_for_set_spec(set_spec)
            repository_recids_after_update = repository_recids_after_update.union(
                should_recids)

            nb_add_recids = len(
                HitSet(should_recids).difference(HitSet(current_recids)))
            nb_remove_recids = len(
                HitSet(current_recids).difference(HitSet(should_recids)))
            nb_should_recids = len(should_recids)
            nb_recids_after_update = len(repository_recids_after_update)

        # Adapt setName and setSpec strings lengths
        set_spec_str = set_spec
        if len(set_spec_str) > set_spec_max_length:
            set_spec_str = "%s.." % set_spec_str[:set_spec_max_length]
        set_name_str = get_set_name_for_set_spec(set_spec)
        if len(set_name_str) > set_name_max_length:
            set_name_str = "%s.." % set_name_str[:set_name_max_length]

        # Columns are padded to the max length plus 2, which leaves
        # room for the ".." appended to truncated values.
        row = "  " + set_spec_str + \
               " " * ((set_spec_max_length + 2) - len(set_spec_str)) + set_name_str + \
               " " * ((set_name_max_length + 2) - len(set_name_str)) + \
               " " * (7 - len(str(nb_current_recids))) + str(nb_current_recids)
        if verbose > 1:
            row += \
                " " * max(9 - len(str(nb_add_recids)), 0) + '+' + str(nb_add_recids) + \
                " " * max(7 - len(str(nb_remove_recids)), 0) + '-' + str(nb_remove_recids) + " = " +\
                " " * max(7 - len(str(nb_should_recids)), 0) + str(nb_should_recids)
        write_message(row)

    write_message("=" * 80)
    footer = "  Total(**)" + " " * (set_spec_max_length + set_name_max_length - 7) + \
             " " * (9 - len(repository_size_s)) + repository_size_s
    if verbose > 1:
        footer += ' ' * (28 - len(str(nb_recids_after_update))) + str(
            nb_recids_after_update)
    write_message(footer)

    if verbose > 1:
        write_message(
            '  *The "after update" columns show the repository after you run this tool.'
        )
    else:
        write_message(
            ' *"Volume" is indicative if repository is out of sync. Use --detailed-report.'
        )
    write_message(
        '**The "total" is not the sum of the above numbers, but the union of the records.'
    )