Exemplo n.º 1
0
def MERGE_SMALL(writer, segments):
    """This policy merges small segments, where "small" is defined using a
    heuristic based on the fibonacci sequence.
    """

    from whoosh.reading import SegmentReader

    unchanged_segments = []
    segments_to_merge = []

    sorted_segment_list = sorted(segments, key=lambda s: s.doc_count_all())
    total_docs = 0

    merge_point_found = False
    for i, seg in enumerate(sorted_segment_list):
        count = seg.doc_count_all()
        if count > 0:
            total_docs += count

        if merge_point_found:  # append the remaining to unchanged
            unchanged_segments.append(seg)
        else:  # look for a merge point
            segments_to_merge.append((seg, i)) # merge every segment up to the merge point
            if i > 3 and total_docs < fib(i + 5):
                merge_point_found = True

    if merge_point_found and len(segments_to_merge) > 1:
        for seg, i in segments_to_merge:
            reader = SegmentReader(writer.storage, writer.schema, seg)
            writer.add_reader(reader)
            reader.close()
        return unchanged_segments
    else:
        return segments
Exemplo n.º 2
0
def MERGE_SMALL(writer, segments):
    """This policy merges small segments, where "small" is defined using a
    heuristic based on the fibonacci sequence.
    """

    from whoosh.reading import SegmentReader

    unchanged_segments = []
    segments_to_merge = []

    sorted_segment_list = sorted(segments, key=lambda s: s.doc_count_all())
    total_docs = 0

    merge_point_found = False
    for i, seg in enumerate(sorted_segment_list):
        count = seg.doc_count_all()
        if count > 0:
            total_docs += count

        if merge_point_found:  # append the remaining to unchanged
            unchanged_segments.append(seg)
        else:  # look for a merge point
            segments_to_merge.append(
                (seg, i))  # merge every segment up to the merge point
            if i > 3 and total_docs < fib(i + 5):
                merge_point_found = True

    if merge_point_found and len(segments_to_merge) > 1:
        for seg, i in segments_to_merge:
            reader = SegmentReader(writer.storage, writer.schema, seg)
            writer.add_reader(reader)
            reader.close()
        return unchanged_segments
    else:
        return segments
Exemplo n.º 3
0
def OPTIMIZE(writer, segments):
    """This policy merges all existing segments.
    """

    from whoosh.reading import SegmentReader

    for seg in segments:
        reader = SegmentReader(writer.storage, writer.schema, seg)
        writer.add_reader(reader)
        reader.close()
    return []
Exemplo n.º 4
0
def OPTIMIZE(writer, segments):
    """This policy merges all existing segments.
    """

    from whoosh.reading import SegmentReader

    for seg in segments:
        reader = SegmentReader(writer.storage, writer.schema, seg)
        writer.add_reader(reader)
        reader.close()
    return []
Exemplo n.º 5
0
def MERGE_CUSTOM(writer, segments):
    """This policy merges small segments, where "small" is defined using a
    heuristic based on the fibonacci sequence.
    """

    from whoosh.reading import SegmentReader
    from whoosh.util import fib

    unchanged_segments = []
    segments_to_merge = []

    sorted_segment_list = sorted(segments, key=lambda s: s.doc_count_all())
    total_docs = 0

    log_stats = False

    merge_point_found = False
    for i, seg in enumerate(sorted_segment_list):
        count = seg.doc_count_all()
        if count > 0:
            total_docs += count

        if log_stats:
            logger.debug("%s: %s/%s, fib %s", i, count, total_docs, fib(i + 5))

        if merge_point_found:
            unchanged_segments.append(seg)
        else:
            segments_to_merge.append((seg, i))
            if i > 3 and total_docs < fib(i + 5):
                logger.debug("Merge point found at %s - %s", i, total_docs)
                merge_point_found = True

    if merge_point_found and len(segments_to_merge) > 1:
        for seg, i in segments_to_merge:
            logger.info("Merging segment %s having size %s", i,
                        seg.doc_count_all())
            reader = SegmentReader(writer.storage, writer.schema, seg)
            writer.add_reader(reader)
            reader.close()
        return unchanged_segments
    else:
        logger.debug("No merge point found, no merge yet")
        return segments
Exemplo n.º 6
0
def MERGE_CUSTOM(writer, segments):
    """This policy merges small segments, where "small" is defined using a
    heuristic based on the fibonacci sequence.
    """

    from whoosh.reading import SegmentReader
    from whoosh.util import fib

    unchanged_segments = []
    segments_to_merge = []

    sorted_segment_list = sorted(segments, key=lambda s: s.doc_count_all())
    total_docs = 0

    log_stats = False

    merge_point_found = False
    for i, seg in enumerate(sorted_segment_list):
        count = seg.doc_count_all()
        if count > 0:
            total_docs += count

        if log_stats:
            logger.debug("%s: %s/%s, fib %s", i, count, total_docs, fib(i+5))

        if merge_point_found:
            unchanged_segments.append(seg)
        else:
            segments_to_merge.append((seg, i))
            if i > 3 and total_docs < fib(i + 5):
                logger.debug("Merge point found at %s - %s", i, total_docs)
                merge_point_found = True

    if merge_point_found and len(segments_to_merge) > 1:
        for seg, i in segments_to_merge:
            logger.info("Merging segment %s having size %s", i, seg.doc_count_all())
            reader = SegmentReader(writer.storage, writer.schema, seg)
            writer.add_reader(reader)
            reader.close()
        return unchanged_segments
    else:
        logger.debug("No merge point found, no merge yet")
        return segments
Exemplo n.º 7
0
def MERGE_SMALL(writer, segments):
    """This policy merges small segments, where "small" is defined using a
    heuristic based on the fibonacci sequence.
    """

    from whoosh.reading import SegmentReader

    newsegments = []
    sorted_segment_list = sorted(segments, key=lambda s: s.doc_count_all())
    total_docs = 0
    for i, seg in enumerate(sorted_segment_list):
        count = seg.doc_count_all()
        if count > 0:
            total_docs += count
            if total_docs < fib(i + 5):
                reader = SegmentReader(writer.storage, writer.schema, seg)
                writer.add_reader(reader)
                reader.close()
            else:
                newsegments.append(seg)
    return newsegments
Exemplo n.º 8
0
def MERGE_SMALL(writer, segments):
    """This policy merges small segments, where "small" is defined using a
    heuristic based on the fibonacci sequence.
    """

    from whoosh.reading import SegmentReader

    newsegments = []
    sorted_segment_list = sorted(segments, key=lambda s: s.doc_count_all())
    total_docs = 0
    for i, seg in enumerate(sorted_segment_list):
        count = seg.doc_count_all()
        if count > 0:
            total_docs += count
            if total_docs < fib(i + 5):
                reader = SegmentReader(writer.storage, writer.schema, seg)
                writer.add_reader(reader)
                reader.close()
            else:
                newsegments.append(seg)
    return newsegments