示例#1
0
def MERGE_SQUARES(writer, segments):
    """Merge segments grouped by the order of magnitude of their size.

    This is an alternative merge policy similar to Lucene's. It is less
    optimal than the default MERGE_SMALL.

    For each limit in (10, 100, 1000, 10000, 100000), the segments whose
    ``doc_count_all()`` falls in ``[limit // 10, limit - 1)`` form a bucket.
    A bucket holding at least ten segments is merged into ``writer`` in
    full; smaller buckets are left alone.

    :param writer: object providing ``storage``, ``schema`` and
        ``add_reader(reader)``.
    :param segments: mutable list of segments. Segments chosen for merging
        are removed from this list in place.
    :returns: the same ``segments`` list, holding only the segments that
        were not merged.
    """

    from whoosh.filedb.filereading import SegmentReader

    sizedsegs = [(s.doc_count_all(), s) for s in segments]
    tomerge = []
    for size in (10, 100, 1000, 10000, 100000):
        # NOTE(review): the ``size - 1`` upper bound means segments with
        # exactly 9, 99, 999, ... documents never land in any bucket --
        # confirm whether that is intended before changing it.
        smaller = [
            seg for segsize, seg in sizedsegs
            if size // 10 <= segsize < size - 1
        ]
        if len(smaller) >= 10:
            tomerge.extend(smaller)
            for seg in smaller:
                segments.remove(seg)

    for seg in tomerge:
        reader = SegmentReader(writer.storage, writer.schema, seg)
        try:
            writer.add_reader(reader)
        finally:
            # Always release the reader, even when add_reader() raises.
            reader.close()

    return segments
示例#2
0
def OPTIMIZE(writer, segments):
    """Merge all existing segments into the writer.

    Every segment in ``segments`` is opened with a ``SegmentReader`` built
    from ``writer.storage`` and ``writer.schema`` and passed to
    ``writer.add_reader``.

    :param writer: object providing ``storage``, ``schema`` and
        ``add_reader(reader)``.
    :param segments: iterable of segments to merge.
    :returns: an empty list, because no segment is left unmerged.
    """

    from whoosh.filedb.filereading import SegmentReader

    for seg in segments:
        reader = SegmentReader(writer.storage, writer.schema, seg)
        try:
            writer.add_reader(reader)
        finally:
            # Always release the reader, even when add_reader() raises.
            reader.close()
    return []
def OPTIMIZE(writer, segments):
    """Merge all existing segments into the writer.

    Each segment is wrapped in a ``SegmentReader`` (constructed from
    ``writer.storage``, ``writer.schema`` and the segment) and handed to
    ``writer.add_reader``.

    :param writer: object providing ``storage``, ``schema`` and
        ``add_reader(reader)``.
    :param segments: iterable of segments to merge.
    :returns: an empty list, because no segment is left unmerged.
    """

    from whoosh.filedb.filereading import SegmentReader

    for seg in segments:
        reader = SegmentReader(writer.storage, writer.schema, seg)
        try:
            writer.add_reader(reader)
        finally:
            # Close the reader on both the success and the failure path.
            reader.close()
    return []
示例#4
0
def MERGE_SMALL(writer, segments):
    """Merge small segments, where "small" is defined using a heuristic
    based on the fibonacci sequence.

    Segments are visited in ascending order of ``doc_count_all()`` while a
    running total of documents is kept. A non-empty segment at position
    ``i`` is merged into ``writer`` when the running total (including that
    segment) is below ``fib(i + 5)``; otherwise it is kept. Segments with a
    document count of zero are neither merged nor returned.

    :param writer: object providing ``storage``, ``schema`` and
        ``add_reader(reader)``.
    :param segments: iterable of segments to consider.
    :returns: a new list with the segments that were not merged.
    """

    from whoosh.filedb.filereading import SegmentReader

    newsegments = []
    # Sort on the count alone: comparing whole (count, segment) tuples falls
    # back to comparing the segment objects when two counts are equal, which
    # raises TypeError for objects that define no ordering.
    sorted_segment_list = sorted(
        ((s.doc_count_all(), s) for s in segments),
        key=lambda pair: pair[0],
    )
    total_docs = 0
    for i, (count, seg) in enumerate(sorted_segment_list):
        if count <= 0:
            # Empty segments are dropped but still occupy a position ``i``.
            continue
        total_docs += count
        if total_docs < fib(i + 5):
            reader = SegmentReader(writer.storage, writer.schema, seg)
            try:
                writer.add_reader(reader)
            finally:
                # Always release the reader, even when add_reader() raises.
                reader.close()
        else:
            newsegments.append(seg)
    return newsegments
示例#5
0
def MERGE_SQUARES(writer, segments):
    """Merge segments grouped by the order of magnitude of their size.

    This is an alternative merge policy similar to Lucene's. It is less
    optimal than the default MERGE_SMALL.

    For each limit in (10, 100, 1000, 10000, 100000), the segments whose
    ``doc_count_all()`` falls in ``[limit // 10, limit - 1)`` form a bucket.
    A bucket holding at least ten segments is merged into ``writer`` in
    full; smaller buckets are left alone.

    :param writer: object providing ``storage``, ``schema`` and
        ``add_reader(reader)``.
    :param segments: mutable list of segments. Segments chosen for merging
        are removed from this list in place.
    :returns: the same ``segments`` list, holding only the segments that
        were not merged.
    """

    from whoosh.filedb.filereading import SegmentReader

    sizedsegs = [(s.doc_count_all(), s) for s in segments]
    tomerge = []
    for size in (10, 100, 1000, 10000, 100000):
        # NOTE(review): the ``size - 1`` upper bound means segments with
        # exactly 9, 99, 999, ... documents never land in any bucket --
        # confirm whether that is intended before changing it.
        smaller = [seg for segsize, seg in sizedsegs
                   if size // 10 <= segsize < size - 1]
        if len(smaller) >= 10:
            tomerge.extend(smaller)
            for seg in smaller:
                segments.remove(seg)

    for seg in tomerge:
        reader = SegmentReader(writer.storage, writer.schema, seg)
        try:
            writer.add_reader(reader)
        finally:
            # Always release the reader, even when add_reader() raises.
            reader.close()

    return segments