def MERGE_SMALL(writer, segments):
    """Merge the smallest segments into ``writer`` once a merge point is found.

    "Small" is defined using a heuristic based on the fibonacci sequence.
    Segments are visited in ascending order of ``doc_count_all()`` while a
    running document total is kept. The merge point is the first non-empty
    segment at sorted position ``i > 3`` whose running total is still below
    ``fib(i + 5)``. Every non-empty segment up to and including that point is
    fed to ``writer.add_reader``; non-empty segments after it are left alone.

    :param writer: object providing ``storage``, ``schema`` and
        ``add_reader(reader)``.
    :param segments: iterable of segments providing ``doc_count_all()``.
    :returns: when a merge took place, a list of the non-empty segments that
        were not merged (segments reporting zero documents appear in neither
        list); otherwise ``segments`` itself, untouched.
    """
    from whoosh.reading import SegmentReader

    by_size = sorted(segments, key=lambda s: s.doc_count_all())
    to_merge = []
    unchanged = []
    total_docs = 0
    merge_point_found = False

    for i, seg in enumerate(by_size):
        count = seg.doc_count_all()
        if count <= 0:
            # Empty segments add nothing to the total and go in neither list;
            # they still occupy a position ``i`` in the sorted order.
            continue
        total_docs += count
        if merge_point_found:
            # Everything past the merge point stays as it is.
            unchanged.append(seg)
            continue
        # Still looking for the merge point: this segment is a candidate.
        to_merge.append(seg)
        if i > 3 and total_docs < fib(i + 5):
            merge_point_found = True

    # Merging a single segment into the writer would gain nothing.
    if not merge_point_found or len(to_merge) <= 1:
        return segments

    for seg in to_merge:
        reader = SegmentReader(writer.storage, writer.schema, seg)
        try:
            writer.add_reader(reader)
        finally:
            # Release the reader even if the writer fails part-way through.
            reader.close()
    return unchanged
def MERGE_SMALL(writer, segments):
    """Fold the smallest segments into ``writer`` when a merge point exists.

    This policy merges small segments, where "small" is defined using a
    heuristic based on the fibonacci sequence. The segments are walked from
    the fewest documents to the most (by ``doc_count_all()``), accumulating a
    running total. A merge point is reached at the first non-empty segment
    whose sorted position ``i`` is greater than 3 and whose running total is
    less than ``fib(i + 5)``. Non-empty segments up to and including the merge
    point are passed to ``writer.add_reader``; later ones are kept unchanged.

    :param writer: object providing ``storage``, ``schema`` and
        ``add_reader(reader)``.
    :param segments: iterable of segments providing ``doc_count_all()``.
    :returns: the list of non-empty, unmerged segments if a merge happened
        (zero-document segments are in neither list and so are omitted);
        otherwise the ``segments`` argument exactly as it was passed in.
    """
    from whoosh.reading import SegmentReader

    merge_candidates = []
    kept_segments = []
    running_total = 0
    merge_point_found = False

    ordered = sorted(segments, key=lambda s: s.doc_count_all())
    for position, segment in enumerate(ordered):
        doc_count = segment.doc_count_all()
        if doc_count <= 0:
            # Zero-document segments are skipped entirely, but they still
            # consume a position in the sorted order.
            continue
        running_total += doc_count
        if merge_point_found:
            kept_segments.append(segment)
            continue
        merge_candidates.append(segment)
        if position > 3 and running_total < fib(position + 5):
            merge_point_found = True

    # Without a merge point, or with only one candidate, leave things alone.
    if not merge_point_found or len(merge_candidates) <= 1:
        return segments

    for segment in merge_candidates:
        reader = SegmentReader(writer.storage, writer.schema, segment)
        try:
            writer.add_reader(reader)
        finally:
            # Always close the reader, including when add_reader raises.
            reader.close()
    return kept_segments
def MERGE_CUSTOM(writer, segments, log_stats=False):
    """Merge the smallest segments into ``writer``, logging each decision.

    This policy merges small segments, where "small" is defined using a
    heuristic based on the fibonacci sequence. Segments are visited in
    ascending ``doc_count_all()`` order with a running document total. The
    merge point is the first non-empty segment at sorted position ``i > 3``
    whose running total is below ``fib(i + 5)``. Non-empty segments up to and
    including it are passed to ``writer.add_reader``; later ones are kept.

    Messages go to the ``logger`` name this function finds in its enclosing
    module scope.

    :param writer: object providing ``storage``, ``schema`` and
        ``add_reader(reader)``.
    :param segments: iterable of segments providing ``doc_count_all()``.
    :param log_stats: when true, emit a debug line with the position, count,
        running total and fibonacci threshold for every non-empty segment.
        Defaults to ``False``, which matches the previous hard-coded value.
    :returns: the list of non-empty, unmerged segments if a merge happened
        (zero-document segments are in neither list); otherwise ``segments``
        itself.
    """
    from whoosh.reading import SegmentReader
    from whoosh.util import fib

    unchanged_segments = []
    segments_to_merge = []
    total_docs = 0
    merge_point_found = False

    sorted_segment_list = sorted(segments, key=lambda s: s.doc_count_all())
    for i, seg in enumerate(sorted_segment_list):
        count = seg.doc_count_all()
        if count <= 0:
            # Empty segments are ignored; they still take up a position ``i``.
            continue
        total_docs += count
        if log_stats:
            logger.debug("%s: %s/%s, fib %s", i, count, total_docs, fib(i + 5))
        if merge_point_found:
            unchanged_segments.append(seg)
            continue
        # Keep the sorted position alongside the segment for the log below.
        segments_to_merge.append((seg, i))
        if i > 3 and total_docs < fib(i + 5):
            logger.debug("Merge point found at %s - %s", i, total_docs)
            merge_point_found = True

    if not merge_point_found or len(segments_to_merge) <= 1:
        logger.debug("No merge point found, no merge yet")
        return segments

    for seg, i in segments_to_merge:
        logger.info("Merging segment %s having size %s", i, seg.doc_count_all())
        reader = SegmentReader(writer.storage, writer.schema, seg)
        try:
            writer.add_reader(reader)
        finally:
            # Release the reader even if the writer fails part-way through.
            reader.close()
    return unchanged_segments
def MERGE_CUSTOM(writer, segments, log_stats=False):
    """Fold the smallest segments into ``writer`` and log what was decided.

    This policy merges small segments, where "small" is defined using a
    heuristic based on the fibonacci sequence. The segments are walked from
    the fewest documents to the most (by ``doc_count_all()``), accumulating a
    running total. A merge point is reached at the first non-empty segment
    whose sorted position ``i`` is greater than 3 and whose running total is
    less than ``fib(i + 5)``. Non-empty segments up to and including that
    point are handed to ``writer.add_reader``; the rest are kept unchanged.

    Logging uses the ``logger`` name resolved from the enclosing module scope.

    :param writer: object providing ``storage``, ``schema`` and
        ``add_reader(reader)``.
    :param segments: iterable of segments providing ``doc_count_all()``.
    :param log_stats: if true, log position, count, running total and the
        ``fib(i + 5)`` threshold at debug level for each non-empty segment.
        The default ``False`` reproduces the earlier fixed setting.
    :returns: the list of non-empty segments left unmerged when a merge took
        place (zero-document segments are omitted); otherwise the
        ``segments`` argument itself.
    """
    from whoosh.reading import SegmentReader
    from whoosh.util import fib

    merge_candidates = []
    kept_segments = []
    running_total = 0
    merge_point_found = False

    ordered = sorted(segments, key=lambda s: s.doc_count_all())
    for position, segment in enumerate(ordered):
        doc_count = segment.doc_count_all()
        if doc_count <= 0:
            # Zero-document segments are skipped but still occupy a position.
            continue
        running_total += doc_count
        if log_stats:
            logger.debug("%s: %s/%s, fib %s", position, doc_count,
                         running_total, fib(position + 5))
        if merge_point_found:
            kept_segments.append(segment)
            continue
        # The position is remembered only so it can be reported when merging.
        merge_candidates.append((segment, position))
        if position > 3 and running_total < fib(position + 5):
            logger.debug("Merge point found at %s - %s", position,
                         running_total)
            merge_point_found = True

    if not merge_point_found or len(merge_candidates) <= 1:
        logger.debug("No merge point found, no merge yet")
        return segments

    for segment, position in merge_candidates:
        logger.info("Merging segment %s having size %s", position,
                    segment.doc_count_all())
        reader = SegmentReader(writer.storage, writer.schema, segment)
        try:
            writer.add_reader(reader)
        finally:
            # Always close the reader, including when add_reader raises.
            reader.close()
    return kept_segments
def MERGE_SMALL(ix, writer, segments):
    """Merge small segments into ``writer`` and return the ones kept.

    This policy merges small segments, where small is defined using a
    heuristic based on the fibonacci sequence. Segments are visited in
    ascending ``doc_count_all()`` order with a running document total. A
    non-empty segment at sorted position ``i`` is merged through
    ``writer.add_segment(ix, seg)`` while the running total stays below
    ``fib(i + 5)``; otherwise it is appended to the returned set.

    :param ix: index object, passed through to ``writer.add_segment``.
    :param writer: object providing ``add_segment(ix, segment)``.
    :param segments: iterable of segments providing ``doc_count_all()``.
    :returns: a new ``SegmentSet`` holding the non-empty segments that were
        not merged. Segments reporting zero documents are neither merged nor
        included.
    """
    newsegments = SegmentSet()
    # Order by document count only. Sorting the bare (count, segment) tuples
    # would fall back to comparing segment objects whenever two counts tie.
    counted_segments = sorted(
        ((s.doc_count_all(), s) for s in segments),
        key=lambda pair: pair[0],
    )
    total_docs = 0
    for i, (count, seg) in enumerate(counted_segments):
        if count <= 0:
            continue
        total_docs += count
        if total_docs < fib(i + 5):
            writer.add_segment(ix, seg)
        else:
            newsegments.append(seg)
    return newsegments
def MERGE_SMALL(ix, writer, segments):
    """Merge small segments into ``writer`` and return the ones kept.

    This policy merges small segments, where small is defined using a
    heuristic based on the fibonacci sequence. The segments are walked from
    the fewest documents to the most (by ``doc_count_all()``), accumulating a
    running total. Each non-empty segment at sorted position ``i`` is merged
    via ``writer.add_segment(ix, seg)`` if the running total is less than
    ``fib(i + 5)``, and is otherwise appended to the result.

    :param ix: index object, passed through to ``writer.add_segment``.
    :param writer: object providing ``add_segment(ix, segment)``.
    :param segments: iterable of segments providing ``doc_count_all()``.
    :returns: a new ``index.SegmentSet`` with the non-empty segments that
        were not merged. Zero-document segments are neither merged nor
        included.
    """
    kept = index.SegmentSet()
    # Sort on the count alone so that tied counts never force a comparison
    # between the segment objects themselves.
    by_count = sorted(
        ((s.doc_count_all(), s) for s in segments),
        key=lambda pair: pair[0],
    )
    running_total = 0
    for position, (doc_count, segment) in enumerate(by_count):
        if doc_count <= 0:
            continue
        running_total += doc_count
        if running_total < fib(position + 5):
            writer.add_segment(ix, segment)
        else:
            kept.append(segment)
    return kept
def MERGE_SMALL(ix, writer, segments):
    """Merge small segments into ``writer`` and return the ones kept.

    This policy merges small segments, where "small" is defined using a
    heuristic based on the fibonacci sequence. Segments are visited in
    ascending ``doc_count_all()`` order with a running document total. A
    non-empty segment at sorted position ``i`` is wrapped in a
    ``SegmentReader`` and passed to ``writer.add_reader`` while the running
    total stays below ``fib(i + 5)``; otherwise it is appended to the result.

    :param ix: index object providing ``storage`` and ``schema``.
    :param writer: object providing ``add_reader(reader)``.
    :param segments: iterable of segments providing ``doc_count_all()``.
    :returns: a new ``SegmentSet`` holding the non-empty segments that were
        not merged. Segments reporting zero documents are neither merged nor
        included.
    """
    from whoosh.filedb.filereading import SegmentReader

    newsegments = SegmentSet()
    # Order by document count only. Sorting the bare (count, segment) tuples
    # would fall back to comparing segment objects whenever two counts tie.
    counted_segments = sorted(
        ((s.doc_count_all(), s) for s in segments),
        key=lambda pair: pair[0],
    )
    total_docs = 0
    for i, (count, seg) in enumerate(counted_segments):
        if count <= 0:
            continue
        total_docs += count
        if total_docs < fib(i + 5):
            # NOTE(review): the reader is handed to the writer and never
            # closed here - confirm add_reader takes ownership of it.
            writer.add_reader(SegmentReader(ix.storage, seg, ix.schema))
        else:
            newsegments.append(seg)
    return newsegments
def MERGE_SMALL(writer, segments):
    """Merge small segments into ``writer`` and return the ones kept.

    This policy merges small segments, where "small" is defined using a
    heuristic based on the fibonacci sequence. Segments are visited in
    ascending ``doc_count_all()`` order with a running document total. A
    non-empty segment at sorted position ``i`` is read through a
    ``SegmentReader`` and passed to ``writer.add_reader`` while the running
    total stays below ``fib(i + 5)``; otherwise it is kept.

    :param writer: object providing ``storage``, ``schema`` and
        ``add_reader(reader)``.
    :param segments: iterable of segments providing ``doc_count_all()``.
    :returns: a list of the non-empty segments that were not merged.
        Segments reporting zero documents are neither merged nor included.
    """
    from whoosh.filedb.filereading import SegmentReader

    newsegments = []
    total_docs = 0
    sorted_segment_list = sorted(segments, key=lambda s: s.doc_count_all())
    for i, seg in enumerate(sorted_segment_list):
        count = seg.doc_count_all()
        if count <= 0:
            # Empty segments are ignored; they still take up a position ``i``.
            continue
        total_docs += count
        if total_docs >= fib(i + 5):
            newsegments.append(seg)
            continue
        reader = SegmentReader(writer.storage, writer.schema, seg)
        try:
            writer.add_reader(reader)
        finally:
            # Release the reader even if the writer fails part-way through.
            reader.close()
    return newsegments
def MERGE_SMALL(writer, segments):
    """Merge small segments into ``writer`` and return the ones kept.

    This policy merges small segments, where "small" is defined using a
    heuristic based on the fibonacci sequence. The segments are walked from
    the fewest documents to the most (by ``doc_count_all()``), accumulating a
    running total. Each non-empty segment at sorted position ``i`` is fed to
    ``writer.add_reader`` through a ``SegmentReader`` if the running total is
    less than ``fib(i + 5)``, and is otherwise kept as it is.

    :param writer: object providing ``storage``, ``schema`` and
        ``add_reader(reader)``.
    :param segments: iterable of segments providing ``doc_count_all()``.
    :returns: a list of the non-empty segments that were not merged.
        Zero-document segments are neither merged nor included.
    """
    from whoosh.reading import SegmentReader

    kept_segments = []
    running_total = 0
    ordered = sorted(segments, key=lambda s: s.doc_count_all())
    for position, segment in enumerate(ordered):
        doc_count = segment.doc_count_all()
        if doc_count <= 0:
            # Zero-document segments are skipped but still occupy a position.
            continue
        running_total += doc_count
        if running_total >= fib(position + 5):
            kept_segments.append(segment)
            continue
        reader = SegmentReader(writer.storage, writer.schema, segment)
        try:
            writer.add_reader(reader)
        finally:
            # Always close the reader, including when add_reader raises.
            reader.close()
    return kept_segments