def test_stored_fields():
    """Round-trip three documents through a stored-fields file.

    The documents are written with ``StoredFieldWriter`` into ``test.sf``
    inside a temporary storage, then read back by document number with
    ``StoredFieldReader`` and compared against what was written.
    """
    documents = [
        {"a": "hello", "b": "there"},
        {"a": "one", "b": "two"},
        {"a": "alfa", "b": "bravo"},
    ]

    with TempStorage("storedfields") as storage:
        writer = StoredFieldWriter(storage.create_file("test.sf"), ["a", "b"])
        for document in documents:
            # Hand the writer its own copy so the expected values used in
            # the assertions below stay independent of the writer.
            writer.append(dict(document))
        writer.close()

        reader = StoredFieldReader(storage.open_file("test.sf"))
        # Deliberately read out of sequence: first, last, then middle.
        for docnum in (0, 2, 1):
            assert_equal(reader[docnum], documents[docnum])
        reader.close()
def __init__(self, ix, poolclass=None, procs=0, blocklimit=128,
             timeout=0.0, delay=0.1, name=None, _l=True, **poolargs):
    """Prepare a writer that builds a new segment for the index ``ix``.

    Optionally acquires the index's write lock, reads the current table of
    contents, creates the output files for the new segment (terms index,
    term postings, optional vector files, stored fields, field lengths)
    and finally creates the posting pool.

    :param ix: the index to write to. It must provide ``lock()``,
        ``_read_toc()``, ``storage`` and ``indexname``.
    :param poolclass: the class used to create the pool. When ``None``,
        ``MultiPool`` is used if ``procs > 1``, otherwise ``TempfilePool``.
    :param procs: forwarded to the pool constructor; also selects the
        default pool class (see ``poolclass``).
    :param blocklimit: stored on the writer and forwarded to the term
        ``FilePostingWriter``.
    :param timeout: forwarded to ``try_for`` when acquiring the write lock.
    :param delay: forwarded to ``try_for`` when acquiring the write lock.
    :param name: name for the new segment. When not given, it is derived
        with ``Segment.basename`` from the index name and segment number.
    :param _l: when false, the write lock is not acquired and
        ``self.writelock`` stays ``None``.
    :param poolargs: extra keyword arguments for the pool constructor.
    :raises LockError: if the write lock could not be acquired.
    """
    self.writelock = None
    if _l:
        self.writelock = ix.lock("WRITELOCK")
        if not try_for(self.writelock.acquire, timeout=timeout,
                       delay=delay):
            raise LockError

    # From this point the write lock (if requested) is held. If anything
    # below fails, the half-built writer is discarded by the caller, so the
    # lock must be given back here or the index stays locked for writing.
    try:
        self.readlock = ix.lock("READLOCK")

        info = ix._read_toc()
        self.schema = info.schema
        self.segments = info.segments
        self.storage = ix.storage
        self.indexname = ix.indexname
        self.is_closed = False

        self.blocklimit = blocklimit
        self.segment_number = info.segment_counter + 1
        self.generation = info.generation + 1

        # Starting document number of each existing segment, i.e. the
        # running total of doc_count_all() of the segments before it.
        self._doc_offsets = []
        base = 0
        for s in self.segments:
            self._doc_offsets.append(base)
            base += s.doc_count_all()

        self.name = name or Segment.basename(self.indexname,
                                             self.segment_number)
        self.docnum = 0
        self.fieldlength_totals = defaultdict(int)
        self._added = False
        self._unique_cache = {}

        # Create a temporary segment to use its .*_filename attributes
        segment = Segment(self.name, self.generation, 0, None, None)

        # Terms index
        tf = self.storage.create_file(segment.termsindex_filename)
        ti = TermIndexWriter(tf)
        # Term postings file
        pf = self.storage.create_file(segment.termposts_filename)
        pw = FilePostingWriter(pf, blocklimit=blocklimit)
        # Terms writer
        self.termswriter = TermsWriter(self.schema, ti, pw)

        if self.schema.has_vectored_fields():
            # Vector index
            vf = self.storage.create_file(segment.vectorindex_filename)
            self.vectorindex = TermVectorWriter(vf)

            # Vector posting file
            vpf = self.storage.create_file(segment.vectorposts_filename)
            self.vpostwriter = FilePostingWriter(vpf, stringids=True)
        else:
            self.vectorindex = None
            self.vpostwriter = None

        # Stored fields file
        sf = self.storage.create_file(segment.storedfields_filename)
        self.storedfields = StoredFieldWriter(sf,
                                              self.schema.stored_names())

        # Field lengths file
        self.lengthfile = self.storage.create_file(
            segment.fieldlengths_filename)

        # Create the pool
        if poolclass is None:
            if procs > 1:
                # Imported lazily so the multiprocessing pool module is
                # only loaded when it is actually requested.
                from whoosh.filedb.multiproc import MultiPool
                poolclass = MultiPool
            else:
                poolclass = TempfilePool
        self.pool = poolclass(self.schema, procs=procs, **poolargs)
    except BaseException:
        # NOTE(review): relies on the lock object offering release() as the
        # counterpart of the acquire() used above -- confirm.
        if self.writelock is not None:
            self.writelock.release()
        raise