示例#1
0
    def __init__(self, storage, schema, segment):
        self.storage = storage
        self.schema = schema
        self.segment = segment

        if hasattr(self.segment, "uuid"):
            self.uuid_string = str(self.segment.uuid)
        else:
            import uuid
            self.uuid_string = str(uuid.uuid4())

        # Term index
        tf = storage.open_file(segment.termsindex_filename)
        self.termsindex = TermIndexReader(tf)

        # Term vector index, and vector postings: lazy load
        self.vectorindex = None
        self.vpostfile = None

        # Stored fields file
        sf = storage.open_file(segment.storedfields_filename, mapped=False)
        self.storedfields = StoredFieldReader(sf)

        # Field length file
        self.fieldlengths = None
        if self.schema.has_scorable_fields():
            flf = storage.open_file(segment.fieldlengths_filename)
            self.fieldlengths = LengthReader(flf, segment.doc_count_all())

        # Copy info from underlying segment
        self._has_deletions = segment.has_deletions()
        self._doc_count = segment.doc_count()

        # Postings file
        self.postfile = self.storage.open_file(segment.termposts_filename,
                                               mapped=False)

        # Dawg file
        self.dawg = None
        if any(field.spelling for field in self.schema):
            fname = segment.dawg_filename
            if self.storage.file_exists(fname):
                dawgfile = self.storage.open_file(fname, mapped=False)
                self.dawg = DiskNode.load(dawgfile, expand=False)

        self.dc = segment.doc_count_all()
        assert self.dc == self.storedfields.length

        self.set_caching_policy()

        self.is_closed = False
        self._sync_lock = Lock()
示例#2
0
def test_stored_fields():
    with TempStorage("storedfields") as st:
        sf = st.create_file("test.sf")
        sfw = StoredFieldWriter(sf, ["a", "b"])
        sfw.append({"a": "hello", "b": "there"})
        sfw.append({"a": "one", "b": "two"})
        sfw.append({"a": "alfa", "b": "bravo"})
        sfw.close()

        sf = st.open_file("test.sf")
        sfr = StoredFieldReader(sf)
        assert_equal(sfr[0], {"a": "hello", "b": "there"})
        assert_equal(sfr[2], {"a": "alfa", "b": "bravo"})
        assert_equal(sfr[1], {"a": "one", "b": "two"})
        sfr.close()
示例#3
0
    def __init__(self, storage, schema, segment):
        self.storage = storage
        self.schema = schema
        self.segment = segment

        if hasattr(self.segment, "uuid"):
            self.uuid_string = str(self.segment.uuid)
        else:
            import uuid
            self.uuid_string = str(uuid.uuid4())

        # Term index
        tf = storage.open_file(segment.termsindex_filename)
        self.termsindex = TermIndexReader(tf)

        # Term postings file, vector index, and vector postings: lazy load
        self.postfile = None
        self.vectorindex = None
        self.vpostfile = None

        # Stored fields file
        sf = storage.open_file(segment.storedfields_filename, mapped=False)
        self.storedfields = StoredFieldReader(sf)

        # Field length file
        self.fieldlengths = None
        if self.schema.has_scorable_fields():
            flf = storage.open_file(segment.fieldlengths_filename)
            self.fieldlengths = LengthReader(flf, segment.doc_count_all())

        # Copy methods from underlying segment
        self.has_deletions = segment.has_deletions
        self.is_deleted = segment.is_deleted
        self.doc_count = segment.doc_count

        # Postings file
        self.postfile = self.storage.open_file(segment.termposts_filename,
                                               mapped=False)

        self.dc = segment.doc_count_all()
        assert self.dc == self.storedfields.length

        self.set_caching_policy()

        self.is_closed = False
        self._sync_lock = Lock()