Пример #1
0
    def __init__(self, storage, schema, segment):
        """Open the files belonging to one segment and wire up its readers.

        :param storage: object providing ``open_file`` and ``file_exists``
            for the segment's files.
        :param schema: schema object; asked whether it has scorable fields
            and iterated to look for fields whose ``spelling`` is truthy.
        :param segment: segment object supplying the filenames, document
            counts and deletion flag read below.
        """
        self.storage = storage
        self.schema = schema
        self.segment = segment

        # Identify this reader by the segment's UUID when it has one;
        # otherwise fall back to a freshly generated random UUID.
        if not hasattr(segment, "uuid"):
            import uuid
            self.uuid_string = str(uuid.uuid4())
        else:
            self.uuid_string = str(segment.uuid)

        # Term index
        terms_file = storage.open_file(segment.termsindex_filename)
        self.termsindex = TermIndexReader(terms_file)

        # Term vector index and vector postings are not opened here
        # (lazy load), so both start out as None.
        self.vectorindex = None
        self.vpostfile = None

        # Stored fields file
        stored_file = storage.open_file(segment.storedfields_filename,
                                        mapped=False)
        self.storedfields = StoredFieldReader(stored_file)

        # Field length file: only opened when the schema reports scorable
        # fields; otherwise the attribute stays None.
        self.fieldlengths = None
        if schema.has_scorable_fields():
            lengths_file = storage.open_file(segment.fieldlengths_filename)
            total_docs = segment.doc_count_all()
            self.fieldlengths = LengthReader(lengths_file, total_docs)

        # Snapshot deletion state and document count from the segment.
        self._has_deletions = segment.has_deletions()
        self._doc_count = segment.doc_count()

        # Postings file
        self.postfile = storage.open_file(segment.termposts_filename,
                                          mapped=False)

        # Dawg file: loaded only if some schema field has spelling enabled
        # and the file actually exists in storage.
        self.dawg = None
        if any(fieldobj.spelling for fieldobj in schema):
            dawg_name = segment.dawg_filename
            if storage.file_exists(dawg_name):
                dawg_handle = storage.open_file(dawg_name, mapped=False)
                self.dawg = DiskNode.load(dawg_handle, expand=False)

        # The segment's total document count must agree with the number of
        # entries reported by the stored fields reader.
        self.dc = segment.doc_count_all()
        assert self.dc == self.storedfields.length

        self.set_caching_policy()

        self.is_closed = False
        self._sync_lock = Lock()
Пример #2
0
    def __init__(self, storage, schema, segment):
        """Set up a reader over a single segment's on-disk files.

        Opens the term index, stored fields and postings files through
        ``storage``; opens the field length file only when the schema has
        scorable fields, and the dawg file only when a schema field has
        ``spelling`` set and the file exists.

        :param storage: provides ``open_file`` / ``file_exists``.
        :param schema: provides ``has_scorable_fields()`` and is iterable
            over field objects.
        :param segment: provides the filenames, ``doc_count()``,
            ``doc_count_all()`` and ``has_deletions()``.
        """
        self.storage = storage
        self.schema = schema
        self.segment = segment

        # Prefer the segment's own UUID; generate a random one if absent.
        if hasattr(segment, "uuid"):
            identifier = segment.uuid
        else:
            import uuid
            identifier = uuid.uuid4()
        self.uuid_string = str(identifier)

        # --- term index ---
        index_handle = storage.open_file(segment.termsindex_filename)
        self.termsindex = TermIndexReader(index_handle)

        # --- term vectors: left unset here, lazy load ---
        self.vectorindex = None
        self.vpostfile = None

        # --- stored fields ---
        stored_handle = storage.open_file(segment.storedfields_filename,
                                          mapped=False)
        self.storedfields = StoredFieldReader(stored_handle)

        # --- field lengths (only when the schema has scorable fields) ---
        self.fieldlengths = None
        if schema.has_scorable_fields():
            length_handle = storage.open_file(segment.fieldlengths_filename)
            self.fieldlengths = LengthReader(length_handle,
                                             segment.doc_count_all())

        # --- values copied from the segment ---
        self._has_deletions = segment.has_deletions()
        self._doc_count = segment.doc_count()

        # --- postings ---
        self.postfile = storage.open_file(segment.termposts_filename,
                                          mapped=False)

        # --- dawg ---
        # Stop at the first field with spelling enabled; that alone decides
        # whether loading the dawg file is attempted.
        self.dawg = None
        for fieldobj in schema:
            if fieldobj.spelling:
                dawg_path = segment.dawg_filename
                if storage.file_exists(dawg_path):
                    dawg_handle = storage.open_file(dawg_path, mapped=False)
                    self.dawg = DiskNode.load(dawg_handle, expand=False)
                break

        # Sanity check: total document count vs. stored fields length.
        self.dc = segment.doc_count_all()
        assert self.dc == self.storedfields.length

        self.set_caching_policy()

        self.is_closed = False
        self._sync_lock = Lock()
Пример #3
0
 def word_graph(self, segment):
     """Load and return the word graph stored for ``segment``.

     Opens the segment's file with extension ``self.DAWG_EXT`` from
     ``self.storage`` (with ``mapped=False``) and hands it to
     ``DiskNode.load`` with ``expand=False``.

     :param segment: object whose ``open_file`` method opens the file.
     :returns: whatever ``DiskNode.load`` returns for that file.
     """
     handle = segment.open_file(self.storage, self.DAWG_EXT, mapped=False)
     graph_root = DiskNode.load(handle, expand=False)
     return graph_root
Пример #4
0
 def word_graph(self, segment):
     """Return the result of ``DiskNode.load`` on the segment's DAWG file.

     The file is opened through ``segment.open_file`` using this object's
     ``storage`` and ``DAWG_EXT``, with ``mapped=False``; the load is
     requested with ``expand=False``.
     """
     source = segment.open_file(
         self.storage,
         self.DAWG_EXT,
         mapped=False,
     )
     return DiskNode.load(source, expand=False)