def __init__(self, stored=False, field_boost=1.0):
    """
    Record the storage flag and boost, and attach an ``Existence`` format.

    :param stored: Whether the value of this field is stored with the
        document.
    :param field_boost: Boost value kept on the instance as
        ``self.field_boost`` and also passed to the ``Existence`` format.
    """

    self.stored, self.field_boost = stored, field_boost
    self.format = formats.Existence(field_boost=field_boost)
def __init__(self, type=int, stored=False, unique=False, field_boost=1.0,
             decimal_places=0, shift_step=4, signed=True):
    """
    Configure a numeric field for the requested number type.

    :param type: the type of numbers that can be stored in this field:
        ``int``, ``long``, or ``float``. Passing ``Decimal`` raises
        ``TypeError``; to store Decimal instances, set the type to ``int``
        or ``float`` and use ``decimal_places``.
    :param stored: Whether the value of this field is stored with the
        document.
    :param unique: Whether the value of this field is unique per-document.
    :param field_boost: Boost value passed to the ``Existence`` format.
    :param decimal_places: specifies the number of decimal places to save
        when storing Decimal instances as ``int`` or ``float``.
    :param shift_step: The number of bits of precision to shift away at
        each tiered indexing level. Values should generally be 1-8. Lower
        values yield faster searches but take up more space. A value of
        `0` means no tiered indexing.
    :param signed: Whether the numbers stored in this field may be
        negative.
    :raises TypeError: if ``type`` is ``Decimal`` or any other type this
        field does not handle.
    """

    self.type = type
    numtype = type

    # Choose the text converters and the sortable typecode for the type.
    # The long check comes first on purpose: it also catches the Python 3
    # int type.
    if numtype is long_type:
        converters = (long_to_text, text_to_long)
        if signed:
            typecode = "q"
        else:
            typecode = "Q"
    elif numtype is int:
        converters = (int_to_text, text_to_int)
        if signed:
            typecode = "i"
        else:
            typecode = "I"
    elif numtype is float:
        converters = (float_to_text, text_to_float)
        typecode = "f"
    elif numtype is Decimal:
        raise TypeError("To store Decimal instances, set type to int or float "
                        "and use the decimal_places argument")
    else:
        raise TypeError("%s field type can't store %r"
                        % (self.__class__, numtype))

    self._to_text, self._from_text = converters
    self.sortable_typecode = typecode

    # Plain configuration copied from the arguments
    self.stored = stored
    self.unique = unique
    self.decimal_places = decimal_places
    self.shift_step = shift_step
    self.signed = signed

    self.analyzer = IDAnalyzer()
    self.format = formats.Existence(field_boost=field_boost)
def __init__(self, stored=False, unique=False, field_boost=1.0,
             spelling=False, sortable=False, analyzer=None):
    """
    Set up the field's analyzer, format and flags.

    :param stored: Whether the value of this field is stored with the
        document.
    :param unique: Whether the value of this field is unique per-document.
    :param field_boost: Boost value passed to the ``Existence`` format.
    :param spelling: Kept on the instance as ``self.spelling``.
    :param sortable: Forwarded to ``self.set_sortable()``.
    :param analyzer: The analyzer to use for this field. When omitted (or
        otherwise falsy) an ``analysis.IDAnalyzer`` is created.
    """

    if not analyzer:
        analyzer = analysis.IDAnalyzer()
    self.analyzer = analyzer
    self.format = formats.Existence(field_boost=field_boost)

    self.stored, self.unique, self.spelling = stored, unique, spelling
    self.set_sortable(sortable)
def __init__(self, stored=False, unique=False, expression=None,
             field_boost=1.0, spelling=False):
    """
    Set up a field whose tokens are extracted with a regular expression.

    :param stored: Whether the value of this field is stored with the
        document.
    :param unique: Whether the value of this field is unique per-document.
    :param expression: The regular expression object to use to extract
        tokens. The default expression breaks tokens on CRs, LFs, tabs,
        spaces, commas, and semicolons.
    :param field_boost: Boost value passed to the ``Existence`` format.
    :param spelling: Kept on the instance as ``self.spelling``.
    """

    if not expression:
        # Default: a token is any run of characters that is not a CR, LF,
        # tab, space, comma or semicolon.
        expression = re.compile(r"[^\r\n\t ,;]+")

    self.analyzer = analysis.RegexAnalyzer(expression=expression)
    self.format = formats.Existence(field_boost=field_boost)
    self.stored, self.unique, self.spelling = stored, unique, spelling
def __init__(self, stored=False, unique=False, separator=None,
             field_boost=1.0, spelling=False):
    """
    Set up a field whose value is split into tokens on a separator string.

    :param stored: Whether the value of this field is stored with the
        document.
    :param unique: Whether the value of this field is unique per-document.
    :param separator: The string passed to ``SplitAnalyzer`` as the token
        separator. When ``None`` (the default) a tab character is used.
    :param field_boost: Boost value passed to the ``Existence`` format.
    :param spelling: Kept on the instance as ``self.spelling``.
    """

    # The analyzer configured by the parent initializer is replaced below,
    # so the parent only needs the value it already receives on the default
    # path (None) for its third positional argument.
    # NOTE(review): that argument appears to be a tokenizing regular
    # expression rather than a separator, so forwarding an arbitrary
    # separator string (e.g. "+") there could fail -- confirm against the
    # parent class.
    super(SPLITTEDIDLIST, self).__init__(stored, unique, None, field_boost,
                                         spelling)

    # Honor the caller's separator; previously it was ignored and the
    # analyzer always split on tabs.
    if separator is None:
        separator = "\t"
    self.analyzer = SplitAnalyzer(separator=separator)

    self.format = formats.Existence(field_boost=field_boost)
    self.stored = stored
    self.unique = unique
    self.spelling = spelling
# Builds a test index in ``dirname``: ``doccount`` documents, each tagged
# with 10 to 20 tags sampled from ``tagcount`` random five-letter words.
from __future__ import with_statement

import os.path
import random
import string
import sqlite3 as sqlite

from whoosh import fields, formats, index, query, sorting
from whoosh.util import now


# Size of the tag vocabulary, number of documents, and index location
tagcount = 100
doccount = 500000
dirname = "testindex"

# A single stored keyword field with an Existence-format vector
schema = fields.Schema(
    tags=fields.KEYWORD(stored=True, vector=formats.Existence()))

if not os.path.exists(dirname):
    os.mkdir(dirname)

# Set to True to rebuild the index even when one already exists
reindex = False
if reindex or not index.exists_in(dirname):
    # Vocabulary: ``tagcount`` random words of five lowercase letters
    letters = string.ascii_lowercase
    tags = [u"".join(random.choice(letters) for _ in xrange(5))
            for _ in xrange(tagcount)]

    ix = index.create_in(dirname, schema)
    # Timestamp taken just before indexing starts
    t = now()
    with ix.writer() as w:
        for i in xrange(doccount):
            # Draw the tag count first, then sample that many distinct tags
            howmany = random.randint(10, 20)
            doc = u" ".join(random.sample(tags, howmany))
            w.add_document(tags=doc)
def __init__(self, numtype=int, bits=32, stored=False, unique=False,
             field_boost=1.0, decimal_places=0, shift_step=4, signed=True,
             sortable=False, default=None):
    """
    Validate the numeric configuration and set up the field.

    :param numtype: the type of numbers that can be stored in this field,
        either ``int`` or ``float`` (the strings ``"int"`` and ``"float"``
        are accepted as well). To store ``Decimal`` values, keep ``int``
        and use the ``decimal_places`` argument to control how many decimal
        places the field will store; passing ``Decimal`` itself raises
        ``TypeError``.
    :param bits: size of the sortable representation: 8, 16, 32 or 64.
        Always forced to 64 for ``float`` fields.
    :param stored: Whether the value of this field is stored with the
        document.
    :param unique: Whether the value of this field is unique per-document.
    :param field_boost: Boost value passed to the ``Existence`` format.
    :param decimal_places: specifies the number of decimal places to save
        when storing Decimal instances. If you set this, you will always
        get Decimal instances back from the field. Cannot be combined with
        a ``float`` type.
    :param shift_step: The number of bits of precision to shift away at
        each tiered indexing level. Values should generally be 1-8. Lower
        values yield faster searches but take up more space. A value of
        `0` means no tiered indexing.
    :param signed: Whether the numbers stored in this field may be
        negative.
    :param sortable: Forwarded to ``self.set_sortable()``.
    :param default: Value kept as ``self.default``. When ``None``, ``int``
        fields use the ``typecode_max`` entry for the field's sortable
        typecode and ``float`` fields use ``NaN``. Any other value must
        pass ``self.is_valid()``.
    :raises TypeError: if ``numtype`` is ``Decimal`` or anything other than
        ``int`` / ``float``.
    :raises Exception: if ``decimal_places`` is combined with ``float``,
        if ``bits`` is not one of the supported sizes, or if ``default``
        is not valid for this field.
    """

    # Be forgiving: accept the type names as strings in case the docstring
    # isn't clear
    if numtype == "int":
        numtype = int
    elif numtype == "float":
        numtype = float

    # Only int and float are usable; Decimal gets its own, more helpful
    # message
    if numtype is Decimal:
        raise TypeError("To store Decimal instances, set type to int use "
                        "the decimal_places argument")
    if numtype not in (int, float):
        raise TypeError("Can't use %r as a type, use int or float"
                        % numtype)

    is_float = numtype is float

    # Sanity check
    if is_float and decimal_places:
        raise Exception("A float type and decimal_places argument %r are "
                        "incompatible" % decimal_places)

    # Floats are converted to 64 bit ints
    if is_float:
        bits = 64

    # Find the typecode of the *sortable* representation for this size
    for size, code in ((8, "B"), (16, "H"), (32, "I"), (64, "Q")):
        if size == bits:
            typecode = code
            break
    else:
        raise Exception("Invalid bits %r, use 8, 16, 32, or 64" % bits)

    self.sortable_typecode = typecode
    self._struct = struct.Struct(">" + typecode)

    # Plain configuration copied from the arguments
    self.numtype = numtype
    self.bits = bits
    self.stored = stored
    self.unique = unique
    self.decimal_places = decimal_places
    self.shift_step = shift_step
    self.signed = signed

    self.analyzer = analysis.IDAnalyzer()
    self.format = formats.Existence(field_boost=field_boost)

    # Column configuration
    if default is None:
        default = typecode_max[typecode] if numtype is int else NaN
    elif not self.is_valid(default):
        raise Exception("The default %r is not a valid number for this "
                        "field" % default)
    self.default = default

    self.set_sortable(sortable)