class TASearcher(object):
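    """Searcher over the Lucene index configured in settings.LUCENE_INDEX_DIRECTORY.

    Builds a Lucene query string from free-text queries and/or per-field
    criteria, runs it with the requested sort order, and returns Hit objects
    together with author/pubtype/category facets and highlighted snippets.
    """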
    def __init__(self, queries=None, criteria=None, conjunctions=None, orderby=("ta",), ascending=True, limit=10000):
        vm.attachCurrentThread()

        # Avoid mutable default arguments; fall back to empty collections.
        queries = queries or []
        self.queries = [query for query in queries if len(query.strip()) > 0]
        self.criteria = criteria or []
        self.conjunctions = conjunctions or []
        self.orderby = orderby
        self.ascending = ascending
        self.queryString = ""
        self.limit = limit

        self.fields = fields  # module-level list of index field names (assumed to be defined elsewhere in this module)
        self.analyzer = PorterStemmerAnalyzer()
        # Queries are parsed against the "freetext" field by default; terms are ANDed.
        self.queryParser = QueryParser(Version.LUCENE_30, "freetext", self.analyzer)
        self.queryParser.setAllowLeadingWildcard(True)
        self.queryParser.setDefaultOperator(QueryParser.Operator.AND)
        indexDir = settings.LUCENE_INDEX_DIRECTORY
        self.index = MMapDirectory(File(indexDir))

    def createQueryString(self):
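        """Assemble the Lucene query string for the current search.

        Simple search (no criteria): the first query is matched against the
        default "freetext" field and the "freetext-normalized" field.
        Advanced search: each criterion/query pair becomes a field query and
        the pairs are joined by the user-supplied conjunctions, e.g.
        (illustrative values): volume:3 AND (author-sort:kant)
        """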
        # Simple search: match the first query against both free-text fields.
        if len(self.criteria) == 0:
            self.queryString = "(%s) OR freetext-normalized:(%s)" % (self.queries[0], self.queries[0])
            return self.queryString
        # Advanced search: build one field query per criterion/query pair.
        else:
            queryPairs = []
            criteriaQueries = list(zip(self.criteria, self.queries))
            # Deduplicate the criteria; they are checked again for highlighting in _getHits().
            self.criteria = list(dict(criteriaQueries).keys())
            for criterion, query in criteriaQueries:
                if criterion in ("volume", "number", "category-label", "pubtype", "author-sort"):
                    queryPairs.append("%s:%s" % (criterion, query))
                elif criterion == "year":
                    # A single year may match either end of the publication range.
                    queryPairs.append("(year-start:%s OR year-end:%s)" % (query, query))
                else:
                    queryPairs.append("(%s:%s OR %s-normalized:%s)" % (criterion, query, criterion, query))
            # queryPairs = ["%s:%s"%(criterion,query.replace(" ", "+")) for criterion, query in zip(criteria, queries)]
            try:
                queryString = "%s %s" % (queryPairs[0], " ".join(
                    ["%s (%s)" % (conj, pair) for conj, pair in zip(self.conjunctions, queryPairs[1:])]))
                self.queryString = queryString
                return queryString
            except IndexError:
                # No usable criterion/query pairs; fall back to a harmless default term.
                self.queryString = "freetext"
                return self.queryString

    def getQueryString(self):
        return self.queryString

    def _getHits(self):
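        """Run self.query against the index and build the result set.

        Sorts the hits according to self.orderby, collects the stored fields
        of each hit into Hit objects, computes author/pubtype/category facet
        counts, and applies term highlighting where a matching criterion was
        used.
        """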
        reader = IndexReader.open(self.index)
        searcher = IndexSearcher(reader)

        # Sort by volume and entry number (values treated as integers)
        sortDict = {
            "ta": (("volume", SortField.Type.INT), ("number", SortField.Type.INT)),
            "year": (("year-start", SortField.Type.INT), ("year-end", SortField.Type.INT)),
            "author-title": (("author-sort", SortField.Type.STRING), ("title-sort", SortField.Type.STRING)),
            "title": (("title-sort", Locale.GERMAN),),
            "author": (("author-sort", Locale.GERMAN),),
        }

        sortFields = []

        reverse = not self.ascending

        for name in self.orderby:
            # sortType is either a SortField.Type constant or a Locale
            # (locale-aware string sorting for the title/author orderings).
            for fieldName, sortType in sortDict.get(name, []):
                sortFields.append(SortField(fieldName, sortType, reverse))

        if len(sortFields) == 0:
            sortFields = [SortField("volume", SortField.Type.INT), SortField("number", SortField.Type.INT)]

        sort = Sort(sortFields)

        # Retrieve up to 80000 hits (hard upper bound; self.limit is not applied here).
        topDocs = searcher.search(self.query, None, 80000, sort)
        hits = topDocs.scoreDocs
        self.hits = hits
        self.searcher = searcher

        lang = translation.get_language()
        if lang != "de":
            lang = "en"

        facets = {"author": {}, "pubtype": {}, "category-%s" % lang: {}}

        # Highlighting
        highlighter = Highlighter(SimpleHTMLFormatter('<span class="highlight">', '</span>'), QueryScorer(self.query))

        hitObjects = []
        for hit in hits:
            # Collect the stored field values of this hit into a fresh dict.
            fields = {}
            doc = searcher.doc(hit.doc)
            fields["score"] = hit.score
            fields["volume"] = doc["volume"]
            fields["number"] = doc["number"]
            fields["id"] = doc["id"]
            fields["title"] = doc["title"]
            fields["author"] = doc["author"]
            fields["authors"] = [field.stringValue() for field in doc.getFields("author")]
            for author in fields["authors"]:  # XXX
                facets["author"][author] = facets["author"].get(author, 0) + 1  # XXX

            fields["categories"] = [field.stringValue() for field in doc.getFields("category-%s" % lang)]
            for cat in fields["categories"]:
                facets["category-%s" % lang][cat] = facets["category-%s" % lang].get(cat, 0) + 1
            maxNumFragmentsRequired = 2
            fragmentSeparator = "..."
            pubtype = doc["pubtype"]
            fields["pubtype"] = pubtype
            facets["pubtype"][pubtype] = facets["pubtype"].get(pubtype, 0) + 1
            fields["city"] = doc["city"]
            fields["year"] = doc["year-start"]
            if fields["year"] and doc["year-end"] and doc["year-end"] != fields["year"]:
                fields["year"] += " - " + doc["year-end"]
            highlightFields = ("title", "author", "city", "year", "category")

            if "freetext" in self.criteria:
                for fieldName in highlightFields:
                    try:
                        tokenStream = self.analyzer.tokenStream(fieldName, lucene.StringReader(fields[fieldName]))
                        newVal = highlighter.getBestFragments(tokenStream, fields[fieldName], maxNumFragmentsRequired,
                                                              fragmentSeparator)
                        if len(newVal) > 0:
                            # fields[fieldName] = re.sub(r'</span>\s*<span class="highlight">', ' ', newVal)
                            fields[fieldName] = newVal
                    except:
                        continue

            for fieldName in highlightFields:
                if fieldName in self.criteria or fieldName + "-de" in self.criteria or fieldName + "-en" in self.criteria:
                    try:
                        tokenStream = self.analyzer.tokenStream(fieldName, lucene.StringReader(fields[fieldName]))
                        newVal = highlighter.getBestFragments(tokenStream, fields[fieldName], maxNumFragmentsRequired,
                                                              fragmentSeparator)
                        if len(newVal) > 0:
                            # fields[fieldName] = re.sub(r'</span>\s*<span class="highlight">', ' ', newVal)
                            fields[fieldName] = newVal
                    except:
                        continue
            """if "author" in self.criteria:
                try:
                    tokenStream = self.analyzer.tokenStream("author", lucene.StringReader(fields["author"]))
                    fields["author"] = highlighter.getBestFragments(tokenStream, fields["author"], maxNumFragmentsRequired, fragmentSeparator)
                except:
                        pass"""

            hitObjects.append(
                Hit(fields["id"], fields["volume"], fields["number"], fields["title"], fields["author"], fields["city"],
                    fields["year"], fields["categories"], fields["pubtype"], fields["score"]))

        # Drop facets with fewer than two distinct values; keep the 25 most
        # frequent values of each remaining facet (ties broken alphabetically
        # by the stable two-pass sort below).
        facetsToDelete = []
        for facet in facets:
            if len(facets[facet]) < 2:
                facetsToDelete.append(facet)
                continue
            values = sorted(facets[facet].items(), key=itemgetter(0))
            values = sorted(values, key=itemgetter(1), reverse=True)
            facets[facet] = values[:25]
        for facet in facetsToDelete:
            del facets[facet]
        self.facets = facets
        reader.close()
        self.hitObjects = hitObjects
        return hitObjects

    def search(self):
        self.createQueryString()
        querystr = self.getQueryString()
        self.query = self.queryParser.parse(querystr)
        return self._getHits()

    def getAll(self):
        self.query = MatchAllDocsQuery()
        return self._getHits()
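
# Usage sketch (illustrative; assumes the surrounding setup -- an initialised
# PyLucene VM, Django settings providing LUCENE_INDEX_DIRECTORY, and the Hit
# and PorterStemmerAnalyzer helpers -- is in place):
#
#     searcher = TASearcher(queries=["kant"], orderby=["year"], ascending=False)
#     hits = searcher.search()       # list of Hit objects
#     facets = searcher.facets       # facet counts computed in _getHits()
#
#     all_hits = TASearcher().getAll()   # MatchAllDocsQuery over the whole index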