def searchForDbpediaURI(self, uri):
    """ Returns all sentences that are tagged with the given DBpedia URI """
    print "in searchForDbpediaURI"
    uri_old = uri
    # Strip the DBpedia namespace prefixes and all non-word characters,
    # since the query parser would otherwise trip over ':' and '/'.
    uri = uri.replace("http://dbpedia.org/ontology/", "")
    uri = uri.replace("http://dbpedia.org/property/", "")
    uri = uri.replace("http://dbpedia.org/resource/", "")
    uri = "".join(re.findall(r'[\w\s]+', uri))
    # Initialize before the try block so the except branch can return it safely.
    result = []
    try:
        qp = QueryParser(Version.LUCENE_35, "URI", analyzer)
        qp.setDefaultOperator(qp.Operator.AND)
        query = qp.parse(uri)
        print "query: " + str(query)
        MAX = 500000
        hits = searcher.search(query, MAX)
        for hit in hits.scoreDocs:
            doc = searcher.doc(hit.doc)
            dbpedia_uri = doc["URI"]
            # Only keep exact matches on the original, unstripped URI.
            if dbpedia_uri == uri_old:
                result.append([IndexUtils.sentence_wrapper(doc["Sentence"]),
                               doc["X"], doc["Y"], dbpedia_uri])
        return result
    except:
        print "Fail in uri: " + uri
        print "Unexpected error:", sys.exc_info()[0]
        return result
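# A minimal usage sketch (hypothetical URI and instance name `index`; assumes
# the module-level `searcher` and `analyzer` are already open on the sentence
# index):
#
#   rows = index.searchForDbpediaURI("http://dbpedia.org/resource/Berlin")
#   for wrapped_sentence, x, y, uri in rows:
#       print x, "|", y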
def searchXYPair(self, x, y):
    """ Returns all sentences that are tagged with the given two entities (x, y) """
    if x == "" or y == "":
        return []
    try:
        # Strip non-word characters so the query parser accepts the term.
        x = "".join(re.findall(r'[\w\s]+', x))
        qp = QueryParser(Version.LUCENE_35, "X", analyzer)
        qp.setDefaultOperator(qp.Operator.AND)
        query = qp.parse(x)
        MAX = 100000
        hits = searcher.search(query, MAX)
        # Collect sentences in a dict to remove duplicates.
        tmp_hm = {}
        for hit in hits.scoreDocs:
            doc = searcher.doc(hit.doc)
            if doc["Y"] == y:
                tmp_hm[doc["Sentence"]] = ""
        result_list = [IndexUtils.sentence_wrapper(key) for key in tmp_hm]
        return result_list
    except:
        print "Fail (search XYPair) in x:" + x + " y:" + y
        print "Unexpected error:", sys.exc_info()[0]
        print
        return []
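# The strip-and-rejoin pattern above recurs throughout this module. A small
# helper like this (a sketch, not part of the original code) could replace it;
# it keeps only word characters and whitespace so that QueryParser syntax
# characters such as ':' or '/' cannot break parsing:
def strip_query_chars(text):
    return "".join(re.findall(r'[\w\s]+', text))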
def searchKey(self, key, rank=None):
    query = ""
    try:
        MAX = 100000
        qp = QueryParser(Version.LUCENE_35, "key", analyzer)
        qp.setDefaultOperator(qp.Operator.AND)
        query = qp.parse(key)
        hits = searcher.search(query, MAX)
        sentence_list = []
        for hit in hits.scoreDocs:
            doc = searcher.doc(hit.doc)
            try:
                # The stored field holds a serialized Python literal;
                # eval() turns it back into the original structure.
                sentence_list.append(eval(doc.get("sentence").encode("utf-8")))
            except:
                print doc.get("sentence")
        return sentence_list
    except:
        print "Fail in receiving sentence with term " + key
        print ("query", query)
        print "Unexpected error:", sys.exc_info()[0]
        print
        return []
def searchForDbpediaURI(self, uri):
    """ Returns all anchor texts that are related to the given DBpedia URI.
    For each anchor text it also returns the corresponding URI and the number
    of times the anchor appears in the English Wikipedia. """
    uri_old = uri
    uri = uri.replace("http://dbpedia.org/resource/", "")
    uri = "".join(re.findall(r'[\w\s]+', uri))
    try:
        qp = QueryParser(Version.LUCENE_35, "dbpedia_uri", analyzer)
        qp.setDefaultOperator(qp.Operator.AND)
        query = qp.parse(uri)
        MAX = 10000
        result = []
        hits = searcher.search(query, MAX)
        for hit in hits.scoreDocs:
            doc = searcher.doc(hit.doc)
            dbpedia_uri = doc["dbpedia_uri"].encode("utf-8")
            # Only keep exact matches on the original, unstripped URI.
            if dbpedia_uri == uri_old:
                result.append([doc["anchor"].encode("utf-8"),
                               doc["anchor_uri"].encode("utf-8"),
                               dbpedia_uri,
                               doc["number"].encode("utf-8")])
        return result
    except:
        print "searchForDbpediaURI - Fail in uri: " + uri
        return []
def does_line_existNew(self, line, x, y):
    """ Checks if the parsed sentence already exists in the index """
    try:
        # Keep only word characters, joined with spaces, for the query parser.
        array = re.findall(r'[\w]+', line)
        string = ""
        for item in array:
            string += item + " "
        qp = QueryParser(Version.LUCENE_35, "Sentence", analyzer)
        qp.setDefaultOperator(qp.Operator.AND)
        query = qp.parse(string)
        MAX = 10
        hits = searcher.search(query, MAX)
        return len(hits.scoreDocs) > 0
    except Exception:
        s_tmp = str(sys.exc_info())
        if "too many boolean clauses" in s_tmp:
            # Return True so that the sentence is not re-added each time,
            # to avoid further error messages. Only occurs with very long
            # sentences.
            print "too many boolean clauses"
            return True
        else:
            print "Unexpected error:", sys.exc_info()[0]
            print "in does line exist"
            print s_tmp
            return False
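# Alternative sketch: Lucene's QueryParser also offers a static escape()
# that backslash-escapes all query syntax characters. Unlike the regex
# stripping used above it preserves the original characters, so results
# may differ depending on the analyzer:
#
#   safe = QueryParser.escape(line)
#   query = qp.parse(safe)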
def searchString(self, string):
    'searches for a string and returns an array of POS-tagged sentences'
    query = ""
    try:
        MAX = 100000
        # Replace dots with spaces so dates such as 1931.08.06 are tokenized.
        string = string.replace(".", " ")
        string = "".join(re.findall(r'[\w\s]+', string))
        qp = QueryParser(Version.LUCENE_35, "sentence", analyzer)
        qp.setDefaultOperator(qp.Operator.AND)
        query = qp.parse(string)
        hits = searcher.search(query, MAX)
        sentence_list = []
        for hit in hits.scoreDocs:
            doc = searcher.doc(hit.doc)
            # The stored field holds a serialized Python literal.
            sentence_list.append(eval(doc.get("sentence").encode("utf-8")))
        return sentence_list
    except:
        print "Fail in receiving sentence with term " + string + " in search term"
        print ("query", query)
        print "Unexpected error:", sys.exc_info()[0]
        print
        return []
def query(indexName, queryString):
    indSearcher = IndexSearcher(SimpleFSDirectory(File(indexName)))
    qp = QueryParser(Version.LUCENE_CURRENT, "content",
                     StandardAnalyzer(Version.LUCENE_CURRENT))
    qp.setDefaultOperator(qp.Operator.AND)
    query = qp.parse(queryString.replace("-", "_"))
    aux = indSearcher.search(query, 100)
    results = aux.scoreDocs
    ir = indSearcher.getIndexReader()
    res = []
    for r in results:
        # Fetch each hit by its document id, not by its position in the
        # result list.
        doc = ir.document(r.doc)
        res.append(doc.get('id'))
    return res
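# Usage sketch (hypothetical index directory; assumes documents were indexed
# with 'content' and 'id' fields):
#
#   ids = query("index", "information-retrieval")
#   print ids[:10]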
def search(self, string, special=None):
    query = ""
    try:
        MAX = 100000
        # Replace dots with spaces so dates such as 1931.08.06 are tokenized.
        string = string.replace(".", " ")
        string = "".join(re.findall(r'[\w\s]+', string))
        qp = QueryParser(Version.LUCENE_35, "title", analyzer)
        qp.setDefaultOperator(qp.Operator.AND)
        query = qp.parse(string)
        hits = searcher.search(query, MAX)
        sentence_list = []
        for hit in hits.scoreDocs:
            doc = searcher.doc(hit.doc)
            sentence_list.append(doc.get("title").encode("utf-8"))
        return sentence_list
    except:
        print "Fail in receiving sentence with term " + string
        print ("query", query)
        print "Unexpected error:", sys.exc_info()[0]
        print
        return []
def getResultScoreDocs(query):
    # Create the analyzer and a parser for the user-submitted query.
    analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
    parser = QueryParser(Version.LUCENE_CURRENT, "title", analyzer)
    parser.setDefaultOperator(QueryParser.Operator.AND)
    formatted_query = parser.parse(query)
    # Return the top 50 hits.
    scoreDocs = searcher.search(formatted_query, 50).scoreDocs
    return scoreDocs
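# Usage sketch (assumes a module-level `searcher` over an index with a
# 'title' field):
#
#   for sd in getResultScoreDocs("lucene query parser"):
#       print sd.score, searcher.doc(sd.doc).get("title")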
def does_line_exist(self, line, x, y):
    """ Old, more complex check whether a sentence already exists in the
    index. Not used at the moment: it delegates to does_line_existNew, and
    the code below the first return is kept only for reference. """
    return self.does_line_existNew(line, x, y)
    # --- unreachable legacy implementation ---
    try:
        x = "".join(re.findall(r'[\w\s]+', x))
        qp = QueryParser(Version.LUCENE_35, "X", analyzer)
        qp.setDefaultOperator(qp.Operator.AND)
        query = qp.parse(x)
        MAX = 100000
        hits = searcher.search(query, MAX)
        # First check if an x already exists.
        for hit in hits.scoreDocs:
            doc = searcher.doc(hit.doc)
            if doc["Y"] == y:
                print "y found"
                print
                try:
                    string = "".join(re.findall(r'[\w\s]+', line))
                    qp = QueryParser(Version.LUCENE_35, "Sentence", analyzer)
                    qp.setDefaultOperator(qp.Operator.AND)
                    query = qp.parse(string)
                    MAX = 10
                    hits = searcher.search(query, MAX)
                    if len(hits.scoreDocs) > 0:
                        return True
                except Exception:
                    s_tmp = str(sys.exc_info())
                    if "too many boolean clauses" in s_tmp:
                        print "too many boolean clauses"
                        return True
                    else:
                        print "Unexpected error:", sys.exc_info()[0]
                        print "in does line exist"
                        print s_tmp
        print 'nothing found'
        return False
    except:
        print "Fail (does line exists) in x:" + x + " y:" + y
        print "Unexpected error:", sys.exc_info()[0]
        print
def run(command):
    if command == '':
        return None
    STORE_DIR = "index"
    initVM(CLASSPATH)
    directory = FSDirectory.getDirectory(STORE_DIR, False)
    searcher = IndexSearcher(directory)
    analyzer = StandardAnalyzer()
    parser = QueryParser("contents", analyzer)
    parser.setDefaultOperator(QueryParser.Operator.AND)
    parser.setFuzzyMinSim(0.2)
    query = parser.parse(command)
    hits = map(transform, searcher.search(query))
    searcher.close()
    return hits
def searchForDbpediaURImax(self, uri, number):
    """ Returns at most `number` anchor texts that are related to the given
    DBpedia URI. For each anchor text it also returns the corresponding URI
    and the number of times the anchor appears in the English Wikipedia. """
    uri_old = uri
    uri = uri.replace("http://dbpedia.org/resource/", "")
    uri = "".join(re.findall(r'[\w\s]+', uri))
    try:
        qp = QueryParser(Version.LUCENE_35, "dbpedia_uri", analyzer)
        qp.setDefaultOperator(qp.Operator.AND)
        query = qp.parse(uri)
        MAX = 10000
        result = []
        hits = searcher.search(query, MAX)
        for hit in hits.scoreDocs:
            doc = searcher.doc(hit.doc)
            dbpedia_uri = doc["dbpedia_uri"].encode("utf-8")
            if dbpedia_uri == uri_old:
                result.append([doc["anchor"].encode("utf-8"),
                               doc["anchor_uri"].encode("utf-8"),
                               dbpedia_uri,
                               int(doc["number"].encode("utf-8"))])
        # Sort by occurrence count, descending, and cut off after `number`
        # entries. The slice is safe even if fewer results were found.
        result = sorted(result, key=itemgetter(3), reverse=True)
        return result[0:number]
    except:
        print "searchForDbpediaURImax - Fail in uri: " + uri
        print "Unexpected error:", sys.exc_info()[0]
        print
        return []
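# Usage sketch (hypothetical URI and instance name `index`): fetch the five
# most frequent anchor texts for a resource.
#
#   top5 = index.searchForDbpediaURImax("http://dbpedia.org/resource/Berlin", 5)
#   for anchor, anchor_uri, dbpedia_uri, count in top5:
#       print count, anchor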
def test_search(index_dir):
    ''' The test function to test the created index '''
    store = SimpleFSDirectory(File(index_dir))
    searcher = IndexSearcher(store, True)
    parser = QueryParser(Version.LUCENE_CURRENT, "keywords", STD_ANALYZER)
    parser.setDefaultOperator(QueryParser.Operator.AND)
    query = parser.parse('email_subject:Training')
    start = datetime.datetime.now()
    scoreDocs = searcher.search(query, 50).scoreDocs
    duration = datetime.datetime.now() - start
    print "Found %d document(s) (in %s) that matched query '%s':" % (len(scoreDocs), duration, query)
    for scoreDoc in scoreDocs:
        doc = searcher.doc(scoreDoc.doc)
        print scoreDoc.score
        table = dict((field.name(), field.stringValue()) for field in doc.getFields())
        print table
def similar(command, docno):
    STORE_DIR = "index"
    initVM(CLASSPATH)
    directory = FSDirectory.getDirectory(STORE_DIR, False)
    searcher = IndexSearcher(directory)
    analyzer = StandardAnalyzer()
    parser = QueryParser("contents", analyzer)
    parser.setDefaultOperator(QueryParser.Operator.AND)
    parser.setFuzzyMinSim(0.2)
    query = parser.parse(command)
    hits = searcher.search(query)
    document = hits.id(docno)
    # Build a MoreLikeThis query from the matched document and search again.
    ir = IndexReader.open(STORE_DIR)
    mlt = MoreLikeThis(ir)
    mlt.setFieldNames(['name', 'contents'])
    mlt.setMinWordLen(2)
    mlt.setBoost(True)
    query = mlt.like(document)
    hits = map(transform, searcher.search(query))
    searcher.close()
    return hits
def search(self, query, category_id=None):
    SHOULD = BooleanClause.Occur.SHOULD
    # Parse the query against both the summary and the title field and
    # combine the two with SHOULD, so a match in either field suffices.
    parser1 = QueryParser('summary', self.analyzer)
    parser2 = QueryParser('title', self.analyzer)
    parser1.setDefaultOperator(QueryParser.AND_OPERATOR)
    parser2.setDefaultOperator(QueryParser.AND_OPERATOR)
    q1 = parser1.parse(query)
    q2 = parser2.parse(query)
    boolQuery = BooleanQuery()
    boolQuery.add(q1, SHOULD)
    boolQuery.add(q2, SHOULD)
    if category_id:
        # Restrict the results to the given category via the filter.
        self.catfilter.query = query
        self.catfilter.category_id = category_id
        hits = self.searcher.search(boolQuery, self.catfilter)
    else:
        hits = self.searcher.search(boolQuery)
    return hits
def searchForXY(self, uri):
    print "in searchForXY"
    uri_old = uri
    uri = uri.replace("http://dbpedia.org/ontology/", "")
    uri = uri.replace("http://dbpedia.org/property/", "")
    uri = uri.replace("http://dbpedia.org/resource/", "")
    uri = "".join(re.findall(r'[\w\s]+', uri))
    hm = {}
    try:
        qp = QueryParser(Version.LUCENE_35, "URI", analyzer)
        qp.setDefaultOperator(qp.Operator.AND)
        query = qp.parse(uri)
        print "query: " + str(query)
        MAX = 500000
        result = []
        hits = searcher.search(query, MAX)
        for hit in hits.scoreDocs:
            doc = searcher.doc(hit.doc)
            dbpedia_uri = doc["URI"].encode("utf-8")
            if dbpedia_uri == uri_old:
                x = doc["X"]
                y = doc["Y"]
                # Deduplicate (x, y) pairs via the dict.
                term = x + " " + y
                if term not in hm:
                    hm[term] = ""
                    result.append([x, y])
        return result
    except:
        print "Fail in uri: " + uri
        return []
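# Usage sketch (hypothetical URI and instance name `index`): list the
# distinct (x, y) entity pairs tagged with a property.
#
#   pairs = index.searchForXY("http://dbpedia.org/ontology/birthPlace")
#   for x, y in pairs:
#       print x, "->", y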
def run(searcher, parser):
    while True:
        print
        print "Hit enter with no input to quit."
        command = raw_input("Query: ")
        if command == '':
            return
        print "Searching for:", command
        query = parser.parse(command)
        hits = searcher.search(query, Sort("population", True))
        print "%s total matching documents." % hits.length()
        for hit in hits:
            doc = Hit.cast_(hit).getDocument()
            print 'name:', doc.get("name"), ' state:', doc.get("state")

if __name__ == '__main__':
    STORE_DIR = "index"
    initVM(CLASSPATH)
    print 'lucene', VERSION
    directory = FSDirectory.getDirectory(STORE_DIR)
    searcher = IndexSearcher(directory)
    analyzer = StopAnalyzer()
    parser = QueryParser("all_names", analyzer)
    parser.setDefaultOperator(parser.AND_OPERATOR)
    run(searcher, parser)
    searcher.close()
if o == "--format":
    format = a
elif o == "--index":
    indexDir = a
elif o == "--stats":
    stats = True

class CustomTemplate(Template):
    delimiter = '#'

template = CustomTemplate(format)
fsDir = SimpleFSDirectory(File(indexDir))
searcher = IndexSearcher(fsDir, True)
analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
parser = QueryParser(Version.LUCENE_CURRENT, "keywords", analyzer)
parser.setDefaultOperator(QueryParser.Operator.AND)
query = parser.parse(' '.join(args))
start = datetime.now()
scoreDocs = searcher.search(query, 50).scoreDocs
duration = datetime.now() - start
if stats:
    print >>sys.stderr, "Found %d document(s) (in %s) that matched query '%s':" % (len(scoreDocs), duration, query)
for scoreDoc in scoreDocs:
    doc = searcher.doc(scoreDoc.doc)
    table = dict((field.name(), field.stringValue()) for field in doc.getFields())
    print template.substitute(table)
def search_by_field(self, query, field='summary'):
    parser = QueryParser(field, self.analyzer)
    parser.setDefaultOperator(QueryParser.AND_OPERATOR)
    q = parser.parse(query)
    return self.searcher.search(q)
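# Usage sketch (hypothetical instance name `engine`; assumes its analyzer
# and searcher are initialized):
#
#   hits = engine.search_by_field("python lucene", field='title')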
if o == "--format": format = a elif o == "--index": indexDir = a elif o == "--stats": stats = True class CustomTemplate(Template): delimiter = '#' template = CustomTemplate(format) fsDir = SimpleFSDirectory(indexDir) searcher = IndexSearcher(fsDir, True) parser = QueryParser(Version.LUCENE_CURRENT, "keywords", StandardAnalyzer(Version.LUCENE_CURRENT)) parser.setDefaultOperator(QueryParser.Operator.AND) query = parser.parse(' '.join(args)) start = datetime.now() scoreDocs = searcher.search(query, 50).scoreDocs duration = datetime.now() - start if stats: print >>sys.stderr, "Found %d document(s) (in %s) that matched query '%s':" %(len(scoreDocs), duration, query) for scoreDoc in scoreDocs: doc = searcher.doc(scoreDoc.doc) table = dict((field.name(), field.stringValue()) for field in doc.getFields()) print template.substitute(table)