def __init__(self):
    """Set up the Chinese analyzer, a read-only index searcher, and external connections."""
    # Analyzers are keyed by name so further analyzers can be registered later.
    chinese_analyzer = lucene.SmartChineseAnalyzer(lucene.Version.LUCENE_33)
    self.analyzers = {"smartcn": chinese_analyzer}
    index_store = lucene.SimpleFSDirectory(lucene.File(self.STORE_DIR))
    self.searcher = lucene.IndexSearcher(index_store, True)  # True = read-only
    self.pgconn = mypass.getConn()
    self.sw = sinaweibooauth.SinaWeiboOauth()
def main1(): print "retrieve and display files......" direc = lucene.SimpleFSDirectory(lucene.File(INDEX_DIR)) analyzer = lucene.StandardAnalyzer(lucene.Version.LUCENE_CURRENT) searcher = lucene.IndexSearcher(direc) search(searcher, analyzer) search2(searcher, analyzer)
def getRecentConversations(self, username):
    """Return the user's conversations with activity in the last 20 minutes.

    Searches the per-user Lucene index for messages whose timestamp lies in
    [now - 20 minutes, MAX_TIMESTAMP], groups the hits into LogConversation
    objects (one per protocol/friend_chat pair) and returns the list.
    Returns False when the user has no index directory.
    """
    index_dir = self.indexdir + username
    if not os.path.isdir(index_dir):
        # Index does not exist for this user.
        return False
    luc_index = lucene.FSDirectory.getDirectory(index_dir)
    # Build a timestamp range query covering the last 20 minutes (UTC seconds).
    curtime = int(time.time())
    searchstart = self.__padTimestamp(curtime - SECONDS_IN_20_MINUTES)
    searchend = self.__padTimestamp(MAX_TIMESTAMP)
    qtext = "timestamp:[" + searchstart + " TO " + searchend + "]"
    searcher = lucene.IndexSearcher(luc_index)
    try:
        qparser = lucene.QueryParser("text", lucene.StandardAnalyzer())
        query = qparser.parse(qtext)
        # Sort so that messages belonging to one conversation come out adjacent.
        sortmethod = lucene.Sort(["protocol", "friend_chat", "timestamp"])
        qresults = searcher.search(query, sortmethod)
        conversationlist = []
        for i in range(qresults.length()):
            doc = qresults.doc(i)
            mprotocol = doc.get("protocol")
            mfriend_chat = doc.get("friend_chat")
            mtimestamp = int(doc.get("timestamp"))
            mwho_sent = doc.get("who_sent")
            mfileoffset = int(doc.get("file_offset"))
            mrank = qresults.score(i)
            messagetext = self.__getMessageFromFile(
                username, mfriend_chat, mprotocol, mfileoffset)
            message = LogMessage(messagetext, mtimestamp, mwho_sent)
            message.setRank(mrank)
            # NOTE(review): linear scan over previously built conversations,
            # kept from the original; slow when there are many conversations.
            for conversation in conversationlist:
                if (conversation.getProtocol() == mprotocol and
                        conversation.getFriendChat() == mfriend_chat):
                    conversation.addMessage(message)
                    break
            else:
                # No existing conversation matched: start a new one.
                conversation = LogConversation(mprotocol, mfriend_chat)
                conversation.addMessage(message)
                conversationlist.append(conversation)
        return conversationlist
    finally:
        # Fix: the searcher was never closed, leaking the index handle on
        # every call (and on any exception raised while searching).
        searcher.close()
def search(self, restrictions, destination): """ @see: L{NullPrincipalSearcher<datafinder.persistence.search.searcher.NullSearcher>} E1101: Pylint cannot detect the internals of the modules solr and lucene. """ # pylint: disable=E1101 results = list() queryString = search_restriction_mapping.mapSearchRestriction( restrictions) if self._configuration.luceneIndexUri.startswith("file:///"): try: self._configuration.env.attachCurrentThread() indexDir = lucene.SimpleFSDirectory( lucene.File( self._configuration.luceneIndexUri.replace( "file:///", ""))) analyzer = lucene.StandardAnalyzer( lucene.Version.LUCENE_CURRENT) searcher = lucene.IndexSearcher(indexDir) query = lucene.QueryParser(lucene.Version.LUCENE_CURRENT, "content", analyzer).parse(queryString) hits = searcher.search(query, constants.MAX_RESULTS) for hit in hits.scoreDocs: doc = searcher.doc(hit.doc) results.append("/%s" % urllib.unquote( doc.get(constants.FILEPATH_FIELD).encode("utf-8"))) searcher.close() except Exception, error: errorMessage = "Cannot search items. Reason: '%s'" % error raise PersistenceError(errorMessage)
def __init__(self, session, config, parent):
    """Initialise the Lucene-backed index store with its analyzer, parser and searcher."""
    IndexStore.__init__(self, session, config, parent)
    index_path = self.get_path(session, 'defaultPath')
    self.analyzer = NullC3Analyzer()
    self.dir = lucene.FSDirectory.getDirectory(index_path, False)
    self.parser = lucene.QueryParser("", lucene.StandardAnalyzer())
    self.searcher = lucene.IndexSearcher(self.dir)
    # Writer and per-record state are created lazily by later operations.
    self.writer = None
    self.currDoc = None
    self.currRec = None
def __init__(self, forumname):
    """Open a read-only Lucene index for *forumname*.

    Exits the process (original behaviour, kept) when the forum is not one
    of the supported forums.
    """
    # Guard clause instead of if/else; idiomatic `not in` membership test.
    if forumname not in self.supported_forums:
        sys.exit()
    self.forum = forumname
    self.STORE_DIR = self.STORE_BASE_DIR + forumname
    smartcn = lucene.SmartChineseAnalyzer(lucene.Version.LUCENE_33)
    self.analyzers = {"smartcn": smartcn}
    directory = lucene.SimpleFSDirectory(lucene.File(self.STORE_DIR))
    self.searcher = lucene.IndexSearcher(directory, True)  # True = read-only
    self.pgconn = mypass.getConn()
def SearchExactContents(self, keyword):
    """Perform exact matching of *keyword* against blog contents."""
    searcher = lucene.IndexSearcher(self.store)
    print("Searching for ", keyword)
    # Incoming keyword is CP949 (Korean Windows); the index stores UTF-8.
    utf8_keyword = keyword.decode('cp949').encode('utf-8')
    query = lucene.QueryParser('contents', self.analyzer).parse(utf8_keyword)
    hits = searcher.search(query)
    print ("%s matching documents" % hits.length())
    # The searcher is handed to the formatter, which is responsible for it.
    return self.__MakeResultFormat(hits, searcher)
def getHitCount(self, fieldName, searchString): reader = lucene.IndexReader.open(self.dir, True) #readOnly = True print '%s total docs in index' % reader.numDocs() reader.close() searcher = lucene.IndexSearcher(self.dir, True) #readOnly = True t = lucene.Term(fieldName, searchString) query = lucene.TermQuery(t) hitCount = len(searcher.search(query, 50).scoreDocs) searcher.close() print "%s total matching documents for %s\n---------------" \ % (hitCount, searchString) return hitCount
def SearchPrefixContents(self, keyword):
    """Perform prefix matching of *keyword* against blog contents."""
    searcher = lucene.IndexSearcher(self.store)
    print("Searching for ", keyword)
    # Re-encode CP949 console input as UTF-8 to match the index encoding.
    utf8_keyword = keyword.decode('cp949').encode('utf-8')
    prefix_query = lucene.PrefixQuery(lucene.Term("contents", utf8_keyword))
    hits = searcher.search(prefix_query)
    print ("%s matching documents" % hits.length())
    # The searcher is handed to the formatter, which is responsible for it.
    return self.__MakeResultFormat(hits, searcher)
def func_pic(command):
    """Search the graph index for *command*.

    Returns the tuple produced by run(): (resultInfo, title, url, imgurl, score).
    """
    global vm_env
    # Attach this thread to the already-running JVM before any Lucene call.
    vm_env = lucene.getVMEnv()
    vm_env.attachCurrentThread()
    STORE_DIR = "graphIndex"
    directory = lucene.SimpleFSDirectory(lucene.File(STORE_DIR))
    searcher = lucene.IndexSearcher(directory, True)  # True = read-only
    analyzer = lucene.SimpleAnalyzer(lucene.Version.LUCENE_CURRENT)
    # Fix: dropped the dead `title = []` etc. initialisations — run()
    # rebinds all five names immediately, so they were never read.
    resultInfo, title, url, imgurl, score = run(command, searcher, analyzer)
    searcher.close()
    return resultInfo, title, url, imgurl, score
def search(request, template_name='reviews/search.html', local_site_name=None):
    """
    Searches review requests on Review Board based on a query string.
    """
    query = request.GET.get('q', '')
    siteconfig = SiteConfiguration.objects.get_current()

    # Search must be explicitly enabled in the site configuration.
    if not siteconfig.get("search_enable"):
        # FIXME: show something useful
        raise Http404

    if not query:
        # FIXME: I'm not super thrilled with this
        return HttpResponseRedirect(reverse("root"))

    # A purely numeric query is treated as a review-request id: redirect
    # straight to that request when it exists.
    if query.isdigit():
        query_review_request = get_object_or_none(ReviewRequest, pk=query)
        if query_review_request:
            return HttpResponseRedirect(query_review_request.get_absolute_url())

    import lucene

    # Pick the directory-opening API based on the installed PyLucene version:
    # 2.x (< 2.9) uses FSDirectory.getDirectory, 2.9/3.x uses FSDirectory.open.
    lv = [int(x) for x in lucene.VERSION.split('.')]
    lucene_is_2x = lv[0] == 2 and lv[1] < 9
    lucene_is_3x = lv[0] == 3 or (lv[0] == 2 and lv[1] == 9)

    # We may have already initialized lucene
    try:
        lucene.initVM(lucene.CLASSPATH)
    except ValueError:
        pass

    index_file = siteconfig.get("search_index_file")
    if lucene_is_2x:
        store = lucene.FSDirectory.getDirectory(index_file, False)
    elif lucene_is_3x:
        store = lucene.FSDirectory.open(lucene.File(index_file))
    else:
        # Unrecognised Lucene version; neither branch above applies.
        assert False

    try:
        searcher = lucene.IndexSearcher(store)
    except lucene.JavaError, e:
        # FIXME: show a useful error
        raise e
def SearchKeyword(indexDir, keyword):
    """Search the index at *indexDir* for *keyword* in the 'content' field and print each hit."""
    idx_store = lucene.FSDirectory.getDirectory(indexDir)
    idx_searcher = lucene.IndexSearcher(idx_store)  # index search object
    std_analyzer = lucene.StandardAnalyzer()
    print ("Searching for %s" % keyword)
    # Console input is CP949 (Korean Windows); the index stores UTF-8.
    keyword = keyword.decode('cp949').encode('utf-8')
    parser = lucene.QueryParser('content', std_analyzer)  # build the query
    hits = idx_searcher.search(parser.parse(keyword))  # run the search
    print ("%s matching documents" % hits.length())  # result count
    for hit in hits:  # print each result
        doc = lucene.Hit.cast_(hit).getDocument()
        print("Path: %s, name: %s" % (doc.get("path"), doc.get("name")))
    idx_searcher.close()
def SearchExactAll(self, keyword):
    """Perform exact matching of *keyword* against both blog contents and blogger id."""
    searcher = lucene.IndexSearcher(self.store)
    print("Searching for ", keyword)
    # Re-encode CP949 console input as UTF-8 to match the index encoding.
    utf8_keyword = keyword.decode('cp949').encode('utf-8')
    # OR the two term queries together: a hit in either field matches.
    combined = lucene.BooleanQuery()
    combined.add(lucene.TermQuery(lucene.Term("bloger", utf8_keyword)),
                 lucene.BooleanClause.Occur.SHOULD)
    combined.add(lucene.TermQuery(lucene.Term("contents", utf8_keyword)),
                 lucene.BooleanClause.Occur.SHOULD)
    hits = searcher.search(combined)
    print ("%s matching documents" % hits.length())
    # The searcher is handed to the formatter, which is responsible for it.
    return self.__MakeResultFormat(hits, searcher)
def search(input_q, web_data):
    """Return up to 5 result URLs for the query *input_q*.

    *web_data* maps a page number (each hit's stored 'page_num' field) to
    the host/path used to build the result URL.  Uses the module-level
    `directory` and `analyzer`.
    """
    numberOfHits = 5
    collector = lucene.TopScoreDocCollector.create(numberOfHits, True)
    searcher = lucene.IndexSearcher(directory, True)  # True = read-only
    try:
        qp = lucene.QueryParser(lucene.Version.LUCENE_CURRENT, 'word', analyzer)
        qp.setDefaultOperator(lucene.QueryParser.Operator.OR)
        query = qp.parse(input_q)
        searcher.search(query, collector)
        score_docs = collector.topDocs().scoreDocs
        url_list = []
        for my_doc in score_docs:
            doc = searcher.doc(my_doc.doc)
            url_list.append('http://' + web_data[doc['page_num']])
        return url_list
    finally:
        # Fix: the searcher was never closed, leaking an index handle per
        # call.  Also dropped the `count` local, which was incremented but
        # never read.
        searcher.close()
def search(self, query, field="content", limit=None): ''' Searches the index based on the query supplied. ''' directory = lucene.SimpleFSDirectory(lucene.File(self.index_dir)) searcher = lucene.IndexSearcher(directory, True) query = lucene.QueryParser(lucene.Version.LUCENE_CURRENT, field, self.analyser).parse(query) try: #if there's no limit then use a collector to retrieve them all if limit is None: collector = DocumentHitCollector(searcher) scoreDocs = searcher.search(query, collector) results = collector.get_collected_documents() else: scoreDocs = searcher.search(query, limit).scoreDocs results = [] for scoreDoc in scoreDocs: results.append(searcher.doc(scoreDoc.doc)) except lucene.JavaError, e: print e
def search_location(word):
    """Search the 'eng' field of the module-level index for *word*.

    Returns the 'eng' field values of up to 20 matching documents.
    """
    print("searching ")
    # Attach this thread to the already-running JVM before any Lucene call.
    vm_env = lucene.getVMEnv()
    vm_env.attachCurrentThread()
    searcher = lucene.IndexSearcher(directory1, True)  # True = read-only
    query = lucene.QueryParser(lucene.Version.LUCENE_CURRENT, 'eng',
                               analyzer1).parse(word)
    results = searcher.search(query, None, 20)
    # Fix: the original accumulator was named `f**k`, which is not a legal
    # Python identifier (assignment to an expression — a SyntaxError).
    matches = []
    for score_doc in results.scoreDocs:
        doc = searcher.doc(score_doc.doc)
        matches.append(doc['eng'])
    searcher.close()
    return matches
def searchMessages(self, username, querytext):
    """Search the user's message index for *querytext*.

    Each hit is wrapped in a LogConversation together with the surrounding
    messages fetched before and after it; hits that fall inside an already
    built conversation only get their rank updated.  Returns the list of
    LogConversation objects, or False when the user has no index.
    """
    #Determine index and data paths
    index_dir = self.indexdir + username
    data_dir = self.datadir + username
    #Load the index
    if os.path.isdir(index_dir) == True:
        luc_index = lucene.FSDirectory.getDirectory(index_dir)
        #Build and perform the query
        searcher = lucene.IndexSearcher(luc_index)
        qparser = lucene.QueryParser("text", lucene.StandardAnalyzer())
        query = qparser.parse(querytext)
        qresults = searcher.search(query)
        #Fetch the results
        conversationlist = []
        for i in range(qresults.length()):
            # Pull the stored fields for this hit.
            mid = int(qresults.id(i))
            mprotocol = qresults.doc(i).get("protocol")
            mfriend_chat = qresults.doc(i).get("friend_chat")
            mtimestamp = int(qresults.doc(i).get("timestamp"))
            mwho_sent = qresults.doc(i).get("who_sent")
            mfileoffset = int(qresults.doc(i).get("file_offset"))
            mrank = qresults.score(i)
            #First check if it exists in one of the previously matched
            #conversations
            found = False
            for j in range(len(conversationlist)):
                for k in range(len(conversationlist[j].messages)):
                    if conversationlist[j].messages[k].getID() == mid:
                        #Match found, so just update the messages rank
                        conversationlist[j].messages[k].setRank(mrank)
                        found = True
            #If no match was found, create a new conversation
            if found == False:
                #Create a conversation for each result
                conversation = LogConversation(mprotocol, mfriend_chat)
                messagetext = self.__getMessageFromFile(
                    username, mfriend_chat, mprotocol, mfileoffset)
                # Context messages logged before the hit come first.
                before_msgs = self.__getSurroundingMessages(
                    "before", searcher, username, mprotocol,
                    mfriend_chat, mtimestamp, mid)
                for j in range(len(before_msgs)):
                    conversation.addMessage(before_msgs[j])
                # Then the matching message itself, tagged with rank and id.
                message = LogMessage(messagetext, mtimestamp, mwho_sent)
                message.setRank(mrank)
                message.setID(mid)
                conversation.addMessage(message)
                # And finally the context messages logged after the hit.
                after_msgs = self.__getSurroundingMessages(
                    "after", searcher, username, mprotocol,
                    mfriend_chat, mtimestamp, mid)
                for j in range(len(after_msgs)):
                    conversation.addMessage(after_msgs[j])
                conversationlist.append(conversation)
            #End of fetching each result
        return conversationlist
    else:
        #Index not found
        return False
def __init__(self, dir_file_path):
    """Start the JVM and open an index searcher over *dir_file_path*."""
    # NOTE(review): initVM() raises if the VM was already initialised by
    # another instance — confirm only one instance is ever created.
    lucene.initVM()
    self.directory = lucene.SimpleFSDirectory(lucene.File(dir_file_path))
    self.analyzer = lucene.StandardAnalyzer(lucene.Version.LUCENE_30)
    # NOTE(review): attribute named 'search' reads like a method; it holds
    # the IndexSearcher instance.
    self.search = lucene.IndexSearcher(self.directory)