Exemplo n.º 1
0
	def SearchExactAll(self, keyword):
		"""Run an exact-match search against the blogger ID and blog contents.

		One TermQuery is built for each of the "bloger" and "contents" fields
		and the two are combined with SHOULD, so a document matches when
		either field contains the term.

		keyword is decoded as cp949 and re-encoded as UTF-8 before it is
		handed to Lucene.

		Returns the value produced by self.__MakeResultFormat(hits, searcher).
		"""
		searcher = lucene.IndexSearcher(self.store)

		# Format the message explicitly: under Python 2, print("a", b) prints
		# the repr of a tuple instead of the two values.
		print("Searching for %s" % (keyword,))
		term_text = keyword.decode('cp949').encode('utf-8')

		query = lucene.BooleanQuery()
		for field in ("bloger", "contents"):
			query.add(
				lucene.TermQuery(lucene.Term(field, term_text)),
				lucene.BooleanClause.Occur.SHOULD)

		hits = searcher.search(query)
		print("%s matching documents" % hits.length())

		# The searcher is handed on together with the hits, so it is not
		# closed here.
		return self.__MakeResultFormat(hits, searcher)
Exemplo n.º 2
0
	def SearchPrefixContents(self, keyword):
		"""Run a prefix-match search against the blog contents.

		keyword is decoded as cp949 and re-encoded as UTF-8, then used as the
		prefix of a PrefixQuery on the "contents" field.

		Returns the value produced by self.__MakeResultFormat(hits, searcher).
		"""
		searcher = lucene.IndexSearcher(self.store)

		# Format the message explicitly: under Python 2, print("a", b) prints
		# the repr of a tuple instead of the two values.
		print("Searching for %s" % (keyword,))

		prefix = keyword.decode('cp949').encode('utf-8')
		query = lucene.PrefixQuery(lucene.Term("contents", prefix))

		hits = searcher.search(query)
		print("%s matching documents" % hits.length())

		# The searcher is handed on together with the hits, so it is not
		# closed here.
		return self.__MakeResultFormat(hits, searcher)
Exemplo n.º 3
0
 def getHitCount(self, fieldName, searchString):
     """Report and return the number of hits for an exact term match.

     Prints the number of documents in the index at self.dir, then runs a
     TermQuery for searchString on fieldName, prints the hit count and
     returns it.

     At most 50 results are requested from the searcher, so the returned
     count never exceeds 50.
     """
     reader = lucene.IndexReader.open(self.dir, True)  # readOnly = True
     try:
         print('%s total docs in index' % reader.numDocs())
     finally:
         # Release the reader even if numDocs() raises.
         reader.close()

     searcher = lucene.IndexSearcher(self.dir, True)  # readOnly = True
     try:
         query = lucene.TermQuery(lucene.Term(fieldName, searchString))
         hitCount = len(searcher.search(query, 50).scoreDocs)
     finally:
         # Release the searcher even if the search raises.
         searcher.close()

     print("%s total matching documents for %s\n---------------"
           % (hitCount, searchString))
     return hitCount
Exemplo n.º 4
0
def _collect_field_terms(reader, field_name):
    """Return [{'text': ..., 'freq': ...}] for every term of field_name in reader."""
    words = []
    # Position the enumeration at the first term of the requested field.
    term_enum = reader.terms(lucene.Term(field_name, ''))
    try:
        term = term_enum.term()
        # Stop when the enumeration is exhausted or moves on to another field.
        while term and term.field() == field_name:
            term_docs = reader.termDocs(term)
            try:
                # Advance to the first document containing the term; freq()
                # then reports the term's frequency within that document.
                term_docs.next()
                words.append({'text': term.text(), 'freq': term_docs.freq()})
            finally:
                term_docs.close()
            term = term_enum.next() and term_enum.term()
    finally:
        term_enum.close()
    return words


def get_word_list(text, is_list=False, field_name='fieldname'):
    """Analyze text with KoreanAnalyzer and list its terms with frequencies.

    The text is indexed as a single document in a temporary in-memory
    index, and the index's terms for field_name are then enumerated.

    Args:
        text: The string to analyze, or an iterable of strings when
            is_list is true.
        is_list: When true, the items of text are concatenated into one
            string, each followed by a newline.
        field_name: Name of the Lucene field the text is indexed under.

    Returns:
        A list of {'text': term_text, 'freq': frequency} dicts, one per
        term, in the order the term enumeration yields them.
    """
    if is_list:
        text = "".join(line + "\n" for line in text)

    lucene.initVM(lucene.CLASSPATH)
    analyzer = lucene.KoreanAnalyzer()
    directory = lucene.RAMDirectory()

    try:
        # Index the text as the only document of the in-memory index.
        writer = lucene.IndexWriter(directory, analyzer)
        doc = lucene.Document()
        doc.add(lucene.Field(field_name, text, lucene.Field.Store.YES,
                             lucene.Field.Index.ANALYZED))
        writer.addDocument(doc)
        writer.close()

        # Walk the term dictionary of the freshly written index.
        ireader = lucene.IndexReader.open(directory, False)
        try:
            return _collect_field_terms(ireader, field_name)
        finally:
            ireader.close()
    finally:
        directory.close()
Exemplo n.º 5
0
    def handle_noargs(self, **options):
        """Build or incrementally update the Lucene index of review requests.

        Exits with status 1 if search is disabled in the site configuration
        or if PyLucene is unavailable. Otherwise opens an IndexWriter on the
        configured index directory and indexes each selected ReviewRequest
        through self.index_review_request().

        options may carry 'incremental' (default True). An incremental run
        only indexes requests updated after the time recorded in the
        'timestamp' file inside the index directory; if that file cannot be
        opened, the run falls back to a non-incremental one.

        NOTE(review): the writer is not closed anywhere in this block as
        shown - confirm whether the definition continues beyond this point.
        """
        siteconfig = SiteConfiguration.objects.get_current()

        # Refuse to do anything if they haven't turned on search.
        if not siteconfig.get("search_enable"):
            sys.stderr.write('Search is currently disabled. It must be '
                             'enabled in the Review Board administration '
                             'settings to run this command.\n')
            sys.exit(1)

        if not have_lucene:
            sys.stderr.write('PyLucene is required to build the search index.\n')
            sys.exit(1)

        incremental = options.get('incremental', True)

        # Make sure the index directory exists; the timestamp file lives in it.
        store_dir = siteconfig.get("search_index_file")
        if not os.path.exists(store_dir):
            os.mkdir(store_dir)
        timestamp_file = os.path.join(store_dir, 'timestamp')

        # Read the time recorded by the previous run. Only IOError is handled
        # here; a file whose content is not an integer raises ValueError.
        timestamp = 0
        if incremental:
            try:
                f = open(timestamp_file, 'r')
                timestamp = datetime.utcfromtimestamp(int(f.read()))
                f.close()
            except IOError:
                incremental = False

        # Record the current time for the next run. This happens before any
        # indexing is done.
        f = open(timestamp_file, 'w')
        f.write('%d' % time.time())
        f.close()

        # Open the index writer using the constructor form matching the
        # detected Lucene version. `not incremental` is presumably the
        # writer's "create" flag - TODO confirm against the Lucene API.
        if lucene_is_2x:
            store = lucene.FSDirectory.getDirectory(store_dir, False)
            writer = lucene.IndexWriter(store, False,
                                        lucene.StandardAnalyzer(),
                                        not incremental)
        elif lucene_is_3x:
            store = lucene.FSDirectory.open(lucene.File(store_dir))
            writer = lucene.IndexWriter(store,
                lucene.StandardAnalyzer(lucene.Version.LUCENE_CURRENT),
                not incremental,
                lucene.IndexWriter.MaxFieldLength.LIMITED)
        else:
            assert False

        # Select review requests whose status is 'P' or 'S'; an incremental
        # run narrows this to those updated after the recorded timestamp.
        status = Q(status='P') | Q(status='S')
        objects = ReviewRequest.objects.filter(status)
        if incremental:
            query = Q(last_updated__gt=timestamp)
            # FIXME: re-index based on reviews once reviews are indexed.  I
            # tried ORing this in, but it doesn't seem to work.
            #        Q(review__timestamp__gt=timestamp)
            objects = objects.filter(query)

        # Progress output is only produced when stdout is a terminal.
        if sys.stdout.isatty():
            print 'Creating Review Request Index'
        totalobjs = objects.count()
        i = 0
        prev_pct = -1

        for request in objects:
            try:
                # Remove the old documents from the index
                if incremental:
                    writer.deleteDocuments(lucene.Term('id', str(request.id)))

                self.index_review_request(writer, request)

                # Rewrite the percentage line only when the value changes.
                if sys.stdout.isatty():
                    i += 1
                    pct = (i * 100 / totalobjs)
                    if pct != prev_pct:
                        sys.stdout.write("  [%s%%]\r" % pct)
                        sys.stdout.flush()
                        prev_pct = pct

            # A failure on one request is reported to stderr and the loop
            # moves on to the next request.
            except Exception, e:
                sys.stderr.write('Error indexing ReviewRequest #%d: %s\n' % \
                                 (request.id, e))
Exemplo n.º 6
0
 def testDelete(self, fieldName, searchString):
     """Delete the documents indexed under the term (fieldName, searchString).

     Opens a writable IndexReader on self.dir, issues the delete, and closes
     the reader.
     """
     reader = lucene.IndexReader.open(self.dir, False)  # readOnly = False
     try:
         reader.deleteDocuments(lucene.Term(fieldName, searchString))
     finally:
         # Always close the writable reader, even if the delete raises.
         reader.close()