Example #1
    def content_based_calculator(self, stringofwords):
        # split the query on non-word characters and lowercase each term
        divider = re.compile('\\W*')
        res=[x.lower() for x in divider.split(stringofwords) if x!= '']
        content_scores={}
        paperid_index={}
        content_out={}
        index=1
        for word in res:
            if word in self.wordlocations:
                for papid in self.wordlocations[word]:
                    # score a paper by how often this word occurs in it
                    papid_score=len(self.wordlocations[word][papid])
                    if papid not in content_scores:
                        content_scores[papid]=papid_score
                        paperid_index[papid]=index
                    else:
                        # multiply per-word scores; count matched query words
                        content_scores[papid]=content_scores[papid]*papid_score
                        paperid_index[papid]+=1
            else:
                continue

        # keep only papers that matched every query word
        for pid in paperid_index:
            if paperid_index[pid]==len(res):
                content_out[pid]=content_scores[pid]

        inst=searchengine.searcher('database')
        content_out=inst.normalizescores(content_out)
        self.contentscore= content_out
Example #2
def generateFig(filePath):
    e=searchengine.searcher('searchindex.db')
    frequencies= e.getFrequentWords()
    # take relative word frequencies into account, lower max_font_size
    #wordcloud = WordCloud(max_font_size=40, relative_scaling=.5).generate(text)
    # set.add() returns None, so add the stopword first and pass the set itself
    STOPWORDS.add(u"黄豆")
    wordcloud = WordCloud(font_path='/home/jamin/Documents/resource/msyh.ttf',
                          background_color="white", stopwords=STOPWORDS,
                          max_font_size=40, relative_scaling=.25).fit_words(frequencies)
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.savefig(filePath)
Example #3
def makeindex(key):
    e = s.searcher('searchindex.db')
    result = e.query(key)
    List = []
    size = len(result)
    for i in range(size):
        for j in result[i]:
            List.append(e.geturlname(j))
    return List
Example #4
def pageRank():
  reload(searchengine)
  crawler=searchengine.crawler('searchindex.db')
  e=searchengine.searcher('searchindex.db')
  #crawler.calculatepagerank( )
  cur=crawler.con.execute('select * from pagerank order by score desc')
  for i in range(3):
    d=cur.next()
    print d,e.geturlname(d[0])
Example #5
def makeindex(key):
  e = s.searcher('searchindex.db')
  result = e.query(key)
  List = []
  size = len(result)
  for i in range(size):
    for j in result[i]:
      List.append(e.geturlname(j))
  return List
Example #6
def queryhandler():
    e = searchengine.searcher()
    q = bottle.request.forms.get("query")
    mywords, myurls = e.query(q)
    s = bottle.request.environ.get('beaker.session')
    s['mywords'] = mywords
    s['myurls'] = myurls
    s.save()
    bottle.redirect('/results')
Example #7
def test_calculate_pagerank():
    sys.stderr.write("testing pagerank calculation...\n")
    crawler=searchengine.crawler('searchindex.db')
    crawler.calculatepagerank()
    sys.stderr.write("checking pagerank result...\n")
    cur=crawler.con.execute('select * from pagerank order by score desc')
    # keep the fetched rows so the top url can actually be checked below
    rows=[cur.next() for i in range(3)]
    for row in rows: print row
    sys.stderr.write("checking pagerank top url...\n")
    e=searchengine.searcher('searchindex.db')
    urlid=rows[0][0]
    print e.geturlname(urlid)
Example #8
    def testQueryIndian(self):
        wordids=[]
        rows=[]
        if self.config.queries is None or len(self.config.queries) == 0:
            queries=['memory', 'mental', 'mind', 'storage', 'magnetic', 'cache', 'psychological', 'semiconductor', 'transistor', 'random access', 'data storage']
        else:
            queries = self.config.queries
        s=searcher(self.dbname)
        if self.numusers >= 1:
            for q in queries:
                for userid in [x+1 for x in range(self.numusers)]:
                    wordids,rows = s.query(q, userid)
Example #9
def firesearch():
    outputwidget.delete(1.0,END)
    fillconfig()
    s=searcher(config.dbname)
    q=queryvar.get()
    urllist=[]
    try:
        widlist,urlidlist = s.query(q,config.userid,config.userurlhitscoresweight)
        for urlid in urlidlist:
            url=s.geturlname(urlid)
            urllist.append(url)
        outputwidget.insert(END, '\n'.join(urllist))
    except:
        print "Error:", sys.exc_info()
        tkMessageBox.showerror("Input Error", sys.exc_info())
        raise
Example #10
def serve_search(environ, start_response):

  query_words = ''
  results = ''
  if 'QUERY_STRING' in environ:
    query_dict = cgi.parse_qs(environ['QUERY_STRING'])
    if 'q' in query_dict:
      # parse_qs returns a list for values as query parameters can appear
      # several times (e.g. 'q=ddsview&q=makeicns'). Ignore all but the first
      # occurrence of q.
      query_words = query_dict['q'][0]
      s = searchengine.searcher('searchindex.db')
      results = '<br>\n'.join(['%f: <a href="%s">%s</a>' % (score, url, url)
        for score, url in s.query(query_words)])
      results = results.encode('utf-8')

  # Note: this also returns html for favicon queries.
  start_response('200 OK',[('Content-type','text/html')])
  return [template % locals()]
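
The parse_qs comment above is easy to verify: every key maps to a list with one entry per occurrence, which is why the handler takes query_dict['q'][0]. A short Python 2 check (on Python 3 the same function lives in urllib.parse):

import cgi
print cgi.parse_qs('q=ddsview&q=makeicns')
# prints {'q': ['ddsview', 'makeicns']}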
Example #11
    def pagerank_calculator(self, iterations=20):
        pageranks={}
        for item in self.citations:
            pageranks.setdefault(item,1.0)
        for i in range(iterations):
            #print 'Iteration %d' % i
            pr=0.15  # minimum PageRank contribution (damping factor 0.85)
            for item in pageranks:
                init_score=0
                for element in self.citations[item]:
                    if element not in pageranks:
                        val=1.0
                    else:
                        val=pageranks[element]
                    linknum=self.citationcounts[element]
                    init_score+=float(val)/linknum
                pageranks[item]=pr+(0.85*init_score)
        inst=searchengine.searcher('database')
        pageranks=inst.normalizescores(pageranks)
        #print pageranks['9402117']
        self.pagerankscore=pageranks
Example #12
def serve_search(environ, start_response):

    query_words = ''
    results = ''
    if 'QUERY_STRING' in environ:
        query_dict = cgi.parse_qs(environ['QUERY_STRING'])
        if 'q' in query_dict:
            # parse_qs returns a list for values as query parameters can appear
            # several times (e.g. 'q=ddsview&q=makeicns'). Ignore all but the first
            # occurrence of q.
            query_words = query_dict['q'][0]
            s = searchengine.searcher('searchindex.db')
            results = '<br>\n'.join([
                '%f: <a href="%s">%s</a>' % (score, url, url)
                for score, url in s.query(query_words)
            ])
            results = results.encode('utf-8')

    # Note: this also returns html for favicon queries.
    start_response('200 OK', [('Content-type', 'text/html')])
    return [template % locals()]
Example #13
def generatePosNegFile(filepath):
    f=open(filepath,'w')
    e=searchengine.searcher('searchindex.db')
    cursor= e.con.execute(
            " select * from urllist where posnegscore is not null order by posnegscore desc limit 3  " )
    for row in cursor:
        f.write(row[0])
        f.write("\t")
        f.write(str(row[1]))
        f.write("\t")
        f.write("pos")
        f.write("\n")
    cursor= e.con.execute(
            " select * from urllist where posnegscore is not null order by posnegscore asc limit 3  " )
    for row in cursor:
        f.write(row[0])
        f.write("\t")
        f.write(str(row[1]))
        f.write("\t")
        f.write("neg")
        f.write("\n")
    f.close()
Example #14
#!/usr/bin/env python
# coding: utf-8
__author__ = 'dick'

import searchengine

craw = searchengine.crawler('searchindex.db')
# craw.createindextables()
pages = [
    # 'http://www.bbc.com/',
    'https://www.hao123.com/?1477704964',
    # 'https://www.baidu.com',
]

# craw.crawl(pages)

e = searchengine.searcher('searchindex.db')
print e.getmatchrows('hao weather yes')
Example #15
'''
Created on Feb 16, 2014

@author: ssashita
A query is given as:
 python runquery.py 1 functional programming
 (1 is the userid, and the rest are the query words)
'''

from searchengine import searcher
import sys
import cconfigurator

if __name__ == '__main__':
    config = cconfigurator.configure('crawled.db')
    listargs=[]
    if len(sys.argv) > 2:
        for arg in sys.argv[2:]:
            listargs.append(arg)
        s=searcher('crawled.db')
        s.query(' '.join([str(x) for x in listargs]),sys.argv[1])
    else:
        print("At least 3 args required. Second one is the userid and rest are the query words")
Example #16
def query():
    e = searchengine.searcher('searchIndex.db')
    print e.query('functional programming...')
Example #17
def testquery(q = 'functional programming'):
   search = searchengine.searcher()
   search.query(q)
Example #18
    print '\n'

if __name__ == '__main__':
    '''
2. Boolean operations. Many search engines support Boolean queries, which allow
users to construct searches like "python OR perl." An OR search can work by
doing the queries separately and combining the results, but what about "python
AND (program OR code)"? Modify the query methods to support some basic
Boolean operations.
3. Exact matches. Search engines often support "exact match" queries, where the
words in the page must match the words in the query in the same order with no
additional words in between. Create a new version of getrows that only returns
results that are exact matches. (Hint: you can use subtraction in SQL to get the
difference between the word locations.) 
    '''
    dbname = 'searchindex.db'
    if True:
        crawler = se.crawler(dbname)
        crawler.createindextables()
        pages = [
            'https://www.zhihu.com/',
            'https://github.com/'
        ]
        crawler.crawl(pages, depth=2)
        crawler.calculatepagerank()
    else:
        searcher = se.searcher(dbname)
        q = 'zhihu career'
        print searcher.query(q)
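
The OR half of exercise 2 above can be prototyped without modifying the query methods: run each sub-query separately and merge the returned url ids. A minimal sketch, assuming query(q) returns (wordids, urlids) as in the examples above; or_query is a hypothetical helper, not part of the book's searchengine module.

import searchengine

def or_query(dbname, subqueries):
    # hypothetical helper: evaluate each sub-query on its own and
    # union the returned url ids, preserving first-seen order
    e = searchengine.searcher(dbname)
    merged, seen = [], set()
    for q in subqueries:
        wordids, urlids = e.query(q)
        for urlid in urlids:
            if urlid not in seen:
                seen.add(urlid)
                merged.append(urlid)
    return merged

# 'python OR perl' becomes two sub-queries whose results are merged
print or_query('searchindex.db', ['python', 'perl'])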

Example #19
def test_se_search():
    searcher = se.searcher('crawler.db')
    result = searcher.query('python language blog')
    print result
Example #20
def test_query_ranking(weightFunc):
    sys.stderr.write("testing query with weighting function '%s'...\n" % weightFunc)
    e=searchengine.searcher('searchindex.db')
    print e.query('programming',weightFunc)
Example #21
from flask import Flask, render_template, request, redirect
import searchengine, neuralnet, crawler
searcher = searchengine.searcher('searchengine.db')
crawler = crawler.crawler('searchengine.db')
nnet = neuralnet.searchnet('nn.db')


app = Flask(__name__)


@app.route("/")
def search():
	if request.args:
		queryText = request.args.get('q')
		(wordids, scores, urlIdsList, urlsList) = searcher.query(queryText)
		if len(urlIdsList) != 0:
			listOfItems = [{'id': urlIdsList[i], 'url': urlsList[i], 'score': scores[i]} for i in range(len(urlIdsList))]
		else:
			listOfItems = []
		return render_template('index.html', list=listOfItems, q=queryText)
	return render_template('index.html', list=None)


@app.route('/train', methods=['POST', 'GET'])
def train():		
	if request.method == 'POST':
		queryPhrase = request.json['q']
		selectedURLId = int(request.json['clicked'])
		app.logger.debug('queryPhrase: %s => selectedURLId: %s' %(queryPhrase, selectedURLId))
		(wordids, scores, urlIdsList, urlsList) = searcher.query(queryPhrase)
		nnet.trainquery(wordids, urlIdsList, selectedURLId)
Example #22
def test_full_match_words():
    s = searcher("output/search.db")
    print s.getfullmatchrows("simple web page")
Example #23
def wordFrequency():
  reload(searchengine)
  e=searchengine.searcher('searchindex.db')
  e.query('sqlite3 python')
Example #24
from django.conf.urls.static import static
from django.shortcuts import render
from django.http import HttpResponse, HttpRequest
from django.shortcuts import render_to_response
from django.template import RequestContext
import searchengine
import nn

e = searchengine.searcher('wikipedia.db')
allurls = e.getallurls("functional")

# Create your views here.


def home_view(request):
    return HttpResponse(request.method)


def search_string(request):
    query = request.GET['searchquery']
    data = e.query(query)
    context_dict = {'results': data, 'query': query}
    return render_to_response('results_page.html', context_dict)


def train_nn(request, page_alias, selected_result):
    network = nn.searchnet('nn.db')
    words = e.getwordids(page_alias)
    if (selected_result.endswith("/")):
        selected_result = selected_result[:-1]
    urlid = e.geturlid(selected_result)
Example #25
    def setUp(self):
        self.s = searchengine.searcher("test.db")
Example #26
def query():
	e=searchengine.searcher('searchIndex.db')
	print e.query('functional programming...')
Example #27
import searchengine
pages = ['https://news.google.com.tw/']
crawler = searchengine.crawler('test')
crawler.createindextables()  #create tables

crawler.crawl(pages)

crawler.calculatepagerank()

e = searchengine.searcher('test')
e.query('單場 球季')
Example #28
def contentranking():
  reload(searchengine)
  #mynet=nn.searchnet('nn.db')
  #mynet.maketables()
  e=searchengine.searcher('searchindex.db')
  e.query('sqlite3 python')
Example #29
# -*- coding: utf-8 -*-
from tornado.ioloop import IOLoop
from tornado.web import RequestHandler, Application, url, StaticFileHandler
import os.path
import sys

sys.path.insert(0, os.path.abspath("../collective-intelligence"))

import searchengine as se
searcher = se.searcher("index.db")

foofle_data = {"query" : "",
               "results" : []}

def update_data(query):
    foofle_data["query"] = query
    foofle_data["results"] = searcher.query(query)

class MainHandler(RequestHandler):
    def initialize(self, data):
        self.data = data

    def get(self):
        self.render("index.html", query = self.data["query"], results = self.data["results"])

    def post(self):
        query = self.get_argument("input-query")
        print "La busqueda que se realizara utilizara la cadena '%s' como consulta" % query
        update_data(query)
        self.get()
Example #30
def documentLocation():
  reload(searchengine)
  e=searchengine.searcher('searchindex.db')
  e.query('sqlite3 python')
Example #31
def test_getmatchrows():
    sys.stderr.write("testing get match rows...\n")
    e=searchengine.searcher('searchindex.db')
    print e.getmatchrows('programming')
Example #32
## coding:utf-8 ##
import searchengine

e=searchengine.searcher('searchindex.db')
#print e.getmatchrows('perl python functional')
while 1:
	print "输入查询的单词(en)"
	q=raw_input()
	print e.query(q)
Example #33
def test_query():
    sys.stderr.write("testing query...\n")
    e=searchengine.searcher('searchindex.db')
    print e.query('programming')
Example #34
for test in eval_tests:
    node = ast.parse(test)
    print ast.dump(node)
    # MyVisitor().visit(node)
    print '\n'

if __name__ == '__main__':
    '''
2. Boolean operations. Many search engines support Boolean queries, which allow
users to construct searches like "python OR perl." An OR search can work by
doing the queries separately and combining the results, but what about "python
AND (program OR code)"? Modify the query methods to support some basic
Boolean operations.
3. Exact matches. Search engines often support "exact match" queries, where the
words in the page must match the words in the query in the same order with no
additional words in between. Create a new version of getrows that only returns
results that are exact matches. (Hint: you can use subtraction in SQL to get the
difference between the word locations.) 
    '''
    dbname = 'searchindex.db'
    if True:
        crawler = se.crawler(dbname)
        crawler.createindextables()
        pages = ['https://www.zhihu.com/', 'https://github.com/']
        crawler.crawl(pages, depth=2)
        crawler.calculatepagerank()
    else:
        searcher = se.searcher(dbname)
        q = 'zhihu career'
        print searcher.query(q)
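
Exercise 3's exact-match variant can be sketched directly against the book's schema: join one copy of the wordlocation table per query word and use SQL subtraction on the location columns to require consecutive positions, per the hint. A minimal sketch, assuming the book's tables wordlist(word) (looked up by rowid) and wordlocation(urlid, wordid, location); getexactmatchrows is a hypothetical name.

def getexactmatchrows(con, q):
    # one wordlocation alias per query word, chained by urlid and
    # a location difference of exactly 1 between neighbouring words
    words = q.lower().split()
    tables, clauses = [], []
    for i, word in enumerate(words):
        row = con.execute(
            "select rowid from wordlist where word=?", (word,)).fetchone()
        if row is None:
            return []
        tables.append('wordlocation w%d' % i)
        clauses.append('w%d.wordid=%d' % (i, row[0]))
        if i > 0:
            clauses.append('w%d.urlid=w%d.urlid' % (i - 1, i))
            clauses.append('w%d.location-w%d.location=1' % (i, i - 1))
    sql = 'select w0.urlid, w0.location from %s where %s' % (
        ','.join(tables), ' and '.join(clauses))
    return con.execute(sql).fetchall()

Calling getexactmatchrows(crawler.con, 'simple web page') then returns only pages where the three words appear contiguously and in order.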
Example #35
#!/usr/bin/python
# coding: UTF-8
# Author: David
# Email: [email protected]
# Created: 2016-08-01 14:08
# Last modified: 2016-08-01 15:54
# Filename: search_test.py
# Description:
import searchengine
e = searchengine.searcher()
e.query('form authentication')