Example #1
File: aa.py  Project: ammathew/Letters
def getUrls2( page_num ):
   gs = GoogleSearch('shareholder letter')
   gs.results_per_page = 50
   gs.page = page_num
   results = gs.get_results()
   for item in results:
      print item.url.encode("utf8")
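
Nearly every example on this page follows the same xgoogle pattern: construct a GoogleSearch, set results_per_page (and optionally page), call get_results(), and read title, desc, and url off each result. A minimal sketch of that shared pattern, assuming only that the xgoogle package is importable:

from xgoogle.search import GoogleSearch, SearchError

def fetch_urls(query):
    try:
        gs = GoogleSearch(query)
        gs.results_per_page = 50
        return [res.url.encode('utf8') for res in gs.get_results()]
    except SearchError, e:
        print "Search failed: %s" % e
        return []
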
Example #2
File: functs.py  Project: jony123/PyMail
def google(data):  # In this function we do the parsing of the subject line ourselves.
   print "Called google"
   tore = ''
   search_string = ''

   if data[1] == 'search':
      for i in range(2,len(data)):
         search_string = search_string + data[i] + ' '
      try:
         
         tore = "Here are the first 25 results from google when \'"+search_string+"\' is queried\n\n"
         gs = GoogleSearch(search_string)
         gs.results_per_page = 25
         results = gs.get_results()
         
         for res in results:
            #print res.title.encode('utf8')
            tore = tore+res.title.encode('utf8')+"\n"
            #print res.desc.encode('utf8')
            tore = tore+res.desc.encode('utf8')+"\n"
            #print res.url.encode('utf8')
            tore = tore+res.url.encode('utf8')+"\n\n--------------------------------------\n"
            print
      except SearchError, e:
         print "Search failed: %s" % e
         tore = "Search failed: %s" % e

   return tore
Example #3
    def getGoogleResults(self, pluginname, latest, cve):
        try:
            gs = GoogleSearch("inurl:'wp-content/plugins/" + pluginname + "'",
                              random_agent=True)
            gs.results_per_page = 100

            numberOfprocessed = 0
            self.all_run = []

            for i in range(int(limitForSearch)):
                results = gs.get_results()
                if not results:
                    break

                # Semaphore limiting concurrent writes to the screen
                self.checkSimultaneus = threading.Semaphore(int(NumThreats))
                # Semaphore limiting concurrent writes to the file
                self.writeFile = threading.Semaphore(int(NumThreats) - 1)

                for res in results:
                    self.checkSimultaneus.acquire()
                    host_name = urlparse(res.url.encode()).hostname
                    # Create thread
                    t = threading.Thread(target=self.__getGoogleResults,
                                         args=(host_name, latest, pluginname,
                                               cve))
                    self.all_run.append(t)
                    # run thread
                    self.all_run[len(self.all_run) - 1].start()

        except SearchError, e:
            print "Search failed: %s" % e
Example #6
    def google(self, text):

        try:
            print "Trying to search for " + text

            g1 = GoogleSearch(text)

            g1.results_per_page = 25

            results = g1.get_results()

            if len(results) == 0:
                print "No search result!!"
            else:
                print "Results FOund!!"
                print type(results)
                print len(results)
                for res in results[:2]:
                    time.sleep(1)
                    url = res.url.encode("utf8")
                    response = self.search(url)
                    if response == "Kgpian":
                        self.close()
                        break
        except SearchError, e:
            print "Failed Once"
Example #8
def getGoogleResults(query, exclude):
    try:
        print "Searching for {0} but excluding these {1}".format(query, exclude)
        page = 1
        gs = GoogleSearch(query)
        gs.results_per_page = 100
        results = gs.get_results()
Example #9
 def __init__(self, config):
     self.config = config
     self.gs = GoogleSearch(self.config["p_query"], page=self.config["p_skippages"], random_agent=True)
     self.gs.results_per_page = self.config["p_results_per_query"]
     self.cooldown = self.config["p_googlesleep"]
     if (self.config["p_skippages"] > 0):
         print "Google Scanner will skip the first %d pages..."%(self.config["p_skippages"])
Example #10
	def collect(self):
		gs = GoogleSearch("site:"+self.target)
		while True:
			results = gs.get_results()
			for res in results:
				self.urls.append(res.url)
			if len(results)<10:
				break
Example #11
    def go(self, query, pages):
        search = GoogleSearch(query)
        search.results_per_page = 10

        for i in range(pages):
            search.page = i
            results = search.get_results()
            for page in results:
                self.scrape(page)
Example #13
def update_from_web( model, film, year ):
  search = "kinopoisk.ru " + year + " " + film
  print "Search: %s" % search  
  browser=Browser(debug=True)
  gs = GoogleSearch(search)
  gs.results_per_page = 1
  results = gs.get_results()
  try:
    for res in results:
      pageurl = res.url.encode('utf8')
      page = browser.get_page( pageurl )
      soup = BeautifulStoneSoup( page[ page.find("<html"):], convertEntities=BeautifulStoneSoup.HTML_ENTITIES, fromEncoding="windows-1251" )
      print "URL: %s" % pageurl
      rating = soup.find('a',attrs={'class':'continue'})
      if rating:
          r = strip(rating).split(' ')
          try:
              model.rating = float( r[1] )
              print "Rating: %s" % r[1] 
          except Exception, ex:
              model.rating = 0.0
              print "Can't parse rating"
 
      title = soup.find('h1','moviename-big')
      if title:
          print "Title: %s" % strip(title)
          model.title = strip(title)

      info = soup.find('span','_reachbanner_')
      if info:
          print "Info: %s" % strip(info)
          model.description = strip( info )

      img = soup.find('img', attrs={"width" : "120"})
      if img:
          print "Image: %s" % img['src']
          model.image = "http://www.kinopoisk.ru%s" % img['src']
   
#getTrailer("t26538","397494/kinopoisk.ru-District-9-36971.mp4","397494/1_36971.jpg","480","270","tr","","");

      import re
      m = re.search("getTrailer\((.*)\)",str(soup))
      if not m:
          pass
      else:
          parts = m.group(1).split('"')
          url = "http://tr.kinopoisk.ru/%s" % parts[3]
          model.trailer = url
          image = "http://tr.kinopoisk.ru/%s" % parts[5]
          model.trailer_image = image
          print "Trailer: %s" % url
          print "TrailerImage: %s" % image
     
      break
  
  except Exception,e:
      print "WARNING: %s" % e
Example #14
 def startSearch(self, target_domain="", target_keywords=[]):
     # The original referenced undefined names (target_keyword, target_domain);
     # iterating the keyword list is presumably what was intended.
     for target_keyword in target_keywords:
         gs = GoogleSearch(target_keyword)
         gs.results_per_page = 100
         results = gs.get_results()
         for idx, res in enumerate(results):
             parsed = urlparse(res.url)
             domain = self.__makeUrl(parsed.netloc)
             if domain == target_domain:
                 print "Ranking position %d for keyword %s on domain %s" % (idx + 1, target_keyword, target_domain)
Example #15
def google_search_results(search_query,
                          wait=40,
                          number_of_results=10,
                          encode=True,
                          max_fail_count=5,
                          current_fail_count=1,
                          random_text=None):
    ''' DO NOT MESS WITH THIS IT IS PERFECT FOR NOW'''
    # gets AT LEAST number_of_results results
    # don't query too fast or Google will block your IP temporarily
    # for this purpose, I have added the variable max_result_size
    # if your IP does get blocked, try later in the day or wait a day or two

    try:
        max_result_size = 10  #don't change it from this: the standard of 10 seems the least suspicious to google
        gs = GoogleSearch(search_query,
                          random_agent=True)  # does not actually search
        gs.results_per_page = max_result_size

        gs.page = 0
        times_tried = 0
        results = []
        prev = 0
        # print "getting results:"
        while len(results) < number_of_results:
            prev = len(results)
            times_tried += 1
            time.sleep(random.uniform(0.5 * wait, 1.5 * wait))
            results += gs.get_results()  # actual search and extraction of results
            print "\rtimes_tried: %s\tlen(results): %s\tpage_number: %s" % (
                times_tried, len(results), gs.page),
        print "\n"

        # We now have a list of SearchResult objects, called 'results'.
        # A SearchResult object has three attributes -- "title", "desc", and "url".
        # They are Unicode strings, so do a proper encoding before outputting them. (done below)
        if encode:
            for i in range(0, len(results)):
                results[i].title = results[i].title.encode("utf8", "ignore")
                results[i].desc = results[i].desc.encode("utf8", "ignore")
                results[i].url = results[i].url.encode("utf8", "ignore")  # the original line here was a no-op
        # random.shuffle(results)
        return results  # the original never returned its results; presumably an oversight

    except SearchError, e:
        print "Google Try #%s: Search failed on this url:\t%s" % (
            current_fail_count, e)
        google_search_redirect(random_text)
        if current_fail_count != max_fail_count:
            return google_search_results(
                search_query,
                wait=wait,
                number_of_results=number_of_results,
                encode=encode,
                max_fail_count=max_fail_count,
                current_fail_count=current_fail_count + 1)
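
The comments in Example #15 carry the practical advice on this page: Google temporarily blocks IPs that query too fast, so leave results_per_page at an inconspicuous 10 and sleep a randomized interval between get_results() calls. Stripped of the retry recursion, the core loop is roughly this (a sketch, not the original code):

import time, random
from xgoogle.search import GoogleSearch

def slow_fetch(query, wanted=30, wait=40):
    gs = GoogleSearch(query, random_agent=True)
    gs.results_per_page = 10
    results = []
    while len(results) < wanted:
        time.sleep(random.uniform(0.5 * wait, 1.5 * wait))
        page = gs.get_results()
        if not page:  # ran out of results early
            break
        results += page
    return results
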
Example #16
def searchPage(textToSearch, page):
    items = []
    gs = GoogleSearch(textToSearch)
    gs.results_per_page = 100
    gs.page = page
    results = gs.get_results()
    for res in results:
        url = res.url.encode('utf8')
        items.append(url)
    return items
Example #17
def get_results(query):
    gs = GoogleSearch(query)
    gs.results_per_page = 9001
    results = gs.get_results()
    ret = []
    for res in results:
        domain = mk_nice_domain(res.url)
        domain = domain.replace("pastebin.com/", "pastebin.com/raw.php?i=")
        print 'Found codes at %s' % domain
        ret.append(domain)
    return ret
Example #18
def search_google(term, domain):
    try:
        log.debug('Performing Google search for "{}"'.format(term))
        gs = GoogleSearch(term, tld=domain)
        gs.results_per_page = 10
        results = gs.get_results()
        log.debug('Got {} results'.format(len(results)))
        return [Url(res.url) for res in results[:10]]
    except SearchError as exc:
        log.exception(exc)
        return None
Example #19
    def get(self, params=None):
        """
        gets the answer from the answer template
        :param params: msg = params[0], func = params[1]
        :return:
        returns the first template if is_random is false, otherwise returns random template
        """
        ynet_sections = [
            u"חדשות", u"כלכלה", u"ספורט", u"תרבות", u"רכילות", u"דיגיטל",
            u"בריאות", u"יהדות", u"חופש", u"רכב", u"אוכל", u"צרכנות", u"יחסים",
            u"mynet", u"מדע", u"לימודים", u"קניות", u"קהילות",
            u"חדשות תוכן ועדכונים"
        ]
        msg = ('ynet.co.il:' + params[0]).encode('utf-8')
        try:
            b = Browser()
            gs = GoogleSearch(msg, lang='he', tld="co.il")
            gs.results_per_page = 50
            results = gs.get_results()
            for res in results:
                try:
                    if (res.url is not None):
                        page = b.get_page(res.url)
                        soup = BeautifulSoup(page)
                        title = soup.find("title")
                        if (title is not None):
                            if (' &quot;' in title.text
                                    and '&quot; ' in title.text):
                                return self.find_between(
                                    title.text, ' &quot;', '&quot; ')
                            res = title.text.split('-')[0].replace(
                                'ynet', '').strip().strip('&quot;')
                            if ':' in res:
                                res = res.split(':')[1].strip().strip('&quot;')
                            res = res.strip()
                            if res == u'' or res in ynet_sections: continue
                            else: return res
                except:
                    continue
            return "?"
        except SearchError, e:
            return "?"


#a = avoiding_msg_ynet(None,None)
# a.get(["ynet.co.il:האם טביב ימכור את הקבוצה?"])
# res = a.get(["ynet.co.il:האם ביבי ימכור את המדינה?"])
#Sa.get(["ynet.co.il:מה יהיה עם הגז?"])
#a.get(["seret.co.il:המרגלת"])

#a = avoiding_msg_ynet()
#a.test_browser()
# a.get(["האם אלי טביב ימכור את הקבוצה?"])
#a.get(["ynet.co.il:איזה גרוע ביבי הא?"])
Example #20
def get_number_of_results(term, ajax=False, verbose=True):
    if not ajax:
        gs = GoogleSearch(term)
        page = str(gs._get_results_page())
        match = reg.search(page)
        if match:
            if verbose: print(term, match.groups()[0])
            return int(match.groups()[0].replace(',',''))
        else:
            raw_input((term, page))
    return int(search(term)['responseData']['cursor']['estimatedResultCount'])
Example #21
def search( md5hash ):
	urls = []

	gs = GoogleSearch( md5hash )
	gs.results_per_page = 100
	results = gs.get_results()

	for res in results:
		urls.append( res.url.encode('utf8')  )

	return urls
Example #22
File: crow.py  Project: Rafe/Crow
def main():
    # the hardcoded search query:
    gs = GoogleSearch("computer")
    gs.results_per_page = 10
    results = gs.get_results()

    for r in results:
        Crow(r.url).select("a").to(SqlitePipeline()).async_start(50)

    Crow.run()
    f.close()  # 'f' is presumably a module-level file object in the original project
Example #24
def searchInSeretil():
    search_entered = ''
    keyboard = xbmc.Keyboard(search_entered, 'הכנס מילות חיפוש כאן')
    keyboard.doModal()
    if keyboard.isConfirmed():
        search_entered = keyboard.getText()

    if search_entered != '':
        try:
            gs = GoogleSearch("site:seretil.me " + search_entered)
            gs.results_per_page = 100
            results = gs.get_results()
            for res in results:
                title = res.title.encode('utf8')
                url = res.url.encode('utf8')
                title = title.replace('SERETIL.ME', '')
                title = title.replace('לצפייה ישירה', '')
                title = title.replace('וסדרות', '')
                title = title.replace('תרגום מובנה', '')
                title = title.replace('|', '')
                title = title.replace('.', '')
                title = title.replace('סרטים', '')
                title = title.replace('עם', '')
                title = title.replace('לצפיה', '')

                if 'עונה' in title:
                    if not 'page' in url and not 'tag' in url and not '?s' in url and not 'search' in url:
                        addDir(title, url, 211, '')
                else:
                    if not 'page' in url and not 'tag' in url and not '?s' in url and not 'search' in url:
                        image = ''
                        req = urllib2.Request(url)
                        req.add_header(
                            'User-Agent',
                            ' Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'
                        )
                        response = urllib2.urlopen(req)
                        link3 = response.read()
                        response.close()

                        block = re.compile(
                            '<div class="post-wrap post-wrap-single">(.*?)linkwithin_hook',
                            re.M + re.I + re.S).findall(link3)
                        image = ''
                        images = re.compile('src="http(.*?).?jpg').findall(
                            block[0])
                        if images:
                            image = 'http' + images[0] + '.jpg'
                        addDir(title, url, 5, image)

        except SearchError, e:
            print "Search failed: %s" % e
        xbmcplugin.setContent(int(sys.argv[1]), 'tvshows')
Example #25
 def goggle(self, word):
     """Get results from google """
     try:
         results = []
         gs = GoogleSearch(word, random_agent=True)
         gs.results_per_page = 50
         hits = gs.get_results()
         for hit in hits:
             results.append(hit.url.encode('utf8'))
         return results
     except SearchError, e:
         print "Search failed: %s" % e
Example #26
 def scrape(self, keyword, pages=2):
     try:
         gs = GoogleSearch(keyword)
         gs.results_per_page = 10
         gs.page = 0
         results = gs.get_results()
         for res in results:
             url = res.url.encode('utf8')
             Title = res.title
             self.urls.append((url, Title))
     except SearchError, e:
         print "Search failed: %s" % e
Example #28
def search_by_filename(args):
    args_e = args.encode('utf8')
    try:
        gs = GoogleSearch('"' + args_e + '"')
        gs.results_per_page = 50
        results = gs.get_results()
        for res in results:
            if re_math_sites(allow_sites, res.url.encode('utf8')):
                if re_math_sites(args_e, res.desc.encode('utf8')):
                    return clean_result(res.title.encode('utf8'))

    except SearchError, e:
        print "Search failed: %s" % e
Example #29
def get_hits(term):
    #data = fetch_data("http://api.thriftdb.com/api.hnsearch.com/items/_search?q=" + term)
    #if data[0] is not None:
    #    if loads(data[0])['hits'] > 0:  #loads() dumps a json file which is what the hnsearch api returns
    #        return loads(data[0])['hits']
    #    else:
    #        return 0.000001
    #else:
    #    return data[1]
    gs = GoogleSearch(term)  # the original passed an undefined name 'key' here
    gs.results_per_page = 100
    results = gs.get_results()
    return results
Example #32
 def Search_YTonGoogle(self,search):
     # import Google Search
     from xgoogle.search import GoogleSearch
     # search on google
     gs = GoogleSearch(search+' site:http://www.youtube.com ')
     gs.results_per_page = 25
     gs.page = 0
     # return result or None
     try:
         results = gs.get_results()
         return results
     except Exception, e:
         print 'getTrailer --> Error: %s' % e
         return None
Example #33
 def run(self):
     try:
         gs = GoogleSearch(self.key)
         while not self.gotcha:
             results = gs.get_results()
             for res in results:
                 self.rank += 1
                 if res.url == self.url:
                     self.gotcha = True
                     break
             if gs.page >= 30:
                 break
     except SearchError:
         pass
Example #34
def scrapsomesqlfiles(keyword, pages=20):
    try:
        for i in range(0,pages+1):
            wt = random.uniform(2, 5)   
            gs = GoogleSearch(keyword)
            gs.results_per_page = 50
            gs.page = i 
            results = gs.get_results()
            time.sleep(wt)
            print 'This is the %dth iteration and waited %f seconds' % (i, wt)
            for res in results:
                get_url_info(res.url.encode('utf8'))    
    except SearchError, e:
      print "Search failed: %s" % e
Example #35
def DoSearch(mc, search, page, accountStatus=None):
    gs = GoogleSearch('site:' + ICEFILMS_URL + 'ip ' + search)
    gs.results_per_page = 25
    gs.page = page
    results = gs.get_results()

    for res in results:
        name = res.title.encode('utf8')
        name = CLEANSEARCH(name)
        url = res.url.encode('utf8')
        index = url.index("/ip")
        match = url[index:len(url)]

        addSearchResult(mc, name, match, 'Movie')
Example #36
File: aa.py  Project: ammathew/Letters
def getUrls ( searchTerm ):
   links = []
   f = open('output.txt', 'w')
   try:
      gs = GoogleSearch( searchTerm)
      gs.results_per_page = 50
      results = gs.get_results()
      for res in results:
         links.append( res.url.encode("utf8") )
      pickle.dump( links, f )
      f.close()
      return links
   except SearchError, e:
      print "Search failed: %s" % e
Example #37
def websearch(query):
    limit = config['web_results_limit']
    search_library = config['search_library_active']
    search_engine = config['search_engine_active']
    
    ret = []
    # Bing=50 per page, Google=10 - go figure!
    per_page = config[search_engine + '_per_page']
    pages = int(math.ceil(limit / float(per_page)))

    if search_library == 'pattern':
        if search_engine == 'bing':
            engine = Bing(license='cvzWROzO9Vaxqu0k33+y6h++ts+a4PLQfvA7HlyJyXM=', language="en")
        elif search_engine == 'google':
            engine = Google(license=config[config['use_whose_key'] + '_google_key'], language="en")
        for page in range(pages):
            try:
                # turns out start = starting page and count is results per page
                # could probably do some logic to make sure count is right if limit was 130, on page 3, count should be 30, whereas 
                # our code is going to fetch 50 for a total of 150. ... I think we can probably mess with that later and just work in blocks of 50
                request = asynchronous(engine.search, clean_query(query), start=page+1, count=per_page, type=SEARCH, timeout=10, throttle=0.5)
                while not request.done:
                    time.sleep(0.01)
            except:
                raise
            if request.value != None:
                for result in request.value:
                    ret.append({'title' : result.title, 'description' : result.text})
            
    elif search_library == 'requests':
        for page in range(pages):
            offset = per_page * page
            params = {'$format': 'json', '$top': per_page,'$skip': offset}
            results = bing.search('web',clean_query(query),params)()['d']['results'][0]['Web']
            for result in results:
                ret.append({'title' : result['Title'], 'description' : result['Description']})
                
    elif search_library == 'xgoogle':
        for page in range(pages):
            try:
                # inject some delay
                time.sleep(0.04)
                gs = GoogleSearch(clean_query(query))
                gs.page = page+1
                gs.results_per_page = per_page
                results = gs.get_results()
                for res in results:
                    ret.append({'title' : res.title.encode("utf8"), 'description' : res.desc.encode("utf8")})
            except SearchError, e:
                print "Search failed: %s" % e
Example #39
def googledefault(termtosearch, lookspam):
    try:
        gs = GoogleSearch(termtosearch)
        gs.results_per_page = 50
        results = gs.get_results()
        if lookspam:
            for res in results:
                print '\033[1;34mLooking for SPAM in........%s\033[1;m' % (
                    res.url.encode('utf8'))
                spam_detect(res.url.encode('utf8'))
        else:
            for res in results:
                print res.url.encode('utf8')
    except SearchError, e:
        print "Search failed: %s" % e
Example #41
def get_rating(movie_name):
	
	try:
	# Use xgoogle api to parse google. Following is the url to the api
	# http://www.catonmat.net/blog/python-library-for-google-search/
		search_str = movie_name + ' site:imdb.com'
		gs = GoogleSearch(search_str)
		gs.results_per_page = 1
		results = gs.get_results()
		url = results[0].url.encode('utf8')
		# url_title = results[0].title.encode('utf8')
		# print url, url_title
		imdb_rating(url)
	except SearchError, e:
		print "Search failed: %s" % e
Example #42
    def perform_search(self):
        url_list = list()

        try:
            gs = GoogleSearch(self.object)
            gs.results_per_page = 50
            results = gs.get_results()

            for res in results:
                url_list.append(res.url.encode("utf8"))

            return url_list

        except SearchError, e:
            print("Search failed: %s" % e)
Example #44
File: example4.py  Project: fr830/xgoogle
def main():
    gs = GoogleSearch('intitle:道德黑客技术论坛内部专版WEBSHELL')
    gs.results_per_page = 100
    for index in range(4):
        gs.page = index + 1
        results = gs.get_results()
        for result in results:
            url = result.getURL()
            print result

            ret = exploit(url)
            if ret == '':
                continue

            open('result.txt', 'a').write(ret)
Example #45
    def searchDocuments(self, terms):
        '''
        This function searches the given terms on Google and stores the textual content in DomainKnowledgeDocument objects
        @param terms: list of string terms to be searched on the internet
        '''
        try:
            sentence = ' '.join(terms)
            gs = GoogleSearch(sentence)
            results = gs.get_results()
            for result in results:
                self.documentsURLs.append(result.get_URL())
            print gs.num_results

        except SearchError, e:
            print "Search failed: %s" % e
Example #46
    def __init__(self, query, filetypes, site, resultsperpage, maxresults,
                 repeat):
        if filetypes:
            filetypes = re.split(",", filetypes)
            query += " filetype:" + filetypes.pop(0)
            for filetype in filetypes:
                query += " OR filetype:" + filetype

        if site:
            query += " site:" + site

        print(query)
        self.gs = GoogleSearch(query, random_agent=True, repeat=repeat)
        self.gs.results_per_page = int(resultsperpage)
        self.maxresults = int(maxresults)
        self.lastpage = False
Example #47
        def run(self, string):

            query = "site:ReverseIndexSite %s" % string

            #if not thread:
            #    say("Querying Google: '%s'" % query)

            gs = GoogleSearch(query)
            gs.results_per_page = 10
            results = gs.get_results()
            if len(results) >= 1:
                result = None  #At the end result must be a string containing the decoded md5 hash
                result = ["ReverseIndexSite", result]
                if thread:
                    say(result)
                return result
Example #48
    def get_ranks(self):
        for keyword, urls in campaigns.get_keywords().iteritems():
            gs = GoogleSearch(keyword)
            gs.results_per_page = self.config['limits']['results_per_page']

            sys.stderr.write('\n\nChecking keyword: %s\n' % keyword)
            results = self.get_results(gs)
            offset = 1
            query_count = 0
            while len(urls) > 0 and results:
                # Display a period for every hit we make to Google
                if query_count % 5 == 0: sys.stderr.write(' ')
                sys.stderr.write('.')

                for rank, row in enumerate(results):
                    if (len(urls) > 0):
                        # Find results containing one of our sites
                        found = filter(lambda x: row.url.find(x) != -1, urls)
                        for entry in found:
                            campaigns.set_rank(entry, keyword, rank + offset)

                        # Using sets to get remaining sites to check for
                        urls = list(set(urls) - set(found))
                    else:
                        break

                # Don't collect another time if no more URLs are left to check
                offset += len(results)
                results = None

                # We want to sleep here regardless because we might scrape
                # really fast if all the results are on the first page
                time.sleep(self.config['limits']['delay'])

                # Only check if there are sites remaining and we have not
                # surpassed our maximum configured depth
                if (len(urls) > 0 and
                        offset <= self.config['limits']['search_depth'] + 1):
                    results = self.get_results(gs)
                    query_count += 1
                elif verbose:
                    sys.stderr.write('Not retrieving more results\n')

                if verbose:
                    sys.stderr.write('URLs: %s\n' % ', '.join(urls))
                    if results:
                        sys.stderr.write('Results: %s\n' % len(results))
Example #49
class google:

    def __init__(self):
        self.gs = GoogleSearch('')

    def get_results(self, src):
        if src != '':
            return self.gs._extract_results(BeautifulSoup(src))
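
Example #49 skips the network entirely and feeds already-fetched HTML to xgoogle's private _extract_results helper (Examples #20 and #51 poke at the same internals via _get_results_page). These are underscore-prefixed, version-dependent internals rather than a stable interface; a hypothetical use, assuming BeautifulSoup 3 is installed under its usual import name:

from BeautifulSoup import BeautifulSoup  # BeautifulSoup 3, which xgoogle relies on
from xgoogle.search import GoogleSearch

def parse_saved_page(path):
    # Re-parse a results page that was fetched and saved to disk earlier;
    # constructing GoogleSearch does not itself hit Google.
    src = open(path).read()
    return GoogleSearch('')._extract_results(BeautifulSoup(src))
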
Example #50
def searchHandler(user, command, args, mess):
    try:
        if len(args) < 2:
            return "Please Provide your search Query"
        else:
            gs = GoogleSearch(args)
            gs.results_per_page = 10
            gs.page = 1
            results = gs.get_results()
            if len(results) > 0:
                for res in results:
                    return res.title.encode("utf8") + "\n" + res.desc.encode(
                        "utf8") + "\n" + res.url.encode("utf8")
            else:
                return "No Search Result Found for your query."
    except SearchError, e:
        return "Search failed: %s" % e
Example #51
        def run(self, string):

            query = "site:http://md5-database.org/md5 %s" % string

            #if not thread:
            #    say("Querying Google: '%s'" % query)

            gs = GoogleSearch(query)
            gs.results_per_page = 10
            results = gs._get_results_page()
            texts = results.findAll(text=True)
            texts = ''.join(texts)
            results = re.findall(re.compile('MD5\}.*?MD5'), texts)
            for line in results:
                if string in line:
                    result = line[(line.find(',') + 1):line.find('.')].strip()
                    return result

            return ''
Example #52
def google_search(query):
    try:
        urls = set()
        for i in range(0, 15):
            print "Step: " + str(i) + " for " + query
            gs = GoogleSearch(query)
            gs.results_per_page = 100
            gs.page = i
            results = gs.get_results()
            for res in results:
                url = res.url.encode('utf8')
                url = url[url.find(".") + 1:find_nth(url, "/", 3)]
                if url.count('.', 0, len(url)) > 1:
                    url = url[url.find(".") + 1:len(url)]
                urls.add(url)

        return urls
    except SearchError, e:
        print "Search failed: %s" % e
Example #53
def google(text):
    response = ""
    time.sleep(0.5)
    count = 0

    try:
        print "Trying to search for " + text

        g1 = GoogleSearch(text)

        g1.results_per_page = 25

        results = g1.get_results()

        for res in results[:2]:
            time.sleep(0.5)
            response = search(res.url.encode("utf8"))
            return response
    except SearchError, e:
        print "Failed Once"
Example #54
def GetSearchResults(query=None,type=None,imdb_id=None, exact=False):
	
	if (type=="movies"):
		# This is a google search. The -tv will omit all TV shows.
		search = 'intitle:%s -"Episode List" -"Series Rating" site:%s' % (query,ICEFILMS_URL)
	else:
		search = 'allintitle:%s "Episode List" site:%s' % (query, ICEFILMS_URL)
	
	gs = GoogleSearch(search)
	gs.results_per_page = 25
	gs.page = 0
	results = gs.get_results()
	items = []
	
	for res in results:
	
		name = re.sub(
			'(<em>|</em>|<a>|</a>|DivX|-|icefilms(\.info)?|<b>\.\.\.</b>|Episode List|links)',
			'',
			res.title.encode('utf8')
		).strip()

		url=res.url
		video_url = re.search("icefilms\.info(/.*)", url).group(1)
		
		res = MediaInfo()
		
		res.type = type
		res.title = name

		match = re.search("(.*)\((\d*)\)", res.title)
		
		if (match):
			res.title = match.group(1).strip()
			res.year = int(match.group(2).strip())
			
		res.id = video_url
		
		items.append(res)
	
	return items
Example #55
def searchFor(text):
    gs = GoogleSearch(text)
    gs.results_per_page = 32
    page = 1
    results = []
    titles = []
    while page < 5:
        results.extend(gs.get_results())
        page += 1
    results = results[:10]
    for res in results:
        titles.append(str(res.title.encode("utf-8")))
        urls.append(str(res.url.encode("utf-8")))

    print len(results)
    print titles

    try:
        sublime.active_window().show_quick_panel(titles, onSelection,
                                                 sublime.MONOSPACE_FONT)
    except:
        webbrowser.open_new_tab("https://www.google.com/search?q=" +
                                text.replace(" ", "+"))
Example #56
def google_search(query):
    try:
        results = []
        resultg = []
        gs = GoogleSearch(query)
        gs.results_per_page = 30
        while True:
            tmp = gs.get_results()
            if not tmp:  # no more results were found
                break
            results.extend(tmp)
        #f.write(res.title.encode('utf8'))
        #f.write("\n<br><br>")
        #f.write(res.desc.encode('utf8'))
        #f.write("\n<br><br>")
        f = open("final.txt", "w")
        for res in results:
            f.write('\n <a href=' + res.url.encode('utf8') + '>' + '<h1>' +
                    res.title.encode('utf8') + '</h1>\n' + '</a>\n')
            resultg.extend(res.url.encode('utf8'))
        f.close()
    except SearchError, e:
        print "Search failed: %s" % e
Example #57
def google(termtosearch, action):

    #action = spam or phis
    try:
        gs = GoogleSearch(termtosearch)
        gs.results_per_page = 100
        results = []

        while True:
            tmp = gs.get_results()
            if not tmp:
                break
            results.extend(tmp)

            #TODO switch in this code block
            if action == 'mal':
                for res in results:
                    checkAgainstGoogle(res.url.encode('utf8'))
            else:

                if action == 'spam':
                    for res in results:
                        print '\033[1;34mLooking for SPAM in ......%s\033[1;m' % (
                            res.url.encode('utf8'))
                        spam_detect(res.url.encode('utf8'))
                elif action == 'phis':
                    for res in results:
                        print '\033[1;34mLooking for PHISHING in ......%s\033[1;m' % (
                            res.url.encode('utf8'))
                        phishing_detect(res.url.encode('utf8'))

                else:
                    for res in results:
                        print res.url.encode('utf8')

    except SearchError, e:
        print "Search failed: %s" % e
Example #58
def scan(url, wordlist):

    fname = wordlist
    with open(fname, 'r') as f:
        dorks = f.readlines()
    for dork in dorks:
        if len(dork) < 2:
            continue
        try:
            rnd = random_int(2, 5)
            time.sleep(rnd)
            g = GoogleSearch("site:" + url + " " + dork, random_agent=True)
            g.results_per_page = 10
            print("."),
            results = g.get_results()
            if len(results) > 0:
                msg = "[+] Found %d results with dork: %s" % (len(results), dork)
                logger.info(msg)
                for res in results:
                    print res.title.encode('utf8')
                    print res.url.encode("utf8")
        except SearchError, e:
            print "Search failed: %s" % e