def getUrls2(page_num):
    gs = GoogleSearch('shareholder letter')
    gs.results_per_page = 50
    gs.page = page_num
    results = gs.get_results()
    for item in results:
        print item.url.encode("utf8")

def google(data):
    # In this function we do the parsing of the subject line ourselves.
    print "Called google"
    tore = ''
    search_string = ''
    if data[1] == 'search':
        for i in range(2, len(data)):
            search_string = search_string + data[i] + ' '
        try:
            tore = "Here are the first 25 results from google when '" + search_string + "' is queried\n\n"
            gs = GoogleSearch(search_string)
            gs.results_per_page = 25
            results = gs.get_results()
            for res in results:
                # print res.title.encode('utf8')
                tore = tore + res.title.encode('utf8') + "\n"
                # print res.desc.encode('utf8')
                tore = tore + res.desc.encode('utf8') + "\n"
                # print res.url.encode('utf8')
                tore = tore + res.url.encode('utf8') + "\n\n--------------------------------------\n"
                print
        except Exception, e:
            print "Search failed: %s" % e
            tore = "Search failed: %s" % e
    return tore

def getGoogleResults(self, pluginname, latest, cve):
    try:
        gs = GoogleSearch("inurl:'wp-content/plugins/" + pluginname + "'", random_agent=True)
        gs.results_per_page = 100
        numberOfprocessed = 0
        self.all_run = []
        for i in range(int(limitForSearch)):
            results = gs.get_results()
            if not results:
                break
            # Semaphore for write in order to screen
            self.checkSimultaneus = threading.Semaphore(int(NumThreats))
            # Semaphore for write to file
            self.writeFile = threading.Semaphore(int(NumThreats) - 1)
            for res in results:
                self.checkSimultaneus.acquire()
                host_name = urlparse(res.url.encode()).hostname
                # Create thread
                t = threading.Thread(target=self.__getGoogleResults,
                                     args=(host_name, latest, pluginname, cve))
                self.all_run.append(t)
                # run thread
                self.all_run[len(self.all_run) - 1].start()
    except SearchError, e:
        print "Search failed: %s" % e

def google(self, text):
    try:
        print "Trying to search for " + text
        g1 = GoogleSearch(text)
        g1.results_per_page = 25
        results = g1.get_results()
        if len(results) == 0:
            print "No search result!!"
        else:
            print "Results Found!!"
            print type(results)
            print len(results)
            for res in results[:2]:
                time.sleep(1)
                url = res.url.encode("utf8")
                response = self.search(url)
                if response == "Kgpian":
                    self.close()
                    break
    except SearchError, e:
        print "Failed Once"

def google(termtosearch, action):
    # action = spam or phis
    try:
        gs = GoogleSearch(termtosearch)
        gs.results_per_page = 100
        results = []
        while True:
            tmp = gs.get_results()
            if not tmp:
                break
            results.extend(tmp)
        # TODO switch in this code block
        if action == 'mal':
            for res in results:
                checkAgainstGoogle(res.url.encode('utf8'))
        else:
            if action == 'spam':
                for res in results:
                    print '\033[1;34mLooking for SPAM in ......%s\033[1;m' % (res.url.encode('utf8'))
                    spam_detect(res.url.encode('utf8'))
            elif action == 'phis':
                for res in results:
                    print '\033[1;34mLooking for PHISHING in ......%s\033[1;m' % (res.url.encode('utf8'))
                    phishing_detect(res.url.encode('utf8'))
            else:
                for res in results:
                    print res.url.encode('utf8')
    except SearchError, e:
        print "Search failed: %s" % e

def getGooogleResults(query, exclude):
    try:
        print "Searching for {0} but excluding these {1}".format(query, exclude)
        page = 1
        gs = GoogleSearch(query)
        gs.results_per_page = 100
        results = gs.get_results()
        return results
    except SearchError, e:
        print "Search failed: %s" % e

def __init__(self, config):
    self.config = config
    self.gs = GoogleSearch(self.config["p_query"], page=self.config["p_skippages"], random_agent=True)
    self.gs.results_per_page = self.config["p_results_per_query"]
    self.cooldown = self.config["p_googlesleep"]
    if (self.config["p_skippages"] > 0):
        print "Google Scanner will skip the first %d pages..." % (self.config["p_skippages"])

def collect(self):
    gs = GoogleSearch("site:" + self.target)
    while True:
        results = gs.get_results()
        for res in results:
            self.urls.append(res.url)
        if len(results) < 10:
            break

def go(self, query, pages):
    search = GoogleSearch(query)
    search.results_per_page = 10
    for i in range(pages):
        search.page = i
        results = search.get_results()
        for page in results:
            self.scrape(page)

def update_from_web(model, film, year):
    search = "kinopoisk.ru " + year + " " + film
    print "Search: %s" % search
    browser = Browser(debug=True)
    gs = GoogleSearch(search)
    gs.results_per_page = 1
    results = gs.get_results()
    try:
        for res in results:
            pageurl = res.url.encode('utf8')
            page = browser.get_page(pageurl)
            soup = BeautifulStoneSoup(page[page.find("<html"):],
                                      convertEntities=BeautifulStoneSoup.HTML_ENTITIES,
                                      fromEncoding="windows-1251")
            print "URL: %s" % pageurl
            rating = soup.find('a', attrs={'class': 'continue'})
            if rating:
                r = strip(rating).split(' ')
                try:
                    model.rating = float(r[1])
                    print "Rating: %s" % r[1]
                except Exception, ex:
                    model.rating = 0.0
                    print "Can't parse rating"
            title = soup.find('h1', 'moviename-big')
            if title:
                print "Title: %s" % strip(title)
                model.title = strip(title)
            info = soup.find('span', '_reachbanner_')
            if info:
                print "Info: %s" % strip(info)
                model.description = strip(info)
            img = soup.find('img', attrs={"width": "120"})
            if img:
                print "Image: %s" % img['src']
                model.image = "http://www.kinopoisk.ru%s" % img['src']
            # getTrailer("t26538","397494/kinopoisk.ru-District-9-36971.mp4","397494/1_36971.jpg","480","270","tr","","");
            import re
            m = re.search("getTrailer\((.*)\)", str(soup))
            if not m:
                pass
            else:
                parts = m.group(1).split('"')
                url = "http://tr.kinopoisk.ru/%s" % parts[3]
                model.trailer = url
                image = "http://tr.kinopoisk.ru/%s" % parts[5]
                model.trailer_image = image
                print "Trailer: %s" % url
                print "TrailerImage: %s" % image
            break
    except Exception, e:
        print "WARNING: %s" % e

def startSearch(self, domain="", target_keywords=[]): gs = GoogleSearch(target_keyword) gs.results_per_page = 100 results = gs.get_results() for idx, res in enumerate(results): parsed = urlparse(res.url) domain = self.__makeUrl(parsed.netloc) if domain == target_domain: print "Ranking position %d for keyword %s on domain %s" % (idx + 1, target_keyword, target_domain)
def google_search_results(search_query, wait=40, number_of_results=10, encode=True,
                          max_fail_count=5, current_fail_count=1, random_text=None):
    '''DO NOT MESS WITH THIS IT IS PERFECT FOR NOW'''
    # gets AT LEAST number_of_results results
    # don't query too fast or Google will block your IP temporarily
    # for this purpose, I have added the variable max_result_size
    # if your IP does get blocked, try later in the day or wait a day or two
    try:
        max_result_size = 10  # don't change it from this: the standard of 10 seems the least suspicious to google
        gs = GoogleSearch(search_query, random_agent=True)  # does not actually search
        gs.results_per_page = max_result_size
        gs.page = 0
        times_tried = 0
        results = []
        prev = 0
        # print "getting results:"
        while len(results) < number_of_results:
            prev = len(results)
            times_tried += 1
            time.sleep(random.uniform(0.5 * wait, 1.5 * wait))
            results += gs.get_results()  # Actual search and extraction of results.
            print "\rtimes_tried: %s\tlen(results): %s\tpage_number: %s" % (times_tried, len(results), gs.page),
        print "\n"
        # We now have a list of SearchResult objects, called 'results'.
        # A SearchResult object has three attributes -- "title", "desc", and "url".
        # They are Unicode strings, so do a proper encoding before outputting them. (done below)
        if encode:
            for i in range(0, len(results)):
                results[i].title = results[i].title.encode("utf8", "ignore")
                results[i].desc = results[i].desc.encode("utf8", "ignore")
                results[i].url = results[i].url
        # random.shuffle(results)
        return results
    except SearchError, e:
        print "Google Try #%s: Search failed on this url:\t%s" % (current_fail_count, e)
        google_search_redirect(random_text)
        if current_fail_count != max_fail_count:
            return google_search_results(search_query, wait=wait, number_of_results=number_of_results,
                                         encode=encode, max_fail_count=max_fail_count,
                                         current_fail_count=current_fail_count + 1)

def searchPage(textToSearch, page):
    items = []
    gs = GoogleSearch(textToSearch)
    gs.results_per_page = 100
    gs.page = page
    results = gs.get_results()
    for res in results:
        url = res.url.encode('utf8')
        items.append(url)
    return items

def get_results(query):
    gs = GoogleSearch(query)
    gs.results_per_page = 9001
    results = gs.get_results()
    ret = []
    for idx, res in enumerate(results):
        domain = mk_nice_domain(res.url)
        domain = domain.replace("pastebin.com/", "pastebin.com/raw.php?i=")
        print 'Found codes at %s' % domain
        ret.append(domain)
    return ret

def search_google(term, domain):
    try:
        log.debug('Performing Google search for "{}"'.format(term))
        gs = GoogleSearch(term, tld=domain)
        gs.results_per_page = 10
        results = gs.get_results()
        log.debug('Got {} results'.format(len(results)))
        return [Url(res.url) for res in results[:10]]
    except SearchError as exc:
        log.exception(exc)
        return None

def get(self, params=None):
    """
    gets the answer from the answer template
    :param params: msg = params[0], func = params[1]
    :return: returns the first template if is_random is false, otherwise returns random template
    """
    ynet_sections = [u"חדשות", u"כלכלה", u"ספורט", u"תרבות", u"רכילות", u"דיגיטל", u"בריאות",
                     u"יהדות", u"חופש", u"רכב", u"אוכל", u"צרכנות", u"יחסים", u"mynet", u"מדע",
                     u"לימודים", u"קניות", u"קהילות", u"חדשות תוכן ועדכונים"]
    msg = ('ynet.co.il:' + params[0]).encode('utf-8')
    try:
        b = Browser()
        gs = GoogleSearch(msg, lang='he', tld="co.il")
        gs.results_per_page = 50
        results = gs.get_results()
        for res in results:
            try:
                if res.url is not None:
                    page = b.get_page(res.url)
                    soup = BeautifulSoup(page)
                    title = soup.find("title")
                    if title is not None:
                        if ' "' in title.text and '" ' in title.text:
                            return self.find_between(title.text, ' "', '" ')
                        res = title.text.split('-')[0].replace('ynet', '').strip().strip('"')
                        if ':' in res:
                            res = res.split(':')[1].strip().strip('"')
                        res = res.strip()
                        if res == u'' or res in ynet_sections:
                            continue
                        else:
                            return res
            except:
                continue
        return "?"
    except SearchError, e:
        return "?"

# a = avoiding_msg_ynet(None,None)
# a.get(["ynet.co.il:האם טביב ימכור את הקבוצה?"])
# res = a.get(["ynet.co.il:האם ביבי ימכור את המדינה?"])
# a.get(["ynet.co.il:מה יהיה עם הגז?"])
# a.get(["seret.co.il:המרגלת"])
# a = avoiding_msg_ynet()
# a.test_browser()
# a.get(["האם אלי טביב ימכור את הקבוצה?"])
# a.get(["ynet.co.il:איזה גרוע ביבי הא?"])

def get_number_of_results(term, ajax=False, verbose=True):
    if not ajax:
        gs = GoogleSearch(term)
        page = str(gs._get_results_page())
        match = reg.search(page)
        if match:
            if verbose:
                print(term, match.groups()[0])
            return int(match.groups()[0].replace(',', ''))
        else:
            raw_input((term, page))
    return int(search(term)['responseData']['cursor']['estimatedResultCount'])

def search(md5hash):
    urls = []
    gs = GoogleSearch(md5hash)
    gs.results_per_page = 100
    results = gs.get_results()
    for res in results:
        urls.append(res.url.encode('utf8'))
    return urls

def main():
    # the hardcoded search query:
    gs = GoogleSearch("computer")
    gs.results_per_page = 10
    results = gs.get_results()
    for r in results:
        Crow(r.url).select("a").to(SqlitePipeline()).async_start(50)
    Crow.run()
    f.close()

def searchInSeretil():
    search_entered = ''
    keyboard = xbmc.Keyboard(search_entered, 'הכנס מילות חיפוש כאן')
    keyboard.doModal()
    if keyboard.isConfirmed():
        search_entered = keyboard.getText()
        if search_entered != '':
            try:
                gs = GoogleSearch("site:seretil.me " + search_entered)
                gs.results_per_page = 100
                results = gs.get_results()
                for res in results:
                    title = res.title.encode('utf8')
                    url = res.url.encode('utf8')
                    title = title.replace('SERETIL.ME', '')
                    title = title.replace('לצפייה ישירה', '')
                    title = title.replace('וסדרות', '')
                    title = title.replace('תרגום מובנה', '')
                    title = title.replace('|', '')
                    title = title.replace('.', '')
                    title = title.replace('סרטים', '')
                    title = title.replace('עם', '')
                    title = title.replace('לצפיה', '')
                    if 'עונה' in title:
                        if not 'page' in url and not 'tag' in url and not '?s' in url and not 'search' in url:
                            addDir(title, url, 211, '')
                    else:
                        if not 'page' in url and not 'tag' in url and not '?s' in url and not 'search' in url:
                            image = ''
                            req = urllib2.Request(url)
                            req.add_header('User-Agent', ' Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3')
                            response = urllib2.urlopen(req)
                            link3 = response.read()
                            response.close()
                            block = re.compile('<div class="post-wrap post-wrap-single">(.*?)linkwithin_hook', re.M + re.I + re.S).findall(link3)
                            image = ''
                            images = re.compile('src="http(.*?).?jpg').findall(block[0])
                            if images:
                                image = 'http' + images[0] + '.jpg'
                            addDir(title, url, 5, image)
            except SearchError, e:
                print "Search failed: %s" % e
    xbmcplugin.setContent(int(sys.argv[1]), 'tvshows')

def goggle(self, word):
    """Get results from google"""
    try:
        results = []
        gs = GoogleSearch(word, random_agent=True)
        gs.results_per_page = 50
        hits = gs.get_results()
        for hit in hits:
            results.append(hit.url.encode('utf8'))
        return results
    except SearchError, e:
        print "Search failed: %s" % e

def scrape(self, keyword, pages=2):
    try:
        gs = GoogleSearch(keyword)
        gs.results_per_page = 10
        gs.page = 0
        results = gs.get_results()
        for res in results:
            url = res.url.encode('utf8')
            Title = res.title
            self.urls.append((url, Title))
    except SearchError, e:
        print "Search failed: %s" % e

def search_by_filename(args):
    args_e = args.encode('utf8')
    try:
        gs = GoogleSearch('"' + args_e + '"')
        gs.results_per_page = 50
        results = gs.get_results()
        for res in results:
            if re_math_sites(allow_sites, res.url.encode('utf8')):
                if re_math_sites(args_e, res.desc.encode('utf8')):
                    return clean_result(res.title.encode('utf8'))
    except SearchError, e:
        print "Search failed: %s" % e

def get_hits(term):
    # data = fetch_data("http://api.thriftdb.com/api.hnsearch.com/items/_search?q=" + term)
    # if data[0] is not None:
    #     if loads(data[0])['hits'] > 0:  # loads() parses the json the hnsearch api returns
    #         return loads(data[0])['hits']
    #     else:
    #         return 0.000001
    # else:
    #     return data[1]
    gs = GoogleSearch(term)
    gs.results_per_page = 100
    results = gs.get_results()
    return results

def Search_YTonGoogle(self, search):
    # import Google Search
    from xgoogle.search import GoogleSearch
    # search on google
    gs = GoogleSearch(search + ' site:http://www.youtube.com ')
    gs.results_per_page = 25
    gs.page = 0
    # return result or None
    try:
        results = gs.get_results()
        return results
    except Exception, e:
        print 'getTrailer --> Error: %s' % e
        return None

def run(self):
    try:
        gs = GoogleSearch(self.key)
        while not self.gotcha:
            results = gs.get_results()
            for res in results:
                self.rank += 1
                if res.url == self.url:
                    self.gotcha = True
                    break
            if gs.page >= 30:
                break
    except SearchError:
        pass

def scrapsomesqlfiles(keyword, pages=20):
    try:
        for i in range(0, pages + 1):
            wt = random.uniform(2, 5)
            gs = GoogleSearch(keyword)
            gs.results_per_page = 50
            gs.page = i
            results = gs.get_results()
            time.sleep(wt)
            print 'This is the %dth iteration and waited %f seconds' % (i, wt)
            for res in results:
                get_url_info(res.url.encode('utf8'))
    except SearchError, e:
        print "Search failed: %s" % e

def DoSearch(mc, search, page, accountStatus=None):
    gs = GoogleSearch('site:' + ICEFILMS_URL + 'ip ' + search + '')
    gs.results_per_page = 25
    gs.page = page
    results = gs.get_results()
    for res in results:
        name = res.title.encode('utf8')
        name = CLEANSEARCH(name)
        url = res.url.encode('utf8')
        index = url.index("/ip")
        match = url[index:len(url)]
        addSearchResult(mc, name, match, 'Movie')

def getUrls(searchTerm):
    links = []
    f = open('output.txt', 'w')
    try:
        gs = GoogleSearch(searchTerm)
        gs.results_per_page = 50
        results = gs.get_results()
        for res in results:
            links.append(res.url.encode("utf8"))
        pickle.dump(links, f)
        f.close()
        return links
    except SearchError, e:
        print "Search failed: %s" % e

def websearch(query):
    limit = config['web_results_limit']
    search_library = config['search_library_active']
    search_engine = config['search_engine_active']
    ret = []
    # Bing=50 per page, Google=10 - go figure!
    per_page = config[search_engine + '_per_page']
    pages = int(math.ceil(limit / float(per_page)))
    if search_library == 'pattern':
        if search_engine == 'bing':
            engine = Bing(license='cvzWROzO9Vaxqu0k33+y6h++ts+a4PLQfvA7HlyJyXM=', language="en")
        elif search_engine == 'google':
            engine = Google(license=config[config['use_whose_key'] + '_google_key'], language="en")
        for page in range(pages):
            try:
                # turns out start = starting page and count is results per page
                # could probably do some logic to make sure count is right: if limit was 130, on page 3
                # count should be 30, whereas our code is going to fetch 50 for a total of 150.
                # ... I think we can probably mess with that later and just work in blocks of 50
                request = asynchronous(engine.search, clean_query(query), start=page + 1, count=per_page,
                                       type=SEARCH, timeout=10, throttle=0.5)
                while not request.done:
                    time.sleep(0.01)
            except:
                raise
            if request.value != None:
                for result in request.value:
                    ret.append({'title': result.title, 'description': result.text})
    elif search_library == 'requests':
        for page in range(pages):
            offset = per_page * page
            params = {'$format': 'json', '$top': per_page, '$skip': offset}
            results = bing.search('web', clean_query(query), params)()['d']['results'][0]['Web']
            for result in results:
                ret.append({'title': result['Title'], 'description': result['Description']})
    elif search_library == 'xgoogle':
        for page in range(pages):
            try:
                # inject some delay
                time.sleep(0.04)
                gs = GoogleSearch(clean_query(query))
                gs.page = page + 1
                gs.results_per_page = per_page
                results = gs.get_results()
                for res in results:
                    ret.append({'title': res.title.encode("utf8"), 'description': res.desc.encode("utf8")})
            except SearchError, e:
                print "Search failed: %s" % e
    return ret

def googledefault(termtosearch, lookspam):
    try:
        gs = GoogleSearch(termtosearch)
        gs.results_per_page = 50
        results = gs.get_results()
        if lookspam:
            for res in results:
                print '\033[1;34mLooking for SPAM in........%s\033[1;m' % (res.url.encode('utf8'))
                spam_detect(res.url.encode('utf8'))
        else:
            for res in results:
                print res.url.encode('utf8')
    except SearchError, e:
        print "Search failed: %s" % e

def perform_search(self):
    url_list = list()
    try:
        gs = GoogleSearch(self.object)
        gs.results_per_page = 50
        results = gs.get_results()
        for res in results:
            url_list.append(res.url.encode("utf8"))
        return url_list
    except SearchError, e:
        print("Search failed: %s" % e)

def get_rating(movie_name):
    try:
        # Use xgoogle api to parse google. Following is the url to the api
        # http://www.catonmat.net/blog/python-library-for-google-search/
        search_str = movie_name + ' site:imdb.com'
        gs = GoogleSearch(search_str)
        gs.results_per_page = 1
        results = gs.get_results()
        url = results[0].url.encode('utf8')
        # url_title = results[0].title.encode('utf8')
        # print url, url_title
        imdb_rating(url)
    except SearchError, e:
        print "Search failed: %s" % e

def searchDocuments(self, terms):
    '''
    This function searches the terms on Google and stores the textual content in DomainKnowledgeDocument objects
    @param terms: list of string terms to be searched through internet
    '''
    try:
        sentence = ' '.join(terms)
        gs = GoogleSearch(sentence)
        results = gs.get_results()
        for result in results:
            self.documentsURLs.append(result.url)
        print gs.num_results
    except SearchError, e:
        print "Search failed: %s" % e

def main():
    gs = GoogleSearch('intitle:道德黑客技术论坛内部专版WEBSHELL')
    gs.results_per_page = 100
    for index in range(4):
        gs.page = index + 1
        results = gs.get_results()
        for result in results:
            url = result.getURL()
            print result
            ret = exploit(url)
            if ret == '':
                continue
            open('result.txt', 'a').write(ret)

def __init__(self, query, filetypes, site, resultsperpage, maxresults, repeat):
    if filetypes:
        filetypes = re.split(",", filetypes)
        query += " filetype:" + filetypes.pop(0)
        for filetype in filetypes:
            query += " OR filetype:" + filetype
    if site:
        query += " site:" + site
    print(query)
    self.gs = GoogleSearch(query, random_agent=True, repeat=repeat)
    self.gs.results_per_page = int(resultsperpage)
    self.maxresults = int(maxresults)
    self.lastpage = False

def run(self, string): query = "site:ReverseIndexSite %s" % string #if not thread: # say("Querying Google: '%s'" % query) gs = GoogleSearch(query) gs.results_per_page = 10 results = gs.get_results() if len(results) >= 1: result = None #At the end result must be a string containing the decoded md5 hash result = ["ReverseIndexSite", result] if thread: say(result) return result
def get_ranks(self):
    for keyword, urls in campaigns.get_keywords().iteritems():
        gs = GoogleSearch(keyword)
        gs.results_per_page = self.config['limits']['results_per_page']
        sys.stderr.write('\n\nChecking keyword: %s\n' % keyword)
        results = self.get_results(gs)
        offset = 1
        query_count = 0
        while len(urls) > 0 and results:
            # Display a period for every hit we make to Google
            if query_count % 5 == 0:
                sys.stderr.write(' ')
            sys.stderr.write('.')
            for rank, row in enumerate(results):
                if (len(urls) > 0):
                    # Find results containing one of our sites
                    found = filter(lambda x: row.url.find(x) != -1, urls)
                    for entry in found:
                        campaigns.set_rank(entry, keyword, rank + offset)
                    # Using sets to get remaining sites to check for
                    urls = list(set(urls) - set(found))
                else:
                    break
            # Don't collect another time if no more URLs are left to check
            offset += len(results)
            results = None
            # We want to sleep here regardless because we might scrape
            # really fast if all the results are on the first page
            time.sleep(self.config['limits']['delay'])
            # Only check if there are sites remaining and we have not
            # surpassed our maximum configured depth
            if (len(urls) > 0 and offset <= self.config['limits']['search_depth'] + 1):
                results = self.get_results(gs)
                query_count += 1
            elif verbose:
                sys.stderr.write('Not retrieving more results\n')
        if verbose:
            sys.stderr.write('URLs: %s\n' % ', '.join(urls))
            if results:
                sys.stderr.write('Results: %s\n' % len(results))

class google:
    def __init__(self):
        self.gs = GoogleSearch('')

    def get_results(self, src):
        if src != '':
            return self.gs._extract_results(BeautifulSoup(src))

def searchHandler(user, command, args, mess):
    try:
        if len(args) < 2:
            return "Please Provide your search Query"
        else:
            gs = GoogleSearch(args)
            gs.results_per_page = 10
            gs.page = 1
            results = gs.get_results()
            if len(results) > 0:
                for res in results:
                    return res.title.encode("utf8") + "\n" + res.desc.encode("utf8") + "\n" + res.url.encode("utf8")
            else:
                return "No Search Result Found for your query."
    except SearchError, e:
        return "Search failed: %s" % e

def run(self, string): query = "site:http://md5-database.org/md5 %s" % string #if not thread: # say("Querying Google: '%s'" % query) gs = GoogleSearch(query) gs.results_per_page = 10 results = gs._get_results_page() texts = results.findAll(text=True) texts = ''.join(texts) results = re.findall(re.compile('MD5\}.*?MD5'), texts) for line in results: if string in line: result = line[(line.find(',') + 1):line.find('.')].strip() return result return ''
def google_search(query):
    try:
        list = Set()
        for i in range(0, 15):
            print "Step: " + str(i) + " for " + query
            gs = GoogleSearch(query)
            gs.results_per_page = 100
            gs.page = i
            results = gs.get_results()
            for res in results:
                url = res.url.encode('utf8')
                url = url[url.find(".") + 1:find_nth(url, "/", 3)]
                if url.count('.', 0, len(url)) > 1:
                    url = url[url.find(".") + 1:len(url)]
                list.add(url)
        return list
    except SearchError, e:
        print "Search failed: %s" % e

def google(text): response = "" time.sleep(0.5) count = 0 try: print "Trying to search for " + text g1 = GoogleSearch(text) g1.results_per_page = 25 results = g1.get_results() for res in results[:2]: time.sleep(0.5) response = search(res.url.encode("utf8")) return response except SearchError, e: print "Failed Once"
def GetSearchResults(query=None, type=None, imdb_id=None, exact=False):
    if (type == "movies"):
        # This is a google search. The -tv will omit all TV shows.
        search = 'intitle:%s -"Episode List" -"Series Rating" site:%s' % (query, ICEFILMS_URL)
    else:
        search = 'allintitle:%s "Episode List" site:%s' % (query, ICEFILMS_URL)
    gs = GoogleSearch(search)
    gs.results_per_page = 25
    gs.page = 0
    results = gs.get_results()
    items = []
    for res in results:
        name = re.sub('(<em>|</em>|<a>|</a>|DivX|-|icefilms(\.info)?|<b>\.\.\.</b>|Episode List|links)', '', res.title.encode('utf8')).strip()
        url = res.url
        video_url = re.search("icefilms\.info(/.*)", url).group(1)
        res = MediaInfo()
        res.type = type
        res.title = name
        match = re.search("(.*)\((\d*)\)", res.title)
        if (match):
            res.title = match.group(1).strip()
            res.year = int(match.group(2).strip())
        res.id = video_url
        items.append(res)
    return items

def searchFor(text):
    gs = GoogleSearch(text)
    gs.results_per_page = 32
    page = 1
    results = []
    titles = []
    while page < 5:
        results.extend(gs.get_results())
        page += 1
    results = results[:10]
    for res in results:
        titles.append(str(res.title.encode("utf-8")))
        urls.append(str(res.url.encode("utf-8")))
    print len(results)
    print titles
    try:
        sublime.active_window().show_quick_panel(titles, onSelection, sublime.MONOSPACE_FONT)
    except:
        webbrowser.open_new_tab("https://www.google.com/search?q=" + text.replace(" ", "+"))

def google_search(query):
    try:
        results = []
        resultg = []
        gs = GoogleSearch(query)
        gs.results_per_page = 30
        while True:
            tmp = gs.get_results()
            if not tmp:
                # no more results were found
                break
            results.extend(tmp)
        # f.write(res.title.encode('utf8'))
        # f.write("\n<br><br>")
        # f.write(res.desc.encode('utf8'))
        # f.write("\n<br><br>")
        f = open("final.txt", "w")
        for res in results:
            f.write('\n <a href=' + res.url.encode('utf8') + '>' + '<h1>' + res.title.encode('utf8') + '</h1>\n' + '</a>\n')
            resultg.extend(res.url.encode('utf8'))
        f.close()
    except SearchError, e:
        print "Search failed: %s" % e

def scan(url, wordlist):
    fname = wordlist
    with open(fname, 'r') as f:
        dorks = f.readlines()
        f.close()
    for dork in dorks:
        if len(dork) < 2:
            continue
        try:
            rnd = random_int(2, 5)
            time.sleep(rnd)
            g = GoogleSearch("site:" + url + " " + dork, random_agent=True)
            g.results_per_page = 10
            print("."),
            results = g.get_results()
            if len(results) > 0:
                msg = "[+] Found " + str(len(results)) + " results with dork: " + dork
                logger.info(msg)
                for res in results:
                    print res.title.encode('utf8')
                    print res.url.encode("utf8")
        except SearchError, e:
            print "Search failed: %s" % e