def _get_goog_urls(self, query):
    # Reads like an instance method: it logs through self.l, so 'self' is added to the signature.
    g = pygoogle.pygoogle(query)
    g.pages = 1
    g.hl = "en"
    self.l.info("Google search result count: %s" % g.get_result_count())
    if g.get_result_count() > 0:
        return g.search_page_wise()[0]
    else:
        g = pygoogle.pygoogle("site:blogspot.com groot")
        g.pages = 1
        g.hl = "en"
        self.l.info("No results for original query, retrying with 'groot'")
        return g.search_page_wise()[0]
def do_action(lcars, string, case):
    target = string.split(' ', 1)[1]
    g = pygoogle("site:youtube.com " + target)
    g.pages = 1
    urls = g.get_urls()
    if len(urls) == 0:
        reply = "No results found for " + target
        lcars.reply_with(reply)
        return reply
    link = urls[0]
    if ("user" in link or "store" in link or "feed" in link
            or "playlist" in link or "channel" in link):
        link = None  # reset so the fallback branches below can actually trigger
        for url in urls:
            if "user" in url:
                link = "http://youtube.nestharion.de/" + url.split('/')[-1]
                break
        if not link:
            for url in urls:
                if ("store" in url or "feed" in url or "playlist" in url
                        or "channel" in url):
                    continue
                else:
                    link = url
                    break
        if not link:
            link = urls[randint(0, len(urls) - 1)]
    lcars.background(["google-chrome", link])
    reply = "Playing " + target
    lcars.reply_with(reply)
    os.system("sleep 1")
    os.system("xdotool windowactivate --sync $(xdotool search --class Chrome | head -n 1) & sleep 0.3; pkill xdotool")
    os.system("xdotool windowactivate --sync $(xdotool search --class Chrome | tail -n 1) & sleep 0.3; pkill xdotool")
    return reply
def get_impact_factor_from_issn(issn="1475-7516", debug=False):
    """
    For the input ISSN in the format NNNN-NNNN obtain the headers and the
    datasets in a nested list equivalent to an array of (# headers)*[4 (years)]
    """
    g = pygoogle("site:http://www.bioxbio.com/if/html " + issn)
    g.pages = 1
    if g.get_urls():
        if_file = urllib.urlopen(g.get_urls()[0])
        html = if_file.read()
        if_file.close()
    else:
        return [], []
    if debug:
        print(html)
    soup = BeautifulSoup(html)
    table = soup.find("table")
    # The first tr contains the field names.
    headings = [th.get_text().strip() for th in table.find("tr").find_all("td")]
    datasets = []
    for row in table.find_all("tr")[1:]:
        dataset = [eval(td.get_text().replace("-", "0"))
                   for td in row.find_all("td") if td.get_text().strip()]
        datasets.append(dataset)
    return headings, datasets
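# A minimal usage sketch for the function above (not from the original source),
# assuming pygoogle, urllib and BeautifulSoup are importable and the
# bioxbio.com table layout is unchanged.
if __name__ == "__main__":
    headings, datasets = get_impact_factor_from_issn("1475-7516")
    print headings          # column names from the first table row
    for row in datasets:
        print row           # one list of numbers per remaining table row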
def LinkedIn(linkedinusername):
    from pygoogle import pygoogle
    g = pygoogle("linkedin " + linkedinusername)
    g.pages = 5
    g.get_result_count()
    myURLs = g.get_urls()
    return myURLs
def dork(self, search_term, p, output):
    print YELLOW + "[+] " + END + WHITE + "Searching for " + END + "%s " % search_term
    gs = pygoogle(search_term)
    gs.pages = p
    print YELLOW + "[+] " + END + WHITE + "Results Found : " + END + "%s " % (gs.get_result_count())
    if gs.get_result_count() == 0:
        print RED + "[-] " + END + WHITE + "No Results Found" + END
        time.sleep(1)
        sys.exit()
    print YELLOW + "[+] " + END + WHITE + "Fetching " + END + "[%s] Results " % (gs.get_result_count())
    url_list = gs.get_urls()
    if len(url_list) == 0:
        print YELLOW + "[!] " + END + WHITE + "Got 0 URLs" + END
        print RED + "[!] " + END + WHITE + "Nothing to save" + END
        time.sleep(1)
        sys.exit()
    elif len(url_list) > 1:
        print YELLOW + "[+] " + END + WHITE + "Got " + END + "[%s] URLs" % (len(url_list))
        print YELLOW + "[+] " + END + WHITE + "Writing URLs to " + END + "[%s] " % (output)
        with open(output, 'w') as w_file:
            for i in url_list:
                w_file.write(i + '\n')
        print YELLOW + "[+] " + END + WHITE + "URLs saved to " + END + "[%s] " % (output)
        time.sleep(2)
def googleIt(url):
    db = database()
    source = url
    en = ["it", "zh-Hans", "fr", "nl", "es", "pt-BR", "ca", "pa", "qu", "mr",
          "mo", "mn", "ne", "pcm", "nn", "or", "qu"]
    random.shuffle(en)
    search = pygoogle.pygoogle(hl=en[0], query="site:" + source)
    urlList = search.get_urls()
    print urlList
    sha1 = hashlib.sha1()
    for eachUrl in urlList:
        # Generate hash for url - used as primary key for database
        try:
            eachUrl = u"".join(eachUrl).encode('utf-8').strip()
            sha1.update(eachUrl)
            hash = sha1.hexdigest()
            numTLD = db.countTLD(eachUrl)
            # Persist item in database
            db.addGoodUrl(source, hash, eachUrl, numTLD)
        except:
            doNothing = 0
    print 'Done'
    db.close()
def fresh_google_check(link: str, attempt=5, debug=False):
    """
    Checks whether Google has already indexed the resource earlier than
    two weeks before today.
    :param link:
    :param attempt:
    :return:
    """
    if debug:
        return False
    try:
        assert isinstance(link, str)
        today = datetime.date.today()
        date_s = _date_to_julian_day(today - datetime.timedelta(days=365 * 8))
        date_e = _date_to_julian_day(today - datetime.timedelta(days=7 * 2))
        query = u'site:%s daterange:%s-%s' % (link, date_s, date_e,)
        result = False
        for i in range(0, attempt):
            g = pygoogle(
                query.encode('utf-8'),
                raise_http_exceptions=True,
                proxies=settings.PROXIES_FOR_GOOGLING
            )
            try:
                result = bool(g.get_result_count())
            except PyGoogleHttpException as e:
                renew_connection()
                continue
            break
    except (AssertionError, PyGoogleHttpException, stem.SocketError):
        result = False
    return result
def searchGoogle(searchTerm):
    print ''
    print 'Searching Google...'
    print ''
    googler = pygoogle(searchTerm)  # initialize pygoogle object with search term
    googler.pages = 3               # set max pages
    print '*********************************'
    print 'Google Results'
    print '*********************************'
    print ''
    # display google results in a formatted way, un-escaping HTML entities in titles/URLs
    for keys, values in googler.search().items():
        theKey = keys.replace("&#39;", "'")
        theKey = theKey.replace("&amp;", "&")
        theValue = values.replace("&#39;", "'")
        theValue = theValue.replace("&amp;", "&")
        print 'Title: ' + (theKey.encode('ascii', 'ignore'))
        print 'URL: ' + (theValue.encode('ascii', 'ignore'))
        print ''
    print ''
    print '*********************************'
    print ''
def google_search(search_string):
    g = pygoogle(search_string)
    g.pages = 1
    results = g.get_urls()
    try:
        return results[0]
    except IndexError:
        return "That was not the word you're looking for"
def google_first_result(googlestring):
    pygoog = pygoogle(googlestring)
    pygoog.pages = 1
    urls = pygoog.get_urls()
    try:
        return urls[0]
    except IndexError:
        return "http://www.google.com"
def https_search(url):
    string_search = "inurl:https site:" + str(url)
    g = pygoogle(string_search)
    g.pages = 5
    g.hl = "br"
    print string_search
    results_numbers = 0
    count = 0
    temp = 6  # seconds
    while results_numbers == 0:
        results_numbers = g.get_result_count()
        print "Results:", results_numbers
        print
        if results_numbers == 0:
            time.sleep(temp)
            count += temp
            if count > 60:  # seconds
                count = -1
                print "Giving up!"
                break
    desired_results = 5
    search_sites = {}
    if count == -1:
        print "No estimate of the search result count"
        return 0
    elif results_numbers < desired_results:
        print "Too few sites!"
        desired_results = results_numbers
    while len(search_sites) == 0:
        search_sites = g.search()
        print search_sites
        print
        for key in search_sites.keys():
            # print key, search_sites[key]
            print unicode(key).encode('cp850'), unicode(search_sites[key]).encode('cp850')
        if len(search_sites) == 0 or len(search_sites) < desired_results:
            time.sleep(temp)
            count += temp
            if count > 60:  # seconds
                count = -1
                print "Giving up!"
                break
    if count == -1:
        print "Google may be blocking the requests"
        return 0
    print "Done"
def googleSearch(searchString):
    g = pygoogle(searchString)
    g.pages = 2
    urls = g.get_urls()
    urls = urls[:10]
    for i in range(len(urls)):
        urls[i] = unicode(urlnorm.norm(urls[i]))
    return urls
def getSearchAndDownloadPaper(textToSearch, fileNameToSave):
    g = pygoogle(textToSearch + ' filetype:pdf')
    g.pages = 1
    try:
        pdfUrl = g.get_urls()[0]
        urllib.urlretrieve(pdfUrl, "../pdfdownloaded/" + fileNameToSave)
        time.sleep(random.randint(30, 60))
    except IndexError:
        print fileNameToSave + " " + textToSearch
        time.sleep(180)
def crackedonpygoogle(passhash, plaintext):
    # trying this approach
    from pygoogle import pygoogle
    # Default is moderate safe search. Probably OK to let this be, since we won't
    # find p**n while googling a password hash. Probably throwing caution
    # (and Rule 34) to the wind here.
    googleresult = pygoogle(passhash)
    googleresult.pages = 1
    resulturls = googleresult.get_urls()
    for i in range(0, len(resulturls)):
        resulturls[i] = str(resulturls[i])
    if crackedonweb(passhash, plaintext, resulturls):
        return True
    else:
        return False
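# The snippet above calls a crackedonweb(passhash, plaintext, urls) helper that
# is not included here. A minimal sketch of what such a check might look like,
# assuming Python 2 and urllib2 (hypothetical; the real helper may differ):
import urllib2

def crackedonweb(passhash, plaintext, urls):
    """Return True if any result page mentions both the hash and the plaintext."""
    for url in urls:
        try:
            page = urllib2.urlopen(url, timeout=10).read()
        except Exception:
            continue  # unreachable or broken result page, skip it
        if passhash in page and plaintext in page:
            return True
    return False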
def searchtext(self, user_input):
    searchterms = user_input + ' site:stackoverflow.com'
    print "Searching:", searchterms
    g = pygoogle(searchterms)
    g.pages = 1
    urls = g.get_urls()
    # go through search results
    for url in urls[:int(len(urls) / 4 + 0.5)]:
        req = urllib2.Request(url, headers=hdr)
        try:
            self.myParser.answers = 0
            page = urllib2.urlopen(req)
            html = page.read()
            # print html
            html_fixed = html.replace('>', '3cmr93iwm0c9ri3w0')
            html_fixed = html_fixed.replace('<', '98jdsf98j3oisdf')
            html_fixed = html_fixed.replace('&', 'dksljf9w8ejfosidjf')
            # html_fixed = html_fixed.replace('...',' ')
            self.myParser.feed(html_fixed)
            self.snips = self.myParser.snips
            # print self.snips
            for x in self.snips:
                for y in x[0]:
                    print url
                    answer = sublime.ok_cancel_dialog(y.replace('98jdsf98j3oisdf', '<').replace('3cmr93iwm0c9ri3w0', '>').replace('dksljf9w8ejfosidjf', '&'))
                    if answer == 1:
                        self.view.insert(self.editor, self.view.sel()[0].begin(), y.replace('98jdsf98j3oisdf', '<').replace('3cmr93iwm0c9ri3w0', '>').replace('dksljf9w8ejfosidjf', '&'))
                        if self.language in starter:
                            self.view.insert(self.editor, self.view.sel()[0].begin(), "\n\n" + starter[self.language] + '\n' + x[1].replace('98jdsf98j3oisdf', '<').replace('3cmr93iwm0c9ri3w0', '>').replace('\t', ' ').replace('\n', '').replace(starter[self.language], ' ').replace(ender[self.language], ' ').replace('dksljf9w8ejfosidjf', '&') + '\n' + ender[self.language] + "\n\n")
                        else:
                            self.view.insert(self.editor, self.view.sel()[0].begin(), "/*" + x[1].replace('98jdsf98j3oisdf', '<').replace('3cmr93iwm0c9ri3w0', '>').replace('\t', ' ').replace('\n', '').replace('dksljf9w8ejfosidjf', '&') + '*/' + "\n\n")
                        self.myParser.snips = []
                        self.myParser.curr_snips = []
                        self.myParser.curr_snip = ''
                        self.myParser.curr_comment = ''
                        self.snips = []
                        break
                    else:
                        continue
                    break
                else:
                    self.myParser.snips = []
                    continue
                break
        except urllib2.HTTPError, e:
            print e.fp.read()
def google(self, args, irc):
    '''(google [search term]) -- Return the top Google result for the term
    searched.
    '''
    try:
        g = pygoogle(u' '.join(args))
        g.pages = 1
        for title, descr in g.search().iteritems():
            reply = u'{} | {}'.format(descr.strip(), title.strip())
            return reply
    except:
        log.err('[Error]: Google {}'.format(sys.exc_info()[0]))
        return '[Error]: Cannot contact Google API.'
def fresh_google_check(link):
    '''
    Checks whether Google has already indexed the resource earlier than
    two weeks before today.
    '''
    sleep(random.random())
    today = datetime.date.today()
    date_s = date_to_julian_day(today - datetime.timedelta(days=365 * 8))
    date_e = date_to_julian_day(today - datetime.timedelta(days=7 * 2))
    query = u'site:%s daterange:%s-%s' % (link, date_s, date_e,)
    g = pygoogle(query.encode('utf-8'))
    g.pages = 1
    return bool(g.get_result_count())
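# The freshness checks above call a date_to_julian_day / _date_to_julian_day
# helper that is not included in these snippets. Google's daterange: operator
# expects Julian day numbers, so the helper presumably looks something like
# this sketch (hypothetical reconstruction, not the original code):
def date_to_julian_day(day):
    """Convert a datetime.date to a Julian day number (Gregorian calendar)."""
    a = (14 - day.month) // 12
    y = day.year + 4800 - a
    m = day.month + 12 * a - 3
    return (day.day + (153 * m + 2) // 5 + 365 * y
            + y // 4 - y // 100 + y // 400 - 32045)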
def findUrl(key, numPages, save=False):
    g = pygoogle(key)
    g.pages = numPages
    links = g.get_urls()
    if save:
        try:
            f = open("links.txt", "w")
            for link in links:
                f.write(link + "\n")
            f.close()
        except IOError:
            print "cannot open new file"
    else:
        return links
def google_query(query):
    g = pygoogle(query)
    g.pages = 1
    g.rsz = 4
    results = {}
    results = g.search()
    rl = results.keys()
    print rl
    s = rl[0]
    # encode() returns a new string; the original discarded its result
    return s.encode('utf-8')
def search(self, group, filename, destination):
    movie_name = getTitle(group['library'])
    movienorm = unicodedata.normalize('NFKD', movie_name).encode('ascii', 'ignore')
    movie_year = group['library']['year']
    searchstring = movienorm + ' ' + str(movie_year) + ' bande annonce vf HD'
    time.sleep(3)
    log.info('Searching google for: %s', searchstring)
    g = pygoogle(str(searchstring))
    diclist = g.search()
    urllist = g.get_urls()
    cleanlist = []
    for x in urllist:
        if 'youtube' in x or 'dailymotion' in x:
            cleanlist.append(x)
    if cleanlist:
        bocount = 0
        for bo in cleanlist:
            if bocount == 0:
                tempdest = unicodedata.normalize('NFKD', os.path.join(rootDir, filename)).encode('ascii', 'ignore') + u'.%(ext)s'
                dest = destination + u'.%(ext)s'
                log.info('Trying to download : %s to %s ', (bo, tempdest))
                p = subprocess.Popen(
                    [sys.executable, 'youtube_dl/__main__.py', '-o', tempdest, '--newline', bo],
                    cwd=rootDir, shell=False,
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                while p.poll() is None:
                    l = p.stdout.readline()  # This blocks until it receives a newline.
                    lmsg = l.replace('%', ' percent') + ' ' + filename
                    log.info(lmsg)
                # When the subprocess terminates there might be unconsumed output
                # that still needs to be processed.
                (out, err) = p.communicate()
                outmsg = 'Out for ' + filename + ' : ' + out
                errmsg = 'Err for ' + filename + ' : ' + err
                if out:
                    log.info(outmsg)
                if err:
                    log.info(errmsg)
                    continue
                else:
                    listetemp = glob.glob(os.path.join(rootDir, '*'))
                    for listfile in listetemp:
                        if unicodedata.normalize('NFKD', filename).encode('ascii', 'ignore') in listfile:
                            ext = listfile[-4:]
                            finaldest = destination + ext
                            shutil.move(listfile, finaldest)
                            bocount = 1
                            log.info('Downloaded trailer for : %s', movienorm)
                            return True
    else:
        return False
def giveSong(user, sortedtweets, mood, p):
    # returns a (user, mood, newsongslist) tuple
    # get latest p tweets
    tweettexts = [x.text for x in sortedtweets[-p:]]
    # use a filtered list of sentiment terms from text + mood
    for text in tweettexts:
        words = [strip(x) for x in text.split()]
        searchterm = mood
        for word in words:
            if word in sentimentlexicon:
                searchterm += ' ' + word
        print 'searching for: ' + searchterm
        if len(searchterm) > 0:
            search = pygoogle.pygoogle(searchterm)
            urls = [x for x in search.get_urls() if isYoutube(x)]
            if len(urls) > 0:
                return urls[0]
def autoupdate(request):
    search = request.POST['search']
    db = MySQLdb.connect(host="127.0.0.1", user="******", passwd="root", db="nutch")
    cur = db.cursor()
    cur.execute("insert into web select * from webpage")
    cur.execute("truncate table webpage")
    os.chdir("/root/nutch/runtime/local")
    g = pygoogle(search)
    g.pages = 1
    x = g.get_urls()
    dataFile = open('urls/seed.txt', 'w')
    for eachitem in x:
        dataFile.write(str(eachitem) + '\n')
    dataFile.close()
    os.system("bin/nutch crawl urls -depth 3 -topN 5")
    return HttpResponseRedirect("/")
def fetch_google_results(self):
    print "Searching Google"
    search = pygoogle(self.query)
    results = search.get_urls()[:10]  # Only get the first 10 results
    for result in results:
        print "Google Result: " + str(result)
        if self.skip_specific_websites(result) == True:
            continue
        time = datetime.now().time()
        score, code = self.calculate_BM25_score(result)
        if (not (score == None)) and (code == 200) and \
                (self.is_illegal_folder(result) == False) and \
                (self.is_illegal_extension(result) == False):
            # All google results are at depth 1 with google.com being at depth 0
            self.urls.put((score, (str(result), 1)))
            self.write_to_file(result, score, int(1), code, time)
            self.pages_crawled += 1
def getpost(name):
    searcht = name
    results = pygoogle(searcht + ' wikipedia')
    results.pages = 1
    links = results.get_urls()
    url = links[0]
    wikititle = arttitle(url)
    # Check if last char is a ) and fix link if needed
    if url[-1] == ')':
        url = url[:-1]
        url += '\)'
    print ('#Here is a Wikipedia link to [' + wikititle + '](' + url + ').\n\n^This ^message ^was ^created ^by ^a ^[bot](http://www.reddit.com/r/wikime/comments/1vweq5/what_is_this_bot/).')
    return ('#Here is a Wikipedia link to [' + wikititle + '](' + url + ').\n\n^This ^message ^was ^created ^by ^a ^[bot](http://www.reddit.com/r/wikime/comments/1vweq5/what_is_this_bot/).')
def main(query):
    g = pygoogle(query)
    g.pages = 1  # Get one page of results
    linkFile = open(os.path.join('data', 'linkFile'), 'w')  # Store all search URLs
    count = 0
    cleaner = re.compile('\[.*?\]')
    for url in g.get_urls():
        linkFile.write(url + '\n')
        # Create corresponding 'query_' + count filename
        target = open(os.path.join('data', query + '_' + str(count) + '.txt'), 'w')
        # target.write(get_text(url).encode('ascii','ignore'))
        text = (get_text(url).encode('ascii', 'ignore'))
        for line in text.split('\n'):
            if len(line) > 600:
                line = re.sub(cleaner, '', line)
                if line[len(line) - 1] == '.':
                    target.write(line + '\n\n')
                else:
                    target.write(line + '.\n\n')
        count = count + 1
def _google_search(self, songs):
    for song in songs:
        print '''fetching ''' + song
        googsearch = pygoogle(song + ' site:youtube.com/watch')
        googsearch.pages = int(self._Entry2.get())
        namelist = []
        index = 0
        for url in googsearch.get_urls():
            if self._CheckVar1.get() > 0 and index > 0:
                namelist.append(url)
            else:
                try:
                    video = pafy.new(url)
                    namelist.append(video.title + ''' : ''' + video.author)
                    index = index + 1
                except IOError:
                    namelist.append("Video Not Available")
        self.songlist.append((namelist, googsearch.get_urls()))
        self.selectedurllist.append(0)
def search(self, search):
    g = pygoogle(search)
    g.pages = 5
    searchNot = self.mapKeeper.searchNot(search.replace(" ", "_"))
    results = g.get_urls()
    print "number of results: ", len(results)
    for url in results:
        base_url = url
        req = urllib2.Request(url, headers=self.hdr)
        try:
            response = urllib2.urlopen(req)
            print "Processing: ", url
        except (UnicodeEncodeError, urllib2.HTTPError, urllib2.URLError,
                socket.error, httplib.BadStatusLine), e:
            print "Error when opening url -> " + url + ": ", e
            continue
        page = BeautifulSoup(response, "lxml")
        images = page.select("img[alt]")
        for image in images:
            if search in image.get("alt").lower():
                imageURL = image.get("src")
                imageURL = urlparse.urljoin(base_url, imageURL)
                if imageURL in searchNot:
                    print "Image is in searchNot: ", imageURL
                    continue
                try:
                    imgdata = urllib2.urlopen(imageURL)
                except urllib2.HTTPError, e:
                    print "Error: " + imageURL + ":", e.code
                    self.mapKeeper.addNot(search.replace(" ", "_") + " " + imageURL)
                    continue
                except urllib2.URLError, e:
                    print "Error: " + imageURL + ":", e.args
                    self.mapKeeper.addNot(search.replace(" ", "_") + " " + imageURL)
                    continue
                image_type, width, height = getimageinfo.getImageInfo(imgdata)
                if image_type == " " or (width < 200 and height < 200):
                    print "Image Invalid: ", imageURL
                    self.mapKeeper.addNot(search.replace(" ", "_") + " " + imageURL)
                    continue
                print "image type:", image_type, "width:", width, "height:", height
                return imageURL
def main():
    urls = []
    if len(sys.argv) > 1 and sys.argv[1] == '-csv':
        with open(sys.argv[2], 'rb') as csvfile:
            csvreader = csv.reader(csvfile)
            csvarray = []
            for row in csvreader:
                csvarray.append(row)
        for row in csvarray:
            for song in row:
                googsearch = pygoogle(song + ' site:youtube.com/watch')
                googsearch.pages = 1
                ytlink = googsearch.get_urls()[0]
                urls.append(ytlink)
        if len(sys.argv) > 2:
            scrape(urls, sys.argv[2])
        else:
            scrape(urls)
    elif len(sys.argv) > 1 and (sys.argv[1] == '-h' or sys.argv[1] == '-help'):
        print '''Youtube Song Downloader
Usage examples:
    OPEN GUI                    python pysonggui.py
    DOWNLOAD FROM CSV FILE      python pysonggui.py -csv <csv filename> [download directory]
    DISPLAY HELP                python pysonggui.py -h'''
    else:
        Root = Tk()
        Pmw.initialise(Root)
        import Tkinter
        del Tkinter
        App = pysonggui(Root)
        App.pack(expand='yes', fill='both')
        Root.geometry('1000x480+10+10')
        Root.title('Python Youtube Downloader')
        Root.mainloop()
def getNPages(searchterms, N, verbose):
    myParser = MyHTMLParser()
    myParser.verbose = verbose
    len_modifer = 0
    searchterms += " site:stackoverflow.com"
    print "Searching:", searchterms
    g = pygoogle(searchterms)
    modifer = 0
    if N < 1:
        g.pages = 1
    else:
        g.pages = N
    urls = g.get_urls()
    # can do less than a page too!
    if N < 1:
        urls = urls[:max([int(len(urls) * N), 1])]
    # go through search results
    for url in urls:
        req = urllib2.Request(url, headers=hdr)
        try:
            myParser.answers = 0
            page = urllib2.urlopen(req)
            html = page.read()
            # IDs for unusual characters
            myParser.feed(IDsIn(html))
            snips = myParser.snips
            # print snips
            for x in snips:
                comment = IDsOut(x[1])
                for y in x[0]:
                    yield [termFix(IDsOut(y)), comment, url]
            myParser.code_flag = 0
            myParser.curr_snip = ""
            myParser.curr_snips = []
            myParser.curr_comment = ""
            myParser.snips = []
            myParser.answers = 0
        except urllib2.HTTPError, e:
            print e.fp.read()
def main(search_root, pages=1, word=None):
    """
    - search_root : the first request on Google
    - word : word to search in sentence
    - pages[1-5] : number of google pages results
    """
    list_sentences = []
    list_soup = []
    list_url = []
    # Initialising research
    research = pygoogle(search_root)
    # Define page number
    research.pages = int(pages)
    # Getting URLs
    url_list = research.get_urls()
    # Parsing
    for url in url_list:
        list_soup.append(get_soup(url))
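# The snippet above stops after collecting parsed pages and relies on a
# get_soup helper that is not shown. A minimal sketch of such a helper,
# assuming Python 2 with urllib2 and BeautifulSoup (hypothetical, not the
# original code):
import urllib2
from bs4 import BeautifulSoup

def get_soup(url):
    """Fetch a URL and return a parsed BeautifulSoup tree."""
    request = urllib2.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    html = urllib2.urlopen(request, timeout=10).read()
    return BeautifulSoup(html, 'html.parser')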
from pygoogle import pygoogle

g = pygoogle('cisco')
g.pages = 1
# pygoogle has no results() method; get_urls() returns the result URLs
x = g.get_urls()
print x
def main(input=input, *args):
    response = None
    choose = False
    choice = ""
    YorN = None
    words = ['']
    link = 0
    more = 0
    more2 = 0
    more3 = 0
    doChunk = True
    responseChunks = []
    url = 'https://en.wikipedia.org/wiki/Main_Page'
    global droid, prompt, tts
    exec('with open(storageFile) as file: list1 = file.readlines()')  # in locals(),globals()
    #### MAIN LOOP:
    quit = False
    verbose = True  # False#
    # while response is not "":
    while quit is not True:
        try:
            if verbose:
                print 0
            ################### input and convert to list of words
            print 'input1=', repr(input), "response1=", response  # , "choice=",choice
            while input == "" or not input or input is None:
                # input = droid.recognizeSpeech().result
                # if not response: print 'noresponse'; input = droid.recognizeSpeech().result#exec(channel)
                # if choose: print 'choose'; prompt = choice; choice = droid.recognizeSpeech().result; input="choose"#exec(channel)
                # if not choose and response: input = droid.recognizeSpeech().result
                #     prompt = response+'>'; exec(channel)
                if response is None:
                    prompt = '>'
                    exec(channel)
                # if not choose: prompt = '>'; exec(channel)
                if choose:
                    print 'choose'
                    prompt = choice
                    exec(channel)
                    choice = input.strip('\r')
                    input = ''
                    print choice
                break
            # print 1
            if input is None:
                time.sleep(7)
                print 'input is None'
                input = ""
                exec(channel)
            # else: print "input2=",input;
            if verbose:
                print 1
            input = input.strip('\r')
            # if input == 'set': continue
            # if input == 'loop': response = mainLoop()
            #     run=True; tts=False
            #     global response; reponse = True
            #     code='';i=0
            #     input = raw_input('yes?\n').strip('\r'); print repr(input)
            # input = input.strip('\r'); print repr(input)
            for index, item in enumerate(list1):
                try:
                    exec(list1[index])
                    # print i;i=i+1
                except Exception, e:
                    pass  # print 'err', str(e)
            try:
                words = input.split(' ')
            except:
                pass
            if verbose:
                print 2
            #### set context(s)
            '''if context:
                phrase2 = raw_input(str(context)+ ' is ')
                context['action'] = phrase2; context = None
                print dctn[df[0]]['action']
                #confirm = raw_input('confirm?')
                #if confirm == 'y': context = confirm; context = None; input ="okay"'''
            ################# direct commands
            # if input == 'quit': response = ""
            if input == 'quit' or input == 'q' or input == 'end' or input == 'exit':
                break
            if input == 'load':
                exec('with open(storageFile) as file: list1 = file.readlines()')
            if input == 'dump':
                exec('with open(storageFile, "wb") as file: file.writelines(list1)')
            if input == 'save':
                PBcreateBranch()
                break
            if input == 'dctn':
                response = str(dctn)
                print response, dctn
                continue
            if input == 'done':
                choose = False
            if verbose:
                print 3
            # print 3
            ################### keyword based commands
            ######## parsing phrase
            # if ' is ' in input and not 'what is ' in input and not words[0] == 'is':
            #     df = input.split(' is ')  #definition
            #     try: dctn[df[0]] = df[1]
            #     except: print 'error, not entered'  #dctn[df[0]]=[df[1]]
            #     if df[1] == 'action':
            #         dctn[df[0]]={'action':''}
            #         response = 'how '+ df[0] +"?"
            #         context = dctn[df[0]]
            #     response = 'okay'
            # if ' is not ' in input:
            #     split= input.split(' is not ')  #remove definition
            #     try: dctn[split[0]].remove(split[1])
            #     except: pass
            ###### question
            if '?' in input:
                input = input.strip('?')
            if 'what is' in input:
                q = input.split('what is ')
                # print dctn[q[1]]
                if q[1] in dctn:
                    response = dctn[q[1]]
                else:
                    try:
                        input = "search " + q[1]
                    except:
                        response = q[1] + ' is not known'
            ###### google
            if 'search' in input:
                try:
                    query = input.replace('search ', '')
                    print "searching.. " + query
                    from pygoogle import pygoogle
                    g = pygoogle(query)
                    g.pages = 1
                    results = g.__search__()
                    # choose=True;
                    response = results[link]['content']
                    # response = repr(response)
                    response = response.encode('ascii', 'ignore').replace('\n', '')
                    url = list(results[link]['url'])[0]
                    print url
                    # response.encode('ascii', 'ignore');
                    doChunk = False
                except Exception, e:
                    print str(e)
                # print str(results)
                print response
            if verbose:
                print 5
            # print 5
            ######## browse
            if choose:
                print 'chooseTrue'
                if choice == 'next':
                    link = link + 1
                    print 'link=', link
                    response = results[link]['content']
                    # response = repr(response)
                    response.encode('ascii')
                    print choice
                if choice == 'go':
                    try:
                        response = " ".join(go(url))
                    except Exception, e:
                        print str(e)
            input = raw_input('pause')
def search(nome):
    dis = pygoogle(nome)
    dis.pages = 1
    # pygoogle has no cont() method; get_result_count() appears to be the intent
    result = dis.get_result_count()
    print('dis is', result)
    return result
def google_search(keyword):
    p = pygoogle(keyword)
    p.pages = 1
    result = p.search().items()[0]
    return result[0] + " - " + result[1]
def pygoogle_test(self):
    g = pygoogle()
# choose 3 keywords
from random import randint
import os
import subprocess
from pygoogle import pygoogle

num_lines = sum(1 for line in open('strippedkws'))
print("%s lines in keyword file" % num_lines)
# randint is inclusive on both ends, so use num_lines - 1 to stay in range
a = randint(0, num_lines - 1)
b = randint(0, num_lines - 1)
c = randint(0, num_lines - 1)
f = open('strippedkws')
lines = f.readlines()
a = lines[a]
b = lines[b]
c = lines[c]
print("Searching for...")
for i in a, b, c:
    print(i)
    g = pygoogle(i)
    g.pages = 5
    print '*Found %s results*' % (g.get_result_count())
    g.get_urls()
    g.get_urls()
    g.display_results()
from pygoogle import pygoogle
import wikipedia

searchTarget = raw_input()
g = pygoogle(searchTarget)
g.pages = 1
# g.display_results()
urls = []
urls = g.get_urls()
urlwiki = ""
for url in urls:
    if url.find("wikipedia") == -1:
        continue
    else:
        urlwiki = url
        break
if (urlwiki != ""):
    urlparse = urlwiki.rstrip().split("/")
    wikipage = wikipedia.page(urlparse[-1])
    print(wikipedia.summary(wikipage.title, sentences=1))
def khoj(request):
    # form = PostForm(request.POST)
    # s_term = request.POST.get('s_term',request.GET.get('s_term',None))
    s_term = PostForm(request.POST)
    # s_term = forms.CharField(error_messages = my_default_errors)
    top_10 = {}
    dummy = OrderedDict()
    dummy2 = OrderedDict()
    if request.POST:
        s_term = PostForm(request.POST)
        dummy = OrderedDict()
        # dummy2.clear()
        if s_term.is_valid():
            c_dummy2 = ""
            print s_term.cleaned_data
            # s_term.save()
            if s_term.cleaned_data.values()[0] in infereddic2.keys():
                c_dummy = infereddic2[s_term.cleaned_data.values()[0]]
                print type(c_dummy)
                c_dummy2 = " ".join(c_dummy)
            else:
                infereddic2.setdefault(s_term.cleaned_data.values()[0], [])
            # if s_term.cleaned_data.values()[0] in infereddic.keys():
            #     dummy = infereddic[s_term.cleaned_data.values()[0]]
            # else:
            #     infereddic.setdefault(s_term.cleaned_data.values()[0], {})
            # print infereddic
            # print type(c_dummy)
            print ";;;;;;;;;;;;;;;;;;;;"
            print s_term.cleaned_data.values()[0] + " " + c_dummy2
            result = pygoogle(s_term.cleaned_data.values()[0] + " " + c_dummy2)
            result.pages = 2
            top_10 = {}
            n = 0
            print "dummmmmmmyy"
            print dummy
            for k, v in result.search().iteritems():
                if n < 10:
                    if k in dummy.keys():
                        n += 1
                    else:
                        top_10[k] = v
                        n += 1
                else:
                    break
            print "top_100->"
            print top_10
            # if dummy:
            #     dummy2 = OrderedDict(dummy.items()+top_10.items())
            #     print "dummyyy"
            #     print dummy
            #     print "2"
            #     print dummy2
            # else:
            #     dummy2 = top_10
            if 'select-id' in request.POST:
                selected_ids = request.POST.getlist('select-id', [])
                text1 = []
                for i in selected_ids:
                    r_open = urllib.urlopen(i[:-1]).read()
                    soup = BeautifulSoup(r_open)
                    text1.append(soup.title.string)
                stopwords = nltk.corpus.stopwords.words('english')
                print stopwords[:10]
                from nltk.stem.snowball import SnowballStemmer
                stemmer = SnowballStemmer('english')

                def tokenize_and_stem(text):
                    # first tokenize by sentence, then by word to ensure that punctuation is caught as its own token
                    tokens = [word for sent in nltk.sent_tokenize(text)
                              for word in nltk.word_tokenize(sent)]
                    # print tokens
                    filtered_tokens = []
                    # filter out any tokens not containing letters (e.g., numeric tokens, raw punctuation)
                    for token in tokens:
                        if re.search('[a-zA-Z]', token):
                            filtered_tokens.append(token)
                    stems = [stemmer.stem(t) for t in filtered_tokens]
                    # print stems[:20]
                    return stems

                def tokenize_only(text):
                    # first tokenize by sentence, then by word to ensure that punctuation is caught as its own token
                    tokens = [word.lower() for sent in nltk.sent_tokenize(text)
                              for word in nltk.word_tokenize(sent)]
                    filtered_tokens = []
                    # filter out any tokens not containing letters (e.g., numeric tokens, raw punctuation)
                    for token in tokens:
                        if re.search('[a-zA-Z]', token):
                            filtered_tokens.append(token)
                    # print filtered_tokens[:20]
                    return filtered_tokens

                totalvocab_stemmed = []
                totalvocab_tokenized = []
                for i in text1:
                    allwords_stemmed = tokenize_and_stem(i)  # for each item in 'synopses', tokenize/stem
                    totalvocab_stemmed.extend(allwords_stemmed)  # extend the 'totalvocab_stemmed' list
                    allwords_tokenized = tokenize_only(i)
                    totalvocab_tokenized.extend(allwords_tokenized)
                vocab_frame = pd.DataFrame({'words': totalvocab_tokenized},
                                           index=totalvocab_stemmed)
                print 'there are ' + str(vocab_frame.shape[0]) + ' items in vocab_frame'
                from sklearn.feature_extraction.text import TfidfVectorizer
                # define vectorizer parameters
                tfidf_vectorizer = TfidfVectorizer(max_df=0.8, max_features=200000,
                                                   min_df=0.2, stop_words='english',
                                                   use_idf=True,
                                                   tokenizer=tokenize_and_stem,
                                                   ngram_range=(1, 3))
                tfidf_matrix = tfidf_vectorizer.fit_transform(text1)  # fit the vectorizer to synopses
                print(tfidf_matrix.shape)
                terms = tfidf_vectorizer.get_feature_names()
                from sklearn.metrics.pairwise import cosine_similarity
                dist = 1 - cosine_similarity(tfidf_matrix)
                from sklearn.cluster import KMeans
                num_clusters = 1
                km = KMeans(n_clusters=num_clusters)
                km.fit(tfidf_matrix)
                clusters = km.labels_.tolist()
                print clusters
                # from __future__ import print_function
                order_centroids = km.cluster_centers_.argsort()[:, ::-1]
                print "-------start--------------------------------------------"
                for i in range(num_clusters):
                    print "Cluster %d words:" % i
                    for ind in order_centroids[i, :3]:  # replace 6 with n words per cluster
                        print "---------------------this----------------------------------"
                        print ' %s' % vocab_frame.ix[terms[ind].split(' ')].values.tolist()[0][0].encode('utf-8', 'ignore')
                        infereddic2[s_term.cleaned_data.values()[0]].append(
                            vocab_frame.ix[terms[ind].split(' ')].values.tolist()[0][0].encode('utf-8', 'ignore'))
                        # infereddic2[s_term.cleaned_data.values()[0]].append(vocab_frame.ix[terms[ind].split(' ')].values.tolist()[0][0].encode('utf-8', 'ignore')[0])])
                # ----------------------------------------------------------------------------
                # for i in selected_ids:
                #     print "This is id"+i
                #     for k,v in dummy2.iteritems():
                #         print k+"-------"+v
                #         if v==i[:-1]:
                #             print v
                #             infereddic[s_term.cleaned_data.values()[0]][k]=v
                #             print k
                print "-------"
                # print infereddic
                print "=========="
                # print selected_ids
    # print s_term.cleaned_data
    return render_to_response('khoj.html', {
        'form': s_term,
        'result': top_10
    }, context_instance=RequestContext(request))
    '''if request.POST:
    'metronews', 'nowtoronto', 'torontoist', 'blogto', 'cbc', '680news',
    'citynews'
]
subreddit = r.get_subreddit('toronto')
for submission in subreddit.get_new(limit=5):
    # print submission.title
    print('start submissions')
    # If we haven't replied to this post before
    if (submission.id not in posts_replied_to) and \
            ('reddit' not in submission.url) and ('imgur' not in submission.url):
        print('gathered submission')
        # Reply to the post
        final_message = ('Hi there! This is the BetterNewsForToronto bot!\n\n'
                         'I\'m here to provide some information related to this post. '
                         'Below are a few relevant links from other news sources. '
                         '(Links are not guaranteed to be news articles...sorry! '
                         'Bot results depend on the post\'s title.)')
        g = pygoogle('Photo Album: Anti Uber protest at Nathan Phillips Square shut down by police')
        g.pages = 3
        gDict = g.search()
        gTitles = gDict.keys()
        linkCount = 0
        index = 0
        print('Title1: ' + gTitles[0])
        while (linkCount < 5):
            if (index >= len(gTitles)):
                break
            compURL = gDict[gTitles[index]]
            if (submission.url not in compURL) and ('reddit' not in compURL) and any(
def main(input=input, *args):
    response = 'hi'
    choose = False
    choice = "go"
    YorN = None
    words = ['']
    chunk = 0
    link = 0
    global droid, prompt
    #### MAIN LOOP:
    while response is not "":
        ################### input and convert to list of words
        print 'input1=' + input, "response1=" + response  # , "choice="+choice
        while input == "" or input == 'nospeech' or input is None:
            input = droid.recognizeSpeech().result
            if not response:
                print 'noresponse'
                input = droid.recognizeSpeech().result  # exec(channel)
            if choose:
                print 'choose'
                prompt = choice
                choice = droid.recognizeSpeech().result
                input = "choose"  # exec(channel)
            if not choose and response:
                input = droid.recognizeSpeech().result
                # prompt = response+'>'; exec(channel)
            if input is None:
                time.sleep(7)
                input = ""
                # print 2
                # exec(channel)
            else:
                print "input2=", input
                # exec('print 2')
        # if input is None:
        #     prompt = response+'>'
        #     input = raw_input('>')
        try:
            words = input.split(' ')
        except:
            pass
        #### set context(s)
        '''if context:
            phrase2 = raw_input(str(context)+ ' is ')
            context['action'] = phrase2; context = None
            print dctn[df[0]]['action']
            #confirm = raw_input('confirm?')
            #if confirm == 'y': context = confirm; context = None; input ="okay"'''
        ################### direct commands
        if input == 'quit':
            response = ""
        if input == 'save':
            PBcreateBranch()
            break
        if input == 'dctn':
            response = str(dctn)
            print response, dctn
            continue
        if input == "hi":
            response = 'hello'
        if prompt == 'anything else? (yes/no)>':
            if YorN == 'yes':
                pass
            if YorN == 'no':
                break
        ################### keyword based commands
        ########## definitions
        if ' is ' in input and not 'what is ' in input and not words[0] == 'is':
            df = input.split(' is ')  # definition
            try:
                dctn[df[0]] = df[1]
            except:
                print 'error, not entered'  # dctn[df[0]]=[df[1]]
            if df[1] == 'action':
                dctn[df[0]] = {'action': ''}
                response = 'how ' + df[0] + "?"
                context = dctn[df[0]]
            response = 'okay'
            # continue
        if ' is not ' in input:
            split = input.split(' is not ')  # remove definition
            try:
                dctn[split[0]].remove(split[1])
            except:
                pass
        ######## question
        if '?' in input:
            input = input.strip('?')
        if 'what is' in input:
            q = input.split('what is ')
            # print dctn[q[1]]
            if q[1] in dctn:
                response = dctn[q[1]]
            else:
                try:
                    input = "search " + q[1]
                except:
                    response = q[1] + ' is not known'
        ######## google
        if 'search' in input:
            query = input.replace('search ', '')
            print "searching " + query
            from pygoogle import pygoogle
            g = pygoogle(query)
            g.pages = 1
            results = g.__search__()
            # print str(results)
            choose = True
            response = results[link]['content']
            # response = repr(response)
            response.encode('ascii')
            # response.encode('ascii', 'ignore');
        ##################################################################################################################################
        if choose:
            print 'chooseTrue'
            if choice == 'next':
                link = link + 1
                print 'link=', link
                response = results[link]['content']
                # response = repr(response)
                response.encode('ascii')
            if choice == 'go':
                br = mechanize.Browser()
                br.set_handle_robots(False)
                br.addheaders = [(
                    'User-agent',
                    'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'
                )]
                page = br.open(url)
                response = page.read()
                soup = BeautifulSoup(response, "html.parser")
                # paras=soup.p #findAll('p', text=True)
                VALID_TAGS = ['p', 'span']  # , 'ul', 'li', 'br']'div',
                paras = [i.text.encode('ascii', "ignore") for i in soup.find_all(VALID_TAGS)]
                ################## removes <p>s
                paras = filter(None, paras)
                paras = [i.replace('\n', '.').replace('\r', '.') for i in paras]
                paras = [i.replace('(', 'parens').replace(')', 'parens')
                          .replace('[', 'bracket').replace(']', 'bracket') for i in paras]
                input = raw_input('pause')
        ######## actions
        if 'e' in input:
            exec1 = input.split('e ')  # exec
            try:
                exec(exec1[1])
                continue
            except Exception, e:
                print str(e)
        if 'do' in input:  # action
            try:
                exec(dctn[words[1]]['action'] + ' "' + str(''.join(words[2:99])) + '"')
                continue
            except Exception, e:
                print str(e)
from __future__ import print_function
from pygoogle import pygoogle

g = pygoogle('barsha biswas')
g.pages = 1
g.display_results()
def search(searchfor):
    g = pygoogle(searchfor)
    return g.get_result_count()
from pygoogle import pygoogle

g = pygoogle('quake 3 arena')
g.pages = 5
print '*Found %s results*' % (g.get_result_count())
g.get_urls()
else:
    usermagento = args.user
    passmagento = args.pwd
if args.dork == None:
    nada = ''
    print " Usage: python letmefuckit.py --dork <dork> [options]"
else:
    saveresults = open("urls.txt", "w")
    print " Searching for: ", args.dork
    print " Total of google pages to process: ", args.pages
    print " Save results is ", save
    print '\n Initializing...'
    g = pygoogle(args.dork)
    g.pages = 5
    print ' [* Found %s results in search engine *]\n' % (g.get_result_count())
    urles = g.get_urls()
    for n, elem in enumerate(urles):
        url = '{1}\n'.format(n, elem)
        saveresults.write(url)
    saveresults.close()
    print "\n"
    print "--------------------------"
    print " Right! Analysing data...."
    print "--------------------------"
    print "\n"
    print "Possible targets found...\n"
    text_file = open("C:\exploit\urls.txt", "r")
    for line in text_file:
import re, urllib, random, webbrowser, urllib2, sys
import json
import mechanize
from pygoogle import pygoogle

# phishListParsed = []
# json_list = open('seed.json', 'rb')
# phishList = json.load(json_list)
# for row in phishList:
#     phishListParsed.append(row['url'])
phishBank = []
done = 0
try:
    g = pygoogle('sign up email list')
    br = mechanize.Browser()
    br.set_handle_robots(False)  # ignore robots
    br.set_handle_refresh(False)
    response = br.open(crawl)
    print crawl
    br.form = list(br.forms())[0]
    for control in br.form.controls:
        if control.type == "text":  # means it's class ClientForm.TextControl
            control.value = "*****@*****.**"
    response = br.submit()
    print response
    print("SUCCCCCCESSSSSSSSS")
except Exception as e:
    print("Broken link to %s" % crawl)
    print(type(e))
"--output-only", dest="outFile", default=False, help="Only save results to the given file, No further action.") parser.add_option( "-d", "--debug", dest="debugRun", default=False, help="Debug the tool without poisoning or exploiting the host") (options, args) = parser.parse_args() fk = 7 if options.debugRun not in ["t", "True", "true", "TRUE"]: g = pygoogle(options.theDork) g.pages = options.numPages #Set to the number of result url pages to return from google fk = g.get_result_count() print '*Found %s results*' % (fk) # Handle case where we want to use the host for urls = g.get_urls() else: print '*Debug test %s*' % (fk) urls = ["http://localhost:8087"] # Handle case where we only want the results to a file. if options.outFile in ["t", "True", "true", "TRUE"]: file = open(options.outFile, "w") for url in urls: file.write("%s\n" % url) print "File saved to: %s" % options.outFile