def printLinks(url):
    ab = anonBrowser()
    ab.anonymize()
    page = ab.open(url)
    html = page.read()
    try:
        print '[+] Printing Links From Regex.'
        link_finder = re.compile('href="(.*?)"')
        links = link_finder.findall(html)
        for link in links:
            print link
    except:
        pass
    try:
        print '\n[+] Printing Links From BeautifulSoup.'
        soup = BeautifulSoup(html)
        links = soup.findAll(name='a')
        for link in links:
            if link.has_key('href'):
                print link['href']
    except:
        pass
def printLinks(url):
    ab = anonBrowser()
    ab.anonymize()
    page = ab.open(url, 'rb')
    html = page.read()
    try:
        print('==========================================')
        print('[+] Printing Links From Regex.')
        link_finder = re.compile('href="(.*?)"')
        links = link_finder.findall(str(html, encoding='UTF-8'))
        print(type(links))
        for link in links:
            print(link)
    except Exception as e:
        print("re find error: " + str(e))
    try:
        print('===========================================')
        print('\n[+] Printing Links From BeautifulSoup.')
        soup = BeautifulSoup(html)
        links = soup.findAll(name='a')
        for link in links:
            if link.has_attr('href'):
                print(link['href'])
    except Exception as e:
        print("BeautifulSoup find error: " + str(e))
def print_links(url):
    ab = anonBrowser()
    ab.anonymize()
    page = ab.open(url)
    html = page.read()
    try:
        print("[+] Printing Links From Regex.")
        link_finder = re.compile('href="(.*?)"')
        print("---isshe----1---")
        links = link_finder.findall(html.decode('utf-8'))  # html is bytes in Python 3
        print("---isshe----2---")
        for link in links:
            print(link)
    except Exception as e:
        print("Unexpected error:", e)
        exit(0)
    try:
        print("\n[+] Printing Links From BeautifulSoup.")
        soup = BeautifulSoup(str(html), features="html5lib")
        links = soup.findAll(name='a')
        for link in links:
            if link.has_attr('href'):
                print(link["href"])
    except:
        print("Unexpected error:", sys.exc_info()[0])
def printLinks(url):
    ab = anonBrowser()
    ab.anonymize()
    page = ab.open(url)
    html = page.read()
    try:
        print('[+] Printing Links From Regex.')
        link_finder = re.compile('href="(.*?)"')
        links = link_finder.findall(html)
        for link in links:
            print(link)
    except:
        pass
    try:
        print('\n[+] Printing Links From BeautifulSoup.')
        soup = BeautifulSoup(html)
        links = soup.findAll(name='a')
        for link in links:
            if link.has_attr('href'):
                print(link['href'])
    except:
        pass
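# A hedged usage sketch for the printLinks variants above; the command-line
# handling here is an assumption and not part of the original excerpts.
if __name__ == '__main__':
    import sys
    if len(sys.argv) != 2:
        print('Usage: printLinks.py <target URL>')
        sys.exit(1)
    printLinks(sys.argv[1])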
def google(search_term):
    ab = anonBrowser()
    search_term = urllib.parse.quote_plus(search_term)
    response = ab.open('http://ajax.googleapis.com/'+\
        'ajax/services/search/web?v=1.0&q='+ search_term)
    print(response.text)
def google(search_term):
    ab = anonBrowser()
    search_term = urllib.quote_plus(search_term)
    response = ab.open('http://ajax.googleapis.com/'+\
        'ajax/services/search/web?v=1.0&q='+ search_term)
    print response.read()
def google(search_term):
    ab = anonBrowser()
    search_term = urllib.quote_plus(search_term)
    response = ab.open('http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=' + search_term)
    objects = json.load(response)
    print objects
def printLinks(url):
    ab = anonBrowser()
    ab.anonymize()
    page = ab.open(url)
    html = page.read()
    try:
        print '[+] printing links from regex'
        link_finder = re.compile('href="(.*?)"')
        links = link_finder.findall(html)
        for link in links:
            print link
    except:
        pass
    try:
        print '\n[+] printing links from beautifulsoup'
        # Scrape the HTML with the call below
        soup = BeautifulSoup(html)
        links = soup.findAll(name='a')
        for link in links:
            if link.has_key('href'):
                print link['href']
    except:
        pass
def google(search_term):
    ab = anonBrowser()
    search_term = urllib.parse.quote_plus(search_term)
    response = ab.open('https://developers.google.com/custom-search' + search_term)
    objects = response.text
    results = []
    print(objects)
def google(search_term):
    ab = anonBrowser()
    search_term = urllib.quote_plus(search_term)
    response = ab.open('http://ajax.googleapis.com/'+\
        'ajax/services/search/web?v=1.0&q='+ search_term)
    objects = json.load(response)
    print objects
def google(search_term):
    ab = anonBrowser()
    # URL-encode the search term
    search_term = urllib.quote_plus(search_term)
    response = ab.open(
        'https://www.googleapis.com/customsearch/v1?key=AIzaSyCn_IE6NM_ATjZ0j5vfXIFlyW-EpGs5gsU&cx=006431901905483214390:i3yxhoqkzo0&num=1&alt=json&q=' + search_term)
    objects = json.load(response)
    print objects
def get_tweets(handle):
    query = urllib.parse.quote_plus('from:' + handle+\
        ' since:2009-01-01 include:retweets')
    tweets = []
    browser = anonBrowser()
    browser.anonymize()
    response = browser.open('http://search.twitter.com/'+\
        'search.json?q='+ query)
    json_objects = response.text
    print(json_objects)
def google(search_term):
    ab = anonBrowser()
    search_term = urllib.quote_plus(search_term)
    response = ab.open('http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=' + search_term)
    objects = json.load(response)
    results = []
    for result in objects['responseData']['results']:
        url = result['url']
        title = result['titleNoFormatting']
        text = result['content']
        new_gr = Google_Result(title, text, url)
        results.append(new_gr)
    return results
def printLinks(url):
    ab = anonBrowser()
    ab.anonymize()
    page = ab.open(url)
    html = page.read()
    try:
        soup = BeautifulSoup(html)
        links = soup.findAll(name='a')
        for link in links:
            if link.has_key('href'):
                print link['href']
    except:
        pass
def query_twitter(self, query):
    query = urllib.quote_plus(query)
    results = []
    browser = anonBrowser()
    response = browser.open('http://search.twitter.com/search.json?q='+query)
    json_objects = json.load(response)
    for result in json_objects['results']:
        new_result = {}
        new_result['from_user'] = result['from_user_name']
        new_result['geo'] = result['geo']
        new_result['tweet'] = result['text']
        results.append(new_result)
    return results
def google(search_term): ab = anonBrowser() search_term = urllib.quote_plus(search_term) response = ab.open("http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=" + search_term) objects = json.load(response) results = [] for result in objects["responseData"]["results"]: url = result["url"] title = result["titleNoFormatting"] text = result["content"] new_gr = Google_Result(title, text, url) results.append(new_gr) return results
def printLinks(url):
    ab = anonBrowser()
    ab.anonymize()
    page = ab.open(url)
    html = page.read()
    try:
        print 'Printing links from Regex'
        link_finder = re.compile('href="(.*?)"')
        links = link_finder.findall(html)
        for link in links:
            print link
    except:
        pass
def query_twitter(self, query):
    query = urllib.parse.quote_plus(query)
    results = []
    browser = anonBrowser()
    response = browser.open('https://search.twitter.com/search.json?q=' + query)
    json_objects = json.load(response)
    for result in json_objects['results']:
        new_result = {}
        new_result['from_user'] = result['from_user_name']
        new_result['geo'] = result['geo']
        new_result['tweet'] = result['text']
        results.append(new_result)
    return results
def get_tweets(self):
    query = urllib.quote_plus('from:' + self.handle + ' since:2009-01-01 include:retweets')
    tweets = []
    browser = anonBrowser()
    browser.anonymize()
    response = browser.open('http://search.twitter.com/' + 'search.json?q=' + query)
    json_objects = json.load(response)
    for result in json_objects['results']:
        new_result = {}
        new_result['from_user'] = result['from_user_name']
        new_result['geo'] = result['text']
        tweets.append(new_result)
    return tweets
def get_tweets(self):
    query = urllib.quote_plus('from:' + self.handle+' since:2009-01-01 include:retweets')
    tweets = []
    browser = anonBrowser()
    browser.anonymize()
    response = browser.open('http://search.twitter.com/'+'search.json?q='+query)
    json_objects = json.load(response)
    for result in json_objects['results']:
        new_result = {}
        new_result['from_user'] = result['from_user_name']
        new_result['geo'] = result['geo']
        new_result['tweet'] = result['text']
        tweets.append(new_result)
    return tweets
def mirrorImages(url, dir):
    ab = anonBrowser()
    ab.anonymize()
    html = ab.open(url)
    soup = BeautifulSoup(html)
    image_tags = soup.findAll('img')
    for image in image_tags:
        filename = image['src'].lstrip('https://')
        filename = os.path.join(dir, filename.replace('/', '_'))
        print('[+] Saving ' + str(filename))
        data = ab.open(image['src']).read()
        ab.back()
        save = open(filename, 'wb')
        save.write(data)
        save.close()
def mirrorImages(url, dir):
    ab = anonBrowser()
    ab.anonymize()
    html = ab.open(url)
    soup = BeautifulSoup(html)
    image_tags = soup.findAll('img')
    for image in image_tags:
        filename = image['src'].lstrip('http://')
        filename = os.path.join(dir, filename.replace('/', '_'))
        print '[+] Saving ' + str(filename)
        data = ab.open(image['src']).read()
        ab.back()
        save = open(filename, 'wb')
        save.write(data)
        save.close()
def printLinks(url):
    ab = anonBrowser()
    ab.anonymize()
    page = ab.open(url)
    html = page.read()
    try:
        print '[+] Printing Links From BeautifulSoup.'
        soup = BeautifulSoup(html)
        links = soup.findAll(name='a')
        for link in links:
            if link.has_key('href'):
                print link['href']
    except:
        pass
def mirrorImages(url, dire):
    ab = anonBrowser()
    ab.anonymize()
    html = ab.open(url)
    soup = BeautifulSoup(html)
    image_tags = soup.findAll("img")
    for image in image_tags:
        filename = image["src"].lstrip("http://")
        filename = os.path.join(dire, filename.replace("/", "_"))
        print "[+] Saving " + str(filename)
        data = ab.open(image["src"]).read()
        ab.back()
        save = open(filename, "wb")
        save.write(data)
        save.close()
def printLinks(url, indent=0, ab=None):
    if ab is None:
        ab = anonBrowser()
        ab.anonymize()
    indentation = ""
    for i in range(indent):
        indentation += " "
    try:
        page = ab.open(url)
        html = page.read()
    except:
        if not indent:
            print '[!!!] Exception. Cannot open page'
        return
    # try:
    #     print '[+] Printing Links From Regex.'
    #     link_finder = re.compile('href="(.*?)"')
    #     links = link_finder.findall(html)
    #     for link in links:
    #         print link
    # except:
    #     pass
    file_finder = re.compile(r'.*\.\w{1,4}$')
    try:
        if not indent:
            print '\n[+] Printing Links from BeautifulSoup'
        soup = BeautifulSoup(html)
        links = soup.findAll(name='a')
        for link in links:
            if link.has_key('href'):
                output = link['href']
                if '../' in output:
                    continue
                print indentation + output
                if len(file_finder.findall(output)):
                    continue
                # if output.endswith('.mkv') or output.endswith('.jpg')\
                #         or output.endswith('mp4') or output.endswith('png')\
                #         or output.endswith('jpeg')\
                #         or output.endswith('avi')\
                #         or output.endswith('aac'):
                #     continue
                printLinks(url + '/' + output, indent=indent + 1, ab=ab)
    except Exception, e:
        # print e
        pass
def get_tweets(handle): query = urllib.quote_plus("from:" + handle + " since:2009-01-01 include:retweets") tweets = [] browser = anonBrowser() browser.anonymize() response = browser.open("http://search.twitter.com/" + "search.json?q=" + query) json_objects = json.load(response) for result in json_objects["results"]: new_result = {} new_result["from_user"] = result["from_user_name"] new_result["geo"] = result["geo"] new_result["tweet"] = result["text"] tweets.append(new_result) return tweets
def query_twitter(self, query):
    query = urllib.parse.quote_plus(query)
    results = []
    browser = anonBrowser()
    response = browser.open(\
        'http://search.twitter.com/search.json?q='+ query)
    json_objects = response.text
    print(json_objects)
    # for result in json_objects['results']:
    #     new_result = {}
    #     new_result['from_user'] = result['from_user_name']
    #     new_result['geo'] = result['geo']
    #     new_result['tweet'] = result['text']
    #     results.append(new_result)
    return results
def mirror_images(url, dir):
    ab = anonBrowser()
    ab.anonymize()
    html = ab.open(url)
    soup = BeautifulSoup(html)
    image_tags = soup.findAll('img')
    for image in image_tags:
        filename = image['src'].lstrip('http://')
        if not filename:
            continue
        filename = os.path.join(dir, filename.replace('/', '_'))
        print("[+] Saving " + str(filename) + ", src = " + image['src'])
        data = ab.open(image['src']).read()
        ab.back()
        save = open(filename, "wb")
        save.write(data)
        save.close()
def get_tweets(handle):
    query = urllib.parse.quote_plus('from:' + handle+\
        ' since:2009-01-01 include:retweets')
    tweets = []
    browser = anonBrowser()
    browser.anonymize()
    response = browser.open('http://search.twitter.com/'+\
        'search.json?q=' + query)
    json_objects = response.text
    print(json_objects)
    # for result in json_objects['results']:
    #     new_result = {}
    #     new_result['from_user'] = result['from_user_name']
    #     new_result['geo'] = result['geo']
    #     new_result['tweet'] = result['text']
    #     tweets.append(new_result)
    return tweets
def query_twitter(self, query):
    results = []
    try:
        query = urllib.parse.quote_plus(query)
        browser = anonBrowser()
        print('http://search.twitter.com/search.json?q='+query)
        response = browser.open(\
            'http://search.twitter.com/search.json?q='+ query, 'rb')
        json_objects = json.load(response)
        for result in json_objects['results']:
            new_result = {}
            new_result['from_user'] = result['from_user_name']
            new_result['geo'] = result['geo']
            new_result['tweet'] = result['text']
            results.append(new_result)
    except Exception as e:
        print(e)
    return results
def mirrorImages(url, dir):
    ab = anonBrowser()
    ab.anonymize()
    html = ab.open(url)
    soup = BeautifulSoup(html)
    # Finds all HTML objects with the img tag
    image_tags = soup.findAll('img')
    # Downloads the picture and saves it to local hard drive
    # as a binary file
    for image in image_tags:
        filename = image['src'].lstrip('http://')
        filename = os.path.join(dir,\
            filename.replace('/', '_'))
        print '[+] Saving ' + str(filename)
        data = ab.open(image['src']).read()
        ab.back()
        save = open(filename, 'wb')
        save.write(data)
        save.close()
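# A hedged usage sketch for the mirrorImages variants above; the target URL
# and output directory are placeholders, and creating the directory first is
# an addition not shown in the original excerpts.
if __name__ == '__main__':
    import os
    target_dir = '/tmp/mirror'
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    mirrorImages('http://www.example.com', target_dir)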
def google(search_term):
    ab = anonBrowser()
    # URL-encode the search term
    search_term = urllib.quote_plus(search_term)
    response = ab.open(
        'https://www.googleapis.com/customsearch/v1?key=AIzaSyCn_IE6NM_ATjZ0j5vfXIFlyW-EpGs5gsU&cx=006431901905483214390:i3yxhoqkzo0&num=1&alt=json&q=' + search_term)
    objects = json.load(response)
    results = []
    for result in objects['items']:
        url = result['link']
        title = result['title']
        text = result['snippet']
        print url
        print title
        print text
        new_gr = Google_Result(title, text, url)
        results.append(new_gr)
    return results
def get_tweets(handle):
    query = urllib.parse.quote_plus('from:' + handle+\
        '&result_type=mixed&count=2')  # ' since:2019-01-01 include:retweets'
    tweets = []
    browser = anonBrowser()
    browser.anonymize()
    print('https://api.twitter.com/1.1/search/'+\
        'tweets.json?q=' + query)
    response = browser.open('https://api.twitter.com/1.1/search/'+\
        'tweets.json?q=' + query)
    json_objects = json.load(response)
    for result in json_objects['results']:
        new_result = {}
        new_result['from_user'] = result['from_user_name']
        new_result['geo'] = result['geo']
        new_result['tweet'] = result['text']
        tweets.append(new_result)
    return tweets
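# A hedged usage sketch for the get_tweets variants above. The handle is a
# placeholder, and the unauthenticated search.twitter.com JSON endpoint these
# snippets query has since been retired, so the call is illustrative only.
if __name__ == '__main__':
    for tweet in get_tweets('some_handle'):
        print(tweet)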
def fetchHTML(url):
    print '[-] HTML fetching started'
    ab = anonBrowser()
    ab.anonymize()
    page = ab.open(url)
    html = page.read()
    print '[-] HTML fetching done'
    allLinks = fetchLinks(html)
    print '[-] HREF parsed'
    allScripts = fetchScripts(html)
    print '[-] SCRIPT ref parsed'
    allMetas = fetchMetadata(html)
    print '[-] METADATA parsed'
    answer = {}
    answer['message'] = 'OK'
    answer['links'] = allLinks
    answer['scripts'] = allScripts
    answer['meta'] = allMetas
    print '[+] Returning the results'
    return answer
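# fetchLinks, fetchScripts, and fetchMetadata are called above but not included
# in these excerpts. A minimal BeautifulSoup-based sketch of what they might
# look like (names and behaviour are assumptions, not the original helpers):
def fetchLinks(html):
    # Collect every href attribute from anchor tags.
    soup = BeautifulSoup(html)
    return [a['href'] for a in soup.findAll('a') if a.get('href')]

def fetchScripts(html):
    # Collect every external script reference.
    soup = BeautifulSoup(html)
    return [s['src'] for s in soup.findAll('script') if s.get('src')]

def fetchMetadata(html):
    # Return the raw meta tags as strings.
    soup = BeautifulSoup(html)
    return [str(m) for m in soup.findAll('meta')]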
def printLinks(url):
    ab = anonBrowser()
    ab.anonymize()
    page = ab.open(url)
    html = page.read()
    # Parse href links with the re module
    try:
        print '[+] Printing Links From Regex.'
        link_finder = re.compile('href="(.*?)"')
        links = link_finder.findall(html)
        for link in links:
            print link
    except:
        pass
    # Parse href links with the bs4 module
    try:
        print '\n[+] Printing Links From BeautifulSoup.'
        soup = BeautifulSoup(html)
        links = soup.findAll(name='a')
        for link in links:
            if link.has_key('href'):
                print link['href']
    except:
        pass
#!/usr/bin/python
# -*- coding: utf-8 -*-
from anonBrowser import *

user_agents = [('User-agent', 'superSecretBroswer')]
ab = anonBrowser(user_agents=user_agents)
for attempt in range(1, 5):
    ab.anonymize()
    print('[*] Fetching page')
    response = ab.open('http://kittenwar.com')
    for cookie in ab.cookie_jar:
        print(cookie)
from anonBrowser import * if __name__ == "__main__": user_agents = [('User-Agent', 'superSecretBroswer')] ab = anonBrowser(proxies=[], user_agents=user_agents) for attempt in range(1, 5): ab.anonymize() print('[*] Fetching page') response = ab.open("http://kittenwar.com") for cookie in ab.cookie_jar: print(cookie)
from anonBrowser import *

ab = anonBrowser(proxies=[],\
    user_agents=[('User-agent', 'superSecretBroswer')])
for attempt in range(1, 5):
    ab.anonymize()
    print '[*] Fetching page'
    response = ab.open('http://kittenwar.com')
    for cookie in ab.cookie_jar:
        print cookie
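# The anonBrowser class imported by these scripts is not shown in the
# excerpts. Below is a minimal mechanize-based sketch of what it typically
# provides, inferred from how it is used above (anonymize(), open(),
# cookie_jar); attribute and method names beyond those are assumptions.
import random
import mechanize
import cookielib

class anonBrowser(mechanize.Browser):
    def __init__(self, proxies=[], user_agents=[]):
        mechanize.Browser.__init__(self)
        self.set_handle_robots(False)
        # Fall back to a generic User-Agent pool if none was supplied.
        self.user_agents = user_agents or \
            [('User-agent', 'Mozilla/5.0 (X11; Linux x86_64)')]
        self.cookie_jar = cookielib.LWPCookieJar()
        self.set_cookiejar(self.cookie_jar)
        if proxies:
            # mechanize expects a scheme -> proxy mapping here.
            self.set_proxies(proxies)

    def clear_cookies(self):
        # Start a fresh cookie jar so sessions cannot be correlated.
        self.cookie_jar = cookielib.LWPCookieJar()
        self.set_cookiejar(self.cookie_jar)

    def change_user_agent(self):
        # Rotate to a random User-Agent header from the pool.
        self.addheaders = [random.choice(self.user_agents)]

    def anonymize(self):
        self.clear_cookies()
        self.change_user_agent()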
def printLinks(url):
    ab = anonBrowser()
    ab.anonymize()
    page = ab.open(url)
    html = page.read()
def printLinks(url):
    ab = anonBrowser()
    ab.anonymize()
    page = ab.open(url, 'rb')
    html = page.read()
    print(str(html, encoding='UTF-8'))
#!/usr/bin/python
# -*- coding: utf-8 -*-
from anonBrowser import *

ab = anonBrowser(proxies=[], user_agents=[('User-agent', 'superSecretBroswer')])
for attempt in range(1, 10):
    ab.anonymize()
    print '[*] Fetching page'
    response = ab.open('http://kittenwar.com')
    for cookie in ab.cookie_jar:
        print cookie
from anonBrowser import *

ab = anonBrowser(proxies=[], user_agents=['superSecretBroser'])
for attempt in range(1, 5):
    ab.anonymize()
    response = ab.open('http://www.baidu.com')
    for cookie in ab.cookie_jar:
        print(cookie)