def search(self): dl = Download.Download(self.verbose) while self.Counter <= self.Limit and self.Counter <= 1000: time.sleep(1) if self.verbose: p = ' [*] RedditPost Search on result: ' + str(self.Counter) self.logger.debug( "RedditPost Search on result: " + str(self.Counter)) print helpers.color(p, firewall=True) try: url = "https://www.reddit.com/search?q=%40" + str(self.Domain) + \ "&restrict_sr=&sort=relevance&t=all&count=" + str(self.Counter) + \ '&after=t3_3mkrqg' except Exception as e: error = " [!] Major issue with RedditPost search:" + str(e) self.logger.error( "Major issue with RedditPostSearch: " + str(e)) print helpers.color(error, warning=True) try: RawHtml = dl.requesturl(url, useragent=self.UserAgent) except Exception as e: error = " [!] Fail during Request to Reddit (Check Connection):" + \ str(e) self.logger.error( "Fail during Request to Reddit (Check Connection): " + str(e)) print helpers.color(error, warning=True) self.Html += RawHtml # reddit seems to increment by 25 in cases self.Counter += 25
def search(self): dl = Download.Download(self.verbose) while self.Counter <= self.Limit and self.Counter <= 1000: time.sleep(1) if self.verbose: p = ' [*] Google Search on page: ' + str(self.Counter) print helpers.color(p, firewall=True) try: url = "http://www.google.com/search?num=" + str(self.Quanity) + "&start=" + \ str(self.Counter) + "&hl=en&meta=&q=%40\"" + \ self.Domain + "\"" except Exception as e: error = " [!] Major issue with Google Search:" + str(e) print helpers.color(error, warning=True) try: results = dl.requesturl(url, useragent=self.UserAgent) except Exception as e: error = " [!] Fail during Request to Google (Check Connection):" + \ str(e) print helpers.color(error, warning=True) try: # Url = r.url dl.GoogleCaptchaDetection(results) except Exception as e: print e self.Html += results self.Counter += 100 helpers.modsleep(self.Sleep, jitter=self.Jitter)
def search(self): dl = Download.Download(verbose=self.verbose) while self.Counter <= self.Depth and self.Counter <= 100: helpers.modsleep(5) if self.verbose: p = ' [*] GitHubUser Search on page: ' + str(self.Counter) print helpers.color(p, firewall=True) try: url = 'https://github.com/search?p=' + str(self.Counter) + '&q=' + \ str(self.domain) + 'ref=searchresults&type=Users&utf8=' except Exception as e: error = " [!] Major issue with GitHubUser Search:" + str(e) print helpers.color(error, warning=True) try: r = dl.requesturl(url, useragent=self.UserAgent, raw=True, timeout=10) except Exception as e: error = " [!] Fail during Request to GitHubUser (Check Connection):" + \ str(e) print helpers.color(error, warning=True) results = r.content self.Html += results self.Counter += 1
def process(self): dl = Download.Download(self.verbose) try: # This returns a JSON object url = "https://emailhunter.co/trial/v1/search?offset=0&domain=" + \ self.domain + "&format=json" r = dl.requesturl(url, useragent=self.UserAgent, raw=True) except Exception as e: error = "[!] Major issue with EmailHunter Search:" + str(e) print helpers.color(error, warning=True) try: results = r.json() # pprint(results) # Check to make sure we got data back from the API if results['status'] == "success": # The API starts at 0 for the first value x = 0 EmailCount = int(results['results']) # We will itirate of the Json object for the index objects while x < EmailCount: self.results.append(results['emails'][int(x)]['value']) x += 1 if results['status'] == "error": # The API starts at 0 for the first value error = ' [!] EmailHunter Trial API failed: ' + \ str(results['message']) self.logger.error('EmailHunter Trial API failed: ' + str(results['message'])) print helpers.color(error, firewall=True) except Exception as e: pass if self.verbose: p = ' [*] EmailHunter completed JSON request' print helpers.color(p, firewall=True)
def process(self): dl = Download.Download(self.verbose) while self.Counter <= self.PageLimit: if self.verbose: p = ' [*] AskSearch on page: ' + str(self.Counter) print helpers.color(p, firewall=True) self.logger.info('AskSearch on page: ' + str(self.Counter)) try: url = 'http://www.ask.com/web?q=@' + str(self.Domain) + \ '&pu=10&page=' + str(self.Counter) except Exception as e: error = " [!] Major issue with Ask Search:" + str(e) self.logger.error('Major issue with Ask Search: ' + str(e)) print helpers.color(error, warning=True) try: rawhtml = dl.requesturl(url, useragent=self.UserAgent) except Exception as e: error = " [!] Fail during Request to Ask (Check Connection):" + \ str(e) self.logger.error( 'Fail during Request to Ask (Check Connection): ' + str(e)) print helpers.color(error, warning=True) self.Html += rawhtml self.Counter += 1 helpers.modsleep(self.Sleep, jitter=self.Jitter)
def test_downloads():
    # Smoke-test the Download helper end to end: fetch a page, run the
    # captcha detector over it, then download a sample file and remove it.
    useragent = helpers.getua()
    downloader = Download.Download(True)
    page = downloader.requesturl(
        'http://google.com', useragent, timeout=2, retrytime=3, statuscode=False)
    downloader.GoogleCaptchaDetection(page)
    path, fetched = downloader.download_file(
        'http://www.sample-videos.com/doc/Sample-doc-file-100kb.doc', '.pdf')
    downloader.delete_file(path)
def process(self): dl = Download.Download(verbose=self.verbose) # Get all the USER code Repos # https://github.com/search?p=2&q=enron.com+&ref=searchresults&type=Code&utf8=✓ UrlList = [] while self.Counter <= self.Depth: if self.verbose: p = " [*] GitHub Gist Search Search on page: " + str( self.Counter) print helpers.color(p, firewall=True) try: # search?p=2&q=%40enron.com&ref=searchresults&utf8=✓ url = ("https://gist.github.com/search?p=" + str(self.Counter) + "&q=%40" + str(self.domain) + "+&ref=searchresults&utf8=✓") r = dl.requesturl(url, useragent=self.UserAgent, raw=True, timeout=10) if r.status_code != 200: break except Exception as e: error = " [!] Major issue with GitHubGist Search:" + str(e) print helpers.color(error, warning=True) RawHtml = r.content # Parse the results for our URLS) soup = BeautifulSoup(RawHtml) for a in soup.findAll("a", href=True): a = a["href"] if a.startswith("/"): UrlList.append(a) self.Counter += 1 # Now take all gathered URL's and gather the HTML content needed for url in UrlList: try: url = "https://gist.github.com" + url html = dl.requesturl(url, useragent=self.UserAgent, timeout=10) self.Html += html except Exception as e: error = " [!] Connection Timed out on GithubGist Search:" + str( e) print helpers.color(error, warning=True)
def search(self): # setup for helpers in the download class convert = Converter.Converter(verbose=self.verbose) dl = Download.Download(self.verbose) while self.Counter <= self.Limit and self.Counter <= 100: time.sleep(1) if self.verbose: p = ' [*] Google PDF Search on page: ' + str(self.Counter) print helpers.color(p, firewall=True) try: urly = "https://www.google.com/search?q=site:" + \ self.Domain + "+filetype:pdf&start=" + str(self.Counter) except Exception as e: error = " [!] Major issue with Google Search:" + str(e) print helpers.color(error, warning=True) try: r = requests.get(urly) except Exception as e: error = " [!] Fail during Request to Google (Check Connection):" + \ str(e) print helpers.color(error, warning=True) RawHtml = r.content # get redirect URL # Url = r.url dl.GoogleCaptchaDetection(RawHtml) soup = BeautifulSoup(RawHtml) for a in soup.findAll('a'): try: # https://stackoverflow.com/questions/21934004/not-getting-proper-links- # from-google-search-results-using-mechanize-and-beautifu/22155412#22155412? 
# newreg=01f0ed80771f4dfaa269b15268b3f9a9 l = urlparse.parse_qs( urlparse.urlparse(a['href']).query)['q'][0] if l.startswith('http') or l.startswith('www'): if "webcache.googleusercontent.com" not in l: self.urlList.append(l) except: pass self.Counter += 10 # now download the required files try: for url in self.urlList: if self.verbose: p = ' [*] Google PDF search downloading: ' + str(url) print helpers.color(p, firewall=True) try: filetype = ".pdf" # use new helper class to download file FileName, FileDownload = dl.download_file(url, filetype) # check if the file was downloaded if FileDownload: if self.verbose: p = ' [*] Google PDF file was downloaded: ' + \ str(url) print helpers.color(p, firewall=True) self.Text += convert.convert_pdf_to_txt(FileName) except Exception as e: print e try: # now remove any files left behind dl.delete_file(FileName) except Exception as e: print e except: print helpers.color(" [*] No PDF's to download from Google!\n", firewall=True)
def search(self): dl = Download.Download(self.verbose) while self.Counter <= self.Limit and self.Counter <= 100: time.sleep(1) if self.verbose: p = ' [*] Google Search for PasteBin on page: ' + \ str(self.Counter) self.logger.info("GooglePasteBinSearch on page: " + str(self.Counter)) print helpers.color(p, firewall=True) try: url = "http://www.google.com/search?num=" + str(self.Quanity) + "&start=" + str(self.Counter) + \ '&hl=en&meta=&q=site:pastebin.com+"%40' + \ self.Domain + '"' except Exception as e: error = " [!] Major issue with Google Search for PasteBin:" + \ str(e) self.logger.error( "GooglePasteBinSearch could not create URL: " + str(e)) print helpers.color(error, warning=True) try: r = requests.get(url, headers=self.UserAgent) except Exception as e: error = " [!] Fail during Request to PasteBin (Check Connection):" + str( e) self.logger.error( "Fail during Request to PasteBin (Check Connection): " + str(e)) print helpers.color(error, warning=True) try: RawHtml = r.content try: # check for captcha in the source dl.GoogleCaptchaDetection(RawHtml) except Exception as e: self.logger.error("Issue checking for captcha: " + str(e)) soup = BeautifulSoup(RawHtml, "lxml") for a in soup.select('.r a'): # remove urls like pastebin.com/u/Anonymous if "/u/" not in str(a['href']): self.urlList.append(a['href']) except Exception as e: error = " [!] Fail during parsing result: " + str(e) self.logger.error( "PasteBinSearch Fail during parsing result: " + str(e)) print helpers.color(error, warning=True) self.Counter += 100 # Now take all gathered URL's and gather the Raw content needed for Url in self.urlList: try: Url = "http://pastebin.com/raw/" + str(Url).split('/')[3] data = requests.get(Url, timeout=2) self.Text += data.content except Exception as e: error = "[!] 
Connection Timed out on PasteBin Search:" + str(e) self.logger.error( "Connection Timed out on PasteBin raw download: " + str(e)) print helpers.color(error, warning=True) if self.verbose: p = ' [*] Searching PasteBin Complete' self.logger.info("Searching PasteBin Complete") print helpers.color(p, firewall=True)
def search(self): convert = Converter.Converter(verbose=self.verbose) while self.Counter <= self.Limit and self.Counter <= 100: time.sleep(1) if self.verbose: p = " [*] Google XLSX Search on page: " + str(self.Counter) self.logger.info("Google XLSX Search on page: " + str(self.Counter)) print helpers.color(p, firewall=True) try: urly = ("https://www.google.com/search?q=site:" + self.Domain + "+filetype:xlsx&start=" + str(self.Counter)) except Exception as e: error = " [!] Major issue with Google XLSX Search:" + str(e) self.logger.error("GoogleXlsxSearch failed to build url: " + str(e)) print helpers.color(error, warning=True) try: r = requests.get(urly) except Exception as e: error = " [!] Fail during Request to Google (Check Connection):" + str( e) self.logger.error( "GoogleXlsxSearch failed to request url (Check Connection): " + str(e)) print helpers.color(error, warning=True) RawHtml = r.content soup = BeautifulSoup(RawHtml) # I use this to parse my results, for URLS to follow for a in soup.findAll("a"): try: # https://stackoverflow.com/questions/21934004/not-getting-proper-links- # from-google-search-results-using-mechanize-and-beautifu/22155412#22155412? 
# newreg=01f0ed80771f4dfaa269b15268b3f9a9 l = urlparse.parse_qs(urlparse.urlparse( a["href"]).query)["q"][0] if l.startswith("http") or l.startswith("www"): if "webcache.googleusercontent.com" not in l: self.urlList.append(l) except: pass self.Counter += 10 helpers.modsleep(self.Sleep, jitter=self.Jitter) # now download the required files self.logger.debug( "GoogleXlsxSearch completed HTML result query, starting downloads") try: for url in self.urlList: if self.verbose: p = " [*] Google XLSX search downloading: " + str(url) self.logger.info("Google XLSX search downloading: " + str(url)) print helpers.color(p, firewall=True) try: filetype = ".xlsx" dl = Download.Download(self.verbose) FileName, FileDownload = dl.download_file(url, filetype) if FileDownload: if self.verbose: p = " [*] Google XLSX file was downloaded: " + str( url) self.logger.info( "Google XLSX file was downloaded: " + str(url)) print helpers.color(p, firewall=True) self.Text += convert.convert_Xlsx_to_Csv(FileName) # print self.Text except Exception as e: print helpers.color(" [!] Issue with opening Xlsx Files\n", firewall=True) self.logger.error("Google XLSX had issue opening file") try: dl.delete_file(FileName) except Exception as e: self.logger.error("Google XLSX failed to delete file: " + str(e)) except Exception as e: print helpers.color(" [*] No XLSX's to download from google!\n", firewall=True) self.logger.error("No XLSX's to download from google! " + str(e))
def process(self): dl = Download.Download(self.verbose) try: # We will check to see that we have enough requests left to make a search url = "https://api.hunter.io/v2/account?api_key=" + self.apikeyv r = dl.requesturl(url, useragent=self.UserAgent, raw=True) accountInfo = r.json() quota = int(accountInfo['data']['calls']['available']) quotaUsed = int(accountInfo['data']['calls']['used']) if quotaUsed >= self.QuotaLimit: overQuotaLimit = True else: overQuotaLimit = False except Exception as e: error = " [!] Hunter API error: " + str( accountInfo['errors'][0]['details']) print helpers.color(error, warning=True) try: # Hunter's API only allows 100 emails per request, so we check the number of emails Hunter has # on our specified domain, and if it's over 100 we need to make multiple requests to get all of the emails url = "https://api.hunter.io/v2/email-count?domain=" + self.domain r = dl.requesturl(url, useragent=self.UserAgent, raw=True) response = r.json() totalEmails = int(response['data'][self.etype]) emailsLeft = totalEmails offset = 0 except Exception as e: error = "[!] 
Major issue with Hunter Search: " + str(e) print helpers.color(error, warning=True) requestsMade = 0 # Main loop to keep requesting the Hunter API until we get all of the emails they have while emailsLeft > 0: try: if overQuotaLimit or requestsMade + quotaUsed >= self.QuotaLimit: if self.verbose: print helpers.color(" [*] You are over your set Quota Limit: " + \ str(quotaUsed) + "/" + str(self.QuotaLimit) + " stopping search", firewall=True) break elif self.RequestLimit != 0 and requestsMade >= self.RequestLimit: if self.verbose: print helpers.color( " [*] Stopping search due to user set Request Limit", firewall=True) break # This returns a JSON object url = "https://api.hunter.io/v2/domain-search?domain=" + \ self.domain + self.type + "&limit=100&offset=" + str(offset) + "&api_key=" + self.apikeyv r = dl.requesturl(url, useragent=self.UserAgent, raw=True) results = r.json() emailCount = int(results['meta']['results']) except Exception as e: error = " [!] Hunter API error: " + str( results['errors'][0]['details']) + " QUITTING!" print helpers.color(error, warning=True) break try: # Make sure we don't exceed the index for the 'emails' array in the 'results' Json object if emailsLeft < 100: emailCount = emailsLeft if emailCount > 100: emailCount = 100 # 1 request is every 10 emails delivered requestsMade += emailCount // 10 if emailCount % 10 != 0: requestsMade += 1 # The API starts at 0 for the first value x = 0 # We will itirate of the Json object for the index objects while x < emailCount: self.results.append( results['data']['emails'][int(x)]['value']) x += 1 emailsLeft -= emailCount if emailsLeft > 100: offset += 100 else: offset += emailsLeft except Exception as e: error = " [!] 
Major issue with search parsing: " + str(e) print helpers.color(error, warning=True) break if self.verbose: # Print the avalible requests user has if verbose print helpers.color(' [*] Hunter has completed JSON request', firewall=True) requestsUsed = requestsMade + quotaUsed if quota - requestsUsed <= 0: print helpers.color(" [*] You have no Hunter requests left." \ + "They will refill in about a month", firewall=True) else: print helpers.color(" [*] You have " + str(requestsUsed) \ + "/" + str(quota) + " Hunter requests left", firewall=True)