def search(self):
    dl = Download.Download(self.verbose)
    while self.Counter <= self.Limit and self.Counter <= 1000:
        time.sleep(1)
        if self.verbose:
            p = ' [*] Google Search on page: ' + str(self.Counter)
            print helpers.color(p, firewall=True)
        try:
            url = "http://www.google.com/search?num=" + str(self.Quanity) + "&start=" + \
                str(self.Counter) + "&hl=en&meta=&q=%40\"" + \
                self.Domain + "\""
        except Exception as e:
            error = " [!] Major issue with Google Search:" + str(e)
            print helpers.color(error, warning=True)
        try:
            results = dl.requesturl(url, useragent=self.UserAgent)
        except Exception as e:
            error = " [!] Fail during Request to Google (Check Connection):" + \
                str(e)
            print helpers.color(error, warning=True)
        try:
            # Url = r.url
            dl.GoogleCaptchaDetection(results)
        except Exception as e:
            print e
        self.Html += results
        self.Counter += 100
        helpers.modsleep(self.Sleep, jitter=self.Jitter)
def search(self):
    dl = Download.Download(self.verbose)
    convert = Converter.Converter(verbose=self.verbose)
    while self.Counter <= self.Limit and self.Counter <= 100:
        time.sleep(1)
        if self.verbose:
            p = ' [*] Google DOCX Search on page: ' + str(self.Counter)
            print helpers.color(p, firewall=True)
        try:
            urly = "https://www.google.com/search?q=site:" + \
                self.Domain + "+filetype:docx&start=" + str(self.Counter)
        except Exception as e:
            error = " [!] Major issue with Google Search:" + str(e)
            print helpers.color(error, warning=True)
        try:
            r = requests.get(urly)
        except Exception as e:
            error = " [!] Fail during Request to Google (Check Connection):" + \
                str(e)
            print helpers.color(error, warning=True)
        RawHtml = r.content
        soup = BeautifulSoup(RawHtml)
        # Parse the results for URLs to follow
        for a in soup.findAll('a'):
            try:
                # https://stackoverflow.com/questions/21934004/not-getting-proper-links-
                # from-google-search-results-using-mechanize-and-beautifu/22155412#22155412?
                # newreg=01f0ed80771f4dfaa269b15268b3f9a9
                l = urlparse.parse_qs(
                    urlparse.urlparse(a['href']).query)['q'][0]
                if l.startswith('http') or l.startswith('www'):
                    if "webcache.googleusercontent.com" not in l:
                        self.urlList.append(l)
            except:
                pass
        self.Counter += 10
    # now download the required files
    try:
        for url in self.urlList:
            if self.verbose:
                p = ' [*] Google DOCX search downloading: ' + str(url)
                print helpers.color(p, firewall=True)
            try:
                filetype = ".docx"
                FileName, FileDownload = dl.download_file(url, filetype)
                if FileDownload:
                    if self.verbose:
                        p = ' [*] Google DOCX file was downloaded: ' + \
                            str(url)
                        print helpers.color(p, firewall=True)
                    self.Text += convert.convert_docx_to_txt(FileName)
                # print self.Text
            except Exception as e:
                print helpers.color(" [!] Issue with Converting Docx Files\n", firewall=True)
            try:
                dl.delete_file(FileName)
            except Exception as e:
                print e
    except:
        print helpers.color(" [*] No DOCX's to download from Google!\n", firewall=True)
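# --- Illustration only (not part of the original modules) ---
# A minimal sketch of what the urlparse.parse_qs() extraction used in the Google
# search loops above does to a typical Google redirect href. The sample href and
# helper name below are assumptions for demonstration; urlparse is the Python 2
# stdlib module already used by the surrounding code.
def _example_extract_google_target():
    href = "/url?q=http://example.com/files/report.docx&sa=U&ved=0ahUKEw"
    target = urlparse.parse_qs(urlparse.urlparse(href).query)['q'][0]
    # target is now "http://example.com/files/report.docx", which is what would
    # be appended to self.urlList after the webcache.googleusercontent.com filter.
    return target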
def process(self):
    dl = Download.Download(self.verbose)
    try:
        # This returns a JSON object
        url = "https://emailhunter.co/trial/v1/search?offset=0&domain=" + \
            self.domain + "&format=json"
        r = dl.requesturl(url, useragent=self.UserAgent, raw=True)
    except Exception as e:
        error = " [!] Major issue with EmailHunter Search:" + str(e)
        print helpers.color(error, warning=True)
    try:
        results = r.json()
        # pprint(results)
        # Check to make sure we got data back from the API
        if results['status'] == "success":
            # The API starts at 0 for the first value
            x = 0
            EmailCount = int(results['results'])
            # Iterate over the JSON object using the index values
            while x < EmailCount:
                self.results.append(results['emails'][int(x)]['value'])
                x += 1
        if results['status'] == "error":
            error = ' [!] EmailHunter Trial API failed: ' + \
                str(results['message'])
            self.logger.error('EmailHunter Trial API failed: ' + str(results['message']))
            print helpers.color(error, firewall=True)
    except Exception as e:
        pass
    if self.verbose:
        p = ' [*] EmailHunter completed JSON request'
        print helpers.color(p, firewall=True)
def search(self):
    dl = Download.Download(verbose=self.verbose)
    while self.Counter <= self.Depth and self.Counter <= 100:
        helpers.modsleep(5)
        if self.verbose:
            p = ' [*] GitHubUser Search on page: ' + str(self.Counter)
            print helpers.color(p, firewall=True)
        try:
            url = 'https://github.com/search?p=' + str(self.Counter) + '&q=' + \
                str(self.domain) + '&ref=searchresults&type=Users&utf8='
        except Exception as e:
            error = " [!] Major issue with GitHubUser Search:" + str(e)
            print helpers.color(error, warning=True)
        try:
            r = dl.requesturl(url, useragent=self.UserAgent, raw=True, timeout=10)
        except Exception as e:
            error = " [!] Fail during Request to GitHubUser (Check Connection):" + \
                str(e)
            print helpers.color(error, warning=True)
        results = r.content
        self.Html += results
        self.Counter += 1
def process(self):
    dl = Download.Download(self.verbose)
    while self.Counter <= self.PageLimit:
        if self.verbose:
            p = ' [*] AskSearch on page: ' + str(self.Counter)
            print helpers.color(p, firewall=True)
            self.logger.info('AskSearch on page: ' + str(self.Counter))
        try:
            url = 'http://www.ask.com/web?q=@' + str(self.Domain) + \
                '&pu=10&page=' + str(self.Counter)
        except Exception as e:
            error = " [!] Major issue with Ask Search:" + str(e)
            self.logger.error('Major issue with Ask Search: ' + str(e))
            print helpers.color(error, warning=True)
        try:
            rawhtml = dl.requesturl(url, useragent=self.UserAgent)
        except Exception as e:
            error = " [!] Fail during Request to Ask (Check Connection):" + \
                str(e)
            self.logger.error(
                'Fail during Request to Ask (Check Connection): ' + str(e))
            print helpers.color(error, warning=True)
        self.Html += rawhtml
        self.Counter += 1
        helpers.modsleep(self.Sleep, jitter=self.Jitter)
def search(self):
    dl = Download.Download(self.verbose)
    while self.Counter <= self.Limit and self.Counter <= 1000:
        time.sleep(1)
        if self.verbose:
            p = ' [*] RedditPost Search on result: ' + str(self.Counter)
            self.logger.debug(
                "RedditPost Search on result: " + str(self.Counter))
            print helpers.color(p, firewall=True)
        try:
            url = "https://www.reddit.com/search?q=%40" + str(self.Domain) + \
                "&restrict_sr=&sort=relevance&t=all&count=" + str(self.Counter) + \
                '&after=t3_3mkrqg'
        except Exception as e:
            error = " [!] Major issue with RedditPost search:" + str(e)
            self.logger.error(
                "Major issue with RedditPostSearch: " + str(e))
            print helpers.color(error, warning=True)
        try:
            RawHtml = dl.requesturl(url, useragent=self.UserAgent)
        except Exception as e:
            error = " [!] Fail during Request to Reddit (Check Connection):" + \
                str(e)
            self.logger.error(
                "Fail during Request to Reddit (Check Connection): " + str(e))
            print helpers.color(error, warning=True)
        self.Html += RawHtml
        # Reddit seems to increment results by 25 in some cases
        self.Counter += 25
def test_downloads():
    # perform Download testing
    ua = helpers.getua()
    dl = Download.Download(True)
    html = dl.requesturl(
        'http://google.com', ua, timeout=2, retrytime=3, statuscode=False)
    dl.GoogleCaptchaDetection(html)
    f, download = dl.download_file(
        'http://www.sample-videos.com/doc/Sample-doc-file-100kb.doc', '.pdf')
    dl.delete_file(f)
def process(self):
    # Get all the Pastebin raw items
    # https://canary.pw/search/?q=earthlink.net&page=3
    UrlList = []
    dl = Download.Download(verbose=self.verbose)
    while self.Counter <= self.Depth:
        if self.verbose:
            p = ' [*] Canary Search on page: ' + str(self.Counter)
            self.logger.info("CanaryBinSearch on page: " + str(self.Counter))
            print helpers.color(p, firewall=True)
        try:
            url = "https://canary.pw/search/?q=" + str(self.domain) + "&page=" + \
                str(self.Counter)
            rawhtml, statuscode = dl.requesturl(
                url, useragent=self.UserAgent, statuscode=True, verify=False)
            if statuscode != 200:
                break
        except Exception as e:
            error = " [!] Major issue with Canary Pastebin Search:" + \
                str(e)
            self.logger.error(
                'Fail during Request to CanaryBinSearch (Check Connection): ' + str(e))
            print helpers.color(error, warning=True)
        # Parse the results for our URLs
        soup = BeautifulSoup(rawhtml)
        for a in soup.findAll('a', href=True):
            a = a['href']
            if a.startswith('/view'):
                UrlList.append(a)
        self.Counter += 1
    # Now take all gathered URLs and gather the HTML content needed
    Status = " [*] Canary found " + \
        str(len(UrlList)) + " CanaryBin(s) to Search!"
    self.logger.info("CanaryBin found " + str(len(UrlList)) + " CanaryBin(s) to Search!")
    print helpers.color(Status, status=True)
    for item in UrlList:
        try:
            item = "https://canary.pw" + str(item)
            # They can be massive!
            rawhtml = dl.requesturl(item, useragent=self.UserAgent, timeout=20)
            self.Html += rawhtml
        except Exception as e:
            error = " [!] Connection Timed out on Canary Pastebin Search:" + \
                str(e)
            self.logger.error(
                'Fail during Request to CanaryBinSearch bin (Check Connection): ' + str(e))
            print helpers.color(error, warning=True)
def process(self):
    dl = Download.Download(verbose=self.verbose)
    try:
        url = "https://www.flickr.com/search/?text=%40" + self.domain
        rawhtml = dl.requesturl(url, useragent=self.UserAgent)
    except Exception as e:
        error = " [!] Major issue with Flickr Search:" + str(e)
        print helpers.color(error, warning=True)
    self.results += rawhtml
    if self.verbose:
        p = ' [*] FlickrSearch has completed'
        print helpers.color(p, firewall=True)
def process(self):
    dl = Download.Download(verbose=self.verbose)
    # Get all the USER code Repos
    # https://github.com/search?p=2&q=enron.com+&ref=searchresults&type=Code&utf8=✓
    UrlList = []
    while self.Counter <= self.Depth:
        if self.verbose:
            p = " [*] GitHub Gist Search on page: " + str(self.Counter)
            print helpers.color(p, firewall=True)
        try:
            # search?p=2&q=%40enron.com&ref=searchresults&utf8=✓
            url = ("https://gist.github.com/search?p=" + str(self.Counter) +
                   "&q=%40" + str(self.domain) + "+&ref=searchresults&utf8=✓")
            r = dl.requesturl(url, useragent=self.UserAgent, raw=True, timeout=10)
            if r.status_code != 200:
                break
        except Exception as e:
            error = " [!] Major issue with GitHubGist Search:" + str(e)
            print helpers.color(error, warning=True)
        RawHtml = r.content
        # Parse the results for our URLs
        soup = BeautifulSoup(RawHtml)
        for a in soup.findAll("a", href=True):
            a = a["href"]
            if a.startswith("/"):
                UrlList.append(a)
        self.Counter += 1
    # Now take all gathered URLs and gather the HTML content needed
    for url in UrlList:
        try:
            url = "https://gist.github.com" + url
            html = dl.requesturl(url, useragent=self.UserAgent, timeout=10)
            self.Html += html
        except Exception as e:
            error = " [!] Connection Timed out on GithubGist Search:" + str(e)
            print helpers.color(error, warning=True)
def process(self):
    dl = Download.Download(self.verbose)
    try:
        # Check that we have enough requests left to make a search
        url = "https://api.hunter.io/v2/account?api_key=" + self.apikeyv
        r = dl.requesturl(url, useragent=self.UserAgent, raw=True)
        accountInfo = r.json()
        quota = int(accountInfo['data']['calls']['available'])
        quotaUsed = int(accountInfo['data']['calls']['used'])
        if quotaUsed >= self.QuotaLimit:
            overQuotaLimit = True
        else:
            overQuotaLimit = False
    except Exception as e:
        error = " [!] Hunter API error: " + str(
            accountInfo['errors'][0]['details'])
        print helpers.color(error, warning=True)
    try:
        # Hunter's API only allows 100 emails per request, so we check the number of
        # emails Hunter has on our specified domain; if it's over 100 we need to make
        # multiple requests to get all of the emails
        url = "https://api.hunter.io/v2/email-count?domain=" + self.domain
        r = dl.requesturl(url, useragent=self.UserAgent, raw=True)
        response = r.json()
        totalEmails = int(response['data'][self.etype])
        emailsLeft = totalEmails
        offset = 0
    except Exception as e:
        error = " [!] Major issue with Hunter Search: " + str(e)
        print helpers.color(error, warning=True)
    requestsMade = 0
    # Main loop to keep requesting the Hunter API until we get all of the emails they have
    while emailsLeft > 0:
        try:
            if overQuotaLimit or requestsMade + quotaUsed >= self.QuotaLimit:
                if self.verbose:
                    print helpers.color(" [*] You are over your set Quota Limit: " +
                                        str(quotaUsed) + "/" + str(self.QuotaLimit) +
                                        " stopping search", firewall=True)
                break
            elif self.RequestLimit != 0 and requestsMade >= self.RequestLimit:
                if self.verbose:
                    print helpers.color(
                        " [*] Stopping search due to user set Request Limit", firewall=True)
                break
            # This returns a JSON object
            url = "https://api.hunter.io/v2/domain-search?domain=" + \
                self.domain + self.type + "&limit=100&offset=" + str(offset) + \
                "&api_key=" + self.apikeyv
            r = dl.requesturl(url, useragent=self.UserAgent, raw=True)
            results = r.json()
            emailCount = int(results['meta']['results'])
        except Exception as e:
            error = " [!] Hunter API error: " + str(
                results['errors'][0]['details']) + " QUITTING!"
            print helpers.color(error, warning=True)
            break
        try:
            # Make sure we don't exceed the index for the 'emails' array in the 'results' JSON object
            if emailsLeft < 100:
                emailCount = emailsLeft
            if emailCount > 100:
                emailCount = 100
            # One request is counted for every 10 emails delivered
            requestsMade += emailCount // 10
            if emailCount % 10 != 0:
                requestsMade += 1
            # The API starts at 0 for the first value
            x = 0
            # Iterate over the JSON object using the index values
            while x < emailCount:
                self.results.append(
                    results['data']['emails'][int(x)]['value'])
                x += 1
            emailsLeft -= emailCount
            if emailsLeft > 100:
                offset += 100
            else:
                offset += emailsLeft
        except Exception as e:
            error = " [!] Major issue with search parsing: " + str(e)
            print helpers.color(error, warning=True)
            break
    if self.verbose:
        # Print the available requests the user has left if verbose
        print helpers.color(' [*] Hunter has completed JSON request', firewall=True)
        requestsUsed = requestsMade + quotaUsed
        if quota - requestsUsed <= 0:
            print helpers.color(" [*] You have no Hunter requests left. " +
                                "They will refill in about a month", firewall=True)
        else:
            print helpers.color(" [*] You have " + str(requestsUsed) +
                                "/" + str(quota) + " Hunter requests left", firewall=True)
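# --- Illustration only (not part of the original modules) ---
# The Hunter quota accounting above ('// 10' plus a remainder check) is just a
# ceiling division: one request is counted per 10 emails delivered, rounded up.
# A minimal, dependency-free sketch of the same arithmetic (helper name assumed):
def _hunter_requests_for(email_count):
    return -(-email_count // 10)  # ceiling division without importing math

# e.g. 95 emails -> 10 requests, 100 -> 10, 101 -> 11
assert _hunter_requests_for(95) == 10
assert _hunter_requests_for(100) == 10
assert _hunter_requests_for(101) == 11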
def search(self):
    dl = Download.Download(self.verbose)
    while self.Counter <= self.Limit and self.Counter <= 100:
        time.sleep(1)
        if self.verbose:
            p = ' [*] Google CSV Search on page: ' + str(self.Counter)
            print helpers.color(p, firewall=True)
        try:
            url = "https://www.google.com/search?q=site:" + \
                self.Domain + "+filetype:csv&start=" + str(self.Counter)
        except Exception as e:
            error = " [!] Major issue with Google Search:" + str(e)
            print helpers.color(error, warning=True)
        try:
            RawHtml = dl.requesturl(url, useragent=self.UserAgent)
        except Exception as e:
            error = " [!] Fail during Request to Google (Check Connection):" + \
                str(e)
            print helpers.color(error, warning=True)
        # check for captcha
        try:
            # Url = r.url
            dl.GoogleCaptchaDetection(RawHtml)
        except Exception as e:
            print e
        soup = BeautifulSoup(RawHtml)
        # Parse the results for URLs to follow
        for a in soup.findAll('a'):
            try:
                # https://stackoverflow.com/questions/21934004/not-getting-proper-links-
                # from-google-search-results-using-mechanize-and-beautifu/22155412#22155412?
                # newreg=01f0ed80771f4dfaa269b15268b3f9a9
                l = urlparse.parse_qs(
                    urlparse.urlparse(a['href']).query)['q'][0]
                if l.startswith('http') or l.startswith('www'):
                    if "webcache.googleusercontent.com" not in l:
                        self.urlList.append(l)
            except:
                pass
        self.Counter += 10
        helpers.modsleep(self.Sleep, jitter=self.Jitter)
    # now download the required files
    try:
        for url in self.urlList:
            if self.verbose:
                p = ' [*] Google CSV search downloading: ' + str(url)
                print helpers.color(p, firewall=True)
            try:
                filetype = ".csv"
                FileName, FileDownload = dl.download_file2(url, filetype)
                if FileDownload:
                    if self.verbose:
                        p = ' [*] Google CSV file was downloaded: ' + \
                            str(url)
                        print helpers.color(p, firewall=True)
                    with open(FileName) as f:
                        self.Text += f.read()
                # print self.Text
            except Exception as e:
                print helpers.color(" [!] Issue with opening CSV Files\n", firewall=True)
            try:
                dl.delete_file(FileName)
            except Exception as e:
                print e
    except:
        print helpers.color(" [*] No CSV to download from Google!\n", firewall=True)
def search(self):
    convert = Converter.Converter(self.verbose)
    dl = Download.Download(self.verbose)
    while self.Counter <= self.Limit and self.Counter <= 100:
        time.sleep(1)
        if self.verbose:
            p = ' [*] Google PPTX Search on page: ' + str(self.Counter)
            print helpers.color(p, firewall=True)
        try:
            url = "https://www.google.com/search?q=" + \
                self.Domain + "+filetype:pptx&start=" + str(self.Counter)
        except Exception as e:
            error = " [!] Major issue with Google Search:" + str(e)
            print helpers.color(error, warning=True)
        try:
            RawHtml = dl.requesturl(url, useragent=self.UserAgent)
        except Exception as e:
            error = " [!] Fail during Request to Google (Check Connection):" + \
                str(e)
            print helpers.color(error, warning=True)
        # check for captcha
        try:
            # Url = r.url
            dl.GoogleCaptchaDetection(RawHtml)
        except Exception as e:
            print e
        soup = BeautifulSoup(RawHtml)
        # Parse the results for URLs to follow
        for a in soup.findAll('a'):
            try:
                # https://stackoverflow.com/questions/21934004/not-getting-proper-links-
                # from-google-search-results-using-mechanize-and-beautifu/22155412#22155412?
                # newreg=01f0ed80771f4dfaa269b15268b3f9a9
                l = urlparse.parse_qs(urlparse.urlparse(
                    a['href']).query)['q'][0]
                if l.startswith('http') or l.startswith(
                        'www') or l.startswith('https'):
                    if "webcache.googleusercontent.com" not in l:
                        self.urlList.append(l)
                # for some reason PPTX results seem to be cached data:
                l = urlparse.parse_qs(urlparse.urlparse(
                    a['href']).query)['q'][0]
                l = l.split(':', 2)
                if "webcache.googleusercontent.com" not in l[2]:
                    self.urlList.append(l[2])
            except:
                pass
        self.Counter += 10
    # now download the required files
    try:
        for url in self.urlList:
            if self.verbose:
                p = ' [*] Google PPTX search downloading: ' + str(url)
                print helpers.color(p, firewall=True)
            try:
                filetype = ".pptx"
                FileName, FileDownload = dl.download_file2(url, filetype)
                if FileDownload:
                    if self.verbose:
                        p = ' [*] Google PPTX file was downloaded: ' + \
                            str(url)
                        print helpers.color(p, firewall=True)
                    ft = helpers.filetype(FileName).lower()
                    if 'powerpoint' in ft:
                        self.Text += convert.convert_pptx_to_txt(FileName)
                    else:
                        self.logger.warning(
                            'Downloaded file is not a PPTX: ' + ft)
                # print self.Text
            except Exception as e:
                print helpers.color(" [!] Issue with opening PPTX Files\n", firewall=True)
            try:
                dl.delete_file(FileName)
            except Exception as e:
                print e
    except:
        print helpers.color(" [*] No PPTX to download from Google!\n", firewall=True)
def search(self):
    dl = Download.Download(self.verbose)
    convert = Converter.Converter(verbose=self.verbose)
    while self.Counter <= self.Limit and self.Counter <= 10:
        helpers.modsleep(1)
        if self.verbose:
            p = ' [*] Exalead PPTX Search on page: ' + str(self.Counter)
            self.logger.info('ExaleadPPTXSearch on page: ' + str(self.Counter))
            print helpers.color(p, firewall=True)
        try:
            url = 'http://www.exalead.com/search/web/results/?q="%40' + self.Domain + \
                '"+filetype:pptx&elements_per_page=' + \
                str(self.Quanity) + '&start_index=' + str(self.Counter)
        except Exception as e:
            self.logger.error('ExaleadPPTXSearch could not build URL')
            error = " [!] Major issue with Exalead PPTX Search: " + str(e)
            print helpers.color(error, warning=True)
        try:
            RawHtml = dl.requesturl(url, useragent=self.UserAgent)
            # sometimes the url is broken but the Exalead search results still
            # contain e-mail addresses
            self.Text += RawHtml
            soup = BeautifulSoup(RawHtml, "lxml")
            self.urlList = [
                h2.a["href"]
                for h2 in soup.findAll('h4', class_='media-heading')
            ]
        except Exception as e:
            self.logger.error(
                'ExaleadPPTXSearch could not request / parse HTML')
            error = " [!] Fail during parsing result: " + str(e)
            print helpers.color(error, warning=True)
        self.Counter += 30
    # now download the required files
    try:
        for url in self.urlList:
            if self.verbose:
                p = ' [*] Exalead PPTX search downloading: ' + str(url)
                self.logger.info('ExaleadPPTXSearch downloading: ' + str(url))
                print helpers.color(p, firewall=True)
            try:
                filetype = ".pptx"
                dl = Download.Download(self.verbose)
                FileName, FileDownload = dl.download_file(url, filetype)
                if FileDownload:
                    if self.verbose:
                        p = ' [*] Exalead PPTX file was downloaded: ' + \
                            str(url)
                        self.logger.info('ExaleadPPTXSearch downloaded: ' + str(p))
                        print helpers.color(p, firewall=True)
                    ft = helpers.filetype(FileName).lower()
                    if 'powerpoint' in ft:
                        self.Text += convert.convert_zip_to_text(FileName)
                    else:
                        self.logger.warning(
                            'Downloaded file is not a PPTX: ' + ft)
            except Exception as e:
                error = " [!] Issue with opening PPTX Files:%s" % (str(e))
                print helpers.color(error, warning=True)
            try:
                dl.delete_file(FileName)
            except Exception as e:
                print e
    except Exception as e:
        self.logger.error("ExaleadPPTXSearch no docs to download")
        print helpers.color(" [*] No PPTX's to download from Exalead!\n", firewall=True)
    if self.verbose:
        p = ' [*] Searching PPTX from Exalead Complete'
        print helpers.color(p, status=True)
def search(self):
    convert = Converter.Converter(verbose=self.verbose)
    while self.Counter <= self.Limit and self.Counter <= 10:
        time.sleep(1)
        if self.verbose:
            p = ' [*] Exalead Search on page: ' + str(self.Counter)
            print helpers.color(p, firewall=True)
        try:
            url = 'http://www.exalead.com/search/web/results/?q="%40' + self.Domain + \
                '"+filetype:pdf&elements_per_page=' + \
                str(self.Quanity) + '&start_index=' + str(self.Counter)
        except Exception as e:
            error = " [!] Major issue with Exalead PDF Search: " + str(e)
            print helpers.color(error, warning=True)
        try:
            r = requests.get(url, headers=self.UserAgent)
        except Exception as e:
            error = " [!] Fail during Request to Exalead (Check Connection):" + \
                str(e)
            print helpers.color(error, warning=True)
        try:
            RawHtml = r.content
            # sometimes the url is broken but the Exalead search results still
            # contain e-mail addresses
            self.Text += RawHtml
            soup = BeautifulSoup(RawHtml, "lxml")
            self.urlList = [h2.a["href"]
                            for h2 in soup.findAll('h4', class_='media-heading')]
        except Exception as e:
            error = " [!] Fail during parsing result: " + str(e)
            print helpers.color(error, warning=True)
        self.Counter += 30
    # now download the required files
    try:
        for url in self.urlList:
            if self.verbose:
                p = ' [*] Exalead PDF search downloading: ' + str(url)
                print helpers.color(p, firewall=True)
            try:
                filetype = ".pdf"
                dl = Download.Download(self.verbose)
                FileName, FileDownload = dl.download_file(url, filetype)
                if FileDownload:
                    if self.verbose:
                        p = ' [*] Exalead PDF file was downloaded: ' + \
                            str(url)
                        print helpers.color(p, firewall=True)
                    self.Text += convert.convert_pdf_to_txt(FileName)
            except Exception as e:
                pass
            try:
                dl.delete_file(FileName)
            except Exception as e:
                print e
    except:
        print helpers.color(" [*] No PDF's to download from Exalead!\n", firewall=True)
    if self.verbose:
        p = ' [*] Searching PDF from Exalead Complete'
        print helpers.color(p, status=True)
def search(self):
    convert = Converter.Converter(verbose=self.verbose)
    while self.Counter <= self.Limit and self.Counter <= 100:
        time.sleep(1)
        if self.verbose:
            p = " [*] Google XLSX Search on page: " + str(self.Counter)
            self.logger.info("Google XLSX Search on page: " + str(self.Counter))
            print helpers.color(p, firewall=True)
        try:
            urly = ("https://www.google.com/search?q=site:" + self.Domain +
                    "+filetype:xlsx&start=" + str(self.Counter))
        except Exception as e:
            error = " [!] Major issue with Google XLSX Search:" + str(e)
            self.logger.error("GoogleXlsxSearch failed to build url: " + str(e))
            print helpers.color(error, warning=True)
        try:
            r = requests.get(urly)
        except Exception as e:
            error = " [!] Fail during Request to Google (Check Connection):" + \
                str(e)
            self.logger.error(
                "GoogleXlsxSearch failed to request url (Check Connection): " + str(e))
            print helpers.color(error, warning=True)
        RawHtml = r.content
        soup = BeautifulSoup(RawHtml)
        # Parse the results for URLs to follow
        for a in soup.findAll("a"):
            try:
                # https://stackoverflow.com/questions/21934004/not-getting-proper-links-
                # from-google-search-results-using-mechanize-and-beautifu/22155412#22155412?
                # newreg=01f0ed80771f4dfaa269b15268b3f9a9
                l = urlparse.parse_qs(urlparse.urlparse(
                    a["href"]).query)["q"][0]
                if l.startswith("http") or l.startswith("www"):
                    if "webcache.googleusercontent.com" not in l:
                        self.urlList.append(l)
            except:
                pass
        self.Counter += 10
        helpers.modsleep(self.Sleep, jitter=self.Jitter)
    # now download the required files
    self.logger.debug(
        "GoogleXlsxSearch completed HTML result query, starting downloads")
    try:
        for url in self.urlList:
            if self.verbose:
                p = " [*] Google XLSX search downloading: " + str(url)
                self.logger.info("Google XLSX search downloading: " + str(url))
                print helpers.color(p, firewall=True)
            try:
                filetype = ".xlsx"
                dl = Download.Download(self.verbose)
                FileName, FileDownload = dl.download_file(url, filetype)
                if FileDownload:
                    if self.verbose:
                        p = " [*] Google XLSX file was downloaded: " + \
                            str(url)
                        self.logger.info(
                            "Google XLSX file was downloaded: " + str(url))
                        print helpers.color(p, firewall=True)
                    self.Text += convert.convert_Xlsx_to_Csv(FileName)
                # print self.Text
            except Exception as e:
                print helpers.color(" [!] Issue with opening Xlsx Files\n", firewall=True)
                self.logger.error("Google XLSX had issue opening file")
            try:
                dl.delete_file(FileName)
            except Exception as e:
                self.logger.error("Google XLSX failed to delete file: " + str(e))
    except Exception as e:
        print helpers.color(" [*] No XLSX's to download from Google!\n", firewall=True)
        self.logger.error("No XLSX's to download from Google! " + str(e))
def search(self):
    convert = Converter.Converter(verbose=self.verbose)
    while self.Counter <= self.Limit:
        time.sleep(1)
        if self.verbose:
            p = ' [*] Exalead Search on page: ' + str(self.Counter)
            self.logger.info("ExaleadDOCXSearch on page: " + str(self.Counter))
            print helpers.color(p, firewall=True)
        try:
            url = 'http://www.exalead.com/search/web/results/?q="%40' + self.Domain + \
                '"+filetype:docx&elements_per_page=' + \
                str(self.Quanity) + '&start_index=' + str(self.Counter)
        except Exception as e:
            self.logger.error("Issue building URL to search")
            error = " [!] Major issue with Exalead DOCX Search: " + str(e)
            print helpers.color(error, warning=True)
        try:
            r = requests.get(url, headers=self.UserAgent)
        except Exception as e:
            error = " [!] Fail during Request to Exalead (Check Connection):" + \
                str(e)
            print helpers.color(error, warning=True)
        try:
            RawHtml = r.content
            # sometimes the url is broken but the Exalead search results still
            # contain e-mail addresses
            self.Text += RawHtml
            soup = BeautifulSoup(RawHtml, "lxml")
            self.urlList = [
                h2.a["href"]
                for h2 in soup.findAll('h4', class_='media-heading')
            ]
        except Exception as e:
            self.logger.error("Fail during parsing result: " + str(e))
            error = " [!] Fail during parsing result: " + str(e)
            print helpers.color(error, warning=True)
        self.Counter += 30
    # now download the required files
    try:
        for url in self.urlList:
            if self.verbose:
                p = ' [*] Exalead DOCX search downloading: ' + str(url)
                self.logger.info("Starting download of DOCX: " + str(url))
                print helpers.color(p, firewall=True)
            try:
                filetype = ".docx"
                dl = Download.Download(self.verbose)
                FileName, FileDownload = dl.download_file(url, filetype)
                if FileDownload:
                    if self.verbose:
                        self.logger.info("File was downloaded: " + str(url))
                        p = ' [*] Exalead DOCX file was downloaded: ' + \
                            str(url)
                        print helpers.color(p, firewall=True)
                    self.Text += convert.convert_docx_to_txt(FileName)
            except Exception as e:
                self.logger.error("Issue with opening DOCX Files: " + str(e))
                error = " [!] Issue with opening DOCX Files:%s\n" % (str(e))
                print helpers.color(error, warning=True)
            try:
                dl.delete_file(FileName)
            except Exception as e:
                print e
    except Exception as e:
        p = " [*] No DOCX's to download from Exalead: " + str(e)
        self.logger.info("No DOCX's to download from Exalead: " + str(e))
        print helpers.color(p, firewall=True)
    if self.verbose:
        p = ' [*] Searching DOCX from Exalead Complete'
        self.logger.info("Searching DOCX from Exalead Complete")
        print helpers.color(p, status=True)
def search(self):
    # setup for helpers in the download class
    convert = Converter.Converter(verbose=self.verbose)
    dl = Download.Download(self.verbose)
    while self.Counter <= self.Limit and self.Counter <= 100:
        time.sleep(1)
        if self.verbose:
            p = ' [*] Google PDF Search on page: ' + str(self.Counter)
            print helpers.color(p, firewall=True)
        try:
            urly = "https://www.google.com/search?q=site:" + \
                self.Domain + "+filetype:pdf&start=" + str(self.Counter)
        except Exception as e:
            error = " [!] Major issue with Google Search:" + str(e)
            print helpers.color(error, warning=True)
        try:
            r = requests.get(urly)
        except Exception as e:
            error = " [!] Fail during Request to Google (Check Connection):" + \
                str(e)
            print helpers.color(error, warning=True)
        RawHtml = r.content
        # get redirect URL
        # Url = r.url
        dl.GoogleCaptchaDetection(RawHtml)
        soup = BeautifulSoup(RawHtml)
        for a in soup.findAll('a'):
            try:
                # https://stackoverflow.com/questions/21934004/not-getting-proper-links-
                # from-google-search-results-using-mechanize-and-beautifu/22155412#22155412?
                # newreg=01f0ed80771f4dfaa269b15268b3f9a9
                l = urlparse.parse_qs(
                    urlparse.urlparse(a['href']).query)['q'][0]
                if l.startswith('http') or l.startswith('www'):
                    if "webcache.googleusercontent.com" not in l:
                        self.urlList.append(l)
            except:
                pass
        self.Counter += 10
    # now download the required files
    try:
        for url in self.urlList:
            if self.verbose:
                p = ' [*] Google PDF search downloading: ' + str(url)
                print helpers.color(p, firewall=True)
            try:
                filetype = ".pdf"
                # use the helper class to download the file
                FileName, FileDownload = dl.download_file(url, filetype)
                # check if the file was downloaded
                if FileDownload:
                    if self.verbose:
                        p = ' [*] Google PDF file was downloaded: ' + \
                            str(url)
                        print helpers.color(p, firewall=True)
                    self.Text += convert.convert_pdf_to_txt(FileName)
            except Exception as e:
                print e
            try:
                # now remove any files left behind
                dl.delete_file(FileName)
            except Exception as e:
                print e
    except:
        print helpers.color(" [*] No PDF's to download from Google!\n", firewall=True)
def search(self):
    dl = Download.Download(self.verbose)
    while self.Counter <= self.Limit and self.Counter <= 100:
        time.sleep(1)
        if self.verbose:
            p = ' [*] Google Search for PasteBin on page: ' + \
                str(self.Counter)
            self.logger.info("GooglePasteBinSearch on page: " + str(self.Counter))
            print helpers.color(p, firewall=True)
        try:
            url = "http://www.google.com/search?num=" + str(self.Quanity) + "&start=" + str(self.Counter) + \
                '&hl=en&meta=&q=site:pastebin.com+"%40' + \
                self.Domain + '"'
        except Exception as e:
            error = " [!] Major issue with Google Search for PasteBin:" + \
                str(e)
            self.logger.error(
                "GooglePasteBinSearch could not create URL: " + str(e))
            print helpers.color(error, warning=True)
        try:
            r = requests.get(url, headers=self.UserAgent)
        except Exception as e:
            error = " [!] Fail during Request to PasteBin (Check Connection):" + \
                str(e)
            self.logger.error(
                "Fail during Request to PasteBin (Check Connection): " + str(e))
            print helpers.color(error, warning=True)
        try:
            RawHtml = r.content
            try:
                # check for captcha in the source
                dl.GoogleCaptchaDetection(RawHtml)
            except Exception as e:
                self.logger.error("Issue checking for captcha: " + str(e))
            soup = BeautifulSoup(RawHtml, "lxml")
            for a in soup.select('.r a'):
                # remove urls like pastebin.com/u/Anonymous
                if "/u/" not in str(a['href']):
                    self.urlList.append(a['href'])
        except Exception as e:
            error = " [!] Fail during parsing result: " + str(e)
            self.logger.error(
                "PasteBinSearch Fail during parsing result: " + str(e))
            print helpers.color(error, warning=True)
        self.Counter += 100
    # Now take all gathered URLs and gather the raw content needed
    for Url in self.urlList:
        try:
            Url = "http://pastebin.com/raw/" + str(Url).split('/')[3]
            data = requests.get(Url, timeout=2)
            self.Text += data.content
        except Exception as e:
            error = " [!] Connection Timed out on PasteBin Search:" + str(e)
            self.logger.error(
                "Connection Timed out on PasteBin raw download: " + str(e))
            print helpers.color(error, warning=True)
    if self.verbose:
        p = ' [*] Searching PasteBin Complete'
        self.logger.info("Searching PasteBin Complete")
        print helpers.color(p, firewall=True)
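# --- Illustration only (not part of the original modules) ---
# A hedged sketch of the raw-paste URL rewrite used in the PasteBin loop above:
# index 3 of the '/' split is the paste ID for a typical result URL. The sample
# URL and helper name below are assumptions for demonstration.
def _example_pastebin_raw_url():
    result_url = "http://pastebin.com/AbCdEf12"
    paste_id = str(result_url).split('/')[3]   # -> "AbCdEf12"
    return "http://pastebin.com/raw/" + paste_id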