def search(self): dl = Download.Download(self.verbose) while self.Counter <= self.Limit and self.Counter <= 1000: time.sleep(1) if self.verbose: p = ' [*] RedditPost Search on result: ' + str(self.Counter) self.logger.debug( "RedditPost Search on result: " + str(self.Counter)) print helpers.color(p, firewall=True) try: url = "https://www.reddit.com/search?q=%40" + str(self.Domain) + \ "&restrict_sr=&sort=relevance&t=all&count=" + str(self.Counter) + \ '&after=t3_3mkrqg' except Exception as e: error = " [!] Major issue with RedditPost search:" + str(e) self.logger.error( "Major issue with RedditPostSearch: " + str(e)) print helpers.color(error, warning=True) try: RawHtml = dl.requesturl(url, useragent=self.UserAgent) except Exception as e: error = " [!] Fail during Request to Reddit (Check Connection):" + \ str(e) self.logger.error( "Fail during Request to Reddit (Check Connection): " + str(e)) print helpers.color(error, warning=True) self.Html += RawHtml # reddit seems to increment by 25 in cases self.Counter += 25
def search(self): dl = Download.Download(self.verbose) while self.Counter <= self.Limit and self.Counter <= 1000: time.sleep(1) if self.verbose: p = ' [*] Google Search on page: ' + str(self.Counter) print helpers.color(p, firewall=True) try: url = "http://www.google.com/search?num=" + str(self.Quanity) + "&start=" + \ str(self.Counter) + "&hl=en&meta=&q=%40\"" + \ self.Domain + "\"" except Exception as e: error = " [!] Major issue with Google Search:" + str(e) print helpers.color(error, warning=True) try: results = dl.requesturl(url, useragent=self.UserAgent) except Exception as e: error = " [!] Fail during Request to Google (Check Connection):" + \ str(e) print helpers.color(error, warning=True) try: # Url = r.url dl.GoogleCaptchaDetection(results) except Exception as e: print e self.Html += results self.Counter += 100 helpers.modsleep(self.Sleep, jitter=self.Jitter)
def search(self): dl = Download.Download(verbose=self.verbose) while self.Counter <= self.Depth and self.Counter <= 100: helpers.modsleep(5) if self.verbose: p = ' [*] GitHubUser Search on page: ' + str(self.Counter) print helpers.color(p, firewall=True) try: url = 'https://github.com/search?p=' + str(self.Counter) + '&q=' + \ str(self.domain) + 'ref=searchresults&type=Users&utf8=' except Exception as e: error = " [!] Major issue with GitHubUser Search:" + str(e) print helpers.color(error, warning=True) try: r = dl.requesturl(url, useragent=self.UserAgent, raw=True, timeout=10) except Exception as e: error = " [!] Fail during Request to GitHubUser (Check Connection):" + \ str(e) print helpers.color(error, warning=True) results = r.content self.Html += results self.Counter += 1
def process(self): dl = Download.Download(self.verbose) try: # This returns a JSON object url = "https://emailhunter.co/trial/v1/search?offset=0&domain=" + \ self.domain + "&format=json" r = dl.requesturl(url, useragent=self.UserAgent, raw=True) except Exception as e: error = "[!] Major issue with EmailHunter Search:" + str(e) print helpers.color(error, warning=True) try: results = r.json() # pprint(results) # Check to make sure we got data back from the API if results['status'] == "success": # The API starts at 0 for the first value x = 0 EmailCount = int(results['results']) # We will itirate of the Json object for the index objects while x < EmailCount: self.results.append(results['emails'][int(x)]['value']) x += 1 if results['status'] == "error": # The API starts at 0 for the first value error = ' [!] EmailHunter Trial API failed: ' + \ str(results['message']) self.logger.error('EmailHunter Trial API failed: ' + str(results['message'])) print helpers.color(error, firewall=True) except Exception as e: pass if self.verbose: p = ' [*] EmailHunter completed JSON request' print helpers.color(p, firewall=True)
def process(self): dl = Download.Download(self.verbose) while self.Counter <= self.PageLimit: if self.verbose: p = ' [*] AskSearch on page: ' + str(self.Counter) print helpers.color(p, firewall=True) self.logger.info('AskSearch on page: ' + str(self.Counter)) try: url = 'http://www.ask.com/web?q=@' + str(self.Domain) + \ '&pu=10&page=' + str(self.Counter) except Exception as e: error = " [!] Major issue with Ask Search:" + str(e) self.logger.error('Major issue with Ask Search: ' + str(e)) print helpers.color(error, warning=True) try: rawhtml = dl.requesturl(url, useragent=self.UserAgent) except Exception as e: error = " [!] Fail during Request to Ask (Check Connection):" + \ str(e) self.logger.error( 'Fail during Request to Ask (Check Connection): ' + str(e)) print helpers.color(error, warning=True) self.Html += rawhtml self.Counter += 1 helpers.modsleep(self.Sleep, jitter=self.Jitter)
def test_downloads():
    # Smoke-test the Download helper end to end: fetch a page, run the
    # captcha detector over it, then download a sample file and remove it.
    useragent = helpers.getua()
    downloader = Download.Download(True)
    page = downloader.requesturl(
        'http://google.com', useragent, timeout=2, retrytime=3, statuscode=False)
    downloader.GoogleCaptchaDetection(page)
    path, fetched = downloader.download_file(
        'http://www.sample-videos.com/doc/Sample-doc-file-100kb.doc', '.pdf')
    downloader.delete_file(path)
def process(self): dl = Download.Download(verbose=self.verbose) # Get all the USER code Repos # https://github.com/search?p=2&q=enron.com+&ref=searchresults&type=Code&utf8=✓ UrlList = [] while self.Counter <= self.Depth: if self.verbose: p = " [*] GitHub Gist Search Search on page: " + str( self.Counter) print helpers.color(p, firewall=True) try: # search?p=2&q=%40enron.com&ref=searchresults&utf8=✓ url = ("https://gist.github.com/search?p=" + str(self.Counter) + "&q=%40" + str(self.domain) + "+&ref=searchresults&utf8=✓") r = dl.requesturl(url, useragent=self.UserAgent, raw=True, timeout=10) if r.status_code != 200: break except Exception as e: error = " [!] Major issue with GitHubGist Search:" + str(e) print helpers.color(error, warning=True) RawHtml = r.content # Parse the results for our URLS) soup = BeautifulSoup(RawHtml) for a in soup.findAll("a", href=True): a = a["href"] if a.startswith("/"): UrlList.append(a) self.Counter += 1 # Now take all gathered URL's and gather the HTML content needed for url in UrlList: try: url = "https://gist.github.com" + url html = dl.requesturl(url, useragent=self.UserAgent, timeout=10) self.Html += html except Exception as e: error = " [!] Connection Timed out on GithubGist Search:" + str( e) print helpers.color(error, warning=True)
def search(self): # setup for helpers in the download class convert = Converter.Converter(verbose=self.verbose) dl = Download.Download(self.verbose) while self.Counter <= self.Limit and self.Counter <= 100: time.sleep(1) if self.verbose: p = ' [*] Google PDF Search on page: ' + str(self.Counter) print helpers.color(p, firewall=True) try: urly = "https://www.google.com/search?q=site:" + \ self.Domain + "+filetype:pdf&start=" + str(self.Counter) except Exception as e: error = " [!] Major issue with Google Search:" + str(e) print helpers.color(error, warning=True) try: r = requests.get(urly) except Exception as e: error = " [!] Fail during Request to Google (Check Connection):" + \ str(e) print helpers.color(error, warning=True) RawHtml = r.content # get redirect URL # Url = r.url dl.GoogleCaptchaDetection(RawHtml) soup = BeautifulSoup(RawHtml) for a in soup.findAll('a'): try: # https://stackoverflow.com/questions/21934004/not-getting-proper-links- # from-google-search-results-using-mechanize-and-beautifu/22155412#22155412? 
# newreg=01f0ed80771f4dfaa269b15268b3f9a9 l = urlparse.parse_qs( urlparse.urlparse(a['href']).query)['q'][0] if l.startswith('http') or l.startswith('www'): if "webcache.googleusercontent.com" not in l: self.urlList.append(l) except: pass self.Counter += 10 # now download the required files try: for url in self.urlList: if self.verbose: p = ' [*] Google PDF search downloading: ' + str(url) print helpers.color(p, firewall=True) try: filetype = ".pdf" # use new helper class to download file FileName, FileDownload = dl.download_file(url, filetype) # check if the file was downloaded if FileDownload: if self.verbose: p = ' [*] Google PDF file was downloaded: ' + \ str(url) print helpers.color(p, firewall=True) self.Text += convert.convert_pdf_to_txt(FileName) except Exception as e: print e try: # now remove any files left behind dl.delete_file(FileName) except Exception as e: print e except: print helpers.color(" [*] No PDF's to download from Google!\n", firewall=True)
def search(self): dl = Download.Download(self.verbose) while self.Counter <= self.Limit and self.Counter <= 100: time.sleep(1) if self.verbose: p = ' [*] Google Search for PasteBin on page: ' + \ str(self.Counter) self.logger.info("GooglePasteBinSearch on page: " + str(self.Counter)) print helpers.color(p, firewall=True) try: url = "http://www.google.com/search?num=" + str(self.Quanity) + "&start=" + str(self.Counter) + \ '&hl=en&meta=&q=site:pastebin.com+"%40' + \ self.Domain + '"' except Exception as e: error = " [!] Major issue with Google Search for PasteBin:" + \ str(e) self.logger.error( "GooglePasteBinSearch could not create URL: " + str(e)) print helpers.color(error, warning=True) try: r = requests.get(url, headers=self.UserAgent) except Exception as e: error = " [!] Fail during Request to PasteBin (Check Connection):" + str( e) self.logger.error( "Fail during Request to PasteBin (Check Connection): " + str(e)) print helpers.color(error, warning=True) try: RawHtml = r.content try: # check for captcha in the source dl.GoogleCaptchaDetection(RawHtml) except Exception as e: self.logger.error("Issue checking for captcha: " + str(e)) soup = BeautifulSoup(RawHtml, "lxml") for a in soup.select('.r a'): # remove urls like pastebin.com/u/Anonymous if "/u/" not in str(a['href']): self.urlList.append(a['href']) except Exception as e: error = " [!] Fail during parsing result: " + str(e) self.logger.error( "PasteBinSearch Fail during parsing result: " + str(e)) print helpers.color(error, warning=True) self.Counter += 100 # Now take all gathered URL's and gather the Raw content needed for Url in self.urlList: try: Url = "http://pastebin.com/raw/" + str(Url).split('/')[3] data = requests.get(Url, timeout=2) self.Text += data.content except Exception as e: error = "[!] 
Connection Timed out on PasteBin Search:" + str(e) self.logger.error( "Connection Timed out on PasteBin raw download: " + str(e)) print helpers.color(error, warning=True) if self.verbose: p = ' [*] Searching PasteBin Complete' self.logger.info("Searching PasteBin Complete") print helpers.color(p, firewall=True)
def search(self): convert = Converter.Converter(verbose=self.verbose) while self.Counter <= self.Limit and self.Counter <= 100: time.sleep(1) if self.verbose: p = " [*] Google XLSX Search on page: " + str(self.Counter) self.logger.info("Google XLSX Search on page: " + str(self.Counter)) print helpers.color(p, firewall=True) try: urly = ("https://www.google.com/search?q=site:" + self.Domain + "+filetype:xlsx&start=" + str(self.Counter)) except Exception as e: error = " [!] Major issue with Google XLSX Search:" + str(e) self.logger.error("GoogleXlsxSearch failed to build url: " + str(e)) print helpers.color(error, warning=True) try: r = requests.get(urly) except Exception as e: error = " [!] Fail during Request to Google (Check Connection):" + str( e) self.logger.error( "GoogleXlsxSearch failed to request url (Check Connection): " + str(e)) print helpers.color(error, warning=True) RawHtml = r.content soup = BeautifulSoup(RawHtml) # I use this to parse my results, for URLS to follow for a in soup.findAll("a"): try: # https://stackoverflow.com/questions/21934004/not-getting-proper-links- # from-google-search-results-using-mechanize-and-beautifu/22155412#22155412? 
# newreg=01f0ed80771f4dfaa269b15268b3f9a9 l = urlparse.parse_qs(urlparse.urlparse( a["href"]).query)["q"][0] if l.startswith("http") or l.startswith("www"): if "webcache.googleusercontent.com" not in l: self.urlList.append(l) except: pass self.Counter += 10 helpers.modsleep(self.Sleep, jitter=self.Jitter) # now download the required files self.logger.debug( "GoogleXlsxSearch completed HTML result query, starting downloads") try: for url in self.urlList: if self.verbose: p = " [*] Google XLSX search downloading: " + str(url) self.logger.info("Google XLSX search downloading: " + str(url)) print helpers.color(p, firewall=True) try: filetype = ".xlsx" dl = Download.Download(self.verbose) FileName, FileDownload = dl.download_file(url, filetype) if FileDownload: if self.verbose: p = " [*] Google XLSX file was downloaded: " + str( url) self.logger.info( "Google XLSX file was downloaded: " + str(url)) print helpers.color(p, firewall=True) self.Text += convert.convert_Xlsx_to_Csv(FileName) # print self.Text except Exception as e: print helpers.color(" [!] Issue with opening Xlsx Files\n", firewall=True) self.logger.error("Google XLSX had issue opening file") try: dl.delete_file(FileName) except Exception as e: self.logger.error("Google XLSX failed to delete file: " + str(e)) except Exception as e: print helpers.color(" [*] No XLSX's to download from google!\n", firewall=True) self.logger.error("No XLSX's to download from google! " + str(e))
def process(self): dl = Download.Download(self.verbose) try: # We will check to see that we have enough requests left to make a search url = "https://api.hunter.io/v2/account?api_key=" + self.apikeyv r = dl.requesturl(url, useragent=self.UserAgent, raw=True) accountInfo = r.json() quota = int(accountInfo['data']['calls']['available']) quotaUsed = int(accountInfo['data']['calls']['used']) if quotaUsed >= self.QuotaLimit: overQuotaLimit = True else: overQuotaLimit = False except Exception as e: error = " [!] Hunter API error: " + str( accountInfo['errors'][0]['details']) print helpers.color(error, warning=True) try: # Hunter's API only allows 100 emails per request, so we check the number of emails Hunter has # on our specified domain, and if it's over 100 we need to make multiple requests to get all of the emails url = "https://api.hunter.io/v2/email-count?domain=" + self.domain r = dl.requesturl(url, useragent=self.UserAgent, raw=True) response = r.json() totalEmails = int(response['data'][self.etype]) emailsLeft = totalEmails offset = 0 except Exception as e: error = "[!] 
Major issue with Hunter Search: " + str(e) print helpers.color(error, warning=True) requestsMade = 0 # Main loop to keep requesting the Hunter API until we get all of the emails they have while emailsLeft > 0: try: if overQuotaLimit or requestsMade + quotaUsed >= self.QuotaLimit: if self.verbose: print helpers.color(" [*] You are over your set Quota Limit: " + \ str(quotaUsed) + "/" + str(self.QuotaLimit) + " stopping search", firewall=True) break elif self.RequestLimit != 0 and requestsMade >= self.RequestLimit: if self.verbose: print helpers.color( " [*] Stopping search due to user set Request Limit", firewall=True) break # This returns a JSON object url = "https://api.hunter.io/v2/domain-search?domain=" + \ self.domain + self.type + "&limit=100&offset=" + str(offset) + "&api_key=" + self.apikeyv r = dl.requesturl(url, useragent=self.UserAgent, raw=True) results = r.json() emailCount = int(results['meta']['results']) except Exception as e: error = " [!] Hunter API error: " + str( results['errors'][0]['details']) + " QUITTING!" print helpers.color(error, warning=True) break try: # Make sure we don't exceed the index for the 'emails' array in the 'results' Json object if emailsLeft < 100: emailCount = emailsLeft if emailCount > 100: emailCount = 100 # 1 request is every 10 emails delivered requestsMade += emailCount // 10 if emailCount % 10 != 0: requestsMade += 1 # The API starts at 0 for the first value x = 0 # We will itirate of the Json object for the index objects while x < emailCount: self.results.append( results['data']['emails'][int(x)]['value']) x += 1 emailsLeft -= emailCount if emailsLeft > 100: offset += 100 else: offset += emailsLeft except Exception as e: error = " [!] 
Major issue with search parsing: " + str(e) print helpers.color(error, warning=True) break if self.verbose: # Print the avalible requests user has if verbose print helpers.color(' [*] Hunter has completed JSON request', firewall=True) requestsUsed = requestsMade + quotaUsed if quota - requestsUsed <= 0: print helpers.color(" [*] You have no Hunter requests left." \ + "They will refill in about a month", firewall=True) else: print helpers.color(" [*] You have " + str(requestsUsed) \ + "/" + str(quota) + " Hunter requests left", firewall=True)