Example #1
    def extract(self, url):
        # The cookies and headers mimic a real browser session so VirusTotal
        # serves the regular HTML page instead of a captcha.
        headers = {
            "Cookie": "VT_PREFERRED_LANGUAGE=en; __utma=194538546.1669411933.1482685628.1482685628.1482685628.1;"
                      " __utmb=194538546.1.10.1482685628; __utmc=194538546;"
                      " __utmz=194538546.1482685628.1.1."
                      "utmcsr=google|utmccn=(organic)|utmcmd=organic|utmctr=(not%20provided);"
                      " __utmt=1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0",
            "Host": "www.virustotal.com"
        }
        content = self.requester.get(url, headers).text
        if not self.has_error(content):
            try:
                soup = BeautifulSoup(content, "html5lib")
                # Subdomain links open in a new tab and carry no CSS class.
                search_tags = soup.find_all("a", attrs={"target": "_blank", "class": None}, href=True)
                for tag in search_tags:
                    domain = tag.string
                    if domain is None:
                        # Past the end of the subdomain list; stop scanning.
                        break
                    self.add(domain.strip())
            except Exception:
                logger.exception("VirusTotal plugin: failed to parse the response")
        else:
            logger.error("Captcha detected while running the VirusTotal plugin")
Example #2
import socket
import struct

import dnslib


def query(host_name, query_type='ANY', name_server='8.8.8.8', tcp=True):
    """
    Thin wrapper around dnslib's query flow.
    :return: a list of dicts describing the answer records
    """
    # logger.info("Querying %s with type %s and name server %s" % (host_name, query_type, name_server))
    # logger and DnsQueryException are assumed to be defined elsewhere in the module.
    results = []
    try:
        _query = dnslib.DNSRecord.question(host_name, query_type.upper().strip())
        response_raw = _query.send(name_server, tcp=tcp, timeout=2)
        response_parsed = dnslib.DNSRecord.parse(response_raw)

        for r in response_parsed.rr:
            try:
                _type = str(dnslib.QTYPE[r.rtype])
            except dnslib.dns.DNSError:
                # The server sent a record type dnslib does not know;
                # fall back to the numeric code.
                _type = str(r.rtype)
            _host = str(r.rname).rstrip(".")
            _data = str(r.rdata)
            results.append({'host': _host, 'type': _type, 'data': _data})

    except socket.error:
        logger.error("The DNS query timed out and was aborted")
    except struct.error:
        logger.error("Could not decode the response of the DNS query")
    except Exception:
        raise DnsQueryException
    return results
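
A minimal usage sketch for the query helper above; the domain and resolver are illustrative, and dnslib plus network access are assumed:

# Illustrative only: fetch the A records of a host through Google's
# public resolver and print each parsed answer.
for record in query("example.com", query_type="A", name_server="8.8.8.8"):
    print("%(host)s %(type)s %(data)s" % record)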
Example #3
    def create(self):
        """
        Create an instance of a plugin
        """
        try:
            # Plugins live under extensions/plugins/<type>/<file>.py and
            # expose a class named after the plugin.
            module = "extensions.plugins.%s.%s" % (self.plugin_type, self._get_plugin_file_name())
            plugin_class = getattr(importlib.import_module(module), self.plugin_name)
            instance = plugin_class()
            instance.set_requester(self.requester)
            return instance
        except ImportError as e:
            logger.exception(str(e))
            logger.error("Could not load plugin %s.%s" % (self.plugin_type, self._get_plugin_file_name()))
            return None
Example #4
    def post(self, url, data=None, headers=None):
        if headers is None:
            headers = self._headers

        # Retry the request up to three times before giving up.
        for _ in range(3):
            try:
                return requests.post(url, headers=headers, proxies=self._proxies, data=data, timeout=60)
            except requests.exceptions.Timeout:
                logger.error("The request took too long and was killed")
                logger.info("Trying to reconnect...")
            except Exception:
                logger.error("The request failed for an unknown reason")
                logger.info("Trying to reconnect...")
        return None
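
A small sketch of how this retrying post helper might be exercised; the Requester class name, its constructor, and the URL are hypothetical stand-ins for the enclosing class, which is assumed to set self._headers and self._proxies:

# Hypothetical wiring, for illustration only.
requester = Requester()
response = requester.post("https://example.com/search", data={"query": "example.com"})
if response is not None:
    print(response.status_code)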
Example #5
    def extract(self, url):
        content = self.requester.get(url).text
        if self.has_error(content):
            logger.error("Cannot find any result for: %s" %
                         self.base_domain.domain_name)
            return None

        _soup = BeautifulSoup(content, "html5lib")

        if not _soup.find_all("em"):
            logger.error("The site seems to have blocked your requests")
            return None

        _last = ""
        _from = ""

        # The first <em> tag holds the total number of search results.
        total = int(_soup.find_all("em")[0].string.split()[1])
        logger.info("Total of results: %d for: %s" %
                    (total, self.base_domain.domain_name))

        if total > 20:
            # Results come 20 per page; walk the remaining pages.
            count = total // 20
            for page in range(count + 1):
                url_temp = ""
                r = self.requester.get(url + _last + _from)
                soup = BeautifulSoup(r.text, "html5lib")
                search_region = BeautifulSoup(
                    str(soup.find_all("table", attrs={"class": "TBtable"})),
                    "lxml")

                for item in search_region.find_all('a', attrs={"rel": True}):
                    url_temp = self.parse_domain_name(item['href'])
                    self.add(url_temp)
                # Feed the last domain and the next offset into the next request.
                _last = "&last=" + url_temp
                _from = "&from=" + str((page + 1) * 20 + 1)
        else:
            search_region = BeautifulSoup(
                str(_soup.find_all("table", attrs={"class": "TBtable"})),
                "lxml")
            for item in search_region.find_all('a', attrs={"rel": True}):
                url_temp = self.parse_domain_name(item['href'])
                self.add(url_temp)
Example #6
    def get_total_page(self):
        url = self.base_url.format(query=self.get_query(), page=self.max_page)
        content = self.requester.get(url).text
        try:
            if not self.has_error(content):
                soup = BeautifulSoup(content, "html5lib")
                # The last pagination link carries the highest page number
                # in its aria-label, e.g. "Page 12".
                tag_a = soup.find_all('a', attrs={'class': 'fl'})
                try:
                    num_page = tag_a[-1]['aria-label']
                    return int(num_page.split()[1])
                except (IndexError, KeyError, ValueError):
                    logger.error("Could not get total_page, returning 0")
                    return 0
            else:
                logger.error("Google seems to have blocked the request")
                return 0
        except Exception:
            return 0
Example #7
    def extract(self, url):
        try:
            header = {
                "User-Agent":
                "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"
            }
            content = self.requester.get(url, header).text
            soup = BeautifulSoup(content, "html5lib")
            # Result links carry Yahoo's result-page CSS classes.
            search = soup.find_all("a",
                                   attrs={"class": " ac-algo ac-21th lh-24"})

            for line in search:
                try:
                    # The target URL is packed into the eighth path segment
                    # of Yahoo's redirect link.
                    raw_url = line['href'].split("/")[7].split("=")[1]
                    self.add(self.parse_domain_name(raw_url))
                except (IndexError, KeyError):
                    logger.error("Yahoo plugin: can not extract the domain")
        except Exception:
            pass
Example #8
    def get_total_page(self):
        max_page_temp = self.max_page

        while max_page_temp >= 0:
            url = self.base_url.format(query=self.get_query(),
                                       page=max_page_temp)
            header = {
                "User-Agent":
                "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"
            }
            content = self.requester.get(url, header).text
            if self.has_error(content):
                logger.error("Too many requests; Yahoo noticed and blocked them")
                return 0

            # Only a genuine no-results page shows both messages below.
            no_results = ("We did not find results for" in content
                          and "Check spelling or type a new query" in content)
            if no_results:
                logger.info(
                    "Yahoo plugin: max_page down to %d since the bot can not "
                    "get any info about total_page" % max_page_temp)
                max_page_temp -= 10
                continue

            list_seed = []
            soup = BeautifulSoup(content, "html5lib")
            # Pagination links are plain anchors with a title and no CSS class.
            search_page = soup.find_all("a",
                                        href=True,
                                        title=True,
                                        attrs={'class': None})
            for i in search_page:
                list_seed.append(int(i.string))
            current = soup.find("strong")
            try:
                list_seed.append(int(current.string))
            except (AttributeError, ValueError):
                logger.error(
                    "Yahoo plugin: failed to read the current page number; either "
                    "there are no more subdomains for this base domain, or Yahoo "
                    "may have blocked the requests")
            if not list_seed:
                return 0
            return max(list_seed)
        return 0
Example #9
    def get_total_page(self):
        max_page_temp = self.max_page

        while max_page_temp >= 0:
            url = self.base_url.format(query=self.get_query(), page=max_page_temp)
            r = self.requester.get(url)

            if r is None:
                return 0

            content = r.text

            if self.was_blocked(content):
                logger.error("Ask blocked the request")
                return 0

            elif self.was_not_found(content):
                logger.info(
                    "Ask plugin: max_page down to %d for domain: %s" % (max_page_temp, self.base_domain.domain_name))
                max_page_temp -= 5

            else:
                soup = BeautifulSoup(content, "html5lib")
                # Pagination anchors are tagged with Ask's accessibility attribute.
                search_pages = soup.find_all("a", attrs={"ul-attr-accn": "pagination"})
                list_no_page = []
                for tag in search_pages:
                    try:
                        list_no_page.append(int(tag.string))
                    except (TypeError, ValueError):
                        # Skip anchors such as "Next" that carry no page number.
                        continue
                if not list_no_page:
                    logger.debug(soup)
                    return 1
                return max(list_no_page)
        return 0