Python urlparse примеры использования

Язык программирования: Python

Пространство имен/Пакет: lib.modules.passive.domain.engine.urlparse

Метод/Функция: urlparse

Примеров на hotexamples.com: 6

Python urlparse — найдено 6 примеров. Это лучшие примеры кода на Python для lib.modules.passive.domain.engine.urlparse.urlparse, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

    def extract(self, content):
        """Collect subdomains of the target from one page of HTML.

        Every ``<cite>`` entry that mentions the target domain is parsed, and
        each host that is a proper subdomain of ``self.target.netloc`` is
        added to ``self.subdomains``.  ``self.find_new_domain`` is reset on
        every call and set to True once a previously unseen subdomain is
        recorded.

        :param content: HTML text of the page to scan.
        :return: True if the page contains the next-page marker, else False.
        """
        target = self.target.netloc
        # Escape the domain so that its dots only match literal dots.
        pattern = re.compile(
            '<cite class=".*?">(.*?{domain})/.*?</cite>'.format(
                domain=re.escape(target)))
        next_page = "下一页"
        # A real subdomain ends with ".<target>"; a bare suffix test would
        # also accept unrelated hosts ("notexample.com" for "example.com").
        suffix = "." + target

        self.find_new_domain = False
        for link in pattern.findall(content):
            if not link.startswith(('http://', 'https://')):
                link = "http://" + link
            try:
                subdomain = urlparse.urlparse(link).netloc
            except ValueError:
                # One malformed link must not discard the rest of the page.
                continue

            if subdomain.endswith(suffix) and subdomain not in self.subdomains:
                self.logger.info("{engine} Found {subdomain}".format(
                    engine=self.engine_name, subdomain=subdomain))
                self.subdomains.update([subdomain])
                self.find_new_domain = True

        # Tell the engine whether there is still a next page.
        return next_page in content

Пример #2

Показать файл

    def extract(self, content):
        """Collect subdomains of the target from one page of HTML.

        Anchors carrying ``rel="nofollow"`` whose ``href`` mentions the target
        domain are parsed, and each host that is a proper subdomain of
        ``self.target.netloc`` is added to ``self.subdomains``.
        ``self.last_domain`` starts as the target domain and ends up holding
        the host of the last link that was parsed.

        :param content: HTML text of the page to scan.
        :return: True if the page contains a "Next page" link, else False.
        """
        target = self.target.netloc
        # Raw string: "\s" is a regex escape, not a Python string escape.
        next_page = re.compile(r'<A.*?>\s*<b>Next page</b>\s*</a>')
        # The capture is limited to the href value ([^"]*) so that several
        # links on one line are not swallowed into a single match, and the
        # domain is escaped so its dots only match literal dots.
        pattern = re.compile(
            '<a href="http[s]*://([^"]*{domain}).*?" rel="nofollow">'.format(
                domain=re.escape(target)))
        # A real subdomain ends with ".<target>"; a bare suffix test would
        # also accept unrelated hosts ("badexample.com" for "example.com").
        suffix = "." + target

        self.last_domain = target
        for link in pattern.findall(content):
            if not link.startswith(('http://', 'https://')):
                link = "http://" + link
            try:
                subdomain = urlparse.urlparse(link).netloc
            except ValueError:
                # One malformed link must not discard the rest of the page.
                continue

            if subdomain.endswith(suffix) and subdomain not in self.subdomains:
                self.logger.info("{engine} Found {subdomain}".format(
                    engine=self.engine_name, subdomain=subdomain))
                self.subdomains.update([subdomain])
            self.last_domain = subdomain

        # Tell the engine whether there is still a next page.
        return next_page.search(content) is not None

Пример #3

Показать файл

Файл: ChinazEngine.py Проект: t0w4r/recon

    def extract(self, content):
        """Collect subdomains of the target from one page of HTML.

        Anchors of the form ``<a href="javascript:" onclick="window.open..."
        target="_blank">host</a>`` whose text mentions the target domain are
        parsed, and each host that is a proper subdomain of
        ``self.target.netloc`` is added to ``self.subdomains``.

        :param content: HTML text of the page to scan.
        :return: True if the page contains the next-page marker, else False.
        """
        target = self.target.netloc
        # Escape the domain so that its dots only match literal dots.
        pattern = re.compile(
            '<a href="javascript:" onclick="window.open.*?" target="_blank">(.*?{domain})</a>'
            .format(domain=re.escape(target)))
        next_page = "下一页"
        # A real subdomain ends with ".<target>"; a bare suffix test would
        # also accept unrelated hosts ("myexample.com" for "example.com").
        suffix = "." + target

        for link in pattern.findall(content):
            if not link.startswith(('http://', 'https://')):
                link = "http://" + link
            try:
                subdomain = urlparse.urlparse(link).netloc
            except ValueError:
                # One malformed link must not discard the rest of the page.
                continue

            if subdomain.endswith(suffix) and subdomain not in self.subdomains:
                self.logger.info("{engine} Found {subdomain}".format(
                    engine=self.engine_name, subdomain=subdomain))
                self.subdomains.update([subdomain])

        # Tell the engine whether there is still a next page.
        return next_page in content

Пример #4

Показать файл

Файл: YahooEngine.py Проект: t0w4r/recon

    def extract(self, content):
        """Collect subdomains of the target from one page of HTML.

        ``<span>`` elements whose text ends with the target domain wrapped in
        a ``<b>`` tag are captured, stripped of markup and parsed.  Each host
        that is a proper subdomain of ``self.target.netloc`` is added to
        ``self.subdomains``.  ``self.find_new_domain`` is reset on every call
        and set to True once a previously unseen subdomain is recorded.

        :param content: HTML text of the page to scan.
        :return: True if the page contains a "Next" link, else False.
        """
        target = self.target.netloc
        next_page = re.compile('<a class="next".*?>Next</a>')
        # Escape the domain so that its dots only match literal dots.
        pattern = re.compile(
            '<span class=.{1,100}?>(.{0,100}?<b.{0,100}?>'
            + re.escape(target) + '</b>)')
        # Compiled once instead of on every loop iteration.
        tag = re.compile('<.*?>')
        # A real subdomain ends with ".<target>"; a bare suffix test would
        # also accept unrelated hosts ("myexample.com" for "example.com").
        suffix = "." + target

        self.find_new_domain = False
        for link in pattern.findall(content):
            # Drop the highlighting markup inside the captured text.
            link = tag.sub('', link)

            if not link.startswith(('http://', 'https://')):
                link = "http://" + link
            try:
                subdomain = urlparse.urlparse(link).netloc
            except ValueError:
                # One malformed link must not discard the rest of the page.
                continue

            if subdomain.endswith(suffix) and subdomain not in self.subdomains:
                self.logger.info(
                    "{engine} Found {subdomain}".format(
                        engine=self.engine_name, subdomain=subdomain))
                self.subdomains.update([subdomain])
                self.find_new_domain = True

        # Tell the engine whether there is still a next page.
        return next_page.search(content) is not None

Пример #5

Показать файл

Файл: AskEngine.py Проект: t0w4r/recon

    def extract(self, content):
        """Collect subdomains of the target from one page of HTML.

        ``<p class="PartialSearchResults-item-url">`` entries that mention a
        host under the target domain are parsed, and each host that is a
        proper subdomain of ``self.target.netloc`` is added to
        ``self.subdomains``.  ``self.find_new_domain`` is reset on every call
        and set to True once a previously unseen subdomain is recorded.

        :param content: HTML text of the page to scan.
        :return: True if the page contains the next-page element, else False.
        """
        target = self.target.netloc
        next_page = '<li class="PartialWebPagination-next">Next</li>'
        # Raw string keeps "\." a regex escape; the domain is escaped so that
        # look-alike text such as "example-com" cannot end the lazy capture
        # early and hide the real host.
        pattern = re.compile(
            r'<p class="PartialSearchResults-item-url">(.*?\.{domain}).*?</p>'
            .format(domain=re.escape(target)))
        # A real subdomain ends with ".<target>"; a bare suffix test would
        # also accept unrelated hosts ("notexample.com" for "example.com").
        suffix = "." + target

        self.find_new_domain = False
        for link in pattern.findall(content):
            if not link.startswith(('http://', 'https://')):
                link = "http://" + link
            try:
                subdomain = urlparse.urlparse(link).netloc
            except ValueError:
                # One malformed link must not discard the rest of the page.
                continue

            if subdomain.endswith(suffix) and subdomain not in self.subdomains:
                self.logger.info(
                    "{engine} Found {subdomain}".format(
                        engine=self.engine_name, subdomain=subdomain))
                self.subdomains.update([subdomain])
                self.find_new_domain = True

        # Tell the engine whether there is still a next page.
        return next_page in content

Пример #6

Показать файл

Файл: BaiduEngine.py Проект: t0w4r/recon

    def extract(self, content):
        """Collect subdomains of the target from one page of HTML.

        The text following each ``class="c-showurl"`` anchor, up to the target
        domain, is captured, stripped of markup and parsed.  Each host that is
        a proper subdomain of ``self.target.netloc`` is added to
        ``self.subdomains``.  ``self.find_new_domain`` is reset on every call
        and set to True once a previously unseen subdomain is recorded.

        :param content: HTML text of the page to scan.
        :return: True if the page contains a ``class="n"`` anchor, else False.
        """
        target = self.target.netloc
        # Escape the domain so that its dots only match literal dots.
        pattern = re.compile('<a.*?class="c-showurl".*?>(.*?{domain})'.format(
            domain=re.escape(target)))
        next_page = re.compile('<a.*?class="n">(.*?)</a>')
        # Compiled once instead of on every loop iteration.
        markup = re.compile('<.*?>|>|<|&nbsp;')
        # A real subdomain ends with ".<target>"; a bare suffix test would
        # also accept unrelated hosts ("fakeexample.com" for "example.com").
        suffix = "." + target

        self.find_new_domain = False
        for link in pattern.findall(content):
            # Drop tags, stray angle brackets and &nbsp; from the capture.
            link = markup.sub('', link)
            if not link.startswith(('http://', 'https://')):
                link = "http://" + link
            try:
                subdomain = urlparse.urlparse(link).netloc
            except ValueError:
                # One malformed link must not discard the rest of the page.
                continue

            if subdomain.endswith(suffix) and subdomain not in self.subdomains:
                self.logger.info("{engine} Found {subdomain}".format(
                    engine=self.engine_name, subdomain=subdomain))
                self.subdomains.update([subdomain])
                self.find_new_domain = True

        # Tell the engine whether there is still a next page.
        return next_page.search(content) is not None