Exemplo n.º 1
0
    def crawl_robots_txt():
        """Seed the crawl queue from robots.txt.

        Fetches every sitemap XML referenced by the site's robots.txt,
        extracts the <loc> URLs from each, and pushes them — together with
        the Allow/Disallow/Noindex paths — into Spider.project['queue'].
        """
        links = []
        robots_dict = parse_robots_txt(Spider.project['base_url'],
                                       Spider.project['base_url'])
        if robots_dict:
            for xml_link in robots_dict['Sitemap']:
                if not xml_link.endswith('.xml'):
                    continue

                resp, err = Get(xml_link,
                                headers=Spider.project['headers'],
                                cookies=Spider.project['cookies'])
                if err:
                    continue

                # resp.content is bytes; the old `resp.content != ''` check
                # compared bytes to str, which is always True in Python 3.
                if resp.content:
                    try:
                        tree = html.fromstring(resp.content)
                        links += tree.xpath('.//loc/text()')
                    except Exception:
                        # Skip malformed sitemap documents.
                        continue
            # Normalize and keep only URLs not already crawled.
            links = [
                url_check(str(link), Spider.project['base_url'])
                for link in links
                if link not in Spider.project['links']
            ]
            Spider.project['queue'].update(set(filter(None, links)))
            Spider.project['queue'].update(robots_dict['Allow'])
            Spider.project['queue'].update(robots_dict['Disallow'])
            Spider.project['queue'].update(robots_dict['Noindex'])
Exemplo n.º 2
0
 def paths_test(self, module, payloads, outputs):
     """Send a GET request to each candidate path and analyze the output points.

        The *payloads* parameter must be shaped like ``{0: ['path1', 'path2']}``.
        Stops as soon as the analyzer signals an unsupported output type (None)."""
     print(process_icon, 'Analyzing the paths.')
     for path in payloads[0]:
         target = url_check(path, self.project['base_url'])
         response, _ = Get(target,
                           headers=self.project['headers'],
                           timeout=self.req_timout)
         verdict = self.analyze_the_output(module, response, path,
                                           {target: path}, outputs)
         if verdict is None:
             return
Exemplo n.º 3
0
    def analyze_the_output(self, module, resp, inp, mod_vector, outputs):
        """Check a response against the module's configured output points.

        module     -- module definition dict ('output_path', 'output_type',
                      'output_points', 'severity', 'name', optional 'delay')
        resp       -- response object from the request already sent
        inp        -- payload(s) used; for REFLECT modules these become the
                      values searched for in the output
        mod_vector -- vector recorded under vulnerabilities on success
        outputs    -- expected output values to look for

        Returns True when the module matched (vulnerable), False otherwise,
        or None when a REFLECT module has no usable entry points.
        """
        vuln = False

        if module['output_path'] != 'SAME':
            # Get returns a (response, error) tuple; the original bound the
            # whole tuple to resp, breaking every attribute access below.
            resp, _ = Get(url_check(module['output_path'], urlparse(self.project['base_url'])),
                          headers=self.project['headers'], timeout=self.req_timout,
                          cookies=self.project['cookies'])

        if module['output_type'] == 'DELAY':
            # Accept an elapsed time inside the [delay, delay + 3] window.
            if module['delay'] + 3 >= resp.elapsed.total_seconds() >= module['delay']:
                vuln = True

        if module['output_type'] == 'REFLECT':
            # For reflection checks the payload itself is the expected output.
            outputs = inp if isinstance(inp, list) else [inp]
            if not outputs:
                printc('[!] This output type is not supported for the module entry points', 'Red', attrs=['bold'])
                return None

        if resp and 'response_contents' in module['output_points']:
            if any(output in str(resp.content) for output in outputs):
                vuln = True

        if resp and 'response_headers_names' in module['output_points']:
            if set(outputs) & set(resp.headers.keys()):
                vuln = True
        if resp and 'response_headers_values' in module['output_points']:
            if set(outputs) & set(resp.headers.values()):
                vuln = True
        if resp and 'status_codes' in module['output_points']:
            if resp.status_code in outputs:
                vuln = True

        if vuln:
            # Record the vector once per module, then persist the project.
            found = self.project['vulnerabilities'][module['severity']].get(module['name'], [])
            if mod_vector not in found:
                self.project['vulnerabilities'][module['severity']][module['name']] = found + [mod_vector]
            save_project(self.project)
        print(process_icon,
              c(len(self.project['vulnerabilities'][module['severity']].get(module['name'], [])),
                'Red', attrs=['bold']),
              module['name'].replace('_', ' ') + '\'s',
              'Detected.\r',
              end='')
        return vuln
Exemplo n.º 4
0
def parse_robots_txt(link, base_url):
    """Fetch and parse <link>/robots.txt into sets of directive values.

    Returns a dict keyed by 'Sitemap', 'User-agent', 'Disallow', 'Allow'
    and 'Noindex'; each value is the set of URLs accepted by url_check
    against *base_url*. Unknown directives are skipped.
    """
    result = {
        "Sitemap": set(),
        "User-agent": set(),
        "Disallow": set(),
        "Allow": set(),
        "Noindex": set()
    }

    resp, _ = Get(link + "/robots.txt")
    if resp and resp.status_code == 200:
        try:
            text = resp.content.decode('utf-8')
        except UnicodeDecodeError:
            # Non-UTF-8 robots.txt: give up on parsing, return empty sets.
            return result
        for line in text.split('\n'):
            parts = line.split(': ')
            if len(parts) != 2:
                continue
            key = parts[0].strip()
            # Skip directives we don't track. The original caught KeyError
            # around the whole loop, so the first unknown directive (e.g.
            # "Crawl-delay") silently aborted parsing of every later line.
            if key not in result:
                continue
            url = url_check(parts[1].split('#')[0].strip(), base_url)
            if url:
                result[key].add(url)
    return result
Exemplo n.º 5
0
    def crawl(link):
        """Visit *link*: record its status, and collect files, queries,
        forms and newly discovered URLs into the Spider.project state."""
        Spider.project['queue'].discard(link)

        # Prevent logout if the user set a Cookies
        if 'logout' in link:
            return

        if link.endswith(COMMON_EXT):
            Spider.project['files'].add(link)

        elif link not in Spider.project['links']:

            resp, err = Get(link,
                            headers=Spider.project['headers'],
                            cookies=Spider.project['cookies'])

            if err:
                return

            # Add the link to the project links
            Spider.project['links'][link] = resp.status_code

            # If the link has a query string, consider it for the queries set
            parsed_link = urlparse(link)
            if parsed_link.query:
                if queries_check(link, Spider.project['base_url'],
                                 Spider.queries_hashes):
                    Spider.project['queries'].add(link)

            # Check if the link contains any contents
            if not resp.content:
                return

            # using 'lxml' for best performance
            try:
                soup = bs(resp.content.decode('utf-8'), 'lxml')
            except UnicodeDecodeError:
                soup = bs(resp.content, 'lxml')
            except Exception:
                logging.error('failed to creating the page soup.')
                return

            # Parse the forms
            forms = parse_forms(soup, link, Spider.project['base_url'])
            if forms:
                Spider.project['forms'] += forms

            # Crawler-trap heuristic: calendar pages and repeated path
            # segments. The patterns must be raw strings — in the original,
            # '\1' in a non-raw literal was parsed as chr(0x01), so the
            # repeated-segment backreference could never match.
            trap = re.search(r'.*(/.*calendar.*)', link) or re.search(
                r'^.*?(/.+?/).*?(\1).*(\1)', link)
            if not trap:
                # Parse URLs from the page contents
                for tag in soup.findAll('a', href=True):
                    url = url_check(tag['href'].split('#')[0],
                                    Spider.project['base_url'])
                    if url:
                        Spider.project['queue'].add(url)
                for tag in soup.findAll(['frame', 'iframe'], src=True):
                    url = url_check(tag['src'].split('#')[0],
                                    Spider.project['base_url'])
                    if url:
                        Spider.project['queue'].add(url)
                for tag in soup.findAll('button', formaction=True):
                    url = url_check(tag['formaction'],
                                    Spider.project['base_url'])
                    if url:
                        Spider.project['queue'].add(url)
Exemplo n.º 6
0
def parse_forms(soup, link, base_url):
    """Extract form descriptors from a parsed page.

    soup     -- BeautifulSoup document
    link     -- page URL (kept for interface compatibility; currently unused)
    base_url -- base URL used to resolve/validate form actions

    Returns a list of dicts with 'action' (validated absolute URL),
    'method' (lower-cased, default 'get') and 'inputs' (list of
    name/type/value dicts). Forms whose action fails url_check are skipped.
    """
    def _entry(tag, value):
        # Normalized input descriptor; type defaults to 'text'.
        return {'name': tag.get('name'),
                'type': tag.get('type') if tag.get('type') else 'text',
                'value': value}

    forms = []
    # NOTE: the original guard `forms_soup is not []` was always True
    # (identity test against a fresh list) — iterate directly instead.
    for form in soup.findAll('form'):
        if form is None or form == "":
            continue
        form_id = form.attrs.get('id')
        # Resolve the form action (requested URL) against the base URL.
        action = url_check(form.attrs.get("action"), base_url)
        if action is None:
            continue
        # Form method (POST, GET, DELETE, ...), default GET.
        method = form.attrs.get("method", "Get")
        inputs = []
        # Textareas outside the form body but bound via the form="" attribute.
        for textarea in soup.findAll('textarea'):
            if textarea.get('form') == form_id:
                inputs.append(_entry(textarea,
                                     textarea.get('value') if textarea.get('value') else 'YAWSS'))
        first_check_box = True
        for data_tag in form.find_all(['input', 'select', 'textarea']):

            if data_tag.get('type') == 'submit' or data_tag.get('name') == 'Submit-button':
                continue

            # Prevent making changes in the hidden data values (keep as-is,
            # even if None).
            if data_tag.get('type') == 'hidden':
                inputs.append(_entry(data_tag, data_tag.get('value')))
            elif data_tag.name == 'select':
                # Guard: a <select> with no <option> crashed the original
                # with AttributeError on None.find('option').get(...).
                option = data_tag.find('option')
                inputs.append(_entry(data_tag,
                                     option.get('value') if option else 'YAWSS'))
            elif data_tag.name == 'textarea':
                inputs.append(_entry(data_tag,
                                     data_tag.get('value') if data_tag.get('value') else 'YAWSS'))
            else:
                # Keep just the first checkbox value
                if data_tag.get('type') == 'checkbox':
                    if first_check_box:
                        first_check_box = False
                    else:
                        continue
                inputs.append(_entry(data_tag,
                                     data_tag.get('value') if data_tag.get('value') else 'YAWSS'))

        forms.append({"action": action,
                      "method": method.lower() if method else None,
                      "inputs": inputs})
    return forms