Exemplo n.º 1
0
    def get_content_html(self):
        """Fetch ``self.url`` and pass the response body to the script parser.

        The page is requested with ``self.user_agent`` as the ``User-Agent``
        header. When ``self.test_connection`` is truthy, the response text is
        wrapped in ``bs4_module.CoreParser`` and its ``find_all_script()`` and
        ``get_content_js()`` methods are called in that order.

        Connection errors are reported on stdout rather than raised.
        Returns ``None``.
        """
        try:
            # The User-Agent must go in ``headers``; passing it as ``data``
            # would send it as a form-encoded request body instead.
            # NOTE(review): verify=False disables TLS certificate validation.
            get_content = requests.get(self.url,
                                       headers={'User-Agent': self.user_agent},
                                       verify=False)
            # NOTE(review): if test_connection is a method rather than an
            # attribute/property this check is always true - confirm.
            if self.test_connection:
                soup_objt = bs4_module.CoreParser(get_content.text, self.url)
                soup_objt.find_all_script()
                soup_objt.get_content_js()

        # requests raises its own ConnectionError, which is not a subclass of
        # the builtin one, so both are listed to make sure it is handled.
        except (ConnectionError, requests.exceptions.ConnectionError) as e:
            print(f"Connection error while try to parse the URL content \n{e}")
Exemplo n.º 2
0
    def get_content_js(self):
        """Download each URL in ``self.urls``, report matches and save the body.

        Steps:

        1. Build a ``requests_module.CoreRequests`` for ``self.url_domain``;
           if it is falsy, print a warning and stop.
        2. Create a directory named after the hostname of
           ``self.url_domain`` in the current working directory (an existing
           directory is reported and reused).
        3. For every entry of ``self.urls``: fetch it, run every pattern of
           ``regex_modules.REGEX_PATT`` over the response text, print each
           distinct match with a colour/tag chosen by its content, and write
           the raw response bytes to ``<cwd>/<hostname>/<name>.js``.

        Download and file-creation errors are printed and the URL is skipped;
        nothing is raised for them. Returns ``None``.
        """
        test_conn = requests_module.CoreRequests(self.url_domain,
                                                 self.name_target)
        # NOTE(review): this relies on the truthiness of the CoreRequests
        # object itself - confirm the class defines __bool__/__len__,
        # otherwise the check can never fail.
        if not test_conn:
            print(f"{self.url_domain} maybe down :/ ?")
            # Without a directory name there is nothing sensible to do;
            # falling through would hit unbound local variables.
            return

        current_dir = os.getcwd()
        # hostname is None for URLs without a scheme; str() keeps the
        # directory name and the save path consistent in that case.
        dir_name = str(urlparse(self.url_domain).hostname)

        try:
            os.mkdir(dir_name)
            print(f">> Create directory {dir_name}")
            print(f">> File will be saved at {dir_name}")
        except FileExistsError as e:
            print(f">> {e}")

        for url_src_tag in self.urls:
            # Protocol-relative URLs ("//host/path") need an explicit scheme.
            if url_src_tag[0:2] == "//":
                url_src_tag = "http:" + url_src_tag
            print(colors.colors.fg.blue +
                  f"[INFO] Getting info from: {url_src_tag}" +
                  colors.colors.reset)

            try:
                # The User-Agent belongs in ``headers``, not in the body.
                # NOTE(review): verify=False disables TLS certificate checks.
                r = requests.get(url_src_tag,
                                 verify=False,
                                 headers={'User-Agent': self.user_agent},
                                 stream=True)
                content_save = r.content
                text_body = r.text
            # requests' ConnectionError does not derive from the builtin one,
            # so both are caught.
            except (ConnectionError, requests.exceptions.ConnectionError) as e:
                print(
                    f">> Error while save content from \n{url_src_tag} \n {e}")
                # Nothing was downloaded: skip the save step instead of
                # writing stale or undefined content.
                continue

            # Collect distinct matches, keeping first-seen order.
            arrays_match = []
            for pattern in regex_modules.REGEX_PATT.values():
                for found in re.findall(pattern, text_body):
                    if found not in arrays_match:
                        arrays_match.append(found)

            for url in arrays_match:
                if "aws" in url:
                    print(colors.colors.fg.red + f"[AWS INFO] {url}" +
                          colors.colors.reset)
                elif self.host in url:
                    print(colors.colors.fg.orange +
                          f"[DOMAIN INFO] {url}" + colors.colors.reset)
                elif self.name_target in url:
                    print(colors.colors.fg.orange + f"[NAME INFO] {url}" +
                          colors.colors.reset)
                else:
                    print(colors.colors.fg.blue + f"[INFO URL] {url}" +
                          colors.colors.reset)

            # NOTE(review): the name is cut to 10 characters, so URLs sharing
            # the same prefix map to the same file and overwrite each other.
            name_save_url_tag = url_src_tag.replace(".", "_")
            name_save_url_tag = name_save_url_tag.replace("//", "_")
            name_save_url_tag = name_save_url_tag.replace("/", "_")[0:10]
            path_save = current_dir + "/" + dir_name + "/" + name_save_url_tag

            try:
                with open(path_save + ".js", 'wb') as f:
                    f.write(content_save)
            except FileNotFoundError as e:
                print(f">> Error while saving JS content to parse \n {e}")