import logging
import re
import time
import urllib.parse
from random import choice

import requests
from bs4 import BeautifulSoup

# NOTE: DataBase is the project's persistence wrapper; its import path is not
# shown in this section and is assumed to be available in this module's scope.


class AltOnionDir:
    """Collects .onion addresses from the Alt OnionDir hidden service."""

    def __init__(self, host_db=None, user_db=None, password_db=None, database=None):
        self.host_db = host_db
        self.user_db = user_db
        self.password_db = password_db
        self.database_name = database
        self.database = DataBase(
            host_db=self.host_db,
            user_db=self.user_db,
            password_db=self.password_db,
            database=self.database_name,
        )
        self.source = 'Alt OnionDir'
        # Register the source only if it is not already in the database.
        compare_source = self.database.compare_source(source=self.source)
        if not compare_source:
            self.database.save_source(source=self.source)
        self.logger = logging.getLogger('Class:AltOnionDir')
        self.session = requests.session()
        # All requests go through the local Tor SOCKS proxy.
        self.proxies = {
            'http': 'socks5h://localhost:9050',
        }

    @property
    def start(self):
        self.database.replaces()
        self.alt_onionDir()

    def alt_onionDir(self):
        url = 'http://onionf3ck2i74bmm.onion'
        self.logger.info(' Connecting to {url}'.format(url=url))
        request = self.session.get(url, proxies=self.proxies, timeout=1000)
        soup = BeautifulSoup(request.content, features="lxml")

        # Collect the .html pages linked from the navigation bar.
        pages = []
        for raw in soup.find('navbar', {'id': 'content-navbar'}).findAll('a'):
            if '.html' in raw['href'].lower():
                pages.append("{url}/{page}".format(url=url, page=raw['href']))

        for urls in pages:
            try:
                request = self.session.get(urls, proxies=self.proxies, timeout=1000)
                soup = BeautifulSoup(request.content, features="lxml")

                # Follow the paginator links of each page.
                next_pages = []
                for paginator in soup.find('ul', {'id': 'paginator'}).findAll('a'):
                    next_pages.append("{url}/{page}".format(
                        url=url, page=paginator['href'].replace('..', '')))

                for nextpage in next_pages:
                    self.logger.info(' Scraping {url}'.format(url=nextpage))
                    try:
                        request = self.session.get(nextpage, proxies=self.proxies, timeout=1000)
                        soup = BeautifulSoup(request.content, features="lxml")

                        list_urls = []
                        for raw in soup.find('div', {'class': 'generic-page'}).findAll('footer'):
                            for get_onion in raw.findAll('a'):
                                list_urls.append(get_onion['href'])

                        # Normalize each candidate and keep only valid .onion addresses.
                        regex = re.compile(r"[A-Za-z0-9]{0,12}\.?[A-Za-z0-9]{12,50}\.onion")
                        for lines in list_urls:
                            rurls = lines \
                                .replace('\xad', '') \
                                .replace('\n', '') \
                                .replace("http://", '') \
                                .replace("https://", '') \
                                .replace(' ', '') \
                                .replace('\t', '')
                            xurl = regex.match(rurls)
                            if xurl is not None:
                                self.database.saveonion(url=xurl.group(), source=self.source)
                    except (requests.exceptions.ConnectionError,
                            requests.exceptions.ChunkedEncodingError,
                            requests.exceptions.ReadTimeout,
                            requests.exceptions.InvalidURL) as e:
                        self.logger.error(
                            ' Could not connect to the URL because an error occurred.\n{e}'.format(e=e))
            except (requests.exceptions.ConnectionError,
                    requests.exceptions.ChunkedEncodingError,
                    requests.exceptions.ReadTimeout,
                    requests.exceptions.InvalidURL) as e:
                self.logger.error(
                    ' Could not connect to the URL because an error occurred.\n{e}'.format(e=e))

class UnderDir:
    """Collects .onion addresses from the UnderDir hidden service."""

    def __init__(self, host_db=None, user_db=None, password_db=None, database=None):
        self.host_db = host_db
        self.user_db = user_db
        self.password_db = password_db
        self.database_name = database
        self.database = DataBase(
            host_db=self.host_db,
            user_db=self.user_db,
            password_db=self.password_db,
            database=self.database_name,
        )
        self.source = 'UnderDir'
        logging.basicConfig(level=logging.INFO)
        # Register the source only if it is not already in the database.
        compare_source = self.database.compare_source(source=self.source)
        if not compare_source:
            self.database.save_source(source=self.source)
        self.logger = logging.getLogger('Class:UnderDir')
        self.session = requests.session()
        self.proxies = {
            'http': 'socks5h://localhost:9050',
        }

    @property
    def start(self):
        self.database.replaces()
        self.underdir()

    def underdir(self):
        url = 'http://underdj5ziov3ic7.onion'
        self.logger.info(' Connecting to {url}'.format(url=url))
        request = self.session.get(url, proxies=self.proxies, timeout=1000)
        soup = BeautifulSoup(request.content, features="lxml")

        # Walk every category listed on the home page.
        for raw in soup.find('div', {'id': 'incore'}).findAll('div', {'class': 'fr_m'}):
            for category in raw.findAll('a'):
                url_list = "{url}{category}".format(category=category['href'], url=url)
                self.logger.info(' Scraping {url}'.format(url=url_list))
                request = self.session.get(url_list, proxies=self.proxies, timeout=1000)
                soup = BeautifulSoup(request.content, features='lxml')

                # Build the list of paginated URLs for this category.
                pages = []
                for raw in soup.find('div', {'class': 'pgn'}).findAll('a'):
                    pages.append(raw.get_text())
                cont = 2
                urls = [url_list]
                while cont <= int(pages[-2]):
                    cont += 1
                    urls.append("{url}/pg/{number}".format(url=url_list, number=cont - 1))

                for get in urls:
                    self.logger.info(' Connecting to {url}.'.format(url=get))
                    try:
                        request = self.session.get(get, proxies=self.proxies, timeout=1000)
                        if request.status_code == 200:
                            soup = BeautifulSoup(request.content, features='lxml')
                            itens = []
                            for raw in soup.find('div', {'class': 'trr'}).findAll('a'):
                                itens.append(raw['href'].replace('http://', ''))

                            # Normalize each candidate and keep only valid .onion addresses.
                            regex = re.compile(r"[A-Za-z0-9]{0,12}\.?[A-Za-z0-9]{12,50}\.onion")
                            for lines in itens:
                                rurls = lines \
                                    .replace('\xad', '') \
                                    .replace('\n', '') \
                                    .replace("http://", '') \
                                    .replace("https://", '') \
                                    .replace(' ', '') \
                                    .replace('\t', '')
                                xurl = regex.match(rurls)
                                if xurl is not None:
                                    self.database.saveonion(url=xurl.group(), source=self.source)
                    except (requests.exceptions.ConnectionError,
                            requests.exceptions.ChunkedEncodingError,
                            requests.exceptions.ReadTimeout,
                            requests.exceptions.InvalidURL) as e:
                        self.logger.error(
                            ' Could not connect to the URL because an error occurred.\n{e}'.format(e=e))

class ExternalListAPI:
    """Imports .onion addresses from an external list file (e.g. a Pastebin dump)."""

    def __init__(self, file=None, host_db=None, user_db=None, password_db=None, database=None):
        self.host_db = host_db
        self.user_db = user_db
        self.password_db = password_db
        self.database_name = database
        self.database = DataBase(
            host_db=self.host_db,
            user_db=self.user_db,
            password_db=self.password_db,
            database=self.database_name,
        )
        self.source = 'Pastebin'
        logging.basicConfig(level=logging.INFO)
        # Register the source only if it is not already in the database.
        compare_source = self.database.compare_source(source=self.source)
        if not compare_source:
            self.database.save_source(source=self.source)
        self.logger = logging.getLogger('Class:ExternalListAPI')
        self.file = file

    @property
    def start(self):
        self.database.replaces()
        self.getExternal()

    def getExternal(self):
        self.logger.info(' Comparing the URL list against the database. PLEASE WAIT...')
        with open(self.file, 'r') as outputfile:
            self.logger.info(' Applying REGEX. Please wait...')
            regex = re.compile(r"[A-Za-z0-9]{0,12}\.?[A-Za-z0-9]{12,50}\.onion")
            for lines in outputfile.readlines():
                rurls = lines \
                    .replace('\xad', '') \
                    .replace('\n', '') \
                    .replace("http://", '') \
                    .replace("https://", '') \
                    .replace(' ', '') \
                    .replace('\t', '')
                xurl = regex.match(rurls)
                if xurl is not None:
                    # Save the URL only if it is not already in the database.
                    compare_source = self.database.compare_source(source=self.source)
                    compare_url = self.database.compare_url(url=xurl.group())
                    if compare_url:
                        self.logger.debug(
                            ' The URL {url} already exists in the database.'.format(url=xurl.group()))
                    else:
                        self.database.save_url(url=xurl.group(), source=compare_source[0][0])

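# --- Refactor sketch (not part of the original code) ----------------------
# Every collector in this module repeats the same normalize-then-match step:
# strip soft hyphens, newlines, URL schemes and whitespace, then keep the line
# only if it matches the .onion pattern. A shared helper like the one below
# could replace those repeated blocks; the names `ONION_REGEX` and
# `extract_onion` are hypothetical and are not referenced by the classes here.

ONION_REGEX = re.compile(r"[A-Za-z0-9]{0,12}\.?[A-Za-z0-9]{12,50}\.onion")


def extract_onion(line):
    """Return the first .onion address found in `line`, or None."""
    cleaned = (line.replace('\xad', '')
                   .replace('\n', '')
                   .replace('http://', '')
                   .replace('https://', '')
                   .replace(' ', '')
                   .replace('\t', ''))
    match = ONION_REGEX.match(cleaned)
    return match.group() if match else None
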
class CyberSecurityNews:
    """Collects .onion addresses from the cybersecuritynews user's pastes on Pastebin."""

    def __init__(self, host_db=None, user_db=None, password_db=None, database=None):
        self.host_db = host_db
        self.user_db = user_db
        self.password_db = password_db
        self.database_name = database
        self.database = DataBase(
            host_db=self.host_db,
            user_db=self.user_db,
            password_db=self.password_db,
            database=self.database_name,
        )
        self.logger = logging.getLogger('Class:CyberSecurityNews')
        # TODO: QuickStart
        logging.basicConfig(level=logging.INFO)
        self.source = 'CyberSecurityNews-Pastebin'
        compare_source = self.database.compare_source(source=self.source)
        self.session = requests.session()
        if not compare_source:
            self.database.save_source(source=self.source)
        # TODO: configuration file
        self.argument = '.onion'
        self.url = 'https://pastebin.com/u/cybersecuritynews/1'
        self.desktop_agents = [
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:60.0) Gecko/20100101 Firefox/60.0']

    # Picks a random user agent from the list above.
    @property
    def random_headers(self):
        return {
            'User-Agent': choice(self.desktop_agents),
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
        }

    @property
    def start(self):
        self.database.replaces()
        self.pages()

    def pages(self):
        self.headers = self.random_headers
        self.logger.info(' Connecting to {}'.format(self.url))
        time.sleep(2)
        request = self.session.get(self.url, headers=self.headers)
        if request.status_code == 200:
            soup = BeautifulSoup(request.content, features="lxml")

            # Discover how many pagination pages the user profile has.
            pages_to_pages = []
            for raw in soup.find('div', {'class': 'pagination'}).findAll('a'):
                pages_to_pages.append(raw.get_text())
            cont = 2
            pages_urls = [self.url]
            while cont <= int(pages_to_pages[-2]):
                cont += 1
                pages_urls.append("https://pastebin.com/u/cybersecuritynews/{}".format(cont - 1))

            # Collect the raw-paste URLs from every profile page.
            raw_urls = []
            for get_urls in pages_urls:
                self.logger.info(' Connecting to {}'.format(get_urls))
                request = self.session.get(get_urls, headers=self.headers)
                if request.status_code == 200:
                    soup = BeautifulSoup(request.content, features="lxml")
                    for raw in soup.find('table', {'class': 'maintable'}).findAll('a'):
                        if 'archive' not in raw['href']:
                            raw_urls.append("https://pastebin.com/raw{}".format(raw['href']))

            itens = []
            self.logger.info(' Applying replaces and regex. PLEASE WAIT...')
            for raw in raw_urls:
                request = self.session.get(raw, headers=self.headers)
                self.soup = BeautifulSoup(request.content, features="lxml")
                for pre in self.soup.findAll('body'):
                    lines_list = pre.get_text().split('\n')
                    itens.extend(lines_list)

            # Normalize each candidate line and keep only valid .onion addresses.
            regex = re.compile(r"[A-Za-z0-9]{0,12}\.?[A-Za-z0-9]{12,50}\.onion")
            for lines in itens:
                rurls = lines \
                    .replace('\xad', '') \
                    .replace('\n', '') \
                    .replace("http://", '') \
                    .replace("https://", '') \
                    .replace(' ', '') \
                    .replace('\t', '')
                url = regex.match(rurls)
                if url is not None:
                    self.database.saveonion(url=url.group(), source=self.source)

class DiscoverDarkWebService:
    """Collects .onion addresses from the Discover Dark Web hidden service."""

    def __init__(self, host_db=None, user_db=None, password_db=None, database=None):
        self.host_db = host_db
        self.user_db = user_db
        self.password_db = password_db
        self.database_name = database
        self.database = DataBase(
            host_db=self.host_db,
            user_db=self.user_db,
            password_db=self.password_db,
            database=self.database_name,
        )
        self.source = 'Discover Dark Web Hidden Service'
        logging.basicConfig(level=logging.INFO)
        # Register the source only if it is not already in the database.
        compare_source = self.database.compare_source(source=self.source)
        if not compare_source:
            self.database.save_source(source=self.source)
        self.logger = logging.getLogger('Class:DiscoverDarkWebService')
        self.session = requests.session()
        self.proxies = {
            'http': 'socks5h://localhost:9050',
        }

    @property
    def start(self):
        self.database.replaces()
        self.discover_dark_web()

    def discover_dark_web(self):
        url = 'http://3bbaaaccczcbdddz.onion/discover'
        self.logger.info(' Connecting to {url}'.format(url=url))
        # Initialized before the try block so the regex step below still runs
        # (over an empty list) when the request fails.
        list_urls = []
        try:
            request = self.session.get(url, proxies=self.proxies, timeout=1000)
            soup = BeautifulSoup(request.content, features="lxml")
            for raw in soup.find('table', {'class': 'table'}).findAll('a'):
                list_urls.append(raw['href'].replace('/search?q=', ''))
        except (requests.exceptions.ConnectionError,
                requests.exceptions.ChunkedEncodingError,
                requests.exceptions.ReadTimeout,
                requests.exceptions.InvalidURL) as e:
            self.logger.error(' Could not connect to the URL because an error occurred.\n{e}'.format(e=e))

        self.logger.info(' Applying REGEX. Please wait...')
        regex = re.compile(r"[A-Za-z0-9]{0,12}\.?[A-Za-z0-9]{12,50}\.onion")
        for lines in list_urls:
            rurls = lines \
                .replace('\xad', '') \
                .replace('\n', '') \
                .replace("http://", '') \
                .replace("https://", '') \
                .replace(' ', '') \
                .replace('\t', '')
            xurl = regex.match(rurls)
            if xurl is not None:
                self.database.saveonion(url=xurl.group(), source=self.source)

class FleshOnionsAPI:
    """Collects .onion addresses from the FlashOnions search engine."""

    def __init__(self, host_db=None, user_db=None, password_db=None, database=None):
        self.host_db = host_db
        self.user_db = user_db
        self.password_db = password_db
        self.database_name = database
        self.database = DataBase(
            host_db=self.host_db,
            user_db=self.user_db,
            password_db=self.password_db,
            database=self.database_name,
        )
        self.source = 'FlashOnions'
        logging.basicConfig(level=logging.INFO)
        # Register the source only if it is not already in the database.
        compare_source = self.database.compare_source(source=self.source)
        if not compare_source:
            self.database.save_source(source=self.source)
        self.logger = logging.getLogger('Class:FlashOnions')
        self.session = requests.session()
        self.proxies = {
            'http': 'socks5h://localhost:9050',
        }

    @property
    def start(self):
        self.database.replaces()
        self.flash_onion()

    def flash_onion(self):
        url = 'http://vps7nsnlz3n4ckiie5evi5oz2znes7p57gmrvundbmgat22luzd4z2id.onion/'
        self.logger.info(' Connecting to {url}'.format(url=url))
        try:
            request = self.session.get(url, proxies=self.proxies, timeout=1000)
            if request.status_code == 200:
                soup = BeautifulSoup(request.content, features="lxml")

                # Build the list of paginated result URLs.
                pages = []
                for number_pages in soup.find('div', {'class': 'pagination'}).findAll('a'):
                    pages.append(number_pages.get_text())
                cont = 0
                urls = []
                while cont <= int(pages[-1]):
                    cont += 1
                    urls.append("{url}?search_title_only=on&search=&rep=n%2Fa&page={number}".format(
                        number=cont - 1, url=url))

                onions = []
                for connect in urls:
                    time.sleep(4)
                    self.logger.info(' Connecting to {url}'.format(url=connect))
                    request = self.session.get(connect, proxies=self.proxies, timeout=1000)
                    if request.status_code == 200:
                        soup = BeautifulSoup(request.content, features="lxml")
                        for raw in soup.find('table', {'class': 'domain_list'}).findAll('a'):
                            if 'http://' in raw['href']:
                                onions.append(raw['href'])

                # Search terms used to surface additional domains from the engine.
                keywords = [
                    'forum', 'press', 'search', 'introduction', 'arabic', 'chinese',
                    'french', 'german', 'italian', 'japanese', 'polish', 'portuguese',
                    'russians', 'Spanish', 'hardcore', 'softcore', 'erotica', 'fetish',
                    'violence', 'escorts', 'p**n', 'domains', 'file', 'pastebin',
                    'proxies', 'web', 'blog', 'books', 'bbs', 'chans', 'wiki', 'social',
                    'Social', 'activism', 'paranormal', 'politics', 'religion',
                    'whistleblowing', 'development', 'c++', 'c#', 'python', 'HTML',
                    'ruby', 'jupyter', 'java', 'javascript', 'java', 'hacker',
                    'blackbox', 'read', 'blackhat', 'cracked', 'wordlist', 'word',
                    'hacked', 'blueteam', 'Phishing', 'Malware', 'Lamer', 'Cracker',
                    'Defacer', 'Spyware', 'Scammers', 'DDOS', 'SQL', 'sql', 'Botnet',
                    'Exploit', 'Script', 'zero', '0day', 'zeroday', 'Cybersecurity',
                    'Cyber', 'Hacktivism', 'Hacktivist', 'Keylogger', 'Blacklist',
                    'ai', 'bitcoin', 'Equifax', 'Nessus', 'openvas', 'securitycenter',
                    'Truecrypt', 'ClamAV', 'OSSEC', 'paloalto', 'BackTrack', 'OSSIM',
                    'IPCop', 'Okta', 'sonicwall', 'pfsense', 'Metasploit', 'OpenSSH',
                    'Wireshark', 'NStealth', 'drugs', 'drug-shop', 'Acid', 'Asteroid',
                    'Berry', 'Poker', 'games', 'Multiplayer', 'Play', 'activism',
                    'Casino', '.mp3', '.mp4', 'Video', 'Filme', 'Movie', 'channel',
                    'message', 'conclusion', 'termination', 'heading', 'headline',
                    'english', 'mandarin', 'hindustani', 'arabic', 'malay', 'bengali',
                    'sex', 'sexy', 'sexo', 'sexual', 'LGBT', 'Abuse', 'local', 'ebook',
                    'ebooks', 'social', 'christianity', 'islam', 'nonreligious',
                    'secular', 'secular', 'agnostic', 'atheist', 'hinduism', 'buddhism',
                    'spiritism', 'judaism', 'primal-indigenous', 'php', 'visual', 'C++',
                    'delphi', 'pascal', 'cobol', 'Cyberark', 'Firewall', 'antivirus',
                    'marijuana', 'weed', 'cocaine', 'heroin', 'cannabis', 'crack',
                    'ecstasy', 'amphetamines', 'lsd', 'singleplayer', 'TV',
                    'television', 'radio',
                ]

                for term in keywords:
                    time.sleep(2)
                    # URL-encode the term and request the search page itself.
                    query = urllib.parse.quote(term)
                    search = "{url}/?rep=n%2Fa&search={term}&submit=Go+%3E%3E%3E".format(
                        url=url, term=query)
                    self.logger.info(' Connecting to {url}'.format(url=search))
                    request = self.session.get(search, proxies=self.proxies, timeout=1000)
                    if request.status_code == 200:
                        soup = BeautifulSoup(request.content, features="lxml")
                        for raw in soup.find('table', {'class': 'domain_list'}).findAll('a'):
                            if 'http://' in raw['href']:
                                onions.append(raw['href'])

                self.logger.info(' Applying REGEX. Please wait...')
                regex = re.compile(r"[A-Za-z0-9]{0,12}\.?[A-Za-z0-9]{12,50}\.onion")
                for lines in onions:
                    rurls = lines \
                        .replace('\xad', '') \
                        .replace('\n', '') \
                        .replace("http://", '') \
                        .replace("https://", '') \
                        .replace(' ', '') \
                        .replace('\t', '')
                    xurl = regex.match(rurls)
                    if xurl is not None:
                        self.database.saveonion(url=xurl.group(), source=self.source)
        except (requests.exceptions.ConnectionError,
                requests.exceptions.ChunkedEncodingError,
                requests.exceptions.ReadTimeout,
                requests.exceptions.InvalidURL) as e:
            self.logger.error(' Could not connect to the URL because an error occurred.\n{e}'.format(e=e))
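
# --- Usage sketch ----------------------------------------------------------
# A minimal example of how one of these collectors might be started. It
# assumes a Tor SOCKS proxy listening on localhost:9050, a database reachable
# by the project's DataBase wrapper, and placeholder credentials; adjust to
# the project's actual configuration.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    collector = DiscoverDarkWebService(
        host_db='localhost',   # hypothetical credentials
        user_db='user',
        password_db='password',
        database='onions',
    )
    collector.start  # `start` is a property, so it is accessed without parentheses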