def search_abc_es(target):
    # Search the ABC newspaper archive (hemeroteca) for the target name.
    target = target.replace(" ", "+")
    try:
        url = f"https://abc.es/hemeroteca/resultados-busqueda-avanzada/todo?exa={target}"
        html = requests.get(url).text
        soup = BeautifulSoup(html, "html.parser")
        results = soup.findAll("a", attrs={"class": "titulo"})
        if results:
            for r in results:
                print()
                print("|----[INFO][SPAINPRESS][ABC][>]")
                print("|--------[TITLE][>] " + er.remove_tags(str(r)))
                print("|--------[URL][>] " + r["href"])
        else:
            print()
            print("|----[INFO][SPAINPRESS][ABC][>] No record found...")
    except Exception as e:
        print()
        print(f"|----[WARNING][ABC ERROR][>] {e}")
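# `er.remove_tags` is a project helper used throughout these functions but
# not shown in this section. A minimal sketch of what it is assumed to do
# (strip HTML/XML tags from a string); the real helper may also normalise
# whitespace or accents (see er.replace_acentos elsewhere):
import re

def remove_tags(text):
    # Drop anything that looks like an HTML/XML tag.
    return re.sub(r"<[^>]+>", "", text)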
def search_google_(target):
    engine = Google()
    results = engine.search("'" + target + "'")
    for r in results:
        print("|")
        print("|----[INFO][GOOGLE][RESULTS][>] " + r["title"])
        print("|----[INFO][GOOGLE][RESULTS][DESCRIPTION][>] " + r["text"])
        print("|----[INFO][GOOGLE][RESULTS][LINK][>] " + r["link"])
        try:
            tsd, td, tsu = extract(r["link"])
            domain = td + '.' + tsu
            # If the result belongs to a known Spanish newspaper, use the
            # dedicated news parser; otherwise fall back to the generic one.
            with open("data/newspaper/spain-newspaper.txt", "r") as spain_newspaper:
                newspapers = [news.strip() for news in spain_newspaper]
            if domain in newspapers:
                newspaper.news_parser(r["link"], target)
            elif domain not in config.BL_parserPhone:
                web = requests.get(r["link"], timeout=3)
                if 200 <= web.status_code < 300:
                    TEXT = er.remove_tags(str(web.text))
                    parser.parserMAIN(TEXT)
            print("|")
        except Exception as e:
            print("|----[ERROR][HTTP CONNECTION][>] " + str(e))
def parser_email(text):
    # Loose e-mail pattern; the hyphen sits at the end of the character
    # class so it is matched literally instead of forming a range.
    r = re.compile(
        r"[a-z0-9!#$%&'*+/=?^_`{|}~.-]{1,64}@[a-zA-Z0-9]{1,255}\.[a-zA-Z0-9-]{1,24}"
    )
    results = r.findall(text)
    if results:
        for x in results:
            x = er.replace_acentos(
                er.remove_tags(er.replace_letras_raras(str(x))))
            print("|--------[INFO][PARSER][EMAIL][>] " + x)
            if len(x) < 20:
                config.emailsData_list.append(x)
def parser_email(text):
    # RFC-style pattern, left unanchored so that re.findall can pick up
    # addresses embedded anywhere in a longer text.
    r = re.compile(
        r"[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*"
    )
    results = r.findall(text)
    if results:
        for x in results:
            x = er.replace_acentos(
                er.remove_tags(er.replace_letras_raras(str(x))))
            print("|--------[INFO][PARSER][EMAIL][>] " + x)
            if len(x) < 20:
                config.emailsData_list.append(x)
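# A minimal usage sketch for the parser above (hypothetical input; the
# `er` helpers and `config.emailsData_list` are assumed to be initialised
# as in the rest of the tool):
#
#   parser_email("Contact: foo@example.com for details.")
#
# prints "|--------[INFO][PARSER][EMAIL][>] foo@example.com" and, since
# the address is under 20 characters, appends it to config.emailsData_list.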
def searchPaginasAmarillas(nombre, a1, a2, loc):
    # Look the person up in the Spanish white pages (Páginas Blancas).
    url = ("http://blancas.paginasamarillas.es/jsp/resultados.jsp?no=" + nombre
           + "&ap1=" + a1 + "&ap2=" + a2 + "&sec=41&pgpv=1&tbus=0&nomprov="
           + loc + "&idioma=spa")
    html = requests.get(url).text
    soup = BeautifulSoup(html, "html.parser")
    r = soup.find("div", attrs={'class': 'resul yellad yellad_ad0'})
    if r is not None:
        r = er.remove_tags(str(r))
        print("|----[INFO][PAGINAS AMARILLAS][>] ")
        print(" - " + str(cleanPaginasAmarillas_result(r)))
def search_abc_es(target):
    target = target.replace(" ", "+")
    url = f"https://abc.es/hemeroteca/resultados-busqueda-avanzada/todo?exa={target}"
    html = requests.get(url).text
    soup = BeautifulSoup(html, "html.parser")
    results = soup.findAll("a", attrs={"class": "titulo"})
    for r in results:
        print()
        print("|----[INFO][SPAINPRESS][ABC][>]")
        print("|--------[TITLE][>] " + er.remove_tags(str(r)))
        print("|--------[URL][>] " + r["href"])
def search_google_(target):
    engine = Google()
    results = engine.search("'" + target + "'")
    for r in results:
        print("|--[INFO][GOOGLE][RESULTS][>] " + r["title"] + " | "
              + r["text"] + " | " + r["link"])
        try:
            web = requests.get(r["link"], timeout=3)
            print("|----[INFO][WEB][HTTP CODE][>] " + str(web.status_code) + "\n")
            # Only parse pages that returned a 2xx response.
            if 200 <= web.status_code < 300:
                TEXT = er.remove_tags(str(web.text))
                parser.parserMAIN(TEXT)
        except Exception as e:
            print("|----[ERROR][HTTP CONNECTION][>] " + str(e))
def check_facebook(email):
    # Try a dummy login on the mobile site and inspect the error message
    # to infer whether an account is registered for this address.
    r = br.open('https://mbasic.facebook.com/')
    br.select_form(nr=0)
    br.form["email"] = email
    br.form["pass"] = "******"
    br.submit()
    html = br.response().read()
    soup = BeautifulSoup(html, "html.parser")
    div = soup.find("div", {"id": "login_error"})
    # "ninguna" appears in the Spanish error text shown for unknown accounts.
    if "ninguna" in R.remove_tags(str(div)):
        print("|--[INFO][FACEBOOK][CHECK][>] Account doesn't exist...")
    else:
        print("|--[INFO][FACEBOOK][CHECK][>] The account exists...")
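# `br` is used as a module-level mechanize browser throughout these checks,
# but its setup is not shown in this section. A minimal sketch of the
# assumed initialisation:
import mechanize

br = mechanize.Browser()
br.set_handle_robots(False)  # ignore robots.txt, which disallows these pages
br.addheaders = [('User-agent', 'Mozilla/5.0')]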
def searchInfojobs(nombre, a1, a2, loc):
    headers = {'User-Agent': "DG Minimal Version"}
    url_array = (
        "https://www.infojobs.net/" + nombre.replace(" ", "-") + "-"
        + a1.replace(" ", "-") + "-" + a2.replace(" ", "-") + ".prf",
        "https://www.infojobs.net/" + nombre.replace(" ", "-") + "-"
        + a1.replace(" ", "-") + ".prf",
        "https://www.infojobs.net/" + nombre.replace(" ", "-") + "-"
        + a1.replace(" ", "-") + "-1.prf",
    )
    for url in url_array:
        html = requests.get(url, headers=headers).text
        soup = BeautifulSoup(html, "html.parser")
        h1s = soup.findAll("h1")
        for h1 in h1s:
            # InfoJobs shows an "are you human?" ("humano") page when it
            # suspects automated traffic.
            if "humano" in er.remove_tags(str(h1)):
                print()
                print("|----[INFO][INFOJOBS][>] Captcha detected...")
                break
            else:
                print()
                print("|----[INFO][INFOJOBS][>] " + str(h1))
def check_facebook(phone):
    # Same dummy-login trick as the e-mail variant, using a phone number.
    r = br.open('https://mbasic.facebook.com/')
    br.select_form(nr=0)
    br.form["email"] = phone
    br.form["pass"] = "******"
    br.submit()
    html = br.response().read()
    soup = BeautifulSoup(html, "html.parser")
    a = soup.find("a", {"class": "bb"})
    # The "forgotten password" ("olvidado") link only shows up for
    # accounts that actually exist.
    if "olvidado" in R.remove_tags(str(a)):
        print("|--[INFO][FACEBOOK][CHECK][>] The account exists... \n")
    else:
        print("|--[INFO][FACEBOOK][CHECK][>] Account doesn't exist... \n")
def search_DDG_DORKS(TITLE, TEXT_0, target):
    engine = Duckduckgo()
    for FC_domain in config.FC_list:
        results = engine.search(f"site:{FC_domain} {TITLE}")
        for r in results:
            print("|--[INFO][DDG][RESULTS][>] " + r["title"] + " | "
                  + r["text"] + " | " + r["link"])
            try:
                tsd, td, tsu = extract(r["link"])
                domain = td + '.' + tsu
                web = requests.get(r["link"], timeout=3)
                print("|----[INFO][WEB][HTTP CODE][>] " + str(web.status_code) + "\n")
                if 200 <= web.status_code < 300:
                    # Skip PDFs and blacklisted domains.
                    if ".pdf" not in r["link"] and domain not in config.BL_parserPhone:
                        TEXT = er.remove_tags(str(web.text))
                        parser.FC_words_in_text(TEXT)
                        parser.parserMAIN(TEXT)
                        ratio = compareTEXT(TEXT_0, TEXT)
                        print(f"|----[INFO][COMPARE TEXTS][>] Ratio: {ratio}")
                        # Save the info to a log
                        data = f"{r['title']} ||| {r['link']} ||| {r['text']}, ||| {ratio} \n"
                        generateLOG(data, target)
                print("")
                time.sleep(2)
            except Exception as e:
                print("|----[ERROR][HTTP CONNECTION][>] " + str(e))
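# `compareTEXT` is called above but not defined in this section. A minimal
# sketch of a text-similarity ratio using only the standard library
# (an assumption about the helper, not the tool's actual implementation):
import difflib

def compareTEXT(text_a, text_b):
    # Returns a similarity ratio in [0, 1]; 1.0 means identical texts.
    return difflib.SequenceMatcher(None, text_a, text_b).ratio()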
def check_netflix(email):
    try:
        r = br.open('https://www.netflix.com/es/login')
        br.select_form(nr=0)
        br.form["userLoginId"] = email
        br.form["password"] = "******"
        br.submit()
        html = br.response().read()
        soup = BeautifulSoup(html, "html.parser")
        div = soup.find("div", {"class": "ui-message-contents"})
        # "ninguna" appears in the Spanish "no account found" message.
        if "ninguna" in R.remove_tags(str(div)):
            print("|--[INFO][NETFLIX][ES][CHECK][>] Account doesn't exist...")
        else:
            print("|--[INFO][NETFLIX][ES][CHECK][>] The account exists...")
    except Exception:
        print(C.colores.green + "|--[ERROR][Check_Netflix][>] Netflix error..." + C.colores.normal)
def check_wordpress(email):
    try:
        r = br.open('http://wordpress.com/wp-login.php')
        br.select_form("loginform")
        br.form["log"] = email
        br.form["pwd"] = "123456"
        br.submit()
        html = br.response().read()
        soup = BeautifulSoup(html, "html.parser")
        divError = soup.findAll("div", {"id": "login_error"})
        div = R.remove_tags(str(divError))
        # An "incorrect password" error means the user exists;
        # an "Invalid username" error means it does not.
        if "incorrect" in div:
            print("|--[INFO][WordPress][CHECK][>] The account exists...")
        if "Invalid" in div:
            print("|--[INFO][WordPress][CHECK][>] Account doesn't exist...")
    except Exception:
        print(C.colores.alert + "|--[WARNING][WordPress][>] Error..." + C.colores.normal)
def check_AccountTwitter(email):
    username = get_usernameEmail(email)
    url = "https://twitter.com/" + username
    try:
        html = requests.get(url).text
        soup = BeautifulSoup(html, "html.parser")
        for text in soup.findAll("h1"):
            text = R.remove_tags(str(text))
            # Twitter's "account doesn't exist" page starts with "Sorry"
            # (or "Lo sentimos," in Spanish).
            if "Sorry" in text or "Lo sentimos," in text:
                print("|--[INFO][Twitter][" + C.colores.blue + username
                      + C.colores.normal + "][>] Account doesn't exist...")
            else:
                print(C.colores.green + "|--[INFO][Twitter][" + C.colores.blue
                      + username + C.colores.green + "][>] The account exists."
                      + C.colores.normal)
    except requests.exceptions.RequestException:
        print(C.colores.alert
              + "|--[HTTP ERROR][Check_AccountTwitter][>] HTTP Twitter error..."
              + C.colores.normal)
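# `get_usernameEmail` is not shown in this section. A minimal sketch,
# assuming the Twitter handle is guessed from the local part of the
# address (a hypothetical helper, not the tool's confirmed logic):
def get_usernameEmail(email):
    # "john.doe@example.com" -> "john.doe"
    return email.split("@")[0]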
def check_Facebook(email):
    url = 'https://mbasic.facebook.com/'
    br.open(url)
    br.select_form(nr=0)
    response = br.submit()
    br.select_form(nr=0)  # this is the login/password form -> nr = 0
    br.form['email'] = email
    br.form['pass'] = "******"
    response = br.submit()
    html = br.response().read()
    soup = BeautifulSoup(html, "html.parser")
    divError = soup.findAll("div", {"id": "login_error"})
    div = R.remove_tags(str(divError))
    if "no coincide" in div or "do not match" in div:
        print("|--[INFO][Facebook][CHECK][>] Account doesn't exist...")
    else:
        print(C.colores.green + "|--[INFO][Facebook][CHECK][>] The account exists..." + C.colores.normal)
def main():
    # Print the main banner
    print(config.banner)
    # Ask for the URL to analyse
    url = input("Insert URL: ")
    # Fetch the HTML
    HTML = requests.get(url)
    # Extract the title
    TITLE = footprintingWEB_TITLE(HTML)
    # Extract the description
    DESC = er.remove_tags(str(footprintingWEB_DESC(HTML)))
    print(f"|----[TARGET][>] {url}")
    print(f"|--------[TARGET][TITLE][>] {TITLE}")
    print(f"|--------[TARGET][DESCRIPTION][>] {DESC}")
    time.sleep(2)
    # Extract the text of the news article
    TEXT_0 = er.remove_tags(str(HTML.text))
    # Look for a date in the URL
    DATE = parser.parser_EN_DATE(url)
    # Parse the text and extract the different kinds of data
    parser.parserMAIN(TEXT_0)
    time.sleep(3)
    # Search Google and DuckDuckGo
    print("|----[INFO][>] Now let's look for other news: \n")
    m = input("Do you want to search the original web? (Y/n): ")
    if m.lower() == "y":
        search_google_(TITLE, TEXT_0)
        search_DDG_(TITLE, TEXT_0)
    # Search fact-checking platforms
    m = input("Do you want to analyze in fact-checking platforms? (Y/n): ")
    if m.lower() == "y":
        # Search DuckDuckGo with dorks
        search_DDG_DORKS(TITLE, TEXT_0, url)
    # Search Twitter
    m = input("Do you want to search in Twitter? (Y/n): ")
    if m.lower() == "y":
        Twint.search_Twitter(url)
def extract_personalData_wikipedia(html, url, target):
    soup = BeautifulSoup(html.text, "html.parser")
    tables = soup.findAll("tr")
    for tr in tables:
        tr_ = er.remove_tags(str(tr))
        if "Nacimiento" in tr_:
            print(f"|----[INFO][AGE][>] Age found on Wikipedia {tr_}")
            # Work out the zodiac sign from the birth date
            signo = ("capricornio", "acuario", "piscis", "aries", "tauro",
                     "géminis", "cáncer", "leo", "virgo", "libra",
                     "escorpio", "sagitario")
            meses = {"enero": 1, "febrero": 2, "marzo": 3, "abril": 4,
                     "mayo": 5, "junio": 6, "julio": 7, "agosto": 8,
                     "septiembre": 9, "octubre": 10, "noviembre": 11,
                     "diciembre": 12}
            # Day of the month on which each sign ends
            fechas = (20, 19, 20, 20, 21, 21, 22, 22, 22, 22, 22, 21)
            dia = 0
            mes = 0
            words = tr_.replace("\n", " ").split(" ")
            for w in words:
                if w.isdigit() and len(w) <= 2:
                    dia = int(w)
                if w.lower() in meses:
                    mes = meses.get(w.lower())
            mes = mes - 1
            if dia > fechas[mes]:
                mes = mes + 1
            if mes == 12:
                mes = 0
            print("|----[INFO][HOROSCOPE][>] " + signo[mes])
            # Save to DB
            birth = tr_.replace("Nacimiento", "")
            mongo.personalData_Wikipedia_insertMongoDB(target, birth, url, 1)
            mongo.personalData_Wikipedia_insertMongoDB(target, signo[mes], url, 3)
            data.Wiki_birth = birth
            data.Wiki_url = url
            data.WIki_horoscopo = signo[mes]
        if "Fallecimiento" in tr_:
            print(f"|----[INFO][DEATH][>] Death found on Wikipedia {tr_}")
            # Save to DB
            mongo.personalData_Wikipedia_insertMongoDB(target, tr_, url, 2)
            data.Wiki_death = tr_
        if "Partido político" in tr_:
            print(f"|----[INFO][POLITICAL PARTY][>] {tr_}")
            # Save to DB
            mongo.personalData_Wikipedia_insertMongoDB(target, tr_, url, 7)
            data.Wiki_politicalParty = tr_
        if "Ocupación" in tr_:
            print(f"|----[INFO][EMPLOYMENT][>] {tr_}")
            # Save to DB
            mongo.personalData_Wikipedia_insertMongoDB(target, tr_, url, 4)
            data.Wiki_employment = tr_.replace("Ocupación", "")
        if "Religión" in tr_:
            print(f"|----[INFO][RELIGION][>] {tr_}")
            # Save to DB
            mongo.personalData_Wikipedia_insertMongoDB(target, tr_, url, 5)
            data.Wiki_religion = tr_
        if "Hijos" in tr_:
            print(f"|----[INFO][SONS][>] {tr_}")
            # Save to DB
            mongo.personalData_Wikipedia_insertMongoDB(target, tr_, url, 6)
            data.Wiki_sons = tr_
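# Worked example of the sign lookup above, with a hypothetical row text:
#
#   tr_ = "Nacimiento 4 de enero de 1990"
#     -> dia = 4, mes = 1 ("enero"); mes - 1 = 0
#     -> 4 <= fechas[0] (20), so signo[0] = "capricornio" (Capricorn)
#
# A birth on "25 de enero" would give dia > fechas[0], bumping the index
# to 1 and yielding signo[1] = "acuario" (Aquarius).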