def search_abc_es(target):
    # Search the ABC newspaper archive (hemeroteca) for the target name.
    target = target.replace(" ", "+")
    try:
        url = f"https://abc.es/hemeroteca/resultados-busqueda-avanzada/todo?exa={target}"
        html = requests.get(url).text
        soup = BeautifulSoup(html, "html.parser")
        results = soup.findAll("a", attrs={"class": "titulo"})
        if results:
            for r in results:
                print()
                print("|----[INFO][SPAINPRESS][ABC][>]")
                print("|--------[TITLE][>] " + er.remove_tags(str(r)))
                print("|--------[URL][>] " + r["href"])
        else:
            print()
            print("|----[INFO][SPAINPRESS][ABC][>] No record found...")
    except Exception as e:
        print()
        print(f"|----[WARNING][ABC ERROR][>] {e}")
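# `er.remove_tags` is a project helper used throughout these functions but
# not shown in this section. A minimal sketch of what it is assumed to do
# (strip HTML/XML tags from a string); the real helper may also normalise
# whitespace or accents (see er.replace_acentos elsewhere):
import re

def remove_tags(text):
    # Drop anything that looks like an HTML/XML tag.
    return re.sub(r"<[^>]+>", "", text)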
def search_google_(target):
    engine = Google()
    results = engine.search("'" + target + "'")
    for r in results:
        print("|")
        print("|----[INFO][GOOGLE][RESULTS][>] " + r["title"])
        print("|----[INFO][GOOGLE][RESULTS][DESCRIPTION][>] " + r["text"])
        print("|----[INFO][GOOGLE][RESULTS][LINK][>] " + r["link"])
        try:
            tsd, td, tsu = extract(r["link"])
            domain = td + '.' + tsu
            # If the result belongs to a known Spanish newspaper, use the
            # dedicated news parser; otherwise fall back to the generic one.
            with open("data/newspaper/spain-newspaper.txt", "r") as spain_newspaper:
                newspapers = [news.strip() for news in spain_newspaper]
            if domain in newspapers:
                newspaper.news_parser(r["link"], target)
            elif domain not in config.BL_parserPhone:
                web = requests.get(r["link"], timeout=3)
                if 200 <= web.status_code < 300:
                    TEXT = er.remove_tags(str(web.text))
                    parser.parserMAIN(TEXT)
            print("|")
        except Exception as e:
            print("|----[ERROR][HTTP CONNECTION][>] " + str(e))
def parser_email(text):
    # Loose e-mail pattern; the hyphen sits at the end of the character
    # class so it is matched literally instead of forming a range.
    r = re.compile(
        r"[a-z0-9!#$%&'*+/=?^_`{|}~.-]{1,64}@[a-zA-Z0-9]{1,255}\.[a-zA-Z0-9-]{1,24}"
    )
    results = r.findall(text)
    if results:
        for x in results:
            x = er.replace_acentos(
                er.remove_tags(er.replace_letras_raras(str(x))))
            print("|--------[INFO][PARSER][EMAIL][>] " + x)
            if len(x) < 20:
                config.emailsData_list.append(x)
def parser_email(text):
    # RFC-style pattern, left unanchored so that re.findall can pick up
    # addresses embedded anywhere in a longer text.
    r = re.compile(
        r"[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*"
    )
    results = r.findall(text)
    if results:
        for x in results:
            x = er.replace_acentos(
                er.remove_tags(er.replace_letras_raras(str(x))))
            print("|--------[INFO][PARSER][EMAIL][>] " + x)
            if len(x) < 20:
                config.emailsData_list.append(x)
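# A minimal usage sketch for the parser above (hypothetical input; the
# `er` helpers and `config.emailsData_list` are assumed to be initialised
# as in the rest of the tool):
#
#   parser_email("Contact: foo@example.com for details.")
#
# prints "|--------[INFO][PARSER][EMAIL][>] foo@example.com" and, since
# the address is under 20 characters, appends it to config.emailsData_list.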
def searchPaginasAmarillas(nombre, a1, a2, loc):
    # Look the person up in the Spanish white pages (Páginas Blancas).
    url = ("http://blancas.paginasamarillas.es/jsp/resultados.jsp?no=" + nombre
           + "&ap1=" + a1 + "&ap2=" + a2 + "&sec=41&pgpv=1&tbus=0&nomprov="
           + loc + "&idioma=spa")
    html = requests.get(url).text
    soup = BeautifulSoup(html, "html.parser")
    r = soup.find("div", attrs={'class': 'resul yellad yellad_ad0'})
    if r is not None:
        r = er.remove_tags(str(r))
        print("|----[INFO][PAGINAS AMARILLAS][>] ")
        print(" - " + str(cleanPaginasAmarillas_result(r)))
def search_abc_es(target):
    target = target.replace(" ", "+")
    url = f"https://abc.es/hemeroteca/resultados-busqueda-avanzada/todo?exa={target}"
    html = requests.get(url).text
    soup = BeautifulSoup(html, "html.parser")
    results = soup.findAll("a", attrs={"class": "titulo"})
    for r in results:
        print()
        print("|----[INFO][SPAINPRESS][ABC][>]")
        print("|--------[TITLE][>] " + er.remove_tags(str(r)))
        print("|--------[URL][>] " + r["href"])
def search_google_(target):
    engine = Google()
    results = engine.search("'" + target + "'")
    for r in results:
        print("|--[INFO][GOOGLE][RESULTS][>] " + r["title"] + " | "
              + r["text"] + " | " + r["link"])
        try:
            web = requests.get(r["link"], timeout=3)
            print("|----[INFO][WEB][HTTP CODE][>] " + str(web.status_code) + "\n")
            # Only parse pages that returned a 2xx response.
            if 200 <= web.status_code < 300:
                TEXT = er.remove_tags(str(web.text))
                parser.parserMAIN(TEXT)
        except Exception as e:
            print("|----[ERROR][HTTP CONNECTION][>] " + str(e))
def check_facebook(email):
    # Try a dummy login on the mobile site and inspect the error message
    # to infer whether an account is registered for this address.
    r = br.open('https://mbasic.facebook.com/')
    br.select_form(nr=0)
    br.form["email"] = email
    br.form["pass"] = "******"
    br.submit()
    html = br.response().read()
    soup = BeautifulSoup(html, "html.parser")
    div = soup.find("div", {"id": "login_error"})
    # "ninguna" appears in the Spanish error text shown for unknown accounts.
    if "ninguna" in R.remove_tags(str(div)):
        print("|--[INFO][FACEBOOK][CHECK][>] Account doesn't exist...")
    else:
        print("|--[INFO][FACEBOOK][CHECK][>] The account exists...")
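# `br` is used as a module-level mechanize browser throughout these checks,
# but its setup is not shown in this section. A minimal sketch of the
# assumed initialisation:
import mechanize

br = mechanize.Browser()
br.set_handle_robots(False)  # ignore robots.txt, which disallows these pages
br.addheaders = [('User-agent', 'Mozilla/5.0')]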
def searchInfojobs(nombre, a1, a2, loc):
    headers = {'User-Agent': "DG Minimal Version"}
    url_array = (
        "https://www.infojobs.net/" + nombre.replace(" ", "-") + "-"
        + a1.replace(" ", "-") + "-" + a2.replace(" ", "-") + ".prf",
        "https://www.infojobs.net/" + nombre.replace(" ", "-") + "-"
        + a1.replace(" ", "-") + ".prf",
        "https://www.infojobs.net/" + nombre.replace(" ", "-") + "-"
        + a1.replace(" ", "-") + "-1.prf",
    )
    for url in url_array:
        html = requests.get(url, headers=headers).text
        soup = BeautifulSoup(html, "html.parser")
        h1s = soup.findAll("h1")
        for h1 in h1s:
            # InfoJobs shows an "are you human?" ("humano") page when it
            # suspects automated traffic.
            if "humano" in er.remove_tags(str(h1)):
                print()
                print("|----[INFO][INFOJOBS][>] Captcha detected...")
                break
            else:
                print()
                print("|----[INFO][INFOJOBS][>] " + str(h1))
def check_facebook(phone):
    # Same dummy-login trick as the e-mail variant, using a phone number.
    r = br.open('https://mbasic.facebook.com/')
    br.select_form(nr=0)
    br.form["email"] = phone
    br.form["pass"] = "******"
    br.submit()
    html = br.response().read()
    soup = BeautifulSoup(html, "html.parser")
    a = soup.find("a", {"class": "bb"})
    # The "forgotten password" ("olvidado") link only shows up for
    # accounts that actually exist.
    if "olvidado" in R.remove_tags(str(a)):
        print("|--[INFO][FACEBOOK][CHECK][>] The account exists... \n")
    else:
        print("|--[INFO][FACEBOOK][CHECK][>] Account doesn't exist... \n")
def search_DDG_DORKS(TITLE, TEXT_0, target):
    engine = Duckduckgo()
    for FC_domain in config.FC_list:
        results = engine.search(f"site:{FC_domain} {TITLE}")
        for r in results:
            print("|--[INFO][DDG][RESULTS][>] " + r["title"] + " | "
                  + r["text"] + " | " + r["link"])
            try:
                tsd, td, tsu = extract(r["link"])
                domain = td + '.' + tsu
                web = requests.get(r["link"], timeout=3)
                print("|----[INFO][WEB][HTTP CODE][>] " + str(web.status_code) + "\n")
                if 200 <= web.status_code < 300:
                    # Skip PDFs and blacklisted domains.
                    if ".pdf" not in r["link"] and domain not in config.BL_parserPhone:
                        TEXT = er.remove_tags(str(web.text))
                        parser.FC_words_in_text(TEXT)
                        parser.parserMAIN(TEXT)
                        ratio = compareTEXT(TEXT_0, TEXT)
                        print(f"|----[INFO][COMPARE TEXTS][>] Ratio: {ratio}")
                        # Save the info to a log
                        data = f"{r['title']} ||| {r['link']} ||| {r['text']}, ||| {ratio} \n"
                        generateLOG(data, target)
                print("")
                time.sleep(2)
            except Exception as e:
                print("|----[ERROR][HTTP CONNECTION][>] " + str(e))
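# `compareTEXT` is called above but not defined in this section. A minimal
# sketch of a text-similarity ratio using only the standard library
# (an assumption about the helper, not the tool's actual implementation):
import difflib

def compareTEXT(text_a, text_b):
    # Returns a similarity ratio in [0, 1]; 1.0 means identical texts.
    return difflib.SequenceMatcher(None, text_a, text_b).ratio()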
def check_netflix(email):
    try:
        r = br.open('https://www.netflix.com/es/login')
        br.select_form(nr=0)
        br.form["userLoginId"] = email
        br.form["password"] = "******"
        br.submit()
        html = br.response().read()
        soup = BeautifulSoup(html, "html.parser")
        div = soup.find("div", {"class": "ui-message-contents"})
        # "ninguna" appears in the Spanish "no account found" message.
        if "ninguna" in R.remove_tags(str(div)):
            print("|--[INFO][NETFLIX][ES][CHECK][>] Account doesn't exist...")
        else:
            print("|--[INFO][NETFLIX][ES][CHECK][>] The account exists...")
    except Exception:
        print(C.colores.green + "|--[ERROR][Check_Netflix][>] Netflix error..." + C.colores.normal)
def check_wordpress(email):
    try:
        r = br.open('http://wordpress.com/wp-login.php')
        br.select_form("loginform")
        br.form["log"] = email
        br.form["pwd"] = "123456"
        br.submit()
        html = br.response().read()
        soup = BeautifulSoup(html, "html.parser")
        divError = soup.findAll("div", {"id": "login_error"})
        div = R.remove_tags(str(divError))
        # An "incorrect password" error means the user exists;
        # an "Invalid username" error means it does not.
        if "incorrect" in div:
            print("|--[INFO][WordPress][CHECK][>] The account exists...")
        if "Invalid" in div:
            print("|--[INFO][WordPress][CHECK][>] Account doesn't exist...")
    except Exception:
        print(C.colores.alert + "|--[WARNING][WordPress][>] Error..." + C.colores.normal)
def check_AccountTwitter(email):
    username = get_usernameEmail(email)
    url = "https://twitter.com/" + username
    try:
        html = requests.get(url).text
        soup = BeautifulSoup(html, "html.parser")
        for text in soup.findAll("h1"):
            text = R.remove_tags(str(text))
            # Twitter's "account doesn't exist" page starts with "Sorry"
            # (or "Lo sentimos," in Spanish).
            if "Sorry" in text or "Lo sentimos," in text:
                print("|--[INFO][Twitter][" + C.colores.blue + username
                      + C.colores.normal + "][>] Account doesn't exist...")
            else:
                print(C.colores.green + "|--[INFO][Twitter][" + C.colores.blue
                      + username + C.colores.green + "][>] The account exists."
                      + C.colores.normal)
    except requests.exceptions.RequestException:
        print(C.colores.alert
              + "|--[HTTP ERROR][Check_AccountTwitter][>] HTTP Twitter error..."
              + C.colores.normal)
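# `get_usernameEmail` is not shown in this section. A minimal sketch,
# assuming the Twitter handle is guessed from the local part of the
# address (a hypothetical helper, not the tool's confirmed logic):
def get_usernameEmail(email):
    # "john.doe@example.com" -> "john.doe"
    return email.split("@")[0]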
def check_Facebook(email):
    url = 'https://mbasic.facebook.com/'
    br.open(url)
    br.select_form(nr=0)
    response = br.submit()
    br.select_form(nr=0)  # this is the login/password form -> nr = 0
    br.form['email'] = email
    br.form['pass'] = "******"
    response = br.submit()
    html = br.response().read()
    soup = BeautifulSoup(html, "html.parser")
    divError = soup.findAll("div", {"id": "login_error"})
    div = R.remove_tags(str(divError))
    if "no coincide" in div or "do not match" in div:
        print("|--[INFO][Facebook][CHECK][>] Account doesn't exist...")
    else:
        print(C.colores.green + "|--[INFO][Facebook][CHECK][>] The account exists..." + C.colores.normal)
def main():
    # Print the main banner
    print(config.banner)
    # Ask for the URL to analyse
    url = input("Insert URL: ")
    # Fetch the HTML
    HTML = requests.get(url)
    # Extract the title
    TITLE = footprintingWEB_TITLE(HTML)
    # Extract the description
    DESC = er.remove_tags(str(footprintingWEB_DESC(HTML)))
    print(f"|----[TARGET][>] {url}")
    print(f"|--------[TARGET][TITLE][>] {TITLE}")
    print(f"|--------[TARGET][DESCRIPTION][>] {DESC}")
    time.sleep(2)
    # Extract the text of the news article
    TEXT_0 = er.remove_tags(str(HTML.text))
    # Look for a date in the URL
    DATE = parser.parser_EN_DATE(url)
    # Parse the text and extract the different kinds of data
    parser.parserMAIN(TEXT_0)
    time.sleep(3)
    # Search Google and DuckDuckGo
    print("|----[INFO][>] Now let's look for other news: \n")
    m = input("Do you want to search the original web? (Y/n): ")
    if m.lower() == "y":
        search_google_(TITLE, TEXT_0)
        search_DDG_(TITLE, TEXT_0)
    # Search fact-checking platforms
    m = input("Do you want to analyze in fact-checking platforms? (Y/n): ")
    if m.lower() == "y":
        # Search DuckDuckGo with dorks
        search_DDG_DORKS(TITLE, TEXT_0, url)
    # Search Twitter
    m = input("Do you want to search in Twitter? (Y/n): ")
    if m.lower() == "y":
        Twint.search_Twitter(url)
def extract_personalData_wikipedia(html, url, target):
    soup = BeautifulSoup(html.text, "html.parser")
    tables = soup.findAll("tr")
    for tr in tables:
        tr_ = er.remove_tags(str(tr))
        if "Nacimiento" in tr_:
            print(f"|----[INFO][AGE][>] Age found on Wikipedia {tr_}")
            # Work out the zodiac sign from the birth date
            signo = ("capricornio", "acuario", "piscis", "aries", "tauro",
                     "géminis", "cáncer", "leo", "virgo", "libra",
                     "escorpio", "sagitario")
            meses = {"enero": 1, "febrero": 2, "marzo": 3, "abril": 4,
                     "mayo": 5, "junio": 6, "julio": 7, "agosto": 8,
                     "septiembre": 9, "octubre": 10, "noviembre": 11,
                     "diciembre": 12}
            # Day of the month on which each sign ends
            fechas = (20, 19, 20, 20, 21, 21, 22, 22, 22, 22, 22, 21)
            dia = 0
            mes = 0
            words = tr_.replace("\n", " ").split(" ")
            for w in words:
                if w.isdigit() and len(w) <= 2:
                    dia = int(w)
                if w.lower() in meses:
                    mes = meses.get(w.lower())
            mes = mes - 1
            if dia > fechas[mes]:
                mes = mes + 1
            if mes == 12:
                mes = 0
            print("|----[INFO][HOROSCOPE][>] " + signo[mes])
            # Save to DB
            birth = tr_.replace("Nacimiento", "")
            mongo.personalData_Wikipedia_insertMongoDB(target, birth, url, 1)
            mongo.personalData_Wikipedia_insertMongoDB(target, signo[mes], url, 3)
            data.Wiki_birth = birth
            data.Wiki_url = url
            data.WIki_horoscopo = signo[mes]
        if "Fallecimiento" in tr_:
            print(f"|----[INFO][DEATH][>] Death found on Wikipedia {tr_}")
            # Save to DB
            mongo.personalData_Wikipedia_insertMongoDB(target, tr_, url, 2)
            data.Wiki_death = tr_
        if "Partido político" in tr_:
            print(f"|----[INFO][POLITICAL PARTY][>] {tr_}")
            # Save to DB
            mongo.personalData_Wikipedia_insertMongoDB(target, tr_, url, 7)
            data.Wiki_politicalParty = tr_
        if "Ocupación" in tr_:
            print(f"|----[INFO][EMPLOYMENT][>] {tr_}")
            # Save to DB
            mongo.personalData_Wikipedia_insertMongoDB(target, tr_, url, 4)
            data.Wiki_employment = tr_.replace("Ocupación", "")
        if "Religión" in tr_:
            print(f"|----[INFO][RELIGION][>] {tr_}")
            # Save to DB
            mongo.personalData_Wikipedia_insertMongoDB(target, tr_, url, 5)
            data.Wiki_religion = tr_
        if "Hijos" in tr_:
            print(f"|----[INFO][SONS][>] {tr_}")
            # Save to DB
            mongo.personalData_Wikipedia_insertMongoDB(target, tr_, url, 6)
            data.Wiki_sons = tr_
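# Worked example of the sign lookup above, with a hypothetical row text:
#
#   tr_ = "Nacimiento 4 de enero de 1990"
#     -> dia = 4, mes = 1 ("enero"); mes - 1 = 0
#     -> 4 <= fechas[0] (20), so signo[0] = "capricornio" (Capricorn)
#
# A birth on "25 de enero" would give dia > fechas[0], bumping the index
# to 1 and yielding signo[1] = "acuario" (Aquarius).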