Пример #1
0
class crearCorros:
    """Fill in a registration form through Tor Browser with generated data.

    Creating an instance starts Tor Browser and builds two parallel lists
    from local text files:

    * ``lista`` -- one ``...@maildrop.cc`` address per line of ``nombre.txt``
    * ``datosContrasenna`` -- one password per line of ``contraseña.txt``

    The form-filling method stores the most recently located page elements
    on the instance (``eamil``, ``pasword``, ``name``).
    """

    # How many lines are consumed from each input file.
    _FILAS = 101

    # A base character followed by combining marks.  The tilde that follows
    # n/N is deliberately excluded so that "ñ" survives the stripping.
    _ACENTOS = r"([^n\u0300-\u036f]|n(?!\u0303(?![\u0300-\u036f])))[\u0300-\u036f]+"

    def __init__(self):
        self.urlProtocoe = (
            'http://3g2upl4pq6kufc4m.onion',
            'https://mail.protonmail.com/create/new',
            'https://singlelogin.org/registration.php',
        )
        print(self.urlProtocoe[2])
        self.tbb_dir = "/usr/local/share/tor-browser_en-US"
        self.protocoe = TorBrowserDriver(self.tbb_dir, tbb_logfile_path='test.log')
        self.dirNombre = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/crearCorreos/nombre.txt'
        # The surname and second-password paths are kept for reference only;
        # this variant never reads them, so they are no longer opened.
        self.dirapellido = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/crearCorreos/apellidos.txt'
        self.dirContrasenna = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/crearCorreos/contraseña.txt'
        self.dirCotrasenna2 = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/crearCorreos/contraseña2.txt'

        nombres = self._leer_lineas(self.dirNombre, self._FILAS)
        contrasennas = self._leer_lineas(self.dirContrasenna, self._FILAS)
        # Every row that was read is normalised (previously the last one kept
        # its raw newline and never received the mail domain).
        self.lista = [
            self._limpiar(nombre + 'asdsdf', 'asdaawderca') + '@maildrop.cc'
            for nombre in nombres
        ]
        self.datosContrasenna = [
            self._limpiar(clave + "blabal", 'radabanals')
            for clave in contrasennas
        ]

    @staticmethod
    def _leer_lineas(ruta, cantidad):
        """Return the first *cantidad* lines of *ruta* ('' once past EOF)."""
        with open(ruta, 'r') as archivo:
            return [archivo.readline() for _ in range(cantidad)]

    @classmethod
    def _limpiar(cls, texto, relleno):
        """Replace newlines in *texto* with *relleno* and strip accents.

        The text is decomposed (NFD), combining marks are dropped except the
        tilde of ``ñ``/``Ñ``, and the result is recomposed (NFC).
        """
        texto = re.sub('\n', relleno, texto)
        texto = re.sub(cls._ACENTOS, r"\1", normalize("NFD", texto),
                       count=0, flags=re.I)
        return normalize('NFC', texto)

    def iniciarTor(self):
        """Load the third URL of ``urlProtocoe`` in the browser."""
        self.protocoe.load_url(self.urlProtocoe[2])

    def ingresarDatos(self, fila):
        """Type row *fila* into the ``email``, ``password`` and ``name`` fields.

        Random pauses are inserted between the actions and the form is
        submitted with RETURN from the ``name`` field.
        """
        self.eamil = self.protocoe.find_element_by_name('email')
        self.eamil.click()
        sleep(random.uniform(1.0, 4))
        self.eamil.send_keys(self.lista[fila])
        self.pasword = self.protocoe.find_element_by_name("password")
        self.pasword.click()
        sleep(random.uniform(1.0, 5))
        self.pasword.send_keys(self.datosContrasenna[fila])
        self.name = self.protocoe.find_element_by_name("name")
        sleep(random.uniform(1.0, 6))
        self.name.click()
        # NOTE(review): the name field receives the password text, exactly as
        # before -- confirm this is intended.
        self.name.send_keys(self.datosContrasenna[fila])
        sleep(random.uniform(2.0, 8.7))
        self.name.send_keys(Keys.RETURN)

    def serrarTor(self):
        """Close the browser window."""
        self.protocoe.close()

    def imprimirDatos(self):
        """Print the first 100 address/password pairs, one value per line."""
        for d in range(0, 100):
            print(self.lista[d])
            print(self.datosContrasenna[d])
Пример #2
0
def up(name, ema, pas):
    """Submit the Udemy sign-up form through Tor Browser.

    Args:
        name: text typed into the full-name field.
        ema: text typed into the e-mail field.
        pas: text typed into the password field.

    Returns:
        ``True`` when the URL reached after submitting contains
        ``occupation`` or ``=1``; ``False`` otherwise (the original fell
        through and returned ``None`` in that case).

    The browser is always closed before returning, including when an
    exception propagates.
    """
    # getElementsByClassName returns a collection, so each element has to be
    # hidden individually (the old script read ".sytle" off the collection
    # and therefore always threw).
    hide_banner = (
        'var els = document.getElementsByClassName("ot-sdk-container");'
        'for (var i = 0; i < els.length; i++) { els[i].style.display = "none"; }'
    )

    browser = TorBrowserDriver(tbb_dir, tor_cfg=cm.USE_STEM)
    try:
        # connect to site
        browser.load_url(
            "https://www.udemy.com/join/signup-popup/?locale=en_US&response_type=html&next=https%3A%2F%2Fwww.udemy.com%2F",
            wait_on_page=5,
            wait_for_page_body=True)
        # fill in full name, e-mail and password
        browser.find_element_by_id("id_fullname").send_keys(name)
        browser.find_element_by_id("email--1").send_keys(ema)
        browser.find_element_by_id("password").send_keys(pas)
        # scroll down and untick the newsletter checkbox
        browser.execute_script("window.scrollBy(0,200)")
        browser.execute_script(
            'document.getElementById("id_subscribe_to_emails").checked = false')
        # submit the form
        browser.find_element_by_id('submit-id-submit').click()
        sleep(1)

        if 'occupation' in browser.current_url:
            sleep(3)
            try:
                browser.execute_script(hide_banner)
            except Exception:
                # Best effort: a failure to hide the banner is retried below
                # only if the click itself fails.
                pass
            cl = browser.find_elements_by_class_name("udlite-btn")
            try:
                cl[0].click()
            except Exception:
                # The banner may still cover the button: hide it, retry once.
                browser.execute_script(hide_banner)
                cl[0].click()
            sleep(3)
            return True

        return '=1' in browser.current_url
    finally:
        browser.close()
class TruliaHelper():
    """Search trulia.com for each address of a spreadsheet via Tor Browser."""

    def __init__(self):
        self.url = 'https://www.trulia.com'
        # Path of the local Tor Browser bundle; adjust for the machine in use.
        tbpath = "/home/XX/XXXX/tor-browser-linux64-8.0.8_en-US/tor-browser_en-US"
        self.driver = TorBrowserDriver(tbb_path=tbpath, tbb_logfile_path='test.log')

    @staticmethod
    def _format_query(parts):
        """Join address parts into one space-separated search string.

        ``str()`` of the row list would type brackets and quotes into the
        search box, e.g. ``"['512 W 10th St', 'Perris', 'CA', 92570]"``.
        """
        return " ".join(str(part) for part in parts)

    def getItems(self):
        """Search every spreadsheet row and collect ``getItemDetail()`` results.

        Returns:
            list of dicts, one per row, in spreadsheet order.

        The driver is closed when the loop ends, also on error.
        """
        df = pd.read_excel("/home/XXXXX/XXXXX/XXXXXX.xlsx")
        columns = ['Site Address', 'Site City', 'Site State', 'Site Zip']
        rows = df[columns].values.tolist()
        items = []
        try:
            for row in rows:
                self.driver.get(self.url)
                search_box = self.driver.find_element_by_id("homepageSearchBoxTextInput")
                search_box.clear()
                search_box.send_keys(self._format_query(row))
                # find_element_* raises when nothing matches, so the button
                # needs no extra truthiness check.
                search_btn = self.driver.find_element_by_xpath("//button[@data-auto-test-id='searchButton']")
                search_btn.click()
                time.sleep(10)  # fixed wait for the result page
                items.append(self.getItemDetail())
        finally:
            self.driver.close()
        return items

    def getItemDetail(self):
        """Extract the price shown on the page currently loaded.

        Returns:
            ``{"price": <text>}`` when the price element is found, otherwise
            an empty dict.  Problems are reported on stdout instead of being
            raised, so one bad page does not abort the whole run.
        """
        data = {}
        try:
            soup = BeautifulSoup(self.driver.page_source, u'html.parser')
            node = soup.find("div", attrs={"class": "Text__TextBase-sc-1cait9d-0-div Text__TextContainerBase-sc-1cait9d-1 hlvKRM"})
        except Exception as exc:
            print("Could not read item detail:", exc)
            return data
        if node is not None:
            data["price"] = node.text
            print(node.text)
        return data

    # method to start process.
    def start(self):
        """Run the whole search and print the collected items."""
        items = self.getItems()
        print("Items : ", items)
Пример #4
0
class TestSite(unittest.TestCase):
    """Checks that Tor Browser reports being routed through Tor."""

    def setUp(self):
        # Directory of the local tor-browser_en-US bundle.
        bundle_dir = '/home/kdas/.local/tbb/tor-browser_en-US/'
        self.driver = TorBrowserDriver(bundle_dir, tbb_logfile_path='test.log')
        self.url = "https://check.torproject.org"

    def tearDown(self):
        # Each test gets a fresh browser, so close this one.
        self.driver.close()

    def test_available(self):
        expected = "Congratulations. This browser is configured to use Tor."
        self.driver.load_url(self.url)
        # The success banner is the element carrying the "on" class.
        banner = self.driver.find_element_by_class_name('on')
        self.assertEqual(banner.text.strip(), expected)
        sleep(2)  # leave the page visible for a moment
Пример #5
0
class validarCuentas:
    """Open maildrop.cc inboxes through Tor Browser and follow a mail link.

    ``email`` holds one mailbox name per line read from the names file.
    """

    _DIR_NOMBRE = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/crearCorreos/nombre.txt'

    # How many lines are consumed from the names file.
    _FILAS = 101

    # A base character followed by combining marks.  The tilde that follows
    # n/N is deliberately excluded so that "ñ" survives the stripping.
    _ACENTOS = r"([^n\u0300-\u036f]|n(?!\u0303(?![\u0300-\u036f])))[\u0300-\u036f]+"

    def __init__(self, dirNombre=None):
        """Build ``email`` from the names file.

        Args:
            dirNombre: path of the names file; defaults to the original
                hard-coded location.
        """
        self.dirNombre = self._DIR_NOMBRE if dirNombre is None else dirNombre
        with open(self.dirNombre, 'r') as archivo:
            lineas = [archivo.readline() for _ in range(self._FILAS)]
        # Every row that was read is normalised (previously the last one kept
        # its raw newline).
        self.email = [self._limpiar(linea + 'asdsdf') for linea in lineas]

    @classmethod
    def _limpiar(cls, texto):
        """Replace newlines with the filler text and strip accents.

        The text is decomposed (NFD), combining marks are dropped except the
        tilde of ``ñ``/``Ñ``, and the result is recomposed (NFC).
        """
        texto = re.sub('\n', 'asdaawderca', texto)
        texto = re.sub(cls._ACENTOS, r"\1", normalize("NFD", texto),
                       count=0, flags=re.I)
        return normalize('NFC', texto)

    def iniciarTor(self, fila):
        """Start Tor Browser and load maildrop.cc (*fila* is not used)."""
        self.tbb_dir = "/usr/local/share/tor-browser_en-US"
        self.mailpro = TorBrowserDriver(self.tbb_dir,
                                        tbb_logfile_path='test.log')
        self.mailpro.load_url('https://maildrop.cc/')

    def ingresarDatos(self, fila):
        """Open the inbox for row *fila* and click a link inside the mail.

        The elements are picked by position (second ``<input>``, fifteenth
        ``<div class=...>``, second ``<a href=...>`` inside the iframe), so
        this depends on the page layout.
        """
        self.pulsar = self.mailpro.find_elements_by_xpath('//input')[1]
        self.pulsar.send_keys(self.email[fila])
        self.pulsar.send_keys(Keys.RETURN)
        sleep(6)
        self.correo = self.mailpro.find_elements_by_xpath('//div[@class]')[14]
        self.correo.click()
        sleep(5)
        self.iframe = self.mailpro.find_element_by_tag_name('iframe')
        self.mailpro.switch_to.frame(self.iframe)
        print(self.mailpro.page_source)
        self.mailpro.find_elements_by_xpath('//a[@href]')[1].click()

    def serrarTor(self):
        """Close the browser window."""
        self.mailpro.close()

    def imprimirDatos(self):
        """Print the first 100 mailbox names."""
        for d in range(0, 100):
            print(self.email[d])
Пример #6
0
class DescargarPdf:
    """Walk a search-result page through Tor Browser and download each book.

    Typical call order, as far as this class shows: ``iniciarTor``,
    ``UsuariosYcontraseñas``, ``credenciales``, ``iniciarSecion``,
    ``paginaPrinsipal`` / ``cambiarPagina``, ``paginaDescargas``, ``urlPdf``.
    Progress counters are persisted through module-level helpers
    (``usuarioUsadosLeer``, ``datosDescarga``, ``guardarNumeroDescargas``,
    ``usuarioUsadosReescrivir``) that are defined outside this class.
    """

    _DIR_CREDENCIALES = '/home/dgc7/Documentos/zlibrary/credenciales/contraseñasYcorreos.txt'

    def __init__(self):
        self.tbb_dir = "/usr/local/share/tor-browser_en-US"
        self.usuario = []
        self.contraseñaTxT = []
        self.conversor = '?convertedTo=pdf'

    def iniciarTor(self):
        """Start Tor Browser."""
        self.zLibraty = TorBrowserDriver(self.tbb_dir,
                                         tbb_logfile_path='test.log')

    def iniciarSecion(self):
        """Type the current credentials into the login form and submit."""
        self.element = self.zLibraty.find_element_by_name("email")
        self.element.send_keys(self.correo)
        sleep(2)
        self.element2 = self.zLibraty.find_elements_by_class_name(
            "form-control")[1]
        self.element2.send_keys(self.contraseña)
        self.element2.send_keys(Keys.RETURN)

    def paginaDescargas(self):
        """Load ``self.url`` and keep its HTML in ``self.html``."""
        print("estoy en la funcion paginaDescagas")
        sleep(4)
        self.zLibraty.get(self.url)
        self.html = self.zLibraty.page_source

    def paginaPrinsipal(self, añoInicial, añoFinal):
        """Set ``self.url`` to the search URL for the given year range."""
        self.urlAños = 'http://zlibraryexau2g3p.onion/s/?yearFrom=' + str(
            añoInicial) + '&yearTo=' + str(añoFinal)
        self.url = self.urlAños

    def cambiarPagina(self, x):
        """Append ``&page=<x>`` to ``self.url``."""
        self.url += '&page=' + str(x)

    def Crearcsv(self):
        """(Re)create the two CSV logs inside ``carpetaUrl``.

        Both files are opened in ``'w'`` mode, so every call truncates them.
        """
        # Close the handles of a previous call explicitly so buffered rows
        # reach disk instead of waiting for the garbage collector.
        for archivo in getattr(self, '_archivosCsv', ()):
            archivo.close()
        self._archivosCsv = []
        self.carpetaUrl = '/home/dgc7/Documentos/zlibrary/libros1920-1921/url'
        try:
            os.mkdir(self.carpetaUrl)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        # newline='' is what the csv module asks for on files it writes to.
        archivoWed = open(
            '/home/dgc7/Documentos/zlibrary/libros1920-1921/url/url2.csv',
            'w', newline='')
        self._archivosCsv.append(archivoWed)
        archivoPdf = open(
            '/home/dgc7/Documentos/zlibrary/libros1920-1921/url/urlDowload2.csv',
            'w', newline='')
        self._archivosCsv.append(archivoPdf)
        self.escrivirUrlWed = csv.writer(archivoWed)
        self.imprimirUrlPdf = csv.writer(archivoPdf)

    def credenciales(self, numeroUsuario):
        """Select credential pair *numeroUsuario* and open the login page."""
        print("llegue")
        self.correo = self.usuario[numeroUsuario]
        self.contraseña = self.contraseñaTxT[numeroUsuario]
        self.urlLoguin = 'http://zlibraryexau2g3p.onion'
        self.zLibraty.get(self.urlLoguin)

    def UsuariosYcontraseñas(self, ruta=None):
        """Append 100 user/password pairs read from the credentials file.

        The file alternates lines: user, password, user, password, ...
        Lines are stored exactly as read (including the trailing newline;
        '' once the file is exhausted).

        Args:
            ruta: path of the credentials file; defaults to the original
                hard-coded location.
        """
        self.dir = self._DIR_CREDENCIALES if ruta is None else ruta
        with open(self.dir, 'r') as datos:
            for _ in range(100):
                self.usuario.append(datos.readline())
                self.contraseñaTxT.append(datos.readline())

    def urlPdf(self, ):
        """Process every book link found in ``self.html``.

        Each link is logged to the URL CSV.  Books are skipped until the
        running index (``respaldoContador``) reaches the persisted download
        counter (``contadorLibros``); from there on each book is downloaded,
        and the account is switched whenever ``contadorLibros2`` is a
        multiple of 10.

        Raises:
            OSError: re-raised after the counters have been persisted.
        """
        self.boleanoPdf = 0
        self.respaldoContador = 0
        self.contadorUsuarios = usuarioUsadosLeer()
        self.contadorLibros = datosDescarga(4)
        self.contadorLibros2 = self.contadorLibros % 10
        self.Crearcsv()
        self.soup = BeautifulSoup(self.html, 'html.parser')
        try:
            for self.urlwed in self.soup.find_all(itemprop="name"):
                self.contador = 0
                self.urlwed = self.urlwed.find('a', href=re.compile(''))
                self.urlDowload = self.urlwed.get('href')
                self.urlpdfGeleneralH = re.sub('/book/',
                                               'https://b-ok.cc/book/',
                                               self.urlDowload)
                self.urlDowload = re.sub(
                    '/book/', 'http://zlibraryexau2g3p.onion/book/',
                    self.urlDowload)
                self.escrivirUrlWed.writerow([self.urlDowload])
                print(self.urlDowload)
                voleano = validarFormato(self.urlpdfGeleneralH)
                guardarNumeroDescargas(self.contadorLibros)
                print(self.respaldoContador)
                if self.contadorLibros == self.respaldoContador:
                    self._descargarLibro(voleano)
                self.respaldoContador += 1
                if (self.contadorLibros == self.respaldoContador
                        and self.contadorLibros2 % 10 == 0):
                    self._cambiarUsuario()
        except OSError as e:
            print(e.strerror)
            print("error en la urlPdf:::::")
            guardarNumeroDescargas(self.contadorLibros)
            usuarioUsadosReescrivir(self.contadorUsuarios)
            print(self.contadorLibros)
            raise
        print("termine la pagina")

    def _descargarLibro(self, voleano):
        """Open the current book page and trigger its download.

        Args:
            voleano: result of ``validarFormato`` for this book; ``True``
                selects the direct download, anything else the path that
                confirms a dialog through the keyboard.
        """
        self.zLibraty.get(self.urlDowload)
        sleep(5)
        self.htmlPdf = self.zLibraty.page_source
        self.soupRedirec = BeautifulSoup(self.htmlPdf, 'html.parser')
        self.urlDowloadPDF = self.soupRedirec.find(
            class_="btn btn-primary dlButton addDownloadedBook")
        self.urlDowloadPDF = self.urlDowloadPDF.get('href')
        self.urlDowloadPDF = re.sub(
            '/dl/', 'http://zlibraryexau2g3p.onion/dl/', self.urlDowloadPDF)
        self.imprimirUrlPdf.writerow([self.urlDowloadPDF])
        print(self.urlDowloadPDF)
        print("vamos a por el if")
        sleep(10)
        # Kept as an equality test: validarFormato's return type is not
        # visible here, so plain truthiness could change which branch runs.
        if voleano == True:
            self.zLibraty.set_page_load_timeout(8)
            try:
                self.zLibraty.get(self.urlDowloadPDF)
            except Exception:
                self.zLibraty.set_page_load_timeout(70)
                self.zLibraty.refresh()
                print("funciona PDF ")
            else:
                # Restore the normal timeout on success too; it used to stay
                # at 8 s for every later page load.
                self.zLibraty.set_page_load_timeout(70)
            sleep(5)
        else:
            try:
                self.zLibraty.set_page_load_timeout(5)
                try:
                    self.zLibraty.get(self.urlDowloadPDF)
                except Exception:
                    sleep(4)
                    pyautogui.press("down")
                    sleep(2)
                    pyautogui.press("enter")
                self.zLibraty.set_page_load_timeout(70)
            except Exception:
                print("\nerror al controlasr el teclado y dar enter\n")
                raise
            sleep(5)
            self.zLibraty.refresh()
        self.contadorLibros += 1
        self.contadorLibros2 += 1
        sleep(20)
        tiempoDescarga()
        informaiconPdf(self.urlpdfGeleneralH)

    def _cambiarUsuario(self):
        """Switch to the next stored account and log in again."""
        print((self.contadorLibros2 - 1) % 10)
        self.contador += 1
        # Keyboard shortcut followed by two confirmations.
        # NOTE(review): presumably Tor Browser's "New Identity" -- confirm.
        pyautogui.hotkey("ctrl", "shift", "u")
        sleep(2)
        pyautogui.press("enter")
        sleep(7)
        pyautogui.press("enter")
        sleep(15)
        self.contadorUsuarios += 1
        print(self.contadorUsuarios)
        try:
            # switch_to.window replaces the deprecated switch_to_window.
            self.zLibraty.switch_to.window(self.zLibraty.window_handles[0])
        except Exception:
            print("error al cambian de  ventana")
        usuarioUsadosReescrivir(self.contadorUsuarios)
        print("por aqui¿¿¿¿¿¿")
        self.credenciales(self.contadorUsuarios)
        print("no por aqui¿¿¿¿¿¿")
        sleep(23)
        self.iniciarSecion()
        sleep(7)
        self.contadorLibros2 = 0
        sleep(15)
        print("numero de li bros por usuario ", self.contadorLibros2)
        if self.contador == 5:
            self.contador = 0

    def DescargarContenido(self, _html):
        """Store *_html* in ``self.contenido``."""
        self.contenido = _html

    def serrarTor(self):
        """Close the browser window."""
        self.zLibraty.close()
Пример #7
0
class DescargarPdf:
    """Walk a search-result page through Tor Browser and download each book.

    This variant restarts the browser and logs in with the next stored
    account after every tenth book.
    """

    _DIR_CREDENCIALES = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/descargarLIbros/descargarparte1/contraseñasYcorreos.txt'

    def __init__(self):
        self.tbb_dir = "/usr/local/share/tor-browser_en-US"
        self.usuario = []
        self.contraseñaTxT = []
        self.conversor = '?convertedTo=pdf'

    def iniciarTor(self):
        """Start Tor Browser."""
        self.zLibraty = TorBrowserDriver(self.tbb_dir,
                                         tbb_logfile_path='test.log')

    def iniciarSecion(self):
        """Reload the page, then type the current credentials and submit."""
        self.zLibraty.refresh()
        sleep(10)
        self.element = self.zLibraty.find_element_by_name("email")
        self.element.send_keys(self.correo)
        sleep(2)
        self.element2 = self.zLibraty.find_elements_by_class_name(
            "form-control")[1]
        self.element2.send_keys(self.contraseña)
        self.element2.send_keys(Keys.RETURN)

    def paginaDescargas(self):
        """Load ``self.url`` and keep its HTML in ``self.html``."""
        self.zLibraty.load_url(self.url)
        self.html = self.zLibraty.page_source

    def paginaPrinsipal(self, añoInicial, añoFinal):
        """Set ``self.url`` to the search URL for the given year range."""
        self.urlAños = 'http://zlibraryexau2g3p.onion/s/?yearFrom=' + str(
            añoInicial) + '&yearTo=' + str(añoFinal)
        self.url = self.urlAños

    def cambiarPagina(self, x):
        """Append ``&page=<x>`` to ``self.url``."""
        self.url += '&page=' + str(x)

    def Crearcsv(self):
        """(Re)create the two CSV logs inside ``carpetaUrl``.

        Both files are opened in ``'w'`` mode, so every call truncates them.
        """
        print("hola")
        # Close the handles of a previous call explicitly so buffered rows
        # reach disk instead of waiting for the garbage collector.
        for archivo in getattr(self, '_archivosCsv', ()):
            archivo.close()
        self._archivosCsv = []
        self.carpetaUrl = '/home/dgc7/Documentos/zlibrary/libros1920-1921/url'
        try:
            os.mkdir(self.carpetaUrl)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        # newline='' is what the csv module asks for on files it writes to.
        archivoWed = open(
            '/home/dgc7/Documentos/zlibrary/libros1920-1921/url/url2.csv',
            'w', newline='')
        self._archivosCsv.append(archivoWed)
        archivoPdf = open(
            '/home/dgc7/Documentos/zlibrary/libros1920-1921/url/urlDowload2.csv',
            'w', newline='')
        self._archivosCsv.append(archivoPdf)
        self.escrivirUrlWed = csv.writer(archivoWed)
        self.imprimirUrlPdf = csv.writer(archivoPdf)

    def credenciales(self, numeroUsuario):
        """Select credential pair *numeroUsuario* and open the login page."""
        self.correo = self.usuario[numeroUsuario]
        self.contraseña = self.contraseñaTxT[numeroUsuario]
        self.urlLoguin = 'http://zlibraryexau2g3p.onion'
        self.zLibraty.load_url(self.urlLoguin)

    def UsuariosYcontraseñas(self, ruta=None):
        """Append 100 user/password pairs read from the credentials file.

        The file alternates lines: user, password, user, password, ...
        Lines are stored exactly as read (including the trailing newline;
        '' once the file is exhausted).

        Args:
            ruta: path of the credentials file; defaults to the original
                hard-coded location.
        """
        self.dir = self._DIR_CREDENCIALES if ruta is None else ruta
        with open(self.dir, 'r') as datos:
            for _ in range(100):
                self.usuario.append(datos.readline())
                self.contraseñaTxT.append(datos.readline())

    def urlPdf(self, contador, _contadorusuarios):
        """Download every book linked from ``self.html``.

        Args:
            contador: accepted for compatibility; not read by this method.
            _contadorusuarios: index of the account currently logged in;
                incremented each time the account is rotated.
        """
        self.boleanoPdf = 0
        self.contadorUsuariosCon = _contadorusuarios
        self.contadorLibros2 = 0
        self.contadorLibros = 0
        self.Crearcsv()
        self.soup = BeautifulSoup(self.html, 'html.parser')
        for self.urlwed in self.soup.find_all(itemprop="name"):
            self.contador = 0
            self.urlwed = self.urlwed.find('a', href=re.compile(''))
            self.urlDowload = self.urlwed.get('href')
            self.urlpdfGeleneralH = re.sub('/book/', 'https://b-ok.cc/book/',
                                           self.urlDowload)
            self.urlDowload = re.sub('/book/',
                                     'http://zlibraryexau2g3p.onion/book/',
                                     self.urlDowload)
            self.escrivirUrlWed.writerow([self.urlDowload])
            print(self.urlDowload)
            self.contadorLibros += 1
            self.contadorLibros2 += 1
            if self.contadorLibros2 == 10:
                self._rotarUsuario()
            voleano = validarFormato(self.urlpdfGeleneralH)
            self._descargarLibro(voleano)

    def _rotarUsuario(self):
        """Restart the browser and log in with the next stored account."""
        self.contador += 1
        self.serrarTor()
        sleep(4)
        self.iniciarTor()
        self.contadorUsuariosCon += 1
        print(self.contadorUsuariosCon)
        # The original passed the undefined name ``contadorusuarios`` here;
        # the counter that was just incremented is the account to use.
        self.credenciales(self.contadorUsuariosCon)
        self.iniciarSecion()
        sleep(7)
        self.contadorLibros2 = 0
        sleep(15)
        if self.contador == 5:
            self.contador = 0

    def _descargarLibro(self, voleano):
        """Open the current book page and request its download.

        Args:
            voleano: result of ``validarFormato``; ``True`` requests the file
                as is, anything else appends ``self.conversor`` to the URL.
        """
        self.zLibraty.load_url(self.urlDowload)
        sleep(5)
        self.htmlPdf = self.zLibraty.page_source
        self.soupRedirec = BeautifulSoup(self.htmlPdf, 'html.parser')
        self.urlDowloadPDF = self.soupRedirec.find(
            class_="btn btn-primary dlButton addDownloadedBook")
        self.urlDowloadPDF = self.urlDowloadPDF.get('href')
        self.urlDowloadPDF = re.sub(
            '/dl/', 'http://zlibraryexau2g3p.onion/dl/', self.urlDowloadPDF)
        self.imprimirUrlPdf.writerow([self.urlDowloadPDF])
        print(self.urlDowloadPDF)
        # Kept as an equality test: validarFormato's return type is not
        # visible here, so plain truthiness could change which branch runs.
        if voleano == True:
            self.zLibraty.get(self.urlDowloadPDF)
        else:
            self.convertirpdf = str(self.urlDowloadPDF) + str(self.conversor)
            self.zLibraty.get(self.convertirpdf)
        sleep(20)
        tiempoDescarga()
        informaiconPDf(self.urlpdfGeleneralH)

    def DescargarContenido(self, _html):
        """Store *_html* in ``self.contenido``."""
        self.contenido = _html

    def serrarTor(self):
        """Close the browser window."""
        self.zLibraty.close()
Пример #8
0
# url = "https://check.torproject.org"
url = "https://www.google.com/search?q=playoffs"

# NOTE: these headers are built but never handed to the driver below.
headers = {
    "User-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36",
}

# `driver` is created outside this snippet.  Close it even when the page
# load or the element lookup fails.
try:
    driver.load_url(url)
    # driver.context()

    # Find the element for success
    element = driver.find_element_by_class_name('LC20lb')
    print(element)
    sleep(2)  # So that we can see the page
finally:
    driver.close()


# class TestSite(unittest.TestCase):
#     def setUp(self):
#         # Point the path to the tor-browser_en-US directory in your system
#         tbpath = '/home/andrew/Desktop/tor-browser-linux64-8.0.8_en-US/tor-browser_en-US/'
#         self.driver = TorBrowserDriver(tbpath, tbb_logfile_path='test.log', tor_cfg=cm.USE_STEM)
#         self.url = "https://check.torproject.org"

#     def tearDown(self):
#         # We want the browser to close at the end of each test.
#         self.driver.close()

#     def test_available(self):
#         self.driver.load_url(self.url)
Пример #9
0
class crearCorros:
    """Fill in a registration form through Tor Browser with generated data.

    Creating an instance starts Tor Browser and builds two parallel lists
    from local text files:

    * ``lista`` -- one user name per line of ``nombre.txt``
    * ``datosContrasenna`` -- one password per pair of lines taken from
      ``contraseña.txt`` and ``contraseña2.txt``
    """

    # How many lines are consumed from each input file.
    _FILAS = 101

    # A base character followed by combining marks.  The tilde that follows
    # n/N is deliberately excluded so that "ñ" survives the stripping.
    _ACENTOS = r"([^n\u0300-\u036f]|n(?!\u0303(?![\u0300-\u036f])))[\u0300-\u036f]+"

    def __init__(self):
        self.urlProtocoe = (
            'http://3g2upl4pq6kufc4m.onion',
            'https://mail.protonmail.com/create/new',
            'https://singlelogin.org/registration.php',
        )
        print(self.urlProtocoe[2])
        self.tbb_dir = "/usr/local/share/tor-browser_en-US"
        self.protocoe = TorBrowserDriver(self.tbb_dir,
                                         tbb_logfile_path='test.log')
        self.dirNombre = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/crearCorreos/nombre.txt'
        # The surname path is kept for reference only; it is never read, so
        # the file is no longer opened.
        self.dirapellido = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/crearCorreos/apellidos.txt'
        self.dirContrasenna = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/crearCorreos/contraseña.txt'
        self.dirCotrasenna2 = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/crearCorreos/contraseña2.txt'

        nombres = self._leer_lineas(self.dirNombre, self._FILAS)
        claves = self._leer_lineas(self.dirContrasenna, self._FILAS)
        claves2 = self._leer_lineas(self.dirCotrasenna2, self._FILAS)
        # Every row that was read is normalised (previously the last one kept
        # its raw newlines).
        self.lista = [
            self._limpiar(nombre + 'asdsdf', 'asdaawderca')
            for nombre in nombres
        ]
        self.datosContrasenna = [
            self._limpiar(clave + clave2, 'radabanals')
            for clave, clave2 in zip(claves, claves2)
        ]

    @staticmethod
    def _leer_lineas(ruta, cantidad):
        """Return the first *cantidad* lines of *ruta* ('' once past EOF)."""
        with open(ruta, 'r') as archivo:
            return [archivo.readline() for _ in range(cantidad)]

    @classmethod
    def _limpiar(cls, texto, relleno):
        """Replace newlines in *texto* with *relleno* and strip accents.

        The text is decomposed (NFD), combining marks are dropped except the
        tilde of ``ñ``/``Ñ``, and the result is recomposed (NFC).
        """
        texto = re.sub('\n', relleno, texto)
        texto = re.sub(cls._ACENTOS, r"\1", normalize("NFD", texto),
                       count=0, flags=re.I)
        return normalize('NFC', texto)

    def iniciarTor(self):
        """Load the third URL of ``urlProtocoe`` in the browser."""
        self.protocoe.load_url(self.urlProtocoe[2])

    def ingresarDatos(self, fila):
        """Type row *fila* into the password fields and the user-name iframe.

        The password goes into ``password`` and ``passwordc``; the user name
        is typed into the first ``<input>`` of the first iframe and submitted
        with ENTER.  After a fixed wait the modal's primary button is clicked.
        """
        self.pasword = self.protocoe.find_element_by_name("password")
        self.pasword.click()
        sleep(random.uniform(1.0, 4))
        self.pasword.send_keys(self.datosContrasenna[fila])
        self.pasword = self.protocoe.find_element_by_name("passwordc")
        sleep(random.uniform(1.0, 3))
        self.pasword.click()
        self.pasword.send_keys(self.datosContrasenna[fila])
        sleep(random.uniform(2.0, 5.7))
        self.iframes = self.protocoe.find_element_by_tag_name("iframe")
        self.protocoe.switch_to.frame(self.iframes)
        self.usuario = self.protocoe.find_element_by_xpath('//input')
        self.usuario.click()
        self.usuario.send_keys(self.lista[fila])
        sleep(random.uniform(0, 5))
        self.usuario.send_keys(Keys.ENTER)
        # Leave the iframe before looking for the modal button.
        self.protocoe.switch_to.default_content()
        sleep(20)
        self.enter = self.protocoe.find_element_by_xpath(
            '//button[@class="pm_button primary modal-footer-button"]')
        self.enter.click()

    def serrarTor(self):
        """Close the browser window."""
        self.protocoe.close()

    def imprimirDatos(self):
        """Print the first 100 user-name/password pairs, one value per line."""
        for d in range(0, 100):
            print(self.lista[d])
            print(self.datosContrasenna[d])
Пример #10
0
class Browser:
    """Start and control a Selenium driver for a browser / PET combination.

    ``pet`` values ``"brave"`` and ``"tor"`` have dedicated start-up paths;
    any other value is resolved through ``PetConfig`` for the requested
    ``browser`` (``"firefox"``, ``"chrome"`` or ``"safari"``).
    """

    @staticmethod
    def _setup_socks5_proxy(browser, profile, proxy_setting):
        """Configure a SOCKS5 proxy on chrome (brave) or firefox.

        Args:
            browser: ``"chrome"`` or ``"firefox"``; other values are ignored.
            profile: Chrome options object or Firefox profile to configure.
            proxy_setting: mapping with ``"address"``, ``"port"`` and
                ``"bypass-list"`` keys, or ``None`` to leave *profile* alone.
        """
        if proxy_setting is None:
            return
        address = proxy_setting["address"]
        port = proxy_setting["port"]
        bypass_list = proxy_setting["bypass-list"]

        if browser == "chrome":
            # https://sordidfellow.wordpress.com/2015/05/21/ssh-tunnel-for-chrome/
            profile.add_argument("--proxy-server=socks5://%s:%s" % (address, port))
            profile.add_argument("--proxy-bypass-list=%s" % bypass_list)
            print("socks5 proxy configured on chrome")

        elif browser == "firefox":
            # https://developer.mozilla.org/en-US/docs/Mozilla/Preferences/Mozilla_networking_preferences
            profile.set_preference("network.proxy.type", 1)
            profile.set_preference("network.proxy.socks", address)
            profile.set_preference("network.proxy.socks_port", port)
            profile.set_preference("network.proxy.socks_version", 5)
            # Boolean preference: it has to be a real bool, not the string
            # "true", to be written with the right type.
            profile.set_preference("network.proxy.socks_remote_dns", True)
            profile.set_preference("network.proxy.no_proxies_on", bypass_list)
            print("socks5 proxy configured on firefox")

    def __init__(self, config, browser, pet, env_type, proxy_setting):
        """Create ``self.driver`` for the requested browser and PET.

        Args:
            config: passed to ``get_display_parameters`` when *env_type* is
                ``"vm"``.
            browser: ``"firefox"``, ``"chrome"`` or ``"safari"``.
            pet: privacy tool name; ``"brave"`` and ``"tor"`` are special.
            env_type: ``"vm"`` starts a virtual X display first (needs the
                ``xvfbwrapper`` package).
            proxy_setting: SOCKS5 settings mapping, or ``None``.

        An unsupported *browser* prints a message and exits the process.
        """
        self.env_type = env_type
        if env_type == "vm":
            print("xvfb")
            from xvfbwrapper import Xvfb
            width, height, depth = get_display_parameters(config)
            self.vdisplay = Xvfb(width=width, height=height, colordepth=depth)
            self.vdisplay.start()

        print("Browser:", browser, "PET:", pet)
        pet_config = PetConfig()

        if pet == "brave":
            print("brave")
            chrome_options = ChromeOptions()
            bPath, dPath = pet_config.getPetBrowserDriverPath(pet, browser, env_type)
            print(bPath, dPath)
            chromedriver = dPath
            chrome_options.binary_location = bPath
            self._setup_socks5_proxy("chrome", chrome_options, proxy_setting)
            os.environ["webdriver.chrome.driver"] = chromedriver
            if env_type == "vm":
                chrome_options.add_argument("--no-sandbox")
            self.driver = webdriver.Chrome(executable_path=chromedriver, chrome_options=chrome_options)
            press_enter(1)
            return

        elif pet == "tor":
            plt = platform.system().lower()
            if plt == "darwin" or plt == "windows":  # https://stackoverflow.com/questions/15316304/open-tor-browser-with-selenium
                print("native tor")
                bPath, dPath = pet_config.getPetBrowserDriverPath(pet, browser, env_type)
                print(bPath, dPath)
                profile = FirefoxProfile()
                profile.set_preference("network.proxy.type", 0)
                binary = FirefoxBinary(bPath)
                self.driver = webdriver.Firefox(firefox_profile=profile, firefox_binary=binary, executable_path=dPath)
            elif plt == "linux":  # https://medium.com/@manivannan_data/selenium-with-tor-browser-using-python-7b3606b8c55c
                print("vm tor")
                from tbselenium.tbdriver import TorBrowserDriver
                pref_dict = {"network.proxy.no_proxies_on": "http://10.0.2.2/, http://192.168.4.204/"}
                self.driver = TorBrowserDriver(os.environ['TBB_PATH'], pref_dict=pref_dict)
            # NOTE: on any other platform no driver is created.
            return

        aPath, bPath, dPath, pref = pet_config.getPetBrowserDriverPath(pet, browser, env_type)
        if browser == "firefox":
            fp = FirefoxProfile()
            self._setup_socks5_proxy("firefox", fp, proxy_setting)
            binary = FirefoxBinary(bPath)
            if pref is not None:
                fp.set_preference(pref[0], pref[1])
            self.driver = webdriver.Firefox(firefox_profile=fp, firefox_binary=binary, executable_path=dPath)

            if aPath:
                self.driver.install_addon(aPath)

        elif browser == "chrome":
            chrome_options = webdriver.ChromeOptions()  # https://github.com/SeleniumHQ/selenium/issues/5966
            self._setup_socks5_proxy("chrome", chrome_options, proxy_setting)

            if aPath:
                chrome_options.add_extension(aPath)
            if pref is not None:
                chrome_options.add_experimental_option(pref[0], pref[1])
            # This line was indented with a tab (a TabError on Python 3); the
            # binary location is now always set, as in the firefox branch.
            chrome_options.binary_location = bPath
            os.environ["webdriver.chrome.driver"] = dPath

            time.sleep(1)
            self.driver = webdriver.Chrome(executable_path=dPath, chrome_options=chrome_options)
            # to escape the alert chrome display on first visit
            time.sleep(1)
            press_enter(1)
        elif browser == "safari":
            self.driver = webdriver.Safari()
        else:
            print("Unsupported Browser")
            sys.exit(0)

    def quit(self):
        """Shut the driver down and stop the virtual display, if any."""
        try:
            try:
                self.driver.quit()
            except Exception:
                self.driver.close()     # for Tor
        finally:
            # Stop the display even when closing the driver fails.
            if self.env_type == "vm":
                self.vdisplay.stop()

    def visit_sites(self, site_list, delay=5):
        """Visit every page in *site_list*, pausing *delay* seconds after each.

        A dot is written to stdout per site; errors are reported and the
        loop carries on with the next site.
        """
        for site in site_list:
            sys.stdout.write(".")
            sys.stdout.flush()
            try:
                self.driver.get(site)
                time.sleep(delay)
            except Exception:
                print("Unexpected error:", sys.exc_info()[0])
Пример #11
0
class UntitledTestCase(unittest.TestCase):
    def setUp(self):
        print 'Loading...'

        self.display = Display(visible=0, size=(800, 600))
        self.display.start()

        self.driver = TorBrowserDriver(
            '/scratch/zilton/troll/tor-browser_pt-BR/',
            tbb_logfile_path='test.log')

        # self.driver = webdriver.Chrome('chromium-browser')
        self.base_url = "https://lemonade.ctweb.inweb.org.br/#/workflows/1/"
        self.verificationErrors = []
        self.accept_next_alert = True

    def is_visible(self, locator, timeout=20):
        try:
            ui.WebDriverWait(self.driver, timeout).until(
                ec.visibility_of_element_located((By.ID, locator)))
            return True
        except TimeoutException:
            return False

    def is_not_visible(self, locator, timeout=2):
        try:
            ui.WebDriverWait(self.driver, timeout).until_not(
                ec.visibility_of_element_located((By.ID, locator)))
            return True
        except TimeoutException:
            return False

    def test_untitled_test_case(self):
        global workflow_message_error_warning, workflow_message_completed

        driver = self.driver
        '''Login'''
        driver.get("https://lemonade.ctweb.inweb.org.br/#/login")
        driver.find_element_by_xpath("//input[@type='email']").clear()
        driver.find_element_by_xpath("//input[@type='email']").send_keys(
            lemonade_login)
        driver.find_element_by_xpath("//input[@type='password']").clear()
        driver.find_element_by_xpath("//input[@type='password']").send_keys(
            lemonade_password)
        driver.find_element_by_xpath("//button[@type='submit']").click()
        time.sleep(LOAD_TIME)

        count_progress = 1.0
        length = len(workflow_ids)
        index = 0
        count_problem = 1
        while index < length:
            workflow_id = workflow_ids[index]
            '''Access the page of the workflow'''
            url = self.base_url + str(workflow_id)
            driver.get(url)
            '''Execute the workflow'''
            while True:
                try:
                    time.sleep(LOAD_TIME * 0.2)
                    driver.find_element_by_id("tlb-execute-wf").click()
                    break
                except Exception:
                    pass

            while True:
                try:
                    time.sleep(LOAD_TIME * 0.2)
                    driver.find_element_by_id("mdl-execute-wf").click()
                    break
                except Exception:
                    pass
            '''Monitoring the status of the execution'''
            time.sleep(LOAD_TIME)
            status = WAITING_MSG
            current_url = driver.current_url

            # Workflow with problem
            if current_url == "https://lemonade.ctweb.inweb.org.br/#/" and count_problem < MAX_LOAD_PROBLEM:
                count_problem += 1
                continue
            elif count_problem == MAX_LOAD_PROBLEM:
                status = WARNING_MSG

            while (status is WAITING_MSG) or (status == RUNNING_MSG):
                while True:
                    try:
                        status = str(
                            driver.find_element_by_id("dtl-job-status").
                            get_attribute(name='title').upper())
                        if status:
                            break
                        time.sleep(LOAD_TIME * 0.2)
                    except Exception:
                        pass

                if (status == WAITING_MSG) or (status == RUNNING_MSG):
                    driver.refresh()
                    time.sleep(LOAD_TIME)
            '''Main message after the execution ends'''
            message = ''
            if status != WARNING_MSG:
                while message == '':
                    try:
                        message = driver.find_element_by_id(
                            "dtl-job-status-text").text
                        break
                    except Exception:
                        pass
                    driver.refresh()
                    time.sleep(LOAD_TIME)

            workflow_name = ''
            while True and count_problem < MAX_LOAD_PROBLEM:
                try:
                    time.sleep(LOAD_TIME * 0.2)
                    workflow_name = driver.find_element_by_xpath(
                        "//a[contains(@href, '#/workflows/1/%s')]" %
                        workflow_id).text
                    break
                except Exception:
                    pass

            if status == WARNING_MSG:
                message += ' - The execution presented an atypical problem. ' \
                           'Please check the workflow and the correct ' \
                           'update of the messages on the Lemonade page.'

            msg_dict = {
                'workflow_name': workflow_name,
                'workflow_id': workflow_id,
                'message': message,
                'status': status,
                'url': url
            }

            if status != COMPLETED_MSG:
                workflow_message_error_warning.append(msg_dict)
            else:
                workflow_message_completed += " " + workflow_id

            UntitledTestCase.update_progress(
                job_title='Testing Lemonade workflow: ',
                progress=count_progress)
            count_progress += 1

            index += 1
            count_problem = 1

        self.driver.close()

    @staticmethod
    def update_progress(job_title, progress):
        global workflow_ids
        length = len(workflow_ids)
        progress = progress / length
        block = int(round(length * progress))
        message = "\r{0}: [{1}] {2}%".format(
            job_title, ', '.join(workflow_ids[:int(progress * length)]) + "-" *
            (length - block), round(progress * 100, 2))
        if progress >= 1:
            message += " DONE\r\n"
        sys.stdout.write(message)
        sys.stdout.flush()

    def is_element_present(self, how, what):
        try:
            self.driver.find_element(by=how, value=what)
        except NoSuchElementException:
            return False
        return True

    def is_alert_present(self):
        try:
            self.driver.switch_to_alert()
        except NoAlertPresentException:
            return False
        return True

    def close_alert_and_get_its_text(self):
        try:
            alert = self.driver.switch_to_alert()
            alert_text = alert.text
            if self.accept_next_alert:
                alert.accept()
            else:
                alert.dismiss()
            return alert_text
        finally:
            self.accept_next_alert = True

    def tearDown(self):
        UntitledTestCase.sendEmail()
        self.driver.quit()
        self.display.stop()
        self.assertEqual([], self.verificationErrors)

    @staticmethod
    def sendEmail():
        global workflow_message_error_warning, workflow_message_completed

        if len(workflow_message_error_warning) > 0:
            workflow_message_completed = re.sub("^\s+|\s+$", "",
                                                workflow_message_completed)

            message = 'WORKFLOWS THAT PERFORMED CORRECTLY: %s' % (
                workflow_message_completed.replace(' ', ', '))
            message += '\n\nWORKFLOWS THAT DID NOT RUN SUCCESSFULLY:\n'
            for m in workflow_message_error_warning:
                if m['status'] == WARNING_MSG:
                    message += '\n- WORKFLOW: %s' % m['workflow_id']
                else:
                    message += '\n- WORKFLOW: %s' % m['workflow_name']
                message += '\n\tSTATUS: %s' % m['status']
                message += '\n\tMESSAGE: %s' % m['message']
                message += '\n\tURL: %s' % m['url']
                message += '\n___________________________\n'

            subject = "[LEMONADE] - Automatic Test for Workflows"

            email_sender.main(message_status_report=message.encode('utf-8'),
                              subject=subject)
Пример #12
0
def makeRequest(url, domain):
    """
    Opens ``url`` in a new Tor Browser session and appends an entry to the log.

    Args:
        url: address to load in the browser.
        domain: label written to the ``Domain:`` line of the log entry.

    The log file named by the module-level ``logfile_name`` is opened in
    append mode for the duration of the call and is always closed, and the
    browser window is always closed, even when loading the page fails.
    """
    import time

    # Method A (plain ``requests`` through the Tor SOCKS proxy on
    # localhost:9050) is not used.  Method B below opens a real Tor Browser
    # so that JS code is executed and Google Analytics tracks us as a real
    # user.

    # Opening log file
    with open(logfile_name, 'a') as log_file:
        print('Changing IP...\n')

        # Method B, using complete TOR Browser
        driver = TorBrowserDriver("/home/manos/Desktop/tor-browser_en-US")
        try:
            checkConn()

            driver.get(url)
            time.sleep(2.0)
        finally:
            driver.close()

        # Request logging.  One timestamp is taken so the date and time
        # fields cannot disagree across a second or midnight boundary.
        stamp = str(datetime.datetime.now())
        entry_time = 'Date: ' + stamp[0:10] + '\nTime: ' + stamp[11:19]
        # NOTE(review): the status code is a hard-coded 200; the browser
        # session above does not expose the real HTTP status.
        log_file.write(
            entry_time + '\nDomain: ' + domain + '\n'
            'Request sent to ' + url + '.' + '\nResponse status code: ' +
            str(200) +
            '\n*******************************************************************************************\n\n'
        )
    os.system('clear')
Пример #13
0
class DescargarPdf:
    """Drives a Tor Browser session that collects and downloads books.

    Typical use, as far as this class shows: load credentials with
    ``UsuariosYcontraseñas``, start the browser with ``iniciarTor``, select
    an account with ``credenciales`` and log in with ``iniciarSecion``, build
    a search URL with ``paginaPrinsipal``/``cambiarPagina``, load it with
    ``paginaDescargas`` and finally call ``urlPdf`` to walk the result list.
    Several steps rely on ``pyautogui`` key presses and fixed screen
    coordinates.
    """

    def __init__(self):
        # 0 or 20 makes ``credenciales`` go through the singlelogin.org
        # redirect again.
        self.contadorCredenciales = 0
        self.tbb_dir = "/usr/local/share/tor-browser_en-US"
        self.usuario = []
        self.contraseñaTxT = []
        self.conversor = '?convertedTo=pdf'
        # CSV files opened by ``Crearcsv``; kept so they can be closed.
        self._archivosCsv = []

    def iniciarTor(self):
        """Launch the Tor Browser driver."""
        self.zLibraty = TorBrowserDriver(self.tbb_dir, tbb_logfile_path='test.log')

    def iniciarSecion(self):
        """Fill in the login form with the current credentials and submit it."""
        self.element = self.zLibraty.find_element_by_name("email")
        self.element.send_keys(self.correo)
        sleep(2)
        self.element2 = self.zLibraty.find_elements_by_class_name("form-control")[1]
        self.element2.send_keys(self.contraseña)
        self.element2.send_keys(Keys.RETURN)

    def paginaDescargas(self):
        """Load ``self.url`` and keep the page source in ``self.html``."""
        print("estoy en la funcion paginaDescagas")
        self.zLibraty.load_url(self.url)
        sleep(4)
        self.html = self.zLibraty.page_source

    def paginaPrinsipal(self, añoInicial, añoFinal):
        """Build the search URL for books published between the two years."""
        self.urlAños = ('http://zlibraryexau2g3p.onion/s/?yearFrom=' + str(añoInicial)
                        + '&yearTo=' + str(añoFinal))
        self.url = self.urlAños

    def cambiarPagina(self, x):
        """Append the ``page`` query parameter ``x`` to ``self.url``."""
        print("estoy en cambiar pagina prinsipal")
        self.url += '&page=' + str(x)
        print(self.url)

    def _cerrarCsv(self):
        """Close (and thereby flush) the CSV files opened by ``Crearcsv``."""
        for archivo in getattr(self, '_archivosCsv', []):
            archivo.close()
        self._archivosCsv = []

    def Crearcsv(self):
        """Create the ``url`` folder and open the two CSV writers.

        ``self.escrivirUrlWed`` receives the book page URLs and
        ``self.imprimirUrlPdf`` the download URLs.  Both files are truncated
        on every call.
        """
        desde = datosDescarga(1)
        asta = datosDescarga(2)
        self.carpetaUrl = '/home/dd/Documentos/zlibrary/libros' + str(desde) + '-' + str(asta) + '/url'
        # Also creates missing parent folders and tolerates an existing one.
        os.makedirs(self.carpetaUrl, exist_ok=True)
        # Release the files of a previous call before truncating them again.
        self._cerrarCsv()
        # newline='' is what the csv module requires of files it writes to.
        archivoWeb = open(self.carpetaUrl + '/url2.csv', 'w', newline='')
        self._archivosCsv.append(archivoWeb)
        archivoPdf = open(self.carpetaUrl + '/urlDowload2.csv', 'w', newline='')
        self._archivosCsv.append(archivoPdf)
        self.escrivirUrlWed = csv.writer(archivoWeb)
        self.imprimirUrlPdf = csv.writer(archivoPdf)

    def credenciales(self, numeroUsuario):
        """Select account number ``numeroUsuario`` as the current credentials.

        When ``self.contadorCredenciales`` is 0 or 20 the singlelogin.org
        redirect selector is opened first and operated with key presses.
        """
        print("llegue")
        if self.contadorCredenciales == 0 or self.contadorCredenciales == 20:
            self.zLibraty.load_url("https://singlelogin.org/")
            self.zLibraty.find_element_by_name("redirectToHost").click()
            sleep(3)
            pyautogui.press("down")
            sleep(2)
            pyautogui.press("down")
            sleep(1)
            pyautogui.press("enter")
        sleep(5)
        self.correo = self.usuario[numeroUsuario]
        self.contraseña = self.contraseñaTxT[numeroUsuario]

    def UsuariosYcontraseñas(self, ruta=None):
        """Load 100 e-mail/password pairs from the credentials file.

        The file is read as alternating lines: e-mail, password, e-mail, ...
        Lines are stored exactly as read (including the trailing newline);
        once the file is exhausted the remaining entries are empty strings.

        Args:
            ruta: path of the credentials file; defaults to the original
                hard-coded location.
        """
        if ruta is None:
            ruta = '/home/dd/Documentos/zlibrary/credenciales/contraseñasYcorreos.txt'
        self.dir = ruta
        # NOTE(review): the trailing newline kept on each e-mail is later
        # typed into the login form by iniciarSecion — confirm this is wanted.
        with open(self.dir, 'r') as datos:
            for _ in range(100):
                self.usuario.append(datos.readline())
                self.contraseñaTxT.append(datos.readline())

    def _descargarLibro(self):
        """Open the current book page, start its download and wait for it."""
        self.urlRedirec = 0
        self.zLibraty.load_url(self.urlDowload)
        sleep(5)
        self.htmlPdf = self.zLibraty.page_source
        self.soupRedirec = BeautifulSoup(self.htmlPdf, 'html.parser')
        self.urlDowloadPDF = self.soupRedirec.find(class_="btn btn-primary dlButton addDownloadedBook")
        self.urlDowloadPDF = self.urlDowloadPDF.get('href')
        self.urlDowloadPDF = re.sub('/dl/', 'http://zlibraryexau2g3p.onion/dl/', self.urlDowloadPDF)
        self.imprimirUrlPdf.writerow([self.urlDowloadPDF])
        print(self.urlDowloadPDF)
        print("vamos a por el if")
        sleep(15)
        # Same comparison as before (``== True``) so truthy non-boolean
        # values keep taking the non-PDF path.
        esPdf = self.voleano == True
        # A short timeout is used for the download link itself.
        self.zLibraty.set_page_load_timeout(12)
        try:
            self.zLibraty.load_url(self.urlDowloadPDF)
        except Exception:
            if esPdf:
                sleep(5)
                print("funciona PDF ")
            else:
                # Confirm the save dialog with the keyboard.
                sleep(8)
                pyautogui.press("down")
                sleep(2)
                pyautogui.press("enter")
        finally:
            # Always restore the long timeout.  Previously the PDF path only
            # restored it after a timeout, leaving 12 s in force for the
            # following page loads when the link loaded in time.
            self.zLibraty.set_page_load_timeout(7000)
        if esPdf:
            self.voleano = False
        sleep(5)
        self.contadorLibros += 1
        self.contadorLibros2 += 1
        self.zLibraty.load_url("about:downloads")
        self.datosEsperaDescarga()
        self.peticiones()
        self.zLibraty.back()
        informaiconPdf(self.urlpdfGeleneralH)
        guardarNumeroDescargas(self.contadorLibros)

    def _cambiarUsuario(self):
        """Log out, switch to the next stored account and log in again."""
        print((self.contadorLibros2 - 1) % 10)
        self.contador += 1
        if self.contadorLibros == 20:
            self.contadorCredenciales = 20
            print("saliendo de secion¡¡¡¡¡¡")
            pyautogui.moveTo(1707, 245)
            pyautogui.hotkey("ctrl", "shift", "u")
            sleep(2)
            pyautogui.press("enter")
            sleep(7)
            pyautogui.press("enter")
            sleep(15)
        else:
            print("saliendo de secion")
            self.zLibraty.get("http://zlibraryexau2g3p.onion/logout.php")
        self.contadorUsuarios += 1
        print(self.contadorUsuarios)
        try:
            self.zLibraty.switch_to_window(self.zLibraty.window_handles[0])
        except Exception:
            print("error al cambian de  ventana")

        usuarioUsadosReescrivir(self.contadorUsuarios)
        print("por aqui¿¿¿¿¿¿")
        self.credenciales(self.contadorUsuarios)
        self.contadorCredenciales = 1
        print("no por aqui¿¿¿¿¿¿")
        sleep(20)
        self.iniciarSecion()
        sleep(15)
        self.paginaDescargas()
        sleep(7)
        self.contadorLibros2 = 0
        sleep(15)
        print("numero de li bros por usuario ", self.contadorLibros2)
        if self.contador == 5:
            self.contador = 0

    def urlPdf(self,):
        """Walk the result list in ``self.html`` and download pending books.

        Every result URL is written to the CSV file.  Only the entry whose
        position equals the stored download counter is actually downloaded,
        which lets an interrupted run resume.  When the per-account counter
        is a multiple of 10 the session switches to the next account.
        """
        self.contadorCredenciales = 1
        self.boleanoPdf = 0
        self.respaldoContador = 0
        self.contadorUsuarios = usuarioUsadosLeer()
        self.contadorLibros = datosDescarga(4)
        self.contadorLibros2 = self.contadorLibros % 10
        self.Crearcsv()
        self.soup = BeautifulSoup(self.html, 'html.parser')
        try:
            for self.urlwed in self.soup.find_all(itemprop="name"):
                self.contador = 0
                self.urlwed = self.urlwed.find('a', href=re.compile(''))
                self.urlDowload = self.urlwed.get('href')
                self.urlpdfGeleneralH = re.sub('/book/', 'https://b-ok.cc/book/', self.urlDowload)
                self.urlDowload = re.sub('/book/', 'http://zlibraryexau2g3p.onion/book/', self.urlDowload)
                self.escrivirUrlWed.writerow([self.urlDowload])
                print(self.urlDowload)
                self.voleano = validarFormato(self.urlpdfGeleneralH)
                guardarNumeroDescargas(self.contadorLibros)
                print(self.respaldoContador)
                if self.contadorLibros == self.respaldoContador:
                    self._descargarLibro()
                self.respaldoContador += 1
                if self.contadorLibros == self.respaldoContador:
                    if self.contadorLibros2 % 10 == 0:
                        self._cambiarUsuario()
        except OSError as e:
            print(e.strerror)
            print("error en la urlPdf:::::")
            guardarNumeroDescargas(self.contadorLibros)
            usuarioUsadosReescrivir(self.contadorUsuarios)
            print(self.contadorLibros)
            archivos = int(contarNueroArchivos())
            print(archivos)
            self.zLibraty.load_url("about:downloads")
            self.datosEsperaDescarga()
            self.peticiones()
            self.zLibraty.back()
            informaiconPdf(self.urlpdfGeleneralH)
        finally:
            # Make sure the URL lists reach the disk whatever happened above.
            self._cerrarCsv()

    def DescargarContenido(self, _html):
        """Store ``_html`` in ``self.contenido``."""
        self.contenido = _html

    def serrarTor(self):
        """Close the browser window."""
        self.zLibraty.close()

    def datosEsperaDescarga(self):
        """Wait 4 seconds and snapshot the page into ``self.htmlValidador``."""
        sleep(4)
        self.htmlValidador = self.zLibraty.page_source

    def validarDescarga(self):
        """Inspect the downloads page and click at a fixed point on failure.

        Looks at the ``description`` elements of the current page for the
        words "Canceled" or "Failed"; in either case a click is sent to a
        fixed screen position after a pause.
        """
        self.htmlFalce = self.zLibraty.page_source
        self.soupFalce = BeautifulSoup(self.htmlFalce, "html.parser")
        self.validarfalce = self.soupFalce.find_all("description", class_="downloadDetails downloadDetailsNormal")
        self.respuestafalce = re.search("value=.+", str(self.validarfalce))
        # NOTE(review): raises AttributeError when the page has no matching
        # ``value=`` text (re.search returns None); behaviour left unchanged.
        self.buscarFalse = self.respuestafalce.group()
        if re.search("Canceled", self.buscarFalse):
            print("se daño al descarga =(")
            sleep(5)
            pyautogui.click(1393, 139)
            sleep(5)
        elif re.search("Failed", self.buscarFalse):
            print("se daño al descarga pero vamos a solucionarlo =( ")
            sleep(5)
            pyautogui.click(1393, 139)
            sleep(5)
        else:
            print("la descarga va bien =)")

    def peticiones(self):
        """Poll the download progress until it reaches 100.

        Each failure to read the progress is counted; on the eighth failure
        the download folder is emptied and the error is re-raised.
        """
        self.validarDescarga()
        self.carga = 0
        self.daño = 0
        self.conteo = 0
        while self.carga < 100:
            self.soup = BeautifulSoup(self.htmlValidador, "html.parser")
            try:
                self.archivoDescarga = self.soup.find_all("progress", class_="downloadProgress")
                self.respaldo = re.split("value", str(self.archivoDescarga))
                self.tiempo = re.search("[0-9]+", self.respaldo[1])
                print(self.tiempo.group())
                self.carga = int(self.tiempo.group())
                self.datosEsperaDescarga()
                sleep(3)
                self.validarDescarga()
                # NOTE(review): ``conteo`` is never incremented, so this
                # branch cannot run as written.
                if self.conteo == 3:
                    pyautogui.press("enter")
                    self.conteo = 0
            except Exception:
                # NOTE(review): the page snapshot is not refreshed here, so
                # each retry parses the same HTML again — confirm intent.
                print("o  no ,se daño la descargar y no la e podido volver a iniciar")
                if self.daño == 7:
                    os.system('rm -r /home/dd/zlibros/libros1920-1921/libro/*.*')
                    raise
                self.daño += 1
                sleep(5)
Пример #14
0
class TruliaHelper():
    """Searches trulia.com through Tor Browser and saves image/price data."""

    # Default output file for writeCSVFile.
    CSV_PATH = '/home/gc14/Documents/fiverr/custom_scrapers/home/trulia.csv'

    def __init__(self):
        self.url = 'https://www.trulia.com'
        # need to set Tor Browser path here.
        tbpath = "/home/gc14/Documents/softwares/tor-browser_en-US"
        self.driver = TorBrowserDriver(tbb_path=tbpath,
                                       tbb_logfile_path='test.log')

    # method to get items from given link.
    def getItems(self):
        """Run each keyword search and return one detail dict per keyword.

        The browser window is closed when the method finishes, including
        when a search raises.
        """
        items = []
        keywords = ['512 W 10th St Perris CA 92570'] * 2
        try:
            for keyword in keywords:
                self.driver.get(self.url)
                search_box = self.driver.find_element_by_id(
                    "homepageSearchBoxTextInput")
                search_box.clear()
                search_box.send_keys(keyword)
                # find_element_* raises when nothing matches, so a returned
                # button needs no further truthiness check.
                search_btn = self.driver.find_element_by_xpath(
                    "//button[@data-auto-test-id='searchButton']")
                print("Going to click")
                search_btn.click()
                time.sleep(10)
                items.append(self.getItemDetail())
        finally:
            self.driver.close()
        return items

    def getItemDetail(self):
        """Return ``{"image": ..., "price": ...}`` parsed from the current page.

        An empty dict is returned, and the reason printed, when the expected
        elements cannot be found.
        """
        data = {}
        try:
            soup = BeautifulSoup(self.driver.page_source, u'html.parser')
            image = soup.find("div",
                              attrs={
                                  "class":
                                  "Tiles__TileBackground-fk0fs3-0 cSObNX"
                              }).find("img")["src"]
            price = soup.find(
                "div",
                attrs={
                    "class":
                    "Text__TextBase-sc-1cait9d-0-div Text__TextContainerBase-sc-1cait9d-1 hlvKRM"
                }).text
            data.update({"image": image, "price": price})
        except Exception as exc:
            # Still best-effort, but no longer silent.
            print("Could not extract item detail:", repr(exc))
        return data

    # method to write csv file
    def writeCSVFile(self, data, path=None):
        """Write ``data`` as a CSV file with ``Image`` and ``Price`` columns.

        Args:
            data: iterable of dicts with optional ``image``/``price`` keys;
                a missing key becomes an empty cell, so one failed
                extraction no longer aborts the remaining rows.
            path: output file; defaults to ``CSV_PATH``.
        """
        if path is None:
            path = self.CSV_PATH
        try:
            # newline='' is what the csv module requires of files it writes.
            with open(path, mode='w', newline='') as csv_file:
                writer = csv.DictWriter(csv_file, fieldnames=['Image', 'Price'])
                writer.writeheader()
                for d in data:
                    writer.writerow({'Image': d.get('image', ''),
                                     'Price': d.get('price', '')})
            print("File written successfully.")
        except Exception:
            print(sys.exc_info())

    # method to start process.
    def start(self):
        """Scrape the items and write them to the CSV file if any were found."""
        items = self.getItems()
        print("Items : ", len(items))
        if items:
            self.writeCSVFile(items)