def parse_attr(self, response):
    """Scrape a post page and record both the page link and the resolved link.

    Reads the publication date from ``time.published``, looks for the
    download link with three selectors tried in order, then prints and
    stores the record twice: once with the link found on the page and once
    with whatever ``self.Abre_pag`` returns for that link.

    Args:
        response: Page response. ``response.meta`` must contain the keys
            ``titulo``, ``cantante``, ``album`` and ``referer``.
    """
    # Publication date (the element is assumed to exist; .strip() is
    # applied directly to the extracted text).
    fecha = response.css('time.published ::text').get().strip()

    # Candidate locations of the download link, evaluated lazily and in
    # order; the first one that yields a value wins.
    buscadores = (
        lambda: response.xpath('//*/div[28]/b/span/a/@href').extract_first(),
        lambda: response.css('div > b > span > a ::attr(href)').get(),
        lambda: response.xpath('//*/div[2]/div/div[13]/a/@href').extract_first(),
    )
    prev_inf = None
    for buscar in buscadores:
        prev_inf = buscar()
        if prev_inf is not None:
            break

    meta = response.meta

    def registrar(enlace):
        # Print the record and hand it to the database layer.
        imprime_datos(meta['titulo'], fecha, meta['cantante'],
                      meta['album'], meta['referer'], enlace)
        self.inserta_BD(response, fecha, enlace)

    # First the link exactly as it appears on the page...
    registrar(prev_inf)
    # ...then the link obtained by opening it.
    infringing = self.Abre_pag(prev_inf)
    registrar(infringing)
def parse_attr2(self, response):
    """Print the track record when the page exposes a verified MP3 link.

    The link is taken from the ``href`` of ``a.download-mp3-url``. Nothing
    happens when the anchor is missing, when the link does not end in
    ``mp3``, or when ``veri`` does not report the link as valid.

    Args:
        response: Page response. ``response.meta`` must contain the keys
            ``Titulo``, ``Fecha``, ``Cantante``, ``Album`` and ``referer``.
    """
    infringing = response.css('a.download-mp3-url ::attr(href)').get()
    # .get() yields None when the selector matches nothing; bail out
    # instead of failing on a None value.
    if infringing is None or not infringing.endswith('mp3'):
        return
    # veri()'s return type is not visible here, so the explicit comparison
    # against True is kept as-is.
    if veri(infringing) == True:
        meta = response.meta
        imprime_datos(meta['Titulo'], meta['Fecha'], meta['Cantante'],
                      meta['Album'], meta['referer'], infringing)
def parse_attr(self, response):
    """Print and store the ``a.btn-dl`` download link of a page.

    Does nothing when the page has no ``a.btn-dl`` element. Otherwise the
    link is printed together with the metadata carried in ``response.meta``
    and inserted through ``self.c.inserta_item`` when ``veri`` accepts it
    and ``self.c.existe_inf`` reports it is not stored yet for
    ``self.id_domin``.

    Args:
        response: Page response. ``response.meta`` must contain the keys
            ``titulo``, ``fecha``, ``cantante``, ``album`` and ``referer``.
    """
    if not response.css('a.btn-dl'):
        return

    infringing = response.css('a.btn-dl ::attr(href)').get()
    meta = response.meta
    imprime_datos(meta['titulo'], meta['fecha'], meta['cantante'],
                  meta['album'], meta['referer'], infringing)

    # Insert into the database: verification first, then duplicate check.
    verificado = veri(infringing) == True
    if verificado and self.c.existe_inf(infringing, self.id_domin) == False:
        self.c.inserta_item(meta['titulo'], meta['cantante'], meta['album'],
                            meta['referer'], infringing, meta['fecha'],
                            self.id_domin)
def parse_attr(self, response):
    """Resolve a shortened download link and store both link forms.

    Reads the post date and the intermediate link (``prev_inf``) from the
    page, resolves it with ``open_adfly`` using the
    ``span[id*="skip_button"]`` selector, prints the record, and then
    inserts the resolved link and the intermediate link, each only when it
    is not None and ``self.c.existe_inf`` reports it is not stored yet for
    ``self.id_domin``.

    Args:
        response: Page response. ``response.meta`` must contain the keys
            ``titulo``, ``cantante``, ``album`` and ``referer``.

    Raises:
        AttributeError: If the date element is missing, because ``.strip()``
            is applied directly to the extracted value.
    """
    fecha = response.css(
        'div.post-body.entry-content > div > div.post-info-icon.tanggal > span ::text'
    ).get().strip()
    prev_inf = response.xpath(
        '//*[@class="post-body entry-content"]/div[2]/div[3]/a/@href').get()
    infringing = open_adfly(prev_inf, 'span[id*="skip_button"]')

    meta = response.meta
    imprime_datos(meta['titulo'], fecha, meta['cantante'], meta['album'],
                  meta['referer'], infringing)

    # Insert into the database: resolved link first, then the intermediate
    # one. The None check runs before the duplicate lookup so the database
    # is never queried for, or filled with, an empty link.
    # NOTE(review): veri() is not applied to either link in this callback
    # (the check was commented out) - confirm that is still wanted.
    for enlace in (infringing, prev_inf):
        if enlace is None:
            continue
        if self.c.existe_inf(enlace, self.id_domin) == False:
            self.c.inserta_item(meta['titulo'], meta['cantante'],
                                meta['album'], meta['referer'], enlace,
                                fecha, self.id_domin)
def parse_attr2(self, response):
    """Print and store the download link found in the post's centered table.

    The referer is the URL of the page itself. The link is printed with the
    metadata from ``response.meta`` and inserted through
    ``self.c.inserta_item`` when ``veri`` accepts it and
    ``self.c.existe_inf`` reports it is not stored yet for
    ``self.id_domin``.

    Args:
        response: Page response. ``response.meta`` must contain the keys
            ``titulo``, ``fecha``, ``cantante`` and ``album``.
    """
    referer = response.url
    selector = 'div.post-body.entry-content > center > table > tbody > tr > td > center > a ::attr(href)'
    infringing = response.css(selector).get()

    meta = response.meta
    imprime_datos(meta['titulo'], meta['fecha'], meta['cantante'],
                  meta['album'], referer, infringing)

    # Insert into the database only for verified links that are not stored.
    if veri(infringing) == True and self.c.existe_inf(infringing, self.id_domin) == False:
        self.c.inserta_item(meta['titulo'], meta['cantante'], meta['album'],
                            referer, infringing, meta['fecha'],
                            self.id_domin)
def parse_attr(self, response):
    """Scrape an album page and record every listed track with its link.

    Artist, album and date come from the 2nd, 4th and 6th ``span`` under a
    fixed absolute XPath. The download link is derived from the ``onclick``
    attribute matched by ``div#download-btn-div :nth-child(4)`` and is the
    same for all tracks. Each node matched by ``tbody > tr :nth-child(1)``
    supplies a track name, which is printed and then inserted when
    ``self.c.existe_inf`` reports the link is not stored yet and ``veri``
    accepts it.

    Args:
        response: Album page response.
    """
    referer = response.url
    artista = response.xpath(
        '/html/body/div/div[2]/div/div[2]/span[2]/text()').extract_first()
    album = response.xpath(
        '/html/body/div/div[2]/div/div[2]/span[4]/text()').extract_first()
    fecha = response.xpath(
        '/html/body/div/div[2]/div/div[2]/span[6]/text()').extract_first()

    onclick = response.css(
        'div#download-btn-div :nth-child(4) ::attr(onclick)').get()
    # presumably picks the quoted URL out of the onclick handler - TODO
    # confirm against separa()
    enlace = separa(onclick, '"', 1)

    for celda in response.css('tbody > tr :nth-child(1)'):
        cancion = celda.css('::text').get()
        # Print the record.
        imprime_datos(cancion, fecha, artista, album, referer, enlace)
        # Insert into the database: duplicate check first, then verification.
        if self.c.existe_inf(enlace, self.id_domin) == False and veri(enlace) == True:
            self.c.inserta_item(cancion, artista, album, referer, enlace,
                                fecha, self.id_domin)
def extrae_categoria(driver):
    """Walk every page of a category and store the Mega links of each post.

    For each article link on the current page the post is opened in a new
    tab, its candidate links are collected with ``extrae_infringing`` and
    resolved with ``get_mega``; links containing ``mega`` that are not yet
    stored are printed and inserted. The tab is then closed with
    ``close_taps(driver, 1)``. Pagination follows the ``a.next.page-numbers``
    link until it can no longer be found or clicked.

    Relies on module-level names that are not parameters: ``pag`` (page
    counter, updated here), ``fecha``, ``c``, ``id_domin`` and ``v``.

    Args:
        driver: Selenium WebDriver positioned on the first category page.
    """
    global pag
    next_page = 1
    while next_page:
        print(
            "#################################### Página {} ####################################"
            .format(pag))
        for a in driver.find_elements_by_css_selector(
                'div#blog-entries > article h2 > a'):
            titulo = a.get_attribute('title').replace('(FLAC)', '').replace('(Mp3)', '')
            referer = a.get_attribute('href')
            cantante, album = separa_titulo(titulo, '–')
            time.sleep(2)
            # Open the post in a new tab and switch to it (third handle).
            driver.execute_script("window.open(arguments[0]);", referer)
            driver.switch_to.window(driver.window_handles[2])
            ref_inf = extrae_infringing(driver)
            for inf in ref_inf or ():
                mega_link = get_mega(inf)
                # The explicit comparison is kept because get_mega() is
                # compared against False here; its other return values are
                # not visible in this function.
                if (mega_link != False
                        and mega_link.find('mega') != -1
                        and c.existe_inf(mega_link) == False):
                    imprime_datos(titulo, fecha, cantante, album, referer,
                                  mega_link)
                    c.inserta_item(titulo, cantante, album, referer,
                                   mega_link, fecha, id_domin)
                    v.muestra_item_guardado(titulo)
            close_taps(driver, 1)
        try:
            next_page = driver.find_element_by_css_selector(
                'a.next.page-numbers')
            next_page.click()
            pag += 1
        except Exception:
            # Best effort: a missing or unclickable "next" link ends the
            # crawl. Exception (not a bare except) so Ctrl-C and SystemExit
            # still propagate.
            print('Ocurrió un error al cambiar de página')
            break
    close_taps(driver, 0)
def parse_attr(self, response):
    """Scrape a post page and store its verified download link.

    Title, artist/album and date are read from the page. The download link
    is the first anchor in ``div.post__content > p``; when that URL
    contains ``images`` the second anchor of the post is used instead.
    A link accepted by ``veri`` is printed and, if ``self.c.existe_inf``
    reports it is not stored yet for ``self.id_domin``, inserted.

    Args:
        response: Page response. ``response.meta`` must contain the key
            ``referer``.
    """
    import logging

    titulo = response.css('h1 > a ::text').get()
    cantante, album = separa_titulo(titulo, '-')
    fecha = response.css('time > a ::text').get()
    fecha = strip_spaces(fecha)
    fecha = separa(fecha, '-', 0)

    infringing = response.css(
        'div.post__content > p > a ::attr(href)').get()
    if infringing is None:
        # No link on the page: nothing to record.
        return
    if infringing.find('images') > 0:
        # The first anchor is an image; the download link is the second one.
        infringing = response.xpath(
            '//*[@id="post"]/div[2]/p/a[2]/@href').get()
        if infringing is None:
            return

    # NOTE: no host filter is applied; any non-None link goes to veri().
    try:
        if veri(infringing) == True:
            imprime_datos(titulo, fecha, cantante, album,
                          response.meta['referer'], infringing)
            if self.c.existe_inf(infringing, self.id_domin) == False:
                self.c.inserta_item(titulo, cantante, album,
                                    response.meta['referer'], infringing,
                                    fecha, self.id_domin)
    except Exception:
        # Stay best-effort (one bad page must not stop the crawl) but leave
        # a trace instead of discarding the error silently.
        logging.getLogger(__name__).exception(
            'Failed to verify or store link %r', infringing)
# Wait for the listing page to load.
WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, "h2 > a")))
# Remember the main window so we can return to it after each post tab.
main_window = driver.current_window_handle

try:
    # Walk every listing page, including the last one.
    while True:
        # Read the href of the next page before opening any tab.
        # find_elements returns an empty list on the last page, whereas
        # find_element would raise and abort the crawl.
        next_links = driver.find_elements_by_css_selector(
            'a.sa.sa-nextpage.tip')
        next_page = next_links[0].get_attribute('href') if next_links else None

        # Collect the data of every post listed on this page.
        for a in driver.find_elements_by_css_selector("h2 > a"):
            referer = a.get_attribute('href')
            titulo = a.find_element_by_css_selector('span').text
            cantante, album = separa_titulo(titulo, '-')
            # The crawl date is used as the record date.
            fecha = date.today().strftime("%B %d, %Y")
            # Open the post in a new tab.
            driver.execute_script("window.open(arguments[0]);", referer)
            driver.switch_to.window(driver.window_handles[1])
            infringing = driver.find_element_by_xpath(
                '//*[@id="shell"]/section/div[1]/div[2]/article/section/center/a'
            ).get_attribute('href')
            imprime_datos(titulo, fecha, cantante, album, referer, infringing)
            if c.existe_inf(infringing, id_domin) == False:
                if veri(infringing) == True:
                    c.inserta_item(titulo, cantante, album, referer,
                                   infringing, fecha, id_domin)
            # Close the tab and go back to the listing.
            driver.close()
            driver.switch_to.window(main_window)

        if next_page is None:
            break
        # Open the next listing page.
        driver.get(next_page)
finally:
    # Always release the browser, even when a page fails mid-crawl.
    driver.quit()