示例#1
0
	def handle(self, *args, **options):
		"""Scrape the eporner.com HD-1080p category and store every new video.

		Requests the category URL and its ``1/`` and ``2/`` sub-pages.  For
		every linked video that is not yet stored as a ``Video`` the detail
		page is fetched and title, cast, tags, embed (iframe) URL, thumbnail
		and up to twelve preview images are extracted.  Images are written
		below ``static/imagenes/eporner/`` and the collected data is handed to
		``subir_video``.

		``args`` and ``options`` are accepted but not used.
		"""
		url_inicial = 'http://www.eporner.com/category/hd1080p/'
		pagina_tube = PaginaTube.objects.get(nombre='eporner.com')

		for i in range(0, 3):

			# The first request uses the bare category URL; the following
			# ones append "<i>/" to it.
			url_pagina = url_inicial if i == 0 else url_inicial + '%s/' % i
			peticion = requests.get(url_pagina)
			sopa = BeautifulSoup(peticion.content, 'html.parser')

			# Collect the URLs of the videos that are not stored yet.
			videos_pagina = []
			for caja in sopa.find_all('div', {'class': 'mbhd'}):
				url_video = 'http://www.eporner.com' + caja.find('a').get('href')
				if not Video.objects.filter(url_video=url_video).exists():
					videos_pagina.append(url_video)

			# Scrape the detail page of every new video.
			for url_video in videos_pagina:

				peticion = requests.get(url_video)
				soup = BeautifulSoup(peticion.content, 'html.parser')

				titulo = soup.find('h1').getText()
				publicado = datetime.datetime.now()

				# Cast and tags are read from the rows of the tags table.
				# NOTE(review): the id 'hd-p**n-tags' looks masked/garbled;
				# confirm it against the site's real markup.
				cast = []
				tags = []
				tabla = soup.find('td', {'id': 'hd-p**n-tags'})
				filas = tabla.find_all('tr') if tabla is not None else []
				for tr in filas:
					# A row without a <strong> label carries neither list.
					if tr.strong is None:
						continue
					etiqueta = tr.strong.string

					if etiqueta == 'Pornstars:':
						# Only links whose href mentions "pornstar" are cast.
						for link in tr.find_all('a'):
							if 'pornstar' in link.get('href', ''):
								cast.append(link.string)
					elif etiqueta == 'Tags:':
						tags = [t.string for t in tr.find_all('a')]

				# The embed code is HTML stored as text inside a <textarea>;
				# it is parsed again to read the iframe's src.
				codigo_iframe = soup.find('div', {'class': 'textare1'})
				codigo_iframe = BeautifulSoup(codigo_iframe.textarea.string, 'html.parser')
				codigo_iframe = codigo_iframe.iframe.get('src')

				# Look the scene boxes up once: both the thumbnail and the
				# previews are taken from them.
				escenas = soup.find_all('div', {'class': 'cutscenesbox'})

				# The sixth scene box is used as the thumbnail.  It stays
				# None when the box is missing or the download fails.
				thumbnail = None
				if len(escenas) > 5:
					request_img = requests.get(escenas[5].a.get('href'), stream=True)
					if request_img.status_code == 200:
						url_imagen = 'static/imagenes/eporner/%s.jpg' % trim(titulo)
						with open(url_imagen, 'wb') as f:
							for chunk in request_img:
								f.write(chunk)
						thumbnail = url_imagen

				# Up to twelve previews, one per scene box; only successful
				# downloads are recorded.
				previews = []
				for n, escena in enumerate(escenas[:12]):
					prev_n = requests.get(escena.a.get('href'), stream=True)
					if prev_n.status_code == 200:
						url_prev_n = 'static/imagenes/eporner/%s-thumb-%s.jpg' % (trim(titulo), n)
						with open(url_prev_n, 'wb') as f:
							for chunk in prev_n:
								f.write(chunk)
						previews.append(url_prev_n)

				# Store the video.  The third argument is passed as an empty
				# list here.
				subir_video(
					previews,
					cast,
					[],
					pagina_tube,
					tags,
					titulo,
					thumbnail,
					publicado,
					url_video,
					codigo_iframe,
				)
	def handle(self, *args, **options):
		"""Scrape the porndoe.com listing and store every new HD video.

		Requests the front page and then ``/?page=2`` up to ``/?page=100``.
		For every listing item that carries the HD icon and is not yet stored
		as a ``Video`` the detail page is fetched and title, embed (iframe)
		URL, pay site, cast and tags are extracted.  The listing thumbnail is
		written below ``static/imagenes/porndoe/`` and the collected data is
		handed to ``subir_video``.

		``args`` and ``options`` are accepted but not used.
		"""
		url_base = 'http://www.porndoe.com'
		cookies = dict(__language="en")
		pagina_tube = PaginaTube.objects.get(nombre='porndoe.com')

		# Original author's note: 1000 videos are reached at loop number 38.
		for i in range(0, 100):

			# Progress output.
			print('Pagina %s de 100 ...' % i)

			# The first request hits the bare front page; later ones ask for
			# ?page=<i + 1>.
			if i == 0:
				peticion = requests.get(url_base, cookies=cookies)
			else:
				peticion = requests.get(url_base + '/?page=%s' % (i + 1),
					cookies=cookies)
			listado = BeautifulSoup(peticion.content, 'html.parser')

			# Map each new HD video URL to its listing thumbnail URL.
			lista_videos = {}
			for video in listado.find_all('article', {'class': 'video-item'}):

				# Only items flagged with the HD icon are of interest.
				if not video.find('span', {'class': 'ico-hd'}):
					continue

				link = url_base + video.a.get('href')

				# Skip videos that are already stored.
				if Video.objects.filter(url_video=link).exists():
					continue

				lista_videos[link] = video.img.get('src')

			# Scrape the detail page of every new video.
			for url_video, url_thumbnail in lista_videos.items():

				peticion = requests.get(url_video, cookies=cookies)
				soup = BeautifulSoup(peticion.content, 'html.parser')

				titulo = soup.h1.text
				publicado = datetime.datetime.now()

				# The embed code is HTML held in an <input> value; it is
				# parsed again to read the iframe's src.
				c = soup.find('div', {'id': 'my-embed'}).input.get('value')
				codigo_iframe = BeautifulSoup(c, 'html.parser').iframe.get('src')

				# Pay site: title of the link inside the channel box.  Left
				# empty when the page has no such box or link.
				pagina_pago = []
				canal = soup.find('div', {'class': 'channel-about'})
				if canal is not None and canal.a is not None:
					pagina_pago = [canal.a.get('title')]

				# Cast, minus the "Suggest performer" entry when present.
				casting = [s.text for s in soup.find_all('span', {'class': 'performer-name'})]
				try:
					casting.remove('Suggest performer')
				except ValueError:
					# The entry was not in the list; nothing to remove.
					pass

				# Tags come from the data row whose text contains "Tags:".
				tags = []
				for p in soup.find_all('p', {'class': 'data-row'}):
					if 'Tags:' in p.getText():
						tags = [a.get('title') for a in p.find_all('a')]

				# Download the thumbnail; it stays None unless the request
				# answers 200.
				thumbnail = None
				peticion_img = requests.get(url_thumbnail, stream=True)

				if peticion_img.status_code == 200:
					path_imagen = 'static/imagenes/porndoe/%s.jpg' % trim(url_thumbnail)
					thumbnail = path_imagen
					with open(path_imagen, 'wb') as f:
						for chunk in peticion_img:
							f.write(chunk)

				# Store the video.
				# NOTE(review): called with nine positional arguments; confirm
				# their count and order against subir_video's signature.
				subir_video(
					casting,
					pagina_pago,
					pagina_tube,
					tags,
					titulo,
					thumbnail,
					publicado,
					url_video,
					codigo_iframe
				)

		print('FIN sandbox_porndoe_1')
    def handle(self, *args, **options):
        """Scrape porndig.com's "load more posts" endpoint and store new videos.

        Posts one request to the endpoint, parses the HTML fragment found
        under ``data.content`` of the JSON answer and, for every full-HD item
        that is not yet stored as a ``Video``, fetches the detail page.
        Title, publication date, studio (pay site), categories, cast and the
        embed (iframe) URL are extracted; the thumbnail and the preview
        images that can be downloaded are written below
        ``static/imagenes/porndig/`` and everything is handed to
        ``subir_video``.

        ``args`` and ``options`` are accepted but not used.
        """
        url = 'http://www.porndig.com/posts/load_more_posts'
        pagina_tube = PaginaTube.objects.get(nombre='porndig.com')

        # Original author's note: fewer than 50 requests already yield 1000
        # videos.  The loop currently issues a single request.
        for i in range(0, 1):

            # Form data; the offset advances by 100 per request.
            payload = {
                'main_category_id': 1,
                'type': 'post',
                'name': 'category_videos',
                'filters': {
                    'filter_type': 'date',
                    'filter_period': ''
                },
                'category_id': {
                    '': 882
                },
                'offset': i * 100
            }

            peticion = requests.post(url, data=payload)
            respuesta = peticion.json()['data']['content']
            soup = BeautifulSoup(respuesta, 'html.parser')

            # Map each new full-HD video URL to its enlarged thumbnail URL.
            lista_videos = {}
            for elemento in soup.find_all('div',
                                          {'class': 'video_item_wrapper'}):
                # Full-HD items are recognised by the icon class name
                # appearing in their markup.
                if 'icon-video_full_hd' not in str(elemento):
                    continue

                link = 'http://www.porndig.com' + elemento.a.get('href')

                # Skip videos that are already stored.
                if Video.objects.filter(url_video=link).exists():
                    continue

                lista_videos[link] = elemento.img.get('src').replace(
                    '320x180', '400x225')

            # Scrape the detail page of every new video.
            for url_video, url_thumbnail in lista_videos.items():

                peticion = requests.get(url_video)
                sopa = BeautifulSoup(peticion.content, 'html.parser')

                titulo = sopa.h1.text

                # The text of the fourth "video_class_value" element is
                # parsed as the publication date.
                publicado = sopa.find_all(
                    'div', {'class': 'video_class_value'})[3].text
                publicado = dateutil.parser.parse(publicado)

                # Studio (pay site), categories and cast are optional
                # description items identified by their label text.
                pagina_pago, tags, casting = [], [], []
                for elemento in sopa.find_all(
                        'div', {'class': 'video_description_item'}):
                    texto = elemento.getText()

                    if 'Studio:' in texto:
                        pagina_pago = [elemento.a.text]

                    if 'Categories:' in texto:
                        tags = [a.text for a in elemento.find_all('a')]

                    if 'Pornstar(s)' in texto:
                        casting = [a.text for a in elemento.find_all('a')]

                codigo_iframe = sopa.find('div', {'class': 'js_video_embed'})
                codigo_iframe = codigo_iframe.textarea.iframe.get('src')

                # Best effort: when the page names no studio, try to read
                # it from the embedded player page instead.
                if not pagina_pago:
                    try:
                        headers = {'referer': 'http://www.porndig.com'}
                        respuesta_iframe = requests.get(codigo_iframe,
                                                        headers=headers)
                        sopa_iframe = BeautifulSoup(respuesta_iframe.content,
                                                    'html.parser')
                        el = sopa_iframe.find(
                            'span',
                            {'id': 'producer_overlay_content_top_left_text'})
                        pagina_pago = [el.a.text]
                    except (requests.RequestException, AttributeError):
                        # Request failed or the overlay element/link is
                        # missing: keep the studio list empty.
                        pass

                # Download the thumbnail; it stays None unless the request
                # answers 200.
                request_img = requests.get(url_thumbnail, stream=True)
                thumbnail = None

                if request_img.status_code == 200:
                    url_imagen = 'static/imagenes/porndig/%s.jpg' % trim(
                        titulo)
                    with open(url_imagen, 'wb') as f:
                        for chunk in request_img:
                            f.write(chunk)
                    thumbnail = url_imagen

                # Previews are requested as 0.jpg ... 20.jpg in the
                # thumbnail's directory; only those answering 200 are kept.
                directorio = url_thumbnail.rsplit('/', 1)[0]
                previews = []

                for n in range(0, 21):
                    img = '%s/%s.jpg' % (directorio, n)

                    prev_n = requests.get(img, stream=True)
                    if prev_n.status_code == 200:
                        url_prev_n = 'static/imagenes/porndig/%s-thumb-%s.jpg' % (
                            trim(titulo), n)
                        with open(url_prev_n, 'wb') as f:
                            for chunk in prev_n:
                                f.write(chunk)
                        previews.append(url_prev_n)

                # Store the video.
                subir_video(previews, casting, pagina_pago, pagina_tube, tags,
                            titulo, thumbnail, publicado, url_video,
                            codigo_iframe)