Пример #1
0
    def test_adfly(self):
        """Check what ``unshortenit.unshorten_only`` returns for adf.ly links.

        Four cases are asserted in order: a link that resolves, a link for
        which no ysmm variable is found, a link on another domain that is
        handled as adf.ly via ``type='adfly'``, and a link whose target file
        has been removed.
        """
        resolved = unshortenit.unshorten_only('http://adf.ly/WzXu2')
        self.assertEqual(
            resolved,
            ('http://www39.zippyshare.com/v/69303767/file.html', 200),
        )

        # On failure the second tuple element carries the error message
        # instead of a status code, and the URL comes back unchanged.
        unresolved = unshortenit.unshorten_only('http://adf.ly/1icWR')
        self.assertEqual(
            unresolved,
            ('http://adf.ly/1icWR', 'No ysmm variable found'),
        )

        forced_type = unshortenit.unshorten_only(
            'http://links.devitrianto.com/yy', type='adfly')
        self.assertEqual(
            forced_type,
            ('http://www.sendspace.com/file/a2z6ji', 200),
        )

        # The target file was DMCA-removed: if the 301 HEAD request were
        # allowed to resolve, a different URL would be returned.
        removed_target = unshortenit.unshorten_only('http://adf.ly/bJ8mm')
        self.assertEqual(
            removed_target,
            ('http://www.mediafire.com/download/cixal2y0auya19m/com.ratrodstudio.skateparty2.zip', 200),
        )
Пример #2
0
    def test_adfly(self):
        """Run the adf.ly cases against ``unshortenit.unshorten_only``.

        Each case is ``(positional args, keyword args, expected result)``.
        Cases are executed in order and the first mismatch fails the test.
        """
        cases = [
            (('http://adf.ly/WzXu2',), {},
             ('http://www39.zippyshare.com/v/69303767/file.html', 200)),
            # No ysmm variable: the original URL and an error message
            # are expected back.
            (('http://adf.ly/1icWR',), {},
             ('http://adf.ly/1icWR', 'No ysmm variable found')),
            # Link on another domain, explicitly treated as adf.ly.
            (('http://links.devitrianto.com/yy',), {'type': 'adfly'},
             ('http://www.sendspace.com/file/a2z6ji', 200)),
            # File has been DMCA removed, so if the 301 HEAD request were
            # allowed to resolve, a different URL would be returned.
            (('http://adf.ly/bJ8mm',), {},
             ('http://www.mediafire.com/download/cixal2y0auya19m/com.ratrodstudio.skateparty2.zip',
              200)),
        ]

        for args, kwargs, expected in cases:
            self.assertEqual(
                unshortenit.unshorten_only(*args, **kwargs), expected)
Пример #3
0
def cleanUrl(url):
    """Return the unshortened form of *url*, or None for Tumblr login URLs.

    URLs starting with ``https://www.tumblr.com/login`` are rejected with
    None. Any other URL is passed to ``unshortenit.unshorten_only`` and the
    first element of the returned pair is handed back; the second element
    (the status) is deliberately not checked.
    """
    is_tumblr_login = url.startswith("https://www.tumblr.com/login")
    if not is_tumblr_login:
        resolved, _status = unshortenit.unshorten_only(url)
        return resolved

    # Tumblr login redirects are discarded rather than resolved.
    return None
Пример #4
0
def cleanUrl(url):
	"""Unshorten *url*, returning None when it is a Tumblr login redirect.

	The result is the first element of the pair returned by
	``unshortenit.unshorten_only``; the accompanying status is ignored.
	"""
	login_prefix = "https://www.tumblr.com/login"
	if url.startswith(login_prefix):
		# Tumblr login redirects are skipped entirely.
		return None

	unshortened, _status = unshortenit.unshorten_only(url)
	# The status is intentionally left unverified.
	return unshortened
#!/bin/python
# -*- coding: utf-8 -*- 

import requests
import json
import sys
from bs4 import BeautifulSoup
import unshortenit

# Load the list of posts from the input JSON file.
with open("links.json") as source_file:
    data = json.load(source_file)

for post in data:
    # Fetch the post page and parse it with the lxml parser.
    response = requests.get(post['url'])
    soup = BeautifulSoup(response.text, "lxml")

    for post_body in soup.find_all("div", class_="post-body"):

        # Take the first recognised download link in this post body,
        # unshortening it when it is an adf.ly link.
        for anchor in post_body.find_all('a', href=True):
            href = anchor['href']
            if "adf.ly" in href:
                # Only the first element of the returned pair is stored.
                post["download_link"] = unshortenit.unshorten_only(href)[0]
                break
            if "cloud.mail.ru" in href or "mega.nz" in href:
                # These links are stored unchanged.
                post["download_link"] = href
                break

        # Print the current state of the post.
        print(json.dumps(post, sort_keys=True, indent=4))

# Write every post, including any download links found, to the output file.
with open('downloads.json', 'w') as out_file:
    json.dump(data, out_file)