def test_adfly(self):
    """adf.ly unshortening: known short links map to expected (url, status) pairs."""
    # (short_url, extra kwargs for unshorten_only, expected result)
    cases = [
        ('http://adf.ly/WzXu2', {},
         ('http://www39.zippyshare.com/v/69303767/file.html', 200)),
        ('http://adf.ly/1icWR', {},
         ('http://adf.ly/1icWR', 'No ysmm variable found')),
        ('http://links.devitrianto.com/yy', {'type': 'adfly'},
         ('http://www.sendspace.com/file/a2z6ji', 200)),
        # File has been DMCA removed, so if we allow the 301 HEAD request to
        # resolve, it returns a different URL.
        # Also - sidenote: Pirated files in your unit tests? Really?
        ('http://adf.ly/bJ8mm', {},
         ('http://www.mediafire.com/download/cixal2y0auya19m/com.ratrodstudio.skateparty2.zip', 200)),
    ]
    for short_url, kwargs, expected in cases:
        self.assertEqual(unshortenit.unshorten_only(short_url, **kwargs), expected)
def test_adfly(self):
    """Verify adf.ly link resolution against known destination URLs."""
    # A working link should resolve to its final destination with HTTP 200.
    result = unshortenit.unshorten_only('http://adf.ly/WzXu2')
    self.assertEqual(
        result, ('http://www39.zippyshare.com/v/69303767/file.html', 200))

    # A dead adf.ly page comes back unchanged with an explanatory status.
    result = unshortenit.unshorten_only('http://adf.ly/1icWR')
    self.assertEqual(result, ('http://adf.ly/1icWR', 'No ysmm variable found'))

    # Custom-domain adf.ly links need the unshortener type forced explicitly.
    result = unshortenit.unshorten_only(
        'http://links.devitrianto.com/yy', type='adfly')
    self.assertEqual(result, ('http://www.sendspace.com/file/a2z6ji', 200))

    # File has been DMCA removed, so if we allow the 301 HEAD request to
    # resolve, it returns a different URL.
    # Also - sidenote: Pirated files in your unit tests? Really?
    result = unshortenit.unshorten_only('http://adf.ly/bJ8mm')
    self.assertEqual(
        result,
        ('http://www.mediafire.com/download/cixal2y0auya19m/com.ratrodstudio.skateparty2.zip',
         200))
def cleanUrl(url):
    """Resolve a (possibly shortened) URL to its final destination.

    Returns None for tumblr login-wall redirects; otherwise returns the
    unshortened URL. The unshortener's status code is deliberately ignored
    (best-effort resolution).
    """
    # F*****g tumblr redirects.
    if url.startswith("https://www.tumblr.com/login"):
        return None
    resolved, _status = unshortenit.unshorten_only(url)
    # NOTE: _status is not checked on purpose (was a commented-out assert).
    return resolved
#!/bin/python
# -*- coding: utf-8 -*-
"""Scrape each post URL listed in links.json, pull the first recognized
download link out of the post body (unshortening adf.ly links), and dump
the augmented data to downloads.json."""
import requests
import json
import sys
from bs4 import BeautifulSoup
import unshortenit

# Loading JSON
with open("links.json") as infile:
    data = json.load(infile)

for post in data:
    # Fetch the post page. A timeout keeps one dead host from hanging the
    # whole run indefinitely (requests.get blocks forever without one).
    soup = BeautifulSoup(requests.get(post['url'], timeout=30).text, "lxml")
    for post_body in soup.find_all("div", class_="post-body"):
        # Grab the download link, unshortening it if necessary.
        for anchor in post_body.find_all('a', href=True):
            href = anchor['href']
            if "adf.ly" in href:
                # unshorten_only returns (url, status); we only keep the URL.
                post["download_link"] = unshortenit.unshorten_only(href)[0]
                break
            elif ("cloud.mail.ru" in href) or ("mega.nz" in href):
                post["download_link"] = href
                break

    # Print the current state of things as progress feedback.
    print(json.dumps(post, sort_keys=True, indent=4))

with open('downloads.json', 'w') as fp:
    json.dump(data, fp)