def get_torrent_link_from_html(html):
    """Extract a download link for the user's newest torrent from a page.

    Uses somewhat flimsy HTML parsing due to apparent lack of other options.

    Args:
        html (str): string representing html of upload response.

    Returns:
        str: (hopefully) direct link to torrent.

    Raises:
        ValueError: if the page lists no torrent whose user link matches
            the page's ``userid``.
        AssertionError: if the newest matching torrent is two or more
            minutes old (i.e. it is probably not the one just uploaded).
    """
    # Parse into a separate name instead of rebinding the `html` argument.
    page = HTML(html=html)

    # Account details are scraped from the page's inline script and links.
    user_id = page.search('var userid = {};')[0]
    authkey = page.search('var authkey = "{}";')[0]
    passkey = page.search("passkey={}&")[0]

    # Keep only the torrent elements whose user link carries our user id.
    user_torrents = [
        t for t in page.find('[id^=torrent_]')
        if t.search('user.php?id={}"')[0] == user_id
    ]
    if not user_torrents:
        # Previously surfaced as an opaque "max() arg is an empty sequence".
        raise ValueError(
            "no torrents belonging to user {} found in page".format(user_id))

    # Element ids look like "torrent_<id>"; the timestamp is read from the
    # title attribute of the element's first <span>.
    # NOTE(review): the timestamp is parsed without an explicit timezone --
    # confirm it matches what the site actually reports.
    user_torrents_ids_and_dates = [
        (
            t.attrs['id'].split('_')[1],
            pendulum.from_format(
                t.find('span')[0].attrs['title'], 'MMM DD YYYY, HH:mm'),
        )
        for t in user_torrents
    ]
    torrent_id, torrent_dt = max(user_torrents_ids_and_dates,
                                 key=lambda x: x[1])

    # Explicit raise instead of `assert` so the freshness check still runs
    # when Python is started with -O (which strips assert statements).
    if (pendulum.now() - torrent_dt).in_minutes() >= 2:
        raise AssertionError(
            "newest torrent {} is dated {}, which is not within the last "
            "2 minutes".format(torrent_id, torrent_dt))

    return (
        "https://awesome-hd.me/torrents.php?action=download"
        "&id={}&authkey={}&torrent_pass={}"
    ).format(torrent_id, authkey, passkey)
def get_program_info(program_url):
    """Download a program page and pull out its title and subtitle link.

    Args:
        program_url (str): address of the program page to fetch.

    Returns:
        dict | None: ``{"url": ..., "title": ...}`` plus a ``"vtt_url"``
        entry when the page contains a ``vtt: [['PT', ..., ...]]`` snippet;
        ``None`` when the page has no ``<h1>`` element.

    Raises:
        requests.HTTPError: propagated from ``raise_for_status()`` on a
            non-success response.
    """
    logging.info(f"Fetching {program_url}")
    response = requests.get(program_url)
    response.raise_for_status()
    page = HTML(html=response.text)

    # The first <h1> on the page is assumed to be the program title.
    headings = page.find("h1")
    if not headings:
        logging.warning(f"No title for {program_url}")
        return None

    program_info = {
        "url": program_url,
        "title": headings[0].text,
    }

    # The second captured field of the vtt snippet is used as the URL.
    vtt_match = page.search("vtt: [['PT','{}','{}']]")
    if vtt_match:
        program_info["vtt_url"] = vtt_match[1]

    return program_info
import json

# Load the user's login, password and group from the local settings file.
with open('settings.json') as json_data:
    d = json.load(json_data)

# Base URL of the service; every request below is built from it.
WAT_STR = 'https://s1.wcy.wat.edu.pl/ed/'

# NOTE(review): hostname checking and certificate verification are switched
# off here, so the connection is open to man-in-the-middle attacks.
# Presumably the server's certificate does not validate -- confirm this is
# still required before keeping it.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

# Fetch the landing page; the `with` block closes the HTTP response once the
# body has been read (it was previously left open).
with urllib.request.urlopen(WAT_STR, context=ctx) as r:
    data = r.read().decode('windows-1252')

# The session id is scraped from the first "sid=...>" occurrence in the page.
html = HTML(html=data)
sid = html.search('sid={}>')[0]

# URL of the logged-in view for the configured group.
STR = (WAT_STR + "logged_inc.php?sid=" + sid
       + "&mid=328&iid=20175&exv=" + d['group']
       + "&pos=267.75&rdo=1&t=6801377")

# Login form fields. Note that `data` is rebound here from the page text to
# the form payload.
data = [
    ("sid", sid),
    ('formname', 'login'),
    ('default_fun', '1'),
    ('userid', d["login"]),
    ('password', d["password"]),
    ('view_height', '1080'),
    ('view_width', '1920'),
]
#req = urllib.request.Request('https://s1.wcy.wat.edu.pl/ed/index.php?sid='+sid)
#req.add_header('Content-Type', 'application/x-www-form-urlencoded')