def get_value_from(url):
    """Fetch the page at ``BASE + url`` and pull a value out of its last <input>.

    The final ``<input>`` element found on the page is serialized back to a
    string and split on double-quote characters; the second-to-last piece
    (i.e. the text between the last two double quotes in the tag) is returned.

    NOTE(review): presumably that last quoted attribute is the input's
    ``value`` -- confirm against the page markup.

    Raises IndexError if the page has no ``<input>`` element or the tag
    contains fewer than one double-quote character.
    """
    response = requests.get(BASE + url)
    page = mksoup(response.text)
    last_input = page.find_all("input")[-1]
    quoted_pieces = str(last_input).split("\"")
    return quoted_pieces[-2]
return with open('ultBlockList.tmp.gz', 'wb') as out: while True: data = handle.read(1024) if len(data) == 0: break out.write(data) with gzip.open('ultBlockList.tmp.gz') as contents: with open("blocklist.txt", "a+") as f: for line in contents: f.write(line) os.remove('ultBlockList.tmp.gz') if __name__ == "__main__": print("Getting list page") soup = mksoup(requests.get("https://www.iblocklist.com/lists.php").text) links = {} #dict of name of list -> its url for row in soup.find_all("tr")[1:]: #for each table row section = str(list(row.children)[0]) pieces = section.split("\"") links[pieces[2].split("<")[0][1:]] = pieces[1] for link in links: #download and combine files print "Downloading " + link + " blocklist." value = get_value_from(links[link]) if value == "subscription": print "Blocklist is not available for free download D:" elif value == "unavailable": print "URL is unavailable" else: #download and add this sucker process(value)
return with open('ultBlockList.tmp.gz', 'wb') as out: while True: data = handle.read(1024) if len(data) == 0: break out.write(data) contents = gzip.GzipFile('ultBlockList.tmp.gz') f = open("blocklist.txt", "a+")#TODO add check for if it exists for line in contents: f.write(line) f.close() os.remove('ultBlockList.tmp.gz') if __name__=="__main__": print("Getting list page") soup = mksoup(requests.get("https://www.iblocklist.com/lists.php").text) links = {}#dict of name of list -> its url for row in soup.find_all("tr")[1:]:#for each table row section = str(list(row.children)[0]) pieces = section.split("\"") links[pieces[2].split("<")[0][1:]] = pieces[1] for link in links:#download and combine files print "Downloading " + link + " blocklist." value = get_value_from(links[link]) if value == "subscription": print "Blocklist is not available for free download D:" elif value == "unavailable": print "URL is unavailable" else:#download and add this sucker process(value)