def open_url():
    """Flask view: send the requested URL to the TV.

    Reads ``url`` from the query string, rewrites it via
    ``utils.transform_url``, quits Plex, opens the result in Safari,
    stores a status message in the session, and redirects to the index.
    """
    target = utils.transform_url(request.args['url'])
    _quit_app("Plex")
    _open_url_in_safari(target)
    session['msg'] = "Opened on TV! (You'll need to click Play, unfortunately)"
    return redirect(url_for('index'))
def download_js(parsed_data, folder, base_url):
    """Download every external <script src=...> referenced by the page.

    Each src is resolved against *base_url*; entries whose filename
    cannot be extracted or whose URL cannot be resolved are skipped.
    Successful responses are written to *folder* under the basename.
    """
    for src in (tag["src"] for tag in parsed_data.find_all("script", src=True)):
        name_match = re.search(r'/([^/]+)$', src)
        resolved = transform_url(src, base_url)
        if name_match is None or resolved is None:
            continue
        reply = requests.get(resolved)
        if reply.ok:
            save_to_file(reply.content, folder + name_match.group(1))
def download_media(parsed_data, folder, base_url):
    """Download image/icon assets (jpg, gif, png, jpeg, svg) linked from the page.

    Collects hrefs from <link> tags and srcs from <img> tags, dedupes them,
    resolves each against *base_url*, and saves successful downloads into
    *folder* under the asset's basename.
    """
    candidates = {tag['href'] for tag in parsed_data.findAll('link', href=True)}
    candidates.update(tag['src'] for tag in parsed_data.find_all('img', src=True))
    for raw in candidates:
        name_match = re.search(r'/([\w_\-.]+[.](jpg|gif|png|jpeg|svg))$', raw)
        resolved = transform_url(raw, base_url)
        if name_match is None or resolved is None:
            continue
        reply = requests.get(resolved)
        if reply.ok:
            save_to_file(reply.content, folder + name_match.group(1))
def download_css(parsed_data, folder, base_url):
    """Download every stylesheet referenced via <link rel="stylesheet">.

    Resolves each href against *base_url*; unresolvable links or links
    with no extractable filename are skipped. Successful responses are
    written to *folder* under the stylesheet's basename.
    """
    stylesheets = [
        tag['href']
        for tag in parsed_data.findAll('link', href=True, rel="stylesheet")
    ]
    for href in stylesheets:
        name_match = re.search(r'/([^/]+)$', href)
        resolved = transform_url(href, base_url)
        if name_match is None or resolved is None:
            continue
        reply = requests.get(resolved)
        if reply.ok:
            save_to_file(reply.content, folder + name_match.group(1))
def download_data_from_url(url, task_id, base_url=None, depth=1):
    """Mirror *url* (its HTML plus js/css/media assets) into the task folder.

    Args:
        url: page to download.
        task_id: identifier used by ``get_folder`` to pick the target folder;
            recursive calls extend it with ``"{task_id}/{i}"``.
        base_url: scheme+host prefix used to resolve relative links; derived
            from *url* on the first call.
        depth: how many levels of same-site links to recurse into.

    Returns:
        The folder path the page was saved into.

    Raises:
        requests.exceptions.InvalidURL: if no http(s) host can be extracted
            from *url* when *base_url* is not given.
    """
    folder = get_folder(task_id)
    if base_url is None:
        # Derive the scheme+host prefix from the URL itself.
        base_url = re.search(r'((https|http)://[\w_\-.]+)', url)
        if not base_url:
            raise requests.exceptions.InvalidURL(
                f"This is not a valid URL: {url}")
        base_url = base_url.group(1)
    response = requests.get(url)
    # Prefer the charset declared inside the HTML; fall back to the one from
    # the HTTP Content-Type header (only trusted when 'charset' is present).
    http_encoding = response.encoding if 'charset' in response.headers.get(
        'content-type', '').lower() else None
    html_encoding = EncodingDetector.find_declared_encoding(response.content,
                                                            is_html=True)
    encoding = html_encoding or http_encoding
    soup = BeautifulSoup(response.content, from_encoding=encoding)
    # NOTE(review): the saved file uses response.text (requests' own decoding),
    # not the detected `encoding` used for the soup — confirm this is intended.
    with open(folder + 'index.html', 'w', encoding='utf-8') as f:
        f.write(response.text)
    download_media(soup, folder, base_url)
    download_js(soup, folder, base_url)
    download_css(soup, folder, base_url)
    if depth > 0:
        # Resolve all in-page links, then recurse into those that pass
        # check_url (same-site filtering) with depth reduced by one.
        links = set(
            map(lambda x: transform_url(x, base_url), find_another_urls(soup)))
        for i, link in enumerate(
                filter(lambda x: check_url(x, base_url), links)):
            try:
                download_data_from_url(link, "{0}/{1}".format(task_id, i),
                                       base_url=base_url, depth=depth - 1)
            except requests.exceptions.RequestException as e:
                # Best-effort: a failed sub-page does not abort the mirror.
                logging.error(
                    f'Exception occurred while request to {url}\n {e}')
    return folder
def eventsview(request):
    """Django view: refresh vendor/event data, then render the events page.

    Seeds the Vendor table by scraping offthegridsf.com when it is empty,
    then pulls Facebook event data from ``facebookurl``: if events already
    exist only the recent entries are merged in; otherwise a second page is
    fetched via the paging cursor and the combined data is filtered by
    ``check_less_thirty`` before the models are created.

    Fix: ``print "WERE here"`` was a Python-2 print *statement* — a syntax
    error under Python 3. ``print("WERE here")`` emits the same output on
    both Python 2 and 3.
    """
    check_db()
    if len(Vendor.objects.all()) == 0:
        # First run: scrape the vendor list and persist it.
        content = read_data("http://offthegridsf.com/vendors#food")
        vendors = parse_HTML(content)
        create_vendor_models(vendors)
    fb_data = read_data(facebookurl)
    if len(Event.objects.all()) > 0 and fb_data != "ERROR":
        # Events already present: merge only the recent Facebook entries.
        event_data_ready = check_recent_fb_json(fb_data["data"])
        create_event_models(event_data_ready)
        establish_relation()
    elif fb_data != "ERROR":
        print("WERE here")  # NOTE(review): leftover debug output — consider removing
        # Empty table: follow the paging cursor for a second page and
        # filter the combined feed before creating the models.
        next_parameter = fb_data["paging"]["cursors"]["after"]
        next_set = transform_url(facebookurl, next_parameter)
        second = read_data(next_set)
        fulldata = fb_data["data"] + second["data"]
        events = check_less_thirty(fulldata)
        create_event_models(events)
        establish_relation()
    event_list = Event.objects.all().order_by("-event_time")
    context_dict = {"events": event_list}
    return render(request, "GingerBites/events.html", context_dict)
def test_transform_url(self):
    """transform_url should inject limit=50 and append the paging cursor."""
    original = "https://graph.facebook.com/v2.2/Offthegridsf/events?date_form=U&fields=id,cover,name,description,start_time,location,end_time,going&access_token=CAACEdEose0cBAMzYv1MjCkWoEvkMNFZBLCB2a3MgufPZALEURuZAJ4FkKqgmNCpp1hKJvWHpuz9hp7G0bSWSDGmZBJsrnGn4QS6DlB7h7QUfdgjgdLoTHDHpPSD1LBs3cdaNdFYRZBt8tjs6ZCI5qz26uZCvjnIsZCScil8bhYH4LEVxZC6aOKjyOima4byMHVKetrOf40ZCN0sTPzgXvtPypQJZCRbCu5XrHgZD"
    cursor = "aftercode"
    expected = (
        "https://graph.facebook.com/v2.2/Offthegridsf/events?date_form=U&fields=id,cover,name,description,start_time,location,end_time,going&limit=50&access_token=CAACEdEose0cBAMzYv1MjCkWoEvkMNFZBLCB2a3MgufPZALEURuZAJ4FkKqgmNCpp1hKJvWHpuz9hp7G0bSWSDGmZBJsrnGn4QS6DlB7h7QUfdgjgdLoTHDHpPSD1LBs3cdaNdFYRZBt8tjs6ZCI5qz26uZCvjnIsZCScil8bhYH4LEVxZC6aOKjyOima4byMHVKetrOf40ZCN0sTPzgXvtPypQJZCRbCu5XrHgZD"
        "&after=aftercode")
    self.assertEqual(transform_url(original, cursor), expected)