def __get_youtube_id_fallback(self, track, cancellable):
    """
        Get youtube id (fallback)
        @param track as Track
        @param cancellable as Gio.Cancellable
        @return youtube id as str
    """
    try:
        # The package installs as beautifulsoup4 but is imported as bs4
        from bs4 import BeautifulSoup
    except ImportError:
        print("$ sudo pip3 install beautifulsoup4")
        return None
    try:
        unescaped = "%s %s" % (track.artists[0],
                               track.name)
        search = GLib.uri_escape_string(
                        unescaped.replace(" ", "+"),
                        None,
                        True)
        uri = "https://www.youtube.com/results?search_query=%s" % search
        (status, data) = App().task_helper.load_uri_content_sync(uri,
                                                                 cancellable)
        if not status:
            return None
        html = data.decode("utf-8")
        soup = BeautifulSoup(html, "html.parser")
        ytems = []
        # Collect (video id, title) pairs from the result page links
        for link in soup.findAll("a"):
            href = link.get("href")
            title = link.get("title")
            if href is None or title is None:
                continue
            if href.startswith("/watch?v="):
                href = href.replace("/watch?v=", "")
                ytems.append((href, title))
        dic = {}
        best = self.__BAD_SCORE
        for (yid, title) in ytems:
            score = self.__get_youtube_score(title,
                                             track.name,
                                             track.artists[0],
                                             track.album.name)
            if score < best:
                best = score
            elif score == best:
                continue  # Keep first result
            dic[score] = yid
        # Return url from first dic item
        if best == self.__BAD_SCORE:
            return None
        else:
            return dic[best]
    except Exception as e:
        Logger.warning("YouTubeHelper::__get_youtube_id_fallback(): %s", e)
        self.__fallback = True
        return None
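# Note: __get_youtube_score is referenced above but not shown. The sketch below
# is a hypothetical illustration (not the actual implementation) of such a
# scoring helper, assuming lower scores mean closer matches, which is
# consistent with the comparison against self.__BAD_SCORE in the loop above.
def __get_youtube_score(self, page_title, title, artist, album):
    """
        Score a YouTube result title: lower is better
        @param page_title as str
        @param title/artist/album as str
        @return int
    """
    page_title = page_title.lower()
    # Strip out the parts we expect to find; whatever remains is "noise"
    for wanted in (title.lower(), artist.lower(), album.lower()):
        page_title = page_title.replace(wanted, "")
    # A result title with more unrelated leftover characters scores worse
    return len(page_title)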
from selenium import webdriver
from bs4 import BeautifulSoup  # installed via "pip install beautifulsoup4"
import pandas as pd

# Selenium 3 style call; Selenium 4 expects a Service object instead of a path
driver = webdriver.Chrome("/usr/lib/chromium-browser/chromedriver")

products = []
prices = []
ratings = []
driver.get(
    "https://www.flipkart.com/gaming-laptops-store?otracker=nmenu_sub_Electronics_0_Gaming%20Laptops&otracker=nmenu_sub_Electronics_0_Gaming%20Laptops"
)
content = driver.page_source
soup = BeautifulSoup(content, "html.parser")
for a in soup.findAll('a', href=True, attrs={'class': '_2cLu-l'}):
    name = a.find('div', attrs={'class': '_2cLu-l'})
    price = a.find('div', attrs={'class': '_1vC4OE'})
    rating = a.find('div', attrs={'class': 'hGSR34'})
    products.append(name.text)
    prices.append(price.text)
    ratings.append(rating.text)
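# pandas is imported above but never used in the snippet; a common next step
# (shown here as an illustrative sketch, not part of the original script) is to
# collect the scraped lists into a DataFrame and write them to CSV. The file
# name "products.csv" is an arbitrary choice for this example.
df = pd.DataFrame({
    'Product Name': products,
    'Price': prices,
    'Rating': ratings,
})
df.to_csv('products.csv', index=False, encoding='utf-8')
driver.quit()  # close the browser session once scraping is done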
toi_headings = toi_soup.find_all('h2')
toi_headings = toi_headings[0:-13]  # removing footers

toi_news = []
for th in toi_headings:
    if len(th.text) < 25:
        continue
    toi_news.append(th.text)

# Getting news from Hindustan Times
ht_r = requests.get("https://www.hindustantimes.com/india-news/")
ht_soup = BeautifulSoup(ht_r.content, 'html5lib')
ht_headings = ht_soup.findAll("div", {"class": "headingfour"})
ht_headings = ht_headings[2:]

ht_news = []
for hth in ht_headings:
    if len(hth.text) < 25:
        continue
    ht_news.append(hth.text)

# Getting news from The Hindu
main_url = "https://newsapi.org/v1/articles?source=the-hindu&sortBy=top&apiKey=98cb343e477940f181f5b38bdb9e3f9d"

# fetching data in json format
open_th_page = requests.get(main_url).json()

# getting all articles in a string article
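# Continuation sketch (not in the original snippet): the comment above says the
# articles go into a variable named "article". Assuming the NewsAPI v1 response
# has the usual {"articles": [{"title": ...}, ...]} shape, the headlines can be
# pulled out like this; "th_news" is a hypothetical name mirroring
# toi_news/ht_news above.
article = open_th_page.get("articles", [])
th_news = []
for ar in article:
    title = ar.get("title")
    if title and len(title) >= 25:  # same length filter as the other sources
        th_news.append(title)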