def get_most_read(key):
    """
    Gets the most read news from a given page

    :param key: the key of the source page (e.g: g1)
    :return: a list with the most read news from the page and the name of the news source
    """
    ns = get_ns(key)
    response, content = getpage(ns.url)  # Download the page
    soup = parsepage(content)  # Then we parse it
    strategy = strategies[key]  # Then we execute the selected Strategy based on the source
    return strategy(soup), ns.name
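# The helpers used above (getpage, parsepage, strategies) live elsewhere in the
# project. A minimal sketch of what they might look like, assuming an
# httplib2-style (response, content) download helper and a dict mapping each
# source key to its scraping strategy; this is a hypothetical outline, not the
# actual implementation:
import httplib2
from bs4 import BeautifulSoup


def getpage(url):
    """Download a page and return the (response, content) pair."""
    http = httplib2.Http()
    return http.request(url, 'GET')


def parsepage(content):
    """Parse raw HTML content into a BeautifulSoup tree."""
    return BeautifulSoup(content, 'html.parser')


# One strategy per source key; each takes a parsed soup and returns a list of
# dicts with 'title' and 'link' keys. In the real module this dict has to be
# built after all the strategy functions are defined.
strategies = {
    'localCE': __local_ce,
    'localBA': __local_ba,
    # ... one entry per supported source
}

# Typical call: news, name = get_most_read('localCE')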
def change_location():
    """
    Changes the user location

    :return: a JSON string with the name of the regional news source based on the new location
    """
    location = request.args.get('location')
    ns_local = get_ns(location)
    ns_name = ns_local.name
    d = dict(ns_name=ns_name)
    j = json.dumps(d)
    return j
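# change_location (and every strategy below) relies on get_ns, which resolves a
# source key to an object exposing a name and a url. A minimal sketch, assuming
# a namedtuple-backed registry; the entry shown is an illustrative placeholder,
# not the project's real source table:
from collections import namedtuple

NewsSource = namedtuple('NewsSource', ['name', 'url'])

_SOURCES = {
    'localCE': NewsSource('Diário do Nordeste', 'http://diariodonordeste.verdesmares.com.br'),
    # ... one entry per supported source key
}


def get_ns(key):
    """Look up the NewsSource record for the given source key."""
    return _SOURCES[key]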
def __local_ce(soup):
    """
    Gets the most read news from Ceará Local News (Diário do Nordeste)

    :param soup: the BeautifulSoup object
    :return: a list with the most read news from the Diário do Nordeste page
    """
    news = []
    ns = get_ns('localCE')
    anchors = soup.find('section', id='mais-lidas').find_all('a')
    for a in anchors:
        title = a.string
        link = ns.url + a['href']
        news.append(dict(title=title, link=link))
    return news
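# Each strategy can be exercised in isolation by feeding it a hand-written HTML
# fragment shaped like the section it expects. A quick sanity check for
# __local_ce (the fragment and expected output are illustrative):
from bs4 import BeautifulSoup

sample = '''
<section id="mais-lidas">
  <a href="/noticia/1">Primeira manchete</a>
  <a href="/noticia/2">Segunda manchete</a>
</section>
'''
soup = BeautifulSoup(sample, 'html.parser')
print(__local_ce(soup))
# [{'title': 'Primeira manchete', 'link': '<ns.url>/noticia/1'}, ...]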
def __local_ba(soup):
    """
    Gets the most read news from Bahia Local News (A Tarde)

    :param soup: the BeautifulSoup object
    :return: a list with the most read news from the A Tarde page
    """
    news = []
    ns = get_ns('localBA')
    anchors = soup.find('section', class_='maisLidas row').find_all('a')
    for a in anchors:
        title = a.p.string
        link = ns.url + a['href']
        news.append(dict(title=title, link=link))
    return news
def __local_se(soup):
    """
    Gets the most read news from Sergipe Local News (Jornal do Dia)

    :param soup: the BeautifulSoup object
    :return: a list with the most read news from the Jornal do Dia page
    """
    news = []
    ns = get_ns('localSE')
    anchors = soup.find('div', class_='coluna3 bordaTopoCinza').find_all('a')
    for a in anchors:
        title = a.string
        link = ns.url + a['href']
        news.append(dict(title=title, link=link))
    return news
def __tec_canal(soup):
    """
    DEPRECATED
    Gets the most read news from the Canaltech page

    :param soup: the BeautifulSoup object
    :return: a list with the most read news from the Canaltech page
    """
    news = []
    articles = soup.find('section', id='most-readed').find_all('article')
    for article in articles:
        title = article.a.text
        link = article.a['href']
        news.append(dict(title=title, link=link))
    return news
def __local_mt(soup):
    """
    Gets the most read news from Mato Grosso Local News (Gazeta Digital)

    :param soup: the BeautifulSoup object
    :return: a list with the most read news from the Gazeta Digital page
    """
    news = []
    ns = get_ns('localMT')
    anchors = soup.find_all('a', class_='top10titulo')
    for a in anchors:
        title = a.string
        link = ns.url + a['href']
        news.append(dict(title=title, link=link))
    return news
def __tec_conv(soup):
    """
    Gets the most read news from the Convergencia Digital page

    :param soup: the BeautifulSoup object
    :return: a list with the most read news from the Convergencia Digital page
    """
    news = []
    anchors = soup.find('div', id='slideshow').find_all('a')
    for a in anchors:
        if a.h1:
            title = a.h1.string
            link = 'http://www.convergenciadigital.com.br' + a['href']
            news.append(dict(title=title, link=link))
    return news
def __e_placar(soup):
    """
    Gets the most read news from the Placar page

    :param soup: the BeautifulSoup object
    :return: a list with the most read news from the Placar page
    """
    news = []
    ns = get_ns('e_placar')
    anchors = soup.find('div', class_='mais-lidas box right').find_all('a')
    for a in anchors:
        h1 = a.find('h1')
        h2 = a.find('h2')
        if h1 is None or h2 is None:
            continue
        title = h1.string + ': ' + h2.string
        link = ns.url + a['href']
        news.append(dict(title=title, link=link))
    return news
def __e_espn_br(soup):
    """
    Gets the most read news from the ESPN.com.br page

    :param soup: the BeautifulSoup object
    :return: a list with the most read news from the ESPN page
    """
    news = []
    ns = get_ns('e_espn_br')
    anchors = soup.find('div', id='most_viewed-all').find_all('a')
    for a in anchors:
        title = a.string
        link = ns.url + a['href']
        news.append(dict(title=title, link=link))
    return news
def __en_tititi(soup):
    """
    Gets the most read news from the Tititi page

    :param soup: the BeautifulSoup object
    :return: a list with the most read news from the Tititi page
    """
    news = []
    ns = get_ns('en_tititi')
    anchors = soup.find('div', class_='box mais-lidas clearfix').find_all('a')
    for a in anchors:
        title = a.p.string
        link = ns.url + a['href']
        news.append(dict(title=title, link=link))
    return news
def __e_gp(soup):
    """
    Gets the most read news from the Grande Premio page

    :param soup: the BeautifulSoup object
    :return: a list with the most read news from the Grande Premio page
    """
    news = []
    ns = get_ns('e_gp')
    divs = soup.find_all('div', class_='bloco-trending')
    for div in divs:
        link = ns.url + div.parent['href']
        title = div.find('div', class_='titulo').string
        news.append(dict(title=title, link=link))
    return news
def __tt(soup):
    """
    Gets the most read news from The Telegraph page

    :param soup: the BeautifulSoup object
    :return: a list with the most read news from The Telegraph page
    """
    news = []
    ns = get_ns('tt')
    headers = soup.find('div', class_='mostViewedList').find_all(
        'h3', class_='list-of-entities__item-body-headline')
    for h in headers:
        a = h.find('a')
        link = ns.url + a['href']
        title = a.text
        news.append(dict(title=title, link=link))
    return news
def __en_ego(soup):
    """
    Gets the most read news from the Ego page

    :param soup: the BeautifulSoup object
    :return: a list with the most read news from the Ego page
    """
    news = []
    anchors = soup.find('div', class_='widget mais-lidas').find_all('a')
    for a in anchors:
        title = a['title']
        link = a['href']
        news.append(dict(title=title, link=link))
    return news
def __en_contigo(soup):
    """
    DEPRECATED
    Gets the most read news from the Contigo page

    :param soup: the BeautifulSoup object
    :return: a list with the most read news from the Contigo page
    """
    news = []
    ns = get_ns('en_contigo')
    anchors = soup.find('div', class_='box mais-lidas clearfix').find_all('a')
    for a in anchors:
        title = a.p.string
        link = ns.url + a['href']
        news.append(dict(title=title, link=link))
    return news
def __m_rockbizz(soup):
    """
    Gets the most read news from the Rock Bizz page

    :param soup: the BeautifulSoup object
    :return: a list with the most read news from the Rock Bizz page
    """
    news = []
    main = soup.find('main', id='main')
    articles = main.find_all('article')
    for article in articles:
        title = article.a.string
        link = article.a['href']
        news.append(dict(title=title, link=link))
    return news
def __local_pr(soup):
    """
    Gets the most read news from Paraná Local News (Paraná Online)

    :param soup: the BeautifulSoup object
    :return: a list with the most read news from the Paraná Online page
    """
    news = []
    anchors = soup.find(
        'div', class_='tbn-coluna col-4 c-mais-lidas-comentadas c-sequencia'
    ).find_all('a')
    for a in anchors:
        title = a.string
        link = a['href']
        news.append(dict(title=title, link=link))
    return news
def __e_lance(soup):
    """
    Gets the most read news from the Lance.com.br page

    :param soup: the BeautifulSoup object
    :return: a list with the most read news from the Lance.com.br page
    """
    news = []
    ns = get_ns('e_lance')
    anchors = soup.find('div', class_='most-read').find_all('a')
    for a in anchors:
        title = a.string
        if title is None:
            continue
        link = ns.url + a['href']
        news.append(dict(title=title, link=link))
    return news
def __e_fox_br(soup):
    """
    Gets the most read news from the Fox Sports page

    :param soup: the BeautifulSoup object
    :return: a list with the most read news from the Fox Sports page
    """
    news = []
    anchors = soup.find('section', class_='home-section most-viewed').find_all('a')
    for a in anchors:
        if a.h3:
            title = a.h3.string
            link = a['href']
            news.append(dict(title=title, link=link))
    return news
def __local_rr(soup):
    """
    Gets the most read news from Roraima Local News (Folha de Boa Vista)

    :param soup: the BeautifulSoup object
    :return: a list with the most read news from the Folha de Boa Vista page
    """
    news = []
    ns = get_ns('localRR')
    divs = soup.find('div', class_='mais-lidas').find_all('div', class_='ultimas-text')
    for div in divs:
        a = div.find('a')
        title = a.string
        link = ns.url + a['href']
        news.append(dict(title=title, link=link))
    return news
def __local_al(soup):
    """
    Gets the most read news from Alagoas Local News (Cada Minuto)

    :param soup: the BeautifulSoup object
    :return: a list with the most read news from the Cada Minuto page
    """
    news = []
    ns = get_ns('localAL')
    divs = soup.find_all('div', class_='card-news-small')
    # Only the first four cards belong to the most-read list
    for div in divs[:4]:
        title = div.find('span', class_='card-news__title')
        news.append(dict(title=title.string, link=ns.url + title.parent['href']))
    return news
def __m_whiplash(soup):
    """
    Gets the most read news from the Whiplash page

    :param soup: the BeautifulSoup object
    :return: a list with the most read news from the Whiplash page
    """
    news = []
    paragraphs = soup.find_all('p', class_='linkManchete')
    # Only the first ten headlines belong to the most-read list
    for p in paragraphs[:10]:
        title = p.string
        link = p.a['href']
        news.append(dict(title=title, link=link))
    return news
def get_state():
    """
    Gets the user Local News Source based on their IP address

    :return: a JSON string with the user state and Local News Source
    """
    ip = request.args.get('ip')
    state = getstate(ip)
    # DEBUG - Change here to simulate different locations. Comment otherwise.
    # state = 'TO'
    if state == 'notfound':
        ns_title = ''
    else:
        ns_local = get_ns('local' + state)
        ns_title = ns_local.name + '*'
    d = dict(state=state, ns_title=ns_title)
    j = json.dumps(d)
    return j
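# getstate is defined elsewhere: it resolves an IP address to a Brazilian state
# abbreviation and returns 'notfound' when the lookup fails. A hypothetical
# sketch against a generic geo-IP JSON endpoint; the URL and response field
# below are placeholders, not the real backend:
import json
from urllib.request import urlopen

GEOIP_ENDPOINT = 'https://geoip.example.com/json/'  # hypothetical service


def getstate(ip):
    """Resolve an IP address to a state abbreviation such as 'CE'."""
    try:
        with urlopen(GEOIP_ENDPOINT + ip) as resp:
            data = json.loads(resp.read().decode('utf-8'))
        return data.get('region_code', 'notfound')
    except Exception:
        return 'notfound'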