def wrap(*args, **kwargs):
    if args:
        raise PositionalArgumentsNotSupported()
    data = urlencode(kwargs)
    if PY3:
        data = bytes(data, 'utf-8')
    req = Req(url, data=data)
    try:
        res = urlopen(req).read().decode()
    except HTTPError as e:
        raise SmappiServerError('%s (for "%s")' % (e, url))
    except URLError as e:
        raise SmappiServerError('%s for %s' % (e.args[0], url))
    if self._fmt == 'json':
        res = json.loads(res)
        if isinstance(res, dict) and 'error' in res:
            error = res['error']
            if isinstance(error, dict):
                message = error.pop('message', '')
                if 'code' in error:
                    message += ' (code: %s)' % error['code']
                raise SmappiAPIError(message, **error)
            else:
                raise SmappiAPIError(error)
    return res
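# A minimal sketch of the imports the wrap() closure above assumes (Python 3
# names; the Smappi exception classes, the enclosing `url`, and `self._fmt`
# are presumed to be defined by the surrounding client code and are not
# reproduced here):
import sys
import json
from urllib.parse import urlencode
from urllib.request import urlopen, Request as Req
from urllib.error import HTTPError, URLError

PY3 = sys.version_info[0] >= 3  # wrap() only encodes the payload to bytes on Python 3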
def getMostBoughtData():
    my_url = 'https://finance.yahoo.com/u/yahoo-finance/watchlists/most-bought-by-hedge-funds/'
    # saves the information from the url into the client
    Client = Req(my_url)
    # saves the raw html of the page
    page_html = Client.read()
    Client.close()
    # parses the html of the website
    page_soup = soup(page_html, "html.parser")
    # finds all the parts of the webpage that would hold the titles
    symbols = page_soup.findAll("h2", {"class": "Fz(m)"})
    tickers = page_soup.findAll("a", {"class": "Fw(b)"})
    prices_html = page_soup.findAll(
        "td", {"class": "data-col2 Ta(end) Pstart(10px) Pend(6px) Fw(b)"})
    changes_html = page_soup.findAll(
        "td", {"class": "data-col4 Ta(end) Pstart(10px) Pend(6px)"})
    volumes_html = page_soup.findAll(
        "td", {"class": "data-col6 Ta(end) Pstart(10px) Pend(6px)"})
    avg_volumes_html = page_soup.findAll(
        "td", {"class": "data-col7 Ta(end) Pstart(10px) Pend(6px)"})
    # number of symbols in the watchlist, parsed from the heading text
    sym = int(symbols[1].text[0:2])
    x = 0
    data = []
    # skip link elements that are clearly not ticker symbols
    while x < 48 - 1:
        if len(tickers[x].text) > 5 or tickers[x].text == "Tech" or tickers[x].text == "News":
            x = x + 1
        else:
            break
    y = x
    # collect ticker, price, change, volume and average volume for each symbol
    while x < y + sym:
        ticker = tickers[x].text
        price = prices_html[x - (y + sym)].text
        change_str = changes_html[x - (y + sym)].text
        # strip a leading "+" from positive changes
        if change_str.startswith('+'):
            change = change_str[1:]
        else:
            change = change_str
        volume = volumes_html[x - (y + sym)].text
        avg_volume = avg_volumes_html[x - (y + sym)].text
        data.append((ticker, price, change, volume, avg_volume))
        x = x + 1
    return data
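# A minimal usage sketch for getMostBoughtData(), assuming the module-level
# imports the function relies on (urlopen aliased to Req, BeautifulSoup to soup):
from urllib.request import urlopen as Req
from bs4 import BeautifulSoup as soup

for ticker, price, change, volume, avg_volume in getMostBoughtData():
    print(ticker, price, change, volume, avg_volume)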
def get_insider_trading_data():
    my_url = 'http://openinsider.com/screener?s=&o=&pl=&ph=&ll=&lh=&fd=730&fdr=&td=0&tdr=&fdlyl=&fdlyh=&daysago=&xp=1&vl=&vh=&ocl=&och=&sic1=-1&sicl=100&sich=9999&grp=0&nfl=&nfh=&nil=&nih=&nol=&noh=&v2l=&v2h=&oc2l=&oc2h=&sortcol=0&cnt=1000&page=1'
    Client = Req(my_url)
    page_html = Client.read()
    Client.close()
    page_soup = soup(page_html, "html.parser")
    # finds the ticker locations using this method
    findsT = page_soup.findAll("a", {"onmouseout": "UnTip()"})
    # finds the cells that hold the title of the person that was insider trading
    findsTi = page_soup.findAll("td")
    # finds the numeric (right-aligned) columns of the screener table
    findsNum = page_soup.findAll("td", {"align": "right"})
    x = 0   # index of the current ticker
    y = 22  # finds the price of the stock when bought
    z = 23  # finds the quantity of stocks purchased
    w = 24  # finds the number of stocks already owned
    a = 72  # finds the title of the person insider trading
    data = []
    while x < 1000:
        findT = findsT[x]
        findT = findT["href"].replace("/", "")
        findP = findsNum[y].text
        findQ = findsNum[z].text
        findO = findsNum[w].text
        findTi = findsTi[a].text
        data.append((findT, findTi, findP, findQ, findO))
        x += 1
        y += 12
        z += 12
        w += 12
        a += 17
    return data
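# A usage sketch for get_insider_trading_data(), again assuming the same
# module-level aliases (Req for urlopen, soup for BeautifulSoup); prints the
# first few rows of the scraped table:
insider_rows = get_insider_trading_data()
for ticker, title, price, qty, owned in insider_rows[:10]:
    print(ticker, title, price, qty, owned)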
import bs4
from urllib.request import urlopen as Req
from bs4 import BeautifulSoup as soup

my_url = 'https://www.newegg.com/Video-Cards-Video-Devices/Category/ID-38?Tpk=graphics%20card'

uclient = Req(my_url)
page_html = uclient.read()
uclient.close()

page_soup = soup(page_html, "html.parser")
containers = page_soup.findAll("div", {"class": "item-container"})

filename = "products_newegg.csv"
f = open(filename, "w")
headers = "Brand, Product_name\n"
f.write(headers)

for container in containers:
    brand = container.div.div.a.img["title"]
    title_container = container.findAll("a", {"class": "item-title"})
    product_name = title_container[0].text
    print("Brand: " + brand)
    print("Product Name: " + product_name)
    # commas inside the product name would break the CSV columns, so replace them
    f.write(brand + "," + product_name.replace(",", "|") + "\n")

f.close()
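# An alternative sketch using the csv module, which handles commas and quoting
# automatically and closes the file via the with-statement (same data as above):
import csv

with open("products_newegg.csv", "w", newline="") as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(["Brand", "Product_name"])
    for container in containers:
        brand = container.div.div.a.img["title"]
        product_name = container.findAll("a", {"class": "item-title"})[0].text
        writer.writerow([brand, product_name])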
from bs4 import BeautifulSoup as soup
from urllib.request import urlopen as Req

url = "https://www.flipkart.com/search?q=iphone&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off"

Client = Req(url)
page_html = Client.read()
page_soup = soup(page_html, "html.parser")
# print(page_soup.contents)

containers = page_soup.findAll("div", {"class": "_3O0U0u"})
container = containers[0]
print(container.div.img['alt'])

price = container.findAll("div", {"class": "col col-5-12 _2o7WAb"})
print(price[0].text)

rating = container.findAll("div", {"class": "hGSR34"})
print(rating[0].text)

filename = "flpphn.csv"
f = open(filename, "w")
headers = "Products_Name,Pricing,Ratings\n"
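# The original snippet stops after defining the header line; a sketch of how
# the remaining rows might be written, assuming the intent was to store name,
# price, and rating for every result container:
f.write(headers)
for container in containers:
    name = container.div.img['alt']
    price_text = container.findAll("div", {"class": "col col-5-12 _2o7WAb"})[0].text
    rating = container.findAll("div", {"class": "hGSR34"})
    rating_text = rating[0].text if rating else ""
    f.write(name.replace(",", "|") + "," + price_text.replace(",", "") + "," + rating_text + "\n")
f.close()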
"""Thrasher Magazine's website is parsed for information about the recent video
uploads. The code is activated with voice commands via an Amazon Echo, Dot, or
Spot."""
import os
from urllib.request import Request as Req
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
import html.parser as parser
from flask import Flask
from flask_ask import Ask, statement, question, session

app = Flask(__name__)
ask = Ask(app, "/")

my_url = Req("http://www.thrashermagazine.com/articles/videos/",
             headers={"User-Agent": "Mozilla/5.0"})

# open connection, grabbing the page
uClient = uReq(my_url)
page_html = uClient.read()
uClient.close()

# html parsing
page_soup = soup(page_html, "html.parser")
descriptions = page_soup.findAll("div", {"class": "post-description"})
titles = page_soup.findAll("div", {"class": "post-thumb-container"})

img_url = "https://s3-us-west-1.amazonaws.com/thrasherskill/thrasher-logo.png"


def get_info():
    for tit, des in zip(titles, descriptions):
        titled = tit.a.img["alt"].strip()
        title = parser.HTMLParser().unescape(titled)
        description = des.text.strip()
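# Note: HTMLParser().unescape() was deprecated and removed in Python 3.9; on
# current interpreters html.unescape() gives the same result. A sketch of an
# alternative helper using that call and yielding each (title, description)
# pair; the name and the yield are assumptions, since the original get_info()
# body is truncated above:
import html


def iter_video_info():
    for tit, des in zip(titles, descriptions):
        title = html.unescape(tit.a.img["alt"].strip())
        description = des.text.strip()
        yield title, description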
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 14 17:47:10 2020

@author: akhil
"""
from urllib.request import urlopen as ureq
from urllib.request import Request as Req
from bs4 import BeautifulSoup as soup
from selenium import webdriver
import pandas as pd
from selenium.webdriver.common.by import By

url = 'https://www.lviglobal.com/course-schedule/'
req = Req(url, headers={'User-Agent': 'Chrome/80.0'})
x = ureq(req)
y = x.read()
x.close()

page_soup = soup(y, 'html.parser')

a = page_soup.findAll('div', {'class': 'event-desc'})
print(a[0].a['href'])

t = a[0].strings
for string in a[0].strings:
    print(string)

course_names = []
for i in range(len(a)):
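    # A sketch of a possible loop body, since the original snippet is truncated
    # right after the for-statement above; it assumes the first string of each
    # event-desc block is the course title:
    strings = list(a[i].strings)
    if strings:
        course_names.append(strings[0].strip())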
def GetUrl(url):
    Uclient = Req(url)
    page_html = Uclient.read()
    Uclient.close()
    page_soup = Soup(page_html, "html.parser")
    return page_soup
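# A usage sketch for GetUrl(), with the imports the helper assumes (urlopen
# aliased to Req and BeautifulSoup aliased to Soup, matching the names above);
# the URL is a placeholder:
from urllib.request import urlopen as Req
from bs4 import BeautifulSoup as Soup

page = GetUrl("http://www.example.com/")
print(page.title)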
from urllib.request import urlopen as Req
from bs4 import BeautifulSoup as soup
import csv

base = "http://www.achemenet.com/"
accumulator = []

# parsing for website name begins. Here we go!
for index in range(1, 11):
    head, tail = "http://www.achemenet.com/fr/tree/?/sources-textuelles/textes-par-publication/Wunsch_CM_20/", "/24/0#set"
    site = head + str(index) + tail
    page_html = Req(site).read()
    pg_soup = soup(page_html, "html.parser")
    for product in pg_soup.find_all("div", class_="item"):
        target = base + product.find("a").get('href')
        accumulator.append(target)
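# The csv import above is otherwise unused; a sketch of persisting the
# collected links with it (the output filename is an assumption):
with open("achemenet_links.csv", "w", newline="") as out_file:
    writer = csv.writer(out_file)
    writer.writerow(["url"])
    for link in accumulator:
        writer.writerow([link])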
#!/usr/bin/env python3
# import Python libraries regex, statistics, urllib.request and Beautiful Soup
import locale
import re
from statistics import mean
from urllib.request import urlopen as Req
from bs4 import BeautifulSoup as soup

# Declare my_url variable
my_url = "https://www.carfax.com/Used-Honda-Civic-Type-R_t10063"

# Load my_url contents into Scrapee variable
Scrapee = Req(my_url)

# Extract html to variable Scrapee_html
Scrapee_html = Scrapee.read()

# Close web page
Scrapee.close()

# Parse html into node tree and strip html tags, store as variable Scrapee_soup
Scrapee_soup = soup(Scrapee_html, "html.parser")

# Find matching class data and store into three variables
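# The class names the original script searches for are not shown; a sketch of
# the pattern the re and mean imports suggest (strip a price string down to
# digits and average the results). The "listing-price" class name is
# hypothetical and not taken from the actual page markup:
price_tags = Scrapee_soup.find_all("span", class_="listing-price")
prices = [float(re.sub(r"[^\d.]", "", tag.text)) for tag in price_tags if re.search(r"\d", tag.text)]
if prices:
    print("Average price: %.2f" % mean(prices))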