Example #1
# Closure lifted from an API client class: `url`, `self`, `PY3`, `Req`
# (urllib Request) and the exception types come from the enclosing scope.
def wrap(*args, **kwargs):
    if args:
        raise PositionalArgumentsNotSupported()
    data = urlencode(kwargs)
    if PY3:
        # urlopen expects the POST body as bytes on Python 3
        data = bytes(data, 'utf-8')
    req = Req(url, data=data)
    try:
        res = urlopen(req).read().decode()
    except HTTPError as e:
        raise SmappiServerError('%s (for "%s")' % (e, url))
    except URLError as e:
        raise SmappiServerError('%s for %s' % (e.args[0], url))
    if self._fmt == 'json':
        res = json.loads(res)
        # surface errors the server reports inside the JSON payload
        if isinstance(res, dict) and 'error' in res:
            error = res['error']
            if isinstance(error, dict):
                message = error.pop('message', '')
                if 'code' in error:
                    message += ' (code: %s)' % error['code']
                raise SmappiAPIError(message, **error)
            else:
                raise SmappiAPIError(error)
    return res
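The closure keyword-only signature suggests it is returned from attribute lookup on a client object, so each remote method call becomes an HTTP POST. A hypothetical call site (the client class and method names below are assumptions, not part of the source):

# hypothetical: `wrap` is what attribute access on the client returns
api = SmappiClient('example-api.smappi.org', fmt='json')
result = api.search(query='test', limit=10)   # kwargs become the POST body
# api.search('test') would raise PositionalArgumentsNotSupported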
Example #2
def getMostBoughtData():
    my_url = 'https://finance.yahoo.com/u/yahoo-finance/watchlists/most-bought-by-hedge-funds/'

    # opens a connection to the page
    Client = Req(my_url)

    # reads the raw HTML, then releases the connection
    page_html = Client.read()
    Client.close()

    # parses the HTML of the website
    page_soup = soup(page_html, "html.parser")

    # grabs the section heading (which carries the symbol count) and the ticker links
    symbols = page_soup.findAll("h2", {"class": "Fz(m)"})
    tickers = page_soup.findAll("a", {"class": "Fw(b)"})
    prices_html = page_soup.findAll(
        "td", {"class": "data-col2 Ta(end) Pstart(10px) Pend(6px) Fw(b)"})
    changes_html = page_soup.findAll(
        "td", {"class": "data-col4 Ta(end) Pstart(10px) Pend(6px)"})
    volumes_html = page_soup.findAll(
        "td", {"class": "data-col6 Ta(end) Pstart(10px) Pend(6px)"})
    avg_volumes_html = page_soup.findAll(
        "td", {"class": "data-col7 Ta(end) Pstart(10px) Pend(6px)"})

    # the heading text starts with the number of symbols in the list,
    # e.g. "30 Symbols"; grab that count
    sym = int(symbols[1].text[0:2])

    x = 0

    data = []

    # skip over navigation links until the first real ticker symbol
    # (tickers are short strings; "Tech" and "News" are menu items)
    while x < 48 - 1:
        text = tickers[x].text
        if len(text) > 5 or text == "Tech" or text == "News":
            x = x + 1
        else:
            break
    y = x
    # x runs from y to y + sym - 1, so x - (y + sym) is a negative index
    # that counts back from the end of each column's cell list
    while x < y + sym:
        ticker = tickers[x].text
        price = prices_html[x - (y + sym)].text

        change_str = changes_html[x - (y + sym)].text

        if change_str.startswith('+'):
            change = change_str[1:]
        else:
            change = change_str

        volume = volumes_html[x - (y + sym)].text
        avg_volume = avg_volumes_html[x - (y + sym)].text

        data.append((ticker, price, change, volume, avg_volume))

        x = x + 1

    return data
def get_insider_trading_data():
    my_url = 'http://openinsider.com/screener?s=&o=&pl=&ph=&ll=&lh=&fd=730&fdr=&td=0&tdr=&fdlyl=&fdlyh=&daysago=&xp=1&vl=&vh=&ocl=&och=&sic1=-1&sicl=100&sich=9999&grp=0&nfl=&nfh=&nil=&nih=&nol=&noh=&v2l=&v2h=&oc2l=&oc2h=&sortcol=0&cnt=1000&page=1'

    Client = Req(my_url)

    page_html = Client.read()
    Client.close()

    page_soup = soup(page_html, "html.parser")

    # links with an UnTip() handler hold the ticker symbols
    findsT = page_soup.findAll("a", {"onmouseout": "UnTip()"})

    # every table cell; the insider's title sits at a fixed offset per row
    findsTi = page_soup.findAll("td")

    # right-aligned cells hold the numeric columns (price, quantity, owned)
    findsNum = page_soup.findAll("td", {"align": "right"})

    x = 0   # index of the current ticker link
    y = 22  # offset of the purchase price within the numeric cells
    z = 23  # offset of the quantity of stocks purchased
    w = 24  # offset of the number of shares already owned
    a = 72  # offset of the insider's title within all <td> cells

    data = []
    
    # the loop assumes each data row adds 12 numeric cells and 17 cells overall
    while x < 1000:
        findT = findsT[x]
        findT = findT["href"].replace("/", "")
        findP = findsNum[y].text
        findQ = findsNum[z].text
        findO = findsNum[w].text
        findTi = findsTi[a].text
        data.append((findT, findTi, findP, findQ, findO))
        x += 1
        y += 12
        z += 12
        w += 12
        a += 17

    return data
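A minimal driver for the two scrapers above might look like the following; the formatting is illustrative, not part of the source:

if __name__ == '__main__':
    # one line per most-bought entry: ticker, price, change, volume, avg volume
    for row in getMostBoughtData():
        print(' | '.join(row))
    # one line per insider purchase: ticker, title, price, quantity, shares owned
    for row in get_insider_trading_data():
        print(' | '.join(row))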
Example #4
from urllib.request import urlopen as Req
from bs4 import BeautifulSoup as soup

my_url = 'https://www.newegg.com/Video-Cards-Video-Devices/Category/ID-38?Tpk=graphics%20card'

uclient = Req(my_url)
page_html = uclient.read()
uclient.close()

page_soup = soup(page_html, "html.parser")

containers = page_soup.findAll("div", {"class": "item-container"})

filename = "products_newegg.csv"
f = open(filename, "w")

headers = "Brand,Product_name\n"
f.write(headers)

for container in containers:
    brand = container.div.div.a.img["title"]

    title_container = container.findAll("a", {"class": "item-title"})
    product_name = title_container[0].text

    print("Brand: " + brand)
    print("Product Name: " + product_name)

    f.write(brand + "," + product_name.replace(",", "|") + "\n")

f.close()
Example #5
from bs4 import BeautifulSoup as soup
from urllib.request import urlopen as Req

url = "https://www.flipkart.com/search?q=iphone&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off"

Client = Req(url)

page_html = Client.read()
Client.close()

page_soup = soup(page_html, "html.parser")

# print(page_soup.contents)

containers = page_soup.findAll("div", {"class": "_3O0U0u"})

container = containers[0]

print(container.div.img['alt'])

price = container.findAll("div", {"class": "col col-5-12 _2o7WAb"})

print(price[0].text)

rating = container.findAll("div", {"class": "hGSR34"})

print(rating[0].text)

filename = "flpphn.csv"
f = open(filename, "w")

headers = "Products_Name,Pricing,Ratings\n"
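The snippet ends abruptly after building the header row. A plausible continuation, following the same write-loop pattern as Example #4 (the class names are taken from the code above; the rest is an assumption):

f.write(headers)

# walk every result card and pull out the name, price and rating
for container in containers:
    name = container.div.img['alt']
    price = container.findAll("div", {"class": "col col-5-12 _2o7WAb"})[0].text
    rating = container.findAll("div", {"class": "hGSR34"})[0].text
    # drop commas so the price does not break the CSV columns
    f.write(name.replace(",", "|") + "," + price.replace(",", "") + "," + rating + "\n")

f.close()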
Example #6
"""Thrasher Magazine's website is parsed for information about the recent
video uploads. The code is activated with voice commands via an Amazon Echo,
Dot, or Spot."""

import os
from urllib.request import Request as Req
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
import html  # provides html.unescape; HTMLParser().unescape was removed in Python 3.9
from flask import Flask
from flask_ask import Ask, statement, question, session

app = Flask(__name__)
ask = Ask(app, "/")
my_url = Req("http://www.thrashermagazine.com/articles/videos/", headers={"User-Agent":"Mozilla/5.0"})
# open connection, grabbing the page
uClient = uReq(my_url)
page_html = uClient.read()
uClient.close()
# html parsing
page_soup = soup(page_html, "html.parser")
descriptions = page_soup.findAll("div", {"class":"post-description"})
titles = page_soup.findAll("div", {"class":"post-thumb-container"})
img_url = "https://s3-us-west-1.amazonaws.com/thrasherskill/thrasher-logo.png"

def get_info():
    for tit, des in zip(titles, descriptions):
        titled = tit.a.img["alt"].strip()
        title = html.unescape(titled)  # decode HTML entities in the alt text
        description = des.text.strip()
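get_info is cut off mid-loop in the capture. A minimal completion, assuming the skill reads the newest video back to the user; the yield and the launch handler below are assumptions, not part of the source:

        # hand each (title, description) pair back to the caller
        yield title, description

@ask.launch
def start_skill():
    # speak the most recent video's title when the skill is opened
    title, description = next(get_info())
    return statement("The latest Thrasher video is {}. {}".format(title, description)) \
        .simple_card(title="Thrasher Videos", content=description)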
Example #7
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 14 17:47:10 2020

@author: akhil
"""

from urllib.request import urlopen as ureq
from urllib.request import Request as Req
from bs4 import BeautifulSoup as soup
from selenium import webdriver
import pandas as pd
from selenium.webdriver.common.by import By

url = 'https://www.lviglobal.com/course-schedule/'
req = Req(url, headers={'User-Agent': 'Chrome/80.0'})
x = ureq(req)
y = x.read()
x.close()
page_soup = soup(y, 'html.parser')

a = page_soup.findAll('div', {'class': 'event-desc'})
print(a[0].a['href'])
# the strings inside the first event block hold the course name and dates
for string in a[0].strings:
    print(string)

course_names = []
for i in range(len(a)):
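    # hypothetical body: treat the first string in each event block as the
    # course name (assumption; the original loop body is missing)
    course_names.append(next(iter(a[i].strings), '').strip())

print(course_names)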
Example #8
from urllib.request import urlopen as Req
from bs4 import BeautifulSoup as Soup

def GetUrl(url):
    # fetch a page and return its parsed node tree
    Uclient = Req(url)
    page_html = Uclient.read()
    Uclient.close()
    page_soup = Soup(page_html, "html.parser")
    return page_soup
Example #9
from urllib.request import urlopen as Req
from bs4 import BeautifulSoup as soup
import csv

base = "http://www.achemenet.com/"
accumulator = []

# walk the ten paginated index pages and collect the item links
for index in range(1, 11):
    head, tail = "http://www.achemenet.com/fr/tree/?/sources-textuelles/textes-par-publication/Wunsch_CM_20/", "/24/0#set"
    site = head + str(index) + tail
    page_html = Req(site).read()
    pg_soup = soup(page_html, "html.parser")
    for product in pg_soup.find_all("div", class_="item"):
        target = base + product.find("a").get('href')
        accumulator.append(target)
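csv is imported but never used in the visible fragment, so the collected links were presumably written out afterwards. A sketch of that step (the filename and column name are assumptions):

# hypothetical output step: persist the collected links to a CSV file
with open("achemenet_links.csv", "w", newline="") as out:
    writer = csv.writer(out)
    writer.writerow(["url"])
    for link in accumulator:
        writer.writerow([link])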
Example #10
#!/usr/bin/env python3

# import the locale, regex, statistics, urllib.request and Beautiful Soup libraries
import locale
import re
from statistics import mean
from urllib.request import urlopen as Req
from bs4 import BeautifulSoup as soup

# Declare my_url variable

my_url = "https://www.carfax.com/Used-Honda-Civic-Type-R_t10063"

# Open a connection to my_url and store the response in Scrapee

Scrapee = Req(my_url)

# Extract html to variable Scrapee_html

Scrapee_html = Scrapee.read()

# Close web page

Scrapee.close()

# Parse html into node tree and strip html tags, store as variable Scrapee_soup

Scrapee_soup = soup(Scrapee_html, "html.parser")

#Find matching class data and store into three variables
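The capture stops at this comment. A sketch of what likely follows, using the modules imported at the top; every class name below is a guess, not taken from the page:

# hypothetical selectors: Carfax listing markup changes often
prices = Scrapee_soup.findAll("span", {"class": "srp-list-item-price"})
mileages = Scrapee_soup.findAll("span", {"class": "srp-list-item-mileage"})
basic_infos = Scrapee_soup.findAll("h4", {"class": "srp-list-item-basic-info"})

locale.setlocale(locale.LC_ALL, '')

# strip '$' and thousands separators, then average the asking prices
price_values = [float(re.sub(r'[^\d.]', '', p.text)) for p in prices if p.text.strip()]
if price_values:
    print("Average asking price: $%s"
          % locale.format_string('%.2f', mean(price_values), grouping=True))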