def scrape(car_type, query):
    """Scan a Craigslist search-results page and push one notification for
    every listing that has not been logged before.

    car_type -- human-readable label used in the notification title.
    query    -- URL of the Craigslist search-results page to scan.
    """
    results_page = reusables.scrape(query)
    for row in results_page.find_all("li", {"class": "result-row"}):
        href = row.find_all("a")[0].get("href")
        price = row.find("span", {"class": "result-price"}).text
        if not reusables.href_has_not_been_logged(href):
            continue
        post = reusables.scrape(href)
        # Flatten each attribute group to plain text: strip the HTML tags,
        # collapse doubled newlines, and terminate the group with a newline.
        chunks = []
        for group in post.find_all("p", {"class": "attrgroup"}):
            text_only = re.sub('<.+?>', '', str(group))
            collapsed = re.sub('\n\n', '\n', text_only)
            chunks.append(collapsed + "\n")
        description = "".join(chunks)
        # First image on the post page is used as the notification thumbnail.
        image_url = post.find("img")["src"]
        reusables.check_key_and_send_notification(
            'news_with_image',
            href,
            "New " + car_type + " Found for " + price + "!",
            "%23[Original Post](" + href + ")\n\n" + description,
            image_url,
        )
import sys
sys.path.append('..')
import reusables
import json

# Product Hunt scraper: fetches the landing page once at import time and
# reads post metadata out of the inlined Apollo GraphQL state blob.
base_url = "https://www.producthunt.com"
search_url = "https://www.producthunt.com/search?postedDate=30%3Adays"
soup = reusables.scrape(base_url)
threshold = 100


def get_post_link(div):
    """Return the imgix thumbnail URL for a Product Hunt post item <div>.

    div -- a post-item element whose data-test attribute encodes the post id.
    """
    # NOTE(review): lstrip("post-item-") strips a *character set*, not the
    # literal prefix — it will also eat leading 'p','o','s','t','i','e','m','-'
    # characters from the id itself. Looks like removeprefix was intended;
    # confirm against real data-test values.
    post = "Post" + div.get("data-test").lstrip("post-item-")
    # The 7th <script> tag holds `window.__APOLLO_STATE__ = {...};` — peel off
    # the assignment wrapper to get raw JSON. NOTE(review): same lstrip
    # character-set caveat applies here; verify it never over-strips.
    script = soup.find_all("script")[6].get_text() \
        .lstrip("window.__APOLLO_STATE__ = ") \
        .rstrip(";")
    script_object = json.loads(script)
    # Post -> thumbnail id -> image UUID -> public imgix URL.
    thumbnail_id = script_object.get(post).get("thumbnail").get("id")
    image_uuid = script_object.get(thumbnail_id).get("image_uuid")
    image_link = "https://ph-files.imgix.net/" + image_uuid
    return image_link


# Remote blacklist fetch kept for reference; a static list is used instead.
# blacklist = requests.get("https://ent7ghk7utpt6zj.m.pipedream.net").json().get("blacklist")
# NOTE(review): this list literal is truncated in the visible chunk (bracket
# never closed) — the remaining entries continue past this excerpt.
blacklist = [
    "WordPress",
    "Instagram",
    "Crypto",
    "Marketing",
import sys
sys.path.append('..')
import reusables

# eBay saved search: stock/OEM GT350 wheels, Buy-It-Now, capped at $340.
results_page = reusables.scrape(
    "https://www.ebay.com/sch/i.html?_odkw=gt350+wheel+stock+oem+factory+10053&_udhi=340&_mPrRngCbx=1&LH_BIN=1&_osacat=0&_from=R40&_trksid=p2045573.m570.l1313.TR0.TRC0.H0.Xgt350+wheel+stock+oem+factory+10053+-lexus.TRS0&_nkw=gt350+wheel+stock+oem+factory+10053+-lexus&_sacat=0"
)

# One notification per result row; the listing URL doubles as the dedupe key.
for listing in results_page.find_all("li", {"class": "sresult"}):
    listing_url = listing.find_all("a")[0].get("href")
    reusables.check_href_and_send_notification("New GT350 Wheel Match!", "", listing_url)
import sys
sys.path.append('..')
import reusables

# Craigslist Houston: iPad Air 2 listings with pictures, $100-$200,
# within 50 miles of 77081, title-only search.
results_page = reusables.scrape("https://houston.craigslist.org/search/sss?query=ipad+air+2&sort=rel&srchType=T&hasPic=1&search_distance=50&postal=77081&min_price=100&max_price=200")

# Fire one notification per result row, keyed and linked by the listing URL.
for listing in results_page.find_all("li", {"class": "result-row"}):
    listing_url = listing.find_all("a")[0].get("href")
    reusables.check_href_and_send_notification("New iPad Craigslist Match!", listing_url, listing_url)
import sys
import requests
sys.path.append('..')
import reusables

# Front page of mustang6g.com; each news item is an <li class="post">.
soup = reusables.scrape("http://www.mustang6g.com/")

for post in soup.find_all("li", {"class": "post"}):
    # Headline and first paragraph of the article, joined for the webhook body.
    title = post.find_all("h2")[0].find_all("a")[0].getText()
    description = post.find_all("div", {"class": "entry"})[0].find_all("p")[0].getText()
    title_description = title + " | " + description
    link = post.find_all("a")[0].get("href")
    image = post.find_all("a")[2].find_all("img")[0].get("src")
    # Only notify for articles we haven't logged yet.
    if link not in reusables.get_hrefs():
        # BUG FIX: values were previously concatenated straight into the URL
        # with no escaping, so a title containing '&', '#', '%' or spaces
        # corrupted the query string. Passing them via `params=` lets
        # requests URL-encode each value correctly.
        requests.get(
            "https://maker.ifttt.com/trigger/mustang6g/with/key/VzmWoFF515H4lf0MNNVyo",
            params={"value1": title_description, "value2": link, "value3": image},
            timeout=30,
        )
        reusables.add_href(link)
import sys
import re
import requests
import urllib
sys.path.append('..')
import reusables

# Flight-deal scraper for escapehouston.com: parses each post's excerpt
# into structured fields (airline, route, price, ...) via one big regex.
# NOTE(review): `requests` and `urllib` are imported but unused in this
# excerpt, and the captured fields are never consumed — the script appears
# truncated here; the notification step presumably follows past this chunk.
soup = reusables.scrape("https://escapehouston.com/")
for item in soup.find_all("div", {"class": "post"}):
    # Post title link: text is the headline, href is the deal page.
    a = item.find("h2", {"class": "excerpt-title"}).find("a")
    title = a.getText()
    href = a.get("href")
    # First paragraph of the excerpt holds the one-sentence deal summary.
    p = item.find("div", {"class": "excerpt-content"}).find("article").find('p')
    desc = p.getText()
    # Groups: 1=airline 2=has/have 3=flight type 4=origin 5=destination
    # 6=price 7=connection info 8=travel window 9=trailing text.
    # NOTE(review): non-raw string relies on '\$' and '\.' passing through
    # as regex escapes — works, but a raw string would be safer.
    pattern = '(.+) (has|have) (.+) flights from (.+?) to (.+) for (\$.+?),? (.+)\. Flights (.+?)\. (.+)'
    matches = re.search(pattern, desc)
    # print('Description:',desc)
    # print('Matches:')
    if matches:
        # for group in matches.groups():
        #     print(group)
        airline = matches.group(1)
        type_of_flight = matches.group(3)
        origin = matches.group(4)
        destination = matches.group(5)
        cost = matches.group(6)
        connection = matches.group(7)
import sys
sys.path.append('..')
import reusables

# eBay saved search: SVE R350 wheels (Mercedes excluded), Buy-It-Now, $100-$500.
results_page = reusables.scrape(
    "https://www.ebay.com/sch/i.html?_mPrRngCbx=1&LH_BIN=1&_from=R40&_sacat=0&_fosrp=1&_nkw=SVE%20R350%20-mercedes&_dcat=6030&rt=nc&_udlo=100&_udhi=500"
)

# One notification per result row; the listing URL doubles as the dedupe key.
for listing in results_page.find_all("li", {"class": "sresult"}):
    listing_url = listing.find_all("a")[0].get("href")
    reusables.check_href_and_send_notification("New R350 Wheel Match!", "", listing_url)
import sys
sys.path.append('..')
import reusables

# Earlier search variants, kept for reference:
#soup = reusables.scrape("https://www.ebay.com/sch/i.html?_udlo=100&_udhi=240&LH_BIN=1&_mPrRngCbx=1&_from=R40&_sacat=0&_nkw=ipad%205th%20generation%20-mini%20-broken%20-air%20space%20gray%20-dent&rt=nc")
#soup = reusables.scrape("https://www.ebay.com/sch/i.html?_udlo=100&_udhi=190&LH_BIN=1&_mPrRngCbx=1&_from=R40&_sacat=0&_nkw=ipad%20air%202%20new%20-mini%20-case%20-screen%20-keyboard%20-cover%20-hardcase%20-stand%20-sleeve%20-adapter%20-battery%20-frame&rt=nc")

# Current search: iPad 5th generation, Buy-It-Now, $116-$200, with a long
# exclusion list (cases, keyboards, broken/locked units, parts, etc.).
results_page = reusables.scrape("https://www.ebay.com/sch/i.html?_odkw=ipad+5th+generation+-aXtion+-keyboard+-mount+-digitizer+-replacement+-screen+-broken+-case&_udlo=116&_udhi=200&_mPrRngCbx=1&LH_BIN=1&_osacat=0&_from=R40&_trksid=p2045573.m570.l1313.TR0.TRC0.H0.Xipad+5th+generation+-aXtion+-keyboard+-mount+-digitizer+-replacement+-screen+-b.TRS0&_nkw=ipad+5th+generation+-aXtion+-keyboard+-mount+-digitizer+-replacement+-screen+-broken+-case+-locked&_sacat=0")

# Fire one notification per result row, keyed and linked by the listing URL.
for listing in results_page.find_all("li", {"class": "sresult"}):
    listing_url = listing.find_all("a")[0].get("href")
    reusables.check_href_and_send_notification("New iPad eBay Match!", listing_url, listing_url)