def scrape(car_type, query):
    """Scan a Craigslist search for *query* and notify on unseen posts.

    For every result row whose link has not been logged yet, fetch the
    post page, flatten its "attrgroup" paragraphs into a plain-text
    description, and send a rich notification (title with price,
    markdown back-link + description, thumbnail image).

    Args:
        car_type: Human-readable label used in the notification title.
        query: Craigslist search-results URL handed to reusables.scrape.
    """
    # Compile once instead of on every attrgroup of every row.
    tag_re = re.compile(r'<.+?>')      # crude strip of HTML tags from str(attr)
    blank_re = re.compile('\n\n')      # collapse blank lines the tags leave behind
    soup = reusables.scrape(query)
    for item in soup.find_all("li", {"class": "result-row"}):
        a = item.find_all("a")[0]
        href = a.get("href")
        price_tag = item.find("span", {"class": "result-price"})
        if price_tag is None:
            # Rows posted without a price have no result-price span;
            # skip them instead of crashing on `None.text`.
            continue
        price = price_tag.text
        if reusables.href_has_not_been_logged(href):
            post = reusables.scrape(href)
            description = ''
            for attr in post.find_all("p", {"class": "attrgroup"}):
                no_html_tags = tag_re.sub('', str(attr))
                description += blank_re.sub('\n', no_html_tags) + "\n"
            img = post.find("img")["src"]
            reusables.check_key_and_send_notification(
                'news_with_image', href,
                "New " + car_type + " Found for " + price + "!",
                "%23[Original Post](" + href + ")\n\n" + description, img)
# ===== Example 2 (Product Hunt scraper) =====
import sys
sys.path.append('..')
import reusables
import json

# Product Hunt landing page; its Apollo state JSON embeds post metadata.
base_url = "https://www.producthunt.com"
# Search restricted to posts from the last 30 days (%3A is an escaped ':').
search_url = "https://www.producthunt.com/search?postedDate=30%3Adays"
soup = reusables.scrape(base_url)  # parsed landing page, read by get_post_link

# Score cutoff — presumably compared against post upvotes downstream; TODO confirm.
threshold = 100


def get_post_link(div):
    """Return the imgix thumbnail URL for a Product Hunt post list item.

    Resolves the post's thumbnail through the Apollo state JSON embedded
    in the 7th <script> tag of the module-level `soup`.

    Args:
        div: A post element whose data-test attribute is "post-item-<id>".
    """
    # NB: str.lstrip()/rstrip() remove a *character set*, not a literal
    # prefix/suffix — lstrip("post-item-") would also eat any leading id
    # characters drawn from {p,o,s,t,i,e,m,-}. Slice the prefix explicitly.
    data_test = div.get("data-test")
    id_prefix = "post-item-"
    if data_test.startswith(id_prefix):
        data_test = data_test[len(id_prefix):]
    post = "Post" + data_test

    raw = soup.find_all("script")[6].get_text()
    state_prefix = "window.__APOLLO_STATE__ = "
    if raw.startswith(state_prefix):
        raw = raw[len(state_prefix):]
    script_object = json.loads(raw.rstrip(";"))

    thumbnail_id = script_object.get(post).get("thumbnail").get("id")
    image_uuid = script_object.get(thumbnail_id).get("image_uuid")
    image_link = "https://ph-files.imgix.net/" + image_uuid
    return image_link


# blacklist = requests.get("https://ent7ghk7utpt6zj.m.pipedream.net").json().get("blacklist")

blacklist = [
    "WordPress",
    "Instagram",
    "Crypto",
    "Marketing",
import sys

sys.path.append('..')
import reusables

# eBay Buy-It-Now search for stock/OEM GT350 wheels under $340.
soup = reusables.scrape(
    "https://www.ebay.com/sch/i.html?_odkw=gt350+wheel+stock+oem+factory+10053&_udhi=340&_mPrRngCbx=1&LH_BIN=1&_osacat=0&_from=R40&_trksid=p2045573.m570.l1313.TR0.TRC0.H0.Xgt350+wheel+stock+oem+factory+10053+-lexus.TRS0&_nkw=gt350+wheel+stock+oem+factory+10053+-lexus&_sacat=0"
)

# Notify once per unseen listing; dedup is handled inside reusables.
for result in soup.find_all("li", {"class": "sresult"}):
    first_anchor = result.find_all("a")[0]
    listing_url = first_anchor.get("href")

    reusables.check_href_and_send_notification("New GT350 Wheel Match!", "",
                                               listing_url)
# ===== Example 4 (iPad Craigslist scraper) =====
import sys
sys.path.append('..')
import reusables

# Craigslist Houston: iPad Air 2, $100–$200, title-only search, with pics,
# within 50 miles of ZIP 77081.
soup = reusables.scrape("https://houston.craigslist.org/search/sss?query=ipad+air+2&sort=rel&srchType=T&hasPic=1&search_distance=50&postal=77081&min_price=100&max_price=200")

# One notification per unseen result row.
for row in soup.find_all("li", {"class": "result-row"}):
    link_tag = row.find_all("a")[0]
    listing_url = link_tag.get("href")

    reusables.check_href_and_send_notification("New iPad Craigslist Match!", listing_url, listing_url)
import sys

import requests

sys.path.append('..')
import reusables

soup = reusables.scrape("http://www.mustang6g.com/")

# Forward each new front-page post to an IFTTT webhook.  The three webhook
# values are sent as query parameters via `params=` so that `requests`
# percent-encodes them — the original hand-concatenated URL broke whenever
# a title/description contained '&', '#', '=' or spaces.
for post in soup.find_all("li", {"class": "post"}):
    title = post.find_all("h2")[0].find_all("a")[0].getText()
    description = post.find_all("div", {"class": "entry"})[0].find_all("p")[0].getText()
    title_description = title + " | " + description

    link = post.find_all("a")[0].get("href")
    image = post.find_all("a")[2].find_all("img")[0].get("src")
    if link not in reusables.get_hrefs():
        requests.get(
            "https://maker.ifttt.com/trigger/mustang6g/with/key/VzmWoFF515H4lf0MNNVyo",
            params={"value1": title_description, "value2": link, "value3": image},
            timeout=30,
        )
        reusables.add_href(link)
import sys
import re
import requests
import urllib
sys.path.append('..')
import reusables

soup = reusables.scrape("https://escapehouston.com/")

# Compiled once, outside the loop.  Raw string so that "\$" reaches the
# regex engine as an escaped literal dollar sign instead of being an
# invalid Python string escape (DeprecationWarning today, error later).
DEAL_PATTERN = re.compile(
    r'(.+) (has|have) (.+) flights from (.+?) to (.+) for (\$.+?),? (.+)\. Flights (.+?)\. (.+)'
)

# Parse each deal post's summary sentence into its components, e.g.
# "<airline> has <type> flights from <origin> to <destination> for <cost> ...".
for item in soup.find_all("div", {"class": "post"}):
    a = item.find("h2", {"class": "excerpt-title"}).find("a")
    title = a.getText()
    href = a.get("href")

    p = item.find("div", {
        "class": "excerpt-content"
    }).find("article").find('p')
    desc = p.getText()
    matches = DEAL_PATTERN.search(desc)
    if matches:
        airline = matches.group(1)
        type_of_flight = matches.group(3)
        origin = matches.group(4)
        destination = matches.group(5)
        cost = matches.group(6)
        connection = matches.group(7)
import sys
sys.path.append('..')
import reusables

# eBay Buy-It-Now search: SVE R350 wheels (Mercedes excluded), $100–$500.
soup = reusables.scrape(
    "https://www.ebay.com/sch/i.html?_mPrRngCbx=1&LH_BIN=1&_from=R40&_sacat=0&_fosrp=1&_nkw=SVE%20R350%20-mercedes&_dcat=6030&rt=nc&_udlo=100&_udhi=500"
)

# Notify once per unseen listing; dedup lives in reusables.
for result in soup.find_all("li", {"class": "sresult"}):
    first_anchor = result.find_all("a")[0]
    listing_url = first_anchor.get("href")

    reusables.check_href_and_send_notification("New R350 Wheel Match!", "",
                                               listing_url)
import sys
sys.path.append('..')
import reusables

#soup = reusables.scrape("https://www.ebay.com/sch/i.html?_udlo=100&_udhi=240&LH_BIN=1&_mPrRngCbx=1&_from=R40&_sacat=0&_nkw=ipad%205th%20generation%20-mini%20-broken%20-air%20space%20gray%20-dent&rt=nc")
#soup = reusables.scrape("https://www.ebay.com/sch/i.html?_udlo=100&_udhi=190&LH_BIN=1&_mPrRngCbx=1&_from=R40&_sacat=0&_nkw=ipad%20air%202%20new%20-mini%20-case%20-screen%20-keyboard%20-cover%20-hardcase%20-stand%20-sleeve%20-adapter%20-battery%20-frame&rt=nc")
# eBay Buy-It-Now search: 5th-gen iPad, $116–$200, with accessory/damage
# keywords excluded from the title.
soup = reusables.scrape("https://www.ebay.com/sch/i.html?_odkw=ipad+5th+generation+-aXtion+-keyboard+-mount+-digitizer+-replacement+-screen+-broken+-case&_udlo=116&_udhi=200&_mPrRngCbx=1&LH_BIN=1&_osacat=0&_from=R40&_trksid=p2045573.m570.l1313.TR0.TRC0.H0.Xipad+5th+generation+-aXtion+-keyboard+-mount+-digitizer+-replacement+-screen+-b.TRS0&_nkw=ipad+5th+generation+-aXtion+-keyboard+-mount+-digitizer+-replacement+-screen+-broken+-case+-locked&_sacat=0")

# One notification per unseen result.
for listing in soup.find_all("li", {"class": "sresult"}):
    anchor = listing.find_all("a")[0]
    listing_url = anchor.get("href")

    reusables.check_href_and_send_notification("New iPad eBay Match!", listing_url, listing_url)