import re

from bs4 import BeautifulSoup
from google.cloud import firestore, storage

# Shared project helpers referenced below (request_, API_ENDPOINT, BUCKET_NAME,
# main_link, _get_images, _parse_item, save_photos_to_bucket,
# add_and_compare_new_items, format_links_modified, send_email,
# send_warning_email, SENDGRID_API_KEY, SENDER_EMAIL, RECEIVER_EMAILS)
# are defined elsewhere in the project.


def _crawl_michiganwholesaleequipment_category(category_link):
    response_text = request_("GET", category_link).text
    soup = BeautifulSoup(response_text, "html.parser")
    soup_main = soup.find('div', class_="item-list")
    item_links = [
        el.find('a').get('href')
        for el in soup_main.find_all("div", class_="views-field-title")
    ]
    while True:
        next_page_link_el = soup.find("ul", class_="js-pager__items")
        if next_page_link_el is None:
            break
        link_el = next_page_link_el.find("a")
        label_el = next_page_link_el.find("span", class_="visually-hidden")
        sub_url = link_el.get("href") if link_el else None
        sub_text = label_el.text if label_el else ""
        if sub_url is not None and sub_text == "Next page":
            url = f"https://www.michiganwholesaleequipment.com{sub_url}"
            response_text = request_("GET", url).text
            soup = BeautifulSoup(response_text, "html.parser")
            soup_main = soup.find('div', class_="item-list")
            item_links.extend([
                el.find('a').get('href')
                for el in soup_main.find_all("div", class_="views-field-title")
            ])
        else:
            break
    return set(item_links)
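# Note: the hrefs collected above are site-relative; the matching
# _process_added_items for michiganwholesaleequipment (further below)
# prefixes them with https://www.michiganwholesaleequipment.com.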
def _process_added_items(items):
    for item in items:
        codes_added = []
        na = ""
        source = request_("GET", item).text
        soup = BeautifulSoup(source, "html.parser")
        for category in soup.find_all('li', class_="crumb_2"):
            na = re.sub(r"[\n\t]*", "", category.text)
        for description in soup.find_all('table',
                                         id='product-attribute-specs-table'):
            for data in description.find_all('td', class_='data'):
                data_new = re.sub(r"[\n\t\s]*", "", data.text)
                codes_added.append(data_new)
        if len(codes_added) < 7:
            continue  # spec table missing or incomplete; nothing to post
        capacity = re.sub(r"[,s]", "", codes_added[4]).upper()
        marque = codes_added[2].upper()
        model = codes_added[3]
        mat = codes_added[6]
        annee = codes_added[1]
        post_name = f"{na} {capacity} {marque} {annee}"
        data = {
            'post_name': post_name,
            'capacity': capacity,
            'marque': marque,
            'model': model,
            'mat': mat,
            'annee': annee,
            'url': item
        }
        API_ENDPOINT = ""
        request_("POST", API_ENDPOINT, data=data)
# Example 3
def _process_added_items(items):
    for item in items:
        source = request_("GET", item).text
        soup = BeautifulSoup(source, "html.parser")
        try:
            category = soup.find("td",
                                 text="Category").find_next("td").text.strip()
        except Exception:
            category = ""
        try:
            capacity = "".join(
                re.findall(
                    r"\d+",
                    soup.find("td",
                              text="Capacity").find_next("td").text)) + "LB"
        except Exception:
            capacity = ""
        try:
            marque = soup.find("td", text="Make").find_next("td").text.strip()
        except Exception:
            marque = ""
        try:
            model = soup.find("td", text="Model").find_next("td").text.strip()
        except Exception:
            model = ""
        try:
            year = soup.find("td", text="Year").find_next("td").text.strip()
        except Exception:
            year = ""
        try:
            mat = soup.find("td", text="Mast").find_next("td").text.strip()
        except Exception:
            mat = ""
        try:
            engine = soup.find("td",
                               text="Engine").find_next("td").text.strip()
        except Exception:
            engine = ""
        try:
            forks = soup.find("td", text="Forks").find_next("td").text.strip()
        except Exception:
            forks = ""
        try:
            attachment = soup.find("td",
                                   text="Attachment").find_next("td").text
        except Exception:
            attachment = ""
        data = {
            "post_name": f"{category} {capacity} {marque} {model} {year}",
            "capacity": capacity,
            "marque": marque,
            "model": model,
            "year": year,
            "mat": mat,
            "engine": engine,
            "forks": forks,
            "attachment": attachment,
            "url": item,
        }
        request_("POST", API_ENDPOINT, data=data)
# Example 4
def _process_added_items(items):
    for item in items:
        source = request_("GET", item).text
        soup = BeautifulSoup(source, "html.parser")
        soup_details = soup.select(
            ".woocommerce-product-details__short-description ul li")
        if len(soup_details) > 0:
            try:
                name = soup.find("h1", class_="product_title").text
            except Exception:
                name = ""
            try:
                capacity = soup_details[4].text.split(":")[1]
            except Exception:
                capacity = ""
            try:
                marque = soup_details[1].text.split(':')[1]
            except Exception:
                marque = ""
            try:
                model = soup_details[0].text.split(":")[1]
            except Exception:
                model = ""
            try:
                mat_2 = soup_details[7].text.split(":")[1]
                mat_1 = soup_details[8].text.split(":")[1]
                mat = f"{mat_2}-{mat_1}"
            except Exception:
                mat = ""
            try:
                year = soup_details[2].text.split(":")[1]
            except Exception:
                year = ""
            try:
                fuel = soup_details[6].text.split(":")[1].strip()
            except Exception:
                fuel = ""
            try:
                types = soup_details[5].text.split(":")[1].strip()
            except Exception:
                types = ""
            try:
                truck_types = soup_details[1].text.split(":")[1].strip()
            except Exception:
                truck_types = ""
            data = {
                "post_name": f"{name} {capacity} {marque} {year}",
                "capacity": capacity,
                "marque": marque,
                "model": model,
                "mat": mat,
                "annee": year,
                "fuel": fuel,
                "type": types,
                "truck_types": truck_types,
                "url": item,
            }
            request_("POST", API_ENDPOINT, data=data)
# Example 5
def _crawl_ceqinc():
    link = "https://www.ceqinc.ca/inventaire?p={page}&s=1&condition=usage"
    page = 1
    response_text = request_("GET", link.format(page=page)).text
    soup = BeautifulSoup(response_text, "html.parser")
    items = [
        e.a.get("href")
        for e in soup.find_all("div", class_="car-content")
    ]
    while True:
        pagination = soup.find("ul", class_="pagination")
        next_page = pagination.find("a", text="»") if pagination else None
        if next_page is None:
            break
        page += 1
        response_text = request_("GET", link.format(page=page)).text
        soup = BeautifulSoup(response_text, "html.parser")
        items.extend([
            e.a.get("href")
            for e in soup.find_all("div", class_="car-content")
        ])
    items = [
        "https://www.ceqinc.ca" + item
        for item in items
    ]
    return items
# Example 6
def _process_added_items(items):
    for item in items:
        source = request_("GET", item).text
        soup = BeautifulSoup(source, "html.parser")
        try:
            name = soup.find("div", class_="section post-header").text.strip()
        except Exception:
            name = ""
        try:
            capacity = soup.find(
                "td", text="Base Capacity (lbs.)").find_next("td").text + "LB"
        except Exception:
            capacity = ""
        try:
            marque = soup.find("td", text="Make:").find_next("td").text
        except Exception:
            marque = ""
        try:
            model = soup.find("td", text="Model:").find_next("td").text
        except Exception:
            model = ""
        try:
            year = soup.find("td", text="Year:").find_next("td").text
        except Exception:
            year = ""
        try:
            mat = soup.find("td", text="Mast Type:").find_next("td").text
        except Exception:
            mat = ""
        try:
            type_s = soup.find("td", text="Machine Type:").find_next("td").text
        except Exception:
            type_s = ""
        try:
            tire = soup.find("td", text="Tires:").find_next("td").text
        except Exception:
            tire = ""
        try:
            hours = soup.find("td", text="Hours:").find_next("td").text
        except Exception:
            hours = ""
        data = {
            "post_name": f"{name}",
            "capacity": capacity,
            "marque": marque,
            "model": model,
            "year": year,
            "mat": mat,
            "type": type_s,
            "tire": tire,
            "hours": hours,
            "url": item,
        }

        request_("POST", API_ENDPOINT, data=data)
def _process_added_items(items):
    # items are (page_url, reveal_id) tuples, as produced by
    # _crawl_komatsuforklift_category below: fetch the page, then
    # locate the reveal <div> by its data-reveal-id.
    for url, item in items:
        source = request_("GET", url).text
        soup = BeautifulSoup(source, "html.parser")
        soup_main = soup.find('div', {"id": item})
        try:
            name = soup_main.find("div", class_="lbHeader").text.strip()
        except Exception:
            name = ""
        try:
            capacity = soup.find("td",
                                 text="Capacity:").find_next("td").text + "LB"
        except Exception:
            capacity = ""
        try:
            marque = soup.find("td", text="Mfr:").find_next("td").text
        except Exception:
            marque = ""
        try:
            model = soup.find("td", text="Model #:").find_next("td").text
        except Exception:
            model = ""
        try:
            year = soup.find("td", text="Year:").find_next("td").text
        except Exception:
            year = ""
        try:
            mat = soup.find("td", text="Mast:").find_next("td").text
        except Exception:
            mat = ""
        try:
            type_s = soup.find("td", text="Fuel Type:").find_next("td").text
        except Exception:
            type_s = ""
        try:
            types = soup.find("td", text="Type:").find_next("td").text
        except Exception:
            types = ""
        try:
            description = soup.find("td",
                                    text="Description:").find_next("td").text
        except Exception:
            description = ""
        data = {
            "post_name": f"{name}",
            "capacity": capacity,
            "marque": marque,
            "model": model,
            "year": year,
            "mat": mat,
            "type": type_s,
            "types": types,
            "description": description,
            "url": item,
        }
        API_ENDPOINT = ""
        request_("POST", API_ENDPOINT, data=data)
def _process_added_items(items):
    for item in items:
        source = request_("GET", item).text
        soup = BeautifulSoup(source, "html.parser")
        try:
            name = soup.find("h1", class_="product_title").text
        except Exception:
            name = ""
        product_details = soup.find_all('ul', class_="product_details")
        details = product_details[0].find_all('li') if product_details else []
        try:
            capacity = details[5].text.split(':')[1].strip()
        except Exception:
            capacity = ""
        try:
            marque = details[0].text.split(':')[1].strip()
        except Exception:
            marque = ""
        try:
            model = details[1].text.split(':')[1].strip()
        except Exception:
            model = ""
        try:
            mat_1 = details[6].text.split(':')[1].strip()
            mat_2 = details[7].text.split(':')[1].strip()
            mat = f"{mat_1} {mat_2}"
        except Exception:
            mat = ""
        try:
            type_moteur = soup.find("td", text="Type moteur :").find_next("td").text.strip()
        except Exception:
            type_moteur = ""
        try:
            style_pneus = details[4].text.split(':')[1].strip()
        except Exception:
            style_pneus = ""
        try:
            fourches = details[9].text.split(':')[1].strip()
        except Exception:
            fourches = ""
        try:
            nourriture = details[3].text.split(':')[1].strip()
        except Exception:
            nourriture = ""

        data = {
            "post_name": f"{name} {capacity} {marque}",
            "capacity": capacity,
            "marque": marque,
            "model": model,
            "mat": mat,
            "type_moteur": type_moteur,
            "style_pneus": style_pneus,
            "nourriture": nourriture,
            "fourches": fourches,
            "url": item,
        }
        request_("POST", API_ENDPOINT, data=data)
# Example 9
def _process_added_items(storage_client, items):
    print(f"[manuvic] Got {len(items)} added links")
    for item in items:
        print(f"[manuvic] Processing added link {item}")
        link_data = []
        na = ""
        source = request_("GET", item).text
        soup = BeautifulSoup(source, "html.parser")
        for page_title in soup.find_all("h1", class_="page-title"):
            na = re.sub(r"[\n\t]*", "", page_title.text)
        for description in soup.find_all("div", id="product.info.descriptionmod"):
            for data in description.find_all("span", class_="infoValue"):
                data_new = re.sub(r"[\n\t\s]*", "", data.text)
                link_data.append(data_new)
        photo_links = _get_images(soup)
        print(f"[manuvic] Saving {len(photo_links)} photos to the bucket for {item}")
        item_path = _format_item_link(item)
        blob_path = f"manuvic/photos/{item_path}"
        save_photos_to_bucket(
            storage_client,
            blob_path,
            photo_links,
            BUCKET_NAME
        )
        try:
            capacity = link_data[2]
        except IndexError:
            capacity = ""
        try:
            marque = link_data[3]
        except IndexError:
            marque = ""
        try:
            model = link_data[4]
        except IndexError:
            model = ""
        try:
            mat = link_data[8]
        except IndexError:
            mat = ""
        try:
            year = link_data[13]
        except IndexError:
            year = ""
        print(f"[manuvic] Posting data about the {item} to forklift.news website")
        request_(
            "POST",
            API_ENDPOINT,
            data={
                "post_name": f"{na} {capacity} {marque} {year}",
                "capacity": capacity,
                "marque": marque,
                "model": model,
                "mat": mat,
                "annee": year,
                "url": item,
            })
def _process_added_items(items):
    for item in items:
        source = request_("GET", item).text
        soup = BeautifulSoup(source, "html.parser")
        try:
            capacity = soup.find("td", text="Capacité :").find_next("td").text.strip() + "LB"
        except Exception:
            capacity = ""
        try:
            marque = soup.find("span", class_="marque").text.strip()
        except Exception:
            marque = ""
        try:
            model = soup.find("span", class_="modele").text.strip()
        except Exception:
            model = ""
        try:
            mat = soup.find("td", text="Mât :").find_next("td").text.strip()
        except Exception:
            mat = ""
        try:
            year = soup.find("td", text="Année :").find_next("td").text.strip()
        except Exception:
            year = ""
        try:
            heures = soup.find("td", text="Heures :").find_next("td").text.strip()
        except Exception:
            heures = ""
        try:
            type_moteur = soup.find("td", text="Type moteur :").find_next("td").text.strip()
        except Exception:
            type_moteur = ""
        try:
            style_pneus = soup.find("td", text="Styles pneus :").find_next("td").text.strip()
        except Exception:
            style_pneus = ""
        try:
            fourches = soup.find("td", text="Fourches :").find_next("td").text.strip()
        except Exception:
            fourches = ""
        data = {
            "post_name": f"{capacity} {marque} {year}",
            "capacity": capacity,
            "marque": marque,
            "model": model,
            "mat": mat,
            "annee": year,
            "heures": heures,
            "type_moteur": type_moteur,
            "style_pneus": style_pneus,
            "fourches": fourches,
            "url": item,
        }
        request_("POST", API_ENDPOINT, data=data)
# Example 11
def _process_added_items(items):
    for item in items:
        source = request_("GET", item).text
        soup = BeautifulSoup(source, "html.parser")
        try:
            name = soup.find("div", class_="su-service-title").text.strip()
        except Exception:
            name = ""
        try:
            capacity = soup.find("td",
                                 text="Capacity:").find_next("td").text + "LB"
        except Exception:
            capacity = ""
        try:
            marque = soup.find("td", text="Manufacturer:").find_next("td").text
        except Exception:
            marque = ""
        try:
            model = soup.find("td", text="Model #:").find_next("td").text
        except Exception:
            model = ""
        try:
            year = soup.find("td", text="Year:").find_next("td").text
        except Exception:
            year = ""
        try:
            mat = soup.find("td", text="Mast:").find_next("td").text
        except Exception:
            mat = ""
        try:
            type_s = soup.find("td", text="Type:").find_next("td").text
        except Exception:
            type_s = ""
        try:
            tire = soup.find("td", text="Tire:").find_next("td").text
        except Exception:
            tire = ""
        try:
            condition = soup.find("td", text="Condition:").find_next("td").text
        except Exception:
            condition = ""
        data = {
            "post_name": f"{name}",
            "capacity": capacity,
            "marque": marque,
            "model": model,
            "year": year,
            "mat": mat,
            "type": type_s,
            "tire": tire,
            "condition": condition,
            "url": item,
        }
        request_("POST", API_ENDPOINT, data=data)
def _process_added_items(items):
    for item in items:
        url_link = f"https://www.michiganwholesaleequipment.com{item}"
        source = request_("GET", url_link).text
        soup = BeautifulSoup(source, "html.parser")
        try:
            name = soup.find("section", {
                "id": "block-zurb-foundation-page-title"
            }).text.strip()
        except Exception:
            name = ""
        try:
            capacity = soup.find("div", text="Capacity").find_next("div").text
        except Exception:
            capacity = ""
        try:
            model = soup.find("div",
                              text="Equipment Model").find_next("div").text
        except Exception:
            model = ""
        try:
            hours = soup.find("div", text="Hours").find_next("div").text
        except Exception:
            hours = ""
        try:
            mat = soup.find("div", text="Mast").find_next("div").text
        except Exception:
            mat = ""
        try:
            type_s = soup.find("div", text="Fuel Type").find_next("div").text
        except Exception:
            type_s = ""
        try:
            tire = soup.find("div",
                             text="Equipment Type").find_next("div").text
        except Exception:
            tire = ""
        try:
            year = soup.find("div", text="Year").find_next("div").text
        except Exception:
            year = ""
        data = {
            "post_name": f"{name}",
            "capacity": capacity,
            "hours": hours,
            "model": model,
            "mat": mat,
            "type": type_s,
            "tire": tire,
            "year": year,
            "url": url_link,
        }
        request_("POST", API_ENDPOINT, data=data)
def _process_added_items(items):
    print(f"[almachinery] Got {len(items)} added links")
    for item in items:
        print(f"[almachinery] Processing added link {item}")
        source = request_("GET", item).text
        soup = BeautifulSoup(source, "html.parser")
        try:
            name = soup.find("p", class_="category-title").text
        except Exception:
            name = ""
        try:
            capacity = soup.find("span",
                                 text="Capacité").find_next("span").text + "LB"
        except Exception:
            capacity = ""
        try:
            marque = soup.find(
                "span", text="Marque").find_next("span").find("img").get("alt")
        except Exception:
            marque = ""
        try:
            model = soup.find("span",
                              text="No de série").find_next("span").text
        except Exception:
            model = ""
        try:
            mat_1 = soup.find("span",
                              text="Type de mât").find_next("span").text
            mat_2 = soup.find("span",
                              text="Hauteur du mât").find_next("span").text
            mat = f"{mat_1},{mat_2}"
        except Exception:
            mat = ""
        try:
            year = soup.find("span", text="Année").find_next("span").text
        except Exception:
            year = ""
        print(
            f"[almachinery] Posting data about the {item} to forklift.news website"
        )
        request_("POST",
                 "URL",
                 data={
                     "post_name": f"{name} {capacity} {marque} {year}",
                     "capacity": capacity,
                     "marque": marque,
                     "model": model,
                     "mat": mat,
                     "annee": year,
                     "url": item,
                 })
def _crawl_komatsuforklift_category(category_link):
    response_text = request_("GET", category_link).text
    soup = BeautifulSoup(response_text, "html.parser")
    item_links = [(category_link, el.get('data-reveal-id'))
                  for el in soup.find_all("a", class_="img")]
    while True:
        next_page_link_el = soup.find("a", text="next »")
        if next_page_link_el is not None:
            response_text = request_("GET", next_page_link_el["href"]).text
            soup = BeautifulSoup(response_text, "html.parser")
            item_links.extend([(next_page_link_el["href"],
                                el.get("data-reveal-id"))
                               for el in soup.find_all("a", class_="img")])
        else:
            break
    return set(item_links)
# Example 15
def _crawl_nfelifts_category(category_link):
    response_text = request_("GET", category_link).text
    soup = BeautifulSoup(response_text, "html.parser")
    item_links = [
        el.get('href') for el in soup.find_all("a", class_="more-link")
    ]
    return set(item_links)
# Example 16
def crawl_manuvic(request):
    if request.method == "POST":
        print("[manuvic] Started crawling website")
        response_text = request_(
            "GET",
            "https://www.manuvic.com/produits/chariots-elevateurs.html?cat=116&product_list_limit=100"
        ).text

        soup = BeautifulSoup(response_text, "html.parser")
        item_links = [
            el.get("href")
            for el in soup.find_all("a", class_="product photo product-item-photo")
        ]

        db = firestore.Client()
        storage_client = storage.Client()

        if not item_links:
            send_warning_email(SENDGRID_API_KEY, SENDER_EMAIL, RECEIVER_EMAILS, "manuvic")
            return "No links were found on manuvic website"

        comparison_result = add_and_compare_new_items(db, "manuvic", item_links)
        added_items, deleted_items = comparison_result["added"], comparison_result["deleted"]
        email_text = ""
        if added_items:
            _process_added_items(storage_client, added_items)
            email_text += format_links_modified("Added", added_items)
        if deleted_items:
            email_text += format_links_modified("Deleted", deleted_items)
        if email_text != "":
            send_email(SENDGRID_API_KEY, SENDER_EMAIL, RECEIVER_EMAILS, "Comparison results for manuvic", email_text)
            return email_text
        else:
            return "No new added or new deleted items found"
    else:
        return "This method is not supported"
# Example 17
def _crawl_achatusag_category(category_link):
    response_text = request_("GET", category_link).text
    soup = BeautifulSoup(response_text, "html.parser")
    item_links = [
        el.find('a').get('href')
        for el in soup.find_all("div", class_="flex_display")
    ]
    return set(item_links)
def _crawl_multichariots_category(category_link):
    response_text = request_("GET", category_link).text
    soup = BeautifulSoup(response_text, "html.parser")
    item_links = [
        el.find('a').get('href')
        for el in soup.find_all("h3", class_="product-title")
    ]
    return set(item_links)
def _crawl_machinerieplante_category_2(category_link):
    response_text = request_("GET", category_link).text
    soup = BeautifulSoup(response_text, "html.parser")
    item_links = [
        el.get('href')
        for el in soup.find_all("a", class_="Menugj")
    ]
    # Follow each sub-category and accumulate its links as well.
    sub_links = []
    for ct in item_links:
        url_link = f"http://www.machinerieplante.com/fr/equipement/{ct}"
        response_text = request_("GET", url_link).text
        soup = BeautifulSoup(response_text, "html.parser")
        sub_links.extend(
            el.get('href')
            for el in soup.find_all("a", class_="Menugj")
        )
    item_links.extend(sub_links)
    return set(item_links)
def _crawl_chariotelevateurhardy_category(category_link):
    response_text = request_("GET", category_link).text
    soup = BeautifulSoup(response_text, "html.parser")
    item_links = [
        el.find('a').get('href')
        for el in soup.find_all("h2", class_="entry-title")
    ]
    return set(item_links)
def _crawl_equipementse3_category(category_link):
    response_text = request_("GET", category_link).text
    soup = BeautifulSoup(response_text, "html.parser")
    item_links = [
        el.find('a').get('href')
        for el in soup.find_all("h2", class_="term-title")
    ]
    return set(item_links)
# Example 22
def _process_added_items(items):
    print(f"[ceqinc] Got {len(items)} added links")
    for item in items:
        print(f"[ceqinc] Processing added link {item}")
        source = request_("GET", item).text
        soup = BeautifulSoup(source, "html.parser")
        try:
            name = soup.find("span", text="Type d'équipement").find_next("strong").text
        except Exception:
            name = ""
        try:
            capacity = "".join(re.findall(r"\d+", soup.find("span", text="Capacité").find_next("strong").text)) + "LB"
        except Exception:
            capacity = ""
        try:
            marque = soup.find("span", text="Marque").find_next("strong").text
        except Exception:
            marque = ""
        try:
            model = soup.find("span", text="Modèle").find_next("strong").text
        except Exception:
            model = ""
        try:
            mat_1 = soup.find("span", text="Hauteur du mât (abaissé)").find_next("strong").text
            mat_2 = soup.find("span", text="Hauteur du mât (élévation)").find_next("strong").text
            mat = f"abaissé: {mat_1}, élévation: {mat_2}"
        except Exception:
            mat = ""
        try:
            year = soup.find("span", text="Année").find_next("strong").text
        except Exception:
            year = ""
        print(f"[ceqinc] Posting data about the {item} to forklift.news website")
        request_(
            "POST",
            "",
            data={
                "post_name": f"{name} {capacity} {marque} {year}",
                "capacity": capacity,
                "marque": marque,
                "model": model,
                "mat": mat,
                "annee": year,
                "url": item,
            })
# Example 23
def _process_added_items(items):
    for item in items:
        source = request_("GET", item).text
        soup = BeautifulSoup(source, "html.parser")
        try:
            capacity = soup.find("div", text="Capacity:").find_next("div").text + "LB"
        except Exception:
            capacity = ""
        try:
            marque = soup.find("div", text="Make:").find_next("div").text
        except Exception:
            marque = ""
        try:
            model = soup.find("div", text="Model:").find_next("div").text
        except Exception:
            model = ""
        try:
            year = soup.find("div", text="Year:").find_next("div").text
        except Exception:
            year = ""
        try:
            type_s = soup.find("div", text="Type:").find_next("div").text
        except Exception:
            type_s = ""
        try:
            tire = soup.find("div", text="Upright:").find_next("div").text
        except Exception:
            tire = ""
        try:
            hours = soup.find("div", text="Hours:").find_next("div").text
        except Exception:
            hours = ""
        data = {
            "post_name": f"{type_s} {capacity} {marque} {year}",
            "capacity": capacity,
            "marque": marque,
            "model": model,
            "year": year,
            "type": type_s,
            "tire": tire,
            "hours": hours,
            "url": item,
        }

        request_("POST", API_ENDPOINT, data=data)
def _crawl_machinerieplante_category(category_link):
    url_link = f"http://www.machinerieplante.com/fr/equipement/{category_link}"
    response_text = request_("GET", url_link).text
    soup = BeautifulSoup(response_text, "html.parser")
    item_links = [
        el.get('href')
        for el in soup.find_all("a", class_="Lfooter")
    ]
    return set(item_links)
def _crawl_almachinery():
    api_link = "https://www.a1machinery.com/fr/inventaire/api?capacity_from=0&capacity_to=55000&p={page}&referer=/fr/Produits?capacity_from=0&capacity_to=55000&p={page}"
    initial_page = 1
    response = request_("GET", api_link.format(page=initial_page))
    response_json = response.json()
    pages = response_json.get("pages")
    items = [{"link": item["url"]} for item in response_json.get("items")]

    for page in range(2, pages + 1):
        response = request_("GET", api_link.format(page=page))
        response_json = response.json()
        items.extend([{
            "link": item["url"]
        } for item in response_json.get("items")])

    items = [f"https://www.a1machinery.com{item['link']}" for item in items]

    return items
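# For reference, the JSON shape _crawl_almachinery relies on (inferred from
# the calls above, not from any API documentation):
#   {"pages": <int>, "items": [{"url": "/fr/..."}, ...]}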
# Example 26
def _crawl_ldlqc_category(category_link):
    response_text = request_("GET", category_link).text
    soup = BeautifulSoup(response_text, "html.parser")
    item_links = [
        el.find('a').get("href") for el in soup.find_all("div", class_="title")
    ]
    while True:
        next_page_link_el = soup.find("a", class_="next")
        if next_page_link_el is not None:
            response_text = request_("GET", next_page_link_el["href"]).text
            soup = BeautifulSoup(response_text, "html.parser")
            item_links.extend([
                el.find('a').get("href")
                for el in soup.find_all("div", class_="title")
            ])
        else:
            break
    return set(item_links)
# Example 27
def _crawl_liftnorthamerica_category(category_link):
    response_text = request_("GET", category_link).text
    soup = BeautifulSoup(response_text, "html.parser")
    item_links = [
        el.get('href') for el in soup.find_all("h4", class_="product-title")
    ]
    while True:
        next_page_link_el = soup.find("a", class_="next page-numbers")
        if next_page_link_el is not None:
            url = next_page_link_el["href"]
            response_text = request_("GET", url).text
            soup = BeautifulSoup(response_text, "html.parser")
            item_links.extend([
                el.get("href")
                for el in soup.find_all("h4", class_="product-title")
            ])
        else:
            break
    return set(item_links)
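# _crawl_ldlqc_category and _crawl_liftnorthamerica_category above follow the
# same "collect links, then follow the next-page anchor" loop.  A shared
# helper along these lines (hypothetical, not in the original project) could
# cover both, given CSS selectors for the item link and the next anchor:
def _crawl_paginated(start_url, item_selector, next_selector):
    links, url = [], start_url
    while url:
        soup = BeautifulSoup(request_("GET", url).text, "html.parser")
        links.extend(el.get("href") for el in soup.select(item_selector))
        next_el = soup.select_one(next_selector)
        url = next_el.get("href") if next_el else None
    return set(links)
# e.g. _crawl_paginated(category_link, "h4.product-title", "a.next.page-numbers")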
# Example 28
def _process_added_items(items):
    for item in items:
        source = request_("GET", item).text
        soup = BeautifulSoup(source, "html.parser")
        try:
            name = soup.find("h1", class_="post-title").text
        except Exception:
            name = ""

        soup_detilas = soup.select(".specification li")
        try:
            capacity = soup_detilas[5].text.split(":")[1]
        except Exception:
            capacity = ""

        try:
            marque = soup_detilas[1].text.split(':')[1]
        except Exception:
            marque = ""
        try:
            model = soup_detilas[2].text.split(":")[1]
        except Exception:
            model = ""
        try:
            mat = soup_detilas[9].text.split(":")[1]

        except Exception:
            mat = ""
        try:
            year = soup_detilas[3].text.split(":")[1]
        except Exception:
            year = ""
        data = {
            "post_name": f"{name} {capacity} {marque} {year}",
            "capacity": capacity,
            "marque": marque,
            "model": model,
            "mat": mat,
            "annee": year,
            "url": item,
        }
        request_("POST", API_ENDPOINT, data=data)
def _process_added_items(items):
    print(f"[southeastforklifts] Got {len(items)} added links")
    for item in items:
        print(
            f"[southeastforklifts] Posting data about the {item} to forklift.news website"
        )
        item_data = _parse_item(item)
        request_(
            "POST",
            API_ENDPOINT,
            data={
                "post_name":
                f"{item_data['name']} {item_data['capacity']} {item_data['marque']} {item_data['year']}",
                "capacity": item_data['capacity'],
                "marque": item_data['marque'],
                "model": item_data['model'],
                "mat": item_data['mat'],
                "annee": item_data['year'],
                "url": item,
            })
# Example 30
def _crawl_canadacrown_category(category_link):
    response_text = request_("GET", category_link).text
    soup = BeautifulSoup(response_text, "html.parser")
    item_links_all = []
    for el in soup.find_all("div", class_="image"):
        try:
            url = el.find('a').get('href')
            # main_link is a module-level site-root constant defined elsewhere.
            base_url = f"{main_link}{url}"
            item_links_all.append(base_url)
        except Exception:
            pass
    return set(item_links_all)
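# request_ is a project helper that is not defined in this file.  A minimal
# stand-in for local testing might look like this (assumption: it simply
# wraps requests.request and raises on HTTP errors):
import requests

def request_(method, url, **kwargs):
    response = requests.request(method, url, timeout=30, **kwargs)
    response.raise_for_status()
    return response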