Example #1
def search(self):
    """Crawl cars.com for the selected maker/model and display price statistics."""
    maker = self.maker_box.get()
    model = self.model_box.get()
    zipcode = int(self.zip_entry.get())
    radius = int(self.radius_var.get())
    condition = self.condition_var.get()
    car_json_file = "cars_com_make_model.json"
    directory = "../data/"
    page_num = 1
    num_per_page = 100
    start_url = generate_url(maker, model, zipcode, radius, car_json_file, condition, page_num, num_per_page)
    csv_name = "{}-{}-{:d}-{:d}-{:s}.csv".format(maker, model, zipcode, radius, condition)
    csv_name = os.path.join(directory, csv_name)
    print("crawling {} {} {}...".format(condition, maker, model))
    craw_from_url(start_url, csv_name)
    print("finish crawling...")
    df = load_csvfile(csv_name)
    car_info = extract_info_from_csvfilename(csv_name)
    price_info = analyze_price(df)
    # print_price_info(price_info, car_info)
    self.mean = int(price_info['mean'])
    self.min = int(price_info['min'])
    self.max = int(price_info['max'])
    self.min_label_text.set(self.min)
    self.mean_label_text.set(self.mean)
    self.max_label_text.set(self.max)
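
A minimal sketch of the Tkinter scaffolding that this search() method assumes. The widget and StringVar names are taken from the snippet itself; the container class, layout, and sample option values are assumptions, not the original project's GUI code.

import tkinter as tk
from tkinter import ttk

class CarSearchApp:
    """Hypothetical owner class for the search() method above."""

    def __init__(self, root):
        # input widgets read at the top of search()
        self.maker_box = ttk.Combobox(root, values=["honda", "toyota"])
        self.model_box = ttk.Combobox(root, values=["civic", "camry"])
        self.zip_entry = tk.Entry(root)
        self.radius_var = tk.StringVar(value="25")
        self.condition_var = tk.StringVar(value="used")
        # output variables set at the bottom of search()
        self.min_label_text = tk.StringVar()
        self.mean_label_text = tk.StringVar()
        self.max_label_text = tk.StringVar()
        for widget in (self.maker_box, self.model_box, self.zip_entry):
            widget.pack()
        for var in (self.min_label_text, self.mean_label_text,
                    self.max_label_text):
            tk.Label(root, textvariable=var).pack()
        tk.Button(root, text="Search", command=self.search).pack()

    def search(self):
        ...  # body as in Example #1

if __name__ == "__main__":
    root = tk.Tk()
    CarSearchApp(root)
    root.mainloop()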
Example #2
def read_and_crawl():
    """
    crawl multiple models, crawl and compare
    """
    if len(sys.argv) != 7:
        print(
            "Usage: python {} <maker_model_file> <zip> <radius> <used or new> <json or keyfile> <output_dir>"
            .format(sys.argv[0]))
        print(
            "e.g. python {} <maker_model_file> 53715 25 used <json or keyfile> ./data/"
            .format(sys.argv[0]))
        sys.exit(1)
    with open(sys.argv[1], 'r') as mmfile:
        # materialize every "maker:model" line while the file is still open
        maker_models = [tuple(line.split(":")) for line in mmfile]
    # print(maker_models)
    zipcode = int(sys.argv[2])
    radius = int(sys.argv[3])
    condition = sys.argv[4]
    car_json_file = sys.argv[5]
    output_dir = sys.argv[6]
    # if the output_dir does not exist, create it
    os.makedirs(output_dir, exist_ok=True)
    car_infos = []
    price_infos = []
    for maker, model in maker_models:
        maker = maker.strip()
        model = model.strip()
        page_num = 1
        num_per_page = 100
        start_url = generate_url(maker, model, zipcode, radius, car_json_file,
                                 condition, page_num, num_per_page)
        csv_name = "{}-{}-{:d}-{:d}-{:s}.csv".format(maker, model, zipcode,
                                                     radius, condition)
        csv_name = os.path.join(output_dir, csv_name)
        print("crawling {} {} {}...".format(condition, maker, model))
        craw_from_url(start_url, csv_name)
        print("finish crawling...")
        df = load_csvfile(csv_name)
        car_info = extract_info_from_csvfilename(csv_name)
        price_info = analyze_price(df)
        car_infos.append(car_info)
        price_infos.append(price_info)
    plot_price_info(car_infos, price_infos)
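
read_and_crawl() expects sys.argv[1] to be a plain-text file with one maker:model pair per line. A standalone demonstration of that parsing step, using made-up sample lines:

# sample input lines as they would come from <maker_model_file>
sample_lines = ["honda:civic\n", "toyota:camry\n", "ford:f-150\n"]
maker_models = [tuple(line.split(":")) for line in sample_lines]
for maker, model in maker_models:
    print(maker.strip(), model.strip())
# -> honda civic, toyota camry, ford f-150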
Example #3
def pipeline_carscom():
    """
    crawling pipeline for cars.com
    """
    maker, model, zipcode, radius, condition, car_json_file, directory = user_input()
    page_num = 1
    num_per_page = 100
    start_url = generate_url(maker, model, zipcode, radius, car_json_file,
                             condition, page_num, num_per_page)
    csv_name = "{}-{}-{:d}-{:d}-{:s}.csv".format(maker, model, zipcode, radius,
                                                 condition)
    # note: this overrides the user-supplied directory, so the CSV is saved
    # next to this script
    directory = os.path.dirname(os.path.realpath(__file__))
    csv_name = os.path.join(directory, csv_name)
    print("crawling {} {} {}...".format(condition, maker, model))
    craw_from_url(start_url, csv_name)
    print("finish crawling...")
    df = load_csvfile(csv_name)
    car_info = extract_info_from_csvfilename(csv_name)
    price_info = analyze_price(df)
    print_price_info(price_info, car_info)
def pipeline_carscom(directory='./'):
    """
    crawling pipeline for cars.com
    """
    # note: user_input() overwrites the `directory` keyword argument here
    maker, model, zipcode, radius, condition, car_json_file, directory = user_input()
    page_num = 1
    num_per_page = 100
    start_url = generate_url(maker, model, zipcode, radius, car_json_file,
                             condition, page_num, num_per_page)
    csv_name = "{}-{}-{:d}-{:d}-{:s}.csv".format(maker, model, zipcode, radius,
                                                 condition)
    csv_name = os.path.join(directory, csv_name)
    print("crawling...")
    craw_from_url(start_url, csv_name)
    print("finish crawling...")
    df = load_csvfile(csv_name)
    # keep only the filename so the maker/model can be parsed back out of it
    csv_name = os.path.basename(csv_name)
    maker, model = csv_name.split('-')[:2]
    maker = maker.upper()
    model = model.upper()
    analyze_price(df, maker, model)
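
The second variant recovers the maker and model from the CSV filename rather than reusing the variables it already has. A small round-trip demonstration of that {maker}-{model}-{zip}-{radius}-{condition}.csv convention, with hypothetical values:

import os

csv_name = os.path.join("./data", "honda-civic-53715-25-used.csv")
base = os.path.basename(csv_name)    # "honda-civic-53715-25-used.csv"
maker, model = base.split("-")[:2]   # "honda", "civic"
print(maker.upper(), model.upper())  # HONDA CIVIC

Note that splitting on "-" misparses hyphenated names such as f-150, so keeping the original variables (as the first variant does) is the more robust choice.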
def pipeline_market_check(directory='./'):
    """
    the whole crawling pipeline for market check

    Args:
        directory: output directory
    """
    maker, model, zipcode, radius, condition, market_key_file, directory = user_input()

    # convert zipcode to latitude and longitude
    zipSearch = ZipcodeSearchEngine()
    zipinfo = zipSearch.by_zipcode(str(zipcode))
    latitude, longitude = str(zipinfo["Latitude"]), str(zipinfo["Longitude"])

    # read the API access key
    with open(market_key_file, "r") as key:
        api_key = key.read()

    car_market_url = "http://api.marketcheck.com/v1/search"

    # start with index = 0, rows = 50 (max cars per request)
    max_rows_per_request = 50
    querystring = {
        "api_key": api_key,
        "make": maker,
        "latitude": latitude,
        "longitude": longtitude,
        "radius": str(radius),
        "car_type": condition,
        "seller_type": "dealer",
        "start": "0",
        "rows": str(max_rows_per_request),
    }
    if model != "all":
        querystring["model"] = model
    headers = {'Host': 'marketcheck-prod.apigee.net'}

    response = requests.get(car_market_url,
                            headers=headers,
                            params=querystring)

    cars_json = response.json()
    count = cars_json["num_found"]
    # ceiling division: number of paged requests needed to cover all listings
    num_of_requests = (count + max_rows_per_request - 1) // max_rows_per_request
    print("Total number of requests is {:d}".format(num_of_requests))

    short_csv_header = [
        "name", "VIN", "price", "miles", "Exterior Color", "Interior Color"
    ]
    dict_header = [
        "heading", "vin", "price", "miles", "exterior_color", "interior_color"
    ]  # names from API response
    long_csv_header = [
        "name", "VIN", "make", "model", "year", "price", "miles",
        "Exterior Color", "Interior Color", "Seller Name", "Seller Phone",
        "Transmission", "Drivetrain"
    ]  # car attributes stored in csv table
    csv_rows = []  # stores each crawled car as a dictionary

    for ite in range(num_of_requests):
        print("Sending request {:d} of {:d}".format(ite + 1, num_of_requests))
        # after the first page, re-query the API with the next offset
        if ite != 0:
            querystring["start"] = str(ite * max_rows_per_request)
            response = requests.get(car_market_url,
                                    headers=headers,
                                    params=querystring)
            cars_json = response.json()

        for car in cars_json["listings"]:  # car is a dictionary
            car_dict = {
                ch: car.get(dh, None)
                for ch, dh in zip(short_csv_header, dict_header)
            }
            if "dealer" in car:
                dealer = car["dealer"]
                car_dict["Seller Phone"] = dealer.get("phone", None)
                car_dict["Seller Name"] = dealer.get("name", None)
            if "build" in car:
                build = car["build"]
                car_dict["Transmission"] = build.get("transmission", None)
                car_dict["Drivetrain"] = build.get("drivetrain", None)
                car_dict["year"] = build.get("year", None)
                car_dict["make"] = build.get("make", None)
                car_dict["model"] = build.get("model", None)
            csv_rows.append(dict(car_dict))

    # write data to csv file
    csv_name = "{}-{}-{:d}-{:d}-{:s}.csv".format(maker, model, zipcode, radius,
                                                 condition)
    csv_name = os.path.join(directory, csv_name)
    write_cars_to_csv(csv_name, long_csv_header, csv_rows)

    # do some price analysis there
    df = load_csvfile(csv_name)
    maker, model = maker.upper(), model.upper()
    analyze_price(df, maker, model)
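
write_cars_to_csv() is internal to the project; a plausible implementation on top of csv.DictWriter could look like the sketch below (an assumption, not the project's actual code). Passing restval="" fills columns that are missing from a row, which happens whenever a listing lacks the dealer or build sub-dictionaries.

import csv

def write_cars_to_csv(csv_name, header, rows):
    """Write per-car dictionaries to csv_name, one row per listing (sketch)."""
    with open(csv_name, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=header, restval="")
        writer.writeheader()
        writer.writerows(rows)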