def get_format_product(url, product_model):
    """Scrape one product-list page and return its products as dicts.

    The page is fetched with ``get_html``. Inside the element with class
    ``mProList``, every element with class ``l`` yields one product dict
    built from its ``<li>`` items (stripped ``<i>`` text -> stripped
    ``<span>`` text) plus ``product_index_href``, ``product_index_name``,
    ``_id`` (from ``get_next_id(product_model)``) and ``version`` (the
    module-level ``current_version``).

    Args:
        url: Address of the list page to scrape.
        product_model: Passed to ``get_next_id`` and, on failure, to
            ``except_handler``.

    Returns:
        A list of product dicts. The list is empty when the page has no
        ``mProList`` container or when an error occurred while scraping.
    """
    try:
        html_str = get_html(url)
        products = []
        products_container = html_str.find(attrs={"class": "mProList"})
        if products_container:
            for product_div in products_container.findAll(attrs={"class": "l"}):
                product = {}
                product_name_a = product_div.find("a")
                product_index_href = product_name_a.attrs["href"]
                product_index_name = product_name_a.text
                for product_descs_li in product_div.findAll("li"):
                    key = product_descs_li.find("i").text
                    value = product_descs_li.find("span").text
                    product[key.strip()] = value.strip()
                product['product_index_href'] = product_index_href
                product['product_index_name'] = product_index_name
                product['_id'] = get_next_id(product_model)
                product['version'] = current_version
                products.append(product)
        return products
    except Exception as e:
        # Exception, not BaseException: KeyboardInterrupt and SystemExit must
        # propagate so a running scrape can still be stopped.
        print('错误:', e)
        except_handler(url, product_model)
        return []
def che360_engine_clean():
    """Copy engine records into ``engine_model_detail`` under new field names.

    Reads ``che360_engine_model_detail`` in pages of 10 documents sorted by
    ``_id`` ascending. Each document is mapped to a new dict (missing source
    fields become ``""``), given an ``_id`` from
    ``get_next_id("engine_model_detail")``, printed and inserted. Paging stops
    once a page yields no documents, or after 100000 pages.
    """
    # (target key, source key), in the order the fields are written.
    field_map = (
        ("desc", "cell_model_name"),
        ("engine_model", "发动机:"),
        ("series", "系列:"),
        ("engine_supp", "发动机厂商:"),
        ("cylinders_num", "汽缸数:"),
        ("fuel_type", "燃料种类:"),
        ("cc", "排量:"),
        ("max_power", "最大输出功率:"),
        ("max_hp", "最大马力:"),
        ("engine_type", "发动机形式:"),
        ("nick_name", "nick_name"),
    )
    page_size = 10
    previous_page_had_rows = True
    for page_index in range(0, 100000):
        cursor = (
            che360_engine_model_detail.find({})
            .sort('_id', pymongo.ASCENDING)
            .skip(page_index * page_size)
            .limit(page_size)
        )
        if not previous_page_had_rows:
            return
        previous_page_had_rows = False
        for record in cursor:
            previous_page_had_rows = True
            engine = {target: record.get(source, "") for target, source in field_map}
            engine["_id"] = get_next_id("engine_model_detail")
            print(engine)
            print()
            engine_model_detail.insert_one(engine)
def get_format_product_detail(parent_collection, detail_collection, version):
    """Scrape the parameter table of every product at ``version`` and store it.

    For each distinct ``product_index_href`` among the documents of
    ``truck_parts_db[parent_collection]`` whose ``version`` equals
    ``version``:

    1. ``"index"`` in the href is replaced by ``"param"`` and the page is
       fetched from ``https://product.360che.com``.
    2. In the element with class ``parameter-detail``, the ``fixed_top`` row
       supplies one ``cell_model_name`` per column (column 0 is skipped) and
       each ``param-row`` row supplies one value per column, keyed by the
       text of the row's first cell.
    3. One document per column is inserted into
       ``truck_parts_db[detail_collection]`` with ``_id``, ``parent_id`` (the
       href) and ``version`` added.
    4. All parent documents with that href get ``version`` set to
       ``version + 1`` and the modified count is printed.

    Args:
        parent_collection: Name of the collection holding the product list.
        detail_collection: Name of the collection receiving the details.
        version: Version value selecting which parent documents to process.
    """
    model_hrefs = truck_parts_db[parent_collection].find({
        "version": version
    }, {
        "_id": 1,
        "product_index_href": 1,
        "version": 1
    }).distinct('product_index_href')
    for model in model_hrefs:
        model_param_url = model.replace("index", "param")
        url = "https://product.360che.com" + model_param_url
        print(url)
        html = get_html(url)
        product_detail_container = html.find(attrs={"class": "parameter-detail"})
        header_cells = product_detail_container.find(
            "tr",
            attrs={"id": "fixed_top"},
        ).findAll("th")
        product_detail_num = len(header_cells)
        # Column 0 is skipped; columns 1..n-1 each describe one product variant.
        product_details = {i: {} for i in range(1, product_detail_num)}
        for row_data in product_detail_container.findAll("tr"):
            if row_data.get('id', "") == "fixed_top":
                name_cells = row_data.findAll("th")
                for i in range(1, product_detail_num):
                    cell_model_name = name_cells[i].find('a').string
                    product_details[i]["cell_model_name"] = cell_model_name
            if row_data.get('class', "") == ["param-row"]:
                value_cells = row_data.findAll("td")
                row_id = value_cells[0].text
                for i in range(1, product_detail_num):
                    # Rows may have fewer cells than the header has columns.
                    if len(value_cells) > i:
                        value_content = value_cells[i]
                        if value_content:
                            value = value_content.find('div').text
                            product_details[i][row_id] = value.strip()
        for detail in product_details.values():
            # NOTE(review): the id sequence name is hard-coded and not derived
            # from detail_collection -- confirm this is intended.
            detail["_id"] = get_next_id('filter_model_detail')
            detail["parent_id"] = model
            detail["version"] = version
            # insert_one replaces the deprecated Collection.insert, which
            # PyMongo 4 removed; the return value was never used.
            truck_parts_db[detail_collection].insert_one(detail)
        query = {"product_index_href": model}
        newvalues = {"$set": {"version": version + 1}}
        update_ret = truck_parts_db[parent_collection].update_many(query, newvalues)
        print(update_ret.modified_count)
def __init__(
    self,
    name,
    contact_email,
    description,
    image_url,
    zip_code,
    latitude,
    longitude,
    instructions,
    address,
    accepts_opened,
    city,
    state,
):
    """Store the constructor arguments on the instance and assign it an id.

    ``id`` is taken from ``utils.get_next_id()``. Every other attribute is
    the argument of the same name, stored unchanged.
    """
    self.id = utils.get_next_id()

    # Descriptive fields.
    self.name = name
    self.contact_email = contact_email
    self.description = description
    self.image_url = image_url

    # Coordinates and handling details.
    self.zip_code = zip_code
    self.latitude = latitude
    self.longitude = longitude
    self.instructions = instructions

    # Postal address and acceptance flag.
    self.address = address
    self.accepts_opened = accepts_opened
    self.city = city
    self.state = state
def eurocvbay_parts_clean():
    """Print a cleaned filter dict for every eurocvbay part record.

    Reads ``che360_eurocvbay_parts_clean`` in pages of 10 documents sorted by
    ``_id`` ascending. For each document a dict with ``_id`` (from
    ``get_next_id("filter_model_detail")``), ``desc`` and ``replaces`` is
    built and printed; this function does not insert it anywhere. Paging
    stops once a page yields no documents, or after 100000 pages.
    """
    page_size = 10
    previous_page_had_rows = True
    for page_index in range(0, 100000):
        cursor = (
            che360_eurocvbay_parts_clean.find({})
            .sort('_id', pymongo.ASCENDING)
            .skip(page_index * page_size)
            .limit(page_size)
        )
        if not previous_page_had_rows:
            return
        previous_page_had_rows = False
        for record in cursor:
            previous_page_had_rows = True
            cleaned = {
                "_id": get_next_id("filter_model_detail"),
                "desc": record.get("product_name", ""),
                "replaces": record.get("replaces", ""),
            }
            print(cleaned)
            print()
def che360_truck_clean():
    """Copy 360che truck records into ``truck_model_detail`` under new names.

    Reads ``che360_truck_model_detail`` in pages of 10 documents sorted by
    ``_id`` ascending. Each document is mapped to a new dict (missing source
    fields become ``""``), given an ``_id`` from
    ``get_next_id("truck_model_detail")``, printed and inserted. Paging stops
    once a page yields no documents, or after 100000 pages.

    Two source fields used to be written to keys that were already taken, so
    one value of each pair was lost: the wheelbase overwrote ``desc`` and the
    leaf-spring count overwrote ``tyre_num``. They now have their own keys,
    ``wheelbase`` and ``plate_spring_num``.
    """
    # (target key, source key), in the order the fields are written.
    field_map = (
        ("desc", "cell_model_name"),
        ("announcement_model", "公告型号:"),
        ("drive_model", "驱动形式:"),
        ("wheelbase", "轴距:"),
        ("engine_model", "发动机:"),
        ("transmission_model", "变速箱:"),
        ("length", "车身长度:"),
        ("width", "车身宽度:"),
        ("height", "车身高度:"),
        ("weight", "整车重量:"),
        ("capacity_kg", "额定载重:"),
        ("tonnage_level", "吨位级别:"),
        ("engine_brand", "发动机品牌:"),
        ("cylinders_num", "汽缸数:"),
        ("fuel_type", "燃料种类:"),
        ("cc", "排量:"),
        ("max_hp", "最大马力:"),
        ("max_power", "最大输出功率:"),
        ("engine_type", "发动机形式:"),
        ("transmission_brand", "变速箱品牌:"),
        ("forward_gears_num", "前进挡位:"),
        ("reverse_gears_num", "倒挡数:"),
        ("tyre_type", "轮胎规格:"),
        ("tyre_num", "轮胎数:"),
        # Same key that cn357_truck_clean uses for its leaf-spring count.
        ("plate_spring_num", "弹簧片数:"),
        ("brand", "brand_name"),
        ("model", "model_name"),
    )
    page_size = 10
    has_next = True
    for i in range(0, 100000):
        coll = che360_truck_model_detail.find({}).sort(
            '_id', pymongo.ASCENDING).skip(i * page_size).limit(page_size)
        if not has_next:
            return
        has_next = False
        for data in coll:
            has_next = True
            truck = {target: data.get(source, "") for target, source in field_map}
            truck["_id"] = get_next_id("truck_model_detail")
            print(truck)
            print()
            truck_model_detail.insert_one(truck)
def cn357_truck_clean():
    """Copy cn357 truck records into ``truck_model_detail`` under new names.

    Reads ``cn357_truck_model_detail`` in pages of 10 documents sorted by
    ``_id`` ascending. Each document is mapped to a new dict (missing source
    fields become ``""``), given an ``_id`` from
    ``get_next_id("truck_model_detail")``, printed and inserted. Paging stops
    once a page yields no documents, or after 10000 pages.
    """
    # (target key, source key), in the order the fields are written.
    field_map = (
        ("desc", "product"),
        ("announcement_model", "整车公告:"),
        ("brand", "品牌:"),
        ("series", "车系:"),
        ("purpose", "用途:"),
        ("drive_model", "驱动方式:"),
        ("tonnage_level", "吨位级别:"),
        ("manufacturer", "生产厂家:"),
        ("origin", "整车产地:"),
        ("weight", "整车重量:"),
        ("length", "整车长度:"),
        ("width", "整车宽度:"),
        ("height", "整车高度:"),
        ("member", "准乘人数:"),
        ("engine_model", "发动机型号:"),
        ("engine_type", "发动机形式:"),
        ("max_power", "最大功率:"),
        ("max_hp", "最大马力:"),
        ("cc", "排量:"),
        ("fuel_type", "燃油种类:"),
        ("transmission_model", "变速箱型号:"),
        ("forward_gears_num", "前进档位数:"),
        ("reverse_gears_num", "倒档档位数:"),
        ("chassis_models", "底盘型号:"),
        ("plate_spring_num", "板簧片数:"),
        ("tyre_num", "轮胎数量:"),
        ("tyre_type", "轮胎规格:"),
    )
    page_size = 10
    previous_page_had_rows = True
    for page_index in range(0, 10000):
        cursor = (
            cn357_truck_model_detail.find({})
            .sort('_id', pymongo.ASCENDING)
            .skip(page_index * page_size)
            .limit(page_size)
        )
        if not previous_page_had_rows:
            return
        previous_page_had_rows = False
        for record in cursor:
            previous_page_had_rows = True
            truck = {target: record.get(source, "") for target, source in field_map}
            truck["_id"] = get_next_id("truck_model_detail")
            print(truck)
            print()
            truck_model_detail.insert_one(truck)
def _text_after_first_colon(text, default=None):
    """Return the segment between the first and second ':' of ``text``.

    Returns ``default`` when ``text`` contains no ':' at all.
    """
    parts = text.split(":")
    return parts[1] if len(parts) > 1 else default


def cn357_filter_clean():
    """Expand cn357 filter records into one document per model.

    Reads ``cn357_filter_model_detail`` in pages of 10 documents sorted by
    ``_id`` ascending. For each document, the text after the first ':' of
    the brand and model fields is extracted. The model text is split on '/',
    and for every resulting model a dict with ``brand``, ``model`` and
    ``_id`` (from ``get_next_id("filter_model_detail")``) is printed and
    inserted into ``filter_model_detail``. Paging stops once a page yields no
    documents, or after 100000 pages.

    A missing or colon-less field used to raise ``IndexError`` despite the
    ``""`` default. Now a brand without a colon becomes ``""`` and a record
    whose model field has no colon is skipped.
    """
    page_size = 10
    has_next = True
    for i in range(0, 100000):
        coll = cn357_filter_model_detail.find({}).sort(
            '_id', pymongo.ASCENDING).skip(i * page_size).limit(page_size)
        if not has_next:
            return
        has_next = False
        for data in coll:
            has_next = True
            brand = _text_after_first_colon(data.get("品牌:", ""), default="")
            models_str = _text_after_first_colon(data.get("型号:", ""))
            if models_str is None:
                # No model information to expand, so nothing to insert.
                continue
            for model in models_str.split("/"):
                # A fresh dict per model, so inserted documents never alias.
                filter_doc = {
                    "brand": brand,
                    "model": model,
                    "_id": get_next_id("filter_model_detail"),
                }
                print(filter_doc)
                filter_model_detail.insert_one(filter_doc)
def get_format_product(url, product_model):
    """Scrape one product-list page and return its products as dicts.

    The page is fetched with ``get_html``. Each ``<li>`` of the first element
    with class ``products-list`` becomes one product dict. The ``<a>`` inside
    its ``<h5>`` supplies ``product_index_href`` and ``product_index_name``.
    Every ``<p>`` inside the first ``<span>`` of the first ``content``
    element contributes one entry: the key is the stripped ``next_element``
    of the ``<p>``'s first ``<span>`` and the value its stripped
    ``next_sibling``. ``_id`` comes from ``get_next_id(product_model)`` and
    ``version`` from the module-level ``current_version``.

    Args:
        url: Address of the list page to scrape.
        product_model: Passed to ``get_next_id``.

    Returns:
        A list with one dict per ``<li>`` of the product list.
    """
    page = get_html(url)
    collected = []
    listing = page.findAll(attrs={"class": "products-list"})[0]
    for entry in listing.findAll("li"):
        record = {}
        title_link = entry.find("h5").find("a")
        href = title_link.attrs["href"]
        title = title_link.text
        content_span = entry.findAll(attrs={"class": "content"})[0].find("span")
        for paragraph in content_span.findAll("p"):
            label_span = paragraph.find("span")
            # The value is evaluated before the key, matching the order of a
            # subscript assignment.
            item_value = label_span.next_sibling.strip()
            item_key = label_span.next_element.strip()
            record[item_key] = item_value
        record['product_index_href'] = href
        record['product_index_name'] = title
        record['_id'] = get_next_id(product_model)
        record['version'] = current_version
        collected.append(record)
    return collected
def che360_filter_clean():
    """Copy 360che air-filter records into ``filter_model_detail``.

    Reads ``che360_air_filter_detail`` in pages of 10 documents sorted by
    ``_id`` ascending. Each document is mapped to a new dict whose ``_id``
    comes from ``get_next_id("filter_model_detail")``. Missing source fields
    become ``""``, and the four "adaptable" fields are split on '/' into
    lists. The dict is printed and inserted. Paging stops once a page yields
    no documents, or after 100000 pages.
    """
    # (target key, source key) groups, in the order the fields are written.
    plain_fields_before = (
        ("desc", "cell_model_name"),
        ("type", "类型:"),
        ("model", "滤清器型号:"),
        ("diameter", "直径:"),
        ("height", "高度:"),
        ("weight", "重量:"),
        ("leakproof_type", "密封结构:"),
        ("locating_hole_diameter", "定位孔直径:"),
        ("size_model", "尺寸型号:"),
        ("market_model", "市场型号:"),
        ("flux", "流量:"),
        ("filter_level", "过滤级别:"),
        ("thread_size", "螺纹尺寸:"),
    )
    slash_separated_fields = (
        ("adaptable_truck_models", "适用车型:"),
        # NOTE(review): "enign" looks like a typo for "engine", but the key is
        # part of the stored document and is kept as-is.
        ("adaptable_enign_models", "适用机型:"),
        ("adaptable_truck_types", "适用车类型:"),
        ("adaptable_engine_types", "适用发动机类型:"),
    )
    plain_fields_after = (
        ("alternative_parts_model", "可替换滤清器型号:"),
        ("nick_name", "可替换滤清器零件号:"),
    )
    page_size = 10
    previous_page_had_rows = True
    for page_index in range(0, 100000):
        cursor = (
            che360_air_filter_detail.find({})
            .sort('_id', pymongo.ASCENDING)
            .skip(page_index * page_size)
            .limit(page_size)
        )
        if not previous_page_had_rows:
            return
        previous_page_had_rows = False
        for record in cursor:
            previous_page_had_rows = True
            cleaned = {"_id": get_next_id("filter_model_detail")}
            for target, source in plain_fields_before:
                cleaned[target] = record.get(source, "")
            for target, source in slash_separated_fields:
                cleaned[target] = record.get(source, "").split("/")
            for target, source in plain_fields_after:
                cleaned[target] = record.get(source, "")
            print(cleaned)
            print()
            filter_model_detail.insert_one(cleaned)
def create_session(self):
    """Start a new session: record its start time, id and file paths.

    Sets ``session_start_time`` to the current Unix time in whole seconds
    and ``session_id`` to ``get_next_id()``. ``session_file`` and
    ``metadata_file`` are set to ``<session_id>.mp3`` and
    ``<session_id>.json`` inside ``self.upload_dir``. The new id is logged
    at INFO level.
    """
    self.session_start_time = int(time.time())
    self.session_id = get_next_id()

    audio_name = "{}.mp3".format(self.session_id)
    self.session_file = os.path.join(self.upload_dir, audio_name)

    metadata_name = "{}.json".format(self.session_id)
    self.metadata_file = os.path.join(self.upload_dir, metadata_name)

    logging.info("created session, session id: %d", self.session_id)
def test_get_next_id(self):
    """The first two calls to ``get_next_id`` return 1 and then 2."""
    for expected_id in (1, 2):
        self.assertEqual(expected_id, get_next_id())
def create(self, user_id, text):
    """Create and save a post, returning the saved post's ``post_id``.

    The id passed to ``db.Post`` comes from ``get_next_id()``. The returned
    value is read back from the post object after ``save()``.
    """
    new_id = get_next_id()
    post = db.Post(new_id, user_id, text)
    post.save()
    return post.post_id
def create_session(self):
    """Initialise the state of a new session on this instance.

    Stores the current Unix time (whole seconds) as ``session_start_time``
    and a new id from ``get_next_id()`` as ``session_id``. It then derives
    ``session_file`` (``<session_id>.mp3``) and ``metadata_file``
    (``<session_id>.json``) inside ``self.upload_dir`` and logs the id at
    INFO level.
    """
    self.session_start_time = int(time.time())
    self.session_id = get_next_id()

    mp3_name = '{}.mp3'.format(self.session_id)
    self.session_file = os.path.join(self.upload_dir, mp3_name)

    json_name = '{}.json'.format(self.session_id)
    self.metadata_file = os.path.join(self.upload_dir, json_name)

    logging.info('created session, session id: %d', self.session_id)