data["油耗"] = fuel_consum car_year = html.xpath( '//ul[@class="cd_m_info_desc"]/li[1]/span[1]/text()') data["年份"] = car_year return data def get_pro_data(self, html): pass # return data #以下爬取的是车辆的质量检测报告项目 if __name__ == '__main__': r = redis_or.Redis_Data() h = master.Master_Spider("shenzhen") s = Slave_Spisder() f = open("shenzhen_new_data", "a", encoding="utf-8") for i in range(2000): try: data = r.pop_data("shenzhen_youxin") html = h.get_html("http://" + data) # time.sleep(1.2) detai = s.parse_detail_data(html) print(detai) f.write(str(detai) + "\n") except: # f.close() print("异常的连接", data) continue
"""Smoke-test: pop one detail-page URL from Redis, fetch it, and print the parsed data."""
import master
import redis_or
import slave

# a=master.Master_Spider("shenzhen")
# html = a.get_html("https://www.xin.com/beijing/benchi/i3/")
# urls=a.get_detail_url(html)
# for url in urls:
#     q.set_into_data("test_car_urls",url)


def main():
    """Pop a single URL from the 'test_car_urls' Redis list, crawl and parse it."""
    q = redis_or.Redis_Data()
    url = q.pop_data("test_car_urls")
    html = master.Master_Spider("shenzhen").get_html(url)
    parser = slave.Slave_Spisder()
    data = parser.parse_detail_data(html)
    print(data)


# Guarded entry point: previously everything ran at import time, which hit
# Redis and the network on a mere `import` of this module.
if __name__ == '__main__':
    main()
"""Smoke-test: pop ten detail-page URLs from Redis and parse each in turn."""
import master
import redis_or
import slave
from lxml import etree

# a=master.Master_Spider("shenzhen")
# html = a.get_html("https://www.xin.com/beijing/benchi/i3/")
# urls=a.get_detail_url(html)
# for url in urls:
#     q.set_into_data("test_car_urls",url)


def main():
    """Pop URLs 1..10 from the 'test_car_urls' Redis list, crawl and parse each."""
    q = redis_or.Redis_Data()
    # Hoisted out of the loop: both objects were rebuilt on every iteration
    # (assumes Master_Spider/Slave_Spisder are stateless per request — TODO confirm).
    spider = master.Master_Spider("shenzhen")
    parser = slave.Slave_Spisder()
    for i in range(1, 11):
        url = q.pop_data("test_car_urls")
        # print(url)
        html = spider.get_html("https://" + url)
        print(type(html))
        data = parser.parse_detail_data(html)
        print(data)


# Guarded entry point: previously the whole loop ran at import time.
if __name__ == '__main__':
    main()
data["变速箱"] = transmiss fuel_mode = html.xpath('//div[@class="cd_m_i_pz"]/dl[3]/dd[4]/span[2]/text()') data["燃油类型"] = fuel_mode drive_mode = html.xpath('//div[@class="cd_m_i_pz"]/dl[3]/dd[5]/span[2]/text()') data["驱动形式"] = drive_mode fuel_consum = html.xpath('//div[@class="cd_m_i_pz"]/dl[3]/dd[6]/span[2]/text()') data["油耗"] = fuel_consum items = html.xpath('/html/body/div[2]/div[13]/div/div[4]/div[1]/dl[1]/dd[1]/span[2]') print(items) return data #以下爬取的是车辆的质量检测报告项目 if __name__ == '__main__': html = master.Master_Spider("shenzhen").get_html("'https://www.xin.com/yrek41mkmg/che69941841.html?channel=a49b117c44837d110753e751863f53") a = Slave_Spisder() data = a.parse_detail_data(html) print(data) # fir = mongo_or.Mongo_Data() # db = fir.create_database("XIAOMI") # coll = fir.create_collection("LAO_JAY",db) # fir.insert_data(data,coll)