def __init__(self):
    # handlers = [
    #     ################ User pages #############
    #     (r'/people/([^/]+)', empty),
    #     ################ Wishes ##############
    #     (r'/wish/pool', empty),
    #     (r'/wish/([^/]+)', empty),
    #     (r'/wish/([^/]+)/add', empty),
    #     (r'/wish/([^/]+)/edit', empty),
    #     (r'/wish/([^/]+)/delete', empty),
    #     (r'/wish/([^/]+)/reply', empty),
    #     ############### Settings ###############
    #     (r'/settings/profile', empty),
    #     (r'/settings/email', empty),
    #     (r'/settings/profile', empty),
    #     (r'/settings/acount', empty),
    #     ################ Notifications ##############
    #     (r'/notifications', empty),
    #     (r'/notifications/all', empty)
    # ]
    routed_handlers = route.get_routes()
    # StaticFileHandler takes its path through a kwargs dict
    routed_handlers.append(
        tornado.web.url(r"/static/(.*)", t.StaticFileHandler,
                        {"path": app_config['static_path']}))
    # catch-all route for unknown URLs
    routed_handlers.append(
        tornado.web.url('/.*?', base.PageNotFoundHandler, name='page_not_found'))
    settings = app_config
    tornado.web.Application.__init__(self, routed_handlers, **settings)
    self.session = ConnectDB()
    self.r = ConnectRDB()
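# A minimal launch sketch for the Application subclass whose __init__ appears above.
# The class name WishApplication and the 'port' key in app_config are assumptions made
# for illustration; only the tornado.httpserver / tornado.ioloop calls are standard Tornado.
import tornado.httpserver
import tornado.ioloop


def run_server():
    application = WishApplication()                      # hypothetical name of the subclass defined above
    server = tornado.httpserver.HTTPServer(application)  # wrap the app in an HTTP server
    server.listen(app_config.get('port', 8888))          # fall back to 8888 if app_config has no 'port'
    tornado.ioloop.IOLoop.instance().start()             # enter the event loop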
def __init__(self, database, table):
    db = ConnectDB(database)
    # db.create_new_table(table)
    missing = open("missing", "a+")  # words that could not be found
    for word in open('words.list'):
        word = word.rstrip()
        print 'Adding word... ', word
        word_xml = GetWord()
        word_dic = word_xml.get_word_xml(word)
        if word_dic is None:
            print '%s Not Found' % word
            missing.write(word + '\n')
            continue
        else:
            # print word_dic
            db.insert_a_dict(word_dic)
    missing.close()
def SaveData(table_name="", brand="", series="", conf="", status="", URL_="", index=""): conf = json.loads(conf) for (k, v) in conf.items(): spaceid = k models = v[u"车型名称"] if models == '-': continue mth = re.compile(r'(.*)(20\d\d)(.*)') y = re.search(mth, models) if y: year = int(y.group(2)) else: year = 0 guide_price = v[u"厂商指导价(元)"] #f=re.compile(r'(\d+.\d+)') #p=re.search(f,guide_price) #guide_price=p.group() emission_standard = v[u"环保标准"] structure = v[u"车身结构"] level = v[u"级别"] manufacturer = v[u"厂商"] json_text = json.dumps(v, encoding='utf-8', ensure_ascii=False) db = ConnectDB() n = db.select(table_name="spider_json", field="spaceid", value=spaceid) if n != 0: logger.info("spaceid: %s exists " % spaceid) continue db.insert(table_name=table_name, spaceid=spaceid, brand=brand, series=series, models=models, guide_price=guide_price, level=level, emission_standard=emission_standard, structure=structure, status=status, manufacturer=manufacturer, year=year, index=index, json_text=json_text, URL_=URL_) db.dbclose()
def SaveData(table_name="",brand="",series="",conf="",status="",URL_="",index=""): conf=json.loads(conf) for (k,v) in conf.items(): spaceid = k models = v[u"车型名称"] if models == '-': continue mth=re.compile(r'(.*)(20\d\d)(.*)') y=re.search(mth,models) if y: year = int(y.group(2)) else: year = 0 guide_price =v[u"厂商指导价(元)"] #f=re.compile(r'(\d+.\d+)') #p=re.search(f,guide_price) #guide_price=p.group() emission_standard=v[u"环保标准"] structure=v[u"车身结构"] level=v[u"级别"] manufacturer=v[u"厂商"] json_text=json.dumps(v,encoding='utf-8', ensure_ascii=False) db=ConnectDB() n = db.select(table_name="spider_json",field="spaceid",value=spaceid) if n != 0: logger.info("spaceid: %s exists " % spaceid ) continue db.insert(table_name=table_name, spaceid=spaceid, brand=brand, series=series, models=models, guide_price=guide_price, level=level, emission_standard=emission_standard, structure=structure, status=status , manufacturer=manufacturer, year=year, index=index, json_text=json_text, URL_=URL_) db.dbclose()
def giveFirstLatter(first_latter, item):
    fname = first_latter + ".txt"
    # record file for this letter; adjust the path as needed
    path = 'D:/pyLearning/spider-master/spider-master/spider/tmp/' + fname
    fobj = open(path, 'r+')
    fileList = fobj.read().splitlines()
    fobj.close()
    column_tit = re.compile(r'column_tit')
    column_tit = item.find_all("div", attrs={"class": column_tit})
    width = re.compile(r'848px')
    width = item.find_all("td", attrs={"width": width})
    for (brand, width) in zip(column_tit, width):
        print "--" + brand.a.span.text.strip()
        brands = brand.a.span.text.strip()
        logger.info(u"--brand " + brand.a.span.text + " start!...")
        item_list = re.compile(r'item_list')
        item_list = width.find_all("div", attrs={"class": item_list})
        for i in item_list:
            print "---" + i.a.get("href")
            text = i.a.get("href").strip()
            if text not in fileList:
                # if the href is not in the record file, fetch it and write it to the database
                logger.info(u"---model " + i.a.get("href") + " start!...")
                href = url2 + i.a.get("href") + "config.htm"
                logger.info(u"---link " + href)
                obj = GetObj(href)
                html = obj.gethtml()
                while True:
                    if html is not None:
                        coding = obj.getcodeing(html)  # detect the page encoding
                        soup = BeautifulSoup(html, 'html5lib', from_encoding=coding)
                        base_title = re.compile(r'base_title')
                        base_title = soup.find_all("tr", attrs={"id": base_title})
                        soup2 = base_title[0]  # the base_title row
                        col = re.compile(r'col')
                        col = soup2.find_all("td", attrs={"scope": col})  # the <td> cells
                        for i in col:
                            model = i.a.text.strip()  # model name
                            logger.info("model " + model)
                            modid = i.get("id")
                            mod = re.compile(r'(mod_)(.*)')
                            carid = re.search(mod, modid)
                            if hasattr(carid, 'group'):
                                carid = carid.group(2)
                                string = "bname_" + carid
                                db = ConnectDB()
                                n = db.select(table_name="carInfo1", field="vechiclesID", value=carid)
                                if n != 0:
                                    logger.info("vechiclesID: %s exists" % carid)
                                    continue
                                series = re.compile(string)
                                series = soup.find("td", attrs={"id": series})
                                if series is not None:  # series
                                    series = series.a.text.strip()
                                    logger.info("series " + string)
                                else:
                                    logger.error(string + " not found!!!!")
                                    series = "-"
                                string = "type_name_" + carid  # carType
                                carType = re.compile(string)
                                carType = soup.find("td", attrs={"id": carType})
                                if carType is not None:
                                    carType = carType.a.text.strip()
                                    logger.info("carType " + carType)
                                else:
                                    logger.error(string + " not found!!!!")
                                    carType = "-"
                                string = "m_newseat_" + carid  # peopleNum
                                peopleNum = re.compile(string)
                                peopleNum = soup.find("td", attrs={"id": peopleNum})
                                if peopleNum is not None:
                                    peopleNum = peopleNum.text.strip()
                                    logger.info("peopleNum " + peopleNum)
                                else:
                                    logger.error(string + " not found!!!!")
                                    peopleNum = "-"
                                string = "syear_" + carid  # marketTime
                                marketTime = re.compile(string)
                                marketTime = soup.find("td", attrs={"id": marketTime})
                                if marketTime is not None:
                                    marketTime = marketTime.text.strip()
                                    logger.info("marketTime " + marketTime)
                                else:
                                    logger.error(string + " not found!!!!")
                                    marketTime = "-"
                                string = "m_disl_working_mpower_" + carid  # engine
                                engine = re.compile(string)
                                engine = soup.find("td", attrs={"id": engine})
                                if engine is not None:
                                    engine = engine.text.strip()
                                    logger.info("engine " + engine)
                                else:
                                    logger.error(string + " not found!!!")
                                    engine = "-"
                                string = "m_mdisl_" + carid  # displacement
                                displacement = re.compile(string)
                                displacement = soup.find("td", attrs={"id": displacement})
                                if displacement is not None:
                                    displacement = displacement.text.strip()
                                    logger.info("displacement " + displacement)
                                else:
                                    logger.error(string + " not found!!!")
                                    displacement = "-"
                                db.insertTyre2("carInfo1", carid, brands, series, carType,
                                               peopleNum, marketTime, engine, displacement,
                                               first_latter, model)
                                db.dbclose()
                            else:
                                logger.error(modid + u" could not obtain a car id here!")
                                break
                        break
                    else:
                        # page not fetched yet: wait and retry
                        time.sleep(360)
                        html = obj.gethtml()
                fobj = open(path, 'a+')
                print u'Writing ' + first_latter + ' ' + text
                fobj.write(text + '\n')
                fobj.flush()
                fobj.close()
            else:
                logger.info(u"Skipping " + i.a.get("href"))
                print u"Skipping " + i.a.get("href")
                continue  # otherwise move on to the next entry
    print first_latter + u" finished!!!"
    logger.info(first_latter + u" finished!!!")
# -*- coding: utf-8 -*-
import threading
import re
import time
import sys
import json
from bs4 import BeautifulSoup
from getobj import GetObj
from db import ConnectDB
from common import *
import urllib2
import random
import cookielib

db = ConnectDB()
url = "http://newcar.xcar.com.cn/price/"
url2 = "http://newcar.xcar.com.cn"
def threadSpider(brand, url2):
    # record file for this brand
    fname = brand + ".txt"
    path = "../file/" + fname
    fobj = open(path, 'a+')
    fileList = fobj.read().splitlines()
    print fileList
    fobj.close()
    obj = GetObj(url2)
    html = obj.gethtml()
    coding = obj.getcodeing(html)
    soup = BeautifulSoup(html, "html5lib", from_encoding=coding)
    clearfix = re.compile(r'list clearfix')
    clearfix = soup.find_all("div", attrs={"class": clearfix})
    figure = clearfix[1].find_all("a")
    for item in figure:
        flow = item.text.strip()
        streak = re.sub(r'\([^\)]*\)', "", flow)  # tread pattern (parentheses removed)
        logger.info("streak:" + streak)
        href = item.get("href")
        newUrl = url + href
        obj = GetObj(newUrl)
        html = obj.gethtml()
        coding = obj.getcodeing(html)
        soup = BeautifulSoup(html, "html5lib", from_encoding=coding)
        clearfix = re.compile(r'products clearfix')
        clearfix = soup.find("div", attrs={"class": clearfix})
        clearfix2 = re.compile(r'product clearfix')
        clearfix2 = clearfix.find_all("div", attrs={"class": clearfix2})
        for i in clearfix2:
            name = i.a.get("title")  # tyre name
            print name
            logger.info("name:" + name)
            href = i.a.get("href")
            print href
            xx = href.split("/")
            xh = xx[2].split(".")
            tyreid = xh[0]
            if href not in fileList:
                fobj = open(path, 'a+')
                print u'Writing ' + href
                fobj.write(href + '\n')
                fobj.flush()
                fobj.close()
                db = ConnectDB()
                n = db.select(table_name="tyreinfo", field="tyreID", value=tyreid)
                if n != 0:
                    logger.info("tyreID: %s exists" % tyreid)
                    print tyreid + u" exists"
                    continue
                tyreUrl = url + href
                tyreObj = GetObj(tyreUrl)
                tyreHtml = tyreObj.gethtml()
                tyreSoup = BeautifulSoup(tyreHtml, "html5lib", from_encoding=coding)
                basic = re.compile(r'basic free')
                basic = tyreSoup.find("div", attrs={"class": basic})
                fl = re.compile(r'fl')
                fl = basic.find("span", attrs={"class": fl})
                standard = fl.text.strip()  # size standard
                logger.info("standard:" + standard)
                dl = basic.find_all("dl")
                loaded = dl[4].dd.text.strip()  # load index
                # loaded = re.sub(r'\([^\)]*\)', "", loaded)
                logger.info("load:" + loaded)
                speed = dl[5].dd.text.strip()  # speed rating
                # speed = re.sub(r'\([^\)]*\)', "", speed)
                logger.info("speed:" + speed)
                place = dl[6].dd.text.strip()
                logger.info("place:" + place)
                pi3c = re.compile(r'clearfix pi3c')
                pi3c = basic.find("div", attrs={"class": pi3c})
                pi3c = pi3c.find_all("em")
                wearproof = pi3c[0].text.strip()  # treadwear
                # wearproof = ""
                logger.info("wearproof:" + wearproof)
                traction = pi3c[1].text.strip()  # traction
                logger.info("traction:" + traction)
                highTemperature = pi3c[2].text.strip()  # high-temperature rating
                logger.info("highTemperature:" + highTemperature)
                db.insert("tyreinfo", tyreid, brand, streak, name, standard, loaded,
                          speed, wearproof, traction, highTemperature)
                db.dbclose()
            else:
                logger.info(u"Skipping " + href)
                print u"Skipping " + href
                continue
    logger.info("finish:" + brand)
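# A hypothetical driver for threadSpider, assuming one worker thread per brand listing
# page. The brand-to-URL mapping passed in is a placeholder, not real site data.
import threading


def run_brand_spiders(brand_urls):
    workers = []
    for brand, page_url in brand_urls.items():
        w = threading.Thread(target=threadSpider, args=(brand, page_url))
        w.start()
        workers.append(w)
    for w in workers:
        w.join()   # wait for every brand spider to finish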
def SaveDataToCarInfo(table_name="", brand="", series="", type_name="", conf="", index=""):
    conf = json.loads(conf)
    for (k, v) in conf.items():
        spaceid = k
        if v.has_key("0"):
            year = v["0"]
            x = re.compile(r'<(.[^>]*)>')
            year = re.sub(x, "", year)  # strip HTML tags
            year = year.split('.')[0]  # extracted year
            print year
        else:
            year = "error"
        if v.has_key("284"):
            peopleNum = v["284"]
            x = re.compile(r'<(.[^>]*)>')
            peopleNum = re.sub(x, "", peopleNum)
            print peopleNum
        else:
            peopleNum = "error"
        if v.has_key("555"):
            engine = v["555"]
            x = re.compile(r'<(.[^>]*)>')
            engine = re.sub(x, "", engine)
            print engine
        else:
            engine = "error"
        if v.has_key("287"):
            displacement = v["287"]
            x = re.compile(r'<(.[^>]*)>')
            displacement = re.sub(x, "", displacement)
            print displacement
        else:
            displacement = "error"
        # map the Chinese vehicle-class label to a numeric code
        if type_name == u"微型车":
            type = 0
        elif type_name == u"小型车":
            type = 1
        elif type_name == u"紧凑型车":
            type = 2
        elif type_name == u"中型车":
            type = 3
        elif type_name == u"中大型车":
            type = 4
        elif type_name == u"大型车":
            type = 5
        elif type_name == u"SUV":
            type = 6
        elif type_name == u"MPV":
            type = 7
        elif type_name == u"跑车":
            type = 8
        elif type_name == u"皮卡":
            type = 9
        elif type_name == u"微面":
            type = 10
        elif type_name == u"轻客":
            type = 11
        else:
            type = -1  # unrecognised class
        db = ConnectDB()
        n = db.select(table_name="carinfo", field="vechiclesID", value=spaceid)
        if n != 0:
            logger.info("spaceid: %s exists" % spaceid)
            continue
        db.insertTyre(table_name, spaceid, brand, series, type, peopleNum, year,
                      engine, displacement, index)
        db.dbclose()
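# The if/elif ladder above maps a Chinese vehicle-class label to a numeric code. A dict
# lookup is an equivalent, shorter alternative; this sketch reuses the same labels and
# codes, with -1 for an unrecognised class (mirroring the fallback above).
TYPE_CODES = {
    u"微型车": 0, u"小型车": 1, u"紧凑型车": 2, u"中型车": 3,
    u"中大型车": 4, u"大型车": 5, u"SUV": 6, u"MPV": 7,
    u"跑车": 8, u"皮卡": 9, u"微面": 10, u"轻客": 11,
}


def type_code(type_name):
    return TYPE_CODES.get(type_name, -1)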
def thrad(type_name, url2):
    # logger.info("name:%s url: %s" % (type_name, url2))
    url2 = url2.encode("utf-8")  # encode the URL as utf-8
    obj = GetObj(url2)  # create a crawler object
    html = obj.gethtml()  # fetch the page
    coding = obj.getcodeing(html)  # detect the encoding
    soup = BeautifulSoup(html, 'html5lib', from_encoding=coding)
    # print type_name
    logger.info("start %s...." % type_name)
    content = soup.find("div", attrs={"class": ["tab-content-item", "current"]})  # find() returns a single tag, not a list
    soup = BeautifulSoup(str(content), 'html5lib')  # wrap it in a new soup object
    index = soup.find_all('span', attrs={'class': "font-letter"})  # alphabetical index letters
    box = soup.find_all('div', attrs={'class': ["uibox-con", "rank-list", "rank-list-pic"]})
    for (index, box) in zip(index, box):
        # iterate the letter-grouped DIVs together with their index letter
        index = index.text.strip()  # strip surrounding whitespace
        brand_soup = BeautifulSoup(str(box), 'html5lib')  # soup for this letter block
        brand_html = brand_soup.find_all('dl')
        for brand_item in brand_html:
            # brand name
            brand = brand_item.dt.text.strip()
            series_html = brand_item.dd
            series_soup = BeautifulSoup(str(series_html), 'html5lib')  # soup of the <dd> sub-tree
            manufacturer_name = series_soup.find_all('div', attrs={"class": "h3-tit"})  # manufacturer headings
            ul = series_soup.find_all('ul', attrs={"class": "rank-list-ul"})
            for (manufacturer, ul_tag) in zip(manufacturer_name, ul):
                # manufacturer name
                manufacturer = manufacturer.text
                logger.info("start %s...." % manufacturer)
                logger.debug(ul_tag)
                soup = BeautifulSoup(str(ul_tag), 'html5lib')
                w = re.compile(r's\d+')
                litag = soup.find_all('li', id=w)
                for item in litag:
                    # series name
                    series = item.h4.text
                    db = ConnectDB()  # open a database connection
                    n = db.select(table_name="carinfo", field="series", value=series)  # query for the series
                    db.dbclose()  # close the connection
                    if n != 0:
                        logger.info("%s %s %s exists" % (type_name, brand, series))  # the record already exists
                        continue
                    href = item.h4.a.get("href")  # otherwise take its link
                    price = item.div.text  # price
                    url_id = href.split("/")[3]  # url_id
                    # print "●●%s %s %s" % (series, price, url_id)
                    # config-page URL for cars currently on sale
                    sale_conf_url = "http://car.autohome.com.cn/config/series/%s.html" % url_id
                    # config-page URL for discontinued cars
                    stop_sale_conf_url = "http://www.autohome.com.cn/%s/sale.html" % url_id
                    url_dic = {
                        "sale_conf_url": sale_conf_url,
                        "stop_sale_conf_url": stop_sale_conf_url
                    }
                    # threads=[]
                    for (url_name, sale_url) in url_dic.items():
                        if url_name == "sale_conf_url":
                            # on sale
                            status = u"在售"
                            # print sale_url
                            log_mess = "%s:%s %s %s %s %s %s %s" % (
                                status, type_name, index, brand, manufacturer,
                                series, price, url_id)
                            obj = GetObj(sale_url)
                            conf = obj.getconf()
                            if conf:
                                # print conf
                                logger.info(log_mess)
                                """SaveData(table_name="spider_json",  # persist to the database
                                            brand=brand, series=series, conf=conf,
                                            status=status, index=index,
                                            URL_=sale_conf_url, level=type_name,
                                            manufacturer=manufacturer)"""
                                SaveDataToCarInfo("carinfo", brand, series, type_name, conf, index)
                            else:
                                mess = u"no matching configuration found"
                                logger.info("%s %s" % (log_mess, mess))
                        else:
                            # discontinued
                            status = u"停售"
                            obj = GetObj(sale_url)
                            html = obj.gethtml()
                            coding = obj.getcodeing(html)
                            soup = BeautifulSoup(html, 'html5lib', from_encoding=coding)
                            filter_html = soup.find_all('div', attrs={"class": "models_nav"})
                            log_mess = "%s:%s %s %s %s %s %s %s" % (
                                status, type_name, index, brand, manufacturer,
                                series, price, url_id)
                            if filter_html:
                                for item in filter_html:
                                    # follow the '参数配置' (spec/config) link
                                    href = item.find('a', text=u'参数配置').get("href")
                                    stop_sale_conf_url_1 = url + href
                                    obj = GetObj(stop_sale_conf_url_1)
                                    conf = obj.getconf()
                                    if conf:
                                        # print conf
                                        logger.info("%s %s" % (log_mess, href))
                                        """SaveData(table_name="spider_json",
                                                    brand=brand, series=series, conf=conf,
                                                    status=status, index=index,
                                                    level=type_name,
                                                    URL_=stop_sale_conf_url_1)"""
                                        # discontinued models under an on-sale brand
                                        SaveDataToCarInfo("carinfo", brand, series, type_name, conf, index)
                                    else:
                                        mess = u"no matching configuration found"
                                        logger.info("%s %s %s" % (log_mess, mess, href))
                            else:
                                mess = u"no matching configuration found"
                                logger.info("%s %s" % (log_mess, mess))
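# Small illustration of how url_id is derived in thrad(): assuming the series link is an
# absolute autohome URL, splitting on "/" makes element 3 the numeric series id. The href
# below is a made-up example.
href = "http://www.autohome.com.cn/2951/"
url_id = href.split("/")[3]
print url_id   # -> "2951"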
def SaveData(table_name="", brand="", series="", conf="", status="", URL_="", index="", level="", manufacturer=""): conf = json.loads(conf) #转化为python对象 for (k, v) in conf.items(): spaceid = k name = v["567"] x = re.compile(r'span>(.*?)<span') if name == '-': continue name1 = re.search(x, name) if hasattr(name1, "group"): name1 = name1.group(1) else: x = re.compile(r'</span>(.*?)$') name1 = re.search(x, name) if hasattr(name1, "group"): name1 = name1.group(1) else: x = re.compile(r'^(.*?)<span') name1 = re.search(x, name) print name1 year = v["0"] x = re.compile(r'(</span>|>|^)(.*?)($|<|<span)') year = re.search(x, year) year = year.group(2) guide_price = v["219"] x = re.compile( r'(</span>|>|^)([1-9]\d*.\d*|0.\d*[1-9]\d*)($|<|</span>)') #取小数 temp = re.search(x, guide_price) #在数组中搜索 if hasattr(temp, "group"): guide_price = temp.group(2) else: guide_price = 0 print guide_price structure = v["281"] structure = re.search(re.compile(r'(</span>|>|^)(.*?)($|<|<span)'), structure).group(2) print structure emission_standard = v["1072"] emission_standard = re.search( re.compile(r'(</span>|>|^)(.*?)($|<|<span)'), emission_standard).group(2) print emission_standard json_text = json.dumps(v, encoding='utf-8', ensure_ascii=False) db = ConnectDB() n = db.select(table_name="spider_json", field="spaceid", value=spaceid) if n != 0: logger.info("spaceid: %s exists " % spaceid) continue db.insert(table_name=table_name, spaceid=spaceid, brand=brand, series=series, models=name1, guide_price=guide_price, level=level, emission_standard=emission_standard, structure=structure, status=status, manufacturer=manufacturer, year=year, index=index, json_text="", URL_=URL_) db.dbclose()
peopleNum = v["284"] x = re.compile(r'<(.[^>]*)>') peopleNum = re.sub(x, "", peopleNum) print peopleNum engine = v["555"] x = re.compile(r'<(.[^>]*)>') engine = re.sub(x, "", engine) print engine displacement = v["287"] x = re.compile(r'<(.[^>]*)>') displacement = re.sub(x, "", displacement) print displacement db = ConnectDB() db.insertTyre("carinfo", spaceid, "", "", 1, peopleNum, year, engine, displacement, "S") db.dbclose() """for (k,v) in conf_json.items(): spaceid = k name = v["<span class='hs_kw0_configpl'></span><span class='hs_kw1_configpl'></span>"] x = re.compile(r'span>(.*?)<span') if name == '-': continue name1 = re.search(x,name) if hasattr(name1,"group"): name1 = name1.group(1) else: x = re.compile(r'</span>(.*?)$') name1 = re.search(x,name)
def thrad(type_name, url2):
    logger.info("name:%s url: %s" % (type_name, url2))
    url2 = url2.encode("utf-8")
    obj = GetObj(url2)
    html = obj.gethtml()
    coding = obj.getcodeing(html)
    soup = BeautifulSoup(html, 'html5lib', from_encoding=coding)
    # print type_name
    logger.info("start %s...." % type_name)
    content = soup.find("div", attrs={"class": ["tab-content-item", "current"]})
    soup = BeautifulSoup(str(content), 'html5lib')
    index = soup.find_all('span', attrs={'class': "font-letter"})
    box = soup.find_all('div', attrs={'class': ["uibox-con", "rank-list", "rank-list-pic"]})
    for (index, box) in zip(index, box):
        # iterate the letter-grouped DIVs together with their index letter
        index = index.text.strip()
        brand_soup = BeautifulSoup(str(box), 'html5lib')
        brand_html = brand_soup.find_all('dl')
        for brand_item in brand_html:
            # brand name
            brand = brand_item.dt.text.strip()
            series_html = brand_item.dd
            series_soup = BeautifulSoup(str(series_html), 'html5lib')
            manufacturer_name = series_soup.find_all('div', attrs={"class": "h3-tit"})
            ul = series_soup.find_all('ul', attrs={"class": "rank-list-ul"})
            for (manufacturer, ul_tag) in zip(manufacturer_name, ul):
                # manufacturer name
                manufacturer = manufacturer.text
                logger.info("start %s...." % manufacturer)
                logger.debug(ul_tag)
                soup = BeautifulSoup(str(ul_tag), 'html5lib')
                w = re.compile(r's\d+')
                litag = soup.find_all('li', id=w)
                for item in litag:
                    # series name
                    series = item.h4.text
                    db = ConnectDB()
                    n = db.select(table_name="spider_json", field="series", value=series)
                    db.dbclose()
                    if n != 0:
                        logger.info("%s %s %s exists" % (type_name, brand, series))
                        continue
                    href = item.h4.a.get("href")
                    price = item.div.text
                    url_id = href.split("/")[3]
                    # print "●●%s %s %s" % (series, price, url_id)
                    # config-page URL for cars currently on sale
                    sale_conf_url = "http://car.autohome.com.cn/config/series/%s.html" % url_id
                    # config-page URL for discontinued cars
                    stop_sale_conf_url = "http://www.autohome.com.cn/%s/sale.html" % url_id
                    url_dic = {
                        "sale_conf_url": sale_conf_url,
                        "stop_sale_conf_url": stop_sale_conf_url
                    }
                    # threads=[]
                    for (url_name, sale_url) in url_dic.items():
                        if url_name == "sale_conf_url":
                            # on sale
                            status = u"在售"
                            # print sale_url
                            log_mess = "%s:%s %s %s %s %s %s %s" % (
                                status, type_name, index, brand, manufacturer,
                                series, price, url_id)
                            obj = GetObj(sale_url)
                            conf = obj.getconf()
                            if conf:
                                # print conf
                                logger.info(log_mess)
                                SaveData(table_name="spider_json",
                                         brand=brand,
                                         series=series,
                                         conf=conf,
                                         status=status,
                                         index=index,
                                         URL_=sale_conf_url)
                            else:
                                mess = u"no matching configuration found"
                                logger.info("%s %s" % (log_mess, mess))
                        else:
                            # discontinued
                            status = u"停售"
                            obj = GetObj(sale_url)
                            html = obj.gethtml()
                            coding = obj.getcodeing(html)
                            soup = BeautifulSoup(html, 'html5lib', from_encoding=coding)
                            filter_html = soup.find_all('div', attrs={"class": "models_nav"})
                            log_mess = "%s:%s %s %s %s %s %s %s" % (
                                status, type_name, index, brand, manufacturer,
                                series, price, url_id)
                            if filter_html:
                                for item in filter_html:
                                    # follow the '参数配置' (spec/config) link
                                    href = item.find('a', text=u'参数配置').get("href")
                                    stop_sale_conf_url_1 = url + href
                                    obj = GetObj(stop_sale_conf_url_1)
                                    conf = obj.getconf()
                                    if conf:
                                        # print conf
                                        logger.info("%s %s" % (log_mess, href))
                                        SaveData(table_name="spider_json",
                                                 brand=brand,
                                                 series=series,
                                                 conf=conf,
                                                 status=status,
                                                 index=index,
                                                 URL_=stop_sale_conf_url_1)
                                        # discontinued models under an on-sale brand
                                    else:
                                        mess = u"no matching configuration found"
                                        logger.info("%s %s %s" % (log_mess, mess, href))
                            else:
                                mess = u"no matching configuration found"
                                logger.info("%s %s" % (log_mess, mess))