def parse_final(self, response): #我去,这个Final_Xpath竟然只会传递一次......你要是动了这个Final_Xpath,那就无法修改回来了 Final_Xpath = response.meta.get('Final_Xpath', None) Some_Info = response.meta.get('Some_Info', None) if 'All_Xpath' not in Final_Xpath.keys(): item = MovieSpiderItem() l = ItemLoader(item=item, response=response) for key in Final_Xpath.keys(): item.fields[key] = Field() try: #itemloader在add_xxx方法找不到值的时候,会自动忽略这个字段,可是我不想忽略它,这时候需要将其置为空("") if map( lambda x: 1 if x else 0, map( lambda x: response.xpath(x).extract() if x != "/" else "", Final_Xpath[key])) in [[ 0, 0 ], [0]] and key != "site_name": map(lambda x: l.add_value(key, ""), ["just_one"]) elif key == "site_name": map(lambda x: l.add_value(key, x), Final_Xpath[key]) else: map( lambda x: l.add_xpath(key, x) if response.xpath(x).extract() != [] else "", Final_Xpath[key]) except Exception, e: print Exception, ":", e if Some_Info: for key in Some_Info.keys(): item.fields[key] = Field() l.add_value(key, Some_Info[key]) yield l.load_item()
except Exception, e: print Exception, ":", e if Some_Info: for key in Some_Info.keys(): item.fields[key] = Field() l.add_value(key, Some_Info[key]) yield l.load_item() else: #感觉这里不能用itemloader的add_xxx方法了,因为要先找到一个页面所有的含有目标item的块,再在每个块里面提取出单个item,itemloader的话是一次性直接全取出,add_xpath不能再细分了;;打算用add_value方法 my_Final_Xpath = Final_Xpath.copy() All_Xpath = my_Final_Xpath['All_Xpath'].copy() del my_Final_Xpath['All_Xpath'] all_xpath = All_Xpath['all_xpath'] del All_Xpath['all_xpath'] for i in response.xpath(all_xpath[0]): item = MovieSpiderItem() l = ItemLoader(item=item, response=response) #把All_Xpath中的数据提取出来 for key in All_Xpath.keys(): item.fields[key] = Field() try: #itemloader在add_xxx方法找不到值的时候,会自动忽略这个字段,可是我不想忽略它,这时候需要将其置为空("") if map( lambda x: 1 if x else 0, map( lambda x: response.xpath(x).extract() if x != "/" else "", Final_Xpath[key])) in [[0, 0], [0]]: map(lambda x: l.add_value(key, ""), ["just_one"]) else: map(