Пример #1
0
    def parse_final(self, response):
        """Build a single item from the XPath rules carried in ``response.meta``.

        Two entries are read from ``response.meta``:

        * ``Final_Xpath`` -- mapping of field name to a list of XPath
          expressions. The expression ``"/"`` is a placeholder meaning
          "no expression". The ``site_name`` entry is special: its values
          are loaded as literal values and are never evaluated as XPath.
        * ``Some_Info`` -- optional mapping of extra field name to value,
          copied onto the item unchanged.

        Every field listed in ``Final_Xpath`` is loaded. When none of a
        field's expressions matches anything (regardless of how many
        expressions there are), the field is loaded as ``""`` instead of
        being left out.

        Nothing is yielded when ``Final_Xpath`` is missing from the meta
        (an error is logged) or when it contains an ``All_Xpath`` entry.

        Yields:
            The item returned by ``ItemLoader.load_item()``.
        """
        import logging

        log = logging.getLogger(__name__)

        # NOTE: Final_Xpath is handed over through response.meta only once;
        # any modification made to it here cannot be undone, so treat it
        # as read-only.
        Final_Xpath = response.meta.get('Final_Xpath', None)
        Some_Info = response.meta.get('Some_Info', None)

        if Final_Xpath is None:
            log.error("parse_final: 'Final_Xpath' is missing from response.meta")
            return
        if 'All_Xpath' in Final_Xpath:
            return

        item = MovieSpiderItem()
        loader = ItemLoader(item=item, response=response)

        for key, xpaths in Final_Xpath.items():
            item.fields[key] = Field()
            try:
                if key == "site_name":
                    # Literal values: test the key first so they are never
                    # handed to response.xpath(), where a value that is not
                    # valid XPath would raise and the field would be lost.
                    for value in xpaths:
                        loader.add_value(key, value)
                else:
                    # Evaluate each expression once; the "/" placeholder
                    # counts as "no match" and is never loaded.
                    extracted = [
                        (xpath,
                         response.xpath(xpath).extract() if xpath != "/" else "")
                        for xpath in xpaths
                    ]
                    if not any(values for _, values in extracted):
                        # ItemLoader silently skips a field when add_xxx
                        # finds no value; we want the field present anyway,
                        # so load an explicit empty string.
                        loader.add_value(key, "")
                    else:
                        for xpath, values in extracted:
                            if values:
                                loader.add_xpath(key, xpath)
            except Exception:
                # Best effort per field: one bad expression must not abort
                # the whole item, but keep the traceback in the log.
                log.exception("parse_final: failed to load field %r", key)

        if Some_Info:
            for key, value in Some_Info.items():
                item.fields[key] = Field()
                loader.add_value(key, value)

        yield loader.load_item()
Пример #2
0
         except Exception, e:
             print Exception, ":", e
     if Some_Info:
         for key in Some_Info.keys():
             item.fields[key] = Field()
             l.add_value(key, Some_Info[key])
     yield l.load_item()
 else:
     #感觉这里不能用itemloader的add_xxx方法了,因为要先找到一个页面所有的含有目标item的块,再在每个块里面提取出单个item,itemloader的话是一次性直接全取出,add_xpath不能再细分了;;打算用add_value方法
     my_Final_Xpath = Final_Xpath.copy()
     All_Xpath = my_Final_Xpath['All_Xpath'].copy()
     del my_Final_Xpath['All_Xpath']
     all_xpath = All_Xpath['all_xpath']
     del All_Xpath['all_xpath']
     for i in response.xpath(all_xpath[0]):
         item = MovieSpiderItem()
         l = ItemLoader(item=item, response=response)
         #把All_Xpath中的数据提取出来
         for key in All_Xpath.keys():
             item.fields[key] = Field()
             try:
                 #itemloader在add_xxx方法找不到值的时候,会自动忽略这个字段,可是我不想忽略它,这时候需要将其置为空("")
                 if map(
                         lambda x: 1 if x else 0,
                         map(
                             lambda x: response.xpath(x).extract()
                             if x != "/" else "",
                             Final_Xpath[key])) in [[0, 0], [0]]:
                     map(lambda x: l.add_value(key, ""), ["just_one"])
                 else:
                     map(