示例#1
0
 def parse(self, response):
     """Yield one follow-up request per id found on the listing page.

     Rows are selected with ``self.townid_xpath``.  For each row the
     ``onclick`` attribute of the input in the third cell is read and the
     integer between the first ``(`` and the following ``)`` is taken as
     the id.  Rows that carry no such integer are skipped.

     :param response: the response to scrape.
     :returns: generator of whatever ``self.callback_playa`` returns, called
         with the detail URL (``self.playa_url`` + ``?id=<id>``), the id and
         ``self.parse_playa``.
     """
     hxs = Selector(response)

     # find town id
     for row in hxs.xpath(self.townid_xpath):
         onclick = row.xpath('./td[3]/input/@onclick').extract()
         try:
             # Keep only the text between the first '(' and the next ')'.
             found_id = int(onclick[0].split('(')[1].split(')')[0])
         except (IndexError, ValueError):
             # IndexError: no onclick attribute, or no '(' in it.
             # ValueError: the text inside the parentheses is not an integer.
             continue

         yield self.callback_playa(
             self.playa_url + "?id=%d" % found_id,
             found_id,
             self.parse_playa,
         )
示例#2
0
 def parse_playa(self, response, idplaya):
     """Scrape one detail page and upsert the result into ``self.collection``.

     Every field listed in the local ``playa_xpath`` table is extracted from
     the last node matched by ``self.playa_xpath``; a field whose XPath
     yields nothing is stored as ``''``.  The stored document is keyed by
     ``idplaya`` and has the shape::

         {'nombre_playa': ..., 'municipio': ..., 'provincia': ...,
          '<YYYY-MM-DD>': {<all remaining fields>, 'id': idplaya}}

     where the date key is today's local date, so each day's scrape is kept
     under its own key of the same document.

     If ``self.playa_xpath`` matches nothing, a warning is logged and nothing
     is written.

     :param response: the response for the detail page.
     :param idplaya: id used as ``_id`` of the stored document and copied
         into the per-day sub-document as ``'id'``.
     :returns: None.
     """
     log.msg(response.url, log.DEBUG)
     hxs = Selector(response)

     # Field name -> XPath.  Paths starting with './' are relative to the
     # node matched by self.playa_xpath; 'consejo' is absolute.
     playa_xpath = {
         'nombre_playa': './tr[1]/td/table/tr[1]/td[2]/div/text()',
         'provincia': './tr[1]/td/table/tr[2]/td/div[1]/ul/li[2]/text()',
         'municipio': './tr[1]/td/table/tr[2]/td/div[2]/ul/li[2]/text()',
         'cobertura_desde': './tr[1]/td/table/tr[2]/td/div[3]/ul/li[2]/text()',
         'cobertura_hasta': './tr[1]/td/table/tr[2]/td/div[3]/ul/li[4]/text()',
         'horario': './tr[1]/td/table/tr[2]/td/div[4]/ul/li[2]/text()',
         'bandera': './tr[2]/td/div/fieldset/div[1]/div[1]/ul/li[2]/img/@alt',
         'sello_aenor_iso_9001': './tr[2]/td/div/fieldset/div[1]/div[2]/ul/li[3]/text()',
         'sello_aenor_iso_14001': './tr[2]/td/div/fieldset/div[1]/div[3]/ul/li[3]/text()',
         'cantidad_puestos': './tr[2]/td/div/fieldset/div[1]/div[4]/ul/li[3]/text()',
         'sillas_proximidad': './tr[2]/td/div/fieldset/div[1]/div[5]/ul/li[3]/text()',
         'cantidad_torres_vigilacia': './tr[2]/td/div/fieldset/div[1]/div[6]/ul/li[3]/text()',
         'cantidad_torres_intervencion': './tr[2]/td/div/fieldset/div[1]/div[7]/ul/li[3]/text()',
         'medusas': './tr[2]/td/div/fieldset/div[1]/div[8]/ul/li[3]/text()',
         'ayuda_banio': './tr[2]/td/div/fieldset/div[1]/div[9]/ul/li[3]/text()',
         'frecuencia_atencion': './tr[2]/td/div/fieldset/div[1]/div[10]/ul/li[3]/text()',
         'atencion_discapacitados': './tr[3]/td/div/fieldset/div[1]/div[1]/ul/li[2]/text()',
         'acceso_discapacitados': './tr[3]/td/div/fieldset/div[1]/div[2]/ul/li[2]/text()',
         # NOTE(review): '******' does not look like a valid XPath, so this
         # field presumably always falls back to ''.  The neighbouring
         # entries suggest .../div[1]/div[3]/... was intended -- confirm
         # against the page before changing it.
         'rampas': '******',
         'servicios_wc': './tr[3]/td/div/fieldset/div[1]/div[4]/ul/li[2]/text()',
         'duchas': './tr[3]/td/div/fieldset/div[1]/div[5]/ul/li[2]/text()',
         'vestuarios': './tr[3]/td/div/fieldset/div[1]/div[6]/ul/li[2]/text()',
         'parking': './tr[3]/td/div/fieldset/div[1]/div[7]/ul/li[2]/text()',
         'sombra': './tr[3]/td/div/fieldset/div[1]/div[8]/ul/li[2]/text()',
         'cantidad_sillas_adaptadas': './tr[3]/td/div/fieldset/div[2]/div[2]/ul/li[2]/text()',
         'pasarelas': './tr[3]/td/div/fieldset/div[2]/div[3]/ul/li[2]/text()',
         'atencion_cre': './tr[3]/td/div/fieldset/div[2]/div[4]/ul/li[2]/text()',
         'atencion_familiar': './tr[3]/td/div/fieldset/div[2]/div[5]/ul/li[2]/text()',
         'horario_atencion': './tr[3]/td/div/fieldset/div[2]/div[1]/ul/li[2]/text()',
         'lunes': './tr[3]/td/div/fieldset/div[2]/div[1]/ul/li[3]/table/tr[2]/td[1]/text()',
         'martes': './tr[3]/td/div/fieldset/div[2]/div[1]/ul/li[3]/table/tr[2]/td[2]/text()',
         'miercoles': './tr[3]/td/div/fieldset/div[2]/div[1]/ul/li[3]/table/tr[2]/td[3]/text()',
         'jueves': './tr[3]/td/div/fieldset/div[2]/div[1]/ul/li[3]/table/tr[2]/td[4]/text()',
         'viernes': './tr[3]/td/div/fieldset/div[2]/div[1]/ul/li[3]/table/tr[2]/td[5]/text()',
         'sabado': './tr[3]/td/div/fieldset/div[2]/div[1]/ul/li[3]/table/tr[2]/td[6]/text()',
         'domingo': './tr[3]/td/div/fieldset/div[2]/div[1]/ul/li[3]/table/tr[2]/td[7]/text()',
         'consejo': '//html/body/table/tr[5]/td/table/tr[4]/td/text()',
     }

     rows = hxs.xpath(self.playa_xpath)
     if not rows:
         # Without a matched node there is nothing to store; previously this
         # path crashed with a NameError on the unbound ``playa``.
         log.msg("no data matched on %s" % response.url, log.WARNING)
         return

     # When several nodes match, only the last one is kept (this is what the
     # previous row-by-row loop ended up storing as well).
     row = rows[-1]

     playa = {}
     for key, xpath in playa_xpath.items():
         try:
             playa[key] = row.xpath(xpath).extract()[0]
         except (IndexError, ValueError):
             # IndexError: the XPath matched nothing.
             # ValueError: the selector rejected the XPath as invalid.
             playa[key] = ''
     playa['id'] = idplaya

     today = time.strftime("%Y-%m-%d")
     # The three identifying fields live at the top level of the document;
     # pop() moves them out so they are not duplicated in the per-day entry.
     doc = {
         'nombre_playa': playa.pop('nombre_playa'),
         'municipio': playa.pop('municipio'),
         'provincia': playa.pop('provincia'),
         today: playa,
     }

     # NOTE(review): Collection.update() is deprecated in PyMongo 3 and
     # removed in PyMongo 4; if the project runs PyMongo >= 3.0 this should
     # become update_one() with the same arguments -- confirm the version.
     self.collection.update({"_id": idplaya}, {"$set": doc}, upsert=True)