Пример #1
0
 def desc_parser(self):
     """Build the description from the bold heading and the plain text.

     The ``b/text()`` nodes, when any are found, are joined, lower-cased
     and capitalized.  The ``text()`` nodes are joined with ``', '`` given
     as the second ``join_strings`` argument.  The pieces are finally
     joined with ``' '``.

     Returns:
         A one-element list holding the combined string.
     """
     heading = self.sel.xpath('b/text()').extract()
     parts = [join_strings(heading).lower().capitalize()] if heading else []
     parts.append(join_strings(self.sel.xpath('text()').extract(), ', '))
     return [join_strings(parts, ' ')]
Пример #2
0
 def desc_parser(self):
     """Return ``[description]`` assembled from the selector's own text.

     An optional caption comes from the ``b/text()`` nodes (joined,
     lower-cased, then capitalized); the remainder comes from the
     ``text()`` nodes joined with ``', '`` as the second ``join_strings``
     argument.  Caption and remainder are joined with ``' '``.
     """
     pieces = []
     bold_nodes = self.sel.xpath('b/text()').extract()
     if bold_nodes:
         caption = join_strings(bold_nodes)
         pieces.append(caption.lower().capitalize())
     plain_nodes = self.sel.xpath('text()').extract()
     pieces.append(join_strings(plain_nodes, ', '))
     combined = join_strings(pieces, ' ')
     return [combined]
Пример #3
0
 def prices(self):
     """Return the price dict, computing and caching it on first use.

     The dict has two keys, each mapped to a one-element list:

     * ``'price'`` -- ``'<price> <mesure>'``, or ``''`` when the parsed
       price is empty;
     * ``'price_digit'`` -- the result of
       ``self.digit_price(price, mesure)``.

     The dict is stored in ``self._prices`` and returned as-is for as long
     as that attribute is truthy.
     """
     if self._prices:
         return self._prices
     price = join_strings(self.price_parser())
     mesure = join_strings(self.mesure_parser())
     if price:
         label = '%s %s' % (price, mesure)
     else:
         label = ''
     self._prices = {
         'price': [label],
         'price_digit': [self.digit_price(price, mesure)],
     }
     return self._prices
Пример #4
0
 def prices(self):
     """Return ``{'price': [...], 'price_digit': [...]}``, cached lazily.

     On the first call (while ``self._prices`` is falsy) the price and its
     measure are taken from ``self.price_parser()`` and
     ``self.mesure_parser()``.  The text form is ``'<price> <mesure>'`` or
     ``''`` for an empty price; the numeric form comes from
     ``self.digit_price``.  Later calls return the stored dict.
     """
     if not self._prices:
         price = join_strings(self.price_parser())
         mesure = join_strings(self.mesure_parser())
         formatted = '%s %s' % (price, mesure) if price else ''
         digits = self.digit_price(price, mesure)
         self._prices = dict(price=[formatted], price_digit=[digits])
     return self._prices
Пример #5
0
 def process_section(self, section):
     """Report whether a section node marks a sale listing.

     The node's ``text()`` is joined, split on the arrow character and
     every piece is stripped.

     Args:
         section: selector whose ``text()`` nodes hold the section chain.

     Returns:
         ``{'sale': True, 'section': <pieces joined with ' '>}`` when one
         of the pieces equals the sale marker, otherwise
         ``{'sale': False, 'section': None}``.
     """
     sale_marker = u'Продаю'  # "Selling"
     raw_text = join_strings(section.xpath("text()").extract())
     crumbs = [piece.strip() for piece in raw_text.split(u"→")]
     if sale_marker not in crumbs:
         return {'sale': False, 'section': None}
     return {'sale': True, 'section': join_strings(crumbs, ' ')}
Пример #6
0
 def process_section(self, section):
     """Split a section chain on arrows and detect the sale marker.

     Args:
         section: selector whose ``text()`` nodes are joined and split.

     Returns:
         A dict with ``'sale'`` (True when a stripped piece equals the
         marker) and ``'section'`` (the pieces joined with ``' '`` for a
         sale, ``None`` otherwise).
     """
     marker = u'Продаю'  # "Selling"
     text = join_strings(section.xpath("text()").extract())
     parts = []
     for chunk in text.split(u"→"):
         parts.append(chunk.strip())
     is_sale = marker in parts
     section_name = join_strings(parts, ' ') if is_sale else None
     return {'sale': is_sale, 'section': section_name}
Пример #7
0
 def process_detail_page(self, sel, response):
     """Build a ``RealtyItem`` from a listing detail page.

     Args:
         sel: selector of the detail page; queried with XPath.
         response: response of the page; only ``response.url`` is read,
             to fill ``item['link']``.

     Returns:
         The item with ``do_not_process``, ``phone``, ``name``, ``desc``,
         ``price``, ``price_digit``, ``link``, ``estate_type_id``,
         ``region_id``, ``locality_id`` and ``room_count`` set.
     """
     def text_of(xpath):
         # Extracted strings for an XPath evaluated on the page selector.
         return sel.xpath(xpath).extract()

     saler_type = join_strings(
         text_of('//div[@class="c_face"]/span[@class="lgrey"]/text()'))
     address = join_strings(
         text_of('//*[@id="list_sale"]/div[@class="card_block"][1]/text()'),
         ', ')
     name = join_strings(text_of('//div[@class="c_face"]/text()'))
     # NOTE(review): join_strings is applied twice; the outer call receives
     # the already joined string -- confirm this is intended.
     phone_str = join_strings(
         join_strings(text_of('//div[@class="c_phone"]/text()')))
     description = join_strings(
         text_of('//*[@id="list_sale"]/div[@class="card_block"]/text()'),
         '\n')
     price = join_strings(text_of('//div[@class="card_price"]/text()'))
     mesure = join_strings(
         text_of('//div[@class="card_price"]/span/text()'))

     item = RealtyItem()
     item['do_not_process'] = not self.check_saler_type(saler_type)
     item['phone'] = self.filter_phone(phone_str)
     item['name'] = [name]
     item['desc'] = [description]
     item['price'] = ['%s %s' % (price, mesure)]
     item['price_digit'] = [self.get_digit_price(price, mesure)]
     item['link'] = [response.url]
     item['estate_type_id'] = self.estate_type_parser(sel)
     item['region_id'] = self.region_parser(address)
     # The locality lookup is given the region id stored just above.
     item['locality_id'] = self.locality_parser(item['region_id'], address)
     # 'microdistrict', 'street' and 'estate_number' are not populated here.
     item['room_count'] = self.room_count_parser(self.page_title(sel))
     return item
Пример #8
0
 def commerce_parser(self):
     """Resolve the commercial estate type id from the listing text.

     The first three whitespace-separated words of the justified paragraph
     inside the ``media-body`` div are matched, via ``self.re_mapper``,
     against patterns built from the ``name`` and ``name_accs`` of every
     ``EstateType`` in the commerce category.  The pattern-to-id mapping
     is kept in the Django cache and rebuilt whenever the cached value is
     missing or empty.

     Returns:
         The matched estate type id, or ``self.ZDANIE`` when
         ``self.re_mapper`` returns a falsy value.
     """
     import logging
     from django.core.cache import cache

     COMMERCE_CAT_ID = 6
     CACHE_KEY = 'commerce_mapper_smart'
     CACHE_TIMEOUT = 3600  # timeout passed to cache.set

     full_txt = join_strings(
         self.sel.xpath(
             '//div[@class="media-body"]/p[@class="text_justify"]/text()'
         ).extract())
     txt = join_strings(full_txt.split()[:3], ' ')
     # Diagnostic output goes through logging instead of a bare print.
     logging.getLogger(__name__).debug('commerce parser text: %s', txt)

     mapper = cache.get(CACHE_KEY)
     if not mapper:
         mapper = {}
         estate_types = EstateType.objects.filter(
             estate_type_category_id=COMMERCE_CAT_ID)
         for estate_type in estate_types:
             # u'%s\\s' is the same string as the Python 2-only ur'%s\s'
             # spelling, but also parses on Python 3.
             mapper[u'%s\\s' % estate_type.name] = estate_type.id
             mapper[u'%s\\s' % estate_type.name_accs] = estate_type.id
         cache.set(CACHE_KEY, mapper, CACHE_TIMEOUT)
     return self.re_mapper(mapper, txt) or self.ZDANIE
Пример #9
0
 def desc_parser(self):
     """Return the description parts as a three-element list.

     The list holds ``self.title()``, a newline string, and every text
     node found under a div with ``itemprop="description"``, joined with
     ``', '`` as the second ``join_strings`` argument.
     """
     heading = self.title()
     body_nodes = self.sel.xpath(
         '//div[@itemprop="description"]//text()').extract()
     return [heading, '\n', join_strings(body_nodes, ', ')]
Пример #10
0
 def filter_phone(self):
     """Return the parsed phone numbers as digit-only strings.

     The output of ``self.phone_parser()`` is joined with ``','`` and split
     on commas, pipes and newlines.  Each piece is stripped, has ``'+7'``
     replaced by ``'8'`` and then loses every non-digit character.

     Returns:
         ``None`` when the parser yields nothing; otherwise a list with one
         string per split piece (a piece without digits becomes ``''``).
     """
     raw = self.phone_parser()
     if not raw:
         return None
     phone_str = join_strings(raw, ',')
     # Inside a character class ``|`` is a literal, so pipes split as well.
     pieces = re.split(r'[\,|\n]', phone_str)
     # Raw pattern: '\D' in a plain literal is an invalid escape sequence.
     return [
         re.sub(r'\D', '', piece.strip().replace('+7', '8'))
         for piece in pieces
     ]
Пример #11
0
 def filter_phone(self):
     """Normalize the phones from ``self.phone_parser()`` to digit strings.

     The parser output is joined with ``','``, split on commas, pipes and
     newlines, and each piece is stripped, gets ``'+7'`` replaced by
     ``'8'`` and is reduced to its digits.

     Returns:
         ``None`` if the parser result is falsy, else the list of
         normalized pieces in their original order.
     """
     parsed = self.phone_parser()
     if not parsed:
         return None
     # Raw pattern: '\D' in a plain literal is an invalid escape sequence.
     non_digits = re.compile(r'\D')
     result = []
     # ``|`` inside the character class is literal: pipes separate too.
     for chunk in re.split(r'[\,|\n]', join_strings(parsed, ',')):
         normalized = chunk.strip().replace('+7', '8')
         result.append(non_digits.sub('', normalized))
     return result
Пример #12
0
 def process_detail_page(self, sel, response):
     """Collect the fields of a detail page into a ``RealtyItem``.

     Args:
         sel: selector of the detail page; every field is read via XPath.
         response: page response; only its ``url`` is used.

     Returns:
         The populated ``RealtyItem``.  Most scalar values are wrapped in
         one-element lists; the ``*_id`` fields, ``phone``, ``room_count``
         and ``do_not_process`` hold whatever the respective helper returns.
     """
     saler_nodes = sel.xpath(
         '//div[@class="c_face"]/span[@class="lgrey"]/text()')
     saler_type = join_strings(saler_nodes.extract())

     address_nodes = sel.xpath(
         '//*[@id="list_sale"]/div[@class="card_block"][1]/text()')
     address = join_strings(address_nodes.extract(), ', ')

     name_nodes = sel.xpath('//div[@class="c_face"]/text()')
     name = join_strings(name_nodes.extract())

     phone_nodes = sel.xpath('//div[@class="c_phone"]/text()')
     # NOTE(review): the joined phone text is passed through join_strings a
     # second time -- confirm this is intended.
     phone_str = join_strings(join_strings(phone_nodes.extract()))

     desc_nodes = sel.xpath(
         '//*[@id="list_sale"]/div[@class="card_block"]/text()')
     description = join_strings(desc_nodes.extract(), '\n')

     price_nodes = sel.xpath('//div[@class="card_price"]/text()')
     price = join_strings(price_nodes.extract())

     mesure_nodes = sel.xpath('//div[@class="card_price"]/span/text()')
     mesure = join_strings(mesure_nodes.extract())

     item = RealtyItem()
     item['do_not_process'] = not self.check_saler_type(saler_type)
     item['phone'] = self.filter_phone(phone_str)
     item['name'] = [name]
     item['desc'] = [description]
     item['price'] = ['%s %s' % (price, mesure)]
     item['price_digit'] = [self.get_digit_price(price, mesure)]
     item['link'] = [response.url]
     item['estate_type_id'] = self.estate_type_parser(sel)
     item['region_id'] = self.region_parser(address)
     # The locality lookup reads back the region id stored just above.
     item['locality_id'] = self.locality_parser(item['region_id'], address)
     # 'microdistrict', 'street' and 'estate_number' are left unset here.
     item['room_count'] = self.room_count_parser(self.page_title(sel))
     return item
Пример #13
0
 def desc_parser(self):
     """Return ``[title, newline, body]`` for the listing.

     The body is every text node under a div with class ``media-body``,
     joined with ``', '`` as the second ``join_strings`` argument.
     """
     parts = [self.title(), '\n']
     media_text = self.sel.xpath(
         '//div[@class="media-body"]//text()').extract()
     parts.append(join_strings(media_text, ', '))
     return parts
Пример #14
0
 def room_count(self):
     """Return the digits of the parsed room count.

     Returns:
         The joined output of ``self.room_count_parser()`` with every
         non-digit character removed, or ``None`` when the parser yields
         nothing.
     """
     parsed = self.room_count_parser()
     if not parsed:
         return None
     # Raw pattern: '\D' in a plain literal is an invalid escape sequence.
     return re.sub(r'\D', '', join_strings(parsed))
Пример #15
0
 def title_parser(self):
     """Compose the title from the bold text and the start of the body.

     Returns:
         ``u'<bold text> <first three words>'``, where the words are the
         first three whitespace-separated tokens of the joined ``text()``
         nodes.
     """
     heading = join_strings(self.sel.xpath('b/text()').extract())
     plain = join_strings(self.sel.xpath('text()').extract(), ' ')
     lead_words = plain.split()[:3]
     return u'%s %s' % (heading, join_strings(lead_words, ' '))
Пример #16
0
                         ur'офис': 35,
                         ur'склад': 53,
#                         'kommercheskaya-nedvizhimost' : 93,
                         }  
        result = self.re_mapper(mapper, page_title)
        if callable(result):
            return result(sel)
        return result or ZDANIE 
    
    def stead_parser(self, sel):
        """Return the id mapped to the land-use phrase found in the card block.

        The ``card_block`` div(s) under the element with id ``list_sale``
        are extracted, joined and searched via ``self.re_mapper``.

        Args:
            sel: selector of the detail page.

        Returns:
            15, 42 or 20 for a matching phrase; otherwise whatever
            ``self.re_mapper`` returns for no match.
        """
        import logging

        # Plain u'' literals: the phrases hold no backslashes, so the
        # Python 2-only ur'' prefix is not needed.
        mapper = {
            # "individual housing construction"
            u'индивидуальное жилищное строительство': 15,
            # "agricultural purpose"
            u'сельскохозяйственного назначения': 42,
            # "commercial construction"
            u'коммерческое строительство': 20,
        }
        txt = join_strings(
            sel.xpath(
                '//*[@id="list_sale"]/div[@class="card_block"]').extract())
        # Diagnostic output goes through logging instead of a bare print.
        logging.getLogger(__name__).debug('stead parser %s', txt)
        return self.re_mapper(mapper, txt)
    
    def re_mapper(self, mapper, txt):
        """Return the value of the first pattern in *mapper* found in *txt*.

        Args:
            mapper: dict mapping regular-expression strings to values.
            txt: text to search.

        Returns:
            The value of the first pattern, in the dict's iteration order,
            for which ``re.search`` with ``re.I | re.U`` succeeds; ``None``
            when no pattern matches.
        """
        # items() rather than iteritems(): available on Python 2 and 3.
        for pattern, value in mapper.items():
            if re.search(pattern, txt, re.I | re.U):
                return value
        return None
    
    def get_digit_price(self, price, mesure):
        mesures = {u'тыс. руб.':1000}
        if price:
            if mesure in mesures:
                price_digit = float(price)
                price_digit = int(price_digit * mesures[mesure])                
Пример #17
0
 def title(self):
     """Return the title, parsing it on first use.

     While ``self._title`` is falsy, the output of ``self.title_parser()``
     is passed through ``join_strings`` and stored in ``self._title``;
     otherwise the stored value is returned unchanged.
     """
     if self._title:
         return self._title
     self._title = join_strings(self.title_parser())
     return self._title
Пример #18
0
 def room_count(self):
     """Reduce the output of ``self.room_count_parser()`` to its digits.

     Returns:
         The joined parser output stripped of non-digit characters, or
         ``None`` when the parser result is falsy.
     """
     room_parser_result = self.room_count_parser()
     if room_parser_result:
         joined = join_strings(room_parser_result)
         # Raw pattern: '\D' in a plain literal is an invalid escape.
         return re.sub(r'\D', '', joined)
     return None
Пример #19
0
 def page_title(self, sel):
     """Return the joined ``h1`` text under the element with id ``list_sale``.

     Args:
         sel: selector of the page to read the heading from.
     """
     heading_nodes = sel.xpath('//*[@id="list_sale"]/h1/text()')
     return join_strings(heading_nodes.extract())
Пример #20
0
 def link(self):
     """Return a one-element list with an MD5 hex digest of the node text.

     The digest is computed over the UTF-8 encoding of the joined
     ``text()`` nodes and stands in for a link, as the method name
     suggests.
     """
     import hashlib
     text = join_strings(self.sel.xpath('text()').extract())
     digest = hashlib.md5(text.encode('utf-8')).hexdigest()
     return [digest]
Пример #21
0
 def desc_parser(self):
     """Return the title, a newline string and the description text.

     The description text is every text node under a div with
     ``itemprop="description"``, joined with ``', '`` as the second
     ``join_strings`` argument.  The three values are returned as a list.
     """
     heading = self.title()
     description = self.sel.xpath('//div[@itemprop="description"]//text()')
     return [
         heading,
         '\n',
         join_strings(description.extract(), ', '),
     ]
Пример #22
0
 def page_title(self, sel):
     """Join the text of the ``h1`` inside the ``list_sale`` element.

     Args:
         sel: page selector queried for the heading text nodes.
     """
     h1_text = sel.xpath('//*[@id="list_sale"]/h1/text()').extract()
     return join_strings(h1_text)
Пример #23
0
 def title_parser(self):
     """Return the bold text followed by the first three body words.

     The body is the selector's ``text()`` nodes joined with ``' '``; its
     first three whitespace-separated tokens are re-joined with ``' '``
     and appended to the joined ``b/text()`` after a single space.
     """
     bold_text = join_strings(self.sel.xpath('b/text()').extract())
     body_text = join_strings(self.sel.xpath('text()').extract(), ' ')
     words = body_text.split()
     snippet = join_strings(words[0:3], ' ')
     return u'%s %s' % (bold_text, snippet)
Пример #24
0
 def link(self):
     """Return ``[md5_hex]`` computed from the selector's joined text.

     The joined ``text()`` nodes are encoded as UTF-8 and hashed; the hex
     digest is returned in a one-element list in place of a link.
     """
     import hashlib
     payload = join_strings(self.sel.xpath('text()').extract())
     hasher = hashlib.md5(payload.encode('utf-8'))
     return [hasher.hexdigest()]
Пример #25
0
            ur'склад': 53,
            #                         'kommercheskaya-nedvizhimost' : 93,
        }
        result = self.re_mapper(mapper, page_title)
        if callable(result):
            return result(sel)
        return result or ZDANIE

    def stead_parser(self, sel):
        """Look up an id by the land-use phrase present in the card block.

        Args:
            sel: selector of the detail page; its ``card_block`` div(s)
                under the element with id ``list_sale`` are extracted and
                joined before matching.

        Returns:
            The id (15, 42 or 20) of a phrase matched by
            ``self.re_mapper``; otherwise that helper's no-match result.
        """
        import logging

        # The phrases contain no backslashes, so plain u'' literals are
        # equivalent to the Python 2-only ur'' spelling.
        phrases = {
            u'индивидуальное жилищное строительство': 15,  # individual housing
            u'сельскохозяйственного назначения': 42,  # agricultural purpose
            u'коммерческое строительство': 20,  # commercial construction
        }
        card_nodes = sel.xpath('//*[@id="list_sale"]/div[@class="card_block"]')
        txt = join_strings(card_nodes.extract())
        # Debug output is routed through logging rather than printed.
        logging.getLogger(__name__).debug('stead parser %s', txt)
        return self.re_mapper(phrases, txt)

    def re_mapper(self, mapper, txt):
        """Find the first regular expression of *mapper* that occurs in *txt*.

        Args:
            mapper: dict of ``pattern -> value``.
            txt: text searched with ``re.search`` using ``re.I | re.U``.

        Returns:
            The value paired with the first matching pattern in the dict's
            iteration order, or ``None`` if none of them matches.
        """
        flags = re.I | re.U
        # items() rather than iteritems(): available on Python 2 and 3.
        hits = (
            value for pattern, value in mapper.items()
            if re.search(pattern, txt, flags)
        )
        return next(hits, None)

    def get_digit_price(self, price, mesure):
        mesures = {u'тыс. руб.': 1000}
        if price:
            if mesure in mesures:
                price_digit = float(price)
Пример #26
0
 def locality_parser(self):         
     return join_strings(self.sel.re(ur'(?:п\.|ст\.|x\.|г\.)\s(\D+?)\,')) or u'Темрюк'
Пример #27
0
 def title(self):
     """Return the cached title, filling the cache when it is empty.

     A falsy ``self._title`` is replaced by ``self.title_parser()`` passed
     through ``join_strings``; the stored value is then returned.
     """
     cached = self._title
     if not cached:
         cached = join_strings(self.title_parser())
         self._title = cached
     return cached