Exemplo n.º 1
0
def ParseDuespaghi(country, url, name, Asset):
    """Parse the Duespaghi page at ``url`` and store its data for ``Asset``.

    Reads the page with ``ReadPage`` and hands the last review date, the
    address, the tags, the review summary and the price to the matching
    ``gL.db*`` helpers.

    Args:
        country: forwarded to ``gL.StdPhone`` and stored as ``AddrCountry``.
        url: address of the page to read.
        name: not used by this function.
        Asset: asset identifier passed to every ``gL`` database helper.

    Returns:
        True when the page was processed, or when ``ReadPage`` reported 404
        and the asset was flagged inactive. False when the page could not be
        read or when any exception was raised (the exception is logged and
        not propagated).
    """
    if gL.trace:
        gL.log(gL.DEBUG)

    try:
        rc, content = ReadPage(url)
        if content is None:
            if rc == 404:
                # The page is gone: flag the asset as inactive.
                gL.cMySql.execute("Update Asset set Active=%s, Updated=%s where Asset=%s", (0, gL.SetNow(), Asset))
                return True
            return False

        # The first date found on the page is the most recent one.
        found = content.xpath('//div[@class="metadata-text pull-left"]/text()')
        if found:
            text = gL.StdCar(found[0])
            text = text.replace('alle', ' ')
            text = text.replace(',', '')
            text = text.replace('  ', ' ')
            text = text.replace('Recensito il ', '')
            if text:
                day = datetime.datetime.strptime(text, '%A %d %B %Y %H:%M').date()
                # Store it as a datetime.datetime at midnight.
                LastReviewDate = datetime.datetime.combine(day, datetime.time(0, 0))
                # Update the last review date of the asset.
                gL.dbLastReviewDate(Asset, LastReviewDate)

        # Address: every field falls back to '' when the page has no match,
        # so the database helper never receives an empty xpath result list.
        street = content.xpath('//span[@itemprop="streetAddress"]/text()')
        city = content.xpath('//span[@itemprop="addressLocality"]/text()')
        # NOTE(review): XPath attribute comparison is case-sensitive; the
        # schema.org spelling is "addressRegion" -- confirm this upper-case
        # value really matches the page.
        county = content.xpath('//span[@itemprop="ADDRESSREGION"]/text()')
        postal = content.xpath('//span[@itemprop="postalCode"]/text()')
        phone = content.xpath('//*[@itemprop="telephone"]/text()')
        website = content.xpath('//a[@itemprop="url"]/@href')

        AddrStreet = gL.StdName(street[0]) if street else ''
        AddrCity = gL.StdName(city[0]) if city else ''
        AddrCounty = county[0] if county else ''
        AddrZIP = gL.StdZip(postal[0]) if postal else ''
        AddrPhone = AddrPhone1 = ''
        if phone:
            AddrPhone, AddrPhone1 = gL.StdPhone(phone[0], country)
        AddrWebsite = website[0] if website else ''

        AddrList = {'AddrStreet': AddrStreet,
                    'AddrCity': AddrCity,
                    'AddrCounty': AddrCounty,
                    'AddrZIP': AddrZIP,
                    'AddrPhone': AddrPhone,
                    'AddrPhone1': AddrPhone1,
                    'AddrWebsite': AddrWebsite,
                    'AddrCountry': country}
        gL.dbAssettAddress(Asset, AddrList)

        # Tags: collect every cuisine and category text, then store the
        # complete list with a single call instead of once per element.
        tag = []
        cuisine = content.xpath('//span[@itemprop="servesCuisine"]//text()')
        category = content.xpath('//p[@class="detail-category"]//text()')
        for text in cuisine:
            tag.append(gL.StdName(text))
        for text in category:
            tag.append(gL.StdName(text))
        if tag:
            gL.dbAssetTag(Asset, tag, "Tipologia")

        # Reviews: the counter text starts with the number of reviews; it is
        # kept as the raw text token, exactly as read from the page.
        nreview = 0
        counter = content.xpath('//span[@class="review-counter clearfix"]/text()')
        if counter:
            words = counter[0].split()
            if words:
                nreview = words[0]

        # The score is the highest star (5..1) rendered in the "on" state.
        punt = 0
        for stars in range(5, 0, -1):
            if content.xpath('//span[@class="fa icon-farfalla star%d on"]' % stars):
                punt = stars
                break
        if punt > 0:
            gL.dbAssetReview(Asset, [(nreview, punt)])

        price = content.xpath('//*[@itemprop="priceRange"]/text()')
        if price:
            PriceAvg = price[0].replace('€', '')
            # NOTE(review): the sibling parsers use the key 'PriceCurr';
            # confirm which spelling gL.dbAssetPrice expects.
            PriceList = [['PriceCur', gL.currency],
                         ['PriceAvg', PriceAvg]]
            gL.dbAssetPrice(Asset, PriceList, gL.currency)

    except Exception as err:
        gL.log(gL.ERROR, url)
        gL.log(gL.ERROR, err)
        return False

    return True
Exemplo n.º 2
0
def ParseQristoranti(country, url, name, Asset):
    """Parse the Qristoranti page at ``url`` and store its data for ``Asset``.

    Reads the page with ``ReadPage`` and hands the last update date, the
    address, the cuisine tag, the price band and the review summary to the
    matching ``gL.db*`` helpers.

    Args:
        country: forwarded to ``gL.StdPhone`` and stored as ``AddrCountry``.
        url: address of the page to read.
        name: not used by this function.
        Asset: asset identifier passed to every ``gL`` database helper.

    Returns:
        True when the page was processed, or when ``ReadPage`` reported 404
        and the asset was flagged inactive. False for any other non-zero
        return code or when any exception was raised (the exception is
        logged and not propagated).
    """
    if gL.trace:
        gL.log(gL.DEBUG)

    try:
        # Read the content page.
        rc, content = ReadPage(url)
        if rc != 0:
            if rc == 404:
                # The page is gone: flag the asset as inactive.
                gL.cMySql.execute("Update Asset set Active=%s, Updated=%s where Asset=%s", (0, gL.SetNow(), Asset))
                return True
            return False

        # Looking for text such as 'Ultimo aggiornamento: 21 Novembre, 2012';
        # the first date found is the most recent one.
        marker = "Ultimo aggiornamento: "
        LastReviewDate = None
        for text in content.xpath('//div[@class="reviewInfo"]/text()'):
            # find() returns -1 when the marker is absent; index 0 is a hit.
            if text.find(marker) < 0:
                continue
            stamp = text.replace(',', '').strip().replace(marker, '')
            # Try the full month name first, then the abbreviated one.
            for fmt in ('%d %B %Y', '%d %b %Y'):
                try:
                    parsed = datetime.datetime.strptime(stamp, fmt)
                except ValueError:
                    # Best effort: an unparsable date is simply skipped.
                    continue
                # Store it as a datetime.datetime at midnight.
                LastReviewDate = datetime.datetime.combine(parsed, datetime.time(0, 0))
                break
            if LastReviewDate is not None:
                break
        if LastReviewDate is not None:
            # Update the last review date of the asset.
            gL.dbLastReviewDate(Asset, LastReviewDate)

        # Address: only street, phone and website are read from this page.
        links = content.xpath('//td[contains(.,"sito")]//@href')  # link to the website
        AddrWebsite = links[0] if links else ''

        AddrZIP = AddrCounty = AddrCity = ''
        ind = content.xpath('//td[contains(., "Indirizzo")]/following-sibling::td/text()')
        AddrStreet = gL.StdCar(ind[0].split(",")[0]) if ind else ''

        phones = content.xpath('//td[contains(., "Telefono")]/following-sibling::td/text()')
        AddrPhone, AddrPhone1 = gL.StdPhone(phones[0] if phones else '', country)

        AddrList = {'AddrStreet': AddrStreet,
                    'AddrCity': AddrCity,
                    'AddrCounty': AddrCounty,
                    'AddrZIP': AddrZIP,
                    'AddrPhone': AddrPhone,
                    'AddrPhone1': AddrPhone1,
                    'AddrWebsite': AddrWebsite,
                    'AddrCountry': country}
        gL.dbAssettAddress(Asset, AddrList)

        # Tags: the first cuisine link, with internal whitespace collapsed.
        cuisine = content.xpath("//td[contains(., 'Tipo di cucina')]/following-sibling::td/a/text()")
        if cuisine:
            gL.dbAssetTag(Asset, [" ".join(cuisine[0].split())], "Cucina")

        # Price: translate the textual band into a from/to range; an unknown
        # or missing band is stored as 0/0.
        bands = {"bassa": (5, 12),
                 "medio-bassa": (12, 25),
                 "media": (25, 40),
                 "medio-alta": (40, 60),
                 "alta": (60, 100)}
        cells = content.xpath('//td[contains(., "Fascia di prezzo")]/following-sibling::td/text()')
        band = gL.StdCar(cells[0]) if cells else None
        PriceFrom, PriceTo = bands.get(band, (0, 0))

        PriceList = [['PriceCurr', gL.currency],
                     ['PriceFrom', PriceFrom],
                     ['PriceTo', PriceTo]]
        gL.dbAssetPrice(Asset, PriceList, gL.currency)

        # Reviews: stored as (number of reviews, score), the same order the
        # other parsers use. A page without either value is not an error.
        rating = content.xpath('//td[@class="rating_value average"]/text()')  # score
        count = content.xpath('//span[@class="count"]/text()')                # number of reviews
        r = []
        if rating and count and rating[0] and count[0]:
            punt = locale.atoi(rating[0])
            nreview = locale.atoi(count[0])
            r.append((nreview, punt))
        if r:
            gL.dbAssetReview(Asset, r)

    except Exception as err:
        gL.log(gL.ERROR, url)
        gL.log(gL.ERROR, err)
        return False

    return True
Exemplo n.º 3
0
def ParseTripadvisor(country, url, name, Asset):
    """Parse the Tripadvisor page at ``url`` and store its data for ``Asset``.

    Reads the page with ``ReadPage`` and hands the last review date, the
    address, the cuisine tags, the price range and the per-score review
    counts to the matching ``gL.db*`` helpers.

    Args:
        country: forwarded to ``gL.StdPhone`` and stored as ``AddrCountry``.
        url: address of the page to read.
        name: not used by this function.
        Asset: asset identifier passed to every ``gL`` database helper.

    Returns:
        True when the page was processed, or when ``ReadPage`` reported 404
        and the asset was flagged inactive. False for any other non-zero
        return code or when any exception was raised (the exception is
        logged and not propagated).
    """
    if gL.trace:
        gL.log(gL.DEBUG)

    try:
        # Read the content page.
        rc, content = ReadPage(url)
        if rc != 0:
            if rc == 404:
                # The page is gone: flag the asset as inactive.
                gL.cMySql.execute("Update Asset set Active=%s, Updated=%s where Asset=%s", (0, gL.SetNow(), Asset))
                return True
            return False

        # The first date found on the page is the most recent one.
        found = content.xpath('//span[@class="ratingDate"]/text()')
        if found:
            text = gL.StdCar(found[0]).replace('Recensito il ', '')
            if text:
                parsed = datetime.datetime.strptime(text, '%d %B %Y')
                # Store it as a datetime.datetime at midnight.
                LastReviewDate = datetime.datetime.combine(parsed, datetime.time(0, 0))
                # Update the last review date of the asset.
                gL.dbLastReviewDate(Asset, LastReviewDate)

        # Address: every field falls back to '' when the page has no match,
        # so the database helper never receives an empty xpath result list.
        street = content.xpath('//span[@property="v:street-address"]/text()')
        city = content.xpath('//span[@property="v:locality"]/text()')
        if not city:
            city = content.xpath('//span[@property="v:municipality"]/text()')
        postal = content.xpath('//span[@property="v:postal-code"]/text()')
        phone = content.xpath('//div[@class="fl phoneNumber"]/text()')

        AddrStreet = gL.StdName(street[0]) if street else ''
        AddrCity = gL.StdName(city[0]) if city else ''
        AddrCounty = ''  # not read from this page
        AddrZIP = gL.StdZip(postal[0]) if postal else ''
        AddrPhone = AddrPhone1 = ''
        if phone:
            AddrPhone, AddrPhone1 = gL.StdPhone(phone[0], country)
            if not AddrPhone:
                AddrPhone = AddrPhone1 = ''

        # NOTE(review): unlike the sibling parsers this dictionary carries no
        # 'AddrWebsite' entry (no website is read here) -- confirm that
        # gL.dbAssettAddress accepts that.
        AddrList = {'AddrStreet': AddrStreet,
                    'AddrCity': AddrCity,
                    'AddrCounty': AddrCounty,
                    'AddrZIP': AddrZIP,
                    'AddrPhone': AddrPhone,
                    'AddrPhone1': AddrPhone1,
                    'AddrCountry': country}
        gL.dbAssettAddress(Asset, AddrList)

        # Detail texts, without the newline/empty/single-space fragments.
        # They are scanned twice below: for the cuisine and for the price.
        tag0 = [text for text in content.xpath('//div[@class="detail"]//text()')
                if text not in ('\n', '', ' ')]

        # Tags: the item right after each 'Cucina:' label is a
        # comma-separated list of cuisines; duplicates are dropped.
        tag = []
        after_label = False
        for text in tag0:
            if gL.StdCar(text) == 'Cucina:':
                after_label = True
                continue
            if after_label:
                after_label = False
                for piece in text.split(','):
                    piece = piece.lstrip()
                    if piece not in tag:
                        tag.append(piece)
        gL.dbAssetTag(Asset, tag, "Cucina")

        # Price: the item right after 'Fascia prezzo:' reads "<from> - <to>".
        # A missing or malformed value leaves the range at 0 instead of
        # aborting the whole parse.
        PriceFrom = 0
        PriceTo = 0
        for pos, text in enumerate(tag0):
            if text == 'Fascia prezzo:':
                if pos + 1 < len(tag0):
                    parts = tag0[pos + 1].split('-')
                    if len(parts) >= 2:
                        PriceFrom = parts[0].rstrip()
                        PriceTo = parts[1].lstrip()
                break
        if gL.currency == "EUR":
            # Strip the non-breaking space and euro sign from parsed values.
            if PriceFrom != 0:
                PriceFrom = PriceFrom.replace(u'\xa0€', u'')
            if PriceTo != 0:
                PriceTo = PriceTo.replace(u'\xa0€', u'')

        PriceList = [['PriceCurr', gL.currency],
                     ['PriceFrom', PriceFrom],
                     ['PriceTo', PriceTo]]
        gL.dbAssetPrice(Asset, PriceList, gL.currency)

        # Reviews: one (number of reviews, score) pair per score 1..5 found
        # in the review summary.
        r = []
        for punt in range(1, 6):
            mask = '//div[@onclick[contains(.,"value=\'%d\'")]]/following-sibling::*/text()' % punt
            nreview = content.xpath(mask)
            if nreview:
                r.append((locale.atoi(nreview[0]), punt))
        if r:
            gL.dbAssetReview(Asset, r)

    except Exception as err:
        # Logged the same way as the sibling parsers: url first, then error.
        gL.log(gL.ERROR, url)
        gL.log(gL.ERROR, err)
        return False

    return True