def ParseDuespaghi(country, url, name, Asset):
    """Parse a Duespaghi page and store what it finds for ``Asset``.

    Reads the page at ``url`` and, through the ``gL`` helpers, records the
    last review date, the address block, the cuisine/category tags, the
    review summary and the price range.

    Args:
        country: passed to ``gL.StdPhone`` and stored as ``AddrCountry``.
        url: address of the page to read.
        name: not used by this parser.
        Asset: asset identifier handed to every database helper.

    Returns:
        True when the page was processed, or when it answered 404 (the asset
        is then flagged inactive). False when the page could not be read or
        any step raised an exception (the error is logged).
    """
    if gL.trace:
        gL.log(gL.DEBUG)
    try:
        rc, content = ReadPage(url)
        if content is None:
            if rc == 404:
                # The page no longer exists: deactivate the asset.
                gL.cMySql.execute(
                    "Update Asset set Active=%s, Updated=%s where Asset=%s",
                    (0, gL.SetNow(), Asset))
                return True
            return False

        # --- last review date: the first match is the most recent one ---
        found = content.xpath('//div[@class="metadata-text pull-left"]/text()')
        if found:
            text = gL.StdCar(found[0]) or ''
            text = text.replace('alle', ' ').replace(',', '')
            # Collapse runs of whitespace left behind by the removals above.
            text = ' '.join(text.split())
            text = text.replace('Recensito il ', '').strip()
            if text:
                # NOTE(review): %A / %B depend on the process locale;
                # presumably it is set to match the page language - confirm.
                review_day = datetime.datetime.strptime(
                    text, '%A %d %B %Y %H:%M').date()
                # Store as a datetime at midnight.
                LastReviewDate = datetime.datetime.combine(
                    review_day, datetime.time(0, 0))
                gL.dbLastReviewDate(Asset, LastReviewDate)

        # --- address: every field falls back to '' when the node is absent ---
        street = content.xpath('//span[@itemprop="streetAddress"]/text()')
        city = content.xpath('//span[@itemprop="addressLocality"]/text()')
        # NOTE(review): XPath comparisons are case-sensitive; confirm the page
        # really spells this itemprop in upper case.
        county = content.xpath('//span[@itemprop="ADDRESSREGION"]/text()')
        zip_code = content.xpath('//span[@itemprop="postalCode"]/text()')
        phone = content.xpath('//*[@itemprop="telephone"]/text()')
        website = content.xpath('//a[@itemprop="url"]/@href')

        AddrStreet = gL.StdName(street[0]) if street else ''
        AddrCity = gL.StdName(city[0]) if city else ''
        AddrCounty = county[0] if county else ''
        AddrZIP = gL.StdZip(zip_code[0]) if zip_code else ''
        AddrWebsite = website[0] if website else ''
        AddrPhone = ''
        AddrPhone1 = ''
        if phone:
            AddrPhone, AddrPhone1 = gL.StdPhone(phone[0], country)

        AddrList = {'AddrStreet': AddrStreet,
                    'AddrCity': AddrCity,
                    'AddrCounty': AddrCounty,
                    'AddrZIP': AddrZIP,
                    'AddrPhone': AddrPhone,
                    'AddrPhone1': AddrPhone1,
                    'AddrWebsite': AddrWebsite,
                    'AddrCountry': country}
        gL.dbAssettAddress(Asset, AddrList)

        # --- tags: cuisine and category texts are stored together, once ---
        cuisines = content.xpath('//span[@itemprop="servesCuisine"]//text()')
        categories = content.xpath('//p[@class="detail-category"]//text()')
        tag = [gL.StdName(item) for item in list(cuisines) + list(categories)]
        if tag:
            gL.dbAssetTag(Asset, tag, "Tipologia")

        # --- reviews: first word of the counter text plus the star score ---
        nreview = 0
        counter = content.xpath('//span[@class="review-counter clearfix"]/text()')
        if counter:
            words = counter[0].split()
            if words:
                nreview = words[0]

        # The highest "on" star marker present gives the score (0 = none).
        punt = 0
        for stars in (5, 4, 3, 2, 1):
            if content.xpath(
                    '//span[@class="fa icon-farfalla star%d on"]' % stars):
                punt = stars
                break
        if punt > 0:
            gL.dbAssetReview(Asset, [(nreview, punt)])

        # --- price ---
        price = content.xpath('//*[@itemprop="priceRange"]/text()')
        if price:
            # NOTE(review): the original applied replace('', '') here, a no-op;
            # possibly a currency symbol was lost from the literal - confirm.
            PriceAvg = price[0]
            # Key spelled 'PriceCurr' to match the other parsers in this file.
            PriceList = [['PriceCurr', gL.currency], ['PriceAvg', PriceAvg]]
            gL.dbAssetPrice(Asset, PriceList, gL.currency)

    except Exception as err:
        gL.log(gL.ERROR, url)
        gL.log(gL.ERROR, err)
        return False

    return True
def ParseQristoranti(country, url, name, Asset):
    """Parse a Qristoranti page and store what it finds for ``Asset``.

    Reads the page at ``url`` and, through the ``gL`` helpers, records the
    last update date, the address block, the cuisine tag, a price range
    derived from the textual price band, and the review summary.

    Args:
        country: passed to ``gL.StdPhone`` and stored as ``AddrCountry``.
        url: address of the page to read.
        name: not used by this parser.
        Asset: asset identifier handed to every database helper.

    Returns:
        True when the page was processed, or when it answered 404 (the asset
        is then flagged inactive). False when ``ReadPage`` reported another
        non-zero code or any step raised an exception (the error is logged).
    """
    if gL.trace:
        gL.log(gL.DEBUG)
    try:
        # Read the content page.
        rc, content = ReadPage(url)
        if rc != 0:
            if rc == 404:
                # The page no longer exists: deactivate the asset.
                gL.cMySql.execute(
                    "Update Asset set Active=%s, Updated=%s where Asset=%s",
                    (0, gL.SetNow(), Asset))
                return True
            return False

        # --- last update date ---
        # Looks for text such as 'Ultimo aggiornamento: 21 Novembre, 2012';
        # the first one found is the most recent.
        marker = "Ultimo aggiornamento: "
        LastReviewDate = None
        for text in content.xpath('//div[@class="reviewInfo"]/text()'):
            pos = text.find(marker)
            if pos < 0:  # find() returns -1 when absent; 0 is a valid hit
                continue
            stamp = text[pos + len(marker):].replace(',', '').strip()
            # Try the full month name first, then the abbreviated one.
            for fmt in ('%d %B %Y', '%d %b %Y'):
                try:
                    parsed = datetime.datetime.strptime(stamp, fmt)
                except ValueError:
                    continue
                # Store as a datetime at midnight.
                LastReviewDate = datetime.datetime.combine(
                    parsed.date(), datetime.time(0, 0))
                break
            if LastReviewDate is not None:
                break

        if LastReviewDate is not None:
            gL.dbLastReviewDate(Asset, LastReviewDate)

        # --- address ---
        website = content.xpath('//td[contains(.,"sito")]//@href')
        AddrWebsite = website[0] if website else ''

        AddrStreet = ''
        ind = content.xpath(
            '//td[contains(., "Indirizzo")]/following-sibling::td/text()')
        if ind:
            # Only the part before the first comma is kept as the street.
            AddrStreet = gL.StdCar(ind[0].split(",")[0])
        AddrZIP = AddrCounty = AddrCity = ""

        phone = content.xpath(
            '//td[contains(., "Telefono")]/following-sibling::td/text()')
        AddrPhone, AddrPhone1 = gL.StdPhone(phone[0] if phone else '', country)

        AddrList = {'AddrStreet': AddrStreet,
                    'AddrCity': AddrCity,
                    'AddrCounty': AddrCounty,
                    'AddrZIP': AddrZIP,
                    'AddrPhone': AddrPhone,
                    'AddrPhone1': AddrPhone1,
                    'AddrWebsite': AddrWebsite,
                    'AddrCountry': country}
        gL.dbAssettAddress(Asset, AddrList)

        # --- tags: only the first cuisine link is used ---
        cuisines = content.xpath(
            "//td[contains(., 'Tipo di cucina')]/following-sibling::td/a/text()")
        if cuisines:
            cucina = " ".join(cuisines[0].split())
            gL.dbAssetTag(Asset, [cucina], "Cucina")

        # --- price: map the textual band to a (from, to) range ---
        bands = {"bassa": (5, 12),
                 "medio-bassa": (12, 25),
                 "media": (25, 40),
                 "medio-alta": (40, 60),
                 "alta": (60, 100)}
        band_text = content.xpath(
            '//td[contains(., "Fascia di prezzo")]/following-sibling::td/text()')
        if band_text:
            # Unknown bands are stored as 0-0.
            PriceFrom, PriceTo = bands.get(gL.StdCar(band_text[0]), (0, 0))
            PriceList = [['PriceCurr', gL.currency],
                         ['PriceFrom', PriceFrom],
                         ['PriceTo', PriceTo]]
            gL.dbAssetPrice(Asset, PriceList, gL.currency)

        # --- reviews ---
        rating = content.xpath('//td[@class="rating_value average"]/text()')
        count = content.xpath('//span[@class="count"]/text()')
        rating_text = rating[0].strip() if rating else ''
        count_text = count[0].strip() if count else ''
        if rating_text and count_text:
            # Tuple layout is (number of reviews, score), as in the other
            # parsers of this file.
            punt = locale.atoi(rating_text)
            nreview = locale.atoi(count_text)
            gL.dbAssetReview(Asset, [(nreview, punt)])

    except Exception as err:
        gL.log(gL.ERROR, url)
        gL.log(gL.ERROR, err)
        return False

    return True
def ParseTripadvisor(country, url, name, Asset):
    """Parse a Tripadvisor page and store what it finds for ``Asset``.

    Reads the page at ``url`` and, through the ``gL`` helpers, records the
    last review date, the address block, the cuisine tags, the price range
    and the per-score review counts.

    Args:
        country: passed to ``gL.StdPhone`` and stored as ``AddrCountry``.
        url: address of the page to read.
        name: not used by this parser.
        Asset: asset identifier handed to every database helper.

    Returns:
        True when the page was processed, or when it answered 404 (the asset
        is then flagged inactive). False when ``ReadPage`` reported another
        non-zero code or any step raised an exception (the error is logged).
    """
    if gL.trace:
        gL.log(gL.DEBUG)
    try:
        # Read the content page.
        rc, content = ReadPage(url)
        if rc != 0:
            if rc == 404:
                # The page no longer exists: deactivate the asset.
                gL.cMySql.execute(
                    "Update Asset set Active=%s, Updated=%s where Asset=%s",
                    (0, gL.SetNow(), Asset))
                return True
            return False

        # --- last review date: the first match is the most recent one ---
        found = content.xpath('//span[@class="ratingDate"]/text()')
        if found:
            text = (gL.StdCar(found[0]) or '').replace('Recensito il ', '')
            if text:
                # NOTE(review): %B depends on the process locale; presumably
                # it is set to match the page language - confirm.
                parsed = datetime.datetime.strptime(text, '%d %B %Y')
                # Store as a datetime at midnight.
                LastReviewDate = datetime.datetime.combine(
                    parsed.date(), datetime.time(0, 0))
                gL.dbLastReviewDate(Asset, LastReviewDate)

        # --- address: every field falls back to '' when the node is absent ---
        street = content.xpath('//span[@property="v:street-address"]/text()')
        city = content.xpath('//span[@property="v:locality"]/text()')
        if not city:
            city = content.xpath('//span[@property="v:municipality"]/text()')
        zip_code = content.xpath('//span[@property="v:postal-code"]/text()')
        phone = content.xpath('//div[@class="fl phoneNumber"]/text()')

        AddrStreet = gL.StdName(street[0]) if street else ''
        AddrCity = gL.StdName(city[0]) if city else ''
        AddrZIP = gL.StdZip(zip_code[0]) if zip_code else ''
        AddrCounty = ''
        # No website is extracted from this page; the key is still sent so the
        # address dictionary has the same shape as in the other parsers.
        AddrWebsite = ''
        AddrPhone = ''
        AddrPhone1 = ''
        if phone:
            AddrPhone, AddrPhone1 = gL.StdPhone(phone[0], country)
            if not AddrPhone:
                AddrPhone = ''
                AddrPhone1 = ''

        AddrList = {'AddrStreet': AddrStreet,
                    'AddrCity': AddrCity,
                    'AddrCounty': AddrCounty,
                    'AddrZIP': AddrZIP,
                    'AddrPhone': AddrPhone,
                    'AddrPhone1': AddrPhone1,
                    'AddrWebsite': AddrWebsite,
                    'AddrCountry': country}
        gL.dbAssettAddress(Asset, AddrList)

        # --- tags ---
        # The detail block is a flat sequence of texts in which a label
        # ('Cucina:', 'Fascia prezzo:') is followed by its value.
        classify = content.xpath('//div[@class="detail"]//text()')
        tag0 = [item for item in classify if item not in ('\n', '', ' ')]

        tag = []
        take_next = False
        for item in tag0:
            if gL.StdCar(item) == 'Cucina:':
                take_next = True
                continue
            if take_next:
                take_next = False
                for part in item.split(','):
                    part = part.lstrip()
                    # Drop duplicates while keeping the page order.
                    if part not in tag:
                        tag.append(part)
        gL.dbAssetTag(Asset, tag, "Cucina")

        # --- price: value after 'Fascia prezzo:' has the form 'from - to' ---
        PriceFrom = 0
        PriceTo = 0
        for pos, item in enumerate(tag0):
            if item != 'Fascia prezzo:':
                continue
            if pos + 1 < len(tag0):
                parts = tag0[pos + 1].split('-')
                if len(parts) > 1:
                    PriceFrom = parts[0].rstrip()
                    PriceTo = parts[1].lstrip()
            break

        if gL.currency == "EUR":
            # Strip non-breaking spaces from the extracted amounts.
            if PriceFrom != 0:
                PriceFrom = PriceFrom.replace(u'\xa0', u'')
            if PriceTo != 0:
                PriceTo = PriceTo.replace(u'\xa0', u'')
        PriceList = [['PriceCurr', gL.currency],
                     ['PriceFrom', PriceFrom],
                     ['PriceTo', PriceTo]]
        gL.dbAssetPrice(Asset, PriceList, gL.currency)

        # --- reviews: one (count, score) pair per score 1..5 found ---
        r = []
        for punt in range(1, 6):
            # e.g. //div[@onclick[contains(.,"value='5'")]]/following-sibling::*/text()
            mask = ('//div[@onclick[contains(.,"value=\'%d\'")]]'
                    '/following-sibling::*/text()' % punt)
            found = content.xpath(mask)
            if found:
                r.append((locale.atoi(found[0]), punt))
        if r:
            gL.dbAssetReview(Asset, r)

    except Exception as err:
        # Logged with two calls, as in the other parsers of this file.
        gL.log(gL.ERROR, url)
        gL.log(gL.ERROR, err)
        return False

    return True