def QueueViamichelin(country, assettype, source, starturl, pageurl, page):
    """Enqueue the assets listed on a ViaMichelin result page.

    Link targets and titles are collected with two page-wide XPath queries
    and paired by position; every pair whose href is non-empty is handed to
    ``gL.dbEnqueue``.

    Args:
        country, assettype, source, starturl: Forwarded unchanged to
            ``gL.dbEnqueue``.
        pageurl: URL of the page being parsed; forwarded to
            ``gL.dbEnqueue`` and used in error messages.
        page: Parsed document exposing ``xpath()`` (presumably an lxml
            tree -- TODO confirm).

    Returns:
        True when the page was processed; False when the extracted names and
        hrefs are inconsistent or any exception was raised. Errors are
        logged through ``gL.log`` and never propagated.
    """
    if gL.trace:
        gL.log(gL.DEBUG)
    try:
        href = page.xpath('//a[@class="parseHref jsNodePoiLink"]//@href')
        titles = page.xpath('//h2[@class="parseInnerText jsNodePoiTitle"]//text()')

        # Guard before len()/iteration so the check can actually fire.
        if titles is None or href is None:
            msg = "%s - %s" % ("Parsing dei nomi / href senza risultati", pageurl)
            gL.log(gL.ERROR, msg)
            return False

        # Drop text nodes made only of spaces.
        nomi = [item for item in titles if item.replace(" ", "") != '']

        # More names than links means the two queries are out of step.
        if len(nomi) > len(href):
            # pageurl, not url: url is only assigned inside the loop below.
            msg = "%s - %s" % ("Errore nel parsing dei nomi o di href", pageurl)
            gL.log(gL.ERROR, msg)
            return False

        # zip keeps names and links aligned even when an entry is skipped
        # (the manual counter was not advanced on `continue`).
        for nome, link in zip(nomi, href):
            if not link:
                continue
            name = gL.StdName(nome)
            url = gL.SourceBaseUrl + link
            gL.dbEnqueue(country, assettype, source, starturl, pageurl, url, name)
    except Exception as err:
        gL.log(gL.ERROR, pageurl)
        gL.log(gL.ERROR, err)
        return False
    return True
def QueueTripadvisor(country, assettype, source, starturl, pageurl, page):
    """Enqueue the assets listed on a TripAdvisor result page.

    Each element whose class is exactly "listing" or "listing first" is
    searched for its first "property_title " text node and link; the name is
    passed through ``gL.StdName``, the link is prefixed with
    ``gL.SourceBaseUrl`` and the pair is handed to ``gL.dbEnqueue``.

    Args:
        country, assettype, source, starturl, pageurl: Forwarded unchanged
            to ``gL.dbEnqueue``; ``pageurl`` is also logged on failure.
        page: Parsed document exposing ``xpath()`` (presumably an lxml
            tree -- TODO confirm).

    Returns:
        True if every listing was processed, False as soon as any exception
        occurs (it is logged through ``gL.log``, not propagated).
    """
    if gL.trace:
        gL.log(gL.DEBUG)

    try:
        # Read the listing containers and enqueue one asset per container.
        listings = page.xpath('//*[@class="listing" or @class="listing first"]')
        for listing in listings:
            title_nodes = listing.xpath('.//*[@class="property_title "]//text()')
            asset_name = gL.StdName(title_nodes[0])

            link_nodes = listing.xpath('.//a[contains(@class,"property_title ")]/@href')
            relative_link = link_nodes[0]
            asset_url = gL.SourceBaseUrl + relative_link

            # Insert or update the asset.
            gL.dbEnqueue(
                country, assettype, source, starturl, pageurl, asset_url, asset_name
            )
    except Exception as failure:
        gL.log(gL.ERROR, pageurl)
        gL.log(gL.ERROR, failure)
        return False
    else:
        return True
def QueueQristoranti(country, assettype, source, starturl, pageurl, page):
    """Enqueue the assets listed on a Qristoranti result page.

    The page is queried for "contentTitle" containers, and for the text and
    href of the anchors inside them. Names and links are paired by position,
    one pair per container; each link is made absolute with the host name of
    ``starturl`` and handed to ``gL.dbEnqueue``.

    Args:
        country, assettype, source, pageurl: Forwarded unchanged to
            ``gL.dbEnqueue``; ``pageurl`` is also logged on failure.
        starturl: Forwarded to ``gL.dbEnqueue``; its host name is used to
            build absolute asset links.
        page: Parsed document exposing ``xpath()`` (presumably an lxml
            tree -- TODO confirm).

    Returns:
        True if the page was processed (including a page with no
        containers), False as soon as any exception occurs (it is logged
        through ``gL.log``, not propagated).
    """
    if gL.trace:
        gL.log(gL.DEBUG)
    try:
        # Read the list of containers; nothing to enqueue if there are none.
        lista = page.xpath('//div[@class="contentTitle"]')
        if not lista:
            return True

        # Page-wide queries and the URL prefix do not change between
        # iterations, so evaluate them once instead of once per container.
        names = page.xpath('//div[@class="contentTitle"]/a//text()')
        hrefs = page.xpath('//div[@class="contentTitle"]/a//@href')
        # NOTE(review): the scheme is hard-coded to http regardless of the
        # scheme used by starturl.
        base = "http://" + urlparse(starturl).hostname

        # Indexed on purpose (not zip): if names/hrefs are shorter than the
        # container list, the IndexError must still end in a logged failure.
        for index in range(len(lista)):
            raw_name = names[index]
            url = hrefs[index]
            name = gL.StdName(raw_name)
            link = base + url
            gL.dbEnqueue(country, assettype, source, starturl, pageurl, link, name)
    except Exception as err:
        gL.log(gL.ERROR, pageurl)
        gL.log(gL.ERROR, err)
        return False
    return True
def QueueDuespaghi(country, assettype, source, starturl, pageurl, page):
    """Enqueue the assets listed on a 2spaghi result page.

    The ``href`` and ``title`` attributes of every ``a.clearfix`` anchor are
    collected with page-wide XPath queries and paired by position; every
    pair whose href is non-empty is handed to ``gL.dbEnqueue``.

    Args:
        country, assettype, source, starturl: Forwarded unchanged to
            ``gL.dbEnqueue``.
        pageurl: URL of the page being parsed; forwarded to
            ``gL.dbEnqueue`` and used in error messages.
        page: Parsed document exposing ``xpath()`` (presumably an lxml
            tree -- TODO confirm).

    Returns:
        True when the page was processed; False when the extracted anchors,
        names and hrefs are inconsistent or any exception was raised. Errors
        are logged through ``gL.log`` and never propagated.
    """
    if gL.trace:
        gL.log(gL.DEBUG)
    try:
        lista = page.xpath('//a[@class="clearfix"]')
        href = page.xpath('//a[@class="clearfix"]/@href')
        nomi = page.xpath('//a[@class="clearfix"]/@title')

        # Error messages use pageurl: url is only assigned inside the loop.
        if nomi is None or href is None:
            msg = "%s - %s" % ("Parsing nomi / href senza risultati", pageurl)
            gL.log(gL.ERROR, msg)
            return False

        if len(nomi) != len(href):
            msg = "%s - %s" % ("Errore nel parsing dei nomi o di href", pageurl)
            gL.log(gL.ERROR, msg)
            return False

        # Anchors were found but none carried the attributes; this check is
        # loop-invariant, so it runs once here instead of on every iteration.
        if lista and not (nomi and href):
            msg = "%s - %s" % ("Errore get ", pageurl)
            gL.log(gL.ERROR, msg)
            return False

        # zip keeps names and links aligned even when an entry is skipped
        # (the manual counter was not advanced on `continue`).
        for nome, link in zip(nomi, href):
            if not link:
                continue
            name = gL.StdName(nome)
            url = gL.SourceBaseUrl + link
            gL.dbEnqueue(country, assettype, source, starturl, pageurl, url, name)

        # More anchors than attribute pairs: the available pairs have been
        # enqueued, but the page is still reported as failed. This used to
        # surface as an IndexError once the loop ran past the attributes.
        if len(lista) > len(href):
            msg = "%s - %s" % ("Errore nel parsing dei nomi o di href", pageurl)
            gL.log(gL.ERROR, msg)
            return False
    except Exception as err:
        gL.log(gL.ERROR, pageurl)
        gL.log(gL.ERROR, err)
        return False
    return True