def parse(self, response):
    """Yield store items scraped from the paragraphs inside ``div.rte``.

    A ``<p>`` with more than two text nodes starts a new store. Within it,
    a text node containing ``'Tel'`` supplies ``number``, one containing
    ``'Address'`` supplies ``address``, and every other kept node is stored
    as both ``brandName`` and ``u_id``. A paragraph that carries an
    ``<iframe>`` completes the most recently started store: coordinates are
    read from the iframe ``src`` and the item is yielded.

    Args:
        response: Response object exposing ``.css()``.

    Yields:
        The loaded ``StoreItem`` of the most recently started store, once
        per paragraph that contains an iframe.
    """
    last_loader = None
    for paragraph in response.css('div.rte').css('p'):
        text_nodes = paragraph.css('::text')
        if len(text_nodes) > 2:
            loader = ItemLoader(item=StoreItem())
            for node in text_nodes:
                text = node.get()
                # Nodes containing '___' and lone-space nodes are skipped.
                if '___' in text or text == ' ':
                    continue
                if 'Tel' in text:
                    # Drops the first 5 characters (presumably a "Tel: "
                    # label -- confirm against the page markup).
                    loader.add_value('number', text[5:])
                elif 'Address' in text:
                    # Drops the first 9 characters (presumably "Address: ").
                    loader.add_value('address', text[9:])
                else:
                    loader.add_value('brandName', text)
                    loader.add_value('u_id', text)
            last_loader = loader

        iframe_src = paragraph.css('iframe::attr(src)').get()
        if iframe_src is None or last_loader is None:
            continue

        try:
            lng_marker = iframe_src.index('!2d')
            lat_marker = iframe_src.index('!3d')
            # The latitude value runs up to the next '!' after its marker.
            lat_end = iframe_src.index('!', lat_marker + 3)
        except ValueError:
            # The src lacks the expected markers: emit the store without
            # coordinates instead of aborting the whole page.
            pass
        else:
            last_loader.add_value(
                'latitude', iframe_src[lat_marker + 3:lat_end])
            last_loader.add_value(
                'longitude', iframe_src[lng_marker + 3:lat_marker])
        yield last_loader.load_item()
def parse(self, response):
    """Yield a store item for each ``div.row`` in the third tab container.

    Args:
        response: Response object exposing ``.css()``.

    Yields:
        A loaded ``StoreItem`` whose ``brandName`` comes from the row's
        ``h1`` text.
    """
    panels = response.css('div.resp-tabs-container div.resp_container')
    # Index 2 picks the third matching container.
    for row in panels[2].css('div.row'):
        row_loader = ItemLoader(item=StoreItem(), selector=row)
        row_loader.add_css('brandName', 'h1::text')
        yield row_loader.load_item()
def parse(self, response):
    """Follow the first link of every ``div.marker``, passing a partial item.

    The marker's name, coordinates and address are loaded into a
    ``StoreItem`` that travels with the follow-up request under the
    ``store_item`` meta key.

    Args:
        response: Response object exposing ``.css()`` and ``.follow()``.

    Yields:
        One follow-up request per marker, with ``self.parse_store`` as its
        callback.
    """
    css_fields = (
        ('brandName', 'h4::text'),
        ('latitude', '::attr(data-lat)'),
        ('longitude', '::attr(data-lng)'),
        ('address', 'p.address::text'),
    )
    for pin in response.css('div.marker'):
        store = ItemLoader(item=StoreItem(), selector=pin)
        for field, query in css_fields:
            store.add_css(field, query)
        # The heading text also serves as the unique id.
        store.add_value('u_id', pin.css('h4::text').get())

        detail_link = pin.css('a')[0]
        partial_item = store.load_item()
        yield response.follow(
            detail_link,
            callback=self.parse_store,
            meta={'store_item': partial_item},
        )
def parse_store(self, response):
    """Print a store item built from a single store page.

    The address is every ``span`` text inside the ``div.c-AddressRow``
    elements of ``div.Core-address``, joined with spaces. Coordinates come
    from ``getcoordinates.get_coordinates`` called with that address.

    Args:
        response: Response object exposing ``.css()``.
    """
    name = response.css('span.LocationName-geo::text').get()
    phone = response.css('div.phone-main::text').get()

    address_rows = response.css('div.Core-address').css('div.c-AddressRow')
    address = ' '.join(address_rows.css('span::text').getall())
    position = getcoordinates.get_coordinates(address)

    loader = ItemLoader(item=StoreItem())
    field_values = (
        ('brandName', name),
        ('number', phone),
        ('address', address),
        ('latitude', position['latitude']),
        ('longitude', position['longitude']),
    )
    for field, value in field_values:
        loader.add_value(field, value)
    print(loader.load_item())
def parse_stores(self, response):
    """Print one store item per ``div.locator-result`` in the response.

    The name is the ``h4`` text with ``' Spur'`` removed. Phone, address and
    coordinates are read from the ``itemprop``-tagged elements.

    Args:
        response: Response object exposing ``.css()``.
    """
    for result in response.css('div.locator-result'):
        field_values = {
            'brandName': result.css('h4::text').get().replace(' Spur', ''),
            'number': result.css(
                'strong[itemprop="telephone"]::text').get(),
            'address': result.css('p[itemprop="address"]::text').get(),
            'latitude': result.css(
                'meta[itemprop="latitude"]::attr(content)').get(),
            'longitude': result.css(
                'meta[itemprop="longitude"]::attr(content)').get(),
        }
        store_loader = ItemLoader(item=StoreItem())
        for field, value in field_values.items():
            store_loader.add_value(field, value)
        print(store_loader.load_item())
def parse(self, response):
    """Print a store item for every usable ``div.pos-top`` entry.

    An entry is skipped when it has no ``h3`` text or when it has fewer
    than the four ``p`` text nodes this parser reads from. The phone number
    is the part of the first text node before any ``'/'``. The address is
    the fourth text node, and it is also passed to ``get_coordinates`` to
    obtain latitude and longitude.

    Args:
        response: Response object exposing ``.css()``.
    """
    for entry in response.css('div.pos-top'):
        brand_name = entry.css('h3::text').get()
        if brand_name is None:
            continue

        details = entry.css('p::text').getall()
        # Indices 0 and 3 are read below; skip an incomplete entry rather
        # than let an IndexError abort the rest of the page.
        if len(details) < 4:
            continue

        loader = ItemLoader(item=StoreItem())
        loader.add_value('brandName', brand_name)
        loader.add_value('number', details[0].split('/')[0])

        address = details[3]
        loader.add_value('address', address)

        coords = get_coordinates(address)
        loader.add_value('latitude', coords['latitude'])
        loader.add_value('longitude', coords['longitude'])
        loader.add_value('u_id', brand_name)
        print(loader.load_item())
def parse_store(self, response):
    """Print a store item built from a single store page.

    The name is the ``h1`` text with ``'PIATTO'`` removed and whitespace
    stripped. Address and phone come from the first and second ``p`` of the
    first ``div.medium-3``. Longitude sits between the ``!2d`` and ``!3d``
    markers of the iframe ``src``, latitude between ``!3d`` and ``!2m``.

    Args:
        response: Response object exposing ``.css()``.
    """
    title = response.css('h1::text').get()
    name = title.replace('PIATTO', '').strip()

    address_parts = response.css('div.medium-3')[0].css('p')[0].css(
        '::text').getall()
    phone = response.css('div.medium-3')[0].css('p')[1].css('::text').get()

    embed_src = response.css('iframe::attr(src)').get()
    lng_from = embed_src.index('!2d') + 3
    lng_to = embed_src.index('!3d')
    lat_from = embed_src.index('!3d') + 3
    lat_to = embed_src.index('!2m')

    loader = ItemLoader(item=StoreItem())
    field_values = (
        ('brandName', name),
        ('address', ' '.join(address_parts)),
        ('number', phone),
        ('longitude', embed_src[lng_from:lng_to]),
        ('latitude', embed_src[lat_from:lat_to]),
        # The cleaned name also serves as the unique id.
        ('u_id', name),
    )
    for field, value in field_values:
        loader.add_value(field, value)
    print(loader.load_item())
def parse_store(self, response):
    """Print a store item built from a single store page.

    The name is the ``h2.title-bar-medium-left`` text with ``'Del Forno'``
    removed and whitespace stripped. The first ``li`` of
    ``div.contact-us-left`` supplies the address (first ``p`` text) and a
    coordinate string (first text of its second ``p``). Latitude runs from
    the first ``'-'`` to the ``'|'``, and longitude is whatever follows the
    ``'|'``.

    Args:
        response: Response object exposing ``.css()``.
    """
    heading = response.css('h2.title-bar-medium-left::text').get()
    name = heading.replace('Del Forno', '').strip()

    contact = response.css('div.contact-us-left').css('li')[0]
    street = contact.css('p::text').get()
    raw_coords = contact.css('p')[1].css('::text').get()

    # Latitude is sliced starting AT the '-' sign, so the sign is kept.
    dash_at = raw_coords.index('-')
    bar_at = raw_coords.index('|')
    lat = raw_coords[dash_at:bar_at].strip()
    lng = raw_coords[bar_at + 1:].strip()

    phone = response.css('a.my_call::text').get()

    loader = ItemLoader(item=StoreItem())
    field_values = (
        ('brandName', name),
        ('address', street),
        ('number', phone),
        ('u_id', name),
        ('latitude', lat),
        ('longitude', lng),
    )
    for field, value in field_values:
        loader.add_value(field, value)
    print(loader.load_item())
def parse(self, response):
    """Yield a store item for each entry of the JSON array in the page script.

    The first ``script`` text inside ``div.content`` is searched for a JSON
    array: it starts at the first ``[{`` and ends at the first ``}]`` that
    is immediately followed by a single quote. Each decoded entry becomes a
    ``StoreItem``.

    The previous version filled a loader for every entry but never loaded
    or emitted it, so the callback produced nothing. The item is now
    yielded.

    Args:
        response: Response object exposing ``.css()``.

    Yields:
        A loaded ``StoreItem`` per decoded store entry.
    """
    script = response.css('div.content').css('script::text').get()
    payload = script[script.index('[{'):]
    # "+ 2" keeps the closing "}]" and drops the quote that follows it.
    payload = payload[:payload.index("}]'") + 2]

    for store in json.loads(payload):
        brand_name = store['DisplayName'].replace("John Dory's ", '')
        address = ' '.join(
            (store['StreetAddress'], store['Suburb'], store['City']))

        loader = ItemLoader(item=StoreItem())
        # The cleaned display name also serves as the unique id.
        loader.add_value('u_id', brand_name)
        loader.add_value('address', address)
        loader.add_value('brandName', brand_name)
        loader.add_value('number', store['TelephoneNumber'])
        loader.add_value('latitude', store['Latitude'])
        loader.add_value('longitude', store['Longitude'])
        yield loader.load_item()
# Standalone script: POST to the Ocean Basket branch endpoint and print each
# returned branch as a loaded StoreItem.
import json
import os
import sys

import requests

# Extend the import path with the current directory before the storesync
# imports below (presumably so the script can be run from the project root).
sys.path.append("./")
from storesync.items import StoreItem
from scrapy.loader import ItemLoader
from storesync.pipelines import StoresyncPipeline

url = "https://oceanbasket.com/dev/site/ajaxgetbranches?site_id=1&ignore=true"
response = requests.post(url)
# Stop with exit status 2 when the response object is falsy.
if not response:
    print("Error")
    # sys.exit is used because the bare exit() helper is only installed by
    # the site module and is intended for interactive sessions.
    sys.exit(2)

results = json.loads(response.content)
# NOTE(review): posts and items are not used in the visible part of the
# script; kept in case later code relies on them.
posts = []
items = []
for store in results['branches']:
    loader = ItemLoader(item=StoreItem())
    loader.add_value('u_id', store['id'])
    loader.add_value('address', store['meta_address'])
    loader.add_value('number', store['meta_phone'])
    loader.add_value('latitude', store['meta_latitude'])
    loader.add_value('longitude', store['meta_longitude'])
    loader.add_value('brandName', store['meta_title'])
    # TODO CAN FILTER PROVINCES
    print(loader.load_item())