Пример #1
0
    def parse_stores(self, response):
        """Yield one item for the store shown on a store detail page.

        Address lines come from the ``cq-shop-info`` / ``shop-address``
        paragraphs; coordinates come from the first ``ll=<lat>,<lon>`` query
        parameter found in the page body.  Missing address lines, a missing
        comma in the "city, postcode" line, or unparsable coordinates produce
        empty-string fields instead of raising.
        """
        location = re.findall(r"[;?]ll=[^(&)]+", response.body_as_unicode())
        addr_lines = response.xpath(
            '//div[@class="cq-shop-info"]/p[1]/text()'
            '|//div[@id="cq-shop-info"]/p[1]/text()'
            '|//div[@id="cq-full-width"]/div[@class="cq-content"]/div/p[@class="shop-address"]/text()'
        ).extract()

        street = addr_lines[0] if addr_lines else ''
        if len(addr_lines) > 2:
            # City and postcode arrive as separate text nodes.
            city, postcode = addr_lines[1], addr_lines[2]
        elif len(addr_lines) == 2:
            # City and postcode share one "city,postcode" text node.
            parts = addr_lines[1].split(',')
            city = parts[0]
            postcode = parts[1] if len(parts) > 1 else ''
        else:
            city = postcode = ''

        lat = lon = ''
        if location:
            # Each match looks like "?ll=<lat>,<lon>"; drop the 4-char prefix.
            coords = location[0][4:].split(",")
            try:
                lat, lon = float(coords[0]), float(coords[1])
            except (IndexError, ValueError):
                lat = lon = ''

        properties = {
            'addr_full': street,
            'phone': response.xpath(
                'normalize-space(//div[@id="cq-shop-info"]/p[@class="impinfo"]/text())'
            ).extract_first(),
            'city': city,
            'state': '',
            'postcode': postcode,
            'ref': response.url,
            'website': response.url,
            'lat': lat,
            'lon': lon,
        }

        yield inputoutput(**properties)
Пример #2
0
    def parse(self, response):
        """Yield one item per branch in a JSON object keyed by branch ref.

        ``AddressLine`` is split on commas: the last part becomes the city and
        the remaining parts the street address.  Parts are stripped so the
        city has no leading space and the re-joined address has single spaces
        after each comma.
        """
        data = json.loads(response.body_as_unicode())
        for key, value in data.items():
            if 'AddressLine' in value:
                parts = [part.strip() for part in value['AddressLine'].split(',')]
                address = ", ".join(parts[:-1])
                city = parts[-1]
            else:
                address = ""
                city = ""

            properties = {
                'ref': key,
                'name': value['branch_name'],
                'addr_full': address,
                'city': city,
                'country': 'United Kingdom',
                'postcode': value['postcode'] if 'postcode' in value else "",
                'lat': value['Latitude'],
                'lon': value['Longitude'],
                'phone': value['telephone'],
            }

            opening_hours = self.store_hours(value)
            if opening_hours:
                properties['opening_hours'] = opening_hours

            yield inputoutput(**properties)
Пример #3
0
    def parse_store(self, response):
        """Yield a store item built from the second JSON-LD script in <head>.

        Phone, id, opening hours and coordinates are read from the JSON-LD
        data; the website is the page's canonical link.  Whatever
        ``self.address`` returns for the JSON-LD address is merged into the
        item when it is truthy.
        """
        ld_scripts = response.xpath(
            '//head/script[@type="application/ld+json"]/text()')
        raw_json = ld_scripts[1].extract()
        # The script body embeds a JavaScript-style comment, which is not
        # valid JSON; blank it out before decoding.
        stray_comment = '// if the location file does not have the hours separated into open/close for each day, remove the below section'
        data = json.loads(raw_json.replace(stray_comment, ''))

        phone = data['telephone']
        canonical = response.xpath('//head/link[@rel="canonical"]/@href')
        website = canonical[0].extract()
        ref = data['@id']
        opening_hours = self.store_hours(data['openingHoursSpecification'])
        geo = data['geo']
        lon = float(geo['longitude'])
        lat = float(data['geo']['latitude'])

        properties = {
            'phone': phone,
            'website': website,
            'ref': ref,
            'opening_hours': opening_hours,
            'lon': lon,
            'lat': lat,
        }

        address = self.address(data['address'])
        if address:
            properties.update(address)

        yield inputoutput(**properties)
Пример #4
0
    def parse(self, response):
        """Yield one item per store in a JSON list of store records.

        Each record is expected to carry a ``store_info`` mapping; records
        without one are skipped.  The full address joins ``address`` and
        ``address_extended`` with ", ", leaving out empty or missing parts so
        there is no dangling separator.
        """
        stores = json.loads(response.body_as_unicode())
        for store in stores:
            store_info = store.get('store_info')
            if not store_info:
                # Nothing to build an item from.
                continue

            props = {
                'ref': store_info['corporate_id'],
                'lat': store_info['latitude'],
                'lon': store_info['longitude'],
                'state': store_info['region'],
                'city': store_info['locality'],
                'opening_hours': self.parse_hours(
                    store_info.get('store_hours', '')),
            }

            address_parts = (
                store_info['address'],
                store_info.get('address_extended'),
            )
            props['addr_full'] = ', '.join(
                part for part in address_parts if part)

            # Fields copied through unchanged.
            for key in ('website', 'phone', 'postcode', 'country'):
                props[key] = store_info[key]

            yield inputoutput(**props)
Пример #5
0
    def parse(self, response):
        """Yield one item per branch listed under ``results`` in the JSON body.

        Opening hours are assembled as "Mo-Th …; Fr …; Sa …; Su …", with
        " a " replaced by "-" and a Sunday value of "Cerrado" rendered as
        "Su off".
        """
        payload = json.loads(response.body_as_unicode())
        for branch in payload['results']:
            ref = branch['id_suc']
            name = "Coto " + branch['desc_suc']
            street = branch['direccion']
            phone = branch['telefono']
            lat = branch['latitud']
            lon = branch['longitud']

            day_rules = [
                "Mo-Th " + branch['hor_lu_a_ju'],
                "Fr " + branch['hor_vi'],
                "Sa " + branch['hor_sa'],
            ]
            if branch['hor_do'] != "Cerrado":
                day_rules.append("Su " + branch['hor_do'])
            else:
                day_rules.append("Su off")
            opening_hours = "; ".join(day_rules).replace(' a ', '-')

            fields = {
                'ref': ref,
                'lat': lat,
                'lon': lon,
                'name': name,
                'street': street,
                'country': "Argentina",
                'phone': phone,
                'addr_full': street,
                'opening_hours': opening_hours,
            }
            yield inputoutput(**fields)
Пример #6
0
    def parse(self, response):
        """Yield one item per ``location-listing-item`` element on the page.

        All field lookups are relative to the store element (``.//``).  An
        XPath starting with ``//`` searches the whole document even when
        called on a sub-selector, which made every store report the first
        store's address, city, state, zip and phone.  Missing text nodes give
        an empty string instead of raising ``AttributeError``.
        """
        def text_of(node, query):
            # First matching text node, stripped; '' when nothing matches.
            return node.xpath(query).extract_first(default='').strip()

        for store in response.xpath(
                './/div[@class="location-listing-item row"]'):
            properties = {
                'ref': self.parse_Ref(store),
                'addr_full': text_of(store, ".//span[@class='address']//text()"),
                'city': text_of(store, ".//span[@class='city']//text()"),
                'state': text_of(store, ".//span[@class='state']//text()"),
                'postcode': text_of(store, ".//span[@class='zip']//text()"),
                'phone': text_of(store, ".//span[@class='phone']//text()"),
                'name': text_of(store, ".//strong//text()"),
                'lon': store.xpath("@data-lon").extract_first(),
                'lat': store.xpath("@data-lat").extract_first(),
            }

            yield inputoutput(**properties)
Пример #7
0
    def parse(self, response):
        """Yield one item per GeoJSON-style feature in the JSON body.

        Address fields come from each feature's ``properties`` and the
        coordinates from ``geometry.coordinates`` (index 0 is used as
        longitude, index 1 as latitude).  Opening hours are best-effort: a
        failure in ``self.store_hours`` is logged and the item is still
        emitted without hours.
        """
        data = json.loads(response.body_as_unicode())

        for store in data.get('features', []):
            store_info = store['properties']

            properties = {
                "ref": store_info['id'],
                'addr_full': store_info['addressLine1'],
                'city': store_info['addressLine3'],
                'state': store_info['subDivision'],
                'country': store_info['addressLine4'],
                'postcode': store_info['postcode'],
                'phone': store_info.get('telephone'),
                'lon': store['geometry']['coordinates'][0],
                'lat': store['geometry']['coordinates'][1],
            }

            raw_hours = store_info.get('restauranthours')
            try:
                hours = self.store_hours(raw_hours)
            except Exception:
                # Narrower than a bare ``except:`` so KeyboardInterrupt and
                # SystemExit still propagate.
                self.logger.exception("Couldn't process opening hours: %s",
                                      raw_hours)
            else:
                if hours:
                    properties['opening_hours'] = hours

            yield inputoutput(**properties)
Пример #8
0
    def parse_detail_product(self, response):
        """Add opening hours to the product from ``response.meta`` and yield it.

        Hours are parsed from the rows of the ``hours-table`` table; when the
        page has no such rows the store is recorded as open ``24/7``.
        """
        product = response.meta.get('product')
        hour_rows = response.xpath('//table[@id="hours-table"]//tr')

        if len(hour_rows) > 0:
            product['opening_hours'] = self.store_hours(hour_rows)
        else:
            product['opening_hours'] = u'24/7'

        yield inputoutput(**product)
Пример #9
0
    def parse_us(self, response):
        """Yield one item per entry of the ``stores`` mapping in the JSON body.

        The mapping key is used as the item ref.  Opening hours are parsed
        with ``self.store_hours`` unless they are absent or contain
        "Please call".
        """
        payload = json.loads(response.body_as_unicode())

        for store_key, store_data in payload['stores'].items():
            properties = {
                'phone': store_data['phone'],
                'addr_full': store_data['address1'].title(),
                'city': store_data['city'].title(),
                'state': store_data['stateCode'],
                'postcode': store_data['postalCode'],
                'lon': float(store_data['longitude']),
                'lat': float(store_data['latitude']),
                'ref': store_key,
            }

            hours = store_data.get('storeHours')
            if hours and "Please call" not in hours:
                opening_hours = self.store_hours(hours)
            else:
                opening_hours = None

            if opening_hours:
                properties['opening_hours'] = opening_hours

            yield inputoutput(**properties)
Пример #10
0
    def parse_store(self, response):
        """Yield a (raw, formatted) opening-hours pair for one store page.

        The hours are read from the first object of the page's JSON-LD list
        and formatted with ``self.store_hours``; nothing is yielded when the
        formatted value is empty.  Only the hours are emitted, so the other
        store fields are no longer read: building them could raise
        ``KeyError``/``ValueError`` for data that was never used.
        """
        # There are newlines in the opening hours, which is bad JSON. We turn
        # off strict mode so Python's JSON library will parse it.
        json_content = response.xpath(
            '//script[@type="application/ld+json"]/text()').extract_first()
        data = json.loads(json_content, strict=False)

        raw = data[0]['openingHours']
        formatted = self.store_hours(raw)
        if formatted:
            yield inputoutput(raw, formatted)
Пример #11
0
    def process_store(self, store):
        """Yield a store item parsed from the text nodes of a store page.

        Text nodes under the ``col-lg-4`` column (excluding ``h2`` and
        ``strong`` elements) are stripped, emptied entries and entries listed
        in ``SIEVE`` are dropped, and the remaining list is read positionally:
        index 1 is the street address and index 2 is "city, state zip".
        Phone and hours follow their "Phone Number" / "Store Hours" labels.
        """
        texts = store.xpath(
            '//div[@class="col-lg-4"]/div/*[not(self::h2 or self::strong)]//text()'
        ).extract()
        stripped = [text.strip() for text in texts]
        non_empty = [text for text in stripped if text]
        final_data = [text for text in non_empty if text not in SIEVE]

        city, state_zip = final_data[2].split(',')
        state, pcode = state_zip.strip().split()

        phone = ''
        if 'Phone Number' in final_data:
            phone_pos = final_data.index('Phone Number') + 1
            phone = final_data[phone_pos]

        opening_hours = ''
        if 'Store Hours' in final_data:
            hours_pos = final_data.index('Store Hours') + 1
            opening_hours = self.parse_hours(final_data[hours_pos])

        yield inputoutput(
            addr_full=final_data[1],
            ref=store.url,
            city=city,
            postcode=pcode,
            state=state,
            website=store.url,
            opening_hours=opening_hours,
            phone=phone,
        )
Пример #12
0
    def parse_links(self, response):
        """Yield a (raw, formatted) opening-hours pair for one location page.

        The raw hours are the ``value`` of the hidden ``hours`` input in the
        directions form.  ``extract_first()`` already returns that single
        string, so it is passed on whole; indexing it with ``[0]`` handed only
        its first character to ``self.process_hours``.  Pages without the
        input yield nothing.
        """
        hours = response.xpath(
            '//form[@id="directions-form"]/input[@name="hours"]/@value'
        ).extract_first()
        if not hours:
            return

        yield inputoutput(hours, self.process_hours(hours))
Пример #13
0
    def parse(self, response):
        """Yield a (raw, formatted) timetable pair for each store that has one.

        The JSON body is a list of store records; records whose ``timetable``
        is missing or empty are skipped.  The formatted value comes from
        ``self.store_hours``.
        """
        for store in json.loads(response.body_as_unicode()):
            timetable = store.get('timetable')
            if not timetable:
                continue
            yield inputoutput(timetable, self.store_hours(timetable))
Пример #14
0
    def parse_page(self, response):
        """Yield one item per table row, reading columns 1-9 positionally.

        Columns are, in order: store id, name, street, city, state, postcode,
        phone, latitude, longitude.  Rows without any ``<td>`` cell (for
        example a header row made of ``<th>`` cells) are skipped; they used
        to produce an item whose fields were all ``None`` and whose
        ``addr_full`` was the literal text "None None, None None".
        """
        for row in response.xpath('//tr'):
            if not row.xpath('./td'):
                continue

            def cell(position, row=row):
                # First text node of the n-th <td>, or None when absent.
                return row.xpath(
                    './td[%d]/text()' % position).extract_first()

            storeid = cell(1)
            name = cell(2)
            street = cell(3)
            city = cell(4)
            state = cell(5)
            postcode = cell(6)
            phone = cell(7)
            lat = cell(8)
            lon = cell(9)
            addr_full = "{} {}, {} {}".format(street, city, state, postcode)

            yield inputoutput(
                ref=storeid,
                name=name,
                street=street,
                city=city,
                state=state,
                postcode=postcode,
                addr_full=addr_full,
                phone=phone,
                lat=lat,
                lon=lon,
            )
Пример #15
0
    def parse(self, response):
        """Yield a (raw, formatted) business-hours pair per record in the body.

        The JSON body is a list of records; an empty or null body yields
        nothing.  The formatted value comes from ``self.convert_hours``.
        """
        results = json.loads(response.body_as_unicode())
        if not results:
            return

        for entry in results:
            formatted = self.convert_hours(entry['businesshours'])
            yield inputoutput(entry['businesshours'], formatted)
Пример #16
0
    def parse(self, response):
        """Yield a (raw, formatted) opening-hours pair per location block.

        A location block is the grandparent ``div`` of each upper-case ``h2``
        heading.  Within a block, every text node following a ``strong``
        element in a centred paragraph is taken as one hours fragment with
        newlines removed; raw fragments and their ``self.store_hours``
        results are each concatenated with a leading space per fragment.
        """
        location_blocks = response.xpath(
            "//h2[contains(@class,'font-weight-700 text-uppercase')]/parent::div/parent::div"
        )
        for match in location_blocks:
            raw_parts = []
            formatted_parts = []
            hour_nodes = match.xpath(
                ".//p[contains(@style,'text-align: center;')]/strong//following-sibling::text()"
            )
            for node in hour_nodes:
                raw_parts.append(' ' + node.extract().replace('\n', ''))
                formatted_parts.append(
                    ' ' + self.store_hours(node.extract().replace('\n', '')))

            yield inputoutput(''.join(raw_parts), ''.join(formatted_parts))
Пример #17
0
    def parse(self, response):
        """Yield operating-hours pairs for one result page, then follow ``next``.

        Each store under ``results`` yields its ``hours.operating`` value
        twice (as both raw and formatted).  When the body's ``next`` field is
        not null, a request for that page is yielded with this method as its
        callback.
        """
        data = json.loads(response.body_as_unicode())

        for store in data['results']:
            operating = store['hours']['operating']
            yield inputoutput(operating, operating)

        next_url = data['next']
        if next_url is None:
            return

        yield scrapy.Request(
            url=response.urljoin(next_url),
            headers=HEADERS,
            callback=self.parse,
        )
Пример #18
0
    def parse_store(self, response):
        """Yield a (raw, formatted) opening-hours pair for one store page.

        The store data is the JSON argument of the first ``.push(...)`` call
        in the page's first ``text/javascript`` script.  The raw value is the
        ``str()`` of ``seoData.openingHours``; the formatted value is the same
        text with square brackets and single quotes removed.  The separate
        store-finder state script is no longer parsed: its result was unused,
        and decoding it failed on pages that lack that script.
        """
        script_text = response.xpath(
            '//script[@type="text/javascript"]/text()').extract_first()
        # Flatten the script and cut out the argument of ".push(...)".
        flattened = script_text.replace('\n', '').replace('\t', '')
        json_data = flattened.split('.push(')[1].rstrip(')')
        data = json.loads(json_data)

        raw = str(data['seoData']['openingHours'])
        formatted = raw.replace('[', '').replace(']', '').replace("'", '')
        yield inputoutput(raw, formatted)
Пример #19
0
    def parse(self, response):
        """Yield one item per entry of the ``Stores`` list in the JSON body.

        Fixes over the previous version:

        * the property dict is created per store, so a store without hours no
          longer inherits ``opening_hours`` from the store before it;
        * source keys that are missing from a store are skipped rather than
          raising ``KeyError``;
        * a body without ``Stores`` yields nothing instead of failing while
          iterating ``None``.

        ``NORMALIZE_KEYS`` is iterated as (target key, source keys) pairs; the
        non-empty source values are stripped and joined with ", ".
        """
        data = json.loads(response.body_as_unicode())
        stores = data.get('Stores') or []

        for store in stores:
            props = {
                'lat': store.pop('Latitude', None),
                'lon': store.pop('Longitude', None),
                'ref': store.pop('StoreID', None),
                'website': URL,
            }

            for new_key, old_keys in NORMALIZE_KEYS:
                props[new_key] = ", ".join(
                    store.pop(key).strip() for key in old_keys
                    if store.get(key)
                )

            opening_hours = normalize_time(store.pop('Hours', ''))

            if opening_hours:
                props['opening_hours'] = opening_hours
                # Kept from the original: drops a raw 'Hours' entry should
                # the key normalisation above have produced one.
                props.pop('Hours', None)

            yield inputoutput(**props)
Пример #20
0
    def parse_location(self, response):
        """Yield a (raw, formatted) opening-hours pair for one location page.

        The raw value is the list of ``li`` texts under the ``loc_d_times``
        list; the formatted value is ``self.store_hours`` applied to those
        texts joined with commas.  Nothing is yielded when either is empty.

        Only the hours are emitted, so the phone/name/address lookups were
        removed: their results never reached the output, and indexing the
        address ``div`` with ``[0]`` raised ``IndexError`` on pages without
        one.
        """
        hours = response.xpath(
            '//ul[@class="loc_d_times row"]/li/text()').extract()
        if not hours:
            return

        opening_hours = self.store_hours(','.join(hours))
        if opening_hours:
            yield inputoutput(hours, opening_hours)
Пример #21
0
    def parse(self, response):
        """Yield one item per record under ``d.results`` in the JSON body.

        Latitude and longitude are converted to floats; the full address is
        produced by ``self.parseAddr`` from the two address lines.
        """
        jsondata = json.loads(response.body_as_unicode())

        for item in jsondata['d']['results']:
            fields = {
                'ref': item['EntityID'],
                'lat': float(item['Latitude']),
                'lon': float(item['Longitude']),
                'addr_full': self.parseAddr(item['Address1'],
                                            item['Address2']),
                'city': item['Locality'],
                'state': item['AdminDistrict'],
                'postcode': item['PostalCode'],
                'name': item['MallName'],
                'phone': item['Phone'],
                'opening_hours': item['StoreHours'],
            }
            yield inputoutput(**fields)
Пример #22
0
    def parse_location(self, response):
        """Yield a store item scraped from a location detail page.

        The ref is the ``data-loc-id`` of the "set as location" button, or
        the request URL when that attribute is absent.  Coordinates come from
        the ``currentlocdistanceid`` span; address fields returned by
        ``self.address`` are merged into the item.
        """
        loc_id = response.xpath(
            '//a[@class="btn set-as-location"]/@data-loc-id').extract_first()
        ref = loc_id or response.request.url

        phone = response.xpath(
            '//div[@class="module"]/p/a/text()').extract_first()
        name = response.xpath(
            '//div[@class="location-details"]/h1/text()').extract_first()
        hours_list = response.xpath('//dl[@class="hours"]')[0]
        opening_hours = self.store_hours(hours_list)
        lon_attr = response.xpath(
            '//span[@id="currentlocdistanceid"]/@data-long').extract_first()
        lon = float(lon_attr)
        lat_attr = response.xpath(
            '//span[@id="currentlocdistanceid"]/@data-lat').extract_first()
        lat = float(lat_attr)

        properties = {
            "phone": phone,
            "ref": ref,
            "name": name,
            "opening_hours": opening_hours,
            "lon": lon,
            "lat": lat,
        }
        properties.update(self.address(response))

        yield inputoutput(**properties)
Пример #23
0
    def parse_stores(self, response):
        """Yield a (raw, formatted) opening-hours pair for one store record.

        Records without an ``address1`` key are ignored.  The formatted value
        comes from ``self.parse_hours``; nothing is yielded when it is empty.

        Only the hours are emitted, so the address/phone/coordinate
        properties are no longer built: they never reached the output, yet
        raised ``KeyError``/``ValueError`` for records missing those fields.
        """
        json_data = json.loads(response.body_as_unicode())

        if 'address1' not in json_data:
            return

        raw = json_data['hours']
        formatted = self.parse_hours(raw)
        if formatted:
            yield inputoutput(raw, formatted)
Пример #24
0
    def parse(self, response):
        """Yield one item per entry of ``locations`` in the JSON body.

        The address line is split at its first space into house number and
        street.  An address line without a space used to abort the crawl with
        ``ValueError``; it is now kept whole as the street with an empty
        house number.
        """
        data = json.loads(response.body_as_unicode())['locations']

        for store in data:
            store_details = store['bing']
            address_line = store_details['AddressLine']

            pieces = address_line.split(' ', 1)
            if len(pieces) == 2:
                num, street = pieces
            else:
                num, street = '', address_line

            properties = {
                "phone": store_details['Phone'],
                "ref": store_details['EntityID'],
                "name": store['post']['post_title'],
                "opening_hours": self.store_hours(store_details),
                "lat": store_details['Latitude'],
                "lon": store_details['Longitude'],
                "addr_full": address_line,
                "housenumber": num,
                "street": street,
                "city": store_details['Locality'],
                "state": store_details['AdminDistrict'],
                "postcode": store_details['PostalCode'],
                "country": store_details['CountryRegion'],
                "website": store['url'],
            }

            yield inputoutput(**properties)
Пример #25
0
    def parse_store(self, response):
        """Yield a store item from the contact and hours lists of a store page.

        The contact list is read positionally: address, city, state,
        postcode, phone.  Each ``storehours`` entry of the form
        "<days>: <from>-<to>" becomes "<days> HH:00-HH:00"; entries that do
        not match that shape are skipped instead of raising
        ``AttributeError`` on a failed match.
        """
        contacts = response.xpath(
            '//ul[@class="contact"]/li/span/text()').extract()

        properties = {
            'addr_full': contacts[0],
            'city': contacts[1],
            'state': contacts[2],
            'postcode': contacts[3],
            'phone': contacts[4],
            'ref': response.url,
            'website': response.url,
        }

        day_groups = response.xpath(
            '//ul[@class="hours"]/li[@class="storehours"]/text()').extract()

        hours_pattern = re.compile(r'(.*): (\d+)-(\d+)')
        opening_hours = []
        for day_group in day_groups:
            match = hours_pattern.match(day_group)
            if match is None:
                continue
            days, f_hr, t_hr = match.groups()
            # NOTE(review): the closing hour is always shifted by 12, i.e.
            # presumably the page lists it as a 12-hour p.m. value — confirm.
            opening_hours.append('{} {:02d}:00-{:02d}:00'.format(
                days, int(f_hr), int(t_hr) + 12))

        if opening_hours:
            properties['opening_hours'] = '; '.join(opening_hours)

        yield inputoutput(**properties)
Пример #26
0
    def parse_store(self, response):
        """Yield a store item scraped from a store page without coordinates.

        The ref is the text of the page header ``h1``.  It is taken with
        ``extract_first`` rather than by stringifying the result list and
        stripping ``[``, ``]`` and ``'``, which left stray quotes whenever
        the name itself contained an apostrophe.  Opening hours are the
        markup after the "Hours" heading inside ``#secondary`` with tags
        removed and whitespace collapsed.  Address fields from
        ``self.address`` are merged in when present.
        """
        ref = response.xpath(
            '/html/body/div[1]/div[1]/header/h1/text()'
        ).extract_first(default='')

        hours_markup = response.css('#secondary').extract()[0].split(
            '<h5>Hours</h5>')[1]
        for token, replacement in (('<br>', ''), ('</aside>', ''),
                                   ('\t', ' '), ('\n', ''), ('\r', ' ')):
            hours_markup = hours_markup.replace(token, replacement)

        properties = {
            'website': response.xpath(
                '//head/link[@rel="canonical"]/@href').extract_first(),
            'ref': ref,
            # Raw string: '\s' in a normal literal is an invalid escape.
            'opening_hours': re.sub(r'\s+', ' ', hours_markup).strip(),
        }

        address = self.address(
            response.xpath(
                '/html/body/div[1]/div[1]/aside/address/text()').extract())
        if address:
            properties.update(address)

        yield inputoutput(**properties)
Пример #27
0
 def parse(self, response):
     """Yield one item per store in a JSON object of store records.

     The street part is the address text up to the first comma, period,
     parenthesis or pipe; the zip code and state are the first five-digit
     run and first two-capital-letter run in the last comma-separated part
     of the address.  An empty address now gives an empty street instead
     of raising ``IndexError``.

     NOTE(review): the phone pattern was compiled but never applied.  It is
     now used to unwrap a phone number from ``<a ...>number</a>`` markup,
     falling back to the untouched value when the phone is not in that
     form — confirm this matches the intended use.
     """
     phoneregex = re.compile(r'^<a.+>([0-9\-]+)</a>$')
     stores = json.loads(response.body_as_unicode())
     for key, value in stores.items():
         address = value['address']
         state_zipcode = address.split(',')[-1]

         street_match = re.match(r"[^(,|.)]+", address)
         zip_match = re.search(r"\d{5}", state_zipcode)
         state_match = re.search(r"[A-Z]{2}", state_zipcode)

         properties = {
             'ref': value['ID'],
             'name': value['title'],
             'addr_full': street_match.group(0) if street_match else '',
             'city': value['title'],
             'state': state_match.group(0) if state_match else '',
             'postcode': zip_match.group(0) if zip_match else '',
             'lat': value['location']['lat'],
             'lon': value['location']['lng'],
         }

         phone = value['phone']
         if phone:
             link = phoneregex.match(phone) if isinstance(phone, str) else None
             properties['phone'] = link.group(1) if link else phone

         yield inputoutput(**properties)
Пример #28
0
    def parse_store(self, response):
        """Yield a McDonald's store item scraped from a store detail page.

        Street, postcode/city and phone come from the page; ref and
        coordinates are taken from ``response.meta``.  Opening hours are
        produced by ``self.store_hours`` from the regex matches of the
        labelled ``<li>`` entries in the page body and are added only when
        non-empty.
        """
        street_text = response.xpath(
            '//div[@class="street"]//text()').extract_first()
        address = street_text.strip()

        city_text = response.xpath(
            '//div[@class="postal-code-city"]//text()').extract_first()
        postalCode, city = self.parse_city(city_text)

        phone_text = response.xpath(
            '//div[@class="field--phone"]//text()').extract_first()
        phone = phone_text.strip() if phone_text else ""

        meta = response.meta
        properties = {
            'addr_full': address,
            'city': city,
            'name': "McDonald's",
            'postcode': postalCode,
            'phone': phone,
            'ref': meta['ref'],
            'lon': meta['lon'],
            'lat': meta['lat'],
        }

        hour_matches = re.finditer(
            r'<span class=\"label\">.*</span>(.*)</li>',
            response.body_as_unicode())
        opening_hours = self.store_hours(hour_matches)
        if opening_hours:
            properties['opening_hours'] = opening_hours

        yield inputoutput(**properties)
Пример #29
0
    def parse(self, response):
        """Yield one item per store in the JSON list, with fixed opening hours.

        Every ``H:MM`` time found in a store's ``hours`` text must read
        "9:00"; any other time raises ``DifferentHours``.  The emitted
        opening hours are always "09:00-21:00".
        """
        for store in json.loads(response.body_as_unicode()):
            locality = " ".join(
                [store['city'], store['state'], store['zip']])
            addr_full = store['address'] + ", " + locality

            times = re.findall(r"(\d{1,2}:\d{1,2})", store['hours'])
            if any(time != "9:00" for time in times):
                raise DifferentHours(
                    "Store added with different hours than 09:00-21:00")

            yield inputoutput(
                name=store['store'],
                addr_full=addr_full,
                street=store['address'],
                city=store['city'],
                state=store['state'],
                postcode=store['zip'],
                country=store['country'],
                phone=store['phone'],
                website=store['permalink'],
                opening_hours='09:00-21:00',
                ref=store['id'] + " " + store['store'],
                lat=float(store['lat']),
                lon=float(store['lng']),
            )
Пример #30
0
    def parse(self, response):
        """Yield one item per ``LocationFinderStore`` element of the XML body.

        Namespaces are removed from the selector first so the plain element
        names can be used in the XPath queries.  The website is the store
        number appended to the locations base URL.
        """
        response.selector.remove_namespaces()

        for node in response.xpath('//LocationFinderStore'):
            city = node.xpath('./City/text()').extract_first()
            latitude = node.xpath('./Latitude/text()').extract_first()
            longitude = node.xpath('./Longitude/text()').extract_first()
            store_number = node.xpath('./StoreNumber/text()').extract_first()
            street = node.xpath('./StreetAddress/text()').extract_first()
            zipcode = node.xpath('./ZipCode/text()').extract_first()
            state = node.xpath('./State/text()').extract_first()
            name = node.xpath('./Name/text()').extract_first()

            yield inputoutput(
                name=name,
                addr_full=street,
                city=city,
                state=state,
                postcode=zipcode,
                ref=store_number,
                website='http://locations.in-n-out.com/' + store_number,
                lon=float(longitude),
                lat=float(latitude),
            )