def process(self):
        """Parse the Penny supermarket JSON feed and add each market as a POI.

        Downloads (or reuses the cached copy of) the dataset at ``self.link``
        and maps every entry of the ``markets`` array onto ``self.data``.
        """
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is not None:
                text = json.loads(soup)
                for poi_data in text['markets']:
                    self.data.name = 'Penny'
                    self.data.code = 'hupennysup'
                    self.data.postcode = poi_data['address']['zip'].strip()
                    # Only the part before the first comma is the street address
                    street_tmp = poi_data['address']['street'].split(',')[0]
                    self.data.city = clean_city(poi_data['address']['city'])
                    self.data.original = poi_data['address']['street']
                    self.data.lat, self.data.lon = check_hu_boundary(
                        poi_data['address']['latitude'],
                        poi_data['address']['longitude'])
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = extract_street_housenumber_better_2(
                        street_tmp.title())
                    if 'phone' in poi_data and poi_data['phone'] != '':
                        self.data.phone = clean_phone_to_str(poi_data['phone'])
                    if 'id' in poi_data and poi_data['id'] != '':
                        self.data.ref = poi_data['id'].strip()
                    self.data.public_holiday_open = False
                    # TODO: Parsing opening_hours from datasource
                    self.data.add()
        except Exception as e:
            # BUG FIX: logging.exception already records the message and the
            # traceback of the active exception; the extra logging.error(e)
            # call only duplicated the message in the log.
            logging.exception('Exception occurred: {}'.format(e))
示例#2
0
 def process(self):
     """Parse the BENU pharmacy JSON feed and bulk-insert the stores as POIs."""
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     insert_data = []
     if soup is not None:
         text = json.loads(soup.get_text())
         for poi_data in text:
             street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                 poi_data['street'])
             # Titles containing the chain name are branches of the chain;
             # any other title is kept as the POI name without a branch.
             if 'BENU Gyógyszertár' not in poi_data['title']:
                 name = poi_data['title'].strip()
                 branch = None
             else:
                 name = 'Benu gyógyszertár'
                 branch = poi_data['title'].strip()
             code = 'hubenupha'
             website = poi_data['description'].strip(
             ) if poi_data['description'] is not None else None
             # BUG FIX: the unconditional slice raised TypeError whenever the
             # description field was None. Strip the prefix only when present.
             # (The 19-char slice presumably drops a fixed URL prefix — TODO
             # confirm against the live feed.)
             if website is not None:
                 website = website[19:]
             # No opening-hours data in this datasource
             nonstop = None
             mo_o = th_o = we_o = tu_o = fr_o = sa_o = su_o = None
             mo_c = th_c = we_c = tu_c = fr_c = sa_c = su_c = None
             city = clean_city(poi_data['city'])
             postcode = poi_data['postal_code'].strip()
             lat, lon = check_hu_boundary(poi_data['lat'], poi_data['lng'])
             geom = check_geom(lat, lon)
             # Prefer the postcode known to OSM at these coordinates when configured
             postcode = query_postcode_osm_external(
                 self.prefer_osm_postcode, self.session, lat, lon, postcode)
             original = poi_data['street']
             ref = None
             if 'phone' in poi_data and poi_data['phone'] != '':
                 phone = clean_phone(poi_data['phone'])
             else:
                 phone = None
             email = None
             insert_data.append([
                 code, postcode, city, name, branch, website, original,
                 street, housenumber, conscriptionnumber, ref, phone, email,
                 geom, nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o,
                 mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c
             ])
         if len(insert_data) < 1:
             logging.warning('Resultset is empty. Skipping ...')
         else:
             df = pd.DataFrame(insert_data)
             df.columns = POI_COLS
             insert_poi_dataframe(self.session, df)
示例#3
0
 def process(self):
     """Scrape the Aldi store table from the downloaded HTML and register each row as a POI."""
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename), self.filetype)
     poi_dataset = []
     if soup is not None:
         # Collect the stripped cell texts of every row of the store table.
         table = soup.find('table',
                           attrs={'class': 'contenttable is-header-top'})
         for row in table.find('tbody').find_all('tr'):
             poi_dataset.append(
                 [cell.text.strip() for cell in row.find_all('td')])
         # Columns: 0 = postcode, 1 = city, 2 = street address
         for poi_data in poi_dataset:
             self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                 extract_street_housenumber_better_2(poi_data[2])
             self.data.name = 'Aldi'
             self.data.code = 'hualdisup'
             self.data.postcode = poi_data[0].strip()
             self.data.city = clean_city(poi_data[1])
             self.data.original = poi_data[2]
             self.data.public_holiday_open = False
             self.data.add()
    def process(self):
        """Parse the Tom Market store JSON and add each store as a POI.

        Per-store failures are logged and skipped so one malformed record
        does not abort the whole import.
        """
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is not None:
                # The downloaded document is itself the JSON payload
                text = json.loads(str(soup))
                for poi_data in text['stores']:
                    try:
                        # Assign: code, postcode, city, name, branch, website, original, street, housenumber,
                        # conscriptionnumber, ref, geom
                        self.data.code = 'hutommacon'
                        if poi_data.get(
                                'name'
                        )[2] is not None and poi_data.get('name')[2] != '':
                            self.data.ref = poi_data.get('name')[2]
                        if poi_data.get(
                                'website'
                        ) is not None and poi_data.get('website') != '':
                            self.data.website = poi_data.get('website')
                        else:
                            self.data.website = 'https://tommarket.hu'
                        self.data.lat, self.data.lon = check_hu_boundary(
                            poi_data.get('lat'), poi_data.get('long'))
                        self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                            extract_street_housenumber_better_2(
                                poi_data.get('address'))
                        if poi_data.get('zip') is not None and poi_data.get(
                                'zip') != '':
                            self.data.postcode = poi_data.get('zip')
                        self.data.original = poi_data.get('address')
                        if poi_data.get(
                                'settlement'
                        ) is not None and poi_data.get('settlement') != '':
                            self.data.city = clean_city(
                                poi_data.get('settlement'))
                        else:
                            # No settlement in the feed: resolve the city from
                            # the coordinates via OSM.
                            self.data.city = query_osm_city_name_gpd(
                                self.session, self.data.lat, self.data.lon)
                        if poi_data.get('phone') is not None and poi_data.get(
                                'phone') != '':
                            self.data.phone = clean_phone_to_str(
                                poi_data.get('phone'))
                        if poi_data.get('email') is not None and poi_data.get(
                                'email') != '':
                            # BUG FIX: the email address used to be assigned to
                            # self.data.phone, silently overwriting the phone
                            # number parsed just above.
                            self.data.email = poi_data.get('email').strip()
                        self.data.public_holiday_open = False
                        self.data.add()
                    except Exception as e:
                        logging.error(e)
                        logging.error(poi_data)
                        logging.exception('Exception occurred')

        except Exception as e:
            logging.error(e)
            logging.exception('Exception occurred')
示例#5
0
 def process(self):
     """Scrape the Aldi store table from HTML and bulk-insert the stores as POIs."""
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     data = []
     insert_data = []
     # Idiom fix: compare against None with `is not None`, not `!=`.
     if soup is not None:
         # Collect the stripped cell texts of every row of the store table
         table = soup.find('table',
                           attrs={'class': 'contenttable is-header-top'})
         table_body = table.find('tbody')
         rows = table_body.find_all('tr')
         for row in rows:
             cols = row.find_all('td')
             cols = [element.text.strip() for element in cols]
             data.append(cols)
         # Columns: 0 = postcode, 1 = city, 2 = street address
         for poi_data in data:
             street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                 poi_data[2])
             name = 'Aldi'
             code = 'hualdisup'
             postcode = poi_data[0].strip()
             city = clean_city(poi_data[1])
             branch = None
             website = None
             # No opening-hours data in this datasource
             nonstop = None
             mo_o = th_o = we_o = tu_o = fr_o = sa_o = su_o = None
             mo_c = th_c = we_c = tu_c = fr_c = sa_c = su_c = None
             original = poi_data[2]
             geom = None
             ref = None
             phone = None
             email = None
             insert_data.append([
                 code, postcode, city, name, branch, website, original,
                 street, housenumber, conscriptionnumber, ref, phone, email,
                 geom, nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o,
                 mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c
             ])
         if len(insert_data) < 1:
             logging.warning('Resultset is empty. Skipping ...')
         else:
             df = pd.DataFrame(insert_data)
             df.columns = POI_COLS
             insert_poi_dataframe(self.session, df)
示例#6
0
 def process(self):
     """Extract Tom Market stores from inline JavaScript and build the insert rows.

     NOTE(review): unlike the sibling parsers this method never calls
     insert_poi_dataframe() after assigning df.columns — confirm whether the
     insert step was lost from this fragment.
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     insert_data = []
     if soup is not None:
         # Raw string so \s is a regex escape, not an (invalid) string escape
         poi_data = soup.find_all('script', text=re.compile(r'var\s*marker'))
         poi_data_match = PATTERN_TOM_MARKET.findall(str(poi_data))
         # (Removed leftover debug print statements; findall() never yields
         # None entries.)
         for poi_data in poi_data_match:
             city, street, housenumber, conscriptionnumber = extract_city_street_housenumber_address(
                 poi_data)
             city = clean_city(city)
             # The datasource carries no postcode; derive one from the city
             postcode = search_for_postcode(self.session, city)
             name = 'Tom Market'
             code = 'hutommacon'
             branch = None
             website = None
             # No opening-hours data in this datasource
             nonstop = None
             mo_o = th_o = we_o = tu_o = fr_o = sa_o = su_o = None
             mo_c = th_c = we_c = tu_c = fr_c = sa_c = su_c = None
             original = poi_data
             ref = None
             geom = None
             phone = None
             email = None
             insert_data.append([
                 code, postcode, city, name, branch, website, original,
                 street, housenumber, conscriptionnumber, ref, phone, email,
                 geom, nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o,
                 mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c
             ])
         if len(insert_data) < 1:
             logging.warning('Resultset is empty. Skipping ...')
         else:
             df = pd.DataFrame(insert_data)
             df.columns = POI_COLS
    def process(self):
        """Parse the OIL! fuel-station JSON feed and add each station as a POI.

        Per-station failures are logged and skipped so one malformed record
        does not abort the whole import.
        """
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is not None:
                text = json.loads(soup)
                for poi_data in text:
                    try:
                        self.data.name = 'OIL!'
                        self.data.code = 'huoilfu'
                        if poi_data.get('zip') is not None and poi_data.get(
                                'zip') != '':
                            self.data.postcode = poi_data.get('zip').strip()
                        if poi_data.get('city') is not None and poi_data.get(
                                'city') != '':
                            self.data.city = clean_city(poi_data.get('city'))
                        self.data.lat, self.data.lon = check_hu_boundary(
                            poi_data.get('lat'), poi_data.get('lng'))
                        if poi_data.get(
                                'address'
                        ) is not None and poi_data.get('address') != '':
                            self.data.original = poi_data.get('address')
                            self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                                extract_street_housenumber_better_2(
                                    poi_data.get('address'))
                        if poi_data.get('phone') is not None and poi_data.get(
                                'phone') != '':
                            self.data.phone = clean_phone_to_str(
                                poi_data.get('phone'))
                        # Every OIL! station is assumed to sell 95 octane and
                        # diesel — the feed carries no per-station fuel data.
                        self.data.fuel_octane_95 = True
                        self.data.fuel_diesel = True
                        if poi_data.get(
                                'id') is not None and poi_data.get('id') != '':
                            self.data.ref = poi_data.get('id').strip()
                        if poi_data.get('url') is not None and poi_data.get(
                                'url') != '':
                            self.data.website = poi_data.get('url').strip()
                        else:
                            # Fall back to the chain's main site
                            self.data.website = 'https://www.oil-benzinkutak.hu'
                        if poi_data.get('store') is not None and poi_data.get(
                                'store') != '':
                            # Branch is the store field without its first word
                            # (presumably the chain name prefix — TODO confirm)
                            tmp = poi_data.get('store').split(' ', 1)
                            self.data.branch = tmp[1].strip().capitalize()
                        self.data.add()
                    except Exception as e:
                        logging.error(e)
                        logging.error(poi_data)
                        logging.exception('Exception occurred')

        except Exception as e:
            logging.error(e)
            logging.exception('Exception occurred')
    def process(self):
        """Parse CBA / Príma stores from the `boltok_nyers` JavaScript variable.

        The branch name decides brand and POI code: branches containing
        'Príma' become Príma stores, everything else is CBA.
        """
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is not None:
                # The store list is embedded in the page as a JS variable
                text = json.loads(
                    extract_javascript_variable(soup, 'boltok_nyers'))
                for poi_data in text:
                    # Assign: code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber, ref, geom
                    self.data.city = clean_city(poi_data.get('A_VAROS'))
                    self.data.postcode = poi_data.get('A_IRSZ').strip()
                    self.data.branch = poi_data.get('P_NAME').strip()
                    self.data.name = 'Príma' if 'Príma' in self.data.branch else 'CBA'
                    self.data.code = 'huprimacon' if 'Príma' in self.data.branch else 'hucbacon'
                    # Opening hours come as PS_OPEN_FROM_1..7 / PS_OPEN_TO_1..7
                    # (one pair per weekday); missing fields stay None.
                    for i in range(0, 7):
                        self.data.day_open(
                            i,
                            clean_opening_hours_2(
                                poi_data.get('PS_OPEN_FROM_{}'.format(i + 1)))
                            if poi_data.get('PS_OPEN_FROM_{}'.format(i + 1))
                            is not None else None)
                        self.data.day_close(
                            i,
                            clean_opening_hours_2(
                                poi_data.get('PS_OPEN_TO_{}'.format(i + 1)))
                            if poi_data.get('PS_OPEN_TO_{}'.format(i + 1))
                            is not None else None)
                    self.data.original = poi_data.get('A_CIM')
                    self.data.lat, self.data.lon = check_hu_boundary(
                        poi_data.get('PS_GPS_COORDS_LAT'),
                        poi_data.get('PS_GPS_COORDS_LNG'))
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = extract_street_housenumber_better_2(
                        poi_data.get('A_CIM'))
                    if 'PS_PUBLIC_TEL' in poi_data and poi_data.get(
                            'PS_PUBLIC_TEL') != '':
                        self.data.phone = clean_phone_to_str(
                            poi_data.get('PS_PUBLIC_TEL'))
                    else:
                        self.data.phone = None
                    if 'PS_PUBLIC_EMAIL' in poi_data and poi_data.get(
                            'PS_PUBLIC_EMAIL') != '':
                        self.data.email = poi_data.get('PS_PUBLIC_EMAIL')
                    else:
                        self.data.email = None
                    self.data.public_holiday_open = False
                    self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')

            logging.error(e)
示例#9
0
    def process(self):
        """Parse the Yves Rocher store-locator JSON and add Hungarian stores as POIs.

        Records whose country_id is not 3 are skipped (3 presumably maps to
        Hungary in this feed — TODO confirm).
        """
        try:
            soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename),
                                        self.filetype)
            if soup is not None:
                text = json.loads(str(soup))
                for poi_data in text.get('list'):
                    try:
                        if poi_data.get('country_id') != 3:
                            continue
                        else:
                            self.data.name = 'Yves Rocher'
                            self.data.code = 'huyvesrcos'
                            self.data.lat, self.data.lon = \
                                check_hu_boundary(poi_data.get(
                                    'latitude'), poi_data.get('longitude'))
                            self.data.website = 'https://www.yves-rocher.hu{}/'.format(
                                poi_data.get('request_path'))
                            # NOTE(review): `i in opening` only matches when the
                            # 'hours' mapping is keyed by integers 0-6 — confirm
                            # against the feed (JSON object keys are strings).
                            opening = poi_data.get('hours')
                            for i in range(0, 7):
                                if i in opening:
                                    self.data.day_open(
                                        i, opening[i]['hour_from'])
                                    self.data.day_close(
                                        i, opening[i]['hour_to'])
                            self.data.postcode = poi_data.get('zip')
                            self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                                extract_street_housenumber_better_2(
                                    poi_data.get('address'))
                            self.data.city = clean_city(poi_data.get('city'))
                            self.data.original = poi_data.get('address')
                            # NOTE(review): self.data.phone read below may still
                            # hold the previous record's value when this record
                            # has no 'phone' key — confirm self.data is reset
                            # between add() calls.
                            if poi_data.get('phone') is not None and poi_data.get('phone') != '':
                                self.data.phone = clean_phone_to_str(
                                    poi_data.get('phone'))
                            # Append the mobile number to an existing landline,
                            # or use it alone when no landline was given.
                            if poi_data.get('mobile') is not None and poi_data.get('mobile') != '' \
                                    and self.data.phone is not None:
                                self.data.phone = '{};{}'.format(self.data.phone,
                                                                 clean_phone_to_str(poi_data.get('mobile')))
                            elif poi_data.get('mobile') is not None and poi_data.get('mobile') != '' \
                                    and self.data.phone is None:
                                self.data.phone = clean_phone_to_str(
                                    poi_data.get('mobile'))
                            self.data.public_holiday_open = False
                            self.data.add()
                    except Exception as e:
                        logging.error(e)
                        logging.error(poi_data)
                        logging.exception('Exception occurred')

        except Exception as e:
            logging.error(e)
            logging.exception('Exception occurred')
示例#10
0
 def process(self):
     """Parse the MOL fuel-station JSON (fetched via POST) and bulk-insert the stations."""
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename), POST_DATA)
     insert_data = []
     # Idiom fix: compare against None with `is not None`, not `!=`.
     if soup is not None:
         text = json.loads(soup.get_text())
         for poi_data in text:
             name = 'MOL'
             code = 'humolfu'
             postcode = poi_data['postcode'].strip()
             street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                 poi_data['address'])
             city = clean_city(poi_data['city'])
             branch = None
             website = None
             # No opening-hours data in this datasource
             nonstop = None
             mo_o = th_o = we_o = tu_o = fr_o = sa_o = su_o = None
             mo_c = th_c = we_c = tu_c = fr_c = sa_c = su_c = None
             original = poi_data['address']
             ref = None
             lat, lon = check_hu_boundary(poi_data['lat'], poi_data['lng'])
             geom = check_geom(lat, lon)
             # Prefer the postcode known to OSM at these coordinates when configured
             postcode = query_postcode_osm_external(
                 self.prefer_osm_postcode, self.session, lat, lon, postcode)
             phone = None
             email = None
             insert_data.append([
                 code, postcode, city, name, branch, website, original,
                 street, housenumber, conscriptionnumber, ref, phone, email,
                 geom, nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o,
                 mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c
             ])
         if len(insert_data) < 1:
             logging.warning('Resultset is empty. Skipping ...')
         else:
             df = pd.DataFrame(insert_data)
             df.columns = POI_COLS
             insert_poi_dataframe(self.session, df)
示例#11
0
    def process(self):
        """Parse the Pepco store JSON and add the Hungarian stores as POIs."""
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is not None:
                text = json.loads(soup)
                for poi_data in text['data']:
                    '''
                    The Pepco dataset contains all European data. Since the program cannot handle POIs outside Hungary (so far)
                    this will limit only for Hungarian POIs
                    In fact this depends on OSM extract but currently we use only Hungarian OSM extract
                    Select only Hungarian POIs
                    '''
                    if 'city' in poi_data and (
                            poi_data['city'] == '' or query_osm_city_name(
                                self.session, poi_data['city']) is None):
                        continue
                    elif 'city' in poi_data:
                        self.data.city = clean_city(poi_data['city'])
                    else:
                        continue
                    self.data.name = 'Pepco'
                    self.data.code = 'hupepcoclo'
                    # Assign: code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber, ref, geom
                    self.data.lat, self.data.lon = \
                        check_hu_boundary(
                            poi_data['coordinates']['lat'], poi_data['coordinates']['lng'])
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                        extract_street_housenumber_better_2(
                            poi_data.get('streetAddress'))
                    self.data.original = poi_data.get('streetAddress')
                    self.data.postcode = poi_data.get('postalCode')
                    # Assign opening_hours
                    opening = poi_data['openingHours']
                    for i in range(0, 7):
                        if i in opening:
                            self.data.day_open(i, opening[i]['from'])
                            self.data.day_close(i, opening[i]['to'])
                    # Assign additional informations
                    self.data.phone = clean_phone_to_str(
                        poi_data.get('phoneNumber'))
                    self.data.public_holiday_open = False
                    self.data.add()
        except Exception as e:
            # BUG FIX: the old handler called logging.error(logging.error(e)),
            # which logged e and then logged the inner call's None return.
            # logging.exception records the message plus the traceback.
            logging.exception('Exception occurred: {}'.format(e))
    def process(self):
        """Parse Jysk store data from the page's embedded Drupal settings JSON.

        Each store's detail page is also downloaded; the result is unused
        here, presumably only to warm the download cache — TODO confirm.
        """
        try:
            soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename),
                                        self.filetype)
            if soup is not None:
                # The store list lives in the drupal-settings-json <script> tag
                soup_data = soup.find(
                    'script', {'data-drupal-selector': 'drupal-settings-json'})
                json_data = json.loads(soup_data.text, strict=False)
                for shop in json_data['storesLocator']['BuildCoordinates']:
                    self.data.name = 'Jysk'
                    self.data.code = 'hujyskfur'
                    self.data.lat, self.data.lon = check_hu_boundary(
                        shop.get('lat'), shop.get('lon'))
                    self.data.branch = shop.get('name')
                    internal_id = shop.get('id')
                    shop_soup = save_downloaded_soup('{}?storeId={}'.format(self.link, internal_id),
                                                     os.path.join(self.download_cache,
                                                                  '{}.{}.html'.format(self.filename, internal_id)))
                    # NOTE(review): one central phone number is hard-coded for
                    # every branch — confirm this is intended.
                    self.data.phone = '+36 1 700 8400'
                    self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')

            logging.error(e)
    def process(self):
        """Parse Mobil Petrol stations from the `totem_stations` JS variable.

        Opening hours are derived from the free-text `services` field:
        stations flagged '0-24' are non-stop, otherwise a recognised
        hour-range token sets the same daily opening window for all days.
        """
        try:
            soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename),
                                        self.filetype)
            if soup is not None:
                # The station list is embedded in the page as a JS variable
                text = json.loads(
                    extract_javascript_variable(soup, 'totem_stations'))
                for poi_data in text.values():
                    self.data.name = 'Mobil Petrol'
                    self.data.code = 'humobpefu'
                    self.data.website = poi_data.get('description')
                    self.data.city = clean_city(poi_data.get('city'))
                    self.data.original = poi_data.get('address')
                    self.data.lat, self.data.lon = check_hu_boundary(poi_data['location']['lat'],
                                                                     poi_data['location']['lng'])
                    self.data.postcode = None
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = extract_street_housenumber_better_2(
                        poi_data.get('address'))
                    self.data.phone = clean_phone_to_str(poi_data.get('phone'))
                    self.data.public_holiday_open = False
                    if '0-24' in poi_data.get('services'):
                        self.data.nonstop = True
                        self.data.public_holiday_open = True
                    else:
                        # BUG FIX: these used to be probed with
                        # 'open_from' in locals(), so values leaked over from
                        # the previous station whenever no pattern matched the
                        # current one. Reset them for every station instead.
                        open_from = None
                        open_to = None
                        if '6-22' in poi_data.get('services'):
                            open_from = '06:00'
                            open_to = '22:00'
                        elif '6-21' in poi_data.get('services'):
                            open_from = '06:00'
                            open_to = '21:00'
                        elif '5-22' in poi_data.get('services'):
                            open_from = '05:00'
                            open_to = '22:00'
                        elif '6-18' in poi_data.get('services'):
                            open_from = '06:00'
                            open_to = '18:00'
                        if open_from is not None and open_to is not None:
                            for i in range(0, 7):
                                self.data.day_open(i, open_from)
                                self.data.day_close(i, open_to)
                        self.data.public_holiday_open = False
                    self.data.add()
        except Exception as e:
            # logging.exception already records the exception and traceback;
            # the previous extra logging.error(e) only duplicated the message.
            logging.exception('Exception occurred: {}'.format(e))
示例#14
0
 def process(self):
     """Load the national tobacco shop dataset (JSON via POST) and register every shop."""
     cache_path = os.path.join(self.download_cache, self.filename)
     soup = save_downloaded_soup('{}'.format(self.link), cache_path,
                                 self.filetype, POST_DATA)
     if soup is None:
         return
     for shop in json.loads(soup):
         self.data.name = 'Nemzeti Dohánybolt'
         self.data.code = 'hunemdotob'
         self.data.postcode = shop.get('postcode').strip()
         self.data.city = clean_city(shop['city'])
         self.data.original = shop['address']
         self.data.lat, self.data.lon = check_hu_boundary(
             shop['lat'], shop['lng'])
         self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
             extract_street_housenumber_better_2(shop['address'])
         self.data.public_holiday_open = False
         self.data.add()
    def process(self):
        """Parse the SPAR store JSON feed and add each store as a POI.

        The store name decides the sub-brand (Spar Expressz / Interspar /
        Spar market / DeSpar) and the matching POI code; unrecognised names
        fall back to plain Spar.
        """
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is not None:
                text = json.loads(soup)
                for poi_data in text:
                    # Assign: code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber, ref, geom
                    if 'xpres' in poi_data['name']:
                        self.data.name = 'Spar Expressz'
                        self.data.code = 'husparecon'
                    elif 'INTER' in poi_data['name']:
                        self.data.name = 'Interspar'
                        self.data.code = 'husparisup'
                    elif 'market' in poi_data['name']:
                        self.data.name = 'Spar'
                        self.data.code = 'husparsup'
                    elif 'DESPAR' in poi_data['name']:
                        self.data.name = 'DeSpar'
                        self.data.code = 'huspardcon'
                    else:
                        self.data.name = 'Spar'
                        self.data.code = 'husparsup'
                    # Normalize the brand spelling before ref/branch extraction
                    poi_data['name'] = poi_data['name'].replace(
                        'INTERSPAR', 'Interspar')
                    poi_data['name'] = poi_data['name'].replace('SPAR', 'Spar')
                    # The store reference number is embedded in the name
                    ref_match = PATTERN_SPAR_REF.search(poi_data['name'])
                    self.data.ref = ref_match.group(
                        1).strip() if ref_match is not None else None
                    self.data.city = clean_city(poi_data['city'])
                    self.data.postcode = poi_data.get('zipCode').strip()
                    # Branch is the name without the trailing parenthesised part
                    self.data.branch = poi_data['name'].split('(')[0].strip()
                    self.data.website = poi_data['pageUrl'].strip()
                    self.data.lat, self.data.lon = check_hu_boundary(
                        poi_data['latitude'], poi_data['longitude'])
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = extract_street_housenumber_better_2(
                        poi_data['address'])
                    self.data.original = poi_data['address']
                    self.data.public_holiday_open = False
                    self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')

            logging.error(e)
 def process(self):
     """Download the Magyar Posta dataset and bulk-insert one POI per item.

     Builds up a POIDataset record per entry, dispatching on the item
     'type' field (post office, cheque machine, parcel machine, PostaPont).
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename), self.filetype)
     if soup is not None:
         text = json.loads(soup)
         data = POIDataset()
         for poi_data in text['items']:
             if poi_data['type'] == 'posta':
                 if 'mobilposta' in poi_data['name']:
                     data.name = 'Mobilposta'
                     data.code = 'hupostamp'
                     # NOTE(review): this branch does not set
                     # public_holiday_open, so the value left over from the
                     # previous loop iteration is reused — confirm intent.
                 else:
                     data.name = 'Posta'
                     data.code = 'hupostapo'
                     data.public_holiday_open = False
             elif poi_data['type'] == 'csekkautomata':
                 data.name = 'Posta csekkautomata'
                 data.code = 'hupostacse'
                 data.public_holiday_open = True
             elif poi_data['type'] == 'postamachine':
                 data.name = 'Posta csomagautomata'
                 data.code = 'hupostacso'
                 data.public_holiday_open = True
             elif poi_data['type'] == 'postapoint':
                 data.name = 'PostaPont'
                 data.code = 'hupostapp'
                 data.public_holiday_open = False
             else:
                 # Unknown type: log it, but the record is still added below
                 # with whatever name/code the previous iteration left behind.
                 logging.error('Non existing Posta type.')
             data.postcode = poi_data['zipCode'].strip()
             data.city = clean_city(poi_data['city'])
             data.branch = poi_data['name']
             data.lat = poi_data['lat']
             data.lon = poi_data['lng']
             data.street, data.housenumber, data.conscriptionnumber = extract_street_housenumber_better_2(
                 poi_data['address'])
             data.original = poi_data['address']
             data.add()
         # NOTE(review): 'data is None' can never be true here (assigned
         # above); 'lenght' is presumably the POIDataset API's own spelling —
         # verify against the class definition before renaming.
         if data is None or data.lenght() < 1:
             logging.warning('Resultset is empty. Skipping ...')
         else:
             insert_poi_dataframe(self.session, data.process())
    def process(self):
        """Download the BENU pharmacy dataset and add one POI per record."""
        try:
            soup = save_downloaded_soup('{}'.format(self.link),
                                        os.path.join(self.download_cache, self.filename),
                                        self.filetype)
            if soup is None:
                return
            for poi_data in json.loads(str(soup)):
                try:
                    title = poi_data.get('title')
                    if 'BENU Gyógyszertár' in title:
                        self.data.name = 'Benu gyógyszertár'
                        self.data.branch = title.strip()
                    else:
                        # Non-BENU-branded pharmacies keep their own name.
                        self.data.name = title.strip()
                        self.data.branch = None
                    self.data.code = 'hubenupha'
                    # The description field may start with the pharmacy URL.
                    description = poi_data.get('description')
                    self.data.website = None
                    if description is not None:
                        pu_match = PATTERN_FULL_URL.match(description)
                        if pu_match is not None:
                            self.data.website = pu_match.group(0).strip()
                    self.data.city = clean_city(poi_data.get('city'))
                    self.data.postcode = poi_data.get('postal_code').strip()
                    self.data.lat, self.data.lon = check_hu_boundary(
                        poi_data.get('lat'), poi_data.get('lng'))
                    (self.data.street,
                     self.data.housenumber,
                     self.data.conscriptionnumber) = extract_street_housenumber_better_2(
                        poi_data.get('street'))
                    self.data.original = poi_data.get('street')
                    raw_phone = poi_data.get('phone', '')
                    self.data.phone = clean_phone_to_str(raw_phone) if raw_phone != '' else None
                    self.data.public_holiday_open = False
                    self.data.add()
                except Exception as e:
                    logging.error(e)
                    logging.error(poi_data)
                    logging.exception('Exception occurred')

        except Exception as e:
            logging.error(e)
            logging.exception('Exception occurred')
示例#18
0
    def process(self):
        """Download the MOL filling-station dataset and add one POI per record.

        Fix: the original assigned ``public_holiday_open`` twice (``True``
        early in the loop, ``False`` at the end); only the final ``False``
        ever took effect, so the dead first assignment was removed.
        """
        try:
            soup = save_downloaded_soup('{}'.format(self.link),
                                        os.path.join(self.download_cache, self.filename),
                                        self.filetype, POST_DATA)
            if soup is not None:
                text = json.loads(soup)
                for poi_data in text:
                    self.data.name = 'MOL'
                    # 'MOL Sziget' branded stations get a dedicated code.
                    if " Sziget " in poi_data.get('name'):
                        self.data.code = 'humolwfu'
                    else:
                        self.data.code = 'humolfu'
                    self.data.postcode = poi_data.get('postcode').strip()
                    self.data.city = clean_city(poi_data['city'])
                    self.data.original = poi_data['address']
                    self.data.lat, self.data.lon = check_hu_boundary(
                        poi_data['lat'], poi_data['lng'])
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                        extract_street_housenumber_better_2(poi_data['address'])
                    # Optional services are flagged by tokens in 'servicesin';
                    # hoist the lookup instead of repeating it five times.
                    services = poi_data.get('servicesin')
                    self.data.truck = 'kamion_parkolo' in services
                    self.data.food = 'fresh_corner' in services
                    self.data.rent_lpg_bottles = 'pb' in services
                    self.data.fuel_adblue = 'adblue' in services
                    self.data.fuel_lpg = 'lpg' in services
                    # Every station is assumed to offer these.
                    self.data.fuel_octane_95 = True
                    self.data.fuel_diesel = True
                    self.data.fuel_octane_100 = True
                    self.data.fuel_diesel_gtl = True
                    self.data.compressed_air = True
                    # NOTE(review): the original also set this to True earlier
                    # in the loop; False was the value that took effect —
                    # confirm which was intended.
                    self.data.public_holiday_open = False
                    self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')

            logging.error(e)
    def process(self):
        """Download the Rossmann store locator page and add one POI per store."""
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype, None, self.verify_link)
            if soup is None:
                return
            # The store data lives in a JavaScript 'locations' variable
            # embedded in the page.
            stores = json.loads(extract_javascript_variable(soup, 'locations'))
            for entry in stores:
                place = entry['addresses'][0]
                # Assign: code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber, ref, geom
                self.data.name = 'Rossmann'
                self.data.code = 'hurossmche'
                self.data.city = clean_city(place['city'])
                self.data.postcode = place.get('zip').strip()
                for day in range(7):
                    hours = place['business_hours'][WeekDaysLong(day).name.lower()]
                    if hours is None:
                        self.data.day_open_close(day, None, None)
                    else:
                        opening, closing = clean_opening_hours(hours)
                        self.data.day_open_close(day, opening, closing)
                self.data.lat, self.data.lon = check_hu_boundary(
                    place['position'][0], place['position'][1])
                (self.data.street,
                 self.data.housenumber,
                 self.data.conscriptionnumber) = extract_street_housenumber_better_2(
                    place['address'])
                self.data.original = place['address']
                self.data.public_holiday_open = False
                self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')
            logging.error(e)
示例#20
0
    def process(self):
        """Download the Foxpost parcel locker dataset and add one POI per locker.

        Consistency fix: both branches of the opening-hours loop now use
        ``day_open_close`` (the non-None branch previously used the separate
        ``day_open``/``day_close`` calls, unlike the sibling processors).
        """
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is not None:
                text = json.loads(soup)
                for poi_data in text:
                    self.data.name = 'Foxpost'
                    self.data.code = 'hufoxpocso'
                    self.data.postcode = poi_data['zip'].strip()
                    self.data.city = clean_city(poi_data['city'])
                    self.data.branch = poi_data['name']
                    # Hours are keyed by unaccented Hungarian weekday names.
                    for i in range(0, 7):
                        hours = poi_data['open'][WeekDaysLongHUUnAccented(i).name.lower()]
                        if hours is not None:
                            opening, closing = clean_opening_hours(hours)
                            self.data.day_open_close(i, opening, closing)
                        else:
                            self.data.day_open_close(i, None, None)
                    self.data.original = poi_data['address']
                    self.data.lat, self.data.lon = check_hu_boundary(
                        poi_data['geolat'], poi_data['geolng'])
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                        extract_street_housenumber_better_2(poi_data['street'])
                    self.data.public_holiday_open = False
                    # 'findme' holds free-text directions to the locker.
                    self.data.description = poi_data.get('findme')
                    self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')

            logging.error(e)
示例#21
0
    def process(self):
        """Download the OBI store dataset and add one POI per store."""
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is None:
                return
            for store in json.loads(soup).get('stores'):
                address = store['address']
                self.data.name = 'OBI'
                self.data.code = 'huobidiy'
                self.data.postcode = address['zip'].strip()
                self.data.city = clean_city(address['city'])
                self.data.original = address['street']
                self.data.lat, self.data.lon = check_hu_boundary(
                    address['lat'], address['lon'])
                (self.data.street,
                 self.data.housenumber,
                 self.data.conscriptionnumber) = extract_street_housenumber_better_2(
                    address['street'])
                if 'phone' in store and store.get('phone') != '':
                    self.data.phone = clean_phone_to_str(store.get('phone'))
                if 'storeNumber' in store and store.get('storeNumber') != '':
                    self.data.ref = store.get('storeNumber').strip()
                if 'email' in store and store.get('email') != '':
                    self.data.email = clean_email(store.get('email'))
                if 'path' in store and store.get('path') != '':
                    self.data.website = store.get('path')
                # TODO: opening hour parser for store.get('hours'), format is like:
                #  Hétfő - Szombat: 8:00 - 20:00\nVasárnap: 08:00 - 18:00
                # self.data.public_holiday_open = False
                self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')
            logging.error(e)
    def process(self):
        """Parse the MOL Bubi XML feed and add one POI per 'place' element."""
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            for place in soup.findAll('place'):
                try:
                    self.data.name = 'MOL Bubi'
                    self.data.code = 'hububibir'
                    self.data.city = 'Budapest'
                    station_name = place.get('name')
                    if station_name is not None and station_name != '':
                        # Station names look like '<ref> - <branch name>'.
                        self.data.branch = station_name.split('-')[1].strip()
                        self.data.ref = station_name.split('-')[0].strip()
                    self.data.nonstop = True
                    # self.data.capacity = place.attrib['bike_racks'].strip() \
                    # if place.attrib['bike_racks'] is not None else None
                    self.data.lat, self.data.lon = check_hu_boundary(
                        place.get('lat').replace(',', '.'),
                        place.get('lng').replace(',', '.'))
                    self.data.postcode = query_postcode_osm_external(
                        True, self.session, self.data.lon, self.data.lat, None)
                    self.data.public_holiday_open = True
                    self.data.add()
                except Exception as e:
                    logging.error(e)
                    logging.error(place)
                    logging.exception('Exception occurred')

        except Exception as e:
            logging.error(e)
            logging.exception('Exception occurred')

            logging.error(soup)
示例#23
0
 def process(self):
     """Scrape the Rossmann store list embedded as a JS 'places' variable.

     Fixes relative to the original:
     * raw string for the regex (``'\\s'`` in a plain literal raises an
       invalid-escape DeprecationWarning),
     * the seven copy-pasted per-day opening-hours blocks are one loop; the
       original also bound Tuesday to ``th_*`` and Thursday to ``tu_*``,
       which was misleading even though the final column order was correct,
     * ``!= None`` replaced with ``is not None``.
     """
     soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename), None,
                                 self.verify_link)
     insert_data = []
     if soup is not None:
         # Extract the JSON payload from the page's 'places' JS variable.
         pattern = re.compile(r'^\s*var\s*places.*')
         script = soup.find('script', text=pattern)
         m = pattern.match(script.get_text())
         data = clean_javascript_variable(m.group(0), 'places')
         text = json.loads(data)
         for poi_data in text:
             poi_data = poi_data['addresses'][0]
             # Assign: code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber, ref, geom
             street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                 poi_data['address'])
             name = 'Rossmann'
             code = 'hurossmche'
             city = clean_city(poi_data['city'])
             postcode = poi_data['zip'].strip()
             branch = None
             website = None
             nonstop = False
             # Opening/closing hours, Monday..Sunday, matching POI_COLS order.
             opens = []
             closes = []
             for day in ('monday', 'tuesday', 'wednesday', 'thursday',
                         'friday', 'saturday', 'sunday'):
                 hours = poi_data['business_hours'][day]
                 if hours is not None:
                     day_open, day_close = clean_opening_hours(hours)
                 else:
                     day_open, day_close = None, None
                 opens.append(day_open)
                 closes.append(day_close)
             lat, lon = check_hu_boundary(poi_data['position'][0], poi_data['position'][1])
             geom = check_geom(lat, lon)
             postcode = query_postcode_osm_external(self.prefer_osm_postcode, self.session, lat, lon, postcode)
             original = poi_data['address']
             ref = None
             phone = None
             email = None
             insert_data.append(
                 [code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber,
                  ref, phone, email, geom, nonstop] + opens + closes)
         if len(insert_data) < 1:
             logging.warning('Resultset is empty. Skipping ...')
         else:
             df = pd.DataFrame(insert_data)
             df.columns = POI_COLS
             insert_poi_dataframe(self.session, df)
示例#24
0
 def process(self):
     """Fetch the Spar store list and bulk-insert the parsed POI rows."""
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     insert_data = []
     if soup is not None:
         stores = json.loads(soup.get_text())
         for poi_data in stores:
             # Assign: code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber, ref, geom
             street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                 poi_data['address'])
             if 'xpres' in poi_data['name']:
                 name, code = 'Spar Expressz', 'husparexp'
             elif 'INTER' in poi_data['name']:
                 name, code = 'Interspar', 'husparint'
             elif 'market' in poi_data['name']:
                 name, code = 'Spar', 'husparsup'
             else:
                 name, code = 'Spar', 'husparsup'
             # Normalize brand casing before extracting the reference number.
             poi_data['name'] = poi_data['name'].replace(
                 'INTERSPAR', 'Interspar').replace('SPAR', 'Spar')
             ref_match = PATTERN_SPAR_REF.search(poi_data['name'])
             ref = ref_match.group(1).strip() if ref_match is not None else None
             city = clean_city(poi_data['city'])
             postcode = poi_data['zipCode'].strip()
             branch = poi_data['name'].split('(')[0].strip()
             website = poi_data['pageUrl'].strip()
             lat, lon = check_hu_boundary(poi_data['latitude'],
                                          poi_data['longitude'])
             geom = check_geom(lat, lon)
             postcode = query_postcode_osm_external(
                 self.prefer_osm_postcode, self.session, lat, lon, postcode)
             original = poi_data['address']
             phone = None
             email = None
             nonstop = None
             # No opening-hours data in this feed: nonstop plus the
             # 7 open / 7 close columns are all left as None.
             insert_data.append(
                 [code, postcode, city, name, branch, website, original, street,
                  housenumber, conscriptionnumber, ref, phone, email, geom, nonstop]
                 + [None] * 14)
         if len(insert_data) < 1:
             logging.warning('Resultset is empty. Skipping ...')
         else:
             df = pd.DataFrame(insert_data)
             df.columns = POI_COLS
             insert_poi_dataframe(self.session, df)
示例#25
0
 def process(self):
     """Scrape the Tesco store table and bulk-insert the parsed POI rows."""
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     rows_data = []
     insert_data = []
     if soup is not None:
         # Each table row holds one store; the first cell links to its page.
         table = soup.find('table', attrs={'class': 'tescoce-table'})
         for row in table.find('tbody').find_all('tr'):
             cells = row.find_all('td')
             link = cells[0].find('a').get('href') \
                 if cells[0].find('a') is not None else []
             cells = [element.text.strip() for element in cells]
             cells[0] = cells[0].split('\n')[0]
             del cells[-1]
             del cells[-1]
             cells.append(link)
             rows_data.append(cells)
         # Hoisted out of the loop: same pattern for every row.
         tesco_replace = re.compile('(expressz{0,1})', re.IGNORECASE)
         for poi_data in rows_data:
             # Assign: code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber, ref, geom
             street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                 poi_data[3])
             poi_data[0] = tesco_replace.sub('Expressz', poi_data[0])
             if 'xpres' in poi_data[0]:
                 name, code = 'Tesco Expressz', 'hutescoexp'
             elif 'xtra' in poi_data[0]:
                 name, code = 'Tesco Extra', 'hutescoext'
             else:
                 name, code = 'Tesco', 'hutescosup'
             poi_data[0] = poi_data[0].replace('TESCO', 'Tesco').replace('Bp.', 'Budapest')
             postcode = poi_data[1].strip()
             city = clean_city(poi_data[2].split(',')[0])
             branch = poi_data[0]
             website = poi_data[4]
             original = poi_data[3]
             ref = None
             geom = None
             nonstop = None
             # No opening-hours data here: nonstop plus the 7 open / 7 close
             # columns are all left as None.
             insert_data.append(
                 [code, postcode, city, name, branch, website, original, street,
                  housenumber, conscriptionnumber, ref, geom, nonstop]
                 + [None] * 14)
         if len(insert_data) < 1:
             logging.warning('Resultset is empty. Skipping ...')
         else:
             df = pd.DataFrame(insert_data)
             df.columns = POI_COLS
             insert_poi_dataframe(self.session, df)
示例#26
0
 def process(self):
     """Download the Budapest Bank branch/ATM dataset and add each POI.

     Fixes relative to the original:
     * the opening-hours guards tested the *builtin* ``open``
       (``op if open is not None and op != ''``) instead of the parsed
       value — the parsed value itself is now checked,
     * the five near-identical per-day parsing blocks are collapsed into a
       helper driven by a prefix table,
     * the unused ``oh`` list and the dead first ``nonstop`` assignment
       (always overwritten at the end of the loop) were removed.
     """
     try:
         soup = save_downloaded_soup(
             '{}'.format(self.link),
             os.path.join(self.download_cache, self.filename),
             self.filetype)
         if soup is not None:
             text = json.loads(soup)

             def parse_hours(entry, prefix):
                 # 'H: 8:00-16:30' -> ('8:00', '16:30'); missing or empty
                 # open/close parts become None.
                 parts = entry.replace(prefix, '').split('-')
                 day_open = parts[0].strip() or None
                 day_close = (parts[1].strip() or None) if len(parts) > 1 else None
                 return day_open, day_close

             # Hungarian day-letter prefix -> POI attribute stem; order
             # matters (first match wins, as in the original elif chain).
             day_map = (('H:', 'mo'), ('K:', 'tu'), ('Sz:', 'we'),
                        ('Cs:', 'th'), ('P:', 'fr'))
             for poi_data in text['points']:
                 if poi_data['fiok'] == 1:
                     self.data.name = 'Budapest Bank'
                     self.data.code = 'hubpbank'
                     self.data.public_holiday_open = False
                 else:
                     self.data.name = 'Budapest Bank ATM'
                     self.data.code = 'hubpatm'
                     self.data.public_holiday_open = True
                 self.data.postcode = poi_data['zip']
                 self.data.city = poi_data['city_only']
                 self.data.lat, self.data.lon = check_hu_boundary(
                     poi_data['latitude'], poi_data['longitude'])
                 self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                     extract_street_housenumber_better_2(poi_data['addr'])
                 self.data.original = poi_data['address']
                 self.data.branch = poi_data['name']
                 # Opening hours arrive like 'H: 8:00-17:00||K: 8:00-17:00'.
                 if poi_data.get('opening') is not None:
                     for entry in poi_data.get('opening').split('||'):
                         for prefix, day in day_map:
                             if prefix in entry:
                                 day_open, day_close = parse_hours(entry, prefix)
                                 setattr(self.data, day + '_o', day_open)
                                 setattr(self.data, day + '_c', day_close)
                                 break
                 # ATMs are treated as always available.
                 self.data.nonstop = self.data.code == 'hubpatm'
                 self.data.add()
     except Exception as e:
         logging.exception('Exception occurred')
         logging.error(e)
示例#27
0
    def process(self):
        """Download and parse the CBA / Príma store list and insert POIs.

        The data source is a JavaScript variable (``boltok_nyers``) embedded
        in a <script> tag of the downloaded page.  The assignment is stripped
        so the remainder parses as JSON, then each store record is normalized
        into one row of the POI dataframe.
        """
        soup = save_downloaded_soup(
            '{}'.format(self.link),
            os.path.join(self.download_cache, self.filename))
        insert_data = []
        if soup is not None:
            # Locate the <script> tag holding the raw store list and strip
            # the JavaScript variable assignment so only JSON remains.
            pattern = re.compile(r'^\s*var\s*boltok_nyers.*')
            script = soup.find('script', text=pattern)
            m = pattern.match(script.get_text())
            data = m.group(0)
            data = clean_javascript_variable(data, 'boltok_nyers')
            text = json.loads(data)
            for poi_data in text:
                # Assign: code, postcode, city, name, branch, website,
                # original, street, housenumber, conscriptionnumber, ref, geom
                street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                    poi_data['A_CIM'])
                city = clean_city(poi_data['A_VAROS'])
                postcode = poi_data['A_IRSZ'].strip()
                branch = poi_data['P_NAME'].strip()
                name = 'Príma' if 'Príma' in branch else 'CBA'
                code = 'huprimacon' if 'Príma' in branch else 'hucbacon'
                website = None
                nonstop = None
                # Opening hours arrive as PS_OPEN_FROM_1..7 / PS_OPEN_TO_1..7.
                # Collect them in source order; the unpacking below preserves
                # the original variable mapping of this file (index 2 -> th_*,
                # index 4 -> tu_*) so the POI_COLS column order is unchanged.
                opens = []
                closes = []
                for day in range(1, 8):
                    raw_open = poi_data['PS_OPEN_FROM_{}'.format(day)]
                    raw_close = poi_data['PS_OPEN_TO_{}'.format(day)]
                    opens.append(clean_opening_hours_2(raw_open)
                                 if raw_open is not None else None)
                    closes.append(clean_opening_hours_2(raw_close)
                                  if raw_close is not None else None)
                mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o = opens
                mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c = closes
                original = poi_data['A_CIM']
                lat, lon = check_hu_boundary(poi_data['PS_GPS_COORDS_LAT'],
                                             poi_data['PS_GPS_COORDS_LNG'])
                geom = check_geom(lat, lon)
                # Prefer the postcode from OSM (if configured) over the feed's.
                postcode = query_postcode_osm_external(
                    self.prefer_osm_postcode, self.session, lat, lon, postcode)
                ref = None
                # Treat missing, None and empty-string values uniformly as
                # "no phone / no email" instead of passing them to cleaners.
                if poi_data.get('PS_PUBLIC_TEL'):
                    phone = clean_phone(poi_data['PS_PUBLIC_TEL'])
                else:
                    phone = None
                if poi_data.get('PS_PUBLIC_EMAIL'):
                    email = poi_data['PS_PUBLIC_EMAIL']
                else:
                    email = None
                insert_data.append([
                    code, postcode, city, name, branch, website, original,
                    street, housenumber, conscriptionnumber, ref, phone, email,
                    geom, nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o,
                    mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c
                ])
            if len(insert_data) < 1:
                logging.warning('Resultset is empty. Skipping ...')
            else:
                df = pd.DataFrame(insert_data)
                df.columns = POI_COLS
                insert_poi_dataframe(self.session, df)
示例#28
0
 def process(self):
     """Download and parse the Avia fuel station list and insert POIs.

     The station list is embedded in the page as a JavaScript ``markers``
     variable; it is extracted, converted to JSON and each station is
     normalized into one row of the POI dataframe.
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     insert_data = []
     if soup is not None:
         # Locate the <script> tag holding the raw marker list and strip
         # the JavaScript variable assignment so only JSON remains.
         pattern = re.compile(r'var\s*markers\s*=\s*((.*\n)*\]\;)',
                              re.MULTILINE)
         script = soup.find('script', text=pattern)
         m = pattern.search(script.get_text())
         data = m.group(0)
         data = data.replace("'", '"')
         data = clean_javascript_variable(data, 'markers')
         text = json.loads(data)
         for poi_data in text:
             # Reset the address fields for every record: previously a
             # station with a missing 'cim' either raised a NameError (on
             # the first iteration) or silently reused the previous
             # station's address.
             postcode = city = street = housenumber = conscriptionnumber = None
             if poi_data['cim'] is not None and poi_data['cim'] != '':
                 postcode, city, street, housenumber, conscriptionnumber = extract_all_address(
                     poi_data['cim'])
             name = 'Avia'
             code = 'huaviafu'
             branch = None
             if city is None:
                 # Fall back to the marker title when the address had no city.
                 city = poi_data['title']
             ref = poi_data['kutid'] if poi_data[
                 'kutid'] is not None and poi_data['kutid'] != '' else None
             lat, lon = check_hu_boundary(poi_data['lat'], poi_data['lng'])
             geom = check_geom(lat, lon)
             # Prefer the postcode from OSM (if configured) over the feed's.
             postcode = query_postcode_osm_external(
                 self.prefer_osm_postcode, self.session, lat, lon, postcode)
             website = '/toltoallomas/?id={}'.format(str(poi_data['kutid'])) if poi_data['kutid'] is not None and \
                                                                                poi_data['kutid'] != '' else None
             # No opening-hours information in this datasource.
             nonstop = None
             mo_o = th_o = we_o = tu_o = fr_o = sa_o = su_o = None
             mo_c = th_c = we_c = tu_c = fr_c = sa_c = su_c = None
             original = poi_data['cim']
             if 'tel' in poi_data and poi_data['tel'] != '':
                 phone = clean_phone(poi_data['tel'])
             else:
                 phone = None
             if 'email' in poi_data and poi_data['email'] != '':
                 email = clean_email(poi_data['email'])
             else:
                 email = None
             insert_data.append([
                 code, postcode, city, name, branch, website, original,
                 street, housenumber, conscriptionnumber, ref, phone, email,
                 geom, nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o,
                 mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c
             ])
         if len(insert_data) < 1:
             logging.warning('Resultset is empty. Skipping ...')
         else:
             df = pd.DataFrame(insert_data)
             df.columns = POI_COLS
             insert_poi_dataframe(self.session, df)
示例#29
0
 def process(self):
     """Download and parse the OMV fuel station list and insert POIs.

     The endpoint is queried with POST_DATA and returns JSON with a
     ``results`` array; each entry is normalized into one row of the POI
     dataframe.  A station open 00:00-24:00 is flagged as nonstop instead
     of carrying explicit hours.
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename), POST_DATA)
     insert_data = []
     if soup is not None:
         text = json.loads(soup.get_text())
         for poi_data in text['results']:
             name = 'OMV'
             code = 'huomvfu'
             postcode = poi_data['postcode'].strip()
             street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                 poi_data['address_l'])
             city = clean_city(poi_data['town_l'])
             branch = None
             website = None
             nonstop = None
             if poi_data['open_hours'] is not None:
                 oho, ohc = clean_opening_hours(poi_data['open_hours'])
                 if oho == '00:00' and ohc == '24:00':
                     # Round-the-clock stations are tagged nonstop; explicit
                     # open/close times would be redundant.
                     nonstop = True
                     oho, ohc = None, None
             else:
                 oho, ohc = None, None
             # The feed provides a single daily interval; apply it to all
             # seven days.
             mo_o = th_o = we_o = tu_o = fr_o = sa_o = su_o = oho
             mo_c = th_c = we_c = tu_c = fr_c = sa_c = su_c = ohc
             original = poi_data['address_l']
             ref = None
             lat, lon = check_hu_boundary(poi_data['y'], poi_data['x'])
             geom = check_geom(lat, lon)
             # Prefer the postcode from OSM (if configured) over the feed's.
             postcode = query_postcode_osm_external(
                 self.prefer_osm_postcode, self.session, lat, lon, postcode)
             if 'telnr' in poi_data and poi_data['telnr'] != '':
                 phone = clean_phone(poi_data['telnr'])
             else:
                 phone = None
             email = None
             insert_data.append([
                 code, postcode, city, name, branch, website, original,
                 street, housenumber, conscriptionnumber, ref, phone, email,
                 geom, nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o,
                 mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c
             ])
         if len(insert_data) < 1:
             logging.warning('Resultset is empty. Skipping ...')
         else:
             df = pd.DataFrame(insert_data)
             df.columns = POI_COLS
             insert_poi_dataframe(self.session, df)
示例#30
0
 def process(self):
     """Download and parse the Tesco store list and insert POIs.

     Store data is embedded in the page as a ``data-stores`` HTML
     attribute containing JSON.  The store name decides the brand variant
     (Expressz / Extra / plain Tesco) and the matching POI code.
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     insert_data = []
     if soup is not None:
         # The store list is JSON serialized into a data-stores attribute.
         script = soup.find(attrs={'data-stores': True})
         text = json.loads(script['data-stores'])
         for poi_data in text:
             # Assign: code, postcode, city, name, branch, website,
             # original, street, housenumber, conscriptionnumber, ref, geom
             street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                 poi_data['address'])
             city = clean_city(poi_data['city'])
             branch = poi_data['name']
             # Substring match tolerates both capitalized and lowercase
             # "Expressz"/"Extra" in the store name.
             if 'xpres' in poi_data['name']:
                 name = 'Tesco Expressz'
                 code = 'hutescoexp'
             elif 'xtra' in poi_data['name']:
                 name = 'Tesco Extra'
                 code = 'hutescoext'
             else:
                 name = 'Tesco'
                 code = 'hutescosup'
             website = poi_data['url']
             nonstop = None
             # 'opening' is itself a JSON string: keys '1'..'6' plus '0'
             # (presumably Sunday) mapping to [open, close] pairs.  The key
             # order below preserves the original variable mapping of this
             # file ('2' -> th_*, '4' -> tu_*) so POI_COLS order is kept.
             opening = json.loads(poi_data['opening'])
             day_keys = ('1', '2', '3', '4', '5', '6', '0')
             mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o = (
                 opening[k][0] for k in day_keys)
             mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c = (
                 opening[k][1] for k in day_keys)
             lat, lon = check_hu_boundary(poi_data['gpslat'],
                                          poi_data['gpslng'])
             geom = check_geom(lat, lon)
             # No postcode in the feed; resolve it purely from OSM.
             postcode = query_postcode_osm_external(
                 self.prefer_osm_postcode, self.session, lat, lon, None)
             original = poi_data['address']
             ref = None
             if 'phone' in poi_data and poi_data['phone'] != '':
                 phone = clean_phone(poi_data['phone'])
             else:
                 phone = None
             email = None
             insert_data.append([
                 code, postcode, city, name, branch, website, original,
                 street, housenumber, conscriptionnumber, ref, phone, email,
                 geom, nonstop, mo_o, th_o, we_o, tu_o, fr_o, sa_o, su_o,
                 mo_c, th_c, we_c, tu_c, fr_c, sa_c, su_c
             ])
         if len(insert_data) < 1:
             logging.warning('Resultset is empty. Skipping ...')
         else:
             df = pd.DataFrame(insert_data)
             df.columns = POI_COLS
             insert_poi_dataframe(self.session, df)