示例#1
0
 def process(self):
     """Fetch the BENU pharmacy list and insert it as POI rows.

     The payload returned by ``save_downloaded_soup`` is parsed as a JSON
     list. One row per entry is collected and the rows are handed to
     ``insert_poi_dataframe`` as a DataFrame with ``POI_COLS`` columns.
     Nothing happens when no payload is returned.
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     if soup is None:
         return
     insert_data = []
     for poi_data in json.loads(soup.get_text()):
         street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
             poi_data['street'])
         title = poi_data['title']
         if 'BENU Gyógyszertár' not in title:
             # Title without the chain marker: it is used as the name itself.
             name, branch = title.strip(), None
         else:
             # Chain marker present: unified name, full title kept as branch.
             name, branch = 'Benu gyógyszertár', title.strip()
         code = 'hubenupha'
         description = poi_data['description']
         # Slice only when a description exists; slicing None used to raise
         # TypeError, which defeated the None guard.
         # NOTE(review): the first 19 characters are dropped -- presumably a
         # fixed prefix in front of the URL; confirm against the feed.
         website = description.strip()[19:] if description is not None else None
         city = clean_city(poi_data['city'])
         postcode = poi_data['postal_code'].strip()
         lat, lon = check_hu_boundary(poi_data['lat'], poi_data['lng'])
         geom = check_geom(lat, lon)
         postcode = query_postcode_osm_external(
             self.prefer_osm_postcode, self.session, lat, lon, postcode)
         original = poi_data['street']
         if 'phone' in poi_data and poi_data['phone'] != '':
             phone = clean_phone(poi_data['phone'])
         else:
             phone = None
         ref = None
         email = None
         # The feed carries no opening hours: nonstop plus the 7 opening and
         # 7 closing columns are all left empty.
         no_opening_hours = [None] * 15
         insert_data.append([
             code, postcode, city, name, branch, website, original,
             street, housenumber, conscriptionnumber, ref, phone, email,
             geom
         ] + no_opening_hours)
     if not insert_data:
         logging.warning('Resultset is empty. Skipping ...')
     else:
         df = pd.DataFrame(insert_data)
         df.columns = POI_COLS
         insert_poi_dataframe(self.session, df)
    def process(self):
        """Fetch the Tom Market store list and queue one POI per store.

        The downloaded payload is parsed as JSON and every entry of its
        ``stores`` list is copied onto ``self.data`` and committed with
        ``self.data.add()``. A failure on a single store is logged and the
        loop continues; a failure outside the loop is logged and swallowed.
        """
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is not None:
                text = json.loads(str(soup))
                for poi_data in text['stores']:
                    try:
                        self.data.code = 'hutommacon'
                        # NOTE(review): element 2 of 'name' is used as the
                        # reference -- assumes 'name' is a sequence with at
                        # least three items; confirm against the feed.
                        ref = poi_data.get('name')[2]
                        if ref is not None and ref != '':
                            self.data.ref = ref
                        website = poi_data.get('website')
                        if website is not None and website != '':
                            self.data.website = website
                        else:
                            self.data.website = 'https://tommarket.hu'
                        self.data.lat, self.data.lon = check_hu_boundary(
                            poi_data.get('lat'), poi_data.get('long'))
                        address = poi_data.get('address')
                        self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                            extract_street_housenumber_better_2(address)
                        postcode = poi_data.get('zip')
                        if postcode is not None and postcode != '':
                            self.data.postcode = postcode
                        self.data.original = address
                        settlement = poi_data.get('settlement')
                        if settlement is not None and settlement != '':
                            self.data.city = clean_city(settlement)
                        else:
                            # No settlement in the feed: look the city up
                            # from the coordinates instead.
                            self.data.city = query_osm_city_name_gpd(
                                self.session, self.data.lat, self.data.lon)
                        phone = poi_data.get('phone')
                        if phone is not None and phone != '':
                            self.data.phone = clean_phone_to_str(phone)
                        email = poi_data.get('email')
                        if email is not None and email != '':
                            # Was assigned to self.data.phone, which
                            # overwrote the phone number with the e-mail.
                            self.data.email = email.strip()
                        self.data.public_holiday_open = False
                        self.data.add()
                    except Exception as e:
                        logging.error(e)
                        logging.error(poi_data)
                        logging.exception('Exception occurred')

        except Exception as e:
            logging.error(e)
            logging.exception('Exception occurred')
示例#3
0
 def process(self):
     """Scrape the Aldi store table and insert it as POI rows.

     Reads the ``<tbody>`` rows of the table with class
     ``contenttable is-header-top``; the first three cells of each row are
     used as postcode, city and address. Rows with fewer than three cells
     are skipped with a warning instead of aborting the import with an
     IndexError. Nothing happens when no page is returned.
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     if soup is None:
         return
     # parse the html using beautiful soup
     table = soup.find('table',
                       attrs={'class': 'contenttable is-header-top'})
     table_body = table.find('tbody')
     insert_data = []
     for row in table_body.find_all('tr'):
         cols = [cell.text.strip() for cell in row.find_all('td')]
         if len(cols) < 3:
             logging.warning('Skipping table row with unexpected layout: %s', cols)
             continue
         postcode, city, original = cols[0].strip(), clean_city(cols[1]), cols[2]
         street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
             original)
         code = 'hualdisup'
         name = 'Aldi'
         # The table provides nothing beyond the address, so branch, website,
         # ref, phone, email, geom, nonstop and the 14 opening/closing
         # columns are all left empty.
         branch = website = ref = phone = email = geom = None
         no_opening_hours = [None] * 15
         insert_data.append([
             code, postcode, city, name, branch, website, original,
             street, housenumber, conscriptionnumber, ref, phone, email,
             geom
         ] + no_opening_hours)
     if not insert_data:
         logging.warning('Resultset is empty. Skipping ...')
     else:
         df = pd.DataFrame(insert_data)
         df.columns = POI_COLS
         insert_poi_dataframe(self.session, df)
 def test_extract_street_housenumber_better_2(self):
     """Check street, house number and conscription number extraction.

     Every entry of ``self.addresses`` supplies the raw address and the three
     expected parts. Each part is asserted in its own sub-test so one
     mismatch does not hide the others; the sub-tests are labelled with the
     raw address and the field name so a failure can be traced to its input.
     """
     for case in self.addresses:
         original, street, housenumber, conscriptionnumber = (
             case['original'], case['street'], case['housenumber'],
             case['conscriptionnumber'])
         a, b, c = extract_street_housenumber_better_2(original)
         with self.subTest(original=original, field='street'):
             self.assertEqual(street, a)
         with self.subTest(original=original, field='housenumber'):
             self.assertEqual(housenumber, b)
         with self.subTest(original=original, field='conscriptionnumber'):
             self.assertEqual(conscriptionnumber, c)
    def process(self):
        """Fetch the OIL! station list and queue one POI per station.

        The payload is parsed as a JSON list. Optional fields are copied onto
        ``self.data`` only when present and non-empty, then the record is
        committed with ``self.data.add()``. A failure on one station is
        logged and the loop continues; a failure outside the loop is logged
        and swallowed.
        """
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is not None:
                stations = json.loads(soup)
                for poi_data in stations:
                    try:
                        self.data.name = 'OIL!'
                        self.data.code = 'huoilfu'
                        zip_code = poi_data.get('zip')
                        if zip_code is not None and zip_code != '':
                            self.data.postcode = zip_code.strip()
                        city = poi_data.get('city')
                        if city is not None and city != '':
                            self.data.city = clean_city(city)
                        self.data.lat, self.data.lon = check_hu_boundary(
                            poi_data.get('lat'), poi_data.get('lng'))
                        address = poi_data.get('address')
                        if address is not None and address != '':
                            self.data.original = address
                            self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                                extract_street_housenumber_better_2(address)
                        phone = poi_data.get('phone')
                        if phone is not None and phone != '':
                            self.data.phone = clean_phone_to_str(phone)
                        # Both fuel flags are set unconditionally.
                        self.data.fuel_octane_95 = True
                        self.data.fuel_diesel = True
                        station_id = poi_data.get('id')
                        if station_id is not None and station_id != '':
                            self.data.ref = station_id.strip()
                        url = poi_data.get('url')
                        if url is not None and url != '':
                            self.data.website = url.strip()
                        else:
                            self.data.website = 'https://www.oil-benzinkutak.hu'
                        store = poi_data.get('store')
                        if store is not None and store != '':
                            # The branch is whatever follows the first space.
                            pieces = store.split(' ', 1)
                            self.data.branch = pieces[1].strip().capitalize()
                        self.data.add()
                    except Exception as err:
                        logging.error(err)
                        logging.error(poi_data)
                        logging.exception('Exception occurred')

        except Exception as err:
            logging.error(err)
            logging.exception('Exception occurred')
示例#6
0
    def process(self):
        """Fetch the Yves Rocher shop list and queue the shops with country_id 3.

        The payload is parsed as JSON and its ``list`` entries are walked.
        Entries whose ``country_id`` is not 3 are skipped. The rest are
        copied onto ``self.data`` and committed with ``self.data.add()``.
        A failure on one shop is logged and the loop continues; a failure
        outside the loop is logged and swallowed.
        """
        try:
            soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename),
                                        self.filetype)
            if soup is not None:
                payload = json.loads(str(soup))
                for poi_data in payload.get('list'):
                    try:
                        if poi_data.get('country_id') != 3:
                            continue
                        self.data.name = 'Yves Rocher'
                        self.data.code = 'huyvesrcos'
                        self.data.lat, self.data.lon = check_hu_boundary(
                            poi_data.get('latitude'), poi_data.get('longitude'))
                        self.data.website = 'https://www.yves-rocher.hu{}/'.format(
                            poi_data.get('request_path'))
                        opening = poi_data.get('hours')
                        # NOTE(review): `i in opening` tests an int against
                        # the container; if 'hours' is a JSON object its keys
                        # are strings and this never matches -- confirm the
                        # payload shape.
                        for i in range(0, 7):
                            if i in opening:
                                self.data.day_open(i, opening[i]['hour_from'])
                                self.data.day_close(i, opening[i]['hour_to'])
                        self.data.postcode = poi_data.get('zip')
                        address = poi_data.get('address')
                        self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                            extract_street_housenumber_better_2(address)
                        self.data.city = clean_city(poi_data.get('city'))
                        self.data.original = address
                        landline = poi_data.get('phone')
                        if landline is not None and landline != '':
                            self.data.phone = clean_phone_to_str(landline)
                        mobile = poi_data.get('mobile')
                        if mobile is not None and mobile != '':
                            if self.data.phone is not None:
                                # Append the mobile number to the phone
                                # value already stored on the dataset.
                                self.data.phone = '{};{}'.format(self.data.phone,
                                                                 clean_phone_to_str(mobile))
                            else:
                                self.data.phone = clean_phone_to_str(mobile)
                        self.data.public_holiday_open = False
                        self.data.add()
                    except Exception as err:
                        logging.error(err)
                        logging.error(poi_data)
                        logging.exception('Exception occurred')

        except Exception as err:
            logging.error(err)
            logging.exception('Exception occurred')
示例#7
0
 def process(self):
     """Fetch the MOL fuel station list and insert it as POI rows.

     ``POST_DATA`` is passed to ``save_downloaded_soup`` as its third
     argument. The returned text is parsed as a JSON list; one row per entry
     is collected and the rows are handed to ``insert_poi_dataframe`` as a
     DataFrame with ``POI_COLS`` columns. Nothing happens when no payload is
     returned.
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename), POST_DATA)
     if soup is None:
         return
     insert_data = []
     for poi_data in json.loads(soup.get_text()):
         name = 'MOL'
         code = 'humolfu'
         postcode = poi_data['postcode'].strip()
         original = poi_data['address']
         street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
             original)
         city = clean_city(poi_data['city'])
         lat, lon = check_hu_boundary(poi_data['lat'], poi_data['lng'])
         geom = check_geom(lat, lon)
         postcode = query_postcode_osm_external(
             self.prefer_osm_postcode, self.session, lat, lon, postcode)
         # The feed has no branch, website, ref, phone or email, and no
         # opening hours (nonstop plus 7 opening and 7 closing columns).
         branch = website = ref = phone = email = None
         no_opening_hours = [None] * 15
         insert_data.append([
             code, postcode, city, name, branch, website, original,
             street, housenumber, conscriptionnumber, ref, phone, email,
             geom
         ] + no_opening_hours)
     if not insert_data:
         logging.warning('Resultset is empty. Skipping ...')
     else:
         df = pd.DataFrame(insert_data)
         df.columns = POI_COLS
         insert_poi_dataframe(self.session, df)
示例#8
0
    def process(self):
        """Fetch the Pepco store list and queue the stores in known cities.

        The payload is parsed as JSON and its ``data`` entries are walked.
        An entry is skipped when it has no ``city`` key, when the city is
        empty, or when ``query_osm_city_name`` returns None for it. Any
        exception is logged and swallowed, which ends the whole run.
        """
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is not None:
                text = json.loads(soup)
                for poi_data in text['data']:
                    # The Pepco dataset contains all European data. Since the
                    # program cannot handle POIs outside Hungary (so far) this
                    # limits processing to Hungarian POIs. In fact this
                    # depends on the OSM extract, but currently only the
                    # Hungarian OSM extract is used.
                    if 'city' not in poi_data:
                        continue
                    city = poi_data['city']
                    if city == '' or query_osm_city_name(self.session, city) is None:
                        continue
                    self.data.city = clean_city(city)
                    self.data.name = 'Pepco'
                    self.data.code = 'hupepcoclo'
                    self.data.lat, self.data.lon = \
                        check_hu_boundary(
                            poi_data['coordinates']['lat'], poi_data['coordinates']['lng'])
                    street_address = poi_data.get('streetAddress')
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                        extract_street_housenumber_better_2(street_address)
                    self.data.original = street_address
                    self.data.postcode = poi_data.get('postalCode')
                    # Assign opening_hours
                    # NOTE(review): `i in opening` tests an int against the
                    # container; if 'openingHours' is a JSON object its keys
                    # are strings and this never matches -- confirm the
                    # payload shape.
                    opening = poi_data['openingHours']
                    for i in range(0, 7):
                        if i in opening:
                            self.data.day_open(i, opening[i]['from'])
                            self.data.day_close(i, opening[i]['to'])
                    # Assign additional information
                    self.data.phone = clean_phone_to_str(
                        poi_data.get('phoneNumber'))
                    self.data.public_holiday_open = False
                    self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')
            # Was logging.error(logging.error(e)), which logged the return
            # value of the inner call (None) as a second, spurious record.
            logging.error(e)
示例#9
0
    def process(self):
        """Read the cached Kulcs patika list and queue one POI per pharmacy.

        Nothing is downloaded here: when ``self.link`` is truthy, the file at
        ``download_cache/filename`` is opened and decoded. A failure on one
        pharmacy is logged and the loop continues; a failure outside the
        loop is logged and swallowed.
        """
        try:
            if self.link:
                cache_path = os.path.join(self.download_cache, self.filename)
                with open(cache_path, 'r') as f:
                    text = json.load(f)
                    if text is not None:
                        # The decoded value is decoded a second time, so the
                        # file is presumably a JSON string that itself holds
                        # JSON -- verify against the cached file.
                        text = json.loads(text, strict=False)
                        for poi_data in text:
                            try:
                                pharmacy_name = poi_data.get('nev')
                                if 'Kulcs patika' in pharmacy_name:
                                    self.data.name = 'Kulcs patika'
                                    self.data.branch = pharmacy_name.strip()
                                else:
                                    self.data.name = pharmacy_name.strip()
                                    self.data.branch = None
                                self.data.code = 'hukulcspha'
                                link = poi_data.get('link')
                                if link is not None and link != '':
                                    self.data.website = link.strip()
                                town = poi_data.get('helyseg')
                                if town is not None and town != '':
                                    self.data.city = clean_city(town)
                                marker = poi_data.get('marker_position')
                                self.data.lat, self.data.lon = check_hu_boundary(
                                    marker['latitude'], marker['longitude'])
                                address = poi_data.get('cim')
                                if address is not None and address != '':
                                    self.data.original = address
                                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                                        extract_street_housenumber_better_2(address)
                                postcode = poi_data.get('irsz')
                                if postcode is not None and postcode != '':
                                    self.data.postcode = postcode.strip()
                                self.data.public_holiday_open = False
                                self.data.add()
                            except Exception as err:
                                logging.error(err)
                                logging.error(poi_data)
                                logging.exception('Exception occurred')

        except Exception as err:
            logging.error(err)
            logging.exception('Exception occurred')
 def process(self):
     """Fetch the Posta location list and insert it, classified by type.

     The payload is parsed as JSON and every entry of ``items`` is mapped to
     a name/code pair according to its ``type``. Entries with an unknown
     type are logged and skipped; previously they were still added although
     no name or code had been assigned for them in that iteration. The
     collected dataset is handed to ``insert_poi_dataframe``.
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename), self.filetype)
     if soup is not None:
         text = json.loads(soup)
         data = POIDataset()
         for poi_data in text['items']:
             poi_type = poi_data['type']
             if poi_type == 'posta':
                 if 'mobilposta' in poi_data['name']:
                     data.name = 'Mobilposta'
                     data.code = 'hupostamp'
                     # NOTE(review): public_holiday_open is not assigned for
                     # Mobilposta, unlike every other type -- confirm that
                     # this is intended.
                 else:
                     data.name = 'Posta'
                     data.code = 'hupostapo'
                     data.public_holiday_open = False
             elif poi_type == 'csekkautomata':
                 data.name = 'Posta csekkautomata'
                 data.code = 'hupostacse'
                 data.public_holiday_open = True
             elif poi_type == 'postamachine':
                 data.name = 'Posta csomagautomata'
                 data.code = 'hupostacso'
                 data.public_holiday_open = True
             elif poi_type == 'postapoint':
                 data.name = 'PostaPont'
                 data.code = 'hupostapp'
                 data.public_holiday_open = False
             else:
                 logging.error('Non existing Posta type.')
                 # Do not add a record that has no name/code of its own.
                 continue
             data.postcode = poi_data['zipCode'].strip()
             data.city = clean_city(poi_data['city'])
             data.branch = poi_data['name']
             data.lat = poi_data['lat']
             data.lon = poi_data['lng']
             data.street, data.housenumber, data.conscriptionnumber = extract_street_housenumber_better_2(
                 poi_data['address'])
             data.original = poi_data['address']
             data.add()
         # `lenght` (sic) is the method name used on POIDataset.
         if data is None or data.lenght() < 1:
             logging.warning('Resultset is empty. Skipping ...')
         else:
             insert_poi_dataframe(self.session, data.process())
示例#11
0
    def process(self):
        """Load the Mobiliti charging station CSV and queue one POI per row.

        ``self.link`` is read with pandas (``;`` separated, first line
        skipped). Rows without GPS coordinates are skipped. Any exception is
        logged and swallowed, which ends the whole run.
        """
        try:
            # Named `frame` rather than `csv` so the stdlib module name is
            # not shadowed.
            frame = pd.read_csv(self.link, encoding='UTF-8', sep=';', skiprows=1)
            for poi_data in frame.to_dict('records'):
                self.data.name = 'Mobiliti'
                self.data.code = 'humobilchs'
                self.data.ref = poi_data.get('Mobiliti azonosító')
                self.data.branch = poi_data.get('Töltőpont neve')
                self.data.postcode = poi_data.get('Irányító szám')
                self.data.city = clean_city(poi_data.get('Település'))
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    extract_street_housenumber_better_2(
                        poi_data.get('Cím'))
                self.data.original = poi_data.get('Cím')
                coordinates = poi_data.get('GPS koordináták')
                # pandas reports an empty cell as NaN, not None. The old
                # `is None` test let NaN through to .split(), which raised
                # and aborted the whole import. pd.isna covers both.
                if pd.isna(coordinates):
                    continue
                self.data.lat, self.data.lon = coordinates.split(',')
                self.data.lat, self.data.lon = check_hu_boundary(
                    self.data.lat, self.data.lon)
                self.data.socket_chademo = poi_data.get('Darab (CHAdeMO)')
                self.data.socket_chademo_output = poi_data.get(
                    'Teljesítmény (CHAdeMO)')
                self.data.socket_type2_combo = poi_data.get('Darab (CCS)')
                self.data.socket_type2_combo_output = poi_data.get(
                    'Teljesítmény (CCS)')
                self.data.socket_type2_cable = poi_data.get(
                    'Darab (Type 2)')
                self.data.socket_type2_cable_output = poi_data.get(
                    'Teljesítmény (Type 2)')
                self.data.manufacturer = poi_data.get('Gyártó')
                self.data.model = poi_data.get('Típus')
                self.data.capacity = poi_data.get('Kapacitás')
                self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')
            logging.error(e)
    def process(self):
        """Load the MOL Plugee charging station CSV and queue one POI per row.

        ``self.link`` is read with pandas (``;`` separated, first line
        skipped) and each record is copied onto ``self.data`` and committed
        with ``self.data.add()``. Any exception is logged and swallowed,
        which ends the whole run.
        """
        # Dataset attribute -> CSV column, copied verbatim in this order.
        charger_columns = (
            ('socket_chademo', 'Darab (CHAdeMO)'),
            ('socket_chademo_output', 'Teljesítmény (CHAdeMO)'),
            ('socket_type2_combo', 'Darab (CCS)'),
            ('socket_type2_combo_output', 'Teljesítmény (CCS)'),
            ('socket_type2_cable', 'Darab (Type 2)'),
            ('socket_type2_cable_output', 'Teljesítmény (Type 2)'),
            ('socket_type2', 'Darab (Type 2 – kábel nélkül)'),
            ('socket_type2_output', 'Teljesítmény (Type 2 – kábel nélkül)'),
            ('manufacturer', 'Gyártó'),
            ('model', 'Típus'),
            ('capacity', 'Kapacitás'),
        )
        try:
            table = pd.read_csv(self.link, encoding='UTF-8', sep=';', skiprows=1)
            if table is not None:
                records = table.to_dict('records')
                for poi_data in records:
                    self.data.name = 'MOL Plugee'
                    self.data.code = 'humolplchs'
                    self.data.ref = poi_data.get('Azonosító')
                    self.data.postcode = poi_data.get('Irányító szám')
                    self.data.city = clean_city(poi_data.get('Település'))
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                        extract_street_housenumber_better_2(poi_data.get('Cím'))
                    self.data.original = poi_data.get('Cím')
                    x_raw = poi_data.get('X')
                    y_raw = poi_data.get('Y')
                    # Decimal commas are replaced with dots before the
                    # boundary check.
                    # NOTE(review): assumes both cells are strings -- an
                    # empty or numeric cell would raise here; confirm.
                    self.data.lat, self.data.lon = check_hu_boundary(
                        x_raw.replace(',', '.'), y_raw.replace(',', '.'))
                    for attribute, column in charger_columns:
                        setattr(self.data, attribute, poi_data.get(column))
                    self.data.add()
        except Exception as err:
            logging.exception('Exception occurred')

            logging.error(err)
示例#13
0
 def process(self):
     """Fetch the Nemzeti Dohánybolt shop list and queue one POI per shop.

     ``POST_DATA`` is passed to ``save_downloaded_soup`` along with the file
     type. The payload is parsed as a JSON list and each entry is copied
     onto ``self.data`` and committed with ``self.data.add()``. Nothing
     happens when no payload is returned.
     """
     soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename),
                                 self.filetype, POST_DATA)
     if soup is None:
         return
     shops = json.loads(soup)
     for poi_data in shops:
         self.data.name = 'Nemzeti Dohánybolt'
         self.data.code = 'hunemdotob'
         raw_postcode = poi_data.get('postcode')
         self.data.postcode = raw_postcode.strip()
         self.data.city = clean_city(poi_data['city'])
         self.data.original = poi_data['address']
         latitude, longitude = poi_data['lat'], poi_data['lng']
         self.data.lat, self.data.lon = check_hu_boundary(latitude, longitude)
         address_parts = extract_street_housenumber_better_2(poi_data['address'])
         self.data.street, self.data.housenumber, self.data.conscriptionnumber = address_parts
         self.data.public_holiday_open = False
         self.data.add()
示例#14
0
    def process(self):
        """Fetch the Foxpost parcel locker list and queue one POI per locker.

        The payload is parsed as a JSON list. Opening hours are read per day
        from the ``open`` mapping, keyed by the lower-cased
        ``WeekDaysLongHUUnAccented`` member name. Any exception is logged and
        swallowed, which ends the whole run.
        """
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is not None:
                lockers = json.loads(soup)
                for poi_data in lockers:
                    self.data.name = 'Foxpost'
                    self.data.code = 'hufoxpocso'
                    self.data.postcode = poi_data['zip'].strip()
                    self.data.city = clean_city(poi_data['city'])
                    self.data.branch = poi_data['name']
                    for day in range(0, 7):
                        day_key = WeekDaysLongHUUnAccented(day).name.lower()
                        hours = poi_data['open'][day_key]
                        if hours is None:
                            # No hours for this day: clear both ends.
                            self.data.day_open_close(day, None, None)
                        else:
                            opening, closing = clean_opening_hours(hours)
                            self.data.day_open(day, opening)
                            self.data.day_close(day, closing)
                    self.data.original = poi_data['address']
                    self.data.lat, self.data.lon = check_hu_boundary(
                        poi_data['geolat'], poi_data['geolng'])
                    # The street parts come from 'street', while `original`
                    # above keeps the full 'address'.
                    street_parts = extract_street_housenumber_better_2(
                        poi_data['street'])
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = street_parts
                    self.data.public_holiday_open = False
                    self.data.description = poi_data.get('findme')
                    self.data.add()
        except Exception as err:
            logging.exception('Exception occurred')

            logging.error(err)
    def process(self):
        """Fetch the Mobil Petrol station list and queue one POI per station.

        The stations are read from the ``totem_stations`` JavaScript variable
        of the downloaded page. Opening hours are derived from markers found
        in the ``services`` value: ``0-24`` marks a nonstop station, and a
        few fixed ranges map to daily opening and closing times. Any
        exception is logged and swallowed, which ends the whole run.
        """
        # Marker in 'services' -> (opening, closing), checked in this order.
        hour_patterns = (
            ('6-22', '06:00', '22:00'),
            ('6-21', '06:00', '21:00'),
            ('5-22', '05:00', '22:00'),
            ('6-18', '06:00', '18:00'),
        )
        try:
            soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename),
                                        self.filetype)
            if soup is not None:
                text = json.loads(
                    extract_javascript_variable(soup, 'totem_stations'))
                for poi_data in text.values():
                    self.data.name = 'Mobil Petrol'
                    self.data.code = 'humobpefu'
                    self.data.website = poi_data.get('description')
                    self.data.city = clean_city(poi_data.get('city'))
                    self.data.original = poi_data.get('address')
                    self.data.lat, self.data.lon = check_hu_boundary(poi_data['location']['lat'],
                                                                     poi_data['location']['lng'])
                    self.data.postcode = None
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = extract_street_housenumber_better_2(
                        poi_data.get('address'))
                    self.data.phone = clean_phone_to_str(poi_data.get('phone'))
                    self.data.public_holiday_open = False
                    services = poi_data.get('services')
                    if '0-24' in services:
                        self.data.nonstop = True
                        self.data.public_holiday_open = True
                    else:
                        # Reset for every station. The old
                        # `'open_from' in locals()` check stayed true once any
                        # station had matched, so later stations with no
                        # matching marker inherited the previous hours.
                        open_from = open_to = None
                        for marker, opens, closes in hour_patterns:
                            if marker in services:
                                open_from, open_to = opens, closes
                                break
                        if open_from is not None and open_to is not None:
                            for i in range(0, 7):
                                self.data.day_open(i, open_from)
                                self.data.day_close(i, open_to)
                    self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')
            logging.error(e)
    def process(self):
        """Read the Spar store list (JSON) and add one POI per store via ``self.data``.

        The POI name and code are chosen from marker substrings of the store
        name; the first matching marker wins and plain 'Spar' is the fallback.
        Any exception is logged and ends the processing.
        """
        # (marker in the store name, POI name, POI code) -- checked in this order
        brands = (
            ('xpres', 'Spar Expressz', 'husparecon'),
            ('INTER', 'Interspar', 'husparisup'),
            ('market', 'Spar', 'husparsup'),
            ('DESPAR', 'DeSpar', 'huspardcon'),
        )
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is None:
                return
            for shop in json.loads(soup):
                raw_name = shop['name']
                for marker, brand_name, brand_code in brands:
                    if marker in raw_name:
                        break
                else:
                    brand_name, brand_code = 'Spar', 'husparsup'
                self.data.name = brand_name
                self.data.code = brand_code
                # Normalise the upper-case brand spelling before the name is reused
                pretty_name = raw_name.replace('INTERSPAR', 'Interspar')
                pretty_name = pretty_name.replace('SPAR', 'Spar')
                found = PATTERN_SPAR_REF.search(pretty_name)
                if found is None:
                    self.data.ref = None
                else:
                    self.data.ref = found.group(1).strip()
                self.data.city = clean_city(shop['city'])
                self.data.postcode = shop.get('zipCode').strip()
                # The branch is the part of the name before the first '('
                self.data.branch = pretty_name.split('(')[0].strip()
                self.data.website = shop['pageUrl'].strip()
                coordinates = check_hu_boundary(shop['latitude'], shop['longitude'])
                self.data.lat, self.data.lon = coordinates
                address_parts = extract_street_housenumber_better_2(shop['address'])
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = address_parts
                self.data.original = shop['address']
                self.data.public_holiday_open = False
                self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')

            logging.error(e)
示例#17
0
 def process(self):
     """Read the Spar store list (JSON) and insert the stores as a POI dataframe.

     The POI name and code are chosen from marker substrings of the store
     name. The non-stop flag, all opening/closing hours, phone and e-mail are
     left empty. Nothing is inserted when the download yields no data.
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     if soup is None:
         return
     insert_data = []
     for poi_data in json.loads(soup.get_text()):
         # Assign: code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber, ref, geom
         street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
             poi_data['address'])
         if 'xpres' in poi_data['name']:
             name = 'Spar Expressz'
             code = 'husparexp'
         elif 'INTER' in poi_data['name']:
             name = 'Interspar'
             code = 'husparint'
         else:
             # Covers the 'market' stores as well as every other store
             name = 'Spar'
             code = 'husparsup'
         poi_data['name'] = poi_data['name'].replace(
             'INTERSPAR', 'Interspar')
         poi_data['name'] = poi_data['name'].replace('SPAR', 'Spar')
         ref_match = PATTERN_SPAR_REF.search(poi_data['name'])
         ref = ref_match.group(
             1).strip() if ref_match is not None else None
         city = clean_city(poi_data['city'])
         postcode = poi_data['zipCode'].strip()
         # The branch is the part of the name before the first '('
         branch = poi_data['name'].split('(')[0].strip()
         website = poi_data['pageUrl'].strip()
         lat, lon = check_hu_boundary(poi_data['latitude'],
                                      poi_data['longitude'])
         geom = check_geom(lat, lon)
         postcode = query_postcode_osm_external(
             self.prefer_osm_postcode, self.session, lat, lon, postcode)
         original = poi_data['address']
         phone = None
         email = None
         # The trailing 15 empty values fill the non-stop flag plus the
         # seven opening and seven closing time columns.
         insert_data.append([
             code, postcode, city, name, branch, website, original,
             street, housenumber, conscriptionnumber, ref, phone, email,
             geom
         ] + [None] * 15)
     if not insert_data:
         logging.warning('Resultset is empty. Skipping ...')
     else:
         df = pd.DataFrame(insert_data)
         df.columns = POI_COLS
         insert_poi_dataframe(self.session, df)
示例#18
0
 def process(self):
     """Read the Budapest Bank point list (JSON) and add every point as a POI.

     Points whose ``fiok`` field equals 1 are added as 'Budapest Bank', all
     others as 'Budapest Bank ATM'. The ``opening`` field is a ``||``
     separated list of entries such as ``H: 08:00-17:00``; the recognised
     weekday prefixes fill the matching ``<day>_o`` / ``<day>_c`` attributes
     of ``self.data``. ATMs are flagged as non-stop.

     Any exception is logged and ends the processing.
     """
     # (weekday prefix used in the feed, attribute prefix on self.data);
     # checked in this order, the first prefix found in an entry wins.
     day_prefixes = (
         ('H:', 'mo'),
         ('K:', 'tu'),
         ('Sz:', 'we'),
         ('Cs:', 'th'),
         ('P:', 'fr'),
     )
     try:
         soup = save_downloaded_soup(
             '{}'.format(self.link),
             os.path.join(self.download_cache, self.filename),
             self.filetype)
         if soup is not None:
             text = json.loads(soup)
             for poi_data in text['points']:
                 if poi_data['fiok'] == 1:
                     self.data.name = 'Budapest Bank'
                     self.data.code = 'hubpbank'
                     self.data.public_holiday_open = False
                 else:
                     self.data.name = 'Budapest Bank ATM'
                     self.data.code = 'hubpatm'
                     self.data.public_holiday_open = True
                 self.data.postcode = poi_data['zip']
                 self.data.city = poi_data['city_only']
                 self.data.lat, self.data.lon = check_hu_boundary(
                     poi_data['latitude'], poi_data['longitude'])
                 self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                     extract_street_housenumber_better_2(poi_data['addr'])
                 self.data.original = poi_data['address']
                 self.data.branch = poi_data['name']
                 # Processing opening hours
                 opening = poi_data.get('opening')
                 if opening is not None:
                     for entry in opening.split('||'):
                         for prefix, day in day_prefixes:
                             if prefix not in entry:
                                 continue
                             hours = entry.replace(prefix, '').split('-')
                             opens = hours[0].strip()
                             # An entry without '-' has no closing time
                             closes = hours[1].strip() if len(hours) > 1 else None
                             # Empty strings are stored as None
                             setattr(self.data, day + '_o', opens or None)
                             setattr(self.data, day + '_c', closes or None)
                             break
                 self.data.nonstop = self.data.code == 'hubpatm'
                 self.data.add()
     except Exception as e:
         logging.exception('Exception occurred')
         logging.error(e)
    def process(self):
        """Read the Penny market list (JSON) and add every market as a POI.

        Phone and reference id are only set when the market provides a
        non-empty value. Any exception is logged and ends the processing.
        """
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is None:
                return
            for market in json.loads(soup)['markets']:
                self.data.name = 'Penny'
                self.data.code = 'hupennysup'
                address = market['address']
                self.data.postcode = address['zip'].strip()
                # Only the part before the first comma is parsed as street + house number
                street_part = address['street'].split(',')[0]
                self.data.city = clean_city(address['city'])
                self.data.original = address['street']
                coordinates = check_hu_boundary(address['latitude'], address['longitude'])
                self.data.lat, self.data.lon = coordinates
                parsed = extract_street_housenumber_better_2(street_part.title())
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = parsed
                phone = market.get('phone', '')
                if phone != '':
                    self.data.phone = clean_phone_to_str(phone)
                market_id = market.get('id', '')
                if market_id != '':
                    self.data.ref = market_id.strip()
                self.data.public_holiday_open = False
                # TODO: Parsing opening_hours from datasource
                self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')

            logging.error(e)
示例#20
0
    def process(self):
        """Read the OBI store list (JSON) and add every store as a POI.

        Phone, store number, e-mail and website are only set when the store
        provides a non-empty value. Any exception is logged and ends the
        processing.
        """
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is None:
                return
            for store in json.loads(soup).get('stores'):
                self.data.name = 'OBI'
                self.data.code = 'huobidiy'
                address = store['address']
                self.data.postcode = address['zip'].strip()
                self.data.city = clean_city(address['city'])
                self.data.original = address['street']
                coordinates = check_hu_boundary(address['lat'], address['lon'])
                self.data.lat, self.data.lon = coordinates
                parsed = extract_street_housenumber_better_2(address['street'])
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = parsed
                phone = store.get('phone', '')
                if phone != '':
                    self.data.phone = clean_phone_to_str(phone)
                store_number = store.get('storeNumber', '')
                if store_number != '':
                    self.data.ref = store_number.strip()
                email = store.get('email', '')
                if email != '':
                    self.data.email = clean_email(email)
                path = store.get('path', '')
                if path != '':
                    self.data.website = path
                # TODO: opening hour parser for poi_data.get('hours'), format is like:
                #  Hétfő - Szombat: 8:00 - 20:00\nVasárnap: 08:00 - 18:00
                # self.data.public_holiday_open = False
                self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')

            logging.error(e)
示例#21
0
    def process(self):
        """Read the CBA / Príma shop list and insert the shops as a POI dataframe.

        The shops are taken from the ``boltok_nyers`` JavaScript variable found
        in a ``<script>`` element of the downloaded page. A shop whose name
        contains 'Príma' is stored as 'Príma', every other shop as 'CBA'.
        Opening and closing times come from the ``PS_OPEN_FROM_1..7`` and
        ``PS_OPEN_TO_1..7`` fields. Nothing is inserted when the download
        yields no data.
        """
        soup = save_downloaded_soup(
            '{}'.format(self.link),
            os.path.join(self.download_cache, self.filename))
        if soup is None:
            return
        # Raw string: '\s' is a regex escape, not a Python string escape
        pattern = re.compile(r'^\s*var\s*boltok_nyers.*')
        script = soup.find('script', text=pattern)
        m = pattern.match(script.get_text())
        data = clean_javascript_variable(m.group(0), 'boltok_nyers')
        # Field names of the seven opening and seven closing time slots
        open_keys = ['PS_OPEN_FROM_{}'.format(day) for day in range(1, 8)]
        close_keys = ['PS_OPEN_TO_{}'.format(day) for day in range(1, 8)]
        insert_data = []
        for poi_data in json.loads(data):
            # Assign: code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber, ref, geom
            street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                poi_data['A_CIM'])
            city = clean_city(poi_data['A_VAROS'])
            postcode = poi_data['A_IRSZ'].strip()
            branch = poi_data['P_NAME'].strip()
            is_prima = 'Príma' in branch
            name = 'Príma' if is_prima else 'CBA'
            code = 'huprimacon' if is_prima else 'hucbacon'
            website = None
            nonstop = None
            # Slots without a value stay None instead of being cleaned
            opens = [
                clean_opening_hours_2(poi_data[key])
                if poi_data[key] is not None else None
                for key in open_keys
            ]
            closes = [
                clean_opening_hours_2(poi_data[key])
                if poi_data[key] is not None else None
                for key in close_keys
            ]
            original = poi_data['A_CIM']
            lat, lon = check_hu_boundary(poi_data['PS_GPS_COORDS_LAT'],
                                         poi_data['PS_GPS_COORDS_LNG'])
            geom = check_geom(lat, lon)
            postcode = query_postcode_osm_external(
                self.prefer_osm_postcode, self.session, lat, lon, postcode)
            ref = None
            if 'PS_PUBLIC_TEL' in poi_data and poi_data[
                    'PS_PUBLIC_TEL'] != '':
                phone = clean_phone(poi_data['PS_PUBLIC_TEL'])
            else:
                phone = None
            if 'PS_PUBLIC_EMAIL' in poi_data and poi_data[
                    'PS_PUBLIC_EMAIL'] != '':
                email = poi_data['PS_PUBLIC_EMAIL']
            else:
                email = None
            # Column order: fixed fields, the seven opening times, then the seven closing times
            insert_data.append([
                code, postcode, city, name, branch, website, original,
                street, housenumber, conscriptionnumber, ref, phone, email,
                geom, nonstop
            ] + opens + closes)
        if not insert_data:
            logging.warning('Resultset is empty. Skipping ...')
        else:
            df = pd.DataFrame(insert_data)
            df.columns = POI_COLS
            insert_poi_dataframe(self.session, df)
示例#22
0
    def process(self):
        """Read the Posta item list (JSON) and insert the items as a POI dataframe.

        The POI name and code depend on the item's ``type`` (and, for post
        offices, on whether the name contains 'mobilposta'). Items of an
        unknown type are logged and skipped. The non-stop flag, all
        opening/closing hours, website, ref, phone and e-mail are left empty.
        Nothing is inserted when the download yields no data.
        """
        soup = save_downloaded_soup(
            '{}'.format(self.link),
            os.path.join(self.download_cache, self.filename))
        if soup is None:
            return
        insert_data = []
        text = json.loads(soup.get_text())
        for poi_data in text['items']:
            if poi_data['type'] == 'posta':
                if 'mobilposta' in poi_data['name']:
                    name = 'Mobilposta'
                    code = 'hupostamp'
                else:
                    name = 'Posta'
                    code = 'hupostapo'
            elif poi_data['type'] == 'csekkautomata':
                name = 'Posta csekkautomata'
                code = 'hupostacse'
            elif poi_data['type'] == 'postamachine':
                name = 'Posta csomagautomata'
                code = 'hupostacso'
            elif poi_data['type'] == 'postapoint':
                name = 'PostaPont'
                code = 'hupostapp'
            else:
                logging.error('Non existing Posta type.')
                # Without a name and code the item cannot be stored; going on
                # would reuse the values of the previous item (or fail with a
                # NameError on the very first item).
                continue
            postcode = poi_data['zipCode'].strip()
            street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                poi_data['address'])
            city = clean_city(poi_data['city'])
            branch = poi_data['name']
            website = None
            geom = check_geom(poi_data['lat'], poi_data['lng'])
            original = poi_data['address']
            ref = None
            phone = None
            email = None
            # The trailing 15 empty values fill the non-stop flag plus the
            # seven opening and seven closing time columns.
            insert_data.append([
                code, postcode, city, name, branch, website, original,
                street, housenumber, conscriptionnumber, ref, phone, email,
                geom
            ] + [None] * 15)
        if not insert_data:
            logging.warning('Resultset is empty. Skipping ...')
        else:
            df = pd.DataFrame(insert_data)
            df.columns = POI_COLS
            insert_poi_dataframe(self.session, df)
    def process(self):
        """Read the Benu pharmacy list (JSON) and add every pharmacy as a POI.

        A pharmacy whose title contains 'BENU Gyógyszertár' is stored under the
        common name 'Benu gyógyszertár' with the title as branch; any other
        title is used as the name itself. A failure on one pharmacy is logged
        together with its data and the next pharmacy is processed; a failure
        outside the loop is logged and ends the processing.
        """
        try:
            soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename),
                                        self.filetype)
            if soup is None:
                return
            for pharmacy in json.loads(str(soup)):
                try:
                    title = pharmacy.get('title')
                    if 'BENU Gyógyszertár' in title:
                        self.data.name = 'Benu gyógyszertár'
                        self.data.branch = title.strip()
                    else:
                        self.data.name = title.strip()
                        self.data.branch = None
                    self.data.code = 'hubenupha'
                    # The website is whatever PATTERN_FULL_URL matches at the
                    # start of the description, if anything
                    description = pharmacy.get('description')
                    url_match = None
                    if description is not None:
                        url_match = PATTERN_FULL_URL.match(description)
                    if url_match is None:
                        self.data.website = None
                    else:
                        self.data.website = url_match.group(0).strip()
                    self.data.city = clean_city(pharmacy.get('city'))
                    self.data.postcode = pharmacy.get('postal_code').strip()
                    coordinates = check_hu_boundary(pharmacy.get('lat'), pharmacy.get('lng'))
                    self.data.lat, self.data.lon = coordinates
                    parsed = extract_street_housenumber_better_2(pharmacy.get('street'))
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = parsed
                    self.data.original = pharmacy.get('street')
                    phone = pharmacy.get('phone', '')
                    if phone != '':
                        self.data.phone = clean_phone_to_str(phone)
                    else:
                        self.data.phone = None
                    self.data.public_holiday_open = False
                    self.data.add()
                except Exception as e:
                    logging.error(e)
                    logging.error(pharmacy)
                    logging.exception('Exception occurred')

        except Exception as e:
            logging.error(e)
            logging.exception('Exception occurred')
示例#24
0
 def process(self):
     """Read the Tesco store list and insert the stores as a POI dataframe.

     The stores are taken from the JSON held in the ``data-stores`` attribute
     of an element of the downloaded page. The POI name and code are chosen
     from marker substrings of the store name. Opening and closing times come
     from the store's ``opening`` JSON, keyed '1'..'6' and '0'. Nothing is
     inserted when the download yields no data.
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     if soup is None:
         return
     # Keys of the 'opening' mapping in the order the time columns are
     # filled; '0' goes into the last (su_o / su_c) slot.
     day_keys = ('1', '2', '3', '4', '5', '6', '0')
     script = soup.find(attrs={'data-stores': True})
     insert_data = []
     for poi_data in json.loads(script['data-stores']):
         # Assign: code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber, ref, geom
         street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
             poi_data['address'])
         city = clean_city(poi_data['city'])
         branch = poi_data['name']
         if 'xpres' in poi_data['name']:
             name = 'Tesco Expressz'
             code = 'hutescoexp'
         elif 'xtra' in poi_data['name']:
             name = 'Tesco Extra'
             code = 'hutescoext'
         else:
             name = 'Tesco'
             code = 'hutescosup'
         website = poi_data['url']
         nonstop = None
         # Every entry of 'opening' is indexed as [opening time, closing time]
         opening = json.loads(poi_data['opening'])
         opens = [opening[key][0] for key in day_keys]
         closes = [opening[key][1] for key in day_keys]
         lat, lon = check_hu_boundary(poi_data['gpslat'],
                                      poi_data['gpslng'])
         geom = check_geom(lat, lon)
         # This block reads no postcode from the source, so only the lookup result is used
         postcode = query_postcode_osm_external(
             self.prefer_osm_postcode, self.session, lat, lon, None)
         original = poi_data['address']
         ref = None
         if 'phone' in poi_data and poi_data['phone'] != '':
             phone = clean_phone(poi_data['phone'])
         else:
             phone = None
         email = None
         # Column order: fixed fields, the seven opening times, then the seven closing times
         insert_data.append([
             code, postcode, city, name, branch, website, original,
             street, housenumber, conscriptionnumber, ref, phone, email,
             geom, nonstop
         ] + opens + closes)
     if not insert_data:
         logging.warning('Resultset is empty. Skipping ...')
     else:
         df = pd.DataFrame(insert_data)
         df.columns = POI_COLS
         insert_poi_dataframe(self.session, df)
示例#25
0
 def process(self):
     """Read the Rossmann shop list and insert the shops as a POI dataframe.

     The shops are taken from the ``places`` JavaScript variable found in a
     ``<script>`` element of the downloaded page; only the first entry of
     each place's ``addresses`` list is used. Opening and closing times come
     from the ``business_hours`` mapping keyed by English weekday names.
     Nothing is inserted when the download yields no data.
     """
     soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename), None,
                                 self.verify_link)
     if soup is None:
         return
     # Raw string: '\s' is a regex escape, not a Python string escape
     pattern = re.compile(r'^\s*var\s*places.*')
     script = soup.find('script', text=pattern)
     m = pattern.match(script.get_text())
     data = clean_javascript_variable(m.group(0), 'places')
     # Keys of 'business_hours' in the order the time columns are filled
     weekdays = ('monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday')
     insert_data = []
     for poi_data in json.loads(data):
         poi_data = poi_data['addresses'][0]
         # Assign: code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber, ref, geom
         street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
             poi_data['address'])
         name = 'Rossmann'
         code = 'hurossmche'
         city = clean_city(poi_data['city'])
         postcode = poi_data['zip'].strip()
         branch = None
         website = None
         nonstop = False
         opens = []
         closes = []
         for weekday in weekdays:
             hours = poi_data['business_hours'][weekday]
             if hours is not None:
                 day_open, day_close = clean_opening_hours(hours)
             else:
                 # No hours given for this day
                 day_open, day_close = None, None
             opens.append(day_open)
             closes.append(day_close)
         lat, lon = check_hu_boundary(poi_data['position'][0], poi_data['position'][1])
         geom = check_geom(lat, lon)
         postcode = query_postcode_osm_external(self.prefer_osm_postcode, self.session, lat, lon, postcode)
         original = poi_data['address']
         ref = None
         phone = None
         email = None
         # Column order: fixed fields, the seven opening times, then the seven closing times
         insert_data.append(
             [code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber,
              ref, phone, email, geom, nonstop] + opens + closes)
     if not insert_data:
         logging.warning('Resultset is empty. Skipping ...')
     else:
         df = pd.DataFrame(insert_data)
         df.columns = POI_COLS
         insert_poi_dataframe(self.session, df)
示例#26
0
 def process(self):
     """Scrape the Tesco store table and insert one POI row per store.

     Reads the ``tescoce-table`` table of the downloaded page. For every
     usable row the cell texts (minus the last two cells) plus the link of
     the first cell are mapped onto a POI row; opening hours, geometry and
     ref are left empty. The rows are handed to ``insert_poi_dataframe``;
     a warning is logged instead when no row could be built. Nothing
     happens when the page could not be downloaded.
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     insert_data = []
     if soup is not None:
         # Fields 0-3 (branch, postcode, city, address) come from the cell
         # texts and the last two cells are discarded, so a usable row has
         # at least six cells.
         min_cells = 6
         # Compiled once instead of once per row.
         express_pattern = re.compile('(expressz{0,1})', re.IGNORECASE)
         # A page without the expected table yields no rows rather than an
         # AttributeError on None.
         table = soup.find('table', attrs={'class': 'tescoce-table'})
         table_body = table.find('tbody') if table is not None else None
         rows = table_body.find_all('tr') if table_body is not None else []
         for row in rows:
             cells = row.find_all('td')
             if len(cells) < min_cells:
                 # Too few cells to hold a store (presumably a header or
                 # spacer row): skip instead of raising IndexError.
                 continue
             anchor = cells[0].find('a')
             # None (not an empty list) when the first cell has no link.
             link = anchor.get('href') if anchor is not None else None
             poi_data = [cell.text.strip() for cell in cells]
             # Only the first line of the first cell is the store name.
             poi_data[0] = poi_data[0].split('\n')[0]
             del poi_data[-2:]
             poi_data.append(link)
             # Assign: code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber, ref, geom
             street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                 poi_data[3])
             # Normalise every spelling of "express(z)" before classifying.
             branch = express_pattern.sub('Expressz', poi_data[0])
             if 'xpres' in branch:
                 name, code = 'Tesco Expressz', 'hutescoexp'
             elif 'xtra' in branch:
                 name, code = 'Tesco Extra', 'hutescoext'
             else:
                 name, code = 'Tesco', 'hutescosup'
             branch = branch.replace('TESCO', 'Tesco').replace('Bp.', 'Budapest')
             postcode = poi_data[1].strip()
             city = clean_city(poi_data[2].split(',')[0])
             # For a six-cell row this is the link appended above.
             website = poi_data[4]
             original = poi_data[3]
             geom = None
             ref = None
             nonstop = None
             # NOTE(review): 27 values per row, without phone/email columns;
             # confirm this matches the current POI_COLS.
             insert_data.append([
                 code, postcode, city, name, branch, website, original,
                 street, housenumber, conscriptionnumber, ref, geom,
                 nonstop
             ] + [None] * 14)  # no opening/closing times on this page
         if not insert_data:
             logging.warning('Resultset is empty. Skipping ...')
         else:
             df = pd.DataFrame(insert_data)
             df.columns = POI_COLS
             insert_poi_dataframe(self.session, df)
示例#27
0
    def process(self):
        """Load the ``boltok_nyers`` records and store each one as a POI.

        The downloaded page's ``boltok_nyers`` JavaScript variable is parsed
        as JSON. For every record the address, brand (Príma or CBA), weekly
        opening hours, coordinates and contact fields are written to
        ``self.data`` and committed with ``self.data.add()``. Any exception
        is logged and swallowed.
        """
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is None:
                return
            shops = json.loads(
                extract_javascript_variable(soup, 'boltok_nyers'))
            for shop in shops:
                self.data.city = clean_city(shop.get('A_VAROS'))
                self.data.postcode = shop.get('A_IRSZ').strip()
                self.data.branch = shop.get('P_NAME').strip()
                # The brand is decided by the branch name.
                if 'Príma' in self.data.branch:
                    self.data.name = 'Príma'
                    self.data.code = 'huprimacon'
                else:
                    self.data.name = 'CBA'
                    self.data.code = 'hucbacon'
                # Source keys are numbered 1..7, day slots 0..6.
                for day in range(7):
                    opens = shop.get('PS_OPEN_FROM_{}'.format(day + 1))
                    if opens is not None:
                        opens = clean_opening_hours_2(opens)
                    self.data.day_open(day, opens)
                    closes = shop.get('PS_OPEN_TO_{}'.format(day + 1))
                    if closes is not None:
                        closes = clean_opening_hours_2(closes)
                    self.data.day_close(day, closes)
                address = shop.get('A_CIM')
                self.data.original = address
                self.data.lat, self.data.lon = check_hu_boundary(
                    shop.get('PS_GPS_COORDS_LAT'),
                    shop.get('PS_GPS_COORDS_LNG'))
                self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                    extract_street_housenumber_better_2(address)
                # A missing key and an empty string both mean "no value".
                phone = shop.get('PS_PUBLIC_TEL', '')
                if phone != '':
                    self.data.phone = clean_phone_to_str(phone)
                else:
                    self.data.phone = None
                email = shop.get('PS_PUBLIC_EMAIL', '')
                self.data.email = email if email != '' else None
                self.data.public_holiday_open = False
                self.data.add()
        except Exception as e:
            logging.exception('Exception occurred')

            logging.error(e)
示例#28
0
 def process(self):
     """Import Shell and Mobil Petrol POIs from the downloaded table.

     Every record of the table returned by ``save_downloaded_pd`` whose
     ``Brand`` is 'Shell' or 'Mobilpetrol' becomes one POI row; records of
     any other brand are skipped with a warning. Stations flagged
     ``24 Hour`` are marked nonstop without opening times, all others get
     06:00-22:00 on every day. The rows are handed to
     ``insert_poi_dataframe``; a warning is logged instead when there are
     none. Nothing happens when the table could not be loaded.
     """
     stations = save_downloaded_pd(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     if stations is not None:
         # Missing numeric cells get a placeholder so the integer cast works.
         stations[['Post code']] = stations[['Post code']].fillna('0000')
         stations[['Post code']] = stations[['Post code']].astype(int)
         stations[['Telephone']] = stations[['Telephone']].fillna('0')
         stations[['Telephone']] = stations[['Telephone']].astype(int)
         stations[['City']] = stations[['City']].fillna('')
         stations[['Name']] = stations[['Name']].fillna('')
         # Brand value in the table -> (POI name, POI code).
         brands = {
             'Shell': ('Shell', 'hushellfu'),
             'Mobilpetrol': ('Mobil Petrol', 'humobpefu'),
         }
         insert_data = []
         for poi_data in stations.to_dict('records'):
             brand = brands.get(poi_data['Brand'])
             if brand is None:
                 # Without this guard name/code would be undefined or be
                 # carried over from the previous record.
                 logging.warning('Unknown brand: %s. Skipping ...',
                                 poi_data['Brand'])
                 continue
             name, code = brand
             postcode = poi_data['Post code']
             # Capitalise every word except the last two, which stay
             # lower-case.
             words = poi_data['Address'].lower().split()
             words = [word.capitalize() for word in words[:-2]] + words[-2:]
             street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                 ' '.join(words))
             # Fall back to the station name when the city is missing.
             if poi_data['City'] != '':
                 city = clean_city(poi_data['City'].title())
             elif poi_data['Name'] != '':
                 city = clean_city(poi_data['Name'].title())
             else:
                 city = None
             branch = poi_data['Name'].strip()
             website = None
             # Explicit comparison on purpose: a plain truthiness test would
             # treat NaN (missing cell) as True.
             if poi_data['24 Hour'] == True:  # noqa: E712
                 nonstop, opens, closes = True, None, None
             else:
                 nonstop, opens, closes = False, '06:00', '22:00'
             original = poi_data['Address']
             ref = None
             lat, lon = check_hu_boundary(poi_data['GPS Latitude'],
                                          poi_data['GPS Longitude'])
             geom = check_geom(lat, lon)
             postcode = query_postcode_osm_external(
                 self.prefer_osm_postcode, self.session, lat, lon, postcode)
             # After the integer cast a missing number is the placeholder 0,
             # never '', so test for a non-zero value.
             telephone = poi_data.get('Telephone')
             phone = clean_phone(str(telephone)) if telephone else None
             email = None
             insert_data.append([
                 code, postcode, city, name, branch, website, original,
                 street, housenumber, conscriptionnumber, ref, phone, email,
                 geom, nonstop
             ] + [opens] * 7 + [closes] * 7)
         if not insert_data:
             logging.warning('Resultset is empty. Skipping ...')
         else:
             df = pd.DataFrame(insert_data)
             df.columns = POI_COLS
             insert_poi_dataframe(self.session, df)
示例#29
0
 def process(self):
     """Import OMV POIs from the ``results`` list of the downloaded JSON.

     Each entry becomes one POI row. The single ``open_hours`` value is
     applied to every day of the week; 00:00-24:00 is stored as nonstop
     without opening times. The rows are handed to
     ``insert_poi_dataframe``; a warning is logged instead when there are
     none. Nothing happens when the download failed.
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename), POST_DATA)
     insert_data = []
     if soup is not None:
         text = json.loads(soup.get_text())
         for poi_data in text['results']:
             name = 'OMV'
             code = 'huomvfu'
             postcode = poi_data['postcode'].strip()
             street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                 poi_data['address_l'])
             city = clean_city(poi_data['town_l'])
             branch = None
             website = None
             nonstop = None
             opens, closes = None, None
             if poi_data['open_hours'] is not None:
                 opens, closes = clean_opening_hours(poi_data['open_hours'])
                 if opens == '00:00' and closes == '24:00':
                     # Open around the clock: flag it and drop the times.
                     nonstop = True
                     opens, closes = None, None
             original = poi_data['address_l']
             ref = None
             lat, lon = check_hu_boundary(poi_data['y'], poi_data['x'])
             geom = check_geom(lat, lon)
             postcode = query_postcode_osm_external(
                 self.prefer_osm_postcode, self.session, lat, lon, postcode)
             # A missing key, '' and JSON null all mean "no phone number".
             telnr = poi_data.get('telnr')
             phone = clean_phone(telnr) if telnr else None
             email = None
             # The same hours are used for all seven days.
             insert_data.append([
                 code, postcode, city, name, branch, website, original,
                 street, housenumber, conscriptionnumber, ref, phone, email,
                 geom, nonstop
             ] + [opens] * 7 + [closes] * 7)
         if not insert_data:
             logging.warning('Resultset is empty. Skipping ...')
         else:
             df = pd.DataFrame(insert_data)
             df.columns = POI_COLS
             insert_poi_dataframe(self.session, df)
    def process(self):
        """Store every entry of the downloaded JSON's ``stores`` list as a POI.

        For each store the branch, ref, website, weekly opening hours,
        coordinates, address, postcode, city and brand are written to
        ``self.data`` and committed with ``self.data.add()``. A failing store
        is logged and skipped; a failure outside the per-store handling is
        logged and ends the import.
        """
        # Keys of the 'opening' mapping for day slots 0..6; the last slot
        # uses key '0'.
        day_keys = ('1', '2', '3', '4', '5', '6', '0')
        try:
            soup = save_downloaded_soup(
                '{}'.format(self.link),
                os.path.join(self.download_cache, self.filename),
                self.filetype)
            if soup is None:
                return
            payload = json.loads(str(soup))
            for store in payload.get('stores'):
                try:
                    # Assign: code, postcode, city, name, branch, website, original, street, housenumber,
                    # conscriptionnumber, ref, geom
                    self.data.branch = store.get('store_name')
                    self.data.ref = store.get('goldid')
                    self.data.website = 'https://tesco.hu/aruhazak/aruhaz/{}/'.format(
                        store.get('urlname'))
                    opening = json.loads(store.get('opening'))
                    for day, key in enumerate(day_keys):
                        if key not in opening:
                            continue
                        hours = opening[key]
                        self.data.day_open(day, hours[0])
                        self.data.day_close(day, hours[1])
                    self.data.lat, self.data.lon = check_hu_boundary(
                        store.get('gpslat'), store.get('gpslng'))
                    address = store.get('address')
                    self.data.street, self.data.housenumber, self.data.conscriptionnumber = \
                        extract_street_housenumber_better_2(address)
                    self.data.postcode = store.get('zipcode').strip()
                    # The city is looked up from the coordinates.
                    self.data.city = clean_city(
                        query_osm_city_name_gpd(self.session,
                                                self.data.lat,
                                                self.data.lon))
                    # Brand depends on the store type and, for some cities,
                    # on the city itself.
                    store_type = store.get('name')
                    if 'xpres' in store_type:
                        is_s_market = self.data.city in [
                            'Győr', 'Sopron', 'Mosonmagyaróvár', 'Levél'
                        ]
                        if is_s_market:
                            brand = ('S-Market', 'husmrktexp')
                        else:
                            brand = ('Tesco Expressz', 'hutescoexp')
                    elif 'xtra' in store_type:
                        brand = ('Tesco Extra', 'hutescoext')
                    else:
                        is_s_market = self.data.city in ['Levél']
                        if is_s_market:
                            brand = ('S-Market', 'husmrktsup')
                        else:
                            brand = ('Tesco', 'hutescosup')
                    self.data.name, self.data.code = brand
                    self.data.original = store.get('address')
                    phone = store.get('phone')
                    if not (phone is None or phone == ''):
                        self.data.phone = clean_phone_to_str(phone)
                    gold_id = store.get('goldid')
                    if not (gold_id is None or gold_id == ''):
                        self.data.ref = gold_id.strip()
                    self.data.public_holiday_open = False
                    self.data.add()
                except Exception as e:
                    logging.error(e)
                    logging.error(store)
                    logging.exception('Exception occurred')

        except Exception as e:
            logging.error(e)
            logging.exception('Exception occurred')