Exemplo n.º 1
0
 def process(self):
     """Build POI rows from the JSON feed at ``self.link`` and hand them on.

     The feed is fetched through ``save_downloaded_soup`` (with a path under
     ``self.download_cache``), parsed as JSON and converted entry by entry
     into rows laid out like ``POI_COLS``. The rows are passed to
     ``insert_poi_dataframe`` together with ``self.session``. Nothing
     happens when the download returns ``None``; an empty row list only
     logs a warning.
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     if soup is None:
         return
     insert_data = []
     for poi_data in json.loads(soup.get_text()):
         street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
             poi_data['street'])
         title = poi_data['title'].strip()
         if 'BENU Gyógyszertár' not in poi_data['title']:
             name, branch = title, None
         else:
             # Branded entries get the normalised brand as name and keep
             # the full title as branch.
             name, branch = 'Benu gyógyszertár', title
         code = 'hubenupha'
         description = poi_data['description']
         # The first 19 characters of the description are dropped
         # (presumably a fixed prefix in front of the URL -- confirm).
         # A missing description must yield None instead of raising.
         website = description.strip()[19:] if description is not None else None
         city = clean_city(poi_data['city'])
         postcode = poi_data['postal_code'].strip()
         lat, lon = check_hu_boundary(poi_data['lat'], poi_data['lng'])
         geom = check_geom(lat, lon)
         postcode = query_postcode_osm_external(
             self.prefer_osm_postcode, self.session, lat, lon, postcode)
         original = poi_data['street']
         ref = None
         raw_phone = poi_data.get('phone', '')
         phone = clean_phone(raw_phone) if raw_phone != '' else None
         email = None
         # The nonstop flag and the 14 per-day opening/closing columns are
         # not filled by this provider.
         insert_data.append([
             code, postcode, city, name, branch, website, original,
             street, housenumber, conscriptionnumber, ref, phone, email,
             geom
         ] + [None] * 15)
     if not insert_data:
         logging.warning('Resultset is empty. Skipping ...')
         return
     df = pd.DataFrame(insert_data)
     df.columns = POI_COLS
     insert_poi_dataframe(self.session, df)
Exemplo n.º 2
0
 def process(self):
     """Build POI rows from the XML feed at ``self.link`` and hand them on.

     Every ``place`` element of the downloaded document becomes one row
     laid out like ``POI_COLS``; the rows are passed to
     ``insert_poi_dataframe`` together with ``self.session``. An empty
     result only logs a warning.
     """
     xml = save_downloaded_xml(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     insert_data = []
     root = etree.fromstring(xml)
     for e in root.iter('place'):
         name = 'MOL Bubi'
         code = 'hububibir'
         city = 'Budapest'
         # The station label is split on '-': the first part is used as
         # reference, the second as branch. A label without '-' yields no
         # branch instead of raising IndexError.
         label = e.attrib.get('name')
         if label is not None:
             parts = label.split('-')
             ref = parts[0].strip()
             branch = parts[1].strip() if len(parts) > 1 else None
         else:
             ref = None
             branch = None
         lat, lon = check_hu_boundary(e.attrib['lat'].replace(',', '.'),
                                      e.attrib['lng'].replace(',', '.'))
         geom = check_geom(lat, lon)
         postcode = query_postcode_osm_external(self.prefer_osm_postcode,
                                                self.session, lat, lon,
                                                None)
         website = None
         original = None
         street = None
         housenumber = None
         conscriptionnumber = None
         phone = None
         email = None
         nonstop = True
         # No per-day opening/closing times are set: 14 empty columns.
         insert_data.append([
             code, postcode, city, name, branch, website, original, street,
             housenumber, conscriptionnumber, ref, phone, email, geom,
             nonstop
         ] + [None] * 14)
     if not insert_data:
         logging.warning('Resultset is empty. Skipping ...')
         return
     df = pd.DataFrame(insert_data)
     df.columns = POI_COLS
     insert_poi_dataframe(self.session, df)
Exemplo n.º 3
0
 def process(self):
     """Build POI rows from the local JSON file at ``self.link``.

     Does nothing when ``self.link`` is falsy. Otherwise each entry of the
     file's ``results`` list becomes one row laid out like ``POI_COLS``;
     the rows are passed to ``insert_poi_dataframe`` together with
     ``self.session``. An empty result only logs a warning.
     """
     if not self.link:
         return
     # JSON text is UTF-8 by specification; do not depend on the platform's
     # default encoding. The file is closed as soon as it has been parsed.
     with open(self.link, 'r', encoding='utf-8') as f:
         text = json.load(f)
     # Name and code depend only on the provider, not on the record.
     if self.name == 'CIB bank':
         name, code = 'CIB bank', 'hucibbank'
     else:
         name, code = 'CIB', 'hucibatm'
     insert_data = []
     for poi_data in text['results']:
         # The record fields are read from under the first key of the entry.
         details = poi_data[next(iter(poi_data))]
         postcode, city, street, housenumber, conscriptionnumber = extract_all_address(
             details['address'])
         lat, lon = check_hu_boundary(details['latitude'],
                                      details['longitude'])
         geom = check_geom(lat, lon)
         postcode = query_postcode_osm_external(
             self.prefer_osm_postcode, self.session, lat, lon, postcode)
         original = details['address']
         branch = None
         website = None
         ref = None
         phone = None
         email = None
         # The nonstop flag and the 14 per-day opening/closing columns are
         # not filled by this provider.
         insert_data.append([
             code, postcode, city, name, branch, website, original,
             street, housenumber, conscriptionnumber, ref, phone, email,
             geom
         ] + [None] * 15)
     if not insert_data:
         logging.warning('Resultset is empty. Skipping ...')
         return
     df = pd.DataFrame(insert_data)
     df.columns = POI_COLS
     insert_poi_dataframe(self.session, df)
Exemplo n.º 4
0
 def process(self):
     """Build POI rows from the JSON feed at ``self.link`` and hand them on.

     The feed is requested through ``save_downloaded_soup`` with
     ``POST_DATA`` as extra argument, parsed as a JSON list and converted
     entry by entry into rows laid out like ``POI_COLS``. The rows are
     passed to ``insert_poi_dataframe`` together with ``self.session``.
     Nothing happens when the download returns ``None``; an empty row list
     only logs a warning.
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename), POST_DATA)
     if soup is None:
         return
     insert_data = []
     for poi_data in json.loads(soup.get_text()):
         name = 'MOL'
         code = 'humolfu'
         postcode = poi_data['postcode'].strip()
         street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
             poi_data['address'])
         city = clean_city(poi_data['city'])
         original = poi_data['address']
         lat, lon = check_hu_boundary(poi_data['lat'], poi_data['lng'])
         geom = check_geom(lat, lon)
         postcode = query_postcode_osm_external(
             self.prefer_osm_postcode, self.session, lat, lon, postcode)
         branch = None
         website = None
         ref = None
         phone = None
         email = None
         # The nonstop flag and the 14 per-day opening/closing columns are
         # not filled by this provider.
         insert_data.append([
             code, postcode, city, name, branch, website, original,
             street, housenumber, conscriptionnumber, ref, phone, email,
             geom
         ] + [None] * 15)
     if not insert_data:
         logging.warning('Resultset is empty. Skipping ...')
         return
     df = pd.DataFrame(insert_data)
     df.columns = POI_COLS
     insert_poi_dataframe(self.session, df)
Exemplo n.º 5
0
 def process(self):
     """Build POI rows from the JSON feed at ``self.link`` and hand them on.

     The feed is requested through ``save_downloaded_soup`` with
     ``POST_DATA`` as extra argument and parsed as JSON; each entry of its
     ``results`` list becomes one row laid out like ``POI_COLS``. The rows
     are passed to ``insert_poi_dataframe`` together with ``self.session``.
     Nothing happens when the download returns ``None``; an empty row list
     only logs a warning.
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename), POST_DATA)
     if soup is None:
         return
     insert_data = []
     for poi_data in json.loads(soup.get_text())['results']:
         name = 'OMV'
         code = 'huomvfu'
         postcode = poi_data['postcode'].strip()
         street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
             poi_data['address_l'])
         city = clean_city(poi_data['town_l'])
         nonstop = None
         oho, ohc = None, None
         if poi_data['open_hours'] is not None:
             oho, ohc = clean_opening_hours(poi_data['open_hours'])
             if oho == '00:00' and ohc == '24:00':
                 # Open around the clock: flag it and drop the explicit times.
                 nonstop = True
                 oho, ohc = None, None
         original = poi_data['address_l']
         lat, lon = check_hu_boundary(poi_data['y'], poi_data['x'])
         geom = check_geom(lat, lon)
         postcode = query_postcode_osm_external(
             self.prefer_osm_postcode, self.session, lat, lon, postcode)
         raw_phone = poi_data.get('telnr', '')
         phone = clean_phone(raw_phone) if raw_phone != '' else None
         branch = None
         website = None
         ref = None
         email = None
         # The feed carries a single opening interval, so the same opening
         # and closing time is used for all seven days.
         insert_data.append([
             code, postcode, city, name, branch, website, original,
             street, housenumber, conscriptionnumber, ref, phone, email,
             geom, nonstop
         ] + [oho] * 7 + [ohc] * 7)
     if not insert_data:
         logging.warning('Resultset is empty. Skipping ...')
         return
     df = pd.DataFrame(insert_data)
     df.columns = POI_COLS
     insert_poi_dataframe(self.session, df)
Exemplo n.º 6
0
 def process(self):
     """Build POI rows from the XML feed at ``self.link`` and hand them on.

     Every ``post`` child of the document root becomes one row laid out
     like ``POI_COLS``; name and code are chosen from the element's
     ``ServicePointType``. Elements with an unknown type are logged and
     skipped. The rows are passed to ``insert_poi_dataframe`` together with
     ``self.session``. An empty result only logs a warning.
     """
     # ServicePointType value -> (name, code)
     service_types = {
         'PM': ('Posta', 'hupostapo'),
         'CS': ('Posta csomagautomata', 'hupostacso'),
         'PP': ('PostaPont', 'hupostapp'),
     }
     xml = save_downloaded_xml(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     insert_data = []
     root = etree.fromstring(xml)
     for e in root.findall('post'):
         service_type = e.find('ServicePointType').text
         if service_type not in service_types:
             # Without a known type there is no valid name/code; skip the
             # element instead of reusing values from a previous iteration
             # (or failing on unbound names for the first element).
             logging.error('Non existing Posta type.')
             continue
         name, code = service_types[service_type]
         postcode = e.get('zipCode')
         street_name = e.find('street/name').text
         street_type = e.find('street/type').text
         if street_name is None:
             street = None
         elif street_type is None:
             street = street_name.strip()
         else:
             street = '{} {}'.format(street_name.strip(), street_type.strip())
         house_text = e.find('street/houseNumber').text
         housenumber = house_text.strip().lower() if house_text is not None else None
         conscriptionnumber = None
         city = clean_city(e.find('city').text)
         branch = e.find('name').text
         lat, lon = check_hu_boundary(
             e.find('gpsData/WGSLat').text.replace(',', '.'),
             e.find('gpsData/WGSLon').text.replace(',', '.'))
         geom = check_geom(lat, lon)
         postcode = query_postcode_osm_external(self.prefer_osm_postcode,
                                                self.session, lat, lon,
                                                postcode)
         website = None
         original = None
         ref = None
         phone = None
         email = None
         # The nonstop flag and the 14 per-day opening/closing columns are
         # not filled by this provider.
         insert_data.append([
             code, postcode, city, name, branch, website, original, street,
             housenumber, conscriptionnumber, ref, phone, email, geom
         ] + [None] * 15)
     if not insert_data:
         logging.warning('Resultset is empty. Skipping ...')
         return
     df = pd.DataFrame(insert_data)
     df.columns = POI_COLS
     insert_poi_dataframe(self.session, df)
Exemplo n.º 7
0
 def process(self):
     """Build POI rows from the ``places`` JavaScript variable of a web page.

     The page at ``self.link`` is fetched through ``save_downloaded_soup``;
     the ``<script>`` whose text starts with ``var places`` is located, its
     matching line is converted with ``clean_javascript_variable`` and
     parsed as JSON. The first address of every entry becomes one row laid
     out like ``POI_COLS``; the rows are passed to ``insert_poi_dataframe``
     together with ``self.session``. Nothing happens when the download
     returns ``None``; an empty row list only logs a warning.
     """
     soup = save_downloaded_soup('{}'.format(self.link), os.path.join(self.download_cache, self.filename), None,
                                 self.verify_link)
     if soup is None:
         return
     # Raw string: '\s' in a normal string literal is an invalid escape.
     pattern = re.compile(r'^\s*var\s*places.*')
     script = soup.find('script', text=pattern)
     m = pattern.match(script.get_text())
     data = clean_javascript_variable(m.group(0), 'places')
     # Keys of 'business_hours' in calendar order; the resulting lists line
     # up with the seven opening and seven closing columns of the row.
     weekdays = ('monday', 'tuesday', 'wednesday', 'thursday', 'friday',
                 'saturday', 'sunday')
     insert_data = []
     for entry in json.loads(data):
         poi_data = entry['addresses'][0]
         street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
             poi_data['address'])
         name = 'Rossmann'
         code = 'hurossmche'
         city = clean_city(poi_data['city'])
         postcode = poi_data['zip'].strip()
         nonstop = False
         business_hours = poi_data['business_hours']
         opens, closes = [], []
         for day in weekdays:
             if business_hours[day] is not None:
                 day_open, day_close = clean_opening_hours(business_hours[day])
             else:
                 day_open, day_close = None, None
             opens.append(day_open)
             closes.append(day_close)
         lat, lon = check_hu_boundary(poi_data['position'][0], poi_data['position'][1])
         geom = check_geom(lat, lon)
         postcode = query_postcode_osm_external(self.prefer_osm_postcode, self.session, lat, lon, postcode)
         original = poi_data['address']
         branch = None
         website = None
         ref = None
         phone = None
         email = None
         insert_data.append(
             [code, postcode, city, name, branch, website, original, street, housenumber, conscriptionnumber,
              ref, phone, email, geom, nonstop] + opens + closes)
     if not insert_data:
         logging.warning('Resultset is empty. Skipping ...')
         return
     df = pd.DataFrame(insert_data)
     df.columns = POI_COLS
     insert_poi_dataframe(self.session, df)
 def process_geom(self):
     """Set ``self.geom`` to ``check_geom`` of the stored latitude/longitude."""
     latitude, longitude = self.__lat, self.__lon
     self.geom = check_geom(latitude, longitude)
Exemplo n.º 9
0
 def process(self):
     """Build POI rows from the JSON feed at ``self.link`` and hand them on.

     The feed is fetched through ``save_downloaded_soup``, parsed as a JSON
     list and converted entry by entry into rows laid out like
     ``POI_COLS``; name and code are chosen from markers in the shop name.
     The rows are passed to ``insert_poi_dataframe`` together with
     ``self.session``. Nothing happens when the download returns ``None``;
     an empty row list only logs a warning.
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     if soup is None:
         return
     insert_data = []
     for poi_data in json.loads(soup.get_text()):
         street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
             poi_data['address'])
         shop_name = poi_data['name']
         if 'xpres' in shop_name:
             name, code = 'Spar Expressz', 'husparexp'
         elif 'INTER' in shop_name:
             name, code = 'Interspar', 'husparint'
         else:
             # Covers the 'market' shops as well as everything else.
             name, code = 'Spar', 'husparsup'
         # Normalise the brand spelling before deriving ref and branch.
         shop_name = shop_name.replace('INTERSPAR', 'Interspar').replace(
             'SPAR', 'Spar')
         ref_match = PATTERN_SPAR_REF.search(shop_name)
         ref = ref_match.group(1).strip() if ref_match is not None else None
         city = clean_city(poi_data['city'])
         postcode = poi_data['zipCode'].strip()
         # The branch is the part of the name in front of the first '('.
         branch = shop_name.split('(')[0].strip()
         website = poi_data['pageUrl'].strip()
         lat, lon = check_hu_boundary(poi_data['latitude'],
                                      poi_data['longitude'])
         geom = check_geom(lat, lon)
         postcode = query_postcode_osm_external(
             self.prefer_osm_postcode, self.session, lat, lon, postcode)
         original = poi_data['address']
         phone = None
         email = None
         # The nonstop flag and the 14 per-day opening/closing columns are
         # not filled by this provider.
         insert_data.append([
             code, postcode, city, name, branch, website, original,
             street, housenumber, conscriptionnumber, ref, phone, email,
             geom
         ] + [None] * 15)
     if not insert_data:
         logging.warning('Resultset is empty. Skipping ...')
         return
     df = pd.DataFrame(insert_data)
     df.columns = POI_COLS
     insert_poi_dataframe(self.session, df)
Exemplo n.º 10
0
 def process(self):
     """Build POI rows from the tabular feed at ``self.link`` and hand them on.

     The table returned by ``save_downloaded_pd`` is normalised (missing
     post codes, phone numbers, cities and names are filled) and converted
     record by record into rows laid out like ``POI_COLS``. Records whose
     ``Brand`` is neither ``Shell`` nor ``Mobilpetrol`` are logged and
     skipped. The rows are passed to ``insert_poi_dataframe`` together with
     ``self.session``. Nothing happens when the download returns ``None``;
     an empty row list only logs a warning.
     """
     # Brand value -> (name, code)
     brands = {
         'Shell': ('Shell', 'hushellfu'),
         'Mobilpetrol': ('Mobil Petrol', 'humobpefu'),
     }
     frame = save_downloaded_pd(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     if frame is None:
         return
     frame[['Post code']] = frame[['Post code']].fillna('0000')
     frame[['Post code']] = frame[['Post code']].astype(int)
     frame[['Telephone']] = frame[['Telephone']].fillna('0')
     frame[['Telephone']] = frame[['Telephone']].astype(int)
     frame[['City']] = frame[['City']].fillna('')
     frame[['Name']] = frame[['Name']].fillna('')
     insert_data = []
     for poi_data in frame.to_dict('records'):
         if poi_data['Brand'] not in brands:
             # Without a known brand there is no valid name/code; skip the
             # record instead of reusing values from a previous iteration
             # (or failing on unbound names for the first record).
             logging.error('Unknown brand: %s. Skipping ...', poi_data['Brand'])
             continue
         name, code = brands[poi_data['Brand']]
         postcode = poi_data['Post code']
         # Lower-case the address, then capitalise every word except the
         # last two.
         words = poi_data['Address'].lower().split()
         address = ' '.join([w.capitalize() for w in words[:-2]] + words[-2:])
         street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
             address)
         if poi_data['City'] != '':
             city = clean_city(poi_data['City'].title())
         elif poi_data['Name'] != '':
             # Fall back to the station name when no city is given.
             city = clean_city(poi_data['Name'].title())
         else:
             city = None
         branch = poi_data['Name'].strip()
         website = None
         # Explicit comparison on purpose: a missing value (NaN) is truthy
         # but must not count as "24 hour".
         if poi_data['24 Hour'] == True:
             nonstop = True
             opening_hours = [None] * 14
         else:
             nonstop = False
             # Fixed 06:00-22:00 for all seven days.
             opening_hours = ['06:00'] * 7 + ['22:00'] * 7
         original = poi_data['Address']
         ref = None
         lat, lon = check_hu_boundary(poi_data['GPS Latitude'],
                                      poi_data['GPS Longitude'])
         geom = check_geom(lat, lon)
         postcode = query_postcode_osm_external(
             self.prefer_osm_postcode, self.session, lat, lon, postcode)
         # Missing phone numbers were filled with 0 above; treat that
         # placeholder as "no phone" instead of cleaning the string '0'.
         telephone = poi_data['Telephone']
         phone = clean_phone(str(telephone)) if telephone else None
         email = None
         insert_data.append([
             code, postcode, city, name, branch, website, original,
             street, housenumber, conscriptionnumber, ref, phone, email,
             geom, nonstop
         ] + opening_hours)
     if not insert_data:
         logging.warning('Resultset is empty. Skipping ...')
         return
     df = pd.DataFrame(insert_data)
     df.columns = POI_COLS
     insert_poi_dataframe(self.session, df)
Exemplo n.º 11
0
    def process(self):
        """Build POI rows from the ``boltok_nyers`` JavaScript variable of a page.

        The page at ``self.link`` is fetched through ``save_downloaded_soup``;
        the ``<script>`` whose text starts with ``var boltok_nyers`` is
        located, its matching line is converted with
        ``clean_javascript_variable`` and parsed as JSON. Every entry becomes
        one row laid out like ``POI_COLS``; the rows are passed to
        ``insert_poi_dataframe`` together with ``self.session``. Nothing
        happens when the download returns ``None``; an empty row list only
        logs a warning.
        """
        soup = save_downloaded_soup(
            '{}'.format(self.link),
            os.path.join(self.download_cache, self.filename))
        if soup is None:
            return
        # Raw string: '\s' in a normal string literal is an invalid escape.
        pattern = re.compile(r'^\s*var\s*boltok_nyers.*')
        script = soup.find('script', text=pattern)
        m = pattern.match(script.get_text())
        data = clean_javascript_variable(m.group(0), 'boltok_nyers')
        # Keys PS_OPEN_FROM_1..7 / PS_OPEN_TO_1..7 are read in numeric order,
        # which is the order of the seven opening and seven closing columns.
        open_keys = ['PS_OPEN_FROM_{}'.format(day) for day in range(1, 8)]
        close_keys = ['PS_OPEN_TO_{}'.format(day) for day in range(1, 8)]
        insert_data = []
        for poi_data in json.loads(data):
            street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                poi_data['A_CIM'])
            city = clean_city(poi_data['A_VAROS'])
            postcode = poi_data['A_IRSZ'].strip()
            branch = poi_data['P_NAME'].strip()
            if 'Príma' in branch:
                name, code = 'Príma', 'huprimacon'
            else:
                name, code = 'CBA', 'hucbacon'
            website = None
            nonstop = None
            opening_hours = [
                clean_opening_hours_2(poi_data[key])
                if poi_data[key] is not None else None
                for key in open_keys + close_keys
            ]
            original = poi_data['A_CIM']
            lat, lon = check_hu_boundary(poi_data['PS_GPS_COORDS_LAT'],
                                         poi_data['PS_GPS_COORDS_LNG'])
            geom = check_geom(lat, lon)
            postcode = query_postcode_osm_external(
                self.prefer_osm_postcode, self.session, lat, lon, postcode)
            ref = None
            raw_phone = poi_data.get('PS_PUBLIC_TEL', '')
            phone = clean_phone(raw_phone) if raw_phone != '' else None
            raw_email = poi_data.get('PS_PUBLIC_EMAIL', '')
            email = raw_email if raw_email != '' else None
            insert_data.append([
                code, postcode, city, name, branch, website, original,
                street, housenumber, conscriptionnumber, ref, phone, email,
                geom, nonstop
            ] + opening_hours)
        if not insert_data:
            logging.warning('Resultset is empty. Skipping ...')
            return
        df = pd.DataFrame(insert_data)
        df.columns = POI_COLS
        insert_poi_dataframe(self.session, df)
Exemplo n.º 12
0
 def process(self):
     """Build POI rows from the ``markers`` JavaScript variable of a web page.

     The page at ``self.link`` is fetched through ``save_downloaded_soup``;
     the ``var markers = [...];`` assignment is cut out of the matching
     ``<script>``, single quotes are replaced by double quotes, and the
     result is converted with ``clean_javascript_variable`` and parsed as
     JSON. Every entry becomes one row laid out like ``POI_COLS``; the rows
     are passed to ``insert_poi_dataframe`` together with ``self.session``.
     Nothing happens when the download returns ``None``; an empty row list
     only logs a warning.
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     if soup is None:
         return
     # Raw string: '\s', '\]' and '\;' are invalid escapes in a normal
     # string literal.
     pattern = re.compile(r'var\s*markers\s*=\s*((.*\n)*\]\;)',
                          re.MULTILINE)
     script = soup.find('script', text=pattern)
     m = pattern.search(script.get_text())
     data = m.group(0).replace("'", '"')
     data = clean_javascript_variable(data, 'markers')
     insert_data = []
     for poi_data in json.loads(data):
         address = poi_data['cim']
         if address is not None and address != '':
             postcode, city, street, housenumber, conscriptionnumber = extract_all_address(
                 address)
         else:
             # Reset the address parts so an entry without address neither
             # inherits the previous entry's values nor fails on unbound
             # names when it is the first entry.
             postcode = city = street = housenumber = conscriptionnumber = None
         name = 'Avia'
         code = 'huaviafu'
         branch = None
         if city is None:
             city = poi_data['title']
         station_id = poi_data['kutid']
         has_station_id = station_id is not None and station_id != ''
         ref = station_id if has_station_id else None
         lat, lon = check_hu_boundary(poi_data['lat'], poi_data['lng'])
         geom = check_geom(lat, lon)
         postcode = query_postcode_osm_external(
             self.prefer_osm_postcode, self.session, lat, lon, postcode)
         website = '/toltoallomas/?id={}'.format(str(station_id)) if has_station_id else None
         original = poi_data['cim']
         raw_phone = poi_data.get('tel', '')
         phone = clean_phone(raw_phone) if raw_phone != '' else None
         raw_email = poi_data.get('email', '')
         email = clean_email(raw_email) if raw_email != '' else None
         # The nonstop flag and the 14 per-day opening/closing columns are
         # not filled by this provider.
         insert_data.append([
             code, postcode, city, name, branch, website, original,
             street, housenumber, conscriptionnumber, ref, phone, email,
             geom
         ] + [None] * 15)
     if not insert_data:
         logging.warning('Resultset is empty. Skipping ...')
         return
     df = pd.DataFrame(insert_data)
     df.columns = POI_COLS
     insert_poi_dataframe(self.session, df)
Exemplo n.º 13
0
 def process(self):
     """Parse the Tesco store list and insert the stores into the POI table.

     The document returned by ``save_downloaded_soup`` is searched for the
     first element carrying a ``data-stores`` attribute; that attribute is
     decoded as a JSON list of stores and every store becomes one row laid
     out as ``POI_COLS``. Nothing happens when no document was obtained.

     Returns:
         None. Rows are written through ``insert_poi_dataframe``.
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     if soup is None:
         return

     # Keys of the per-store opening table, listed in the order of the
     # mo, th, we, tu, fr, sa, su columns of the row.
     day_keys = ('1', '2', '3', '4', '5', '6', '0')
     # (substring of the store name, POI name, POI code); first match wins,
     # anything else is a plain 'Tesco'.
     variants = (
         ('xpres', 'Tesco Expressz', 'hutescoexp'),
         ('xtra', 'Tesco Extra', 'hutescoext'),
     )

     rows = []
     holder = soup.find(attrs={'data-stores': True})
     stores = json.loads(holder['data-stores'])
     for store in stores:
         street, housenumber, conscriptionnumber = \
             extract_street_housenumber_better_2(store['address'])
         city = clean_city(store['city'])
         branch = store['name']

         name, code = 'Tesco', 'hutescosup'
         for marker, variant_name, variant_code in variants:
             if marker in branch:
                 name, code = variant_name, variant_code
                 break

         website = store['url']
         # 'opening' is itself a JSON document: key -> [open, close].
         hours = json.loads(store['opening'])
         opens = [hours[key][0] for key in day_keys]
         closes = [hours[key][1] for key in day_keys]

         lat, lon = check_hu_boundary(store['gpslat'], store['gpslng'])
         geom = check_geom(lat, lon)
         # The feed is not asked for a postcode; None is handed over as the
         # current value to the lookup.
         postcode = query_postcode_osm_external(
             self.prefer_osm_postcode, self.session, lat, lon, None)

         has_phone = 'phone' in store and store['phone'] != ''
         phone = clean_phone(store['phone']) if has_phone else None

         # ref, email and nonstop are not provided by this source.
         ref = email = nonstop = None
         rows.append([
             code, postcode, city, name, branch, website, store['address'],
             street, housenumber, conscriptionnumber, ref, phone, email,
             geom, nonstop
         ] + opens + closes)

     if not rows:
         logging.warning('Resultset is empty. Skipping ...')
         return
     frame = pd.DataFrame(rows)
     frame.columns = POI_COLS
     insert_poi_dataframe(self.session, frame)
Exemplo n.º 14
0
    def process(self):
        """Load the Posta POI feed and insert its items into the POI table.

        The document obtained through ``save_downloaded_soup`` is parsed as
        JSON and every entry of its ``items`` list is turned into one row
        laid out as ``POI_COLS``. Items whose ``type`` is not one of the
        known Posta types are logged and skipped, so they can neither raise
        on an unbound ``name``/``code`` nor be stored under the name and
        code of the previously processed item. Nothing happens when no
        document was obtained.

        Returns:
            None. Rows are written through ``insert_poi_dataframe``.
        """
        soup = save_downloaded_soup(
            '{}'.format(self.link),
            os.path.join(self.download_cache, self.filename))
        if soup is None:
            return

        insert_data = []
        text = json.loads(soup.get_text())
        for poi_data in text['items']:
            poi_type = poi_data['type']
            if poi_type == 'posta':
                # Mobile post offices are told apart by their name only.
                if 'mobilposta' in poi_data['name']:
                    name = 'Mobilposta'
                    code = 'hupostamp'
                else:
                    name = 'Posta'
                    code = 'hupostapo'
            elif poi_type == 'csekkautomata':
                name = 'Posta csekkautomata'
                code = 'hupostacse'
            elif poi_type == 'postamachine':
                name = 'Posta csomagautomata'
                code = 'hupostacso'
            elif poi_type == 'postapoint':
                name = 'PostaPont'
                code = 'hupostapp'
            else:
                # Without a known type there is no valid name/code pair;
                # skip the item instead of reusing stale values.
                logging.error('Non existing Posta type.')
                continue

            postcode = poi_data['zipCode'].strip()
            street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                poi_data['address'])
            city = clean_city(poi_data['city'])
            branch = poi_data['name']
            # NOTE(review): unlike the sibling providers, the coordinates are
            # passed to check_geom without check_hu_boundary -- confirm this
            # is intended.
            geom = check_geom(poi_data['lat'], poi_data['lng'])
            original = poi_data['address']

            # Not provided by this source.
            website = ref = phone = email = nonstop = None
            # 7 opening + 7 closing time columns, all unknown.
            opening_hours = [None] * 14

            insert_data.append([
                code, postcode, city, name, branch, website, original,
                street, housenumber, conscriptionnumber, ref, phone, email,
                geom, nonstop
            ] + opening_hours)

        if not insert_data:
            logging.warning('Resultset is empty. Skipping ...')
        else:
            df = pd.DataFrame(insert_data)
            df.columns = POI_COLS
            insert_poi_dataframe(self.session, df)
Exemplo n.º 15
0
 def process(self):
     """Parse the Foxpost feed and insert its entries into the POI table.

     The text of the document returned by ``save_downloaded_soup`` is
     decoded as a JSON list; every entry becomes one row laid out as
     ``POI_COLS``. Nothing happens when no document was obtained.

     Returns:
         None. Rows are written through ``insert_poi_dataframe``.
     """
     soup = save_downloaded_soup(
         '{}'.format(self.link),
         os.path.join(self.download_cache, self.filename))
     if soup is None:
         return

     # Keys of the feed's 'open' mapping, listed in the order of the
     # mo, th, we, tu, fr, sa, su columns of the row.
     day_keys = ('hetfo', 'kedd', 'szerda', 'csutortok', 'pentek',
                 'szombat', 'vasarnap')

     rows = []
     for entry in json.loads(soup.get_text()):
         name = 'Foxpost'
         code = 'hufoxpocso'
         postcode = entry['zip'].strip()
         street, housenumber, conscriptionnumber = \
             extract_street_housenumber_better_2(entry['street'])
         city = clean_city(entry['city'])
         branch = entry['name']

         # Collect opening and closing times day by day; a day whose value
         # is None yields a (None, None) pair.
         opens, closes = [], []
         for key in day_keys:
             raw_hours = entry['open'][key]
             if raw_hours is None:
                 opened, closed = None, None
             else:
                 opened, closed = clean_opening_hours(raw_hours)
             opens.append(opened)
             closes.append(closed)

         original = entry['address']
         lat, lon = check_hu_boundary(entry['geolat'], entry['geolng'])
         geom = check_geom(lat, lon)
         postcode = query_postcode_osm_external(
             self.prefer_osm_postcode, self.session, lat, lon, postcode)

         # website, ref, phone, email and nonstop are not provided here.
         website = ref = phone = email = nonstop = None
         rows.append([
             code, postcode, city, name, branch, website, original,
             street, housenumber, conscriptionnumber, ref, phone, email,
             geom, nonstop
         ] + opens + closes)

     if not rows:
         logging.warning('Resultset is empty. Skipping ...')
         return
     frame = pd.DataFrame(rows)
     frame.columns = POI_COLS
     insert_poi_dataframe(self.session, frame)
Exemplo n.º 16
0
    def process(self):
        """Read the cached Kulcs patika JSON file and insert its POIs.

        The file ``self.filename`` inside ``self.download_cache`` is decoded
        as a JSON list; every entry becomes one row laid out as
        ``POI_COLS``. This method does not download anything: the download
        step (``save_downloaded_soup`` with ``POST_DATA``) is disabled here,
        so the file has to be present in the cache already.

        Returns:
            None. Rows are written through ``insert_poi_dataframe``.

        Raises:
            OSError: if the cached file cannot be opened.
        """
        # JSON is UTF-8 by specification; do not depend on the platform's
        # default encoding for the accented names and addresses. The file is
        # closed as soon as it is loaded instead of being held open during
        # the processing and the database insert.
        with open(os.path.join(self.download_cache, self.filename), 'r',
                  encoding='utf-8') as f:
            text = json.load(f)

        insert_data = []
        for poi_data in text:
            street, housenumber, conscriptionnumber = extract_street_housenumber_better_2(
                poi_data['cim'])
            # Pharmacies not carrying the brand in their name keep their own
            # name; branded ones get the brand as name and the feed name as
            # branch.
            if 'Kulcs patika' not in poi_data['nev']:
                name = poi_data['nev'].strip()
                branch = None
            else:
                name = 'Kulcs patika'
                branch = poi_data['nev'].strip()
            code = 'hukulcspha'
            website = poi_data['link'].strip(
            ) if poi_data['link'] is not None else None
            city = clean_city(poi_data['helyseg'])
            lat, lon = check_hu_boundary(
                poi_data['marker_position']['latitude'],
                poi_data['marker_position']['longitude'])
            geom = check_geom(lat, lon)
            postcode = query_postcode_osm_external(
                self.prefer_osm_postcode, self.session, lat, lon,
                poi_data['irsz'].strip())
            original = poi_data['cim']

            # Not provided by this source.
            ref = phone = email = nonstop = None
            # 7 opening + 7 closing time columns, all unknown.
            opening_hours = [None] * 14

            insert_data.append([
                code, postcode, city, name, branch, website, original,
                street, housenumber, conscriptionnumber, ref, phone, email,
                geom, nonstop
            ] + opening_hours)

        if not insert_data:
            logging.warning('Resultset is empty. Skipping ...')
        else:
            df = pd.DataFrame(insert_data)
            df.columns = POI_COLS
            insert_poi_dataframe(self.session, df)