parser = argparse.ArgumentParser(description='Read raw data files and parse its contents') parser.add_argument("path", help='Input path of the raw_data files.') parser.add_argument('db', help='Complete path of .db file (sqlite database). Example: ../data/database.db') args = parser.parse_args() if args == None: input_dir = '/home/aksmiyazaki/git/python_oo_improvement/etl_python/raw_data' database_path = '../data/database.db' else: input_dir = str(args.path) database_path = str(args.db) print(f"Running with the following arguments: [{input_dir}] [{database_path}]") db = DatabaseService.get_instance() db.initialize_database_conn(database_path) data_list = listdir(input_dir) geo_point = None geo_parser = geoparse.GeoParser() locator = PointLocator.get_instance() def next_state(cur_state): if cur_state == geoparse.RowDataType.SEEK_LAT: return geoparse.RowDataType.SEEK_LON elif cur_state == geoparse.RowDataType.SEEK_LON: return geoparse.RowDataType.SEEK_DIST elif cur_state == geoparse.RowDataType.SEEK_DIST: return geoparse.RowDataType.SEEK_LAT else: raise Exception(f"[ERROR] There is no next state for current {cur_state}")
def persist_address(self): """ This method persists Address in a star schema. """ self.__db = DatabaseService.get_instance() # If there's more than one occurence of the same lat/lon, just add one to Occurence. sql = f"""Select Latitude, Longitude from Address where Latitude = {self.latitude} and Longitude = {self.longitude}""" res = self.__db.execute_select(sql) if len(res) > 0: print( f"Latitude = {self.latitude} and Longitude = {self.longitude} returned more than once." ) sql = f"""Update Address set Occurences = ((Select Occurences from Address where Latitude = {self.latitude} and Longitude = {self.longitude}) + 1) where Latitude = {self.latitude} and Longitude = {self.longitude}""" res = self.__db.execute(sql) self.__db.commit_changes else: # Adds content to tables # Since the data depends on a third party API, we must expect that stuff may be null. if self.country is not None and self.country != "": sql_ins = "INSERT INTO Country (CountryName) Values (?);" params = (self.country, ) self.__insert_data(sql_ins, params) if self.region is not None and self.region != "": sql_ins = f"INSERT INTO Region (RegionName) Values (?);" params = (self.region, ) self.__insert_data(sql_ins, params) if self.city is not None and self.city != "": sql_ins = f"INSERT INTO City (CityName) Values (?);" params = (self.city, ) self.__insert_data(sql_ins, params) if self.district is not None and self.district != "": sql_ins = f"INSERT INTO District (DistrictName) Values (?)" params = (self.district, ) self.__insert_data(sql_ins, params) if self.postal_code is not None and self.postal_code != "": sql_ins = f"INSERT INTO Street (PostalCode, StreetName) Values (?,?);" params = ( self.postal_code, self.street, ) self.__insert_data(sql_ins, params) sql_ins = f"""INSERT INTO Address (Latitude, Longitude, Number, PostalCode, CountryName, DistrictName, RegionName, CityName, Occurences) Values (?, ?, ?, ?, ?, ?, ?, ?, ?) """ params = (self.latitude, self.longitude, self.number, self.postal_code, self.country, self.district, self.region, self.city, 1) self.__db.execute(sql_ins, params) self.__db.commit_changes()