def test_clean_column_names_blank(self):
    """clean_column_names must reject a blank (' ') and a None column name."""
    self.columns.append(" ")
    with self.assertRaises(AttributeError):
        utils.clean_column_names(*self.columns)
    # Swap the blank entry for None and expect the same failure.
    self.columns[-1] = None
    with self.assertRaises(AttributeError):
        utils.clean_column_names(*self.columns)
def test_clean_column_names_keyword(self):
    """clean_column_names must reject an SQL-injection style column name."""
    injection = "bday;DROP table very_important;"
    self.columns.append(injection)
    with self.assertRaises(AttributeError):
        utils.clean_column_names(*self.columns)
def test_clean_column_names(self):
    """clean_column_names returns sanitized names for valid input."""
    expected = ["id", "fname", "birth_day"]
    self.assertEqual(utils.clean_column_names(*self.columns), expected)
def ogr2lite(conn, input_data, table_name='AUTO', drop=None, dim=2,
             geom_col="geometry", recast_xy=True):
    """Converts a shapefile to SQLite table via ogr and pandas.

    Args:
        conn (dslw.SpatialDB/apsw.Connection): db connection
        input_data (str, tuple): path to input data; if input is db,
            use tuple of (path_to_db, data_table_name)
        table_name (str): new table name (defaults to shapefile's basename)
        drop (list): list of extra fields to drop upon conversion
        dim (int): number of spatial dimensions, default 2
            NOTE(review): currently unused; dimensions come from the data
            (or are recast to 'XY') -- confirm intent
        geom_col (str): name of new geometry column; default 'geometry'
        recast_xy (bool): if True, force 2D ('XY') geometry dimensions

    Returns True if successful.

    Raises:
        IOError: mixed spatial references or mixed geometry types.
    """
    # BUG FIX: the original signature used the mutable default 'drop=[]'
    # and mutated it below with extend(), so dropped fields accumulated
    # across calls.  Copy any caller-supplied list so it is never mutated.
    drop = list(drop) if drop else []

    # Resolve the output table name
    if table_name == 'AUTO':
        if isinstance(input_data, tuple):
            table_name = input_data[1].lower()
        else:
            table_name = os.path.basename(input_data).split(".")[0].lower()

    # Load the input into a DataFrame
    if isinstance(input_data, tuple):
        df = ogr2df(input_data[0], input_data[1], True)
    else:
        df = ogr2df(input_data, None, True)

    # Escape apostrophes -- https://stackoverflow.com/questions/12460742
    df = df.applymap(
        lambda x: x.replace("'", "''") if type(x) is unicode else x)
    # Single-quote all unicode values (and None) -- for SQL formatting
    df = df.applymap(
        lambda x: "'{}'".format(x) if type(x) is unicode or x is None else x)

    # Validate and get spatial reference id
    # TODO: move to clean option of ogr2df?
    auth_srid_set = set(df["srid"])
    if len(auth_srid_set) > 1:
        raise IOError("Data cannot have more than one spatial reference")
    srid = int(list(auth_srid_set)[0])

    # Validate and get geometry type
    shape_set = set(df["shape_type"])
    if len(shape_set) > 1:
        try:
            # Assumes a set of n > 1 geometry types has a common base
            # e.g. chooses 'Multipolygon' from {MultiPolygon, Polygon}
            shape = list(shape_set)[[s.lower().startswith("multi")
                                     for s in shape_set].index(True)]
            is_multi = True
            # Asserts the assumption above, e.g. raises an error for
            # {Line, MultiPoint} because Line != Point.
            # BUG FIX: the original only asserted that the set was non-empty
            # (always true here); it must assert exactly ONE common base.
            assert len(set(
                [s.lower().replace("multi", "") for s in shape_set])) == 1
        except (ValueError, AssertionError):
            # ValueError: no 'Multi*' member found by .index(True)
            # https://gis.stackexchange.com/questions/14203
            raise IOError("Cannot have mixed geometry types. Found: {}".format(
                shape_set))
    else:
        shape = str(list(shape_set)[0])
        is_multi = False

    # Validate and get spatial dimensions (first row; .ix is deprecated,
    # .iloc[0] is the positional equivalent for the default index)
    dims = df["dims"].iloc[0]
    if recast_xy:
        dims = "XY"

    # Drop bookkeeping columns plus any caller-requested ones
    drop.extend(["type", "srid", "dims", "shape_type"])
    for drop_col in drop:
        if drop_col in df.columns:
            df.drop(drop_col, 1, inplace=True)

    # CREATE TABLE
    _c = conn.cursor()
    if dslw.config.ALLOW_TABLE_OVERWRITE:
        _c.execute(utils.DropTable(table_name, if_exists=True))
    columns = utils.clean_column_names(*df.columns)
    columns.remove("coordinates")
    create_qry = utils.CreateTable(table_name, *columns)
    columns.append(geom_col)  # geometry column is added separately below
    _c.execute("BEGIN;")
    _c.execute(create_qry)
    # AddGeometryColumn
    add_geom_qry = utils.AddGeometryColumn(
        table_name, srid, shape, geom_col, dims)
    _c.execute(add_geom_qry)
    _c.execute("COMMIT;")

    # Prepare and INSERT data
    inserts = []
    completed_inserts = 0
    n = dslw.config.INSERTS_PER_TRANSACTION
    for row in df.itertuples():
        insert_qry = utils.PrepareInsert(
            table_name, columns, True, True, is_multi, recast_xy)
        data = tuple([sanitize_row(r) for r in row[1:]]) + (srid,)
        # Quoting difficulties: values are formatted directly into the SQL
        insert_data = insert_qry.replace("?", "{}").format(*data)
        insert_data = re.sub(r'JSON\("', "JSON('", insert_data)
        if n == 1:
            # Individual INSERTs, one transaction each.
            # BUG FIX: track the failure in a local instead of reading the
            # 'except ... as e' target after the handler (undefined when no
            # exception fired under py3; fragile under py2).
            err = None
            try:
                _c.execute("BEGIN;")
                _c.execute(insert_data)
                completed_inserts += 1
            except (dslw.apsw.ConstraintError, dslw.apsw.SQLError) as e:
                err = e
                print("Records Inserted: {}".format(completed_inserts))
                print(create_qry)
                print(add_geom_qry)
                print(insert_data)
            _c.execute("COMMIT;")
            if err:
                raise err
        else:
            inserts.append(insert_data)

    # Grouped INSERTs: n statements per transaction
    total = len(inserts)
    if n == 0:
        # AUTO batch size: roughly four transactions total
        n = int(ceil(0.25 * total))
    while inserts:
        _c.execute("BEGIN;")
        _c.execute("".join(inserts[:n]))
        _c.execute("COMMIT;")
        del inserts[:n]
    # Not working: _c.execute(insert_qry, data)
    return True