Пример #1
0
 def test_clean_column_names_blank(self):
     """clean_column_names with space and None"""
     self.columns.append(" ")
     with self.assertRaises(AttributeError):
         utils.clean_column_names(*self.columns)
     self.columns.pop(-1)
     self.columns.append(None)
     with self.assertRaises(AttributeError):
         utils.clean_column_names(*self.columns)
Пример #2
0
 def test_clean_column_names_keyword(self):
     """clean_column_names with sql keywords"""
     self.columns.append("bday;DROP table very_important;")
     with self.assertRaises(AttributeError):
         utils.clean_column_names(*self.columns)
Пример #3
0
 def test_clean_column_names(self):
     """clean_column_names"""
     cleaned_args = utils.clean_column_names(*self.columns)
     self.assertEqual(cleaned_args, ["id", "fname", "birth_day"])        
Пример #4
0
def ogr2lite(conn, input_data, table_name='AUTO', drop=[],
             dim=2, geom_col="geometry", recast_xy=True):
    """Converts a shapefile to SQLite table via ogr and pandas.
    Args:
        conn (dslw.SpatialDB/apsw.Connection): db connection
        input_data (str, tuple): path to input data; if input is db, use tuple
            of (path_to_db, data_table_name)
        table_name (str): new table name (defaults to shapefile's basename)
        drop (list): list of extra fields to drop upon conversion
        dim (int): number of spatial dimensions, default 2
        geom_col (str): name of new geometry column; default 'geometry'
    Returns True if successful.
    """
    # Rename input table
    if table_name == 'AUTO':
        if type(input_data) is tuple:
            table_name = input_data[1].lower()
        else:
            table_name = os.path.basename(input_data).split(".")[0].lower()

    # Convert input data type
    if type(input_data) is tuple:
        df = ogr2df(input_data[0], input_data[1], True)
    else:
        df = ogr2df(input_data, None, True)

    # Excape apostrophes -- https://stackoverflow.com/questions/12460742
    df = df.applymap(
        lambda x: x.replace("'", "''") if type(x) is unicode else x)
    # Double-quote all unicode values -- for SQL formatting
    df = df.applymap(
        lambda x: "'{}'".format(x) if type(x) is unicode or x is None else x)

    # Validate and get spatial reference id
    # TODO: move to clean option of ogr2df?
    auth_srid_set = set(df["srid"])
    if len(auth_srid_set) > 1:
        raise IOError("Data cannot have more than one spatial reference")
    srid = int(list(auth_srid_set)[0])

    # Validate and get geometry type
    shape_set = set(df["shape_type"])
    if len(shape_set) > 1:
        try:
            # Assumes a set of n > 1 geometry types has a common base
            #   e.g. chooses 'Multipolygon' from {MultiPolygon, Polygon}
            shape = list(shape_set)[[s.lower().startswith("multi")
                                     for s in shape_set].index(True)]
            is_multi = True
            # Asserts the assumption above
            #   e.g. raises error for {Line, MultiPoint} because Line != Point
            assert len(set(
                [s.lower().replace("multi", "") for s in list(shape_set)]))
        except (ValueError, AssertionError):
            raise IOError("Cannot have mixed geometry types. Found: {}".format(
                shape_set))
        # https://gis.stackexchange.com/questions/14203
    else:
        shape = str(list(shape_set)[0])
        is_multi = False

    # Validate and get spatial dimensions
    dims = df["dims"].ix[0]
    if recast_xy:
        dims = "XY"

    # print("SRID: {}\t SHAPE: {}\t DIMS: {}".format(srid, shape, dims))

    # Drop df columns
    drop.extend(["type", "srid", "dims", "shape_type"])
    [df.drop(drop_col, 1, inplace=True) for drop_col in drop
     if drop_col in df.columns]
    # print(df)

    # CREATE TABLE
    _c = conn.cursor()
    if dslw.config.ALLOW_TABLE_OVERWRITE:
        _c.execute(utils.DropTable(table_name, if_exists=True))

    columns = utils.clean_column_names(*df.columns)
    columns.remove("coordinates")
    create_qry = utils.CreateTable(table_name, *columns)

    columns.insert(len(columns), geom_col)
    _c.execute("BEGIN;")
    _c.execute(create_qry)

    # AddGeometryColumn
    add_geom_qry = utils.AddGeometryColumn(
        table_name, srid, shape, geom_col, dims)
    _c.execute(add_geom_qry)
    _c.execute("COMMIT;")

    # Prepare and INSERT data
    inserts = []
    completed_inserts = 0
    n = dslw.config.INSERTS_PER_TRANSACTION
    for row in df.itertuples():
        insert_qry = utils.PrepareInsert(
            table_name, columns, True, True, is_multi, recast_xy)
        data = tuple([sanitize_row(r) for r in row[1:]]) + (srid,)
        # print(data)
        # Quoting difficulties, ugh...
        insert_data = insert_qry.replace("?", "{}").format(*data)
        insert_data = re.sub('JSON\("', "JSON('", insert_data)
        # Do individual INSERTs
        if n == 1:
            try:
                _c.execute("BEGIN;")
                _c.execute(insert_data)
                e = None
                completed_inserts += 1
            except (dslw.apsw.ConstraintError, dslw.apsw.SQLError) as e:
                print("Records Inserted: {}".format(completed_inserts))
                print(create_qry)
                print(add_geom_qry)
                print(insert_data)
            _c.execute("COMMIT;")
            if e:
                raise e
        else:
            inserts.append(insert_data)
    # Do grouped INSERTs
    total = len(inserts)
    if n == 0:
        n = int(ceil(0.25 * total))
    while inserts:
        _c.execute("BEGIN;")
        _c.execute("".join(inserts[:n]))
        _c.execute("COMMIT;")
        del inserts[:n]
        # Not working: _c.execute(insert_qry, data)
    return True