def iris_vd(base): from verticapy.learn.datasets import load_iris iris = load_iris(cursor=base.cursor) iris.set_display_parameters(print_info=False) yield iris drop_table(name="public.iris", cursor=base.cursor)
def titanic_vd(base): from verticapy.learn.datasets import load_titanic titanic = load_titanic(cursor=base.cursor) titanic.set_display_parameters(print_info=False) yield titanic drop_table(name="public.titanic", cursor=base.cursor)
def winequality_vd(base): from verticapy.learn.datasets import load_winequality winequality = load_winequality(cursor=base.cursor) yield winequality with warnings.catch_warnings(record=True) as w: drop_table(name="public.winequality", cursor=base.cursor)
def iris_vd(base): from verticapy.learn.datasets import load_iris iris = load_iris(cursor=base.cursor) yield iris with warnings.catch_warnings(record=True) as w: drop_table(name="public.iris", cursor=base.cursor)
def market_vd(base): from verticapy.learn.datasets import load_market market = load_market(cursor=base.cursor) yield market with warnings.catch_warnings(record=True) as w: drop_table(name="public.market", cursor=base.cursor)
def smart_meters_vd(base): from verticapy.learn.datasets import load_smart_meters smart_meters = load_smart_meters(cursor=base.cursor) yield smart_meters with warnings.catch_warnings(record=True) as w: drop_table(name="public.smart_meters", cursor=base.cursor)
def tr_data_vd(base): base.cursor.execute("DROP TABLE IF EXISTS public.tr_data") base.cursor.execute( 'CREATE TABLE IF NOT EXISTS public.tr_data(Id INT, transportation INT, gender VARCHAR, "owned cars" INT, cost VARCHAR, income CHAR(4))' ) base.cursor.execute( "INSERT INTO tr_data VALUES (1, 0, 'Male', 0, 'Cheap', 'Low')") base.cursor.execute( "INSERT INTO tr_data VALUES (2, 0, 'Male', 1, 'Cheap', 'Med')") base.cursor.execute( "INSERT INTO tr_data VALUES (3, 1, 'Female', 1, 'Cheap', 'Med')") base.cursor.execute( "INSERT INTO tr_data VALUES (4, 0, 'Female', 0, 'Cheap', 'Low')") base.cursor.execute( "INSERT INTO tr_data VALUES (5, 0, 'Male', 1, 'Cheap', 'Med')") base.cursor.execute( "INSERT INTO tr_data VALUES (6, 1, 'Male', 0, 'Standard', 'Med')") base.cursor.execute( "INSERT INTO tr_data VALUES (7, 1, 'Female', 1, 'Standard', 'Med')") base.cursor.execute( "INSERT INTO tr_data VALUES (8, 2, 'Female', 1, 'Expensive', 'Hig')") base.cursor.execute( "INSERT INTO tr_data VALUES (9, 2, 'Male', 2, 'Expensive', 'Med')") base.cursor.execute( "INSERT INTO tr_data VALUES (10, 2, 'Female', 2, 'Expensive', 'Hig')") base.cursor.execute("COMMIT") tr_data = vDataFrame(input_relation="public.tr_data", cursor=base.cursor) yield tr_data with warnings.catch_warnings(record=True) as w: drop_table(name="public.tr_data", cursor=base.cursor)
def titanic_vd(base): from verticapy.learn.datasets import load_titanic titanic = load_titanic(cursor=base.cursor) yield titanic with warnings.catch_warnings(record=True) as w: drop_table(name="public.titanic", cursor=base.cursor)
def winequality_vd(base): from verticapy.learn.datasets import load_winequality winequality = load_winequality(cursor=base.cursor) winequality.set_display_parameters(print_info=False) yield winequality drop_table(name="public.winequality", cursor=base.cursor)
def amazon_vd(base): from verticapy.learn.datasets import load_amazon amazon = load_amazon(cursor=base.cursor) yield amazon with warnings.catch_warnings(record=True) as w: drop_table( name="public.amazon", cursor=base.cursor, )
def smart_meters_vd(base): from verticapy.learn.datasets import load_smart_meters smart_meters = load_smart_meters(cursor=base.cursor) smart_meters.set_display_parameters(print_info=False) yield smart_meters drop_table( name="public.smart_meters", cursor=base.cursor, )
def iris_vd(base): from verticapy.learn.datasets import load_iris iris = load_iris(cursor=base.cursor) yield iris drop_table( name="public.iris", cursor=base.cursor, )
def amazon_vd(base): from verticapy.learn.datasets import load_amazon amazon = load_amazon(cursor=base.cursor) yield amazon drop_table( name="public.amazon", cursor=base.cursor, )
def amazon_vd(base): from verticapy.learn.datasets import load_amazon amazon = load_amazon(cursor=base.cursor) amazon.set_display_parameters(print_info=False) yield amazon drop_table( name="public.amazon", cursor=base.cursor, )
def world_vd(base): from verticapy.learn.datasets import load_world world = load_world(cursor=base.cursor) yield world with warnings.catch_warnings(record=True) as w: drop_table( name="public.world", cursor=base.cursor, )
def cities_vd(base): from verticapy.learn.datasets import load_cities cities = load_cities(cursor=base.cursor) yield cities with warnings.catch_warnings(record=True) as w: drop_table( name="public.cities", cursor=base.cursor, )
def market_vd(base): from verticapy.learn.datasets import load_market market = load_market(cursor=base.cursor) market.set_display_parameters(print_info=False) yield market drop_table( name="public.market", cursor=base.cursor, )
def bsk_data_vd(base): base.cursor.execute("DROP TABLE IF EXISTS public.bsk_data") base.cursor.execute( "CREATE TABLE IF NOT EXISTS public.bsk_data(Id INT, col1 FLOAT, col2 FLOAT, col3 FLOAT, col4 FLOAT)" ) base.cursor.execute("INSERT INTO bsk_data VALUES (1, 7.2, 3.6, 6.1, 2.5)") base.cursor.execute("INSERT INTO bsk_data VALUES (2, 7.7, 2.8, 6.7, 2.0)") base.cursor.execute("INSERT INTO bsk_data VALUES (3, 7.7, 3.0, 6.1, 2.3)") base.cursor.execute("INSERT INTO bsk_data VALUES (4, 7.9, 3.8, 6.4, 2.0)") base.cursor.execute("INSERT INTO bsk_data VALUES (5, 4.4, 2.9, 1.4, 0.2)") base.cursor.execute("INSERT INTO bsk_data VALUES (6, 4.6, 3.6, 1.0, 0.2)") base.cursor.execute("INSERT INTO bsk_data VALUES (7, 4.7, 3.2, 1.6, 0.2)") base.cursor.execute("INSERT INTO bsk_data VALUES (8, 6.5, 2.8, 4.6, 1.5)") base.cursor.execute("INSERT INTO bsk_data VALUES (9, 6.8, 2.8, 4.8, 1.4)") base.cursor.execute("INSERT INTO bsk_data VALUES (10, 7.0, 3.2, 4.7, 1.4)") base.cursor.execute("COMMIT") bsk_data = vDataFrame(input_relation="public.bsk_data", cursor=base.cursor) yield bsk_data with warnings.catch_warnings(record=True) as w: drop_table(name="public.bsk_data", cursor=base.cursor)
def test_read_shp(self, cities_vd): with warnings.catch_warnings(record=True) as w: drop_table( name="public.cities_test", cursor=cities_vd._VERTICAPY_VARIABLES_["cursor"], ) cities_vd.to_shp("cities_test", "/home/dbadmin/", shape="Point") vdf = read_shp("/home/dbadmin/cities_test.shp", cities_vd._VERTICAPY_VARIABLES_["cursor"]) assert vdf.shape() == (202, 3) try: os.remove("/home/dbadmin/cities_test.shp") os.remove("/home/dbadmin/cities_test.shx") os.remove("/home/dbadmin/cities_test.dbf") except: pass with warnings.catch_warnings(record=True) as w: drop_table( name="public.cities_test", cursor=cities_vd._VERTICAPY_VARIABLES_["cursor"], )
def market_vd(base): from verticapy.learn.datasets import load_market market = load_market(cursor=base.cursor) yield market drop_table(name="public.market", cursor=base.cursor)
def titanic_vd(base): from verticapy.learn.datasets import load_titanic titanic = load_titanic(cursor=base.cursor) yield titanic drop_table(name="public.titanic", cursor=base.cursor)
def smart_meters_vd(base): from verticapy.learn.datasets import load_smart_meters smart_meters = load_smart_meters(cursor=base.cursor) yield smart_meters drop_table(name="public.smart_meters", cursor=base.cursor)
def test_vDF_to_db(self, titanic_vd): try: drop_view("verticapy_titanic_tmp", titanic_vd._VERTICAPY_VARIABLES_["cursor"]) drop_table("verticapy_titanic_tmp", titanic_vd._VERTICAPY_VARIABLES_["cursor"]) except: pass # testing relation_type = view try: titanic_vd.copy().to_db(name="verticapy_titanic_tmp", usecols = ["age", "fare", "survived"], relation_type = "view", db_filter = "age > 40", nb_split = 3) titanic_tmp = vDataFrame("verticapy_titanic_tmp", cursor=titanic_vd._VERTICAPY_VARIABLES_["cursor"]) assert titanic_tmp.shape() == (220, 4) assert titanic_tmp["_verticapy_split_"].min() == 0 assert titanic_tmp["_verticapy_split_"].max() == 2 titanic_vd._VERTICAPY_VARIABLES_["cursor"].execute("SELECT table_name FROM view_columns WHERE table_name = 'verticapy_titanic_tmp'") result = titanic_vd._VERTICAPY_VARIABLES_["cursor"].fetchone() assert result[0] == "verticapy_titanic_tmp" except: drop_view("verticapy_titanic_tmp", titanic_vd._VERTICAPY_VARIABLES_["cursor"]) raise drop_view("verticapy_titanic_tmp", titanic_vd._VERTICAPY_VARIABLES_["cursor"]) # testing relation_type = table try: titanic_vd.copy().to_db(name="verticapy_titanic_tmp", usecols = ["age", "fare", "survived"], relation_type = "table", db_filter = "age > 40", nb_split = 3) titanic_tmp = vDataFrame("verticapy_titanic_tmp", cursor=titanic_vd._VERTICAPY_VARIABLES_["cursor"]) assert titanic_tmp.shape() == (220, 4) assert titanic_tmp["_verticapy_split_"].min() == 0 assert titanic_tmp["_verticapy_split_"].max() == 2 titanic_vd._VERTICAPY_VARIABLES_["cursor"].execute("SELECT table_name FROM columns WHERE table_name = 'verticapy_titanic_tmp'") result = titanic_vd._VERTICAPY_VARIABLES_["cursor"].fetchone() assert result[0] == "verticapy_titanic_tmp" except: drop_table("verticapy_titanic_tmp", titanic_vd._VERTICAPY_VARIABLES_["cursor"]) raise drop_table("verticapy_titanic_tmp", titanic_vd._VERTICAPY_VARIABLES_["cursor"]) # testing relation_type = temporary table try: titanic_vd.copy().to_db(name="verticapy_titanic_tmp", usecols = ["age", "fare", "survived"], relation_type = "temporary", db_filter = "age > 40", nb_split = 3) titanic_tmp = vDataFrame("verticapy_titanic_tmp", cursor=titanic_vd._VERTICAPY_VARIABLES_["cursor"]) assert titanic_tmp.shape() == (220, 4) assert titanic_tmp["_verticapy_split_"].min() == 0 assert titanic_tmp["_verticapy_split_"].max() == 2 titanic_vd._VERTICAPY_VARIABLES_["cursor"].execute("SELECT table_name FROM columns WHERE table_name = 'verticapy_titanic_tmp'") result = titanic_vd._VERTICAPY_VARIABLES_["cursor"].fetchone() assert result[0] == "verticapy_titanic_tmp" except: drop_table("verticapy_titanic_tmp", titanic_vd._VERTICAPY_VARIABLES_["cursor"]) raise drop_table("verticapy_titanic_tmp", titanic_vd._VERTICAPY_VARIABLES_["cursor"]) # testing relation_type = temporary local table try: titanic_vd.copy().to_db(name="verticapy_titanic_tmp", usecols = ["age", "fare", "survived"], relation_type = "local", db_filter = "age > 40", nb_split = 3) titanic_tmp = vDataFrame("v_temp_schema.verticapy_titanic_tmp", cursor=titanic_vd._VERTICAPY_VARIABLES_["cursor"]) assert titanic_tmp.shape() == (220, 4) assert titanic_tmp["_verticapy_split_"].min() == 0 assert titanic_tmp["_verticapy_split_"].max() == 2 titanic_vd._VERTICAPY_VARIABLES_["cursor"].execute("SELECT table_name FROM columns WHERE table_name = 'verticapy_titanic_tmp'") result = titanic_vd._VERTICAPY_VARIABLES_["cursor"].fetchone() assert result[0] == "verticapy_titanic_tmp" except: drop_table("verticapy_titanic_tmp", titanic_vd._VERTICAPY_VARIABLES_["cursor"]) raise drop_table("verticapy_titanic_tmp", titanic_vd._VERTICAPY_VARIABLES_["cursor"])
def winequality_vd(base): from verticapy.learn.datasets import load_winequality winequality = load_winequality(cursor=base.cursor) yield winequality drop_table(name="public.winequality", cursor=base.cursor)
def test_vDF_join(self, market_vd): # CREATE TABLE not_fresh AS SELECT * FROM market WHERE Form != 'Fresh'; not_fresh = market_vd.search("Form != 'Fresh'") # CREATE TABLE not_dried AS SELECT * FROM market WHERE Form != 'Dried'; not_dried = market_vd.search("Form != 'Dried'") # CREATE TABLE left_join AS # SELECT a.Name as Name1, b.Name as Name2 # FROM not_fresh AS a LEFT JOIN not_dried AS b ON a.Form = b.Form; left_join = not_fresh.join( not_dried, how="left", on={"Form": "Form"}, expr1=["Name AS Name1"], expr2=["Name AS Name2"], ) assert left_join.shape() == (5886, 2) # SELECT COUNT(*) FROM left_join WHERE Name1 IS NULL; assert left_join["Name1"].count() == 5886 # SELECT COUNT(*) FROM left_join WHERE Name2 IS NULL; assert left_join["Name2"].count() == 5886 - 30 # CREATE TABLE right_join AS # SELECT a.Name as Name1, b.Name as Name2 # FROM not_fresh AS a RIGHT JOIN not_dried AS b ON a.Form = b.Form; right_join = not_fresh.join( not_dried, how="right", on={"Form": "Form"}, expr1=["Name AS Name1"], expr2=["Name AS Name2"], ) assert right_join.shape() == (5946, 2) # SELECT COUNT(*) FROM right_join WHERE Name1 IS NULL; assert right_join["Name1"].count() == 5946 - 90 # SELECT COUNT(*) FROM right_join WHERE Name2 IS NULL; assert right_join["Name2"].count() == 5946 # CREATE TABLE full_join AS # SELECT a.Name as Name1, b.Name as Name2 # FROM not_fresh AS a FULL OUTER JOIN not_dried AS b ON a.Form = b.Form; full_join = not_fresh.join( not_dried, how="full", on={"Form": "Form"}, expr1=["Name AS Name1"], expr2=["Name AS Name2"], ) assert full_join.shape() == (5976, 2) # SELECT COUNT(*) FROM full_join WHERE Name1 IS NULL; assert full_join["Name1"].count() == 5976 - 90 # SELECT COUNT(*) FROM full_join WHERE Name2 IS NULL; assert full_join["Name2"].count() == 5976 - 30 # CREATE TABLE inner_join AS # SELECT a.Name as Name1, b.Name as Name2 # FROM not_fresh AS a INNER JOIN not_dried AS b ON a.Form = b.Form; inner_join = not_fresh.join( not_dried, how="inner", on={"Form": "Form"}, expr1=["Name AS Name1"], expr2=["Name AS Name2"], ) assert inner_join.shape() == (5856, 2) # SELECT COUNT(*) FROM inner_join WHERE Name1 IS NULL; assert inner_join["Name1"].count() == 5856 # SELECT COUNT(*) FROM inner_join WHERE Name2 IS NULL; assert inner_join["Name2"].count() == 5856 # CREATE TABLE natural_join AS # SELECT a.Name as Name1, b.Name as Name2 # FROM not_fresh AS a NATURAL JOIN not_dried AS b; natural_join = not_fresh.join( not_dried, how="natural", expr1=["Name AS Name1"], expr2=["Name AS Name2"] ) assert natural_join.shape() == (194, 2) # SELECT COUNT(*) FROM natural_join WHERE Name1 IS NULL; assert natural_join["Name1"].count() == 194 # SELECT COUNT(*) FROM natural_join WHERE Name2 IS NULL; assert natural_join["Name2"].count() == 194 # CREATE TABLE cross_join AS # SELECT a.Name as Name1, b.Name as Name2 # FROM not_fresh AS a CROSS JOIN not_dried AS b; cross_join = not_fresh.join( not_dried, how="cross", expr1=["Name AS Name1"], expr2=["Name AS Name2"] ) assert cross_join.shape() == (63616, 2) # SELECT COUNT(*) FROM cross_join WHERE Name1 IS NULL; assert cross_join["Name1"].count() == 63616 # SELECT COUNT(*) FROM cross_join WHERE Name2 IS NULL; assert cross_join["Name2"].count() == 63616 # join directly with a Vertica table not_dried.to_db("not_dried", relation_type="local") table_join = not_fresh.join( "v_temp_schema.not_dried", how="natural", expr1=["Name AS Name1"], expr2=["Name AS Name2"], ) assert table_join.shape() == (194, 2) drop_table( "v_temp_schema.not_dried", not_dried._VERTICAPY_VARIABLES_["cursor"], )