def test_most_common_int(self):
    """Smoke-test ``most_common`` on a column of integers.

    Builds a one-column frame, wraps it in ``Data`` with
    ``test_split_percentage=0.5`` and calls ``most_common`` with
    ``plot=True, use_test=True``. The final assertion is unconditional, so
    the test passes whenever nothing above it raises.
    """
    values = pd.Series([1, 1, 2, 4, 2, 5])
    frame = pd.DataFrame(values, columns=['col1'])

    wrapped = Data(frame, test_split_percentage=0.5)
    wrapped.most_common('col1', plot=True, use_test=True)

    self.assertTrue(True)
def test_most_common_list(self):
    """Smoke-test ``most_common`` on a column whose cells are lists.

    The input includes an empty list and lists of differing lengths. The
    test passes whenever ``most_common('col1', plot=True)`` does not raise.
    """
    token_lists = [
        ['hi', 'aethos'],
        ['hi', 'py-automl'],
        [],
        ['hi'],
    ]
    frame = pd.DataFrame(pd.Series(token_lists), columns=['col1'])

    wrapped = Data(frame, split=False)
    wrapped.most_common('col1', plot=True)

    self.assertTrue(True)
def test_compare_dist_predict(self):
    """Smoke-test ``predict_data_sample`` on random binary data.

    A 1000x3 frame of random 0/1 integers is wrapped in ``Data`` with
    ``col3`` as the target field. The test passes whenever
    ``predict_data_sample`` does not raise.
    """
    column_names = ["col1", "col2", "col3"]
    raw = np.random.randint(0, 2, size=(1000, 3))
    frame = pd.DataFrame(data=raw, columns=column_names)

    wrapped = Data(frame, target_field="col3", report_name="test")
    wrapped.predict_data_sample()

    self.assertTrue(True)
def test_plot_clusters_pca(self):
    """Smoke-test ``plot_dim_reduction`` with ``algo='pca'`` and ``dim=2``.

    Uses the output of ``make_blobs(100, 4, centers=3)`` with the returned
    labels stored in a ``label`` column. The test passes whenever the plot
    call does not raise.
    """
    features, cluster_ids = make_blobs(100, 4, centers=3)

    frame = pd.DataFrame(features)
    frame['label'] = cluster_ids

    wrapped = Data(frame, split=False)
    wrapped.plot_dim_reduction('label', algo='pca', dim=2)

    self.assertTrue(True)
def test_compare_dist_ks(self):
    """Smoke-test ``ks_feature_distribution`` with a shifted ``col4``.

    A 1000x3 frame of random 0/1 integers is wrapped in ``Data``; ``col4`` is
    then assigned twice, with 800 and 200 normally distributed values drawn
    from different distributions. The test passes whenever
    ``ks_feature_distribution`` does not raise.
    """
    raw = np.random.randint(0, 2, size=(1000, 3))
    frame = pd.DataFrame(data=raw, columns=["col1", "col2", "col3"])
    wrapped = Data(frame, target_field="col3", report_name="test")

    # NOTE(review): presumably the 800-value assignment lands on the train
    # partition and the 200-value one on the test partition (routing by
    # length) -- confirm against Data.__setitem__.
    first_sample = np.random.normal(1, 2, size=(1, 800))[0]
    wrapped["col4"] = first_sample
    second_sample = np.random.normal(10, 20, size=(1, 200))[0]
    wrapped["col4"] = second_sample

    wrapped.ks_feature_distribution()

    self.assertTrue(True)
def test_most_common_str(self):
    """Smoke-test ``most_common`` on a column of free-text strings.

    The test passes whenever ``most_common('col1')`` does not raise on the
    unsplit data.
    """
    sentences = [
        'hi aethos',
        'aethos is awesome',
        'hi',
        'py-automl is the old name',
        'hi everyone',
    ]
    frame = pd.DataFrame(pd.Series(sentences), columns=['col1'])

    wrapped = Data(frame, split=False)
    wrapped.most_common('col1')

    self.assertTrue(True)
def test_lineplot(self):
    """Smoke-test ``lineplot`` with two series plotted against a date column.

    The random generator is seeded with 42 before the two 1000-point series
    are drawn. The test passes whenever ``lineplot`` does not raise.
    """
    np.random.seed(42)

    # Draw order matters for reproducibility under the seed: Google first,
    # then Apple.
    google = np.random.randn(1000) + 0.2
    apple = np.random.randn(1000) + 0.17
    dates = pd.date_range("1/1/2000", periods=1000)

    frame = pd.DataFrame({"Google": google, "Apple": apple, "date": dates})

    wrapped = Data(x_train=frame, split=False)
    wrapped.lineplot(x="date", y=["Google", "Apple"], show_figure=False)

    self.assertTrue(True)
def test_jointplot(self):
    """Smoke-test ``jointplot`` of sepal width against sepal length.

    Loads the seaborn ``iris`` dataset and wraps it in ``Data`` with
    ``species`` as the target field. The test passes whenever ``jointplot``
    does not raise.
    """
    iris = sns.load_dataset("iris")

    data_kwargs = {
        "x_train": iris,
        "x_test": None,
        "split": True,
        "target_field": "species",
        "report_name": "test",
        "test_split_percentage": 0.5,
    }
    wrapped = Data(**data_kwargs)

    wrapped.jointplot(x="sepal_width", y="sepal_length")

    self.assertTrue(True)
def test_pairplot_custom(self):
    """Smoke-test ``pairplot`` with explicit diagonal/upper/lower plot kinds.

    Loads the seaborn ``iris`` dataset and wraps it in ``Data`` with
    ``species`` as the target field. The test passes whenever ``pairplot``
    does not raise.
    """
    iris = sns.load_dataset("iris")

    data_kwargs = {
        "x_train": iris,
        "x_test": None,
        "split": True,
        "target_field": "species",
        "report_name": "test",
        "test_split_percentage": 0.5,
    }
    wrapped = Data(**data_kwargs)

    plot_kinds = {
        "diag_kind": 'hist',
        "upper_kind": 'scatter',
        "lower_kind": 'kde',
    }
    wrapped.pairplot(**plot_kinds)

    self.assertTrue(True)
def test_pairplot(self):
    """Smoke-test ``pairplot`` called with no arguments.

    Loads the seaborn ``iris`` dataset and wraps it in ``Data`` with
    ``species`` as the target field. The test passes whenever ``pairplot``
    does not raise.
    """
    iris = sns.load_dataset("iris")

    data_kwargs = {
        "x_train": iris,
        "x_test": None,
        "split": True,
        "target_field": "species",
        "report_name": "test",
        "test_split_percentage": 0.5,
    }
    wrapped = Data(**data_kwargs)

    wrapped.pairplot()

    self.assertTrue(True)
def test_histogram_1(self):
    """Smoke-test ``histogram`` on the ``sepal_length`` column.

    Loads the seaborn ``iris`` dataset and wraps it in ``Data`` with
    ``species`` as the target field. The test passes whenever ``histogram``
    does not raise.
    """
    iris = sns.load_dataset("iris")

    data_kwargs = {
        "x_train": iris,
        "x_test": None,
        "split": True,
        "target_field": "species",
        "report_name": "test",
        "test_split_percentage": 0.5,
    }
    wrapped = Data(**data_kwargs)

    wrapped.histogram("sepal_length")

    self.assertTrue(True)
def test_correlation_plot(self):
    """Smoke-test ``correlation_matrix`` on a 100x10 frame of random floats.

    The test passes whenever
    ``correlation_matrix(data_labels=True, hide_mirror=True)`` does not raise.
    """
    random_frame = pd.DataFrame(np.random.rand(100, 10))

    # NOTE(review): the frame is built without column names, so no column
    # is visibly called "col3" -- confirm that Data tolerates a target_field
    # that is absent from the data.
    data_kwargs = {
        "x_train": random_frame,
        "x_test": None,
        "split": True,
        "target_field": "col3",
        "report_name": "test",
        "test_split_percentage": 0.5,
    }
    wrapped = Data(**data_kwargs)

    wrapped.correlation_matrix(data_labels=True, hide_mirror=True)

    self.assertTrue(True)
def test_barplot(self):
    """Smoke-test grouped mean ``barplot`` in both orientations.

    Loads the seaborn ``iris`` dataset, wraps it in ``Data`` with ``species``
    as the target field, then draws the same bar plot with ``orient="h"``
    followed by ``orient="v"``. The test passes whenever neither call raises.
    """
    iris = sns.load_dataset("iris")

    data_kwargs = {
        "x_train": iris,
        "x_test": None,
        "split": True,
        "target_field": "species",
        "report_name": "test",
        "test_split_percentage": 0.5,
    }
    wrapped = Data(**data_kwargs)

    for orientation in ("h", "v"):
        # A fresh ``y`` list per call, matching the two separate literals the
        # calls were originally given.
        wrapped.barplot(
            x="species",
            y=["sepal_length", "sepal_width"],
            method="mean",
            orient=orientation,
            barmode="group",
        )

    self.assertTrue(True)