def test_clone_duplicate_(self):
    """Check that clone_() copies the dataframe and that bad inputs error.

    Fix: the original called ``self.assertRaises(AttributeError)`` with no
    callable, which returns an unused context manager and asserts nothing.
    Replaced with the ``assertErr`` helper used by the other tests in this
    file, which actually verifies that the call reported an error.
    """
    ds.df = self.df
    ds2 = ds.clone_()
    assert_frame_equal(ds2.df, ds.df)
    ds2 = Ds()
    # error clone: cloning with no dataframe must be reported as an error
    ds.df = None
    self.assertErr(None, ds.clone_)
    # error clone: cloning with a non-dataframe must also error
    ds.df = "wrong"
    self.assertErr(None, ds.clone_)
    ds2 = ds._duplicate_(self.df, "db", False)
def test_concat(self):
    """Verify that concat / concat_ combine two datasets' dataframes."""
    first = pd.DataFrame([1, 2])
    second = pd.DataFrame([3, 4])
    expected = pd.DataFrame([1, 2, 3, 4])
    # in-place variant: mutates ds.df
    ds.df = first
    other = Ds(second)
    ds.concat(ds, other)
    self.assertEqual(list(ds.df), list(expected))
    # functional variant: returns a new Ds, leaves inputs usable
    ds.df = first
    merged = ds.concat_(ds, other)
    self.assertEqual(list(merged.df), list(expected))
    # a None dataset must be reported as an error by both variants
    other = None
    self.assertErr(None, ds.concat_, ds, other)
    self.assertErr(None, ds.concat, ds, other)
# NOTE(review): this physical line holds the file header (coding cookie,
# imports, module-level Ds() fixture) plus class TestDsDataText with
# test_flat, all collapsed onto one line by a formatting mangle.
# NOTE(review): the trailing `"""def test_mfw...` opens a triple-quoted
# string used to comment out a test; its closing quotes are not visible in
# this chunk, so the line is left byte-identical — confirm the string is
# terminated later in the file before reformatting.
# NOTE(review): `self.assertRaises(TypeError)` with no callable argument is
# a no-op assertion; consider the assertErr(...) helper used by the other
# tests in this class.
# -*- coding: utf-8 -*- import pandas as pd from pandas.testing import assert_frame_equal from dataswim.tests.base import BaseDsTest from dataswim import Ds ds = Ds() ds.errs_traceback = False class TestDsDataText(BaseDsTest): def test_flat(self): df1 = pd.DataFrame([["one", "one"], ["two", "two"]], columns=["one", "two"]) ds.df = df1 data = ds.flat_("one") self.assertEqual(data, '0 one 1 two') data = ds.flat_("one", False) self.assertEqual(data, 'one two') ds.df = None data = ds.flat_("one") self.assertRaises(TypeError) """def test_mfw(self): df1 = pd.DataFrame([["one", "one"], ["two", "two"], ["one", "three"]], columns=["one", "two"]) ds.df = df1 ds2 = ds.mfw_("one") df2 = pd.DataFrame([[2, "one"], [1, "two"]], columns=["Frequency", "Word"]).set_index('Word') assert_frame_equal(ds2.df, df2)
def run(dbpath, debug=True):
    """Build commit-activity charts for every repository in *dbpath*.

    Loads the gh_commit table, resamples commits over several timeframes,
    and renders one chart per repository (plus an "all" aggregate) into the
    package's static charts directory.

    Fixes vs. original: removed several ``try: ... except Exception as e:
    raise (e)`` blocks (catch-and-immediately-re-raise is a no-op — the
    exception propagates identically without them), deleted commented-out
    dead code, and collapsed a nested debug/modified-repos guard into a
    single condition.

    :param dbpath: path to the sqlite database file
    :param debug: when True, print status messages and process every repo;
                  when False, run quietly and skip unmodified repos
    """
    ds = Ds()
    if debug is True:
        ds.status("Debug mode is enabled")
    db.init(dbpath)
    results, modified_repos = get_results()
    if debug is False:
        ds.quiet = True
    modulepath = os.path.dirname(os.path.realpath(ghobserver.__file__))
    static_path = modulepath + "/static"
    repos = db.get_repos()
    # load and normalize the commits table
    ds.connect("sqlite:///" + dbpath)
    ds.load("gh_commit")
    ds.relation("repository", "repository", "name", "Repository")
    ds.rename("date", "Date")
    ds.rename("additions", "Additions")
    ds.rename("deletions", "Deletions")
    ds.rename("changed_files", "Changed files")
    ds.keep("Repository", "Date", "Additions", "Deletions", "Changed files")
    ds.date("Date")
    ds.dateindex("Date")
    ds.report_path = static_path + "/charts"
    ds.static_path = ds.report_path
    # snapshot the cleaned dataframe so each timeframe restarts from it
    ds.backup()
    # (timeframe to chart, resample rule) pairs
    ts = [["3Y", "1M"], ["1Y", "1W"], ["3M", "1D"], ["3W", "1D"],
          ["1W", "1D"], ["100Y", "1M"]]
    for tf in ts:
        timeframe = tf[0]
        timerange = tf[1]
        ds.restore()
        # ---- aggregate chart over all repositories ----
        slug = "all"
        ds.df.Date = pd.to_datetime(ds.df.Date, utc=True)
        ds.dateindex("Date")
        ds.nowrange("Date", timeframe)
        if len(ds.df.index) < 2:
            no_data(ds, slug, timeframe)
            ds.status("No data for all repositories", timeframe)
        else:
            # add repository names back onto the resampled per-repo datasets
            res = []
            dss = ds.split_("Repository")
            for k in dss:
                d = dss[k]
                repo = d.df["Repository"].values[0]
                d.rsum(timerange, "Commits")
                d.add("Repository", repo)
                res.append(d)
            ds2 = Ds().concat_(*res)
            ds.rsum(timerange, "Commits")
            # zero the last (partial) period so it doesn't skew the chart
            ds.df.iloc[[-1], [3]] = 0
            ds.indexcol("Date")
            pipe_repo(ds, None, timeframe, ds2)
        # ---- one chart per repository ----
        for repo in repos:
            reposlug = repo["name"]
            # outside debug mode, only re-render repos with new commits
            if debug is False and reposlug not in modified_repos:
                continue
            ds.restore()
            # seed a zero row so empty timeframes still produce an index
            ds.append([reposlug, now(), 0, 0, 0])
            ds.df.Date = pd.to_datetime(ds.df.Date)
            ds.dateindex("Date")
            ds.exact("Repository", reposlug)
            ds.nowrange("Date", timeframe)
            if len(ds.df.index) < 2:
                no_data(ds, reposlug, timeframe)
                ds.status("No data for", reposlug, timeframe)
                continue
            ds.rsum(timerange, "Commits")
            ds.indexcol("Date")
            ds.df.iloc[[-1], [3]] = 0
            ds, slug = pipe_repo(ds, repo["name"], timeframe)
    # NOTE(review): the mangled source makes block nesting ambiguous here;
    # assuming only the notification is conditional on pending results,
    # while cleanup and the final status print always run — confirm.
    if results:
        pop_notification(results)
    db.clean_results()
    print("ok")