Example no. 1
 def test_clone_duplicate_(self):
     """Clone the shared Ds instance and check the copy, then poke error paths.

     Uses the module-level ``ds`` object; ``self.df`` is a fixture frame
     provided by the base test class (not visible in this file).
     """
     ds.df = self.df
     ds2 = ds.clone_()
     # the clone must carry an equal dataframe
     assert_frame_equal(ds2.df, ds.df)
     ds2 = Ds()
     # error clone
     ds.df = None
     ds2 = ds.clone_()
     # NOTE(review): assertRaises called with no callable is a no-op — it only
     # builds an unused context manager and asserts nothing. Presumably the
     # intent was `with self.assertRaises(AttributeError): ds.clone_()`;
     # confirm whether clone_ actually raises or just logs when df is None.
     self.assertRaises(AttributeError)
     ds.df = "wrong"
     ds2 = ds.clone_()
     self.assertRaises(AttributeError)  # NOTE(review): same no-op pattern as above
     # exercise the private duplicate helper; result is deliberately unchecked
     ds2 = ds._duplicate_(self.df, "db", False)
Example no. 2
 def test_concat(self):
     """Concatenate two datasets in place (concat) and as a copy (concat_),
     then check that concatenating with None is reported as an error.
     """
     df1 = pd.DataFrame([1, 2])
     df2 = pd.DataFrame([3, 4])
     ds.df = df1
     ds2 = Ds(df2)
     # in-place variant: mutates the shared ds object's dataframe
     ds.concat(ds, ds2)
     df3 = pd.DataFrame([1, 2, 3, 4])
     # NOTE(review): list(DataFrame) yields the column labels, not the row
     # values — this only compares columns. Presumably row data was meant;
     # consider assert_frame_equal with reset indexes instead. Verify.
     self.assertEqual(list(ds.df), list(df3))
     ds.df = df1
     # copy variant: returns a new Ds instead of mutating
     ds2 = ds.concat_(ds, ds2)
     self.assertEqual(list(ds2.df), list(df3))
     # error path: a None operand must be flagged (assertErr comes from the
     # project's BaseDsTest helper, not unittest)
     ds2 = None
     self.assertErr(None, ds.concat_, ds, ds2)
     self.assertErr(None, ds.concat, ds, ds2)
Example no. 3
# -*- coding: utf-8 -*-
import pandas as pd
from pandas.testing import assert_frame_equal
from dataswim.tests.base import BaseDsTest
from dataswim import Ds

# shared Ds instance mutated by every test in this module
ds = Ds()
# presumably suppresses tracebacks in Ds's own error logging so the
# deliberate error-path tests stay quiet — confirm against dataswim docs
ds.errs_traceback = False


class TestDsDataText(BaseDsTest):
    def test_flat(self):
        """Flatten a column to a single string with flat_, with and
        without row indexes interleaved.
        """
        df1 = pd.DataFrame([["one", "one"], ["two", "two"]],
                           columns=["one", "two"])
        ds.df = df1
        # default: index values are interleaved with the cell values
        data = ds.flat_("one")
        self.assertEqual(data, '0 one 1 two')
        # second arg False: values only, no indexes
        data = ds.flat_("one", False)
        self.assertEqual(data, 'one two')
        ds.df = None
        data = ds.flat_("one")
        # NOTE(review): assertRaises with no callable asserts nothing here —
        # presumably `with self.assertRaises(TypeError): ds.flat_("one")`
        # was intended; confirm whether flat_ raises or logs on a None df.
        self.assertRaises(TypeError)

    """def test_mfw(self):
        df1 = pd.DataFrame([["one", "one"], ["two", "two"], ["one", "three"]],
                           columns=["one", "two"])
        ds.df = df1
        ds2 = ds.mfw_("one")
        df2 = pd.DataFrame([[2, "one"], [1, "two"]],
                           columns=["Frequency", "Word"]).set_index('Word')
        assert_frame_equal(ds2.df, df2)
Example no. 4
def run(dbpath, debug=True):
    """Generate commit-activity charts for every repository in the database.

    Loads the ``gh_commit`` table from the SQLite database at *dbpath*,
    resamples commit counts over several timeframes, and renders charts
    into the package's static directory via ``pipe_repo`` / ``no_data``
    (helpers defined elsewhere in this project).

    :param dbpath: path to the SQLite database file
    :param debug: when True, log status messages and process every repo;
                  when False, run quietly and skip repos not listed in
                  ``modified_repos``
    """
    ds = Ds()
    if debug is True:
        ds.status("Debug mode is enabled")
    db.init(dbpath)
    results, modified_repos = get_results()
    if debug is False:
        ds.quiet = True
    # resolve chart output paths relative to the installed package
    modulepath = os.path.dirname(os.path.realpath(ghobserver.__file__))
    # templates_path = modulepath + "/templates/charts"
    static_path = modulepath + "/static"
    repos = db.get_repos()
    # load and normalize the commits dataset
    ds.connect("sqlite:///" + dbpath)
    ds.load("gh_commit")
    ds.relation("repository", "repository", "name", "Repository")
    ds.rename("date", "Date")
    ds.rename("additions", "Additions")
    ds.rename("deletions", "Deletions")
    ds.rename("changed_files", "Changed files")
    ds.keep("Repository", "Date", "Additions", "Deletions", "Changed files")
    ds.date("Date")
    ds.dateindex("Date")
    ds.report_path = static_path + "/charts"
    ds.static_path = ds.report_path
    # snapshot the cleaned dataset so each timeframe iteration can restore it
    ds.backup()
    # [timeframe window, resample frequency] pairs to chart
    ts = [["3Y", "1M"], ["1Y", "1W"], ["3M", "1D"], ["3W", "1D"], ["1W", "1D"],
          ["100Y", "1M"]]
    # repos = [repos[0]]
    for tf in ts:
        timeframe = tf[0]
        timerange = tf[1]
        ds.restore()
        # all repos
        slug = "all"
        # ds.append(["", now(), 0, 0, 0])
        ds.df.Date = pd.to_datetime(ds.df.Date, utc=True)
        ds.dateindex("Date")
        ds.nowrange("Date", timeframe)
        if len(ds.df.index) < 2:
            # fewer than two rows: nothing meaningful to resample or chart
            no_data(ds, slug, timeframe)
            ds.status("No data for all repositories", timeframe)
        else:
            # add repositories names to the rsumed dataset
            res = []
            dss = ds.split_("Repository")
            for k in dss:
                d = dss[k]
                repo = d.df["Repository"].values[0]
                # NOTE(review): these try/except blocks only re-raise and so
                # are effectively no-ops (`raise (e)` also resets the original
                # traceback chain); consider removing them or a bare `raise`.
                try:
                    d.rsum(timerange, "Commits")
                except Exception as e:
                    raise (e)
                try:
                    d.add("Repository", repo)
                except Exception as e:
                    raise (e)
                res.append(d)
            ds2 = Ds().concat_(*res)
            ds.rsum(timerange, "Commits")
            # presumably zeroes the last (partial) bucket of column 3 so the
            # chart doesn't show an artificial dip — confirm column index
            ds.df.iloc[[-1], [3]] = 0
            ds.indexcol("Date")
            pipe_repo(ds, None, timeframe, ds2)
        # by repos
        for repo in repos:
            reposlug = repo["name"]
            if debug is False:
                # outside debug mode, only re-chart repos that changed
                if reposlug not in modified_repos:
                    continue
            ds.restore()
            try:
                # add an empty "now" row so the range extends to the present
                ds.append([reposlug, now(), 0, 0, 0])
            except Exception as e:
                raise (e)
            # NOTE(review): unlike the all-repos branch above, this call omits
            # utc=True — confirm whether the asymmetry is intentional
            ds.df.Date = pd.to_datetime(ds.df.Date)
            ds.dateindex("Date")
            ds.exact("Repository", reposlug)
            ds.nowrange("Date", timeframe)
            if len(ds.df.index) < 2:
                no_data(ds, reposlug, timeframe)
                ds.status("No data for", reposlug, timeframe)
                continue
            ds.rsum(timerange, "Commits")
            ds.indexcol("Date")
            ds.df.iloc[[-1], [3]] = 0
            ds, slug = pipe_repo(ds, repo["name"], timeframe)
    if results:
        pop_notification(results)
    db.clean_results()
    print("ok")