Пример #1
0
def test_summary_fns(dataframe):
    """Check that summary helpers run and are recorded on the expected axis.

    With no ``axis`` argument a summary is counted in ``summary_rows``; with
    ``axis=1`` it is counted in ``summary_cols``; with ``axis=None`` in both.
    """
    # Smoke test: each summary helper must be callable with its defaults.
    for method_name in ('total', 'average', 'median', 'max', 'min'):
        getattr(PrettyPandas(dataframe), method_name)()

    def summary_counts(styled):
        # (number of summary rows, number of summary columns)
        return len(styled.summary_rows), len(styled.summary_cols)

    assert summary_counts(PrettyPandas(dataframe).total()) == (1, 0)
    assert summary_counts(PrettyPandas(dataframe).total(axis=1)) == (0, 1)
    assert summary_counts(PrettyPandas(dataframe).total(axis=None)) == (1, 1)

    # Chained summaries accumulate.
    assert summary_counts(PrettyPandas(dataframe).min().max()) == (2, 0)
    assert summary_counts(PrettyPandas(dataframe).min().max(axis=1)) == (1, 1)
Пример #2
0
def test_summary(dataframe):
    """The last row returned by ``_apply_summaries`` must equal the column sums."""
    styled = PrettyPandas(dataframe).total()
    expected_totals = list(styled.data.sum())

    # The total is expected to be the final row of the summarised frame.
    last_row = styled._apply_summaries().iloc[-1]
    assert (last_row == expected_totals).all()
Пример #3
0
def test_data_safety(dataframe):
    """Applying summaries must not modify the wrapped DataFrame.

    A deep copy taken before the call is compared with both the caller's frame
    and the ``data`` attribute of the ``PrettyPandas`` object afterwards.
    """
    snapshot = copy.deepcopy(dataframe)

    styled = PrettyPandas(dataframe)
    styled.total()._apply_summaries()

    # ``all(frame == other)`` iterates over the column labels of the comparison
    # result rather than its cell values, so it cannot detect a mutation.
    # ``DataFrame.equals`` compares the actual contents.
    assert dataframe.equals(snapshot)
    assert styled.data.equals(snapshot)
Пример #4
0
def test_data_safety(dataframe):
    """Translating a summarised frame must not modify the wrapped DataFrame.

    A deep copy taken before the call is compared with both the caller's frame
    and the ``data`` attribute of the ``PrettyPandas`` object afterwards.
    """
    snapshot = copy.deepcopy(dataframe)

    styled = PrettyPandas(dataframe)
    styled.total()._translate()

    # ``all(frame == other)`` iterates over the column labels of the comparison
    # result rather than its cell values, so it cannot detect a mutation.
    # ``DataFrame.equals`` compares the actual contents.
    assert dataframe.equals(snapshot)
    assert styled.data.equals(snapshot)
Пример #5
0
def test_summary(dataframe):
    """The translated body row at index 10 must carry the column sums."""
    styled = PrettyPandas(dataframe).total()
    expected_totals = list(styled.data.sum())

    translated = styled._translate()
    # NOTE(review): index 10 presumably is the summary row because the fixture
    # has ten data rows - confirm against the fixture definition.
    data_cells = [cell for cell in translated['body'][10]
                  if cell['type'] == 'td']
    data_cells.sort(key=itemgetter('id'))
    observed = [cell['value'] for cell in data_cells]

    assert observed == expected_totals
Пример #6
0
def test_creation(dataframe):
    """Check construction of ``PrettyPandas`` and its default attributes.

    Covers three things: a DataFrame is accepted, ``None`` is rejected with
    ``TypeError``, and ``summary_rows`` / ``summary_cols`` / ``formatters``
    default to empty lists unless passed explicitly.
    """
    PrettyPandas(dataframe)

    # A plain try/except would pass silently if nothing were raised; the
    # ``else`` branch turns a missing exception into a test failure.
    try:
        PrettyPandas(None)
    except TypeError:
        pass
    else:
        raise AssertionError('PrettyPandas(None) did not raise TypeError')

    p1 = PrettyPandas(dataframe)
    assert p1.summary_rows == []
    assert p1.summary_cols == []
    assert p1.formatters == []

    # Passing summary_rows must only affect that attribute of the new object.
    p2 = PrettyPandas(dataframe, summary_rows=['test'])
    assert p2.summary_rows == ['test']
    assert p2.summary_cols == []
    assert p2.formatters == []
Пример #7
0
def present(df):
    """Present dataframe df nicely by printing to screen.

    The frame is passed through ``tz_localize(None)`` and printed as rendered
    PrettyPandas output. If rendering fails, the plain ``DataFrame.to_html()``
    output is printed instead.
    """

    from prettypandas import PrettyPandas

    # Localize once; both the pretty and the fallback path print this frame.
    naive = df.tz_localize(None)

    # prettypandas won't work with repeated indices such as during CST/CDT transition
    try:
        print(PrettyPandas(naive).render())
    except Exception:
        # Best-effort fallback. ``Exception`` (not a bare ``except``) so that
        # KeyboardInterrupt and SystemExit still propagate.
        print(naive.to_html())
Пример #8
0
def test_mulitindex():
    """A column total on a two-level index must translate correctly.

    Every translated body row must end with the value 10, and the table styles
    must contain a rule for the selector ``td:nth-child(5)``.
    """
    frame = pd.DataFrame({'A': [1, 2],
                          'B': [3, 4],
                          'D': [4, 3],
                          'C': [6, 7]})
    indexed = frame.set_index(['A', 'B'])

    output = PrettyPandas(indexed).total(axis=1)._translate()

    # D + C is 10 in both rows, so the last cell of each row must be 10.
    assert all(row[-1]['value'] == 10 for row in output['body'])

    assert any(style['selector'] == 'td:nth-child(5)'
               for style in output['table_styles'])
Пример #9
0
def test_mulitindex():
    """A column total on a two-level index must raise ``ValueError``."""
    frame = pd.DataFrame({'A': [1, 2], 'B': [3, 4], 'D': [4, 3], 'C': [6, 7]})

    with pytest.raises(ValueError):
        indexed = frame.set_index(['A', 'B'])
        PrettyPandas(indexed).total(axis=1)._apply_summaries()
Пример #10
0
def prettyframe(dataframe):
    """Wrap ``dataframe`` in a ``PrettyPandas`` object and return it."""
    wrapped = PrettyPandas(dataframe)
    return wrapped
Пример #11
0
female.head(2)

# #### Sex Distribution

# In[9]:

# Head counts and their share of the whole dataset.
male_count, female_count, user_count = len(male), len(female), len(d)
print("{} males ({:.1%}), {} females ({:.1%})".format(
    male_count, male_count / user_count,
    female_count, female_count / user_count))

# In[10]:

# Pretty-print the first 10 rows, leaving out the columns with "essay" in
# the name (they are long).
short_columns = [c for c in d.columns if "essay" not in c]
PrettyPandas(d.head(10)[short_columns])

# ### Age Distribution

# In[11]:

age_summary = d["age"].describe()
print("Age statistics:\n{}".format(age_summary))
print()
over_80_count = (d["age"] > 80).sum()
print("There are {} users older than 80".format(over_80_count))

# ### Find the age outliers
# Apparently we have one 110-year-old user and only one other user over 80.
# They might be outliers, so let's have a look at their data.

# In[12]:
Пример #12
0
]

# Materialise the aggregation cursor into a DataFrame.
aggResult = collection.aggregate(pipeline)
female = pd.DataFrame(list(aggResult))
female.head(2)

print("{} males ({:.1%}), {} females ({:.1%})".format(len(male),
                                                      len(male) / len(d),
                                                      len(female),
                                                      len(female) / len(d)))

# Pretty-print the first 10 rows, leaving out the columns with "essay" in
# the name (they are long).
PrettyPandas(d.head(10)[[
    c for c in d.columns if "essay" not in c
]])

d["age"].describe()

print("Age statistics:\n{}".format(d["age"].describe()))
print()
print("There are {} users older than 80".format((d["age"] > 80).sum()))

# Cursor.count() was deprecated in PyMongo 3.7 and removed in 4.0;
# Collection.count_documents() is the supported way to count matches.
collection.count_documents({"age": {"$gt": 80}})

# Let's assume the 110-year-old lady and the athletic 109-year-old gentleman
# (who's working on a masters program) are outliers: we get rid of them so the
# following plots look better. They didn't say much else about themselves,
# anyway.
# We then remove them and count again to confirm that none are left.
collection.delete_many({"age": {"$gt": 80}})
collection.count_documents({"age": {"$gt": 80}})
print("The dataset now contains {} records".format(
Пример #13
0
# Script fragment: run a SQL query, relabel the result and print it as
# PrettyPandas-rendered output.
#
# Earlier connection / query variants, kept commented out for reference:
# $dbh=mysql_connect('tabs1.gerg.tamu.edu','tabsweb','tabs')
# engine = create_engine('postgresql://*****:*****@localhost:5432/mydatabase')
# query = "SELECT * FROM tabs_D_ven WHERE (date BETWEEN '2016-1-1' - interval 0 day AND '2016-1-1' + interval 0 day) order by obs_time"
# query = args.query
# NOTE(review): the connection URL embeds the credentials in source; consider
# moving them to configuration.
engine = create_engine(
    'mysql+mysqldb://tabsweb:[email protected]/tabsdb')
#engine = create_engine('mysql+mysqldb://tabsweb:[email protected]/tabsdb')
# NOTE(review): ``query`` is not assigned in this fragment (both assignments
# above are commented out) - presumably it is bound earlier; verify.
# The result is indexed by its ``obs_time`` column.
df = pd.read_sql_query(query, engine, index_col=['obs_time'])
# remove extra date/time columns
df = df.drop(['date', 'time'], axis=1)
# Assign six display labels to the columns of the frame.
df.columns = [
    'East [cm/s]', 'North [cm/s]', 'Dir to [°T]', 'WaterT [°C]', 'Tx',
    'Ty'
]
df.index.name = 'Dates [UTC]'
# df['Speed [cm/s]']
# ALSO ROTATED

# Earlier file-based loading variant, kept commented out for reference:
# # df = pd.read_table('tmp/Dven0kjrpw', delim_whitespace=True, index_col=[0],
# df = pd.read_table(args.result, delim_whitespace=True, index_col=[0],
#                    header=0, names=['Date', 'Time', 'East [cm/s]', 'North [cm/s]', 'Speed [cm/s]',
#                                     'Dir to [°T]', 'WaterT [°C]'],
#                    parse_dates={'Dates [UTC]': ['Date', 'Time']})
# html = df.to_html()
# import pdb; pdb.set_trace()
# print(html)
# Print the rendered table to stdout.
print(PrettyPandas(df).render())
# return table
# print(df)
# NOTE(review): looks like a leftover debug/marker print - confirm whether a
# consumer of the output relies on it before removing.
print('e')