Пример #1
0
#!/usr/bin/env python
import pandas as pd
from printheader import print_header

# Column labels, row labels and a 6x5 grid of values used to build the frame.
cols = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']  # <1>
indices = ['a', 'b', 'c', 'd', 'e', 'f']  # <2>

values = [  # <3>
    [100, 110, 120, 130, 140],
    [200, 210, 220, 230, 240],
    [300, 310, 320, 330, 340],
    [400, 410, 420, 430, 440],
    [500, 510, 520, 530, 540],
    [600, 610, 620, 630, 640],
]
print_header('cols')
print(cols, '\n')

print_header('indices')
print(indices, '\n')

print_header('values')
print(values, '\n')

df = pd.DataFrame(values, index=indices, columns=cols)  # <4>
print_header('DataFrame df')
print(df, '\n')

# Plain [] with a single label selects one column.
print_header("df['gamma']")
print(df['gamma'], '\n')  # <5>

# Plain [] with a slice always slices ROWS, so df['alpha':'gamma'] would be
# matched against the row index instead of the column labels.  .loc makes
# the intended axis explicit for both kinds of slice.
print_header("df.loc[:, 'alpha':'gamma']")
print(df.loc[:, 'alpha':'gamma'], '\n')  # <5>

print_header("df.loc['b':'e']")
print(df.loc['b':'e'])  # <5>
Пример #2
0
#!/usr/bin/env python
import pandas as pd
from printheader import print_header

# Four small indexes to combine; index1 and index4 also carry a name.
index1 = pd.Index(['a', 'b', 'c'], name='letters')  # <1>
index2 = pd.Index(['b', 'a', 'c'])
index3 = pd.Index(['b', 'c', 'd'])
index4 = pd.Index(['red', 'blue', 'green'], name='colors')

print_header("index1, index2, index3", 70)  # <2>
print(index1)
print(index2)
print(index3)
print()

# Since pandas 2.0 the & and | operators on an Index are logical
# operations, not aliases for set operations, so the set operations are
# spelled with the named methods.
print_header("index2.intersection(index3)", 70)
print(index2.intersection(index3))  # <3>
print()

print_header("index2.union(index3)", 70)
print(index2.union(index3))  # <4>
print()

print_header("index1.difference(index3)", 70)
print(index1.difference(index3))  # <5>
print()
Пример #3
0
Created on Sun May 19 20:42:32 2013

'''
from pandas.core.frame import DataFrame
from printheader import print_header

# Axis labels and the grid of numbers they describe: each row starts at a
# multiple of 100 and steps by 10 across the five columns.
cols = 'alpha beta gamma delta epsilon'.split()
index = list('abcdef')
values = [
    [hundreds + tens for tens in range(0, 50, 10)]
    for hundreds in range(100, 700, 100)
]
print_header('values:')
print(values, '\n\n')

df = DataFrame(data=values, columns=cols, index=index)
print_header('DataFrame df')
print(df, '\n')

# drop() hands back a new frame, so df is still complete for the next step.
df2 = df.drop(columns=['beta', 'delta'])
print_header("After dropping beta and delta:")
print(df2, '\n')

# Same operation along the other axis: remove rows by index label.
df3 = df.drop(index=['b', 'c', 'e'])
print_header("After dropping rows b, c, and e")
print(df3)
from pandas.core.frame import DataFrame
from printheader import print_header  # <1>

# numpy supplies the sample data below; it is imported here because no
# numpy import is visible among this script's import lines.
import numpy as np

dataset1 = np.arange(9.).reshape((3, 3))  # <2>

df1 = DataFrame(  # <3>
    dataset1,
    columns=['apple', 'banana', 'mango'],
    index=['orange', 'purple', 'blue'])

dataset2 = np.arange(12.).reshape((4, 3))  # <2>

df2 = DataFrame(  # <3>
    dataset2,
    columns=['apple', 'banana', 'kiwi'],
    index=['orange', 'purple', 'blue', 'brown'])

print_header('df1')
print(df1)  # <4>
print()

print_header('df2')
print(df2)  # <4>
print()

# The two frames share only some row and column labels; + aligns on labels
# and leaves NaN wherever either operand has no value.
print_header('df1 + df2')
print(df1 + df2)  # <5>

# add() with fill_value=0 substitutes 0 when only one of the two frames is
# missing a value at a given position.
print_header('df1.add(df2, fill_value=0)')
print(df1.add(df2, fill_value=0))  # <6>
Пример #5
0
from pandas.core.frame import DataFrame
from printheader import print_header

# Axis labels plus a 6x5 grid: row starts are 100..600, stepping by 10.
cols = 'alpha beta gamma delta epsilon'.split()
index = list('abcdef')
values = [
    list(range(start, start + 50, 10))
    for start in range(100, 700, 100)
]

df = DataFrame(data=values, columns=cols, index=index)
print_header('DataFrame df')
print(df, '\n')

# One column, looked up by its label ...
print_header("df['alpha']:")
print(df['alpha'], '\n')

# ... and the same column again through attribute access.
print_header("df.alpha:")
print(df.alpha, '\n')

# A slice inside [] is applied to the rows, here by index label.
print_header("df['b':'e']")
print(df['b':'e'], '\n')

# single row
Пример #6
0
"""

@author: jstrick
Created on Sat May 18 11:36:57 2013

"""
import pandas as pd
from printheader import print_header

# data from
# http://www.rita.dot.gov/bts/sites/rita.dot.gov.bts/files/publications/
# national_transportation_statistics/html/table_01_44.html

# Load the boardings table; thousands=',' makes "1,234"-style cells numeric.
airports_df = pd.read_csv(
    '../DATA/airport_boardings.csv',
    thousands=',',
)

print_header("ENTIRE DATAFRAME")
print(airports_df, "\n")

print_header("ONLY COLUMN 'CODE'")
print(airports_df['Code'], "\n")

print_header("SELECTED COLUMNS WITH FILTERED ROWS")
columns_wanted = ['Code', '2001 Total', 'Airport']
sort_col = '2001 Total'
max_val = 20000000
# Boolean mask: True for rows whose 2001 total is above the threshold.
selector = airports_df[sort_col] > max_val
# Rows and columns are picked in a single .loc lookup.
print(airports_df.loc[selector, columns_wanted])

print_header("COLUMN TOTALS")
year_columns = ['2001 Total', '2010 Total']
print(airports_df[year_columns].sum(), "\n")
Пример #7
0
#!/usr/bin/env python
import pandas as pd
from printheader import print_header

# data from
# http://www.rita.dot.gov/bts/sites/rita.dot.gov.bts/files/publications/
# national_transportation_statistics/html/table_01_44.html

# Load the boardings table, using the file's second column as the row index.
airports_df = pd.read_csv(  # <1>
    '../DATA/airport_boardings.csv',
    thousands=',',
    index_col=1,
)

print_header("HEAD OF DATAFRAME")
print(airports_df.head(), "\n")

print_header("SELECTED COLUMNS WITH FILTERED ROWS")
columns_wanted = ['2001 Total', 'Airport']
sort_col = '2001 Total'
max_val = 20000000
# Boolean mask: True for rows whose 2001 total is above the threshold.
selector = airports_df[sort_col] > max_val
# Rows and columns are picked in a single .loc lookup.
selected = airports_df.loc[selector, columns_wanted]
print(selected)

print_header("COLUMN TOTALS")
year_columns = ['2001 Total', '2010 Total']
print(airports_df[year_columns].sum(), "\n")

# Disabled experiment, kept for reference.  NOTE(review): set_index()
# returns a new frame by default, so the result would need to be assigned.
# print_header("'CODE' COLUMN SET AS INDEX")
# airports_df.set_index('Code')
# print(airports_df)

print_header("FIRST FIVE ROWS")
first_five = airports_df.iloc[:5]
print(first_five)
Пример #8
0
# NOTE(review): index, cols, Series, DataFrame and print_header are set up
# earlier in this script, outside the visible fragment.
print(index, "\n")

# Six rows of five values, one row per index label.
values = [
    [100, 110, 120, 930, 140],
    [250, 210, 120, 130, 840],
    [300, 310, 520, 430, 340],
    [275, 410, 420, 330, 777],
    [300, 510, 120, 730, 540],
    [150, 610, 320, 690, 640],
]

ser1 = Series([0.1, 0.2, 0.3, 0.4, 0.5])

df = DataFrame(values, index=index, columns=cols)
print_header("Basic DataFrame:")
print(df, end="\n\n")

# Multiplying the frame by a scalar produces a new frame; df is untouched.
print_header("Triple each column")
print(df * 3, end="\n\n")

# Augmented assignment on one column writes the result back into df.
print_header("Multiply column gamma by 1.5")
df['gamma'] *= 1.5
print(df, end="\n\n")

# Disabled experiment, kept for reference.
# print_header("Multiply by strings")
# df['gamma'] *= 'spam'
# print(df)
Пример #9
0
#!/usr/bin/env python
import pandas as pd
from printheader import print_header

# Labels for the headerless CSV: a description column, every fifth year
# from 1960 to 1985, each year from 1990 to 2009, then the last two columns.
col_names = (
    ["Desc"]
    + [str(year) for year in range(1960, 1990, 5)]
    + [str(year) for year in range(1990, 2010)]
    + ["(R) 2010", "2011"]
)  # <1>

df = pd.read_csv(  # <2>
    '../DATA/energy_use_quad.csv',
    header=None,
    names=col_names,
    index_col="Desc",
)

# Visual divider printed between the sections of the report.
divider = "-" * 60

print_header("database header")
print(df.head(), "\n")
print(divider)

print_header("Only column 2003")
print(df[:]['2003'], "\n")  # <3>
print(divider)

# Rows are labelled by the "Desc" column, so .loc looks one up by its text.
print_header('Only row "Transportation as percent..."')
row_label = 'Transportation as percent of total energy consumption'
print(df.loc[row_label])