Пример #1
0
import numpy as np
import pandas as pd
import ProjectUitlities.Utils as Utils

# Two sample people; every key becomes a DataFrame column.
people = {
    "fname": ["Sumit", "Isha"],
    "lname": ["Chauhan", "Khattar"],
    "Age": [31, 31],
}
df1 = pd.DataFrame(people)

Utils.printSpaces("Original dataframe")
print(df1)

# where() keeps the frame's shape and nulls out the rows that fail the
# condition; dropna() then discards those nulled rows.
Utils.printSpaces(
    "filter where name is Sumit \n rows not matching the columns will be converted to null"
)
is_sumit = df1["fname"] == "Sumit"
masked = df1.where(is_sumit)
print(masked.dropna())

# query() expresses a row filter as a string expression over column names.
Utils.printSpaces("using queries , age >30 ")
older_than_30 = df1.query("Age > 30")
print(older_than_30)

Utils.printSpaces("using queries , fname is Sumit ")
named_sumit = df1.query("fname == 'Sumit' ")
print(named_sumit)
Пример #2
0
# Series.cov() computes the covariance between two Series.

import numpy as np
import pandas as pd
import ProjectUitlities.Utils as Utils
import matplotlib.pyplot as plt

# Sample values wrapped in a Series with the default integer index.
data = np.array([1, 2, 1, 4, 5])
ser = pd.Series(data=data)

# Element-wise arithmetic: add a scalar, multiply by a scalar, then multiply
# by a list of the same length.
plus_one = ser.add(1)
doubled = ser.mul(2)
scaled = ser.mul([1, 2, 3, 4, 5])
for result in (plus_one, doubled, scaled):
    print(result)

Utils.printSpaces("selecting top 2 rows")
first_two = ser.head(2)
print(first_two)

# The plot is drawn but never displayed here because plt.show() is not called.
# NOTE(review): displaying it presumably needs a Tk backend
# (python-tk / python3-tk via apt-get) - confirm on the target machine.
ser.plot()

Utils.printSpaces("map function can take lambdas")
squared = ser.map(lambda value: value ** 2)
print(squared)

# A boolean mask selects entries by their data values.
Utils.printSpaces("to filter the value based on data")
above_ten = squared > 10
print(squared[above_ten])

Utils.printSpaces("filter()  method works on index and not data")
# Mixed-type sample values; no statement visible in this snippet uses them.
names = ["Sumit", "Chauhan", "Male", 31]
Пример #3
0
import pandas as pd
import numpy as np
import ProjectUitlities.Utils as Utils

# Build a Series from a NumPy array; pandas assigns the default integer index.
Utils.printSpaces("Creating series from arrays")
data = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
ser = pd.Series(data)
print(ser)

# Same values from a plain Python list. The variable is named `values`
# rather than `list` so the builtin `list` type is not shadowed.
Utils.printSpaces("Creating series from lists")
values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
ser = pd.Series(values)
print(ser)

# .iloc makes it explicit that the slice is positional (first five entries).
Utils.printSpaces("Accessing Element from Series with Position")
print(ser.iloc[:5])

# .loc makes it explicit that 3 is an index label, not a position.
Utils.printSpaces("Accessing Element Using Label(index)")
ser = pd.Series(data)
print(ser.loc[3])
Пример #4
0
import pandas as pd
import ProjectUitlities.Utils as Utils

# Column-oriented sample data: each key is a column name, each list holds that
# column's values. Named `students` so the builtin `dict` is not shadowed.
students = {'name': ["aparna", "pankaj", "sudhir", "Geeku"],
            'degree': ["MBA", "BCA", "M.Tech", "MBA"],
            'score': [90, 40, 80, 98]}

# creating a dataframe from a dictionary
df = pd.DataFrame(students)

# iterrows() yields (index label, row as a Series) pairs.
Utils.printSpaces("data frame iterationss using iterrows")
for index, row in df.iterrows():
    print(f'dataframa indes {index} with data: \n {row} \n')
    print()

# Iterating a DataFrame yields its column labels, so list(df) is the list of
# column names.
Utils.printSpaces("Iterating Columns ")
columns = list(df)
print(df)
print(f'columns are  {columns}')
for column in columns:
    # printing the third element of the column (positions are zero-based)
    print(df[column].iloc[2])

Utils.printSpaces("printing second row ")
print(df.iloc[1])

# Position 1 is the second column (zero-based), matching the heading; the
# previous index 2 selected the third column instead.
Utils.printSpaces("fetching second Columns ")
print(df[df.columns[1]])
Пример #5
0
import collections
import ProjectUitlities.Utils as Utils

# A ChainMap presents several dictionaries as one mapping in which every key
# appears only once. It is most useful for searching many dictionaries at the
# same time and getting back the right key-value pair.
dict1 = {'name': 'Sumit', 'lname': 'Chauhan'}
dict2 = {'name': 'Isha', 'lname': 'Khattar', 'age': 31}

# Argument order matters: dict1 is listed first, so for keys present in both
# dictionaries its value is the one returned.
combined = collections.ChainMap(dict1, dict2)
print(combined)

# Duplicated keys are reported once, with the value from the first mapping.
key_list = list(combined.keys())
value_list = list(combined.values())
print(f'Keys = {key_list}')
print(f'Values = {value_list}')

print(combined.get('name'))

Utils.printSpaces("updating chain maps")
print('just update the dictionary used to create the chained map and it will be done automatically')
# The chain map is built on dict1 itself, so a key added to dict1 afterwards
# is visible through `combined` without rebuilding it.
dict1['gender'] = 'male'
updated_keys = list(combined.keys())
updated_values = list(combined.values())
print(f'Keys = {updated_keys}')
print(f'Values = {updated_values}')
Пример #6
0
# importing numpy as np and pandas as pd
import numpy as np
import pandas as pd

from ProjectUitlities import Utils

# Column-oriented sample scores; np.nan marks the missing entries. Named
# `scores` so the builtin `dict` is not shadowed.
scores = {
    'First Score': [100, 90, np.nan, 95],
    'Second Score': [30, 45, 56, np.nan],
    'Third Score': [np.nan, 40, 80, 98]
}

# creating a dataframe from dictionary
df = pd.DataFrame(scores)

Utils.printSpaces("data cleaning ")

Utils.printSpaces("filling nulls")
# fillna(0) replaces every missing value with 0.
# inplace=False returns a new frame, so df itself is left unchanged.
print(df.fillna(0, inplace=False))

Utils.printSpaces("dropping nulls")
# dropna() defaults to axis=0: every row containing at least one null is
# dropped. A new frame is returned; df itself is left unchanged.
print(df.dropna())

Utils.printSpaces("dropping columns nulls")
# axis=1 drops columns instead of rows: every column containing at least one
# null is removed. Each column here has a null, so no columns remain.
print(df.dropna(axis=1))
Пример #7
0
import ProjectUitlities.Utils as Utils

Utils.printSpaces("Finding duplicate of 5")
numbers = [1, 2, 3, 4, 5, 6, 7, 7, 88, 9, 2, 32, 3, 4, 5, 1]
# list.count() tallies the occurrences directly, without building a filtered
# copy of the list; more than one occurrence means 5 is duplicated.
if numbers.count(5) > 1:
    print('there is a duplicate 5')
else:
    print('5 was not duplicated in the list')

Utils.printSpaces("Change things to upper case ")
names = ['sumit', 'chauhan']
# A comprehension upper-cases each name without a lambda wrapper.
print([name.upper() for name in names])