import numpy as np
import pandas as pd
import ProjectUitlities.Utils as Utils

# Row-filtering demo on a two-row DataFrame: where() + dropna(), then query().
# Utils.printSpaces is a project helper called with each section's title
# (presumably it prints a heading -- confirm in ProjectUitlities.Utils).
people = pd.DataFrame(
    {
        "fname": ["Sumit", "Isha"],
        "lname": ["Chauhan", "Khattar"],
        "Age": [31, 31],
    }
)

Utils.printSpaces("Original dataframe")
print(people)

Utils.printSpaces(
    "filter where name is Sumit \n rows not matching the columns will be converted to null"
)
# where() keeps the frame's shape and nulls out rows that fail the mask;
# dropna() then removes those nulled rows.
is_sumit = people["fname"] == "Sumit"
masked = people.where(is_sumit)
print(masked.dropna())

Utils.printSpaces("using queries , age >30 ")
older_than_30 = people.query("Age > 30")
print(older_than_30)

Utils.printSpaces("using queries , fname is Sumit ")
named_sumit = people.query("fname == 'Sumit' ")
print(named_sumit)
# Series demo: element-wise arithmetic, head(), map() and boolean-mask filtering.
# NOTE(review): the previous header comment described cov(), but no covariance
# is computed anywhere in this section.
import numpy as np
import pandas as pd
import ProjectUitlities.Utils as Utils
import matplotlib.pyplot as plt

raw_values = np.array([1, 2, 1, 4, 5])
ser = pd.Series(raw_values)

# Scalar add, scalar multiply, then element-wise multiply by a list.
for result in (ser.add(1), ser.mul(2), ser.mul([1, 2, 3, 4, 5])):
    print(result)

Utils.printSpaces("selecting top 2 rows")
first_two = ser.head(2)
print(first_two)

# The plot is created but never displayed: plt.show() is left disabled below.
# The original notes list the Tk packages to install before enabling it.
ser.plot()
# sudo apt-get install python-tk
# sudo apt-get install python3-tk
# plt.show()

Utils.printSpaces("map function can take lambdas")
squared = ser.map(lambda value: value**2)
print(squared)

Utils.printSpaces("to filter the value based on data")
above_ten = squared > 10
print(squared[above_ten])

Utils.printSpaces("filter() method works on index and not data")
# Defined but not used further in this section.
names = ["Sumit", "Chauhan", "Male", 31]
import pandas as pd
import numpy as np
import ProjectUitlities.Utils as Utils

# Series creation and element-access demo.
# Utils.printSpaces is a project helper called with each section's title.

Utils.printSpaces("Creating series from arrays")
data = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
ser = pd.Series(data)
print(ser)

Utils.printSpaces("Creating series from lists")
# Named `values` rather than `list` so the builtin list() is not shadowed.
values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
ser = pd.Series(values)
print(ser)

Utils.printSpaces("Accessing Element from Series with Position")
# .iloc makes the positional intent explicit; same result as ser[:5] here.
print(ser.iloc[:5])

Utils.printSpaces("Accessing Element Using Label(index)")
ser = pd.Series(data)
# .loc makes the label lookup explicit; with the default index, label 3 is
# the same element ser[3] returned.
print(ser.loc[3])
import pandas as pd
import ProjectUitlities.Utils as Utils

# DataFrame iteration demo: rows via iterrows(), columns by name, then
# positional row/column selection.

# dictionary of lists (named `students` so the builtin dict is not shadowed)
students = {
    'name': ["aparna", "pankaj", "sudhir", "Geeku"],
    'degree': ["MBA", "BCA", "M.Tech", "MBA"],
    'score': [90, 40, 80, 98],
}

# creating a dataframe from a dictionary
df = pd.DataFrame(students)

# iterating over rows using iterrows(); each step yields (index label, row Series)
Utils.printSpaces("data frame iterationss using iterrows")
for i, j in df.iterrows():
    print(f'dataframa indes {i} with data: \n {j} \n')
    print()

Utils.printSpaces("Iterating Columns ")
# Column labels taken from df.columns, so this does not rely on list().
columns = df.columns.tolist()
print(df)
print(f'columns are {columns}')
for column_name in columns:
    # printing the third element of the column; .iloc is positional, so this
    # stays the third element whatever the index labels are.
    print(df[column_name].iloc[2])

Utils.printSpaces("printing second row ")
print(df.iloc[1])

Utils.printSpaces("fetching second Columns ")
# Position 1 is the second column (the original used position 2, i.e. the
# third column, which contradicted the heading and the row example above).
print(df.iloc[:, 1])
import collections
import ProjectUitlities.Utils as Utils


def _show_keys_and_values(mapping):
    """Print the keys, then the values, of *mapping* as plain lists."""
    print('Keys = {}'.format(list(mapping.keys())))
    print('Values = {}'.format(list(mapping.values())))


# ChainMap presents several dictionaries as one mapping without duplicate
# keys. Its main use is looking a key up across multiple dictionaries at once.
dict1 = {'name': 'Sumit', 'lname': 'Chauhan'}
dict2 = {'name': 'Isha', 'lname': 'Khattar', 'age': 31}

# Argument order matters: when a key exists in more than one dictionary, the
# value from the earliest dictionary is the one returned.
combined = collections.ChainMap(dict1, dict2)
print(combined)

# For duplicated keys only the first dictionary's value is shown.
_show_keys_and_values(combined)
print(combined.get('name'))

Utils.printSpaces("updating chain maps")
print('just update the dictionary used to create the chained map and it will be done automatically')
dict1['gender'] = 'male'
_show_keys_and_values(combined)
# Missing-value cleaning demo: fillna() and dropna() on a small score table.
import numpy as np
import pandas as pd  # required for pd.DataFrame below; was not imported here
from ProjectUitlities import Utils

# dictionary of lists (named `scores` so the builtin dict is not shadowed)
scores = {
    'First Score': [100, 90, np.nan, 95],
    'Second Score': [30, 45, 56, np.nan],
    'Third Score': [np.nan, 40, 80, 98],
}

# creating a dataframe from dictionary
df = pd.DataFrame(scores)

Utils.printSpaces("data cleaning ")

Utils.printSpaces("filling nulls")
# filling missing values using fillna()
# inplace=False means a new frame is returned and df itself is not changed
print(df.fillna(0, inplace=False))

Utils.printSpaces("dropping nulls")
# dropping missing values using dropna(); it also returns a new frame.
# by default axis is 0, so every row containing a null is dropped.
print(df.dropna())

Utils.printSpaces("dropping columns nulls")
# axis=1 drops every column that contains a null instead of every row.
# Each column in this table has one NaN, so no columns remain.
print(df.dropna(axis=1))
import ProjectUitlities.Utils as Utils

# Two small list exercises: detect a repeated value, then upper-case strings.

Utils.printSpaces("Finding duplicate of 5")
numbers = [1, 2, 3, 4, 5, 6, 7, 7, 88, 9, 2, 32, 3, 4, 5, 1]
# Collect every occurrence of 5; more than one occurrence means a duplicate.
fives = [item for item in numbers if item == 5]
if len(fives) <= 1:
    print('5 was not duplicated in the list')
else:
    print('there is a duplicate 5')

Utils.printSpaces("Change things to upper case ")
words = ['sumit', 'chauhan']
print([word.upper() for word in words])