示例#1
0
# 6. How to get the items of series A not present in series B?

import numpy as np
import pandas as pd

# Input: two integer series that share the values 4 and 5.
ser1 = pd.Series(data=[1, 2, 3, 4, 5])
ser2 = pd.Series(data=[4, 5, 6, 7, 8])

# Solution: np.setdiff1d yields the sorted, de-duplicated values of its first
# argument that do not occur in its second; wrapping the array in a new Series
# gives it a fresh default index.
ser_diff = pd.Series(data=np.setdiff1d(ser1, ser2))

# The website's solution is instead:
#     ser1[~ser1.isin(ser2)]
# Here "[]" is syntactic sugar for __getitem__, which accepts a boolean array
# of the same length and returns only the elements whose flag is True.

import helper_funcs as hf

# Hand the result to the project helper under exercise id "006".
# NOTE(review): what write_results_str does with it is defined in
# helper_funcs, which is not visible here.
hf.write_results_str(
    "006",
    [ser_diff],
)
示例#2
0
# 3. How to convert the index of a series into a column of a dataframe?

import numpy as np
import pandas as pd

# Input: a Series whose index holds the 26 letters (in the order given by the
# string below) and whose values are the integers 0..25.
mylist = list("abcedfghijklmnopqrstuvwxyz")
myarr = np.arange(0, 26)
mydict = {letter: number for letter, number in zip(mylist, myarr)}
ser = pd.Series(data=mydict)

# My Solution

import helper_funcs as hf

# reset_index() moves the index labels into an ordinary column and returns a
# DataFrame carrying a new default integer index (drop=False is the default,
# spelled out here to make the intent explicit).
df = ser.reset_index(drop=False)

# Hand the result to the project helper under exercise id "003".
# NOTE(review): the helper's behaviour is defined in helper_funcs, not shown.
hf.write_results_str(
    "003",
    [df],
)




示例#3
0
# 2. How to create a series from a list, numpy array and dict?

import numpy as np
import pandas as pd

# Input: the same 26 items expressed as a list, a numpy array and a dict.
mylist = list("abcedfghijklmnopqrstuvwxyz")
myarr = np.arange(0, 26)
mydict = {letter: number for letter, number in zip(mylist, myarr)}

# My Solution: pd.Series accepts each container directly. The list and the
# array get a default integer index; the dict's keys become the index.
series_from_list = pd.Series(mylist)
series_from_array = pd.Series(myarr)
series_from_dict = pd.Series(mydict)

import helper_funcs as hf

# Hand all three series to the project helper under exercise id "002".
# NOTE(review): the helper's behaviour is defined in helper_funcs, not shown.
hf.write_results_str(
    "002",
    [
        series_from_list,
        series_from_array,
        series_from_dict,
    ],
)
示例#4
0
# 8. How to get the minimum, 25th percentile, median, 75th, and max of a numeric series?

import numpy as np
import pandas as pd
import helper_funcs as hf

# Input: 25 draws from a normal distribution with mean 10 and standard
# deviation 5. The generator is not seeded, so values differ between runs.
ser = pd.Series(np.random.normal(loc=10, scale=5, size=25))

# Solution: describe() returns count, mean, std, min, the 25%/50%/75%
# quantiles and max in a single Series, which includes the five requested
# statistics.
summary_series = ser.describe()

# Alternatives that return only the five requested numbers:
#   print(np.percentile(ser, q=[0, 25, 50, 75, 100]))   # website's suggestion
#   print(ser.quantile([0, 0.25, 0.5, 0.75, 1.0]))
print(summary_series)
# Hand the input and its summary to the project helper under exercise id
# "008". NOTE(review): the helper's behaviour is defined in helper_funcs.
hf.write_results_str(
    "008",
    [ser, summary_series],
)
示例#5
0
# 9. How to get frequency counts of unique items of a series?

import numpy as np
import pandas as pd
import helper_funcs as hf

# Input: 30 letters picked from 'a'..'h' by drawing random positions in
# [0, 8). The generator is not seeded, so values differ between runs.
ser = pd.Series(
    np.take(list("abcdefgh"), np.random.randint(0, 8, size=30))
)

# Solution: value_counts() tallies how often each distinct value occurs,
# ordered from most to least frequent.
counts = ser.value_counts()
# Hand the input and its frequency table to the project helper under exercise
# id "009". NOTE(review): the helper's behaviour is defined in helper_funcs.
hf.write_results_str(
    "009",
    [ser, counts],
)
示例#6
0
# 5. How to assign name to the series’ index?

import pandas as pd

# Input: a Series of 26 single-letter strings with the default integer index.
ser = pd.Series(data=list("abcedfghijklmnopqrstuvwxyz"))

# Solution: set the name of the Series itself. Note that, despite the
# heading's wording, this assigns ser.name and leaves ser.index.name unset.
ser.name = "alphabets"
# Equivalent alternative:
#     ser.rename("alphabets", inplace=True)

import helper_funcs as hf

# Hand the named series to the project helper under exercise id "005".
# NOTE(review): the helper's behaviour is defined in helper_funcs, not shown.
hf.write_results_str(
    "005",
    [ser],
)
示例#7
0
# 7. How to get the items not common to both series A and series B?

import pandas as pd

# Input: two integer series that share the values 4 and 5.
ser1 = pd.Series(data=[1, 2, 3, 4, 5])
ser2 = pd.Series(data=[4, 5, 6, 7, 8])

# Solution: keep the elements of each series that are absent from the other,
# then stack the two filtered pieces end to end.
combined_series = pd.concat(
    [
        ser1[~ser1.isin(ser2)],
        ser2[~ser2.isin(ser1)],
    ],
    axis=0,
)

# The website instead combines a 1-d intersection and a 1-d difference to
# form the final series. Its result carries the index of the union, whereas
# this solution keeps each element's index label from its original series,
# so labels can repeat (here the label 2 appears twice).

import helper_funcs as hf

# Hand the combined series to the project helper under exercise id "007".
# NOTE(review): the helper's behaviour is defined in helper_funcs, not shown.
hf.write_results_str(
    "007",
    [combined_series],
)
示例#8
0
# 10. How to keep only top 2 most frequent values as it is and replace everything else as ‘Other’?

import numpy as np
import pandas as pd
import helper_funcs as hf


# Input: 12 integers drawn from [1, 5) with a fixed seed so the example is
# reproducible. A bare `np.random.RandomState(100)` call only constructs a
# generator and throws it away; it does not seed the module-level
# np.random functions, so the draw has to come from the seeded generator.
rng = np.random.RandomState(100)
ser = pd.Series(rng.randint(1, 5, [12]))

# Solution: value_counts() orders values from most to least frequent, so the
# first two index entries are the two most frequent values (fewer than two
# entries if the series has fewer than two distinct values).
top2 = list(ser.value_counts().index[:2])
# Keep entries that belong to the top values and replace all others with
# "Other". isin() works for any length of top2, unlike indexing top2[0] and
# top2[1], which raises IndexError when only one distinct value exists.
mod_ser = ser.where(ser.isin(top2), "Other")

# Website offers
# print("Top 2 Freq:", ser.value_counts())
# ser[~ser.isin(ser.value_counts().index[:2])] = 'Other'

# Hand the input, the two most frequent values and the recoded series to the
# project helper under exercise id "010".
# NOTE(review): the helper's behaviour is defined in helper_funcs, not shown.
hf.write_results_str(
    "010",
    [ser, top2, mod_ser],
)