def plot_feature_pair(df, xname, yname, ax = None, legend = True, figsize = None, *args, **kwargs):
	"""
	Plot the relationship between two features, choosing the chart by feature types:

	1. numerical vs numerical - scatter plot with a lowess trend line
	2. numerical vs categorical - one density curve of the numerical feature
	   per level of the categorical feature
	3. categorical vs categorical - horizontal stacked bar chart of the crosstab

	This helps spot useful features that are both common and have extreme
	patterns (for classification).

	df: DataFrame holding both features
	xname: name of feature x (usually an input feature of interest)
	yname: name of feature y (usually the output feature)
	ax: matplotlib axes to draw on; a new figure/axes is created when None
	legend: whether to draw a legend on the axes
	figsize: figure size, used only when a new figure is created (ax is None)
	args, kwargs: extra plotting parameters, forwarded to ax.scatter in the
		numerical vs numerical case only; kwargs override the default
		color / s / marker
	"""
	if ax is None:
		fig, ax = plt.subplots(1, 1, figsize = figsize)

	x_dtype = "numerical" if is_numerical(df, xname) else "categorical"
	y_dtype = "numerical" if is_numerical(df, yname) else "categorical"
	x_is_num = x_dtype == "numerical"
	y_is_num = y_dtype == "numerical"
	x, y = df[xname], df[yname]

	if x_is_num and y_is_num:
		# Defaults first so that caller-supplied kwargs win instead of raising
		# "got multiple values for keyword argument".
		scatter_kwargs = {"color": "blue", "s": 10, "marker": "."}
		scatter_kwargs.update(kwargs)
		ax.scatter(x, y, *args, **scatter_kwargs)
		# return_sorted = False yields fitted values aligned with the original
		# order of x, so both must be reordered by the SAME permutation;
		# sorting them independently would break the (x, fitted) pairing and
		# force the curve to look monotonically increasing.
		lowessy = np.asarray(sm.nonparametric.lowess(y, x, return_sorted = False))
		x_values = np.asarray(x)
		order = np.argsort(x_values)
		ax.plot(x_values[order], lowessy[order], "r-", label = "lowess", alpha = 1)
		ax.set_xlabel("%s(%s)" % (xname, x_dtype))
		ax.set_ylabel("%s(%s)" % (yname, y_dtype))
	elif x_is_num and not y_is_num:
		for value, subdf in df.groupby(by = yname):
			# groups with a single row are skipped (no density is drawn for them)
			if subdf.shape[0] > 1:
				subdf[xname].plot(kind = "density", label = value, ax = ax)
		ax.set_xlabel("%s|%s" % (xname, yname))
	elif y_is_num:
		# x is categorical, y is numerical: the density of y goes on the horizontal axis
		for value, subdf in df.groupby(by = xname):
			if subdf.shape[0] > 1:
				subdf[yname].plot(kind = "density", label = value, ax = ax)
		ax.set_xlabel("%s|%s" % (yname, xname))
	else: # categorical and categorical
		pd.crosstab(df[xname], df[yname], margins = False).plot(kind = 'barh', stacked = True, ax = ax)
		ax.set_xlabel("dist. of %s" % yname)
	if legend:
		ax.legend(loc = "best")
def find_numerical_features(df):
	"""
	Collect the names of the columns of df that is_numerical reports as numerical.
	df: DataFrame
	returns: numpy array of the matching column names, in column order
	"""
	numerical_columns = []
	for column in df.columns:
		if is_numerical(df, column):
			numerical_columns.append(column)
	return np.asarray(numerical_columns)