def shot_chart(x, y, title="", kind="scatter", color="b", cmap=None,
               xlim=(-250, 250), ylim=(422.5, -47.5),
               court_color="gray", outer_lines=False, court_lw=1,
               flip_court=False, kde_shade=True, hex_gridsize=None,
               ax=None, **kwargs):
    """
    Returns an Axes object with player shots plotted.

    Parameters
    ----------
    x, y : vectors
        The x and y coordinates of the shots taken.
    title : str, optional
        The title for the plot.
    kind : { "scatter", "kde", "hex" }, optional
        The kind of shot chart to create.
    color : matplotlib color, optional
        Color used for the scatter points. Also the base color of the
        default colormap when ``cmap`` is not given.
    cmap : matplotlib Colormap object or name, optional
        Colormap used by the KDE and hexbin plots. If ``None``, a light
        palette derived from ``color`` is used.
    {x, y}lim : two-tuples, optional
        The axis limits of the plot.
    court_color : matplotlib color, optional
        Passed to ``draw_court`` as the color of the court lines.
    outer_lines : boolean, optional
        Passed to ``draw_court``.
    court_lw : float, optional
        Passed to ``draw_court`` as the linewidth of the court lines.
    flip_court : boolean, optional
        If ``True`` both axis limits are reversed, which flips the
        orientation of the court.
    kde_shade : boolean, optional
        Passed to ``sns.kdeplot`` as ``shade`` for ``kind="kde"``.
    hex_gridsize : int, optional
        ``gridsize`` passed to ``Axes.hexbin``. If ``None`` it is the mean
        of the Freedman-Diaconis bin counts of ``x`` and ``y``.
    ax : Axes, optional
        The Axes object to plot onto. Defaults to the current Axes.
    kwargs : key, value pairs
        Forwarded to ``Axes.scatter``, ``sns.kdeplot`` or ``Axes.hexbin``,
        depending on ``kind``.

    Returns
    -------
    ax : Axes
        The Axes object with the shot chart plotted on it.

    Raises
    ------
    ValueError
        If ``kind`` is not "scatter", "kde" or "hex".
    """
    # Reject an unknown kind before touching the Axes so that a bad call
    # does not leave a half-configured plot behind.
    if kind not in ("scatter", "kde", "hex"):
        raise ValueError("kind must be 'scatter', 'kde', or 'hex'.")

    if ax is None:
        ax = plt.gca()

    if cmap is None:
        cmap = sns.light_palette(color, as_cmap=True)

    if not flip_court:
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
    else:
        ax.set_xlim(xlim[::-1])
        ax.set_ylim(ylim[::-1])

    # Booleans work on every matplotlib version; the legacy "off" string is
    # no longer interpreted as False by current releases.
    ax.tick_params(labelbottom=False, labelleft=False)
    ax.set_title(title, fontsize=18)

    draw_court(ax, color=court_color, lw=court_lw, outer_lines=outer_lines)

    if kind == "scatter":
        ax.scatter(x, y, c=color, **kwargs)

    elif kind == "kde":
        sns.kdeplot(x, y, shade=kde_shade, cmap=cmap, ax=ax, **kwargs)
        # kdeplot labels the axes; a shot chart does not want them
        ax.set_xlabel('')
        ax.set_ylabel('')

    else:  # kind == "hex"
        if hex_gridsize is None:
            # Get the number of bins for hexbin using Freedman-Diaconis rule
            # This idea was taken from seaborn, which got the calculation
            # from http://stats.stackexchange.com/questions/798/
            from seaborn.distributions import _freedman_diaconis_bins
            x_bin = _freedman_diaconis_bins(x)
            y_bin = _freedman_diaconis_bins(y)
            hex_gridsize = int(np.mean([x_bin, y_bin]))

        ax.hexbin(x, y, gridsize=hex_gridsize, cmap=cmap, **kwargs)

    return ax
def shot_chart_jointgrid(x, y, data=None, title="", joint_type="scatter",
                         marginals_type="both", cmap=None, joint_color="b",
                         marginals_color="b", xlim=(-250, 250),
                         ylim=(422.5, -47.5), joint_kde_shade=True,
                         marginals_kde_shade=True, hex_gridsize=None, space=0,
                         size=(12, 11), court_color="gray", outer_lines=False,
                         court_lw=1, flip_court=False, joint_kws=None,
                         marginal_kws=None, **kwargs):
    """
    Returns a JointGrid object containing the shot chart.

    Parameters
    ----------
    x, y : strings or vectors
        The x and y coordinates of the shots taken, either as vectors or as
        column names of ``data``. They are handed to ``sns.JointGrid``.
    data : DataFrame, optional
        DataFrame handed to ``sns.JointGrid`` together with ``x`` and ``y``.
    title : str, optional
        The title for the plot.
    joint_type : { "scatter", "kde", "hex" }, optional
        The type of shot chart for the joint plot.
    marginals_type : { "both", "hist", "kde" }, optional
        The type of plot for the marginal plots.
    cmap : matplotlib Colormap object or name, optional
        Colormap for the KDE and hexbin joint plots. If ``None``, a light
        palette derived from ``joint_color`` is used.
    joint_color : matplotlib color, optional
        Color of the scatter joint plot and base color of the default
        colormap.
    marginals_color : matplotlib color, optional
        Color used for the marginal plots.
    {x, y}lim : two-tuples, optional
        The axis limits of the joint plot.
    joint_kde_shade : boolean, optional
        Passed as ``shade`` to ``sns.kdeplot`` for the KDE joint plot.
    marginals_kde_shade : boolean, optional
        Passed as ``shade`` to ``sns.kdeplot`` for KDE marginal plots.
    hex_gridsize : int, optional
        ``gridsize`` for the hexbin joint plot. If ``None`` it is the mean
        of the Freedman-Diaconis bin counts of the x and y data.
    space : numeric, optional
        The space between the joint and marginal plots.
    size : tuple, optional
        The width and height of the figure in inches.
    court_color : matplotlib color, optional
        Passed to ``draw_court`` as the color of the court lines.
    outer_lines : boolean, optional
        Passed to ``draw_court``.
    court_lw : float, optional
        Passed to ``draw_court`` as the linewidth of the court lines.
    flip_court : boolean, optional
        If ``True`` both axis limits are reversed, which flips the
        orientation of the court.
    {joint, marginal}_kws : dicts, optional
        Additional keyword arguments for the joint and marginal plot
        functions. The dictionaries passed in are not modified.
    kwargs : key, value pairs
        Extra keyword arguments merged into ``joint_kws``.

    Returns
    -------
    grid : JointGrid
        The JointGrid object with the shot chart plotted on it.

    Raises
    ------
    ValueError
        If ``joint_type`` or ``marginals_type`` is not a supported value.
    """
    # Validate first so that an invalid argument does not leave behind a
    # freshly created, partially drawn figure.
    if joint_type not in ("scatter", "kde", "hex"):
        raise ValueError("joint_type must be 'scatter', 'kde', or 'hex'.")
    if marginals_type not in ("both", "hist", "kde"):
        raise ValueError("marginals_type must be 'both', 'hist', or 'kde'.")

    # The joint_kws and marginal_kws idea was taken from seaborn.
    # Work on copies so the caller's dictionaries are never mutated.
    joint_kws = {} if joint_kws is None else dict(joint_kws)
    joint_kws.update(kwargs)
    marginal_kws = {} if marginal_kws is None else dict(marginal_kws)

    # If a colormap is not provided, then it is based off of the joint_color
    if cmap is None:
        cmap = sns.light_palette(joint_color, as_cmap=True)

    # Flip the court by reversing both axis limits
    if flip_court:
        xlim = xlim[::-1]
        ylim = ylim[::-1]

    # Create the JointGrid to draw the shot chart plots onto
    grid = sns.JointGrid(x=x, y=y, data=data, xlim=xlim, ylim=ylim,
                         space=space)

    # Joint Plot
    # Create the main plot of the joint shot chart
    if joint_type == "scatter":
        grid = grid.plot_joint(plt.scatter, color=joint_color, **joint_kws)

    elif joint_type == "kde":
        grid = grid.plot_joint(sns.kdeplot, cmap=cmap,
                               shade=joint_kde_shade, **joint_kws)

    else:  # joint_type == "hex"
        if hex_gridsize is None:
            # Get the number of bins for hexbin using Freedman-Diaconis rule
            # This idea was taken from seaborn, which got the calculation
            # from http://stats.stackexchange.com/questions/798/
            from seaborn.distributions import _freedman_diaconis_bins
            # Use the data resolved by the grid: x and y themselves may be
            # column names when ``data`` is given.
            x_bin = _freedman_diaconis_bins(grid.x)
            y_bin = _freedman_diaconis_bins(grid.y)
            hex_gridsize = int(np.mean([x_bin, y_bin]))

        grid = grid.plot_joint(plt.hexbin, gridsize=hex_gridsize, cmap=cmap,
                               **joint_kws)

    # Marginal plots
    # Create the plots on the axis of the main plot of the joint shot chart.
    if marginals_type == "both":
        grid = grid.plot_marginals(sns.distplot, color=marginals_color,
                                   **marginal_kws)

    elif marginals_type == "hist":
        grid = grid.plot_marginals(sns.distplot, color=marginals_color,
                                   kde=False, **marginal_kws)

    else:  # marginals_type == "kde"
        grid = grid.plot_marginals(sns.kdeplot, color=marginals_color,
                                   shade=marginals_kde_shade, **marginal_kws)

    # Set the size of the joint shot chart
    grid.fig.set_size_inches(size)

    # Extract the first axes, which is the main plot of the
    # joint shot chart, and draw the court onto it
    ax = grid.fig.get_axes()[0]
    draw_court(ax, color=court_color, lw=court_lw, outer_lines=outer_lines)

    # Get rid of the axis labels
    grid.set_axis_labels(xlabel="", ylabel="")
    # Get rid of all tick labels. Booleans work on every matplotlib
    # version; the legacy "off" string is no longer treated as False.
    ax.tick_params(labelbottom=False, labelleft=False)
    # Set the title above the top marginal plot
    ax.set_title(title, y=1.2, fontsize=18)

    return grid
def shot_chart_jointgrid(x, y, data=None, joint_type="scatter", title="",
                         joint_color="b", cmap=None, xlim=(-250, 250),
                         ylim=(422.5, -47.5), court_color="gray", court_lw=1,
                         outer_lines=False, flip_court=False,
                         joint_kde_shade=True, gridsize=None,
                         marginals_color="b", marginals_type="both",
                         marginals_kde_shade=True, size=(12, 11), space=0,
                         despine=False, joint_kws=None, marginal_kws=None,
                         **kwargs):
    """
    Returns a JointGrid object containing the shot chart.

    This function allows for more flexibility in customizing your shot chart
    than the ``shot_chart_jointplot`` function.

    Parameters
    ----------
    x, y : strings or vector
        The x and y coordinates of the shots taken. They can be passed in as
        vectors (such as a pandas Series) or as columns from the pandas
        DataFrame passed into ``data``.
    data : DataFrame, optional
        DataFrame containing shots where ``x`` and ``y`` represent the
        shot location coordinates.
    joint_type : { "scatter", "kde", "hex" }, optional
        The type of shot chart for the joint plot.
    title : str, optional
        The title for the plot.
    joint_color : matplotlib color, optional
        Color used to plot the shots on the joint plot.
    cmap : matplotlib Colormap object or name, optional
        Colormap for the range of data values. If one isn't provided, the
        colormap is derived from the value passed to ``joint_color``. Used
        for KDE and Hexbin joint plots.
    {x, y}lim : two-tuples, optional
        The axis limits of the plot. The defaults represent the out of
        bounds lines and half court line.
    court_color : matplotlib color, optional
        The color of the court lines.
    court_lw : float, optional
        The linewidth of the court lines.
    outer_lines : boolean, optional
        If ``True`` the out of bound lines are drawn in as a matplotlib
        Rectangle.
    flip_court : boolean, optional
        If ``True`` orients the hoop towards the bottom of the plot. Default
        is ``False``, which orients the court where the hoop is towards the
        top of the plot.
    joint_kde_shade : boolean, optional
        Default is ``True``, which shades in the KDE contours on the joint
        plot.
    gridsize : int, optional
        Number of hexagons in the x-direction. The default is calculated
        using the Freedman-Diaconis method.
    marginals_color : matplotlib color, optional
        Color used to plot the shots on the marginal plots.
    marginals_type : { "both", "hist", "kde" }, optional
        The type of plot for the marginal plots.
    marginals_kde_shade : boolean, optional
        Default is ``True``, which shades in the KDE contours on the
        marginal plots.
    size : tuple, optional
        The width and height of the plot in inches.
    space : numeric, optional
        The space between the joint and marginal plots.
    despine : boolean, optional
        If ``True``, hides the four spines of the joint plot.
    {joint, marginal}_kws : dicts
        Additional keyword arguments for joint and marginal plot components.
        The dictionaries passed in are not modified.
    kwargs : key, value pairs
        Keyword arguments for matplotlib Collection properties or seaborn
        plots. They are merged into ``joint_kws``.

    Returns
    -------
    grid : JointGrid
        The JointGrid object with the shot chart plotted on it.

    Raises
    ------
    ValueError
        If ``joint_type`` or ``marginals_type`` is not a supported value.
    """
    # Validate first so that an invalid argument does not leave behind a
    # freshly created, partially drawn figure.
    if joint_type not in ("scatter", "kde", "hex"):
        raise ValueError("joint_type must be 'scatter', 'kde', or 'hex'.")
    if marginals_type not in ("both", "hist", "kde"):
        raise ValueError("marginals_type must be 'both', 'hist', or 'kde'.")

    # The joint_kws and marginal_kws idea was taken from seaborn.
    # Work on copies so the caller's dictionaries are never mutated.
    joint_kws = {} if joint_kws is None else dict(joint_kws)
    joint_kws.update(kwargs)
    marginal_kws = {} if marginal_kws is None else dict(marginal_kws)

    # If a colormap is not provided, then it is based off of the joint_color
    if cmap is None:
        cmap = sns.light_palette(joint_color, as_cmap=True)

    # Flip the court so that the hoop is by the bottom of the plot
    if flip_court:
        xlim = xlim[::-1]
        ylim = ylim[::-1]

    # Create the JointGrid to draw the shot chart plots onto
    grid = sns.JointGrid(x=x, y=y, data=data, xlim=xlim, ylim=ylim,
                         space=space)

    # Joint Plot
    # Create the main plot of the joint shot chart
    if joint_type == "scatter":
        grid = grid.plot_joint(plt.scatter, color=joint_color, **joint_kws)

    elif joint_type == "kde":
        grid = grid.plot_joint(sns.kdeplot, cmap=cmap,
                               shade=joint_kde_shade, **joint_kws)

    else:  # joint_type == "hex"
        if gridsize is None:
            # Get the number of bins for hexbin using Freedman-Diaconis rule
            # This idea was taken from seaborn, which got the calculation
            # from http://stats.stackexchange.com/questions/798/
            from seaborn.distributions import _freedman_diaconis_bins
            # Use the data resolved by the grid: x and y themselves may be
            # column names when ``data`` is given.
            x_bin = _freedman_diaconis_bins(grid.x)
            y_bin = _freedman_diaconis_bins(grid.y)
            gridsize = int(np.mean([x_bin, y_bin]))

        grid = grid.plot_joint(plt.hexbin, gridsize=gridsize, cmap=cmap,
                               **joint_kws)

    # Marginal plots
    # Create the plots on the axis of the main plot of the joint shot chart.
    if marginals_type == "both":
        grid = grid.plot_marginals(sns.distplot, color=marginals_color,
                                   **marginal_kws)

    elif marginals_type == "hist":
        grid = grid.plot_marginals(sns.distplot, color=marginals_color,
                                   kde=False, **marginal_kws)

    else:  # marginals_type == "kde"
        grid = grid.plot_marginals(sns.kdeplot, color=marginals_color,
                                   shade=marginals_kde_shade, **marginal_kws)

    # Set the size of the joint shot chart
    grid.fig.set_size_inches(size)

    # Extract the first axes, which is the main plot of the
    # joint shot chart, and draw the court onto it
    ax = grid.fig.get_axes()[0]
    draw_court(ax, color=court_color, lw=court_lw, outer_lines=outer_lines)

    # Get rid of the axis labels
    grid.set_axis_labels(xlabel="", ylabel="")
    # Get rid of all tick labels. Booleans work on every matplotlib
    # version; the legacy "off" string is no longer treated as False.
    ax.tick_params(labelbottom=False, labelleft=False)
    # Set the title above the top marginal plot
    ax.set_title(title, y=1.2, fontsize=18)

    # Set the spines to match the rest of court lines, makes outer_lines
    # somewhat unnecessary
    for spine in ax.spines:
        ax.spines[spine].set_lw(court_lw)
        ax.spines[spine].set_color(court_color)
        # set the marginal spines to be the same as the rest of the spines
        grid.ax_marg_x.spines[spine].set_lw(court_lw)
        grid.ax_marg_x.spines[spine].set_color(court_color)
        grid.ax_marg_y.spines[spine].set_lw(court_lw)
        grid.ax_marg_y.spines[spine].set_color(court_color)

    if despine:
        for side in ("top", "bottom", "right", "left"):
            ax.spines[side].set_visible(False)

    return grid
def shot_chart(x, y, kind="scatter", title="", color="b", cmap=None,
               xlim=(-250, 250), ylim=(422.5, -47.5),
               court_color="gray", court_lw=1, outer_lines=False,
               flip_court=False, kde_shade=True, gridsize=None, ax=None,
               despine=False, **kwargs):
    """
    Returns an Axes object with player shots plotted.

    Parameters
    ----------
    x, y : vectors
        The x and y coordinates of the shots taken, passed in as vectors
        (such as a pandas Series). This function takes no ``data``
        argument, so column names are not supported here.
    kind : { "scatter", "kde", "hex" }, optional
        The kind of shot chart to create.
    title : str, optional
        The title for the plot.
    color : matplotlib color, optional
        Color used to plot the shots
    cmap : matplotlib Colormap object or name, optional
        Colormap for the range of data values. If one isn't provided, the
        colormap is derived from the value passed to ``color``. Used for KDE
        and Hexbin plots.
    {x, y}lim : two-tuples, optional
        The axis limits of the plot.
    court_color : matplotlib color, optional
        The color of the court lines.
    court_lw : float, optional
        The linewidth of the court lines.
    outer_lines : boolean, optional
        If ``True`` the out of bound lines are drawn in as a matplotlib
        Rectangle.
    flip_court : boolean, optional
        If ``True`` orients the hoop towards the bottom of the plot. Default
        is ``False``, which orients the court where the hoop is towards the
        top of the plot.
    kde_shade : boolean, optional
        Default is ``True``, which shades in the KDE contours.
    gridsize : int, optional
        Number of hexagons in the x-direction. The default is calculated
        using the Freedman-Diaconis method.
    ax : Axes, optional
        The Axes object to plot the court onto.
    despine : boolean, optional
        If ``True``, removes the spines.
    kwargs : key, value pairs
        Keyword arguments for matplotlib Collection properties or seaborn
        plots.

    Returns
    -------
    ax : Axes
        The Axes object with the shot chart plotted on it.

    Raises
    ------
    ValueError
        If ``kind`` is not "scatter", "kde" or "hex".
    """
    # Reject an unknown kind before touching the Axes so that a bad call
    # does not leave a half-configured plot behind.
    if kind not in ("scatter", "kde", "hex"):
        raise ValueError("kind must be 'scatter', 'kde', or 'hex'.")

    if ax is None:
        ax = plt.gca()

    if cmap is None:
        cmap = sns.light_palette(color, as_cmap=True)

    if not flip_court:
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
    else:
        ax.set_xlim(xlim[::-1])
        ax.set_ylim(ylim[::-1])

    # Booleans work on every matplotlib version; the legacy "off" string is
    # no longer interpreted as False by current releases.
    ax.tick_params(labelbottom=False, labelleft=False)
    ax.set_title(title, fontsize=18)

    draw_court(ax, color=court_color, lw=court_lw, outer_lines=outer_lines)

    if kind == "scatter":
        ax.scatter(x, y, c=color, **kwargs)

    elif kind == "kde":
        sns.kdeplot(x, y, shade=kde_shade, cmap=cmap, ax=ax, **kwargs)
        # kdeplot labels the axes; a shot chart does not want them
        ax.set_xlabel('')
        ax.set_ylabel('')

    else:  # kind == "hex"
        if gridsize is None:
            # Get the number of bins for hexbin using Freedman-Diaconis rule
            # This idea was taken from seaborn, which got the calculation
            # from http://stats.stackexchange.com/questions/798/
            from seaborn.distributions import _freedman_diaconis_bins
            x_bin = _freedman_diaconis_bins(x)
            y_bin = _freedman_diaconis_bins(y)
            gridsize = int(np.mean([x_bin, y_bin]))

        ax.hexbin(x, y, gridsize=gridsize, cmap=cmap, **kwargs)

    # Set the spines to match the rest of court lines, makes outer_lines
    # somewhat unnecessary
    for spine in ax.spines:
        ax.spines[spine].set_lw(court_lw)
        ax.spines[spine].set_color(court_color)

    if despine:
        for side in ("top", "bottom", "right", "left"):
            ax.spines[side].set_visible(False)

    return ax
def _top3_rates(series):
    """Return the two most frequent values of ``series`` with their relative
    frequencies, followed by an aggregated '其他' ("other") entry unless the
    series has exactly one or two distinct values."""
    rates = series.value_counts(normalize=True).to_dict()
    values = list(rates.keys())
    if len(values) == 1:
        return [{'key': values[0], 'value': 1.00}]
    top = [{'key': value, 'value': rates[value]} for value in values[:2]]
    if len(values) != 2:
        top.append({
            'key': '其他',
            'value': 1 - sum([rates[value] for value in values[:2]])
        })
    return top


def _single_row_frame(columns, values, label):
    """Build a one-row DataFrame labelled ``label`` that pairs each column
    with the value at the same position."""
    return pd.DataFrame(dict(zip(columns, values)), index=[label])


def _flag_counts(hr, mask, hr_flag):
    """Count the rows of ``hr`` selected by ``mask`` per ``hr_flag`` value."""
    return hr[mask][hr_flag].value_counts().to_dict()


def data_describe(hr, hr_flag):
    """Summarise the numeric and the object-typed columns of ``hr``.

    Parameters
    ----------
    hr : DataFrame
        The data to describe. It is not modified.
    hr_flag : column label
        Column of ``hr`` whose values 0 and 1 are counted as the "negative"
        and "positive" class of each feature value.

    Returns
    -------
    ret_dict : dict
        ``'describe_numeric'`` and ``'describe_category'``, each a DataFrame
        with one column per feature. On top of the rows produced by
        ``describe`` (renamed to ``feature*``) they carry the rows
        ``featureName``, ``top3``, ``featureRange``, ``featureValueCounts``
        and ``featureFreqs``; the numeric frame also has ``featureVar``.

    Notes
    -----
    ``describe``, ``univariate_kdeplot``, ``find_bin`` and
    ``_freedman_diaconis_bins`` are defined outside this block.
    NOTE(review): ``describe`` is assumed to behave like
    ``DataFrame.describe`` (rows 'std', 'min', 'max' are read from its
    result) - confirm against its definition.
    """
    ret_dict = {}

    # ---- numeric features -------------------------------------------------
    describe_numeric = describe(hr, include='number')
    describe_numeric.loc['featureVar', :] = describe_numeric.loc['std', :]**2
    numeric_cols = describe_numeric.columns.values

    top3_numeric = _single_row_frame(
        numeric_cols, [[_top3_rates(hr[col])] for col in numeric_cols],
        'top3')
    range_numeric = _single_row_frame(
        numeric_cols,
        ['[' + str(mi) + ', ' + str(ma) + ']'
         for mi, ma in zip(describe_numeric.loc['min', :].tolist(),
                           describe_numeric.loc['max', :].tolist())],
        'featureRange')
    value_counts_numeric = _single_row_frame(
        numeric_cols,
        [[hr[col].value_counts().to_dict()] for col in numeric_cols],
        'featureValueCounts')
    name_numeric = _single_row_frame(
        numeric_cols, numeric_cols.tolist(), 'featureName')

    # pd.concat instead of DataFrame.append, which pandas 2.0 removed.
    describe_numeric = pd.concat([
        describe_numeric, name_numeric, top3_numeric, range_numeric,
        value_counts_numeric
    ])
    describe_numeric.rename(
        {
            'count': "featureCount",
            'mean': "featureMean",
            'std': "featureStd",
            'min': "featureMin",
            '25%': "featurePer25",
            '50%': "featurePer50",
            '75%': "featurePer75",
            'max': "featureMax",
        },
        inplace=True)

    distribution_list = []
    for feature in numeric_cols:
        # Work on a float copy of the column so the caller's frame keeps
        # its original dtypes.
        values = hr[feature].astype(np.float64)

        x, y = univariate_kdeplot(values)
        kde_list = list(zip(x.tolist(), y.tolist()))

        n_bins = min(_freedman_diaconis_bins(values), 50)
        # Only the bin edges are needed; the densities are recomputed per
        # class below.
        _, edges = np.histogram(values, bins=n_bins, density=True)
        edges = edges.tolist()
        # bin width * sample size turns a per-bin count into a density
        devided_number = (edges[1] - edges[0]) * len(values)

        positive_dict = collections.OrderedDict(
            (edge, 0) for edge in edges[:-1])
        negative_dict = collections.OrderedDict(
            (edge, 0) for edge in edges[:-1])

        for feature_value in list(values.value_counts().to_dict().keys()):
            pos_neg_value_counts = _flag_counts(
                hr, values == feature_value, hr_flag)
            # find_bin must return one of the left bin edges used as keys
            # above, otherwise the updates below raise KeyError.
            value_bin = find_bin(edges, feature_value)
            if 0 in pos_neg_value_counts:
                negative_dict[value_bin] += pos_neg_value_counts[0]
            if 1 in pos_neg_value_counts:
                positive_dict[value_bin] += pos_neg_value_counts[1]

        positive_list = [(edge, count / devided_number)
                         for edge, count in positive_dict.items()]
        negative_list = [(edge, count / devided_number)
                         for edge, count in negative_dict.items()]

        distribution_list.append([{
            'feature_name': feature,
            'feature_details': {
                'positive': positive_list,
                'negative': negative_list,
                'kde': kde_list
            }
        }])

    numeric_distributions = _single_row_frame(
        numeric_cols, distribution_list, 'featureFreqs')
    describe_numeric = pd.concat([describe_numeric, numeric_distributions])
    ret_dict['describe_numeric'] = describe_numeric

    # ---- categorical (object) features ------------------------------------
    describe_category = describe(hr, include='object')
    category_cols = describe_category.columns.values

    top3_category = _single_row_frame(
        category_cols, [[_top3_rates(hr[col])] for col in category_cols],
        'top3')
    # The "range" of a categorical feature is the list of its values,
    # most frequent first.
    range_category = _single_row_frame(
        category_cols,
        [', '.join(map(str, list(hr[col].value_counts().index)))
         for col in category_cols],
        'featureRange')
    value_counts_category = _single_row_frame(
        category_cols,
        [[hr[col].value_counts().to_dict()] for col in category_cols],
        'featureValueCounts')
    name_category = _single_row_frame(
        category_cols, category_cols.tolist(), 'featureName')

    describe_category = pd.concat([
        describe_category, name_category, top3_category, range_category,
        value_counts_category
    ])
    describe_category.rename(
        {
            'count': "featureCount",
            'unique': "featureUnique",
            'top': "featureTop",
            'freq': "featureFreq"
        },
        inplace=True)

    distribution_list = []
    for feature in category_cols:
        positive_list, negative_list = [], []
        for feature_value in list(hr[feature].value_counts().to_dict().keys()):
            pos_neg_value_counts = _flag_counts(
                hr, hr[feature] == feature_value, hr_flag)
            if 0 in pos_neg_value_counts:
                negative_list.append((feature_value, pos_neg_value_counts[0]))
            if 1 in pos_neg_value_counts:
                positive_list.append((feature_value, pos_neg_value_counts[1]))

        distribution_list.append([{
            'feature_name': feature,
            'feature_details': {
                'positive': positive_list,
                'negative': negative_list
            }
        }])

    category_distributions = _single_row_frame(
        category_cols, distribution_list, 'featureFreqs')
    describe_category = pd.concat([describe_category, category_distributions])
    ret_dict['describe_category'] = describe_category

    return ret_dict
'mult': est.vmnu[np.arange(est.N), est.qun.argmax(axis=1), :].argmax(axis=1) + 1 }) mut_table = mut_table.assign(vaf=mut_table.var_counts / mut_table.depth) mut_table = mut_table.assign(vaf_cn=mut_table.vaf * mut_table['total_cn'] / mut_table['mult']) mut_table = mut_table.assign( vaf_purity=mut_table.apply(lambda x: x['vaf'] / est.p * ( (1 - est.p) * 2 + est.p * x['total_cn']) / x['mult'], axis=1)) mut_table = mut_table.assign(trinucleotide=pd.Categorical( mut_table.trinucleotide, ordered=True, categories=range(96))) nb_bins = min(_freedman_diaconis_bins(mut_table.vaf_purity) * 2, 50) final_bins = np.linspace(min(mut_table.vaf_purity), max(mut_table.vaf_purity), nb_bins) # fig, ax = plt.subplots(1, figsize=(8, 28), sharex=False, gridspec_kw={'hspace': 0.08, 'wspace': 0, 'height_ratios': [1, 6, 1]}) clone_cols = sns.husl_palette(mut_table.clone.nunique(), l=0.8, s=.7) est_sigs = [ s for s in selected_sigs if s in mut_table.signature.unique() ] mylist = [color_dict[s] for s in est_sigs] my_palette = sns.color_palette(mylist) #cols = sns.color_palette("Set2", len(est_sigs)) cols = sns.color_palette(my_palette, len(est_sigs)) clone_cols = sns.husl_palette(mut_table.clone.nunique(), l=0.8, s=0.7) fig = plt.figure(figsize=(23, 10), dpi=80)