Exemplo n.º 1
0
 def _update_plot(self, axis, view):
     """Draw ``view`` with the seaborn function selected by ``self.plot_type``.

     ``self.style`` is forwarded to the seaborn call as keyword arguments.
     For 'boxplot' the 'return_type' and 'figsize' keys are removed from
     ``self.style`` first.  For the grid types ('pairplot', 'pairgrid',
     'facetgrid') every style key containing 'map' is removed from
     ``self.style`` and applied afterwards as ``g.<key>(plot_fn, *args)``;
     the option value is a sequence whose first item names a function
     looked up on ``sns`` (falling back to ``plt``) and whose remaining
     items are passed through as positional arguments.  Any other plot
     type is delegated to the parent class.

     Parameters
     ----------
     axis : axes object passed as ``ax=`` to the axes-level seaborn calls.
     view : object providing ``data`` plus the ``x``, ``y`` (and for
         'interact' also ``x2``) names used by the selected plot.
     """
     if self.plot_type == 'regplot':
         sns.regplot(x=view.x,
                     y=view.y,
                     data=view.data,
                     ax=axis,
                     **self.style)
     elif self.plot_type == 'boxplot':
         self.style.pop('return_type', None)
         self.style.pop('figsize', None)
         sns.boxplot(view.data[view.y],
                     view.data[view.x],
                     ax=axis,
                     **self.style)
     elif self.plot_type == 'violinplot':
         sns.violinplot(view.data[view.y],
                        view.data[view.x],
                        ax=axis,
                        **self.style)
     elif self.plot_type == 'interact':
         sns.interactplot(view.x,
                          view.x2,
                          view.y,
                          data=view.data,
                          ax=axis,
                          **self.style)
     elif self.plot_type == 'corrplot':
         sns.corrplot(view.data, ax=axis, **self.style)
     elif self.plot_type == 'lmplot':
         sns.lmplot(x=view.x,
                    y=view.y,
                    data=view.data,
                    ax=axis,
                    **self.style)
     elif self.plot_type in ['pairplot', 'pairgrid', 'facetgrid']:
         # Snapshot the keys before popping: removing entries while
         # iterating the live key view raises RuntimeError on Python 3.
         map_keys = [k for k in list(self.style.keys()) if 'map' in k]
         map_opts = [(k, self.style.pop(k)) for k in map_keys]
         if self.plot_type == 'pairplot':
             g = sns.pairplot(view.data, **self.style)
         elif self.plot_type == 'pairgrid':
             g = sns.PairGrid(view.data, **self.style)
         elif self.plot_type == 'facetgrid':
             g = sns.FacetGrid(view.data, **self.style)
         for opt, args in map_opts:
             # Prefer a seaborn plotting function, fall back to pyplot.
             plot_fn = getattr(sns, args[0]) if hasattr(
                 sns, args[0]) else getattr(plt, args[0])
             getattr(g, opt)(plot_fn, *args[1:])
         # The grid was drawn on a figure of its own, so drop the stored
         # figure and track the current one instead.
         plt.close(self.handles['fig'])
         self.handles['fig'] = plt.gcf()
     else:
         super(SNSFramePlot, self)._update_plot(axis, view)
Exemplo n.º 2
0
 def _update_plot(self, axis, view):
     """Render ``view`` using the seaborn routine named by ``self.plot_type``.

     The keyword options come from ``self._process_style`` applied to the
     style entry at ``self.cyclic_index``.  Axes-level plots are drawn on
     ``axis``.  The grid types ('pairplot', 'pairgrid', 'facetgrid') build
     their own figure, which then replaces ``self.handles['fig']``.
     Unrecognised plot types are handed to the parent implementation.
     """
     style = self._process_style(self.style[self.cyclic_index])
     kind = self.plot_type

     if kind == 'factorplot':
         # A second x dimension, when present, becomes the hue variable.
         opts = dict(style)
         if view.x2:
             opts['hue'] = view.x2
         sns.factorplot(x=view.x, y=view.y, data=view.data, **opts)
         return

     if kind == 'regplot':
         sns.regplot(x=view.x, y=view.y, data=view.data, ax=axis, **style)
         return

     if kind == 'boxplot':
         # These two options are dropped before calling seaborn.
         for unwanted in ('return_type', 'figsize'):
             style.pop(unwanted, None)
         sns.boxplot(view.data[view.y], view.data[view.x], ax=axis, **style)
         return

     if kind == 'violinplot':
         # Without an x dimension the whole frame is plotted.
         if view.x:
             positional = (view.data[view.y], view.data[view.x])
         else:
             positional = (view.data,)
         sns.violinplot(*positional, ax=axis, **style)
         return

     if kind == 'interact':
         sns.interactplot(view.x, view.x2, view.y,
                          data=view.data, ax=axis, **style)
         return

     if kind == 'corrplot':
         sns.corrplot(view.data, ax=axis, **style)
         return

     if kind == 'lmplot':
         sns.lmplot(x=view.x, y=view.y, data=view.data, ax=axis, **style)
         return

     if kind in ['pairplot', 'pairgrid', 'facetgrid']:
         # Pull every '*map*' option out of the style; they are applied to
         # the grid after it has been constructed.
         deferred = []
         for key in list(style.keys()):
             if 'map' in key:
                 deferred.append((key, style.pop(key)))

         factory_name = {'pairplot': 'pairplot',
                         'pairgrid': 'PairGrid',
                         'facetgrid': 'FacetGrid'}[kind]
         g = getattr(sns, factory_name)(view.data, **style)

         for method_name, spec in deferred:
             fn_name = spec[0]
             # Look the plotting function up on seaborn first, then pyplot.
             provider = sns if hasattr(sns, fn_name) else plt
             plot_fn = getattr(provider, fn_name)
             getattr(g, method_name)(plot_fn, *spec[1:])

         # Swap the stored figure for the one the grid was drawn on.
         plt.close(self.handles['fig'])
         self.handles['fig'] = plt.gcf()
         return

     super(SNSFramePlot, self)._update_plot(axis, view)
Exemplo n.º 3
0
 def _update_plot(self, axis, view):
     """Plot ``view`` with the seaborn function matching ``self.plot_type``.

     Options are obtained by running ``self._process_style`` on the style
     at ``self.cyclic_index``.  For the grid types ('pairplot', 'pairgrid',
     'facetgrid') the options whose key contains 'map' are applied to the
     grid after construction, the stored figure is closed only when
     ``self._close_figures`` is truthy, and ``self.handles['fig']`` is
     pointed at the current figure.  Other unknown plot types fall through
     to the parent class.
     """
     opts = self._process_style(self.style[self.cyclic_index])
     plot_type = self.plot_type

     if plot_type == 'factorplot':
         factor_opts = dict(opts)
         if view.x2:
             # Second x dimension is used as the hue variable.
             factor_opts['hue'] = view.x2
         sns.factorplot(x=view.x, y=view.y, data=view.data, **factor_opts)
     elif plot_type == 'regplot':
         sns.regplot(x=view.x, y=view.y, data=view.data, ax=axis, **opts)
     elif plot_type == 'boxplot':
         for dropped in ('return_type', 'figsize'):
             opts.pop(dropped, None)
         sns.boxplot(view.data[view.y], view.data[view.x], ax=axis, **opts)
     elif plot_type == 'violinplot':
         # Fall back to plotting the whole frame when no x is given.
         if not view.x:
             sns.violinplot(view.data, ax=axis, **opts)
         else:
             sns.violinplot(view.data[view.y], view.data[view.x],
                            ax=axis, **opts)
     elif plot_type == 'interact':
         sns.interactplot(view.x, view.x2, view.y, data=view.data,
                          ax=axis, **opts)
     elif plot_type == 'corrplot':
         sns.corrplot(view.data, ax=axis, **opts)
     elif plot_type == 'lmplot':
         sns.lmplot(x=view.x, y=view.y, data=view.data, ax=axis, **opts)
     elif plot_type in ['pairplot', 'pairgrid', 'facetgrid']:
         # Separate the deferred '*map*' options from constructor options.
         map_calls = []
         for name in list(opts.keys()):
             if 'map' in name:
                 map_calls.append((name, opts.pop(name)))

         if plot_type == 'pairplot':
             grid = sns.pairplot(view.data, **opts)
         elif plot_type == 'pairgrid':
             grid = sns.PairGrid(view.data, **opts)
         elif plot_type == 'facetgrid':
             grid = sns.FacetGrid(view.data, **opts)

         for name, call_args in map_calls:
             target = call_args[0]
             # seaborn takes precedence over pyplot for the lookup.
             if hasattr(sns, target):
                 plot_fn = getattr(sns, target)
             else:
                 plot_fn = getattr(plt, target)
             getattr(grid, name)(plot_fn, *call_args[1:])

         if self._close_figures:
             plt.close(self.handles['fig'])
         self.handles['fig'] = plt.gcf()
     else:
         super(SNSFramePlot, self)._update_plot(axis, view)
Exemplo n.º 4
0
def plot_synapse_2d(X, zh, output='synapse_cluster_2d.pdf'):
    """Scatter the first two principal components of ``X``, coloured by cluster.

    Parameters
    ----------
    X : data accepted by ``PCA.fit_transform``.
    zh : iterable of cluster assignments; each value is shifted by one and
        cast to ``int`` to form the hue labels.
    output : destination passed to the grid's ``savefig``.
    """
    x_name = r'$x_1$'
    y_name = r'$x_2$'
    cluster_name = r'$\mathcal{C}_j$'

    pca = PCA(n_components=2)
    X_new = pca.fit_transform(X)
    df = pd.DataFrame(data={
        x_name: X_new[:, 0],
        y_name: X_new[:, 1],
        cluster_name: [int(z + 1) for z in zh],
    })
    # x and y must be keywords: recent seaborn accepts only `data`
    # positionally, while older releases accept the keywords as well.
    g = sns.lmplot(x=x_name, y=y_name,
                   data=df, hue=cluster_name,
                   fit_reg=False, scatter=True, scatter_kws={"s": 4})
    g.set(xlabel=x_name, ylabel=y_name)
    g.savefig(output)
Exemplo n.º 5
0
def plot_synapse_2d(X, zh, output='synapse_cluster_2d.pdf'):
    """Project ``X`` to 2-D with PCA and save a per-cluster scatter plot.

    Parameters
    ----------
    X : input handed to ``PCA(n_components=2).fit_transform``.
    zh : iterable of cluster assignments; ``int(z + 1)`` of each value is
        used as the hue label.
    output : path given to ``savefig`` on the returned seaborn grid.
    """
    pca = PCA(n_components=2)
    X_new = pca.fit_transform(X)
    d = {
        r'$x_1$': X_new[:, 0],
        r'$x_2$': X_new[:, 1],
        r'$\mathcal{C}_j$': [int(z + 1) for z in zh]
    }
    df = pd.DataFrame(data=d)
    # Pass x/y by keyword; current seaborn rejects them as positionals
    # (only `data` may be positional), and the keywords also work on
    # older releases.
    g = sns.lmplot(x=r'$x_1$',
                   y=r'$x_2$',
                   data=df,
                   hue=r'$\mathcal{C}_j$',
                   fit_reg=False,
                   scatter=True,
                   scatter_kws={"s": 4})
    g.set(xlabel=r'$x_1$', ylabel=r'$x_2$')
    g.savefig(output)
Exemplo n.º 6
0
    def lmplot(self, x=None, y=None, hue=None, data=None, *args, **kwargs):
        """
        Plot data and regression model fits for every (y, x) column pair.

        One ``seaborn.lmplot`` figure is drawn per pair.  Rows where either
        column is NaN or infinite are dropped for that pair, and a warning
        is logged for each column containing such values.  All figures are
        displayed with ``plt.show()`` at the end.

        Parameters
        ----------
        x : column name, or list/tuple/ndarray/Index of column names in
            data, used as the independent variable(s)

        y : column name, or list/tuple/ndarray/Index of column names in
            data, each regressed on every x above

        hue : the name of a variable in data that provides labels for each
            category

        data : pandas dataframe

        *args : forwarded positionally to seaborn.lmplot

        **kwargs : other arguments in seaborn.lmplot (``x``, ``y``,
            ``data``, ``hue``, ``legend`` and ``legend_out`` are already
            set by this method)

            palette : palette name, list, or dict, optional

            col_wrap : int, optional

            size : scalar, optional

            aspect : scalar, optional

            markers : matplotlib marker code or list of marker codes, optional

            share{x,y} : bool, optional

            x_estimator : callable that maps vector -> scalar, optional

            x_bins : int or vector, optional

            x_ci : 'ci', 'sd', int in [0, 100] or None, optional

            scatter : bool, optional

            fit_reg : bool, optional

            ci : int in [0, 100] or None, optional

            n_boot : int, optional

            units : variable name in data, optional

            order : int, optional

            logistic : bool, optional

            lowess : bool, optional

            robust : bool, optional

            logx : bool, optional

            {x,y}_partial : strings in data or matrices

            truncate : bool, optional

            {x,y}_jitter : floats, optional

            {scatter,line}_kws : dictionaries

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If data is not a DataFrame, if x or y is None, or if hue or
            any x / y column is not in data.

        References
        ----------
        Seaborn lmplot further documentation
        https://seaborn.pydata.org/generated/seaborn.lmplot.html
        """
        # check data
        if not isinstance(data, pd.DataFrame):
            raise ValueError('data must be pandas dataframe')

        # check x and y; a single column name is promoted to a list
        if x is None:
            raise ValueError('x can NOT be None')
        if not isinstance(x, (list, tuple, np.ndarray, pd.Index)):
            x = [x]
        if y is None:
            raise ValueError('y can NOT be None')
        if not isinstance(y, (list, tuple, np.ndarray, pd.Index)):
            y = [y]
        if hue is not None and hue not in data.columns.values:
            raise ValueError('{} is NOT in data'.format(hue))

        # Validate every requested column before touching any figure, so a
        # bad name fails fast instead of after some plots were produced.
        for col in list(y) + list(x):
            if col not in data.columns.values:
                raise ValueError('{} is NOT in data'.format(col))

        # no figure configuration needed
        plt.close()
        # iterate thru y, then every x for that y
        for col_y in y:
            # np.isfinite is False for NaN and +/-inf, i.e. exactly the
            # rows that have to be filtered out.
            b_not_nan = np.isfinite(data[col_y])
            if not b_not_nan.all():
                logger.warning('RUNTIME WARNING: {} column has inf or nan '
                               ''.format(col_y))

            for col_x in x:
                a_not_nan = np.isfinite(data[col_x])
                if not a_not_nan.all():
                    logger.warning('RUNTIME WARNING: {} column has inf or '
                                   'nan'.format(col_x))
                # joint filter
                not_nan = b_not_nan & a_not_nan
                grid = sns.lmplot(
                    *args,
                    x=col_x,
                    y=col_y,
                    data=data.loc[not_nan, :],
                    hue=hue,
                    legend=True,
                    legend_out=False,
                    **kwargs)

                ax = grid.fig.axes[0]
                ax.set_title(label='Reg Fit of {} on {} '
                             ''.format(col_y, col_x),
                             fontsize=self.title_fontsize)
                ax.set_xlabel(xlabel=col_x, fontsize=self.label_fontsize)
                ax.set_ylabel(ylabel=col_y, fontsize=self.label_fontsize)
                # 'major' is the valid spelling; 'maj' is not an accepted
                # value for `which`.
                ax.tick_params(
                    axis='both', which='major', labelsize=self.tick_fontsize)
                ax.legend(loc='upper right')
                grid.fig.subplots_adjust(wspace=0.5,
                                         hspace=0.3,
                                         left=0.125,
                                         right=0.9,
                                         top=0.9,
                                         bottom=0.1)
                grid.fig.tight_layout()
        plt.show()
Exemplo n.º 7
0
# Scatter plot of hwy against cty drawn with red crosses.
plt.figure(2)
plt.plot(CarData['hwy'], CarData['cty'], 'r+')
plt.xlabel('hwy')
plt.ylabel('cty')
plt.title('Scatter plot of hwy vs cty')
plt.grid(True)
plt.show()

# You can also change the size of the points depending on a variable.
# E.g. to draw brands with more observations larger, pass the per-row
# brand count as the marker size "s":
CarData['counts'] = CarData.groupby(['make'])['make'].transform('count')

plt.figure(3)
plt.scatter(CarData['hwy'],
            CarData['cty'],
            marker='o',
            c='r',
            s=CarData['counts'])
plt.xlabel('hwy')
plt.ylabel('cty')
plt.title('Scatter plot of hwy vs cty')
plt.grid(True)
plt.show()

# sns.regplot and sns.lmplot can be used as well.  Note the different
# spelling: regplot takes `marker`, whereas lmplot takes `markers`
# (it has no `marker` keyword and would raise TypeError).
sns.regplot(x='cty', y='hwy', marker="+", ci=95, data=CarData)
sns.lmplot(x="cty", y="hwy", markers="o", ci=95, data=CarData)
Exemplo n.º 8
0
plt.pie(peace_age, labels=peace_age.index, autopct='%1.1f%%')
plt.show()


# Age against year with a regression fit and marginal distributions.
sns.jointplot(x="Year",
              y="Age",
              kind='reg',
              data=data)
plt.show()

# Age distribution per category.
sns.boxplot(data=data,
            x='Category',
            y='Age')
plt.show()

# Lowess-smoothed trend of age over the years.  x and y are passed by
# keyword because current seaborn accepts only `data` positionally.
sns.lmplot(x='Year', y='Age', data=data, lowess=True, aspect=2,
           line_kws={'color': 'black'})
plt.show()


# Question 2: What words are most frequently written in the prize motivation?
top_N = 10
stopwords = nltk.corpus.stopwords.words('english')
# One alternation matching any stopword as a whole word.
re_stopwords = r'\b(?:{})\b'.format('|'.join(stopwords))
# Lower-case the motivations, blank out '|' and stopwords, then flatten
# everything into a single list of words.
words = (data['Motivation']
         .str.lower()
         .replace([r'\|', re_stopwords], [' ', ' '], regex=True)
         .str.cat(sep=' ')
         .split()
         )

Exemplo n.º 9
0
	def plot(self, show_samples, show_loadings, sbrn_plt):
		"""Run PCA on ``self.X`` and plot the first two components.

		The figure is shown with ``plt.show()`` and saved as ``pca.pdf`` in
		the parent of the current working directory.

		Parameters
		----------
		show_samples : when True (and ``sbrn_plt`` is False) the samples are
			scattered, coloured by author and labelled with the last
			``_``-separated part of their title; when False only the
			feature loadings are plotted.
		show_loadings : with ``show_samples`` True, overlays the feature
			loadings on a twin pair of axes when True.
		sbrn_plt : when False the matplotlib plots above are used; any
			other value draws a seaborn ``lmplot`` scatter coloured by
			author instead.
		"""
		# Normalizer and Delta perform badly
		# They flatten out all difference in a PCA plot

		pca = PCA(n_components=self.n_components)
		X_bar = pca.fit_transform(self.X)
		explained_variance = np.round(sum(pca.explained_variance_ratio_)*100, decimals=2)
		loadings = pca.components_.transpose()

		if sbrn_plt == False:

			# Generate color dictionary
			color_dict = {author:index for index, author in enumerate(sorted(set(self.authors)))}
			cmap = discrete_cmap(len(color_dict), base_cmap='brg')

			if show_samples == True:

				fig = plt.figure(figsize=(8,6))
				ax = fig.add_subplot(111)
				x1, x2 = X_bar[:,0], X_bar[:,1]

				# If anything needs to be invisible in plot, add to exclusion_list

				# Invisible scatter of all samples; the visible points are drawn one by one below
				ax.scatter(x1, x2, 100, edgecolors='none', facecolors='none', cmap='rainbow')
				for p1, p2, a, title in zip(x1, x2, self.authors, self.titles):
					ax.scatter(p1, p2, marker='o', color=cmap(color_dict[a]), s=20)
					ax.text(p1, p2, title.split('_')[-1], color='black', fontdict={'size': 5})

				# Legend settings (code for making a legend)
				# Iterate the sorted authors so the legend order is deterministic

				collected_patches = []
				for author in sorted(color_dict):
					legend_patch = mpatches.Patch(color=cmap(color_dict[author]), label=author.split('-')[0])
					collected_patches.append(legend_patch)
				plt.legend(handles=collected_patches, fontsize=7)

				ax.set_xlabel('Principal Component 1 \n \n Explained Variance: {}% \n Sample Size: {} words/sample \n Number of Features: {} features'.format(str(explained_variance), str(self.sample_size), str(len(self.features))), fontdict={'size': 7})
				ax.set_ylabel('Principal Component 2', fontdict={'size': 7})

				if show_loadings == True:
					ax2 = ax.twinx().twiny()
					l1, l2 = loadings[:,0], loadings[:,1]
					ax2.scatter(l1, l2, 100, edgecolors='none', facecolors='none')
					for x, y, feature in zip(l1, l2, self.features):
						ax2.text(x, y, feature, ha='center', va="center", color="black",
							fontdict={'family': 'Arial', 'size': 6})

					# Align axes

					# Important to adjust margins first when function words fall outside plot
					# This is due to the axes aligning (def align).

					ax2.margins(x=0.14, y=0.14)
					align_xaxis(ax, 0, ax2, 0)
					align_yaxis(ax, 0, ax2, 0)
					plt.axhline(y=0, ls="--", lw=0.5, c='0.75')
					plt.axvline(x=0, ls="--", lw=0.5, c='0.75')
					plt.tight_layout()
					plt.show()

				elif show_loadings == False:

					plt.axhline(y=0, ls="--", lw=0.5, c='0.75')
					plt.axvline(x=0, ls="--", lw=0.5, c='0.75')

					plt.tight_layout()
					plt.show()

					# Converting PDF to PNG, use pdftoppm in terminal and -rx -ry for resolution settings

				fig.savefig(os.path.dirname(os.getcwd()) + "/pca.pdf", transparent=True, format='pdf')

			elif show_samples == False:

				fig = plt.figure(figsize=(8, 6))
				ax2 = fig.add_subplot(111)
				l1, l2 = loadings[:,0], loadings[:,1]
				ax2.scatter(l1, l2, 100, edgecolors='none', facecolors='none')
				# Feature names live on the instance; a bare `features` is undefined here
				for x, y, feature in zip(l1, l2, self.features):
					ax2.text(x, y, feature, ha='center', va='center', color='black',
						fontdict={'family': 'Arial', 'size': 6})

				ax2.set_xlabel('PC1')
				ax2.set_ylabel('PC2')

				# No axis alignment here: this branch has a single axes, so
				# there is no sample axes to align the loadings with.

				plt.axhline(y=0, ls="--", lw=0.5, c='0.75')
				plt.axvline(x=0, ls="--", lw=0.5, c='0.75')

				plt.tight_layout()
				plt.show()
				fig.savefig(os.path.dirname(os.getcwd()) + "/pca.pdf", bbox_inches='tight', transparent=True, format='pdf')

				# Converting PDF to PNG, use pdftoppm in terminal and -rx -ry for resolution settings

		else:

			data = [(title.split("_")[0], author, pc1, pc2) for [pc1, pc2], title, author in zip(X_bar, self.titles, self.authors)]
			df = pd.DataFrame(data, columns=['title', 'author', 'PC1', 'PC2'])

			sns.set_style('darkgrid')
			# x and y are passed by keyword: current seaborn only accepts `data` positionally
			sns_plot = sns.lmplot(x='PC1', y='PC2', data=df, fit_reg=False, hue="author",
				scatter_kws={"marker": "+","s": 100}, markers='o', legend=False)

			plt.legend(loc='upper right')
			plt.tight_layout()
			plt.show()

			sns_plot.savefig(os.path.dirname(os.getcwd()) + "/pca.pdf")