def get_hara_stats(df):
    # Computes the relative standard deviation of each of the 13 Haralick
    # foreground features; takes a dataframe with 'fgHaralick' and 'breed' columns
    x = list(range(1,14))
    xs = []
    haraFG = []
    breed = []
    for i in range(df.shape[0]):
        a = df.iloc[i]
        xs.append(x)
        haraFG.append(a.fgHaralick)
        breed.append([a.breed]*13)
    
    newDF = pd.DataFrame(columns=['Haralick feature', 'Haralick FG feature value', 'breed'])
    newDF['Haralick feature'] = np.array(xs).flatten()
    newDF['Haralick FG feature value'] = np.array(haraFG).flatten()
    newDF['breed'] = np.array(breed).flatten()
    stds = []
    for i in x:
        stds.append(newDF[newDF['Haralick feature']==i]['Haralick FG feature value'].std()
                    / newDF[newDF['Haralick feature']==i]['Haralick FG feature value'].mean())
    
    data = np.vstack((np.array(x), np.array(stds))).T
    pltDF = pd.DataFrame(columns=['Haralick feature', 'relative standard deviation'], data=data)
    sns.lmplot(x='Haralick feature', y='relative standard deviation', data=pltDF, fit_reg=False)
    plt.xticks(x)
    plt.show()
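For comparison, the same per-feature relative standard deviation can be computed without the row loop; a minimal sketch, assuming the same layout (one 13-element fgHaralick array per row):

import numpy as np
import pandas as pd

def hara_relative_std(df):
    # Stack the 13 foreground Haralick features into long format:
    # one row per (image, feature) pair.
    long_df = pd.DataFrame({
        'Haralick feature': np.tile(np.arange(1, 14), len(df)),
        'value': np.concatenate([np.asarray(v) for v in df.fgHaralick]),
    })
    # Relative standard deviation (std / mean) for each of the 13 features.
    grouped = long_df.groupby('Haralick feature')['value']
    return grouped.std() / grouped.mean()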
Example #2
 def finalLayer(self, X, y, epochs=1):
     print("Final Layer")
     V = self.predict(X)
     softmax = Layers.SoftmaxLayer(self.Layers[-1].W.shape[1], y.shape[1]) 
     
     ############################
     # Final layer of THE MODEL #
     ############################
     batchsize = X.shape[0]
     softmax, dA_avg_perm = Trainer().train([softmax], V, y, epochs, batchsize)
     self.Layers.append(softmax[0])
     
     ##############
     # Format     #
     ##############
     LLdata = [float(L) for L in dA_avg_perm]
     LLiter = [float(it) for it in range(epochs)]
     dfpredata = pd.DataFrame( LLdata )
     dfpredata.columns = ['dA_avg_costs']
     dfpredata['iter'] = LLiter
     
     ############
     ### plot the cost
     ### the cost we minimize during training is the negative
     ### log likelihood of the model
     ############
     plt.figure()
     sns.lmplot('iter', 'dA_avg_costs', data=dfpredata, fit_reg=False)
     plt.xlabel('epoch', fontsize=14)
     plt.ylabel('softmax error', fontsize=14)
     plt.title('softmax_train_epochs_'+str(epochs), fontsize=9)
     plt.savefig('grid_searchResults/'+'softmax_train_'+str(self.item)+'.png')
Example #3
 def fine_tune(self, X, y, epochs=1):
     print("Fine Tuning")
     #########################
     # Fine Tuning THE MODEL #
     #########################
     batchsize = X.shape[0]
     self.Layers, dA_avg_perm = Trainer().train(self.Layers, X, y, epochs, batchsize)
     
     ##############
     # Format     #
     ##############
     LLdata = [float(L) for L in dA_avg_perm ]
     LLiter = [float(it) for it in range(epochs)]
     dfinedata = pd.DataFrame( LLdata )
     dfinedata.columns = ['dA_avg_costs']
     dfinedata['iter'] = LLiter
     
     ############
     ### plot the cost
     ### the cost we minimize during training is the negative
     ### log likelihood of the model
     ############
     plt.figure()
     sns.lmplot('iter', 'dA_avg_costs', data=dfinedata, fit_reg=False)
     plt.xlabel('epoch', fontsize=14)
     plt.ylabel('finetune error', fontsize=14)
     plt.title('fine_tune_structure_'+str([self.structure])+'_train_epochs_'+str(epochs), fontsize=9)
     plt.savefig('grid_searchResults/'+'fine_tune_structure_'+str(self.item)+'.png')
 def visualize_data(self):
     """
      Transforms the DataFrame to two dimensions and visualizes the data. The first tags are used as labels.
     :return:
     """
     logging.debug("Preparing visualization of DataFrame")
     # Reduce dimensionality to 2 features for visualization purposes
     X_visualization = self.reduce_dimensionality(self.X, n_features=2)
     df = self.prepare_dataframe(X_visualization)
      # Set X and Y coordinates for each article
     df['X coordinate'] = df['coordinates'].apply(lambda x: x[0])
     df['Y coordinate'] = df['coordinates'].apply(lambda x: x[1])
     # Create a list of markers, each tag has its own marker
     n_tags_first = len(self.df['tags_first'].unique())
     markers_choice_list = ['o', 's', '^', '.', 'v', '<', '>', 'D']
     markers_list = [markers_choice_list[i % 8] for i in range(n_tags_first)]
     # Create scatter plot
     sns.lmplot("X coordinate",
                "Y coordinate",
                hue="tags_first",
                data=df,
                fit_reg=False,
                markers=markers_list,
                scatter_kws={"s": 150})
     # Adjust borders and add title
     sns.set(font_scale=2)
     sns.plt.title('Visualization of TMT articles in a 2-dimensional space')
     sns.plt.subplots_adjust(right=0.80, top=0.90, left=0.12, bottom=0.12)
     # Show plot
     sns.plt.show()
Example #5
File: deal_rs.py  Project: Kimice/Recoba
def deal_rs():
    data_set = pd.read_csv('datas/result.csv')
    data_set.columns = ['AvH', 'AvD', 'AvA', 'Hc', 'Dc', 'Ac', 'R', 'P']

    sns.set(style='ticks')
    sns.lmplot(x='R', y='P', data=data_set)
    sns.plt.show()
Example #6
def view_timeline(df,x="unix_time",y="rate",plt=plt):
    asset = df.symbol.values[0]
    plt.figure(1, figsize=(15,15))
    sns.lmplot(x=x, y=y, hue="type", data=df, palette=dict(sell="r", buy="g"))
    plt.ylim(df[y].min(), df[y].max())
    plt.title(y+' over time ('+asset+')')
    plt.show()
def plot_compare_median_consensus(output_dir, df_order, metric, type='ts', DISPLAY=0):
    plt.figure()


    if type == 'ts':
        #sb.tsplot(data=df_order, value=metric,time='order',unit="algorithm",condition="algorithm",err_style="unit_traces")

        ax = sb.boxplot(x=metric, y="algorithm", data=df_order,
                 whis=np.inf, color="c")

        # Add in points to show each observation
        sb.stripplot(x=metric, y="algorithm", data=df_order,
                jitter=True, size=3, color=".3", linewidth=0)
        ax.set_xscale("log")
        sb.despine(trim=True)

       # plt.xlabel('images sorted by the average neuron distance of the median reconstruction')
        plt.savefig(output_dir + '/ts_compare_median_with_consensus_'+metric+'.png', format='png')


    if type == 'lm':
        sb.lmplot(x="order", y=metric, hue="algorithm", data=df_order)
        plt.xlabel('images sorted by the average neuron distance of the median reconstruction')
        plt.savefig(output_dir + '/lm_compare_median_with_consensus_'+metric+'.lm.png', format='png')

    if DISPLAY:
         plt.show()
    plt.close()
Example #8
def vcf_stats(vcfin, outdir, sample):
    inp = vcf.Reader(open(vcfin))
    variants = list()
    ref = list()
    alt = list()
    basename = os.path.splitext(os.path.basename(vcfin))[0]
    genotype = {'0/0':'Homozygous Reference','0/1':'Heterozygous','1/1':'Homozygous Alternate', '1/2':'Non Reference Heterozygous'}
    for lines in inp:
        try:
            var = {'Chrom':lines.CHROM,'Pos':lines.POS, 'Ref':lines.REF, 'Alt': ','.join([str(alt) for alt in lines.ALT]),
                 'Sample':sample, 'Genotype': genotype[lines.genotype(sample)['GT']],'Depth_at_reference':lines.genotype(sample)['AD'][0],
                'Depth_at_alternate':lines.genotype(sample)['AD'][1]}
            variants.append(var)
        except KeyError:
            continue
    variants = pd.DataFrame(variants)
    plt.figure()
    sns.set(style='ticks', context='talk')
    sns.lmplot('Depth_at_reference','Depth_at_alternate',hue='Genotype', data=variants, fit_reg=False)
    plt.xlim([0,max([max(variants.Depth_at_reference), max(variants.Depth_at_alternate)])])
    plt.ylim([0,max([max(variants.Depth_at_reference), max(variants.Depth_at_alternate)])])
    plt.xlabel('Depth at reference allele')
    plt.ylabel('Depth at alternate allele')
    plt.title('Allelic depth distribution')
    plt.savefig(basename+'_allele_depth')
    plt.close()
    return 
Example #9
def plot_building_temp():
    sns.set_context("paper", font_scale=1.5)
    b = 'AZ0000FF'
    s = 'KTUS'
    filelist = glob.glob(os.getcwd() + '/csv_FY/testWeather/{0}*.csv'.format(b))
    dfs = [pd.read_csv(csv) for csv in filelist]
    col = 'eui_gas'
    dfs2 = [df[[col, 'month', 'year']] for df in dfs]
    df3 = (pd.concat(dfs2))

    temp = pd.read_csv(os.getcwd() + '/csv_FY/weather/weatherData_meanTemp.csv')
    temp['year'] = temp['Unnamed: 0'].map(lambda x: float(x[:4]))
    temp['month'] = temp['Unnamed: 0'].map(lambda x: float(x[5:7]))
    temp.set_index(pd.DatetimeIndex(temp['Unnamed: 0']), inplace=True)
    temp = temp[[s, 'month', 'year']]
    joint2 = pd.merge(df3, temp, on = ['year', 'month'], how = 'inner')
    joint2.to_csv(os.getcwd() + '/csv_FY/testWeather/test_temp.csv', index=False)

    sns.lmplot(s, col, data=joint2, col='year', fit_reg=False)
    plt.xlim((joint2[s].min() - 10, joint2[s].max() + 10))
    plt.ylim((0, joint2[col].max() + 0.1))
    P.savefig(os.getcwd() + '/csv_FY/testWeather/plot/scatter_temp_byyear.png', dpi=150)
    plt.close()

    joint2 = joint2[(2012 < joint2['year']) & (joint2['year'] < 2015)]
    sns.regplot(s, col, data=joint2, fit_reg=False)
    plt.xlim((joint2[s].min() - 10, joint2[s].max() + 10))
    plt.ylim((0, joint2[col].max() + 0.1))
    P.savefig(os.getcwd() + '/csv_FY/testWeather/plot/scatter_temp_1314.png', dpi=150)
    plt.close()
Example #10
    def plot_scatter_n_accuracy_joint(self, data_objects, labels, label_self, markers):
        """Make plot from this and other data objects.

        Args:
            data_objects ([Data]): Other Data objects to include in plot.
            labels ([str]): Labels to use for Data_objects.
            label_self (str): Label to use for this Data object.
            markers ([str]): Marker style for each dataset.

        Returns: Axis object.

        """
        dataframes = [self.df] + [data.df for data in data_objects]
        labels = [label_self] + labels

        acc = []
        n = []
        statistics = []
        for df, label in zip(dataframes, labels):
            acc = df.groupby('worker')['correct'].mean()
            n = df.groupby('worker')['question'].count()
            df_new = pd.concat([acc, n], axis=1)
            df_new['dataset'] = label
            statistics.append(df_new)

        df = pd.concat(statistics, axis=0)
        sns.lmplot('question', 'correct', data=df, hue='dataset',
                   markers=markers, fit_reg=False)
        plt.xlabel('Number of questions answered')
        plt.ylabel('Accuracy')
        plt.xlim((0, None))
        plt.ylim((0, 1))
        plt.title('')
        return plt.gca()
Example #11
def plot_data(data, has_label=True):
	import numpy as np
	import seaborn as sns
	from sklearn.manifold import TSNE
	from sklearn.decomposition import PCA

	if not has_label:
		data = data.copy()
		data['label'] = np.zeros([len(data),1])

	LIMIT = 4000
	if data.shape[0] > LIMIT:
		dt = data.sample(n=LIMIT, replace=False)
		X = dt.iloc[:, :-1]
		labels = dt.iloc[:, -1]
	else:
		X = data.iloc[:, :-1]
		labels = data.iloc[:, -1]

	tsne_model = TSNE(n_components=2, random_state=0)
	np.set_printoptions(suppress=True)
	points1 = tsne_model.fit_transform(X)
	df1 = pd.DataFrame(data=np.column_stack([points1,labels]), columns=["x","y","class"])
	sns.lmplot("x", "y", data=df1, hue='class', fit_reg=False, palette=sns.color_palette('colorblind'))
	sns.plt.title('t-SNE')

	pca = PCA(n_components=2)
	pca.fit(X)
	points2 = pca.transform(X)
	df2 = pd.DataFrame(data=np.column_stack([points2,labels]), columns=["x","y","class"])
	sns.lmplot("x", "y", data=df2, hue='class', fit_reg=False, palette=sns.color_palette('colorblind'))
	sns.plt.title('PCA')
Example #12
def fig2(ppl, fname):
  '''For each contact, plot number of characters sent and received. (UNUSED)'''
  sns.lmplot("lensent", "lenrec",ppl) 
  plt.xlabel('Characters Sent')
  plt.ylabel('Characters Received')
  sns.despine()
  savefig(fname)
def show_examples(idxs, printStd=True):
    # Prints example data for the supplied indices, idxs,
    # and plots the foreground Haralick features
    x = list(range(1,14))
    xs = []
    hara = []
    breed = []
    for idx in idxs:
        a = hNt.iloc[idx]
        xs.append(x)
        hara.append(np.log(abs(a.fgHaralick)))
        breed.append([a.breed] * 13)
        
        if printStd:
            print('breed:', a.breed)
            print('filename:', a.file)
            print('foreground Haralick:', a.fgHaralick)
            print('background Haralick:', a.bgHaralick)
    
    newDF = pd.DataFrame(columns=['Haralick feature', 'log(Haralick feature value)', 'breed'])
    newDF['Haralick feature'] = np.array(xs).flatten()
    newDF['log(Haralick feature value)'] = np.array(hara).flatten()
    newDF['breed'] = np.array(breed).flatten()
    newDF.sort_values(by='breed', inplace=True)
    sns.lmplot(x='Haralick feature', y='log(Haralick feature value)', data=newDF, fit_reg=False, hue='breed')
    plt.xticks(x)
    plt.show()
Example #14
File: weathernorm.py  Project: yujiex/GSA
def plot_building_temp():
    sns.set_context("paper", font_scale=1.5)
    b = "AZ0000FF"
    s = "KTUS"
    filelist = glob.glob(os.getcwd() + "/csv_FY/testWeather/{0}*.csv".format(b))
    dfs = [pd.read_csv(csv) for csv in filelist]
    col = "eui_gas"
    dfs2 = [df[[col, "month", "year"]] for df in dfs]
    df3 = pd.concat(dfs2)

    temp = pd.read_csv(os.getcwd() + "/csv_FY/weather/weatherData_meanTemp.csv")
    temp["year"] = temp["Unnamed: 0"].map(lambda x: float(x[:4]))
    temp["month"] = temp["Unnamed: 0"].map(lambda x: float(x[5:7]))
    temp.set_index(pd.DatetimeIndex(temp["Unnamed: 0"]), inplace=True)
    temp = temp[[s, "month", "year"]]
    joint2 = pd.merge(df3, temp, on=["year", "month"], how="inner")
    joint2.to_csv(os.getcwd() + "/csv_FY/testWeather/test_temp.csv", index=False)

    sns.lmplot(s, col, data=joint2, col="year", fit_reg=False)
    plt.xlim((joint2[s].min() - 10, joint2[s].max() + 10))
    plt.ylim((0, joint2[col].max() + 0.1))
    P.savefig(os.getcwd() + "/csv_FY/testWeather/plot/scatter_temp_byyear.png", dpi=150)
    plt.close()

    joint2 = joint2[(2012 < joint2["year"]) & (joint2["year"] < 2015)]
    sns.regplot(s, col, data=joint2, fit_reg=False)
    plt.xlim((joint2[s].min() - 10, joint2[s].max() + 10))
    plt.ylim((0, joint2[col].max() + 0.1))
    P.savefig(os.getcwd() + "/csv_FY/testWeather/plot/scatter_temp_1314.png", dpi=150)
    plt.close()
Example #15
def relationship_spearman_size_source(dir, model="logreg3", feats="ecfps1", dset="bcrp"):
    """
    Plots the relationship between the size of the source vs the average relevant Spearman corr coeff. One point per
    source on the plot.
    """
    small_dict = defaultdict(list)
    # list all spearman files
    for f in glob.glob(op.join(dir, "spearmans_*")):
        if "relfeats" in op.basename(f):
            source = op.basename(f).partition("_lso_relfeats_")[2].partition("_logreg")[0]
            print(source)
            small_dict["source"].append(source)
            small_dict["size"].append(len(ManysourcesDataset(dset).mols().sources2molids([source])))
            with open(f, "rb") as reader:
                dict_spearman = pickle.load(reader)
                spearmans = [x[0] for x in dict_spearman.values()]
                small_dict["average spearman"].append(np.mean(np.array(spearmans)))
    df = pd.DataFrame.from_dict(small_dict)
    import seaborn

    seaborn.set_style("ticks")
    seaborn.set_context("talk")
    seaborn.lmplot(
        "size",
        "average spearman",
        data=df,
        scatter_kws={"marker": "o", "color": "slategray"},
        line_kws={"linewidth": 1, "color": "seagreen"},
    )
    plt.show()
Example #16
File: run.py  Project: mageed/conv-exp
    def _corr(self, sel, suffix):
        formula = 'model_accuracy ~ human_accuracy'
        logreg = smf.logit(formula=formula, data=sel).fit()
        summ = logreg.summary()
        if self.html is None:
            print(summ)
        else:
            summ = summ.as_html().replace('class="simpletable"',
                                          'class="simpletable table"')

        sel = sel.rename(columns={'human_accuracy': 'human accuracy',
                                  'model_accuracy': 'model accuracy'})

        sns.lmplot('human accuracy', 'model accuracy', data=sel, x_jitter=.01,
                    y_jitter=.05, logistic=True, truncate=True)

        bins = np.digitize(sel['human accuracy'], np.arange(.05,1,.1))
        #bins[bins==11] = 10
        count = sel['model accuracy'].groupby(bins).count()
        mean = sel['model accuracy'].groupby(bins).mean()
        sns.plt.scatter(.1*mean.index, mean, s=10*count, c='.15',
                        linewidths=0, alpha=.8)
        sns.plt.title(models.NICE_NAMES[self.model_name])
        sns.plt.xlim([-.1, 1.1])
        sns.plt.ylim([-.1, 1.1])
        self.show(pref='corr_sil', suffix=self.model_name + '_' + suffix,
                  caption=suffix + summ)
Example #17
    def aucplot(cls, df):

        seaborn.lmplot(data=df,
                       x="b",
                       y="auc",
                       lowess=True,
                       size=5,
                       aspect=2) 
Example #18
def plot(data, total, title, width=800.0, unit='', dosort=True,
		target=None, target2=None):
	"""A HTML bar plot given a dictionary and max value."""
	if len(data) > 30 and target is not None:
		df = pandas.DataFrame(index=data)
		df[title] = pandas.Series(data, index=df.index)
		df[target.name] = target.ix[df.index]
		if target2 is not None:
			df[target2.name] = target2.ix[df.index]
		if numpy.issubdtype(target.dtype, numpy.number):
			if target2 is None:
				seaborn.jointplot(target.name, title, data=df, kind='reg')
			else:
				seaborn.lmplot(target.name, title, data=df, hue=target2.name)
		else:  # X-axis is categorical
			df.sort_values(by=target.name, inplace=True)
			if target2 is None:
				seaborn.barplot(target.name, title, data=df)
			else:
				seaborn.barplot(target.name, title, data=df, hue=target2.name)
			fig = plt.gcf()
			fig.autofmt_xdate()
		# Convert to D3, SVG, javascript etc.
		# import mpld3
		# result = mpld3.fig_to_html(plt.gcf(), template_type='general',
		# 		use_http=True)

		# Convert to PNG
		figfile = io.BytesIO()
		plt.savefig(figfile, format='png')
		result = '<div><img src="data:image/png;base64, %s"/></div>' % (
				base64.b64encode(figfile.getvalue()).decode('utf8'))
		plt.clf()
		return result

	result = ['<div class=barplot>',
			('<text style="font-family: sans-serif; font-size: 16px; ">'
			'%s</text>' % title)]
	if target is not None:
		data = OrderedDict([(key, data[key]) for key in
				target.sort_values().index if key in data])
	keys = {key.split('_')[0] if '_' in key else key[0] for key in data}
	color = {}
	if len(keys) <= 5:
		color.update(zip(keys, range(1, 6)))
	keys = list(data)
	if dosort:
		keys.sort(key=data.get, reverse=True)
	for key in keys:
		result.append('<br><div style="width:%dpx;" class=b%d></div>'
				'<span>%s: %g %s</span>' % (
				int(round(width * data[key] / total)) if data[key] else 0,
				color.get(key.split('_')[0] if '_' in key else key[0], 1)
					if data[key] else 0,
				htmlescape(key), data[key], unit,))
	result.append('</div>\n')
	return '\n'.join(result)
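A usage sketch for the HTML fallback branch (the data here is made up; `htmlescape` and the plotting names come from the module's own imports):

# Small dict and no target series, so the HTML bar-plot branch runs.
html = plot({'cat_a': 12, 'cat_b': 7, 'dog_c': 3}, total=22,
		title='Token counts', unit='tokens')
print(html[:80])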
Example #19
def plot_complexity(models=ALL_MODELS, save_path='../resources/cached_model_grid_scores.csv'):
    grid = pd.read_csv(save_path)    
    grid = grid[grid['model_names'].isin(models)]

    plt.figure(figsize=(12,12));
    sns.lmplot(data=grid, x='time_to_train', y='params', 
               hue='model_names', fit_reg=False, legend=False);
    plt.legend(bbox_to_anchor=(1.05, 1), loc='lower right', borderaxespad=0.);
    plt.show();
Example #20
def plotScatterLabelled(data, x_param, y_param, huey, output_path, output_directory, output_filename):
	sns.lmplot(x_param, y_param, data, hue=huey, fit_reg=False);
	output_ = "%s/%s/%s" % (output_path, output_directory, output_filename)
	try:
		plt.savefig(output_)
	except IOError:
		os.makedirs('%s/%s/' % (output_path, output_directory))
		plt.savefig(output_)	
	plt.close()
Example #21
def grafico_l2(conjunto, xl=None, yl=None, titulox="", tituloy="", titulo="", filename="", tamanho=5):
    a = np.array(conjunto[0].map(_dic_cruzes))
    b = np.array(conjunto[1].map(_dic_cruzes))
    c = DataFrame([a, b]).transpose()
    c.columns = ["A", "B"]
    sns.lmplot("A", "B", c, x_jitter=0.2, y_jitter=0.3, size=tamanho)
    plt.title(titulo, fontsize=16)
    sns.axlabel(titulox, tituloy, fontsize=fontetamanho)
    plt.savefig(filename)
Example #22
def seabornScatterPlot(data,xName,yName,titleIn):
    '''
        seabornScatterPlot plots a scatter plot using seaborn.

    :param data: DataFrame containing the columns to plot
    :param xName: name of the x axis column
    :param yName: name of the y axis column
    :param titleIn: plot title
    '''
    sns.lmplot(xName, yName, data, palette="Set1", fit_reg=False);
    plt.title(titleIn);
def lm_plot(df,dep_var, indep_var,grpby,units):
    if grpby:
        seaborn.lmplot(x=indep_var, y=dep_var, data=df, hue=grpby, fit_reg=False)
    else:
        seaborn.lmplot(x=indep_var, y=dep_var, data=df, fit_reg=False)
    
    #seaborn.lmplot(x=indep_var, y=dep_var, data=df, fit_reg=False)
    #would be great to figure out how to remove '_cat'    
    plt.xlabel(indep_var)
    plt.ylabel(dep_var + ", " + units)   
    plt.title("Scatterplot of " + dep_var + " versus " + indep_var)  
    plt.savefig(wd + "Scatterplot_" + dep_var + "_vs_"+ indep_var + '.png')
    plt.close()
Example #24
def plot_avg_width_exp(mark="H3K4me3"):
    def get_90quantile(arr):
        arr.sort()
        return arr[int(0.9*len(arr))]
    gene_id, EID_list, exp_matrix = get_gene_exp_matrix()
    _, len_dict = get_len_num(mark)
    quantile_arr = np.array([get_90quantile(len_dict[EID]) for EID in EID_list])
    gene_avg = np.mean(exp_matrix, axis=0)
    print(quantile_arr)
    print(gene_avg)
    result = pd.DataFrame({'quantile90':quantile_arr,"gene_avg":gene_avg})
    sns.lmplot('quantile90','gene_avg',result)
    plt.show()
Example #25
 def plot_scatter_n_accuracy(self):
     ax = plt.gca()
     acc = self.df.groupby('worker')['correct'].mean()
     n = self.df.groupby('worker')['question'].count()
     condition = self.df.groupby('worker')['condition'].first()
     df = pd.concat([acc, n, condition], axis=1)
     sns.lmplot('question', 'correct', data=df, hue='condition',
                fit_reg=False)
     plt.xlabel('Number of questions answered')
     plt.ylabel('Accuracy')
     plt.xlim((0, None))
     plt.ylim((0, 1))
     plt.title('')
     return ax
Example #26
File: Graphs.py  Project: alanhdu/Dex
    def createScatter(self, event):
        dlg = GraphDialog(self.parent, "Scatterplot Input", ("X", "Y"),
                size=(700, 200), groups=False)
        regress = wx.CheckBox(dlg, label="Add Regression Polynomial?")
        regress.SetValue(True)
        jitter = wx.CheckBox(dlg, label="Jitter?")
        jitter.SetValue(False)
        dlg.Add(jitter)
        ci = dlg.AddSpinCtrl("Confidence (>=100 for None)", 0, 101, 95)
        order = dlg.AddSpinCtrl("Polynomial Degree", 1, 10, 1)

        regress.Bind(wx.EVT_CHECKBOX, 
            lambda e: ci.Enable(regress.GetValue()) and order.Enable(regress.GetValue()))
        dlg.Add(regress)

        if dlg.ShowModal() == wx.ID_OK:
            ds = dlg.GetName()
            dlg.Destroy()
            regress, ci = regress.GetValue(), ci.GetValue()
            order, jitter = order.GetValue(), jitter.GetValue()

            data = self.parent.data[list({b for bs in ds for b in bs})].astype(float)
            snData = pd.DataFrame()
            for x, y in ds: # Deals with silly SNS stuff
                d = {"x":data[x], "y":data[y], "group":np.repeat(y, len(data[x]))}
                d = pd.DataFrame(d)
                snData = snData.append(d, ignore_index=True)

            if jitter:
                xjitter = snData["x"].std() / 4
                yjitter = snData["y"].std() / 4
            else:
                xjitter, yjitter = 0, 0

            try:
                if ci < 100 and regress:
                    sns.lmplot("x", "y", snData, hue="group", ci=ci, order=order, 
                            x_jitter=xjitter, y_jitter=yjitter)
                else:
                    sns.lmplot("x", "y", snData, fit_reg=regress, ci=None, order=order,
                            x_jitter=xjitter, y_jitter=yjitter)
                plt.show()
            except np.RankWarning:
                dlg = wx.MessageDialog(self.parent, "Polynomial Degree Too High",
                        style = wx.OK | wx.ICON_ERROR)
                dlg.ShowModal()
                dlg.Destroy()
                plt.show()
Example #27
File: grapher.py  Project: rht/bssim
 def latmeanbw(self):
     # take log of bw array for better sizing
     self.load_block_times()
     normbws = np.array(self.df.bandwidths) 
     g = sns.lmplot("latencies", "means", data=self.df[['latencies', 'means']], scatter_kws={"s": np.log2(normbws) * 10, "alpha" : .5})
     g.set(ylim=(0, 400))
     g = self.with_title(g)
Example #28
File: grapher.py  Project: rht/bssim
    def bttime(self):
        print('loading block time vs time...')
        # get block_time rows for most recent run
        self.cur.execute('SELECT timestamp, time, runid FROM block_times where runid=(select max(runid) from runs)')
        rows = self.cur.fetchall()
        rid = (rows[0][2],)

        # get tuple reflecting run config to show under graph
        self.cur.execute('SELECT * FROM runs where runid=?', rid)
        config = self.cur.fetchone()
        config = list(map(str, config))
        names = [i[0] for i in self.cur.description]
        desc = str(list(zip(names, config)))

        timestamps = []
        times = []
        for ts, time, rid in rows:
            timestamps.append(ts)
            times.append(time)
        
        timedf = pd.DataFrame.from_dict({'timestamps' : timestamps, 'times' : times})
        # change nanosecond timestamps to seconds
        timedf['timestamps'] = timedf['timestamps'].astype(float) / (1000 * 1000)
        g = sns.lmplot("timestamps", "times", data=timedf)
        print(desc)
        g.ax.set_title(self.wl)
        g.set_axis_labels("time (seconds)", "block times (ms)")
Example #29
File: grapher.py  Project: rht/bssim
    def latdur(self):
        print('latency vs duration')
        filtered = util.lock_float_field(self.df, 'bandwidths', self.bws)
        if filtered is None:
            return self.latmeanbw()

        g = sns.lmplot("latencies", "durations", data=filtered[['latencies', 'durations', 'bandwidths']].astype(float), col='bandwidths')
def draw_boundary(power, l):
    """
    power: polynomial power for mapped feature
    l: lambda constant
    """
    density = 1000
    threshhold = 2 * 10**-3

    final_theta = feature_mapped_logistic_regression(power, l)
    x, y = find_decision_boundary(density, power, final_theta, threshhold)

    df = pd.read_csv('ex2data2.txt', names=['test1', 'test2', 'accepted'])
    sns.lmplot('test1', 'test2', hue='accepted', data=df, size=6, fit_reg=False, scatter_kws={"s": 100})

    plt.scatter(x, y, c='R', s=10)
    plt.title('Decision boundary')
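In draw_boundary above, `find_decision_boundary` is not shown; a minimal sketch of what it plausibly does, assuming a `feature_mapping(x1, x2, power)` helper that builds the same polynomial features used to fit `final_theta` (the grid range is also an assumption):

import numpy as np

def find_decision_boundary(density, power, theta, threshhold):
    # Evaluate |x . theta| on a dense grid and keep the near-zero points,
    # which trace the decision boundary.
    t1 = np.linspace(-1, 1.5, density)
    t2 = np.linspace(-1, 1.5, density)
    x_cord, y_cord = np.meshgrid(t1, t2)
    x_cord, y_cord = x_cord.ravel(), y_cord.ravel()
    mapped = feature_mapping(x_cord, y_cord, power)  # hypothetical helper
    inner_product = np.asarray(mapped) @ theta
    keep = np.abs(inner_product) < threshhold
    return x_cord[keep], y_cord[keep]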
sns.boxplot(data=[data1, data2], whis=np.inf)
plt.show()

sns.violinplot(data=[data1, data2])
plt.show()

# Let's try a finer bandwidth.
sns.violinplot(data=data2, bw=0.01)
plt.show()


# Seaborn ships with sample datasets.
tips = sns.load_dataset("tips")
tips.head()

sns.lmplot("total_bill", "tip", tips, size=10)
plt.show()


# Parameters can be changed per plot.
sns.lmplot("total_bill", "tip", tips,
           scatter_kws={'marker': 'o', 'color': 'indianred', 's': 10},
           line_kws={'linewidth': 1, 'color': 'blue'})

plt.show()

# A 4th-order polynomial regression curve can also be fitted.
sns.lmplot("total_bill", "tip", tips, order=4,
           scatter_kws={"marker": "o", "color": "indianred", "s": 8},
           line_kws={"linewidth": 1, "color": "blue"})
plt.show()
Example #32
    return data_mean, lower, upper


if __name__ == "__main__":
    df = pd.read_csv('./salaries.csv')

    data = df.values.T[1]
    boots = []
    for i in range(100, 100000, 1000):
        boot = boostrap(data, data.shape[0], i)
        boots.append([i, boot[0], "mean"])
        boots.append([i, boot[1], "lower"])
        boots.append([i, boot[2], "upper"])

    df_boot = pd.DataFrame(boots,
                           columns=['Bootstrap Iterations', 'Mean', "Value"])
    sns_plot = sns.lmplot(df_boot.columns[0],
                          df_boot.columns[1],
                          data=df_boot,
                          fit_reg=False,
                          hue="Value")

    sns_plot.axes[0, 0].set_ylim(0, )
    sns_plot.axes[0, 0].set_xlim(0, 100000)

    sns_plot.savefig("bootstrap_confidence.png", bbox_inches='tight')
    sns_plot.savefig("bootstrap_confidence.pdf", bbox_inches='tight')

    # print("Mean: %f" % np.mean(data))
    # print("Var: %f" % np.var(data))
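The `boostrap` helper (spelling as in the original call) is defined outside this excerpt; given the `return data_mean, lower, upper` fragment at the top, a percentile-bootstrap sketch consistent with that signature might be:

import numpy as np

def boostrap(sample, sample_size, iterations, ci=0.95):
    # Resample with replacement, take the mean of each resample, and use
    # percentile bounds of the bootstrap means as the confidence interval.
    means = np.array([
        np.mean(np.random.choice(sample, size=sample_size, replace=True))
        for _ in range(iterations)
    ])
    data_mean = means.mean()
    lower = np.percentile(means, (1 - ci) / 2 * 100)
    upper = np.percentile(means, (1 + ci) / 2 * 100)
    return data_mean, lower, upper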
Example #33
    def learn(self, message):
        try:
            for i in range(len(self.connections)):
                print('start learning')

                # First, load the required file
                learningFileName = 'learning_1_saved.sav'
                df = joblib.load(learningFileName)

                # Load the DataFrame (df) from that file and plot it
                sb.lmplot('x',
                          'y',
                          data=df,
                          fit_reg=False,
                          scatter_kws={"s": 150},
                          hue="cluster")
                plt.title('Before')

                # Get the last index number of the existing df
                lastIndex = len(df.index) - 1
                # Then extract the numbers from the message and insert the new point with an initial cluster label of 0.
                Xtext = message[message.index('_') + 1:message.index(',')]
                Ytext = message[message.index(',') + 1:message.index('.')]
                inputX = int(Xtext)
                inputY = int(Ytext)
                df.loc[lastIndex + 1] = [inputX, inputY, 0]

                print('Received input: ', inputX, ', ', inputY)
                # Re-run the learning
                newpoints = df.values
                kmeans = KMeans(n_clusters=5).fit(newpoints)

                # Attach the new 'cluster' labels and plot the learning result.
                df['cluster'] = kmeans.labels_
                sb.lmplot('x',
                          'y',
                          data=df,
                          fit_reg=False,
                          scatter_kws={"s": 150},
                          hue="cluster")
                plt.title('After')

                # Read the center of each cluster
                clusterData = kmeans.cluster_centers_

                # Sort clusterData along the X axis
                sortedCluster = clusterData[clusterData[:, 0].argsort()]

                # Drop the third column (the cluster number)
                finalCutCluster = np.delete(sortedCluster, np.s_[2], axis=1)

                # Save the final results to files (learning file + cluster centers)
                joblib.dump(df, learningFileName)
                np.save('clusterCenter', finalCutCluster)
                print("Learning is complete!")

                # Finally, send a success message
                newMessage = "complete\n"
                print(type(newMessage))
                self.connections[i].sendall(newMessage.encode())
                print("(", newMessage, ") has been sent to client")
        except:
            pass
Example #34
def plot_iris(iris, col1, col2):
    sns.lmplot(x=col1, y=col2, data=iris, hue="Species", fit_reg=False)
    plt.xlabel(col1)
    plt.ylabel(col2)
    plt.title("Iris species shown by colour")
    plt.show()
Example #35
g.add_legend();

# Difference between M & F split fractions
sns.kdeplot(data.split_frac[data.gender=='M'], label='men', shade=True)
sns.kdeplot(data.split_frac[data.gender=='W'], label='women', shade=True)
plt.xlabel('split_frac');    

# Bimodal distribution among M & F : Violinplot
# sns.violinplot?  (IPython help)
sns.violinplot('gender', 'split_frac', data=data, palette=['lightblue', 'lightpink']);

# Violin plot as a function of gender and age
data['age_dec'] = data.age.map(lambda age: 10 * (age//10))
data.head()
sns.violinplot('age_dec', 'split_frac', data=data, palette=['lightblue', 'lightpink']); # OR
sns.violinplot('age_dec', 'split_frac', hue='gender', data=data, palette=['lightblue', 'lightpink']); # OR
sns.violinplot('age_dec', 'split_frac', hue='gender', data=data, split=True, inner='quartile', 
               palette=['lightblue', 'lightpink']); # OR
#
men = (data.gender == 'M')
women = (data.gender == 'W')
with sns.axes_style(style=None):
    sns.violinplot("age_dec", "split_frac", hue="gender", data=data, split=True, inner="quartile",
                   palette=["lightblue", "lightpink"]);
# Elder aged
(data.age>80).sum()    

# regplot to fit a linear regression to the data  automatically
g = sns.lmplot('final_sec', 'split_frac', col='gender', data=data,markers=".", scatter_kws=dict(color='c'))
g.map(plt.axhline, y=0.1, color="k", ls=":");
Example #36
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import matplotlib.pyplot as plt
import seaborn as sns
#%matplotlib inline
customers = pd.read_csv(r'H:\Ecommerce prediction\Datasets\dt1.csv')

customers.head()

sns.set_palette("GnBu_d")
sns.set_style('whitegrid')
sns.jointplot(x='Time on Website', y='Yearly Amount Spent', data=customers)

sns.jointplot(x='Time on App', y='Yearly Amount Spent', data=customers)
sns.jointplot(x='Time on App',
              y='Length of Membership',
              kind="hex",
              data=customers)
sns.pairplot(customers)

sns.lmplot(x='Length of Membership', y='Yearly Amount Spent', data=customers)

plt.show()
We print the min and max of each value.
"""
columns = df_dataset.columns
for num in range(1, 31):
    print('Min value of ', columns[num], ' is', min(df_dataset[columns[num]]),
          ' and max value is ', max(df_dataset[columns[num]]), '\n')

# Find the optimal number of neighbors
find_best_K(10, dataset)
# See how the model scores depending on the number of folds.
fold_num_and_accuracy(11, dataset)

df_dataset.dtypes

# Plot the tumor characteristics, classified as benign or malignant.
df = pd.read_csv(
    'C:/Users/user/Desktop/ERGASIES_&_ARXEIA/Διαχείριση_Γνώσης_2/data.csv')
sns.lmplot(x='radius_mean', y='texture_mean', hue='diagnosis', data=df)
sns.lmplot(x='perimeter_mean', y='smoothness_mean', hue='diagnosis', data=df)
sns.lmplot(x='area_mean', y='compactness_mean', hue='diagnosis', data=df)

# Make predictions with data whose classes we already know.
k_nearest_neighbors(dataset, dataset[0:10], 5)  # 1 error.
k_nearest_neighbors(dataset, dataset[0:20], 5)  # 3 errors.
k_nearest_neighbors(dataset, dataset[0:100], 5)  # At 100 we get 10 errors.
k_nearest_neighbors(dataset, dataset[0:200], 5)  # 16 errors.

add_new_patient_data_and_predict(df_dataset, dataset, 10)
add_new_patient_data_and_predict(df_dataset, dataset, 20)
add_new_patient_data_and_predict(df_dataset, dataset, 30)
Example #38
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
from sklearn.datasets import load_boston

import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style('whitegrid')
boston = load_boston()
boston_df = DataFrame(boston['data'])
boston_df.columns = boston['feature_names']
boston_df['Price'] = boston['target']
print(boston_df.head())

# plt.hist(boston['target'],bins=50)
# plt.scatter(boston['data'][:,5],boston['target'])
# plt.xlabel('Prices in $1000s')
# plt.ylabel('Number of houses')
# plt.show()

sns.lmplot('RM', 'Price', data=boston_df)
plt.show()
Example #39
# sns.lmplot('Flour', 'Sugar', data=recipes, hue='Type', palette='Set1', fit_reg=False, scatter_kws={"s": 70});
# plt.plot(xx, yy, linewidth=2, color='black')
# plt.plot(xx, yy_down, 'k--')
# plt.plot(xx, yy_up, 'k--')
# plt.show()

# create function to predict muffin or cupcake


def muffin_or_cupcake(flour, sugar):
    if (modal.predict([[flour, sugar]])) == 0:
        print("Muffin")
    else:
        print("Cupcake")


muffin_or_cupcake(10, 55)

# plotting Predicted data

sns.lmplot('Flour',
           'Sugar',
           data=recipes,
           hue='Type',
           palette='Set1',
           fit_reg=False,
           scatter_kws={"s": 70})
plt.plot(xx, yy, linewidth=2, color='black')
plt.plot(10, 55, 'yo', markersize=9)
plt.show()
# Import Data
# ===========
#
# Load data from Google Trends.

data = pd.read_csv('data/GoogleTrendsData.csv',
                   index_col='Date',
                   parse_dates=True)
data.head()

# Show DJIA vs. debt related query volume.
display_charts(data,
               chart_type="stock",
               title="DJIA vs. Debt Query Volume",
               secondary_y="debt")
seaborn.lmplot("debt", "djia", data=data, size=7)

# Detect if search volume is increasing or decreasing in
# any given week by forming a moving average and testing if the current value
# crosses the moving average of the past 3 weeks.
#
# Let's first compute the moving average.

data['debt_mavg'] = data.debt.rolling(window=3, center=False).mean()
data.head()

# Since we want to see if the current value is above the moving average of the
# *preceding* weeks, we have to shift the moving average timeseries forward by one.

data['debt_mavg'] = data.debt_mavg.shift(1)
data.head()
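The excerpt stops before the crossing test it describes; the final step, using the columns built above, could be as simple as this (the 'signal' column name is an assumption):

# True where this week's query volume is above the moving average
# of the preceding three weeks.
data['signal'] = data['debt'] > data['debt_mavg']
data.head()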
Example #41
def association_userInput():

    print("\n===============================================================================")
    print("a. Fixed Acidity")
    print("b. Volatile Acidity")
    print("c. Citric Acid")
    print("d. Residual Sugar")
    print("e. Chlorides")
    print("f. Free Sulfur")
    print("g. Dioxide")
    print("h. Total Sulfur Dioxide")
    print("i. Density")
    print("j. pH")
    print("k. Sulphates")
    print("l. Alcohol")
    print("m. Quality")
    print("===============================================================================")

    print("\nPlease select two characteristics from above to test an association for (enter the letter)")
    print("Note: If one of the characteristics you want to test for is quality, it is recommended you choose this characteristic for characteristic 1.")

    while True:
        choice1 = input("\nCharacteristic 1: ").lower().strip()
        if choice1 == "a":
            choice1 = "fixed acidity"
            break
        if choice1 == "b":
            choice1 = "volatile acidity"
            break
        if choice1 == "c":
            choice1 = "citric acid"
            break
        if choice1 == "d":
            choice1 = "residual sugar"
            break
        if choice1 == "e":
            choice1 = "chlorides"
            break
        if choice1 == "f":
            choice1 = "free sulfur"
            break
        if choice1 == "g":
            choice1 = "dioxide"
            break
        if choice1 == "h":
            choice1 = "total sulfur dioxide"
            break
        if choice1 == "i":
            choice1 = "density"
            break
        if choice1 == "j":
            choice1 = "pH"
            break
        if choice1 == "k":
            choice1 = "sulphates"
            break
        if choice1 == "l":
            choice1 = "alcohol"
            break
        if choice1 == "m":
            choice1 = "quality"
            break
        else:
            print("\nYou must select only one menu choice from above by typing the letter. Please try again.")

    while True:
        choice2 = input("\nCharacteristic 2: ").lower().strip()
        if choice2 == "a":
            choice2 = "fixed acidity"
            break
        if choice2 == "b":
            choice2 = "volatile acidity"
            break
        if choice2 == "c":
            choice2 = "citric acid"
            break
        if choice2 == "d":
            choice2 = "residual sugar"
            break
        if choice2 == "e":
            choice2 = "chlorides"
            break
        if choice2 == "f":
            choice2 = "free sulfur"
            break
        if choice2 == "g":
            choice2 = "dioxide"
            break
        if choice2 == "h":
            choice2 = "total sulfur dioxide"
            break
        if choice2 == "i":
            choice2 = "density"
            break
        if choice2 == "j":
            choice2 = "pH"
            break
        if choice2 == "k":
            choice2 = "sulphates"
            break
        if choice2 == "l":
            choice2 = "alcohol"
            break
        if choice2 == "m":
            choice2 = "quality"
            break
        else:
            print("\nYou must select only one menu choice from above by typing the letter. Please try again.")


    while True:
        wine_choice = input("\nWould you like to test for red or white wine? (enter 'red' or 'white'): ").strip().lower()

        if wine_choice == "red":
            try:
                WineCharX = choice1
                WineCharY = choice2
                allWines = pd.read_csv('winequality-both.csv', sep=',', header=0)
                red = allWines.loc[allWines['type'] == 'red', :]

                getCorr = scipy.stats.pearsonr(red[WineCharX], red[WineCharY])
                correlation = str(getCorr[0])
                pValue = str(getCorr[1])
                print("\nFor red wine, the correlation between " + WineCharX + " and " + WineCharY + " is: " + correlation)
                print("With p-value of: " + pValue)

                seaborn.lmplot(x=WineCharX, y=WineCharY, data=red)
                plt.xlabel(WineCharX)
                plt.ylabel(WineCharY)
                plt.title("Red Wine: " + WineCharX + " X " + WineCharY)
                plt.show()

            except (KeyError) as e:
                print("\nError. Please check the spelling of the wine characteristic you wish to test.")
            break

        if wine_choice == "white":
            try:
                WineCharX = choice1
                WineCharY = choice2
                allWines = pd.read_csv('winequality-both.csv', sep=',', header=0)
                white = allWines.loc[allWines['type'] == 'white', :]

                getCorr = scipy.stats.pearsonr(white[WineCharX], white[WineCharY])
                correlation = str(getCorr[0])
                pValue = str(getCorr[1])
                print("\nFor white wine, the correlation between " + WineCharX + " and " + WineCharY + " is: " + correlation)
                print("With p-value of: " + pValue)

                seaborn.lmplot(x=WineCharX, y=WineCharY, data=white)
                plt.xlabel(WineCharX)
                plt.ylabel(WineCharY)
                plt.title("White Wine: " + WineCharX + " X " + WineCharY)
                plt.show()

            except (KeyError) as e:
                print("\nError. Please check the spelling of the wine characteristic you wish to test.")
            break

        if wine_choice != "red" and wine_choice != "white":
            print("\nYou must enter either 'red' or 'white' based on which wine you want to test associations for. Please try again.")

    while True:
        after = input("\nWould you like to test more associations or return to the main menu? (enter 'test' or 'main'): ").lower().strip()
        if after == "test":
            association_userInput()
            break
        if after == "main":
            break
        else:
            print("\nYou must enter either 'test' or 'main' based on what you want to do. Please try again.")
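The two letter-selection loops above repeat the same letter-to-name mapping; a dict-based sketch (a hypothetical refactor, not part of the original) keeps it in one place:

CHARACTERISTICS = {
    "a": "fixed acidity", "b": "volatile acidity", "c": "citric acid",
    "d": "residual sugar", "e": "chlorides", "f": "free sulfur",
    "g": "dioxide", "h": "total sulfur dioxide", "i": "density",
    "j": "pH", "k": "sulphates", "l": "alcohol", "m": "quality",
}

def ask_characteristic(prompt):
    # Loop until the user enters one of the menu letters.
    while True:
        choice = input(prompt).lower().strip()
        if choice in CHARACTERISTICS:
            return CHARACTERISTICS[choice]
        print("\nYou must select only one menu choice from above by typing the letter. Please try again.")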
Example #42
medals_all = round(
    medals_all.groupby(['NOC', 'Country']).Medal_Perc.mean(),
    2).reset_index()  #remove season
medals_all.columns = ['NOC', 'Country', 'Medal_Perc']  # remove season
host_medals = games_total_df[['Year', 'Host_NOC',
                              'Host_Medal_Perc']]  #remove season, games
host_medals.columns = ['Year', 'NOC', 'Host_Medal_Perc']  #remove season, games
host_difference = pd.merge(host_medals, medals_all, how='left')

print(host_difference)
print(noc_total_df)

# Plot of difference with hosting
facet = sns.lmplot(data=host_difference,
                   x='Medal_Perc',
                   y='Host_Medal_Perc',
                   robust=True,
                   palette=['C1'])
plt.plot([0, 15], [0, 15], 'black', linewidth=2, linestyle='dashed')
facet.ax.set_xticks(np.arange(0, 15, 2.5))
facet.ax.set_yticks(np.arange(0, 36, 2.5))
plt.text(8, 7, 'x=y')
facet.ax.ticklabel_format(useOffset=False)
facet.ax.set_xlim(left=0)
facet.ax.set_ylim(bottom=0)
plt.title('The difference in percentage of medals won by host countries')
plt.show()

# Get the top 20 countries
noc_colors = sns.color_palette("Paired", n_colors=11)
noc_colors[-1] = (0.0, 0.0, 0.0)
Example #43
def main():
    # input_dir = "/Users/odedkushnir/Google Drive/Studies/PhD/Stretch_analysis"
    mutation_lst = ["A>G", "T>C", "G>A", "C>T"] # ["A>G", "T>C", "G>A", "C>T", "A>C", "T>G", "A>T", "T>A", "G>C", "C>G", "C>A", "G>T"]
    input_dir = "C:/Users/odedku/Stretch_analysis"#.format(mutation.replace(">", ""))
    for mutation in mutation_lst:
        # mutation = "A>G"
        mutation_in_stretch = 13
        output_dir = input_dir + "_{0}".format(mutation.replace(">", ""))
        try:
            os.mkdir(output_dir)
        except OSError:
            print("Creation of the directory {0} failed".format(output_dir))
        else:
            print("Successfully created the directory {0}".format(output_dir))

        prefix = "20201012_q38/all_parts.blast"
        p2_1 = pd.read_table(input_dir + "/p2_1/{0}".format(prefix), sep="\t")
        p2_2 = pd.read_table(input_dir + "/p2_2/{0}".format(prefix), sep="\t")
        p5_1 = pd.read_table(input_dir + "/p5_1/{0}".format(prefix), sep="\t")
        p5_2 = pd.read_table(input_dir + "/p5_2/{0}".format(prefix), sep="\t")
        p8_1 = pd.read_table(input_dir + "/p8_1/{0}".format(prefix), sep="\t")
        p8_2 = pd.read_table(input_dir + "/p8_2/{0}".format(prefix), sep="\t")
        p10_1 = pd.read_table(input_dir + "/p10_1/{0}".format(prefix), sep="\t")
        p10_2 = pd.read_table(input_dir + "/p10_2/{0}".format(prefix), sep="\t")
        p12_1 = pd.read_table(input_dir + "/p12_1/{0}".format(prefix), sep="\t")
        p12_2 = pd.read_table(input_dir + "/p12_2/{0}".format(prefix), sep="\t")
        barcode_data = pd.read_csv(input_dir + "/barcode/PrimerID_barcode_Results.csv")
        # Dictionary of passage and number of PrimerID
        data_dict = {"p2_1": [p2_1, 23507], "p2_2": [p2_2, 38726], "p5_1": [p5_1, 17903], "p5_2": [p5_2, 12395],
                     "p8_1": [p8_1, 8666], "p8_2": [p8_2, 9990], "p10_1": [p10_1, 6068], "p10_2": [p10_2, 40623],
                     "p12_1": [p12_1, 9668], "p12_2": [p12_2, 11110]}
        control_id = 27962
        """NOT from memory"""
        passage_lst = glob.glob(input_dir + "/p*")
        for passage in passage_lst:
            passage_num = passage.split("\\")[-1]
            try:
                os.mkdir(output_dir + "/{0}".format(passage_num))
                os.mkdir(output_dir + "/{0}/20201012_q38".format(passage_num))
            except OSError:
                print("Creation of the directory {0}/{1}/20201012_q38 failed".format(output_dir, passage_num))
            else:
                print("Successfully created the directory {0}/{1}/20201012_q38".format(output_dir, passage_num))
        create_crosstab_df(input_dir, output_dir, prefix, data_dict, control_id, mutation, mutation_in_stretch)

        """from memory"""
        passage_lst = glob.glob(input_dir + "/p*")
        crosstab_lst = []
        for passage in passage_lst:
            passage_num = passage.split("\\")[-1]
            crosstab_df = pd.read_pickle(output_dir + "/{0}/20201012_q38/corsstab_df.pkl".format(passage_num))
            crosstab_lst.append(crosstab_df)
        """Creation of the final tables and figs"""
        crosstab_df_all = pd.concat(crosstab_lst, axis=1)
        crosstab_df_all = crosstab_df_all[
            ["Control", "p2_1", "p2_2", "p5_1", "p5_2", "p8_1", "p8_2", "p10_1", "p10_2", "p12_1", "p12_2"]]
        crosstab_df_all = crosstab_df_all.iloc[0:4, 9:]
        crosstab_df_all = crosstab_df_all.transpose()
        crosstab_df_all["Stretch_percentage"] = crosstab_df_all["No._of_reads_with_stretch_{0}".format(mutation)] / \
                                                (crosstab_df_all["No._of_reads_with_stretch_{0}".format(mutation)] +
                                                 crosstab_df_all["No._of_reads_without_stretch_{0}".format(mutation)])
        crosstab_df_all["Stretch_percentage"] = crosstab_df_all["Stretch_percentage"] * 100
        crosstab_df_all.reset_index(inplace=True, drop=False)
        crosstab_df_all = crosstab_df_all.rename(columns={"index": "Sample"})
        crosstab_df_all = crosstab_df_all.merge(barcode_data, on="Sample", how="inner")
        crosstab_df_all["Hyper mutation read frequency/sequenced genome"] = crosstab_df_all["Stretch_percentage"] / \
                                                                            crosstab_df_all["PrimerID_barcode"]
        crosstab_df_all["Hyper mutation read frequency/sequenced genome"] = crosstab_df_all[
            "Hyper mutation read frequency/sequenced genome"].astype(float)
        crosstab_df_all["passage"] = np.where(crosstab_df_all["Sample"] != "Control",
                                              crosstab_df_all.apply(lambda x: str(x["Sample"]).split("_")[0].split("p")[-1],
                                                                    axis=1), 0)
        crosstab_df_all["replica"] = np.where(crosstab_df_all["Sample"] != "Control",
                                              crosstab_df_all.apply(lambda x: str(x["Sample"]).split("_")[-1], axis=1), 1)
        crosstab_df_all["passage"] = crosstab_df_all["passage"].astype(int)
        crosstab_df_all.to_csv(output_dir + "/crosstab_df_all.csv", sep=",")
        mean_crosstab_df_all = crosstab_df_all.groupby("passage", as_index=False).mean()
        mean_crosstab_df_all["sem"] = crosstab_df_all.groupby("passage", as_index=False).sem()[
            "Hyper mutation read frequency/sequenced genome"]
        mean_crosstab_df_all["PrimerID_barcode"] = round(mean_crosstab_df_all["PrimerID_barcode"])
        mean_crosstab_df_all.to_csv(output_dir + "/mean_crosstab_df_all.csv", sep=",")

        try:
            os.mkdir(output_dir + "/figs")
        except OSError:
            print("Creation of the directory {0}/figs failed".format(output_dir))
        else:
            print("Successfully created the directory {0}/figs".format(output_dir))
        crosstab_df = pd.read_pickle(output_dir + "/{0}/20201012_q38/corsstab_df.pkl".format(passage_num))
        crosstab_lst.append(crosstab_df)
        slope1, intercept1, r_value1, p_value1, std_err1 = stats.linregress(crosstab_df_all['passage'],
                                                                            crosstab_df_all[
                                                                                'Stretch_percentage'])
        fig1 = sns.lmplot(x="passage", y="Stretch_percentage", data=crosstab_df_all, fit_reg=True,
                          line_kws={'label': "Linear Reg"}, )
        fig1.set(xlabel="Passage", ylabel="Stretch Percentage [%]", xlim=(0, 12))
        ax = fig1.axes[0, 0]
        ax.legend()
        leg = ax.get_legend()
        leg._loc = 2
        L_labels = leg.get_texts()
        label_line_1 = "y={0:.3g}x+{1:.3g}\nstderr={2:.3g} Rsq={3:.3g}".format(slope1, intercept1, std_err1, r_value1 ** 2)
        L_labels[0].set_text(label_line_1)
        plt.savefig(output_dir + "/figs/points.png", dpi=300)


        slope2, intercept2, r_value2, p_value2, std_err2 = stats.linregress(mean_crosstab_df_all['passage'],
                                                                            mean_crosstab_df_all[
                                                                                'Stretch_percentage'])
        fig2 = sns.lmplot(x="passage", y="Stretch_percentage", data=mean_crosstab_df_all, fit_reg=True,
                          line_kws={'label': "Linear Reg"}, )
        fig2.set(xlabel="Passage", ylabel="Stretch Percentage [%]", xlim=(0, 12))
        ax = fig2.axes[0, 0]
        ax.legend()
        leg = ax.get_legend()
        leg._loc = 2
        L_labels = leg.get_texts()
        label_line_2 = "y={0:.3g}x+{1:.3g}\nstderr={2:.3g} Rsq={3:.3g}".format(slope2, intercept2, std_err2, r_value2 ** 2)
        L_labels[0].set_text(label_line_2)
        plt.savefig(output_dir + "/figs/mean.png", dpi=300)
Example #44
df['Hour'] = df['timeStamp'].apply(lambda time: time.hour)
df['Month'] = df['timeStamp'].apply(lambda time: time.month)
df['Day of Week'] = df['timeStamp'].apply(lambda time: time.dayofweek)

dmap = {0:'Mon',1:'Tue',2:'Wed',3:'Thu',4:'Fri',5:'Sat',6:'Sun'}
df['Day of Week'] = df['Day of Week'].map(dmap)

sns.countplot(x='Day of Week',data=df,hue='Reason',palette='viridis')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

sns.countplot(x='Month',data=df,hue='Reason',palette='viridis')
byMonth = df.groupby('Month').count()
byMonth.head()
byMonth['twp'].plot()

sns.lmplot(x='Month',y='twp',data=byMonth.reset_index())
df['Date']=df['timeStamp'].apply(lambda t: t.date())

df.groupby('Date').count()['twp'].plot()
plt.tight_layout()

df[df['Reason']=='Traffic'].groupby('Date').count()['twp'].plot()
plt.title('Traffic')
plt.tight_layout()

df[df['Reason']=='Fire'].groupby('Date').count()['twp'].plot()
plt.title('Fire')
plt.tight_layout()

df[df['Reason']=='EMS'].groupby('Date').count()['twp'].plot()
plt.title('EMS')
Example #45
catMTCARS = ['gear','cyl','am','carb','vs']
mtcars[catMTCARS] = mtcars[catMTCARS].astype('category')

plt.figure(figsize=(5,2))
sns.countplot(data=mtcars, x='gear')

plt.figure(figsize=(3,5))
sns.countplot(data=mtcars, x='gear')
#---
#needs one numeric
g = sns.catplot(data=mtcars, x='gear', y='mpg', hue='am')
g.fig.set_figheight(6)
g.fig.set_figheight(3)

#---
# for figure-level functions such as sns.lmplot(), use the size and aspect arguments

#
sns.catplot(data=mtcars, x='gear', y='mpg',  hue='am', height=5, aspect=1/1)


#
sns.countplot(data=mtcars, x='gear')
plt.gcf().set_size_inches(4, 3)

#
fig, ax = plt.subplots()
# the size of A4 paper
fig.set_size_inches(5, 4)
sns.violinplot(data=mtcars[['mpg','wt']], inner="points", ax=ax)    
sns.despine()
Example #46
def single_data(df):
    sns.lmplot('square', 'price', df, fit_reg=True)
    plt.show()
    print(df.head())
    print(df.info())
Example #47
geo = geo[f1]

geo # Dataframe of regions

lifeEx # Dataframe of life expectancy 

dataset1 = lifeEx.merge(geo, how = 'inner',on ='CountryCode')
type(dataset1)

stats.columns = ['CountryName', 'CountryCode', 'BirthRate', 'InternetUsers','IncomeGroup']
type(dataset1)

merged_dataset = stats.merge(dataset1, how = 'inner',on ='CountryCode')
merged_dataset

# Create the Final Dataset 
final_dataset = merged_dataset[['CountryName_x','CountryCode','BirthRate','InternetUsers','IncomeGroup',1960,2013,'Region']]
final_dataset.columns = ['CountryName','CountryCode','BirthRate','InternetUsers','IncomeGroup','Year_1960','Year_2013','Region']
final_dataset

# Visualizations

# Regression Plot : Life Expectancy in 1960 vs BirthRate per Region
vis3 = sns.lmplot(x='BirthRate',y='Year_1960', data = final_dataset ,fit_reg=False, hue = 'Region', height = 10, aspect = 1)

# Regression Plot : Life Expectancy in 2013 vs BirthRate per Region
vis3 = sns.lmplot(x='BirthRate',y='Year_2013', data = final_dataset ,fit_reg=False, hue = 'Region', height = 10, aspect = 1)

# Regression Plot : BirthRate vs Internet Users per Region 
vis3 = sns.lmplot(x='BirthRate',y='InternetUsers', data = final_dataset ,fit_reg=False, hue = 'Region', height = 10, aspect = 1)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

warnings.filterwarnings('ignore')
# %matplotlib inline

sns.lmplot('weight', 'mpg', data=df, fit_reg=False, aspect=1, size=5, hue='cylinders', col='origin')
plt.show()
Example #49
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np

sns.set()

df = pd.read_csv("times_bt_podas_mejor_caso.csv")
df_p = pd.read_csv("times_bt_podas_peor_caso.csv")

# Nano -> Milli
df['time'] /= 1000000.0
df_p['time'] /= 1000000.0
df['peor'] = df_p['time']

df.plot('n', ['time', 'peor'], title='Mejor vs Peor')

r = np.corrcoef(df['time'], df['peor'])[0, 1]
print(r)
#out: r = 0.9366349410059747

ax1 = sns.lmplot(x='time', y='peor', data=df)
plt.xlabel("N")
plt.ylabel("tiempo")

plt.show()
Example #50
        vectors_set.append(
            [np.random.normal(3.0, 0.5),
             np.random.normal(1.0, 0.5)])

import matplotlib.pyplot as plt
# data manipulation package
import pandas as pd
# visualization package
import seaborn as sns

# plot the random-number data
df = pd.DataFrame({
    "x": [v[0] for v in vectors_set],
    "y": [v[1] for v in vectors_set]
})
sns.lmplot("x", "y", data=df, fit_reg=False, size=6)
plt.show()

# K-means algorithm that groups the data into 4 clusters
import tensorflow as tf

# Create a constant tensor from the random data
vectors = tf.constant(vectors_set)
# To pick K random points from the input data, let TensorFlow shuffle the
# vectors and select the first K as centroids, stored as a 2D tensor
k = 4
centroids = tf.Variable(tf.slice(tf.random_shuffle(vectors), [0, 0], [k, -1]))

# check the tensor shapes
print(vectors.get_shape())
print(centroids.get_shape())
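
# A hedged sketch of how the classic TF1 k-means example continues from here
# (the names below are the customary ones, not shown in this excerpt): assign each
# point to its nearest centroid, then recompute each centroid as its cluster mean.
expanded_vectors = tf.expand_dims(vectors, 0)      # (1, N, 2)
expanded_centroids = tf.expand_dims(centroids, 1)  # (k, 1, 2)
assignments = tf.argmin(
    tf.reduce_sum(tf.square(expanded_vectors - expanded_centroids), 2), 0)
means = tf.concat([
    tf.reduce_mean(
        tf.gather(vectors, tf.reshape(tf.where(tf.equal(assignments, c)), [1, -1])),
        axis=[1])
    for c in range(k)], 0)
update_centroids = tf.assign(centroids, means)
# running update_centroids for a few iterations inside a tf.Session converges the centroids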
Example #51
# No outlier, so no operation is needed for Item_Weight
#full.loc[full['Item_Weight'].isin(outlier),'Item_Weight'] = full['Item_Weight'].mean()

# Item_Outlet_Sales
BoxPlot = plt.boxplot(full[0:8522]['Item_Outlet_Sales'])
outlier = BoxPlot['fliers'][0].get_data()[1]
full.loc[full['Item_Outlet_Sales'].isin(outlier), 'Item_Outlet_Sales'] = full[0:8522]['Item_Outlet_Sales'].mean()
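
# A self-contained sketch of the flier-based replacement used above, on synthetic
# data (names here are assumptions, not from the source): plt.boxplot returns the
# fliers as a Line2D whose y-data are the points beyond the whiskers.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

s = pd.Series(np.append(np.random.normal(10, 2, 200), [55.0, 60.0]))
bp = plt.boxplot(s)
fliers = bp['fliers'][0].get_data()[1]   # y-values flagged as outliers
s[s.isin(fliers)] = s.mean()             # replace them with the series mean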
    

#-----------------Step 5: Exploratory analysis of the data----------------------------------------------------

# Create a copy of the train portion of full and assign it to full1
full1 = full[0:8522].copy()

# Analyzing the relation between Item_Weight & Item_Outlet_Sales
sns.lmplot(x='Item_Weight', y='Item_Outlet_Sales', data=full1)

# Analyzing the relation between Item_MRP & Item_Outlet_Sales
sns.lmplot(x='Item_MRP', y='Item_Outlet_Sales', data=full1)

# Analyzing the relation between Item_Visibility & Item_Outlet_Sales
full2 = full1[(full1['Item_MRP'] >= 240) & (full1['Item_MRP'] <= 241)]

sns.lmplot(x='Item_Visibility', y='Item_Outlet_Sales', data=full2)

# Analyzing the relation between Item_Id & Item_Outlet_Sales
# Retrieve the numeric part of Item_Identifier and create a new column
full1['Item_Id'] = full1['Item_Identifier'].str[3:].astype(int)
full2 = full1[(full1['Item_MRP'] >= 240) & (full1['Item_MRP'] <= 241)]
       
Example #52
        print("Grafica para ver que genero de pelicula obtuvo mas likes en facebook.\n")
        print(separador)
        df.groupby('genres')['movie_facebook_likes'].sum().plot(kind='barh',legend='Reverse',color="green")
        plt.xlabel("Suma de likes")
        plt.show()
    elif opcion =="2":
        print("Grafica para ver el promedio de ganancias.\n")
        print(separador)
        df.gross.groupby(df.genres).mean().plot(kind='pie',cmap="Paired")
        plt.axis("equal")
        plt.ylabel("")
        plt.title("Promedio de ganancias")
        plt.show()
    elif opcion =="3":
        print("Grafica para comparar el presupuesto con la calificacion de la pelicula.\n")
        print(separador)
        df.groupby('budget')['imdb_score'].sum().plot(kind='bar',legend='Reverse',color="Black")
        plt.xlabel("Presupuesto")
        plt.ylabel("Calificación")
        plt.show()
    elif opcion =="4":
        print("Grafica de Dispercion para ver la pelicula con mas likes.\n")
        print(separador)
        sns.lmplot(x="num",y="movie_facebook_likes",data=df,fit_reg=False,hue="num",legend=False,palette="Paired")
        plt.show()
    elif opcion =="5":
        darInicio=False
    else:
        print("Debes de elegir una opción valida\n ")
else:
    print("Programa Terminado.")
Example #53
        https://en.wikipedia.org/wiki/Median_absolute_deviation 
        http://stackoverflow.com/questions/8930370/where-can-i-find-mad-mean-absolute-deviation-in-scipy
    """
    arr = np.ma.array(arr).compressed()  # note: it would be faster to avoid masked arrays
    med = np.median(arr)
    return np.median(np.abs(arr - med))
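
# e.g. mad(np.array([1, 2, 3, 4, 100])) == 1.0: the median is 3, the absolute
# deviations are [2, 1, 0, 1, 97], and their median is 1 -- a single outlier
# barely moves the MAD, unlike np.std.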


#-------------------------------------------------------------------------------
# Main program.
#-------------------------------------------------------------------------------
if __name__ == "__main__":
    df = pd.read_csv('./customers.csv')
    print(df.columns)
    sns_plot = sns.lmplot(x=df.columns[0], y=df.columns[1], data=df, fit_reg=False)

    sns_plot.axes[0, 0].set_ylim(0, )
    sns_plot.axes[0, 0].set_xlim(0, )

    sns_plot.savefig("s_scaterplot.png", bbox_inches='tight')
    sns_plot.savefig("s_scaterplot.pdf", bbox_inches='tight')

    data = df.values.T[1]

    print((("Mean: %f") % (np.mean(data))))
    print((("Median: %f") % (np.median(data))))
    print((("Var: %f") % (np.var(data))))
    print((("std: %f") % (np.std(data))))
    print((("MAD: %f") % (mad(data))))
Example #54
#fig, axes = plt.subplots(1, 3)

# plot learning rate vs CRPS
#ax = sns.lmplot(x="hp_learning_rate", y="metric_CRPS", hue="task", data=df,)
#ax = sns.scatterplot(data=df, x='hp_learning_rate', y='metric_CRPS', hue='task')
#ax.set(xscale="log")
#ax.set_xlabel("x (learning rate)")
#ax.set_ylabel("y")

height = 4
aspect = 1.2
ax = sns.lmplot(x="hp_learning_rate",
                y="metric_CRPS",
                hue="task",
                ci=None,
                data=df,
                height=height,
                aspect=aspect,
                legend_out=False,
                fit_reg=False)
ax.set(xscale="log", yscale="log")
ax.ax.set_ylim(0.02, )
ax.ax.set_xlabel("x (learning rate)")
ax.ax.set_ylabel("y")

plt.tight_layout()
plt.savefig("y_plot.jpg")
plt.show()

# plot learning rate vs CRPS mapped through psi = Phi^{-1} o F
for task in df.task.unique():
    pass  # per-task plotting loop; body not included in this excerpt
Example #55
from sklearn import svm
import pandas
import matplotlib.pyplot as plt
import seaborn as sns
df = pandas.read_csv('flowers.csv')
df.columns = ['X1', 'X2', 'X3', 'X4', 'Y']
df.head()

from sklearn.model_selection import train_test_split
support = svm.SVC()
X = df.values[:, 0:2]
Y = df.values[:, 4]
#print(Y)
trainX, testX, trainY, testY = train_test_split(X, Y, test_size=0.3)

support.fit(trainX, trainY)
print('Accuracy: \n', support.score(testX, testY))
pred = support.predict(testX)
print("!")

sns.set_context("notebook", font_scale=1.1)
sns.set_style("ticks")
sns.lmplot(x='X1', y='X2', scatter=True, fit_reg=False, data=df, hue='Y')
plt.ylabel('X2')
plt.xlabel('X1')
plt.show()
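
# A hedged sketch (not from the source) of visualizing the 2-feature SVM's decision
# regions on a meshgrid; class labels are mapped to integers so contourf can color them.
import numpy as np
xx, yy = np.meshgrid(np.linspace(X[:, 0].min() - 1, X[:, 0].max() + 1, 200),
                     np.linspace(X[:, 1].min() - 1, X[:, 1].max() + 1, 200))
Z = support.predict(np.c_[xx.ravel(), yy.ravel()])
Z = np.unique(Z, return_inverse=True)[1].reshape(xx.shape)
plt.contourf(xx, yy, Z, alpha=0.2)
plt.scatter(X[:, 0], X[:, 1], c=np.unique(Y, return_inverse=True)[1], s=10)
plt.show()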
Example #56
                  ress_HC_meta]).reset_index(drop=True)
sns.pairplot(ress, hue="Label")

#------------------------------------------------------------------------------
from scipy.stats import spearmanr
from sklearn.metrics import mean_squared_error  # used below for the MSE annotation
import seaborn as sns

automl = AutoML_Regression()

lasso_best, _, _, _ = automl.XGBoost(X_train, y_train, X_test, y_test)
lasso_best.fit(X_train, y_train)
y_pred = lasso_best.predict(X_test)

dt = {"True RRS_Brooding": y_test, "Predicted RRS_Brooding": y_pred}
df = pd.DataFrame(dt)
g = sns.lmplot(x="True RRS_Brooding", y="Predicted RRS_Brooding", data=df)
g.set(ylim=(min(y_test), max(y_test)))
g.set(xlim=(min(y_test), max(y_test)))
plt.text(-3.9,
         max(y_test) - 1, r'MSE = %.2f' % (mean_squared_error(y_test, y_pred)))
plt.text(-3.9,
         max(y_test) - 2, r'Corr = %.2f' % (spearmanr(y_test, y_pred)[0]))

plt.scatter(y_pred, y_test, s=8)
plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], '--k')
plt.xlim(min(y_test), max(y_test))
plt.ylim(min(y_test), max(y_test))
plt.ylabel('True RRS_Brooding')
plt.xlabel('Predicted RRS_Brooding')
#plt.text(s='Random Forest without Forward varible', x=1,
#            y=2, fontsize=12, multialignment='center')
Example #57
testje.rolling(3).mean().plot(figsize=(20,10), linewidth=5, fontsize=20)
plt.xlabel('Date', fontsize=20)


# In[29]:
testje.diff().plot(figsize=(20,10),linewidth=5,fontsize=20)
plt.xlabel('Date',fontsize=20)


# In[30]:
import seaborn as sns
sns.set()


# In[31]:
sns.lmplot(x='Br_Mean', y='Gr_Mean',fit_reg=False, data=tableJoin, hue='OBJECTID')


# In[32]:
tableJoin.corr()


# In[33]:
tableJoin.groupby(['OBJECTID']).corr()


# In[34]:
test[["Br_Mean","Gr_Mean","We_Mean"]].diff().plot(figsize=(20,10),linewidth=5,fontsize=20)
plt.xlabel('Date',fontsize=20)

Example #58
plot = sns.catplot(x="BldgType", y="SalePrice", data=df, kind="boxen")
plot.savefig(path)

# Still, the type of a dwelling seems like it should be important information.
# Investigate whether BldgType produces a significant interaction with either of the following:

# GrLivArea  - Above ground living area
# MoSold     - Month sold

feature = "GrLivArea"
path = "../../../data/kaggleTutorials/output/figures/miE3.png"
plot = sns.lmplot(
    x=feature,
    y="SalePrice",
    hue="BldgType",
    col="BldgType",
    data=df,
    scatter_kws={"edgecolor": 'w'},
    col_wrap=3,
    height=4,
)
plot.savefig(path)

print(mi_scores.head(10))
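
# mi_scores is not defined in this excerpt; a hedged sketch of how such scores are
# typically computed with sklearn (function and frame names here are assumptions):
from sklearn.feature_selection import mutual_info_regression
import pandas as pd

def make_mi_scores(X, y):
    # X must be all-numeric; factorize or encode object columns first
    mi = mutual_info_regression(X, y)
    return pd.Series(mi, index=X.columns, name="MI Scores").sort_values(ascending=False)

# mi_scores = make_mi_scores(df.select_dtypes("number").drop(columns="SalePrice"), df["SalePrice"])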

# Do you recognize the themes here? Location, size, and quality.
# You needn't restrict development to only these top features,
# but you do now have a good place to start.
# Combining these top features with other related features,
# especially those you've identified as creating interactions,
# is a good strategy for coming up with a highly informative set of features to train your model on.
Example #59
# Boxplot for tip by sex
sns.boxplot(x='sex', y='tip', data=tips)
plt.show()

# Scatter plot of total_bill and tip
sns.regplot(x='total_bill', y='tip', data=tips)
plt.show()

############################################
# Facet plots in Seaborn

import seaborn as sns
import matplotlib.pyplot as plt

# Scatter plot of total_bill and tip faceted by smoker and colored by sex
sns.lmplot(x='total_bill', y='tip', data=tips, hue='sex', col='smoker')
plt.show()

# FacetGrid of time and smoker colored by sex
facet = sns.FacetGrid(tips, col="time", row='smoker', hue='sex')

# Map the scatter plot of total_bill and tip to the FacetGrid
facet.map(plt.scatter, 'total_bill', 'tip')
plt.show()

############################################
# Univariate and Bivariate plots in Matplotlib

import matplotlib.pyplot as plt

# Time series of the CAT closing price
X = pd.to_datetime(CATdf.Date)
y = CATdf.Close

#plot
plt.plot(X, y)
plt.gcf().autofmt_xdate()
plt.show()

# In[32]:

# Linear plot of Volume and NetUpDown, colored by HLcat
# illustrates that low-volatility days are more likely to finish net positive,
# and that higher volume on low- and high-volatility days makes a net-positive close more likely

sns.lmplot(x='Volume', y='NetUpDown', data=CATdf, hue='HLcat')

# In[33]:

# Graph DJIA Close with HLdifference and Volume for insight

index = pd.read_csv('djia_df_cat.csv')

index.Date = pd.to_datetime(index.Date)
plt.figure(figsize=(10, 8))
plt.plot(index.Date, index.Close, label="DJIA closing price")
plt.plot(index.Date, index.HLdifference * 10, label="HLDifference")
#scale volume for readability
plt.plot(index.Date, index.Volume / 100000, label="Volume")
plt.legend()
plt.title("DJIA stocks")