Exemplo n.º 1
0
def log_plot():
    """Draw the median timings as horizontal bars on a log-scaled time axis.

    Relies on names defined outside this function: ``median``, ``err``,
    ``pos``, ``size``, ``language``, ``relative``, ``prefix``, ``bar_color``,
    ``error_bar_color`` and ``pid``.  The figure is written to
    ``plots/<pid>.png`` and the current figure is cleared afterwards.
    """
    # Decade exponents of the smallest/largest median, shifted by 3 so that
    # index 0 corresponds to 1e-3.  np.log10 is used because
    # log(x) / log(10) is inexact at exact powers of ten (for 1000 it yields
    # 2.9999999999999996), which would move floor/ceil by a whole decade.
    start = int(np.floor(np.log10(min(median)))) + 3
    end = int(np.ceil(np.log10(max(median)))) + 3

    xs = []
    ticks = []
    for i in range(start, end + 1):
        xs.append(10 ** (i - 3))

        if i % 3 == 0:
            # Every third decade is labelled with an entry of ``prefix``;
            # floor division keeps the lookup key an integer on Python 3.
            ticks.append('{}s'.format(prefix[i // 3]))
        else:
            ticks.append(str(10 ** (i % 3)))

    plt.barh(pos, median, align='center', height=0.25, left=1e-3,
             color=bar_color, lw=0)
    # 'none' draws the error bars without any data markers.
    plt.errorbar(median, pos, ecolor=error_bar_color, fmt='none', xerr=err)

    plt.grid(True)
    plt.xlabel('Time')
    plt.xlim(min(xs), max(xs))
    plt.xscale('log')
    plt.xticks(xs, ticks)
    plt.ylim(ymax=size)
    plt.yticks(pos, language)

    # Second y axis on the right-hand side carrying the ``relative`` labels.
    plt.twinx()
    plt.ylim(ymax=size)
    plt.yticks(pos, relative)

    plt.savefig('plots/{}.png'.format(pid), bbox_inches='tight')
    plt.clf()
Exemplo n.º 2
0
    def as_pyplot_figure(self, label=1, **kwargs):
        """Build a horizontal bar chart of the explanation for one label.

        matplotlib is imported lazily, so an ImportError is raised here if it
        is not installed.

        Args:
            label: label whose explanation is plotted; it is forwarded to
                   ``self.as_list`` and used to index ``self.class_names``.
            kwargs: keyword arguments forwarded to ``self.as_list``.

        Returns:
            The pyplot figure holding the bar chart.
        """
        import matplotlib.pyplot as plt
        exp = self.as_list(label, **kwargs)
        fig = plt.figure()
        # Reverse both columns so the first entry ends up as the topmost bar.
        weights = [item[1] for item in exp][::-1]
        feature_names = [item[0] for item in exp][::-1]
        bar_colors = []
        for weight in weights:
            # Positive weights are green, everything else red.
            bar_colors.append('green' if weight > 0 else 'red')
        y_positions = np.arange(len(exp)) + .5
        plt.barh(y_positions, weights, align='center', color=bar_colors)
        plt.yticks(y_positions, feature_names)
        plt.title('Local explanation for class %s' % self.class_names[label])
        return fig
Exemplo n.º 3
0
def plot_errsh():
    """Plot train and test errors as two groups of horizontal bars.

    Reads the names ``Control_results``, ``ind``, ``width``, ``opacity`` and
    ``Datasets`` defined outside this function, saves the chart to
    ``errs.png`` and then shows it.
    """
    scores = Control_results

    plt.subplots()

    # Error bars are the stored standard deviations divided by sqrt(10).
    train_spread = scores['train_errs_std'] / np.sqrt(10)
    plt.barh(ind, scores['train_errs'], width,
             color='b',
             alpha=opacity,
             xerr=train_spread,
             label='$train$')

    # The test bars are shifted by one bar width so they sit next to the
    # train bars of the same dataset.
    test_spread = scores['test_errs_std'] / np.sqrt(10)
    plt.barh(ind + width, scores['test_errs'], width,
             color='r',
             alpha=opacity,
             xerr=test_spread,
             label='test')

    plt.ylabel('Performance (Error)')
    plt.title('Error (MSE)')
    plt.yticks(ind + width, Datasets)
    plt.legend()

    # Lay out, write to disk, then display.
    plt.tight_layout()
    plt.savefig('errs' + '.png')
    plt.show()
Exemplo n.º 4
0
def plot(data, reverse=False):
    """Show ``data`` as a horizontal bar chart.

    Args:
        data: sequence of pairs; element 0 of each pair is used as the y tick
            label and element 1 as the bar length.
        reverse: when true, the pairs are plotted in reverse order.
    """
    # Work on a copy so the caller's sequence is never reordered in place.
    pairs = list(data)
    if reverse:
        pairs.reverse()
    positions = range(len(pairs))
    plt.barh(positions, [pair[1] for pair in pairs])
    plt.yticks(positions, [pair[0] for pair in pairs])
    plt.show()
Exemplo n.º 5
0
def make_entity_plot(filename, title, fixed_noip, fixed_ip, dynamic_noip, dynamic_ip):
    """Save a four-bar horizontal chart comparing the given settings.

    Each of the four sample arguments is passed to ``conf_stats``, whose
    first result is used as the bar length and whose second result is used as
    the error bar.  The figure is saved to ``output_file(filename)`` and the
    current figure is cleared afterwards.
    """
    plt.figure(figsize=(12, 5))

    plt.title("Settings comparison - " + title)

    plt.xlabel('Time (ms)', fontsize=12)
    plt.xlim([0, 62000])

    # One (value, error) pair per setting, in the order the bars are drawn.
    stats = [conf_stats(samples)
             for samples in (fixed_noip, fixed_ip, dynamic_noip, dynamic_ip)]
    values = [value for value, _ in stats]
    errs = [spread for _, spread in stats]

    tick_labels = ["Fixed | no I.P.", "Fixed | I.P.", "Dynamic | no I.P.", "Dynamic | I.P."]
    y_pos = numpy.arange(len(values))
    plt.barh(y_pos, values, xerr=errs, align='center',
             color=['r', 'b', 'r', 'b'], ecolor='black', alpha=0.7)
    plt.yticks(y_pos, tick_labels)
    plt.savefig(output_file(filename))
    plt.clf()
Exemplo n.º 6
0
def pylot_show():
  """Show the rows of the ``douban`` table as a horizontal bar chart.

  Uses the cursor ``cur`` defined outside this function.  Column 1 of each
  row is used as the category label and column 2 (converted with ``int``) as
  the bar length.
  """
  sql = 'select * from douban;'
  cur.execute(sql)
  rows = cur.fetchall()   # read every row of the table
  counts = [int(row[2]) for row in rows]    # bar length per category
  categories = [row[1] for row in rows]     # category labels
  y_positions = np.arange(len(categories))  # one y coordinate per category

  # Draw the chart once, after all rows have been collected; drawing inside
  # the row loop would stack a growing set of translucent bars per row.
  plt.barh(y_positions, counts, color='y', align='center', alpha=0.4)  # alpha: fill opacity between 0 and 1
  plt.yticks(y_positions, categories)  # label the y axis with the category names
  plt.grid(axis = 'x')

  for value, y in zip(counts, y_positions):
    # Print each count just past the end of its bar.
    plt.text(value+3, y, value,  horizontalalignment='center', verticalalignment='center', weight='bold')

  # Axis decoration only needs to happen once, not once per label.
  plt.ylim(+28.0, -2.0)  # visible y range (inverted, so the first row is on top)
  plt.title('douban_top250')
  plt.ylabel('movie category')
  plt.subplots_adjust(bottom = 0.15)
  plt.xlabel('count')
  plt.show()
Exemplo n.º 7
0
Arquivo: photo.py Projeto: shch/weibo
def followsPicture(mp):
    """Show a horizontal bar chart of the (at most) ten largest values in ``mp``.

    Args:
        mp: mapping whose keys become the bar labels and whose values are
            converted with ``int`` to give the bar lengths.  The mapping
            itself is not modified.
    """
    # Stable descending sort: entries with equal values keep the mapping's
    # iteration order, and the slice caps the chart at ten bars.
    top = sorted(mp.items(), key=lambda item: item[1], reverse=True)[:10]
    label = [key for key, _ in top]
    val = [int(count) for _, count in top]

    # One position per bar actually drawn, so a mapping with fewer than ten
    # entries still lines up with its labels.
    pos = np.arange(len(val)) + .5
    plt.figure(1)
    plt.barh(pos, val, align='center')
    plt.yticks(pos, label)
    plt.xlabel(u'粉丝数目')
    string = u"统计人数:" + str(len(mp))
    plt.title(string)
    plt.show()
def draw_who_by_others():
    table = []
    with open('analysis/who_by_others.csv', 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        for row in reader:
            if not row:
                continue
            lang = row[0]
            mention_val = float(row[1])
            if not lang in tiobe_values:
                continue
            tiobe_val = tiobe_values[lang]
            if not tiobe_val:
                continue
            if not subreddit_has_alias[lang]:
                continue
            table.append((lang, mention_val / tiobe_val))
    print table
    table.sort(key=operator.itemgetter(0), reverse=True)
    print table
    langs, vals = zip(*table)

    y_pos = np.arange(len(langs))

    plt.barh(y_pos, vals, align='center', alpha=0.4)
    plt.yticks(y_pos, langs)
    plt.title('mentioned by others relative to tiobe value')
    plt.savefig('img/mentions_relative_to_tiobe.png', bbox_inches='tight')
Exemplo n.º 9
0
def barGraph(namesByYear):  # Bargraph generator
    """Show a horizontal bar chart for a sequence of (name, births) pairs.

    The first pair is placed at the highest y position, so the input order
    reads from top to bottom.
    """
    slots = range(len(namesByYear), 0, -1)
    labels = [name for (name, _) in namesByYear]
    births = [total for (_, total) in namesByYear]
    plt.title('Births By Name For Input Year')
    plt.xlabel('Births')
    plt.yticks(slots, labels)
    plt.barh(slots, births)
    plt.show()
Exemplo n.º 10
0
def StyleBar(cur, styleval, stylethresh):
    """Plot per-style rating statistics and return them as a DataFrame.

    The three arguments are forwarded to ``StyleRatings``; its ratings array
    is summarised by ``RatingsArrayIntoStats``.

    Returns:
        ``1`` when fewer than two styles were returned by ``StyleRatings``;
        otherwise a DataFrame indexed by style with the columns Mean, StDev,
        Count, Min and Max, where Mean and StDev are formatted strings.
    """
    ratingsarray, BeerStyles, BeerStyle_count = StyleRatings(
        cur, styleval, stylethresh)
    if len(BeerStyles) < 2:
        print("Too few things are rated, decrease stylethresh")
        return 1

    style_averages, style_std, style_min, style_max = RatingsArrayIntoStats(
        ratingsarray)

    # Bars are spaced 1.5 units apart.
    bar_positions = 1.5 * np.arange(len(BeerStyles))
    plt.figure(figsize=(18, 8))
    plt.barh(bar_positions, style_averages, xerr=style_std, align='center', alpha=0.4)
    plt.yticks(bar_positions, BeerStyles, fontsize=16)
    plt.xticks(fontsize=16)
    plt.xlabel('Rating', fontsize=18)
    plt.title(styleval + ' Style Ratings', fontsize=20)
    plt.show()

    # One row per statistic, transposed so each style becomes a row.
    stat_rows = (style_averages, style_std, BeerStyle_count, style_min, style_max)
    beerstyle_df = pd.DataFrame(data=np.vstack(stat_rows).T, index=BeerStyles,
                                columns=['Mean', 'StDev', 'Count', 'Min', 'Max'])
    two_decimals = '{:,.2f}'.format
    for column in ('StDev', 'Mean'):
        beerstyle_df[column] = beerstyle_df[column].map(two_decimals)
    return beerstyle_df
Exemplo n.º 11
0
def plot_hist_over_category(category_names_avgs_sem_triple_list, plt_number, x_label, groups, printse):
    """Save a horizontal bar chart with one bar per input triple.

    Args:
        category_names_avgs_sem_triple_list: sequence of triples; element 0
            is the y tick label, element 1 the bar length and element 2 the
            error bar size.
        plt_number: used (via ``str``) in the output file name.
        x_label: x axis label; also the sub-directory name under the
            externally defined ``plot_path``.
        groups: used (via ``str``) in the output file name.
        printse: when true, error bars taken from element 2 are drawn.
    """
    triples = category_names_avgs_sem_triple_list
    bar_height = 0.5
    count = len(triples)
    ind = np.linspace(0, count * bar_height, num=count)

    fig = plt.figure(figsize=(15.5, 10), dpi=800)
    axes = fig.add_subplot(111)
    for axis_name in ('y', 'x'):
        axes.tick_params(axis=axis_name, which='major', labelsize=10)

    # Fixed palette, repeated as often as needed to colour every bar.
    palette = cycle(["#CCD64B","#C951CA","#CF4831","#90D0D2","#33402A","#513864",
                     "#C84179","#DA983D","#CA96C4","#53913D","#CEC898","#70D94C",
                     "#CB847E","#796ACB","#74D79C","#60292F","#6C93C4","#627C76",
                     "#865229","#838237"])
    bar_colors = [next(palette) for _ in range(count)]

    averages = [triple[1] for triple in triples]
    optional = {}
    if printse:
        optional['xerr'] = [triple[2] for triple in triples]
    plt.barh(ind, averages, color=bar_colors, align='center',
             height=bar_height, **optional)

    plt.yticks(ind, [triple[0] for triple in triples])
    plt.xlabel(x_label)
    plt.ylabel("Categories")
    plt.subplots_adjust(bottom=0.15, left=0.14, right=0.95, top=0.95)
    plt.ylim([ind.min() - bar_height, ind.max() + bar_height])
    plt.xlim(min(averages) - bar_height, max(averages) + bar_height)

    target_dir = plot_path + x_label
    try:
        os.makedirs(target_dir)
    except OSError as exception:
        # An already existing directory is fine; anything else is re-raised.
        if exception.errno != errno.EEXIST:
            raise

    target = target_dir + "/" + str(plt_number) + "groups_" + str(groups)
    print("da wirds gespeichert:")
    print(target)
    plt.savefig(target)

    plt.close()
Exemplo n.º 12
0
def pylot_show():
    """Save the rows of the ``douban`` table as a bar chart in ``douban.png``.

    Uses the cursor ``cur`` defined outside this function.  Column 1 of each
    row is used as the category label and column 2 (converted with ``int``)
    as the bar length.  The collected values are also printed.
    """
    sql = 'select * from douban;'
    cur.execute(sql)
    rows = cur.fetchall()
    counts = [int(row[2]) for row in rows]
    categories = [row[1] for row in rows]
    print(counts)
    y_positions = np.arange(len(categories))
    print(y_positions)
    print(categories)

    plt.barh(y_positions, counts, align='center', alpha=0.4)
    plt.yticks(y_positions, categories)
    # Distinct loop names keep the ``counts``/``y_positions`` sequences
    # intact instead of rebinding them to their last element.
    for value, y in zip(counts, y_positions):
        plt.text(value, y, value,  horizontalalignment='center', verticalalignment='center', weight='bold')
    plt.ylim(+28.0, -1.0)  # inverted range, so the first row is on top
    plt.title(u'豆瓣电影250')
    plt.ylabel(u'电影分类')
    plt.subplots_adjust(bottom = 0.15)
    plt.xlabel(u'分类出现次数')
    plt.savefig('douban.png')
Exemplo n.º 13
0
def plot_feature_importances_cancer(model):
    """Draw ``model.feature_importances_`` as horizontal bars.

    The number of bars and the tick labels come from the externally defined
    ``cancer`` object (``cancer.data.shape[1]`` and ``cancer.feature_names``).
    """
    feature_count = cancer.data.shape[1]
    bar_positions = range(feature_count)
    plt.barh(bar_positions, model.feature_importances_, align='center')
    plt.yticks(np.arange(feature_count), cancer.feature_names)
    plt.xlabel("특성 중요도")
    plt.ylabel("특성")
    # Leave one unit of margin below the first and above the last bar.
    plt.ylim(-1, feature_count)
Exemplo n.º 14
0
def get_feature_importance_figure(estimator, feature_names):
    """Return a 12x8 figure with one horizontal bar per feature importance.

    Bar lengths come from ``estimator.feature_importances_``; the y ticks are
    labelled with ``feature_names``.
    """
    figure, axes = plt.subplots(figsize=(12, 8))
    positions = range(len(feature_names))
    plt.barh(positions, estimator.feature_importances_)
    axes.set_yticks(positions)
    axes.set_yticklabels(feature_names, fontsize=14)
    return figure
Exemplo n.º 15
0
def PlotFeaturesImportance(X,y,featureNames,dataName):
    '''
    Plot the relative contribution/importance of the features.

    A GradientBoostingClassifier with 40 estimators is fitted on (X, y); its
    feature importances are rescaled so the largest equals 100, sorted in
    ascending order and drawn as horizontal bars labelled with featureNames.
    The figure is shown in interactive mode and then saved to
    "<dataName>TopFeatures.png".

    Best to reduce to top X features first - for interpretability
    Code example from:
    http://bugra.github.io/work/notes/2014-11-22/an-introduction-to-supervised-learning-scikit-learn/
    '''
    gbc = GradientBoostingClassifier(n_estimators=40)
    gbc.fit(X, y)
    # Get Feature Importance from the classifier
    feature_importance = gbc.feature_importances_
    # Normalize the importances relative to the largest one
    feature_importance = 100 * (feature_importance / feature_importance.max())
    sorted_idx = numpy.argsort(feature_importance)
    pos = numpy.arange(sorted_idx.shape[0]) + 4.5
    plt.figure(figsize=(14, 9), dpi=250)
    plt.barh(pos, feature_importance[sorted_idx], align='center', color='#7A68A6')
    plt.yticks(pos, numpy.asanyarray(featureNames)[sorted_idx])

    plt.xlabel('Relative Importance')
    plt.title('%s: Top Features' %(dataName))
    # Pass a real boolean: the string 'off' is truthy and does not reliably
    # switch the grid off.
    plt.grid(False)
    # Interactive mode makes show() non-blocking, so the figure still exists
    # when savefig() runs.
    plt.ion()
    plt.show()
    plt.savefig(str(dataName)+'TopFeatures.png',dpi=200)
Exemplo n.º 16
0
def graph_combine_plot(combined_dictionary, folder_path, figure_size, color_code, sensitive_features, figure_name, font, title_font, target):
    """Save a horizontal bar chart of combined feature rankings as a PDF.

    Args:
        combined_dictionary: mapping of feature name to a pair whose first
            element is the bar length and whose second element is an error
            value (only used to extend the x axis limit).
        folder_path: directory the PDF is written to.
        figure_size: pair (width, height) stored in ``rcParams``.
        color_code: bar colour for features that are not sensitive.
        sensitive_features: container of lower-case feature names that are
            drawn in grey instead.
        figure_name: file name without the ``.pdf`` extension.
        font: font properties for the tick labels and the x label.
        title_font: font properties for the title.
        target: not used by this function.

    Returns:
        The string ``"plotted"``.
    """
    # Materialise the keys so they can be indexed on Python 3 as well.
    feature_names = list(combined_dictionary.keys())

    feature_importance = []
    feature_error_values = []
    for tp in combined_dictionary.values():
        feature_importance.append(tp[0])
        feature_error_values.append(tp[1])

    # Ascending order of importance.
    sorted_idx = np.argsort(feature_importance)

    original_names = [feature_names[i] for i in sorted_idx]
    # Capitalise the first character; slicing also copes with an empty name.
    final_column_list = [name[:1].upper() + name[1:] for name in original_names]
    truncated_feature_importance = [feature_importance[i] for i in sorted_idx]
    final_error_values = [feature_error_values[i] for i in sorted_idx]

    rcParams['figure.figsize'] = figure_size[0], figure_size[1]

    y_pos = np.arange(len(final_column_list)) + 0.5

    color_format = []
    for original_name, shown_name in zip(original_names, final_column_list):
        if shown_name.lower() in sensitive_features:
            # Report the feature that is actually being greyed out (the
            # sorted position must not be used to index the unsorted names).
            print(original_name)
            color_format.append('grey')
        else:
            color_format.append(color_code)

    plt.barh(y_pos, truncated_feature_importance, align='center', color=color_format)
    plt.yticks(y_pos, final_column_list, **font)
    plt.xlabel('Combined Attribute Ranking', fontdict=font)
    plt.title('Combined Feature Importance \n across all Methodologies', fontdict=title_font)

    # Leave room on the right for the largest bar plus the largest error.
    max_x = np.max(np.array(truncated_feature_importance)) + np.max(final_error_values)

    plt.xlim([-1, max_x])
    plt.savefig(folder_path + "/" + "{0}.pdf".format(figure_name), bbox_inches='tight')
    plt.clf()

    return "plotted"
Exemplo n.º 17
0
def plot(results, total_a, total_b, label_a, label_b, outputFile=None):
    """Chart the support of each rule in two groups as paired horizontal bars.

    Args:
        results: iterable of dicts with the keys ``item``, ``count_a`` and
            ``count_b``.
        total_a: divisor for ``count_a``.
        total_b: divisor for ``count_b``.
        label_a: legend label of the blue bars.
        label_b: legend label of the red bars.
        outputFile: path to save the figure to; when ``None`` the figure is
            shown instead.
    """
    def ordering(rule):
        # Longest items first, then by rounded support gap, then by rounded
        # support in group a.
        return (-len(rule['item']),
                round(abs(rule['count_a'] / total_a - rule['count_b'] / total_b), 2),
                round(rule['count_a'] / total_a, 2))

    all_rules = sorted(results, key=ordering)

    values_a = [100 * rule['count_a'] / total_a for rule in all_rules]
    values_b = [100 * rule['count_b'] / total_b for rule in all_rules]

    plt.rc('figure', autolayout=True)
    plt.rc('font', size=22)

    fig, ax = plt.subplots(figsize=(24, 18))
    bar_width = 0.35
    positions = range(len(all_rules))
    shifted = [i + bar_width for i in positions]

    # A leading space is prepended to labels that start with an underscore.
    if label_a.startswith('_'):
        label_a = ' ' + label_a
    if label_b.startswith('_'):
        label_b = ' ' + label_b

    bar_a = plt.barh(positions, values_a, bar_width, color='b', label=label_a)
    bar_b = plt.barh(shifted, values_b, bar_width, color='r', label=label_b)

    plt.xlabel('Support')
    plt.ylabel('Rule')
    plt.title('Most interesting deviations')
    plt.yticks(shifted, [rule_to_str(rule['item']) for rule in all_rules])
    if len(all_rules) > 0:
        plt.legend(handles=[bar_b, bar_a], loc='best')

    if outputFile is None:
        plt.show()
    else:
        plt.savefig(outputFile)
    plt.close(fig)
def plot_silhouette(km, filename='tmp.png', fast=False):
    """ Saves a silhouette plot to filename, showing the distributions of silhouette scores in clusters.

    :param km: a k-means clustering object; ``km.k`` and ``km.clusters`` are read.
    :type km: :class:`KMeans`
    :param filename: name of output plot.
    :type filename: string
    :param fast: if True use :func:`score_fast_silhouette` to compute scores instead of :func:`score_silhouette`
    :type fast: boolean.

    """
    import matplotlib.pyplot as plt
    plt.figure()
    scoring = score_fast_silhouette if fast else score_silhouette
    scores = [[] for _ in range(km.k)]
    for i, c in enumerate(km.clusters):
        scores[c].append(scoring(km, i))

    # Flatten the per-cluster scores into one bar sequence, each cluster
    # preceded by three zero-length bars as a spacer.  Done with plain lists
    # so it works on Python 3, where map() is lazy and reduce() is not a
    # builtin.
    bars = []
    cpositions = []
    for cluster_scores in scores:
        bars.extend([0] * 3)
        # Tick at the middle of this cluster's run of bars.
        cpositions.append(len(bars) + len(cluster_scores) // 2)
        bars.extend(sorted(cluster_scores))

    plt.barh(range(len(bars)), bars, linewidth=0, color='c')
    plt.yticks(cpositions, [str(c) for c in range(km.k)])
    plt.ylabel('Cluster')
    plt.xlabel('Silhouette value')
    plt.savefig(filename)
Exemplo n.º 19
0
def myplot_hbar_group(df, cols, colors=None, legend_suffix=None, ylabel=None, xlabel=None,
                      show=False, exp_prefix=None):
    """Draw the columns ``cols`` of ``df`` as grouped horizontal bars.

    Args:
        df: table whose rows become bar groups; its index labels the y ticks
            and its last index entry is used as the title.
        cols: column names to plot, one bar per column in every group.
        colors: optional sequence with one colour per entry of ``cols``;
            when omitted the default colours are used.
        legend_suffix: optional sequence with one string per entry of
            ``cols`` that is appended to the column name in the legend.
        ylabel: y axis label.
        xlabel: x axis label.
        show: when true, display the figure.
        exp_prefix: when given, the figure is saved to
            ``<exp_prefix>hbar_grp.png``.
    """
    import numpy as np

    plt.figure()
    index = np.arange(df.shape[0])
    bar_width = 0.30

    for col_ix, col in enumerate(cols):
        # The defaults are None, so only index them when they were supplied.
        color = colors[col_ix] if colors is not None else None
        suffix = legend_suffix[col_ix] if legend_suffix is not None else ''
        plt.barh(index + bar_width * col_ix, df[col],
                 bar_width, color=color, label=col + suffix)

    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    plt.yticks(index + bar_width, list(df.index), fontsize=4)
    plt.legend(loc='best', shadow=True, fontsize='xx-small')
    plt.title(list(df[-1:].index)[0], fontsize='x-small')
    if show:
        sys.stderr.write("*** displaying a plot...\n")
        plt.show()

    if exp_prefix is not None:
        exp_filename = exp_prefix + "hbar_grp" + '.png'
        print("\t\texporting plot:{0} ...".format(exp_filename))
        plt.savefig(exp_filename, dpi=200)
Exemplo n.º 20
0
def basic_training(clf, x_train, x_test, y_train, y_test, plot_importance=False):
    """Fit ``clf``, report its test score and timing, and return the predictions.

    Args:
        clf: estimator providing ``fit`` and ``predict`` (and
            ``feature_importances_`` when ``plot_importance`` is true).
        x_train: training features; ``x_train.columns`` labels the
            importance chart.
        x_test: features to predict on.
        y_train: training targets.
        y_test: targets passed to ``performance_metric`` with the
            predictions.
        plot_importance: when true, show a chart of the feature importances
            rescaled so the largest equals 100.

    Returns:
        The predictions of ``clf`` for ``x_test``.
    """
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print('----------------------')
    print('Basic training')
    print(clf)

    start = time()

    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    print("RMSE: {}".format(performance_metric(y_test, y_pred)))

    # The measured span covers fitting, predicting and scoring.
    end = time()
    print("Trained model in {:.4f} seconds".format(end - start))

    # plot feature importance
    if plot_importance:
        importance = clf.feature_importances_
        importance = 100.0 * (importance / importance.max())

        sorted_idx = np.argsort(importance)
        pos = np.arange(sorted_idx.shape[0]) + .5
        plt.figure()
        plt.barh(pos, importance[sorted_idx], align='center')
        plt.yticks(pos, x_train.columns[sorted_idx])
        plt.xlabel('Relative Importance')
        plt.title('Variable Importance')
        plt.show()

    print('Done basic training!')

    return y_pred
Exemplo n.º 21
0
def plot_feature_importance(estimator, columns, n=50):
    """ Plots feature importance

    Parameters
    ----------
    estimator : estimator for which the important features needs to be drawn;
        its ``feature_importances_`` attribute is read
    columns : names of all features, in the same order as the importances
    n : int, maximum number of important features to plot

    Return
    ------
    None; the chart is drawn on figure number ``CURR_FIGURE``, which is then
    incremented.
    """

    global CURR_FIGURE

    # extract feature importance and normalize them to sum up to 100
    feature_importance = estimator.feature_importances_
    feature_importance = (100.0 * feature_importance) / sum(feature_importance)

    # Never ask for more bars than there are features; otherwise the bar
    # positions and the bar lengths would differ in size.
    n = min(n, len(feature_importance))
    index = np.argsort(feature_importance)[::-1][0:n]

    # An array supports indexing by ``index`` even when a list is passed.
    feature_names = np.asarray(columns)

    # plot, most important feature on top
    plt.figure(CURR_FIGURE)
    pos = (np.arange(n) + .5)[::-1]
    plt.barh(pos, feature_importance[index], align='center')
    plt.yticks(pos, feature_names[index])
    plt.xlabel('Relative Importance')
    plt.title(str(n) + ' Most Important Features')
    CURR_FIGURE += 1
Exemplo n.º 22
0
def model_metrics(classifiers, var_names):
    """Print and plot the feature importances averaged over ``classifiers``.

    For every classifier its ``feature_importances_`` and the standard
    deviation of the importances across its ``estimators_`` are collected;
    both are then averaged over all classifiers.  Features whose mean
    importance is not positive are dropped.

    Args:
        classifiers: sequence of fitted ensemble classifiers.
        var_names: feature names, in the same order as the importances.
    """
    print('Gini Importances:')

    importances = np.zeros(shape=(len(classifiers), len(var_names)))
    importances_std = np.zeros(shape=(len(classifiers), len(var_names)))
    for i, classifier in enumerate(classifiers):
        importances[i, :] = classifier.feature_importances_
        importances_std[i, :] = np.std(
            [tree.feature_importances_ for tree in classifier.estimators_],
            axis=0)

    mean_importances = np.mean(importances, axis=0)
    std_importances = np.mean(importances_std, axis=0)

    # Remove non-important feats.  Building a list keeps this working on
    # Python 3, where zip() returns an iterator.
    feats = [feat
             for feat in zip(var_names, mean_importances, std_importances)
             if feat[1] > 0.0]

    def mean_of(feat):
        return feat[1]

    # Table: most important first.
    print(tabulate(sorted(feats, key=mean_of, reverse=True),
                   headers=['Variable', 'Mean', 'Std']))
    # Chart: ascending, so the most important bar ends up on top.
    feats.sort(key=mean_of)

    names = [feat[0] for feat in feats]
    means = [feat[1] for feat in feats]
    stds = [feat[2] for feat in feats]

    # Plot the feature importances of the classifier
    plt.figure()
    plt.title("Gini Importance")
    y_pos = np.arange(len(feats))
    plt.barh(y_pos, width=means, height=0.5, color='r', xerr=stds, align="center")
    plt.yticks(y_pos, names)
    plt.show()
Exemplo n.º 23
0
Arquivo: plot.py Projeto: nlasso/Redes
def plot_zrtt_treshold(data, output_path):
    """Save a horizontal bar chart of the ZRTT value of every hop.

    Args:
        data: iterable of triples ``(ip, pais, zrtt)``; ``ip`` and ``pais``
            are joined with a newline to form the tick label and ``zrtt`` is
            converted with ``float``.
        output_path: file the figure is written to.
    """
    threshold = 1
    gateways, zrtts = [], []
    for hop in data:
        ip, pais, zrtt = hop
        gateways.append(ip+"\n"+pais)
        zrtts.append(float(zrtt))
    # Reverse so the first hop is drawn at the top of the chart.
    gateways.reverse()
    zrtts.reverse()

    fig = plt.figure()
    y_pos = np.arange(len(gateways))
    plt.barh(y_pos, zrtts, align='center', alpha=0.4)
    plt.yticks(y_pos, gateways, horizontalalignment='right', fontsize=9)
    plt.title('ZRTTs para cada hop')
    plt.xlabel('ZRTT')
    plt.ylabel('Hop')

    # Vertical line at x=0
    plt.vlines(0, -1, len(gateways), alpha=0.4)

    # ZRTT threshold
    plt.vlines(threshold, -1, len(gateways), linestyle='--', color='b', alpha=0.4)
    plt.text(threshold, len(gateways) - 1, 'Umbral', rotation='vertical',
             verticalalignment='top', horizontalalignment='right')
    fig.set_size_inches(6, 9)
    plt.tight_layout()
    # The keyword is ``bbox_inches``; the misspelt ``box_inches`` is not a
    # savefig option.
    plt.savefig(output_path, dpi=1000, bbox_inches='tight')
Exemplo n.º 24
0
def plot_unique_by_date(alignment_summaries, metadata):
    """Plot the ``uniq_rate`` of each alignment summary, ordered by date.

    Args:
        alignment_summaries: summaries passed one by one to
            ``simpleseq.sam.get_alignment_metadata``; they also serve as the
            index that is matched against the metadata index.
        metadata: CSV file whose first column is the index and which has a
            ``Date Produced`` column.

    Only summaries present in the metadata index are drawn.  A dashed line
    marks the rate of the ``Klein_in_drop`` entry, which therefore must be
    among ``alignment_summaries``.
    """
    plt.figure(figsize=(8, 5.5))
    # read_csv with these options is the replacement for the removed
    # DataFrame.from_csv (first column as index, dates parsed).
    df_meta = pd.read_csv(metadata, index_col=0, parse_dates=True)
    df_meta['Date Produced'] = pd.to_datetime(df_meta['Date Produced'])

    alndata = []
    for summary in alignment_summaries:
        alndata.append(simpleseq.sam.get_alignment_metadata(summary))

    unique = pd.Series(np.array([s['uniq_rate'] for s in alndata]),
                       index=alignment_summaries)

    # plot unique alignments, newest first
    index = df_meta.index.intersection(unique.index)
    order = df_meta.loc[index].sort_values(by='Date Produced', ascending=False).index
    positions = np.arange(len(index))
    rates = unique.loc[order]
    bar_height = 0.9
    # Edge alignment keeps each bar inside [position, position + 1), which is
    # what the +0.5 tick offset and the (0, n) y range below assume.
    plt.barh(positions, rates, bar_height, align='edge')
    plt.yticks(positions + 0.5, order, fontsize=10)
    ymin, ymax = 0, len(positions)
    plt.ylim((ymin, ymax))
    plt.xlabel('percentage')
    plt.title('comparative alignment summary')
    plt.ylabel('time (descending)')

    # plot klein in-drop line
    plt.vlines(unique['Klein_in_drop'], ymin, ymax, color='indianred', linestyles='--')

    sns.despine()
    plt.tight_layout()
Exemplo n.º 25
0
def plot_freqs(freqs, n=30):
    """Show the ``n`` most frequent words as horizontal bars, largest on top.

    Args:
        freqs: mapping of word to frequency.
        n: maximum number of words to plot; capped at ``len(freqs)``.
    """
    if n > len(freqs):
        n = len(freqs)

    # sort in decreasing order and keep only the top n words
    words_sorted = sorted(freqs, key=freqs.get, reverse=True)[:n]
    freqs_sorted = [freqs[word] for word in words_sorted]

    # plot
    fig = plt.figure(figsize=(6,4))
    beautify_plot(fig)
    plt.ylim(0,n)

    # Centre bar k at k + 0.5 so that n bars of height 1 fill the (0, n)
    # range exactly; the most frequent word gets the highest position.
    bar_locs = np.arange(n - 1, -1, -1) + 0.5
    bar_width = 1.0
    plt.barh(bar_locs, freqs_sorted, height=bar_width,
             align='center', color=t20[0], alpha=0.8, linewidth=0)

    # Label each bar with its word: the ticks reuse the bar centres and the
    # label list has exactly one entry per bar.
    plt.yticks(bar_locs, words_sorted)
    plt.xlabel('Word Frequency (per billlion)')
    plt.title('Top ' + str(n) + ' words used in Billboard 100 Songs')
    plt.show()
Exemplo n.º 26
0
def lookAtVoltages(voltagetraces,startRec,plotTime):
  """Plot the given traces with their average and a side histogram.

  Prints and returns the mean and standard deviation of the per-trace
  means.  The first ``int(plotTime)`` samples of one trace per entry of the
  externally defined ``voltList`` are drawn in grey against a time axis
  starting at ``int(startRec)``, with their average in red and a dotted
  zero line.  A second, axis-less panel on the right shows a histogram of
  all plotted samples.

  NOTE(review): ``zeros``, ``mean``, ``var``, ``sqrt``, ``sum``, ``arange``
  and ``histogram`` are presumably the numpy versions brought in by a star
  import - confirm.  The ``new=``/``normed=`` histogram arguments and the
  print statements make this Python 2 / legacy-numpy code.

  Returns:
    (fig, meanV, stdDev)
  """
  # Mean of every individual trace.
  voltage_means=zeros(len(voltagetraces))
  for n in range(len(voltagetraces)):
    voltage_means[n]=mean(voltagetraces[n])
  print 'mean voltages (mean,std.dev):'
  meanV=mean(voltage_means)
  stdDev=sqrt(var(voltage_means))
  print meanV,stdDev
  fig=plt.figure()
  # Main panel; the strip right of it (x from .85) is used by ax2 below.
  ax1=fig.add_axes([.15,.1,.7,.8]) 
  # One row per plotted trace, one column per plotted time step.
  plotVolts=zeros([len(voltList),int(plotTime)])
  for i in range(len(voltList)):
    plotVolts[i,:]=voltagetraces[i][0:int(plotTime)]
    plt.plot(range(int(startRec),int(plotTime)+int(startRec)),voltagetraces[i][0:int(plotTime)],color='0.75',label=str(voltList[i]))
  # Average over the plotted traces (sum along axis 0 divided by their number).
  plt.plot(range(int(startRec),int(plotTime)+int(startRec)),sum(plotVolts,0)/len(voltList),'r',linewidth=3)
  plt.ylabel("rate [Hz]")
  plt.xlabel("time [ms]")
  # Bin edges spanning the plotted value range in steps of 1/50 of that range.
  bins=arange(plotVolts.min(),plotVolts.max(),(plotVolts.max()-plotVolts.min())/50)
  hist=zeros(len(bins)-1)
  # Accumulate the per-time-step histograms into one overall histogram.
  for n in range(int(plotTime)):
    hist=hist+histogram(plotVolts[:,n], bins, new=True, normed=False)[0]
  # Dotted reference line at zero.
  plt.plot(range(int(startRec),int(plotTime)+int(startRec)),zeros(int(plotTime)),'k:',linewidth=3)
  ax2=fig.add_axes([.85,.1,.1,.8]) 
  ax2.set_axis_off()
  plt.barh(bins[:-1],hist[:],height=(bins[1]-bins[0]),edgecolor='b')
  # Give both panels the same y range so the histogram lines up with the traces.
  ax1.set_ylim(plotVolts.min(),plotVolts.max())
  ax2.set_ylim(ax1.get_ylim())
  return fig,meanV,stdDev
Exemplo n.º 27
0
def plot_feature_importance(regressor, params, X_test, y_test):
    """Show the staged train/test loss next to the relative feature importances.

    Args:
        regressor: fitted model providing ``staged_predict``, ``loss_``,
            ``train_score_`` and ``feature_importances_``.
        params: dict; ``params['n_estimators']`` is the number of stages.
        X_test: features used for the staged predictions.
        y_test: targets the staged predictions are scored against.

    The importance labels come from the externally defined ``feature_cols``.
    """
    stage_count = params['n_estimators']
    test_score = np.zeros((stage_count,), dtype=np.float64)

    # Loss on the test set after each stage.
    for stage, staged_pred in enumerate(regressor.staged_predict(X_test)):
        test_score[stage] = regressor.loss_(y_test, staged_pred)

    plt.figure(figsize=(12, 6))

    # Left panel: loss per stage (stages are numbered from 1).
    plt.subplot(1, 2, 1)
    plt.title('MAE Prediction vs. Actual (USD) ')
    stages = np.arange(stage_count) + 1
    plt.plot(stages, regressor.train_score_, 'b-', label='Training set Deviance')
    plt.plot(stages, test_score, 'r-', label='Test set deviance')
    plt.legend(loc='upper right')
    plt.xlabel('Boosting Iterations')
    plt.ylabel('Mean absolute error')

    # Right panel: importances rescaled so the largest equals 100, ascending.
    raw_importance = regressor.feature_importances_
    relative_importance = 100.0 * (raw_importance / raw_importance.max())
    ascending = np.argsort(relative_importance)
    bar_positions = np.arange(ascending.shape[0]) + .5
    plt.subplot(1, 2, 2)
    plt.barh(bar_positions, relative_importance[ascending], align='center')

    names = np.array(feature_cols)
    plt.yticks(bar_positions, names[ascending])

    plt.xlabel('Relative importance')
    plt.title('Variable Importance')

    plt.show()
Exemplo n.º 28
0
def bii_hbar(group,code,in_data):
    """Save a horizontal bar chart of eight category means to ``static/hbar``.

    Args:
        group: when truthy, the standard deviation of every category is
            added as an error bar.
        code: text for the title; a two-element non-string value is joined
            with a space first.
        in_data: nine sequences - eight categories followed by the score,
            whose mean is drawn as a vertical line and shown in the x label.
    """
    [trust,res,div,bel,collab,resall,comfort,iz,score] = in_data
    categories = (trust, res, div, bel, collab, resall, comfort, iz)
    tick_names = ('Tru', 'Res', 'Div', 'Ment Str','Collab', 'Res All', 'Com Zone', 'In Zone')
    plt.figure()

    if len(code) == 2 and not isinstance(code, basestring):
        code = code[0] + " " + code[1]

    # Reversed so the first category ends up at the top of the chart.
    val = [mean(category) for category in categories][::-1]
    pos = arange(8)    # the bar centers on the y axis

    overall = mean(score)
    plt.plot((overall, overall), (-1, 8), 'g', label='Average', linewidth=3)
    plt.barh(pos, val, align='center', label='Score')
    if group:
        err = [std(category) for category in categories][::-1]
        plt.errorbar(val, pos, xerr=err, label="St Dev", color='r', fmt='o')

    lgd = plt.legend(loc='upper center', shadow=True, fontsize='x-large',
                     bbox_to_anchor=(1.1, 1.1), borderaxespad=0.)
    plt.yticks(pos, tick_names[::-1])
    plt.xlabel('Score')
    plt.title('Results for ' + code, fontweight='bold', y=1.01)
    # Replaces the plain 'Score' label set above.
    plt.xlabel(r'$\mathrm{Total \ Innovation \ Index \ Score:}\ %.3f$' %(overall),fontsize='18')
    plt.gca().set_xlim([0,10])

    file_name = "hbar"
    path_name = "static/%s" %file_name
    # The legend sits outside the axes, so it is passed explicitly to be
    # included in the tight bounding box.
    plt.savefig(path_name, bbox_extra_artists=(lgd,), bbox_inches='tight')
Exemplo n.º 29
0
def barh_plot():
    """
    Show a grouped horizontal bar chart built from two fixed score tuples.
    """
    # Sample data
    men_scores = (20, 35, 30, 35, 27)
    women_scores = (25, 32, 34, 20, 25)

    # Title, rendered with the externally defined ``myfont``
    plt.title("横向柱状图", fontproperties=myfont)

    # Layout parameters
    index = np.arange(len(men_scores))
    bar_height = 0.35

    # Horizontal bars; the second series is shifted up by one bar height
    plt.barh(index, men_scores, height=bar_height, alpha=0.2, color="b", label="Men")
    plt.barh(index + bar_height, women_scores, height=bar_height, alpha=0.8, color="r", label="Women")
    plt.legend(loc="upper right", shadow=True)

    # Write every value just to the right of its bar
    for offset, scores in ((0, men_scores), (bar_height, women_scores)):
        for row, score in zip(index, scores):
            plt.text(score + 0.3, row + offset, score, ha="left", va="center")

    # Axis range, axis names and tick labels
    plt.xlim(0, 45)
    plt.xlabel("Scores")
    plt.ylabel("Group")
    plt.yticks(index + (bar_height / 2), ("A", "B", "C", "D", "E"))

    # Display the figure
    plt.show()
    return
Exemplo n.º 30
0
def visualize_silhouette_score(X,y_km):
    """Show a silhouette plot for the cluster assignment ``y_km`` of ``X``.

    Every cluster gets a contiguous run of horizontal bars (its sorted
    per-sample silhouette values) in its own colour; a dashed red line marks
    the mean over all samples.  Tick labels are the cluster labels plus one.
    """
    cluster_labels = np.unique(y_km)
    n_clusters = cluster_labels.shape[0]
    silhouette_vals = metrics.silhouette_samples(X, y_km, metric='euclidean')

    run_start = 0
    tick_positions = []
    for position, cluster in enumerate(cluster_labels):
        cluster_vals = np.sort(silhouette_vals[y_km == cluster])
        run_end = run_start + len(cluster_vals)
        plt.barh(range(run_start, run_end),
                 cluster_vals,
                 height=1.0,
                 edgecolor='none',
                 color=cm.jet(position / n_clusters))
        # Tick in the middle of this cluster's run of bars.
        tick_positions.append((run_start + run_end) / 2)
        run_start = run_end

    plt.axvline(np.mean(silhouette_vals), color="red", linestyle="--")
    plt.yticks(tick_positions, cluster_labels + 1)
    plt.ylabel('Cluster')
    plt.xlabel('Silhouette coefficient')
    plt.show()
Exemplo n.º 31
0
    'Inclination', 'Asc Node Longitude', 'Perihelion Distance',
    'Perihelion Arg', 'Perihelion Time', 'Mean Anomaly'
]]

# Use the feature importances of a random forest to select the most
# important features.
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor(random_state=1, max_depth=10)

model.fit(X, y)

features = X.columns
importances = model.feature_importances_
# Column positions of the three largest importances, largest first.
indices = np.argsort(importances)[-1:-4:-1]

plt.title('Feature Importances')
plt.barh(range(len(indices)), importances[indices], color='b', align='center')
plt.yticks(range(len(indices)), [features[i] for i in indices])
plt.xlabel('Relative Importance')
plt.show()

# The feature importance graph clearly shows that just three variables
# contribute more than 96% to the target, while each of the other variables
# contributes less than 1%, so only those three variables are kept:
# (Minimum Orbit Intersection, Est Dia in M(average) and Absolute Magnitude)

# Restrict X to the three selected columns so the data can be plotted.
X = X.iloc[:, indices]


def plotData2D(X=X, y_=y):
plt.savefig('mytable.png')

# INSPECTION ======================================================================================
pylab.rcParams['figure.figsize'] = (14.5, 6.0)

# Number of records per crime category (value_counts sorts descending).
crimes_rating = crimeData['Category'].value_counts()
print('San Francisco Crimes\n')
print('Category\t\tNumber of occurences')
print(crimes_rating)

# Plot only the `top` most frequent categories; slice once and reuse it.
top = 18
top_crimes = crimes_rating.iloc[:top]
y_pos = np.arange(len(top_crimes))

# Series.get_values() was removed in pandas 1.0; .values is the portable
# way to get the underlying array.
plt.barh(y_pos,
         top_crimes.values,
         align='center',
         alpha=0.4,
         color='blue')
plt.yticks(y_pos, [x.title() for x in top_crimes.index],
           fontsize=11)
plt.xlabel('Number of occurences', fontsize=14)
plt.title('San Francisco Crime Classification', fontsize=26)
# Use scientific notation for the (large) counts on the x axis.
plt.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
plt.savefig("crimes_occurences.png")
"""
Add new features to the dataset:
    Weekday (Monday, Tuesday, ...)
    Hour of day
    Month
    Year
    Day of month
df.set_index(['所在区域'], inplace=True)
# Group the rows by district (the index level set above).
groupby_obj = df.groupby("所在区域")
# Mean total price per district. The column is selected *before* aggregating
# so that non-numeric columns are never averaged (pandas >= 2.0 raises a
# TypeError when mean() meets a non-numeric column).
total_list = groupby_obj["总价"].mean()
print(total_list)
# The district names (the index) are used as the bar labels.
total_index_list = total_list.index
print(total_list, total_index_list)
# Tick positions: one per district on y, every 10 units up to 250 on x.
y_ticks = range(len(total_index_list))
x_ticks = range(0, 260, 10)
# Create the figure.
plt.figure(figsize=(20, 9), dpi=80)
# Draw the horizontal bars.
plt.barh(y=y_ticks, width=total_list, height=0.2, color='orange')
# Label the y-axis ticks with the district names.
plt.yticks(ticks=y_ticks, labels=total_index_list)
# Set the x-axis ticks.
plt.xticks(ticks=x_ticks)

# Axis labels and title (runtime strings are kept in the original language).
plt.xlabel("区域平均价格(单位:万元)")
plt.ylabel("区域名称")
plt.title("成都2020年2月份各区平均房价一览图")

# Turn on the grid.
plt.grid()
# Save the figure.
plt.savefig(to_path)
# 展示图片
Exemplo n.º 34
0
# Generate feature-importance data for the trained Random Forest model.
importances = trained_model.feature_importances_
indices = np.argsort(importances)[::-1]
# 'msno' is dropped from the labels. The old call drop('msno', 1) passed 1 as
# Index.drop's `errors` argument, not as an axis, so it is simply omitted here.
trainlabels = list(train_x.columns.drop('msno'))
importanceList = np.asarray(importances).tolist()
# Map each feature name to its importance score.
featureList = dict(zip(trainlabels, importanceList))

# Sorted importance scores. dict.values() is a view on Python 3 and has no
# .sort(), so build a sorted list instead.
vallist = sorted(featureList.values())
import operator

# (feature, score) pairs ordered from most to least important.
sorted_d = sorted(featureList.items(),
                  key=operator.itemgetter(1),
                  reverse=True)

# Plot the feature importances.
# The figure size must be configured before the figure is created for it to
# apply to this figure.
plt.rcParams['figure.figsize'] = 17, 12
plt.figure()
plt.title("Feature importances")
plt.yticks(range(len(trainlabels)), trainlabels)
# Each bar sits at its own feature index, so it lines up with the tick labels.
plt.barh(indices, importances[indices], color="b", align="center")
# The original plt.xlim([-1,]) raised because a one-element list cannot be
# unpacked into (left, right). On a horizontal bar chart the -1 padding belongs
# to the categorical (y) axis, so pad that axis by one slot on each side.
plt.ylim(-1, len(trainlabels))
plt.xlabel('Features importance score')
plt.show()
Exemplo n.º 35
0
    "智取威虎山",
    "大闹天竺",
    "金刚狼3:殊死一战",
    "蜘蛛侠:英雄归来",
    "悟空传",
    "银河护卫队2",
    "情圣",
    "新木乃伊",
]
## Box-office figures for the top-20 films (the bar lengths), unit: 100 million
y = [
    56.01, 26.94, 17.53, 16.49, 15.45, 12.96, 11.8, 11.61, 11.28, 11.12, 10.49,
    10.3, 8.75, 7.55, 7.32, 6.99, 6.88, 6.86, 6.58, 6.23
]

## Set the figure size and resolution
pyplot.figure(figsize=(20, 8), dpi=80)

## Draw the horizontal bar chart
# Note: numeric positions are used for the bars; the title strings are
# attached afterwards as tick labels.
# `height` sets the bar thickness, `color` sets the bar colour.
pyplot.barh(range(len(x)), y, height=0.3, color='orange')

## Label each y-axis tick position with the corresponding film title
pyplot.yticks(range(len(x)), x)

## Draw the grid; `alpha` sets the transparency of the grid lines
pyplot.grid(alpha=0.3)

## Show the bar chart
pyplot.show()
Exemplo n.º 36
0
# revol_util looks like a numeric variable stored as a percent string, so strip
# the trailing '%' and convert it to a float fraction:
loan_stats_data['revol_util'] = loan_stats_data['revol_util'].str.rstrip(
    '%').astype('float') / 100

print('zip_code:')
print(loan_stats_data.zip_code.unique())
print("Too many categories. We won't use this variable in further analysis.")

# Find numeric variables:
# NOTE(review): _get_numeric_data is a private pandas method — confirm it is
# still available in the pandas version in use.
numeric_data = loan_stats_data._get_numeric_data()

# Pearson correlation between every pair of numeric columns; the
# loan_status_bool column of the result is what gets plotted below:
r = numeric_data.corr(method="pearson")

# Bar chart of each parameter's correlation with loan_status_bool.
# NOTE(review): the [:-1] slices assume loan_status_bool is the last numeric
# column, so that dropping the last entry removes its self-correlation — confirm.
plt.figure(figsize=(20, 10))
plt.barh(r.columns[:-1], r.loan_status_bool[:-1])
plt.axvline(color='black')
plt.xlim(-0.4, 0.4)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.ylabel('Parameter', fontsize=25)
plt.xlabel('Correlation coefficient (r)', fontsize=25)
plt.title('Correlations between all numerical\nparameters and loan status',
          fontsize=30)
plt.show()

# Choose only numeric parameters with an absolute correlation greater than 0.1:
r = r.loan_status_bool[:-1]
corr_params = r.values
is_corr = np.where(np.abs(corr_params) > 0.1)
selected_numeric_params = numeric_data[r.index[is_corr]]
Exemplo n.º 37
0
def traffic_plot_bar(X, column):
    """Show a horizontal bar chart of how often each value of ``X[column]`` occurs.

    Args:
        X: object indexable by ``column`` whose result offers
            ``value_counts()`` (a pandas DataFrame in practice).
        column: name of the column to count.
    """
    counts = X[column].value_counts()
    labels = tuple(counts.index)
    # One bar per distinct value, labelled with the value itself.
    plt.barh(np.arange(len(labels)), counts, tick_label=labels)
    plt.show()
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pyodbc as db
import matplotlib.pyplot as plt
# Connect to the db_final SQL Server database using Windows authentication.
con = db.connect(
    'DRIVER={ODBC Driver 13 for SQL Server};SERVER=.;Trusted_Connection=yes;DATABASE=db_final'
)
try:
    cursor = con.cursor()

    # Count per faculty and year, largest counts first.
    cursor.execute(
        "SELECT distinct [Fakultas], YEAR(tanggal), COUNT(mahasiswa) FROM [db_final].[dbo].[MHS]  GROUP BY [Fakultas],YEAR(tanggal) ORDER BY COUNT(mahasiswa) DESC"
    )
    rows = cursor.fetchall()
finally:
    # All rows are already in memory, so release the connection even if the
    # query failed.
    con.close()

# Name the columns up front so that an empty result set still produces a
# frame with the expected columns instead of a KeyError further down.
df = pd.DataFrame.from_records([tuple(row) for row in rows],
                               columns=['Fakultas', 'Tahun', 'Jumlah'])

y = df['Jumlah']
x = df['Fakultas']

plt.barh(x, y)
plt.show()

df
Exemplo n.º 39
0
print('#', "-" * 50)
# -----------------------
# Scatter plot of 1024 random 2-D points (first column on x, second on y).
data = np.random.rand(1024, 2)
plt.scatter(*data.T)
plt.show()
print('#', "-" * 50)
# -----------------------
# Vertical bars at default width.
data = [5, 10, 30, 8]
plt.bar(range(len(data)), data)
plt.show()
print('#', "-" * 50)
# -----------------------
# The same bars at full width, then drawn horizontally.
plt.bar(range(len(data)), data, width=1.)
plt.show()

plt.barh(range(len(data)), data)
plt.show()
print('#', "-" * 50)
# -----------------------
# Grouped bars: three series side by side, each shifted by one bar width.
data = np.random.rand(3, 4)
x8 = np.arange(4)
for shift, series, colour in zip((0.00, 0.25, 0.50), data, 'bgr'):
    plt.bar(x8 + shift, series, color=colour, width=0.25)
plt.show()
print('#', "-" * 50)
# -----------------------
a = np.random.rand(4)
b = np.random.rand(4)
x9 = np.arange(4)
plt.bar(x9, a, color='b')
Exemplo n.º 40
0
plt.style.use('fivethirtyeight')

# Vertical bar chart: one bar per language.
plt.bar(lang, popularity)

plt.title('Popularity of Languages')
plt.xlabel('languages')
plt.ylabel('Popularity')

plt.tight_layout()
plt.show()

# In[37]:

# Horizontal bar chart of the same data using barh.
plt.style.use('fivethirtyeight')

# Reverse both lists in place so the bars appear in the opposite order.
# Note this mutates lang and popularity for any code that runs afterwards.
lang.reverse()
popularity.reverse()

plt.barh(lang, popularity)

plt.title('Popularity of Languages')
#plt.xlabel('languages')
plt.xlabel('Popularity')

plt.tight_layout()
plt.show()

# In[ ]:
Exemplo n.º 41
0
def _sum_by(frame, group_col, value_col):
    """Return ``value_col`` summed per ``group_col`` with a flat index."""
    return frame.groupby([group_col])[value_col].sum().reset_index()


def _share_labels(ordered, column):
    """Label each row of ``ordered`` with its percentage of the 'Esc. Base' row."""
    base = ordered.at['Esc. Base', column]
    return [
        str(int((value / base) * 100)) + '% del Esc. Base'
        for value in ordered[column]
    ]


def anual_comparison(personas=0, graphs=True):
    """Compare yearly energy use and cost of the water-heating scenarios.

    Three passes are made over the data produced by ``gen_yeardata``:
    energy per day of year (b=1, a=1), energy per month (b=1, a=2) and
    cost per month (b=2, a=2). The combination cost per day (b=2, a=1) is
    skipped. The monthly passes also accumulate the yearly totals.

    Args:
        personas: forwarded unchanged to ``gen_yeardata``.
        graphs: forwarded to ``gen_yeardata``; when true, the comparison
            figures are also built with matplotlib.

    Returns:
        ``[ordenadaE, ordenadaP]``: two single-column DataFrames indexed by
        scenario name ('Sistema'), holding yearly energy ('kWh/año') and
        yearly cost ('$/año'), sorted descending and rounded to 0 decimals.

    Side effects:
        Writes both tables as CSV files into the ``generated_csvs``
        directory (created if missing); the file names end with today's date.
    """
    import os  # local import: only needed for the portable output paths

    yeardata = gen_yeardata(personas, graphs)
    key = {1: 'DOY', 2: 'MES'}  # parameter a: grouping column
    key2 = {1: 'Econs [kWh]', 2: '$'}  # parameter b: value column
    tit = {
        1: 'Calefón A ',
        2: 'Bomba de calor ',
        3: 'Gas con Alm. ',
        4: 'Gas Inst. '
    }
    for b in range(1, 3):
        for a in range(1, 3):
            if b == 2 and a == 1:
                continue
            group_col = key[a]
            value_col = key2[b]

            if b == 1:
                # Energy pass: sum the consumption per day or per month for
                # the base scenario and each alternative, without and with
                # the collector option (second/third argument True).
                es_base = _sum_by(calefon(yeardata, 'B'), group_col, value_col)
                es_caleA = _sum_by(calefon(yeardata, 'A'), group_col,
                                   value_col)
                es_caleA_col = _sum_by(calefon(yeardata, 'A', True),
                                       group_col, value_col)
                es_bba = _sum_by(bbacalor(yeardata), group_col, value_col)
                es_bba_col = _sum_by(bbacalor(yeardata, True), group_col,
                                     value_col)
                es_gasal = _sum_by(gas_almacen(yeardata), group_col,
                                   value_col)
                es_gasal_col = _sum_by(gas_almacen(yeardata, True), group_col,
                                       value_col)
                es_gasin = _sum_by(gas_inst(yeardata), group_col, value_col)
                es_gasin_col = _sum_by(gas_inst(yeardata, True), group_col,
                                       value_col)
                esc = {1: es_caleA, 2: es_bba, 3: es_gasal, 4: es_gasin}
                esc3 = {
                    1: es_caleA_col,
                    2: es_bba_col,
                    3: es_gasal_col,
                    4: es_gasin_col
                }
                labels = {1: '', 2: '', 3: '', 4: ''}
                ylab = 'Consumo kWh/día'
                if a == 2:
                    ylab = 'Consumo kWh/mes'
                    # Yearly energy totals, taken from the monthly pass.
                    totalE = {
                        'Esc. Base': es_base[value_col].sum(),
                        tit[1]: esc[1][value_col].sum(),
                        tit[2]: esc[2][value_col].sum(),
                        tit[3]: esc[3][value_col].sum(),
                        tit[4]: esc[4][value_col].sum(),
                        tit[1] + ' con Col.': esc3[1][value_col].sum(),
                        tit[2] + ' con Col.': esc3[2][value_col].sum(),
                        tit[3] + ' con Col.': esc3[3][value_col].sum(),
                        tit[4] + ' con Col.': esc3[4][value_col].sum()
                    }

            else:
                # Cost pass: apply two tariff functions to every scenario
                # (tar[1]/tar[2] to the first two, tar[3]/tar[4] to the gas
                # ones), again without and with the collector option.
                ylab = 'Consumo $/mes'
                tar = {1: ressimple, 2: resdoble, 3: gasres, 4: supergas}
                es_base = ressimple(calefon(yeardata, 'B'))
                es_caleA_rs = tar[1](calefon(yeardata, 'A'))
                es_caleA_rs_col = tar[1](calefon(yeardata, 'A', True))
                es_caleA_dh = tar[2](calefon(yeardata, 'A'))
                es_caleA_dh_col = tar[2](calefon(yeardata, 'A', True))
                es_bba_rs = tar[1](bbacalor(yeardata))
                es_bba_rs_col = tar[1](bbacalor(yeardata, True))
                es_bba_dh = tar[2](bbacalor(yeardata))
                es_bba_dh_col = tar[2](bbacalor(yeardata, True))
                es_gasal_gn = tar[3](gas_almacen(yeardata))
                es_gasal_gn_col = tar[3](gas_almacen(yeardata, True))
                es_gasal_glp = tar[4](gas_almacen(yeardata))
                es_gasal_glp_col = tar[4](gas_almacen(yeardata, True))
                es_gasin_gn = tar[3](gas_inst(yeardata))
                es_gasin_gn_col = tar[3](gas_inst(yeardata, True))
                es_gasin_glp = tar[4](gas_inst(yeardata))
                es_gasin_glp_col = tar[4](gas_inst(yeardata, True))
                esc = {
                    1: es_caleA_rs,
                    2: es_bba_rs,
                    3: es_gasal_gn,
                    4: es_gasin_gn
                }
                esc2 = {
                    1: es_caleA_dh,
                    2: es_bba_dh,
                    3: es_gasal_glp,
                    4: es_gasin_glp
                }
                esc3 = {
                    1: es_caleA_rs_col,
                    2: es_bba_rs_col,
                    3: es_gasal_gn_col,
                    4: es_gasin_gn_col
                }
                esc4 = {
                    1: es_caleA_dh_col,
                    2: es_bba_dh_col,
                    3: es_gasal_glp_col,
                    4: es_gasin_glp_col
                }
                labels = {
                    1: 'Res. Simp.',
                    2: 'Res. Simp.',
                    3: 'Gas Nat.',
                    4: 'Gas Nat.'
                }
                labels2 = {
                    1: 'Doble Hor.',
                    2: 'Doble Hor.',
                    3: 'GLP',
                    4: 'GLP'
                }
                # Yearly cost totals for every scenario/tariff combination.
                totalP = {
                    'Esc. Base': es_base[value_col].sum(),
                    tit[1] + labels[1]: esc[1][value_col].sum(),
                    tit[2] + labels[2]: esc[2][value_col].sum(),
                    tit[3] + labels[3]: esc[3][value_col].sum(),
                    tit[4] + labels[4]: esc[4][value_col].sum(),
                    tit[1] + labels2[1]: esc2[1][value_col].sum(),
                    tit[2] + labels2[2]: esc2[2][value_col].sum(),
                    tit[3] + labels2[3]: esc2[3][value_col].sum(),
                    tit[4] + labels2[4]: esc2[4][value_col].sum(),
                    tit[1] + 'con colect ' + labels[1]:
                    esc3[1][value_col].sum(),
                    tit[2] + 'con colect ' + labels[2]:
                    esc3[2][value_col].sum(),
                    tit[3] + 'con colect ' + labels[3]:
                    esc3[3][value_col].sum(),
                    tit[4] + 'con colect ' + labels[4]:
                    esc3[4][value_col].sum(),
                    tit[1] + 'con colect ' + labels2[1]:
                    esc4[1][value_col].sum(),
                    tit[2] + 'con colect ' + labels2[2]:
                    esc4[2][value_col].sum(),
                    tit[3] + 'con colect ' + labels2[3]:
                    esc4[3][value_col].sum(),
                    tit[4] + 'con colect ' + labels2[4]:
                    esc4[4][value_col].sum()
                }

            if graphs:
                plt.figure()
                style.use('fivethirtyeight')

                # Shared y range: 30% headroom above the base scenario's peak.
                maxy = int(round(es_base[value_col].max() * 1.3))
                # round(maxy / 5) is 0 for maxy < 3, and range() rejects a
                # zero step, so never let the step drop below 1.
                y = list(range(0, maxy, max(1, round(maxy / 5))))

                ax1 = plt.subplot(4, 1, 1)
                ax2 = plt.subplot(4, 1, 2)
                ax3 = plt.subplot(4, 1, 3)
                ax4 = plt.subplot(4, 1, 4)

                ax = {1: ax1, 2: ax2, 3: ax3, 4: ax4}
                # One stacked panel per alternative scenario, each compared
                # against the base scenario.
                for i in range(1, 5):
                    if i == 1:
                        ax[i] = plt.subplot(4, 1, 1)
                    else:
                        ax[i] = plt.subplot(4, 1, i, sharex=ax1)
                    plt.plot(es_base[group_col],
                             es_base[value_col],
                             label='Esc. Base',
                             color='r',
                             linewidth=1.5)
                    plt.plot(esc[i][group_col],
                             esc[i][value_col],
                             label=labels[i] + ' sin colector',
                             color='b',
                             linewidth=1.5)
                    plt.plot(esc3[i][group_col],
                             esc3[i][value_col],
                             label=labels[i] + ' con colector',
                             color='tab:orange',
                             linewidth=1.5)

                    if b == 2:
                        # Second tariff curves exist only in the cost pass.
                        plt.plot(esc2[i][group_col],
                                 esc2[i][value_col],
                                 label=labels2[i] + ' sin colector',
                                 color='m',
                                 linewidth=1.5)
                        plt.plot(esc4[i][group_col],
                                 esc4[i][value_col],
                                 label=labels2[i] + ' con colector',
                                 color='tab:gray',
                                 linewidth=1.5)
                        plt.fill_between(
                            es_base[group_col],
                            es_base[value_col],
                            esc2[i][value_col],
                            where=(es_base[value_col] > esc2[i][value_col]),
                            color='g',
                            alpha=0.2)
                        plt.fill_between(
                            es_base[group_col],
                            esc2[i][value_col],
                            es_base[value_col],
                            where=(es_base[value_col] < esc2[i][value_col]),
                            color='y',
                            alpha=0.2)

                    # Green where the scenario is below the base, yellow
                    # where it is above.
                    plt.fill_between(
                        es_base[group_col],
                        es_base[value_col],
                        esc[i][value_col],
                        where=(es_base[value_col] > esc[i][value_col]),
                        color='g',
                        alpha=0.2)
                    plt.fill_between(
                        es_base[group_col],
                        esc[i][value_col],
                        es_base[value_col],
                        where=(es_base[value_col] < esc[i][value_col]),
                        color='y',
                        alpha=0.2)
                    if i == 3:
                        plt.ylabel(ylab, fontsize=24)
                    plt.yticks(y, fontsize=20)
                    plt.ylim(-maxy * 0.03, maxy)
                    plt.legend(fontsize=10, loc=2)
                    plt.setp(ax[i].get_xticklabels(), visible=False)
                    plt.grid(True)
                    plt.title('Esc. ' + tit[i], fontsize=28, color='c')
                    if i == 4:
                        # Only the bottom panel shows x tick labels.
                        plt.setp(ax[i].get_xticklabels(), visible=True)
                        x = list(range(1, 13))
                        plt.xlabel("Mes del año", fontsize=24)
                        if a == 1:
                            x = [1] + list(range(15, 366, 15))
                            plt.xlabel("día del año", fontsize=24)
                        plt.xticks(x, fontsize=20)

                plt.subplots_adjust(hspace=0.15)

    # Yearly totals as single-column tables, largest first.
    ordenadaE = pd.DataFrame(totalE,
                             index=['kWh/año'
                                    ]).transpose().sort_values(by='kWh/año',
                                                               ascending=False)
    ordenadaP = pd.DataFrame(totalP,
                             index=['$/año'
                                    ]).transpose().sort_values(by='$/año',
                                                               ascending=False)

    if graphs:
        # Yearly energy per scenario, annotated with the share of the base.
        plt.figure()
        ax = plt.subplot(1, 1, 1)
        plt.title('Consumo calentamiento de agua caliente anual',
                  fontsize=28,
                  color='c')
        plt.barh(range(len(ordenadaE)), ordenadaE.iloc[:, 0], align='center')
        plt.yticks(range(len(ordenadaE)), ordenadaE.index, fontsize=20)
        plt.xlabel('kWh / año', fontsize=24)
        plt.xticks(fontsize=20)
        ax.set_axisbelow(True)
        plt.grid(False, axis='y')
        # Scalars are read from the column directly; calling int() on the
        # one-element row arrays of .values is deprecated in recent NumPy.
        labels = _share_labels(ordenadaE, 'kWh/año')
        for rect, label in zip(ax.patches, labels):
            width = rect.get_width()
            ax.text(width * 0.8,
                    rect.get_y(),
                    label,
                    ha='center',
                    va='bottom',
                    fontsize=20,
                    color='r',
                    weight='bold')

        # Yearly cost per scenario, annotated the same way.
        plt.figure()
        ax = plt.subplot(1, 1, 1)
        plt.title('Costo calentamiento de agua caliente anual',
                  fontsize=28,
                  color='c')
        plt.barh(range(len(ordenadaP)), ordenadaP.iloc[:, 0], align='center')
        plt.yticks(range(len(ordenadaP)), ordenadaP.index, fontsize=20)
        plt.xlabel('miles de $ / año', fontsize=24)
        plt.xticks(fontsize=20)
        ax.set_axisbelow(True)
        plt.grid(False, axis='y')
        labels = _share_labels(ordenadaP, '$/año')
        for rect, label in zip(ax.patches, labels):
            width = rect.get_width()
            ax.text(width * 0.9,
                    rect.get_y(),
                    label,
                    ha='center',
                    va='bottom',
                    fontsize=18,
                    color='r',
                    weight='bold')

        # Show the x axis in thousands, matching the 'miles de $' label.
        scale_x = 1000
        ticks_x = ticker.FuncFormatter(
            lambda x, pos: '{0:g}'.format(x / scale_x))
        ax.xaxis.set_major_formatter(ticks_x)
        plt.subplots_adjust(left=0.18)

    ordenadaP.index.name = 'Sistema'
    ordenadaE.index.name = 'Sistema'
    ordenadaP = ordenadaP.round(0)
    ordenadaE = ordenadaE.round(0)

    hoy = dt.date.today().strftime("%d-%m-%y")
    # Build the paths with os.path.join: a literal backslash is only a path
    # separator on Windows and would become part of the file name elsewhere.
    out_dir = 'generated_csvs'
    os.makedirs(out_dir, exist_ok=True)
    ordenadaE.to_csv(
        os.path.join(out_dir, 'Consumo anual Energia ' + hoy + '.csv'))
    ordenadaP.to_csv(os.path.join(out_dir, 'Costo anual ' + hoy + '.csv'))

    return [ordenadaE, ordenadaP]
    #print(f"{type(flat_info['id'])},   {type(flat_info['rooms'])},   {type(flat_info['type'])},  {type(flat_info['price'])}")

    subway_dict[subway].append(flat_info)

#print(subway_dict.keys())
# TODO 2: count and print the number of new-build flats located near each metro station. Iterate over the dictionary in whichever way you prefer.

#list(subway_dict)
#print(type(subway_dict))

# Number of new-build flats per metro station, in subway_dict order.
Chart = {}
for station, flats in subway_dict.items():
    new_build_count = sum(1 for flat in flats
                          if flat["type"] == "новостройка")
    print(f"{station} - {new_build_count}")
    Chart[station] = new_build_count

# One horizontal bar per station, labelled with the station name.
bar_positions = range(len(Chart))
plt.barh(bar_positions, list(Chart.values()), align='center')
plt.yticks(bar_positions, list(Chart.keys()))
plt.title('Анализ расположения новостроек относительно станций метро')
plt.xlabel('Кол-во новостроек рядом с метро')

plt.show()

# In[ ]:
Exemplo n.º 43
0
from matplotlib import pyplot as plt
import csv
from collections import Counter

plt.style.use('fivethirtyeight')

# Tally every language named in the semicolon-separated
# 'LanguagesWorkedWith' column of data.csv.
lc = Counter()
with open('data.csv') as file:
    for row in csv.DictReader(file):  # each row is a dict keyed by header
        lc.update(row['LanguagesWorkedWith'].split(';'))

# Split the 15 most frequent (language, count) pairs into parallel lists.
top_languages = lc.most_common(15)
lang = [name for name, _ in top_languages]
fcount = [count for _, count in top_languages]

plt.barh(lang, fcount)

plt.xlabel('Frequency')
plt.title("Most popular languages")

plt.tight_layout()
plt.show()
Exemplo n.º 44
0
            random_state=0)
# Cluster the samples and keep the label assigned to each one.
y_km = km.fit_predict(X)

cluster_labels = np.unique(y_km)
n_clusters = cluster_labels.shape[0]
# One silhouette coefficient per sample.
silhouette_vals = silhouette_samples(X, y_km, metric='euclidean')
# Running y range: each cluster's bars are stacked above the previous ones.
y_ax_lower, y_ax_upper = 0, 0
yticks = []
for i, c in enumerate(cluster_labels):
    # Boolean indexing returns a copy, so the in-place sort below does not
    # touch silhouette_vals.
    c_silhouette_vals = silhouette_vals[y_km == c]
    c_silhouette_vals.sort()
    y_ax_upper += len(c_silhouette_vals)
    # One colour per cluster, spread over the jet colormap.
    color = cm.jet(float(i) / n_clusters)
    plt.barh(range(y_ax_lower, y_ax_upper),
             c_silhouette_vals,
             height=1.0,
             edgecolor='none',
             color=color)

    # Put the cluster's tick at the middle of its band.
    yticks.append((y_ax_lower + y_ax_upper) / 2.)
    y_ax_lower += len(c_silhouette_vals)

# Dashed red line at the mean silhouette coefficient of all samples.
silhouette_avg = np.mean(silhouette_vals)
plt.axvline(silhouette_avg, color="red", linestyle="--")

# Tick labels are the cluster labels shifted by one.
plt.yticks(yticks, cluster_labels + 1)
plt.ylabel('Cluster')
plt.xlabel('Silhouette coefficient')

plt.tight_layout()
#plt.savefig('images/11_04.png', dpi=300)
    fast_mimic=True,
    random_state=RANDOM_STATE)
# Record the wall-clock time of the run that just finished in slot 3 of
# clock_time, and keep its fitness curve under the MIMIC name.
t_aft = time.time()
clock_time[3] = t_aft - t_bef
MIMIC_fitness_curve = best_fitness_curve

# print("----------------------------------")
# print("MIMIC")
# print(best_state)
# print(best_fitness)
# print(fitness_curve)
# print("----------------------------------")

# Bar chart of the clock time of each algorithm.
plt.figure()
plt.barh(algorithms, clock_time, align='center')
plt.title("Randomized Optimization", fontsize=16, fontweight='bold')
plt.suptitle("Knapsack (50 samples)", fontsize=10)
plt.ylabel('Algorithm')
plt.xlabel('Time (seconds)')
plt.savefig('knapsack_time50.png', bbox_inches="tight")

# Fitness curves of the different algorithms.
plt.figure()
# Length of the longest curve; every curve's x values are spread over
# 1..temp so that curves of different lengths share one x range.
temp = max(len(RHC_fitness_curve), len(SA_fitness_curve),
           len(GA_fitness_curve), len(MIMIC_fitness_curve))
x_1 = np.linspace(1, temp, len(RHC_fitness_curve))
x_2 = np.linspace(1, temp, len(SA_fitness_curve))
x_3 = np.linspace(1, temp, len(GA_fitness_curve))
x_4 = np.linspace(1, temp, len(MIMIC_fitness_curve))
y_1 = RHC_fitness_curve
Exemplo n.º 46
0
from matplotlib import pyplot as plt
import matplotlib

# Draw a horizontal bar chart of the top-20 films by box office.
"""绘制条形图"""
# Select a font that contains the glyphs used by the labels below.
font = {'family': 'MicroSoft YaHei'}
matplotlib.rc('font', **font)  # enable rendering of Chinese text

# Film titles, one per bar.
x = ["战狼2","速度与激情8","功夫瑜伽","西游伏妖篇","变形金刚5:最后的骑士","摔跤吧!爸爸","加勒比海盗5:死无对证","金刚:骷髅岛","极限特工:终极回归","生化危机6:终章","乘风破浪","神偷奶爸3","智取威虎山","大闹天竺","金刚狼3:殊死一战","蜘蛛侠:英雄归来","悟空传","银河护卫队2","情圣","新木乃伊",]

# Box-office figure for each title, in the same order as x.
y = [56.01,26.94,17.53,16.49,15.45,12.96,11.8,11.61,11.28,11.12,10.49,10.3,8.75,7.55,7.32,6.99,6.88,6.86,6.58,6.23]

plt.figure(figsize=(20, 8), dpi=80)  # set the figure size

# plt.bar(range(len(x)), y, width=0.3)  # vertical variant; width sets the bar thickness
plt.barh(range(len(x)), y, height=0.3, color='orange')  # horizontal bars; height sets the bar thickness
# Label the y-axis tick positions with the film titles
plt.yticks(range(len(x)),x)

plt.grid(alpha=0.3)  # add a grid
plt.ylabel('电影名称')
plt.xlabel('票房')
plt.title('票房前20的电影')
plt.savefig('./01.png')
plt.show()
Exemplo n.º 47
0
  plt.legend(loc="upper center", bbox_to_anchor=(0.5, 1.14,), ncol=3, borderaxespad=0.)

  for ext in [ "png", "svg" ]:
    fname = os.path.join(options.outputdir, "{0}.{1}".format(options.suffix, ext))
    plt.savefig(fname, dpi=100)
    print "Saved {0}".format(fname)

  barlabels = []
  barvalues = []
  for n in range(1, len(data)):
    l = l = data[n][0]
    splitOn='VM'
    l = ('\n%s'%splitOn).join(l.split(splitOn))
    barlabels.append(l)
    barvalues.append(float(data[n][1]))

  plt.clf()
  plt.barh(bottom=range(0, len(data)-1),
           height=0.5,
           width=barvalues,
           align='center')
  plt.yticks(numpy.arange(len(data)-1),
             barlabels)
  plt.grid(True)
  plt.title('Network Performance - Testcase {0}'.format(options.suffix))
  plt.xlabel("Testcase {0} - Mbits/sec".format(options.suffix))
  for ext in [ "png", "svg" ]:
    fname = os.path.join(options.outputdir, "{0}.bar.{1}".format(options.suffix, ext))
    plt.savefig(fname, dpi=100)
    print "Saved {0}".format(fname)
Exemplo n.º 48
0
def draw_plot_func(dictionary, n_classes, window_title, plot_title, x_label,
                   output_path, to_show, plot_color, true_p_bar):
    """Draw a horizontal bar chart of ``dictionary`` and save it.

    Args:
        dictionary: maps each class name to its numeric value.
        n_classes: number of bars / tick positions to draw.
        window_title: title for the figure window (GUI backends only).
        plot_title: title drawn above the axes.
        x_label: label of the x axis.
        output_path: file the figure is saved to.
        to_show: when truthy, the figure is also shown before closing.
        plot_color: bar and text colour for the plain (single-colour) chart.
        true_p_bar: ``""`` for the plain chart; otherwise a mapping from
            class name to its true-positive count, which switches to the
            stacked false-positive / true-positive chart.
    """
    # sort the dictionary by increasing value, into a list of tuples, so the
    # largest bar is the last one drawn
    sorted_dic_by_value = sorted(dictionary.items(),
                                 key=operator.itemgetter(1))
    # unpacking the list of tuples into two lists
    sorted_keys, sorted_values = zip(*sorted_dic_by_value)
    if true_p_bar != "":
        # Special case to draw in:
        #   - green -> TP: True Positives (object detected and matches ground-truth)
        #   - red -> FP: False Positives (object detected but does not match ground-truth)
        # The false positives are whatever remains of the total after the
        # true positives are removed.
        fp_sorted = []
        tp_sorted = []
        for key in sorted_keys:
            fp_sorted.append(dictionary[key] - true_p_bar[key])
            tp_sorted.append(true_p_bar[key])
        plt.barh(range(n_classes),
                 fp_sorted,
                 align='center',
                 color='crimson',
                 label='False Positive')
        # the TP bars start where the FP bars end
        plt.barh(range(n_classes),
                 tp_sorted,
                 align='center',
                 color='forestgreen',
                 label='True Positive',
                 left=fp_sorted)
        # add legend
        plt.legend(loc='lower right')
        # Write number on side of bar
        fig = plt.gcf()  # gcf - get current figure
        axes = plt.gca()
        r = fig.canvas.get_renderer()
        for i, val in enumerate(sorted_values):
            fp_val = fp_sorted[i]
            tp_val = tp_sorted[i]
            fp_str_val = " " + str(fp_val)
            tp_str_val = fp_str_val + " " + str(tp_val)
            # trick to paint multicolor with offset:
            # first paint everything and then repaint the first number
            t = plt.text(val,
                         i,
                         tp_str_val,
                         color='forestgreen',
                         va='center',
                         fontweight='bold')
            plt.text(val,
                     i,
                     fp_str_val,
                     color='crimson',
                     va='center',
                     fontweight='bold')
            if i == (len(sorted_values) - 1):  # largest bar
                adjust_axes(r, t, fig, axes)
    else:
        plt.barh(range(n_classes), sorted_values, color=plot_color)
        # Write number on side of bar
        fig = plt.gcf()  # gcf - get current figure
        axes = plt.gca()
        r = fig.canvas.get_renderer()
        for i, val in enumerate(sorted_values):
            str_val = " " + str(val)  # add a space before
            if val < 1.0:
                str_val = " {0:.2f}".format(val)
            t = plt.text(val,
                         i,
                         str_val,
                         color=plot_color,
                         va='center',
                         fontweight='bold')
            # re-set axes to show number inside the figure
            if i == (len(sorted_values) - 1):  # largest bar
                adjust_axes(r, t, fig, axes)
    # set window title
    # FigureCanvasBase.set_window_title was deprecated in matplotlib 3.4 and
    # removed in 3.6; the manager carries the method on old and new versions.
    # Fall back to the canvas only if no manager is attached.
    manager = getattr(fig.canvas, 'manager', None)
    if manager is not None and hasattr(manager, 'set_window_title'):
        manager.set_window_title(window_title)
    elif hasattr(fig.canvas, 'set_window_title'):
        fig.canvas.set_window_title(window_title)
    # write classes in y axis
    tick_font_size = 12
    plt.yticks(range(n_classes), sorted_keys, fontsize=tick_font_size)
    # Re-scale height accordingly
    init_height = fig.get_figheight()
    # compute the height needed by the tick labels in points and inches
    dpi = fig.dpi
    height_pt = n_classes * (tick_font_size * 1.4)  # 1.4 (some spacing)
    height_in = height_pt / dpi
    # compute the required figure height
    top_margin = 0.15  # in percentage of the figure height
    bottom_margin = 0.05  # in percentage of the figure height
    figure_height = height_in / (1 - top_margin - bottom_margin)
    # set new height (the figure is only ever enlarged, never shrunk)
    if figure_height > init_height:
        fig.set_figheight(figure_height)

    # set plot title
    plt.title(plot_title, fontsize=14)
    # set axis titles
    # plt.xlabel('classes')
    plt.xlabel(x_label, fontsize='large')
    # adjust size of window
    fig.tight_layout()
    # save the plot
    fig.savefig(output_path)
    # show image
    if to_show:
        plt.show()
    # close the plot
    plt.close()
Exemplo n.º 49
0
        else:
            mmin.append(float(row[1]))
            mmax.append(float(tmmax - tm))

        m.append(float(row[1]))

# Asymmetric error bars for the m series, passed as [lower, upper].
merror = [mmin, mmax]

# One base position per entry of x; each series below is shifted by 0.20
# (one bar height) so the four bars of a group sit side by side.
y_pos = np.arange(len(x))

plt.xlim(10, 90)
# Called without arguments, ylim() only reads the current limits.
plt.ylim()
# Create bars
fig1 = plt.barh(y_pos,
                l,
                xerr=lerror,
                height=0.20,
                label='Mixed noise (4000 samples)')
fig2 = plt.barh(y_pos + 0.20,
                m,
                xerr=merror,
                height=0.20,
                label='Mixed noise (2000 samples)')
fig3 = plt.barh(y_pos + 0.40,
                k,
                xerr=kerror,
                height=0.20,
                label='Enviromental noise')
# NOTE(review): this rebinds fig3, so the handle of the 'Enviromental noise'
# bars is lost; fig4 was probably intended — confirm against later uses.
fig3 = plt.barh(y_pos + 0.60, y, xerr=yerror, height=0.20, label='No noise')

# Create names on the x-axis
Exemplo n.º 50
0

# Predict on the first 20 test images; the bare expression below displays the
# shape of the result when run in a notebook.
predictions = seq_model.predict(test_images[:20])
predictions.shape


# In[27]:


# Show test image i (left) next to a bar chart of its 10 predicted values
# (right), with the class names as tick labels.
figsize(12,3)
i = 0
plt.subplot(121)
plt.imshow(test_images[i,:,:,0], cmap=plt.cm.gray)
plt.axis("off")
plt.subplot(122)
plt.barh(np.arange(10), predictions[i,:])
plt.yticks(np.arange(10), classes)
plt.grid(True);


# You can write the model to disk to retrieve later using `Model.save()`. Historically this had used HDF5 but with TF2 more integration is coming with the tensorflow-specific `SavedModel` format. HDF5 has trouble with custom objects and nested models.

# In[28]:


# The .h5 suffix is passed as the file name for the saved model.
seq_model.save("seq_model.h5")


# In[ ]:

Exemplo n.º 51
0
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import font_manager, rc
from matplotlib import style

import os

# The x-axis label below is Korean, so a font with Hangul glyphs is registered.
# Only do so when the font file is actually present; otherwise fall back to
# matplotlib's default font instead of crashing on machines without it.
font_path = "c:/Windows/Fonts/malgun.ttf"
font_name = None
if os.path.isfile(font_path):
    font_name = font_manager.FontProperties(fname=font_path).get_name()
    rc('font', family=font_name)
style.use('ggplot')

industry = ['Insect', 'Reptile', 'Aquatic', 'Bird', 'Mammal']
fluctuations = [3, 3, 5, 4, 2]

fig = plt.figure(figsize=(8, 4))
ax = fig.add_subplot(111)

# Derive the bar positions from the data so that adding or removing a category
# cannot leave positions and labels out of sync.
ypos = np.arange(len(industry))
rects = ax.barh(ypos, fluctuations, align='center', height=0.5)
ax.set_yticks(ypos)
ax.set_yticklabels(industry)

ax.set_xlabel('등락률')
plt.show()
# Order the rows by the male count and label them by link type.
df_5 = df_5.sort_values(by='count_M', ascending=True)
df_5.set_index(['LinkType'], inplace=True)


# In[3]:


# Draw a bidirectional (back-to-back) bar chart.
index = np.arange(len(df_5))
lColor = (1 / 256, 1 / 256, 1.0, 1.0)      # blue (RGBA)
rColor = (1.0, 1 / 256, 1 / 256, 1.0)      # red (RGBA)
plt.figure(figsize=(16, 6))

# Both series share the same y positions: the 'M' counts extend to the right of
# the origin and the negated 'F' counts extend to the left.
male_counts = df_5['count_M']
female_counts = -df_5['count_F']
plt.barh(index, male_counts, color=lColor)
plt.barh(index, female_counts, color=rColor)

# Tick labels show magnitudes only, so the left-hand side reads as positive
# even though its bars are plotted at negative x values.
tick_positions = [-40000, -20000, 0, 20000, 40000]
tick_labels = [str(abs(position)) for position in tick_positions]
plt.xticks(tick_positions, tick_labels)
plt.yticks(index, df_5.index)
plt.legend(['M', 'F'])
plt.show()


# In[ ]:
def plot_feature_importances(clf, feature_names):
    """Draw a horizontal bar chart of ``clf.feature_importances_``.

    Args:
        clf: object exposing a ``feature_importances_`` attribute.
        feature_names: labels for the bars; its length sets the number of bars.
    """
    n_features = len(feature_names)
    tick_positions = numpy.arange(n_features)

    plt.barh(range(n_features), clf.feature_importances_)
    plt.xlabel("Feature importance")
    plt.ylabel("Feature name")
    # Label each bar with the matching feature name.
    plt.yticks(tick_positions, feature_names)
Exemplo n.º 54
0
# Wide, short canvas: the chart holds only two horizontal bars.
plt.figure(figsize=(16, 2))
plt.title('Mean vs Max Value', fontsize=20, fontweight='bold', y=1.05)
plt.xlabel('Value [M€]')

objects = ('Max Value', 'Mean Value')
y_pos = np.arange(len(objects))

# Scale both values down by one million to match the 'M€' axis label.
max_value = dataset['ValueNum'].max()
performance = [value / 1000000 for value in (max_value, mean_value)]

plt.barh(y_pos, performance, align='center', alpha=0.5)
plt.yticks(y_pos, objects)

plt.show()

# <b>NOTE:</b> The mean Value of a player in FIFA 18 is about €2.4M. That seems like a lot of money, but it is very little compared with €123M, the Value of Neymar. Moreover, player Value is unequally distributed: only 23.2% of players exceed this Value.

# ### 3.5. Over or Under Mean Wage
# <a id="over_or_under_mean_wage"></a>

# In[41]:

# Report the mean wage rounded to the nearest thousand.
rounded_mean_wage = round(mean_wage, -3)
mean_wage_message = ('Mean wage of player in FIFA 18 is around: €' +
                     str(rounded_mean_wage) + '.')
print(mean_wage_message)

# In[42]:
Exemplo n.º 55
0
                 markeredgecolor='black',
                 markerfacecolor='firebrick')
# Box plot of part_4.result_5; the mean is drawn as a point marker
# (showmeans=True, meanline=False) styled by `meanpoint`.
plt.boxplot(part_4.result_5,
            meanprops=meanpoint,
            meanline=False,
            showmeans=True)
plt.ylabel('Temperature')
plt.title('Average temperatures measured outside')

#9
# Bar chart of part_4.result_9 sorted ascending, framed by two dashed vertical
# guide lines at x=-1 and x=3.
# NOTE(review): no new figure is created between the charts in this snippet, so
# they are all drawn through pyplot's implicit current figure -- confirm that is
# intended.
part_4.result_9.sort_values().plot(kind='bar', color='g')
plt.ylim([0, 10300])
plt.xlabel('Temperature')
plt.ylabel('Appearances')
plt.title('Top 3 appearance')
plt.vlines(-1, 0, 10500, colors='k', linestyles='dashed')
plt.vlines(3, 0, 10500, colors='k', linestyles='dashed')
plt.xlim(-2, 4)
# NOTE(review): a bare string is passed as the legend labels; presumably each
# character becomes a separate label -- confirm this is the desired legend.
plt.legend('③❸')

#6
# Horizontal bar chart of the number of rows recorded for each "temp" value.
# NOTE(review): `y` is not used anywhere in the visible code.
y = (data.groupby("temp")["temp"])
count = (data.groupby("temp")["temp"].count())
# 31 fixed positions (21..51); assumes `count` has exactly 31 groups that
# correspond to these temperatures -- TODO confirm.
y_pos = np.arange(21, 52)
plt.barh(y_pos, count, alpha=0.9)
plt.xlabel('Data amount')
plt.ylabel('Temperature')
plt.title('Data amount by temperature')
# NOTE(review): the barh call above carries no `label`, so this legend may have
# nothing to show -- verify.
plt.legend()

plt.show()
Exemplo n.º 56
0
def _save_and_encode_figure(fig, filename):
    """Save *fig* to *filename* and return it as a base64-encoded PNG string.

    The figure is always closed afterwards so repeated calls do not accumulate
    open figures in pyplot's global figure registry.
    """
    try:
        # The on-disk copy is cropped to a tight bounding box; the in-memory
        # copy embedded in the page uses the default bounding box.
        fig.savefig(filename, bbox_inches="tight")
        buffer = BytesIO()
        fig.savefig(buffer, format='png')
    finally:
        plt.close(fig)
    # base64 output is pure ASCII, so decoding yields the bare payload string.
    return base64.b64encode(buffer.getvalue()).decode('ascii')


def index():
    """Build the summary statistics and plots, then render ``index.html``.

    Four charts are produced from the module-level ``playstore`` frame: top
    categories (horizontal bars), reviews vs. rating (scatter), size
    distribution (histogram) and content rating counts (bars). Each chart is
    written to a PNG file in the working directory and also passed to the
    template as a base64 string.

    Returns:
        The value returned by ``render_template``.
    """
    df2 = playstore.copy()

    # Statistics: number of apps per category, largest first.
    top_category = (pd.crosstab(index=df2['Category'], columns='Jumlah')
                    .sort_values('Jumlah', ascending=False)
                    .reset_index())
    # `stats` holds the values shown in the value boxes and the table of the
    # ten (category, app) pairs with the highest mean review count.
    stats = {
        'most_categories':
        top_category['Category'][0],
        'total':
        top_category['Jumlah'][0],
        'rev_table':
        playstore.groupby(['Category', 'App']).agg({
            'Reviews': 'mean',
            'Rating': 'mean',
        }).sort_values('Reviews', ascending=False).head(10).reset_index().
        to_html(classes=[
            'table thead-light table-striped table-bordered table-hover table-sm'
        ])
    }

    # One colour per bar, given as an explicit list: a single multi-letter
    # string is not a reliable colour sequence across matplotlib versions.
    bar_colors = list('rgbkymc')

    ## Bar Plot: the five categories with the most apps.
    # (The original chained a rename of a non-existent 'Category' column,
    # which had no effect and is omitted.)
    cat_order = (df2.groupby('Category').agg({'App': 'count'})
                 .sort_values('App', ascending=False).head())
    fig = plt.figure(figsize=(8, 3), dpi=300)
    fig.add_subplot()
    plt.barh(cat_order.index, cat_order['App'], color=bar_colors)
    result = _save_and_encode_figure(fig, 'cat_order.png')

    ## Scatter Plot: reviews against rating.
    reviews = df2['Reviews'].values  # x axis
    ratings = df2['Rating'].values  # y axis
    # Marker size scales with the install count.
    area = playstore['Installs'].values / 10000000
    fig = plt.figure(figsize=(5, 5))
    fig.add_subplot()
    plt.scatter(x=reviews, y=ratings, s=area, alpha=0.3)
    plt.xlabel('Reviews')
    plt.ylabel('Rating')
    result2 = _save_and_encode_figure(fig, 'rev_rat.png')

    ## Histogram Size Distribution
    sizes = (playstore['Size'] / 1000000).values
    fig = plt.figure(figsize=(5, 5))
    fig.add_subplot()
    plt.hist(sizes, bins=100, density=True, alpha=0.75)
    plt.xlabel('Size')
    plt.ylabel('Frequency')
    result3 = _save_and_encode_figure(fig, 'hist_size.png')

    ## Extra insight plot: the five most common content ratings.
    content_rating = (df2.groupby('Content Rating').agg({'App': 'count'})
                      .sort_values('App', ascending=False).head(5))
    fig = plt.figure(figsize=(10, 5), dpi=300)
    fig.add_subplot()
    plt.bar(content_rating.index, content_rating['App'], color=bar_colors)
    result4 = _save_and_encode_figure(fig, 'Content_Rating.png')

    # Hand every encoded plot to the template.
    return render_template(
        'index.html',
        stats=stats,
        result=result,
        result2=result2,
        result3=result3,
        result4=result4,
    )
Exemplo n.º 57
0
def predict_new_image(img_url, model_nn, metainfo):
    """Download an image, classify it and show a two-panel report figure.

    The image is saved to ``temp_img.jpg``, centre-cropped to a square, resized
    to 224x224, passed through a MobileNet feature extractor and scored with
    ``model_nn['model'].decision_function``. The report shows the per-class
    probabilities next to the image.

    Args:
        img_url: URL of the image to download.
        model_nn: mapping whose ``'model'`` entry exposes ``decision_function``.
        metainfo: mapping whose ``'class_names'`` entry lists the class labels.

    Returns:
        None. If the downloaded file cannot be opened as an image, a message is
        printed and the function returns without plotting.
    """
    # Download the image. The response is used as a context manager so the
    # streamed connection is released even if the copy fails.
    print('Downloading image.')
    with requests.get(img_url, stream=True) as response:
        with open('temp_img.jpg', 'wb') as out_file:
            shutil.copyfileobj(response.raw, out_file)

    # Load image and convert to RGB color scheme
    try:
        im = Image.open('temp_img.jpg').convert(mode='RGB', colors=256)
    except IOError:
        print('Image cannot be loaded.')
        # Nothing to classify; continuing would fail on the undefined image.
        return

    # Extract size of image
    width, height = im.size

    # Extract min dimension for squaring of image
    min_dim = min(width, height)

    # Compute image offset for squaring of image
    offset_x = width - min_dim
    offset_y = height - min_dim

    # Square image: keep the central min_dim x min_dim region
    im_squared = im.crop((offset_x / 2., offset_y / 2.,
                          offset_x / 2. + min_dim, offset_y / 2. + min_dim))

    # Resize image
    # NOTE(review): resample=1 presumably selects Pillow's LANCZOS filter --
    # confirm against the installed Pillow version.
    im_resized = im_squared.resize((224, 224), resample=1)

    # Put image into numpy array, scaling pixel values by 1/255
    img = np.array(im_resized) / 255

    # Extract features using Mobilenet
    print('Feature Extraction.')
    module_url = 'https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/4'
    m = tf.keras.Sequential([hub.KerasLayer(module_url, trainable=False)])
    m.build([None, 224, 224, 3])
    nn_features = m.predict(img[None, ...])

    # Compute class prediction probabilities (softmax over the decision
    # scores, in percent). Subtracting the maximum score first leaves the
    # result mathematically unchanged but keeps np.exp from overflowing.
    print('Plotting report.')
    sigm = model_nn['model'].decision_function(nn_features).squeeze()
    exp_scores = np.exp(sigm - np.max(sigm))
    probability = exp_scores / np.sum(exp_scores) * 100

    # Get class names
    class_names = metainfo['class_names']

    # Get the predicted label of the image and its probability
    predicted_idx = np.argmax(probability)
    predicted_prob = probability[predicted_idx]
    predicted_label = class_names[predicted_idx]

    # Plot overview figure
    plt.figure(figsize=(13, 6))
    gs = gridspec.GridSpec(1, 2, width_ratios=[1, 1])

    # Plot prediction probabilities
    ax = plt.subplot(gs[0])
    plt.title('Prediction Probability')
    y_pos = np.arange(len(probability))
    plt.barh(y_pos, probability, color='#BFBFBF')

    # Set y-label text
    y_label_text = [
        '{}: {:5.1f}%'.format(e, probability[i])
        for i, e in enumerate(class_names)
    ]
    ax.set_yticks(y_pos)
    ax.set_yticklabels(y_label_text)
    # Dotted reference line at the uniform probability (100 / n_classes);
    # the y-limits are captured first and restored so the line does not
    # rescale the axis.
    ylim = list(plt.ylim())
    plt.vlines(1 / len(probability) * 100, *ylim, linestyles=':', linewidth=2)
    plt.ylim(ylim)

    # Plot image
    plt.subplot(gs[1])
    plt.title('Image')
    plt.imshow(img)
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])

    # Add information text to image
    info_txt = '\nThis is to {:.02f}% a {}!'.format(predicted_prob,
                                                    predicted_label)
    plt.xlabel(info_txt, fontdict={'size': 21})

    plt.tight_layout()
    plt.show()
Exemplo n.º 58
0
    eafn = len(x['eaf'])
    ismn = len(x['ism'])
    isfn = len(x['isf'])

    objects = ('Greater European Male', 'Greater European Female',
               'Jewish Male', 'Jewish Female', 'Greater African Male',
               'Greater African Female', 'Hispanic Male', 'Hispanic Female',
               'East Asian Male', 'East Asian Female',
               'Indian Subcontinent Male', 'Indian Subcontinent Female')
    y_pos = np.arange(len(objects))
    avgSalary = [gem, gef, jm, jf, gam, gaf, hm, hf, eam, eaf, ism, isf]
    num = [gemn, gefn, jmn, jfn, gamn, gafn, hmn, hfn, eamn, eafn, ismn, isfn]
    print(avgSalary)
    print(num)

    plt.barh(y_pos, avgSalary, align='center', alpha=0.5)
    plt.yticks(y_pos, objects)
    plt.xlabel('Average Yearly Salary')
    plt.ylabel('Race and Gender')
    plt.title('Average Salary by Race and Gender')

    plt.show()

with open('finalData/all_data.csv', 'r') as read_file:
    csv_reader = reader(read_file)
    count = 0
    x = {'male': [], 'female': []}
    for line in csv_reader:
        if count != 0:
            salary = float(line[1])
            if line[0] == 'male':
Exemplo n.º 59
0
plt.ylabel('True label')

# Confusion-matrix heat map for the logistic-regression classifier.
fig = plt.figure(4)
ax = fig.add_subplot()
cax = ax.matshow(cf_matrix_lr)
plt.title('Confusion matrix of SubTask B (LR)')
fig.colorbar(cax)
# A leading empty label is prepended before the class labels on both axes.
padded_labels = [''] + labels
ax.set_xticklabels(padded_labels)
ax.set_yticklabels(padded_labels)
plt.xlabel('Predicted label')
plt.ylabel('True label')

classifier_list = ['SVM', 'DT', 'KNN', 'LR']
accuracy_list = [accuracy_svm, accuracy_dt, accuracy_knn, accuracy_lr]

# Horizontal bar chart comparing the accuracy of the four classifiers.
fig = plt.figure(5)
ax = fig.add_subplot()
plt.title("Accuracy Comparison for SubTask B")
plt.ylabel('Classifier')
plt.xlabel('Accuracy (%)')
plt.barh(classifier_list, accuracy_list)
# Print each accuracy value just past the end of its bar.
for bar_index, accuracy in enumerate(accuracy_list):
    text_x = accuracy + 1
    text_y = bar_index + .10
    ax.text(text_x, text_y, str(accuracy), color='blue')
plt.show()

# Persist the classifier names and their accuracies to two separate files.
# NOTE(review): `pl` is presumably the pickle module imported under an alias --
# confirm against the file's imports.
# The `with` statement guarantees both files are flushed and closed, even if
# one of the dumps raises.
with open('classifier_dump_b', 'wb') as file, \
        open('accuracy_list_dump_b', 'wb') as file1:
    pl.dump(classifier_list, file)
    pl.dump(accuracy_list, file1)
Exemplo n.º 60
0
        importances[indices],
        color=random_colors,
        align="center")
# One tick per column of X, labelled from header1 and rotated to stay legible;
# the x-limits leave one unit of padding on the left.
n_columns = X.shape[1]
plt.xticks(range(n_columns), header1, rotation=90)
plt.xlim([-1, n_columns])
plt.show()

#####################################################
#	                                                 #
#                  Models Accuracy                  #
#	                                                 #
#####################################################

from collections import OrderedDict

# Re-order the models by ascending accuracy.
model_accuracy = OrderedDict(sorted(model_accuracy.items(),
                                    key=lambda t: t[1]))

# Materialise the dict views as lists: under Python 3, keys()/values() are
# view objects rather than sequences, which matplotlib does not reliably
# accept as bar widths or tick labels.
model_names = list(model_accuracy.keys())
accuracy_values = list(model_accuracy.values())
bar_positions = range(len(model_accuracy))

plt.figure()
# Widen the left margin so the classifier names are not clipped.
plt.gcf().subplots_adjust(left=0.22)
plt.title("Models Accuracy")
plt.barh(bar_positions,
         accuracy_values,
         align='center',
         color='#009688')
plt.yticks(bar_positions, model_names, rotation=0)
axes = plt.gca()
# Restrict the x-axis to the 0.8-1.0 accuracy range.
axes.set_xlim([0.8, 1.0])
plt.xlabel("Accuracy")
plt.ylabel('Classifier')
plt.show()