Exemplo n.º 1
0
 def length_dist(self, pat_out="genes_lengths.png"):
     '''Plot the density of record lengths and save the figure.

     The length of every item in ``self.num`` is collected into a
     one-column dataframe ("record_length"), drawn as a ggplot density
     curve, and written to ``pat_out``.'''
     lengths = np.array(list(map(len, self.num)))
     frame = pd.DataFrame({"record_length": lengths})
     density = ggplot(frame, aes(x="record_length"))
     density = density + geom_density()
     density.save(pat_out)
Exemplo n.º 2
0
    def _post_density_plot(self, func=None, x_name='', plot_title='', include_doses=None, boot_samps=1000):
        """Build a per-dose density plot from weighted resampling of ``self.pds``.

        For every selected dose, ``func`` is evaluated on the scaled dose and
        the sample matrix ``self.pds._samp``; ``boot_samps`` values are then
        drawn with replacement from the result, weighted by the normalised
        ``self.pds._probs``.

        Args:
            func: Callable ``func(x, samp)`` returning one value per sample
                row. Defaults to ``self.metric(eff_probs, tox_probs)``, with
                the probabilities computed by ``_pi_E`` and ``_pi_T`` from
                the columns of ``samp``.
            x_name: Name of the value column in the dataframe; also used as
                the x aesthetic.
            plot_title: Title passed to ``ggtitle``.
            include_doses: 1-based dose indices to include. Defaults to all
                doses, ``1 .. self.num_doses``.
            boot_samps: Number of values drawn per dose.

        Returns:
            A ggplot object with one filled density per included dose.
        """

        from ggplot import aes, ggplot, geom_density, ggtitle
        import pandas as pd

        if include_doses is None:
            include_doses = range(1, self.num_doses + 1)

        def my_func(x, samp):
            # Columns 0-1 parameterise _pi_T, columns 2-4 parameterise _pi_E.
            tox_probs = _pi_T(x, mu=samp[:, 0], beta=samp[:, 1])
            eff_probs = _pi_E(x, mu=samp[:, 2], beta1=samp[:, 3], beta2=samp[:, 4])
            u = self.metric(eff_probs, tox_probs)
            return u
        if func is None:
            func = my_func

        x_boot = []
        dose_indices = []
        samp = self.pds._samp
        # Normalise into a NEW array. The previous in-place `p /= p.sum()`
        # rewrote self.pds._probs itself, silently changing shared state
        # (and raising for integer-typed weights).
        probs = self.pds._probs
        p = probs / probs.sum()
        for dose_index, x in enumerate(self.scaled_doses(), start=1):
            if dose_index in include_doses:
                values = func(x, samp)
                x_boot.extend(np.random.choice(values, size=boot_samps, replace=True, p=p))
                dose_indices.extend(np.repeat(dose_index, boot_samps))
        df = pd.DataFrame({x_name: x_boot, 'Dose': dose_indices})
        return ggplot(aes(x=x_name, fill='Dose'), data=df) + geom_density(alpha=0.6) + ggtitle(plot_title)
Exemplo n.º 3
0
    def density_plot(by='dpsi_zscore', categorical=True):
        """Build a kernel-density plot of one attribute of the variants.

        ``variants``, ``aaa_changes`` and ``prepare_data_frame`` are free
        names resolved outside this function.

        Args:
            by: Key read from every variant record; also used as the x-axis
                label and inside the plot title.
            categorical: When true, plot the three fixed groups
                ``variants['increase']``, ``variants['decrease']`` and
                ``variants['constant']``. Otherwise group the records of
                ``variants['all']`` by their 'change' value, one group per
                entry of ``aaa_changes``, keeping only groups that contain
                more than one record.

        Returns:
            The assembled ggplot object.
        """

        if categorical:
            data_dict = {
                'muts increasing AAA':
                np.array([x[by] for x in variants['increase']]),
                'muts decreasing AAA':
                np.array([x[by] for x in variants['decrease']]),
                'muts not changing AAA length':
                np.array([x[by] for x in variants['constant']])
            }
        else:
            data_dict = OrderedDict()
            for change in aaa_changes:
                # Collect the matching values once; previously the same scan
                # of variants['all'] ran twice per change (once for the size
                # check and once for the data).
                values = [
                    x[by] for x in variants['all'] if x['change'] == change
                ]
                if len(values) > 1:
                    data_dict[change] = np.array(values)

        plot = (
            ggplot(aes(x='value', colour='variable', fill='variable'),
                   data=prepare_data_frame(data_dict)) +
            ggtitle('Impact of variants affecting poly AAA sequences on %s' %
                    by) + xlab(by) + ylab('Kernel density estimate') +
            geom_density(alpha=0.6))

        return plot
Exemplo n.º 4
0
    def density_chart(self, conn, column, table_chosen, title):
        """Print a density chart of one table column, plus timing output.

        The column is fetched with ``dfile.single_selector`` and plotted as
        a ggplot density with the gray theme and the given title. The
        current time, the difference between it and ``a``, and the plot
        itself are printed.

        NOTE(review): ``a`` is not defined in this method; presumably it is
        a start timestamp set elsewhere in the module -- confirm.
        """
        data_df = dfile.single_selector(
            conn=conn, table=table_chosen, column=column)

        density_plot = ggplot(aes(x=column), data=data_df)
        density_plot = density_plot + geom_density()
        density_plot = density_plot + theme_gray()
        density_plot = density_plot + labs(title=title)

        finished = datetime.datetime.now()
        print(finished)
        print(finished - a)
        print(density_plot)
Exemplo n.º 5
0
    def build_list_of_plots(self, var):
        """Draw a density plot for ``var`` once per table in ``self.tables``.

        NOTE(review): ``plots`` is created but never filled or returned in
        the code visible here; the definition may continue beyond this
        excerpt -- confirm against the full source.
        """

        plots = []

        for tbl in self.tables:
            # Data for `var` from the current original table. This frame is
            # not used again in the visible code.
            df_from_original_table_on_variable = pd.DataFrame(
                self.get_variable_data_from_original_table(tbl, var))
            # Data for `var` from the intersection table; does not depend on
            # `tbl`, so it is rebuilt identically on every iteration.
            df_from_intersection_on_variable = pd.DataFrame(
                self.get_variable_data_from_intersection_table(var))

            # NOTE(review): the frame is appended to itself and the result of
            # the call is not kept. Presumably the original-table frame was
            # meant to be combined with the intersection frame -- confirm.
            df_from_intersection_on_variable.append(
                df_from_intersection_on_variable)
            # The aesthetics reference columns named 'var' and 'table'.
            g = ggplot(df_from_intersection_on_variable,
                       aes(x='var', color='table')) + geom_density()
            g.make()
Exemplo n.º 6
0
def render(data, bin_width, plot_density=False):
    """Print a ggplot chart of messages over time, split by conversation.

    Args:
        data: Passed straight to ``ggplot.ggplot``; the aesthetics reference
            its 'datetime' and 'conversationWithName' columns.
        bin_width: ``binwidth`` of the histogram; not used for the density
            plot.
        plot_density: When true, draw one density curve per conversation
            ('Conversation Densities'); otherwise draw a filled histogram
            ('Message Breakdown').
    """
    if plot_density:
        mapping = ggplot.aes(x='datetime', color='conversationWithName')
        plot = ggplot.ggplot(data, mapping)
        plot = plot + ggplot.geom_density()
        plot = plot + ggplot.scale_x_date(labels='%b %Y')
        plot = plot + ggplot.ggtitle('Conversation Densities')
        plot = plot + ggplot.ylab('Density')
        plot = plot + ggplot.xlab('Date')
    else:
        mapping = ggplot.aes(x='datetime', fill='conversationWithName')
        plot = ggplot.ggplot(data, mapping)
        plot = plot + ggplot.geom_histogram(alpha=0.6, binwidth=bin_width)
        plot = plot + ggplot.scale_x_date(labels='%b %Y', breaks='6 months')
        plot = plot + ggplot.ggtitle('Message Breakdown')
        plot = plot + ggplot.ylab('Number of Messages')
        plot = plot + ggplot.xlab('Date')

    print(plot)
Exemplo n.º 7
0
from ggplot import aes, diamonds, geom_density, ggplot
import matplotlib.pyplot as plt

from bokeh import mpl
from bokeh.plotting import output_file, show

# Density plot of the 'price' column of the diamonds dataset, one coloured
# curve per 'cut'.
g = ggplot(diamonds, aes(x='price', color='cut')) + geom_density()
g.draw()

# The title is set through pyplot after the plot has been drawn; presumably
# it applies to the figure produced by g.draw() -- order matters here.
plt.title("Density ggplot-based plot in Bokeh.")

# Bokeh output goes to density.html.
output_file("density.html", title="density.py example")

# Hand the result of mpl.to_bokeh() to Bokeh's show().
show(mpl.to_bokeh())
Exemplo n.º 8
0
# Summary statistics of the per-subject word counts.
print(subjects_words_count.describe())

#%%
import ggplot as gg

# Single-column frame ("count") used as the plot data.
df = pd.DataFrame(subjects_words_count, columns = ["count"])

hist =  gg.ggplot(df, gg.aes(x = "count"))
# NOTE(review): the three label components are added to each other first and
# only then added to the plot; confirm the ggplot version in use supports
# combining components this way.
hist += gg.xlab("# of words") +\
        gg.ylab("Frequency") +\
        gg.ggtitle("Frequency of words")

# Vertical markers: red for the mean, blue for the median.
# NOTE(review): mean()/median() are taken on the whole frame rather than on
# the "count" column -- confirm geom_vline accepts that result.
hist += gg.geom_vline(x = df.mean(), color="red")
hist += gg.geom_vline(x = df.median(), color="blue")
hist += gg.geom_density(color="green")
hist += gg.geom_histogram(binwidth=1, color="grey")

# Bare expression: displays the plot when run as a notebook/interactive cell.
hist

#%%

# 1st attempt to classify subjects per tag

# Raw inputs for the train and test splits.
X_raw_train = subjects_train
X_raw_test = subjects_test

# Targets and their names come from the raw data objects.
Y_train = raw_data_train.target
Y_test = raw_data_test.target

target_names = raw_data_train.target_names
Exemplo n.º 9
0
# One zero-initialised entry per unique word in each table.
wordfreq = dict.fromkeys(worduniq, 0)
wordscore = dict.fromkeys(worduniq, 0)
wordprobs = dict.fromkeys(worduniq, 0)

# wordfreq: number of occurrences of each word in the bag.
for w in wordbag:
    wordfreq[w] += 1
# wordscore: for each word occurrence in row i, add that row's outcome.
for i in range(nrow):
    for j in words[i]:
        wordscore[j] += outcomes[i]
# wordprobs: accumulated outcome divided by occurrence count.
for i in wordprobs:
    wordprobs[i] = float(wordscore[i]) / wordfreq[i]

# Per-row score: mean word probability, or 0 for rows without words.
b1 = pandas.Series(
    [np.mean([wordprobs[w] for w in l]) if len(l) else 0 for l in words]
)
d1 = pandas.concat([b1, outcomes], axis=1)
d1.columns = ['prob', 'out']
g1 = ggplot(d1, aes(x='prob', color='out')) + geom_density()

cutoff = 0.3

# Share of rows whose thresholded score equals the recorded outcome.
bpreds = b1 > cutoff
print(float(sum(bpreds == outcomes)) / nrow)  # 0.922524752475

# classify test data
words = test[textVar].apply(lambda x: prepare_document(x)).tolist()
# Words missing from wordprobs contribute 0 to the row mean.
b2 = [np.mean([wordprobs.get(w, 0) for w in l]) if len(l) else 0 for l in words]
# NOTE(review): the file is opened in binary mode but str values are written;
# this only works on Python 2 -- confirm the target interpreter.
with open('submission.csv', 'wb') as f:
    f.write(idVar + ',' + outVar + '\n')
    for i, score in enumerate(b2):
        row = test[idVar][i] + ',' + ('1' if score > cutoff else '0')
        print(row)
        f.write(row + '\n')
Exemplo n.º 10
0
    "rain", "ENTRIESn_hourly", "EXITSn_hourly"
]]
# `turnstile_rain` is assigned by a statement that begins above this excerpt.
# Label every row by its rain flag and summarise per label.
turnstile_rain["rain2"] = np.where(turnstile_rain["rain"] == 1, "raining",
                                   "not raining")
turnstile_rain.groupby("rain2").describe()

# Take an explicit copy of the column subset before adding columns to it.
# Assigning new columns on a bare selection of `turnstile_weather` is the
# chained-assignment pattern pandas warns about (SettingWithCopyWarning).
turnstile_rain = turnstile_weather[[
    "rain", "ENTRIESn_hourly", "EXITSn_hourly"
]].copy()
# log10(entries + 1); the +1 keeps rows with zero entries finite.
turnstile_rain["ENTRIESn_hourly_log10"] = np.log10(
    turnstile_rain["ENTRIESn_hourly"] + 1)
turnstile_rain["rain2"] = np.where(turnstile_rain["rain"] == 1, "raining",
                                   "not raining")
set1 = brewer2mpl.get_map('Set1', 'qualitative', 3).mpl_colors
# One density panel per rain label, coloured with the Set1 palette.
# NOTE(review): the y label reads "Number of turnstiles" although the layer
# is a density -- confirm the intended label.
plot = gg.ggplot(turnstile_rain, gg.aes(x="ENTRIESn_hourly_log10", color="rain2")) + \
       gg.geom_density() + \
       gg.facet_wrap("rain2", scales="fixed") + \
       gg.scale_colour_manual(values=set1) + \
       gg.xlab("log10(entries per hour)") + \
       gg.ylab("Number of turnstiles") + \
       gg.ggtitle("Entries per hour whilst raining and not raining")
plot

# Seeded sample of 600 normal draws (mean 180, sd 40) and its histogram.
np.random.seed(42)
data = pd.Series(np.random.normal(loc=180, scale=40, size=600))
data.hist()

# Histogram of the raw hourly entries with pylab titles and axis labels.
p = turnstile_weather["ENTRIESn_hourly"].hist()
pylab.suptitle("Entries per hour across all stations")
pylab.xlabel("Entries per hour")
pylab.ylabel("Number of occurrences")
Exemplo n.º 11
0
 def density(self, inp1, inp2, inp3):
     """Return a faceted density plot of ``self.data``.

     ``inp1`` is mapped to x, ``inp2`` to both colour and fill, and
     ``inp3`` is passed to ``facet_grid``. The title and y label are
     fixed strings.
     """
     mapping = gg.aes(x=inp1, color=inp2, fill=inp2)
     chart = gg.ggplot(self.data, mapping)
     chart = chart + gg.geom_density(alpha=0.5, size=5)
     chart = chart + gg.facet_grid(inp3)
     chart = chart + gg.ggtitle('Density of Fare by Sex and Survival Status')
     chart = chart + gg.ylab('Survival Status')
     return chart
# `ax` is created above this excerpt; finish labelling the hourly chart.
ax.set_title("Total entries/exits per hour by hour across all stations")
ax.legend(["Entries", "Exits"])
ax.set_ylabel("Entries/exits per hour (1e6 is a million)")
ax.set_xlabel("Hour (0 is midnight, 12 is noon, 23 is 11pm)")
ax.set_xlim(0, 23)

# Take an explicit copy of the column subset before adding columns to it.
# Assigning new columns on a bare selection of `turnstile_weather` is the
# chained-assignment pattern pandas warns about (SettingWithCopyWarning).
turnstile_rain = turnstile_weather[["rain", "ENTRIESn_hourly", "EXITSn_hourly"]].copy()
turnstile_rain["rain2"] = np.where(turnstile_rain["rain"] == 1, "raining", "not raining")
turnstile_rain.groupby("rain2").describe()

# Same subset again (copied for the same reason), now with a log10 column.
turnstile_rain = turnstile_weather[["rain", "ENTRIESn_hourly", "EXITSn_hourly"]].copy()
# log10(entries + 1); the +1 keeps rows with zero entries finite.
turnstile_rain["ENTRIESn_hourly_log10"] = np.log10(turnstile_rain["ENTRIESn_hourly"] + 1)
turnstile_rain["rain2"] = np.where(turnstile_rain["rain"] == 1, "raining", "not raining")
set1 = brewer2mpl.get_map('Set1', 'qualitative', 3).mpl_colors
# One density panel per rain label, coloured with the Set1 palette.
# NOTE(review): the y label reads "Number of turnstiles" although the layer
# is a density -- confirm the intended label.
plot = gg.ggplot(turnstile_rain, gg.aes(x="ENTRIESn_hourly_log10", color="rain2")) + \
       gg.geom_density() + \
       gg.facet_wrap("rain2", scales="fixed") + \
       gg.scale_colour_manual(values=set1) + \
       gg.xlab("log10(entries per hour)") + \
       gg.ylab("Number of turnstiles") + \
       gg.ggtitle("Entries per hour whilst raining and not raining")
plot

# Seeded sample of 600 normal draws (mean 180, sd 40) and its histogram.
np.random.seed(42)
data = pd.Series(np.random.normal(loc=180, scale=40, size=600))
data.hist()

# Histogram of the raw hourly entries with pylab titles and axis labels.
p = turnstile_weather["ENTRIESn_hourly"].hist()
pylab.suptitle("Entries per hour across all stations")
pylab.xlabel("Entries per hour")
pylab.ylabel("Number of occurrences")