Exemplo n.º 1
0
class FractionTaxaBarStack(Graph):
    """Comparing all fractions across all pools in a barstack.

    One stacked bar is drawn per (pool, fraction) pair and every bar is
    rescaled so that its segments add up to 100 percent.
    """
    short_name = 'fraction_taxa_barstack'

    def plot(self):
        """Build the abundance table, draw the stacked bar chart and save it.

        The table is stored on ``self.frame`` and written to
        ``self.csv_path``; the figure is handed to ``self.save_plot`` and
        closed afterwards.
        """
        # One column per "pool - fraction" pair, read from each fraction's
        # ``rdp.phyla`` (presumably taxon -> count; confirm against the
        # pool/fraction code). All 'low' entries come first, then 'med',
        # then 'big'.
        self.frame = OrderedDict(
            ('%s - %s' % (p, f), getattr(p.fractions, f).rdp.phyla)
            for f in ('low', 'med', 'big') for p in self.parent.pools)
        self.frame = pandas.DataFrame(self.frame)
        # Taxa missing from a given pool/fraction count as zero
        self.frame = self.frame.fillna(0)
        # Rows become the pool/fraction pairs, i.e. one row per bar
        self.frame = self.frame.transpose()
        # Express every row as percentages of its own total
        self.frame = self.frame.apply(lambda x: 100 * x / x.sum(), axis=1)
        # Sort the table by sum #
        # ``Series.sort(ascending=...)`` and ``DataFrame.reindex_axis`` no
        # longer exist in current pandas; ``sort_values`` and
        # ``reindex(columns=...)`` are the supported equivalents.
        sums = self.frame.sum().sort_values(ascending=False)
        self.frame = self.frame.reindex(columns=sums.index)
        # Plot #
        # ``DataFrame.plot`` opens its own figure when no ``ax`` is passed,
        # so take the figure from the returned axes rather than creating an
        # extra empty one beforehand that would never be closed.
        axes = self.frame.plot(kind='bar', stacked=True, color=cool_colors)
        fig = axes.get_figure()
        # Other #
        axes.set_title('Species relative abundances per fraction per pool')
        axes.set_ylabel('Relative abundances in percent')
        axes.xaxis.grid(False)
        axes.yaxis.grid(False)
        axes.set_ylim([0, 100])
        # Put a legend below current axis
        axes.legend(loc='upper center', bbox_to_anchor=(0.5, -0.20),
                    fancybox=True, shadow=True, ncol=5)
        # Save it #
        self.save_plot(fig, axes, width=24.0, height=14.0,
                       bottom=0.30, top=0.97, left=0.04, right=0.98)
        self.frame.to_csv(self.csv_path)
        pyplot.close(fig)
Exemplo n.º 2
0
def perf_stats_bootstrap(returns, factor_returns=None, return_stats=True,
                         **kwargs):
    """Bootstrap the performance statistics of a strategy.

    Every function in SIMPLE_STAT_FUNCS is handed to calc_bootstrap together
    with `returns`. When `factor_returns` is supplied, every function in
    FACTOR_STAT_FUNCS is bootstrapped as well. Results are keyed by the
    display names looked up in STAT_FUNC_NAMES.

    Parameters
    ----------
    returns : pd.Series
        Daily returns of the strategy, noncumulative.
         - See full explanation in tears.create_full_tear_sheet.
    factor_returns : pd.Series, optional
        Daily noncumulative returns of the benchmark factor, usually a
        benchmark such as market returns.
         - Same style as returns.
         - If None, the factor-based statistics are skipped.
    return_stats : boolean (optional)
        If True, summarise the bootstrap samples of each metric by mean,
        median, 5 and 95 percentiles.
        If False, hand back the raw bootstrap samples of each metric.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    pd.DataFrame
        if return_stats is True:
        - One row per metric with the columns 'mean', 'median', '5%'
        and '95%'.
        if return_stats is False:
        - One column of bootstrap samples per metric.
    """

    samples = OrderedDict()

    def _collect(stat_funcs, **extra):
        # Bootstrap each function and file the result under its display name.
        for func in stat_funcs:
            label = STAT_FUNC_NAMES[func.__name__]
            samples[label] = calc_bootstrap(func, returns, **extra)

    _collect(SIMPLE_STAT_FUNCS)
    if factor_returns is not None:
        _collect(FACTOR_STAT_FUNCS, factor_returns=factor_returns)

    samples_frame = pd.DataFrame(samples)

    if not return_stats:
        return samples_frame

    summary = samples_frame.apply(calc_distribution_stats)
    return summary.T[['mean', 'median', '5%', '95%']]
Exemplo n.º 3
0
def perf_stats_bootstrap(returns,
                         factor_returns=None,
                         return_stats=True,
                         **kwargs):
    """Compute bootstrapped performance metrics for a strategy.

    The functions listed in SIMPLE_STAT_FUNCS are always bootstrapped via
    calc_bootstrap; those in FACTOR_STAT_FUNCS are added only when a
    benchmark series is given. Each metric is stored under the name that
    STAT_FUNC_NAMES maps its function name to.

    Parameters
    ----------
    returns : pd.Series
        Daily returns of the strategy, noncumulative.
         - See full explanation in tears.create_full_tear_sheet.
    factor_returns : pd.Series, optional
        Daily noncumulative returns of the benchmark factor (typically
        market returns), in the same style as returns.
         - If None, no factor-based metric is computed.
    return_stats : boolean (optional)
        If True, the result holds mean, median, 5 and 95 percentiles
        for each perf metric.
        If False, the result holds the bootstrap samples themselves.
    **kwargs
        Ignored; the body never reads them.

    Returns
    -------
    pd.DataFrame
        if return_stats is True:
        - Distributional statistics ('mean', 'median', '5%', '95%') of the
        bootstrapped sampling distribution, one row per metric.
        if return_stats is False:
        - Bootstrap samples, one column per metric.
    """

    # Pairs are produced lazily, so the name lookup and the bootstrap run
    # for one function are finished before the next function is touched.
    per_metric = OrderedDict(
        (STAT_FUNC_NAMES[fn.__name__], calc_bootstrap(fn, returns))
        for fn in SIMPLE_STAT_FUNCS)

    if factor_returns is not None:
        per_metric.update(
            (STAT_FUNC_NAMES[fn.__name__],
             calc_bootstrap(fn, returns, factor_returns=factor_returns))
            for fn in FACTOR_STAT_FUNCS)

    table = pd.DataFrame(per_metric)

    if return_stats:
        wanted = ['mean', 'median', '5%', '95%']
        return table.apply(calc_distribution_stats).T[wanted]
    return table
Exemplo n.º 4
0
class FractionTaxaBarStack(Graph):
    """Comparing all fractions across all pools in a barstack.

    Each (pool, fraction) pair gets one stacked bar whose segments are
    percentages of that pair's own total.
    """
    short_name = 'fraction_taxa_barstack'

    def plot(self):
        """Assemble the percentage table, plot it as stacked bars and save.

        Side effects: sets ``self.frame``, calls ``self.save_plot`` with the
        figure, writes the table to ``self.csv_path`` and closes the figure.
        """
        # Columns are labelled "<pool> - <fraction>"; the values come from
        # ``rdp.phyla`` of each fraction (assumed to map taxon -> count;
        # TODO confirm).
        self.frame = OrderedDict(
            (('%s - %s' % (p, f), getattr(p.fractions, f).rdp.phyla)
             for f in ('low', 'med', 'big') for p in self.parent.pools))
        self.frame = pandas.DataFrame(self.frame)
        # A taxon absent from one pool/fraction is treated as a zero count
        self.frame = self.frame.fillna(0)
        # After transposing, every row corresponds to one bar
        self.frame = self.frame.transpose()
        # Row-wise normalisation to percent
        self.frame = self.frame.apply(lambda x: 100 * x / x.sum(), axis=1)
        # Sort the table by sum #
        # ``Series.sort(ascending=...)`` and ``reindex_axis`` were removed
        # from pandas; use their supported replacements instead.
        sums = self.frame.sum().sort_values(ascending=False)
        self.frame = self.frame.reindex(columns=sums.index)
        # Plot #
        # No ``pyplot.figure()`` beforehand: ``DataFrame.plot`` makes its own
        # figure when called without ``ax``, so a figure opened here would
        # stay empty and never be closed.
        axes = self.frame.plot(kind='bar', stacked=True, color=cool_colors)
        fig = axes.get_figure()
        # Other #
        axes.set_title('Species relative abundances per fraction per pool')
        axes.set_ylabel('Relative abundances in percent')
        axes.xaxis.grid(False)
        axes.yaxis.grid(False)
        axes.set_ylim([0, 100])
        # Put a legend below current axis
        axes.legend(loc='upper center',
                    bbox_to_anchor=(0.5, -0.20),
                    fancybox=True,
                    shadow=True,
                    ncol=5)
        # Save it #
        self.save_plot(fig,
                       axes,
                       width=24.0,
                       height=14.0,
                       bottom=0.30,
                       top=0.97,
                       left=0.04,
                       right=0.98)
        self.frame.to_csv(self.csv_path)
        pyplot.close(fig)
Exemplo n.º 5
0
class FractionTaxaBarStack(Graph):
    """Stacked bar chart of taxa percentages per pool and size fraction.

    Bars are relabelled with fixed display names, low-count taxa are merged
    into a single 'Others' segment, and every bar is scaled to 100 percent.
    """
    short_name = 'fraction_taxa_barstack'
    # Margins and output formats; not read in this class body (presumably
    # consumed by ``Graph.save_plot`` -- confirm in the base class).
    bottom = 0.4
    top = 0.95
    left = 0.1
    right = 0.95
    formats = ('pdf', 'eps')

    def plot(self):
        """Build the table, draw the stacked bars, then save figure and CSV.

        The final table is kept on ``self.frame`` and written to
        ``self.csv_path``; the figure goes through ``self.save_plot`` and is
        closed afterwards.
        """
        # Make Frame #
        # One column per "pool - fraction" pair, read from ``rdp.phyla``
        # (assumed taxon -> count; TODO confirm).
        self.frame = OrderedDict(
            (('%s - %s' % (p, f), getattr(p.fractions, f).rdp.phyla)
             for f in ('low', 'med', 'big') for p in self.parent.pools))
        self.frame = pandas.DataFrame(self.frame)
        self.frame = self.frame.fillna(0)
        # Rename #
        # Several pools map to the same display label, so the resulting
        # labels are not unique. Columns not listed here keep their name.
        new_names = {
            u"run001-pool01 - low": "2-step PCR low",
            u"run001-pool02 - low": "2-step PCR low",
            u"run001-pool03 - low": "2-step PCR low",
            u"run001-pool04 - low": "1-step PCR low",
            u"run002-pool01 - low": "New chem low",
            u"run001-pool01 - med": "2-step PCR med",
            u"run001-pool02 - med": "2-step PCR med",
            u"run001-pool03 - med": "2-step PCR med",
            u"run001-pool04 - med": "1-step PCR med",
            u"run002-pool01 - med": "New chem med",
            u"run001-pool01 - big": "2-step PCR high",
            u"run001-pool02 - big": "2-step PCR high",
            u"run001-pool03 - big": "2-step PCR high",
            u"run001-pool04 - big": "1-step PCR high",
            u"run002-pool01 - big": "New chem high",
        }
        self.frame.rename(columns=new_names, inplace=True)
        # Rows are now the (relabelled) pool/fraction pairs, one per bar
        self.frame = self.frame.transpose()
        # Group low abundant into 'others' #
        # A taxon is "low abundant" when its total over all rows is < 30000
        low_abundance = self.frame.sum() < 30000
        other_count = self.frame.loc[:, low_abundance].sum(axis=1)
        # Copy the selection so that adding the 'Others' column below writes
        # to an independent frame instead of a slice of the old one.
        self.frame = self.frame.loc[:, ~low_abundance].copy()
        self.frame['Others'] = other_count
        # Normalize #
        self.frame = self.frame.apply(lambda x: 100 * x / x.sum(), axis=1)
        # Sort the table by sum #
        # ``Series.sort(ascending=...)`` and ``DataFrame.reindex_axis`` no
        # longer exist in current pandas; these are the supported forms.
        sums = self.frame.sum().sort_values(ascending=False)
        self.frame = self.frame.reindex(columns=sums.index)
        # Plot #
        # ``DataFrame.plot`` opens its own figure when no ``ax`` is given;
        # creating one beforehand would leave an empty figure unclosed.
        axes = self.frame.plot(kind='bar', stacked=True, color=cool_colors)
        fig = axes.get_figure()
        # Other #
        axes.set_ylabel('Relative abundances in percent')
        axes.xaxis.grid(False)
        axes.yaxis.grid(False)
        axes.set_ylim([0, 100])
        # Put a legend below current axis
        axes.legend(loc='upper center',
                    bbox_to_anchor=(0.5, -0.40),
                    fancybox=True,
                    shadow=True,
                    ncol=5,
                    prop={'size': 10})
        # Font size #
        axes.tick_params(axis='x', which='major', labelsize=11)
        # Save it #
        self.save_plot(fig, axes)
        self.frame.to_csv(self.csv_path)
        pyplot.close(fig)
Exemplo n.º 6
0
class FractionTaxaBarStack(Graph):
    """This is figure 3 of the paper.

    A stacked bar chart with one bar per pool and size fraction: taxa whose
    total count is below a cutoff are pooled into 'Others' and every bar is
    expressed in percent of its own total.
    """

    short_name = 'fraction_taxa_barstack'
    # Margins and output formats; they are not used inside this class
    # (presumably picked up by ``Graph.save_plot`` -- verify in the base).
    bottom = 0.4
    top = 0.95
    left = 0.1
    right = 0.95
    formats = ('pdf', 'eps')

    def plot(self):
        """Create the table behind the figure, plot it and save the outputs.

        Sets ``self.frame``, passes the figure to ``self.save_plot``, writes
        the table to ``self.csv_path`` and finally closes the figure.
        """
        # Make Frame #
        # Column labels are "<pool> - <fraction>"; values are taken from
        # each fraction's ``rdp.phyla`` (presumably taxon -> count; confirm).
        self.frame = OrderedDict(
            ('%s - %s' % (p, f), getattr(p.fractions, f).rdp.phyla)
            for f in ('low', 'med', 'big') for p in self.parent.pools)
        self.frame = pandas.DataFrame(self.frame)
        self.frame = self.frame.fillna(0)
        # Rename #
        # Display labels for the bars. Several pools share a label, so the
        # labels are not unique; unlisted columns keep their original name.
        new_names = {
            u"run001-pool01 - low": "2-step PCR low",
            u"run001-pool02 - low": "2-step PCR low",
            u"run001-pool03 - low": "2-step PCR low",
            u"run001-pool04 - low": "1-step PCR low",
            u"run002-pool01 - low": "New chem low",
            u"run001-pool01 - med": "2-step PCR med",
            u"run001-pool02 - med": "2-step PCR med",
            u"run001-pool03 - med": "2-step PCR med",
            u"run001-pool04 - med": "1-step PCR med",
            u"run002-pool01 - med": "New chem med",
            u"run001-pool01 - big": "2-step PCR high",
            u"run001-pool02 - big": "2-step PCR high",
            u"run001-pool03 - big": "2-step PCR high",
            u"run001-pool04 - big": "1-step PCR high",
            u"run002-pool01 - big": "New chem high",
        }
        self.frame.rename(columns=new_names, inplace=True)
        # One row per bar from here on
        self.frame = self.frame.transpose()
        # Group low abundant into 'others' #
        # The cutoff applies to each taxon's total over all rows
        low_abundance = self.frame.sum() < 30000
        other_count = self.frame.loc[:, low_abundance].sum(axis=1)
        # Take a copy: the 'Others' column must be added to an independent
        # frame, not to a slice of the previous one.
        self.frame = self.frame.loc[:, ~low_abundance].copy()
        self.frame['Others'] = other_count
        # Normalize #
        self.frame = self.frame.apply(lambda x: 100 * x / x.sum(), axis=1)
        # Sort the table by sum #
        # Replaces the removed ``Series.sort(ascending=...)`` and
        # ``DataFrame.reindex_axis`` calls with their supported equivalents.
        sums = self.frame.sum().sort_values(ascending=False)
        self.frame = self.frame.reindex(columns=sums.index)
        # Plot #
        # The figure is taken from the axes that ``DataFrame.plot`` returns;
        # opening a figure beforehand would only leave an empty one behind.
        axes = self.frame.plot(kind='bar', stacked=True, color=cool_colors)
        fig = axes.get_figure()
        # Other #
        axes.set_ylabel('Relative abundances in percent')
        axes.xaxis.grid(False)
        axes.yaxis.grid(False)
        axes.set_ylim([0, 100])
        # Put a legend below current axis
        axes.legend(loc='upper center', bbox_to_anchor=(0.5, -0.40),
                    fancybox=True, shadow=True, ncol=5, prop={'size': 10})
        # Font size #
        axes.tick_params(axis='x', which='major', labelsize=11)
        # Save it #
        self.save_plot(fig, axes)
        self.frame.to_csv(self.csv_path)
        pyplot.close(fig)