Пример #1
0
def get_baseline_stats(keys):
    '''
    gather baseline statistics for several parameters.

    for every key, the 'c' entry of the raw data (presumably the control
    samples - confirm against raw_data.get_raw_data) is reduced to the
    samplings of days 0, 7, 14, 21 and 28, flattened into one Series and
    summarised per index label.

    :param keys: list
    names of the data sets, each passed to raw_data.get_raw_data().

    :return: tuple of three items:
    Stats holding the group means (means) and standard errors of the
    mean (stde), one column per key;
    the significance annotations of all keys, concatenated column-wise;
    the list of flattened raw Series, one per key (for inspection).
    '''

    # sampling days that are kept; every other day is discarded
    kept_sampling_days = (0, 7, 14, 21, 28)

    raw_sets = []
    means_per_key = []
    errors_per_key = []
    annotations_per_key = []

    for key in keys:
        control = raw_data.get_raw_data(key)['c']

        # keep only the rows whose index label is one of the kept days
        selected_days = [
            day for day in control.index if day in kept_sampling_days
        ]

        # rows -> columns, columns -> innermost index level, then keep only
        # the level that is neither 'replicate' nor 'days'
        flattened = (
            control.loc[selected_days]
            .T
            .stack()
            .droplevel(['replicate', 'days'])
        )
        flattened.name = key
        raw_sets.append(flattened)

        # statistics of all values sharing the same index label
        grouped = flattened.groupby(flattened.index)
        key_means = grouped.mean()
        key_errors = grouped.sem()

        # significance annotations, named after the data set
        key_annotations = significance.annotate(
            significance.get_significance_booleans(flattened)
        )
        key_annotations.name = key

        means_per_key.append(key_means)
        errors_per_key.append(key_errors)
        annotations_per_key.append(key_annotations)

    # one column per data set
    baseline_means, baseline_std_errors, baseline_significance = [
        pandas.concat(parts, axis=1)
        for parts in (means_per_key, errors_per_key, annotations_per_key)
    ]

    stats = Stats(means=baseline_means, stde=baseline_std_errors)
    return stats, baseline_significance, raw_sets
Пример #2
0
def stack_data(raw_data, normalize_by=None, treatment: str = None):
    '''
    stack raw data.

    parameters
    ----------

    raw_data: str or DataFrame
        if a string is given it will be used as an argument for
        get_raw_data(); anything else is used as the data itself.

    normalize_by: function, optional
        how to normalize the data. called with the whole data set and
        takes precedence over `treatment`: when it is given, `treatment`
        is ignored.

    treatment: str, optional
        can be either "t" (use data from treated samples) or "c" (use data
        from control samples). only used when `normalize_by` is not given.

    returns
    -------
    stacked_data_set: Series
        every level of the columns is moved into the index, so the
        Series.index holds the group id's for regression and the
        Series.values are the results being regressed.
        the name of the Series is not assigned by this function.
    '''

    # a data set name is resolved through get_raw_data()
    data = get_raw_data(raw_data) if isinstance(raw_data, str) else raw_data

    # normalization wins over treatment selection; with neither, the data
    # is stacked as is
    if normalize_by:
        data = normalize_by(data)
    elif treatment:
        data = data[treatment]

    # stack all column levels at once
    columns_level_names = data.columns.names
    stacked_data_set = data.stack(columns_level_names)

    return stacked_data_set
def combine_raw_data(data_set_name, treatment='t'):
    '''
    combine raw_data from all LTTs.

    all column levels of the selected raw data are stacked into the index
    and then dropped again, which leaves a flat Series indexed only by the
    original row index.

    :param data_set_name: str
    which data set to use.

    :param treatment: str, optional
    't' for treated samples (default), 'c' for control samples.

    :return: combined_raw_data: Series
    index is sampling event, name is data_set_name.
    '''

    # get raw data of the requested treatment
    combined_raw = get_raw_data(data_set_name)[treatment]

    # stack every column level (presumably 'soil' and 'replicate' - confirm
    # against get_raw_data) and drop them from the resulting index
    columns_levels = combined_raw.columns.names
    combined_raw = combined_raw.stack(columns_levels).droplevel(columns_levels)

    # name raw data
    combined_raw.name = data_set_name

    # NOTE: two steps were disabled in the original code and remain
    # disabled: dropping the outlying 'Resp' sampling events
    # (0.17, 7, 7.17), and inserting the control values of the first
    # sampling as the first value of treated 'Resp' data.

    return combined_raw
def get_carbon_efficiency(treatment):
    '''
    compute the ratio between weekly MBC change and the sum of weekly
    respiration and weekly MBC change (CUE), with its propagated error.

    :param treatment: str
    key used to select the data from get_raw_data('MBC'); also passed to
    get_weekly_respiration().

    :return: Stats
    means is the CUE, stde is the result of propagate_error().
    '''

    # samplings delimiting the first three weeks
    sampling_days = [0, 7, 14, 21]
    final_day = sampling_days[-1]

    # MBC statistics at the weekly samplings
    mbc_weekly = get_raw_data('MBC')[treatment].loc[sampling_days]
    mbc_stats = get_stats(mbc_weekly)

    # change between consecutive samplings, labelled by the day on which
    # the week starts; the last day starts no week and is dropped
    growth = mbc_stats.means.diff().shift(-1).drop(final_day)

    # error of a difference: root of the summed squared errors of the two
    # samplings involved, aligned the same way as the change itself
    variances = mbc_stats.stde ** 2
    summed_variances = variances + variances.shift(1)
    growth_error = (summed_variances ** 0.5).shift(-1).drop(final_day)

    # weekly respiration and its error
    respiration_stats = get_weekly_respiration(treatment)
    respiration = respiration_stats.means
    respiration_error = respiration_stats.stde

    # MBC change takes over the index of the respiration data
    shared_index = respiration.index
    growth.index = shared_index
    growth_error.index = shared_index

    # assimilation-to-consumption ratio (CUE)
    efficiency = growth / (respiration + growth)

    # error propagation from the relative errors of both terms
    relative_growth_error = growth_error / growth
    relative_respiration_error = respiration_error / respiration
    efficiency_error = propagate_error(
        efficiency,
        relative_respiration_error,
        relative_growth_error,
    )

    return Stats(means=efficiency, stde=efficiency_error)
def get_raw_ltt(data_set_name, ltt):
    '''
    get treated samples raw data, for a given LTT.

    :param data_set_name: str
    name of the data set, passed to get_raw_data(); also becomes the name
    of the returned Series.

    :param ltt:
    column key selecting the LTT within the treated samples data.

    :return: Series
    the stacked data of the LTT, without the 'replicate' index level.
    '''

    # 't' selects the treated samples
    treated = get_raw_data(data_set_name)['t']

    # move the columns into the index, then discard the 'replicate' level
    flattened = treated[ltt].stack().droplevel('replicate')
    flattened.name = data_set_name

    return flattened
Пример #6
0
    # drop irrelevant days
    t = drop_days(t, days_to_drop) if days_to_drop else t
    data = data.loc[t]

    # dependent variable abd weights
    y = data['mean'].values
    error = data['error'].values

    # intialize model and perform fit
    fit = model.fit(y, t=t, weights=error, nan_policy='omit')

    return fit


if __name__ == '__main__':

    # demo run: fit the weekly_growth_decay model to the baseline-normalized
    # 'MBC' data of a single soil
    data_set_name = 'MBC'
    soil = 'MIN'
    raw = get_raw_data(data_set_name)
    raw = baseline_normalize(raw)[soil]

    # mean and standard error of the mean of every row
    mean = raw.T.mean()
    sem = raw.T.sem()

    # force value and error at label 0 to zero
    mean[0] = 0
    sem[0] = 0

    # column names expected by fit(): 'mean' and 'error'
    mean.name = 'mean'
    sem.name = 'error'
    data = pandas.concat([mean, sem], axis=1)

    # rows with a missing mean or error cannot be fitted
    data.dropna(how='any', inplace=True)

    # keep the result under its own name so that the fit() function is not
    # replaced by its return value
    fit_result = fit(data, weekly_growth_decay)
Пример #7
0
# todo compute stde

from pandas import DataFrame, MultiIndex

from data.raw_data import get_raw_data
from data.stats import get_stats
from data.helpers import *

SOILS = Constants.LTTs

# raw respiration data
RAW_DATA = get_raw_data('Resp')

# sampling timepoints are the index labels of the raw data
timepoints = RAW_DATA.index.values
n_intervals = len(timepoints) - 1

# [start, end] of the interval between every two consecutive samplings
# (i.e [0, 2] for the interval between incubation start and 2 h)
INTERVALS_LIST = [
    [start, end] for start, end in zip(timepoints[:-1], timepoints[1:])
]

# transposed so that the first row holds all interval starts and the
# second row all interval ends
intervals_arrayed = numpy.asarray(INTERVALS_LIST)
intervals = intervals_arrayed.T
BEGININGS, ENDINGS = intervals[0], intervals[1]

# length of every interval
SAMPLING_INTERVALS = ENDINGS - BEGININGS


def get_mean_rates(treatment):
    '''
    get average rate between every two consecutive sampling points.

    :param treatment: str