def default(self, link, name, kwargs):
    """
    Adds a file meta dependent aggregation to a Stack.

    Checks the Link definition against the file meta and produces
    either a numerical or categorical summary tabulation including
    the marginal results.

    Parameters
    ----------
    link : Quantipy Link object.
    name : str
        The shortname applied to the view.
    kwargs : dict

    Returns
    -------
    None
        Adds requested View to the Stack, storing it under the full
        view name notation key.
    """
    view = View(link, name, kwargs)
    pos, relation, rel_to, weights, text = view.get_std_params()
    meta = link.get_meta()
    categorical = ['single', 'delimited set']
    numeric = ['int', 'float']
    string = ['string']
    categorizable = categorical + numeric
    x_type, y_type, transpose = self._get_method_types(link)
    q = qp.Quantity(link, weight=weights)

    # Array Quantities are only aggregated against the '@' y-axis;
    # any other array Link produces no view.
    if q.type == 'array' and not q.y == '@':
        return

    # NOTE(review): if no branch below matches (e.g. an x/y type outside
    # the lists above), ``view_df`` stays unbound and the assignment
    # further down raises -- confirm whether that is intended.
    if link.y == '@':
        if x_type in categorical or x_type == 'array':
            view_df = q.count().result
        elif x_type in numeric:
            view_df = q.summarize().result
            view_df.drop((link.x, 'All'), axis=0, inplace=True)
        elif x_type in string:
            # The case data comes from the Link itself; the previous
            # code referenced an undefined name ``data`` here.
            view_df = tools.view.agg.make_default_str_view(
                link.get_data(), x=link.x)
    elif link.x == '@':
        if y_type in categorical:
            view_df = q.count().result
        elif y_type in numeric:
            view_df = q.summarize().result
            view_df.drop((link.y, 'All'), axis=1, inplace=True)
    else:
        if x_type in categorical and y_type in categorizable:
            view_df = q.count().result
        elif x_type in numeric and y_type in categorizable:
            view_df = q.summarize().result
            # Remove the 'All' margins from both axes.
            view_df.drop((link.x, 'All'), axis=0, inplace=True)
            view_df.drop((link.y, 'All'), axis=1, inplace=True)

    notation = view.notation('default', ':')
    view.dataframe = view_df
    view._notation = notation
    link[notation] = view
def default(self, link, name, kwargs):
    """
    Adds a file meta dependent aggregation to a Stack.

    Checks the Link definition against the file meta and produces
    either a numerical or categorical summary tabulation including
    the marginal results.

    Parameters
    ----------
    link : Quantipy Link object.
    name : str
        The shortname applied to the view.
    kwargs : dict

    Returns
    -------
    None
        Adds requested View to the Stack, storing it under the full
        view name notation key.
    """
    view = View(link, kwargs)
    pos, relation, rel_to, weights, text = view.std_params()
    meta = link.get_meta()
    categorical = ['single', 'delimited set']
    numeric = ['int', 'float']
    string = ['string']
    categorizable = categorical + numeric
    x_type, y_type, transpose = self._get_method_types(link)
    q = qp.Quantity(link, weight=weights)

    if link.y == '@':
        if x_type in categorical:
            view_df = q.count()
        elif x_type in numeric:
            view_df = q.describe()
        elif x_type in string:
            # The case data comes from the Link itself; the previous
            # code referenced an undefined name ``data`` here.
            # NOTE(review): ``.result`` is read from ``view_df`` below;
            # confirm the string helper's return value exposes it.
            view_df = tools.view.agg.make_default_str_view(
                link.get_data(), x=link.x)
    elif link.x == '@':
        if y_type in categorical:
            view_df = q.count()
        elif y_type in numeric:
            view_df = q.describe()
    else:
        if x_type in categorical and y_type in categorizable:
            view_df = q.count()
        elif x_type in numeric and y_type in categorizable:
            view_df = q.describe()

    relation = view.spec_relation()
    notation = view.notation('default', name, relation)
    view.dataframe = view_df.result
    view.name = notation
    link[notation] = view
def coltests(self, link, name, kwargs):
    """
    Will test appropriate views from a Stack for stat. sig. differences.

    Tests can be performed on frequency aggregations (generated by
    ``frequency``) and means (from ``summarize``) and will compare all
    unique column pair combinations.

    Parameters
    ----------
    link : Quantipy Link object.
    name : str
        The shortname applied to the view.
    kwargs : dict
    Keyword arguments (specific):
    text : str, optional, default None
        Sets an optional label in the meta component of the view that is
        used when the view is passed into a Quantipy build (e.g. Excel,
        Powerpoint).
    metric : {'props', 'means'}, default 'props'
        Determines whether a proportion or means test algorithm is
        performed.
    test_total : bool, default False
        If True, each View's y-axis column will be tested against the
        unconditional total of its x-axis.
    mimic : {'Dim', 'askia'}, default 'Dim'
        It is possible to mimic the test logics used in other statistical
        software packages by passing them as instructions. The method will
        then choose the appropriate test parameters.
    level : {'high', 'mid', 'low'} or float
        Sets the level of significance to which the test is carried out.
        Given as str the levels correspond to ``'high'`` = 0.01,
        ``'mid'`` = 0.05 and ``'low'`` = 0.1. If a float is passed the
        specified level will be used.
    flag_bases : list of two int, default None
        Base thresholds for Dimensions-like tests, e.g. [30, 100]. First
        int is minimum base for reported results, second int controls
        small base indication.

    Returns
    -------
    None
        Adds requested View to the Stack, storing it under the full
        view name notation key.

    .. note::

        Mimicking the askia software (``mimic`` = ``'askia'``)
        restricts the values to be one of ``'high'``, ``'low'``,
        ``'mid'``. Any other value passed will make the algorithm fall
        back to ``'low'``. Mimicking Dimensions (``mimic`` = ``'Dim'``)
        can use either the str or float version.
    """
    view = View(link, name, kwargs=kwargs)
    axis, condition, rel_to, weights, text = view.get_std_params()
    cache = self._cache = link.get_cache()
    metric = kwargs.get('metric', 'props')
    mimic = kwargs.get('mimic', 'Dim')
    level = kwargs.get('level', 'low')
    flags = kwargs.get('flag_bases', None)
    test_total = kwargs.get('test_total', False)
    stack = link.stack
    get = 'count' if metric == 'props' else 'mean'
    views = self._get_view_names(cache, stack, weights, get=get)
    for in_view in views:
        try:
            # A fresh View per tested input view, since each result is
            # stored under its own notation key.
            view = View(link, name, kwargs=kwargs)
            # Third '|'-separated part of the input view's notation.
            condition = in_view.split('|')[2]
            test = qp.Test(link, in_view, test_total)
            if mimic == 'Dim':
                test.set_params(level=level, flag_bases=flags)
            elif mimic == 'askia':
                test.set_params(testtype='unpooled',
                                level=level, mimic=mimic,
                                use_ebase=False,
                                ovlp_correc=False,
                                cwi_filter=True)
            view_df = test.run()
            # '{:.2f}'.format(level)[2:] keeps the two decimals only,
            # e.g. 0.05 -> '05'.
            notation = view.notation(
                't.{}.{}.{}{}'.format(metric, mimic,
                                      '{:.2f}'.format(test.level)[2:],
                                      '+@' if test_total else ''),
                condition)
            view.dataframe = view_df
            view._notation = notation
            link[notation] = view
        except Exception:
            # Best effort: an input view that cannot be tested is
            # skipped. Only ``Exception`` is caught so that
            # KeyboardInterrupt/SystemExit still propagate.
            continue
def descriptives(self, link, name, kwargs):
    """
    Adds num. distribution statistics of a Link definition to the Stack.

    ``descriptives`` views can apply a range of summary statistics.
    Measures include statistics of centrality, dispersion and mass.

    Parameters
    ----------
    link : Quantipy Link object.
    name : str
        The shortname applied to the view.
    kwargs : dict
    Keyword arguments (specific)
    text : str, optional, default None
        Sets an optional label suffix for the meta component of the view
        which will be appended to the statistic name and used when the
        view is passed into a Quantipy build (e.g. Excel, Powerpoint).
    stats : str, default 'mean'
        The measure to compute.
    exclude : list of int
        Codes that will not be considered calculating the result.
    rescale : dict
        A mapping of {old code: new code}, e.g.::

            {1: 0, 2: 25, 3: 50, 4: 75, 5: 100}
    drop : bool
        If ``rescale`` provides a new scale definition, ``drop`` will
        remove all codes that are not transformed. Acts as a shorthand
        for manually passing any remaining codes in ``exclude``.

    Returns
    -------
    None
        Adds requested View to the Stack, storing it under the full
        view name notation key.
    """
    view = View(link, name, kwargs=kwargs)
    # Multi-coded x variables are skipped unless a 'source' is supplied.
    if view._x['is_multi'] and not kwargs.get('source'):
        return

    view = View(link, name, kwargs=kwargs)
    axis, condition, rel_to, weights, text = view.get_std_params()
    (logic, expand, complete,
     calc, exclude, rescale) = view.get_edit_params()
    stat = kwargs.get('stats', 'mean')
    view._kwargs['calc_only'] = True

    q = qp.Quantity(link, weights)
    if kwargs.get('source', None):
        q = self._swap_and_rebase(q, kwargs['source'])

    # Array Quantities are only aggregated against the '@' y-axis.
    if q.type == 'array' and not q.y == '@':
        return

    if exclude is not None:
        q.exclude(exclude, axis=axis)
    if rescale is not None:
        drop = kwargs.get('drop', False)
        q.rescale(rescale, drop)
        if drop:
            view._kwargs['exclude'] = q.miss_x

    condition = view.spec_condition(link)
    q.summarize(stat=stat, margin=False, as_df=True)

    with_calc = bool(calc)
    if with_calc:
        q.calc(calc, result_only=True)
    method_nota = 'd.' + stat + ('.c:f' if with_calc else '')

    notation = view.notation(method_nota, condition)
    view.cbases = q.cbase
    view.rbases = q.rbase
    # Array results against '@' are stored transposed.
    transpose = q.type == 'array' and link.y == '@'
    view.dataframe = q.result.T if transpose else q.result
    view._notation = notation
    view.translate_metric(set_value='meta')
    view._kwargs['exclude'] = q.miss_x
    link[notation] = view
def frequency(self, link, name, kwargs):
    """
    Adds count-based views on a Link definition to the Stack object.

    ``frequency`` is able to compute several aggregates that are based on
    the count of code values in uni- or bivariate Links. This includes
    bases / samples sizes, raw or normalized cell frequencies and code
    summaries like simple and complex nets.

    Parameters
    ----------
    link : Quantipy Link object.
    name : str
        The shortname applied to the view.
    kwargs : dict
    Keyword arguments (specific)
    text : str, optional, default None
        Sets an optional label in the meta component of the view that is
        used when the view is passed into a Quantipy build (e.g. Excel,
        Powerpoint).
    logic : list of int, list of dicts or core.tools.view.logic operation
        If a list is passed this instructs a simple net of the codes given
        as int. Multiple nets can be generated via a list of dicts that
        map names to lists of ints. For complex logical statements,
        expression are parsed to identify the qualifying rows in the data.
        For example::

            # simple net
            'logic': [1, 2, 3]

            # multiple nets/code groups
            'logic': [{'A': [1, 2]}, {'B': [3, 4]}, {'C': [5, 6]}]

            # code logic
            'logic': has_all([1, 2, 3])

    calc : TODO
    calc_only : TODO

    Returns
    -------
    None
        Adds requested View to the Stack, storing it under the full
        view name notation key.

    .. note::

        Net codes take into account if a variable is multi-coded. The
        net will therefore consider qualifying cases and not the raw sum
        of the frequencies per category, i.e. no multiple counting of
        cases.
    """
    view = View(link, name, kwargs=kwargs)
    axis, condition, rel_to, weights, text = view.get_std_params()
    logic, expand, complete, calc, exclude, rescale = view.get_edit_params()
    # ====================================================================
    # This block of kwargs should be removed:
    # parameter overwriting should be done using the template,
    # NOT in QP core code!
    # When 'combine' is set, every expand/complete instruction is reset,
    # both on the view kwargs and inside each logic definition.
    if kwargs.get('combine', False):
        view._kwargs['expand'], expand = None, None
        view._kwargs['complete'], complete = False, False
        if logic is not None:
            for no, logic_def in enumerate(logic):
                if 'expand' in logic_def.keys():
                    logic_def['expand'] = None
                    logic[no] = logic_def
            view._kwargs['logic'] = logic
    # ====================================================================
    w = weights if weights is not None else None
    # Only the 'cbase_gross' view asks the Quantity to ignore flags.
    ignore = True if name == 'cbase_gross' else False
    q = qp.Quantity(link, w, ignore_flags=ignore)
    # Array Quantities are only aggregated against the '@' y-axis.
    if q.type == 'array' and not q.y == '@':
        pass
    else:
        if logic is not None:
            try:
                q.group(groups=logic, axis=axis, expand=expand, complete=complete)
            # NOTE(review): ``except X, e`` is Python-2-only syntax;
            # ``except X as e`` works on Python 2.6+ and 3.
            except NotImplementedError, e:
                warnings.warn('NotImplementedError: {}'.format(e))
                return None
            q.count(axis=None, as_df=False, margin=False)
            condition = view.spec_condition(link, q.logical_conditions, expand)
        else:
            # NOTE(review): this copy of the definition is truncated
            # here in the source; the branch body is missing.
def descriptives(self, link, name, kwargs):
    """
    Adds num. distribution statistics of a Link definition to the Stack.

    ``descriptives`` views can apply a range of summary statistics.
    Measures include statistics of centrality, dispersion and mass.

    Parameters
    ----------
    link : Quantipy Link object.
    name : str
        The shortname applied to the view.
    kwargs : dict
    Keyword arguments (specific)
    text : str, optional, default None
        Sets an optional label suffix for the meta component of the view
        which will be appended to the statistic name and used when the
        view is passed into a Quantipy build (e.g. Excel, Powerpoint).
    exclude : list of int
        Codes that will not be considered calculating the result.
    rescale : dict
        A mapping of {old code: new code}, e.g.::

            {1: 0, 2: 25, 3: 50, 4: 75, 5: 100}
    stats : str, default 'mean'
        The measure to compute.

    Returns
    -------
    None
        Adds requested View to the Stack, storing it under the full
        view name notation key.
    """
    view = View(link, kwargs=kwargs)
    # Multi-coded x variables get no descriptives view.
    if view._x['is_multi']:
        return

    func_name = 'descriptives'
    func_type = 'distribution statistics'
    pos, relation, rel_to, weights, text = view.std_params()
    stat = kwargs.get('stats', 'mean')
    missing_codes = view.missing()
    scale_map = view.rescaling()

    q = qp.Quantity(link, weights)
    if missing_codes is not None:
        q = q.missingfy(missing_codes, keep_base=False)
    if scale_map is not None:
        q = q.rescale(scale_map)

    view.fulltext_for_stat(stat)
    relation = view.spec_relation(link)
    described = q.describe(show=stat, margin=False, as_df=True)
    notation = view.notation(stat, name, relation)

    view.cbases = described.cbase
    view.rbases = described.rbase
    view.dataframe = described.result
    view.name = notation
    link[notation] = view
def coltests(self, link, name, kwargs):
    """
    Will test appropriate views from a Stack for stat. sig. differences.

    Tests can be performed on frequency aggregations (generated by
    ``frequency``) and means (from ``descriptives``) and will compare all
    unique column pair combinations.

    Parameters
    ----------
    link : Quantipy Link object.
    name : str
        The shortname applied to the view.
    kwargs : dict
    Keyword arguments (specific):
    text : str, optional, default None
        Sets an optional label in the meta component of the view that is
        used when the view is passed into a Quantipy build (e.g. Excel,
        Powerpoint).
    metric : {'props', 'means'}, default 'props'
        Determines whether a proportion or means test algorithm is
        performed.
    mimic : {'Dim', 'askia'}, default 'Dim'
        It is possible to mimic the test logics used in other statistical
        software packages by passing them as instructions. The method will
        then choose the appropriate test parameters.
    level : {'high', 'mid', 'low'} or float
        Sets the level of significance to which the test is carried out.
        Given as str the levels correspond to ``'high'`` = 0.01,
        ``'mid'`` = 0.05 and ``'low'`` = 0.1. If a float is passed the
        specified level will be used.

    Returns
    -------
    None
        Adds requested View to the Stack, storing it under the full
        view name notation key.

    .. note::

        Mimicking the askia software (``mimic`` = ``'askia'``)
        restricts the values to be one of ``'high'``, ``'low'``,
        ``'mid'``. Any other value passed will make the algorithm fall
        back to ``'low'``. Mimicking Dimensions (``mimic`` = ``'Dim'``)
        can use either the str or float version.
    """
    view = View(link, kwargs=kwargs)
    pos, relation, rel_to, weights, text = view.std_params()
    metric = kwargs.get('metric', 'props')
    mimic = kwargs.get('mimic', 'Dim')
    level = kwargs.get('level', 'low')
    stack = link.stack
    get = 'count' if metric == 'props' else 'mean'
    views = self._get_view_names(stack, weights, get=get)
    for in_view in views:
        try:
            # A fresh View per tested input view, since each result is
            # stored under its own notation key.
            view = View(link, kwargs=kwargs)
            # Third '|'-separated part of the input view's notation.
            relation = in_view.split('|')[2]
            test = qp.Test(link, in_view)
            if mimic == 'Dim':
                test.set_params(level=level)
            elif mimic == 'askia':
                test.set_params(testtype='unpooled',
                                level=level, mimic=mimic,
                                use_ebase=False,
                                ovlp_correc=False,
                                cwi_filter=True)
            view_df = test.run()
            siglevel = test.level
            # "{:.2f}".format(level)[2:] keeps the two decimals only,
            # e.g. 0.05 -> '05'.
            notation = tools.view.query.set_fullname(
                pos,
                '%s.%s.%s.%s' % ('tests', metric, mimic,
                                 "{:.2f}".format(siglevel)[2:]),
                relation, rel_to, weights, name)
            view.dataframe = view_df
            view.name = notation
            link[notation] = view
        except Exception:
            # Best effort: an input view that cannot be tested is
            # skipped. Only ``Exception`` is caught so that
            # KeyboardInterrupt/SystemExit still propagate.
            continue
def frequency(self, link, name, kwargs):
    """
    Adds count-based views on a Link definition to the Stack object.

    ``frequency`` is able to compute several aggregates that are based on
    the count of code values in uni- or bivariate Links. This includes
    bases / samples sizes, raw or normalized cell frequencies and code
    summaries like simple and complex nets.

    Parameters
    ----------
    link : Quantipy Link object.
    name : str
        The shortname applied to the view.
    kwargs : dict
    Keyword arguments (specific)
    text : str, optional, default None
        Sets an optional label in the meta component of the view that is
        used when the view is passed into a Quantipy build (e.g. Excel,
        Powerpoint).
    logic : list of int, list of dicts or core.tools.view.logic operation
        If a list is passed this instructs a simple net of the codes given
        as int. Multiple nets can be generated via a list of dicts that
        map names to lists of ints. For complex logical statements,
        expression are parsed to identify the qualifying rows in the data.
        For example::

            # simple net
            'logic': [1, 2, 3]

            # multiple nets/code groups
            'logic': [{'A': [1, 2]}, {'B': [3, 4]}, {'C': [5, 6]}]

            # code logic
            'logic': has_all([1, 2, 3])

    calc : TODO
    calc_only : TODO

    Returns
    -------
    None
        Adds requested View to the Stack, storing it under the full
        view name notation key.

    .. note::

        Net codes take into account if a variable is multi-coded. The
        net will therefore consider qualifying cases and not the raw sum
        of the frequencies per category, i.e. no multiple counting of
        cases.
    """
    func_name = 'frequency'
    func_type = 'countbased'
    view = View(link, kwargs=kwargs)
    pos, relation, rel_to, weights, text = view.std_params()
    q = qp.Quantity(link, weights)
    logic = kwargs.get('logic', None)
    calc = kwargs.get('calc', None)
    val_name = None

    if name in ('ebase', 'cbase', 'rbase'):
        # Base views: the view name selects the kind of count.
        agg = q.count(name, margin=False, as_df=False)
    elif name in ('counts', 'c%', 'r%'):
        agg = q.count('freq', margin=False, as_df=False)
    elif logic:
        if not isinstance(logic, list):
            # Logic expression: restrict the case data to the
            # qualifying rows and aggregate the filtered Quantity.
            val_name = name
            casedata = link.get_data().copy()
            idx, relation = tools.view.logic.get_logic_index(
                casedata[link.x], logic, casedata)
            agg = qp.Quantity(link, weights, idx).combine(
                margin=False, as_df=False)
        else:
            # List of codes (simple net) or list of dicts (named nets).
            if not isinstance(logic[0], dict):
                val_name = name
            calc_only = kwargs.get('calc_only', False) if calc else False
            agg = q.combine(logic, op=calc, op_only=calc_only,
                            margin=False, as_df=False)
            relation = view.spec_relation()

    view.cbases = agg.cbase
    view.rbases = agg.rbase
    if rel_to is not None:
        if rel_to == 'y':
            base = 'col'
        else:
            base = 'row'
        agg = agg.normalize(base)

    notation = view.notation(func_name, name, relation)
    view.name = notation
    view.dataframe = agg.to_df(val_name).result
    link[notation] = view
def frequency(self, link, name, kwargs):
    """
    Adds count-based views on a Link definition to the Stack object.

    ``frequency`` is able to compute several aggregates that are based on
    the count of code values in uni- or bivariate Links. This includes
    bases / samples sizes, raw or normalized cell frequencies and code
    summaries like simple and complex nets.

    Parameters
    ----------
    link : Quantipy Link object.
    name : str
        The shortname applied to the view.
    kwargs : dict
    Keyword arguments (specific)
    text : str, optional, default None
        Sets an optional label in the meta component of the view that is
        used when the view is passed into a Quantipy build (e.g. Excel,
        Powerpoint).
    logic : list of int, list of dicts or core.tools.view.logic operation
        If a list is passed this instructs a simple net of the codes given
        as int. Multiple nets can be generated via a list of dicts that
        map names to lists of ints. For complex logical statements,
        expression are parsed to identify the qualifying rows in the data.
        For example::

            # simple net
            'logic': [1, 2, 3]

            # multiple nets/code groups
            'logic': [{'A': [1, 2]}, {'B': [3, 4]}, {'C': [5, 6]}]

            # code logic
            'logic': has_all([1, 2, 3])

    calc : TODO
    calc_only : TODO

    Returns
    -------
    None
        Adds requested View to the Stack, storing it under the full
        view name notation key. Also returns None without storing a
        view when grouping raises ``NotImplementedError`` (a warning is
        issued instead).

    .. note::

        Net codes take into account if a variable is multi-coded. The
        net will therefore consider qualifying cases and not the raw sum
        of the frequencies per category, i.e. no multiple counting of
        cases.
    """
    view = View(link, name, kwargs=kwargs)
    axis, condition, rel_to, weights, text = view.get_std_params()
    logic, expand, complete, calc, exclude, rescale = view.get_edit_params()
    # ====================================================================
    # This block of kwargs should be removed:
    # parameter overwriting should be done using the template,
    # NOT in QP core code!
    if kwargs.get('combine', False):
        view._kwargs['expand'], expand = None, None
        view._kwargs['complete'], complete = False, False
        if logic is not None:
            for no, logic_def in enumerate(logic):
                if 'expand' in list(logic_def.keys()):
                    logic_def['expand'] = None
                    logic[no] = logic_def
            view._kwargs['logic'] = logic
    # --------------------------------------------------------------------
    # This block of code resolves the rel_to arg. in order to be able to use
    # rebased % computations. We are also adjusting for the regular notation
    # string here...
    # We need to avoid the forced overwriting of the kwarg and use the actual
    # rel_to != 'x', 'y', 'counts_sum' string...
    per_cell = False
    if rel_to not in ['', None, 'x', 'y', 'counts_sum']:
        view._kwargs['rel_to'] = 'y'
        # A value of the form '<target>.<kind>' is split into the
        # normalization target and the kind ('cells' or 'y').
        rel_to_kind = rel_to.split('.')
        if len(rel_to_kind) == 2:
            rel_to = rel_to_kind[0]
            if rel_to_kind[1] == 'cells':
                per_cell = True
            elif rel_to_kind[1] == 'y':
                per_cell = False
        # NOTE(review): nesting level of this try (inside the outer
        # ``if``) is reconstructed from a whitespace-collapsed source --
        # confirm against the original file.
        try:
            link['x|f|:||{}|counts'.format(
                weights)]._kwargs['rebased'] = True
        except Exception:
            # Best effort: the counts view may not exist on the Link.
            # Only ``Exception`` is caught so that
            # KeyboardInterrupt/SystemExit still propagate.
            pass
    # ====================================================================
    w = weights
    # Only the 'cbase_gross' view asks the Quantity to ignore flags.
    ignore = name == 'cbase_gross'
    q = qp.Quantity(link, w, ignore_flags=ignore)

    # Array Quantities are only aggregated against the '@' y-axis.
    if q.type == 'array' and not q.y == '@':
        return None

    if q.leveled:
        leveled = Level(q)
        if rel_to is not None:
            leveled.percent()
        elif axis == 'x':
            leveled.base()
        else:
            leveled.count()
        view.dataframe = leveled.lvldf
    elif logic is not None:
        try:
            q.group(groups=logic, axis=axis, expand=expand,
                    complete=complete)
        except NotImplementedError as e:
            warnings.warn('NotImplementedError: {}'.format(e))
            return None
        q.count(axis=None, as_df=False, margin=False)
        condition = view.spec_condition(link, q.logical_conditions, expand)
    else:
        eff = name == 'ebase'
        raw = name in ['counts_sum', 'c%_sum']
        cum_sum = name in ['counts_cumsum', 'c%_cumsum']
        # The effective-base setting wins over the cumulative-sum one
        # because it is applied last.
        if cum_sum:
            axis = None
        if eff:
            axis = 'x'
        q.count(axis=axis, raw_sum=raw, effective=eff,
                cum_sum=cum_sum, margin=False, as_df=False)

    if rel_to is not None:
        if q.type == 'array':
            rel_to = 'y'
        q.normalize(rel_to, per_cell=per_cell)
    q.to_df()
    view.cbases = q.cbase
    view.rbases = q.rbase
    if calc is not None:
        calc_only = kwargs.get('calc_only', False)
        q.calc(calc, axis, result_only=calc_only)
    if calc is not None or name in ['counts_sum', 'c%_sum',
                                    'counts_cumsum', 'c%_cumsum']:
        method_nota = 'f.c:f'
    else:
        method_nota = 'f'
    notation = view.notation(method_nota, condition)
    view._notation = notation
    # Leveled Quantities already had their dataframe set above.
    if not q.leveled:
        if q.type == 'array':
            view.dataframe = q.result.T if link.y == '@' else q.result
        else:
            view.dataframe = q.result
    view._kwargs['exclude'] = q.miss_x
    link[notation] = view