Пример #1
0
    def default(self, link, name, kwargs):
        """
        Adds a file meta dependent aggregation to a Stack.

        Checks the Link definition against the x/y types reported by
        ``self._get_method_types(link)`` and produces either a categorical
        count, a numerical summary (with the marginal ``'All'`` entries
        dropped) or a default string view.

        Parameters
        ----------
        link : Quantipy Link object.
        name : str
            The shortname applied to the view.
        kwargs : dict

        Returns
        -------
        None
            Adds requested View to the Stack, storing it under the full
            view name notation key. Nothing is stored for array Quantities
            whose y-axis is not ``'@'`` or when the x/y type combination
            matches none of the supported cases.
        """
        view = View(link, name, kwargs)
        # Only the weight is needed from the standard parameters.
        _, _, _, weights, _ = view.get_std_params()
        categorical = ['single', 'delimited set']
        numeric = ['int', 'float']
        string = ['string']
        categorizable = categorical + numeric
        x_type, y_type, _ = self._get_method_types(link)
        q = qp.Quantity(link, weight=weights)
        if q.type == 'array' and not q.y == '@':
            return None

        # Stays None when no branch below applies, so that an unsupported
        # type combination is skipped instead of raising UnboundLocalError.
        view_df = None
        if link.y == '@':
            if x_type in categorical or x_type == 'array':
                view_df = q.count().result
            elif x_type in numeric:
                view_df = q.summarize().result
                view_df.drop((link.x, 'All'), axis=0, inplace=True)
            elif x_type in string:
                # The case data is taken from the Link; the previously used
                # bare name ``data`` was never defined in this method.
                view_df = tools.view.agg.make_default_str_view(
                    link.get_data(), x=link.x)
        elif link.x == '@':
            if y_type in categorical:
                view_df = q.count().result
            elif y_type in numeric:
                view_df = q.summarize().result
                view_df.drop((link.y, 'All'), axis=1, inplace=True)
        else:
            if x_type in categorical and y_type in categorizable:
                view_df = q.count().result
            elif x_type in numeric and y_type in categorizable:
                view_df = q.summarize().result
                view_df.drop((link.x, 'All'), axis=0, inplace=True)
                view_df.drop((link.y, 'All'), axis=1, inplace=True)

        if view_df is None:
            return None
        notation = view.notation('default', ':')
        view.dataframe = view_df
        view._notation = notation
        link[notation] = view
Пример #2
0
    def default(self, link, name, kwargs):
        """
        Adds a file meta dependent aggregation to a Stack.

        Checks the Link definition against the x/y types reported by
        ``self._get_method_types(link)`` and produces either a categorical
        count, a numerical description or a default string view.

        Parameters
        ----------
        link : Quantipy Link object.
        name : str
            The shortname applied to the view.
        kwargs : dict

        Returns
        -------
        None
            Adds requested View to the Stack, storing it under the full
            view name notation key. Nothing is stored when the x/y type
            combination matches none of the supported cases.
        """
        view = View(link, kwargs)
        # Only the weight is needed from the standard parameters.
        _, _, _, weights, _ = view.std_params()
        categorical = ['single', 'delimited set']
        numeric = ['int', 'float']
        string = ['string']
        categorizable = categorical + numeric
        x_type, y_type, _ = self._get_method_types(link)
        q = qp.Quantity(link, weight=weights)

        # Stays None when no branch below applies, so that an unsupported
        # type combination is skipped instead of raising UnboundLocalError.
        view_df = None
        if link.y == '@':
            if x_type in categorical:
                view_df = q.count().result
            elif x_type in numeric:
                view_df = q.describe().result
            elif x_type in string:
                # The case data is taken from the Link; the previously used
                # bare name ``data`` was never defined in this method.
                # NOTE(review): assumes the helper returns the final
                # dataframe itself (no ``.result`` attribute) - confirm.
                view_df = tools.view.agg.make_default_str_view(
                    link.get_data(), x=link.x)
        elif link.x == '@':
            if y_type in categorical:
                view_df = q.count().result
            elif y_type in numeric:
                view_df = q.describe().result
        else:
            if x_type in categorical and y_type in categorizable:
                view_df = q.count().result
            elif x_type in numeric and y_type in categorizable:
                view_df = q.describe().result

        if view_df is None:
            return None
        relation = view.spec_relation()
        notation = view.notation('default', name, relation)
        view.dataframe = view_df
        view.name = notation
        link[notation] = view
Пример #3
0
    def coltests(self, link, name, kwargs):
        """
        Will test appropriate views from a Stack for stat. sig. differences.

        Tests can be performed on frequency aggregations (generated by
        ``frequency``) and means (from ``summarize``) and will compare all
        unique column pair combinations.

        Parameters
        ----------
        link : Quantipy Link object.
        name : str
            The shortname applied to the view.
        kwargs : dict
        Keyword arguments (specific):
        text : str, optional, default None
            Sets an optional label in the meta component of the view that is
            used when the view is passed into a Quantipy build (e.g. Excel,
            Powerpoint).
        metric : {'props', 'means'}, default 'props'
            Determines whether a proportion or means test algorithm is
            performed.
        test_total : bool, default False
            If True, each View's y-axis column will be tested against the
            unconditional total of its x-axis.
        mimic : {'Dim', 'askia'}, default 'Dim'
            It is possible to mimic the test logics used in other statistical
            software packages by passing them as instructions. The method will
            then choose the appropriate test parameters.
        level: {'high', 'mid', 'low'} or float
            Sets the level of significance to which the test is carried out.
            Given as str the levels correspond to ``'high'`` = 0.01, ``'mid'``
            = 0.05 and ``'low'`` = 0.1. If a float is passed the specified
            level will be used.
        flag_bases : list of two int, default None
            Base thresholds for Dimensions-like tests, e.g. [30, 100]. First
            int is minimum base for reported results, second int controls small
            base indication.

        Returns
        -------
        None
            Adds requested View to the Stack, storing it under the full
            view name notation key. Input views whose test fails with an
            exception are skipped.

        .. note::

            Mimicking the askia software (``mimic`` = ``'askia'``)
            restricts the values to be one of ``'high'``, ``'low'``,
            ``'mid'``. Any other value passed will make the algorithm fall
            back to ``'low'``. Mimicking Dimensions (``mimic`` =
            ``'Dim'``) can use either the str or float version.
        """
        # This first View is only used to resolve the weight; every tested
        # input view gets its own fresh View inside the loop.
        _, _, _, weights, _ = View(link, name,
                                   kwargs=kwargs).get_std_params()
        cache = self._cache = link.get_cache()
        metric = kwargs.get('metric', 'props')
        mimic = kwargs.get('mimic', 'Dim')
        level = kwargs.get('level', 'low')
        flags = kwargs.get('flag_bases', None)
        test_total = kwargs.get('test_total', False)
        stack = link.stack
        get = 'count' if metric == 'props' else 'mean'
        views = self._get_view_names(cache, stack, weights, get=get)
        for in_view in views:
            try:
                view = View(link, name, kwargs=kwargs)
                # The condition is the third '|'-separated part of the
                # input view's key.
                condition = in_view.split('|')[2]
                test = qp.Test(link, in_view, test_total)
                if mimic == 'Dim':
                    test.set_params(level=level, flag_bases=flags)
                elif mimic == 'askia':
                    test.set_params(testtype='unpooled',
                                    level=level,
                                    mimic=mimic,
                                    use_ebase=False,
                                    ovlp_correc=False,
                                    cwi_filter=True)
                view_df = test.run()
                notation = view.notation(
                    't.{}.{}.{}{}'.format(metric, mimic,
                                          '{:.2f}'.format(test.level)[2:],
                                          '+@' if test_total else ''),
                    condition)
                view.dataframe = view_df
                view._notation = notation
                link[notation] = view
            except Exception:
                # Best-effort: a view that cannot be tested is skipped.
                # ``Exception`` (not a bare ``except``) keeps
                # KeyboardInterrupt and SystemExit propagating.
                continue
Пример #4
0
    def descriptives(self, link, name, kwargs):
        """
        Adds num. distribution statistics of a Link definition to the Stack.

        ``descriptives`` views can apply a range of summary statistics.
        Measures include statistics of centrality, dispersion and mass.

        Parameters
        ----------
        link : Quantipy Link object.
        name : str
            The shortname applied to the view.
        kwargs : dict
        Keyword arguments (specific)
        text : str, optional, default None
            Sets an optional label suffix for the meta component of the view
            which will be appended to the statistic name and used when the
            view is passed into a Quantipy build (e.g. Excel, Powerpoint).
        stats : str, default 'mean'
            The measure to compute.
        exclude : list of int
             Codes that will not be considered calculating the result.
        rescale : dict
            A mapping of {old code: new code}, e.g.::

                {
                 1: 0,
                 2: 25,
                 3: 50,
                 4: 75,
                 5: 100
                }
        drop : bool
            If ``rescale`` provides a new scale definition, ``drop`` will
            remove all codes that are not transformed. Acts as a shorthand for
            manually passing any remaining codes in ``exclude``.
        source : optional
            If given, the Quantity is passed together with this value to
            ``self._swap_and_rebase`` before aggregating. It also enables
            the view for x-variables flagged as ``is_multi``.

        Returns
        -------
        None
            Adds requested View to the Stack, storing it under the full
            view name notation key. Nothing is stored for multi-coded
            x-variables without ``source`` or for array Quantities whose
            y-axis is not ``'@'``.
        """
        view = View(link, name, kwargs=kwargs)
        if view._x['is_multi'] and not kwargs.get('source'):
            return None

        axis, condition, _, weights, _ = view.get_std_params()
        _, _, _, calc, exclude, rescale = view.get_edit_params()
        stat = kwargs.get('stats', 'mean')
        view._kwargs['calc_only'] = True
        q = qp.Quantity(link, weights)

        if kwargs.get('source', None):
            q = self._swap_and_rebase(q, kwargs['source'])
        if q.type == 'array' and not q.y == '@':
            return None

        if exclude is not None:
            q.exclude(exclude, axis=axis)
        if rescale is not None:
            drop = kwargs.get('drop', False)
            q.rescale(rescale, drop)
            if drop:
                # Set before the condition/notation are built below.
                # NOTE(review): presumably they read this kwarg - confirm.
                view._kwargs['exclude'] = q.miss_x
        condition = view.spec_condition(link)
        q.summarize(stat=stat, margin=False, as_df=True)
        if calc:
            q.calc(calc, result_only=True)
            method_nota = 'd.' + stat + '.c:f'
        else:
            method_nota = 'd.' + stat
        notation = view.notation(method_nota, condition)
        view.cbases = q.cbase
        view.rbases = q.rbase
        if q.type == 'array' and link.y == '@':
            # Array summaries against '@' are stored transposed.
            view.dataframe = q.result.T
        else:
            view.dataframe = q.result
        view._notation = notation
        view.translate_metric(set_value='meta')
        view._kwargs['exclude'] = q.miss_x
        link[notation] = view
Пример #5
0
    def frequency(self, link, name, kwargs):
        """
        Adds count-based views on a Link definition to the Stack object.

        ``frequency`` is able to compute several aggregates that are based on
        the count of code values in uni- or bivariate Links. This includes
        bases / sample sizes, raw or normalized cell frequencies and code
        summaries like simple and complex nets.

        Parameters
        ----------
        link : Quantipy Link object.
        name : str
            The shortname applied to the view.
        kwargs : dict
        Keyword arguments (specific)
        text : str, optional, default None
            Sets an optional label in the meta component of the view that is
            used when the view is passed into a Quantipy build (e.g. Excel,
            Powerpoint).
        logic : list of int, list of dicts or core.tools.view.logic operation
            If a list is passed this instructs a simple net of the codes given
            as int. Multiple nets can be generated via a list of dicts that
            map names to lists of ints. For complex logical statements,
            expressions are parsed to identify the qualifying rows in the
            data. For example::

                # simple net
                'logic': [1, 2, 3]

                # multiple nets/code groups
                'logic': [{'A': [1, 2]}, {'B': [3, 4]}, {'C': [5, 6]}]

                # code logic
                'logic': has_all([1, 2, 3])

        calc : TODO

        calc_only : TODO

        Returns
        -------
        None
            Adds requested View to the Stack, storing it under the full
            view name notation key.

        .. note:: Net codes take into account if a variable is
                  multi-coded. The net will therefore consider qualifying
                  cases and not the raw sum of the frequencies
                  per category, i.e. no multiple counting of cases.
        """
        view = View(link, name, kwargs=kwargs)
        axis, condition, rel_to, weights, text = view.get_std_params()
        logic, expand, complete, calc, exclude, rescale = view.get_edit_params(
        )
        # ====================================================================
        # This block of kwargs should be removed.
        # Parameter overwriting should be done using the template,
        # NOT QP core code!
        # When 'combine' is truthy, expansion and completion are switched off
        # on the view and on every logic definition carrying an 'expand' key.
        if kwargs.get('combine', False):
            view._kwargs['expand'], expand = None, None
            view._kwargs['complete'], complete = False, False
            if logic is not None:
                for no, logic_def in enumerate(logic):
                    if 'expand' in logic_def.keys():
                        logic_def['expand'] = None
                        logic[no] = logic_def
                view._kwargs['logic'] = logic
        # ====================================================================
        w = weights if weights is not None else None
        # Flags are ignored by the Quantity only for the 'cbase_gross' view.
        ignore = True if name == 'cbase_gross' else False
        q = qp.Quantity(link, w, ignore_flags=ignore)
        # Array Quantities are only aggregated against the '@' y-axis.
        if q.type == 'array' and not q.y == '@':
            pass
        else:
            if logic is not None:
                try:
                    q.group(groups=logic,
                            axis=axis,
                            expand=expand,
                            complete=complete)
                # NOTE(review): Python 2-only ``except X, e`` syntax.
                except NotImplementedError, e:
                    warnings.warn('NotImplementedError: {}'.format(e))
                    return None
                q.count(axis=None, as_df=False, margin=False)
                condition = view.spec_condition(link, q.logical_conditions,
                                                expand)
            else:
Пример #6
0
    def descriptives(self, link, name, kwargs):
        """
        Store a distribution statistic of a Link definition on the Stack.

        A single summary statistic (centrality, dispersion or mass) is
        computed for the Link, after optionally removing and/or rescaling
        codes. Links whose x-variable is flagged as ``is_multi`` are
        skipped.

        Parameters
        ----------
        link : Quantipy Link object.
        name : str
            The shortname applied to the view.
        kwargs : dict
        Keyword arguments (specific)
        text : str, optional, default None
            Optional label suffix for the meta component of the view; it
            is appended to the statistic name and used when the view is
            passed into a Quantipy build (e.g. Excel, Powerpoint).
        exclude : list of int
            Codes that are left out when calculating the result.
        rescale : dict
            A mapping of {old code: new code}, e.g.::

                {
                 1: 0,
                 2: 25,
                 3: 50,
                 4: 75,
                 5: 100
                }

        stats : str, default 'mean'
            The measure to compute.

        Returns
        -------
        None
            The resulting View is stored on the Link under its full view
            name notation key.
        """
        view = View(link, kwargs=kwargs)
        if view._x['is_multi']:
            return

        _, relation, _, weights, _ = view.std_params()
        measure = kwargs.get('stats', 'mean')
        codes_to_drop = view.missing()
        scale_map = view.rescaling()
        quantity = qp.Quantity(link, weights)

        # Both transformations hand back the Quantity to continue with.
        if codes_to_drop is not None:
            quantity = quantity.missingfy(codes_to_drop, keep_base=False)
        if scale_map is not None:
            quantity = quantity.rescale(scale_map)

        view.fulltext_for_stat(measure)
        relation = view.spec_relation(link)
        described = quantity.describe(show=measure, margin=False,
                                      as_df=True)
        full_name = view.notation(measure, name, relation)

        view.cbases = described.cbase
        view.rbases = described.rbase
        view.dataframe = described.result
        view.name = full_name
        link[full_name] = view
Пример #7
0
    def coltests(self, link, name, kwargs):
        """
        Will test appropriate views from a Stack for stat. sig. differences.

        Tests can be performed on frequency aggregations (generated by
        ``frequency``) and means (from ``descriptives``) and will compare all
        unique column pair combinations.

        Parameters
        ----------
        link : Quantipy Link object.
        name : str
            The shortname applied to the view.
        kwargs : dict
        Keyword arguments (specific):
        text : str, optional, default None
            Sets an optional label in the meta component of the view that is
            used when the view is passed into a Quantipy build (e.g. Excel,
            Powerpoint).
        metric : {'props', 'means'}, default 'props'
            Determines whether a proportion or means test algorithm is
            performed.
        mimic : {'Dim', 'askia'}, default 'Dim'
            It is possible to mimic the test logics used in other statistical
            software packages by passing them as instructions. The method will
            then choose the appropriate test parameters.
        level: {'high', 'mid', 'low'} or float
            Sets the level of significance to which the test is carried out.
            Given as str the levels correspond to ``'high'`` = 0.01, ``'mid'``
            = 0.05 and ``'low'`` = 0.1. If a float is passed the specified
            level will be used.

        Returns
        -------
        None
            Adds requested View to the Stack, storing it under the full
            view name notation key. Input views whose test fails with an
            exception are skipped.

        .. note::

            Mimicking the askia software (``mimic`` = ``'askia'``)
            restricts the values to be one of ``'high'``, ``'low'``,
            ``'mid'``. Any other value passed will make the algorithm fall
            back to ``'low'``. Mimicking Dimensions (``mimic`` =
            ``'Dim'``) can use either the str or float version.
        """
        # This first View only resolves the standard parameters; every
        # tested input view gets its own fresh View inside the loop.
        pos, relation, rel_to, weights, _ = View(
            link, kwargs=kwargs).std_params()

        metric = kwargs.get('metric', 'props')
        mimic = kwargs.get('mimic', 'Dim')
        level = kwargs.get('level', 'low')
        stack = link.stack

        get = 'count' if metric == 'props' else 'mean'
        views = self._get_view_names(stack, weights, get=get)
        for in_view in views:
            try:
                view = View(link, kwargs=kwargs)
                # The relation is the third '|'-separated part of the
                # input view's key.
                relation = in_view.split('|')[2]
                test = qp.Test(link, in_view)
                if mimic == 'Dim':
                    test.set_params(level=level)
                elif mimic == 'askia':
                    test.set_params(testtype='unpooled',
                                    level=level, mimic=mimic,
                                    use_ebase=False,
                                    ovlp_correc=False,
                                    cwi_filter=True)
                view_df = test.run()
                siglevel = test.level
                notation = tools.view.query.set_fullname(
                    pos,
                    '%s.%s.%s.%s' % ('tests',
                                     metric,
                                     mimic,
                                     "{:.2f}".format(siglevel)[2:]),
                    relation, rel_to, weights, name)

                view.dataframe = view_df
                view.name = notation

                link[notation] = view
            except Exception:
                # Best-effort: a view that cannot be tested is skipped.
                # ``Exception`` (not a bare ``except``) keeps
                # KeyboardInterrupt and SystemExit propagating.
                continue
Пример #8
0
    def frequency(self, link, name, kwargs):
        """
        Adds count-based views on a Link definition to the Stack object.

        ``frequency`` is able to compute several aggregates that are based on
        the count of code values in uni- or bivariate Links. This includes
        bases / sample sizes, raw or normalized cell frequencies and code
        summaries like simple and complex nets.

        Parameters
        ----------
        link : Quantipy Link object.
        name : str
            The shortname applied to the view. The names ``'ebase'``,
            ``'cbase'``, ``'rbase'``, ``'counts'``, ``'c%'`` and ``'r%'``
            select the built-in aggregates; any other name requires
            ``logic``.
        kwargs : dict
        Keyword arguments (specific)
        text : str, optional, default None
            Sets an optional label in the meta component of the view that is
            used when the view is passed into a Quantipy build (e.g. Excel,
            Powerpoint).
        logic : list of int, list of dicts or core.tools.view.logic operation
            If a list is passed this instructs a simple net of the codes given
            as int. Multiple nets can be generated via a list of dicts that
            map names to lists of ints. For complex logical statements,
            expressions are parsed to identify the qualifying rows in the
            data. For example::

                # simple net
                'logic': [1, 2, 3]

                # multiple nets/code groups
                'logic': [{'A': [1, 2]}, {'B': [3, 4]}, {'C': [5, 6]}]

                # code logic
                'logic': has_all([1, 2, 3])

        calc : TODO

        calc_only : TODO

        Returns
        -------
        None
            Adds requested View to the Stack, storing it under the full
            view name notation key.

        Raises
        ------
        ValueError
            If ``name`` is none of the built-in aggregate names and no
            ``logic`` is given, so that there is nothing to compute.

        .. note:: Net codes take into account if a variable is
                  multi-coded. The net will therefore consider qualifying
                  cases and not the raw sum of the frequencies
                  per category, i.e. no multiple counting of cases.
        """
        func_name = 'frequency'
        view = View(link, kwargs=kwargs)
        _, relation, rel_to, weights, _ = view.std_params()
        q = qp.Quantity(link, weights)
        logic = kwargs.get('logic', None)
        calc = kwargs.get('calc', None)
        val_name = None

        if name in ['ebase', 'cbase', 'rbase']:
            freq = q.count(name, margin=False, as_df=False)
        elif name in ['counts', 'c%', 'r%']:
            freq = q.count('freq', margin=False, as_df=False)
        elif logic:
            if isinstance(logic, list):
                # A plain list of codes is a single net labelled by the
                # view name; a list of dicts carries its own names.
                if not isinstance(logic[0], dict):
                    val_name = name
                calc_only = kwargs.get('calc_only', False) if calc else False
                freq = q.combine(logic, op=calc, op_only=calc_only,
                                 margin=False, as_df=False)
                relation = view.spec_relation()
            else:
                # Complex logic: resolve the qualifying rows first and
                # aggregate a Quantity restricted to that index.
                val_name = name
                casedata = link.get_data().copy()
                idx, relation = tools.view.logic.get_logic_index(
                    casedata[link.x], logic, casedata)
                filtered_q = qp.Quantity(link, weights, idx)
                freq = filtered_q.combine(margin=False, as_df=False)
        else:
            # Previously this fell through and failed further down with an
            # UnboundLocalError on ``freq``.
            raise ValueError(
                "Cannot compute frequency view '{}': the name is not a "
                "known aggregate and no 'logic' was provided.".format(name))

        # Bases are captured before any normalization is applied.
        view.cbases = freq.cbase
        view.rbases = freq.rbase
        if rel_to is not None:
            base = 'col' if rel_to == 'y' else 'row'
            freq = freq.normalize(base)
        view_df = freq.to_df(val_name).result
        notation = view.notation(func_name, name, relation)
        view.name = notation
        view.dataframe = view_df
        link[notation] = view
Пример #9
0
    def frequency(self, link, name, kwargs):
        """
        Adds count-based views on a Link definition to the Stack object.

        ``frequency`` is able to compute several aggregates that are based on
        the count of code values in uni- or bivariate Links. This includes
        bases / sample sizes, raw or normalized cell frequencies and code
        summaries like simple and complex nets.

        Parameters
        ----------
        link : Quantipy Link object.
        name : str
            The shortname applied to the view.
        kwargs : dict
        Keyword arguments (specific)
        text : str, optional, default None
            Sets an optional label in the meta component of the view that is
            used when the view is passed into a Quantipy build (e.g. Excel,
            Powerpoint).
        logic : list of int, list of dicts or core.tools.view.logic operation
            If a list is passed this instructs a simple net of the codes given
            as int. Multiple nets can be generated via a list of dicts that
            map names to lists of ints. For complex logical statements,
            expressions are parsed to identify the qualifying rows in the
            data. For example::

                # simple net
                'logic': [1, 2, 3]

                # multiple nets/code groups
                'logic': [{'A': [1, 2]}, {'B': [3, 4]}, {'C': [5, 6]}]

                # code logic
                'logic': has_all([1, 2, 3])

        calc : TODO

        calc_only : TODO

        Returns
        -------
        None
            Adds requested View to the Stack, storing it under the full
            view name notation key. Nothing is stored for array Quantities
            whose y-axis is not ``'@'`` or when grouping by ``logic``
            raises ``NotImplementedError`` (a warning is issued instead).

        .. note:: Net codes take into account if a variable is
                  multi-coded. The net will therefore consider qualifying
                  cases and not the raw sum of the frequencies
                  per category, i.e. no multiple counting of cases.
        """
        view = View(link, name, kwargs=kwargs)
        axis, condition, rel_to, weights, _ = view.get_std_params()
        logic, expand, complete, calc, _, _ = view.get_edit_params()
        # ====================================================================
        # This block of kwargs should be removed
        # parameter overwriting should be done using the template
        # NOT QP core code!
        if kwargs.get('combine', False):
            view._kwargs['expand'], expand = None, None
            view._kwargs['complete'], complete = False, False
            if logic is not None:
                for no, logic_def in enumerate(logic):
                    if 'expand' in logic_def.keys():
                        logic_def['expand'] = None
                        logic[no] = logic_def
                view._kwargs['logic'] = logic
        # --------------------------------------------------------------------
        # This block of code resolves the rel_to arg. in order to be able to
        # use rebased % computations. We are also adjusting for the regular
        # notation string here...
        # We need to avoid the forced overwriting of the kwarg and use the
        # actual rel_to != 'x', 'y', 'counts_sum' string...
        per_cell = False
        if rel_to not in ['', None, 'x', 'y', 'counts_sum']:
            view._kwargs['rel_to'] = 'y'
            rel_to_kind = rel_to.split('.')
            if len(rel_to_kind) == 2:
                # '<base>.cells' requests per-cell rebasing; any other
                # suffix (e.g. '<base>.y') leaves per_cell switched off.
                rel_to = rel_to_kind[0]
                per_cell = rel_to_kind[1] == 'cells'
            try:
                link['x|f|:||{}|counts'.format(
                    weights)]._kwargs['rebased'] = True
            except Exception:
                # Best-effort: flag the counts view as rebased when it can
                # be looked up, otherwise carry on. ``Exception`` (not a
                # bare ``except``) keeps KeyboardInterrupt and SystemExit
                # propagating.
                pass
        # ====================================================================
        ignore = name == 'cbase_gross'
        q = qp.Quantity(link, weights, ignore_flags=ignore)
        # Array Quantities are only aggregated against the '@' y-axis.
        if q.type == 'array' and not q.y == '@':
            return None

        if q.leveled:
            leveled = Level(q)
            if rel_to is not None:
                leveled.percent()
            elif axis == 'x':
                leveled.base()
            else:
                leveled.count()
            view.dataframe = leveled.lvldf
        elif logic is not None:
            try:
                q.group(groups=logic,
                        axis=axis,
                        expand=expand,
                        complete=complete)
            except NotImplementedError as e:
                warnings.warn('NotImplementedError: {}'.format(e))
                return None
            q.count(axis=None, as_df=False, margin=False)
            condition = view.spec_condition(link, q.logical_conditions,
                                            expand)
        else:
            eff = name == 'ebase'
            raw = name in ['counts_sum', 'c%_sum']
            cum_sum = name in ['counts_cumsum', 'c%_cumsum']
            # Cumulative sums are counted without an axis; the effective
            # base always uses the x-axis (and takes precedence).
            if cum_sum:
                axis = None
            if eff:
                axis = 'x'
            q.count(axis=axis,
                    raw_sum=raw,
                    effective=eff,
                    cum_sum=cum_sum,
                    margin=False,
                    as_df=False)
        if rel_to is not None:
            if q.type == 'array':
                rel_to = 'y'
            q.normalize(rel_to, per_cell=per_cell)
        q.to_df()
        view.cbases = q.cbase
        view.rbases = q.rbase
        if calc is not None:
            calc_only = kwargs.get('calc_only', False)
            q.calc(calc, axis, result_only=calc_only)
        if calc is not None or name in [
                'counts_sum', 'c%_sum', 'counts_cumsum', 'c%_cumsum'
        ]:
            method_nota = 'f.c:f'
        else:
            method_nota = 'f'
        notation = view.notation(method_nota, condition)
        view._notation = notation
        # Leveled Quantities already had their dataframe set above.
        if not q.leveled:
            if q.type == 'array' and link.y == '@':
                view.dataframe = q.result.T
            else:
                view.dataframe = q.result
        view._kwargs['exclude'] = q.miss_x

        link[notation] = view
Пример #10
0
    def coltests(self, link, name, kwargs):
        """
        Will test appropriate views from a Stack for stat. sig. differences.

        Tests can be performed on frequency aggregations (generated by
        ``frequency``) and means (from ``descriptives``) and will compare all
        unique column pair combinations.

        Parameters
        ----------
        link : Quantipy Link object.
        name : str
            The shortname applied to the view.
        kwargs : dict
        Keyword arguments (specific):
        text : str, optional, default None
            Sets an optional label in the meta component of the view that is
            used when the view is passed into a Quantipy build (e.g. Excel,
            Powerpoint).
        metric : {'props', 'means'}, default 'props'
            Determines whether a proportion or means test algorithm is
            performed. ``'props'`` selects count views as test input, any
            other value selects mean views.
        mimic : {'Dim', 'askia'}, default 'Dim'
            It is possible to mimic the test logics used in other statistical
            software packages by passing them as instructions. The method will
            then choose the appropriate test parameters.
        level : {'high', 'mid', 'low'} or float, default 'low'
            Sets the level of significance to which the test is carried out.
            Given as str the levels correspond to ``'high'`` = 0.01, ``'mid'``
            = 0.05 and ``'low'`` = 0.1. If a float is passed the specified
            level will be used.

        Returns
        -------
        None
            Adds one View per successfully tested input view to the Link,
            storing it under the full view name notation key. Input views
            whose test raises an error are skipped and reported through
            ``warnings.warn``.

        .. note::

            Mimicking the askia software (``mimic`` = ``'askia'``)
            restricts the values to be one of ``'high'``, ``'low'``,
            ``'mid'``. Any other value passed will make the algorithm fall
            back to ``'low'``. Mimicking Dimensions (``mimic`` =
            ``'Dim'``) can use either the str or float version.
        """
        # Local import keeps this method self-contained.
        import warnings

        # This first View is only used to read the standard parameters; each
        # tested input view gets its own fresh View inside the loop.
        view = View(link, kwargs=kwargs)
        pos, relation, rel_to, weights, text = view.std_params()

        metric = kwargs.get('metric', 'props')
        mimic = kwargs.get('mimic', 'Dim')
        level = kwargs.get('level', 'low')
        stack = link.stack

        get = 'count' if metric == 'props' else 'mean'
        views = self._get_view_names(stack, weights, get=get)
        for in_view in views:
            try:
                view = View(link, kwargs=kwargs)
                # The relation part is the third '|'-separated element of
                # the tested view's key.
                relation = in_view.split('|')[2]
                test = qp.Test(link, in_view)
                if mimic == 'Dim':
                    test.set_params(level=level)
                elif mimic == 'askia':
                    test.set_params(testtype='unpooled',
                                    level=level,
                                    mimic=mimic,
                                    use_ebase=False,
                                    ovlp_correc=False,
                                    cwi_filter=True)
                view_df = test.run()
                siglevel = test.level
                # "{:.2f}"[2:] drops the leading "0.", e.g. 0.05 -> "05".
                notation = tools.view.query.set_fullname(
                    pos, '%s.%s.%s.%s' %
                    ('tests', metric, mimic, "{:.2f}".format(siglevel)[2:]),
                    relation, rel_to, weights, name)

                view.dataframe = view_df
                view.name = notation

                link[notation] = view
            except Exception as err:
                # Testing stays best-effort (one failing view must not stop
                # the others), but the failure is reported instead of being
                # silently dropped. Catching Exception rather than using a
                # bare except lets KeyboardInterrupt/SystemExit propagate.
                warnings.warn(
                    'coltests: skipped view {!r}: {}'.format(in_view, err))
                continue
Пример #11
0
    def descriptives(self, link, name, kwargs):
        """
        Adds a numerical distribution statistic of a Link to the Stack.

        A single summary statistic (selected via ``stats``) is computed on
        the Link's data, optionally after excluding and/or rescaling codes.
        Nothing is computed or stored when the view's x-variable is flagged
        as ``is_multi``.

        Parameters
        ----------
        link : Quantipy Link object.
        name : str
            The shortname applied to the view.
        kwargs : dict
        Keyword arguments (specific)
        text : str, optional, default None
            Sets an optional label suffix for the meta component of the view
            which will be appended to the statistic name and used when the
            view is passed into a Quantipy build (e.g. Excel, Powerpoint).
        exclude : list of int
             Codes that will not be considered calculating the result.
        rescale : dict
            A mapping of {old code: new code}, e.g.::

                {
                 1: 0,
                 2: 25,
                 3: 50,
                 4: 75,
                 5: 100
                }

        stats : str, default 'mean'
            The measure to compute.

        Returns
        -------
        None
            Adds requested View to the Stack, storing it under the full
            view name notation key.
        """
        view = View(link, kwargs=kwargs)
        # Guard clause: views flagged ``is_multi`` on x are left untouched.
        if view._x['is_multi']:
            return

        # Only the weights are needed from the standard parameters; the
        # relation is recomputed below via ``spec_relation``.
        _pos, _relation, _rel_to, weights, _text = view.std_params()

        statistic = kwargs.get('stats', 'mean')
        missing_codes = view.missing()
        code_map = view.rescaling()
        quantity = qp.Quantity(link, weights)

        # Optional pre-processing of the codes before aggregating.
        if missing_codes is not None:
            quantity = quantity.missingfy(missing_codes, keep_base=False)
        if code_map is not None:
            quantity = quantity.rescale(code_map)

        view.fulltext_for_stat(statistic)
        relation = view.spec_relation(link)
        described = quantity.describe(show=statistic,
                                      margin=False,
                                      as_df=True)
        full_name = view.notation(statistic, name, relation)

        # Copy the bases and the result onto the View, then register it on
        # the Link under its full notation key.
        view.cbases = described.cbase
        view.rbases = described.rbase
        view.dataframe = described.result
        view.name = full_name
        link[full_name] = view
Пример #12
0
    def frequency(self, link, name, kwargs):
        """
        Adds count-based views on a Link definition to the Stack object.

        ``frequency`` is able to compute several aggregates that are based on
        the count of code values in uni- or bivariate Links. This includes
        bases / samples sizes, raw or normalized cell frequencies and code
        summaries like simple and complex nets.

        Parameters
        ----------
        link : Quantipy Link object.
        name : str
            The shortname applied to the view. ``'ebase'``, ``'cbase'`` and
            ``'rbase'`` request the respective base, ``'counts'``, ``'c%'``
            and ``'r%'`` request cell frequencies; any other name requires
            a ``logic`` definition.
        kwargs : dict
        Keyword arguments (specific)
        text : str, optional, default None
            Sets an optional label in the meta component of the view that is
            used when the view is passed into a Quantipy build (e.g. Excel,
            Powerpoint).
        logic : list of int, list of dicts or core.tools.view.logic operation
            If a list is passed this instructs a simple net of the codes given
            as int. Multiple nets can be generated via a list of dicts that
            map names to lists of ints. For complex logical statements,
            expression are parsed to identify the qualifying rows in the data.
            For example::

                # simple net
                'logic': [1, 2, 3]

                # multiple nets/code groups
                'logic': [{'A': [1, 2]}, {'B': [3, 4]}, {'C': [5, 6]}]

                # code logic
                'logic': has_all([1, 2, 3])

        calc : optional, default None
            Only used when ``logic`` is a list: forwarded as ``op`` to the
            Quantity's ``combine`` call.
        calc_only : bool, optional, default False
            Only used when ``logic`` is a list and ``calc`` is given:
            forwarded as ``op_only`` to the Quantity's ``combine`` call.

        Returns
        -------
        None
            Adds requested View to the Stack, storing it under the full
            view name notation key.

        Raises
        ------
        ValueError
            If ``name`` is not one of the base or frequency names and no
            (non-empty) ``logic`` is provided, since there is then nothing
            to aggregate.

        .. note:: Net codes take into account if a variable is
                  multi-coded. The net will therefore consider qualifying
                  cases and not the raw sum of the frequencies
                  per category, i.e. no multiple counting of cases.
        """
        func_name = 'frequency'
        view = View(link, kwargs=kwargs)
        pos, relation, rel_to, weights, text = view.std_params()
        q = qp.Quantity(link, weights)
        logic = kwargs.get('logic', None)
        calc = kwargs.get('calc', None)
        val_name = None

        if name in ['ebase', 'cbase', 'rbase']:
            freq = q.count(name, margin=False, as_df=False)
        elif name in ['counts', 'c%', 'r%']:
            freq = q.count('freq', margin=False, as_df=False)
        elif logic:
            if isinstance(logic, list):
                # A plain list of codes is a single net labelled with the
                # view name; a list of dicts carries its own group names.
                if not isinstance(logic[0], dict):
                    val_name = name
                calc_only = kwargs.get('calc_only', False) if calc else False
                freq = q.combine(logic,
                                 op=calc,
                                 op_only=calc_only,
                                 margin=False,
                                 as_df=False)
                relation = view.spec_relation()
            else:
                # Complex logic: resolve the qualifying rows first, then
                # aggregate a Quantity restricted to that index.
                val_name = name
                casedata = link.get_data().copy()
                idx, relation = tools.view.logic.get_logic_index(
                    casedata[link.x], logic, casedata)
                filtered_q = qp.Quantity(link, weights, idx)
                freq = filtered_q.combine(margin=False, as_df=False)
        else:
            # Previously this fell through with ``freq`` unbound and failed
            # with an opaque UnboundLocalError further down.
            raise ValueError(
                "frequency view '{}' is neither a base nor a counts view "
                "and no 'logic' was provided".format(name))

        # Bases are taken before any normalization is applied.
        view.cbases = freq.cbase
        view.rbases = freq.rbase
        if rel_to is not None:
            base = 'col' if rel_to == 'y' else 'row'
            freq = freq.normalize(base)
        view_df = freq.to_df(val_name).result
        notation = view.notation(func_name, name, relation)
        view.name = notation
        view.dataframe = view_df
        link[notation] = view