Exemplo n.º 1
0
    def print_ij(self, statistic, names=None, format=None):
        """Print table of given cross-statistic."""

        values = self.statistics[statistic]
        if names is None: names = self.names
        if format is None:
            stat = statistic.replace('_ij', '')
            if stat in Datab.spec_index: format = Datab.spec[Datab.spec_index[stat]][2]
            else: format = '%9.4f'

        print ' '.join([strings.fmt(string, format) for string in [' '] + names])
        for i in range(self.nvars):
            print strings.fmt(names[i], format),
            for j in range(self.nvars):
                print format % values[i, j],
            print ''
Exemplo n.º 2
0
    def header(self, spec=False, fields=None, exclude=[], rename=[], delimiter=' '):
        """
        Return header, as the string that would be output by the output() method.

        spec: if true, return three newline-separated lines ('#fields ...',
        '#types ...', '#formats ...') built from self.field_spec; otherwise
        return a single line of field names, each formatted with the field's
        own format and joined by delimiter.
        fields: field names to include, in order; defaults to the first item
        of every entry in self.spec. The list passed in is never modified.
        exclude: field names to drop from fields.
        rename: sequence of (old_name, new_name) pairs; pairs whose old_name
        is not among fields are ignored. Renaming only affects the displayed
        name, not the lookup in self.field_spec.
        delimiter: separator used between names when spec is false.
        """

        # NOTE: the list defaults are kept for signature compatibility; they
        # are only read here, never mutated.
        if fields is None:
            fields = [s[0] for s in self.spec]
        else:
            # Work on a copy: the exclusion step below removes items in place
            # and must not alter the caller's list (also lets tuples work).
            fields = list(fields)

        # Displayed name for each field; identity unless renamed.
        field_name = dict((field, field) for field in fields)
        if len(rename):
            for field_rename in rename:
                if field_rename[0] not in field_name:
                    continue
                field_name[field_rename[0]] = field_rename[1]

        for skip_field in exclude:
            if skip_field in fields:
                fields.remove(skip_field)

        if spec:
            names_line = '#fields ' + ' '.join([field_name[f] for f in fields])
            types_line = '#types ' + ' '.join([self.field_spec[f][1] for f in fields])
            formats_line = '#formats ' + ' '.join([self.field_spec[f][2] for f in fields])
            return '\n'.join([names_line, types_line, formats_line])

        return delimiter.join([strings.fmt(field_name[f], self.field_spec[f][2])
                               for f in fields])
Exemplo n.º 3
0
def bucketer(*data_field_splits, **kwargs):
    """
    Helper function for constructing bucket options. Pass output of this function
    to stats.summary(), etc like this --
    stats.summary(data['1_252'], data['weight'],
                  **stats.bucketer([pred1, 'var1', (-.1, 0, .1)], [pred2, 'var2', 0]))

    *data_field_splits:
    List of triples, (data, field_name, [split_val1, ...]).
    If the third element here is a scalar, it gets cast as a list with one element.
    If the third element here is None, bucketing is done evaluating data as True/False
    (non-boolean data is cast to boolean first).
    If the first element here is a tuple, it is deemed to be a set of True/False arrays,
    and the third element is assumed to be a list of labels (if None, label with integers).

    **kwargs:
    Can handle the following options: label_all, label_other, formats.
    label_all (default 'All'): label of the catch-all bucket added to every
    field; a false value disables that bucket.
    label_other (default 'Other'): label of the bucket holding elements that
    fall in none of the split ranges (e.g. NaN); a false value disables it.
    formats (default '%6.1f'): %-format used for the split values in labels.

    Returns a dict with keys 'buckets' (list of boolean overlays, one per
    combination of per-field buckets), 'labels' (matching '|'-joined labels),
    'name' ('|'-joined field names), and 'label_all' / 'label_other', both
    set to None.

    Raises IndexError if no triples are given.
    """

    def _as_mask(values):
        # '~' is only a logical NOT for boolean arrays: on integers it is a
        # bitwise complement and on floats it raises. Boolean input is
        # returned untouched.
        if getattr(values, 'dtype', None) != bool:
            return numpy.asarray(values, dtype=bool)
        return values

    def _cross_layers(layers):
        # Fold the per-field (overlays, labels) layers, last field first, into
        # the cross product of all buckets; labels end up in field order.
        overlays, labels = layers.pop(-1)
        while layers:
            layer_overlays, layer_labels = layers.pop(-1)
            crossed_overlays, crossed_labels = [], []
            for layer_over, layer_label in zip(layer_overlays, layer_labels):
                for existing_over, existing_label in zip(overlays, labels):
                    crossed_overlays.append(existing_over & layer_over)
                    crossed_labels.append(layer_label + '|' + existing_label)
            overlays, labels = crossed_overlays, crossed_labels
        return overlays, labels

    label_all = kwargs.get('label_all', 'All')
    label_other = kwargs.get('label_other', 'Other')
    formats = kwargs.get('formats', '%6.1f')

    # Range labels look like '<low>_<high>', with the upper bound left-aligned.
    fmt = formats + '_%-' + formats[1:]
    width = strings.fmt_length(formats)
    str_fmt = '%' + str(width) + 's ' + strings.fmt(' ', formats)
    name_fmt = '%-' + str(width * 2 + 1) + 's'

    layers = []
    name = ''
    for data, field, splits in data_field_splits:
        if numpy.isscalar(splits):
            splits = [splits]
        if name:
            name += '|'
        name += name_fmt % field

        is_multi = isinstance(data, tuple)
        overlays, labels = [], []
        if label_all:
            overlays.append(numpy.ones(numpy.shape(data[0] if is_multi else data),
                                       dtype=bool))
            labels.append(str_fmt % label_all)

        if is_multi:
            # multiple boolean bucketing
            for i, flags in enumerate(data):
                overlays.append(_as_mask(flags))
                labels.append(name_fmt % (i if splits is None else splits[i]))
        elif splits is None:
            # boolean bucketing
            truth = _as_mask(data)
            overlays.append(truth)
            labels.append(str_fmt % 'True')
            overlays.append(~truth)
            labels.append(str_fmt % 'False')
        else:
            # range bucketing: (-inf, s0), [s0, s1), ..., [s_last, inf)
            other_overlay = (numpy.ones(numpy.shape(data), dtype=bool)
                             if label_other else None)
            last = len(splits) - 1
            for count, value in enumerate(splits):
                ranges = []
                if count == 0:
                    ranges.append((data < value, (-numpy.inf, value)))
                else:
                    lower = splits[count - 1]
                    ranges.append(((data < value) & (data >= lower), (lower, value)))
                if count == last:
                    ranges.append((data >= value, (value, numpy.inf)))

                for mask, bounds in ranges:
                    overlays.append(mask)
                    if label_other:
                        # Whatever lands in no range stays set in other_overlay.
                        other_overlay &= ~mask
                    labels.append(fmt % bounds)

            if label_other:
                overlays.append(other_overlay)
                labels.append(str_fmt % label_other)

        layers.append((overlays, labels))

    overlays, labels = _cross_layers(layers)
    # Blank out the infinite bounds while keeping the label width unchanged.
    labels = [l.replace('-inf_', '    _').replace('_inf', '_   ') for l in labels]
    return {'buckets': overlays, 'labels': labels,
            'name': name, 'label_all': None, 'label_other': None}