def print_ij(self, statistic, names=None, format=None): """Print table of given cross-statistic.""" values = self.statistics[statistic] if names is None: names = self.names if format is None: stat = statistic.replace('_ij', '') if stat in Datab.spec_index: format = Datab.spec[Datab.spec_index[stat]][2] else: format = '%9.4f' print ' '.join([strings.fmt(string, format) for string in [' '] + names]) for i in range(self.nvars): print strings.fmt(names[i], format), for j in range(self.nvars): print format % values[i, j], print ''
def header(self, spec=False, fields=None, exclude=(), rename=(), delimiter=' '):
    """
    Return header, as the string that would be output by the output() method.

    spec: If true, return three lines ('#fields', '#types', '#formats')
        joined by newlines; items on each line are separated by a single
        space and delimiter is not used. Otherwise return one line of field
        names, each passed through strings.fmt() with the field's format
        (self.field_spec[field][2]) and joined by delimiter.
    fields: Iterable of field names to include, in order. Defaults to the
        first element of every entry in self.spec. The caller's object is
        never modified.
    exclude: Field names to drop; the first occurrence of each name is
        removed from the working field list.
    rename: Sequence of entries whose element [0] is a field name and
        element [1] the name to show instead. Entries naming a field that
        is not in fields are ignored.
    delimiter: Separator used between names when spec is false.
    """
    if fields is None:
        fields = [entry[0] for entry in self.spec]
    else:
        # Work on a private copy: removing excluded fields below must not
        # alter the list the caller passed in.
        fields = list(fields)

    # Display name per field; identity unless overridden through rename.
    shown_as = dict((field, field) for field in fields)
    for entry in rename:
        if entry[0] in shown_as:
            shown_as[entry[0]] = entry[1]

    for skipped in exclude:
        if skipped in fields:
            fields.remove(skipped)

    if spec:
        return '\n'.join([
            '#fields ' + ' '.join([shown_as[f] for f in fields]),
            '#types ' + ' '.join([self.field_spec[f][1] for f in fields]),
            '#formats ' + ' '.join([self.field_spec[f][2] for f in fields]),
        ])
    return delimiter.join([strings.fmt(shown_as[f], self.field_spec[f][2])
                           for f in fields])
def bucketer(*data_field_splits, **kwargs):
    """
    Build the bucket keyword options consumed by stats.summary() and friends.

    Typical use --
        stats.summary(data['1_252'], data['weight'],
                      **stats.bucketer([pred1, 'var1', (-.1, 0, .1)],
                                       [pred2, 'var2', 0]))

    *data_field_splits: Triples of (data, field_name, splits).
        - splits scalar: treated as a one-element list of split values.
        - splits list: data is cut into (-inf, s0), [s0, s1), ...,
          [s_last, inf).
        - splits None: data is bucketed as True / False (data and ~data).
        - data a tuple: each element is taken as a ready-made boolean
          array, and splits supplies the labels (integer positions are
          used when splits is None).
    **kwargs: Recognised options are label_all (default 'All'), label_other
        (default 'Other') and formats (default '%6.1f'). A false label_all
        or label_other suppresses the corresponding bucket.

    Returns a dict with keys 'buckets' (boolean arrays, one per combined
    bucket), 'labels' (matching strings, layers separated by '|'), 'name'
    (field names separated by '|'), and 'label_all' / 'label_other', both
    set to None.
    """
    label_all = kwargs.get('label_all', 'All')
    label_other = kwargs.get('label_other', 'Other')
    formats = kwargs.get('formats', '%6.1f')

    # Label templates: a "low_high" range, a right-aligned word padded to
    # the same overall width, and a left-aligned cell of that width.
    range_fmt = formats + '_%-' + formats[1:]
    cell_width = strings.fmt_length(formats)
    str_fmt = '%' + str(cell_width) + 's ' + strings.fmt(' ', formats)
    name_fmt = '%-' + str(cell_width * 2 + 1) + 's'

    layers = []
    name_parts = []
    for data, field, splits in data_field_splits:
        if numpy.isscalar(splits):
            splits = [splits]
        name_parts.append(name_fmt % field)

        data_is_tuple = type(data) is tuple
        if label_all:
            template = data[0] if data_is_tuple else data
            masks = [numpy.ones(numpy.shape(template), dtype=bool)]
            tags = [str_fmt % label_all]
        else:
            masks, tags = [], []

        if data_is_tuple:
            # Several boolean arrays supplied directly.
            for position, mask in enumerate(data):
                masks.append(mask)
                tags.append(name_fmt % (position if splits is None
                                        else splits[position]))
        elif splits is None:
            # Single boolean array: True bucket, then its complement.
            masks.extend([data, ~data])
            tags.extend([str_fmt % 'True', str_fmt % 'False'])
        else:
            # Numeric splits; `leftover` tracks rows that land in no range.
            leftover = (numpy.ones(numpy.shape(data), dtype=bool)
                        if label_other else None)
            final = len(splits) - 1
            for position, edge in enumerate(splits):
                pieces = []
                if position == 0:
                    pieces.append((data < edge, -numpy.inf, edge))
                else:
                    lower = splits[position - 1]
                    pieces.append(((data < edge) & (data >= lower),
                                   lower, edge))
                if position == final:
                    pieces.append((data >= edge, edge, numpy.inf))
                for mask, low, high in pieces:
                    masks.append(mask)
                    if label_other:
                        leftover &= ~mask
                    tags.append(range_fmt % (low, high))
            if label_other:
                masks.append(leftover)
                tags.append(str_fmt % label_other)

        layers.append((masks, tags))

    # Cross the layers, starting from the last one and working backwards;
    # the earlier layer varies slowest and its label goes on the left.
    overlays, labels = layers.pop()
    while layers:
        outer_masks, outer_tags = layers.pop()
        crossed_masks, crossed_tags = [], []
        for outer_mask, outer_tag in zip(outer_masks, outer_tags):
            for inner_mask, inner_tag in zip(overlays, labels):
                crossed_masks.append(inner_mask & outer_mask)
                crossed_tags.append(outer_tag + '|' + inner_tag)
        overlays, labels = crossed_masks, crossed_tags

    # Blank out the infinite endpoints in the range labels.
    labels = [tag.replace('-inf_', ' _').replace('_inf', '_ ')
              for tag in labels]

    return {'buckets': overlays,
            'labels': labels,
            'name': '|'.join(name_parts),
            'label_all': None,
            'label_other': None}