Пример #1
0
 def __init__(self, dataset):
     """Compute summary statistics for ``dataset`` and store them on self.

     Attributes set: ``mean``, ``std``, ``percentiles``,
     ``calculated_percentiles``, ``max``, ``min``, ``is_normal`` and
     ``length``.
     """
     self.mean = mean(dataset)
     self.std = std(dataset, self.mean)
     self.percentiles = percentiles(dataset, PERCENTILES)

     # Reference points placed at fixed multiples of the standard deviation
     # around the mean; they are handed to ``_is_normal`` together with the
     # measured percentiles.
     mu, sigma = self.mean, self.std
     self.calculated_percentiles = [
         mu - 2 * sigma,
         mu - sigma,
         mu - 0.67 * sigma,
         mu,
         mu + 0.67 * sigma,
         mu + sigma,
         mu + 2 * sigma,
     ]

     # The numeric values are fetched anew for each aggregate.
     numeric_values = Dataset.get_num_column_or_list
     self.max = max(numeric_values(dataset))
     self.min = min(numeric_values(dataset))

     self.is_normal = self._is_normal(
         self.percentiles, self.calculated_percentiles)
     self.length = len(dataset)
Пример #2
0
def hist(dataset, **kwargs):
    """Draw a histogram of ``dataset`` and save it to an image file.

    Keyword arguments named ``grid``, ``xlabel``, ``ylabel``, ``title`` and
    ``output`` configure the figure. Every other keyword is forwarded to
    ``plt.hist`` on top of the built-in defaults (``bins``, ``normed``,
    ``facecolor``, ``alpha``).

    Returns the file name the figure was saved under: ``output`` when it is
    given and truthy, otherwise the result of ``get_tmp_file_name('.png')``.
    """
    figure_opts = {
        'grid': False,
        'xlabel': '',
        'ylabel': '',
        'title': '',
        'output': None,
    }
    # NOTE(review): newer matplotlib releases dropped ``normed`` in favour of
    # ``density`` -- confirm against the matplotlib version this project pins.
    hist_opts = {
        'bins': 20,
        'normed': 1,
        'facecolor': 'green',
        'alpha': 0.75,
    }
    hist_opts.update(kwargs)
    # Figure-level options must not be passed through to plt.hist.
    for key in figure_opts:
        hist_opts.pop(key, None)
    figure_opts.update(kwargs)

    samples = list(Dataset.get_num_column_or_list(dataset))

    _counts, _edges, _patches = plt.hist(samples, **hist_opts)
    plt.xlabel(figure_opts['xlabel'])
    plt.ylabel(figure_opts['ylabel'])
    plt.title(figure_opts['title'])
    plt.grid(figure_opts['grid'])

    target = figure_opts['output'] or get_tmp_file_name('.png')
    plt.savefig(target)

    return target
Пример #3
0
def hist(dataset, **kwargs):
    """Draw a histogram of ``dataset`` and save it to an image file.

    Keyword arguments:
        grid, xlabel, ylabel, title: figure decoration, applied through the
            matching ``plt`` calls.
        output: file name to save to; when missing or falsy a name from
            ``get_tmp_file_name('.png')`` is used instead.
        anything else: forwarded to ``plt.hist``, overriding the built-in
            defaults (``bins``, ``normed``, ``facecolor``, ``alpha``).

    Returns:
        The file name the figure was saved under.
    """
    defaults = {
        'grid': False,
        'xlabel': '',
        'ylabel': '',
        'title': '',
        'output': None,
    }
    graph_params = {
        'bins': 20,
        'normed': 1,
        'facecolor': 'green',
        'alpha': 0.75,
    }
    # Route each keyword to exactly one of the two dicts. Merging kwargs into
    # ``defaults`` first and then popping every ``defaults`` key from
    # ``graph_params`` (the previous approach) removed *all* caller-supplied
    # keywords from the plt.hist call, so e.g. ``bins=50`` was silently lost.
    for key, value in kwargs.items():
        if key in defaults:
            defaults[key] = value
        else:
            graph_params[key] = value

    values = list(Dataset.get_num_column_or_list(dataset))

    plt.hist(values, **graph_params)
    plt.xlabel(defaults['xlabel'])
    plt.ylabel(defaults['ylabel'])
    plt.title(defaults['title'])
    plt.grid(defaults['grid'])

    filename = defaults['output'] or get_tmp_file_name('.png')
    plt.savefig(filename)

    return filename
Пример #4
0
 def __init__(self, dataset):
     """Populate the instance with descriptive statistics of ``dataset``.

     Stores ``mean``, ``std``, ``percentiles``, ``calculated_percentiles``,
     ``max``, ``min``, ``is_normal`` and ``length``.
     """
     self.mean = mean(dataset)
     self.std = std(dataset, self.mean)
     self.percentiles = percentiles(dataset, PERCENTILES)

     # Distances from the mean, widest first; mirrored around the mean to
     # give the seven reference points passed to ``_is_normal``.
     centre, spread = self.mean, self.std
     offsets = (2 * spread, spread, 0.67 * spread)
     below = [centre - delta for delta in offsets]
     above = [centre + delta for delta in reversed(offsets)]
     self.calculated_percentiles = below + [centre] + above

     self.max = max(Dataset.get_num_column_or_list(dataset))
     self.min = min(Dataset.get_num_column_or_list(dataset))
     self.is_normal = self._is_normal(self.percentiles,
                                      self.calculated_percentiles)
     self.length = len(dataset)
Пример #5
0
def mean(dataset):
    """Return the sum of the numeric values of ``dataset`` over its length.

    The values come from ``Dataset.get_num_column_or_list(dataset)``; the
    divisor is ``len(dataset)`` converted to float.
    """
    total = sum(Dataset.get_num_column_or_list(dataset))
    count = float(len(dataset))
    return total / count
Пример #6
0
def percentiles(dataset, vals):
    """Return a list with one ``_percentile`` result per entry of ``vals``.

    ``_percentile`` receives the numeric values of ``dataset`` in ascending
    order, ``len(dataset)`` and the requested entry.
    """
    size = len(dataset)
    ordered = sorted(Dataset.get_num_column_or_list(dataset))
    return [_percentile(ordered, size, point) for point in vals]
Пример #7
0
def std(dataset, m=None):
    """Return the standard deviation of the numeric values of ``dataset``.

    ``m`` is the mean to measure deviations from; when it is ``None`` it is
    computed with ``mean(dataset)``. The squared deviations are divided by
    ``len(dataset)`` (no ``n - 1`` correction) before taking the square root.
    """
    count = len(dataset)
    samples = Dataset.get_num_column_or_list(dataset)
    centre = mean(dataset) if m is None else m
    squared_total = sum((sample - centre) ** 2 for sample in samples)
    return (squared_total / float(count)) ** 0.5
Пример #8
0
def mean(dataset):
    """Return sum of the dataset's numeric values divided by ``len(dataset)``.

    Values are obtained through ``Dataset.get_num_column_or_list``; the
    length is converted to float before dividing.
    """
    numbers = Dataset.get_num_column_or_list(dataset)
    numerator = sum(numbers)
    denominator = float(len(dataset))
    return numerator / denominator
Пример #9
0
def percentiles(dataset, vals):
    """Evaluate ``_percentile`` for every requested entry in ``vals``.

    Returns a list in the same order as ``vals``. Each call gets the sorted
    numeric values of ``dataset``, ``len(dataset)`` and the entry itself.
    """
    total = len(dataset)
    sorted_values = sorted(Dataset.get_num_column_or_list(dataset))
    return [
        _percentile(sorted_values, total, requested)
        for requested in vals
    ]
Пример #10
0
def std(dataset, m=None):
    """Return the square root of the mean squared deviation from ``m``.

    When ``m`` is ``None`` the mean is obtained from ``mean(dataset)``. The
    divisor is ``len(dataset)``, i.e. no ``n - 1`` correction is applied.
    """
    size = float(len(dataset))
    observations = Dataset.get_num_column_or_list(dataset)
    if m is None:
        m = mean(dataset)
    variance = sum((obs - m) ** 2 for obs in observations) / size
    return variance ** 0.5