def test_dataset_iterator(self):
    """A dataset must be iterable, and iterable more than once."""
    row_count = 10
    dataset = Dataset()
    for value in range(row_count):
        dataset.add_row([value])

    # Two passes: the second one checks that the first pass did not
    # exhaust the dataset.
    for _ in range(2):
        seen = sum(1 for _row in dataset)
        self.assertEqual(row_count, seen)
def test_load_invalid_date_type(self):
    """Loading a malformed date must raise ParseError with a verbose message."""
    dataset = Dataset([Dataset.DATE])
    expected_message = ('Invalid value "aa-bb-cc" in line 1 '
                        'for "d" column type (index: 0)')
    try:
        dataset.load([['aa-bb-cc']])
    except dataset.ParseError as error:
        self.assertEqual(expected_message, str(error))
    else:
        # load() returned normally, so the bad value was accepted.
        self.fail('dataset has loaded invalid data')
def test_load_invalid_date_type(self):
    """Check that invalid date input is rejected with a descriptive error."""
    invalid_rows = [['aa-bb-cc']]
    dataset = Dataset([Dataset.DATE])
    try:
        dataset.load(invalid_rows)
    except dataset.ParseError as parse_error:
        actual_message = str(parse_error)
    else:
        # Reaching this branch means no ParseError was raised.
        self.fail('dataset has loaded invalid data')
    self.assertEqual(
        'Invalid value "aa-bb-cc" in line 1 '
        'for "d" column type (index: 0)',
        actual_message)
def test_get_dataset_columns(self):
    """Check that columns are extracted correctly.

    The dataset under test holds two rows:

        | 0 | 1 | 2 |
        | 3 | 4 | 5 |
    """
    dataset = Dataset()
    for row in chunk(range(6), 3):
        dataset.add_row(row)

    # Single-column access: column k holds k and k + 3.
    for index in range(3):
        column_values = list(dataset.column(index))
        self.assertEqual([index, index + 3], column_values)

    # Multi-column access: one tuple per row.
    first_pair = list(dataset.column(0, 1))
    self.assertEqual([(0, 1), (3, 4)], first_pair)
    second_pair = list(dataset.column(1, 2))
    self.assertEqual([(1, 2), (4, 5)], second_pair)
def test_get_dataset_columns(self):
    """Verify single- and multi-column selection.

    Rows loaded into the dataset:

        | 0 | 1 | 2 |
        | 3 | 4 | 5 |
    """
    dataset = Dataset()
    for row in chunk(range(6), 3):
        dataset.add_row(row)

    for col in range(3):
        self.assertEqual([col, col + 3], list(dataset.column(col)))

    # Selecting several columns at once is expected to give row tuples.
    multi_column_cases = [
        ((0, 1), [(0, 1), (3, 4)]),
        ((1, 2), [(1, 2), (4, 5)]),
    ]
    for indexes, expected in multi_column_cases:
        self.assertEqual(expected, list(dataset.column(*indexes)))
def __init__(self, dataset):
    """Compute summary statistics for ``dataset``.

    Attributes set:
        mean, std: results of ``mean(dataset)`` and ``std(dataset, mean)``.
        percentiles: ``percentiles(dataset, PERCENTILES)``.
        calculated_percentiles: reference values at mean - 2*std,
            mean - std, mean - 0.67*std, mean, mean + 0.67*std,
            mean + std and mean + 2*std.
        max, min: extremes of the numeric values of the dataset.
        is_normal: result of ``self._is_normal`` applied to the measured
            and the calculated percentiles.
        length: ``len(dataset)``.
    """
    self.mean = avg = mean(dataset)
    self.std = sigma = std(dataset, avg)
    self.percentiles = percentiles(dataset, PERCENTILES)
    self.calculated_percentiles = [
        avg - 2 * sigma,
        avg - sigma,
        avg - 0.67 * sigma,
        avg,
        avg + 0.67 * sigma,
        avg + sigma,
        avg + 2 * sigma,
    ]

    numeric_values = Dataset.get_num_column_or_list
    self.max = max(numeric_values(dataset))
    self.min = min(numeric_values(dataset))

    self.is_normal = self._is_normal(
        self.percentiles, self.calculated_percentiles)
    self.length = len(dataset)
def hist(dataset, **kwargs):
    """Plot a histogram of ``dataset`` and save it to an image file.

    Keyword arguments named ``grid``, ``xlabel``, ``ylabel``, ``title`` and
    ``output`` configure the figure and the output file. Every other
    keyword argument is forwarded to ``plt.hist`` and overrides the
    built-in defaults (``bins``, ``normed``, ``facecolor``, ``alpha``).

    Returns the path of the saved image: ``output`` when it is given and
    truthy, otherwise the name returned by ``get_tmp_file_name('.png')``.
    """
    defaults = {
        'grid': False,
        'xlabel': '',
        'ylabel': '',
        'title': '',
        'output': None,
    }
    graph_params = {
        'bins': 20,
        'normed': 1,
        'facecolor': 'green',
        'alpha': 0.75,
    }

    # Route every keyword argument to exactly one dict. Merging kwargs into
    # ``defaults`` before filtering made each caller-supplied plt.hist
    # option look like a figure option, so it was removed from
    # ``graph_params`` and silently ignored.
    for key, value in kwargs.items():
        if key in defaults:
            defaults[key] = value
        else:
            graph_params[key] = value

    values = list(Dataset.get_num_column_or_list(dataset))
    plt.hist(values, **graph_params)
    plt.xlabel(defaults['xlabel'])
    plt.ylabel(defaults['ylabel'])
    plt.title(defaults['title'])
    plt.grid(defaults['grid'])

    filename = defaults['output'] or get_tmp_file_name('.png')
    plt.savefig(filename)
    return filename
def hist(dataset, **kwargs):
    """Draw a histogram of ``dataset`` and write it to an image file.

    ``grid``, ``xlabel``, ``ylabel``, ``title`` and ``output`` are consumed
    here; any other keyword argument is handed to ``plt.hist`` on top of
    the defaults for ``bins``, ``normed``, ``facecolor`` and ``alpha``.

    Returns the file name the figure was saved to (``output`` if truthy,
    else ``get_tmp_file_name('.png')``).
    """
    figure_opts = dict(grid=False, xlabel='', ylabel='', title='',
                       output=None)
    hist_opts = dict(bins=20, normed=1, facecolor='green', alpha=0.75)

    # Figure-level names stay out of the plt.hist keyword arguments.
    for name, value in kwargs.items():
        target = figure_opts if name in figure_opts else hist_opts
        target[name] = value

    numbers = list(Dataset.get_num_column_or_list(dataset))
    _counts, _edges, _patches = plt.hist(numbers, **hist_opts)

    plt.xlabel(figure_opts['xlabel'])
    plt.ylabel(figure_opts['ylabel'])
    plt.title(figure_opts['title'])
    plt.grid(figure_opts['grid'])

    target_file = figure_opts['output']
    if not target_file:
        target_file = get_tmp_file_name('.png')
    plt.savefig(target_file)
    return target_file
def __init__(self, dataset):
    """Collect descriptive statistics of ``dataset`` on the instance.

    Stores the mean, the standard deviation, the measured percentiles
    (for ``PERCENTILES``), a list of reference values derived from mean
    and std (at -2, -1, -0.67, 0, +0.67, +1 and +2 std from the mean),
    the max and min numeric values, the outcome of ``self._is_normal``
    for the two percentile lists, and the dataset length.
    """
    self.mean = mean(dataset)
    self.std = std(dataset, self.mean)
    self.percentiles = percentiles(dataset, PERCENTILES)

    center, spread = self.mean, self.std
    lower = [center - 2 * spread, center - spread, center - 0.67 * spread]
    upper = [center + 0.67 * spread, center + spread, center + 2 * spread]
    self.calculated_percentiles = lower + [center] + upper

    self.max = max(Dataset.get_num_column_or_list(dataset))
    self.min = min(Dataset.get_num_column_or_list(dataset))

    measured, reference = self.percentiles, self.calculated_percentiles
    self.is_normal = self._is_normal(measured, reference)
    self.length = len(dataset)
def show_metric(metric):
    """Plot the stored points of ``metric`` and return the PNG as a response.

    The points are read from the sqlite database ``DB`` via ``get_points``,
    loaded into a (DATE, FLOAT) dataset and rendered by ``plot_date`` into
    ``IMAGE_DIR/<metric>.png``. The image bytes are then returned in a
    ``Response`` with ``image/png`` content type and a matching
    content-length header.
    """
    conn = sqlite3.connect(DB)
    try:
        points = get_points(conn, metric)
    finally:
        # Release the connection even when the query fails.
        conn.close()

    if not os.path.exists(IMAGE_DIR):
        os.makedirs(IMAGE_DIR)
    # NOTE(review): ``metric`` goes into the file name unchanged; if it can
    # originate from a request, confirm it is validated by the caller.
    fname = IMAGE_DIR + '/%s.png' % metric

    d = Dataset([Dataset.DATE, Dataset.FLOAT])
    d.load(points)
    plot_date(d, output=fname, figsize=(14, 7), linestyle='-')

    # Context manager closes the file handle deterministically.
    with open(fname, 'rb') as image_file:
        data = image_file.read()

    r = Response()
    r.body = data
    r.code = '200 OK'
    r.headers = [('content-type', 'image/png'),
                 ('content-length', str(len(r.body)))]
    return r
def mean(dataset):
    """Return the arithmetic mean of the numeric values of ``dataset``.

    The values come from ``Dataset.get_num_column_or_list(dataset)`` and
    the divisor is ``len(dataset)`` converted to float.
    """
    total = sum(Dataset.get_num_column_or_list(dataset))
    count = float(len(dataset))
    return total / count
def std(dataset, m=None):
    """Return the population standard deviation of ``dataset``.

    ``m`` is the mean to measure deviations from; when it is None it is
    computed with ``mean(dataset)``. The sum of squared deviations is
    divided by ``len(dataset)`` (not by n - 1).
    """
    count = float(len(dataset))
    values = Dataset.get_num_column_or_list(dataset)
    if m is None:
        m = mean(dataset)
    squared_deviations = sum((value - m) ** 2 for value in values)
    return (squared_deviations / count) ** 0.5
def percentiles(dataset, vals):
    """Return one percentile value per entry of ``vals``.

    The numeric values of ``dataset`` are sorted in ascending order and
    each requested percentile ``p`` is computed by
    ``_percentile(sorted_values, len(dataset), p)``.
    """
    size = len(dataset)
    ordered = sorted(Dataset.get_num_column_or_list(dataset))
    return [_percentile(ordered, size, wanted) for wanted in vals]
def std(dataset, m=None):
    """Compute the standard deviation of the numeric values of ``dataset``.

    Deviations are taken from ``m``; if ``m`` is None the mean is obtained
    from ``mean(dataset)``. The divisor is the dataset length, so this is
    the population (not the sample) form.
    """
    size = len(dataset)
    numbers = Dataset.get_num_column_or_list(dataset)
    center = mean(dataset) if m is None else m
    variance = sum((x - center) ** 2 for x in numbers) / float(size)
    return variance ** 0.5