def group(data, group_by, sample_width):
    """Group the data rows read from CSV files into samples.

    Args:
        data: sequence of CSV rows; each data row has at least 5 columns
            and a timestamp parseable by time.datetime_from_row.
        group_by: 'day' or 'week' to fold rows into daily/weekly periods;
            any other value uses one effectively-infinite period.
        sample_width: width of one sample (one group) in minutes.

    Returns:
        A list of groups, where each group is a list of 5 column-lists
        (one list per CSV column) accumulated across all periods.
    """
    import datetime

    # Truncate row timestamps at whole seconds when parsing.
    cutoff = 'second'
    beginning_of_period = time.datetime_from_row(
        data[get_first_data_point_index(data)], cutoff)

    # Default: a period so long everything falls into the first one.
    period = datetime.timedelta(days=1000000)
    if group_by == 'day':
        period = datetime.timedelta(days=1)
    elif group_by == 'week':
        period = datetime.timedelta(weeks=1)
    sample_width = datetime.timedelta(minutes=sample_width)

    # groups[i] is the i-th sample: five lists, one per CSV column.
    groups = [[[] for _ in range(5)]]
    group_index = 0
    # Only during the first period do new groups need to be created;
    # later periods fold their rows into the groups made earlier.
    first_round = True
    for row in data:
        datetime_current_row = time.datetime_from_row(row, cutoff)

        # Is it time to start adding data from the beginning of the
        # period again?
        if datetime_current_row - beginning_of_period >= period:
            first_round = False
            # Skip forward whole periods until the row fits inside one
            # (there may be gaps of more than one period in the data).
            while datetime_current_row - beginning_of_period >= period:
                beginning_of_period += period

        # Did we just go outside the current sample? A sample is e.g. a
        # single box in a box plot.
        new_index = math.floor(
            (datetime_current_row - beginning_of_period) / sample_width)
        if new_index != group_index:
            group_index = new_index
            if first_round:
                # BUG FIX: the original appended exactly one new group per
                # index change, so a jump of more than 1 (sparse data)
                # caused an IndexError below. Extend as far as needed.
                while len(groups) <= group_index:
                    groups.append([[] for _ in range(5)])
            if len(groups[group_index]) < 1:
                groups[group_index] = [[] for _ in range(5)]

        for i in range(5):
            groups[group_index][i].append(row[i])
    return groups
def group(data, group_by, sample_width):
    """Groups the data rows read from csv files into groups of data."""
    import datetime

    # One group = five column lists; start with a single empty group.
    groups = [[[] for _ in range(5)]]

    cutoff = 'second'
    beginning_of_period = time.datetime_from_row(
        data[get_first_data_point_index(data)], cutoff)

    # Choose the period length; the fallback is effectively unbounded.
    if group_by == 'day':
        period = datetime.timedelta(days=1)
    elif group_by == 'week':
        period = datetime.timedelta(weeks=1)
    else:
        period = datetime.timedelta(days=1000000)
    sample_width = datetime.timedelta(minutes=sample_width)

    group_index = 0
    first_round = True
    for row in data:
        row_time = time.datetime_from_row(row, cutoff)

        # Has the row moved past the current period? If so, advance the
        # period start until the row falls inside it again.
        if row_time - beginning_of_period >= period:
            first_round = False
            beginning_of_period += period
            while row_time - beginning_of_period >= period:
                beginning_of_period += period
            group_index = math.floor(
                (row_time - beginning_of_period) / sample_width)

        # A sample is e.g. a single box in a box plot; detect whether the
        # row crossed into the next sample.
        sample_index = math.floor(
            (row_time - beginning_of_period) / sample_width)
        if sample_index != group_index:
            group_index = sample_index
            if first_round:
                groups.append([])
            if len(groups[group_index]) < 1:
                groups[group_index] = [[] for _ in range(5)]

        for column in range(5):
            groups[group_index][column].append(row[column])
    return groups
def filter_weekends(rows, include):
    """Either only include weekends or entirely exclude them."""
    kept = []
    for row in rows:
        # is_busday is True on business days; XOR with `include` keeps
        # only weekend rows when include is True, only weekday rows when
        # include is False.
        busday = numpy.is_busday(time.datetime_from_row(row).date())
        if include ^ busday:
            kept.append(row)
    return kept
def fetch(config, url):
    """Fetches and stores metrics from the Sensor at the URL given.

    Downloads export.csv from the sensor, removes rows overlapping with
    the newest previously saved CSV, and writes the remainder to a new
    timestamped CSV in config.data_folder. Exits the process with a
    non-zero status on connection or format errors.
    """
    new_path = os.path.join(
        config.data_folder,
        datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S.csv'))
    new_temp_path = new_path + 'temp'

    if not url.startswith('http://'):
        url = 'http://' + url
    url += '/export.csv'
    if config.verbose:
        click.echo('Fetching data from ' + url + ' and saving it in ' +
                   new_temp_path)

    # Newest existing CSV (if any) is used to filter out overlapping rows.
    try:
        previous_path = sorted(glob.glob(config.data_folder + '/*.csv'))[-1]
    except IndexError:
        previous_path = None

    # BUG FIX: HTTPError is a subclass of URLError, so it must be caught
    # first — the original order made the HTTPError branch unreachable.
    try:
        urllib.request.urlretrieve(url, new_temp_path)
    except urllib.error.HTTPError as e:
        # BUG FIX: e.code is an int; concatenating it to a str raised
        # TypeError. Convert explicitly.
        click.echo('Managed to connect but failed with HTTP Error code: ' +
                   str(e.code))
        click.echo(e.reason)
        sys.exit(2)
    except urllib.error.URLError as e:
        click.echo('Failed to establish an HTTP connection.')
        click.echo(e.reason)
        sys.exit(1)

    try:
        new_rows = csvio.loadOne(new_temp_path)
        if not new_rows[0][0] == "Device:":
            click.echo('Managed to connect and fetch data from something, '
                       'but it was not a CSV from a Comet Web Sensor.')
            click.echo((new_rows[0][0]))
            sys.exit(3)

        # Here we'll try to remove overlapping data points with the last
        # file. It gets nasty due to time adjustments done by the sensor.
        if previous_path is not None:
            previous_rows = csvio.loadOne(previous_path)
            data_start = data.get_first_data_point_index(previous_rows)
            time_of_newest_data_in_previous = time.datetime_from_row(
                previous_rows[data_start], 'second')
            filtered_rows = []
            for row in new_rows:
                if data.not_data_point(row):
                    continue
                time_of_row = time.datetime_from_row(row)
                if time_of_newest_data_in_previous < time_of_row:
                    filtered_rows.append(row)
            if not filtered_rows:
                if config.verbose:
                    click.echo('No new rows found in fetched data.')
                sys.exit(0)
        else:
            filtered_rows = new_rows

        if config.verbose:
            click.echo('Rewriting treated CSV to: ' + new_path)
        csvio.writeRows(filtered_rows, new_path)
    finally:
        # Always clean up the temporary download, even on early sys.exit.
        os.remove(new_temp_path)
def filter_weekends(rows, include):
    """Either only include weekends or entirely exclude them."""
    def wanted(row):
        # is_busday is True on business days; XOR with `include` flips
        # the selection between weekend-only and weekday-only rows.
        return include ^ numpy.is_busday(time.datetime_from_row(row).date())

    return [row for row in rows if wanted(row)]
def fetch(config, url):
    """Fetches and stores metrics from the Sensor at the URL given.

    Retrieves export.csv from the sensor, drops data points already
    covered by the most recent stored CSV, and saves what remains as a
    new timestamped CSV under config.data_folder. Calls sys.exit with a
    non-zero code on connection or format failures.
    """
    new_path = os.path.join(
        config.data_folder,
        datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S.csv'))
    new_temp_path = new_path + 'temp'

    if not url.startswith('http://'):
        url = 'http://' + url
    url += '/export.csv'
    if config.verbose:
        click.echo('Fetching data from ' + url + ' and saving it in ' +
                   new_temp_path)

    # Locate the newest stored CSV; None when the folder is empty.
    try:
        previous_path = sorted(glob.glob(config.data_folder + '/*.csv'))[-1]
    except IndexError:
        previous_path = None

    # BUG FIX: catch HTTPError before URLError — HTTPError subclasses
    # URLError, so with the original ordering the HTTP branch never ran.
    try:
        urllib.request.urlretrieve(url, new_temp_path)
    except urllib.error.HTTPError as e:
        # BUG FIX: e.code is an int, so str + int raised TypeError here.
        click.echo('Managed to connect but failed with HTTP Error code: ' +
                   str(e.code))
        click.echo(e.reason)
        sys.exit(2)
    except urllib.error.URLError as e:
        click.echo('Failed to establish an HTTP connection.')
        click.echo(e.reason)
        sys.exit(1)

    try:
        new_rows = csvio.loadOne(new_temp_path)
        if not new_rows[0][0] == "Device:":
            click.echo('Managed to connect and fetch data from something, '
                       'but it was not a CSV from a Comet Web Sensor.')
            click.echo((new_rows[0][0]))
            sys.exit(3)

        # Here we'll try to remove overlapping data points with the last
        # file. It gets nasty due to time adjustments done by the sensor.
        if previous_path is not None:
            previous_rows = csvio.loadOne(previous_path)
            data_start = data.get_first_data_point_index(previous_rows)
            time_of_newest_data_in_previous = time.datetime_from_row(
                previous_rows[data_start], 'second')
            filtered_rows = []
            for row in new_rows:
                if data.not_data_point(row):
                    continue
                time_of_row = time.datetime_from_row(row)
                if time_of_newest_data_in_previous < time_of_row:
                    filtered_rows.append(row)
            if not filtered_rows:
                if config.verbose:
                    click.echo('No new rows found in fetched data.')
                sys.exit(0)
        else:
            filtered_rows = new_rows

        if config.verbose:
            click.echo('Rewriting treated CSV to: ' + new_path)
        csvio.writeRows(filtered_rows, new_path)
    finally:
        # Remove the temporary download regardless of how we exit.
        os.remove(new_temp_path)