def infer_valid_days(channel, wear_bouts, valid_criterion=timedelta(hours=10)):

    # Generate day-long windows covering the channel
    start = time_utilities.start_of_day(channel.timestamps[0])
    day_windows = []
    while start < channel.timeframe[1]:
        day_windows.append(Bout.Bout(start, start + timedelta(days=1)))
        start += timedelta(days=1)

    valid_windows = []
    invalid_windows = []
    for window in day_windows:

        # How much of wear_bouts intersects with this window?
        intersections = Bout.bout_list_intersection([window], wear_bouts)
        total = Bout.total_time(intersections)

        # If the amount of overlap meets or exceeds the valid criterion, the day is valid
        if total >= valid_criterion:
            valid_windows.append(window)
        else:
            invalid_windows.append(window)

    return invalid_windows, valid_windows
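# Usage sketch (illustration only, not library code): classify each calendar day
# of a counts Channel as valid or invalid by wear time. "counts" is an assumed,
# already-loaded Actigraph counts Channel (loaded as in the tests in this
# section); infer_nonwear_actigraph is defined later in this section.
def example_valid_days(counts):

    nonwear_bouts, wear_bouts = infer_nonwear_actigraph(counts)
    invalid_days, valid_days = infer_valid_days(counts, wear_bouts, valid_criterion=timedelta(hours=10))
    print("{} valid days out of {}".format(len(valid_days), len(valid_days) + len(invalid_days)))
    return valid_days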
def test_bouts():

    one_bouts = counts.bouts(1, 1)

    # There are 7 bouts
    assert len(one_bouts) == 7

    # Their lengths are 1, 2, 3, ..., 7 minutes
    assert Bout.total_time(one_bouts) == timedelta(minutes=7 + 6 + 5 + 4 + 3 + 2 + 1)

    # Keeping bouts >= i minutes long means there should be 7-(i-1) left
    for i in range(1, 7):
        i_or_longer = Bout.limit_to_lengths(one_bouts, min_length=timedelta(minutes=i))
        assert len(i_or_longer) == 7 - (i - 1)

    # One manual check
    three_or_longer = Bout.limit_to_lengths(one_bouts, min_length=timedelta(minutes=3))
    assert len(three_or_longer) == 5

    # This should exclude the 1 bout lasting exactly 3 minutes
    three_plus_bit_or_longer = Bout.limit_to_lengths(one_bouts, min_length=timedelta(minutes=3, seconds=1))
    assert len(three_plus_bit_or_longer) == 4

    # No bouts should be this long
    eight_or_longer = Bout.limit_to_lengths(one_bouts, min_length=timedelta(minutes=8))
    assert len(eight_or_longer) == 0

    # There is nothing above 1 in the file, so there should be 0 bouts
    two_bouts = counts.bouts(2, 989)
    assert len(two_bouts) == 0
def test_extracted_bouts():

    one_bouts = counts.bouts(1, 1)
    zero_bouts = counts.bouts(0, 0)

    # Bouts where counts == 0 and bouts where counts == 1 should be mutually exclusive
    # So there should be no intersections between them
    intersections = Bout.bout_list_intersection(one_bouts, zero_bouts)
    assert len(intersections) == 0

    # A bout that spans the whole time period should completely intersect with bouts where counts == 1
    one_big_bout = Bout.Bout(counts.timestamps[0] - timedelta(days=1), counts.timestamps[-1] + timedelta(days=1))
    one_intersections = Bout.bout_list_intersection(one_bouts, [one_big_bout])
    assert Bout.total_time(one_intersections) == Bout.total_time(one_bouts)

    # Same for zeros
    zero_intersections = Bout.bout_list_intersection(zero_bouts, [one_big_bout])
    assert Bout.total_time(zero_intersections) == Bout.total_time(zero_bouts)

    # Filling in the gaps between one bouts should recreate the zero bouts
    inverse_of_one_bouts = Bout.time_period_minus_bouts((counts.timeframe[0], counts.timeframe[1] + timedelta(minutes=1)), one_bouts)

    # They should have the same n
    assert len(inverse_of_one_bouts) == len(zero_bouts)

    # Same total amount of time
    assert Bout.total_time(inverse_of_one_bouts) == Bout.total_time(zero_bouts)
def test_artificial_bouts():

    start_a = datetime.strptime("01/01/2000", "%d/%m/%Y")
    end_a = start_a + timedelta(hours=1)
    bout_a = Bout.Bout(start_a, end_a)

    # Hour-long bout
    assert bout_a.length == timedelta(hours=1)

    start_b = datetime.strptime("01/01/2000", "%d/%m/%Y")
    end_b = start_b + timedelta(minutes=15)
    bout_b = Bout.Bout(start_b, end_b)

    # They share common time
    assert bout_a.overlaps(bout_b)

    # 15 minutes, to be precise
    intersection = bout_a.intersection(bout_b)
    assert intersection.length == timedelta(minutes=15)

    start_c = datetime.strptime("01/02/2000", "%d/%m/%Y")
    end_c = start_c + timedelta(days=1)
    bout_c = Bout.Bout(start_c, end_c)

    # Those bouts do not overlap
    assert not bout_a.overlaps(bout_c)

    # bout_a ends exactly as bout_d starts
    # there should be no overlap (0 common time)
    start_d = end_a
    end_d = start_d + timedelta(minutes=1)
    bout_d = Bout.Bout(start_d, end_d)
    assert not bout_a.overlaps(bout_d)
def summary_statistics(self, statistics=[("generic", "mean")], time_period=False, name=""):

    if time_period == False:
        # A single window covering the whole channel; 1111 days is arbitrarily
        # longer than any expected recording
        windows = [Bout(self.timeframe[0], self.timeframe[1] + timedelta(days=1111))]
    else:
        windows = [Bout(time_period[0], time_period[1])]

    return self.build_statistics_channels(windows, statistics, name=name)
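# Example of the "statistics" argument format (a sketch mirroring the calls in
# the tests in this section; "counts" is an assumed, already-loaded counts
# Channel). Each requested statistic becomes a channel named after its source,
# e.g. "AG_Counts_sum" or "AG_Counts_0_0".
def example_summary(counts):

    summary = Time_Series.Time_Series("")
    summary.add_channels(counts.summary_statistics(statistics=[("generic", ["sum", "n", "missing"]), ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))
    return summary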
def test_nonwear_amount():

    # File contains 24 hours of 1s, then 15 hours of 0s, then 9 hours of 1s, then 24 hours of 1s
    nonwear_bouts, wear_bouts = channel_inference.infer_nonwear_actigraph(counts)

    # There is 1 nonwear bout and 2 wear bouts surrounding it
    assert len(nonwear_bouts) == 1
    assert len(wear_bouts) == 2

    Bout.cache_lengths(nonwear_bouts)
    Bout.cache_lengths(wear_bouts)

    nw_bout = nonwear_bouts[0]

    # The nonwear bout is 15 hours long
    assert nw_bout.length == timedelta(hours=15)

    # Summarise the data before deleting the nonwear
    summary_before = Time_Series.Time_Series("")
    summary_before.add_channels(counts.summary_statistics(statistics=[("generic", ["sum", "n", "missing"]), ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    # Number of 1s = 24 hours + 9 hours + 24 hours
    assert summary_before.get_channel("AG_Counts_sum").data[0] == (24 + 9 + 24) * 60

    # 15 hours of 0s
    assert summary_before.get_channel("AG_Counts_0_0").data[0] == 15 * 60

    # Sum should equal the number of 1s
    assert summary_before.get_channel("AG_Counts_1_1").data[0] == (24 + 9 + 24) * 60

    # n should be 3 days = 1440*3 = 24*3*60 minutes
    assert summary_before.get_channel("AG_Counts_n").data[0] == 24 * 3 * 60

    # Missing should be 0
    assert summary_before.get_channel("AG_Counts_missing").data[0] == 0

    counts.delete_windows(nonwear_bouts)

    # Summarise the data after deleting the nonwear
    summary_after = Time_Series.Time_Series("")
    summary_after.add_channels(counts.summary_statistics(statistics=[("generic", ["sum", "n", "missing"]), ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    # Sum shouldn't have changed
    assert summary_after.get_channel("AG_Counts_sum").data[0] == (24 + 9 + 24) * 60

    # All the 0s were nonwear, so there should now be no 0s
    assert summary_after.get_channel("AG_Counts_0_0").data[0] == 0

    # And the number of 1s shouldn't have changed
    assert summary_after.get_channel("AG_Counts_1_1").data[0] == (24 + 9 + 24) * 60

    # n should have gone down by 15 hours = 15*60 minutes
    assert summary_after.get_channel("AG_Counts_n").data[0] == (24 + 9 + 24) * 60

    # Missing should have gone up by 15 hours = 15*60 minutes
    assert summary_after.get_channel("AG_Counts_missing").data[0] == 15 * 60
def test_bouts():

    # There are 8 bouts of 0s
    zero_bouts = counts.bouts(0, 0)
    assert len(zero_bouts) == 8

    # There are 8 bouts of 1s
    one_bouts = counts.bouts(1, 1)
    assert len(one_bouts) == 8

    # Since there are only 1s and 0s in the file, there should be 1 bout of 0 to 1
    both_bouts = counts.bouts(0, 1)
    assert len(both_bouts) == 1

    # The timestamps of that 1 bout should match the start and end of the channel timestamps
    # But the "end" of the bout occurs 1 minute after the end of the channel
    assert both_bouts[0].start_timestamp == counts.timestamps[0]
    assert both_bouts[0].end_timestamp == counts.timestamps[-1] + timedelta(minutes=1)

    # Raising the max value shouldn't change anything
    bouts = counts.bouts(0, 23)
    assert len(bouts) == 1

    # Same for lowering the minimum value
    bouts = counts.bouts(-340, 23)
    assert len(bouts) == 1

    # There should be no bouts of 2 or above
    bouts = counts.bouts(2, 23)
    assert len(bouts) == 0

    # Same for below 0
    bouts = counts.bouts(-32323, -2)
    assert len(bouts) == 0

    # The data is in 1 minute epochs
    total_zero_time = Bout.total_time(zero_bouts)
    total_one_time = Bout.total_time(one_bouts)
    total_both_time = Bout.total_time(both_bouts)
    assert total_zero_time == timedelta(minutes=10 * 30)
    assert total_one_time == timedelta(minutes=16 * 30)
    assert total_both_time == total_zero_time + total_one_time

    # Integer seconds spent at 0 should be 300 minutes * 60 = 18000 seconds
    total_zero_time_seconds = total_zero_time.total_seconds()
    assert total_zero_time_seconds == 10 * 30 * 60

    # Inverting bouts within a period
    # Since the file is 0s and 1s, the total time minus the time spent at 0 should equal the time spent at 1
    not_zero_bouts = Bout.time_period_minus_bouts((counts.timestamps[0], counts.timestamps[-1] + timedelta(minutes=1)), zero_bouts)
    total_not_zero_time = Bout.total_time(not_zero_bouts)
    assert total_not_zero_time == total_one_time
def infer_nonwear_actigraph(counts, zero_minutes=timedelta(minutes=60)):
    """Given an Actigraph counts signal, infer nonwear as consecutive zeros of a given duration."""

    # List all bouts where the signal was <= 0
    nonwear_bouts = counts.bouts(-999999, 0)

    # Limit those bouts to the minimum duration specified in "zero_minutes"
    nonwear_bouts = Bout.limit_to_lengths(nonwear_bouts, min_length=zero_minutes)

    # Invert the nonwear bouts to get wear bouts
    wear_bouts = Bout.time_period_minus_bouts([counts.timeframe[0], counts.timeframe[1]], nonwear_bouts)

    return nonwear_bouts, wear_bouts
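# Usage sketch (assumption: "counts" is a 1-minute Actigraph counts Channel,
# loaded as in the tests in this section). The default criterion treats 60+
# consecutive minutes of zeros as nonwear.
def example_nonwear(counts):

    nonwear_bouts, wear_bouts = infer_nonwear_actigraph(counts, zero_minutes=timedelta(minutes=60))
    print("Nonwear: {}, wear: {}".format(Bout.total_time(nonwear_bouts), Bout.total_time(wear_bouts)))

    # Mask the nonwear periods before summarising the data
    counts.delete_windows(nonwear_bouts)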
def test_f():

    # Case F
    # Multiple deletions producing consistent results
    origin = counts.timestamps[0]

    # Delete first 2 hours
    start = origin
    end = origin + timedelta(hours=2)

    # Summarise the data before deletion
    summary_before = Time_Series.Time_Series("")
    summary_before.add_channels(counts.summary_statistics(statistics=[("generic", ["sum", "n", "missing"]), ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    counts.delete_windows([Bout.Bout(start, end)])

    # Summarise the data after the first deletion
    summary_after_a = Time_Series.Time_Series("")
    summary_after_a.add_channels(counts.summary_statistics(statistics=[("generic", ["sum", "n", "missing"]), ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    # Delete midday to 2pm
    start = origin + timedelta(hours=12)
    end = origin + timedelta(hours=14)

    counts.delete_windows([Bout.Bout(start, end)])

    # Summarise the data after the second deletion
    summary_after_b = Time_Series.Time_Series("")
    summary_after_b.add_channels(counts.summary_statistics(statistics=[("generic", ["sum", "n", "missing"]), ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    # 20 hours left
    assert summary_after_b.get_channel("AG_Counts_n").data[0] == 20 * 60

    # 4 hours missing
    assert summary_after_b.get_channel("AG_Counts_missing").data[0] == 4 * 60

    # Sum should be 20 hours of 1s
    assert summary_after_b.get_channel("AG_Counts_sum").data[0] == 20 * 60
def window_statistics(self, start_dts, end_dts, statistics):

    window = Bout.Bout(start_dts, end_dts)
    bouts = self.bouts_involved(window)

    output_row = []
    if len(bouts) > 0:

        for stat in statistics:

            if stat[0] == "generic":

                for val1 in stat[1]:

                    if val1 == "sum":
                        intersection = Bout.bout_list_intersection([window], bouts)
                        Bout.cache_lengths(intersection)
                        sum_seconds = Bout.total_time(intersection).total_seconds()
                        output_row.append(sum_seconds)

                    elif val1 == "mean":
                        intersection = Bout.bout_list_intersection([window], bouts)
                        Bout.cache_lengths(intersection)
                        sum_seconds = Bout.total_time(intersection).total_seconds()
                        if sum_seconds > 0 and len(bouts) > 0:
                            output_row.append(sum_seconds / len(bouts))
                        else:
                            output_row.append(0)

                    elif val1 == "n":
                        output_row.append(len(bouts))

                    else:
                        print("Unrecognised generic statistic: {} in {}".format(stat, statistics))
                        output_row.append(-1)

            elif stat[0] == "sdx":
                # ("sdx", [10,20,30,40,50,60,70,80,90])
                sdx_results = sdx(bouts, stat[1])
                for r in sdx_results:
                    output_row.append(r)

    else:
        # No bouts in this Bout_Collection overlapping this window
        # There was no data for the time period
        # Output -1 for each missing variable
        for i in range(self.expected_results(statistics)):
            output_row.append(-1)

    return output_row
def test_a():

    # Case A
    # Both timestamps precede the data
    origin = counts.timestamps[0]
    start = origin - timedelta(days=2)
    end = origin - timedelta(days=1)

    # Summarise the data before deletion
    summary_before = Time_Series.Time_Series("")
    summary_before.add_channels(counts.summary_statistics(statistics=[("generic", ["sum", "n", "missing"]), ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    counts.delete_windows([Bout.Bout(start, end)])

    # Summarise the data after deletion
    summary_after = Time_Series.Time_Series("")
    summary_after.add_channels(counts.summary_statistics(statistics=[("generic", ["sum", "n", "missing"]), ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    # All values should be identical, loop through them and assert equality
    suffixes = "sum n missing 0_0 0_1 1_1".split(" ")
    for suffix in suffixes:
        assert summary_before.get_channel("AG_Counts_" + suffix).data[0] == summary_after.get_channel("AG_Counts_" + suffix).data[0]
def piecewise_statistics(self, window_size, statistics=[("generic", "mean")], time_period=False, name=""):

    if time_period == False:
        # Snap the start back to midnight and the end forward to the last microsecond of its day
        start = self.timeframe[0] - timedelta(hours=self.timeframe[0].hour, minutes=self.timeframe[0].minute, seconds=self.timeframe[0].second, microseconds=self.timeframe[0].microsecond)
        end = self.timeframe[1] + timedelta(hours=23 - self.timeframe[1].hour, minutes=59 - self.timeframe[1].minute, seconds=59 - self.timeframe[1].second, microseconds=999999 - self.timeframe[1].microsecond)
    else:
        start = time_period[0]
        end = time_period[1]

    # Generate consecutive windows of window_size covering the time period
    windows = []
    start_dts = start
    end_dts = start + window_size
    while start_dts < end:
        window = Bout(start_dts, end_dts)
        windows.append(window)
        start_dts = start_dts + window_size
        end_dts = end_dts + window_size

    return self.build_statistics_channels(windows, statistics, name=name)
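# Usage sketch: hourly means of an assumed, already-loaded counts Channel over
# its whole timeframe. piecewise_statistics returns one statistics Channel per
# requested statistic, so [0] is the channel of means.
def example_hourly_means(counts):

    hourly = counts.piecewise_statistics(timedelta(hours=1), statistics=[("generic", ["mean"])], time_period=counts.timeframe)[0]
    return hourly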
def window_statistics(self, start_dts, end_dts, statistics):

    window = Bout(start_dts, end_dts)
    bouts = self.bouts_involved(window)

    output_row = []
    if len(bouts) > 0:

        for stat in statistics:

            if stat[0] == "generic":

                for val1 in stat[1]:

                    if val1 == "sum":
                        intersection = bout_list_intersection([window], bouts)
                        cache_lengths(intersection)
                        sum_seconds = total_time(intersection).total_seconds()
                        output_row.append(sum_seconds)

                    elif val1 == "mean":
                        intersection = bout_list_intersection([window], bouts)
                        cache_lengths(intersection)
                        sum_seconds = total_time(intersection).total_seconds()
                        if sum_seconds > 0 and len(bouts) > 0:
                            output_row.append(sum_seconds / len(bouts))
                        else:
                            output_row.append(0)

                    elif val1 == "n":
                        output_row.append(len(bouts))

                    else:
                        print("Unrecognised generic statistic: {} in {}".format(stat, statistics))
                        output_row.append(-1)

            elif stat[0] == "sdx":
                # ("sdx", [10,20,30,40,50,60,70,80,90])
                sdx_results = sdx(bouts, stat[1])
                for r in sdx_results:
                    output_row.append(r)

    else:
        # No bouts in this Bout_Collection overlapping this window
        # There was no data for the time period
        # Output -1 for each missing variable
        for i in range(self.expected_results(statistics)):
            output_row.append(-1)

    return output_row
def produce_binary_channels(bouts, lengths, skeleton_channel):

    Bout.cache_lengths(bouts)
    bouts.sort(key=lambda x: x.length, reverse=True)

    channels = []
    for length in lengths:

        # Drop bouts from the list if they are shorter than the current length threshold
        bouts = Bout.limit_to_lengths(bouts, min_length=length, sorted=True)

        channel_name = "{}_mt{}".format(skeleton_channel.name, length)

        # Clone the blank channel, set data to 1 where time is inside any of the bouts
        skeleton_copy = copy.deepcopy(skeleton_channel)
        chan = Channel.channel_from_bouts(bouts, False, False, channel_name, skeleton=skeleton_copy)
        channels.append(chan)

    return channels
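# Usage sketch (names here are assumptions for illustration): binary channels
# marking time inside bouts lasting at least 1 and at least 10 minutes, using an
# existing channel as the timestamp skeleton.
def example_binary_channels(mvpa_bouts, counts):

    return produce_binary_channels(mvpa_bouts, [timedelta(minutes=1), timedelta(minutes=10)], counts)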
def sdx(bouts, percentages):
    """For each percentage x, return the bout length in whole minutes at or below which
    x% of the total bout time is accumulated (e.g. SD50 is the shortest length L such
    that bouts lasting <= L minutes account for at least 50% of total time)."""

    total_time_minutes = Bout.total_time(bouts).total_seconds() / 60

    Bout.cache_lengths(bouts)
    bouts.sort(key=lambda x: x.length)

    highest_length_minutes = int(bouts[-1].length.total_seconds() / 60)

    targets_minutes = [int(total_time_minutes / 100.0 * percentage) for percentage in percentages]

    results = []

    current_target_index = 0
    target_minutes = targets_minutes[current_target_index]

    for length in range(1, highest_length_minutes + 1):

        included_bouts = [b for b in bouts if b.length.total_seconds() / 60 <= length]
        total_included_time_minutes = Bout.total_time(included_bouts).total_seconds() / 60

        # This length may satisfy several consecutive targets at once
        while total_included_time_minutes >= target_minutes:

            # length is the result for the current target
            results.append(length)

            current_target_index += 1
            if current_target_index == len(targets_minutes):
                # No more targets; set an unreachable sentinel to exit the while loop
                target_minutes = 999999999
            else:
                target_minutes = targets_minutes[current_target_index]

        if current_target_index == len(targets_minutes):
            break

    return results
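# Worked example (a sketch; assumes the pampro Bout module is importable): seven
# back-to-back bouts lasting 1..7 minutes total 28 minutes. The 50% target is
# int(28 * 0.5) = 14 minutes, and the cumulative time of bouts <= 5 minutes is
# 1+2+3+4+5 = 15 >= 14, so sdx(bouts, [50]) == [5].
def example_sdx():

    from datetime import datetime, timedelta
    from pampro import Bout

    start = datetime(2000, 1, 1)
    bouts = []
    for minutes in range(1, 8):
        bouts.append(Bout.Bout(start, start + timedelta(minutes=minutes)))
        start += timedelta(minutes=minutes)

    assert sdx(bouts, [50]) == [5]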
def infer_still_bouts_triaxial(x, y, z, window_size=timedelta(seconds=10), noise_cutoff_mg=13, minimum_length=timedelta(seconds=10)):

    # Get windows of standard deviation in each axis
    x_std = x.piecewise_statistics(window_size, statistics=[("generic", ["std"])], time_period=x.timeframe)[0]
    y_std = y.piecewise_statistics(window_size, statistics=[("generic", ["std"])], time_period=y.timeframe)[0]
    z_std = z.piecewise_statistics(window_size, statistics=[("generic", ["std"])], time_period=z.timeframe)[0]

    # Find bouts where the standard deviation is below the threshold for long periods
    x_bouts = x_std.bouts(0, float(noise_cutoff_mg) / 1000.0)
    y_bouts = y_std.bouts(0, float(noise_cutoff_mg) / 1000.0)
    z_bouts = z_std.bouts(0, float(noise_cutoff_mg) / 1000.0)

    x_bouts = Bout.limit_to_lengths(x_bouts, min_length=minimum_length)
    y_bouts = Bout.limit_to_lengths(y_bouts, min_length=minimum_length)
    z_bouts = Bout.limit_to_lengths(z_bouts, min_length=minimum_length)

    # Get the times where those bouts overlap
    x_intersect_y = Bout.bout_list_intersection(x_bouts, y_bouts)
    x_intersect_y_intersect_z = Bout.bout_list_intersection(x_intersect_y, z_bouts)

    return x_intersect_y_intersect_z
def test_nonwear_positions():

    # Case 1: Nonwear at very beginning of file
    ts1, header1 = data_loading.load(os.path.abspath(__file__).replace(os.path.basename(__file__), "") + "_data/testfile23.dat", "Actigraph", datetime_format="%d/%m/%Y")
    counts1 = ts1.get_channel("AG_Counts")
    nonwear_bouts1, wear_bouts1 = channel_inference.infer_nonwear_actigraph(counts1)

    # Case 2: Nonwear in middle of file
    ts2, header2 = data_loading.load(os.path.abspath(__file__).replace(os.path.basename(__file__), "") + "_data/testfile24.dat", "Actigraph", datetime_format="%d/%m/%Y")
    counts2 = ts2.get_channel("AG_Counts")
    nonwear_bouts2, wear_bouts2 = channel_inference.infer_nonwear_actigraph(counts2)

    # Case 3: Nonwear at very end of file
    ts3, header3 = data_loading.load(os.path.abspath(__file__).replace(os.path.basename(__file__), "") + "_data/testfile25.dat", "Actigraph", datetime_format="%d/%m/%Y")
    counts3 = ts3.get_channel("AG_Counts")
    nonwear_bouts3, wear_bouts3 = channel_inference.infer_nonwear_actigraph(counts3)

    # They should all have the same duration of wear and nonwear
    assert Bout.total_time(nonwear_bouts1) == timedelta(hours=2)
    assert Bout.total_time(nonwear_bouts1) == Bout.total_time(nonwear_bouts2)
    assert Bout.total_time(nonwear_bouts1) == Bout.total_time(nonwear_bouts3)
    assert Bout.total_time(wear_bouts1) == Bout.total_time(wear_bouts2)
    assert Bout.total_time(wear_bouts1) == Bout.total_time(wear_bouts3)

    # Delete the relevant nonwear bouts from each channel
    counts1.delete_windows(nonwear_bouts1)
    counts2.delete_windows(nonwear_bouts2)
    counts3.delete_windows(nonwear_bouts3)

    # Total data should be equal
    assert sum(counts1.data) == sum(counts2.data)
    assert sum(counts1.data) == sum(counts3.data)

    # Summary level mean should also be the same
    s1 = counts1.summary_statistics()[0]
    s2 = counts2.summary_statistics()[0]
    s3 = counts3.summary_statistics()[0]
    assert s1.data[0] == s2.data[0]
    assert s1.data[0] == s3.data[0]
def infer_nonwear_triaxial(x, y, z, minimum_length=timedelta(hours=1), noise_cutoff_mg=13, return_nonwear_binary=False):
    """Use the 3 channels of triaxial acceleration to infer periods of nonwear."""

    # Get an exhaustive list of bouts where the monitor was still
    x_intersect_y_intersect_z = infer_still_bouts_triaxial(x, y, z, noise_cutoff_mg=noise_cutoff_mg, minimum_length=minimum_length)

    # Restrict those bouts to only those with a length that exceeds the minimum length criterion
    x_intersect_y_intersect_z = Bout.limit_to_lengths(x_intersect_y_intersect_z, min_length=minimum_length)

    # Legacy code - probably going to delete this
    if return_nonwear_binary:
        # Create a parallel, binary channel indicating if that time point was in or out of wear
        nonwear_binary = Channel.channel_from_bouts(x_intersect_y_intersect_z, x.timeframe, False, "nonwear", skeleton=x)
        return (x_intersect_y_intersect_z, nonwear_binary)
    else:
        return x_intersect_y_intersect_z
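# Usage sketch (assumptions: x, y, z are calibrated acceleration Channels in g,
# and vm is a derived vector magnitude Channel): an hour or more of stillness is
# treated as nonwear and masked out of the derived signal before summarising.
def example_mask_nonwear(x, y, z, vm):

    nonwear_bouts = infer_nonwear_triaxial(x, y, z, minimum_length=timedelta(hours=1), noise_cutoff_mg=13)
    vm.delete_windows(nonwear_bouts)
    return nonwear_bouts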
def test_b():

    # Case B
    # First timestamp precedes the data, second doesn't
    origin = counts.timestamps[0]
    start = origin - timedelta(hours=12)
    end = origin + timedelta(hours=12)

    # Summarise the data before deletion
    summary_before = Time_Series.Time_Series("")
    summary_before.add_channels(counts.summary_statistics(statistics=[("generic", ["sum", "n", "missing"]), ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    counts.delete_windows([Bout.Bout(start, end)])

    # Summarise the data after deletion
    summary_after = Time_Series.Time_Series("")
    summary_after.add_channels(counts.summary_statistics(statistics=[("generic", ["sum", "n", "missing"]), ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    # n should go down and missing should go up
    assert summary_before.get_channel("AG_Counts_n").data[0] > summary_after.get_channel("AG_Counts_n").data[0]
    assert summary_before.get_channel("AG_Counts_missing").data[0] < summary_after.get_channel("AG_Counts_missing").data[0]

    # Should only be 12 hours left
    assert summary_after.get_channel("AG_Counts_n").data[0] == 12 * 60

    # And 12 hours missing
    assert summary_after.get_channel("AG_Counts_missing").data[0] == 12 * 60
def test_c():

    # Case C
    # Both timestamps are inside the data
    origin = counts.timestamps[0]
    start = origin + timedelta(hours=6)
    end = origin + timedelta(hours=7)

    # Summarise the data before deletion
    summary_before = Time_Series.Time_Series("")
    summary_before.add_channels(counts.summary_statistics(statistics=[("generic", ["sum", "n", "missing"]), ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    counts.delete_windows([Bout.Bout(start, end)])

    # Summarise the data after deletion
    summary_after = Time_Series.Time_Series("")
    summary_after.add_channels(counts.summary_statistics(statistics=[("generic", ["sum", "n", "missing"]), ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    # n should go down and missing should go up
    assert summary_before.get_channel("AG_Counts_n").data[0] > summary_after.get_channel("AG_Counts_n").data[0]
    assert summary_before.get_channel("AG_Counts_missing").data[0] < summary_after.get_channel("AG_Counts_missing").data[0]

    # Should only be 23 hours left
    assert summary_after.get_channel("AG_Counts_n").data[0] == 23 * 60

    # And 1 hour missing
    assert summary_after.get_channel("AG_Counts_missing").data[0] == 1 * 60
def calibrate(x, y, z, allow_overwrite=True, budget=1000, noise_cutoff_mg=13):
    """Use still bouts in the given triaxial data to calibrate it and return the calibrated channels."""

    calibration_diagnostics = OrderedDict()

    vm = channel_inference.infer_vector_magnitude(x, y, z)

    # Get a list of bouts where standard deviation in each axis is below given threshold ("still")
    still_bouts = channel_inference.infer_still_bouts_triaxial(x, y, z, noise_cutoff_mg=noise_cutoff_mg, minimum_length=timedelta(minutes=1))
    num_still_bouts = len(still_bouts)
    num_still_seconds = Bout.total_time(still_bouts).total_seconds()

    # Summarise VM in 10s intervals
    vm_windows = vm.piecewise_statistics(timedelta(seconds=10), [("generic", ["mean"])], time_period=vm.timeframe)[0]

    # Get a list of bouts where VM was between 0.5 and 1.5g ("reasonable")
    reasonable_bouts = vm_windows.bouts(0.5, 1.5)
    num_reasonable_bouts = len(reasonable_bouts)
    num_reasonable_seconds = Bout.total_time(reasonable_bouts).total_seconds()

    # We only want still bouts where the VM level was within 0.5g of 1g
    # Therefore intersect "still" time with "reasonable" time
    still_bouts = Bout.bout_list_intersection(reasonable_bouts, still_bouts)

    # And we only want bouts where it was still and reasonable for 10s or longer
    still_bouts = Bout.limit_to_lengths(still_bouts, min_length=timedelta(seconds=10))
    num_final_bouts = len(still_bouts)
    num_final_seconds = Bout.total_time(still_bouts).total_seconds()

    # Get the average X,Y,Z for each still bout (inside which, by definition, XYZ should not change)
    still_x, num_samples = x.build_statistics_channels(still_bouts, [("generic", ["mean", "n"])])
    still_y = y.build_statistics_channels(still_bouts, [("generic", ["mean"])])[0]
    still_z = z.build_statistics_channels(still_bouts, [("generic", ["mean"])])[0]

    # Get the octant positions of the points to calibrate on
    occupancy = octant_occupancy(still_x.data, still_y.data, still_z.data)

    # Are they fairly distributed?
    comparisons = {"x<0": [0, 1, 2, 3], "x>0": [4, 5, 6, 7], "y<0": [0, 1, 4, 5], "y>0": [2, 3, 6, 7], "z<0": [0, 2, 4, 6], "z>0": [1, 3, 5, 7]}
    for axis in ["x", "y", "z"]:
        mt = sum(occupancy[comparisons[axis + ">0"]])
        lt = sum(occupancy[comparisons[axis + "<0"]])
        calibration_diagnostics[axis + "_inequality"] = abs(mt - lt) / sum(occupancy)

    # Calculate the initial error without doing any calibration
    start_error = evaluate_solution(still_x, still_y, still_z, num_samples, [0, 1, 0, 1, 0, 1])

    # Do offset and scale calibration by default
    offset_only_calibration = False
    calibration_diagnostics["calibration_method"] = "offset and scale"

    # If we have fewer than 500 points to calibrate with, or if more than 2 octants are empty
    if len(still_x.data) < 500 or sum(occupancy == 0) > 2:
        offset_only_calibration = True
        calibration_diagnostics["calibration_method"] = "offset only"

    # Search for the correct way to calibrate the data
    calibration_parameters = find_calibration_parameters(still_x.data, still_y.data, still_z.data, offset_only=offset_only_calibration)

    for param, value in zip("x_offset,x_scale,y_offset,y_scale,z_offset,z_scale".split(","), calibration_parameters):
        calibration_diagnostics[param] = value

    for i, occ in enumerate(occupancy):
        calibration_diagnostics["octant_" + str(i)] = occ

    # Calculate the final error after calibration
    end_error = evaluate_solution(still_x, still_y, still_z, num_samples, calibration_parameters)

    calibration_diagnostics["start_error"] = start_error
    calibration_diagnostics["end_error"] = end_error
    calibration_diagnostics["num_final_bouts"] = num_final_bouts
    calibration_diagnostics["num_final_seconds"] = num_final_seconds
    calibration_diagnostics["num_still_bouts"] = num_still_bouts
    calibration_diagnostics["num_still_seconds"] = num_still_seconds
    calibration_diagnostics["num_reasonable_bouts"] = num_reasonable_bouts
    calibration_diagnostics["num_reasonable_seconds"] = num_reasonable_seconds

    if allow_overwrite:
        # If we do not need to preserve the original x,y,z values, we can just calibrate that data
        # Apply the best calibration factors to the data
        do_calibration(x, y, z, calibration_parameters)
        return (x, y, z, calibration_diagnostics)

    else:
        # Else we create an independent copy of the raw data and calibrate that instead
        cal_x = copy.deepcopy(x)
        cal_y = copy.deepcopy(y)
        cal_z = copy.deepcopy(z)

        # Apply the best calibration factors to the data
        do_calibration(cal_x, cal_y, cal_z, calibration_parameters)
        return (cal_x, cal_y, cal_z, calibration_diagnostics)
def process_file(job_details):

    id_num = str(job_details["pid"])
    filename = job_details["filename"]
    filename_short = os.path.basename(filename).split('.')[0]

    meta = os.path.join(results_folder, "metadata_{}.csv".format(filename_short))

    # Check if analysis_meta already exists...
    if os.path.isfile(meta):
        os.remove(meta)

    battery_max = 0
    if monitor_type == "GeneActiv":
        battery_max = GA_battery_max
    elif monitor_type == "Axivity":
        battery_max = AX_battery_max

    epochs = [timedelta(minutes=n) for n in epoch_minutes]

    # Use the 'epoch_minutes' variable to create the corresponding names for the epochs defined
    names = []
    plots_list = []
    for n in epoch_minutes:
        if n % 60 == 0:
            # If the epoch is a multiple of 60, it is named in hours, e.g. '1h'
            name = "{}h".format(int(n / 60))
        else:
            # Otherwise it is named in minutes, e.g. '15m'
            name = "{}m".format(n)
        names.append(name)

        if n in epoch_plot:
            plots_list.append(name)

    # Fast-load the data to identify any anomalies
    qc_ts, qc_header = data_loading.fast_load(filename, monitor_type)

    qc_channels = qc_ts.get_channels(["X", "Y", "Z"])

    anomalies = diagnostics.diagnose_fix_anomalies(qc_channels, discrepancy_threshold=2)

    # Load the data
    ts, header = data_loading.load(filename, monitor_type, compress=False)

    header["processed_file"] = os.path.basename(filename)

    # Some monitors have manufacturers' parameters applied to them; preserve these but rename them
    var_list = ["x_gain", "x_offset", "y_gain", "y_offset", "z_gain", "z_offset", "calibration_date"]
    for var in var_list:
        if var in header.keys():
            header[("manufacturers_%s" % var)] = header[var]
            header.pop(var)

    x, y, z, battery, temperature, integrity = ts.get_channels(["X", "Y", "Z", "Battery", "Temperature", "Integrity"])
    initial_channels = [x, y, z, battery, temperature, integrity]

    # Create a dictionary of anomalies total and types
    anomalies_dict = {"QC_anomalies_total": len(anomalies)}

    # Check whether any anomalies have been found
    if len(anomalies) > 0:
        anomalies_file = os.path.join(results_folder, "{}_anomalies.csv".format(filename_short))
        df = pd.DataFrame(anomalies)

        for type in anomaly_types:
            anomalies_dict["QC_anomaly_{}".format(type)] = (df.anomaly_type.values == type).sum()

        df = df.set_index("anomaly_type")

        # Print record of anomalies to anomalies_file
        df.to_csv(anomalies_file)

        # If anomalies have been found, fix them
        channels = diagnostics.fix_anomalies(anomalies, initial_channels)

    else:
        for type in anomaly_types:
            anomalies_dict["QC_anomaly_{}".format(type)] = 0

        # No anomalies
        channels = initial_channels

    first_channel = channels[0]

    # Convert timestamps to offsets from the first timestamp
    start, offsets = Channel.timestamps_to_offsets(first_channel.timestamps)

    # As timestamps are sparse, expand them to 1 per observation
    offsets = Channel.interpolate_offsets(offsets, len(first_channel.data))

    # For each channel, convert to offset timestamps
    for c in channels:
        c.start = start
        c.set_contents(c.data, offsets, timestamp_policy="offset")

    # Find approximate first and last battery percentage values
    first_battery_pct = round((battery.data[1] / battery_max) * 100, 2)
    last_battery_pct = round((battery.data[-1] / battery_max) * 100, 2)

    # Calculate the time frame to use
    start = time_utilities.start_of_day(x.timeframe[0])
    end = time_utilities.end_of_day(x.timeframe[-1])
    tp = (start, end)

    # If the sampling frequency is greater than 40Hz, apply a low pass filter
    if x.frequency > 40:
        x = pampro_fourier.low_pass_filter(x, 20, frequency=x.frequency, order=4)
        x.name = "X"  # because the LPF changes the name, we want to override that

        y = pampro_fourier.low_pass_filter(y, 20, frequency=y.frequency, order=4)
        y.name = "Y"

        z = pampro_fourier.low_pass_filter(z, 20, frequency=z.frequency, order=4)
        z.name = "Z"

    # Find any bouts where data is "missing" BEFORE calibration
    missing_bouts = []
    if -111 in x.data:

        # Extract the bouts of the data channels where the data == -111 (the missing value)
        missing = x.bouts(-111, -111)

        # Add a buffer of 2 minutes (120 seconds) to the beginning and end of each bout
        for item in missing:
            bout_start = max(item.start_timestamp - timedelta(seconds=120), x.timeframe[0])
            bout_end = min(item.end_timestamp + timedelta(seconds=120), x.timeframe[1])
            new_bout = Bout.Bout(start_timestamp=bout_start, end_timestamp=bout_end)
            missing_bouts.append(new_bout)

    x.delete_windows(missing_bouts)
    y.delete_windows(missing_bouts)
    z.delete_windows(missing_bouts)
    integrity.fill_windows(missing_bouts, fill_value=1)

    ################ CALIBRATION #######################

    # Extract still bouts
    calibration_ts, calibration_header = triaxial_calibration.calibrate_stepone(x, y, z, noise_cutoff_mg=noise_cutoff_mg)

    # Calibrate the acceleration to local gravity
    cal_diagnostics = triaxial_calibration.calibrate_steptwo(calibration_ts, calibration_header, calibration_statistics=False)

    # Calibrate the data
    triaxial_calibration.do_calibration(x, y, z, temperature=None, cp=cal_diagnostics)

    x.delete_windows(missing_bouts)
    y.delete_windows(missing_bouts)
    z.delete_windows(missing_bouts)
    temperature.delete_windows(missing_bouts)
    battery.delete_windows(missing_bouts)

    # Derive some signal features
    vm = channel_inference.infer_vector_magnitude(x, y, z)
    vm.delete_windows(missing_bouts)

    if "HPFVM" in stats:
        vm_hpf = channel_inference.infer_vm_hpf(vm)
    else:
        vm_hpf = None

    if "ENMO" in stats:
        enmo = channel_inference.infer_enmo(vm)
    else:
        enmo = None

    if "PITCH" in stats and "ROLL" in stats:
        pitch, roll = channel_inference.infer_pitch_roll(x, y, z)
    else:
        pitch = roll = None

    # Infer nonwear and mask those data points in the signal
    nonwear_bouts = channel_inference.infer_nonwear_triaxial(x, y, z, noise_cutoff_mg=noise_cutoff_mg)
    for bout in nonwear_bouts:
        # Show non-wear bouts in purple
        bout.draw_properties = {'lw': 0, 'alpha': 0.75, 'facecolor': '#764af9'}

    for channel, channel_name in zip([enmo, vm_hpf, pitch, roll, temperature, battery], ["ENMO", "HPFVM", "PITCH", "ROLL", "Temperature", "Battery"]):

        if channel_name in stats:

            # Collapse the sample data to a processing epoch (in seconds) so data is summarised
            epoch_level_channel = channel.piecewise_statistics(timedelta(seconds=processing_epoch), time_period=tp)[0]
            epoch_level_channel.name = channel_name

            # Temperature and battery are not masked for nonwear
            if channel_name not in ["Temperature", "Battery"]:
                epoch_level_channel.delete_windows(nonwear_bouts)

            epoch_level_channel.delete_windows(missing_bouts)
            ts.add_channel(epoch_level_channel)

    # Collapse the binary integrity channel
    epoch_level_channel = integrity.piecewise_statistics(timedelta(seconds=int(processing_epoch)), statistics=[("binary", ["flag"])], time_period=tp)[0]
    epoch_level_channel.name = "Integrity"
    epoch_level_channel.fill_windows(missing_bouts, fill_value=1)
    ts.add_channel(epoch_level_channel)

    # Create and open results files
    results_files = [os.path.join(results_folder, "{}_{}.csv".format(name, filename_short)) for name in names]
    files = [open(file, "w") for file in results_files]

    # Write the column headers to the created files
    for f in files:
        f.write(pampro_utilities.design_file_header(stats) + "\n")

    # Write out and plot the results at each analysis resolution
    for epoch, name, f in zip(epochs, names, files):

        results_ts = ts.piecewise_statistics(epoch, statistics=stats, time_period=tp, name=id_num)
        results_ts.write_channels_to_file(file_target=f)
        f.flush()

        if name in plots_list:
            # For each statistic in the plotting dictionary, produce a plot in the results folder
            for stat, plot in plotting_dict.items():
                try:
                    results_ts[stat].add_annotations(nonwear_bouts)
                    results_ts.draw([[stat]], file_target=os.path.join(results_folder, plot.format(filename_short, name)))
                except KeyError:
                    pass

    header["processing_script"] = version
    header["analysis_resolutions"] = names
    header["noise_cutoff_mg"] = noise_cutoff_mg
    header["processing_epoch"] = processing_epoch
    header["QC_first_battery_pct"] = first_battery_pct
    header["QC_last_battery_pct"] = last_battery_pct

    metadata = {**header, **anomalies_dict, **cal_diagnostics}

    # Write metadata to file
    pampro_utilities.dict_write(meta, id_num, metadata)

    for c in ts:
        del c.data
        del c.timestamps
        del c.indices
        del c.cached_indices
def qc_analysis(job_details):

    id_num = str(job_details["pid"])
    filename = job_details["filename"]
    filename_short = os.path.basename(filename).split('.')[0]

    battery_max = 0
    if filetype == "GeneActiv":
        battery_max = GA_battery_max
    elif filetype == "Axivity":
        battery_max = AX_battery_max

    # Load the data from the hdf5 file
    ts, header = data_loading.fast_load(filename, filetype)

    header["QC_filename"] = os.path.basename(filename)

    x, y, z, battery, temperature = ts.get_channels(["X", "Y", "Z", "Battery", "Temperature"])

    # Create a channel of battery percentage, based on the assumed battery maximum value
    battery_pct = Channel.Channel.clone(battery)
    battery_pct.data = (battery.data / battery_max) * 100

    channels = [x, y, z, battery, temperature, battery_pct]

    anomalies = diagnostics.diagnose_fix_anomalies(channels, discrepancy_threshold=2)

    # Create a dictionary of anomaly types
    anomalies_dict = dict()

    # Check whether any anomalies have been found
    if len(anomalies) > 0:
        anomalies_file = os.path.join(anomalies_folder, "{}_anomalies.csv".format(filename_short))
        df = pd.DataFrame(anomalies)

        for type in anomaly_types:
            anomalies_dict["QC_anomaly_{}".format(type)] = (df.anomaly_type.values == type).sum()

        df = df.set_index("anomaly_type")

        # Print record of anomalies to anomalies_file
        df.to_csv(anomalies_file)

    else:
        for type in anomaly_types:
            anomalies_dict["QC_anomaly_{}".format(type)] = 0

    # Check for axis anomalies
    axes_dict = diagnostics.diagnose_axes(x, y, z, noise_cutoff_mg=13)

    axis_anomaly = False
    for key, val in axes_dict.items():
        anomalies_dict["QC_{}".format(key)] = val
        if key.endswith("max"):
            if val > axis_max:
                axis_anomaly = True
        elif key.endswith("min"):
            if val < axis_min:
                axis_anomaly = True

    # Create a "check battery" flag
    check_battery = False

    # Calculate first and last battery percentages
    first_battery_pct = round((battery_pct.data[1]), 2)
    last_battery_pct = round((battery_pct.data[-1]), 2)
    header["QC_first_battery_pct"] = first_battery_pct
    header["QC_last_battery_pct"] = last_battery_pct

    # Calculate the lowest battery percentage
    # If battery_pct has a missing_value, exclude those values
    if battery_pct.missing_value == "None":
        lowest_battery_pct = min(battery_pct.data)
    else:
        test_array = np.delete(battery_pct.data, np.where(battery_pct.data == battery_pct.missing_value))
        lowest_battery_pct = min(test_array)

    header["QC_lowest_battery_pct"] = round(lowest_battery_pct, 2)
    header["QC_lowest_battery_threshold"] = battery_minimum

    # Find the maximum battery discharge in any 24hr period
    max_discharge = battery_pct.channel_max_decrease(time_period=timedelta(hours=discharge_hours))
    header["QC_max_discharge"] = round(max_discharge, 2)
    header["QC_discharge_time_period"] = "{} hours".format(discharge_hours)
    header["QC_discharge_threshold"] = discharge_pct

    # Set the flag if the lowest battery percentage dips below battery_minimum at any point,
    # OR the maximum discharge exceeds discharge_pct over any period of discharge_hours
    if lowest_battery_pct < battery_minimum or max_discharge > discharge_pct:
        check_battery = True

    header["QC_check_battery"] = str(check_battery)
    header["QC_axis_anomaly"] = str(axis_anomaly)

    # Calculate the time frame to use
    start = time_utilities.start_of_day(x.timeframe[0])
    end = time_utilities.end_of_day(x.timeframe[-1])
    tp = (start, end)

    results_ts = Time_Series.Time_Series("")

    # Derive some signal features
    vm = channel_inference.infer_vector_magnitude(x, y, z)
    enmo = channel_inference.infer_enmo(vm)
    enmo.minimum = 0
    enmo.maximum = enmo_max

    # Infer nonwear
    nonwear_bouts = channel_inference.infer_nonwear_for_qc(x, y, z, noise_cutoff_mg=noise_cutoff_mg)

    # Use nonwear bouts to calculate wear bouts
    wear_bouts = Bout.time_period_minus_bouts(enmo.timeframe, nonwear_bouts)

    # Use wear bouts to calculate the amount of wear time in the file in hours, save to metadata
    total_wear = Bout.total_time(wear_bouts)
    total_seconds_wear = total_wear.total_seconds()
    total_hours_wear = round(total_seconds_wear / 3600)
    header["QC_total_hours_wear"] = total_hours_wear

    # Split the enmo channel into lists of bouts for each quadrant:
    #   quadrant_0 = 00:00 -> 06:00
    #   quadrant_1 = 06:00 -> 12:00
    #   quadrant_2 = 12:00 -> 18:00
    #   quadrant_3 = 18:00 -> 00:00
    q_0, q_1, q_2, q_3 = channel_inference.create_quadrant_bouts(enmo)

    # Calculate the intersection of each set of bouts with wear_bouts, then calculate the wear time in each quadrant
    for quadrant, name1, name2 in ([q_0, "QC_hours_wear_quadrant_0", "QC_pct_wear_quadrant_0"],
                                   [q_1, "QC_hours_wear_quadrant_1", "QC_pct_wear_quadrant_1"],
                                   [q_2, "QC_hours_wear_quadrant_2", "QC_pct_wear_quadrant_2"],
                                   [q_3, "QC_hours_wear_quadrant_3", "QC_pct_wear_quadrant_3"]):
        quadrant_wear = Bout.bout_list_intersection(quadrant, wear_bouts)
        seconds_wear = Bout.total_time(quadrant_wear).total_seconds()
        hours_wear = round(seconds_wear / 3600)
        header[name1] = hours_wear
        header[name2] = round(((hours_wear / total_hours_wear) * 100), 2)

    for bout in nonwear_bouts:
        # Show non-wear bouts in purple
        bout.draw_properties = {'lw': 0, 'alpha': 0.75, 'facecolor': '#764af9'}

    for channel, channel_name in zip([enmo, battery_pct], ["ENMO", "Battery_percentage"]):
        channel.name = channel_name
        results_ts.add_channel(channel)

    if PLOT == "YES":
        # Plot statistics as subplots in one plot file per data file
        results_ts["ENMO"].add_annotations(nonwear_bouts)
        results_ts.draw_qc(plotting_df, file_target=os.path.join(charts_folder, "{}_plots.png".format(filename_short)))

    header["QC_script"] = version

    # File of metadata from the QC process
    qc_output = os.path.join(results_folder, "qc_meta_{}.csv".format(filename_short))

    # Check if qc_output already exists...
    if os.path.isfile(qc_output):
        os.remove(qc_output)

    metadata = {**header, **anomalies_dict}

    # Write metadata to file
    pampro_utilities.dict_write(qc_output, id_num, metadata)

    for c in ts:
        del c.data
        del c.timestamps
        del c.indices
        del c.cached_indices