def getActivitySummary(epochFile, nonWearFile, summary,
    activityClassification=True, timeZone='Europe/London',
    startTime=None, endTime=None,
    epochPeriod=30, stationaryStd=13, minNonWearDuration=60,
    mgCutPointMVPA=100, mgCutPointVPA=425,
    activityModel="activityModels/walmsley-nov20.tar",
    intensityDistribution=False, useRecommendedImputation=True,
    psd=False, fourierFrequency=False, fourierWithAcc=False, m10l5=False,
    verbose=False):
    """Calculate overall activity summary from <epochFile> data

    Get overall activity summary from input <epochFile>. This is achieved by
    1) get interrupt and data error summary vals
    2) check if data occurs at a daylight savings crossover
    3) calculate wear-time statistics, and write nonWear episodes to file
    4) predict activity from features, and add label column
    5) calculate imputation values to replace nan PA metric values
    6) calculate empirical cumulative distribution function of vector magnitudes
    7) optionally calculate circadian metrics (PSD, Fourier frequency, M10L5)
    8) derive main movement summaries (overall, weekday/weekend, and hour)

    :param str epochFile: Input csv.gz file of processed epoch data, or an
        already loaded pandas.DataFrame (used as-is, without re-reading)
    :param str nonWearFile: Output filename for non wear .csv.gz episodes
    :param dict summary: Output dictionary containing all summary metrics
    :param bool activityClassification: Perform machine learning of activity states
    :param str timeZone: Timezone in country/city format. Used here to localise
        naive <startTime>/<endTime> before they are compared with the epoch index
    :param datetime startTime: Remove data before this time in analysis. Naive
        values are localised to <timeZone>; timezone-aware values are used as given
    :param datetime endTime: Remove data after this time in analysis. Naive
        values are localised to <timeZone>; timezone-aware values are used as given
    :param int epochPeriod: Size of epoch time window (in seconds)
    :param int stationaryStd: Threshold (in mg units) for stationary vs not
    :param int minNonWearDuration: Minimum duration of nonwear events (minutes)
    :param int mgCutPointMVPA: Milli-gravity threshold for moderate intensity activity
    :param int mgCutPointVPA: Milli-gravity threshold for vigorous intensity activity
    :param str activityModel: Input tar model file which contains random forest
        pickle model, HMM priors/transitions/emissions npy files, and npy file
        of METS for each activity state
    :param bool intensityDistribution: Add intensity outputs to dict <summary>
    :param bool useRecommendedImputation: Highly recommended method to impute
        missing data using data from other days around the same time
    :param bool psd: If True, call circadianRhythms.calculatePSD on the epoch data
    :param bool fourierFrequency: If True, call
        circadianRhythms.calculateFourierFreq on the epoch data
    :param bool fourierWithAcc: Forwarded unchanged to calculatePSD and
        calculateFourierFreq (presumably selects acceleration rather than
        activity labels as their input -- confirm in circadianRhythms)
    :param bool m10l5: If True, call circadianRhythms.calculateM10L5 on the epoch data
    :param bool verbose: Print verbose output

    :return: Pandas dataframe of activity epoch data
    :rtype: pandas.DataFrame
    :return: Activity prediction labels (empty if <activityClassification>==False)
    :rtype: list(str)

    :return: Write .csv.gz non wear episodes file to <nonWearFile>
    :rtype: void
    :return: Movement summary values written to dict <summary>
    :rtype: void

    :raises SystemExit: With code -9 if no rows remain after the
        <startTime>/<endTime> filter

    :Example:
    >>> import summariseEpoch
    >>> summary = {}
    >>> epochData, labels = summariseEpoch.getActivitySummary( "epoch.csv.gz",
            "nonWear.csv.gz", summary)
    <nonWear file written to "nonWear.csv.gz" and dict "summary" update with outcomes>
    """

    accUtils.toScreen("=== Summarizing ===")

    if isinstance(epochFile, pd.DataFrame):
        e = epochFile
    else:
        # Use python PANDAS framework to read in and store epochs
        e = pd.read_csv(
            epochFile,
            index_col=['time'],
            parse_dates=['time'],
            date_parser=accUtils.date_parser,
        )

    # Remove data before/after user specified start/end times
    rows = e.shape[0]
    tz = pytz.timezone(timeZone)
    if startTime is not None:
        # tz.localize() raises ValueError on a datetime that already carries a
        # tzinfo, so only naive values are localised.
        if startTime.tzinfo is None:
            localStartTime = tz.localize(startTime)
        else:
            localStartTime = startTime
        e = e[e.index >= localStartTime]
    if endTime is not None:
        if endTime.tzinfo is None:
            localEndTime = tz.localize(endTime)
        else:
            localEndTime = endTime
        e = e[e.index <= localEndTime]

    # Quit if no data left
    if e.shape[0] == 0:
        print("No rows remaining after start/end time removal")
        print("Previously there were %d rows, now shape: %s" % (rows, str(e.shape)))
        sys.exit(-9)

    # Get start & end times (from here on these are the first/last epoch
    # timestamps, not the user-supplied window bounds)
    startTime = e.index[0]
    endTime = e.index[-1]
    summary['file-startTime'] = accUtils.date_strftime(startTime)
    summary['file-endTime'] = accUtils.date_strftime(endTime)
    summary['file-firstDay(0=mon,6=sun)'] = startTime.weekday()

    # Get interrupt and data error summary vals
    e = get_interrupts(e, epochPeriod, summary)

    # Check daylight savings time crossover
    check_daylight_savings_crossovers(e, summary)

    # Calculate wear-time statistics, and write nonWear episodes to file
    get_wear_time_stats(e, epochPeriod, stationaryStd, minNonWearDuration,
                        nonWearFile, summary)

    # Calculate and include data quality statistics
    get_total_reads(e, epochPeriod, summary)
    get_clips(e, epochPeriod, summary)

    # Predict activity from features, and add label column
    if activityClassification:
        e, labels = accClassification.activityClassification(e, activityModel)
    else:
        labels = []

    # enmo : Euclidean Norm Minus One
    # Trunc : negative values truncated to zero (i.e never negative)
    # enmo = sqrt(x^2 + y^2 + z^2) - 1
    # enmoTrunc = max(enmo, 0)
    e['acc'] = e['enmoTrunc'] * 1000  # convert enmoTrunc to milli-G units

    # Calculate imputation values to replace nan PA metric values
    e = perform_wearTime_imputation(e, verbose)
    e['CutPointMVPA'] = e['accImputed'] >= mgCutPointMVPA
    e['CutPointVPA'] = e['accImputed'] >= mgCutPointVPA

    # Calculate empirical cumulative distribution function of vector magnitudes
    if intensityDistribution:
        calculateECDF(e, 'acc', summary, useRecommendedImputation)

    # Calculate circadian metrics
    if psd:
        circadianRhythms.calculatePSD(e, epochPeriod, fourierWithAcc, labels, summary)
    if fourierFrequency:
        circadianRhythms.calculateFourierFreq(e, epochPeriod, fourierWithAcc, labels, summary)
    if m10l5:
        circadianRhythms.calculateM10L5(e, epochPeriod, summary)

    # Main movement summaries
    writeMovementSummaries(e, labels, summary, useRecommendedImputation)

    # Return physical activity summary
    return e, labels
def getActivitySummary(epochFile, nonWearFile, summary,
    activityClassification=True, startTime=None, endTime=None,
    epochPeriod=30, stationaryStd=13, minNonWearDuration=60,
    mgMVPA=100, mgVPA=425,
    activityModel="activityModels/doherty2018.tar",
    intensityDistribution=False, verbose=False):
    """Calculate overall activity summary from <epochFile> data

    Get overall activity summary from input <epochFile>. This is achieved by
    1) get interrupt and data error summary vals
    2) check if data occurs at a daylight savings crossover
    3) calculate wear-time statistics, and write nonWear episodes to file
    4) predict activity from features, and add label column
    5) calculate imputation values to replace nan PA metric values
    6) calculate empirical cumulative distribution function of vector magnitudes
    7) derive main movement summaries (overall, weekday/weekend, and hour)

    :param str epochFile: Input csv.gz file of processed epoch data, or an
        already loaded pandas.DataFrame indexed by time
    :param str nonWearFile: Output filename for non wear .csv.gz episodes
    :param dict summary: Output dictionary containing all summary metrics
    :param bool activityClassification: Perform machine learning of activity states
    :param datetime startTime: Remove data before this time in analysis
    :param datetime endTime: Remove data after this time in analysis
    :param int epochPeriod: Size of epoch time window (in seconds)
    :param int stationaryStd: Threshold (in mg units) for stationary vs not
    :param int minNonWearDuration: Minimum duration of nonwear events (minutes)
    :param int mgMVPA: Milli-gravity threshold for moderate intensity activity
    :param int mgVPA: Milli-gravity threshold for vigorous intensity activity
    :param str activityModel: Input tar model file which contains random forest
        pickle model, HMM priors/transitions/emissions npy files, and npy file
        of METS for each activity state
    :param bool intensityDistribution: Add intensity outputs to dict <summary>
    :param bool verbose: Print verbose output

    :return: Pandas dataframe of activity epoch data
    :rtype: pandas.DataFrame
    :return: Activity prediction labels (empty if <activityClassification>==False)
    :rtype: list(str)

    :return: Write .csv.gz non wear episodes file to <nonWearFile>
    :rtype: void
    :return: Movement summary values written to dict <summary>
    :rtype: void

    :raises SystemExit: With code -9 if no rows remain after the
        <startTime>/<endTime> filter

    :Example:
    >>> import summariseEpoch
    >>> summary = {}
    >>> epochData, labels = summariseEpoch.getActivitySummary( "epoch.csv.gz",
            "nonWear.csv.gz", summary)
    <nonWear file written to "nonWear.csv.gz" and dict "summary" update with outcomes>
    """

    if isinstance(epochFile, pd.DataFrame):
        e = epochFile
        # The CSV path below always sorts by time; do the same for a frame
        # handed in by the caller, because the first/last index entries are
        # reported as the file start/end times further down.
        if not e.index.is_monotonic_increasing:
            e = e.sort_index()
    else:
        # use python PANDAS framework to read in and store epochs
        e = pd.read_csv(epochFile, parse_dates=['time'], index_col=['time'],
                        compression='gzip').sort_index()

    # remove data before/after user specified start/end times
    rows = e.shape[0]
    if startTime is not None:
        e = e[e.index >= startTime]
    if endTime is not None:
        e = e[e.index <= endTime]

    # quit if no data left
    if e.shape[0] == 0:
        print("no rows remaining after start/end time removal")
        print("previously there were %d rows, now shape: %s" % (rows, str(e.shape)))
        sys.exit(-9)

    # get start & end times (from here on these are the first/last epoch
    # timestamps, not the user-supplied window bounds)
    startTime = pd.to_datetime(e.index.values[0])
    endTime = pd.to_datetime(e.index.values[-1])
    summary['file-startTime'] = startTime.strftime('%Y-%m-%d %H:%M:%S')
    summary['file-endTime'] = endTime.strftime('%Y-%m-%d %H:%M:%S')
    summary['file-firstDay(0=mon,6=sun)'] = startTime.weekday()

    # get interrupt and data error summary vals; the return value is not
    # needed here, the call is made for what it records in <summary>
    get_interrupts(e, epochPeriod, summary)

    # check if data occurs at a daylight savings crossover
    e = check_daylight_savings_crossover(e, startTime, endTime, summary)

    # calculate wear-time statistics, and write nonWear episodes to file
    get_wear_time_stats(e, epochPeriod, stationaryStd, minNonWearDuration,
                        nonWearFile, summary)

    # predict activity from features, and add label column
    if activityClassification:
        e, labels = accClassification.activityClassification(e, activityModel)
    else:
        labels = []

    # enmo : Euclidean Norm Minus One
    # Trunc : negative values truncated to zero (i.e never negative)
    # enmo = sqrt(x^2 + y^2 + z^2) - 1
    # enmoTrunc = max(enmo, 0)
    e['acc'] = e['enmoTrunc'] * 1000  # convert enmoTrunc to milli-G units

    # calculate imputation values to replace nan PA metric values
    e = perform_wearTime_imputation(e, verbose)
    e['MVPA'] = e['accImputed'] >= mgMVPA
    e['VPA'] = e['accImputed'] >= mgVPA

    # calculate empirical cumulative distribution function of vector magnitudes
    if intensityDistribution:
        calculateECDF(e, 'acc', summary)

    # main movement summaries
    writeMovementSummaries(e, labels, summary)

    # return physical activity summary
    return e, labels
def getActivitySummary(epochFile, nonWearFile, summary,
    activityClassification=True, startTime=None, endTime=None,
    epochPeriod=30, stationaryStd=13, minNonWearDuration=60,
    mgMVPA=100, mgVPA=425,
    activityModel="activityModels/doherty2018.tar",
    intensityDistribution=False, verbose=False):
    """Calculate overall activity summary from <epochFile> data

    Get overall activity summary from input <epochFile>. This is achieved by
    1) get interrupt and data error summary vals
    2) check if data occurs at a daylight savings crossover
    3) calculate wear-time statistics, and write nonWear episodes to file
    4) predict activity from features, and add label column
    5) calculate imputation values to replace nan PA metric values
    6) calculate empirical cumulative distribution function of vector magnitudes
    7) derive main movement summaries (overall, weekday/weekend, and hour)

    :param str epochFile: Input csv.gz file of processed epoch data, or an
        already loaded pandas.DataFrame indexed by time
    :param str nonWearFile: Output filename for non wear .csv.gz episodes
    :param dict summary: Output dictionary containing all summary metrics
    :param bool activityClassification: Perform machine learning of activity states
    :param datetime startTime: Remove data before this time in analysis
    :param datetime endTime: Remove data after this time in analysis
    :param int epochPeriod: Size of epoch time window (in seconds)
    :param int stationaryStd: Threshold (in mg units) for stationary vs not
    :param int minNonWearDuration: Minimum duration of nonwear events (minutes)
    :param int mgMVPA: Milli-gravity threshold for moderate intensity activity
    :param int mgVPA: Milli-gravity threshold for vigorous intensity activity
    :param str activityModel: Input tar model file which contains random forest
        pickle model, HMM priors/transitions/emissions npy files, and npy file
        of METS for each activity state
    :param bool intensityDistribution: Add intensity outputs to dict <summary>
    :param bool verbose: Print verbose output

    :return: Pandas dataframe of activity epoch data
    :rtype: pandas.DataFrame
    :return: Activity prediction labels (empty if <activityClassification>==False)
    :rtype: list(str)

    :return: Write .csv.gz non wear episodes file to <nonWearFile>
    :rtype: void
    :return: Movement summary values written to dict <summary>
    :rtype: void

    :raises SystemExit: With code -9 if no rows remain after the
        <startTime>/<endTime> filter

    :Example:
    >>> import summariseEpoch
    >>> summary = {}
    >>> epochData, labels = summariseEpoch.getActivitySummary( "epoch.csv.gz",
            "nonWear.csv.gz", summary)
    <nonWear file written to "nonWear.csv.gz" and dict "summary" update with outcomes>
    """

    if isinstance(epochFile, pd.DataFrame):
        e = epochFile
        # The CSV branch always sorts by time. A caller-supplied frame gets
        # the same treatment, since index.values[0] / [-1] are reported as
        # the recording's start and end below.
        if not e.index.is_monotonic_increasing:
            e = e.sort_index()
    else:
        # use python PANDAS framework to read in and store epochs
        e = pd.read_csv(epochFile, parse_dates=['time'], index_col=['time'],
                        compression='gzip').sort_index()

    # remove data before/after user specified start/end times
    rows = e.shape[0]
    if startTime is not None:
        e = e[e.index >= startTime]
    if endTime is not None:
        e = e[e.index <= endTime]

    # quit if no data left
    if e.shape[0] == 0:
        print("no rows remaining after start/end time removal")
        print("previously there were %d rows, now shape: %s" % (rows, str(e.shape)))
        sys.exit(-9)

    # get start & end times; the parameters are rebound to the first and
    # last epoch timestamps that survived the filter
    startTime = pd.to_datetime(e.index.values[0])
    endTime = pd.to_datetime(e.index.values[-1])
    summary['file-startTime'] = startTime.strftime('%Y-%m-%d %H:%M:%S')
    summary['file-endTime'] = endTime.strftime('%Y-%m-%d %H:%M:%S')
    summary['file-firstDay(0=mon,6=sun)'] = startTime.weekday()

    # get interrupt and data error summary vals (result written to <summary>;
    # the returned value was never used, so it is no longer bound to a name)
    get_interrupts(e, epochPeriod, summary)

    # check if data occurs at a daylight savings crossover
    e = check_daylight_savings_crossover(e, startTime, endTime, summary)

    # calculate wear-time statistics, and write nonWear episodes to file
    get_wear_time_stats(e, epochPeriod, stationaryStd, minNonWearDuration,
                        nonWearFile, summary)

    # predict activity from features, and add label column
    if activityClassification:
        e, labels = accClassification.activityClassification(e, activityModel)
    else:
        labels = []

    # enmo : Euclidean Norm Minus One
    # Trunc : negative values truncated to zero (i.e never negative)
    # enmo = sqrt(x^2 + y^2 + z^2) - 1
    # enmoTrunc = max(enmo, 0)
    e['acc'] = e['enmoTrunc'] * 1000  # convert enmoTrunc to milli-G units

    # calculate imputation values to replace nan PA metric values
    e = perform_wearTime_imputation(e, verbose)
    e['MVPA'] = e['accImputed'] >= mgMVPA
    e['VPA'] = e['accImputed'] >= mgVPA

    # calculate empirical cumulative distribution function of vector magnitudes
    if intensityDistribution:
        calculateECDF(e, 'acc', summary)

    # main movement summaries
    writeMovementSummaries(e, labels, summary)

    # return physical activity summary
    return e, labels