def create_project(path_project):
    """Generate a new project directory tree and configuration files

    Copies the template configuration files to `path_project`, records the
    creation time and package versions, and creates the project sub-paths.
    """
    from collections import OrderedDict
    import datetime
    import importlib.util
    import os
    import shutil
    import yamlord

    from . import utils
    from .config import paths, fnames

    # Get path to the installed `smartmove` package
    module = importlib.util.find_spec('smartmove')
    module_path = os.path.split(module.origin)[0]

    # Copy configuration files from `smartmove/_templates/` to `path_project`
    fname_cfg_project = fnames['cfg']['project']
    fname_cfg_exp = fnames['cfg']['exp_bounds']
    fname_cfg_ann = fnames['cfg']['ann']
    fname_cfg_glide = fnames['cfg']['glide']
    fname_cfg_filt = fnames['cfg']['filt']

    for fname in [fname_cfg_project, fname_cfg_exp, fname_cfg_ann,
                  fname_cfg_glide, fname_cfg_filt]:
        src = os.path.join(module_path, '_templates', fname)
        dst = os.path.join(path_project, fname)
        shutil.copyfile(src, dst)

    # Add creation datetime and versions to `cfg_project`
    d = yamlord.read_yaml(os.path.join(path_project, fname_cfg_project))
    d['meta'] = OrderedDict()
    d.move_to_end('meta', last=False)
    d['meta']['created'] = datetime.datetime.now().strftime(
        '%Y-%m-%d %H:%M:%S')
    d['meta']['versions'] = utils.get_versions('smartmove')

    yamlord.write_yaml(d, os.path.join(path_project, fname_cfg_project))

    # Create project sub-paths if not existing
    for key in paths.keys():
        p = os.path.join(path_project, paths[key])
        if not os.path.isdir(p):
            os.makedirs(p, exist_ok=True)

    print('\nYour project directory has been created at {}.\n'
          'You must now copy your datalogger data to the `{}` directory, '
          'the body condition `.csv` files to the `{}` directory, and the CTD '
          '`.mat` file to the `{}` directory'.format(path_project,
                                                     paths['tag'],
                                                     paths['csv'],
                                                     paths['ctd']))

    return None
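# Usage sketch for setting up a project (assumptions: `create_project` is
# exposed at the package top level and the path below is hypothetical):

def _example_create_project():
    import smartmove

    # Create the directory tree and copy the template configuration files
    smartmove.create_project('/home/user/smartmove_project')

    # The raw datalogger data, body condition `.csv` files, and CTD `.mat`
    # file are then copied manually into the sub-directories named in the
    # printed message before running the analyses.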
def write_yaml_file(filename, d, overwrite=False):
    """Write dictionary `d` as YAML to `filename`, recursively creating the
    parent directory path if necessary
    """
    import errno
    import logging
    import os
    import yamlord

    if not os.path.exists(os.path.dirname(filename)) and overwrite is False:
        try:
            os.makedirs(os.path.dirname(filename))
        except OSError as exc:
            if exc.errno != errno.EEXIST:
                raise

    logging.debug("Writing yaml file {}".format(filename))
    logging.debug(d)

    yamlord.write_yaml(d, filename)
def write(self, file=None):
    """Write `self.data` as YAML to `self.file`, or to `file` if given"""
    if file is not None:
        self.file = file
    return yamlord.write_yaml(self.data, self.file)
def process(path_project, path_analysis, cfg_ann):
    """Compile post-processing summary values for an ANN analysis and save
    them as a YAML file in the analysis output directory
    """
    from collections import OrderedDict
    import numpy
    import os
    from os.path import join as _join
    import pandas
    import pyotelem
    import yamlord

    from . import pre
    from ..config import paths, fnames

    print(path_analysis)
    path_output = _join(path_project, paths['ann'], path_analysis)

    file_field = _join(path_project, paths['csv'], fnames['csv']['field'])
    file_isotope = _join(path_project, paths['csv'], fnames['csv']['isotope'])

    field, isotope = pre.add_rhomod(file_field, file_isotope)

    # EXPERIMENT INPUT
    post = OrderedDict()
    post['input_exp'] = OrderedDict()

    # n experiments and animals
    post['n_field'] = len(field)
    post['n_animals'] = len(field['animal'].unique())

    # Min/max values of rho_mod and % lipid for each seal
    post['exp'] = OrderedDict()
    post['iso'] = OrderedDict()
    for a in numpy.unique(field['animal']):
        # Field experiment values
        post['exp'][a] = OrderedDict()
        mask = field['animal'] == a
        post['exp'][a]['min_rhomod'] = field[mask]['rho_mod'].min()
        post['exp'][a]['max_rhomod'] = field[mask]['rho_mod'].max()

        # Isotope experiment values
        post['iso'][a] = OrderedDict()
        mask = isotope['animal'] == a.capitalize()
        post['iso'][a]['min_mass'] = isotope[mask]['mass_kg'].min()
        post['iso'][a]['max_mass'] = isotope[mask]['mass_kg'].max()

    # ANN CONFIG
    results = pandas.read_pickle(_join(path_output, fnames['ann']['tune']))

    post['ann'] = OrderedDict()

    # Number of network configurations
    post['ann']['n_configs'] = len(results)

    # Load training data
    file_train = _join(path_output, 'data_train.p')
    file_valid = _join(path_output, 'data_valid.p')
    file_test = _join(path_output, 'data_test.p')
    train = pandas.read_pickle(file_train)
    valid = pandas.read_pickle(file_valid)
    test = pandas.read_pickle(file_test)

    # Number of samples compiled, train, valid, test
    post['ann']['n'] = OrderedDict()
    post['ann']['n']['train'] = len(train[0])
    post['ann']['n']['valid'] = len(valid[0])
    post['ann']['n']['test'] = len(test[0])
    post['ann']['n']['all'] = len(train[0]) + len(valid[0]) + len(test[0])

    # Percentage of compiled dataset in train, valid, test
    post['ann']['n']['perc_train'] = len(train[0]) / post['ann']['n']['all']
    post['ann']['n']['perc_valid'] = len(valid[0]) / post['ann']['n']['all']
    post['ann']['n']['perc_test'] = len(test[0]) / post['ann']['n']['all']

    # Total tuning time
    post['ann']['total_train_time'] = results['train_time'].sum()

    # POSTPROCESS VALUES
    # Best/worst classification accuracies
    mask_best = results['accuracy'] == results['accuracy'].max()
    best_idx = results['train_time'][mask_best].idxmin()

    mask_worst = results['accuracy'] == results['accuracy'].min()
    worst_idx = results['train_time'][mask_worst].idxmax()

    post['ann']['best_idx'] = best_idx
    post['ann']['worst_idx'] = worst_idx

    # Get min/max accuracy and training time for all configurations
    post['ann']['metrics'] = OrderedDict()
    for key in ['accuracy', 'train_time']:
        post['ann']['metrics'][key] = OrderedDict()
        post['ann']['metrics'][key]['max_idx'] = results[key].argmax()
        post['ann']['metrics'][key]['min_idx'] = results[key].argmin()
        post['ann']['metrics'][key]['max'] = results[key].max()
        post['ann']['metrics'][key]['min'] = results[key].min()
        post['ann']['metrics'][key]['best'] = results[key][best_idx]
        post['ann']['metrics'][key]['worst'] = results[key][worst_idx]

    # Optimal network results
    post['ann']['opt'] = OrderedDict()
    net = results['net'][best_idx]

    # Loop 10 times taking mean prediction time
    # Each loop, 100k iterations of timing
    file_test = _join(path_output, fnames['ann']['test'])
    test = pandas.read_pickle(file_test)
    features = numpy.expand_dims(test[0][0], axis=0)
    t_pred = time_prediction(net, features)
    post['ann']['opt']['t_pred'] = t_pred

    # Filesize of trained NN
    file_net_best = './net.tmp'
    pandas.to_pickle(net, file_net_best)
    st = os.stat(file_net_best)
    os.remove(file_net_best)
    post['ann']['opt']['trained_size'] = st.st_size / 1000  # kB

    # % step between subsets of `test` for the dataset size test
    post['ann']['dataset'] = 'numpy.arange(0, 1, 0.03)[1:]'

    # Tune confusion matrices (cms) from most optimal configuration
    # one field per dataset `train`, `valid`, and `test`
    # first level `targets` is for all datasets
    post['ann']['bins'] = OrderedDict()
    file_tune_cms = _join(path_output, fnames['ann']['cms_tune'])
    tune_cms = pandas.read_pickle(file_tune_cms)
    bins = tune_cms['targets']

    # Range of each bin, density, lipid percent
    bin_range = range(len(bins) - 1)
    rho_lo = numpy.array([bins[i] for i in bin_range])
    rho_hi = numpy.array([bins[i + 1] for i in bin_range])

    # Note density is converted from kg/m^3 to g/cm^3 for `dens2lip`
    lip_lo = pyotelem.physio_seal.dens2lip(rho_lo * 0.001)
    lip_hi = pyotelem.physio_seal.dens2lip(rho_hi * 0.001)

    # Generate bin ranges as strings
    fmt_bin = r'{:7.2f} <= rho_mod < {:7.2f}'
    fmt_lip = r'{:6.2f} >= lipid % > {:6.2f}'
    str_bin = [fmt_bin.format(lo, hi) for lo, hi in zip(rho_lo, rho_hi)]
    str_lip = [fmt_lip.format(lo, hi) for lo, hi in zip(lip_lo, lip_hi)]

    path_sgls = _join(path_output, fnames['ann']['sgls'])
    sgls_ann = pandas.read_pickle(path_sgls)

    post['ann']['bins']['values'] = list(bins)
    post['ann']['bins']['value_range'] = str_bin
    post['ann']['bins']['value_diff'] = list(numpy.diff(bins))

    # Note density is converted from kg/m^3 to g/cm^3 for `dens2lip`
    lipid_perc = pyotelem.physio_seal.dens2lip(bins * 0.001)
    post['ann']['bins']['lipid_perc'] = list(lipid_perc)
    post['ann']['bins']['lipid_range'] = str_lip
    post['ann']['bins']['lipid_diff'] = list(numpy.diff(lipid_perc))

    precision = calculate_precision(tune_cms['validation']['cm'])
    post['ann']['bins']['precision'] = [None, ] * len(bins)
    targets = tune_cms['validation']['targets']
    for i in range(len(bins)):
        if bins[i] in targets:
            post['ann']['bins']['precision'][i] = precision[bins[i] == targets]
        else:
            post['ann']['bins']['precision'][i] = 'None'

    # Save post-processing results as YAML
    file_post = _join(path_output, fnames['ann']['post'])
    yamlord.write_yaml(post, file_post)

    return post
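# `calculate_precision` is referenced above but not shown here. A minimal
# sketch of what such a helper could look like, assuming the confusion matrix
# `cm` is square with rows as true classes and columns as predicted classes
# (an illustration of the calculation, not the package's implementation):

def _example_precision_from_cm(cm):
    """Return per-class precision values from a square confusion matrix"""
    import numpy

    cm = numpy.asarray(cm, dtype=float)

    # Precision per class: true positives over all samples predicted as that
    # class (column sums); guard empty columns to avoid division by zero
    col_sums = cm.sum(axis=0)
    col_sums[col_sums == 0] = numpy.nan

    return numpy.diag(cm) / col_sums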
def run(path_project, path_analysis, cfg_project, cfg_ann, sgls_all,
        plots=False, debug=False):
    '''Compile sub-glide data, tune network architecture and test dataset size

    Args
    ----
    path_project: str
        Parent path for the project
    path_analysis: str
        Name of the analysis output directory
    cfg_project: OrderedDict
        Dictionary of configuration parameters for the current project
    cfg_ann: OrderedDict
        Dictionary of configuration parameters for the ANN
    sgls_all: pandas.DataFrame
        Compiled sub-glide data used as ANN input
    plots: bool
        Switch for generating diagnostic plots after each network training
    debug: bool
        Switch for running a single network configuration

    Returns
    -------
    cfg: dict
        Dictionary of network configuration parameters used
    data: tuple
        Tuple collecting training, validation, and test sets. Also includes
        bin delineation values
    results: tuple
        Tuple collecting results dataframes and confusion matrices

    Note
    ----
    The validation set is split into `validation` and `test` sets, the first
    used for initial comparisons of various net configuration accuracies and
    the second for a clean test set to get a true accuracy, as reusing the
    `validation` set can cause the routine to overfit to the validation set.
    '''
    from collections import OrderedDict
    import climate
    import numpy
    import os
    import pandas
    import theano
    import yamlord

    from . import utils_ann
    from .utils_ann import ppickle
    from ..config import paths, fnames

    # Environment settings - logging, Theano, load configuration, set paths
    #---------------------------------------------------------------------
    climate.enable_default_logging()
    theano.config.compute_test_value = 'ignore'

    # Configuration settings
    if debug is True:
        for key in cfg_ann['net_tuning'].keys():
            cfg_ann['net_tuning'][key] = [cfg_ann['net_tuning'][key][0], ]

    # Drop rows with missing values
    sgls_nonan = sgls_all.dropna()

    print('\nSplit and normalize input/output data')
    features = cfg_ann['net_all']['features']
    target = cfg_ann['net_all']['target']
    n_targets = cfg_ann['net_all']['n_targets']
    valid_frac = cfg_ann['net_all']['valid_frac']

    # Normalize input (features) and output (target)
    nsgls, bins = _normalize_data(sgls_nonan, features, target, n_targets)

    # Get indices of train, validation and test datasets
    ind_train, ind_valid, ind_test = _split_indices(nsgls, valid_frac)

    # Split dataframes into train, validation and test (features, targets) tuples
    train, valid, test = _create_datasets(nsgls, ind_train, ind_valid,
                                          ind_test, features, target)
    print('train', len(train[0]), len(train[1]))
    print('valid', len(valid[0]), len(valid[1]))
    print('test', len(test[0]), len(test[1]))

    # Save information on input data to config
    cfg_ann['net_all']['targets'] = [float(b) for b in bins]

    # Tuning - find optimal network architecture
    #---------------------------------------------------------------------
    print('\nTune network configuration')

    # Get dict of all configuration permutations of params in `net_tuning`
    configs = _get_configs(cfg_ann['net_tuning'])

    # Cycle through configurations storing configuration, net in `results_tune`
    n_features = len(cfg_ann['net_all']['features'])
    n_targets = cfg_ann['net_all']['n_targets']

    print('\nNumber of features: {}'.format(n_features))
    print('Number of targets: {}\n'.format(n_targets))

    results_tune, tune_accuracy, cms_tune = _tune_net(train, valid, test,
                                                      bins, configs,
                                                      n_features, n_targets,
                                                      plots)

    # Get neural net configuration with best accuracy
    best_config = get_best(results_tune, 'config')

    # Test effect of dataset size
    #---------------------------------------------------------------------
    print('\nRun percentage of datasize tests')

    # Get randomly sorted and subsetted datasets to test effect of dataset
    # size, i.e. a dataset with the first `subset_fraction` of samples
    results_dataset, data_accuracy, cms_data = _test_dataset_size(
        best_config, train, valid, test, bins, n_features, n_targets, plots,
        debug)

    print('\nTest data accuracy (Configuration tuning): {}'.format(
        tune_accuracy))
    print('Test data accuracy (Datasize test): {}'.format(data_accuracy))

    # Save results and configuration to output directory
    #---------------------------------------------------------------------

    # Create output directory if it does not exist
    path_output = os.path.join(path_project, paths['ann'], path_analysis)
    os.makedirs(path_output, exist_ok=True)

    # Save updated `cfg_ann` to output directory
    file_cfg_ann = os.path.join(path_output, fnames['cfg']['ann'])
    yamlord.write_yaml(cfg_ann, file_cfg_ann)

    # Compiled SGLs before NaN drop and normalization
    utils_ann.ppickle(sgls_all, os.path.join(path_output,
                                             fnames['ann']['sgls']))

    # Compiled SGLs after NaN drop and normalization
    utils_ann.ppickle(nsgls, os.path.join(path_output,
                                          fnames['ann']['sgls_norm']))

    # Save output data to analysis output directory
    tune_fname = fnames['ann']['tune']
    datasize_fname = fnames['ann']['dataset']
    ppickle(results_tune, os.path.join(path_output, tune_fname))
    ppickle(results_dataset, os.path.join(path_output, datasize_fname))

    # Save train, validation, test datasets
    ppickle(train, os.path.join(path_output, fnames['ann']['train']))
    ppickle(valid, os.path.join(path_output, fnames['ann']['valid']))
    ppickle(test, os.path.join(path_output, fnames['ann']['test']))

    ppickle(cms_tune, os.path.join(path_output, fnames['ann']['cms_tune']))
    ppickle(cms_data, os.path.join(path_output, fnames['ann']['cms_data']))

    return cfg_ann, (train, valid, test), (results_tune, results_dataset,
                                           cms_tune, cms_data)
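# `_get_configs` above is a module-level helper not shown here. A plausible
# sketch of generating every permutation of the tuning parameters with
# `itertools.product` (an illustration under that assumption, not the
# package's implementation):

def _example_get_configs(net_tuning):
    '''Return a list of dicts, one per combination of tuning parameter values

    `net_tuning` maps parameter names to lists of candidate values, e.g.
    {'hidden_nodes': [10, 20], 'hidden_layers': [1, 2]}.
    '''
    from collections import OrderedDict
    import itertools

    keys = list(net_tuning.keys())
    value_lists = [net_tuning[k] for k in keys]

    # Cartesian product of all parameter value lists
    configs = list()
    for values in itertools.product(*value_lists):
        configs.append(OrderedDict(zip(keys, values)))

    return configs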
def _process_tag_data(path_project, cfg_project, cfg_glide, path_exp, tag,
                      fs_a, plots=True, debug=False):
    '''Process tag data: find dives and derive signals for glide identification

    Args
    ----
    path_project: str
        Parent path for project
    cfg_project: OrderedDict
        Dictionary of configuration parameters for the current project
    cfg_glide: OrderedDict
        Dictionary of configuration parameters for glide identification
    path_exp: str
        Directory name of `tag` data being processed
    tag: pandas.DataFrame
        Data loaded from tag with associated sensors
    fs_a: float
        Sampling frequency (i.e. number of samples per second)
    plots: bool
        Switch for turning on plots (Default `True`). When activated plots
        for reviewing signal processing will be displayed.
    debug: bool
        Switch for turning on debugging (Default `False`). When activated
        values for `cutoff_freq` and `J` will be set to generic values and
        diagnostic plots of the `speed` parameter in `tag` will be displayed.

    Returns
    -------
    cfg: OrderedDict
        Copy of `cfg_glide` updated with the derived values (`cutoff_frq`,
        `stroke_frq`, `stroke_ratio`, `J`)
    tag: pandas.DataFrame
        Data loaded from tag with associated sensors, with added fields from
        signal processing
    dives: pandas.DataFrame
        Start and stop indices and attributes for dive events in `tag` data,
        including: start_idx, stop_idx, dive_dur, depths_max, depths_max_idx,
        depths_mean, compr_mean.
    masks: pandas.DataFrame
        Boolean masks for slicing identified dives, glides, and sub-glides
        from the `tag` dataframe.
    exp_ind: ndarray
        Indices of the `tag` data that fall within the analysis bounds
    '''
    from collections import OrderedDict
    import copy
    import numpy
    from os.path import join as _join
    import pandas
    import pyotelem
    from pyotelem.plots import plotdives, plotdsp
    import yamlord

    from .. import utils
    from . import utils_ctd
    from ..config import paths, fnames

    exp_idxs = [None, None]

    file_cfg_exp = _join(path_project, fnames['cfg']['exp_bounds'])

    cfg = copy.deepcopy(cfg_glide)

    try:
        cfg_exp = yamlord.read_yaml(file_cfg_exp)
    except IOError:
        # Experiment bounds file does not exist yet; start a new one
        cfg_exp = OrderedDict()

    # 1 Select indices for analysis
    #--------------------------------------------------------------------------
    print('* Select indices for analysis\n')

    if path_exp in cfg_exp:
        exp_idxs[0] = cfg_exp[path_exp]['start_idx']
        exp_idxs[1] = cfg_exp[path_exp]['stop_idx']
    else:
        # Plot accelerometer axes, depths, and propeller speed
        plotdives.plot_triaxial_depths_speed(tag)

        # Get indices user input - mask
        exp_idxs[0] = pyotelem.utils.recursive_input('Analysis start index',
                                                     int)
        exp_idxs[1] = pyotelem.utils.recursive_input('Analysis stop index',
                                                     int)

        cfg_exp[path_exp] = OrderedDict()
        cfg_exp[path_exp]['start_idx'] = exp_idxs[0]
        cfg_exp[path_exp]['stop_idx'] = exp_idxs[1]

        yamlord.write_yaml(cfg_exp, file_cfg_exp)

    # Create dataframe for storing masks for various views of the data
    masks = pandas.DataFrame(index=range(len(tag)), dtype=bool)

    # Create mask of values to be considered part of the analysis
    masks['exp'] = False
    masks['exp'][exp_idxs[0]:exp_idxs[1]] = True

    # Create indices array `exp_ind` for analysis
    exp_ind = numpy.where(masks['exp'])[0]

    # 1.3 Calculate pitch, roll, and heading
    #--------------------------------------------------------------------------
    print('* Calculate pitch, roll, heading\n')
    tag['p'], tag['r'], tag['h'] = pyotelem.dynamics.prh(tag['Ax_g'].values,
                                                         tag['Ay_g'].values,
                                                         tag['Az_g'].values)

    # 2 Define dives
    #--------------------------------------------------------------------------
    print('* Define dives\n')
    dives, masks['dive'] = pyotelem.dives.finddives2(tag['depth'].values,
                                                     cfg_glide['min_depth'])

    # 3.2.1 Determine `stroke_frq` fluking rate and cut-off frequency
    #--------------------------------------------------------------------------
    print('* Get stroke frequency\n')

    # Calculate power spectrum of the accelerometer data at the whale frame
    Ax_g = tag['Ax_g'][masks['exp']].values
    Az_g = tag['Az_g'][masks['exp']].values

    # NOTE change `stroke_ratio` here to modify the selection method
    # should be OK other than t_max, these values are too high
    if debug is False:
        cutoff_frq, stroke_frq, stroke_ratio = pyotelem.glides.get_stroke_freq(
            Ax_g, Az_g, fs_a, cfg_glide['nperseg'], cfg_glide['peak_thresh'],
            stroke_ratio=None)
        # Store user input cutoff and stroke frequencies
        cfg['cutoff_frq'] = cutoff_frq
        cfg['stroke_frq'] = stroke_frq
        cfg['stroke_ratio'] = stroke_ratio
    else:
        cutoff_frq = 0.3
        cfg['cutoff_frq'] = cutoff_frq

    # 3.2.2 Separate low and high frequency signals
    #--------------------------------------------------------------------------
    print('* Separate accelerometry to high and low-pass signals\n')
    order = 5
    cutoff_str = str(cfg['cutoff_frq'])
    for btype, suffix in zip(['low', 'high'], ['lf', 'hf']):
        b, a = pyotelem.dsp.butter_filter(cfg['cutoff_frq'], fs_a,
                                          order=order, btype=btype)
        for param in ['Ax_g', 'Ay_g', 'Az_g']:
            key = '{}_{}_{}'.format(param, suffix, cutoff_str)
            tag[key] = pyotelem.dsp.butter_apply(b, a, tag[param].values)

    # Plot low and high frequency accelerometer signals
    if plots is True:
        plotdsp.plot_lf_hf(tag['Ax_g'][masks['exp']],
                           tag['Ax_g_lf_' + cutoff_str][masks['exp']],
                           tag['Ax_g_hf_' + cutoff_str][masks['exp']],
                           title='x axis')

        plotdsp.plot_lf_hf(tag['Ay_g'][masks['exp']],
                           tag['Ay_g_lf_' + cutoff_str][masks['exp']],
                           tag['Ay_g_hf_' + cutoff_str][masks['exp']],
                           title='y axis')

        plotdsp.plot_lf_hf(tag['Az_g'][masks['exp']],
                           tag['Az_g_lf_' + cutoff_str][masks['exp']],
                           tag['Az_g_hf_' + cutoff_str][masks['exp']],
                           title='z axis')

    # 3.2.3 Calculate the smooth pitch from the low pass filter acceleration
    #       signal to avoid incorporating signals above the stroking periods
    #--------------------------------------------------------------------------
    print('* Calculate low-pass pitch, roll, heading\n')
    prh_lf = pyotelem.dynamics.prh(tag['Ax_g_lf_' + cutoff_str].values,
                                   tag['Ay_g_lf_' + cutoff_str].values,
                                   tag['Az_g_lf_' + cutoff_str].values)

    tag['p_lf'], tag['r_lf'], tag['h_lf'] = prh_lf

    # 4 Define precise descent and ascent phases
    #--------------------------------------------------------------------------
    print('* Get precise indices of descents, ascents, phase and bottom\n')
    masks['des'], masks['asc'] = pyotelem.dives.get_des_asc2(
        tag['depth'].values, masks['dive'].values, tag['p_lf'].values,
        cfg['cutoff_frq'], fs_a, order=5)

    # Typecast `des` and `asc` columns to `bool`
    masks = masks.astype(bool)

    if plots is True:
        plotdives.plot_dives_pitch(tag['depth'][masks['exp']],
                                   masks['dive'][masks['exp']],
                                   masks['des'][masks['exp']],
                                   masks['asc'][masks['exp']],
                                   tag['p'][masks['exp']],
                                   tag['p_lf'][masks['exp']])

    # 8 Estimate seawater density around the tagged animal
    #--------------------------------------------------------------------------
    print('* Estimate seawater density\n')

    # Study location and max depth to average salinities
    lon = cfg_project['experiment']['coords']['lon']
    lat = cfg_project['experiment']['coords']['lat']
    max_depth = cfg_project['experiment']['net_depth']

    # Read CTD data
    fname_ctd = cfg_project['experiment']['fname_ctd']
    file_ctd_mat = _join(path_project, paths['ctd'], fname_ctd)

    t = tag['temperature'].values

    tag['dsw'] = utils_ctd.get_seawater_densities(file_ctd_mat, t, lon, lat,
                                                  max_depth)

    # 6.1 Extract strokes and glides using heave
    #     high-pass filtered (HPF) acceleration signal, axis=3
    #--------------------------------------------------------------------------
    # Two methods for estimating stroke frequency `stroke_frq`:
    # * from the body rotations (pry) using the magnetometer method
    # * from the dorso-ventral axis of the HPF acceleration signal

    # For both methods, t_max and J need to be determined.

    # Choose a value for J based on a plot showing the distribution of signals:
    #   hpf-x, when detecting glides in the next step use Ahf_Anlf() with axis=0
    #   hpf-z, when detecting glides in the next step use Ahf_Anlf() with axis=2
    print('* Get fluke signal threshold\n')

    if debug is False:
        # Plot PSD for J selection
        Ax_g_hf = tag['Ax_g_hf_' + cutoff_str][masks['exp']].values
        Az_g_hf = tag['Az_g_hf_' + cutoff_str][masks['exp']].values

        f_wx, Sx, Px, dpx = pyotelem.dsp.calc_PSD_welch(Ax_g_hf, fs_a,
                                                        nperseg=512)
        f_wz, Sz, Pz, dpz = pyotelem.dsp.calc_PSD_welch(Az_g_hf, fs_a,
                                                        nperseg=512)

        import matplotlib.pyplot as plt
        fig, (ax1, ax2) = plt.subplots(1, 2)
        ax1.plot(f_wx, Sx, label='hf-x PSD')
        ax1.plot(f_wz, Sz, label='hf-z PSD')
        ax1.legend(loc='upper right')
        ax2.plot(tag['datetimes'][masks['exp']], Ax_g_hf, label='hf-x')
        ax2.plot(tag['datetimes'][masks['exp']], Az_g_hf, label='hf-z')
        ax2.legend(loc='upper right')
        fig.autofmt_xdate()
        plt.show()

        # Get user selection for J - select one for both axes
        cfg['J'] = pyotelem.utils.recursive_input('J (fluke magnitude)', float)
    else:
        cfg['J'] = 0.4

    return cfg, tag, dives, masks, exp_ind
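# `pyotelem.dsp.butter_filter`/`butter_apply` are used above to split the
# acceleration into low- and high-frequency components. A standalone sketch of
# the same idea with `scipy.signal` (an illustration of the technique, not
# pyotelem's implementation; `fs_a` and `cutoff_frq` are example values):

def _example_lf_hf_split(acc, fs_a=16.0, cutoff_frq=0.3, order=5):
    '''Return (low-pass, high-pass) filtered copies of a 1-D signal `acc`'''
    from scipy import signal

    # Design Butterworth filters for the same cutoff, one per pass band
    b_lo, a_lo = signal.butter(order, cutoff_frq, btype='low', fs=fs_a)
    b_hi, a_hi = signal.butter(order, cutoff_frq, btype='high', fs=fs_a)

    # Zero-phase filtering in both directions to avoid a phase shift
    acc_lf = signal.filtfilt(b_lo, a_lo, acc)
    acc_hf = signal.filtfilt(b_hi, a_hi, acc)

    return acc_lf, acc_hf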
def run(path_project, cfg_project, cfg_glide, cfg_filt, sgl_dur, plots=True,
        debug=False):
    '''Run glide identification on data in configuration paths

    Args
    ----
    path_project: str
        Parent path for project
    cfg_project: OrderedDict
        Dictionary of configuration parameters for the current project
    cfg_glide: OrderedDict
        Dictionary of configuration parameters for glide identification
    cfg_filt: OrderedDict
        Dictionary of configuration parameters for filtering sub-glides
    sgl_dur: int
        Duration of sub-glide splits (seconds)
    plots: bool
        Switch for turning on plots (Default `True`). When activated plots
        for reviewing signal processing will be displayed.
    debug: bool
        Switch for turning on debugging (Default `False`). When activated
        values for `cutoff_freq` and `J` will be set to generic values and
        diagnostic plots of the `speed` parameter in `tag` will be displayed.

    Attributes
    ----------
    cutoff_frq: float
        Cutoff frequency for separating low and high frequency signals
    stroke_frq: float
        Frequency at which maximum power is seen in accelerometer PSD
    t_max: int
        Maximum duration allowable for a fluke stroke in seconds, it can be
        set as 1/`stroke_frq`
    J: float
        Magnitude threshold for detecting a fluke stroke in [m/s2]

    Returns
    -------
    tag: pandas.DataFrame
        Data loaded from tag with associated sensors
    dives: pandas.DataFrame
        Start and stop indices and attributes for dive events in `tag` data,
        including: start_idx, stop_idx, dive_dur, depths_max, depths_max_idx,
        depths_mean, compr_mean.
    GL: ndarray, (n, 2)
        Start and stop indices and attributes of glide events in `tag` data
    sgls: pandas.DataFrame
        Contains sub-glide summary information of `tag` data
    '''
    from collections import OrderedDict
    import numpy
    import os
    from os.path import join as _join
    import pandas
    import pyotelem
    from pyotelem.plots import plotdynamics, plotglides
    import yamlord

    from ..config import paths, fnames
    from .. import utils
    from . import utils_lleo

    # Input filenames
    fname_cal = fnames['tag']['cal']
    fname_cal_prop = fnames['csv']['cal_prop']

    # Output filenames
    fname_cfg_glide = fnames['cfg']['glide']
    fname_cfg_filt = fnames['cfg']['filt']

    fname_dives = fnames['glide']['dives']
    fname_glide_ratio = fnames['glide']['glide_ratio']
    fname_mask_tag = fnames['glide']['mask_tag']
    fname_mask_tag_glides = fnames['glide']['mask_tag_glides']
    fname_sgls = fnames['glide']['sgls']
    fname_mask_tag_sgls = fnames['glide']['mask_tag_sgls']
    fname_mask_tag_filt = fnames['glide']['mask_tag_filt']
    fname_mask_sgls_filt = fnames['glide']['mask_sgls_filt']

    # Fields to ignore when concatenating output path names
    ignore = ['nperseg', 'peak_thresh', 'alpha', 'min_depth', 't_max',
              'last_modified']

    # Generate list of paths in tag data directory
    path_exps = list()
    for path_exp in os.listdir(_join(path_project, paths['tag'])):
        # Only process directories
        if os.path.isdir(_join(path_project, paths['tag'], path_exp)):
            path_exps.append(path_exp)

    # Get user selection of tag data paths to process
    path_exps = sorted(path_exps)
    msg = 'paths numbers to process:\n'
    process_ind = pyotelem.utils.get_dir_indices(msg, path_exps)

    # Process selected tag experiments
    for i in process_ind:
        path_exp = path_exps[i]
        fname_tag = fnames['tag']['data'].format(path_exp)

        # Get correct calibration path given tag ID number
        tag_model = path_exp.replace('-', '').split('_')[1].lower()
        tag_id = int(path_exp.split('_')[2])
        year = int(path_exp[:4])
        month = int(path_exp[4:6])
        path_cal_acc = cfg_project['cal'][tag_model][tag_id][year][month]
        print('Tag calibration file path: {}\n'.format(path_cal_acc))

        # Currently creating a new configuration for each exp
        path_cfg_glide = path_exp

        print('Processing: {}\n'.format(path_exp))

        # Run glide analysis

        # Output paths
        out_data = _join(path_project, paths['tag'], path_exp)
        os.makedirs(out_data, exist_ok=True)

        # LOAD DATA
        #----------
        # Tag data linearly interpolated to accelerometer sensor timestamps
        path_data_tag = _join(path_project, paths['tag'], path_exp)
        file_cal_acc = _join(path_project, paths['tag'], path_cal_acc,
                             fname_cal)
        file_cal_prop = _join(path_project, paths['csv'], fname_cal_prop)

        tag, dt_a, fs_a = utils_lleo.load_lleo(path_data_tag, file_cal_acc,
                                               file_cal_prop)

        # Plot speed if debug is on
        if debug:
            exp_ind = range(len(tag))
            plotdynamics.plot_swim_speed(exp_ind, tag['speed'].values)

        # Signal process data, calculate derived data and find stroke
        # frequencies
        cfg_glide_exp, tag, dives, masks, exp_ind = _process_tag_data(
            path_project, cfg_project, cfg_glide, path_exp, tag, fs_a,
            plots=plots, debug=debug)

        # Save data
        tag.to_pickle(_join(out_data, fname_tag))
        dives.to_pickle(_join(out_data, fname_dives))
        masks.to_pickle(_join(out_data, fname_mask_tag))

        # Find Glides
        #------------
        GL, masks = _process_glides(cfg_glide_exp, tag, fs_a, dives, masks,
                                    plots=plots, debug=debug)

        # Create output path from concatenating parameters in `cfg_glide_exp`
        dname_glide = utils.cat_path(cfg_glide_exp, ignore)
        out_glide = _join(path_project, paths['glide'], path_exp, dname_glide)
        os.makedirs(out_glide, exist_ok=True)

        # Save glide data to concatenated path
        masks['glides'].to_pickle(_join(out_glide, fname_mask_tag_glides))

        # Save glide analysis configuration
        cfg_glide_exp['last_modified'] = _now_str()
        file_cfg_glide_exp = _join(out_glide, fname_cfg_glide)
        yamlord.write_yaml(cfg_glide_exp, file_cfg_glide_exp)

        # SPLIT GLIDES TO SUB-GLIDES
        #---------------------------
        # Split into sub-glides, generate summary tables
        sgls, masks['sgls'] = _process_sgls(tag, fs_a, dives, GL, sgl_dur)

        # Create output path from passed `sgls` duration
        out_sgls = _join(out_glide, 'dur_{}'.format(sgl_dur))
        os.makedirs(out_sgls, exist_ok=True)

        # Save sgls data to path for passed `sgls` duration
        sgls.to_pickle(_join(out_sgls, fname_sgls))
        masks['sgls'].to_pickle(_join(out_sgls, fname_mask_tag_sgls))

        # FILTER AND PLOT SUB-GLIDES
        #---------------------------
        # Get masks of `tag` and `sgls` data for sgls matching constraints
        exp_ind = numpy.where(masks['exp'])[0]
        mask_tag_filt, mask_sgls_filt = utils.filter_sgls(
            len(tag), exp_ind, sgls, cfg_filt['pitch_thresh'],
            cfg_filt['min_depth'], cfg_filt['max_depth_delta'],
            cfg_filt['min_speed'], cfg_filt['max_speed'],
            cfg_filt['max_speed_delta'])

        # Plot filtered sgls
        plotglides.plot_sgls(masks['exp'], tag['depth'].values, mask_tag_filt,
                             sgls, mask_sgls_filt,
                             tag['Az_g_hf_' + str(cfg_glide_exp['cutoff_frq'])])

        # Create output path from concatenating parameters in `cfg_filt`
        dname_filt = utils.cat_path(cfg_filt, ignore)
        out_filt = _join(out_sgls, dname_filt)
        os.makedirs(out_filt, exist_ok=True)

        # Save filtered sgls data to concatenated path
        pandas.to_pickle(mask_tag_filt, _join(out_filt, fname_mask_tag_filt))
        pandas.to_pickle(mask_sgls_filt, _join(out_filt, fname_mask_sgls_filt))

        # Save symlinks to data and masks in filter directory
        out_paths = [out_data, out_data, out_glide, out_glide, out_sgls,
                     out_sgls, out_sgls]
        sym_fnames = [fname_tag, fname_mask_tag, fname_cfg_glide,
                      fname_mask_tag_glides, fname_cfg_filt,
                      fname_mask_tag_sgls, fname_sgls]
        for out_path, fname in zip(out_paths, sym_fnames):
            rel_path = os.path.relpath(_join(out_path, fname), out_filt)
            utils.symlink(rel_path, _join(out_filt, fname))

        # Save sub-glide analysis configuration
        cfg_filt['last_modified'] = _now_str()
        file_cfg_filt = _join(out_sgls, fname_cfg_filt)
        yamlord.write_yaml(cfg_filt, file_cfg_filt)

    return tag, dives, GL, sgls
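# Usage sketch for the glide identification step (assumptions: the project was
# created with `create_project`, the template configuration files have been
# filled in, this `run` function is in scope from the same module, and the
# path and `sgl_dur` value below are hypothetical):

def _example_run_glide_analysis():
    import os
    import yamlord
    from smartmove.config import fnames

    path_project = '/home/user/smartmove_project'  # hypothetical path

    # Load the project-level configuration files written by `create_project`
    cfg_project = yamlord.read_yaml(
        os.path.join(path_project, fnames['cfg']['project']))
    cfg_glide = yamlord.read_yaml(
        os.path.join(path_project, fnames['cfg']['glide']))
    cfg_filt = yamlord.read_yaml(
        os.path.join(path_project, fnames['cfg']['filt']))

    # `sgl_dur` is the sub-glide split duration in seconds (example value);
    # the call prompts for which tag directories and indices to process
    return run(path_project, cfg_project, cfg_glide, cfg_filt, sgl_dur=2,
               plots=False, debug=False)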