def test_check_input_ext():
    """Check the file names returned by ``utils.check_input_ext``."""
    wanted_ext = 'py'

    # A name without any extension is expected back as "<name>.<ext>".
    bare_name = 'help'
    returned = utils.check_input_ext(bare_name, wanted_ext)
    assert str(returned) == f'{bare_name}.{wanted_ext}'

    # A name carrying several extensions is expected back with all of them
    # replaced by the requested one.
    multi_ext_name = 'help.txt.gz'
    returned = utils.check_input_ext(multi_ext_name, wanted_ext)
    assert str(returned) == 'help.py'
def phys2bids(filename, info=False, indir='.', outdir='.', heur_file=None,
              sub=None, ses=None, chtrig=0, chsel=None,
              num_timepoints_expected=None, tr=None, thr=None, pad=9,
              ch_name=[], yml='', debug=False, quiet=False):
    """
    Run main workflow of phys2bids.

    Runs the parser, does some checks on input, then imports the right
    interface file to read the input. If only info is required,
    it returns a summary onscreen.
    Otherwise, it operates on the input to return a .tsv.gz file, possibly
    in BIDS format.

    Parameters
    ----------
    filename : str
        Input file; resolved against `indir` by ``utils.check_input_type``.
    info : bool
        If True, stop after printing the file info and plotting all channels.
    indir, outdir : str
        Input and output folders. `outdir` (and ``code/conversion`` inside
        it) is created if missing.
    heur_file : str or None
        Heuristic file ('.py' extension is enforced). Only used for BIDS
        naming when `sub` is given too.
    sub, ses : str or None
        Subject and session labels forwarded to the heuristic and plots.
    chtrig : int
        Index of the trigger channel; must not be negative.
    chsel : list of int or None
        Indices of the channels to keep; all others are dropped.
    num_timepoints_expected : int, list of int or None
        Expected timepoints, one entry per run. More than one entry
        triggers the multi-run workflow.
    tr : int, float, list or None
        TR, either one value or one per run.
    thr : optional
        Forwarded to ``check_trigger_amount`` (presumably the trigger
        detection threshold -- confirm against that method).
    pad : optional
        Forwarded to ``slice4phys`` when slicing multi-run recordings.
    ch_name : list
        New channel names, forwarded to ``rename_channels`` if not empty.
        The default list is never mutated here.
    yml : str
        Forwarded to ``bids.participants_file``.
    debug, quiet : bool
        Select the logging level (`quiet` wins over `debug`).

    Raises
    ------
    NotImplementedError
        If the file extension is not supported yet.
    Exception
        If `chtrig` is negative, if the amount of TRs does not match the
        amount of runs, or if the triggers found differ from those expected
        in a multi-run session.
    """
    # Check options to make them internally coherent pt. I
    # #!# This can probably be done while parsing?
    outdir = os.path.abspath(outdir)
    os.makedirs(outdir, exist_ok=True)
    os.makedirs(os.path.join(outdir, 'code'), exist_ok=True)
    conversion_path = os.path.join(outdir, 'code', 'conversion')
    os.makedirs(conversion_path, exist_ok=True)

    # Create logfile name
    basename = 'phys2bids_'
    extension = 'tsv'
    isotime = datetime.datetime.now().strftime('%Y-%m-%dT%H%M%S')
    logname = os.path.join(conversion_path,
                           (basename + isotime + '.' + extension))

    # Set logging format
    log_formatter = logging.Formatter(
        '%(asctime)s\t%(name)-12s\t%(levelname)-8s\t%(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S')

    # Set up logging file and open it for writing
    log_handler = logging.FileHandler(logname)
    log_handler.setFormatter(log_formatter)
    sh = logging.StreamHandler()

    # Only the level differs between the three modes.
    if quiet:
        log_level = logging.WARNING
    elif debug:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.basicConfig(level=log_level, handlers=[log_handler, sh],
                        format='%(levelname)-10s %(message)s')

    version_number = _version.get_versions()['version']
    LGR.info(f'Currently running phys2bids version {version_number}')
    LGR.info(f'Input file is {filename}')

    # Save call.sh
    # NOTE(review): the shebang is written as "#!bin/bash" (no leading slash)
    # and, the file being opened in append mode, it is added again at every
    # call -- confirm whether this is intended before changing the text.
    arg_str = ' '.join(sys.argv[1:])
    call_str = f'phys2bids {arg_str}'
    # Context manager so the handle is released even if the write fails.
    with open(os.path.join(conversion_path, 'call.sh'), "a") as call_file:
        call_file.write(f'#!bin/bash \n{call_str}')

    # Check options to make them internally coherent pt. II
    # #!# This can probably be done while parsing?
    indir = os.path.abspath(indir)
    if chtrig < 0:
        raise Exception(
            'Wrong trigger channel. Channel indexing starts with 0!')

    filename, ftype = utils.check_input_type(filename, indir)

    if heur_file:
        heur_file = utils.check_input_ext(heur_file, '.py')
        utils.check_file_exists(heur_file)

    infile = os.path.join(indir, filename)
    utils.check_file_exists(infile)

    # Input file name without folder and extension, reused for plots/outputs.
    file_stem = os.path.splitext(os.path.basename(filename))[0]

    if isinstance(num_timepoints_expected, int):
        num_timepoints_expected = [num_timepoints_expected]
    if isinstance(tr, (int, float)):
        tr = [tr]

    if tr is not None and num_timepoints_expected is not None:
        # If tr and ntp were specified, check that tr is either length one or ntp.
        if len(num_timepoints_expected) != len(tr) and len(tr) != 1:
            raise Exception('Number of sequence types listed with TR '
                            'doesn\'t match expected number of runs in '
                            'the session')

    # Read file!
    LGR.info(f'Reading the file {infile}')
    if ftype == 'acq':
        from phys2bids.io import load_acq
        phys_in = load_acq(infile, chtrig)
    elif ftype == 'txt':
        from phys2bids.io import load_txt
        phys_in = load_txt(infile, chtrig)
    elif ftype == 'mat':
        from phys2bids.io import load_mat
        phys_in = load_mat(infile, chtrig)
    else:
        # Fail explicitly instead of hitting an unbound `phys_in` below.
        raise NotImplementedError('Currently unsupported file type.')

    LGR.info('Checking that units of measure are BIDS compatible')
    for index, unit in enumerate(phys_in.units):
        phys_in.units[index] = bids.bidsify_units(unit)

    LGR.info('Reading infos')
    phys_in.print_info(filename)
    # #!# Here the function viz.plot_channel should be called
    viz.plot_all(phys_in.ch_name, phys_in.timeseries, phys_in.units,
                 phys_in.freq, infile, conversion_path)
    # If only info were asked, end here.
    if info:
        return

    # The next few lines remove the undesired channels from phys_in.
    if chsel:
        LGR.info('Dropping unselected channels')
        # Go backwards so that deleting does not shift the indices still to visit.
        for i in reversed(range(0, phys_in.ch_amount)):
            if i not in chsel:
                phys_in.delete_at_index(i)

    # If requested, change channel names.
    if ch_name:
        LGR.info('Renaming channels with given names')
        phys_in.rename_channels(ch_name)

    # Checking acquisition type via user's input
    if tr is not None and num_timepoints_expected is not None:
        fileprefix = os.path.join(conversion_path, file_stem)

        # Multi-run acquisition type section
        # Check list length, more than 1 means multi-run
        if len(num_timepoints_expected) > 1:
            # if multi-run of same sequence type, pad list with ones
            # and multiply array with user's input
            if len(tr) == 1:
                tr = np.ones(len(num_timepoints_expected)) * tr[0]

            # Sum of values in ntp_list should be equivalent to num_timepoints_found
            phys_in.check_trigger_amount(
                thr=thr,
                num_timepoints_expected=sum(num_timepoints_expected),
                tr=1)

            # Check that sum of tp expected is equivalent to num_timepoints_found,
            # if it passes call slice4phys
            if phys_in.num_timepoints_found != sum(num_timepoints_expected):
                raise Exception('The number of triggers found is different '
                                'than expected. Better stop now than break '
                                'something.')

            # slice the recording based on user's entries
            # !!! ATTENTION: PHYS_IN GETS OVERWRITTEN AS DICTIONARY
            phys_in = slice4phys(phys_in, num_timepoints_expected, tr,
                                 phys_in.thr, pad)
            # returns a dictionary in the form {run_idx: phys_in[startpoint, endpoint]}

            # save a figure for each run | give the right acquisition parameters for runs
            for i, run in enumerate(phys_in):
                plot_fileprefix = f'{fileprefix}_0{run}'
                viz.export_trigger_plot(phys_in[run], chtrig, plot_fileprefix,
                                        tr[i], num_timepoints_expected[i],
                                        filename, sub, ses)

        # Single run acquisition type, or : nothing to split workflow
        else:
            # Run analysis on trigger channel to get first timepoint
            # and the time offset.
            phys_in.check_trigger_amount(thr, num_timepoints_expected[0], tr[0])
            # save a figure of the trigger
            viz.export_trigger_plot(phys_in, chtrig, fileprefix, tr[0],
                                    num_timepoints_expected[0], filename,
                                    sub, ses)

            # Reassign phys_in as dictionary
            # !!! ATTENTION: PHYS_IN GETS OVERWRITTEN AS DICTIONARY
            phys_in = {1: phys_in}

    else:
        LGR.warning('Skipping trigger pulse count. If you want to run it, '
                    'call phys2bids using both "-ntp" and "-tr" arguments')
        # !!! ATTENTION: PHYS_IN GETS OVERWRITTEN AS DICTIONARY
        phys_in = {1: phys_in}

    # The next few lines create a dictionary of different BlueprintInput
    # objects, one for each unique frequency for each run in phys_in
    # they also save the amount of runs and unique frequencies
    run_amount = len(phys_in)
    uniq_freq_list = set(phys_in[1].freq)
    freq_amount = len(uniq_freq_list)
    if freq_amount > 1:
        LGR.info(f'Found {freq_amount} different frequencies in input!')

    if run_amount > 1:
        LGR.info(f'Found {run_amount} different scans in input!')

    LGR.info(f'Preparing {freq_amount*run_amount} output files.')
    # Create phys_out dict that will have a blueprint object for each different frequency
    phys_out = {}

    # BIDS naming is only possible with both a heuristic file and a subject.
    use_bids = heur_file is not None and sub is not None

    if use_bids:
        LGR.info(f'Preparing BIDS output using {heur_file}')
        # If heuristics are used, init a dict of arguments to pass to use_heuristic
        heur_args = {'heur_file': heur_file, 'sub': sub, 'ses': ses,
                     'filename': filename, 'outdir': outdir, 'run': '',
                     'record_label': ''}
        # Generate participants.tsv file if it doesn't exist already.
        # Update the file if the subject is not in the file.
        # Do not update if the subject is already in the file.
        bids.participants_file(outdir, yml, sub)
        # Generate dataset_description.json file if it doesn't exist already.
        bids.dataset_description_file(outdir)
        # Generate README file if it doesn't exist already.
        bids.readme_file(outdir)
        # Keep a copy of the heuristic next to the conversion log.
        heur_stem = os.path.splitext(os.path.basename(heur_file))[0]
        cp(heur_file, os.path.join(conversion_path, heur_stem + '.py'))
    elif heur_file is not None and sub is None:
        LGR.warning('While "-heur" was specified, option "-sub" was not.\n'
                    'Skipping BIDS formatting.')

    # Export a (set of) phys_out for each element in phys_in
    # run keys start from 1 (human friendly)
    for run in phys_in:
        for uniq_freq in uniq_freq_list:
            # Initialise the key for the (possibly huge amount of) dictionary entries
            key = f'{run}_{uniq_freq}'
            # copy the phys_in object to the new dict entry
            phys_out[key] = deepcopy(phys_in[run])
            # this counter will take into account how many channels are eliminated
            count = 0
            # for each channel in the original phys_in object
            # take the frequency
            for idx, i in enumerate(phys_in[run].freq):
                # if that frequency is different than the frequency of the phys_obj entry
                if i != uniq_freq:
                    # eliminate that channel from the dict since we only want channels
                    # with the same frequency
                    phys_out[key].delete_at_index(idx - count)
                    # take into account the elimination so in the next eliminated
                    # channel we eliminate correctly
                    count += 1
            # Also create a BlueprintOutput object for each unique frequency found.
            # Populate it with the corresponding blueprint input and replace it
            # in the dictionary.
            # Add time channel in the proper frequency.
            if uniq_freq != phys_in[run].freq[0]:
                phys_out[key].ch_name.insert(0, phys_in[run].ch_name[0])
                phys_out[key].units.insert(0, phys_in[run].units[0])
                phys_out[key].timeseries.insert(
                    0, np.linspace(phys_in[run].timeseries[0][0],
                                   phys_in[run].timeseries[0][-1],
                                   num=phys_out[key].timeseries[0].shape[0]))
            # Add trigger channel in the proper frequency.
            if uniq_freq != phys_in[run].freq[chtrig]:
                phys_out[key].ch_name.insert(1, phys_in[run].ch_name[chtrig])
                phys_out[key].units.insert(1, phys_in[run].units[chtrig])
                phys_out[key].timeseries.insert(
                    1, np.interp(phys_out[key].timeseries[0],
                                 phys_in[run].timeseries[0],
                                 phys_in[run].timeseries[chtrig]))
            phys_out[key] = BlueprintOutput.init_from_blueprint(phys_out[key])

        # Preparing output parameters: name and folder.
        for uniq_freq in uniq_freq_list:
            key = f'{run}_{uniq_freq}'
            # If possible, prepare bids renaming.
            if use_bids:
                # Add run info to heur_args if more than one run is present
                if run_amount > 1:
                    heur_args['run'] = f'{run:02d}'

                # Append "recording-freq" to filename if more than one freq
                if freq_amount > 1:
                    heur_args['record_label'] = f'{uniq_freq:.0f}Hz'

                phys_out[key].filename = bids.use_heuristic(**heur_args)

                # If any filename exists already because of multirun, append labels
                # But warn about the non-validity of this BIDS-like name.
                if run_amount > 1:
                    # Compare against the OTHER outputs only: the current entry
                    # always equals itself, which would flag every file.
                    name_taken = any(
                        other.filename == phys_out[key].filename
                        for other_key, other in phys_out.items()
                        if other_key != key)
                    if name_taken:
                        phys_out[key].filename = (f'{phys_out[key].filename}'
                                                  f'_take-{run}')
                        LGR.warning(
                            'Identified multiple outputs with the same name.\n'
                            'Appending fake label to avoid overwriting.\n'
                            '!!! ATTENTION !!! the output is not BIDS compliant.\n'
                            'Please check heuristics to solve the problem.')

            else:
                phys_out[key].filename = os.path.join(outdir, file_stem)
                # Append "run" to filename if more than one run
                if run_amount > 1:
                    phys_out[key].filename = f'{phys_out[key].filename}_{run:02d}'
                # Append "freq" to filename if more than one freq
                if freq_amount > 1:
                    phys_out[key].filename = (f'{phys_out[key].filename}'
                                              f'_{uniq_freq:.0f}Hz')

            LGR.info(f'Exporting files for run {run} freq {uniq_freq}')
            np.savetxt(phys_out[key].filename + '.tsv.gz',
                       phys_out[key].timeseries, fmt='%.8e', delimiter='\t')
            print_json(phys_out[key].filename, phys_out[key].freq,
                       phys_out[key].start_time, phys_out[key].ch_name)
            # The summary goes to the conversion folder, named after the output.
            out_stem = os.path.splitext(
                os.path.basename(phys_out[key].filename))[0]
            print_summary(filename, num_timepoints_expected,
                          phys_in[run].num_timepoints_found, uniq_freq,
                          phys_out[key].start_time,
                          os.path.join(conversion_path, out_stem))
def phys2bids(filename, info=False, indir='.', outdir='.', heur_file=None,
              sub=None, ses=None, chtrig=0, chsel=None,
              num_timepoints_expected=0, tr=1, thr=None, ch_name=[],
              chplot='', debug=False, quiet=False):
    """
    Main workflow of phys2bids.

    Checks the input options, sets up logging, then imports the interface
    matching the input file type and reads the file. If only info is
    required, it prints (and plots) a summary and returns.
    Otherwise, it writes one .tsv.gz file (plus json and summary) for each
    unique sampling frequency found, possibly with a BIDS name.

    Raises
    ------
    NotImplementedError
        If the file extension is not supported yet.
    """
    # --- Options coherence, part I (output folder) ---
    # TODO: this can probably be done while parsing.
    outdir = utils.check_input_dir(outdir)
    utils.path_exists_or_make_it(outdir)

    # --- Logging: one timestamped tsv logfile in outdir, plus the stream ---
    timestamp = datetime.datetime.now().strftime('%Y-%m-%dT%H%M%S')
    logname = os.path.join(outdir, f'phys2bids_{timestamp}.tsv')

    log_formatter = logging.Formatter(
        '%(asctime)s\t%(name)-12s\t%(levelname)-8s\t%(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S')
    log_handler = logging.FileHandler(logname)
    log_handler.setFormatter(log_formatter)
    stream_handler = logging.StreamHandler()

    # quiet takes precedence over debug; default level is INFO.
    if quiet:
        log_level = logging.WARNING
    elif debug:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.basicConfig(level=log_level,
                        handlers=[log_handler, stream_handler])

    version_number = _version.get_versions()['version']
    LGR.info(f'Currently running phys2bids version {version_number}')
    LGR.info(f'Input file is {filename}')

    # --- Options coherence, part II (input folder and files) ---
    # TODO: this can probably be done while parsing.
    indir = utils.check_input_dir(indir)
    filename, ftype = utils.check_input_type(filename, indir)

    if heur_file:
        heur_file = utils.check_input_ext(heur_file, '.py')
        utils.check_file_exists(heur_file)

    infile = os.path.join(indir, filename)
    utils.check_file_exists(infile)

    # Output prefix used when no BIDS name is available (and for the plot).
    plain_prefix = os.path.join(
        outdir, os.path.splitext(os.path.basename(filename))[0])

    # --- Pick the reader matching the file type ---
    if ftype == 'acq':
        from phys2bids.interfaces.acq import populate_phys_input
    elif ftype == 'txt':
        from phys2bids.interfaces.txt import populate_phys_input
    else:
        # TODO: we should add a logger here.
        raise NotImplementedError('Currently unsupported file type.')

    LGR.info(f'Reading the file {infile}')
    phys_in = populate_phys_input(infile, chtrig)
    LGR.info('Reading infos')
    phys_in.print_info(filename)

    # TODO: here the function viz.plot_channel should be called
    if chplot != '' or info:
        viz.plot_all(phys_in.ch_name, phys_in.timeseries, phys_in.units,
                     phys_in.freq, infile, chplot)

    # Info-only request: nothing else to do.
    if info:
        return

    # --- Trigger check and plot ---
    # The plot name gets sub and ses labels, if given, so that multiple
    # outputs can live in the same folder.
    if tr != 0 and num_timepoints_expected != 0:
        # Analyse the trigger channel to get first timepoint and time offset.
        # TODO: get option of no trigger! (which is wrong practice or Respiract)
        phys_in.check_trigger_amount(chtrig, thr, num_timepoints_expected, tr)
        LGR.info('Plot trigger')
        plot_path = plain_prefix
        if sub:
            plot_path = f'{plot_path}_sub-{sub}'
        if ses:
            plot_path = f'{plot_path}_ses-{ses}'
        viz.plot_trigger(phys_in.timeseries[0], phys_in.timeseries[chtrig],
                         plot_path, tr, phys_in.thr, num_timepoints_expected,
                         filename)
    else:
        LGR.warning('Skipping trigger pulse count. If you want to run it, '
                    'call phys2bids using "-ntp" and "-tr" arguments')

    # --- Channel selection and renaming ---
    if chsel:
        LGR.info('Dropping unselected channels')
        # Walk from the last channel down so deletions don't shift indices.
        for ch_idx in range(phys_in.ch_amount - 1, -1, -1):
            if ch_idx not in chsel:
                phys_in.delete_at_index(ch_idx)

    if ch_name:
        LGR.info('Renaming channels with given names')
        phys_in.rename_channels(ch_name)

    # --- Split the input in one output object per unique frequency ---
    uniq_freq_list = set(phys_in.freq)
    output_amount = len(uniq_freq_list)
    if output_amount > 1:
        LGR.warning(f'Found {output_amount} different frequencies in input!')

    LGR.info(f'Preparing {output_amount} output files.')
    phys_out = {}
    for uniq_freq in uniq_freq_list:
        # Start from a full copy, then strip channels of other frequencies.
        single_freq = deepcopy(phys_in)
        removed = 0
        for ch_idx, ch_freq in enumerate(phys_in.freq):
            if ch_freq == uniq_freq:
                continue
            # Indices in the copy are shifted by the deletions done so far.
            single_freq.delete_at_index(ch_idx - removed)
            removed += 1
        # Store the output blueprint built from the pruned input blueprint.
        phys_out[uniq_freq] = BlueprintOutput.init_from_blueprint(single_freq)

    # BIDS naming needs both the heuristic file and the subject label.
    use_bids = bool(heur_file and sub)
    if use_bids:
        LGR.info(f'Preparing BIDS output using {heur_file}')
    elif heur_file:
        LGR.warning('While "-heur" was specified, option "-sub" was not.\n'
                    'Skipping BIDS formatting.')

    # --- Name and export every output ---
    several_outputs = output_amount > 1
    for uniq_freq in uniq_freq_list:
        if use_bids:
            heur_call_args = [heur_file, sub, ses, filename, outdir]
            if several_outputs:
                # Add "recording-freq" to filename if more than one freq
                heur_call_args.append(uniq_freq)
            outfile = use_heuristic(*heur_call_args)
        else:
            outfile = plain_prefix
            if several_outputs:
                # Append "freq" to filename if more than one freq
                outfile = f'{outfile}_{uniq_freq}'

        LGR.info(f'Exporting files for freq {uniq_freq}')
        current = phys_out[uniq_freq]
        savetxt(outfile + '.tsv.gz', current.timeseries,
                fmt='%.8e', delimiter='\t')
        print_json(outfile, current.freq, current.start_time, current.ch_name)
        print_summary(filename, num_timepoints_expected,
                      phys_in.num_timepoints_found, uniq_freq,
                      current.start_time, outfile)
def _main(argv=None):
    """
    Main workflow of phys2bids.

    Runs the parser, does some checks on input, then imports the right
    interface file to read the input. If only info is required,
    it returns a summary onscreen.
    Otherwise, it operates on the input to return a .tsv.gz file, possibly
    in BIDS format.

    Parameters
    ----------
    argv : list of str or None
        Command line arguments handed to the parser's ``parse_args``.

    Raises
    ------
    NotImplementedError
        If the input file type is neither 'acq' nor 'txt'.
    """
    options = _get_parser().parse_args(argv)

    # Check options to make them internally coherent
    # #!# This can probably be done while parsing?
    options.indir = utils.check_input_dir(options.indir)
    options.outdir = utils.check_input_dir(options.outdir)
    options.filename, ftype = utils.check_input_type(options.filename,
                                                     options.indir)

    if options.heur_file:
        options.heur_file = utils.check_input_ext(options.heur_file, '.py')
        utils.check_file_exists(options.heur_file)

    infile = os.path.join(options.indir, options.filename)
    utils.check_file_exists(infile)
    # Default output prefix: output folder + input name without extension.
    # Kept untouched so that per-frequency names can be derived from it.
    base_outfile = os.path.join(
        options.outdir,
        os.path.splitext(os.path.basename(options.filename))[0])

    # Read file!
    if ftype == 'acq':
        from phys2bids.interfaces.acq import populate_phys_input
    elif ftype == 'txt':
        from phys2bids.interfaces.txt import populate_phys_input
    else:
        # #!# We should add a logger here.
        raise NotImplementedError('Currently unsupported file type.')

    print('Reading the file')
    phys_in = populate_phys_input(infile, options.chtrig)
    print('Reading infos')
    phys_in.print_info(options.filename)
    # #!# Here the function viz.plot_channel should be called
    # for the desired channels.

    # If only info were asked, end here.
    if options.info:
        return

    # Run analysis on trigger channel to get first timepoint and the time offset.
    # #!# Get option of no trigger! (which is wrong practice or Respiract)
    phys_in.check_trigger_amount(options.thr, options.num_timepoints_expected,
                                 options.tr)

    # Create output folder if necessary
    print('Checking that the output folder exists')
    utils.path_exists_or_make_it(options.outdir)

    # Create trigger plot. If possible, to have multiple outputs in the same
    # place, adds sub and ses label.
    print('Plot trigger')
    plot_path = base_outfile
    if options.sub:
        plot_path += f'_sub-{options.sub}'
    if options.ses:
        # The session gets its own "ses" label (it used to be written as "sub").
        plot_path += f'_ses-{options.ses}'
    # NOTE(review): the trigger is read from index 1 rather than from
    # options.chtrig -- confirm that the reader relocates it there.
    viz.plot_trigger(phys_in.timeseries[0], phys_in.timeseries[1],
                     plot_path, options)

    # The next few lines remove the undesired channels from phys_in.
    if options.chsel:
        print('Dropping unselected channels')
        # Go backwards so that deleting does not shift the indices still to visit.
        for i in reversed(range(0, phys_in.ch_amount)):
            if i not in options.chsel:
                phys_in.delete_at_index(i)

    # If requested, change channel names.
    if options.ch_name:
        print('Renaming channels with given names')
        phys_in.rename_channels(options.ch_name)

    # The next few lines create a dictionary of different BlueprintInput
    # objects, one for each unique frequency in phys_in
    uniq_freq_list = set(phys_in.freq)
    output_amount = len(uniq_freq_list)
    if output_amount > 1:
        print(f'Found {output_amount} different frequencies in input!')

    print(f'Preparing {output_amount} output files.')
    phys_out = {}
    for uniq_freq in uniq_freq_list:
        phys_out[uniq_freq] = deepcopy(phys_in)
        # Drop every channel sampled at another frequency. Channels are
        # addressed by INDEX (not by their frequency value) and visited from
        # the last one, so earlier indices stay valid after each deletion.
        for ch_idx in reversed(range(len(phys_in.freq))):
            if phys_in.freq[ch_idx] != uniq_freq:
                phys_out[uniq_freq].delete_at_index(ch_idx)

        # Also create a BlueprintOutput object for each unique frequency found.
        # Populate it with the corresponding blueprint input and replace it
        # in the dictionary.
        phys_out[uniq_freq] = BlueprintOutput.init_from_blueprint(
            phys_out[uniq_freq])

    if options.heur_file and options.sub:
        print(f'Preparing BIDS output using {options.heur_file}')
    elif options.heur_file and not options.sub:
        print('While "-heur" was specified, option "-sub" was not.\n'
              'Skipping BIDS formatting.')

    for uniq_freq in uniq_freq_list:
        # If possible, prepare bids renaming.
        if options.heur_file and options.sub:
            if output_amount > 1:
                # Add "recording-freq" to filename if more than one freq
                outfile = use_heuristic(options.heur_file, options.sub,
                                        options.ses, options.filename,
                                        options.outdir, uniq_freq)
            else:
                outfile = use_heuristic(options.heur_file, options.sub,
                                        options.ses, options.filename,
                                        options.outdir)
        elif output_amount > 1:
            # Append "freq" to the real output prefix if more than one freq.
            # Derived from the base each time so suffixes do not pile up.
            outfile = f'{base_outfile}_{uniq_freq}'
        else:
            outfile = base_outfile

        print(f'Exporting files for freq {uniq_freq}')
        savetxt(outfile + '.tsv.gz', phys_out[uniq_freq].timeseries,
                fmt='%.8e', delimiter='\t')
        print_json(outfile, phys_out[uniq_freq].freq,
                   phys_out[uniq_freq].start_time,
                   phys_out[uniq_freq].ch_name)
        print_summary(options.filename, options.num_timepoints_expected,
                      phys_in.num_timepoints_found, uniq_freq,
                      phys_out[uniq_freq].start_time, outfile)