def check_and_prepare_pdf(plot_file):
    """
    Validate the requested plot output and prepare a PDF container if needed.

    Input
    -----
    plot_file: plot file name including its extension, or None

    Output
    ------
    plot_file: None when no file was requested; a PdfPages object opened on
               the given name when the extension is '.pdf'; otherwise the
               file name as it was passed in
    """
    # nothing requested, nothing to check
    if plot_file is None:
        return plot_file

    extension = splitext(plot_file)[-1]
    if extension == '.pdf':
        return PdfPages(plot_file)

    # Probe the format by asking matplotlib to save to that name; a
    # ValueError from savefig is reported as an unsupported format.
    try:
        plt.savefig(plot_file)
    except ValueError:
        cerr('E: Format {} is not supported!'.format(extension))
        cexit('Exiting...')

    return plot_file
def get_traces(args, dbh):
    """
    Collect traces as a list of (absolute_path, trace) tuples.

    When dbh is None the trace is read from the file named by args.file,
    going through the module-level cache_traces mapping so a file is parsed
    only once. When a database handler is supplied, nothing is collected
    and an empty list is returned.
    """
    if dbh is not None:
        # the database-backed branch does not collect anything
        return []

    # get from infile
    if args.file is False:
        cexit('E - Please provide a filename or Sqlite database path')

    trace_path = os.path.abspath(args.file)
    if trace_path in cache_traces:
        trace = cache_traces[trace_path]
    else:
        from fatools.lib.fautil.traceio import read_abif_stream
        with open(trace_path, 'rb') as stream:
            trace = read_abif_stream(stream)
        cache_traces[trace_path] = trace

    return [(trace_path, trace)]
def do_genemapper2tab(args, dbh):
    """
    Convert GeneMapper CSV export(s) into tab-delimited assay list file(s).

    For each path in args.infiles the CSV is read with csv.DictReader using
    the columns 'Sample File', 'Sample Name', 'Run Name', 'Panel' and
    'Marker'. Markers are grouped per (sample, assay, panel) and one line
    per group is written to '<infile>.tab' under the header
    SAMPLE / ASSAY / PANEL / OPTIONS.

    For each group, the upper-cased marker codes of the panel (taken from
    dbh.get_panel(panel).get_marker_codes()) are compared with the
    upper-cased markers found in the CSV:

    - markers in the CSV that are not in the panel abort through cexit()
    - panel markers missing from the CSV are written as
      'exclude=<code>,<code>,...' in the OPTIONS column (sorted, so the
      output is reproducible)

    When args.species is set, it is prefixed as '<species>/' to every CSV
    marker that does not already contain a '/'.

    An assay file name appearing with two different run names aborts
    through cexit().
    """
    species = args.species or None

    for infile in args.infiles:
        sample_set = defaultdict(list)
        assay_list = {}

        # newline='' is what the csv module asks for on files it parses;
        # the with-block guarantees the input handle is closed.
        with open(infile, newline='') as instream:
            for row in csv.DictReader(instream):
                assay = row['Sample File']
                sample = row['Sample Name']
                run_name = row['Run Name']
                panel = row['Panel']
                marker = row['Marker']

                # the same FSA file name must always belong to the same run
                if assay_list.setdefault(assay, run_name) != run_name:
                    cexit('Inconsistence or duplicate FSA file name: %s' % assay)

                sample_set[(sample, assay, panel)].append(marker)

        with open(infile + '.tab', 'w') as outfile:
            outfile.write('SAMPLE\tASSAY\tPANEL\tOPTIONS\n')

            for token in sorted(sample_set):
                sample, assay, panel = token
                db_panel = dbh.get_panel(panel)

                s_panel_markers = {x.upper() for x in db_panel.get_marker_codes()}
                s_assay_markers = {
                    ('%s/%s' % (species, x) if (species and '/' not in x) else x).upper()
                    for x in sample_set[token]
                }

                unknown = s_assay_markers - s_panel_markers
                if unknown:
                    cexit('ERROR inconsistent marker(s) for sample %s assay %s: %s'
                          % (sample, assay, str(unknown)))

                excludes = s_panel_markers - s_assay_markers
                if excludes:
                    # sorted: joining a set directly gives an arbitrary order
                    options = 'exclude=%s' % ','.join(sorted(excludes))
                else:
                    options = ''

                outfile.write('%s\t%s\t%s\t%s\n' % (sample, assay, panel, options))
def do_export(args, dbh):
    """
    Export the samples selected by the query described in args.

    The query is built with get_query(args, dbh); its filtered analytical
    sets are handed to fatools' export() together with args.outfile and
    args.outformat. If the query yields no sample, cexit() is called with
    an error message; otherwise the sample count is reported on cerr.
    """
    from fatools.lib.analytics.export import export

    query = get_query(args, dbh)
    analytical_sets = query.get_filtered_analytical_sets()

    if analytical_sets.total_samples <= 0:
        cexit('ERR - query does not yield any sample data')
    else:
        cerr('INFO - total sample number: %d' % analytical_sets.total_samples)

    # the return value of export() is not used here
    export(analytical_sets, dbh, outfile=args.outfile, format=args.outformat)
    cout('Done.')
def do_viewbin(args, dbh):
    """
    Print the bin table of one or more markers for a batch.

    args.marker is a comma-separated list of marker codes (required);
    args.batch is the batch code, falling back to 'default' when not set.
    For each entry of the marker's sorted bins, items 0..3 are printed as
    Bin, Mean, 25%P and 75%P, followed by the width (item 3 minus item 2).
    """
    if not args.marker:
        cexit('ERR - please provide marker code')

    marker_codes = args.marker.split(',')
    markers = [dbh.get_marker(code) for code in marker_codes]
    batch = dbh.get_batch(args.batch or 'default')

    row_format = ' %3d %5.2f %5.2f %5.2f %4.2f'

    for marker in markers:
        cout('Marker: %s' % marker.label)
        cout(' Bin Mean 25%P 75%P Width')
        cout(' ====================================')
        for entry in marker.get_bin(batch).sortedbins:
            lower = entry[2]
            upper = entry[3]
            cout(row_format % (entry[0], entry[1], lower, upper, upper - lower))
def do_viewbin(args, dbh):
    """
    Show the bins defined for the given marker(s) in a batch.

    Requires args.marker (comma-separated marker codes). The batch is taken
    from args.batch, or 'default' when that is empty. One table is printed
    per marker, one line per entry of the marker's sorted bins: items 0..3
    of the entry, then the difference between item 3 and item 2 as width.
    """
    if not args.marker:
        cexit('ERR - please provide marker code')

    markers = []
    for code in args.marker.split(','):
        markers.append(dbh.get_marker(code))

    batch_code = args.batch or 'default'
    batch = dbh.get_batch(batch_code)

    for m in markers:
        cout('Marker: %s' % m.label)
        cout(' Bin Mean 25%P 75%P Width')
        cout(' ====================================')
        sorted_bins = m.get_bin(batch).sortedbins
        for b in sorted_bins:
            width = b[3] - b[2]
            values = (b[0], b[1], b[2], b[3], width)
            cout(' %3d %5.2f %5.2f %5.2f %4.2f' % values)
def do_initbin(args, dbh):
    """
    Initialise the bins of one or more markers for a batch.

    args.marker: comma-separated marker codes (required)
    args.range:  bin range as '<start>-<end>', both integers (required)
    args.batch:  batch code; set to 'default' on args when empty

    Each marker's initbins(start, end, batch) is called and a message is
    written to cerr per marker. Missing or malformed arguments are reported
    through cexit().
    """
    if not args.marker:
        cexit('ERR - please provide marker code')

    # Also catch a range that was not supplied at all (None/False/''):
    # a bare "'-' not in args.range" raises TypeError on None.
    if not args.range or '-' not in args.range:
        cexit('ERR - please provide range for bin')

    if not args.batch:
        args.batch = 'default'
    batch = dbh.get_batch(args.batch)

    markers = [dbh.get_marker(code) for code in args.marker.split(',')]

    ranges = args.range.split('-')
    try:
        start_range = int(ranges[0])
        end_range = int(ranges[1])
    except ValueError:
        # e.g. 'a-b' or '-5': report it instead of showing a traceback
        cexit('ERR - invalid range for bin: %s' % args.range)

    # NOTE(review): looks like leftover debug output -- confirm before removing
    print(markers)

    for m in markers:
        m.initbins(start_range, end_range, batch)
        cerr('INFO - bin for marker %s with batch %s has been created.'
             % (m.label, batch.code))
def do_initbin(args, dbh):
    """
    Create the initial bins for the given marker(s) in a batch.

    args.marker: comma-separated marker codes (required)
    args.range:  bin range written as '<start>-<end>' with integer bounds
                 (required)
    args.batch:  batch code; replaced by 'default' on args when not set

    For every marker, initbins(start, end, batch) is invoked and a
    confirmation is written to cerr. Invalid input is reported via cexit().
    """
    if not args.marker:
        cexit('ERR - please provide marker code')

    # A range that was never given (None/False/'') must produce the same
    # error as one without '-'; testing "'-' in None" would raise TypeError.
    if not args.range or '-' not in args.range:
        cexit('ERR - please provide range for bin')

    if not args.batch:
        args.batch = 'default'
    batch = dbh.get_batch(args.batch)

    markers = [dbh.get_marker(code) for code in args.marker.split(',')]

    ranges = args.range.split('-')
    try:
        start_range = int(ranges[0])
        end_range = int(ranges[1])
    except ValueError:
        # non-numeric or empty bound: give a readable error, not a traceback
        cexit('ERR - invalid range for bin: %s' % args.range)

    # NOTE(review): looks like leftover debug output -- confirm before removing
    print(markers)

    for m in markers:
        m.initbins(start_range, end_range, batch)
        cerr('INFO - bin for marker %s with batch %s has been created.'
             % (m.label, batch.code))
def get_fsa_list( args, dbh ):
    """
    get fsa instance from database based on parameters in args

    args.batch  : batch code (required)
    args.sample : optional comma-separated sample codes to keep
    args.fsa    : optional comma-separated assay (FSA) file names to keep
    args.panel  : optional comma-separated panel codes to keep
    args.marker : optional comma-separated marker codes

    Returns a list of (assay, sample_code) tuples for every assay of the
    batch that passes all the filters that were given. An empty filter
    means no restriction.
    """
    if not args.batch:
        cexit('ERR: using database requires --batch argument!', 1)

    batch = dbh.get_batch( args.batch )
    if not batch:
        cexit('ERR: batch %s not found!' % args.batch, 1)

    # sets: only membership is tested below
    samples = set(args.sample.split(',')) if args.sample else set()
    # the original read args.assay here although the guard tests args.fsa,
    # and then filtered on an undefined name
    fsas = set(args.fsa.split(',')) if args.fsa else set()
    panels = set(args.panel.split(',')) if args.panel else set()

    if args.marker:
        # The original passed args.panel here. The result is not used for
        # filtering; the lookup is kept in case it validates the codes.
        # NOTE(review): confirm get_markers() takes a list of marker codes
        dbh.get_markers(args.marker.split(','))

    fsa_list = []
    for sample in batch.samples:
        if samples and sample.code not in samples:
            continue
        for assay in sample.assays:
            if fsas and assay.filename not in fsas:
                continue
            if panels and assay.panel.code not in panels:
                continue
            fsa_list.append( (assay, sample.code) )

    cerr('I: number of assays to be processed: %d' % len(fsa_list))
    return fsa_list
def get_fsa_list(args, dbh):
    """
    get fsa instance from database based on parameters in args

    args.batch  : batch code (required)
    args.sample : optional comma-separated sample codes to keep
    args.fsa    : optional comma-separated assay (FSA) file names to keep
    args.panel  : optional comma-separated panel codes to keep
    args.marker : optional comma-separated marker codes

    Returns a list of (assay, sample_code) tuples, one per assay in the
    batch that satisfies every filter supplied. A filter that is not
    supplied does not restrict the result.
    """
    if not args.batch:
        cexit('ERR: using database requires --batch argument!', 1)

    batch = dbh.get_batch(args.batch)
    if not batch:
        cexit('ERR: batch %s not found!' % args.batch, 1)

    # sets: only membership is tested below
    samples = set(args.sample.split(',')) if args.sample else set()
    # guard and value now use the same attribute (was args.fsa / args.assay),
    # and the loop below uses this name instead of an undefined one
    fsas = set(args.fsa.split(',')) if args.fsa else set()
    panels = set(args.panel.split(',')) if args.panel else set()

    if args.marker:
        # Was fed args.panel. The markers are not used as a filter here;
        # the lookup is retained in case it validates the codes.
        # NOTE(review): confirm get_markers() takes a list of marker codes
        dbh.get_markers(args.marker.split(','))

    fsa_list = []
    for sample in batch.samples:
        if samples and sample.code not in samples:
            continue
        for assay in sample.assays:
            if fsas and assay.filename not in fsas:
                continue
            if panels and assay.panel.code not in panels:
                continue
            fsa_list.append((assay, sample.code))

    cerr('I: number of assays to be processed: %d' % len(fsa_list))
    return fsa_list
def do_initsample(args, dbh):
    """
    Add or update the samples of a batch from a sample information file.

    args.batch  : batch code (required; exits with status 1 when missing)
    args.infile : sample file; '.csv' (comma-delimited) or '.tab'/'.tsv'
                  (tab-delimited) is parsed by the batch's sample class
                  csv2dict(); '.json'/'.yaml' is loaded with yaml and must
                  provide the keys 'codes' and 'samples'

    Each sample code is looked up in the batch. Missing samples are added
    and given the default location; existing ones are reported as being
    updated. In both cases the sample is updated from the file data and
    flushed. A summary of inserted/updated counts is printed at the end.
    Any other file extension is rejected through cexit().
    """
    if not args.batch:
        cerr('ERR: batch code must be supplied!')
        sys.exit(1)

    b = dbh.Batch.search(args.batch, dbh.session)
    cout('INFO - using batch code: %s' % b.code)

    ext = os.path.splitext(args.infile)[1]

    if ext in ('.csv', '.tab', '.tsv'):
        delim = ',' if ext == '.csv' else '\t'
        # with-block: the input handle is closed once parsing is done
        with open(args.infile) as instream:
            dict_samples, errlog, sample_codes = b.get_sample_class().csv2dict(
                instream, with_report=True, delimiter=delim)

        if dict_samples is None:
            cout('Error processing sample info file')
            cout('\n'.join(errlog))
            cexit('Terminated!')

    elif ext in ('.json', '.yaml'):
        # NOTE(review): yaml.load() without an explicit Loader can build
        # arbitrary Python objects and is rejected by recent PyYAML
        # releases; consider yaml.safe_load() if the file holds plain data.
        with open(args.infile) as instream:
            payload = yaml.load(instream)
        sample_codes = payload['codes']
        dict_samples = payload['samples']

    else:
        # without this branch sample_codes would be undefined further down
        cexit('ERR - unsupported sample file extension: %s' % ext)

    inserted = 0
    updated = 0

    # get default location first (to satisfy RDBMS constraints)
    null_location = dbh.search_location(auto=True)

    session = dbh.session()
    with session.no_autoflush:
        for sample_code in sample_codes:
            d_sample = dict_samples[sample_code]
            db_sample = b.search_sample(sample_code)

            if not db_sample:
                db_sample = b.add_sample(sample_code)
                inserted += 1
                cout('INFO - sample: %s added.' % db_sample.code)
                db_sample.location = null_location
            else:
                cout('INFO - sample: %s being updated...' % db_sample.code)
                updated += 1

            db_sample.update(d_sample)
            session.flush([db_sample])

    cout('INFO - inserted new %d sample(s), updated %d sample(s)'
         % (inserted, updated))
def do_initsample(args, dbh):
    """
    Insert new samples into a batch, or update existing ones, from a file.

    args.batch  : batch code (required; exits with status 1 when missing)
    args.infile : sample file. '.csv' is read comma-delimited and
                  '.tab'/'.tsv' tab-delimited through the batch's sample
                  class csv2dict(); '.json'/'.yaml' is loaded with yaml and
                  must contain the keys 'codes' and 'samples'

    For every sample code: if the batch has no such sample it is added and
    assigned the default location, otherwise it is reported as being
    updated. The sample is then updated with the file data and flushed.
    The number of inserted and updated samples is printed at the end.
    Files with any other extension are rejected through cexit().
    """
    if not args.batch:
        cerr('ERR: batch code must be supplied!')
        sys.exit(1)

    b = dbh.Batch.search(args.batch, dbh.session)
    cout('INFO - using batch code: %s' % b.code)

    ext = os.path.splitext( args.infile )[1]

    if ext in ( '.csv', '.tab', '.tsv' ):
        delim = ',' if ext == '.csv' else '\t'
        # close the input file as soon as it has been parsed
        with open(args.infile) as instream:
            dict_samples, errlog, sample_codes = b.get_sample_class().csv2dict(
                instream, with_report=True, delimiter = delim )

        if dict_samples is None:
            cout('Error processing sample info file')
            cout('\n'.join(errlog))
            cexit('Terminated!')

    elif ext in ( '.json', '.yaml' ):
        # NOTE(review): yaml.load() without an explicit Loader can build
        # arbitrary Python objects and is rejected by recent PyYAML
        # releases; consider yaml.safe_load() if the file holds plain data.
        with open(args.infile) as instream:
            payload = yaml.load( instream )
        sample_codes = payload['codes']
        dict_samples = payload['samples']

    else:
        # otherwise sample_codes would be unbound in the loop below
        cexit('ERR - unsupported sample file extension: %s' % ext)

    inserted = 0
    updated = 0

    # get default location first (to satisfy RDBMS constraints)
    null_location = dbh.search_location(auto=True)

    session = dbh.session()
    with session.no_autoflush:
        for sample_code in sample_codes:
            d_sample = dict_samples[sample_code]
            db_sample = b.search_sample( sample_code )

            if not db_sample:
                db_sample = b.add_sample( sample_code )
                inserted += 1
                cout('INFO - sample: %s added.' % db_sample.code)
                db_sample.location = null_location
            else:
                cout('INFO - sample: %s being updated...' % db_sample.code)
                updated += 1

            db_sample.update( d_sample )
            session.flush( [db_sample] )

    cout('INFO - inserted new %d sample(s), updated %d sample(s)'
         % (inserted, updated))
def open_fsa( args ):
    """
    open FSA file(s) and prepare fsa instances

    requires: args.file, args.panel, args.panelfile

    Panels are uploaded from args.panelfile (a YAML file) or, when that is
    not given, from the built-in defaults in params. Markers always come
    from the built-in defaults; a custom args.markerfile is not supported
    and raises NotImplementedError.

    FSA files are taken from args.file (comma-separated file names) or,
    failing that, from args.infile, a delimited manifest with a FILENAME
    column and optional PANEL, OPTIONS and SAMPLE columns. Manifest rows
    whose file name starts with '#' are skipped.

    Unless args.no_cache is set, a '.fatools_caches/channels' directory is
    created under args.cache_path (or the user's home directory when that
    is None) and its path is passed to FSA.from_file.

    Returns a list of (fsa, label) tuples. The label is the SAMPLE column
    when the manifest has one, otherwise a running number starting at '1'.
    """
    from fatools.lib.fileio.models import Marker, Panel, FSA

    if not args.panel:
        cexit('ERR: using FSA file(s) requires --panel argument!')

    if not args.panelfile:
        cerr('WARN: using default built-in panels')
        Panel.upload(params.default_panels)
    else:
        with open(args.panelfile) as f:
            # open a YAML file that describe panel sets
            # NOTE(review): yaml.load() without an explicit Loader can build
            # arbitrary objects and is rejected by recent PyYAML releases;
            # consider yaml.safe_load() if panel files hold plain data.
            Panel.upload(yaml.load(f))

    if not args.markerfile:
        Marker.upload(params.default_markers)
    else:
        raise NotImplementedError()

    panel = Panel.get_panel(args.panel)
    fsa_list = []

    # prepare caching
    cache_path = None
    if not args.no_cache:
        if args.cache_path is not None:
            cache_root = args.cache_path
        else:
            cache_root = os.path.expanduser('~')
        cache_path = os.path.join(cache_root, '.fatools_caches', 'channels')
        # exist_ok avoids the race between an existence check and creation
        os.makedirs(cache_path, exist_ok=True)

    if args.file:
        for index, fsa_filename in enumerate(args.file.split(','), 1):
            fsa = FSA.from_file(fsa_filename.strip(), panel,
                                cache=not args.no_cache, cache_path=cache_path)
            fsa_list.append( (fsa, str(index)) )

    elif args.infile:
        with open(args.infile) as f:
            buf, delim = detect_buffer( f.read() )
        inrows = csv.DictReader( StringIO(buf), delimiter=delim )

        index = 1
        for r in inrows:
            fsa_filename = r['FILENAME'].strip()
            if fsa_filename.startswith('#'):
                continue

            options = tokenize( r['OPTIONS'] ) if r.get('OPTIONS', None) else None

            # a per-row PANEL overrides the panel given on the command line
            panel_code = r.get('PANEL', None) or args.panel
            panel = Panel.get_panel(panel_code)

            fsa = FSA.from_file(fsa_filename, panel, options,
                                cache=not args.no_cache, cache_path=cache_path)

            if 'SAMPLE' in inrows.fieldnames:
                fsa_list.append( (fsa, r['SAMPLE']) )
            else:
                fsa_list.append( (fsa, str(index)) )
                index += 1

    return fsa_list
def open_fsa(args, _params):
    """
    open FSA file(s) and prepare fsa instances

    requires: args.file, args.panel, args.panelfile

    Panels are uploaded from args.panelfile (a YAML file) or, when that is
    not given, from the built-in defaults in the params module. Markers
    always come from the built-in defaults; a custom args.markerfile is not
    supported and raises NotImplementedError.

    FSA files are taken, in this order of precedence, from:

    - args.file: comma-separated file names, each prefixed with
      args.indir + "/" unless args.indir is the empty string
    - args.infile: a delimited manifest with a FILENAME column and optional
      PANEL, OPTIONS and SAMPLE columns; rows whose file name starts with
      '#' are skipped
    - args.indir: every '*.fsa' file in that directory, in sorted order

    _params is passed through to FSA.from_file for every file.

    Returns a list of (fsa, label) tuples. The label is the SAMPLE column
    when the manifest has one, otherwise a running number starting at '1'.
    """
    from fatools.lib.fileio.models import Marker, Panel, FSA

    if not args.panel:
        cexit('ERR: using FSA file(s) requires --panel argument!')

    if not args.panelfile:
        cerr('WARN: using default built-in panels')
        Panel.upload(params.default_panels)
    else:
        with open(args.panelfile) as f:
            # open a YAML file that describe panel sets
            # NOTE(review): yaml.load() without an explicit Loader can build
            # arbitrary objects and is rejected by recent PyYAML releases;
            # consider yaml.safe_load() if panel files hold plain data.
            import yaml
            Panel.upload(yaml.load(f))

    if not args.markerfile:
        Marker.upload(params.default_markers)
    else:
        raise NotImplementedError()

    panel = Panel.get_panel(args.panel)
    fsa_list = []

    # prepare caching
    # NOTE(review): the directory is created based on args.use_cache while
    # FSA.from_file is driven by args.no_cache -- confirm both are intended
    if args.use_cache:
        # exist_ok avoids the race between an existence check and creation
        os.makedirs('.fatools_caches/channels', exist_ok=True)

    if args.file:
        for index, fsa_filename in enumerate(args.file.split(','), 1):
            fsa_filename = fsa_filename.strip()
            if args.indir != "":
                filename = args.indir + "/" + fsa_filename
            else:
                filename = fsa_filename
            fsa = FSA.from_file(filename, panel, _params, cache=not args.no_cache)
            fsa_list.append((fsa, str(index)))

    elif args.infile:
        with open(args.infile) as f:
            buf, delim = detect_buffer(f.read())
        inrows = csv.DictReader(StringIO(buf), delimiter=delim)

        index = 1
        for r in inrows:
            fsa_filename = r['FILENAME'].strip()
            if fsa_filename.startswith('#'):
                continue

            options = tokenize(r['OPTIONS']) if r.get('OPTIONS', None) else None

            # a per-row PANEL overrides the panel given on the command line
            panel_code = r.get('PANEL', None) or args.panel
            panel = Panel.get_panel(panel_code)

            fsa = FSA.from_file(fsa_filename, panel, _params, options,
                                cache=not args.no_cache)

            if 'SAMPLE' in inrows.fieldnames:
                fsa_list.append((fsa, r['SAMPLE']))
            else:
                fsa_list.append((fsa, str(index)))
                index += 1

    elif args.indir:
        import glob
        fsa_paths = sorted(glob.glob(args.indir + "/*.fsa"))
        for index, fsa_filename in enumerate(fsa_paths, 1):
            fsa = FSA.from_file(fsa_filename.strip(), panel, _params,
                                cache=not args.no_cache)
            fsa_list.append((fsa, str(index)))

    return fsa_list